[fix](nereids)update Agg stats estimation #21300
Agg stats estimation should use the largest group-by key's NDV as the base and multiply it by an expansion factor, which is calculated from the other group-by keys' NDVs. Previously, the smallest NDV was used as the base.
This commit is contained in:
@ -127,6 +127,7 @@ import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.util.AbstractMap.SimpleEntry;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -689,7 +690,7 @@ public class StatsCalculator extends DefaultPlanVisitor<Statistics, Void> {
|
||||
if (groupByCount > 0) {
|
||||
List<Double> groupByNdvs = groupByColStats.values().stream()
|
||||
.map(colStats -> colStats.ndv)
|
||||
.sorted().collect(Collectors.toList());
|
||||
.sorted(Comparator.reverseOrder()).collect(Collectors.toList());
|
||||
rowCount = groupByNdvs.get(0);
|
||||
for (int groupByIndex = 1; groupByIndex < groupByCount; ++groupByIndex) {
|
||||
rowCount *= Math.max(1, groupByNdvs.get(groupByIndex) * Math.pow(
|
||||
|
||||
@ -32,21 +32,22 @@ CteAnchor[cteId= ( CTEId#0=] )
|
||||
----------------PhysicalProject
|
||||
------------------PhysicalOlapScan[store_returns]
|
||||
--PhysicalQuickSort
|
||||
----PhysicalQuickSort
|
||||
------PhysicalProject
|
||||
--------NestedLoopJoin[INNER_JOIN](cast(paid as DOUBLE) > cast((0.05 * avg(netpaid)) as DOUBLE))
|
||||
----------PhysicalAssertNumRows
|
||||
------------PhysicalProject
|
||||
--------------hashAgg[GLOBAL]
|
||||
----------------PhysicalDistribute
|
||||
------------------hashAgg[LOCAL]
|
||||
--------------------PhysicalProject
|
||||
----------------------CteConsumer[cteId= ( CTEId#0=] )
|
||||
----------PhysicalDistribute
|
||||
----PhysicalDistribute
|
||||
------PhysicalQuickSort
|
||||
--------PhysicalProject
|
||||
----------NestedLoopJoin[INNER_JOIN](cast(paid as DOUBLE) > cast((0.05 * avg(netpaid)) as DOUBLE))
|
||||
------------hashAgg[GLOBAL]
|
||||
--------------PhysicalDistribute
|
||||
----------------hashAgg[LOCAL]
|
||||
------------------PhysicalProject
|
||||
--------------------filter((cast(i_color as VARCHAR(*)) = 'beige'))
|
||||
----------------------CteConsumer[cteId= ( CTEId#0=] )
|
||||
------------PhysicalDistribute
|
||||
--------------PhysicalAssertNumRows
|
||||
----------------PhysicalProject
|
||||
------------------hashAgg[GLOBAL]
|
||||
--------------------PhysicalDistribute
|
||||
----------------------hashAgg[LOCAL]
|
||||
------------------------PhysicalProject
|
||||
--------------------------CteConsumer[cteId= ( CTEId#0=] )
|
||||
|
||||
|
||||
@ -44,33 +44,33 @@ CteAnchor[cteId= ( CTEId#6=] )
|
||||
--------PhysicalQuickSort
|
||||
----------PhysicalProject
|
||||
------------hashJoin[INNER_JOIN](ws1.ca_county = ws3.ca_county)(CASE WHEN (web_sales > 0.00) THEN (cast(web_sales as DECIMALV3(38, 8)) / web_sales) ELSE NULL END > CASE WHEN (store_sales > 0.00) THEN (cast(store_sales as DECIMALV3(38, 8)) / store_sales) ELSE NULL END)
|
||||
--------------PhysicalProject
|
||||
----------------filter((ws3.d_year = 2000)(ws3.d_qoy = 3))
|
||||
------------------CteConsumer[cteId= ( CTEId#7=] )
|
||||
--------------PhysicalDistribute
|
||||
----------------PhysicalProject
|
||||
------------------hashJoin[INNER_JOIN](ss2.ca_county = ss3.ca_county)
|
||||
--------------------PhysicalDistribute
|
||||
----------------------PhysicalProject
|
||||
------------------------filter((ss3.d_year = 2000)(ss3.d_qoy = 3))
|
||||
--------------------------CteConsumer[cteId= ( CTEId#6=] )
|
||||
--------------------hashJoin[INNER_JOIN](ss1.ca_county = ss2.ca_county)(CASE WHEN (web_sales > 0.00) THEN (cast(web_sales as DECIMALV3(38, 8)) / web_sales) ELSE NULL END > CASE WHEN (store_sales > 0.00) THEN (cast(store_sales as DECIMALV3(38, 8)) / store_sales) ELSE NULL END)
|
||||
----------------------PhysicalDistribute
|
||||
------------------------PhysicalProject
|
||||
--------------------------filter((ss2.d_year = 2000)(ss2.d_qoy = 2))
|
||||
----------------------------CteConsumer[cteId= ( CTEId#6=] )
|
||||
----------------------hashJoin[INNER_JOIN](ss1.ca_county = ws1.ca_county)
|
||||
------------------filter((ws3.d_year = 2000)(ws3.d_qoy = 3))
|
||||
--------------------CteConsumer[cteId= ( CTEId#7=] )
|
||||
--------------PhysicalProject
|
||||
----------------hashJoin[INNER_JOIN](ss2.ca_county = ss3.ca_county)
|
||||
------------------PhysicalDistribute
|
||||
--------------------PhysicalProject
|
||||
----------------------filter((ss3.d_year = 2000)(ss3.d_qoy = 3))
|
||||
------------------------CteConsumer[cteId= ( CTEId#6=] )
|
||||
------------------hashJoin[INNER_JOIN](ws1.ca_county = ws2.ca_county)(CASE WHEN (web_sales > 0.00) THEN (cast(web_sales as DECIMALV3(38, 8)) / web_sales) ELSE NULL END > CASE WHEN (store_sales > 0.00) THEN (cast(store_sales as DECIMALV3(38, 8)) / store_sales) ELSE NULL END)
|
||||
--------------------hashJoin[INNER_JOIN](ss1.ca_county = ws1.ca_county)
|
||||
----------------------hashJoin[INNER_JOIN](ss1.ca_county = ss2.ca_county)
|
||||
------------------------PhysicalDistribute
|
||||
--------------------------PhysicalProject
|
||||
----------------------------filter((ss1.d_year = 2000)(ss1.d_qoy = 1))
|
||||
------------------------------CteConsumer[cteId= ( CTEId#6=] )
|
||||
------------------------hashJoin[INNER_JOIN](ws1.ca_county = ws2.ca_county)
|
||||
--------------------------PhysicalDistribute
|
||||
----------------------------PhysicalProject
|
||||
------------------------------filter((ws1.d_year = 2000)(ws1.d_qoy = 1))
|
||||
--------------------------------CteConsumer[cteId= ( CTEId#7=] )
|
||||
--------------------------PhysicalDistribute
|
||||
----------------------------PhysicalProject
|
||||
------------------------------filter((ws2.d_qoy = 2)(ws2.d_year = 2000))
|
||||
--------------------------------CteConsumer[cteId= ( CTEId#7=] )
|
||||
------------------------PhysicalDistribute
|
||||
--------------------------PhysicalProject
|
||||
----------------------------filter((ss2.d_year = 2000)(ss2.d_qoy = 2))
|
||||
------------------------------CteConsumer[cteId= ( CTEId#6=] )
|
||||
----------------------PhysicalDistribute
|
||||
------------------------PhysicalProject
|
||||
--------------------------filter((ws1.d_year = 2000)(ws1.d_qoy = 1))
|
||||
----------------------------CteConsumer[cteId= ( CTEId#7=] )
|
||||
--------------------PhysicalDistribute
|
||||
----------------------PhysicalProject
|
||||
------------------------filter((ws2.d_qoy = 2)(ws2.d_year = 2000))
|
||||
--------------------------CteConsumer[cteId= ( CTEId#7=] )
|
||||
|
||||
|
||||
Reference in New Issue
Block a user