diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index dd8d470e24..ff889d3edc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -111,8 +111,8 @@ public class Statistics { ColumnStatistic columnStatistic = entry.getValue(); ColumnStatisticBuilder columnStatisticBuilder = new ColumnStatisticBuilder(columnStatistic); columnStatisticBuilder.setNdv(Math.min(columnStatistic.ndv, rowCount)); - double nullFactor = (rowCount - columnStatistic.numNulls) / rowCount; - columnStatisticBuilder.setNumNulls(nullFactor * rowCount); + double numNulls = Math.min(columnStatistic.numNulls, rowCount - columnStatistic.ndv); + columnStatisticBuilder.setNumNulls(numNulls); columnStatisticBuilder.setCount(rowCount); statistics.addColumnStats(entry.getKey(), columnStatisticBuilder.build()); } diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query21.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query21.out index 8361e28dac..32df595226 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query21.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query21.out @@ -21,4 +21,5 @@ PhysicalTopN ------------------------PhysicalOlapScan[date_dim] ----------------PhysicalDistribute ------------------PhysicalProject ---------------------PhysicalOlapScan[warehouse] \ No newline at end of file +--------------------PhysicalOlapScan[warehouse] + diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out index ccb9b6ff50..d6ec3488d9 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query95.out @@ -14,38 +14,38 @@ CteAnchor[cteId= ( CTEId#3=] ) ----PhysicalTopN ------PhysicalProject --------hashAgg[GLOBAL] -----------hashAgg[LOCAL] -------------PhysicalDistribute +----------PhysicalDistribute +------------hashAgg[LOCAL] --------------PhysicalProject ----------------hashJoin[INNER_JOIN](ws1.ws_ship_date_sk = date_dim.d_date_sk) -------------------PhysicalProject ---------------------filter((date_dim.d_date >= 1999-02-01)(cast(d_date as DATETIMEV2(0)) <= cast(days_add(cast('1999-2-01' as DATEV2), INTERVAL 60 DAY) as DATETIMEV2(0)))) -----------------------PhysicalOlapScan[date_dim] -------------------PhysicalDistribute ---------------------hashJoin[RIGHT_SEMI_JOIN](ws1.ws_order_number = web_returns.wr_order_number) -----------------------PhysicalDistribute -------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN](web_returns.wr_order_number = ws_wh.ws_order_number) -----------------------------PhysicalProject -------------------------------CteConsumer[cteId= ( CTEId#3=] ) -----------------------------PhysicalDistribute -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[web_returns] -----------------------hashJoin[RIGHT_SEMI_JOIN](ws1.ws_order_number = ws_wh.ws_order_number) -------------------------PhysicalDistribute +------------------hashJoin[RIGHT_SEMI_JOIN](ws1.ws_order_number = web_returns.wr_order_number) +--------------------PhysicalDistribute +----------------------PhysicalProject +------------------------hashJoin[INNER_JOIN](web_returns.wr_order_number = ws_wh.ws_order_number) --------------------------PhysicalProject ----------------------------CteConsumer[cteId= ( CTEId#3=] ) -------------------------PhysicalDistribute ---------------------------hashJoin[INNER_JOIN](ws1.ws_web_site_sk = web_site.web_site_sk) -----------------------------hashJoin[INNER_JOIN](ws1.ws_ship_addr_sk = customer_address.ca_address_sk) -------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[web_sales] -------------------------------PhysicalDistribute ---------------------------------PhysicalProject -----------------------------------filter((cast(ca_state as VARCHAR(*)) = 'NC')) -------------------------------------PhysicalOlapScan[customer_address] +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_returns] +--------------------hashJoin[RIGHT_SEMI_JOIN](ws1.ws_order_number = ws_wh.ws_order_number) +----------------------PhysicalDistribute +------------------------PhysicalProject +--------------------------CteConsumer[cteId= ( CTEId#3=] ) +----------------------PhysicalDistribute +------------------------hashJoin[INNER_JOIN](ws1.ws_web_site_sk = web_site.web_site_sk) +--------------------------hashJoin[INNER_JOIN](ws1.ws_ship_addr_sk = customer_address.ca_address_sk) +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_sales] ----------------------------PhysicalDistribute ------------------------------PhysicalProject ---------------------------------filter((cast(web_company_name as VARCHAR(*)) = 'pri')) -----------------------------------PhysicalOlapScan[web_site] +--------------------------------filter((cast(ca_state as VARCHAR(*)) = 'NC')) +----------------------------------PhysicalOlapScan[customer_address] +--------------------------PhysicalDistribute +----------------------------PhysicalProject +------------------------------filter((cast(web_company_name as VARCHAR(*)) = 'pri')) +--------------------------------PhysicalOlapScan[web_site] +------------------PhysicalDistribute +--------------------PhysicalProject +----------------------filter((date_dim.d_date >= 1999-02-01)(cast(d_date as DATETIMEV2(0)) <= cast(days_add(cast('1999-2-01' as DATEV2), INTERVAL 60 DAY) as DATETIMEV2(0)))) +------------------------PhysicalOlapScan[date_dim]