From 98956dfa199dcc460a015c6ff4e36125f9d64a6a Mon Sep 17 00:00:00 2001 From: Kikyou1997 <33112463+Kikyou1997@users.noreply.github.com> Date: Thu, 17 Nov 2022 20:18:14 +0800 Subject: [PATCH] [fix](statistics) statistics inaccurate after analyze same table more than once (#14279) If a table has already been analyzed and we analyze it again, the new statistics would be larger than expected, because the incremental result would also contain the values from the table-level statistics: the SQL lacked a predicate on the nullability of part_id. --- .../src/main/java/org/apache/doris/statistics/AnalysisJob.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java index d34f030f87..83ac6e11a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisJob.java @@ -124,7 +124,8 @@ public class AnalysisJob { + " FROM ${internalDB}.${columnStatTbl}" + " WHERE ${internalDB}.${columnStatTbl}.db_id = '${dbId}' AND " + " ${internalDB}.${columnStatTbl}.tbl_id='${tblId}' AND " - + " ${internalDB}.${columnStatTbl}.col_id='${colId}'" + + " ${internalDB}.${columnStatTbl}.col_id='${colId}' AND " + + " ${internalDB}.${columnStatTbl}.part_id IS NOT NULL" + " ) t1, \n" + " (SELECT NDV(${colName}) AS ndv FROM `${dbName}`.`${tblName}`) t2\n";