From 668a68967ca0de175bb5840778d250b0a6116dc2 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Fri, 19 Jan 2024 10:47:15 +0800 Subject: [PATCH] [fix](statistics)Reanalyze olapTable if getRowCount is not 0 and last time row count is 0 (#30096) Sample analyze may write a row count of 0 if getRowCount has not yet been updated when the analysis runs. So we need to reanalyze the table if getRowCount > 0 and the row count recorded by the previous analyze is 0. Otherwise the stats for this table may stay at 0 forever, until the user loads new data into the table. --- .../src/main/java/org/apache/doris/common/Config.java | 2 +- .../src/main/java/org/apache/doris/catalog/OlapTable.java | 3 +++ .../java/org/apache/doris/statistics/AnalysisManager.java | 2 +- .../apache/doris/statistics/StatisticsAutoCollector.java | 2 +- .../java/org/apache/doris/statistics/TableStatsMeta.java | 2 +- .../org/apache/doris/statistics/AnalysisManagerTest.java | 7 ++++++- 6 files changed, 13 insertions(+), 5 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 01fd359689..00d3ebb8a9 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1565,7 +1565,7 @@ public class Config extends ConfigBase { "This parameter controls the time interval for automatic collection jobs to check the health of table" + "statistics and trigger automatic collection" }) - public static int auto_check_statistics_in_minutes = 10; + public static int auto_check_statistics_in_minutes = 5; /** * If set to TRUE, the compaction slower replica will be skipped when select get queryable replicas diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 3236fc09ec..b9479aeddf 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -1193,6 +1193,9 @@ public class OlapTable extends Table { return true; } long rowCount = getRowCount(); + if (rowCount > 0 && tblStats.rowCount == 0) { + return true; + } long updateRows = tblStats.updatedRows.get(); int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows); return tblHealth < StatisticsUtil.getTableStatsHealthThreshold(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java index 2e12b44339..8f927694dc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/AnalysisManager.java @@ -545,7 +545,7 @@ public class AnalysisManager implements Writable { } TableStatsMeta tableStats = findTableStatsStatus(tbl.getId()); if (tableStats == null) { - updateTableStatsStatus(new TableStatsMeta(tbl.estimatedRowCount(), jobInfo, tbl)); + updateTableStatsStatus(new TableStatsMeta(jobInfo.emptyJob ? 0 : tbl.estimatedRowCount(), jobInfo, tbl)); } else { tableStats.update(jobInfo, tbl); logCreateTableStats(tableStats); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java index e2eeb21aad..a09b56acfc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticsAutoCollector.java @@ -209,7 +209,7 @@ public class StatisticsAutoCollector extends StatisticsCollector { @VisibleForTesting protected AnalysisInfo getReAnalyzeRequiredPart(AnalysisInfo jobInfo) { TableIf table = StatisticsUtil.findTable(jobInfo.catalogId, jobInfo.dbId, jobInfo.tblId); - // Skip tables that are too width. 
+ // Skip tables that are too wide. if (table.getBaseSchema().size() > StatisticsUtil.getAutoAnalyzeTableWidthThreshold()) { return null; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java index 926194a725..9231c6a2bc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/TableStatsMeta.java @@ -157,7 +157,7 @@ public class TableStatsMeta implements Writable { jobType = analyzedJob.jobType; if (tableIf != null) { if (tableIf instanceof OlapTable) { - rowCount = tableIf.getRowCount(); + rowCount = analyzedJob.emptyJob ? 0 : tableIf.getRowCount(); } if (!analyzedJob.emptyJob && analyzedJob.colToPartitions.keySet() .containsAll(tableIf.getBaseSchema().stream() diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java index 2fc6d24e30..f8a77fe06d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/AnalysisManagerTest.java @@ -275,7 +275,7 @@ public class AnalysisManagerTest { new MockUp() { int count = 0; - int[] rowCount = new int[]{100, 100, 200, 200}; + int[] rowCount = new int[]{100, 100, 200, 200, 1, 1}; final Column c = new Column("col1", PrimitiveType.INT); @Mock @@ -304,6 +304,11 @@ public class AnalysisManagerTest { .setColToPartitions(new HashMap<>()).setColName("col1").build(), olapTable); stats2.updatedRows.addAndGet(20); Assertions.assertFalse(olapTable.needReAnalyzeTable(stats2)); + + TableStatsMeta stats3 = new TableStatsMeta(0, new AnalysisInfoBuilder() + .setColToPartitions(new HashMap<>()).setEmptyJob(true).setColName("col1").build(), olapTable); + Assertions.assertTrue(olapTable.needReAnalyzeTable(stats3)); + } @Test