[Fix](statistics) Fix partition name NPE and sample all tables during auto analyze (#28916)

Fix the partition-name NPE and use sampling for all tables during auto analyze.
All tables are sampled because the size reported by getDataSize may lag behind the actual data, which could trigger a full analyze of a huge table and consume too many resources. Sampling every table avoids this. The strategy will be improved later.
This commit is contained in:
Jibing-Li
2023-12-24 01:49:49 +08:00
committed by GitHub
parent 13a3550d8e
commit 5505fa3755
7 changed files with 11 additions and 7 deletions

View File

@ -1422,7 +1422,7 @@ public class SessionVariable implements Serializable, Writable {
+ "When enable_auto_sample is enabled, tables"
+ "larger than this value will automatically collect "
+ "statistics through sampling"})
public long hugeTableLowerBoundSizeInBytes = 5L * 1024 * 1024 * 1024;
public long hugeTableLowerBoundSizeInBytes = 0;
@VariableMgr.VarAttr(name = HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, flag = VariableMgr.GLOBAL,
description = {"控制对大表的自动ANALYZE的最小时间间隔,"

View File

@ -61,7 +61,11 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
public void doExecute() throws Exception {
Set<String> partitionNames = info.colToPartitions.get(info.colName);
if (partitionNames.isEmpty()) {
if (partitionNames == null || partitionNames.isEmpty()) {
if (partitionNames == null) {
LOG.warn("Table {}.{}.{}, partitionNames for column {} is null. ColToPartitions:[{}]",
info.catalogId, info.dbId, info.tblId, info.colName, info.colToPartitions);
}
StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId,
info.tblId, info.indexId, info.colName, null);
job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));

View File

@ -86,7 +86,7 @@ public class StatisticConstants {
public static final int INSERT_MERGE_ITEM_COUNT = 200;
public static final long HUGE_TABLE_DEFAULT_SAMPLE_ROWS = 4194304;
public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 5L * 1024 * 1024 * 1024;
public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 0;
public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = TimeUnit.HOURS.toMillis(12);

View File

@ -148,7 +148,7 @@ public class StatisticsAutoCollector extends StatisticsCollector {
protected void createAnalyzeJobForTbl(DatabaseIf<? extends TableIf> db,
List<AnalysisInfo> analysisInfos, TableIf table) {
AnalysisMethod analysisMethod = table.getDataSize(true) > StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
? AnalysisMethod.SAMPLE : AnalysisMethod.FULL;
AnalysisInfo jobInfo = new AnalysisInfoBuilder()
.setJobId(Env.getCurrentEnv().getNextId())

View File

@ -86,7 +86,7 @@ public class HMSAnalysisTaskTest {
new MockUp<HMSExternalTable>() {
@Mock
public long getDataSize(boolean singleReplica) {
return 1000;
return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
}
};
HMSAnalysisTask task = new HMSAnalysisTask();

View File

@ -101,7 +101,7 @@ public class OlapAnalysisTaskTest {
@Mock
public long getDataSize(boolean singleReplica) {
return 1000;
return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
}
};

View File

@ -346,7 +346,7 @@ public class StatisticsAutoCollectorTest {
@Mock
public long getDataSize(boolean singleReplica) {
return 1000;
return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
}
@Mock