[Fix](statistics) Fix partition name NPE and sample for all tables during auto analyze (#28916)
Fix a partition-name NPE and sample all tables during auto analyze. We sample all tables because getData may have latency, which could cause a full analyze of a huge table and consume too many resources. Sampling all tables avoids this; the strategy will be improved later.
This commit is contained in:
@ -1422,7 +1422,7 @@ public class SessionVariable implements Serializable, Writable {
|
||||
+ "When enable_auto_sample is enabled, tables"
|
||||
+ "larger than this value will automatically collect "
|
||||
+ "statistics through sampling"})
|
||||
public long hugeTableLowerBoundSizeInBytes = 5L * 1024 * 1024 * 1024;
|
||||
public long hugeTableLowerBoundSizeInBytes = 0;
|
||||
|
||||
@VariableMgr.VarAttr(name = HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS, flag = VariableMgr.GLOBAL,
|
||||
description = {"控制对大表的自动ANALYZE的最小时间间隔,"
|
||||
|
||||
@ -61,7 +61,11 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
|
||||
public void doExecute() throws Exception {
|
||||
Set<String> partitionNames = info.colToPartitions.get(info.colName);
|
||||
if (partitionNames.isEmpty()) {
|
||||
if (partitionNames == null || partitionNames.isEmpty()) {
|
||||
if (partitionNames == null) {
|
||||
LOG.warn("Table {}.{}.{}, partitionNames for column {} is null. ColToPartitions:[{}]",
|
||||
info.catalogId, info.dbId, info.tblId, info.colName, info.colToPartitions);
|
||||
}
|
||||
StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId,
|
||||
info.tblId, info.indexId, info.colName, null);
|
||||
job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
|
||||
|
||||
@ -86,7 +86,7 @@ public class StatisticConstants {
|
||||
public static final int INSERT_MERGE_ITEM_COUNT = 200;
|
||||
|
||||
public static final long HUGE_TABLE_DEFAULT_SAMPLE_ROWS = 4194304;
|
||||
public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 5L * 1024 * 1024 * 1024;
|
||||
public static final long HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = 0;
|
||||
|
||||
public static final long HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS = TimeUnit.HOURS.toMillis(12);
|
||||
|
||||
|
||||
@ -148,7 +148,7 @@ public class StatisticsAutoCollector extends StatisticsCollector {
|
||||
|
||||
protected void createAnalyzeJobForTbl(DatabaseIf<? extends TableIf> db,
|
||||
List<AnalysisInfo> analysisInfos, TableIf table) {
|
||||
AnalysisMethod analysisMethod = table.getDataSize(true) > StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
|
||||
AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
|
||||
? AnalysisMethod.SAMPLE : AnalysisMethod.FULL;
|
||||
AnalysisInfo jobInfo = new AnalysisInfoBuilder()
|
||||
.setJobId(Env.getCurrentEnv().getNextId())
|
||||
|
||||
@ -86,7 +86,7 @@ public class HMSAnalysisTaskTest {
|
||||
new MockUp<HMSExternalTable>() {
|
||||
@Mock
|
||||
public long getDataSize(boolean singleReplica) {
|
||||
return 1000;
|
||||
return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
|
||||
}
|
||||
};
|
||||
HMSAnalysisTask task = new HMSAnalysisTask();
|
||||
|
||||
@ -101,7 +101,7 @@ public class OlapAnalysisTaskTest {
|
||||
|
||||
@Mock
|
||||
public long getDataSize(boolean singleReplica) {
|
||||
return 1000;
|
||||
return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -346,7 +346,7 @@ public class StatisticsAutoCollectorTest {
|
||||
|
||||
@Mock
|
||||
public long getDataSize(boolean singleReplica) {
|
||||
return 1000;
|
||||
return StatisticsUtil.getHugeTableLowerBoundSizeInBytes() - 1;
|
||||
}
|
||||
|
||||
@Mock
|
||||
|
||||
Reference in New Issue
Block a user