[opt](statistics) create or update table stats after alter column stats.

Create or update table stats after altering column stats.
Set a flag to disable auto analyze for the table after the user injects column stats.
This commit is contained in:
Jibing-Li
2024-01-09 16:06:08 +08:00
committed by yiguolei
parent abb7640d37
commit e4707154fa
9 changed files with 81 additions and 3 deletions

View File

@ -55,6 +55,7 @@ public class ShowTableStatsStmt extends ShowStmt {
.add("columns")
.add("trigger")
.add("new_partition")
.add("user_inject")
.build();
private final TableName tableName;
@ -151,6 +152,7 @@ public class ShowTableStatsStmt extends ShowStmt {
row.add(tableStatistic.analyzeColumns().toString());
row.add(tableStatistic.jobType.toString());
row.add(String.valueOf(tableStatistic.newPartitionLoaded.get()));
row.add(String.valueOf(tableStatistic.userInjected));
result.add(row);
return new ShowResultSet(getMetaData(), result);
}

View File

@ -198,6 +198,8 @@ public class AnalysisInfo implements Writable {
*/
public final long tblUpdateTime;
public final boolean userInject;
public AnalysisInfo(long jobId, long taskId, List<Long> taskIds, long catalogId, long dbId, long tblId,
Map<String, Set<String>> colToPartitions, Set<String> partitionNames, String colName, Long indexId,
JobType jobType, AnalysisMode analysisMode, AnalysisMethod analysisMethod, AnalysisType analysisType,
@ -205,7 +207,7 @@ public class AnalysisInfo implements Writable {
long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType,
boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition,
boolean isAllPartition, long partitionCount, CronExpression cronExpression, boolean forceFull,
boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob) {
boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob, boolean userInject) {
this.jobId = jobId;
this.taskId = taskId;
this.taskIds = taskIds;
@ -242,6 +244,7 @@ public class AnalysisInfo implements Writable {
this.usingSqlForPartitionColumn = usingSqlForPartitionColumn;
this.tblUpdateTime = tblUpdateTime;
this.emptyJob = emptyJob;
this.userInject = userInject;
}
@Override

View File

@ -63,6 +63,7 @@ public class AnalysisInfoBuilder {
private boolean usingSqlForPartitionColumn;
private long tblUpdateTime;
private boolean emptyJob;
private boolean userInject;
/**
 * Creates a builder without populating any field in the constructor body;
 * values are supplied afterwards through the fluent setters.
 */
public AnalysisInfoBuilder() {
}
@ -101,6 +102,7 @@ public class AnalysisInfoBuilder {
usingSqlForPartitionColumn = info.usingSqlForPartitionColumn;
tblUpdateTime = info.tblUpdateTime;
emptyJob = info.emptyJob;
userInject = info.userInject;
}
public AnalysisInfoBuilder setJobId(long jobId) {
@ -268,12 +270,17 @@ public class AnalysisInfoBuilder {
return this;
}
/**
 * Sets the {@code userInject} flag that {@link #build()} forwards to the
 * {@code AnalysisInfo} constructor.
 *
 * @param userInject value to store in this builder's {@code userInject} field
 * @return this builder, to allow call chaining
 */
public AnalysisInfoBuilder setUserInject(boolean userInject) {
this.userInject = userInject;
return this;
}
/**
 * Builds an {@code AnalysisInfo} by passing every field currently held by this
 * builder to the {@code AnalysisInfo} constructor, in the constructor's
 * positional order.
 *
 * NOTE(review): this text is a rendered diff, so the two consecutive lines that
 * close the argument list appear to be the pre-change and post-change versions
 * of the same line. Only the version ending in {@code userInject} matches the
 * constructor signature that takes the trailing {@code boolean userInject}
 * parameter — confirm against the actual file.
 *
 * @return a new {@code AnalysisInfo} populated from this builder
 */
public AnalysisInfo build() {
return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, tblId, colToPartitions, partitionNames,
colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent,
sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType,
externalTableLevelTask, partitionOnly, samplingPartition, isAllPartition, partitionCount,
cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob);
cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob, userInject);
}
}

View File

@ -560,6 +560,17 @@ public class AnalysisManager implements Writable {
}
}
/**
 * Creates or refreshes the table-level stats meta for {@code tbl} using the
 * given job info.
 *
 * <p>If a {@code TableStatsMeta} is already registered for the table id, it is
 * updated in place from {@code jobInfo} and then passed to
 * {@code logCreateTableStats}. Otherwise a new {@code TableStatsMeta} is
 * constructed (first constructor argument {@code 0} — presumably the row
 * count, confirm against the constructor) and handed to
 * {@code updateTableStatsStatus}.
 *
 * @param jobInfo job info whose fields are applied to the table stats meta
 * @param tbl     table whose stats meta is created or updated
 */
@VisibleForTesting
public void updateTableStatsForAlterStats(AnalysisInfo jobInfo, TableIf tbl) {
    TableStatsMeta existingMeta = findTableStatsStatus(tbl.getId());
    if (existingMeta != null) {
        // Meta already exists: refresh it and log the updated state.
        existingMeta.update(jobInfo, tbl);
        logCreateTableStats(existingMeta);
        return;
    }
    // No meta registered for this table yet: create one.
    updateTableStatsStatus(new TableStatsMeta(0, jobInfo, tbl));
}
/**
 * Returns the analysis jobs selected for the given SHOW ANALYZE statement.
 *
 * <p>Delegates to {@code findShowAnalyzeResult}, passing all values currently
 * held in {@code analysisJobInfoMap} together with {@code stmt}.
 *
 * @param stmt the statement forwarded to {@code findShowAnalyzeResult}
 * @return the list produced by {@code findShowAnalyzeResult}
 */
public List<AnalysisInfo> showAnalysisJob(ShowAnalyzeStmt stmt) {
return findShowAnalyzeResult(analysisJobInfoMap.values(), stmt);
}
@ -654,6 +665,7 @@ public class AnalysisManager implements Writable {
}
tableStats.updatedTime = 0;
}
tableStats.userInjected = false;
logCreateTableStats(tableStats);
StatisticsRepository.dropStatistics(tblId, cols);
}

View File

@ -158,6 +158,9 @@ public class StatisticsAutoCollector extends StatisticsCollector {
if (tableStats == null || tableStats.newPartitionLoaded.get()) {
return false;
}
if (tableStats.userInjected) {
return true;
}
return System.currentTimeMillis()
- tableStats.updatedTime < StatisticsUtil.getHugeTableAutoAnalyzeIntervalInMillis();
}

View File

@ -301,6 +301,14 @@ public class StatisticsRepository {
StatisticsUtil.execUpdate(INSERT_INTO_COLUMN_STATISTICS, params);
Env.getCurrentEnv().getStatisticsCache()
.updateColStatsCache(objects.table.getId(), -1, colName, columnStatistic);
AnalysisInfo mockedJobInfo = new AnalysisInfoBuilder()
.setTblUpdateTime(System.currentTimeMillis())
.setColName("")
.setColToPartitions(Maps.newHashMap())
.setUserInject(true)
.setJobType(AnalysisInfo.JobType.MANUAL)
.build();
Env.getCurrentEnv().getAnalysisManager().updateTableStatsForAlterStats(mockedJobInfo, objects.table);
} else {
// update partition granularity statistics
for (Long partitionId : partitionIds) {

View File

@ -72,6 +72,9 @@ public class TableStatsMeta implements Writable {
@SerializedName("newPartitionLoaded")
public AtomicBoolean newPartitionLoaded = new AtomicBoolean(false);
@SerializedName("userInjected")
public boolean userInjected;
@VisibleForTesting
public TableStatsMeta() {
tblId = 0;
@ -130,13 +133,15 @@ public class TableStatsMeta implements Writable {
public void update(AnalysisInfo analyzedJob, TableIf tableIf) {
updatedTime = analyzedJob.tblUpdateTime;
userInjected = analyzedJob.userInject;
String colNameStr = analyzedJob.colName;
// colName field AnalyzeJob's format likes: "[col1, col2]", we need to remove brackets here
// TODO: Refactor this later
if (analyzedJob.colName.startsWith("[") && analyzedJob.colName.endsWith("]")) {
colNameStr = colNameStr.substring(1, colNameStr.length() - 1);
}
List<String> cols = Arrays.stream(colNameStr.split(",")).map(String::trim).collect(Collectors.toList());
List<String> cols = Arrays.stream(colNameStr.split(","))
.map(String::trim).filter(s -> !s.isEmpty()).collect(Collectors.toList());
for (String col : cols) {
ColStatsMeta colStatsMeta = colNameToColStatsMeta.get(col);
if (colStatsMeta == null) {