[fix](statistics)Add row count to AnalysisJob and update tableStats using this row count after analyze. (#38428)

Add row count to AnalysisJob and update tableStats using this row count
after analyze.
Take row count into consideration in the stats health calculation.
This commit is contained in:
Jibing-Li
2024-07-27 11:32:29 +08:00
committed by GitHub
parent ee65195ba5
commit 2b82bec406
10 changed files with 114 additions and 57 deletions

View File

@ -1300,12 +1300,33 @@ public class OlapTable extends Table implements MTMVRelatedTableIf {
.collect(Collectors.toSet())))) {
return true;
}
long rowCount = getRowCount();
if (rowCount > 0 && tblStats.rowCount == 0) {
// 1 Check row count.
long currentRowCount = getRowCount();
long lastAnalyzeRowCount = tblStats.rowCount;
// 1.1 Empty table -> non-empty table. Need analyze.
if (currentRowCount != 0 && lastAnalyzeRowCount == 0) {
return true;
}
// 1.2 Non-empty table -> empty table. Need to analyze.
if (currentRowCount == 0 && lastAnalyzeRowCount != 0) {
return true;
}
// 1.3 Table is still empty. No need to analyze. lastAnalyzeRowCount == 0 is always true here.
if (currentRowCount == 0) {
return false;
}
// 1.4 If row count changed more than the threshold, need analyze.
// lastAnalyzeRowCount == 0 is always false here.
double changeRate =
((double) Math.abs(currentRowCount - lastAnalyzeRowCount) / lastAnalyzeRowCount) * 100.0;
if (changeRate > (100 - StatisticsUtil.getTableStatsHealthThreshold())) {
return true;
}
// 2. Check update rows.
long updateRows = tblStats.updatedRows.get();
int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
int tblHealth = StatisticsUtil.getTableHealth(currentRowCount, updateRows);
return tblHealth < StatisticsUtil.getTableStatsHealthThreshold();
}

View File

@ -190,6 +190,9 @@ public class AnalysisInfo implements Writable {
@SerializedName("emptyJob")
public final boolean emptyJob;
@SerializedName("rowCount")
public final long rowCount;
/**
*
* Used to store the newest partition version of tbl when creating this job.
@ -206,7 +209,8 @@ public class AnalysisInfo implements Writable {
long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType,
boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition,
boolean isAllPartition, long partitionCount, CronExpression cronExpression, boolean forceFull,
boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob, boolean userInject) {
boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob, boolean userInject,
long rowCount) {
this.jobId = jobId;
this.taskId = taskId;
this.taskIds = taskIds;
@ -244,6 +248,7 @@ public class AnalysisInfo implements Writable {
this.tblUpdateTime = tblUpdateTime;
this.emptyJob = emptyJob;
this.userInject = userInject;
this.rowCount = rowCount;
}
@Override

View File

@ -64,6 +64,7 @@ public class AnalysisInfoBuilder {
private long tblUpdateTime;
private boolean emptyJob;
private boolean userInject;
private long rowCount;
public AnalysisInfoBuilder() {
}
@ -103,6 +104,7 @@ public class AnalysisInfoBuilder {
tblUpdateTime = info.tblUpdateTime;
emptyJob = info.emptyJob;
userInject = info.userInject;
rowCount = info.rowCount;
}
public AnalysisInfoBuilder setJobId(long jobId) {
@ -275,12 +277,17 @@ public class AnalysisInfoBuilder {
return this;
}
/**
 * Sets the row count that {@link #build()} passes to the {@link AnalysisInfo} constructor.
 *
 * @param rowCount value stored in this builder's {@code rowCount} field
 * @return this builder, so that calls can be chained
 */
public AnalysisInfoBuilder setRowCount(long rowCount) {
this.rowCount = rowCount;
return this;
}
/**
 * Creates a new {@link AnalysisInfo} from the values currently held by this builder.
 *
 * @return a newly constructed AnalysisInfo populated from this builder's fields
 */
public AnalysisInfo build() {
return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, tblId, jobColumns, partitionNames,
colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent,
sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType,
externalTableLevelTask, partitionOnly, samplingPartition, isAllPartition, partitionCount,
cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob, userInject);
cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob, userInject, rowCount);
}
}

View File

@ -354,6 +354,8 @@ public class AnalysisManager implements Writable {
infoBuilder.setTblUpdateTime(table.getUpdateTime());
infoBuilder.setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0
&& analysisMethod.equals(AnalysisMethod.SAMPLE));
long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0 : table.getRowCount();
infoBuilder.setRowCount(rowCount);
return infoBuilder.build();
}

View File

@ -19,6 +19,7 @@ package org.apache.doris.statistics;
import org.apache.doris.analysis.CreateMaterializedViewStmt;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.MaterializedIndex;
import org.apache.doris.catalog.MaterializedIndexMeta;
@ -37,7 +38,6 @@ import org.apache.commons.text.StringSubstitutor;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
@ -69,10 +69,16 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
return;
}
List<Pair<String, String>> columnList = info.jobColumns;
if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod) || columnList == null || columnList.isEmpty()) {
if (columnList == null || columnList.isEmpty()) {
LOG.warn("Table {}.{}.{}, jobColumns is null or empty.", info.catalogId, info.dbId, info.tblId);
throw new RuntimeException();
}
if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod)) {
StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId,
info.tblId, info.indexId, info.colName, null);
job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
ColStatsData colStatsData = new ColStatsData(statsId);
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
job.appendBuf(this, Collections.singletonList(colStatsData));
return;
}
if (tableSample != null) {

View File

@ -181,6 +181,7 @@ public class StatisticsAutoCollector extends StatisticsCollector {
List<AnalysisInfo> analysisInfos, TableIf table) {
AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
? AnalysisMethod.SAMPLE : AnalysisMethod.FULL;
long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0 : table.getRowCount();
AnalysisInfo jobInfo = new AnalysisInfoBuilder()
.setJobId(Env.getCurrentEnv().getNextId())
.setCatalogId(db.getCatalog().getId())
@ -200,6 +201,7 @@ public class StatisticsAutoCollector extends StatisticsCollector {
.setTblUpdateTime(table.getUpdateTime())
.setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0
&& analysisMethod.equals(AnalysisMethod.SAMPLE))
.setRowCount(rowCount)
.build();
analysisInfos.add(jobInfo);
}

View File

@ -141,7 +141,7 @@ public class TableStatsMeta implements Writable {
jobType = analyzedJob.jobType;
if (tableIf != null) {
if (tableIf instanceof OlapTable) {
rowCount = analyzedJob.emptyJob ? 0 : tableIf.getRowCount();
rowCount = analyzedJob.rowCount;
}
if (analyzedJob.emptyJob) {
return;