[fix](statistics)Add row count to AnalysisJob and update tableStats using this row count after analyze. (#38428)
Add row count to AnalysisJob and update tableStats using this row count after analyze. Take row count into consideration in stats health calculation.
This commit is contained in:
@ -1300,12 +1300,33 @@ public class OlapTable extends Table implements MTMVRelatedTableIf {
|
||||
.collect(Collectors.toSet())))) {
|
||||
return true;
|
||||
}
|
||||
long rowCount = getRowCount();
|
||||
if (rowCount > 0 && tblStats.rowCount == 0) {
|
||||
|
||||
// 1 Check row count.
|
||||
long currentRowCount = getRowCount();
|
||||
long lastAnalyzeRowCount = tblStats.rowCount;
|
||||
// 1.1 Empty table -> non-empty table. Need analyze.
|
||||
if (currentRowCount != 0 && lastAnalyzeRowCount == 0) {
|
||||
return true;
|
||||
}
|
||||
// 1.2 Non-empty table -> empty table. Need analyze.
|
||||
if (currentRowCount == 0 && lastAnalyzeRowCount != 0) {
|
||||
return true;
|
||||
}
|
||||
// 1.3 Table is still empty. No need to analyze. lastAnalyzeRowCount == 0 is always true here.
|
||||
if (currentRowCount == 0) {
|
||||
return false;
|
||||
}
|
||||
// 1.4 If row count changed more than the threshold, need analyze.
|
||||
// lastAnalyzeRowCount == 0 is always false here.
|
||||
double changeRate =
|
||||
((double) Math.abs(currentRowCount - lastAnalyzeRowCount) / lastAnalyzeRowCount) * 100.0;
|
||||
if (changeRate > (100 - StatisticsUtil.getTableStatsHealthThreshold())) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// 2. Check update rows.
|
||||
long updateRows = tblStats.updatedRows.get();
|
||||
int tblHealth = StatisticsUtil.getTableHealth(rowCount, updateRows);
|
||||
int tblHealth = StatisticsUtil.getTableHealth(currentRowCount, updateRows);
|
||||
return tblHealth < StatisticsUtil.getTableStatsHealthThreshold();
|
||||
}
|
||||
|
||||
|
||||
@ -190,6 +190,9 @@ public class AnalysisInfo implements Writable {
|
||||
|
||||
@SerializedName("emptyJob")
|
||||
public final boolean emptyJob;
|
||||
|
||||
@SerializedName("rowCount")
|
||||
public final long rowCount;
|
||||
/**
|
||||
*
|
||||
* Used to store the newest partition version of tbl when creating this job.
|
||||
@ -206,7 +209,8 @@ public class AnalysisInfo implements Writable {
|
||||
long lastExecTimeInMs, long timeCostInMs, AnalysisState state, ScheduleType scheduleType,
|
||||
boolean isExternalTableLevelTask, boolean partitionOnly, boolean samplingPartition,
|
||||
boolean isAllPartition, long partitionCount, CronExpression cronExpression, boolean forceFull,
|
||||
boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob, boolean userInject) {
|
||||
boolean usingSqlForPartitionColumn, long tblUpdateTime, boolean emptyJob, boolean userInject,
|
||||
long rowCount) {
|
||||
this.jobId = jobId;
|
||||
this.taskId = taskId;
|
||||
this.taskIds = taskIds;
|
||||
@ -244,6 +248,7 @@ public class AnalysisInfo implements Writable {
|
||||
this.tblUpdateTime = tblUpdateTime;
|
||||
this.emptyJob = emptyJob;
|
||||
this.userInject = userInject;
|
||||
this.rowCount = rowCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@ -64,6 +64,7 @@ public class AnalysisInfoBuilder {
|
||||
private long tblUpdateTime;
|
||||
private boolean emptyJob;
|
||||
private boolean userInject;
|
||||
private long rowCount;
|
||||
|
||||
// No-argument constructor; the body is empty, so no builder field is assigned here.
public AnalysisInfoBuilder() {
|
||||
}
|
||||
@ -103,6 +104,7 @@ public class AnalysisInfoBuilder {
|
||||
tblUpdateTime = info.tblUpdateTime;
|
||||
emptyJob = info.emptyJob;
|
||||
userInject = info.userInject;
|
||||
rowCount = info.rowCount;
|
||||
}
|
||||
|
||||
public AnalysisInfoBuilder setJobId(long jobId) {
|
||||
@ -275,12 +277,17 @@ public class AnalysisInfoBuilder {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Stores the given row count in this builder.
 *
 * @param rowCount value assigned to this builder's rowCount field
 * @return this builder, so that setter calls can be chained
 */
public AnalysisInfoBuilder setRowCount(long rowCount) {
|
||||
this.rowCount = rowCount;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
 * Creates an AnalysisInfo from the values currently held in this builder's fields.
 *
 * NOTE(review): two versions of the final argument line appear below (one ending in
 * "userInject);" and one ending in "userInject, rowCount);"). This looks like the removed
 * and added lines of a diff shown together; confirm which one the real file contains.
 *
 * @return the newly constructed AnalysisInfo
 */
public AnalysisInfo build() {
|
||||
return new AnalysisInfo(jobId, taskId, taskIds, catalogId, dbId, tblId, jobColumns, partitionNames,
|
||||
colName, indexId, jobType, analysisMode, analysisMethod, analysisType, samplePercent,
|
||||
sampleRows, maxBucketNum, periodTimeInMs, message, lastExecTimeInMs, timeCostInMs, state, scheduleType,
|
||||
externalTableLevelTask, partitionOnly, samplingPartition, isAllPartition, partitionCount,
|
||||
cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob, userInject);
|
||||
cronExpression, forceFull, usingSqlForPartitionColumn, tblUpdateTime, emptyJob, userInject, rowCount);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -354,6 +354,8 @@ public class AnalysisManager implements Writable {
|
||||
infoBuilder.setTblUpdateTime(table.getUpdateTime());
|
||||
infoBuilder.setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0
|
||||
&& analysisMethod.equals(AnalysisMethod.SAMPLE));
|
||||
long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0 : table.getRowCount();
|
||||
infoBuilder.setRowCount(rowCount);
|
||||
return infoBuilder.build();
|
||||
}
|
||||
|
||||
|
||||
@ -19,6 +19,7 @@ package org.apache.doris.statistics;
|
||||
|
||||
import org.apache.doris.analysis.CreateMaterializedViewStmt;
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.catalog.KeysType;
|
||||
import org.apache.doris.catalog.MaterializedIndex;
|
||||
import org.apache.doris.catalog.MaterializedIndexMeta;
|
||||
@ -37,7 +38,6 @@ import org.apache.commons.text.StringSubstitutor;
|
||||
|
||||
import java.security.SecureRandom;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
@ -69,10 +69,16 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
return;
|
||||
}
|
||||
List<Pair<String, String>> columnList = info.jobColumns;
|
||||
if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod) || columnList == null || columnList.isEmpty()) {
|
||||
if (columnList == null || columnList.isEmpty()) {
|
||||
LOG.warn("Table {}.{}.{}, jobColumns is null or empty.", info.catalogId, info.dbId, info.tblId);
|
||||
throw new RuntimeException();
|
||||
}
|
||||
if (StatisticsUtil.isEmptyTable(tbl, info.analysisMethod)) {
|
||||
StatsId statsId = new StatsId(concatColumnStatsId(), info.catalogId, info.dbId,
|
||||
info.tblId, info.indexId, info.colName, null);
|
||||
job.appendBuf(this, Arrays.asList(new ColStatsData(statsId)));
|
||||
ColStatsData colStatsData = new ColStatsData(statsId);
|
||||
Env.getCurrentEnv().getStatisticsCache().syncColStats(colStatsData);
|
||||
job.appendBuf(this, Collections.singletonList(colStatsData));
|
||||
return;
|
||||
}
|
||||
if (tableSample != null) {
|
||||
|
||||
@ -181,6 +181,7 @@ public class StatisticsAutoCollector extends StatisticsCollector {
|
||||
List<AnalysisInfo> analysisInfos, TableIf table) {
|
||||
AnalysisMethod analysisMethod = table.getDataSize(true) >= StatisticsUtil.getHugeTableLowerBoundSizeInBytes()
|
||||
? AnalysisMethod.SAMPLE : AnalysisMethod.FULL;
|
||||
long rowCount = StatisticsUtil.isEmptyTable(table, analysisMethod) ? 0 : table.getRowCount();
|
||||
AnalysisInfo jobInfo = new AnalysisInfoBuilder()
|
||||
.setJobId(Env.getCurrentEnv().getNextId())
|
||||
.setCatalogId(db.getCatalog().getId())
|
||||
@ -200,6 +201,7 @@ public class StatisticsAutoCollector extends StatisticsCollector {
|
||||
.setTblUpdateTime(table.getUpdateTime())
|
||||
.setEmptyJob(table instanceof OlapTable && table.getRowCount() == 0
|
||||
&& analysisMethod.equals(AnalysisMethod.SAMPLE))
|
||||
.setRowCount(rowCount)
|
||||
.build();
|
||||
analysisInfos.add(jobInfo);
|
||||
}
|
||||
|
||||
@ -141,7 +141,7 @@ public class TableStatsMeta implements Writable {
|
||||
jobType = analyzedJob.jobType;
|
||||
if (tableIf != null) {
|
||||
if (tableIf instanceof OlapTable) {
|
||||
rowCount = analyzedJob.emptyJob ? 0 : tableIf.getRowCount();
|
||||
rowCount = analyzedJob.rowCount;
|
||||
}
|
||||
if (analyzedJob.emptyJob) {
|
||||
return;
|
||||
|
||||
Reference in New Issue
Block a user