[improvement](statistics) Analyze all columns when a partition is first loaded. (#38606)

backport: https://github.com/apache/doris/pull/38540
This commit is contained in:
Jibing-Li
2024-07-31 19:59:41 +08:00
committed by GitHub
parent 66ebf709ba
commit 002232b2da
7 changed files with 198 additions and 115 deletions

View File

@ -44,6 +44,7 @@ public class AnalyzeProperties {
public static final String PROPERTY_PERIOD_SECONDS = "period.seconds";
public static final String PROPERTY_FORCE_FULL = "force.full";
public static final String PROPERTY_PARTITION_COLUMN_FROM_SQL = "partition.column.from.sql";
public static final String PROPERTY_USE_AUTO_ANALYZER = "use.auto.analyzer";
public static final AnalyzeProperties DEFAULT_PROP = new AnalyzeProperties(new HashMap<String, String>() {
{
@ -72,6 +73,7 @@ public class AnalyzeProperties {
.add(PROPERTY_PERIOD_CRON)
.add(PROPERTY_FORCE_FULL)
.add(PROPERTY_PARTITION_COLUMN_FROM_SQL)
.add(PROPERTY_USE_AUTO_ANALYZER)
.build();
public AnalyzeProperties(Map<String, String> properties) {

View File

@ -1304,6 +1304,10 @@ public class OlapTable extends Table implements MTMVRelatedTableIf {
return true;
}
// Check whether a new partition has been loaded for the first time.
if (tblStats.newPartitionLoaded != null && tblStats.newPartitionLoaded.get()) {
return true;
}
// 1 Check row count.
long currentRowCount = getRowCount();
long lastAnalyzeRowCount = tblStats.rowCount;

View File

@ -204,7 +204,21 @@ public class AnalysisManager implements Writable {
}
// Each analyze statement corresponds to one analysis job.
public void createAnalysisJob(AnalyzeTblStmt stmt, boolean proxy) throws DdlException {
public void createAnalysisJob(AnalyzeTblStmt stmt, boolean proxy) throws DdlException, AnalysisException {
// Use the auto analyzer if the user specifies the use.auto.analyzer property.
if (stmt.getAnalyzeProperties().getProperties().containsKey("use.auto.analyzer")) {
StatisticsAutoCollector autoCollector = Env.getCurrentEnv().getStatisticsAutoCollector();
if (autoCollector.skip(stmt.getTable())) {
return;
}
List<AnalysisInfo> jobs = new ArrayList<>();
autoCollector.createAnalyzeJobForTbl(stmt.getDb(), jobs, stmt.getTable());
AnalysisInfo job = autoCollector.getNeedAnalyzeColumns(jobs.get(0));
if (job != null) {
Env.getCurrentEnv().getStatisticsAutoCollector().createSystemAnalysisJob(job);
}
return;
}
AnalysisInfo jobInfo = buildAndAssignJob(stmt);
if (jobInfo == null) {
return;

View File

@ -21,7 +21,6 @@ import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DatabaseIf;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.Partition;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
@ -41,7 +40,6 @@ import org.apache.logging.log4j.Logger;
import java.time.LocalTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.StringJoiner;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@ -221,11 +219,6 @@ public class StatisticsAutoCollector extends StatisticsCollector {
if (table.needReAnalyzeTable(tblStats)) {
needRunColumns = table.getColumnIndexPairs(table.getSchemaAllIndexes(false)
.stream().map(Column::getName).collect(Collectors.toSet()));
} else if (table instanceof OlapTable && tblStats.newPartitionLoaded.get()) {
OlapTable olapTable = (OlapTable) table;
Set<String> partitionNames = olapTable.getAllPartitions().stream()
.map(Partition::getName).collect(Collectors.toSet());
needRunColumns = olapTable.getColumnIndexPairs(partitionNames);
}
if (needRunColumns == null || needRunColumns.isEmpty()) {

View File

@ -19,7 +19,6 @@ package org.apache.doris.statistics;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.common.Pair;
import org.apache.doris.common.io.Text;
@ -160,14 +159,6 @@ public class TableStatsMeta implements Writable, GsonPostProcessable {
updatedRows.set(0);
newPartitionLoaded.set(false);
}
if (tableIf instanceof OlapTable) {
PartitionInfo partitionInfo = ((OlapTable) tableIf).getPartitionInfo();
if (partitionInfo != null && analyzedJob.jobColumns
.containsAll(tableIf.getColumnIndexPairs(partitionInfo.getPartitionColumns().stream()
.map(Column::getName).collect(Collectors.toSet())))) {
newPartitionLoaded.set(false);
}
}
}
}
@ -180,6 +171,9 @@ public class TableStatsMeta implements Writable, GsonPostProcessable {
if (indexesRowCount == null) {
indexesRowCount = new ConcurrentHashMap<>();
}
if (newPartitionLoaded == null) {
newPartitionLoaded = new AtomicBoolean(false);
}
}
public long getRowCount(long indexId) {