[improvement](statistics) Analyze all columns when a partition is first loaded. (#38606)
backport: https://github.com/apache/doris/pull/38540
This commit is contained in:
@ -44,6 +44,7 @@ public class AnalyzeProperties {
|
||||
public static final String PROPERTY_PERIOD_SECONDS = "period.seconds";
|
||||
public static final String PROPERTY_FORCE_FULL = "force.full";
|
||||
public static final String PROPERTY_PARTITION_COLUMN_FROM_SQL = "partition.column.from.sql";
|
||||
public static final String PROPERTY_USE_AUTO_ANALYZER = "use.auto.analyzer";
|
||||
|
||||
public static final AnalyzeProperties DEFAULT_PROP = new AnalyzeProperties(new HashMap<String, String>() {
|
||||
{
|
||||
@ -72,6 +73,7 @@ public class AnalyzeProperties {
|
||||
.add(PROPERTY_PERIOD_CRON)
|
||||
.add(PROPERTY_FORCE_FULL)
|
||||
.add(PROPERTY_PARTITION_COLUMN_FROM_SQL)
|
||||
.add(PROPERTY_USE_AUTO_ANALYZER)
|
||||
.build();
|
||||
|
||||
public AnalyzeProperties(Map<String, String> properties) {
|
||||
|
||||
@ -1304,6 +1304,10 @@ public class OlapTable extends Table implements MTMVRelatedTableIf {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check whether a new partition has been loaded for the first time.
|
||||
if (tblStats.newPartitionLoaded != null && tblStats.newPartitionLoaded.get()) {
|
||||
return true;
|
||||
}
|
||||
// 1 Check row count.
|
||||
long currentRowCount = getRowCount();
|
||||
long lastAnalyzeRowCount = tblStats.rowCount;
|
||||
|
||||
@ -204,7 +204,21 @@ public class AnalysisManager implements Writable {
|
||||
}
|
||||
|
||||
// Each analyze statement corresponds to one analysis job.
|
||||
public void createAnalysisJob(AnalyzeTblStmt stmt, boolean proxy) throws DdlException {
|
||||
public void createAnalysisJob(AnalyzeTblStmt stmt, boolean proxy) throws DdlException, AnalysisException {
|
||||
// Use the auto analyzer if the user requests it.
|
||||
if (stmt.getAnalyzeProperties().getProperties().containsKey("use.auto.analyzer")) {
|
||||
StatisticsAutoCollector autoCollector = Env.getCurrentEnv().getStatisticsAutoCollector();
|
||||
if (autoCollector.skip(stmt.getTable())) {
|
||||
return;
|
||||
}
|
||||
List<AnalysisInfo> jobs = new ArrayList<>();
|
||||
autoCollector.createAnalyzeJobForTbl(stmt.getDb(), jobs, stmt.getTable());
|
||||
AnalysisInfo job = autoCollector.getNeedAnalyzeColumns(jobs.get(0));
|
||||
if (job != null) {
|
||||
Env.getCurrentEnv().getStatisticsAutoCollector().createSystemAnalysisJob(job);
|
||||
}
|
||||
return;
|
||||
}
|
||||
AnalysisInfo jobInfo = buildAndAssignJob(stmt);
|
||||
if (jobInfo == null) {
|
||||
return;
|
||||
|
||||
@ -21,7 +21,6 @@ import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.DatabaseIf;
|
||||
import org.apache.doris.catalog.Env;
|
||||
import org.apache.doris.catalog.OlapTable;
|
||||
import org.apache.doris.catalog.Partition;
|
||||
import org.apache.doris.catalog.TableIf;
|
||||
import org.apache.doris.common.Config;
|
||||
import org.apache.doris.common.DdlException;
|
||||
@ -41,7 +40,6 @@ import org.apache.logging.log4j.Logger;
|
||||
import java.time.LocalTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.StringJoiner;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.stream.Collectors;
|
||||
@ -221,11 +219,6 @@ public class StatisticsAutoCollector extends StatisticsCollector {
|
||||
if (table.needReAnalyzeTable(tblStats)) {
|
||||
needRunColumns = table.getColumnIndexPairs(table.getSchemaAllIndexes(false)
|
||||
.stream().map(Column::getName).collect(Collectors.toSet()));
|
||||
} else if (table instanceof OlapTable && tblStats.newPartitionLoaded.get()) {
|
||||
OlapTable olapTable = (OlapTable) table;
|
||||
Set<String> partitionNames = olapTable.getAllPartitions().stream()
|
||||
.map(Partition::getName).collect(Collectors.toSet());
|
||||
needRunColumns = olapTable.getColumnIndexPairs(partitionNames);
|
||||
}
|
||||
|
||||
if (needRunColumns == null || needRunColumns.isEmpty()) {
|
||||
|
||||
@ -19,7 +19,6 @@ package org.apache.doris.statistics;
|
||||
|
||||
import org.apache.doris.catalog.Column;
|
||||
import org.apache.doris.catalog.OlapTable;
|
||||
import org.apache.doris.catalog.PartitionInfo;
|
||||
import org.apache.doris.catalog.TableIf;
|
||||
import org.apache.doris.common.Pair;
|
||||
import org.apache.doris.common.io.Text;
|
||||
@ -160,14 +159,6 @@ public class TableStatsMeta implements Writable, GsonPostProcessable {
|
||||
updatedRows.set(0);
|
||||
newPartitionLoaded.set(false);
|
||||
}
|
||||
if (tableIf instanceof OlapTable) {
|
||||
PartitionInfo partitionInfo = ((OlapTable) tableIf).getPartitionInfo();
|
||||
if (partitionInfo != null && analyzedJob.jobColumns
|
||||
.containsAll(tableIf.getColumnIndexPairs(partitionInfo.getPartitionColumns().stream()
|
||||
.map(Column::getName).collect(Collectors.toSet())))) {
|
||||
newPartitionLoaded.set(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -180,6 +171,9 @@ public class TableStatsMeta implements Writable, GsonPostProcessable {
|
||||
if (indexesRowCount == null) {
|
||||
indexesRowCount = new ConcurrentHashMap<>();
|
||||
}
|
||||
if (newPartitionLoaded == null) {
|
||||
newPartitionLoaded = new AtomicBoolean(false);
|
||||
}
|
||||
}
|
||||
|
||||
public long getRowCount(long indexId) {
|
||||
|
||||
Reference in New Issue
Block a user