Fix bug where auto analyze does not filter out unsupported column types. (#27559)

Fix bug where auto analyze does not filter out unsupported column types.
Catch Throwable in the auto analyze thread for each database; otherwise the thread quits when one database fails to create jobs, and all remaining databases will not get analyzed.
Rename FE config item full_auto_analyze_simultaneously_running_task_num to auto_analyze_simultaneously_running_task_num.
This commit is contained in:
Jibing-Li
2023-11-25 10:22:52 +08:00
committed by GitHub
parent 6142a539f4
commit 6b1428dba1
6 changed files with 30 additions and 16 deletions

View File

@ -1162,6 +1162,7 @@ public class OlapTable extends Table {
}
if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
.stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName)
.collect(Collectors.toSet()))) {
return true;
@ -1178,16 +1179,20 @@ public class OlapTable extends Table {
Set<String> allPartitions = table.getPartitionNames().stream().map(table::getPartition)
.filter(Partition::hasData).map(Partition::getName).collect(Collectors.toSet());
if (tableStats == null) {
return table.getBaseSchema().stream().collect(Collectors.toMap(Column::getName, v -> allPartitions));
return table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.collect(Collectors.toMap(Column::getName, v -> allPartitions));
}
Map<String, Set<String>> colToPart = new HashMap<>();
for (Column col : table.getBaseSchema()) {
if (StatisticsUtil.isUnsupportedType(col.getType())) {
continue;
}
long lastUpdateTime = tableStats.findColumnLastUpdateTime(col.getName());
Set<String> partitions = table.getPartitionNames().stream()
.map(table::getPartition)
.filter(Partition::hasData)
.filter(partition ->
partition.getVisibleVersionTime() >= lastUpdateTime).map(Partition::getName)
partition.getVisibleVersionTime() >= lastUpdateTime).map(Partition::getName)
.collect(Collectors.toSet());
colToPart.put(col.getName(), partitions);
}
@ -2393,7 +2398,6 @@ public class OlapTable extends Table {
}
}
@Override
public boolean isDistributionColumn(String columnName) {
Set<String> distributeColumns = getDistributionColumnNames()
.stream().map(String::toLowerCase).collect(Collectors.toSet());

View File

@ -36,6 +36,7 @@ import org.apache.doris.statistics.AnalysisInfo;
import org.apache.doris.statistics.BaseAnalysisTask;
import org.apache.doris.statistics.ColumnStatistic;
import org.apache.doris.statistics.TableStatsMeta;
import org.apache.doris.statistics.util.StatisticsUtil;
import org.apache.doris.thrift.TTableDescriptor;
import com.google.common.collect.Sets;
@ -397,7 +398,8 @@ public class ExternalTable implements TableIf, Writable, GsonPostProcessable {
HashSet<String> partitions = Sets.newHashSet();
// TODO: Find a way to collect external table partitions that need to be analyzed.
partitions.add("Dummy Partition");
return getBaseSchema().stream().collect(Collectors.toMap(Column::getName, k -> partitions));
return getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.collect(Collectors.toMap(Column::getName, k -> partitions));
}
@Override

View File

@ -24,6 +24,7 @@ import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.TableIf;
import org.apache.doris.catalog.external.ExternalTable;
import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.util.TimeUtils;
import org.apache.doris.datasource.CatalogIf;
import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
@ -50,8 +51,8 @@ public class StatisticsAutoCollector extends StatisticsCollector {
public StatisticsAutoCollector() {
super("Automatic Analyzer",
TimeUnit.MINUTES.toMillis(Config.full_auto_analyze_simultaneously_running_task_num),
new AnalysisTaskExecutor(Config.full_auto_analyze_simultaneously_running_task_num));
TimeUnit.MINUTES.toMillis(Config.auto_check_statistics_in_minutes),
new AnalysisTaskExecutor(Config.auto_analyze_simultaneously_running_task_num));
}
@Override
@ -77,12 +78,17 @@ public class StatisticsAutoCollector extends StatisticsCollector {
if (StatisticConstants.SYSTEM_DBS.contains(databaseIf.getFullName())) {
continue;
}
analyzeDb(databaseIf);
try {
analyzeDb(databaseIf);
} catch (Throwable t) {
LOG.warn("Failed to analyze database {}.{}", ctl.getName(), databaseIf.getFullName(), t);
continue;
}
}
}
}
public void analyzeDb(DatabaseIf<TableIf> databaseIf) {
public void analyzeDb(DatabaseIf<TableIf> databaseIf) throws DdlException {
List<AnalysisInfo> analysisInfos = constructAnalysisInfo(databaseIf);
for (AnalysisInfo analysisInfo : analysisInfos) {
analysisInfo = getReAnalyzeRequiredPart(analysisInfo);
@ -91,8 +97,9 @@ public class StatisticsAutoCollector extends StatisticsCollector {
}
try {
createSystemAnalysisJob(analysisInfo);
} catch (Exception e) {
LOG.warn("Failed to create analysis job", e);
} catch (Throwable t) {
analysisInfo.message = t.getMessage();
throw t;
}
}
}
@ -136,8 +143,7 @@ public class StatisticsAutoCollector extends StatisticsCollector {
.setTblId(table.getId())
.setColName(
table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(
Column::getName).collect(Collectors.joining(","))
.map(Column::getName).collect(Collectors.joining(","))
)
.setAnalysisType(AnalysisInfo.AnalysisType.FUNDAMENTALS)
.setAnalysisMode(AnalysisInfo.AnalysisMode.INCREMENTAL)

View File

@ -23,6 +23,7 @@ import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.persist.gson.GsonUtils;
import org.apache.doris.statistics.AnalysisInfo.JobType;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.gson.annotations.SerializedName;
@ -136,8 +137,9 @@ public class TableStatsMeta implements Writable {
}
jobType = analyzedJob.jobType;
if (tableIf != null && analyzedJob.colToPartitions.keySet()
.containsAll(tableIf.getBaseSchema().stream().map(Column::getName).collect(
Collectors.toSet()))) {
.containsAll(tableIf.getBaseSchema().stream()
.filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
.map(Column::getName).collect(Collectors.toSet()))) {
updatedRows.set(0);
}
}