Fix auto analyze not filtering columns of unsupported types. (#27559)
Auto analyze now skips columns whose types statistics collection does not support. Also catch Throwable in the auto analyze thread for each database: otherwise the thread quits as soon as one database fails to create its jobs, and all remaining databases are never analyzed. Finally, rename the FE config item full_auto_analyze_simultaneously_running_task_num to auto_analyze_simultaneously_running_task_num.
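For context, the diff below applies two recurring patterns: filter out columns whose types statistics collection cannot handle before building analysis jobs, and isolate per-database failures so one bad database cannot stop the whole auto-analyze pass. The following minimal, self-contained sketch only illustrates that shape; AutoAnalyzeSketch, SimpleDb, SimpleColumn, and the example "unsupported" types are hypothetical stand-ins, not Doris classes.

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

// Simplified illustration of the two patterns in this commit:
// 1) skip unsupported column types when choosing what to analyze,
// 2) catch Throwable per database so one failure does not kill the loop.
public class AutoAnalyzeSketch {

    record SimpleColumn(String name, String type) {}

    record SimpleDb(String name, List<SimpleColumn> columns) {}

    // Hypothetical stand-in for StatisticsUtil.isUnsupportedType(...).
    static boolean isUnsupportedType(String type) {
        return type.equals("JSONB") || type.equals("VARIANT");
    }

    static void analyzeDb(SimpleDb db) {
        // Keep only columns whose types are supported, mirroring the filter added in the diff.
        Set<String> supportedColumns = db.columns().stream()
                .filter(c -> !isUnsupportedType(c.type()))
                .map(SimpleColumn::name)
                .collect(Collectors.toSet());
        System.out.println("analyzing " + db.name() + " columns=" + supportedColumns);
    }

    public static void main(String[] args) {
        List<SimpleDb> dbs = List.of(
                new SimpleDb("db1", List.of(new SimpleColumn("k1", "INT"),
                        new SimpleColumn("v1", "JSONB"))),
                new SimpleDb("db2", List.of(new SimpleColumn("k2", "BIGINT"))));
        for (SimpleDb db : dbs) {
            try {
                analyzeDb(db);
            } catch (Throwable t) {
                // Log and continue with the next database instead of exiting the loop.
                System.err.println("Failed to analyze database " + db.name() + ": " + t);
            }
        }
    }
}

Catching Throwable rather than Exception matters here because an Error raised while analyzing one database would otherwise terminate the collector thread and silently stop auto analyze for every remaining database, which is exactly the failure mode the commit message describes.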
@@ -1162,6 +1162,7 @@ public class OlapTable extends Table {
         }
         if (!tblStats.analyzeColumns().containsAll(getBaseSchema()
                 .stream()
+                .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
                 .map(Column::getName)
                 .collect(Collectors.toSet()))) {
             return true;
@@ -1178,16 +1179,20 @@ public class OlapTable extends Table {
         Set<String> allPartitions = table.getPartitionNames().stream().map(table::getPartition)
                 .filter(Partition::hasData).map(Partition::getName).collect(Collectors.toSet());
         if (tableStats == null) {
-            return table.getBaseSchema().stream().collect(Collectors.toMap(Column::getName, v -> allPartitions));
+            return table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
+                    .collect(Collectors.toMap(Column::getName, v -> allPartitions));
         }
         Map<String, Set<String>> colToPart = new HashMap<>();
         for (Column col : table.getBaseSchema()) {
+            if (StatisticsUtil.isUnsupportedType(col.getType())) {
+                continue;
+            }
             long lastUpdateTime = tableStats.findColumnLastUpdateTime(col.getName());
             Set<String> partitions = table.getPartitionNames().stream()
                     .map(table::getPartition)
                     .filter(Partition::hasData)
                     .filter(partition ->
-                            partition.getVisibleVersionTime() >= lastUpdateTime).map(Partition::getName)
+                            partition.getVisibleVersionTime() >= lastUpdateTime).map(Partition::getName)
                     .collect(Collectors.toSet());
             colToPart.put(col.getName(), partitions);
         }
@@ -2393,7 +2398,6 @@ public class OlapTable extends Table {
         }
     }

     @Override
     public boolean isDistributionColumn(String columnName) {
         Set<String> distributeColumns = getDistributionColumnNames()
                 .stream().map(String::toLowerCase).collect(Collectors.toSet());

@@ -36,6 +36,7 @@ import org.apache.doris.statistics.AnalysisInfo;
 import org.apache.doris.statistics.BaseAnalysisTask;
 import org.apache.doris.statistics.ColumnStatistic;
 import org.apache.doris.statistics.TableStatsMeta;
+import org.apache.doris.statistics.util.StatisticsUtil;
 import org.apache.doris.thrift.TTableDescriptor;

 import com.google.common.collect.Sets;
@@ -397,7 +398,8 @@ public class ExternalTable implements TableIf, Writable, GsonPostProcessable {
         HashSet<String> partitions = Sets.newHashSet();
         // TODO: Find a way to collect external table partitions that need to be analyzed.
         partitions.add("Dummy Partition");
-        return getBaseSchema().stream().collect(Collectors.toMap(Column::getName, k -> partitions));
+        return getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
+                .collect(Collectors.toMap(Column::getName, k -> partitions));
     }

     @Override

@@ -24,6 +24,7 @@ import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.catalog.external.ExternalTable;
 import org.apache.doris.common.Config;
+import org.apache.doris.common.DdlException;
 import org.apache.doris.common.util.TimeUtils;
 import org.apache.doris.datasource.CatalogIf;
 import org.apache.doris.statistics.AnalysisInfo.AnalysisMethod;
@@ -50,8 +51,8 @@ public class StatisticsAutoCollector extends StatisticsCollector {

     public StatisticsAutoCollector() {
         super("Automatic Analyzer",
-                TimeUnit.MINUTES.toMillis(Config.full_auto_analyze_simultaneously_running_task_num),
-                new AnalysisTaskExecutor(Config.full_auto_analyze_simultaneously_running_task_num));
+                TimeUnit.MINUTES.toMillis(Config.auto_check_statistics_in_minutes),
+                new AnalysisTaskExecutor(Config.auto_analyze_simultaneously_running_task_num));
     }

     @Override
@@ -77,12 +78,17 @@ public class StatisticsAutoCollector extends StatisticsCollector {
                 if (StatisticConstants.SYSTEM_DBS.contains(databaseIf.getFullName())) {
                     continue;
                 }
-                analyzeDb(databaseIf);
+                try {
+                    analyzeDb(databaseIf);
+                } catch (Throwable t) {
+                    LOG.warn("Failed to analyze database {}.{}", ctl.getName(), databaseIf.getFullName(), t);
+                    continue;
+                }
             }
         }
     }

-    public void analyzeDb(DatabaseIf<TableIf> databaseIf) {
+    public void analyzeDb(DatabaseIf<TableIf> databaseIf) throws DdlException {
         List<AnalysisInfo> analysisInfos = constructAnalysisInfo(databaseIf);
         for (AnalysisInfo analysisInfo : analysisInfos) {
             analysisInfo = getReAnalyzeRequiredPart(analysisInfo);
@@ -91,8 +97,9 @@ public class StatisticsAutoCollector extends StatisticsCollector {
             }
             try {
                 createSystemAnalysisJob(analysisInfo);
-            } catch (Exception e) {
-                LOG.warn("Failed to create analysis job", e);
+            } catch (Throwable t) {
+                analysisInfo.message = t.getMessage();
+                throw t;
             }
         }
     }
@@ -136,8 +143,7 @@ public class StatisticsAutoCollector extends StatisticsCollector {
                 .setTblId(table.getId())
                 .setColName(
                         table.getBaseSchema().stream().filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
-                                .map(
-                                        Column::getName).collect(Collectors.joining(","))
+                                .map(Column::getName).collect(Collectors.joining(","))
                 )
                 .setAnalysisType(AnalysisInfo.AnalysisType.FUNDAMENTALS)
                 .setAnalysisMode(AnalysisInfo.AnalysisMode.INCREMENTAL)

@@ -23,6 +23,7 @@ import org.apache.doris.common.io.Text;
 import org.apache.doris.common.io.Writable;
 import org.apache.doris.persist.gson.GsonUtils;
 import org.apache.doris.statistics.AnalysisInfo.JobType;
+import org.apache.doris.statistics.util.StatisticsUtil;

 import com.google.gson.annotations.SerializedName;

@@ -136,8 +137,9 @@ public class TableStatsMeta implements Writable {
         }
         jobType = analyzedJob.jobType;
         if (tableIf != null && analyzedJob.colToPartitions.keySet()
-                .containsAll(tableIf.getBaseSchema().stream().map(Column::getName).collect(
-                        Collectors.toSet()))) {
+                .containsAll(tableIf.getBaseSchema().stream()
+                        .filter(c -> !StatisticsUtil.isUnsupportedType(c.getType()))
+                        .map(Column::getName).collect(Collectors.toSet()))) {
             updatedRows.set(0);
         }
     }