[opt](stats) Remove the corresponding col stats status if loading at the end of the analyze task fails (#24405)

This commit is contained in:
AKIRA
2023-09-15 18:46:48 +09:00
committed by GitHub
parent dc0c39f1d8
commit fa37a8bba8
7 changed files with 105 additions and 17 deletions

View File

@ -48,7 +48,7 @@ public class ShowTableStatsStmt extends ShowStmt {
new ImmutableList.Builder<String>()
.add("updated_rows")
.add("query_times")
.add("row_count(for external_table only)")
.add("row_count")
.add("method")
.add("type")
.add("updated_time")

View File

@ -480,6 +480,8 @@ public class AnalysisManager extends Daemon implements Writable {
return columnToPartitions;
}
// Make sure the job's colName covers every column specified by this AnalyzeStmt, regardless of whether each column
// will actually be analyzed.
@VisibleForTesting
public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlException {
AnalysisInfoBuilder infoBuilder = new AnalysisInfoBuilder();
@ -733,7 +735,6 @@ public class AnalysisManager extends Daemon implements Writable {
}
logCreateTableStats(tableStats);
StatisticsRepository.dropStatistics(tblId, cols);
}
public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) throws DdlException {
@ -1044,4 +1045,13 @@ public class AnalysisManager extends Daemon implements Writable {
}, null);
}
/**
 * Drops the recorded status of a single column from the table's stats status.
 *
 * <p>Meant for the case where loading a column's stats fails after that column has been analyzed: once the
 * status is removed, the column is expected to be analyzed again the next time a user or the system submits
 * a job for it. Does nothing when no stats status is found for the table.
 *
 * @param tblId id of the table whose stats status is looked up
 * @param colName name of the column whose status is removed
 */
public void removeColStatsStatus(long tblId, String colName) {
    final TableStats statsStatus = findTableStatsStatus(tblId);
    if (statsStatus == null) {
        // Nothing is tracked for this table, so there is no column status to drop.
        return;
    }
    statsStatus.removeColumn(colName);
}
}

View File

@ -185,7 +185,11 @@ public abstract class BaseAnalysisTask {
if (killed) {
return;
}
Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tbl.getId(), -1, col.getName());
long tblId = tbl.getId();
String colName = col.getName();
if (!Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tblId, -1, colName)) {
Env.getCurrentEnv().getAnalysisManager().removeColStatsStatus(tblId, colName);
}
}
protected void setTaskStateToRunning() {

View File

@ -24,7 +24,6 @@ import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import com.google.gson.annotations.SerializedName;
import org.apache.logging.log4j.LogManager;
@ -136,7 +135,9 @@ public class ColumnStatistic {
LOG.debug("Failed to deserialize column stats", t);
return ColumnStatistic.UNKNOWN;
}
Preconditions.checkState(columnStatistic != null, "Column stats is null");
if (columnStatistic == null) {
return ColumnStatistic.UNKNOWN;
}
columnStatistic.partitionIdToColStats.putAll(partitionIdToColStats);
return columnStatistic;
}

View File

@ -36,8 +36,6 @@ public class StatisticConstants {
public static final int STATISTICS_CACHE_REFRESH_INTERVAL = 24 * 2;
public static final int ROW_COUNT_CACHE_VALID_DURATION_IN_HOURS = 12;
/**
* Bucket count for column_statistics and analysis_job table.
*/
@ -59,12 +57,6 @@ public class StatisticConstants {
public static final int HISTOGRAM_MAX_BUCKET_NUM = 128;
/**
* The health of the table indicates the health of the table statistics, ranging in [0, 100].
* Statistics are automatically re-collected when the health falls below this threshold. TODO make it configurable in fe.conf
*/
public static final int TABLE_STATS_HEALTH_THRESHOLD = 80;
public static final int ANALYZE_MANAGER_INTERVAL_IN_SECS = 60;
public static List<String> STATISTICS_DB_BLACK_LIST = new ArrayList<>();

View File

@ -212,17 +212,20 @@ public class StatisticsCache {
}
}
public void syncLoadColStats(long tableId, long idxId, String colName) {
/**
* Return false if the corresponding column stats could not be loaded.
*/
public boolean syncLoadColStats(long tableId, long idxId, String colName) {
List<ResultRow> columnResults = StatisticsRepository.loadColStats(tableId, idxId, colName);
final StatisticsCacheKey k =
new StatisticsCacheKey(tableId, idxId, colName);
final ColumnStatistic c = ColumnStatistic.fromResultRow(columnResults);
if (c == ColumnStatistic.UNKNOWN) {
return;
return false;
}
putCache(k, c);
if (ColumnStatistic.UNKNOWN == c) {
return;
return false;
}
TUpdateFollowerStatsCacheRequest updateFollowerStatsCacheRequest = new TUpdateFollowerStatsCacheRequest();
updateFollowerStatsCacheRequest.key = GsonUtils.GSON.toJson(k);
@ -234,6 +237,7 @@ public class StatisticsCache {
}
sendStats(frontend, updateFollowerStatsCacheRequest);
}
return true;
}
@VisibleForTesting