[opt](stats) remove corresponding col stats status if the loading at the end of analyze task is failed (#24405)
This commit is contained in:
@ -48,7 +48,7 @@ public class ShowTableStatsStmt extends ShowStmt {
|
||||
new ImmutableList.Builder<String>()
|
||||
.add("updated_rows")
|
||||
.add("query_times")
|
||||
.add("row_count(for external_table only)")
|
||||
.add("row_count")
|
||||
.add("method")
|
||||
.add("type")
|
||||
.add("updated_time")
|
||||
|
||||
@ -480,6 +480,8 @@ public class AnalysisManager extends Daemon implements Writable {
|
||||
return columnToPartitions;
|
||||
}
|
||||
|
||||
// Make sure the job's colName contains every column specified by this AnalyzeStmt, whether or not the column will
|
||||
// actually be analyzed.
|
||||
@VisibleForTesting
|
||||
public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlException {
|
||||
AnalysisInfoBuilder infoBuilder = new AnalysisInfoBuilder();
|
||||
@ -733,7 +735,6 @@ public class AnalysisManager extends Daemon implements Writable {
|
||||
}
|
||||
logCreateTableStats(tableStats);
|
||||
StatisticsRepository.dropStatistics(tblId, cols);
|
||||
|
||||
}
|
||||
|
||||
public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) throws DdlException {
|
||||
@ -1044,4 +1045,13 @@ public class AnalysisManager extends Daemon implements Writable {
|
||||
}, null);
|
||||
}
|
||||
|
||||
// Remove a column's stats status from TableStats if loading its stats fails after the column has been analyzed, so
|
||||
// that the column is analyzed again the next time a user or the system submits a job for it.
|
||||
public void removeColStatsStatus(long tblId, String colName) {
|
||||
TableStats tableStats = findTableStatsStatus(tblId);
|
||||
if (tableStats != null) {
|
||||
tableStats.removeColumn(colName);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -185,7 +185,11 @@ public abstract class BaseAnalysisTask {
|
||||
if (killed) {
|
||||
return;
|
||||
}
|
||||
Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tbl.getId(), -1, col.getName());
|
||||
long tblId = tbl.getId();
|
||||
String colName = col.getName();
|
||||
if (!Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tblId, -1, colName)) {
|
||||
Env.getCurrentEnv().getAnalysisManager().removeColStatsStatus(tblId, colName);
|
||||
}
|
||||
}
|
||||
|
||||
protected void setTaskStateToRunning() {
|
||||
|
||||
@ -24,7 +24,6 @@ import org.apache.doris.catalog.Type;
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
import org.apache.doris.statistics.util.StatisticsUtil;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.gson.annotations.SerializedName;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
@ -136,7 +135,9 @@ public class ColumnStatistic {
|
||||
LOG.debug("Failed to deserialize column stats", t);
|
||||
return ColumnStatistic.UNKNOWN;
|
||||
}
|
||||
Preconditions.checkState(columnStatistic != null, "Column stats is null");
|
||||
if (columnStatistic == null) {
|
||||
return ColumnStatistic.UNKNOWN;
|
||||
}
|
||||
columnStatistic.partitionIdToColStats.putAll(partitionIdToColStats);
|
||||
return columnStatistic;
|
||||
}
|
||||
|
||||
@ -36,8 +36,6 @@ public class StatisticConstants {
|
||||
|
||||
public static final int STATISTICS_CACHE_REFRESH_INTERVAL = 24 * 2;
|
||||
|
||||
public static final int ROW_COUNT_CACHE_VALID_DURATION_IN_HOURS = 12;
|
||||
|
||||
/**
|
||||
* Bucket count for column_statistics and analysis_job table.
|
||||
*/
|
||||
@ -59,12 +57,6 @@ public class StatisticConstants {
|
||||
|
||||
public static final int HISTOGRAM_MAX_BUCKET_NUM = 128;
|
||||
|
||||
/**
|
||||
* The health of the table indicates the health of the table statistics, rang in [0, 100].
|
||||
* Below this threshold will automatically re-collect statistics. TODO make it in fe.conf
|
||||
*/
|
||||
public static final int TABLE_STATS_HEALTH_THRESHOLD = 80;
|
||||
|
||||
public static final int ANALYZE_MANAGER_INTERVAL_IN_SECS = 60;
|
||||
|
||||
public static List<String> STATISTICS_DB_BLACK_LIST = new ArrayList<>();
|
||||
|
||||
@ -212,17 +212,20 @@ public class StatisticsCache {
|
||||
}
|
||||
}
|
||||
|
||||
public void syncLoadColStats(long tableId, long idxId, String colName) {
|
||||
/**
|
||||
* Return false if loading the corresponding column stats failed.
|
||||
*/
|
||||
public boolean syncLoadColStats(long tableId, long idxId, String colName) {
|
||||
List<ResultRow> columnResults = StatisticsRepository.loadColStats(tableId, idxId, colName);
|
||||
final StatisticsCacheKey k =
|
||||
new StatisticsCacheKey(tableId, idxId, colName);
|
||||
final ColumnStatistic c = ColumnStatistic.fromResultRow(columnResults);
|
||||
if (c == ColumnStatistic.UNKNOWN) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
putCache(k, c);
|
||||
if (ColumnStatistic.UNKNOWN == c) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
TUpdateFollowerStatsCacheRequest updateFollowerStatsCacheRequest = new TUpdateFollowerStatsCacheRequest();
|
||||
updateFollowerStatsCacheRequest.key = GsonUtils.GSON.toJson(k);
|
||||
@ -234,6 +237,7 @@ public class StatisticsCache {
|
||||
}
|
||||
sendStats(frontend, updateFollowerStatsCacheRequest);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
|
||||
Reference in New Issue
Block a user