[opt](stats) remove corresponding col stats status if the loading at the end of analyze task is failed (#24405)
This commit is contained in:
@ -48,7 +48,7 @@ public class ShowTableStatsStmt extends ShowStmt {
|
||||
new ImmutableList.Builder<String>()
|
||||
.add("updated_rows")
|
||||
.add("query_times")
|
||||
.add("row_count(for external_table only)")
|
||||
.add("row_count")
|
||||
.add("method")
|
||||
.add("type")
|
||||
.add("updated_time")
|
||||
|
||||
@ -480,6 +480,8 @@ public class AnalysisManager extends Daemon implements Writable {
|
||||
return columnToPartitions;
|
||||
}
|
||||
|
||||
// Make sure colName of the job has all the columns this AnalyzeStmt specified, no matter whether they will be analyzed
|
||||
// or not.
|
||||
@VisibleForTesting
|
||||
public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlException {
|
||||
AnalysisInfoBuilder infoBuilder = new AnalysisInfoBuilder();
|
||||
@ -733,7 +735,6 @@ public class AnalysisManager extends Daemon implements Writable {
|
||||
}
|
||||
logCreateTableStats(tableStats);
|
||||
StatisticsRepository.dropStatistics(tblId, cols);
|
||||
|
||||
}
|
||||
|
||||
public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) throws DdlException {
|
||||
@ -1044,4 +1045,13 @@ public class AnalysisManager extends Daemon implements Writable {
|
||||
}, null);
|
||||
}
|
||||
|
||||
// Remove col stats status from TableStats if loading some col stats failed after analyzing the corresponding column, so that
|
||||
// we can make sure it will be analyzed again soon if the user or system submits a job for that column again.
|
||||
public void removeColStatsStatus(long tblId, String colName) {
|
||||
TableStats tableStats = findTableStatsStatus(tblId);
|
||||
if (tableStats != null) {
|
||||
tableStats.removeColumn(colName);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -185,7 +185,11 @@ public abstract class BaseAnalysisTask {
|
||||
if (killed) {
|
||||
return;
|
||||
}
|
||||
Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tbl.getId(), -1, col.getName());
|
||||
long tblId = tbl.getId();
|
||||
String colName = col.getName();
|
||||
if (!Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tblId, -1, colName)) {
|
||||
Env.getCurrentEnv().getAnalysisManager().removeColStatsStatus(tblId, colName);
|
||||
}
|
||||
}
|
||||
|
||||
protected void setTaskStateToRunning() {
|
||||
|
||||
@ -24,7 +24,6 @@ import org.apache.doris.catalog.Type;
|
||||
import org.apache.doris.common.AnalysisException;
|
||||
import org.apache.doris.statistics.util.StatisticsUtil;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Sets;
|
||||
import com.google.gson.annotations.SerializedName;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
@ -136,7 +135,9 @@ public class ColumnStatistic {
|
||||
LOG.debug("Failed to deserialize column stats", t);
|
||||
return ColumnStatistic.UNKNOWN;
|
||||
}
|
||||
Preconditions.checkState(columnStatistic != null, "Column stats is null");
|
||||
if (columnStatistic == null) {
|
||||
return ColumnStatistic.UNKNOWN;
|
||||
}
|
||||
columnStatistic.partitionIdToColStats.putAll(partitionIdToColStats);
|
||||
return columnStatistic;
|
||||
}
|
||||
|
||||
@ -36,8 +36,6 @@ public class StatisticConstants {
|
||||
|
||||
public static final int STATISTICS_CACHE_REFRESH_INTERVAL = 24 * 2;
|
||||
|
||||
public static final int ROW_COUNT_CACHE_VALID_DURATION_IN_HOURS = 12;
|
||||
|
||||
/**
|
||||
* Bucket count for column_statistics and analysis_job tables.
|
||||
*/
|
||||
@ -59,12 +57,6 @@ public class StatisticConstants {
|
||||
|
||||
public static final int HISTOGRAM_MAX_BUCKET_NUM = 128;
|
||||
|
||||
/**
|
||||
* The health of the table indicates the health of the table statistics, ranging in [0, 100].
|
||||
* Statistics are automatically re-collected when the health falls below this threshold. TODO make it configurable in fe.conf
|
||||
*/
|
||||
public static final int TABLE_STATS_HEALTH_THRESHOLD = 80;
|
||||
|
||||
public static final int ANALYZE_MANAGER_INTERVAL_IN_SECS = 60;
|
||||
|
||||
public static List<String> STATISTICS_DB_BLACK_LIST = new ArrayList<>();
|
||||
|
||||
@ -212,17 +212,20 @@ public class StatisticsCache {
|
||||
}
|
||||
}
|
||||
|
||||
public void syncLoadColStats(long tableId, long idxId, String colName) {
|
||||
/**
|
||||
* Return false if loading the corresponding column stats failed.
|
||||
*/
|
||||
public boolean syncLoadColStats(long tableId, long idxId, String colName) {
|
||||
List<ResultRow> columnResults = StatisticsRepository.loadColStats(tableId, idxId, colName);
|
||||
final StatisticsCacheKey k =
|
||||
new StatisticsCacheKey(tableId, idxId, colName);
|
||||
final ColumnStatistic c = ColumnStatistic.fromResultRow(columnResults);
|
||||
if (c == ColumnStatistic.UNKNOWN) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
putCache(k, c);
|
||||
if (ColumnStatistic.UNKNOWN == c) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
TUpdateFollowerStatsCacheRequest updateFollowerStatsCacheRequest = new TUpdateFollowerStatsCacheRequest();
|
||||
updateFollowerStatsCacheRequest.key = GsonUtils.GSON.toJson(k);
|
||||
@ -234,6 +237,7 @@ public class StatisticsCache {
|
||||
}
|
||||
sendStats(frontend, updateFollowerStatsCacheRequest);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@VisibleForTesting
|
||||
|
||||
@ -117,7 +117,7 @@ suite("test_analyze") {
|
||||
|
||||
try {
|
||||
sql """
|
||||
SELECT COUNT(*) FROM ${tbl};
|
||||
SELECT * FROM ${tbl};
|
||||
"""
|
||||
} catch (Exception e) {
|
||||
exception = e
|
||||
@ -959,4 +959,81 @@ PARTITION `p599` VALUES IN (599)
|
||||
"""
|
||||
expected_col_stats(col_id_res, 3, 1)
|
||||
|
||||
sql """DROP TABLE IF EXISTS `some_complex_type_test`"""
|
||||
|
||||
sql """
|
||||
CREATE TABLE `some_complex_type_test` (
|
||||
`id` int(11) NULL COMMENT "",
|
||||
`c_array` ARRAY<int(11)> NULL COMMENT ""
|
||||
) ENGINE=OLAP
|
||||
DUPLICATE KEY(`id`)
|
||||
COMMENT "OLAP"
|
||||
DISTRIBUTED BY HASH(`id`) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_allocation" = "tag.location.default: 1",
|
||||
"in_memory" = "false",
|
||||
"storage_format" = "V2"
|
||||
);
|
||||
"""
|
||||
|
||||
sql """INSERT INTO `some_complex_type_test` VALUES (1, [1,2,3,4,5]);"""
|
||||
sql """INSERT INTO `some_complex_type_test` VALUES (2, [6,7,8]), (3, []), (4, null);"""
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE `some_complex_type_test` WITH SYNC;
|
||||
|
||||
"""
|
||||
|
||||
sql """
|
||||
SELECT COUNT(1) FROM `some_complex_type_test`
|
||||
"""
|
||||
|
||||
sql """DROP TABLE IF EXISTS `analyze_test_with_schema_update`"""
|
||||
|
||||
sql """
|
||||
CREATE TABLE `analyze_test_with_schema_update` (
|
||||
col1 varchar(11451) not null, col2 int not null, col3 int not null)
|
||||
DUPLICATE KEY(col1)
|
||||
DISTRIBUTED BY HASH(col1)
|
||||
BUCKETS 3
|
||||
PROPERTIES(
|
||||
"replication_num"="1"
|
||||
);
|
||||
"""
|
||||
|
||||
sql """insert into analyze_test_with_schema_update values(1, 2, 3);"""
|
||||
sql """insert into analyze_test_with_schema_update values(4, 5, 6);"""
|
||||
sql """insert into analyze_test_with_schema_update values(7, 1, 9);"""
|
||||
sql """insert into analyze_test_with_schema_update values(3, 8, 2);"""
|
||||
sql """insert into analyze_test_with_schema_update values(5, 2, 1);"""
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE analyze_test_with_schema_update WITH SYNC
|
||||
"""
|
||||
|
||||
sql """
|
||||
ALTER TABLE analyze_test_with_schema_update ADD COLUMN tbl_name VARCHAR(256) DEFAULT NULL;
|
||||
"""
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE analyze_test_with_schema_update WITH SYNC
|
||||
"""
|
||||
|
||||
sql """
|
||||
SELECT * FROM analyze_test_with_schema_update;
|
||||
"""
|
||||
|
||||
sql """
|
||||
DROP STATS analyze_test_with_schema_update(col3);
|
||||
"""
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE analyze_test_with_schema_update WITH SYNC
|
||||
"""
|
||||
|
||||
sql """
|
||||
SELECT * FROM analyze_test_with_schema_update;
|
||||
"""
|
||||
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user