[opt](stats) remove corresponding col stats status if the load at the end of the analyze task fails (#24405)

AKIRA
2023-09-15 18:46:48 +09:00
committed by GitHub
parent dc0c39f1d8
commit fa37a8bba8
7 changed files with 105 additions and 17 deletions

View File

@ -48,7 +48,7 @@ public class ShowTableStatsStmt extends ShowStmt {
new ImmutableList.Builder<String>()
.add("updated_rows")
.add("query_times")
.add("row_count(for external_table only)")
.add("row_count")
.add("method")
.add("type")
.add("updated_time")

View File

@ -480,6 +480,8 @@ public class AnalysisManager extends Daemon implements Writable {
return columnToPartitions;
}
// Make sure colName of the job contains every column this AnalyzeStmt specifies, no matter whether the column will be analyzed
// or not.
@VisibleForTesting
public AnalysisInfo buildAnalysisJobInfo(AnalyzeTblStmt stmt) throws DdlException {
AnalysisInfoBuilder infoBuilder = new AnalysisInfoBuilder();
@ -733,7 +735,6 @@ public class AnalysisManager extends Daemon implements Writable {
}
logCreateTableStats(tableStats);
StatisticsRepository.dropStatistics(tblId, cols);
}
public void handleKillAnalyzeStmt(KillAnalysisJobStmt killAnalysisJobStmt) throws DdlException {
@ -1044,4 +1045,13 @@ public class AnalysisManager extends Daemon implements Writable {
}, null);
}
// Remove the column's stats status from TableStats when loading its stats fails after the column has been analyzed, so that
// the column is sure to be analyzed again the next time a user or the system submits a job for it.
public void removeColStatsStatus(long tblId, String colName) {
TableStats tableStats = findTableStatsStatus(tblId);
if (tableStats != null) {
tableStats.removeColumn(colName);
}
}
}
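
For context on the two hunks above: when the synchronous stats load that runs at the end of an analyze task fails, the column's status is dropped from TableStats so the next job will re-analyze the column instead of skipping it. Below is a minimal, self-contained sketch of that rollback pattern; SimpleTableStats and SimpleAnalysisManager are hypothetical stand-ins for illustration, not the actual Doris classes.

import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical stand-in for TableStats: tracks which columns count as analyzed.
class SimpleTableStats {
    private final Set<String> analyzedColumns = ConcurrentHashMap.newKeySet();

    void markAnalyzed(String colName) {
        analyzedColumns.add(colName);
    }

    void removeColumn(String colName) {
        analyzedColumns.remove(colName);
    }

    boolean isAnalyzed(String colName) {
        return analyzedColumns.contains(colName);
    }
}

// Hypothetical stand-in for AnalysisManager with the same rollback hook.
class SimpleAnalysisManager {
    private final Map<Long, SimpleTableStats> tableStats = new ConcurrentHashMap<>();

    SimpleTableStats findTableStatsStatus(long tblId) {
        return tableStats.computeIfAbsent(tblId, id -> new SimpleTableStats());
    }

    // Forget the column's status so the next analyze job will not skip it.
    void removeColStatsStatus(long tblId, String colName) {
        SimpleTableStats stats = tableStats.get(tblId);
        if (stats != null) {
            stats.removeColumn(colName);
        }
    }

    public static void main(String[] args) {
        SimpleAnalysisManager manager = new SimpleAnalysisManager();
        long tblId = 10001L;
        String colName = "col3";

        manager.findTableStatsStatus(tblId).markAnalyzed(colName);

        // Pretend the synchronous cache load at the end of the analyze task failed.
        boolean loaded = false;
        if (!loaded) {
            manager.removeColStatsStatus(tblId, colName);
        }

        // The column is no longer marked analyzed, so a later job will re-collect it.
        System.out.println(manager.findTableStatsStatus(tblId).isAnalyzed(colName)); // false
    }
}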

View File

@ -185,7 +185,11 @@ public abstract class BaseAnalysisTask {
if (killed) {
return;
}
Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tbl.getId(), -1, col.getName());
long tblId = tbl.getId();
String colName = col.getName();
if (!Env.getCurrentEnv().getStatisticsCache().syncLoadColStats(tblId, -1, colName)) {
Env.getCurrentEnv().getAnalysisManager().removeColStatsStatus(tblId, colName);
}
}
protected void setTaskStateToRunning() {

View File

@ -24,7 +24,6 @@ import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
import org.apache.doris.statistics.util.StatisticsUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import com.google.gson.annotations.SerializedName;
import org.apache.logging.log4j.LogManager;
@ -136,7 +135,9 @@ public class ColumnStatistic {
LOG.debug("Failed to deserialize column stats", t);
return ColumnStatistic.UNKNOWN;
}
Preconditions.checkState(columnStatistic != null, "Column stats is null");
if (columnStatistic == null) {
return ColumnStatistic.UNKNOWN;
}
columnStatistic.partitionIdToColStats.putAll(partitionIdToColStats);
return columnStatistic;
}

View File

@ -36,8 +36,6 @@ public class StatisticConstants {
public static final int STATISTICS_CACHE_REFRESH_INTERVAL = 24 * 2;
public static final int ROW_COUNT_CACHE_VALID_DURATION_IN_HOURS = 12;
/**
* Bucket count for the column_statistics and analysis_job tables.
*/
@ -59,12 +57,6 @@ public class StatisticConstants {
public static final int HISTOGRAM_MAX_BUCKET_NUM = 128;
/**
* The health of the table indicates the health of the table statistics, ranging in [0, 100].
* Statistics will be automatically re-collected when the health falls below this threshold. TODO: make it configurable in fe.conf
*/
public static final int TABLE_STATS_HEALTH_THRESHOLD = 80;
public static final int ANALYZE_MANAGER_INTERVAL_IN_SECS = 60;
public static List<String> STATISTICS_DB_BLACK_LIST = new ArrayList<>();
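
The TABLE_STATS_HEALTH_THRESHOLD javadoc above describes a 0-100 health score for table statistics. As an illustration only, one plausible way such a score could be derived from the fraction of rows changed since the last analyze is sketched below; the formula is an assumption made for this sketch, not necessarily the one Doris implements.

// Illustrative sketch: a 0-100 statistics "health" score that degrades as more
// rows change after the last analyze. The formula is an assumption, not the
// definitive Doris implementation.
public class TableHealthSketch {
    static final int TABLE_STATS_HEALTH_THRESHOLD = 80;

    static int health(long updatedRows, long totalRows) {
        if (totalRows <= 0) {
            return 0; // empty or unknown row count: treat stats as stale
        }
        long changed = Math.min(updatedRows, totalRows);
        return (int) ((1.0 - (double) changed / totalRows) * 100);
    }

    public static void main(String[] args) {
        // 1,000 of 10,000 rows changed -> health 90, above the threshold: keep stats.
        System.out.println(health(1_000, 10_000));
        // 5,000 of 10,000 rows changed -> health 50, below 80: re-collect statistics.
        System.out.println(health(5_000, 10_000) < TABLE_STATS_HEALTH_THRESHOLD);
    }
}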

View File

@ -212,17 +212,20 @@ public class StatisticsCache {
}
}
public void syncLoadColStats(long tableId, long idxId, String colName) {
/**
* Return false if loading the corresponding stats failed.
*/
public boolean syncLoadColStats(long tableId, long idxId, String colName) {
List<ResultRow> columnResults = StatisticsRepository.loadColStats(tableId, idxId, colName);
final StatisticsCacheKey k =
new StatisticsCacheKey(tableId, idxId, colName);
final ColumnStatistic c = ColumnStatistic.fromResultRow(columnResults);
if (c == ColumnStatistic.UNKNOWN) {
return;
return false;
}
putCache(k, c);
if (ColumnStatistic.UNKNOWN == c) {
return;
return false;
}
TUpdateFollowerStatsCacheRequest updateFollowerStatsCacheRequest = new TUpdateFollowerStatsCacheRequest();
updateFollowerStatsCacheRequest.key = GsonUtils.GSON.toJson(k);
@ -234,6 +237,7 @@ public class StatisticsCache {
}
sendStats(frontend, updateFollowerStatsCacheRequest);
}
return true;
}
@VisibleForTesting
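
The new boolean contract of syncLoadColStats hinges on the ColumnStatistic.UNKNOWN sentinel: a load that yields nothing usable is reported as false instead of being cached and broadcast to followers, and the caller (see the BaseAnalysisTask hunk above) then rolls back the column's status. Below is a compact, self-contained sketch of that sentinel-as-failure pattern; the names and types are simplified stand-ins, not the real Doris cache.

import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Hypothetical, simplified stand-in: deserialization problems collapse to an
// UNKNOWN sentinel, and the loader reports false instead of caching it.
class SentinelLoadSketch {
    record Stats(double ndv) {}

    static final Stats UNKNOWN = new Stats(-1);

    private final Map<String, Stats> cache = new ConcurrentHashMap<>();

    static Stats fromRows(List<String> rows) {
        if (rows == null || rows.isEmpty()) {
            return UNKNOWN; // nothing usable was loaded
        }
        try {
            return new Stats(Double.parseDouble(rows.get(0)));
        } catch (NumberFormatException e) {
            return UNKNOWN; // malformed row: fall back to the sentinel, do not throw
        }
    }

    // Returns false when the load produced only the sentinel, so the caller can
    // roll back the column's "analyzed" status (see removeColStatsStatus above).
    boolean syncLoad(String colName, List<String> rows) {
        Stats stats = fromRows(rows);
        if (stats == UNKNOWN) {
            return false;
        }
        cache.put(colName, stats);
        return true;
    }

    public static void main(String[] args) {
        SentinelLoadSketch loader = new SentinelLoadSketch();
        System.out.println(loader.syncLoad("col2", List.of("42.0"))); // true
        System.out.println(loader.syncLoad("col3", List.of()));       // false -> roll back status
    }
}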

View File

@ -117,7 +117,7 @@ suite("test_analyze") {
try {
sql """
SELECT COUNT(*) FROM ${tbl};
SELECT * FROM ${tbl};
"""
} catch (Exception e) {
exception = e
@ -959,4 +959,81 @@ PARTITION `p599` VALUES IN (599)
"""
expected_col_stats(col_id_res, 3, 1)
sql """DROP TABLE IF EXISTS `some_complex_type_test`"""
sql """
CREATE TABLE `some_complex_type_test` (
`id` int(11) NULL COMMENT "",
`c_array` ARRAY<int(11)> NULL COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
);
"""
sql """INSERT INTO `some_complex_type_test` VALUES (1, [1,2,3,4,5]);"""
sql """INSERT INTO `some_complex_type_test` VALUES (2, [6,7,8]), (3, []), (4, null);"""
sql """
ANALYZE TABLE `some_complex_type_test` WITH SYNC;
"""
sql """
SELECT COUNT(1) FROM `some_complex_type_test`
"""
sql """DROP TABLE IF EXISTS `analyze_test_with_schema_update`"""
sql """
CREATE TABLE `analyze_test_with_schema_update` (
col1 varchar(11451) not null, col2 int not null, col3 int not null)
DUPLICATE KEY(col1)
DISTRIBUTED BY HASH(col1)
BUCKETS 3
PROPERTIES(
"replication_num"="1"
);
"""
sql """insert into analyze_test_with_schema_update values(1, 2, 3);"""
sql """insert into analyze_test_with_schema_update values(4, 5, 6);"""
sql """insert into analyze_test_with_schema_update values(7, 1, 9);"""
sql """insert into analyze_test_with_schema_update values(3, 8, 2);"""
sql """insert into analyze_test_with_schema_update values(5, 2, 1);"""
sql """
ANALYZE TABLE analyze_test_with_schema_update WITH SYNC
"""
sql """
ALTER TABLE analyze_test_with_schema_update ADD COLUMN tbl_name VARCHAR(256) DEFAULT NULL;
"""
sql """
ANALYZE TABLE analyze_test_with_schema_update WITH SYNC
"""
sql """
SELECT * FROM analyze_test_with_schema_update;
"""
sql """
DROP STATS analyze_test_with_schema_update(col3);
"""
sql """
ANALYZE TABLE analyze_test_with_schema_update WITH SYNC
"""
sql """
SELECT * FROM analyze_test_with_schema_update;
"""
}