branch-2.1: [improvement](statistics)Add session variable for partition sample count. #48218 (#49092)

Cherry-picked from #48218

Co-authored-by: James <lijibing@selectdb.com>
This commit is contained in:
github-actions[bot]
2025-03-14 20:43:00 +08:00
committed by GitHub
parent ed3a979904
commit d0f6edd212
4 changed files with 38 additions and 2 deletions

View File

@ -531,6 +531,8 @@ public class SessionVariable implements Serializable, Writable {
public static final String HUGE_TABLE_DEFAULT_SAMPLE_ROWS = "huge_table_default_sample_rows";
public static final String HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = "huge_table_lower_bound_size_in_bytes";
// Names of the variables that cap sampling of large partitioned tables:
// the first limits the number of partitions, the second limits the number of rows.
public static final String PARTITION_SAMPLE_COUNT = "partition_sample_count";
public static final String PARTITION_SAMPLE_ROW_COUNT = "partition_sample_row_count";
// for spill to disk
public static final String EXTERNAL_SORT_BYTES_THRESHOLD = "external_sort_bytes_threshold";
@ -1916,6 +1918,18 @@ public class SessionVariable implements Serializable, Writable {
+ "considered outdated."})
public int tableStatsHealthThreshold = 90;
// Variable "partition_sample_count", registered with the GLOBAL flag.
// Upper limit of the number of partitions used when sampling a large partitioned table.
// Defaults to 30.
@VariableMgr.VarAttr(name = PARTITION_SAMPLE_COUNT, flag = VariableMgr.GLOBAL,
description = {
"大分区表采样的分区数上限",
"The upper limit of the number of partitions for sampling large partitioned tables.\n"})
public int partitionSampleCount = 30;
// Variable "partition_sample_row_count", registered with the GLOBAL flag.
// Upper limit of the number of rows used when sampling a large partitioned table.
// Declared as long because the default (3 billion) does not fit in an int.
@VariableMgr.VarAttr(name = PARTITION_SAMPLE_ROW_COUNT, flag = VariableMgr.GLOBAL,
description = {
"大分区表采样的行数上限",
"The upper limit of the number of rows for sampling large partitioned tables.\n"})
public long partitionSampleRowCount = 3_000_000_000L;
@VariableMgr.VarAttr(name = ENABLE_MATERIALIZED_VIEW_REWRITE, needForward = true,
description = {"是否开启基于结构信息的物化视图透明改写",
"Whether to enable materialized view rewriting based on struct info"})

View File

@ -59,7 +59,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
private boolean partitionColumnSampleTooManyRows = false;
private boolean scanFullTable = false;
private static final long MAXIMUM_SAMPLE_ROWS = 1_000_000_000;
private static final int PARTITION_COUNT_TO_SAMPLE = 5;
@VisibleForTesting
public OlapAnalysisTask() {
@ -336,7 +335,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
MaterializedIndex materializedIndex = p.getIndex(indexId);
pickedTabletIds.addAll(materializedIndex.getTabletIdsInOrder());
}
if (pickedRows >= MAXIMUM_SAMPLE_ROWS || pickedPartitionCount > PARTITION_COUNT_TO_SAMPLE) {
if (pickedRows >= StatisticsUtil.getPartitionSampleRowCount()
|| pickedPartitionCount >= StatisticsUtil.getPartitionSampleCount()) {
break;
}
}

View File

@ -99,6 +99,10 @@ public class StatisticConstants {
public static final int MSG_LEN_UPPER_BOUND = 1024;
// Fallback limits for partition sampling, returned by StatisticsUtil.getPartitionSampleCount()
// and StatisticsUtil.getPartitionSampleRowCount() when the corresponding global variable
// cannot be read. The row limit is a long because 3 billion exceeds Integer.MAX_VALUE.
public static final int PARTITION_SAMPLE_COUNT = 30;
public static final long PARTITION_SAMPLE_ROW_COUNT = 3_000_000_000L;
static {
SYSTEM_DBS.add(FeConstants.INTERNAL_DB_NAME);
SYSTEM_DBS.add(InfoSchemaDb.DATABASE_NAME);

View File

@ -858,6 +858,24 @@ public class StatisticsUtil {
return StatisticConstants.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD;
}
/**
 * Returns the configured upper limit on the number of partitions to sample.
 *
 * <p>The value is read from the global variable {@code partition_sample_count}. If the lookup
 * throws, a warning is logged and {@link StatisticConstants#PARTITION_SAMPLE_COUNT} is returned
 * instead, so this method never propagates an exception.
 *
 * @return the partition count limit, or the built-in default when the variable cannot be read
 */
public static int getPartitionSampleCount() {
    // Start from the built-in default and override it only if the lookup succeeds.
    int sampleCount = StatisticConstants.PARTITION_SAMPLE_COUNT;
    try {
        SessionVariable globalVars = findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_COUNT);
        sampleCount = globalVars.partitionSampleCount;
    } catch (Exception e) {
        LOG.warn("Fail to get value of partition_sample_count, return default", e);
    }
    return sampleCount;
}
/**
 * Returns the configured upper limit on the number of rows to sample.
 *
 * <p>The value is read from the global variable {@code partition_sample_row_count}. If the
 * lookup throws, a warning is logged and {@link StatisticConstants#PARTITION_SAMPLE_ROW_COUNT}
 * is returned instead, so this method never propagates an exception.
 *
 * @return the row count limit, or the built-in default when the variable cannot be read
 */
public static long getPartitionSampleRowCount() {
    // Start from the built-in default and override it only if the lookup succeeds.
    long sampleRowCount = StatisticConstants.PARTITION_SAMPLE_ROW_COUNT;
    try {
        SessionVariable globalVars = findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_ROW_COUNT);
        sampleRowCount = globalVars.partitionSampleRowCount;
    } catch (Exception e) {
        LOG.warn("Fail to get value of partition_sample_row_count, return default", e);
    }
    return sampleRowCount;
}
public static String encodeValue(ResultRow row, int index) {
if (row == null || row.getValues().size() <= index) {
return "NULL";