branch-2.1: [improvement](statistics)Add session variable for partition sample count. #48218 (#49092)
Cherry-picked from #48218. Co-authored-by: James <lijibing@selectdb.com>
This commit is contained in:
committed by
GitHub
parent
ed3a979904
commit
d0f6edd212
@ -531,6 +531,8 @@ public class SessionVariable implements Serializable, Writable {
|
||||
|
||||
public static final String HUGE_TABLE_DEFAULT_SAMPLE_ROWS = "huge_table_default_sample_rows";
|
||||
public static final String HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = "huge_table_lower_bound_size_in_bytes";
|
||||
public static final String PARTITION_SAMPLE_COUNT = "partition_sample_count";
|
||||
public static final String PARTITION_SAMPLE_ROW_COUNT = "partition_sample_row_count";
|
||||
|
||||
// for spill to disk
|
||||
public static final String EXTERNAL_SORT_BYTES_THRESHOLD = "external_sort_bytes_threshold";
|
||||
@ -1916,6 +1918,18 @@ public class SessionVariable implements Serializable, Writable {
|
||||
+ "considered outdated."})
|
||||
public int tableStatsHealthThreshold = 90;
|
||||
|
||||
// Session variable "partition_sample_count", registered with the GLOBAL flag; default 30.
// Per its description: the upper limit on the number of partitions used when sampling
// large partitioned tables.
@VariableMgr.VarAttr(name = PARTITION_SAMPLE_COUNT, flag = VariableMgr.GLOBAL,
|
||||
description = {
|
||||
"大分区表采样的分区数上限",
|
||||
"The upper limit of the number of partitions for sampling large partitioned tables.\n"})
|
||||
public int partitionSampleCount = 30;
|
||||
|
||||
// Session variable "partition_sample_row_count", registered with the GLOBAL flag;
// default 3,000,000,000. Declared as long because the default exceeds the int range.
// Per its description: the upper limit on the number of rows used when sampling
// large partitioned tables.
@VariableMgr.VarAttr(name = PARTITION_SAMPLE_ROW_COUNT, flag = VariableMgr.GLOBAL,
|
||||
description = {
|
||||
"大分区表采样的行数上限",
|
||||
"The upper limit of the number of rows for sampling large partitioned tables.\n"})
|
||||
public long partitionSampleRowCount = 3_000_000_000L;
|
||||
|
||||
@VariableMgr.VarAttr(name = ENABLE_MATERIALIZED_VIEW_REWRITE, needForward = true,
|
||||
description = {"是否开启基于结构信息的物化视图透明改写",
|
||||
"Whether to enable materialized view rewriting based on struct info"})
|
||||
|
||||
@ -59,7 +59,6 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
private boolean partitionColumnSampleTooManyRows = false;
|
||||
private boolean scanFullTable = false;
|
||||
private static final long MAXIMUM_SAMPLE_ROWS = 1_000_000_000;
|
||||
private static final int PARTITION_COUNT_TO_SAMPLE = 5;
|
||||
|
||||
@VisibleForTesting
|
||||
public OlapAnalysisTask() {
|
||||
@ -336,7 +335,8 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
MaterializedIndex materializedIndex = p.getIndex(indexId);
|
||||
pickedTabletIds.addAll(materializedIndex.getTabletIdsInOrder());
|
||||
}
|
||||
if (pickedRows >= MAXIMUM_SAMPLE_ROWS || pickedPartitionCount > PARTITION_COUNT_TO_SAMPLE) {
|
||||
if (pickedRows >= StatisticsUtil.getPartitionSampleRowCount()
|
||||
|| pickedPartitionCount >= StatisticsUtil.getPartitionSampleCount()) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -99,6 +99,10 @@ public class StatisticConstants {
|
||||
|
||||
public static final int MSG_LEN_UPPER_BOUND = 1024;
|
||||
|
||||
// Fallback partition-count limit returned by StatisticsUtil.getPartitionSampleCount()
// when the global session variable cannot be read.
public static final int PARTITION_SAMPLE_COUNT = 30;
|
||||
|
||||
// Fallback row-count limit returned by StatisticsUtil.getPartitionSampleRowCount()
// when the global session variable cannot be read.
public static final long PARTITION_SAMPLE_ROW_COUNT = 3_000_000_000L;
|
||||
|
||||
static {
|
||||
SYSTEM_DBS.add(FeConstants.INTERNAL_DB_NAME);
|
||||
SYSTEM_DBS.add(InfoSchemaDb.DATABASE_NAME);
|
||||
|
||||
@ -858,6 +858,24 @@ public class StatisticsUtil {
|
||||
return StatisticConstants.AUTO_ANALYZE_TABLE_WIDTH_THRESHOLD;
|
||||
}
|
||||
|
||||
/**
 * Returns the configured partition-count limit for sampling.
 *
 * <p>Reads the {@code partitionSampleCount} field from the object returned by
 * {@code findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_COUNT)}.
 * If that lookup throws any exception, a warning is logged and
 * {@link StatisticConstants#PARTITION_SAMPLE_COUNT} is returned instead.
 *
 * @return the session-variable value, or the constant default on failure
 */
public static int getPartitionSampleCount() {
|
||||
try {
|
||||
return findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_COUNT).partitionSampleCount;
|
||||
} catch (Exception e) {
|
||||
// Best effort: never propagate a lookup failure; fall through to the default below.
LOG.warn("Fail to get value of partition_sample_count, return default", e);
|
||||
}
|
||||
return StatisticConstants.PARTITION_SAMPLE_COUNT;
|
||||
}
|
||||
|
||||
/**
 * Returns the configured row-count limit for sampling.
 *
 * <p>Reads the {@code partitionSampleRowCount} field from the object returned by
 * {@code findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_ROW_COUNT)}.
 * If that lookup throws any exception, a warning is logged and
 * {@link StatisticConstants#PARTITION_SAMPLE_ROW_COUNT} is returned instead.
 *
 * @return the session-variable value, or the constant default on failure
 */
public static long getPartitionSampleRowCount() {
|
||||
try {
|
||||
return findConfigFromGlobalSessionVar(SessionVariable.PARTITION_SAMPLE_ROW_COUNT).partitionSampleRowCount;
|
||||
} catch (Exception e) {
|
||||
// Best effort: never propagate a lookup failure; fall through to the default below.
LOG.warn("Fail to get value of partition_sample_row_count, return default", e);
|
||||
}
|
||||
return StatisticConstants.PARTITION_SAMPLE_ROW_COUNT;
|
||||
}
|
||||
|
||||
public static String encodeValue(ResultRow row, int index) {
|
||||
if (row == null || row.getValues().size() <= index) {
|
||||
return "NULL";
|
||||
|
||||
Reference in New Issue
Block a user