[feature](spill) Implement spill to disk for hash join, aggregation and sort for pipelineX (#31910)
Co-authored-by: Jerry Hu <mrhhsg@gmail.com>
This commit is contained in:
@ -364,10 +364,6 @@ public class SessionVariable implements Serializable, Writable {
|
||||
|
||||
public static final String GROUP_CONCAT_MAX_LEN = "group_concat_max_len";
|
||||
|
||||
public static final String EXTERNAL_SORT_BYTES_THRESHOLD = "external_sort_bytes_threshold";
|
||||
public static final String EXTERNAL_AGG_BYTES_THRESHOLD = "external_agg_bytes_threshold";
|
||||
public static final String EXTERNAL_AGG_PARTITION_BITS = "external_agg_partition_bits";
|
||||
|
||||
public static final String ENABLE_TWO_PHASE_READ_OPT = "enable_two_phase_read_opt";
|
||||
public static final String TOPN_OPT_LIMIT_THRESHOLD = "topn_opt_limit_threshold";
|
||||
|
||||
@ -481,6 +477,15 @@ public class SessionVariable implements Serializable, Writable {
|
||||
public static final String HUGE_TABLE_DEFAULT_SAMPLE_ROWS = "huge_table_default_sample_rows";
|
||||
public static final String HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = "huge_table_lower_bound_size_in_bytes";
|
||||
|
||||
// for spill to disk
|
||||
public static final String EXTERNAL_SORT_BYTES_THRESHOLD = "external_sort_bytes_threshold";
|
||||
public static final String EXTERNAL_AGG_BYTES_THRESHOLD = "external_agg_bytes_threshold";
|
||||
public static final String EXTERNAL_AGG_PARTITION_BITS = "external_agg_partition_bits";
|
||||
public static final String MIN_REVOCABLE_MEM = "min_revocable_mem";
|
||||
public static final String ENABLE_JOIN_SPILL = "enable_join_spill";
|
||||
public static final String ENABLE_SORT_SPILL = "enable_sort_spill";
|
||||
public static final String ENABLE_AGG_SPILL = "enable_agg_spill";
|
||||
|
||||
public static final String GENERATE_STATS_FACTOR = "generate_stats_factor";
|
||||
|
||||
public static final String HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS
|
||||
@ -1270,25 +1275,6 @@ public class SessionVariable implements Serializable, Writable {
|
||||
@VariableMgr.VarAttr(name = GROUP_CONCAT_MAX_LEN)
|
||||
public long groupConcatMaxLen = 2147483646;
|
||||
|
||||
// If the memory consumption of sort node exceed this limit, will trigger spill to disk;
|
||||
// Set to 0 to disable; min: 128M
|
||||
public static final long MIN_EXTERNAL_SORT_BYTES_THRESHOLD = 134217728;
|
||||
@VariableMgr.VarAttr(name = EXTERNAL_SORT_BYTES_THRESHOLD,
|
||||
checker = "checkExternalSortBytesThreshold", fuzzy = true)
|
||||
public long externalSortBytesThreshold = 0;
|
||||
|
||||
// Set to 0 to disable; min: 128M
|
||||
public static final long MIN_EXTERNAL_AGG_BYTES_THRESHOLD = 134217728;
|
||||
@VariableMgr.VarAttr(name = EXTERNAL_AGG_BYTES_THRESHOLD,
|
||||
checker = "checkExternalAggBytesThreshold", fuzzy = true)
|
||||
public long externalAggBytesThreshold = 0;
|
||||
|
||||
public static final int MIN_EXTERNAL_AGG_PARTITION_BITS = 4;
|
||||
public static final int MAX_EXTERNAL_AGG_PARTITION_BITS = 8;
|
||||
@VariableMgr.VarAttr(name = EXTERNAL_AGG_PARTITION_BITS,
|
||||
checker = "checkExternalAggPartitionBits", fuzzy = true)
|
||||
public int externalAggPartitionBits = 8; // means that the hash table will be partitioned into 256 blocks.
|
||||
|
||||
// Whether enable two phase read optimization
|
||||
// 1. read related rowids along with necessary column data
|
||||
// 2. spawn fetch RPC to other nodes to get related data by sorted rowids
|
||||
@ -1664,6 +1650,65 @@ public class SessionVariable implements Serializable, Writable {
|
||||
public boolean disableEmptyPartitionPrune = false;
|
||||
// CLOUD_VARIABLES_END
|
||||
|
||||
// for spill to disk
|
||||
@VariableMgr.VarAttr(name = MIN_REVOCABLE_MEM, fuzzy = true)
|
||||
public long minRevocableMem = 32 * 1024 * 1024;
|
||||
|
||||
@VariableMgr.VarAttr(
|
||||
name = ENABLE_JOIN_SPILL,
|
||||
description = {"控制是否启用join算子落盘。默认为 false。",
|
||||
"Controls whether to enable spill to disk of join operation. "
|
||||
+ "The default value is false."},
|
||||
needForward = true)
|
||||
public boolean enableJoinSpill = false;
|
||||
|
||||
@VariableMgr.VarAttr(
|
||||
name = ENABLE_SORT_SPILL,
|
||||
description = {"控制是否启用排序算子落盘。默认为 false。",
|
||||
"Controls whether to enable spill to disk of sort operation. "
|
||||
+ "The default value is false."},
|
||||
needForward = true)
|
||||
public boolean enableSortSpill = false;
|
||||
|
||||
@VariableMgr.VarAttr(
|
||||
name = ENABLE_AGG_SPILL,
|
||||
description = {"控制是否启用聚合算子落盘。默认为 false。",
|
||||
"Controls whether to enable spill to disk of aggregation operation. "
|
||||
+ "The default value is false."},
|
||||
needForward = true)
|
||||
public boolean enableAggSpill = false;
|
||||
|
||||
// If the memory consumption of sort node exceed this limit, will trigger spill to disk;
|
||||
// Set to 0 to disable; min: 128M
|
||||
public static final long MIN_EXTERNAL_SORT_BYTES_THRESHOLD = 2097152;
|
||||
@VariableMgr.VarAttr(name = EXTERNAL_SORT_BYTES_THRESHOLD,
|
||||
checker = "checkExternalSortBytesThreshold", fuzzy = true)
|
||||
public long externalSortBytesThreshold = 0;
|
||||
|
||||
// Set to 0 to disable; min: 128M
|
||||
public static final long MIN_EXTERNAL_AGG_BYTES_THRESHOLD = 134217728;
|
||||
@VariableMgr.VarAttr(name = EXTERNAL_AGG_BYTES_THRESHOLD,
|
||||
checker = "checkExternalAggBytesThreshold", fuzzy = true)
|
||||
public long externalAggBytesThreshold = 0;
|
||||
|
||||
public static final int MIN_EXTERNAL_AGG_PARTITION_BITS = 4;
|
||||
public static final int MAX_EXTERNAL_AGG_PARTITION_BITS = 20;
|
||||
@VariableMgr.VarAttr(name = EXTERNAL_AGG_PARTITION_BITS,
|
||||
checker = "checkExternalAggPartitionBits", fuzzy = true)
|
||||
public int externalAggPartitionBits = 8; // means that the hash table will be partitioned into 256 blocks.
|
||||
|
||||
public boolean isEnableJoinSpill() {
|
||||
return enableJoinSpill;
|
||||
}
|
||||
|
||||
public void setEnableJoinSpill(boolean enableJoinSpill) {
|
||||
this.enableJoinSpill = enableJoinSpill;
|
||||
}
|
||||
|
||||
public boolean isEnableSortSpill() {
|
||||
return enableSortSpill;
|
||||
}
|
||||
|
||||
// If this fe is in fuzzy mode, then will use initFuzzyModeVariables to generate some variables,
|
||||
// not the default value set in the code.
|
||||
@SuppressWarnings("checkstyle:Indentation")
|
||||
@ -2939,7 +2984,7 @@ public class SessionVariable implements Serializable, Writable {
|
||||
|
||||
tResult.setRepeatMaxNum(repeatMaxNum);
|
||||
|
||||
tResult.setExternalSortBytesThreshold(0); // disable for now
|
||||
tResult.setExternalSortBytesThreshold(externalSortBytesThreshold);
|
||||
|
||||
tResult.setExternalAggBytesThreshold(0); // disable for now
|
||||
|
||||
@ -2979,7 +3024,10 @@ public class SessionVariable implements Serializable, Writable {
|
||||
tResult.setParallelScanMaxScannersCount(parallelScanMaxScannersCount);
|
||||
tResult.setParallelScanMinRowsPerScanner(parallelScanMinRowsPerScanner);
|
||||
tResult.setSkipBadTablet(skipBadTablet);
|
||||
|
||||
tResult.setEnableJoinSpill(enableJoinSpill);
|
||||
tResult.setEnableSortSpill(enableSortSpill);
|
||||
tResult.setEnableAggSpill(enableAggSpill);
|
||||
tResult.setMinRevocableMem(minRevocableMem);
|
||||
return tResult;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user