[feature](spill) Implement spill to disk for hash join, aggregation and sort for pipelineX (#31910)

Co-authored-by: Jerry Hu <mrhhsg@gmail.com>
This commit is contained in:
TengJianPing
2024-03-11 09:36:25 +08:00
committed by yiguolei
parent cf6b22c621
commit 3358f76a7f
67 changed files with 5045 additions and 718 deletions

View File

@ -364,10 +364,6 @@ public class SessionVariable implements Serializable, Writable {
public static final String GROUP_CONCAT_MAX_LEN = "group_concat_max_len";
public static final String EXTERNAL_SORT_BYTES_THRESHOLD = "external_sort_bytes_threshold";
public static final String EXTERNAL_AGG_BYTES_THRESHOLD = "external_agg_bytes_threshold";
public static final String EXTERNAL_AGG_PARTITION_BITS = "external_agg_partition_bits";
public static final String ENABLE_TWO_PHASE_READ_OPT = "enable_two_phase_read_opt";
public static final String TOPN_OPT_LIMIT_THRESHOLD = "topn_opt_limit_threshold";
@ -481,6 +477,15 @@ public class SessionVariable implements Serializable, Writable {
public static final String HUGE_TABLE_DEFAULT_SAMPLE_ROWS = "huge_table_default_sample_rows";
public static final String HUGE_TABLE_LOWER_BOUND_SIZE_IN_BYTES = "huge_table_lower_bound_size_in_bytes";
// for spill to disk
public static final String EXTERNAL_SORT_BYTES_THRESHOLD = "external_sort_bytes_threshold";
public static final String EXTERNAL_AGG_BYTES_THRESHOLD = "external_agg_bytes_threshold";
public static final String EXTERNAL_AGG_PARTITION_BITS = "external_agg_partition_bits";
public static final String MIN_REVOCABLE_MEM = "min_revocable_mem";
public static final String ENABLE_JOIN_SPILL = "enable_join_spill";
public static final String ENABLE_SORT_SPILL = "enable_sort_spill";
public static final String ENABLE_AGG_SPILL = "enable_agg_spill";
public static final String GENERATE_STATS_FACTOR = "generate_stats_factor";
public static final String HUGE_TABLE_AUTO_ANALYZE_INTERVAL_IN_MILLIS
@ -1270,25 +1275,6 @@ public class SessionVariable implements Serializable, Writable {
@VariableMgr.VarAttr(name = GROUP_CONCAT_MAX_LEN)
public long groupConcatMaxLen = 2147483646;
// If the memory consumption of sort node exceed this limit, will trigger spill to disk;
// Set to 0 to disable; min: 128M
public static final long MIN_EXTERNAL_SORT_BYTES_THRESHOLD = 134217728;
@VariableMgr.VarAttr(name = EXTERNAL_SORT_BYTES_THRESHOLD,
checker = "checkExternalSortBytesThreshold", fuzzy = true)
public long externalSortBytesThreshold = 0;
// Set to 0 to disable; min: 128M
public static final long MIN_EXTERNAL_AGG_BYTES_THRESHOLD = 134217728;
@VariableMgr.VarAttr(name = EXTERNAL_AGG_BYTES_THRESHOLD,
checker = "checkExternalAggBytesThreshold", fuzzy = true)
public long externalAggBytesThreshold = 0;
public static final int MIN_EXTERNAL_AGG_PARTITION_BITS = 4;
public static final int MAX_EXTERNAL_AGG_PARTITION_BITS = 8;
@VariableMgr.VarAttr(name = EXTERNAL_AGG_PARTITION_BITS,
checker = "checkExternalAggPartitionBits", fuzzy = true)
public int externalAggPartitionBits = 8; // means that the hash table will be partitioned into 256 blocks.
// Whether enable two phase read optimization
// 1. read related rowids along with necessary column data
// 2. spawn fetch RPC to other nodes to get related data by sorted rowids
@ -1664,6 +1650,65 @@ public class SessionVariable implements Serializable, Writable {
public boolean disableEmptyPartitionPrune = false;
// CLOUD_VARIABLES_END
// for spill to disk
@VariableMgr.VarAttr(name = MIN_REVOCABLE_MEM, fuzzy = true)
public long minRevocableMem = 32 * 1024 * 1024;
@VariableMgr.VarAttr(
name = ENABLE_JOIN_SPILL,
description = {"控制是否启用join算子落盘。默认为 false。",
"Controls whether to enable spill to disk of join operation. "
+ "The default value is false."},
needForward = true)
public boolean enableJoinSpill = false;
@VariableMgr.VarAttr(
name = ENABLE_SORT_SPILL,
description = {"控制是否启用排序算子落盘。默认为 false。",
"Controls whether to enable spill to disk of sort operation. "
+ "The default value is false."},
needForward = true)
public boolean enableSortSpill = false;
@VariableMgr.VarAttr(
name = ENABLE_AGG_SPILL,
description = {"控制是否启用聚合算子落盘。默认为 false。",
"Controls whether to enable spill to disk of aggregation operation. "
+ "The default value is false."},
needForward = true)
public boolean enableAggSpill = false;
// If the memory consumption of sort node exceed this limit, will trigger spill to disk;
// Set to 0 to disable; min: 128M
public static final long MIN_EXTERNAL_SORT_BYTES_THRESHOLD = 2097152;
@VariableMgr.VarAttr(name = EXTERNAL_SORT_BYTES_THRESHOLD,
checker = "checkExternalSortBytesThreshold", fuzzy = true)
public long externalSortBytesThreshold = 0;
// Set to 0 to disable; min: 128M
public static final long MIN_EXTERNAL_AGG_BYTES_THRESHOLD = 134217728;
@VariableMgr.VarAttr(name = EXTERNAL_AGG_BYTES_THRESHOLD,
checker = "checkExternalAggBytesThreshold", fuzzy = true)
public long externalAggBytesThreshold = 0;
public static final int MIN_EXTERNAL_AGG_PARTITION_BITS = 4;
public static final int MAX_EXTERNAL_AGG_PARTITION_BITS = 20;
@VariableMgr.VarAttr(name = EXTERNAL_AGG_PARTITION_BITS,
checker = "checkExternalAggPartitionBits", fuzzy = true)
public int externalAggPartitionBits = 8; // means that the hash table will be partitioned into 256 blocks.
public boolean isEnableJoinSpill() {
return enableJoinSpill;
}
public void setEnableJoinSpill(boolean enableJoinSpill) {
this.enableJoinSpill = enableJoinSpill;
}
public boolean isEnableSortSpill() {
return enableSortSpill;
}
// If this fe is in fuzzy mode, then will use initFuzzyModeVariables to generate some variables,
// not the default value set in the code.
@SuppressWarnings("checkstyle:Indentation")
@ -2939,7 +2984,7 @@ public class SessionVariable implements Serializable, Writable {
tResult.setRepeatMaxNum(repeatMaxNum);
tResult.setExternalSortBytesThreshold(0); // disable for now
tResult.setExternalSortBytesThreshold(externalSortBytesThreshold);
tResult.setExternalAggBytesThreshold(0); // disable for now
@ -2979,7 +3024,10 @@ public class SessionVariable implements Serializable, Writable {
tResult.setParallelScanMaxScannersCount(parallelScanMaxScannersCount);
tResult.setParallelScanMinRowsPerScanner(parallelScanMinRowsPerScanner);
tResult.setSkipBadTablet(skipBadTablet);
tResult.setEnableJoinSpill(enableJoinSpill);
tResult.setEnableSortSpill(enableSortSpill);
tResult.setEnableAggSpill(enableAggSpill);
tResult.setMinRevocableMem(minRevocableMem);
return tResult;
}