From b06794d6196299cb80ecbf56393b5571d400c780 Mon Sep 17 00:00:00 2001 From: TengJianPing <18241664+jacktengg@users.noreply.github.com> Date: Wed, 29 May 2024 09:57:31 +0800 Subject: [PATCH] [opt](spill) add session variable of 'enable_force_spill' (#34664) (#35561) ## Proposed changes pick #34664 --- .../pipeline/pipeline_x/pipeline_x_task.cpp | 7 + be/src/runtime/runtime_state.h | 13 +- .../org/apache/doris/qe/SessionVariable.java | 13 +- gensrc/thrift/PaloInternalService.thrift | 2 + .../data/tpcds_sf1_p1/spill_test/q23.out | 10 + .../suites/tpcds_sf1_p1/spill_test/q23.groovy | 220 ++++++++++++++++++ 6 files changed, 261 insertions(+), 4 deletions(-) create mode 100644 regression-test/data/tpcds_sf1_p1/spill_test/q23.out create mode 100644 regression-test/suites/tpcds_sf1_p1/spill_test/q23.groovy diff --git a/be/src/pipeline/pipeline_x/pipeline_x_task.cpp b/be/src/pipeline/pipeline_x/pipeline_x_task.cpp index 2f1abf4790..4e54ed99ed 100644 --- a/be/src/pipeline/pipeline_x/pipeline_x_task.cpp +++ b/be/src/pipeline/pipeline_x/pipeline_x_task.cpp @@ -334,6 +334,13 @@ bool PipelineXTask::should_revoke_memory(RuntimeState* state, int64_t revocable_ return false; } const auto min_revocable_mem_bytes = state->min_revocable_mem(); + + if (UNLIKELY(state->enable_force_spill())) { + if (revocable_mem_bytes >= min_revocable_mem_bytes) { + LOG_ONCE(INFO) << "spill force, query: " << print_id(state->query_id()); + return true; + } + } bool is_wg_mem_low_water_mark = false; bool is_wg_mem_high_water_mark = false; wg->check_mem_used(&is_wg_mem_low_water_mark, &is_wg_mem_high_water_mark); diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index 9f483fdc26..3e1cda7f37 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -599,15 +599,22 @@ public: bool is_nereids() const; bool enable_join_spill() const { - return _query_options.__isset.enable_join_spill && _query_options.enable_join_spill; + return (_query_options.__isset.enable_force_spill && _query_options.enable_force_spill) || + (_query_options.__isset.enable_join_spill && _query_options.enable_join_spill); } bool enable_sort_spill() const { - return _query_options.__isset.enable_sort_spill && _query_options.enable_sort_spill; + return (_query_options.__isset.enable_force_spill && _query_options.enable_force_spill) || + (_query_options.__isset.enable_sort_spill && _query_options.enable_sort_spill); } bool enable_agg_spill() const { - return _query_options.__isset.enable_agg_spill && _query_options.enable_agg_spill; + return (_query_options.__isset.enable_force_spill && _query_options.enable_force_spill) || + (_query_options.__isset.enable_agg_spill && _query_options.enable_agg_spill); + } + + bool enable_force_spill() const { + return _query_options.__isset.enable_force_spill && _query_options.enable_force_spill; } int64_t min_revocable_mem() const { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index d826af50a7..7277ef7a40 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -501,6 +501,7 @@ public class SessionVariable implements Serializable, Writable { public static final String ENABLE_JOIN_SPILL = "enable_join_spill"; public static final String ENABLE_SORT_SPILL = "enable_sort_spill"; public static final String ENABLE_AGG_SPILL = "enable_agg_spill"; + public static final String ENABLE_FORCE_SPILL = "enable_force_spill"; public static final String DATA_QUEUE_MAX_BLOCKS = "data_queue_max_blocks"; public static final String GENERATE_STATS_FACTOR = "generate_stats_factor"; @@ -1802,6 +1803,15 @@ public class SessionVariable implements Serializable, Writable { needForward = true, fuzzy = true) public boolean enableAggSpill = false; + @VariableMgr.VarAttr( + name = ENABLE_FORCE_SPILL, + description = {"控制是否开启强制落盘(即使在内存足够的情况),默认为 false。", + "Controls whether enable force spill." + }, + needForward = true, fuzzy = true + ) + public boolean enableForceSpill = false; + @VariableMgr.VarAttr( name = DATA_QUEUE_MAX_BLOCKS, description = {"DataQueue 中每个子队列允许最大的 block 个数", @@ -1985,7 +1995,7 @@ public class SessionVariable implements Serializable, Writable { this.minRevocableMem = 1024 * 1024; break; default: - this.minRevocableMem = 100 * 1024 * 1024 * 1024; + this.minRevocableMem = 100L * 1024 * 1024 * 1024; break; } } else { @@ -3263,6 +3273,7 @@ public class SessionVariable implements Serializable, Writable { tResult.setEnableJoinSpill(enableJoinSpill); tResult.setEnableSortSpill(enableSortSpill); tResult.setEnableAggSpill(enableAggSpill); + tResult.setEnableForceSpill(enableForceSpill); tResult.setMinRevocableMem(minRevocableMem); tResult.setDataQueueMaxBlocks(dataQueueMaxBlocks); diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index 78790260b5..54c6c57bdd 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -293,6 +293,8 @@ struct TQueryOptions { 111: optional bool enable_orc_filter_by_min_max = true 112: optional i32 max_column_reader_num = 0 + + 113: optional bool enable_force_spill = false; // For cloud, to control if the content would be written into file cache 1000: optional bool disable_file_cache = false diff --git a/regression-test/data/tpcds_sf1_p1/spill_test/q23.out b/regression-test/data/tpcds_sf1_p1/spill_test/q23.out new file mode 100644 index 0000000000..bfe03c337f --- /dev/null +++ b/regression-test/data/tpcds_sf1_p1/spill_test/q23.out @@ -0,0 +1,10 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select1 -- +17030.91 + +-- !select2 -- + Robert 598.86 +Brown Monika 6031.52 +Collins Gordon 727.57 +Green Jesse 9672.96 + diff --git a/regression-test/suites/tpcds_sf1_p1/spill_test/q23.groovy b/regression-test/suites/tpcds_sf1_p1/spill_test/q23.groovy new file mode 100644 index 0000000000..cc5408d6b8 --- /dev/null +++ b/regression-test/suites/tpcds_sf1_p1/spill_test/q23.groovy @@ -0,0 +1,220 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("q23") { + sql """ set enable_force_spill =true; """ + sql """ set min_revocable_mem = 65536; """ + sql """ use regression_test_tpcds_sf1_p1; """ + + qt_select1 """ + WITH + frequent_ss_items AS ( + SELECT + substr(i_item_desc, 1, 30) itemdesc + , i_item_sk item_sk + , d_date solddate + , count(*) cnt + FROM + store_sales + , date_dim + , item + WHERE (ss_sold_date_sk = d_date_sk) + AND (ss_item_sk = i_item_sk) + AND (d_year IN (2000 , (2000 + 1) , (2000 + 2) , (2000 + 3))) + GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date + HAVING (count(*) > 4) + ) + , max_store_sales AS ( + SELECT max(csales) tpcds_cmax + FROM + ( + SELECT + c_customer_sk + , sum((ss_quantity * ss_sales_price)) csales + FROM + store_sales + , customer + , date_dim + WHERE (ss_customer_sk = c_customer_sk) + AND (ss_sold_date_sk = d_date_sk) + AND (d_year IN (2000 , (2000 + 1) , (2000 + 2) , (2000 + 3))) + GROUP BY c_customer_sk + ) x + ) + , best_ss_customer AS ( + SELECT + c_customer_sk + , sum((ss_quantity * ss_sales_price)) ssales + FROM + store_sales + , customer + WHERE (ss_customer_sk = c_customer_sk) + GROUP BY c_customer_sk + HAVING (sum((ss_quantity * ss_sales_price)) > ((50 / CAST('100.0' AS DECIMAL(5,2))) * ( + SELECT * + FROM + max_store_sales + ))) + ) + SELECT sum(sales) + FROM + ( + SELECT (cs_quantity * cs_list_price) sales + FROM + catalog_sales + , date_dim + WHERE (d_year = 2000) + AND (d_moy = 2) + AND (cs_sold_date_sk = d_date_sk) + AND (cs_item_sk IN ( + SELECT item_sk + FROM + frequent_ss_items + )) + AND (cs_bill_customer_sk IN ( + SELECT c_customer_sk + FROM + best_ss_customer + )) + UNION ALL SELECT (ws_quantity * ws_list_price) sales + FROM + web_sales + , date_dim + WHERE (d_year = 2000) + AND (d_moy = 2) + AND (ws_sold_date_sk = d_date_sk) + AND (ws_item_sk IN ( + SELECT item_sk + FROM + frequent_ss_items + )) + AND (ws_bill_customer_sk IN ( + SELECT c_customer_sk + FROM + best_ss_customer + )) + ) y + LIMIT 100 + """ + + qt_select2 """ + WITH + frequent_ss_items AS ( + SELECT + substr(i_item_desc, 1, 30) itemdesc + , i_item_sk item_sk + , d_date solddate + , count(*) cnt + FROM + store_sales + , date_dim + , item + WHERE (ss_sold_date_sk = d_date_sk) + AND (ss_item_sk = i_item_sk) + AND (d_year IN (2000 , (2000 + 1) , (2000 + 2) , (2000 + 3))) + GROUP BY substr(i_item_desc, 1, 30), i_item_sk, d_date + HAVING (count(*) > 4) + ) + , max_store_sales AS ( + SELECT max(csales) tpcds_cmax + FROM + ( + SELECT + c_customer_sk + , sum((ss_quantity * ss_sales_price)) csales + FROM + store_sales + , customer + , date_dim + WHERE (ss_customer_sk = c_customer_sk) + AND (ss_sold_date_sk = d_date_sk) + AND (d_year IN (2000 , (2000 + 1) , (2000 + 2) , (2000 + 3))) + GROUP BY c_customer_sk + ) x + ) + , best_ss_customer AS ( + SELECT + c_customer_sk + , sum((ss_quantity * ss_sales_price)) ssales + FROM + store_sales + , customer + WHERE (ss_customer_sk = c_customer_sk) + GROUP BY c_customer_sk + HAVING (sum((ss_quantity * ss_sales_price)) > ((50 / CAST('100.0' AS DECIMAL(5,2))) * ( + SELECT * + FROM + max_store_sales + ))) + ) + SELECT + c_last_name + , c_first_name + , sales + FROM + ( + SELECT + c_last_name + , c_first_name + , sum((cs_quantity * cs_list_price)) sales + FROM + catalog_sales + , customer + , date_dim + WHERE (d_year = 2000) + AND (d_moy = 2) + AND (cs_sold_date_sk = d_date_sk) + AND (cs_item_sk IN ( + SELECT item_sk + FROM + frequent_ss_items + )) + AND (cs_bill_customer_sk IN ( + SELECT c_customer_sk + FROM + best_ss_customer + )) + AND (cs_bill_customer_sk = c_customer_sk) + GROUP BY c_last_name, c_first_name + UNION ALL SELECT + c_last_name + , c_first_name + , sum((ws_quantity * ws_list_price)) sales + FROM + web_sales + , customer + , date_dim + WHERE (d_year = 2000) + AND (d_moy = 2) + AND (ws_sold_date_sk = d_date_sk) + AND (ws_item_sk IN ( + SELECT item_sk + FROM + frequent_ss_items + )) + AND (ws_bill_customer_sk IN ( + SELECT c_customer_sk + FROM + best_ss_customer + )) + AND (ws_bill_customer_sk = c_customer_sk) + GROUP BY c_last_name, c_first_name + ) z + ORDER BY c_last_name ASC, c_first_name ASC, sales ASC + LIMIT 100 + """ +} \ No newline at end of file