From 62a6b132be25870379f616ea965b3fa80d5c3969 Mon Sep 17 00:00:00 2001 From: zhiqqqq Date: Tue, 10 Oct 2023 07:25:58 -0500 Subject: [PATCH] [Fix](func numbers) Remove backend_nums argument of numbers function (#25200) --- .../exec/data_gen_functions/vnumbers_tvf.cpp | 3 ++ .../sql-functions/table-functions/numbers.md | 4 +-- .../sql-functions/table-functions/numbers.md | 4 +-- .../apache/doris/planner/DataGenScanNode.java | 10 ++++++ .../java/org/apache/doris/qe/Coordinator.java | 15 ++++++--- .../NumbersTableValuedFunction.java | 32 ++++++------------- .../http_rest_api/post/test_query_stmt.groovy | 2 +- .../aggregate_strategies.groovy | 8 ++--- .../suites/nereids_syntax_p0/function.groovy | 4 +-- .../aggregate_strategies.groovy | 8 ++--- 10 files changed, 46 insertions(+), 44 deletions(-) diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp index 278e974332..2ac6c0fca4 100644 --- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp +++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp @@ -80,6 +80,9 @@ Status VNumbersTVF::get_next(RuntimeState* state, vectorized::Block* block, bool } Status VNumbersTVF::set_scan_ranges(const std::vector& scan_range_params) { + // Currently we do not support multi-threads numbers function, so there is no need to + // use more than one scan_range_param. + DCHECK(scan_range_params.size() == 1); _total_numbers = scan_range_params[0].scan_range.data_gen_scan_range.numbers_params.totalNumbers; return Status::OK(); diff --git a/docs/en/docs/sql-manual/sql-functions/table-functions/numbers.md b/docs/en/docs/sql-manual/sql-functions/table-functions/numbers.md index bade7391be..916585255c 100644 --- a/docs/en/docs/sql-manual/sql-functions/table-functions/numbers.md +++ b/docs/en/docs/sql-manual/sql-functions/table-functions/numbers.md @@ -36,14 +36,12 @@ This function is used in FROM clauses. ```sql numbers( - "number" = "n", - "backend_num" = "m" + "number" = "n" ); ``` parameter: - `number`: It means to generate rows [0, n). -- `backend_num`: Optional parameters. It means this function is executed simultaneously on `m` be nodes (multiple BEs need to be deployed). ### example ``` diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/numbers.md b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/numbers.md index 4c2e9d93f9..1223236b11 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/numbers.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/numbers.md @@ -35,14 +35,12 @@ under the License. #### syntax ```sql numbers( - "number" = "n", - "backend_num" = "m" + "number" = "n" ); ``` 参数: - `number`: 代表生成[0,n)的行。 -- `backend_num`: 可选参数,代表`m`个be节点同时执行该函数(需要部署多个be)。 ### example ``` diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/DataGenScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/DataGenScanNode.java index 46af5ec1ae..d00641d135 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/DataGenScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/DataGenScanNode.java @@ -107,4 +107,14 @@ public class DataGenScanNode extends ExternalScanNode { public boolean needToCheckColumnPriv() { return false; } + + // Currently DataGenScanNode is only used by DataGenTableValuedFunction, which is + // inherited by NumbersTableValuedFunction. + // NumbersTableValuedFunction is not a complete implementation for now, since its + // function signature do not support us to split total numbers, so it can not be executed + // by multi-processes or multi-threads. So we assign instance number to 1. + @Override + public int getNumInstances() { + return 1; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index 7cf4fdafa5..c6ea704aa7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -703,8 +703,9 @@ public class Coordinator implements CoordInterface { // else use exec_plan_fragments directly. // we choose #fragments >=2 because in some cases // we need ensure that A fragment is already prepared to receive data before B fragment sends data. - // For example: select * from numbers("10","w") will generate ExchangeNode and TableValuedFunctionScanNode, - // we should ensure TableValuedFunctionScanNode does not send data until ExchangeNode is ready to receive. + // For example: select * from numbers("number"="10") will generate ExchangeNode and + // TableValuedFunctionScanNode, we should ensure TableValuedFunctionScanNode does + // not send data until ExchangeNode is ready to receive. boolean twoPhaseExecution = fragments.size() >= 2; for (PlanFragment fragment : fragments) { FragmentExecParams params = fragmentExecParamsMap.get(fragment.getFragmentId()); @@ -825,8 +826,9 @@ public class Coordinator implements CoordInterface { // else use exec_plan_fragments directly. // we choose #fragments >=2 because in some cases // we need ensure that A fragment is already prepared to receive data before B fragment sends data. - // For example: select * from numbers("10","w") will generate ExchangeNode and TableValuedFunctionScanNode, - // we should ensure TableValuedFunctionScanNode does not send data until ExchangeNode is ready to receive. + // For example: select * from numbers("number"="10") will generate ExchangeNode and + // TableValuedFunctionScanNode, we should ensure TableValuedFunctionScanNode does not + // send data until ExchangeNode is ready to receive. boolean twoPhaseExecution = fragments.size() >= 2; for (PlanFragment fragment : fragments) { FragmentExecParams params = fragmentExecParamsMap.get(fragment.getFragmentId()); @@ -2315,6 +2317,11 @@ public class Coordinator implements CoordInterface { FragmentScanRangeAssignment assignment, Map assignedBytesPerHost, Map replicaNumPerHost) throws Exception { + // Type of locations is List, it could have elements that have same "location" + // and we do have this situation for some scan node. + // The duplicate "location" will NOT be filtered by FragmentScanRangeAssignment, + // since FragmentScanRangeAssignment use List as its value type, + // duplicate "locations" will be converted to list. for (TScanRangeLocations scanRangeLocations : locations) { Reference backendIdRef = new Reference(); TScanRangeLocation minLocation = selectBackendsByRoundRobin(scanRangeLocations, diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/NumbersTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/NumbersTableValuedFunction.java index 639dfeef35..3c0b578b50 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/NumbersTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/NumbersTableValuedFunction.java @@ -41,20 +41,16 @@ import java.util.Map; // have a single column number /** - * The Implement of table valued function——numbers("number" = "N", "backend_num" = "M"). + * The Implement of table valued function——numbers("number" = "N"). */ public class NumbersTableValuedFunction extends DataGenTableValuedFunction { public static final String NAME = "numbers"; public static final String NUMBER = "number"; - public static final String BACKEND_NUM = "backend_num"; private static final ImmutableSet PROPERTIES_SET = new ImmutableSet.Builder() .add(NUMBER) - .add(BACKEND_NUM) .build(); // The total numbers will be generated. private long totalNumbers; - // The total backends will server it. - private int tabletsNum; /** * Constructor. @@ -70,11 +66,6 @@ public class NumbersTableValuedFunction extends DataGenTableValuedFunction { validParams.put(key.toLowerCase(), params.get(key)); } - try { - tabletsNum = Integer.parseInt(validParams.getOrDefault(BACKEND_NUM, "1")); - } catch (NumberFormatException e) { - throw new AnalysisException("can not parse `backend_num` param to natural number"); - } String numberStr = validParams.get(NUMBER); if (!Strings.isNullOrEmpty(numberStr)) { try { @@ -92,10 +83,6 @@ public class NumbersTableValuedFunction extends DataGenTableValuedFunction { return totalNumbers; } - public int getTabletsNum() { - return tabletsNum; - } - @Override public TDataGenFunctionName getDataGenFunctionName() { return TDataGenFunctionName.NUMBERS; @@ -124,17 +111,16 @@ public class NumbersTableValuedFunction extends DataGenTableValuedFunction { if (backendList.isEmpty()) { throw new AnalysisException("No Alive backends"); } + Collections.shuffle(backendList); List res = Lists.newArrayList(); - for (int i = 0; i < tabletsNum; ++i) { - TScanRange scanRange = new TScanRange(); - TDataGenScanRange dataGenScanRange = new TDataGenScanRange(); - TTVFNumbersScanRange tvfNumbersScanRange = new TTVFNumbersScanRange(); - tvfNumbersScanRange.setTotalNumbers(totalNumbers); - dataGenScanRange.setNumbersParams(tvfNumbersScanRange); - scanRange.setDataGenScanRange(dataGenScanRange); - res.add(new TableValuedFunctionTask(backendList.get(i % backendList.size()), scanRange)); - } + TScanRange scanRange = new TScanRange(); + TDataGenScanRange dataGenScanRange = new TDataGenScanRange(); + TTVFNumbersScanRange tvfNumbersScanRange = new TTVFNumbersScanRange(); + tvfNumbersScanRange.setTotalNumbers(totalNumbers); + dataGenScanRange.setNumbersParams(tvfNumbersScanRange); + scanRange.setDataGenScanRange(dataGenScanRange); + res.add(new TableValuedFunctionTask(backendList.get(0), scanRange)); return res; } } diff --git a/regression-test/suites/http_rest_api/post/test_query_stmt.groovy b/regression-test/suites/http_rest_api/post/test_query_stmt.groovy index 7a03cdc48b..bcf6e23984 100644 --- a/regression-test/suites/http_rest_api/post/test_query_stmt.groovy +++ b/regression-test/suites/http_rest_api/post/test_query_stmt.groovy @@ -49,7 +49,7 @@ suite("test_query_stmt") { def url= "/api/query/default_cluster/" + context.config.defaultDb // test select - def stmt1 = """ select * from numbers('number' = '10', 'backend_num' = '1') """ + def stmt1 = """ select * from numbers('number' = '10') """ def stmt1_json = JsonOutput.toJson(new Stmt(stmt: stmt1)); def resJson = http_post(url, stmt1_json) diff --git a/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy b/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy index ea63b5b789..e2f5d8e0cf 100644 --- a/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy +++ b/regression-test/suites/nereids_syntax_p0/aggregate_strategies.groovy @@ -170,7 +170,7 @@ suite("aggregate_strategies") { sql """select /*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_WITH_DISTINCT')*/ count(distinct number) - from numbers('number' = '10000', 'backend_num'='10')""" + from numbers('number' = '10000')""" result([[10000L]]) } @@ -178,7 +178,7 @@ suite("aggregate_strategies") { sql """select /*+SET_VAR(disable_nereids_rules='THREE_PHASE_AGGREGATE_WITH_DISTINCT')*/ count(distinct number) - from numbers('number' = '10000', 'backend_num'='10')""" + from numbers('number' = '10000')""" result([[10000L]]) } @@ -186,7 +186,7 @@ suite("aggregate_strategies") { sql """select /*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_WITH_DISTINCT')*/ count(distinct number) - from numbers('number' = '10000', 'backend_num'='1')""" + from numbers('number' = '10000')""" result([[10000L]]) } @@ -194,7 +194,7 @@ suite("aggregate_strategies") { sql """select /*+SET_VAR(disable_nereids_rules='THREE_PHASE_AGGREGATE_WITH_DISTINCT')*/ count(distinct number) - from numbers('number' = '10000', 'backend_num'='1')""" + from numbers('number' = '10000')""" result([[10000L]]) } diff --git a/regression-test/suites/nereids_syntax_p0/function.groovy b/regression-test/suites/nereids_syntax_p0/function.groovy index e79d6dfb7a..3fbb8e55cb 100644 --- a/regression-test/suites/nereids_syntax_p0/function.groovy +++ b/regression-test/suites/nereids_syntax_p0/function.groovy @@ -70,7 +70,7 @@ suite("nereids_function") { // numbers: table valued function test { - sql "select `number` from numbers(number = 10, backend_num = 1)" + sql "select `number` from numbers(number = 10)" result([[0L], [1L], [2L], [3L], [4L], [5L], [6L], [7L], [8L], [9L]]) } @@ -91,7 +91,7 @@ suite("nereids_function") { qt_subquery3 """ select a.number from numbers("number" = "10") a where number in (select number from numbers("number" = "10") b where a.number=b.number); """ test { - sql """select `number` from numbers("number" = -1, 'backend_num' = `1`)""" + sql """select `number` from numbers("number" = "-1")""" result([]) } diff --git a/regression-test/suites/nereids_syntax_p2/aggregate_strategies.groovy b/regression-test/suites/nereids_syntax_p2/aggregate_strategies.groovy index cebd90a955..e2ad64f688 100644 --- a/regression-test/suites/nereids_syntax_p2/aggregate_strategies.groovy +++ b/regression-test/suites/nereids_syntax_p2/aggregate_strategies.groovy @@ -20,7 +20,7 @@ suite("aggregate_strategies") { sql """select /*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_WITH_DISTINCT')*/ count(distinct number) - from numbers('number' = '10000000', 'backend_num'='10')""" + from numbers('number' = '10000000')""" result([[10000000L]]) } @@ -28,7 +28,7 @@ suite("aggregate_strategies") { sql """select /*+SET_VAR(disable_nereids_rules='THREE_PHASE_AGGREGATE_WITH_DISTINCT')*/ count(distinct number) - from numbers('number' = '10000000', 'backend_num'='10')""" + from numbers('number' = '10000000')""" result([[10000000L]]) } @@ -36,7 +36,7 @@ suite("aggregate_strategies") { sql """select /*+SET_VAR(disable_nereids_rules='TWO_PHASE_AGGREGATE_WITH_DISTINCT')*/ count(distinct number) - from numbers('number' = '10000000', 'backend_num'='1')""" + from numbers('number' = '10000000')""" result([[10000000L]]) } @@ -44,7 +44,7 @@ suite("aggregate_strategies") { sql """select /*+SET_VAR(disable_nereids_rules='THREE_PHASE_AGGREGATE_WITH_DISTINCT')*/ count(distinct number) - from numbers('number' = '10000000', 'backend_num'='1')""" + from numbers('number' = '10000000')""" result([[10000000L]]) } }