From 81a0f8c041e06ab4f48ef813464b42c8d6f25dca Mon Sep 17 00:00:00 2001 From: zclllyybb Date: Thu, 7 Dec 2023 22:26:43 +0800 Subject: [PATCH] [Feature](function) support generating const values from tvf numbers (#28051) If `const_value` is specified, the result is a column filled with that constant; otherwise it is the usual incremental series. mysql> select * from numbers("number" = "5", "const_value" = "-123"); +--------+ | number | +--------+ | -123 | | -123 | | -123 | | -123 | | -123 | +--------+ 5 rows in set (0.11 sec) --- be/src/vec/columns/column_vector.h | 6 +++ .../exec/data_gen_functions/vnumbers_tvf.cpp | 11 +++- .../exec/data_gen_functions/vnumbers_tvf.h | 9 ++-- .../sql-functions/table-functions/numbers.md | 31 ++++++----- .../sql-functions/table-functions/numbers.md | 30 +++++++---- .../expressions/functions/table/Numbers.java | 24 +++++---- .../NumbersTableValuedFunction.java | 53 ++++++++++++------- gensrc/thrift/PlanNodes.thrift | 4 +- .../external_table_p0/tvf/test_numbers.out | 19 +++++++ .../external_table_p0/tvf/test_numbers.groovy | 9 +++- 10 files changed, 136 insertions(+), 60 deletions(-) diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index 5319902758..772162bc87 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -195,6 +195,12 @@ public: memcpy(data.data() + old_size, data_ptr, num * sizeof(T)); } + void insert_raw_integers(T val, size_t n) { + auto old_size = data.size(); + data.resize(old_size + n); + std::fill(data.data() + old_size, data.data() + old_size + n, val); + } + void insert_range_of_integer(T begin, T end) { auto old_size = data.size(); data.resize(old_size + (end - begin)); diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp index 4c37dc3a35..480a22d694 100644 --- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp +++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp @@ -37,6 +37,9 @@ namespace 
doris::vectorized { +const static std::string NUMBER = std::string {"number"}; +const static std::string ZERO = std::string {"zero"}; + VNumbersTVF::VNumbersTVF(TupleId tuple_id, const TupleDescriptor* tuple_desc) : VDataGenFunctionInf(tuple_id, tuple_desc) {} @@ -61,7 +64,11 @@ Status VNumbersTVF::get_next(RuntimeState* state, vectorized::Block* block, bool } auto* column_res = assert_cast(columns[i].get()); //BIGINT int64_t end_value = std::min((int64_t)(_next_number + batch_size), _total_numbers); - column_res->insert_range_of_integer(_next_number, end_value); + if (_use_const) { + column_res->insert_raw_integers(_const_value, end_value - _next_number); + } else { + column_res->insert_range_of_integer(_next_number, end_value); + } if (end_value == _total_numbers) { *eos = true; } else { @@ -86,6 +93,8 @@ Status VNumbersTVF::set_scan_ranges(const std::vector& scan_ra // Currently we do not support multi-threads numbers function, so there is no need to // use more than one scan_range_param. 
DCHECK(scan_range_params.size() == 1); + _use_const = scan_range_params[0].scan_range.data_gen_scan_range.numbers_params.useConst; + _const_value = scan_range_params[0].scan_range.data_gen_scan_range.numbers_params.constValue; _total_numbers = scan_range_params[0].scan_range.data_gen_scan_range.numbers_params.totalNumbers; return Status::OK(); diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h index 310571ee76..1968637fd3 100644 --- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h +++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.h @@ -17,8 +17,7 @@ #pragma once -#include - +#include #include #include "common/global_types.h" @@ -43,8 +42,10 @@ public: Status set_scan_ranges(const std::vector& scan_ranges) override; -protected: - int64_t _total_numbers; +private: + bool _use_const = false; + int64_t _const_value = 0; + int64_t _total_numbers = 0; // Number of returned columns, actually only 1 column int _slot_num = 1; int64_t _next_number = 0; diff --git a/docs/en/docs/sql-manual/sql-functions/table-functions/numbers.md b/docs/en/docs/sql-manual/sql-functions/table-functions/numbers.md index 916585255c..723d62154a 100644 --- a/docs/en/docs/sql-manual/sql-functions/table-functions/numbers.md +++ b/docs/en/docs/sql-manual/sql-functions/table-functions/numbers.md @@ -28,24 +28,23 @@ under the License. ### description -Table-Value-Function, generate a temporary table with only one column named 'number', row values are [0,n). - -This function is used in FROM clauses. +Table function that generates a temporary table with a single column named `number`. If `const_value` is specified, every row holds `const_value`; otherwise the rows are the increasing integers in [0, `number`). #### syntax - ```sql numbers( "number" = "n" + <, "const_value" = "x"> ); ``` parameter: -- `number`: It means to generate rows [0, n). +- `number`: Number of rows to generate. +- `const_value`: Optional. The constant value that fills every row. 
### example ``` -mysql> select * from numbers("number" = "10"); +mysql> select * from numbers("number" = "5"); +--------+ | number | +--------+ @@ -54,14 +53,22 @@ mysql> select * from numbers("number" = "10"); | 2 | | 3 | | 4 | -| 5 | -| 6 | -| 7 | -| 8 | -| 9 | +--------+ +5 rows in set (0.11 sec) + +mysql> select * from numbers("number" = "5", "const_value" = "-123"); ++--------+ +| number | ++--------+ +| -123 | +| -123 | +| -123 | +| -123 | +| -123 | ++--------+ +5 rows in set (0.12 sec) ``` ### keywords - numbers \ No newline at end of file + numbers, const_value \ No newline at end of file diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/numbers.md b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/numbers.md index 1223236b11..2215a5f1df 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/numbers.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/table-functions/numbers.md @@ -28,23 +28,23 @@ under the License. ### description -表函数,生成一张只含有一列的临时表,列名为`number`,行的值为[0,n)。 - -该函数用于from子句中。 +表函数,生成一张只含有一列的临时表,列名为`number`,如果指定了`const_value`,则所有元素值均为`const_value`,否则为[0,`number`)递增。 #### syntax ```sql numbers( "number" = "n" + <, "const_value" = "x"> ); ``` 参数: -- `number`: 代表生成[0,n)的行。 +- `number`: 行数。 +- `const_value` : 常量值。 ### example ``` -mysql> select * from numbers("number" = "10"); +mysql> select * from numbers("number" = "5"); +--------+ | number | +--------+ @@ -53,16 +53,24 @@ mysql> select * from numbers("number" = "10"); | 2 | | 3 | | 4 | -| 5 | -| 6 | -| 7 | -| 8 | -| 9 | +--------+ +5 rows in set (0.11 sec) + +mysql> select * from numbers("number" = "5", "const_value" = "-123"); ++--------+ +| number | ++--------+ +| -123 | +| -123 | +| -123 | +| -123 | +| -123 | ++--------+ +5 rows in set (0.12 sec) ``` ### keywords - numbers + numbers, const_value diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java index 305699bc0e..845baa045c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/Numbers.java @@ -65,17 +65,23 @@ public class Numbers extends TableValuedFunction { public Statistics computeStats(List slots) { Preconditions.checkArgument(slots.size() == 1); try { - NumbersTableValuedFunction catalogFunction = (NumbersTableValuedFunction) getCatalogFunction(); - long rowNum = catalogFunction.getTotalNumbers(); + NumbersTableValuedFunction numberTvf = (NumbersTableValuedFunction) getCatalogFunction(); + long rowNum = numberTvf.getTotalNumbers(); Map columnToStatistics = Maps.newHashMap(); - ColumnStatistic columnStat = new ColumnStatisticBuilder() - .setCount(rowNum).setNdv(rowNum).setAvgSizeByte(8).setNumNulls(0).setDataSize(8).setMinValue(0) - .setMaxValue(rowNum - 1) - .setMinExpr(new IntLiteral(0, Type.BIGINT)) - .setMaxExpr(new IntLiteral(rowNum - 1, Type.BIGINT)) - .build(); - columnToStatistics.put(slots.get(0), columnStat); + ColumnStatisticBuilder statBuilder = new ColumnStatisticBuilder() + .setCount(rowNum).setAvgSizeByte(8).setNumNulls(0).setDataSize(8); + if (numberTvf.getUseConst()) { // a column of const value + long value = numberTvf.getConstValue(); + statBuilder = statBuilder.setNdv(1).setMinValue(value).setMaxValue(value) + .setMinExpr(new IntLiteral(value, Type.BIGINT)) + .setMaxExpr(new IntLiteral(value, Type.BIGINT)); + } else { // a column of increasing value + statBuilder = statBuilder.setNdv(rowNum).setMinValue(0).setMaxValue(rowNum - 1) + .setMinExpr(new IntLiteral(0, Type.BIGINT)) + .setMaxExpr(new IntLiteral(rowNum - 1, Type.BIGINT)); + } + columnToStatistics.put(slots.get(0), statBuilder.build()); return new Statistics(rowNum, columnToStatistics); } catch (Exception t) { throw new 
NereidsException(t.getMessage(), t); diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/NumbersTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/NumbersTableValuedFunction.java index 3c0b578b50..8d4a627043 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/NumbersTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/NumbersTableValuedFunction.java @@ -27,10 +27,8 @@ import org.apache.doris.thrift.TDataGenScanRange; import org.apache.doris.thrift.TScanRange; import org.apache.doris.thrift.TTVFNumbersScanRange; -import com.google.common.base.Strings; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; -import com.google.common.collect.Maps; import java.util.ArrayList; import java.util.Collections; @@ -46,11 +44,15 @@ import java.util.Map; public class NumbersTableValuedFunction extends DataGenTableValuedFunction { public static final String NAME = "numbers"; public static final String NUMBER = "number"; + public static final String CONST_VALUE = "const_value"; private static final ImmutableSet PROPERTIES_SET = new ImmutableSet.Builder() .add(NUMBER) + .add(CONST_VALUE) .build(); // The total numbers will be generated. private long totalNumbers; + private boolean useConst = false; + private long constValue; /** * Constructor. 
@@ -58,24 +60,27 @@ public class NumbersTableValuedFunction extends DataGenTableValuedFunction { * @throws AnalysisException exception */ public NumbersTableValuedFunction(Map params) throws AnalysisException { - Map validParams = Maps.newHashMap(); - for (String key : params.keySet()) { - if (!PROPERTIES_SET.contains(key.toLowerCase())) { - throw new AnalysisException(key + " is invalid property"); - } - validParams.put(key.toLowerCase(), params.get(key)); + if (!params.containsKey(NUMBER)) { + throw new AnalysisException("number not set"); } - - String numberStr = validParams.get(NUMBER); - if (!Strings.isNullOrEmpty(numberStr)) { - try { - totalNumbers = Long.parseLong(numberStr); - } catch (NumberFormatException e) { - throw new AnalysisException("can not parse `number` param to natural number"); + for (String key : params.keySet()) { + if (PROPERTIES_SET.contains(key)) { + try { + switch (key) { + case NUMBER: + totalNumbers = Long.parseLong(params.get(key)); + break; + case CONST_VALUE: + useConst = true; + constValue = Long.parseLong(params.get(key)); + break; + default: + break; + } + } catch (NumberFormatException e) { + throw new AnalysisException("cannot parse param value " + params.get(key)); + } } - } else { - throw new AnalysisException( - "can not find `number` param, please specify `number`, like: numbers(\"number\" = \"10\")"); } } @@ -83,6 +88,14 @@ public class NumbersTableValuedFunction extends DataGenTableValuedFunction { return totalNumbers; } + public boolean getUseConst() { + return useConst; + } + + public long getConstValue() { + return constValue; + } + @Override public TDataGenFunctionName getDataGenFunctionName() { return TDataGenFunctionName.NUMBERS; @@ -116,8 +129,8 @@ public class NumbersTableValuedFunction extends DataGenTableValuedFunction { List res = Lists.newArrayList(); TScanRange scanRange = new TScanRange(); TDataGenScanRange dataGenScanRange = new TDataGenScanRange(); - TTVFNumbersScanRange tvfNumbersScanRange = new 
TTVFNumbersScanRange(); - tvfNumbersScanRange.setTotalNumbers(totalNumbers); + TTVFNumbersScanRange tvfNumbersScanRange = new TTVFNumbersScanRange().setTotalNumbers(totalNumbers) + .setUseConst(useConst).setConstValue(constValue); dataGenScanRange.setNumbersParams(tvfNumbersScanRange); scanRange.setDataGenScanRange(dataGenScanRange); res.add(new TableValuedFunctionTask(backendList.get(0), scanRange)); diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 9e020abdbd..5cbb15721c 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -451,7 +451,9 @@ enum TDataGenFunctionName { // Every table valued function should have a scan range definition to save its // running parameters struct TTVFNumbersScanRange { - 1: optional i64 totalNumbers + 1: optional i64 totalNumbers + 2: optional bool useConst + 3: optional i64 constValue } struct TDataGenScanRange { diff --git a/regression-test/data/external_table_p0/tvf/test_numbers.out b/regression-test/data/external_table_p0/tvf/test_numbers.out index ef65f7eea5..72ae8c9e44 100644 --- a/regression-test/data/external_table_p0/tvf/test_numbers.out +++ b/regression-test/data/external_table_p0/tvf/test_numbers.out @@ -123,6 +123,25 @@ 3 4 +-- !const1 -- +1 +1 +1 +1 +1 + +-- !const2 -- +-123 +-123 +-123 +-123 +-123 + +-- !const3 -- + +-- !const4 -- +123.0 + -- !agg_sum -- 4950 diff --git a/regression-test/suites/external_table_p0/tvf/test_numbers.groovy b/regression-test/suites/external_table_p0/tvf/test_numbers.groovy index 6dc09a4f5d..44a956f93c 100644 --- a/regression-test/suites/external_table_p0/tvf/test_numbers.groovy +++ b/regression-test/suites/external_table_p0/tvf/test_numbers.groovy @@ -23,6 +23,11 @@ qt_basic3 """ select * from numbers("number" = "100"); """ qt_basic4_limit """ select * from numbers("number" = "10") limit 5; """ + qt_const1 """ select * from numbers("number" = "5", "const_value" = "1"); """ + qt_const2 """ select * from numbers("number" = "5", 
"const_value" = "-123"); """ + qt_const3 """ select * from numbers("number" = "-10", "const_value" = "1"); """ + qt_const4 """ select avg(number) from numbers("number" = "100", "const_value" = "123"); """ + // Test aggregate function withh numbers("number" = N) qt_agg_sum """ select sum(number) from numbers("number" = "100"); """ qt_agg_avg """ select avg(number) from numbers("number" = "100"); """ @@ -129,13 +134,13 @@ sql """ select * from numbers('number' = 'abc'); """ // check exception - exception "can not parse `number` param to natural number" + exception "cannot parse param value abc" } test { sql """ select * from numbers(); """ // check exception - exception """can not find `number` param, please specify `number`, like: numbers("number" = "10")""" + exception """number not set""" } }