From dc284b62d9843125d6d96121b9752e598050246a Mon Sep 17 00:00:00 2001 From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com> Date: Mon, 20 Mar 2023 23:18:10 +0800 Subject: [PATCH] [vectorized](function) support array_filter function (#17832) --- be/src/vec/CMakeLists.txt | 1 + .../lambda_function/lambda_function_factory.h | 6 +- .../varray_filter_function.cpp | 165 ++++++++++++++++++ .../lambda_function/varray_map_function.cpp | 11 +- .../array-functions/array_filter.md | 102 +++++++++++ docs/sidebars.json | 1 + .../array-functions/array_filter.md | 102 +++++++++++ .../apache/doris/analysis/ColumnRefExpr.java | 4 +- .../analysis/LambdaFunctionCallExpr.java | 39 ++++- .../doris/analysis/LambdaFunctionExpr.java | 2 +- gensrc/script/doris_builtins_functions.py | 19 ++ .../test_array_map_function.out | 18 ++ .../test_array_map_function_not_null.out | 97 ++++++++++ .../test_array_map_function.groovy | 5 + .../test_array_map_function_not_null.groovy | 73 ++++++++ 15 files changed, 633 insertions(+), 12 deletions(-) create mode 100644 be/src/vec/exprs/lambda_function/varray_filter_function.cpp create mode 100644 docs/en/docs/sql-manual/sql-functions/array-functions/array_filter.md create mode 100644 docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_filter.md create mode 100644 regression-test/data/query_p0/sql_functions/array_functions/test_array_map_function_not_null.out create mode 100644 regression-test/suites/query_p0/sql_functions/array_functions/test_array_map_function_not_null.groovy diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 4db743459c..be1162a808 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -161,6 +161,7 @@ set(VEC_FILES exprs/table_function/vexplode_numbers.cpp exprs/table_function/vexplode_bitmap.cpp exprs/lambda_function/varray_map_function.cpp + exprs/lambda_function/varray_filter_function.cpp functions/array/function_array_index.cpp 
functions/array/function_array_element.cpp functions/array/function_array_register.cpp diff --git a/be/src/vec/exprs/lambda_function/lambda_function_factory.h b/be/src/vec/exprs/lambda_function/lambda_function_factory.h index f46c2d1d7f..ce60774873 100644 --- a/be/src/vec/exprs/lambda_function/lambda_function_factory.h +++ b/be/src/vec/exprs/lambda_function/lambda_function_factory.h @@ -27,6 +27,7 @@ namespace doris::vectorized { class LambdaFunctionFactory; void register_function_array_map(LambdaFunctionFactory& factory); +void register_function_array_filter(LambdaFunctionFactory& factory); class LambdaFunctionFactory { using Creator = std::function; @@ -58,7 +59,10 @@ public: static LambdaFunctionFactory& instance() { static std::once_flag oc; static LambdaFunctionFactory instance; - std::call_once(oc, []() { register_function_array_map(instance); }); + std::call_once(oc, []() { + register_function_array_map(instance); + register_function_array_filter(instance); + }); return instance; } }; diff --git a/be/src/vec/exprs/lambda_function/varray_filter_function.cpp b/be/src/vec/exprs/lambda_function/varray_filter_function.cpp new file mode 100644 index 0000000000..8d4acdf8f8 --- /dev/null +++ b/be/src/vec/exprs/lambda_function/varray_filter_function.cpp @@ -0,0 +1,165 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "common/status.h" +#include "vec/columns/column.h" +#include "vec/columns/column_array.h" +#include "vec/columns/columns_number.h" +#include "vec/core/block.h" +#include "vec/data_types/data_type_array.h" +#include "vec/exprs/lambda_function/lambda_function.h" +#include "vec/exprs/lambda_function/lambda_function_factory.h" +#include "vec/exprs/vexpr.h" +#include "vec/exprs/vexpr_context.h" +#include "vec/utils/util.hpp" + +namespace doris::vectorized { + +class ArrayFilterFunction : public LambdaFunction { +public: + ~ArrayFilterFunction() override = default; + + static constexpr auto name = "array_filter"; + + static LambdaFunctionPtr create() { return std::make_shared(); } + + std::string get_name() const override { return name; } + + doris::Status execute(VExprContext* context, doris::vectorized::Block* block, + int* result_column_id, DataTypePtr result_type, + const std::vector& children) override { + ///* array_filter(array, array) */// + + //1. child[0:end]->execute(src_block) + doris::vectorized::ColumnNumbers arguments(children.size()); + for (int i = 0; i < children.size(); ++i) { + int column_id = -1; + RETURN_IF_ERROR(children[i]->execute(context, block, &column_id)); + arguments[i] = column_id; + } + + //2. 
get first and second array column + auto first_column = + block->get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + auto second_column = + block->get_by_position(arguments[1]).column->convert_to_full_column_if_const(); + + int input_rows = first_column->size(); + auto first_outside_null_map = ColumnUInt8::create(input_rows, 0); + auto first_arg_column = first_column; + if (first_arg_column->is_nullable()) { + first_arg_column = + assert_cast(first_column.get())->get_nested_column_ptr(); + const auto& column_array_nullmap = + assert_cast(first_column.get())->get_null_map_column(); + VectorizedUtils::update_null_map(first_outside_null_map->get_data(), + column_array_nullmap.get_data()); + } + const ColumnArray& first_col_array = assert_cast(*first_arg_column); + const auto& first_off_data = + assert_cast(first_col_array.get_offsets_column()) + .get_data(); + const auto& first_nested_nullable_column = + assert_cast(*first_col_array.get_data_ptr()); + + auto result_data_column = first_nested_nullable_column.clone_empty(); + auto result_offset_column = ColumnArray::ColumnOffsets::create(); + auto& result_offset_data = result_offset_column->get_data(); + vectorized::IColumn::Selector selector; + selector.reserve(first_off_data.size()); + result_offset_data.reserve(input_rows); + + auto second_arg_column = second_column; + auto second_outside_null_map = ColumnUInt8::create(input_rows, 0); + if (second_arg_column->is_nullable()) { + second_arg_column = assert_cast(second_column.get()) + ->get_nested_column_ptr(); + const auto& column_array_nullmap = + assert_cast(second_column.get())->get_null_map_column(); + VectorizedUtils::update_null_map(second_outside_null_map->get_data(), + column_array_nullmap.get_data()); + } + const ColumnArray& second_col_array = assert_cast(*second_arg_column); + const auto& second_off_data = assert_cast( + second_col_array.get_offsets_column()) + .get_data(); + const auto& second_nested_null_map_data = + 
assert_cast(*second_col_array.get_data_ptr()) + .get_null_map_column() + .get_data(); + const auto& second_nested_column = + assert_cast(*second_col_array.get_data_ptr()) + .get_nested_column(); + const auto& second_nested_data = + assert_cast(second_nested_column).get_data(); + + //3. get the idx of second column data is not null and not 0 + for (int row = 0; row < input_rows; ++row) { + //first or second column is null, so current row is invalid data + if (first_outside_null_map->get_data()[row] || + second_outside_null_map->get_data()[row]) { + result_offset_data.push_back(result_offset_data.back()); + } else { + unsigned long count = 0; + auto first_offset_start = first_off_data[row - 1]; + auto first_offset_end = first_off_data[row]; + auto second_offset_start = second_off_data[row - 1]; + auto second_offset_end = second_off_data[row]; + auto move_off = second_offset_start; + for (auto off = first_offset_start; + off < first_offset_end && move_off < second_offset_end; // not out range + ++off) { + if (!second_nested_null_map_data[move_off] && // not null + second_nested_data[move_off]) { // not 0 + count++; + selector.push_back(off); + } + move_off++; + } + result_offset_data.push_back(count + result_offset_data.back()); + } + } + first_nested_nullable_column.append_data_by_selector(result_data_column, selector); + + //4. 
insert the result column to block + ColumnWithTypeAndName result_arr; + if (result_type->is_nullable()) { + result_arr = { + ColumnNullable::create(ColumnArray::create(std::move(result_data_column), + std::move(result_offset_column)), + std::move(first_outside_null_map)), + result_type, "array_filter_result"}; + + } else { + DCHECK(!first_column->is_nullable()); + DCHECK(!second_column->is_nullable()); + result_arr = {ColumnArray::create(std::move(result_data_column), + std::move(result_offset_column)), + result_type, "array_filter_result"}; + } + block->insert(std::move(result_arr)); + *result_column_id = block->columns() - 1; + return Status::OK(); + } +}; + +void register_function_array_filter(doris::vectorized::LambdaFunctionFactory& factory) { + factory.register_function(); +} +} // namespace doris::vectorized diff --git a/be/src/vec/exprs/lambda_function/varray_map_function.cpp b/be/src/vec/exprs/lambda_function/varray_map_function.cpp index c3a26f18b1..7b14be2260 100644 --- a/be/src/vec/exprs/lambda_function/varray_map_function.cpp +++ b/be/src/vec/exprs/lambda_function/varray_map_function.cpp @@ -66,20 +66,17 @@ public: Block lambda_block; for (int i = 0; i < arguments.size(); ++i) { const auto& array_column_type_name = block->get_by_position(arguments[i]); - auto column_array = array_column_type_name.column; - column_array = column_array->convert_to_full_column_if_const(); + auto column_array = array_column_type_name.column->convert_to_full_column_if_const(); auto type_array = array_column_type_name.type; if (type_array->is_nullable()) { // get the nullmap of nullable column const auto& column_array_nullmap = - assert_cast(*array_column_type_name.column) - .get_null_map_column(); + assert_cast(*column_array).get_null_map_column(); // get the array column from nullable column - column_array = - assert_cast(array_column_type_name.column.get()) - ->get_nested_column_ptr(); + column_array = assert_cast(column_array.get()) + ->get_nested_column_ptr(); // get 
the nested type from nullable type type_array = assert_cast(array_column_type_name.type.get()) diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_filter.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_filter.md new file mode 100644 index 0000000000..0fbd0e4545 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_filter.md @@ -0,0 +1,102 @@ +--- +{ + "title": "array_filter", + "language": "en" +} +--- + + + +## array_filter + + + +array_filter(lambda,array) + + + +### description + +Use the lambda expression as the input parameter to calculate and filter the data of the ARRAY column of the other input parameter. +And filter out the values of 0 and NULL in the result. + +``` +array_filter(x->x>0, array1); +array_filter(x->(x+2)=10, array1); +array_filter(x->(abs(x)-2)>0, array1); +``` + +### example + +```shell + +mysql [test]>select array_filter(x->(x > 1),[1,2,3,0,null]); ++----------------------------------------------------------------------------------------------+ +| array_filter(ARRAY(1, 2, 3, 0, NULL), array_map([x] -> (x(0) > 1), ARRAY(1, 2, 3, 0, NULL))) | ++----------------------------------------------------------------------------------------------+ +| [2, 3] | ++----------------------------------------------------------------------------------------------+ + +mysql [test]>select *, array_filter(x->x>0,c_array2) from array_test2; ++------+-----------------+-------------------------+------------------------------------------------------------------+ +| id | c_array1 | c_array2 | array_filter(`c_array2`, array_map([x] -> x(0) > 0, `c_array2`)) | ++------+-----------------+-------------------------+------------------------------------------------------------------+ +| 1 | [1, 2, 3, 4, 5] | [10, 20, -40, 80, -100] | [10, 20, 80] | +| 2 | [6, 7, 8] | [10, 12, 13] | [10, 12, 13] | +| 3 | [1] | [-100] | [] | +| 4 | NULL | NULL | NULL | 
++------+-----------------+-------------------------+------------------------------------------------------------------+ +4 rows in set (0.01 sec) + +mysql [test]>select *, array_filter(x->x%2=0,c_array2) from array_test2; ++------+-----------------+-------------------------+----------------------------------------------------------------------+ +| id | c_array1 | c_array2 | array_filter(`c_array2`, array_map([x] -> x(0) % 2 = 0, `c_array2`)) | ++------+-----------------+-------------------------+----------------------------------------------------------------------+ +| 1 | [1, 2, 3, 4, 5] | [10, 20, -40, 80, -100] | [10, 20, -40, 80, -100] | +| 2 | [6, 7, 8] | [10, 12, 13] | [10, 12] | +| 3 | [1] | [-100] | [-100] | +| 4 | NULL | NULL | NULL | ++------+-----------------+-------------------------+----------------------------------------------------------------------+ + +mysql [test]>select *, array_filter(x->(x*(-10)>0),c_array2) from array_test2; ++------+-----------------+-------------------------+----------------------------------------------------------------------------+ +| id | c_array1 | c_array2 | array_filter(`c_array2`, array_map([x] -> (x(0) * (-10) > 0), `c_array2`)) | ++------+-----------------+-------------------------+----------------------------------------------------------------------------+ +| 1 | [1, 2, 3, 4, 5] | [10, 20, -40, 80, -100] | [-40, -100] | +| 2 | [6, 7, 8] | [10, 12, 13] | [] | +| 3 | [1] | [-100] | [-100] | +| 4 | NULL | NULL | NULL | ++------+-----------------+-------------------------+----------------------------------------------------------------------------+ + +mysql [test]>select *, array_filter(x->x>0, array_map((x,y)->(x>y), c_array1,c_array2)) as res from array_test2; ++------+-----------------+-------------------------+--------+ +| id | c_array1 | c_array2 | res | ++------+-----------------+-------------------------+--------+ +| 1 | [1, 2, 3, 4, 5] | [10, 20, -40, 80, -100] | [1, 1] | +| 2 | [6, 7, 8] | [10, 12, 13] | [] 
| +| 3 | [1] | [-100] | [1] | +| 4 | NULL | NULL | NULL | ++------+-----------------+-------------------------+--------+ +``` + +### keywords + +ARRAY,FILTER,ARRAY_FILTER + diff --git a/docs/sidebars.json b/docs/sidebars.json index 2438022749..fdeb81df24 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -275,6 +275,7 @@ "sql-manual/sql-functions/array-functions/array_max", "sql-manual/sql-functions/array-functions/array_min", "sql-manual/sql-functions/array-functions/array_map", + "sql-manual/sql-functions/array-functions/array_filter", "sql-manual/sql-functions/array-functions/array_avg", "sql-manual/sql-functions/array-functions/array_sum", "sql-manual/sql-functions/array-functions/array_size", diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_filter.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_filter.md new file mode 100644 index 0000000000..ca37704c1a --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_filter.md @@ -0,0 +1,102 @@ +--- +{ + "title": "array_filter", + "language": "zh-CN" +} +--- + + + +## array_filter + + + +array_filter(lambda,array) + + + +### description + +使用lambda表达式作为输入参数,计算筛选另外的输入参数ARRAY列的数据。 +并过滤掉在结果中0和NULL的值。 + +``` +array_filter(x->x>0, array1); +array_filter(x->(x+2)=10, array1); +array_filter(x->(abs(x)-2)>0, array1); + +``` + +### example + +```shell +mysql [test]>select array_filter(x->(x > 1),[1,2,3,0,null]); ++----------------------------------------------------------------------------------------------+ +| array_filter(ARRAY(1, 2, 3, 0, NULL), array_map([x] -> (x(0) > 1), ARRAY(1, 2, 3, 0, NULL))) | ++----------------------------------------------------------------------------------------------+ +| [2, 3] | ++----------------------------------------------------------------------------------------------+ + +mysql [test]>select *, array_filter(x->x>0,c_array2) from array_test2; 
++------+-----------------+-------------------------+------------------------------------------------------------------+ +| id | c_array1 | c_array2 | array_filter(`c_array2`, array_map([x] -> x(0) > 0, `c_array2`)) | ++------+-----------------+-------------------------+------------------------------------------------------------------+ +| 1 | [1, 2, 3, 4, 5] | [10, 20, -40, 80, -100] | [10, 20, 80] | +| 2 | [6, 7, 8] | [10, 12, 13] | [10, 12, 13] | +| 3 | [1] | [-100] | [] | +| 4 | NULL | NULL | NULL | ++------+-----------------+-------------------------+------------------------------------------------------------------+ +4 rows in set (0.01 sec) + +mysql [test]>select *, array_filter(x->x%2=0,c_array2) from array_test2; ++------+-----------------+-------------------------+----------------------------------------------------------------------+ +| id | c_array1 | c_array2 | array_filter(`c_array2`, array_map([x] -> x(0) % 2 = 0, `c_array2`)) | ++------+-----------------+-------------------------+----------------------------------------------------------------------+ +| 1 | [1, 2, 3, 4, 5] | [10, 20, -40, 80, -100] | [10, 20, -40, 80, -100] | +| 2 | [6, 7, 8] | [10, 12, 13] | [10, 12] | +| 3 | [1] | [-100] | [-100] | +| 4 | NULL | NULL | NULL | ++------+-----------------+-------------------------+----------------------------------------------------------------------+ + +mysql [test]>select *, array_filter(x->(x*(-10)>0),c_array2) from array_test2; ++------+-----------------+-------------------------+----------------------------------------------------------------------------+ +| id | c_array1 | c_array2 | array_filter(`c_array2`, array_map([x] -> (x(0) * (-10) > 0), `c_array2`)) | ++------+-----------------+-------------------------+----------------------------------------------------------------------------+ +| 1 | [1, 2, 3, 4, 5] | [10, 20, -40, 80, -100] | [-40, -100] | +| 2 | [6, 7, 8] | [10, 12, 13] | [] | +| 3 | [1] | [-100] | [-100] | +| 4 | NULL | NULL | 
NULL | ++------+-----------------+-------------------------+----------------------------------------------------------------------------+ + +mysql [test]>select *, array_filter(x->x>0, array_map((x,y)->(x>y), c_array1,c_array2)) as res from array_test2; ++------+-----------------+-------------------------+--------+ +| id | c_array1 | c_array2 | res | ++------+-----------------+-------------------------+--------+ +| 1 | [1, 2, 3, 4, 5] | [10, 20, -40, 80, -100] | [1, 1] | +| 2 | [6, 7, 8] | [10, 12, 13] | [] | +| 3 | [1] | [-100] | [1] | +| 4 | NULL | NULL | NULL | ++------+-----------------+-------------------------+--------+ +``` + +### keywords + +ARRAY,FILTER,ARRAY_FILTER + diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnRefExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnRefExpr.java index 88c652f6f5..2e7308aeb5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnRefExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnRefExpr.java @@ -57,11 +57,11 @@ public class ColumnRefExpr extends Expr { this.columnName = name; } - public int getcolumnId() { + public int getColumnId() { return columnId; } - public void setcolumnId(int id) { + public void setColumnId(int id) { this.columnId = id; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java index 18a2a1f144..9b5f7c0b99 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java @@ -29,11 +29,12 @@ import com.google.common.collect.ImmutableSortedSet; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.util.ArrayList; import java.util.List; public class LambdaFunctionCallExpr extends FunctionCallExpr { public static final ImmutableSet LAMBDA_FUNCTION_SET = new 
ImmutableSortedSet.Builder( - String.CASE_INSENSITIVE_ORDER).add("array_map").build(); + String.CASE_INSENSITIVE_ORDER).add("array_map").add("array_filter").build(); private static final Logger LOG = LogManager.getLogger(LambdaFunctionCallExpr.class); @@ -97,6 +98,42 @@ public class LambdaFunctionCallExpr extends FunctionCallExpr { } fn.setReturnType(ArrayType.create(lambda.getChild(0).getType(), true)); } + if (fnName.getFunction().equalsIgnoreCase("array_filter")) { + if (fnParams.exprs() == null || fnParams.exprs().size() != 2) { + throw new AnalysisException("The " + fnName.getFunction() + " function must have exactly two params"); + } + // array_filter(x->x>3, [1,2,3,6,34,3,11]) + // ---> array_filter([1,2,3,6,34,3,11], array_map(x->x>3, [1,2,3,6,34,3,11])) + if (getChild(1) instanceof LambdaFunctionExpr) { + List params = new ArrayList<>(); + params.add(getChild(1)); + params.add(getChild(0)); + LambdaFunctionCallExpr arrayMapFunc = new LambdaFunctionCallExpr("array_map", + params); + arrayMapFunc.analyzeImpl(analyzer); + Expr castExpr = arrayMapFunc.castTo(ArrayType.create(Type.BOOLEAN, true)); + this.setChild(1, castExpr); + argTypes[1] = castExpr.getType(); + } + if (!(getChild(1) instanceof CastExpr)) { + Expr castExpr = getChild(1).castTo(ArrayType.create(Type.BOOLEAN, true)); + this.setChild(1, castExpr); + argTypes[1] = castExpr.getType(); + } + + fn = getBuiltinFunction(fnName.getFunction(), argTypes, + Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); + if (fn == null) { + LOG.warn("fn {} not exists", this.toSqlImpl()); + throw new AnalysisException(getFunctionNotFoundError(collectChildReturnTypes())); + } + fn.setReturnType(getChild(0).getType()); + }
+ if (fn == null) { // reject an unresolved function before fn is dereferenced by the log call below + LOG.warn("fn {} not exists", this.toSqlImpl()); + throw new AnalysisException(getFunctionNotFoundError(collectChildReturnTypes())); + } + LOG.info("fn string: " + fn.signatureString() + ". return type: " + fn.getReturnType()); this.type = fn.getReturnType(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionExpr.java index 5ead8c8aef..c5d3514927 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionExpr.java @@ -83,7 +83,7 @@ public class LambdaFunctionExpr extends Expr { // so could insert nested column by order. ColumnRefExpr column = new ColumnRefExpr(); column.setName(names.get(i)); - column.setcolumnId(columnId); + column.setColumnId(columnId); column.setNullable(true); column.setType(((ArrayType) paramType).getItemType()); columnId = columnId + 1; diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index b575dae588..222b1ba214 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -592,6 +592,25 @@ visible_functions = [ [['array_popfront'], 'ARRAY_VARCHAR', ['ARRAY_VARCHAR'], ''], [['array_popfront'], 'ARRAY_STRING', ['ARRAY_STRING'], ''], [['array_map'], 'ARRAY', ['LAMBDA_FUNCTION', 'ARRAY', '...'], ''], + [['array_filter'], 'ARRAY_BOOLEAN',['ARRAY_BOOLEAN', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_TINYINT',['ARRAY_TINYINT', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_SMALLINT',['ARRAY_SMALLINT', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_INT',['ARRAY_INT', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_BIGINT',['ARRAY_BIGINT', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_LARGEINT',['ARRAY_LARGEINT', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_TINYINT',['ARRAY_TINYINT', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_FLOAT',['ARRAY_FLOAT',
'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_DOUBLE',['ARRAY_DOUBLE', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_VARCHAR',['ARRAY_VARCHAR', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_STRING',['ARRAY_STRING', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_DECIMALV2',['ARRAY_DECIMALV2', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_DECIMAL32',['ARRAY_DECIMAL32', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_DECIMAL64',['ARRAY_DECIMAL64', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_DECIMAL128',['ARRAY_DECIMAL128', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_DATETIME',['ARRAY_DATETIME', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_DATE',['ARRAY_DATE', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_DATETIMEV2',['ARRAY_DATETIMEV2', 'ARRAY_BOOLEAN'], ''], + [['array_filter'], 'ARRAY_DATEV2',['ARRAY_DATEV2', 'ARRAY_BOOLEAN'], ''], [['array_pushfront'], 'ARRAY_BOOLEAN', ['ARRAY_BOOLEAN', 'BOOLEAN'], 'ALWAYS_NULLABLE'], [['array_pushfront'], 'ARRAY_TINYINT', ['ARRAY_TINYINT', 'TINYINT'], 'ALWAYS_NULLABLE'], diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_map_function.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_map_function.out index b864ad04d2..ab77b819a4 100644 --- a/regression-test/data/query_p0/sql_functions/array_functions/test_array_map_function.out +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_map_function.out @@ -92,3 +92,21 @@ -- !select_18 -- [NULL, 1, 2] +-- !select_20 -- +[1] + +-- !select_21 -- +[1, 2] + +-- !select_22 -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [2, 4] +2 [6, 7, 8] [10, 12, 13] [6, 8] +3 [1] [-100] [] +4 \N \N \N + +-- !select_23 -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [10, 20, -40, 80, -100] +2 [6, 7, 8] [10, 12, 13] [10, 12] +3 [1] [-100] [-100] +4 \N \N \N + diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_map_function_not_null.out 
b/regression-test/data/query_p0/sql_functions/array_functions/test_array_map_function_not_null.out new file mode 100644 index 0000000000..024fd4f605 --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_map_function_not_null.out @@ -0,0 +1,97 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select_1 -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] +2 [6, 7, 8] [10, 12, 13] +3 [1] [-100] + +-- !select_2 -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [1, 2, 3] +2 [6, 7, 8] [10, 12, 13] [1, 2, 3] +3 [1] [-100] [1, 2, 3] + +-- !select_3 -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [3, 4, 5] +2 [6, 7, 8] [10, 12, 13] [3, 4, 5] +3 [1] [-100] [3, 4, 5] + +-- !select_4 -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [1, 2, 3] +[6, 7, 8] [10, 12, 13] [1, 2, 3] +[1] [-100] [1, 2, 3] + +-- !select_5 -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [1, 4, 9] +[6, 7, 8] [10, 12, 13] [1, 4, 9] +[1] [-100] [1, 4, 9] + +-- !select_6 -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [1, 2, 3, 4, 5] +[6, 7, 8] [10, 12, 13] [6, 7, 8] +[1] [-100] [1] + +-- !select_7 -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [3, 4, 5, 6, 7] +[6, 7, 8] [10, 12, 13] [8, 9, 10] +[1] [-100] [3] + +-- !select_8 -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [1, 4, 9, 16, 25] +[6, 7, 8] [10, 12, 13] [36, 49, 64] +[1] [-100] [1] + +-- !select_9 -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [11, 22, -37, 84, -95] +[6, 7, 8] [10, 12, 13] [16, 19, 21] +[1] [-100] [-99] + +-- !select_10 -- +[1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [11, 24, -31, 96, -75] +[6, 7, 8] [10, 12, 13] [46, 61, 77] +[1] [-100] [-99] + +-- !select_11 -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [0, 0, 1, 0, 0] +2 [6, 7, 8] [10, 12, 13] [0, 0, 0] +3 [1] [-100] [0] + +-- !select_12 -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [0, 0, 0, 1, 1] +2 [6, 7, 8] [10, 12, 13] [1, 1, 1] +3 [1] [-100] [0] + +-- !select_13 -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [0, 0, 1, 0, 
1] +2 [6, 7, 8] [10, 12, 13] [0, 0, 0] +3 [1] [-100] [1] + +-- !select_14 -- +[] + +-- !select_15 -- +[NULL] + +-- !select_16 -- +[1] + +-- !select_17 -- +[1, 0, 0] + +-- !select_18 -- +[NULL, 1, 2] + +-- !select_20 -- +[1] + +-- !select_21 -- +[1, 2] + +-- !select_22 -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [2, 4] +2 [6, 7, 8] [10, 12, 13] [6, 8] +3 [1] [-100] [] + +-- !select_23 -- +1 [1, 2, 3, 4, 5] [10, 20, -40, 80, -100] [10, 20, -40, 80, -100] +2 [6, 7, 8] [10, 12, 13] [10, 12] +3 [1] [-100] [-100] + diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_map_function.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_map_function.groovy index 3d90d3f541..e516a12b03 100644 --- a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_map_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_map_function.groovy @@ -63,6 +63,11 @@ suite("test_array_map_function") { qt_select_17 "select array_map(x -> x is null, [null, 1, 2]);" qt_select_18 "select array_map(x -> abs(x), [null, 1, 2]);" + + qt_select_20 "select array_filter(x->abs(x), [1,null]);" + qt_select_21 "select array_filter(x->abs(x), [1,2]);" + qt_select_22 "select *,array_filter(x->x%2=0,c_array1) from array_test2 order by id;" + qt_select_23 "select *,array_filter(x->x%2=0,c_array2) from array_test2 order by id;" sql "DROP TABLE IF EXISTS ${tableName}" } \ No newline at end of file diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_map_function_not_null.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_map_function_not_null.groovy new file mode 100644 index 0000000000..6780976952 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_map_function_not_null.groovy @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license 
agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_array_map_function_not_null") { + + def tableName = "array_test2_not_null" + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE IF NOT EXISTS `${tableName}` ( + `id` int(11) , + `c_array1` array , + `c_array2` array + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2" + ) + """ + + + sql """INSERT INTO ${tableName} values + (1, [1,2,3,4,5], [10,20,-40,80,-100]), + (2, [6,7,8],[10,12,13]), (3, [1],[-100]); + """ + qt_select_1 "select * from ${tableName} order by id;" + + + qt_select_2 "select *, array_map(x->x,[1,2,3]) from ${tableName} order by id;" + qt_select_3 "select *, array_map(x->x+2,[1,2,3]) from ${tableName} order by id;" + qt_select_4 "select c_array1, c_array2, array_map(x->x,[1,2,3]) from array_test2_not_null order by id;" + qt_select_5 "select c_array1, c_array2, array_map(x->power(x,2),[1,2,3]) from array_test2_not_null order by id;" + + qt_select_6 "select c_array1, c_array2, array_map(x->x,c_array1) from array_test2_not_null order by id;" + qt_select_7 "select c_array1, c_array2, array_map(x->x+2,c_array1) from array_test2_not_null order by id;" + qt_select_8 "select c_array1, c_array2, 
array_map(x->power(x,2),c_array1) from array_test2_not_null order by id;" + + qt_select_9 "select c_array1, c_array2, array_map((x,y)->x+y,c_array1,c_array2) from array_test2_not_null order by id;" + qt_select_10 "select c_array1, c_array2, array_map((x,y)->power(x,2)+y,c_array1, c_array2) from array_test2_not_null order by id;" + + qt_select_11 "select *,array_map(x->x=3,c_array1) from array_test2_not_null order by id;" + qt_select_12 "select *,array_map(x->x>3,c_array1) from array_test2_not_null order by id;" + qt_select_13 "select *,array_map((x,y)->x>y,c_array1,c_array2) from array_test2_not_null order by id;" + + qt_select_14 "select array_map(x -> x,[]);" + qt_select_15 "select array_map(x -> x,[null]);" + qt_select_16 "select array_map(x -> x,[1]);" + qt_select_17 "select array_map(x -> x is null, [null, 1, 2]);" + qt_select_18 "select array_map(x -> abs(x), [null, 1, 2]);" + + + qt_select_20 "select array_filter(x->abs(x), [1,null]);" + qt_select_21 "select array_filter(x->abs(x), [1,2]);" + qt_select_22 "select *,array_filter(x->x%2=0,c_array1) from array_test2_not_null order by id;" + qt_select_23 "select *,array_filter(x->x%2=0,c_array2) from array_test2_not_null order by id;" + + sql "DROP TABLE IF EXISTS ${tableName}" +} \ No newline at end of file