diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index d5739e25b4..845a1c0d75 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -151,6 +151,7 @@ set(VEC_FILES functions/array/function_array_except.cpp functions/array/function_array_intersect.cpp functions/array/function_array_slice.cpp + functions/array/function_array_difference.cpp exprs/table_function/vexplode_json_array.cpp functions/math.cpp functions/function_bitmap.cpp diff --git a/be/src/vec/functions/array/function_array_difference.cpp b/be/src/vec/functions/array/function_array_difference.cpp new file mode 100644 index 0000000000..589d0c5b4e --- /dev/null +++ b/be/src/vec/functions/array/function_array_difference.cpp @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "vec/functions/array/function_array_difference.h" + +#include "vec/functions/array/function_array_mapped.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris::vectorized { + +void register_function_array_difference(SimpleFunctionFactory& factory) { + factory.register_function(); +} + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/functions/array/function_array_difference.h b/be/src/vec/functions/array/function_array_difference.h new file mode 100644 index 0000000000..3fa1792c95 --- /dev/null +++ b/be/src/vec/functions/array/function_array_difference.h @@ -0,0 +1,210 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "vec/columns/column.h" +#include "vec/columns/column_array.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_nullable.h" +#include "vec/data_types/data_type_number.h" +#include "vec/functions/function.h" +#include "vec/utils/util.hpp" + +namespace doris::vectorized { + +// Vectorized implementation of the SQL function array_difference(arr). +// For every input array the result array has the same length: its first element is a +// value-initialized (zero) element and element i is arr[i] - arr[i - 1]. +// A NULL input element makes both its own slot and the slot after it NULL. +class FunctionArrayDifference : public IFunction { +public: + static constexpr auto name = "array_difference"; + + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + bool is_variadic() const override { return false; } + + size_t get_number_of_arguments() const override { return 1; } + + bool use_default_implementation_for_nulls() const override { return true; } + + // Derives the result type from the nested element type of the array argument. + // Nullability of the nested type is preserved; a decimal input keeps the argument type. + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + DCHECK(is_array(arguments[0])) + << "argument for function: " << name << " should be DataTypeArray but it has type " + << arguments[0]->get_name() << "."; + auto nested_type = assert_cast(*(arguments[0])).get_nested_type(); + bool is_nullable = nested_type->is_nullable(); + + WhichDataType which(remove_nullable(nested_type)); + // The return element type is promoted to prevent the subtraction from overflowing, + // e.g. an int32 input yields an int64 result. + DataTypePtr return_type = nullptr; + if (which.is_uint8() || which.is_int8()) { + return_type = std::make_shared(); + } else if (which.is_uint16() || which.is_int16()) { + return_type = std::make_shared(); + } else if (which.is_uint32() || which.is_uint64() || which.is_int32()) { + return_type = std::make_shared(); + } else if (which.is_int64() || which.is_int128()) { + return_type = std::make_shared(); + } else if (which.is_float32() || which.is_float64()) { + return_type = std::make_shared(); + } else if (which.is_decimal()) { + return arguments[0]; + } + if (return_type) { + return std::make_shared(is_nullable ? 
make_nullable(return_type) + : return_type); + } else { + LOG(FATAL) << "Function of " << name + << " return type get wrong: and input argument is: " + << arguments[0]->get_name(); + } + } + + // Computes the result for the single array argument and stores it at position `result`. + // Returns a RuntimeError status when the nested element type is not supported. + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + const ColumnWithTypeAndName& arg = block.get_by_position(arguments[0]); + auto res_column = _execute_non_nullable(arg, input_rows_count); + if (!res_column) { + return Status::RuntimeError( + fmt::format("unsupported types for function {}({})", get_name(), + block.get_by_position(arguments[0]).type->get_name())); + } + DCHECK_EQ(arg.column->size(), res_column->size()); + block.replace_by_position(result, std::move(res_column)); + return Status::OK(); + } + +private: + // Writes the adjacent differences of src over the half-open range [begin, end) into dst: + // dst[begin] is value-initialized and every later dst[i] is src[i] minus src[i - 1], + // with both operands cast before subtracting. An empty range writes nothing, and src is + // never read at index `end`. + template + static void impl(const Element* __restrict src, Result* __restrict dst, size_t begin, + size_t end) { + size_t curr_pos = begin; + if (curr_pos < end) { + Element prev_element = src[curr_pos]; + dst[curr_pos] = {}; + curr_pos++; + for (; curr_pos < end; ++curr_pos) { + const Element curr_element = src[curr_pos]; + dst[curr_pos] = + static_cast(curr_element) - static_cast(prev_element); + prev_element = curr_element; + } + } + } + + // Runs impl() over every array delimited by `offsets`. When `nested_null_map` is set the + // values are wrapped in a nullable column whose map also marks the slot after each NULL. + template + ColumnPtr _execute_number_expanded(const ColumnArray::Offsets64& offsets, + const IColumn& nested_column, ColumnPtr nested_null_map) { + using ColVecType = ColumnVectorOrDecimal; + using ColVecResult = ColumnVectorOrDecimal; + typename ColVecResult::MutablePtr res_nested = nullptr; + + const auto& src_data = reinterpret_cast(nested_column).get_data(); + if constexpr (IsDecimalNumber) { + res_nested = ColVecResult::create(0, src_data.get_scale()); + } else { + res_nested = ColVecResult::create(); + } + auto size = nested_column.size(); + typename ColVecResult::Container& res_values = res_nested->get_data(); + res_values.resize(size); + + size_t pos = 0; + for (auto offset : offsets) { + 
impl(src_data.data(), res_values.data(), pos, offset); + pos = offset; + } + if (nested_null_map) { + auto null_map_col = ColumnUInt8::create(size, 0); + auto& null_map_col_data = null_map_col->get_data(); + auto nested_colum_data = static_cast*>(nested_null_map.get()); + // Seed the zero-filled result map from the input null map + // (NOTE(review): update_null_map presumably ORs the source flags in - confirm). + VectorizedUtils::update_null_map(null_map_col_data, nested_colum_data->get_data()); + // A difference whose left operand is NULL is NULL too. Walk each array backwards so a + // slot only sees the original flag of its predecessor; the first slot of an array is + // never touched. + size_t off = 0; + for (auto row_end : offsets) { + auto len = row_end - off; + for (auto idx = len ? len - 1 : 0; idx > 0; --idx) { + if (null_map_col_data[idx + off - 1]) { + null_map_col_data[idx + off] = 1; + } + } + off = row_end; + } + return ColumnNullable::create(std::move(res_nested), std::move(null_map_col)); + } else { + return res_nested; + } + } + + // Unpacks the array column (and the nested nullable wrapper, if any) and dispatches on the + // concrete nested column type. Returns nullptr when no branch matches. + ColumnPtr _execute_non_nullable(const ColumnWithTypeAndName& arg, size_t input_rows_count) { + // check array nested column type and get data + auto left_column = arg.column->convert_to_full_column_if_const(); + const auto& array_column = reinterpret_cast(*left_column); + const auto& offsets = array_column.get_offsets(); + DCHECK(offsets.size() == input_rows_count); + + ColumnPtr nested_column = nullptr; + ColumnPtr nested_null_map = nullptr; + if (is_column_nullable(array_column.get_data())) { + const auto& nested_null_column = + reinterpret_cast(array_column.get_data()); + nested_column = nested_null_column.get_nested_column_ptr(); + nested_null_map = nested_null_column.get_null_map_column_ptr(); + } else { + nested_column = array_column.get_data_ptr(); + } + + ColumnPtr res = nullptr; + if (check_column(*nested_column)) { + res = _execute_number_expanded(offsets, *nested_column, nested_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number_expanded(offsets, *nested_column, nested_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number_expanded(offsets, *nested_column, nested_null_map); + } 
else if (check_column(*nested_column)) { + res = _execute_number_expanded(offsets, *nested_column, nested_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number_expanded(offsets, *nested_column, nested_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number_expanded(offsets, *nested_column, + nested_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number_expanded(offsets, *nested_column, + nested_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number_expanded(offsets, *nested_column, + nested_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number_expanded(offsets, *nested_column, + nested_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number_expanded(offsets, *nested_column, + nested_null_map); + } else if (check_column(*nested_column)) { + res = _execute_number_expanded(offsets, *nested_column, + nested_null_map); + } + // No branch matched: let the caller report the unsupported type instead of building an + // array around a null nested column. + if (!res) { + return nullptr; + } + return ColumnArray::create(std::move(res), array_column.get_offsets_ptr()); + } +}; + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp index 39d4902e83..aff9e67d36 100644 --- a/be/src/vec/functions/array/function_array_register.cpp +++ b/be/src/vec/functions/array/function_array_register.cpp @@ -34,6 +34,7 @@ void register_function_array_union(SimpleFunctionFactory&); void register_function_array_except(SimpleFunctionFactory&); void register_function_array_intersect(SimpleFunctionFactory&); void register_function_array_slice(SimpleFunctionFactory&); +void register_function_array_difference(SimpleFunctionFactory&); void register_function_array(SimpleFunctionFactory& factory) { register_function_array_element(factory); @@ -48,6 +49,7 @@ void register_function_array(SimpleFunctionFactory& factory) { register_function_array_except(factory); register_function_array_intersect(factory); 
register_function_array_slice(factory); + register_function_array_difference(factory); } } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_difference.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_difference.md new file mode 100644 index 0000000000..3442b17d35 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_difference.md @@ -0,0 +1,66 @@ +--- +{ + "title": "array_difference", + "language": "en" +} +--- + + + +## array_difference + +### description + +#### Syntax + +``` +ARRAY array_difference(ARRAY arr) +``` + +Calculates the difference between adjacent array elements. +Returns an array whose first element is 0 and whose i-th element is a[i] - a[i-1] (the second element is a[1] - a[0], and so on). +Note that if an element is NULL, both the result at that position and the result at the following position are NULL. + +### notice + +`Only supported in vectorized engine` + +### example + +``` +mysql> set enable_vectorized_engine=true; + +mysql> select *,array_difference(k2) from array_type_table; ++------+-----------------------------+---------------------------------+ +| k1 | k2 | array_difference(`k2`) | ++------+-----------------------------+---------------------------------+ +| 0 | [] | [] | +| 1 | [NULL] | [NULL] | +| 2 | [1, 2, 3] | [0, 1, 1] | +| 3 | [1, NULL, 3] | [0, NULL, NULL] | +| 4 | [0, 1, 2, 3, NULL, 4, 6] | [0, 1, 1, 1, NULL, NULL, 2] | +| 5 | [1, 2, 3, 4, 5, 4, 3, 2, 1] | [0, 1, 1, 1, 1, -1, -1, -1, -1] | +| 6 | [6, 7, 8] | [0, 1, 1] | ++------+-----------------------------+---------------------------------+ +``` + +### keywords + +ARRAY, DIFFERENCE, ARRAY_DIFFERENCE diff --git a/docs/sidebars.json b/docs/sidebars.json index 57eca79a93..918464e3b1 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -180,6 +180,7 @@ "ecosystem/external-table/hudi-external-table", "ecosystem/external-table/iceberg-of-doris", "ecosystem/external-table/odbc-of-doris", + "ecosystem/external-table/jdbc-of-doris", "ecosystem/external-table/hive-of-doris" ] }, @@ 
-258,8 +259,10 @@ "sql-manual/sql-functions/array-functions/array_avg", "sql-manual/sql-functions/array-functions/size", "sql-manual/sql-functions/array-functions/array_distinct", + "sql-manual/sql-functions/array-functions/array_difference", "sql-manual/sql-functions/array-functions/array_union", - "sql-manual/sql-functions/array-functions/array_sum" + "sql-manual/sql-functions/array-functions/array_sum", + "sql-manual/sql-functions/array-functions/array_join" ] }, { diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_difference.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_difference.md new file mode 100644 index 0000000000..35b8756a7b --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_difference.md @@ -0,0 +1,66 @@ +--- +{ + "title": "array_difference", + "language": "zh-CN" +} +--- + + + +## array_difference + +### description + +#### Syntax + +``` +ARRAY array_difference(ARRAY arr) +``` + +计算相邻数组元素之间的差值。返回一个数组,其中第一个元素为0,第二个元素为a[1]-a[0],以此类推。 +注意:若某个元素为 NULL,则该位置及其下一个位置的结果均为 NULL。 + +### notice + +`仅支持向量化引擎中使用` + +### example + +``` +mysql> set enable_vectorized_engine=true; + +mysql> select *,array_difference(k2) from array_type_table; ++------+-----------------------------+---------------------------------+ +| k1 | k2 | array_difference(`k2`) | ++------+-----------------------------+---------------------------------+ +| 0 | [] | [] | +| 1 | [NULL] | [NULL] | +| 2 | [1, 2, 3] | [0, 1, 1] | +| 3 | [1, NULL, 3] | [0, NULL, NULL] | +| 4 | [0, 1, 2, 3, NULL, 4, 6] | [0, 1, 1, 1, NULL, NULL, 2] | +| 5 | [1, 2, 3, 4, 5, 4, 3, 2, 1] | [0, 1, 1, 1, 1, -1, -1, -1, -1] | +| 6 | [6, 7, 8] | [0, 1, 1] | ++------+-----------------------------+---------------------------------+ + +``` + +### keywords + +ARRAY, DIFFERENCE, ARRAY_DIFFERENCE diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index e832428207..224d14fc70 100755 --- 
a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -214,7 +214,16 @@ visible_functions = [ [['array_distinct'], 'ARRAY_DECIMALV2', ['ARRAY_DECIMALV2'], '', '', '', 'vec', ''], [['array_distinct'], 'ARRAY_VARCHAR', ['ARRAY_VARCHAR'], '', '', '', 'vec', ''], [['array_distinct'], 'ARRAY_STRING', ['ARRAY_STRING'], '', '', '', 'vec', ''], - + + [['array_difference'], 'ARRAY_SMALLINT', ['ARRAY_TINYINT'], '', '', '', 'vec', ''], + [['array_difference'], 'ARRAY_INT', ['ARRAY_SMALLINT'], '', '', '', 'vec', ''], + [['array_difference'], 'ARRAY_BIGINT', ['ARRAY_INT'], '', '', '', 'vec', ''], + [['array_difference'], 'ARRAY_LARGEINT', ['ARRAY_BIGINT'], '', '', '', 'vec', ''], + [['array_difference'], 'ARRAY_LARGEINT', ['ARRAY_LARGEINT'], '', '', '', 'vec', ''], + [['array_difference'], 'ARRAY_FLOAT', ['ARRAY_FLOAT'], '', '', '', 'vec', ''], + [['array_difference'], 'ARRAY_DOUBLE', ['ARRAY_DOUBLE'], '', '', '', 'vec', ''], + [['array_difference'], 'ARRAY_DECIMALV2', ['ARRAY_DECIMALV2'], '', '', '', 'vec', ''], + [['array_sort'], 'ARRAY_BOOLEAN', ['ARRAY_BOOLEAN'], '', '', '', 'vec', ''], [['array_sort'], 'ARRAY_TINYINT', ['ARRAY_TINYINT'], '', '', '', 'vec', ''], [['array_sort'], 'ARRAY_SMALLINT', ['ARRAY_SMALLINT'], '', '', '', 'vec', ''], diff --git a/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_of_array_difference.out b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_of_array_difference.out new file mode 100644 index 0000000000..864aee0f8d --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/array_functions/test_array_functions_of_array_difference.out @@ -0,0 +1,10 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select -- +1 [] [] +2 [NULL] [NULL] +3 [1, NULL, 3] [0, NULL, NULL] +4 [1, 2, 3] [0, 1, 1] +5 [16, 7, 8] [0, -9, 1] +6 [1, 2, 3, 4, 5, 4, 3, 2, 1] [0, 1, 1, 1, 1, -1, -1, -1, -1] +7 [1111, 12324, 8674, 123, 3434, 435, 45, 53, 54, 2] [0, 11213, -3650, -8551, 3311, -2999, -390, 8, 1, -52] + diff --git a/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions_of_array_difference.groovy b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions_of_array_difference.groovy new file mode 100644 index 0000000000..722fd7fd7c --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/array_functions/test_array_functions_of_array_difference.groovy @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_array_functions_of_array_difference") { + def tableName = "test_array_functions_of_array_difference" + // open enable_array_type + sql "ADMIN SET FRONTEND CONFIG ('enable_array_type' = 'true')" + // array functions only supported in vectorized engine + sql """ set enable_vectorized_engine = true """ + + sql """DROP TABLE IF EXISTS ${tableName}""" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `k1` int(11) NULL COMMENT "", + `k2` ARRAY NOT NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "storage_format" = "V2" + ) + """ + sql """ INSERT INTO ${tableName} VALUES(1, []) """ + sql """ INSERT INTO ${tableName} VALUES(2, [NULL]) """ + sql """ INSERT INTO ${tableName} VALUES(3, [1,NULL,3]) """ + sql """ INSERT INTO ${tableName} VALUES(4, [1,2,3]) """ + sql """ INSERT INTO ${tableName} VALUES(5, [16,7,8]) """ + sql """ INSERT INTO ${tableName} VALUES(6, [1,2,3,4,5,4,3,2,1]) """ + sql """ INSERT INTO ${tableName} VALUES(7, [1111,12324,8674,123,3434,435,45,53,54,2]) """ + + + qt_select "SELECT *, array_difference(k2) FROM ${tableName}" + +}