From cf72fa82e2de71e9d1bcbb08b6f52468f4ceb6de Mon Sep 17 00:00:00 2001 From: Benjaminwei <33219531+taptao@users.noreply.github.com> Date: Wed, 23 Apr 2025 23:27:07 +0800 Subject: [PATCH] [Improve](explode) explode function support multi param (#50310) ### What problem does this PR solve? backport:https://github.com/apache/doris/pull/48537 Issue Number: close #xxx Related PR: #xxx Problem Summary: ### Release note None ### Check List (For Author) - Test - [ ] Regression test - [ ] Unit Test - [ ] Manual test (add detailed scripts or steps below) - [ ] No need to test or manual test. Explain why: - [ ] This is a refactor/code format and no logic has been changed. - [ ] Previous test can cover this change. - [ ] No code files have been changed. - [ ] Other reason - Behavior changed: - [ ] No. - [ ] Yes. - Does this need documentation? - [ ] No. - [ ] Yes. ### Check List (For Reviewer who merge this PR) - [ ] Confirm the release note - [ ] Confirm test cases - [ ] Confirm document - [ ] Add branch pick label --- be/src/agent/be_exec_version_manager.h | 2 +- .../pipeline/exec/table_function_operator.cpp | 6 +- be/src/vec/exec/vtable_function_node.cpp | 3 +- .../table_function/table_function_factory.cpp | 13 +- .../table_function/table_function_factory.h | 17 +- .../vec/exprs/table_function/vexplode_v2.cpp | 209 +++++++++++++++++ be/src/vec/exprs/table_function/vexplode_v2.h | 55 +++++ be/src/vec/functions/function_fake.cpp | 55 ++++- .../vec/functions/simple_function_factory.h | 17 +- be/test/vec/function/function_test_util.cpp | 95 ++++++-- be/test/vec/function/function_test_util.h | 3 +- be/test/vec/function/table_function_test.cpp | 163 +++++++++++++- .../java/org/apache/doris/common/Config.java | 2 +- .../BuiltinTableGeneratingFunctions.java | 10 + .../nereids/parser/LogicalPlanBuilder.java | 10 +- .../functions/generator/Explode.java | 61 +++-- .../functions/generator/ExplodeOuter.java | 61 +++-- .../functions/generator/ExplodeOuterTest.java | 95 ++++++++ .../functions/generator/ExplodeTest.java | 95 ++++++++ .../sql_functions/table_function/explode.out | 211 ++++++++++++++++++ .../table_function/explode.groovy | 44 ++++ .../explode_array_decimal.groovy | 2 +- 22 files changed, 1162 insertions(+), 67 deletions(-) create mode 100644 be/src/vec/exprs/table_function/vexplode_v2.cpp create mode 100644 be/src/vec/exprs/table_function/vexplode_v2.h create mode 100644 fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuterTest.java create mode 100644 fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeTest.java diff --git a/be/src/agent/be_exec_version_manager.h b/be/src/agent/be_exec_version_manager.h index af43a2d5c8..ae055e9339 100644 --- a/be/src/agent/be_exec_version_manager.h +++ b/be/src/agent/be_exec_version_manager.h @@ -72,7 +72,7 @@ private: * h. "now": ALWAYS_NOT_NULLABLE -> DEPEND_ON_ARGUMENTS * i. change FunctionIsIPAddressInRange from AlwaysNotNullable to DependOnArguments. controlled by individual session variable. */ -constexpr inline int BeExecVersionManager::max_be_exec_version = 6; +constexpr inline int BeExecVersionManager::max_be_exec_version = 7; constexpr inline int BeExecVersionManager::min_be_exec_version = 0; /// functional diff --git a/be/src/pipeline/exec/table_function_operator.cpp b/be/src/pipeline/exec/table_function_operator.cpp index 8b16cb8dce..64047afbb2 100644 --- a/be/src/pipeline/exec/table_function_operator.cpp +++ b/be/src/pipeline/exec/table_function_operator.cpp @@ -55,7 +55,8 @@ Status TableFunctionLocalState::open(RuntimeState* state) { const std::string& tf_name = _vfn_ctxs[i]->root()->fn().name.function_name; vectorized::TableFunction* fn = nullptr; - RETURN_IF_ERROR(vectorized::TableFunctionFactory::get_fn(tf_name, state->obj_pool(), &fn)); + RETURN_IF_ERROR(vectorized::TableFunctionFactory::get_fn(tf_name, state->obj_pool(), &fn, + state->be_exec_version())); fn->set_expr_context(_vfn_ctxs[i]); _fns.push_back(fn); } @@ -266,7 +267,8 @@ Status TableFunctionOperatorX::init(const TPlanNode& tnode, RuntimeState* state) auto root = ctx->root(); const std::string& tf_name = root->fn().name.function_name; vectorized::TableFunction* fn = nullptr; - RETURN_IF_ERROR(vectorized::TableFunctionFactory::get_fn(tf_name, _pool, &fn)); + RETURN_IF_ERROR(vectorized::TableFunctionFactory::get_fn(tf_name, _pool, &fn, + state->be_exec_version())); fn->set_expr_context(ctx); _fns.push_back(fn); } diff --git a/be/src/vec/exec/vtable_function_node.cpp b/be/src/vec/exec/vtable_function_node.cpp index 7a2eb64af7..d8a57e2a66 100644 --- a/be/src/vec/exec/vtable_function_node.cpp +++ b/be/src/vec/exec/vtable_function_node.cpp @@ -60,7 +60,8 @@ Status VTableFunctionNode::init(const TPlanNode& tnode, RuntimeState* state) { auto root = ctx->root(); const std::string& tf_name = root->fn().name.function_name; TableFunction* fn = nullptr; - RETURN_IF_ERROR(TableFunctionFactory::get_fn(tf_name, _pool, &fn)); + RETURN_IF_ERROR( + TableFunctionFactory::get_fn(tf_name, _pool, &fn, state->be_exec_version())); fn->set_expr_context(ctx); _fns.push_back(fn); } diff --git a/be/src/vec/exprs/table_function/table_function_factory.cpp b/be/src/vec/exprs/table_function/table_function_factory.cpp index 6fd6c6cf9a..18855fa756 100644 --- a/be/src/vec/exprs/table_function/table_function_factory.cpp +++ b/be/src/vec/exprs/table_function/table_function_factory.cpp @@ -23,6 +23,7 @@ #include #include +#include "agent/be_exec_version_manager.h" #include "common/object_pool.h" #include "vec/exprs/table_function/table_function.h" #include "vec/exprs/table_function/vexplode.h" @@ -32,6 +33,7 @@ #include "vec/exprs/table_function/vexplode_map.h" #include "vec/exprs/table_function/vexplode_numbers.h" #include "vec/exprs/table_function/vexplode_split.h" +#include "vec/exprs/table_function/vexplode_v2.h" #include "vec/exprs/table_function/vposexplode.h" #include "vec/utils/util.hpp" @@ -61,14 +63,21 @@ const std::unordered_map {}}, {"explode_json_object", TableFunctionCreator {}}, {"posexplode", TableFunctionCreator {}}, - {"explode", TableFunctionCreator {}}}; + {"explode", TableFunctionCreator {}}, + {"explode_old", TableFunctionCreator {}}}; + +const std::unordered_map TableFunctionFactory::_function_to_replace = { + {"explode", "explode_old"}}; Status TableFunctionFactory::get_fn(const std::string& fn_name_raw, ObjectPool* pool, - TableFunction** fn) { + TableFunction** fn, + int be_version = BeExecVersionManager::get_newest_version()) { bool is_outer = match_suffix(fn_name_raw, COMBINATOR_SUFFIX_OUTER); std::string fn_name_real = is_outer ? remove_suffix(fn_name_raw, COMBINATOR_SUFFIX_OUTER) : fn_name_raw; + temporary_function_update(be_version, fn_name_real); + auto fn_iterator = _function_map.find(fn_name_real); if (fn_iterator != _function_map.end()) { *fn = pool->add(fn_iterator->second().release()); diff --git a/be/src/vec/exprs/table_function/table_function_factory.h b/be/src/vec/exprs/table_function/table_function_factory.h index a68a1763fc..478348f26e 100644 --- a/be/src/vec/exprs/table_function/table_function_factory.h +++ b/be/src/vec/exprs/table_function/table_function_factory.h @@ -33,10 +33,25 @@ class TableFunction; class TableFunctionFactory { public: TableFunctionFactory() = delete; - static Status get_fn(const std::string& fn_name_raw, ObjectPool* pool, TableFunction** fn); + static Status get_fn(const std::string& fn_name_raw, ObjectPool* pool, TableFunction** fn, + int be_version); const static std::unordered_map()>> _function_map; + + /// @TEMPORARY: for be_exec_version=7. replace function to old version. + const static std::unordered_map _function_to_replace; + const static int NEWEST_VERSION_EXPLODE_MULTI_PARAM = 7; + +private: + /// @TEMPORARY: for be_exec_version=7 + static void temporary_function_update(int fe_version_now, std::string& name) { + // replace if fe is old version. + if (fe_version_now < NEWEST_VERSION_EXPLODE_MULTI_PARAM && + _function_to_replace.find(name) != _function_to_replace.end()) { + name = _function_to_replace.at(name); + } + } }; } // namespace vectorized } // namespace doris diff --git a/be/src/vec/exprs/table_function/vexplode_v2.cpp b/be/src/vec/exprs/table_function/vexplode_v2.cpp new file mode 100644 index 0000000000..b4d4881fca --- /dev/null +++ b/be/src/vec/exprs/table_function/vexplode_v2.cpp @@ -0,0 +1,209 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/exprs/table_function/vexplode_v2.h" + +#include + +#include + +#include "common/status.h" +#include "vec/columns/column.h" +#include "vec/columns/column_array.h" +#include "vec/columns/column_nothing.h" +#include "vec/columns/column_object.h" +#include "vec/core/block.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_array.h" +#include "vec/data_types/data_type_nothing.h" +#include "vec/exprs/vexpr.h" +#include "vec/exprs/vexpr_context.h" +#include "vec/functions/function_helpers.h" + +namespace doris::vectorized { + +#include "vec/columns/column_struct.h" + +VExplodeV2TableFunction::VExplodeV2TableFunction() { + _fn_name = "vexplode"; +} + +Status VExplodeV2TableFunction::process_init(Block* block, RuntimeState* state) { + CHECK(_expr_context->root()->children().size() >= 1) + << "VExplodeV2TableFunction support one or more child but has " + << _expr_context->root()->children().size(); + + int value_column_idx = -1; + _multi_detail.resize(_expr_context->root()->children().size()); + _array_offsets.resize(_expr_context->root()->children().size()); + _array_columns.resize(_expr_context->root()->children().size()); + + for (int i = 0; i < _expr_context->root()->children().size(); i++) { + RETURN_IF_ERROR(_expr_context->root()->children()[i]->execute(_expr_context.get(), block, + &value_column_idx)); + _array_columns[i] = + block->get_by_position(value_column_idx).column->convert_to_full_column_if_const(); + if (!extract_column_array_info(*_array_columns[i], _multi_detail[i])) { + return Status::NotSupported( + "column type {} not supported now", + block->get_by_position(value_column_idx).column->get_name()); + } + } + + return Status::OK(); +} + +void VExplodeV2TableFunction::process_row(size_t row_idx) { + TableFunction::process_row(row_idx); + + for (int i = 0; i < _multi_detail.size(); i++) { + auto& detail = _multi_detail[i]; + if (!detail.array_nullmap_data || !detail.array_nullmap_data[row_idx]) { + _array_offsets[i] = (*detail.offsets_ptr)[row_idx - 1]; + // find max size in array + auto cur_size = (*detail.offsets_ptr)[row_idx] - _array_offsets[i]; + if (_cur_size < cur_size) { + _cur_size = cur_size; + } + } + } + _row_idx = row_idx; +} + +void VExplodeV2TableFunction::process_close() { + _multi_detail.clear(); + _array_offsets.clear(); + _array_columns.clear(); + _row_idx = 0; +} + +void VExplodeV2TableFunction::get_same_many_values(MutableColumnPtr& column, int length) { + if (current_empty()) { + column->insert_many_defaults(length); + return; + } + ColumnStruct* struct_column = nullptr; + if (_is_nullable) { + auto* nullable_column = assert_cast(column.get()); + struct_column = assert_cast(nullable_column->get_nested_column_ptr().get()); + auto* nullmap_column = + assert_cast(nullable_column->get_null_map_column_ptr().get()); + nullmap_column->insert_many_defaults(length); + } else { + struct_column = assert_cast(column.get()); + } + if (!struct_column) { + throw Exception(ErrorCode::INTERNAL_ERROR, + "Only multiple columns can be returned within a struct."); + } + for (int i = 0; i < _multi_detail.size(); i++) { + auto& detail = _multi_detail[i]; + size_t pos = _array_offsets[i] + _cur_offset; + size_t element_size = _multi_detail[i].array_col->size_at(_row_idx); + auto& struct_field = struct_column->get_column(i); + if ((detail.array_nullmap_data && detail.array_nullmap_data[_row_idx])) { + struct_field.insert_many_defaults(length); + } else { + auto* nullable_column = assert_cast(struct_field.get_ptr().get()); + auto* nullmap_column = + assert_cast(nullable_column->get_null_map_column_ptr().get()); + // only need to check if the value at position pos is null + if (element_size < _cur_offset || + (detail.nested_nullmap_data && detail.nested_nullmap_data[pos])) { + nullable_column->insert_many_defaults(length); + } else { + nullable_column->get_nested_column_ptr()->insert_many_from(*detail.nested_col, pos, + length); + nullmap_column->insert_many_defaults(length); + } + } + } +} + +int VExplodeV2TableFunction::get_value(MutableColumnPtr& column, int max_step) { + max_step = std::min(max_step, (int)(_cur_size - _cur_offset)); + if (current_empty()) { + column->insert_default(); + max_step = 1; + } else { + ColumnStruct* struct_column = nullptr; + if (_is_nullable) { + auto* nullable_column = assert_cast(column.get()); + struct_column = + assert_cast(nullable_column->get_nested_column_ptr().get()); + auto* nullmap_column = + assert_cast(nullable_column->get_null_map_column_ptr().get()); + nullmap_column->insert_many_defaults(max_step); + + } else { + struct_column = assert_cast(column.get()); + } + if (!struct_column) { + throw Exception(ErrorCode::INTERNAL_ERROR, + "Only multiple columns can be returned within a struct."); + } + for (int i = 0; i < _multi_detail.size(); i++) { + auto& detail = _multi_detail[i]; + size_t pos = _array_offsets[i] + _cur_offset; + size_t element_size = _multi_detail[i].array_col->size_at(_row_idx); + auto& struct_field = struct_column->get_column(i); + if (detail.array_nullmap_data && detail.array_nullmap_data[_row_idx]) { + struct_field.insert_many_defaults(max_step); + } else { + auto* nullable_column = assert_cast(struct_field.get_ptr().get()); + auto* nullmap_column = + assert_cast(nullable_column->get_null_map_column_ptr().get()); + if (element_size >= _cur_offset + max_step) { + nullable_column->get_nested_column_ptr()->insert_range_from(*detail.nested_col, + pos, max_step); + if (detail.nested_nullmap_data) { + for (int j = 0; j < max_step; j++) { + if (detail.nested_nullmap_data[pos + j]) { + nullmap_column->insert_value(1); + } else { + nullmap_column->insert_value(0); + } + } + } else { + nullmap_column->insert_many_defaults(max_step); + } + } else { + nullable_column->get_nested_column_ptr()->insert_range_from( + *detail.nested_col, pos, element_size - _cur_offset); + if (detail.nested_nullmap_data) { + for (int j = 0; j < element_size - _cur_offset; j++) { + if (detail.nested_nullmap_data[pos + j]) { + nullmap_column->insert_value(1); + } else { + nullmap_column->insert_value(0); + } + } + } else { + nullmap_column->insert_many_defaults(element_size - _cur_offset); + } + nullable_column->insert_many_defaults(max_step - (element_size - _cur_offset)); + } + } + } + } + + forward(max_step); + return max_step; +} + +} // namespace doris::vectorized diff --git a/be/src/vec/exprs/table_function/vexplode_v2.h b/be/src/vec/exprs/table_function/vexplode_v2.h new file mode 100644 index 0000000000..15188cb75f --- /dev/null +++ b/be/src/vec/exprs/table_function/vexplode_v2.h @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/status.h" +#include "vec/columns/column_struct.h" +#include "vec/data_types/data_type.h" +#include "vec/exprs/table_function/table_function.h" +#include "vec/functions/array/function_array_utils.h" + +namespace doris::vectorized { +class Block; +} // namespace doris::vectorized + +namespace doris::vectorized { + +class VExplodeV2TableFunction : public TableFunction { + ENABLE_FACTORY_CREATOR(VExplodeV2TableFunction); + +public: + VExplodeV2TableFunction(); + + ~VExplodeV2TableFunction() override = default; + + Status process_init(Block* block, RuntimeState* state) override; + void process_row(size_t row_idx) override; + void process_close() override; + void get_same_many_values(MutableColumnPtr& column, int length) override; + int get_value(MutableColumnPtr& column, int max_step) override; + +private: + std::vector _array_columns; + size_t _row_idx; + ColumnArrayExecutionDatas _multi_detail; + std::vector _array_offsets; +}; + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_fake.cpp b/be/src/vec/functions/function_fake.cpp index c72f4e4747..1bde70c876 100644 --- a/be/src/vec/functions/function_fake.cpp +++ b/be/src/vec/functions/function_fake.cpp @@ -67,6 +67,25 @@ struct FunctionExplode { static std::string get_error_msg() { return "Fake function do not support execute"; } }; +struct FunctionExplodeV2 { + static DataTypePtr get_return_type_impl(const DataTypes& arguments) { + DataTypes fieldTypes(arguments.size()); + for (int i = 0; i < arguments.size(); i++) { + auto nestedType = + check_and_get_data_type(arguments[i].get())->get_nested_type(); + if (nestedType->is_nullable()) { + fieldTypes[i] = nestedType; + } else { + fieldTypes[i] = make_nullable(nestedType); + } + } + + return make_nullable(std::make_shared(fieldTypes)); + } + static DataTypes get_variadic_argument_types() { return {}; } + static std::string get_error_msg() { return "Fake function do not support execute"; } +}; + // explode map: make map k,v as struct field struct FunctionExplodeMap { static DataTypePtr get_return_type_impl(const DataTypes& arguments) { @@ -133,6 +152,14 @@ void register_table_function_expand(SimpleFunctionFactory& factory, const std::s factory.register_function>(name + suffix); }; +template +void register_table_alternative_function_expand(SimpleFunctionFactory& factory, + const std::string& name, + const std::string& suffix) { + factory.register_alternative_function>(name); + factory.register_alternative_function>(name + suffix); +}; + template void register_table_function_expand_default(SimpleFunctionFactory& factory, const std::string& name, const std::string& suffix) { @@ -142,11 +169,28 @@ void register_table_function_expand_default(SimpleFunctionFactory& factory, cons name + suffix); }; +template +void register_table_alternative_function_expand_default(SimpleFunctionFactory& factory, + const std::string& name, + const std::string& suffix) { + factory.register_alternative_function< + FunctionFake>>(name); + factory.register_alternative_function< + FunctionFake>>(name + suffix); +}; + template void register_table_function_expand_outer(SimpleFunctionFactory& factory, const std::string& name) { register_table_function_expand(factory, name, COMBINATOR_SUFFIX_OUTER); }; +template +void register_table_alternative_function_expand_outer(SimpleFunctionFactory& factory, + const std::string& name) { + register_table_alternative_function_expand(factory, name, + COMBINATOR_SUFFIX_OUTER); +}; + template void register_table_function_expand_outer_default(SimpleFunctionFactory& factory, const std::string& name) { @@ -154,6 +198,13 @@ void register_table_function_expand_outer_default(SimpleFunctionFactory& factory COMBINATOR_SUFFIX_OUTER); }; +template +void register_table_alternative_function_expand_outer_default(SimpleFunctionFactory& factory, + const std::string& name) { + register_table_alternative_function_expand_default( + factory, name, COMBINATOR_SUFFIX_OUTER); +}; + template void register_table_function_with_impl(SimpleFunctionFactory& factory, const std::string& name, const std::string& suffix = "") { @@ -163,7 +214,9 @@ void register_table_function_with_impl(SimpleFunctionFactory& factory, const std void register_function_fake(SimpleFunctionFactory& factory) { register_function(factory, "esquery"); - register_table_function_expand_outer(factory, "explode"); + register_table_function_expand_outer(factory, "explode"); + register_table_alternative_function_expand_outer(factory, "explode"); + register_table_function_expand_outer(factory, "explode_map"); register_table_function_expand_outer(factory, "explode_json_object"); diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index c86d29aaa6..92ae50d168 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -115,6 +115,9 @@ class SimpleFunctionFactory { /// @TEMPORARY: for be_exec_version=4 constexpr static int NEWEST_VERSION_FUNCTION_SUBSTITUTE = 4; + /// @TEMPORARY: for be_exec_version=7. + constexpr static int NEWEST_VERSION_EXPLODE_MULTI_PARAM = 7; + public: void register_function(const std::string& name, const Creator& ptr) { DataTypes types = ptr()->get_variadic_argument_types(); @@ -145,6 +148,14 @@ public: register_function(name, &createDefaultFunction); } + /// @TEMPORARY: for be_exec_version=7 + template + void register_alternative_function(std::string name) { + static std::string suffix {"_old"}; + function_to_replace[name] = name + suffix; + register_function(name + suffix, &createDefaultFunction); + } + void register_alias(const std::string& name, const std::string& alias) { function_alias[alias] = name; } @@ -208,7 +219,7 @@ private: FunctionCreators function_creators; FunctionIsVariadic function_variadic_set; std::unordered_map function_alias; - /// @TEMPORARY: for be_exec_version=3. replace function to old version. + /// @TEMPORARY: for be_exec_version=7. replace function to old version. std::unordered_map function_to_replace; template @@ -216,10 +227,10 @@ private: return std::make_shared(Function::create()); } - /// @TEMPORARY: for be_exec_version=3 + /// @TEMPORARY: for be_exec_version=7 void temporary_function_update(int fe_version_now, std::string& name) { // replace if fe is old version. - if (fe_version_now < NEWEST_VERSION_FUNCTION_SUBSTITUTE && + if (fe_version_now < NEWEST_VERSION_EXPLODE_MULTI_PARAM && function_to_replace.find(name) != function_to_replace.end()) { name = function_to_replace[name]; } diff --git a/be/test/vec/function/function_test_util.cpp b/be/test/vec/function/function_test_util.cpp index 229396fba1..f50b90fd3d 100644 --- a/be/test/vec/function/function_test_util.cpp +++ b/be/test/vec/function/function_test_util.cpp @@ -25,6 +25,7 @@ #include "runtime/runtime_state.h" #include "util/binary_cast.hpp" #include "util/bitmap_value.h" +#include "vec/core/types.h" #include "vec/data_types/data_type_array.h" #include "vec/data_types/data_type_bitmap.h" #include "vec/data_types/data_type_date.h" @@ -33,7 +34,9 @@ #include "vec/data_types/data_type_ipv4.h" #include "vec/data_types/data_type_ipv6.h" #include "vec/data_types/data_type_jsonb.h" +#include "vec/data_types/data_type_map.h" #include "vec/data_types/data_type_string.h" +#include "vec/data_types/data_type_struct.h" #include "vec/data_types/data_type_time_v2.h" #include "vec/exprs/table_function/table_function.h" #include "vec/runtime/vdatetime_value.h" @@ -176,6 +179,29 @@ size_t type_index_to_data_type(const std::vector& input_types, size_t i type = std::make_shared(sub_type); return ret + 1; } + case TypeIndex::Struct: { + desc.type = doris::PrimitiveType::TYPE_STRUCT; + ++index; + size_t ret = 0; + DataTypes sub_types; + while (index < input_types.size()) { + ut_type::UTDataTypeDesc sub_desc; + DataTypePtr sub_type = nullptr; + size_t inner_ret = type_index_to_data_type(input_types, index, sub_desc, sub_type); + if (inner_ret <= 0) { + return inner_ret; + } + ret += inner_ret; + desc.children.push_back(sub_desc.type_desc); + if (sub_desc.is_nullable) { + sub_type = make_nullable(sub_type); + sub_types.push_back(sub_type); + } + ++index; + } + type = std::make_shared(sub_types); + return ret + 1; + } case TypeIndex::Nullable: { ++index; size_t ret = type_index_to_data_type(input_types, index, ut_desc, type); @@ -307,6 +333,28 @@ bool insert_cell(MutableColumnPtr& column, DataTypePtr type_ptr, const AnyType& } else if (type.is_array()) { auto v = any_cast(cell); column->insert(v); + } else if (type.is_struct()) { + auto v = any_cast(cell); + auto struct_type = assert_cast(type_ptr.get()); + auto nullable_column = assert_cast(column.get()); + auto* struct_column = + assert_cast(nullable_column->get_nested_column_ptr().get()); + auto* nullmap_column = + assert_cast(nullable_column->get_null_map_column_ptr().get()); + nullmap_column->insert_default(); + for (size_t i = 0; i < v.size(); ++i) { + auto& field = v[i]; + auto col = struct_column->get_column(i).get_ptr(); + RETURN_IF_FALSE(insert_cell(col, struct_type->get_element(i), field)); + } + } else if (type.is_nullable()) { + auto nullable_column = assert_cast(column.get()); + auto col_type = remove_nullable(type_ptr); + auto col = nullable_column->get_nested_column_ptr(); + auto* nullmap_column = + assert_cast(nullable_column->get_null_map_column_ptr().get()); + nullmap_column->insert_default(); + RETURN_IF_FALSE(insert_cell(col, col_type, cell)); } else { LOG(WARNING) << "dataset not supported for TypeIndex:" << (int)type.idx; return false; @@ -324,32 +372,39 @@ Block* create_block_from_inputset(const InputTypeSet& input_types, const InputDa // 1.1 insert data and create block auto row_size = input_set.size(); std::unique_ptr block = Block::create_unique(); + + auto input_set_size = input_set[0].size(); + // 1.2 calculate the input column size + auto input_col_size = input_set_size / descs.size(); + for (size_t i = 0; i < descs.size(); ++i) { auto& desc = descs[i]; - auto column = desc.data_type->create_column(); - column->reserve(row_size); + for (size_t j = 0; j < input_col_size; ++j) { + auto column = desc.data_type->create_column(); + column->reserve(row_size); - auto type_ptr = desc.data_type->is_nullable() - ? ((DataTypeNullable*)(desc.data_type.get()))->get_nested_type() - : desc.data_type; - WhichDataType type(type_ptr); + auto type_ptr = desc.data_type->is_nullable() + ? ((DataTypeNullable*)(desc.data_type.get()))->get_nested_type() + : desc.data_type; + WhichDataType type(type_ptr); - for (int j = 0; j < row_size; j++) { - if (!insert_cell(column, type_ptr, input_set[j][i])) { - return nullptr; + for (int r = 0; r < row_size; r++) { + if (!insert_cell(column, type_ptr, input_set[r][i * input_col_size + j])) { + return nullptr; + } } - } - if (desc.is_const) { - column = ColumnConst::create(std::move(column), row_size); + if (desc.is_const) { + column = ColumnConst::create(std::move(column), row_size); + } + block->insert({std::move(column), desc.data_type, desc.col_name}); } - block->insert({std::move(column), desc.data_type, desc.col_name}); } return block.release(); } Block* process_table_function(TableFunction* fn, Block* input_block, - const InputTypeSet& output_types) { + const InputTypeSet& output_types, bool test_get_value_func) { // pasrse output data types ut_type::UTDataTypeDescs descs; if (!parse_ut_data_type(output_types, descs)) { @@ -383,8 +438,12 @@ Block* process_table_function(TableFunction* fn, Block* input_block, } do { - fn->get_same_many_values(column, 1); - fn->forward(); + if (test_get_value_func) { + fn->get_value(column, 10); + } else { + fn->get_same_many_values(column, 1); + fn->forward(); + } } while (!fn->eos()); } @@ -395,7 +454,7 @@ Block* process_table_function(TableFunction* fn, Block* input_block, void check_vec_table_function(TableFunction* fn, const InputTypeSet& input_types, const InputDataSet& input_set, const InputTypeSet& output_types, - const InputDataSet& output_set) { + const InputDataSet& output_set, const bool test_get_value_func) { std::unique_ptr input_block(create_block_from_inputset(input_types, input_set)); EXPECT_TRUE(input_block != nullptr); @@ -404,7 +463,7 @@ void check_vec_table_function(TableFunction* fn, const InputTypeSet& input_types EXPECT_TRUE(expect_output_block != nullptr); std::unique_ptr real_output_block( - process_table_function(fn, input_block.get(), output_types)); + process_table_function(fn, input_block.get(), output_types, test_get_value_func)); EXPECT_TRUE(real_output_block != nullptr); // compare real_output_block with expect_output_block diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 34cd86eccd..b67d0e6950 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -40,6 +40,7 @@ #include "vec/columns/column_complex.h" #include "vec/columns/column_const.h" #include "vec/columns/column_nullable.h" +#include "vec/columns/column_struct.h" #include "vec/common/string_ref.h" #include "vec/core/block.h" #include "vec/core/column_numbers.h" @@ -194,7 +195,7 @@ Block* process_table_function(TableFunction* fn, Block* input_block, const InputTypeSet& output_types); void check_vec_table_function(TableFunction* fn, const InputTypeSet& input_types, const InputDataSet& input_set, const InputTypeSet& output_types, - const InputDataSet& output_set); + const InputDataSet& output_set, bool test_get_value_func = false); // Null values are represented by Null() // The type of the constant column is represented as follows: Consted {TypeIndex::String} diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp index 9031faf69b..92471d337c 100644 --- a/be/test/vec/function/table_function_test.cpp +++ b/be/test/vec/function/table_function_test.cpp @@ -34,6 +34,7 @@ #include "vec/exprs/table_function/vexplode.h" #include "vec/exprs/table_function/vexplode_numbers.h" #include "vec/exprs/table_function/vexplode_split.h" +#include "vec/exprs/table_function/vexplode_v2.h" #include "vec/exprs/vexpr_context.h" #include "vec/function/function_test_util.h" @@ -125,6 +126,61 @@ TEST_F(TableFunctionTest, vexplode_outer) { } } +TEST_F(TableFunctionTest, vexplode_outer_v2) { + init_expr_context(1); + VExplodeV2TableFunction explode_outer; + explode_outer.set_outer(); + explode_outer.set_expr_context(_ctx); + + // explode_outer(Array) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32}; + Array vec = {Int32(1), Null(), Int32(2), Int32(3)}; + InputDataSet input_set = {{vec}, {Null()}, {Array()}}; + + InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::Int32}; + + InputDataSet output_set = {{{CellSet {Int32(1)}}}, {{CellSet {Null()}}}, + {{CellSet {Int32(2)}}}, {{CellSet {Int32(3)}}}, + {{CellSet {Null()}}}, {{CellSet {Null()}}}}; + + check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); + } + + // explode_outer(Array) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; + Array vec = {Field(std::string("abc")), Field(std::string("")), Field(std::string("def"))}; + InputDataSet input_set = {{Null()}, {Array()}, {vec}}; + + InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String}; + + InputDataSet output_set = {{{CellSet {Null()}}}, + {{CellSet {Null()}}}, + {{CellSet {std::string("abc")}}}, + {{CellSet {std::string("")}}}, + {{CellSet {std::string("def")}}}}; + + check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); + } + + // explode_outer(Array) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128V2}; + Array vec = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67)}; + InputDataSet input_set = {{Null()}, {Array()}, {vec}}; + + InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::Decimal128V2}; + + InputDataSet output_set = {{{CellSet {Null()}}}, + {{CellSet {Null()}}}, + {{CellSet {ut_type::DECIMAL(17014116.67)}}}, + {{CellSet {ut_type::DECIMAL(-17014116.67)}}}}; + + check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set); + } +} + TEST_F(TableFunctionTest, vexplode) { init_expr_context(1); VExplodeTableFunction explode; @@ -168,6 +224,111 @@ TEST_F(TableFunctionTest, vexplode) { } } +TEST_F(TableFunctionTest, vexplode_v2) { + init_expr_context(1); + VExplodeV2TableFunction explode; + explode.set_expr_context(_ctx); + + // explode(Array) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32}; + + Array vec = {Int32(1), Null(), Int32(2), Int32(3)}; + InputDataSet input_set = {{vec}, {Null()}, {Array()}}; + + InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::Int32}; + InputDataSet output_set = {{{CellSet {Int32(1)}}}, + {{CellSet {Null()}}}, + {{CellSet {Int32(2)}}}, + {{CellSet {Int32(3)}}}}; + + check_vec_table_function(&explode, input_types, input_set, output_types, output_set); + } + + // explode(Array) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; + Array vec = {Field(std::string("abc")), Field(std::string("")), Field(std::string("def"))}; + InputDataSet input_set = {{Null()}, {Array()}, {vec}}; + + InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String}; + + InputDataSet output_set = {{{CellSet {std::string("abc")}}}, + {{CellSet {std::string("")}}}, + {{CellSet {std::string("def")}}}}; + check_vec_table_function(&explode, input_types, input_set, output_types, output_set); + } + + // explode(Array) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date}; + Array vec = {Null(), str_to_date_time("2022-01-02", false)}; + InputDataSet input_set = {{Null()}, {Array()}, {vec}}; + + InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::Date}; + + InputDataSet output_set = {{{CellSet {Null()}}}, {{CellSet {std::string("2022-01-02")}}}}; + check_vec_table_function(&explode, input_types, input_set, output_types, output_set); + } +} + +TEST_F(TableFunctionTest, vexplode_v2_two_param) { + init_expr_context(2); + VExplodeV2TableFunction explode; + explode.set_expr_context(_ctx); + // explode(Array, Array) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; + Array vec = {Field(std::string("one")), Field(std::string("two")), + Field(std::string("three"))}; + Array vec1 = {Field(std::string("1")), Field(std::string("2")), Field(std::string("3"))}; + InputDataSet input_set = {{vec, vec1}}; + + InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String, TypeIndex::String}; + + InputDataSet output_set = {{{CellSet {std::string("one"), std::string("1")}}}, + {{CellSet {std::string("two"), std::string("2")}}}, + {{CellSet {std::string("three"), std::string("3")}}}}; + check_vec_table_function(&explode, input_types, input_set, output_types, output_set, false); + check_vec_table_function(&explode, input_types, input_set, output_types, output_set, true); + } + + // explode(null, Array) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; + Array vec = {Field(std::string("one")), Field(std::string("two")), + Field(std::string("three"))}; + InputDataSet input_set = {{Null(), vec}}; + + InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String, TypeIndex::String}; + + InputDataSet output_set = {{{CellSet {Null(), std::string("one")}}}, + {{CellSet {Null(), std::string("two")}}}, + {{CellSet {Null(), std::string("three")}}}}; + + check_vec_table_function(&explode, input_types, input_set, output_types, output_set, false); + check_vec_table_function(&explode, input_types, input_set, output_types, output_set, true); + } + + // explode(Array, Array) + { + InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String}; + Array vec = {Field(std::string("one")), Field(std::string("two")), + Field(std::string("three"))}; + Array vec1 = {Field(std::string("1")), Field(Null()), Field(std::string("3"))}; + InputDataSet input_set = {{vec, vec1}}; + + InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String, TypeIndex::String}; + + InputDataSet output_set = {{{CellSet {std::string("one"), std::string("1")}}}, + {{CellSet {std::string("two"), Null()}}}, + {{CellSet {std::string("three"), std::string("3")}}}}; + + check_vec_table_function(&explode, input_types, input_set, output_types, output_set, false); + check_vec_table_function(&explode, input_types, input_set, output_types, output_set, true); + } +} + TEST_F(TableFunctionTest, vexplode_numbers) { init_expr_context(1); VExplodeNumbersTableFunction tfn; @@ -215,4 +376,4 @@ TEST_F(TableFunctionTest, vexplode_split) { } } -} // namespace doris::vectorized +} // namespace doris::vectorized \ No newline at end of file diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index e4d5c768aa..f25bb4de3b 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1943,7 +1943,7 @@ public class Config extends ConfigBase { * Max data version of backends serialize block. */ @ConfField(mutable = false) - public static int max_be_exec_version = 6; + public static int max_be_exec_version = 7; /** * Min data version of backends serialize block. diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java index da8614cbf6..01c5f72cd1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java @@ -41,8 +41,11 @@ import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplode import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplodeOuter; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.ImmutableSortedSet; import java.util.List; +import java.util.Set; /** * Builtin table generating functions. @@ -76,6 +79,13 @@ public class BuiltinTableGeneratingFunctions implements FunctionHelper { tableGenerating(PosExplodeOuter.class, "posexplode_outer") ); + public static final ImmutableSet RETURN_MULTI_COLUMNS_FUNCTIONS = new ImmutableSortedSet.Builder( + String.CASE_INSENSITIVE_ORDER).add("explode").add("explode_outer").build(); + + public Set getReturnManyColumnFunctions() { + return RETURN_MULTI_COLUMNS_FUNCTIONS; + } + public static final BuiltinTableGeneratingFunctions INSTANCE = new BuiltinTableGeneratingFunctions(); // Note: Do not add any code here! diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index b41a8850ad..86e049c37e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -26,6 +26,7 @@ import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.analysis.UserIdentity; import org.apache.doris.catalog.AggregateType; import org.apache.doris.catalog.BuiltinAggregateFunctions; +import org.apache.doris.catalog.BuiltinTableGeneratingFunctions; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.ScalarType; @@ -1206,8 +1207,13 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor { String generateName = ctx.tableName.getText(); // if later view explode map type, we need to add a project to convert map to struct String columnName = ctx.columnNames.get(0).getText(); - List expandColumnNames = Lists.newArrayList(); - if (ctx.columnNames.size() > 1) { + List expandColumnNames = ImmutableList.of(); + + // explode can pass multiple columns + // then use struct to return the result of the expansion of multiple columns. + if (ctx.columnNames.size() > 1 + || BuiltinTableGeneratingFunctions.INSTANCE.getReturnManyColumnFunctions() + .contains(ctx.functionName.getText())) { columnName = ConnectContext.get() != null ? ConnectContext.get().getStatementContext().generateColumnName() : "expand_cols"; expandColumnNames = ctx.columnNames.stream() diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java index 7fb87b055f..22b80c7f6d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/Explode.java @@ -20,31 +20,34 @@ package org.apache.doris.nereids.trees.expressions.functions.generator; import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; -import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; +import org.apache.doris.nereids.trees.expressions.functions.ComputePrecision; +import org.apache.doris.nereids.trees.expressions.functions.CustomSignature; +import org.apache.doris.nereids.trees.expressions.functions.SearchSignature; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.ArrayType; -import org.apache.doris.nereids.types.coercion.AnyDataType; -import org.apache.doris.nereids.types.coercion.FollowToAnyDataType; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.NullType; +import org.apache.doris.nereids.types.StructField; +import org.apache.doris.nereids.types.StructType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import java.util.ArrayList; import java.util.List; /** - * explode([1, 2, 3]), generate 3 lines include 1, 2 and 3. + * explode([1, 2, 3]), generate three rows include 1, 2 and 3. + * explode([1, 2, 3], [4, 5, 6]) generates two columns and three rows + * where the first column contains 1, 2, 3, and the second column contains 4, 5, 6. */ -public class Explode extends TableGeneratingFunction implements BinaryExpression, AlwaysNullable { - - public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(new FollowToAnyDataType(0)).args(ArrayType.of(new AnyDataType(0))) - ); +public class Explode extends TableGeneratingFunction implements CustomSignature, ComputePrecision, AlwaysNullable { /** - * constructor with 1 argument. + * constructor with one or more argument. */ - public Explode(Expression arg) { - super("explode", arg); + public Explode(Expression[] args) { + super("explode", args); } /** @@ -52,17 +55,43 @@ public class Explode extends TableGeneratingFunction implements BinaryExpression */ @Override public Explode withChildren(List children) { - Preconditions.checkArgument(children.size() == 1); - return new Explode(children.get(0)); + Preconditions.checkArgument(!children.isEmpty()); + return new Explode(children.toArray(new Expression[0])); } @Override - public List getSignatures() { - return SIGNATURES; + public FunctionSignature computePrecision(FunctionSignature signature) { + return signature; + } + + @Override + public FunctionSignature customSignature() { + List arguments = new ArrayList<>(); + ImmutableList.Builder structFields = ImmutableList.builder(); + for (int i = 0; i < children.size(); i++) { + if (children.get(i).getDataType().isNullType()) { + arguments.add(ArrayType.of(NullType.INSTANCE)); + structFields.add( + new StructField("col" + (i + 1), NullType.INSTANCE, true, "")); + } else if (children.get(i).getDataType().isArrayType()) { + structFields.add( + new StructField("col" + (i + 1), + ((ArrayType) (children.get(i)).getDataType()).getItemType(), true, "")); + arguments.add(children.get(i).getDataType()); + } else { + SearchSignature.throwCanNotFoundFunctionException(this.getName(), getArguments()); + } + } + return FunctionSignature.of(new StructType(structFields.build()), arguments); } @Override public R accept(ExpressionVisitor visitor, C context) { return visitor.visitExplode(this, context); } + + @Override + public FunctionSignature searchSignature(List signatures) { + return super.searchSignature(signatures); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java index 5d90546ebe..cbcc2eaa1c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuter.java @@ -20,31 +20,34 @@ package org.apache.doris.nereids.trees.expressions.functions.generator; import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; -import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; +import org.apache.doris.nereids.trees.expressions.functions.ComputePrecision; +import org.apache.doris.nereids.trees.expressions.functions.CustomSignature; +import org.apache.doris.nereids.trees.expressions.functions.SearchSignature; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.ArrayType; -import org.apache.doris.nereids.types.coercion.AnyDataType; -import org.apache.doris.nereids.types.coercion.FollowToAnyDataType; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.NullType; +import org.apache.doris.nereids.types.StructField; +import org.apache.doris.nereids.types.StructType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import java.util.ArrayList; import java.util.List; /** - * explode([1, 2, 3]), generate 3 lines include 1, 2 and 3. + * explode_outer([1, 2, 3]), generate three rows include 1, 2 and 3. + * explode_outer([1, 2, 3], [4, 5, 6]) generates two columns and three rows + * where the first column contains 1, 2, 3, and the second column contains 4, 5, 6. */ -public class ExplodeOuter extends TableGeneratingFunction implements BinaryExpression, AlwaysNullable { - - public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(new FollowToAnyDataType(0)).args(ArrayType.of(new AnyDataType(0))) - ); +public class ExplodeOuter extends TableGeneratingFunction implements CustomSignature, ComputePrecision, AlwaysNullable { /** - * constructor with 1 argument. + * constructor with one or more argument. */ - public ExplodeOuter(Expression arg) { - super("explode_outer", arg); + public ExplodeOuter(Expression[] args) { + super("explode_outer", args); } /** @@ -52,17 +55,43 @@ public class ExplodeOuter extends TableGeneratingFunction implements BinaryExpre */ @Override public ExplodeOuter withChildren(List children) { - Preconditions.checkArgument(children.size() == 1); - return new ExplodeOuter(children.get(0)); + Preconditions.checkArgument(!children.isEmpty()); + return new ExplodeOuter(children.toArray(new Expression[0])); } @Override - public List getSignatures() { - return SIGNATURES; + public FunctionSignature computePrecision(FunctionSignature signature) { + return signature; + } + + @Override + public FunctionSignature customSignature() { + List arguments = new ArrayList<>(); + ImmutableList.Builder structFields = ImmutableList.builder(); + for (int i = 0; i < children.size(); i++) { + if (children.get(i).getDataType().isNullType()) { + arguments.add(ArrayType.of(NullType.INSTANCE)); + structFields.add( + new StructField("col" + (i + 1), NullType.INSTANCE, true, "")); + } else if (children.get(i).getDataType().isArrayType()) { + structFields.add( + new StructField("col" + (i + 1), + ((ArrayType) (children.get(i)).getDataType()).getItemType(), true, "")); + arguments.add(children.get(i).getDataType()); + } else { + SearchSignature.throwCanNotFoundFunctionException(this.getName(), getArguments()); + } + } + return FunctionSignature.of(new StructType(structFields.build()), arguments); } @Override public R accept(ExpressionVisitor visitor, C context) { return visitor.visitExplodeOuter(this, context); } + + @Override + public FunctionSignature searchSignature(List signatures) { + return super.searchSignature(signatures); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuterTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuterTest.java new file mode 100644 index 0000000000..fd381a4e7d --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeOuterTest.java @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.generator; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.types.ArrayType; +import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.nereids.types.NullType; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.StructType; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; + +public class ExplodeOuterTest { + + ///////////////////////////////////////// + // GetSignatures + ///////////////////////////////////////// + + @Test + public void testGetSignatures() { + // build explode_outer(array, array) expression + Expression[] args = {SlotReference.of("int", ArrayType.of(IntegerType.INSTANCE)), + SlotReference.of("str", ArrayType.of(StringType.INSTANCE))}; + ExplodeOuter explode = new ExplodeOuter(args); + + // check signature + List signatures = explode.getSignatures(); + Assertions.assertEquals(1, signatures.size()); + FunctionSignature signature = signatures.get(0); + Assertions.assertEquals(2, signature.argumentsTypes.size()); + Assertions.assertTrue(signature.argumentsTypes.get(0).isArrayType()); + Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(0)).getItemType().isIntegerType()); + Assertions.assertTrue(signature.argumentsTypes.get(1).isArrayType()); + Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(1)).getItemType().isStringType()); + Assertions.assertTrue(signature.returnType.isStructType()); + StructType returnType = (StructType) signature.returnType; + Assertions.assertEquals(2, returnType.getFields().size()); + Assertions.assertEquals(IntegerType.INSTANCE, returnType.getFields().get(0).getDataType()); + Assertions.assertEquals(StringType.INSTANCE, returnType.getFields().get(1).getDataType()); + } + + @Test + public void testGetSignaturesWithNull() { + // build explode(null, array) expression + Expression[] args = { SlotReference.of("null", NullType.INSTANCE), SlotReference.of("int", ArrayType.of(IntegerType.INSTANCE))}; + ExplodeOuter explode = new ExplodeOuter(args); + + // check signature + List signatures = explode.getSignatures(); + Assertions.assertEquals(1, signatures.size()); + FunctionSignature signature = signatures.get(0); + Assertions.assertEquals(2, signature.argumentsTypes.size()); + Assertions.assertTrue(signature.argumentsTypes.get(0).isArrayType()); + Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(0)).getItemType().isNullType()); + Assertions.assertTrue(signature.argumentsTypes.get(1).isArrayType()); + Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(1)).getItemType().isIntegerType()); + Assertions.assertTrue(signature.returnType.isStructType()); + StructType returnType = (StructType) signature.returnType; + Assertions.assertEquals(2, returnType.getFields().size()); + Assertions.assertEquals(NullType.INSTANCE, returnType.getFields().get(0).getDataType()); + Assertions.assertEquals(IntegerType.INSTANCE, returnType.getFields().get(1).getDataType()); + } + + @Test + public void testGetSignaturesWithInvalidArgument() { + // build explode_outer(int) + Expression[] args = { SlotReference.of("int", IntegerType.INSTANCE) }; + ExplodeOuter explode = new ExplodeOuter(args); + + Assertions.assertThrows(AnalysisException.class, explode::getSignatures); + } + +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeTest.java new file mode 100644 index 0000000000..0cb61d6171 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeTest.java @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.generator; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.types.ArrayType; +import org.apache.doris.nereids.types.IntegerType; +import org.apache.doris.nereids.types.NullType; +import org.apache.doris.nereids.types.StringType; +import org.apache.doris.nereids.types.StructType; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; + +public class ExplodeTest { + + ///////////////////////////////////////// + // GetSignatures + ///////////////////////////////////////// + + @Test + public void testGetSignatures() { + // build explode(array, array) expression + Expression[] args = {SlotReference.of("int", ArrayType.of(IntegerType.INSTANCE)), + SlotReference.of("str", ArrayType.of(StringType.INSTANCE))}; + Explode explode = new Explode(args); + + // check signature + List signatures = explode.getSignatures(); + Assertions.assertEquals(1, signatures.size()); + FunctionSignature signature = signatures.get(0); + Assertions.assertEquals(2, signature.argumentsTypes.size()); + Assertions.assertTrue(signature.argumentsTypes.get(0).isArrayType()); + Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(0)).getItemType().isIntegerType()); + Assertions.assertTrue(signature.argumentsTypes.get(1).isArrayType()); + Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(1)).getItemType().isStringType()); + Assertions.assertTrue(signature.returnType.isStructType()); + StructType returnType = (StructType) signature.returnType; + Assertions.assertEquals(2, returnType.getFields().size()); + Assertions.assertEquals(IntegerType.INSTANCE, returnType.getFields().get(0).getDataType()); + Assertions.assertEquals(StringType.INSTANCE, returnType.getFields().get(1).getDataType()); + } + + @Test + public void testGetSignaturesWithNull() { + // build explode(null, array) expression + Expression[] args = { SlotReference.of("null", NullType.INSTANCE), SlotReference.of("int", ArrayType.of(IntegerType.INSTANCE))}; + Explode explode = new Explode(args); + + // check signature + List signatures = explode.getSignatures(); + Assertions.assertEquals(1, signatures.size()); + FunctionSignature signature = signatures.get(0); + Assertions.assertEquals(2, signature.argumentsTypes.size()); + Assertions.assertTrue(signature.argumentsTypes.get(0).isArrayType()); + Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(0)).getItemType().isNullType()); + Assertions.assertTrue(signature.argumentsTypes.get(1).isArrayType()); + Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(1)).getItemType().isIntegerType()); + Assertions.assertTrue(signature.returnType.isStructType()); + StructType returnType = (StructType) signature.returnType; + Assertions.assertEquals(2, returnType.getFields().size()); + Assertions.assertEquals(NullType.INSTANCE, returnType.getFields().get(0).getDataType()); + Assertions.assertEquals(IntegerType.INSTANCE, returnType.getFields().get(1).getDataType()); + } + + @Test + public void testGetSignaturesWithInvalidArgument() { + // build explode(int) + Expression[] args = { SlotReference.of("int", IntegerType.INSTANCE) }; + Explode explode = new Explode(args); + + Assertions.assertThrows(AnalysisException.class, explode::getSignatures); + } + +} diff --git a/regression-test/data/query_p0/sql_functions/table_function/explode.out b/regression-test/data/query_p0/sql_functions/table_function/explode.out index 415abdf823..234119bea6 100644 --- a/regression-test/data/query_p0/sql_functions/table_function/explode.out +++ b/regression-test/data/query_p0/sql_functions/table_function/explode.out @@ -464,3 +464,214 @@ 9 {"s4d0hU-uWynWWe-eIf-ym5C":"9", "ANvepQ-oO7fMUr-qdl-XbUo":"4", "G16nau-7qY3fb1-UYL-oiNd":"5", "bVd4er-efUCDow-nzo-87fg":"2", "7I6zLI-hQTTlLe-sBr-oCTh":"7"} 9 {"s4d0hU-uWynWWe-eIf-ym5C":"9", "ANvepQ-oO7fMUr-qdl-XbUo":"4", "G16nau-7qY3fb1-UYL-oiNd":"5", "bVd4er-efUCDow-nzo-87fg":"2", "7I6zLI-hQTTlLe-sBr-oCTh":"7"} +-- !test6 -- +1 2 +1 3 +2 2 +2 \N + +-- !test7 -- +1 4 +1 5 +1 6 +2 14 +2 15 +3 114 +3 115 +3 116 + +-- !test8 -- +1 4 2 +1 5 3 +1 6 \N +2 14 2 +2 15 \N +3 114 \N +3 115 \N +3 116 \N + +-- !test9 -- +1 2 4 +1 3 5 +1 \N 6 +2 2 14 +2 \N 15 +3 \N 114 +3 \N 115 +3 \N 116 + +-- !test10 -- +1 2 4 4 +1 3 5 5 +1 \N 6 6 +2 2 14 14 +2 \N 15 15 +3 \N 114 114 +3 \N 115 115 +3 \N 116 116 + +-- !test11 -- +1 4 2 4 2 +1 4 2 5 3 +1 4 2 6 \N +1 5 3 4 2 +1 5 3 5 3 +1 5 3 6 \N +1 6 \N 4 2 +1 6 \N 5 3 +1 6 \N 6 \N +2 14 2 14 2 +2 14 2 15 \N +2 15 \N 14 2 +2 15 \N 15 \N +3 114 \N 114 \N +3 114 \N 115 \N +3 114 \N 116 \N +3 115 \N 114 \N +3 115 \N 115 \N +3 115 \N 116 \N +3 116 \N 114 \N +3 116 \N 115 \N +3 116 \N 116 \N + +-- !test12 -- +1 2 +1 3 +2 2 +2 \N +3 \N + +-- !test13 -- +1 4 +1 5 +1 6 +2 14 +2 15 +3 114 +3 115 +3 116 + +-- !test14 -- +1 4 2 +1 5 3 +1 6 \N +2 14 2 +2 15 \N +3 114 \N +3 115 \N +3 116 \N + +-- !test15 -- +1 2 4 +1 3 5 +1 \N 6 +2 2 14 +2 \N 15 +3 \N 114 +3 \N 115 +3 \N 116 + +-- !test16 -- +1 2 4 4 +1 3 5 5 +1 \N 6 6 +2 2 14 14 +2 \N 15 15 +3 \N 114 114 +3 \N 115 115 +3 \N 116 116 + +-- !test17 -- +1 4 2 4 2 +1 4 2 5 3 +1 4 2 6 \N +1 5 3 4 2 +1 5 3 5 3 +1 5 3 6 \N +1 6 \N 4 2 +1 6 \N 5 3 +1 6 \N 6 \N +2 14 2 14 2 +2 14 2 15 \N +2 15 \N 14 2 +2 15 \N 15 \N +3 114 \N 114 \N +3 114 \N 115 \N +3 114 \N 116 \N +3 115 \N 114 \N +3 115 \N 115 \N +3 115 \N 116 \N +3 116 \N 114 \N +3 116 \N 115 \N +3 116 \N 116 \N + +-- !test18 -- +1 2 +1 3 +2 2 +2 \N + +-- !test19 -- +1 4 +1 5 +1 6 +2 14 +2 15 +3 114 +3 115 +3 116 + +-- !test20 -- +1 4 2 +1 5 3 +1 6 \N +2 14 2 +2 15 \N +3 114 \N +3 115 \N +3 116 \N + +-- !test21 -- +1 2 4 +1 3 5 +1 \N 6 +2 2 14 +2 \N 15 +3 \N 114 +3 \N 115 +3 \N 116 + +-- !test22 -- +1 2 4 4 +1 3 5 5 +1 \N 6 6 +2 2 14 14 +2 \N 15 15 +3 \N 114 114 +3 \N 115 115 +3 \N 116 116 + +-- !test23 -- +1 4 2 4 2 +1 4 2 5 3 +1 4 2 6 \N +1 5 3 4 2 +1 5 3 5 3 +1 5 3 6 \N +1 6 \N 4 2 +1 6 \N 5 3 +1 6 \N 6 \N +2 14 2 14 2 +2 14 2 15 \N +2 15 \N 14 2 +2 15 \N 15 \N +3 114 \N 114 \N +3 114 \N 115 \N +3 114 \N 116 \N +3 115 \N 114 \N +3 115 \N 115 \N +3 115 \N 116 \N +3 116 \N 114 \N +3 116 \N 115 \N +3 116 \N 116 \N + diff --git a/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy b/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy index 982e27a966..765c5a76fb 100644 --- a/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy +++ b/regression-test/suites/query_p0/sql_functions/table_function/explode.groovy @@ -137,4 +137,48 @@ suite("explode") { for (int r = 0; r < res_origin_size_aa.size(); ++ r) { assertEquals(res_origin_size_aa[r][0], res_explode_aa[r][0]) } + + sql "DROP TABLE IF EXISTS array_test;" + sql """ + CREATE TABLE `array_test` ( + `id` INT NULL, + `array_int` ARRAY NOT NULL, + `array_string` ARRAY NULL, + `v_int` VARIANT NULL, + `v_string` VARIANT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "is_being_synced" = "false", + "storage_medium" = "hdd", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false", + "group_commit_interval_ms" = "10000", + "group_commit_data_bytes" = "134217728" + ); + """ + + sql """insert into array_test values( 1, [4,5,6], ["2","3"], '{"a": [4,5,6]}', '{"a": ["2","3"]}'),( 2, [14,15], ["2",null], '{"a": [14,15]}', '{"a": ["2",null]}'),( 3, [114,115,116], null, '{"a": [114,115,116]}','{"a": null}');""" + + + qt_test6 "select id,e1 from array_test as a lateral view explode(a.array_string) tmp1 as e1;" + qt_test7 "select id,e1 from array_test as a lateral view explode(a.array_int) tmp1 as e1;" + qt_test8 "select id,e1,e2 from array_test as a lateral view explode(a.array_int,a.array_string) tmp1 as e1,e2;" + qt_test9 "select id,e1,e2 from array_test as a lateral view explode(a.array_string,a.array_int) tmp1 as e1,e2;" + qt_test10 "select id,e1,e2,e3 from array_test as a lateral view explode(a.array_string,a.array_int,a.array_int) tmp1 as e1,e2,e3;" + qt_test11 "select id,e1,e2,e11,e12 from array_test as a lateral view explode(a.array_int,a.array_string) tmp1 as e1,e2 lateral view explode(a.array_int,a.array_string) tmp2 as e11,e12;" + + qt_test12 "select id,e1 from array_test as a lateral view explode_outer(a.array_string) tmp1 as e1;" + qt_test13 "select id,e1 from array_test as a lateral view explode_outer(a.array_int) tmp1 as e1;" + qt_test14 "select id,e1,e2 from array_test as a lateral view explode_outer(a.array_int,a.array_string) tmp1 as e1,e2;" + qt_test15 "select id,e1,e2 from array_test as a lateral view explode_outer(a.array_string,a.array_int) tmp1 as e1,e2;" + qt_test16 "select id,e1,e2,e3 from array_test as a lateral view explode_outer(a.array_string,a.array_int,a.array_int) tmp1 as e1,e2,e3;" + qt_test17 "select id,e1,e2,e11,e12 from array_test as a lateral view explode_outer(a.array_int,a.array_string) tmp1 as e1,e2 lateral view explode_outer(a.array_int,a.array_string) tmp2 as e11,e12;" + } diff --git a/regression-test/suites/query_p0/sql_functions/table_function/explode_array_decimal.groovy b/regression-test/suites/query_p0/sql_functions/table_function/explode_array_decimal.groovy index 60eed6b202..f8208889f3 100644 --- a/regression-test/suites/query_p0/sql_functions/table_function/explode_array_decimal.groovy +++ b/regression-test/suites/query_p0/sql_functions/table_function/explode_array_decimal.groovy @@ -39,7 +39,7 @@ suite("explode_array_decimal") { sql """insert into ods_device_data_1d_inc values(1, "[0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,1,9,5,5,5,5,5,5,5,5,5,0.8,0.8,0.8,0.8,0.8]")""" - sql "SET enable_nereids_planner=false;" + sql "SET enable_nereids_planner=true;" qt_sql_old_planner """ SELECT * from