[Improve](explode) explode function support multi param (#50310)

### What problem does this PR solve?
backport:https://github.com/apache/doris/pull/48537
Issue Number: close #xxx

Related PR: #xxx

Problem Summary:

### Release note

None

### Check List (For Author)

- Test <!-- At least one of them must be included. -->
    - [ ] Regression test
    - [ ] Unit Test
    - [ ] Manual test (add detailed scripts or steps below)
    - [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
        - [ ] Previous test can cover this change.
        - [ ] No code files have been changed.
        - [ ] Other reason <!-- Add your reason?  -->

- Behavior changed:
    - [ ] No.
    - [ ] Yes. <!-- Explain the behavior change -->

- Does this need documentation?
    - [ ] No.
- [ ] Yes. <!-- Add document PR link here. eg:
https://github.com/apache/doris-website/pull/1214 -->

### Check List (For Reviewer who merge this PR)

- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add branch pick label that this PR
should merge into -->
This commit is contained in:
Benjaminwei
2025-04-23 23:27:07 +08:00
committed by GitHub
parent e5a9c2552d
commit cf72fa82e2
22 changed files with 1162 additions and 67 deletions

View File

@ -72,7 +72,7 @@ private:
* h. "now": ALWAYS_NOT_NULLABLE -> DEPEND_ON_ARGUMENTS
* i. change FunctionIsIPAddressInRange from AlwaysNotNullable to DependOnArguments. controlled by individual session variable.
*/
constexpr inline int BeExecVersionManager::max_be_exec_version = 6;
constexpr inline int BeExecVersionManager::max_be_exec_version = 7;
constexpr inline int BeExecVersionManager::min_be_exec_version = 0;
/// functional

View File

@ -55,7 +55,8 @@ Status TableFunctionLocalState::open(RuntimeState* state) {
const std::string& tf_name = _vfn_ctxs[i]->root()->fn().name.function_name;
vectorized::TableFunction* fn = nullptr;
RETURN_IF_ERROR(vectorized::TableFunctionFactory::get_fn(tf_name, state->obj_pool(), &fn));
RETURN_IF_ERROR(vectorized::TableFunctionFactory::get_fn(tf_name, state->obj_pool(), &fn,
state->be_exec_version()));
fn->set_expr_context(_vfn_ctxs[i]);
_fns.push_back(fn);
}
@ -266,7 +267,8 @@ Status TableFunctionOperatorX::init(const TPlanNode& tnode, RuntimeState* state)
auto root = ctx->root();
const std::string& tf_name = root->fn().name.function_name;
vectorized::TableFunction* fn = nullptr;
RETURN_IF_ERROR(vectorized::TableFunctionFactory::get_fn(tf_name, _pool, &fn));
RETURN_IF_ERROR(vectorized::TableFunctionFactory::get_fn(tf_name, _pool, &fn,
state->be_exec_version()));
fn->set_expr_context(ctx);
_fns.push_back(fn);
}

View File

@ -60,7 +60,8 @@ Status VTableFunctionNode::init(const TPlanNode& tnode, RuntimeState* state) {
auto root = ctx->root();
const std::string& tf_name = root->fn().name.function_name;
TableFunction* fn = nullptr;
RETURN_IF_ERROR(TableFunctionFactory::get_fn(tf_name, _pool, &fn));
RETURN_IF_ERROR(
TableFunctionFactory::get_fn(tf_name, _pool, &fn, state->be_exec_version()));
fn->set_expr_context(ctx);
_fns.push_back(fn);
}

View File

@ -23,6 +23,7 @@
#include <string_view>
#include <utility>
#include "agent/be_exec_version_manager.h"
#include "common/object_pool.h"
#include "vec/exprs/table_function/table_function.h"
#include "vec/exprs/table_function/vexplode.h"
@ -32,6 +33,7 @@
#include "vec/exprs/table_function/vexplode_map.h"
#include "vec/exprs/table_function/vexplode_numbers.h"
#include "vec/exprs/table_function/vexplode_split.h"
#include "vec/exprs/table_function/vexplode_v2.h"
#include "vec/exprs/table_function/vposexplode.h"
#include "vec/utils/util.hpp"
@ -61,14 +63,21 @@ const std::unordered_map<std::string, std::function<std::unique_ptr<TableFunctio
{"explode_map", TableFunctionCreator<VExplodeMapTableFunction> {}},
{"explode_json_object", TableFunctionCreator<VExplodeJsonObjectTableFunction> {}},
{"posexplode", TableFunctionCreator<VPosExplodeTableFunction> {}},
{"explode", TableFunctionCreator<VExplodeTableFunction> {}}};
{"explode", TableFunctionCreator<VExplodeV2TableFunction> {}},
{"explode_old", TableFunctionCreator<VExplodeTableFunction> {}}};
const std::unordered_map<std::string, std::string> TableFunctionFactory::_function_to_replace = {
{"explode", "explode_old"}};
Status TableFunctionFactory::get_fn(const std::string& fn_name_raw, ObjectPool* pool,
TableFunction** fn) {
TableFunction** fn,
int be_version = BeExecVersionManager::get_newest_version()) {
bool is_outer = match_suffix(fn_name_raw, COMBINATOR_SUFFIX_OUTER);
std::string fn_name_real =
is_outer ? remove_suffix(fn_name_raw, COMBINATOR_SUFFIX_OUTER) : fn_name_raw;
temporary_function_update(be_version, fn_name_real);
auto fn_iterator = _function_map.find(fn_name_real);
if (fn_iterator != _function_map.end()) {
*fn = pool->add(fn_iterator->second().release());

View File

@ -33,10 +33,25 @@ class TableFunction;
class TableFunctionFactory {
public:
TableFunctionFactory() = delete;
static Status get_fn(const std::string& fn_name_raw, ObjectPool* pool, TableFunction** fn);
static Status get_fn(const std::string& fn_name_raw, ObjectPool* pool, TableFunction** fn,
int be_version);
const static std::unordered_map<std::string, std::function<std::unique_ptr<TableFunction>()>>
_function_map;
/// @TEMPORARY: for be_exec_version=7. replace function to old version.
const static std::unordered_map<std::string, std::string> _function_to_replace;
const static int NEWEST_VERSION_EXPLODE_MULTI_PARAM = 7;
private:
/// @TEMPORARY: for be_exec_version=7
static void temporary_function_update(int fe_version_now, std::string& name) {
// replace if fe is old version.
if (fe_version_now < NEWEST_VERSION_EXPLODE_MULTI_PARAM &&
_function_to_replace.find(name) != _function_to_replace.end()) {
name = _function_to_replace.at(name);
}
}
};
} // namespace vectorized
} // namespace doris

View File

@ -0,0 +1,209 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "vec/exprs/table_function/vexplode_v2.h"
#include <glog/logging.h>
#include <ostream>
#include "common/status.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_nothing.h"
#include "vec/columns/column_object.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_nothing.h"
#include "vec/exprs/vexpr.h"
#include "vec/exprs/vexpr_context.h"
#include "vec/functions/function_helpers.h"
namespace doris::vectorized {
#include "vec/columns/column_struct.h"
VExplodeV2TableFunction::VExplodeV2TableFunction() {
_fn_name = "vexplode";
}
Status VExplodeV2TableFunction::process_init(Block* block, RuntimeState* state) {
CHECK(_expr_context->root()->children().size() >= 1)
<< "VExplodeV2TableFunction support one or more child but has "
<< _expr_context->root()->children().size();
int value_column_idx = -1;
_multi_detail.resize(_expr_context->root()->children().size());
_array_offsets.resize(_expr_context->root()->children().size());
_array_columns.resize(_expr_context->root()->children().size());
for (int i = 0; i < _expr_context->root()->children().size(); i++) {
RETURN_IF_ERROR(_expr_context->root()->children()[i]->execute(_expr_context.get(), block,
&value_column_idx));
_array_columns[i] =
block->get_by_position(value_column_idx).column->convert_to_full_column_if_const();
if (!extract_column_array_info(*_array_columns[i], _multi_detail[i])) {
return Status::NotSupported(
"column type {} not supported now",
block->get_by_position(value_column_idx).column->get_name());
}
}
return Status::OK();
}
void VExplodeV2TableFunction::process_row(size_t row_idx) {
TableFunction::process_row(row_idx);
for (int i = 0; i < _multi_detail.size(); i++) {
auto& detail = _multi_detail[i];
if (!detail.array_nullmap_data || !detail.array_nullmap_data[row_idx]) {
_array_offsets[i] = (*detail.offsets_ptr)[row_idx - 1];
// find max size in array
auto cur_size = (*detail.offsets_ptr)[row_idx] - _array_offsets[i];
if (_cur_size < cur_size) {
_cur_size = cur_size;
}
}
}
_row_idx = row_idx;
}
void VExplodeV2TableFunction::process_close() {
_multi_detail.clear();
_array_offsets.clear();
_array_columns.clear();
_row_idx = 0;
}
void VExplodeV2TableFunction::get_same_many_values(MutableColumnPtr& column, int length) {
if (current_empty()) {
column->insert_many_defaults(length);
return;
}
ColumnStruct* struct_column = nullptr;
if (_is_nullable) {
auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
struct_column = assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
auto* nullmap_column =
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
nullmap_column->insert_many_defaults(length);
} else {
struct_column = assert_cast<ColumnStruct*>(column.get());
}
if (!struct_column) {
throw Exception(ErrorCode::INTERNAL_ERROR,
"Only multiple columns can be returned within a struct.");
}
for (int i = 0; i < _multi_detail.size(); i++) {
auto& detail = _multi_detail[i];
size_t pos = _array_offsets[i] + _cur_offset;
size_t element_size = _multi_detail[i].array_col->size_at(_row_idx);
auto& struct_field = struct_column->get_column(i);
if ((detail.array_nullmap_data && detail.array_nullmap_data[_row_idx])) {
struct_field.insert_many_defaults(length);
} else {
auto* nullable_column = assert_cast<ColumnNullable*>(struct_field.get_ptr().get());
auto* nullmap_column =
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
// only need to check if the value at position pos is null
if (element_size < _cur_offset ||
(detail.nested_nullmap_data && detail.nested_nullmap_data[pos])) {
nullable_column->insert_many_defaults(length);
} else {
nullable_column->get_nested_column_ptr()->insert_many_from(*detail.nested_col, pos,
length);
nullmap_column->insert_many_defaults(length);
}
}
}
}
int VExplodeV2TableFunction::get_value(MutableColumnPtr& column, int max_step) {
max_step = std::min(max_step, (int)(_cur_size - _cur_offset));
if (current_empty()) {
column->insert_default();
max_step = 1;
} else {
ColumnStruct* struct_column = nullptr;
if (_is_nullable) {
auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
struct_column =
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
auto* nullmap_column =
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
nullmap_column->insert_many_defaults(max_step);
} else {
struct_column = assert_cast<ColumnStruct*>(column.get());
}
if (!struct_column) {
throw Exception(ErrorCode::INTERNAL_ERROR,
"Only multiple columns can be returned within a struct.");
}
for (int i = 0; i < _multi_detail.size(); i++) {
auto& detail = _multi_detail[i];
size_t pos = _array_offsets[i] + _cur_offset;
size_t element_size = _multi_detail[i].array_col->size_at(_row_idx);
auto& struct_field = struct_column->get_column(i);
if (detail.array_nullmap_data && detail.array_nullmap_data[_row_idx]) {
struct_field.insert_many_defaults(max_step);
} else {
auto* nullable_column = assert_cast<ColumnNullable*>(struct_field.get_ptr().get());
auto* nullmap_column =
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
if (element_size >= _cur_offset + max_step) {
nullable_column->get_nested_column_ptr()->insert_range_from(*detail.nested_col,
pos, max_step);
if (detail.nested_nullmap_data) {
for (int j = 0; j < max_step; j++) {
if (detail.nested_nullmap_data[pos + j]) {
nullmap_column->insert_value(1);
} else {
nullmap_column->insert_value(0);
}
}
} else {
nullmap_column->insert_many_defaults(max_step);
}
} else {
nullable_column->get_nested_column_ptr()->insert_range_from(
*detail.nested_col, pos, element_size - _cur_offset);
if (detail.nested_nullmap_data) {
for (int j = 0; j < element_size - _cur_offset; j++) {
if (detail.nested_nullmap_data[pos + j]) {
nullmap_column->insert_value(1);
} else {
nullmap_column->insert_value(0);
}
}
} else {
nullmap_column->insert_many_defaults(element_size - _cur_offset);
}
nullable_column->insert_many_defaults(max_step - (element_size - _cur_offset));
}
}
}
}
forward(max_step);
return max_step;
}
} // namespace doris::vectorized

View File

@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstddef>
#include "common/status.h"
#include "vec/columns/column_struct.h"
#include "vec/data_types/data_type.h"
#include "vec/exprs/table_function/table_function.h"
#include "vec/functions/array/function_array_utils.h"
namespace doris::vectorized {
class Block;
} // namespace doris::vectorized
namespace doris::vectorized {
class VExplodeV2TableFunction : public TableFunction {
ENABLE_FACTORY_CREATOR(VExplodeV2TableFunction);
public:
VExplodeV2TableFunction();
~VExplodeV2TableFunction() override = default;
Status process_init(Block* block, RuntimeState* state) override;
void process_row(size_t row_idx) override;
void process_close() override;
void get_same_many_values(MutableColumnPtr& column, int length) override;
int get_value(MutableColumnPtr& column, int max_step) override;
private:
std::vector<ColumnPtr> _array_columns;
size_t _row_idx;
ColumnArrayExecutionDatas _multi_detail;
std::vector<size_t> _array_offsets;
};
} // namespace doris::vectorized

View File

@ -67,6 +67,25 @@ struct FunctionExplode {
static std::string get_error_msg() { return "Fake function do not support execute"; }
};
struct FunctionExplodeV2 {
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
DataTypes fieldTypes(arguments.size());
for (int i = 0; i < arguments.size(); i++) {
auto nestedType =
check_and_get_data_type<DataTypeArray>(arguments[i].get())->get_nested_type();
if (nestedType->is_nullable()) {
fieldTypes[i] = nestedType;
} else {
fieldTypes[i] = make_nullable(nestedType);
}
}
return make_nullable(std::make_shared<vectorized::DataTypeStruct>(fieldTypes));
}
static DataTypes get_variadic_argument_types() { return {}; }
static std::string get_error_msg() { return "Fake function do not support execute"; }
};
// explode map: make map k,v as struct field
struct FunctionExplodeMap {
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
@ -133,6 +152,14 @@ void register_table_function_expand(SimpleFunctionFactory& factory, const std::s
factory.register_function<FunctionFake<FunctionImpl>>(name + suffix);
};
template <typename FunctionImpl>
void register_table_alternative_function_expand(SimpleFunctionFactory& factory,
const std::string& name,
const std::string& suffix) {
factory.register_alternative_function<FunctionFake<FunctionImpl>>(name);
factory.register_alternative_function<FunctionFake<FunctionImpl>>(name + suffix);
};
template <typename ReturnType, bool VARIADIC>
void register_table_function_expand_default(SimpleFunctionFactory& factory, const std::string& name,
const std::string& suffix) {
@ -142,11 +169,28 @@ void register_table_function_expand_default(SimpleFunctionFactory& factory, cons
name + suffix);
};
template <typename ReturnType, bool VARIADIC>
void register_table_alternative_function_expand_default(SimpleFunctionFactory& factory,
const std::string& name,
const std::string& suffix) {
factory.register_alternative_function<
FunctionFake<FunctionFakeBaseImpl<ReturnType, false, VARIADIC>>>(name);
factory.register_alternative_function<
FunctionFake<FunctionFakeBaseImpl<ReturnType, true, VARIADIC>>>(name + suffix);
};
template <typename FunctionImpl>
void register_table_function_expand_outer(SimpleFunctionFactory& factory, const std::string& name) {
register_table_function_expand<FunctionImpl>(factory, name, COMBINATOR_SUFFIX_OUTER);
};
template <typename FunctionImpl>
void register_table_alternative_function_expand_outer(SimpleFunctionFactory& factory,
const std::string& name) {
register_table_alternative_function_expand<FunctionImpl>(factory, name,
COMBINATOR_SUFFIX_OUTER);
};
template <typename ReturnType, bool VARIADIC>
void register_table_function_expand_outer_default(SimpleFunctionFactory& factory,
const std::string& name) {
@ -154,6 +198,13 @@ void register_table_function_expand_outer_default(SimpleFunctionFactory& factory
COMBINATOR_SUFFIX_OUTER);
};
template <typename ReturnType, bool VARIADIC>
void register_table_alternative_function_expand_outer_default(SimpleFunctionFactory& factory,
const std::string& name) {
register_table_alternative_function_expand_default<ReturnType, VARIADIC>(
factory, name, COMBINATOR_SUFFIX_OUTER);
};
template <typename FunctionImpl>
void register_table_function_with_impl(SimpleFunctionFactory& factory, const std::string& name,
const std::string& suffix = "") {
@ -163,7 +214,9 @@ void register_table_function_with_impl(SimpleFunctionFactory& factory, const std
void register_function_fake(SimpleFunctionFactory& factory) {
register_function<FunctionEsquery>(factory, "esquery");
register_table_function_expand_outer<FunctionExplode>(factory, "explode");
register_table_function_expand_outer<FunctionExplodeV2>(factory, "explode");
register_table_alternative_function_expand_outer<FunctionExplode>(factory, "explode");
register_table_function_expand_outer<FunctionExplodeMap>(factory, "explode_map");
register_table_function_expand_outer<FunctionExplodeJsonObject>(factory, "explode_json_object");

View File

@ -115,6 +115,9 @@ class SimpleFunctionFactory {
/// @TEMPORARY: for be_exec_version=4
constexpr static int NEWEST_VERSION_FUNCTION_SUBSTITUTE = 4;
/// @TEMPORARY: for be_exec_version=7.
constexpr static int NEWEST_VERSION_EXPLODE_MULTI_PARAM = 7;
public:
void register_function(const std::string& name, const Creator& ptr) {
DataTypes types = ptr()->get_variadic_argument_types();
@ -145,6 +148,14 @@ public:
register_function(name, &createDefaultFunction<Function>);
}
/// @TEMPORARY: for be_exec_version=7
template <class Function>
void register_alternative_function(std::string name) {
static std::string suffix {"_old"};
function_to_replace[name] = name + suffix;
register_function(name + suffix, &createDefaultFunction<Function>);
}
void register_alias(const std::string& name, const std::string& alias) {
function_alias[alias] = name;
}
@ -208,7 +219,7 @@ private:
FunctionCreators function_creators;
FunctionIsVariadic function_variadic_set;
std::unordered_map<std::string, std::string> function_alias;
/// @TEMPORARY: for be_exec_version=3. replace function to old version.
/// @TEMPORARY: for be_exec_version=7. replace function to old version.
std::unordered_map<std::string, std::string> function_to_replace;
template <typename Function>
@ -216,10 +227,10 @@ private:
return std::make_shared<DefaultFunctionBuilder>(Function::create());
}
/// @TEMPORARY: for be_exec_version=3
/// @TEMPORARY: for be_exec_version=7
void temporary_function_update(int fe_version_now, std::string& name) {
// replace if fe is old version.
if (fe_version_now < NEWEST_VERSION_FUNCTION_SUBSTITUTE &&
if (fe_version_now < NEWEST_VERSION_EXPLODE_MULTI_PARAM &&
function_to_replace.find(name) != function_to_replace.end()) {
name = function_to_replace[name];
}

View File

@ -25,6 +25,7 @@
#include "runtime/runtime_state.h"
#include "util/binary_cast.hpp"
#include "util/bitmap_value.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_date.h"
@ -33,7 +34,9 @@
#include "vec/data_types/data_type_ipv4.h"
#include "vec/data_types/data_type_ipv6.h"
#include "vec/data_types/data_type_jsonb.h"
#include "vec/data_types/data_type_map.h"
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/data_type_struct.h"
#include "vec/data_types/data_type_time_v2.h"
#include "vec/exprs/table_function/table_function.h"
#include "vec/runtime/vdatetime_value.h"
@ -176,6 +179,29 @@ size_t type_index_to_data_type(const std::vector<AnyType>& input_types, size_t i
type = std::make_shared<DataTypeArray>(sub_type);
return ret + 1;
}
case TypeIndex::Struct: {
desc.type = doris::PrimitiveType::TYPE_STRUCT;
++index;
size_t ret = 0;
DataTypes sub_types;
while (index < input_types.size()) {
ut_type::UTDataTypeDesc sub_desc;
DataTypePtr sub_type = nullptr;
size_t inner_ret = type_index_to_data_type(input_types, index, sub_desc, sub_type);
if (inner_ret <= 0) {
return inner_ret;
}
ret += inner_ret;
desc.children.push_back(sub_desc.type_desc);
if (sub_desc.is_nullable) {
sub_type = make_nullable(sub_type);
sub_types.push_back(sub_type);
}
++index;
}
type = std::make_shared<DataTypeStruct>(sub_types);
return ret + 1;
}
case TypeIndex::Nullable: {
++index;
size_t ret = type_index_to_data_type(input_types, index, ut_desc, type);
@ -307,6 +333,28 @@ bool insert_cell(MutableColumnPtr& column, DataTypePtr type_ptr, const AnyType&
} else if (type.is_array()) {
auto v = any_cast<Array>(cell);
column->insert(v);
} else if (type.is_struct()) {
auto v = any_cast<CellSet>(cell);
auto struct_type = assert_cast<const DataTypeStruct*>(type_ptr.get());
auto nullable_column = assert_cast<ColumnNullable*>(column.get());
auto* struct_column =
assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
auto* nullmap_column =
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
nullmap_column->insert_default();
for (size_t i = 0; i < v.size(); ++i) {
auto& field = v[i];
auto col = struct_column->get_column(i).get_ptr();
RETURN_IF_FALSE(insert_cell(col, struct_type->get_element(i), field));
}
} else if (type.is_nullable()) {
auto nullable_column = assert_cast<ColumnNullable*>(column.get());
auto col_type = remove_nullable(type_ptr);
auto col = nullable_column->get_nested_column_ptr();
auto* nullmap_column =
assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get());
nullmap_column->insert_default();
RETURN_IF_FALSE(insert_cell(col, col_type, cell));
} else {
LOG(WARNING) << "dataset not supported for TypeIndex:" << (int)type.idx;
return false;
@ -324,32 +372,39 @@ Block* create_block_from_inputset(const InputTypeSet& input_types, const InputDa
// 1.1 insert data and create block
auto row_size = input_set.size();
std::unique_ptr<Block> block = Block::create_unique();
auto input_set_size = input_set[0].size();
// 1.2 calculate the input column size
auto input_col_size = input_set_size / descs.size();
for (size_t i = 0; i < descs.size(); ++i) {
auto& desc = descs[i];
auto column = desc.data_type->create_column();
column->reserve(row_size);
for (size_t j = 0; j < input_col_size; ++j) {
auto column = desc.data_type->create_column();
column->reserve(row_size);
auto type_ptr = desc.data_type->is_nullable()
? ((DataTypeNullable*)(desc.data_type.get()))->get_nested_type()
: desc.data_type;
WhichDataType type(type_ptr);
auto type_ptr = desc.data_type->is_nullable()
? ((DataTypeNullable*)(desc.data_type.get()))->get_nested_type()
: desc.data_type;
WhichDataType type(type_ptr);
for (int j = 0; j < row_size; j++) {
if (!insert_cell(column, type_ptr, input_set[j][i])) {
return nullptr;
for (int r = 0; r < row_size; r++) {
if (!insert_cell(column, type_ptr, input_set[r][i * input_col_size + j])) {
return nullptr;
}
}
}
if (desc.is_const) {
column = ColumnConst::create(std::move(column), row_size);
if (desc.is_const) {
column = ColumnConst::create(std::move(column), row_size);
}
block->insert({std::move(column), desc.data_type, desc.col_name});
}
block->insert({std::move(column), desc.data_type, desc.col_name});
}
return block.release();
}
Block* process_table_function(TableFunction* fn, Block* input_block,
const InputTypeSet& output_types) {
const InputTypeSet& output_types, bool test_get_value_func) {
// pasrse output data types
ut_type::UTDataTypeDescs descs;
if (!parse_ut_data_type(output_types, descs)) {
@ -383,8 +438,12 @@ Block* process_table_function(TableFunction* fn, Block* input_block,
}
do {
fn->get_same_many_values(column, 1);
fn->forward();
if (test_get_value_func) {
fn->get_value(column, 10);
} else {
fn->get_same_many_values(column, 1);
fn->forward();
}
} while (!fn->eos());
}
@ -395,7 +454,7 @@ Block* process_table_function(TableFunction* fn, Block* input_block,
void check_vec_table_function(TableFunction* fn, const InputTypeSet& input_types,
const InputDataSet& input_set, const InputTypeSet& output_types,
const InputDataSet& output_set) {
const InputDataSet& output_set, const bool test_get_value_func) {
std::unique_ptr<Block> input_block(create_block_from_inputset(input_types, input_set));
EXPECT_TRUE(input_block != nullptr);
@ -404,7 +463,7 @@ void check_vec_table_function(TableFunction* fn, const InputTypeSet& input_types
EXPECT_TRUE(expect_output_block != nullptr);
std::unique_ptr<Block> real_output_block(
process_table_function(fn, input_block.get(), output_types));
process_table_function(fn, input_block.get(), output_types, test_get_value_func));
EXPECT_TRUE(real_output_block != nullptr);
// compare real_output_block with expect_output_block

View File

@ -40,6 +40,7 @@
#include "vec/columns/column_complex.h"
#include "vec/columns/column_const.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_struct.h"
#include "vec/common/string_ref.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
@ -194,7 +195,7 @@ Block* process_table_function(TableFunction* fn, Block* input_block,
const InputTypeSet& output_types);
void check_vec_table_function(TableFunction* fn, const InputTypeSet& input_types,
const InputDataSet& input_set, const InputTypeSet& output_types,
const InputDataSet& output_set);
const InputDataSet& output_set, bool test_get_value_func = false);
// Null values are represented by Null()
// The type of the constant column is represented as follows: Consted {TypeIndex::String}

View File

@ -34,6 +34,7 @@
#include "vec/exprs/table_function/vexplode.h"
#include "vec/exprs/table_function/vexplode_numbers.h"
#include "vec/exprs/table_function/vexplode_split.h"
#include "vec/exprs/table_function/vexplode_v2.h"
#include "vec/exprs/vexpr_context.h"
#include "vec/function/function_test_util.h"
@ -125,6 +126,61 @@ TEST_F(TableFunctionTest, vexplode_outer) {
}
}
TEST_F(TableFunctionTest, vexplode_outer_v2) {
init_expr_context(1);
VExplodeV2TableFunction explode_outer;
explode_outer.set_outer();
explode_outer.set_expr_context(_ctx);
// explode_outer(Array<Int32>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32};
Array vec = {Int32(1), Null(), Int32(2), Int32(3)};
InputDataSet input_set = {{vec}, {Null()}, {Array()}};
InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::Int32};
InputDataSet output_set = {{{CellSet {Int32(1)}}}, {{CellSet {Null()}}},
{{CellSet {Int32(2)}}}, {{CellSet {Int32(3)}}},
{{CellSet {Null()}}}, {{CellSet {Null()}}}};
check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set);
}
// explode_outer(Array<String>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
Array vec = {Field(std::string("abc")), Field(std::string("")), Field(std::string("def"))};
InputDataSet input_set = {{Null()}, {Array()}, {vec}};
InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String};
InputDataSet output_set = {{{CellSet {Null()}}},
{{CellSet {Null()}}},
{{CellSet {std::string("abc")}}},
{{CellSet {std::string("")}}},
{{CellSet {std::string("def")}}}};
check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set);
}
// explode_outer(Array<Decimal>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Decimal128V2};
Array vec = {ut_type::DECIMALFIELD(17014116.67), ut_type::DECIMALFIELD(-17014116.67)};
InputDataSet input_set = {{Null()}, {Array()}, {vec}};
InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::Decimal128V2};
InputDataSet output_set = {{{CellSet {Null()}}},
{{CellSet {Null()}}},
{{CellSet {ut_type::DECIMAL(17014116.67)}}},
{{CellSet {ut_type::DECIMAL(-17014116.67)}}}};
check_vec_table_function(&explode_outer, input_types, input_set, output_types, output_set);
}
}
TEST_F(TableFunctionTest, vexplode) {
init_expr_context(1);
VExplodeTableFunction explode;
@ -168,6 +224,111 @@ TEST_F(TableFunctionTest, vexplode) {
}
}
TEST_F(TableFunctionTest, vexplode_v2) {
init_expr_context(1);
VExplodeV2TableFunction explode;
explode.set_expr_context(_ctx);
// explode(Array<Int32>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Int32};
Array vec = {Int32(1), Null(), Int32(2), Int32(3)};
InputDataSet input_set = {{vec}, {Null()}, {Array()}};
InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::Int32};
InputDataSet output_set = {{{CellSet {Int32(1)}}},
{{CellSet {Null()}}},
{{CellSet {Int32(2)}}},
{{CellSet {Int32(3)}}}};
check_vec_table_function(&explode, input_types, input_set, output_types, output_set);
}
// explode(Array<String>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
Array vec = {Field(std::string("abc")), Field(std::string("")), Field(std::string("def"))};
InputDataSet input_set = {{Null()}, {Array()}, {vec}};
InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String};
InputDataSet output_set = {{{CellSet {std::string("abc")}}},
{{CellSet {std::string("")}}},
{{CellSet {std::string("def")}}}};
check_vec_table_function(&explode, input_types, input_set, output_types, output_set);
}
// explode(Array<Date>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::Date};
Array vec = {Null(), str_to_date_time("2022-01-02", false)};
InputDataSet input_set = {{Null()}, {Array()}, {vec}};
InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::Date};
InputDataSet output_set = {{{CellSet {Null()}}}, {{CellSet {std::string("2022-01-02")}}}};
check_vec_table_function(&explode, input_types, input_set, output_types, output_set);
}
}
TEST_F(TableFunctionTest, vexplode_v2_two_param) {
init_expr_context(2);
VExplodeV2TableFunction explode;
explode.set_expr_context(_ctx);
// explode(Array<String>, Array<String>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
Array vec = {Field(std::string("one")), Field(std::string("two")),
Field(std::string("three"))};
Array vec1 = {Field(std::string("1")), Field(std::string("2")), Field(std::string("3"))};
InputDataSet input_set = {{vec, vec1}};
InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String, TypeIndex::String};
InputDataSet output_set = {{{CellSet {std::string("one"), std::string("1")}}},
{{CellSet {std::string("two"), std::string("2")}}},
{{CellSet {std::string("three"), std::string("3")}}}};
check_vec_table_function(&explode, input_types, input_set, output_types, output_set, false);
check_vec_table_function(&explode, input_types, input_set, output_types, output_set, true);
}
// explode(null, Array<String>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
Array vec = {Field(std::string("one")), Field(std::string("two")),
Field(std::string("three"))};
InputDataSet input_set = {{Null(), vec}};
InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String, TypeIndex::String};
InputDataSet output_set = {{{CellSet {Null(), std::string("one")}}},
{{CellSet {Null(), std::string("two")}}},
{{CellSet {Null(), std::string("three")}}}};
check_vec_table_function(&explode, input_types, input_set, output_types, output_set, false);
check_vec_table_function(&explode, input_types, input_set, output_types, output_set, true);
}
// explode(Array<Null>, Array<String>)
{
InputTypeSet input_types = {TypeIndex::Array, TypeIndex::String};
Array vec = {Field(std::string("one")), Field(std::string("two")),
Field(std::string("three"))};
Array vec1 = {Field(std::string("1")), Field(Null()), Field(std::string("3"))};
InputDataSet input_set = {{vec, vec1}};
InputTypeSet output_types = {TypeIndex::Struct, TypeIndex::String, TypeIndex::String};
InputDataSet output_set = {{{CellSet {std::string("one"), std::string("1")}}},
{{CellSet {std::string("two"), Null()}}},
{{CellSet {std::string("three"), std::string("3")}}}};
check_vec_table_function(&explode, input_types, input_set, output_types, output_set, false);
check_vec_table_function(&explode, input_types, input_set, output_types, output_set, true);
}
}
TEST_F(TableFunctionTest, vexplode_numbers) {
init_expr_context(1);
VExplodeNumbersTableFunction tfn;
@ -215,4 +376,4 @@ TEST_F(TableFunctionTest, vexplode_split) {
}
}
} // namespace doris::vectorized
} // namespace doris::vectorized

View File

@ -1943,7 +1943,7 @@ public class Config extends ConfigBase {
* Max data version of backends serialize block.
*/
@ConfField(mutable = false)
public static int max_be_exec_version = 6;
public static int max_be_exec_version = 7;
/**
* Min data version of backends serialize block.

View File

@ -41,8 +41,11 @@ import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplode
import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplodeOuter;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedSet;
import java.util.List;
import java.util.Set;
/**
* Builtin table generating functions.
@ -76,6 +79,13 @@ public class BuiltinTableGeneratingFunctions implements FunctionHelper {
tableGenerating(PosExplodeOuter.class, "posexplode_outer")
);
public static final ImmutableSet<String> RETURN_MULTI_COLUMNS_FUNCTIONS = new ImmutableSortedSet.Builder<String>(
String.CASE_INSENSITIVE_ORDER).add("explode").add("explode_outer").build();
public Set<String> getReturnManyColumnFunctions() {
return RETURN_MULTI_COLUMNS_FUNCTIONS;
}
public static final BuiltinTableGeneratingFunctions INSTANCE = new BuiltinTableGeneratingFunctions();
// Note: Do not add any code here!

View File

@ -26,6 +26,7 @@ import org.apache.doris.analysis.TableSnapshot;
import org.apache.doris.analysis.UserIdentity;
import org.apache.doris.catalog.AggregateType;
import org.apache.doris.catalog.BuiltinAggregateFunctions;
import org.apache.doris.catalog.BuiltinTableGeneratingFunctions;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.KeysType;
import org.apache.doris.catalog.ScalarType;
@ -1206,8 +1207,13 @@ public class LogicalPlanBuilder extends DorisParserBaseVisitor<Object> {
String generateName = ctx.tableName.getText();
// if later view explode map type, we need to add a project to convert map to struct
String columnName = ctx.columnNames.get(0).getText();
List<String> expandColumnNames = Lists.newArrayList();
if (ctx.columnNames.size() > 1) {
List<String> expandColumnNames = ImmutableList.of();
// explode can pass multiple columns
// then use struct to return the result of the expansion of multiple columns.
if (ctx.columnNames.size() > 1
|| BuiltinTableGeneratingFunctions.INSTANCE.getReturnManyColumnFunctions()
.contains(ctx.functionName.getText())) {
columnName = ConnectContext.get() != null
? ConnectContext.get().getStatementContext().generateColumnName() : "expand_cols";
expandColumnNames = ctx.columnNames.stream()

View File

@ -20,31 +20,34 @@ package org.apache.doris.nereids.trees.expressions.functions.generator;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.functions.ComputePrecision;
import org.apache.doris.nereids.trees.expressions.functions.CustomSignature;
import org.apache.doris.nereids.trees.expressions.functions.SearchSignature;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.coercion.AnyDataType;
import org.apache.doris.nereids.types.coercion.FollowToAnyDataType;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.types.NullType;
import org.apache.doris.nereids.types.StructField;
import org.apache.doris.nereids.types.StructType;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
import java.util.List;
/**
* explode([1, 2, 3]), generate 3 lines include 1, 2 and 3.
* explode([1, 2, 3]), generate three rows include 1, 2 and 3.
* explode([1, 2, 3], [4, 5, 6]) generates two columns and three rows
* where the first column contains 1, 2, 3, and the second column contains 4, 5, 6.
*/
public class Explode extends TableGeneratingFunction implements BinaryExpression, AlwaysNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(new FollowToAnyDataType(0)).args(ArrayType.of(new AnyDataType(0)))
);
public class Explode extends TableGeneratingFunction implements CustomSignature, ComputePrecision, AlwaysNullable {
/**
* constructor with 1 argument.
* constructor with one or more argument.
*/
public Explode(Expression arg) {
super("explode", arg);
public Explode(Expression[] args) {
super("explode", args);
}
/**
@ -52,17 +55,43 @@ public class Explode extends TableGeneratingFunction implements BinaryExpression
*/
@Override
public Explode withChildren(List<Expression> children) {
Preconditions.checkArgument(children.size() == 1);
return new Explode(children.get(0));
Preconditions.checkArgument(!children.isEmpty());
return new Explode(children.toArray(new Expression[0]));
}
@Override
public List<FunctionSignature> getSignatures() {
return SIGNATURES;
public FunctionSignature computePrecision(FunctionSignature signature) {
return signature;
}
@Override
public FunctionSignature customSignature() {
List<DataType> arguments = new ArrayList<>();
ImmutableList.Builder<StructField> structFields = ImmutableList.builder();
for (int i = 0; i < children.size(); i++) {
if (children.get(i).getDataType().isNullType()) {
arguments.add(ArrayType.of(NullType.INSTANCE));
structFields.add(
new StructField("col" + (i + 1), NullType.INSTANCE, true, ""));
} else if (children.get(i).getDataType().isArrayType()) {
structFields.add(
new StructField("col" + (i + 1),
((ArrayType) (children.get(i)).getDataType()).getItemType(), true, ""));
arguments.add(children.get(i).getDataType());
} else {
SearchSignature.throwCanNotFoundFunctionException(this.getName(), getArguments());
}
}
return FunctionSignature.of(new StructType(structFields.build()), arguments);
}
@Override
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
return visitor.visitExplode(this, context);
}
@Override
public FunctionSignature searchSignature(List<FunctionSignature> signatures) {
return super.searchSignature(signatures);
}
}

View File

@ -20,31 +20,34 @@ package org.apache.doris.nereids.trees.expressions.functions.generator;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.functions.ComputePrecision;
import org.apache.doris.nereids.trees.expressions.functions.CustomSignature;
import org.apache.doris.nereids.trees.expressions.functions.SearchSignature;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.coercion.AnyDataType;
import org.apache.doris.nereids.types.coercion.FollowToAnyDataType;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.types.NullType;
import org.apache.doris.nereids.types.StructField;
import org.apache.doris.nereids.types.StructType;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
import java.util.List;
/**
* explode([1, 2, 3]), generate 3 lines include 1, 2 and 3.
* explode_outer([1, 2, 3]), generate three rows include 1, 2 and 3.
* explode_outer([1, 2, 3], [4, 5, 6]) generates two columns and three rows
* where the first column contains 1, 2, 3, and the second column contains 4, 5, 6.
*/
public class ExplodeOuter extends TableGeneratingFunction implements BinaryExpression, AlwaysNullable {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(new FollowToAnyDataType(0)).args(ArrayType.of(new AnyDataType(0)))
);
public class ExplodeOuter extends TableGeneratingFunction implements CustomSignature, ComputePrecision, AlwaysNullable {
/**
* constructor with 1 argument.
* constructor with one or more argument.
*/
public ExplodeOuter(Expression arg) {
super("explode_outer", arg);
public ExplodeOuter(Expression[] args) {
super("explode_outer", args);
}
/**
@ -52,17 +55,43 @@ public class ExplodeOuter extends TableGeneratingFunction implements BinaryExpre
*/
@Override
public ExplodeOuter withChildren(List<Expression> children) {
Preconditions.checkArgument(children.size() == 1);
return new ExplodeOuter(children.get(0));
Preconditions.checkArgument(!children.isEmpty());
return new ExplodeOuter(children.toArray(new Expression[0]));
}
@Override
public List<FunctionSignature> getSignatures() {
return SIGNATURES;
public FunctionSignature computePrecision(FunctionSignature signature) {
return signature;
}
@Override
public FunctionSignature customSignature() {
List<DataType> arguments = new ArrayList<>();
ImmutableList.Builder<StructField> structFields = ImmutableList.builder();
for (int i = 0; i < children.size(); i++) {
if (children.get(i).getDataType().isNullType()) {
arguments.add(ArrayType.of(NullType.INSTANCE));
structFields.add(
new StructField("col" + (i + 1), NullType.INSTANCE, true, ""));
} else if (children.get(i).getDataType().isArrayType()) {
structFields.add(
new StructField("col" + (i + 1),
((ArrayType) (children.get(i)).getDataType()).getItemType(), true, ""));
arguments.add(children.get(i).getDataType());
} else {
SearchSignature.throwCanNotFoundFunctionException(this.getName(), getArguments());
}
}
return FunctionSignature.of(new StructType(structFields.build()), arguments);
}
@Override
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
return visitor.visitExplodeOuter(this, context);
}
@Override
public FunctionSignature searchSignature(List<FunctionSignature> signatures) {
return super.searchSignature(signatures);
}
}

View File

@ -0,0 +1,95 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.trees.expressions.functions.generator;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.types.NullType;
import org.apache.doris.nereids.types.StringType;
import org.apache.doris.nereids.types.StructType;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.List;
public class ExplodeOuterTest {
/////////////////////////////////////////
// GetSignatures
/////////////////////////////////////////
@Test
public void testGetSignatures() {
// build explode_outer(array<int>, array<str>) expression
Expression[] args = {SlotReference.of("int", ArrayType.of(IntegerType.INSTANCE)),
SlotReference.of("str", ArrayType.of(StringType.INSTANCE))};
ExplodeOuter explode = new ExplodeOuter(args);
// check signature
List<FunctionSignature> signatures = explode.getSignatures();
Assertions.assertEquals(1, signatures.size());
FunctionSignature signature = signatures.get(0);
Assertions.assertEquals(2, signature.argumentsTypes.size());
Assertions.assertTrue(signature.argumentsTypes.get(0).isArrayType());
Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(0)).getItemType().isIntegerType());
Assertions.assertTrue(signature.argumentsTypes.get(1).isArrayType());
Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(1)).getItemType().isStringType());
Assertions.assertTrue(signature.returnType.isStructType());
StructType returnType = (StructType) signature.returnType;
Assertions.assertEquals(2, returnType.getFields().size());
Assertions.assertEquals(IntegerType.INSTANCE, returnType.getFields().get(0).getDataType());
Assertions.assertEquals(StringType.INSTANCE, returnType.getFields().get(1).getDataType());
}
@Test
public void testGetSignaturesWithNull() {
// build explode(null, array<int>) expression
Expression[] args = { SlotReference.of("null", NullType.INSTANCE), SlotReference.of("int", ArrayType.of(IntegerType.INSTANCE))};
ExplodeOuter explode = new ExplodeOuter(args);
// check signature
List<FunctionSignature> signatures = explode.getSignatures();
Assertions.assertEquals(1, signatures.size());
FunctionSignature signature = signatures.get(0);
Assertions.assertEquals(2, signature.argumentsTypes.size());
Assertions.assertTrue(signature.argumentsTypes.get(0).isArrayType());
Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(0)).getItemType().isNullType());
Assertions.assertTrue(signature.argumentsTypes.get(1).isArrayType());
Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(1)).getItemType().isIntegerType());
Assertions.assertTrue(signature.returnType.isStructType());
StructType returnType = (StructType) signature.returnType;
Assertions.assertEquals(2, returnType.getFields().size());
Assertions.assertEquals(NullType.INSTANCE, returnType.getFields().get(0).getDataType());
Assertions.assertEquals(IntegerType.INSTANCE, returnType.getFields().get(1).getDataType());
}
@Test
public void testGetSignaturesWithInvalidArgument() {
// build explode_outer(int)
Expression[] args = { SlotReference.of("int", IntegerType.INSTANCE) };
ExplodeOuter explode = new ExplodeOuter(args);
Assertions.assertThrows(AnalysisException.class, explode::getSignatures);
}
}

View File

@ -0,0 +1,95 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.doris.nereids.trees.expressions.functions.generator;
import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.types.ArrayType;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.types.NullType;
import org.apache.doris.nereids.types.StringType;
import org.apache.doris.nereids.types.StructType;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import java.util.List;
public class ExplodeTest {
/////////////////////////////////////////
// GetSignatures
/////////////////////////////////////////
@Test
public void testGetSignatures() {
// build explode(array<int>, array<str>) expression
Expression[] args = {SlotReference.of("int", ArrayType.of(IntegerType.INSTANCE)),
SlotReference.of("str", ArrayType.of(StringType.INSTANCE))};
Explode explode = new Explode(args);
// check signature
List<FunctionSignature> signatures = explode.getSignatures();
Assertions.assertEquals(1, signatures.size());
FunctionSignature signature = signatures.get(0);
Assertions.assertEquals(2, signature.argumentsTypes.size());
Assertions.assertTrue(signature.argumentsTypes.get(0).isArrayType());
Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(0)).getItemType().isIntegerType());
Assertions.assertTrue(signature.argumentsTypes.get(1).isArrayType());
Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(1)).getItemType().isStringType());
Assertions.assertTrue(signature.returnType.isStructType());
StructType returnType = (StructType) signature.returnType;
Assertions.assertEquals(2, returnType.getFields().size());
Assertions.assertEquals(IntegerType.INSTANCE, returnType.getFields().get(0).getDataType());
Assertions.assertEquals(StringType.INSTANCE, returnType.getFields().get(1).getDataType());
}
@Test
public void testGetSignaturesWithNull() {
// build explode(null, array<int>) expression
Expression[] args = { SlotReference.of("null", NullType.INSTANCE), SlotReference.of("int", ArrayType.of(IntegerType.INSTANCE))};
Explode explode = new Explode(args);
// check signature
List<FunctionSignature> signatures = explode.getSignatures();
Assertions.assertEquals(1, signatures.size());
FunctionSignature signature = signatures.get(0);
Assertions.assertEquals(2, signature.argumentsTypes.size());
Assertions.assertTrue(signature.argumentsTypes.get(0).isArrayType());
Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(0)).getItemType().isNullType());
Assertions.assertTrue(signature.argumentsTypes.get(1).isArrayType());
Assertions.assertTrue(((ArrayType) signature.argumentsTypes.get(1)).getItemType().isIntegerType());
Assertions.assertTrue(signature.returnType.isStructType());
StructType returnType = (StructType) signature.returnType;
Assertions.assertEquals(2, returnType.getFields().size());
Assertions.assertEquals(NullType.INSTANCE, returnType.getFields().get(0).getDataType());
Assertions.assertEquals(IntegerType.INSTANCE, returnType.getFields().get(1).getDataType());
}
@Test
public void testGetSignaturesWithInvalidArgument() {
// build explode(int)
Expression[] args = { SlotReference.of("int", IntegerType.INSTANCE) };
Explode explode = new Explode(args);
Assertions.assertThrows(AnalysisException.class, explode::getSignatures);
}
}

View File

@ -464,3 +464,214 @@
9 {"s4d0hU-uWynWWe-eIf-ym5C":"9", "ANvepQ-oO7fMUr-qdl-XbUo":"4", "G16nau-7qY3fb1-UYL-oiNd":"5", "bVd4er-efUCDow-nzo-87fg":"2", "7I6zLI-hQTTlLe-sBr-oCTh":"7"}
9 {"s4d0hU-uWynWWe-eIf-ym5C":"9", "ANvepQ-oO7fMUr-qdl-XbUo":"4", "G16nau-7qY3fb1-UYL-oiNd":"5", "bVd4er-efUCDow-nzo-87fg":"2", "7I6zLI-hQTTlLe-sBr-oCTh":"7"}
-- !test6 --
1 2
1 3
2 2
2 \N
-- !test7 --
1 4
1 5
1 6
2 14
2 15
3 114
3 115
3 116
-- !test8 --
1 4 2
1 5 3
1 6 \N
2 14 2
2 15 \N
3 114 \N
3 115 \N
3 116 \N
-- !test9 --
1 2 4
1 3 5
1 \N 6
2 2 14
2 \N 15
3 \N 114
3 \N 115
3 \N 116
-- !test10 --
1 2 4 4
1 3 5 5
1 \N 6 6
2 2 14 14
2 \N 15 15
3 \N 114 114
3 \N 115 115
3 \N 116 116
-- !test11 --
1 4 2 4 2
1 4 2 5 3
1 4 2 6 \N
1 5 3 4 2
1 5 3 5 3
1 5 3 6 \N
1 6 \N 4 2
1 6 \N 5 3
1 6 \N 6 \N
2 14 2 14 2
2 14 2 15 \N
2 15 \N 14 2
2 15 \N 15 \N
3 114 \N 114 \N
3 114 \N 115 \N
3 114 \N 116 \N
3 115 \N 114 \N
3 115 \N 115 \N
3 115 \N 116 \N
3 116 \N 114 \N
3 116 \N 115 \N
3 116 \N 116 \N
-- !test12 --
1 2
1 3
2 2
2 \N
3 \N
-- !test13 --
1 4
1 5
1 6
2 14
2 15
3 114
3 115
3 116
-- !test14 --
1 4 2
1 5 3
1 6 \N
2 14 2
2 15 \N
3 114 \N
3 115 \N
3 116 \N
-- !test15 --
1 2 4
1 3 5
1 \N 6
2 2 14
2 \N 15
3 \N 114
3 \N 115
3 \N 116
-- !test16 --
1 2 4 4
1 3 5 5
1 \N 6 6
2 2 14 14
2 \N 15 15
3 \N 114 114
3 \N 115 115
3 \N 116 116
-- !test17 --
1 4 2 4 2
1 4 2 5 3
1 4 2 6 \N
1 5 3 4 2
1 5 3 5 3
1 5 3 6 \N
1 6 \N 4 2
1 6 \N 5 3
1 6 \N 6 \N
2 14 2 14 2
2 14 2 15 \N
2 15 \N 14 2
2 15 \N 15 \N
3 114 \N 114 \N
3 114 \N 115 \N
3 114 \N 116 \N
3 115 \N 114 \N
3 115 \N 115 \N
3 115 \N 116 \N
3 116 \N 114 \N
3 116 \N 115 \N
3 116 \N 116 \N
-- !test18 --
1 2
1 3
2 2
2 \N
-- !test19 --
1 4
1 5
1 6
2 14
2 15
3 114
3 115
3 116
-- !test20 --
1 4 2
1 5 3
1 6 \N
2 14 2
2 15 \N
3 114 \N
3 115 \N
3 116 \N
-- !test21 --
1 2 4
1 3 5
1 \N 6
2 2 14
2 \N 15
3 \N 114
3 \N 115
3 \N 116
-- !test22 --
1 2 4 4
1 3 5 5
1 \N 6 6
2 2 14 14
2 \N 15 15
3 \N 114 114
3 \N 115 115
3 \N 116 116
-- !test23 --
1 4 2 4 2
1 4 2 5 3
1 4 2 6 \N
1 5 3 4 2
1 5 3 5 3
1 5 3 6 \N
1 6 \N 4 2
1 6 \N 5 3
1 6 \N 6 \N
2 14 2 14 2
2 14 2 15 \N
2 15 \N 14 2
2 15 \N 15 \N
3 114 \N 114 \N
3 114 \N 115 \N
3 114 \N 116 \N
3 115 \N 114 \N
3 115 \N 115 \N
3 115 \N 116 \N
3 116 \N 114 \N
3 116 \N 115 \N
3 116 \N 116 \N

View File

@ -137,4 +137,48 @@ suite("explode") {
for (int r = 0; r < res_origin_size_aa.size(); ++ r) {
assertEquals(res_origin_size_aa[r][0], res_explode_aa[r][0])
}
sql "DROP TABLE IF EXISTS array_test;"
sql """
CREATE TABLE `array_test` (
`id` INT NULL,
`array_int` ARRAY<INT> NOT NULL,
`array_string` ARRAY<String> NULL,
`v_int` VARIANT NULL,
`v_string` VARIANT NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"min_load_replica_num" = "-1",
"is_being_synced" = "false",
"storage_medium" = "hdd",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false",
"group_commit_interval_ms" = "10000",
"group_commit_data_bytes" = "134217728"
);
"""
sql """insert into array_test values( 1, [4,5,6], ["2","3"], '{"a": [4,5,6]}', '{"a": ["2","3"]}'),( 2, [14,15], ["2",null], '{"a": [14,15]}', '{"a": ["2",null]}'),( 3, [114,115,116], null, '{"a": [114,115,116]}','{"a": null}');"""
qt_test6 "select id,e1 from array_test as a lateral view explode(a.array_string) tmp1 as e1;"
qt_test7 "select id,e1 from array_test as a lateral view explode(a.array_int) tmp1 as e1;"
qt_test8 "select id,e1,e2 from array_test as a lateral view explode(a.array_int,a.array_string) tmp1 as e1,e2;"
qt_test9 "select id,e1,e2 from array_test as a lateral view explode(a.array_string,a.array_int) tmp1 as e1,e2;"
qt_test10 "select id,e1,e2,e3 from array_test as a lateral view explode(a.array_string,a.array_int,a.array_int) tmp1 as e1,e2,e3;"
qt_test11 "select id,e1,e2,e11,e12 from array_test as a lateral view explode(a.array_int,a.array_string) tmp1 as e1,e2 lateral view explode(a.array_int,a.array_string) tmp2 as e11,e12;"
qt_test12 "select id,e1 from array_test as a lateral view explode_outer(a.array_string) tmp1 as e1;"
qt_test13 "select id,e1 from array_test as a lateral view explode_outer(a.array_int) tmp1 as e1;"
qt_test14 "select id,e1,e2 from array_test as a lateral view explode_outer(a.array_int,a.array_string) tmp1 as e1,e2;"
qt_test15 "select id,e1,e2 from array_test as a lateral view explode_outer(a.array_string,a.array_int) tmp1 as e1,e2;"
qt_test16 "select id,e1,e2,e3 from array_test as a lateral view explode_outer(a.array_string,a.array_int,a.array_int) tmp1 as e1,e2,e3;"
qt_test17 "select id,e1,e2,e11,e12 from array_test as a lateral view explode_outer(a.array_int,a.array_string) tmp1 as e1,e2 lateral view explode_outer(a.array_int,a.array_string) tmp2 as e11,e12;"
}

View File

@ -39,7 +39,7 @@ suite("explode_array_decimal") {
sql """insert into ods_device_data_1d_inc values(1, "[0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,0.8,1,9,5,5,5,5,5,5,5,5,5,0.8,0.8,0.8,0.8,0.8]")"""
sql "SET enable_nereids_planner=false;"
sql "SET enable_nereids_planner=true;"
qt_sql_old_planner """
SELECT * from