From 0fbdf8e3e16a11f48c016a98ae0e2ce69ecae9d4 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Thu, 12 Jan 2023 15:08:21 +0800 Subject: [PATCH] [Refactor](table function) Decouple vectorized table functions from non-vectorized ones (#15772) --- be/src/common/daemon.cpp | 2 - be/src/exprs/CMakeLists.txt | 5 - .../table_function/dummy_table_functions.cpp | 66 ------ .../table_function/dummy_table_functions.h | 52 ----- .../exprs/table_function/explode_bitmap.cpp | 116 ----------- be/src/exprs/table_function/explode_bitmap.h | 50 ----- .../table_function/explode_json_array.cpp | 189 ------------------ .../exprs/table_function/explode_json_array.h | 135 ------------- be/src/exprs/table_function/explode_split.cpp | 93 --------- be/src/exprs/table_function/explode_split.h | 49 ----- .../table_function/table_function_factory.cpp | 108 ---------- be/src/vec/CMakeLists.txt | 1 + be/src/vec/exec/vtable_function_node.cpp | 8 +- be/src/vec/exec/vtable_function_node.h | 7 +- .../exprs/table_function/table_function.h | 30 +-- .../table_function/table_function_factory.cpp | 82 ++++++++ .../table_function/table_function_factory.h | 16 +- be/src/vec/exprs/table_function/vexplode.h | 2 +- .../exprs/table_function/vexplode_bitmap.cpp | 54 ++++- .../exprs/table_function/vexplode_bitmap.h | 18 +- .../table_function/vexplode_json_array.cpp | 108 +++++++++- .../table_function/vexplode_json_array.h | 91 ++++++++- .../exprs/table_function/vexplode_numbers.h | 2 +- .../exprs/table_function/vexplode_split.cpp | 8 + .../vec/exprs/table_function/vexplode_split.h | 21 +- .../vec/functions/simple_function_factory.h | 2 +- be/test/vec/function/function_test_util.h | 2 +- 27 files changed, 382 insertions(+), 935 deletions(-) delete mode 100644 be/src/exprs/table_function/dummy_table_functions.cpp delete mode 100644 be/src/exprs/table_function/dummy_table_functions.h delete mode 100644 be/src/exprs/table_function/explode_bitmap.cpp delete mode 100644 be/src/exprs/table_function/explode_bitmap.h delete mode 100644 be/src/exprs/table_function/explode_json_array.cpp delete mode 100644 be/src/exprs/table_function/explode_json_array.h delete mode 100644 be/src/exprs/table_function/explode_split.cpp delete mode 100644 be/src/exprs/table_function/explode_split.h delete mode 100644 be/src/exprs/table_function/table_function_factory.cpp rename be/src/{ => vec}/exprs/table_function/table_function.h (75%) create mode 100644 be/src/vec/exprs/table_function/table_function_factory.cpp rename be/src/{ => vec}/exprs/table_function/table_function_factory.h (74%) diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 16dd61d59c..296df8afbd 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -43,7 +43,6 @@ #include "exprs/operators.h" #include "exprs/quantile_function.h" #include "exprs/string_functions.h" -#include "exprs/table_function/dummy_table_functions.h" #include "exprs/time_operators.h" #include "exprs/timestamp_functions.h" #include "exprs/topn_function.h" @@ -414,7 +413,6 @@ void Daemon::init(int argc, char** argv, const std::vector& paths) { QuantileStateFunctions::init(); HashFunctions::init(); TopNFunctions::init(); - DummyTableFunctions::init(); MatchPredicate::init(); LOG(INFO) << CpuInfo::debug_string(); diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt index 8e8839b342..98104ccf35 100644 --- a/be/src/exprs/CMakeLists.txt +++ b/be/src/exprs/CMakeLists.txt @@ -74,9 +74,4 @@ add_library(Exprs quantile_function.cpp grouping_sets_functions.cpp topn_function.cpp - table_function/explode_split.cpp - table_function/explode_bitmap.cpp - table_function/explode_json_array.cpp - table_function/table_function_factory.cpp - table_function/dummy_table_functions.cpp ) diff --git a/be/src/exprs/table_function/dummy_table_functions.cpp b/be/src/exprs/table_function/dummy_table_functions.cpp deleted file mode 100644 index a1d9b5a4fd..0000000000 --- a/be/src/exprs/table_function/dummy_table_functions.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/table_function/dummy_table_functions.h" - -#include "runtime/collection_value.h" - -namespace doris { - -void DummyTableFunctions::init() {} - -StringVal DummyTableFunctions::explode_split(FunctionContext* context, const StringVal& str, - const StringVal& sep) { - return StringVal(); -} - -BigIntVal DummyTableFunctions::explode_bitmap(doris_udf::FunctionContext* context, - const doris_udf::StringVal& bitmap) { - return BigIntVal(); -} - -BigIntVal DummyTableFunctions::explode_json_array_int(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str) { - return BigIntVal(); -} - -DoubleVal DummyTableFunctions::explode_json_array_double(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str) { - return DoubleVal(); -} - -StringVal DummyTableFunctions::explode_json_array_string(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str) { - return StringVal(); -} - -IntVal DummyTableFunctions::explode_numbers(doris_udf::FunctionContext* context, - const doris_udf::IntVal& str) { - return IntVal(); -} - -AnyVal DummyTableFunctions::explode(doris_udf::FunctionContext* context, - const doris_udf::CollectionVal& value) { - return AnyVal(); -} - -AnyVal DummyTableFunctions::explode_outer(doris_udf::FunctionContext* context, - const doris_udf::CollectionVal& value) { - return AnyVal(); -} - -} // namespace doris diff --git a/be/src/exprs/table_function/dummy_table_functions.h b/be/src/exprs/table_function/dummy_table_functions.h deleted file mode 100644 index 59730cce91..0000000000 --- a/be/src/exprs/table_function/dummy_table_functions.h +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "udf/udf.h" - -namespace doris { - -// Currently Doris does not support array types, so the definition of table function -// is still using the definition of the scalar function. -// The definition here is just to facilitate the query planning stage and the query execution preparation stage -// to make smooth use of the existing function framework -// But the execution logic of the table function is not here. So the function names here are prefixed with "dummy". -// TODO: refactor here after we support real array type. -class DummyTableFunctions { -public: - static void init(); - - static doris_udf::StringVal explode_split(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str, - const doris_udf::StringVal& sep); - static doris_udf::BigIntVal explode_bitmap(doris_udf::FunctionContext* context, - const doris_udf::StringVal& bitmap); - static doris_udf::BigIntVal explode_json_array_int(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::DoubleVal explode_json_array_double(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::StringVal explode_json_array_string(doris_udf::FunctionContext* context, - const doris_udf::StringVal& str); - static doris_udf::IntVal explode_numbers(doris_udf::FunctionContext* context, - const doris_udf::IntVal& value); - static doris_udf::AnyVal explode(doris_udf::FunctionContext* context, - const doris_udf::CollectionVal& value); - static doris_udf::AnyVal explode_outer(doris_udf::FunctionContext* context, - const doris_udf::CollectionVal& value); -}; -} // namespace doris diff --git a/be/src/exprs/table_function/explode_bitmap.cpp b/be/src/exprs/table_function/explode_bitmap.cpp deleted file mode 100644 index 26d01891ba..0000000000 --- a/be/src/exprs/table_function/explode_bitmap.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/table_function/explode_bitmap.h" - -#include "exprs/expr_context.h" - -namespace doris { - -ExplodeBitmapTableFunction::ExplodeBitmapTableFunction() { - _fn_name = "explode_bitmap"; -} - -ExplodeBitmapTableFunction::~ExplodeBitmapTableFunction() { - if (_cur_iter != nullptr) { - delete _cur_iter; - _cur_iter = nullptr; - } - if (_cur_bitmap_owned && _cur_bitmap != nullptr) { - delete _cur_bitmap; - _cur_bitmap = nullptr; - } -} - -Status ExplodeBitmapTableFunction::process(TupleRow* tuple_row) { - CHECK(1 == _expr_context->root()->get_num_children()) - << _expr_context->root()->get_num_children(); - _eos = false; - _is_current_empty = false; - _cur_size = 0; - _cur_offset = 0; - - StringVal bitmap_str = - _expr_context->root()->get_child(0)->get_string_val(_expr_context, tuple_row); - if (bitmap_str.is_null) { - _is_current_empty = true; - } else { - if (bitmap_str.len == 0) { - _cur_bitmap = reinterpret_cast(bitmap_str.ptr); - _cur_bitmap_owned = false; - } else { - _cur_bitmap = new BitmapValue((char*)bitmap_str.ptr); - _cur_bitmap_owned = true; - } - _cur_size = _cur_bitmap->cardinality(); - if (_cur_size == 0) { - _is_current_empty = true; - } else { - _reset_iterator(); - } - } - - return Status::OK(); -} - -void ExplodeBitmapTableFunction::_reset_iterator() { - DCHECK(_cur_bitmap->cardinality() > 0) << _cur_bitmap->cardinality(); - if (_cur_iter != nullptr) { - delete _cur_iter; - _cur_iter = nullptr; - } - _cur_iter = new BitmapValueIterator(*_cur_bitmap); - _cur_value = **_cur_iter; - _cur_offset = 0; -} - -Status ExplodeBitmapTableFunction::reset() { - _eos = false; - if (!_is_current_empty) { - _reset_iterator(); - } - return Status::OK(); -} - -Status ExplodeBitmapTableFunction::get_value(void** output) { - if (_is_current_empty) { - *output = nullptr; - } else { - *output = &_cur_value; - } - return Status::OK(); -} - -Status ExplodeBitmapTableFunction::forward(bool* eos) { - if (_is_current_empty) { - *eos = true; - _eos = true; - } else { - ++(*_cur_iter); - ++_cur_offset; - if (_cur_offset == _cur_size) { - *eos = true; - _eos = true; - } else { - _cur_value = **_cur_iter; - *eos = false; - } - } - return Status::OK(); -} - -} // namespace doris diff --git a/be/src/exprs/table_function/explode_bitmap.h b/be/src/exprs/table_function/explode_bitmap.h deleted file mode 100644 index c75559d023..0000000000 --- a/be/src/exprs/table_function/explode_bitmap.h +++ /dev/null @@ -1,50 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "exprs/table_function/table_function.h" -#include "util/bitmap_value.h" - -namespace doris { - -class ExplodeBitmapTableFunction : public TableFunction { -public: - ExplodeBitmapTableFunction(); - virtual ~ExplodeBitmapTableFunction(); - - virtual Status process(TupleRow* tuple_row) override; - virtual Status reset() override; - virtual Status get_value(void** output) override; - - virtual Status forward(bool* eos) override; - -protected: - void _reset_iterator(); - - // Read from tuple row. - // if _cur_bitmap_owned is true, need to delete it when deconstruction - const BitmapValue* _cur_bitmap = nullptr; - bool _cur_bitmap_owned = false; - // iterator of _cur_bitmap - BitmapValueIterator* _cur_iter = nullptr; - // current value read from bitmap, it will be referenced by - // table function scan node. - uint64_t _cur_value = 0; -}; - -} // namespace doris diff --git a/be/src/exprs/table_function/explode_json_array.cpp b/be/src/exprs/table_function/explode_json_array.cpp deleted file mode 100644 index 373dcd947c..0000000000 --- a/be/src/exprs/table_function/explode_json_array.cpp +++ /dev/null @@ -1,189 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/table_function/explode_json_array.h" - -#include "exprs/expr_context.h" - -namespace doris { - -std::string ParsedData::true_value = "true"; -std::string ParsedData::false_value = "false"; - -int ParsedData::set_output(ExplodeJsonArrayType type, rapidjson::Document& document) { - int size = document.GetArray().Size(); - switch (type) { - case ExplodeJsonArrayType::INT: { - _data.resize(size); - _backup_int.resize(size); - int i = 0; - for (auto& v : document.GetArray()) { - if (v.IsInt64()) { - _backup_int[i] = v.GetInt64(); - _data[i] = &_backup_int[i]; - } else { - _data[i] = nullptr; - } - ++i; - } - break; - } - case ExplodeJsonArrayType::DOUBLE: { - _data.resize(size); - _backup_double.resize(size); - int i = 0; - for (auto& v : document.GetArray()) { - if (v.IsDouble()) { - _backup_double[i] = v.GetDouble(); - _data[i] = &_backup_double[i]; - } else { - _data[i] = nullptr; - } - ++i; - } - break; - } - case ExplodeJsonArrayType::STRING: { - _data_string.clear(); - _backup_string.clear(); - _string_nulls.clear(); - int32_t wbytes = 0; - for (auto& v : document.GetArray()) { - switch (v.GetType()) { - case rapidjson::Type::kStringType: - _backup_string.emplace_back(v.GetString(), v.GetStringLength()); - _string_nulls.push_back(false); - // do not set _data_string here. - // Because the address of the string stored in `_backup_string` may - // change each time `emplace_back()` is called. - break; - case rapidjson::Type::kNumberType: - if (v.IsUint()) { - wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%u", v.GetUint()); - } else if (v.IsInt()) { - wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", v.GetInt()); - } else if (v.IsUint64()) { - wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRIu64, v.GetUint64()); - } else if (v.IsInt64()) { - wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, v.GetInt64()); - } else { - wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%f", v.GetDouble()); - } - _backup_string.emplace_back(tmp_buf, wbytes); - _string_nulls.push_back(false); - // do not set _data_string here. - // Because the address of the string stored in `_backup_string` may - // change each time `emplace_back()` is called. - break; - case rapidjson::Type::kFalseType: - _data_string.emplace_back(true_value); - _string_nulls.push_back(false); - break; - case rapidjson::Type::kTrueType: - _data_string.emplace_back(false_value); - _string_nulls.push_back(false); - break; - case rapidjson::Type::kNullType: - _data_string.push_back({}); - _string_nulls.push_back(true); - break; - default: - _data_string.push_back({}); - _string_nulls.push_back(true); - break; - } - } - // Must set _data_string at the end, so that we can - // save the real addr of string in `_backup_string` to `_data_string`. - for (auto& str : _backup_string) { - _data_string.emplace_back(str); - } - break; - } - default: - CHECK(false) << type; - break; - } - return size; -} - -///////////////////////// -ExplodeJsonArrayTableFunction::ExplodeJsonArrayTableFunction(ExplodeJsonArrayType type) - : _type(type) { - switch (type) { - case ExplodeJsonArrayType::INT: - _fn_name = "explode_json_array_int"; - break; - case ExplodeJsonArrayType::DOUBLE: - _fn_name = "explode_json_array_double"; - break; - case ExplodeJsonArrayType::STRING: - _fn_name = "explode_json_array_string"; - break; - default: - _fn_name = "unknown"; - break; - } -} - -ExplodeJsonArrayTableFunction::~ExplodeJsonArrayTableFunction() {} - -Status ExplodeJsonArrayTableFunction::process(TupleRow* tuple_row) { - CHECK(1 == _expr_context->root()->get_num_children()) - << _expr_context->root()->get_num_children(); - _is_current_empty = false; - _eos = false; - - StringVal text = _expr_context->root()->get_child(0)->get_string_val(_expr_context, tuple_row); - if (text.is_null || text.len == 0) { - _is_current_empty = true; - } else { - rapidjson::Document document; - document.Parse((char*)text.ptr, text.len); - if (UNLIKELY(document.HasParseError()) || !document.IsArray() || - document.GetArray().Size() == 0) { - _is_current_empty = true; - } else { - _cur_size = _parsed_data.set_output(_type, document); - _cur_offset = 0; - } - } - return Status::OK(); -} - -void ExplodeJsonArrayTableFunction::_set_null_output() { - _parsed_data.set_null_output(_type); - _cur_size = 1; - _cur_offset = 0; - _eos = false; -} - -Status ExplodeJsonArrayTableFunction::reset() { - _eos = false; - _cur_offset = 0; - return Status::OK(); -} - -Status ExplodeJsonArrayTableFunction::get_value(void** output) { - if (_is_current_empty) { - *output = nullptr; - } else { - _parsed_data.get_value(_type, _cur_offset, output); - } - return Status::OK(); -} -} // namespace doris diff --git a/be/src/exprs/table_function/explode_json_array.h b/be/src/exprs/table_function/explode_json_array.h deleted file mode 100644 index b246c34f71..0000000000 --- a/be/src/exprs/table_function/explode_json_array.h +++ /dev/null @@ -1,135 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "exprs/table_function/table_function.h" -#include "runtime/string_value.h" - -namespace doris { - -enum ExplodeJsonArrayType { INT = 0, DOUBLE, STRING }; - -struct ParsedData { - static std::string true_value; - static std::string false_value; - - // The number parsed from json array - // the `_backup` saved the real number entity. - std::vector _data; - std::vector _data_string; - std::vector _backup_int; - std::vector _backup_double; - std::vector _backup_string; - std::vector _string_nulls; - char tmp_buf[128] = {0}; - - void reset(ExplodeJsonArrayType type) { - switch (type) { - case ExplodeJsonArrayType::INT: - _data.clear(); - _backup_int.clear(); - break; - case ExplodeJsonArrayType::DOUBLE: - _data.clear(); - _backup_double.clear(); - break; - case ExplodeJsonArrayType::STRING: - _data_string.clear(); - _backup_string.clear(); - _string_nulls.clear(); - break; - default: - CHECK(false) << type; - break; - } - } - - void set_null_output(ExplodeJsonArrayType type) { - switch (type) { - case ExplodeJsonArrayType::INT: - case ExplodeJsonArrayType::DOUBLE: - _data.resize(1); - _data[0] = nullptr; - break; - case ExplodeJsonArrayType::STRING: - _string_nulls.resize(1); - _string_nulls[0] = true; - break; - default: - CHECK(false) << type; - break; - } - } - - void get_value(ExplodeJsonArrayType type, int64_t offset, void** output, bool real = false) { - switch (type) { - case ExplodeJsonArrayType::INT: - case ExplodeJsonArrayType::DOUBLE: - *output = _data[offset]; - break; - case ExplodeJsonArrayType::STRING: - *output = _string_nulls[offset] ? nullptr - : real ? reinterpret_cast(_backup_string[offset].data()) - : &_data_string[offset]; - break; - default: - CHECK(false) << type; - } - } - - void get_value_length(ExplodeJsonArrayType type, int64_t offset, int64_t* length) { - switch (type) { - case ExplodeJsonArrayType::INT: - case ExplodeJsonArrayType::DOUBLE: - break; - case ExplodeJsonArrayType::STRING: - *length = _string_nulls[offset] ? -1 : _backup_string[offset].size(); - break; - default: - CHECK(false) << type; - } - } - - int set_output(ExplodeJsonArrayType type, rapidjson::Document& document); -}; - -// Input: json array: [1,2,3,4,5]; -// Output: rows: 1,2,3,4,5 -// If json array contains non-numeric type, or is not a json array, will return null -class ExplodeJsonArrayTableFunction : public TableFunction { -public: - ExplodeJsonArrayTableFunction(ExplodeJsonArrayType type); - virtual ~ExplodeJsonArrayTableFunction(); - - virtual Status process(TupleRow* tuple_row) override; - virtual Status reset() override; - virtual Status get_value(void** output) override; - -private: - void _set_null_output(); - -protected: - ParsedData _parsed_data; - - ExplodeJsonArrayType _type; -}; - -} // namespace doris diff --git a/be/src/exprs/table_function/explode_split.cpp b/be/src/exprs/table_function/explode_split.cpp deleted file mode 100644 index 0c704a6654..0000000000 --- a/be/src/exprs/table_function/explode_split.cpp +++ /dev/null @@ -1,93 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/table_function/explode_split.h" - -#include "exprs/expr_context.h" -#include "exprs/scalar_fn_call.h" -#include "gutil/strings/split.h" - -namespace doris { - -ExplodeSplitTableFunction::ExplodeSplitTableFunction() { - _fn_name = "explode_split"; -} - -ExplodeSplitTableFunction::~ExplodeSplitTableFunction() {} - -Status ExplodeSplitTableFunction::open() { - ScalarFnCall* fn_call = reinterpret_cast(_expr_context->root()); - FunctionContext* fn_ctx = _expr_context->fn_context(fn_call->get_fn_context_index()); - CHECK(2 == fn_ctx->get_num_constant_args()) << fn_ctx->get_num_constant_args(); - // check if the delimiter argument(the 2nd arg) is constant. - // if yes, cache it - if (fn_ctx->is_arg_constant(1)) { - _is_delimiter_constant = true; - StringVal* delimiter = reinterpret_cast(fn_ctx->get_constant_arg(1)); - _const_delimter = StringPiece((char*)delimiter->ptr, delimiter->len); - } - return Status::OK(); -} - -Status ExplodeSplitTableFunction::process(TupleRow* tuple_row) { - CHECK(2 == _expr_context->root()->get_num_children()) - << _expr_context->root()->get_num_children(); - _is_current_empty = false; - _eos = false; - - _data.clear(); - StringVal text = _expr_context->root()->get_child(0)->get_string_val(_expr_context, tuple_row); - if (text.is_null) { - _is_current_empty = true; - _cur_size = 0; - _cur_offset = 0; - } else { - if (_is_delimiter_constant) { - _backup = strings::Split(StringPiece((char*)text.ptr, text.len), _const_delimter); - } else { - StringVal delimiter = - _expr_context->root()->get_child(1)->get_string_val(_expr_context, tuple_row); - _backup = strings::Split(StringPiece((char*)text.ptr, text.len), - StringPiece((char*)delimiter.ptr, delimiter.len)); - } - for (const std::string& str : _backup) { - _data.emplace_back(str); - } - _cur_size = _backup.size(); - _cur_offset = 0; - _is_current_empty = (_cur_size == 0); - } - return Status::OK(); -} - -Status ExplodeSplitTableFunction::reset() { - _eos = false; - if (!_is_current_empty) { - _cur_offset = 0; - } - return Status::OK(); -} - -Status ExplodeSplitTableFunction::get_value(void** output) { - if (_is_current_empty) { - *output = nullptr; - } else { - *output = &_data[_cur_offset]; - } - return Status::OK(); -} -} // namespace doris diff --git a/be/src/exprs/table_function/explode_split.h b/be/src/exprs/table_function/explode_split.h deleted file mode 100644 index 0fea203b0b..0000000000 --- a/be/src/exprs/table_function/explode_split.h +++ /dev/null @@ -1,49 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "exprs/table_function/table_function.h" -#include "gutil/strings/stringpiece.h" -#include "runtime/string_value.h" - -namespace doris { - -class ExplodeSplitTableFunction : public TableFunction { -public: - ExplodeSplitTableFunction(); - virtual ~ExplodeSplitTableFunction(); - - virtual Status open() override; - virtual Status process(TupleRow* tuple_row) override; - virtual Status reset() override; - virtual Status get_value(void** output) override; - -protected: - // The string value splitted from source, and will be referenced by - // table function scan node. - // the `_backup` saved the real string entity. - std::vector _data; - std::vector _backup; - - // indicate whether the delimiter is constant. - // if true, the constant delimiter will be saved in `_const_delimter` - bool _is_delimiter_constant = false; - StringPiece _const_delimter; -}; - -} // namespace doris diff --git a/be/src/exprs/table_function/table_function_factory.cpp b/be/src/exprs/table_function/table_function_factory.cpp deleted file mode 100644 index 7b14e011dd..0000000000 --- a/be/src/exprs/table_function/table_function_factory.cpp +++ /dev/null @@ -1,108 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/table_function/table_function_factory.h" - -namespace doris { - -template -struct TableFunctionCreator { - TableFunction* operator()() { return new TableFunctionType(); } -}; - -template <> -struct TableFunctionCreator { - ExplodeJsonArrayType type; - TableFunction* operator()() const { return new ExplodeJsonArrayTableFunction(type); } -}; - -template <> -struct TableFunctionCreator { - ExplodeJsonArrayType type; - TableFunction* operator()() const { - return new vectorized::VExplodeJsonArrayTableFunction(type); - } -}; - -inline auto ExplodeJsonArrayIntCreator = - TableFunctionCreator {ExplodeJsonArrayType::INT}; -inline auto ExplodeJsonArrayDoubleCreator = - TableFunctionCreator {ExplodeJsonArrayType::DOUBLE}; -inline auto ExplodeJsonArrayStringCreator = - TableFunctionCreator {ExplodeJsonArrayType::STRING}; - -inline auto VExplodeJsonArrayIntCreator = - TableFunctionCreator { - ExplodeJsonArrayType::INT}; -inline auto VExplodeJsonArrayDoubleCreator = - TableFunctionCreator { - ExplodeJsonArrayType::DOUBLE}; -inline auto VExplodeJsonArrayStringCreator = - TableFunctionCreator { - ExplodeJsonArrayType::STRING}; - -// {fn_name, is_vectorized} -> table_function_creator -const std::unordered_map, std::function> - TableFunctionFactory::_function_map { - {{"explode_split", false}, TableFunctionCreator {}}, - {{"explode_bitmap", false}, TableFunctionCreator {}}, - {{"explode_json_array_int", false}, ExplodeJsonArrayIntCreator}, - {{"explode_json_array_double", false}, ExplodeJsonArrayDoubleCreator}, - {{"explode_json_array_string", false}, ExplodeJsonArrayStringCreator}, - {{"explode_split", true}, - TableFunctionCreator()}, - {{"explode_numbers", true}, - TableFunctionCreator()}, - {{"explode_json_array_int", true}, VExplodeJsonArrayIntCreator}, - {{"explode_json_array_double", true}, VExplodeJsonArrayDoubleCreator}, - {{"explode_json_array_string", true}, VExplodeJsonArrayStringCreator}, - {{"explode_bitmap", true}, - TableFunctionCreator()}, - {{"explode", true}, TableFunctionCreator {}}}; - -Status TableFunctionFactory::get_fn(const std::string& fn_name_raw, bool is_vectorized, - ObjectPool* pool, TableFunction** fn) { - auto match_suffix = [](const std::string& name, const std::string& suffix) -> bool { - if (name.length() < suffix.length()) { - return false; - } - return name.substr(name.length() - suffix.length()) == suffix; - }; - - auto remove_suffix = [](const std::string& name, const std::string& suffix) -> std::string { - return name.substr(0, name.length() - suffix.length()); - }; - - bool is_outer = match_suffix(fn_name_raw, COMBINATOR_SUFFIX_OUTER); - std::string fn_name_real = - is_outer ? remove_suffix(fn_name_raw, COMBINATOR_SUFFIX_OUTER) : fn_name_raw; - - auto fn_iterator = _function_map.find({fn_name_real, is_vectorized}); - if (fn_iterator != _function_map.end()) { - *fn = pool->add(fn_iterator->second()); - if (is_outer) { - (*fn)->set_outer(); - } - - return Status::OK(); - } - - return Status::NotSupported("{}table function {} not support", - std::string(is_vectorized ? "vectorized " : ""), fn_name_raw); -} - -} // namespace doris diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 3ffe07c27a..794562d194 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -143,6 +143,7 @@ set(VEC_FILES exprs/vcast_expr.cpp exprs/vcase_expr.cpp exprs/vinfo_func.cpp + exprs/table_function/table_function_factory.cpp exprs/table_function/vexplode.cpp exprs/table_function/vexplode_split.cpp exprs/table_function/vexplode_numbers.cpp diff --git a/be/src/vec/exec/vtable_function_node.cpp b/be/src/vec/exec/vtable_function_node.cpp index 8813bce081..c3e9f2a829 100644 --- a/be/src/vec/exec/vtable_function_node.cpp +++ b/be/src/vec/exec/vtable_function_node.cpp @@ -17,13 +17,13 @@ #include "vec/exec/vtable_function_node.h" -#include "exprs/table_function/table_function.h" -#include "exprs/table_function/table_function_factory.h" +#include "vec/exprs/table_function/table_function.h" +#include "vec/exprs/table_function/table_function_factory.h" #include "vec/exprs/vexpr.h" namespace doris::vectorized { -VTableFunctionNode::VTableFunctionNode(ObjectPool* pool, const TPlanNode& tnode, +VTableFunctionNode::VTableFunctionNode(doris::ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ExecNode(pool, tnode, descs) {} @@ -38,7 +38,7 @@ Status VTableFunctionNode::init(const TPlanNode& tnode, RuntimeState* state) { VExpr* root = ctx->root(); const std::string& tf_name = root->fn().name.function_name; TableFunction* fn = nullptr; - RETURN_IF_ERROR(TableFunctionFactory::get_fn(tf_name, true, _pool, &fn)); + RETURN_IF_ERROR(TableFunctionFactory::get_fn(tf_name, _pool, &fn)); fn->set_vexpr_context(ctx); _fns.push_back(fn); } diff --git a/be/src/vec/exec/vtable_function_node.h b/be/src/vec/exec/vtable_function_node.h index e28a700f46..7f6ff1be4b 100644 --- a/be/src/vec/exec/vtable_function_node.h +++ b/be/src/vec/exec/vtable_function_node.h @@ -18,15 +18,14 @@ #pragma once #include "exec/exec_node.h" -#include "exprs/expr.h" -#include "exprs/table_function/table_function.h" +#include "vec/exprs/table_function/table_function.h" #include "vec/exprs/vexpr.h" namespace doris::vectorized { -class VTableFunctionNode : public ExecNode { +class VTableFunctionNode final : public ExecNode { public: - VTableFunctionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); + VTableFunctionNode(doris::ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs); ~VTableFunctionNode() override = default; Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override; diff --git a/be/src/exprs/table_function/table_function.h b/be/src/vec/exprs/table_function/table_function.h similarity index 75% rename from be/src/exprs/table_function/table_function.h rename to be/src/vec/exprs/table_function/table_function.h index 6326406e75..68e6829df5 100644 --- a/be/src/exprs/table_function/table_function.h +++ b/be/src/vec/exprs/table_function/table_function.h @@ -24,16 +24,10 @@ #include "vec/core/block.h" #include "vec/exprs/vexpr_context.h" -namespace doris { - -// TODO: think about how to manager memeory consumption of table functions. -// Currently, the memory allocated from table function is from malloc directly. -class TableFunctionState {}; +namespace doris::vectorized { constexpr auto COMBINATOR_SUFFIX_OUTER = "_outer"; -class ExprContext; -class TupleRow; class TableFunction { public: virtual ~TableFunction() {} @@ -42,24 +36,14 @@ public: virtual Status open() { return Status::OK(); } - virtual Status process(TupleRow* tuple_row) { - return Status::NotSupported("table function {} not supported now.", _fn_name); - } + // only used for vectorized. + virtual Status process_init(vectorized::Block* block) = 0; // only used for vectorized. - virtual Status process_init(vectorized::Block* block) { - return Status::NotSupported("vectorized table function {} not supported now.", _fn_name); - } + virtual Status process_row(size_t row_idx) = 0; // only used for vectorized. - virtual Status process_row(size_t row_idx) { - return Status::NotSupported("vectorized table function {} not supported now.", _fn_name); - } - - // only used for vectorized. - virtual Status process_close() { - return Status::NotSupported("vectorized table function {} not supported now.", _fn_name); - } + virtual Status process_close() = 0; virtual Status reset() = 0; @@ -92,7 +76,6 @@ public: std::string name() const { return _fn_name; } bool eos() const { return _eos; } - void set_expr_context(ExprContext* expr_context) { _expr_context = expr_context; } void set_vexpr_context(vectorized::VExprContext* vexpr_context) { _vexpr_context = vexpr_context; } @@ -110,7 +93,6 @@ public: protected: std::string _fn_name; - ExprContext* _expr_context = nullptr; vectorized::VExprContext* _vexpr_context = nullptr; // true if there is no more data can be read from this function. bool _eos = false; @@ -124,4 +106,4 @@ protected: // set _is_outer to false for explode function, and should not return tuple while array is null or empty bool _is_outer = false; }; -} // namespace doris +} // namespace doris::vectorized diff --git a/be/src/vec/exprs/table_function/table_function_factory.cpp b/be/src/vec/exprs/table_function/table_function_factory.cpp new file mode 100644 index 0000000000..c09a3b365a --- /dev/null +++ b/be/src/vec/exprs/table_function/table_function_factory.cpp @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/exprs/table_function/table_function_factory.h" + +#include "common/object_pool.h" + +namespace doris::vectorized { + +template +struct TableFunctionCreator { + TableFunction* operator()() { return new TableFunctionType(); } +}; + +template <> +struct TableFunctionCreator { + ExplodeJsonArrayType type; + TableFunction* operator()() const { return new VExplodeJsonArrayTableFunction(type); } +}; + +inline auto VExplodeJsonArrayIntCreator = + TableFunctionCreator {ExplodeJsonArrayType::INT}; +inline auto VExplodeJsonArrayDoubleCreator = + TableFunctionCreator {ExplodeJsonArrayType::DOUBLE}; +inline auto VExplodeJsonArrayStringCreator = + TableFunctionCreator {ExplodeJsonArrayType::STRING}; + +const std::unordered_map> + TableFunctionFactory::_function_map { + {"explode_split", TableFunctionCreator()}, + {"explode_numbers", TableFunctionCreator()}, + {"explode_json_array_int", VExplodeJsonArrayIntCreator}, + {"explode_json_array_double", VExplodeJsonArrayDoubleCreator}, + {"explode_json_array_string", VExplodeJsonArrayStringCreator}, + {"explode_bitmap", TableFunctionCreator()}, + {"explode", TableFunctionCreator {}}}; + +Status TableFunctionFactory::get_fn(const std::string& fn_name_raw, ObjectPool* pool, + TableFunction** fn) { + auto match_suffix = [](const std::string& name, const std::string& suffix) -> bool { + if (name.length() < suffix.length()) { + return false; + } + return name.substr(name.length() - suffix.length()) == suffix; + }; + + auto remove_suffix = [](const std::string& name, const std::string& suffix) -> std::string { + return name.substr(0, name.length() - suffix.length()); + }; + + bool is_outer = match_suffix(fn_name_raw, COMBINATOR_SUFFIX_OUTER); + std::string fn_name_real = + is_outer ? remove_suffix(fn_name_raw, COMBINATOR_SUFFIX_OUTER) : fn_name_raw; + + auto fn_iterator = _function_map.find(fn_name_real); + if (fn_iterator != _function_map.end()) { + *fn = pool->add(fn_iterator->second()); + if (is_outer) { + (*fn)->set_outer(); + } + + return Status::OK(); + } + + return Status::NotSupported("Table function {} is not support", fn_name_raw); +} + +} // namespace doris::vectorized diff --git a/be/src/exprs/table_function/table_function_factory.h b/be/src/vec/exprs/table_function/table_function_factory.h similarity index 74% rename from be/src/exprs/table_function/table_function_factory.h rename to be/src/vec/exprs/table_function/table_function_factory.h index 05d4fa2e74..456dba0ddc 100644 --- a/be/src/exprs/table_function/table_function_factory.h +++ b/be/src/vec/exprs/table_function/table_function_factory.h @@ -22,10 +22,7 @@ #include "common/object_pool.h" #include "common/status.h" -#include "exprs/table_function/explode_bitmap.h" -#include "exprs/table_function/explode_json_array.h" -#include "exprs/table_function/explode_split.h" -#include "exprs/table_function/table_function.h" +#include "vec/exprs/table_function/table_function.h" #include "vec/exprs/table_function/vexplode.h" #include "vec/exprs/table_function/vexplode_bitmap.h" #include "vec/exprs/table_function/vexplode_json_array.h" @@ -33,18 +30,17 @@ #include "vec/exprs/table_function/vexplode_split.h" namespace doris { - class ObjectPool; + +namespace vectorized { class TableFunction; class TableFunctionFactory { public: TableFunctionFactory() {} ~TableFunctionFactory() {} - static Status get_fn(const std::string& fn_name_raw, bool is_vectorized, ObjectPool* pool, - TableFunction** fn); + static Status get_fn(const std::string& fn_name_raw, ObjectPool* pool, TableFunction** fn); - const static std::unordered_map, std::function> - _function_map; + const static std::unordered_map> _function_map; }; - +} // namespace vectorized } // namespace doris diff --git a/be/src/vec/exprs/table_function/vexplode.h b/be/src/vec/exprs/table_function/vexplode.h index b82f9be2e8..3bc8ba9ef0 100644 --- a/be/src/vec/exprs/table_function/vexplode.h +++ b/be/src/vec/exprs/table_function/vexplode.h @@ -17,11 +17,11 @@ #pragma once -#include "exprs/table_function/table_function.h" #include "vec/columns/column.h" #include "vec/columns/column_array.h" #include "vec/columns/column_nullable.h" #include "vec/common/string_ref.h" +#include "vec/exprs/table_function/table_function.h" #include "vec/functions/array/function_array_utils.h" namespace doris::vectorized { diff --git a/be/src/vec/exprs/table_function/vexplode_bitmap.cpp b/be/src/vec/exprs/table_function/vexplode_bitmap.cpp index f015ac7330..4269a34339 100644 --- a/be/src/vec/exprs/table_function/vexplode_bitmap.cpp +++ b/be/src/vec/exprs/table_function/vexplode_bitmap.cpp @@ -27,6 +27,13 @@ VExplodeBitmapTableFunction::VExplodeBitmapTableFunction() { _fn_name = "vexplode_bitmap"; } +VExplodeBitmapTableFunction::~VExplodeBitmapTableFunction() { + if (_cur_iter != nullptr) { + delete _cur_iter; + _cur_iter = nullptr; + } +} + Status VExplodeBitmapTableFunction::process_init(vectorized::Block* block) { CHECK(_vexpr_context->root()->children().size() == 1) << "VExplodeNumbersTableFunction must be have 1 children but have " @@ -40,6 +47,52 @@ Status VExplodeBitmapTableFunction::process_init(vectorized::Block* block) { return Status::OK(); } +Status VExplodeBitmapTableFunction::reset() { + _eos = false; + if (!_is_current_empty) { + _reset_iterator(); + } + return Status::OK(); +} + +Status VExplodeBitmapTableFunction::forward(bool* eos) { + if (_is_current_empty) { + *eos = true; + _eos = true; + } else { + ++(*_cur_iter); + ++_cur_offset; + if (_cur_offset == _cur_size) { + *eos = true; + _eos = true; + } else { + _cur_value = **_cur_iter; + *eos = false; + } + } + return Status::OK(); +} + +Status VExplodeBitmapTableFunction::get_value(void** output) { + if (_is_current_empty) { + *output = nullptr; + } else { + *output = &_cur_value; + } + return Status::OK(); +} + +void VExplodeBitmapTableFunction::_reset_iterator() { + DCHECK(_cur_bitmap->cardinality() > 0) << _cur_bitmap->cardinality(); + if (_cur_iter != nullptr) { + delete _cur_iter; + _cur_iter = nullptr; + } + _cur_iter = new BitmapValueIterator(*_cur_bitmap); + _cur_value = **_cur_iter; + _cur_offset = 0; +} + Status VExplodeBitmapTableFunction::process_row(size_t row_idx) { _eos = false; _is_current_empty = false; @@ -52,7 +105,6 @@ Status VExplodeBitmapTableFunction::process_row(size_t row_idx) { _is_current_empty = true; } else { _cur_bitmap = reinterpret_cast(value.data); - _cur_bitmap_owned = false; _cur_size = _cur_bitmap->cardinality(); if (_cur_size == 0) { diff --git a/be/src/vec/exprs/table_function/vexplode_bitmap.h b/be/src/vec/exprs/table_function/vexplode_bitmap.h index 5e46ab20c8..535fdaf645 100644 --- a/be/src/vec/exprs/table_function/vexplode_bitmap.h +++ b/be/src/vec/exprs/table_function/vexplode_bitmap.h @@ -17,16 +17,20 @@ #pragma once -#include "exprs/table_function/explode_bitmap.h" -#include "exprs/table_function/table_function.h" #include "util/bitmap_value.h" #include "vec/columns/column.h" +#include "vec/exprs/table_function/table_function.h" namespace doris::vectorized { -class VExplodeBitmapTableFunction : public ExplodeBitmapTableFunction { +class VExplodeBitmapTableFunction final : public TableFunction { public: VExplodeBitmapTableFunction(); + ~VExplodeBitmapTableFunction() override; + + Status reset() override; + Status get_value(void** output) override; + Status forward(bool* eos) override; Status process_init(vectorized::Block* block) override; Status process_row(size_t row_idx) override; @@ -34,6 +38,14 @@ public: Status get_value_length(int64_t* length) override; private: + void _reset_iterator(); + + const BitmapValue* _cur_bitmap = nullptr; + // iterator of _cur_bitmap + BitmapValueIterator* _cur_iter = nullptr; + // current value read from bitmap, it will be referenced by + // table function scan node. + uint64_t _cur_value = 0; ColumnPtr _value_column; }; diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp b/be/src/vec/exprs/table_function/vexplode_json_array.cpp index 8ee1815117..f12fa617bc 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp @@ -22,8 +22,108 @@ namespace doris::vectorized { +std::string ParsedData::true_value = "true"; +std::string ParsedData::false_value = "false"; + +int ParsedData::set_output(ExplodeJsonArrayType type, rapidjson::Document& document) { + int size = document.GetArray().Size(); + switch (type) { + case ExplodeJsonArrayType::INT: { + _data.resize(size); + _backup_int.resize(size); + int i = 0; + for (auto& v : document.GetArray()) { + if (v.IsInt64()) { + _backup_int[i] = v.GetInt64(); + _data[i] = &_backup_int[i]; + } else { + _data[i] = nullptr; + } + ++i; + } + break; + } + case ExplodeJsonArrayType::DOUBLE: { + _data.resize(size); + _backup_double.resize(size); + int i = 0; + for (auto& v : document.GetArray()) { + if (v.IsDouble()) { + _backup_double[i] = v.GetDouble(); + _data[i] = &_backup_double[i]; + } else { + _data[i] = nullptr; + } + ++i; + } + break; + } + case ExplodeJsonArrayType::STRING: { + _data_string.clear(); + _backup_string.clear(); + _string_nulls.clear(); + int32_t wbytes = 0; + for (auto& v : document.GetArray()) { + switch (v.GetType()) { + case rapidjson::Type::kStringType: + _backup_string.emplace_back(v.GetString(), v.GetStringLength()); + _string_nulls.push_back(false); + // do not set _data_string here. + // Because the address of the string stored in `_backup_string` may + // change each time `emplace_back()` is called. + break; + case rapidjson::Type::kNumberType: + if (v.IsUint()) { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%u", v.GetUint()); + } else if (v.IsInt()) { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%d", v.GetInt()); + } else if (v.IsUint64()) { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRIu64, v.GetUint64()); + } else if (v.IsInt64()) { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%" PRId64, v.GetInt64()); + } else { + wbytes = snprintf(tmp_buf, sizeof(tmp_buf), "%f", v.GetDouble()); + } + _backup_string.emplace_back(tmp_buf, wbytes); + _string_nulls.push_back(false); + // do not set _data_string here. + // Because the address of the string stored in `_backup_string` may + // change each time `emplace_back()` is called. + break; + case rapidjson::Type::kFalseType: + _data_string.emplace_back(true_value); + _string_nulls.push_back(false); + break; + case rapidjson::Type::kTrueType: + _data_string.emplace_back(false_value); + _string_nulls.push_back(false); + break; + case rapidjson::Type::kNullType: + _data_string.push_back({}); + _string_nulls.push_back(true); + break; + default: + _data_string.push_back({}); + _string_nulls.push_back(true); + break; + } + } + // Must set _data_string at the end, so that we can + // save the real addr of string in `_backup_string` to `_data_string`. + for (auto& str : _backup_string) { + _data_string.emplace_back(str); + } + break; + } + default: + CHECK(false) << type; + break; + } + return size; +} + VExplodeJsonArrayTableFunction::VExplodeJsonArrayTableFunction(ExplodeJsonArrayType type) - : ExplodeJsonArrayTableFunction(type) { + : _type(type) { _fn_name = "vexplode_json_array"; } @@ -39,6 +139,12 @@ Status VExplodeJsonArrayTableFunction::process_init(vectorized::Block* block) { return Status::OK(); } +Status VExplodeJsonArrayTableFunction::reset() { + _eos = false; + _cur_offset = 0; + return Status::OK(); +} + Status VExplodeJsonArrayTableFunction::process_row(size_t row_idx) { _is_current_empty = false; _eos = false; diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.h b/be/src/vec/exprs/table_function/vexplode_json_array.h index 8c496c39ac..00531b9a22 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.h +++ b/be/src/vec/exprs/table_function/vexplode_json_array.h @@ -20,25 +20,98 @@ #include #include -#include "exprs/table_function/explode_json_array.h" -#include "gutil/strings/stringpiece.h" #include "runtime/string_value.h" #include "vec/columns/column.h" +#include "vec/exprs/table_function/table_function.h" namespace doris::vectorized { -class VExplodeJsonArrayTableFunction : public ExplodeJsonArrayTableFunction { +enum ExplodeJsonArrayType { INT = 0, DOUBLE, STRING }; + +struct ParsedData { + static std::string true_value; + static std::string false_value; + + // The number parsed from json array + // the `_backup` saved the real number entity. + std::vector _data; + std::vector _data_string; + std::vector _backup_int; + std::vector _backup_double; + std::vector _backup_string; + std::vector _string_nulls; + char tmp_buf[128] = {0}; + + void reset(ExplodeJsonArrayType type) { + switch (type) { + case ExplodeJsonArrayType::INT: + _data.clear(); + _backup_int.clear(); + break; + case ExplodeJsonArrayType::DOUBLE: + _data.clear(); + _backup_double.clear(); + break; + case ExplodeJsonArrayType::STRING: + _data_string.clear(); + _backup_string.clear(); + _string_nulls.clear(); + break; + default: + CHECK(false) << type; + break; + } + } + + void get_value(ExplodeJsonArrayType type, int64_t offset, void** output, bool real = false) { + switch (type) { + case ExplodeJsonArrayType::INT: + case ExplodeJsonArrayType::DOUBLE: + *output = _data[offset]; + break; + case ExplodeJsonArrayType::STRING: + *output = _string_nulls[offset] ? nullptr + : real ? reinterpret_cast(_backup_string[offset].data()) + : &_data_string[offset]; + break; + default: + CHECK(false) << type; + } + } + + void get_value_length(ExplodeJsonArrayType type, int64_t offset, int64_t* length) { + switch (type) { + case ExplodeJsonArrayType::INT: + case ExplodeJsonArrayType::DOUBLE: + break; + case ExplodeJsonArrayType::STRING: + *length = _string_nulls[offset] ? -1 : _backup_string[offset].size(); + break; + default: + CHECK(false) << type; + } + } + + int set_output(ExplodeJsonArrayType type, rapidjson::Document& document); +}; + +class VExplodeJsonArrayTableFunction final : public TableFunction { public: VExplodeJsonArrayTableFunction(ExplodeJsonArrayType type); - virtual ~VExplodeJsonArrayTableFunction() = default; + ~VExplodeJsonArrayTableFunction() override = default; - virtual Status process_init(vectorized::Block* block) override; - virtual Status process_row(size_t row_idx) override; - virtual Status process_close() override; - virtual Status get_value(void** output) override; - virtual Status get_value_length(int64_t* length) override; + Status process_init(vectorized::Block* block) override; + Status process_row(size_t row_idx) override; + Status process_close() override; + Status get_value(void** output) override; + Status get_value_length(int64_t* length) override; + + Status reset() override; private: + ParsedData _parsed_data; + ExplodeJsonArrayType _type; + ColumnPtr _text_column; }; diff --git a/be/src/vec/exprs/table_function/vexplode_numbers.h b/be/src/vec/exprs/table_function/vexplode_numbers.h index df0e25c6a5..e125e5d5b8 100644 --- a/be/src/vec/exprs/table_function/vexplode_numbers.h +++ b/be/src/vec/exprs/table_function/vexplode_numbers.h @@ -17,8 +17,8 @@ #pragma once -#include "exprs/table_function/table_function.h" #include "vec/columns/column.h" +#include "vec/exprs/table_function/table_function.h" namespace doris::vectorized { diff --git a/be/src/vec/exprs/table_function/vexplode_split.cpp b/be/src/vec/exprs/table_function/vexplode_split.cpp index fbd37d4d3f..fce53b5c55 100644 --- a/be/src/vec/exprs/table_function/vexplode_split.cpp +++ b/be/src/vec/exprs/table_function/vexplode_split.cpp @@ -32,6 +32,14 @@ Status VExplodeSplitTableFunction::open() { return Status::OK(); } +Status VExplodeSplitTableFunction::reset() { + _eos = false; + if (!_is_current_empty) { + _cur_offset = 0; + } + return Status::OK(); +} + Status VExplodeSplitTableFunction::process_init(vectorized::Block* block) { CHECK(_vexpr_context->root()->children().size() == 2) << "VExplodeSplitTableFunction must be have 2 children but have " diff --git a/be/src/vec/exprs/table_function/vexplode_split.h b/be/src/vec/exprs/table_function/vexplode_split.h index c7b780764e..055e27928b 100644 --- a/be/src/vec/exprs/table_function/vexplode_split.h +++ b/be/src/vec/exprs/table_function/vexplode_split.h @@ -17,27 +17,28 @@ #pragma once -#include "exprs/table_function/explode_split.h" #include "gutil/strings/stringpiece.h" #include "runtime/string_value.h" #include "vec/columns/column.h" +#include "vec/exprs/table_function/table_function.h" namespace doris::vectorized { -class VExplodeSplitTableFunction : public ExplodeSplitTableFunction { +class VExplodeSplitTableFunction final : public TableFunction { public: VExplodeSplitTableFunction(); - virtual ~VExplodeSplitTableFunction() = default; + ~VExplodeSplitTableFunction() override = default; - virtual Status open() override; - virtual Status process_init(vectorized::Block* block) override; - virtual Status process_row(size_t row_idx) override; - virtual Status process_close() override; - virtual Status get_value(void** output) override; - virtual Status get_value_length(int64_t* length) override; + Status open() override; + Status process_init(vectorized::Block* block) override; + Status process_row(size_t row_idx) override; + Status process_close() override; + Status get_value(void** output) override; + Status get_value_length(int64_t* length) override; + Status reset() override; private: - using ExplodeSplitTableFunction::process; + std::vector _backup; ColumnPtr _text_column; ColumnPtr _delimiter_column; diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index 0c43733ead..8b1cd8d621 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -23,7 +23,7 @@ #include #include -#include "exprs/table_function/table_function.h" +#include "vec/exprs/table_function/table_function.h" #include "vec/functions/function.h" namespace doris::vectorized { diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 6083278d29..10ed7c032f 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -22,7 +22,6 @@ #include #include -#include "exprs/table_function/table_function.h" #include "testutil/any_type.h" #include "testutil/function_utils.h" #include "udf/udf.h" @@ -36,6 +35,7 @@ #include "vec/data_types/data_type_number.h" #include "vec/data_types/data_type_string.h" #include "vec/data_types/data_type_time.h" +#include "vec/exprs/table_function/table_function.h" #include "vec/functions/simple_function_factory.h" namespace doris::vectorized {