// Review summary of the patch below (unified diff over five files).
// NOTE(review): the diff text appears flattened (hunk line breaks lost) and template
// arguments such as the target of assert_cast / const_cast and the element type of
// std::vector look stripped -- confirm against the original commit before applying.
//
// What the patch changes:
//  * string_ref.h: adds a non-explicit `operator std::string_view()` to StringRef that
//    returns a view over {data, size}. The new split() call in process_row() relies on
//    this implicit conversion for both of its arguments.
//  * vtable_function_node.cpp: get_next() now wraps push(...) in RETURN_IF_ERROR instead
//    of discarding the returned Status.
//  * vexplode_split.cpp / .h:
//      - process_init() materializes the text column with
//        convert_to_full_column_if_const(); if it is nullable it caches the null-map
//        pointer (_test_null_map) and the nested string column (_real_text_column).
//        The delimiter column must be const, otherwise Status::NotSupported is returned;
//        its row 0 value is stored in _delimiter.
//      - process_row() treats the row as empty when the null map flags it or when
//        _delimiter.data is nullptr; otherwise it fills _backup through a local
//        string_view based `split` lambda (replacing strings::Split).
//      - process_close() resets the new members; get_value() const_casts the data
//        pointer of the current _backup entry.
//  * table_function_test.cpp: the delimiter input type becomes Consted and every input
//    row is checked in its own check_vec_table_function() call.
//
// NOTE(review) -- points to confirm in the `split` lambda:
//  1. After a non-empty token the scan resumes at std::next(second), i.e. one element
//     past the match start, while the empty-token branch skips delims.size(). With a
//     delimiter longer than one character the tail of the delimiter would be rescanned
//     as text; this looks like it should also advance by delims.size().
//  2. When no further delimiter is found (second == last) `first` is still advanced with
//     std::next before the `second == last` break check, stepping past end().
//  3. An empty delimiter makes std::search return `first`, and std::next(second, 0) does
//     not advance, so the do/while would never finish on non-empty text.
//  4. Text that ends with the delimiter leaves first == last after the last token, so no
//     trailing empty token is emitted -- confirm this matches the intended semantics.
//  5. _delimiter and the entries of _backup are non-owning views; the delimiter column is
//     no longer held in a member, so presumably the block must outlive their use -- verify.
//  6. "test column" / _test_null_map presumably mean "text"; the test loop compares a
//     signed `int i` against input_sets.size().
diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h index 625326b747..c146b48187 100644 --- a/be/src/vec/common/string_ref.h +++ b/be/src/vec/common/string_ref.h @@ -151,6 +151,7 @@ struct StringRef { std::string to_prefix(size_t length) const { return std::string(data, std::min(length, size)); } explicit operator std::string() const { return to_string(); } + operator std::string_view() const { return std::string_view {data, size}; } StringRef substring(int start_pos, int new_len) const { return StringRef(data + start_pos, (new_len < 0) ? (size - start_pos) : new_len); diff --git a/be/src/vec/exec/vtable_function_node.cpp b/be/src/vec/exec/vtable_function_node.cpp index f7fe4f71d6..5d2bb1eff5 100644 --- a/be/src/vec/exec/vtable_function_node.cpp +++ b/be/src/vec/exec/vtable_function_node.cpp @@ -129,7 +129,7 @@ Status VTableFunctionNode::get_next(RuntimeState* state, Block* block, bool* eos std::placeholders::_3)), child(0)->get_next_span(), _child_eos); - push(state, &_child_block, _child_eos); + RETURN_IF_ERROR(push(state, &_child_block, _child_eos)); } return pull(state, block, eos); diff --git a/be/src/vec/exprs/table_function/vexplode_split.cpp b/be/src/vec/exprs/table_function/vexplode_split.cpp index fce53b5c55..1bceffeeba 100644 --- a/be/src/vec/exprs/table_function/vexplode_split.cpp +++ b/be/src/vec/exprs/table_function/vexplode_split.cpp @@ -53,8 +53,25 @@ Status VExplodeSplitTableFunction::process_init(vectorized::Block* block) { RETURN_IF_ERROR(_vexpr_context->root()->children()[1]->execute(_vexpr_context, block, &delimiter_column_idx)); - _text_column = block->get_by_position(text_column_idx).column; - _delimiter_column = block->get_by_position(delimiter_column_idx).column; + // dispose test column + _text_column = + block->get_by_position(text_column_idx).column->convert_to_full_column_if_const(); + if (_text_column->is_nullable()) { + const auto& column_null = assert_cast(*_text_column); + _test_null_map = 
column_null.get_null_map_data().data(); + _real_text_column = &assert_cast(column_null.get_nested_column()); + } else { + _real_text_column = &assert_cast(*_text_column); + } + + // dispose delimiter column + auto& delimiter_const_column = block->get_by_position(delimiter_column_idx).column; + if (is_column_const(*delimiter_const_column)) { + _delimiter = delimiter_const_column->get_data_at(0); + } else { + return Status::NotSupported( + "explode_split(test, delimiter) delimiter column must be const"); + } return Status::OK(); } @@ -63,17 +80,37 @@ Status VExplodeSplitTableFunction::process_row(size_t row_idx) { _is_current_empty = false; _eos = false; - StringRef text = _text_column->get_data_at(row_idx); - StringRef delimiter = _delimiter_column->get_data_at(row_idx); - - if (text.data == nullptr) { + if ((_test_null_map and _test_null_map[row_idx]) || _delimiter.data == nullptr) { _is_current_empty = true; _cur_size = 0; _cur_offset = 0; } else { - //TODO: implement non-copy split string reference - _backup = strings::Split(StringPiece((char*)text.data, text.size), - StringPiece((char*)delimiter.data, delimiter.size)); + // TODO: use the function to be better string_view/StringRef split + auto split = [](std::string_view strv, std::string_view delims = " ") { + std::vector output; + auto first = strv.begin(); + auto last = strv.end(); + + do { + const auto second = + std::search(first, last, std::cbegin(delims), std::cend(delims)); + if (first != second) { + output.emplace_back(strv.substr(std::distance(strv.begin(), first), + std::distance(first, second))); + first = std::next(second); + } else { + output.emplace_back("", 0); + first = std::next(second, delims.size()); + } + + if (second == last) { + break; + } + } while (first != last); + + return output; + }; + _backup = split(_real_text_column->get_data_at(row_idx), _delimiter); _cur_size = _backup.size(); _cur_offset = 0; @@ -84,7 +121,9 @@ Status VExplodeSplitTableFunction::process_row(size_t row_idx) { 
Status VExplodeSplitTableFunction::process_close() { _text_column = nullptr; - _delimiter_column = nullptr; + _real_text_column = nullptr; + _test_null_map = nullptr; + _delimiter = {}; return Status::OK(); } @@ -92,7 +131,7 @@ Status VExplodeSplitTableFunction::get_value(void** output) { if (_is_current_empty) { *output = nullptr; } else { - *output = _backup[_cur_offset].data(); + *output = const_cast(_backup[_cur_offset].data()); } return Status::OK(); } diff --git a/be/src/vec/exprs/table_function/vexplode_split.h b/be/src/vec/exprs/table_function/vexplode_split.h index 055e27928b..63396bff91 100644 --- a/be/src/vec/exprs/table_function/vexplode_split.h +++ b/be/src/vec/exprs/table_function/vexplode_split.h @@ -38,10 +38,13 @@ public: Status reset() override; private: - std::vector _backup; + std::vector _backup; ColumnPtr _text_column; - ColumnPtr _delimiter_column; + const uint8_t* _test_null_map = nullptr; + const ColumnString* _real_text_column = nullptr; + + StringRef _delimiter = {}; }; } // namespace doris::vectorized diff --git a/be/test/vec/function/table_function_test.cpp b/be/test/vec/function/table_function_test.cpp index b83c019e32..3b86a219cb 100644 --- a/be/test/vec/function/table_function_test.cpp +++ b/be/test/vec/function/table_function_test.cpp @@ -179,17 +179,26 @@ TEST_F(TableFunctionTest, vexplode_split) { // Case 2: explode_split("a,b,c", ",") --> ["a", "b", "c"] // Case 3: explode_split("a,b,c", "a,")) --> ["", "b,c"] // Case 4: explode_split("", ",")) --> [""] - InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; - InputDataSet input_set = {{Null(), Null()}, - {std::string("a,b,c"), std::string(",")}, - {std::string("a,b,c"), std::string("a,")}, - {std::string(""), std::string(",")}}; + InputTypeSet input_types = {TypeIndex::String, Consted {TypeIndex::String}}; + InputDataSet input_sets = {{Null(), Null()}, + {std::string("a,b,c"), std::string(",")}, + {std::string("a,b,c"), std::string("a,")}, + {std::string(""), 
std::string(",")}}; InputTypeSet output_types = {TypeIndex::String}; - InputDataSet output_set = {{std::string("a")}, {std::string("b")}, {std::string("c")}, - {std::string("")}, {std::string("b,c")}, {std::string("")}}; + InputDataSet output_sets = {{}, + {std::string("a"), std::string("b"), std::string("c")}, + {std::string(""), std::string("b,c")}, + {std::string("")}}; - check_vec_table_function(&tfn, input_types, input_set, output_types, output_set); + for (int i = 0; i < input_sets.size(); ++i) { + InputDataSet input_set {input_sets[i]}; + InputDataSet output_set {}; + for (const auto& data : output_sets[i]) { + output_set.emplace_back(std::vector {data}); + } + check_vec_table_function(&tfn, input_types, input_set, output_types, output_set); + } } }