From ce49f37a5eedaed050f581423d90b50c31d6eff3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 20 Mar 2025 22:56:44 +0800 Subject: [PATCH] branch-2.1: [fix](core) fix subreplace when inputting a large number of empty strings #49241 (#49303) Cherry-picked from #49241 Co-authored-by: Mryange --- be/src/vec/functions/function_string.h | 3 +- .../function/function_sub_replace_test.cpp | 51 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 be/test/vec/function/function_sub_replace_test.cpp diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index dbcafa78a9..79c5823f9f 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -3909,7 +3909,8 @@ struct SubReplaceImpl { std::visit( [&](auto origin_str_const, auto new_str_const, auto start_const, auto len_const) { if (simd::VStringFunctions::is_ascii( - StringRef {data_column->get_chars().data(), data_column->size()})) { + StringRef {data_column->get_chars().data(), + data_column->get_chars().size()})) { vector_ascii( data_column, mask_column, start_column->get_data(), length_column->get_data(), args_null_map->get_data(), result_column, diff --git a/be/test/vec/function/function_sub_replace_test.cpp b/be/test/vec/function/function_sub_replace_test.cpp new file mode 100644 index 0000000000..7412dab769 --- /dev/null +++ b/be/test/vec/function/function_sub_replace_test.cpp @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include "vec/core/block.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function_string.h"
+
+namespace doris::vectorized {
+
+// Regression test for SubReplaceImpl::replace_execute when every input row is
+// a default-inserted value. The string columns then hold `rows` entries but
+// presumably almost no character data (confirm against
+// ColumnString::insert_default), which is the case where the ASCII pre-check
+// has to be bounded by data_column->get_chars().size() rather than by
+// data_column->size().
+TEST(SubReplaceTest, test) {
+    const int rows = 10240;
+    auto str = ColumnString::create();
+    auto new_str = ColumnString::create();
+    auto start = ColumnInt32::create();
+    auto length = ColumnInt32::create();
+
+    // Fill all four argument columns with `rows` default values.
+    for (int i = 0; i < rows; i++) {
+        str->insert_default();
+        new_str->insert_default();
+        start->insert_default();
+        length->insert_default();
+    }
+
+    // NOTE(review): the data types mirror the columns created above
+    // (ColumnString -> DataTypeString, ColumnInt32 -> DataTypeInt32); the
+    // DataTypeString of the "res" slot is assumed -- confirm.
+    Block block {
+            ColumnWithTypeAndName {std::move(str), std::make_shared<DataTypeString>(), "str"},
+            ColumnWithTypeAndName {std::move(new_str), std::make_shared<DataTypeString>(),
+                                   "new_str"},
+            ColumnWithTypeAndName {std::move(start), std::make_shared<DataTypeInt32>(), "start"},
+            ColumnWithTypeAndName {std::move(length), std::make_shared<DataTypeInt32>(), "length"},
+            ColumnWithTypeAndName {nullptr, std::make_shared<DataTypeString>(), "res"},
+    };
+
+    // Columns 0-3 are the arguments; index 4 is the (initially null) "res" slot.
+    EXPECT_TRUE(SubReplaceImpl::replace_execute(block, ColumnNumbers {0, 1, 2, 3}, 4, rows));
+}
+
+} // namespace doris::vectorized