diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 5aac7a1572..c3676656be 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -671,14 +671,22 @@ public: res_offset.resize(input_rows_count); - int res_reserve_size = 0; + size_t res_reserve_size = 0; // we could ignore null string column // but it's not necessary to ignore it for (size_t i = 0; i < offsets_list.size(); ++i) { for (size_t j = 0; j < input_rows_count; ++j) { - res_reserve_size += (*offsets_list[i])[j] - (*offsets_list[i])[j - 1]; + size_t append = (*offsets_list[i])[j] - (*offsets_list[i])[j - 1]; + // check whether the concat output might overflow(unlikely) + if (UNLIKELY(UINT_MAX - append < res_reserve_size)) { + return Status::BufferAllocFailed("concat output is too large to allocate"); + } + res_reserve_size += append; } } + if ((UNLIKELY(UINT_MAX - input_rows_count < res_reserve_size))) { + return Status::BufferAllocFailed("concat output is too large to allocate"); + } // for each terminal zero res_reserve_size += input_rows_count; diff --git a/regression-test/suites/tpcds_sf1_p1/functions_test/test_string_concat_extremely_long_string.groovy b/regression-test/suites/tpcds_sf1_p1/functions_test/test_string_concat_extremely_long_string.groovy new file mode 100644 index 0000000000..95d72de9f6 --- /dev/null +++ b/regression-test/suites/tpcds_sf1_p1/functions_test/test_string_concat_extremely_long_string.groovy @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_concat_extreme_input") { + // there is no need to check the result: the following query is expected to return an error because + // the concat output exceeds the UINT size. This case just tests whether the BE can handle + // such a case without crashing + test { + sql ''' select + concat( + cast(substr( + cast(ref_1.`cp_type` as varchar), + cast( + max( + cast(ref_1.`cp_catalog_page_number` as int)) over (partition by ref_1.`cp_end_date_sk` order by ref_1.`cp_catalog_page_number`) as int), + cast(ref_1.`cp_end_date_sk` as int)) as varchar), + cast(substring( + cast(ref_1.`cp_department` as varchar), + cast(ref_1.`cp_end_date_sk` as int), + cast(ref_1.`cp_end_date_sk` as int)) as varchar), + cast(rpad( + cast(ref_1.`cp_type` as varchar), + cast(ref_1.`cp_start_date_sk` as int), + cast(ref_1.`cp_description` as varchar)) as varchar)) as c1 +from + regression_test_tpcds_sf1_p1.catalog_page as ref_1 ''' + exception "concat output is too large to allocate" + } +} +