From 0b074ade02fe8aa6080991d4f67c429fc18ceec2 Mon Sep 17 00:00:00 2001 From: TengJianPing <18241664+jacktengg@users.noreply.github.com> Date: Tue, 18 Apr 2023 13:57:55 +0800 Subject: [PATCH] [fix](const column) fix coredump caused by const column for some functions (#18737) --- be/src/vec/columns/column_nullable.cpp | 6 ++- be/src/vec/columns/column_nullable.h | 3 +- be/src/vec/functions/function_bitmap.cpp | 2 +- be/src/vec/functions/function_conv.cpp | 2 +- be/src/vec/functions/function_convert_tz.h | 2 +- be/src/vec/functions/function_jsonb.cpp | 2 +- be/src/vec/functions/function_regexp.cpp | 2 +- be/src/vec/functions/function_string.h | 2 +- be/src/vec/functions/function_timestamp.cpp | 8 ++-- be/src/vec/functions/function_totype.h | 4 +- be/src/vec/utils/util.hpp | 13 ++++-- .../bitmap_functions/test_bitmap_function.out | 19 ++++++++ .../datetime_functions/test_date_function.out | 20 +++++++++ .../math_functions/test_conv.out | 19 ++++++++ .../string_functions/test_string_function.out | 19 ++++++++ .../test_string_function_regexp.out | 19 ++++++++ .../test_bitmap_function.groovy | 45 +++++++++++++++++++ .../test_date_function.groovy | 22 +++++++++ .../math_functions/test_conv.groovy | 29 ++++++++++++ .../test_string_function.groovy | 29 ++++++++++++ .../test_string_function_regexp.groovy | 22 ++++++++- 21 files changed, 270 insertions(+), 19 deletions(-) diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 64db3e6455..959ae55472 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -634,12 +634,14 @@ ColumnPtr ColumnNullable::index(const IColumn& indexes, size_t limit) const { return ColumnNullable::create(indexed_data, indexed_null_map); } -void check_set_nullable(ColumnPtr& argument_column, ColumnVector::MutablePtr& null_map) { +void check_set_nullable(ColumnPtr& argument_column, ColumnVector::MutablePtr& null_map, + bool is_single) { if (auto* nullable = check_and_get_column(*argument_column)) { // Danger: Here must dispose the null map data first! Because // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem // of column nullable mem of null map - VectorizedUtils::update_null_map(null_map->get_data(), nullable->get_null_map_data()); + VectorizedUtils::update_null_map(null_map->get_data(), nullable->get_null_map_data(), + is_single); argument_column = nullable->get_nested_column_ptr(); } } diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 5200531b90..b815121e03 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -369,5 +369,6 @@ ColumnPtr make_nullable(const ColumnPtr& column, bool is_nullable = false); ColumnPtr remove_nullable(const ColumnPtr& column); // check if argument column is nullable. If so, extract its concrete column and set null_map. //TODO: use this to replace inner usages. -void check_set_nullable(ColumnPtr&, ColumnVector::MutablePtr&); +// is_single: whether null_map is null map of a ColumnConst +void check_set_nullable(ColumnPtr&, ColumnVector::MutablePtr& null_map, bool is_single); } // namespace doris::vectorized diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 7d340f6937..8f81e8c2a6 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -1001,7 +1001,7 @@ public: default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); for (int i = 0; i < 3; i++) { - check_set_nullable(argument_columns[i], res_null_map); + check_set_nullable(argument_columns[i], res_null_map, col_const[i]); } auto bitmap_column = assert_cast(argument_columns[0].get()); diff --git a/be/src/vec/functions/function_conv.cpp b/be/src/vec/functions/function_conv.cpp index 027a7e6a17..68f2070bea 100644 --- a/be/src/vec/functions/function_conv.cpp +++ b/be/src/vec/functions/function_conv.cpp @@ -65,7 +65,7 @@ public: default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); for (int i = 0; i < 3; i++) { - check_set_nullable(argument_columns[i], result_null_map_column); + check_set_nullable(argument_columns[i], result_null_map_column, col_const[i]); } if (col_const[1] && col_const[2]) { diff --git a/be/src/vec/functions/function_convert_tz.h b/be/src/vec/functions/function_convert_tz.h index 9857417653..4a1d66b459 100644 --- a/be/src/vec/functions/function_convert_tz.h +++ b/be/src/vec/functions/function_convert_tz.h @@ -194,7 +194,7 @@ public: default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); for (int i = 0; i < 3; i++) { - check_set_nullable(argument_columns[i], result_null_map_column); + check_set_nullable(argument_columns[i], result_null_map_column, col_const[i]); } if (col_const[1] && col_const[2]) { diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp index 750f11bca5..024c390072 100644 --- a/be/src/vec/functions/function_jsonb.cpp +++ b/be/src/vec/functions/function_jsonb.cpp @@ -316,7 +316,7 @@ public: for (int i = 0; i < 2; ++i) { std::tie(argument_columns[i], col_const[i]) = unpack_if_const(block.get_by_position(arguments[i]).column); - check_set_nullable(argument_columns[i], null_map); + check_set_nullable(argument_columns[i], null_map, col_const[i]); } auto res = Impl::ColumnType::create(); diff --git a/be/src/vec/functions/function_regexp.cpp b/be/src/vec/functions/function_regexp.cpp index 8f646a82f6..c9061d3c61 100644 --- a/be/src/vec/functions/function_regexp.cpp +++ b/be/src/vec/functions/function_regexp.cpp @@ -429,7 +429,7 @@ public: arguments); } for (int i = 0; i < argument_size; i++) { - check_set_nullable(argument_columns[i], result_null_map); + check_set_nullable(argument_columns[i], result_null_map, col_const[i]); } if constexpr (std::is_same_v) { diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index b02f379781..cb28fa7b82 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -140,7 +140,7 @@ struct SubstringUtil { default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); for (int i = 0; i < 3; i++) { - check_set_nullable(argument_columns[i], null_map); + check_set_nullable(argument_columns[i], null_map, col_const[i]); } auto specific_str_column = assert_cast(argument_columns[0].get()); diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index 4ef50490ab..2122c2b09d 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -52,12 +52,12 @@ struct StrToDate { ColumnPtr argument_columns[2] = { col_const[0] ? static_cast(*col0).convert_to_full_column() : col0}; - check_set_nullable(argument_columns[0], null_map); + check_set_nullable(argument_columns[0], null_map, col_const[0]); //TODO: when we set default implementation for nullable, the check_set_nullable for arguments is useless. consider to remove it. std::tie(argument_columns[1], col_const[1]) = unpack_if_const(block.get_by_position(arguments[1]).column); - check_set_nullable(argument_columns[1], null_map); + check_set_nullable(argument_columns[1], null_map, col_const[1]); auto specific_str_column = assert_cast(argument_columns[0].get()); auto specific_char_column = assert_cast(argument_columns[1].get()); @@ -190,11 +190,11 @@ struct MakeDateImpl { ColumnPtr argument_columns[2] = { col_const[0] ? static_cast(*col0).convert_to_full_column() : col0}; - check_set_nullable(argument_columns[0], null_map); + check_set_nullable(argument_columns[0], null_map, col_const[0]); std::tie(argument_columns[1], col_const[1]) = unpack_if_const(block.get_by_position(arguments[1]).column); - check_set_nullable(argument_columns[1], null_map); + check_set_nullable(argument_columns[1], null_map, col_const[1]); ColumnPtr res = nullptr; WhichDataType which(remove_nullable(block.get_by_position(result).type)); diff --git a/be/src/vec/functions/function_totype.h b/be/src/vec/functions/function_totype.h index 0fa2724a22..2ead9ab704 100644 --- a/be/src/vec/functions/function_totype.h +++ b/be/src/vec/functions/function_totype.h @@ -330,7 +330,7 @@ public: for (int i = 0; i < 2; ++i) { std::tie(argument_columns[i], col_const[i]) = unpack_if_const(block.get_by_position(arguments[i]).column); - check_set_nullable(argument_columns[i], null_map); + check_set_nullable(argument_columns[i], null_map, col_const[i]); } using ResultDataType = typename Impl b +-- !sql_regexp_null -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N + -- !sql -- false 1 1989 1001 11011902 123.123 true 1989-03-21 1989-03-21T13:00 wangjuoo4 0.1 6.333 string12345 170141183460469231731687303715884105727 diff --git a/regression-test/suites/query_p0/sql_functions/bitmap_functions/test_bitmap_function.groovy b/regression-test/suites/query_p0/sql_functions/bitmap_functions/test_bitmap_function.groovy index b4cf3cdc3b..a14267a395 100644 --- a/regression-test/suites/query_p0/sql_functions/bitmap_functions/test_bitmap_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/bitmap_functions/test_bitmap_function.groovy @@ -645,4 +645,49 @@ suite("test_bitmap_function") { sql "insert into d_table select -4,-4,-4,'d';" try_sql "select bitmap_union(to_bitmap_with_check(k2)) from d_table;" qt_sql "select bitmap_union(to_bitmap(k2)) from d_table;" + + // bug fix + sql """ DROP TABLE IF EXISTS test_bitmap1 """ + sql """ + CREATE TABLE test_bitmap1 ( + dt INT(11) NULL, + id bitmap BITMAP_UNION NULL + ) ENGINE=OLAP + AGGREGATE KEY(dt) + DISTRIBUTED BY HASH(dt) BUCKETS 1 + properties ( + "replication_num" = "1" + ); + """ + sql """ + insert into + test_bitmap1 + values + (1, to_bitmap(11)), + (2, to_bitmap(22)), + (3, to_bitmap(33)), + (4, to_bitmap(44)), + (5, to_bitmap(44)), + (6, to_bitmap(44)), + (7, to_bitmap(44)), + (8, to_bitmap(44)), + (9, to_bitmap(44)), + (10, to_bitmap(44)), + (11, to_bitmap(44)), + (12, to_bitmap(44)), + (13, to_bitmap(44)), + (14, to_bitmap(44)), + (15, to_bitmap(44)), + (16, to_bitmap(44)), + (17, to_bitmap(44)); + """ + qt_sql_bitmap_subset_in_range """ + select /*+SET_VAR(parallel_fragment_exec_instance_num=1)*/ + bitmap_to_string( + bitmap_subset_in_range(id, cast(null as bigint), cast(null as bigint)) + ) + from + test_bitmap1; + """ + } diff --git a/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy index ebd2f98b88..b344198391 100644 --- a/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy @@ -49,6 +49,28 @@ suite("test_date_function") { qt_sql """ SELECT convert_tz('2022-02-29 13:21:03', '+08:00', 'America/London') result; """ qt_sql """ SELECT convert_tz('1900-00-00 13:21:03', '+08:00', 'America/London') result; """ + // bug fix + sql """ insert into ${tableName} values + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"), + ("2019-08-01 13:21:03"); + """ + qt_sql_convert_tz_null """ SELECT /*+SET_VAR(parallel_fragment_exec_instance_num=1)*/ convert_tz(test_datetime, cast(null as varchar), cast(null as varchar)) result from test_date_function; """ + sql """ truncate table ${tableName} """ def timezoneCachedTableName = "test_convert_tz_with_timezone_cache" diff --git a/regression-test/suites/query_p0/sql_functions/math_functions/test_conv.groovy b/regression-test/suites/query_p0/sql_functions/math_functions/test_conv.groovy index f0bd110ab8..6c4867174d 100644 --- a/regression-test/suites/query_p0/sql_functions/math_functions/test_conv.groovy +++ b/regression-test/suites/query_p0/sql_functions/math_functions/test_conv.groovy @@ -17,5 +17,34 @@ suite("test_conv") { qt_select "SELECT CONV(15,10,2)" + + sql """ drop table if exists test_conv; """ + sql """ create table test_conv( + k1 varchar(16), + v1 int + ) distributed by hash (k1) buckets 1 + properties ("replication_num"="1"); + """ + sql """ insert into test_conv values + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1), + ("100", 1) + """ + + qt_sql_conv1 """ select /*+SET_VAR(parallel_fragment_exec_instance_num=1)*/conv(k1, cast(null as bigint), cast(null as bigint)) from test_conv; """ } diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy index 13d217dd68..73a9222aa1 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function.groovy @@ -144,6 +144,35 @@ suite("test_string_function") { qt_sql "select substring('abc1', 5);" qt_sql "select substring('abc1def', 2, 2);" + sql """ drop table if exists test_string_function; """ + sql """ create table test_string_function ( + k1 varchar(16), + v1 int + ) distributed by hash (k1) buckets 1 + properties ("replication_num"="1"); + """ + sql """ insert into test_string_function values + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1), + ("aaaaaaaa", 1) + """ + // bug fix + qt_sql_substring1 """ select /*+SET_VAR(parallel_fragment_exec_instance_num=1)*/ substring(k1, cast(null as int), cast(null as int)) from test_string_function; """ + qt_sql "select substr('a',3,1);" qt_sql "select substr('a',2,1);" qt_sql "select substr('a',1,1);" diff --git a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy index c56690337e..fba213ef98 100644 --- a/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy +++ b/regression-test/suites/query_p0/sql_functions/string_functions/test_string_function_regexp.groovy @@ -24,7 +24,7 @@ suite("test_string_function_regexp") { CREATE TABLE IF NOT EXISTS ${tbName} ( k varchar(32) ) - DISTRIBUTED BY HASH(k) BUCKETS 5 properties("replication_num" = "1"); + DISTRIBUTED BY HASH(k) BUCKETS 1 properties("replication_num" = "1"); """ sql """ INSERT INTO ${tbName} VALUES @@ -55,6 +55,26 @@ suite("test_string_function_regexp") { qt_sql "SELECT regexp_replace_one('a b c', \" \", \"-\");" qt_sql "SELECT regexp_replace_one('a b b','(b)','<\\\\1>');" + // bug fix + sql """ + INSERT INTO ${tbName} VALUES + ("billie eillish"), + ("billie eillish"), + ("billie eillish"), + ("billie eillish"), + ("billie eillish"), + ("billie eillish"), + ("billie eillish"), + ("billie eillish"), + ("billie eillish"), + ("billie eillish"), + ("billie eillish"), + ("billie eillish"), + ("billie eillish") + """ + qt_sql_regexp_null "SELECT /*+SET_VAR(parallel_fragment_exec_instance_num=1)*/regexp_extract(k, cast(null as varchar), 1) from test_string_function_regexp;" + // end bug fix + sql "DROP TABLE ${tbName};" def tableName= "test"