From a79b8ede88dfdccb83be5346335a5c4ee7b2ac78 Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Fri, 10 Mar 2023 11:52:22 +0800 Subject: [PATCH] [Bug](ColumnArray) Fix array column replicate `replicate_offsets` not matched (#17616) The input `replicate_offsets` must have the same size as the ColumnArray's offsets. ``` IColumn::Offsets replicate_offsets(get_offsets().size(), 0); // |---------------------|-------------------------|-------------------------| // [0, begin) [begin, begin + count_sz) [begin + count_sz, size()) // do not need to copy copy counts[n] times do not need to copy ``` We should therefore size `replicate_offsets` by `get_offsets().size()` and store each cumulative count at its absolute row index `i` rather than at `i - begin`. --- be/src/vec/columns/column_array.cpp | 18 ++++++-- .../load/insert/test_insert_nested_array.out | 26 +++++++++++ .../sql/array_nested_with_join.out | 25 ++++++++++ .../insert/test_insert_nested_array.groovy | 1 + .../query_p0/set_operations/load.groovy | 46 ++++++++++++++++++- .../sql/array_nested_with_join.sql | 7 +++ 6 files changed, 117 insertions(+), 6 deletions(-) create mode 100644 regression-test/data/query_p0/set_operations/sql/array_nested_with_join.out create mode 100644 regression-test/suites/query_p0/set_operations/sql/array_nested_with_join.sql diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index 8a9b260ffe..eaa49cfd78 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -742,14 +742,22 @@ void ColumnArray::replicate(const uint32_t* counts, size_t target_size, IColumn& if (col_size == 0) { return; } - - IColumn::Offsets replicate_offsets(col_size); + // |---------------------|-------------------------|-------------------------| + // [0, begin) [begin, begin + count_sz) [begin + count_sz, size()) + // do not need to copy copy counts[n] times do not need to copy + IColumn::Offsets replicate_offsets(get_offsets().size(), 0); size_t cur_offset = 0; size_t end = begin + col_size; + // copy original data at offset n counts[n] times for (size_t i = begin; i < end; ++i) { 
cur_offset += counts[i]; - replicate_offsets[i - begin] = cur_offset; + replicate_offsets[i] = cur_offset; } + // ignored + for (size_t i = end; i < size(); ++i) { + replicate_offsets[i] = replicate_offsets[i - 1]; + } + if (cur_offset != target_size) { LOG(WARNING) << "ColumnArray replicate input target_size:" << target_size << " not equal SUM(counts):" << cur_offset; @@ -941,7 +949,9 @@ ColumnPtr ColumnArray::replicate_generic(const IColumn::Offsets& replicate_offse size_t size_to_replicate = replicate_offsets[i] - prev_offset; prev_offset = replicate_offsets[i]; - for (size_t j = 0; j < size_to_replicate; ++j) res_concrete.insert_from(*this, i); + for (size_t j = 0; j < size_to_replicate; ++j) { + res_concrete.insert_from(*this, i); + } } return res; diff --git a/regression-test/data/load/insert/test_insert_nested_array.out b/regression-test/data/load/insert/test_insert_nested_array.out index 4808c9c7f0..e40e3d26c8 100644 --- a/regression-test/data/load/insert/test_insert_nested_array.out +++ b/regression-test/data/load/insert/test_insert_nested_array.out @@ -27,3 +27,29 @@ 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] +-- !select -- +1 [] 1 [] +1 [] 1 [] +1 [] 1 [] +1 [] 1 [] +2 [NULL] 2 [NULL] +2 [NULL] 2 [NULL] +2 [NULL] 2 [NULL] +2 [NULL] 2 [NULL] +3 [[]] 3 [[]] +3 [[]] 3 [[]] +3 [[]] 3 [[]] +3 [[]] 3 [[]] +4 [[NULL]] 4 [[NULL]] +4 [[NULL]] 4 [[NULL]] +4 [[NULL]] 4 [[NULL]] +4 [[NULL]] 4 [[NULL]] +5 [[[]]] 5 [[[]]] +5 [[[]]] 5 [[[]]] +5 [[[]]] 5 [[[]]] +5 [[[]]] 5 [[[]]] +6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] +6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] +6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] +6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] + diff 
--git a/regression-test/data/query_p0/set_operations/sql/array_nested_with_join.out b/regression-test/data/query_p0/set_operations/sql/array_nested_with_join.out new file mode 100644 index 0000000000..220c1f7bdb --- /dev/null +++ b/regression-test/data/query_p0/set_operations/sql/array_nested_with_join.out @@ -0,0 +1,25 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !array_nested_with_join -- +1 F AIR 1992-01-16 AIR [] +1 F AIR 1992-01-16 AIR [] +1 F AIR 1992-01-16 AIR [] +1 F AIR 1992-01-19 AIR [] +1 F AIR 1992-01-19 AIR [] +1 F AIR 1992-01-19 AIR [] +1 F AIR 1992-01-23 AIR [] +1 F AIR 1992-01-23 AIR [] +1 F AIR 1992-01-23 AIR [] +1 F AIR 1992-01-26 AIR [] + +-- !array_nested_with_join_2 -- +1 F AIR 1992-01-16 AIR [] +1 F AIR 1992-01-16 AIR [] +1 F AIR 1992-01-16 AIR [] +1 F AIR 1992-01-19 AIR [] +1 F AIR 1992-01-19 AIR [] +1 F AIR 1992-01-19 AIR [] +1 F AIR 1992-01-23 AIR [] +1 F AIR 1992-01-23 AIR [] +1 F AIR 1992-01-23 AIR [] +1 F AIR 1992-01-26 AIR [] + diff --git a/regression-test/suites/load/insert/test_insert_nested_array.groovy b/regression-test/suites/load/insert/test_insert_nested_array.groovy index 1052a2e042..e3291d7831 100644 --- a/regression-test/suites/load/insert/test_insert_nested_array.groovy +++ b/regression-test/suites/load/insert/test_insert_nested_array.groovy @@ -74,6 +74,7 @@ suite("test_insert_nested_array", "load") { (6, [[[null]], [[1], [2, 3]], [[4, 5, 6], null, null]]) """ qt_select "select * from ${tableName} order by `key`" + qt_select "select * from ${tableName} as t1 right join ${tableName} as t2 on t1.`key` = t2.`key` order by t1.`key`" } test_nested_array_2_depths.call() diff --git a/regression-test/suites/query_p0/set_operations/load.groovy b/regression-test/suites/query_p0/set_operations/load.groovy index 0086ed87a5..b374ab1e9c 100644 --- a/regression-test/suites/query_p0/set_operations/load.groovy +++ b/regression-test/suites/query_p0/set_operations/load.groovy @@ -28,7 
+28,7 @@ suite("load") { for (String table in tables) { sql new File("""${context.file.parent}/ddl/${table}.sql""").text } - + for (String tableName in tables) { streamLoad { // you can skip declare db, because a default db already specify in ${DORIS_HOME}/conf/regression-conf.groovy @@ -59,4 +59,46 @@ suite("load") { } } } -} \ No newline at end of file + + // nested array with join + def test_nested_array_2_depths = { + def tableName = "nested_array_test_2_vectorized" + + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + `key` INT, + value ARRAY<ARRAY<INT>> + ) DUPLICATE KEY (`key`) DISTRIBUTED BY HASH (`key`) BUCKETS 1 + PROPERTIES ('replication_num' = '1') + """ + + sql "INSERT INTO ${tableName} VALUES (1, [])" + sql "INSERT INTO ${tableName} VALUES (2, [null])" + sql "INSERT INTO ${tableName} VALUES (3, [[]])" + sql "INSERT INTO ${tableName} VALUES (4, [[1, 2, 3], [4, 5, 6]])" + sql "INSERT INTO ${tableName} VALUES (5, [[1, 2, 3], null, [4, 5, 6]])" + sql "INSERT INTO ${tableName} VALUES (6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]])" + + sql """ + INSERT INTO ${tableName} VALUES + (1, []), + (2, [null]), + (3, [[]]), + (4, [[1, 2, 3], [4, 5, 6]]), + (5, [[1, 2, 3], null, [4, 5, 6]]), + (6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]]) + """ + + sql """ + INSERT INTO ${tableName} VALUES + (1, []), + (2, [null]), + (3, [[]]), + (4, [[1, 2, 3], [4, 5, 6]]), + (5, [[1, 2, 3], null, [4, 5, 6]]), + (6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]]) + """ + } + test_nested_array_2_depths.call() + } diff --git a/regression-test/suites/query_p0/set_operations/sql/array_nested_with_join.sql b/regression-test/suites/query_p0/set_operations/sql/array_nested_with_join.sql new file mode 100644 index 0000000000..d552ca552c --- /dev/null +++ b/regression-test/suites/query_p0/set_operations/sql/array_nested_with_join.sql @@ -0,0 +1,7 @@ +select /*+ SET_VAR(query_timeout = 600) */ ref_52.`key` as k, 
ref_54.`linestatus` as c0, ref_54.`shipmode` as c1, ref_54.`shipdate` as c2, ref_54.`shipmode` as c3, ref_52.`value` as c4 +from nested_array_test_2_vectorized as ref_52 right join tpch_tiny_lineitem as ref_54 on (ref_52.`key` = ref_54.`linenumber` ) +where ref_52.`value` is not NULL order by 1, 2, 3, 4, 5 limit 10; + +select /*+ SET_VAR(query_timeout = 600) */ ref_52.`key` as k, ref_54.`linestatus` as c0, ref_54.`shipmode` as c1, ref_54.`shipdate` as c2, ref_54.`shipmode` as c3, ref_52.`value` as c4 +from nested_array_test_2_vectorized as ref_52 right join tpch_tiny_lineitem as ref_54 on (ref_52.`key` = ref_54.`linenumber` ) +where ref_52.`value` is not NULL order by 1, 2, 3, 4, 5 limit 10;