[Bug](ColumnArray) Fix array column replicate replicate_offsets not matched (#17616)

The input replicate_offsets should be the same size as the ColumnArray's offsets.
```
IColumn::Offsets replicate_offsets(get_offsets().size(), 0);
// |---------------------|-------------------------|-------------------------|
// [0, begin)             [begin, begin + count_sz)  [begin + count_sz, size())
//  do not need to copy    copy counts[n] times       do not need to copy
```

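We should therefore size replicate_offsets to get_offsets().size() and fill only the [begin, begin + count_sz) range from counts, so that rows outside that range are not copied.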
This commit is contained in:
lihangyu
2023-03-10 11:52:22 +08:00
committed by GitHub
parent 1a549edac2
commit a79b8ede88
6 changed files with 117 additions and 6 deletions

View File

@ -742,14 +742,22 @@ void ColumnArray::replicate(const uint32_t* counts, size_t target_size, IColumn&
if (col_size == 0) {
return;
}
IColumn::Offsets replicate_offsets(col_size);
// |---------------------|-------------------------|-------------------------|
// [0, begin) [begin, begin + count_sz) [begin + count_sz, size())
// do not need to copy copy counts[n] times do not need to copy
IColumn::Offsets replicate_offsets(get_offsets().size(), 0);
size_t cur_offset = 0;
size_t end = begin + col_size;
// copy original data at offset n counts[n] times
for (size_t i = begin; i < end; ++i) {
cur_offset += counts[i];
replicate_offsets[i - begin] = cur_offset;
replicate_offsets[i] = cur_offset;
}
// ignored
for (size_t i = end; i < size(); ++i) {
replicate_offsets[i] = replicate_offsets[i - 1];
}
if (cur_offset != target_size) {
LOG(WARNING) << "ColumnArray replicate input target_size:" << target_size
<< " not equal SUM(counts):" << cur_offset;
@ -941,7 +949,9 @@ ColumnPtr ColumnArray::replicate_generic(const IColumn::Offsets& replicate_offse
size_t size_to_replicate = replicate_offsets[i] - prev_offset;
prev_offset = replicate_offsets[i];
for (size_t j = 0; j < size_to_replicate; ++j) res_concrete.insert_from(*this, i);
for (size_t j = 0; j < size_to_replicate; ++j) {
res_concrete.insert_from(*this, i);
}
}
return res;

View File

@ -27,3 +27,29 @@
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
-- !select --
1 [] 1 []
1 [] 1 []
1 [] 1 []
1 [] 1 []
2 [NULL] 2 [NULL]
2 [NULL] 2 [NULL]
2 [NULL] 2 [NULL]
2 [NULL] 2 [NULL]
3 [[]] 3 [[]]
3 [[]] 3 [[]]
3 [[]] 3 [[]]
3 [[]] 3 [[]]
4 [[NULL]] 4 [[NULL]]
4 [[NULL]] 4 [[NULL]]
4 [[NULL]] 4 [[NULL]]
4 [[NULL]] 4 [[NULL]]
5 [[[]]] 5 [[[]]]
5 [[[]]] 5 [[[]]]
5 [[[]]] 5 [[[]]]
5 [[[]]] 5 [[[]]]
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]
6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] 6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]]

View File

@ -0,0 +1,25 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !array_nested_with_join --
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-26 AIR []
-- !array_nested_with_join_2 --
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-16 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-19 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-23 AIR []
1 F AIR 1992-01-26 AIR []

View File

@ -74,6 +74,7 @@ suite("test_insert_nested_array", "load") {
(6, [[[null]], [[1], [2, 3]], [[4, 5, 6], null, null]])
"""
qt_select "select * from ${tableName} order by `key`"
qt_select "select * from ${tableName} as t1 right join ${tableName} as t2 on t1.`key` = t2.`key` order by t1.`key`"
}
test_nested_array_2_depths.call()

View File

@ -28,7 +28,7 @@ suite("load") {
for (String table in tables) {
sql new File("""${context.file.parent}/ddl/${table}.sql""").text
}
for (String tableName in tables) {
streamLoad {
// you can skip declare db, because a default db already specify in ${DORIS_HOME}/conf/regression-conf.groovy
@ -59,4 +59,46 @@ suite("load") {
}
}
}
}
// nested array with join
// Fixture loader: (re)creates the table `nested_array_test_2_vectorized` with an
// ARRAY<ARRAY<INT>> column and inserts the same six rows three times
// (six single-row INSERTs followed by two six-row batch INSERTs).
// The closure only loads data; it runs no SELECT itself.
def test_nested_array_2_depths = {
def tableName = "nested_array_test_2_vectorized"
// Drop the table if it already exists, then recreate it below.
sql "DROP TABLE IF EXISTS ${tableName}"
// Duplicate-key table hashed on `key`, with one bucket and replication_num = 1.
sql """
CREATE TABLE IF NOT EXISTS ${tableName} (
`key` INT,
value ARRAY<ARRAY<INT>>
) DUPLICATE KEY (`key`) DISTRIBUTED BY HASH (`key`) BUCKETS 1
PROPERTIES ('replication_num' = '1')
"""
// First copy of the data, one row per statement. The values cover an empty
// outer array, a NULL inner array, an empty inner array, fully populated
// inner arrays, and inner arrays mixing NULL elements with integers.
sql "INSERT INTO ${tableName} VALUES (1, [])"
sql "INSERT INTO ${tableName} VALUES (2, [null])"
sql "INSERT INTO ${tableName} VALUES (3, [[]])"
sql "INSERT INTO ${tableName} VALUES (4, [[1, 2, 3], [4, 5, 6]])"
sql "INSERT INTO ${tableName} VALUES (5, [[1, 2, 3], null, [4, 5, 6]])"
sql "INSERT INTO ${tableName} VALUES (6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]])"
// Second copy: the same six rows in a single multi-row INSERT.
sql """
INSERT INTO ${tableName} VALUES
(1, []),
(2, [null]),
(3, [[]]),
(4, [[1, 2, 3], [4, 5, 6]]),
(5, [[1, 2, 3], null, [4, 5, 6]]),
(6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]])
"""
// Third copy: identical batch again, so every key ends up with three rows.
sql """
INSERT INTO ${tableName} VALUES
(1, []),
(2, [null]),
(3, [[]]),
(4, [[1, 2, 3], [4, 5, 6]]),
(5, [[1, 2, 3], null, [4, 5, 6]]),
(6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]])
"""
}
test_nested_array_2_depths.call()
}

View File

@ -0,0 +1,7 @@
select /*+ SET_VAR(query_timeout = 600) */ ref_52.`key` as k, ref_54.`linestatus` as c0, ref_54.`shipmode` as c1, ref_54.`shipdate` as c2, ref_54.`shipmode` as c3, ref_52.`value` as c4
from nested_array_test_2_vectorized as ref_52 right join tpch_tiny_lineitem as ref_54 on (ref_52.`key` = ref_54.`linenumber` )
where ref_52.`value` is not NULL order by 1, 2, 3, 4, 5 limit 10;
select /*+ SET_VAR(query_timeout = 600) */ ref_52.`key` as k, ref_54.`linestatus` as c0, ref_54.`shipmode` as c1, ref_54.`shipdate` as c2, ref_54.`shipmode` as c3, ref_52.`value` as c4
from nested_array_test_2_vectorized as ref_52 right join tpch_tiny_lineitem as ref_54 on (ref_52.`key` = ref_54.`linenumber` )
where ref_52.`value` is not NULL order by 1, 2, 3, 4, 5 limit 10;