From 8bfb89c10093bb0ef6dec79fa2f6d87c4d96ba75 Mon Sep 17 00:00:00 2001 From: Adonis Ling Date: Mon, 5 Sep 2022 14:05:24 +0800 Subject: [PATCH] [feature-wip](array-type) Add some regression tests for nested array (#12322) #11392 made _input_block in each BetaRowsetReaders sharable. However, for some types (e.g. nested array with more than 1 depth), the _column_vector_batches in RowBlockV2 can be nested which means that there is a ColumnVectorBatch inside another ColumnVectorBatch. In this case, the data of inner ColumnVectorBatch may be corrupted because the data of _input_block is copied shallowly to the _output_block. --- be/src/olap/rowset/beta_rowset_reader.cpp | 12 ++- .../apache/doris/analysis/ArrayLiteral.java | 2 +- .../java/org/apache/doris/catalog/Type.java | 4 + .../load/insert/test_insert_nested_array.out | 57 ++++++++++ .../insert/test_insert_nested_array.groovy | 100 ++++++++++++++++++ 5 files changed, 173 insertions(+), 2 deletions(-) create mode 100644 regression-test/data/load/insert/test_insert_nested_array.out create mode 100644 regression-test/suites/load/insert/test_insert_nested_array.groovy diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index cb9160f5cc..17223c9eb6 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -203,8 +203,18 @@ Status BetaRowsetReader::init(RowsetReaderContext* read_context) { } _iterator.reset(final_iterator); + // The data in _input_block will be copied shallowly to _output_block. + // Therefore, for nestable fields, the _input_block can't be shared. 
+ bool has_nestable_fields = false; + for (const auto* field : _input_schema->columns()) { + if (field != nullptr && field->get_sub_field_count() > 0) { + has_nestable_fields = true; + break; + } + } + // init input block - if (can_reuse_schema) { + if (can_reuse_schema && !has_nestable_fields) { if (read_context->reuse_block == nullptr) { read_context->reuse_block.reset( new RowBlockV2(*_input_schema, std::min(1024, read_context->batch_size))); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java index cf2162c687..f6df3c83a2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java @@ -53,7 +53,7 @@ public class ArrayLiteral extends LiteralExpr { } } - if (itemType == Type.NULL || itemType == Type.INVALID) { + if (itemType == Type.INVALID) { throw new AnalysisException("Invalid element type in ARRAY"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index a3d458948d..257724038c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -508,6 +508,10 @@ public abstract class Type { } return new ArrayType(itemCompatibleType, arrayType1.getContainsNull() || arrayType2.getContainsNull()); + } else if (t1.isArrayType() && t2.isNull()) { + return t1; + } else if (t1.isNull() && t2.isArrayType()) { + return t2; } return ScalarType.INVALID; diff --git a/regression-test/data/load/insert/test_insert_nested_array.out b/regression-test/data/load/insert/test_insert_nested_array.out new file mode 100644 index 0000000000..9dc4355cfb --- /dev/null +++ b/regression-test/data/load/insert/test_insert_nested_array.out @@ -0,0 +1,57 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select -- +1 [] +1 [] +2 [NULL] +2 [NULL] +3 [[]] +3 [[]] +4 [[1, 2, 3], [4, 5, 6]] +4 [[1, 2, 3], [4, 5, 6]] +5 [[1, 2, 3], NULL, [4, 5, 6]] +5 [[1, 2, 3], NULL, [4, 5, 6]] +6 [[1, 2, NULL], NULL, [4, NULL, 6], NULL, [NULL, 8, 9]] +6 [[1, 2, NULL], NULL, [4, NULL, 6], NULL, [NULL, 8, 9]] + +-- !select -- +1 [] +1 [] +2 [NULL] +2 [NULL] +3 [[]] +3 [[]] +4 [[1, 2, 3], [4, 5, 6]] +4 [[1, 2, 3], [4, 5, 6]] +5 [[1, 2, 3], NULL, [4, 5, 6]] +5 [[1, 2, 3], NULL, [4, 5, 6]] +6 [[1, 2, NULL], NULL, [4, NULL, 6], NULL, [NULL, 8, 9]] +6 [[1, 2, NULL], NULL, [4, NULL, 6], NULL, [NULL, 8, 9]] + +-- !select -- +1 [] +1 [] +2 [NULL] +2 [NULL] +3 [[]] +3 [[]] +4 [[NULL]] +4 [[NULL]] +5 [[[]]] +5 [[[]]] +6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] +6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] + +-- !select -- +1 [] +1 [] +2 [NULL] +2 [NULL] +3 [[]] +3 [[]] +4 [[NULL]] +4 [[NULL]] +5 [[[]]] +5 [[[]]] +6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] +6 [[[NULL]], [[1], [2, 3]], [[4, 5, 6], NULL, NULL]] + diff --git a/regression-test/suites/load/insert/test_insert_nested_array.groovy b/regression-test/suites/load/insert/test_insert_nested_array.groovy new file mode 100644 index 0000000000..5f7c04f323 --- /dev/null +++ b/regression-test/suites/load/insert/test_insert_nested_array.groovy @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_insert_nested_array", "load") { + def test_nested_array_2_depths = { enable_vectorized -> + sql "ADMIN SET FRONTEND CONFIG ('enable_array_type' = 'true')" + sql "set enable_vectorized_engine = ${enable_vectorized}" + + def tableName + if (enable_vectorized) { + tableName = "nested_array_test_2_vectorized" + } else { + tableName = "nested_array_test_2_non_vectorized" + } + + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE ${tableName} ( + `key` INT, + value ARRAY<ARRAY<INT>> + ) DUPLICATE KEY (`key`) DISTRIBUTED BY HASH (`key`) BUCKETS 1 + PROPERTIES ('replication_num' = '1') + """ + + sql "INSERT INTO ${tableName} VALUES (1, [])" + sql "INSERT INTO ${tableName} VALUES (2, [null])" + sql "INSERT INTO ${tableName} VALUES (3, [[]])" + sql "INSERT INTO ${tableName} VALUES (4, [[1, 2, 3], [4, 5, 6]])" + sql "INSERT INTO ${tableName} VALUES (5, [[1, 2, 3], null, [4, 5, 6]])" + sql "INSERT INTO ${tableName} VALUES (6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]])" + sql """ + INSERT INTO ${tableName} VALUES + (1, []), + (2, [null]), + (3, [[]]), + (4, [[1, 2, 3], [4, 5, 6]]), + (5, [[1, 2, 3], null, [4, 5, 6]]), + (6, [[1, 2, null], null, [4, null, 6], null, [null, 8, 9]]) + """ + qt_select "select * from ${tableName} order by `key`" + } + + def test_nested_array_3_depths = { enable_vectorized -> + sql "ADMIN SET FRONTEND CONFIG ('enable_array_type' = 'true')" + sql "set enable_vectorized_engine = ${enable_vectorized}" + + def tableName + if (enable_vectorized) { + tableName = "nested_array_test_3_vectorized" + } else { + 
tableName = "nested_array_test_3_non_vectorized" + } + + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE ${tableName} ( + `key` INT, + value ARRAY<ARRAY<ARRAY<INT>>> + ) DUPLICATE KEY (`key`) DISTRIBUTED BY HASH (`key`) BUCKETS 1 + PROPERTIES ('replication_num' = '1') + """ + + sql "INSERT INTO ${tableName} VALUES (1, [])" + sql "INSERT INTO ${tableName} VALUES (2, [null])" + sql "INSERT INTO ${tableName} VALUES (3, [[]])" + sql "INSERT INTO ${tableName} VALUES (4, [[null]])" + sql "INSERT INTO ${tableName} VALUES (5, [[[]]])" + sql "INSERT INTO ${tableName} VALUES (6, [[[null]], [[1], [2, 3]], [[4, 5, 6], null, null]])" + sql """ + INSERT INTO ${tableName} VALUES + (1, []), + (2, [null]), + (3, [[]]), + (4, [[null]]), + (5, [[[]]]), + (6, [[[null]], [[1], [2, 3]], [[4, 5, 6], null, null]]) + """ + qt_select "select * from ${tableName} order by `key`" + } + + test_nested_array_2_depths.call(false) + test_nested_array_2_depths.call(true) + + test_nested_array_3_depths.call(false) + test_nested_array_3_depths.call(true) +}