diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp index c535392a31..bd35929f85 100644 --- a/be/src/vec/sink/vtablet_sink.cpp +++ b/be/src/vec/sink/vtablet_sink.cpp @@ -37,6 +37,7 @@ #include "util/time.h" #include "util/uid_util.h" #include "vec/columns/column_array.h" +#include "vec/columns/column_struct.h" #include "vec/core/block.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" @@ -846,36 +847,6 @@ Status VOlapTableSink::prepare(RuntimeState* state) { _output_row_desc = _pool->add(new RowDescriptor(_output_tuple_desc, false)); - _max_decimalv2_val.resize(_output_tuple_desc->slots().size()); - _min_decimalv2_val.resize(_output_tuple_desc->slots().size()); - // check if need validate batch - for (int i = 0; i < _output_tuple_desc->slots().size(); ++i) { - auto slot = _output_tuple_desc->slots()[i]; - switch (slot->type().type) { - // For DECIMAL32,DECIMAL64,DECIMAL128, we have done precision and scale conversion so just - // skip data validation here. - case TYPE_DECIMALV2: - _max_decimalv2_val[i].to_max_decimal(slot->type().precision, slot->type().scale); - _min_decimalv2_val[i].to_min_decimal(slot->type().precision, slot->type().scale); - _need_validate_data = true; - break; - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_DATE: - case TYPE_DATETIME: - case TYPE_DATEV2: - case TYPE_DATETIMEV2: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_STRING: - case TYPE_ARRAY: - _need_validate_data = true; - break; - default: - break; - } - } - // add all counter _input_rows_counter = ADD_COUNTER(_profile, "RowsRead", TUnit::UNIT); _output_rows_counter = ADD_COUNTER(_profile, "RowsReturned", TUnit::UNIT); @@ -1272,6 +1243,32 @@ Status VOlapTableSink::close(RuntimeState* state, Status exec_status) { return status; } +template +DecimalV2Value VOlapTableSink::_get_decimalv2_min_or_max(const TypeDescriptor& type) { + std::map, DecimalV2Value>* pmap = nullptr; + if constexpr (is_min) { + pmap = &_min_decimalv2_val; + } else { + pmap = &_max_decimalv2_val; + } + + // found + auto iter = pmap->find({type.precision, type.scale}); + if (iter != pmap->end()) { + return iter->second; + } + + // save min or max DecimalV2Value for next time + DecimalV2Value value; + if constexpr (is_min) { + value.to_min_decimal(type.precision, type.scale); + } else { + value.to_max_decimal(type.precision, type.scale); + } + pmap->emplace(std::pair {type.precision, type.scale}, value); + return value; +} + Status VOlapTableSink::_validate_column(RuntimeState* state, const TypeDescriptor& type, bool is_nullable, vectorized::ColumnPtr column, size_t slot_index, Bitmap* filter_bitmap, @@ -1387,8 +1384,9 @@ Status VOlapTableSink::_validate_column(RuntimeState* state, const TypeDescripto invalid = true; } } - if (dec_val > _max_decimalv2_val[slot_index] || - dec_val < _min_decimalv2_val[slot_index]) { + + if (dec_val > _get_decimalv2_min_or_max(type) || + dec_val < _get_decimalv2_min_or_max(type)) { fmt::format_to(error_msg, "{}", "decimal value is not valid for definition"); fmt::format_to(error_msg, ", value={}", dec_val.to_string()); fmt::format_to(error_msg, ", precision={}, scale={}; ", type.precision, @@ -1425,7 +1423,17 @@ Status VOlapTableSink::_validate_column(RuntimeState* state, const TypeDescripto } break; } - // TODO(xy): add struct type validate + case TYPE_STRUCT: { + const auto column_struct = + assert_cast(real_column_ptr.get()); + DCHECK(type.children.size() == column_struct->tuple_size()); + for (size_t sc = 0; sc < column_struct->tuple_size(); ++sc) { + RETURN_IF_ERROR(_validate_column(state, type.children[sc], type.contains_nulls[sc], + column_struct->get_column_ptr(sc), slot_index, + filter_bitmap, stop_processing, error_prefix)); + } + break; + } default: break; } diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h index cb48057ec5..5200c29f37 100644 --- a/be/src/vec/sink/vtablet_sink.h +++ b/be/src/vec/sink/vtablet_sink.h @@ -427,6 +427,10 @@ private: // set stop_processing if we want to stop the whole process now. Status _validate_data(RuntimeState* state, vectorized::Block* block, Bitmap* filter_bitmap, int* filtered_rows, bool* stop_processing); + + template + DecimalV2Value _get_decimalv2_min_or_max(const TypeDescriptor& type); + Status _validate_column(RuntimeState* state, const TypeDescriptor& type, bool is_nullable, vectorized::ColumnPtr column, size_t slot_index, Bitmap* filter_bitmap, bool* stop_processing, fmt::memory_buffer& error_prefix, @@ -455,8 +459,6 @@ private: TupleDescriptor* _output_tuple_desc = nullptr; RowDescriptor* _output_row_desc = nullptr; - bool _need_validate_data = false; - // number of senders used to insert into OlapTable, if we only support single node insert, // all data from select should collectted and then send to OlapTable. // To support multiple senders, we maintain a channel for each sender. @@ -486,8 +488,8 @@ private: scoped_refptr _sender_thread; std::unique_ptr _send_batch_thread_pool_token; - std::vector _max_decimalv2_val; - std::vector _min_decimalv2_val; + std::map, DecimalV2Value> _max_decimalv2_val; + std::map, DecimalV2Value> _min_decimalv2_val; // Stats for this int64_t _validate_data_ns = 0; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java index 95362a37ab..a06fe9e53e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StructLiteral.java @@ -43,7 +43,7 @@ public class StructLiteral extends LiteralExpr { type = new StructType(); children = new ArrayList<>(); for (LiteralExpr expr : exprs) { - if (!type.supportSubType(expr.getType())) { + if (!expr.getType().isNull() && !type.supportSubType(expr.getType())) { throw new AnalysisException("Invalid element type in STRUCT."); } ((StructType) type).addField(new StructField(expr.getType())); diff --git a/regression-test/data/insert_p0/test_struct_insert.out b/regression-test/data/insert_p0/test_struct_insert.out new file mode 100644 index 0000000000..b0bf805c04 Binary files /dev/null and b/regression-test/data/insert_p0/test_struct_insert.out differ diff --git a/regression-test/suites/insert_p0/test_struct_insert.groovy b/regression-test/suites/insert_p0/test_struct_insert.groovy new file mode 100644 index 0000000000..8696e0552a --- /dev/null +++ b/regression-test/suites/insert_p0/test_struct_insert.groovy @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_struct_insert") { + // define a sql table + def testTable = "tbl_test_struct_insert" + + def create_test_table = {testTablex -> + def result1 = sql """ + CREATE TABLE IF NOT EXISTS ${testTable} ( + `k1` INT(11) NULL, + `k2` STRUCT NULL, + `k3` STRUCT NULL, + `k4` STRUCT NULL, + `k5` STRUCT NOT NULL + ) + DUPLICATE KEY(`k1`) + DISTRIBUTED BY HASH(`k1`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ) + """ + + // DDL/DML return 1 row and 5 column, the only value is update row count + assertTrue(result1.size() == 1) + assertTrue(result1[0].size() == 1) + assertTrue(result1[0][0] == 0, "Create table should update 0 rows") + } + + sql "ADMIN SET FRONTEND CONFIG ('enable_struct_type' = 'true')" + + sql "DROP TABLE IF EXISTS ${testTable}" + create_test_table.call(testTable) + + sql "set enable_insert_strict = true" + + // invalid cases + test { + // k5 is not nullable, can not insert null + sql "insert into ${testTable} values (111,null,null,null,null)" + exception "Insert has filtered data" + } + test { + // size of char type in struct is 10, can not insert string with length more than 10 + sql "insert into ${testTable} values (112,null,null,null,{'1234567890123',null,null})" + exception "Insert has filtered data" + } + test { + // size of varchar type in struct is 10, can not insert string with length more than 10 + sql "insert into ${testTable} values (113,null,null,null,{null,'12345678901234',null})" + exception "Insert has filtered data" + } + test { + // input decimal is invalid + sql "insert into ${testTable} values (114,null,{null,null,1234.1234},null,{null,'',null})" + exception "Insert has filtered data" + } + + // normal cases include nullable and nullable nested fields + sql "INSERT INTO ${testTable} VALUES(1, {1,11,111,1111,11111,11111,111111},null,null,{'','',''})" + sql "INSERT INTO ${testTable} VALUES(2, {null,null,null,null,null,null,null},{2.1,2.22,2.333},null,{null,null,null})" + sql "INSERT INTO ${testTable} VALUES(3, null,{null,null,null},{'2023-02-23','2023-02-23 00:10:19','2023-02-23','2023-02-23 00:10:19'},{'','',''})" + sql "INSERT INTO ${testTable} VALUES(4, null,null,{null,null,null,null},{'abc','def','hij'})" + + // select the table and check whether the data is correct + qt_select "select * from ${testTable} order by k1" +} \ No newline at end of file