From ed3ff470ce3edc5015f20ea1cb9ee412cdc6869b Mon Sep 17 00:00:00 2001 From: Zhengguo Yang Date: Tue, 13 Jul 2021 14:02:39 +0800 Subject: [PATCH] [ARRAY] Support array type load and select not include access by index (#5980) This is part of the array type support and has not been fully completed. The following functions are implemented: 1. FE array type support and implementation of array function, support array syntax analysis and planning 2. Support import array type data through insert into 3. Support select array type data 4. The array type is supported only on the value columns of the duplicate table. This PR merges some code from #4655 #4650 #4644 #4643 #4623 #2979 --- be/CMakeLists.txt | 2 +- be/src/common/daemon.cpp | 2 + be/src/common/object_pool.h | 11 +- be/src/exec/olap_scan_node.cpp | 4 + be/src/exec/olap_scan_node.h | 2 + be/src/exec/olap_scanner.cpp | 44 +++ be/src/exec/olap_scanner.h | 1 + be/src/exec/tablet_info.cpp | 10 +- be/src/exec/tablet_sink.cpp | 1 + be/src/exprs/CMakeLists.txt | 1 + be/src/exprs/anyval_util.cpp | 10 + be/src/exprs/anyval_util.h | 10 + be/src/exprs/array_functions.cpp | 44 +++ be/src/exprs/array_functions.h | 38 +++ be/src/exprs/expr.cpp | 12 + be/src/exprs/expr.h | 2 + be/src/exprs/expr_context.cpp | 15 +- be/src/exprs/expr_context.h | 2 +- be/src/exprs/expr_ir.cpp | 3 + be/src/exprs/expr_value.h | 5 +- be/src/exprs/literal.cpp | 31 ++ be/src/exprs/literal.h | 4 + be/src/exprs/null_literal.cpp | 3 + be/src/exprs/null_literal.h | 1 + be/src/exprs/scalar_fn_call.cpp | 13 + be/src/exprs/scalar_fn_call.h | 2 +- be/src/exprs/slot_ref.cpp | 12 + be/src/exprs/slot_ref.h | 2 +- be/src/olap/aggregate_func.cpp | 74 ++++- be/src/olap/aggregate_func.h | 28 +- be/src/olap/collection.h | 61 ---- be/src/olap/column_vector.cpp | 32 ++- be/src/olap/column_vector.h | 30 +- be/src/olap/field.h | 43 ++- be/src/olap/olap_define.h | 4 + be/src/olap/row_cursor.cpp | 4 +- be/src/olap/rowset/beta_rowset_reader.h | 2 +- 
.../olap/rowset/segment_v2/column_reader.cpp | 22 +- .../olap/rowset/segment_v2/column_writer.cpp | 82 ++++-- be/src/olap/rowset/segment_v2/column_writer.h | 39 ++- .../olap/rowset/segment_v2/encoding_info.cpp | 3 +- be/src/olap/tablet_meta.cpp | 88 +++--- be/src/olap/tablet_meta.h | 5 +- be/src/olap/tablet_schema.cpp | 14 +- be/src/olap/tablet_schema.h | 1 + be/src/olap/types.cpp | 10 +- be/src/olap/types.h | 137 ++++----- be/src/runtime/CMakeLists.txt | 1 + be/src/runtime/collection_value.cpp | 270 ++++++++++++++++++ be/src/runtime/collection_value.h | 159 +++++++++++ be/src/runtime/datetime_value.h | 4 +- be/src/runtime/descriptors.cpp | 4 +- be/src/runtime/descriptors.h | 11 + be/src/runtime/mysql_result_writer.cpp | 249 +++++++++------- be/src/runtime/mysql_result_writer.h | 1 + be/src/runtime/primitive_type.cpp | 55 ++++ be/src/runtime/primitive_type.h | 47 +-- be/src/runtime/raw_value.cpp | 42 +++ be/src/runtime/result_writer.h | 1 + be/src/runtime/row_batch.cpp | 106 ++++++- be/src/runtime/tuple.cpp | 115 ++++++++ be/src/runtime/tuple.h | 12 + be/src/runtime/types.cpp | 25 +- be/src/runtime/types.h | 4 + be/src/udf/udf.h | 29 +- be/src/util/mysql_row_buffer.cpp | 101 ++++--- be/src/util/mysql_row_buffer.h | 65 ++++- be/test/exec/CMakeLists.txt | 2 +- be/test/exprs/CMakeLists.txt | 2 + be/test/exprs/array_functions_test.cpp | 82 ++++++ be/test/olap/column_vector_test.cpp | 34 ++- .../segment_v2/column_reader_writer_test.cpp | 22 +- be/test/olap/storage_types_test.cpp | 34 +-- be/test/runtime/CMakeLists.txt | 1 + be/test/runtime/collection_value_test.cpp | 97 +++++++ be/test/util/CMakeLists.txt | 1 + be/test/util/mysql_row_buffer_test.cpp | 127 ++++++++ fe/fe-core/AlterRoutineLoadOperationLogTest | Bin 0 -> 478 bytes fe/fe-core/diskInfoTest | Bin 0 -> 158 bytes fe/fe-core/src/main/cup/sql_parser.cup | 73 ++++- .../apache/doris/analysis/ArrayLiteral.java | 116 ++++++++ .../org/apache/doris/analysis/ColumnDef.java | 26 +- .../doris/analysis/CreateTableStmt.java 
| 16 ++ .../java/org/apache/doris/analysis/Expr.java | 7 +- .../doris/analysis/ExpressionFunctions.java | 46 ++- .../doris/analysis/FunctionCallExpr.java | 37 ++- .../doris/analysis/RangePartitionDesc.java | 5 +- .../org/apache/doris/analysis/Subquery.java | 6 +- .../org/apache/doris/analysis/TypeDef.java | 58 +++- .../org/apache/doris/catalog/ArrayType.java | 89 +++++- .../java/org/apache/doris/catalog/Column.java | 82 +++++- .../org/apache/doris/catalog/ColumnType.java | 42 ++- .../org/apache/doris/catalog/Function.java | 4 + .../org/apache/doris/catalog/MapType.java | 10 +- .../apache/doris/catalog/MultiRowType.java | 79 +++++ .../apache/doris/catalog/PrimitiveType.java | 15 + .../org/apache/doris/catalog/ScalarType.java | 5 + .../org/apache/doris/catalog/StructType.java | 4 + .../java/org/apache/doris/catalog/Type.java | 42 ++- .../java/org/apache/doris/common/Config.java | 5 + .../org/apache/doris/common/util/Util.java | 1 + .../apache/doris/persist/gson/GsonUtils.java | 8 +- .../apache/doris/rewrite/FEFunctionList.java | 29 ++ .../org/apache/doris/rewrite/FEFunctions.java | 85 ++---- fe/fe-core/src/main/jflex/sql_scanner.flex | 3 + .../apache/doris/rewrite/FEFunctionsTest.java | 49 ---- fe/pom.xml | 2 +- gensrc/proto/olap_file.proto | 1 + gensrc/proto/segment_v2.proto | 4 +- gensrc/script/doris_builtins_functions.py | 18 +- gensrc/script/gen_functions.py | 23 +- gensrc/script/gen_vector_functions.py | 7 +- gensrc/thrift/AgentService.thrift | 1 + gensrc/thrift/Exprs.thrift | 1 + gensrc/thrift/Types.thrift | 5 +- 115 files changed, 2919 insertions(+), 754 deletions(-) create mode 100644 be/src/exprs/array_functions.cpp create mode 100644 be/src/exprs/array_functions.h delete mode 100644 be/src/olap/collection.h create mode 100644 be/src/runtime/collection_value.cpp create mode 100644 be/src/runtime/collection_value.h create mode 100644 be/test/exprs/array_functions_test.cpp create mode 100644 be/test/runtime/collection_value_test.cpp create mode 100644 
be/test/util/mysql_row_buffer_test.cpp create mode 100644 fe/fe-core/AlterRoutineLoadOperationLogTest create mode 100644 fe/fe-core/diskInfoTest create mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/catalog/MultiRowType.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctionList.java diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 30148ca7c3..38ef7f9f18 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -369,7 +369,7 @@ set(CXX_GCC_FLAGS "${CXX_GCC_FLAGS} -g -Wno-unused-local-typedefs") # Debug information is stored as dwarf2 to be as compatible as possible # -Werror: compile warnings should be errors when using the toolchain compiler. # Only enable for debug builds because this is what we test in pre-commit tests. -set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -ggdb3 -O0 -gdwarf-2") +set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -ggdb -O0") # For CMAKE_BUILD_TYPE=Release # -O3: Enable all compiler optimizations diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index cabaedb2f2..ac4e645720 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -22,6 +22,7 @@ #include #include "common/config.h" +#include "exprs/array_functions.h" #include "exprs/bitmap_function.h" #include "exprs/cast_functions.h" #include "exprs/compound_predicate.h" @@ -244,6 +245,7 @@ void Daemon::init(int argc, char** argv, const std::vector& paths) { IsNullPredicate::init(); LikePredicate::init(); StringFunctions::init(); + ArrayFunctions::init(); CastFunctions::init(); InPredicate::init(); MathFunctions::init(); diff --git a/be/src/common/object_pool.h b/be/src/common/object_pool.h index db82bdf79e..b2eb4e3024 100644 --- a/be/src/common/object_pool.h +++ b/be/src/common/object_pool.h @@ -42,6 +42,13 @@ public: return t; } + template + T* add_array(T* t) { + std::lock_guard l(_lock); + _objects.emplace_back(Element{t, [](void* obj) { 
delete[] reinterpret_cast(obj); }}); + return t; + } + void clear() { std::lock_guard l(_lock); for (Element& elem : _objects) elem.delete_fn(elem.obj); @@ -57,14 +64,14 @@ private: DISALLOW_COPY_AND_ASSIGN(ObjectPool); /// A generic deletion function pointer. Deletes its first argument. - using DeleteFn = void (*)(void*); + using DeleteFn = void (*)(void*); /// For each object, a pointer to the object and a function that deletes it. struct Element { void* obj; DeleteFn delete_fn; }; - + std::vector _objects; SpinLock _lock; }; diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 92b31b85d5..3da65b489a 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -182,6 +182,10 @@ Status OlapScanNode::prepare(RuntimeState* state) { continue; } + if (slots[i]->type().is_collection_type()) { + _collection_slots.push_back(slots[i]); + } + if (!slots[i]->type().is_string_type()) { continue; } diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h index 4d288f5f35..8e59e23e93 100644 --- a/be/src/exec/olap_scan_node.h +++ b/be/src/exec/olap_scan_node.h @@ -209,6 +209,8 @@ private: // conjunct's index which already be push down storage engine // should be remove in olap_scan_node, no need check this conjunct again std::set _pushed_conjuncts_index; + // collection slots + std::vector _collection_slots; bool _eos; diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 84a43683cb..476f032ca1 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -43,6 +43,7 @@ OlapScanner::OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool _tuple_desc(parent->_tuple_desc), _profile(parent->runtime_profile()), _string_slots(parent->_string_slots), + _collection_slots(parent->_collection_slots), _id(-1), _is_open(false), _aggregation(aggregation), @@ -340,6 +341,43 @@ Status OlapScanner::get_batch(RuntimeState* state, RowBatch* batch, bool* eof) { } } + // 
Copy collection slot + for (auto desc : _collection_slots) { + CollectionValue* slot = tuple->get_collection_slot(desc->tuple_offset()); + + TypeDescriptor item_type = desc->type().children.at(0); + size_t item_size = item_type.get_slot_size() * slot->length(); + + size_t nulls_size = slot->length(); + uint8_t* data = batch->tuple_data_pool()->allocate(item_size + nulls_size); + + // copy null_signs + memory_copy(data, slot->null_signs(), nulls_size); + memory_copy(data + nulls_size, slot->data(), item_size); + + slot->set_null_signs(reinterpret_cast(data)); + slot->set_data(reinterpret_cast(data + nulls_size)); + + if (!item_type.is_string_type()) { + continue; + } + + // when string type, copy every item + for (int i = 0; i < slot->length(); ++i) { + int item_offset = nulls_size + i * item_type.get_slot_size(); + if (slot->is_null_at(i)) { + continue; + } + StringValue* dst_item_v = + reinterpret_cast(data + item_offset); + if (dst_item_v->len != 0) { + char* string_copy = reinterpret_cast( + batch->tuple_data_pool()->allocate(dst_item_v->len)); + memory_copy(string_copy, dst_item_v->ptr, dst_item_v->len); + dst_item_v->ptr = string_copy; + } + } + } // the memory allocate by mem pool has been copied, // so we should release these memory immediately mem_pool->clear(); @@ -442,6 +480,12 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) { } break; } + case TYPE_ARRAY: { + CollectionValue* array_v = reinterpret_cast(ptr); + CollectionValue* slot = tuple->get_collection_slot(slot_desc->tuple_offset()); + slot->shallow_copy(array_v); + break; + } default: { void* slot = tuple->get_slot(slot_desc->tuple_offset()); memory_copy(slot, ptr, len); diff --git a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h index ccfca3e21a..6dbd2fdbd2 100644 --- a/be/src/exec/olap_scanner.h +++ b/be/src/exec/olap_scanner.h @@ -111,6 +111,7 @@ private: const TupleDescriptor* _tuple_desc; /**< tuple descriptor */ RuntimeProfile* _profile; const std::vector& _string_slots; + 
const std::vector& _collection_slots; std::vector _conjunct_ctxs; // to record which runtime filters have been used diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index f73e308d52..9b04f061ce 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -39,11 +39,13 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { _version = pschema.version(); std::map slots_map; _tuple_desc = _obj_pool.add(new TupleDescriptor(pschema.tuple_desc())); + for (auto& p_slot_desc : pschema.slot_descs()) { auto slot_desc = _obj_pool.add(new SlotDescriptor(p_slot_desc)); _tuple_desc->add_slot(slot_desc); slots_map.emplace(slot_desc->col_name(), slot_desc); } + for (auto& p_index : pschema.indexes()) { auto index = _obj_pool.add(new OlapTableIndexSchema()); index->index_id = p_index.id(); @@ -78,6 +80,7 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { _tuple_desc->add_slot(slot_desc); slots_map.emplace(slot_desc->col_name(), slot_desc); } + for (auto& t_index : tschema.indexes) { auto index = _obj_pool.add(new OlapTableIndexSchema()); index->index_id = t_index.id; @@ -133,10 +136,8 @@ std::string OlapTablePartition::debug_string(TupleDescriptor* tuple_desc) const } in_keys_ss << "]"; ss << "(id=" << id << ",start_key=" << Tuple::to_string(start_key, *tuple_desc) - << ",end_key=" << Tuple::to_string(end_key, *tuple_desc) - << ",in_key=" << in_keys_ss.str() - << ",num_buckets=" << num_buckets - << ",indexes=["; + << ",end_key=" << Tuple::to_string(end_key, *tuple_desc) << ",in_key=" << in_keys_ss.str() + << ",num_buckets=" << num_buckets << ",indexes=["; idx = 0; for (auto& index : indexes) { if (idx++ > 0) { @@ -274,7 +275,6 @@ bool OlapTablePartitionParam::find_tablet(Tuple* tuple, const OlapTablePartition it = _partitions_map->find(tuple); } else { it = _partitions_map->upper_bound(tuple); - } if (it == _partitions_map->end()) { return false; diff --git a/be/src/exec/tablet_sink.cpp 
b/be/src/exec/tablet_sink.cpp index 1b04e02bb7..d7d24d1d96 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -241,6 +241,7 @@ Status NodeChannel::add_row(Tuple* input_tuple, int64_t tablet_id) { } DCHECK_NE(row_no, RowBatch::INVALID_ROW_INDEX); auto tuple = input_tuple->deep_copy(*_tuple_desc, _cur_batch->tuple_data_pool()); + _cur_batch->get_row(row_no)->set_tuple(0, tuple); _cur_batch->commit_last_row(); _cur_add_batch_request.add_tablet_ids(tablet_id); diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt index dcb397ffdf..421cfc2cfc 100644 --- a/be/src/exprs/CMakeLists.txt +++ b/be/src/exprs/CMakeLists.txt @@ -56,6 +56,7 @@ add_library(Exprs scalar_fn_call.cpp slot_ref.cpp string_functions.cpp + array_functions.cpp timestamp_functions.cpp tuple_is_null_predicate.cpp udf_builtins.cpp diff --git a/be/src/exprs/anyval_util.cpp b/be/src/exprs/anyval_util.cpp index baff010e18..c61a9aa55e 100644 --- a/be/src/exprs/anyval_util.cpp +++ b/be/src/exprs/anyval_util.cpp @@ -90,6 +90,10 @@ AnyVal* create_any_val(ObjectPool* pool, const TypeDescriptor& type) { case TYPE_DATETIME: return pool->add(new DateTimeVal); + + case TYPE_ARRAY: + return pool->add(new CollectionVal); + default: DCHECK(false) << "Unsupported type: " << type.type; return NULL; @@ -152,6 +156,12 @@ FunctionContext::TypeDesc AnyValUtil::column_type_to_type_desc(const TypeDescrip case TYPE_NULL: out.type = FunctionContext::TYPE_NULL; break; + case TYPE_ARRAY: + out.type = FunctionContext::TYPE_ARRAY; + for (const auto& t : type.children) { + out.children.push_back(column_type_to_type_desc(t)); + } + break; default: DCHECK(false) << "Unknown type: " << type; } diff --git a/be/src/exprs/anyval_util.h b/be/src/exprs/anyval_util.h index daa1a0f514..477553939d 100644 --- a/be/src/exprs/anyval_util.h +++ b/be/src/exprs/anyval_util.h @@ -20,6 +20,7 @@ #include "common/status.h" #include "exprs/expr.h" +#include "runtime/collection_value.h" #include 
"runtime/primitive_type.h" #include "udf/udf.h" #include "util/hash_util.hpp" @@ -207,6 +208,9 @@ public: case TYPE_DECIMALV2: return sizeof(doris_udf::DecimalV2Val); + case TYPE_ARRAY: + return sizeof(doris_udf::CollectionVal); + default: DCHECK(false) << t; return 0; @@ -242,6 +246,8 @@ public: return alignof(DateTimeVal); case TYPE_DECIMALV2: return alignof(DecimalV2Val); + case TYPE_ARRAY: + return alignof(doris_udf::CollectionVal); default: DCHECK(false) << t; return 0; @@ -345,6 +351,10 @@ public: reinterpret_cast(slot)->to_datetime_val( reinterpret_cast(dst)); return; + case TYPE_ARRAY: + reinterpret_cast(slot)->to_collection_val( + reinterpret_cast(dst)); + return; default: DCHECK(false) << "NYI"; } diff --git a/be/src/exprs/array_functions.cpp b/be/src/exprs/array_functions.cpp new file mode 100644 index 0000000000..b9b7fcf8a4 --- /dev/null +++ b/be/src/exprs/array_functions.cpp @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "exprs/array_functions.h" + +#include "common/logging.h" +#include "runtime/collection_value.h" + +namespace doris { + +void ArrayFunctions::init() {} + +#define ARRAY_FUNCTION(TYPE, PRIMARY_TYPE) \ + CollectionVal ArrayFunctions::array(FunctionContext* context, int num_children, \ + const TYPE* values) { \ + DCHECK_EQ(context->get_return_type().children.size(), 1); \ + CollectionValue v; \ + CollectionValue::init_collection(context, num_children, PRIMARY_TYPE, &v); \ + for (int i = 0; i < num_children; ++i) { \ + v.set(i, PRIMARY_TYPE, values + i); \ + } \ + CollectionVal ret; \ + v.to_collection_val(&ret); \ + return ret; \ + } + +ARRAY_FUNCTION(IntVal, TYPE_INT); +ARRAY_FUNCTION(StringVal, TYPE_VARCHAR); + +} // namespace doris diff --git a/be/src/exprs/array_functions.h b/be/src/exprs/array_functions.h new file mode 100644 index 0000000000..d0a32f0e91 --- /dev/null +++ b/be/src/exprs/array_functions.h @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef DORIS_BE_SRC_QUERY_EXPRS_COLLECTION_FUNCTIONS_H +#define DORIS_BE_SRC_QUERY_EXPRS_COLLECTION_FUNCTIONS_H + +#include "anyval_util.h" + +namespace doris { + +class ArrayFunctions { +public: + static void init(); + + /** + * array construct functions, create array with the children values + */ + static CollectionVal array(FunctionContext* context, int num_children, const IntVal* values); + + static CollectionVal array(FunctionContext* context, int num_children, const StringVal* values); +}; +} // namespace doris + +#endif diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp index 480aac4e89..e4f88eb450 100644 --- a/be/src/exprs/expr.cpp +++ b/be/src/exprs/expr.cpp @@ -312,6 +312,9 @@ Status Expr::create_expr(ObjectPool* pool, const TExprNode& texpr_node, Expr** e case TExprNodeType::STRING_LITERAL: *expr = pool->add(new Literal(texpr_node)); return Status::OK(); + case TExprNodeType::ARRAY_LITERAL: + *expr = pool->add(new Literal(texpr_node)); + return Status::OK(); case TExprNodeType::COMPOUND_PRED: switch (texpr_node.opcode) { case TExprOpcode::COMPOUND_AND: @@ -719,6 +722,10 @@ doris_udf::AnyVal* Expr::get_const_val(ExprContext* context) { _constant_val.reset(new AnyVal(true)); break; } + case TYPE_ARRAY: { + _constant_val.reset(new CollectionVal(get_array_val(context, NULL))); + break; + } default: DCHECK(false) << "Type not implemented: " << type(); } @@ -797,6 +804,11 @@ DecimalV2Val Expr::get_decimalv2_val(ExprContext* context, TupleRow* row) { return val; } +CollectionVal Expr::get_array_val(ExprContext* context, TupleRow* row) { + CollectionVal val; + return val; +} + Status Expr::get_fn_context_error(ExprContext* ctx) { if (_fn_context_index != -1) { FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h index d3bc711cc3..5bc3c50a6b 100644 --- a/be/src/exprs/expr.h +++ b/be/src/exprs/expr.h @@ -112,6 +112,7 @@ public: // virtual ArrayVal GetArrayVal(ExprContext* context, 
TupleRow*); virtual DateTimeVal get_datetime_val(ExprContext* context, TupleRow*); virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*); + virtual CollectionVal get_array_val(ExprContext* context, TupleRow*); // Get the number of digits after the decimal that should be displayed for this // value. Returns -1 if no scale has been specified (currently the scale is only set for @@ -429,6 +430,7 @@ private: static DoubleVal get_double_val(Expr* expr, ExprContext* context, TupleRow* row); static StringVal get_string_val(Expr* expr, ExprContext* context, TupleRow* row); static DateTimeVal get_datetime_val(Expr* expr, ExprContext* context, TupleRow* row); + static CollectionVal get_array_val(Expr* expr, ExprContext* context, TupleRow* row); static DecimalV2Val get_decimalv2_val(Expr* expr, ExprContext* context, TupleRow* row); /// Creates an expression tree rooted at 'root' via depth-first traversal. diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp index 25d11286a7..d8cd5a3a5d 100644 --- a/be/src/exprs/expr_context.cpp +++ b/be/src/exprs/expr_context.cpp @@ -286,16 +286,15 @@ void* ExprContext::get_value(Expr* e, TupleRow* row) { _result.decimalv2_val = DecimalV2Value::from_decimal_val(v); return &_result.decimalv2_val; } -#if 0 - case TYPE_ARRAY: - case TYPE_MAP: { - doris_udf::ArrayVal v = e->GetArrayVal(this, row); - if (v.is_null) return NULL; - _result.array_val.ptr = v.ptr; - _result.array_val.num_tuples = v.num_tuples; + case TYPE_ARRAY: { + doris_udf::CollectionVal v = e->get_array_val(this, row); + if (v.is_null) { + return NULL; + } + + _result.array_val = CollectionValue::from_collection_val(v); return &_result.array_val; } -#endif default: DCHECK(false) << "Type not implemented: " << e->_type; return NULL; diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h index a786c27832..5c7f4ecb2d 100644 --- a/be/src/exprs/expr_context.h +++ b/be/src/exprs/expr_context.h @@ -25,7 +25,7 @@ #include 
"exprs/expr_value.h" #include "exprs/slot_ref.h" #include "udf/udf.h" -#include "udf/udf_internal.h" // for ArrayVal +#include "udf/udf_internal.h" // for CollectionVal #undef USING_DORIS_UDF #define USING_DORIS_UDF using namespace doris_udf diff --git a/be/src/exprs/expr_ir.cpp b/be/src/exprs/expr_ir.cpp index 02d223e715..7be579846e 100644 --- a/be/src/exprs/expr_ir.cpp +++ b/be/src/exprs/expr_ir.cpp @@ -75,4 +75,7 @@ DateTimeVal Expr::get_datetime_val(Expr* expr, ExprContext* context, TupleRow* r DecimalV2Val Expr::get_decimalv2_val(Expr* expr, ExprContext* context, TupleRow* row) { return expr->get_decimalv2_val(context, row); } +CollectionVal Expr::get_array_val(Expr* expr, ExprContext* context, TupleRow* row) { + return expr->get_array_val(context, row); +} } // namespace doris diff --git a/be/src/exprs/expr_value.h b/be/src/exprs/expr_value.h index a2f2dc876a..bb98c4938f 100644 --- a/be/src/exprs/expr_value.h +++ b/be/src/exprs/expr_value.h @@ -18,6 +18,7 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_EXPR_VALUE_H #define DORIS_BE_SRC_QUERY_EXPRS_EXPR_VALUE_H +#include "runtime/collection_value.h" #include "runtime/datetime_value.h" #include "runtime/decimalv2_value.h" #include "runtime/string_value.h" @@ -44,6 +45,7 @@ struct ExprValue { StringValue string_val; DateTimeValue datetime_val; DecimalV2Value decimalv2_val; + CollectionValue array_val; ExprValue() : bool_val(false), @@ -57,7 +59,8 @@ struct ExprValue { string_data(), string_val(NULL, 0), datetime_val(), - decimalv2_val(0) {} + decimalv2_val(0), + array_val() {} ExprValue(bool v) : bool_val(v) {} ExprValue(int8_t v) : tinyint_val(v) {} diff --git a/be/src/exprs/literal.cpp b/be/src/exprs/literal.cpp index 4ad17c39b5..45dfcb0b2a 100644 --- a/be/src/exprs/literal.cpp +++ b/be/src/exprs/literal.cpp @@ -20,6 +20,7 @@ #include #include "gen_cpp/Exprs_types.h" +#include "runtime/collection_value.h" #include "runtime/runtime_state.h" #include "util/string_parser.hpp" @@ -92,6 +93,11 @@ Literal::Literal(const 
TExprNode& node) : Expr(node) { _value.decimalv2_val = DecimalV2Value(node.decimal_literal.value); break; } + case TYPE_ARRAY: { + DCHECK_EQ(node.node_type, TExprNodeType::ARRAY_LITERAL); + // init in prepare + break; + } default: break; // DCHECK(false) << "Invalid type: " << TypeToString(_type.type); @@ -160,4 +166,29 @@ StringVal Literal::get_string_val(ExprContext* context, TupleRow* row) { return str_val; } +CollectionVal Literal::get_array_val(ExprContext* context, TupleRow*) { + DCHECK(_type.is_collection_type()); + CollectionVal val; + _value.array_val.to_collection_val(&val); + return val; +} + +Status Literal::prepare(RuntimeState* state, const RowDescriptor& row_desc, ExprContext* context) { + RETURN_IF_ERROR(Expr::prepare(state, row_desc, context)); + + if (type().type == TYPE_ARRAY) { + DCHECK_EQ(type().children.size(), 1) << "array children type not 1"; + // init array value + auto td = type().children.at(0).type; + RETURN_IF_ERROR(CollectionValue::init_collection(state->obj_pool(), get_num_children(), td, + &_value.array_val)); + // init every item + for (int i = 0; i < get_num_children(); ++i) { + Expr* children = get_child(i); + RETURN_IF_ERROR(_value.array_val.set(i, td, children->get_const_val(context))); + } + } + + return Status::OK(); +} } // namespace doris diff --git a/be/src/exprs/literal.h b/be/src/exprs/literal.h index 4899116c42..49b402f792 100644 --- a/be/src/exprs/literal.h +++ b/be/src/exprs/literal.h @@ -43,6 +43,10 @@ public: virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*); virtual DateTimeVal get_datetime_val(ExprContext* context, TupleRow*); virtual StringVal get_string_val(ExprContext* context, TupleRow* row); + virtual CollectionVal get_array_val(ExprContext* context, TupleRow*); + // init val before use + virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, + ExprContext* context); protected: friend class Expr; diff --git a/be/src/exprs/null_literal.cpp 
b/be/src/exprs/null_literal.cpp index c2bafe8d9a..427e2bafac 100644 --- a/be/src/exprs/null_literal.cpp +++ b/be/src/exprs/null_literal.cpp @@ -67,4 +67,7 @@ DecimalV2Val NullLiteral::get_decimalv2_val(ExprContext*, TupleRow*) { return DecimalV2Val::null(); } +CollectionVal NullLiteral::get_array_val(ExprContext* context, TupleRow*) { + return CollectionVal::null(); +} } // namespace doris diff --git a/be/src/exprs/null_literal.h b/be/src/exprs/null_literal.h index 242e23e6fc..38d7dcd07a 100644 --- a/be/src/exprs/null_literal.h +++ b/be/src/exprs/null_literal.h @@ -41,6 +41,7 @@ public: virtual doris_udf::StringVal get_string_val(ExprContext*, TupleRow*); virtual doris_udf::DateTimeVal get_datetime_val(ExprContext*, TupleRow*); virtual doris_udf::DecimalV2Val get_decimalv2_val(ExprContext*, TupleRow*); + virtual CollectionVal get_array_val(ExprContext* context, TupleRow*); protected: friend class Expr; diff --git a/be/src/exprs/scalar_fn_call.cpp b/be/src/exprs/scalar_fn_call.cpp index 5ed75166cc..c887073973 100644 --- a/be/src/exprs/scalar_fn_call.cpp +++ b/be/src/exprs/scalar_fn_call.cpp @@ -422,6 +422,7 @@ typedef DoubleVal (*DoubleWrapper)(ExprContext*, TupleRow*); typedef StringVal (*StringWrapper)(ExprContext*, TupleRow*); typedef DateTimeVal (*DatetimeWrapper)(ExprContext*, TupleRow*); typedef DecimalV2Val (*DecimalV2Wrapper)(ExprContext*, TupleRow*); +typedef CollectionVal (*ArrayWrapper)(ExprContext*, TupleRow*); // TODO: macroify this? 
BooleanVal ScalarFnCall::get_boolean_val(ExprContext* context, TupleRow* row) { @@ -535,6 +536,18 @@ DecimalV2Val ScalarFnCall::get_decimalv2_val(ExprContext* context, TupleRow* row return fn(context, row); } +CollectionVal ScalarFnCall::get_array_val(ExprContext* context, TupleRow* row) { + DCHECK_EQ(_type.type, TYPE_ARRAY); + DCHECK(context != NULL); + + if (_scalar_fn_wrapper == NULL) { + return interpret_eval(context, row); + } + + ArrayWrapper fn = reinterpret_cast(_scalar_fn_wrapper); + return fn(context, row); +} + std::string ScalarFnCall::debug_string() const { std::stringstream out; out << "ScalarFnCall(udf_type=" << _fn.binary_type << " location=" << _fn.hdfs_location diff --git a/be/src/exprs/scalar_fn_call.h b/be/src/exprs/scalar_fn_call.h index 8b81498d53..33ae72ffd5 100644 --- a/be/src/exprs/scalar_fn_call.h +++ b/be/src/exprs/scalar_fn_call.h @@ -77,7 +77,7 @@ protected: virtual doris_udf::StringVal get_string_val(ExprContext* context, TupleRow*); virtual doris_udf::DateTimeVal get_datetime_val(ExprContext* context, TupleRow*); virtual doris_udf::DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*); - // virtual doris_udf::ArrayVal GetArrayVal(ExprContext* context, TupleRow*); + virtual CollectionVal get_array_val(ExprContext* context, TupleRow*); private: /// If this function has var args, children()[_vararg_start_idx] is the first vararg diff --git a/be/src/exprs/slot_ref.cpp b/be/src/exprs/slot_ref.cpp index a12e4b627c..bc106d2516 100644 --- a/be/src/exprs/slot_ref.cpp +++ b/be/src/exprs/slot_ref.cpp @@ -234,4 +234,16 @@ DecimalV2Val SlotRef::get_decimalv2_val(ExprContext* context, TupleRow* row) { return DecimalV2Val(reinterpret_cast(t->get_slot(_slot_offset))->value); } +doris_udf::CollectionVal SlotRef::get_array_val(ExprContext* context, TupleRow* row) { + DCHECK_EQ(_type.type, TYPE_ARRAY); + + Tuple* t = row->get_tuple(_tuple_idx); + if (t == NULL || t->is_null(_null_indicator_offset)) { + return CollectionVal::null(); + } + + 
CollectionVal val; + reinterpret_cast(t->get_slot(_slot_offset))->to_collection_val(&val); + return val; +} } // namespace doris diff --git a/be/src/exprs/slot_ref.h b/be/src/exprs/slot_ref.h index b3244ce2aa..cbeb4b6cb0 100644 --- a/be/src/exprs/slot_ref.h +++ b/be/src/exprs/slot_ref.h @@ -68,7 +68,7 @@ public: virtual doris_udf::StringVal get_string_val(ExprContext* context, TupleRow*); virtual doris_udf::DateTimeVal get_datetime_val(ExprContext* context, TupleRow*); virtual doris_udf::DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*); - // virtual doris_udf::ArrayVal GetArrayVal(ExprContext* context, TupleRow*); + virtual doris_udf::CollectionVal get_array_val(ExprContext* context, TupleRow*); private: int _tuple_idx; // within row diff --git a/be/src/olap/aggregate_func.cpp b/be/src/olap/aggregate_func.cpp index d98a43ce11..35482d8dca 100644 --- a/be/src/olap/aggregate_func.cpp +++ b/be/src/olap/aggregate_func.cpp @@ -17,6 +17,38 @@ #include "olap/aggregate_func.h" +namespace std { +namespace { +// algorithm from boost: http://www.boost.org/doc/libs/1_61_0/doc/html/hash/reference.html#boost.hash_combine +template +inline void hash_combine(std::size_t& seed, T const& v) { + seed ^= std::hash()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +template ::value - 1> +struct HashValueImpl { + static void apply(size_t& seed, Tuple const& tuple) { + HashValueImpl::apply(seed, tuple); + hash_combine(seed, std::get(tuple)); + } +}; + +template +struct HashValueImpl { + static void apply(size_t& seed, Tuple const& tuple) { hash_combine(seed, std::get<0>(tuple)); } +}; +} // namespace + +template +struct hash> { + size_t operator()(std::tuple const& tt) const { + size_t seed = 0; + HashValueImpl>::apply(seed, tt); + return seed; + } +}; +} // namespace std + namespace doris { template @@ -26,19 +58,14 @@ AggregateInfo::AggregateInfo(const Traits& traits) _finalize_fn(traits.finalize), _agg_method(traits.agg_method) {} -struct AggregateFuncMapHash { - 
size_t operator()(const std::pair& pair) const { - return (pair.first + 31) ^ pair.second; - } -}; - class AggregateFuncResolver { DECLARE_SINGLETON(AggregateFuncResolver); public: const AggregateInfo* get_aggregate_info(const FieldAggregationMethod agg_method, - const FieldType field_type) const { - auto pair = _infos_mapping.find(std::make_pair(agg_method, field_type)); + const FieldType field_type, + const FieldType sub_type) const { + auto pair = _infos_mapping.find(std::make_tuple(agg_method, field_type, sub_type)); if (pair != _infos_mapping.end()) { return pair->second; } else { @@ -46,15 +73,17 @@ public: } } - template + template void add_aggregate_mapping() { - _infos_mapping.emplace(std::make_pair(agg_method, field_type), - new AggregateInfo(AggregateTraits())); + _infos_mapping.emplace( + std::make_tuple(agg_method, field_type, sub_type), + new AggregateInfo(AggregateTraits())); } private: - typedef std::pair key_t; - std::unordered_map _infos_mapping; + typedef std::tuple key_t; + std::unordered_map _infos_mapping; DISALLOW_COPY_AND_ASSIGN(AggregateFuncResolver); }; @@ -74,6 +103,21 @@ AggregateFuncResolver::AggregateFuncResolver() { add_aggregate_mapping(); add_aggregate_mapping(); add_aggregate_mapping(); + // array types has sub type like array field type is array, subtype is int + add_aggregate_mapping(); + add_aggregate_mapping(); + add_aggregate_mapping(); + add_aggregate_mapping(); + add_aggregate_mapping(); + add_aggregate_mapping(); + add_aggregate_mapping(); // Min Aggregate Function add_aggregate_mapping(); @@ -159,8 +203,8 @@ AggregateFuncResolver::~AggregateFuncResolver() { } const AggregateInfo* get_aggregate_info(const FieldAggregationMethod agg_method, - const FieldType field_type) { - return AggregateFuncResolver::instance()->get_aggregate_info(agg_method, field_type); + const FieldType field_type, const FieldType sub_type) { + return AggregateFuncResolver::instance()->get_aggregate_info(agg_method, field_type, sub_type); } } // 
namespace doris diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h index 1d31f292e2..39e043ceb8 100644 --- a/be/src/olap/aggregate_func.h +++ b/be/src/olap/aggregate_func.h @@ -90,7 +90,7 @@ private: FieldAggregationMethod _agg_method; }; -template +template struct BaseAggregateFuncs { static void init(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, ObjectPool* agg_pool) { @@ -98,9 +98,13 @@ struct BaseAggregateFuncs { if (src_null) { return; } - - const TypeInfo* _type_info = get_type_info(field_type); - _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + if constexpr (field_type == OLAP_FIELD_TYPE_ARRAY) { + const TypeInfo* _type_info = get_collection_type_info(sub_type); + _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + } else { + const TypeInfo* _type_info = get_type_info(field_type); + _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + } } // Default update do nothing. @@ -110,8 +114,9 @@ struct BaseAggregateFuncs { static void finalize(RowCursorCell* src, MemPool* mem_pool) {} }; -template -struct AggregateFuncTraits : public BaseAggregateFuncs {}; +template +struct AggregateFuncTraits : public BaseAggregateFuncs {}; template <> struct AggregateFuncTraits @@ -461,7 +466,7 @@ struct AggregateFuncTraitssize = 0; auto* hll = new HyperLogLog(*src_slice); - + dst_slice->data = reinterpret_cast(hll); agg_pool->add(hll); @@ -548,12 +553,15 @@ struct AggregateFuncTraits { }; -template -struct AggregateTraits : public AggregateFuncTraits { +template +struct AggregateTraits : public AggregateFuncTraits { static const FieldAggregationMethod agg_method = aggMethod; static const FieldType type = fieldType; + static const FieldType sub_type = subType; }; const AggregateInfo* get_aggregate_info(const FieldAggregationMethod agg_method, - const FieldType field_type); + const FieldType field_type, + const FieldType sub_type = OLAP_FIELD_TYPE_NONE); } // namespace doris diff --git 
a/be/src/olap/collection.h b/be/src/olap/collection.h deleted file mode 100644 index 328869ba55..0000000000 --- a/be/src/olap/collection.h +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -namespace doris { - -// cpp type for ARRAY -struct Collection { - // child column data - void* data; - uint64_t length; - // item has no null value if has_null is false. - // item ```may``` has null value if has_null is true. - // null_count is better? 
- bool has_null; - // null bitmap - bool* null_signs; - - Collection() : data(nullptr), length(0), has_null(false), null_signs(nullptr) {} - - explicit Collection(uint64_t length) - : data(nullptr), length(length), has_null(false), null_signs(nullptr) {} - - Collection(void* data, size_t length) - : data(data), length(length), has_null(false), null_signs(nullptr) {} - - Collection(void* data, size_t length, bool* null_signs) - : data(data), length(length), has_null(true), null_signs(null_signs) {} - - Collection(void* data, size_t length, bool has_null, bool* null_signs) - : data(data), length(length), has_null(has_null), null_signs(null_signs) {} - - bool is_null_at(uint64_t index) { return this->has_null && this->null_signs[index]; } - - bool operator==(const Collection& y) const; - bool operator!=(const Collection& value) const; - bool operator<(const Collection& value) const; - bool operator<=(const Collection& value) const; - bool operator>(const Collection& value) const; - bool operator>=(const Collection& value) const; - int32_t cmp(const Collection& other) const; -}; - -} // namespace doris diff --git a/be/src/olap/column_vector.cpp b/be/src/olap/column_vector.cpp index e21952f937..84f4f61267 100644 --- a/be/src/olap/column_vector.cpp +++ b/be/src/olap/column_vector.cpp @@ -130,19 +130,20 @@ Status ColumnVectorBatch::create(size_t init_capacity, bool is_nullable, const T std::unique_ptr elements; auto array_type_info = reinterpret_cast(type_info); - RETURN_IF_ERROR(ColumnVectorBatch::create(init_capacity * 2, field->get_sub_field(0)->is_nullable(), + RETURN_IF_ERROR(ColumnVectorBatch::create( + init_capacity * 2, field->get_sub_field(0)->is_nullable(), array_type_info->item_type_info(), field->get_sub_field(0), &elements)); std::unique_ptr offsets; - TypeInfo* bigint_type_info = get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT); - RETURN_IF_ERROR(ColumnVectorBatch::create(init_capacity + 1, false, - bigint_type_info, nullptr, &offsets)); + 
TypeInfo* offsets_type_info = + get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT); + RETURN_IF_ERROR(ColumnVectorBatch::create(init_capacity + 1, false, offsets_type_info, + nullptr, &offsets)); - std::unique_ptr local( - new ArrayColumnVectorBatch(type_info, - is_nullable, - reinterpret_cast*>(offsets.release()), - elements.release())); + std::unique_ptr local(new ArrayColumnVectorBatch( + type_info, is_nullable, + reinterpret_cast*>(offsets.release()), + elements.release())); RETURN_IF_ERROR(local->resize(init_capacity)); *column_vector_batch = std::move(local); return Status::OK(); @@ -172,7 +173,7 @@ Status ScalarColumnVectorBatch::resize(size_t new_cap) { } ArrayColumnVectorBatch::ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable, - ScalarColumnVectorBatch* offsets, + ScalarColumnVectorBatch* offsets, ColumnVectorBatch* elements) : ColumnVectorBatch(type_info, is_nullable), _data(0) { _offsets.reset(offsets); @@ -205,13 +206,14 @@ void ArrayColumnVectorBatch::prepare_for_read(size_t start_idx, size_t size, boo DCHECK(start_idx + size <= capacity()); for (size_t i = 0; i < size; ++i) { if (!is_null_at(start_idx + i)) { - _data[start_idx + i] = Collection( + _data[start_idx + i] = CollectionValue( _elements->mutable_cell_ptr(*(_offsets->scalar_cell_ptr(start_idx + i))), - *(_offsets->scalar_cell_ptr(start_idx + i + 1)) - *(_offsets->scalar_cell_ptr(start_idx + i)), + *(_offsets->scalar_cell_ptr(start_idx + i + 1)) - + *(_offsets->scalar_cell_ptr(start_idx + i)), item_has_null, - _elements->is_nullable() - ? const_cast(&_elements->null_signs()[*(_offsets->scalar_cell_ptr(start_idx + i))]) - : nullptr); + _elements->is_nullable() ? 
const_cast(&_elements->null_signs()[*( + _offsets->scalar_cell_ptr(start_idx + i))]) + : nullptr); } } } diff --git a/be/src/olap/column_vector.h b/be/src/olap/column_vector.h index 67a79fbeaa..ca5b6e1c4d 100644 --- a/be/src/olap/column_vector.h +++ b/be/src/olap/column_vector.h @@ -64,7 +64,7 @@ template class DataBuffer; template class DataBuffer; template class DataBuffer; template class DataBuffer; -template class DataBuffer; +template class DataBuffer; // struct that contains column data(null bitmap), data array in sub class. class ColumnVectorBatch { @@ -155,9 +155,7 @@ public: return reinterpret_cast(&_data[idx]); } - ScalarCppType* scalar_cell_ptr(size_t idx) { - return &_data[idx]; - } + ScalarCppType* scalar_cell_ptr(size_t idx) { return &_data[idx]; } private: DataBuffer _data; @@ -166,8 +164,9 @@ private: // util class for read array's null signs. class ArrayNullColumnVectorBatch : public ColumnVectorBatch { public: - explicit ArrayNullColumnVectorBatch(ColumnVectorBatch* array) : - ColumnVectorBatch(get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_TINYINT), false), _array(array) {} + explicit ArrayNullColumnVectorBatch(ColumnVectorBatch* array) + : ColumnVectorBatch(get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_TINYINT), false), + _array(array) {} ~ArrayNullColumnVectorBatch() override = default; @@ -194,7 +193,7 @@ private: class ArrayColumnVectorBatch : public ColumnVectorBatch { public: explicit ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable, - ScalarColumnVectorBatch* offsets, + ScalarColumnVectorBatch* offsets, ColumnVectorBatch* elements); ~ArrayColumnVectorBatch() override; Status resize(size_t new_cap) override; @@ -205,7 +204,7 @@ public: // Get the start of the data. 
uint8_t* data() const override { - return reinterpret_cast(const_cast(_data.data())); + return reinterpret_cast(const_cast(_data.data())); } // Get the idx's cell_ptr @@ -216,9 +215,7 @@ public: // Get thr idx's cell_ptr for write uint8_t* mutable_cell_ptr(size_t idx) override { return reinterpret_cast(&_data[idx]); } - size_t item_offset(size_t idx) const { - return *(_offsets->scalar_cell_ptr(idx)); - } + size_t item_offset(size_t idx) const { return *(_offsets->scalar_cell_ptr(idx)); } /** * Change array size to offset in this batch @@ -252,23 +249,22 @@ public: void get_offset_by_length(size_t start_idx, size_t size); size_t get_item_size(size_t start_idx, size_t size) { - return *(_offsets->scalar_cell_ptr(start_idx + size)) - *(_offsets->scalar_cell_ptr(start_idx)); + return *(_offsets->scalar_cell_ptr(start_idx + size)) - + *(_offsets->scalar_cell_ptr(start_idx)); } - ArrayNullColumnVectorBatch get_null_as_batch() { - return ArrayNullColumnVectorBatch(this); - } + ArrayNullColumnVectorBatch get_null_as_batch() { return ArrayNullColumnVectorBatch(this); } // Generate collection slots. void prepare_for_read(size_t start_idx, size_t end_idx, bool item_has_null); private: - DataBuffer _data; + DataBuffer _data; std::unique_ptr _elements; // Stores each array's start offsets in _elements. 
- std::unique_ptr> _offsets; + std::unique_ptr> _offsets; }; template class ScalarColumnVectorBatch; diff --git a/be/src/olap/field.h b/be/src/olap/field.h index bd8d535a4e..0edb4dbc99 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -29,6 +29,7 @@ #include "olap/tablet_schema.h" #include "olap/types.h" #include "olap/utils.h" +#include "runtime/collection_value.h" #include "runtime/mem_pool.h" #include "util/hash_util.hpp" #include "util/mem_util.hpp" @@ -47,8 +48,14 @@ public: _name(column.name()), _index_size(column.index_length()), _is_nullable(column.is_nullable()), - _agg_info(get_aggregate_info(column.aggregation(), column.type())), - _length(column.length()) {} + _length(column.length()) { + if (column.type() == OLAP_FIELD_TYPE_ARRAY) { + _agg_info = get_aggregate_info(column.aggregation(), column.type(), + column.get_sub_column(0).type()); + } else { + _agg_info = get_aggregate_info(column.aggregation(), column.type()); + } + } virtual ~Field() = default; @@ -257,9 +264,11 @@ public: } Field* get_sub_field(int i) { return _sub_fields[i].get(); } +protected: + const TypeInfo* _type_info; + private: // Field的最大长度,单位为字节,通常等于length, 变长字符串不同 - const TypeInfo* _type_info; const KeyCoder* _key_coder; std::string _name; uint16_t _index_size; @@ -377,6 +386,30 @@ uint32_t Field::hash_code(const CellType& cell, uint32_t seed) const { return _type_info->hash_code(cell.cell_ptr(), seed); } +class ArrayField : public Field { +public: + explicit ArrayField(const TabletColumn& column) : Field(column) {} + + void consume(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, + ObjectPool* agg_pool) const override { + dst->set_is_null(src_null); + if (src_null) { + return; + } + _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + } + + char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { + auto array_v = (CollectionValue*)cell_ptr; + array_v->set_null_signs(reinterpret_cast(variable_ptr + 
sizeof(CollectionValue))); + array_v->set_data(variable_ptr + sizeof(CollectionValue) + + OLAP_ARRAY_MAX_BYTES / sizeof(char*)); + return variable_ptr + _length; + } + + size_t get_variable_len() const override { return _length; } +}; + class CharField : public Field { public: explicit CharField() : Field() {} @@ -518,7 +551,7 @@ public: return new VarcharField(column); case OLAP_FIELD_TYPE_ARRAY: { std::unique_ptr item_field(FieldFactory::create(column.get_sub_column(0))); - auto* local = new Field(column); + auto* local = new ArrayField(column); local->add_sub_field(std::move(item_field)); return local; } @@ -542,7 +575,7 @@ public: return new VarcharField(column); case OLAP_FIELD_TYPE_ARRAY: { std::unique_ptr item_field(FieldFactory::create(column.get_sub_column(0))); - auto* local = new Field(column); + auto* local = new ArrayField(column); local->add_sub_field(std::move(item_field)); return local; } diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index 2d828d4abe..977e04b47a 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -52,11 +52,15 @@ static constexpr uint32_t OLAP_COMPACTION_DEFAULT_CANDIDATE_SIZE = 10; // the max length supported for varchar type static const uint16_t OLAP_STRING_MAX_LENGTH = 65535; +// the max length supported for array +static const uint16_t OLAP_ARRAY_MAX_LENGTH = 65535; // the max bytes for stored string length using StringOffsetType = uint32_t; using StringLengthType = uint16_t; static const uint16_t OLAP_STRING_MAX_BYTES = sizeof(StringLengthType); +// the max bytes for stored array length +static const uint16_t OLAP_ARRAY_MAX_BYTES = OLAP_ARRAY_MAX_LENGTH; enum OLAPDataVersion { OLAP_V1 = 0, diff --git a/be/src/olap/row_cursor.cpp b/be/src/olap/row_cursor.cpp index e76d9bea40..acde936186 100644 --- a/be/src/olap/row_cursor.cpp +++ b/be/src/olap/row_cursor.cpp @@ -131,7 +131,7 @@ OLAPStatus RowCursor::init_scan_key(const TabletSchema& schema, FieldType type = column.type(); if (type == 
OLAP_FIELD_TYPE_VARCHAR) { _variable_len += scan_keys[cid].length(); - } else if (type == OLAP_FIELD_TYPE_CHAR) { + } else if (type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_ARRAY) { _variable_len += std::max(scan_keys[cid].length(), column.length()); } } @@ -167,7 +167,7 @@ OLAPStatus RowCursor::init_scan_key(const TabletSchema& schema, // TODO(yingchun): parameter 'const TabletSchema& schema' is not used OLAPStatus RowCursor::allocate_memory_for_string_type(const TabletSchema& schema) { - // allocate memory for string type(char, varchar, hll) + // allocate memory for string type(char, varchar, hll, array) // The memory allocated in this function is used in aggregate and copy function if (_variable_len == 0) { return OLAP_SUCCESS; diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h index 6d8a3f1bed..2e3f157ccd 100644 --- a/be/src/olap/rowset/beta_rowset_reader.h +++ b/be/src/olap/rowset/beta_rowset_reader.h @@ -54,9 +54,9 @@ public: } private: + RowsetReaderContext* _context; BetaRowsetSharedPtr _rowset; - RowsetReaderContext* _context; OlapReaderStatistics _owned_stats; OlapReaderStatistics* _stats; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 37ead54c44..0eda8f373d 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -357,7 +357,9 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { if (is_nullable()) { RETURN_IF_ERROR(_sub_readers[2]->new_iterator(&null_iterator)); } - *iterator = new ArrayFileColumnIterator(this, reinterpret_cast(offset_iterator), item_iterator, null_iterator); + *iterator = new ArrayFileColumnIterator( + this, reinterpret_cast(offset_iterator), item_iterator, + null_iterator); return Status::OK(); } default: @@ -370,9 +372,10 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { 
//////////////////////////////////////////////////////////////////////////////// ArrayFileColumnIterator::ArrayFileColumnIterator(ColumnReader* reader, - FileColumnIterator* offset_reader, - ColumnIterator* item_iterator, - ColumnIterator* null_iterator) : _array_reader(reader) { + FileColumnIterator* offset_reader, + ColumnIterator* item_iterator, + ColumnIterator* null_iterator) + : _array_reader(reader) { _length_iterator.reset(offset_reader); _item_iterator.reset(item_iterator); if (_array_reader->is_nullable()) { @@ -386,8 +389,9 @@ Status ArrayFileColumnIterator::init(const ColumnIteratorOptions& opts) { if (_array_reader->is_nullable()) { RETURN_IF_ERROR(_null_iterator->init(opts)); } - TypeInfo* bigint_type_info = get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT); - RETURN_IF_ERROR(ColumnVectorBatch::create(1024, false, bigint_type_info, nullptr, &_length_batch)); + TypeInfo* offset_type_info = get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT); + RETURN_IF_ERROR( + ColumnVectorBatch::create(1024, false, offset_type_info, nullptr, &_length_batch)); return Status::OK(); } @@ -397,7 +401,8 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool // 1. 
read n offsets ColumnBlock offset_block(array_batch->offsets(), nullptr); - ColumnBlockView offset_view(&offset_block, dst->current_offset() + 1); // offset应该比collection的游标多1 + ColumnBlockView offset_view(&offset_block, + dst->current_offset() + 1); // offset应该比collection的游标多1 bool offset_has_null = false; RETURN_IF_ERROR(_length_iterator->next_batch(n, &offset_view, &offset_has_null)); DCHECK(!offset_has_null); @@ -434,7 +439,8 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool } ColumnBlock item_block = ColumnBlock(item_vector_batch, dst->pool()); - ColumnBlockView item_view = ColumnBlockView(&item_block, array_batch->item_offset(dst->current_offset())); + ColumnBlockView item_view = + ColumnBlockView(&item_block, array_batch->item_offset(dst->current_offset())); size_t real_read = item_size; RETURN_IF_ERROR(_item_iterator->next_batch(&real_read, &item_view, &item_has_null)); DCHECK(item_size == real_read); diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index e2c6189496..7df9900ebb 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -110,7 +110,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* ColumnWriter::create(item_options, &item_column, _wblock, &item_writer)); // create length writer - FieldType length_type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT; + FieldType length_type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT; ColumnWriterOptions length_options; length_options.meta = opts.meta->add_children_columns(); @@ -126,13 +126,14 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* length_options.need_bloom_filter = false; length_options.need_bitmap_index = false; - TabletColumn length_column = TabletColumn(OLAP_FIELD_AGGREGATION_NONE, length_type, length_options.meta->is_nullable(), - length_options.meta->unique_id(), 
length_options.meta->length()); + TabletColumn length_column = TabletColumn( + OLAP_FIELD_AGGREGATION_NONE, length_type, length_options.meta->is_nullable(), + length_options.meta->unique_id(), length_options.meta->length()); length_column.set_name("length"); length_column.set_index_length(-1); // no short key index - std::unique_ptr bigint_field( - FieldFactory::create(length_column)); - auto* length_writer = new ScalarColumnWriter(length_options, std::move(bigint_field), _wblock); + std::unique_ptr bigint_field(FieldFactory::create(length_column)); + auto* length_writer = + new ScalarColumnWriter(length_options, std::move(bigint_field), _wblock); // if nullable, create null writer ScalarColumnWriter* null_writer = nullptr; @@ -152,18 +153,18 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* null_options.need_bloom_filter = false; null_options.need_bitmap_index = false; - TabletColumn null_column = TabletColumn(OLAP_FIELD_AGGREGATION_NONE, null_type, length_options.meta->is_nullable(), - null_options.meta->unique_id(), null_options.meta->length()); + TabletColumn null_column = TabletColumn( + OLAP_FIELD_AGGREGATION_NONE, null_type, length_options.meta->is_nullable(), + null_options.meta->unique_id(), null_options.meta->length()); null_column.set_name("nullable"); null_column.set_index_length(-1); // no short key index - std::unique_ptr null_field( - FieldFactory::create(null_column)); + std::unique_ptr null_field(FieldFactory::create(null_column)); null_writer = new ScalarColumnWriter(null_options, std::move(null_field), _wblock); } - std::unique_ptr writer_local = - std::unique_ptr(new ArrayColumnWriter( - opts, std::move(field), length_writer, null_writer, std::move(item_writer))); + std::unique_ptr writer_local = std::unique_ptr( + new ArrayColumnWriter(opts, std::move(field), length_writer, null_writer, + std::move(item_writer))); *writer = std::move(writer_local); return Status::OK(); } @@ -316,6 +317,25 @@ Status 
ScalarColumnWriter::append_data_in_current_page(const uint8_t** ptr, size return Status::OK(); } +Status ScalarColumnWriter::append_data_in_current_page(const uint8_t* ptr, size_t* num_written) { + RETURN_IF_ERROR(_page_builder->add(ptr, num_written)); + if (_opts.need_zone_map) { + _zone_map_index_builder->add_values(ptr, *num_written); + } + if (_opts.need_bitmap_index) { + _bitmap_index_builder->add_values(ptr, *num_written); + } + if (_opts.need_bloom_filter) { + _bloom_filter_index_builder->add_values(ptr, *num_written); + } + + _next_rowid += *num_written; + if (is_nullable()) { + _null_bitmap_builder->add_run(false, *num_written); + } + return Status::OK(); +} + uint64_t ScalarColumnWriter::estimate_buffer_size() { uint64_t size = _data_size; size += _page_builder->size(); @@ -469,7 +489,8 @@ ArrayColumnWriter::ArrayColumnWriter(const ColumnWriterOptions& opts, std::uniqu ScalarColumnWriter* null_writer, std::unique_ptr item_writer) : ColumnWriter(std::move(field), opts.meta->is_nullable()), - _item_writer(std::move(item_writer)) { + _item_writer(std::move(item_writer)), + _opts(opts) { _length_writer.reset(length_writer); if (is_nullable()) { _null_writer.reset(null_writer); @@ -494,29 +515,32 @@ Status ArrayColumnWriter::put_extra_info_in_page(DataPageFooterPB* footer) { // Now we can only write data one by one. 
Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { size_t remaining = num_rows; - const auto* col_cursor = reinterpret_cast(*ptr); - + const auto* col_cursor = reinterpret_cast(*ptr); while (remaining > 0) { // TODO llj: bulk write size_t num_written = 1; - auto size_ptr = &(col_cursor->length); - RETURN_IF_ERROR(_length_writer->append_data_in_current_page((const uint8_t**)&size_ptr, &num_written)); - if (num_written < 1) { // page is full, write first item offset and update current length page's start ordinal + auto size_ptr = col_cursor->length(); + RETURN_IF_ERROR(_length_writer->append_data_in_current_page( + reinterpret_cast(&size_ptr), &num_written)); + if (num_written < + 1) { // page is full, write first item offset and update current length page's start ordinal RETURN_IF_ERROR(_length_writer->finish_current_page()); _current_length_page_first_ordinal += _lengh_sum_in_cur_page; _lengh_sum_in_cur_page = 0; } else { // write child item. if (_item_writer->is_nullable()) { - auto* item_data_ptr = col_cursor->data; - for (size_t i = 0; i < col_cursor->length; ++i) { - RETURN_IF_ERROR(_item_writer->append(col_cursor->null_signs[i], item_data_ptr)); + auto* item_data_ptr = const_cast(col_cursor)->mutable_data(); + for (size_t i = 0; i < col_cursor->length(); ++i) { + RETURN_IF_ERROR(_item_writer->append(col_cursor->is_null_at(i), item_data_ptr)); item_data_ptr = (uint8_t*)item_data_ptr + _item_writer->get_field()->size(); } } else { - RETURN_IF_ERROR(_item_writer->append_data((const uint8_t**)&(col_cursor->data), col_cursor->length)); + const void* data = col_cursor->data(); + RETURN_IF_ERROR(_item_writer->append_data(reinterpret_cast(&data), + col_cursor->length())); } - _lengh_sum_in_cur_page += col_cursor->length; + _lengh_sum_in_cur_page += col_cursor->length(); } remaining -= num_written; col_cursor += num_written; @@ -529,8 +553,8 @@ Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { uint64_t 
ArrayColumnWriter::estimate_buffer_size() { return _length_writer->estimate_buffer_size() + - (is_nullable() ? _null_writer->estimate_buffer_size() : 0) + - _item_writer->estimate_buffer_size(); + (is_nullable() ? _null_writer->estimate_buffer_size() : 0) + + _item_writer->estimate_buffer_size(); } Status ArrayColumnWriter::finish() { @@ -563,9 +587,9 @@ Status ArrayColumnWriter::write_ordinal_index() { Status ArrayColumnWriter::append_nulls(size_t num_rows) { size_t num_lengths = num_rows; const ordinal_t zero = 0; - while(num_lengths > 0) { + while (num_lengths > 0) { // TODO llj bulk write - const auto* zero_ptr = reinterpret_cast(&zero); + const auto* zero_ptr = reinterpret_cast(&zero); RETURN_IF_ERROR(_length_writer->append_data(&zero_ptr, 1)); --num_lengths; } @@ -574,7 +598,7 @@ Status ArrayColumnWriter::append_nulls(size_t num_rows) { Status ArrayColumnWriter::write_null_column(size_t num_rows, bool is_null) { uint8_t null_sign = is_null ? 1 : 0; - while(num_rows > 0) { + while (num_rows > 0) { // TODO llj bulk write const uint8_t* null_sign_ptr = &null_sign; RETURN_IF_ERROR(_null_writer->append_data(&null_sign_ptr, 1)); diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index ba904cccf3..b98f4883ca 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -132,6 +132,9 @@ public: // used for append not null data. When page is full, will append data not reach num_rows. virtual Status append_data_in_current_page(const uint8_t** ptr, size_t* num_rows) = 0; + // used for append not null data. When page is full, will append data not reach num_rows. 
+ virtual Status append_data_in_current_page(const uint8_t* ptr, size_t* num_rows) = 0; + bool is_nullable() const { return _is_nullable; } Field* get_field() const { return _field.get(); } @@ -139,6 +142,7 @@ public: private: std::unique_ptr _field; bool _is_nullable; + protected: std::shared_ptr _mem_tracker; }; @@ -183,6 +187,7 @@ public: Status append_data(const uint8_t** ptr, size_t num_rows) override; Status append_data_in_current_page(const uint8_t** ptr, size_t* num_rows) override; + Status append_data_in_current_page(const uint8_t* ptr, size_t* num_rows) override; private: std::unique_ptr _page_builder; @@ -253,8 +258,7 @@ private: class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback { public: explicit ArrayColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* offset_writer, - ScalarColumnWriter* null_writer, + ScalarColumnWriter* offset_writer, ScalarColumnWriter* null_writer, std::unique_ptr item_writer); ~ArrayColumnWriter() override = default; @@ -262,7 +266,12 @@ public: Status append_data(const uint8_t** ptr, size_t num_rows) override; Status append_data_in_current_page(const uint8_t** ptr, size_t* num_rows) override { - return Status::NotSupported("array writer has no data, can not append_data_in_current_page"); + return Status::NotSupported( + "array writer has no data, can not append_data_in_current_page"); + } + Status append_data_in_current_page(const uint8_t* ptr, size_t* num_rows) override { + return Status::NotSupported( + "array writer has no data, can not append_data_in_current_page"); } uint64_t estimate_buffer_size() override; @@ -274,12 +283,25 @@ public: Status finish_current_page() override; - Status write_zone_map() override { return Status::OK(); } - - Status write_bitmap_index() override { return Status::OK(); } - - Status write_bloom_filter_index() override { return Status::OK(); } + Status write_zone_map() override { + if (_opts.need_zone_map) { + return 
Status::NotSupported("array not support zone map"); + } + return Status::OK(); + } + Status write_bitmap_index() override { + if (_opts.need_bitmap_index) { + return Status::NotSupported("array not support bitmap index"); + } + return Status::OK(); + } + Status write_bloom_filter_index() override { + if (_opts.need_bloom_filter) { + return Status::NotSupported("array not support bloom filter index"); + } + return Status::OK(); + } ordinal_t get_next_rowid() const override { return _length_writer->get_next_rowid(); } private: @@ -290,6 +312,7 @@ private: std::unique_ptr _length_writer; std::unique_ptr _null_writer; std::unique_ptr _item_writer; + ColumnWriterOptions _opts; ordinal_t _current_length_page_first_ordinal = 0; ordinal_t _lengh_sum_in_cur_page = 0; }; diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp index 6438bb6c8c..322ed7f507 100644 --- a/be/src/olap/rowset/segment_v2/encoding_info.cpp +++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp @@ -82,7 +82,7 @@ struct TypeEncodingTraits -struct TypeEncodingTraits { +struct TypeEncodingTraits { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { *builder = new BitshufflePageBuilder(opts); return Status::OK(); @@ -234,6 +234,7 @@ EncodingInfoResolver::EncodingInfoResolver() { _add_map(); _add_map(); + _add_map(); _add_map(); _add_map(); diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 866dda1107..ca942c5fa1 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -119,45 +119,16 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id for (TColumn tcolumn : tablet_schema.columns) { ColumnPB* column = schema->add_column(); uint32_t unique_id = col_ordinal_to_unique_id.at(col_ordinal++); - column->set_unique_id(unique_id); - column->set_name(tcolumn.column_name); - column->set_has_bitmap_index(false); - string data_type; - 
EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type); - column->set_type(data_type); - if (tcolumn.column_type.type == TPrimitiveType::DECIMALV2) { - column->set_precision(tcolumn.column_type.precision); - column->set_frac(tcolumn.column_type.scale); - } - uint32_t length = TabletColumn::get_field_length_by_type(tcolumn.column_type.type, - tcolumn.column_type.len); - column->set_length(length); - column->set_index_length(length); - if (tcolumn.column_type.type == TPrimitiveType::VARCHAR) { - if (!tcolumn.column_type.__isset.index_len) { - column->set_index_length(10); - } else { - column->set_index_length(tcolumn.column_type.index_len); - } - } - if (!tcolumn.is_key) { - column->set_is_key(false); - string aggregation_type; - EnumToString(TAggregationType, tcolumn.aggregation_type, aggregation_type); - column->set_aggregation(aggregation_type); - } else { + _init_column_from_tcolumn(unique_id, tcolumn, column); + + if (column->is_key()) { ++key_count; - column->set_is_key(true); - column->set_aggregation("NONE"); } - column->set_is_nullable(tcolumn.is_allow_null); - if (tcolumn.__isset.default_value) { - column->set_default_value(tcolumn.default_value); - } - if (tcolumn.__isset.is_bloom_filter_column) { - column->set_is_bf_column(tcolumn.is_bloom_filter_column); + + if (column->is_bf_column()) { has_bf_columns = true; } + if (tablet_schema.__isset.indexes) { for (auto& index : tablet_schema.indexes) { if (index.index_type == TIndexType::type::BITMAP) { @@ -169,6 +140,11 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id } } } + + if (tcolumn.column_type.type == TPrimitiveType::ARRAY) { + ColumnPB* children_column = column->add_children_columns(); + _init_column_from_tcolumn(0, tcolumn.children_column[0], children_column); + } } schema->set_next_column_unique_id(next_unique_id); @@ -187,6 +163,48 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id init_from_pb(tablet_meta_pb); } +void 
TabletMeta::_init_column_from_tcolumn(uint32_t unique_id, const TColumn& tcolumn, + ColumnPB* column) { + column->set_unique_id(unique_id); + column->set_name(tcolumn.column_name); + column->set_has_bitmap_index(false); + string data_type; + EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type); + column->set_type(data_type); + + if (tcolumn.column_type.type == TPrimitiveType::DECIMALV2) { + column->set_precision(tcolumn.column_type.precision); + column->set_frac(tcolumn.column_type.scale); + } + uint32_t length = TabletColumn::get_field_length_by_type(tcolumn.column_type.type, + tcolumn.column_type.len); + column->set_length(length); + column->set_index_length(length); + if (tcolumn.column_type.type == TPrimitiveType::VARCHAR) { + if (!tcolumn.column_type.__isset.index_len) { + column->set_index_length(10); + } else { + column->set_index_length(tcolumn.column_type.index_len); + } + } + if (!tcolumn.is_key) { + column->set_is_key(false); + string aggregation_type; + EnumToString(TAggregationType, tcolumn.aggregation_type, aggregation_type); + column->set_aggregation(aggregation_type); + } else { + column->set_is_key(true); + column->set_aggregation("NONE"); + } + column->set_is_nullable(tcolumn.is_allow_null); + if (tcolumn.__isset.default_value) { + column->set_default_value(tcolumn.default_value); + } + if (tcolumn.__isset.is_bloom_filter_column) { + column->set_is_bf_column(tcolumn.is_bloom_filter_column); + } +} + OLAPStatus TabletMeta::create_from_file(const string& file_path) { FileHeader file_header; FileHandler file_handler; diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 781928036e..dbd594ed37 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -200,12 +200,11 @@ public: } // used for after tablet cloned to clear stale rowset - void clear_stale_rowset() { - _stale_rs_metas.clear(); - } + void clear_stale_rowset() { _stale_rs_metas.clear(); } private: OLAPStatus _save_meta(DataDir* data_dir); + void 
_init_column_from_tcolumn(uint32_t unique_id, const TColumn& tcolumn, ColumnPB* column); // _del_pred_array is ignored to compare. friend bool operator==(const TabletMeta& a, const TabletMeta& b); diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 96b35b80f2..09f456d637 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -72,6 +72,8 @@ FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) { type = OLAP_FIELD_TYPE_MAP; } else if (0 == upper_type_str.compare("OBJECT")) { type = OLAP_FIELD_TYPE_OBJECT; + } else if (0 == upper_type_str.compare("ARRAY")) { + type = OLAP_FIELD_TYPE_ARRAY; } else { LOG(WARNING) << "invalid type string. [type='" << type_str << "']"; type = OLAP_FIELD_TYPE_UNKNOWN; @@ -172,7 +174,7 @@ std::string TabletColumn::get_string_by_field_type(FieldType type) { return "STRUCT"; case OLAP_FIELD_TYPE_ARRAY: - return "LIST"; + return "ARRAY"; case OLAP_FIELD_TYPE_MAP: return "MAP"; @@ -244,6 +246,8 @@ uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint3 case TPrimitiveType::VARCHAR: case TPrimitiveType::HLL: return string_length + sizeof(OLAP_STRING_MAX_LENGTH); + case TPrimitiveType::ARRAY: + return OLAP_ARRAY_MAX_LENGTH; case TPrimitiveType::DECIMALV2: return 12; // use 12 bytes in olap engine. 
default: @@ -319,7 +323,7 @@ void TabletColumn::init_from_pb(const ColumnPB& column) { _visible = column.visible(); } if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { - DCHECK(column.children_columns_size() == 1) << "LIST type has more than 1 children types."; + DCHECK(column.children_columns_size() == 1) << "ARRAY type has more than 1 children types."; TabletColumn child_column; child_column.init_from_pb(column.children_columns(0)); add_sub_column(child_column); @@ -352,6 +356,12 @@ void TabletColumn::to_schema_pb(ColumnPB* column) { column->set_has_bitmap_index(_has_bitmap_index); } column->set_visible(_visible); + + if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { + DCHECK(_sub_columns.size() == 1) << "ARRAY type has more than 1 children types."; + ColumnPB* child = column->add_children_columns(); + _sub_columns[0].to_schema_pb(child); + } } uint32_t TabletColumn::mem_size() const { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 2a5053f2d9..ee2f9207fc 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -22,6 +22,7 @@ #include "gen_cpp/olap_file.pb.h" #include "olap/olap_define.h" +#include "olap/tablet_schema.h" #include "olap/types.h" namespace doris { diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp index f52fe5edb2..136bd7e2b0 100644 --- a/be/src/olap/types.cpp +++ b/be/src/olap/types.cpp @@ -145,6 +145,11 @@ TypeInfo* get_type_info(FieldType field_type) { return get_scalar_type_info(field_type); } +// get array array type info +TypeInfo* get_collection_type_info(FieldType sub_type) { + return ArrayTypeInfoResolver::instance()->get_type_info(sub_type); +} + TypeInfo* get_type_info(segment_v2::ColumnMetaPB* column_meta_pb) { FieldType type = (FieldType)column_meta_pb->type(); if (is_scalar_type(type)) { @@ -152,8 +157,9 @@ TypeInfo* get_type_info(segment_v2::ColumnMetaPB* column_meta_pb) { } else { switch (type) { case OLAP_FIELD_TYPE_ARRAY: { - DCHECK(column_meta_pb->children_columns_size() >= 
1 && column_meta_pb->children_columns_size() <=3) - << "more than 3 children or no children."; + DCHECK(column_meta_pb->children_columns_size() >= 1 && + column_meta_pb->children_columns_size() <= 3) + << "more than 3 children or no children."; auto child_type = (FieldType)column_meta_pb->children_columns(0).type(); return ArrayTypeInfoResolver::instance()->get_type_info(child_type); } diff --git a/be/src/olap/types.h b/be/src/olap/types.h index 6c74b2c34c..0d8aec385c 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -27,12 +27,12 @@ #include "gen_cpp/segment_v2.pb.h" // for ColumnMetaPB #include "gutil/strings/numbers.h" -#include "olap/collection.h" #include "olap/decimal12.h" #include "olap/olap_common.h" #include "olap/olap_define.h" #include "olap/tablet_schema.h" // for TabletColumn #include "olap/uint24.h" +#include "runtime/collection_value.h" #include "runtime/datetime_value.h" #include "runtime/mem_pool.h" #include "util/hash_util.hpp" @@ -155,33 +155,33 @@ public: : _item_type_info(item_type_info), _item_size(item_type_info->size()) {} inline bool equal(const void* left, const void* right) const override { - auto l_value = reinterpret_cast(left); - auto r_value = reinterpret_cast(right); - if (l_value->length != r_value->length) { + auto l_value = reinterpret_cast(left); + auto r_value = reinterpret_cast(right); + if (l_value->length() != r_value->length()) { return false; } - size_t len = l_value->length; + size_t len = l_value->length(); - if (!l_value->has_null && !r_value->has_null) { + if (!l_value->has_null() && !r_value->has_null()) { for (size_t i = 0; i < len; ++i) { - if (!_item_type_info->equal((uint8_t*)(l_value->data) + i * _item_size, - (uint8_t*)(r_value->data) + i * _item_size)) { + if (!_item_type_info->equal((uint8_t*)(l_value->data()) + i * _item_size, + (uint8_t*)(r_value->data()) + i * _item_size)) { return false; } } } else { for (size_t i = 0; i < len; ++i) { - if (l_value->null_signs[i]) { - if (r_value->null_signs[i]) 
{ // both are null + if (l_value->is_null_at(i)) { + if (r_value->is_null_at(i)) { // both are null continue; } else { // left is null & right is not null return false; } - } else if (r_value->null_signs[i]) { // left is not null & right is null + } else if (r_value->is_null_at(i)) { // left is not null & right is null return false; } - if (!_item_type_info->equal((uint8_t*)(l_value->data) + i * _item_size, - (uint8_t*)(r_value->data) + i * _item_size)) { + if (!_item_type_info->equal((uint8_t*)(l_value->data()) + i * _item_size, + (uint8_t*)(r_value->data()) + i * _item_size)) { return false; } } @@ -190,16 +190,16 @@ public: } inline int cmp(const void* left, const void* right) const override { - auto l_value = reinterpret_cast(left); - auto r_value = reinterpret_cast(right); - size_t l_length = l_value->length; - size_t r_length = r_value->length; + auto l_value = reinterpret_cast(left); + auto r_value = reinterpret_cast(right); + size_t l_length = l_value->length(); + size_t r_length = r_value->length(); size_t cur = 0; - if (!l_value->has_null && !r_value->has_null) { + if (!l_value->has_null() && !r_value->has_null()) { while (cur < l_length && cur < r_length) { - int result = _item_type_info->cmp((uint8_t*)(l_value->data) + cur * _item_size, - (uint8_t*)(r_value->data) + cur * _item_size); + int result = _item_type_info->cmp((uint8_t*)(l_value->data()) + cur * _item_size, + (uint8_t*)(r_value->data()) + cur * _item_size); if (result != 0) { return result; } @@ -207,15 +207,16 @@ public: } } else { while (cur < l_length && cur < r_length) { - if (l_value->null_signs[cur]) { - if (!r_value->null_signs[cur]) { // left is null & right is not null + if (l_value->is_null_at(cur)) { + if (!r_value->is_null_at(cur)) { // left is null & right is not null return -1; } - } else if (r_value->null_signs[cur]) { // left is not null & right is null + } else if (r_value->is_null_at(cur)) { // left is not null & right is null return 1; } else { // both are not null - int 
result = _item_type_info->cmp((uint8_t*)(l_value->data) + cur * _item_size, - (uint8_t*)(r_value->data) + cur * _item_size); + int result = + _item_type_info->cmp((uint8_t*)(l_value->data()) + cur * _item_size, + (uint8_t*)(r_value->data()) + cur * _item_size); if (result != 0) { return result; } @@ -234,34 +235,36 @@ public: } inline void shallow_copy(void* dest, const void* src) const override { - *reinterpret_cast(dest) = *reinterpret_cast(src); + auto dest_value = reinterpret_cast(dest); + auto src_value = reinterpret_cast(src); + dest_value->shallow_copy(src_value); } inline void deep_copy(void* dest, const void* src, MemPool* mem_pool) const { - auto dest_value = reinterpret_cast(dest); - auto src_value = reinterpret_cast(src); + auto dest_value = reinterpret_cast(dest); + auto src_value = reinterpret_cast(src); - dest_value->length = src_value->length; + dest_value->set_length(src_value->length()); - size_t item_size = src_value->length * _item_size; - size_t nulls_size = src_value->has_null ? src_value->length : 0; - dest_value->data = mem_pool->allocate(item_size + nulls_size); - dest_value->has_null = src_value->has_null; - dest_value->null_signs = src_value->has_null - ? reinterpret_cast(dest_value->data) + item_size - : nullptr; + size_t item_size = src_value->length() * _item_size; + size_t nulls_size = src_value->has_null() ? src_value->length() : 0; + dest_value->set_data(mem_pool->allocate(item_size + nulls_size)); + dest_value->set_has_null(src_value->has_null()); + dest_value->set_null_signs(src_value->has_null() + ? 
reinterpret_cast(dest_value->mutable_data()) + + item_size + : nullptr); // copy null_signs - if (src_value->has_null) { - memory_copy(dest_value->null_signs, src_value->null_signs, - sizeof(bool) * src_value->length); + if (src_value->has_null()) { + dest_value->copy_null_signs(src_value); } // copy item - for (uint32_t i = 0; i < src_value->length; ++i) { + for (uint32_t i = 0; i < src_value->length(); ++i) { if (dest_value->is_null_at(i)) continue; - _item_type_info->deep_copy((uint8_t*)(dest_value->data) + i * _item_size, - (uint8_t*)(src_value->data) + i * _item_size, mem_pool); + _item_type_info->deep_copy((uint8_t*)(dest_value->mutable_data()) + i * _item_size, + (uint8_t*)(src_value->data()) + i * _item_size, mem_pool); } } @@ -269,23 +272,22 @@ public: deep_copy(dest, src, mem_pool); } - // TODO llj: How to ensure sufficient length of item inline void direct_copy(void* dest, const void* src) const override { - auto dest_value = reinterpret_cast(dest); - auto src_value = reinterpret_cast(src); - - dest_value->length = src_value->length; - dest_value->has_null = src_value->has_null; - if (src_value->has_null) { + auto dest_value = reinterpret_cast(dest); + auto src_value = reinterpret_cast(src); + dest_value->set_length(src_value->length()); + dest_value->set_has_null(src_value->has_null()); + if (src_value->has_null()) { // direct copy null_signs - memory_copy(dest_value->null_signs, src_value->null_signs, src_value->length); + memory_copy(dest_value->mutable_null_signs(), src_value->null_signs(), + src_value->length()); } // direct opy item - for (uint32_t i = 0; i < src_value->length; ++i) { + for (uint32_t i = 0; i < src_value->length(); ++i) { if (dest_value->is_null_at(i)) continue; - _item_type_info->direct_copy((uint8_t*)(dest_value->data) + i * _item_size, - (uint8_t*)(src_value->data) + i * _item_size); + _item_type_info->direct_copy((uint8_t*)(dest_value->mutable_data()) + i * _item_size, + (uint8_t*)(src_value->data()) + i * _item_size); } } @@ 
-299,14 +301,14 @@ public: } std::string to_string(const void* src) const override { - auto src_value = reinterpret_cast(src); + auto src_value = reinterpret_cast(src); std::string result = "["; - for (size_t i = 0; i < src_value->length; ++i) { + for (size_t i = 0; i < src_value->length(); ++i) { std::string item = - _item_type_info->to_string((uint8_t*)(src_value->data) + i * _item_size); + _item_type_info->to_string((uint8_t*)(src_value->data()) + i * _item_size); result += item; - if (i != src_value->length - 1) { + if (i != src_value->length() - 1) { result += ", "; } } @@ -323,20 +325,21 @@ public: } inline uint32_t hash_code(const void* data, uint32_t seed) const override { - auto value = reinterpret_cast(data); - uint32_t result = HashUtil::hash(&(value->length), sizeof(size_t), seed); - for (size_t i = 0; i < value->length; ++i) { - if (value->null_signs[i]) { + auto value = reinterpret_cast(data); + auto len = value->length(); + uint32_t result = HashUtil::hash(&len, sizeof(size_t), seed); + for (size_t i = 0; i < len; ++i) { + if (value->is_null_at(i)) { result = seed * result; } else { - result = seed * result + - _item_type_info->hash_code((uint8_t*)(value->data) + i * _item_size, seed); + result = seed * result + _item_type_info->hash_code( + (uint8_t*)(value->data()) + i * _item_size, seed); } } return result; } - inline const size_t size() const override { return sizeof(Collection); } + inline const size_t size() const override { return sizeof(CollectionValue); } inline FieldType type() const override { return OLAP_FIELD_TYPE_ARRAY; } @@ -351,6 +354,8 @@ extern bool is_scalar_type(FieldType field_type); extern TypeInfo* get_scalar_type_info(FieldType field_type); +extern TypeInfo* get_collection_type_info(FieldType sub_type); + extern TypeInfo* get_type_info(FieldType field_type); extern TypeInfo* get_type_info(segment_v2::ColumnMetaPB* column_meta_pb); @@ -358,7 +363,7 @@ extern TypeInfo* get_type_info(segment_v2::ColumnMetaPB* column_meta_pb); 
extern TypeInfo* get_type_info(const TabletColumn* col); // support following formats when convert varchar to date -static const std::vector DATE_FORMATS{ +static const std::vector DATE_FORMATS { "%Y-%m-%d", "%y-%m-%d", "%Y%m%d", "%y%m%d", "%Y/%m/%d", "%y/%m/%d", }; @@ -446,7 +451,7 @@ struct CppTypeTraits { }; template <> struct CppTypeTraits { - using CppType = Collection; + using CppType = CollectionValue; }; template diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index 9372f0dafb..0fffb5a964 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -50,6 +50,7 @@ set(RUNTIME_FILES # timestamp_value.cpp decimalv2_value.cpp large_int_value.cpp + collection_value.cpp tuple.cpp tuple_row.cpp vectorized_row_batch.cpp diff --git a/be/src/runtime/collection_value.cpp b/be/src/runtime/collection_value.cpp new file mode 100644 index 0000000000..0729646e13 --- /dev/null +++ b/be/src/runtime/collection_value.cpp @@ -0,0 +1,270 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "runtime/collection_value.h" + +#include "common/logging.h" +#include "exprs/anyval_util.h" + +namespace doris { +int sizeof_type(PrimitiveType type) { + switch (type) { + case TYPE_INT: + return sizeof(int32_t); + case TYPE_CHAR: + case TYPE_VARCHAR: + return sizeof(StringValue); + case TYPE_NULL: + return 0; + default: + DCHECK(false) << "Type not implemented: " << type; + break; + } + + return 0; +} + +Status type_check(PrimitiveType type) { + switch (type) { + case TYPE_INT: + case TYPE_CHAR: + case TYPE_VARCHAR: + case TYPE_NULL: + break; + default: + return Status::InvalidArgument("Type not implemented: " + type); + } + + return Status::OK(); +} + +void CollectionValue::to_collection_val(CollectionVal* val) const { + val->length = _length; + val->data = _data; + val->null_signs = _null_signs; + val->has_null = _has_null; +} + +void CollectionValue::shallow_copy(const CollectionValue* value) { + _length = value->_length; + _null_signs = value->_null_signs; + _data = value->_data; + _has_null = value->_has_null; +} + +void CollectionValue::copy_null_signs(const CollectionValue* other) { + if (other->_has_null) { + memcpy(_null_signs, other->_null_signs, other->size()); + } else { + _null_signs = nullptr; + } +} + +ArrayIterator CollectionValue::iterator(PrimitiveType children_type) const { + return ArrayIterator(children_type, this); +} + +Status CollectionValue::init_collection(ObjectPool* pool, uint32_t size, PrimitiveType child_type, + CollectionValue* val) { + if (val == nullptr) { + return Status::InvalidArgument("collection value is null"); + } + + RETURN_IF_ERROR(type_check(child_type)); + + if (size == 0) { + return Status::OK(); + } + + val->_length = size; + val->_null_signs = pool->add_array(new bool[size]{0}); + val->_data = pool->add_array(new uint8_t[size * sizeof_type(child_type)]); + + return Status::OK(); +} + +Status CollectionValue::init_collection(MemPool* pool, uint32_t size, PrimitiveType child_type, + CollectionValue* val) { + 
if (val == nullptr) { + return Status::InvalidArgument("collection value is null"); + } + + RETURN_IF_ERROR(type_check(child_type)); + + if (size == 0) { + return Status::OK(); + } + + val->_length = size; + val->_null_signs = (bool*)pool->allocate(size * sizeof(bool)); + memset(val->_null_signs, 0, size); + + val->_data = pool->allocate(sizeof_type(child_type) * size); + + return Status::OK(); +} + +Status CollectionValue::init_collection(FunctionContext* context, uint32_t size, + PrimitiveType child_type, CollectionValue* val) { + if (val == nullptr) { + return Status::InvalidArgument("collection value is null"); + } + + RETURN_IF_ERROR(type_check(child_type)); + + if (size == 0) { + return Status::OK(); + } + + val->_length = size; + val->_null_signs = (bool*)context->allocate(size * sizeof(bool)); + memset(val->_null_signs, 0, size); + + val->_data = context->allocate(sizeof_type(child_type) * size); + + return Status::OK(); +} + +CollectionValue CollectionValue::from_collection_val(const CollectionVal& val) { + return CollectionValue(val.data, val.length, val.null_signs); +} + +Status CollectionValue::set(uint32_t i, PrimitiveType type, const AnyVal* value) { + RETURN_IF_ERROR(type_check(type)); + + ArrayIterator iter(type, this); + if (!iter.seek(i)) { + return Status::InvalidArgument("over of collection size"); + } + + if (value->is_null) { + *(_null_signs + i) = true; + _has_null = true; + return Status::OK(); + } else { + *(_null_signs + i) = false; + } + + switch (type) { + case TYPE_INT: + *reinterpret_cast(iter.value()) = reinterpret_cast(value)->val; + break; + case TYPE_CHAR: + case TYPE_VARCHAR: { + const StringVal* src = reinterpret_cast(value); + StringValue* dest = reinterpret_cast(iter.value()); + dest->len = src->len; + dest->ptr = (char*)src->ptr; + break; + } + default: + DCHECK(false) << "Type not implemented: " << type; + return Status::InvalidArgument("Type not implemented"); + } + + return Status::OK(); +} + +/** + * ----------- Array 
Iterator -------- + */ +ArrayIterator::ArrayIterator(PrimitiveType children_type, const CollectionValue* data) + : _offset(0), _type(children_type), _data(data) { + _type_size = sizeof_type(children_type); +} + +void* ArrayIterator::value() { + if (is_null()) { + return nullptr; + } + return ((char*)_data->_data) + _offset * _type_size; +} + +bool ArrayIterator::is_null() { + return _data->is_null_at(_offset); +} + +void ArrayIterator::value(AnyVal* dest) { + if (is_null()) { + dest->is_null = true; + return; + } + dest->is_null = false; + switch (_type) { + case TYPE_BOOLEAN: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_TINYINT: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_SMALLINT: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_INT: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_BIGINT: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_FLOAT: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_DOUBLE: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + case TYPE_HLL: + case TYPE_CHAR: + case TYPE_VARCHAR: { + const StringValue* str_value = reinterpret_cast(value()); + reinterpret_cast(dest)->len = str_value->len; + reinterpret_cast(dest)->ptr = (uint8_t*)(str_value->ptr); + break; + } + case TYPE_DATE: + case TYPE_DATETIME: { + const DateTimeValue* date_time_value = reinterpret_cast(value()); + reinterpret_cast(dest)->packed_time = date_time_value->to_int64(); + reinterpret_cast(dest)->type = date_time_value->type(); + break; + } + + case TYPE_DECIMALV2: + reinterpret_cast(dest)->val = + reinterpret_cast(value())->value; + break; + + case TYPE_LARGEINT: + reinterpret_cast(dest)->val = + reinterpret_cast(value())->value; + break; + + case TYPE_ARRAY: + reinterpret_cast(value())->to_collection_val( + reinterpret_cast(dest)); + break; + + 
default: + DCHECK(false) << "bad type: " << _type; + } +} +} // namespace doris diff --git a/be/src/runtime/collection_value.h b/be/src/runtime/collection_value.h new file mode 100644 index 0000000000..ed9f4c8268 --- /dev/null +++ b/be/src/runtime/collection_value.h @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "common/object_pool.h" +#include "common/status.h" +#include "runtime/mem_pool.h" +#include "runtime/primitive_type.h" +#include "udf/udf.h" + +namespace doris { + +using doris_udf::AnyVal; + +class ArrayIterator; + +/** + * The format of array-typed slot. 
+ * The array's sub-element type just support: + * - INT32 + * - CHAR + * - VARCHAR + * - NULL + * + * A new array need initialization memory before used + */ +struct CollectionValue { +public: + CollectionValue() = default; + + explicit CollectionValue(uint32_t length) + : _data(nullptr), _length(length), _has_null(false), _null_signs(nullptr) {} + + CollectionValue(void* data, uint32_t length) + : _data(data), _length(length), _has_null(false), _null_signs(nullptr) {} + + CollectionValue(void* data, uint32_t length, bool* null_signs) + : _data(data), _length(length), _has_null(true), _null_signs(null_signs) {} + + CollectionValue(void* data, uint32_t length, bool has_null, bool* null_signs) + : _data(data), _length(length), _has_null(has_null), _null_signs(null_signs) {} + + inline bool is_null_at(uint32_t index) const { + return this->_has_null && this->_null_signs[index]; + } + + void to_collection_val(CollectionVal* val) const; + + inline uint32_t size() const { return _length; } + + inline uint32_t length() const { return _length; } + + void shallow_copy(const CollectionValue* other); + + void copy_null_signs(const CollectionValue* other); + + ArrayIterator iterator(PrimitiveType children_type) const; + + /** + * just shallow copy sub-elment value + * For special type, will shared actual value's memory, like StringValue. 
+ */ + Status set(uint32_t i, PrimitiveType type, const AnyVal* value); + + /** + * init collection, will alloc (children Type's size + 1) * (children Nums) memory + */ + static Status init_collection(ObjectPool* pool, uint32_t size, PrimitiveType child_type, + CollectionValue* val); + + static Status init_collection(MemPool* pool, uint32_t size, PrimitiveType child_type, + CollectionValue* val); + + static Status init_collection(FunctionContext* context, uint32_t size, PrimitiveType child_type, + CollectionValue* val); + + static CollectionValue from_collection_val(const CollectionVal& val); + const void* data() const { return _data; } + + inline bool has_null() const { return _has_null; } + inline const bool* null_signs() const { return _null_signs; } + inline void* mutable_data() { return _data; } + inline bool* mutable_null_signs() { return _null_signs; } + inline void set_length(uint32_t length) { _length = length; } + inline void set_has_null(bool has_null) { _has_null = has_null; } + inline void set_data(void* data) { _data = data; } + inline void set_null_signs(bool* null_signs) { _null_signs = null_signs; } + +public: + // child column data + void* _data; + uint32_t _length; + // item has no null value if has_null is false. + // item ```may``` has null value if has_null is true. 
+ bool _has_null; + // null bitmap + bool* _null_signs; + + friend ArrayIterator; +}; + +/** + * Array's Iterator, support read array by special type + */ +class ArrayIterator { +private: + ArrayIterator(PrimitiveType children_type, const CollectionValue* data); + +public: + bool seek(uint32_t n) { + if (n >= _data->size()) { + return false; + } + + _offset = n; + return true; + } + + bool has_next() { return _offset < _data->size(); } + + bool next() { + if (_offset < _data->size()) { + _offset++; + return true; + } + + return false; + } + + bool is_null(); + + void* value(); + + void value(AnyVal* dest); + +private: + size_t _offset; + int _type_size; + const PrimitiveType _type; + const CollectionValue* _data; + + friend CollectionValue; +}; +} // namespace doris diff --git a/be/src/runtime/datetime_value.h b/be/src/runtime/datetime_value.h index c0ffb9798b..54bd4ba8b3 100644 --- a/be/src/runtime/datetime_value.h +++ b/be/src/runtime/datetime_value.h @@ -278,7 +278,7 @@ public: cast_to_date(); return loss_accuracy; } - + void cast_to_date() { _hour = 0; _minute = 0; @@ -453,6 +453,8 @@ public: void set_type(int type); + int type() const { return _type; } + bool is_valid_date() const { return !check_range() && !check_date() && _month > 0 && _day > 0; } private: diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp index 675eadc0e9..40299cedf6 100644 --- a/be/src/runtime/descriptors.cpp +++ b/be/src/runtime/descriptors.cpp @@ -218,6 +218,9 @@ void TupleDescriptor::add_slot(SlotDescriptor* slot) { if (slot->type().is_string_type()) { _string_slots.push_back(slot); _has_varlen_slots = true; + } else if (slot->type().is_collection_type()) { + _collection_slots.push_back(slot); + _has_varlen_slots = true; } else { _no_string_slots.push_back(slot); } @@ -520,7 +523,6 @@ Status DescriptorTbl::create(ObjectPool* pool, const TDescriptorTable& thrift_tb if (entry == (*tbl)->_tuple_desc_map.end()) { return Status::InternalError("unknown tid in slot 
descriptor msg"); } - entry->second->add_slot(slot_d); } diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index a85ba74db2..a98451b7ac 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -69,6 +69,8 @@ struct NullIndicatorOffset { std::ostream& operator<<(std::ostream& os, const NullIndicatorOffset& null_indicator); +class TupleDescriptor; + class SlotDescriptor { public: // virtual ~SlotDescriptor() {}; @@ -102,6 +104,7 @@ private: friend class TupleDescriptor; friend class SchemaScanner; friend class OlapTableSchemaParam; + friend class TupleDescriptor; const SlotId _id; const TypeDescriptor _type; @@ -243,6 +246,8 @@ public: const std::vector& slots() const { return _slots; } const std::vector& string_slots() const { return _string_slots; } const std::vector& no_string_slots() const { return _no_string_slots; } + const std::vector& collection_slots() const { return _collection_slots; } + bool has_varlen_slots() const { { return _has_varlen_slots; } } @@ -253,6 +258,9 @@ public: if (desc->string_slots().size() > 0) { return true; } + if (desc->collection_slots().size() > 0) { + return true; + } } return false; } @@ -282,6 +290,8 @@ private: std::vector _string_slots; // contains only materialized string slots // contains only materialized slots except string slots std::vector _no_string_slots; + // _collection_slots + std::vector _collection_slots; // Provide quick way to check if there are variable length slots. // True if _string_slots or _collection_slots have entries. @@ -289,6 +299,7 @@ private: TupleDescriptor(const TTupleDescriptor& tdesc); TupleDescriptor(const PTupleDescriptor& tdesc); + void add_slot(SlotDescriptor* slot); /// Returns slots in their physical order. 
diff --git a/be/src/runtime/mysql_result_writer.cpp b/be/src/runtime/mysql_result_writer.cpp index 3cad9d6ac7..81d70ca226 100644 --- a/be/src/runtime/mysql_result_writer.cpp +++ b/be/src/runtime/mysql_result_writer.cpp @@ -65,7 +65,152 @@ void MysqlResultWriter::_init_profile() { _sent_rows_counter = ADD_COUNTER(_parent_profile, "NumSentRows", TUnit::UNIT); } +int MysqlResultWriter::_add_row_value(int index, const TypeDescriptor& type, void* item) { + int buf_ret = 0; + if (item == nullptr) { + return _row_buffer->push_null(); + } + + switch (type.type) { + case TYPE_BOOLEAN: + case TYPE_TINYINT: + buf_ret = _row_buffer->push_tinyint(*static_cast(item)); + break; + + case TYPE_SMALLINT: + buf_ret = _row_buffer->push_smallint(*static_cast(item)); + break; + + case TYPE_INT: + buf_ret = _row_buffer->push_int(*static_cast(item)); + break; + + case TYPE_BIGINT: + buf_ret = _row_buffer->push_bigint(*static_cast(item)); + break; + + case TYPE_LARGEINT: { + char buf[48]; + int len = 48; + char* v = LargeIntValue::to_string(reinterpret_cast(item)->value, buf, + &len); + buf_ret = _row_buffer->push_string(v, len); + break; + } + + case TYPE_FLOAT: + buf_ret = _row_buffer->push_float(*static_cast(item)); + break; + + case TYPE_DOUBLE: + buf_ret = _row_buffer->push_double(*static_cast(item)); + break; + + case TYPE_TIME: { + double time = *static_cast(item); + std::string time_str = time_str_from_double(time); + buf_ret = _row_buffer->push_string(time_str.c_str(), time_str.size()); + break; + } + + case TYPE_DATE: + case TYPE_DATETIME: { + char buf[64]; + const DateTimeValue* time_val = (const DateTimeValue*)(item); + // TODO(zhaochun), this function has core risk + char* pos = time_val->to_string(buf); + buf_ret = _row_buffer->push_string(buf, pos - buf - 1); + break; + } + + case TYPE_HLL: + case TYPE_OBJECT: { + buf_ret = _row_buffer->push_null(); + break; + } + + case TYPE_VARCHAR: + case TYPE_CHAR: { + const StringValue* string_val = (const StringValue*)(item); + + if 
(string_val->ptr == NULL) { + if (string_val->len == 0) { + // 0x01 is a magic num, not useful actually, just for present "" + char* tmp_val = reinterpret_cast(0x01); + buf_ret = _row_buffer->push_string(tmp_val, string_val->len); + } else { + buf_ret = _row_buffer->push_null(); + } + } else { + buf_ret = _row_buffer->push_string(string_val->ptr, string_val->len); + } + + break; + } + + case TYPE_DECIMALV2: { + DecimalV2Value decimal_val(reinterpret_cast(item)->value); + std::string decimal_str; + int output_scale = _output_expr_ctxs[index]->root()->output_scale(); + + if (output_scale > 0 && output_scale <= 30) { + decimal_str = decimal_val.to_string(output_scale); + } else { + decimal_str = decimal_val.to_string(); + } + + buf_ret = _row_buffer->push_string(decimal_str.c_str(), decimal_str.length()); + break; + } + + case TYPE_ARRAY: { + auto children_type = type.children[0].type; + auto array_value = (const CollectionValue*)(item); + + ArrayIterator iter = array_value->iterator(children_type); + + _row_buffer->open_dynamic_mode(); + + buf_ret = _row_buffer->push_string("[", 1); + + int begin = 0; + while (iter.has_next() && !buf_ret) { + if (begin != 0) { + buf_ret = _row_buffer->push_string(", ", 2); + } + + if (children_type == TYPE_CHAR || children_type == TYPE_VARCHAR) { + buf_ret = _row_buffer->push_string("'", 1); + buf_ret = _add_row_value(index, children_type, iter.value()); + buf_ret = _row_buffer->push_string("'", 1); + } else { + buf_ret = _add_row_value(index, children_type, iter.value()); + } + + iter.next(); + begin++; + } + + if (!buf_ret) { + buf_ret = _row_buffer->push_string("]", 1); + } + + _row_buffer->close_dynamic_mode(); + break; + } + + default: + LOG(WARNING) << "can't convert this type to mysql type. 
type = " + << _output_expr_ctxs[index]->root()->type(); + buf_ret = -1; + break; + } + + return buf_ret; +} + Status MysqlResultWriter::_add_one_row(TupleRow* row) { + SCOPED_TIMER(_convert_tuple_timer); _row_buffer->reset(); int num_columns = _output_expr_ctxs.size(); int buf_ret = 0; @@ -73,109 +218,7 @@ Status MysqlResultWriter::_add_one_row(TupleRow* row) { for (int i = 0; 0 == buf_ret && i < num_columns; ++i) { void* item = _output_expr_ctxs[i]->get_value(row); - if (NULL == item) { - buf_ret = _row_buffer->push_null(); - continue; - } - - switch (_output_expr_ctxs[i]->root()->type().type) { - case TYPE_BOOLEAN: - case TYPE_TINYINT: - buf_ret = _row_buffer->push_tinyint(*static_cast(item)); - break; - - case TYPE_SMALLINT: - buf_ret = _row_buffer->push_smallint(*static_cast(item)); - break; - - case TYPE_INT: - buf_ret = _row_buffer->push_int(*static_cast(item)); - break; - - case TYPE_BIGINT: - buf_ret = _row_buffer->push_bigint(*static_cast(item)); - break; - - case TYPE_LARGEINT: { - char buf[48]; - int len = 48; - char* v = LargeIntValue::to_string(reinterpret_cast(item)->value, - buf, &len); - buf_ret = _row_buffer->push_string(v, len); - break; - } - - case TYPE_FLOAT: - buf_ret = _row_buffer->push_float(*static_cast(item)); - break; - - case TYPE_DOUBLE: - buf_ret = _row_buffer->push_double(*static_cast(item)); - break; - - case TYPE_TIME: { - double time = *static_cast(item); - std::string time_str = time_str_from_double(time); - buf_ret = _row_buffer->push_string(time_str.c_str(), time_str.size()); - break; - } - - case TYPE_DATE: - case TYPE_DATETIME: { - char buf[64]; - const DateTimeValue* time_val = (const DateTimeValue*)(item); - // TODO(zhaochun), this function has core risk - char* pos = time_val->to_string(buf); - buf_ret = _row_buffer->push_string(buf, pos - buf - 1); - break; - } - - case TYPE_HLL: - case TYPE_OBJECT: { - buf_ret = _row_buffer->push_null(); - break; - } - - case TYPE_VARCHAR: - case TYPE_CHAR: { - const StringValue* 
string_val = (const StringValue*)(item); - - if (string_val->ptr == NULL) { - if (string_val->len == 0) { - // 0x01 is a magic num, not useful actually, just for present "" - char* tmp_val = reinterpret_cast(0x01); - buf_ret = _row_buffer->push_string(tmp_val, string_val->len); - } else { - buf_ret = _row_buffer->push_null(); - } - } else { - buf_ret = _row_buffer->push_string(string_val->ptr, string_val->len); - } - - break; - } - - case TYPE_DECIMALV2: { - DecimalV2Value decimal_val(reinterpret_cast(item)->value); - std::string decimal_str; - int output_scale = _output_expr_ctxs[i]->root()->output_scale(); - - if (output_scale > 0 && output_scale <= 30) { - decimal_str = decimal_val.to_string(output_scale); - } else { - decimal_str = decimal_val.to_string(); - } - - buf_ret = _row_buffer->push_string(decimal_str.c_str(), decimal_str.length()); - break; - } - - default: - LOG(WARNING) << "can't convert this type to mysql type. type = " - << _output_expr_ctxs[i]->root()->type(); - buf_ret = -1; - break; - } + buf_ret = _add_row_value(i, _output_expr_ctxs[i]->root()->type(), item); } if (0 != buf_ret) { diff --git a/be/src/runtime/mysql_result_writer.h b/be/src/runtime/mysql_result_writer.h index a993e966ef..8d8c215310 100644 --- a/be/src/runtime/mysql_result_writer.h +++ b/be/src/runtime/mysql_result_writer.h @@ -47,6 +47,7 @@ private: void _init_profile(); // convert one tuple row Status _add_one_row(TupleRow* row); + int _add_row_value(int index, const TypeDescriptor& type, void* item); private: BufferControlBlock* _sinker; diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 66a7620d83..320e58ba0f 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -20,6 +20,7 @@ #include #include "gen_cpp/Types_types.h" +#include "runtime/collection_value.h" namespace doris { //to_tcolumn_type_thrift only test @@ -92,6 +93,9 @@ PrimitiveType thrift_to_type(TPrimitiveType::type ttype) { case 
TPrimitiveType::OBJECT: return TYPE_OBJECT; + case TPrimitiveType::ARRAY: + return TYPE_ARRAY; + default: return INVALID_TYPE; } @@ -156,6 +160,9 @@ TPrimitiveType::type to_thrift(PrimitiveType ptype) { case TYPE_OBJECT: return TPrimitiveType::OBJECT; + case TYPE_ARRAY: + return TPrimitiveType::ARRAY; + default: return TPrimitiveType::INVALID_TYPE; } @@ -220,6 +227,9 @@ std::string type_to_string(PrimitiveType t) { case TYPE_OBJECT: return "OBJECT"; + case TYPE_ARRAY: + return "ARRAY"; + default: return ""; }; @@ -320,4 +330,49 @@ TTypeDesc gen_type_desc(const TPrimitiveType::type val, const std::string& name) return type_desc; } +int get_slot_size(PrimitiveType type) { + switch (type) { + case TYPE_OBJECT: + case TYPE_HLL: + case TYPE_CHAR: + case TYPE_VARCHAR: + return sizeof(StringValue); + case TYPE_ARRAY: + return sizeof(CollectionValue); + + case TYPE_NULL: + case TYPE_BOOLEAN: + case TYPE_TINYINT: + return 1; + + case TYPE_SMALLINT: + return 2; + + case TYPE_INT: + case TYPE_FLOAT: + return 4; + + case TYPE_BIGINT: + case TYPE_DOUBLE: + return 8; + + case TYPE_LARGEINT: + return sizeof(__int128); + + case TYPE_DATE: + case TYPE_DATETIME: + // This is the size of the slot, the actual size of the data is 12. 
+ return 16; + + case TYPE_DECIMALV2: + return 16; + + case INVALID_TYPE: + default: + DCHECK(false); + } + + return 0; +} + } // namespace doris diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index 19b8eaf46c..ebd12b5602 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -69,9 +69,9 @@ inline bool is_enumeration_type(PrimitiveType type) { case TYPE_DATETIME: case TYPE_DECIMALV2: case TYPE_BOOLEAN: + case TYPE_ARRAY: case TYPE_HLL: return false; - case TYPE_TINYINT: case TYPE_SMALLINT: case TYPE_INT: @@ -102,6 +102,7 @@ inline int get_byte_size(PrimitiveType type) { case TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: + case TYPE_ARRAY: return 0; case TYPE_NULL: @@ -140,6 +141,7 @@ inline int get_real_byte_size(PrimitiveType type) { case TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: + case TYPE_ARRAY: return 0; case TYPE_NULL: @@ -175,48 +177,7 @@ inline int get_real_byte_size(PrimitiveType type) { return 0; } // Returns the byte size of type when in a tuple -inline int get_slot_size(PrimitiveType type) { - switch (type) { - case TYPE_OBJECT: - case TYPE_HLL: - case TYPE_CHAR: - case TYPE_VARCHAR: - return sizeof(StringValue); - - case TYPE_NULL: - case TYPE_BOOLEAN: - case TYPE_TINYINT: - return 1; - - case TYPE_SMALLINT: - return 2; - - case TYPE_INT: - case TYPE_FLOAT: - return 4; - - case TYPE_BIGINT: - case TYPE_DOUBLE: - return 8; - - case TYPE_LARGEINT: - return sizeof(__int128); - - case TYPE_DATE: - case TYPE_DATETIME: - // This is the size of the slot, the actual size of the data is 12. 
- return 16; - - case TYPE_DECIMALV2: - return 16; - - case INVALID_TYPE: - default: - DCHECK(false); - } - - return 0; -} +int get_slot_size(PrimitiveType type); inline bool is_type_compatible(PrimitiveType lhs, PrimitiveType rhs) { if (lhs == TYPE_VARCHAR) { diff --git a/be/src/runtime/raw_value.cpp b/be/src/runtime/raw_value.cpp index 280fd301b7..d83e9e5752 100644 --- a/be/src/runtime/raw_value.cpp +++ b/be/src/runtime/raw_value.cpp @@ -20,6 +20,7 @@ #include #include "olap/utils.h" +#include "runtime/collection_value.h" #include "runtime/string_value.hpp" #include "runtime/tuple.h" #include "util/types.h" @@ -165,6 +166,21 @@ void RawValue::print_value(const void* value, const TypeDescriptor& type, int sc *stream << reinterpret_cast(value)->value; break; + case TYPE_ARRAY: { + const CollectionValue* src = reinterpret_cast(value); + auto children_type = type.children.at(0); + auto iter = src->iterator(children_type.type); + *stream << "["; + print_value(iter.value(), children_type, scale, stream); + iter.next(); + for (; iter.has_next(); iter.next()) { + *stream << ", "; + print_value(iter.value(), children_type, scale, stream); + } + *stream << "]"; + break; + } + default: DCHECK(false) << "bad RawValue::print_value() type: " << type; } @@ -292,7 +308,33 @@ void RawValue::write(const void* value, void* dst, const TypeDescriptor& type, M break; } + case TYPE_ARRAY: { + DCHECK_EQ(type.children.size(), 1); + const CollectionValue* src = reinterpret_cast(value); + CollectionValue* val = reinterpret_cast(dst); + + if (pool != NULL) { + auto children_type = type.children.at(0).type; + CollectionValue::init_collection(pool, src->size(), children_type, val); + ArrayIterator src_iter = src->iterator(children_type); + ArrayIterator val_iter = val->iterator(children_type); + + val->copy_null_signs(src); + + while (src_iter.has_next() && val_iter.has_next()) { + if (!src_iter.is_null()) { + // write children + write(src_iter.value(), val_iter.value(), children_type, pool); 
+ } + src_iter.next(); + val_iter.next(); + } + } else { + val->shallow_copy(src); + } + break; + } default: DCHECK(false) << "RawValue::write(): bad type: " << type; } diff --git a/be/src/runtime/result_writer.h b/be/src/runtime/result_writer.h index 418e7b644e..477d79c7e1 100644 --- a/be/src/runtime/result_writer.h +++ b/be/src/runtime/result_writer.h @@ -25,6 +25,7 @@ namespace doris { class Status; class RowBatch; class RuntimeState; +class TypeDescriptor; // abstract class of the result writer class ResultWriter { diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp index f0238fae35..a1958508ba 100644 --- a/be/src/runtime/row_batch.cpp +++ b/be/src/runtime/row_batch.cpp @@ -28,6 +28,7 @@ //#include "runtime/mem_tracker.h" #include "gen_cpp/Data_types.h" #include "gen_cpp/data.pb.h" +#include "runtime/collection_value.h" #include "util/debug_util.h" using std::vector; @@ -126,7 +127,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch, TupleRow* row = get_row(i); std::vector::const_iterator desc = tuple_descs.begin(); for (int j = 0; desc != tuple_descs.end(); ++desc, ++j) { - if ((*desc)->string_slots().empty()) { + if ((*desc)->string_slots().empty() && (*desc)->collection_slots().empty()) { continue; } Tuple* tuple = row->get_tuple(j); @@ -146,6 +147,42 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch, // length. So we make the high bits zero here. 
string_val->len &= 0x7FFFFFFFL; } + + // copy collection slots + vector::const_iterator slot_collection = + (*desc)->collection_slots().begin(); + for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) { + DCHECK((*slot_collection)->type().is_collection_type()); + + CollectionValue* array_val = + tuple->get_collection_slot((*slot_collection)->tuple_offset()); + + // assgin data and null_sign pointer position in tuple_data + int data_offset = reinterpret_cast(array_val->data()); + array_val->set_data(reinterpret_cast(tuple_data + data_offset)); + int null_offset = reinterpret_cast(array_val->null_signs()); + array_val->set_null_signs(reinterpret_cast(tuple_data + null_offset)); + + const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0); + if (!item_type.is_string_type()) { + continue; + } + + // copy every string item + for (int i = 0; i < array_val->length(); ++i) { + if (array_val->is_null_at(i)) { + continue; + } + + StringValue* dst_item_v = reinterpret_cast( + (uint8_t*)array_val->data() + i * item_type.get_slot_size()); + + if (dst_item_v->len != 0) { + int offset = reinterpret_cast(dst_item_v->ptr); + dst_item_v->ptr = reinterpret_cast(tuple_data + offset); + } + } + } } } } @@ -221,7 +258,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const TRowBatch& input_batch, TupleRow* row = get_row(i); std::vector::const_iterator desc = tuple_descs.begin(); for (int j = 0; desc != tuple_descs.end(); ++desc, ++j) { - if ((*desc)->string_slots().empty()) { + if ((*desc)->string_slots().empty() && (*desc)->collection_slots().empty()) { continue; } @@ -244,6 +281,40 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const TRowBatch& input_batch, // length. So we make the high bits zero here. 
string_val->len &= 0x7FFFFFFFL; } + + // copy collection slot + vector::const_iterator slot_collection = + (*desc)->collection_slots().begin(); + for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) { + DCHECK((*slot_collection)->type().is_collection_type()); + CollectionValue* array_val = + tuple->get_collection_slot((*slot_collection)->tuple_offset()); + + int offset = reinterpret_cast(array_val->data()); + array_val->set_data(reinterpret_cast(tuple_data + offset)); + int null_offset = reinterpret_cast(array_val->null_signs()); + array_val->set_null_signs(reinterpret_cast(tuple_data + null_offset)); + + const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0); + if (!item_type.is_string_type()) { + continue; + } + + // copy string item + for (int i = 0; i < array_val->length(); ++i) { + if (array_val->is_null_at(i)) { + continue; + } + + StringValue* dst_item_v = reinterpret_cast( + (uint8_t*)array_val->data() + i * item_type.get_slot_size()); + + if (dst_item_v->len != 0) { + int offset = reinterpret_cast(dst_item_v->ptr); + dst_item_v->ptr = reinterpret_cast(tuple_data + offset); + } + } + } } } } @@ -606,6 +677,37 @@ int RowBatch::total_byte_size() { StringValue* string_val = tuple->get_string_slot((*slot)->tuple_offset()); result += string_val->len; } + + // compute slot collection size + vector::const_iterator slot_collection = + (*desc)->collection_slots().begin(); + for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) { + DCHECK((*slot_collection)->type().is_collection_type()); + if (tuple->is_null((*slot_collection)->null_indicator_offset())) { + continue; + } + // compute data null_signs size + CollectionValue* array_val = + tuple->get_collection_slot((*slot_collection)->tuple_offset()); + result += array_val->length() * sizeof(bool); + + const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0); + result += array_val->length() * item_type.get_slot_size(); + + if 
(!item_type.is_string_type()) { + continue; + } + + // compute string type item size + for (int i = 0; i < array_val->length(); ++i) { + if (array_val->is_null_at(i)) { + continue; + } + StringValue* dst_item_v = reinterpret_cast( + (uint8_t*)array_val->data() + i * item_type.get_slot_size()); + result += dst_item_v->len; + } + } } } diff --git a/be/src/runtime/tuple.cpp b/be/src/runtime/tuple.cpp index 19a8f0cd3a..0cd778114c 100644 --- a/be/src/runtime/tuple.cpp +++ b/be/src/runtime/tuple.cpp @@ -17,10 +17,15 @@ #include "runtime/tuple.h" +#include +#include +#include +#include #include #include "exprs/expr.h" #include "exprs/expr_context.h" +#include "runtime/collection_value.h" #include "runtime/descriptors.h" #include "runtime/mem_pool.h" #include "runtime/raw_value.h" @@ -78,6 +83,59 @@ void Tuple::deep_copy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool, bo } } } + + // copy collection slot + for (auto slot_desc : desc.collection_slots()) { + DCHECK(slot_desc->type().is_collection_type()); + if (dst->is_null(slot_desc->null_indicator_offset())) { + continue; + } + + // copy collection item + CollectionValue* cv = dst->get_collection_slot(slot_desc->tuple_offset()); + + const TypeDescriptor& item_type = slot_desc->type().children.at(0); + + int coll_byte_size = cv->length() * item_type.get_slot_size(); + int nulls_size = cv->length() * sizeof(bool); + + int offset = pool->total_allocated_bytes(); + char* coll_data = reinterpret_cast(pool->allocate(coll_byte_size + nulls_size)); + + // copy data and null_signs + if (nulls_size > 0) { + cv->set_has_null(true); + cv->set_null_signs(reinterpret_cast(coll_data) + coll_byte_size); + memory_copy(coll_data, cv->null_signs(), nulls_size); + } else { + cv->set_has_null(false); + } + memory_copy(coll_data + nulls_size, cv->data(), coll_byte_size); + + // assgin new null_sign and data location + cv->set_null_signs(convert_ptrs ? 
reinterpret_cast(offset) + : reinterpret_cast(coll_data)); + cv->set_data(convert_ptrs ? reinterpret_cast(offset + nulls_size) + : coll_data + nulls_size); + + if (!item_type.is_string_type()) { + continue; + } + // when itemtype is string, copy every string item + for (int i = 0; i < cv->length(); ++i) { + int item_offset = nulls_size + i * item_type.get_slot_size(); + if (cv->is_null_at(i)) { + continue; + } + StringValue* dst_item_v = reinterpret_cast(coll_data + item_offset); + if (dst_item_v->len != 0) { + int offset = pool->total_allocated_bytes(); + char* string_copy = reinterpret_cast(pool->allocate(dst_item_v->len)); + memory_copy(string_copy, dst_item_v->ptr, dst_item_v->len); + dst_item_v->ptr = (convert_ptrs ? reinterpret_cast(offset) : string_copy); + } + } + } } Tuple* Tuple::dcopy_with_new(const TupleDescriptor& desc, MemPool* pool, int64_t* bytes) { @@ -137,6 +195,61 @@ void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int* offset, boo *offset += string_v->len; } } + + // copy collection slots + for (auto slot_desc : desc.collection_slots()) { + DCHECK(slot_desc->type().is_collection_type()); + if (dst->is_null(slot_desc->null_indicator_offset())) { + continue; + } + // get cv to copy elements + CollectionValue* cv = dst->get_collection_slot(slot_desc->tuple_offset()); + const TypeDescriptor& item_type = slot_desc->type().children.at(0); + + int coll_byte_size = cv->length() * item_type.get_slot_size(); + int nulls_size = cv->length() * sizeof(bool); + + // copy null_sign + memory_copy(*data, cv->null_signs(), nulls_size); + // copy data + memory_copy(*data + nulls_size, cv->data(), coll_byte_size); + + if (!item_type.is_string_type()) { + cv->set_null_signs(convert_ptrs ? reinterpret_cast(*offset) + : reinterpret_cast(*data)); + cv->set_data(convert_ptrs ? 
reinterpret_cast(*offset + nulls_size) + : *data + nulls_size); + *data += coll_byte_size + nulls_size; + *offset += coll_byte_size + nulls_size; + continue; + } + + // when item is string type, copy every item + char* base_data = *data; + int base_offset = *offset; + + *data += coll_byte_size + nulls_size; + *offset += coll_byte_size + nulls_size; + + for (int i = 0; i < cv->length(); ++i) { + int item_offset = nulls_size + i * item_type.get_slot_size(); + if (cv->is_null_at(i)) { + continue; + } + StringValue* dst_item_v = reinterpret_cast(base_data + item_offset); + if (dst_item_v->len != 0) { + memory_copy(*data, dst_item_v->ptr, dst_item_v->len); + dst_item_v->ptr = (convert_ptrs ? reinterpret_cast(*offset) : *data); + *data += dst_item_v->len; + *offset += dst_item_v->len; + } + } + // assgin new null_sign and data location + cv->set_null_signs(convert_ptrs ? reinterpret_cast(base_offset) + : reinterpret_cast(base_data)); + cv->set_data(convert_ptrs ? reinterpret_cast(base_offset + nulls_size) + : base_data + nulls_size); + } } template @@ -166,6 +279,8 @@ void Tuple::materialize_exprs(TupleRow* row, const TupleDescriptor& desc, (expr_type == TYPE_HLL)); } else if ((slot_type == TYPE_DATE) || (slot_type == TYPE_DATETIME)) { DCHECK((expr_type == TYPE_DATE) || (expr_type == TYPE_DATETIME)); + } else if (slot_type == TYPE_ARRAY) { + DCHECK((expr_type == TYPE_ARRAY)); } else { DCHECK(slot_type == TYPE_NULL || slot_type == expr_type); } diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h index b0b0f7b765..f08d2378bd 100644 --- a/be/src/runtime/tuple.h +++ b/be/src/runtime/tuple.h @@ -27,6 +27,7 @@ namespace doris { struct StringValue; +struct CollectionValue; class TupleDescriptor; class DateTimeValue; class TupleRow; @@ -154,6 +155,17 @@ public: return reinterpret_cast(reinterpret_cast(this) + offset); } + CollectionValue* get_collection_slot(int offset) { + DCHECK(offset != -1); // -1 offset indicates non-materialized slot + return 
reinterpret_cast(reinterpret_cast(this) + offset); + } + + const CollectionValue* get_collection_slot(int offset) const { + DCHECK(offset != -1); // -1 offset indicates non-materialized slot + return reinterpret_cast(reinterpret_cast(this) + + offset); + } + DateTimeValue* get_datetime_slot(int offset) { DCHECK(offset != -1); // -1 offset indicates non-materialized slot return reinterpret_cast(reinterpret_cast(this) + offset); diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index 3ebd2644b8..c39a161b5c 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -43,6 +43,14 @@ TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) } break; } + case TTypeNodeType::ARRAY: { + DCHECK(!node.__isset.scalar_type); + DCHECK_LT(*idx, types.size() - 1); + type = TYPE_ARRAY; + ++(*idx); + children.push_back(TypeDescriptor(types, idx)); + break; + } #if 0 // Don't support now case TTypeNodeType::STRUCT: type = TYPE_STRUCT; @@ -112,7 +120,8 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const { } void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const { - DCHECK(!is_complex_type()) << "Don't support complex type now, type=" << type; + DCHECK(!is_complex_type() || type == TYPE_ARRAY) + << "Don't support complex type now, type=" << type; auto node = ptype->add_types(); node->set_type(TTypeNodeType::SCALAR); auto scalar_type = node->mutable_scalar_type(); @@ -124,6 +133,11 @@ void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const { DCHECK_NE(scale, -1); scalar_type->set_precision(precision); scalar_type->set_scale(scale); + } else if (type == TYPE_ARRAY) { + node->set_type(TTypeNodeType::ARRAY); + for (const TypeDescriptor& child : children) { + child.to_protobuf(ptype); + } } } @@ -149,6 +163,12 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField #include +#include // This is the only Doris header required to develop UDFs and UDAs. 
This header // contains the types that need to be used and the FunctionContext object. The context @@ -44,6 +45,7 @@ struct StringVal; struct DateTimeVal; struct DecimalV2Val; struct HllVal; +struct CollectionVal; // The FunctionContext is passed to every UDF/UDA and is the interface for the UDF to the // rest of the system. It contains APIs to examine the system state, report errors @@ -74,7 +76,8 @@ public: TYPE_STRING, TYPE_FIXED_BUFFER, TYPE_DECIMALV2, - TYPE_OBJECT + TYPE_OBJECT, + TYPE_ARRAY }; struct TypeDesc { @@ -86,6 +89,9 @@ public: /// Only valid if type == TYPE_FIXED_BUFFER || type == TYPE_VARCHAR int len; + + // only vaild if type == TYPE_ARRAY + std::vector children; }; struct UniqueId { @@ -718,6 +724,26 @@ struct HllVal : public StringVal { void agg_merge(const HllVal& other); }; +struct CollectionVal : public AnyVal { + void* data; + uint32_t length; + // item has no null value if has_null is false. + // item ```may``` has null value if has_null is true. + bool has_null; + // null bitmap + bool* null_signs; + + CollectionVal() = default; + + CollectionVal(void* data, uint32_t length, bool has_null, bool* null_signs) + : data(data), length(length), has_null(has_null), null_signs(null_signs){}; + + static CollectionVal null() { + CollectionVal val; + val.is_null = true; + return val; + } +}; typedef uint8_t* BufferVal; } // namespace doris_udf @@ -734,5 +760,6 @@ using doris_udf::DecimalV2Val; using doris_udf::DateTimeVal; using doris_udf::HllVal; using doris_udf::FunctionContext; +using doris_udf::CollectionVal; #endif diff --git a/be/src/util/mysql_row_buffer.cpp b/be/src/util/mysql_row_buffer.cpp index 51b2bcee85..8b90259c8c 100644 --- a/be/src/util/mysql_row_buffer.cpp +++ b/be/src/util/mysql_row_buffer.cpp @@ -17,6 +17,7 @@ #include "util/mysql_row_buffer.h" +#include #include #include @@ -24,8 +25,6 @@ #include "gutil/strings/numbers.h" #include "util/mysql_global.h" -#include - namespace doris { // the first byte: @@ -58,7 +57,11 @@ static 
char* pack_vlen(char* packet, uint64_t length) { return packet + 8; } MysqlRowBuffer::MysqlRowBuffer() - : _pos(_default_buf), _buf(_default_buf), _buf_size(sizeof(_default_buf)) {} + : _pos(_default_buf), + _buf(_default_buf), + _buf_size(sizeof(_default_buf)), + _dynamic_mode(0), + _len_pos(nullptr) {} MysqlRowBuffer::~MysqlRowBuffer() { if (_buf != _default_buf) { @@ -66,6 +69,25 @@ MysqlRowBuffer::~MysqlRowBuffer() { } } +void MysqlRowBuffer::open_dynamic_mode() { + if (!_dynamic_mode) { + *_pos++ = 254; + // write length when dynamic mode close + _len_pos = _pos; + _pos = _pos + 8; + } + _dynamic_mode++; +} + +void MysqlRowBuffer::close_dynamic_mode() { + _dynamic_mode--; + + if (!_dynamic_mode) { + int8store(_len_pos, _pos - _len_pos - 8); + _len_pos = nullptr; + } +} + int MysqlRowBuffer::reserve(int size) { if (size < 0) { LOG(ERROR) << "alloc memory failed. size = " << size; @@ -99,15 +121,29 @@ int MysqlRowBuffer::reserve(int size) { return 0; } -template -static char* add_int(T data, char* pos) -{ +template +static char* add_int(T data, char* pos, bool dynamic_mode) { auto fi = fmt::format_int(data); int length = fi.size(); - int1store(pos++, length); + if (!dynamic_mode) { + int1store(pos++, length); + } memcpy(pos, fi.data(), length); return pos + length; } +template +static char* add_float(T data, char* pos, bool dynamic_mode) { + int length = 0; + if constexpr (std::is_same_v) { + length = FloatToBuffer(data, MAX_FLOAT_STR_LENGTH + 2, pos + !dynamic_mode); + } else if constexpr (std::is_same_v) { + length = DoubleToBuffer(data, MAX_DOUBLE_STR_LENGTH + 2, pos + !dynamic_mode); + } + if (!dynamic_mode) { + int1store(pos++, length); + } + return pos + length; +} int MysqlRowBuffer::push_tinyint(int8_t data) { // 1 for string trail, 1 for length, 1 for sign, other for digits @@ -118,7 +154,7 @@ int MysqlRowBuffer::push_tinyint(int8_t data) { return ret; } - _pos = add_int(data, _pos); + _pos = add_int(data, _pos, _dynamic_mode); return 0; } @@ -131,7 
+167,7 @@ int MysqlRowBuffer::push_smallint(int16_t data) { return ret; } - _pos = add_int(data, _pos); + _pos = add_int(data, _pos, _dynamic_mode); return 0; } @@ -144,7 +180,7 @@ int MysqlRowBuffer::push_int(int32_t data) { return ret; } - _pos = add_int(data, _pos); + _pos = add_int(data, _pos, _dynamic_mode); return 0; } @@ -157,7 +193,7 @@ int MysqlRowBuffer::push_bigint(int64_t data) { return ret; } - _pos = add_int(data, _pos); + _pos = add_int(data, _pos, _dynamic_mode); return 0; } @@ -166,19 +202,11 @@ int MysqlRowBuffer::push_unsigned_bigint(uint64_t data) { int ret = reserve(4 + MAX_BIGINT_WIDTH); if (0 != ret) { - LOG(ERROR) << "mysql row buffer reserve failed."; + LOG(ERROR) << "mysql row buffer reserver failed."; return ret; } - int length = snprintf(_pos + 1, MAX_BIGINT_WIDTH + 3, "%ld", data); - - if (length < 0) { - LOG(ERROR) << "snprintf failed. data = " << data; - return length; - } - - int1store(_pos, length); - _pos += length + 1; + _pos = add_int(data, _pos, _dynamic_mode); return 0; } @@ -191,15 +219,7 @@ int MysqlRowBuffer::push_float(float data) { return ret; } - int length = FloatToBuffer(data, MAX_FLOAT_STR_LENGTH + 2, _pos + 1); - - if (length < 0) { - LOG(ERROR) << "gcvt float failed. data = " << data; - return length; - } - - int1store(_pos, length); - _pos += length + 1; + _pos = add_float(data, _pos, _dynamic_mode); return 0; } @@ -212,15 +232,7 @@ int MysqlRowBuffer::push_double(double data) { return ret; } - int length = DoubleToBuffer(data, MAX_DOUBLE_STR_LENGTH + 2, _pos + 1); - - if (length < 0) { - LOG(ERROR) << "gcvt double failed. 
data = " << data; - return length; - } - - int1store(_pos, length); - _pos += length + 1; + _pos = add_float(data, _pos, _dynamic_mode); return 0; } @@ -238,13 +250,20 @@ int MysqlRowBuffer::push_string(const char* str, int length) { return ret; } - _pos = pack_vlen(_pos, length); + if (!_dynamic_mode) { + _pos = pack_vlen(_pos, length); + } memcpy(_pos, str, length); _pos += length; return 0; } int MysqlRowBuffer::push_null() { + if (_dynamic_mode) { + // in dynamic mode a NULL value writes nothing to the buffer + return 0; + } + int ret = reserve(1); if (0 != ret) { @@ -272,5 +291,3 @@ char* MysqlRowBuffer::reserved(int size) { } } // namespace doris - -/* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/be/src/util/mysql_row_buffer.h b/be/src/util/mysql_row_buffer.h index b0dad9b9a0..ddea35e159 100644 --- a/be/src/util/mysql_row_buffer.h +++ b/be/src/util/mysql_row_buffer.h @@ -22,8 +22,32 @@ namespace doris { -// helper for construct MySQL send row +/** // Now only support text protocol + * Helper for constructing a MySQL result row + * The MySQL protocol: + * + * | flag | (length) | value | flag | (length) | value | ...... + * <--------A column--------><--------A column--------><-.....-> + * + * The flag encodes the value's length or a NULL value: + * If the value is NULL, the flag is 251 + * If the value's length < 251, the flag is the value's length + * If 251 <= value's length < 65536, the flag is 252 and the next two bytes are the length + * If 65536 <= value's length < 16777216, the flag is 253 and the next three bytes are the length + * If 16777216 <= value's length, the flag is 254 and the next eight bytes are the length + * + * Code example: + * mrb.push_null(); + * mrb.push_tinyint(5); + * mrb.push_int(120); + * mrb.push_string("...my length is 65536..."); + * + * The resulting protocol contents: + * + * 251-1-'5'-3-'120'-253-65536-"...my length is 65536..." + * + */ class MysqlRowBuffer { public: MysqlRowBuffer(); @@ -50,6 +74,40 @@ public: const char* pos() const { return _pos; } int length() const { return _pos - _buf; } + /** + * Why?
+ * Because a nested type's data must be pushed multiple times, but the MySQL protocol doesn't + * support nested types and each push normally produces one column. + * + * How? + * Dynamic mode allows us to push data into one column multiple times, including recursively. + * The length of the column is not yet known when dynamic mode is opened, + * so we set the flag to 254 (the longest flag) and skip the 8 bytes used to + * record the length, then fill in the actual data length when dynamic mode is closed. + * In a recursive call (e.g. for nested types), the mode may be opened multiple times, but + * the data is actually written into one column, so we only need to handle it at the first open + * and at the last close. + * + * the code: + * mrb.push_tinyint(5); + * mrb.push_smallint(120); + * mrb.push_int(-30000); + * + * In normal mode, the buffer contains three columns: + * 1-'5'-3-'120'-6-'-30000' + * + * The same code in dynamic mode produces a single column: + * 254-10-'5'-'120'-'-30000' + * + * NOTE: open_dynamic_mode() and close_dynamic_mode() must be called in pairs + */ + void open_dynamic_mode(); + + /** + * NOTE: open_dynamic_mode() and close_dynamic_mode() must be called in pairs + */ + void close_dynamic_mode(); + private: int reserve(int size); @@ -57,10 +115,11 @@ private: char* _buf; int _buf_size; char _default_buf[4096]; + + int _dynamic_mode; + char* _len_pos; }; } // namespace doris #endif // DORIS_BE_SRC_QUERY_MYSQL_MYSQL_ROW_BUFFER_H - -/* vim: set ts=4 sw=4 sts=4 tw=100 noet: */ diff --git a/be/test/exec/CMakeLists.txt b/be/test/exec/CMakeLists.txt index a35c51f42c..c31b917515 100644 --- a/be/test/exec/CMakeLists.txt +++ b/be/test/exec/CMakeLists.txt @@ -58,7 +58,7 @@ ADD_BE_TEST(es_predicate_test) ADD_BE_TEST(es_query_builder_test) ADD_BE_TEST(es_scan_reader_test) #ADD_BE_TEST(schema_scan_node_test) -ADD_BE_TEST(unix_odbc_test) +#ADD_BE_TEST(unix_odbc_test) #ADD_BE_TEST(schema_scanner_test) ##ADD_BE_TEST(set_executor_test)
#ADD_BE_TEST(schema_scanner/schema_authors_scanner_test) diff --git a/be/test/exprs/CMakeLists.txt b/be/test/exprs/CMakeLists.txt index b24cd171c2..26e73f01ce 100644 --- a/be/test/exprs/CMakeLists.txt +++ b/be/test/exprs/CMakeLists.txt @@ -37,3 +37,5 @@ ADD_BE_TEST(math_functions_test) ADD_BE_TEST(topn_function_test) ADD_BE_TEST(runtime_filter_test) ADD_BE_TEST(bloom_filter_predicate_test) +ADD_BE_TEST(array_functions_test) + diff --git a/be/test/exprs/array_functions_test.cpp b/be/test/exprs/array_functions_test.cpp new file mode 100644 index 0000000000..8e410e22bf --- /dev/null +++ b/be/test/exprs/array_functions_test.cpp @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "exprs/array_functions.h" + +#include + +#include "gmock/gmock.h" +#include "runtime/collection_value.h" +#include "runtime/free_pool.hpp" +#include "string" +#include "testutil/function_utils.h" +#include "udf/udf.h" +#include "udf/udf_internal.h" + +#define private public + +namespace doris { + +class ArrayFunctionsTest : public testing::Test { +public: + ArrayFunctionsTest() { + _utils = new FunctionUtils(); + _context = _utils->get_fn_ctx(); + } + ~ArrayFunctionsTest() { delete _utils; } + +public: + FunctionUtils* _utils; + FunctionContext* _context; +}; + +TEST_F(ArrayFunctionsTest, array) { + // Int array + { + FunctionContext::TypeDesc childTypeDesc; + childTypeDesc.type = FunctionContext::TYPE_INT; + + _context->impl()->_return_type.type = FunctionContext::TYPE_ARRAY; + _context->impl()->_return_type.children.clear(); + _context->impl()->_return_type.children.push_back(childTypeDesc); + + IntVal v[10]; + + for (int i = 0; i < 10; ++i) { + v[i].val = i + 1; + } + + CollectionVal cv = ArrayFunctions::array(_context, 10, v); + + CollectionValue value = CollectionValue::from_collection_val(cv); + + int i = 0; + for (auto&& iter = value.iterator(TYPE_INT); iter.has_next(); iter.next()) { + i++; + IntVal a; + iter.value(&a); + EXPECT_EQ(i, a.val); + } + } +} + +} // namespace doris + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/olap/column_vector_test.cpp b/be/test/olap/column_vector_test.cpp index 39e6b020cf..476f6ceb4c 100644 --- a/be/test/olap/column_vector_test.cpp +++ b/be/test/olap/column_vector_test.cpp @@ -19,10 +19,10 @@ #include -#include "olap/collection.h" #include "olap/field.h" #include "olap/tablet_schema_helper.h" #include "olap/types.cpp" +#include "runtime/collection_value.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" @@ -72,7 +72,8 @@ void test_read_write_scalar_column_vector(const TypeInfo* type_info, const uint8 } template -void 
test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, size_t array_size, Collection* result) { +void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, size_t array_size, + CollectionValue* result) { DCHECK(array_size > 1); using ItemType = typename TypeTraits::CppType; @@ -85,7 +86,8 @@ void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, s size_t array_init_size = array_size / 2; std::unique_ptr cvb; - ASSERT_TRUE(ColumnVectorBatch::create(array_init_size, true, array_type_info, field, &cvb).ok()); + ASSERT_TRUE( + ColumnVectorBatch::create(array_init_size, true, array_type_info, field, &cvb).ok()); auto* array_cvb = reinterpret_cast(cvb.get()); ColumnVectorBatch* item_cvb = array_cvb->elements(); @@ -93,7 +95,8 @@ void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, s // first write for (size_t i = 0; i < array_init_size; ++i) { - memcpy(offset_cvb->mutable_cell_ptr(1 + i), &(result[i].length), sizeof(segment_v2::ordinal_t)); + uint32_t len = result[i].length(); + memcpy(offset_cvb->mutable_cell_ptr(1 + i), &len, sizeof(uint32_t)); } array_cvb->set_null_bits(0, array_init_size, false); array_cvb->get_offset_by_length(0, array_init_size); @@ -101,7 +104,8 @@ void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, s size_t first_write_item = array_cvb->item_offset(array_init_size) - array_cvb->item_offset(0); ASSERT_TRUE(item_cvb->resize(first_write_item).ok()); for (size_t i = 0; i < array_init_size; ++i) { - memcpy(item_cvb->mutable_cell_ptr(array_cvb->item_offset(i)), result[i].data, result[i].length * ITEM_TYPE_SIZE); + memcpy(item_cvb->mutable_cell_ptr(array_cvb->item_offset(i)), result[i].data(), + result[i].length() * ITEM_TYPE_SIZE); } item_cvb->set_null_bits(0, first_write_item, false); @@ -110,7 +114,8 @@ void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, s // second write 
ASSERT_TRUE(array_cvb->resize(array_size).ok()); for (int i = array_init_size; i < array_size; ++i) { - memcpy(offset_cvb->mutable_cell_ptr(i + 1), &(result[i].length), sizeof(segment_v2::ordinal_t)); + uint32_t len = result[i].length(); + memcpy(offset_cvb->mutable_cell_ptr(i + 1), &len, sizeof(uint32_t)); } array_cvb->set_null_bits(array_init_size, array_size - array_init_size, false); array_cvb->get_offset_by_length(array_init_size, array_size - array_init_size); @@ -119,14 +124,16 @@ void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, s ASSERT_TRUE(item_cvb->resize(total_item_size).ok()); for (size_t i = array_init_size; i < array_size; ++i) { - memcpy(item_cvb->mutable_cell_ptr(array_cvb->item_offset(i)), result[i].data, result[i].length * ITEM_TYPE_SIZE); + memcpy(item_cvb->mutable_cell_ptr(array_cvb->item_offset(i)), result[i].data(), + result[i].length() * ITEM_TYPE_SIZE); } size_t second_write_item = total_item_size - first_write_item; item_cvb->set_null_bits(first_write_item, second_write_item, false); array_cvb->prepare_for_read(0, array_size, false); for (size_t idx = 0; idx < array_size; ++idx) { - ASSERT_TRUE(array_type_info->equal(&result[idx], array_cvb->cell_ptr(idx))) << "idx:" << idx; + ASSERT_TRUE(array_type_info->equal(&result[idx], array_cvb->cell_ptr(idx))) + << "idx:" << idx; } delete field; } @@ -158,7 +165,7 @@ TEST_F(ColumnVectorTest, array_column_vector_test) { size_t num_array = 1024; size_t num_item = num_array * 3; { - auto* array_val = new Collection[num_array]; + auto* array_val = new CollectionValue[num_array]; bool null_signs[3] = {false, false, false}; auto* item_val = new uint8_t[num_item]; @@ -167,14 +174,15 @@ TEST_F(ColumnVectorTest, array_column_vector_test) { item_val[i] = i; if (i % 3 == 0) { size_t array_index = i / 3; - array_val[array_index].data = &item_val[i]; - array_val[array_index].null_signs = null_signs; - array_val[array_index].length = 3; + 
array_val[array_index].set_data(&item_val[i]); + array_val[array_index].set_null_signs(null_signs); + array_val[array_index].set_length(3); } } auto type_info = reinterpret_cast( ArrayTypeInfoResolver::instance()->get_type_info(OLAP_FIELD_TYPE_TINYINT)); - test_read_write_array_column_vector(type_info, num_array, array_val); + test_read_write_array_column_vector(type_info, num_array, + array_val); delete[] array_val; delete[] item_val; diff --git a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp index 23a2d3936e..4f0990c998 100644 --- a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp +++ b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp @@ -31,8 +31,8 @@ #include "olap/types.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" -#include "util/file_utils.h" #include "test_util/test_util.h" +#include "util/file_utils.h" using std::string; @@ -221,9 +221,9 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, } template -void test_array_nullable_data(Collection* src_data, uint8_t* src_is_null, int num_rows, +void test_array_nullable_data(CollectionValue* src_data, uint8_t* src_is_null, int num_rows, std::string test_name) { - Collection* src = src_data; + CollectionValue* src = src_data; ColumnMetaPB meta; TabletColumn list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY); int32 item_length = 0; @@ -375,7 +375,7 @@ TEST_F(ColumnReaderWriterTest, test_array_type) { size_t num_item = num_array * 3; uint8_t* array_is_null = new uint8_t[BitmapSize(num_array)]; - Collection* array_val = new Collection[num_array]; + CollectionValue* array_val = new CollectionValue[num_array]; bool* item_is_null = new bool[num_item]; uint8_t* item_val = new uint8_t[num_item]; for (int i = 0; i < num_item; ++i) { @@ -388,9 +388,9 @@ TEST_F(ColumnReaderWriterTest, test_array_type) { if (is_null) { continue; } - array_val[array_index].data = 
&item_val[i]; - array_val[array_index].null_signs = &item_is_null[i]; - array_val[array_index].length = 3; + array_val[array_index].set_data(&item_val[i]); + array_val[array_index].set_null_signs(&item_is_null[i]); + array_val[array_index].set_length(3); } } test_array_nullable_data( @@ -400,7 +400,7 @@ TEST_F(ColumnReaderWriterTest, test_array_type) { delete[] item_val; delete[] item_is_null; - array_val = new Collection[num_array]; + array_val = new CollectionValue[num_array]; Slice* varchar_vals = new Slice[3]; item_is_null = new bool[3]; for (int i = 0; i < 3; ++i) { @@ -415,9 +415,9 @@ TEST_F(ColumnReaderWriterTest, test_array_type) { if (is_null) { continue; } - array_val[i].data = varchar_vals; - array_val[i].null_signs = item_is_null; - array_val[i].length = 3; + array_val[i].set_data(varchar_vals); + array_val[i].set_null_signs(item_is_null); + array_val[i].set_length(3); } test_array_nullable_data( array_val, array_is_null, num_array, "null_array_chars"); diff --git a/be/test/olap/storage_types_test.cpp b/be/test/olap/storage_types_test.cpp index d85e6fc8a8..78819cc92a 100644 --- a/be/test/olap/storage_types_test.cpp +++ b/be/test/olap/storage_types_test.cpp @@ -146,7 +146,7 @@ TEST(TypesTest, copy_and_equal) { } template -void common_test_array(Collection src_val) { +void common_test_array(CollectionValue src_val) { TabletColumn list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY); int32 item_length = 0; if (item_type == OLAP_FIELD_TYPE_CHAR || item_type == OLAP_FIELD_TYPE_VARCHAR) { @@ -160,7 +160,7 @@ void common_test_array(Collection src_val) { ASSERT_EQ(item_type, array_type->item_type_info()->type()); { // test deep copy - Collection dst_val; + CollectionValue dst_val; auto tracker = std::make_shared(); MemPool pool(tracker.get()); array_type->deep_copy((char*)&dst_val, (char*)&src_val, &pool); @@ -170,7 +170,7 @@ void common_test_array(Collection src_val) { { // test direct copy bool null_signs[50]; uint8_t data[50]; - Collection 
dst_val(data, sizeof(null_signs), null_signs); + CollectionValue dst_val(data, sizeof(null_signs), null_signs); array_type->direct_copy((char*)&dst_val, (char*)&src_val); ASSERT_TRUE(array_type->equal((char*)&src_val, (char*)&dst_val)); ASSERT_EQ(0, array_type->cmp((char*)&src_val, (char*)&dst_val)); @@ -180,45 +180,45 @@ void common_test_array(Collection src_val) { TEST(ArrayTypeTest, copy_and_equal) { bool bool_array[3] = {true, false, true}; bool null_signs[3] = {true, true, true}; - common_test_array(Collection(bool_array, 3, null_signs)); + common_test_array(CollectionValue(bool_array, 3, null_signs)); uint8_t tiny_int_array[3] = {3, 4, 5}; - common_test_array(Collection(tiny_int_array, 3, null_signs)); + common_test_array(CollectionValue(tiny_int_array, 3, null_signs)); int16_t small_int_array[3] = {123, 234, 345}; - common_test_array(Collection(small_int_array, 3, null_signs)); + common_test_array(CollectionValue(small_int_array, 3, null_signs)); int32_t int_array[3] = {-123454321, 123454321, 323412343}; - common_test_array(Collection(int_array, 3, null_signs)); + common_test_array(CollectionValue(int_array, 3, null_signs)); uint32_t uint_array[3] = {123454321, 2342341, 52435234}; - common_test_array(Collection(uint_array, 3, null_signs)); + common_test_array(CollectionValue(uint_array, 3, null_signs)); int64_t bigint_array[3] = {123454321123456789L, 23534543234L, -123454321123456789L}; - common_test_array(Collection(bigint_array, 3, null_signs)); + common_test_array(CollectionValue(bigint_array, 3, null_signs)); __int128 large_int_array[3] = {1234567899L, 1234567899L, -12345631899L}; - common_test_array(Collection(large_int_array, 3, null_signs)); + common_test_array(CollectionValue(large_int_array, 3, null_signs)); float float_array[3] = {1.11, 2.22, -3.33}; - common_test_array(Collection(float_array, 3, null_signs)); + common_test_array(CollectionValue(float_array, 3, null_signs)); double double_array[3] = {12221.11, 12221.11, -12221.11}; - 
common_test_array(Collection(double_array, 3, null_signs)); + common_test_array(CollectionValue(double_array, 3, null_signs)); decimal12_t decimal_array[3] = {{123, 234}, {345, 453}, {4524, 2123}}; - common_test_array(Collection(decimal_array, 3, null_signs)); + common_test_array(CollectionValue(decimal_array, 3, null_signs)); uint24_t date_array[3] = {(1988 << 9) | (2 << 5) | 1, (1998 << 9) | (2 << 5) | 1, (2008 << 9) | (2 << 5) | 1}; - common_test_array(Collection(date_array, 3, null_signs)); + common_test_array(CollectionValue(date_array, 3, null_signs)); int64_t datetime_array[3] = {19880201010203L, 19980201010203L, 20080204010203L}; - common_test_array(Collection(datetime_array, 3, null_signs)); + common_test_array(CollectionValue(datetime_array, 3, null_signs)); Slice char_array[3] = {"12345abcde", "12345abcde", "asdf322"}; - common_test_array(Collection(char_array, 3, null_signs)); - common_test_array(Collection(char_array, 3, null_signs)); + common_test_array(CollectionValue(char_array, 3, null_signs)); + common_test_array(CollectionValue(char_array, 3, null_signs)); } } // namespace doris diff --git a/be/test/runtime/CMakeLists.txt b/be/test/runtime/CMakeLists.txt index 90087d2055..fc3e37aa38 100644 --- a/be/test/runtime/CMakeLists.txt +++ b/be/test/runtime/CMakeLists.txt @@ -62,3 +62,4 @@ ADD_BE_TEST(external_scan_context_mgr_test) ADD_BE_TEST(memory/chunk_allocator_test) ADD_BE_TEST(memory/system_allocator_test) ADD_BE_TEST(cache/partition_cache_test) +ADD_BE_TEST(collection_value_test) diff --git a/be/test/runtime/collection_value_test.cpp b/be/test/runtime/collection_value_test.cpp new file mode 100644 index 0000000000..b9825b984e --- /dev/null +++ b/be/test/runtime/collection_value_test.cpp @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "runtime/collection_value.h" + +#include + +#include "string" +#include "util/bitmap.h" + +#define private public + +namespace doris { + +TEST(CollectionValueTest, init) { + { + CollectionValue cv; + + ObjectPool pool; + EXPECT_TRUE(CollectionValue::init_collection(&pool, 10, TYPE_INT, &cv).ok()); + + EXPECT_EQ(10, cv.size()); + + for (int j = 0; j < 10; ++j) { + EXPECT_FALSE(*(cv._null_signs + j)); + } + + EXPECT_FALSE(CollectionValue::init_collection(&pool, 10, TYPE_INT, nullptr).ok()); + + CollectionValue cv_null; + bzero(&cv_null, sizeof(cv_null)); + EXPECT_TRUE(CollectionValue::init_collection(&pool, 0, TYPE_INT, &cv_null).ok()); + EXPECT_EQ(0, cv_null.size()); + } + + { + CollectionValue cv; + ObjectPool pool; + EXPECT_TRUE(CollectionValue::init_collection(&pool, 10, TYPE_INT, &cv).ok()); + } +} + +TEST(CollectionValueTest, set) { + CollectionValue cv; + ObjectPool pool; + EXPECT_TRUE(CollectionValue::init_collection(&pool, 10, TYPE_INT, &cv).ok()); + + // normal + { + IntVal v0 = IntVal::null(); + cv.set(0, TYPE_INT, &v0); + for (int j = 1; j < cv.size(); ++j) { + IntVal i(j + 10); + ASSERT_TRUE(cv.set(j, TYPE_INT, &i).ok()); + } + } + + { + auto iter = cv.iterator(TYPE_INT); + IntVal v0; + iter.value(&v0); + ASSERT_TRUE(v0.is_null); + ASSERT_TRUE(iter.is_null()); + iter.next(); + for (int k = 1; k < cv.size(); ++k, iter.next()) { + IntVal v; + iter.value(&v); + 
EXPECT_EQ(k + 10, v.val); + } + } + + // over size + { + IntVal intv(20); + ASSERT_FALSE(cv.set(10, TYPE_INT, &intv).ok()); + } +} +} // namespace doris + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/be/test/util/CMakeLists.txt b/be/test/util/CMakeLists.txt index d6bab33fe6..74374cdafe 100644 --- a/be/test/util/CMakeLists.txt +++ b/be/test/util/CMakeLists.txt @@ -63,6 +63,7 @@ ADD_BE_TEST(monotime_test) ADD_BE_TEST(scoped_cleanup_test) ADD_BE_TEST(thread_test) ADD_BE_TEST(threadpool_test) +ADD_BE_TEST(mysql_row_buffer_test) ADD_BE_TEST(trace_test) ADD_BE_TEST(easy_json-test) ADD_BE_TEST(http_channel_test) diff --git a/be/test/util/mysql_row_buffer_test.cpp b/be/test/util/mysql_row_buffer_test.cpp new file mode 100644 index 0000000000..43c9355355 --- /dev/null +++ b/be/test/util/mysql_row_buffer_test.cpp @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "util/mysql_row_buffer.h" + +#include +#include +#include + +#include + +#include "env/env.h" +#include "gutil/strings/util.h" +#include "util/logging.h" + +namespace doris { + +using namespace strings; + +TEST(MysqlRowBufferTest, basic) { + MysqlRowBuffer mrb; + + std::string s("test"); + mrb.push_tinyint(5); + mrb.push_smallint(120); + mrb.push_int(-30000); + mrb.push_bigint(900000); + mrb.push_unsigned_bigint(90000000); + mrb.push_float(56.45); + mrb.push_double(10.12); + mrb.push_string(s.c_str(), 4); + mrb.push_null(); + + const char* buf = mrb.buf(); + + // mem: size-data-size-data + // 1-'5'-3-'120'-6-'-30000'-6-'900000'-8-'90000000'-5-'56.45'-5-'10.12'-4-'test'-251 + // 1b-1b-1b-3b--1b-----6b--1b----6b---1b-----8b----1b---5b---1b---5b---1b---4b---1b + // 0 1 2 3 6 7 13 14 20 21 29 30 35 36 41 42 46 + EXPECT_EQ(47, mrb.length()); + + EXPECT_EQ(1, *((int8_t*)(buf))); + EXPECT_EQ(0, strncmp(buf + 1, "5", 1)); + + EXPECT_EQ(3, *((int8_t*)(buf + 2))); + EXPECT_EQ(0, strncmp(buf + 3, "120", 3)); + + EXPECT_EQ(6, *((int8_t*)(buf + 6))); + EXPECT_EQ(0, strncmp(buf + 7, "-30000", 6)); + + EXPECT_EQ(6, *((int8_t*)(buf + 13))); + EXPECT_EQ(0, strncmp(buf + 14, "900000", 6)); + + EXPECT_EQ(8, *((int8_t*)(buf + 20))); + EXPECT_EQ(0, strncmp(buf + 21, "90000000", 8)); + + EXPECT_EQ(5, *((int8_t*)(buf + 29))); + EXPECT_EQ(0, strncmp(buf + 30, "56.45", 5)); + + EXPECT_EQ(5, *((int8_t*)(buf + 35))); + EXPECT_EQ(0, strncmp(buf + 36, "10.12", 5)); + + EXPECT_EQ(4, *((int8_t*)(buf + 41))); + EXPECT_EQ(0, strncmp(buf + 42, "test", 4)); + + EXPECT_EQ(251, *((uint8_t*)(buf + 46))); +} + +TEST(MysqlRowBufferTest, dynamic_mode) { + MysqlRowBuffer mrb; + + mrb.open_dynamic_mode(); + + std::string s("test"); + mrb.push_tinyint(5); + mrb.push_smallint(120); + mrb.push_int(-30000); + mrb.push_bigint(900000); + mrb.push_unsigned_bigint(90000000); + mrb.push_float(56.45); + mrb.push_double(10.12); + mrb.push_string(s.c_str(), 4); + mrb.push_null(); + + 
mrb.close_dynamic_mode(); + + const char* buf = mrb.buf(); + + // mem: size-data-data + // 254-48-'5'-'120'-'-30000'-'900000'-'90000000'-'56.45'-'10.12'-'test'-'' + // 1b--8b-1b----3b-----6b-------6b--------8b-------5b------5b------4b---0b + // 0 1 9 10 13 19 25 33 38 43 47 + EXPECT_EQ(47, mrb.length()); + + EXPECT_EQ(254, *((uint8_t*)(buf))); + EXPECT_EQ(38, *((int64_t*)(buf + 1))); + + EXPECT_EQ(0, strncmp(buf + 9, "5", 1)); + EXPECT_EQ(0, strncmp(buf + 10, "120", 3)); + EXPECT_EQ(0, strncmp(buf + 13, "-30000", 6)); + EXPECT_EQ(0, strncmp(buf + 19, "900000", 6)); + EXPECT_EQ(0, strncmp(buf + 25, "90000000", 8)); + EXPECT_EQ(0, strncmp(buf + 33, "56.45", 5)); + EXPECT_EQ(0, strncmp(buf + 38, "10.12", 5)); + EXPECT_EQ(0, strncmp(buf + 43, "test", 4)); +} + +} // namespace doris + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/fe/fe-core/AlterRoutineLoadOperationLogTest b/fe/fe-core/AlterRoutineLoadOperationLogTest new file mode 100644 index 0000000000000000000000000000000000000000..ae3953e57df97846669fee9c3d828d9fdc1c9340 GIT binary patch literal 478 zcmZQzV7yh$rJ$gcm7nC9qGY9DXkcKV0~H7;$}dPQD#=VORlP%>5GQc$P`YfnimNes>}ElN(sW=ctAK`Katx1*c4 zqY_Aif`U?ZVp?`$Kw?ozW=Up#o_|_eacT+3=4dVjFo3uK#8gVlEGjNhvQhv$6(pjd zpj4cioSz5tf&o+y*fAglSTq|#>;=gpG#eQh7{E1y4T*&)Q%Wu^F3HdJ20IcnU=$RT z(u?v-3-mHUPEg9N1oJ@Q0Syra1*OblsHfcWi$XGU!NHoAm{XhzwE-LePDT0IsYO1S xVE-w>!Uk+@NPaz~&PdEl&q&M!*;~t13jnnxf5HF& literal 0 HcmV?d00001 diff --git a/fe/fe-core/diskInfoTest b/fe/fe-core/diskInfoTest new file mode 100644 index 0000000000000000000000000000000000000000..866d0394bc96ca2dd132d8286fb140f8c668b583 GIT binary patch literal 158 zcmZQzV3<|SrJ$fxl%HP`kXVwTWTl{_pORUeZK$LJ5--UwNz8FhEJ#ewEU9!-vQjWK zFgG?gF|;r;0n4W(mL!H2r>3APF|jZ;H-pG$7H2z_C1&O%Cgr4}DzdaRH8eCcGB<}R VEiOqcNd?*O@8{#`=c>e23jm(xD~kXC literal 0 HcmV?d00001 diff --git a/fe/fe-core/src/main/cup/sql_parser.cup 
b/fe/fe-core/src/main/cup/sql_parser.cup index 8e509aec61..c454336457 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -34,6 +34,10 @@ import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; +import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.MapType; +import org.apache.doris.catalog.StructField; +import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.View; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Version; @@ -63,6 +67,7 @@ parser code {: private boolean reportExpectedToken(Integer tokenId) { if (SqlScanner.isKeyword(tokenId) || tokenId.intValue() == SqlParserSymbols.COMMA || + tokenId.intValue() == SqlParserSymbols.DOT || tokenId.intValue() == SqlParserSymbols.IDENT) { return true; } else { @@ -228,10 +233,10 @@ parser code {: :}; // Total keywords of doris -terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_AND, KW_ANTI, KW_APPEND, KW_AS, KW_ASC, KW_AUTHORS, +terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_AND, KW_ANTI, KW_APPEND, KW_AS, KW_ASC, KW_AUTHORS, KW_ARRAY, KW_BACKEND, KW_BACKUP, KW_BETWEEN, KW_BEGIN, KW_BIGINT, KW_BITMAP, KW_BITMAP_UNION, KW_BOOLEAN, KW_BROKER, KW_BACKENDS, KW_BY, KW_BUILTIN, KW_CANCEL, KW_CASE, KW_CAST, KW_CHAIN, KW_CHAR, KW_CHARSET, KW_CHECK, KW_CLUSTER, KW_CLUSTERS, - KW_COLLATE, KW_COLLATION, KW_COLUMN, KW_COLUMNS, KW_COMMENT, KW_COMMIT, KW_COMMITTED, + KW_COLLATE, KW_COLLATION, KW_COLUMN, KW_COLON, KW_COLUMNS, KW_COMMENT, KW_COMMIT, KW_COMMITTED, KW_CONFIG, KW_CONNECTION, KW_CONNECTION_ID, KW_CONSISTENT, KW_CONVERT, KW_COUNT, KW_CREATE, KW_CROSS, KW_CUBE, KW_CURRENT, KW_CURRENT_USER, KW_DATA, KW_DATABASE, KW_DATABASES, KW_DATE, KW_DATETIME, KW_DAY, KW_DECIMAL, KW_DECOMMISSION, KW_DEFAULT, KW_DESC, KW_DESCRIBE, KW_DELETE, 
KW_DISTINCT, KW_DISTINCTPC, KW_DISTINCTPCSA, KW_DISTRIBUTED, KW_DISTRIBUTION, KW_DYNAMIC, KW_BUCKETS, KW_DIV, KW_DOUBLE, KW_DROP, KW_DROPP, KW_DUPLICATE, @@ -246,8 +251,8 @@ terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_A KW_KEY, KW_KEYS, KW_KILL, KW_LABEL, KW_LARGEINT, KW_LAST, KW_LEFT, KW_LESS, KW_LEVEL, KW_LIKE, KW_LIMIT, KW_LINK, KW_LIST, KW_LOAD, KW_LOCAL, KW_LOCATION, - KW_MATERIALIZED, KW_MAX, KW_MAX_VALUE, KW_MERGE, KW_MIN, KW_MINUTE, KW_MINUS, KW_MIGRATE, KW_MIGRATIONS, KW_MODIFY, KW_MONTH, - KW_NAME, KW_NAMES, KW_NEGATIVE, KW_NO, KW_NOT, KW_NULL, KW_NULLS, + KW_MAP, KW_MATERIALIZED, KW_MAX, KW_MAX_VALUE, KW_MERGE, KW_MIN, KW_MINUTE, KW_MINUS, KW_MIGRATE, KW_MIGRATIONS, KW_MODIFY, KW_MONTH, + KW_NAME, KW_NAMED_STRUCT, KW_NAMES, KW_NEGATIVE, KW_NO, KW_NOT, KW_NULL, KW_NULLS, KW_OBSERVER, KW_OFFSET, KW_ON, KW_ONLY, KW_OPEN, KW_OR, KW_ORDER, KW_OUTER, KW_OUTFILE, KW_OVER, KW_PARTITION, KW_PARTITIONS, KW_PASSWORD, KW_PATH, KW_PAUSE, KW_PIPE, KW_PRECEDING, KW_PLUGIN, KW_PLUGINS, @@ -257,7 +262,7 @@ terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_A KW_REPAIR, KW_REPEATABLE, KW_REPOSITORY, KW_REPOSITORIES, KW_REPLACE, KW_REPLACE_IF_NOT_NULL, KW_REPLICA, KW_RESOURCE, KW_RESOURCES, KW_RESTORE, KW_RETURNS, KW_RESUME, KW_REVOKE, KW_RIGHT, KW_ROLE, KW_ROLES, KW_ROLLBACK, KW_ROLLUP, KW_ROUTINE, KW_ROW, KW_ROWS, KW_S3, KW_SCHEMA, KW_SCHEMAS, KW_SECOND, KW_SELECT, KW_SEMI, KW_SERIALIZABLE, KW_SESSION, KW_SET, KW_SETS, KW_SET_VAR, KW_SHOW, KW_SIGNED, - KW_SMALLINT, KW_SNAPSHOT, KW_SONAME, KW_SPLIT, KW_START, KW_STATUS, KW_STOP, KW_STORAGE, KW_STREAM, KW_STRING, + KW_SMALLINT, KW_SNAPSHOT, KW_SONAME, KW_SPLIT, KW_START, KW_STATUS, KW_STOP, KW_STORAGE, KW_STREAM, KW_STRING, KW_STRUCT, KW_SUM, KW_SUPERUSER, KW_SYNC, KW_SYSTEM, KW_TABLE, KW_TABLES, KW_TABLET, KW_TASK, KW_TEMPORARY, KW_TERMINATED, KW_THAN, KW_TIME, KW_THEN, KW_TIMESTAMP, KW_TINYINT, KW_TO, KW_TRANSACTION, KW_TRIGGERS, KW_TRIM, KW_TRUE, 
KW_TRUNCATE, KW_TYPE, KW_TYPES, @@ -266,7 +271,7 @@ terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_A KW_WARNINGS, KW_WEEK, KW_WHEN, KW_WHITELIST, KW_WHERE, KW_WITH, KW_WORK, KW_WRITE, KW_YEAR; -terminal COMMA, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON, LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT; +terminal COMMA, COLON, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON, LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT; terminal BITAND, BITOR, BITXOR, BITNOT; terminal EQUAL, NOT, LESSTHAN, GREATERTHAN, SET_VAR; terminal COMMENTED_PLAN_HINT_START, COMMENTED_PLAN_HINT_END; @@ -367,11 +372,14 @@ nonterminal LiteralExpr literal; nonterminal CaseExpr case_expr; nonterminal ArrayList case_when_clause_list; nonterminal FunctionParams function_params; -nonterminal Expr function_call_expr; +nonterminal Expr function_call_expr, array_expr; +nonterminal StructField struct_field; +nonterminal ArrayList struct_field_list; nonterminal AnalyticWindow opt_window_clause; nonterminal AnalyticWindow.Type window_type; nonterminal AnalyticWindow.Boundary window_boundary; nonterminal SlotRef column_ref; +nonterminal FunctionCallExpr column_subscript; nonterminal ArrayList table_ref_list, base_table_ref_list; nonterminal FromClause from_clause; nonterminal TableRef table_ref; @@ -4087,6 +4095,12 @@ type ::= :} | KW_VARCHAR {: RESULT = ScalarType.createVarcharType(-1); :} + | KW_ARRAY LESSTHAN type:value_type GREATERTHAN + {: RESULT = new ArrayType(value_type); :} + | KW_MAP LESSTHAN type:key_type COMMA type:value_type GREATERTHAN + {: RESULT = new MapType(key_type,value_type); :} + | KW_STRUCT LESSTHAN struct_field_list:fields GREATERTHAN + {: RESULT = new StructType(fields); :} | KW_CHAR LPAREN INTEGER_LITERAL:len RPAREN {: ScalarType type = ScalarType.createCharType(len.intValue()); type.setAssignedStrLenInColDefinition(); @@ -4237,6 +4251,34 @@ function_call_expr ::= :} ; +array_expr ::= + KW_ARRAY LPAREN function_params:params RPAREN + {: + 
RESULT = new FunctionCallExpr("array", params); + :} + | KW_ARRAY LPAREN RPAREN + {: + RESULT = new ArrayLiteral(); + :} + ; + +struct_field ::= + ident:name COLON type:type + {: RESULT = new StructField(name, type); :} + ; + +struct_field_list ::= + struct_field:field + {: + RESULT = Lists.newArrayList(field); + :} + | struct_field_list:fields COMMA struct_field:field + {: + fields.add(field); + RESULT = fields; + :} + ; + exists_predicate ::= KW_EXISTS subquery:s {: RESULT = new ExistsPredicate(s, false); :} @@ -4255,6 +4297,8 @@ non_pred_expr ::= :} | literal:l {: RESULT = l; :} + | array_expr:a + {: RESULT = a; :} | function_call_expr:e {: RESULT = e; :} | KW_DATE STRING_LITERAL:l @@ -4283,6 +4327,8 @@ non_pred_expr ::= {: RESULT = c; :} | column_ref:c {: RESULT = c; :} + | column_subscript:c + {: RESULT = c; :} | timestamp_arithmetic_expr:e {: RESULT = e; :} | arithmetic_expr:e @@ -4632,6 +4678,15 @@ column_ref ::= {: RESULT = new SlotRef(new TableName(db, tbl), col); :} ; +column_subscript ::= + expr:e LBRACKET expr:index RBRACKET + {: ArrayList list = new ArrayList(); + list.add(e); + list.add(index); + RESULT = new FunctionCallExpr("%element_extract%", list); + :} + ; + privilege_type ::= ident:name {: @@ -4806,6 +4861,8 @@ keyword ::= {: RESULT = id; :} | KW_AUTHORS:id {: RESULT = id; :} + | KW_ARRAY:id + {: RESULT = id; :} | KW_BACKUP:id {: RESULT = id; :} | KW_BEGIN:id @@ -5086,6 +5143,8 @@ keyword ::= {: RESULT = id; :} | KW_FEATURE:id {: RESULT = id; :} + | KW_MAP:id + {: RESULT = id; :} ; // Identifier that contain keyword diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java new file mode 100644 index 0000000000..74cb323cc5 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang.StringUtils; +import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.Type; +import org.apache.doris.thrift.TExprNode; +import org.apache.doris.thrift.TExprNodeType; + +public class ArrayLiteral extends LiteralExpr { + + public ArrayLiteral() { + this.type = new ArrayType(Type.NULL); + children = new ArrayList<>(); + } + + public ArrayLiteral(LiteralExpr... 
v) { + if (v.length < 1) { + this.type = new ArrayType(Type.NULL); + return; + } + + this.type = new ArrayType(v[0].type); + children = new ArrayList<>(v.length); + children.addAll(Arrays.asList(v)); + } + + protected ArrayLiteral(ArrayLiteral other) { + super(other); + } + + @Override + public boolean isMinValue() { + return false; + } + + @Override + public int compareLiteral(LiteralExpr expr) { + return 0; + } + + @Override + protected String toSqlImpl() { + List list = new ArrayList<>(children.size()); + children.forEach(v -> list.add(v.toSqlImpl())); + + return "ARRAY(" + StringUtils.join(list, ", ") + ")"; + } + + @Override + public String getStringValue() { + List list = new ArrayList<>(children.size()); + children.forEach(v -> list.add(((LiteralExpr) v).getStringValue())); + + return "ARRAY[" + StringUtils.join(list, ", ") + "]"; + } + + @Override + protected void toThrift(TExprNode msg) { + msg.node_type = TExprNodeType.ARRAY_LITERAL; + msg.setChildType(((ArrayType) type).getItemType().getPrimitiveType().toThrift()); + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + out.writeInt(children.size()); + for (Expr e : children) { + Expr.writeTo(e, out); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + int size = in.readInt(); + children = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + children.add(Expr.readIn(in)); + } + } + + public static ArrayLiteral read(DataInput in) throws IOException { + ArrayLiteral literal = new ArrayLiteral(); + literal.readFields(in); + return literal; + } + + @Override + public Expr clone() { + return new ArrayLiteral(this); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java index fc455db174..97f31840bf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java @@ -200,12 +200,30 @@ public class ColumnDef { } if (type.getPrimitiveType() == PrimitiveType.BITMAP) { - if (defaultValue.isSet) { + if (defaultValue.isSet && defaultValue != DefaultValue.NULL_DEFAULT_VALUE) { throw new AnalysisException("Bitmap type column can not set default value"); } defaultValue = DefaultValue.BITMAP_EMPTY_DEFAULT_VALUE; } + if (type.getPrimitiveType() == PrimitiveType.ARRAY) { + if (defaultValue.isSet && defaultValue != DefaultValue.NULL_DEFAULT_VALUE) { + throw new AnalysisException("Array type column default value only support null"); + } + } + + if (type.getPrimitiveType() == PrimitiveType.MAP) { + if (defaultValue.isSet && defaultValue != DefaultValue.NULL_DEFAULT_VALUE) { + throw new AnalysisException("Map type column default value just support null"); + } + } + + if (type.getPrimitiveType() == PrimitiveType.STRUCT) { + if (defaultValue.isSet && defaultValue != DefaultValue.NULL_DEFAULT_VALUE) { + throw new AnalysisException("Struct type column default value just support null"); + } + } + // If aggregate type is REPLACE_IF_NOT_NULL, we set it nullable. 
// If default value is not set, we set it NULL if (aggregateType == AggregateType.REPLACE_IF_NOT_NULL) { @@ -267,6 +285,12 @@ public class ColumnDef { break; case BITMAP: break; + case ARRAY: + break; + case MAP: + break; + case STRUCT: + break; case BOOLEAN: BoolLiteral boolLiteral = new BoolLiteral(defaultValue); break; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java index d20c75a0b5..78fb1550e3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java @@ -18,6 +18,7 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.AggregateType; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Index; @@ -350,6 +351,21 @@ public class CreateTableStmt extends DdlStmt { for (ColumnDef columnDef : columnDefs) { columnDef.analyze(engineName.equals("olap")); + if (columnDef.getType().isArrayType()) { + ArrayType tp = (ArrayType) columnDef.getType(); + if (!tp.getItemType().getPrimitiveType().isIntegerType() && + !tp.getItemType().getPrimitiveType().isCharFamily()) { + throw new AnalysisException("Array column just support INT/VARCHAR sub-type"); + } + if (columnDef.getAggregateType() != null && columnDef.getAggregateType() != AggregateType.NONE) { + throw new AnalysisException("Array column can't support aggregation " + columnDef.getAggregateType()); + } + if (columnDef.isKey()) { + throw new AnalysisException("Array can only be used in the non-key column of" + + " the duplicate table at present."); + } + } + if (columnDef.getType().isHllType()) { hasHll = true; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index cafd88fc54..1b3dabae9f 100755 --- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -1582,7 +1582,8 @@ abstract public class Expr extends TreeNode implements ParseNode, Cloneabl DATE_LITERAL(9), MAX_LITERAL(10), BINARY_PREDICATE(11), - FUNCTION_CALL(12); + FUNCTION_CALL(12), + ARRAY_LITERAL(13); private static Map codeMap = Maps.newHashMap(); @@ -1630,6 +1631,8 @@ abstract public class Expr extends TreeNode implements ParseNode, Cloneabl output.writeInt(ExprSerCode.BINARY_PREDICATE.getCode()); } else if (expr instanceof FunctionCallExpr) { output.writeInt(ExprSerCode.FUNCTION_CALL.getCode()); + } else if (expr instanceof ArrayLiteral) { + output.writeInt(ExprSerCode.ARRAY_LITERAL.getCode()); } else { throw new IOException("Unknown class " + expr.getClass().getName()); } @@ -1671,6 +1674,8 @@ abstract public class Expr extends TreeNode implements ParseNode, Cloneabl return BinaryPredicate.read(in); case FUNCTION_CALL: return FunctionCallExpr.read(in); + case ARRAY_LITERAL: + return ArrayLiteral.read(in); default: throw new IOException("Unknown code: " + code); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java index 017802eb23..55f0f64565 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java @@ -19,12 +19,14 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Function; +import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.VariableMgr; import org.apache.doris.rewrite.FEFunction; +import org.apache.doris.rewrite.FEFunctionList; import 
org.apache.doris.rewrite.FEFunctions; import com.google.common.base.Joiner; @@ -96,7 +98,7 @@ public enum ExpressionFunctions { argTypes.add((ScalarType) type); } FEFunctionSignature signature = new FEFunctionSignature(fn.functionName(), - argTypes.toArray(new ScalarType[argTypes.size()]), (ScalarType) fn.getReturnType()); + argTypes.toArray(new ScalarType[argTypes.size()]), fn.getReturnType()); FEFunctionInvoker invoker = getFunction(signature); if (invoker != null) { try { @@ -149,22 +151,32 @@ public enum ExpressionFunctions { new ImmutableMultimap.Builder(); Class clazz = FEFunctions.class; for (Method method : clazz.getDeclaredMethods()) { - FEFunction annotation = method.getAnnotation(FEFunction.class); - if (annotation != null) { - String name = annotation.name(); - ScalarType returnType = ScalarType.createType(annotation.returnType()); - List argTypes = new ArrayList<>(); - for (String type : annotation.argTypes()) { - argTypes.add(ScalarType.createType(type)); + FEFunctionList annotationList = method.getAnnotation(FEFunctionList.class); + if (annotationList != null) { + for (FEFunction f : annotationList.value()) { + registerFEFunction(mapBuilder, method, f); } - FEFunctionSignature signature = new FEFunctionSignature(name, - argTypes.toArray(new ScalarType[argTypes.size()]), returnType); - mapBuilder.put(name, new FEFunctionInvoker(method, signature)); } + registerFEFunction(mapBuilder, method, method.getAnnotation(FEFunction.class)); } this.functions = mapBuilder.build(); } + private void registerFEFunction(ImmutableMultimap.Builder mapBuilder, + Method method, FEFunction annotation) { + if (annotation != null) { + String name = annotation.name(); + Type returnType = Type.fromPrimitiveType(PrimitiveType.valueOf(annotation.returnType())); + List argTypes = new ArrayList<>(); + for (String type : annotation.argTypes()) { + argTypes.add(ScalarType.createType(type)); + } + FEFunctionSignature signature = new FEFunctionSignature(name, + argTypes.toArray(new 
ScalarType[argTypes.size()]), returnType); + mapBuilder.put(name, new FEFunctionInvoker(method, signature)); + } + } + public static class FEFunctionInvoker { private final Method method; private final FEFunctionSignature signature; @@ -239,6 +251,12 @@ public enum ExpressionFunctions { } else { throw new IllegalArgumentException("Doris doesn't support type:" + argType); } + + // if args all is NullLiteral + long size = args.stream().filter(e -> e instanceof NullLiteral).count(); + if (args.size() == size) { + exprs = new NullLiteral[args.size()]; + } args.toArray(exprs); return exprs; } @@ -247,9 +265,9 @@ public enum ExpressionFunctions { public static class FEFunctionSignature { private final String name; private final ScalarType[] argTypes; - private final ScalarType returnType; + private final Type returnType; - public FEFunctionSignature(String name, ScalarType[] argTypes, ScalarType returnType) { + public FEFunctionSignature(String name, ScalarType[] argTypes, Type returnType) { this.name = name; this.argTypes = argTypes; this.returnType = returnType; @@ -259,7 +277,7 @@ public enum ExpressionFunctions { return argTypes; } - public ScalarType getReturnType() { + public Type getReturnType() { return returnType; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 9f41e695b5..a04cdb105b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -18,6 +18,7 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.AggregateFunction; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Function; @@ -69,6 +70,7 @@ public class FunctionCallExpr extends Expr { new ImmutableSortedSet.Builder(String.CASE_INSENSITIVE_ORDER) 
.add("stddev").add("stddev_val").add("stddev_samp") .add("variance").add("variance_pop").add("variance_pop").add("var_samp").add("var_pop").build(); + private static final String ELEMENT_EXTRACT_FN_NAME = "%element_extract%"; public void setIsAnalyticFnCall(boolean v) { isAnalyticFnCall = v; @@ -658,16 +660,18 @@ public class FunctionCallExpr extends Expr { } } - Type[] args = fn.getArgs(); - if (args.length > 0) { - // Implicitly cast all the children to match the function if necessary - for (int i = 0; i < argTypes.length; ++i) { - // For varargs, we must compare with the last type in callArgs.argTypes. - int ix = Math.min(args.length - 1, i); - if (!argTypes[i].matchesType(args[ix]) && !( - argTypes[i].isDateType() && args[ix].isDateType())) { - uncheckedCastChild(args[ix], i); - //if (argTypes[i] != args[ix]) castChild(args[ix], i); + if (!fn.getFunctionName().getFunction().equals(ELEMENT_EXTRACT_FN_NAME)) { + Type[] args = fn.getArgs(); + if (args.length > 0) { + // Implicitly cast all the children to match the function if necessary + for (int i = 0; i < argTypes.length; ++i) { + // For varargs, we must compare with the last type in callArgs.argTypes. 
+ int ix = Math.min(args.length - 1, i); + if (!argTypes[i].matchesType(args[ix]) && !( + argTypes[i].isDateType() && args[ix].isDateType())) { + uncheckedCastChild(args[ix], i); + //if (argTypes[i] != args[ix]) castChild(args[ix], i); + } } } } @@ -711,6 +715,18 @@ public class FunctionCallExpr extends Expr { } else { this.type = fn.getReturnType(); } + // rewrite return type if is nested type function + analyzeNestedFunction(); + } + + // if return type is nested type, need to be determined the sub-element type + private void analyzeNestedFunction() { + // array + if ("array".equalsIgnoreCase(fnName.getFunction())) { + if (children.size() > 0) { + this.type = new ArrayType(children.get(0).getType()); + } + } } @Override @@ -798,4 +814,3 @@ public class FunctionCallExpr extends Expr { return result; } } - diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/RangePartitionDesc.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/RangePartitionDesc.java index eb08ad2780..d5b4854303 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/RangePartitionDesc.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/RangePartitionDesc.java @@ -93,7 +93,10 @@ public class RangePartitionDesc extends PartitionDesc { partitionColumns.add(column); find = true; break; - + } + if (column.getType().isComplexType()) { + throw new DdlException("Complex type column can't be partition column: " + + column.getType().toString()); } } if (!find) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java index 775c981642..e6dffd8e46 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java @@ -20,10 +20,10 @@ package org.apache.doris.analysis; import java.util.ArrayList; import java.util.List; +import org.apache.doris.catalog.MultiRowType; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; -import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.StructField; import org.apache.doris.catalog.StructType; import org.apache.doris.common.AnalysisException; @@ -102,8 +102,8 @@ public class Subquery extends Expr { type = createStructTypeFromExprList(); } - // If the subquery returns many rows, set its type to ArrayType. - if (!((SelectStmt)stmt).returnsSingleRow()) type = new ArrayType(type); + // If the subquery returns many rows, set its type to MultiRowType. + if (!((SelectStmt)stmt).returnsSingleRow()) type = new MultiRowType(type); // Preconditions.checkNotNull(type); // type.analyze(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java index e62fa877fe..1f3ae733fc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java @@ -17,13 +17,20 @@ package org.apache.doris.analysis; +import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; +import org.apache.doris.catalog.StructField; +import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; import com.google.common.base.Preconditions; +import java.util.ArrayList; + /** * Represents an anonymous type definition, e.g., used in DDL and CASTs. 
*/ @@ -38,15 +45,19 @@ public class TypeDef implements ParseNode { public static TypeDef create(PrimitiveType type) { return new TypeDef(ScalarType.createType(type)); } + public static TypeDef createDecimal(int precision, int scale) { return new TypeDef(ScalarType.createDecimalV2Type(precision, scale)); } + public static TypeDef createVarchar(int len) { return new TypeDef(ScalarType.createVarchar(len)); } + public static TypeDef createChar(int len) { return new TypeDef(ScalarType.createChar(len)); } + @Override public void analyze(Analyzer analyzer) throws AnalysisException { if (isAnalyzed) { @@ -56,8 +67,8 @@ public class TypeDef implements ParseNode { // a stack overflow. if (parsedType.exceedsMaxNestingDepth()) { throw new AnalysisException(String.format( - "Type exceeds the maximum nesting depth of %s:\n%s", - Type.MAX_NESTING_DEPTH, parsedType.toSql())); + "Type exceeds the maximum nesting depth of %s:\n%s", + Type.MAX_NESTING_DEPTH, parsedType.toSql())); } analyze(parsedType); isAnalyzed = true; @@ -70,10 +81,49 @@ public class TypeDef implements ParseNode { if (type.isScalarType()) { analyzeScalarType((ScalarType) type); } + + if (type.isArrayType()) { + Type itemType = ((ArrayType) type).getItemType(); + analyze(itemType); + } + + if (type.isComplexType()) { + if (!Config.enable_complex_type_support) { + throw new AnalysisException("Unsupported data type: " + type.toSql()); + } + if (type.isArrayType()) { + ScalarType itemType = (ScalarType) ((ArrayType) type).getItemType(); + analyzeNestedType(itemType); + } + if (type.isMapType()) { + ScalarType keyType = (ScalarType) ((MapType) type).getKeyType(); + ScalarType valueType = (ScalarType) ((MapType) type).getKeyType(); + analyzeNestedType(keyType); + analyzeNestedType(valueType); + } + if (type.isStructType()) { + ArrayList fields = ((StructType) type).getFields(); + for (int i = 0; i < fields.size(); i++) { + ScalarType filedType = (ScalarType) fields.get(i).getType(); + analyzeNestedType(filedType); + } + 
} + } + } + + private void analyzeNestedType(ScalarType type) throws AnalysisException { + if (type.isNull()) { + throw new AnalysisException("Unsupported data type: " + type.toSql()); + } + if (type.getPrimitiveType().isStringType() + && !type.isAssignedStrLenInColDefinition()) { + type.setLength(1); + } + analyze(type); } private void analyzeScalarType(ScalarType scalarType) - throws AnalysisException { + throws AnalysisException { PrimitiveType type = scalarType.getPrimitiveType(); switch (type) { case CHAR: @@ -98,7 +148,7 @@ public class TypeDef implements ParseNode { } if (scalarType.getLength() > maxLen) { throw new AnalysisException( - name + " size must be <= " + maxLen + ": " + len); + name + " size must be <= " + maxLen + ": " + len); } break; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java index e7b4a82856..e4c16a14cd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java @@ -17,27 +17,70 @@ package org.apache.doris.catalog; +import org.apache.doris.common.Config; +import org.apache.doris.thrift.TColumnType; import org.apache.doris.thrift.TTypeDesc; import org.apache.doris.thrift.TTypeNode; import org.apache.doris.thrift.TTypeNodeType; import com.google.common.base.Preconditions; import com.google.common.base.Strings; +import com.google.gson.annotations.SerializedName; /** * Describes an ARRAY type. 
*/ public class ArrayType extends Type { - private final Type itemType; + + @SerializedName(value = "itemType") + private Type itemType; + + public ArrayType() { + this.itemType = NULL; + } public ArrayType(Type itemType) { this.itemType = itemType; } + public void setItemType(Type itemType) { + this.itemType = itemType; + } + public Type getItemType() { return itemType; } + @Override + public PrimitiveType getPrimitiveType() { + return PrimitiveType.ARRAY; + } + + @Override + public boolean matchesType(Type t) { + if (equals(t)) { + return true; + } + + if (!t.isArrayType()) { + return false; + } + + if (itemType.isNull()) { + return true; + } + + return itemType.matchesType(((ArrayType) t).itemType); + } + + public static ArrayType create() { + return new ArrayType(); + } + + public static ArrayType create(Type type) { + return new ArrayType(type); + } + @Override public String toSql(int depth) { if (depth >= MAX_NESTING_DEPTH) { @@ -76,6 +119,46 @@ public class ArrayType extends Type { structStr = structStr.substring(lpad); return String.format("%sARRAY<%s>", leftPadding, structStr); } + + @Override + public boolean isSupported() { + if (!Config.enable_complex_type_support) { + return false; + } + + if (itemType.isNull()) { + return false; + } + return true; + } + + @Override + public String toString() { + return toSql(0); + } + + @Override + public TColumnType toColumnTypeThrift() { + TColumnType thrift = new TColumnType(); + thrift.type = PrimitiveType.ARRAY.toThrift(); + return thrift; + } + + @Override + public boolean isFixedLengthType() { + return false; + } + + @Override + public boolean supportsTablePartitioning() { + if (!isSupported() || isComplexType()) { + return false; + } + return true; + } + + @Override + public int getSlotSize() { + return PrimitiveType.ARRAY.getSlotSize(); + } } - - diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index 
b92cf03125..fc2511bba4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -50,6 +50,8 @@ public class Column implements Writable { private static final Logger LOG = LogManager.getLogger(Column.class); public static final String DELETE_SIGN = "__DORIS_DELETE_SIGN__"; public static final String SEQUENCE_COL = "__DORIS_SEQUENCE_COL__"; + private static final String COLUMN_ARRAY_CHILDREN = "item"; + @SerializedName(value = "name") private String name; @SerializedName(value = "type") @@ -75,6 +77,8 @@ public class Column implements Writable { private String comment; @SerializedName(value = "stats") private ColumnStats stats; // cardinality and selectivity etc. + @SerializedName(value = "children") + private List children; // Define expr may exist in two forms, one is analyzed, and the other is not analyzed. // Currently, analyzed define expr is only used when creating materialized views, so the define expr in RollupJob must be analyzed. // In other cases, such as define expr in `MaterializedIndexMeta`, it may not be analyzed after being relayed. 
@@ -89,6 +93,7 @@ public class Column implements Writable { this.isKey = false; this.stats = new ColumnStats(); this.visible = true; + this.children = new ArrayList<>(Type.MAX_NESTING_DEPTH); } public Column(String name, PrimitiveType dataType) { @@ -130,9 +135,10 @@ public class Column implements Writable { this.isAllowNull = isAllowNull; this.defaultValue = defaultValue; this.comment = comment; - this.stats = new ColumnStats(); this.visible = visible; + this.children = new ArrayList<>(Type.MAX_NESTING_DEPTH); + createChildrenColumn(this.type, this); } public Column(Column column) { @@ -146,6 +152,22 @@ public class Column implements Writable { this.comment = column.getComment(); this.stats = column.getStats(); this.visible = column.visible; + this.children = column.getChildren(); + } + + public void createChildrenColumn(Type type, Column column) { + if (type.isArrayType()) { + Column c = new Column(COLUMN_ARRAY_CHILDREN, ((ArrayType) type).getItemType()); + column.addChildrenColumn(c); + } + } + + public List getChildren() { + return children; + } + + private void addChildrenColumn(Column column) { + this.children.add(column); } public void setName(String newName) { @@ -201,7 +223,12 @@ public class Column implements Writable { public PrimitiveType getDataType() { return type.getPrimitiveType(); } - public Type getType() { return ScalarType.createType(type.getPrimitiveType()); } + public Type getType() { + if (type.isArrayType() || type.isMapType() || type.isStructType()) { + return type; + } + return ScalarType.createType(type.getPrimitiveType()); + } public void setType(Type type) { this.type = type; @@ -209,9 +236,9 @@ public class Column implements Writable { public Type getOriginType() { return type; } - public int getStrLen() { return ((ScalarType) type).getLength(); } - public int getPrecision() { return ((ScalarType) type).getScalarPrecision(); } - public int getScale() { return ((ScalarType) type).getScalarScale(); } + public int getStrLen() { return 
type.getLength(); } + public int getPrecision() { return type instanceof ScalarType ? ((ScalarType) type).getScalarPrecision() : -1; } + public int getScale() { return type instanceof ScalarType ? ((ScalarType) type).getScalarScale() : -1; } public AggregateType getAggregationType() { return this.aggregationType; @@ -291,12 +318,41 @@ public class Column implements Writable { tColumn.setIsAllowNull(this.isAllowNull); tColumn.setDefaultValue(this.defaultValue); tColumn.setVisible(visible); + tColumn.setChildrenColumn(new ArrayList<>()); + toChildrenThrift(this, tColumn); + // The define expr does not need to be serialized here for now. // At present, only serialized(analyzed) define expr is directly used when creating a materialized view. // It will not be used here, but through another structure `TAlterMaterializedViewParam`. + if (this.defineExpr != null) { + tColumn.setDefineExpr(this.defineExpr.treeToThrift()); + } return tColumn; } + private void toChildrenThrift(Column column, TColumn tColumn) { + if (column.type.isArrayType()) { + Column children = column.getChildren().get(0); + + TColumn childrenTColumn = new TColumn(); + childrenTColumn.setColumnName(children.name); + + TColumnType childrenTColumnType = new TColumnType(); + childrenTColumnType.setType(children.getDataType().toThrift()); + childrenTColumnType.setType(children.getDataType().toThrift()); + childrenTColumnType.setLen(children.getStrLen()); + childrenTColumnType.setPrecision(children.getPrecision()); + childrenTColumnType.setScale(children.getScale()); + + childrenTColumnType.setIndexLen(children.getOlapColumnIndexSize()); + childrenTColumn.setColumnType(childrenTColumnType); + + tColumn.children_column.add(childrenTColumn); + + toChildrenThrift(children, childrenTColumn); + } + } + public void checkSchemaChangeAllowed(Column other) throws DdlException { if (Strings.isNullOrEmpty(other.name)) { throw new DdlException("Dest column name is empty"); @@ -475,6 +531,16 @@ public class Column 
implements Writable { return false; } + if (children.size() != other.children.size()) { + return false; + } + + for (int i = 0; i < children.size(); i++) { + if (!children.get(i).equals(other.getChildren().get(i))) { + return false; + } + } + return true; } @@ -550,6 +616,12 @@ public class Column implements Writable { case DECIMALV2: sb.append(String.format(typeStringMap.get(dataType), getPrecision(), getScale())); break; + case ARRAY: + sb.append(type.toString()); + case MAP: + sb.append(type.toString()); + case STRUCT: + sb.append(type.toString()); default: sb.append(typeStringMap.get(dataType)); break; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java index d2d2c3cacd..7aae66092c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java @@ -111,26 +111,38 @@ public abstract class ColumnType { } public static void write(DataOutput out, Type type) throws IOException { - Preconditions.checkArgument(type.isScalarType(), "only support scalar type serialization"); - ScalarType scalarType = (ScalarType) type; - Text.writeString(out, scalarType.getPrimitiveType().name()); - out.writeInt(scalarType.getScalarScale()); - out.writeInt(scalarType.getScalarPrecision()); - out.writeInt(scalarType.getLength()); - // Actually, varcharLimit need not to write here, write true to back compatible - out.writeBoolean(true); + Preconditions.checkArgument(type.isScalarType() || type.isArrayType(), + "only support scalar type and array serialization"); + if (type.isScalarType()) { + ScalarType scalarType = (ScalarType) type; + Text.writeString(out, scalarType.getPrimitiveType().name()); + out.writeInt(scalarType.getScalarScale()); + out.writeInt(scalarType.getScalarPrecision()); + out.writeInt(scalarType.getLength()); + // Actually, varcharLimit need not to write here, write true to back compatible + 
out.writeBoolean(true); + } else if (type.isArrayType()) { + ArrayType arrayType = (ArrayType) type; + Text.writeString(out, arrayType.getPrimitiveType().name()); + write(out, arrayType.getItemType()); + } } public static Type read(DataInput in) throws IOException { PrimitiveType primitiveType = PrimitiveType.valueOf(Text.readString(in)); - int scale = in.readInt(); - int precision = in.readInt(); - int len = in.readInt(); - if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_22) { - // Useless, just for back compatible - in.readBoolean(); + if (primitiveType == PrimitiveType.ARRAY) { + Type itermType = read(in); + return ArrayType.create(itermType); + } else { + int scale = in.readInt(); + int precision = in.readInt(); + int len = in.readInt(); + if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_22) { + // Useless, just for back compatible + in.readBoolean(); + } + return ScalarType.createType(primitiveType, len, precision, scale); } - return ScalarType.createType(primitiveType, len, precision, scale); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java index fdc75e90ec..6931b19c03 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java @@ -156,6 +156,10 @@ public class Function implements Writable { return retType; } + public void setReturnType(Type type) { + this.retType = type; + } + public Type[] getArgs() { return argTypes; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java index 71f2c4b679..d3494670df 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java @@ -30,7 +30,10 @@ import com.google.common.base.Strings; public class MapType extends Type { private 
final Type keyType; private final Type valueType; - + public MapType() { + this.keyType = NULL; + this.valueType = NULL; + } public MapType(Type keyType, Type valueType) { Preconditions.checkNotNull(keyType); Preconditions.checkNotNull(valueType); @@ -38,6 +41,11 @@ public class MapType extends Type { this.valueType = valueType; } + @Override + public PrimitiveType getPrimitiveType() { + return PrimitiveType.MAP; + } + public Type getKeyType() { return keyType; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MultiRowType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MultiRowType.java new file mode 100644 index 0000000000..59182b01ed --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MultiRowType.java @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.catalog; + +import org.apache.doris.thrift.TTypeDesc; +import org.apache.doris.thrift.TTypeNode; +import org.apache.doris.thrift.TTypeNodeType; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; + +/** + * Describes a multi-row type in sub-query. 
+ */ +public class MultiRowType extends Type { + private final Type itemType; + + public MultiRowType(Type itemType) { + this.itemType = itemType; + } + + public Type getItemType() { + return itemType; + } + + @Override + public String toSql(int depth) { + if (depth >= MAX_NESTING_DEPTH) { + return "ARRAY<...>"; + } + return String.format("ARRAY<%s>", itemType.toSql(depth + 1)); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof MultiRowType)) { + return false; + } + MultiRowType otherMultiRowType = (MultiRowType) other; + return otherMultiRowType.itemType.equals(itemType); + } + + @Override + public void toThrift(TTypeDesc container) { + TTypeNode node = new TTypeNode(); + container.types.add(node); + Preconditions.checkNotNull(itemType); + node.setType(TTypeNodeType.ARRAY); + itemType.toThrift(container); + } + + @Override + protected String prettyPrint(int lpad) { + String leftPadding = Strings.repeat(" ", lpad); + if (!itemType.isStructType()) { + return leftPadding + toSql(); + } + // Pass in the padding to make sure nested fields are aligned properly, + // even if we then strip the top-level padding. 
+ String structStr = itemType.prettyPrint(lpad); + structStr = structStr.substring(lpad); + return String.format("%sARRAY<%s>", leftPadding, structStr); + } +} \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java index 3357f61076..15082c4dc5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java @@ -54,6 +54,9 @@ public enum PrimitiveType { TIME("TIME", 8, TPrimitiveType.TIME), // we use OBJECT type represent BITMAP type in Backend BITMAP("BITMAP", 16, TPrimitiveType.OBJECT), + ARRAY("ARRAY", 24, TPrimitiveType.ARRAY), + MAP("MAP", 24, TPrimitiveType.MAP), + STRUCT("MAP", 24, TPrimitiveType.STRUCT), // Unsupported scalar types. BINARY("BINARY", -1, TPrimitiveType.BINARY); @@ -298,6 +301,8 @@ public enum PrimitiveType { supportedTypes.add(TIME); supportedTypes.add(DECIMALV2); supportedTypes.add(BITMAP); + supportedTypes.add(ARRAY); + supportedTypes.add(MAP); } public static ArrayList getIntegerTypes() { @@ -541,6 +546,12 @@ public enum PrimitiveType { return HLL; case OBJECT: return BITMAP; + case ARRAY: + return ARRAY; + case MAP: + return MAP; + case STRUCT: + return STRUCT; default: return INVALID_TYPE; } @@ -629,6 +640,10 @@ public enum PrimitiveType { return (this == DATE || this == DATETIME); } + public boolean isArrayType(){ + return this == ARRAY; + } + public boolean isStringType() { return (this == VARCHAR || this == CHAR || this == HLL); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java index d137361457..2e0c91f076 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java @@ -304,6 +304,9 @@ public class ScalarType extends Type { case 
BITMAP: stringBuilder.append(type.toString().toLowerCase()); break; + case ARRAY: + stringBuilder.append(type.toString().toLowerCase()); + break; default: stringBuilder.append("unknown type: " + type.toString()); break; @@ -371,6 +374,8 @@ public class ScalarType extends Type { @Override public PrimitiveType getPrimitiveType() { return type; } public int ordinal() { return type.ordinal(); } + + @Override public int getLength() { return len; } public void setLength(int len) {this.len = len; } public boolean isAssignedStrLenInColDefinition() { return isAssignedStrLenInColDefinition; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java index b9f2ccbf2f..9105e6db16 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java @@ -114,5 +114,9 @@ public class StructType extends Type { field.toThrift(container, node); } } + @Override + public String toString() { + return toSql(0); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index 7eaa872cee..541f838225 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -45,11 +45,11 @@ import java.util.List; public abstract class Type { private static final Logger LOG = LogManager.getLogger(Type.class); - // Maximum nesting depth of a type. This limit was determined experimentally by + // Maximum nesting depth of a type. This limit was determined experimentally by // generating and scanning deeply nested Parquet and Avro files. In those experiments, // we exceeded the stack space in the scanner (which uses recursion for dealing with // nested types) at a nesting depth between 200 and 300 (200 worked, 300 crashed).
- public static int MAX_NESTING_DEPTH = 100; + public static int MAX_NESTING_DEPTH = 2; // Static constant types for scalar types that don't require additional information. public static final ScalarType INVALID = new ScalarType(PrimitiveType.INVALID_TYPE); @@ -69,12 +69,13 @@ public abstract class Type { ScalarType.createDecimalV2Type(ScalarType.DEFAULT_PRECISION, ScalarType.DEFAULT_SCALE); public static final ScalarType DECIMALV2 = DEFAULT_DECIMALV2; - // (ScalarType) ScalarType.createDecimalTypeInternal(-1, -1); + // (ScalarType) ScalarType.createDecimalTypeInternal(-1, -1); public static final ScalarType DEFAULT_VARCHAR = ScalarType.createVarcharType(-1); public static final ScalarType VARCHAR = ScalarType.createVarcharType(-1); public static final ScalarType HLL = ScalarType.createHllType(); public static final ScalarType CHAR = (ScalarType) ScalarType.createCharType(-1); public static final ScalarType BITMAP = new ScalarType(PrimitiveType.BITMAP); + public static final MapType Map = new MapType(); private static ArrayList integerTypes; private static ArrayList numericTypes; @@ -262,7 +263,7 @@ public abstract class Type { } public boolean isCollectionType() { - return isMapType() || isArrayType(); + return isMapType() || isArrayType() || isMultiRowType(); } public boolean isMapType() { @@ -273,6 +274,10 @@ public abstract class Type { return this instanceof ArrayType; } + public boolean isMultiRowType() { + return this instanceof MultiRowType; + } + public boolean isStructType() { return this instanceof StructType; } @@ -289,6 +294,8 @@ public abstract class Type { return true; } + public int getLength() { return -1; } + /** * Indicates whether we support partitioning tables on columns of this type. 
*/ @@ -352,6 +359,16 @@ public abstract class Type { if (t1.isScalarType() && t2.isScalarType()) { return ScalarType.isImplicitlyCastable((ScalarType) t1, (ScalarType) t2, strict); } + if (t1.isComplexType() || t2.isComplexType()) { + if (t1.isArrayType() && t2.isArrayType()) { + return true; + } else if (t1.isMapType() && t2.isMapType()) { + return true; + } else if (t1.isStructType() && t2.isStructType()) { + return true; + } + return false; + } return false; } @@ -413,7 +430,7 @@ public abstract class Type { if (d >= MAX_NESTING_DEPTH) return true; if (isStructType()) { StructType structType = (StructType) this; - for (StructField f: structType.getFields()) { + for (StructField f : structType.getFields()) { if (f.getType().exceedsMaxNestingDepth(d + 1)) { return true; } @@ -423,6 +440,11 @@ public abstract class Type { if (arrayType.getItemType().exceedsMaxNestingDepth(d + 1)) { return true; } + } else if (isMultiRowType()) { + MultiRowType multiRowType = (MultiRowType) this; + if (multiRowType.getItemType().exceedsMaxNestingDepth(d + 1)) { + return true; + } } else if (isMapType()) { MapType mapType = (MapType) this; if (mapType.getValueType().exceedsMaxNestingDepth(d + 1)) { @@ -467,6 +489,12 @@ public abstract class Type { return Type.VARCHAR; case HLL: return Type.HLL; + case ARRAY: + return ArrayType.create(); + case MAP: + return new MapType(); + case STRUCT: + return new StructType(); case BITMAP: return Type.BITMAP; default: @@ -890,8 +918,12 @@ public abstract class Type { if (t1 == PrimitiveType.INVALID_TYPE || t2 == PrimitiveType.INVALID_TYPE) continue; if (t1 == PrimitiveType.NULL_TYPE || t2 == PrimitiveType.NULL_TYPE) continue; + if (t1 == PrimitiveType.ARRAY || t2 == PrimitiveType.ARRAY) continue; if (t1 == PrimitiveType.DECIMALV2 || t2 == PrimitiveType.DECIMALV2) continue; if (t1 == PrimitiveType.TIME || t2 == PrimitiveType.TIME) continue; + if (t1 == PrimitiveType.ARRAY || t2 == PrimitiveType.ARRAY) continue; + if (t1 == PrimitiveType.MAP || t2 
== PrimitiveType.MAP) continue; + if (t1 == PrimitiveType.STRUCT || t2 == PrimitiveType.STRUCT) continue; Preconditions.checkNotNull(compatibilityMatrix[i][j]); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java index 95287e01e1..9128c548ad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java @@ -1235,6 +1235,11 @@ public class Config extends ConfigBase { @ConfField(mutable = true, masterOnly = true) public static int period_of_auto_resume_min = 5; + /* + * If set to true, Doris will support complex type + */ + @ConfField + public static boolean enable_complex_type_support = false; /** * If set to true, the backend will be automatically dropped after finishing decommission. * If set to false, the backend will not be dropped and remaining in DECOMMISSION state. diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java index 6551829b05..a6cd83fe04 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java @@ -72,6 +72,7 @@ public class Util { TYPE_STRING_MAP.put(PrimitiveType.HLL, "varchar(%d)"); TYPE_STRING_MAP.put(PrimitiveType.BOOLEAN, "bool"); TYPE_STRING_MAP.put(PrimitiveType.BITMAP, "bitmap"); + TYPE_STRING_MAP.put(PrimitiveType.ARRAY, "Array<%s>"); TYPE_STRING_MAP.put(PrimitiveType.NULL_TYPE, "null"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java index 5a2f9734b7..56e8ac8a8b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java @@ -20,13 +20,16 @@ package org.apache.doris.persist.gson; import 
org.apache.doris.alter.AlterJobV2; import org.apache.doris.alter.RollupJobV2; import org.apache.doris.alter.SchemaChangeJobV2; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.DistributionInfo; import org.apache.doris.catalog.HashDistributionInfo; +import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.OdbcCatalogResource; import org.apache.doris.catalog.RandomDistributionInfo; import org.apache.doris.catalog.Resource; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.SparkResource; +import org.apache.doris.catalog.StructType; import org.apache.doris.load.loadv2.LoadJob.LoadJobStateUpdateInfo; import org.apache.doris.load.loadv2.SparkLoadJob.SparkLoadJobStateUpdateInfo; @@ -90,7 +93,10 @@ public class GsonUtils { private static RuntimeTypeAdapterFactory columnTypeAdapterFactory = RuntimeTypeAdapterFactory .of(org.apache.doris.catalog.Type.class, "clazz") // TODO: register other sub type after Doris support more types. - .registerSubtype(ScalarType.class, ScalarType.class.getSimpleName()); + .registerSubtype(ScalarType.class, ScalarType.class.getSimpleName()) + .registerSubtype(ArrayType.class, ArrayType.class.getSimpleName()) + .registerSubtype(MapType.class, MapType.class.getSimpleName()) + .registerSubtype(StructType.class, StructType.class.getSimpleName()); // runtime adapter for class "DistributionInfo" private static RuntimeTypeAdapterFactory distributionInfoTypeAdapterFactory = RuntimeTypeAdapterFactory diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctionList.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctionList.java new file mode 100644 index 0000000000..d6119d337b --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctionList.java @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.rewrite; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.METHOD) +public @interface FEFunctionList { + FEFunction[] value(); +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java index b766da49ba..d67c55c183 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java @@ -17,6 +17,10 @@ package org.apache.doris.rewrite; +import java.math.BigDecimal; +import java.math.BigInteger; + +import org.apache.doris.analysis.ArrayLiteral; import org.apache.doris.analysis.DateLiteral; import org.apache.doris.analysis.DecimalLiteral; import org.apache.doris.analysis.FloatLiteral; @@ -347,14 +351,6 @@ public class FEFunctions { return new FloatLiteral(result, Type.DOUBLE); } - @FEFunction(name = "add", argTypes = { "DECIMAL", "DECIMAL" }, returnType = "DECIMAL") - public static DecimalLiteral addDecimal(LiteralExpr first, LiteralExpr second) throws AnalysisException { - BigDecimal left = new 
BigDecimal(first.getStringValue()); - BigDecimal right = new BigDecimal(second.getStringValue()); - BigDecimal result = left.add(right); - return new DecimalLiteral(result); - } - @FEFunction(name = "add", argTypes = { "DECIMALV2", "DECIMALV2" }, returnType = "DECIMALV2") public static DecimalLiteral addDecimalV2(LiteralExpr first, LiteralExpr second) throws AnalysisException { BigDecimal left = new BigDecimal(first.getStringValue()); @@ -384,14 +380,6 @@ public class FEFunctions { return new FloatLiteral(result, Type.DOUBLE); } - @FEFunction(name = "subtract", argTypes = { "DECIMAL", "DECIMAL" }, returnType = "DECIMAL") - public static DecimalLiteral subtractDecimal(LiteralExpr first, LiteralExpr second) throws AnalysisException { - BigDecimal left = new BigDecimal(first.getStringValue()); - BigDecimal right = new BigDecimal(second.getStringValue()); - BigDecimal result = left.subtract(right); - return new DecimalLiteral(result); - } - @FEFunction(name = "subtract", argTypes = { "DECIMALV2", "DECIMALV2" }, returnType = "DECIMALV2") public static DecimalLiteral subtractDecimalV2(LiteralExpr first, LiteralExpr second) throws AnalysisException { BigDecimal left = new BigDecimal(first.getStringValue()); @@ -423,14 +411,6 @@ public class FEFunctions { return new FloatLiteral(result, Type.DOUBLE); } - @FEFunction(name = "multiply", argTypes = { "DECIMAL", "DECIMAL" }, returnType = "DECIMAL") - public static DecimalLiteral multiplyDecimal(LiteralExpr first, LiteralExpr second) throws AnalysisException { - BigDecimal left = new BigDecimal(first.getStringValue()); - BigDecimal right = new BigDecimal(second.getStringValue()); - BigDecimal result = left.multiply(right); - return new DecimalLiteral(result); - } - @FEFunction(name = "multiply", argTypes = { "DECIMALV2", "DECIMALV2" }, returnType = "DECIMALV2") public static DecimalLiteral multiplyDecimalV2(LiteralExpr first, LiteralExpr second) throws AnalysisException { BigDecimal left = new 
BigDecimal(first.getStringValue()); @@ -457,17 +437,6 @@ public class FEFunctions { return new FloatLiteral(result, Type.DOUBLE); } - @FEFunction(name = "divide", argTypes = { "DECIMAL", "DECIMAL" }, returnType = "DECIMAL") - public static DecimalLiteral divideDecimal(LiteralExpr first, LiteralExpr second) throws AnalysisException { - BigDecimal left = new BigDecimal(first.getStringValue()); - BigDecimal right = new BigDecimal(second.getStringValue()); - if (right.compareTo(BigDecimal.ZERO) == 0) { - return null; - } - BigDecimal result = left.divide(right); - return new DecimalLiteral(result); - } - @FEFunction(name = "divide", argTypes = { "DECIMALV2", "DECIMALV2" }, returnType = "DECIMALV2") public static DecimalLiteral divideDecimalV2(LiteralExpr first, LiteralExpr second) throws AnalysisException { BigDecimal left = new BigDecimal(first.getStringValue()); @@ -500,39 +469,25 @@ public class FEFunctions { return new StringLiteral(resultBuilder.toString()); } - @FEFunction(name = "ifnull", argTypes = {"VARCHAR", "VARCHAR"}, returnType = "VARCHAR") - public static LiteralExpr ifNullString(LiteralExpr first, LiteralExpr second) throws AnalysisException { + @FEFunctionList({ + @FEFunction(name = "ifnull", argTypes = {"VARCHAR", "VARCHAR"}, returnType = "VARCHAR"), + @FEFunction(name = "ifnull", argTypes = {"TINYINT", "TINYINT"}, returnType = "TINYINT"), + @FEFunction(name = "ifnull", argTypes = {"INT", "INT"}, returnType = "INT"), + @FEFunction(name = "ifnull", argTypes = {"BIGINT", "BIGINT"}, returnType = "BIGINT"), + @FEFunction(name = "ifnull", argTypes = {"DATETIME", "DATETIME"}, returnType = "DATETIME"), + @FEFunction(name = "ifnull", argTypes = { "DATE", "DATETIME" }, returnType = "DATETIME"), + @FEFunction(name = "ifnull", argTypes = { "DATETIME", "DATE" }, returnType = "DATETIME") + }) + public static LiteralExpr ifNull(LiteralExpr first, LiteralExpr second) throws AnalysisException { return first instanceof NullLiteral ? 
second : first; } - @FEFunction(name = "ifnull", argTypes = {"TINYINT", "TINYINT"}, returnType = "TINYINT") - public static LiteralExpr ifNullTinyInt(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; - } - - @FEFunction(name = "ifnull", argTypes = {"INT", "INT"}, returnType = "INT") - public static LiteralExpr ifNullInt(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; - } - - @FEFunction(name = "ifnull", argTypes = {"BIGINT", "BIGINT"}, returnType = "BIGINT") - public static LiteralExpr ifNullBigInt(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; - } - - @FEFunction(name = "ifnull", argTypes = { "DATETIME", "DATETIME" }, returnType = "DATETIME") - public static LiteralExpr ifNullDateTime(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; - } - - @FEFunction(name = "ifnull", argTypes = { "DATE", "DATETIME" }, returnType = "DATETIME") - public static LiteralExpr ifNullDateDatetime(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; - } - - @FEFunction(name = "ifnull", argTypes = { "DATETIME", "DATE" }, returnType = "DATETIME") - public static LiteralExpr ifNullDatetimeDate(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; + @FEFunctionList({ + @FEFunction(name = "array", argTypes = {"INT"}, returnType = "ARRAY"), + @FEFunction(name = "array", argTypes = {"VARCHAR"}, returnType = "ARRAY") + }) + public static ArrayLiteral array(LiteralExpr... 
exprs) throws AnalysisException { + return new ArrayLiteral(exprs); } } diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex index 65c7789f04..53c6c703c2 100644 --- a/fe/fe-core/src/main/jflex/sql_scanner.flex +++ b/fe/fe-core/src/main/jflex/sql_scanner.flex @@ -100,6 +100,7 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("as", new Integer(SqlParserSymbols.KW_AS)); keywordMap.put("asc", new Integer(SqlParserSymbols.KW_ASC)); keywordMap.put("authors", new Integer(SqlParserSymbols.KW_AUTHORS)); + keywordMap.put("array", new Integer(SqlParserSymbols.KW_ARRAY)); keywordMap.put("backend", new Integer(SqlParserSymbols.KW_BACKEND)); keywordMap.put("backends", new Integer(SqlParserSymbols.KW_BACKENDS)); keywordMap.put("backup", new Integer(SqlParserSymbols.KW_BACKUP)); @@ -249,6 +250,7 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("load", new Integer(SqlParserSymbols.KW_LOAD)); keywordMap.put("local", new Integer(SqlParserSymbols.KW_LOCAL)); keywordMap.put("location", new Integer(SqlParserSymbols.KW_LOCATION)); + keywordMap.put("map", new Integer(SqlParserSymbols.KW_MAP)); keywordMap.put("materialized", new Integer(SqlParserSymbols.KW_MATERIALIZED)); keywordMap.put("max", new Integer(SqlParserSymbols.KW_MAX)); keywordMap.put("maxvalue", new Integer(SqlParserSymbols.KW_MAX_VALUE)); @@ -346,6 +348,7 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("storage", new Integer(SqlParserSymbols.KW_STORAGE)); keywordMap.put("stream", new Integer(SqlParserSymbols.KW_STREAM)); keywordMap.put("string", new Integer(SqlParserSymbols.KW_STRING)); + keywordMap.put("struct", new Integer(SqlParserSymbols.KW_STRUCT)); keywordMap.put("sum", new Integer(SqlParserSymbols.KW_SUM)); keywordMap.put("superuser", new Integer(SqlParserSymbols.KW_SUPERUSER)); keywordMap.put("sync", new Integer(SqlParserSymbols.KW_SYNC)); diff --git a/fe/fe-core/src/test/java/org/apache/doris/rewrite/FEFunctionsTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/rewrite/FEFunctionsTest.java index 115ec23c0f..af93c5ceda 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/rewrite/FEFunctionsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/rewrite/FEFunctionsTest.java @@ -304,17 +304,6 @@ public class FEFunctionsTest { Assert.assertEquals(expectedResult, actualResult); } - @Test - public void addDecimalTest() throws AnalysisException { - DecimalLiteral actualResult = FEFunctions.addDecimal(new DecimalLiteral("2.2"), new DecimalLiteral("3.3")); - DecimalLiteral expectedResult = new DecimalLiteral("5.5"); - Assert.assertEquals(expectedResult, actualResult); - - actualResult = FEFunctions.addDecimal(new DecimalLiteral("-2.2"), new DecimalLiteral("3.3")); - expectedResult = new DecimalLiteral("1.1"); - Assert.assertEquals(expectedResult, actualResult); - } - @Test public void addDecimalV2Test() throws AnalysisException { DecimalLiteral actualResult = FEFunctions.addDecimalV2(new DecimalLiteral("2.2"), new DecimalLiteral("3.3")); @@ -359,17 +348,6 @@ public class FEFunctionsTest { Assert.assertEquals(expectedResult, actualResult); } - @Test - public void subtractDecimalTest() throws AnalysisException { - DecimalLiteral actualResult = FEFunctions.subtractDecimal(new DecimalLiteral("2.2"), new DecimalLiteral("3.3")); - DecimalLiteral expectedResult = new DecimalLiteral("-1.1"); - Assert.assertEquals(expectedResult, actualResult); - - actualResult = FEFunctions.subtractDecimal(new DecimalLiteral("5.5"), new DecimalLiteral("3.3")); - expectedResult = new DecimalLiteral("2.2"); - Assert.assertEquals(expectedResult, actualResult); - } - @Test public void subtractDecimalV2Test() throws AnalysisException { DecimalLiteral actualResult = FEFunctions.subtractDecimalV2(new DecimalLiteral("2.2"), new DecimalLiteral("3.3")); @@ -422,22 +400,6 @@ public class FEFunctionsTest { Assert.assertEquals(expectedResult, actualResult); } - @Test - public void multiplyDecimalTest() throws 
AnalysisException { - DecimalLiteral actualResult = FEFunctions.multiplyDecimal(new DecimalLiteral("1.1"), new DecimalLiteral("1.0")); - DecimalLiteral expectedResult = new DecimalLiteral("1.1"); - Assert.assertEquals(expectedResult, actualResult); - - actualResult = FEFunctions.multiplyDecimal(new DecimalLiteral("-1.1"), new DecimalLiteral("-10.0")); - expectedResult = new DecimalLiteral("11.0"); - Assert.assertEquals(expectedResult, actualResult); - - actualResult = FEFunctions.multiplyDecimal(new DecimalLiteral("-1.1"), new DecimalLiteral("-1.1")); - expectedResult = new DecimalLiteral("1.21"); - Assert.assertEquals(expectedResult, actualResult); - } - - @Test public void multiplyDecimalV2Test() throws AnalysisException { DecimalLiteral actualResult = FEFunctions.multiplyDecimalV2(new DecimalLiteral("1.1"), new DecimalLiteral("1.0")); @@ -479,17 +441,6 @@ public class FEFunctionsTest { Assert.assertEquals(expectedResult, actualResult); } - @Test - public void divideDecimalTest() throws AnalysisException { - DecimalLiteral actualResult = FEFunctions.divideDecimal(new DecimalLiteral("1.1"), new DecimalLiteral("1.0")); - DecimalLiteral expectedResult = new DecimalLiteral("1.1"); - Assert.assertEquals(expectedResult, actualResult); - - actualResult = FEFunctions.divideDecimal(new DecimalLiteral("-1.1"), new DecimalLiteral("-10.0")); - expectedResult = new DecimalLiteral("0.11"); - Assert.assertEquals(expectedResult, actualResult); - } - @Test public void divideDecimalV2Test() throws AnalysisException { DecimalLiteral actualResult = FEFunctions.divideDecimalV2(new DecimalLiteral("1.1"), new DecimalLiteral("1.0")); diff --git a/fe/pom.xml b/fe/pom.xml index 2a8f9ea72b..0665f6542c 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -101,7 +101,7 @@ under the License. 
oracleReleases - http://download.oracle.com/maven + https://download.oracle.com/maven diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 48b28acdeb..622db02fb3 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -259,6 +259,7 @@ message ColumnPB { optional bool has_bitmap_index = 15 [default=false]; // ColumnMessage.has_bitmap_index optional bool visible = 16 [default=true]; repeated ColumnPB children_columns = 17; + repeated string children_column_names = 18; } message TabletSchemaPB { diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto index 5696285fa7..cc767f8058 100644 --- a/gensrc/proto/segment_v2.proto +++ b/gensrc/proto/segment_v2.proto @@ -156,8 +156,10 @@ message ColumnMetaPB { repeated ColumnMetaPB children_columns = 10; - // required by array/struct/map reader to create child reader. + // required by array/struct/map reader to create child reader. optional uint64 num_rows = 11; + repeated string children_column_names = 12; + } message SegmentFooterPB { diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 88d84b9ed9..7979224b50 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -99,6 +99,21 @@ visible_functions = [ '_ZN5doris9Operators20bitnot_large_int_valEPN9doris_udf' '15FunctionContextERKNS1_11LargeIntValE'], + # array functions + [['array'], 'ARRAY', ['INT', '...'], + '_ZN5doris14ArrayFunctions5arrayEPN9doris_udf15FunctionContextEiPKNS1_6IntValE'], + [['array'], 'ARRAY', ['VARCHAR', '...'], + '_ZN5doris14ArrayFunctions5arrayEPN9doris_udf15FunctionContextEiPKNS1_9StringValE'], + [['array'], 'ARRAY', ['ARRAY', '...'], '', ''], + [['array'], 'ARRAY', ['MAP', '...'], '', ''], + [['array'], 'ARRAY', ['STRUCT', '...'], '', ''], + [['%element_extract%'], 'VARCHAR', ['ARRAY', 'INT'], '', ''], + [['%element_extract%'], 'VARCHAR', ['ARRAY', 'VARCHAR'], '', ''], + 
[['%element_extract%'], 'VARCHAR', ['MAP', 'VARCHAR'], '', ''], + [['%element_extract%'], 'VARCHAR', ['MAP', 'INT'], '', ''], + [['%element_extract%'], 'VARCHAR', ['STRUCT', 'INT'], '', ''], + [['%element_extract%'], 'VARCHAR', ['STRUCT', 'VARCHAR'], '', ''], + # Timestamp functions [['unix_timestamp'], 'INT', [], '_ZN5doris18TimestampFunctions7to_unixEPN9doris_udf15FunctionContextE'], @@ -888,7 +903,8 @@ non_null_result_with_null_param_functions = [ 'ifnull', 'nullif', 'null_or_empty', - 'coalesce' + 'coalesce', + 'array' ] # Nondeterministic functions may return different results each time they are called diff --git a/gensrc/script/gen_functions.py b/gensrc/script/gen_functions.py index d03090f1a9..5173ad94b3 100755 --- a/gensrc/script/gen_functions.py +++ b/gensrc/script/gen_functions.py @@ -388,21 +388,20 @@ types = { 'STRING': ['VARCHAR'], 'DATE': ['DATE'], 'DATETIME': ['DATETIME'], - 'DECIMAL': ['DECIMAL'], 'DECIMALV2': ['DECIMALV2'], 'NATIVE_INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT'], 'INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT'], 'FLOAT_TYPES': ['FLOAT', 'DOUBLE'], 'NUMERIC_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE', \ - 'LARGEINT', 'DECIMAL', 'DECIMALV2'], + 'LARGEINT', 'DECIMALV2'], 'STRING_TYPES': ['VARCHAR'], 'DATETIME_TYPES': ['DATE', 'DATETIME'], 'FIXED_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT'], 'NATIVE_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'], 'STRCAST_FIXED_TYPES': ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT'], 'ALL_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT', 'FLOAT',\ - 'DOUBLE', 'VARCHAR', 'DATETIME', 'DECIMAL', 'DECIMALV2'], - 'MAX_TYPES': ['BIGINT', 'LARGEINT', 'DOUBLE', 'DECIMAL', 'DECIMALV2'], + 'DOUBLE', 'VARCHAR', 'DATETIME', 'DECIMALV2'], + 'MAX_TYPES': ['BIGINT', 'LARGEINT', 'DOUBLE', 'DECIMALV2'], } # Operation, [ReturnType], [[Args1], [Args2], ... 
[ArgsN]] @@ -414,7 +413,6 @@ functions = [ ['Divide', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]], ['Int_Divide', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]], ['Mod', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]], - ['Mod', ['DECIMAL'], [['DECIMAL'], ['DECIMAL']]], ['Mod', ['DECIMALV2'], [['DECIMALV2'], ['DECIMALV2']]], ['Mod', ['DOUBLE'], [['DOUBLE'], ['DOUBLE']], double_mod], ['BitAnd', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]], @@ -447,12 +445,6 @@ functions = [ ['Lt', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], ['Ge', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], ['Le', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], - ['Eq', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], - ['Ne', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], - ['Gt', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], - ['Lt', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], - ['Ge', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], - ['Le', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], ['Eq', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], ['Ne', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], ['Gt', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], @@ -467,18 +459,13 @@ functions = [ ['Cast', ['INT'], [['NATIVE_TYPES'], ['INT']]], ['Cast', ['BIGINT'], [['NATIVE_TYPES'], ['BIGINT']]], ['Cast', ['LARGEINT'], [['NATIVE_TYPES'], ['LARGEINT']]], - ['Cast', ['LARGEINT'], [['DECIMAL'], ['LARGEINT']]], ['Cast', ['LARGEINT'], [['DECIMALV2'], ['LARGEINT']]], ['Cast', ['NATIVE_TYPES'], [['LARGEINT'], ['NATIVE_TYPES']]], ['Cast', ['FLOAT'], [['NATIVE_TYPES'], ['FLOAT']]], ['Cast', ['DOUBLE'], [['NATIVE_TYPES'], ['DOUBLE']]], - ['Cast', ['DECIMAL'], [['FIXED_TYPES'], ['DECIMAL']]], ['Cast', ['DECIMALV2'], [['FIXED_TYPES'], ['DECIMALV2']]], - ['Cast', ['DECIMAL'], [['FLOAT'], ['DECIMAL']], float_to_decimal], ['Cast', ['DECIMALV2'], [['FLOAT'], ['DECIMALV2']], float_to_decimal], - ['Cast', ['DECIMAL'], [['DOUBLE'], ['DECIMAL']], double_to_decimal], ['Cast', ['DECIMALV2'], [['DOUBLE'], ['DECIMALV2']], double_to_decimal], - ['Cast', 
['NATIVE_TYPES'], [['DECIMAL'], ['NATIVE_TYPES']]], ['Cast', ['NATIVE_TYPES'], [['DECIMALV2'], ['NATIVE_TYPES']]], ['Cast', ['NATIVE_INT_TYPES'], [['STRING'], ['NATIVE_INT_TYPES']], string_to_int], ['Cast', ['LARGEINT'], [['STRING'], ['LARGEINT']], string_to_int], @@ -488,7 +475,6 @@ functions = [ ['Cast', ['STRING'], [['FLOAT'], ['STRING']], float_to_string], ['Cast', ['STRING'], [['DOUBLE'], ['STRING']], double_to_string], ['Cast', ['STRING'], [['TINYINT'], ['STRING']], tinyint_to_string], - ['Cast', ['STRING'], [['DECIMAL'], ['STRING']], decimal_to_string], ['Cast', ['STRING'], [['DECIMALV2'], ['STRING']], decimal_to_string], # Datetime cast ['Cast', ['DATE'], [['NUMERIC_TYPES'], ['DATE']], numeric_to_date], @@ -524,7 +510,6 @@ native_types = { 'DATE': 'Date', 'DATETIME': 'DateTime', 'TIME': 'double', - 'DECIMAL': 'DecimalValue', 'DECIMALV2': 'DecimalV2Value', } @@ -542,7 +527,6 @@ implemented_types = { 'DATE': 'DateTimeValue', 'DATETIME': 'DateTimeValue', 'TIME': 'double', - 'DECIMAL': 'DecimalValue', 'DECIMALV2': 'DecimalV2Value', } result_fields = { @@ -558,7 +542,6 @@ result_fields = { 'DATE': 'datetime_val', 'DATETIME': 'datetime_val', 'TIME': 'double_val', - 'DECIMAL': 'decimal_val', 'DECIMALV2': 'decimalv2_val', } diff --git a/gensrc/script/gen_vector_functions.py b/gensrc/script/gen_vector_functions.py index 40c7482533..0f3231db1d 100755 --- a/gensrc/script/gen_vector_functions.py +++ b/gensrc/script/gen_vector_functions.py @@ -287,7 +287,6 @@ types = { 'STRING': ['VARCHAR'], 'DATE': ['DATE'], 'DATETIME': ['DATETIME'], - 'DECIMAL': ['DECIMAL'], 'DECIMALV2': ['DECIMALV2'], 'NATIVE_INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT'], 'INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT'], @@ -296,8 +295,8 @@ types = { 'NATIVE_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'], 'STRCAST_TYPES': ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'], 'ALL_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 
'LARGEINT', 'FLOAT',\ - 'DOUBLE', 'VARCHAR', 'DATETIME', 'DECIMAL', 'DECIMALV2'], - 'MAX_TYPES': ['BIGINT', 'LARGEINT', 'DOUBLE', 'DECIMAL', 'DECIMALV2'], + 'DOUBLE', 'VARCHAR', 'DATETIME', 'DECIMALV2'], + 'MAX_TYPES': ['BIGINT', 'LARGEINT', 'DOUBLE', 'DECIMALV2'], } # Operation, [ReturnType], [[Args1], [Args2], ... [ArgsN]] @@ -326,7 +325,6 @@ native_types = { 'VARCHAR': 'StringValue', 'DATE': 'DateTimeValue', 'DATETIME': 'DateTimeValue', - 'DECIMAL': 'DecimalValue', 'DECIMALV2': 'DecimalV2Value', } @@ -343,7 +341,6 @@ implemented_types = { 'VARCHAR': 'StringValue', 'DATE': 'DateTimeValue', 'DATETIME': 'DateTimeValue', - 'DECIMAL': 'DecimalValue', 'DECIMALV2': 'DecimalV2Value', } diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index 10ee99533a..a18d6fd21e 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -35,6 +35,7 @@ struct TColumn { 7: optional bool is_bloom_filter_column 8: optional Exprs.TExpr define_expr 9: optional bool visible = true + 10: optional list<TColumn> children_column } struct TTabletSchema { diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift index e91573a58a..584706ae7d 100644 --- a/gensrc/thrift/Exprs.thrift +++ b/gensrc/thrift/Exprs.thrift @@ -43,6 +43,7 @@ enum TExprNodeType { TUPLE_IS_NULL_PRED, INFO_FUNC, FUNCTION_CALL, + ARRAY_LITERAL, // TODO: old style compute functions. this will be deprecated COMPUTE_FUNCTION_CALL, diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index ee2dc87ee7..a526ec33a2 100644 --- a/gensrc/thrift/Types.thrift +++ b/gensrc/thrift/Types.thrift @@ -74,7 +74,10 @@ enum TPrimitiveType { HLL, DECIMALV2, TIME, - OBJECT + OBJECT, + ARRAY, + MAP, + STRUCT } enum TTypeNodeType {