diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 30148ca7c3..38ef7f9f18 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -369,7 +369,7 @@ set(CXX_GCC_FLAGS "${CXX_GCC_FLAGS} -g -Wno-unused-local-typedefs") # Debug information is stored as dwarf2 to be as compatible as possible # -Werror: compile warnings should be errors when using the toolchain compiler. # Only enable for debug builds because this is what we test in pre-commit tests. -set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -ggdb3 -O0 -gdwarf-2") +set(CXX_FLAGS_DEBUG "${CXX_GCC_FLAGS} -ggdb -O0") # For CMAKE_BUILD_TYPE=Release # -O3: Enable all compiler optimizations diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index cabaedb2f2..ac4e645720 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -22,6 +22,7 @@ #include #include "common/config.h" +#include "exprs/array_functions.h" #include "exprs/bitmap_function.h" #include "exprs/cast_functions.h" #include "exprs/compound_predicate.h" @@ -244,6 +245,7 @@ void Daemon::init(int argc, char** argv, const std::vector& paths) { IsNullPredicate::init(); LikePredicate::init(); StringFunctions::init(); + ArrayFunctions::init(); CastFunctions::init(); InPredicate::init(); MathFunctions::init(); diff --git a/be/src/common/object_pool.h b/be/src/common/object_pool.h index db82bdf79e..b2eb4e3024 100644 --- a/be/src/common/object_pool.h +++ b/be/src/common/object_pool.h @@ -42,6 +42,13 @@ public: return t; } + template + T* add_array(T* t) { + std::lock_guard l(_lock); + _objects.emplace_back(Element{t, [](void* obj) { delete[] reinterpret_cast(obj); }}); + return t; + } + void clear() { std::lock_guard l(_lock); for (Element& elem : _objects) elem.delete_fn(elem.obj); @@ -57,14 +64,14 @@ private: DISALLOW_COPY_AND_ASSIGN(ObjectPool); /// A generic deletion function pointer. Deletes its first argument. 
- using DeleteFn = void (*)(void*); + using DeleteFn = void (*)(void*); /// For each object, a pointer to the object and a function that deletes it. struct Element { void* obj; DeleteFn delete_fn; }; - + std::vector _objects; SpinLock _lock; }; diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index 92b31b85d5..3da65b489a 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -182,6 +182,10 @@ Status OlapScanNode::prepare(RuntimeState* state) { continue; } + if (slots[i]->type().is_collection_type()) { + _collection_slots.push_back(slots[i]); + } + if (!slots[i]->type().is_string_type()) { continue; } diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h index 4d288f5f35..8e59e23e93 100644 --- a/be/src/exec/olap_scan_node.h +++ b/be/src/exec/olap_scan_node.h @@ -209,6 +209,8 @@ private: // conjunct's index which already be push down storage engine // should be remove in olap_scan_node, no need check this conjunct again std::set _pushed_conjuncts_index; + // collection slots + std::vector _collection_slots; bool _eos; diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp index 84a43683cb..476f032ca1 100644 --- a/be/src/exec/olap_scanner.cpp +++ b/be/src/exec/olap_scanner.cpp @@ -43,6 +43,7 @@ OlapScanner::OlapScanner(RuntimeState* runtime_state, OlapScanNode* parent, bool _tuple_desc(parent->_tuple_desc), _profile(parent->runtime_profile()), _string_slots(parent->_string_slots), + _collection_slots(parent->_collection_slots), _id(-1), _is_open(false), _aggregation(aggregation), @@ -340,6 +341,43 @@ Status OlapScanner::get_batch(RuntimeState* state, RowBatch* batch, bool* eof) { } } + // Copy collection slot + for (auto desc : _collection_slots) { + CollectionValue* slot = tuple->get_collection_slot(desc->tuple_offset()); + + TypeDescriptor item_type = desc->type().children.at(0); + size_t item_size = item_type.get_slot_size() * slot->length(); + + size_t nulls_size = 
slot->length(); + uint8_t* data = batch->tuple_data_pool()->allocate(item_size + nulls_size); + + // copy null_signs + memory_copy(data, slot->null_signs(), nulls_size); + memory_copy(data + nulls_size, slot->data(), item_size); + + slot->set_null_signs(reinterpret_cast(data)); + slot->set_data(reinterpret_cast(data + nulls_size)); + + if (!item_type.is_string_type()) { + continue; + } + + // when string type, copy every item + for (int i = 0; i < slot->length(); ++i) { + int item_offset = nulls_size + i * item_type.get_slot_size(); + if (slot->is_null_at(i)) { + continue; + } + StringValue* dst_item_v = + reinterpret_cast(data + item_offset); + if (dst_item_v->len != 0) { + char* string_copy = reinterpret_cast( + batch->tuple_data_pool()->allocate(dst_item_v->len)); + memory_copy(string_copy, dst_item_v->ptr, dst_item_v->len); + dst_item_v->ptr = string_copy; + } + } + } // the memory allocate by mem pool has been copied, // so we should release these memory immediately mem_pool->clear(); @@ -442,6 +480,12 @@ void OlapScanner::_convert_row_to_tuple(Tuple* tuple) { } break; } + case TYPE_ARRAY: { + CollectionValue* array_v = reinterpret_cast(ptr); + CollectionValue* slot = tuple->get_collection_slot(slot_desc->tuple_offset()); + slot->shallow_copy(array_v); + break; + } default: { void* slot = tuple->get_slot(slot_desc->tuple_offset()); memory_copy(slot, ptr, len); diff --git a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h index ccfca3e21a..6dbd2fdbd2 100644 --- a/be/src/exec/olap_scanner.h +++ b/be/src/exec/olap_scanner.h @@ -111,6 +111,7 @@ private: const TupleDescriptor* _tuple_desc; /**< tuple descriptor */ RuntimeProfile* _profile; const std::vector& _string_slots; + const std::vector& _collection_slots; std::vector _conjunct_ctxs; // to record which runtime filters have been used diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index f73e308d52..9b04f061ce 100644 --- a/be/src/exec/tablet_info.cpp +++ 
b/be/src/exec/tablet_info.cpp @@ -39,11 +39,13 @@ Status OlapTableSchemaParam::init(const POlapTableSchemaParam& pschema) { _version = pschema.version(); std::map slots_map; _tuple_desc = _obj_pool.add(new TupleDescriptor(pschema.tuple_desc())); + for (auto& p_slot_desc : pschema.slot_descs()) { auto slot_desc = _obj_pool.add(new SlotDescriptor(p_slot_desc)); _tuple_desc->add_slot(slot_desc); slots_map.emplace(slot_desc->col_name(), slot_desc); } + for (auto& p_index : pschema.indexes()) { auto index = _obj_pool.add(new OlapTableIndexSchema()); index->index_id = p_index.id(); @@ -78,6 +80,7 @@ Status OlapTableSchemaParam::init(const TOlapTableSchemaParam& tschema) { _tuple_desc->add_slot(slot_desc); slots_map.emplace(slot_desc->col_name(), slot_desc); } + for (auto& t_index : tschema.indexes) { auto index = _obj_pool.add(new OlapTableIndexSchema()); index->index_id = t_index.id; @@ -133,10 +136,8 @@ std::string OlapTablePartition::debug_string(TupleDescriptor* tuple_desc) const } in_keys_ss << "]"; ss << "(id=" << id << ",start_key=" << Tuple::to_string(start_key, *tuple_desc) - << ",end_key=" << Tuple::to_string(end_key, *tuple_desc) - << ",in_key=" << in_keys_ss.str() - << ",num_buckets=" << num_buckets - << ",indexes=["; + << ",end_key=" << Tuple::to_string(end_key, *tuple_desc) << ",in_key=" << in_keys_ss.str() + << ",num_buckets=" << num_buckets << ",indexes=["; idx = 0; for (auto& index : indexes) { if (idx++ > 0) { @@ -274,7 +275,6 @@ bool OlapTablePartitionParam::find_tablet(Tuple* tuple, const OlapTablePartition it = _partitions_map->find(tuple); } else { it = _partitions_map->upper_bound(tuple); - } if (it == _partitions_map->end()) { return false; diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp index 1b04e02bb7..d7d24d1d96 100644 --- a/be/src/exec/tablet_sink.cpp +++ b/be/src/exec/tablet_sink.cpp @@ -241,6 +241,7 @@ Status NodeChannel::add_row(Tuple* input_tuple, int64_t tablet_id) { } DCHECK_NE(row_no, 
RowBatch::INVALID_ROW_INDEX); auto tuple = input_tuple->deep_copy(*_tuple_desc, _cur_batch->tuple_data_pool()); + _cur_batch->get_row(row_no)->set_tuple(0, tuple); _cur_batch->commit_last_row(); _cur_add_batch_request.add_tablet_ids(tablet_id); diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt index dcb397ffdf..421cfc2cfc 100644 --- a/be/src/exprs/CMakeLists.txt +++ b/be/src/exprs/CMakeLists.txt @@ -56,6 +56,7 @@ add_library(Exprs scalar_fn_call.cpp slot_ref.cpp string_functions.cpp + array_functions.cpp timestamp_functions.cpp tuple_is_null_predicate.cpp udf_builtins.cpp diff --git a/be/src/exprs/anyval_util.cpp b/be/src/exprs/anyval_util.cpp index baff010e18..c61a9aa55e 100644 --- a/be/src/exprs/anyval_util.cpp +++ b/be/src/exprs/anyval_util.cpp @@ -90,6 +90,10 @@ AnyVal* create_any_val(ObjectPool* pool, const TypeDescriptor& type) { case TYPE_DATETIME: return pool->add(new DateTimeVal); + + case TYPE_ARRAY: + return pool->add(new CollectionVal); + default: DCHECK(false) << "Unsupported type: " << type.type; return NULL; @@ -152,6 +156,12 @@ FunctionContext::TypeDesc AnyValUtil::column_type_to_type_desc(const TypeDescrip case TYPE_NULL: out.type = FunctionContext::TYPE_NULL; break; + case TYPE_ARRAY: + out.type = FunctionContext::TYPE_ARRAY; + for (const auto& t : type.children) { + out.children.push_back(column_type_to_type_desc(t)); + } + break; default: DCHECK(false) << "Unknown type: " << type; } diff --git a/be/src/exprs/anyval_util.h b/be/src/exprs/anyval_util.h index daa1a0f514..477553939d 100644 --- a/be/src/exprs/anyval_util.h +++ b/be/src/exprs/anyval_util.h @@ -20,6 +20,7 @@ #include "common/status.h" #include "exprs/expr.h" +#include "runtime/collection_value.h" #include "runtime/primitive_type.h" #include "udf/udf.h" #include "util/hash_util.hpp" @@ -207,6 +208,9 @@ public: case TYPE_DECIMALV2: return sizeof(doris_udf::DecimalV2Val); + case TYPE_ARRAY: + return sizeof(doris_udf::CollectionVal); + default: DCHECK(false) << t; 
return 0; @@ -242,6 +246,8 @@ public: return alignof(DateTimeVal); case TYPE_DECIMALV2: return alignof(DecimalV2Val); + case TYPE_ARRAY: + return alignof(doris_udf::CollectionVal); default: DCHECK(false) << t; return 0; @@ -345,6 +351,10 @@ public: reinterpret_cast(slot)->to_datetime_val( reinterpret_cast(dst)); return; + case TYPE_ARRAY: + reinterpret_cast(slot)->to_collection_val( + reinterpret_cast(dst)); + return; default: DCHECK(false) << "NYI"; } diff --git a/be/src/exprs/array_functions.cpp b/be/src/exprs/array_functions.cpp new file mode 100644 index 0000000000..b9b7fcf8a4 --- /dev/null +++ b/be/src/exprs/array_functions.cpp @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "exprs/array_functions.h" + +#include "common/logging.h" +#include "runtime/collection_value.h" + +namespace doris { + +void ArrayFunctions::init() {} + +#define ARRAY_FUNCTION(TYPE, PRIMARY_TYPE) \ + CollectionVal ArrayFunctions::array(FunctionContext* context, int num_children, \ + const TYPE* values) { \ + DCHECK_EQ(context->get_return_type().children.size(), 1); \ + CollectionValue v; \ + CollectionValue::init_collection(context, num_children, PRIMARY_TYPE, &v); \ + for (int i = 0; i < num_children; ++i) { \ + v.set(i, PRIMARY_TYPE, values + i); \ + } \ + CollectionVal ret; \ + v.to_collection_val(&ret); \ + return ret; \ + } + +ARRAY_FUNCTION(IntVal, TYPE_INT); +ARRAY_FUNCTION(StringVal, TYPE_VARCHAR); + +} // namespace doris diff --git a/be/src/exprs/array_functions.h b/be/src/exprs/array_functions.h new file mode 100644 index 0000000000..d0a32f0e91 --- /dev/null +++ b/be/src/exprs/array_functions.h @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#ifndef DORIS_BE_SRC_QUERY_EXPRS_COLLECTION_FUNCTIONS_H +#define DORIS_BE_SRC_QUERY_EXPRS_COLLECTION_FUNCTIONS_H + +#include "anyval_util.h" + +namespace doris { + +class ArrayFunctions { +public: + static void init(); + + /** + * array construct functions, create array with the children values + */ + static CollectionVal array(FunctionContext* context, int num_children, const IntVal* values); + + static CollectionVal array(FunctionContext* context, int num_children, const StringVal* values); +}; +} // namespace doris + +#endif diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp index 480aac4e89..e4f88eb450 100644 --- a/be/src/exprs/expr.cpp +++ b/be/src/exprs/expr.cpp @@ -312,6 +312,9 @@ Status Expr::create_expr(ObjectPool* pool, const TExprNode& texpr_node, Expr** e case TExprNodeType::STRING_LITERAL: *expr = pool->add(new Literal(texpr_node)); return Status::OK(); + case TExprNodeType::ARRAY_LITERAL: + *expr = pool->add(new Literal(texpr_node)); + return Status::OK(); case TExprNodeType::COMPOUND_PRED: switch (texpr_node.opcode) { case TExprOpcode::COMPOUND_AND: @@ -719,6 +722,10 @@ doris_udf::AnyVal* Expr::get_const_val(ExprContext* context) { _constant_val.reset(new AnyVal(true)); break; } + case TYPE_ARRAY: { + _constant_val.reset(new CollectionVal(get_array_val(context, NULL))); + break; + } default: DCHECK(false) << "Type not implemented: " << type(); } @@ -797,6 +804,11 @@ DecimalV2Val Expr::get_decimalv2_val(ExprContext* context, TupleRow* row) { return val; } +CollectionVal Expr::get_array_val(ExprContext* context, TupleRow* row) { + CollectionVal val; + return val; +} + Status Expr::get_fn_context_error(ExprContext* ctx) { if (_fn_context_index != -1) { FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h index d3bc711cc3..5bc3c50a6b 100644 --- a/be/src/exprs/expr.h +++ b/be/src/exprs/expr.h @@ -112,6 +112,7 @@ public: // virtual ArrayVal GetArrayVal(ExprContext* context, 
TupleRow*); virtual DateTimeVal get_datetime_val(ExprContext* context, TupleRow*); virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*); + virtual CollectionVal get_array_val(ExprContext* context, TupleRow*); // Get the number of digits after the decimal that should be displayed for this // value. Returns -1 if no scale has been specified (currently the scale is only set for @@ -429,6 +430,7 @@ private: static DoubleVal get_double_val(Expr* expr, ExprContext* context, TupleRow* row); static StringVal get_string_val(Expr* expr, ExprContext* context, TupleRow* row); static DateTimeVal get_datetime_val(Expr* expr, ExprContext* context, TupleRow* row); + static CollectionVal get_array_val(Expr* expr, ExprContext* context, TupleRow* row); static DecimalV2Val get_decimalv2_val(Expr* expr, ExprContext* context, TupleRow* row); /// Creates an expression tree rooted at 'root' via depth-first traversal. diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp index 25d11286a7..d8cd5a3a5d 100644 --- a/be/src/exprs/expr_context.cpp +++ b/be/src/exprs/expr_context.cpp @@ -286,16 +286,15 @@ void* ExprContext::get_value(Expr* e, TupleRow* row) { _result.decimalv2_val = DecimalV2Value::from_decimal_val(v); return &_result.decimalv2_val; } -#if 0 - case TYPE_ARRAY: - case TYPE_MAP: { - doris_udf::ArrayVal v = e->GetArrayVal(this, row); - if (v.is_null) return NULL; - _result.array_val.ptr = v.ptr; - _result.array_val.num_tuples = v.num_tuples; + case TYPE_ARRAY: { + doris_udf::CollectionVal v = e->get_array_val(this, row); + if (v.is_null) { + return NULL; + } + + _result.array_val = CollectionValue::from_collection_val(v); return &_result.array_val; } -#endif default: DCHECK(false) << "Type not implemented: " << e->_type; return NULL; diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h index a786c27832..5c7f4ecb2d 100644 --- a/be/src/exprs/expr_context.h +++ b/be/src/exprs/expr_context.h @@ -25,7 +25,7 @@ #include 
"exprs/expr_value.h" #include "exprs/slot_ref.h" #include "udf/udf.h" -#include "udf/udf_internal.h" // for ArrayVal +#include "udf/udf_internal.h" // for CollectionVal #undef USING_DORIS_UDF #define USING_DORIS_UDF using namespace doris_udf diff --git a/be/src/exprs/expr_ir.cpp b/be/src/exprs/expr_ir.cpp index 02d223e715..7be579846e 100644 --- a/be/src/exprs/expr_ir.cpp +++ b/be/src/exprs/expr_ir.cpp @@ -75,4 +75,7 @@ DateTimeVal Expr::get_datetime_val(Expr* expr, ExprContext* context, TupleRow* r DecimalV2Val Expr::get_decimalv2_val(Expr* expr, ExprContext* context, TupleRow* row) { return expr->get_decimalv2_val(context, row); } +CollectionVal Expr::get_array_val(Expr* expr, ExprContext* context, TupleRow* row) { + return expr->get_array_val(context, row); +} } // namespace doris diff --git a/be/src/exprs/expr_value.h b/be/src/exprs/expr_value.h index a2f2dc876a..bb98c4938f 100644 --- a/be/src/exprs/expr_value.h +++ b/be/src/exprs/expr_value.h @@ -18,6 +18,7 @@ #ifndef DORIS_BE_SRC_QUERY_EXPRS_EXPR_VALUE_H #define DORIS_BE_SRC_QUERY_EXPRS_EXPR_VALUE_H +#include "runtime/collection_value.h" #include "runtime/datetime_value.h" #include "runtime/decimalv2_value.h" #include "runtime/string_value.h" @@ -44,6 +45,7 @@ struct ExprValue { StringValue string_val; DateTimeValue datetime_val; DecimalV2Value decimalv2_val; + CollectionValue array_val; ExprValue() : bool_val(false), @@ -57,7 +59,8 @@ struct ExprValue { string_data(), string_val(NULL, 0), datetime_val(), - decimalv2_val(0) {} + decimalv2_val(0), + array_val() {} ExprValue(bool v) : bool_val(v) {} ExprValue(int8_t v) : tinyint_val(v) {} diff --git a/be/src/exprs/literal.cpp b/be/src/exprs/literal.cpp index 4ad17c39b5..45dfcb0b2a 100644 --- a/be/src/exprs/literal.cpp +++ b/be/src/exprs/literal.cpp @@ -20,6 +20,7 @@ #include #include "gen_cpp/Exprs_types.h" +#include "runtime/collection_value.h" #include "runtime/runtime_state.h" #include "util/string_parser.hpp" @@ -92,6 +93,11 @@ Literal::Literal(const 
TExprNode& node) : Expr(node) { _value.decimalv2_val = DecimalV2Value(node.decimal_literal.value); break; } + case TYPE_ARRAY: { + DCHECK_EQ(node.node_type, TExprNodeType::ARRAY_LITERAL); + // init in prepare + break; + } default: break; // DCHECK(false) << "Invalid type: " << TypeToString(_type.type); @@ -160,4 +166,29 @@ StringVal Literal::get_string_val(ExprContext* context, TupleRow* row) { return str_val; } +CollectionVal Literal::get_array_val(ExprContext* context, TupleRow*) { + DCHECK(_type.is_collection_type()); + CollectionVal val; + _value.array_val.to_collection_val(&val); + return val; +} + +Status Literal::prepare(RuntimeState* state, const RowDescriptor& row_desc, ExprContext* context) { + RETURN_IF_ERROR(Expr::prepare(state, row_desc, context)); + + if (type().type == TYPE_ARRAY) { + DCHECK_EQ(type().children.size(), 1) << "array children type not 1"; + // init array value + auto td = type().children.at(0).type; + RETURN_IF_ERROR(CollectionValue::init_collection(state->obj_pool(), get_num_children(), td, + &_value.array_val)); + // init every item + for (int i = 0; i < get_num_children(); ++i) { + Expr* children = get_child(i); + RETURN_IF_ERROR(_value.array_val.set(i, td, children->get_const_val(context))); + } + } + + return Status::OK(); +} } // namespace doris diff --git a/be/src/exprs/literal.h b/be/src/exprs/literal.h index 4899116c42..49b402f792 100644 --- a/be/src/exprs/literal.h +++ b/be/src/exprs/literal.h @@ -43,6 +43,10 @@ public: virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*); virtual DateTimeVal get_datetime_val(ExprContext* context, TupleRow*); virtual StringVal get_string_val(ExprContext* context, TupleRow* row); + virtual CollectionVal get_array_val(ExprContext* context, TupleRow*); + // init val before use + virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, + ExprContext* context); protected: friend class Expr; diff --git a/be/src/exprs/null_literal.cpp 
b/be/src/exprs/null_literal.cpp index c2bafe8d9a..427e2bafac 100644 --- a/be/src/exprs/null_literal.cpp +++ b/be/src/exprs/null_literal.cpp @@ -67,4 +67,7 @@ DecimalV2Val NullLiteral::get_decimalv2_val(ExprContext*, TupleRow*) { return DecimalV2Val::null(); } +CollectionVal NullLiteral::get_array_val(ExprContext* context, TupleRow*) { + return CollectionVal::null(); +} } // namespace doris diff --git a/be/src/exprs/null_literal.h b/be/src/exprs/null_literal.h index 242e23e6fc..38d7dcd07a 100644 --- a/be/src/exprs/null_literal.h +++ b/be/src/exprs/null_literal.h @@ -41,6 +41,7 @@ public: virtual doris_udf::StringVal get_string_val(ExprContext*, TupleRow*); virtual doris_udf::DateTimeVal get_datetime_val(ExprContext*, TupleRow*); virtual doris_udf::DecimalV2Val get_decimalv2_val(ExprContext*, TupleRow*); + virtual CollectionVal get_array_val(ExprContext* context, TupleRow*); protected: friend class Expr; diff --git a/be/src/exprs/scalar_fn_call.cpp b/be/src/exprs/scalar_fn_call.cpp index 5ed75166cc..c887073973 100644 --- a/be/src/exprs/scalar_fn_call.cpp +++ b/be/src/exprs/scalar_fn_call.cpp @@ -422,6 +422,7 @@ typedef DoubleVal (*DoubleWrapper)(ExprContext*, TupleRow*); typedef StringVal (*StringWrapper)(ExprContext*, TupleRow*); typedef DateTimeVal (*DatetimeWrapper)(ExprContext*, TupleRow*); typedef DecimalV2Val (*DecimalV2Wrapper)(ExprContext*, TupleRow*); +typedef CollectionVal (*ArrayWrapper)(ExprContext*, TupleRow*); // TODO: macroify this? 
BooleanVal ScalarFnCall::get_boolean_val(ExprContext* context, TupleRow* row) { @@ -535,6 +536,18 @@ DecimalV2Val ScalarFnCall::get_decimalv2_val(ExprContext* context, TupleRow* row return fn(context, row); } +CollectionVal ScalarFnCall::get_array_val(ExprContext* context, TupleRow* row) { + DCHECK_EQ(_type.type, TYPE_ARRAY); + DCHECK(context != NULL); + + if (_scalar_fn_wrapper == NULL) { + return interpret_eval(context, row); + } + + ArrayWrapper fn = reinterpret_cast(_scalar_fn_wrapper); + return fn(context, row); +} + std::string ScalarFnCall::debug_string() const { std::stringstream out; out << "ScalarFnCall(udf_type=" << _fn.binary_type << " location=" << _fn.hdfs_location diff --git a/be/src/exprs/scalar_fn_call.h b/be/src/exprs/scalar_fn_call.h index 8b81498d53..33ae72ffd5 100644 --- a/be/src/exprs/scalar_fn_call.h +++ b/be/src/exprs/scalar_fn_call.h @@ -77,7 +77,7 @@ protected: virtual doris_udf::StringVal get_string_val(ExprContext* context, TupleRow*); virtual doris_udf::DateTimeVal get_datetime_val(ExprContext* context, TupleRow*); virtual doris_udf::DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*); - // virtual doris_udf::ArrayVal GetArrayVal(ExprContext* context, TupleRow*); + virtual CollectionVal get_array_val(ExprContext* context, TupleRow*); private: /// If this function has var args, children()[_vararg_start_idx] is the first vararg diff --git a/be/src/exprs/slot_ref.cpp b/be/src/exprs/slot_ref.cpp index a12e4b627c..bc106d2516 100644 --- a/be/src/exprs/slot_ref.cpp +++ b/be/src/exprs/slot_ref.cpp @@ -234,4 +234,16 @@ DecimalV2Val SlotRef::get_decimalv2_val(ExprContext* context, TupleRow* row) { return DecimalV2Val(reinterpret_cast(t->get_slot(_slot_offset))->value); } +doris_udf::CollectionVal SlotRef::get_array_val(ExprContext* context, TupleRow* row) { + DCHECK_EQ(_type.type, TYPE_ARRAY); + + Tuple* t = row->get_tuple(_tuple_idx); + if (t == NULL || t->is_null(_null_indicator_offset)) { + return CollectionVal::null(); + } + + 
CollectionVal val; + reinterpret_cast(t->get_slot(_slot_offset))->to_collection_val(&val); + return val; +} } // namespace doris diff --git a/be/src/exprs/slot_ref.h b/be/src/exprs/slot_ref.h index b3244ce2aa..cbeb4b6cb0 100644 --- a/be/src/exprs/slot_ref.h +++ b/be/src/exprs/slot_ref.h @@ -68,7 +68,7 @@ public: virtual doris_udf::StringVal get_string_val(ExprContext* context, TupleRow*); virtual doris_udf::DateTimeVal get_datetime_val(ExprContext* context, TupleRow*); virtual doris_udf::DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*); - // virtual doris_udf::ArrayVal GetArrayVal(ExprContext* context, TupleRow*); + virtual doris_udf::CollectionVal get_array_val(ExprContext* context, TupleRow*); private: int _tuple_idx; // within row diff --git a/be/src/olap/aggregate_func.cpp b/be/src/olap/aggregate_func.cpp index d98a43ce11..35482d8dca 100644 --- a/be/src/olap/aggregate_func.cpp +++ b/be/src/olap/aggregate_func.cpp @@ -17,6 +17,38 @@ #include "olap/aggregate_func.h" +namespace std { +namespace { +// algorithm from boost: http://www.boost.org/doc/libs/1_61_0/doc/html/hash/reference.html#boost.hash_combine +template +inline void hash_combine(std::size_t& seed, T const& v) { + seed ^= std::hash()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +template ::value - 1> +struct HashValueImpl { + static void apply(size_t& seed, Tuple const& tuple) { + HashValueImpl::apply(seed, tuple); + hash_combine(seed, std::get(tuple)); + } +}; + +template +struct HashValueImpl { + static void apply(size_t& seed, Tuple const& tuple) { hash_combine(seed, std::get<0>(tuple)); } +}; +} // namespace + +template +struct hash> { + size_t operator()(std::tuple const& tt) const { + size_t seed = 0; + HashValueImpl>::apply(seed, tt); + return seed; + } +}; +} // namespace std + namespace doris { template @@ -26,19 +58,14 @@ AggregateInfo::AggregateInfo(const Traits& traits) _finalize_fn(traits.finalize), _agg_method(traits.agg_method) {} -struct AggregateFuncMapHash { - 
size_t operator()(const std::pair& pair) const { - return (pair.first + 31) ^ pair.second; - } -}; - class AggregateFuncResolver { DECLARE_SINGLETON(AggregateFuncResolver); public: const AggregateInfo* get_aggregate_info(const FieldAggregationMethod agg_method, - const FieldType field_type) const { - auto pair = _infos_mapping.find(std::make_pair(agg_method, field_type)); + const FieldType field_type, + const FieldType sub_type) const { + auto pair = _infos_mapping.find(std::make_tuple(agg_method, field_type, sub_type)); if (pair != _infos_mapping.end()) { return pair->second; } else { @@ -46,15 +73,17 @@ public: } } - template + template void add_aggregate_mapping() { - _infos_mapping.emplace(std::make_pair(agg_method, field_type), - new AggregateInfo(AggregateTraits())); + _infos_mapping.emplace( + std::make_tuple(agg_method, field_type, sub_type), + new AggregateInfo(AggregateTraits())); } private: - typedef std::pair key_t; - std::unordered_map _infos_mapping; + typedef std::tuple key_t; + std::unordered_map _infos_mapping; DISALLOW_COPY_AND_ASSIGN(AggregateFuncResolver); }; @@ -74,6 +103,21 @@ AggregateFuncResolver::AggregateFuncResolver() { add_aggregate_mapping(); add_aggregate_mapping(); add_aggregate_mapping(); + // array types has sub type like array field type is array, subtype is int + add_aggregate_mapping(); + add_aggregate_mapping(); + add_aggregate_mapping(); + add_aggregate_mapping(); + add_aggregate_mapping(); + add_aggregate_mapping(); + add_aggregate_mapping(); // Min Aggregate Function add_aggregate_mapping(); @@ -159,8 +203,8 @@ AggregateFuncResolver::~AggregateFuncResolver() { } const AggregateInfo* get_aggregate_info(const FieldAggregationMethod agg_method, - const FieldType field_type) { - return AggregateFuncResolver::instance()->get_aggregate_info(agg_method, field_type); + const FieldType field_type, const FieldType sub_type) { + return AggregateFuncResolver::instance()->get_aggregate_info(agg_method, field_type, sub_type); } } // 
namespace doris diff --git a/be/src/olap/aggregate_func.h b/be/src/olap/aggregate_func.h index 1d31f292e2..39e043ceb8 100644 --- a/be/src/olap/aggregate_func.h +++ b/be/src/olap/aggregate_func.h @@ -90,7 +90,7 @@ private: FieldAggregationMethod _agg_method; }; -template +template struct BaseAggregateFuncs { static void init(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, ObjectPool* agg_pool) { @@ -98,9 +98,13 @@ struct BaseAggregateFuncs { if (src_null) { return; } - - const TypeInfo* _type_info = get_type_info(field_type); - _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + if constexpr (field_type == OLAP_FIELD_TYPE_ARRAY) { + const TypeInfo* _type_info = get_collection_type_info(sub_type); + _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + } else { + const TypeInfo* _type_info = get_type_info(field_type); + _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + } } // Default update do nothing. @@ -110,8 +114,9 @@ struct BaseAggregateFuncs { static void finalize(RowCursorCell* src, MemPool* mem_pool) {} }; -template -struct AggregateFuncTraits : public BaseAggregateFuncs {}; +template +struct AggregateFuncTraits : public BaseAggregateFuncs {}; template <> struct AggregateFuncTraits @@ -461,7 +466,7 @@ struct AggregateFuncTraitssize = 0; auto* hll = new HyperLogLog(*src_slice); - + dst_slice->data = reinterpret_cast(hll); agg_pool->add(hll); @@ -548,12 +553,15 @@ struct AggregateFuncTraits { }; -template -struct AggregateTraits : public AggregateFuncTraits { +template +struct AggregateTraits : public AggregateFuncTraits { static const FieldAggregationMethod agg_method = aggMethod; static const FieldType type = fieldType; + static const FieldType sub_type = subType; }; const AggregateInfo* get_aggregate_info(const FieldAggregationMethod agg_method, - const FieldType field_type); + const FieldType field_type, + const FieldType sub_type = OLAP_FIELD_TYPE_NONE); } // namespace doris diff --git 
a/be/src/olap/collection.h b/be/src/olap/collection.h deleted file mode 100644 index 328869ba55..0000000000 --- a/be/src/olap/collection.h +++ /dev/null @@ -1,61 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -namespace doris { - -// cpp type for ARRAY -struct Collection { - // child column data - void* data; - uint64_t length; - // item has no null value if has_null is false. - // item ```may``` has null value if has_null is true. - // null_count is better? 
- bool has_null; - // null bitmap - bool* null_signs; - - Collection() : data(nullptr), length(0), has_null(false), null_signs(nullptr) {} - - explicit Collection(uint64_t length) - : data(nullptr), length(length), has_null(false), null_signs(nullptr) {} - - Collection(void* data, size_t length) - : data(data), length(length), has_null(false), null_signs(nullptr) {} - - Collection(void* data, size_t length, bool* null_signs) - : data(data), length(length), has_null(true), null_signs(null_signs) {} - - Collection(void* data, size_t length, bool has_null, bool* null_signs) - : data(data), length(length), has_null(has_null), null_signs(null_signs) {} - - bool is_null_at(uint64_t index) { return this->has_null && this->null_signs[index]; } - - bool operator==(const Collection& y) const; - bool operator!=(const Collection& value) const; - bool operator<(const Collection& value) const; - bool operator<=(const Collection& value) const; - bool operator>(const Collection& value) const; - bool operator>=(const Collection& value) const; - int32_t cmp(const Collection& other) const; -}; - -} // namespace doris diff --git a/be/src/olap/column_vector.cpp b/be/src/olap/column_vector.cpp index e21952f937..84f4f61267 100644 --- a/be/src/olap/column_vector.cpp +++ b/be/src/olap/column_vector.cpp @@ -130,19 +130,20 @@ Status ColumnVectorBatch::create(size_t init_capacity, bool is_nullable, const T std::unique_ptr elements; auto array_type_info = reinterpret_cast(type_info); - RETURN_IF_ERROR(ColumnVectorBatch::create(init_capacity * 2, field->get_sub_field(0)->is_nullable(), + RETURN_IF_ERROR(ColumnVectorBatch::create( + init_capacity * 2, field->get_sub_field(0)->is_nullable(), array_type_info->item_type_info(), field->get_sub_field(0), &elements)); std::unique_ptr offsets; - TypeInfo* bigint_type_info = get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT); - RETURN_IF_ERROR(ColumnVectorBatch::create(init_capacity + 1, false, - bigint_type_info, nullptr, &offsets)); + 
TypeInfo* offsets_type_info = + get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT); + RETURN_IF_ERROR(ColumnVectorBatch::create(init_capacity + 1, false, offsets_type_info, + nullptr, &offsets)); - std::unique_ptr local( - new ArrayColumnVectorBatch(type_info, - is_nullable, - reinterpret_cast*>(offsets.release()), - elements.release())); + std::unique_ptr local(new ArrayColumnVectorBatch( + type_info, is_nullable, + reinterpret_cast*>(offsets.release()), + elements.release())); RETURN_IF_ERROR(local->resize(init_capacity)); *column_vector_batch = std::move(local); return Status::OK(); @@ -172,7 +173,7 @@ Status ScalarColumnVectorBatch::resize(size_t new_cap) { } ArrayColumnVectorBatch::ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable, - ScalarColumnVectorBatch* offsets, + ScalarColumnVectorBatch* offsets, ColumnVectorBatch* elements) : ColumnVectorBatch(type_info, is_nullable), _data(0) { _offsets.reset(offsets); @@ -205,13 +206,14 @@ void ArrayColumnVectorBatch::prepare_for_read(size_t start_idx, size_t size, boo DCHECK(start_idx + size <= capacity()); for (size_t i = 0; i < size; ++i) { if (!is_null_at(start_idx + i)) { - _data[start_idx + i] = Collection( + _data[start_idx + i] = CollectionValue( _elements->mutable_cell_ptr(*(_offsets->scalar_cell_ptr(start_idx + i))), - *(_offsets->scalar_cell_ptr(start_idx + i + 1)) - *(_offsets->scalar_cell_ptr(start_idx + i)), + *(_offsets->scalar_cell_ptr(start_idx + i + 1)) - + *(_offsets->scalar_cell_ptr(start_idx + i)), item_has_null, - _elements->is_nullable() - ? const_cast(&_elements->null_signs()[*(_offsets->scalar_cell_ptr(start_idx + i))]) - : nullptr); + _elements->is_nullable() ? 
const_cast(&_elements->null_signs()[*( + _offsets->scalar_cell_ptr(start_idx + i))]) + : nullptr); } } } diff --git a/be/src/olap/column_vector.h b/be/src/olap/column_vector.h index 67a79fbeaa..ca5b6e1c4d 100644 --- a/be/src/olap/column_vector.h +++ b/be/src/olap/column_vector.h @@ -64,7 +64,7 @@ template class DataBuffer; template class DataBuffer; template class DataBuffer; template class DataBuffer; -template class DataBuffer; +template class DataBuffer; // struct that contains column data(null bitmap), data array in sub class. class ColumnVectorBatch { @@ -155,9 +155,7 @@ public: return reinterpret_cast(&_data[idx]); } - ScalarCppType* scalar_cell_ptr(size_t idx) { - return &_data[idx]; - } + ScalarCppType* scalar_cell_ptr(size_t idx) { return &_data[idx]; } private: DataBuffer _data; @@ -166,8 +164,9 @@ private: // util class for read array's null signs. class ArrayNullColumnVectorBatch : public ColumnVectorBatch { public: - explicit ArrayNullColumnVectorBatch(ColumnVectorBatch* array) : - ColumnVectorBatch(get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_TINYINT), false), _array(array) {} + explicit ArrayNullColumnVectorBatch(ColumnVectorBatch* array) + : ColumnVectorBatch(get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_TINYINT), false), + _array(array) {} ~ArrayNullColumnVectorBatch() override = default; @@ -194,7 +193,7 @@ private: class ArrayColumnVectorBatch : public ColumnVectorBatch { public: explicit ArrayColumnVectorBatch(const TypeInfo* type_info, bool is_nullable, - ScalarColumnVectorBatch* offsets, + ScalarColumnVectorBatch* offsets, ColumnVectorBatch* elements); ~ArrayColumnVectorBatch() override; Status resize(size_t new_cap) override; @@ -205,7 +204,7 @@ public: // Get the start of the data. 
uint8_t* data() const override { - return reinterpret_cast(const_cast(_data.data())); + return reinterpret_cast(const_cast(_data.data())); } // Get the idx's cell_ptr @@ -216,9 +215,7 @@ public: // Get thr idx's cell_ptr for write uint8_t* mutable_cell_ptr(size_t idx) override { return reinterpret_cast(&_data[idx]); } - size_t item_offset(size_t idx) const { - return *(_offsets->scalar_cell_ptr(idx)); - } + size_t item_offset(size_t idx) const { return *(_offsets->scalar_cell_ptr(idx)); } /** * Change array size to offset in this batch @@ -252,23 +249,22 @@ public: void get_offset_by_length(size_t start_idx, size_t size); size_t get_item_size(size_t start_idx, size_t size) { - return *(_offsets->scalar_cell_ptr(start_idx + size)) - *(_offsets->scalar_cell_ptr(start_idx)); + return *(_offsets->scalar_cell_ptr(start_idx + size)) - + *(_offsets->scalar_cell_ptr(start_idx)); } - ArrayNullColumnVectorBatch get_null_as_batch() { - return ArrayNullColumnVectorBatch(this); - } + ArrayNullColumnVectorBatch get_null_as_batch() { return ArrayNullColumnVectorBatch(this); } // Generate collection slots. void prepare_for_read(size_t start_idx, size_t end_idx, bool item_has_null); private: - DataBuffer _data; + DataBuffer _data; std::unique_ptr _elements; // Stores each array's start offsets in _elements. 
- std::unique_ptr> _offsets; + std::unique_ptr> _offsets; }; template class ScalarColumnVectorBatch; diff --git a/be/src/olap/field.h b/be/src/olap/field.h index bd8d535a4e..0edb4dbc99 100644 --- a/be/src/olap/field.h +++ b/be/src/olap/field.h @@ -29,6 +29,7 @@ #include "olap/tablet_schema.h" #include "olap/types.h" #include "olap/utils.h" +#include "runtime/collection_value.h" #include "runtime/mem_pool.h" #include "util/hash_util.hpp" #include "util/mem_util.hpp" @@ -47,8 +48,14 @@ public: _name(column.name()), _index_size(column.index_length()), _is_nullable(column.is_nullable()), - _agg_info(get_aggregate_info(column.aggregation(), column.type())), - _length(column.length()) {} + _length(column.length()) { + if (column.type() == OLAP_FIELD_TYPE_ARRAY) { + _agg_info = get_aggregate_info(column.aggregation(), column.type(), + column.get_sub_column(0).type()); + } else { + _agg_info = get_aggregate_info(column.aggregation(), column.type()); + } + } virtual ~Field() = default; @@ -257,9 +264,11 @@ public: } Field* get_sub_field(int i) { return _sub_fields[i].get(); } +protected: + const TypeInfo* _type_info; + private: // Field的最大长度,单位为字节,通常等于length, 变长字符串不同 - const TypeInfo* _type_info; const KeyCoder* _key_coder; std::string _name; uint16_t _index_size; @@ -377,6 +386,30 @@ uint32_t Field::hash_code(const CellType& cell, uint32_t seed) const { return _type_info->hash_code(cell.cell_ptr(), seed); } +class ArrayField : public Field { +public: + explicit ArrayField(const TabletColumn& column) : Field(column) {} + + void consume(RowCursorCell* dst, const char* src, bool src_null, MemPool* mem_pool, + ObjectPool* agg_pool) const override { + dst->set_is_null(src_null); + if (src_null) { + return; + } + _type_info->deep_copy(dst->mutable_cell_ptr(), src, mem_pool); + } + + char* allocate_memory(char* cell_ptr, char* variable_ptr) const override { + auto array_v = (CollectionValue*)cell_ptr; + array_v->set_null_signs(reinterpret_cast(variable_ptr + 
sizeof(CollectionValue))); + array_v->set_data(variable_ptr + sizeof(CollectionValue) + + OLAP_ARRAY_MAX_BYTES / sizeof(char*)); + return variable_ptr + _length; + } + + size_t get_variable_len() const override { return _length; } +}; + class CharField : public Field { public: explicit CharField() : Field() {} @@ -518,7 +551,7 @@ public: return new VarcharField(column); case OLAP_FIELD_TYPE_ARRAY: { std::unique_ptr item_field(FieldFactory::create(column.get_sub_column(0))); - auto* local = new Field(column); + auto* local = new ArrayField(column); local->add_sub_field(std::move(item_field)); return local; } @@ -542,7 +575,7 @@ public: return new VarcharField(column); case OLAP_FIELD_TYPE_ARRAY: { std::unique_ptr item_field(FieldFactory::create(column.get_sub_column(0))); - auto* local = new Field(column); + auto* local = new ArrayField(column); local->add_sub_field(std::move(item_field)); return local; } diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index 2d828d4abe..977e04b47a 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -52,11 +52,15 @@ static constexpr uint32_t OLAP_COMPACTION_DEFAULT_CANDIDATE_SIZE = 10; // the max length supported for varchar type static const uint16_t OLAP_STRING_MAX_LENGTH = 65535; +// the max length supported for array +static const uint16_t OLAP_ARRAY_MAX_LENGTH = 65535; // the max bytes for stored string length using StringOffsetType = uint32_t; using StringLengthType = uint16_t; static const uint16_t OLAP_STRING_MAX_BYTES = sizeof(StringLengthType); +// the max bytes for stored array length +static const uint16_t OLAP_ARRAY_MAX_BYTES = OLAP_ARRAY_MAX_LENGTH; enum OLAPDataVersion { OLAP_V1 = 0, diff --git a/be/src/olap/row_cursor.cpp b/be/src/olap/row_cursor.cpp index e76d9bea40..acde936186 100644 --- a/be/src/olap/row_cursor.cpp +++ b/be/src/olap/row_cursor.cpp @@ -131,7 +131,7 @@ OLAPStatus RowCursor::init_scan_key(const TabletSchema& schema, FieldType type = column.type(); if (type == 
OLAP_FIELD_TYPE_VARCHAR) { _variable_len += scan_keys[cid].length(); - } else if (type == OLAP_FIELD_TYPE_CHAR) { + } else if (type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_ARRAY) { _variable_len += std::max(scan_keys[cid].length(), column.length()); } } @@ -167,7 +167,7 @@ OLAPStatus RowCursor::init_scan_key(const TabletSchema& schema, // TODO(yingchun): parameter 'const TabletSchema& schema' is not used OLAPStatus RowCursor::allocate_memory_for_string_type(const TabletSchema& schema) { - // allocate memory for string type(char, varchar, hll) + // allocate memory for string type(char, varchar, hll, array) // The memory allocated in this function is used in aggregate and copy function if (_variable_len == 0) { return OLAP_SUCCESS; diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h index 6d8a3f1bed..2e3f157ccd 100644 --- a/be/src/olap/rowset/beta_rowset_reader.h +++ b/be/src/olap/rowset/beta_rowset_reader.h @@ -54,9 +54,9 @@ public: } private: + RowsetReaderContext* _context; BetaRowsetSharedPtr _rowset; - RowsetReaderContext* _context; OlapReaderStatistics _owned_stats; OlapReaderStatistics* _stats; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 37ead54c44..0eda8f373d 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -357,7 +357,9 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { if (is_nullable()) { RETURN_IF_ERROR(_sub_readers[2]->new_iterator(&null_iterator)); } - *iterator = new ArrayFileColumnIterator(this, reinterpret_cast(offset_iterator), item_iterator, null_iterator); + *iterator = new ArrayFileColumnIterator( + this, reinterpret_cast(offset_iterator), item_iterator, + null_iterator); return Status::OK(); } default: @@ -370,9 +372,10 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { 
//////////////////////////////////////////////////////////////////////////////// ArrayFileColumnIterator::ArrayFileColumnIterator(ColumnReader* reader, - FileColumnIterator* offset_reader, - ColumnIterator* item_iterator, - ColumnIterator* null_iterator) : _array_reader(reader) { + FileColumnIterator* offset_reader, + ColumnIterator* item_iterator, + ColumnIterator* null_iterator) + : _array_reader(reader) { _length_iterator.reset(offset_reader); _item_iterator.reset(item_iterator); if (_array_reader->is_nullable()) { @@ -386,8 +389,9 @@ Status ArrayFileColumnIterator::init(const ColumnIteratorOptions& opts) { if (_array_reader->is_nullable()) { RETURN_IF_ERROR(_null_iterator->init(opts)); } - TypeInfo* bigint_type_info = get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT); - RETURN_IF_ERROR(ColumnVectorBatch::create(1024, false, bigint_type_info, nullptr, &_length_batch)); + TypeInfo* offset_type_info = get_scalar_type_info(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT); + RETURN_IF_ERROR( + ColumnVectorBatch::create(1024, false, offset_type_info, nullptr, &_length_batch)); return Status::OK(); } @@ -397,7 +401,8 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool // 1. 
read n offsets ColumnBlock offset_block(array_batch->offsets(), nullptr); - ColumnBlockView offset_view(&offset_block, dst->current_offset() + 1); // offset应该比collection的游标多1 + ColumnBlockView offset_view(&offset_block, + dst->current_offset() + 1); // offset应该比collection的游标多1 bool offset_has_null = false; RETURN_IF_ERROR(_length_iterator->next_batch(n, &offset_view, &offset_has_null)); DCHECK(!offset_has_null); @@ -434,7 +439,8 @@ Status ArrayFileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool } ColumnBlock item_block = ColumnBlock(item_vector_batch, dst->pool()); - ColumnBlockView item_view = ColumnBlockView(&item_block, array_batch->item_offset(dst->current_offset())); + ColumnBlockView item_view = + ColumnBlockView(&item_block, array_batch->item_offset(dst->current_offset())); size_t real_read = item_size; RETURN_IF_ERROR(_item_iterator->next_batch(&real_read, &item_view, &item_has_null)); DCHECK(item_size == real_read); diff --git a/be/src/olap/rowset/segment_v2/column_writer.cpp b/be/src/olap/rowset/segment_v2/column_writer.cpp index e2c6189496..7df9900ebb 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.cpp +++ b/be/src/olap/rowset/segment_v2/column_writer.cpp @@ -110,7 +110,7 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* ColumnWriter::create(item_options, &item_column, _wblock, &item_writer)); // create length writer - FieldType length_type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_BIGINT; + FieldType length_type = FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT; ColumnWriterOptions length_options; length_options.meta = opts.meta->add_children_columns(); @@ -126,13 +126,14 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* length_options.need_bloom_filter = false; length_options.need_bitmap_index = false; - TabletColumn length_column = TabletColumn(OLAP_FIELD_AGGREGATION_NONE, length_type, length_options.meta->is_nullable(), - length_options.meta->unique_id(), 
length_options.meta->length()); + TabletColumn length_column = TabletColumn( + OLAP_FIELD_AGGREGATION_NONE, length_type, length_options.meta->is_nullable(), + length_options.meta->unique_id(), length_options.meta->length()); length_column.set_name("length"); length_column.set_index_length(-1); // no short key index - std::unique_ptr bigint_field( - FieldFactory::create(length_column)); - auto* length_writer = new ScalarColumnWriter(length_options, std::move(bigint_field), _wblock); + std::unique_ptr bigint_field(FieldFactory::create(length_column)); + auto* length_writer = + new ScalarColumnWriter(length_options, std::move(bigint_field), _wblock); // if nullable, create null writer ScalarColumnWriter* null_writer = nullptr; @@ -152,18 +153,18 @@ Status ColumnWriter::create(const ColumnWriterOptions& opts, const TabletColumn* null_options.need_bloom_filter = false; null_options.need_bitmap_index = false; - TabletColumn null_column = TabletColumn(OLAP_FIELD_AGGREGATION_NONE, null_type, length_options.meta->is_nullable(), - null_options.meta->unique_id(), null_options.meta->length()); + TabletColumn null_column = TabletColumn( + OLAP_FIELD_AGGREGATION_NONE, null_type, length_options.meta->is_nullable(), + null_options.meta->unique_id(), null_options.meta->length()); null_column.set_name("nullable"); null_column.set_index_length(-1); // no short key index - std::unique_ptr null_field( - FieldFactory::create(null_column)); + std::unique_ptr null_field(FieldFactory::create(null_column)); null_writer = new ScalarColumnWriter(null_options, std::move(null_field), _wblock); } - std::unique_ptr writer_local = - std::unique_ptr(new ArrayColumnWriter( - opts, std::move(field), length_writer, null_writer, std::move(item_writer))); + std::unique_ptr writer_local = std::unique_ptr( + new ArrayColumnWriter(opts, std::move(field), length_writer, null_writer, + std::move(item_writer))); *writer = std::move(writer_local); return Status::OK(); } @@ -316,6 +317,25 @@ Status 
ScalarColumnWriter::append_data_in_current_page(const uint8_t** ptr, size return Status::OK(); } +Status ScalarColumnWriter::append_data_in_current_page(const uint8_t* ptr, size_t* num_written) { + RETURN_IF_ERROR(_page_builder->add(ptr, num_written)); + if (_opts.need_zone_map) { + _zone_map_index_builder->add_values(ptr, *num_written); + } + if (_opts.need_bitmap_index) { + _bitmap_index_builder->add_values(ptr, *num_written); + } + if (_opts.need_bloom_filter) { + _bloom_filter_index_builder->add_values(ptr, *num_written); + } + + _next_rowid += *num_written; + if (is_nullable()) { + _null_bitmap_builder->add_run(false, *num_written); + } + return Status::OK(); +} + uint64_t ScalarColumnWriter::estimate_buffer_size() { uint64_t size = _data_size; size += _page_builder->size(); @@ -469,7 +489,8 @@ ArrayColumnWriter::ArrayColumnWriter(const ColumnWriterOptions& opts, std::uniqu ScalarColumnWriter* null_writer, std::unique_ptr item_writer) : ColumnWriter(std::move(field), opts.meta->is_nullable()), - _item_writer(std::move(item_writer)) { + _item_writer(std::move(item_writer)), + _opts(opts) { _length_writer.reset(length_writer); if (is_nullable()) { _null_writer.reset(null_writer); @@ -494,29 +515,32 @@ Status ArrayColumnWriter::put_extra_info_in_page(DataPageFooterPB* footer) { // Now we can only write data one by one. 
Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { size_t remaining = num_rows; - const auto* col_cursor = reinterpret_cast(*ptr); - + const auto* col_cursor = reinterpret_cast(*ptr); while (remaining > 0) { // TODO llj: bulk write size_t num_written = 1; - auto size_ptr = &(col_cursor->length); - RETURN_IF_ERROR(_length_writer->append_data_in_current_page((const uint8_t**)&size_ptr, &num_written)); - if (num_written < 1) { // page is full, write first item offset and update current length page's start ordinal + auto size_ptr = col_cursor->length(); + RETURN_IF_ERROR(_length_writer->append_data_in_current_page( + reinterpret_cast(&size_ptr), &num_written)); + if (num_written < + 1) { // page is full, write first item offset and update current length page's start ordinal RETURN_IF_ERROR(_length_writer->finish_current_page()); _current_length_page_first_ordinal += _lengh_sum_in_cur_page; _lengh_sum_in_cur_page = 0; } else { // write child item. if (_item_writer->is_nullable()) { - auto* item_data_ptr = col_cursor->data; - for (size_t i = 0; i < col_cursor->length; ++i) { - RETURN_IF_ERROR(_item_writer->append(col_cursor->null_signs[i], item_data_ptr)); + auto* item_data_ptr = const_cast(col_cursor)->mutable_data(); + for (size_t i = 0; i < col_cursor->length(); ++i) { + RETURN_IF_ERROR(_item_writer->append(col_cursor->is_null_at(i), item_data_ptr)); item_data_ptr = (uint8_t*)item_data_ptr + _item_writer->get_field()->size(); } } else { - RETURN_IF_ERROR(_item_writer->append_data((const uint8_t**)&(col_cursor->data), col_cursor->length)); + const void* data = col_cursor->data(); + RETURN_IF_ERROR(_item_writer->append_data(reinterpret_cast(&data), + col_cursor->length())); } - _lengh_sum_in_cur_page += col_cursor->length; + _lengh_sum_in_cur_page += col_cursor->length(); } remaining -= num_written; col_cursor += num_written; @@ -529,8 +553,8 @@ Status ArrayColumnWriter::append_data(const uint8_t** ptr, size_t num_rows) { uint64_t 
ArrayColumnWriter::estimate_buffer_size() { return _length_writer->estimate_buffer_size() + - (is_nullable() ? _null_writer->estimate_buffer_size() : 0) + - _item_writer->estimate_buffer_size(); + (is_nullable() ? _null_writer->estimate_buffer_size() : 0) + + _item_writer->estimate_buffer_size(); } Status ArrayColumnWriter::finish() { @@ -563,9 +587,9 @@ Status ArrayColumnWriter::write_ordinal_index() { Status ArrayColumnWriter::append_nulls(size_t num_rows) { size_t num_lengths = num_rows; const ordinal_t zero = 0; - while(num_lengths > 0) { + while (num_lengths > 0) { // TODO llj bulk write - const auto* zero_ptr = reinterpret_cast(&zero); + const auto* zero_ptr = reinterpret_cast(&zero); RETURN_IF_ERROR(_length_writer->append_data(&zero_ptr, 1)); --num_lengths; } @@ -574,7 +598,7 @@ Status ArrayColumnWriter::append_nulls(size_t num_rows) { Status ArrayColumnWriter::write_null_column(size_t num_rows, bool is_null) { uint8_t null_sign = is_null ? 1 : 0; - while(num_rows > 0) { + while (num_rows > 0) { // TODO llj bulk write const uint8_t* null_sign_ptr = &null_sign; RETURN_IF_ERROR(_null_writer->append_data(&null_sign_ptr, 1)); diff --git a/be/src/olap/rowset/segment_v2/column_writer.h b/be/src/olap/rowset/segment_v2/column_writer.h index ba904cccf3..b98f4883ca 100644 --- a/be/src/olap/rowset/segment_v2/column_writer.h +++ b/be/src/olap/rowset/segment_v2/column_writer.h @@ -132,6 +132,9 @@ public: // used for append not null data. When page is full, will append data not reach num_rows. virtual Status append_data_in_current_page(const uint8_t** ptr, size_t* num_rows) = 0; + // used for append not null data. When page is full, will append data not reach num_rows. 
+ virtual Status append_data_in_current_page(const uint8_t* ptr, size_t* num_rows) = 0; + bool is_nullable() const { return _is_nullable; } Field* get_field() const { return _field.get(); } @@ -139,6 +142,7 @@ public: private: std::unique_ptr _field; bool _is_nullable; + protected: std::shared_ptr _mem_tracker; }; @@ -183,6 +187,7 @@ public: Status append_data(const uint8_t** ptr, size_t num_rows) override; Status append_data_in_current_page(const uint8_t** ptr, size_t* num_rows) override; + Status append_data_in_current_page(const uint8_t* ptr, size_t* num_rows) override; private: std::unique_ptr _page_builder; @@ -253,8 +258,7 @@ private: class ArrayColumnWriter final : public ColumnWriter, public FlushPageCallback { public: explicit ArrayColumnWriter(const ColumnWriterOptions& opts, std::unique_ptr field, - ScalarColumnWriter* offset_writer, - ScalarColumnWriter* null_writer, + ScalarColumnWriter* offset_writer, ScalarColumnWriter* null_writer, std::unique_ptr item_writer); ~ArrayColumnWriter() override = default; @@ -262,7 +266,12 @@ public: Status append_data(const uint8_t** ptr, size_t num_rows) override; Status append_data_in_current_page(const uint8_t** ptr, size_t* num_rows) override { - return Status::NotSupported("array writer has no data, can not append_data_in_current_page"); + return Status::NotSupported( + "array writer has no data, can not append_data_in_current_page"); + } + Status append_data_in_current_page(const uint8_t* ptr, size_t* num_rows) override { + return Status::NotSupported( + "array writer has no data, can not append_data_in_current_page"); } uint64_t estimate_buffer_size() override; @@ -274,12 +283,25 @@ public: Status finish_current_page() override; - Status write_zone_map() override { return Status::OK(); } - - Status write_bitmap_index() override { return Status::OK(); } - - Status write_bloom_filter_index() override { return Status::OK(); } + Status write_zone_map() override { + if (_opts.need_zone_map) { + return 
Status::NotSupported("array not support zone map"); + } + return Status::OK(); + } + Status write_bitmap_index() override { + if (_opts.need_bitmap_index) { + return Status::NotSupported("array not support bitmap index"); + } + return Status::OK(); + } + Status write_bloom_filter_index() override { + if (_opts.need_bloom_filter) { + return Status::NotSupported("array not support bloom filter index"); + } + return Status::OK(); + } ordinal_t get_next_rowid() const override { return _length_writer->get_next_rowid(); } private: @@ -290,6 +312,7 @@ private: std::unique_ptr _length_writer; std::unique_ptr _null_writer; std::unique_ptr _item_writer; + ColumnWriterOptions _opts; ordinal_t _current_length_page_first_ordinal = 0; ordinal_t _lengh_sum_in_cur_page = 0; }; diff --git a/be/src/olap/rowset/segment_v2/encoding_info.cpp b/be/src/olap/rowset/segment_v2/encoding_info.cpp index 6438bb6c8c..322ed7f507 100644 --- a/be/src/olap/rowset/segment_v2/encoding_info.cpp +++ b/be/src/olap/rowset/segment_v2/encoding_info.cpp @@ -82,7 +82,7 @@ struct TypeEncodingTraits -struct TypeEncodingTraits { +struct TypeEncodingTraits { static Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) { *builder = new BitshufflePageBuilder(opts); return Status::OK(); @@ -234,6 +234,7 @@ EncodingInfoResolver::EncodingInfoResolver() { _add_map(); _add_map(); + _add_map(); _add_map(); _add_map(); diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 866dda1107..ca942c5fa1 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -119,45 +119,16 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id for (TColumn tcolumn : tablet_schema.columns) { ColumnPB* column = schema->add_column(); uint32_t unique_id = col_ordinal_to_unique_id.at(col_ordinal++); - column->set_unique_id(unique_id); - column->set_name(tcolumn.column_name); - column->set_has_bitmap_index(false); - string data_type; - 
EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type); - column->set_type(data_type); - if (tcolumn.column_type.type == TPrimitiveType::DECIMALV2) { - column->set_precision(tcolumn.column_type.precision); - column->set_frac(tcolumn.column_type.scale); - } - uint32_t length = TabletColumn::get_field_length_by_type(tcolumn.column_type.type, - tcolumn.column_type.len); - column->set_length(length); - column->set_index_length(length); - if (tcolumn.column_type.type == TPrimitiveType::VARCHAR) { - if (!tcolumn.column_type.__isset.index_len) { - column->set_index_length(10); - } else { - column->set_index_length(tcolumn.column_type.index_len); - } - } - if (!tcolumn.is_key) { - column->set_is_key(false); - string aggregation_type; - EnumToString(TAggregationType, tcolumn.aggregation_type, aggregation_type); - column->set_aggregation(aggregation_type); - } else { + _init_column_from_tcolumn(unique_id, tcolumn, column); + + if (column->is_key()) { ++key_count; - column->set_is_key(true); - column->set_aggregation("NONE"); } - column->set_is_nullable(tcolumn.is_allow_null); - if (tcolumn.__isset.default_value) { - column->set_default_value(tcolumn.default_value); - } - if (tcolumn.__isset.is_bloom_filter_column) { - column->set_is_bf_column(tcolumn.is_bloom_filter_column); + + if (column->is_bf_column()) { has_bf_columns = true; } + if (tablet_schema.__isset.indexes) { for (auto& index : tablet_schema.indexes) { if (index.index_type == TIndexType::type::BITMAP) { @@ -169,6 +140,11 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id } } } + + if (tcolumn.column_type.type == TPrimitiveType::ARRAY) { + ColumnPB* children_column = column->add_children_columns(); + _init_column_from_tcolumn(0, tcolumn.children_column[0], children_column); + } } schema->set_next_column_unique_id(next_unique_id); @@ -187,6 +163,48 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id init_from_pb(tablet_meta_pb); } +void 
TabletMeta::_init_column_from_tcolumn(uint32_t unique_id, const TColumn& tcolumn, + ColumnPB* column) { + column->set_unique_id(unique_id); + column->set_name(tcolumn.column_name); + column->set_has_bitmap_index(false); + string data_type; + EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type); + column->set_type(data_type); + + if (tcolumn.column_type.type == TPrimitiveType::DECIMALV2) { + column->set_precision(tcolumn.column_type.precision); + column->set_frac(tcolumn.column_type.scale); + } + uint32_t length = TabletColumn::get_field_length_by_type(tcolumn.column_type.type, + tcolumn.column_type.len); + column->set_length(length); + column->set_index_length(length); + if (tcolumn.column_type.type == TPrimitiveType::VARCHAR) { + if (!tcolumn.column_type.__isset.index_len) { + column->set_index_length(10); + } else { + column->set_index_length(tcolumn.column_type.index_len); + } + } + if (!tcolumn.is_key) { + column->set_is_key(false); + string aggregation_type; + EnumToString(TAggregationType, tcolumn.aggregation_type, aggregation_type); + column->set_aggregation(aggregation_type); + } else { + column->set_is_key(true); + column->set_aggregation("NONE"); + } + column->set_is_nullable(tcolumn.is_allow_null); + if (tcolumn.__isset.default_value) { + column->set_default_value(tcolumn.default_value); + } + if (tcolumn.__isset.is_bloom_filter_column) { + column->set_is_bf_column(tcolumn.is_bloom_filter_column); + } +} + OLAPStatus TabletMeta::create_from_file(const string& file_path) { FileHeader file_header; FileHandler file_handler; diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 781928036e..dbd594ed37 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -200,12 +200,11 @@ public: } // used for after tablet cloned to clear stale rowset - void clear_stale_rowset() { - _stale_rs_metas.clear(); - } + void clear_stale_rowset() { _stale_rs_metas.clear(); } private: OLAPStatus _save_meta(DataDir* data_dir); + void 
_init_column_from_tcolumn(uint32_t unique_id, const TColumn& tcolumn, ColumnPB* column); // _del_pred_array is ignored to compare. friend bool operator==(const TabletMeta& a, const TabletMeta& b); diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 96b35b80f2..09f456d637 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -72,6 +72,8 @@ FieldType TabletColumn::get_field_type_by_string(const std::string& type_str) { type = OLAP_FIELD_TYPE_MAP; } else if (0 == upper_type_str.compare("OBJECT")) { type = OLAP_FIELD_TYPE_OBJECT; + } else if (0 == upper_type_str.compare("ARRAY")) { + type = OLAP_FIELD_TYPE_ARRAY; } else { LOG(WARNING) << "invalid type string. [type='" << type_str << "']"; type = OLAP_FIELD_TYPE_UNKNOWN; @@ -172,7 +174,7 @@ std::string TabletColumn::get_string_by_field_type(FieldType type) { return "STRUCT"; case OLAP_FIELD_TYPE_ARRAY: - return "LIST"; + return "ARRAY"; case OLAP_FIELD_TYPE_MAP: return "MAP"; @@ -244,6 +246,8 @@ uint32_t TabletColumn::get_field_length_by_type(TPrimitiveType::type type, uint3 case TPrimitiveType::VARCHAR: case TPrimitiveType::HLL: return string_length + sizeof(OLAP_STRING_MAX_LENGTH); + case TPrimitiveType::ARRAY: + return OLAP_ARRAY_MAX_LENGTH; case TPrimitiveType::DECIMALV2: return 12; // use 12 bytes in olap engine. 
default: @@ -319,7 +323,7 @@ void TabletColumn::init_from_pb(const ColumnPB& column) { _visible = column.visible(); } if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { - DCHECK(column.children_columns_size() == 1) << "LIST type has more than 1 children types."; + DCHECK(column.children_columns_size() == 1) << "ARRAY type has more than 1 children types."; TabletColumn child_column; child_column.init_from_pb(column.children_columns(0)); add_sub_column(child_column); @@ -352,6 +356,12 @@ void TabletColumn::to_schema_pb(ColumnPB* column) { column->set_has_bitmap_index(_has_bitmap_index); } column->set_visible(_visible); + + if (_type == FieldType::OLAP_FIELD_TYPE_ARRAY) { + DCHECK(_sub_columns.size() == 1) << "ARRAY type has more than 1 children types."; + ColumnPB* child = column->add_children_columns(); + _sub_columns[0].to_schema_pb(child); + } } uint32_t TabletColumn::mem_size() const { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 2a5053f2d9..ee2f9207fc 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -22,6 +22,7 @@ #include "gen_cpp/olap_file.pb.h" #include "olap/olap_define.h" +#include "olap/tablet_schema.h" #include "olap/types.h" namespace doris { diff --git a/be/src/olap/types.cpp b/be/src/olap/types.cpp index f52fe5edb2..136bd7e2b0 100644 --- a/be/src/olap/types.cpp +++ b/be/src/olap/types.cpp @@ -145,6 +145,11 @@ TypeInfo* get_type_info(FieldType field_type) { return get_scalar_type_info(field_type); } +// get array array type info +TypeInfo* get_collection_type_info(FieldType sub_type) { + return ArrayTypeInfoResolver::instance()->get_type_info(sub_type); +} + TypeInfo* get_type_info(segment_v2::ColumnMetaPB* column_meta_pb) { FieldType type = (FieldType)column_meta_pb->type(); if (is_scalar_type(type)) { @@ -152,8 +157,9 @@ TypeInfo* get_type_info(segment_v2::ColumnMetaPB* column_meta_pb) { } else { switch (type) { case OLAP_FIELD_TYPE_ARRAY: { - DCHECK(column_meta_pb->children_columns_size() >= 
1 && column_meta_pb->children_columns_size() <=3) - << "more than 3 children or no children."; + DCHECK(column_meta_pb->children_columns_size() >= 1 && + column_meta_pb->children_columns_size() <= 3) + << "more than 3 children or no children."; auto child_type = (FieldType)column_meta_pb->children_columns(0).type(); return ArrayTypeInfoResolver::instance()->get_type_info(child_type); } diff --git a/be/src/olap/types.h b/be/src/olap/types.h index 6c74b2c34c..0d8aec385c 100644 --- a/be/src/olap/types.h +++ b/be/src/olap/types.h @@ -27,12 +27,12 @@ #include "gen_cpp/segment_v2.pb.h" // for ColumnMetaPB #include "gutil/strings/numbers.h" -#include "olap/collection.h" #include "olap/decimal12.h" #include "olap/olap_common.h" #include "olap/olap_define.h" #include "olap/tablet_schema.h" // for TabletColumn #include "olap/uint24.h" +#include "runtime/collection_value.h" #include "runtime/datetime_value.h" #include "runtime/mem_pool.h" #include "util/hash_util.hpp" @@ -155,33 +155,33 @@ public: : _item_type_info(item_type_info), _item_size(item_type_info->size()) {} inline bool equal(const void* left, const void* right) const override { - auto l_value = reinterpret_cast(left); - auto r_value = reinterpret_cast(right); - if (l_value->length != r_value->length) { + auto l_value = reinterpret_cast(left); + auto r_value = reinterpret_cast(right); + if (l_value->length() != r_value->length()) { return false; } - size_t len = l_value->length; + size_t len = l_value->length(); - if (!l_value->has_null && !r_value->has_null) { + if (!l_value->has_null() && !r_value->has_null()) { for (size_t i = 0; i < len; ++i) { - if (!_item_type_info->equal((uint8_t*)(l_value->data) + i * _item_size, - (uint8_t*)(r_value->data) + i * _item_size)) { + if (!_item_type_info->equal((uint8_t*)(l_value->data()) + i * _item_size, + (uint8_t*)(r_value->data()) + i * _item_size)) { return false; } } } else { for (size_t i = 0; i < len; ++i) { - if (l_value->null_signs[i]) { - if (r_value->null_signs[i]) 
{ // both are null + if (l_value->is_null_at(i)) { + if (r_value->is_null_at(i)) { // both are null continue; } else { // left is null & right is not null return false; } - } else if (r_value->null_signs[i]) { // left is not null & right is null + } else if (r_value->is_null_at(i)) { // left is not null & right is null return false; } - if (!_item_type_info->equal((uint8_t*)(l_value->data) + i * _item_size, - (uint8_t*)(r_value->data) + i * _item_size)) { + if (!_item_type_info->equal((uint8_t*)(l_value->data()) + i * _item_size, + (uint8_t*)(r_value->data()) + i * _item_size)) { return false; } } @@ -190,16 +190,16 @@ public: } inline int cmp(const void* left, const void* right) const override { - auto l_value = reinterpret_cast(left); - auto r_value = reinterpret_cast(right); - size_t l_length = l_value->length; - size_t r_length = r_value->length; + auto l_value = reinterpret_cast(left); + auto r_value = reinterpret_cast(right); + size_t l_length = l_value->length(); + size_t r_length = r_value->length(); size_t cur = 0; - if (!l_value->has_null && !r_value->has_null) { + if (!l_value->has_null() && !r_value->has_null()) { while (cur < l_length && cur < r_length) { - int result = _item_type_info->cmp((uint8_t*)(l_value->data) + cur * _item_size, - (uint8_t*)(r_value->data) + cur * _item_size); + int result = _item_type_info->cmp((uint8_t*)(l_value->data()) + cur * _item_size, + (uint8_t*)(r_value->data()) + cur * _item_size); if (result != 0) { return result; } @@ -207,15 +207,16 @@ public: } } else { while (cur < l_length && cur < r_length) { - if (l_value->null_signs[cur]) { - if (!r_value->null_signs[cur]) { // left is null & right is not null + if (l_value->is_null_at(cur)) { + if (!r_value->is_null_at(cur)) { // left is null & right is not null return -1; } - } else if (r_value->null_signs[cur]) { // left is not null & right is null + } else if (r_value->is_null_at(cur)) { // left is not null & right is null return 1; } else { // both are not null - int 
result = _item_type_info->cmp((uint8_t*)(l_value->data) + cur * _item_size, - (uint8_t*)(r_value->data) + cur * _item_size); + int result = + _item_type_info->cmp((uint8_t*)(l_value->data()) + cur * _item_size, + (uint8_t*)(r_value->data()) + cur * _item_size); if (result != 0) { return result; } @@ -234,34 +235,36 @@ public: } inline void shallow_copy(void* dest, const void* src) const override { - *reinterpret_cast(dest) = *reinterpret_cast(src); + auto dest_value = reinterpret_cast(dest); + auto src_value = reinterpret_cast(src); + dest_value->shallow_copy(src_value); } inline void deep_copy(void* dest, const void* src, MemPool* mem_pool) const { - auto dest_value = reinterpret_cast(dest); - auto src_value = reinterpret_cast(src); + auto dest_value = reinterpret_cast(dest); + auto src_value = reinterpret_cast(src); - dest_value->length = src_value->length; + dest_value->set_length(src_value->length()); - size_t item_size = src_value->length * _item_size; - size_t nulls_size = src_value->has_null ? src_value->length : 0; - dest_value->data = mem_pool->allocate(item_size + nulls_size); - dest_value->has_null = src_value->has_null; - dest_value->null_signs = src_value->has_null - ? reinterpret_cast(dest_value->data) + item_size - : nullptr; + size_t item_size = src_value->length() * _item_size; + size_t nulls_size = src_value->has_null() ? src_value->length() : 0; + dest_value->set_data(mem_pool->allocate(item_size + nulls_size)); + dest_value->set_has_null(src_value->has_null()); + dest_value->set_null_signs(src_value->has_null() + ? 
reinterpret_cast(dest_value->mutable_data()) + + item_size + : nullptr); // copy null_signs - if (src_value->has_null) { - memory_copy(dest_value->null_signs, src_value->null_signs, - sizeof(bool) * src_value->length); + if (src_value->has_null()) { + dest_value->copy_null_signs(src_value); } // copy item - for (uint32_t i = 0; i < src_value->length; ++i) { + for (uint32_t i = 0; i < src_value->length(); ++i) { if (dest_value->is_null_at(i)) continue; - _item_type_info->deep_copy((uint8_t*)(dest_value->data) + i * _item_size, - (uint8_t*)(src_value->data) + i * _item_size, mem_pool); + _item_type_info->deep_copy((uint8_t*)(dest_value->mutable_data()) + i * _item_size, + (uint8_t*)(src_value->data()) + i * _item_size, mem_pool); } } @@ -269,23 +272,22 @@ public: deep_copy(dest, src, mem_pool); } - // TODO llj: How to ensure sufficient length of item inline void direct_copy(void* dest, const void* src) const override { - auto dest_value = reinterpret_cast(dest); - auto src_value = reinterpret_cast(src); - - dest_value->length = src_value->length; - dest_value->has_null = src_value->has_null; - if (src_value->has_null) { + auto dest_value = reinterpret_cast(dest); + auto src_value = reinterpret_cast(src); + dest_value->set_length(src_value->length()); + dest_value->set_has_null(src_value->has_null()); + if (src_value->has_null()) { // direct copy null_signs - memory_copy(dest_value->null_signs, src_value->null_signs, src_value->length); + memory_copy(dest_value->mutable_null_signs(), src_value->null_signs(), + src_value->length()); } // direct opy item - for (uint32_t i = 0; i < src_value->length; ++i) { + for (uint32_t i = 0; i < src_value->length(); ++i) { if (dest_value->is_null_at(i)) continue; - _item_type_info->direct_copy((uint8_t*)(dest_value->data) + i * _item_size, - (uint8_t*)(src_value->data) + i * _item_size); + _item_type_info->direct_copy((uint8_t*)(dest_value->mutable_data()) + i * _item_size, + (uint8_t*)(src_value->data()) + i * _item_size); } } @@ 
-299,14 +301,14 @@ public: } std::string to_string(const void* src) const override { - auto src_value = reinterpret_cast(src); + auto src_value = reinterpret_cast(src); std::string result = "["; - for (size_t i = 0; i < src_value->length; ++i) { + for (size_t i = 0; i < src_value->length(); ++i) { std::string item = - _item_type_info->to_string((uint8_t*)(src_value->data) + i * _item_size); + _item_type_info->to_string((uint8_t*)(src_value->data()) + i * _item_size); result += item; - if (i != src_value->length - 1) { + if (i != src_value->length() - 1) { result += ", "; } } @@ -323,20 +325,21 @@ public: } inline uint32_t hash_code(const void* data, uint32_t seed) const override { - auto value = reinterpret_cast(data); - uint32_t result = HashUtil::hash(&(value->length), sizeof(size_t), seed); - for (size_t i = 0; i < value->length; ++i) { - if (value->null_signs[i]) { + auto value = reinterpret_cast(data); + auto len = value->length(); + uint32_t result = HashUtil::hash(&len, sizeof(size_t), seed); + for (size_t i = 0; i < len; ++i) { + if (value->is_null_at(i)) { result = seed * result; } else { - result = seed * result + - _item_type_info->hash_code((uint8_t*)(value->data) + i * _item_size, seed); + result = seed * result + _item_type_info->hash_code( + (uint8_t*)(value->data()) + i * _item_size, seed); } } return result; } - inline const size_t size() const override { return sizeof(Collection); } + inline const size_t size() const override { return sizeof(CollectionValue); } inline FieldType type() const override { return OLAP_FIELD_TYPE_ARRAY; } @@ -351,6 +354,8 @@ extern bool is_scalar_type(FieldType field_type); extern TypeInfo* get_scalar_type_info(FieldType field_type); +extern TypeInfo* get_collection_type_info(FieldType sub_type); + extern TypeInfo* get_type_info(FieldType field_type); extern TypeInfo* get_type_info(segment_v2::ColumnMetaPB* column_meta_pb); @@ -358,7 +363,7 @@ extern TypeInfo* get_type_info(segment_v2::ColumnMetaPB* column_meta_pb); 
extern TypeInfo* get_type_info(const TabletColumn* col); // support following formats when convert varchar to date -static const std::vector DATE_FORMATS{ +static const std::vector DATE_FORMATS { "%Y-%m-%d", "%y-%m-%d", "%Y%m%d", "%y%m%d", "%Y/%m/%d", "%y/%m/%d", }; @@ -446,7 +451,7 @@ struct CppTypeTraits { }; template <> struct CppTypeTraits { - using CppType = Collection; + using CppType = CollectionValue; }; template diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index 9372f0dafb..0fffb5a964 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -50,6 +50,7 @@ set(RUNTIME_FILES # timestamp_value.cpp decimalv2_value.cpp large_int_value.cpp + collection_value.cpp tuple.cpp tuple_row.cpp vectorized_row_batch.cpp diff --git a/be/src/runtime/collection_value.cpp b/be/src/runtime/collection_value.cpp new file mode 100644 index 0000000000..0729646e13 --- /dev/null +++ b/be/src/runtime/collection_value.cpp @@ -0,0 +1,270 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "runtime/collection_value.h" + +#include "common/logging.h" +#include "exprs/anyval_util.h" + +namespace doris { +int sizeof_type(PrimitiveType type) { + switch (type) { + case TYPE_INT: + return sizeof(int32_t); + case TYPE_CHAR: + case TYPE_VARCHAR: + return sizeof(StringValue); + case TYPE_NULL: + return 0; + default: + DCHECK(false) << "Type not implemented: " << type; + break; + } + + return 0; +} + +Status type_check(PrimitiveType type) { + switch (type) { + case TYPE_INT: + case TYPE_CHAR: + case TYPE_VARCHAR: + case TYPE_NULL: + break; + default: + return Status::InvalidArgument("Type not implemented: " + type); + } + + return Status::OK(); +} + +void CollectionValue::to_collection_val(CollectionVal* val) const { + val->length = _length; + val->data = _data; + val->null_signs = _null_signs; + val->has_null = _has_null; +} + +void CollectionValue::shallow_copy(const CollectionValue* value) { + _length = value->_length; + _null_signs = value->_null_signs; + _data = value->_data; + _has_null = value->_has_null; +} + +void CollectionValue::copy_null_signs(const CollectionValue* other) { + if (other->_has_null) { + memcpy(_null_signs, other->_null_signs, other->size()); + } else { + _null_signs = nullptr; + } +} + +ArrayIterator CollectionValue::iterator(PrimitiveType children_type) const { + return ArrayIterator(children_type, this); +} + +Status CollectionValue::init_collection(ObjectPool* pool, uint32_t size, PrimitiveType child_type, + CollectionValue* val) { + if (val == nullptr) { + return Status::InvalidArgument("collection value is null"); + } + + RETURN_IF_ERROR(type_check(child_type)); + + if (size == 0) { + return Status::OK(); + } + + val->_length = size; + val->_null_signs = pool->add_array(new bool[size]{0}); + val->_data = pool->add_array(new uint8_t[size * sizeof_type(child_type)]); + + return Status::OK(); +} + +Status CollectionValue::init_collection(MemPool* pool, uint32_t size, PrimitiveType child_type, + CollectionValue* val) { + 
if (val == nullptr) { + return Status::InvalidArgument("collection value is null"); + } + + RETURN_IF_ERROR(type_check(child_type)); + + if (size == 0) { + return Status::OK(); + } + + val->_length = size; + val->_null_signs = (bool*)pool->allocate(size * sizeof(bool)); + memset(val->_null_signs, 0, size); + + val->_data = pool->allocate(sizeof_type(child_type) * size); + + return Status::OK(); +} + +Status CollectionValue::init_collection(FunctionContext* context, uint32_t size, + PrimitiveType child_type, CollectionValue* val) { + if (val == nullptr) { + return Status::InvalidArgument("collection value is null"); + } + + RETURN_IF_ERROR(type_check(child_type)); + + if (size == 0) { + return Status::OK(); + } + + val->_length = size; + val->_null_signs = (bool*)context->allocate(size * sizeof(bool)); + memset(val->_null_signs, 0, size); + + val->_data = context->allocate(sizeof_type(child_type) * size); + + return Status::OK(); +} + +CollectionValue CollectionValue::from_collection_val(const CollectionVal& val) { + return CollectionValue(val.data, val.length, val.null_signs); +} + +Status CollectionValue::set(uint32_t i, PrimitiveType type, const AnyVal* value) { + RETURN_IF_ERROR(type_check(type)); + + ArrayIterator iter(type, this); + if (!iter.seek(i)) { + return Status::InvalidArgument("over of collection size"); + } + + if (value->is_null) { + *(_null_signs + i) = true; + _has_null = true; + return Status::OK(); + } else { + *(_null_signs + i) = false; + } + + switch (type) { + case TYPE_INT: + *reinterpret_cast(iter.value()) = reinterpret_cast(value)->val; + break; + case TYPE_CHAR: + case TYPE_VARCHAR: { + const StringVal* src = reinterpret_cast(value); + StringValue* dest = reinterpret_cast(iter.value()); + dest->len = src->len; + dest->ptr = (char*)src->ptr; + break; + } + default: + DCHECK(false) << "Type not implemented: " << type; + return Status::InvalidArgument("Type not implemented"); + } + + return Status::OK(); +} + +/** + * ----------- Array 
Iterator -------- + */ +ArrayIterator::ArrayIterator(PrimitiveType children_type, const CollectionValue* data) + : _offset(0), _type(children_type), _data(data) { + _type_size = sizeof_type(children_type); +} + +void* ArrayIterator::value() { + if (is_null()) { + return nullptr; + } + return ((char*)_data->_data) + _offset * _type_size; +} + +bool ArrayIterator::is_null() { + return _data->is_null_at(_offset); +} + +void ArrayIterator::value(AnyVal* dest) { + if (is_null()) { + dest->is_null = true; + return; + } + dest->is_null = false; + switch (_type) { + case TYPE_BOOLEAN: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_TINYINT: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_SMALLINT: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_INT: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_BIGINT: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_FLOAT: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + + case TYPE_DOUBLE: + reinterpret_cast(dest)->val = *reinterpret_cast(value()); + break; + case TYPE_HLL: + case TYPE_CHAR: + case TYPE_VARCHAR: { + const StringValue* str_value = reinterpret_cast(value()); + reinterpret_cast(dest)->len = str_value->len; + reinterpret_cast(dest)->ptr = (uint8_t*)(str_value->ptr); + break; + } + case TYPE_DATE: + case TYPE_DATETIME: { + const DateTimeValue* date_time_value = reinterpret_cast(value()); + reinterpret_cast(dest)->packed_time = date_time_value->to_int64(); + reinterpret_cast(dest)->type = date_time_value->type(); + break; + } + + case TYPE_DECIMALV2: + reinterpret_cast(dest)->val = + reinterpret_cast(value())->value; + break; + + case TYPE_LARGEINT: + reinterpret_cast(dest)->val = + reinterpret_cast(value())->value; + break; + + case TYPE_ARRAY: + reinterpret_cast(value())->to_collection_val( + reinterpret_cast(dest)); + break; + + 
default: + DCHECK(false) << "bad type: " << _type; + } +} +} // namespace doris diff --git a/be/src/runtime/collection_value.h b/be/src/runtime/collection_value.h new file mode 100644 index 0000000000..ed9f4c8268 --- /dev/null +++ b/be/src/runtime/collection_value.h @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "common/object_pool.h" +#include "common/status.h" +#include "runtime/mem_pool.h" +#include "runtime/primitive_type.h" +#include "udf/udf.h" + +namespace doris { + +using doris_udf::AnyVal; + +class ArrayIterator; + +/** + * The format of array-typed slot. 
+ * The array's sub-element type just support: + * - INT32 + * - CHAR + * - VARCHAR + * - NULL + * + * A new array need initialization memory before used + */ +struct CollectionValue { +public: + CollectionValue() = default; + + explicit CollectionValue(uint32_t length) + : _data(nullptr), _length(length), _has_null(false), _null_signs(nullptr) {} + + CollectionValue(void* data, uint32_t length) + : _data(data), _length(length), _has_null(false), _null_signs(nullptr) {} + + CollectionValue(void* data, uint32_t length, bool* null_signs) + : _data(data), _length(length), _has_null(true), _null_signs(null_signs) {} + + CollectionValue(void* data, uint32_t length, bool has_null, bool* null_signs) + : _data(data), _length(length), _has_null(has_null), _null_signs(null_signs) {} + + inline bool is_null_at(uint32_t index) const { + return this->_has_null && this->_null_signs[index]; + } + + void to_collection_val(CollectionVal* val) const; + + inline uint32_t size() const { return _length; } + + inline uint32_t length() const { return _length; } + + void shallow_copy(const CollectionValue* other); + + void copy_null_signs(const CollectionValue* other); + + ArrayIterator iterator(PrimitiveType children_type) const; + + /** + * just shallow copy sub-elment value + * For special type, will shared actual value's memory, like StringValue. 
+ */ + Status set(uint32_t i, PrimitiveType type, const AnyVal* value); + + /** + * init collection, will alloc (children Type's size + 1) * (children Nums) memory + */ + static Status init_collection(ObjectPool* pool, uint32_t size, PrimitiveType child_type, + CollectionValue* val); + + static Status init_collection(MemPool* pool, uint32_t size, PrimitiveType child_type, + CollectionValue* val); + + static Status init_collection(FunctionContext* context, uint32_t size, PrimitiveType child_type, + CollectionValue* val); + + static CollectionValue from_collection_val(const CollectionVal& val); + const void* data() const { return _data; } + + inline bool has_null() const { return _has_null; } + inline const bool* null_signs() const { return _null_signs; } + inline void* mutable_data() { return _data; } + inline bool* mutable_null_signs() { return _null_signs; } + inline void set_length(uint32_t length) { _length = length; } + inline void set_has_null(bool has_null) { _has_null = has_null; } + inline void set_data(void* data) { _data = data; } + inline void set_null_signs(bool* null_signs) { _null_signs = null_signs; } + +public: + // child column data + void* _data; + uint32_t _length; + // item has no null value if has_null is false. + // item ```may``` has null value if has_null is true. 
+ bool _has_null; + // null bitmap + bool* _null_signs; + + friend ArrayIterator; +}; + +/** + * Array's Iterator, support read array by special type + */ +class ArrayIterator { +private: + ArrayIterator(PrimitiveType children_type, const CollectionValue* data); + +public: + bool seek(uint32_t n) { + if (n >= _data->size()) { + return false; + } + + _offset = n; + return true; + } + + bool has_next() { return _offset < _data->size(); } + + bool next() { + if (_offset < _data->size()) { + _offset++; + return true; + } + + return false; + } + + bool is_null(); + + void* value(); + + void value(AnyVal* dest); + +private: + size_t _offset; + int _type_size; + const PrimitiveType _type; + const CollectionValue* _data; + + friend CollectionValue; +}; +} // namespace doris diff --git a/be/src/runtime/datetime_value.h b/be/src/runtime/datetime_value.h index c0ffb9798b..54bd4ba8b3 100644 --- a/be/src/runtime/datetime_value.h +++ b/be/src/runtime/datetime_value.h @@ -278,7 +278,7 @@ public: cast_to_date(); return loss_accuracy; } - + void cast_to_date() { _hour = 0; _minute = 0; @@ -453,6 +453,8 @@ public: void set_type(int type); + int type() const { return _type; } + bool is_valid_date() const { return !check_range() && !check_date() && _month > 0 && _day > 0; } private: diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp index 675eadc0e9..40299cedf6 100644 --- a/be/src/runtime/descriptors.cpp +++ b/be/src/runtime/descriptors.cpp @@ -218,6 +218,9 @@ void TupleDescriptor::add_slot(SlotDescriptor* slot) { if (slot->type().is_string_type()) { _string_slots.push_back(slot); _has_varlen_slots = true; + } else if (slot->type().is_collection_type()) { + _collection_slots.push_back(slot); + _has_varlen_slots = true; } else { _no_string_slots.push_back(slot); } @@ -520,7 +523,6 @@ Status DescriptorTbl::create(ObjectPool* pool, const TDescriptorTable& thrift_tb if (entry == (*tbl)->_tuple_desc_map.end()) { return Status::InternalError("unknown tid in slot 
descriptor msg"); } - entry->second->add_slot(slot_d); } diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index a85ba74db2..a98451b7ac 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -69,6 +69,8 @@ struct NullIndicatorOffset { std::ostream& operator<<(std::ostream& os, const NullIndicatorOffset& null_indicator); +class TupleDescriptor; + class SlotDescriptor { public: // virtual ~SlotDescriptor() {}; @@ -102,6 +104,7 @@ private: friend class TupleDescriptor; friend class SchemaScanner; friend class OlapTableSchemaParam; + friend class TupleDescriptor; const SlotId _id; const TypeDescriptor _type; @@ -243,6 +246,8 @@ public: const std::vector& slots() const { return _slots; } const std::vector& string_slots() const { return _string_slots; } const std::vector& no_string_slots() const { return _no_string_slots; } + const std::vector& collection_slots() const { return _collection_slots; } + bool has_varlen_slots() const { { return _has_varlen_slots; } } @@ -253,6 +258,9 @@ public: if (desc->string_slots().size() > 0) { return true; } + if (desc->collection_slots().size() > 0) { + return true; + } } return false; } @@ -282,6 +290,8 @@ private: std::vector _string_slots; // contains only materialized string slots // contains only materialized slots except string slots std::vector _no_string_slots; + // _collection_slots + std::vector _collection_slots; // Provide quick way to check if there are variable length slots. // True if _string_slots or _collection_slots have entries. @@ -289,6 +299,7 @@ private: TupleDescriptor(const TTupleDescriptor& tdesc); TupleDescriptor(const PTupleDescriptor& tdesc); + void add_slot(SlotDescriptor* slot); /// Returns slots in their physical order. 
diff --git a/be/src/runtime/mysql_result_writer.cpp b/be/src/runtime/mysql_result_writer.cpp index 3cad9d6ac7..81d70ca226 100644 --- a/be/src/runtime/mysql_result_writer.cpp +++ b/be/src/runtime/mysql_result_writer.cpp @@ -65,7 +65,152 @@ void MysqlResultWriter::_init_profile() { _sent_rows_counter = ADD_COUNTER(_parent_profile, "NumSentRows", TUnit::UNIT); } +int MysqlResultWriter::_add_row_value(int index, const TypeDescriptor& type, void* item) { + int buf_ret = 0; + if (item == nullptr) { + return _row_buffer->push_null(); + } + + switch (type.type) { + case TYPE_BOOLEAN: + case TYPE_TINYINT: + buf_ret = _row_buffer->push_tinyint(*static_cast(item)); + break; + + case TYPE_SMALLINT: + buf_ret = _row_buffer->push_smallint(*static_cast(item)); + break; + + case TYPE_INT: + buf_ret = _row_buffer->push_int(*static_cast(item)); + break; + + case TYPE_BIGINT: + buf_ret = _row_buffer->push_bigint(*static_cast(item)); + break; + + case TYPE_LARGEINT: { + char buf[48]; + int len = 48; + char* v = LargeIntValue::to_string(reinterpret_cast(item)->value, buf, + &len); + buf_ret = _row_buffer->push_string(v, len); + break; + } + + case TYPE_FLOAT: + buf_ret = _row_buffer->push_float(*static_cast(item)); + break; + + case TYPE_DOUBLE: + buf_ret = _row_buffer->push_double(*static_cast(item)); + break; + + case TYPE_TIME: { + double time = *static_cast(item); + std::string time_str = time_str_from_double(time); + buf_ret = _row_buffer->push_string(time_str.c_str(), time_str.size()); + break; + } + + case TYPE_DATE: + case TYPE_DATETIME: { + char buf[64]; + const DateTimeValue* time_val = (const DateTimeValue*)(item); + // TODO(zhaochun), this function has core risk + char* pos = time_val->to_string(buf); + buf_ret = _row_buffer->push_string(buf, pos - buf - 1); + break; + } + + case TYPE_HLL: + case TYPE_OBJECT: { + buf_ret = _row_buffer->push_null(); + break; + } + + case TYPE_VARCHAR: + case TYPE_CHAR: { + const StringValue* string_val = (const StringValue*)(item); + + if 
(string_val->ptr == NULL) { + if (string_val->len == 0) { + // 0x01 is a magic num, not useful actually, just for present "" + char* tmp_val = reinterpret_cast(0x01); + buf_ret = _row_buffer->push_string(tmp_val, string_val->len); + } else { + buf_ret = _row_buffer->push_null(); + } + } else { + buf_ret = _row_buffer->push_string(string_val->ptr, string_val->len); + } + + break; + } + + case TYPE_DECIMALV2: { + DecimalV2Value decimal_val(reinterpret_cast(item)->value); + std::string decimal_str; + int output_scale = _output_expr_ctxs[index]->root()->output_scale(); + + if (output_scale > 0 && output_scale <= 30) { + decimal_str = decimal_val.to_string(output_scale); + } else { + decimal_str = decimal_val.to_string(); + } + + buf_ret = _row_buffer->push_string(decimal_str.c_str(), decimal_str.length()); + break; + } + + case TYPE_ARRAY: { + auto children_type = type.children[0].type; + auto array_value = (const CollectionValue*)(item); + + ArrayIterator iter = array_value->iterator(children_type); + + _row_buffer->open_dynamic_mode(); + + buf_ret = _row_buffer->push_string("[", 1); + + int begin = 0; + while (iter.has_next() && !buf_ret) { + if (begin != 0) { + buf_ret = _row_buffer->push_string(", ", 2); + } + + if (children_type == TYPE_CHAR || children_type == TYPE_VARCHAR) { + buf_ret = _row_buffer->push_string("'", 1); + buf_ret = _add_row_value(index, children_type, iter.value()); + buf_ret = _row_buffer->push_string("'", 1); + } else { + buf_ret = _add_row_value(index, children_type, iter.value()); + } + + iter.next(); + begin++; + } + + if (!buf_ret) { + buf_ret = _row_buffer->push_string("]", 1); + } + + _row_buffer->close_dynamic_mode(); + break; + } + + default: + LOG(WARNING) << "can't convert this type to mysql type. 
type = " + << _output_expr_ctxs[index]->root()->type(); + buf_ret = -1; + break; + } + + return buf_ret; +} + Status MysqlResultWriter::_add_one_row(TupleRow* row) { + SCOPED_TIMER(_convert_tuple_timer); _row_buffer->reset(); int num_columns = _output_expr_ctxs.size(); int buf_ret = 0; @@ -73,109 +218,7 @@ Status MysqlResultWriter::_add_one_row(TupleRow* row) { for (int i = 0; 0 == buf_ret && i < num_columns; ++i) { void* item = _output_expr_ctxs[i]->get_value(row); - if (NULL == item) { - buf_ret = _row_buffer->push_null(); - continue; - } - - switch (_output_expr_ctxs[i]->root()->type().type) { - case TYPE_BOOLEAN: - case TYPE_TINYINT: - buf_ret = _row_buffer->push_tinyint(*static_cast(item)); - break; - - case TYPE_SMALLINT: - buf_ret = _row_buffer->push_smallint(*static_cast(item)); - break; - - case TYPE_INT: - buf_ret = _row_buffer->push_int(*static_cast(item)); - break; - - case TYPE_BIGINT: - buf_ret = _row_buffer->push_bigint(*static_cast(item)); - break; - - case TYPE_LARGEINT: { - char buf[48]; - int len = 48; - char* v = LargeIntValue::to_string(reinterpret_cast(item)->value, - buf, &len); - buf_ret = _row_buffer->push_string(v, len); - break; - } - - case TYPE_FLOAT: - buf_ret = _row_buffer->push_float(*static_cast(item)); - break; - - case TYPE_DOUBLE: - buf_ret = _row_buffer->push_double(*static_cast(item)); - break; - - case TYPE_TIME: { - double time = *static_cast(item); - std::string time_str = time_str_from_double(time); - buf_ret = _row_buffer->push_string(time_str.c_str(), time_str.size()); - break; - } - - case TYPE_DATE: - case TYPE_DATETIME: { - char buf[64]; - const DateTimeValue* time_val = (const DateTimeValue*)(item); - // TODO(zhaochun), this function has core risk - char* pos = time_val->to_string(buf); - buf_ret = _row_buffer->push_string(buf, pos - buf - 1); - break; - } - - case TYPE_HLL: - case TYPE_OBJECT: { - buf_ret = _row_buffer->push_null(); - break; - } - - case TYPE_VARCHAR: - case TYPE_CHAR: { - const StringValue* 
string_val = (const StringValue*)(item); - - if (string_val->ptr == NULL) { - if (string_val->len == 0) { - // 0x01 is a magic num, not useful actually, just for present "" - char* tmp_val = reinterpret_cast(0x01); - buf_ret = _row_buffer->push_string(tmp_val, string_val->len); - } else { - buf_ret = _row_buffer->push_null(); - } - } else { - buf_ret = _row_buffer->push_string(string_val->ptr, string_val->len); - } - - break; - } - - case TYPE_DECIMALV2: { - DecimalV2Value decimal_val(reinterpret_cast(item)->value); - std::string decimal_str; - int output_scale = _output_expr_ctxs[i]->root()->output_scale(); - - if (output_scale > 0 && output_scale <= 30) { - decimal_str = decimal_val.to_string(output_scale); - } else { - decimal_str = decimal_val.to_string(); - } - - buf_ret = _row_buffer->push_string(decimal_str.c_str(), decimal_str.length()); - break; - } - - default: - LOG(WARNING) << "can't convert this type to mysql type. type = " - << _output_expr_ctxs[i]->root()->type(); - buf_ret = -1; - break; - } + buf_ret = _add_row_value(i, _output_expr_ctxs[i]->root()->type(), item); } if (0 != buf_ret) { diff --git a/be/src/runtime/mysql_result_writer.h b/be/src/runtime/mysql_result_writer.h index a993e966ef..8d8c215310 100644 --- a/be/src/runtime/mysql_result_writer.h +++ b/be/src/runtime/mysql_result_writer.h @@ -47,6 +47,7 @@ private: void _init_profile(); // convert one tuple row Status _add_one_row(TupleRow* row); + int _add_row_value(int index, const TypeDescriptor& type, void* item); private: BufferControlBlock* _sinker; diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 66a7620d83..320e58ba0f 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -20,6 +20,7 @@ #include #include "gen_cpp/Types_types.h" +#include "runtime/collection_value.h" namespace doris { //to_tcolumn_type_thrift only test @@ -92,6 +93,9 @@ PrimitiveType thrift_to_type(TPrimitiveType::type ttype) { case 
TPrimitiveType::OBJECT: return TYPE_OBJECT; + case TPrimitiveType::ARRAY: + return TYPE_ARRAY; + default: return INVALID_TYPE; } @@ -156,6 +160,9 @@ TPrimitiveType::type to_thrift(PrimitiveType ptype) { case TYPE_OBJECT: return TPrimitiveType::OBJECT; + case TYPE_ARRAY: + return TPrimitiveType::ARRAY; + default: return TPrimitiveType::INVALID_TYPE; } @@ -220,6 +227,9 @@ std::string type_to_string(PrimitiveType t) { case TYPE_OBJECT: return "OBJECT"; + case TYPE_ARRAY: + return "ARRAY"; + default: return ""; }; @@ -320,4 +330,49 @@ TTypeDesc gen_type_desc(const TPrimitiveType::type val, const std::string& name) return type_desc; } +int get_slot_size(PrimitiveType type) { + switch (type) { + case TYPE_OBJECT: + case TYPE_HLL: + case TYPE_CHAR: + case TYPE_VARCHAR: + return sizeof(StringValue); + case TYPE_ARRAY: + return sizeof(CollectionValue); + + case TYPE_NULL: + case TYPE_BOOLEAN: + case TYPE_TINYINT: + return 1; + + case TYPE_SMALLINT: + return 2; + + case TYPE_INT: + case TYPE_FLOAT: + return 4; + + case TYPE_BIGINT: + case TYPE_DOUBLE: + return 8; + + case TYPE_LARGEINT: + return sizeof(__int128); + + case TYPE_DATE: + case TYPE_DATETIME: + // This is the size of the slot, the actual size of the data is 12. 
+ return 16; + + case TYPE_DECIMALV2: + return 16; + + case INVALID_TYPE: + default: + DCHECK(false); + } + + return 0; +} + } // namespace doris diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index 19b8eaf46c..ebd12b5602 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -69,9 +69,9 @@ inline bool is_enumeration_type(PrimitiveType type) { case TYPE_DATETIME: case TYPE_DECIMALV2: case TYPE_BOOLEAN: + case TYPE_ARRAY: case TYPE_HLL: return false; - case TYPE_TINYINT: case TYPE_SMALLINT: case TYPE_INT: @@ -102,6 +102,7 @@ inline int get_byte_size(PrimitiveType type) { case TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: + case TYPE_ARRAY: return 0; case TYPE_NULL: @@ -140,6 +141,7 @@ inline int get_real_byte_size(PrimitiveType type) { case TYPE_OBJECT: case TYPE_HLL: case TYPE_VARCHAR: + case TYPE_ARRAY: return 0; case TYPE_NULL: @@ -175,48 +177,7 @@ inline int get_real_byte_size(PrimitiveType type) { return 0; } // Returns the byte size of type when in a tuple -inline int get_slot_size(PrimitiveType type) { - switch (type) { - case TYPE_OBJECT: - case TYPE_HLL: - case TYPE_CHAR: - case TYPE_VARCHAR: - return sizeof(StringValue); - - case TYPE_NULL: - case TYPE_BOOLEAN: - case TYPE_TINYINT: - return 1; - - case TYPE_SMALLINT: - return 2; - - case TYPE_INT: - case TYPE_FLOAT: - return 4; - - case TYPE_BIGINT: - case TYPE_DOUBLE: - return 8; - - case TYPE_LARGEINT: - return sizeof(__int128); - - case TYPE_DATE: - case TYPE_DATETIME: - // This is the size of the slot, the actual size of the data is 12. 
- return 16; - - case TYPE_DECIMALV2: - return 16; - - case INVALID_TYPE: - default: - DCHECK(false); - } - - return 0; -} +int get_slot_size(PrimitiveType type); inline bool is_type_compatible(PrimitiveType lhs, PrimitiveType rhs) { if (lhs == TYPE_VARCHAR) { diff --git a/be/src/runtime/raw_value.cpp b/be/src/runtime/raw_value.cpp index 280fd301b7..d83e9e5752 100644 --- a/be/src/runtime/raw_value.cpp +++ b/be/src/runtime/raw_value.cpp @@ -20,6 +20,7 @@ #include #include "olap/utils.h" +#include "runtime/collection_value.h" #include "runtime/string_value.hpp" #include "runtime/tuple.h" #include "util/types.h" @@ -165,6 +166,21 @@ void RawValue::print_value(const void* value, const TypeDescriptor& type, int sc *stream << reinterpret_cast(value)->value; break; + case TYPE_ARRAY: { + const CollectionValue* src = reinterpret_cast(value); + auto children_type = type.children.at(0); + auto iter = src->iterator(children_type.type); + *stream << "["; + print_value(iter.value(), children_type, scale, stream); + iter.next(); + for (; iter.has_next(); iter.next()) { + *stream << ", "; + print_value(iter.value(), children_type, scale, stream); + } + *stream << "]"; + break; + } + default: DCHECK(false) << "bad RawValue::print_value() type: " << type; } @@ -292,7 +308,33 @@ void RawValue::write(const void* value, void* dst, const TypeDescriptor& type, M break; } + case TYPE_ARRAY: { + DCHECK_EQ(type.children.size(), 1); + const CollectionValue* src = reinterpret_cast(value); + CollectionValue* val = reinterpret_cast(dst); + + if (pool != NULL) { + auto children_type = type.children.at(0).type; + CollectionValue::init_collection(pool, src->size(), children_type, val); + ArrayIterator src_iter = src->iterator(children_type); + ArrayIterator val_iter = val->iterator(children_type); + + val->copy_null_signs(src); + + while (src_iter.has_next() && val_iter.has_next()) { + if (!src_iter.is_null()) { + // write children + write(src_iter.value(), val_iter.value(), children_type, pool); 
+ } + src_iter.next(); + val_iter.next(); + } + } else { + val->shallow_copy(src); + } + break; + } default: DCHECK(false) << "RawValue::write(): bad type: " << type; } diff --git a/be/src/runtime/result_writer.h b/be/src/runtime/result_writer.h index 418e7b644e..477d79c7e1 100644 --- a/be/src/runtime/result_writer.h +++ b/be/src/runtime/result_writer.h @@ -25,6 +25,7 @@ namespace doris { class Status; class RowBatch; class RuntimeState; +class TypeDescriptor; // abstract class of the result writer class ResultWriter { diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp index f0238fae35..a1958508ba 100644 --- a/be/src/runtime/row_batch.cpp +++ b/be/src/runtime/row_batch.cpp @@ -28,6 +28,7 @@ //#include "runtime/mem_tracker.h" #include "gen_cpp/Data_types.h" #include "gen_cpp/data.pb.h" +#include "runtime/collection_value.h" #include "util/debug_util.h" using std::vector; @@ -126,7 +127,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch, TupleRow* row = get_row(i); std::vector::const_iterator desc = tuple_descs.begin(); for (int j = 0; desc != tuple_descs.end(); ++desc, ++j) { - if ((*desc)->string_slots().empty()) { + if ((*desc)->string_slots().empty() && (*desc)->collection_slots().empty()) { continue; } Tuple* tuple = row->get_tuple(j); @@ -146,6 +147,42 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch, // length. So we make the high bits zero here. 
string_val->len &= 0x7FFFFFFFL; } + + // copy collection slots + vector::const_iterator slot_collection = + (*desc)->collection_slots().begin(); + for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) { + DCHECK((*slot_collection)->type().is_collection_type()); + + CollectionValue* array_val = + tuple->get_collection_slot((*slot_collection)->tuple_offset()); + + // assgin data and null_sign pointer position in tuple_data + int data_offset = reinterpret_cast(array_val->data()); + array_val->set_data(reinterpret_cast(tuple_data + data_offset)); + int null_offset = reinterpret_cast(array_val->null_signs()); + array_val->set_null_signs(reinterpret_cast(tuple_data + null_offset)); + + const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0); + if (!item_type.is_string_type()) { + continue; + } + + // copy every string item + for (int i = 0; i < array_val->length(); ++i) { + if (array_val->is_null_at(i)) { + continue; + } + + StringValue* dst_item_v = reinterpret_cast( + (uint8_t*)array_val->data() + i * item_type.get_slot_size()); + + if (dst_item_v->len != 0) { + int offset = reinterpret_cast(dst_item_v->ptr); + dst_item_v->ptr = reinterpret_cast(tuple_data + offset); + } + } + } } } } @@ -221,7 +258,7 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const TRowBatch& input_batch, TupleRow* row = get_row(i); std::vector::const_iterator desc = tuple_descs.begin(); for (int j = 0; desc != tuple_descs.end(); ++desc, ++j) { - if ((*desc)->string_slots().empty()) { + if ((*desc)->string_slots().empty() && (*desc)->collection_slots().empty()) { continue; } @@ -244,6 +281,40 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const TRowBatch& input_batch, // length. So we make the high bits zero here. 
string_val->len &= 0x7FFFFFFFL; } + + // copy collection slot + vector::const_iterator slot_collection = + (*desc)->collection_slots().begin(); + for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) { + DCHECK((*slot_collection)->type().is_collection_type()); + CollectionValue* array_val = + tuple->get_collection_slot((*slot_collection)->tuple_offset()); + + int offset = reinterpret_cast(array_val->data()); + array_val->set_data(reinterpret_cast(tuple_data + offset)); + int null_offset = reinterpret_cast(array_val->null_signs()); + array_val->set_null_signs(reinterpret_cast(tuple_data + null_offset)); + + const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0); + if (!item_type.is_string_type()) { + continue; + } + + // copy string item + for (int i = 0; i < array_val->length(); ++i) { + if (array_val->is_null_at(i)) { + continue; + } + + StringValue* dst_item_v = reinterpret_cast( + (uint8_t*)array_val->data() + i * item_type.get_slot_size()); + + if (dst_item_v->len != 0) { + int offset = reinterpret_cast(dst_item_v->ptr); + dst_item_v->ptr = reinterpret_cast(tuple_data + offset); + } + } + } } } } @@ -606,6 +677,37 @@ int RowBatch::total_byte_size() { StringValue* string_val = tuple->get_string_slot((*slot)->tuple_offset()); result += string_val->len; } + + // compute slot collection size + vector::const_iterator slot_collection = + (*desc)->collection_slots().begin(); + for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) { + DCHECK((*slot_collection)->type().is_collection_type()); + if (tuple->is_null((*slot_collection)->null_indicator_offset())) { + continue; + } + // compute data null_signs size + CollectionValue* array_val = + tuple->get_collection_slot((*slot_collection)->tuple_offset()); + result += array_val->length() * sizeof(bool); + + const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0); + result += array_val->length() * item_type.get_slot_size(); + + if 
(!item_type.is_string_type()) { + continue; + } + + // compute string type item size + for (int i = 0; i < array_val->length(); ++i) { + if (array_val->is_null_at(i)) { + continue; + } + StringValue* dst_item_v = reinterpret_cast( + (uint8_t*)array_val->data() + i * item_type.get_slot_size()); + result += dst_item_v->len; + } + } } } diff --git a/be/src/runtime/tuple.cpp b/be/src/runtime/tuple.cpp index 19a8f0cd3a..0cd778114c 100644 --- a/be/src/runtime/tuple.cpp +++ b/be/src/runtime/tuple.cpp @@ -17,10 +17,15 @@ #include "runtime/tuple.h" +#include +#include +#include +#include #include #include "exprs/expr.h" #include "exprs/expr_context.h" +#include "runtime/collection_value.h" #include "runtime/descriptors.h" #include "runtime/mem_pool.h" #include "runtime/raw_value.h" @@ -78,6 +83,59 @@ void Tuple::deep_copy(Tuple* dst, const TupleDescriptor& desc, MemPool* pool, bo } } } + + // copy collection slot + for (auto slot_desc : desc.collection_slots()) { + DCHECK(slot_desc->type().is_collection_type()); + if (dst->is_null(slot_desc->null_indicator_offset())) { + continue; + } + + // copy collection item + CollectionValue* cv = dst->get_collection_slot(slot_desc->tuple_offset()); + + const TypeDescriptor& item_type = slot_desc->type().children.at(0); + + int coll_byte_size = cv->length() * item_type.get_slot_size(); + int nulls_size = cv->length() * sizeof(bool); + + int offset = pool->total_allocated_bytes(); + char* coll_data = reinterpret_cast(pool->allocate(coll_byte_size + nulls_size)); + + // copy data and null_signs + if (nulls_size > 0) { + cv->set_has_null(true); + cv->set_null_signs(reinterpret_cast(coll_data) + coll_byte_size); + memory_copy(coll_data, cv->null_signs(), nulls_size); + } else { + cv->set_has_null(false); + } + memory_copy(coll_data + nulls_size, cv->data(), coll_byte_size); + + // assgin new null_sign and data location + cv->set_null_signs(convert_ptrs ? 
reinterpret_cast(offset) + : reinterpret_cast(coll_data)); + cv->set_data(convert_ptrs ? reinterpret_cast(offset + nulls_size) + : coll_data + nulls_size); + + if (!item_type.is_string_type()) { + continue; + } + // when itemtype is string, copy every string item + for (int i = 0; i < cv->length(); ++i) { + int item_offset = nulls_size + i * item_type.get_slot_size(); + if (cv->is_null_at(i)) { + continue; + } + StringValue* dst_item_v = reinterpret_cast(coll_data + item_offset); + if (dst_item_v->len != 0) { + int offset = pool->total_allocated_bytes(); + char* string_copy = reinterpret_cast(pool->allocate(dst_item_v->len)); + memory_copy(string_copy, dst_item_v->ptr, dst_item_v->len); + dst_item_v->ptr = (convert_ptrs ? reinterpret_cast(offset) : string_copy); + } + } + } } Tuple* Tuple::dcopy_with_new(const TupleDescriptor& desc, MemPool* pool, int64_t* bytes) { @@ -137,6 +195,61 @@ void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int* offset, boo *offset += string_v->len; } } + + // copy collection slots + for (auto slot_desc : desc.collection_slots()) { + DCHECK(slot_desc->type().is_collection_type()); + if (dst->is_null(slot_desc->null_indicator_offset())) { + continue; + } + // get cv to copy elements + CollectionValue* cv = dst->get_collection_slot(slot_desc->tuple_offset()); + const TypeDescriptor& item_type = slot_desc->type().children.at(0); + + int coll_byte_size = cv->length() * item_type.get_slot_size(); + int nulls_size = cv->length() * sizeof(bool); + + // copy null_sign + memory_copy(*data, cv->null_signs(), nulls_size); + // copy data + memory_copy(*data + nulls_size, cv->data(), coll_byte_size); + + if (!item_type.is_string_type()) { + cv->set_null_signs(convert_ptrs ? reinterpret_cast(*offset) + : reinterpret_cast(*data)); + cv->set_data(convert_ptrs ? 
reinterpret_cast(*offset + nulls_size) + : *data + nulls_size); + *data += coll_byte_size + nulls_size; + *offset += coll_byte_size + nulls_size; + continue; + } + + // when item is string type, copy every item + char* base_data = *data; + int base_offset = *offset; + + *data += coll_byte_size + nulls_size; + *offset += coll_byte_size + nulls_size; + + for (int i = 0; i < cv->length(); ++i) { + int item_offset = nulls_size + i * item_type.get_slot_size(); + if (cv->is_null_at(i)) { + continue; + } + StringValue* dst_item_v = reinterpret_cast(base_data + item_offset); + if (dst_item_v->len != 0) { + memory_copy(*data, dst_item_v->ptr, dst_item_v->len); + dst_item_v->ptr = (convert_ptrs ? reinterpret_cast(*offset) : *data); + *data += dst_item_v->len; + *offset += dst_item_v->len; + } + } + // assgin new null_sign and data location + cv->set_null_signs(convert_ptrs ? reinterpret_cast(base_offset) + : reinterpret_cast(base_data)); + cv->set_data(convert_ptrs ? reinterpret_cast(base_offset + nulls_size) + : base_data + nulls_size); + } } template @@ -166,6 +279,8 @@ void Tuple::materialize_exprs(TupleRow* row, const TupleDescriptor& desc, (expr_type == TYPE_HLL)); } else if ((slot_type == TYPE_DATE) || (slot_type == TYPE_DATETIME)) { DCHECK((expr_type == TYPE_DATE) || (expr_type == TYPE_DATETIME)); + } else if (slot_type == TYPE_ARRAY) { + DCHECK((expr_type == TYPE_ARRAY)); } else { DCHECK(slot_type == TYPE_NULL || slot_type == expr_type); } diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h index b0b0f7b765..f08d2378bd 100644 --- a/be/src/runtime/tuple.h +++ b/be/src/runtime/tuple.h @@ -27,6 +27,7 @@ namespace doris { struct StringValue; +struct CollectionValue; class TupleDescriptor; class DateTimeValue; class TupleRow; @@ -154,6 +155,17 @@ public: return reinterpret_cast(reinterpret_cast(this) + offset); } + CollectionValue* get_collection_slot(int offset) { + DCHECK(offset != -1); // -1 offset indicates non-materialized slot + return 
reinterpret_cast(reinterpret_cast(this) + offset); + } + + const CollectionValue* get_collection_slot(int offset) const { + DCHECK(offset != -1); // -1 offset indicates non-materialized slot + return reinterpret_cast(reinterpret_cast(this) + + offset); + } + DateTimeValue* get_datetime_slot(int offset) { DCHECK(offset != -1); // -1 offset indicates non-materialized slot return reinterpret_cast(reinterpret_cast(this) + offset); diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index 3ebd2644b8..c39a161b5c 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -43,6 +43,14 @@ TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) } break; } + case TTypeNodeType::ARRAY: { + DCHECK(!node.__isset.scalar_type); + DCHECK_LT(*idx, types.size() - 1); + type = TYPE_ARRAY; + ++(*idx); + children.push_back(TypeDescriptor(types, idx)); + break; + } #if 0 // Don't support now case TTypeNodeType::STRUCT: type = TYPE_STRUCT; @@ -112,7 +120,8 @@ void TypeDescriptor::to_thrift(TTypeDesc* thrift_type) const { } void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const { - DCHECK(!is_complex_type()) << "Don't support complex type now, type=" << type; + DCHECK(!is_complex_type() || type == TYPE_ARRAY) + << "Don't support complex type now, type=" << type; auto node = ptype->add_types(); node->set_type(TTypeNodeType::SCALAR); auto scalar_type = node->mutable_scalar_type(); @@ -124,6 +133,11 @@ void TypeDescriptor::to_protobuf(PTypeDesc* ptype) const { DCHECK_NE(scale, -1); scalar_type->set_precision(precision); scalar_type->set_scale(scale); + } else if (type == TYPE_ARRAY) { + node->set_type(TTypeNodeType::ARRAY); + for (const TypeDescriptor& child : children) { + child.to_protobuf(ptype); + } } } @@ -149,6 +163,12 @@ TypeDescriptor::TypeDescriptor(const google::protobuf::RepeatedPtrField #include +#include // This is the only Doris header required to develop UDFs and UDAs. 
This header // contains the types that need to be used and the FunctionContext object. The context @@ -44,6 +45,7 @@ struct StringVal; struct DateTimeVal; struct DecimalV2Val; struct HllVal; +struct CollectionVal; // The FunctionContext is passed to every UDF/UDA and is the interface for the UDF to the // rest of the system. It contains APIs to examine the system state, report errors @@ -74,7 +76,8 @@ public: TYPE_STRING, TYPE_FIXED_BUFFER, TYPE_DECIMALV2, - TYPE_OBJECT + TYPE_OBJECT, + TYPE_ARRAY }; struct TypeDesc { @@ -86,6 +89,9 @@ public: /// Only valid if type == TYPE_FIXED_BUFFER || type == TYPE_VARCHAR int len; + + // only vaild if type == TYPE_ARRAY + std::vector children; }; struct UniqueId { @@ -718,6 +724,26 @@ struct HllVal : public StringVal { void agg_merge(const HllVal& other); }; +struct CollectionVal : public AnyVal { + void* data; + uint32_t length; + // item has no null value if has_null is false. + // item ```may``` has null value if has_null is true. + bool has_null; + // null bitmap + bool* null_signs; + + CollectionVal() = default; + + CollectionVal(void* data, uint32_t length, bool has_null, bool* null_signs) + : data(data), length(length), has_null(has_null), null_signs(null_signs){}; + + static CollectionVal null() { + CollectionVal val; + val.is_null = true; + return val; + } +}; typedef uint8_t* BufferVal; } // namespace doris_udf @@ -734,5 +760,6 @@ using doris_udf::DecimalV2Val; using doris_udf::DateTimeVal; using doris_udf::HllVal; using doris_udf::FunctionContext; +using doris_udf::CollectionVal; #endif diff --git a/be/src/util/mysql_row_buffer.cpp b/be/src/util/mysql_row_buffer.cpp index 51b2bcee85..8b90259c8c 100644 --- a/be/src/util/mysql_row_buffer.cpp +++ b/be/src/util/mysql_row_buffer.cpp @@ -17,6 +17,7 @@ #include "util/mysql_row_buffer.h" +#include #include #include @@ -24,8 +25,6 @@ #include "gutil/strings/numbers.h" #include "util/mysql_global.h" -#include - namespace doris { // the first byte: @@ -58,7 +57,11 @@ static 
char* pack_vlen(char* packet, uint64_t length) { return packet + 8; } MysqlRowBuffer::MysqlRowBuffer() - : _pos(_default_buf), _buf(_default_buf), _buf_size(sizeof(_default_buf)) {} + : _pos(_default_buf), + _buf(_default_buf), + _buf_size(sizeof(_default_buf)), + _dynamic_mode(0), + _len_pos(nullptr) {} MysqlRowBuffer::~MysqlRowBuffer() { if (_buf != _default_buf) { @@ -66,6 +69,25 @@ MysqlRowBuffer::~MysqlRowBuffer() { } } +void MysqlRowBuffer::open_dynamic_mode() { + if (!_dynamic_mode) { + *_pos++ = 254; + // write length when dynamic mode close + _len_pos = _pos; + _pos = _pos + 8; + } + _dynamic_mode++; +} + +void MysqlRowBuffer::close_dynamic_mode() { + _dynamic_mode--; + + if (!_dynamic_mode) { + int8store(_len_pos, _pos - _len_pos - 8); + _len_pos = nullptr; + } +} + int MysqlRowBuffer::reserve(int size) { if (size < 0) { LOG(ERROR) << "alloc memory failed. size = " << size; @@ -99,15 +121,29 @@ int MysqlRowBuffer::reserve(int size) { return 0; } -template -static char* add_int(T data, char* pos) -{ +template +static char* add_int(T data, char* pos, bool dynamic_mode) { auto fi = fmt::format_int(data); int length = fi.size(); - int1store(pos++, length); + if (!dynamic_mode) { + int1store(pos++, length); + } memcpy(pos, fi.data(), length); return pos + length; } +template +static char* add_float(T data, char* pos, bool dynamic_mode) { + int length = 0; + if constexpr (std::is_same_v) { + length = FloatToBuffer(data, MAX_FLOAT_STR_LENGTH + 2, pos + !dynamic_mode); + } else if constexpr (std::is_same_v) { + length = DoubleToBuffer(data, MAX_DOUBLE_STR_LENGTH + 2, pos + !dynamic_mode); + } + if (!dynamic_mode) { + int1store(pos++, length); + } + return pos + length; +} int MysqlRowBuffer::push_tinyint(int8_t data) { // 1 for string trail, 1 for length, 1 for sign, other for digits @@ -118,7 +154,7 @@ int MysqlRowBuffer::push_tinyint(int8_t data) { return ret; } - _pos = add_int(data, _pos); + _pos = add_int(data, _pos, _dynamic_mode); return 0; } @@ -131,7 
+167,7 @@ int MysqlRowBuffer::push_smallint(int16_t data) { return ret; } - _pos = add_int(data, _pos); + _pos = add_int(data, _pos, _dynamic_mode); return 0; } @@ -144,7 +180,7 @@ int MysqlRowBuffer::push_int(int32_t data) { return ret; } - _pos = add_int(data, _pos); + _pos = add_int(data, _pos, _dynamic_mode); return 0; } @@ -157,7 +193,7 @@ int MysqlRowBuffer::push_bigint(int64_t data) { return ret; } - _pos = add_int(data, _pos); + _pos = add_int(data, _pos, _dynamic_mode); return 0; } @@ -166,19 +202,11 @@ int MysqlRowBuffer::push_unsigned_bigint(uint64_t data) { int ret = reserve(4 + MAX_BIGINT_WIDTH); if (0 != ret) { - LOG(ERROR) << "mysql row buffer reserve failed."; + LOG(ERROR) << "mysql row buffer reserver failed."; return ret; } - int length = snprintf(_pos + 1, MAX_BIGINT_WIDTH + 3, "%ld", data); - - if (length < 0) { - LOG(ERROR) << "snprintf failed. data = " << data; - return length; - } - - int1store(_pos, length); - _pos += length + 1; + _pos = add_int(data, _pos, _dynamic_mode); return 0; } @@ -191,15 +219,7 @@ int MysqlRowBuffer::push_float(float data) { return ret; } - int length = FloatToBuffer(data, MAX_FLOAT_STR_LENGTH + 2, _pos + 1); - - if (length < 0) { - LOG(ERROR) << "gcvt float failed. data = " << data; - return length; - } - - int1store(_pos, length); - _pos += length + 1; + _pos = add_float(data, _pos, _dynamic_mode); return 0; } @@ -212,15 +232,7 @@ int MysqlRowBuffer::push_double(double data) { return ret; } - int length = DoubleToBuffer(data, MAX_DOUBLE_STR_LENGTH + 2, _pos + 1); - - if (length < 0) { - LOG(ERROR) << "gcvt double failed. 
data = " << data; - return length; - } - - int1store(_pos, length); - _pos += length + 1; + _pos = add_float(data, _pos, _dynamic_mode); return 0; } @@ -238,13 +250,20 @@ int MysqlRowBuffer::push_string(const char* str, int length) { return ret; } - _pos = pack_vlen(_pos, length); + if (!_dynamic_mode) { + _pos = pack_vlen(_pos, length); + } memcpy(_pos, str, length); _pos += length; return 0; } int MysqlRowBuffer::push_null() { + if (_dynamic_mode) { + // dynamic mode not write + return 0; + } + int ret = reserve(1); if (0 != ret) { @@ -272,5 +291,3 @@ char* MysqlRowBuffer::reserved(int size) { } } // namespace doris - -/* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/be/src/util/mysql_row_buffer.h b/be/src/util/mysql_row_buffer.h index b0dad9b9a0..ddea35e159 100644 --- a/be/src/util/mysql_row_buffer.h +++ b/be/src/util/mysql_row_buffer.h @@ -22,8 +22,32 @@ namespace doris { -// helper for construct MySQL send row +/** // Now only support text protocol + * helper for construct MySQL send row + * The MYSQL protocal: + * + * | flag | (length) | value | flag | (length) | value | ...... + * <--------A column--------><--------A column--------><-.....-> + * + * The flag means value's length or null value: + * If value is NULL, flag is 251 + * If value's length < 251, flag is the value's length + * If 251 <= value's length < 65536, flag is 252 and the next two byte is length + * If 65536 <= value's length < 16777216 , flag is 253 and the next three byte is length + * If 16777216 <= value's length, flag is 254 and the next eighth byte is length + * + * the code example: + * mrb.push_null(); + * mrb.push_tinyint(5); + * mrb.push_int(120); + * mrb.push_string("...my length is 65536..."); + * + * the protocol contents: + * + * 251-1-'5'-3-'120'-253-65536-"...my length is 65536..." + * + */ class MysqlRowBuffer { public: MysqlRowBuffer(); @@ -50,6 +74,40 @@ public: const char* pos() const { return _pos; } int length() const { return _pos - _buf; } + /** + * Why? 
+ * Because the Nested-Type's data need pushed multiple times, but mysql protocol don't + * support nested type and each push will decide a column data. + * + * How? + * Dynamic mode allow we push data in a column multiple times, and allow recursive push. + * We will think that the length of the next column is uncertain when open dynamic + * mode, so we will set the flag to 254(longest flag) and skip 8 bytes which used for + * record length, then compute the actual data length when close dynamic mode. + * In a recursive call(special for nested type), the mode will open multiple times, but + * the data is actually written in one column, so we only need to deal it at the beginning + * and at the end. + * + * the code: + * mrb.push_tinyint(5); + * mrb.push_smallint(120); + * mrb.push_int(-30000); + * + * In normal mode, the buffer contains three column: + * 1-'5'-3-'120'-6-'-30000' + * + * Same code in dynamic mode, the buffer contains a column: + * 254-48-'5'-'120'-'-30000' + * + * NOTE: The open_dynamic_mode() and close_dynamic_mode() need appear in pairs + */ + void open_dynamic_mode(); + + /** + * NOTE: The open_dynamic_mode() and close_dynamic_mode() need appear in pairs + */ + void close_dynamic_mode(); + private: int reserve(int size); @@ -57,10 +115,11 @@ private: char* _buf; int _buf_size; char _default_buf[4096]; + + int _dynamic_mode; + char* _len_pos; }; } // namespace doris #endif // DORIS_BE_SRC_QUERY_MYSQL_MYSQL_ROW_BUFFER_H - -/* vim: set ts=4 sw=4 sts=4 tw=100 noet: */ diff --git a/be/test/exec/CMakeLists.txt b/be/test/exec/CMakeLists.txt index a35c51f42c..c31b917515 100644 --- a/be/test/exec/CMakeLists.txt +++ b/be/test/exec/CMakeLists.txt @@ -58,7 +58,7 @@ ADD_BE_TEST(es_predicate_test) ADD_BE_TEST(es_query_builder_test) ADD_BE_TEST(es_scan_reader_test) #ADD_BE_TEST(schema_scan_node_test) -ADD_BE_TEST(unix_odbc_test) +#ADD_BE_TEST(unix_odbc_test) #ADD_BE_TEST(schema_scanner_test) ##ADD_BE_TEST(set_executor_test) 
#ADD_BE_TEST(schema_scanner/schema_authors_scanner_test) diff --git a/be/test/exprs/CMakeLists.txt b/be/test/exprs/CMakeLists.txt index b24cd171c2..26e73f01ce 100644 --- a/be/test/exprs/CMakeLists.txt +++ b/be/test/exprs/CMakeLists.txt @@ -37,3 +37,5 @@ ADD_BE_TEST(math_functions_test) ADD_BE_TEST(topn_function_test) ADD_BE_TEST(runtime_filter_test) ADD_BE_TEST(bloom_filter_predicate_test) +ADD_BE_TEST(array_functions_test) + diff --git a/be/test/exprs/array_functions_test.cpp b/be/test/exprs/array_functions_test.cpp new file mode 100644 index 0000000000..8e410e22bf --- /dev/null +++ b/be/test/exprs/array_functions_test.cpp @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "exprs/array_functions.h" + +#include + +#include "gmock/gmock.h" +#include "runtime/collection_value.h" +#include "runtime/free_pool.hpp" +#include "string" +#include "testutil/function_utils.h" +#include "udf/udf.h" +#include "udf/udf_internal.h" + +#define private public + +namespace doris { + +class ArrayFunctionsTest : public testing::Test { +public: + ArrayFunctionsTest() { + _utils = new FunctionUtils(); + _context = _utils->get_fn_ctx(); + } + ~ArrayFunctionsTest() { delete _utils; } + +public: + FunctionUtils* _utils; + FunctionContext* _context; +}; + +TEST_F(ArrayFunctionsTest, array) { + // Int array + { + FunctionContext::TypeDesc childTypeDesc; + childTypeDesc.type = FunctionContext::TYPE_INT; + + _context->impl()->_return_type.type = FunctionContext::TYPE_ARRAY; + _context->impl()->_return_type.children.clear(); + _context->impl()->_return_type.children.push_back(childTypeDesc); + + IntVal v[10]; + + for (int i = 0; i < 10; ++i) { + v[i].val = i + 1; + } + + CollectionVal cv = ArrayFunctions::array(_context, 10, v); + + CollectionValue value = CollectionValue::from_collection_val(cv); + + int i = 0; + for (auto&& iter = value.iterator(TYPE_INT); iter.has_next(); iter.next()) { + i++; + IntVal a; + iter.value(&a); + EXPECT_EQ(i, a.val); + } + } +} + +} // namespace doris + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/be/test/olap/column_vector_test.cpp b/be/test/olap/column_vector_test.cpp index 39e6b020cf..476f6ceb4c 100644 --- a/be/test/olap/column_vector_test.cpp +++ b/be/test/olap/column_vector_test.cpp @@ -19,10 +19,10 @@ #include -#include "olap/collection.h" #include "olap/field.h" #include "olap/tablet_schema_helper.h" #include "olap/types.cpp" +#include "runtime/collection_value.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" @@ -72,7 +72,8 @@ void test_read_write_scalar_column_vector(const TypeInfo* type_info, const uint8 } template -void 
test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, size_t array_size, Collection* result) { +void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, size_t array_size, + CollectionValue* result) { DCHECK(array_size > 1); using ItemType = typename TypeTraits::CppType; @@ -85,7 +86,8 @@ void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, s size_t array_init_size = array_size / 2; std::unique_ptr cvb; - ASSERT_TRUE(ColumnVectorBatch::create(array_init_size, true, array_type_info, field, &cvb).ok()); + ASSERT_TRUE( + ColumnVectorBatch::create(array_init_size, true, array_type_info, field, &cvb).ok()); auto* array_cvb = reinterpret_cast(cvb.get()); ColumnVectorBatch* item_cvb = array_cvb->elements(); @@ -93,7 +95,8 @@ void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, s // first write for (size_t i = 0; i < array_init_size; ++i) { - memcpy(offset_cvb->mutable_cell_ptr(1 + i), &(result[i].length), sizeof(segment_v2::ordinal_t)); + uint32_t len = result[i].length(); + memcpy(offset_cvb->mutable_cell_ptr(1 + i), &len, sizeof(uint32_t)); } array_cvb->set_null_bits(0, array_init_size, false); array_cvb->get_offset_by_length(0, array_init_size); @@ -101,7 +104,8 @@ void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, s size_t first_write_item = array_cvb->item_offset(array_init_size) - array_cvb->item_offset(0); ASSERT_TRUE(item_cvb->resize(first_write_item).ok()); for (size_t i = 0; i < array_init_size; ++i) { - memcpy(item_cvb->mutable_cell_ptr(array_cvb->item_offset(i)), result[i].data, result[i].length * ITEM_TYPE_SIZE); + memcpy(item_cvb->mutable_cell_ptr(array_cvb->item_offset(i)), result[i].data(), + result[i].length() * ITEM_TYPE_SIZE); } item_cvb->set_null_bits(0, first_write_item, false); @@ -110,7 +114,8 @@ void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, s // second write 
ASSERT_TRUE(array_cvb->resize(array_size).ok()); for (int i = array_init_size; i < array_size; ++i) { - memcpy(offset_cvb->mutable_cell_ptr(i + 1), &(result[i].length), sizeof(segment_v2::ordinal_t)); + uint32_t len = result[i].length(); + memcpy(offset_cvb->mutable_cell_ptr(i + 1), &len, sizeof(uint32_t)); } array_cvb->set_null_bits(array_init_size, array_size - array_init_size, false); array_cvb->get_offset_by_length(array_init_size, array_size - array_init_size); @@ -119,14 +124,16 @@ void test_read_write_array_column_vector(const ArrayTypeInfo* array_type_info, s ASSERT_TRUE(item_cvb->resize(total_item_size).ok()); for (size_t i = array_init_size; i < array_size; ++i) { - memcpy(item_cvb->mutable_cell_ptr(array_cvb->item_offset(i)), result[i].data, result[i].length * ITEM_TYPE_SIZE); + memcpy(item_cvb->mutable_cell_ptr(array_cvb->item_offset(i)), result[i].data(), + result[i].length() * ITEM_TYPE_SIZE); } size_t second_write_item = total_item_size - first_write_item; item_cvb->set_null_bits(first_write_item, second_write_item, false); array_cvb->prepare_for_read(0, array_size, false); for (size_t idx = 0; idx < array_size; ++idx) { - ASSERT_TRUE(array_type_info->equal(&result[idx], array_cvb->cell_ptr(idx))) << "idx:" << idx; + ASSERT_TRUE(array_type_info->equal(&result[idx], array_cvb->cell_ptr(idx))) + << "idx:" << idx; } delete field; } @@ -158,7 +165,7 @@ TEST_F(ColumnVectorTest, array_column_vector_test) { size_t num_array = 1024; size_t num_item = num_array * 3; { - auto* array_val = new Collection[num_array]; + auto* array_val = new CollectionValue[num_array]; bool null_signs[3] = {false, false, false}; auto* item_val = new uint8_t[num_item]; @@ -167,14 +174,15 @@ TEST_F(ColumnVectorTest, array_column_vector_test) { item_val[i] = i; if (i % 3 == 0) { size_t array_index = i / 3; - array_val[array_index].data = &item_val[i]; - array_val[array_index].null_signs = null_signs; - array_val[array_index].length = 3; + 
array_val[array_index].set_data(&item_val[i]); + array_val[array_index].set_null_signs(null_signs); + array_val[array_index].set_length(3); } } auto type_info = reinterpret_cast( ArrayTypeInfoResolver::instance()->get_type_info(OLAP_FIELD_TYPE_TINYINT)); - test_read_write_array_column_vector(type_info, num_array, array_val); + test_read_write_array_column_vector(type_info, num_array, + array_val); delete[] array_val; delete[] item_val; diff --git a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp index 23a2d3936e..4f0990c998 100644 --- a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp +++ b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp @@ -31,8 +31,8 @@ #include "olap/types.h" #include "runtime/mem_pool.h" #include "runtime/mem_tracker.h" -#include "util/file_utils.h" #include "test_util/test_util.h" +#include "util/file_utils.h" using std::string; @@ -221,9 +221,9 @@ void test_nullable_data(uint8_t* src_data, uint8_t* src_is_null, int num_rows, } template -void test_array_nullable_data(Collection* src_data, uint8_t* src_is_null, int num_rows, +void test_array_nullable_data(CollectionValue* src_data, uint8_t* src_is_null, int num_rows, std::string test_name) { - Collection* src = src_data; + CollectionValue* src = src_data; ColumnMetaPB meta; TabletColumn list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY); int32 item_length = 0; @@ -375,7 +375,7 @@ TEST_F(ColumnReaderWriterTest, test_array_type) { size_t num_item = num_array * 3; uint8_t* array_is_null = new uint8_t[BitmapSize(num_array)]; - Collection* array_val = new Collection[num_array]; + CollectionValue* array_val = new CollectionValue[num_array]; bool* item_is_null = new bool[num_item]; uint8_t* item_val = new uint8_t[num_item]; for (int i = 0; i < num_item; ++i) { @@ -388,9 +388,9 @@ TEST_F(ColumnReaderWriterTest, test_array_type) { if (is_null) { continue; } - array_val[array_index].data = 
&item_val[i]; - array_val[array_index].null_signs = &item_is_null[i]; - array_val[array_index].length = 3; + array_val[array_index].set_data(&item_val[i]); + array_val[array_index].set_null_signs(&item_is_null[i]); + array_val[array_index].set_length(3); } } test_array_nullable_data( @@ -400,7 +400,7 @@ TEST_F(ColumnReaderWriterTest, test_array_type) { delete[] item_val; delete[] item_is_null; - array_val = new Collection[num_array]; + array_val = new CollectionValue[num_array]; Slice* varchar_vals = new Slice[3]; item_is_null = new bool[3]; for (int i = 0; i < 3; ++i) { @@ -415,9 +415,9 @@ TEST_F(ColumnReaderWriterTest, test_array_type) { if (is_null) { continue; } - array_val[i].data = varchar_vals; - array_val[i].null_signs = item_is_null; - array_val[i].length = 3; + array_val[i].set_data(varchar_vals); + array_val[i].set_null_signs(item_is_null); + array_val[i].set_length(3); } test_array_nullable_data( array_val, array_is_null, num_array, "null_array_chars"); diff --git a/be/test/olap/storage_types_test.cpp b/be/test/olap/storage_types_test.cpp index d85e6fc8a8..78819cc92a 100644 --- a/be/test/olap/storage_types_test.cpp +++ b/be/test/olap/storage_types_test.cpp @@ -146,7 +146,7 @@ TEST(TypesTest, copy_and_equal) { } template -void common_test_array(Collection src_val) { +void common_test_array(CollectionValue src_val) { TabletColumn list_column(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_ARRAY); int32 item_length = 0; if (item_type == OLAP_FIELD_TYPE_CHAR || item_type == OLAP_FIELD_TYPE_VARCHAR) { @@ -160,7 +160,7 @@ void common_test_array(Collection src_val) { ASSERT_EQ(item_type, array_type->item_type_info()->type()); { // test deep copy - Collection dst_val; + CollectionValue dst_val; auto tracker = std::make_shared(); MemPool pool(tracker.get()); array_type->deep_copy((char*)&dst_val, (char*)&src_val, &pool); @@ -170,7 +170,7 @@ void common_test_array(Collection src_val) { { // test direct copy bool null_signs[50]; uint8_t data[50]; - Collection 
dst_val(data, sizeof(null_signs), null_signs); + CollectionValue dst_val(data, sizeof(null_signs), null_signs); array_type->direct_copy((char*)&dst_val, (char*)&src_val); ASSERT_TRUE(array_type->equal((char*)&src_val, (char*)&dst_val)); ASSERT_EQ(0, array_type->cmp((char*)&src_val, (char*)&dst_val)); @@ -180,45 +180,45 @@ void common_test_array(Collection src_val) { TEST(ArrayTypeTest, copy_and_equal) { bool bool_array[3] = {true, false, true}; bool null_signs[3] = {true, true, true}; - common_test_array(Collection(bool_array, 3, null_signs)); + common_test_array(CollectionValue(bool_array, 3, null_signs)); uint8_t tiny_int_array[3] = {3, 4, 5}; - common_test_array(Collection(tiny_int_array, 3, null_signs)); + common_test_array(CollectionValue(tiny_int_array, 3, null_signs)); int16_t small_int_array[3] = {123, 234, 345}; - common_test_array(Collection(small_int_array, 3, null_signs)); + common_test_array(CollectionValue(small_int_array, 3, null_signs)); int32_t int_array[3] = {-123454321, 123454321, 323412343}; - common_test_array(Collection(int_array, 3, null_signs)); + common_test_array(CollectionValue(int_array, 3, null_signs)); uint32_t uint_array[3] = {123454321, 2342341, 52435234}; - common_test_array(Collection(uint_array, 3, null_signs)); + common_test_array(CollectionValue(uint_array, 3, null_signs)); int64_t bigint_array[3] = {123454321123456789L, 23534543234L, -123454321123456789L}; - common_test_array(Collection(bigint_array, 3, null_signs)); + common_test_array(CollectionValue(bigint_array, 3, null_signs)); __int128 large_int_array[3] = {1234567899L, 1234567899L, -12345631899L}; - common_test_array(Collection(large_int_array, 3, null_signs)); + common_test_array(CollectionValue(large_int_array, 3, null_signs)); float float_array[3] = {1.11, 2.22, -3.33}; - common_test_array(Collection(float_array, 3, null_signs)); + common_test_array(CollectionValue(float_array, 3, null_signs)); double double_array[3] = {12221.11, 12221.11, -12221.11}; - 
common_test_array(Collection(double_array, 3, null_signs)); + common_test_array(CollectionValue(double_array, 3, null_signs)); decimal12_t decimal_array[3] = {{123, 234}, {345, 453}, {4524, 2123}}; - common_test_array(Collection(decimal_array, 3, null_signs)); + common_test_array(CollectionValue(decimal_array, 3, null_signs)); uint24_t date_array[3] = {(1988 << 9) | (2 << 5) | 1, (1998 << 9) | (2 << 5) | 1, (2008 << 9) | (2 << 5) | 1}; - common_test_array(Collection(date_array, 3, null_signs)); + common_test_array(CollectionValue(date_array, 3, null_signs)); int64_t datetime_array[3] = {19880201010203L, 19980201010203L, 20080204010203L}; - common_test_array(Collection(datetime_array, 3, null_signs)); + common_test_array(CollectionValue(datetime_array, 3, null_signs)); Slice char_array[3] = {"12345abcde", "12345abcde", "asdf322"}; - common_test_array(Collection(char_array, 3, null_signs)); - common_test_array(Collection(char_array, 3, null_signs)); + common_test_array(CollectionValue(char_array, 3, null_signs)); + common_test_array(CollectionValue(char_array, 3, null_signs)); } } // namespace doris diff --git a/be/test/runtime/CMakeLists.txt b/be/test/runtime/CMakeLists.txt index 90087d2055..fc3e37aa38 100644 --- a/be/test/runtime/CMakeLists.txt +++ b/be/test/runtime/CMakeLists.txt @@ -62,3 +62,4 @@ ADD_BE_TEST(external_scan_context_mgr_test) ADD_BE_TEST(memory/chunk_allocator_test) ADD_BE_TEST(memory/system_allocator_test) ADD_BE_TEST(cache/partition_cache_test) +ADD_BE_TEST(collection_value_test) diff --git a/be/test/runtime/collection_value_test.cpp b/be/test/runtime/collection_value_test.cpp new file mode 100644 index 0000000000..b9825b984e --- /dev/null +++ b/be/test/runtime/collection_value_test.cpp @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "runtime/collection_value.h" + +#include + +#include "string" +#include "util/bitmap.h" + +#define private public + +namespace doris { + +TEST(CollectionValueTest, init) { + { + CollectionValue cv; + + ObjectPool pool; + EXPECT_TRUE(CollectionValue::init_collection(&pool, 10, TYPE_INT, &cv).ok()); + + EXPECT_EQ(10, cv.size()); + + for (int j = 0; j < 10; ++j) { + EXPECT_FALSE(*(cv._null_signs + j)); + } + + EXPECT_FALSE(CollectionValue::init_collection(&pool, 10, TYPE_INT, nullptr).ok()); + + CollectionValue cv_null; + bzero(&cv_null, sizeof(cv_null)); + EXPECT_TRUE(CollectionValue::init_collection(&pool, 0, TYPE_INT, &cv_null).ok()); + EXPECT_EQ(0, cv_null.size()); + } + + { + CollectionValue cv; + ObjectPool pool; + EXPECT_TRUE(CollectionValue::init_collection(&pool, 10, TYPE_INT, &cv).ok()); + } +} + +TEST(CollectionValueTest, set) { + CollectionValue cv; + ObjectPool pool; + EXPECT_TRUE(CollectionValue::init_collection(&pool, 10, TYPE_INT, &cv).ok()); + + // normal + { + IntVal v0 = IntVal::null(); + cv.set(0, TYPE_INT, &v0); + for (int j = 1; j < cv.size(); ++j) { + IntVal i(j + 10); + ASSERT_TRUE(cv.set(j, TYPE_INT, &i).ok()); + } + } + + { + auto iter = cv.iterator(TYPE_INT); + IntVal v0; + iter.value(&v0); + ASSERT_TRUE(v0.is_null); + ASSERT_TRUE(iter.is_null()); + iter.next(); + for (int k = 1; k < cv.size(); ++k, iter.next()) { + IntVal v; + iter.value(&v); + 
EXPECT_EQ(k + 10, v.val); + } + } + + // over size + { + IntVal intv(20); + ASSERT_FALSE(cv.set(10, TYPE_INT, &intv).ok()); + } +} +} // namespace doris + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/be/test/util/CMakeLists.txt b/be/test/util/CMakeLists.txt index d6bab33fe6..74374cdafe 100644 --- a/be/test/util/CMakeLists.txt +++ b/be/test/util/CMakeLists.txt @@ -63,6 +63,7 @@ ADD_BE_TEST(monotime_test) ADD_BE_TEST(scoped_cleanup_test) ADD_BE_TEST(thread_test) ADD_BE_TEST(threadpool_test) +ADD_BE_TEST(mysql_row_buffer_test) ADD_BE_TEST(trace_test) ADD_BE_TEST(easy_json-test) ADD_BE_TEST(http_channel_test) diff --git a/be/test/util/mysql_row_buffer_test.cpp b/be/test/util/mysql_row_buffer_test.cpp new file mode 100644 index 0000000000..43c9355355 --- /dev/null +++ b/be/test/util/mysql_row_buffer_test.cpp @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "util/mysql_row_buffer.h" + +#include +#include +#include + +#include + +#include "env/env.h" +#include "gutil/strings/util.h" +#include "util/logging.h" + +namespace doris { + +using namespace strings; + +TEST(MysqlRowBufferTest, basic) { + MysqlRowBuffer mrb; + + std::string s("test"); + mrb.push_tinyint(5); + mrb.push_smallint(120); + mrb.push_int(-30000); + mrb.push_bigint(900000); + mrb.push_unsigned_bigint(90000000); + mrb.push_float(56.45); + mrb.push_double(10.12); + mrb.push_string(s.c_str(), 4); + mrb.push_null(); + + const char* buf = mrb.buf(); + + // mem: size-data-size-data + // 1-'5'-3-'120'-6-'-30000'-6-'900000'-8-'90000000'-5-'56.45'-5-'10.12'-4-'test'-251 + // 1b-1b-1b-3b--1b-----6b--1b----6b---1b-----8b----1b---5b---1b---5b---1b---4b---1b + // 0 1 2 3 6 7 13 14 20 21 29 30 35 36 41 42 46 + EXPECT_EQ(47, mrb.length()); + + EXPECT_EQ(1, *((int8_t*)(buf))); + EXPECT_EQ(0, strncmp(buf + 1, "5", 1)); + + EXPECT_EQ(3, *((int8_t*)(buf + 2))); + EXPECT_EQ(0, strncmp(buf + 3, "120", 3)); + + EXPECT_EQ(6, *((int8_t*)(buf + 6))); + EXPECT_EQ(0, strncmp(buf + 7, "-30000", 6)); + + EXPECT_EQ(6, *((int8_t*)(buf + 13))); + EXPECT_EQ(0, strncmp(buf + 14, "900000", 6)); + + EXPECT_EQ(8, *((int8_t*)(buf + 20))); + EXPECT_EQ(0, strncmp(buf + 21, "90000000", 8)); + + EXPECT_EQ(5, *((int8_t*)(buf + 29))); + EXPECT_EQ(0, strncmp(buf + 30, "56.45", 5)); + + EXPECT_EQ(5, *((int8_t*)(buf + 35))); + EXPECT_EQ(0, strncmp(buf + 36, "10.12", 5)); + + EXPECT_EQ(4, *((int8_t*)(buf + 41))); + EXPECT_EQ(0, strncmp(buf + 42, "test", 4)); + + EXPECT_EQ(251, *((uint8_t*)(buf + 46))); +} + +TEST(MysqlRowBufferTest, dynamic_mode) { + MysqlRowBuffer mrb; + + mrb.open_dynamic_mode(); + + std::string s("test"); + mrb.push_tinyint(5); + mrb.push_smallint(120); + mrb.push_int(-30000); + mrb.push_bigint(900000); + mrb.push_unsigned_bigint(90000000); + mrb.push_float(56.45); + mrb.push_double(10.12); + mrb.push_string(s.c_str(), 4); + mrb.push_null(); + + 
+ mrb.close_dynamic_mode(); + + const char* buf = mrb.buf(); + + // mem: size-data-data + // 254-38-'5'-'120'-'-30000'-'900000'-'90000000'-'56.45'-'10.12'-'test'-'' + // 1b--8b-1b----3b-----6b-------6b--------8b-------5b------5b------4b---0b + // 0 1 9 10 13 19 25 33 38 43 47 + EXPECT_EQ(47, mrb.length()); + + EXPECT_EQ(254, *((uint8_t*)(buf))); + EXPECT_EQ(38, *((int64_t*)(buf + 1))); + + EXPECT_EQ(0, strncmp(buf + 9, "5", 1)); + EXPECT_EQ(0, strncmp(buf + 10, "120", 3)); + EXPECT_EQ(0, strncmp(buf + 13, "-30000", 6)); + EXPECT_EQ(0, strncmp(buf + 19, "900000", 6)); + EXPECT_EQ(0, strncmp(buf + 25, "90000000", 8)); + EXPECT_EQ(0, strncmp(buf + 33, "56.45", 5)); + EXPECT_EQ(0, strncmp(buf + 38, "10.12", 5)); + EXPECT_EQ(0, strncmp(buf + 43, "test", 4)); +} + +} // namespace doris + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/fe/fe-core/AlterRoutineLoadOperationLogTest b/fe/fe-core/AlterRoutineLoadOperationLogTest new file mode 100644 index 0000000000..ae3953e57d Binary files /dev/null and b/fe/fe-core/AlterRoutineLoadOperationLogTest differ diff --git a/fe/fe-core/diskInfoTest b/fe/fe-core/diskInfoTest new file mode 100644 index 0000000000..866d0394bc Binary files /dev/null and b/fe/fe-core/diskInfoTest differ diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 8e509aec61..c454336457 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -34,6 +34,10 @@ import org.apache.doris.catalog.KeysType; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; +import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.MapType; +import org.apache.doris.catalog.StructField; +import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.View; import 
org.apache.doris.common.AnalysisException; import org.apache.doris.common.Version; @@ -63,6 +67,7 @@ parser code {: private boolean reportExpectedToken(Integer tokenId) { if (SqlScanner.isKeyword(tokenId) || tokenId.intValue() == SqlParserSymbols.COMMA || + tokenId.intValue() == SqlParserSymbols.DOT || tokenId.intValue() == SqlParserSymbols.IDENT) { return true; } else { @@ -228,10 +233,10 @@ parser code {: :}; // Total keywords of doris -terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_AND, KW_ANTI, KW_APPEND, KW_AS, KW_ASC, KW_AUTHORS, +terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_AND, KW_ANTI, KW_APPEND, KW_AS, KW_ASC, KW_AUTHORS, KW_ARRAY, KW_BACKEND, KW_BACKUP, KW_BETWEEN, KW_BEGIN, KW_BIGINT, KW_BITMAP, KW_BITMAP_UNION, KW_BOOLEAN, KW_BROKER, KW_BACKENDS, KW_BY, KW_BUILTIN, KW_CANCEL, KW_CASE, KW_CAST, KW_CHAIN, KW_CHAR, KW_CHARSET, KW_CHECK, KW_CLUSTER, KW_CLUSTERS, - KW_COLLATE, KW_COLLATION, KW_COLUMN, KW_COLUMNS, KW_COMMENT, KW_COMMIT, KW_COMMITTED, + KW_COLLATE, KW_COLLATION, KW_COLUMN, KW_COLON, KW_COLUMNS, KW_COMMENT, KW_COMMIT, KW_COMMITTED, KW_CONFIG, KW_CONNECTION, KW_CONNECTION_ID, KW_CONSISTENT, KW_CONVERT, KW_COUNT, KW_CREATE, KW_CROSS, KW_CUBE, KW_CURRENT, KW_CURRENT_USER, KW_DATA, KW_DATABASE, KW_DATABASES, KW_DATE, KW_DATETIME, KW_DAY, KW_DECIMAL, KW_DECOMMISSION, KW_DEFAULT, KW_DESC, KW_DESCRIBE, KW_DELETE, KW_DISTINCT, KW_DISTINCTPC, KW_DISTINCTPCSA, KW_DISTRIBUTED, KW_DISTRIBUTION, KW_DYNAMIC, KW_BUCKETS, KW_DIV, KW_DOUBLE, KW_DROP, KW_DROPP, KW_DUPLICATE, @@ -246,8 +251,8 @@ terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_A KW_KEY, KW_KEYS, KW_KILL, KW_LABEL, KW_LARGEINT, KW_LAST, KW_LEFT, KW_LESS, KW_LEVEL, KW_LIKE, KW_LIMIT, KW_LINK, KW_LIST, KW_LOAD, KW_LOCAL, KW_LOCATION, - KW_MATERIALIZED, KW_MAX, KW_MAX_VALUE, KW_MERGE, KW_MIN, KW_MINUTE, KW_MINUS, KW_MIGRATE, KW_MIGRATIONS, KW_MODIFY, KW_MONTH, - KW_NAME, KW_NAMES, KW_NEGATIVE, KW_NO, 
KW_NOT, KW_NULL, KW_NULLS, + KW_MAP, KW_MATERIALIZED, KW_MAX, KW_MAX_VALUE, KW_MERGE, KW_MIN, KW_MINUTE, KW_MINUS, KW_MIGRATE, KW_MIGRATIONS, KW_MODIFY, KW_MONTH, + KW_NAME, KW_NAMED_STRUCT, KW_NAMES, KW_NEGATIVE, KW_NO, KW_NOT, KW_NULL, KW_NULLS, KW_OBSERVER, KW_OFFSET, KW_ON, KW_ONLY, KW_OPEN, KW_OR, KW_ORDER, KW_OUTER, KW_OUTFILE, KW_OVER, KW_PARTITION, KW_PARTITIONS, KW_PASSWORD, KW_PATH, KW_PAUSE, KW_PIPE, KW_PRECEDING, KW_PLUGIN, KW_PLUGINS, @@ -257,7 +262,7 @@ terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_A KW_REPAIR, KW_REPEATABLE, KW_REPOSITORY, KW_REPOSITORIES, KW_REPLACE, KW_REPLACE_IF_NOT_NULL, KW_REPLICA, KW_RESOURCE, KW_RESOURCES, KW_RESTORE, KW_RETURNS, KW_RESUME, KW_REVOKE, KW_RIGHT, KW_ROLE, KW_ROLES, KW_ROLLBACK, KW_ROLLUP, KW_ROUTINE, KW_ROW, KW_ROWS, KW_S3, KW_SCHEMA, KW_SCHEMAS, KW_SECOND, KW_SELECT, KW_SEMI, KW_SERIALIZABLE, KW_SESSION, KW_SET, KW_SETS, KW_SET_VAR, KW_SHOW, KW_SIGNED, - KW_SMALLINT, KW_SNAPSHOT, KW_SONAME, KW_SPLIT, KW_START, KW_STATUS, KW_STOP, KW_STORAGE, KW_STREAM, KW_STRING, + KW_SMALLINT, KW_SNAPSHOT, KW_SONAME, KW_SPLIT, KW_START, KW_STATUS, KW_STOP, KW_STORAGE, KW_STREAM, KW_STRING, KW_STRUCT, KW_SUM, KW_SUPERUSER, KW_SYNC, KW_SYSTEM, KW_TABLE, KW_TABLES, KW_TABLET, KW_TASK, KW_TEMPORARY, KW_TERMINATED, KW_THAN, KW_TIME, KW_THEN, KW_TIMESTAMP, KW_TINYINT, KW_TO, KW_TRANSACTION, KW_TRIGGERS, KW_TRIM, KW_TRUE, KW_TRUNCATE, KW_TYPE, KW_TYPES, @@ -266,7 +271,7 @@ terminal String KW_ADD, KW_ADMIN, KW_AFTER, KW_AGGREGATE, KW_ALL, KW_ALTER, KW_A KW_WARNINGS, KW_WEEK, KW_WHEN, KW_WHITELIST, KW_WHERE, KW_WITH, KW_WORK, KW_WRITE, KW_YEAR; -terminal COMMA, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON, LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT; +terminal COMMA, COLON, DOT, DOTDOTDOT, AT, STAR, LPAREN, RPAREN, SEMICOLON, LBRACKET, RBRACKET, DIVIDE, MOD, ADD, SUBTRACT; terminal BITAND, BITOR, BITXOR, BITNOT; terminal EQUAL, NOT, LESSTHAN, GREATERTHAN, SET_VAR; terminal 
COMMENTED_PLAN_HINT_START, COMMENTED_PLAN_HINT_END; @@ -367,11 +372,14 @@ nonterminal LiteralExpr literal; nonterminal CaseExpr case_expr; nonterminal ArrayList case_when_clause_list; nonterminal FunctionParams function_params; -nonterminal Expr function_call_expr; +nonterminal Expr function_call_expr, array_expr; +nonterminal StructField struct_field; +nonterminal ArrayList struct_field_list; nonterminal AnalyticWindow opt_window_clause; nonterminal AnalyticWindow.Type window_type; nonterminal AnalyticWindow.Boundary window_boundary; nonterminal SlotRef column_ref; +nonterminal FunctionCallExpr column_subscript; nonterminal ArrayList table_ref_list, base_table_ref_list; nonterminal FromClause from_clause; nonterminal TableRef table_ref; @@ -4087,6 +4095,12 @@ type ::= :} | KW_VARCHAR {: RESULT = ScalarType.createVarcharType(-1); :} + | KW_ARRAY LESSTHAN type:value_type GREATERTHAN + {: RESULT = new ArrayType(value_type); :} + | KW_MAP LESSTHAN type:key_type COMMA type:value_type GREATERTHAN + {: RESULT = new MapType(key_type,value_type); :} + | KW_STRUCT LESSTHAN struct_field_list:fields GREATERTHAN + {: RESULT = new StructType(fields); :} | KW_CHAR LPAREN INTEGER_LITERAL:len RPAREN {: ScalarType type = ScalarType.createCharType(len.intValue()); type.setAssignedStrLenInColDefinition(); @@ -4237,6 +4251,34 @@ function_call_expr ::= :} ; +array_expr ::= + KW_ARRAY LPAREN function_params:params RPAREN + {: + RESULT = new FunctionCallExpr("array", params); + :} + | KW_ARRAY LPAREN RPAREN + {: + RESULT = new ArrayLiteral(); + :} + ; + +struct_field ::= + ident:name COLON type:type + {: RESULT = new StructField(name, type); :} + ; + +struct_field_list ::= + struct_field:field + {: + RESULT = Lists.newArrayList(field); + :} + | struct_field_list:fields COMMA struct_field:field + {: + fields.add(field); + RESULT = fields; + :} + ; + exists_predicate ::= KW_EXISTS subquery:s {: RESULT = new ExistsPredicate(s, false); :} @@ -4255,6 +4297,8 @@ non_pred_expr ::= :} | 
literal:l {: RESULT = l; :} + | array_expr:a + {: RESULT = a; :} | function_call_expr:e {: RESULT = e; :} | KW_DATE STRING_LITERAL:l @@ -4283,6 +4327,8 @@ non_pred_expr ::= {: RESULT = c; :} | column_ref:c {: RESULT = c; :} + | column_subscript:c + {: RESULT = c; :} | timestamp_arithmetic_expr:e {: RESULT = e; :} | arithmetic_expr:e @@ -4632,6 +4678,15 @@ column_ref ::= {: RESULT = new SlotRef(new TableName(db, tbl), col); :} ; +column_subscript ::= + expr:e LBRACKET expr:index RBRACKET + {: ArrayList list = new ArrayList(); + list.add(e); + list.add(index); + RESULT = new FunctionCallExpr("%element_extract%", list); + :} + ; + privilege_type ::= ident:name {: @@ -4806,6 +4861,8 @@ keyword ::= {: RESULT = id; :} | KW_AUTHORS:id {: RESULT = id; :} + | KW_ARRAY:id + {: RESULT = id; :} | KW_BACKUP:id {: RESULT = id; :} | KW_BEGIN:id @@ -5086,6 +5143,8 @@ keyword ::= {: RESULT = id; :} | KW_FEATURE:id {: RESULT = id; :} + | KW_MAP:id + {: RESULT = id; :} ; // Identifier that contain keyword diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java new file mode 100644 index 0000000000..74cb323cc5 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArrayLiteral.java @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.lang.StringUtils; +import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.Type; +import org.apache.doris.thrift.TExprNode; +import org.apache.doris.thrift.TExprNodeType; + +public class ArrayLiteral extends LiteralExpr { + + public ArrayLiteral() { + this.type = new ArrayType(Type.NULL); + children = new ArrayList<>(); + } + + public ArrayLiteral(LiteralExpr... 
v) { + if (v.length < 1) { + this.type = new ArrayType(Type.NULL); + return; + } + + this.type = new ArrayType(v[0].type); + children = new ArrayList<>(v.length); + children.addAll(Arrays.asList(v)); + } + + protected ArrayLiteral(ArrayLiteral other) { + super(other); + } + + @Override + public boolean isMinValue() { + return false; + } + + @Override + public int compareLiteral(LiteralExpr expr) { + return 0; + } + + @Override + protected String toSqlImpl() { + List list = new ArrayList<>(children.size()); + children.forEach(v -> list.add(v.toSqlImpl())); + + return "ARRAY(" + StringUtils.join(list, ", ") + ")"; + } + + @Override + public String getStringValue() { + List list = new ArrayList<>(children.size()); + children.forEach(v -> list.add(((LiteralExpr) v).getStringValue())); + + return "ARRAY[" + StringUtils.join(list, ", ") + "]"; + } + + @Override + protected void toThrift(TExprNode msg) { + msg.node_type = TExprNodeType.ARRAY_LITERAL; + msg.setChildType(((ArrayType) type).getItemType().getPrimitiveType().toThrift()); + } + + @Override + public void write(DataOutput out) throws IOException { + super.write(out); + out.writeInt(children.size()); + for (Expr e : children) { + Expr.writeTo(e, out); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + super.readFields(in); + int size = in.readInt(); + children = new ArrayList<>(size); + for (int i = 0; i < size; i++) { + children.add(Expr.readIn(in)); + } + } + + public static ArrayLiteral read(DataInput in) throws IOException { + ArrayLiteral literal = new ArrayLiteral(); + literal.readFields(in); + return literal; + } + + @Override + public Expr clone() { + return new ArrayLiteral(this); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java index fc455db174..97f31840bf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/ColumnDef.java @@ -200,12 +200,30 @@ public class ColumnDef { } if (type.getPrimitiveType() == PrimitiveType.BITMAP) { - if (defaultValue.isSet) { + if (defaultValue.isSet && defaultValue != DefaultValue.NULL_DEFAULT_VALUE) { throw new AnalysisException("Bitmap type column can not set default value"); } defaultValue = DefaultValue.BITMAP_EMPTY_DEFAULT_VALUE; } + if (type.getPrimitiveType() == PrimitiveType.ARRAY) { + if (defaultValue.isSet && defaultValue != DefaultValue.NULL_DEFAULT_VALUE) { + throw new AnalysisException("Array type column default value only support null"); + } + } + + if (type.getPrimitiveType() == PrimitiveType.MAP) { + if (defaultValue.isSet && defaultValue != DefaultValue.NULL_DEFAULT_VALUE) { + throw new AnalysisException("Map type column default value just support null"); + } + } + + if (type.getPrimitiveType() == PrimitiveType.STRUCT) { + if (defaultValue.isSet && defaultValue != DefaultValue.NULL_DEFAULT_VALUE) { + throw new AnalysisException("Struct type column default value just support null"); + } + } + // If aggregate type is REPLACE_IF_NOT_NULL, we set it nullable. 
// If default value is not set, we set it NULL if (aggregateType == AggregateType.REPLACE_IF_NOT_NULL) { @@ -267,6 +285,12 @@ public class ColumnDef { break; case BITMAP: break; + case ARRAY: + break; + case MAP: + break; + case STRUCT: + break; case BOOLEAN: BoolLiteral boolLiteral = new BoolLiteral(defaultValue); break; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java index d20c75a0b5..78fb1550e3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java @@ -18,6 +18,7 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.AggregateType; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Index; @@ -350,6 +351,21 @@ public class CreateTableStmt extends DdlStmt { for (ColumnDef columnDef : columnDefs) { columnDef.analyze(engineName.equals("olap")); + if (columnDef.getType().isArrayType()) { + ArrayType tp = (ArrayType) columnDef.getType(); + if (!tp.getItemType().getPrimitiveType().isIntegerType() && + !tp.getItemType().getPrimitiveType().isCharFamily()) { + throw new AnalysisException("Array column just support INT/VARCHAR sub-type"); + } + if (columnDef.getAggregateType() != null && columnDef.getAggregateType() != AggregateType.NONE) { + throw new AnalysisException("Array column can't support aggregation " + columnDef.getAggregateType()); + } + if (columnDef.isKey()) { + throw new AnalysisException("Array can only be used in the non-key column of" + + " the duplicate table at present."); + } + } + if (columnDef.getType().isHllType()) { hasHll = true; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index cafd88fc54..1b3dabae9f 100755 --- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -1582,7 +1582,8 @@ abstract public class Expr extends TreeNode implements ParseNode, Cloneabl DATE_LITERAL(9), MAX_LITERAL(10), BINARY_PREDICATE(11), - FUNCTION_CALL(12); + FUNCTION_CALL(12), + ARRAY_LITERAL(13); private static Map codeMap = Maps.newHashMap(); @@ -1630,6 +1631,8 @@ abstract public class Expr extends TreeNode implements ParseNode, Cloneabl output.writeInt(ExprSerCode.BINARY_PREDICATE.getCode()); } else if (expr instanceof FunctionCallExpr) { output.writeInt(ExprSerCode.FUNCTION_CALL.getCode()); + } else if (expr instanceof ArrayLiteral) { + output.writeInt(ExprSerCode.ARRAY_LITERAL.getCode()); } else { throw new IOException("Unknown class " + expr.getClass().getName()); } @@ -1671,6 +1674,8 @@ abstract public class Expr extends TreeNode implements ParseNode, Cloneabl return BinaryPredicate.read(in); case FUNCTION_CALL: return FunctionCallExpr.read(in); + case ARRAY_LITERAL: + return ArrayLiteral.read(in); default: throw new IOException("Unknown code: " + code); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java index 017802eb23..55f0f64565 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ExpressionFunctions.java @@ -19,12 +19,14 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Function; +import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.VariableMgr; import org.apache.doris.rewrite.FEFunction; +import org.apache.doris.rewrite.FEFunctionList; import 
org.apache.doris.rewrite.FEFunctions; import com.google.common.base.Joiner; @@ -96,7 +98,7 @@ public enum ExpressionFunctions { argTypes.add((ScalarType) type); } FEFunctionSignature signature = new FEFunctionSignature(fn.functionName(), - argTypes.toArray(new ScalarType[argTypes.size()]), (ScalarType) fn.getReturnType()); + argTypes.toArray(new ScalarType[argTypes.size()]), fn.getReturnType()); FEFunctionInvoker invoker = getFunction(signature); if (invoker != null) { try { @@ -149,22 +151,32 @@ public enum ExpressionFunctions { new ImmutableMultimap.Builder(); Class clazz = FEFunctions.class; for (Method method : clazz.getDeclaredMethods()) { - FEFunction annotation = method.getAnnotation(FEFunction.class); - if (annotation != null) { - String name = annotation.name(); - ScalarType returnType = ScalarType.createType(annotation.returnType()); - List argTypes = new ArrayList<>(); - for (String type : annotation.argTypes()) { - argTypes.add(ScalarType.createType(type)); + FEFunctionList annotationList = method.getAnnotation(FEFunctionList.class); + if (annotationList != null) { + for (FEFunction f : annotationList.value()) { + registerFEFunction(mapBuilder, method, f); } - FEFunctionSignature signature = new FEFunctionSignature(name, - argTypes.toArray(new ScalarType[argTypes.size()]), returnType); - mapBuilder.put(name, new FEFunctionInvoker(method, signature)); } + registerFEFunction(mapBuilder, method, method.getAnnotation(FEFunction.class)); } this.functions = mapBuilder.build(); } + private void registerFEFunction(ImmutableMultimap.Builder mapBuilder, + Method method, FEFunction annotation) { + if (annotation != null) { + String name = annotation.name(); + Type returnType = Type.fromPrimitiveType(PrimitiveType.valueOf(annotation.returnType())); + List argTypes = new ArrayList<>(); + for (String type : annotation.argTypes()) { + argTypes.add(ScalarType.createType(type)); + } + FEFunctionSignature signature = new FEFunctionSignature(name, + argTypes.toArray(new 
ScalarType[argTypes.size()]), returnType); + mapBuilder.put(name, new FEFunctionInvoker(method, signature)); + } + } + public static class FEFunctionInvoker { private final Method method; private final FEFunctionSignature signature; @@ -239,6 +251,12 @@ public enum ExpressionFunctions { } else { throw new IllegalArgumentException("Doris doesn't support type:" + argType); } + + // if args all is NullLiteral + long size = args.stream().filter(e -> e instanceof NullLiteral).count(); + if (args.size() == size) { + exprs = new NullLiteral[args.size()]; + } args.toArray(exprs); return exprs; } @@ -247,9 +265,9 @@ public enum ExpressionFunctions { public static class FEFunctionSignature { private final String name; private final ScalarType[] argTypes; - private final ScalarType returnType; + private final Type returnType; - public FEFunctionSignature(String name, ScalarType[] argTypes, ScalarType returnType) { + public FEFunctionSignature(String name, ScalarType[] argTypes, Type returnType) { this.name = name; this.argTypes = argTypes; this.returnType = returnType; @@ -259,7 +277,7 @@ public enum ExpressionFunctions { return argTypes; } - public ScalarType getReturnType() { + public Type getReturnType() { return returnType; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java index 9f41e695b5..a04cdb105b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java @@ -18,6 +18,7 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.AggregateFunction; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Catalog; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Function; @@ -69,6 +70,7 @@ public class FunctionCallExpr extends Expr { new ImmutableSortedSet.Builder(String.CASE_INSENSITIVE_ORDER) 
.add("stddev").add("stddev_val").add("stddev_samp") .add("variance").add("variance_pop").add("variance_pop").add("var_samp").add("var_pop").build(); + private static final String ELEMENT_EXTRACT_FN_NAME = "%element_extract%"; public void setIsAnalyticFnCall(boolean v) { isAnalyticFnCall = v; @@ -658,16 +660,18 @@ public class FunctionCallExpr extends Expr { } } - Type[] args = fn.getArgs(); - if (args.length > 0) { - // Implicitly cast all the children to match the function if necessary - for (int i = 0; i < argTypes.length; ++i) { - // For varargs, we must compare with the last type in callArgs.argTypes. - int ix = Math.min(args.length - 1, i); - if (!argTypes[i].matchesType(args[ix]) && !( - argTypes[i].isDateType() && args[ix].isDateType())) { - uncheckedCastChild(args[ix], i); - //if (argTypes[i] != args[ix]) castChild(args[ix], i); + if (!fn.getFunctionName().getFunction().equals(ELEMENT_EXTRACT_FN_NAME)) { + Type[] args = fn.getArgs(); + if (args.length > 0) { + // Implicitly cast all the children to match the function if necessary + for (int i = 0; i < argTypes.length; ++i) { + // For varargs, we must compare with the last type in callArgs.argTypes. 
+ int ix = Math.min(args.length - 1, i); + if (!argTypes[i].matchesType(args[ix]) && !( + argTypes[i].isDateType() && args[ix].isDateType())) { + uncheckedCastChild(args[ix], i); + //if (argTypes[i] != args[ix]) castChild(args[ix], i); + } } } } @@ -711,6 +715,18 @@ public class FunctionCallExpr extends Expr { } else { this.type = fn.getReturnType(); } + // rewrite return type if is nested type function + analyzeNestedFunction(); + } + + // if return type is nested type, need to be determined the sub-element type + private void analyzeNestedFunction() { + // array + if ("array".equalsIgnoreCase(fnName.getFunction())) { + if (children.size() > 0) { + this.type = new ArrayType(children.get(0).getType()); + } + } } @Override @@ -798,4 +814,3 @@ public class FunctionCallExpr extends Expr { return result; } } - diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/RangePartitionDesc.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/RangePartitionDesc.java index eb08ad2780..d5b4854303 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/RangePartitionDesc.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/RangePartitionDesc.java @@ -93,7 +93,10 @@ public class RangePartitionDesc extends PartitionDesc { partitionColumns.add(column); find = true; break; - + } + if (column.getType().isComplexType()) { + throw new DdlException("Complex type column can't be partition column: " + + column.getType().toString()); } } if (!find) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java index 775c981642..e6dffd8e46 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Subquery.java @@ -20,10 +20,10 @@ package org.apache.doris.analysis; import java.util.ArrayList; import java.util.List; +import org.apache.doris.catalog.MultiRowType; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; -import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.StructField; import org.apache.doris.catalog.StructType; import org.apache.doris.common.AnalysisException; @@ -102,8 +102,8 @@ public class Subquery extends Expr { type = createStructTypeFromExprList(); } - // If the subquery returns many rows, set its type to ArrayType. - if (!((SelectStmt)stmt).returnsSingleRow()) type = new ArrayType(type); + // If the subquery returns many rows, set its type to MultiRowType. + if (!((SelectStmt)stmt).returnsSingleRow()) type = new MultiRowType(type); // Preconditions.checkNotNull(type); // type.analyze(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java index e62fa877fe..1f3ae733fc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TypeDef.java @@ -17,13 +17,20 @@ package org.apache.doris.analysis; +import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.ScalarType; +import org.apache.doris.catalog.StructField; +import org.apache.doris.catalog.StructType; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; import com.google.common.base.Preconditions; +import java.util.ArrayList; + /** * Represents an anonymous type definition, e.g., used in DDL and CASTs. 
*/ @@ -38,15 +45,19 @@ public class TypeDef implements ParseNode { public static TypeDef create(PrimitiveType type) { return new TypeDef(ScalarType.createType(type)); } + public static TypeDef createDecimal(int precision, int scale) { return new TypeDef(ScalarType.createDecimalV2Type(precision, scale)); } + public static TypeDef createVarchar(int len) { return new TypeDef(ScalarType.createVarchar(len)); } + public static TypeDef createChar(int len) { return new TypeDef(ScalarType.createChar(len)); } + @Override public void analyze(Analyzer analyzer) throws AnalysisException { if (isAnalyzed) { @@ -56,8 +67,8 @@ public class TypeDef implements ParseNode { // a stack overflow. if (parsedType.exceedsMaxNestingDepth()) { throw new AnalysisException(String.format( - "Type exceeds the maximum nesting depth of %s:\n%s", - Type.MAX_NESTING_DEPTH, parsedType.toSql())); + "Type exceeds the maximum nesting depth of %s:\n%s", + Type.MAX_NESTING_DEPTH, parsedType.toSql())); } analyze(parsedType); isAnalyzed = true; @@ -70,10 +81,49 @@ public class TypeDef implements ParseNode { if (type.isScalarType()) { analyzeScalarType((ScalarType) type); } + + if (type.isArrayType()) { + Type itemType = ((ArrayType) type).getItemType(); + analyze(itemType); + } + + if (type.isComplexType()) { + if (!Config.enable_complex_type_support) { + throw new AnalysisException("Unsupported data type: " + type.toSql()); + } + if (type.isArrayType()) { + ScalarType itemType = (ScalarType) ((ArrayType) type).getItemType(); + analyzeNestedType(itemType); + } + if (type.isMapType()) { + ScalarType keyType = (ScalarType) ((MapType) type).getKeyType(); + ScalarType valueType = (ScalarType) ((MapType) type).getValueType(); + analyzeNestedType(keyType); + analyzeNestedType(valueType); + } + if (type.isStructType()) { + ArrayList fields = ((StructType) type).getFields(); + for (int i = 0; i < fields.size(); i++) { + ScalarType filedType = (ScalarType) fields.get(i).getType(); + analyzeNestedType(filedType); + } +
} + } + } + + private void analyzeNestedType(ScalarType type) throws AnalysisException { + if (type.isNull()) { + throw new AnalysisException("Unsupported data type: " + type.toSql()); + } + if (type.getPrimitiveType().isStringType() + && !type.isAssignedStrLenInColDefinition()) { + type.setLength(1); + } + analyze(type); } private void analyzeScalarType(ScalarType scalarType) - throws AnalysisException { + throws AnalysisException { PrimitiveType type = scalarType.getPrimitiveType(); switch (type) { case CHAR: @@ -98,7 +148,7 @@ public class TypeDef implements ParseNode { } if (scalarType.getLength() > maxLen) { throw new AnalysisException( - name + " size must be <= " + maxLen + ": " + len); + name + " size must be <= " + maxLen + ": " + len); } break; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java index e7b4a82856..e4c16a14cd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ArrayType.java @@ -17,27 +17,70 @@ package org.apache.doris.catalog; +import org.apache.doris.common.Config; +import org.apache.doris.thrift.TColumnType; import org.apache.doris.thrift.TTypeDesc; import org.apache.doris.thrift.TTypeNode; import org.apache.doris.thrift.TTypeNodeType; import com.google.common.base.Preconditions; import com.google.common.base.Strings; +import com.google.gson.annotations.SerializedName; /** * Describes an ARRAY type. 
*/ public class ArrayType extends Type { - private final Type itemType; + + @SerializedName(value = "itemType") + private Type itemType; + + public ArrayType() { + this.itemType = NULL; + } public ArrayType(Type itemType) { this.itemType = itemType; } + public void setItemType(Type itemType) { + this.itemType = itemType; + } + public Type getItemType() { return itemType; } + @Override + public PrimitiveType getPrimitiveType() { + return PrimitiveType.ARRAY; + } + + @Override + public boolean matchesType(Type t) { + if (equals(t)) { + return true; + } + + if (!t.isArrayType()) { + return false; + } + + if (itemType.isNull()) { + return true; + } + + return itemType.matchesType(((ArrayType) t).itemType); + } + + public static ArrayType create() { + return new ArrayType(); + } + + public static ArrayType create(Type type) { + return new ArrayType(type); + } + @Override public String toSql(int depth) { if (depth >= MAX_NESTING_DEPTH) { @@ -76,6 +119,46 @@ public class ArrayType extends Type { structStr = structStr.substring(lpad); return String.format("%sARRAY<%s>", leftPadding, structStr); } + + @Override + public boolean isSupported() { + if (!Config.enable_complex_type_support) { + return false; + } + + if (itemType.isNull()) { + return false; + } + return true; + } + + @Override + public String toString() { + return toSql(0); + } + + @Override + public TColumnType toColumnTypeThrift() { + TColumnType thrift = new TColumnType(); + thrift.type = PrimitiveType.ARRAY.toThrift(); + return thrift; + } + + @Override + public boolean isFixedLengthType() { + return false; + } + + @Override + public boolean supportsTablePartitioning() { + if (!isSupported() || isComplexType()) { + return false; + } + return true; + } + + @Override + public int getSlotSize() { + return PrimitiveType.ARRAY.getSlotSize(); + } } - - diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java index 
b92cf03125..fc2511bba4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Column.java @@ -50,6 +50,8 @@ public class Column implements Writable { private static final Logger LOG = LogManager.getLogger(Column.class); public static final String DELETE_SIGN = "__DORIS_DELETE_SIGN__"; public static final String SEQUENCE_COL = "__DORIS_SEQUENCE_COL__"; + private static final String COLUMN_ARRAY_CHILDREN = "item"; + @SerializedName(value = "name") private String name; @SerializedName(value = "type") @@ -75,6 +77,8 @@ public class Column implements Writable { private String comment; @SerializedName(value = "stats") private ColumnStats stats; // cardinality and selectivity etc. + @SerializedName(value = "children") + private List children; // Define expr may exist in two forms, one is analyzed, and the other is not analyzed. // Currently, analyzed define expr is only used when creating materialized views, so the define expr in RollupJob must be analyzed. // In other cases, such as define expr in `MaterializedIndexMeta`, it may not be analyzed after being relayed. 
@@ -89,6 +93,7 @@ public class Column implements Writable { this.isKey = false; this.stats = new ColumnStats(); this.visible = true; + this.children = new ArrayList<>(Type.MAX_NESTING_DEPTH); } public Column(String name, PrimitiveType dataType) { @@ -130,9 +135,10 @@ public class Column implements Writable { this.isAllowNull = isAllowNull; this.defaultValue = defaultValue; this.comment = comment; - this.stats = new ColumnStats(); this.visible = visible; + this.children = new ArrayList<>(Type.MAX_NESTING_DEPTH); + createChildrenColumn(this.type, this); } public Column(Column column) { @@ -146,6 +152,22 @@ public class Column implements Writable { this.comment = column.getComment(); this.stats = column.getStats(); this.visible = column.visible; + this.children = column.getChildren(); + } + + public void createChildrenColumn(Type type, Column column) { + if (type.isArrayType()) { + Column c = new Column(COLUMN_ARRAY_CHILDREN, ((ArrayType) type).getItemType()); + column.addChildrenColumn(c); + } + } + + public List getChildren() { + return children; + } + + private void addChildrenColumn(Column column) { + this.children.add(column); } public void setName(String newName) { @@ -201,7 +223,12 @@ public class Column implements Writable { public PrimitiveType getDataType() { return type.getPrimitiveType(); } - public Type getType() { return ScalarType.createType(type.getPrimitiveType()); } + public Type getType() { + if (type.isArrayType() || type.isMapType() || type.isStructType()) { + return type; + } + return ScalarType.createType(type.getPrimitiveType()); + } public void setType(Type type) { this.type = type; @@ -209,9 +236,9 @@ public class Column implements Writable { public Type getOriginType() { return type; } - public int getStrLen() { return ((ScalarType) type).getLength(); } - public int getPrecision() { return ((ScalarType) type).getScalarPrecision(); } - public int getScale() { return ((ScalarType) type).getScalarScale(); } + public int getStrLen() { return 
type.getLength(); } + public int getPrecision() { return type instanceof ScalarType ? ((ScalarType) type).getScalarPrecision() : -1; } + public int getScale() { return type instanceof ScalarType ? ((ScalarType) type).getScalarScale() : -1; } public AggregateType getAggregationType() { return this.aggregationType; @@ -291,12 +318,41 @@ public class Column implements Writable { tColumn.setIsAllowNull(this.isAllowNull); tColumn.setDefaultValue(this.defaultValue); tColumn.setVisible(visible); + tColumn.setChildrenColumn(new ArrayList<>()); + toChildrenThrift(this, tColumn); + // The define expr does not need to be serialized here for now. // At present, only serialized(analyzed) define expr is directly used when creating a materialized view. // It will not be used here, but through another structure `TAlterMaterializedViewParam`. + if (this.defineExpr != null) { + tColumn.setDefineExpr(this.defineExpr.treeToThrift()); + } return tColumn; } + private void toChildrenThrift(Column column, TColumn tColumn) { + if (column.type.isArrayType()) { + Column children = column.getChildren().get(0); + + TColumn childrenTColumn = new TColumn(); + childrenTColumn.setColumnName(children.name); + + TColumnType childrenTColumnType = new TColumnType(); + childrenTColumnType.setType(children.getDataType().toThrift()); + childrenTColumnType.setType(children.getDataType().toThrift()); + childrenTColumnType.setLen(children.getStrLen()); + childrenTColumnType.setPrecision(children.getPrecision()); + childrenTColumnType.setScale(children.getScale()); + + childrenTColumnType.setIndexLen(children.getOlapColumnIndexSize()); + childrenTColumn.setColumnType(childrenTColumnType); + + tColumn.children_column.add(childrenTColumn); + + toChildrenThrift(children, childrenTColumn); + } + } + public void checkSchemaChangeAllowed(Column other) throws DdlException { if (Strings.isNullOrEmpty(other.name)) { throw new DdlException("Dest column name is empty"); @@ -475,6 +531,16 @@ public class Column 
implements Writable { return false; } + if (children.size() != other.children.size()) { + return false; + } + + for (int i = 0; i < children.size(); i++) { + if (!children.get(i).equals(other.getChildren().get(i))) { + return false; + } + } + return true; } @@ -550,6 +616,11 @@ public class Column implements Writable { case DECIMALV2: sb.append(String.format(typeStringMap.get(dataType), getPrecision(), getScale())); break; + case ARRAY: + case MAP: + case STRUCT: + sb.append(type.toString()); + break; default: sb.append(typeStringMap.get(dataType)); break; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java index d2d2c3cacd..7aae66092c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ColumnType.java @@ -111,26 +111,38 @@ public abstract class ColumnType { } public static void write(DataOutput out, Type type) throws IOException { - Preconditions.checkArgument(type.isScalarType(), "only support scalar type serialization"); - ScalarType scalarType = (ScalarType) type; - Text.writeString(out, scalarType.getPrimitiveType().name()); - out.writeInt(scalarType.getScalarScale()); - out.writeInt(scalarType.getScalarPrecision()); - out.writeInt(scalarType.getLength()); - // Actually, varcharLimit need not to write here, write true to back compatible - out.writeBoolean(true); + Preconditions.checkArgument(type.isScalarType() || type.isArrayType(), + "only support scalar type and array serialization"); + if (type.isScalarType()) { + ScalarType scalarType = (ScalarType) type; + Text.writeString(out, scalarType.getPrimitiveType().name()); + out.writeInt(scalarType.getScalarScale()); + out.writeInt(scalarType.getScalarPrecision()); + out.writeInt(scalarType.getLength()); + // Actually, varcharLimit need not to write here, write true to back compatible +
out.writeBoolean(true); + } else if (type.isArrayType()) { + ArrayType arrayType = (ArrayType) type; + Text.writeString(out, arrayType.getPrimitiveType().name()); + write(out, arrayType.getItemType()); + } } public static Type read(DataInput in) throws IOException { PrimitiveType primitiveType = PrimitiveType.valueOf(Text.readString(in)); - int scale = in.readInt(); - int precision = in.readInt(); - int len = in.readInt(); - if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_22) { - // Useless, just for back compatible - in.readBoolean(); + if (primitiveType == PrimitiveType.ARRAY) { + Type itermType = read(in); + return ArrayType.create(itermType); + } else { + int scale = in.readInt(); + int precision = in.readInt(); + int len = in.readInt(); + if (Catalog.getCurrentCatalogJournalVersion() >= FeMetaVersion.VERSION_22) { + // Useless, just for back compatible + in.readBoolean(); + } + return ScalarType.createType(primitiveType, len, precision, scale); } - return ScalarType.createType(primitiveType, len, precision, scale); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java index fdc75e90ec..6931b19c03 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Function.java @@ -156,6 +156,10 @@ public class Function implements Writable { return retType; } + public void setReturnType(Type type) { + this.retType = type; + } + public Type[] getArgs() { return argTypes; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java index 71f2c4b679..d3494670df 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MapType.java @@ -30,7 +30,10 @@ import com.google.common.base.Strings; public class MapType extends Type { private 
final Type keyType; private final Type valueType; - + public MapType() { + this.keyType = NULL; + this.valueType = NULL; + } public MapType(Type keyType, Type valueType) { Preconditions.checkNotNull(keyType); Preconditions.checkNotNull(valueType); @@ -38,6 +41,11 @@ public class MapType extends Type { this.valueType = valueType; } + @Override + public PrimitiveType getPrimitiveType() { + return PrimitiveType.MAP; + } + public Type getKeyType() { return keyType; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MultiRowType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MultiRowType.java new file mode 100644 index 0000000000..59182b01ed --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MultiRowType.java @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.catalog; + +import org.apache.doris.thrift.TTypeDesc; +import org.apache.doris.thrift.TTypeNode; +import org.apache.doris.thrift.TTypeNodeType; + +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; + +/** + * Describes a multi-row type in sub-query. 
+ */ +public class MultiRowType extends Type { + private final Type itemType; + + public MultiRowType(Type itemType) { + this.itemType = itemType; + } + + public Type getItemType() { + return itemType; + } + + @Override + public String toSql(int depth) { + if (depth >= MAX_NESTING_DEPTH) { + return "ARRAY<...>"; + } + return String.format("ARRAY<%s>", itemType.toSql(depth + 1)); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof MultiRowType)) { + return false; + } + MultiRowType otherMultiRowType = (MultiRowType) other; + return otherMultiRowType.itemType.equals(itemType); + } + + @Override + public void toThrift(TTypeDesc container) { + TTypeNode node = new TTypeNode(); + container.types.add(node); + Preconditions.checkNotNull(itemType); + node.setType(TTypeNodeType.ARRAY); + itemType.toThrift(container); + } + + @Override + protected String prettyPrint(int lpad) { + String leftPadding = Strings.repeat(" ", lpad); + if (!itemType.isStructType()) { + return leftPadding + toSql(); + } + // Pass in the padding to make sure nested fields are aligned properly, + // even if we then strip the top-level padding. 
+ String structStr = itemType.prettyPrint(lpad); + structStr = structStr.substring(lpad); + return String.format("%sARRAY<%s>", leftPadding, structStr); + } +} \ No newline at end of file diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java index 3357f61076..15082c4dc5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PrimitiveType.java @@ -54,6 +54,9 @@ public enum PrimitiveType { TIME("TIME", 8, TPrimitiveType.TIME), // we use OBJECT type represent BITMAP type in Backend BITMAP("BITMAP", 16, TPrimitiveType.OBJECT), + ARRAY("ARRAY", 24, TPrimitiveType.ARRAY), + MAP("MAP", 24, TPrimitiveType.MAP), + STRUCT("STRUCT", 24, TPrimitiveType.STRUCT), // Unsupported scalar types. BINARY("BINARY", -1, TPrimitiveType.BINARY); @@ -298,6 +301,8 @@ public enum PrimitiveType { supportedTypes.add(TIME); supportedTypes.add(DECIMALV2); supportedTypes.add(BITMAP); + supportedTypes.add(ARRAY); + supportedTypes.add(MAP); } public static ArrayList getIntegerTypes() { @@ -541,6 +546,12 @@ public enum PrimitiveType { return HLL; case OBJECT: return BITMAP; + case ARRAY: + return ARRAY; + case MAP: + return MAP; + case STRUCT: + return STRUCT; default: return INVALID_TYPE; } @@ -629,6 +640,10 @@ public enum PrimitiveType { return (this == DATE || this == DATETIME); } + public boolean isArrayType(){ + return this == ARRAY; + } + public boolean isStringType() { return (this == VARCHAR || this == CHAR || this == HLL); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java index d137361457..2e0c91f076 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/ScalarType.java @@ -304,6 +304,9 @@ public class ScalarType extends Type { case
BITMAP: stringBuilder.append(type.toString().toLowerCase()); break; + case ARRAY: + stringBuilder.append(type.toString().toLowerCase()); + break; default: stringBuilder.append("unknown type: " + type.toString()); break; @@ -371,6 +374,8 @@ public class ScalarType extends Type { @Override public PrimitiveType getPrimitiveType() { return type; } public int ordinal() { return type.ordinal(); } + + @Override public int getLength() { return len; } public void setLength(int len) {this.len = len; } public boolean isAssignedStrLenInColDefinition() { return isAssignedStrLenInColDefinition; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java index b9f2ccbf2f..9105e6db16 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/StructType.java @@ -114,5 +114,9 @@ public class StructType extends Type { field.toThrift(container, node); } } + @Override + public String toString() { + return toSql(0); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index 7eaa872cee..541f838225 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -45,11 +45,11 @@ import java.util.List; public abstract class Type { private static final Logger LOG = LogManager.getLogger(Type.class); - // Maximum nesting depth of a type. This limit was determined experimentally by + // Maximum nesting depth of a type. This limit was determined experimentally by // generating and scanning deeply nested Parquet and Avro files. In those experiments, // we exceeded the stack space in the scanner (which uses recursion for dealing with // nested types) at a nesting depth between 200 and 300 (200 worked, 300 crashed). 
- public static int MAX_NESTING_DEPTH = 100; + public static int MAX_NESTING_DEPTH = 2; // Static constant types for scalar types that don't require additional information. public static final ScalarType INVALID = new ScalarType(PrimitiveType.INVALID_TYPE); @@ -69,12 +69,13 @@ public abstract class Type { ScalarType.createDecimalV2Type(ScalarType.DEFAULT_PRECISION, ScalarType.DEFAULT_SCALE); public static final ScalarType DECIMALV2 = DEFAULT_DECIMALV2; - // (ScalarType) ScalarType.createDecimalTypeInternal(-1, -1); + // (ScalarType) ScalarType.createDecimalTypeInternal(-1, -1); public static final ScalarType DEFAULT_VARCHAR = ScalarType.createVarcharType(-1); public static final ScalarType VARCHAR = ScalarType.createVarcharType(-1); public static final ScalarType HLL = ScalarType.createHllType(); public static final ScalarType CHAR = (ScalarType) ScalarType.createCharType(-1); public static final ScalarType BITMAP = new ScalarType(PrimitiveType.BITMAP); + public static final MapType Map = new MapType(); private static ArrayList integerTypes; private static ArrayList numericTypes; @@ -262,7 +263,7 @@ public abstract class Type { } public boolean isCollectionType() { - return isMapType() || isArrayType(); + return isMapType() || isArrayType() || isMultiRowType(); } public boolean isMapType() { @@ -273,6 +274,10 @@ public abstract class Type { return this instanceof ArrayType; } + public boolean isMultiRowType() { + return this instanceof MultiRowType; + } + public boolean isStructType() { return this instanceof StructType; } @@ -289,6 +294,8 @@ public abstract class Type { return true; } + public int getLength() { return -1; } + /** * Indicates whether we support partitioning tables on columns of this type. 
*/ @@ -352,6 +359,16 @@ public abstract class Type { if (t1.isScalarType() && t2.isScalarType()) { return ScalarType.isImplicitlyCastable((ScalarType) t1, (ScalarType) t2, strict); } + if (t1.isComplexType() || t2.isComplexType()) { + if (t1.isArrayType() && t2.isArrayType()) { + return true; + } else if (t1.isMapType() && t2.isMapType()) { + return true; + } else if (t1.isStructType() && t2.isStructType()) { + return true; + } + return false; + } return false; } @@ -413,7 +430,7 @@ public abstract class Type { if (d >= MAX_NESTING_DEPTH) return true; if (isStructType()) { StructType structType = (StructType) this; - for (StructField f: structType.getFields()) { + for (StructField f : structType.getFields()) { if (f.getType().exceedsMaxNestingDepth(d + 1)) { return true; } @@ -423,6 +440,11 @@ public abstract class Type { if (arrayType.getItemType().exceedsMaxNestingDepth(d + 1)) { return true; } + } else if (isMultiRowType()) { + MultiRowType multiRowType = (MultiRowType) this; + if (multiRowType.getItemType().exceedsMaxNestingDepth(d + 1)) { + return true; + } } else if (isMapType()) { MapType mapType = (MapType) this; if (mapType.getValueType().exceedsMaxNestingDepth(d + 1)) { @@ -467,6 +489,12 @@ public abstract class Type { return Type.VARCHAR; case HLL: return Type.HLL; + case ARRAY: + return ArrayType.create(); + case MAP: + return new MapType(); + case STRUCT: + return new StructType(); case BITMAP: return Type.BITMAP; default: @@ -890,8 +918,12 @@ public abstract class Type { if (t1 == PrimitiveType.INVALID_TYPE || t2 == PrimitiveType.INVALID_TYPE) continue; if (t1 == PrimitiveType.NULL_TYPE || t2 == PrimitiveType.NULL_TYPE) continue; + if (t1 == PrimitiveType.ARRAY || t2 == PrimitiveType.ARRAY) continue; if (t1 == PrimitiveType.DECIMALV2 || t2 == PrimitiveType.DECIMALV2) continue; if (t1 == PrimitiveType.TIME || t2 == PrimitiveType.TIME) continue; + if (t1 == PrimitiveType.ARRAY || t2 == PrimitiveType.ARRAY) continue; + if (t1 == PrimitiveType.MAP || t2 
== PrimitiveType.MAP) continue; + if (t1 == PrimitiveType.STRUCT || t2 == PrimitiveType.STRUCT) continue; Preconditions.checkNotNull(compatibilityMatrix[i][j]); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java index 95287e01e1..9128c548ad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java @@ -1235,6 +1235,11 @@ public class Config extends ConfigBase { @ConfField(mutable = true, masterOnly = true) public static int period_of_auto_resume_min = 5; + /* + * If set to true, Doris will support complex type + */ + @ConfField + public static boolean enable_complex_type_support = false; /** * If set to true, the backend will be automatically dropped after finishing decommission. * If set to false, the backend will not be dropped and remaining in DECOMMISSION state. diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java index 6551829b05..a6cd83fe04 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/Util.java @@ -72,6 +72,7 @@ public class Util { TYPE_STRING_MAP.put(PrimitiveType.HLL, "varchar(%d)"); TYPE_STRING_MAP.put(PrimitiveType.BOOLEAN, "bool"); TYPE_STRING_MAP.put(PrimitiveType.BITMAP, "bitmap"); + TYPE_STRING_MAP.put(PrimitiveType.ARRAY, "Array<%s>"); TYPE_STRING_MAP.put(PrimitiveType.NULL_TYPE, "null"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java index 5a2f9734b7..56e8ac8a8b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/gson/GsonUtils.java @@ -20,13 +20,16 @@ package org.apache.doris.persist.gson; import 
org.apache.doris.alter.AlterJobV2; import org.apache.doris.alter.RollupJobV2; import org.apache.doris.alter.SchemaChangeJobV2; +import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.DistributionInfo; import org.apache.doris.catalog.HashDistributionInfo; +import org.apache.doris.catalog.MapType; import org.apache.doris.catalog.OdbcCatalogResource; import org.apache.doris.catalog.RandomDistributionInfo; import org.apache.doris.catalog.Resource; import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.SparkResource; +import org.apache.doris.catalog.StructType; import org.apache.doris.load.loadv2.LoadJob.LoadJobStateUpdateInfo; import org.apache.doris.load.loadv2.SparkLoadJob.SparkLoadJobStateUpdateInfo; @@ -90,7 +93,10 @@ public class GsonUtils { private static RuntimeTypeAdapterFactory columnTypeAdapterFactory = RuntimeTypeAdapterFactory .of(org.apache.doris.catalog.Type.class, "clazz") // TODO: register other sub type after Doris support more types. - .registerSubtype(ScalarType.class, ScalarType.class.getSimpleName()); + .registerSubtype(ScalarType.class, ScalarType.class.getSimpleName()) + .registerSubtype(ArrayType.class, ArrayType.class.getSimpleName()) + .registerSubtype(MapType.class, MapType.class.getSimpleName()) + .registerSubtype(StructType.class, StructType.class.getSimpleName()); // runtime adapter for class "DistributionInfo" private static RuntimeTypeAdapterFactory distributionInfoTypeAdapterFactory = RuntimeTypeAdapterFactory diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctionList.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctionList.java new file mode 100644 index 0000000000..d6119d337b --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctionList.java @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.rewrite; + +import java.lang.annotation.ElementType; +import java.lang.annotation.Retention; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.Target; + +@Retention(RetentionPolicy.RUNTIME) +@Target(ElementType.METHOD) +public @interface FEFunctionList { + FEFunction[] value(); +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java index b766da49ba..d67c55c183 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java @@ -17,6 +17,10 @@ package org.apache.doris.rewrite; +import java.math.BigDecimal; +import java.math.BigInteger; + +import org.apache.doris.analysis.ArrayLiteral; import org.apache.doris.analysis.DateLiteral; import org.apache.doris.analysis.DecimalLiteral; import org.apache.doris.analysis.FloatLiteral; @@ -347,14 +351,6 @@ public class FEFunctions { return new FloatLiteral(result, Type.DOUBLE); } - @FEFunction(name = "add", argTypes = { "DECIMAL", "DECIMAL" }, returnType = "DECIMAL") - public static DecimalLiteral addDecimal(LiteralExpr first, LiteralExpr second) throws AnalysisException { - BigDecimal left = new 
BigDecimal(first.getStringValue()); - BigDecimal right = new BigDecimal(second.getStringValue()); - BigDecimal result = left.add(right); - return new DecimalLiteral(result); - } - @FEFunction(name = "add", argTypes = { "DECIMALV2", "DECIMALV2" }, returnType = "DECIMALV2") public static DecimalLiteral addDecimalV2(LiteralExpr first, LiteralExpr second) throws AnalysisException { BigDecimal left = new BigDecimal(first.getStringValue()); @@ -384,14 +380,6 @@ public class FEFunctions { return new FloatLiteral(result, Type.DOUBLE); } - @FEFunction(name = "subtract", argTypes = { "DECIMAL", "DECIMAL" }, returnType = "DECIMAL") - public static DecimalLiteral subtractDecimal(LiteralExpr first, LiteralExpr second) throws AnalysisException { - BigDecimal left = new BigDecimal(first.getStringValue()); - BigDecimal right = new BigDecimal(second.getStringValue()); - BigDecimal result = left.subtract(right); - return new DecimalLiteral(result); - } - @FEFunction(name = "subtract", argTypes = { "DECIMALV2", "DECIMALV2" }, returnType = "DECIMALV2") public static DecimalLiteral subtractDecimalV2(LiteralExpr first, LiteralExpr second) throws AnalysisException { BigDecimal left = new BigDecimal(first.getStringValue()); @@ -423,14 +411,6 @@ public class FEFunctions { return new FloatLiteral(result, Type.DOUBLE); } - @FEFunction(name = "multiply", argTypes = { "DECIMAL", "DECIMAL" }, returnType = "DECIMAL") - public static DecimalLiteral multiplyDecimal(LiteralExpr first, LiteralExpr second) throws AnalysisException { - BigDecimal left = new BigDecimal(first.getStringValue()); - BigDecimal right = new BigDecimal(second.getStringValue()); - BigDecimal result = left.multiply(right); - return new DecimalLiteral(result); - } - @FEFunction(name = "multiply", argTypes = { "DECIMALV2", "DECIMALV2" }, returnType = "DECIMALV2") public static DecimalLiteral multiplyDecimalV2(LiteralExpr first, LiteralExpr second) throws AnalysisException { BigDecimal left = new 
BigDecimal(first.getStringValue()); @@ -457,17 +437,6 @@ public class FEFunctions { return new FloatLiteral(result, Type.DOUBLE); } - @FEFunction(name = "divide", argTypes = { "DECIMAL", "DECIMAL" }, returnType = "DECIMAL") - public static DecimalLiteral divideDecimal(LiteralExpr first, LiteralExpr second) throws AnalysisException { - BigDecimal left = new BigDecimal(first.getStringValue()); - BigDecimal right = new BigDecimal(second.getStringValue()); - if (right.compareTo(BigDecimal.ZERO) == 0) { - return null; - } - BigDecimal result = left.divide(right); - return new DecimalLiteral(result); - } - @FEFunction(name = "divide", argTypes = { "DECIMALV2", "DECIMALV2" }, returnType = "DECIMALV2") public static DecimalLiteral divideDecimalV2(LiteralExpr first, LiteralExpr second) throws AnalysisException { BigDecimal left = new BigDecimal(first.getStringValue()); @@ -500,39 +469,25 @@ public class FEFunctions { return new StringLiteral(resultBuilder.toString()); } - @FEFunction(name = "ifnull", argTypes = {"VARCHAR", "VARCHAR"}, returnType = "VARCHAR") - public static LiteralExpr ifNullString(LiteralExpr first, LiteralExpr second) throws AnalysisException { + @FEFunctionList({ + @FEFunction(name = "ifnull", argTypes = {"VARCHAR", "VARCHAR"}, returnType = "VARCHAR"), + @FEFunction(name = "ifnull", argTypes = {"TINYINT", "TINYINT"}, returnType = "TINYINT"), + @FEFunction(name = "ifnull", argTypes = {"INT", "INT"}, returnType = "INT"), + @FEFunction(name = "ifnull", argTypes = {"BIGINT", "BIGINT"}, returnType = "BIGINT"), + @FEFunction(name = "ifnull", argTypes = {"DATETIME", "DATETIME"}, returnType = "DATETIME"), + @FEFunction(name = "ifnull", argTypes = { "DATE", "DATETIME" }, returnType = "DATETIME"), + @FEFunction(name = "ifnull", argTypes = { "DATETIME", "DATE" }, returnType = "DATETIME") + }) + public static LiteralExpr ifNull(LiteralExpr first, LiteralExpr second) throws AnalysisException { return first instanceof NullLiteral ? 
second : first; } - @FEFunction(name = "ifnull", argTypes = {"TINYINT", "TINYINT"}, returnType = "TINYINT") - public static LiteralExpr ifNullTinyInt(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; - } - - @FEFunction(name = "ifnull", argTypes = {"INT", "INT"}, returnType = "INT") - public static LiteralExpr ifNullInt(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; - } - - @FEFunction(name = "ifnull", argTypes = {"BIGINT", "BIGINT"}, returnType = "BIGINT") - public static LiteralExpr ifNullBigInt(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; - } - - @FEFunction(name = "ifnull", argTypes = { "DATETIME", "DATETIME" }, returnType = "DATETIME") - public static LiteralExpr ifNullDateTime(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; - } - - @FEFunction(name = "ifnull", argTypes = { "DATE", "DATETIME" }, returnType = "DATETIME") - public static LiteralExpr ifNullDateDatetime(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; - } - - @FEFunction(name = "ifnull", argTypes = { "DATETIME", "DATE" }, returnType = "DATETIME") - public static LiteralExpr ifNullDatetimeDate(LiteralExpr first, LiteralExpr second) throws AnalysisException { - return first instanceof NullLiteral ? second : first; + @FEFunctionList({ + @FEFunction(name = "array", argTypes = {"INT"}, returnType = "ARRAY"), + @FEFunction(name = "array", argTypes = {"VARCHAR"}, returnType = "ARRAY") + }) + public static ArrayLiteral array(LiteralExpr... 
exprs) throws AnalysisException { + return new ArrayLiteral(exprs); } } diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex index 65c7789f04..53c6c703c2 100644 --- a/fe/fe-core/src/main/jflex/sql_scanner.flex +++ b/fe/fe-core/src/main/jflex/sql_scanner.flex @@ -100,6 +100,7 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("as", new Integer(SqlParserSymbols.KW_AS)); keywordMap.put("asc", new Integer(SqlParserSymbols.KW_ASC)); keywordMap.put("authors", new Integer(SqlParserSymbols.KW_AUTHORS)); + keywordMap.put("array", new Integer(SqlParserSymbols.KW_ARRAY)); keywordMap.put("backend", new Integer(SqlParserSymbols.KW_BACKEND)); keywordMap.put("backends", new Integer(SqlParserSymbols.KW_BACKENDS)); keywordMap.put("backup", new Integer(SqlParserSymbols.KW_BACKUP)); @@ -249,6 +250,7 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("load", new Integer(SqlParserSymbols.KW_LOAD)); keywordMap.put("local", new Integer(SqlParserSymbols.KW_LOCAL)); keywordMap.put("location", new Integer(SqlParserSymbols.KW_LOCATION)); + keywordMap.put("map", new Integer(SqlParserSymbols.KW_MAP)); keywordMap.put("materialized", new Integer(SqlParserSymbols.KW_MATERIALIZED)); keywordMap.put("max", new Integer(SqlParserSymbols.KW_MAX)); keywordMap.put("maxvalue", new Integer(SqlParserSymbols.KW_MAX_VALUE)); @@ -346,6 +348,7 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("storage", new Integer(SqlParserSymbols.KW_STORAGE)); keywordMap.put("stream", new Integer(SqlParserSymbols.KW_STREAM)); keywordMap.put("string", new Integer(SqlParserSymbols.KW_STRING)); + keywordMap.put("struct", new Integer(SqlParserSymbols.KW_STRUCT)); keywordMap.put("sum", new Integer(SqlParserSymbols.KW_SUM)); keywordMap.put("superuser", new Integer(SqlParserSymbols.KW_SUPERUSER)); keywordMap.put("sync", new Integer(SqlParserSymbols.KW_SYNC)); diff --git a/fe/fe-core/src/test/java/org/apache/doris/rewrite/FEFunctionsTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/rewrite/FEFunctionsTest.java index 115ec23c0f..af93c5ceda 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/rewrite/FEFunctionsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/rewrite/FEFunctionsTest.java @@ -304,17 +304,6 @@ public class FEFunctionsTest { Assert.assertEquals(expectedResult, actualResult); } - @Test - public void addDecimalTest() throws AnalysisException { - DecimalLiteral actualResult = FEFunctions.addDecimal(new DecimalLiteral("2.2"), new DecimalLiteral("3.3")); - DecimalLiteral expectedResult = new DecimalLiteral("5.5"); - Assert.assertEquals(expectedResult, actualResult); - - actualResult = FEFunctions.addDecimal(new DecimalLiteral("-2.2"), new DecimalLiteral("3.3")); - expectedResult = new DecimalLiteral("1.1"); - Assert.assertEquals(expectedResult, actualResult); - } - @Test public void addDecimalV2Test() throws AnalysisException { DecimalLiteral actualResult = FEFunctions.addDecimalV2(new DecimalLiteral("2.2"), new DecimalLiteral("3.3")); @@ -359,17 +348,6 @@ public class FEFunctionsTest { Assert.assertEquals(expectedResult, actualResult); } - @Test - public void subtractDecimalTest() throws AnalysisException { - DecimalLiteral actualResult = FEFunctions.subtractDecimal(new DecimalLiteral("2.2"), new DecimalLiteral("3.3")); - DecimalLiteral expectedResult = new DecimalLiteral("-1.1"); - Assert.assertEquals(expectedResult, actualResult); - - actualResult = FEFunctions.subtractDecimal(new DecimalLiteral("5.5"), new DecimalLiteral("3.3")); - expectedResult = new DecimalLiteral("2.2"); - Assert.assertEquals(expectedResult, actualResult); - } - @Test public void subtractDecimalV2Test() throws AnalysisException { DecimalLiteral actualResult = FEFunctions.subtractDecimalV2(new DecimalLiteral("2.2"), new DecimalLiteral("3.3")); @@ -422,22 +400,6 @@ public class FEFunctionsTest { Assert.assertEquals(expectedResult, actualResult); } - @Test - public void multiplyDecimalTest() throws 
AnalysisException { - DecimalLiteral actualResult = FEFunctions.multiplyDecimal(new DecimalLiteral("1.1"), new DecimalLiteral("1.0")); - DecimalLiteral expectedResult = new DecimalLiteral("1.1"); - Assert.assertEquals(expectedResult, actualResult); - - actualResult = FEFunctions.multiplyDecimal(new DecimalLiteral("-1.1"), new DecimalLiteral("-10.0")); - expectedResult = new DecimalLiteral("11.0"); - Assert.assertEquals(expectedResult, actualResult); - - actualResult = FEFunctions.multiplyDecimal(new DecimalLiteral("-1.1"), new DecimalLiteral("-1.1")); - expectedResult = new DecimalLiteral("1.21"); - Assert.assertEquals(expectedResult, actualResult); - } - - @Test public void multiplyDecimalV2Test() throws AnalysisException { DecimalLiteral actualResult = FEFunctions.multiplyDecimalV2(new DecimalLiteral("1.1"), new DecimalLiteral("1.0")); @@ -479,17 +441,6 @@ public class FEFunctionsTest { Assert.assertEquals(expectedResult, actualResult); } - @Test - public void divideDecimalTest() throws AnalysisException { - DecimalLiteral actualResult = FEFunctions.divideDecimal(new DecimalLiteral("1.1"), new DecimalLiteral("1.0")); - DecimalLiteral expectedResult = new DecimalLiteral("1.1"); - Assert.assertEquals(expectedResult, actualResult); - - actualResult = FEFunctions.divideDecimal(new DecimalLiteral("-1.1"), new DecimalLiteral("-10.0")); - expectedResult = new DecimalLiteral("0.11"); - Assert.assertEquals(expectedResult, actualResult); - } - @Test public void divideDecimalV2Test() throws AnalysisException { DecimalLiteral actualResult = FEFunctions.divideDecimalV2(new DecimalLiteral("1.1"), new DecimalLiteral("1.0")); diff --git a/fe/pom.xml b/fe/pom.xml index 2a8f9ea72b..0665f6542c 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -101,7 +101,7 @@ under the License. 
oracleReleases - http://download.oracle.com/maven + https://download.oracle.com/maven diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 48b28acdeb..622db02fb3 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -259,6 +259,7 @@ message ColumnPB { optional bool has_bitmap_index = 15 [default=false]; // ColumnMessage.has_bitmap_index optional bool visible = 16 [default=true]; repeated ColumnPB children_columns = 17; + repeated string children_column_names = 18; } message TabletSchemaPB { diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto index 5696285fa7..cc767f8058 100644 --- a/gensrc/proto/segment_v2.proto +++ b/gensrc/proto/segment_v2.proto @@ -156,8 +156,10 @@ message ColumnMetaPB { repeated ColumnMetaPB children_columns = 10; - // required by array/struct/map reader to create child reader. + // required by array/struct/map reader to create child reader. optional uint64 num_rows = 11; + repeated string children_column_names = 12; + } message SegmentFooterPB { diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 88d84b9ed9..7979224b50 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -99,6 +99,21 @@ visible_functions = [ '_ZN5doris9Operators20bitnot_large_int_valEPN9doris_udf' '15FunctionContextERKNS1_11LargeIntValE'], + # array functions + [['array'], 'ARRAY', ['INT', '...'], + '_ZN5doris14ArrayFunctions5arrayEPN9doris_udf15FunctionContextEiPKNS1_6IntValE'], + [['array'], 'ARRAY', ['VARCHAR', '...'], + '_ZN5doris14ArrayFunctions5arrayEPN9doris_udf15FunctionContextEiPKNS1_9StringValE'], + [['array'], 'ARRAY', ['ARRAY', '...'], '', ''], + [['array'], 'ARRAY', ['MAP', '...'], '', ''], + [['array'], 'ARRAY', ['STRUCT', '...'], '', ''], + [['%element_extract%'], 'VARCHAR', ['ARRAY', 'INT'], '', ''], + [['%element_extract%'], 'VARCHAR', ['ARRAY', 'VARCHAR'], '', ''], + 
[['%element_extract%'], 'VARCHAR', ['MAP', 'VARCHAR'], '', ''], + [['%element_extract%'], 'VARCHAR', ['MAP', 'INT'], '', ''], + [['%element_extract%'], 'VARCHAR', ['STRUCT', 'INT'], '', ''], + [['%element_extract%'], 'VARCHAR', ['STRUCT', 'VARCHAR'], '', ''], + # Timestamp functions [['unix_timestamp'], 'INT', [], '_ZN5doris18TimestampFunctions7to_unixEPN9doris_udf15FunctionContextE'], @@ -888,7 +903,8 @@ non_null_result_with_null_param_functions = [ 'ifnull', 'nullif', 'null_or_empty', - 'coalesce' + 'coalesce', + 'array' ] # Nondeterministic functions may return different results each time they are called diff --git a/gensrc/script/gen_functions.py b/gensrc/script/gen_functions.py index d03090f1a9..5173ad94b3 100755 --- a/gensrc/script/gen_functions.py +++ b/gensrc/script/gen_functions.py @@ -388,21 +388,20 @@ types = { 'STRING': ['VARCHAR'], 'DATE': ['DATE'], 'DATETIME': ['DATETIME'], - 'DECIMAL': ['DECIMAL'], 'DECIMALV2': ['DECIMALV2'], 'NATIVE_INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT'], 'INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT'], 'FLOAT_TYPES': ['FLOAT', 'DOUBLE'], 'NUMERIC_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE', \ - 'LARGEINT', 'DECIMAL', 'DECIMALV2'], + 'LARGEINT', 'DECIMALV2'], 'STRING_TYPES': ['VARCHAR'], 'DATETIME_TYPES': ['DATE', 'DATETIME'], 'FIXED_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT'], 'NATIVE_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'], 'STRCAST_FIXED_TYPES': ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT'], 'ALL_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT', 'FLOAT',\ - 'DOUBLE', 'VARCHAR', 'DATETIME', 'DECIMAL', 'DECIMALV2'], - 'MAX_TYPES': ['BIGINT', 'LARGEINT', 'DOUBLE', 'DECIMAL', 'DECIMALV2'], + 'DOUBLE', 'VARCHAR', 'DATETIME', 'DECIMALV2'], + 'MAX_TYPES': ['BIGINT', 'LARGEINT', 'DOUBLE', 'DECIMALV2'], } # Operation, [ReturnType], [[Args1], [Args2], ... 
[ArgsN]] @@ -414,7 +413,6 @@ functions = [ ['Divide', ['MAX_TYPES'], [['MAX_TYPES'], ['MAX_TYPES']]], ['Int_Divide', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]], ['Mod', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]], - ['Mod', ['DECIMAL'], [['DECIMAL'], ['DECIMAL']]], ['Mod', ['DECIMALV2'], [['DECIMALV2'], ['DECIMALV2']]], ['Mod', ['DOUBLE'], [['DOUBLE'], ['DOUBLE']], double_mod], ['BitAnd', ['INT_TYPES'], [['INT_TYPES'], ['INT_TYPES']]], @@ -447,12 +445,6 @@ functions = [ ['Lt', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], ['Ge', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], ['Le', ['BOOLEAN'], [['DATETIME'], ['DATETIME']],], - ['Eq', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], - ['Ne', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], - ['Gt', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], - ['Lt', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], - ['Ge', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], - ['Le', ['BOOLEAN'], [['DECIMAL'], ['DECIMAL']],], ['Eq', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], ['Ne', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], ['Gt', ['BOOLEAN'], [['DECIMALV2'], ['DECIMALV2']],], @@ -467,18 +459,13 @@ functions = [ ['Cast', ['INT'], [['NATIVE_TYPES'], ['INT']]], ['Cast', ['BIGINT'], [['NATIVE_TYPES'], ['BIGINT']]], ['Cast', ['LARGEINT'], [['NATIVE_TYPES'], ['LARGEINT']]], - ['Cast', ['LARGEINT'], [['DECIMAL'], ['LARGEINT']]], ['Cast', ['LARGEINT'], [['DECIMALV2'], ['LARGEINT']]], ['Cast', ['NATIVE_TYPES'], [['LARGEINT'], ['NATIVE_TYPES']]], ['Cast', ['FLOAT'], [['NATIVE_TYPES'], ['FLOAT']]], ['Cast', ['DOUBLE'], [['NATIVE_TYPES'], ['DOUBLE']]], - ['Cast', ['DECIMAL'], [['FIXED_TYPES'], ['DECIMAL']]], ['Cast', ['DECIMALV2'], [['FIXED_TYPES'], ['DECIMALV2']]], - ['Cast', ['DECIMAL'], [['FLOAT'], ['DECIMAL']], float_to_decimal], ['Cast', ['DECIMALV2'], [['FLOAT'], ['DECIMALV2']], float_to_decimal], - ['Cast', ['DECIMAL'], [['DOUBLE'], ['DECIMAL']], double_to_decimal], ['Cast', ['DECIMALV2'], [['DOUBLE'], ['DECIMALV2']], double_to_decimal], - ['Cast', 
['NATIVE_TYPES'], [['DECIMAL'], ['NATIVE_TYPES']]], ['Cast', ['NATIVE_TYPES'], [['DECIMALV2'], ['NATIVE_TYPES']]], ['Cast', ['NATIVE_INT_TYPES'], [['STRING'], ['NATIVE_INT_TYPES']], string_to_int], ['Cast', ['LARGEINT'], [['STRING'], ['LARGEINT']], string_to_int], @@ -488,7 +475,6 @@ functions = [ ['Cast', ['STRING'], [['FLOAT'], ['STRING']], float_to_string], ['Cast', ['STRING'], [['DOUBLE'], ['STRING']], double_to_string], ['Cast', ['STRING'], [['TINYINT'], ['STRING']], tinyint_to_string], - ['Cast', ['STRING'], [['DECIMAL'], ['STRING']], decimal_to_string], ['Cast', ['STRING'], [['DECIMALV2'], ['STRING']], decimal_to_string], # Datetime cast ['Cast', ['DATE'], [['NUMERIC_TYPES'], ['DATE']], numeric_to_date], @@ -524,7 +510,6 @@ native_types = { 'DATE': 'Date', 'DATETIME': 'DateTime', 'TIME': 'double', - 'DECIMAL': 'DecimalValue', 'DECIMALV2': 'DecimalV2Value', } @@ -542,7 +527,6 @@ implemented_types = { 'DATE': 'DateTimeValue', 'DATETIME': 'DateTimeValue', 'TIME': 'double', - 'DECIMAL': 'DecimalValue', 'DECIMALV2': 'DecimalV2Value', } result_fields = { @@ -558,7 +542,6 @@ result_fields = { 'DATE': 'datetime_val', 'DATETIME': 'datetime_val', 'TIME': 'double_val', - 'DECIMAL': 'decimal_val', 'DECIMALV2': 'decimalv2_val', } diff --git a/gensrc/script/gen_vector_functions.py b/gensrc/script/gen_vector_functions.py index 40c7482533..0f3231db1d 100755 --- a/gensrc/script/gen_vector_functions.py +++ b/gensrc/script/gen_vector_functions.py @@ -287,7 +287,6 @@ types = { 'STRING': ['VARCHAR'], 'DATE': ['DATE'], 'DATETIME': ['DATETIME'], - 'DECIMAL': ['DECIMAL'], 'DECIMALV2': ['DECIMALV2'], 'NATIVE_INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT'], 'INT_TYPES': ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'LARGEINT'], @@ -296,8 +295,8 @@ types = { 'NATIVE_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'], 'STRCAST_TYPES': ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'], 'ALL_TYPES': ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 
'LARGEINT', 'FLOAT',\ - 'DOUBLE', 'VARCHAR', 'DATETIME', 'DECIMAL', 'DECIMALV2'], - 'MAX_TYPES': ['BIGINT', 'LARGEINT', 'DOUBLE', 'DECIMAL', 'DECIMALV2'], + 'DOUBLE', 'VARCHAR', 'DATETIME', 'DECIMALV2'], + 'MAX_TYPES': ['BIGINT', 'LARGEINT', 'DOUBLE', 'DECIMALV2'], } # Operation, [ReturnType], [[Args1], [Args2], ... [ArgsN]] @@ -326,7 +325,6 @@ native_types = { 'VARCHAR': 'StringValue', 'DATE': 'DateTimeValue', 'DATETIME': 'DateTimeValue', - 'DECIMAL': 'DecimalValue', 'DECIMALV2': 'DecimalV2Value', } @@ -343,7 +341,6 @@ implemented_types = { 'VARCHAR': 'StringValue', 'DATE': 'DateTimeValue', 'DATETIME': 'DateTimeValue', - 'DECIMAL': 'DecimalValue', 'DECIMALV2': 'DecimalV2Value', } diff --git a/gensrc/thrift/AgentService.thrift b/gensrc/thrift/AgentService.thrift index 10ee99533a..a18d6fd21e 100644 --- a/gensrc/thrift/AgentService.thrift +++ b/gensrc/thrift/AgentService.thrift @@ -35,6 +35,7 @@ struct TColumn { 7: optional bool is_bloom_filter_column 8: optional Exprs.TExpr define_expr 9: optional bool visible = true + 10: optional list children_column } struct TTabletSchema { diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift index e91573a58a..584706ae7d 100644 --- a/gensrc/thrift/Exprs.thrift +++ b/gensrc/thrift/Exprs.thrift @@ -43,6 +43,7 @@ enum TExprNodeType { TUPLE_IS_NULL_PRED, INFO_FUNC, FUNCTION_CALL, + ARRAY_LITERAL, // TODO: old style compute functions. this will be deprecated COMPUTE_FUNCTION_CALL, diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index ee2dc87ee7..a526ec33a2 100644 --- a/gensrc/thrift/Types.thrift +++ b/gensrc/thrift/Types.thrift @@ -74,7 +74,10 @@ enum TPrimitiveType { HLL, DECIMALV2, TIME, - OBJECT + OBJECT, + ARRAY, + MAP, + STRUCT } enum TTypeNodeType {