// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "runtime/collection_value.h" #include #include "common/object_pool.h" #include "common/utils.h" #include "runtime/mem_pool.h" #include "runtime/raw_value.h" #include "runtime/string_value.h" #include "runtime/types.h" #include "util/mem_util.hpp" namespace doris { template struct CollectionValueSubTypeTrait; template <> struct CollectionValueSubTypeTrait { using CppType = int8_t; // slot size : 1 }; template <> struct CollectionValueSubTypeTrait { using CppType = bool; using AnyValType = BooleanVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = int8_t; using AnyValType = TinyIntVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = int16_t; using AnyValType = SmallIntVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = int32_t; using AnyValType = IntVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = int64_t; using AnyValType = BigIntVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = __int128_t; using AnyValType = LargeIntVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = float; using AnyValType = FloatVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = double; using AnyValType = DoubleVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = StringValue; using AnyValType = StringVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = StringValue; using AnyValType = StringVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = StringValue; using AnyValType = StringVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = uint24_t; using AnyValType = DateTimeVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = uint64_t; using AnyValType = DateTimeVal; }; template <> struct CollectionValueSubTypeTrait { using CppType = decimal12_t; using AnyValType = DecimalV2Val; }; template <> struct CollectionValueSubTypeTrait { using CppType = CollectionValue; using AnyValType = CollectionVal; }; struct ArrayIteratorFunctionsBase {}; template struct GenericArrayIteratorFunctions : public ArrayIteratorFunctionsBase { using CppType = typename CollectionValueSubTypeTrait::CppType; using AnyValType = typename CollectionValueSubTypeTrait::AnyValType; constexpr static int get_type_size() { return sizeof(CppType); } static void shallow_set(void* item, const AnyVal* value) { *static_cast(item) = static_cast(value)->val; } static void shallow_get(AnyVal* value, const void* item) { static_cast(value)->val = *static_cast(item); } static void self_deep_copy(void* item, const TypeDescriptor& type_desc, const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) {} static void deserialize(void* item, const char* tuple_data, const TypeDescriptor& type_desc) {} static size_t get_byte_size(const void* item, const TypeDescriptor& type_desc) { return 0; } static void raw_value_write(void* item, const void* value, const TypeDescriptor& type_desc, MemPool* pool) { RawValue::write(value, item, type_desc, pool); } }; template struct ArrayIteratorFunctions : public GenericArrayIteratorFunctions {}; template struct ArrayIteratorFunctionsForString : public GenericArrayIteratorFunctions { using CppType = StringValue; using AnyValType = StringVal; static void shallow_set(void* item, const AnyVal* value) { const auto* src = static_cast(value); auto* dst = static_cast(item); dst->ptr = convert_to(src->ptr); dst->len = src->len; } static void shallow_get(AnyVal* value, const void* item) { const auto* src = static_cast(item); auto* dst = static_cast(value); dst->ptr = convert_to(src->ptr); dst->len = src->len; } static void self_deep_copy(void* item, const TypeDescriptor&, const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) { auto* string = static_cast(item); if (!string->len) { return; } MemFootprint footprint = gen_mem_footprint(string->len); int64_t offset = footprint.first; auto* copied_string = reinterpret_cast(footprint.second); memory_copy(copied_string, string->ptr, string->len); string->ptr = (convert_ptrs ? convert_to(offset) : copied_string); } static void deserialize(void* item, const char* tuple_data, const TypeDescriptor& type_desc) { DCHECK((item != nullptr) && (tuple_data != nullptr)) << "item or tuple_data is nullptr"; auto* string_value = static_cast(item); if (string_value->len) { int64_t offset = convert_to(string_value->ptr); string_value->ptr = convert_to(tuple_data + offset); } } static size_t get_byte_size(const void* item, const TypeDescriptor&) { return static_cast(item)->len; } }; template <> struct ArrayIteratorFunctions : public ArrayIteratorFunctionsForString {}; template <> struct ArrayIteratorFunctions : public ArrayIteratorFunctionsForString { }; template <> struct ArrayIteratorFunctions : public ArrayIteratorFunctionsForString {}; template <> struct ArrayIteratorFunctions : public GenericArrayIteratorFunctions { using GenericArrayIteratorFunctions::CppType; using GenericArrayIteratorFunctions::AnyValType; static void shallow_set(void* item, const AnyVal* value) { const auto* src = static_cast(value); auto* dst = static_cast(item); *dst = DateTimeValue::from_datetime_val(*src).to_olap_date(); } static void shallow_get(AnyVal* value, const void* item) { const auto* src = static_cast(item); auto* dst = static_cast(value); DateTimeValue data; data.from_olap_date(uint32_t(*src)); data.to_datetime_val(dst); } static void raw_value_write(void* item, const void* value, const TypeDescriptor& type_desc, MemPool* pool) { DateTimeVal date_time_val; shallow_get(&date_time_val, value); shallow_set(item, &date_time_val); } }; template <> struct ArrayIteratorFunctions : public GenericArrayIteratorFunctions { using GenericArrayIteratorFunctions::CppType; using GenericArrayIteratorFunctions::AnyValType; static void shallow_set(void* item, const AnyVal* value) { const auto* src = static_cast(value); auto* dst = static_cast(item); *dst = DateTimeValue::from_datetime_val(*src).to_olap_datetime(); } static void shallow_get(AnyVal* value, const void* item) { const auto* src = static_cast(item); auto* dst = static_cast(value); DateTimeValue data; data.from_olap_datetime(*src); data.to_datetime_val(dst); } static void raw_value_write(void* item, const void* value, const TypeDescriptor& type_desc, MemPool* pool) { DateTimeVal date_time_val; shallow_get(&date_time_val, value); shallow_set(item, &date_time_val); } }; template <> struct ArrayIteratorFunctions : public GenericArrayIteratorFunctions { using GenericArrayIteratorFunctions::CppType; using GenericArrayIteratorFunctions::AnyValType; static void shallow_set(void* item, const AnyVal* value) { const auto* src = static_cast(value); auto* dst = static_cast(item); auto decimal_value = DecimalV2Value::from_decimal_val(*src); dst->integer = decimal_value.int_value(); dst->fraction = decimal_value.frac_value(); } static void shallow_get(AnyVal* value, const void* item) { const auto* src = static_cast(item); auto* dst = static_cast(value); DecimalV2Value(src->integer, src->fraction).to_decimal_val(dst); } static void raw_value_write(void* item, const void* value, const TypeDescriptor& type_desc, MemPool* pool) { DecimalV2Val decimal_val; shallow_get(&decimal_val, value); shallow_set(item, &decimal_val); } }; template <> struct ArrayIteratorFunctions : public GenericArrayIteratorFunctions { using GenericArrayIteratorFunctions::CppType; using GenericArrayIteratorFunctions::AnyValType; static void shallow_set(void* item, const AnyVal* value) { *static_cast(item) = CppType::from_collection_val(*static_cast(value)); } static void shallow_get(AnyVal* value, const void* item) { static_cast(item)->to_collection_val(static_cast(value)); } static void self_deep_copy(void* item, const TypeDescriptor& type_desc, const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) { auto* collection_value = static_cast(item); CollectionValue::deep_copy_collection(collection_value, type_desc.children[0], gen_mem_footprint, convert_ptrs); } static void deserialize(void* item, const char* tuple_data, const TypeDescriptor& type_desc) { CollectionValue::deserialize_collection(static_cast(item), tuple_data, type_desc.children[0]); } static size_t get_byte_size(const void* item, const TypeDescriptor& type_desc) { const auto* collection_value = static_cast(item); return collection_value->get_byte_size(type_desc.children[0]); } }; ArrayIterator CollectionValue::iterator(PrimitiveType child_type) { return internal_iterator(child_type); } ArrayIterator CollectionValue::internal_iterator(PrimitiveType child_type) const { switch (child_type) { case TYPE_BOOLEAN: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_TINYINT: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_SMALLINT: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_INT: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_BIGINT: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_LARGEINT: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_FLOAT: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_DOUBLE: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_CHAR: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_VARCHAR: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_STRING: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_DATE: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_DATETIME: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_ARRAY: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); case TYPE_DECIMALV2: return ArrayIterator(const_cast(this), static_cast*>(nullptr)); default: DCHECK(false) << "Invalid child type: " << child_type; __builtin_unreachable(); } } const ArrayIterator CollectionValue::iterator(PrimitiveType child_type) const { return internal_iterator(child_type); } Status type_check(PrimitiveType type) { switch (type) { case TYPE_NULL: case TYPE_BOOLEAN: case TYPE_TINYINT: case TYPE_SMALLINT: case TYPE_INT: case TYPE_BIGINT: case TYPE_LARGEINT: case TYPE_FLOAT: case TYPE_DOUBLE: case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_STRING: case TYPE_DATE: case TYPE_DATETIME: case TYPE_DECIMALV2: case TYPE_ARRAY: break; default: return Status::InvalidArgument("Type not implemented: {}", type); } return Status::OK(); } int sizeof_type(PrimitiveType type) { if (type_check(type).ok()) { return CollectionValue().iterator(type).type_size(); } else { DCHECK(false) << "Type not implemented: " << type; return 0; } } void CollectionValue::to_collection_val(CollectionVal* val) const { val->length = _length; val->data = _data; val->null_signs = _null_signs; val->has_null = _has_null; } void CollectionValue::shallow_copy(const CollectionValue* value) { _length = value->_length; _null_signs = value->_null_signs; _data = value->_data; _has_null = value->_has_null; } void CollectionValue::copy_null_signs(const CollectionValue* other) { if (other->_has_null) { memcpy(_null_signs, other->_null_signs, other->size()); } else { _null_signs = nullptr; } } size_t CollectionValue::get_byte_size(const TypeDescriptor& item_type) const { size_t result = 0; if (_length == 0) { return result; } if (_has_null) { result += _length * sizeof(bool); } auto iterator = CollectionValue::iterator(item_type.type); result += _length * iterator.type_size(); while (!iterator.is_type_fixed_width() && iterator.has_next()) { result += iterator.get_byte_size(item_type); iterator.next(); } return result; } Status CollectionValue::init_collection(ObjectPool* pool, uint64_t size, PrimitiveType child_type, CollectionValue* value) { return init_collection( value, [pool](size_t size) -> uint8_t* { return pool->add_array(new uint8_t[size]); }, size, child_type); } Status CollectionValue::init_collection(CollectionValue* value, const AllocateMemFunc& allocate, uint64_t size, PrimitiveType child_type) { if (value == nullptr) { return Status::InvalidArgument("collection value is null"); } RETURN_IF_ERROR(type_check(child_type)); if (size == 0) { new (value) CollectionValue(size); return Status::OK(); } value->_data = allocate(size * sizeof_type(child_type)); value->_length = size; value->_has_null = false; value->_null_signs = reinterpret_cast(allocate(size)); memset(value->_null_signs, 0, size * sizeof(bool)); return Status::OK(); } Status CollectionValue::init_collection(MemPool* pool, uint64_t size, PrimitiveType child_type, CollectionValue* value) { return init_collection( value, [pool](size_t size) { return pool->allocate_aligned(size, 16); }, size, child_type); } Status CollectionValue::init_collection(FunctionContext* context, uint64_t size, PrimitiveType child_type, CollectionValue* value) { return init_collection( value, [context](size_t size) { return context->aligned_allocate(16, size); }, size, child_type); } CollectionValue CollectionValue::from_collection_val(const CollectionVal& val) { return CollectionValue(val.data, val.length, val.has_null, val.null_signs); } // Deep copy collection. // NOTICE: The CollectionValue* shallow_copied_cv must be initialized by calling memcpy function first ( // copy data from origin collection value). void CollectionValue::deep_copy_collection(CollectionValue* shallow_copied_cv, const TypeDescriptor& item_type, const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) { CollectionValue* cv = shallow_copied_cv; if (cv->length() == 0) { return; } auto iterator = cv->iterator(item_type.type); uint64_t coll_byte_size = cv->length() * iterator.type_size(); uint64_t nulls_size = cv->has_null() ? cv->length() * sizeof(bool) : 0; MemFootprint footprint = gen_mem_footprint(coll_byte_size + nulls_size); int64_t offset = footprint.first; char* coll_data = reinterpret_cast(footprint.second); // copy and assign null_signs if (cv->has_null()) { memory_copy(convert_to(coll_data), cv->null_signs(), nulls_size); cv->set_null_signs(convert_to(coll_data)); } else { cv->set_null_signs(nullptr); } // copy and assgin data memory_copy(coll_data + nulls_size, cv->data(), coll_byte_size); cv->set_data(coll_data + nulls_size); while (!iterator.is_type_fixed_width() && iterator.has_next()) { iterator.self_deep_copy(item_type, gen_mem_footprint, convert_ptrs); iterator.next(); } if (convert_ptrs) { cv->set_data(convert_to(offset + nulls_size)); if (cv->has_null()) { cv->set_null_signs(convert_to(offset)); } } } void CollectionValue::deserialize_collection(CollectionValue* cv, const char* tuple_data, const TypeDescriptor& item_type) { if (cv->length() == 0) { new (cv) CollectionValue(cv->length()); return; } // assgin data and null_sign pointer position in tuple_data int64_t data_offset = convert_to(cv->data()); cv->set_data(convert_to(tuple_data + data_offset)); if (cv->has_null()) { int64_t null_offset = convert_to(cv->null_signs()); cv->set_null_signs(convert_to(tuple_data + null_offset)); } auto iterator = cv->iterator(item_type.type); while (!iterator.is_type_fixed_width() && iterator.has_next()) { iterator.deserialize(tuple_data, item_type); iterator.next(); } } } // namespace doris