// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #include "runtime/collection_value.h" #include #include "common/logging.h" #include "common/utils.h" #include "runtime/descriptors.h" #include "util//mem_util.hpp" namespace doris { using AllocateMemFunc = std::function; static Status init_collection( CollectionValue* value, const AllocateMemFunc& allocate, uint32_t size, PrimitiveType child_type); int sizeof_type(PrimitiveType type) { switch (type) { case TYPE_TINYINT: return sizeof(int8_t); case TYPE_SMALLINT: return sizeof(int16_t); case TYPE_INT: return sizeof(int32_t); case TYPE_CHAR: case TYPE_VARCHAR: return sizeof(StringValue); case TYPE_ARRAY: return sizeof(CollectionValue); case TYPE_NULL: return 0; default: DCHECK(false) << "Type not implemented: " << type; break; } return 0; } Status type_check(PrimitiveType type) { switch (type) { case TYPE_TINYINT: case TYPE_SMALLINT: case TYPE_INT: case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_NULL: case TYPE_ARRAY: break; default: return Status::InvalidArgument(fmt::format("Type not implemented: {}", type)); } return Status::OK(); } void CollectionValue::to_collection_val(CollectionVal* val) const { val->length = _length; val->data = _data; val->null_signs = _null_signs; val->has_null = _has_null; } void CollectionValue::shallow_copy(const CollectionValue* value) { _length = value->_length; _null_signs = value->_null_signs; _data = value->_data; _has_null = value->_has_null; } void CollectionValue::copy_null_signs(const CollectionValue* other) { if (other->_has_null) { memcpy(_null_signs, other->_null_signs, other->size()); } else { _null_signs = nullptr; } } size_t CollectionValue::get_byte_size(const TypeDescriptor& type) const { size_t result = 0; if (_length == 0) { return result; } if (_has_null) { result += _length * sizeof(bool); } const auto& item_type = type.children[0]; result += _length * item_type.get_slot_size(); if (item_type.is_string_type()) { for (int i = 0; i < _length; ++ i) { if (is_null_at(i)) { continue; } int item_offset = i * item_type.get_slot_size(); StringValue* item = reinterpret_cast(((uint8_t*)_data) + item_offset); result += item->len; } } else if (item_type.type == TYPE_ARRAY) { for (int i = 0; i < _length; ++ i) { if (is_null_at(i)) { continue; } int item_offset = i * item_type.get_slot_size(); CollectionValue* item = reinterpret_cast(((uint8_t*)_data) + item_offset); result += item->get_byte_size(item_type); } } return result; } ArrayIterator CollectionValue::iterator(PrimitiveType children_type) const { return ArrayIterator(children_type, this); } Status CollectionValue::init_collection(ObjectPool* pool, uint32_t size, PrimitiveType child_type, CollectionValue* value) { return doris::init_collection(value, [pool](size_t size) -> uint8_t* { return pool->add_array(new uint8_t[size]); }, size, child_type ); } static Status init_collection( CollectionValue* value, const AllocateMemFunc& allocate, uint32_t size, PrimitiveType child_type) { if (value == nullptr) { return Status::InvalidArgument("collection value is null"); } RETURN_IF_ERROR(type_check(child_type)); if (size == 0) { new (value) CollectionValue(size); return Status::OK(); } value->_data = allocate(size * sizeof_type(child_type)); value->_length = size; value->_has_null = false; value->_null_signs = reinterpret_cast(allocate(size)); memset(value->_null_signs, 0, size * sizeof(bool)); return Status::OK(); } Status CollectionValue::init_collection(MemPool* pool, uint32_t size, PrimitiveType child_type, CollectionValue* value) { return doris::init_collection(value, [pool](size_t size) { return pool->allocate(size); }, size, child_type ); } Status CollectionValue::init_collection(FunctionContext* context, uint32_t size, PrimitiveType child_type, CollectionValue* value) { return doris::init_collection(value, [context](size_t size) { return context->allocate(size); }, size, child_type ); } CollectionValue CollectionValue::from_collection_val(const CollectionVal& val) { return CollectionValue(val.data, val.length, val.has_null, val.null_signs); } // Deep copy collection. // NOTICE: The CollectionValue* shallow_copied_cv must be initialized by calling memcpy function first ( // copy data from origin collection value). void CollectionValue::deep_copy_collection( CollectionValue* shallow_copied_cv, const TypeDescriptor& item_type, const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) { CollectionValue* cv = shallow_copied_cv; if (cv->length() == 0) { return; } int coll_byte_size = cv->length() * item_type.get_slot_size(); int nulls_size = cv->has_null() ? cv->length() * sizeof(bool) : 0; MemFootprint footprint = gen_mem_footprint(coll_byte_size + nulls_size); int64_t offset = footprint.first; char* coll_data = reinterpret_cast(footprint.second); // copy and assign null_signs if (cv->has_null()) { memory_copy(convert_to(coll_data), cv->null_signs(), nulls_size); cv->set_null_signs(convert_to(coll_data)); } else { cv->set_null_signs(nullptr); } // copy and assgin data memory_copy(coll_data + nulls_size, cv->data(), coll_byte_size); cv->set_data(coll_data + nulls_size); deep_copy_items_in_collection(cv, coll_data, item_type, gen_mem_footprint, convert_ptrs); if (convert_ptrs) { cv->set_data(convert_to(offset + nulls_size)); if (cv->has_null()) { cv->set_null_signs(convert_to(offset)); } } } // Deep copy items in collection. // NOTICE: The CollectionValue* shallow_copied_cv must be initialized by calling memcpy function first ( // copy data from origin collection value). void CollectionValue::deep_copy_items_in_collection( CollectionValue* shallow_copied_cv, char* base, const TypeDescriptor& item_type, const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) { int nulls_size = shallow_copied_cv->has_null() ? shallow_copied_cv->length() : 0; char* item_base = base + nulls_size; if (item_type.is_string_type()) { // when itemtype is string, copy every string item for (int i = 0; i < shallow_copied_cv->length(); ++ i) { if (shallow_copied_cv->is_null_at(i)) { continue; } char* item_offset = item_base + i * item_type.get_slot_size(); StringValue* dst_item_v = convert_to(item_offset); if (dst_item_v->len != 0) { MemFootprint footprint = gen_mem_footprint(dst_item_v->len); int64_t offset = footprint.first; char* string_copy = reinterpret_cast(footprint.second); memory_copy(string_copy, dst_item_v->ptr, dst_item_v->len); dst_item_v->ptr = (convert_ptrs ? convert_to(offset) : string_copy); } } } else if (item_type.type == TYPE_ARRAY) { for (int i = 0; i < shallow_copied_cv->length(); ++ i) { if (shallow_copied_cv->is_null_at(i)) { continue; } char* item_offset = item_base + i * item_type.get_slot_size(); CollectionValue* item_cv = convert_to(item_offset); deep_copy_collection(item_cv, item_type.children[0], gen_mem_footprint, convert_ptrs); } } } void CollectionValue::deserialize_collection( CollectionValue* cv, const char* tuple_data, const TypeDescriptor& type) { if (cv->length() == 0) { new (cv) CollectionValue(cv->length()); return; } // assgin data and null_sign pointer position in tuple_data int data_offset = convert_to(cv->data()); cv->set_data(convert_to(tuple_data + data_offset)); if (cv->has_null()) { int null_offset = convert_to(cv->null_signs()); cv->set_null_signs(convert_to(tuple_data + null_offset)); } const TypeDescriptor& item_type = type.children[0]; if (item_type.is_string_type()) { // copy every string item for (size_t i = 0; i < cv->length(); ++i) { if (cv->is_null_at(i)) { continue; } StringValue* dst_item_v = convert_to( (uint8_t*)cv->data() + i * item_type.get_slot_size()); if (dst_item_v->len != 0) { int offset = convert_to(dst_item_v->ptr); dst_item_v->ptr = convert_to(tuple_data + offset); } } } else if (item_type.type == TYPE_ARRAY) { for (size_t i = 0; i < cv->length(); ++i) { if (cv->is_null_at(i)) { continue; } CollectionValue* item_cv = convert_to( (uint8_t*)cv->data() + i * item_type.get_slot_size()); deserialize_collection(item_cv, tuple_data, item_type); } } } Status CollectionValue::set(uint32_t i, PrimitiveType type, const AnyVal* value) { RETURN_IF_ERROR(type_check(type)); ArrayIterator iter(type, this); if (!iter.seek(i)) { return Status::InvalidArgument("over of collection size"); } if (value->is_null) { *(_null_signs + i) = true; _has_null = true; return Status::OK(); } else { *(_null_signs + i) = false; } switch (type) { case TYPE_TINYINT: *reinterpret_cast(iter.value()) = reinterpret_cast(value)->val; break; case TYPE_SMALLINT: *reinterpret_cast(iter.value()) = reinterpret_cast(value)->val; break; case TYPE_INT: *reinterpret_cast(iter.value()) = reinterpret_cast(value)->val; break; case TYPE_CHAR: case TYPE_VARCHAR: { const StringVal* src = reinterpret_cast(value); StringValue* dest = reinterpret_cast(iter.value()); dest->len = src->len; dest->ptr = (char*)src->ptr; break; } case TYPE_ARRAY: { const CollectionVal* src = reinterpret_cast(value); CollectionValue* dest = reinterpret_cast(iter.value()); *dest = CollectionValue::from_collection_val(*src); break; } default: DCHECK(false) << "Type not implemented: " << type; return Status::InvalidArgument("Type not implemented"); } return Status::OK(); } /** * ----------- Array Iterator -------- */ ArrayIterator::ArrayIterator(PrimitiveType children_type, const CollectionValue* data) : _offset(0), _type(children_type), _data(data) { _type_size = sizeof_type(children_type); } void* ArrayIterator::value() { if (is_null()) { return nullptr; } return ((char*)_data->_data) + _offset * _type_size; } bool ArrayIterator::is_null() { return _data->is_null_at(_offset); } void ArrayIterator::value(AnyVal* dest) { if (is_null()) { dest->is_null = true; return; } dest->is_null = false; switch (_type) { case TYPE_BOOLEAN: reinterpret_cast(dest)->val = *reinterpret_cast(value()); break; case TYPE_TINYINT: reinterpret_cast(dest)->val = *reinterpret_cast(value()); break; case TYPE_SMALLINT: reinterpret_cast(dest)->val = *reinterpret_cast(value()); break; case TYPE_INT: reinterpret_cast(dest)->val = *reinterpret_cast(value()); break; case TYPE_BIGINT: reinterpret_cast(dest)->val = *reinterpret_cast(value()); break; case TYPE_FLOAT: reinterpret_cast(dest)->val = *reinterpret_cast(value()); break; case TYPE_DOUBLE: reinterpret_cast(dest)->val = *reinterpret_cast(value()); break; case TYPE_HLL: case TYPE_CHAR: case TYPE_VARCHAR: { const StringValue* str_value = reinterpret_cast(value()); reinterpret_cast(dest)->len = str_value->len; reinterpret_cast(dest)->ptr = (uint8_t*)(str_value->ptr); break; } case TYPE_DATE: case TYPE_DATETIME: { const DateTimeValue* date_time_value = reinterpret_cast(value()); reinterpret_cast(dest)->packed_time = date_time_value->to_int64(); reinterpret_cast(dest)->type = date_time_value->type(); break; } case TYPE_DECIMALV2: reinterpret_cast(dest)->val = reinterpret_cast(value())->value; break; case TYPE_LARGEINT: reinterpret_cast(dest)->val = reinterpret_cast(value())->value; break; case TYPE_ARRAY: reinterpret_cast(value())->to_collection_val( reinterpret_cast(dest)); break; default: DCHECK(false) << "bad type: " << _type; } } } // namespace doris