Files
doris/be/src/runtime/collection_value.h
camby 0e404edf54 [improvement] Change array offset type from UInt32 to UInt64 (#10070)
Currently, column `Array<T>` consists of an `offsets` column and a `data` column, and the type of the `offsets` column is UInt32.
If array_union is called repeatedly to merge arrays, the size of the resulting array may overflow that offset type.
So the offset type needs to be extended to UInt64 before the `Array Data Type` feature is released.
2022-06-19 10:24:08 +08:00

258 lines
9.0 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstddef>
#include <cstdint>
#include <functional>
#include <type_traits>
#include <utility>

#include "runtime/primitive_type.h"
// Forward declarations of the UDF-side types referenced by this header.
// NOTE(review): ArrayIterator::get(AnyVal*) and ArrayIterator::set(const AnyVal*) are defined
// inline and access value->is_null, which requires the complete definition of AnyVal at that
// point — presumably it arrives through runtime/primitive_type.h; confirm.
namespace doris_udf {
class FunctionContext;
struct AnyVal;
} // namespace doris_udf
namespace doris {
// Make the forward-declared UDF types usable unqualified inside namespace doris.
// NOTE(review): CollectionVal is used by CollectionValue below but is neither declared nor
// imported in this header — presumably it is provided by runtime/primitive_type.h; confirm.
using doris_udf::FunctionContext;
using doris_udf::AnyVal;
// Result of a GenMemFootprintFunc: an int64_t paired with a byte pointer.
// NOTE(review): presumably (offset, address) of the memory produced for a request — confirm
// against the callers of deep_copy_collection.
using MemFootprint = std::pair<int64_t, uint8_t*>;
// Callback that receives a size and returns a MemFootprint for it.
using GenMemFootprintFunc = std::function<MemFootprint(int64_t size)>;
// Forward declarations; these types are only named (by pointer, reference, or as a
// declaration-only return type) before their definitions are needed.
struct ArrayIteratorFunctionsBase;
class ArrayIterator;
class Status;
class ObjectPool;
class MemPool;
struct TypeDescriptor;
// Per-PrimitiveType set of item operations; only declared here, defined elsewhere.
// NOTE(review): presumably each specialization derives from ArrayIteratorFunctionsBase, as
// required by the ArrayIterator constructor and by IsTypeFixedWidth below — confirm.
template <PrimitiveType type>
struct ArrayIteratorFunctions;
// Compile-time flag telling whether the items handled by T are fixed-width.
// The primary template yields true; its type is spelled with enable_if_t so that it is only
// well-formed for T derived from ArrayIteratorFunctionsBase.
template <typename T>
inline constexpr std::enable_if_t<std::is_base_of_v<ArrayIteratorFunctionsBase, T>, bool>
IsTypeFixedWidth = true;
// Explicit specializations: CHAR, VARCHAR, STRING and ARRAY items are flagged as not
// fixed-width.
template <>
inline constexpr bool IsTypeFixedWidth<ArrayIteratorFunctions<TYPE_CHAR>> = false;
template <>
inline constexpr bool IsTypeFixedWidth<ArrayIteratorFunctions<TYPE_VARCHAR>> = false;
template <>
inline constexpr bool IsTypeFixedWidth<ArrayIteratorFunctions<TYPE_STRING>> = false;
template <>
inline constexpr bool IsTypeFixedWidth<ArrayIteratorFunctions<TYPE_ARRAY>> = false;
/**
 * The format of an array-typed slot.
 * A new array needs to be initialized before using it.
 *
 * Stores a pointer to the child item data, the number of items, a "may contain nulls" flag
 * and a pointer to a per-item null-sign array. The class declares no destructor, so none of
 * these buffers is released when a CollectionValue goes out of scope.
 */
class CollectionValue {
public:
    // Default-initialization leaves every member uninitialized; the members must be set
    // (via the setters or one of the init_collection overloads) before the value is used.
    CollectionValue() = default;

    // `length` items, no data buffer, no nulls.
    explicit CollectionValue(uint64_t length)
            : _data(nullptr), _length(length), _has_null(false), _null_signs(nullptr) {}

    // `length` items stored at `data`, no nulls.
    CollectionValue(void* data, uint64_t length)
            : _data(data), _length(length), _has_null(false), _null_signs(nullptr) {}

    // `length` items stored at `data`; items may be null as described by `null_signs`.
    // Note that _has_null is set to true regardless of whether `null_signs` is nullptr.
    CollectionValue(void* data, uint64_t length, bool* null_signs)
            : _data(data), _length(length), _has_null(true), _null_signs(null_signs) {}

    // Fully explicit constructor.
    CollectionValue(void* data, uint64_t length, bool has_null, bool* null_signs)
            : _data(data), _length(length), _has_null(has_null), _null_signs(null_signs) {}

    // Returns true if the item at `index` is marked null.
    //
    // _has_null can be true while _null_signs is nullptr: the three-argument constructor
    // sets _has_null unconditionally, set_has_null()/set_null_signs() are independent, and
    // ArrayIterator::set() raises _has_null even when there is no null-sign array. In that
    // state there is nothing to read, so the item is reported as not null instead of
    // dereferencing a null pointer.
    // `index` itself is not range-checked.
    bool is_null_at(uint64_t index) const {
        return _has_null && _null_signs != nullptr && _null_signs[index];
    }

    // Conversion to the UDF-facing CollectionVal (defined out of line).
    void to_collection_val(CollectionVal* val) const;

    // Number of items; size() and length() are synonyms.
    uint64_t size() const { return _length; }
    uint64_t length() const { return _length; }

    // Defined out of line.
    void shallow_copy(const CollectionValue* other);
    void copy_null_signs(const CollectionValue* other);
    size_t get_byte_size(const TypeDescriptor& item_type) const;

    // Creates an iterator over the items, which are of type `child_type`.
    ArrayIterator iterator(PrimitiveType child_type);
    const ArrayIterator iterator(PrimitiveType child_type) const;

    /**
     * init collection, will alloc (children Type's size + 1) * (children Nums) memory
     */
    static Status init_collection(ObjectPool* pool, uint64_t size, PrimitiveType child_type,
                                  CollectionValue* value);
    static Status init_collection(MemPool* pool, uint64_t size, PrimitiveType child_type,
                                  CollectionValue* value);
    static Status init_collection(FunctionContext* context, uint64_t size,
                                  PrimitiveType child_type, CollectionValue* value);

    // Conversion from the UDF-facing CollectionVal (defined out of line).
    static CollectionValue from_collection_val(const CollectionVal& val);

    // Deep copy collection.
    // NOTICE: The CollectionValue* shallow_copied_cv must be initialized by calling the memcpy
    // function first (copy data from the origin collection value).
    static void deep_copy_collection(CollectionValue* shallow_copied_cv,
                                     const TypeDescriptor& item_type,
                                     const GenMemFootprintFunc& gen_mem_footprint,
                                     bool convert_ptrs);

    // Defined out of line.
    static void deserialize_collection(CollectionValue* cv, const char* tuple_data,
                                       const TypeDescriptor& item_type);

    // Read-only accessors.
    const void* data() const { return _data; }
    bool has_null() const { return _has_null; }
    const bool* null_signs() const { return _null_signs; }

    // Mutable accessors.
    void* mutable_data() { return _data; }
    bool* mutable_null_signs() { return _null_signs; }

    // Plain setters; each one changes a single member and nothing else.
    void set_length(uint64_t length) { _length = length; }
    void set_has_null(bool has_null) { _has_null = has_null; }
    void set_data(void* data) { _data = data; }
    void set_null_signs(bool* null_signs) { _null_signs = null_signs; }

private:
    // Allocation callback taken by the private init_collection overload.
    using AllocateMemFunc = std::function<uint8_t*(size_t size)>;
    static Status init_collection(CollectionValue* value, const AllocateMemFunc& allocate,
                                  uint64_t size, PrimitiveType child_type);
    ArrayIterator internal_iterator(PrimitiveType child_type) const;

private:
    // child column data
    void* _data;
    // number of items
    uint64_t _length;
    // No item is null if _has_null is false.
    // Items *may* be null if _has_null is true.
    bool _has_null;
    // One null sign per item (a bool array); may be nullptr.
    bool* _null_signs;

    friend ArrayIterator;
};
/**
 * Cursor over the items of a CollectionValue.
 *
 * The item operations are plain function pointers taken from a type derived from
 * ArrayIteratorFunctionsBase when the iterator is constructed. The constructor is private;
 * CollectionValue is a friend and creates the iterators.
 *
 * The position is declared `mutable`, so next() and seek() are callable on a const iterator.
 */
class ArrayIterator {
public:
    // Value of T::get_type_size() captured at construction; used as the item stride.
    int type_size() const { return _type_size; }
    // Value of IsTypeFixedWidth<T> captured at construction.
    bool is_type_fixed_width() const { return _is_type_fixed_width; }

    // True while the current position is before the end of the collection.
    bool has_next() const { return _offset < _collection_value->size(); }

    // Moves one item forward. Returns false, without moving, when has_next() is false.
    bool next() const {
        if (!has_next()) {
            return false;
        }
        ++_offset;
        return true;
    }

    // Jumps to item `n`. Returns false, without moving, when `n` is not a valid index.
    bool seek(uint64_t n) const {
        const bool in_range = n < _collection_value->size();
        if (in_range) {
            _offset = n;
        }
        return in_range;
    }

    // Whether the current item is marked null in the collection.
    bool is_null() const { return _collection_value->is_null_at(_offset); }

    // Address of the current item, or nullptr when the item is null.
    const void* get() const {
        if (is_null()) {
            return nullptr;
        }
        const auto* first_item = static_cast<const uint8_t*>(_collection_value->data());
        return first_item + _offset * _type_size;
    }

    // Mutable counterpart of get() const.
    void* get() {
        if (is_null()) {
            return nullptr;
        }
        auto* first_item = static_cast<uint8_t*>(_collection_value->mutable_data());
        return first_item + _offset * _type_size;
    }

    // Loads the current item into `value`; for a null item only value->is_null is written.
    void get(AnyVal* value) const {
        const bool item_is_null = is_null();
        value->is_null = item_is_null;
        if (!item_is_null) {
            _shallow_get(value, get());
        }
    }

    // Stores `value` into the current item.
    // The null sign is recorded only when the collection has a null-sign array; a null
    // `value` additionally raises the collection's has_null flag and writes no item data.
    void set(const AnyVal* value) {
        bool* signs = _collection_value->mutable_null_signs();
        if (signs != nullptr) {
            signs[_offset] = value->is_null;
        }
        if (!value->is_null) {
            _shallow_set(get(), value);
            return;
        }
        _collection_value->set_has_null(true);
    }

    // Forwards to the type's self_deep_copy for the current item; null items are skipped.
    void self_deep_copy(const TypeDescriptor& type_desc,
                        const GenMemFootprintFunc& gen_mem_footprint, bool convert_ptrs) {
        if (!is_null()) {
            _self_deep_copy(get(), type_desc, gen_mem_footprint, convert_ptrs);
        }
    }

    // Forwards to the type's deserialize for the current item; null items are skipped.
    void deserialize(const char* tuple_data, const TypeDescriptor& type_desc) {
        if (!is_null()) {
            _deserialize(get(), tuple_data, type_desc);
        }
    }

    // Result of the type's get_byte_size for the current item; 0 for a null item.
    size_t get_byte_size(const TypeDescriptor& type) const {
        return is_null() ? 0 : _get_byte_size(get(), type);
    }

    // Forwards to the type's raw_value_write for the current item; null items are skipped.
    void raw_value_write(const void* value, const TypeDescriptor& type_desc, MemPool* pool) {
        if (!is_null()) {
            _raw_value_write(get(), value, type_desc, pool);
        }
    }

private:
    // Binds the iterator to `data` and captures the static operations of T.
    // The second parameter is never read; it only lets the caller select T.
    template <typename T,
              typename = std::enable_if_t<std::is_base_of_v<ArrayIteratorFunctionsBase, T>>>
    ArrayIterator(CollectionValue* data, const T*)
            : _shallow_get(T::shallow_get),
              _shallow_set(T::shallow_set),
              _self_deep_copy(T::self_deep_copy),
              _deserialize(T::deserialize),
              _get_byte_size(T::get_byte_size),
              _raw_value_write(T::raw_value_write),
              _collection_value(data),
              _offset(0),
              _type_size(T::get_type_size()),
              _is_type_fixed_width(IsTypeFixedWidth<T>) {}

    // Item operations captured from T.
    void (*_shallow_get)(AnyVal*, const void*);
    void (*_shallow_set)(void*, const AnyVal*);
    void (*_self_deep_copy)(void*, const TypeDescriptor&, const GenMemFootprintFunc&, bool);
    void (*_deserialize)(void*, const char*, const TypeDescriptor&);
    size_t (*_get_byte_size)(const void* item, const TypeDescriptor&);
    void (*_raw_value_write)(void* item, const void* value, const TypeDescriptor& type_desc,
                             MemPool* pool);

    // Collection being iterated; not owned.
    CollectionValue* _collection_value;
    // Index of the current item.
    mutable uint64_t _offset;
    const int _type_size;
    const bool _is_type_fixed_width;

    friend CollectionValue;
};
} // namespace doris