441 lines
14 KiB
C++
441 lines
14 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
// This file is copied from
|
|
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnComplex.h
|
|
// and modified by Doris
|
|
|
|
#pragma once
|
|
|
|
#include <vector>
|
|
|
|
#include "olap/hll.h"
|
|
#include "util/bitmap_value.h"
|
|
#include "util/quantile_state.h"
|
|
#include "vec/columns/column.h"
|
|
#include "vec/columns/column_impl.h"
|
|
#include "vec/columns/column_string.h"
|
|
#include "vec/columns/column_vector.h"
|
|
#include "vec/columns/columns_common.h"
|
|
#include "vec/core/types.h"
|
|
|
|
namespace doris::vectorized {
|
|
|
|
template <typename T>
|
|
class ColumnComplexType final : public COWHelper<IColumn, ColumnComplexType<T>> {
|
|
private:
|
|
ColumnComplexType() {}
|
|
ColumnComplexType(const size_t n) : data(n) {}
|
|
friend class COWHelper<IColumn, ColumnComplexType<T>>;
|
|
|
|
public:
|
|
using Self = ColumnComplexType;
|
|
using value_type = T;
|
|
using Container = std::vector<value_type>;
|
|
|
|
bool is_numeric() const override { return false; }
|
|
|
|
bool is_bitmap() const override { return std::is_same_v<T, BitmapValue>; }
|
|
bool is_hll() const override { return std::is_same_v<T, HyperLogLog>; }
|
|
bool is_quantile_state() const override { return std::is_same_v<T, QuantileState>; }
|
|
|
|
size_t size() const override { return data.size(); }
|
|
|
|
StringRef get_data_at(size_t n) const override {
|
|
return StringRef(reinterpret_cast<const char*>(&data[n]), sizeof(data[n]));
|
|
}
|
|
|
|
void insert_from(const IColumn& src, size_t n) override {
|
|
data.push_back(assert_cast<const Self&>(src).get_data()[n]);
|
|
}
|
|
|
|
void insert_data(const char* pos, size_t /*length*/) override {
|
|
data.push_back(*reinterpret_cast<const T*>(pos));
|
|
}
|
|
|
|
void insert_binary_data(const char* pos, size_t length) {
|
|
insert_default();
|
|
T* pvalue = &get_element(size() - 1);
|
|
if (!length) {
|
|
*pvalue = *reinterpret_cast<const T*>(pos);
|
|
return;
|
|
}
|
|
|
|
if constexpr (std::is_same_v<T, BitmapValue>) {
|
|
pvalue->deserialize(pos);
|
|
} else if constexpr (std::is_same_v<T, HyperLogLog>) {
|
|
pvalue->deserialize(Slice(pos, length));
|
|
} else if constexpr (std::is_same_v<T, QuantileState>) {
|
|
pvalue->deserialize(Slice(pos, length));
|
|
} else {
|
|
LOG(FATAL) << "Unexpected type in column complex";
|
|
}
|
|
}
|
|
|
|
void insert_many_continuous_binary_data(const char* data, const uint32_t* offsets,
|
|
const size_t num) override {
|
|
if (UNLIKELY(num == 0)) {
|
|
return;
|
|
}
|
|
|
|
for (size_t i = 0; i != num; ++i) {
|
|
insert_binary_data(data + offsets[i], offsets[i + 1] - offsets[i]);
|
|
}
|
|
}
|
|
|
|
void insert_many_binary_data(char* data_array, uint32_t* len_array,
|
|
uint32_t* start_offset_array, size_t num) override {
|
|
for (size_t i = 0; i < num; i++) {
|
|
insert_binary_data(data_array + start_offset_array[i], len_array[i]);
|
|
}
|
|
}
|
|
|
|
void insert_default() override { data.push_back(T()); }
|
|
|
|
void insert_many_defaults(size_t length) override {
|
|
size_t old_size = data.size();
|
|
data.resize(old_size + length);
|
|
}
|
|
|
|
void clear() override { data.clear(); }
|
|
|
|
// TODO: value_type is not a pod type, so we also need to
|
|
// calculate the memory requested by value_type
|
|
size_t byte_size() const override { return data.size() * sizeof(data[0]); }
|
|
|
|
size_t allocated_bytes() const override { return byte_size(); }
|
|
|
|
void insert_value(T value) { data.emplace_back(std::move(value)); }
|
|
|
|
[[noreturn]] void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
|
|
IColumn::Permutation& res) const override {
|
|
LOG(FATAL) << "get_permutation not implemented";
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
void get_indices_of_non_default_rows(IColumn::Offsets64& indices, size_t from,
|
|
size_t limit) const override {
|
|
LOG(FATAL) << "get_indices_of_non_default_rows not implemented";
|
|
}
|
|
[[noreturn]] ColumnPtr index(const IColumn& indexes, size_t limit) const override {
|
|
LOG(FATAL) << "index not implemented";
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
void reserve(size_t n) override { data.reserve(n); }
|
|
|
|
void resize(size_t n) override { data.resize(n); }
|
|
|
|
const char* get_family_name() const override { return TypeName<T>::get(); }
|
|
|
|
MutableColumnPtr clone_resized(size_t size) const override;
|
|
|
|
void insert(const Field& x) override {
|
|
const String& s = doris::vectorized::get<const String&>(x);
|
|
data.push_back(*reinterpret_cast<const T*>(s.c_str()));
|
|
}
|
|
|
|
Field operator[](size_t n) const override {
|
|
assert(n < size());
|
|
return Field(reinterpret_cast<const char*>(&data[n]), sizeof(data[n]));
|
|
}
|
|
|
|
void get(size_t n, Field& res) const override {
|
|
assert(n < size());
|
|
res.assign_string(reinterpret_cast<const char*>(&data[n]), sizeof(data[n]));
|
|
}
|
|
|
|
[[noreturn]] UInt64 get64(size_t n) const override {
|
|
LOG(FATAL) << "get field not implemented";
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
[[noreturn]] Float64 get_float64(size_t n) const override {
|
|
LOG(FATAL) << "get field not implemented";
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
[[noreturn]] UInt64 get_uint(size_t n) const override {
|
|
LOG(FATAL) << "get field not implemented";
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
[[noreturn]] bool get_bool(size_t n) const override {
|
|
LOG(FATAL) << "get field not implemented";
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
[[noreturn]] Int64 get_int(size_t n) const override {
|
|
LOG(FATAL) << "get field not implemented";
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
void insert_range_from(const IColumn& src, size_t start, size_t length) override {
|
|
auto& col = assert_cast<const Self&>(src);
|
|
auto& src_data = col.get_data();
|
|
auto st = src_data.begin() + start;
|
|
auto ed = st + length;
|
|
data.insert(data.end(), st, ed);
|
|
}
|
|
|
|
void insert_indices_from(const IColumn& src, const uint32_t* indices_begin,
|
|
const uint32_t* indices_end) override {
|
|
const Self& src_vec = assert_cast<const Self&>(src);
|
|
auto new_size = indices_end - indices_begin;
|
|
|
|
for (uint32_t i = 0; i < new_size; ++i) {
|
|
auto offset = *(indices_begin + i);
|
|
data.emplace_back(src_vec.get_element(offset));
|
|
}
|
|
}
|
|
|
|
void pop_back(size_t n) override { data.erase(data.end() - n, data.end()); }
|
|
// it's impossible to use ComplexType as key , so we don't have to implement them
|
|
[[noreturn]] StringRef serialize_value_into_arena(size_t n, Arena& arena,
|
|
char const*& begin) const override {
|
|
LOG(FATAL) << "serialize_value_into_arena not implemented";
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
[[noreturn]] const char* deserialize_and_insert_from_arena(const char* pos) override {
|
|
LOG(FATAL) << "deserialize_and_insert_from_arena not implemented";
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
// maybe we do not need to impl the function
|
|
void update_hash_with_value(size_t n, SipHash& hash) const override {
|
|
// TODO add hash function
|
|
}
|
|
|
|
virtual void update_hashes_with_value(
|
|
std::vector<SipHash>& hashes,
|
|
const uint8_t* __restrict null_data = nullptr) const override {
|
|
// TODO add hash function
|
|
}
|
|
|
|
virtual void update_hashes_with_value(
|
|
uint64_t* __restrict hashes,
|
|
const uint8_t* __restrict null_data = nullptr) const override {
|
|
// TODO add hash function
|
|
}
|
|
|
|
[[noreturn]] int compare_at(size_t n, size_t m, const IColumn& rhs,
|
|
int nan_direction_hint) const override {
|
|
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
|
|
"compare_at for " + std::string(get_family_name()));
|
|
}
|
|
|
|
bool is_fixed_and_contiguous() const override { return true; }
|
|
size_t size_of_value_if_fixed() const override { return sizeof(T); }
|
|
|
|
StringRef get_raw_data() const override {
|
|
return StringRef(reinterpret_cast<const char*>(data.data()), data.size());
|
|
}
|
|
|
|
bool structure_equals(const IColumn& rhs) const override {
|
|
return typeid(rhs) == typeid(ColumnComplexType<T>);
|
|
}
|
|
|
|
ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) const override;
|
|
|
|
size_t filter(const IColumn::Filter& filter) override;
|
|
|
|
ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override;
|
|
|
|
Container& get_data() { return data; }
|
|
|
|
const Container& get_data() const { return data; }
|
|
|
|
const T& get_element(size_t n) const { return data[n]; }
|
|
|
|
T& get_element(size_t n) { return data[n]; }
|
|
|
|
ColumnPtr replicate(const IColumn::Offsets& replicate_offsets) const override;
|
|
|
|
void replicate(const uint32_t* indexs, size_t target_size, IColumn& column) const override;
|
|
|
|
[[noreturn]] MutableColumns scatter(IColumn::ColumnIndex num_columns,
|
|
const IColumn::Selector& selector) const override {
|
|
LOG(FATAL) << "scatter not implemented";
|
|
__builtin_unreachable();
|
|
}
|
|
|
|
void append_data_by_selector(MutableColumnPtr& res,
|
|
const IColumn::Selector& selector) const override {
|
|
this->template append_data_by_selector_impl<ColumnComplexType<T>>(res, selector);
|
|
}
|
|
|
|
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
|
|
DCHECK(size() > self_row);
|
|
data[self_row] = assert_cast<const Self&>(rhs).data[row];
|
|
}
|
|
|
|
void replace_column_data_default(size_t self_row = 0) override {
|
|
DCHECK(size() > self_row);
|
|
data[self_row] = T();
|
|
}
|
|
|
|
private:
|
|
Container data;
|
|
};
|
|
|
|
template <typename T>
|
|
MutableColumnPtr ColumnComplexType<T>::clone_resized(size_t size) const {
|
|
auto res = this->create();
|
|
|
|
if (size > 0) {
|
|
auto& new_col = assert_cast<Self&>(*res);
|
|
new_col.data = this->data;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
template <typename T>
|
|
ColumnPtr ColumnComplexType<T>::filter(const IColumn::Filter& filt,
|
|
ssize_t result_size_hint) const {
|
|
size_t size = data.size();
|
|
column_match_filter_size(size, filt.size());
|
|
|
|
if (data.size() == 0) return this->create();
|
|
auto res = this->create();
|
|
Container& res_data = res->get_data();
|
|
|
|
if (result_size_hint) res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
|
|
|
|
const UInt8* filt_pos = filt.data();
|
|
const UInt8* filt_end = filt_pos + size;
|
|
const T* data_pos = data.data();
|
|
|
|
while (filt_pos < filt_end) {
|
|
if (*filt_pos) res_data.push_back(*data_pos);
|
|
|
|
++filt_pos;
|
|
++data_pos;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
template <typename T>
|
|
size_t ColumnComplexType<T>::filter(const IColumn::Filter& filter) {
|
|
size_t size = data.size();
|
|
column_match_filter_size(size, filter.size());
|
|
|
|
if (data.size() == 0) {
|
|
return 0;
|
|
}
|
|
|
|
T* res_data = data.data();
|
|
|
|
const UInt8* filter_pos = filter.data();
|
|
const UInt8* filter_end = filter_pos + size;
|
|
const T* data_pos = data.data();
|
|
|
|
while (filter_pos < filter_end) {
|
|
if (*filter_pos) {
|
|
*res_data = std::move(*data_pos);
|
|
++res_data;
|
|
}
|
|
|
|
++filter_pos;
|
|
++data_pos;
|
|
}
|
|
|
|
data.resize(res_data - data.data());
|
|
|
|
return res_data - data.data();
|
|
}
|
|
|
|
template <typename T>
|
|
ColumnPtr ColumnComplexType<T>::permute(const IColumn::Permutation& perm, size_t limit) const {
|
|
size_t size = data.size();
|
|
|
|
limit = limit ? std::min(size, limit) : size;
|
|
|
|
if (perm.size() < limit) {
|
|
LOG(FATAL) << "Size of permutation is less than required.";
|
|
}
|
|
|
|
auto res = this->create(limit);
|
|
typename Self::Container& res_data = res->get_data();
|
|
for (size_t i = 0; i < limit; ++i) {
|
|
res_data[i] = data[perm[i]];
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
template <typename T>
|
|
ColumnPtr ColumnComplexType<T>::replicate(const IColumn::Offsets& offsets) const {
|
|
size_t size = data.size();
|
|
column_match_offsets_size(size, offsets.size());
|
|
|
|
if (0 == size) return this->create();
|
|
|
|
auto res = this->create();
|
|
typename Self::Container& res_data = res->get_data();
|
|
res_data.reserve(offsets.back());
|
|
|
|
IColumn::Offset prev_offset = 0;
|
|
for (size_t i = 0; i < size; ++i) {
|
|
size_t size_to_replicate = offsets[i] - prev_offset;
|
|
prev_offset = offsets[i];
|
|
|
|
for (size_t j = 0; j < size_to_replicate; ++j) {
|
|
res_data.push_back(data[i]);
|
|
}
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
template <typename T>
|
|
void ColumnComplexType<T>::replicate(const uint32_t* indexs, size_t target_size,
|
|
IColumn& column) const {
|
|
auto& res = reinterpret_cast<ColumnComplexType<T>&>(column);
|
|
typename Self::Container& res_data = res.get_data();
|
|
res_data.resize(target_size);
|
|
|
|
for (size_t i = 0; i < target_size; ++i) {
|
|
res_data[i] = data[indexs[i]];
|
|
}
|
|
}
|
|
|
|
using ColumnBitmap = ColumnComplexType<BitmapValue>;
|
|
using ColumnHLL = ColumnComplexType<HyperLogLog>;
|
|
using ColumnQuantileState = ColumnComplexType<QuantileState>;
|
|
|
|
template <typename T>
|
|
struct is_complex : std::false_type {};
|
|
|
|
template <>
|
|
struct is_complex<BitmapValue> : std::true_type {};
|
|
//DataTypeBitMap::FieldType = BitmapValue
|
|
|
|
template <>
|
|
struct is_complex<HyperLogLog> : std::true_type {};
|
|
//DataTypeHLL::FieldType = HyperLogLog
|
|
|
|
template <>
|
|
struct is_complex<QuantileState> : std::true_type {};
|
|
//DataTypeQuantileState::FieldType = QuantileState
|
|
|
|
template <class T>
|
|
constexpr bool is_complex_v = is_complex<T>::value;
|
|
|
|
} // namespace doris::vectorized
|