// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include "util/bitmap_value.h" #include "vec/common/string_ref.h" namespace doris { namespace detail { class Helper { public: static const int DATETIME_PACKED_TIME_BYTE_SIZE = 8; static const int DATETIME_TYPE_BYTE_SIZE = 4; static const int DECIMAL_BYTE_SIZE = 16; // serialize_size start template static int32_t serialize_size(const T& v) { return sizeof(T); } // write_to start template static char* write_to(const T& v, char* dest) { size_t type_size = sizeof(T); memcpy(dest, &v, type_size); dest += type_size; return dest; } // read_from start template static void read_from(const char** src, T* result) { size_t type_size = sizeof(T); memcpy(result, *src, type_size); *src += type_size; } }; template <> char* Helper::write_to(const vectorized::VecDateTimeValue& v, char* dest) { *(int64_t*)dest = v.to_int64_datetime_packed(); dest += DATETIME_PACKED_TIME_BYTE_SIZE; *(int*)dest = v.type(); dest += DATETIME_TYPE_BYTE_SIZE; return dest; } template <> char* Helper::write_to(const DecimalV2Value& v, char* dest) { __int128 value = v.value(); memcpy(dest, &value, DECIMAL_BYTE_SIZE); dest += DECIMAL_BYTE_SIZE; return dest; } template <> char* Helper::write_to(const StringRef& v, char* dest) { *(int32_t*)dest = v.size; dest += 4; memcpy(dest, v.data, v.size); dest += v.size; return dest; } template <> char* Helper::write_to(const std::string& v, char* dest) { *(uint32_t*)dest = v.size(); dest += 4; memcpy(dest, v.c_str(), v.size()); dest += v.size(); return dest; } // write_to end template <> int32_t Helper::serialize_size( const vectorized::VecDateTimeValue& v) { return Helper::DATETIME_PACKED_TIME_BYTE_SIZE + Helper::DATETIME_TYPE_BYTE_SIZE; } template <> int32_t Helper::serialize_size(const DecimalV2Value& v) { return Helper::DECIMAL_BYTE_SIZE; } template <> int32_t Helper::serialize_size(const StringRef& v) { return v.size + 4; } template <> int32_t Helper::serialize_size(const std::string& v) { return v.size() + 4; } // serialize_size end template <> void Helper::read_from(const char** src, vectorized::VecDateTimeValue* result) { result->from_packed_time(*(int64_t*)(*src)); *src += DATETIME_PACKED_TIME_BYTE_SIZE; if (*(int*)(*src) == TIME_DATE) { result->cast_to_date(); } *src += DATETIME_TYPE_BYTE_SIZE; } template <> void Helper::read_from(const char** src, DecimalV2Value* result) { __int128 v = 0; memcpy(&v, *src, DECIMAL_BYTE_SIZE); *src += DECIMAL_BYTE_SIZE; *result = DecimalV2Value(v); } template <> void Helper::read_from(const char** src, StringRef* result) { int32_t length = *(int32_t*)(*src); *src += 4; *result = StringRef((char*)*src, length); *src += length; } template <> void Helper::read_from(const char** src, std::string* result) { int32_t length = *(int32_t*)(*src); *src += 4; *result = std::string((char*)*src, length); *src += length; } // read_from end } // namespace detail // Calculate the intersection of two or more bitmaps // Usage: intersect_count(bitmap_column_to_count, filter_column, filter_values ...) // Example: intersect_count(user_id, event, 'A', 'B', 'C'), meaning find the intersect count of user_id in all A/B/C 3 bitmaps // Todo(kks) Use Array type instead of variable arguments template struct BitmapIntersect { public: BitmapIntersect() = default; explicit BitmapIntersect(const char* src) { deserialize(src); } void add_key(const T key) { BitmapValue empty_bitmap; _bitmaps[key] = empty_bitmap; } void update(const T& key, const BitmapValue& bitmap) { if (_bitmaps.find(key) != _bitmaps.end()) { _bitmaps[key] |= bitmap; } } void merge(const BitmapIntersect& other) { for (auto& kv : other._bitmaps) { if (_bitmaps.find(kv.first) != _bitmaps.end()) { _bitmaps[kv.first] |= kv.second; } else { _bitmaps[kv.first] = kv.second; } } } // intersection BitmapValue intersect() const { BitmapValue result; auto it = _bitmaps.begin(); result |= it->second; it++; for (; it != _bitmaps.end(); it++) { result &= it->second; } return result; } // calculate the intersection for _bitmaps's bitmap values int64_t intersect_count() const { if (_bitmaps.empty()) { return 0; } return intersect().cardinality(); } // the serialize size size_t size() { size_t size = 4; for (auto& kv : _bitmaps) { size += detail::Helper::serialize_size(kv.first); size += kv.second.getSizeInBytes(); } return size; } //must call size() first void serialize(char* dest) { char* writer = dest; *(int32_t*)writer = _bitmaps.size(); writer += 4; for (auto& kv : _bitmaps) { writer = detail::Helper::write_to(kv.first, writer); kv.second.write_to(writer); writer += kv.second.getSizeInBytes(); } } void deserialize(const char* src) { const char* reader = src; int32_t bitmaps_size = *(int32_t*)reader; reader += 4; for (int32_t i = 0; i < bitmaps_size; i++) { T key; detail::Helper::read_from(&reader, &key); BitmapValue bitmap(reader); reader += bitmap.getSizeInBytes(); _bitmaps[key] = bitmap; } } protected: std::map _bitmaps; }; template <> struct BitmapIntersect { public: BitmapIntersect() = default; explicit BitmapIntersect(const char* src) { deserialize(src); } void add_key(const std::string_view key) { BitmapValue empty_bitmap; _bitmaps[key] = empty_bitmap; } void update(const std::string_view& key, const BitmapValue& bitmap) { if (_bitmaps.find(key) != _bitmaps.end()) { _bitmaps[key] |= bitmap; } } void merge(const BitmapIntersect& other) { for (auto& kv : other._bitmaps) { if (_bitmaps.find(kv.first) != _bitmaps.end()) { _bitmaps[kv.first] |= kv.second; } else { _bitmaps[kv.first] = kv.second; } } } // intersection BitmapValue intersect() const { BitmapValue result; auto it = _bitmaps.begin(); result |= it->second; it++; for (; it != _bitmaps.end(); it++) { result &= it->second; } return result; } // calculate the intersection for _bitmaps's bitmap values int64_t intersect_count() const { if (_bitmaps.empty()) { return 0; } return intersect().cardinality(); } // the serialize size size_t size() { size_t size = 4; for (auto& kv : _bitmaps) { size += detail::Helper::serialize_size(kv.first); size += kv.second.getSizeInBytes(); } return size; } //must call size() first void serialize(char* dest) { char* writer = dest; *(int32_t*)writer = _bitmaps.size(); writer += 4; for (auto& kv : _bitmaps) { writer = detail::Helper::write_to(kv.first, writer); kv.second.write_to(writer); writer += kv.second.getSizeInBytes(); } } void deserialize(const char* src) { const char* reader = src; int32_t bitmaps_size = *(int32_t*)reader; reader += 4; for (int32_t i = 0; i < bitmaps_size; i++) { std::string key; detail::Helper::read_from(&reader, &key); BitmapValue bitmap(reader); reader += bitmap.getSizeInBytes(); _bitmaps[key] = bitmap; } } protected: phmap::flat_hash_map _bitmaps; }; } // namespace doris