[function](bitmap) support bitmap_to_base64 and bitmap_from_base64 (#23759)

This commit is contained in:
TengJianPing
2023-09-02 00:58:48 +08:00
committed by GitHub
parent e0efda1234
commit 75e2bc8a25
11 changed files with 627 additions and 40 deletions

View File

@ -40,6 +40,7 @@
#include "util/hash_util.hpp"
#include "util/murmur_hash3.h"
#include "util/string_parser.hpp"
#include "util/url_coding.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
@ -250,6 +251,58 @@ struct BitmapFromString {
}
};
// Name tag carrying the function name string "bitmap_from_base64".
struct NameBitmapFromBase64 {
    static constexpr const char* name {"bitmap_from_base64"};
};
// Implementation of bitmap_from_base64: decodes each input string from base64
// and deserializes the decoded bytes into a BitmapValue.
struct BitmapFromBase64 {
    using ArgumentType = DataTypeString;

    static constexpr auto name = "bitmap_from_base64";

    // Converts `input_rows_count` rows of a string column into bitmaps.
    //
    // data / offsets   : character buffer and per-row end offsets of the input column.
    // res              : receives exactly one BitmapValue per input row.
    // null_map         : null_map[i] is set to 1 for rows that cannot be decoded
    //                    (length not a multiple of 4, or base64_decode reports failure).
    // input_rows_count : number of rows to process.
    //
    // Returns Status::OK() on success, or Status::RuntimeError when the decoded
    // bytes cannot be deserialized into a bitmap.
    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
                         std::vector<BitmapValue>& res, NullMap& null_map,
                         size_t input_rows_count) {
        res.reserve(input_rows_count);
        if (offsets.size() == 0 && input_rows_count == 1) {
            // For NULL constant
            res.emplace_back();
            null_map[0] = 1;
            return Status::OK();
        }

        // Scratch buffer shared by all rows; it only ever grows.
        std::string decode_buff;
        for (size_t i = 0; i < input_rows_count; ++i) {
            // NOTE(review): for i == 0 this indexes offsets[-1]; presumably the Offsets
            // container is padded so that this reads 0 -- confirm.
            const char* src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
            int64_t src_size = offsets[i] - offsets[i - 1];
            if (0 != src_size % 4) {
                // Padded base64 text always has a length that is a multiple of 4;
                // anything else is reported as NULL rather than as an error.
                res.emplace_back();
                null_map[i] = 1;
                continue;
            }

            // Keep the same headroom as before (input length + 3 bytes), but size the
            // buffer with size_t so long inputs are not narrowed through int.
            const size_t required_len = static_cast<size_t>(src_size) + 3;
            if (decode_buff.size() < required_len) {
                decode_buff.resize(required_len);
            }

            const auto outlen = base64_decode(src_str, src_size, decode_buff.data());
            if (outlen < 0) {
                res.emplace_back();
                null_map[i] = 1;
            } else {
                // NOTE(review): deserialize() is not given the decoded length, so a row
                // that decodes to zero bytes would parse whatever is already in the
                // scratch buffer -- confirm whether that case needs explicit handling.
                BitmapValue bitmap_val;
                if (!bitmap_val.deserialize(decode_buff.data())) {
                    // src_str points into the column buffer and is not NUL-terminated at
                    // the row boundary, so format exactly src_size bytes.
                    return Status::RuntimeError(
                            fmt::format("bitmap_from_base64 decode failed: base64: {}",
                                        std::string(src_str, src_size)));
                }
                res.emplace_back(std::move(bitmap_val));
            }
        }
        return Status::OK();
    }
};
struct BitmapFromArray {
using ArgumentType = DataTypeArray;
static constexpr auto name = "bitmap_from_array";
@ -887,6 +940,55 @@ struct BitmapToString {
}
};
// Name tag carrying the function name string "bitmap_to_base64".
struct NameBitmapToBase64 {
    static constexpr const char* name {"bitmap_to_base64"};
};
// Implementation of bitmap_to_base64: serializes each bitmap and writes its
// base64 text into a string column.
struct BitmapToBase64 {
    using ReturnType = DataTypeString;
    static constexpr auto TYPE_INDEX = TypeIndex::BitMap;
    using Type = DataTypeBitMap::FieldType;
    using ReturnColumnType = ColumnString;
    using Chars = ColumnString::Chars;
    using Offsets = ColumnString::Offsets;

    // Length of the padded base64 text for `raw_size` input bytes:
    // 4 output characters for every (possibly partial) group of 3 bytes.
    static constexpr size_t encoded_length(size_t raw_size) { return (raw_size + 2) / 3 * 4; }

    // Encodes every bitmap in `data` to base64.
    //
    // data    : input bitmaps, one per row.
    // chars   : resized to the total encoded length and filled with the
    //           concatenated base64 text of all rows.
    // offsets : resized to data.size(); offsets[i] is the end position of row i
    //           inside `chars`.
    //
    // Always returns Status::OK().
    static Status vector(const std::vector<BitmapValue>& data, Chars& chars, Offsets& offsets) {
        size_t size = data.size();
        offsets.resize(size);

        // First pass: total output size. Each row contributes only its own encoded
        // length; the previous code multiplied that by the serialized size, which
        // over-allocated quadratically and left `chars` longer than the last offset.
        size_t output_char_size = 0;
        for (size_t i = 0; i < size; ++i) {
            // getSizeInBytes() is called through a non-const reference, as before.
            BitmapValue& bitmap_val = const_cast<BitmapValue&>(data[i]);
            output_char_size += encoded_length(bitmap_val.getSizeInBytes());
        }
        ColumnString::check_chars_length(output_char_size, size);
        chars.resize(output_char_size);
        auto chars_data = chars.data();

        // Second pass: serialize each bitmap into a reusable buffer and encode it
        // directly into its slot of the output.
        std::string ser_buff;
        size_t encoded_offset = 0;
        for (size_t i = 0; i < size; ++i) {
            BitmapValue& bitmap_val = const_cast<BitmapValue&>(data[i]);
            const size_t cur_ser_size = bitmap_val.getSizeInBytes();
            if (cur_ser_size > ser_buff.size()) {
                // The buffer only grows, so it is reallocated at most as often as a
                // new largest bitmap is seen.
                ser_buff.resize(cur_ser_size);
            }
            bitmap_val.write_to(ser_buff.data());

            int outlen = base64_encode((const unsigned char*)ser_buff.data(), cur_ser_size,
                                       chars_data + encoded_offset);
            DCHECK(outlen > 0);

            encoded_offset += encoded_length(cur_ser_size);
            offsets[i] = encoded_offset;
        }
        return Status::OK();
    }
};
struct SubBitmap {
static constexpr auto name = "sub_bitmap";
using TData1 = std::vector<BitmapValue>;
@ -1117,6 +1219,8 @@ using FunctionBitmapMin = FunctionBitmapSingle<FunctionBitmapMinImpl>;
using FunctionBitmapMax = FunctionBitmapSingle<FunctionBitmapMaxImpl>;
using FunctionBitmapToString = FunctionUnaryToType<BitmapToString, NameBitmapToString>;
// bitmap_to_base64: BitmapToBase64 adapted through FunctionUnaryToType, with the
// function name supplied by NameBitmapToBase64.
using FunctionBitmapToBase64 = FunctionUnaryToType<BitmapToBase64, NameBitmapToBase64>;
// bitmap_from_base64: BitmapFromBase64 adapted through FunctionBitmapAlwaysNull.
// BitmapFromBase64::vector fills a null map for undecodable rows.
// NOTE(review): presumably the wrapper exposes that null map as a nullable result -- confirm.
using FunctionBitmapFromBase64 = FunctionBitmapAlwaysNull<BitmapFromBase64>;
using FunctionBitmapNot =
FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapNot, NameBitmapNot>;
using FunctionBitmapAndNot =
@ -1137,6 +1241,8 @@ void register_function_bitmap(SimpleFunctionFactory& factory) {
factory.register_function<FunctionToBitmap>();
factory.register_function<FunctionToBitmapWithCheck>();
factory.register_function<FunctionBitmapFromString>();
factory.register_function<FunctionBitmapToBase64>();
factory.register_function<FunctionBitmapFromBase64>();
factory.register_function<FunctionBitmapFromArray>();
factory.register_function<FunctionBitmapHash>();
factory.register_function<FunctionBitmapHash64>();