[Enhancement](function) add to_bitmap() function with int type (#13973)
to_bitmap function only support string param only,add to_bitmap() function with int type, this can avoid convert int type to string and then convert string to int
This commit is contained in:
@ -43,34 +43,52 @@ struct ToBitmap {
|
||||
static constexpr auto name = "to_bitmap";
|
||||
using ReturnType = DataTypeBitMap;
|
||||
|
||||
static void vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
|
||||
MutableColumnPtr& col_res) {
|
||||
execute<false>(data, offsets, nullptr, col_res);
|
||||
template <typename ColumnType>
|
||||
static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
|
||||
execute<ColumnType, false>(col, nullptr, col_res);
|
||||
}
|
||||
|
||||
static void vector_nullable(const ColumnString::Chars& data,
|
||||
const ColumnString::Offsets& offsets, const NullMap& nullmap,
|
||||
template <typename ColumnType>
|
||||
static void vector_nullable(const ColumnType* col, const NullMap& nullmap,
|
||||
MutableColumnPtr& col_res) {
|
||||
execute<true>(data, offsets, &nullmap, col_res);
|
||||
execute<ColumnType, true>(col, &nullmap, col_res);
|
||||
}
|
||||
template <bool arg_is_nullable>
|
||||
static void execute(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
|
||||
const NullMap* nullmap, MutableColumnPtr& col_res) {
|
||||
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
|
||||
auto& res_data = res_column->get_data();
|
||||
size_t size = offsets.size();
|
||||
template <typename ColumnType, bool arg_is_nullable>
|
||||
static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
|
||||
if constexpr (std::is_same_v<ColumnType, ColumnString>) {
|
||||
const ColumnString::Chars& data = col->get_chars();
|
||||
const ColumnString::Offsets& offsets = col->get_offsets();
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (arg_is_nullable && ((*nullmap)[i])) {
|
||||
continue;
|
||||
} else {
|
||||
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
|
||||
size_t str_size = offsets[i] - offsets[i - 1];
|
||||
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
|
||||
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
|
||||
raw_str, str_size, &parse_result);
|
||||
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
|
||||
res_data[i].add(int_value);
|
||||
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
|
||||
auto& res_data = res_column->get_data();
|
||||
size_t size = offsets.size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (arg_is_nullable && ((*nullmap)[i])) {
|
||||
continue;
|
||||
} else {
|
||||
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
|
||||
size_t str_size = offsets[i] - offsets[i - 1];
|
||||
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
|
||||
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
|
||||
raw_str, str_size, &parse_result);
|
||||
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
|
||||
res_data[i].add(int_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
|
||||
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
|
||||
auto& res_data = res_column->get_data();
|
||||
size_t size = col->size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (arg_is_nullable && ((*nullmap)[i])) {
|
||||
continue;
|
||||
} else {
|
||||
int64_t int_value = col->get_data()[i];
|
||||
if (LIKELY(int_value >= 0)) {
|
||||
res_data[i].add(int_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -81,45 +99,76 @@ struct ToBitmapWithCheck {
|
||||
static constexpr auto name = "to_bitmap_with_check";
|
||||
using ReturnType = DataTypeBitMap;
|
||||
|
||||
static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
|
||||
MutableColumnPtr& col_res) {
|
||||
return execute<false>(data, offsets, nullptr, col_res);
|
||||
template <typename ColumnType>
|
||||
static Status vector(const ColumnType* col, MutableColumnPtr& col_res) {
|
||||
return execute<ColumnType, false>(col, nullptr, col_res);
|
||||
}
|
||||
|
||||
static Status vector_nullable(const ColumnString::Chars& data,
|
||||
const ColumnString::Offsets& offsets, const NullMap& nullmap,
|
||||
template <typename ColumnType>
|
||||
static Status vector_nullable(const ColumnType* col, const NullMap& nullmap,
|
||||
MutableColumnPtr& col_res) {
|
||||
return execute<true>(data, offsets, &nullmap, col_res);
|
||||
return execute<ColumnType, true>(col, &nullmap, col_res);
|
||||
}
|
||||
template <bool arg_is_nullable>
|
||||
static Status execute(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
|
||||
const NullMap* nullmap, MutableColumnPtr& col_res) {
|
||||
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
|
||||
auto& res_data = res_column->get_data();
|
||||
size_t size = offsets.size();
|
||||
template <typename ColumnType, bool arg_is_nullable>
|
||||
static Status execute(const ColumnType* col, const NullMap* nullmap,
|
||||
MutableColumnPtr& col_res) {
|
||||
if constexpr (std::is_same_v<ColumnType, ColumnString>) {
|
||||
const ColumnString::Chars& data = col->get_chars();
|
||||
const ColumnString::Offsets& offsets = col->get_offsets();
|
||||
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
|
||||
auto& res_data = res_column->get_data();
|
||||
size_t size = offsets.size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (arg_is_nullable && ((*nullmap)[i])) {
|
||||
continue;
|
||||
} else {
|
||||
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
|
||||
size_t str_size = offsets[i] - offsets[i - 1];
|
||||
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
|
||||
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
|
||||
raw_str, str_size, &parse_result);
|
||||
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
|
||||
res_data[i].add(int_value);
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (arg_is_nullable && ((*nullmap)[i])) {
|
||||
continue;
|
||||
} else {
|
||||
std::stringstream ss;
|
||||
ss << "The input: " << std::string(raw_str, str_size)
|
||||
<< " is not valid, to_bitmap only support bigint value from 0 to "
|
||||
"18446744073709551615 currently, cannot create MV with to_bitmap on "
|
||||
"column with negative values or cannot load negative values to column "
|
||||
"with to_bitmap MV on it.";
|
||||
LOG(WARNING) << ss.str();
|
||||
return Status::InternalError(ss.str());
|
||||
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
|
||||
size_t str_size = offsets[i] - offsets[i - 1];
|
||||
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
|
||||
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
|
||||
raw_str, str_size, &parse_result);
|
||||
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
|
||||
res_data[i].add(int_value);
|
||||
} else {
|
||||
std::stringstream ss;
|
||||
ss << "The input: " << std::string(raw_str, str_size)
|
||||
<< " is not valid, to_bitmap only support bigint value from 0 to "
|
||||
"18446744073709551615 currently, cannot create MV with to_bitmap on "
|
||||
"column with negative values or cannot load negative values to "
|
||||
"column "
|
||||
"with to_bitmap MV on it.";
|
||||
LOG(WARNING) << ss.str();
|
||||
return Status::InternalError(ss.str());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
|
||||
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
|
||||
auto& res_data = res_column->get_data();
|
||||
size_t size = col->size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (arg_is_nullable && ((*nullmap)[i])) {
|
||||
continue;
|
||||
} else {
|
||||
int64_t int_value = col->get_data()[i];
|
||||
if (LIKELY(int_value >= 0)) {
|
||||
res_data[i].add(int_value);
|
||||
} else {
|
||||
std::stringstream ss;
|
||||
ss << "The input: " << int_value
|
||||
<< " is not valid, to_bitmap only support bigint value from 0 to "
|
||||
"18446744073709551615 currently, cannot create MV with to_bitmap on "
|
||||
"column with negative values or cannot load negative values to "
|
||||
"column "
|
||||
"with to_bitmap MV on it.";
|
||||
LOG(WARNING) << ss.str();
|
||||
return Status::InternalError(ss.str());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Status::InternalError("not support type");
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
@ -209,38 +258,16 @@ struct BitmapHash {
|
||||
|
||||
using ReturnType = DataTypeBitMap;
|
||||
|
||||
static void vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
|
||||
MutableColumnPtr& col_res) {
|
||||
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
|
||||
auto& res_data = res_column->get_data();
|
||||
size_t size = offsets.size();
|
||||
template <typename ColumnType>
|
||||
static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
|
||||
if constexpr (std::is_same_v<ColumnType, ColumnString>) {
|
||||
const ColumnString::Chars& data = col->get_chars();
|
||||
const ColumnString::Offsets& offsets = col->get_offsets();
|
||||
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
|
||||
auto& res_data = res_column->get_data();
|
||||
size_t size = offsets.size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
|
||||
size_t str_size = offsets[i] - offsets[i - 1];
|
||||
if constexpr (HashBits == 32) {
|
||||
uint32_t hash_value =
|
||||
HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED);
|
||||
res_data[i].add(hash_value);
|
||||
} else {
|
||||
uint64_t hash_value = 0;
|
||||
murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
|
||||
res_data[i].add(hash_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void vector_nullable(const ColumnString::Chars& data,
|
||||
const ColumnString::Offsets& offsets, const NullMap& nullmap,
|
||||
MutableColumnPtr& col_res) {
|
||||
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
|
||||
auto& res_data = res_column->get_data();
|
||||
size_t size = offsets.size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (nullmap[i]) {
|
||||
continue;
|
||||
} else {
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
|
||||
size_t str_size = offsets[i] - offsets[i - 1];
|
||||
if constexpr (HashBits == 32) {
|
||||
@ -255,6 +282,36 @@ struct BitmapHash {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
static void vector_nullable(const ColumnType* col, const NullMap& nullmap,
|
||||
MutableColumnPtr& col_res) {
|
||||
if constexpr (std::is_same_v<ColumnType, ColumnString>) {
|
||||
const ColumnString::Chars& data = col->get_chars();
|
||||
const ColumnString::Offsets& offsets = col->get_offsets();
|
||||
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
|
||||
auto& res_data = res_column->get_data();
|
||||
size_t size = offsets.size();
|
||||
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
if (nullmap[i]) {
|
||||
continue;
|
||||
} else {
|
||||
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
|
||||
size_t str_size = offsets[i] - offsets[i - 1];
|
||||
if constexpr (HashBits == 32) {
|
||||
uint32_t hash_value = HashUtil::murmur_hash3_32(raw_str, str_size,
|
||||
HashUtil::MURMUR3_32_SEED);
|
||||
res_data[i].add(hash_value);
|
||||
} else {
|
||||
uint64_t hash_value = 0;
|
||||
murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
|
||||
res_data[i].add(hash_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class FunctionBitmapCount : public IFunction {
|
||||
|
||||
Reference in New Issue
Block a user