[Enhancement](function) add to_bitmap() function with int type (#13973)

to_bitmap function only support string param only,add to_bitmap() function with int type, this can avoid convert int type to string and then convert string to int
This commit is contained in:
Zhengguo Yang
2022-11-08 09:15:26 +08:00
committed by GitHub
parent 34f43ac781
commit e1654bc6ef
6 changed files with 276 additions and 152 deletions

View File

@ -43,34 +43,52 @@ struct ToBitmap {
static constexpr auto name = "to_bitmap";
using ReturnType = DataTypeBitMap;
static void vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
MutableColumnPtr& col_res) {
execute<false>(data, offsets, nullptr, col_res);
template <typename ColumnType>
static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
execute<ColumnType, false>(col, nullptr, col_res);
}
static void vector_nullable(const ColumnString::Chars& data,
const ColumnString::Offsets& offsets, const NullMap& nullmap,
template <typename ColumnType>
static void vector_nullable(const ColumnType* col, const NullMap& nullmap,
MutableColumnPtr& col_res) {
execute<true>(data, offsets, &nullmap, col_res);
execute<ColumnType, true>(col, &nullmap, col_res);
}
template <bool arg_is_nullable>
static void execute(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
const NullMap* nullmap, MutableColumnPtr& col_res) {
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = offsets.size();
template <typename ColumnType, bool arg_is_nullable>
static void execute(const ColumnType* col, const NullMap* nullmap, MutableColumnPtr& col_res) {
if constexpr (std::is_same_v<ColumnType, ColumnString>) {
const ColumnString::Chars& data = col->get_chars();
const ColumnString::Offsets& offsets = col->get_offsets();
for (size_t i = 0; i < size; ++i) {
if (arg_is_nullable && ((*nullmap)[i])) {
continue;
} else {
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1];
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
raw_str, str_size, &parse_result);
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
res_data[i].add(int_value);
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = offsets.size();
for (size_t i = 0; i < size; ++i) {
if (arg_is_nullable && ((*nullmap)[i])) {
continue;
} else {
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1];
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
raw_str, str_size, &parse_result);
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
res_data[i].add(int_value);
}
}
}
} else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = col->size();
for (size_t i = 0; i < size; ++i) {
if (arg_is_nullable && ((*nullmap)[i])) {
continue;
} else {
int64_t int_value = col->get_data()[i];
if (LIKELY(int_value >= 0)) {
res_data[i].add(int_value);
}
}
}
}
@ -81,45 +99,76 @@ struct ToBitmapWithCheck {
static constexpr auto name = "to_bitmap_with_check";
using ReturnType = DataTypeBitMap;
static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
MutableColumnPtr& col_res) {
return execute<false>(data, offsets, nullptr, col_res);
template <typename ColumnType>
static Status vector(const ColumnType* col, MutableColumnPtr& col_res) {
return execute<ColumnType, false>(col, nullptr, col_res);
}
static Status vector_nullable(const ColumnString::Chars& data,
const ColumnString::Offsets& offsets, const NullMap& nullmap,
template <typename ColumnType>
static Status vector_nullable(const ColumnType* col, const NullMap& nullmap,
MutableColumnPtr& col_res) {
return execute<true>(data, offsets, &nullmap, col_res);
return execute<ColumnType, true>(col, &nullmap, col_res);
}
template <bool arg_is_nullable>
static Status execute(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
const NullMap* nullmap, MutableColumnPtr& col_res) {
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = offsets.size();
template <typename ColumnType, bool arg_is_nullable>
static Status execute(const ColumnType* col, const NullMap* nullmap,
MutableColumnPtr& col_res) {
if constexpr (std::is_same_v<ColumnType, ColumnString>) {
const ColumnString::Chars& data = col->get_chars();
const ColumnString::Offsets& offsets = col->get_offsets();
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = offsets.size();
for (size_t i = 0; i < size; ++i) {
if (arg_is_nullable && ((*nullmap)[i])) {
continue;
} else {
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1];
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
raw_str, str_size, &parse_result);
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
res_data[i].add(int_value);
for (size_t i = 0; i < size; ++i) {
if (arg_is_nullable && ((*nullmap)[i])) {
continue;
} else {
std::stringstream ss;
ss << "The input: " << std::string(raw_str, str_size)
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently, cannot create MV with to_bitmap on "
"column with negative values or cannot load negative values to column "
"with to_bitmap MV on it.";
LOG(WARNING) << ss.str();
return Status::InternalError(ss.str());
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1];
StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(
raw_str, str_size, &parse_result);
if (LIKELY(parse_result == StringParser::PARSE_SUCCESS)) {
res_data[i].add(int_value);
} else {
std::stringstream ss;
ss << "The input: " << std::string(raw_str, str_size)
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently, cannot create MV with to_bitmap on "
"column with negative values or cannot load negative values to "
"column "
"with to_bitmap MV on it.";
LOG(WARNING) << ss.str();
return Status::InternalError(ss.str());
}
}
}
} else if constexpr (std::is_same_v<ColumnType, ColumnInt64>) {
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = col->size();
for (size_t i = 0; i < size; ++i) {
if (arg_is_nullable && ((*nullmap)[i])) {
continue;
} else {
int64_t int_value = col->get_data()[i];
if (LIKELY(int_value >= 0)) {
res_data[i].add(int_value);
} else {
std::stringstream ss;
ss << "The input: " << int_value
<< " is not valid, to_bitmap only support bigint value from 0 to "
"18446744073709551615 currently, cannot create MV with to_bitmap on "
"column with negative values or cannot load negative values to "
"column "
"with to_bitmap MV on it.";
LOG(WARNING) << ss.str();
return Status::InternalError(ss.str());
}
}
}
} else {
return Status::InternalError("not support type");
}
return Status::OK();
}
@ -209,38 +258,16 @@ struct BitmapHash {
using ReturnType = DataTypeBitMap;
static void vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
MutableColumnPtr& col_res) {
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = offsets.size();
template <typename ColumnType>
static void vector(const ColumnType* col, MutableColumnPtr& col_res) {
if constexpr (std::is_same_v<ColumnType, ColumnString>) {
const ColumnString::Chars& data = col->get_chars();
const ColumnString::Offsets& offsets = col->get_offsets();
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = offsets.size();
for (size_t i = 0; i < size; ++i) {
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1];
if constexpr (HashBits == 32) {
uint32_t hash_value =
HashUtil::murmur_hash3_32(raw_str, str_size, HashUtil::MURMUR3_32_SEED);
res_data[i].add(hash_value);
} else {
uint64_t hash_value = 0;
murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
res_data[i].add(hash_value);
}
}
}
static void vector_nullable(const ColumnString::Chars& data,
const ColumnString::Offsets& offsets, const NullMap& nullmap,
MutableColumnPtr& col_res) {
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = offsets.size();
for (size_t i = 0; i < size; ++i) {
if (nullmap[i]) {
continue;
} else {
for (size_t i = 0; i < size; ++i) {
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1];
if constexpr (HashBits == 32) {
@ -255,6 +282,36 @@ struct BitmapHash {
}
}
}
template <typename ColumnType>
static void vector_nullable(const ColumnType* col, const NullMap& nullmap,
MutableColumnPtr& col_res) {
if constexpr (std::is_same_v<ColumnType, ColumnString>) {
const ColumnString::Chars& data = col->get_chars();
const ColumnString::Offsets& offsets = col->get_offsets();
auto* res_column = reinterpret_cast<ColumnBitmap*>(col_res.get());
auto& res_data = res_column->get_data();
size_t size = offsets.size();
for (size_t i = 0; i < size; ++i) {
if (nullmap[i]) {
continue;
} else {
const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t str_size = offsets[i] - offsets[i - 1];
if constexpr (HashBits == 32) {
uint32_t hash_value = HashUtil::murmur_hash3_32(raw_str, str_size,
HashUtil::MURMUR3_32_SEED);
res_data[i].add(hash_value);
} else {
uint64_t hash_value = 0;
murmur_hash3_x64_64(raw_str, str_size, 0, &hash_value);
res_data[i].add(hash_value);
}
}
}
}
}
};
class FunctionBitmapCount : public IFunction {