[fix](bitmap) fix wrong result of bitmap count functions for null values (#17849)

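The bitmap count functions (bitmap_and_count, bitmap_xor_count and bitmap_and_not_count) returned NULL for rows where any argument was NULL, which is wrong: the count for such rows should be 0.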
This commit is contained in:
TengJianPing
2023-03-19 11:49:58 +08:00
committed by GitHub
parent 14dcdd188e
commit dfa2528b5e
11 changed files with 859 additions and 582 deletions

View File

@ -489,9 +489,9 @@ struct BitmapAndNotCount {
using T0 = typename LeftDataType::FieldType;
using T1 = typename RightDataType::FieldType;
using TData = std::vector<BitmapValue>;
using ResTData = typename ColumnVector<Int64>::Container;
using ResTData = typename ColumnVector<Int64>::Container::value_type;
static Status vector_vector(const TData& lvec, const TData& rvec, ResTData& res) {
static Status vector_vector(const TData& lvec, const TData& rvec, ResTData* res) {
size_t size = lvec.size();
BitmapValue mid_data;
for (size_t i = 0; i < size; ++i) {
@ -504,6 +504,153 @@ struct BitmapAndNotCount {
}
};
// Clears the counts of rows that are flagged in `null_map`.
//
// For every row i in [0, null_map.size()), count[i] is kept when null_map[i] is 0
// and set to 0 when null_map[i] is 1. The flag is used directly as an index into a
// two-entry mask table, so it must be 0 or 1. `count` must hold at least
// null_map.size() elements.
void update_bitmap_op_count(int64_t* __restrict count, const NullMap& null_map) {
    // Mask selected by the null flag: all bits set keeps the value, zero clears it.
    static constexpr int64_t keep_or_clear[2] = {-1, 0};

    const auto* __restrict is_null = null_map.data();
    const size_t rows = null_map.size();
    for (size_t row = 0; row != rows; ++row) {
        count[row] &= keep_or_clear[is_null[row]];
    }
}
// Null post-processing shared by bitmap_and_count, bitmap_xor_count and
// bitmap_and_not_count: a row's count becomes 0 when any argument is NULL in that row.
//
// src              - result column computed on the nested (non-nullable) arguments; it is
//                    cast to ColumnNullable and its nested column to ColumnInt64. The
//                    nested counts are patched in place.
// block / args     - the original block and the positions of the function arguments.
// result           - position of the result column in `block`; only its type is used.
// input_rows_count - row count used when a constant result column has to be created.
//
// Returns `src` after patching, or a constant column of 0 built from the result type as
// soon as an argument turns out to be a constant NULL.
ColumnPtr handle_bitmap_op_count_null_value(ColumnPtr& src, const Block& block,
                                            const ColumnNumbers& args, size_t result,
                                            size_t input_rows_count) {
    const auto* src_nullable = assert_cast<const ColumnNullable*>(src.get());
    ColumnPtr nested_counts = src_nullable->get_nested_column_ptr();
    MutableColumnPtr mutable_counts = (*std::move(nested_counts)).assume_mutable();
    auto* __restrict counts =
            assert_cast<ColumnInt64*>(mutable_counts.get())->get_data().data();

    for (const auto& position : args) {
        const ColumnWithTypeAndName& argument = block.get_by_position(position);
        if (!argument.type->is_nullable()) {
            continue;
        }

        if (is_column_const(*argument.column)) {
            // A constant NULL argument makes every row NULL, so the whole result is 0.
            if (assert_cast<const ColumnConst*>(argument.column.get())->only_null()) {
                return block.get_by_position(result).type->create_column_const(input_rows_count,
                                                                               0);
            }
            // Any other constant argument is skipped without looking at a null map.
            continue;
        }

        // Zero the counts of the rows where this argument is NULL.
        const auto* arg_nullable = assert_cast<const ColumnNullable*>(argument.column.get());
        const ColumnPtr& null_map_column = arg_nullable->get_null_map_column_ptr();
        const NullMap& arg_null_map =
                assert_cast<const ColumnUInt8&>(*null_map_column).get_data();
        update_bitmap_op_count(counts, arg_null_map);
    }
    return src;
}
// Runs a bitmap count function so that NULL inputs yield a count of 0.
//
// Three cases, decided by get_null_presence() on the arguments:
//  * a constant NULL argument: the result is a constant column of 0;
//  * some nullable argument:   `exec_impl_func` runs on a block of nested columns and the
//                              counts are then patched by handle_bitmap_op_count_null_value();
//  * otherwise:                `exec_impl_func` runs directly on `block`.
//
// Returns the status of `exec_impl_func`, or OK when it was not needed.
Status execute_bitmap_op_count_null_to_zero(
        FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result,
        size_t input_rows_count,
        const std::function<Status(FunctionContext*, Block&, const ColumnNumbers&, size_t, size_t)>&
                exec_impl_func) {
    const NullPresence presence = get_null_presence(block, arguments);

    if (presence.has_null_constant) {
        block.get_by_position(result).column =
                block.get_by_position(result).type->create_column_const(input_rows_count, 0);
        return Status::OK();
    }

    if (!presence.has_nullable) {
        return exec_impl_func(context, block, arguments, result, input_rows_count);
    }

    // Compute on the nested columns first, then zero the rows that had a NULL argument.
    auto [nested_block, nested_args, nested_result] =
            create_block_with_nested_columns(block, arguments, result);
    RETURN_IF_ERROR(exec_impl_func(context, nested_block, nested_args, nested_result,
                                   nested_block.rows()));
    block.get_by_position(result).column = handle_bitmap_op_count_null_value(
            nested_block.get_by_position(nested_result).column, block, arguments, result,
            input_rows_count);
    return Status::OK();
}
// Function object for "bitmap_and_not_count" over two bitmap arguments.
//
// NULL handling is done here instead of by the default NULL wrapper: execution goes
// through execute_bitmap_op_count_null_to_zero(), so a row with a NULL argument gets the
// count 0 rather than NULL. The declared return type is still nullable whenever an
// argument type is nullable.
class FunctionBitmapAndNotCount : public IFunction {
public:
    using LeftDataType = DataTypeBitMap;
    using RightDataType = DataTypeBitMap;
    using ResultDataType = typename BitmapAndNotCount<LeftDataType, RightDataType>::ResultDataType;

    static constexpr auto name = "bitmap_and_not_count";

    static FunctionPtr create() { return std::make_shared<FunctionBitmapAndNotCount>(); }

    String get_name() const override { return name; }

    size_t get_number_of_arguments() const override { return 2; }

    // The result type is nullable only when at least one argument type is nullable.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        bool return_nullable = false;
        for (const auto& argument : arguments) {
            if (argument->is_nullable()) {
                return_nullable = true;
                break;
            }
        }
        auto result_type = std::make_shared<ResultDataType>();
        return return_nullable ? make_nullable(result_type) : result_type;
    }

    bool use_default_implementation_for_constants() const override { return true; }

    bool use_default_implementation_for_nulls() const override {
        // NULLs are handled by execute_impl(): the result column may have a nullable type,
        // but its values are never NULL; a row with a NULL argument gets the count 0.
        return false;
    }

    // Entry point: delegates to the shared NULL-to-zero driver, which calls
    // execute_impl_internal() on columns that no longer contain NULLs.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) override {
        DCHECK_EQ(arguments.size(), 2);
        return execute_bitmap_op_count_null_to_zero(
                context, block, arguments, result, input_rows_count,
                [this](FunctionContext* ctx, Block& blk, const ColumnNumbers& args, size_t res,
                       size_t rows) { return execute_impl_internal(ctx, blk, args, res, rows); });
    }

    // Computes the count for every row of two bitmap columns and stores the result column
    // at position `result` of `block`. Both argument columns are expanded to full columns
    // first and must be ColumnBitmap. Returns the status of the per-row computation.
    Status execute_impl_internal(FunctionContext* context, Block& block,
                                 const ColumnNumbers& arguments, size_t result,
                                 size_t input_rows_count) {
        // Alias keeps the template comma out of the RETURN_IF_ERROR macro argument.
        using Impl = BitmapAndNotCount<LeftDataType, RightDataType>;
        using ResultType = typename ResultDataType::FieldType;
        using ColVecResult = ColumnVector<ResultType>;

        const auto& left = block.get_by_position(arguments[0]);
        auto lcol = left.column->convert_to_full_column_if_const();
        const auto& right = block.get_by_position(arguments[1]);
        auto rcol = right.column->convert_to_full_column_if_const();

        typename ColVecResult::MutablePtr col_res = ColVecResult::create();
        auto& vec_res = col_res->get_data();
        vec_res.resize(block.rows());

        const ColumnBitmap* l_bitmap_col = assert_cast<const ColumnBitmap*>(lcol.get());
        const ColumnBitmap* r_bitmap_col = assert_cast<const ColumnBitmap*>(rcol.get());
        // Propagate a failure instead of silently dropping the returned Status.
        RETURN_IF_ERROR(Impl::vector_vector(l_bitmap_col->get_data(), r_bitmap_col->get_data(),
                                            vec_res.data()));

        auto& result_info = block.get_by_position(result);
        if (result_info.type->is_nullable()) {
            // Nullable result type: wrap the counts with an all-zero (no NULL) null map.
            block.replace_by_position(
                    result, ColumnNullable::create(std::move(col_res),
                                                   ColumnUInt8::create(input_rows_count, 0)));
        } else {
            block.replace_by_position(result, std::move(col_res));
        }
        return Status::OK();
    }
};
// Tag type whose only member is the function name string "bitmap_contains".
struct NameBitmapContains {
    static constexpr const char* name = "bitmap_contains";
};
@ -780,8 +927,6 @@ using FunctionBitmapNot =
FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapNot, NameBitmapNot>;
// Alias for FunctionBinaryToType instantiated with DataTypeBitMap, DataTypeBitMap,
// BitmapAndNot and NameBitmapAndNot.
// NOTE(review): presumably these are the left type, right type, implementation and name
// tag, in that order; confirm against the FunctionBinaryToType declaration.
using FunctionBitmapAndNot =
FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapAndNot, NameBitmapAndNot>;
using FunctionBitmapAndNotCount = FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap,
BitmapAndNotCount, NameBitmapAndNotCount>;
// Alias for FunctionBinaryToType instantiated with DataTypeBitMap, DataTypeInt64,
// BitmapContains and NameBitmapContains (the tag whose name is "bitmap_contains").
// NOTE(review): presumably these are the left type, right type, implementation and name
// tag, in that order; confirm against the FunctionBinaryToType declaration.
using FunctionBitmapContains =
FunctionBinaryToType<DataTypeBitMap, DataTypeInt64, BitmapContains, NameBitmapContains>;