[bug](bitmap) fix bitmap value copy operator not call reset (#26451)

when a empty bitmap assign to other bitmap
the other bitmap should reset self firstly, and then set empty type.
This commit is contained in:
zhangstar333
2023-11-09 10:05:09 +08:00
committed by GitHub
parent 7df60a4980
commit 74e452f19c
8 changed files with 165 additions and 28 deletions

View File

@ -1172,10 +1172,11 @@ public:
using SetContainer = phmap::flat_hash_set<T>;
// Construct an empty bitmap.
BitmapValue() : _type(EMPTY), _is_shared(false) {}
BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); }
// Construct a bitmap with one element.
explicit BitmapValue(uint64_t value) : _sv(value), _type(SINGLE), _is_shared(false) {}
explicit BitmapValue(uint64_t value)
: _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}
// Construct a bitmap from serialized data.
explicit BitmapValue(const char* src) : _is_shared(false) {
@ -1199,7 +1200,7 @@ public:
break;
}
if (other._type != EMPTY) {
if (other._type == BITMAP) {
_is_shared = true;
// should also set other's state to shared, so that other bitmap value will
// create a new bitmap when it wants to modify it.
@ -1229,6 +1230,10 @@ public:
}
BitmapValue& operator=(const BitmapValue& other) {
if (this == &other) {
return *this;
}
reset();
_type = other._type;
switch (other._type) {
case EMPTY:
@ -1244,7 +1249,7 @@ public:
break;
}
if (other._type != EMPTY) {
if (other._type == BITMAP) {
_is_shared = true;
// should also set other's state to shared, so that other bitmap value will
// create a new bitmap when it wants to modify it.
@ -1265,6 +1270,7 @@ public:
if (this == &other) {
return *this;
}
reset();
_type = other._type;
switch (other._type) {
@ -1721,8 +1727,7 @@ public:
BitmapValue& operator&=(const BitmapValue& rhs) {
switch (rhs._type) {
case EMPTY:
_type = EMPTY;
_bitmap.reset();
reset(); // empty & any = empty
break;
case SINGLE:
switch (_type) {
@ -1741,6 +1746,7 @@ public:
_sv = rhs._sv;
}
_bitmap.reset();
_is_shared = false;
break;
case SET:
if (!_set.contains(rhs._sv)) {
@ -1797,6 +1803,7 @@ public:
}
_type = SET;
_bitmap.reset();
_is_shared = false;
_convert_to_smaller_type();
break;
case SET:
@ -1832,7 +1839,6 @@ public:
case SINGLE:
if (_sv == rhs._sv) {
_type = EMPTY;
_bitmap.reset();
} else {
add(rhs._sv);
}
@ -2162,7 +2168,7 @@ public:
// Return how many bytes are required to serialize this bitmap.
// See BitmapTypeCode for the serialized format.
size_t getSizeInBytes() {
size_t getSizeInBytes() const {
size_t res = 0;
switch (_type) {
case EMPTY:
@ -2613,12 +2619,13 @@ public:
}
}
void clear() {
void reset() {
_type = EMPTY;
_bitmap.reset();
_sv = 0;
_set.clear();
_is_shared = false;
_bitmap = nullptr;
}
// Implement an iterator for convenience
friend class BitmapValueIterator;
typedef BitmapValueIterator b_iterator;

View File

@ -143,7 +143,7 @@ struct AggregateFunctionBitmapData {
void reset() {
is_first = true;
value.clear();
value.reset(); // it's better to call reset function by self firstly.
}
BitmapValue& get() { return value; }

View File

@ -46,7 +46,7 @@ struct AggregateFunctionBitmapAggData {
void add(const T& value_) { value.add(value_); }
void reset() { value.clear(); }
void reset() { value.reset(); }
void merge(const AggregateFunctionBitmapAggData& other) { value |= other.value; }

View File

@ -114,9 +114,7 @@ void DataTypeBitMap::to_string(const IColumn& column, size_t row_num, BufferWrit
ColumnPtr ptr = result.first;
row_num = result.second;
auto& data =
const_cast<BitmapValue&>(assert_cast<const ColumnBitmap&>(*ptr).get_element(row_num));
const auto& data = assert_cast<const ColumnBitmap&>(*ptr).get_element(row_num);
std::string buffer(data.getSizeInBytes(), '0');
data.write_to(const_cast<char*>(buffer.data()));
ostr.write(buffer.c_str(), buffer.size());

View File

@ -30,6 +30,7 @@
#include "serde/data_type_bitmap_serde.h"
#include "util/bitmap_value.h"
#include "vec/columns/column_complex.h"
#include "vec/columns/column_const.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
@ -94,7 +95,12 @@ public:
bool can_be_inside_low_cardinality() const override { return false; }
std::string to_string(const IColumn& column, size_t row_num) const override {
return "BitMap()";
auto result = check_column_const_set_readability(column, row_num);
ColumnPtr ptr = result.first;
row_num = result.second;
const auto& data = assert_cast<const ColumnBitmap&>(*ptr).get_element(row_num);
return data.to_string();
}
void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override;
Status from_string(ReadBuffer& rb, IColumn* column) const override;

View File

@ -578,7 +578,7 @@ struct BitmapAndNot {
mid_data &= rvec[i];
res[i] = lvec[i];
res[i] -= mid_data;
mid_data.clear();
mid_data.reset();
}
}
static void vector_scalar(const TData& lvec, const BitmapValue& rval, TData& res) {
@ -589,7 +589,7 @@ struct BitmapAndNot {
mid_data &= rval;
res[i] = lvec[i];
res[i] -= mid_data;
mid_data.clear();
mid_data.reset();
}
}
static void scalar_vector(const BitmapValue& lval, const TData& rvec, TData& res) {
@ -600,7 +600,7 @@ struct BitmapAndNot {
mid_data &= rvec[i];
res[i] = lval;
res[i] -= mid_data;
mid_data.clear();
mid_data.reset();
}
}
};
@ -624,7 +624,7 @@ struct BitmapAndNotCount {
mid_data = lvec[i];
mid_data &= rvec[i];
res[i] = lvec[i].andnot_cardinality(mid_data);
mid_data.clear();
mid_data.reset();
}
}
static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData* res) {
@ -634,7 +634,7 @@ struct BitmapAndNotCount {
mid_data = lval;
mid_data &= rvec[i];
res[i] = lval.andnot_cardinality(mid_data);
mid_data.clear();
mid_data.reset();
}
}
static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData* res) {
@ -644,7 +644,7 @@ struct BitmapAndNotCount {
mid_data = lvec[i];
mid_data &= rval;
res[i] = lvec[i].andnot_cardinality(mid_data);
mid_data.clear();
mid_data.reset();
}
}
};

View File

@ -24,6 +24,7 @@
#include <cstdint>
#include <string>
#include "gtest/gtest.h"
#include "gtest/gtest_pred_impl.h"
#include "util/coding.h"
@ -422,7 +423,7 @@ TEST(BitmapValueTest, set) {
bitmap_value.add(4294967297);
EXPECT_EQ(bitmap_value.get_type_code(), BitmapTypeCode::SINGLE64);
bitmap_value.clear();
bitmap_value.reset();
bitmap_value.add(10);
EXPECT_EQ(bitmap_value.get_type_code(), BitmapTypeCode::SINGLE32);
@ -494,7 +495,7 @@ TEST(BitmapValueTest, add) {
bitmap_value.add_many(values.data(), values.size());
EXPECT_EQ(bitmap_value.get_type_code(), BitmapTypeCode::BITMAP32);
bitmap_value.clear();
bitmap_value.reset();
values.clear();
values.resize(31);
std::iota(values.begin(), values.end(), 0);
@ -545,6 +546,39 @@ void check_bitmap_value_operator(const BitmapValue& left, const BitmapValue& rig
EXPECT_EQ(copy.cardinality(), left_cardinality + right_cardinality - and_cardinality * 2);
}
// '='
TEST(BitmapValueTest, copy_operator) {
BitmapValue test_bitmap;
std::vector<uint64_t> values1(31);
BitmapValue bitmap;
values1.resize(128);
std::iota(values1.begin(), values1.begin() + 16, 0);
std::iota(values1.begin() + 16, values1.begin() + 32, 4294967297);
std::iota(values1.begin() + 32, values1.begin() + 64, 8589934594);
std::iota(values1.begin() + 64, values1.end(), 42949672970);
bitmap.add_many(values1.data(), values1.size());
test_bitmap = bitmap; //should be bitmap
EXPECT_EQ(test_bitmap.cardinality(), bitmap.cardinality());
EXPECT_EQ(test_bitmap.to_string(), bitmap.to_string());
BitmapValue single(1);
test_bitmap = single; //should be single
EXPECT_EQ(test_bitmap.cardinality(), 1);
EXPECT_EQ(test_bitmap.cardinality(), single.cardinality());
EXPECT_EQ(test_bitmap.to_string(), single.to_string());
BitmapValue empty;
test_bitmap = empty; // should be empty
EXPECT_TRUE(test_bitmap.empty());
BitmapValue bitmap2(bitmap);
EXPECT_EQ(bitmap2.to_string(), bitmap.to_string());
bitmap2 = bitmap;
EXPECT_EQ(bitmap2.to_string(), bitmap.to_string());
}
// '-=', '|=', '&=', '^='
TEST(BitmapValueTest, operators) {
config::enable_set_in_bitmap_value = true;
@ -658,7 +692,7 @@ TEST(BitmapValueTest, write_read) {
buffer.reset(new char[size]);
bitmap_single.write_to(buffer.get());
deserialized.clear();
deserialized.reset();
deserialized.deserialize(buffer.get());
check_bitmap_equal(deserialized, bitmap_single);
@ -667,7 +701,7 @@ TEST(BitmapValueTest, write_read) {
buffer.reset(new char[size]);
bitmap_set.write_to(buffer.get());
deserialized.clear();
deserialized.reset();
deserialized.deserialize(buffer.get());
check_bitmap_equal(deserialized, bitmap_set);
@ -676,7 +710,7 @@ TEST(BitmapValueTest, write_read) {
buffer.reset(new char[size]);
bitmap.write_to(buffer.get());
deserialized.clear();
deserialized.reset();
deserialized.deserialize(buffer.get());
check_bitmap_equal(deserialized, bitmap);

View File

@ -0,0 +1,92 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <glog/logging.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-param-test.h>
#include <gtest/gtest-test-part.h>
#include <stddef.h>
#include <memory>
#include <string>
#include <vector>
#include "gtest/gtest_pred_impl.h"
#include "util/bitmap_value.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
#include "vec/columns/column.h"
#include "vec/columns/column_complex.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
#include "vec/common/string_ref.h"
#include "vec/core/field.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
const int agg_test_batch_size = 10;
namespace doris::vectorized {
// declare function
void register_aggregate_function_bitmap(AggregateFunctionSimpleFactory& factory);
TEST(AggBitmapTest, bitmap_union_test) {
std::string function_name = "bitmap_union";
auto data_type = std::make_shared<DataTypeBitMap>();
// Prepare test data.
auto column_bitmap = data_type->create_column();
for (int i = 0; i < agg_test_batch_size; i++) {
BitmapValue bitmap_value(i);
assert_cast<ColumnBitmap&>(*column_bitmap).insert_value(bitmap_value);
}
// Prepare test function and parameters.
AggregateFunctionSimpleFactory factory;
register_aggregate_function_bitmap(factory);
DataTypes data_types = {data_type};
auto agg_function = factory.get(function_name, data_types);
agg_function->set_version(3);
std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
AggregateDataPtr place = memory.get();
agg_function->create(place);
// Do aggregation.
const IColumn* column[1] = {column_bitmap.get()};
for (int i = 0; i < agg_test_batch_size; i++) {
agg_function->add(place, column, i, nullptr);
}
// Check result.
ColumnBitmap ans;
agg_function->insert_result_into(place, ans);
EXPECT_EQ(ans.size(), 1);
EXPECT_EQ(ans.get_element(0).cardinality(), agg_test_batch_size);
agg_function->destroy(place);
auto dst = agg_function->create_serialize_column();
agg_function->streaming_agg_serialize_to_column(column, dst, agg_test_batch_size, nullptr);
for (size_t i = 0; i != agg_test_batch_size; ++i) {
EXPECT_EQ(std::to_string(i), assert_cast<ColumnBitmap&>(*dst).get_element(i).to_string());
}
}
} // namespace doris::vectorized