[bug](bitmap) fix bitmap value copy operator not call reset (#26451)
when a empty bitmap assign to other bitmap the other bitmap should reset self firstly, and then set empty type.
This commit is contained in:
@ -1172,10 +1172,11 @@ public:
|
||||
using SetContainer = phmap::flat_hash_set<T>;
|
||||
|
||||
// Construct an empty bitmap.
|
||||
BitmapValue() : _type(EMPTY), _is_shared(false) {}
|
||||
BitmapValue() : _sv(0), _bitmap(nullptr), _type(EMPTY), _is_shared(false) { _set.clear(); }
|
||||
|
||||
// Construct a bitmap with one element.
|
||||
explicit BitmapValue(uint64_t value) : _sv(value), _type(SINGLE), _is_shared(false) {}
|
||||
explicit BitmapValue(uint64_t value)
|
||||
: _sv(value), _bitmap(nullptr), _type(SINGLE), _is_shared(false) {}
|
||||
|
||||
// Construct a bitmap from serialized data.
|
||||
explicit BitmapValue(const char* src) : _is_shared(false) {
|
||||
@ -1199,7 +1200,7 @@ public:
|
||||
break;
|
||||
}
|
||||
|
||||
if (other._type != EMPTY) {
|
||||
if (other._type == BITMAP) {
|
||||
_is_shared = true;
|
||||
// should also set other's state to shared, so that other bitmap value will
|
||||
// create a new bitmap when it wants to modify it.
|
||||
@ -1229,6 +1230,10 @@ public:
|
||||
}
|
||||
|
||||
BitmapValue& operator=(const BitmapValue& other) {
|
||||
if (this == &other) {
|
||||
return *this;
|
||||
}
|
||||
reset();
|
||||
_type = other._type;
|
||||
switch (other._type) {
|
||||
case EMPTY:
|
||||
@ -1244,7 +1249,7 @@ public:
|
||||
break;
|
||||
}
|
||||
|
||||
if (other._type != EMPTY) {
|
||||
if (other._type == BITMAP) {
|
||||
_is_shared = true;
|
||||
// should also set other's state to shared, so that other bitmap value will
|
||||
// create a new bitmap when it wants to modify it.
|
||||
@ -1265,6 +1270,7 @@ public:
|
||||
if (this == &other) {
|
||||
return *this;
|
||||
}
|
||||
reset();
|
||||
|
||||
_type = other._type;
|
||||
switch (other._type) {
|
||||
@ -1721,8 +1727,7 @@ public:
|
||||
BitmapValue& operator&=(const BitmapValue& rhs) {
|
||||
switch (rhs._type) {
|
||||
case EMPTY:
|
||||
_type = EMPTY;
|
||||
_bitmap.reset();
|
||||
reset(); // empty & any = empty
|
||||
break;
|
||||
case SINGLE:
|
||||
switch (_type) {
|
||||
@ -1741,6 +1746,7 @@ public:
|
||||
_sv = rhs._sv;
|
||||
}
|
||||
_bitmap.reset();
|
||||
_is_shared = false;
|
||||
break;
|
||||
case SET:
|
||||
if (!_set.contains(rhs._sv)) {
|
||||
@ -1797,6 +1803,7 @@ public:
|
||||
}
|
||||
_type = SET;
|
||||
_bitmap.reset();
|
||||
_is_shared = false;
|
||||
_convert_to_smaller_type();
|
||||
break;
|
||||
case SET:
|
||||
@ -1832,7 +1839,6 @@ public:
|
||||
case SINGLE:
|
||||
if (_sv == rhs._sv) {
|
||||
_type = EMPTY;
|
||||
_bitmap.reset();
|
||||
} else {
|
||||
add(rhs._sv);
|
||||
}
|
||||
@ -2162,7 +2168,7 @@ public:
|
||||
|
||||
// Return how many bytes are required to serialize this bitmap.
|
||||
// See BitmapTypeCode for the serialized format.
|
||||
size_t getSizeInBytes() {
|
||||
size_t getSizeInBytes() const {
|
||||
size_t res = 0;
|
||||
switch (_type) {
|
||||
case EMPTY:
|
||||
@ -2613,12 +2619,13 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
void clear() {
|
||||
void reset() {
|
||||
_type = EMPTY;
|
||||
_bitmap.reset();
|
||||
_sv = 0;
|
||||
_set.clear();
|
||||
_is_shared = false;
|
||||
_bitmap = nullptr;
|
||||
}
|
||||
|
||||
// Implement an iterator for convenience
|
||||
friend class BitmapValueIterator;
|
||||
typedef BitmapValueIterator b_iterator;
|
||||
|
||||
@ -143,7 +143,7 @@ struct AggregateFunctionBitmapData {
|
||||
|
||||
void reset() {
|
||||
is_first = true;
|
||||
value.clear();
|
||||
value.reset(); // it's better to call reset function by self firstly.
|
||||
}
|
||||
|
||||
BitmapValue& get() { return value; }
|
||||
|
||||
@ -46,7 +46,7 @@ struct AggregateFunctionBitmapAggData {
|
||||
|
||||
void add(const T& value_) { value.add(value_); }
|
||||
|
||||
void reset() { value.clear(); }
|
||||
void reset() { value.reset(); }
|
||||
|
||||
void merge(const AggregateFunctionBitmapAggData& other) { value |= other.value; }
|
||||
|
||||
|
||||
@ -114,9 +114,7 @@ void DataTypeBitMap::to_string(const IColumn& column, size_t row_num, BufferWrit
|
||||
ColumnPtr ptr = result.first;
|
||||
row_num = result.second;
|
||||
|
||||
auto& data =
|
||||
const_cast<BitmapValue&>(assert_cast<const ColumnBitmap&>(*ptr).get_element(row_num));
|
||||
|
||||
const auto& data = assert_cast<const ColumnBitmap&>(*ptr).get_element(row_num);
|
||||
std::string buffer(data.getSizeInBytes(), '0');
|
||||
data.write_to(const_cast<char*>(buffer.data()));
|
||||
ostr.write(buffer.c_str(), buffer.size());
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
#include "serde/data_type_bitmap_serde.h"
|
||||
#include "util/bitmap_value.h"
|
||||
#include "vec/columns/column_complex.h"
|
||||
#include "vec/columns/column_const.h"
|
||||
#include "vec/core/field.h"
|
||||
#include "vec/core/types.h"
|
||||
#include "vec/data_types/data_type.h"
|
||||
@ -94,7 +95,12 @@ public:
|
||||
bool can_be_inside_low_cardinality() const override { return false; }
|
||||
|
||||
std::string to_string(const IColumn& column, size_t row_num) const override {
|
||||
return "BitMap()";
|
||||
auto result = check_column_const_set_readability(column, row_num);
|
||||
ColumnPtr ptr = result.first;
|
||||
row_num = result.second;
|
||||
|
||||
const auto& data = assert_cast<const ColumnBitmap&>(*ptr).get_element(row_num);
|
||||
return data.to_string();
|
||||
}
|
||||
void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override;
|
||||
Status from_string(ReadBuffer& rb, IColumn* column) const override;
|
||||
|
||||
@ -578,7 +578,7 @@ struct BitmapAndNot {
|
||||
mid_data &= rvec[i];
|
||||
res[i] = lvec[i];
|
||||
res[i] -= mid_data;
|
||||
mid_data.clear();
|
||||
mid_data.reset();
|
||||
}
|
||||
}
|
||||
static void vector_scalar(const TData& lvec, const BitmapValue& rval, TData& res) {
|
||||
@ -589,7 +589,7 @@ struct BitmapAndNot {
|
||||
mid_data &= rval;
|
||||
res[i] = lvec[i];
|
||||
res[i] -= mid_data;
|
||||
mid_data.clear();
|
||||
mid_data.reset();
|
||||
}
|
||||
}
|
||||
static void scalar_vector(const BitmapValue& lval, const TData& rvec, TData& res) {
|
||||
@ -600,7 +600,7 @@ struct BitmapAndNot {
|
||||
mid_data &= rvec[i];
|
||||
res[i] = lval;
|
||||
res[i] -= mid_data;
|
||||
mid_data.clear();
|
||||
mid_data.reset();
|
||||
}
|
||||
}
|
||||
};
|
||||
@ -624,7 +624,7 @@ struct BitmapAndNotCount {
|
||||
mid_data = lvec[i];
|
||||
mid_data &= rvec[i];
|
||||
res[i] = lvec[i].andnot_cardinality(mid_data);
|
||||
mid_data.clear();
|
||||
mid_data.reset();
|
||||
}
|
||||
}
|
||||
static void scalar_vector(const BitmapValue& lval, const TData& rvec, ResTData* res) {
|
||||
@ -634,7 +634,7 @@ struct BitmapAndNotCount {
|
||||
mid_data = lval;
|
||||
mid_data &= rvec[i];
|
||||
res[i] = lval.andnot_cardinality(mid_data);
|
||||
mid_data.clear();
|
||||
mid_data.reset();
|
||||
}
|
||||
}
|
||||
static void vector_scalar(const TData& lvec, const BitmapValue& rval, ResTData* res) {
|
||||
@ -644,7 +644,7 @@ struct BitmapAndNotCount {
|
||||
mid_data = lvec[i];
|
||||
mid_data &= rval;
|
||||
res[i] = lvec[i].andnot_cardinality(mid_data);
|
||||
mid_data.clear();
|
||||
mid_data.reset();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -24,6 +24,7 @@
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
#include "gtest/gtest_pred_impl.h"
|
||||
#include "util/coding.h"
|
||||
|
||||
@ -422,7 +423,7 @@ TEST(BitmapValueTest, set) {
|
||||
|
||||
bitmap_value.add(4294967297);
|
||||
EXPECT_EQ(bitmap_value.get_type_code(), BitmapTypeCode::SINGLE64);
|
||||
bitmap_value.clear();
|
||||
bitmap_value.reset();
|
||||
|
||||
bitmap_value.add(10);
|
||||
EXPECT_EQ(bitmap_value.get_type_code(), BitmapTypeCode::SINGLE32);
|
||||
@ -494,7 +495,7 @@ TEST(BitmapValueTest, add) {
|
||||
bitmap_value.add_many(values.data(), values.size());
|
||||
EXPECT_EQ(bitmap_value.get_type_code(), BitmapTypeCode::BITMAP32);
|
||||
|
||||
bitmap_value.clear();
|
||||
bitmap_value.reset();
|
||||
values.clear();
|
||||
values.resize(31);
|
||||
std::iota(values.begin(), values.end(), 0);
|
||||
@ -545,6 +546,39 @@ void check_bitmap_value_operator(const BitmapValue& left, const BitmapValue& rig
|
||||
EXPECT_EQ(copy.cardinality(), left_cardinality + right_cardinality - and_cardinality * 2);
|
||||
}
|
||||
|
||||
// '='
|
||||
TEST(BitmapValueTest, copy_operator) {
|
||||
BitmapValue test_bitmap;
|
||||
|
||||
std::vector<uint64_t> values1(31);
|
||||
BitmapValue bitmap;
|
||||
values1.resize(128);
|
||||
std::iota(values1.begin(), values1.begin() + 16, 0);
|
||||
std::iota(values1.begin() + 16, values1.begin() + 32, 4294967297);
|
||||
std::iota(values1.begin() + 32, values1.begin() + 64, 8589934594);
|
||||
std::iota(values1.begin() + 64, values1.end(), 42949672970);
|
||||
bitmap.add_many(values1.data(), values1.size());
|
||||
|
||||
test_bitmap = bitmap; //should be bitmap
|
||||
EXPECT_EQ(test_bitmap.cardinality(), bitmap.cardinality());
|
||||
EXPECT_EQ(test_bitmap.to_string(), bitmap.to_string());
|
||||
|
||||
BitmapValue single(1);
|
||||
test_bitmap = single; //should be single
|
||||
EXPECT_EQ(test_bitmap.cardinality(), 1);
|
||||
EXPECT_EQ(test_bitmap.cardinality(), single.cardinality());
|
||||
EXPECT_EQ(test_bitmap.to_string(), single.to_string());
|
||||
|
||||
BitmapValue empty;
|
||||
test_bitmap = empty; // should be empty
|
||||
EXPECT_TRUE(test_bitmap.empty());
|
||||
|
||||
BitmapValue bitmap2(bitmap);
|
||||
EXPECT_EQ(bitmap2.to_string(), bitmap.to_string());
|
||||
bitmap2 = bitmap;
|
||||
EXPECT_EQ(bitmap2.to_string(), bitmap.to_string());
|
||||
}
|
||||
|
||||
// '-=', '|=', '&=', '^='
|
||||
TEST(BitmapValueTest, operators) {
|
||||
config::enable_set_in_bitmap_value = true;
|
||||
@ -658,7 +692,7 @@ TEST(BitmapValueTest, write_read) {
|
||||
buffer.reset(new char[size]);
|
||||
|
||||
bitmap_single.write_to(buffer.get());
|
||||
deserialized.clear();
|
||||
deserialized.reset();
|
||||
deserialized.deserialize(buffer.get());
|
||||
|
||||
check_bitmap_equal(deserialized, bitmap_single);
|
||||
@ -667,7 +701,7 @@ TEST(BitmapValueTest, write_read) {
|
||||
buffer.reset(new char[size]);
|
||||
|
||||
bitmap_set.write_to(buffer.get());
|
||||
deserialized.clear();
|
||||
deserialized.reset();
|
||||
deserialized.deserialize(buffer.get());
|
||||
|
||||
check_bitmap_equal(deserialized, bitmap_set);
|
||||
@ -676,7 +710,7 @@ TEST(BitmapValueTest, write_read) {
|
||||
buffer.reset(new char[size]);
|
||||
|
||||
bitmap.write_to(buffer.get());
|
||||
deserialized.clear();
|
||||
deserialized.reset();
|
||||
deserialized.deserialize(buffer.get());
|
||||
|
||||
check_bitmap_equal(deserialized, bitmap);
|
||||
|
||||
92
be/test/vec/aggregate_functions/agg_bitmap_test.cpp
Normal file
92
be/test/vec/aggregate_functions/agg_bitmap_test.cpp
Normal file
@ -0,0 +1,92 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#include <glog/logging.h>
|
||||
#include <gtest/gtest-message.h>
|
||||
#include <gtest/gtest-param-test.h>
|
||||
#include <gtest/gtest-test-part.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "gtest/gtest_pred_impl.h"
|
||||
#include "util/bitmap_value.h"
|
||||
#include "vec/aggregate_functions/aggregate_function.h"
|
||||
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
|
||||
#include "vec/columns/column.h"
|
||||
#include "vec/columns/column_complex.h"
|
||||
#include "vec/columns/column_string.h"
|
||||
#include "vec/columns/column_vector.h"
|
||||
#include "vec/columns/columns_number.h"
|
||||
#include "vec/common/string_ref.h"
|
||||
#include "vec/core/field.h"
|
||||
#include "vec/core/types.h"
|
||||
#include "vec/data_types/data_type_bitmap.h"
|
||||
#include "vec/data_types/data_type_decimal.h"
|
||||
#include "vec/data_types/data_type_number.h"
|
||||
#include "vec/data_types/data_type_string.h"
|
||||
|
||||
const int agg_test_batch_size = 10;
|
||||
|
||||
namespace doris::vectorized {
|
||||
// declare function
|
||||
void register_aggregate_function_bitmap(AggregateFunctionSimpleFactory& factory);
|
||||
|
||||
TEST(AggBitmapTest, bitmap_union_test) {
|
||||
std::string function_name = "bitmap_union";
|
||||
auto data_type = std::make_shared<DataTypeBitMap>();
|
||||
// Prepare test data.
|
||||
auto column_bitmap = data_type->create_column();
|
||||
for (int i = 0; i < agg_test_batch_size; i++) {
|
||||
BitmapValue bitmap_value(i);
|
||||
assert_cast<ColumnBitmap&>(*column_bitmap).insert_value(bitmap_value);
|
||||
}
|
||||
|
||||
// Prepare test function and parameters.
|
||||
AggregateFunctionSimpleFactory factory;
|
||||
register_aggregate_function_bitmap(factory);
|
||||
DataTypes data_types = {data_type};
|
||||
auto agg_function = factory.get(function_name, data_types);
|
||||
agg_function->set_version(3);
|
||||
std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
|
||||
AggregateDataPtr place = memory.get();
|
||||
agg_function->create(place);
|
||||
|
||||
// Do aggregation.
|
||||
const IColumn* column[1] = {column_bitmap.get()};
|
||||
for (int i = 0; i < agg_test_batch_size; i++) {
|
||||
agg_function->add(place, column, i, nullptr);
|
||||
}
|
||||
|
||||
// Check result.
|
||||
ColumnBitmap ans;
|
||||
agg_function->insert_result_into(place, ans);
|
||||
EXPECT_EQ(ans.size(), 1);
|
||||
EXPECT_EQ(ans.get_element(0).cardinality(), agg_test_batch_size);
|
||||
agg_function->destroy(place);
|
||||
|
||||
auto dst = agg_function->create_serialize_column();
|
||||
agg_function->streaming_agg_serialize_to_column(column, dst, agg_test_batch_size, nullptr);
|
||||
|
||||
for (size_t i = 0; i != agg_test_batch_size; ++i) {
|
||||
EXPECT_EQ(std::to_string(i), assert_cast<ColumnBitmap&>(*dst).get_element(i).to_string());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace doris::vectorized
|
||||
Reference in New Issue
Block a user