oceanbase/unittest/sql/engine/test_bit_vector.cpp

370 lines
14 KiB
C++

/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include <gtest/gtest.h>
#include "src/sql/engine/ob_bit_vector.h"
#include "src/sql/ob_eval_bound.h"
#define private public
#define WordType uint64_t
using namespace std;
namespace oceanbase
{
namespace sql
{
class ObTestBitVector : public ::testing::Test
{
public:
ObTestBitVector()
{}
~ObTestBitVector()
{}
virtual void SetUp()
{}
virtual void TearDown()
{}
private:
DISALLOW_COPY_AND_ASSIGN(ObTestBitVector);
};
void expect_range(ObBitVector *dest_bit_vector, int64_t start, int64_t middle, int64_t end)
{
for (int64_t i = 0; i < start; i++) {
EXPECT_EQ(0, dest_bit_vector->at(i));
}
for (int64_t i = start; i < middle; i++) {
EXPECT_EQ(1, dest_bit_vector->at(i));
dest_bit_vector->unset(i);
}
EXPECT_EQ(0, dest_bit_vector->at(middle));
for (int64_t i = middle + 1; i < end; i++) {
EXPECT_EQ(1, dest_bit_vector->at(i));
dest_bit_vector->unset(i);
}
for (int64_t i = end; i < end + 100; i++) {
EXPECT_EQ(0, dest_bit_vector->at(i));
}
}
void test_range(ObBitVector *dest_bit_vector, ObBitVector *src_bit_vector, int64_t start,
int64_t end)
{
for (int i = 0; i < 2000; i++) {
src_bit_vector->set(i);
}
int64_t middle = (start + end) / 2;
dest_bit_vector->set_all(start, end);
dest_bit_vector->unset(middle);
expect_range(dest_bit_vector, start, middle, end);
dest_bit_vector->set_all(start, end);
dest_bit_vector->unset_all(start, end);
for (int64_t i = 0; i < end + 100; i++) {
EXPECT_EQ(0, dest_bit_vector->at(i));
}
src_bit_vector->unset(middle);
dest_bit_vector->deep_copy(*src_bit_vector, start, end);
expect_range(dest_bit_vector, start, middle, end);
dest_bit_vector->bit_or(*src_bit_vector, start, end);
expect_range(dest_bit_vector, start, middle, end);
src_bit_vector->set(middle);
for (int64_t i = start; i < end; i++) {
dest_bit_vector->set(i);
}
EXPECT_EQ(1, dest_bit_vector->is_all_true(start, end));
if (start > 0) {
EXPECT_EQ(0, dest_bit_vector->is_all_true(start - 1, end));
}
EXPECT_EQ(0, dest_bit_vector->is_all_true(start, end + 1));
for (int64_t i = start; i < end; i++) {
dest_bit_vector->unset(i);
}
}
TEST(ObTestBitVector, bit_or_range)
{
char src_buf[1024];
char dest_buf[1024];
MEMSET(src_buf, 0, 1024);
MEMSET(dest_buf, 0, 1024);
ObBitVector *src_bit_vector = new (src_buf) ObBitVector;
ObBitVector *dest_bit_vector = new (dest_buf) ObBitVector;
test_range(dest_bit_vector, src_bit_vector, 13, 40);
test_range(dest_bit_vector, src_bit_vector, 13, 63);
test_range(dest_bit_vector, src_bit_vector, 13, 64);
test_range(dest_bit_vector, src_bit_vector, 13, 127);
test_range(dest_bit_vector, src_bit_vector, 13, 128);
test_range(dest_bit_vector, src_bit_vector, 13, 258);
test_range(dest_bit_vector, src_bit_vector, 0, 50);
test_range(dest_bit_vector, src_bit_vector, 0, 100);
test_range(dest_bit_vector, src_bit_vector, 0, 63);
test_range(dest_bit_vector, src_bit_vector, 0, 64);
test_range(dest_bit_vector, src_bit_vector, 0, 0);
test_range(dest_bit_vector, src_bit_vector, 64, 64);
test_range(dest_bit_vector, src_bit_vector, 64, 127);
}
// copy from the previos version ObBitVectorImpl, for check result
template <bool IS_FLIP, typename OP>
void copied_inner_foreach(const ObBitVectorImpl<WordType> &skip, int64_t size, OP op)
{
int ret = OB_SUCCESS;
int64_t tmp_step = 0;
typedef uint16_t StepType;
const int64_t step_size = sizeof(StepType) * CHAR_BIT;
int64_t word_cnt = ObBitVectorImpl<WordType>::word_count(size);
int64_t step = 0;
const int64_t remain = size % ObBitVectorImpl<WordType>::WORD_BITS;
for (int64_t i = 0; i < word_cnt && OB_SUCC(ret); ++i) {
WordType s_word = (IS_FLIP ? ~skip.data_[i] : skip.data_[i]);
// bool all_bits = (false ? skip.data_[i] == 0 : (~skip.data_[i]) == 0);
if (i >= word_cnt - 1 && remain > 0) {
// all_bits = ((false ? skip.data_[i] : ~skip.data_[i]) & ((1LU << remain) - 1)) == 0;
s_word = s_word & ((1LU << remain) - 1);
}
if (s_word > 0) {
WordType tmp_s_word = s_word;
tmp_step = step;
do {
uint16_t step_val = tmp_s_word & 0xFFFF;
if (0xFFFF == step_val) {
// no skip
// last batch ?
int64_t mini_cnt = step_size;
if (tmp_step + step_size > size) {
mini_cnt = size - tmp_step;
}
for (int64_t j = 0; OB_SUCC(ret) && j < mini_cnt; j++) {
int64_t k = j + tmp_step;
ret = op(k);
}
} else if (step_val > 0) {
do {
int64_t start_bit_idx = __builtin_ctz(step_val);
int64_t k = start_bit_idx + tmp_step;
ret = op(k);
step_val &= (step_val - 1);
} while (step_val > 0 && OB_SUCC(ret)); // end for, for one step size
}
tmp_step += step_size;
tmp_s_word >>= step_size;
} while (tmp_s_word > 0 && OB_SUCC(ret)); // one word-uint64_t
}
step += ObBitVectorImpl<WordType>::WORD_BITS;
} // end for
}
// 这部分代码不要删除,用于调试新接口,因为ob的单测编译要编译一大堆无效文件,而ob_bit_vector.h这个头文件又被很多地方引用,
// 导致编译速度巨慢,尽量不要直接在ob_bit_vector.h改代码调试,而是在这里先把接口改正确了,然后再放到ob_bit_vector.h里面
// 进行调试
template <bool IS_FLIP, typename OP>
void my_foreach_bound(const ObBitVectorImpl<WordType> &skip, int64_t start_idx, int64_t end_idx, OP op)
{
int ret = OB_SUCCESS;
int64_t tmp_step = 0;
typedef uint16_t StepType;
const int64_t step_size = sizeof(StepType) * CHAR_BIT;
int64_t start_cnt = start_idx / ObBitVectorImpl<WordType>::WORD_BITS; // start_idx is included
const int64_t begin_remain = start_idx % ObBitVectorImpl<WordType>::WORD_BITS;
const int64_t begin_mask = (-1LU << begin_remain);
int64_t end_cnt = ObBitVectorImpl<WordType>::word_count(end_idx); // end_idx is not included
const int64_t end_remain = end_idx % ObBitVectorImpl<WordType>::WORD_BITS;
const int64_t end_mask = (1LU << end_remain) - 1;
int64_t step = ObBitVectorImpl<WordType>::WORD_BITS * start_cnt;
for (int64_t i = start_cnt; i < end_cnt && OB_SUCC(ret); ++i) {
WordType s_word = (IS_FLIP ? ~skip.data_[i] : skip.data_[i]);
if (start_cnt == end_cnt - 1) {
// if only one word, both begin_mask and end_mask should be used
if (begin_remain > 0) {
s_word = s_word & begin_mask;
}
if (end_remain > 0) {
s_word = s_word & end_mask;
}
} else if (i == start_cnt && begin_remain > 0) {
// add begin_mask for first word, remove the bit less than start_idx
s_word = s_word & begin_mask;
} else if (i == end_cnt - 1 && end_remain > 0) {
// add end_mask for last word, remove the bit greater equal than end_idx
s_word = s_word & end_mask;
}
if (s_word > 0) {
WordType tmp_s_word = s_word;
tmp_step = step;
do {
uint16_t step_val = tmp_s_word & 0xFFFF;
if (0xFFFF == step_val) {
for (int64_t j = 0; OB_SUCC(ret) && j < step_size; j++) {
int64_t k = j + tmp_step;
ret = op(k);
}
} else if (step_val > 0) {
do {
int64_t start_bit_idx = __builtin_ctz(step_val);
int64_t k = start_bit_idx + tmp_step;
ret = op(k);
step_val &= (step_val - 1);
} while (step_val > 0 && OB_SUCC(ret)); // end for, for one step size
}
tmp_step += step_size;
tmp_s_word >>= step_size;
} while (tmp_s_word > 0 && OB_SUCC(ret)); // one word-uint64_t
}
step += ObBitVectorImpl<WordType>::ObBitVectorImpl<WordType>::WORD_BITS;
} // end for
}
void test_foreach_result_random(int64_t batch_size, int64_t start_idx, int64_t end_idx)
{
void *buf = malloc(batch_size);
ObBitVector *bit_vector = to_bit_vector(buf);
bit_vector->init(batch_size);
int64_t true_start_idx = common::ObRandom::rand(0, batch_size);
int64_t true_end_idx = common::ObRandom::rand(0, batch_size);
if (true_start_idx > true_end_idx) {
swap(true_start_idx, true_end_idx);
}
bit_vector->set_all(true_start_idx, true_end_idx);
EvalBound bound(batch_size, start_idx, end_idx, false);
// cout << "start_idx: " << start_idx << "\nend_idx: " << end_idx
// << "\ntrue_start_idx: " << true_start_idx << "\ntrue_end_idx: " << true_end_idx << endl;
// test foreach
std::vector<int> result_foreach_ori(batch_size, 0);
std::vector<int> result_foreach_batch(batch_size, 0);
std::vector<int> result_foreach_bound(batch_size, 0);
copied_inner_foreach<false>(*bit_vector, end_idx, [&](int64_t idx) __attribute__((always_inline)) {
result_foreach_ori[idx] = 1;
return OB_SUCCESS;
});
ObBitVector::foreach (*bit_vector, end_idx, [&](int64_t idx) __attribute__((always_inline)) {
result_foreach_batch[idx] = 1;
return OB_SUCCESS;
});
ObBitVector::foreach (*bit_vector, bound, [&](int64_t idx) __attribute__((always_inline)) {
result_foreach_bound[idx] = 1;
return OB_SUCCESS;
});
// test flip_foreach
std::vector<int> result_flip_foreach_ori(batch_size, 0);
std::vector<int> result_flip_foreach_batch(batch_size, 0);
std::vector<int> result_flip_foreach_bound(batch_size, 0);
copied_inner_foreach<true>(*bit_vector, end_idx, [&](int64_t idx) __attribute__((always_inline)) {
result_flip_foreach_ori[idx] = 1;
return OB_SUCCESS;
});
ObBitVector::flip_foreach(*bit_vector, end_idx, [&](int64_t idx) __attribute__((always_inline)) {
result_flip_foreach_batch[idx] = 1;
return OB_SUCCESS;
});
ObBitVector::flip_foreach(*bit_vector, bound, [&](int64_t idx) __attribute__((always_inline)) {
result_flip_foreach_bound[idx] = 1;
return OB_SUCCESS;
});
// result结果,0表示未处理,1表示处理
for (int64_t i = 0; i < batch_size; ++i) {
// 固定check新的batch接口是否和老的batch接口结果是否相同
EXPECT_EQ(result_foreach_ori[i], result_foreach_batch[i]);
EXPECT_EQ(result_flip_foreach_ori[i], result_flip_foreach_batch[i]);
// 1. 对于 i < start_idx 部分, bound接口不会处理,只有batch接口和copied接口会处理
// 2. 对于 start_idx <= i < end_idx 部分, 所有接口都会处理
// 3. 对于 i >= end_idx 部分, 所有接口都不会处理
if (i < start_idx) {
if (i < true_start_idx) {
// 此部分 bit vector 为 0,因此 foreach 结果为 0, flip foreach 结果为 1
EXPECT_EQ(0, result_foreach_batch[i]);
EXPECT_EQ(1, result_flip_foreach_batch[i]);
} else if (i >= true_start_idx && i < true_end_idx) {
// 此部分 bit vector 为 1,因此 foreach 结果为 1, flip foreach 结果为 0
EXPECT_EQ(1, result_foreach_batch[i]);
EXPECT_EQ(0, result_flip_foreach_batch[i]);
} else if (i >= true_end_idx) {
// 此部分 bit vector 为 0,因此 foreach 结果为 0, flip foreach 结果为 1
EXPECT_EQ(0, result_foreach_batch[i]);
EXPECT_EQ(1, result_flip_foreach_batch[i]);
}
// bound接口不会处理这部分数据,因此全部结果为 0
EXPECT_EQ(0, result_foreach_bound[i]);
EXPECT_EQ(0, result_flip_foreach_bound[i]);
} else if (i >= start_idx && i < end_idx) {
if (i < true_start_idx) {
// 此部分 bit vector 为 0,因此 foreach 结果为 0, flip foreach 结果为 1
EXPECT_EQ(0, result_foreach_batch[i]);
EXPECT_EQ(1, result_flip_foreach_batch[i]);
EXPECT_EQ(0, result_foreach_bound[i]);
EXPECT_EQ(1, result_flip_foreach_bound[i]);
} else if (i >= true_start_idx && i < true_end_idx) {
// 此部分 bit vector 为 1,因此 foreach 结果为 1, flip foreach 结果为 0
EXPECT_EQ(1, result_foreach_batch[i]);
EXPECT_EQ(0, result_flip_foreach_batch[i]);
EXPECT_EQ(1, result_foreach_bound[i]);
EXPECT_EQ(0, result_flip_foreach_bound[i]);
} else if (i >= true_end_idx) {
// 此部分 bit vector 为 0,因此 foreach 结果为 0, flip foreach 结果为 1
EXPECT_EQ(0, result_foreach_batch[i]);
EXPECT_EQ(1, result_flip_foreach_batch[i]);
EXPECT_EQ(0, result_foreach_bound[i]);
EXPECT_EQ(1, result_flip_foreach_bound[i]);
}
} else if (i >= end_idx) {
// 所有接口不会处理这部分数据,因此全部结果为 0
EXPECT_EQ(0, result_foreach_batch[i]);
EXPECT_EQ(0, result_flip_foreach_batch[i]);
EXPECT_EQ(0, result_foreach_bound[i]);
EXPECT_EQ(0, result_flip_foreach_bound[i]);
}
}
}
TEST(ObTestBitVector, test_foreach)
{
int64_t batch_size = common::ObRandom::rand(0, 1024);
int64_t round = 100;
for (int64_t i = 0; i < round; ++i) {
int64_t start_idx = common::ObRandom::rand(0, batch_size);
int64_t end_idx = common::ObRandom::rand(0, batch_size);
if (start_idx > end_idx) {
swap(start_idx, end_idx);
}
test_foreach_result_random(batch_size, start_idx, end_idx);
}
}
}
}
int main(int argc, char **argv)
{
::testing::InitGoogleTest(&argc, argv);
int ret = RUN_ALL_TESTS();
return ret;
}