[Vec][Storage] Support delete condition;ut (#8091)

Co-authored-by: Wang Bo <wangbo36@meituan.com>
This commit is contained in:
wangbo
2022-02-23 12:48:18 +08:00
committed by GitHub
parent d17ed5e27a
commit e3f1efcbbf
3 changed files with 265 additions and 17 deletions

View File

@ -259,12 +259,31 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(LessEqualPredicate, <=)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterPredicate, >)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterEqualPredicate, >=)
// todo(wb) support it
#define COMPARISON_PRED_COLUMN_EVALUATE_OR(CLASS, OP) \
template <class type> \
void CLASS<type>::evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \
bool* flags) const {}
#define COMPARISON_PRED_COLUMN_EVALUATE_OR(CLASS, OP) \
template <class type> \
void CLASS<type>::evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const { \
if (column.is_nullable()) { \
auto* nullable_column = vectorized::check_and_get_column<vectorized::ColumnNullable>(column); \
auto& data_array = reinterpret_cast<const vectorized::PredicateColumnType<type>&>(nullable_column->get_nested_column()).get_data(); \
auto& null_bitmap = reinterpret_cast<const vectorized::ColumnVector<uint8_t>&>(*(nullable_column->get_null_map_column_ptr())).get_data();\
for (uint16_t i = 0; i < size; i++) { \
if (flags[i]) continue; \
uint16_t idx = sel[i]; \
bool ret = !null_bitmap[idx] && (data_array[idx] OP _value); \
flags[i] |= _opposite ? !ret : ret; \
} \
} else { \
auto& predicate_column = reinterpret_cast<vectorized::PredicateColumnType<type>&>(column); \
auto& data_array = predicate_column.get_data(); \
for (uint16_t i = 0; i < size; ++i) { \
if (flags[i]) continue; \
uint16_t idx = sel[i]; \
bool ret = data_array[idx] OP _value; \
flags[i] |= _opposite ? !ret : ret; \
} \
} \
}
COMPARISON_PRED_COLUMN_EVALUATE_OR(EqualPredicate, ==)
COMPARISON_PRED_COLUMN_EVALUATE_OR(NotEqualPredicate, !=)
COMPARISON_PRED_COLUMN_EVALUATE_OR(LessPredicate, <)
@ -304,12 +323,31 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(LessEqualPredicate, <=)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterPredicate, >)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterEqualPredicate, >=)
//todo(wb) support it
#define COMPARISON_PRED_COLUMN_EVALUATE_AND(CLASS, OP) \
template <class type> \
void CLASS<type>::evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \
bool* flags) const {}
#define COMPARISON_PRED_COLUMN_EVALUATE_AND(CLASS, OP) \
template <class type> \
void CLASS<type>::evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const { \
if (column.is_nullable()) { \
auto* nullable_column = vectorized::check_and_get_column<vectorized::ColumnNullable>(column); \
auto& data_array = reinterpret_cast<const vectorized::PredicateColumnType<type>&>(nullable_column->get_nested_column()).get_data(); \
auto& null_bitmap = reinterpret_cast<const vectorized::ColumnVector<uint8_t>&>(*(nullable_column->get_null_map_column_ptr())).get_data();\
for (uint16_t i = 0; i < size; i++) { \
if (!flags[i]) continue; \
uint16_t idx = sel[i]; \
bool ret = !null_bitmap[idx] && (data_array[idx] OP _value); \
flags[i] &= _opposite ? !ret : ret; \
} \
} else { \
auto& predicate_column = reinterpret_cast<vectorized::PredicateColumnType<type>&>(column); \
auto& data_array = predicate_column.get_data(); \
for (uint16_t i = 0; i < size; ++i) { \
if (!flags[i]) continue; \
uint16_t idx = sel[i]; \
bool ret = data_array[idx] OP _value; \
flags[i] &= _opposite ? !ret : ret; \
} \
} \
}
COMPARISON_PRED_COLUMN_EVALUATE_AND(EqualPredicate, ==)
COMPARISON_PRED_COLUMN_EVALUATE_AND(NotEqualPredicate, !=)
COMPARISON_PRED_COLUMN_EVALUATE_AND(LessPredicate, <)

View File

@ -598,7 +598,10 @@ void SegmentIterator::_vec_init_lazy_materialization() {
bool is_predicate_column_exists = false;
bool is_non_predicate_column_exists = false;
if (!_col_predicates.empty()) {
std::set<ColumnId> del_cond_id_set;
_opts.delete_condition_predicates->get_all_column_ids(del_cond_id_set);
if (!_col_predicates.empty() || !del_cond_id_set.empty()) {
is_predicate_column_exists = true;
std::set<ColumnId> short_cir_pred_col_id_set; // using set for distinct cid
@ -628,11 +631,16 @@ void SegmentIterator::_vec_init_lazy_materialization() {
}
// handle delete_condition
std::set<ColumnId> del_cond_id_set;
_opts.delete_condition_predicates.get()->get_all_column_ids(del_cond_id_set);
short_cir_pred_col_id_set.insert(del_cond_id_set.begin(), del_cond_id_set.end());
pred_column_ids.insert(del_cond_id_set.begin(), del_cond_id_set.end());
if (!del_cond_id_set.empty()) {
short_cir_pred_col_id_set.insert(del_cond_id_set.begin(), del_cond_id_set.end());
pred_column_ids.insert(del_cond_id_set.begin(), del_cond_id_set.end());
_is_all_column_basic_type = false;
for (auto cid : del_cond_id_set) {
_is_pred_column[cid] = true;
}
}
if (_schema.column_ids().size() > pred_column_ids.size()) {
for (auto cid : _schema.column_ids()) {
if (!_is_pred_column[cid]) {

View File

@ -29,6 +29,7 @@
#include "runtime/string_value.hpp"
#include "runtime/vectorized_row_batch.h"
#include "util/logging.h"
#include "vec/columns/predicate_column.h"
namespace doris {
@ -96,6 +97,31 @@ TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN) {
ASSERT_FLOAT_EQ(*(float *) col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.0);
}
TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN_VEC) {
vectorized::MutableColumns block;
block.push_back(vectorized::PredicateColumnType<int>::create());
int value = 5;
int rows = 10;
int col_idx = 0;
std::unique_ptr<ColumnPredicate> pred(new EqualPredicate<int>(col_idx, value));
SingleColumnBlockPredicate single_column_block_pred(pred.get());
uint16_t sel_idx[rows];
uint16_t selected_size = rows;
block[col_idx]->reserve(rows);
for (int i = 0; i < rows; i++) {
int* int_ptr = &i;
block[col_idx]->insert_data((char*)int_ptr, 0);
sel_idx[i] = i;
}
single_column_block_pred.evaluate(block, sel_idx, &selected_size);
ASSERT_EQ(selected_size, 1);
auto* pred_col = reinterpret_cast<vectorized::PredicateColumnType<int>*>(block[col_idx].get());
ASSERT_EQ(pred_col->get_data()[sel_idx[0]], value);
}
TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN) {
TabletSchema tablet_schema;
@ -130,6 +156,38 @@ TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN) {
ASSERT_DOUBLE_EQ(*(double *) col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 4.0);
}
TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN_VEC) {
vectorized::MutableColumns block;
block.push_back(vectorized::PredicateColumnType<int>::create());
int less_value = 5;
int great_value = 3;
int rows = 10;
int col_idx = 0;
std::unique_ptr<ColumnPredicate> less_pred(new LessPredicate<int>(col_idx, less_value));
std::unique_ptr<ColumnPredicate> great_pred(new GreaterPredicate<int>(col_idx, great_value));
auto single_less_pred = new SingleColumnBlockPredicate(less_pred.get());
auto single_great_pred = new SingleColumnBlockPredicate(great_pred.get());
AndBlockColumnPredicate and_block_column_pred;
and_block_column_pred.add_column_predicate(single_less_pred);
and_block_column_pred.add_column_predicate(single_great_pred);
uint16_t sel_idx[rows];
uint16_t selected_size = rows;
block[col_idx]->reserve(rows);
for (int i = 0; i < rows; i++) {
int* int_ptr = &i;
block[col_idx]->insert_data((char*)int_ptr, 0);
sel_idx[i] = i;
}
and_block_column_pred.evaluate(block, sel_idx, &selected_size);
ASSERT_EQ(selected_size, 1);
auto* pred_col = reinterpret_cast<vectorized::PredicateColumnType<int>*>(block[col_idx].get());
ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 4);
}
TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN) {
TabletSchema tablet_schema;
SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
@ -164,6 +222,38 @@ TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN) {
ASSERT_DOUBLE_EQ(*(double *) col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 0.0);
}
TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN_VEC) {
vectorized::MutableColumns block;
block.push_back(vectorized::PredicateColumnType<int>::create());
int less_value = 5;
int great_value = 3;
int rows = 10;
int col_idx = 0;
std::unique_ptr<ColumnPredicate> less_pred(new LessPredicate<int>(col_idx, less_value));
std::unique_ptr<ColumnPredicate> great_pred(new GreaterPredicate<int>(col_idx, great_value));
auto single_less_pred = new SingleColumnBlockPredicate(less_pred.get());
auto single_great_pred = new SingleColumnBlockPredicate(great_pred.get());
OrBlockColumnPredicate or_block_column_pred;
or_block_column_pred.add_column_predicate(single_less_pred);
or_block_column_pred.add_column_predicate(single_great_pred);
uint16_t sel_idx[rows];
uint16_t selected_size = rows;
block[col_idx]->reserve(rows);
for (int i = 0; i < rows; i++) {
int* int_ptr = &i;
block[col_idx]->insert_data((char*)int_ptr, 0);
sel_idx[i] = i;
}
or_block_column_pred.evaluate(block, sel_idx, &selected_size);
ASSERT_EQ(selected_size, 10);
auto* pred_col = reinterpret_cast<vectorized::PredicateColumnType<int>*>(block[col_idx].get());
ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 0);
}
TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN) {
TabletSchema tablet_schema;
SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
@ -189,6 +279,7 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN) {
}
// Test for and or single
// (column < 5 and column > 3) or column < 3
auto and_block_column_pred = new AndBlockColumnPredicate();
and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
@ -207,6 +298,7 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN) {
_row_block->clear();
select_size = _row_block->selected_size();
// Test for single or and
// column < 3 or (column < 5 and column > 3)
auto and_block_column_pred1 = new AndBlockColumnPredicate();
and_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
and_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
@ -223,6 +315,63 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN) {
ASSERT_DOUBLE_EQ(*(double *) col_block.cell(_row_block->selection_vector()[3]).cell_ptr(), 4.0);
}
TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN_VEC) {
vectorized::MutableColumns block;
block.push_back(vectorized::PredicateColumnType<int>::create());
int less_value = 5;
int great_value = 3;
int rows = 10;
int col_idx = 0;
std::unique_ptr<ColumnPredicate> less_pred(new LessPredicate<int>(0, less_value));
std::unique_ptr<ColumnPredicate> great_pred(new GreaterPredicate<int>(0, great_value));
std::unique_ptr<ColumnPredicate> less_pred1(new LessPredicate<int>(0, great_value));
// Test for and or single
// (column < 5 and column > 3) or column < 3
auto and_block_column_pred = new AndBlockColumnPredicate();
and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
OrBlockColumnPredicate or_block_column_pred;
or_block_column_pred.add_column_predicate(and_block_column_pred);
or_block_column_pred.add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
uint16_t sel_idx[rows];
uint16_t selected_size = rows;
block[col_idx]->reserve(rows);
for (int i = 0; i < rows; i++) {
int* int_ptr = &i;
block[col_idx]->insert_data((char*)int_ptr, 0);
sel_idx[i] = i;
}
or_block_column_pred.evaluate(block, sel_idx, &selected_size);
ASSERT_EQ(selected_size, 4);
auto* pred_col = reinterpret_cast<vectorized::PredicateColumnType<int>*>(block[col_idx].get());
ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 0);
ASSERT_EQ(pred_col->get_data()[sel_idx[1]], 1);
ASSERT_EQ(pred_col->get_data()[sel_idx[2]], 2);
ASSERT_EQ(pred_col->get_data()[sel_idx[3]], 4);
// Test for single or and
// column < 3 or (column < 5 and column > 3)
auto and_block_column_pred1 = new AndBlockColumnPredicate();
and_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
and_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
OrBlockColumnPredicate or_block_column_pred1;
or_block_column_pred1.add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
or_block_column_pred1.add_column_predicate(and_block_column_pred1);
or_block_column_pred1.evaluate(block, sel_idx, &selected_size);
ASSERT_EQ(selected_size, 4);
ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 0);
ASSERT_EQ(pred_col->get_data()[sel_idx[1]], 1);
ASSERT_EQ(pred_col->get_data()[sel_idx[2]], 2);
ASSERT_EQ(pred_col->get_data()[sel_idx[3]], 4);
}
TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN) {
TabletSchema tablet_schema;
SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true,
@ -248,6 +397,7 @@ TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN) {
}
// Test for and or single
// (column < 5 or column < 3) and column > 3
auto or_block_column_pred = new OrBlockColumnPredicate();
or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
@ -263,6 +413,7 @@ TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN) {
_row_block->clear();
select_size = _row_block->selected_size();
// Test for single or and
// column > 3 and (column < 5 or column < 3)
auto or_block_column_pred1 = new OrBlockColumnPredicate();
or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
@ -276,6 +427,57 @@ TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN) {
ASSERT_DOUBLE_EQ(*(double *) col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 4.0);
}
TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN_VEC) {
vectorized::MutableColumns block;
block.push_back(vectorized::PredicateColumnType<int>::create());
int less_value = 5;
int great_value = 3;
int rows = 10;
int col_idx = 0;
std::unique_ptr<ColumnPredicate> less_pred(new LessPredicate<int>(0, less_value));
std::unique_ptr<ColumnPredicate> great_pred(new GreaterPredicate<int>(0, great_value));
std::unique_ptr<ColumnPredicate> less_pred1(new LessPredicate<int>(0, great_value));
// Test for and or single
// (column < 5 or column < 3) and column > 3
auto or_block_column_pred = new OrBlockColumnPredicate();
or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
AndBlockColumnPredicate and_block_column_pred;
and_block_column_pred.add_column_predicate(or_block_column_pred);
and_block_column_pred.add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
uint16_t sel_idx[rows];
uint16_t selected_size = rows;
block[col_idx]->reserve(rows);
for (int i = 0; i < rows; i++) {
int* int_ptr = &i;
block[col_idx]->insert_data((char*)int_ptr, 0);
sel_idx[i] = i;
}
and_block_column_pred.evaluate(block, sel_idx, &selected_size);
auto* pred_col = reinterpret_cast<vectorized::PredicateColumnType<int>*>(block[col_idx].get());
ASSERT_EQ(selected_size, 1);
ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 4);
// Test for single or and
// column > 3 and (column < 5 or column < 3)
auto or_block_column_pred1 = new OrBlockColumnPredicate();
or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get()));
or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get()));
AndBlockColumnPredicate and_block_column_pred1;
and_block_column_pred1.add_column_predicate(new SingleColumnBlockPredicate(great_pred.get()));
and_block_column_pred1.add_column_predicate(or_block_column_pred1);
ASSERT_EQ(selected_size, 1);
ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 4);
}
}
int main(int argc, char** argv) {