From e3f1efcbbfee57fc87bd2a6149244a0028f30f0f Mon Sep 17 00:00:00 2001 From: wangbo Date: Wed, 23 Feb 2022 12:48:18 +0800 Subject: [PATCH] [Vec][Storage] Support delete condition;ut (#8091) Co-authored-by: Wang Bo --- be/src/olap/comparison_predicate.cpp | 62 ++++-- .../rowset/segment_v2/segment_iterator.cpp | 18 +- be/test/olap/block_column_predicate_test.cpp | 202 ++++++++++++++++++ 3 files changed, 265 insertions(+), 17 deletions(-) diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp index ff5db3f94f..2d55456c1e 100644 --- a/be/src/olap/comparison_predicate.cpp +++ b/be/src/olap/comparison_predicate.cpp @@ -259,12 +259,31 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(LessEqualPredicate, <=) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterPredicate, >) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterEqualPredicate, >=) -// todo(wb) support it -#define COMPARISON_PRED_COLUMN_EVALUATE_OR(CLASS, OP) \ - template \ - void CLASS::evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \ - bool* flags) const {} - +#define COMPARISON_PRED_COLUMN_EVALUATE_OR(CLASS, OP) \ + template \ + void CLASS::evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const { \ + if (column.is_nullable()) { \ + auto* nullable_column = vectorized::check_and_get_column(column); \ + auto& data_array = reinterpret_cast&>(nullable_column->get_nested_column()).get_data(); \ + auto& null_bitmap = reinterpret_cast&>(*(nullable_column->get_null_map_column_ptr())).get_data();\ + for (uint16_t i = 0; i < size; i++) { \ + if (flags[i]) continue; \ + uint16_t idx = sel[i]; \ + bool ret = !null_bitmap[idx] && (data_array[idx] OP _value); \ + flags[i] |= _opposite ? !ret : ret; \ + } \ + } else { \ + auto& predicate_column = reinterpret_cast&>(column); \ + auto& data_array = predicate_column.get_data(); \ + for (uint16_t i = 0; i < size; ++i) { \ + if (flags[i]) continue; \ + uint16_t idx = sel[i]; \ + bool ret = data_array[idx] OP _value; \ + flags[i] |= _opposite ? !ret : ret; \ + } \ + } \ + } + COMPARISON_PRED_COLUMN_EVALUATE_OR(EqualPredicate, ==) COMPARISON_PRED_COLUMN_EVALUATE_OR(NotEqualPredicate, !=) COMPARISON_PRED_COLUMN_EVALUATE_OR(LessPredicate, <) @@ -304,12 +323,31 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(LessEqualPredicate, <=) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterPredicate, >) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterEqualPredicate, >=) -//todo(wb) support it -#define COMPARISON_PRED_COLUMN_EVALUATE_AND(CLASS, OP) \ - template \ - void CLASS::evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \ - bool* flags) const {} - +#define COMPARISON_PRED_COLUMN_EVALUATE_AND(CLASS, OP) \ + template \ + void CLASS::evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const { \ + if (column.is_nullable()) { \ + auto* nullable_column = vectorized::check_and_get_column(column); \ + auto& data_array = reinterpret_cast&>(nullable_column->get_nested_column()).get_data(); \ + auto& null_bitmap = reinterpret_cast&>(*(nullable_column->get_null_map_column_ptr())).get_data();\ + for (uint16_t i = 0; i < size; i++) { \ + if (!flags[i]) continue; \ + uint16_t idx = sel[i]; \ + bool ret = !null_bitmap[idx] && (data_array[idx] OP _value); \ + flags[i] &= _opposite ? !ret : ret; \ + } \ + } else { \ + auto& predicate_column = reinterpret_cast&>(column); \ + auto& data_array = predicate_column.get_data(); \ + for (uint16_t i = 0; i < size; ++i) { \ + if (!flags[i]) continue; \ + uint16_t idx = sel[i]; \ + bool ret = data_array[idx] OP _value; \ + flags[i] &= _opposite ? !ret : ret; \ + } \ + } \ + } + COMPARISON_PRED_COLUMN_EVALUATE_AND(EqualPredicate, ==) COMPARISON_PRED_COLUMN_EVALUATE_AND(NotEqualPredicate, !=) COMPARISON_PRED_COLUMN_EVALUATE_AND(LessPredicate, <) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 8b32e6d79a..2883e6314e 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -598,7 +598,10 @@ void SegmentIterator::_vec_init_lazy_materialization() { bool is_predicate_column_exists = false; bool is_non_predicate_column_exists = false; - if (!_col_predicates.empty()) { + std::set del_cond_id_set; + _opts.delete_condition_predicates->get_all_column_ids(del_cond_id_set); + + if (!_col_predicates.empty() || !del_cond_id_set.empty()) { is_predicate_column_exists = true; std::set short_cir_pred_col_id_set; // using set for distinct cid @@ -628,11 +631,16 @@ void SegmentIterator::_vec_init_lazy_materialization() { } // handle delete_condition - std::set del_cond_id_set; - _opts.delete_condition_predicates.get()->get_all_column_ids(del_cond_id_set); - short_cir_pred_col_id_set.insert(del_cond_id_set.begin(), del_cond_id_set.end()); - pred_column_ids.insert(del_cond_id_set.begin(), del_cond_id_set.end()); + if (!del_cond_id_set.empty()) { + short_cir_pred_col_id_set.insert(del_cond_id_set.begin(), del_cond_id_set.end()); + pred_column_ids.insert(del_cond_id_set.begin(), del_cond_id_set.end()); + _is_all_column_basic_type = false; + for (auto cid : del_cond_id_set) { + _is_pred_column[cid] = true; + } + } + if (_schema.column_ids().size() > pred_column_ids.size()) { for (auto cid : _schema.column_ids()) { if (!_is_pred_column[cid]) { diff --git a/be/test/olap/block_column_predicate_test.cpp b/be/test/olap/block_column_predicate_test.cpp index 87aa34fbda..0e3e17dfaa 100644 --- a/be/test/olap/block_column_predicate_test.cpp +++ b/be/test/olap/block_column_predicate_test.cpp @@ -29,6 +29,7 @@ #include "runtime/string_value.hpp" #include "runtime/vectorized_row_batch.h" #include "util/logging.h" +#include "vec/columns/predicate_column.h" namespace doris { @@ -96,6 +97,31 @@ TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN) { ASSERT_FLOAT_EQ(*(float *) col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.0); } +TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN_VEC) { + vectorized::MutableColumns block; + block.push_back(vectorized::PredicateColumnType::create()); + + int value = 5; + int rows = 10; + int col_idx = 0; + std::unique_ptr pred(new EqualPredicate(col_idx, value)); + SingleColumnBlockPredicate single_column_block_pred(pred.get()); + + uint16_t sel_idx[rows]; + uint16_t selected_size = rows; + block[col_idx]->reserve(rows); + for (int i = 0; i < rows; i++) { + int* int_ptr = &i; + block[col_idx]->insert_data((char*)int_ptr, 0); + sel_idx[i] = i; + } + + single_column_block_pred.evaluate(block, sel_idx, &selected_size); + ASSERT_EQ(selected_size, 1); + auto* pred_col = reinterpret_cast*>(block[col_idx].get()); + ASSERT_EQ(pred_col->get_data()[sel_idx[0]], value); +} + TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN) { TabletSchema tablet_schema; @@ -130,6 +156,38 @@ TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN) { ASSERT_DOUBLE_EQ(*(double *) col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 4.0); } +TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN_VEC) { + vectorized::MutableColumns block; + block.push_back(vectorized::PredicateColumnType::create()); + + int less_value = 5; + int great_value = 3; + int rows = 10; + int col_idx = 0; + std::unique_ptr less_pred(new LessPredicate(col_idx, less_value)); + std::unique_ptr great_pred(new GreaterPredicate(col_idx, great_value)); + auto single_less_pred = new SingleColumnBlockPredicate(less_pred.get()); + auto single_great_pred = new SingleColumnBlockPredicate(great_pred.get()); + + AndBlockColumnPredicate and_block_column_pred; + and_block_column_pred.add_column_predicate(single_less_pred); + and_block_column_pred.add_column_predicate(single_great_pred); + + uint16_t sel_idx[rows]; + uint16_t selected_size = rows; + block[col_idx]->reserve(rows); + for (int i = 0; i < rows; i++) { + int* int_ptr = &i; + block[col_idx]->insert_data((char*)int_ptr, 0); + sel_idx[i] = i; + } + + and_block_column_pred.evaluate(block, sel_idx, &selected_size); + ASSERT_EQ(selected_size, 1); + auto* pred_col = reinterpret_cast*>(block[col_idx].get()); + ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 4); +} + TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN) { TabletSchema tablet_schema; SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true, @@ -164,6 +222,38 @@ TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN) { ASSERT_DOUBLE_EQ(*(double *) col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 0.0); } +TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN_VEC) { + vectorized::MutableColumns block; + block.push_back(vectorized::PredicateColumnType::create()); + + int less_value = 5; + int great_value = 3; + int rows = 10; + int col_idx = 0; + std::unique_ptr less_pred(new LessPredicate(col_idx, less_value)); + std::unique_ptr great_pred(new GreaterPredicate(col_idx, great_value)); + auto single_less_pred = new SingleColumnBlockPredicate(less_pred.get()); + auto single_great_pred = new SingleColumnBlockPredicate(great_pred.get()); + + OrBlockColumnPredicate or_block_column_pred; + or_block_column_pred.add_column_predicate(single_less_pred); + or_block_column_pred.add_column_predicate(single_great_pred); + + uint16_t sel_idx[rows]; + uint16_t selected_size = rows; + block[col_idx]->reserve(rows); + for (int i = 0; i < rows; i++) { + int* int_ptr = &i; + block[col_idx]->insert_data((char*)int_ptr, 0); + sel_idx[i] = i; + } + + or_block_column_pred.evaluate(block, sel_idx, &selected_size); + ASSERT_EQ(selected_size, 10); + auto* pred_col = reinterpret_cast*>(block[col_idx].get()); + ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 0); +} + TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN) { TabletSchema tablet_schema; SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true, @@ -189,6 +279,7 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN) { } // Test for and or single + // (column < 5 and column > 3) or column < 3 auto and_block_column_pred = new AndBlockColumnPredicate(); and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get())); and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(great_pred.get())); @@ -207,6 +298,7 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN) { _row_block->clear(); select_size = _row_block->selected_size(); // Test for single or and + // column < 3 or (column < 5 and column > 3) auto and_block_column_pred1 = new AndBlockColumnPredicate(); and_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get())); and_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(great_pred.get())); @@ -223,6 +315,63 @@ TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN) { ASSERT_DOUBLE_EQ(*(double *) col_block.cell(_row_block->selection_vector()[3]).cell_ptr(), 4.0); } +TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN_VEC) { + vectorized::MutableColumns block; + block.push_back(vectorized::PredicateColumnType::create()); + + int less_value = 5; + int great_value = 3; + int rows = 10; + int col_idx = 0; + std::unique_ptr less_pred(new LessPredicate(0, less_value)); + std::unique_ptr great_pred(new GreaterPredicate(0, great_value)); + std::unique_ptr less_pred1(new LessPredicate(0, great_value)); + + // Test for and or single + // (column < 5 and column > 3) or column < 3 + auto and_block_column_pred = new AndBlockColumnPredicate(); + and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get())); + and_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(great_pred.get())); + + OrBlockColumnPredicate or_block_column_pred; + or_block_column_pred.add_column_predicate(and_block_column_pred); + or_block_column_pred.add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get())); + + uint16_t sel_idx[rows]; + uint16_t selected_size = rows; + block[col_idx]->reserve(rows); + for (int i = 0; i < rows; i++) { + int* int_ptr = &i; + block[col_idx]->insert_data((char*)int_ptr, 0); + sel_idx[i] = i; + } + + or_block_column_pred.evaluate(block, sel_idx, &selected_size); + ASSERT_EQ(selected_size, 4); + auto* pred_col = reinterpret_cast*>(block[col_idx].get()); + ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 0); + ASSERT_EQ(pred_col->get_data()[sel_idx[1]], 1); + ASSERT_EQ(pred_col->get_data()[sel_idx[2]], 2); + ASSERT_EQ(pred_col->get_data()[sel_idx[3]], 4); + + // Test for single or and + // column < 3 or (column < 5 and column > 3) + auto and_block_column_pred1 = new AndBlockColumnPredicate(); + and_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get())); + and_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(great_pred.get())); + + OrBlockColumnPredicate or_block_column_pred1; + or_block_column_pred1.add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get())); + or_block_column_pred1.add_column_predicate(and_block_column_pred1); + + or_block_column_pred1.evaluate(block, sel_idx, &selected_size); + ASSERT_EQ(selected_size, 4); + ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 0); + ASSERT_EQ(pred_col->get_data()[sel_idx[1]], 1); + ASSERT_EQ(pred_col->get_data()[sel_idx[2]], 2); + ASSERT_EQ(pred_col->get_data()[sel_idx[3]], 4); +} + TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN) { TabletSchema tablet_schema; SetTabletSchema(std::string("DOUBLE_COLUMN"), "DOUBLE", "REPLACE", 1, true, true, @@ -248,6 +397,7 @@ TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN) { } // Test for and or single + // (column < 5 or column < 3) and column > 3 auto or_block_column_pred = new OrBlockColumnPredicate(); or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get())); or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get())); @@ -263,6 +413,7 @@ TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN) { _row_block->clear(); select_size = _row_block->selected_size(); // Test for single or and + // column > 3 and (column < 5 or column < 3) auto or_block_column_pred1 = new OrBlockColumnPredicate(); or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get())); or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get())); @@ -276,6 +427,57 @@ TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN) { ASSERT_DOUBLE_EQ(*(double *) col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 4.0); } +TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN_VEC) { + vectorized::MutableColumns block; + block.push_back(vectorized::PredicateColumnType::create()); + + int less_value = 5; + int great_value = 3; + int rows = 10; + int col_idx = 0; + std::unique_ptr less_pred(new LessPredicate(0, less_value)); + std::unique_ptr great_pred(new GreaterPredicate(0, great_value)); + std::unique_ptr less_pred1(new LessPredicate(0, great_value)); + + // Test for and or single + // (column < 5 or column < 3) and column > 3 + auto or_block_column_pred = new OrBlockColumnPredicate(); + or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get())); + or_block_column_pred->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get())); + + AndBlockColumnPredicate and_block_column_pred; + and_block_column_pred.add_column_predicate(or_block_column_pred); + and_block_column_pred.add_column_predicate(new SingleColumnBlockPredicate(great_pred.get())); + + uint16_t sel_idx[rows]; + uint16_t selected_size = rows; + block[col_idx]->reserve(rows); + for (int i = 0; i < rows; i++) { + int* int_ptr = &i; + block[col_idx]->insert_data((char*)int_ptr, 0); + sel_idx[i] = i; + } + + and_block_column_pred.evaluate(block, sel_idx, &selected_size); + + auto* pred_col = reinterpret_cast*>(block[col_idx].get()); + ASSERT_EQ(selected_size, 1); + ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 4); + + // Test for single or and + // column > 3 and (column < 5 or column < 3) + auto or_block_column_pred1 = new OrBlockColumnPredicate(); + or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred.get())); + or_block_column_pred1->add_column_predicate(new SingleColumnBlockPredicate(less_pred1.get())); + + AndBlockColumnPredicate and_block_column_pred1; + and_block_column_pred1.add_column_predicate(new SingleColumnBlockPredicate(great_pred.get())); + and_block_column_pred1.add_column_predicate(or_block_column_pred1); + + ASSERT_EQ(selected_size, 1); + ASSERT_EQ(pred_col->get_data()[sel_idx[0]], 4); +} + } int main(int argc, char** argv) {