diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index c544968d54..0e7f52abf1 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -41,7 +41,7 @@ enum class PredicateType { GT = 5, GE = 6, IN_LIST = 7, - NO_IN_LIST = 8, + NOT_IN_LIST = 8, IS_NULL = 9, NOT_IS_NULL = 10, BF = 11, // BloomFilter @@ -85,8 +85,6 @@ public: virtual void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const {}; uint32_t column_id() const { return _column_id; } - virtual void set_dict_code_if_necessary(vectorized::IColumn& column) { } - protected: uint32_t _column_id; bool _opposite; diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp index 57549c349c..45a89f92ad 100644 --- a/be/src/olap/comparison_predicate.cpp +++ b/be/src/olap/comparison_predicate.cpp @@ -147,7 +147,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >) COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) // todo(zeno) define interface in IColumn to simplify code -#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP) \ +#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE) \ template \ void CLASS::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \ uint16_t new_size = 0; \ @@ -163,11 +163,14 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) auto* nested_col_ptr = vectorized::check_and_get_column< \ vectorized::ColumnDictionary>(nested_col); \ auto& data_array = nested_col_ptr->get_data(); \ + auto dict_code = \ + IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1) \ + : nested_col_ptr->find_code(_value); \ for (uint16_t i = 0; i < *size; i++) { \ uint16_t idx = sel[i]; \ sel[new_size] = idx; \ const auto& cell_value = data_array[idx]; \ - bool ret = !null_bitmap[idx] && (cell_value OP _dict_code); \ + bool ret = !null_bitmap[idx] && (cell_value OP dict_code); \ new_size += _opposite ? !ret : ret; \ } \ } \ @@ -184,20 +187,20 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) new_size += _opposite ? !ret : ret; \ } \ } \ - *size = new_size; \ } else if (column.is_column_dictionary()) { \ if constexpr (std::is_same_v) { \ auto& dict_col = \ reinterpret_cast&>(column);\ auto& data_array = dict_col.get_data(); \ + auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1) \ + : dict_col.find_code(_value); \ for (uint16_t i = 0; i < *size; ++i) { \ uint16_t idx = sel[i]; \ sel[new_size] = idx; \ const auto& cell_value = data_array[idx]; \ - bool ret = cell_value OP _dict_code; \ + bool ret = cell_value OP dict_code; \ new_size += _opposite ? !ret : ret; \ } \ - *size = new_size; \ } \ } else { \ auto& pred_column_ref = \ @@ -210,17 +213,17 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=) auto ret = cell_value OP _value; \ new_size += _opposite ? !ret : ret; \ } \ - *size = new_size; \ } \ + *size = new_size; \ } -COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==) -COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=) -COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <) -COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=) -COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >) -COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=) +COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==, false) +COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=, false) +COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <, true) +COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=, true) +COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >, true) +COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true) #define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP) \ template \ @@ -502,65 +505,6 @@ COMPARISON_PRED_BITMAP_EVALUATE(LessEqualPredicate, <=) COMPARISON_PRED_BITMAP_EVALUATE(GreaterPredicate, >) COMPARISON_PRED_BITMAP_EVALUATE(GreaterEqualPredicate, >=) - -#define COMPARISON_PRED_SET_DICT_CODE(CLASS) \ - template \ - void CLASS::set_dict_code_if_necessary(vectorized::IColumn& column) { \ - if (_dict_code_inited) { \ - return; \ - } \ - if constexpr (std::is_same_v) { \ - auto* col_ptr = column.get_ptr().get(); \ - if (column.is_nullable()) { \ - auto nullable_col = \ - reinterpret_cast(col_ptr); \ - col_ptr = nullable_col->get_nested_column_ptr().get(); \ - } \ - if (col_ptr->is_column_dictionary()) { \ - auto& dict_col = \ - reinterpret_cast&>( \ - *col_ptr); \ - _dict_code = dict_col.find_code(_value); \ - _dict_code_inited = true; \ - } \ - } \ - } - -COMPARISON_PRED_SET_DICT_CODE(EqualPredicate) -COMPARISON_PRED_SET_DICT_CODE(NotEqualPredicate) - -// If 1 OP 0 returns true, it means the predicate is > or >= -// If 1 OP 1 returns true, it means the predicate is >= or <= -// by this way, avoid redundant code -#define RAMGE_COMPARISON_PRED_SET_DICT_CODE(CLASS, OP) \ - template \ - void CLASS::set_dict_code_if_necessary(vectorized::IColumn& column) { \ - if (_dict_code_inited) { \ - return; \ - } \ - if constexpr (std::is_same_v) { \ - auto* col_ptr = column.get_ptr().get(); \ - if (column.is_nullable()) { \ - auto nullable_col = \ - reinterpret_cast(col_ptr); \ - col_ptr = nullable_col->get_nested_column_ptr().get(); \ - } \ - \ - if (col_ptr->is_column_dictionary()) { \ - auto& dict_col = \ - reinterpret_cast&>( \ - *col_ptr); \ - _dict_code = dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1); \ - _dict_code_inited = true; \ - } \ - } \ - } - -RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessPredicate, <) -RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessEqualPredicate, <=) -RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterPredicate, >) -RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterEqualPredicate, >=) - #define COMPARISON_PRED_CONSTRUCTOR_DECLARATION(CLASS) \ template CLASS::CLASS(uint32_t column_id, const int8_t& value, bool opposite); \ template CLASS::CLASS(uint32_t column_id, const int16_t& value, bool opposite); \ @@ -745,14 +689,4 @@ COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(LessEqualPredicate) COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterPredicate) COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterEqualPredicate) -#define COMPARISON_PRED_SET_DICT_CODE_DECLARATION(CLASS) \ -template void CLASS::set_dict_code_if_necessary(vectorized::IColumn& column); - -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(EqualPredicate) -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(NotEqualPredicate) -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessPredicate) -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessEqualPredicate) -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterPredicate) -COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterEqualPredicate) - } //namespace doris diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index 7c3247bd55..a2cb966054 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -46,11 +46,8 @@ class VectorizedRowBatch; void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \ bool* flags) const override; \ void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const override; \ - void set_dict_code_if_necessary(vectorized::IColumn& column) override; \ private: \ T _value; \ - bool _dict_code_inited = false; \ - int32_t _dict_code; \ }; COMPARISON_PRED_CLASS_DEFINE(EqualPredicate, EQ) diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp index 0401673926..9b78a8705f 100644 --- a/be/src/olap/in_list_predicate.cpp +++ b/be/src/olap/in_list_predicate.cpp @@ -134,12 +134,13 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==) auto* nested_col_ptr = vectorized::check_and_get_column< \ vectorized::ColumnDictionary>(nested_col); \ auto& data_array = nested_col_ptr->get_data(); \ + auto dict_codes = nested_col_ptr->find_codes(_values); \ for (uint16_t i = 0; i < *size; i++) { \ uint16_t idx = sel[i]; \ sel[new_size] = idx; \ const auto& cell_value = data_array[idx]; \ bool ret = !null_bitmap[idx] \ - && (_dict_codes.find(cell_value) OP _dict_codes.end()); \ + && (dict_codes.find(cell_value) OP dict_codes.end()); \ new_size += _opposite ? !ret : ret; \ } \ } \ @@ -155,18 +156,18 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==) new_size += _opposite ? !ret : ret; \ } \ } \ - *size = new_size; \ } else if (column.is_column_dictionary()) { \ if constexpr (std::is_same_v) { \ auto& dict_col = \ reinterpret_cast&>( \ column); \ auto& data_array = dict_col.get_data(); \ + auto dict_codes = dict_col.find_codes(_values); \ for (uint16_t i = 0; i < *size; i++) { \ uint16_t idx = sel[i]; \ sel[new_size] = idx; \ const auto& cell_value = data_array[idx]; \ - auto result = (_dict_codes.find(cell_value) OP _dict_codes.end()); \ + auto result = (dict_codes.find(cell_value) OP dict_codes.end()); \ new_size += _opposite ? !result : result; \ } \ } \ @@ -282,32 +283,6 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, ==) IN_LIST_PRED_BITMAP_EVALUATE(InListPredicate, &=) IN_LIST_PRED_BITMAP_EVALUATE(NotInListPredicate, -=) -#define IN_LIST_PRED_SET_DICT_CODE(CLASS) \ - template \ - void CLASS::set_dict_code_if_necessary(vectorized::IColumn& column) { \ - if (_dict_code_inited) { \ - return; \ - } \ - if constexpr (std::is_same_v) { \ - auto* col_ptr = column.get_ptr().get(); \ - if (column.is_nullable()) { \ - auto nullable_col = \ - reinterpret_cast(col_ptr); \ - col_ptr = nullable_col->get_nested_column_ptr().get(); \ - } \ - if (col_ptr->is_column_dictionary()) { \ - auto& dict_col = \ - reinterpret_cast&>( \ - *col_ptr); \ - _dict_codes = dict_col.find_codes(_values); \ - _dict_code_inited = true; \ - } \ - } \ - } - -IN_LIST_PRED_SET_DICT_CODE(InListPredicate) -IN_LIST_PRED_SET_DICT_CODE(NotInListPredicate) - #define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS) \ template CLASS::CLASS(uint32_t column_id, phmap::flat_hash_set&& values, \ bool opposite); \ @@ -415,8 +390,4 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate) IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(InListPredicate) IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(NotInListPredicate) -template void InListPredicate::set_dict_code_if_necessary(vectorized::IColumn& column); -template void NotInListPredicate::set_dict_code_if_necessary( - vectorized::IColumn& column); - } //namespace doris diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index b437384666..cca6b44c9e 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -95,14 +95,11 @@ class VectorizedRowBatch; void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; \ void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \ void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \ - void set_dict_code_if_necessary(vectorized::IColumn& column) override; \ private: \ phmap::flat_hash_set _values; \ - bool _dict_code_inited = false; \ - phmap::flat_hash_set _dict_codes; \ }; IN_LIST_PRED_CLASS_DEFINE(InListPredicate, IN_LIST) -IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NO_IN_LIST) +IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NOT_IN_LIST) } //namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 2e2d3a8ccd..0fd4a16ee4 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -635,7 +635,7 @@ void SegmentIterator::_vec_init_lazy_materialization() { if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_STRING || predicate->type() == PredicateType::BF || predicate->type() == PredicateType::IN_LIST || - predicate->type() == PredicateType::NO_IN_LIST) { + predicate->type() == PredicateType::NOT_IN_LIST) { short_cir_pred_col_id_set.insert(cid); _short_cir_eval_predicate.push_back(predicate); _is_all_column_basic_type = false; @@ -873,7 +873,6 @@ void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_ predicate->type() == PredicateType::GT || predicate->type() == PredicateType::GE) { col_ptr->convert_dict_codes_if_necessary(); } - predicate->set_dict_code_if_necessary(*short_cir_column); predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, selected_size_ptr); } _opts.stats->rows_vec_cond_filtered += original_size - *selected_size_ptr;