[fix](vectorized) Query returns wrong result when predicates are evaluated concurrently on a ColumnDict (#9270)
This commit is contained in:
@ -41,7 +41,7 @@ enum class PredicateType {
|
||||
GT = 5,
|
||||
GE = 6,
|
||||
IN_LIST = 7,
|
||||
NO_IN_LIST = 8,
|
||||
NOT_IN_LIST = 8,
|
||||
IS_NULL = 9,
|
||||
NOT_IS_NULL = 10,
|
||||
BF = 11, // BloomFilter
|
||||
@ -85,8 +85,6 @@ public:
|
||||
virtual void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const {};
|
||||
uint32_t column_id() const { return _column_id; }
|
||||
|
||||
virtual void set_dict_code_if_necessary(vectorized::IColumn& column) { }
|
||||
|
||||
protected:
|
||||
uint32_t _column_id;
|
||||
bool _opposite;
|
||||
|
||||
@ -147,7 +147,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
|
||||
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
|
||||
|
||||
// todo(zeno) define interface in IColumn to simplify code
|
||||
#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP) \
|
||||
#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE) \
|
||||
template <class T> \
|
||||
void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \
|
||||
uint16_t new_size = 0; \
|
||||
@ -163,11 +163,14 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
|
||||
auto* nested_col_ptr = vectorized::check_and_get_column< \
|
||||
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
|
||||
auto& data_array = nested_col_ptr->get_data(); \
|
||||
auto dict_code = \
|
||||
IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1) \
|
||||
: nested_col_ptr->find_code(_value); \
|
||||
for (uint16_t i = 0; i < *size; i++) { \
|
||||
uint16_t idx = sel[i]; \
|
||||
sel[new_size] = idx; \
|
||||
const auto& cell_value = data_array[idx]; \
|
||||
bool ret = !null_bitmap[idx] && (cell_value OP _dict_code); \
|
||||
bool ret = !null_bitmap[idx] && (cell_value OP dict_code); \
|
||||
new_size += _opposite ? !ret : ret; \
|
||||
} \
|
||||
} \
|
||||
@ -184,20 +187,20 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
|
||||
new_size += _opposite ? !ret : ret; \
|
||||
} \
|
||||
} \
|
||||
*size = new_size; \
|
||||
} else if (column.is_column_dictionary()) { \
|
||||
if constexpr (std::is_same_v<T, StringValue>) { \
|
||||
auto& dict_col = \
|
||||
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(column);\
|
||||
auto& data_array = dict_col.get_data(); \
|
||||
auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1) \
|
||||
: dict_col.find_code(_value); \
|
||||
for (uint16_t i = 0; i < *size; ++i) { \
|
||||
uint16_t idx = sel[i]; \
|
||||
sel[new_size] = idx; \
|
||||
const auto& cell_value = data_array[idx]; \
|
||||
bool ret = cell_value OP _dict_code; \
|
||||
bool ret = cell_value OP dict_code; \
|
||||
new_size += _opposite ? !ret : ret; \
|
||||
} \
|
||||
*size = new_size; \
|
||||
} \
|
||||
} else { \
|
||||
auto& pred_column_ref = \
|
||||
@ -210,17 +213,17 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
|
||||
auto ret = cell_value OP _value; \
|
||||
new_size += _opposite ? !ret : ret; \
|
||||
} \
|
||||
*size = new_size; \
|
||||
} \
|
||||
*size = new_size; \
|
||||
}
|
||||
|
||||
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==, false)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=, false)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <, true)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=, true)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >, true)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true)
|
||||
|
||||
#define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP) \
|
||||
template <class T> \
|
||||
@ -502,65 +505,6 @@ COMPARISON_PRED_BITMAP_EVALUATE(LessEqualPredicate, <=)
|
||||
COMPARISON_PRED_BITMAP_EVALUATE(GreaterPredicate, >)
|
||||
COMPARISON_PRED_BITMAP_EVALUATE(GreaterEqualPredicate, >=)
|
||||
|
||||
|
||||
// Generates CLASS<T>::set_dict_code_if_necessary(column).
// The generated method:
//   - returns at once when _dict_code_inited is already true;
//   - does nothing unless T is StringValue (if constexpr);
//   - looks through a nullable column to its nested column;
//   - if that column is a dictionary column, stores dict_col.find_code(_value)
//     in _dict_code and sets _dict_code_inited.
// NOTE(review): the looked-up code is cached in the predicate object and, because of
// the _dict_code_inited guard, is never recomputed for later columns. Presumably that
// is wrong when the same predicate sees columns with different dictionaries or is
// used from several threads -- confirm against the callers.
#define COMPARISON_PRED_SET_DICT_CODE(CLASS) \
|
||||
template <class T> \
|
||||
void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) { \
|
||||
if (_dict_code_inited) { \
|
||||
return; \
|
||||
} \
|
||||
if constexpr (std::is_same_v<T, StringValue>) { \
|
||||
auto* col_ptr = column.get_ptr().get(); \
|
||||
if (column.is_nullable()) { \
|
||||
auto nullable_col = \
|
||||
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr); \
|
||||
col_ptr = nullable_col->get_nested_column_ptr().get(); \
|
||||
} \
|
||||
if (col_ptr->is_column_dictionary()) { \
|
||||
auto& dict_col = \
|
||||
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
|
||||
*col_ptr); \
|
||||
_dict_code = dict_col.find_code(_value); \
|
||||
_dict_code_inited = true; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
// Emit the method for the two predicates that use an exact find_code() lookup.
COMPARISON_PRED_SET_DICT_CODE(EqualPredicate)
|
||||
COMPARISON_PRED_SET_DICT_CODE(NotEqualPredicate)
|
||||
|
||||
// Generates CLASS<T>::set_dict_code_if_necessary(column) for the range predicates.
// The generated method returns at once when _dict_code_inited is already true, does
// nothing unless T is StringValue, looks through a nullable column to its nested
// column and, if that column is a dictionary column, stores the result of
// dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1) in _dict_code and sets
// _dict_code_inited.
//
// The two boolean arguments are constant expressions derived from the operator itself:
// If 1 OP 0 returns true, it means the predicate is > or >=
|
||||
// If 1 OP 1 returns true, it means the predicate is >= or <=
|
||||
// Deriving both flags from OP lets this single macro body serve all four range predicates.
|
||||
// NOTE(review): the macro name is spelled RAMGE (presumably RANGE was intended); a rename
// has to change this definition and the four uses below together.
// NOTE(review): the evaluate() macro elsewhere in this diff calls
// find_code_by_bound(_value, 0 OP 1, 1 OP 1); `0 OP 1` is true for < and <=, the opposite
// of the `1 OP 0` passed here. Confirm which polarity find_code_by_bound expects.
#define RAMGE_COMPARISON_PRED_SET_DICT_CODE(CLASS, OP) \
|
||||
template <class T> \
|
||||
void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) { \
|
||||
if (_dict_code_inited) { \
|
||||
return; \
|
||||
} \
|
||||
if constexpr (std::is_same_v<T, StringValue>) { \
|
||||
auto* col_ptr = column.get_ptr().get(); \
|
||||
if (column.is_nullable()) { \
|
||||
auto nullable_col = \
|
||||
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr); \
|
||||
col_ptr = nullable_col->get_nested_column_ptr().get(); \
|
||||
} \
|
||||
\
|
||||
if (col_ptr->is_column_dictionary()) { \
|
||||
auto& dict_col = \
|
||||
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
|
||||
*col_ptr); \
|
||||
_dict_code = dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1); \
|
||||
_dict_code_inited = true; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
// Emit the method for each of the four range predicates with its own operator.
RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessPredicate, <)
|
||||
RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessEqualPredicate, <=)
|
||||
RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterPredicate, >)
|
||||
RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterEqualPredicate, >=)
|
||||
|
||||
#define COMPARISON_PRED_CONSTRUCTOR_DECLARATION(CLASS) \
|
||||
template CLASS<int8_t>::CLASS(uint32_t column_id, const int8_t& value, bool opposite); \
|
||||
template CLASS<int16_t>::CLASS(uint32_t column_id, const int16_t& value, bool opposite); \
|
||||
@ -745,14 +689,4 @@ COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(LessEqualPredicate)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterPredicate)
|
||||
COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterEqualPredicate)
|
||||
|
||||
// Generates an explicit instantiation of CLASS<StringValue>::set_dict_code_if_necessary.
// Only the StringValue specialization is instantiated here.
#define COMPARISON_PRED_SET_DICT_CODE_DECLARATION(CLASS) \
|
||||
template void CLASS<StringValue>::set_dict_code_if_necessary(vectorized::IColumn& column);
|
||||
|
||||
// One explicit instantiation per comparison predicate class.
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(EqualPredicate)
|
||||
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(NotEqualPredicate)
|
||||
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessPredicate)
|
||||
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessEqualPredicate)
|
||||
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterPredicate)
|
||||
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterEqualPredicate)
|
||||
|
||||
} //namespace doris
|
||||
|
||||
@ -46,11 +46,8 @@ class VectorizedRowBatch;
|
||||
void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \
|
||||
bool* flags) const override; \
|
||||
void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const override; \
|
||||
void set_dict_code_if_necessary(vectorized::IColumn& column) override; \
|
||||
private: \
|
||||
T _value; \
|
||||
bool _dict_code_inited = false; \
|
||||
int32_t _dict_code; \
|
||||
};
|
||||
|
||||
COMPARISON_PRED_CLASS_DEFINE(EqualPredicate, EQ)
|
||||
|
||||
@ -134,12 +134,13 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
|
||||
auto* nested_col_ptr = vectorized::check_and_get_column< \
|
||||
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
|
||||
auto& data_array = nested_col_ptr->get_data(); \
|
||||
auto dict_codes = nested_col_ptr->find_codes(_values); \
|
||||
for (uint16_t i = 0; i < *size; i++) { \
|
||||
uint16_t idx = sel[i]; \
|
||||
sel[new_size] = idx; \
|
||||
const auto& cell_value = data_array[idx]; \
|
||||
bool ret = !null_bitmap[idx] \
|
||||
&& (_dict_codes.find(cell_value) OP _dict_codes.end()); \
|
||||
&& (dict_codes.find(cell_value) OP dict_codes.end()); \
|
||||
new_size += _opposite ? !ret : ret; \
|
||||
} \
|
||||
} \
|
||||
@ -155,18 +156,18 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
|
||||
new_size += _opposite ? !ret : ret; \
|
||||
} \
|
||||
} \
|
||||
*size = new_size; \
|
||||
} else if (column.is_column_dictionary()) { \
|
||||
if constexpr (std::is_same_v<T, StringValue>) { \
|
||||
auto& dict_col = \
|
||||
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
|
||||
column); \
|
||||
auto& data_array = dict_col.get_data(); \
|
||||
auto dict_codes = dict_col.find_codes(_values); \
|
||||
for (uint16_t i = 0; i < *size; i++) { \
|
||||
uint16_t idx = sel[i]; \
|
||||
sel[new_size] = idx; \
|
||||
const auto& cell_value = data_array[idx]; \
|
||||
auto result = (_dict_codes.find(cell_value) OP _dict_codes.end()); \
|
||||
auto result = (dict_codes.find(cell_value) OP dict_codes.end()); \
|
||||
new_size += _opposite ? !result : result; \
|
||||
} \
|
||||
} \
|
||||
@ -282,32 +283,6 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, ==)
|
||||
IN_LIST_PRED_BITMAP_EVALUATE(InListPredicate, &=)
|
||||
IN_LIST_PRED_BITMAP_EVALUATE(NotInListPredicate, -=)
|
||||
|
||||
// Generates CLASS<T>::set_dict_code_if_necessary(column) for the in-list predicates.
// The generated method:
//   - returns at once when _dict_code_inited is already true;
//   - does nothing unless T is StringValue (if constexpr);
//   - looks through a nullable column to its nested column;
//   - if that column is a dictionary column, stores dict_col.find_codes(_values)
//     in _dict_codes and sets _dict_code_inited.
// NOTE(review): the code set is cached in the predicate object and, because of the
// _dict_code_inited guard, is never recomputed for later columns. Presumably that is
// wrong when the same predicate sees columns with different dictionaries or is used
// from several threads -- confirm against the callers.
#define IN_LIST_PRED_SET_DICT_CODE(CLASS) \
|
||||
template <class T> \
|
||||
void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) { \
|
||||
if (_dict_code_inited) { \
|
||||
return; \
|
||||
} \
|
||||
if constexpr (std::is_same_v<T, StringValue>) { \
|
||||
auto* col_ptr = column.get_ptr().get(); \
|
||||
if (column.is_nullable()) { \
|
||||
auto nullable_col = \
|
||||
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr); \
|
||||
col_ptr = nullable_col->get_nested_column_ptr().get(); \
|
||||
} \
|
||||
if (col_ptr->is_column_dictionary()) { \
|
||||
auto& dict_col = \
|
||||
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
|
||||
*col_ptr); \
|
||||
_dict_codes = dict_col.find_codes(_values); \
|
||||
_dict_code_inited = true; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
// Emit the method for both in-list predicate classes.
IN_LIST_PRED_SET_DICT_CODE(InListPredicate)
|
||||
IN_LIST_PRED_SET_DICT_CODE(NotInListPredicate)
|
||||
|
||||
#define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS) \
|
||||
template CLASS<int8_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int8_t>&& values, \
|
||||
bool opposite); \
|
||||
@ -415,8 +390,4 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate)
|
||||
IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(InListPredicate)
|
||||
IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(NotInListPredicate)
|
||||
|
||||
// Explicit instantiations of set_dict_code_if_necessary for the StringValue
// specializations of the two in-list predicate classes.
template void InListPredicate<StringValue>::set_dict_code_if_necessary(vectorized::IColumn& column);
|
||||
template void NotInListPredicate<StringValue>::set_dict_code_if_necessary(
|
||||
vectorized::IColumn& column);
|
||||
|
||||
} //namespace doris
|
||||
|
||||
@ -95,14 +95,11 @@ class VectorizedRowBatch;
|
||||
void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; \
|
||||
void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \
|
||||
void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \
|
||||
void set_dict_code_if_necessary(vectorized::IColumn& column) override; \
|
||||
private: \
|
||||
phmap::flat_hash_set<T> _values; \
|
||||
bool _dict_code_inited = false; \
|
||||
phmap::flat_hash_set<int32_t> _dict_codes; \
|
||||
};
|
||||
|
||||
IN_LIST_PRED_CLASS_DEFINE(InListPredicate, IN_LIST)
|
||||
IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NO_IN_LIST)
|
||||
IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NOT_IN_LIST)
|
||||
|
||||
} //namespace doris
|
||||
|
||||
@ -635,7 +635,7 @@ void SegmentIterator::_vec_init_lazy_materialization() {
|
||||
if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR ||
|
||||
type == OLAP_FIELD_TYPE_STRING || predicate->type() == PredicateType::BF ||
|
||||
predicate->type() == PredicateType::IN_LIST ||
|
||||
predicate->type() == PredicateType::NO_IN_LIST) {
|
||||
predicate->type() == PredicateType::NOT_IN_LIST) {
|
||||
short_cir_pred_col_id_set.insert(cid);
|
||||
_short_cir_eval_predicate.push_back(predicate);
|
||||
_is_all_column_basic_type = false;
|
||||
@ -873,7 +873,6 @@ void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_
|
||||
predicate->type() == PredicateType::GT || predicate->type() == PredicateType::GE) {
|
||||
col_ptr->convert_dict_codes_if_necessary();
|
||||
}
|
||||
predicate->set_dict_code_if_necessary(*short_cir_column);
|
||||
predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, selected_size_ptr);
|
||||
}
|
||||
_opts.stats->rows_vec_cond_filtered += original_size - *selected_size_ptr;
|
||||
|
||||
Reference in New Issue
Block a user