[fix](vectorized) Query get wrong result when ColumnDict concurrent predicate eval (#9270)

This commit is contained in:
ZenoYang
2022-04-29 11:45:04 +08:00
committed by GitHub
parent 420cc2c3d8
commit ce7905e983
6 changed files with 22 additions and 126 deletions

View File

@ -41,7 +41,7 @@ enum class PredicateType {
GT = 5,
GE = 6,
IN_LIST = 7,
NO_IN_LIST = 8,
NOT_IN_LIST = 8,
IS_NULL = 9,
NOT_IS_NULL = 10,
BF = 11, // BloomFilter
@ -85,8 +85,6 @@ public:
virtual void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const {};
uint32_t column_id() const { return _column_id; }
virtual void set_dict_code_if_necessary(vectorized::IColumn& column) { }
protected:
uint32_t _column_id;
bool _opposite;

View File

@ -147,7 +147,7 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
// todo(zeno) define interface in IColumn to simplify code
#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP) \
#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE) \
template <class T> \
void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \
uint16_t new_size = 0; \
@ -163,11 +163,14 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
auto* nested_col_ptr = vectorized::check_and_get_column< \
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
auto& data_array = nested_col_ptr->get_data(); \
auto dict_code = \
IS_RANGE ? nested_col_ptr->find_code_by_bound(_value, 0 OP 1, 1 OP 1) \
: nested_col_ptr->find_code(_value); \
for (uint16_t i = 0; i < *size; i++) { \
uint16_t idx = sel[i]; \
sel[new_size] = idx; \
const auto& cell_value = data_array[idx]; \
bool ret = !null_bitmap[idx] && (cell_value OP _dict_code); \
bool ret = !null_bitmap[idx] && (cell_value OP dict_code); \
new_size += _opposite ? !ret : ret; \
} \
} \
@ -184,20 +187,20 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
new_size += _opposite ? !ret : ret; \
} \
} \
*size = new_size; \
} else if (column.is_column_dictionary()) { \
if constexpr (std::is_same_v<T, StringValue>) { \
auto& dict_col = \
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(column);\
auto& data_array = dict_col.get_data(); \
auto dict_code = IS_RANGE ? dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 1) \
: dict_col.find_code(_value); \
for (uint16_t i = 0; i < *size; ++i) { \
uint16_t idx = sel[i]; \
sel[new_size] = idx; \
const auto& cell_value = data_array[idx]; \
bool ret = cell_value OP _dict_code; \
bool ret = cell_value OP dict_code; \
new_size += _opposite ? !ret : ret; \
} \
*size = new_size; \
} \
} else { \
auto& pred_column_ref = \
@ -210,17 +213,17 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
auto ret = cell_value OP _value; \
new_size += _opposite ? !ret : ret; \
} \
*size = new_size; \
} \
*size = new_size; \
}
COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==)
COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=)
COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <)
COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=)
COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >)
COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=)
COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==, false)
COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=, false)
COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <, true)
COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=, true)
COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >, true)
COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true)
#define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP) \
template <class T> \
@ -502,65 +505,6 @@ COMPARISON_PRED_BITMAP_EVALUATE(LessEqualPredicate, <=)
COMPARISON_PRED_BITMAP_EVALUATE(GreaterPredicate, >)
COMPARISON_PRED_BITMAP_EVALUATE(GreaterEqualPredicate, >=)
#define COMPARISON_PRED_SET_DICT_CODE(CLASS) \
template <class T> \
void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) { \
if (_dict_code_inited) { \
return; \
} \
if constexpr (std::is_same_v<T, StringValue>) { \
auto* col_ptr = column.get_ptr().get(); \
if (column.is_nullable()) { \
auto nullable_col = \
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr); \
col_ptr = nullable_col->get_nested_column_ptr().get(); \
} \
if (col_ptr->is_column_dictionary()) { \
auto& dict_col = \
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
*col_ptr); \
_dict_code = dict_col.find_code(_value); \
_dict_code_inited = true; \
} \
} \
}
COMPARISON_PRED_SET_DICT_CODE(EqualPredicate)
COMPARISON_PRED_SET_DICT_CODE(NotEqualPredicate)
// If 1 OP 0 returns true, it means the predicate is > or >=
// If 1 OP 1 returns true, it means the predicate is >= or <=
// by this way, avoid redundant code
#define RAMGE_COMPARISON_PRED_SET_DICT_CODE(CLASS, OP) \
template <class T> \
void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) { \
if (_dict_code_inited) { \
return; \
} \
if constexpr (std::is_same_v<T, StringValue>) { \
auto* col_ptr = column.get_ptr().get(); \
if (column.is_nullable()) { \
auto nullable_col = \
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr); \
col_ptr = nullable_col->get_nested_column_ptr().get(); \
} \
\
if (col_ptr->is_column_dictionary()) { \
auto& dict_col = \
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
*col_ptr); \
_dict_code = dict_col.find_code_by_bound(_value, 1 OP 0, 1 OP 1); \
_dict_code_inited = true; \
} \
} \
}
RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessPredicate, <)
RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessEqualPredicate, <=)
RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterPredicate, >)
RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterEqualPredicate, >=)
#define COMPARISON_PRED_CONSTRUCTOR_DECLARATION(CLASS) \
template CLASS<int8_t>::CLASS(uint32_t column_id, const int8_t& value, bool opposite); \
template CLASS<int16_t>::CLASS(uint32_t column_id, const int16_t& value, bool opposite); \
@ -745,14 +689,4 @@ COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(LessEqualPredicate)
COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterPredicate)
COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterEqualPredicate)
#define COMPARISON_PRED_SET_DICT_CODE_DECLARATION(CLASS) \
template void CLASS<StringValue>::set_dict_code_if_necessary(vectorized::IColumn& column);
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(EqualPredicate)
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(NotEqualPredicate)
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessPredicate)
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessEqualPredicate)
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterPredicate)
COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterEqualPredicate)
} //namespace doris

View File

@ -46,11 +46,8 @@ class VectorizedRowBatch;
void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, \
bool* flags) const override; \
void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const override; \
void set_dict_code_if_necessary(vectorized::IColumn& column) override; \
private: \
T _value; \
bool _dict_code_inited = false; \
int32_t _dict_code; \
};
COMPARISON_PRED_CLASS_DEFINE(EqualPredicate, EQ)

View File

@ -134,12 +134,13 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
auto* nested_col_ptr = vectorized::check_and_get_column< \
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col); \
auto& data_array = nested_col_ptr->get_data(); \
auto dict_codes = nested_col_ptr->find_codes(_values); \
for (uint16_t i = 0; i < *size; i++) { \
uint16_t idx = sel[i]; \
sel[new_size] = idx; \
const auto& cell_value = data_array[idx]; \
bool ret = !null_bitmap[idx] \
&& (_dict_codes.find(cell_value) OP _dict_codes.end()); \
&& (dict_codes.find(cell_value) OP dict_codes.end()); \
new_size += _opposite ? !ret : ret; \
} \
} \
@ -155,18 +156,18 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
new_size += _opposite ? !ret : ret; \
} \
} \
*size = new_size; \
} else if (column.is_column_dictionary()) { \
if constexpr (std::is_same_v<T, StringValue>) { \
auto& dict_col = \
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
column); \
auto& data_array = dict_col.get_data(); \
auto dict_codes = dict_col.find_codes(_values); \
for (uint16_t i = 0; i < *size; i++) { \
uint16_t idx = sel[i]; \
sel[new_size] = idx; \
const auto& cell_value = data_array[idx]; \
auto result = (_dict_codes.find(cell_value) OP _dict_codes.end()); \
auto result = (dict_codes.find(cell_value) OP dict_codes.end()); \
new_size += _opposite ? !result : result; \
} \
} \
@ -282,32 +283,6 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, ==)
IN_LIST_PRED_BITMAP_EVALUATE(InListPredicate, &=)
IN_LIST_PRED_BITMAP_EVALUATE(NotInListPredicate, -=)
#define IN_LIST_PRED_SET_DICT_CODE(CLASS) \
template <class T> \
void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) { \
if (_dict_code_inited) { \
return; \
} \
if constexpr (std::is_same_v<T, StringValue>) { \
auto* col_ptr = column.get_ptr().get(); \
if (column.is_nullable()) { \
auto nullable_col = \
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr); \
col_ptr = nullable_col->get_nested_column_ptr().get(); \
} \
if (col_ptr->is_column_dictionary()) { \
auto& dict_col = \
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>( \
*col_ptr); \
_dict_codes = dict_col.find_codes(_values); \
_dict_code_inited = true; \
} \
} \
}
IN_LIST_PRED_SET_DICT_CODE(InListPredicate)
IN_LIST_PRED_SET_DICT_CODE(NotInListPredicate)
#define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS) \
template CLASS<int8_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int8_t>&& values, \
bool opposite); \
@ -415,8 +390,4 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate)
IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(InListPredicate)
IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(NotInListPredicate)
template void InListPredicate<StringValue>::set_dict_code_if_necessary(vectorized::IColumn& column);
template void NotInListPredicate<StringValue>::set_dict_code_if_necessary(
vectorized::IColumn& column);
} //namespace doris

View File

@ -95,14 +95,11 @@ class VectorizedRowBatch;
void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; \
void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \
void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \
void set_dict_code_if_necessary(vectorized::IColumn& column) override; \
private: \
phmap::flat_hash_set<T> _values; \
bool _dict_code_inited = false; \
phmap::flat_hash_set<int32_t> _dict_codes; \
};
IN_LIST_PRED_CLASS_DEFINE(InListPredicate, IN_LIST)
IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NO_IN_LIST)
IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NOT_IN_LIST)
} //namespace doris

View File

@ -635,7 +635,7 @@ void SegmentIterator::_vec_init_lazy_materialization() {
if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR ||
type == OLAP_FIELD_TYPE_STRING || predicate->type() == PredicateType::BF ||
predicate->type() == PredicateType::IN_LIST ||
predicate->type() == PredicateType::NO_IN_LIST) {
predicate->type() == PredicateType::NOT_IN_LIST) {
short_cir_pred_col_id_set.insert(cid);
_short_cir_eval_predicate.push_back(predicate);
_is_all_column_basic_type = false;
@ -873,7 +873,6 @@ void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_
predicate->type() == PredicateType::GT || predicate->type() == PredicateType::GE) {
col_ptr->convert_dict_codes_if_necessary();
}
predicate->set_dict_code_if_necessary(*short_cir_column);
predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, selected_size_ptr);
}
_opts.stats->rows_vec_cond_filtered += original_size - *selected_size_ptr;