[refactor] add evaluate_and_vec() for ComparisonPredicateBase (#10631)
This commit is contained in:
@ -92,7 +92,10 @@ public:
|
||||
// Vectorized evaluation hook: overrides write one result per row into
// flags[0..size) for the given column.
// This base version is only a placeholder; reaching it trips the debug check.
virtual void evaluate_vec(const vectorized::IColumn& column,
                          uint16_t size,
                          bool* flags) const {
    DCHECK(false) << "should not reach here";
}
|
||||
|
||||
// Vectorized evaluation hook whose overrides AND their per-row result into the
// values already present in flags[0..size) instead of overwriting them.
// This base version is only a placeholder; reaching it trips the debug check.
virtual void evaluate_and_vec(const vectorized::IColumn& column,
                              uint16_t size,
                              bool* flags) const {
    DCHECK(false) << "should not reach here";
}
|
||||
// Returns the column id stored in this predicate (_column_id).
uint32_t column_id() const {
    return _column_id;
}
|
||||
|
||||
protected:
|
||||
|
||||
@ -160,8 +160,9 @@ public:
|
||||
_evaluate_bit<false>(column, sel, size, flags);
|
||||
}
|
||||
|
||||
void evaluate_vec(const vectorized::IColumn& column, uint16_t size,
|
||||
bool* flags) const override {
|
||||
template <bool is_and>
|
||||
__attribute__((flatten)) void _evaluate_vec_internal(const vectorized::IColumn& column,
|
||||
uint16_t size, bool* flags) const {
|
||||
if (column.is_nullable()) {
|
||||
auto* nullable_column_ptr =
|
||||
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
|
||||
@ -180,7 +181,8 @@ public:
|
||||
: dict_column_ptr->find_code(_value);
|
||||
auto* data_array = dict_column_ptr->get_data().data();
|
||||
|
||||
_base_loop_vec<true>(size, flags, null_map.data(), data_array, dict_code);
|
||||
_base_loop_vec<true, is_and>(size, flags, null_map.data(), data_array,
|
||||
dict_code);
|
||||
} else {
|
||||
LOG(FATAL) << "column_dictionary must use StringValue predicate.";
|
||||
}
|
||||
@ -190,7 +192,7 @@ public:
|
||||
.get_data()
|
||||
.data();
|
||||
|
||||
_base_loop_vec<true>(size, flags, null_map.data(), data_array, _value_real);
|
||||
_base_loop_vec<true, is_and>(size, flags, null_map.data(), data_array, _value_real);
|
||||
}
|
||||
} else {
|
||||
if (column.is_column_dictionary()) {
|
||||
@ -202,7 +204,7 @@ public:
|
||||
: dict_column_ptr->find_code(_value);
|
||||
auto* data_array = dict_column_ptr->get_data().data();
|
||||
|
||||
_base_loop_vec<false>(size, flags, nullptr, data_array, dict_code);
|
||||
_base_loop_vec<false, is_and>(size, flags, nullptr, data_array, dict_code);
|
||||
} else {
|
||||
LOG(FATAL) << "column_dictionary must use StringValue predicate.";
|
||||
}
|
||||
@ -213,7 +215,7 @@ public:
|
||||
->get_data()
|
||||
.data();
|
||||
|
||||
_base_loop_vec<false>(size, flags, nullptr, data_array, _value_real);
|
||||
_base_loop_vec<false, is_and>(size, flags, nullptr, data_array, _value_real);
|
||||
}
|
||||
}
|
||||
|
||||
@ -224,6 +226,16 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
// Overwrites flags[0..size) with the comparison result of every row in `column`.
// Delegates to the shared implementation with is_and = false.
void evaluate_vec(const vectorized::IColumn& column,
                  uint16_t size,
                  bool* flags) const override {
    this->_evaluate_vec_internal<false>(column, size, flags);
}
|
||||
|
||||
// ANDs the comparison result of every row in `column` into flags[0..size),
// so rows already rejected by an earlier predicate stay rejected.
// Delegates to the shared implementation with is_and = true.
void evaluate_and_vec(const vectorized::IColumn& column,
                      uint16_t size,
                      bool* flags) const override {
    this->_evaluate_vec_internal<true>(column, size, flags);
}
|
||||
|
||||
private:
|
||||
using TReal = std::conditional_t<std::is_same_v<T, uint24_t>, uint32_t, T>;
|
||||
|
||||
@ -313,14 +325,28 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
template <bool is_nullable, typename TArray, typename TValue>
|
||||
void _base_loop_vec(uint16_t size, bool* __restrict flags, const uint8_t* __restrict null_map,
|
||||
const TArray* __restrict data_array, const TValue& value) const {
|
||||
for (uint16_t i = 0; i < size; i++) {
|
||||
if constexpr (is_nullable) {
|
||||
flags[i] = !null_map[i] && _operator(data_array[i], value);
|
||||
} else {
|
||||
flags[i] = _operator(data_array[i], value);
|
||||
template <bool is_nullable, bool is_and, typename TArray, typename TValue>
|
||||
__attribute__((flatten)) void _base_loop_vec(uint16_t size, bool* __restrict bflags,
|
||||
const uint8_t* __restrict null_map,
|
||||
const TArray* __restrict data_array,
|
||||
const TValue& value) const {
|
||||
//uint8_t helps compiler to generate vectorized code
|
||||
uint8_t* flags = reinterpret_cast<uint8_t*>(bflags);
|
||||
if constexpr (is_and) {
|
||||
for (uint16_t i = 0; i < size; i++) {
|
||||
if constexpr (is_nullable) {
|
||||
flags[i] &= (uint8_t)(!null_map[i] && _operator(data_array[i], value));
|
||||
} else {
|
||||
flags[i] &= (uint8_t)_operator(data_array[i], value);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (uint16_t i = 0; i < size; i++) {
|
||||
if constexpr (is_nullable) {
|
||||
flags[i] = !null_map[i] && _operator(data_array[i], value);
|
||||
} else {
|
||||
flags[i] = _operator(data_array[i], value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -661,11 +661,7 @@ void SegmentIterator::_vec_init_lazy_materialization() {
|
||||
// Step1: check pred using short eval or vec eval
|
||||
if (_can_evaluated_by_vectorized(predicate)) {
|
||||
vec_pred_col_id_set.insert(predicate->column_id());
|
||||
if (_pre_eval_block_predicate == nullptr) {
|
||||
_pre_eval_block_predicate.reset(new AndBlockColumnPredicate());
|
||||
}
|
||||
_pre_eval_block_predicate->add_column_predicate(
|
||||
new SingleColumnBlockPredicate(predicate));
|
||||
_pre_eval_block_predicate.push_back(predicate);
|
||||
} else {
|
||||
short_cir_pred_col_id_set.insert(cid);
|
||||
_short_cir_eval_predicate.push_back(predicate);
|
||||
@ -879,8 +875,16 @@ uint16_t SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_
|
||||
}
|
||||
|
||||
uint16_t original_size = selected_size;
|
||||
bool ret_flags[selected_size];
|
||||
_pre_eval_block_predicate->evaluate_vec(_current_return_columns, selected_size, ret_flags);
|
||||
bool ret_flags[original_size];
|
||||
DCHECK(_pre_eval_block_predicate.size() > 0);
|
||||
auto column_id = _pre_eval_block_predicate[0]->column_id();
|
||||
auto& column = _current_return_columns[column_id];
|
||||
_pre_eval_block_predicate[0]->evaluate_vec(*column, original_size, ret_flags);
|
||||
for (int i = 1; i < _pre_eval_block_predicate.size(); i++) {
|
||||
auto column_id2 = _pre_eval_block_predicate[i]->column_id();
|
||||
auto& column2 = _current_return_columns[column_id2];
|
||||
_pre_eval_block_predicate[i]->evaluate_and_vec(*column2, original_size, ret_flags);
|
||||
}
|
||||
|
||||
uint16_t new_size = 0;
|
||||
|
||||
@ -928,15 +932,6 @@ uint16_t SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_ro
|
||||
for (auto predicate : _short_cir_eval_predicate) {
|
||||
auto column_id = predicate->column_id();
|
||||
auto& short_cir_column = _current_return_columns[column_id];
|
||||
auto* col_ptr = short_cir_column.get();
|
||||
|
||||
// Dictionary columns need some initialization before the predicate is evaluated.
|
||||
if (PredicateTypeTraits::is_range(predicate->type())) {
|
||||
col_ptr->convert_dict_codes_if_necessary();
|
||||
} else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) {
|
||||
col_ptr->generate_hash_values_for_runtime_filter();
|
||||
}
|
||||
|
||||
selected_size = predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, selected_size);
|
||||
}
|
||||
_opts.stats->rows_vec_cond_filtered += original_size - selected_size;
|
||||
@ -1024,6 +1019,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) {
|
||||
if (!_is_need_vec_eval && !_is_need_short_eval) {
|
||||
_output_non_pred_columns(block);
|
||||
} else {
|
||||
_convert_dict_code_for_predicate_if_necessary();
|
||||
uint16_t selected_size = nrows_read;
|
||||
uint16_t sel_rowid_idx[selected_size];
|
||||
|
||||
|
||||
@ -122,6 +122,29 @@ private:
|
||||
|
||||
bool _can_evaluated_by_vectorized(ColumnPredicate* predicate);
|
||||
|
||||
// Dictionary column should do something to initial.
|
||||
void _convert_dict_code_for_predicate_if_necessary() {
|
||||
for (auto predicate : _short_cir_eval_predicate) {
|
||||
auto& column = _current_return_columns[predicate->column_id()];
|
||||
auto* col_ptr = column.get();
|
||||
if (PredicateTypeTraits::is_range(predicate->type())) {
|
||||
col_ptr->convert_dict_codes_if_necessary();
|
||||
} else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) {
|
||||
col_ptr->generate_hash_values_for_runtime_filter();
|
||||
}
|
||||
}
|
||||
|
||||
for (auto predicate : _pre_eval_block_predicate) {
|
||||
auto& column = _current_return_columns[predicate->column_id()];
|
||||
auto* col_ptr = column.get();
|
||||
if (PredicateTypeTraits::is_range(predicate->type())) {
|
||||
col_ptr->convert_dict_codes_if_necessary();
|
||||
} else if (PredicateTypeTraits::is_bloom_filter(predicate->type())) {
|
||||
col_ptr->generate_hash_values_for_runtime_filter();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
class BitmapRangeIterator;
|
||||
|
||||
@ -159,7 +182,7 @@ private:
|
||||
_short_cir_pred_column_ids; // keep columnId of columns for short circuit predicate evaluation
|
||||
std::vector<bool> _is_pred_column; // columns hold by segmentIter
|
||||
vectorized::MutableColumns _current_return_columns;
|
||||
std::unique_ptr<AndBlockColumnPredicate> _pre_eval_block_predicate;
|
||||
std::vector<ColumnPredicate*> _pre_eval_block_predicate;
|
||||
std::vector<ColumnPredicate*> _short_cir_eval_predicate;
|
||||
// when lazy materialization is enabled, segmentIter needs to read data at least twice
|
||||
// first, read predicate columns by various index
|
||||
|
||||
Reference in New Issue
Block a user