[refactor](predicate) refactor predicates in scan node (#10701)

* [reafactor](predicate) refactor predicates in scan node

* update
This commit is contained in:
Gabriel
2022-07-11 09:21:01 +08:00
committed by GitHub
parent 4cb80c5733
commit a044b5dcc5
8 changed files with 322 additions and 257 deletions

View File

@ -642,74 +642,81 @@ Status VOlapScanNode::normalize_conjuncts() {
for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) {
switch (slots[slot_idx]->type().type) {
case TYPE_TINYINT: {
ColumnValueRange<int8_t> range(slots[slot_idx]->col_name(),
slots[slot_idx]->type().type);
ColumnValueRange<TYPE_TINYINT> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_SMALLINT: {
ColumnValueRange<int16_t> range(slots[slot_idx]->col_name(),
slots[slot_idx]->type().type);
ColumnValueRange<TYPE_SMALLINT> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_INT: {
ColumnValueRange<int32_t> range(slots[slot_idx]->col_name(),
slots[slot_idx]->type().type);
ColumnValueRange<TYPE_INT> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_BIGINT: {
ColumnValueRange<int64_t> range(slots[slot_idx]->col_name(),
slots[slot_idx]->type().type);
ColumnValueRange<TYPE_BIGINT> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_LARGEINT: {
ColumnValueRange<__int128> range(slots[slot_idx]->col_name(),
slots[slot_idx]->type().type);
ColumnValueRange<TYPE_LARGEINT> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_CHAR: {
ColumnValueRange<TYPE_CHAR> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_VARCHAR: {
ColumnValueRange<TYPE_VARCHAR> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_HLL: {
ColumnValueRange<TYPE_HLL> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_STRING: {
ColumnValueRange<StringValue> range(slots[slot_idx]->col_name(),
slots[slot_idx]->type().type);
ColumnValueRange<TYPE_STRING> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_DATE:
case TYPE_DATE: {
ColumnValueRange<TYPE_DATE> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_DATETIME: {
ColumnValueRange<DateTimeValue> range(slots[slot_idx]->col_name(),
slots[slot_idx]->type().type);
ColumnValueRange<TYPE_DATETIME> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_DATEV2: {
ColumnValueRange<doris::vectorized::DateV2Value> range(slots[slot_idx]->col_name(),
slots[slot_idx]->type().type);
ColumnValueRange<TYPE_DATEV2> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_DECIMALV2: {
ColumnValueRange<DecimalV2Value> range(slots[slot_idx]->col_name(),
slots[slot_idx]->type().type);
ColumnValueRange<TYPE_DECIMALV2> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
case TYPE_BOOLEAN: {
ColumnValueRange<bool> range(slots[slot_idx]->col_name(), slots[slot_idx]->type().type);
ColumnValueRange<TYPE_BOOLEAN> range(slots[slot_idx]->col_name());
normalize_predicate(range, slots[slot_idx]);
break;
}
@ -830,7 +837,7 @@ Status VOlapScanNode::start_scan(RuntimeState* state) {
return Status::OK();
}
template <class T>
template <PrimitiveType T>
Status VOlapScanNode::normalize_predicate(ColumnValueRange<T>& range, SlotDescriptor* slot) {
// 1. Normalize InPredicate, add to ColumnValueRange
RETURN_IF_ERROR(normalize_in_and_eq_predicate(slot, &range));
@ -946,16 +953,18 @@ std::pair<bool, void*> VOlapScanNode::should_push_down_eq_predicate(doris::SlotD
return result_pair;
}
template <typename T, typename ChangeFixedValueRangeFunc>
Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range, PrimitiveType type,
template <PrimitiveType primitive_type, typename ChangeFixedValueRangeFunc>
Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<primitive_type>& temp_range,
void* value, const ChangeFixedValueRangeFunc& func) {
switch (type) {
switch (primitive_type) {
case TYPE_DATE: {
DateTimeValue date_value = *reinterpret_cast<DateTimeValue*>(value);
// There is must return empty data in olap_scan_node,
// Because data value loss accuracy
if (!date_value.check_loss_accuracy_cast_to_date()) {
func(temp_range, reinterpret_cast<T*>(&date_value));
func(temp_range,
reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(
&date_value));
}
break;
}
@ -970,12 +979,14 @@ Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range,
case TYPE_BIGINT:
case TYPE_LARGEINT:
case TYPE_STRING: {
func(temp_range, reinterpret_cast<T*>(value));
func(temp_range,
reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(value));
break;
}
case TYPE_BOOLEAN: {
bool v = *reinterpret_cast<bool*>(value);
func(temp_range, reinterpret_cast<T*>(&v));
func(temp_range,
reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(&v));
break;
}
case TYPE_DATEV2: {
@ -983,7 +994,7 @@ Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range,
if (!date_value.check_loss_accuracy_cast_to_date()) {
doris::vectorized::DateV2Value date_v2;
date_v2.convert_dt_to_date_v2(&date_value);
if constexpr (std::is_same_v<T, doris::vectorized::DateV2Value>) {
if constexpr (primitive_type == PrimitiveType::TYPE_DATEV2) {
func(temp_range, &date_v2);
} else {
__builtin_unreachable();
@ -992,7 +1003,8 @@ Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range,
break;
}
default: {
LOG(WARNING) << "Normalize filter fail, Unsupported Primitive type. [type=" << type << "]";
LOG(WARNING) << "Normalize filter fail, Unsupported Primitive type. [type="
<< primitive_type << "]";
return Status::InternalError("Normalize filter fail, Unsupported Primitive type");
}
}
@ -1066,13 +1078,13 @@ void VOlapScanNode::remove_pushed_conjuncts(RuntimeState* state) {
// It will only handle the InPredicate and eq BinaryPredicate in conjunct_ctxs.
// It will try to push down conditions of that column as much as possible,
// But if the number of conditions exceeds the limit, none of conditions will be pushed down.
template <class T>
template <PrimitiveType T>
Status VOlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
ColumnValueRange<T>* range) {
std::vector<uint32_t> filter_conjuncts_index;
for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) {
// create empty range as temp range, temp range should do intersection on range
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(range->type());
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range();
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
if (TExprOpcode::FILTER_IN == _conjunct_ctxs[conj_idx]->root()->op()) {
@ -1090,9 +1102,8 @@ Status VOlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
continue;
}
auto value = const_cast<void*>(iter->get_value());
RETURN_IF_ERROR(
change_fixed_value_range(temp_range, slot->type().type, value,
ColumnValueRange<T>::add_fixed_value_range));
RETURN_IF_ERROR(change_fixed_value_range(
temp_range, value, ColumnValueRange<T>::add_fixed_value_range));
iter->next();
}
@ -1119,9 +1130,8 @@ Status VOlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
auto value = result_pair.second;
// where A = nullptr should return empty result set
if (value != nullptr) {
RETURN_IF_ERROR(
change_fixed_value_range(temp_range, slot->type().type, value,
ColumnValueRange<T>::add_fixed_value_range));
RETURN_IF_ERROR(change_fixed_value_range(
temp_range, value, ColumnValueRange<T>::add_fixed_value_range));
}
if (is_key_column(slot->col_name())) {
@ -1146,14 +1156,13 @@ Status VOlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
// It will only handle the NotInPredicate and not eq BinaryPredicate in conjunct_ctxs.
// It will try to push down conditions of that column as much as possible,
// But if the number of conditions exceeds the limit, none of conditions will be pushed down.
template <class T>
template <PrimitiveType T>
Status VOlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot,
ColumnValueRange<T>* range) {
// If the conjunct of slot is fixed value, will change the fixed value set of column value range
// else add value to not in range and push down predicate directly
bool is_fixed_range = range->is_fixed_value_range();
auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(range->column_name(),
range->type());
auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(range->column_name());
std::vector<uint32_t> filter_conjuncts_index;
for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) {
@ -1174,12 +1183,10 @@ Status VOlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot
auto value = const_cast<void*>(iter->get_value());
if (is_fixed_range) {
RETURN_IF_ERROR(change_fixed_value_range(
*range, slot->type().type, value,
ColumnValueRange<T>::remove_fixed_value_range));
*range, value, ColumnValueRange<T>::remove_fixed_value_range));
} else {
RETURN_IF_ERROR(
change_fixed_value_range(not_in_range, slot->type().type, value,
ColumnValueRange<T>::add_fixed_value_range));
RETURN_IF_ERROR(change_fixed_value_range(
not_in_range, value, ColumnValueRange<T>::add_fixed_value_range));
}
iter->next();
}
@ -1209,12 +1216,10 @@ Status VOlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot
if (is_fixed_range) {
RETURN_IF_ERROR(change_fixed_value_range(
*range, slot->type().type, value,
ColumnValueRange<T>::remove_fixed_value_range));
*range, value, ColumnValueRange<T>::remove_fixed_value_range));
} else {
RETURN_IF_ERROR(
change_fixed_value_range(not_in_range, slot->type().type, value,
ColumnValueRange<T>::add_fixed_value_range));
RETURN_IF_ERROR(change_fixed_value_range(
not_in_range, value, ColumnValueRange<T>::add_fixed_value_range));
}
if (is_key_column(slot->col_name())) {
@ -1237,7 +1242,7 @@ Status VOlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot
return Status::OK();
}
template <typename T>
template <PrimitiveType T>
bool VOlapScanNode::normalize_is_null_predicate(Expr* expr, SlotDescriptor* slot,
const std::string& is_null_str,
ColumnValueRange<T>* range) {
@ -1254,14 +1259,14 @@ bool VOlapScanNode::normalize_is_null_predicate(Expr* expr, SlotDescriptor* slot
return false;
}
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(range->type());
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range();
temp_range.set_contain_null(is_null_str == "null");
range->intersection(temp_range);
return true;
}
template <class T>
template <PrimitiveType T>
Status VOlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
ColumnValueRange<T>* range) {
std::vector<uint32_t> filter_conjuncts_index;
@ -1331,7 +1336,8 @@ Status VOlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
}
}
range->add_range(to_olap_filter_type(pred->op(), child_idx),
*reinterpret_cast<T*>(&date_value));
*reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(
&date_value));
break;
}
case TYPE_DATEV2: {
@ -1343,7 +1349,7 @@ Status VOlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
}
doris::vectorized::DateV2Value date_v2;
date_v2.convert_dt_to_date_v2(&date_value);
if constexpr (std::is_same_v<T, doris::vectorized::DateV2Value>) {
if constexpr (T == PrimitiveType::TYPE_DATEV2) {
range->add_range(to_olap_filter_type(pred->op(), child_idx), date_v2);
break;
} else {
@ -1362,8 +1368,9 @@ Status VOlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
case TYPE_LARGEINT:
case TYPE_BOOLEAN:
case TYPE_STRING: {
range->add_range(to_olap_filter_type(pred->op(), child_idx),
*reinterpret_cast<T*>(value));
range->add_range(
to_olap_filter_type(pred->op(), child_idx),
*reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(value));
break;
}
@ -1381,7 +1388,9 @@ Status VOlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
VLOG_CRITICAL << slot->col_name() << " op: "
<< static_cast<int>(to_olap_filter_type(pred->op(), child_idx))
<< " value: " << *reinterpret_cast<T*>(value);
<< " value: "
<< *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(
value);
}
}
}