[refactor](predicate) refactor predicates in scan node (#10701)
* [reafactor](predicate) refactor predicates in scan node * update
This commit is contained in:
@ -642,74 +642,81 @@ Status VOlapScanNode::normalize_conjuncts() {
|
||||
for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) {
|
||||
switch (slots[slot_idx]->type().type) {
|
||||
case TYPE_TINYINT: {
|
||||
ColumnValueRange<int8_t> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_TINYINT> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_SMALLINT: {
|
||||
ColumnValueRange<int16_t> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_SMALLINT> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_INT: {
|
||||
ColumnValueRange<int32_t> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_INT> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_BIGINT: {
|
||||
ColumnValueRange<int64_t> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_BIGINT> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_LARGEINT: {
|
||||
ColumnValueRange<__int128> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_LARGEINT> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_CHAR:
|
||||
case TYPE_VARCHAR:
|
||||
case TYPE_HLL:
|
||||
case TYPE_CHAR: {
|
||||
ColumnValueRange<TYPE_CHAR> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
case TYPE_VARCHAR: {
|
||||
ColumnValueRange<TYPE_VARCHAR> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
case TYPE_HLL: {
|
||||
ColumnValueRange<TYPE_HLL> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
case TYPE_STRING: {
|
||||
ColumnValueRange<StringValue> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_STRING> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_DATE:
|
||||
case TYPE_DATE: {
|
||||
ColumnValueRange<TYPE_DATE> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
case TYPE_DATETIME: {
|
||||
ColumnValueRange<DateTimeValue> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_DATETIME> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_DATEV2: {
|
||||
ColumnValueRange<doris::vectorized::DateV2Value> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_DATEV2> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_DECIMALV2: {
|
||||
ColumnValueRange<DecimalV2Value> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_DECIMALV2> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_BOOLEAN: {
|
||||
ColumnValueRange<bool> range(slots[slot_idx]->col_name(), slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_BOOLEAN> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
@ -830,7 +837,7 @@ Status VOlapScanNode::start_scan(RuntimeState* state) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
template <PrimitiveType T>
|
||||
Status VOlapScanNode::normalize_predicate(ColumnValueRange<T>& range, SlotDescriptor* slot) {
|
||||
// 1. Normalize InPredicate, add to ColumnValueRange
|
||||
RETURN_IF_ERROR(normalize_in_and_eq_predicate(slot, &range));
|
||||
@ -946,16 +953,18 @@ std::pair<bool, void*> VOlapScanNode::should_push_down_eq_predicate(doris::SlotD
|
||||
return result_pair;
|
||||
}
|
||||
|
||||
template <typename T, typename ChangeFixedValueRangeFunc>
|
||||
Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range, PrimitiveType type,
|
||||
template <PrimitiveType primitive_type, typename ChangeFixedValueRangeFunc>
|
||||
Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<primitive_type>& temp_range,
|
||||
void* value, const ChangeFixedValueRangeFunc& func) {
|
||||
switch (type) {
|
||||
switch (primitive_type) {
|
||||
case TYPE_DATE: {
|
||||
DateTimeValue date_value = *reinterpret_cast<DateTimeValue*>(value);
|
||||
// There is must return empty data in olap_scan_node,
|
||||
// Because data value loss accuracy
|
||||
if (!date_value.check_loss_accuracy_cast_to_date()) {
|
||||
func(temp_range, reinterpret_cast<T*>(&date_value));
|
||||
func(temp_range,
|
||||
reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(
|
||||
&date_value));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -970,12 +979,14 @@ Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range,
|
||||
case TYPE_BIGINT:
|
||||
case TYPE_LARGEINT:
|
||||
case TYPE_STRING: {
|
||||
func(temp_range, reinterpret_cast<T*>(value));
|
||||
func(temp_range,
|
||||
reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(value));
|
||||
break;
|
||||
}
|
||||
case TYPE_BOOLEAN: {
|
||||
bool v = *reinterpret_cast<bool*>(value);
|
||||
func(temp_range, reinterpret_cast<T*>(&v));
|
||||
func(temp_range,
|
||||
reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(&v));
|
||||
break;
|
||||
}
|
||||
case TYPE_DATEV2: {
|
||||
@ -983,7 +994,7 @@ Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range,
|
||||
if (!date_value.check_loss_accuracy_cast_to_date()) {
|
||||
doris::vectorized::DateV2Value date_v2;
|
||||
date_v2.convert_dt_to_date_v2(&date_value);
|
||||
if constexpr (std::is_same_v<T, doris::vectorized::DateV2Value>) {
|
||||
if constexpr (primitive_type == PrimitiveType::TYPE_DATEV2) {
|
||||
func(temp_range, &date_v2);
|
||||
} else {
|
||||
__builtin_unreachable();
|
||||
@ -992,7 +1003,8 @@ Status VOlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range,
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
LOG(WARNING) << "Normalize filter fail, Unsupported Primitive type. [type=" << type << "]";
|
||||
LOG(WARNING) << "Normalize filter fail, Unsupported Primitive type. [type="
|
||||
<< primitive_type << "]";
|
||||
return Status::InternalError("Normalize filter fail, Unsupported Primitive type");
|
||||
}
|
||||
}
|
||||
@ -1066,13 +1078,13 @@ void VOlapScanNode::remove_pushed_conjuncts(RuntimeState* state) {
|
||||
// It will only handle the InPredicate and eq BinaryPredicate in conjunct_ctxs.
|
||||
// It will try to push down conditions of that column as much as possible,
|
||||
// But if the number of conditions exceeds the limit, none of conditions will be pushed down.
|
||||
template <class T>
|
||||
template <PrimitiveType T>
|
||||
Status VOlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
|
||||
ColumnValueRange<T>* range) {
|
||||
std::vector<uint32_t> filter_conjuncts_index;
|
||||
for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) {
|
||||
// create empty range as temp range, temp range should do intersection on range
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(range->type());
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range();
|
||||
|
||||
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
|
||||
if (TExprOpcode::FILTER_IN == _conjunct_ctxs[conj_idx]->root()->op()) {
|
||||
@ -1090,9 +1102,8 @@ Status VOlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
|
||||
continue;
|
||||
}
|
||||
auto value = const_cast<void*>(iter->get_value());
|
||||
RETURN_IF_ERROR(
|
||||
change_fixed_value_range(temp_range, slot->type().type, value,
|
||||
ColumnValueRange<T>::add_fixed_value_range));
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
temp_range, value, ColumnValueRange<T>::add_fixed_value_range));
|
||||
iter->next();
|
||||
}
|
||||
|
||||
@ -1119,9 +1130,8 @@ Status VOlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
|
||||
auto value = result_pair.second;
|
||||
// where A = nullptr should return empty result set
|
||||
if (value != nullptr) {
|
||||
RETURN_IF_ERROR(
|
||||
change_fixed_value_range(temp_range, slot->type().type, value,
|
||||
ColumnValueRange<T>::add_fixed_value_range));
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
temp_range, value, ColumnValueRange<T>::add_fixed_value_range));
|
||||
}
|
||||
|
||||
if (is_key_column(slot->col_name())) {
|
||||
@ -1146,14 +1156,13 @@ Status VOlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
|
||||
// It will only handle the NotInPredicate and not eq BinaryPredicate in conjunct_ctxs.
|
||||
// It will try to push down conditions of that column as much as possible,
|
||||
// But if the number of conditions exceeds the limit, none of conditions will be pushed down.
|
||||
template <class T>
|
||||
template <PrimitiveType T>
|
||||
Status VOlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot,
|
||||
ColumnValueRange<T>* range) {
|
||||
// If the conjunct of slot is fixed value, will change the fixed value set of column value range
|
||||
// else add value to not in range and push down predicate directly
|
||||
bool is_fixed_range = range->is_fixed_value_range();
|
||||
auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(range->column_name(),
|
||||
range->type());
|
||||
auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(range->column_name());
|
||||
|
||||
std::vector<uint32_t> filter_conjuncts_index;
|
||||
for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) {
|
||||
@ -1174,12 +1183,10 @@ Status VOlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot
|
||||
auto value = const_cast<void*>(iter->get_value());
|
||||
if (is_fixed_range) {
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
*range, slot->type().type, value,
|
||||
ColumnValueRange<T>::remove_fixed_value_range));
|
||||
*range, value, ColumnValueRange<T>::remove_fixed_value_range));
|
||||
} else {
|
||||
RETURN_IF_ERROR(
|
||||
change_fixed_value_range(not_in_range, slot->type().type, value,
|
||||
ColumnValueRange<T>::add_fixed_value_range));
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
not_in_range, value, ColumnValueRange<T>::add_fixed_value_range));
|
||||
}
|
||||
iter->next();
|
||||
}
|
||||
@ -1209,12 +1216,10 @@ Status VOlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot
|
||||
|
||||
if (is_fixed_range) {
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
*range, slot->type().type, value,
|
||||
ColumnValueRange<T>::remove_fixed_value_range));
|
||||
*range, value, ColumnValueRange<T>::remove_fixed_value_range));
|
||||
} else {
|
||||
RETURN_IF_ERROR(
|
||||
change_fixed_value_range(not_in_range, slot->type().type, value,
|
||||
ColumnValueRange<T>::add_fixed_value_range));
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
not_in_range, value, ColumnValueRange<T>::add_fixed_value_range));
|
||||
}
|
||||
|
||||
if (is_key_column(slot->col_name())) {
|
||||
@ -1237,7 +1242,7 @@ Status VOlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <PrimitiveType T>
|
||||
bool VOlapScanNode::normalize_is_null_predicate(Expr* expr, SlotDescriptor* slot,
|
||||
const std::string& is_null_str,
|
||||
ColumnValueRange<T>* range) {
|
||||
@ -1254,14 +1259,14 @@ bool VOlapScanNode::normalize_is_null_predicate(Expr* expr, SlotDescriptor* slot
|
||||
return false;
|
||||
}
|
||||
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(range->type());
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range();
|
||||
temp_range.set_contain_null(is_null_str == "null");
|
||||
range->intersection(temp_range);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
template <PrimitiveType T>
|
||||
Status VOlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
|
||||
ColumnValueRange<T>* range) {
|
||||
std::vector<uint32_t> filter_conjuncts_index;
|
||||
@ -1331,7 +1336,8 @@ Status VOlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
|
||||
}
|
||||
}
|
||||
range->add_range(to_olap_filter_type(pred->op(), child_idx),
|
||||
*reinterpret_cast<T*>(&date_value));
|
||||
*reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(
|
||||
&date_value));
|
||||
break;
|
||||
}
|
||||
case TYPE_DATEV2: {
|
||||
@ -1343,7 +1349,7 @@ Status VOlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
|
||||
}
|
||||
doris::vectorized::DateV2Value date_v2;
|
||||
date_v2.convert_dt_to_date_v2(&date_value);
|
||||
if constexpr (std::is_same_v<T, doris::vectorized::DateV2Value>) {
|
||||
if constexpr (T == PrimitiveType::TYPE_DATEV2) {
|
||||
range->add_range(to_olap_filter_type(pred->op(), child_idx), date_v2);
|
||||
break;
|
||||
} else {
|
||||
@ -1362,8 +1368,9 @@ Status VOlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
|
||||
case TYPE_LARGEINT:
|
||||
case TYPE_BOOLEAN:
|
||||
case TYPE_STRING: {
|
||||
range->add_range(to_olap_filter_type(pred->op(), child_idx),
|
||||
*reinterpret_cast<T*>(value));
|
||||
range->add_range(
|
||||
to_olap_filter_type(pred->op(), child_idx),
|
||||
*reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(value));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1381,7 +1388,9 @@ Status VOlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
|
||||
|
||||
VLOG_CRITICAL << slot->col_name() << " op: "
|
||||
<< static_cast<int>(to_olap_filter_type(pred->op(), child_idx))
|
||||
<< " value: " << *reinterpret_cast<T*>(value);
|
||||
<< " value: "
|
||||
<< *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(
|
||||
value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user