[refactor](predicate) refactor predicates in scan node (#10701)
* [refactor](predicate) refactor predicates in scan node * update
This commit is contained in:
@ -574,67 +574,81 @@ Status OlapScanNode::normalize_conjuncts() {
|
||||
for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) {
|
||||
switch (slots[slot_idx]->type().type) {
|
||||
case TYPE_TINYINT: {
|
||||
ColumnValueRange<int8_t> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_TINYINT> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_SMALLINT: {
|
||||
ColumnValueRange<int16_t> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_SMALLINT> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_INT: {
|
||||
ColumnValueRange<int32_t> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_INT> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_BIGINT: {
|
||||
ColumnValueRange<int64_t> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_BIGINT> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_LARGEINT: {
|
||||
ColumnValueRange<__int128> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_LARGEINT> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_CHAR:
|
||||
case TYPE_VARCHAR:
|
||||
case TYPE_HLL:
|
||||
case TYPE_CHAR: {
|
||||
ColumnValueRange<TYPE_CHAR> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
case TYPE_VARCHAR: {
|
||||
ColumnValueRange<TYPE_VARCHAR> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
case TYPE_HLL: {
|
||||
ColumnValueRange<TYPE_HLL> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
case TYPE_STRING: {
|
||||
ColumnValueRange<StringValue> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_STRING> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_DATE:
|
||||
case TYPE_DATE: {
|
||||
ColumnValueRange<TYPE_DATE> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
case TYPE_DATETIME: {
|
||||
ColumnValueRange<DateTimeValue> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_DATETIME> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_DATEV2: {
|
||||
ColumnValueRange<TYPE_DATEV2> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_DECIMALV2: {
|
||||
ColumnValueRange<DecimalV2Value> range(slots[slot_idx]->col_name(),
|
||||
slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_DECIMALV2> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
|
||||
case TYPE_BOOLEAN: {
|
||||
ColumnValueRange<bool> range(slots[slot_idx]->col_name(), slots[slot_idx]->type().type);
|
||||
ColumnValueRange<TYPE_BOOLEAN> range(slots[slot_idx]->col_name());
|
||||
normalize_predicate(range, slots[slot_idx]);
|
||||
break;
|
||||
}
|
||||
@ -879,7 +893,7 @@ Status OlapScanNode::start_scan_thread(RuntimeState* state) {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
template <PrimitiveType T>
|
||||
Status OlapScanNode::normalize_predicate(ColumnValueRange<T>& range, SlotDescriptor* slot) {
|
||||
// 1. Normalize InPredicate, add to ColumnValueRange
|
||||
RETURN_IF_ERROR(normalize_in_and_eq_predicate(slot, &range));
|
||||
@ -994,16 +1008,18 @@ std::pair<bool, void*> OlapScanNode::should_push_down_eq_predicate(doris::SlotDe
|
||||
return result_pair;
|
||||
}
|
||||
|
||||
template <typename T, typename ChangeFixedValueRangeFunc>
|
||||
Status OlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range, PrimitiveType type,
|
||||
template <PrimitiveType primitive_type, typename ChangeFixedValueRangeFunc>
|
||||
Status OlapScanNode::change_fixed_value_range(ColumnValueRange<primitive_type>& temp_range,
|
||||
void* value, const ChangeFixedValueRangeFunc& func) {
|
||||
switch (type) {
|
||||
switch (primitive_type) {
|
||||
case TYPE_DATE: {
|
||||
DateTimeValue date_value = *reinterpret_cast<DateTimeValue*>(value);
|
||||
// olap_scan_node must return empty data for this value,
|
||||
// because casting it to DATE would lose accuracy.
|
||||
if (!date_value.check_loss_accuracy_cast_to_date()) {
|
||||
func(temp_range, reinterpret_cast<T*>(&date_value));
|
||||
func(temp_range,
|
||||
reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(
|
||||
&date_value));
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -1018,16 +1034,19 @@ Status OlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range, P
|
||||
case TYPE_BIGINT:
|
||||
case TYPE_LARGEINT:
|
||||
case TYPE_STRING: {
|
||||
func(temp_range, reinterpret_cast<T*>(value));
|
||||
func(temp_range,
|
||||
reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(value));
|
||||
break;
|
||||
}
|
||||
case TYPE_BOOLEAN: {
|
||||
bool v = *reinterpret_cast<bool*>(value);
|
||||
func(temp_range, reinterpret_cast<T*>(&v));
|
||||
func(temp_range,
|
||||
reinterpret_cast<typename PrimitiveTypeTraits<primitive_type>::CppType*>(&v));
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
LOG(WARNING) << "Normalize filter fail, Unsupported Primitive type. [type=" << type << "]";
|
||||
LOG(WARNING) << "Normalize filter fail, Unsupported Primitive type. [type="
|
||||
<< primitive_type << "]";
|
||||
return Status::InternalError("Normalize filter fail, Unsupported Primitive type");
|
||||
}
|
||||
}
|
||||
@ -1038,13 +1057,13 @@ Status OlapScanNode::change_fixed_value_range(ColumnValueRange<T>& temp_range, P
|
||||
// It will only handle the InPredicate and eq BinaryPredicate in _conjunct_ctxs.
|
||||
// It will try to push down conditions of that column as much as possible,
|
||||
// But if the number of conditions exceeds the limit, none of conditions will be pushed down.
|
||||
template <class T>
|
||||
template <PrimitiveType T>
|
||||
Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
|
||||
ColumnValueRange<T>* range) {
|
||||
std::vector<uint32_t> filter_conjuncts_index;
|
||||
for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) {
|
||||
// create empty range as temp range, temp range should do intersection on range
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(range->type());
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range();
|
||||
|
||||
// 1. Normalize in conjuncts like 'where col in (v1, v2, v3)'
|
||||
if (TExprOpcode::FILTER_IN == _conjunct_ctxs[conj_idx]->root()->op()) {
|
||||
@ -1062,9 +1081,8 @@ Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
|
||||
continue;
|
||||
}
|
||||
auto value = const_cast<void*>(iter->get_value());
|
||||
RETURN_IF_ERROR(
|
||||
change_fixed_value_range(temp_range, slot->type().type, value,
|
||||
ColumnValueRange<T>::add_fixed_value_range));
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
temp_range, value, ColumnValueRange<T>::add_fixed_value_range));
|
||||
iter->next();
|
||||
}
|
||||
|
||||
@ -1091,9 +1109,8 @@ Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
|
||||
auto value = result_pair.second;
|
||||
// where A = nullptr should return empty result set
|
||||
if (value != nullptr) {
|
||||
RETURN_IF_ERROR(
|
||||
change_fixed_value_range(temp_range, slot->type().type, value,
|
||||
ColumnValueRange<T>::add_fixed_value_range));
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
temp_range, value, ColumnValueRange<T>::add_fixed_value_range));
|
||||
}
|
||||
|
||||
if (is_key_column(slot->col_name())) {
|
||||
@ -1118,14 +1135,13 @@ Status OlapScanNode::normalize_in_and_eq_predicate(SlotDescriptor* slot,
|
||||
// It will only handle the NotInPredicate and not eq BinaryPredicate in _conjunct_ctxs.
|
||||
// It will try to push down conditions of that column as much as possible,
|
||||
// But if the number of conditions exceeds the limit, none of conditions will be pushed down.
|
||||
template <class T>
|
||||
template <PrimitiveType T>
|
||||
Status OlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot,
|
||||
ColumnValueRange<T>* range) {
|
||||
// If the conjunct of slot is fixed value, will change the fixed value set of column value range
|
||||
// else add value to not in range and push down predicate directly
|
||||
bool is_fixed_range = range->is_fixed_value_range();
|
||||
auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(range->column_name(),
|
||||
range->type());
|
||||
auto not_in_range = ColumnValueRange<T>::create_empty_column_value_range(range->column_name());
|
||||
|
||||
std::vector<uint32_t> filter_conjuncts_index;
|
||||
for (int conj_idx = 0; conj_idx < _conjunct_ctxs.size(); ++conj_idx) {
|
||||
@ -1146,12 +1162,10 @@ Status OlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot,
|
||||
auto value = const_cast<void*>(iter->get_value());
|
||||
if (is_fixed_range) {
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
*range, slot->type().type, value,
|
||||
ColumnValueRange<T>::remove_fixed_value_range));
|
||||
*range, value, ColumnValueRange<T>::remove_fixed_value_range));
|
||||
} else {
|
||||
RETURN_IF_ERROR(
|
||||
change_fixed_value_range(not_in_range, slot->type().type, value,
|
||||
ColumnValueRange<T>::add_fixed_value_range));
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
not_in_range, value, ColumnValueRange<T>::add_fixed_value_range));
|
||||
}
|
||||
iter->next();
|
||||
}
|
||||
@ -1181,12 +1195,10 @@ Status OlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot,
|
||||
|
||||
if (is_fixed_range) {
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
*range, slot->type().type, value,
|
||||
ColumnValueRange<T>::remove_fixed_value_range));
|
||||
*range, value, ColumnValueRange<T>::remove_fixed_value_range));
|
||||
} else {
|
||||
RETURN_IF_ERROR(
|
||||
change_fixed_value_range(not_in_range, slot->type().type, value,
|
||||
ColumnValueRange<T>::add_fixed_value_range));
|
||||
RETURN_IF_ERROR(change_fixed_value_range(
|
||||
not_in_range, value, ColumnValueRange<T>::add_fixed_value_range));
|
||||
}
|
||||
|
||||
if (is_key_column(slot->col_name())) {
|
||||
@ -1209,7 +1221,7 @@ Status OlapScanNode::normalize_not_in_and_not_eq_predicate(SlotDescriptor* slot,
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <PrimitiveType T>
|
||||
bool OlapScanNode::normalize_is_null_predicate(Expr* expr, SlotDescriptor* slot,
|
||||
const std::string& is_null_str,
|
||||
ColumnValueRange<T>* range) {
|
||||
@ -1226,14 +1238,14 @@ bool OlapScanNode::normalize_is_null_predicate(Expr* expr, SlotDescriptor* slot,
|
||||
return false;
|
||||
}
|
||||
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(range->type());
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range();
|
||||
temp_range.set_contain_null(is_null_str == "null");
|
||||
range->intersection(temp_range);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
template <PrimitiveType T>
|
||||
Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
|
||||
ColumnValueRange<T>* range) {
|
||||
std::vector<uint32_t> filter_conjuncts_index;
|
||||
@ -1303,7 +1315,8 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
|
||||
}
|
||||
}
|
||||
range->add_range(to_olap_filter_type(pred->op(), child_idx),
|
||||
*reinterpret_cast<T*>(&date_value));
|
||||
*reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(
|
||||
&date_value));
|
||||
break;
|
||||
}
|
||||
case TYPE_TINYINT:
|
||||
@ -1318,8 +1331,9 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
|
||||
case TYPE_LARGEINT:
|
||||
case TYPE_BOOLEAN:
|
||||
case TYPE_STRING: {
|
||||
range->add_range(to_olap_filter_type(pred->op(), child_idx),
|
||||
*reinterpret_cast<T*>(value));
|
||||
range->add_range(
|
||||
to_olap_filter_type(pred->op(), child_idx),
|
||||
*reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(value));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1337,7 +1351,9 @@ Status OlapScanNode::normalize_noneq_binary_predicate(SlotDescriptor* slot,
|
||||
|
||||
VLOG_CRITICAL << slot->col_name() << " op: "
|
||||
<< static_cast<int>(to_olap_filter_type(pred->op(), child_idx))
|
||||
<< " value: " << *reinterpret_cast<T*>(value);
|
||||
<< " value: "
|
||||
<< *reinterpret_cast<typename PrimitiveTypeTraits<T>::CppType*>(
|
||||
value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user