[Feature-Variant](Variant Type) support variant type query and index (#27676)

This commit is contained in:
lihangyu
2023-11-29 10:37:28 +08:00
committed by GitHub
parent 3bc09e55f6
commit 7398c3daf1
433 changed files with 10443 additions and 879 deletions

View File

@ -25,6 +25,7 @@
#include <string.h>
#include <algorithm>
#include <memory>
#include <mutex>
#include <ostream>
#include <variant>
@ -37,6 +38,7 @@
#include "exprs/bloom_filter_func.h"
#include "exprs/hybrid_set.h"
#include "exprs/runtime_filter.h"
#include "runtime/define_primitive_type.h"
#include "runtime/descriptors.h"
#include "runtime/exec_env.h"
#include "runtime/primitive_type.h"
@ -78,6 +80,11 @@ static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) {
if (slot->type().is_string_type() && expr->type().is_string_type()) {
return true;
}
// Variant slot cast could be eliminated
// We could use predicate to speed up query, so ignore cast to build predicate
if (slot->type().is_variant_type()) {
return true;
}
if (slot->type().is_array_type()) {
if (slot->type().children[0].type == expr->type().type) {
return true;
@ -362,24 +369,14 @@ Status VScanNode::_normalize_conjuncts() {
// The conjuncts is always on output tuple, so use _output_tuple_desc;
std::vector<SlotDescriptor*> slots = _output_tuple_desc->slots();
for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) {
_colname_to_slot_id[slots[slot_idx]->col_name()] = slots[slot_idx]->id();
auto type = slots[slot_idx]->type().type;
if (slots[slot_idx]->type().type == TYPE_ARRAY) {
type = slots[slot_idx]->type().children[0].type;
if (type == TYPE_ARRAY) {
continue;
}
}
auto init_value_range = [&](SlotDescriptor* slot, PrimitiveType type) {
switch (type) {
#define M(NAME) \
case TYPE_##NAME: { \
ColumnValueRange<TYPE_##NAME> range( \
slots[slot_idx]->col_name(), slots[slot_idx]->is_nullable(), \
slots[slot_idx]->type().precision, slots[slot_idx]->type().scale); \
_slot_id_to_value_range[slots[slot_idx]->id()] = std::pair {slots[slot_idx], range}; \
break; \
#define M(NAME) \
case TYPE_##NAME: { \
ColumnValueRange<TYPE_##NAME> range(slot->col_name(), slot->is_nullable(), \
slot->type().precision, slot->type().scale); \
_slot_id_to_value_range[slot->id()] = std::pair {slot, range}; \
break; \
}
#define APPLY_FOR_PRIMITIVE_TYPE(M) \
M(TINYINT) \
@ -404,11 +401,29 @@ Status VScanNode::_normalize_conjuncts() {
APPLY_FOR_PRIMITIVE_TYPE(M)
#undef M
default: {
VLOG_CRITICAL << "Unsupported Normalize Slot [ColName=" << slots[slot_idx]->col_name()
<< "]";
VLOG_CRITICAL << "Unsupported Normalize Slot [ColName=" << slot->col_name() << "]";
break;
}
}
};
for (int slot_idx = 0; slot_idx < slots.size(); ++slot_idx) {
_colname_to_slot_id[slots[slot_idx]->col_name()] = slots[slot_idx]->id();
_slot_id_to_slot_desc[slots[slot_idx]->id()] = slots[slot_idx];
auto type = slots[slot_idx]->type().type;
if (slots[slot_idx]->type().type == TYPE_ARRAY) {
type = slots[slot_idx]->type().children[0].type;
if (type == TYPE_ARRAY) {
continue;
}
}
init_value_range(slots[slot_idx], slots[slot_idx]->type().type);
}
get_cast_types_for_variants();
for (const auto& [colname, type] : _cast_types_for_variants) {
init_value_range(_slot_id_to_slot_desc[_colname_to_slot_id[colname]], type);
}
for (auto it = _conjuncts.begin(); it != _conjuncts.end();) {
@ -494,6 +509,14 @@ Status VScanNode::_normalize_predicate(const VExprSPtr& conjunct_expr_root, VExp
output_expr = nullptr;
return Status::OK();
}
std::shared_ptr<VSlotRef> slotref;
for (const auto& child : cur_expr->children()) {
if (VExpr::expr_without_cast(child)->node_type() != TExprNodeType::SLOT_REF) {
// not a slot ref(column)
continue;
}
slotref = std::dynamic_pointer_cast<VSlotRef>(VExpr::expr_without_cast(child));
}
if (_is_predicate_acting_on_slot(cur_expr, in_predicate_checker, &slot, &range) ||
_is_predicate_acting_on_slot(cur_expr, eq_predicate_checker, &slot, &range)) {
Status status = Status::OK();
@ -553,6 +576,14 @@ Status VScanNode::_normalize_predicate(const VExprSPtr& conjunct_expr_root, VExp
return Status::OK();
}
if (pdt == PushDownType::ACCEPTABLE && slotref != nullptr &&
slotref->type().is_variant_type()) {
// remaining it in the expr tree, in order to filter by function if the pushdown
// predicate is not applied
output_expr = conjunct_expr_root; // remaining in conjunct tree
return Status::OK();
}
if (pdt == PushDownType::ACCEPTABLE &&
(_is_key_column(slot->col_name()) || _storage_no_merge())) {
output_expr = nullptr;