[feature-wip](inverted index) API for inverted index reader and syntax for fulltext match (#14211)
* [feature-wip](inverted index) Inverted index API: reader
* [feature-wip](inverted index) Full-text query syntax with MATCH/MATCH_ANY/MATCH_ALL
* [feature-wip](inverted index) Adapt to index meta
* [enhance] Add more metrics
* [enhance] Add full-text match query check for column type and index parser
* [feature-wip](inverted index) Support applying the inverted index in compound predicates, except for leaf nodes of AND nodes
This commit is contained in:
@ -17,9 +17,11 @@
|
||||
|
||||
#include "vec/exec/scan/vscan_node.h"
|
||||
|
||||
#include "common/consts.h"
|
||||
#include "common/status.h"
|
||||
#include "exprs/hybrid_set.h"
|
||||
#include "runtime/runtime_filter_mgr.h"
|
||||
#include "util/defer_op.h"
|
||||
#include "util/runtime_profile.h"
|
||||
#include "vec/columns/column_const.h"
|
||||
#include "vec/exec/scan/pip_scanner_context.h"
|
||||
@ -124,6 +126,18 @@ Status VScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool*
|
||||
SCOPED_TIMER(_get_next_timer);
|
||||
SCOPED_TIMER(_runtime_profile->total_time_counter());
|
||||
SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
|
||||
// In the inverted index apply logic, in order to optimize query performance,
|
||||
// we build some temporary columns into the block. These columns are only used at the scan node level,
|
||||
// so remove them when the query leaves the scan node, to keep other nodes from using block->columns() to make a wrong decision.
|
||||
Defer drop_block_temp_column {[&]() {
|
||||
auto all_column_names = block->get_names();
|
||||
for (auto& name : all_column_names) {
|
||||
if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
|
||||
block->erase(name);
|
||||
}
|
||||
}
|
||||
}};
|
||||
|
||||
if (state->is_cancelled()) {
|
||||
_scanner_ctx->set_status_on_error(Status::Cancelled("query cancelled"));
|
||||
return _scanner_ctx->status();
|
||||
@ -491,6 +505,9 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output
|
||||
RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate(
|
||||
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
|
||||
&pdt));
|
||||
RETURN_IF_PUSH_DOWN(_normalize_match_predicate(
|
||||
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
|
||||
&pdt));
|
||||
if (_is_key_column(slot->col_name())) {
|
||||
RETURN_IF_PUSH_DOWN(_normalize_bitmap_filter(
|
||||
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt));
|
||||
@ -1013,7 +1030,48 @@ Status VScanNode::_normalize_binary_in_compound_predicate(vectorized::VExpr* exp
|
||||
*pdt = PushDownType::ACCEPTABLE;
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
template <PrimitiveType T>
|
||||
Status VScanNode::_normalize_match_predicate(VExpr* expr, VExprContext* expr_ctx,
|
||||
SlotDescriptor* slot, ColumnValueRange<T>& range,
|
||||
PushDownType* pdt) {
|
||||
if (TExprNodeType::MATCH_PRED == expr->node_type()) {
|
||||
DCHECK(expr->children().size() == 2);
|
||||
|
||||
// create empty range as temp range, temp range should do intersection on range
|
||||
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
|
||||
slot->type().precision, slot->type().scale);
|
||||
// Normalize match conjuncts like 'where col match value'
|
||||
|
||||
auto match_checker = [](const std::string& fn_name) { return fn_name == "match"; };
|
||||
StringRef value;
|
||||
int slot_ref_child = -1;
|
||||
PushDownType temp_pdt;
|
||||
RETURN_IF_ERROR(_should_push_down_binary_predicate(
|
||||
reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx, &value, &slot_ref_child,
|
||||
match_checker, temp_pdt));
|
||||
if (temp_pdt != PushDownType::UNACCEPTABLE) {
|
||||
DCHECK(slot_ref_child >= 0);
|
||||
if (value.data != nullptr) {
|
||||
using CppType = typename PrimitiveTypeTraits<T>::CppType;
|
||||
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
|
||||
T == TYPE_HLL) {
|
||||
auto val = StringValue(value.data, value.size);
|
||||
ColumnValueRange<T>::add_match_value_range(temp_range,
|
||||
to_match_type(expr->op()),
|
||||
reinterpret_cast<CppType*>(&val));
|
||||
} else {
|
||||
ColumnValueRange<T>::add_match_value_range(
|
||||
temp_range, to_match_type(expr->op()),
|
||||
reinterpret_cast<CppType*>(const_cast<char*>(value.data)));
|
||||
}
|
||||
range.intersection(temp_range);
|
||||
}
|
||||
*pdt = temp_pdt;
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user