[feature-wip](inverted index) API for inverted index reader and syntax for fulltext match (#14211)

* [feature-wip](inverted index)inverted index api: reader

* [feature-wip](inverted index) Fulltext query syntax with MATCH/MATCH_ALL/MATCH_ALL

* [feature-wip](inverted index) Adapt to index meta

* [enhance] add more metrics

* [enhance] add fulltext match query check for column type and index parser

* [feature-wip](inverted index) Support apply inverted index in compound predicate which except leaf node of and node
This commit is contained in:
YueW
2022-12-30 21:48:14 +08:00
committed by GitHub
parent b23d068281
commit edecc2e706
45 changed files with 1450 additions and 31 deletions

View File

@ -17,9 +17,11 @@
#include "vec/exec/scan/vscan_node.h"
#include "common/consts.h"
#include "common/status.h"
#include "exprs/hybrid_set.h"
#include "runtime/runtime_filter_mgr.h"
#include "util/defer_op.h"
#include "util/runtime_profile.h"
#include "vec/columns/column_const.h"
#include "vec/exec/scan/pip_scanner_context.h"
@ -124,6 +126,18 @@ Status VScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool*
SCOPED_TIMER(_get_next_timer);
SCOPED_TIMER(_runtime_profile->total_time_counter());
SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh());
// in inverted index apply logic, in order to optimize query performance,
// we built some temporary columns into block, these columns only used in scan node level,
// remove them when query leave scan node to avoid other nodes use block->columns() to make a wrong decision
Defer drop_block_temp_column {[&]() {
auto all_column_names = block->get_names();
for (auto& name : all_column_names) {
if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) {
block->erase(name);
}
}
}};
if (state->is_cancelled()) {
_scanner_ctx->set_status_on_error(Status::Cancelled("query cancelled"));
return _scanner_ctx->status();
@ -491,6 +505,9 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output
RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate(
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
&pdt));
RETURN_IF_PUSH_DOWN(_normalize_match_predicate(
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range,
&pdt));
if (_is_key_column(slot->col_name())) {
RETURN_IF_PUSH_DOWN(_normalize_bitmap_filter(
cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt));
@ -1013,7 +1030,48 @@ Status VScanNode::_normalize_binary_in_compound_predicate(vectorized::VExpr* exp
*pdt = PushDownType::ACCEPTABLE;
}
}
return Status::OK();
}
template <PrimitiveType T>
Status VScanNode::_normalize_match_predicate(VExpr* expr, VExprContext* expr_ctx,
SlotDescriptor* slot, ColumnValueRange<T>& range,
PushDownType* pdt) {
if (TExprNodeType::MATCH_PRED == expr->node_type()) {
DCHECK(expr->children().size() == 2);
// create empty range as temp range, temp range should do intersection on range
auto temp_range = ColumnValueRange<T>::create_empty_column_value_range(
slot->type().precision, slot->type().scale);
// Normalize match conjuncts like 'where col match value'
auto match_checker = [](const std::string& fn_name) { return fn_name == "match"; };
StringRef value;
int slot_ref_child = -1;
PushDownType temp_pdt;
RETURN_IF_ERROR(_should_push_down_binary_predicate(
reinterpret_cast<VectorizedFnCall*>(expr), expr_ctx, &value, &slot_ref_child,
match_checker, temp_pdt));
if (temp_pdt != PushDownType::UNACCEPTABLE) {
DCHECK(slot_ref_child >= 0);
if (value.data != nullptr) {
using CppType = typename PrimitiveTypeTraits<T>::CppType;
if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING ||
T == TYPE_HLL) {
auto val = StringValue(value.data, value.size);
ColumnValueRange<T>::add_match_value_range(temp_range,
to_match_type(expr->op()),
reinterpret_cast<CppType*>(&val));
} else {
ColumnValueRange<T>::add_match_value_range(
temp_range, to_match_type(expr->op()),
reinterpret_cast<CppType*>(const_cast<char*>(value.data)));
}
range.intersection(temp_range);
}
*pdt = temp_pdt;
}
}
return Status::OK();
}