[Refactor](inverted index) refactor inverted index compound predicates evaluate logic #38908 (#41385)

cherry pick from #38908
This commit is contained in:
airborne12
2024-09-29 09:19:17 +08:00
committed by GitHub
parent 9a9226e541
commit 727f0374be
106 changed files with 1333 additions and 1008 deletions

View File

@ -73,6 +73,47 @@
namespace doris::segment_v2 {
/// Builds an InvertedIndexQueryParam<PT> from a type-erased query value.
///
/// `value` is read as a pointer to the CppType of PT and passed through
/// PrimitiveTypeConvertor<PT>::to_storage_field_type; the converted value is stored in a
/// newly created parameter object, which is handed back through `result_param`.
///
/// @param value         pointer to a value of PrimitiveTypeTraits<PT>::CppType.
/// @param result_param  receives ownership of the created parameter.
/// @return always Status::OK().
template <PrimitiveType PT>
Status InvertedIndexQueryParamFactory::create_query_value(
        const void* value, std::unique_ptr<InvertedIndexQueryParamFactory>& result_param) {
    using CPP_TYPE = typename PrimitiveTypeTraits<PT>::CppType;

    const auto* typed_value = reinterpret_cast<const CPP_TYPE*>(value);
    // auto&& binds to whatever the convertor returns (value or reference), so the address
    // taken below stays valid until the end of this function.
    auto&& converted = PrimitiveTypeConvertor<PT>::to_storage_field_type(*typed_value);

    std::unique_ptr<InvertedIndexQueryParam<PT>> query_param =
            InvertedIndexQueryParam<PT>::create_unique();
    query_param->set_value(&converted);
    result_param = std::move(query_param);
    return Status::OK();
}
// Explicit instantiations of InvertedIndexQueryParamFactory::create_query_value<PT>.
// CREATE_QUERY_VALUE_TEMPLATE(PT) expands to one explicit instantiation of the template
// for the given primitive type; the list below names every type that gets one.
#define CREATE_QUERY_VALUE_TEMPLATE(PT) \
template Status InvertedIndexQueryParamFactory::create_query_value<PT>( \
const void* value, std::unique_ptr<InvertedIndexQueryParamFactory>& result_param);
// Boolean and integer types.
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_BOOLEAN)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_TINYINT)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_SMALLINT)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_INT)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_BIGINT)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_LARGEINT)
// Floating point types.
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_FLOAT)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DOUBLE)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_VARCHAR)
// Date and datetime types.
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATE)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATEV2)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATETIME)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATETIMEV2)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_CHAR)
// Decimal types.
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMALV2)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL32)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL64)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL128I)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL256)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_HLL)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_STRING)
// IP address types.
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_IPV4)
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_IPV6)
std::unique_ptr<lucene::analysis::Analyzer> InvertedIndexReader::create_analyzer(
InvertedIndexCtx* inverted_index_ctx) {
std::unique_ptr<lucene::analysis::Analyzer> analyzer;
@ -115,6 +156,10 @@ std::unique_ptr<lucene::util::Reader> InvertedIndexReader::create_reader(
return reader;
}
std::string InvertedIndexReader::get_index_file_path() {
return _inverted_index_file_reader->get_index_file_path(&_index_meta);
}
void InvertedIndexReader::get_analyse_result(std::vector<std::string>& analyse_result,
lucene::util::Reader* reader,
lucene::analysis::Analyzer* analyzer,
@ -182,6 +227,8 @@ Status InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats,
null_bitmap->runOptimize();
cache->insert(cache_key, null_bitmap, cache_handle);
FINALIZE_INPUT(null_bitmap_in);
} else {
cache->insert(cache_key, null_bitmap, cache_handle);
}
if (owned_dir) {
FINALIZE_INPUT(dir);

View File

@ -33,6 +33,7 @@
#include "olap/rowset/segment_v2/inverted_index_desc.h"
#include "olap/rowset/segment_v2/inverted_index_query_type.h"
#include "olap/tablet_schema.h"
#include "runtime/primitive_type.h"
#include "util/once.h"
#define FINALIZE_INPUT(x) \
@ -72,6 +73,104 @@ class InvertedIndexIterator;
class InvertedIndexQueryCacheHandle;
class InvertedIndexFileReader;
struct InvertedIndexQueryInfo;
class InvertedIndexResultBitmap {
private:
std::shared_ptr<roaring::Roaring> _data_bitmap = nullptr;
std::shared_ptr<roaring::Roaring> _null_bitmap = nullptr;
public:
// Default constructor
InvertedIndexResultBitmap() = default;
~InvertedIndexResultBitmap() = default;
// Constructor with arguments
InvertedIndexResultBitmap(std::shared_ptr<roaring::Roaring> data_bitmap,
std::shared_ptr<roaring::Roaring> null_bitmap)
: _data_bitmap(std::move(data_bitmap)), _null_bitmap(std::move(null_bitmap)) {}
// Copy constructor
InvertedIndexResultBitmap(const InvertedIndexResultBitmap& other)
: _data_bitmap(std::make_shared<roaring::Roaring>(*other._data_bitmap)),
_null_bitmap(std::make_shared<roaring::Roaring>(*other._null_bitmap)) {}
// Move constructor
InvertedIndexResultBitmap(InvertedIndexResultBitmap&& other) noexcept
: _data_bitmap(std::move(other._data_bitmap)),
_null_bitmap(std::move(other._null_bitmap)) {}
// Copy assignment operator
InvertedIndexResultBitmap& operator=(const InvertedIndexResultBitmap& other) {
if (this != &other) { // Prevent self-assignment
_data_bitmap = std::make_shared<roaring::Roaring>(*other._data_bitmap);
_null_bitmap = std::make_shared<roaring::Roaring>(*other._null_bitmap);
}
return *this;
}
// Move assignment operator
InvertedIndexResultBitmap& operator=(InvertedIndexResultBitmap&& other) noexcept {
if (this != &other) { // Prevent self-assignment
_data_bitmap = std::move(other._data_bitmap);
_null_bitmap = std::move(other._null_bitmap);
}
return *this;
}
// Operator &=
InvertedIndexResultBitmap& operator&=(const InvertedIndexResultBitmap& other) {
if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) {
auto new_null_bitmap = (*_data_bitmap & *other._null_bitmap) |
(*_null_bitmap & *other._data_bitmap) |
(*_null_bitmap & *other._null_bitmap);
*_data_bitmap &= *other._data_bitmap;
*_null_bitmap = std::move(new_null_bitmap);
}
return *this;
}
// Operator |=
InvertedIndexResultBitmap& operator|=(const InvertedIndexResultBitmap& other) {
if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) {
auto new_null_bitmap = (*_null_bitmap | *other._null_bitmap) - *_data_bitmap;
*_data_bitmap |= *other._data_bitmap;
*_null_bitmap = std::move(new_null_bitmap);
}
return *this;
}
// NOT operation
const InvertedIndexResultBitmap& op_not(const roaring::Roaring* universe) const {
if (_data_bitmap && _null_bitmap) {
*_data_bitmap = *universe - *_data_bitmap - *_null_bitmap;
// The _null_bitmap remains unchanged.
}
return *this;
}
// Operator -=
InvertedIndexResultBitmap& operator-=(const InvertedIndexResultBitmap& other) {
if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) {
*_data_bitmap -= *other._data_bitmap;
*_data_bitmap -= *other._null_bitmap;
*_null_bitmap -= *other._null_bitmap;
}
return *this;
}
void mask_out_null() {
if (_data_bitmap && _null_bitmap) {
*_data_bitmap -= *_null_bitmap;
}
}
const std::shared_ptr<roaring::Roaring>& get_data_bitmap() const { return _data_bitmap; }
const std::shared_ptr<roaring::Roaring>& get_null_bitmap() const { return _null_bitmap; }
// Check if both bitmaps are empty
bool is_empty() const { return (_data_bitmap == nullptr && _null_bitmap == nullptr); }
};
class InvertedIndexReader : public std::enable_shared_from_this<InvertedIndexReader> {
public:
explicit InvertedIndexReader(
@ -135,7 +234,7 @@ public:
virtual Status handle_searcher_cache(InvertedIndexCacheHandle* inverted_index_cache_handle,
OlapReaderStatistics* stats);
std::string get_index_file_path();
static Status create_index_searcher(lucene::store::Directory* dir, IndexSearcherPtr* searcher,
MemTracker* mem_tracker,
InvertedIndexReaderType reader_type);
@ -282,6 +381,79 @@ private:
const KeyCoder* _value_key_coder {};
};
/**
 * @brief Factory and type-erased base class for inverted index query values.
 *
 * Two creation entry points are provided:
 *  - a template overload, for callers that know the primitive type at compile time
 *    (e.g. templated predicate classes such as the in-list predicate);
 *  - a runtime overload taking a PrimitiveType, for callers that only know the type at
 *    run time (e.g. function expressions such as function_array_index).
 *
 * Both map a field value of the query engine to the corresponding storage field value.
 */
class InvertedIndexQueryParamFactory {
    ENABLE_FACTORY_CREATOR(InvertedIndexQueryParamFactory);

public:
    virtual ~InvertedIndexQueryParamFactory() = default;

    /// Creates the query value for a primitive type known at compile time.
    /// @param value         pointer to a value of the CppType that corresponds to PT.
    /// @param result_param  receives the created parameter.
    template <PrimitiveType PT>
    static Status create_query_value(const void* value,
                                     std::unique_ptr<InvertedIndexQueryParamFactory>& result_param);

    /// Creates the query value for a primitive type known only at run time by dispatching
    /// to the matching template overload.
    /// @return the status of the template overload, or Status::NotSupported for a type
    ///         that has no case below.
    static Status create_query_value(
            const PrimitiveType& primitiveType, const void* value,
            std::unique_ptr<InvertedIndexQueryParamFactory>& result_param) {
        switch (primitiveType) {
#define M(TYPE)                                               \
    case TYPE: {                                              \
        return create_query_value<TYPE>(value, result_param); \
    }
            M(PrimitiveType::TYPE_BOOLEAN)
            M(PrimitiveType::TYPE_TINYINT)
            M(PrimitiveType::TYPE_SMALLINT)
            M(PrimitiveType::TYPE_INT)
            M(PrimitiveType::TYPE_BIGINT)
            M(PrimitiveType::TYPE_LARGEINT)
            M(PrimitiveType::TYPE_FLOAT)
            M(PrimitiveType::TYPE_DOUBLE)
            M(PrimitiveType::TYPE_DECIMALV2)
            M(PrimitiveType::TYPE_DECIMAL32)
            M(PrimitiveType::TYPE_DECIMAL64)
            M(PrimitiveType::TYPE_DECIMAL128I)
            M(PrimitiveType::TYPE_DECIMAL256)
            M(PrimitiveType::TYPE_DATE)
            M(PrimitiveType::TYPE_DATETIME)
            M(PrimitiveType::TYPE_CHAR)
            M(PrimitiveType::TYPE_VARCHAR)
            M(PrimitiveType::TYPE_STRING)
            // These types have explicit instantiations of the template overload as well,
            // so the runtime overload dispatches to them instead of rejecting them.
            // NOTE(review): TYPE_HLL is also instantiated but is left out here on purpose;
            // confirm whether it should ever be reachable through this path.
            M(PrimitiveType::TYPE_DATEV2)
            M(PrimitiveType::TYPE_DATETIMEV2)
            M(PrimitiveType::TYPE_IPV4)
            M(PrimitiveType::TYPE_IPV6)
#undef M
        default:
            return Status::NotSupported("Unsupported primitive type {} for inverted index reader",
                                        primitiveType);
        }
    }

    /// Returns a type-erased pointer to the stored value. The base implementation holds no
    /// value: it logs a fatal error and is marked unreachable. Subclasses override it.
    virtual const void* get_value() const {
        LOG_FATAL(
                "Execution reached an undefined behavior code path in "
                "InvertedIndexQueryParamFactory");
        __builtin_unreachable();
    }
};
/// Concrete query parameter holding one value in the storage field type of PT.
template <PrimitiveType PT>
class InvertedIndexQueryParam : public InvertedIndexQueryParamFactory {
    ENABLE_FACTORY_CREATOR(InvertedIndexQueryParam);
    using storage_val = typename PrimitiveTypeTraits<PT>::StorageFieldType;

public:
    /// Copies the pointed-to storage value into this parameter.
    void set_value(const storage_val* value) { _value = *value; }

    /// Returns a type-erased pointer to the stored value.
    const void* get_value() const override { return &_value; }

private:
    storage_val _value;
};
class InvertedIndexIterator {
ENABLE_FACTORY_CREATOR(InvertedIndexIterator);

View File

@ -297,26 +297,18 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) {
_block_rowids.resize(_opts.block_row_max);
// compound predicates
_col_preds_except_leafnode_of_andnode.clear();
//TODO: need to check can_apply_predicate_safely in expr
/*_col_preds_except_leafnode_of_andnode.clear();
for (const auto& predicate : opts.column_predicates_except_leafnode_of_andnode) {
if (!_segment->can_apply_predicate_safely(predicate->column_id(), predicate, *_schema,
_opts.io_ctx.reader_type)) {
continue;
}
_col_preds_except_leafnode_of_andnode.push_back(predicate);
}
}*/
_remaining_conjunct_roots = opts.remaining_conjunct_roots;
_common_expr_ctxs_push_down = opts.common_expr_ctxs_push_down;
_enable_common_expr_pushdown = !_common_expr_ctxs_push_down.empty();
_column_predicate_info.reset(new ColumnPredicateInfo());
for (auto& expr : _remaining_conjunct_roots) {
_calculate_pred_in_remaining_conjunct_root(expr);
}
_calculate_func_in_remaining_conjunct_root();
_column_predicate_info.reset(new ColumnPredicateInfo());
if (_schema->rowid_col_idx() > 0) {
_record_rowids = true;
}
@ -366,8 +358,9 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) {
}
}
RETURN_IF_ERROR(_construct_compound_expr_context());
_enable_common_expr_pushdown = !_common_expr_ctxs_push_down.empty();
_initialize_predicate_results();
return Status::OK();
}
@ -375,33 +368,10 @@ void SegmentIterator::_initialize_predicate_results() {
// Initialize from _col_predicates
for (auto* pred : _col_predicates) {
int cid = pred->column_id();
std::string pred_sign = _gen_predicate_result_sign(pred);
_column_predicate_inverted_index_status[cid][pred_sign] = false;
_column_predicate_inverted_index_status[cid][pred] = false;
}
// Initialize from _col_preds_except_leafnode_of_andnode
for (auto* pred : _col_preds_except_leafnode_of_andnode) {
int cid = pred->column_id();
std::string pred_sign = _gen_predicate_result_sign(pred);
_column_predicate_inverted_index_status[cid][pred_sign] = false;
}
// Initialize from _column_pred_in_remaining_vconjunct
for (auto& preds_in_remaining_vconjuct : _column_pred_in_remaining_vconjunct) {
for (auto& pred_info : preds_in_remaining_vconjuct.second) {
int cid = _schema->column_id(pred_info.column_id);
std::string pred_sign = _gen_predicate_result_sign(&pred_info);
_column_predicate_inverted_index_status[cid][pred_sign] = false;
}
}
// Initialize from _func_name_to_result_sign
for (auto& iter : _func_name_to_result_sign) {
for (auto& pred_sign : iter.second) {
auto column_id = _opts.tablet_schema->field_index(iter.first);
_column_predicate_inverted_index_status[column_id][pred_sign] = false;
}
}
_calculate_expr_in_remaining_conjunct_root();
}
Status SegmentIterator::init_iterators() {
@ -542,47 +512,34 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() {
return Status::OK();
}
if (config::enable_index_apply_preds_except_leafnode_of_andnode) {
size_t input_rows = _row_bitmap.cardinality();
RETURN_IF_ERROR(_apply_index_except_leafnode_of_andnode());
if (_can_filter_by_preds_except_leafnode_of_andnode()) {
for (auto it = _remaining_conjunct_roots.begin();
it != _remaining_conjunct_roots.end();) {
_pred_except_leafnode_of_andnode_evaluate_result.clear();
auto res = _execute_predicates_except_leafnode_of_andnode(*it);
VLOG_DEBUG << "_execute_predicates_except_leafnode_of_andnode expr: "
<< (*it)->debug_string() << " res: " << res;
if (res.ok() && _pred_except_leafnode_of_andnode_evaluate_result.size() == 1) {
_row_bitmap &= _pred_except_leafnode_of_andnode_evaluate_result[0];
// Delete expr after it obtains the final result.
{
std::erase_if(_common_expr_ctxs_push_down,
[&it](const auto& iter) { return iter->root() == *it; });
VLOG_DEBUG << "_remaining_conjunct_roots erase expr: "
<< (*it)->debug_string();
it = _remaining_conjunct_roots.erase(it);
}
} else {
++it;
}
}
_col_preds_except_leafnode_of_andnode.clear();
compound_func_exprs.clear();
// 1. if all conditions in the compound hit the inverted index and there are no other expr to handle.
// 2. then there is no need to generate index_result_column.
if (_enable_common_expr_pushdown && _remaining_conjunct_roots.empty()) {
for (auto& iter : _rowid_result_for_index) {
iter.second.first = false;
}
}
}
_opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality());
}
RETURN_IF_ERROR(_apply_bitmap_index());
RETURN_IF_ERROR(_apply_inverted_index());
RETURN_IF_ERROR(_apply_index_expr());
size_t input_rows = _row_bitmap.cardinality();
// Fold the result of every pushed-down expression that was fully evaluated through the
// inverted index into _row_bitmap, then drop that expression (and its root in
// _remaining_conjunct_roots) so it is not evaluated again.
for (auto it = _common_expr_ctxs_push_down.begin(); it != _common_expr_ctxs_push_down.end();) {
    if (!(*it)->all_expr_inverted_index_evaluated()) {
        ++it;
        continue;
    }
    const auto* result =
            (*it)->get_inverted_index_context()->get_inverted_index_result_for_expr(
                    (*it)->root().get());
    if (result == nullptr || result->get_data_bitmap() == nullptr) {
        // No usable index result for this expression: keep it for normal evaluation.
        // The iterator must still advance here, otherwise the loop never terminates.
        ++it;
        continue;
    }
    _row_bitmap &= *result->get_data_bitmap();
    auto root = (*it)->root();
    auto iter_find = std::find(_remaining_conjunct_roots.begin(),
                               _remaining_conjunct_roots.end(), root);
    if (iter_find != _remaining_conjunct_roots.end()) {
        _remaining_conjunct_roots.erase(iter_find);
    }
    // erase() returns the iterator to the next element.
    it = _common_expr_ctxs_push_down.erase(it);
}
_opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality());
for (auto cid : _schema->column_ids()) {
bool result_true = _check_all_predicates_passed_inverted_index_for_column(cid);
bool result_true = _check_all_conditions_passed_inverted_index_for_column(cid);
if (result_true) {
_need_read_data_indices[cid] = false;
}
@ -795,127 +752,6 @@ Status SegmentIterator::_extract_common_expr_columns(const vectorized::VExprSPtr
return Status::OK();
}
// Post-order walk over an expression tree that combines previously stored index results.
//
// Children are visited first. Slot-ref and literal nodes only record column / value
// information in _column_predicate_info. A binary / match / in / function-call node builds
// its result sign, looks up the stored bitmap in _rowid_result_for_index and pushes it onto
// _pred_except_leafnode_of_andnode_evaluate_result. A compound node combines the pushed
// bitmaps through _execute_compound_fn. Any other node type, or a missing or disabled
// stored result, yields Status::InvalidArgument.
Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode(
        const vectorized::VExprSPtr& expr) {
    if (expr == nullptr) {
        return Status::OK();
    }

    for (const auto& child : expr->children()) {
        RETURN_IF_ERROR(_execute_predicates_except_leafnode_of_andnode(child));
    }

    const auto node_type = expr->node_type();

    if (node_type == TExprNodeType::SLOT_REF) {
        auto slot_ref = std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr);
        _column_predicate_info->column_name = expr->expr_name();
        _column_predicate_info->column_id = slot_ref->column_id();
        return Status::OK();
    }

    if (_is_literal_node(node_type)) {
        auto literal = std::dynamic_pointer_cast<doris::vectorized::VLiteral>(expr);
        _column_predicate_info->query_values.insert(literal->value());
        return Status::OK();
    }

    const bool is_function_call = node_type == TExprNodeType::FUNCTION_CALL;
    if (node_type == TExprNodeType::BINARY_PRED || node_type == TExprNodeType::MATCH_PRED ||
        node_type == TExprNodeType::IN_PRED || is_function_call) {
        std::string result_sign;
        if (is_function_call) {
            result_sign =
                    BeConsts::BLOCK_TEMP_COLUMN_PREFIX + std::to_string(expr->index_unique_id());
        } else {
            if (node_type == TExprNodeType::MATCH_PRED) {
                _column_predicate_info->query_op = "match";
            } else if (node_type == TExprNodeType::IN_PRED) {
                _column_predicate_info->query_op =
                        expr->op() == TExprOpcode::type::FILTER_IN ? "in" : "not_in";
            } else {
                _column_predicate_info->query_op = expr->fn().name.function_name;
            }
            result_sign = _gen_predicate_result_sign(_column_predicate_info.get());
        }

        // Start with a clean predicate info for the next leaf condition of the compound.
        _column_predicate_info.reset(new ColumnPredicateInfo());
        VLOG_DEBUG << "result_sign " << result_sign;

        const auto stored = _rowid_result_for_index.find(result_sign);
        if (stored == _rowid_result_for_index.end() || !stored->second.first) {
            return Status::InvalidArgument(
                    "_execute_predicates_except_leafnode_of_andnode has no result for {}",
                    result_sign);
        }
        _pred_except_leafnode_of_andnode_evaluate_result.push_back(stored->second.second);
        return Status::OK();
    }

    if (node_type == TExprNodeType::COMPOUND_PRED) {
        // Combine the operands already pushed by the children.
        auto function_name = expr->fn().name.function_name;
        RETURN_IF_ERROR(_execute_compound_fn(function_name));
        return Status::OK();
    }

    return Status::InvalidArgument(
            "_execute_predicates_except_leafnode_of_andnode not supported for TExprNodeType:{}",
            node_type);
}
// Applies a logical function to the tail of _pred_except_leafnode_of_andnode_evaluate_result.
//
// "and" / "or" combine the last two bitmaps into the second-to-last slot and pop the last one.
// "not" replaces the last bitmap with _row_bitmap minus that bitmap.
// Any other function name leaves the results untouched and returns OK.
// Returns Status::InvalidArgument when there are too few operands for the function.
Status SegmentIterator::_execute_compound_fn(const std::string& function_name) {
    auto& results = _pred_except_leafnode_of_andnode_evaluate_result;
    const auto size = results.size();

    const bool is_and = function_name == "and";
    if (is_and || function_name == "or") {
        if (size < 2) {
            return Status::InvalidArgument("_execute_compound_fn {} arg num {} < 2", function_name,
                                           size);
        }
        auto& lhs = results[size - 2];
        const auto& rhs = results[size - 1];
        if (is_and) {
            lhs &= rhs;
        } else {
            lhs |= rhs;
        }
        results.pop_back();
        return Status::OK();
    }

    if (function_name == "not") {
        if (size < 1) {
            return Status::InvalidArgument("_execute_compound_fn {} arg num {} < 1", function_name,
                                           size);
        }
        roaring::Roaring complement = _row_bitmap;
        complement -= results[size - 1];
        results[size - 1] = complement;
    }
    return Status::OK();
}
bool SegmentIterator::_can_filter_by_preds_except_leafnode_of_andnode() {
// no compound predicates push down, so no need to filter
if (_col_preds_except_leafnode_of_andnode.empty() && compound_func_exprs.empty()) {
return false;
}
for (auto pred : _col_preds_except_leafnode_of_andnode) {
if (_not_apply_index_pred.count(pred->column_id()) ||
(!_check_apply_by_inverted_index(pred, true))) {
return false;
}
// all predicates are evaluated by index, then true, else false
std::string pred_result_sign = _gen_predicate_result_sign(pred);
if (_rowid_result_for_index.count(pred_result_sign) == 0) {
return false;
}
}
for (const auto& expr : compound_func_exprs) {
std::string pred_result_sign =
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + std::to_string(expr->index_unique_id());
if (!_rowid_result_for_index.contains(pred_result_sign)) {
return false;
}
}
return true;
}
bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound) {
if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) {
return false;
@ -973,74 +809,20 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
return true;
}
// Evaluates one column predicate through the inverted index iterator of its column.
// The predicate receives the column's storage name/type, the iterator, the row count from
// num_rows() and `output_result`. Any error from the evaluation is returned to the caller.
Status SegmentIterator::_apply_inverted_index_except_leafnode_of_andnode(
        ColumnPredicate* pred, roaring::Roaring* output_result) {
    const auto cid = pred->column_id();
    auto* index_iterator = _inverted_index_iterators[cid].get();
    RETURN_IF_ERROR(pred->evaluate(_storage_name_and_type[cid], index_iterator, num_rows(),
                                   output_result));
    return Status::OK();
}
Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
for (auto* pred : _col_preds_except_leafnode_of_andnode) {
auto column_id = pred->column_id();
auto pred_type = pred->type();
bool is_support = pred_type == PredicateType::EQ || pred_type == PredicateType::NE ||
pred_type == PredicateType::LT || pred_type == PredicateType::LE ||
pred_type == PredicateType::GT || pred_type == PredicateType::GE ||
pred_type == PredicateType::MATCH ||
pred_type == PredicateType::IN_LIST ||
pred_type == PredicateType::NOT_IN_LIST;
if (!is_support) {
continue;
}
bool can_apply_by_inverted_index = _check_apply_by_inverted_index(pred, true);
roaring::Roaring bitmap = _row_bitmap;
Status res = Status::OK();
if (can_apply_by_inverted_index) {
res = _apply_inverted_index_except_leafnode_of_andnode(pred, &bitmap);
} else {
continue;
}
bool need_remaining_after_evaluate = _column_has_fulltext_index(column_id) &&
PredicateTypeTraits::is_equal_or_list(pred_type);
if (!res.ok()) {
if (_downgrade_without_index(res, need_remaining_after_evaluate)) {
// downgrade without index query
_not_apply_index_pred.insert(column_id);
Status SegmentIterator::_apply_index_expr() {
for (const auto& expr_ctx : _common_expr_ctxs_push_down) {
if (Status st = expr_ctx->evaluate_inverted_index(num_rows()); !st.ok()) {
if (_downgrade_without_index(st) || st.code() == ErrorCode::NOT_IMPLEMENTED_ERROR) {
continue;
}
LOG(WARNING) << "failed to evaluate index"
<< ", column predicate type: " << pred->pred_type_string(pred->type())
<< ", error msg: " << res.to_string();
return res;
}
std::string pred_result_sign = _gen_predicate_result_sign(pred);
_rowid_result_for_index.emplace(pred_result_sign, std::make_pair(true, std::move(bitmap)));
if (!pred->predicate_params()->marked_by_runtime_filter) {
_column_predicate_inverted_index_status[column_id][pred_result_sign] = true;
}
}
for (const auto& expr : compound_func_exprs) {
roaring::Roaring bitmap = _row_bitmap;
auto result = std::make_shared<roaring::Roaring>();
RETURN_IF_ERROR(execute_func_expr(expr, result));
bitmap &= *result;
std::string result_sign =
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + std::to_string(expr->index_unique_id());
_rowid_result_for_index.emplace(result_sign, std::make_pair(true, std::move(bitmap)));
for (const auto& child_expr : expr->children()) {
if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
auto column_id = _opts.tablet_schema->field_index(child_expr->expr_name());
_column_predicate_inverted_index_status[column_id][result_sign] = true;
} else {
// other code is not to be handled, we should just break
LOG(WARNING) << "failed to evaluate inverted index for expr_ctx: "
<< expr_ctx->root()->debug_string()
<< ", error msg: " << st.to_string();
return st;
}
}
}
return Status::OK();
}
@ -1071,47 +853,6 @@ bool SegmentIterator::_downgrade_without_index(Status res, bool need_remaining)
return false;
}
// Builds the result sign for a column predicate:
//   BLOCK_TEMP_COLUMN_PREFIX + <column name> + "_" + <predicate type string> + "_" + <values>
// where <values> is the comma-joined list of the predicate's parameter values. When the
// column descriptor has a path, the column name is replaced by a CAST expression pattern
// that also carries the storage type name.
std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicate* predicate) {
    const auto cid = predicate->column_id();
    auto column_desc = _schema->column(cid);

    std::string col_name = column_desc->name();
    if (column_desc->path() != nullptr) {
        const static std::string pattern = "(CAST {}(Nullable(Variant)) TO {})";
        // indicate a subcolumn access for variant, using the expression pattern as pred result sign name
        col_name = fmt::format(pattern, col_name, _storage_name_and_type[cid].second->get_name());
    }

    auto predicate_params = predicate->predicate_params();
    std::string pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + col_name + "_" +
                                   predicate->pred_type_string(predicate->type()) + "_" +
                                   join(predicate_params->values, ",");
    VLOG_DEBUG << "_gen_predicate_result_sign: " << pred_result_sign;
    return pred_result_sign;
}
// Builds the result sign for a predicate described by a ColumnPredicateInfo:
//   BLOCK_TEMP_COLUMN_PREFIX + <column name> + "_" + <query op> + "_" + <query values>
// where <query values> is the comma-joined list of values. When the column descriptor has a
// path, the column name is replaced by a CAST expression pattern that also carries the
// storage type name.
std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicateInfo* predicate_info) {
    const auto cid = _schema->column_id(predicate_info->column_id);
    auto column_desc = _schema->column(cid);

    std::string col_name = predicate_info->column_name;
    if (column_desc->path() != nullptr) {
        const static std::string pattern = "(CAST {}(Nullable(Variant)) TO {})";
        // indicate a subcolumn access for variant, using the expression pattern as pred result sign name
        col_name = fmt::format(pattern, col_name, _storage_name_and_type[cid].second->get_name());
    }

    std::string pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + col_name + "_" +
                                   predicate_info->query_op + "_" +
                                   boost::join(predicate_info->query_values, ",");
    VLOG_DEBUG << "_gen_predicate_result_sign: " << pred_result_sign;
    return pred_result_sign;
}
bool SegmentIterator::_column_has_fulltext_index(int32_t cid) {
bool has_fulltext_index = _inverted_index_iterators[cid] != nullptr &&
_inverted_index_iterators[cid]->get_inverted_index_reader_type() ==
@ -1124,10 +865,6 @@ inline bool SegmentIterator::_inverted_index_not_support_pred_type(const Predica
return type == PredicateType::BF || type == PredicateType::BITMAP_FILTER;
}
// Expands to a std::all_of call that is true when every predicate in `predicate_set` has a
// type for which PredicateTypeTraits::is_range() returns true (and therefore also true for
// an empty set). The argument is expanded twice (begin() and end()), so pass a plain
// container lvalue rather than an expression with side effects.
#define all_predicates_are_range_predicate(predicate_set) \
std::all_of(predicate_set.begin(), predicate_set.end(), \
[](const ColumnPredicate* p) { return PredicateTypeTraits::is_range(p->type()); })
Status SegmentIterator::_apply_inverted_index_on_column_predicate(
ColumnPredicate* pred, std::vector<ColumnPredicate*>& remaining_predicates,
bool* continue_apply) {
@ -1150,12 +887,6 @@ Status SegmentIterator::_apply_inverted_index_on_column_predicate(
return res;
}
auto pred_type = pred->type();
if (pred_type == PredicateType::MATCH || pred_type == PredicateType::IN_LIST) {
std::string pred_result_sign = _gen_predicate_result_sign(pred);
_rowid_result_for_index.emplace(pred_result_sign, std::make_pair(false, _row_bitmap));
}
if (_row_bitmap.isEmpty()) {
// all rows have been pruned, no need to process further predicates
*continue_apply = false;
@ -1166,52 +897,7 @@ Status SegmentIterator::_apply_inverted_index_on_column_predicate(
return Status::OK();
}
if (!pred->predicate_params()->marked_by_runtime_filter) {
std::string pred_result_sign = _gen_predicate_result_sign(pred);
_column_predicate_inverted_index_status[pred->column_id()][pred_result_sign] = true;
}
}
return Status::OK();
}
Status SegmentIterator::_apply_inverted_index_on_block_column_predicate(
ColumnId column_id, MutilColumnBlockPredicate* pred,
std::set<const ColumnPredicate*>& no_need_to_pass_column_predicate_set,
bool* continue_apply) {
bool handle_by_fulltext = _column_has_fulltext_index(column_id);
std::set<const ColumnPredicate*> predicate_set {};
pred->get_all_column_predicate(predicate_set);
//four requirements here.
//1. Column has inverted index
//2. There are multiple predicates for this column.
//3. All the predicates are range predicate.
//4. if it's under fulltext parser type, we need to skip inverted index evaluate.
if (_inverted_index_iterators[column_id] != nullptr && predicate_set.size() > 1 &&
all_predicates_are_range_predicate(predicate_set) && !handle_by_fulltext) {
roaring::Roaring output_result = _row_bitmap;
std::string column_name = _schema->column(column_id)->name();
auto res = pred->evaluate(column_name, _inverted_index_iterators[column_id].get(),
num_rows(), &_row_bitmap);
if (res.ok()) {
no_need_to_pass_column_predicate_set.insert(predicate_set.begin(), predicate_set.end());
if (_row_bitmap.isEmpty()) {
// all rows have been pruned, no need to process further predicates
*continue_apply = false;
}
return res;
} else {
//TODO:mock until AndBlockColumnPredicate evaluate is ok.
if (res.code() == ErrorCode::INVERTED_INDEX_NOT_IMPLEMENTED) {
return Status::OK();
}
LOG(WARNING) << "failed to evaluate index"
<< ", column predicate type: range predicate"
<< ", error msg: " << res;
return res;
_column_predicate_inverted_index_status[pred->column_id()][pred] = true;
}
}
return Status::OK();
@ -1310,63 +996,49 @@ Status SegmentIterator::_apply_inverted_index() {
}
}
// delete from _common_expr_ctxs_push_down if a MATCH predicate will be removed from _col_predicates
// since it's not necessary to eval it anymore to avoid index miss, which is added in _normalize_predicate
for (auto pred : _col_predicates) {
    auto* match_pred = dynamic_cast<MatchPredicate*>(pred);
    if (!match_pred ||
        !_is_match_predicate_and_not_remaining(match_pred, remaining_predicates)) {
        continue;
    }
    for (auto it = _common_expr_ctxs_push_down.begin();
         it != _common_expr_ctxs_push_down.end();) {
        if (_is_target_expr_match_predicate((*it)->root(), match_pred, _schema.get())) {
            // Log before erasing: after erase() `it` refers to the next element (or end()),
            // so dereferencing it afterwards would log the wrong expression or be invalid.
            VLOG_DEBUG << "delete expr from _remaining_conjunct_roots "
                       << (*it)->root()->debug_string();
            _delete_expr_from_conjunct_roots((*it)->root(), _remaining_conjunct_roots);
            it = _common_expr_ctxs_push_down.erase(it);
        } else {
            ++it;
        }
    }
}
for (const auto& expr : no_compound_func_exprs) {
auto result = std::make_shared<roaring::Roaring>();
RETURN_IF_ERROR(execute_func_expr(expr, result));
_row_bitmap &= *result;
for (auto it = _remaining_conjunct_roots.begin(); it != _remaining_conjunct_roots.end();) {
if (*it == expr) {
std::erase_if(_common_expr_ctxs_push_down,
[&it](const auto& iter) { return iter->root() == *it; });
it = _remaining_conjunct_roots.erase(it);
} else {
++it;
}
}
std::string result_sign =
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + std::to_string(expr->index_unique_id());
for (const auto& child_expr : expr->children()) {
if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
auto column_id = _opts.tablet_schema->field_index(child_expr->expr_name());
_column_predicate_inverted_index_status[column_id][result_sign] = true;
}
}
}
_col_predicates = std::move(remaining_predicates);
_opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality());
return Status::OK();
}
bool SegmentIterator::_check_all_predicates_passed_inverted_index_for_column(ColumnId cid,
/**
* @brief Checks if all conditions related to a specific column have passed in both
* `_column_predicate_inverted_index_status` and `_common_expr_inverted_index_status`.
*
* This function first checks the conditions in `_column_predicate_inverted_index_status`
* for the given `ColumnId`. If all conditions pass, it sets `default_return` to `true`.
* It then checks the conditions in `_common_expr_inverted_index_status` for the same column.
*
* The function returns `true` if all conditions in both maps pass. If any condition fails
* in either map, the function immediately returns `false`. If the column does not exist
* in one of the maps, the function returns `default_return`.
*
* @param cid The ColumnId of the column to check.
* @param default_return The default value to return if the column is not found in the status maps.
* @return true if all conditions in both status maps pass, or if the column is not found
* and `default_return` is true.
* @return false if any condition in either status map fails, or if the column is not found
* and `default_return` is false.
*/
bool SegmentIterator::_check_all_conditions_passed_inverted_index_for_column(ColumnId cid,
bool default_return) {
auto it = _column_predicate_inverted_index_status.find(cid);
if (it != _column_predicate_inverted_index_status.end()) {
const auto& pred_map = it->second;
return std::all_of(pred_map.begin(), pred_map.end(),
[](const auto& pred_entry) { return pred_entry.second; });
auto pred_it = _column_predicate_inverted_index_status.find(cid);
if (pred_it != _column_predicate_inverted_index_status.end()) {
const auto& pred_map = pred_it->second;
bool pred_passed = std::all_of(pred_map.begin(), pred_map.end(),
[](const auto& pred_entry) { return pred_entry.second; });
if (!pred_passed) {
return false;
} else {
default_return = true;
}
}
auto expr_it = _common_expr_inverted_index_status.find(cid);
if (expr_it != _common_expr_inverted_index_status.end()) {
const auto& expr_map = expr_it->second;
return std::all_of(expr_map.begin(), expr_map.end(),
[](const auto& expr_entry) { return expr_entry.second; });
}
return default_return;
}
@ -2043,15 +1715,16 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32
}
DBUG_EXECUTE_IF("segment_iterator._read_columns_by_index", {
auto col_name = _opts.tablet_schema->column(cid).name();
auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>(
"segment_iterator._read_columns_by_index", "column_name", "");
if (debug_col_name.empty()) {
return Status::Error<ErrorCode::INTERNAL_ERROR>("does not need to read data");
if (debug_col_name.empty() && col_name != "__DORIS_DELETE_SIGN__") {
return Status::Error<ErrorCode::INTERNAL_ERROR>("does not need to read data, {}",
col_name);
}
auto col_name = _opts.tablet_schema->column(cid).name();
if (debug_col_name.find(col_name) != std::string::npos) {
return Status::Error<ErrorCode::INTERNAL_ERROR>("does not need to read data, {}",
debug_col_name);
col_name);
}
})
@ -2462,15 +2135,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
return Status::EndOfFile("no more data in segment");
}
DBUG_EXECUTE_IF("segment_iterator._rowid_result_for_index", {
for (auto& iter : _rowid_result_for_index) {
if (iter.second.first) {
return Status::Error<ErrorCode::INTERNAL_ERROR>(
"_rowid_result_for_index exists true");
}
}
})
if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) {
if (_non_predicate_columns.empty()) {
return Status::InternalError("_non_predicate_columns is empty");
@ -2478,9 +2142,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
RETURN_IF_ERROR(_convert_to_expected_type(_first_read_column_ids));
RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns));
_output_non_pred_columns(block);
if (!_enable_common_expr_pushdown || !_remaining_conjunct_roots.empty()) {
_output_index_result_column(nullptr, 0, block);
}
} else {
uint16_t selected_size = _current_batch_rows_read;
uint16_t sel_rowid_idx[selected_size];
@ -2537,12 +2198,12 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
auto col_const = vectorized::ColumnConst::create(std::move(res_column),
selected_size);
block->replace_by_position(0, std::move(col_const));
_output_index_result_column(sel_rowid_idx, selected_size, block);
_output_index_result_column_for_expr(sel_rowid_idx, selected_size, block);
block->shrink_char_type_column_suffix_zero(_char_type_idx_no_0);
RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, selected_size, block));
block->replace_by_position(0, std::move(col0));
} else {
_output_index_result_column(sel_rowid_idx, selected_size, block);
_output_index_result_column_for_expr(sel_rowid_idx, selected_size, block);
block->shrink_char_type_column_suffix_zero(_char_type_idx);
RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, selected_size, block));
}
@ -2574,12 +2235,12 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
auto col_const =
vectorized::ColumnConst::create(std::move(res_column), selected_size);
block->replace_by_position(0, std::move(col_const));
_output_index_result_column(sel_rowid_idx, selected_size, block);
_output_index_result_column_for_expr(sel_rowid_idx, selected_size, block);
block->shrink_char_type_column_suffix_zero(_char_type_idx_no_0);
RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, selected_size, block));
block->replace_by_position(0, std::move(col0));
} else {
_output_index_result_column(sel_rowid_idx, selected_size, block);
_output_index_result_column_for_expr(sel_rowid_idx, selected_size, block);
block->shrink_char_type_column_suffix_zero(_char_type_idx);
RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, selected_size, block));
}
@ -2613,10 +2274,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns));
// step5: output columns
_output_non_pred_columns(block);
if (!_is_need_expr_eval) {
_output_index_result_column(sel_rowid_idx, selected_size, block);
}
}
// shrink char_type suffix zero data
@ -2693,53 +2350,42 @@ uint16_t SegmentIterator::_evaluate_common_expr_filter(uint16_t* sel_rowid_idx,
}
}
void SegmentIterator::_output_index_result_column(uint16_t* sel_rowid_idx, uint16_t select_size,
vectorized::Block* block) {
void SegmentIterator::_output_index_result_column_for_expr(uint16_t* sel_rowid_idx,
uint16_t select_size,
vectorized::Block* block) {
SCOPED_RAW_TIMER(&_opts.stats->output_index_result_column_timer);
if (block->rows() == 0) {
return;
}
for (auto& expr_ctx : _common_expr_ctxs_push_down) {
for (auto& inverted_index_result_bitmap_for_expr :
expr_ctx->get_inverted_index_context()->get_inverted_index_result_bitmap()) {
const auto* expr = inverted_index_result_bitmap_for_expr.first;
const auto& index_result_bitmap =
inverted_index_result_bitmap_for_expr.second.get_data_bitmap();
auto index_result_column = vectorized::ColumnUInt8::create();
vectorized::ColumnUInt8::Container& vec_match_pred = index_result_column->get_data();
vec_match_pred.resize(block->rows());
size_t idx_in_selected = 0;
roaring::BulkContext bulk_context;
for (auto& iter : _rowid_result_for_index) {
_columns_to_filter.push_back(block->columns());
block->insert({vectorized::ColumnUInt8::create(),
std::make_shared<vectorized::DataTypeUInt8>(), iter.first});
if (!iter.second.first) {
// predicate not in compound query
block->get_by_name(iter.first).column =
vectorized::DataTypeUInt8().create_column_const(block->rows(), (uint8_t)1);
continue;
}
_build_index_result_column(sel_rowid_idx, select_size, block, iter.first,
iter.second.second);
}
}
void SegmentIterator::_build_index_result_column(const uint16_t* sel_rowid_idx,
uint16_t select_size, vectorized::Block* block,
const std::string& pred_result_sign,
const roaring::Roaring& index_result) {
auto index_result_column = vectorized::ColumnUInt8::create();
vectorized::ColumnUInt8::Container& vec_match_pred = index_result_column->get_data();
vec_match_pred.resize(block->rows());
size_t idx_in_selected = 0;
roaring::BulkContext bulk_context;
for (uint32_t i = 0; i < _current_batch_rows_read; i++) {
auto rowid = _block_rowids[i];
if (sel_rowid_idx == nullptr ||
(idx_in_selected < select_size && i == sel_rowid_idx[idx_in_selected])) {
if (index_result.containsBulk(bulk_context, rowid)) {
vec_match_pred[idx_in_selected] = true;
} else {
vec_match_pred[idx_in_selected] = false;
for (uint32_t i = 0; i < _current_batch_rows_read; i++) {
auto rowid = _block_rowids[i];
if (sel_rowid_idx == nullptr ||
(idx_in_selected < select_size && i == sel_rowid_idx[idx_in_selected])) {
if (index_result_bitmap->containsBulk(bulk_context, rowid)) {
vec_match_pred[idx_in_selected] = true;
} else {
vec_match_pred[idx_in_selected] = false;
}
idx_in_selected++;
}
}
idx_in_selected++;
DCHECK(block->rows() == vec_match_pred.size());
expr_ctx->get_inverted_index_context()->set_inverted_index_result_column_for_expr(
expr, std::move(index_result_column));
}
}
DCHECK(block->rows() == vec_match_pred.size());
auto index_result_position = block->get_position_by_name(pred_result_sign);
block->replace_by_position(index_result_position, std::move(index_result_column));
}
void SegmentIterator::_convert_dict_code_for_predicate_if_necessary() {
@ -2790,125 +2436,45 @@ Status SegmentIterator::current_block_row_locations(std::vector<RowLocation>* bl
return Status::OK();
}
void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
const vectorized::VExprSPtr& expr) {
if (expr == nullptr) {
return;
}
if (expr->fn().name.function_name == "multi_match") {
return;
}
auto& children = expr->children();
for (int i = 0; i < children.size(); ++i) {
_calculate_pred_in_remaining_conjunct_root(children[i]);
}
auto node_type = expr->node_type();
if (node_type == TExprNodeType::SLOT_REF) {
auto slot_expr = std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr);
if (_column_predicate_info->column_name.empty()) {
_column_predicate_info->column_name = expr->expr_name();
_column_predicate_info->column_id = slot_expr->column_id();
} else {
// If column name already exists, create a new ColumnPredicateInfo
// if expr is columnA > columnB, then column name will exist, in this situation, we need to add it to _column_pred_in_remaining_vconjunct
auto new_column_pred_info = std::make_shared<ColumnPredicateInfo>();
new_column_pred_info->column_name = expr->expr_name();
new_column_pred_info->column_id = slot_expr->column_id();
_column_pred_in_remaining_vconjunct[new_column_pred_info->column_name].push_back(
*new_column_pred_info);
}
} else if (_is_literal_node(node_type)) {
auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get());
_column_predicate_info->query_values.insert(v_literal_expr->value());
} else if (node_type == TExprNodeType::NULL_LITERAL) {
if (!_column_predicate_info->column_name.empty()) {
auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get());
_column_predicate_info->query_values.insert(v_literal_expr->value());
}
} else {
if (node_type == TExprNodeType::MATCH_PRED) {
_column_predicate_info->query_op = "match";
} else if (node_type == TExprNodeType::IN_PRED) {
if (expr->op() == TExprOpcode::type::FILTER_IN) {
_column_predicate_info->query_op = "in";
} else {
_column_predicate_info->query_op = "not_in";
}
} else if (node_type != TExprNodeType::COMPOUND_PRED) {
_column_predicate_info->query_op = expr->fn().name.function_name;
}
if (!_column_predicate_info->is_empty()) {
_column_pred_in_remaining_vconjunct[_column_predicate_info->column_name].push_back(
*_column_predicate_info);
_column_predicate_info.reset(new ColumnPredicateInfo());
}
// Clones every expression context in `_opts.common_expr_ctxs_push_down` into
// `_common_expr_ctxs_push_down` and attaches one shared InvertedIndexContext to all clones.
// Returns the first clone error, otherwise Status::OK().
Status SegmentIterator::_construct_compound_expr_context() {
    // One context object is created up front and shared by all cloned expression contexts.
    auto shared_index_ctx = std::make_shared<vectorized::InvertedIndexContext>(
            _schema->column_ids(), _inverted_index_iterators, _storage_name_and_type,
            _common_expr_inverted_index_status);

    for (const auto& pushed_down_ctx : _opts.common_expr_ctxs_push_down) {
        vectorized::VExprContextSPtr cloned_ctx;
        RETURN_IF_ERROR(pushed_down_ctx->clone(_opts.runtime_state, cloned_ctx));
        cloned_ctx->set_inverted_index_context(shared_index_ctx);
        _common_expr_ctxs_push_down.emplace_back(cloned_ctx);
    }

    return Status::OK();
}
void SegmentIterator::_calculate_func_in_remaining_conjunct_root() {
auto hash = [](const vectorized::VExprSPtr& expr) -> std::size_t {
return std::hash<std::string>()(expr->expr_name());
};
auto equal = [](const vectorized::VExprSPtr& lhs, const vectorized::VExprSPtr& rhs) -> bool {
return lhs->equals(*rhs);
};
uint32_t next_id = 0;
std::unordered_map<vectorized::VExprSPtr, uint32_t, decltype(hash), decltype(equal)> unique_map(
0, hash, equal);
auto gen_func_unique_id = [&unique_map, &next_id](const vectorized::VExprSPtr& expr) {
auto it = unique_map.find(expr);
if (it != unique_map.end()) {
return it->second;
} else {
unique_map[expr] = ++next_id;
return next_id;
}
};
void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
for (const auto& root_expr_ctx : _common_expr_ctxs_push_down) {
const auto& root_expr = root_expr_ctx->root();
if (root_expr == nullptr) {
continue;
}
std::stack<std::pair<vectorized::VExprSPtr, bool>> stack;
stack.emplace(root_expr, false);
std::stack<vectorized::VExprSPtr> stack;
stack.emplace(root_expr);
while (!stack.empty()) {
const auto& [expr, has_compound_pred] = stack.top();
const auto& expr = stack.top();
stack.pop();
bool current_has_compound_pred =
has_compound_pred || (expr->node_type() == TExprNodeType::COMPOUND_PRED);
if (expr->fn().name.function_name == "multi_match") {
expr->set_index_unique_id(gen_func_unique_id(expr));
if (current_has_compound_pred) {
compound_func_exprs.emplace_back(expr);
} else {
no_compound_func_exprs.emplace_back(expr);
}
for (int32_t i = expr->get_num_children() - 1; i >= 0; i--) {
auto child_expr = expr->get_child(i);
if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
std::string result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
std::to_string(expr->index_unique_id());
_func_name_to_result_sign[child_expr->expr_name()].push_back(result_sign);
}
for (const auto& child : expr->children()) {
if (child->is_slot_ref()) {
auto* column_slot_ref = assert_cast<vectorized::VSlotRef*>(child.get());
_common_expr_inverted_index_status[_schema->column_id(
column_slot_ref->column_id())][expr.get()] = false;
}
}
const auto& children = expr->children();
for (int32_t i = children.size() - 1; i >= 0; --i) {
if (!children[i]->children().empty()) {
stack.emplace(children[i], current_has_compound_pred);
stack.emplace(children[i]);
}
}
}
@ -2935,7 +2501,7 @@ bool SegmentIterator::_no_need_read_key_data(ColumnId cid, vectorized::MutableCo
return false;
}
if (!_check_all_predicates_passed_inverted_index_for_column(cid)) {
if (!_check_all_conditions_passed_inverted_index_for_column(cid)) {
return false;
}
@ -2970,7 +2536,7 @@ bool SegmentIterator::_can_opt_topn_reads() {
_opts.tablet_schema->column(cid).is_key()) {
return true;
}
if (_check_all_predicates_passed_inverted_index_for_column(cid, true)) {
if (_check_all_conditions_passed_inverted_index_for_column(cid, true)) {
return true;
}
return false;
@ -2985,22 +2551,5 @@ bool SegmentIterator::_can_opt_topn_reads() {
return all_true;
}
// Runs index-based evaluation for a function expression whose first child is a slot
// reference. Fills a FuncExprParams from that slot (column id, unique id, column name,
// this iterator) and forwards to expr->eval_inverted_index().
// Returns RuntimeError when the first child is missing or is not a SLOT_REF.
Status SegmentIterator::execute_func_expr(const vectorized::VExprSPtr& expr,
                                          std::shared_ptr<roaring::Roaring>& result) {
    const auto& first_child = expr->get_child(0);
    const bool first_child_is_slot =
            first_child && first_child->node_type() == TExprNodeType::SLOT_REF;
    if (!first_child_is_slot) {
        return Status::RuntimeError("cannot perform index filtering");
    }

    auto slot_ref = std::static_pointer_cast<vectorized::VSlotRef>(first_child);

    FuncExprParams params;
    params._segment_iterator = this;
    params._column_id = _schema->column_id(slot_ref->column_id());
    params._unique_id = _schema->unique_id(slot_ref->column_id());
    // The column name is resolved from the tablet schema using the id computed above.
    params._column_name = _opts.tablet_schema->column(params._column_id).name();

    return expr->eval_inverted_index(params, result);
}
} // namespace segment_v2
} // namespace doris

View File

@ -201,20 +201,10 @@ private:
[[nodiscard]] Status _apply_inverted_index_on_column_predicate(
ColumnPredicate* pred, std::vector<ColumnPredicate*>& remaining_predicates,
bool* continue_apply);
[[nodiscard]] Status _apply_inverted_index_on_block_column_predicate(
ColumnId column_id, MutilColumnBlockPredicate* pred,
std::set<const ColumnPredicate*>& no_need_to_pass_column_predicate_set,
bool* continue_apply);
[[nodiscard]] Status _apply_index_except_leafnode_of_andnode();
[[nodiscard]] Status _apply_inverted_index_except_leafnode_of_andnode(
ColumnPredicate* pred, roaring::Roaring* output_result);
[[nodiscard]] Status _apply_index_expr();
bool _column_has_fulltext_index(int32_t cid);
bool _downgrade_without_index(Status res, bool need_remaining = false);
inline bool _inverted_index_not_support_pred_type(const PredicateType& type);
bool _can_filter_by_preds_except_leafnode_of_andnode();
[[nodiscard]] Status _execute_predicates_except_leafnode_of_andnode(
const vectorized::VExprSPtr& expr);
[[nodiscard]] Status _execute_compound_fn(const std::string& function_name);
bool _is_literal_node(const TExprNodeType::type& node_type);
Status _vec_init_lazy_materialization();
@ -298,6 +288,7 @@ private:
bool _can_evaluated_by_vectorized(ColumnPredicate* predicate);
[[nodiscard]] Status _extract_common_expr_columns(const vectorized::VExprSPtr& expr);
// same with _extract_common_expr_columns, but only extract columns that can be used for index
[[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size,
vectorized::Block* block);
uint16_t _evaluate_common_expr_filter(uint16_t* sel_rowid_idx, uint16_t selected_size,
@ -310,24 +301,14 @@ private:
bool _check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound = false);
std::string _gen_predicate_result_sign(ColumnPredicate* predicate);
std::string _gen_predicate_result_sign(ColumnPredicateInfo* predicate_info);
void _build_index_result_column(const uint16_t* sel_rowid_idx, uint16_t select_size,
vectorized::Block* block, const std::string& pred_result_sign,
const roaring::Roaring& index_result);
void _output_index_result_column(uint16_t* sel_rowid_idx, uint16_t select_size,
vectorized::Block* block);
void _output_index_result_column_for_expr(uint16_t* sel_rowid_idx, uint16_t select_size,
vectorized::Block* block);
bool _need_read_data(ColumnId cid);
bool _prune_column(ColumnId cid, vectorized::MutableColumnPtr& column, bool fill_defaults,
size_t num_of_defaults);
// return true means one column's predicates all pushed down
bool _check_column_pred_all_push_down(const std::string& column_name, bool in_compound = false,
bool is_match = false);
void _calculate_pred_in_remaining_conjunct_root(const vectorized::VExprSPtr& expr);
void _calculate_func_in_remaining_conjunct_root();
Status _construct_compound_expr_context();
// todo(wb) remove this method after RowCursor is removed
void _convert_rowcursor_to_short_key(const RowCursor& key, size_t num_keys) {
@ -410,9 +391,10 @@ private:
Status execute_func_expr(const vectorized::VExprSPtr& expr,
std::shared_ptr<roaring::Roaring>& result);
void _initialize_predicate_results();
bool _check_all_predicates_passed_inverted_index_for_column(ColumnId cid,
bool _check_all_conditions_passed_inverted_index_for_column(ColumnId cid,
bool default_return = false);
void _calculate_expr_in_remaining_conjunct_root();
class BitmapRangeIterator;
class BackwardBitmapRangeIterator;
@ -427,8 +409,6 @@ private:
std::vector<std::unique_ptr<InvertedIndexIterator>> _inverted_index_iterators;
// after init(), `_row_bitmap` contains all rowid to scan
roaring::Roaring _row_bitmap;
// "column_name+operator+value-> <in_compound_query, rowid_result>
std::unordered_map<std::string, std::pair<bool, roaring::Roaring>> _rowid_result_for_index;
// an iterator for `_row_bitmap` that can be used to extract row range to scan
std::unique_ptr<BitmapRangeIterator> _range_iter;
// the next rowid to read
@ -477,19 +457,9 @@ private:
StorageReadOptions _opts;
// make a copy of `_opts.column_predicates` in order to make local changes
std::vector<ColumnPredicate*> _col_predicates;
std::vector<ColumnPredicate*> _col_preds_except_leafnode_of_andnode;
std::vector<vectorized::VExprSPtr> no_compound_func_exprs;
std::vector<vectorized::VExprSPtr> compound_func_exprs;
vectorized::VExprContextSPtrs _common_expr_ctxs_push_down;
bool _enable_common_expr_pushdown = false;
std::vector<vectorized::VExprSPtr> _remaining_conjunct_roots;
std::vector<roaring::Roaring> _pred_except_leafnode_of_andnode_evaluate_result;
std::unique_ptr<ColumnPredicateInfo> _column_predicate_info;
std::unordered_map<std::string, std::vector<ColumnPredicateInfo>>
_column_pred_in_remaining_vconjunct;
std::unordered_map<std::string, std::vector<std::string>> _func_name_to_result_sign;
std::set<ColumnId> _not_apply_index_pred;
// row schema of the key to seek
@ -527,8 +497,11 @@ private:
std::vector<uint8_t> _ret_flags;
std::unordered_map<int, std::unordered_map<std::string, bool>>
std::unordered_map<ColumnId, std::unordered_map<ColumnPredicate*, bool>>
_column_predicate_inverted_index_status;
std::unordered_map<ColumnId, std::unordered_map<const vectorized::VExpr*, bool>>
_common_expr_inverted_index_status;
};
} // namespace segment_v2

View File

@ -348,9 +348,6 @@ Status ScanLocalState<Derived>::_normalize_predicate(
RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate(
cur_expr, context, slot, value_range, &pdt),
status);
RETURN_IF_PUSH_DOWN(_normalize_match_predicate(cur_expr, context, slot,
value_range, &pdt),
status);
if (_is_key_column(slot->col_name())) {
RETURN_IF_PUSH_DOWN(
_normalize_bitmap_filter(cur_expr, context, slot, &pdt),
@ -368,23 +365,6 @@ Status ScanLocalState<Derived>::_normalize_predicate(
*range);
RETURN_IF_ERROR(status);
}
if (pdt == vectorized::VScanNode::PushDownType::UNACCEPTABLE &&
TExprNodeType::COMPOUND_PRED == cur_expr->node_type()) {
_normalize_compound_predicate(cur_expr, context, &pdt, _is_runtime_filter_predicate,
in_predicate_checker, eq_predicate_checker);
output_expr = conjunct_expr_root; // remaining in conjunct tree
return Status::OK();
}
if (pdt == vectorized::VScanNode::PushDownType::ACCEPTABLE &&
TExprNodeType::MATCH_PRED == cur_expr->node_type()) {
// remaining it in the expr tree, in order to filter by function if the pushdown
// match_predicate failed to apply inverted index in the storage layer
output_expr = conjunct_expr_root; // remaining in conjunct tree
return Status::OK();
}
if (pdt == vectorized::VScanNode::PushDownType::ACCEPTABLE && slotref != nullptr &&
slotref->type().is_variant_type()) {
// remaining it in the expr tree, in order to filter by function if the pushdown

View File

@ -27,6 +27,7 @@
#include "vec/data_types/data_type_number.h"
#include "vec/exprs/vectorized_fn_call.h"
#include "vec/exprs/vexpr.h"
#include "vec/exprs/vexpr_context.h"
namespace doris::vectorized {
@ -53,7 +54,107 @@ public:
const std::string& expr_name() const override { return _expr_name; }
// Evaluates this compound predicate (COMPOUND_AND / COMPOUND_OR / COMPOUND_NOT, selected by
// `_op`) from the inverted index results of its children.
//
// Every child is first asked to evaluate itself through its own evaluate_inverted_index();
// the child's result, if one was recorded in the inverted index context, is then combined
// into `res`. The combined result is recorded for `this` (and `_can_fast_execute` is set)
// only when `all_pass` is still true and `res.is_empty()` is false.
//
// @param context           expression context that provides the inverted index context
// @param segment_num_rows  row count used to detect a "full" OR result and to build the
//                          row range that NOT is applied against
// @return Status::OK() for AND/OR even when a child fails (the failure is logged and the
//         combined result is not recorded); the child's error status for NOT;
//         Status::NotSupported for any other operator.
Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override {
segment_v2::InvertedIndexResultBitmap res;
// Cleared as soon as any child fails to evaluate or has no recorded index result.
bool all_pass = true;
switch (_op) {
case TExprOpcode::COMPOUND_OR: {
for (const auto& child : _children) {
// A failing child is logged and skipped; it does not abort the remaining children.
if (Status st = child->evaluate_inverted_index(context, segment_num_rows);
!st.ok()) {
LOG(ERROR) << "expr:" << child->expr_name()
<< " evaluate_inverted_index error:" << st.to_string();
all_pass = false;
continue;
}
if (context->get_inverted_index_context()->has_inverted_index_result_for_expr(
child.get())) {
const auto* index_result =
context->get_inverted_index_context()
->get_inverted_index_result_for_expr(child.get());
// The first available child result seeds `res`; later ones are OR-ed in.
if (res.is_empty()) {
res = *index_result;
} else {
res |= *index_result;
}
// Once the data bitmap holds segment_num_rows entries, the remaining children
// are not evaluated at all.
if (res.get_data_bitmap()->cardinality() == segment_num_rows) {
break; // Early exit if result is full
}
} else {
all_pass = false;
}
}
break;
}
case TExprOpcode::COMPOUND_AND: {
for (const auto& child : _children) {
// Same error policy as OR: log, mark as not fully evaluated, keep going.
if (Status st = child->evaluate_inverted_index(context, segment_num_rows);
!st.ok()) {
LOG(ERROR) << "expr:" << child->expr_name()
<< " evaluate_inverted_index error:" << st.to_string();
all_pass = false;
continue;
}
if (context->get_inverted_index_context()->has_inverted_index_result_for_expr(
child.get())) {
const auto* index_result =
context->get_inverted_index_context()
->get_inverted_index_result_for_expr(child.get());
// The first available child result seeds `res`; later ones are AND-ed in.
if (res.is_empty()) {
res = *index_result;
} else {
res &= *index_result;
}
// Once the data bitmap is empty, the remaining children are not evaluated.
if (res.get_data_bitmap()->isEmpty()) {
break; // Early exit if result is empty
}
} else {
all_pass = false;
}
}
break;
}
case TExprOpcode::COMPOUND_NOT: {
// Only the first child is considered. Unlike AND/OR, a failing child aborts the
// evaluation and its status is returned to the caller.
const auto& child = _children[0];
Status st = child->evaluate_inverted_index(context, segment_num_rows);
if (!st.ok()) {
LOG(ERROR) << "expr:" << child->expr_name()
<< " evaluate_inverted_index error:" << st.to_string();
return st;
}
if (context->get_inverted_index_context()->has_inverted_index_result_for_expr(
child.get())) {
const auto* index_result =
context->get_inverted_index_context()->get_inverted_index_result_for_expr(
child.get());
// Build the row range [0, segment_num_rows) and hand it to op_not().
// NOTE(review): presumably op_not() complements the child's result within this
// range -- confirm against InvertedIndexResultBitmap::op_not.
roaring::Roaring full_result;
full_result.addRange(0, segment_num_rows);
res = index_result->op_not(&full_result);
} else {
all_pass = false;
}
break;
}
default:
return Status::NotSupported(
"Compound operator must be AND, OR, or NOT to execute with inverted index.");
}
if (all_pass && !res.is_empty()) {
// set fast_execute when expr evaluated by inverted index correctly
_can_fast_execute = true;
context->get_inverted_index_context()->set_inverted_index_result_for_expr(this, res);
}
return Status::OK();
}
Status execute(VExprContext* context, Block* block, int* result_column_id) override {
if (_can_fast_execute && fast_execute(context, block, result_column_id)) {
return Status::OK();
}
if (children().size() == 1 || !_all_child_is_compound_and_not_const()) {
return VectorizedFnCall::execute(context, block, result_column_id);
}
@ -249,8 +350,8 @@ private:
}
std::pair<uint8*, uint8*> _get_raw_data_and_null_map(ColumnPtr column,
bool nullable_column) const {
if (nullable_column) {
bool has_nullable_column) const {
if (has_nullable_column) {
auto* nullable_column = assert_cast<ColumnNullable*>(column->assume_mutable().get());
auto* data_column =
assert_cast<ColumnUInt8*>(nullable_column->get_nested_column_ptr().get())

View File

@ -22,7 +22,6 @@
#include <gen_cpp/Types_types.h>
#include <ostream>
#include <string_view>
#include <utility>
#include "common/config.h"
@ -30,11 +29,8 @@
#include "common/status.h"
#include "runtime/runtime_state.h"
#include "udf/udf.h"
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
#include "vec/columns/column.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/core/columns_with_type_and_name.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_agg_state.h"
#include "vec/exprs/vexpr_context.h"
@ -111,7 +107,6 @@ Status VectorizedFnCall::prepare(RuntimeState* state, const RowDescriptor& desc,
}
VExpr::register_function_context(state, context);
_function_name = _fn.name.function_name;
_can_fast_execute = can_fast_execute();
_prepare_finished = true;
return Status::OK();
}
@ -135,13 +130,38 @@ void VectorizedFnCall::close(VExprContext* context, FunctionContext::FunctionSta
VExpr::close(context, scope);
}
// Evaluates this function call against the inverted index by delegating to
// _evaluate_inverted_index() with this call's `_function`.
// Asserts (via DCHECK_GE) that the call has at least one child before delegating.
Status VectorizedFnCall::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
DCHECK_GE(get_num_children(), 1);
return _evaluate_inverted_index(context, _function, segment_num_rows);
}
Status VectorizedFnCall::_do_execute(doris::vectorized::VExprContext* context,
doris::vectorized::Block* block, int* result_column_id,
std::vector<size_t>& args) {
if (is_const_and_have_executed()) { // const have execute in open function
if (is_const_and_have_executed()) { // const have executed in open function
return get_result_from_const(block, _expr_name, result_column_id);
}
if (_can_fast_execute && fast_execute(context, block, result_column_id)) {
return Status::OK();
}
DBUG_EXECUTE_IF("VectorizedFnCall.must_in_slow_path", {
if (get_child(0)->is_slot_ref()) {
auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>(
"VectorizedFnCall.must_in_slow_path", "column_name", "");
std::vector<std::string> column_names;
boost::split(column_names, debug_col_name, boost::algorithm::is_any_of(","));
auto* column_slot_ref = assert_cast<VSlotRef*>(get_child(0).get());
std::string column_name = column_slot_ref->expr_name();
auto it = std::find(column_names.begin(), column_names.end(), column_name);
if (it == column_names.end()) {
return Status::Error<ErrorCode::INTERNAL_ERROR>(
"column {} should in slow path while VectorizedFnCall::execute.",
column_name);
}
}
})
DCHECK(_open_finished || _getting_const_col) << debug_string();
// TODO: not execute const expr again, but use the const column in function context
args.resize(_children.size());
@ -156,14 +176,6 @@ Status VectorizedFnCall::_do_execute(doris::vectorized::VExprContext* context,
size_t num_columns_without_result = block->columns();
// prepare a column to save result
block->insert({nullptr, _data_type, _expr_name});
if (_can_fast_execute) {
auto can_fast_execute = fast_execute(*block, args, num_columns_without_result,
block->rows(), _function->get_name());
if (can_fast_execute) {
*result_column_id = num_columns_without_result;
return Status::OK();
}
}
RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, args,
num_columns_without_result, block->rows(), false));
*result_column_id = num_columns_without_result;
@ -218,22 +230,6 @@ bool VectorizedFnCall::can_push_down_to_index() const {
return _function->can_push_down_to_index();
}
// Reports whether this call may take the fast-execute path.
// A comparison function (eq/ne/lt/gt/le/ge) applied to exactly (slot ref, literal)
// qualifies directly; any other call qualifies only if the bound function says it can be
// pushed down to the index.
bool VectorizedFnCall::can_fast_execute() const {
    auto fn_name = _function->get_name();
    const bool is_comparison = fn_name == "eq" || fn_name == "ne" || fn_name == "lt" ||
                               fn_name == "gt" || fn_name == "le" || fn_name == "ge";
    // The child checks stay behind `is_comparison` and the size test so that `_children`
    // is only indexed when it is known to hold two entries.
    if (is_comparison && _children.size() == 2 && _children[0]->is_slot_ref() &&
        _children[1]->is_literal()) {
        return true;
    }
    return _function->can_push_down_to_index();
}
// Forwards inverted-index evaluation to the bound `_function`, passing this expression,
// the caller-supplied `params`, and the `result` bitmap holder. The returned Status is
// whatever the function returns.
Status VectorizedFnCall::eval_inverted_index(segment_v2::FuncExprParams& params,
std::shared_ptr<roaring::Roaring>& result) {
return _function->eval_inverted_index(this, params, result);
}
bool VectorizedFnCall::equals(const VExpr& other) {
const auto* other_ptr = dynamic_cast<const VectorizedFnCall*>(&other);
if (!other_ptr) {

View File

@ -27,6 +27,8 @@
#include "udf/udf.h"
#include "vec/core/column_numbers.h"
#include "vec/exprs/vexpr.h"
#include "vec/exprs/vliteral.h"
#include "vec/exprs/vslot_ref.h"
#include "vec/functions/function.h"
namespace doris {
@ -50,6 +52,7 @@ public:
Status execute_runtime_fitler(doris::vectorized::VExprContext* context,
doris::vectorized::Block* block, int* result_column_id,
std::vector<size_t>& args) override;
Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override;
Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override;
Status open(RuntimeState* state, VExprContext* context,
FunctionContext::FunctionStateScope scope) override;
@ -67,9 +70,6 @@ public:
static std::string debug_string(const std::vector<VectorizedFnCall*>& exprs);
bool can_push_down_to_index() const override;
bool can_fast_execute() const override;
Status eval_inverted_index(segment_v2::FuncExprParams& params,
std::shared_ptr<roaring::Roaring>& result) override;
bool equals(const VExpr& other) override;
protected:

View File

@ -33,6 +33,7 @@
#include "common/status.h"
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_factory.hpp"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
@ -43,7 +44,6 @@
#include "vec/exprs/vcompound_pred.h"
#include "vec/exprs/vectorized_fn_call.h"
#include "vec/exprs/vexpr_context.h"
#include "vec/exprs/vexpr_fwd.h"
#include "vec/exprs/vin_predicate.h"
#include "vec/exprs/vinfo_func.h"
#include "vec/exprs/vlambda_function_call_expr.h"
@ -602,80 +602,134 @@ Status VExpr::get_result_from_const(vectorized::Block* block, const std::string&
return Status::OK();
}
// Tries to fill the result column at position `result` from a precomputed column that is
// already present in `block`, instead of executing the function.
//
// The precomputed column is looked up by the name produced by gen_predicate_result_sign().
//
// @param block             block holding the argument columns and the result slot
// @param arguments         positions of the argument columns in `block`
// @param result            position of the result column to replace
// @param input_rows_count  row count used to size the null map for nullable results
// @param function_name     function name used when building the lookup name
// @return false when `_enable_inverted_index_query` is off or the block has no column with
//         the generated name; true after the result position has been replaced.
bool VExpr::fast_execute(Block& block, const ColumnNumbers& arguments, size_t result,
size_t input_rows_count, const std::string& function_name) {
if (!_enable_inverted_index_query) {
return false;
}
std::string result_column_name = gen_predicate_result_sign(block, arguments, function_name);
if (!block.has(result_column_name)) {
// NOTE(review): this debug hook is registered as "segment_iterator.fast_execute" but reads
// the "column_name" parameter of "segment_iterator._read_columns_by_index", and it returns
// a Status from a function declared to return bool -- confirm both are intended.
DBUG_EXECUTE_IF("segment_iterator.fast_execute", {
auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>(
"segment_iterator._read_columns_by_index", "column_name", "");
std::vector<std::string> column_names;
boost::split(column_names, debug_col_name, boost::algorithm::is_any_of(","));
std::string column_name = block.get_by_position(arguments[0]).name;
auto it = std::find(column_names.begin(), column_names.end(), column_name);
if (it == column_names.end()) {
return Status::Error<ErrorCode::INTERNAL_ERROR>("fast_execute failed: {}",
result_column_name);
}
})
return false;
}
auto result_column =
block.get_by_name(result_column_name).column->convert_to_full_column_if_const();
auto& result_info = block.get_by_position(result);
// A nullable result type gets the precomputed column wrapped together with a null map of
// input_rows_count zeros; otherwise the precomputed column is used as is.
if (result_info.type->is_nullable()) {
block.replace_by_position(result,
ColumnNullable::create(std::move(result_column),
ColumnUInt8::create(input_rows_count, 0)));
} else {
block.replace_by_position(result, std::move(result_column));
}
return true;
}
// NOTE(review): this span is diff residue. The body of gen_predicate_result_sign() and the
// definition of _evaluate_inverted_index() are interleaved line by line, so the text below is
// not one compilable function. Code is left untouched; comments only mark which part is which.
//
// gen_predicate_result_sign: builds the name of the temporary block column that holds a
// predicate result. For "multi_match" the name is BLOCK_TEMP_COLUMN_PREFIX + index_unique_id();
// otherwise it is BLOCK_TEMP_COLUMN_PREFIX + <first argument column name> + "_" +
// <function_name> + "_" followed by the constant argument value(s) rendered as strings.
std::string VExpr::gen_predicate_result_sign(Block& block, const ColumnNumbers& arguments,
const std::string& function_name) const {
std::string pred_result_sign;
if (this->fn().name.function_name == "multi_match") {
pred_result_sign =
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + std::to_string(this->index_unique_id());
} else {
std::string column_name = block.get_by_position(arguments[0]).name;
pred_result_sign +=
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" + function_name + "_";
if (function_name == "in" || function_name == "not_in") {
// Too many in-list values: return the prefix only, without the value list.
if (arguments.size() - 1 > _in_list_value_count_threshold) {
return pred_result_sign;
}
// Generating 'result_sign' from 'inlist' requires sorting the values.
std::set<std::string> values;
for (size_t i = 1; i < arguments.size(); i++) {
const auto& entry = block.get_by_position(arguments[i]);
if (!is_column_const(*entry.column)) {
return pred_result_sign;
// _evaluate_inverted_index: shared helper for expressions that can be answered by an inverted
// index. It walks this expression's children, collecting for each slot ref that has an index
// iterator its iterator, storage name/type pair and column id, and for each literal its
// column/type/name as an argument. It then calls function->evaluate_inverted_index(). A
// non-empty result bitmap is stored in the inverted index context for this expr, each collected
// column is marked true in the status map, and _can_fast_execute is set.
// Returns OK without storing anything when no iterator or no literal argument was collected;
// returns the function's status when evaluation fails.
Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBasePtr& function,
uint32_t segment_num_rows) {
std::vector<segment_v2::InvertedIndexIterator*> iterators;
std::vector<vectorized::IndexFieldNameAndTypePair> data_type_with_names;
std::vector<int> column_ids;
vectorized::ColumnsWithTypeAndName arguments;
VExprSPtrs children_exprs;
for (auto child : children()) {
// if child is cast expr, we need to ensure target data type is the same with storage data type.
// or they are all string type
// and if data type is array, we need to get the nested data type to ensure that.
if (child->node_type() == TExprNodeType::CAST_EXPR) {
auto* cast_expr = assert_cast<VCastExpr*>(child.get());
DCHECK_EQ(cast_expr->children().size(), 1);
if (cast_expr->get_child(0)->is_slot_ref()) {
auto* column_slot_ref = assert_cast<VSlotRef*>(cast_expr->get_child(0).get());
auto column_id = column_slot_ref->column_id();
const auto* storage_name_type =
context->get_inverted_index_context()
->get_storage_name_and_type_by_column_id(column_id);
// NOTE(review): get_storage_name_and_type_by_column_id() can return nullptr, and the second
// loop below checks for that; here the pointer is dereferenced unchecked - confirm.
auto storage_type = remove_nullable(storage_name_type->second);
auto target_type = cast_expr->get_target_type();
auto origin_primitive_type = storage_type->get_type_as_type_descriptor().type;
auto target_primitive_type = target_type->get_type_as_type_descriptor().type;
if (is_complex_type(storage_type)) {
if (is_array(storage_type) && is_array(target_type)) {
auto nested_storage_type =
(assert_cast<const DataTypeArray*>(storage_type.get()))
->get_nested_type();
origin_primitive_type =
nested_storage_type->get_type_as_type_descriptor().type;
auto nested_target_type =
(assert_cast<const DataTypeArray*>(target_type.get()))
->get_nested_type();
target_primitive_type =
nested_target_type->get_type_as_type_descriptor().type;
} else {
continue;
}
}
// Keep the child (with the cast stripped) only when the cast does not change the primitive
// type, or when both sides are string types; variant storage is never accepted.
if (origin_primitive_type != TYPE_VARIANT &&
(origin_primitive_type == target_primitive_type ||
(is_string_type(target_primitive_type) &&
is_string_type(origin_primitive_type)))) {
children_exprs.emplace_back(expr_without_cast(child));
}
values.insert(entry.type->to_string(*entry.column, 0));
}
pred_result_sign += boost::join(values, ",");
} else if (function_name == "collection_in" || function_name == "collection_not_in") {
return pred_result_sign;
} else {
const auto& entry = block.get_by_position(arguments[1]);
if (!is_column_const(*entry.column)) {
return pred_result_sign;
}
pred_result_sign += entry.type->to_string(*entry.column, 0);
children_exprs.emplace_back(child);
}
}
return pred_result_sign;
for (auto child : children_exprs) {
if (child->is_slot_ref()) {
auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
auto column_id = column_slot_ref->column_id();
auto* iter =
context->get_inverted_index_context()->get_inverted_index_iterator_by_column_id(
column_id);
//column does not have inverted index
if (iter == nullptr) {
continue;
}
const auto* storage_name_type =
context->get_inverted_index_context()->get_storage_name_and_type_by_column_id(
column_id);
if (storage_name_type == nullptr) {
auto err_msg = fmt::format(
"storage_name_type cannot be found for column {} while in {} "
"evaluate_inverted_index",
column_id, expr_name());
LOG(ERROR) << err_msg;
return Status::InternalError(err_msg);
}
iterators.emplace_back(iter);
data_type_with_names.emplace_back(*storage_name_type);
column_ids.emplace_back(column_id);
} else if (child->is_literal()) {
auto* column_literal = assert_cast<VLiteral*>(child.get());
arguments.emplace_back(column_literal->get_column_ptr(),
column_literal->get_data_type(), column_literal->expr_name());
}
}
auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
// No indexed column among the children: nothing to evaluate.
if (iterators.empty()) {
return Status::OK();
}
// If arguments are empty, it means the left value in the expression is not a literal.
if (arguments.empty()) {
return Status::OK();
}
auto res = function->evaluate_inverted_index(arguments, data_type_with_names, iterators,
segment_num_rows, result_bitmap);
if (!res.ok()) {
return res;
}
// An empty result bitmap is not recorded, so this expr stays on the normal execution path.
if (!result_bitmap.is_empty()) {
context->get_inverted_index_context()->set_inverted_index_result_for_expr(this,
result_bitmap);
for (auto column_id : column_ids) {
context->get_inverted_index_context()->set_true_for_inverted_index_status(this,
column_id);
}
// set fast_execute when expr evaluated by inverted index correctly
_can_fast_execute = true;
}
return Status::OK();
}
// Fast path: if the inverted index context already holds a result column for this expression,
// append it to `block` and report its position instead of evaluating the expression.
//
// @param context           expression context; its inverted index context may be null
// @param block             block that receives the result column as a new last column
// @param result_column_id  out: position of the appended column (written only on success)
// @return true when a result column was appended, false when the caller must evaluate normally
bool VExpr::fast_execute(doris::vectorized::VExprContext* context, doris::vectorized::Block* block,
                         int* result_column_id) {
    // get_inverted_index_context() returns the shared_ptr by value, so fetch it once instead of
    // paying a refcount round trip per use.
    const auto index_context = context->get_inverted_index_context();
    if (index_context == nullptr) {
        return false;
    }

    // Single lookup; find() also cannot insert an entry the way operator[] would.
    auto& result_columns = index_context->get_inverted_index_result_column();
    const auto found = result_columns.find(this);
    if (found == result_columns.end()) {
        return false;
    }

    const size_t num_columns_without_result = block->columns();
    auto result_column = found->second;
    if (_data_type->is_nullable()) {
        // The stored column has no null map; pair it with an all-zero one sized to the block.
        block->insert(
                {ColumnNullable::create(result_column, ColumnUInt8::create(block->rows(), 0)),
                 _data_type, expr_name()});
    } else {
        block->insert({result_column, _data_type, expr_name()});
    }
    *result_column_id = static_cast<int>(num_columns_without_result);
    return true;
}
bool VExpr::equals(const VExpr& other) {

View File

@ -115,6 +115,14 @@ public:
// Evaluates this expression on `block`; implemented by every concrete expression.
// The int pointed to by `result_column_id` is an output parameter.
virtual Status execute(VExprContext* context, Block* block, int* result_column_id) = 0;
// Evaluates this expression against inverted indexes for a segment of `segment_num_rows` rows.
// The default implementation returns NotSupported; expressions that can use an inverted index
// override it.
virtual Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
return Status::NotSupported("Not supported execute_with_inverted_index");
}
// Non-virtual helper that evaluates `function` through the inverted index context held by
// `context`. Defined out of line.
Status _evaluate_inverted_index(VExprContext* context, const FunctionBasePtr& function,
uint32_t segment_num_rows);
// Only the 4th parameter is used in the runtime filter. In and MinMax need overwrite the
// interface
virtual Status execute_runtime_fitler(VExprContext* context, Block* block,
@ -222,18 +230,10 @@ public:
}
// fast_execute directly copies an expression's filter result that was built by applying the
// index in segment_iterator, instead of evaluating the expression again.
// Block-based overload: fills position `result` of `block`; returns true on success.
bool fast_execute(Block& block, const ColumnNumbers& arguments, size_t result,
size_t input_rows_count, const std::string& function_name);
// Builds the name of the temporary block column under which a predicate result is stored.
std::string gen_predicate_result_sign(Block& block, const ColumnNumbers& arguments,
const std::string& function_name) const;
// Context-based overload: appends the stored result column to `block` and writes its position
// to `result_column_id`; returns true on success.
bool fast_execute(doris::vectorized::VExprContext* context, doris::vectorized::Block* block,
int* result_column_id);
// Both capability queries default to false; subclasses override them to opt in.
virtual bool can_push_down_to_index() const { return false; }
virtual bool can_fast_execute() const { return false; }
// Default implementation: always returns NotSupported.
virtual Status eval_inverted_index(segment_v2::FuncExprParams& params,
std::shared_ptr<roaring::Roaring>& result) {
return Status::NotSupported("Not supported execute_with_inverted_index");
}
virtual bool equals(const VExpr& other);
// Accessors for the index unique id stored on this expression.
void set_index_unique_id(uint32_t index_unique_id) { _index_unique_id = index_unique_id; }
uint32_t index_unique_id() const { return _index_unique_id; }

View File

@ -120,6 +120,16 @@ int VExprContext::register_function_context(RuntimeState* state, const TypeDescr
return _fn_contexts.size() - 1;
}
// Runs inverted index evaluation on the root expression and returns its status.
// The call is wrapped in RETURN_IF_CATCH_EXCEPTION; presumably that macro turns a thrown
// exception into an error Status returned from here - confirm against the macro definition.
Status VExprContext::evaluate_inverted_index(uint32_t segment_num_rows) {
Status st;
RETURN_IF_CATCH_EXCEPTION({ st = _root->evaluate_inverted_index(this, segment_num_rows); });
return st;
}
// Returns true when the inverted index context holds a result bitmap for the root expression.
// Returns false when no inverted index context has been set on this VExprContext: nothing can
// have been evaluated in that case, and dereferencing the empty shared_ptr would crash.
bool VExprContext::all_expr_inverted_index_evaluated() {
    if (_inverted_index_context == nullptr) {
        return false;
    }
    return _inverted_index_context->has_inverted_index_result_for_expr(_root.get());
}
Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block, int column_to_keep) {
if (vexpr_ctx == nullptr || block->rows() == 0) {
return Status::OK();

View File

@ -25,6 +25,7 @@
#include "common/factory_creator.h"
#include "common/status.h"
#include "olap/rowset/segment_v2/inverted_index_reader.h"
#include "runtime/types.h"
#include "udf/udf.h"
#include "vec/core/block.h"
@ -37,6 +38,114 @@ class RuntimeState;
namespace doris::vectorized {
// Bookkeeping shared by expressions while they are evaluated against inverted indexes.
//
// The column-id table, the per-column index iterators, the per-column storage name/type pairs
// and the status map are held by reference and are not owned, so they must outlive this object.
// The context itself owns two maps keyed by expression pointer: the result bitmap recorded for
// an expression and the result column recorded for it.
class InvertedIndexContext {
public:
    InvertedIndexContext(
            const std::vector<ColumnId>& col_ids,
            const std::vector<std::unique_ptr<segment_v2::InvertedIndexIterator>>&
                    inverted_index_iterators,
            const std::vector<vectorized::IndexFieldNameAndTypePair>& storage_name_and_type_vec,
            std::unordered_map<ColumnId, std::unordered_map<const vectorized::VExpr*, bool>>&
                    common_expr_inverted_index_status)
            : _col_ids(col_ids),
              _inverted_index_iterators(inverted_index_iterators),
              _storage_name_and_type(storage_name_and_type_vec),
              _expr_inverted_index_status(common_expr_inverted_index_status) {}

    // Returns the index iterator of the column found at position `column_index` of the
    // column-id table, or nullptr when the position or the mapped column id is out of range, or
    // when that column has no iterator.
    segment_v2::InvertedIndexIterator* get_inverted_index_iterator_by_column_id(
            int column_index) const {
        if (!_is_valid_column_index(column_index)) {
            return nullptr;
        }
        const auto& cid = _col_ids[column_index];
        // get() on an empty unique_ptr yields nullptr, which covers the "no iterator" case.
        return cid < _inverted_index_iterators.size() ? _inverted_index_iterators[cid].get()
                                                      : nullptr;
    }

    // Returns the storage name/type pair of the column found at position `column_index`, or
    // nullptr when the position or the mapped column id is out of range.
    const vectorized::IndexFieldNameAndTypePair* get_storage_name_and_type_by_column_id(
            int column_index) const {
        if (!_is_valid_column_index(column_index)) {
            return nullptr;
        }
        const auto& cid = _col_ids[column_index];
        return cid < _storage_name_and_type.size() ? &_storage_name_and_type[cid] : nullptr;
    }

    // True when a result bitmap has been recorded for `expr`.
    bool has_inverted_index_result_for_expr(const vectorized::VExpr* expr) const {
        return _inverted_index_result_bitmap.find(expr) != _inverted_index_result_bitmap.end();
    }

    // Records (or replaces) the result bitmap of `expr`.
    void set_inverted_index_result_for_expr(const vectorized::VExpr* expr,
                                            segment_v2::InvertedIndexResultBitmap bitmap) {
        _inverted_index_result_bitmap[expr] = std::move(bitmap);
    }

    // Mutable access to the expr -> result bitmap map.
    std::unordered_map<const vectorized::VExpr*, segment_v2::InvertedIndexResultBitmap>&
    get_inverted_index_result_bitmap() {
        return _inverted_index_result_bitmap;
    }

    // Mutable access to the expr -> result column map.
    std::unordered_map<const vectorized::VExpr*, ColumnPtr>& get_inverted_index_result_column() {
        return _inverted_index_result_column;
    }

    // Returns the result bitmap recorded for `expr`, or nullptr when there is none.
    const segment_v2::InvertedIndexResultBitmap* get_inverted_index_result_for_expr(
            const vectorized::VExpr* expr) {
        const auto entry = _inverted_index_result_bitmap.find(expr);
        return entry != _inverted_index_result_bitmap.end() ? &entry->second : nullptr;
    }

    // Records (or replaces) the result column of `expr`.
    void set_inverted_index_result_column_for_expr(const vectorized::VExpr* expr,
                                                   ColumnPtr column) {
        _inverted_index_result_column[expr] = std::move(column);
    }

    // Flips the status of (`column_index`'s column, `expr`) to true. Only entries that already
    // exist in the status map are updated; nothing is inserted, and an out-of-range
    // `column_index` is ignored.
    void set_true_for_inverted_index_status(const vectorized::VExpr* expr, int column_index) {
        if (!_is_valid_column_index(column_index)) {
            return;
        }
        const auto per_column = _expr_inverted_index_status.find(_col_ids[column_index]);
        if (per_column == _expr_inverted_index_status.end()) {
            return;
        }
        const auto per_expr = per_column->second.find(expr);
        if (per_expr != per_column->second.end()) {
            per_expr->second = true;
        }
    }

private:
    // True when `column_index` is a valid position in the column-id table.
    bool _is_valid_column_index(int column_index) const {
        return column_index >= 0 && static_cast<size_t>(column_index) < _col_ids.size();
    }

    // Column-id table: position -> ColumnId. The ColumnId indexes the two vectors below.
    const std::vector<ColumnId>& _col_ids;

    // Inverted index iterators indexed by ColumnId; an entry may be empty.
    const std::vector<std::unique_ptr<segment_v2::InvertedIndexIterator>>&
            _inverted_index_iterators;

    // Storage name and type pairs indexed by ColumnId.
    const std::vector<vectorized::IndexFieldNameAndTypePair>& _storage_name_and_type;

    // Result bitmap recorded per expression.
    std::unordered_map<const vectorized::VExpr*, segment_v2::InvertedIndexResultBitmap>
            _inverted_index_result_bitmap;

    // Result column recorded per expression.
    std::unordered_map<const vectorized::VExpr*, ColumnPtr> _inverted_index_result_column;

    // Caller-owned map: ColumnId -> (expression -> evaluated-by-index flag).
    std::unordered_map<ColumnId, std::unordered_map<const vectorized::VExpr*, bool>>&
            _expr_inverted_index_status;
};
class VExprContext {
ENABLE_FACTORY_CREATOR(VExprContext);
@ -50,6 +159,13 @@ public:
// Returns the root expression (a copy of the shared pointer).
VExprSPtr root() { return _root; }
// Replaces the root expression.
void set_root(const VExprSPtr& expr) { _root = expr; }
// Installs the inverted index context used by this expression context. The argument is taken
// by value and moved into the member.
void set_inverted_index_context(std::shared_ptr<InvertedIndexContext> inverted_index_context) {
_inverted_index_context = std::move(inverted_index_context);
}
// Returns the inverted index context by value (a shared_ptr copy); null if none was set.
std::shared_ptr<InvertedIndexContext> get_inverted_index_context() const {
return _inverted_index_context;
}
/// Creates a FunctionContext, and returns the index that's passed to fn_context() to
/// retrieve the created context. Exprs that need a FunctionContext should call this in
@ -69,6 +185,14 @@ public:
return _fn_contexts[i].get();
}
// Evaluates the expression with inverted indexes, e.g. when columns a and b both have one.
// In some situations a column has an index but applying it is not useful; such an
// expression should be skipped and the index not applied to it.
[[nodiscard]] Status evaluate_inverted_index(uint32_t segment_num_rows);
// Returns whether the root expression has an inverted index result recorded.
bool all_expr_inverted_index_evaluated();
[[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block,
int column_to_keep);
@ -175,5 +299,7 @@ private:
// This flag only works on VSlotRef.
// Forces materialization even if the slot's need_materialize is false, i.e. the
// need_materialize flag is ignored.
bool _force_materialize_slot = false;
// Inverted index context installed via set_inverted_index_context(); null until then.
std::shared_ptr<InvertedIndexContext> _inverted_index_context;
};
} // namespace doris::vectorized

View File

@ -34,6 +34,8 @@
#include "vec/core/column_with_type_and_name.h"
#include "vec/core/columns_with_type_and_name.h"
#include "vec/exprs/vexpr_context.h"
#include "vec/exprs/vliteral.h"
#include "vec/exprs/vslot_ref.h"
#include "vec/functions/simple_function_factory.h"
namespace doris {
@ -79,16 +81,14 @@ Status VInPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
VExpr::register_function_context(state, context);
_prepare_finished = true;
_can_fast_execute = can_fast_execute();
_in_list_value_count_threshold = state->query_options().in_list_value_count_threshold;
return Status::OK();
}
Status VInPredicate::open(RuntimeState* state, VExprContext* context,
FunctionContext::FunctionStateScope scope) {
DCHECK(_prepare_finished);
for (int i = 0; i < _children.size(); ++i) {
RETURN_IF_ERROR(_children[i]->open(state, context, scope));
for (auto& child : _children) {
RETURN_IF_ERROR(child->open(state, context, scope));
}
RETURN_IF_ERROR(VExpr::init_function_context(context, scope, _function));
if (scope == FunctionContext::FRAGMENT_LOCAL) {
@ -103,10 +103,18 @@ void VInPredicate::close(VExprContext* context, FunctionContext::FunctionStateSc
VExpr::close(context, scope);
}
// Evaluates the IN predicate through the inverted index by delegating to the shared
// _evaluate_inverted_index() helper with this predicate's function.
// Debug builds check that the predicate has at least two children.
Status VInPredicate::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
DCHECK_GE(get_num_children(), 2);
return _evaluate_inverted_index(context, _function, segment_num_rows);
}
Status VInPredicate::execute(VExprContext* context, Block* block, int* result_column_id) {
if (is_const_and_have_executed()) { // const have execute in open function
return get_result_from_const(block, _expr_name, result_column_id);
}
if (_can_fast_execute && fast_execute(context, block, result_column_id)) {
return Status::OK();
}
DCHECK(_open_finished || _getting_const_col);
// TODO: not execute const expr again, but use the const column in function context
doris::vectorized::ColumnNumbers arguments(_children.size());
@ -120,15 +128,6 @@ Status VInPredicate::execute(VExprContext* context, Block* block, int* result_co
// prepare a column to save result
block->insert({nullptr, _data_type, _expr_name});
if (_can_fast_execute) {
auto can_fast_execute = fast_execute(*block, arguments, num_columns_without_result,
block->rows(), _function->get_name());
if (can_fast_execute) {
*result_column_id = num_columns_without_result;
return Status::OK();
}
}
RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, arguments,
num_columns_without_result, block->rows(), false));
*result_column_id = num_columns_without_result;
@ -152,4 +151,4 @@ std::string VInPredicate::debug_string() const {
return out.str();
}
} // namespace doris::vectorized
} // namespace doris::vectorized

View File

@ -54,7 +54,7 @@ public:
// Returns the function backing this predicate.
const FunctionBasePtr function() { return _function; }
// True when the stored _is_not_in flag is set.
bool is_not_in() const { return _is_not_in; };
// IN predicates always report that they can take the fast-execute path.
bool can_fast_execute() const override { return true; }
// Evaluates this predicate through the inverted index; defined out of line.
Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override;
private:
FunctionBasePtr _function;

View File

@ -54,6 +54,7 @@ public:
// Returns the literal rendered as a string; defined out of line.
std::string value() const;
// Column holding the literal value.
const ColumnPtr& get_column_ptr() const { return _column_ptr; }
// Data type of the literal.
const DataTypePtr& get_data_type() const { return _data_type; }
// Identifies this expression as a literal.
bool is_literal() const override { return true; }

View File

@ -43,6 +43,8 @@
#include "vec/core/column_with_type_and_name.h"
#include "vec/core/columns_with_type_and_name.h"
#include "vec/exprs/vexpr_context.h"
#include "vec/exprs/vliteral.h"
#include "vec/exprs/vslot_ref.h"
#include "vec/functions/simple_function_factory.h"
namespace doris {
@ -130,9 +132,35 @@ void VMatchPredicate::close(VExprContext* context, FunctionContext::FunctionStat
VExpr::close(context, scope);
}
// Evaluates the MATCH predicate through the inverted index by delegating to the shared
// _evaluate_inverted_index() helper with this predicate's function.
// Debug builds check that the predicate has exactly two children.
Status VMatchPredicate::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
DCHECK_EQ(get_num_children(), 2);
return _evaluate_inverted_index(context, _function, segment_num_rows);
}
Status VMatchPredicate::execute(VExprContext* context, Block* block, int* result_column_id) {
DCHECK(_open_finished || _getting_const_col);
// TODO: not execute const expr again, but use the const column in function context
if (_can_fast_execute && fast_execute(context, block, result_column_id)) {
return Status::OK();
}
DBUG_EXECUTE_IF("VMatchPredicate.execute", {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
"{} not support slow path, hit debug point.", _expr_name);
});
DBUG_EXECUTE_IF("VMatchPredicate.must_in_slow_path", {
auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>(
"VMatchPredicate.must_in_slow_path", "column_name", "");
std::vector<std::string> column_names;
boost::split(column_names, debug_col_name, boost::algorithm::is_any_of(","));
auto* column_slot_ref = assert_cast<VSlotRef*>(get_child(0).get());
std::string column_name = column_slot_ref->expr_name();
auto it = std::find(column_names.begin(), column_names.end(), column_name);
if (it == column_names.end()) {
return Status::Error<ErrorCode::INTERNAL_ERROR>(
"column {} should in slow path while VMatchPredicate::execute.", column_name);
}
})
doris::vectorized::ColumnNumbers arguments(_children.size());
for (int i = 0; i < _children.size(); ++i) {
int column_id = -1;

View File

@ -54,12 +54,13 @@ public:
// Expression lifecycle hooks; defined out of line.
Status open(RuntimeState* state, VExprContext* context,
FunctionContext::FunctionStateScope scope) override;
void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override;
// Evaluates this predicate through the inverted index; defined out of line.
Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override;
const std::string& expr_name() const override;
const std::string& function_name() const;
std::string debug_string() const override;
// NOTE(review): the two declarations below are the old and new form of the same accessor left
// side by side by the diff view; only one of them can exist in the real header.
const FunctionBasePtr function() { return _function; }
FunctionBasePtr function() { return _function; }
private:
FunctionBasePtr _function;

View File

@ -25,6 +25,7 @@
#include <utility>
#include "common/status.h"
#include "olap/rowset/segment_v2/inverted_index_reader.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_nullable.h"
@ -87,6 +88,91 @@ public:
// Opts out of the default null handling by returning false.
bool use_default_implementation_for_nulls() const override { return false; }
// Evaluates this function through the column's inverted index with an EQUAL query on the single
// literal argument.
//
// @param arguments             exactly one literal argument (the value searched for)
// @param data_type_with_names  exactly one storage name/type pair for the indexed column
// @param iterators             exactly one index iterator; a null entry means "no index"
// @param num_rows              number of rows in the segment
// @param bitmap_result         out: matching rows with null rows masked out; left untouched
//                              when the index cannot be used
// @return OK when the index was applied or deliberately skipped (null iterator, FULLTEXT reader,
//         null search value); otherwise the error reported by the index reader.
Status evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
    DCHECK(arguments.size() == 1);
    DCHECK(data_type_with_names.size() == 1);
    DCHECK(iterators.size() == 1);
    auto* iter = iterators[0];
    // Bind by reference: only the name is read, so copying the pair is unnecessary.
    const auto& data_type_with_name = data_type_with_names[0];
    if (iter == nullptr) {
        return Status::OK();
    }
    if (iter->get_inverted_index_reader_type() ==
        segment_v2::InvertedIndexReaderType::FULLTEXT) {
        // When a parser is configured (FULLTEXT reader), the result cannot be trusted inside
        // expression combinations. Example filter:
        //   !array_index(array, 'tall:120cm, weight: 35kg')
        // A row [tall:120cm, weight: 35kg, hobbies: reading book] is tokenized, and so is the
        // query, so the FULLTEXT reader reports the row as a match although the element is not
        // equal to 'tall:120cm, weight: 35kg'. Skip the index in this case.
        return Status::OK();
    }
    Field param_value;
    arguments[0].column->get(0, param_value);
    auto param_type = arguments[0].type->get_type_as_type_descriptor().type;
    // The current implementation for the inverted index of arrays cannot handle cases where the
    // array contains null values, meaning an item in the array is null.
    if (param_value.is_null()) {
        return Status::OK();
    }

    std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
    std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
    // Read the null bitmap once; it is used below to mask null rows out of the result.
    if (iter->has_null()) {
        segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
        RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
        null_bitmap = null_bitmap_cache_handle.get_bitmap();
    }

    std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
    RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
            param_type, &param_value, query_param));
    if (is_string_type(param_type)) {
        Status st = iter->read_from_inverted_index(
                data_type_with_name.first, query_param->get_value(),
                segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, roaring);
        if (st.code() == ErrorCode::INVERTED_INDEX_NO_TERMS) {
            // if analyzed param with no term, we do not filter any rows
            // return all rows with OK status
            roaring->addRange(0, num_rows);
        } else if (!st.ok()) {
            return st;
        }
    } else {
        RETURN_IF_ERROR(iter->read_from_inverted_index(
                data_type_with_name.first, query_param->get_value(),
                segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, roaring));
    }

    // Debug point: verify that array_contains really filtered rows through the inverted index.
    DBUG_EXECUTE_IF("array_func.array_contains", {
        auto result_bitmap = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
                "array_func.array_contains", "result_bitmap", 0);
        if (result_bitmap < 0) {
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
                    "result_bitmap count cannot be negative");
        }
        if (roaring->cardinality() != result_bitmap) {
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
                    "array_contains really filtered {} by inverted index not equal to expected "
                    "{}",
                    roaring->cardinality(), result_bitmap);
        }
    })

    segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
    bitmap_result = std::move(result);
    bitmap_result.mask_out_null();
    return Status::OK();
}
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
if constexpr (OldVersion) {
return make_nullable(std::make_shared<DataTypeNumber<ResultType>>());

View File

@ -185,6 +185,15 @@ public:
->execute(context, block, arguments, result, input_rows_count, dry_run);
}
// Evaluates this function through inverted indexes. Default implementation: functions that do
// not override it report NotSupported, naming the function in the message.
//
// @param arguments             literal arguments of the call
// @param data_type_with_names  storage name/type pairs of the indexed columns
// @param iterators             inverted index iterators, parallel to data_type_with_names
// @param num_rows              number of rows in the segment
// @param bitmap_result         out: result bitmap (untouched by this default)
virtual Status evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
    // The "{}" placeholder is required: without it the function name argument is never
    // substituted into the message.
    return Status::NotSupported("evaluate_inverted_index is not supported in function: {}",
                                get_name());
}
/// Do cleaning work when function is finished, i.e., release state variables in the
/// `FunctionContext` which are registered in `prepare` phase.
virtual Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
@ -220,12 +229,6 @@ public:
}
// Defaults to false; functions that can be pushed down to an index override this.
virtual bool can_push_down_to_index() const { return false; }
// Default implementation: functions that do not override it report NotSupported, naming the
// function in the message.
virtual Status eval_inverted_index(VExpr* context, segment_v2::FuncExprParams& params,
                                   std::shared_ptr<roaring::Roaring>& result) {
    // The "{}" placeholder is required: without it the function name argument is never
    // substituted into the message.
    return Status::NotSupported("eval_inverted_index is not supported in function: {}",
                                get_name());
}
};
using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
@ -441,6 +444,16 @@ protected:
size_t result, size_t input_rows_count) const final {
return function->execute_impl(context, block, arguments, result, input_rows_count);
}
// Forwards inverted index evaluation unchanged to the wrapped `function`.
// NOTE(review): unlike its neighbours this member carries neither `final` nor `override`;
// confirm whether the base class declares a matching virtual.
Status evaluate_inverted_index(
const ColumnsWithTypeAndName& arguments,
const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
return function->evaluate_inverted_index(arguments, data_type_with_names, iterators,
num_rows, bitmap_result);
}
Status execute_impl_dry_run(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) const final {
@ -498,6 +511,15 @@ public:
return function->close(context, scope);
}
// Forwards inverted index evaluation unchanged to the wrapped `function`.
Status evaluate_inverted_index(
const ColumnsWithTypeAndName& args,
const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
return function->evaluate_inverted_index(args, data_type_with_names, iterators, num_rows,
bitmap_result);
}
// Forwards the monotonicity query for the range [left, right] to the wrapped `function`.
IFunctionBase::Monotonicity get_monotonicity_for_range(const IDataType& type, const Field& left,
const Field& right) const override {
return function->get_monotonicity_for_range(type, left, right);
@ -509,11 +531,6 @@ public:
// Both members delegate unchanged to the wrapped `function`.
bool can_push_down_to_index() const override { return function->can_push_down_to_index(); }
Status eval_inverted_index(VExpr* expr, segment_v2::FuncExprParams& params,
std::shared_ptr<roaring::Roaring>& result) override {
return function->eval_inverted_index(expr, params, result);
}
private:
std::shared_ptr<IFunction> function;
DataTypes arguments;

View File

@ -17,27 +17,17 @@
#include "vec/functions/function_multi_match.h"
#include <gen_cpp/PaloBrokerService_types.h>
#include <glog/logging.h>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/classification.hpp>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <roaring/roaring.hh>
#include <string>
#include <vector>
#include "io/fs/file_reader.h"
#include "olap/olap_common.h"
#include "olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h"
#include "olap/rowset/segment_v2/segment_iterator.h"
#include "runtime/primitive_type.h"
#include "vec/columns/column.h"
#include "vec/data_types/data_type.h"
#include "vec/exprs/varray_literal.h"
#include "vec/exprs/vexpr.h"
#include "vec/exprs/vslot_ref.h"
#include "vec/functions/simple_function_factory.h"
@ -56,97 +46,42 @@ InvertedIndexQueryType get_query_type(const std::string& query_type) {
return InvertedIndexQueryType::UNKNOWN_QUERY;
}
// NOTE(review): this span is diff residue. The former eval_inverted_index() (which read
// fields, query type and query string from the expression's children and used the segment
// iterator and query cache) is interleaved with its replacement evaluate_inverted_index(), so
// the text below is not one compilable function. Code is left untouched; comments only.
Status FunctionMultiMatch::eval_inverted_index(VExpr* expr, segment_v2::FuncExprParams& params,
std::shared_ptr<roaring::Roaring>& result) {
// fields
std::vector<std::string> query_fileds;
size_t i = 0;
for (; i < expr->get_num_children(); i++) {
auto child_expr = expr->get_child(i);
if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
query_fileds.emplace_back(child_expr->expr_name());
} else {
break;
}
}
if (i != expr->get_num_children() - 2) {
return Status::RuntimeError("parameter type incorrect: slot = {}", i);
}
// evaluate_inverted_index: arguments[0] is the query type name and arguments[1] the query
// string. Every (name, iterator) pair is queried with the same query string and the per-column
// results are OR-ed into one bitmap, which is returned together with an empty null bitmap.
Status FunctionMultiMatch::evaluate_inverted_index(
const ColumnsWithTypeAndName& arguments,
const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
DCHECK(arguments.size() == 2);
std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
// NOTE(review): null_bitmap is never filled in the new code path visible here.
std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
// type
std::string param1 = std::static_pointer_cast<VLiteral>(expr->get_child(i))->value();
auto query_type = get_query_type(param1);
auto query_type_value = arguments[0].column->get_data_at(0);
auto query_type = get_query_type(query_type_value.to_string());
// NOTE(review): the messages below format `query_type`, which in this branch is always
// UNKNOWN_QUERY; the offending input string is not reported - confirm intent.
if (query_type == InvertedIndexQueryType::UNKNOWN_QUERY) {
return Status::RuntimeError("parameter query type incorrect: query_type = {}", query_type);
return Status::RuntimeError(
"parameter query type incorrect for function multi_match: query_type = {}",
query_type);
}
// query
std::string query_str = std::static_pointer_cast<VLiteral>(expr->get_child(i + 1))->value();
auto& segment_iterator = params._segment_iterator;
auto& segment = segment_iterator->segment();
auto& opts = segment_iterator->storage_read_options();
auto& tablet_schema = opts.tablet_schema;
auto& idx_iterators = segment_iterator->inverted_index_iterators();
// check
std::vector<ColumnId> columns_ids;
for (const auto& column_name : query_fileds) {
auto cid = tablet_schema->field_index(column_name);
if (cid < 0) {
return Status::RuntimeError("column name is incorrect: {}", column_name);
}
if (idx_iterators[cid] == nullptr) {
return Status::RuntimeError("column idx is incorrect: {}", column_name);
}
columns_ids.emplace_back(cid);
auto query_str = arguments[1].column->get_data_at(0);
auto param_type = arguments[1].type->get_type_as_type_descriptor().type;
if (!is_string_type(param_type)) {
return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
"arguments for multi_match must be string");
}
// cache key
roaring::Roaring cids_str;
cids_str.addMany(columns_ids.size(), columns_ids.data());
cids_str.runOptimize();
std::string column_name_binary(cids_str.getSizeInBytes(), 0);
cids_str.write(column_name_binary.data());
InvertedIndexQueryCache::CacheKey cache_key;
io::Path index_path = segment.file_reader()->path();
cache_key.index_path = index_path.parent_path() / index_path.stem();
cache_key.column_name = column_name_binary;
cache_key.query_type = query_type;
cache_key.value = query_str;
// query cache
auto* cache = InvertedIndexQueryCache::instance();
InvertedIndexQueryCacheHandle cache_handler;
if (cache->lookup(cache_key, &cache_handler)) {
result = cache_handler.get_bitmap();
return Status::OK();
}
// search
for (const auto& column_name : query_fileds) {
auto cid = tablet_schema->field_index(column_name);
const auto& column = tablet_schema->column(column_name);
const auto& index_reader = idx_iterators[cid]->reader();
// NOTE(review): iterators[i] is indexed with data_type_with_names' size and dereferenced
// without a null check - confirm the caller guarantees parallel, non-null vectors.
for (int i = 0; i < data_type_with_names.size(); i++) {
auto column_name = data_type_with_names[i].first;
auto* iter = iterators[i];
auto single_result = std::make_shared<roaring::Roaring>();
StringRef query_value(query_str.data());
auto index_version = tablet_schema->get_inverted_index_storage_format();
if (index_version == InvertedIndexStorageFormatPB::V1) {
RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state, column_name,
&query_value, query_type, single_result));
} else if (index_version == InvertedIndexStorageFormatPB::V2) {
RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state,
std::to_string(column.unique_id()), &query_value,
query_type, single_result));
}
(*result) |= (*single_result);
std::shared_ptr<roaring::Roaring> index = std::make_shared<roaring::Roaring>();
RETURN_IF_ERROR(iter->read_from_inverted_index(column_name, &query_str, query_type,
num_rows, index));
*roaring |= *index;
}
result->runOptimize();
cache->insert(cache_key, result, &cache_handler);
segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
bitmap_result = result;
return Status::OK();
}

View File

@ -58,8 +58,11 @@ public:
bool can_push_down_to_index() const override { return true; }
Status eval_inverted_index(VExpr* expr, segment_v2::FuncExprParams& params,
std::shared_ptr<roaring::Roaring>& result) override;
/// Index-based evaluation hook for this function (overrides the base-class virtual).
///
/// @param arguments             the function's argument columns with their types and names.
/// @param data_type_with_names  name/type pair for each indexed field involved.
/// @param iterators             inverted index iterators; presumably one per entry of
///                              `data_type_with_names` -- TODO confirm against the caller.
/// @param num_rows              row count passed through to the index reads.
/// @param bitmap_result         output: receives the resulting bitmap(s).
/// @return a Status describing success or the failure encountered.
Status evaluate_inverted_index(
const ColumnsWithTypeAndName& arguments,
const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
segment_v2::InvertedIndexResultBitmap& bitmap_result) const override;
};
} // namespace doris::vectorized

View File

@ -39,6 +39,8 @@
#include "vec/functions/function_helpers.h"
#include "vec/functions/functions_logical.h"
#include "vec/runtime/vdatetime_value.h"
//#include "olap/rowset/segment_v2/inverted_index_reader.h"
namespace doris::vectorized {
/** Comparison functions: ==, !=, <, >, <=, >=.
@ -524,6 +526,72 @@ public:
return std::make_shared<DataTypeUInt8>();
}
/// Evaluates this comparison predicate through a column's inverted index.
///
/// Row 0 of `arguments[0]` is taken as the comparison literal, the function name is
/// mapped to an index query type, and the query is executed with `iterators[0]`.
/// The matched rows and the column's null bitmap are stored in `bitmap_result`.
///
/// `bitmap_result` is left untouched (and OK is returned) when there is no iterator
/// or when the index reader is a FULLTEXT reader.
///
/// @param arguments             exactly one argument: the literal to compare against.
/// @param data_type_with_names  exactly one entry: the indexed column's name and type.
/// @param iterators             exactly one entry: the column's index iterator (may be null).
/// @param num_rows              row count handed to the index read; also the upper bound
///                              of the range used to negate the result for "ne".
/// @param bitmap_result         output bitmap pair (data rows + null rows).
/// @return OK on success or when evaluation is skipped; InvalidArgument for an unknown
///         comparison name; otherwise the first error reported by the index calls.
Status evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
    DCHECK(arguments.size() == 1);
    DCHECK(data_type_with_names.size() == 1);
    DCHECK(iterators.size() == 1);
    auto* iter = iterators[0];
    if (iter == nullptr) {
        return Status::OK();
    }
    if (iter->get_inverted_index_reader_type() ==
        segment_v2::InvertedIndexReaderType::FULLTEXT) {
        // Comparison predicates are not evaluated through the index when the reader
        // is FULLTEXT; return without producing a result.
        return Status::OK();
    }
    // Only the column name is needed, so copy just that instead of the whole pair.
    std::string column_name = data_type_with_names[0].first;

    // Convert the literal (row 0 of the argument column) into an index query value.
    // This is done once; the value does not change between here and the index read.
    Field param_value;
    arguments[0].column->get(0, param_value);
    auto param_type = arguments[0].type->get_type_as_type_descriptor().type;
    std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
    RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
            param_type, &param_value, query_param));

    // Map the comparison name to a query type. "not equals" is issued as an EQUAL
    // query and negated after the read.
    segment_v2::InvertedIndexQueryType query_type;
    std::string_view name_view(name);
    if (name_view == NameEquals::name || name_view == NameNotEquals::name) {
        query_type = segment_v2::InvertedIndexQueryType::EQUAL_QUERY;
    } else if (name_view == NameLess::name) {
        query_type = segment_v2::InvertedIndexQueryType::LESS_THAN_QUERY;
    } else if (name_view == NameLessOrEquals::name) {
        query_type = segment_v2::InvertedIndexQueryType::LESS_EQUAL_QUERY;
    } else if (name_view == NameGreater::name) {
        query_type = segment_v2::InvertedIndexQueryType::GREATER_THAN_QUERY;
    } else if (name_view == NameGreaterOrEquals::name) {
        query_type = segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY;
    } else {
        return Status::InvalidArgument("invalid comparison op type {}", Name::name);
    }

    std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
    RETURN_IF_ERROR(iter->read_from_inverted_index(column_name, query_param->get_value(),
                                                   query_type, num_rows, roaring));

    // Use the index's null bitmap when the column has nulls, an empty one otherwise.
    std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
    if (iter->has_null()) {
        segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
        RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
        null_bitmap = null_bitmap_cache_handle.get_bitmap();
    }
    segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
    bitmap_result = result;
    // NOTE(review): presumably removes the null rows from the data bitmap -- confirm
    // against InvertedIndexResultBitmap::mask_out_null.
    bitmap_result.mask_out_null();

    // Compare by content through the string_view. `name` is not declared in this
    // block; if it is a `const char*`, `name == "ne"` would compare addresses
    // rather than characters.
    if (name_view == "ne") {
        // Negate the EQUAL result relative to the full row range [0, num_rows).
        roaring::Roaring full_result;
        full_result.addRange(0, num_rows);
        bitmap_result.op_not(&full_result);
    }
    return Status::OK();
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
const auto& col_with_type_and_name_left = block.get_by_position(arguments[0]);

View File

@ -29,6 +29,7 @@
#include "common/status.h"
#include "exprs/create_predicate_function.h"
#include "exprs/hybrid_set.h"
#include "olap/rowset/segment_v2/inverted_index_reader.h"
#include "runtime/define_primitive_type.h"
#include "runtime/types.h"
#include "udf/udf.h"
@ -135,6 +136,64 @@ public:
return Status::OK();
}
/// Evaluates this IN / NOT IN predicate through a column's inverted index.
///
/// Each argument contributes one value (row 0 of its column). Non-null values are
/// looked up with an EQUAL query and the matches are OR-ed together; a null value
/// adds the column's null bitmap instead. For the negative variant the combined
/// result is negated relative to the range [0, num_rows).
///
/// OK is returned with `bitmap_result` untouched when there is no iterator, when the
/// index reader is FULLTEXT, or when the negative variant encounters a null value.
///
/// @param arguments             the values of the IN list.
/// @param data_type_with_names  exactly one entry: the indexed column's name and type.
/// @param iterators             exactly one entry: the column's index iterator (may be null).
/// @param num_rows              row count handed to the index reads and used for negation.
/// @param bitmap_result         output bitmap pair (data rows + null rows).
Status evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
    DCHECK(data_type_with_names.size() == 1);
    DCHECK(iterators.size() == 1);

    // Guard clauses first: nothing to evaluate without a usable, non-FULLTEXT index.
    auto* index_iter = iterators[0];
    if (index_iter == nullptr) {
        return Status::OK();
    }
    // IN lists are not evaluated through the index when the reader is FULLTEXT.
    const bool fulltext_reader = index_iter->get_inverted_index_reader_type() ==
                                 segment_v2::InvertedIndexReaderType::FULLTEXT;
    if (fulltext_reader) {
        return Status::OK();
    }

    // Null rows of the column: taken from the index when present, empty otherwise.
    auto null_rows = std::make_shared<roaring::Roaring>();
    if (index_iter->has_null()) {
        segment_v2::InvertedIndexQueryCacheHandle null_rows_handle;
        RETURN_IF_ERROR(index_iter->read_null_bitmap(&null_rows_handle));
        null_rows = null_rows_handle.get_bitmap();
    }

    std::string column_name = data_type_with_names[0].first;
    // Every list element is looked up with the same query type.
    const InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;

    auto matched_rows = std::make_shared<roaring::Roaring>();
    for (const auto& list_arg : arguments) {
        Field literal;
        list_arg.column->get(0, literal);
        auto literal_type = list_arg.type->get_type_as_type_descriptor().type;

        if (literal.is_null()) {
            // A predicate such as `column NOT IN (NULL, '')` is not pushed down to the index.
            if constexpr (negative) {
                return Status::OK();
            }
            *matched_rows |= *null_rows;
            continue;
        }

        std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
        RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
                literal_type, &literal, query_param));
        auto value_rows = std::make_shared<roaring::Roaring>();
        RETURN_IF_ERROR(index_iter->read_from_inverted_index(
                column_name, query_param->get_value(), query_type, num_rows, value_rows));
        *matched_rows |= *value_rows;
    }

    segment_v2::InvertedIndexResultBitmap combined(matched_rows, null_rows);
    bitmap_result = combined;
    bitmap_result.mask_out_null();
    if constexpr (negative) {
        // NOT IN: negate relative to the full row range [0, num_rows).
        roaring::Roaring all_rows;
        all_rows.addRange(0, num_rows);
        bitmap_result.op_not(&all_rows);
    }
    return Status::OK();
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
auto* in_state = reinterpret_cast<InState*>(

View File

@ -24,7 +24,63 @@
#include "util/debug_points.h"
namespace doris::vectorized {
/// Evaluates a MATCH-family predicate through a column's inverted index.
///
/// Row 0 of `arguments[0]` is the query string. It is converted to an index query
/// value and executed with `iterators[0]` using the query type derived from the
/// function name; the matched rows and the column's null bitmap are stored in
/// `bitmap_result`.
///
/// @param arguments             exactly one argument: the (string) match query.
/// @param data_type_with_names  exactly one entry: the indexed column's name and type.
/// @param iterators             exactly one entry: the column's index iterator (may be null,
///                              in which case OK is returned and `bitmap_result` is untouched).
/// @param num_rows              row count handed to the index read.
/// @param bitmap_result         output bitmap pair (data rows + null rows).
/// @return OK on success; INVERTED_INDEX_INVALID_PARAMETERS when a phrase query targets a
///         FULLTEXT index whose properties report no phrase support, or when the argument
///         is not a string type; otherwise the first error reported by the index calls.
Status FunctionMatchBase::evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
    DCHECK(arguments.size() == 1);
    DCHECK(data_type_with_names.size() == 1);
    DCHECK(iterators.size() == 1);
    auto* iter = iterators[0];
    auto data_type_with_name = data_type_with_names[0];
    if (iter == nullptr) {
        return Status::OK();
    }

    // Phrase-style queries need an index that was built with phrase support.
    const std::string& function_name = get_name();
    if (function_name == MATCH_PHRASE_FUNCTION || function_name == MATCH_PHRASE_PREFIX_FUNCTION ||
        function_name == MATCH_PHRASE_EDGE_FUNCTION) {
        if (iter->get_inverted_index_reader_type() == InvertedIndexReaderType::FULLTEXT &&
            get_parser_phrase_support_string_from_properties(iter->get_index_properties()) ==
                    INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO) {
            return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
                    "phrase queries require setting support_phrase = true");
        }
    }

    // Only string arguments are accepted; everything below may rely on that, so no
    // second type check is needed before the index read.
    Field param_value;
    arguments[0].column->get(0, param_value);
    auto param_type = arguments[0].type->get_type_as_type_descriptor().type;
    if (!is_string_type(param_type)) {
        return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
                "arguments for match must be string");
    }
    std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
    RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
            param_type, &param_value, query_param));

    std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
    auto inverted_index_query_type = get_query_type_from_fn_name();
    RETURN_IF_ERROR(
            iter->read_from_inverted_index(data_type_with_name.first, query_param->get_value(),
                                           inverted_index_query_type, num_rows, roaring));

    // Use the index's null bitmap when the column has nulls, an empty one otherwise.
    std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
    if (iter->has_null()) {
        segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
        RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
        null_bitmap = null_bitmap_cache_handle.get_bitmap();
    }
    segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
    bitmap_result = result;
    // NOTE(review): presumably removes the null rows from the data bitmap -- confirm
    // against InvertedIndexResultBitmap::mask_out_null.
    bitmap_result.mask_out_null();
    return Status::OK();
}
Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) const {
@ -32,78 +88,70 @@ Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block,
DataTypePtr& type_ptr = block.get_by_position(arguments[1]).type;
auto match_query_str = type_ptr->to_string(*column_ptr, 0);
std::string column_name = block.get_by_position(arguments[0]).name;
auto match_pred_column_name =
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_match_" + match_query_str;
if (!block.has(match_pred_column_name)) {
VLOG_DEBUG << "begin to execute match directly, column_name=" << column_name
<< ", match_query_str=" << match_query_str;
InvertedIndexCtx* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
context->get_function_state(FunctionContext::THREAD_LOCAL));
if (inverted_index_ctx == nullptr) {
inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
}
const ColumnPtr source_col =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
const auto* values = check_and_get_column<ColumnString>(source_col.get());
const ColumnArray* array_col = nullptr;
if (source_col->is_column_array()) {
if (source_col->is_nullable()) {
auto* nullable = check_and_get_column<ColumnNullable>(source_col.get());
array_col = check_and_get_column<ColumnArray>(*nullable->get_nested_column_ptr());
} else {
array_col = check_and_get_column<ColumnArray>(source_col.get());
}
if (array_col && !array_col->get_data().is_column_string()) {
return Status::NotSupported(
fmt::format("unsupported nested array of type {} for function {}",
is_column_nullable(array_col->get_data())
? array_col->get_data().get_name()
: array_col->get_data().get_family_name(),
get_name()));
}
if (is_column_nullable(array_col->get_data())) {
const auto& array_nested_null_column =
reinterpret_cast<const ColumnNullable&>(array_col->get_data());
values = check_and_get_column<ColumnString>(
*(array_nested_null_column.get_nested_column_ptr()));
} else {
// array column element is always set Nullable for now.
values = check_and_get_column<ColumnString>(*(array_col->get_data_ptr()));
}
} else if (auto* nullable = check_and_get_column<ColumnNullable>(source_col.get())) {
// match null
if (type_ptr->is_nullable()) {
if (column_ptr->only_null()) {
block.get_by_position(result).column = nullable->get_null_map_column_ptr();
return Status::OK();
}
} else {
values = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
}
}
if (!values) {
LOG(WARNING) << "Illegal column " << source_col->get_name();
return Status::InternalError("Not supported input column types");
}
// result column
auto res = ColumnUInt8::create();
ColumnUInt8::Container& vec_res = res->get_data();
// set default value to 0, and match functions only need to set 1/true
vec_res.resize_fill(input_rows_count);
RETURN_IF_ERROR(execute_match(
context, column_name, match_query_str, input_rows_count, values, inverted_index_ctx,
(array_col ? &(array_col->get_offsets()) : nullptr), vec_res));
block.replace_by_position(result, std::move(res));
} else {
auto match_pred_column =
block.get_by_name(match_pred_column_name).column->convert_to_full_column_if_const();
block.replace_by_position(result, std::move(match_pred_column));
VLOG_DEBUG << "begin to execute match directly, column_name=" << column_name
<< ", match_query_str=" << match_query_str;
InvertedIndexCtx* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
context->get_function_state(FunctionContext::THREAD_LOCAL));
if (inverted_index_ctx == nullptr) {
inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
}
const ColumnPtr source_col =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
const auto* values = check_and_get_column<ColumnString>(source_col.get());
const ColumnArray* array_col = nullptr;
if (source_col->is_column_array()) {
if (source_col->is_nullable()) {
auto* nullable = check_and_get_column<ColumnNullable>(source_col.get());
array_col = check_and_get_column<ColumnArray>(*nullable->get_nested_column_ptr());
} else {
array_col = check_and_get_column<ColumnArray>(source_col.get());
}
if (array_col && !array_col->get_data().is_column_string()) {
return Status::NotSupported(
fmt::format("unsupported nested array of type {} for function {}",
is_column_nullable(array_col->get_data())
? array_col->get_data().get_name()
: array_col->get_data().get_family_name(),
get_name()));
}
if (is_column_nullable(array_col->get_data())) {
const auto& array_nested_null_column =
reinterpret_cast<const ColumnNullable&>(array_col->get_data());
values = check_and_get_column<ColumnString>(
*(array_nested_null_column.get_nested_column_ptr()));
} else {
// array column element is always set Nullable for now.
values = check_and_get_column<ColumnString>(*(array_col->get_data_ptr()));
}
} else if (auto* nullable = check_and_get_column<ColumnNullable>(source_col.get())) {
// match null
if (type_ptr->is_nullable()) {
if (column_ptr->only_null()) {
block.get_by_position(result).column = nullable->get_null_map_column_ptr();
return Status::OK();
}
} else {
values = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
}
}
if (!values) {
LOG(WARNING) << "Illegal column " << source_col->get_name();
return Status::InternalError("Not supported input column types");
}
// result column
auto res = ColumnUInt8::create();
ColumnUInt8::Container& vec_res = res->get_data();
// set default value to 0, and match functions only need to set 1/true
vec_res.resize_fill(input_rows_count);
RETURN_IF_ERROR(execute_match(context, column_name, match_query_str, input_rows_count, values,
inverted_index_ctx,
(array_col ? &(array_col->get_offsets()) : nullptr), vec_res));
block.replace_by_position(result, std::move(res));
return Status::OK();
}

View File

@ -95,6 +95,11 @@ public:
int32_t& current_src_array_offset) const;
Status check(FunctionContext* context, const std::string& function_name) const;
/// Index-based evaluation hook for the MATCH family (overrides the base-class virtual).
///
/// @param arguments             the function's argument columns with their types and names.
/// @param data_type_with_names  name/type pair for each indexed field involved.
/// @param iterators             inverted index iterators for those fields.
/// @param num_rows              row count passed through to the index read.
/// @param bitmap_result         output: receives the resulting bitmap(s).
/// @return a Status describing success or the failure encountered.
Status evaluate_inverted_index(
const ColumnsWithTypeAndName& arguments,
const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
segment_v2::InvertedIndexResultBitmap& bitmap_result) const override;
};
class FunctionMatchAny : public FunctionMatchBase {

View File

@ -2,6 +2,18 @@
-- !sql --
863
-- !sql --
235
-- !sql --
166
-- !sql --
56
-- !sql --
7
-- !sql --
863
@ -14,21 +26,12 @@
-- !sql --
235
-- !sql --
235
-- !sql --
166
-- !sql --
166
-- !sql --
166
-- !sql --
56
-- !sql --
56
@ -41,6 +44,3 @@
-- !sql --
7
-- !sql --
7

View File

@ -96,10 +96,10 @@ suite("test_all_index_hit_fault_injection", "nonConcurrent") {
load_httplogs_data.call(indexTbName2, 'test_all_index_hit_fault_injection_2', 'true', 'json', 'documents-1000.json')
sql "sync"
sql """ set enable_common_expr_pushdown = true """
try {
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._read_columns_by_index", [column_name: "clientip,request"])
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator.fast_execute", [column_name: "status,size"])
GetDebugPoint().enableDebugPointForAllBEs("VectorizedFnCall.must_in_slow_path", [column_name: "status,size"])
qt_sql """ select count() from ${indexTbName1} where (request match_phrase 'hm'); """
@ -124,7 +124,7 @@ suite("test_all_index_hit_fault_injection", "nonConcurrent") {
} finally {
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator.fast_execute")
GetDebugPoint().disableDebugPointForAllBEs("VectorizedFnCall.must_in_slow_path")
}
} finally {
}

View File

@ -76,8 +76,6 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") {
sql "sync"
try {
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._rowid_result_for_index")
sql """ set enable_common_expr_pushdown = true; """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName} where clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0'); """
@ -88,7 +86,6 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") {
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName} where (request match 'hm' and status in (1, 304, 200)); """
} finally {
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._rowid_result_for_index")
}
try {

View File

@ -59,6 +59,7 @@ suite("test_index_lowercase_fault_injection", "nonConcurrent") {
sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 3781); """
sql 'sync'
sql """ set enable_common_expr_pushdown = true """
qt_sql """ select count() from ${testTable} where (request match 'HTTP'); """
qt_sql """ select count() from ${testTable} where (request match 'http'); """

View File

@ -47,6 +47,7 @@ suite("test_index_mow_fault_injection", "nonConcurrent") {
try {
sql "DROP TABLE IF EXISTS ${testTable_unique}"
create_httplogs_unique_table.call(testTable_unique)
sql """ set enable_common_expr_pushdown = true """
sql """ INSERT INTO ${testTable_unique} VALUES (893964617, '40.135.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 24736); """
sql """ INSERT INTO ${testTable_unique} VALUES (893964653, '232.0.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 3781); """

View File

@ -73,6 +73,7 @@ suite("test_need_read_data_fault_injection", "nonConcurrent") {
load_httplogs_data.call(indexTbName, 'test_need_read_data_fault_injection', 'true', 'json', 'documents-1000.json')
sql "sync"
sql """ set enable_common_expr_pushdown = true """
try {
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._read_columns_by_index")

View File

@ -96,6 +96,7 @@ suite("test_topn_fault_injection", "nonConcurrent") {
load_httplogs_data.call(indexTbName2, 'test_topn_fault_injection2', 'true', 'json', 'documents-1000.json')
sql "sync"
sql """ set enable_common_expr_pushdown = true """
try {
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator.topn_opt_1")

View File

@ -45,7 +45,7 @@ suite("test_char_replace") {
"replication_allocation" = "tag.location.default: 1"
);
"""
sql """ set enable_common_expr_pushdown = true """
def var_result = sql "show variables"
logger.info("show variales result: " + var_result )

View File

@ -155,6 +155,7 @@ suite("test_index_compaction_dup_keys", "nonConcurrent") {
sql """ INSERT INTO ${tableName} VALUES (2, "bason", "bason hate pear", 99); """
sql """ INSERT INTO ${tableName} VALUES (3, "andy", "andy love apple", 100); """
sql """ INSERT INTO ${tableName} VALUES (3, "bason", "bason hate pear", 99); """
sql """ set enable_common_expr_pushdown = true """
qt_sql """ select * from ${tableName} order by id, name, hobbies, score """
qt_sql """ select * from ${tableName} where name match "andy" order by id, name, hobbies, score """

View File

@ -295,6 +295,7 @@ suite("test_index_compaction_null", "nonConcurrent") {
"inverted_index_storage_format" = "V1"
)
"""
sql """ set enable_common_expr_pushdown = true """
tablets = sql_return_maparray """ show tablets from ${tableName}; """
run_test.call(tablets)

View File

@ -153,6 +153,7 @@ suite("test_index_compaction_unique_keys", "nonConcurrent") {
"inverted_index_storage_format" = "V1"
);
"""
sql """ set enable_common_expr_pushdown = true """
sql """ INSERT INTO ${tableName} VALUES (1, "andy", "andy love apple", 100); """
sql """ INSERT INTO ${tableName} VALUES (1, "bason", "bason hate pear", 99); """

View File

@ -135,6 +135,7 @@ suite("test_index_compaction_with_multi_index_segments", "nonConcurrent") {
// check config
check_config.call("inverted_index_compaction_enable", "true")
check_config.call("inverted_index_max_buffered_docs", "5")
sql """ set enable_common_expr_pushdown = true """
/**
* test duplicated tables

View File

@ -155,6 +155,7 @@ suite("test_cumulative_compaction_with_format_v2", "inverted_index_format_v2") {
"""
sql """ sync """
sql """ set enable_common_expr_pushdown = true """
qt_select_default """ SELECT * FROM ${tableName} t WHERE city MATCH 'Beijing' ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """

View File

@ -95,6 +95,7 @@ suite("test_drop_column_with_format_v2", "inverted_index_format_v2"){
sql """ INSERT INTO ${tableName} VALUES (2, "bason", 99); """
sql """ INSERT INTO ${tableName} VALUES (3, "andy", 100); """
sql """ INSERT INTO ${tableName} VALUES (3, "bason", 99); """
sql """ set enable_common_expr_pushdown = true """
qt_sql "SELECT * FROM $tableName WHERE name match 'andy' order by id, name, score;"

View File

@ -74,6 +74,7 @@ suite("test_drop_index_with_format_v2", "inverted_index_format_v2"){
sql """ INSERT INTO ${tableName} VALUES (2, "bason", 99); """
sql """ INSERT INTO ${tableName} VALUES (3, "andy", 100); """
sql """ INSERT INTO ${tableName} VALUES (3, "bason", 99); """
sql """ set enable_common_expr_pushdown = true """
qt_sql "SELECT * FROM $tableName WHERE name match 'andy' order by id, name, score;"

View File

@ -155,6 +155,7 @@ suite("test_mor_table_with_format_v2", "inverted_index_format_v2") {
"""
sql """ sync """
sql """ set enable_common_expr_pushdown = true """
sql """ DELETE FROM ${tableName} WHERE user_id = 3 """

View File

@ -155,6 +155,7 @@ suite("test_mow_table_with_format_v2", "inverted_index_format_v2") {
"""
sql """ sync """
sql """ set enable_common_expr_pushdown = true """
qt_select_default """ SELECT * FROM ${tableName} t WHERE city MATCH 'Beijing' ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """

View File

@ -67,6 +67,7 @@ suite("test_rename_column_with_format_v2", "inverted_index_format_v2"){
sql """ INSERT INTO ${tableName} VALUES (2, "bason", 99); """
sql """ INSERT INTO ${tableName} VALUES (3, "andy", 100); """
sql """ INSERT INTO ${tableName} VALUES (3, "bason", 99); """
sql """ set enable_common_expr_pushdown = true """
qt_sql "SELECT * FROM $tableName WHERE name match 'andy' order by id, name, score;"

View File

@ -159,6 +159,7 @@ suite("test_single_replica_compaction_with_format_v2", "inverted_index_format_v2
"""
sql """ sync """
sql """ set enable_common_expr_pushdown = true """
qt_select_default """ SELECT * FROM ${tableName} t WHERE city MATCH 'Beijing' ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """

View File

@ -98,6 +98,7 @@ suite("test_storage_format_v1", "p0") {
create_httplogs_dup_table.call(testTable_dup)
load_httplogs_data.call(testTable_dup, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')
sql "sync"
sql """ set enable_common_expr_pushdown = true """
qt_sql(" select COUNT(*) from ${testTable_dup} where request match 'images' ")

View File

@ -63,6 +63,7 @@ suite("test_array_contains_with_inverted_index"){
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '0974e7a82e30d1af83205e474fadd0a2', '[\"w\"]'); """
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '26823b3995ee38bd145ddd910b2f6300', '[\"x\"]'); """
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1', '[\"y\"]'); """
sql """ set enable_common_expr_pushdown = true """
qt_sql """ select count() from ${indexTblName}"""
order_qt_sql """ select * from tai where array_contains(inventors, 's') order by id; """

View File

@ -61,6 +61,7 @@ suite("test_array_index1"){
"enable_single_replica_compaction" = "false"
);
"""
sql """ set enable_common_expr_pushdown = true """
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '[\"a\", \"b\", \"c\"]'); """
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '[\"d\", \"e\", \"f\", \"g\", \"h\", \"i\", \"j\", \"k\", \"l\"]'); """

View File

@ -279,6 +279,7 @@ suite("test_array_with_inverted_index_all_type"){
}
// query test
sql """ set enable_common_expr_pushdown = true """
for (int i = 0; i < 6; i+=1) {
def indexTblName = indexTblNames[i]

View File

@ -98,6 +98,7 @@ suite("test_chinese_analyzer"){
"replication_allocation" = "tag.location.default: 1"
);
"""
sql """ set enable_common_expr_pushdown = true """
sql "INSERT INTO $indexTblName3 VALUES (1, '我来到北京清华大学'), (2, '我爱你中国'), (3, '人民可以得到更多实惠'), (4, '陕西省西安市高新区创业大厦A座,我的手机号码是12345678901,邮箱是12345678@qq.com,,ip是1.1.1.1,this information is created automatically.');"
qt_sql "SELECT * FROM $indexTblName3 WHERE c MATCH_PHRASE '我爱你' ORDER BY id;"

View File

@ -57,6 +57,7 @@ suite("test_compound", "p0"){
(9, '9', '9', '9'),
(10, '10', '10', '10');
"""
sql """ set enable_common_expr_pushdown = true """
qt_sql "SELECT count() FROM $indexTblName WHERE (id >= 2 AND id < 9) and (a match '2' or b match '5' and c match '5');"
qt_sql "SELECT count() FROM $indexTblName WHERE (id >= 2 AND id < 9) and (a match '2' or b match '5' or c match '6');"

View File

@ -80,6 +80,7 @@ suite("test_compound_1", "p0"){
load_httplogs_data.call(indexTbName, 'test_compound_1', 'true', 'json', 'documents-1000.json')
sql "sync"
sql """ set enable_common_expr_pushdown = true """
qt_sql """ select count() from ${indexTbName} where (request match_phrase 'english' and clientip match_phrase '4' or request match_phrase 'images'); """
qt_sql """ select count() from ${indexTbName} where (request match_phrase 'hm' and clientip match_phrase '3' or request match_phrase 'gif'); """

View File

@ -103,6 +103,7 @@ suite("test_compound_inlist", "nonConcurrent"){
load_httplogs_data.call(indexTbName2, 'test_compound_list_2', 'true', 'json', 'documents-1000.json')
sql "sync"
sql """ set enable_common_expr_pushdown = true """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304)); """
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304)); """

View File

@ -141,6 +141,7 @@ suite("test_count_on_index_httplogs", "p0") {
stream_load_data.call(testTable_unique, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')
sql "sync"
sql """ set enable_common_expr_pushdown = true """
sql """set experimental_enable_nereids_planner=true;"""
sql """set enable_fallback_to_original_planner=false;"""
// case1: test duplicate table

View File

@ -153,6 +153,7 @@ suite("test_count_on_index_2", "p0"){
load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json')
sql "sync"
sql """ set enable_common_expr_pushdown = true """
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453; """
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453; """

View File

@ -47,6 +47,7 @@ suite("test_delete"){
"enable_single_replica_compaction" = "false"
);
"""
sql """ set enable_common_expr_pushdown = true """
sql """ INSERT INTO `${indexTblName}`(`a`, `b`, `c`) VALUES ('1', '6afef581285b6608bf80d5a4e46cf839', 'aaa'), ('2', '48a33ec3453a28bce84b8f96fe161956', 'bbb'),
('3', '021603e7dcfe65d44af0efd0e5aee154', 'ccc'), ('4', 'ee27ee1da291e46403c408e220bed6e1', 'ddd'),

View File

@ -45,6 +45,7 @@ suite("test_equal_on_fulltext", "p0"){
def var_result = sql "show variables"
logger.info("show variales result: " + var_result )
sql """ set enable_common_expr_pushdown = true """
sql "INSERT INTO $indexTblName VALUES (1, 'I am the person'), (2, 'I am a person'), (3, 'I am your person');"
qt_sql "SELECT * FROM $indexTblName WHERE c = 'I am a person' ORDER BY id;"

View File

@ -38,6 +38,7 @@ suite("test_index_chinese_column", "inverted_index_select"){
def table_name_v2 = "test_index_chinese_column_v2"
sql "set enable_unicode_name_support=true"
sql """ set enable_common_expr_pushdown = true """
createAndInsertData(table_name_v1, "V1")
createAndInsertData(table_name_v2, "V2")

View File

@ -101,6 +101,7 @@ suite("test_index_complex_match", "p0"){
load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json')
sql "sync"
sql """ set enable_common_expr_pushdown = true """
qt_sql """ select count() from ${indexTbName1} where clientip match_phrase '247.37.0.0'; """
qt_sql """ select count() from ${indexTbName1} where clientip match_phrase_prefix '247'; """

View File

@ -49,6 +49,7 @@ suite("test_index_delete", "p0") {
try {
sql "sync"
sql """ set enable_common_expr_pushdown = true """
sql """ delete from ${indexTbName1} where a >= 9; """
sql "sync"

View File

@ -47,7 +47,7 @@ suite("test_index_empty_string", "p0"){
(1, '', '1'),
(2, '2', '');
"""
sql """ set enable_common_expr_pushdown = true """
qt_sql "SELECT count() FROM $indexTblName WHERE a match '';"
qt_sql "SELECT count() FROM $indexTblName WHERE b match '';"
}

View File

@ -54,6 +54,7 @@ suite("test_index_equal_select", "inverted_index_select"){
("san zhang", 10, "grade 5", "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", "", "", "", ""),
("li sisi", 11, "grade 6", "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "", "li ba", "li liuliu", "")
"""
sql """ set enable_common_expr_pushdown = true """
// case1: test equal
// case1.0: test index coulume equal ‘’

View File

@ -44,6 +44,7 @@ suite("test_index_key_match_select", "inverted_index_select"){
("u3", ["u1"]),
("u4", ["u3"])
"""
sql """ set enable_common_expr_pushdown = true """
qt_sql "SELECT * FROM ${indexTbName1} WHERE user MATCH_ANY 'u1, u2' ORDER BY user LIMIT 10;"
qt_sql "SELECT * FROM ${indexTbName1} WHERE user MATCH_ANY 'u1, u2, u3' ORDER BY user LIMIT 10;"
}

View File

@ -153,6 +153,7 @@ suite("test_index_match_phrase_select", "inverted_index_select"){
sql """ build index ${text_colume1}_idx on ${indexTbName1} """
wait_for_build_index_on_partition_finish(indexTbName1, timeout)
}
sql """ set enable_common_expr_pushdown = true; """
// case1: test match_phrase ""
try {

View File

@ -16,7 +16,7 @@
// under the License.
suite("test_index_match_phrase_edge", "p0"){
suite("test_index_match_phrase_edge", "nonConcurrent"){
def indexTbName1 = "test_index_match_phrase_edge"
sql "DROP TABLE IF EXISTS ${indexTbName1}"
@ -56,6 +56,8 @@ suite("test_index_match_phrase_edge", "p0"){
try {
sql "sync"
sql """ set enable_common_expr_pushdown = true; """
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
qt_sql """ select * from ${indexTbName1} where b match_phrase_edge 'x.h'; """
qt_sql """ select * from ${indexTbName1} where b match_phrase_edge 'v_i'; """
@ -75,6 +77,6 @@ suite("test_index_match_phrase_edge", "p0"){
qt_sql """ select count() from ${indexTbName1} where c match_phrase_edge 'b'; """
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
}
}

View File

@ -16,7 +16,7 @@
// under the License.
suite("test_index_match_phrase_ordered", "p0"){
suite("test_index_match_phrase_ordered", "nonConcurrent"){
def indexTbName1 = "test_index_match_phrase_ordered"
sql "DROP TABLE IF EXISTS ${indexTbName1}"
@ -49,6 +49,8 @@ suite("test_index_match_phrase_ordered", "p0"){
try {
sql "sync"
sql """ set enable_common_expr_pushdown = true; """
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
qt_sql """ select count() from ${indexTbName1} where b match_phrase 'the lazy'; """
qt_sql """ select count() from ${indexTbName1} where b match_phrase 'the lazy ~1'; """
@ -82,6 +84,6 @@ suite("test_index_match_phrase_ordered", "p0"){
qt_sql """ select count() from ${indexTbName1} where b match_phrase 'the quick ~6'; """
qt_sql """ select count() from ${indexTbName1} where b match_phrase 'the quick ~6+'; """
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
}
}

View File

@ -16,7 +16,7 @@
// under the License.
suite("test_index_match_phrase_prefix", "p0"){
suite("test_index_match_phrase_prefix", "nonConcurrent"){
def indexTbName1 = "test_index_match_phrase_prefix"
def indexTbName2 = "test_index_match_phrase_prefix2"
@ -98,28 +98,32 @@ suite("test_index_match_phrase_prefix", "p0"){
load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json')
sql "sync"
sql """ set enable_common_expr_pushdown = true; """
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'ima'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/h'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/hm'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/images/n'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/tickets/images/ti'; """
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'ima'; """
qt_sql """ select count() from ${indexTbName1} where request like '%ima%'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/h'; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'images/h'; """
qt_sql """ select count() from ${indexTbName1} where request like '%images/h%'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/hm'; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'images/hm'; """
qt_sql """ select count() from ${indexTbName1} where request like '%images/hm%'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/images/n'; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix '/french/images/n'; """
qt_sql """ select count() from ${indexTbName1} where request like '%/french/images/n%'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/tickets/images/ti'; """
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix '/french/tickets/images/ti'; """
qt_sql """ select count() from ${indexTbName1} where request like '%/french/tickets/images/ti%'; """
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
}
}

View File

@ -16,7 +16,7 @@
// under the License.
suite("test_index_match_phrase_prefix_1", "p0"){
suite("test_index_match_phrase_prefix_1", "nonConcurrent"){
def indexTbName1 = "test_index_match_phrase_prefix_1"
sql "DROP TABLE IF EXISTS ${indexTbName1}"
@ -49,6 +49,8 @@ suite("test_index_match_phrase_prefix_1", "p0"){
try {
sql "sync"
sql """ set enable_common_expr_pushdown = true; """
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
qt_sql """ select count() from ${indexTbName1} where c match_phrase_prefix 'O1704361998540E2Cemx9S'; """
qt_sql """ select count() from ${indexTbName1} where d match_phrase_prefix 'O1704361998540E2Cemx9S'; """
@ -57,6 +59,6 @@ suite("test_index_match_phrase_prefix_1", "p0"){
qt_sql """ select count() from ${indexTbName1} where d match_phrase_prefix 'O1704361998540E2Cemx9S=123456789'; """
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
}
}

View File

@ -16,7 +16,7 @@
// under the License.
suite("test_index_match_phrase_slop", "p0"){
suite("test_index_match_phrase_slop", "nonConcurrent"){
def indexTbName1 = "test_index_match_phrase_slop"
sql "DROP TABLE IF EXISTS ${indexTbName1}"
@ -82,6 +82,8 @@ suite("test_index_match_phrase_slop", "p0"){
sql """ INSERT INTO ${indexTbName1} VALUES (1, "127.0.0.1", "I'm glad I kept my fingers crossed ~4", 1, 1); """
sql "sync"
sql """ set enable_common_expr_pushdown = true; """
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
qt_sql """ select count() from ${indexTbName1} where request match_phrase 'get jpg'; """
qt_sql """ select count() from ${indexTbName1} where request match_phrase 'get jpg ~2'; """
@ -117,6 +119,6 @@ suite("test_index_match_phrase_slop", "p0"){
qt_sql """ select * from ${indexTbName1} where request match_phrase 'glad crossed \\~4'; """
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
}
}

View File

@ -16,7 +16,7 @@
// under the License.
suite("test_index_match_regexp", "p0"){
suite("test_index_match_regexp", "nonConcurrent"){
def indexTbName1 = "test_index_match_regexp"
sql "DROP TABLE IF EXISTS ${indexTbName1}"
@ -79,6 +79,8 @@ suite("test_index_match_regexp", "p0"){
load_httplogs_data.call(indexTbName1, 'test_index_match_regexp', 'true', 'json', 'documents-1000.json')
sql "sync"
sql """ set enable_common_expr_pushdown = true; """
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
qt_sql """ select count() from test_index_match_regexp where request match_regexp ''; """
qt_sql """ select count() from test_index_match_regexp where request match_regexp '^h'; """
@ -89,6 +91,6 @@ suite("test_index_match_regexp", "p0"){
qt_sql """ select count() from test_index_match_regexp where request match_regexp 'nonexistence'; """
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
}
}

View File

@ -152,6 +152,7 @@ suite("test_index_match_select", "inverted_index_select"){
sql """ build index ${text_colume1}_idx on ${indexTbName1} """
wait_for_build_index_on_partition_finish(indexTbName1, timeout)
}
sql """ set enable_common_expr_pushdown = true; """
// case1: match term
// case1.0 test match ""

View File

@ -126,4 +126,4 @@ suite("test_index_multi_match", "p0"){
} finally {
//try_sql("DROP TABLE IF EXISTS ${testTable}")
}
}
}

View File

@ -54,6 +54,7 @@ suite("test_index_no_need_read_data", "inverted_index_select"){
// case1: enable nereids planner
sql "set enable_nereids_planner = true"
sql """ set enable_common_expr_pushdown = true; """
qt_select_nereids_0 "SELECT * FROM ${table1} ORDER BY id"
qt_select_nereids_1 "SELECT count() FROM ${table1} WHERE n > 100"

View File

@ -42,7 +42,7 @@ suite("test__null_index", "inverted_index"){
"replication_allocation" = "tag.location.default: 1"
);
"""
sql """ set enable_common_expr_pushdown = true; """
sql "INSERT INTO $indexTblName VALUES (1, []), (2, []), (3, []);"
qt_sql "SELECT * FROM $indexTblName WHERE value match_all 'a';"
}

View File

@ -117,6 +117,7 @@ suite("test_index_range_between_select", "inverted_index_select"){
"""
wait_for_latest_op_on_table_finish(indexTbName1, timeout)
}
sql """ set enable_common_expr_pushdown = true; """
// case1: test simple between case
// case1.0: test data index colume select in specific between condition

View File

@ -54,7 +54,7 @@ suite("test_index_range_bigger_and_equal_select", "inverted_index_select"){
("san zhang", 10, "grade 5", "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", "", "", "", ""),
("li sisi", 11, "grade 6", "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "", "li ba", "li liuliu", "")
"""
sql """ set enable_common_expr_pushdown = true; """
// case1. test >=
// case1.0: test only >=
qt_sql "select * from ${indexTbName1} where name>='' order by name "

View File

@ -54,7 +54,7 @@ suite("test_index_range_bigger_select", "inverted_index_select"){
("san zhang", 10, "grade 5", "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", "", "", "", ""),
("li sisi", 11, "grade 6", "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "", "li ba", "li liuliu", "")
"""
sql """ set enable_common_expr_pushdown = true; """
// case1. test >
// case1.0: test only >
qt_sql "select * from ${indexTbName1} where name>'' order by name "

View File

@ -151,6 +151,7 @@ suite("test_index_range_in_select", "inverted_index_select"){
sql """ build index ${text_colume1}_idx on ${indexTbName1} """
wait_for_build_index_on_partition_finish(indexTbName1, timeout)
}
sql """ set enable_common_expr_pushdown = true; """
// case1: select in
// case1.0: select in specific condition

View File

@ -111,6 +111,7 @@ suite("test_index_range_not_in_select", "inverted_index_select"){
}
assertTrue(useTime <= OpTimeout, "wait_for_latest_build_index_on_partition_finish timeout")
}
sql """ set enable_common_expr_pushdown = true; """
for (int i = 0; i < 2; i++) {
logger.info("select table with index times " + i)

View File

@ -54,7 +54,7 @@ suite("test_index_range_smaller_and_equal_select", "inverted_index_select"){
("san zhang", 10, "grade 5", "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", "", "", "", ""),
("li sisi", 11, "grade 6", "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "", "li ba", "li liuliu", "")
"""
sql """ set enable_common_expr_pushdown = true; """
// case1. test <=
// case1.0: test only <=
qt_sql "select * from ${indexTbName1} where name<='' order by name"

View File

@ -54,7 +54,7 @@ suite("test_index_range_smaller_select", "inverted_index_select"){
("san zhang", 10, "grade 5", "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", "", "", "", ""),
("li sisi", 11, "grade 6", "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "", "li ba", "li liuliu", "")
"""
sql """ set enable_common_expr_pushdown = true; """
// case1. test <
// case1.0: test only <
sql "select * from ${indexTbName1} where name<'' order by name "

View File

@ -142,7 +142,7 @@ suite("test_index_rqg_bug", "test_index_rqg_bug"){
(48, -10, 7, 3, 4, -1290467110130692882, NULL, -5421887030808227301, 2147894047624029750, '2023-12-20', '2026-02-18', '2023-12-10', '2024-02-18', 'v', 'f', 'u', 'z', 'w', 'l', 'i', 'b'),
(49, 4, -10, -10, -4, 7177870619817484302, 2010854013707344984, 515636226818986547, -4617727694631456148, '2023-12-14', '2024-01-09', '2023-12-11', '2024-01-08', 'k', 'o', 'r', 'h', 'x', 'v', 'm', 'r');
"""
sql """ set enable_common_expr_pushdown = true; """
qt_select_bug_1 """
SELECT
MIN(DISTINCT table1.col_date_undef_signed_not_null) AS field1,

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -126,6 +126,7 @@ suite("test_index_rqg_bug4", "test_index_rqg_bug"){
try {
sql "sync"
sql """ set enable_common_expr_pushdown = true; """
qt_sql """
select

View File

@ -21,7 +21,7 @@ suite("test_index_skip_read_data", "p0"){
def indexTbName2 = "test_index_skip_read_data_mow"
def indexTbName3 = "test_index_skip_read_data_mor"
sql """ set enable_common_expr_pushdown = true; """
// dup
sql "DROP TABLE IF EXISTS ${indexTbName1}"

View File

@ -21,6 +21,7 @@ suite("test_inverted_index", "inverted_index") {
def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE TableName='${tableName}' ORDER BY createtime DESC LIMIT 1 """
return jobStateResult[0][9]
}
sql """ set enable_common_expr_pushdown = true; """
sql "DROP TABLE IF EXISTS ${tbName1}"
sql """
CREATE TABLE IF NOT EXISTS ${tbName1} (

View File

@ -68,7 +68,7 @@ suite("test_inverted_index_keyword"){
(3, '我在北京市'),
(3, '我在西安市')
"""
sql """ set enable_common_expr_pushdown = true; """
qt_sql "SELECT * FROM ${indexTblName} where c match '330204195805121025'";
qt_sql "SELECT * FROM ${indexTblName} where c match '36'";
qt_sql "SELECT * FROM ${indexTblName} where c match '330225197806187713'";

View File

@ -48,7 +48,7 @@ suite("test_inverted_index_mor", "p0"){
sql """ INSERT INTO $indexTblName VALUES (1, 2, 12, 1.2, '1 2'), (3, 4, 34, 3.4, '3 4'); """
sql """ INSERT INTO $indexTblName VALUES (11, 12, 1112, 11.12, '11 22'), (13, 14, 1314, 13.14, '13 14'); """
sql """ set enable_common_expr_pushdown = true; """
// original data
qt_11 """ SELECT * FROM $indexTblName ORDER BY k1,k2 """

View File

@ -53,6 +53,7 @@ suite("test_inverted_index_null") {
(7,'tengxun','qie','addr gg','lj',null),
(8,'tengxun2','qie',null,'lj',800)
"""
sql """ set enable_common_expr_pushdown = true """
// select all data
qt_select_0 "SELECT * FROM ${table1} ORDER BY id"

View File

@ -105,6 +105,7 @@ suite("test_inverted_index_null_ram_dir") {
(7,'tengxun','qie','addr gg','lj',null),
(8,'tengxun2','qie',null,'lj',800)
"""
sql """ set enable_common_expr_pushdown = true """
// select all data
qt_select_0 "SELECT * FROM ${tableName} ORDER BY id"

View File

@ -56,6 +56,7 @@ suite("test_lowercase"){
"replication_allocation" = "tag.location.default: 1"
);
"""
sql """ set enable_common_expr_pushdown = true """
sql "INSERT INTO $indexTblName2 VALUES (1, 'hello 我来到北京清华大学'), (2, 'HELLO 我爱你中国'), (3, 'Hello 人民可以得到更多实惠');"
qt_sql "SELECT * FROM $indexTblName2 WHERE c MATCH 'hello' ORDER BY id";

View File

@ -59,7 +59,7 @@ suite("test_match_query_without_index", "inverted_index_select"){
("san zhang", "grade 5", "", "", "", 10, "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", ""),
("li sisi", "grade 6", "li ba", "li liuliu", "", 11, "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "")
"""
sql """ set enable_common_expr_pushdown = true """
// case1: match any
try {
sql """ select * from ${indexTbName1} where ${varchar_colume1} match_any "" order by name; """

View File

@ -39,6 +39,7 @@ suite("test_match_without_index", "p0") {
"replication_allocation" = "tag.location.default: 1"
);
"""
sql """ set enable_common_expr_pushdown = true """
sql """ INSERT INTO ${testTable} VALUES (123, '17.0.0.0', 'HTTP GET', '200', 20); """
sql """ INSERT INTO ${testTable} VALUES (123, '17.0.0.0', 'Life is like a box of chocolates, you never know what you are going to get.', '200', 20); """

File diff suppressed because one or more lines are too long

View File

@ -44,7 +44,7 @@ suite("test_null_index", "p0"){
"replication_allocation" = "tag.location.default: 1"
);
"""
sql """ set enable_common_expr_pushdown = true """
sql "INSERT INTO $indexTblName VALUES (1, 'a', null, [null], [1]), (2, 'b', 'b', ['b'], [2]), (3, 'c', 'c', ['c'], [3]);"
qt_sql "SELECT * FROM $indexTblName WHERE str match null order by id;"
qt_sql "SELECT * FROM $indexTblName WHERE str_null match null order by id;"

View File

@ -64,6 +64,7 @@ suite("test_or_not_match", "p0") {
sql "set enable_nereids_planner = true"
sql "set enable_fallback_to_original_planner = false"
sql """ set enable_common_expr_pushdown = true """
qt_sql "select request from ${tableName} where request like '1.0' or not request MATCH 'GETA' order by request limit 2;"
}

View File

@ -54,6 +54,7 @@ suite("test_pk_no_need_read_data", "p0"){
// case1: enable count on index
sql "set enable_count_on_index_pushdown = true"
sql """ set enable_common_expr_pushdown = true """
qt_select_0 "SELECT COUNT() FROM ${table1} WHERE date='2017-10-01'"
qt_select_1 "SELECT COUNT() FROM ${table1} WHERE year(date)='2017'"

View File

@ -42,6 +42,7 @@ suite("test_stopwords", "p0"){
try {
sql "sync"
sql """ set enable_common_expr_pushdown = true """
qt_sql """ select * from ${indexTbName} where b match 'a'; """
qt_sql """ select * from ${indexTbName} where b match 'are'; """

View File

@ -40,6 +40,7 @@ suite("test_clear_block") {
}
sql """ set enable_match_without_inverted_index = false; """
sql """ set enable_common_expr_pushdown = true """
// sql """ set
def dupTableName = "dup_httplogs"
sql """ drop table if exists ${dupTableName} """

View File

@ -38,6 +38,7 @@ suite("test_mow_with_null_sequence", "inverted_index") {
);
"""
sql """ set enable_common_expr_pushdown = true """
sql """ insert into $tableName values('a', 'zhang san', 'address1', NULL) """
sql """ insert into $tableName values('a', 'zhang si', 'address2', '2022-10-20') """

Some files were not shown because too many files have changed in this diff Show More