[Refactor](inverted index) refactor inverted index compound predicates evaluate logic #38908 (#41385)
cherry pick from #38908
This commit is contained in:
@ -73,6 +73,47 @@
|
||||
|
||||
namespace doris::segment_v2 {
|
||||
|
||||
template <PrimitiveType PT>
|
||||
Status InvertedIndexQueryParamFactory::create_query_value(
|
||||
const void* value, std::unique_ptr<InvertedIndexQueryParamFactory>& result_param) {
|
||||
using CPP_TYPE = typename PrimitiveTypeTraits<PT>::CppType;
|
||||
std::unique_ptr<InvertedIndexQueryParam<PT>> param =
|
||||
InvertedIndexQueryParam<PT>::create_unique();
|
||||
auto&& storage_val = PrimitiveTypeConvertor<PT>::to_storage_field_type(
|
||||
*reinterpret_cast<const CPP_TYPE*>(value));
|
||||
param->set_value(&storage_val);
|
||||
result_param = std::move(param);
|
||||
return Status::OK();
|
||||
};
|
||||
|
||||
#define CREATE_QUERY_VALUE_TEMPLATE(PT) \
|
||||
template Status InvertedIndexQueryParamFactory::create_query_value<PT>( \
|
||||
const void* value, std::unique_ptr<InvertedIndexQueryParamFactory>& result_param);
|
||||
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_BOOLEAN)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_TINYINT)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_SMALLINT)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_INT)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_BIGINT)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_LARGEINT)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_FLOAT)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DOUBLE)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_VARCHAR)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATE)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATEV2)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATETIME)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DATETIMEV2)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_CHAR)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMALV2)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL32)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL64)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL128I)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_DECIMAL256)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_HLL)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_STRING)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_IPV4)
|
||||
CREATE_QUERY_VALUE_TEMPLATE(PrimitiveType::TYPE_IPV6)
|
||||
|
||||
std::unique_ptr<lucene::analysis::Analyzer> InvertedIndexReader::create_analyzer(
|
||||
InvertedIndexCtx* inverted_index_ctx) {
|
||||
std::unique_ptr<lucene::analysis::Analyzer> analyzer;
|
||||
@ -115,6 +156,10 @@ std::unique_ptr<lucene::util::Reader> InvertedIndexReader::create_reader(
|
||||
return reader;
|
||||
}
|
||||
|
||||
std::string InvertedIndexReader::get_index_file_path() {
|
||||
return _inverted_index_file_reader->get_index_file_path(&_index_meta);
|
||||
}
|
||||
|
||||
void InvertedIndexReader::get_analyse_result(std::vector<std::string>& analyse_result,
|
||||
lucene::util::Reader* reader,
|
||||
lucene::analysis::Analyzer* analyzer,
|
||||
@ -182,6 +227,8 @@ Status InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats,
|
||||
null_bitmap->runOptimize();
|
||||
cache->insert(cache_key, null_bitmap, cache_handle);
|
||||
FINALIZE_INPUT(null_bitmap_in);
|
||||
} else {
|
||||
cache->insert(cache_key, null_bitmap, cache_handle);
|
||||
}
|
||||
if (owned_dir) {
|
||||
FINALIZE_INPUT(dir);
|
||||
|
||||
@ -33,6 +33,7 @@
|
||||
#include "olap/rowset/segment_v2/inverted_index_desc.h"
|
||||
#include "olap/rowset/segment_v2/inverted_index_query_type.h"
|
||||
#include "olap/tablet_schema.h"
|
||||
#include "runtime/primitive_type.h"
|
||||
#include "util/once.h"
|
||||
|
||||
#define FINALIZE_INPUT(x) \
|
||||
@ -72,6 +73,104 @@ class InvertedIndexIterator;
|
||||
class InvertedIndexQueryCacheHandle;
|
||||
class InvertedIndexFileReader;
|
||||
struct InvertedIndexQueryInfo;
|
||||
class InvertedIndexResultBitmap {
|
||||
private:
|
||||
std::shared_ptr<roaring::Roaring> _data_bitmap = nullptr;
|
||||
std::shared_ptr<roaring::Roaring> _null_bitmap = nullptr;
|
||||
|
||||
public:
|
||||
// Default constructor
|
||||
InvertedIndexResultBitmap() = default;
|
||||
~InvertedIndexResultBitmap() = default;
|
||||
|
||||
// Constructor with arguments
|
||||
InvertedIndexResultBitmap(std::shared_ptr<roaring::Roaring> data_bitmap,
|
||||
std::shared_ptr<roaring::Roaring> null_bitmap)
|
||||
: _data_bitmap(std::move(data_bitmap)), _null_bitmap(std::move(null_bitmap)) {}
|
||||
|
||||
// Copy constructor
|
||||
InvertedIndexResultBitmap(const InvertedIndexResultBitmap& other)
|
||||
: _data_bitmap(std::make_shared<roaring::Roaring>(*other._data_bitmap)),
|
||||
_null_bitmap(std::make_shared<roaring::Roaring>(*other._null_bitmap)) {}
|
||||
|
||||
// Move constructor
|
||||
InvertedIndexResultBitmap(InvertedIndexResultBitmap&& other) noexcept
|
||||
: _data_bitmap(std::move(other._data_bitmap)),
|
||||
_null_bitmap(std::move(other._null_bitmap)) {}
|
||||
|
||||
// Copy assignment operator
|
||||
InvertedIndexResultBitmap& operator=(const InvertedIndexResultBitmap& other) {
|
||||
if (this != &other) { // Prevent self-assignment
|
||||
_data_bitmap = std::make_shared<roaring::Roaring>(*other._data_bitmap);
|
||||
_null_bitmap = std::make_shared<roaring::Roaring>(*other._null_bitmap);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Move assignment operator
|
||||
InvertedIndexResultBitmap& operator=(InvertedIndexResultBitmap&& other) noexcept {
|
||||
if (this != &other) { // Prevent self-assignment
|
||||
_data_bitmap = std::move(other._data_bitmap);
|
||||
_null_bitmap = std::move(other._null_bitmap);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Operator &=
|
||||
InvertedIndexResultBitmap& operator&=(const InvertedIndexResultBitmap& other) {
|
||||
if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) {
|
||||
auto new_null_bitmap = (*_data_bitmap & *other._null_bitmap) |
|
||||
(*_null_bitmap & *other._data_bitmap) |
|
||||
(*_null_bitmap & *other._null_bitmap);
|
||||
*_data_bitmap &= *other._data_bitmap;
|
||||
*_null_bitmap = std::move(new_null_bitmap);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Operator |=
|
||||
InvertedIndexResultBitmap& operator|=(const InvertedIndexResultBitmap& other) {
|
||||
if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) {
|
||||
auto new_null_bitmap = (*_null_bitmap | *other._null_bitmap) - *_data_bitmap;
|
||||
*_data_bitmap |= *other._data_bitmap;
|
||||
*_null_bitmap = std::move(new_null_bitmap);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// NOT operation
|
||||
const InvertedIndexResultBitmap& op_not(const roaring::Roaring* universe) const {
|
||||
if (_data_bitmap && _null_bitmap) {
|
||||
*_data_bitmap = *universe - *_data_bitmap - *_null_bitmap;
|
||||
// The _null_bitmap remains unchanged.
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Operator -=
|
||||
InvertedIndexResultBitmap& operator-=(const InvertedIndexResultBitmap& other) {
|
||||
if (_data_bitmap && _null_bitmap && other._data_bitmap && other._null_bitmap) {
|
||||
*_data_bitmap -= *other._data_bitmap;
|
||||
*_data_bitmap -= *other._null_bitmap;
|
||||
*_null_bitmap -= *other._null_bitmap;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void mask_out_null() {
|
||||
if (_data_bitmap && _null_bitmap) {
|
||||
*_data_bitmap -= *_null_bitmap;
|
||||
}
|
||||
}
|
||||
|
||||
const std::shared_ptr<roaring::Roaring>& get_data_bitmap() const { return _data_bitmap; }
|
||||
|
||||
const std::shared_ptr<roaring::Roaring>& get_null_bitmap() const { return _null_bitmap; }
|
||||
|
||||
// Check if both bitmaps are empty
|
||||
bool is_empty() const { return (_data_bitmap == nullptr && _null_bitmap == nullptr); }
|
||||
};
|
||||
|
||||
class InvertedIndexReader : public std::enable_shared_from_this<InvertedIndexReader> {
|
||||
public:
|
||||
explicit InvertedIndexReader(
|
||||
@ -135,7 +234,7 @@ public:
|
||||
|
||||
virtual Status handle_searcher_cache(InvertedIndexCacheHandle* inverted_index_cache_handle,
|
||||
OlapReaderStatistics* stats);
|
||||
|
||||
std::string get_index_file_path();
|
||||
static Status create_index_searcher(lucene::store::Directory* dir, IndexSearcherPtr* searcher,
|
||||
MemTracker* mem_tracker,
|
||||
InvertedIndexReaderType reader_type);
|
||||
@ -282,6 +381,79 @@ private:
|
||||
const KeyCoder* _value_key_coder {};
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief InvertedIndexQueryParamFactory is a factory class to create QueryValue object.
|
||||
* we need a template function to make predict class like in_list_predict template class to use.
|
||||
* also need a function with primitive type parameter to create inverted index query value. like some function expr: function_array_index
|
||||
* Now we just mapping field value in query engine to storage field value
|
||||
*/
|
||||
class InvertedIndexQueryParamFactory {
|
||||
ENABLE_FACTORY_CREATOR(InvertedIndexQueryParamFactory);
|
||||
|
||||
public:
|
||||
virtual ~InvertedIndexQueryParamFactory() = default;
|
||||
|
||||
template <PrimitiveType PT>
|
||||
static Status create_query_value(const void* value,
|
||||
std::unique_ptr<InvertedIndexQueryParamFactory>& result_param);
|
||||
|
||||
static Status create_query_value(
|
||||
const PrimitiveType& primitiveType, const void* value,
|
||||
std::unique_ptr<InvertedIndexQueryParamFactory>& result_param) {
|
||||
switch (primitiveType) {
|
||||
#define M(TYPE) \
|
||||
case TYPE: { \
|
||||
return create_query_value<TYPE>(value, result_param); \
|
||||
}
|
||||
M(PrimitiveType::TYPE_BOOLEAN)
|
||||
M(PrimitiveType::TYPE_TINYINT)
|
||||
M(PrimitiveType::TYPE_SMALLINT)
|
||||
M(PrimitiveType::TYPE_INT)
|
||||
M(PrimitiveType::TYPE_BIGINT)
|
||||
M(PrimitiveType::TYPE_LARGEINT)
|
||||
M(PrimitiveType::TYPE_FLOAT)
|
||||
M(PrimitiveType::TYPE_DOUBLE)
|
||||
M(PrimitiveType::TYPE_DECIMALV2)
|
||||
M(PrimitiveType::TYPE_DECIMAL32)
|
||||
M(PrimitiveType::TYPE_DECIMAL64)
|
||||
M(PrimitiveType::TYPE_DECIMAL128I)
|
||||
M(PrimitiveType::TYPE_DECIMAL256)
|
||||
M(PrimitiveType::TYPE_DATE)
|
||||
M(PrimitiveType::TYPE_DATETIME)
|
||||
M(PrimitiveType::TYPE_CHAR)
|
||||
M(PrimitiveType::TYPE_VARCHAR)
|
||||
M(PrimitiveType::TYPE_STRING)
|
||||
#undef M
|
||||
default:
|
||||
return Status::NotSupported("Unsupported primitive type {} for inverted index reader",
|
||||
primitiveType);
|
||||
}
|
||||
};
|
||||
|
||||
virtual const void* get_value() const {
|
||||
LOG_FATAL(
|
||||
"Execution reached an undefined behavior code path in "
|
||||
"InvertedIndexQueryParamFactory");
|
||||
__builtin_unreachable();
|
||||
};
|
||||
};
|
||||
|
||||
template <PrimitiveType PT>
|
||||
class InvertedIndexQueryParam : public InvertedIndexQueryParamFactory {
|
||||
ENABLE_FACTORY_CREATOR(InvertedIndexQueryParam);
|
||||
using storage_val = typename PrimitiveTypeTraits<PT>::StorageFieldType;
|
||||
|
||||
public:
|
||||
void set_value(const storage_val* value) {
|
||||
_value = *reinterpret_cast<const storage_val*>(value);
|
||||
}
|
||||
|
||||
const void* get_value() const override { return &_value; }
|
||||
|
||||
private:
|
||||
storage_val _value;
|
||||
};
|
||||
|
||||
class InvertedIndexIterator {
|
||||
ENABLE_FACTORY_CREATOR(InvertedIndexIterator);
|
||||
|
||||
|
||||
@ -297,26 +297,18 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) {
|
||||
_block_rowids.resize(_opts.block_row_max);
|
||||
|
||||
// compound predicates
|
||||
_col_preds_except_leafnode_of_andnode.clear();
|
||||
//TODO: need to check can_apply_predicate_safely in expr
|
||||
/*_col_preds_except_leafnode_of_andnode.clear();
|
||||
for (const auto& predicate : opts.column_predicates_except_leafnode_of_andnode) {
|
||||
if (!_segment->can_apply_predicate_safely(predicate->column_id(), predicate, *_schema,
|
||||
_opts.io_ctx.reader_type)) {
|
||||
continue;
|
||||
}
|
||||
_col_preds_except_leafnode_of_andnode.push_back(predicate);
|
||||
}
|
||||
}*/
|
||||
|
||||
_remaining_conjunct_roots = opts.remaining_conjunct_roots;
|
||||
_common_expr_ctxs_push_down = opts.common_expr_ctxs_push_down;
|
||||
_enable_common_expr_pushdown = !_common_expr_ctxs_push_down.empty();
|
||||
_column_predicate_info.reset(new ColumnPredicateInfo());
|
||||
|
||||
for (auto& expr : _remaining_conjunct_roots) {
|
||||
_calculate_pred_in_remaining_conjunct_root(expr);
|
||||
}
|
||||
_calculate_func_in_remaining_conjunct_root();
|
||||
|
||||
_column_predicate_info.reset(new ColumnPredicateInfo());
|
||||
if (_schema->rowid_col_idx() > 0) {
|
||||
_record_rowids = true;
|
||||
}
|
||||
@ -366,8 +358,9 @@ Status SegmentIterator::_init_impl(const StorageReadOptions& opts) {
|
||||
}
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(_construct_compound_expr_context());
|
||||
_enable_common_expr_pushdown = !_common_expr_ctxs_push_down.empty();
|
||||
_initialize_predicate_results();
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -375,33 +368,10 @@ void SegmentIterator::_initialize_predicate_results() {
|
||||
// Initialize from _col_predicates
|
||||
for (auto* pred : _col_predicates) {
|
||||
int cid = pred->column_id();
|
||||
std::string pred_sign = _gen_predicate_result_sign(pred);
|
||||
_column_predicate_inverted_index_status[cid][pred_sign] = false;
|
||||
_column_predicate_inverted_index_status[cid][pred] = false;
|
||||
}
|
||||
|
||||
// Initialize from _col_preds_except_leafnode_of_andnode
|
||||
for (auto* pred : _col_preds_except_leafnode_of_andnode) {
|
||||
int cid = pred->column_id();
|
||||
std::string pred_sign = _gen_predicate_result_sign(pred);
|
||||
_column_predicate_inverted_index_status[cid][pred_sign] = false;
|
||||
}
|
||||
|
||||
// Initialize from _column_pred_in_remaining_vconjunct
|
||||
for (auto& preds_in_remaining_vconjuct : _column_pred_in_remaining_vconjunct) {
|
||||
for (auto& pred_info : preds_in_remaining_vconjuct.second) {
|
||||
int cid = _schema->column_id(pred_info.column_id);
|
||||
std::string pred_sign = _gen_predicate_result_sign(&pred_info);
|
||||
_column_predicate_inverted_index_status[cid][pred_sign] = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize from _func_name_to_result_sign
|
||||
for (auto& iter : _func_name_to_result_sign) {
|
||||
for (auto& pred_sign : iter.second) {
|
||||
auto column_id = _opts.tablet_schema->field_index(iter.first);
|
||||
_column_predicate_inverted_index_status[column_id][pred_sign] = false;
|
||||
}
|
||||
}
|
||||
_calculate_expr_in_remaining_conjunct_root();
|
||||
}
|
||||
|
||||
Status SegmentIterator::init_iterators() {
|
||||
@ -542,47 +512,34 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
if (config::enable_index_apply_preds_except_leafnode_of_andnode) {
|
||||
size_t input_rows = _row_bitmap.cardinality();
|
||||
RETURN_IF_ERROR(_apply_index_except_leafnode_of_andnode());
|
||||
if (_can_filter_by_preds_except_leafnode_of_andnode()) {
|
||||
for (auto it = _remaining_conjunct_roots.begin();
|
||||
it != _remaining_conjunct_roots.end();) {
|
||||
_pred_except_leafnode_of_andnode_evaluate_result.clear();
|
||||
auto res = _execute_predicates_except_leafnode_of_andnode(*it);
|
||||
VLOG_DEBUG << "_execute_predicates_except_leafnode_of_andnode expr: "
|
||||
<< (*it)->debug_string() << " res: " << res;
|
||||
if (res.ok() && _pred_except_leafnode_of_andnode_evaluate_result.size() == 1) {
|
||||
_row_bitmap &= _pred_except_leafnode_of_andnode_evaluate_result[0];
|
||||
// Delete expr after it obtains the final result.
|
||||
{
|
||||
std::erase_if(_common_expr_ctxs_push_down,
|
||||
[&it](const auto& iter) { return iter->root() == *it; });
|
||||
VLOG_DEBUG << "_remaining_conjunct_roots erase expr: "
|
||||
<< (*it)->debug_string();
|
||||
it = _remaining_conjunct_roots.erase(it);
|
||||
}
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
_col_preds_except_leafnode_of_andnode.clear();
|
||||
compound_func_exprs.clear();
|
||||
// 1. if all conditions in the compound hit the inverted index and there are no other expr to handle.
|
||||
// 2. then there is no need to generate index_result_column.
|
||||
if (_enable_common_expr_pushdown && _remaining_conjunct_roots.empty()) {
|
||||
for (auto& iter : _rowid_result_for_index) {
|
||||
iter.second.first = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
_opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality());
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(_apply_bitmap_index());
|
||||
RETURN_IF_ERROR(_apply_inverted_index());
|
||||
RETURN_IF_ERROR(_apply_index_expr());
|
||||
size_t input_rows = _row_bitmap.cardinality();
|
||||
for (auto it = _common_expr_ctxs_push_down.begin(); it != _common_expr_ctxs_push_down.end();) {
|
||||
if ((*it)->all_expr_inverted_index_evaluated()) {
|
||||
const auto* result =
|
||||
(*it)->get_inverted_index_context()->get_inverted_index_result_for_expr(
|
||||
(*it)->root().get());
|
||||
if (result != nullptr) {
|
||||
_row_bitmap &= *result->get_data_bitmap();
|
||||
auto root = (*it)->root();
|
||||
auto iter_find = std::find(_remaining_conjunct_roots.begin(),
|
||||
_remaining_conjunct_roots.end(), root);
|
||||
if (iter_find != _remaining_conjunct_roots.end()) {
|
||||
_remaining_conjunct_roots.erase(iter_find);
|
||||
}
|
||||
it = _common_expr_ctxs_push_down.erase(it);
|
||||
}
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
_opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality());
|
||||
for (auto cid : _schema->column_ids()) {
|
||||
bool result_true = _check_all_predicates_passed_inverted_index_for_column(cid);
|
||||
bool result_true = _check_all_conditions_passed_inverted_index_for_column(cid);
|
||||
|
||||
if (result_true) {
|
||||
_need_read_data_indices[cid] = false;
|
||||
}
|
||||
@ -795,127 +752,6 @@ Status SegmentIterator::_extract_common_expr_columns(const vectorized::VExprSPtr
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Walks `expr` in post-order and evaluates it against the per-predicate index results stored in
// _rowid_result_for_index, using _pred_except_leafnode_of_andnode_evaluate_result as a stack:
//  - SLOT_REF / literal nodes only record column and value info in _column_predicate_info;
//  - predicate and function-call nodes push their index result bitmap;
//  - COMPOUND_PRED nodes combine the pushed results through _execute_compound_fn().
// Returns InvalidArgument when a predicate has no usable index result or the node type is not
// handled; a null `expr` is accepted and yields OK.
Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode(
        const vectorized::VExprSPtr& expr) {
    if (expr == nullptr) {
        return Status::OK();
    }

    // Children first, so their results are already pushed when this node is processed.
    // Range-for also avoids comparing a signed index with the unsigned container size.
    for (const auto& child : expr->children()) {
        RETURN_IF_ERROR(_execute_predicates_except_leafnode_of_andnode(child));
    }

    auto node_type = expr->node_type();
    if (node_type == TExprNodeType::SLOT_REF) {
        auto slot_expr = std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr);
        _column_predicate_info->column_name = expr->expr_name();
        _column_predicate_info->column_id = slot_expr->column_id();
    } else if (_is_literal_node(node_type)) {
        auto v_literal_expr = std::dynamic_pointer_cast<doris::vectorized::VLiteral>(expr);
        _column_predicate_info->query_values.insert(v_literal_expr->value());
    } else if (node_type == TExprNodeType::BINARY_PRED || node_type == TExprNodeType::MATCH_PRED ||
               node_type == TExprNodeType::IN_PRED || node_type == TExprNodeType::FUNCTION_CALL) {
        std::string result_sign;
        if (node_type == TExprNodeType::FUNCTION_CALL) {
            result_sign =
                    BeConsts::BLOCK_TEMP_COLUMN_PREFIX + std::to_string(expr->index_unique_id());
        } else {
            if (node_type == TExprNodeType::MATCH_PRED) {
                _column_predicate_info->query_op = "match";
            } else if (node_type == TExprNodeType::IN_PRED) {
                if (expr->op() == TExprOpcode::type::FILTER_IN) {
                    _column_predicate_info->query_op = "in";
                } else {
                    _column_predicate_info->query_op = "not_in";
                }
            } else {
                _column_predicate_info->query_op = expr->fn().name.function_name;
            }
            result_sign = _gen_predicate_result_sign(_column_predicate_info.get());
        }

        // The collected info belongs to this predicate only; start fresh for the next one.
        _column_predicate_info.reset(new ColumnPredicateInfo());
        VLOG_DEBUG << "result_sign " << result_sign;

        // Single lookup; the bitmap is copied once, straight onto the result stack.
        const auto found = _rowid_result_for_index.find(result_sign);
        if (found == _rowid_result_for_index.end() || !found->second.first) {
            return Status::InvalidArgument(
                    "_execute_predicates_except_leafnode_of_andnode has no result for {}",
                    result_sign);
        }
        _pred_except_leafnode_of_andnode_evaluate_result.push_back(found->second.second);
    } else if (node_type == TExprNodeType::COMPOUND_PRED) {
        auto function_name = expr->fn().name.function_name;
        // execute logic function
        RETURN_IF_ERROR(_execute_compound_fn(function_name));
    } else {
        return Status::InvalidArgument(
                "_execute_predicates_except_leafnode_of_andnode not supported for TExprNodeType:{}",
                node_type);
    }

    return Status::OK();
}
|
||||
|
||||
// Applies the compound function `function_name` ("and", "or" or "not") to the results on top of
// _pred_except_leafnode_of_andnode_evaluate_result:
//  - "and" / "or" fold the last result into the one before it and pop the last one;
//  - "not" replaces the last result with _row_bitmap minus that result.
// Returns InvalidArgument when fewer results are available than the function needs.
// NOTE(review): any other function name is ignored and OK is returned -- confirm this is intended.
Status SegmentIterator::_execute_compound_fn(const std::string& function_name) {
    auto& results = _pred_except_leafnode_of_andnode_evaluate_result;
    const auto size = results.size();
    const bool is_and = function_name == "and";
    if (is_and || function_name == "or") {
        if (size < 2) {
            return Status::InvalidArgument("_execute_compound_fn {} arg num {} < 2", function_name,
                                           size);
        }
        auto& lhs = results.at(size - 2);
        const auto& rhs = results.at(size - 1);
        if (is_and) {
            lhs &= rhs;
        } else {
            lhs |= rhs;
        }
        results.pop_back();
    } else if (function_name == "not") {
        if (size < 1) {
            return Status::InvalidArgument("_execute_compound_fn {} arg num {} < 1", function_name,
                                           size);
        }
        roaring::Roaring tmp = _row_bitmap;
        tmp -= results.at(size - 1);
        // Move the temporary in instead of copying the whole bitmap a second time.
        results.at(size - 1) = std::move(tmp);
    }
    return Status::OK();
}
|
||||
|
||||
bool SegmentIterator::_can_filter_by_preds_except_leafnode_of_andnode() {
|
||||
// no compound predicates push down, so no need to filter
|
||||
if (_col_preds_except_leafnode_of_andnode.empty() && compound_func_exprs.empty()) {
|
||||
return false;
|
||||
}
|
||||
for (auto pred : _col_preds_except_leafnode_of_andnode) {
|
||||
if (_not_apply_index_pred.count(pred->column_id()) ||
|
||||
(!_check_apply_by_inverted_index(pred, true))) {
|
||||
return false;
|
||||
}
|
||||
// all predicates are evaluated by index, then true, else false
|
||||
std::string pred_result_sign = _gen_predicate_result_sign(pred);
|
||||
if (_rowid_result_for_index.count(pred_result_sign) == 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
for (const auto& expr : compound_func_exprs) {
|
||||
std::string pred_result_sign =
|
||||
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + std::to_string(expr->index_unique_id());
|
||||
if (!_rowid_result_for_index.contains(pred_result_sign)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound) {
|
||||
if (_opts.runtime_state && !_opts.runtime_state->query_options().enable_inverted_index_query) {
|
||||
return false;
|
||||
@ -973,74 +809,20 @@ bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred, bool
|
||||
return true;
|
||||
}
|
||||
|
||||
// Evaluates `pred` through the inverted index iterator of its column and writes the outcome into
// `output_result`; the status of the evaluation is returned unchanged.
Status SegmentIterator::_apply_inverted_index_except_leafnode_of_andnode(
        ColumnPredicate* pred, roaring::Roaring* output_result) {
    const auto cid = pred->column_id();
    Status eval_status = pred->evaluate(_storage_name_and_type[cid],
                                        _inverted_index_iterators[cid].get(), num_rows(),
                                        output_result);
    if (!eval_status.ok()) {
        return eval_status;
    }
    return Status::OK();
}
|
||||
|
||||
Status SegmentIterator::_apply_index_except_leafnode_of_andnode() {
|
||||
for (auto* pred : _col_preds_except_leafnode_of_andnode) {
|
||||
auto column_id = pred->column_id();
|
||||
auto pred_type = pred->type();
|
||||
bool is_support = pred_type == PredicateType::EQ || pred_type == PredicateType::NE ||
|
||||
pred_type == PredicateType::LT || pred_type == PredicateType::LE ||
|
||||
pred_type == PredicateType::GT || pred_type == PredicateType::GE ||
|
||||
pred_type == PredicateType::MATCH ||
|
||||
pred_type == PredicateType::IN_LIST ||
|
||||
pred_type == PredicateType::NOT_IN_LIST;
|
||||
if (!is_support) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool can_apply_by_inverted_index = _check_apply_by_inverted_index(pred, true);
|
||||
roaring::Roaring bitmap = _row_bitmap;
|
||||
Status res = Status::OK();
|
||||
if (can_apply_by_inverted_index) {
|
||||
res = _apply_inverted_index_except_leafnode_of_andnode(pred, &bitmap);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool need_remaining_after_evaluate = _column_has_fulltext_index(column_id) &&
|
||||
PredicateTypeTraits::is_equal_or_list(pred_type);
|
||||
if (!res.ok()) {
|
||||
if (_downgrade_without_index(res, need_remaining_after_evaluate)) {
|
||||
// downgrade without index query
|
||||
_not_apply_index_pred.insert(column_id);
|
||||
Status SegmentIterator::_apply_index_expr() {
|
||||
for (const auto& expr_ctx : _common_expr_ctxs_push_down) {
|
||||
if (Status st = expr_ctx->evaluate_inverted_index(num_rows()); !st.ok()) {
|
||||
if (_downgrade_without_index(st) || st.code() == ErrorCode::NOT_IMPLEMENTED_ERROR) {
|
||||
continue;
|
||||
}
|
||||
LOG(WARNING) << "failed to evaluate index"
|
||||
<< ", column predicate type: " << pred->pred_type_string(pred->type())
|
||||
<< ", error msg: " << res.to_string();
|
||||
return res;
|
||||
}
|
||||
|
||||
std::string pred_result_sign = _gen_predicate_result_sign(pred);
|
||||
_rowid_result_for_index.emplace(pred_result_sign, std::make_pair(true, std::move(bitmap)));
|
||||
if (!pred->predicate_params()->marked_by_runtime_filter) {
|
||||
_column_predicate_inverted_index_status[column_id][pred_result_sign] = true;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& expr : compound_func_exprs) {
|
||||
roaring::Roaring bitmap = _row_bitmap;
|
||||
auto result = std::make_shared<roaring::Roaring>();
|
||||
RETURN_IF_ERROR(execute_func_expr(expr, result));
|
||||
bitmap &= *result;
|
||||
std::string result_sign =
|
||||
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + std::to_string(expr->index_unique_id());
|
||||
_rowid_result_for_index.emplace(result_sign, std::make_pair(true, std::move(bitmap)));
|
||||
for (const auto& child_expr : expr->children()) {
|
||||
if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
|
||||
auto column_id = _opts.tablet_schema->field_index(child_expr->expr_name());
|
||||
_column_predicate_inverted_index_status[column_id][result_sign] = true;
|
||||
} else {
|
||||
// other code is not to be handled, we should just break
|
||||
LOG(WARNING) << "failed to evaluate inverted index for expr_ctx: "
|
||||
<< expr_ctx->root()->debug_string()
|
||||
<< ", error msg: " << st.to_string();
|
||||
return st;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -1071,47 +853,6 @@ bool SegmentIterator::_downgrade_without_index(Status res, bool need_remaining)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Builds the result-sign string of a column predicate:
//   <BLOCK_TEMP_COLUMN_PREFIX><column name>_<predicate type string>_<comma-joined values>
// When the column descriptor has a path, the column name is replaced by a CAST pattern that
// also carries the storage type name.
std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicate* predicate) {
    const auto cid = predicate->column_id();
    auto column_desc = _schema->column(cid);

    std::string col_name = column_desc->name();
    if (column_desc->path() != nullptr) {
        const static std::string pattern = "(CAST {}(Nullable(Variant)) TO {})";
        // indicate a subcolumn access for variant, using the expression pattern as pred result sign name
        col_name = fmt::format(pattern, col_name, _storage_name_and_type[cid].second->get_name());
    }

    auto predicate_params = predicate->predicate_params();
    std::string pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + col_name + "_" +
                                   predicate->pred_type_string(predicate->type()) + "_" +
                                   join(predicate_params->values, ",");
    VLOG_DEBUG << "_gen_predicate_result_sign: " << pred_result_sign;
    return pred_result_sign;
}
|
||||
|
||||
// Builds the result-sign string from collected predicate info:
//   <BLOCK_TEMP_COLUMN_PREFIX><column name>_<query op>_<comma-joined query values>
// When the column descriptor has a path, the column name is replaced by a CAST pattern that
// also carries the storage type name.
std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicateInfo* predicate_info) {
    // Translate the id carried by the predicate info through the schema once and reuse it.
    const auto schema_cid = _schema->column_id(predicate_info->column_id);
    auto column_desc = _schema->column(schema_cid);

    std::string col_name = predicate_info->column_name;
    if (column_desc->path() != nullptr) {
        const static std::string pattern = "(CAST {}(Nullable(Variant)) TO {})";
        // indicate a subcolumn access for variant, using the expression pattern as pred result sign name
        col_name = fmt::format(pattern, col_name,
                               _storage_name_and_type[schema_cid].second->get_name());
    }

    std::string pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + col_name + "_" +
                                   predicate_info->query_op + "_" +
                                   boost::join(predicate_info->query_values, ",");
    VLOG_DEBUG << "_gen_predicate_result_sign: " << pred_result_sign;
    return pred_result_sign;
}
|
||||
|
||||
bool SegmentIterator::_column_has_fulltext_index(int32_t cid) {
|
||||
bool has_fulltext_index = _inverted_index_iterators[cid] != nullptr &&
|
||||
_inverted_index_iterators[cid]->get_inverted_index_reader_type() ==
|
||||
@ -1124,10 +865,6 @@ inline bool SegmentIterator::_inverted_index_not_support_pred_type(const Predica
|
||||
return type == PredicateType::BF || type == PredicateType::BITMAP_FILTER;
|
||||
}
|
||||
|
||||
// True when PredicateTypeTraits::is_range() holds for the type of every predicate in
// `predicate_set`. The argument is parenthesized so that any expression can be passed; note
// that it is evaluated twice.
#define all_predicates_are_range_predicate(predicate_set)         \
    std::all_of((predicate_set).begin(), (predicate_set).end(),   \
                [](const ColumnPredicate* p) { return PredicateTypeTraits::is_range(p->type()); })
|
||||
|
||||
Status SegmentIterator::_apply_inverted_index_on_column_predicate(
|
||||
ColumnPredicate* pred, std::vector<ColumnPredicate*>& remaining_predicates,
|
||||
bool* continue_apply) {
|
||||
@ -1150,12 +887,6 @@ Status SegmentIterator::_apply_inverted_index_on_column_predicate(
|
||||
return res;
|
||||
}
|
||||
|
||||
auto pred_type = pred->type();
|
||||
if (pred_type == PredicateType::MATCH || pred_type == PredicateType::IN_LIST) {
|
||||
std::string pred_result_sign = _gen_predicate_result_sign(pred);
|
||||
_rowid_result_for_index.emplace(pred_result_sign, std::make_pair(false, _row_bitmap));
|
||||
}
|
||||
|
||||
if (_row_bitmap.isEmpty()) {
|
||||
// all rows have been pruned, no need to process further predicates
|
||||
*continue_apply = false;
|
||||
@ -1166,52 +897,7 @@ Status SegmentIterator::_apply_inverted_index_on_column_predicate(
|
||||
return Status::OK();
|
||||
}
|
||||
if (!pred->predicate_params()->marked_by_runtime_filter) {
|
||||
std::string pred_result_sign = _gen_predicate_result_sign(pred);
|
||||
_column_predicate_inverted_index_status[pred->column_id()][pred_result_sign] = true;
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status SegmentIterator::_apply_inverted_index_on_block_column_predicate(
|
||||
ColumnId column_id, MutilColumnBlockPredicate* pred,
|
||||
std::set<const ColumnPredicate*>& no_need_to_pass_column_predicate_set,
|
||||
bool* continue_apply) {
|
||||
bool handle_by_fulltext = _column_has_fulltext_index(column_id);
|
||||
std::set<const ColumnPredicate*> predicate_set {};
|
||||
|
||||
pred->get_all_column_predicate(predicate_set);
|
||||
|
||||
//four requirements here.
|
||||
//1. Column has inverted index
|
||||
//2. There are multiple predicates for this column.
|
||||
//3. All the predicates are range predicate.
|
||||
//4. if it's under fulltext parser type, we need to skip inverted index evaluate.
|
||||
if (_inverted_index_iterators[column_id] != nullptr && predicate_set.size() > 1 &&
|
||||
all_predicates_are_range_predicate(predicate_set) && !handle_by_fulltext) {
|
||||
roaring::Roaring output_result = _row_bitmap;
|
||||
|
||||
std::string column_name = _schema->column(column_id)->name();
|
||||
|
||||
auto res = pred->evaluate(column_name, _inverted_index_iterators[column_id].get(),
|
||||
num_rows(), &_row_bitmap);
|
||||
|
||||
if (res.ok()) {
|
||||
no_need_to_pass_column_predicate_set.insert(predicate_set.begin(), predicate_set.end());
|
||||
if (_row_bitmap.isEmpty()) {
|
||||
// all rows have been pruned, no need to process further predicates
|
||||
*continue_apply = false;
|
||||
}
|
||||
return res;
|
||||
} else {
|
||||
//TODO:mock until AndBlockColumnPredicate evaluate is ok.
|
||||
if (res.code() == ErrorCode::INVERTED_INDEX_NOT_IMPLEMENTED) {
|
||||
return Status::OK();
|
||||
}
|
||||
LOG(WARNING) << "failed to evaluate index"
|
||||
<< ", column predicate type: range predicate"
|
||||
<< ", error msg: " << res;
|
||||
return res;
|
||||
_column_predicate_inverted_index_status[pred->column_id()][pred] = true;
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
@ -1310,63 +996,49 @@ Status SegmentIterator::_apply_inverted_index() {
|
||||
}
|
||||
}
|
||||
|
||||
// delete from _common_expr_ctxs_push_down if a MATCH predicate will be removed from _col_predicates
|
||||
// since it's not necessary to eval it anymore to avoid index miss, which is added in _normalize_predicate
|
||||
for (auto pred : _col_predicates) {
|
||||
auto* match_pred = dynamic_cast<MatchPredicate*>(pred);
|
||||
if (!match_pred ||
|
||||
!_is_match_predicate_and_not_remaining(match_pred, remaining_predicates)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
for (auto it = _common_expr_ctxs_push_down.begin();
|
||||
it != _common_expr_ctxs_push_down.end();) {
|
||||
if (_is_target_expr_match_predicate((*it)->root(), match_pred, _schema.get())) {
|
||||
_delete_expr_from_conjunct_roots((*it)->root(), _remaining_conjunct_roots);
|
||||
it = _common_expr_ctxs_push_down.erase(it);
|
||||
VLOG_DEBUG << "delete expr from _remaining_conjunct_roots "
|
||||
<< (*it)->root()->debug_string();
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto& expr : no_compound_func_exprs) {
|
||||
auto result = std::make_shared<roaring::Roaring>();
|
||||
RETURN_IF_ERROR(execute_func_expr(expr, result));
|
||||
_row_bitmap &= *result;
|
||||
for (auto it = _remaining_conjunct_roots.begin(); it != _remaining_conjunct_roots.end();) {
|
||||
if (*it == expr) {
|
||||
std::erase_if(_common_expr_ctxs_push_down,
|
||||
[&it](const auto& iter) { return iter->root() == *it; });
|
||||
it = _remaining_conjunct_roots.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
}
|
||||
std::string result_sign =
|
||||
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + std::to_string(expr->index_unique_id());
|
||||
for (const auto& child_expr : expr->children()) {
|
||||
if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
|
||||
auto column_id = _opts.tablet_schema->field_index(child_expr->expr_name());
|
||||
_column_predicate_inverted_index_status[column_id][result_sign] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_col_predicates = std::move(remaining_predicates);
|
||||
_opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality());
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
bool SegmentIterator::_check_all_predicates_passed_inverted_index_for_column(ColumnId cid,
|
||||
/**
|
||||
* @brief Checks if all conditions related to a specific column have passed in both
|
||||
* `_column_predicate_inverted_index_status` and `_common_expr_inverted_index_status`.
|
||||
*
|
||||
* This function first checks the conditions in `_column_predicate_inverted_index_status`
|
||||
* for the given `ColumnId`. If all conditions pass, it sets `default_return` to `true`.
|
||||
* It then checks the conditions in `_common_expr_inverted_index_status` for the same column.
|
||||
*
|
||||
* The function returns `true` if all conditions in both maps pass. If any condition fails
|
||||
* in either map, the function immediately returns `false`. If the column does not exist
|
||||
* in one of the maps, the function returns `default_return`.
|
||||
*
|
||||
* @param cid The ColumnId of the column to check.
|
||||
* @param default_return The default value to return if the column is not found in the status maps.
|
||||
* @return true if all conditions in both status maps pass, or if the column is not found
|
||||
* and `default_return` is true.
|
||||
* @return false if any condition in either status map fails, or if the column is not found
|
||||
* and `default_return` is false.
|
||||
*/
|
||||
bool SegmentIterator::_check_all_conditions_passed_inverted_index_for_column(ColumnId cid,
|
||||
bool default_return) {
|
||||
auto it = _column_predicate_inverted_index_status.find(cid);
|
||||
if (it != _column_predicate_inverted_index_status.end()) {
|
||||
const auto& pred_map = it->second;
|
||||
return std::all_of(pred_map.begin(), pred_map.end(),
|
||||
[](const auto& pred_entry) { return pred_entry.second; });
|
||||
auto pred_it = _column_predicate_inverted_index_status.find(cid);
|
||||
if (pred_it != _column_predicate_inverted_index_status.end()) {
|
||||
const auto& pred_map = pred_it->second;
|
||||
bool pred_passed = std::all_of(pred_map.begin(), pred_map.end(),
|
||||
[](const auto& pred_entry) { return pred_entry.second; });
|
||||
if (!pred_passed) {
|
||||
return false;
|
||||
} else {
|
||||
default_return = true;
|
||||
}
|
||||
}
|
||||
|
||||
auto expr_it = _common_expr_inverted_index_status.find(cid);
|
||||
if (expr_it != _common_expr_inverted_index_status.end()) {
|
||||
const auto& expr_map = expr_it->second;
|
||||
return std::all_of(expr_map.begin(), expr_map.end(),
|
||||
[](const auto& expr_entry) { return expr_entry.second; });
|
||||
}
|
||||
return default_return;
|
||||
}
|
||||
@ -2043,15 +1715,16 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32
|
||||
}
|
||||
|
||||
DBUG_EXECUTE_IF("segment_iterator._read_columns_by_index", {
|
||||
auto col_name = _opts.tablet_schema->column(cid).name();
|
||||
auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>(
|
||||
"segment_iterator._read_columns_by_index", "column_name", "");
|
||||
if (debug_col_name.empty()) {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>("does not need to read data");
|
||||
if (debug_col_name.empty() && col_name != "__DORIS_DELETE_SIGN__") {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>("does not need to read data, {}",
|
||||
col_name);
|
||||
}
|
||||
auto col_name = _opts.tablet_schema->column(cid).name();
|
||||
if (debug_col_name.find(col_name) != std::string::npos) {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>("does not need to read data, {}",
|
||||
debug_col_name);
|
||||
col_name);
|
||||
}
|
||||
})
|
||||
|
||||
@ -2462,15 +2135,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
|
||||
return Status::EndOfFile("no more data in segment");
|
||||
}
|
||||
|
||||
DBUG_EXECUTE_IF("segment_iterator._rowid_result_for_index", {
|
||||
for (auto& iter : _rowid_result_for_index) {
|
||||
if (iter.second.first) {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>(
|
||||
"_rowid_result_for_index exists true");
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
if (!_is_need_vec_eval && !_is_need_short_eval && !_is_need_expr_eval) {
|
||||
if (_non_predicate_columns.empty()) {
|
||||
return Status::InternalError("_non_predicate_columns is empty");
|
||||
@ -2478,9 +2142,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
|
||||
RETURN_IF_ERROR(_convert_to_expected_type(_first_read_column_ids));
|
||||
RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns));
|
||||
_output_non_pred_columns(block);
|
||||
if (!_enable_common_expr_pushdown || !_remaining_conjunct_roots.empty()) {
|
||||
_output_index_result_column(nullptr, 0, block);
|
||||
}
|
||||
} else {
|
||||
uint16_t selected_size = _current_batch_rows_read;
|
||||
uint16_t sel_rowid_idx[selected_size];
|
||||
@ -2537,12 +2198,12 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
|
||||
auto col_const = vectorized::ColumnConst::create(std::move(res_column),
|
||||
selected_size);
|
||||
block->replace_by_position(0, std::move(col_const));
|
||||
_output_index_result_column(sel_rowid_idx, selected_size, block);
|
||||
_output_index_result_column_for_expr(sel_rowid_idx, selected_size, block);
|
||||
block->shrink_char_type_column_suffix_zero(_char_type_idx_no_0);
|
||||
RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, selected_size, block));
|
||||
block->replace_by_position(0, std::move(col0));
|
||||
} else {
|
||||
_output_index_result_column(sel_rowid_idx, selected_size, block);
|
||||
_output_index_result_column_for_expr(sel_rowid_idx, selected_size, block);
|
||||
block->shrink_char_type_column_suffix_zero(_char_type_idx);
|
||||
RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, selected_size, block));
|
||||
}
|
||||
@ -2574,12 +2235,12 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
|
||||
auto col_const =
|
||||
vectorized::ColumnConst::create(std::move(res_column), selected_size);
|
||||
block->replace_by_position(0, std::move(col_const));
|
||||
_output_index_result_column(sel_rowid_idx, selected_size, block);
|
||||
_output_index_result_column_for_expr(sel_rowid_idx, selected_size, block);
|
||||
block->shrink_char_type_column_suffix_zero(_char_type_idx_no_0);
|
||||
RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, selected_size, block));
|
||||
block->replace_by_position(0, std::move(col0));
|
||||
} else {
|
||||
_output_index_result_column(sel_rowid_idx, selected_size, block);
|
||||
_output_index_result_column_for_expr(sel_rowid_idx, selected_size, block);
|
||||
block->shrink_char_type_column_suffix_zero(_char_type_idx);
|
||||
RETURN_IF_ERROR(_execute_common_expr(sel_rowid_idx, selected_size, block));
|
||||
}
|
||||
@ -2613,10 +2274,6 @@ Status SegmentIterator::_next_batch_internal(vectorized::Block* block) {
|
||||
RETURN_IF_ERROR(_convert_to_expected_type(_non_predicate_columns));
|
||||
// step5: output columns
|
||||
_output_non_pred_columns(block);
|
||||
|
||||
if (!_is_need_expr_eval) {
|
||||
_output_index_result_column(sel_rowid_idx, selected_size, block);
|
||||
}
|
||||
}
|
||||
|
||||
// shrink char_type suffix zero data
|
||||
@ -2693,53 +2350,42 @@ uint16_t SegmentIterator::_evaluate_common_expr_filter(uint16_t* sel_rowid_idx,
|
||||
}
|
||||
}
|
||||
|
||||
void SegmentIterator::_output_index_result_column(uint16_t* sel_rowid_idx, uint16_t select_size,
|
||||
vectorized::Block* block) {
|
||||
void SegmentIterator::_output_index_result_column_for_expr(uint16_t* sel_rowid_idx,
|
||||
uint16_t select_size,
|
||||
vectorized::Block* block) {
|
||||
SCOPED_RAW_TIMER(&_opts.stats->output_index_result_column_timer);
|
||||
if (block->rows() == 0) {
|
||||
return;
|
||||
}
|
||||
for (auto& expr_ctx : _common_expr_ctxs_push_down) {
|
||||
for (auto& inverted_index_result_bitmap_for_expr :
|
||||
expr_ctx->get_inverted_index_context()->get_inverted_index_result_bitmap()) {
|
||||
const auto* expr = inverted_index_result_bitmap_for_expr.first;
|
||||
const auto& index_result_bitmap =
|
||||
inverted_index_result_bitmap_for_expr.second.get_data_bitmap();
|
||||
auto index_result_column = vectorized::ColumnUInt8::create();
|
||||
vectorized::ColumnUInt8::Container& vec_match_pred = index_result_column->get_data();
|
||||
vec_match_pred.resize(block->rows());
|
||||
size_t idx_in_selected = 0;
|
||||
roaring::BulkContext bulk_context;
|
||||
|
||||
for (auto& iter : _rowid_result_for_index) {
|
||||
_columns_to_filter.push_back(block->columns());
|
||||
block->insert({vectorized::ColumnUInt8::create(),
|
||||
std::make_shared<vectorized::DataTypeUInt8>(), iter.first});
|
||||
if (!iter.second.first) {
|
||||
// predicate not in compound query
|
||||
block->get_by_name(iter.first).column =
|
||||
vectorized::DataTypeUInt8().create_column_const(block->rows(), (uint8_t)1);
|
||||
continue;
|
||||
}
|
||||
_build_index_result_column(sel_rowid_idx, select_size, block, iter.first,
|
||||
iter.second.second);
|
||||
}
|
||||
}
|
||||
|
||||
void SegmentIterator::_build_index_result_column(const uint16_t* sel_rowid_idx,
|
||||
uint16_t select_size, vectorized::Block* block,
|
||||
const std::string& pred_result_sign,
|
||||
const roaring::Roaring& index_result) {
|
||||
auto index_result_column = vectorized::ColumnUInt8::create();
|
||||
vectorized::ColumnUInt8::Container& vec_match_pred = index_result_column->get_data();
|
||||
vec_match_pred.resize(block->rows());
|
||||
size_t idx_in_selected = 0;
|
||||
roaring::BulkContext bulk_context;
|
||||
|
||||
for (uint32_t i = 0; i < _current_batch_rows_read; i++) {
|
||||
auto rowid = _block_rowids[i];
|
||||
if (sel_rowid_idx == nullptr ||
|
||||
(idx_in_selected < select_size && i == sel_rowid_idx[idx_in_selected])) {
|
||||
if (index_result.containsBulk(bulk_context, rowid)) {
|
||||
vec_match_pred[idx_in_selected] = true;
|
||||
} else {
|
||||
vec_match_pred[idx_in_selected] = false;
|
||||
for (uint32_t i = 0; i < _current_batch_rows_read; i++) {
|
||||
auto rowid = _block_rowids[i];
|
||||
if (sel_rowid_idx == nullptr ||
|
||||
(idx_in_selected < select_size && i == sel_rowid_idx[idx_in_selected])) {
|
||||
if (index_result_bitmap->containsBulk(bulk_context, rowid)) {
|
||||
vec_match_pred[idx_in_selected] = true;
|
||||
} else {
|
||||
vec_match_pred[idx_in_selected] = false;
|
||||
}
|
||||
idx_in_selected++;
|
||||
}
|
||||
}
|
||||
idx_in_selected++;
|
||||
DCHECK(block->rows() == vec_match_pred.size());
|
||||
expr_ctx->get_inverted_index_context()->set_inverted_index_result_column_for_expr(
|
||||
expr, std::move(index_result_column));
|
||||
}
|
||||
}
|
||||
DCHECK(block->rows() == vec_match_pred.size());
|
||||
auto index_result_position = block->get_position_by_name(pred_result_sign);
|
||||
block->replace_by_position(index_result_position, std::move(index_result_column));
|
||||
}
|
||||
|
||||
void SegmentIterator::_convert_dict_code_for_predicate_if_necessary() {
|
||||
@ -2790,125 +2436,45 @@ Status SegmentIterator::current_block_row_locations(std::vector<RowLocation>* bl
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void SegmentIterator::_calculate_pred_in_remaining_conjunct_root(
|
||||
const vectorized::VExprSPtr& expr) {
|
||||
if (expr == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (expr->fn().name.function_name == "multi_match") {
|
||||
return;
|
||||
}
|
||||
|
||||
auto& children = expr->children();
|
||||
for (int i = 0; i < children.size(); ++i) {
|
||||
_calculate_pred_in_remaining_conjunct_root(children[i]);
|
||||
}
|
||||
|
||||
auto node_type = expr->node_type();
|
||||
if (node_type == TExprNodeType::SLOT_REF) {
|
||||
auto slot_expr = std::dynamic_pointer_cast<doris::vectorized::VSlotRef>(expr);
|
||||
if (_column_predicate_info->column_name.empty()) {
|
||||
_column_predicate_info->column_name = expr->expr_name();
|
||||
_column_predicate_info->column_id = slot_expr->column_id();
|
||||
} else {
|
||||
// If column name already exists, create a new ColumnPredicateInfo
|
||||
// if expr is columnA > columnB, then column name will exist, in this situation, we need to add it to _column_pred_in_remaining_vconjunct
|
||||
auto new_column_pred_info = std::make_shared<ColumnPredicateInfo>();
|
||||
new_column_pred_info->column_name = expr->expr_name();
|
||||
new_column_pred_info->column_id = slot_expr->column_id();
|
||||
_column_pred_in_remaining_vconjunct[new_column_pred_info->column_name].push_back(
|
||||
*new_column_pred_info);
|
||||
}
|
||||
} else if (_is_literal_node(node_type)) {
|
||||
auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get());
|
||||
_column_predicate_info->query_values.insert(v_literal_expr->value());
|
||||
} else if (node_type == TExprNodeType::NULL_LITERAL) {
|
||||
if (!_column_predicate_info->column_name.empty()) {
|
||||
auto v_literal_expr = static_cast<const doris::vectorized::VLiteral*>(expr.get());
|
||||
_column_predicate_info->query_values.insert(v_literal_expr->value());
|
||||
}
|
||||
} else {
|
||||
if (node_type == TExprNodeType::MATCH_PRED) {
|
||||
_column_predicate_info->query_op = "match";
|
||||
} else if (node_type == TExprNodeType::IN_PRED) {
|
||||
if (expr->op() == TExprOpcode::type::FILTER_IN) {
|
||||
_column_predicate_info->query_op = "in";
|
||||
} else {
|
||||
_column_predicate_info->query_op = "not_in";
|
||||
}
|
||||
} else if (node_type != TExprNodeType::COMPOUND_PRED) {
|
||||
_column_predicate_info->query_op = expr->fn().name.function_name;
|
||||
}
|
||||
|
||||
if (!_column_predicate_info->is_empty()) {
|
||||
_column_pred_in_remaining_vconjunct[_column_predicate_info->column_name].push_back(
|
||||
*_column_predicate_info);
|
||||
_column_predicate_info.reset(new ColumnPredicateInfo());
|
||||
}
|
||||
// Clones every expression context listed in `_opts.common_expr_ctxs_push_down`
// and appends the clones to `_common_expr_ctxs_push_down`.
//
// A single InvertedIndexContext is created up front (from the schema column ids,
// `_inverted_index_iterators`, `_storage_name_and_type` and
// `_common_expr_inverted_index_status`) and attached to every clone, so all
// cloned contexts share the same inverted-index context object.
//
// Returns Status::OK() once every context has been cloned; a failing clone is
// propagated through RETURN_IF_ERROR.
Status SegmentIterator::_construct_compound_expr_context() {
    auto shared_index_ctx = std::make_shared<vectorized::InvertedIndexContext>(
            _schema->column_ids(), _inverted_index_iterators, _storage_name_and_type,
            _common_expr_inverted_index_status);

    for (const auto& source_ctx : _opts.common_expr_ctxs_push_down) {
        vectorized::VExprContextSPtr cloned_ctx;
        RETURN_IF_ERROR(source_ctx->clone(_opts.runtime_state, cloned_ctx));
        cloned_ctx->set_inverted_index_context(shared_index_ctx);
        _common_expr_ctxs_push_down.push_back(cloned_ctx);
    }

    return Status::OK();
}
|
||||
|
||||
void SegmentIterator::_calculate_func_in_remaining_conjunct_root() {
|
||||
auto hash = [](const vectorized::VExprSPtr& expr) -> std::size_t {
|
||||
return std::hash<std::string>()(expr->expr_name());
|
||||
};
|
||||
auto equal = [](const vectorized::VExprSPtr& lhs, const vectorized::VExprSPtr& rhs) -> bool {
|
||||
return lhs->equals(*rhs);
|
||||
};
|
||||
|
||||
uint32_t next_id = 0;
|
||||
std::unordered_map<vectorized::VExprSPtr, uint32_t, decltype(hash), decltype(equal)> unique_map(
|
||||
0, hash, equal);
|
||||
|
||||
auto gen_func_unique_id = [&unique_map, &next_id](const vectorized::VExprSPtr& expr) {
|
||||
auto it = unique_map.find(expr);
|
||||
if (it != unique_map.end()) {
|
||||
return it->second;
|
||||
} else {
|
||||
unique_map[expr] = ++next_id;
|
||||
return next_id;
|
||||
}
|
||||
};
|
||||
|
||||
void SegmentIterator::_calculate_expr_in_remaining_conjunct_root() {
|
||||
for (const auto& root_expr_ctx : _common_expr_ctxs_push_down) {
|
||||
const auto& root_expr = root_expr_ctx->root();
|
||||
if (root_expr == nullptr) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::stack<std::pair<vectorized::VExprSPtr, bool>> stack;
|
||||
stack.emplace(root_expr, false);
|
||||
std::stack<vectorized::VExprSPtr> stack;
|
||||
stack.emplace(root_expr);
|
||||
|
||||
while (!stack.empty()) {
|
||||
const auto& [expr, has_compound_pred] = stack.top();
|
||||
const auto& expr = stack.top();
|
||||
stack.pop();
|
||||
|
||||
bool current_has_compound_pred =
|
||||
has_compound_pred || (expr->node_type() == TExprNodeType::COMPOUND_PRED);
|
||||
|
||||
if (expr->fn().name.function_name == "multi_match") {
|
||||
expr->set_index_unique_id(gen_func_unique_id(expr));
|
||||
if (current_has_compound_pred) {
|
||||
compound_func_exprs.emplace_back(expr);
|
||||
} else {
|
||||
no_compound_func_exprs.emplace_back(expr);
|
||||
}
|
||||
|
||||
for (int32_t i = expr->get_num_children() - 1; i >= 0; i--) {
|
||||
auto child_expr = expr->get_child(i);
|
||||
if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
|
||||
std::string result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX +
|
||||
std::to_string(expr->index_unique_id());
|
||||
_func_name_to_result_sign[child_expr->expr_name()].push_back(result_sign);
|
||||
}
|
||||
for (const auto& child : expr->children()) {
|
||||
if (child->is_slot_ref()) {
|
||||
auto* column_slot_ref = assert_cast<vectorized::VSlotRef*>(child.get());
|
||||
_common_expr_inverted_index_status[_schema->column_id(
|
||||
column_slot_ref->column_id())][expr.get()] = false;
|
||||
}
|
||||
}
|
||||
|
||||
const auto& children = expr->children();
|
||||
for (int32_t i = children.size() - 1; i >= 0; --i) {
|
||||
if (!children[i]->children().empty()) {
|
||||
stack.emplace(children[i], current_has_compound_pred);
|
||||
stack.emplace(children[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2935,7 +2501,7 @@ bool SegmentIterator::_no_need_read_key_data(ColumnId cid, vectorized::MutableCo
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!_check_all_predicates_passed_inverted_index_for_column(cid)) {
|
||||
if (!_check_all_conditions_passed_inverted_index_for_column(cid)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -2970,7 +2536,7 @@ bool SegmentIterator::_can_opt_topn_reads() {
|
||||
_opts.tablet_schema->column(cid).is_key()) {
|
||||
return true;
|
||||
}
|
||||
if (_check_all_predicates_passed_inverted_index_for_column(cid, true)) {
|
||||
if (_check_all_conditions_passed_inverted_index_for_column(cid, true)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
@ -2985,22 +2551,5 @@ bool SegmentIterator::_can_opt_topn_reads() {
|
||||
return all_true;
|
||||
}
|
||||
|
||||
// Evaluates a function expression through `expr->eval_inverted_index()`.
//
// @param expr   function expression; its child 0 must be a SLOT_REF node.
// @param result bitmap holder forwarded unchanged to `eval_inverted_index()`.
// @return RuntimeError("cannot perform index filtering") when child 0 is null or
//         is not a SLOT_REF; otherwise the status returned by
//         `eval_inverted_index()`.
Status SegmentIterator::execute_func_expr(const vectorized::VExprSPtr& expr,
                                          std::shared_ptr<roaring::Roaring>& result) {
    const auto& first_child = expr->get_child(0);
    const bool first_child_is_slot =
            first_child && first_child->node_type() == TExprNodeType::SLOT_REF;
    if (!first_child_is_slot) {
        return Status::RuntimeError("cannot perform index filtering");
    }

    // The node type was verified above, so the downcast to VSlotRef is valid.
    auto slot_ref = std::static_pointer_cast<vectorized::VSlotRef>(first_child);
    const auto slot_column = slot_ref->column_id();

    FuncExprParams func_params;
    func_params._column_id = _schema->column_id(slot_column);
    func_params._unique_id = _schema->unique_id(slot_column);
    // The column name is looked up in the tablet schema with the id resolved above.
    func_params._column_name = _opts.tablet_schema->column(func_params._column_id).name();
    func_params._segment_iterator = this;

    return expr->eval_inverted_index(func_params, result);
}
|
||||
|
||||
} // namespace segment_v2
|
||||
} // namespace doris
|
||||
|
||||
@ -201,20 +201,10 @@ private:
|
||||
[[nodiscard]] Status _apply_inverted_index_on_column_predicate(
|
||||
ColumnPredicate* pred, std::vector<ColumnPredicate*>& remaining_predicates,
|
||||
bool* continue_apply);
|
||||
[[nodiscard]] Status _apply_inverted_index_on_block_column_predicate(
|
||||
ColumnId column_id, MutilColumnBlockPredicate* pred,
|
||||
std::set<const ColumnPredicate*>& no_need_to_pass_column_predicate_set,
|
||||
bool* continue_apply);
|
||||
[[nodiscard]] Status _apply_index_except_leafnode_of_andnode();
|
||||
[[nodiscard]] Status _apply_inverted_index_except_leafnode_of_andnode(
|
||||
ColumnPredicate* pred, roaring::Roaring* output_result);
|
||||
[[nodiscard]] Status _apply_index_expr();
|
||||
bool _column_has_fulltext_index(int32_t cid);
|
||||
bool _downgrade_without_index(Status res, bool need_remaining = false);
|
||||
inline bool _inverted_index_not_support_pred_type(const PredicateType& type);
|
||||
bool _can_filter_by_preds_except_leafnode_of_andnode();
|
||||
[[nodiscard]] Status _execute_predicates_except_leafnode_of_andnode(
|
||||
const vectorized::VExprSPtr& expr);
|
||||
[[nodiscard]] Status _execute_compound_fn(const std::string& function_name);
|
||||
bool _is_literal_node(const TExprNodeType::type& node_type);
|
||||
|
||||
Status _vec_init_lazy_materialization();
|
||||
@ -298,6 +288,7 @@ private:
|
||||
bool _can_evaluated_by_vectorized(ColumnPredicate* predicate);
|
||||
|
||||
[[nodiscard]] Status _extract_common_expr_columns(const vectorized::VExprSPtr& expr);
|
||||
// same with _extract_common_expr_columns, but only extract columns that can be used for index
|
||||
[[nodiscard]] Status _execute_common_expr(uint16_t* sel_rowid_idx, uint16_t& selected_size,
|
||||
vectorized::Block* block);
|
||||
uint16_t _evaluate_common_expr_filter(uint16_t* sel_rowid_idx, uint16_t selected_size,
|
||||
@ -310,24 +301,14 @@ private:
|
||||
|
||||
bool _check_apply_by_inverted_index(ColumnPredicate* pred, bool pred_in_compound = false);
|
||||
|
||||
std::string _gen_predicate_result_sign(ColumnPredicate* predicate);
|
||||
std::string _gen_predicate_result_sign(ColumnPredicateInfo* predicate_info);
|
||||
|
||||
void _build_index_result_column(const uint16_t* sel_rowid_idx, uint16_t select_size,
|
||||
vectorized::Block* block, const std::string& pred_result_sign,
|
||||
const roaring::Roaring& index_result);
|
||||
void _output_index_result_column(uint16_t* sel_rowid_idx, uint16_t select_size,
|
||||
vectorized::Block* block);
|
||||
void _output_index_result_column_for_expr(uint16_t* sel_rowid_idx, uint16_t select_size,
|
||||
vectorized::Block* block);
|
||||
|
||||
bool _need_read_data(ColumnId cid);
|
||||
bool _prune_column(ColumnId cid, vectorized::MutableColumnPtr& column, bool fill_defaults,
|
||||
size_t num_of_defaults);
|
||||
|
||||
// return true means one column's predicates all pushed down
|
||||
bool _check_column_pred_all_push_down(const std::string& column_name, bool in_compound = false,
|
||||
bool is_match = false);
|
||||
void _calculate_pred_in_remaining_conjunct_root(const vectorized::VExprSPtr& expr);
|
||||
void _calculate_func_in_remaining_conjunct_root();
|
||||
Status _construct_compound_expr_context();
|
||||
|
||||
// todo(wb) remove this method after RowCursor is removed
|
||||
void _convert_rowcursor_to_short_key(const RowCursor& key, size_t num_keys) {
|
||||
@ -410,9 +391,10 @@ private:
|
||||
Status execute_func_expr(const vectorized::VExprSPtr& expr,
|
||||
std::shared_ptr<roaring::Roaring>& result);
|
||||
void _initialize_predicate_results();
|
||||
bool _check_all_predicates_passed_inverted_index_for_column(ColumnId cid,
|
||||
bool _check_all_conditions_passed_inverted_index_for_column(ColumnId cid,
|
||||
bool default_return = false);
|
||||
|
||||
void _calculate_expr_in_remaining_conjunct_root();
|
||||
class BitmapRangeIterator;
|
||||
class BackwardBitmapRangeIterator;
|
||||
|
||||
@ -427,8 +409,6 @@ private:
|
||||
std::vector<std::unique_ptr<InvertedIndexIterator>> _inverted_index_iterators;
|
||||
// after init(), `_row_bitmap` contains all rowid to scan
|
||||
roaring::Roaring _row_bitmap;
|
||||
// "column_name+operator+value-> <in_compound_query, rowid_result>
|
||||
std::unordered_map<std::string, std::pair<bool, roaring::Roaring>> _rowid_result_for_index;
|
||||
// an iterator for `_row_bitmap` that can be used to extract row range to scan
|
||||
std::unique_ptr<BitmapRangeIterator> _range_iter;
|
||||
// the next rowid to read
|
||||
@ -477,19 +457,9 @@ private:
|
||||
StorageReadOptions _opts;
|
||||
// make a copy of `_opts.column_predicates` in order to make local changes
|
||||
std::vector<ColumnPredicate*> _col_predicates;
|
||||
std::vector<ColumnPredicate*> _col_preds_except_leafnode_of_andnode;
|
||||
|
||||
std::vector<vectorized::VExprSPtr> no_compound_func_exprs;
|
||||
std::vector<vectorized::VExprSPtr> compound_func_exprs;
|
||||
|
||||
vectorized::VExprContextSPtrs _common_expr_ctxs_push_down;
|
||||
bool _enable_common_expr_pushdown = false;
|
||||
std::vector<vectorized::VExprSPtr> _remaining_conjunct_roots;
|
||||
std::vector<roaring::Roaring> _pred_except_leafnode_of_andnode_evaluate_result;
|
||||
std::unique_ptr<ColumnPredicateInfo> _column_predicate_info;
|
||||
std::unordered_map<std::string, std::vector<ColumnPredicateInfo>>
|
||||
_column_pred_in_remaining_vconjunct;
|
||||
std::unordered_map<std::string, std::vector<std::string>> _func_name_to_result_sign;
|
||||
std::set<ColumnId> _not_apply_index_pred;
|
||||
|
||||
// row schema of the key to seek
|
||||
@ -527,8 +497,11 @@ private:
|
||||
|
||||
std::vector<uint8_t> _ret_flags;
|
||||
|
||||
std::unordered_map<int, std::unordered_map<std::string, bool>>
|
||||
std::unordered_map<ColumnId, std::unordered_map<ColumnPredicate*, bool>>
|
||||
_column_predicate_inverted_index_status;
|
||||
|
||||
std::unordered_map<ColumnId, std::unordered_map<const vectorized::VExpr*, bool>>
|
||||
_common_expr_inverted_index_status;
|
||||
};
|
||||
|
||||
} // namespace segment_v2
|
||||
|
||||
@ -348,9 +348,6 @@ Status ScanLocalState<Derived>::_normalize_predicate(
|
||||
RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate(
|
||||
cur_expr, context, slot, value_range, &pdt),
|
||||
status);
|
||||
RETURN_IF_PUSH_DOWN(_normalize_match_predicate(cur_expr, context, slot,
|
||||
value_range, &pdt),
|
||||
status);
|
||||
if (_is_key_column(slot->col_name())) {
|
||||
RETURN_IF_PUSH_DOWN(
|
||||
_normalize_bitmap_filter(cur_expr, context, slot, &pdt),
|
||||
@ -368,23 +365,6 @@ Status ScanLocalState<Derived>::_normalize_predicate(
|
||||
*range);
|
||||
RETURN_IF_ERROR(status);
|
||||
}
|
||||
|
||||
if (pdt == vectorized::VScanNode::PushDownType::UNACCEPTABLE &&
|
||||
TExprNodeType::COMPOUND_PRED == cur_expr->node_type()) {
|
||||
_normalize_compound_predicate(cur_expr, context, &pdt, _is_runtime_filter_predicate,
|
||||
in_predicate_checker, eq_predicate_checker);
|
||||
output_expr = conjunct_expr_root; // remaining in conjunct tree
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
if (pdt == vectorized::VScanNode::PushDownType::ACCEPTABLE &&
|
||||
TExprNodeType::MATCH_PRED == cur_expr->node_type()) {
|
||||
// keep it in the expr tree, in order to filter by function if the pushdown
|
||||
// match_predicate failed to apply inverted index in the storage layer
|
||||
output_expr = conjunct_expr_root; // remaining in conjunct tree
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
if (pdt == vectorized::VScanNode::PushDownType::ACCEPTABLE && slotref != nullptr &&
|
||||
slotref->type().is_variant_type()) {
|
||||
// keep it in the expr tree, in order to filter by function if the pushdown
|
||||
|
||||
@ -27,6 +27,7 @@
|
||||
#include "vec/data_types/data_type_number.h"
|
||||
#include "vec/exprs/vectorized_fn_call.h"
|
||||
#include "vec/exprs/vexpr.h"
|
||||
#include "vec/exprs/vexpr_context.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
@ -53,7 +54,107 @@ public:
|
||||
|
||||
// Returns a reference to the name stored in `_expr_name`.
const std::string& expr_name() const override { return _expr_name; }
|
||||
|
||||
// Evaluates this compound predicate (AND / OR / NOT) against the inverted-index
// results of its children and, when possible, records a combined result.
//
// Each child is evaluated first via its own evaluate_inverted_index(); its
// bitmap is then looked up in the context's inverted-index context.
//  - AND / OR: child bitmaps are folded with `&=` / `|=`. A child that fails
//    (error is logged, not returned) or that has no stored result clears
//    `all_pass`. Folding stops early once the OR result's data bitmap
//    cardinality equals `segment_num_rows`, or the AND result's data bitmap is
//    empty.
//  - NOT: the single child's error is returned to the caller; otherwise the
//    result is the child's bitmap negated against the range
//    [0, segment_num_rows).
//  - Any other opcode returns Status::NotSupported.
//
// Only when every visited child contributed a result and `res` is non-empty
// is `_can_fast_execute` set and the combined bitmap stored for `this`.
//
// @param context          expression context that owns the inverted-index context.
// @param segment_num_rows row count used for the OR early exit and for NOT.
// @return Status::OK() in all AND/OR cases and on NOT success.
Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override {
    segment_v2::InvertedIndexResultBitmap res;
    bool all_pass = true;

    switch (_op) {
    case TExprOpcode::COMPOUND_OR:
    case TExprOpcode::COMPOUND_AND: {
        const bool is_or = (_op == TExprOpcode::COMPOUND_OR);
        for (const auto& child : _children) {
            Status child_st = child->evaluate_inverted_index(context, segment_num_rows);
            if (!child_st.ok()) {
                // A failing child only disables the fast path; keep going.
                LOG(ERROR) << "expr:" << child->expr_name()
                           << " evaluate_inverted_index error:" << child_st.to_string();
                all_pass = false;
                continue;
            }
            if (!context->get_inverted_index_context()->has_inverted_index_result_for_expr(
                        child.get())) {
                all_pass = false;
                continue;
            }

            const auto* child_bitmap =
                    context->get_inverted_index_context()->get_inverted_index_result_for_expr(
                            child.get());
            if (res.is_empty()) {
                // First contributing child seeds the accumulator.
                res = *child_bitmap;
            } else if (is_or) {
                res |= *child_bitmap;
            } else {
                res &= *child_bitmap;
            }

            // OR cannot grow past "all rows"; AND cannot shrink below "no rows".
            const bool saturated =
                    is_or ? (res.get_data_bitmap()->cardinality() == segment_num_rows)
                          : res.get_data_bitmap()->isEmpty();
            if (saturated) {
                break;
            }
        }
        break;
    }
    case TExprOpcode::COMPOUND_NOT: {
        const auto& operand = _children[0];
        if (Status operand_st = operand->evaluate_inverted_index(context, segment_num_rows);
            !operand_st.ok()) {
            LOG(ERROR) << "expr:" << operand->expr_name()
                       << " evaluate_inverted_index error:" << operand_st.to_string();
            return operand_st;
        }

        if (!context->get_inverted_index_context()->has_inverted_index_result_for_expr(
                    operand.get())) {
            all_pass = false;
            break;
        }

        const auto* operand_bitmap =
                context->get_inverted_index_context()->get_inverted_index_result_for_expr(
                        operand.get());
        // Negate against the full row range of the segment.
        roaring::Roaring all_rows;
        all_rows.addRange(0, segment_num_rows);
        res = operand_bitmap->op_not(&all_rows);
        break;
    }
    default:
        return Status::NotSupported(
                "Compound operator must be AND, OR, or NOT to execute with inverted index.");
    }

    if (!all_pass || res.is_empty()) {
        return Status::OK();
    }

    // set fast_execute when expr evaluated by inverted index correctly
    _can_fast_execute = true;
    context->get_inverted_index_context()->set_inverted_index_result_for_expr(this, res);
    return Status::OK();
}
|
||||
|
||||
Status execute(VExprContext* context, Block* block, int* result_column_id) override {
|
||||
if (_can_fast_execute && fast_execute(context, block, result_column_id)) {
|
||||
return Status::OK();
|
||||
}
|
||||
if (children().size() == 1 || !_all_child_is_compound_and_not_const()) {
|
||||
return VectorizedFnCall::execute(context, block, result_column_id);
|
||||
}
|
||||
@ -249,8 +350,8 @@ private:
|
||||
}
|
||||
|
||||
std::pair<uint8*, uint8*> _get_raw_data_and_null_map(ColumnPtr column,
|
||||
bool nullable_column) const {
|
||||
if (nullable_column) {
|
||||
bool has_nullable_column) const {
|
||||
if (has_nullable_column) {
|
||||
auto* nullable_column = assert_cast<ColumnNullable*>(column->assume_mutable().get());
|
||||
auto* data_column =
|
||||
assert_cast<ColumnUInt8*>(nullable_column->get_nested_column_ptr().get())
|
||||
|
||||
@ -22,7 +22,6 @@
|
||||
#include <gen_cpp/Types_types.h>
|
||||
|
||||
#include <ostream>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
#include "common/config.h"
|
||||
@ -30,11 +29,8 @@
|
||||
#include "common/status.h"
|
||||
#include "runtime/runtime_state.h"
|
||||
#include "udf/udf.h"
|
||||
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
|
||||
#include "vec/columns/column.h"
|
||||
#include "vec/core/block.h"
|
||||
#include "vec/core/column_with_type_and_name.h"
|
||||
#include "vec/core/columns_with_type_and_name.h"
|
||||
#include "vec/data_types/data_type.h"
|
||||
#include "vec/data_types/data_type_agg_state.h"
|
||||
#include "vec/exprs/vexpr_context.h"
|
||||
@ -111,7 +107,6 @@ Status VectorizedFnCall::prepare(RuntimeState* state, const RowDescriptor& desc,
|
||||
}
|
||||
VExpr::register_function_context(state, context);
|
||||
_function_name = _fn.name.function_name;
|
||||
_can_fast_execute = can_fast_execute();
|
||||
_prepare_finished = true;
|
||||
return Status::OK();
|
||||
}
|
||||
@ -135,13 +130,38 @@ void VectorizedFnCall::close(VExprContext* context, FunctionContext::FunctionSta
|
||||
VExpr::close(context, scope);
|
||||
}
|
||||
|
||||
// Evaluates this function call against the inverted index by delegating to
// `_evaluate_inverted_index` with the function bound to this expr.
Status VectorizedFnCall::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
    // Debug-only sanity check: at least one child is expected.
    DCHECK_GE(get_num_children(), 1);
    Status index_status = _evaluate_inverted_index(context, _function, segment_num_rows);
    return index_status;
}
|
||||
|
||||
Status VectorizedFnCall::_do_execute(doris::vectorized::VExprContext* context,
|
||||
doris::vectorized::Block* block, int* result_column_id,
|
||||
std::vector<size_t>& args) {
|
||||
if (is_const_and_have_executed()) { // const have execute in open function
|
||||
if (is_const_and_have_executed()) { // const have executed in open function
|
||||
return get_result_from_const(block, _expr_name, result_column_id);
|
||||
}
|
||||
if (_can_fast_execute && fast_execute(context, block, result_column_id)) {
|
||||
return Status::OK();
|
||||
}
|
||||
DBUG_EXECUTE_IF("VectorizedFnCall.must_in_slow_path", {
|
||||
if (get_child(0)->is_slot_ref()) {
|
||||
auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>(
|
||||
"VectorizedFnCall.must_in_slow_path", "column_name", "");
|
||||
|
||||
std::vector<std::string> column_names;
|
||||
boost::split(column_names, debug_col_name, boost::algorithm::is_any_of(","));
|
||||
|
||||
auto* column_slot_ref = assert_cast<VSlotRef*>(get_child(0).get());
|
||||
std::string column_name = column_slot_ref->expr_name();
|
||||
auto it = std::find(column_names.begin(), column_names.end(), column_name);
|
||||
if (it == column_names.end()) {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>(
|
||||
"column {} should in slow path while VectorizedFnCall::execute.",
|
||||
column_name);
|
||||
}
|
||||
}
|
||||
})
|
||||
DCHECK(_open_finished || _getting_const_col) << debug_string();
|
||||
// TODO: not execute const expr again, but use the const column in function context
|
||||
args.resize(_children.size());
|
||||
@ -156,14 +176,6 @@ Status VectorizedFnCall::_do_execute(doris::vectorized::VExprContext* context,
|
||||
size_t num_columns_without_result = block->columns();
|
||||
// prepare a column to save result
|
||||
block->insert({nullptr, _data_type, _expr_name});
|
||||
if (_can_fast_execute) {
|
||||
auto can_fast_execute = fast_execute(*block, args, num_columns_without_result,
|
||||
block->rows(), _function->get_name());
|
||||
if (can_fast_execute) {
|
||||
*result_column_id = num_columns_without_result;
|
||||
return Status::OK();
|
||||
}
|
||||
}
|
||||
RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, args,
|
||||
num_columns_without_result, block->rows(), false));
|
||||
*result_column_id = num_columns_without_result;
|
||||
@ -218,22 +230,6 @@ bool VectorizedFnCall::can_push_down_to_index() const {
|
||||
return _function->can_push_down_to_index();
|
||||
}
|
||||
|
||||
bool VectorizedFnCall::can_fast_execute() const {
|
||||
auto function_name = _function->get_name();
|
||||
if (function_name == "eq" || function_name == "ne" || function_name == "lt" ||
|
||||
function_name == "gt" || function_name == "le" || function_name == "ge") {
|
||||
if (_children.size() == 2 && _children[0]->is_slot_ref() && _children[1]->is_literal()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return _function->can_push_down_to_index();
|
||||
}
|
||||
|
||||
// Forwards the inverted index evaluation to the bound function, passing this
// expr along with the caller's params and result bitmap.
Status VectorizedFnCall::eval_inverted_index(segment_v2::FuncExprParams& params,
                                             std::shared_ptr<roaring::Roaring>& result) {
    Status eval_status = _function->eval_inverted_index(this, params, result);
    return eval_status;
}
|
||||
|
||||
bool VectorizedFnCall::equals(const VExpr& other) {
|
||||
const auto* other_ptr = dynamic_cast<const VectorizedFnCall*>(&other);
|
||||
if (!other_ptr) {
|
||||
|
||||
@ -27,6 +27,8 @@
|
||||
#include "udf/udf.h"
|
||||
#include "vec/core/column_numbers.h"
|
||||
#include "vec/exprs/vexpr.h"
|
||||
#include "vec/exprs/vliteral.h"
|
||||
#include "vec/exprs/vslot_ref.h"
|
||||
#include "vec/functions/function.h"
|
||||
|
||||
namespace doris {
|
||||
@ -50,6 +52,7 @@ public:
|
||||
Status execute_runtime_fitler(doris::vectorized::VExprContext* context,
|
||||
doris::vectorized::Block* block, int* result_column_id,
|
||||
std::vector<size_t>& args) override;
|
||||
Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override;
|
||||
Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override;
|
||||
Status open(RuntimeState* state, VExprContext* context,
|
||||
FunctionContext::FunctionStateScope scope) override;
|
||||
@ -67,9 +70,6 @@ public:
|
||||
static std::string debug_string(const std::vector<VectorizedFnCall*>& exprs);
|
||||
|
||||
bool can_push_down_to_index() const override;
|
||||
bool can_fast_execute() const override;
|
||||
Status eval_inverted_index(segment_v2::FuncExprParams& params,
|
||||
std::shared_ptr<roaring::Roaring>& result) override;
|
||||
bool equals(const VExpr& other) override;
|
||||
|
||||
protected:
|
||||
|
||||
@ -33,6 +33,7 @@
|
||||
#include "common/status.h"
|
||||
#include "vec/columns/column_vector.h"
|
||||
#include "vec/columns/columns_number.h"
|
||||
#include "vec/data_types/data_type_array.h"
|
||||
#include "vec/data_types/data_type_factory.hpp"
|
||||
#include "vec/data_types/data_type_nullable.h"
|
||||
#include "vec/data_types/data_type_number.h"
|
||||
@ -43,7 +44,6 @@
|
||||
#include "vec/exprs/vcompound_pred.h"
|
||||
#include "vec/exprs/vectorized_fn_call.h"
|
||||
#include "vec/exprs/vexpr_context.h"
|
||||
#include "vec/exprs/vexpr_fwd.h"
|
||||
#include "vec/exprs/vin_predicate.h"
|
||||
#include "vec/exprs/vinfo_func.h"
|
||||
#include "vec/exprs/vlambda_function_call_expr.h"
|
||||
@ -602,80 +602,134 @@ Status VExpr::get_result_from_const(vectorized::Block* block, const std::string&
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Tries to satisfy this predicate from a result column that already exists in
// `block` under the name produced by gen_predicate_result_sign().
//
// Returns false when inverted index queries are disabled or when no such
// column is present. Otherwise the column at position `result` is replaced by
// the found column (converted to a full column if const, and wrapped in a
// ColumnNullable with an all-zero null map of `input_rows_count` entries when
// the result type is nullable) and true is returned.
bool VExpr::fast_execute(Block& block, const ColumnNumbers& arguments, size_t result,
                         size_t input_rows_count, const std::string& function_name) {
    if (!_enable_inverted_index_query) {
        return false;
    }

    std::string result_column_name = gen_predicate_result_sign(block, arguments, function_name);
    const bool has_precomputed_result = block.has(result_column_name);
    if (!has_precomputed_result) {
        DBUG_EXECUTE_IF("segment_iterator.fast_execute", {
            auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>(
                    "segment_iterator._read_columns_by_index", "column_name", "");

            std::vector<std::string> column_names;
            boost::split(column_names, debug_col_name, boost::algorithm::is_any_of(","));

            std::string column_name = block.get_by_position(arguments[0]).name;
            auto it = std::find(column_names.begin(), column_names.end(), column_name);
            if (it == column_names.end()) {
                return Status::Error<ErrorCode::INTERNAL_ERROR>("fast_execute failed: {}",
                                                                result_column_name);
            }
        })
        return false;
    }

    auto precomputed_column =
            block.get_by_name(result_column_name).column->convert_to_full_column_if_const();
    auto& target_slot = block.get_by_position(result);
    if (!target_slot.type->is_nullable()) {
        block.replace_by_position(result, std::move(precomputed_column));
        return true;
    }

    // Nullable result type: pair the data with a null map that marks no row as null.
    block.replace_by_position(result,
                              ColumnNullable::create(std::move(precomputed_column),
                                                     ColumnUInt8::create(input_rows_count, 0)));
    return true;
}
|
||||
|
||||
std::string VExpr::gen_predicate_result_sign(Block& block, const ColumnNumbers& arguments,
|
||||
const std::string& function_name) const {
|
||||
std::string pred_result_sign;
|
||||
if (this->fn().name.function_name == "multi_match") {
|
||||
pred_result_sign =
|
||||
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + std::to_string(this->index_unique_id());
|
||||
} else {
|
||||
std::string column_name = block.get_by_position(arguments[0]).name;
|
||||
pred_result_sign +=
|
||||
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" + function_name + "_";
|
||||
if (function_name == "in" || function_name == "not_in") {
|
||||
if (arguments.size() - 1 > _in_list_value_count_threshold) {
|
||||
return pred_result_sign;
|
||||
}
|
||||
// Generating 'result_sign' from 'inlist' requires sorting the values.
|
||||
std::set<std::string> values;
|
||||
for (size_t i = 1; i < arguments.size(); i++) {
|
||||
const auto& entry = block.get_by_position(arguments[i]);
|
||||
if (!is_column_const(*entry.column)) {
|
||||
return pred_result_sign;
|
||||
Status VExpr::_evaluate_inverted_index(VExprContext* context, const FunctionBasePtr& function,
|
||||
uint32_t segment_num_rows) {
|
||||
std::vector<segment_v2::InvertedIndexIterator*> iterators;
|
||||
std::vector<vectorized::IndexFieldNameAndTypePair> data_type_with_names;
|
||||
std::vector<int> column_ids;
|
||||
vectorized::ColumnsWithTypeAndName arguments;
|
||||
VExprSPtrs children_exprs;
|
||||
for (auto child : children()) {
|
||||
// if child is cast expr, we need to ensure target data type is the same with storage data type.
|
||||
// or they are all string type
|
||||
// and if data type is array, we need to get the nested data type to ensure that.
|
||||
if (child->node_type() == TExprNodeType::CAST_EXPR) {
|
||||
auto* cast_expr = assert_cast<VCastExpr*>(child.get());
|
||||
DCHECK_EQ(cast_expr->children().size(), 1);
|
||||
if (cast_expr->get_child(0)->is_slot_ref()) {
|
||||
auto* column_slot_ref = assert_cast<VSlotRef*>(cast_expr->get_child(0).get());
|
||||
auto column_id = column_slot_ref->column_id();
|
||||
const auto* storage_name_type =
|
||||
context->get_inverted_index_context()
|
||||
->get_storage_name_and_type_by_column_id(column_id);
|
||||
auto storage_type = remove_nullable(storage_name_type->second);
|
||||
auto target_type = cast_expr->get_target_type();
|
||||
auto origin_primitive_type = storage_type->get_type_as_type_descriptor().type;
|
||||
auto target_primitive_type = target_type->get_type_as_type_descriptor().type;
|
||||
if (is_complex_type(storage_type)) {
|
||||
if (is_array(storage_type) && is_array(target_type)) {
|
||||
auto nested_storage_type =
|
||||
(assert_cast<const DataTypeArray*>(storage_type.get()))
|
||||
->get_nested_type();
|
||||
origin_primitive_type =
|
||||
nested_storage_type->get_type_as_type_descriptor().type;
|
||||
auto nested_target_type =
|
||||
(assert_cast<const DataTypeArray*>(target_type.get()))
|
||||
->get_nested_type();
|
||||
target_primitive_type =
|
||||
nested_target_type->get_type_as_type_descriptor().type;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (origin_primitive_type != TYPE_VARIANT &&
|
||||
(origin_primitive_type == target_primitive_type ||
|
||||
(is_string_type(target_primitive_type) &&
|
||||
is_string_type(origin_primitive_type)))) {
|
||||
children_exprs.emplace_back(expr_without_cast(child));
|
||||
}
|
||||
values.insert(entry.type->to_string(*entry.column, 0));
|
||||
}
|
||||
pred_result_sign += boost::join(values, ",");
|
||||
} else if (function_name == "collection_in" || function_name == "collection_not_in") {
|
||||
return pred_result_sign;
|
||||
} else {
|
||||
const auto& entry = block.get_by_position(arguments[1]);
|
||||
if (!is_column_const(*entry.column)) {
|
||||
return pred_result_sign;
|
||||
}
|
||||
pred_result_sign += entry.type->to_string(*entry.column, 0);
|
||||
children_exprs.emplace_back(child);
|
||||
}
|
||||
}
|
||||
return pred_result_sign;
|
||||
|
||||
for (auto child : children_exprs) {
|
||||
if (child->is_slot_ref()) {
|
||||
auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
|
||||
auto column_id = column_slot_ref->column_id();
|
||||
auto* iter =
|
||||
context->get_inverted_index_context()->get_inverted_index_iterator_by_column_id(
|
||||
column_id);
|
||||
//column does not have inverted index
|
||||
if (iter == nullptr) {
|
||||
continue;
|
||||
}
|
||||
const auto* storage_name_type =
|
||||
context->get_inverted_index_context()->get_storage_name_and_type_by_column_id(
|
||||
column_id);
|
||||
if (storage_name_type == nullptr) {
|
||||
auto err_msg = fmt::format(
|
||||
"storage_name_type cannot be found for column {} while in {} "
|
||||
"evaluate_inverted_index",
|
||||
column_id, expr_name());
|
||||
LOG(ERROR) << err_msg;
|
||||
return Status::InternalError(err_msg);
|
||||
}
|
||||
iterators.emplace_back(iter);
|
||||
data_type_with_names.emplace_back(*storage_name_type);
|
||||
column_ids.emplace_back(column_id);
|
||||
} else if (child->is_literal()) {
|
||||
auto* column_literal = assert_cast<VLiteral*>(child.get());
|
||||
arguments.emplace_back(column_literal->get_column_ptr(),
|
||||
column_literal->get_data_type(), column_literal->expr_name());
|
||||
}
|
||||
}
|
||||
auto result_bitmap = segment_v2::InvertedIndexResultBitmap();
|
||||
if (iterators.empty()) {
|
||||
return Status::OK();
|
||||
}
|
||||
// If arguments are empty, it means the left value in the expression is not a literal.
|
||||
if (arguments.empty()) {
|
||||
return Status::OK();
|
||||
}
|
||||
auto res = function->evaluate_inverted_index(arguments, data_type_with_names, iterators,
|
||||
segment_num_rows, result_bitmap);
|
||||
if (!res.ok()) {
|
||||
return res;
|
||||
}
|
||||
if (!result_bitmap.is_empty()) {
|
||||
context->get_inverted_index_context()->set_inverted_index_result_for_expr(this,
|
||||
result_bitmap);
|
||||
for (auto column_id : column_ids) {
|
||||
context->get_inverted_index_context()->set_true_for_inverted_index_status(this,
|
||||
column_id);
|
||||
}
|
||||
// set fast_execute when expr evaluated by inverted index correctly
|
||||
_can_fast_execute = true;
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Tries to answer this expr from a result column previously registered for it
// in the inverted index context.
//
// Returns false when the context has no inverted index context or no result
// column is registered for `this`. Otherwise the registered column is appended
// to `block` (wrapped in a ColumnNullable with an all-zero null map of
// block->rows() entries when `_data_type` is nullable), `*result_column_id` is
// set to the position of the appended column and true is returned.
bool VExpr::fast_execute(doris::vectorized::VExprContext* context, doris::vectorized::Block* block,
                         int* result_column_id) {
    // The getter returns a shared_ptr by value; fetch it once.
    const auto index_context = context->get_inverted_index_context();
    if (!index_context) {
        return false;
    }

    // One lookup instead of contains() followed by operator[].
    auto& result_columns = index_context->get_inverted_index_result_column();
    const auto found = result_columns.find(this);
    if (found == result_columns.end()) {
        return false;
    }

    size_t num_columns_without_result = block->columns();
    // prepare a column to save result
    auto result_column = found->second;
    if (_data_type->is_nullable()) {
        block->insert(
                {ColumnNullable::create(result_column, ColumnUInt8::create(block->rows(), 0)),
                 _data_type, expr_name()});
    } else {
        block->insert({result_column, _data_type, expr_name()});
    }
    *result_column_id = num_columns_without_result;
    return true;
}
|
||||
|
||||
bool VExpr::equals(const VExpr& other) {
|
||||
|
||||
@ -115,6 +115,14 @@ public:
|
||||
|
||||
virtual Status execute(VExprContext* context, Block* block, int* result_column_id) = 0;
|
||||
|
||||
// Evaluate the current expr with the inverted index to filter the block; yields a roaring bitmap of matching rows.
|
||||
virtual Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
|
||||
return Status::NotSupported("Not supported execute_with_inverted_index");
|
||||
}
|
||||
|
||||
Status _evaluate_inverted_index(VExprContext* context, const FunctionBasePtr& function,
|
||||
uint32_t segment_num_rows);
|
||||
|
||||
// Only the 4th parameter is used in the runtime filter. In and MinMax need overwrite the
|
||||
// interface
|
||||
virtual Status execute_runtime_fitler(VExprContext* context, Block* block,
|
||||
@ -222,18 +230,10 @@ public:
|
||||
}
|
||||
|
||||
// fast_execute can directly copy the expr filter result that was built by applying the index in segment_iterator
|
||||
bool fast_execute(Block& block, const ColumnNumbers& arguments, size_t result,
|
||||
size_t input_rows_count, const std::string& function_name);
|
||||
|
||||
std::string gen_predicate_result_sign(Block& block, const ColumnNumbers& arguments,
|
||||
const std::string& function_name) const;
|
||||
bool fast_execute(doris::vectorized::VExprContext* context, doris::vectorized::Block* block,
|
||||
int* result_column_id);
|
||||
|
||||
virtual bool can_push_down_to_index() const { return false; }
|
||||
virtual bool can_fast_execute() const { return false; }
|
||||
virtual Status eval_inverted_index(segment_v2::FuncExprParams& params,
|
||||
std::shared_ptr<roaring::Roaring>& result) {
|
||||
return Status::NotSupported("Not supported execute_with_inverted_index");
|
||||
}
|
||||
virtual bool equals(const VExpr& other);
|
||||
void set_index_unique_id(uint32_t index_unique_id) { _index_unique_id = index_unique_id; }
|
||||
uint32_t index_unique_id() const { return _index_unique_id; }
|
||||
|
||||
@ -120,6 +120,16 @@ int VExprContext::register_function_context(RuntimeState* state, const TypeDescr
|
||||
return _fn_contexts.size() - 1;
|
||||
}
|
||||
|
||||
// Runs inverted index evaluation on the root expr, passing this context along.
// The call is wrapped in RETURN_IF_CATCH_EXCEPTION; the root's status is
// returned to the caller.
Status VExprContext::evaluate_inverted_index(uint32_t segment_num_rows) {
    Status root_status;
    RETURN_IF_CATCH_EXCEPTION(
            { root_status = _root->evaluate_inverted_index(this, segment_num_rows); });
    return root_status;
}
|
||||
|
||||
bool VExprContext::all_expr_inverted_index_evaluated() {
|
||||
return _inverted_index_context->has_inverted_index_result_for_expr(_root.get());
|
||||
}
|
||||
|
||||
Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block, int column_to_keep) {
|
||||
if (vexpr_ctx == nullptr || block->rows() == 0) {
|
||||
return Status::OK();
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
|
||||
#include "common/factory_creator.h"
|
||||
#include "common/status.h"
|
||||
#include "olap/rowset/segment_v2/inverted_index_reader.h"
|
||||
#include "runtime/types.h"
|
||||
#include "udf/udf.h"
|
||||
#include "vec/core/block.h"
|
||||
@ -37,6 +38,114 @@ class RuntimeState;
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
class InvertedIndexContext {
|
||||
public:
|
||||
InvertedIndexContext(
|
||||
const std::vector<ColumnId>& col_ids,
|
||||
const std::vector<std::unique_ptr<segment_v2::InvertedIndexIterator>>&
|
||||
inverted_index_iterators,
|
||||
const std::vector<vectorized::IndexFieldNameAndTypePair>& storage_name_and_type_vec,
|
||||
std::unordered_map<ColumnId, std::unordered_map<const vectorized::VExpr*, bool>>&
|
||||
common_expr_inverted_index_status)
|
||||
: _col_ids(col_ids),
|
||||
_inverted_index_iterators(inverted_index_iterators),
|
||||
_storage_name_and_type(storage_name_and_type_vec),
|
||||
_expr_inverted_index_status(common_expr_inverted_index_status) {}
|
||||
|
||||
segment_v2::InvertedIndexIterator* get_inverted_index_iterator_by_column_id(
|
||||
int column_index) const {
|
||||
if (column_index < 0 || column_index >= _col_ids.size()) {
|
||||
return nullptr;
|
||||
}
|
||||
const auto& column_id = _col_ids[column_index];
|
||||
if (column_id >= _inverted_index_iterators.size()) {
|
||||
return nullptr;
|
||||
}
|
||||
if (!_inverted_index_iterators[column_id]) {
|
||||
return nullptr;
|
||||
}
|
||||
return _inverted_index_iterators[column_id].get();
|
||||
}
|
||||
|
||||
const vectorized::IndexFieldNameAndTypePair* get_storage_name_and_type_by_column_id(
|
||||
int column_index) const {
|
||||
if (column_index < 0 || column_index >= _col_ids.size()) {
|
||||
return nullptr;
|
||||
}
|
||||
const auto& column_id = _col_ids[column_index];
|
||||
if (column_id >= _storage_name_and_type.size()) {
|
||||
return nullptr;
|
||||
}
|
||||
return &_storage_name_and_type[column_id];
|
||||
}
|
||||
|
||||
bool has_inverted_index_result_for_expr(const vectorized::VExpr* expr) const {
|
||||
return _inverted_index_result_bitmap.contains(expr);
|
||||
}
|
||||
|
||||
void set_inverted_index_result_for_expr(const vectorized::VExpr* expr,
|
||||
segment_v2::InvertedIndexResultBitmap bitmap) {
|
||||
_inverted_index_result_bitmap[expr] = std::move(bitmap);
|
||||
}
|
||||
|
||||
std::unordered_map<const vectorized::VExpr*, segment_v2::InvertedIndexResultBitmap>&
|
||||
get_inverted_index_result_bitmap() {
|
||||
return _inverted_index_result_bitmap;
|
||||
}
|
||||
|
||||
std::unordered_map<const vectorized::VExpr*, ColumnPtr>& get_inverted_index_result_column() {
|
||||
return _inverted_index_result_column;
|
||||
}
|
||||
|
||||
const segment_v2::InvertedIndexResultBitmap* get_inverted_index_result_for_expr(
|
||||
const vectorized::VExpr* expr) {
|
||||
auto iter = _inverted_index_result_bitmap.find(expr);
|
||||
if (iter == _inverted_index_result_bitmap.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return &iter->second;
|
||||
}
|
||||
|
||||
void set_inverted_index_result_column_for_expr(const vectorized::VExpr* expr,
|
||||
ColumnPtr column) {
|
||||
_inverted_index_result_column[expr] = std::move(column);
|
||||
}
|
||||
|
||||
void set_true_for_inverted_index_status(const vectorized::VExpr* expr, int column_index) {
|
||||
if (column_index < 0 || column_index >= _col_ids.size()) {
|
||||
return;
|
||||
}
|
||||
const auto& column_id = _col_ids[column_index];
|
||||
if (_expr_inverted_index_status.contains(column_id)) {
|
||||
if (_expr_inverted_index_status[column_id].contains(expr)) {
|
||||
_expr_inverted_index_status[column_id][expr] = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// A reference to a vector of column IDs for the current expression's output columns.
|
||||
const std::vector<ColumnId>& _col_ids;
|
||||
|
||||
// A reference to a vector of unique pointers to inverted index iterators.
|
||||
const std::vector<std::unique_ptr<segment_v2::InvertedIndexIterator>>&
|
||||
_inverted_index_iterators;
|
||||
|
||||
// A reference to a vector of storage name and type pairs related to schema.
|
||||
const std::vector<vectorized::IndexFieldNameAndTypePair>& _storage_name_and_type;
|
||||
|
||||
// A map of expressions to their corresponding inverted index result bitmaps.
|
||||
std::unordered_map<const vectorized::VExpr*, segment_v2::InvertedIndexResultBitmap>
|
||||
_inverted_index_result_bitmap;
|
||||
|
||||
// A map of expressions to their corresponding result columns.
|
||||
std::unordered_map<const vectorized::VExpr*, ColumnPtr> _inverted_index_result_column;
|
||||
|
||||
// A reference to a map of common expressions to their inverted index evaluation status.
|
||||
std::unordered_map<ColumnId, std::unordered_map<const vectorized::VExpr*, bool>>&
|
||||
_expr_inverted_index_status;
|
||||
};
|
||||
|
||||
class VExprContext {
|
||||
ENABLE_FACTORY_CREATOR(VExprContext);
|
||||
|
||||
@ -50,6 +159,13 @@ public:
|
||||
|
||||
VExprSPtr root() { return _root; }
|
||||
void set_root(const VExprSPtr& expr) { _root = expr; }
|
||||
void set_inverted_index_context(std::shared_ptr<InvertedIndexContext> inverted_index_context) {
|
||||
_inverted_index_context = std::move(inverted_index_context);
|
||||
}
|
||||
|
||||
std::shared_ptr<InvertedIndexContext> get_inverted_index_context() const {
|
||||
return _inverted_index_context;
|
||||
}
|
||||
|
||||
/// Creates a FunctionContext, and returns the index that's passed to fn_context() to
|
||||
/// retrieve the created context. Exprs that need a FunctionContext should call this in
|
||||
@ -69,6 +185,14 @@ public:
|
||||
return _fn_contexts[i].get();
|
||||
}
|
||||
|
||||
// Execute the expr with the inverted index when columns a and b have inverted indexes.
// In some situations column b has an index but applying it is not useful; in that case
// we skip this expr and do not apply the index anymore.
|
||||
|
||||
[[nodiscard]] Status evaluate_inverted_index(uint32_t segment_num_rows);
|
||||
|
||||
bool all_expr_inverted_index_evaluated();
|
||||
|
||||
[[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block,
|
||||
int column_to_keep);
|
||||
|
||||
@ -175,5 +299,7 @@ private:
|
||||
// This flag only works on VSlotRef.
|
||||
// Force to materialize even if the slot need_materialize is false, we just ignore need_materialize flag
|
||||
bool _force_materialize_slot = false;
|
||||
|
||||
std::shared_ptr<InvertedIndexContext> _inverted_index_context;
|
||||
};
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -34,6 +34,8 @@
|
||||
#include "vec/core/column_with_type_and_name.h"
|
||||
#include "vec/core/columns_with_type_and_name.h"
|
||||
#include "vec/exprs/vexpr_context.h"
|
||||
#include "vec/exprs/vliteral.h"
|
||||
#include "vec/exprs/vslot_ref.h"
|
||||
#include "vec/functions/simple_function_factory.h"
|
||||
|
||||
namespace doris {
|
||||
@ -79,16 +81,14 @@ Status VInPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
|
||||
|
||||
VExpr::register_function_context(state, context);
|
||||
_prepare_finished = true;
|
||||
_can_fast_execute = can_fast_execute();
|
||||
_in_list_value_count_threshold = state->query_options().in_list_value_count_threshold;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status VInPredicate::open(RuntimeState* state, VExprContext* context,
|
||||
FunctionContext::FunctionStateScope scope) {
|
||||
DCHECK(_prepare_finished);
|
||||
for (int i = 0; i < _children.size(); ++i) {
|
||||
RETURN_IF_ERROR(_children[i]->open(state, context, scope));
|
||||
for (auto& child : _children) {
|
||||
RETURN_IF_ERROR(child->open(state, context, scope));
|
||||
}
|
||||
RETURN_IF_ERROR(VExpr::init_function_context(context, scope, _function));
|
||||
if (scope == FunctionContext::FRAGMENT_LOCAL) {
|
||||
@ -103,10 +103,18 @@ void VInPredicate::close(VExprContext* context, FunctionContext::FunctionStateSc
|
||||
VExpr::close(context, scope);
|
||||
}
|
||||
|
||||
// Evaluates this IN predicate against the inverted index by delegating to
// `_evaluate_inverted_index` with the predicate's bound function.
Status VInPredicate::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
    // Debug-only sanity check: at least two children are expected.
    DCHECK_GE(get_num_children(), 2);
    Status index_status = _evaluate_inverted_index(context, _function, segment_num_rows);
    return index_status;
}
|
||||
|
||||
Status VInPredicate::execute(VExprContext* context, Block* block, int* result_column_id) {
|
||||
if (is_const_and_have_executed()) { // const have execute in open function
|
||||
return get_result_from_const(block, _expr_name, result_column_id);
|
||||
}
|
||||
if (_can_fast_execute && fast_execute(context, block, result_column_id)) {
|
||||
return Status::OK();
|
||||
}
|
||||
DCHECK(_open_finished || _getting_const_col);
|
||||
// TODO: not execute const expr again, but use the const column in function context
|
||||
doris::vectorized::ColumnNumbers arguments(_children.size());
|
||||
@ -120,15 +128,6 @@ Status VInPredicate::execute(VExprContext* context, Block* block, int* result_co
|
||||
// prepare a column to save result
|
||||
block->insert({nullptr, _data_type, _expr_name});
|
||||
|
||||
if (_can_fast_execute) {
|
||||
auto can_fast_execute = fast_execute(*block, arguments, num_columns_without_result,
|
||||
block->rows(), _function->get_name());
|
||||
if (can_fast_execute) {
|
||||
*result_column_id = num_columns_without_result;
|
||||
return Status::OK();
|
||||
}
|
||||
}
|
||||
|
||||
RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, arguments,
|
||||
num_columns_without_result, block->rows(), false));
|
||||
*result_column_id = num_columns_without_result;
|
||||
@ -152,4 +151,4 @@ std::string VInPredicate::debug_string() const {
|
||||
return out.str();
|
||||
}
|
||||
|
||||
} // namespace doris::vectorized
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -54,7 +54,7 @@ public:
|
||||
const FunctionBasePtr function() { return _function; }
|
||||
|
||||
bool is_not_in() const { return _is_not_in; };
|
||||
bool can_fast_execute() const override { return true; }
|
||||
Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override;
|
||||
|
||||
private:
|
||||
FunctionBasePtr _function;
|
||||
|
||||
@ -54,6 +54,7 @@ public:
|
||||
std::string value() const;
|
||||
|
||||
const ColumnPtr& get_column_ptr() const { return _column_ptr; }
|
||||
const DataTypePtr& get_data_type() const { return _data_type; }
|
||||
|
||||
bool is_literal() const override { return true; }
|
||||
|
||||
|
||||
@ -43,6 +43,8 @@
|
||||
#include "vec/core/column_with_type_and_name.h"
|
||||
#include "vec/core/columns_with_type_and_name.h"
|
||||
#include "vec/exprs/vexpr_context.h"
|
||||
#include "vec/exprs/vliteral.h"
|
||||
#include "vec/exprs/vslot_ref.h"
|
||||
#include "vec/functions/simple_function_factory.h"
|
||||
|
||||
namespace doris {
|
||||
@ -130,9 +132,35 @@ void VMatchPredicate::close(VExprContext* context, FunctionContext::FunctionStat
|
||||
VExpr::close(context, scope);
|
||||
}
|
||||
|
||||
// Evaluates this MATCH predicate against the inverted index by delegating to
// _evaluate_inverted_index with the predicate's own function object.
Status VMatchPredicate::evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) {
|
||||
// Sanity check: the predicate is expected to have exactly two children.
DCHECK_EQ(get_num_children(), 2);
|
||||
return _evaluate_inverted_index(context, _function, segment_num_rows);
|
||||
}
|
||||
|
||||
Status VMatchPredicate::execute(VExprContext* context, Block* block, int* result_column_id) {
|
||||
DCHECK(_open_finished || _getting_const_col);
|
||||
// TODO: not execute const expr again, but use the const column in function context
|
||||
if (_can_fast_execute && fast_execute(context, block, result_column_id)) {
|
||||
return Status::OK();
|
||||
}
|
||||
DBUG_EXECUTE_IF("VMatchPredicate.execute", {
|
||||
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
|
||||
"{} not support slow path, hit debug point.", _expr_name);
|
||||
});
|
||||
DBUG_EXECUTE_IF("VMatchPredicate.must_in_slow_path", {
|
||||
auto debug_col_name = DebugPoints::instance()->get_debug_param_or_default<std::string>(
|
||||
"VMatchPredicate.must_in_slow_path", "column_name", "");
|
||||
|
||||
std::vector<std::string> column_names;
|
||||
boost::split(column_names, debug_col_name, boost::algorithm::is_any_of(","));
|
||||
|
||||
auto* column_slot_ref = assert_cast<VSlotRef*>(get_child(0).get());
|
||||
std::string column_name = column_slot_ref->expr_name();
|
||||
auto it = std::find(column_names.begin(), column_names.end(), column_name);
|
||||
if (it == column_names.end()) {
|
||||
return Status::Error<ErrorCode::INTERNAL_ERROR>(
|
||||
"column {} should in slow path while VMatchPredicate::execute.", column_name);
|
||||
}
|
||||
})
|
||||
doris::vectorized::ColumnNumbers arguments(_children.size());
|
||||
for (int i = 0; i < _children.size(); ++i) {
|
||||
int column_id = -1;
|
||||
|
||||
@ -54,12 +54,13 @@ public:
|
||||
Status open(RuntimeState* state, VExprContext* context,
|
||||
FunctionContext::FunctionStateScope scope) override;
|
||||
void close(VExprContext* context, FunctionContext::FunctionStateScope scope) override;
|
||||
Status evaluate_inverted_index(VExprContext* context, uint32_t segment_num_rows) override;
|
||||
const std::string& expr_name() const override;
|
||||
const std::string& function_name() const;
|
||||
|
||||
std::string debug_string() const override;
|
||||
|
||||
const FunctionBasePtr function() { return _function; }
|
||||
FunctionBasePtr function() { return _function; }
|
||||
|
||||
private:
|
||||
FunctionBasePtr _function;
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
#include <utility>
|
||||
|
||||
#include "common/status.h"
|
||||
#include "olap/rowset/segment_v2/inverted_index_reader.h"
|
||||
#include "vec/columns/column.h"
|
||||
#include "vec/columns/column_array.h"
|
||||
#include "vec/columns/column_nullable.h"
|
||||
@ -87,6 +88,91 @@ public:
|
||||
|
||||
bool use_default_implementation_for_nulls() const override { return false; }
|
||||
|
||||
/// Evaluates this array function through the inverted index of the single indexed column.
///
/// Exactly one literal argument, one (name, type) pair and one iterator are expected.
/// On success `bitmap_result` holds the rows whose index entries equal the literal,
/// with null rows masked out. When the index cannot give a trustworthy answer
/// (no iterator, FULLTEXT reader, NULL literal) the function returns OK without
/// touching `bitmap_result`.
///
/// @param arguments            the literal argument(s); only arguments[0] is read
/// @param data_type_with_names index field name/type pairs; only element 0 is read
/// @param iterators            inverted index iterators; only element 0 is read
/// @param num_rows             number of rows passed to the index reader
/// @param bitmap_result        output: matched-row bitmap plus null bitmap
Status evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
    DCHECK(arguments.size() == 1);
    DCHECK(data_type_with_names.size() == 1);
    DCHECK(iterators.size() == 1);
    auto* iter = iterators[0];
    if (iter == nullptr) {
        return Status::OK();
    }
    // Bind by reference: only the field name is needed, so the pair need not be copied.
    const auto& column_name = data_type_with_names[0].first;

    if (iter->get_inverted_index_reader_type() ==
        segment_v2::InvertedIndexReaderType::FULLTEXT) {
        // When a parser is configured the index cannot guarantee a correct result inside
        // an expression combination. Example filter:
        //     !array_index(array, 'tall:120cm, weight: 35kg')
        // A row [tall:120cm, weight: 35kg, hobbies: reading book] is tokenized, and so is
        // the query, so the FULLTEXT reader reports the row as matched. We expect a match
        // only when the element is exactly 'tall:120cm, weight: 35kg', so skip the index.
        return Status::OK();
    }

    Field param_value;
    arguments[0].column->get(0, param_value);
    auto param_type = arguments[0].type->get_type_as_type_descriptor().type;
    // The current inverted index implementation for arrays cannot handle arrays that
    // contain null items, so a NULL search value is not evaluated through the index.
    if (param_value.is_null()) {
        return Status::OK();
    }

    std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
    std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
    // Read the null bitmap once; it is needed only when the result is assembled below.
    if (iter->has_null()) {
        segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
        RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
        null_bitmap = null_bitmap_cache_handle.get_bitmap();
    }

    std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
    RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
            param_type, &param_value, query_param));
    if (is_string_type(param_type)) {
        Status st = iter->read_from_inverted_index(
                column_name, query_param->get_value(),
                segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, roaring);
        if (st.code() == ErrorCode::INVERTED_INDEX_NO_TERMS) {
            // The analyzed parameter produced no terms: do not filter anything,
            // report every row as a candidate and continue with OK status.
            roaring->addRange(0, num_rows);
        } else if (st != Status::OK()) {
            return st;
        }
    } else {
        RETURN_IF_ERROR(iter->read_from_inverted_index(
                column_name, query_param->get_value(),
                segment_v2::InvertedIndexQueryType::EQUAL_QUERY, num_rows, roaring));
    }

    // Debug point: verify that array_contains really filtered the expected number of
    // rows through the inverted index.
    DBUG_EXECUTE_IF("array_func.array_contains", {
        auto result_bitmap = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
                "array_func.array_contains", "result_bitmap", 0);
        if (result_bitmap < 0) {
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
                    "result_bitmap count cannot be negative");
        }
        if (roaring->cardinality() != result_bitmap) {
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
                    "array_contains really filtered {} by inverted index not equal to expected "
                    "{}",
                    roaring->cardinality(), result_bitmap);
        }
    })

    segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
    bitmap_result = result;
    bitmap_result.mask_out_null();

    return Status::OK();
}
|
||||
|
||||
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
|
||||
if constexpr (OldVersion) {
|
||||
return make_nullable(std::make_shared<DataTypeNumber<ResultType>>());
|
||||
|
||||
@ -185,6 +185,15 @@ public:
|
||||
->execute(context, block, arguments, result, input_rows_count, dry_run);
|
||||
}
|
||||
|
||||
/// Default implementation of inverted-index evaluation: functions that do not
/// override this hook report NotSupported, naming the function in the message.
///
/// @param arguments            literal arguments of the call (unused here)
/// @param data_type_with_names index field name/type pairs (unused here)
/// @param iterators            inverted index iterators (unused here)
/// @param num_rows             number of rows to evaluate (unused here)
/// @param bitmap_result        output bitmap (left untouched here)
/// @return always a NotSupported status
virtual Status evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
    // The "{}" placeholder is required for the function name to appear in the message;
    // without it the extra argument is silently dropped by the formatter.
    return Status::NotSupported("evaluate_inverted_index is not supported in function: {}",
                                get_name());
}
|
||||
|
||||
/// Do cleaning work when function is finished, i.e., release state variables in the
|
||||
/// `FunctionContext` which are registered in `prepare` phase.
|
||||
virtual Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
|
||||
@ -220,12 +229,6 @@ public:
|
||||
}
|
||||
|
||||
virtual bool can_push_down_to_index() const { return false; }
|
||||
|
||||
virtual Status eval_inverted_index(VExpr* context, segment_v2::FuncExprParams& params,
|
||||
std::shared_ptr<roaring::Roaring>& result) {
|
||||
return Status::NotSupported("eval_inverted_index is not supported in function: ",
|
||||
get_name());
|
||||
}
|
||||
};
|
||||
|
||||
using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
|
||||
@ -441,6 +444,16 @@ protected:
|
||||
size_t result, size_t input_rows_count) const final {
|
||||
return function->execute_impl(context, block, arguments, result, input_rows_count);
|
||||
}
|
||||
|
||||
// Forwards inverted-index evaluation to the wrapped `function`, passing every
// argument through unchanged and returning its Status.
Status evaluate_inverted_index(
|
||||
const ColumnsWithTypeAndName& arguments,
|
||||
const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
|
||||
std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
|
||||
segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
|
||||
return function->evaluate_inverted_index(arguments, data_type_with_names, iterators,
|
||||
num_rows, bitmap_result);
|
||||
}
|
||||
|
||||
Status execute_impl_dry_run(FunctionContext* context, Block& block,
|
||||
const ColumnNumbers& arguments, size_t result,
|
||||
size_t input_rows_count) const final {
|
||||
@ -498,6 +511,15 @@ public:
|
||||
return function->close(context, scope);
|
||||
}
|
||||
|
||||
// Override that forwards inverted-index evaluation to the wrapped `function`;
// the result bitmap is written by the callee through `bitmap_result`.
Status evaluate_inverted_index(
|
||||
const ColumnsWithTypeAndName& args,
|
||||
const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
|
||||
std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
|
||||
segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
|
||||
return function->evaluate_inverted_index(args, data_type_with_names, iterators, num_rows,
|
||||
bitmap_result);
|
||||
}
|
||||
|
||||
IFunctionBase::Monotonicity get_monotonicity_for_range(const IDataType& type, const Field& left,
|
||||
const Field& right) const override {
|
||||
return function->get_monotonicity_for_range(type, left, right);
|
||||
@ -509,11 +531,6 @@ public:
|
||||
|
||||
bool can_push_down_to_index() const override { return function->can_push_down_to_index(); }
|
||||
|
||||
Status eval_inverted_index(VExpr* expr, segment_v2::FuncExprParams& params,
|
||||
std::shared_ptr<roaring::Roaring>& result) override {
|
||||
return function->eval_inverted_index(expr, params, result);
|
||||
}
|
||||
|
||||
private:
|
||||
std::shared_ptr<IFunction> function;
|
||||
DataTypes arguments;
|
||||
|
||||
@ -17,27 +17,17 @@
|
||||
|
||||
#include "vec/functions/function_multi_match.h"
|
||||
|
||||
#include <gen_cpp/PaloBrokerService_types.h>
|
||||
#include <glog/logging.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/algorithm/string/classification.hpp>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <roaring/roaring.hh>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "io/fs/file_reader.h"
|
||||
#include "olap/olap_common.h"
|
||||
#include "olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h"
|
||||
#include "olap/rowset/segment_v2/segment_iterator.h"
|
||||
#include "runtime/primitive_type.h"
|
||||
#include "vec/columns/column.h"
|
||||
#include "vec/data_types/data_type.h"
|
||||
#include "vec/exprs/varray_literal.h"
|
||||
#include "vec/exprs/vexpr.h"
|
||||
#include "vec/exprs/vslot_ref.h"
|
||||
#include "vec/functions/simple_function_factory.h"
|
||||
|
||||
@ -56,97 +46,42 @@ InvertedIndexQueryType get_query_type(const std::string& query_type) {
|
||||
return InvertedIndexQueryType::UNKNOWN_QUERY;
|
||||
}
|
||||
|
||||
Status FunctionMultiMatch::eval_inverted_index(VExpr* expr, segment_v2::FuncExprParams& params,
|
||||
std::shared_ptr<roaring::Roaring>& result) {
|
||||
// fields
|
||||
std::vector<std::string> query_fileds;
|
||||
size_t i = 0;
|
||||
for (; i < expr->get_num_children(); i++) {
|
||||
auto child_expr = expr->get_child(i);
|
||||
if (child_expr->node_type() == TExprNodeType::type::SLOT_REF) {
|
||||
query_fileds.emplace_back(child_expr->expr_name());
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i != expr->get_num_children() - 2) {
|
||||
return Status::RuntimeError("parameter type incorrect: slot = {}", i);
|
||||
}
|
||||
|
||||
Status FunctionMultiMatch::evaluate_inverted_index(
|
||||
const ColumnsWithTypeAndName& arguments,
|
||||
const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
|
||||
std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
|
||||
segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
|
||||
DCHECK(arguments.size() == 2);
|
||||
std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
|
||||
std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
|
||||
// type
|
||||
std::string param1 = std::static_pointer_cast<VLiteral>(expr->get_child(i))->value();
|
||||
auto query_type = get_query_type(param1);
|
||||
auto query_type_value = arguments[0].column->get_data_at(0);
|
||||
auto query_type = get_query_type(query_type_value.to_string());
|
||||
if (query_type == InvertedIndexQueryType::UNKNOWN_QUERY) {
|
||||
return Status::RuntimeError("parameter query type incorrect: query_type = {}", query_type);
|
||||
return Status::RuntimeError(
|
||||
"parameter query type incorrect for function multi_match: query_type = {}",
|
||||
query_type);
|
||||
}
|
||||
|
||||
// query
|
||||
std::string query_str = std::static_pointer_cast<VLiteral>(expr->get_child(i + 1))->value();
|
||||
|
||||
auto& segment_iterator = params._segment_iterator;
|
||||
auto& segment = segment_iterator->segment();
|
||||
auto& opts = segment_iterator->storage_read_options();
|
||||
auto& tablet_schema = opts.tablet_schema;
|
||||
auto& idx_iterators = segment_iterator->inverted_index_iterators();
|
||||
|
||||
// check
|
||||
std::vector<ColumnId> columns_ids;
|
||||
for (const auto& column_name : query_fileds) {
|
||||
auto cid = tablet_schema->field_index(column_name);
|
||||
if (cid < 0) {
|
||||
return Status::RuntimeError("column name is incorrect: {}", column_name);
|
||||
}
|
||||
if (idx_iterators[cid] == nullptr) {
|
||||
return Status::RuntimeError("column idx is incorrect: {}", column_name);
|
||||
}
|
||||
columns_ids.emplace_back(cid);
|
||||
auto query_str = arguments[1].column->get_data_at(0);
|
||||
auto param_type = arguments[1].type->get_type_as_type_descriptor().type;
|
||||
if (!is_string_type(param_type)) {
|
||||
return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
|
||||
"arguments for multi_match must be string");
|
||||
}
|
||||
|
||||
// cache key
|
||||
roaring::Roaring cids_str;
|
||||
cids_str.addMany(columns_ids.size(), columns_ids.data());
|
||||
cids_str.runOptimize();
|
||||
std::string column_name_binary(cids_str.getSizeInBytes(), 0);
|
||||
cids_str.write(column_name_binary.data());
|
||||
|
||||
InvertedIndexQueryCache::CacheKey cache_key;
|
||||
io::Path index_path = segment.file_reader()->path();
|
||||
cache_key.index_path = index_path.parent_path() / index_path.stem();
|
||||
cache_key.column_name = column_name_binary;
|
||||
cache_key.query_type = query_type;
|
||||
cache_key.value = query_str;
|
||||
|
||||
// query cache
|
||||
auto* cache = InvertedIndexQueryCache::instance();
|
||||
InvertedIndexQueryCacheHandle cache_handler;
|
||||
if (cache->lookup(cache_key, &cache_handler)) {
|
||||
result = cache_handler.get_bitmap();
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// search
|
||||
for (const auto& column_name : query_fileds) {
|
||||
auto cid = tablet_schema->field_index(column_name);
|
||||
const auto& column = tablet_schema->column(column_name);
|
||||
const auto& index_reader = idx_iterators[cid]->reader();
|
||||
|
||||
for (int i = 0; i < data_type_with_names.size(); i++) {
|
||||
auto column_name = data_type_with_names[i].first;
|
||||
auto* iter = iterators[i];
|
||||
auto single_result = std::make_shared<roaring::Roaring>();
|
||||
StringRef query_value(query_str.data());
|
||||
auto index_version = tablet_schema->get_inverted_index_storage_format();
|
||||
if (index_version == InvertedIndexStorageFormatPB::V1) {
|
||||
RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state, column_name,
|
||||
&query_value, query_type, single_result));
|
||||
} else if (index_version == InvertedIndexStorageFormatPB::V2) {
|
||||
RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state,
|
||||
std::to_string(column.unique_id()), &query_value,
|
||||
query_type, single_result));
|
||||
}
|
||||
(*result) |= (*single_result);
|
||||
std::shared_ptr<roaring::Roaring> index = std::make_shared<roaring::Roaring>();
|
||||
RETURN_IF_ERROR(iter->read_from_inverted_index(column_name, &query_str, query_type,
|
||||
num_rows, index));
|
||||
*roaring |= *index;
|
||||
}
|
||||
|
||||
result->runOptimize();
|
||||
cache->insert(cache_key, result, &cache_handler);
|
||||
|
||||
segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
|
||||
bitmap_result = result;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@ -58,8 +58,11 @@ public:
|
||||
|
||||
bool can_push_down_to_index() const override { return true; }
|
||||
|
||||
Status eval_inverted_index(VExpr* expr, segment_v2::FuncExprParams& params,
|
||||
std::shared_ptr<roaring::Roaring>& result) override;
|
||||
Status evaluate_inverted_index(
|
||||
const ColumnsWithTypeAndName& arguments,
|
||||
const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
|
||||
std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
|
||||
segment_v2::InvertedIndexResultBitmap& bitmap_result) const override;
|
||||
};
|
||||
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -39,6 +39,8 @@
|
||||
#include "vec/functions/function_helpers.h"
|
||||
#include "vec/functions/functions_logical.h"
|
||||
#include "vec/runtime/vdatetime_value.h"
|
||||
//#include "olap/rowset/segment_v2/inverted_index_reader.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
/** Comparison functions: ==, !=, <, >, <=, >=.
|
||||
@ -524,6 +526,72 @@ public:
|
||||
return std::make_shared<DataTypeUInt8>();
|
||||
}
|
||||
|
||||
/// Evaluates a comparison (==, !=, <, <=, >, >=) against a literal through the
/// inverted index of the single indexed column.
///
/// The comparison operator is derived from the function name. `!=` is evaluated
/// as an EQUAL query whose result is inverted at the end. On success
/// `bitmap_result` holds the matching rows with null rows masked out. When the
/// index cannot be used (no iterator, FULLTEXT reader) the function returns OK
/// without touching `bitmap_result`.
///
/// @param arguments            the literal operand; only arguments[0] is read
/// @param data_type_with_names index field name/type pairs; only element 0 is read
/// @param iterators            inverted index iterators; only element 0 is read
/// @param num_rows             number of rows passed to the index reader
/// @param bitmap_result        output: matched-row bitmap plus null bitmap
/// @return InvalidArgument if the function name is not a known comparison operator,
///         otherwise any error from building the query value or reading the index
Status evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
    DCHECK(arguments.size() == 1);
    DCHECK(data_type_with_names.size() == 1);
    DCHECK(iterators.size() == 1);
    auto* iter = iterators[0];
    if (iter == nullptr) {
        return Status::OK();
    }
    if (iter->get_inverted_index_reader_type() ==
        segment_v2::InvertedIndexReaderType::FULLTEXT) {
        // Comparison predicates are not supported through the expression index path
        // when the index uses a FULLTEXT parser.
        return Status::OK();
    }
    // Bind by reference: the name lives in the caller's vector for the whole call.
    const std::string& column_name = data_type_with_names[0].first;

    Field param_value;
    arguments[0].column->get(0, param_value);
    auto param_type = arguments[0].type->get_type_as_type_descriptor().type;

    // Build the storage-typed query value once; it is reused for the index read below.
    std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
    RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
            param_type, &param_value, query_param));

    // Compare through string_view so the check is by content, never by address.
    const std::string_view name_view(name);
    const bool is_not_equals = (name_view == NameNotEquals::name);
    segment_v2::InvertedIndexQueryType query_type;
    if (name_view == NameEquals::name || is_not_equals) {
        query_type = segment_v2::InvertedIndexQueryType::EQUAL_QUERY;
    } else if (name_view == NameLess::name) {
        query_type = segment_v2::InvertedIndexQueryType::LESS_THAN_QUERY;
    } else if (name_view == NameLessOrEquals::name) {
        query_type = segment_v2::InvertedIndexQueryType::LESS_EQUAL_QUERY;
    } else if (name_view == NameGreater::name) {
        query_type = segment_v2::InvertedIndexQueryType::GREATER_THAN_QUERY;
    } else if (name_view == NameGreaterOrEquals::name) {
        query_type = segment_v2::InvertedIndexQueryType::GREATER_EQUAL_QUERY;
    } else {
        return Status::InvalidArgument("invalid comparison op type {}", Name::name);
    }

    std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
    RETURN_IF_ERROR(iter->read_from_inverted_index(column_name, query_param->get_value(),
                                                   query_type, num_rows, roaring));

    std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
    if (iter->has_null()) {
        segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
        RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
        null_bitmap = null_bitmap_cache_handle.get_bitmap();
    }
    segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
    bitmap_result = result;
    bitmap_result.mask_out_null();

    if (is_not_equals) {
        // `!=` was evaluated as `==`; invert it against the full row range.
        roaring::Roaring full_result;
        full_result.addRange(0, num_rows);
        bitmap_result.op_not(&full_result);
    }

    return Status::OK();
}
|
||||
|
||||
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
|
||||
size_t result, size_t input_rows_count) const override {
|
||||
const auto& col_with_type_and_name_left = block.get_by_position(arguments[0]);
|
||||
|
||||
@ -29,6 +29,7 @@
|
||||
#include "common/status.h"
|
||||
#include "exprs/create_predicate_function.h"
|
||||
#include "exprs/hybrid_set.h"
|
||||
#include "olap/rowset/segment_v2/inverted_index_reader.h"
|
||||
#include "runtime/define_primitive_type.h"
|
||||
#include "runtime/types.h"
|
||||
#include "udf/udf.h"
|
||||
@ -135,6 +136,64 @@ public:
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
/// Evaluates an IN / NOT IN list through the inverted index of the single indexed column.
///
/// Each list element is looked up with an EQUAL query and the per-element bitmaps
/// are OR-ed together. For the negative (NOT IN) variant the combined result is
/// inverted against the full row range. When the index cannot be used (no
/// iterator, FULLTEXT reader, or a NULL element in a NOT IN list) the function
/// returns OK without touching `bitmap_result`.
///
/// @param arguments            the literal list elements; row 0 of each column is read
/// @param data_type_with_names index field name/type pairs; only element 0 is read
/// @param iterators            inverted index iterators; only element 0 is read
/// @param num_rows             number of rows passed to the index reader
/// @param bitmap_result        output: matched-row bitmap plus null bitmap
Status evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const override {
    DCHECK(data_type_with_names.size() == 1);
    DCHECK(iterators.size() == 1);
    auto* iter = iterators[0];
    if (iter == nullptr) {
        return Status::OK();
    }
    if (iter->get_inverted_index_reader_type() ==
        segment_v2::InvertedIndexReaderType::FULLTEXT) {
        // IN lists are not supported through the expression index path when the
        // index uses a FULLTEXT parser.
        return Status::OK();
    }

    // Allocate the bitmaps only after the early exits above.
    std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
    std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
    if (iter->has_null()) {
        segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
        RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
        null_bitmap = null_bitmap_cache_handle.get_bitmap();
    }

    // Bind by reference: the name lives in the caller's vector for the whole call.
    const std::string& column_name = data_type_with_names[0].first;
    // Every list element is matched by equality.
    const InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
    for (const auto& arg : arguments) {
        Field param_value;
        arg.column->get(0, param_value);
        auto param_type = arg.type->get_type_as_type_descriptor().type;
        if (param_value.is_null()) {
            // A predicate like `column NOT IN (NULL, '')` must not be pushed down
            // to the index.
            if constexpr (negative) {
                return Status::OK();
            }
            *roaring |= *null_bitmap;
            continue;
        }
        std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
        RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
                param_type, &param_value, query_param));
        std::shared_ptr<roaring::Roaring> index = std::make_shared<roaring::Roaring>();
        RETURN_IF_ERROR(iter->read_from_inverted_index(column_name, query_param->get_value(),
                                                       query_type, num_rows, index));
        *roaring |= *index;
    }

    segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
    bitmap_result = result;
    bitmap_result.mask_out_null();
    if constexpr (negative) {
        // NOT IN: invert the union of matches against the full row range.
        roaring::Roaring full_result;
        full_result.addRange(0, num_rows);
        bitmap_result.op_not(&full_result);
    }
    return Status::OK();
}
|
||||
|
||||
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
|
||||
size_t result, size_t input_rows_count) const override {
|
||||
auto* in_state = reinterpret_cast<InState*>(
|
||||
|
||||
@ -24,7 +24,63 @@
|
||||
#include "util/debug_points.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
/// Evaluates a MATCH-family function through the inverted index of the single
/// indexed column.
///
/// The query type is derived from the function name. Phrase-style functions are
/// rejected on a FULLTEXT index whose properties report that phrase support is
/// disabled. On success `bitmap_result` holds the matching rows with null rows
/// masked out. A null iterator returns OK without touching `bitmap_result`.
///
/// @param arguments            the query string; only arguments[0] is read
/// @param data_type_with_names index field name/type pairs; only element 0 is read
/// @param iterators            inverted index iterators; only element 0 is read
/// @param num_rows             number of rows passed to the index reader
/// @param bitmap_result        output: matched-row bitmap plus null bitmap
/// @return INVERTED_INDEX_INVALID_PARAMETERS if phrase support is missing or the
///         argument is not a string type; otherwise any error from the index read
Status FunctionMatchBase::evaluate_inverted_index(
        const ColumnsWithTypeAndName& arguments,
        const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
        std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
        segment_v2::InvertedIndexResultBitmap& bitmap_result) const {
    DCHECK(arguments.size() == 1);
    DCHECK(data_type_with_names.size() == 1);
    DCHECK(iterators.size() == 1);
    auto* iter = iterators[0];
    if (iter == nullptr) {
        return Status::OK();
    }
    // Bind by reference: only the field name is needed, so the pair need not be copied.
    const auto& column_name = data_type_with_names[0].first;
    const std::string& function_name = get_name();

    const bool is_phrase_function = function_name == MATCH_PHRASE_FUNCTION ||
                                    function_name == MATCH_PHRASE_PREFIX_FUNCTION ||
                                    function_name == MATCH_PHRASE_EDGE_FUNCTION;
    if (is_phrase_function) {
        if (iter->get_inverted_index_reader_type() == InvertedIndexReaderType::FULLTEXT &&
            get_parser_phrase_support_string_from_properties(iter->get_index_properties()) ==
                    INVERTED_INDEX_PARSER_PHRASE_SUPPORT_NO) {
            return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
                    "phrase queries require setting support_phrase = true");
        }
    }

    Field param_value;
    arguments[0].column->get(0, param_value);
    auto param_type = arguments[0].type->get_type_as_type_descriptor().type;
    // Only string arguments are valid; everything after this guard may assume a string type.
    if (!is_string_type(param_type)) {
        return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
                "arguments for match must be string");
    }

    std::unique_ptr<segment_v2::InvertedIndexQueryParamFactory> query_param = nullptr;
    RETURN_IF_ERROR(segment_v2::InvertedIndexQueryParamFactory::create_query_value(
            param_type, &param_value, query_param));

    std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
    auto inverted_index_query_type = get_query_type_from_fn_name();
    RETURN_IF_ERROR(iter->read_from_inverted_index(column_name, query_param->get_value(),
                                                   inverted_index_query_type, num_rows,
                                                   roaring));

    std::shared_ptr<roaring::Roaring> null_bitmap = std::make_shared<roaring::Roaring>();
    if (iter->has_null()) {
        segment_v2::InvertedIndexQueryCacheHandle null_bitmap_cache_handle;
        RETURN_IF_ERROR(iter->read_null_bitmap(&null_bitmap_cache_handle));
        null_bitmap = null_bitmap_cache_handle.get_bitmap();
    }
    segment_v2::InvertedIndexResultBitmap result(roaring, null_bitmap);
    bitmap_result = result;
    bitmap_result.mask_out_null();

    return Status::OK();
}
|
||||
Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block,
|
||||
const ColumnNumbers& arguments, size_t result,
|
||||
size_t input_rows_count) const {
|
||||
@ -32,78 +88,70 @@ Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block,
|
||||
DataTypePtr& type_ptr = block.get_by_position(arguments[1]).type;
|
||||
auto match_query_str = type_ptr->to_string(*column_ptr, 0);
|
||||
std::string column_name = block.get_by_position(arguments[0]).name;
|
||||
auto match_pred_column_name =
|
||||
BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_match_" + match_query_str;
|
||||
if (!block.has(match_pred_column_name)) {
|
||||
VLOG_DEBUG << "begin to execute match directly, column_name=" << column_name
|
||||
<< ", match_query_str=" << match_query_str;
|
||||
InvertedIndexCtx* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
|
||||
context->get_function_state(FunctionContext::THREAD_LOCAL));
|
||||
if (inverted_index_ctx == nullptr) {
|
||||
inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
|
||||
context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
|
||||
}
|
||||
|
||||
const ColumnPtr source_col =
|
||||
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
|
||||
const auto* values = check_and_get_column<ColumnString>(source_col.get());
|
||||
const ColumnArray* array_col = nullptr;
|
||||
if (source_col->is_column_array()) {
|
||||
if (source_col->is_nullable()) {
|
||||
auto* nullable = check_and_get_column<ColumnNullable>(source_col.get());
|
||||
array_col = check_and_get_column<ColumnArray>(*nullable->get_nested_column_ptr());
|
||||
} else {
|
||||
array_col = check_and_get_column<ColumnArray>(source_col.get());
|
||||
}
|
||||
if (array_col && !array_col->get_data().is_column_string()) {
|
||||
return Status::NotSupported(
|
||||
fmt::format("unsupported nested array of type {} for function {}",
|
||||
is_column_nullable(array_col->get_data())
|
||||
? array_col->get_data().get_name()
|
||||
: array_col->get_data().get_family_name(),
|
||||
get_name()));
|
||||
}
|
||||
|
||||
if (is_column_nullable(array_col->get_data())) {
|
||||
const auto& array_nested_null_column =
|
||||
reinterpret_cast<const ColumnNullable&>(array_col->get_data());
|
||||
values = check_and_get_column<ColumnString>(
|
||||
*(array_nested_null_column.get_nested_column_ptr()));
|
||||
} else {
|
||||
// array column element is always set Nullable for now.
|
||||
values = check_and_get_column<ColumnString>(*(array_col->get_data_ptr()));
|
||||
}
|
||||
} else if (auto* nullable = check_and_get_column<ColumnNullable>(source_col.get())) {
|
||||
// match null
|
||||
if (type_ptr->is_nullable()) {
|
||||
if (column_ptr->only_null()) {
|
||||
block.get_by_position(result).column = nullable->get_null_map_column_ptr();
|
||||
return Status::OK();
|
||||
}
|
||||
} else {
|
||||
values = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
|
||||
}
|
||||
}
|
||||
|
||||
if (!values) {
|
||||
LOG(WARNING) << "Illegal column " << source_col->get_name();
|
||||
return Status::InternalError("Not supported input column types");
|
||||
}
|
||||
// result column
|
||||
auto res = ColumnUInt8::create();
|
||||
ColumnUInt8::Container& vec_res = res->get_data();
|
||||
// set default value to 0, and match functions only need to set 1/true
|
||||
vec_res.resize_fill(input_rows_count);
|
||||
RETURN_IF_ERROR(execute_match(
|
||||
context, column_name, match_query_str, input_rows_count, values, inverted_index_ctx,
|
||||
(array_col ? &(array_col->get_offsets()) : nullptr), vec_res));
|
||||
block.replace_by_position(result, std::move(res));
|
||||
} else {
|
||||
auto match_pred_column =
|
||||
block.get_by_name(match_pred_column_name).column->convert_to_full_column_if_const();
|
||||
block.replace_by_position(result, std::move(match_pred_column));
|
||||
VLOG_DEBUG << "begin to execute match directly, column_name=" << column_name
|
||||
<< ", match_query_str=" << match_query_str;
|
||||
InvertedIndexCtx* inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
|
||||
context->get_function_state(FunctionContext::THREAD_LOCAL));
|
||||
if (inverted_index_ctx == nullptr) {
|
||||
inverted_index_ctx = reinterpret_cast<InvertedIndexCtx*>(
|
||||
context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
|
||||
}
|
||||
|
||||
const ColumnPtr source_col =
|
||||
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
|
||||
const auto* values = check_and_get_column<ColumnString>(source_col.get());
|
||||
const ColumnArray* array_col = nullptr;
|
||||
if (source_col->is_column_array()) {
|
||||
if (source_col->is_nullable()) {
|
||||
auto* nullable = check_and_get_column<ColumnNullable>(source_col.get());
|
||||
array_col = check_and_get_column<ColumnArray>(*nullable->get_nested_column_ptr());
|
||||
} else {
|
||||
array_col = check_and_get_column<ColumnArray>(source_col.get());
|
||||
}
|
||||
if (array_col && !array_col->get_data().is_column_string()) {
|
||||
return Status::NotSupported(
|
||||
fmt::format("unsupported nested array of type {} for function {}",
|
||||
is_column_nullable(array_col->get_data())
|
||||
? array_col->get_data().get_name()
|
||||
: array_col->get_data().get_family_name(),
|
||||
get_name()));
|
||||
}
|
||||
|
||||
if (is_column_nullable(array_col->get_data())) {
|
||||
const auto& array_nested_null_column =
|
||||
reinterpret_cast<const ColumnNullable&>(array_col->get_data());
|
||||
values = check_and_get_column<ColumnString>(
|
||||
*(array_nested_null_column.get_nested_column_ptr()));
|
||||
} else {
|
||||
// array column element is always set Nullable for now.
|
||||
values = check_and_get_column<ColumnString>(*(array_col->get_data_ptr()));
|
||||
}
|
||||
} else if (auto* nullable = check_and_get_column<ColumnNullable>(source_col.get())) {
|
||||
// match null
|
||||
if (type_ptr->is_nullable()) {
|
||||
if (column_ptr->only_null()) {
|
||||
block.get_by_position(result).column = nullable->get_null_map_column_ptr();
|
||||
return Status::OK();
|
||||
}
|
||||
} else {
|
||||
values = check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
|
||||
}
|
||||
}
|
||||
|
||||
if (!values) {
|
||||
LOG(WARNING) << "Illegal column " << source_col->get_name();
|
||||
return Status::InternalError("Not supported input column types");
|
||||
}
|
||||
// result column
|
||||
auto res = ColumnUInt8::create();
|
||||
ColumnUInt8::Container& vec_res = res->get_data();
|
||||
// set default value to 0, and match functions only need to set 1/true
|
||||
vec_res.resize_fill(input_rows_count);
|
||||
RETURN_IF_ERROR(execute_match(context, column_name, match_query_str, input_rows_count, values,
|
||||
inverted_index_ctx,
|
||||
(array_col ? &(array_col->get_offsets()) : nullptr), vec_res));
|
||||
block.replace_by_position(result, std::move(res));
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@ -95,6 +95,11 @@ public:
|
||||
int32_t& current_src_array_offset) const;
|
||||
|
||||
Status check(FunctionContext* context, const std::string& function_name) const;
|
||||
Status evaluate_inverted_index(
|
||||
const ColumnsWithTypeAndName& arguments,
|
||||
const std::vector<vectorized::IndexFieldNameAndTypePair>& data_type_with_names,
|
||||
std::vector<segment_v2::InvertedIndexIterator*> iterators, uint32_t num_rows,
|
||||
segment_v2::InvertedIndexResultBitmap& bitmap_result) const override;
|
||||
};
|
||||
|
||||
class FunctionMatchAny : public FunctionMatchBase {
|
||||
|
||||
@ -2,6 +2,18 @@
|
||||
-- !sql --
|
||||
863
|
||||
|
||||
-- !sql --
|
||||
235
|
||||
|
||||
-- !sql --
|
||||
166
|
||||
|
||||
-- !sql --
|
||||
56
|
||||
|
||||
-- !sql --
|
||||
7
|
||||
|
||||
-- !sql --
|
||||
863
|
||||
|
||||
@ -14,21 +26,12 @@
|
||||
-- !sql --
|
||||
235
|
||||
|
||||
-- !sql --
|
||||
235
|
||||
|
||||
-- !sql --
|
||||
166
|
||||
|
||||
-- !sql --
|
||||
166
|
||||
|
||||
-- !sql --
|
||||
166
|
||||
|
||||
-- !sql --
|
||||
56
|
||||
|
||||
-- !sql --
|
||||
56
|
||||
|
||||
@ -41,6 +44,3 @@
|
||||
-- !sql --
|
||||
7
|
||||
|
||||
-- !sql --
|
||||
7
|
||||
|
||||
|
||||
@ -96,10 +96,10 @@ suite("test_all_index_hit_fault_injection", "nonConcurrent") {
|
||||
load_httplogs_data.call(indexTbName2, 'test_all_index_hit_fault_injection_2', 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
try {
|
||||
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._read_columns_by_index", [column_name: "clientip,request"])
|
||||
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator.fast_execute", [column_name: "status,size"])
|
||||
GetDebugPoint().enableDebugPointForAllBEs("VectorizedFnCall.must_in_slow_path", [column_name: "status,size"])
|
||||
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where (request match_phrase 'hm'); """
|
||||
@ -124,7 +124,7 @@ suite("test_all_index_hit_fault_injection", "nonConcurrent") {
|
||||
|
||||
} finally {
|
||||
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
|
||||
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator.fast_execute")
|
||||
GetDebugPoint().disableDebugPointForAllBEs("VectorizedFnCall.must_in_slow_path")
|
||||
}
|
||||
} finally {
|
||||
}
|
||||
|
||||
@ -76,8 +76,6 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") {
|
||||
sql "sync"
|
||||
|
||||
try {
|
||||
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._rowid_result_for_index")
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
|
||||
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName} where clientip in ('40.135.0.0', '232.0.0.0', '26.1.0.0'); """
|
||||
@ -88,7 +86,6 @@ suite("test_index_inlist_fault_injection", "nonConcurrent") {
|
||||
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName} where (request match 'hm' and status in (1, 304, 200)); """
|
||||
|
||||
} finally {
|
||||
GetDebugPoint().disableDebugPointForAllBEs("segment_iterator._rowid_result_for_index")
|
||||
}
|
||||
|
||||
try {
|
||||
|
||||
@ -59,6 +59,7 @@ suite("test_index_lowercase_fault_injection", "nonConcurrent") {
|
||||
sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 3781); """
|
||||
|
||||
sql 'sync'
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql """ select count() from ${testTable} where (request match 'HTTP'); """
|
||||
qt_sql """ select count() from ${testTable} where (request match 'http'); """
|
||||
|
||||
@ -47,6 +47,7 @@ suite("test_index_mow_fault_injection", "nonConcurrent") {
|
||||
try {
|
||||
sql "DROP TABLE IF EXISTS ${testTable_unique}"
|
||||
create_httplogs_unique_table.call(testTable_unique)
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
sql """ INSERT INTO ${testTable_unique} VALUES (893964617, '40.135.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 24736); """
|
||||
sql """ INSERT INTO ${testTable_unique} VALUES (893964653, '232.0.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 3781); """
|
||||
|
||||
@ -73,6 +73,7 @@ suite("test_need_read_data_fault_injection", "nonConcurrent") {
|
||||
load_httplogs_data.call(indexTbName, 'test_need_read_data_fault_injection', 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
try {
|
||||
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator._read_columns_by_index")
|
||||
|
||||
@ -96,6 +96,7 @@ suite("test_topn_fault_injection", "nonConcurrent") {
|
||||
load_httplogs_data.call(indexTbName2, 'test_topn_fault_injection2', 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
try {
|
||||
GetDebugPoint().enableDebugPointForAllBEs("segment_iterator.topn_opt_1")
|
||||
|
||||
@ -45,7 +45,7 @@ suite("test_char_replace") {
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
);
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
def var_result = sql "show variables"
|
||||
logger.info("show variales result: " + var_result )
|
||||
|
||||
|
||||
@ -155,6 +155,7 @@ suite("test_index_compaction_dup_keys", "nonConcurrent") {
|
||||
sql """ INSERT INTO ${tableName} VALUES (2, "bason", "bason hate pear", 99); """
|
||||
sql """ INSERT INTO ${tableName} VALUES (3, "andy", "andy love apple", 100); """
|
||||
sql """ INSERT INTO ${tableName} VALUES (3, "bason", "bason hate pear", 99); """
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql """ select * from ${tableName} order by id, name, hobbies, score """
|
||||
qt_sql """ select * from ${tableName} where name match "andy" order by id, name, hobbies, score """
|
||||
|
||||
@ -295,6 +295,7 @@ suite("test_index_compaction_null", "nonConcurrent") {
|
||||
"inverted_index_storage_format" = "V1"
|
||||
)
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
tablets = sql_return_maparray """ show tablets from ${tableName}; """
|
||||
run_test.call(tablets)
|
||||
|
||||
@ -153,6 +153,7 @@ suite("test_index_compaction_unique_keys", "nonConcurrent") {
|
||||
"inverted_index_storage_format" = "V1"
|
||||
);
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
sql """ INSERT INTO ${tableName} VALUES (1, "andy", "andy love apple", 100); """
|
||||
sql """ INSERT INTO ${tableName} VALUES (1, "bason", "bason hate pear", 99); """
|
||||
|
||||
@ -135,6 +135,7 @@ suite("test_index_compaction_with_multi_index_segments", "nonConcurrent") {
|
||||
// check config
|
||||
check_config.call("inverted_index_compaction_enable", "true")
|
||||
check_config.call("inverted_index_max_buffered_docs", "5")
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
/**
|
||||
* test duplicated tables
|
||||
|
||||
@ -155,6 +155,7 @@ suite("test_cumulative_compaction_with_format_v2", "inverted_index_format_v2") {
|
||||
"""
|
||||
|
||||
sql """ sync """
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_select_default """ SELECT * FROM ${tableName} t WHERE city MATCH 'Beijing' ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """
|
||||
|
||||
|
||||
@ -95,6 +95,7 @@ suite("test_drop_column_with_format_v2", "inverted_index_format_v2"){
|
||||
sql """ INSERT INTO ${tableName} VALUES (2, "bason", 99); """
|
||||
sql """ INSERT INTO ${tableName} VALUES (3, "andy", 100); """
|
||||
sql """ INSERT INTO ${tableName} VALUES (3, "bason", 99); """
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql "SELECT * FROM $tableName WHERE name match 'andy' order by id, name, score;"
|
||||
|
||||
|
||||
@ -74,6 +74,7 @@ suite("test_drop_index_with_format_v2", "inverted_index_format_v2"){
|
||||
sql """ INSERT INTO ${tableName} VALUES (2, "bason", 99); """
|
||||
sql """ INSERT INTO ${tableName} VALUES (3, "andy", 100); """
|
||||
sql """ INSERT INTO ${tableName} VALUES (3, "bason", 99); """
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql "SELECT * FROM $tableName WHERE name match 'andy' order by id, name, score;"
|
||||
|
||||
|
||||
@ -155,6 +155,7 @@ suite("test_mor_table_with_format_v2", "inverted_index_format_v2") {
|
||||
"""
|
||||
|
||||
sql """ sync """
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
sql """ DELETE FROM ${tableName} WHERE user_id = 3 """
|
||||
|
||||
|
||||
@ -155,6 +155,7 @@ suite("test_mow_table_with_format_v2", "inverted_index_format_v2") {
|
||||
"""
|
||||
|
||||
sql """ sync """
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_select_default """ SELECT * FROM ${tableName} t WHERE city MATCH 'Beijing' ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """
|
||||
|
||||
|
||||
@ -67,6 +67,7 @@ suite("test_rename_column_with_format_v2", "inverted_index_format_v2"){
|
||||
sql """ INSERT INTO ${tableName} VALUES (2, "bason", 99); """
|
||||
sql """ INSERT INTO ${tableName} VALUES (3, "andy", 100); """
|
||||
sql """ INSERT INTO ${tableName} VALUES (3, "bason", 99); """
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql "SELECT * FROM $tableName WHERE name match 'andy' order by id, name, score;"
|
||||
|
||||
|
||||
@ -159,6 +159,7 @@ suite("test_single_replica_compaction_with_format_v2", "inverted_index_format_v2
|
||||
"""
|
||||
|
||||
sql """ sync """
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_select_default """ SELECT * FROM ${tableName} t WHERE city MATCH 'Beijing' ORDER BY user_id,date,city,age,sex,last_visit_date,last_update_date,last_visit_date_not_null,cost,max_dwell_time,min_dwell_time; """
|
||||
|
||||
|
||||
@ -98,6 +98,7 @@ suite("test_storage_format_v1", "p0") {
|
||||
create_httplogs_dup_table.call(testTable_dup)
|
||||
load_httplogs_data.call(testTable_dup, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql(" select COUNT(*) from ${testTable_dup} where request match 'images' ")
|
||||
|
||||
|
||||
@ -63,6 +63,7 @@ suite("test_array_contains_with_inverted_index"){
|
||||
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '0974e7a82e30d1af83205e474fadd0a2', '[\"w\"]'); """
|
||||
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', '26823b3995ee38bd145ddd910b2f6300', '[\"x\"]'); """
|
||||
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2019-01-01', 'ee27ee1da291e46403c408e220bed6e1', '[\"y\"]'); """
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql """ select count() from ${indexTblName}"""
|
||||
order_qt_sql """ select * from tai where array_contains(inventors, 's') order by id; """
|
||||
|
||||
@ -61,6 +61,7 @@ suite("test_array_index1"){
|
||||
"enable_single_replica_compaction" = "false"
|
||||
);
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', '6afef581285b6608bf80d5a4e46cf839', '[\"a\", \"b\", \"c\"]'); """
|
||||
sql """ INSERT INTO `${indexTblName}`(`apply_date`, `id`, `inventors`) VALUES ('2017-01-01', 'd93d942d985a8fb7547c72dada8d332d', '[\"d\", \"e\", \"f\", \"g\", \"h\", \"i\", \"j\", \"k\", \"l\"]'); """
|
||||
|
||||
@ -279,6 +279,7 @@ suite("test_array_with_inverted_index_all_type"){
|
||||
}
|
||||
|
||||
// query test
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
for (int i = 0; i < 6; i+=1) {
|
||||
def indexTblName = indexTblNames[i]
|
||||
|
||||
@ -98,6 +98,7 @@ suite("test_chinese_analyzer"){
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
);
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
sql "INSERT INTO $indexTblName3 VALUES (1, '我来到北京清华大学'), (2, '我爱你中国'), (3, '人民可以得到更多实惠'), (4, '陕西省西安市高新区创业大厦A座,我的手机号码是12345678901,邮箱是12345678@qq.com,,ip是1.1.1.1,this information is created automatically.');"
|
||||
qt_sql "SELECT * FROM $indexTblName3 WHERE c MATCH_PHRASE '我爱你' ORDER BY id;"
|
||||
|
||||
@ -57,6 +57,7 @@ suite("test_compound", "p0"){
|
||||
(9, '9', '9', '9'),
|
||||
(10, '10', '10', '10');
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql "SELECT count() FROM $indexTblName WHERE (id >= 2 AND id < 9) and (a match '2' or b match '5' and c match '5');"
|
||||
qt_sql "SELECT count() FROM $indexTblName WHERE (id >= 2 AND id < 9) and (a match '2' or b match '5' or c match '6');"
|
||||
|
||||
@ -80,6 +80,7 @@ suite("test_compound_1", "p0"){
|
||||
load_httplogs_data.call(indexTbName, 'test_compound_1', 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName} where (request match_phrase 'english' and clientip match_phrase '4' or request match_phrase 'images'); """
|
||||
qt_sql """ select count() from ${indexTbName} where (request match_phrase 'hm' and clientip match_phrase '3' or request match_phrase 'gif'); """
|
||||
|
||||
@ -103,6 +103,7 @@ suite("test_compound_inlist", "nonConcurrent"){
|
||||
load_httplogs_data.call(indexTbName2, 'test_compound_list_2', 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName1} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304)); """
|
||||
qt_sql """ select /*+ SET_VAR(inverted_index_skip_threshold = 0) */ count() from ${indexTbName2} where (((request match_phrase 'images' and clientip match_phrase '3') or (request match_phrase 'english' and clientip match_phrase '4')) and status in (1, 2, 304)); """
|
||||
|
||||
@ -141,6 +141,7 @@ suite("test_count_on_index_httplogs", "p0") {
|
||||
stream_load_data.call(testTable_unique, 'test_httplogs_load_count_on_index', 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
sql """set experimental_enable_nereids_planner=true;"""
|
||||
sql """set enable_fallback_to_original_planner=false;"""
|
||||
// case1: test duplicate table
|
||||
|
||||
@ -153,6 +153,7 @@ suite("test_count_on_index_2", "p0"){
|
||||
load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453; """
|
||||
qt_sql """ select count() from ${indexTbName2} where `@timestamp` >= 893964736 and `@timestamp` <= 893966453; """
|
||||
|
||||
@ -47,6 +47,7 @@ suite("test_delete"){
|
||||
"enable_single_replica_compaction" = "false"
|
||||
);
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
sql """ INSERT INTO `${indexTblName}`(`a`, `b`, `c`) VALUES ('1', '6afef581285b6608bf80d5a4e46cf839', 'aaa'), ('2', '48a33ec3453a28bce84b8f96fe161956', 'bbb'),
|
||||
('3', '021603e7dcfe65d44af0efd0e5aee154', 'ccc'), ('4', 'ee27ee1da291e46403c408e220bed6e1', 'ddd'),
|
||||
|
||||
@ -45,6 +45,7 @@ suite("test_equal_on_fulltext", "p0"){
|
||||
|
||||
def var_result = sql "show variables"
|
||||
logger.info("show variales result: " + var_result )
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
sql "INSERT INTO $indexTblName VALUES (1, 'I am the person'), (2, 'I am a person'), (3, 'I am your person');"
|
||||
qt_sql "SELECT * FROM $indexTblName WHERE c = 'I am a person' ORDER BY id;"
|
||||
|
||||
@ -38,6 +38,7 @@ suite("test_index_chinese_column", "inverted_index_select"){
|
||||
def table_name_v2 = "test_index_chinese_column_v2"
|
||||
|
||||
sql "set enable_unicode_name_support=true"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
createAndInsertData(table_name_v1, "V1")
|
||||
createAndInsertData(table_name_v2, "V2")
|
||||
|
||||
@ -101,6 +101,7 @@ suite("test_index_complex_match", "p0"){
|
||||
load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where clientip match_phrase '247.37.0.0'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where clientip match_phrase_prefix '247'; """
|
||||
|
||||
@ -49,6 +49,7 @@ suite("test_index_delete", "p0") {
|
||||
|
||||
try {
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
sql """ delete from ${indexTbName1} where a >= 9; """
|
||||
sql "sync"
|
||||
|
||||
@ -47,7 +47,7 @@ suite("test_index_empty_string", "p0"){
|
||||
(1, '', '1'),
|
||||
(2, '2', '');
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
qt_sql "SELECT count() FROM $indexTblName WHERE a match '';"
|
||||
qt_sql "SELECT count() FROM $indexTblName WHERE b match '';"
|
||||
}
|
||||
|
||||
@ -54,6 +54,7 @@ suite("test_index_equal_select", "inverted_index_select"){
|
||||
("san zhang", 10, "grade 5", "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", "", "", "", ""),
|
||||
("li sisi", 11, "grade 6", "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "", "li ba", "li liuliu", "")
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
// case1: test equal
|
||||
// case1.0: test index coulume equal ‘’
|
||||
|
||||
@ -44,6 +44,7 @@ suite("test_index_key_match_select", "inverted_index_select"){
|
||||
("u3", ["u1"]),
|
||||
("u4", ["u3"])
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
qt_sql "SELECT * FROM ${indexTbName1} WHERE user MATCH_ANY 'u1, u2' ORDER BY user LIMIT 10;"
|
||||
qt_sql "SELECT * FROM ${indexTbName1} WHERE user MATCH_ANY 'u1, u2, u3' ORDER BY user LIMIT 10;"
|
||||
}
|
||||
@ -153,6 +153,7 @@ suite("test_index_match_phrase_select", "inverted_index_select"){
|
||||
sql """ build index ${text_colume1}_idx on ${indexTbName1} """
|
||||
wait_for_build_index_on_partition_finish(indexTbName1, timeout)
|
||||
}
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
|
||||
// case1: test match_phrase ""
|
||||
try {
|
||||
|
||||
@ -16,7 +16,7 @@
|
||||
// under the License.
|
||||
|
||||
|
||||
suite("test_index_match_phrase_edge", "p0"){
|
||||
suite("test_index_match_phrase_edge", "nonConcurrent"){
|
||||
def indexTbName1 = "test_index_match_phrase_edge"
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName1}"
|
||||
@ -56,6 +56,8 @@ suite("test_index_match_phrase_edge", "p0"){
|
||||
|
||||
try {
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
|
||||
qt_sql """ select * from ${indexTbName1} where b match_phrase_edge 'x.h'; """
|
||||
qt_sql """ select * from ${indexTbName1} where b match_phrase_edge 'v_i'; """
|
||||
@ -75,6 +77,6 @@ suite("test_index_match_phrase_edge", "p0"){
|
||||
qt_sql """ select count() from ${indexTbName1} where c match_phrase_edge 'b'; """
|
||||
|
||||
} finally {
|
||||
//try_sql("DROP TABLE IF EXISTS ${testTable}")
|
||||
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
}
|
||||
}
|
||||
@ -16,7 +16,7 @@
|
||||
// under the License.
|
||||
|
||||
|
||||
suite("test_index_match_phrase_ordered", "p0"){
|
||||
suite("test_index_match_phrase_ordered", "nonConcurrent"){
|
||||
def indexTbName1 = "test_index_match_phrase_ordered"
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName1}"
|
||||
@ -49,6 +49,8 @@ suite("test_index_match_phrase_ordered", "p0"){
|
||||
|
||||
try {
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where b match_phrase 'the lazy'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where b match_phrase 'the lazy ~1'; """
|
||||
@ -82,6 +84,6 @@ suite("test_index_match_phrase_ordered", "p0"){
|
||||
qt_sql """ select count() from ${indexTbName1} where b match_phrase 'the quick ~6'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where b match_phrase 'the quick ~6+'; """
|
||||
} finally {
|
||||
//try_sql("DROP TABLE IF EXISTS ${testTable}")
|
||||
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
}
|
||||
}
|
||||
@ -16,7 +16,7 @@
|
||||
// under the License.
|
||||
|
||||
|
||||
suite("test_index_match_phrase_prefix", "p0"){
|
||||
suite("test_index_match_phrase_prefix", "nonConcurrent"){
|
||||
def indexTbName1 = "test_index_match_phrase_prefix"
|
||||
def indexTbName2 = "test_index_match_phrase_prefix2"
|
||||
|
||||
@ -98,28 +98,32 @@ suite("test_index_match_phrase_prefix", "p0"){
|
||||
load_httplogs_data.call(indexTbName2, indexTbName2, 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'ima'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/h'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/hm'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/images/n'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/tickets/images/ti'; """
|
||||
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
|
||||
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'ima'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request like '%ima%'; """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/h'; """
|
||||
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'images/h'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request like '%images/h%'; """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix 'images/hm'; """
|
||||
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix 'images/hm'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request like '%images/hm%'; """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/images/n'; """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix '/french/images/n'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request like '%/french/images/n%'; """
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase_prefix '/french/tickets/images/ti'; """
|
||||
qt_sql """ select count() from ${indexTbName2} where request match_phrase_prefix '/french/tickets/images/ti'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request like '%/french/tickets/images/ti%'; """
|
||||
|
||||
} finally {
|
||||
//try_sql("DROP TABLE IF EXISTS ${testTable}")
|
||||
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
}
|
||||
}
|
||||
@ -16,7 +16,7 @@
|
||||
// under the License.
|
||||
|
||||
|
||||
suite("test_index_match_phrase_prefix_1", "p0"){
|
||||
suite("test_index_match_phrase_prefix_1", "nonConcurrent"){
|
||||
def indexTbName1 = "test_index_match_phrase_prefix_1"
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName1}"
|
||||
@ -49,6 +49,8 @@ suite("test_index_match_phrase_prefix_1", "p0"){
|
||||
|
||||
try {
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where c match_phrase_prefix 'O1704361998540E2Cemx9S'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where d match_phrase_prefix 'O1704361998540E2Cemx9S'; """
|
||||
@ -57,6 +59,6 @@ suite("test_index_match_phrase_prefix_1", "p0"){
|
||||
qt_sql """ select count() from ${indexTbName1} where d match_phrase_prefix 'O1704361998540E2Cemx9S=123456789'; """
|
||||
|
||||
} finally {
|
||||
//try_sql("DROP TABLE IF EXISTS ${testTable}")
|
||||
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
}
|
||||
}
|
||||
@ -16,7 +16,7 @@
|
||||
// under the License.
|
||||
|
||||
|
||||
suite("test_index_match_phrase_slop", "p0"){
|
||||
suite("test_index_match_phrase_slop", "nonConcurrent"){
|
||||
def indexTbName1 = "test_index_match_phrase_slop"
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName1}"
|
||||
@ -82,6 +82,8 @@ suite("test_index_match_phrase_slop", "p0"){
|
||||
sql """ INSERT INTO ${indexTbName1} VALUES (1, "127.0.0.1", "I'm glad I kept my fingers crossed ~4", 1, 1); """
|
||||
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase 'get jpg'; """
|
||||
qt_sql """ select count() from ${indexTbName1} where request match_phrase 'get jpg ~2'; """
|
||||
@ -117,6 +119,6 @@ suite("test_index_match_phrase_slop", "p0"){
|
||||
qt_sql """ select * from ${indexTbName1} where request match_phrase 'glad crossed \\~4'; """
|
||||
|
||||
} finally {
|
||||
//try_sql("DROP TABLE IF EXISTS ${testTable}")
|
||||
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
}
|
||||
}
|
||||
@ -16,7 +16,7 @@
|
||||
// under the License.
|
||||
|
||||
|
||||
suite("test_index_match_regexp", "p0"){
|
||||
suite("test_index_match_regexp", "nonConcurrent"){
|
||||
def indexTbName1 = "test_index_match_regexp"
|
||||
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName1}"
|
||||
@ -79,6 +79,8 @@ suite("test_index_match_regexp", "p0"){
|
||||
load_httplogs_data.call(indexTbName1, 'test_index_match_regexp', 'true', 'json', 'documents-1000.json')
|
||||
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
GetDebugPoint().enableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
|
||||
qt_sql """ select count() from test_index_match_regexp where request match_regexp ''; """
|
||||
qt_sql """ select count() from test_index_match_regexp where request match_regexp '^h'; """
|
||||
@ -89,6 +91,6 @@ suite("test_index_match_regexp", "p0"){
|
||||
qt_sql """ select count() from test_index_match_regexp where request match_regexp 'nonexistence'; """
|
||||
|
||||
} finally {
|
||||
//try_sql("DROP TABLE IF EXISTS ${testTable}")
|
||||
GetDebugPoint().disableDebugPointForAllBEs("VMatchPredicate.execute")
|
||||
}
|
||||
}
|
||||
@ -152,6 +152,7 @@ suite("test_index_match_select", "inverted_index_select"){
|
||||
sql """ build index ${text_colume1}_idx on ${indexTbName1} """
|
||||
wait_for_build_index_on_partition_finish(indexTbName1, timeout)
|
||||
}
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
|
||||
// case1: match term
|
||||
// case1.0 test match ""
|
||||
|
||||
@ -126,4 +126,4 @@ suite("test_index_multi_match", "p0"){
|
||||
} finally {
|
||||
//try_sql("DROP TABLE IF EXISTS ${testTable}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -54,6 +54,7 @@ suite("test_index_no_need_read_data", "inverted_index_select"){
|
||||
|
||||
// case1: enable nereids planner
|
||||
sql "set enable_nereids_planner = true"
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
|
||||
qt_select_nereids_0 "SELECT * FROM ${table1} ORDER BY id"
|
||||
qt_select_nereids_1 "SELECT count() FROM ${table1} WHERE n > 100"
|
||||
|
||||
@ -42,7 +42,7 @@ suite("test__null_index", "inverted_index"){
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
);
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
sql "INSERT INTO $indexTblName VALUES (1, []), (2, []), (3, []);"
|
||||
qt_sql "SELECT * FROM $indexTblName WHERE value match_all 'a';"
|
||||
}
|
||||
|
||||
@ -117,6 +117,7 @@ suite("test_index_range_between_select", "inverted_index_select"){
|
||||
"""
|
||||
wait_for_latest_op_on_table_finish(indexTbName1, timeout)
|
||||
}
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
|
||||
// case1: test simple between case
|
||||
// case1.0: test data index column select in specific between condition
|
||||
|
||||
@ -54,7 +54,7 @@ suite("test_index_range_bigger_and_equal_select", "inverted_index_select"){
|
||||
("san zhang", 10, "grade 5", "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", "", "", "", ""),
|
||||
("li sisi", 11, "grade 6", "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "", "li ba", "li liuliu", "")
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
// case1. test >=
|
||||
// case1.0: test only >=
|
||||
qt_sql "select * from ${indexTbName1} where name>='' order by name "
|
||||
|
||||
@ -54,7 +54,7 @@ suite("test_index_range_bigger_select", "inverted_index_select"){
|
||||
("san zhang", 10, "grade 5", "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", "", "", "", ""),
|
||||
("li sisi", 11, "grade 6", "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "", "li ba", "li liuliu", "")
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
// case1. test >
|
||||
// case1.0: test only >
|
||||
qt_sql "select * from ${indexTbName1} where name>'' order by name "
|
||||
|
||||
@ -151,6 +151,7 @@ suite("test_index_range_in_select", "inverted_index_select"){
|
||||
sql """ build index ${text_colume1}_idx on ${indexTbName1} """
|
||||
wait_for_build_index_on_partition_finish(indexTbName1, timeout)
|
||||
}
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
|
||||
// case1: select in
|
||||
// case1.0: select in specific condition
|
||||
|
||||
@ -111,6 +111,7 @@ suite("test_index_range_not_in_select", "inverted_index_select"){
|
||||
}
|
||||
assertTrue(useTime <= OpTimeout, "wait_for_latest_build_index_on_partition_finish timeout")
|
||||
}
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
logger.info("select table with index times " + i)
|
||||
|
||||
@ -54,7 +54,7 @@ suite("test_index_range_smaller_and_equal_select", "inverted_index_select"){
|
||||
("san zhang", 10, "grade 5", "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", "", "", "", ""),
|
||||
("li sisi", 11, "grade 6", "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "", "li ba", "li liuliu", "")
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
// case1. test <=
|
||||
// case1.0: test only <=
|
||||
qt_sql "select * from ${indexTbName1} where name<='' order by name"
|
||||
|
||||
@ -54,7 +54,7 @@ suite("test_index_range_smaller_select", "inverted_index_select"){
|
||||
("san zhang", 10, "grade 5", "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", "", "", "", ""),
|
||||
("li sisi", 11, "grade 6", "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "", "li ba", "li liuliu", "")
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
// case1. test <
|
||||
// case1.0: test only <
|
||||
sql "select * from ${indexTbName1} where name<'' order by name "
|
||||
|
||||
@ -142,7 +142,7 @@ suite("test_index_rqg_bug", "test_index_rqg_bug"){
|
||||
(48, -10, 7, 3, 4, -1290467110130692882, NULL, -5421887030808227301, 2147894047624029750, '2023-12-20', '2026-02-18', '2023-12-10', '2024-02-18', 'v', 'f', 'u', 'z', 'w', 'l', 'i', 'b'),
|
||||
(49, 4, -10, -10, -4, 7177870619817484302, 2010854013707344984, 515636226818986547, -4617727694631456148, '2023-12-14', '2024-01-09', '2023-12-11', '2024-01-08', 'k', 'o', 'r', 'h', 'x', 'v', 'm', 'r');
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
qt_select_bug_1 """
|
||||
SELECT
|
||||
MIN(DISTINCT table1.col_date_undef_signed_not_null) AS field1,
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -126,6 +126,7 @@ suite("test_index_rqg_bug4", "test_index_rqg_bug"){
|
||||
|
||||
try {
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
|
||||
qt_sql """
|
||||
select
|
||||
|
||||
@ -21,7 +21,7 @@ suite("test_index_skip_read_data", "p0"){
|
||||
def indexTbName2 = "test_index_skip_read_data_mow"
|
||||
def indexTbName3 = "test_index_skip_read_data_mor"
|
||||
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
// dup
|
||||
sql "DROP TABLE IF EXISTS ${indexTbName1}"
|
||||
|
||||
|
||||
@ -21,6 +21,7 @@ suite("test_inverted_index", "inverted_index") {
|
||||
def jobStateResult = sql """ SHOW ALTER TABLE COLUMN WHERE TableName='${tableName}' ORDER BY createtime DESC LIMIT 1 """
|
||||
return jobStateResult[0][9]
|
||||
}
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
sql "DROP TABLE IF EXISTS ${tbName1}"
|
||||
sql """
|
||||
CREATE TABLE IF NOT EXISTS ${tbName1} (
|
||||
|
||||
@ -68,7 +68,7 @@ suite("test_inverted_index_keyword"){
|
||||
(3, '我在北京市'),
|
||||
(3, '我在西安市')
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
qt_sql "SELECT * FROM ${indexTblName} where c match '330204195805121025'";
|
||||
qt_sql "SELECT * FROM ${indexTblName} where c match '36'";
|
||||
qt_sql "SELECT * FROM ${indexTblName} where c match '330225197806187713'";
|
||||
|
||||
@ -48,7 +48,7 @@ suite("test_inverted_index_mor", "p0"){
|
||||
|
||||
sql """ INSERT INTO $indexTblName VALUES (1, 2, 12, 1.2, '1 2'), (3, 4, 34, 3.4, '3 4'); """
|
||||
sql """ INSERT INTO $indexTblName VALUES (11, 12, 1112, 11.12, '11 22'), (13, 14, 1314, 13.14, '13 14'); """
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true; """
|
||||
// original data
|
||||
qt_11 """ SELECT * FROM $indexTblName ORDER BY k1,k2 """
|
||||
|
||||
|
||||
@ -53,6 +53,7 @@ suite("test_inverted_index_null") {
|
||||
(7,'tengxun','qie','addr gg','lj',null),
|
||||
(8,'tengxun2','qie',null,'lj',800)
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
// select all data
|
||||
qt_select_0 "SELECT * FROM ${table1} ORDER BY id"
|
||||
|
||||
@ -105,6 +105,7 @@ suite("test_inverted_index_null_ram_dir") {
|
||||
(7,'tengxun','qie','addr gg','lj',null),
|
||||
(8,'tengxun2','qie',null,'lj',800)
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
// select all data
|
||||
qt_select_0 "SELECT * FROM ${tableName} ORDER BY id"
|
||||
|
||||
@ -56,6 +56,7 @@ suite("test_lowercase"){
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
);
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
sql "INSERT INTO $indexTblName2 VALUES (1, 'hello 我来到北京清华大学'), (2, 'HELLO 我爱你中国'), (3, 'Hello 人民可以得到更多实惠');"
|
||||
qt_sql "SELECT * FROM $indexTblName2 WHERE c MATCH 'hello' ORDER BY id";
|
||||
|
||||
@ -59,7 +59,7 @@ suite("test_match_query_without_index", "inverted_index_select"){
|
||||
("san zhang", "grade 5", "", "", "", 10, "2017-10-01", "tall:100cm, weight: 30kg, hobbies:", "", ""),
|
||||
("li sisi", "grade 6", "li ba", "li liuliu", "", 11, "2016-10-01", "tall:150cm, weight: 40kg, hobbies: sing, dancing, running", "good at handiwork and beaty", "")
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
// case1: match any
|
||||
try {
|
||||
sql """ select * from ${indexTbName1} where ${varchar_colume1} match_any "" order by name; """
|
||||
|
||||
@ -39,6 +39,7 @@ suite("test_match_without_index", "p0") {
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
);
|
||||
"""
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
sql """ INSERT INTO ${testTable} VALUES (123, '17.0.0.0', 'HTTP GET', '200', 20); """
|
||||
sql """ INSERT INTO ${testTable} VALUES (123, '17.0.0.0', 'Life is like a box of chocolates, you never know what you are going to get.', '200', 20); """
|
||||
|
||||
File diff suppressed because one or more lines are too long
@ -44,7 +44,7 @@ suite("test_null_index", "p0"){
|
||||
"replication_allocation" = "tag.location.default: 1"
|
||||
);
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
sql "INSERT INTO $indexTblName VALUES (1, 'a', null, [null], [1]), (2, 'b', 'b', ['b'], [2]), (3, 'c', 'c', ['c'], [3]);"
|
||||
qt_sql "SELECT * FROM $indexTblName WHERE str match null order by id;"
|
||||
qt_sql "SELECT * FROM $indexTblName WHERE str_null match null order by id;"
|
||||
|
||||
@ -64,6 +64,7 @@ suite("test_or_not_match", "p0") {
|
||||
|
||||
sql "set enable_nereids_planner = true"
|
||||
sql "set enable_fallback_to_original_planner = false"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql "select request from ${tableName} where request like '1.0' or not request MATCH 'GETA' order by request limit 2;"
|
||||
}
|
||||
|
||||
@ -54,6 +54,7 @@ suite("test_pk_no_need_read_data", "p0"){
|
||||
|
||||
// case1: enable count on index
|
||||
sql "set enable_count_on_index_pushdown = true"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_select_0 "SELECT COUNT() FROM ${table1} WHERE date='2017-10-01'"
|
||||
qt_select_1 "SELECT COUNT() FROM ${table1} WHERE year(date)='2017'"
|
||||
|
||||
@ -42,6 +42,7 @@ suite("test_stopwords", "p0"){
|
||||
|
||||
try {
|
||||
sql "sync"
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
qt_sql """ select * from ${indexTbName} where b match 'a'; """
|
||||
qt_sql """ select * from ${indexTbName} where b match 'are'; """
|
||||
|
||||
@ -40,6 +40,7 @@ suite("test_clear_block") {
|
||||
}
|
||||
|
||||
sql """ set enable_match_without_inverted_index = false; """
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
// sql """ set
|
||||
def dupTableName = "dup_httplogs"
|
||||
sql """ drop table if exists ${dupTableName} """
|
||||
|
||||
@ -38,6 +38,7 @@ suite("test_mow_with_null_sequence", "inverted_index") {
|
||||
);
|
||||
"""
|
||||
|
||||
sql """ set enable_common_expr_pushdown = true """
|
||||
|
||||
sql """ insert into $tableName values('a', 'zhang san', 'address1', NULL) """
|
||||
sql """ insert into $tableName values('a', 'zhang si', 'address2', '2022-10-20') """
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user