diff --git a/be/src/common/consts.h b/be/src/common/consts.h index 22aaae512c..4e2045a1ce 100644 --- a/be/src/common/consts.h +++ b/be/src/common/consts.h @@ -24,6 +24,7 @@ namespace BeConsts { const std::string CSV = "csv"; const std::string CSV_WITH_NAMES = "csv_with_names"; const std::string CSV_WITH_NAMES_AND_TYPES = "csv_with_names_and_types"; +const std::string BLOCK_TEMP_COLUMN_PREFIX = "__TEMP__"; constexpr int MAX_DECIMAL32_PRECISION = 9; constexpr int MAX_DECIMAL64_PRECISION = 18; diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index b3c97723d1..c87df3512d 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -37,6 +37,7 @@ #include "exprs/is_null_predicate.h" #include "exprs/json_functions.h" #include "exprs/like_predicate.h" +#include "exprs/match_predicate.h" #include "exprs/math_functions.h" #include "exprs/new_in_predicate.h" #include "exprs/operators.h" @@ -404,6 +405,7 @@ void Daemon::init(int argc, char** argv, const std::vector& paths) { HashFunctions::init(); TopNFunctions::init(); DummyTableFunctions::init(); + MatchPredicate::init(); LOG(INFO) << CpuInfo::debug_string(); LOG(INFO) << DiskInfo::debug_string(); diff --git a/be/src/common/status.h b/be/src/common/status.h index cd514305b3..5aee3f48e5 100644 --- a/be/src/common/status.h +++ b/be/src/common/status.h @@ -247,6 +247,8 @@ E(SEGCOMPACTION_INIT_READER, -3117); E(SEGCOMPACTION_INIT_WRITER, -3118); E(SEGCOMPACTION_FAILED, -3119); E(PIP_WAIT_FOR_RF, -3120); +E(INVERTED_INDEX_INVALID_PARAMETERS, -4000); +E(INVERTED_INDEX_NOT_SUPPORTED, -4001); #undef E }; // namespace ErrorCode @@ -356,6 +358,7 @@ public: static Status name(std::string_view msg, Args&&... args) { \ return Error(msg, std::forward(args)...); \ } + ERROR_CTOR(PublishTimeout, PUBLISH_TIMEOUT) ERROR_CTOR(MemoryAllocFailed, MEM_ALLOC_FAILED) ERROR_CTOR(BufferAllocFailed, BUFFER_ALLOCATION_FAILED) diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index 8f19502761..f95e76ef80 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -100,8 +100,12 @@ public: bool is_in_compound_value_range() const; + Status add_match_value(MatchType match_type, const CppType& value); + bool is_fixed_value_range() const; + bool is_match_value_range() const; + bool is_scope_value_range() const; bool is_empty_value_range() const; @@ -172,9 +176,15 @@ public: size_t get_fixed_value_size() const { return _fixed_values.size(); } void to_olap_filter(std::vector& filters) { - if (is_fixed_value_range()) { + if (is_fixed_value_range() || is_match_value_range()) { // 1. convert to in filter condition - to_in_condition(filters, true); + if (is_fixed_value_range()) { + to_in_condition(filters, true); + } + + if (is_match_value_range()) { + to_match_condition(filters); + } } else if (_low_value < _high_value) { // 2. convert to min max filter condition TCondition null_pred; @@ -268,6 +278,36 @@ public: } } + void to_match_condition(std::vector& filters) { + for (const auto& value : _match_values) { + TCondition condition; + condition.__set_column_name(_column_name); + + if (value.first == MatchType::MATCH_ANY) { + condition.__set_condition_op("match_any"); + } else if (value.first == MatchType::MATCH_ALL) { + condition.__set_condition_op("match_all"); + } else if (value.first == MatchType::MATCH_PHRASE) { + condition.__set_condition_op("match_phrase"); + } else if (value.first == MatchType::MATCH_ELEMENT_EQ) { + condition.__set_condition_op("match_element_eq"); + } else if (value.first == MatchType::MATCH_ELEMENT_LT) { + condition.__set_condition_op("match_element_lt"); + } else if (value.first == MatchType::MATCH_ELEMENT_GT) { + condition.__set_condition_op("match_element_gt"); + } else if (value.first == MatchType::MATCH_ELEMENT_LE) { + condition.__set_condition_op("match_element_le"); + } else if (value.first == MatchType::MATCH_ELEMENT_GE) { + condition.__set_condition_op("match_element_ge"); + } + condition.condition_values.push_back( + cast_to_string(value.second, 0)); + if (condition.condition_values.size() != 0) { + filters.push_back(condition); + } + } + } + void set_whole_value_range() { _fixed_values.clear(); _low_value = TYPE_MIN; @@ -320,6 +360,11 @@ public: range.add_compound_value(op, *value); } + static void add_match_value_range(ColumnValueRange& range, MatchType match_type, + CppType* match_value) { + range.add_match_value(match_type, *match_value); + } + static ColumnValueRange create_empty_column_value_range() { return ColumnValueRange::create_empty_column_value_range(""); } @@ -354,7 +399,8 @@ private: CppType _high_value; // Column's high value, open interval at right SQLFilterOp _low_op; SQLFilterOp _high_op; - std::set _fixed_values; // Column's fixed int value + std::set _fixed_values; // Column's fixed int value + std::set> _match_values; // match value using in full-text search bool _contain_null; int _precision; @@ -521,7 +567,18 @@ Status ColumnValueRange::add_compound_value(SQLFilterOp op, CppT _high_value = TYPE_MIN; _low_value = TYPE_MAX; + return Status::OK(); +} +template +Status ColumnValueRange::add_match_value(MatchType match_type, + const CppType& value) { + std::pair match_value(match_type, value); + _match_values.insert(match_value); + _contain_null = false; + + // _high_value = TYPE_MIN; + // _low_value = TYPE_MAX; return Status::OK(); } @@ -540,6 +597,11 @@ bool ColumnValueRange::is_in_compound_value_range() const { return _compound_values.size() != 0; } +template +bool ColumnValueRange::is_match_value_range() const { + return _match_values.size() != 0; +} + template bool ColumnValueRange::is_scope_value_range() const { return _high_value > _low_value; @@ -551,7 +613,8 @@ bool ColumnValueRange::is_empty_value_range() const { return true; } - return (!is_fixed_value_range() && !is_scope_value_range() && !contain_null()); + return (!is_fixed_value_range() && !is_scope_value_range() && !contain_null() && + !is_match_value_range()); } template @@ -883,7 +946,7 @@ void ColumnValueRange::intersection(ColumnValueRange result_values; // 3. fixed_value intersection, fixed value range do not contain null - if (is_fixed_value_range() || range.is_fixed_value_range()) { + if (is_fixed_value_range() || range.is_fixed_value_range() || range.is_match_value_range()) { if (is_fixed_value_range() && range.is_fixed_value_range()) { set_intersection(_fixed_values.begin(), _fixed_values.end(), range._fixed_values.begin(), range._fixed_values.end(), @@ -912,6 +975,10 @@ void ColumnValueRange::intersection(ColumnValueRange& range, } } + // extend ScanKey with MatchValueRange + if (range.is_match_value_range() && _begin_scan_keys.empty()) { + _begin_scan_keys.emplace_back(); + _begin_scan_keys.back().add_value( + cast_to_string(type_limit::min(), 0)); + _end_scan_keys.emplace_back(); + _end_scan_keys.back().add_value( + cast_to_string(type_limit::max(), 0)); + _begin_include = true; + _end_include = true; + *exact_value = false; + // not empty, do nothing + } + // 3.1 extend ScanKey with FixedValueRange if (range.is_fixed_value_range()) { // 3.1.1 construct num of fixed value ScanKey (begin_key == end_key) diff --git a/be/src/exec/olap_utils.h b/be/src/exec/olap_utils.h index 1fe0e21f71..6f1dca46e2 100644 --- a/be/src/exec/olap_utils.h +++ b/be/src/exec/olap_utils.h @@ -237,4 +237,90 @@ inline SQLFilterOp to_olap_filter_type(const std::string& function_name, bool op } } +enum class MatchType { + UNKNOWN = -1, + MATCH_ANY = 0, + MATCH_ALL = 1, + MATCH_PHRASE = 2, + MATCH_ELEMENT_EQ = 3, + MATCH_ELEMENT_LT = 4, + MATCH_ELEMENT_GT = 5, + MATCH_ELEMENT_LE = 6, + MATCH_ELEMENT_GE = 7, +}; + +inline MatchType to_match_type(TExprOpcode::type type) { + switch (type) { + case TExprOpcode::type::MATCH_ANY: + return MatchType::MATCH_ANY; + break; + case TExprOpcode::type::MATCH_ALL: + return MatchType::MATCH_ALL; + break; + case TExprOpcode::type::MATCH_PHRASE: + return MatchType::MATCH_PHRASE; + break; + case TExprOpcode::type::MATCH_ELEMENT_EQ: + return MatchType::MATCH_ELEMENT_EQ; + break; + case TExprOpcode::type::MATCH_ELEMENT_LT: + return MatchType::MATCH_ELEMENT_LT; + break; + case TExprOpcode::type::MATCH_ELEMENT_GT: + return MatchType::MATCH_ELEMENT_GT; + break; + case TExprOpcode::type::MATCH_ELEMENT_LE: + return MatchType::MATCH_ELEMENT_LE; + break; + case TExprOpcode::type::MATCH_ELEMENT_GE: + return MatchType::MATCH_ELEMENT_GE; + break; + default: + VLOG_CRITICAL << "TExprOpcode: " << type; + DCHECK(false); + } + return MatchType::MATCH_ANY; +} + +inline MatchType to_match_type(const std::string& condition_op) { + if (condition_op.compare("match_any") == 0) { + return MatchType::MATCH_ANY; + } else if (condition_op.compare("match_all") == 0) { + return MatchType::MATCH_ALL; + } else if (condition_op.compare("match_phrase") == 0) { + return MatchType::MATCH_PHRASE; + } else if (condition_op.compare("match_element_eq") == 0) { + return MatchType::MATCH_ELEMENT_EQ; + } else if (condition_op.compare("match_element_lt") == 0) { + return MatchType::MATCH_ELEMENT_LT; + } else if (condition_op.compare("match_element_gt") == 0) { + return MatchType::MATCH_ELEMENT_GT; + } else if (condition_op.compare("match_element_le") == 0) { + return MatchType::MATCH_ELEMENT_LE; + } else if (condition_op.compare("match_element_ge") == 0) { + return MatchType::MATCH_ELEMENT_GE; + } + return MatchType::UNKNOWN; +} + +inline bool is_match_condition(const std::string& op) { + if (0 == strcasecmp(op.c_str(), "match_any") || 0 == strcasecmp(op.c_str(), "match_all") || + 0 == strcasecmp(op.c_str(), "match_phrase") || + 0 == strcasecmp(op.c_str(), "match_element_eq") || + 0 == strcasecmp(op.c_str(), "match_element_lt") || + 0 == strcasecmp(op.c_str(), "match_element_gt") || + 0 == strcasecmp(op.c_str(), "match_element_le") || + 0 == strcasecmp(op.c_str(), "match_element_ge")) { + return true; + } + return false; +} + +inline bool is_match_operator(const TExprOpcode::type& op_type) { + return TExprOpcode::MATCH_ANY == op_type || TExprOpcode::MATCH_ALL == op_type || + TExprOpcode::MATCH_PHRASE == op_type || TExprOpcode::MATCH_ELEMENT_EQ == op_type || + TExprOpcode::MATCH_ELEMENT_LT == op_type || TExprOpcode::MATCH_ELEMENT_GT == op_type || + TExprOpcode::MATCH_ELEMENT_LE == op_type || TExprOpcode::MATCH_ELEMENT_GE == op_type; +} + } // namespace doris diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt index 2ec9e0a974..8e8839b342 100644 --- a/be/src/exprs/CMakeLists.txt +++ b/be/src/exprs/CMakeLists.txt @@ -49,6 +49,7 @@ add_library(Exprs runtime_filter_rpc.cpp is_null_predicate.cpp like_predicate.cpp + match_predicate.cpp math_functions.cpp null_literal.cpp scalar_fn_call.cpp diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp index 89e3077e4b..1fee250153 100644 --- a/be/src/exprs/expr.cpp +++ b/be/src/exprs/expr.cpp @@ -39,6 +39,7 @@ #include "exprs/in_predicate.h" #include "exprs/info_func.h" #include "exprs/literal.h" +#include "exprs/match_predicate.h" #include "exprs/null_literal.h" #include "exprs/rpc_fn_call.h" #include "exprs/scalar_fn_call.h" @@ -386,6 +387,14 @@ Status Expr::create_expr(ObjectPool* pool, const TExprNode& texpr_node, Expr** e return Status::OK(); } + case TExprNodeType::MATCH_PRED: { + DCHECK(texpr_node.__isset.fn); + if (MatchPredicateExpr::is_valid(texpr_node.fn.name.function_name)) { + *expr = pool->add(new MatchPredicateExpr(texpr_node)); + } + return Status::OK(); + } + case TExprNodeType::IN_PRED: { switch (texpr_node.opcode) { case TExprOpcode::FILTER_IN: diff --git a/be/src/exprs/match_predicate.cpp b/be/src/exprs/match_predicate.cpp new file mode 100644 index 0000000000..45b359f26d --- /dev/null +++ b/be/src/exprs/match_predicate.cpp @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/match_predicate.h" + +#include + +#include +#include + +#include "exec/olap_utils.h" +#include "exprs/string_functions.h" +#include "olap/schema.h" +#include "runtime/string_value.hpp" + +namespace doris { + +MatchPredicate::MatchPredicate(uint32_t column_id, const std::string& value, MatchType match_type) + : ColumnPredicate(column_id), _value(value), _match_type(match_type) {} + +PredicateType MatchPredicate::type() const { + return PredicateType::MATCH; +} + +Status MatchPredicate::evaluate(const Schema& schema, InvertedIndexIterator* iterator, + uint32_t num_rows, roaring::Roaring* bitmap) const { + if (iterator == nullptr) { + return Status::OK(); + } + auto column_desc = schema.column(_column_id); + roaring::Roaring roaring; + Status s = Status::OK(); + auto inverted_index_query_type = _to_inverted_index_query_type(_match_type); + + if (is_string_type(column_desc->type()) || + (column_desc->type() == OLAP_FIELD_TYPE_ARRAY && + is_string_type(column_desc->get_sub_field(0)->type_info()->type()))) { + StringValue match_value; + int32_t length = _value.length(); + char* buffer = const_cast(_value.c_str()); + match_value.replace(buffer, length); + s = iterator->read_from_inverted_index(column_desc->name(), &match_value, + inverted_index_query_type, num_rows, &roaring); + } else if (column_desc->type() == OLAP_FIELD_TYPE_ARRAY && + is_numeric_type(column_desc->get_sub_field(0)->type_info()->type())) { + char buf[column_desc->get_sub_field(0)->type_info()->size()]; + column_desc->get_sub_field(0)->from_string(buf, _value); + s = iterator->read_from_inverted_index(column_desc->name(), buf, inverted_index_query_type, + num_rows, &roaring); + } + *bitmap &= roaring; + return s; +} + +InvertedIndexQueryType MatchPredicate::_to_inverted_index_query_type(MatchType match_type) const { + auto ret = InvertedIndexQueryType::UNKNOWN_QUERY; + switch (match_type) { + case MatchType::MATCH_ANY: + ret = InvertedIndexQueryType::MATCH_ANY_QUERY; + break; + case MatchType::MATCH_ALL: + ret = InvertedIndexQueryType::MATCH_ALL_QUERY; + break; + case MatchType::MATCH_PHRASE: + ret = InvertedIndexQueryType::MATCH_PHRASE_QUERY; + break; + case MatchType::MATCH_ELEMENT_EQ: + ret = InvertedIndexQueryType::EQUAL_QUERY; + break; + case MatchType::MATCH_ELEMENT_LT: + ret = InvertedIndexQueryType::LESS_THAN_QUERY; + break; + case MatchType::MATCH_ELEMENT_GT: + ret = InvertedIndexQueryType::GREATER_THAN_QUERY; + break; + case MatchType::MATCH_ELEMENT_LE: + ret = InvertedIndexQueryType::LESS_EQUAL_QUERY; + break; + case MatchType::MATCH_ELEMENT_GE: + ret = InvertedIndexQueryType::GREATER_EQUAL_QUERY; + break; + default: + DCHECK(false); + } + return ret; +} + +} // namespace doris \ No newline at end of file diff --git a/be/src/exprs/match_predicate.h b/be/src/exprs/match_predicate.h new file mode 100644 index 0000000000..9acaea477c --- /dev/null +++ b/be/src/exprs/match_predicate.h @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef DORIS_BE_SRC_QUERY_EXPRS_MATCH_PREDICATE_H +#define DORIS_BE_SRC_QUERY_EXPRS_MATCH_PREDICATE_H + +#include +#include + +#include "exprs/predicate.h" +#include "gen_cpp/Exprs_types.h" +#include "olap/column_predicate.h" +#include "runtime/string_search.hpp" + +namespace doris { + +enum class MatchType; + +class MatchPredicateExpr : public Predicate { +public: + MatchPredicateExpr(const TExprNode& node) : Predicate(node) {} + virtual ~MatchPredicateExpr() {} + Expr* clone(ObjectPool* pool) const override { + return pool->add(new MatchPredicateExpr(*this)); + } + + static bool is_valid(std::string fn_name) { + return fn_name == "match_any" || fn_name == "match_all" || fn_name == "match_phrase" || + fn_name == "match_element_eq" || fn_name == "match_element_lt" || + fn_name == "match_element_gt" || fn_name == "match_element_le" || + fn_name == "match_element_ge"; + } + +protected: + friend class Expr; +}; + +class MatchPredicate : public ColumnPredicate { +public: + static void init() {} + +public: + MatchPredicate(uint32_t column_id, const std::string& value, MatchType match_type); + + virtual PredicateType type() const override; + + //evaluate predicate on Bitmap + virtual Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows, + roaring::Roaring* roaring) const override { + LOG(FATAL) << "Not Implemented MatchPredicate::evaluate"; + } + + //evaluate predicate on inverted + Status evaluate(const Schema& schema, InvertedIndexIterator* iterator, uint32_t num_rows, + roaring::Roaring* bitmap) const override; + +private: + InvertedIndexQueryType _to_inverted_index_query_type(MatchType match_type) const; + std::string _debug_string() const override { + std::string info = "MatchPredicate"; + return info; + } + +private: + std::string _value; + MatchType _match_type; +}; + +} // namespace doris + +#endif \ No newline at end of file diff --git a/be/src/olap/CMakeLists.txt b/be/src/olap/CMakeLists.txt index 7f542e9718..25100a090f 100644 --- a/be/src/olap/CMakeLists.txt +++ b/be/src/olap/CMakeLists.txt @@ -77,6 +77,7 @@ add_library(Olap STATIC primary_key_index.cpp rowset/segment_v2/bitmap_index_reader.cpp rowset/segment_v2/bitmap_index_writer.cpp + rowset/segment_v2/inverted_index_reader.cpp rowset/segment_v2/bitshuffle_page.cpp rowset/segment_v2/bitshuffle_wrapper.cpp rowset/segment_v2/column_reader.cpp diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h index 293ac742e0..cbcce39bf7 100644 --- a/be/src/olap/column_predicate.h +++ b/be/src/olap/column_predicate.h @@ -22,6 +22,8 @@ #include "olap/column_block.h" #include "olap/rowset/segment_v2/bitmap_index_reader.h" #include "olap/rowset/segment_v2/bloom_filter.h" +#include "olap/rowset/segment_v2/inverted_index_reader.h" +#include "olap/schema.h" #include "olap/selection_vector.h" #include "vec/columns/column.h" @@ -49,6 +51,7 @@ enum class PredicateType { IS_NOT_NULL = 10, BF = 11, // BloomFilter BITMAP_FILTER = 12, // BitmapFilter + MATCH = 13, // fulltext match }; inline std::string type_to_string(PredicateType type) { @@ -129,6 +132,13 @@ public: virtual Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* roaring) const = 0; + //evaluate predicate on inverted + virtual Status evaluate(const Schema& schema, InvertedIndexIterator* iterator, + uint32_t num_rows, roaring::Roaring* bitmap) const { + return Status::NotSupported( + "Not Implemented evaluate with inverted index, please check the predicate"); + } + // evaluate predicate on IColumn // a short circuit eval way virtual uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel, @@ -179,6 +189,7 @@ public: } std::shared_ptr predicate_params() { return _predicate_params; } + const std::string pred_type_string(PredicateType type) { switch (type) { case PredicateType::EQ: @@ -203,6 +214,8 @@ public: return "is_not_null"; case PredicateType::BF: return "bf"; + case PredicateType::MATCH: + return "match"; default: return "unknown"; } diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index 72bf91ea9f..91ad1705e5 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -21,6 +21,7 @@ #include "olap/column_predicate.h" #include "olap/rowset/segment_v2/bloom_filter.h" +#include "olap/rowset/segment_v2/inverted_index_reader.h" #include "olap/wrapper_field.h" #include "vec/columns/column_dictionary.h" @@ -58,6 +59,51 @@ public: bitmap); } + Status evaluate(const Schema& schema, InvertedIndexIterator* iterator, uint32_t num_rows, + roaring::Roaring* bitmap) const override { + if (iterator == nullptr) { + return Status::OK(); + } + auto column_desc = schema.column(_column_id); + std::string column_name = column_desc->name(); + + InvertedIndexQueryType query_type; + switch (PT) { + case PredicateType::EQ: + query_type = InvertedIndexQueryType::EQUAL_QUERY; + break; + case PredicateType::NE: + query_type = InvertedIndexQueryType::EQUAL_QUERY; + break; + case PredicateType::LT: + query_type = InvertedIndexQueryType::LESS_THAN_QUERY; + break; + case PredicateType::LE: + query_type = InvertedIndexQueryType::LESS_EQUAL_QUERY; + break; + case PredicateType::GT: + query_type = InvertedIndexQueryType::GREATER_THAN_QUERY; + break; + case PredicateType::GE: + query_type = InvertedIndexQueryType::GREATER_EQUAL_QUERY; + break; + default: + return Status::InvalidArgument("invalid comparison predicate type {}", PT); + } + + roaring::Roaring roaring; + RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &_value, query_type, + num_rows, &roaring)); + + if constexpr (PT == PredicateType::NE) { + *bitmap -= roaring; + } else { + *bitmap &= roaring; + } + + return Status::OK(); + } + uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel, uint16_t size) const override { if (column.is_nullable()) { diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index 861ba5612d..3e46d63f39 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -27,6 +27,7 @@ #include "olap/column_predicate.h" #include "olap/olap_common.h" #include "olap/rowset/segment_v2/bloom_filter.h" +#include "olap/rowset/segment_v2/inverted_index_reader.h" #include "olap/wrapper_field.h" #include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" @@ -217,6 +218,29 @@ public: return Status::OK(); } + Status evaluate(const Schema& schema, InvertedIndexIterator* iterator, uint32_t num_rows, + roaring::Roaring* result) const override { + if (iterator == nullptr) { + return Status::OK(); + } + auto column_desc = schema.column(_column_id); + std::string column_name = column_desc->name(); + roaring::Roaring indices; + for (auto value : *_values) { + InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY; + roaring::Roaring index; + RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &value, query_type, + num_rows, &index)); + indices |= index; + } + if constexpr (PT == PredicateType::IN_LIST) { + *result &= indices; + } else { + *result -= indices; + } + return Status::OK(); + } + uint16_t evaluate(const vectorized::IColumn& column, uint16_t* sel, uint16_t size) const override { if (column.is_nullable()) { diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 41f9c2132a..0cece99d6c 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -332,6 +332,11 @@ struct OlapReaderStatistics { int64_t rows_bitmap_index_filtered = 0; int64_t bitmap_index_filter_timer = 0; + + int64_t rows_inverted_index_filtered = 0; + int64_t inverted_index_filter_timer = 0; + + int64_t output_index_result_column_timer = 0; // number of segment filtered by column stat when creating seg iterator int64_t filtered_segment_number = 0; // total number of segment diff --git a/be/src/olap/predicate_creator.h b/be/src/olap/predicate_creator.h index 12d10e109f..df9c759c8a 100644 --- a/be/src/olap/predicate_creator.h +++ b/be/src/olap/predicate_creator.h @@ -20,6 +20,12 @@ #include #include +#include "exec/olap_utils.h" +#include "exprs/bloomfilter_predicate.h" +#include "exprs/create_predicate_function.h" +#include "exprs/hybrid_set.h" +#include "exprs/match_predicate.h" +#include "olap/bloom_filter_predicate.h" #include "olap/column_predicate.h" #include "olap/comparison_predicate.h" #include "olap/in_list_predicate.h" @@ -257,6 +263,9 @@ inline ColumnPredicate* parse_to_predicate(TabletSchemaSPtr tablet_schema, if (to_lower(condition.condition_op) == "is") { return new NullPredicate(index, to_lower(condition.condition_values[0]) == "null", opposite); + } else if (is_match_condition(condition.condition_op)) { + return new MatchPredicate(index, condition.condition_values[0], + to_match_type(condition.condition_op)); } if ((condition.condition_op == "*=" || condition.condition_op == "!*=") && diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 4ce70b1b0d..6f49aa23cf 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -142,6 +142,15 @@ Status ColumnReader::new_bitmap_index_iterator(BitmapIndexIterator** iterator) { return Status::OK(); } +Status ColumnReader::new_inverted_index_iterator(const TabletIndex* index_meta, + InvertedIndexIterator** iterator) { + RETURN_IF_ERROR(_ensure_inverted_index_loaded(index_meta)); + if (_inverted_index) { + RETURN_IF_ERROR(_inverted_index->new_iterator(index_meta, iterator)); + } + return Status::OK(); +} + Status ColumnReader::read_page(const ColumnIteratorOptions& iter_opts, const PagePointer& pp, PageHandle* handle, Slice* page_body, PageFooterPB* footer, BlockCompressionCodec* codec) const { @@ -361,6 +370,32 @@ Status ColumnReader::_load_bitmap_index(bool use_page_cache, bool kept_in_memory return Status::OK(); } +Status ColumnReader::_load_inverted_index_index(const TabletIndex* index_meta) { + std::lock_guard wlock(_load_index_lock); + + if (_inverted_index && index_meta && + _inverted_index->get_index_id() == index_meta->index_id()) { + return Status::OK(); + } + + FieldType type; + if ((FieldType)_meta.type() == FieldType::OLAP_FIELD_TYPE_ARRAY) { + type = (FieldType)_meta.children_columns(0).type(); + } else { + type = _type_info->type(); + } + + if (is_string_type(type)) { + // todo(wy): implement + } else if (is_numeric_type(type)) { + // todo(wy): implement + } else { + _inverted_index.reset(); + } + + return Status::OK(); +} + Status ColumnReader::_load_bloom_filter_index(bool use_page_cache, bool kept_in_memory) { if (_bf_index_meta != nullptr) { _bloom_filter_index.reset(new BloomFilterIndexReader(_file_reader, _bf_index_meta)); diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 5a86bdec10..c5dd729d82 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -31,10 +31,11 @@ #include "olap/iterators.h" #include "olap/rowset/segment_v2/bitmap_index_reader.h" // for BitmapIndexReader #include "olap/rowset/segment_v2/common.h" -#include "olap/rowset/segment_v2/ordinal_page_index.h" // for OrdinalPageIndexIterator -#include "olap/rowset/segment_v2/page_handle.h" // for PageHandle -#include "olap/rowset/segment_v2/parsed_page.h" // for ParsedPage -#include "olap/rowset/segment_v2/row_ranges.h" // for RowRanges +#include "olap/rowset/segment_v2/inverted_index_reader.h" // for InvertedIndexReader +#include "olap/rowset/segment_v2/ordinal_page_index.h" // for OrdinalPageIndexIterator +#include "olap/rowset/segment_v2/page_handle.h" // for PageHandle +#include "olap/rowset/segment_v2/parsed_page.h" // for ParsedPage +#include "olap/rowset/segment_v2/row_ranges.h" // for RowRanges #include "olap/rowset/segment_v2/zone_map_index.h" #include "olap/tablet_schema.h" #include "util/file_cache.h" @@ -105,6 +106,9 @@ public: // Client should delete returned iterator Status new_bitmap_index_iterator(BitmapIndexIterator** iterator); + Status new_inverted_index_iterator(const TabletIndex* index_meta, + InvertedIndexIterator** iterator); + // Seek to the first entry in the column. Status seek_to_first(OrdinalPageIndexIterator* iter); Status seek_at_or_before(ordinal_t ordinal, OrdinalPageIndexIterator* iter); @@ -174,9 +178,18 @@ private: }); } + // Read column inverted indexes into memory + // May be called multiple times, subsequent calls will no op. + Status _ensure_inverted_index_loaded(const TabletIndex* index_meta) { + // load inverted index only if not loaded or index_id is changed + RETURN_IF_ERROR(_load_inverted_index_index(index_meta)); + return Status::OK(); + } + Status _load_zone_map_index(bool use_page_cache, bool kept_in_memory); Status _load_ordinal_index(bool use_page_cache, bool kept_in_memory); Status _load_bitmap_index(bool use_page_cache, bool kept_in_memory); + Status _load_inverted_index_index(const TabletIndex* index_meta); Status _load_bloom_filter_index(bool use_page_cache, bool kept_in_memory); bool _zone_map_match_condition(const ZoneMapPB& zone_map, WrapperField* min_value_container, @@ -214,9 +227,11 @@ private: const BloomFilterIndexPB* _bf_index_meta = nullptr; DorisCallOnce _load_index_once; + mutable std::mutex _load_index_lock; std::unique_ptr _zone_map_index; std::unique_ptr _ordinal_index; std::unique_ptr _bitmap_index; + std::unique_ptr _inverted_index; std::unique_ptr _bloom_filter_index; std::vector> _sub_readers; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp new file mode 100644 index 0000000000..b406ea9163 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "olap/rowset/segment_v2/inverted_index_reader.h" + +#include "common/status.h" + +namespace doris { +namespace segment_v2 { + +bool InvertedIndexReader::indexExists(io::Path& index_file_path) { + bool exists = false; + RETURN_IF_ERROR(_fs->exists(index_file_path, &exists)); + return exists; +} + +Status InvertedIndexIterator::read_from_inverted_index(const std::string& column_name, + const void* query_value, + InvertedIndexQueryType query_type, + uint32_t segment_num_rows, + roaring::Roaring* bit_map) { + return Status::Error(); +} + +Status InvertedIndexIterator::try_read_from_inverted_index(const std::string& column_name, + const void* query_value, + InvertedIndexQueryType query_type, + uint32_t* count) { + return Status::Error(); +} + +InvertedIndexParserType InvertedIndexIterator::get_inverted_index_analyser_type() const { + return _analyser_type; +} + +InvertedIndexReaderType InvertedIndexIterator::get_inverted_index_reader_type() const { + return _reader->type(); +} + +} // namespace segment_v2 +} // namespace doris diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h new file mode 100644 index 0000000000..c2284fb019 --- /dev/null +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/status.h" +#include "env/env.h" +#include "gen_cpp/segment_v2.pb.h" +#include "gutil/macros.h" +#include "io/fs/file_system.h" +#include "olap/inverted_index_parser.h" +#include "olap/olap_common.h" +#include "olap/rowset/segment_v2/common.h" +#include "olap/tablet_schema.h" + +namespace doris { + +namespace segment_v2 { + +class InvertedIndexIterator; + +enum class InvertedIndexReaderType { + UNKNOWN = -1, +}; + +enum class InvertedIndexQueryType { + UNKNOWN_QUERY = -1, + EQUAL_QUERY = 0, + LESS_THAN_QUERY = 1, + LESS_EQUAL_QUERY = 2, + GREATER_THAN_QUERY = 3, + GREATER_EQUAL_QUERY = 4, + MATCH_ANY_QUERY = 5, + MATCH_ALL_QUERY = 6, + MATCH_PHRASE_QUERY = 7, +}; + +class InvertedIndexReader { +public: + explicit InvertedIndexReader(io::FileSystem* fs, const std::string& path, + const uint32_t index_id) + : _fs(fs), _path(path), _index_id(index_id) {}; + virtual ~InvertedIndexReader() = default; + + // create a new column iterator. Client should delete returned iterator + virtual Status new_iterator(const TabletIndex* index_meta, + InvertedIndexIterator** iterator) = 0; + virtual Status query(const std::string& column_name, const void* query_value, + InvertedIndexQueryType query_type, InvertedIndexParserType analyser_type, + roaring::Roaring* bit_map) = 0; + virtual Status try_query(const std::string& column_name, const void* query_value, + InvertedIndexQueryType query_type, + InvertedIndexParserType analyser_type, uint32_t* count) = 0; + + virtual InvertedIndexReaderType type() = 0; + bool indexExists(io::Path& index_file_path); + uint32_t get_index_id() { return _index_id; } + +protected: + friend class InvertedIndexIterator; + io::FileSystem* _fs; + std::string _path; + uint32_t _index_id; +}; + +class InvertedIndexIterator { +public: + InvertedIndexIterator(InvertedIndexParserType analyser_type, InvertedIndexReader* reader) + : _reader(reader), _analyser_type(analyser_type) {} + + Status read_from_inverted_index(const std::string& column_name, const void* query_value, + InvertedIndexQueryType query_type, uint32_t segment_num_rows, + roaring::Roaring* bit_map); + Status try_read_from_inverted_index(const std::string& column_name, const void* query_value, + InvertedIndexQueryType query_type, uint32_t* count); + + InvertedIndexParserType get_inverted_index_analyser_type() const; + + InvertedIndexReaderType get_inverted_index_reader_type() const; + +private: + InvertedIndexReader* _reader; + InvertedIndexParserType _analyser_type; +}; + +} // namespace segment_v2 +} // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index cc489ef85c..76b1c82a0c 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -282,6 +282,16 @@ Status Segment::new_bitmap_index_iterator(const TabletColumn& tablet_column, return Status::OK(); } +Status Segment::new_inverted_index_iterator(const TabletColumn& tablet_column, + const TabletIndex* index_meta, + InvertedIndexIterator** iter) { + auto col_unique_id = tablet_column.unique_id(); + if (_column_readers.count(col_unique_id) > 0 && index_meta) { + return _column_readers.at(col_unique_id)->new_inverted_index_iterator(index_meta, iter); + } + return Status::OK(); +} + Status Segment::lookup_row_key(const Slice& key, RowLocation* row_location) { RETURN_IF_ERROR(load_pk_index_and_bf()); bool has_seq_col = _tablet_schema->has_sequence_col(); diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index f2fa40bb43..70d3450901 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -80,6 +80,9 @@ public: Status new_bitmap_index_iterator(const TabletColumn& tablet_column, BitmapIndexIterator** iter); + Status new_inverted_index_iterator(const TabletColumn& tablet_column, + const TabletIndex* index_meta, InvertedIndexIterator** iter); + const ShortKeyIndexDecoder* get_short_key_index() const { DCHECK(_load_index_once.has_called() && _load_index_once.stored_result().ok()); return _sk_index_decoder.get(); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index e6f13ee956..f6da5ce4be 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -21,6 +21,7 @@ #include #include +#include "common/consts.h" #include "common/status.h" #include "olap/column_predicate.h" #include "olap/olap_common.h" @@ -160,6 +161,9 @@ SegmentIterator::~SegmentIterator() { for (auto iter : _bitmap_index_iterators) { delete iter.second; } + for (auto iter : _inverted_index_iterators) { + delete iter.second; + } } Status SegmentIterator::init(const StorageReadOptions& opts) { @@ -187,6 +191,7 @@ Status SegmentIterator::_init() { _row_bitmap.addRange(0, _segment->num_rows()); RETURN_IF_ERROR(_init_return_column_iterators()); RETURN_IF_ERROR(_init_bitmap_index_iterators()); + RETURN_IF_ERROR(_init_inverted_index_iterators()); // z-order can not use prefix index if (_segment->_tablet_schema->sort_type() != SortType::ZORDER) { RETURN_IF_ERROR(_get_row_ranges_by_keys()); @@ -311,6 +316,7 @@ Status SegmentIterator::_get_row_ranges_by_column_conditions() { } RETURN_IF_ERROR(_apply_bitmap_index()); + RETURN_IF_ERROR(_apply_inverted_index()); if (!_row_bitmap.isEmpty() && (!_opts.col_id_to_predicates.empty() || @@ -434,7 +440,7 @@ Status SegmentIterator::_execute_predicates_except_leafnode_of_andnode(vectorize } else if (node_type == TExprNodeType::BINARY_PRED) { _column_predicate_info->query_op = expr->fn().name.function_name; // get child condition result in compound condtions - auto pred_result_sign = _gen_predicate_sign(_column_predicate_info.get()); + auto pred_result_sign = _gen_predicate_result_sign(_column_predicate_info.get()); _column_predicate_info.reset(new ColumnPredicateInfo()); if (_rowid_result_for_index.count(pred_result_sign) > 0 && _rowid_result_for_index[pred_result_sign].first) { @@ -479,7 +485,7 @@ Status SegmentIterator::_execute_compound_fn(const std::string& function_name) { bool SegmentIterator::_can_filter_by_preds_except_leafnode_of_andnode() { for (auto pred : _col_preds_except_leafnode_of_andnode) { - if (!_check_apply_by_bitmap_index(pred)) { + if (!_check_apply_by_bitmap_index(pred) && !_check_apply_by_inverted_index(pred)) { return false; } } @@ -496,6 +502,19 @@ bool SegmentIterator::_check_apply_by_bitmap_index(ColumnPredicate* pred) { return true; } +bool SegmentIterator::_check_apply_by_inverted_index(ColumnPredicate* pred) { + bool handle_by_fulltext = _is_handle_predicate_by_fulltext(pred); + int32_t unique_id = _schema.unique_id(pred->column_id()); + if (_inverted_index_iterators.count(unique_id) < 1 || + _inverted_index_iterators[unique_id] == nullptr || + (pred->type() != PredicateType::MATCH && handle_by_fulltext)) { + // 1. this column without inverted index + // 2. equal or range qeury for fulltext index + return false; + } + return true; +} + Status SegmentIterator::_apply_bitmap_index_except_leafnode_of_andnode( ColumnPredicate* pred, roaring::Roaring* output_result) { int32_t unique_id = _schema.unique_id(pred->column_id()); @@ -504,6 +523,14 @@ Status SegmentIterator::_apply_bitmap_index_except_leafnode_of_andnode( return Status::OK(); } +Status SegmentIterator::_apply_inverted_index_except_leafnode_of_andnode( + ColumnPredicate* pred, roaring::Roaring* output_result) { + int32_t unique_id = _schema.unique_id(pred->column_id()); + RETURN_IF_ERROR(pred->evaluate(_schema, _inverted_index_iterators[unique_id], num_rows(), + output_result)); + return Status::OK(); +} + Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { for (auto pred : _col_preds_except_leafnode_of_andnode) { auto pred_type = pred->type(); @@ -515,10 +542,13 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { } bool can_apply_by_bitmap_index = _check_apply_by_bitmap_index(pred); + bool can_apply_by_inverted_index = _check_apply_by_inverted_index(pred); roaring::Roaring bitmap = _row_bitmap; Status res = Status::OK(); if (can_apply_by_bitmap_index) { res = _apply_bitmap_index_except_leafnode_of_andnode(pred, &bitmap); + } else if (can_apply_by_inverted_index) { + res = _apply_inverted_index_except_leafnode_of_andnode(pred, &bitmap); } else { continue; } @@ -530,7 +560,7 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { return res; } - std::string pred_result_sign = _gen_predicate_sign(pred); + std::string pred_result_sign = _gen_predicate_result_sign(pred); _rowid_result_for_index.emplace( std::make_pair(pred_result_sign, std::make_pair(true, bitmap))); } @@ -538,25 +568,82 @@ Status SegmentIterator::_apply_index_except_leafnode_of_andnode() { return Status::OK(); } -std::string SegmentIterator::_gen_predicate_sign(ColumnPredicate* predicate) { +std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicate* predicate) { std::string pred_result_sign; auto column_desc = _schema.column(predicate->column_id()); auto pred_type = predicate->type(); auto predicate_params = predicate->predicate_params(); - pred_result_sign = column_desc->name() + "_" + predicate->pred_type_string(pred_type) + "_" + - predicate_params->value; + pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_desc->name() + "_" + + predicate->pred_type_string(pred_type) + "_" + predicate_params->value; return pred_result_sign; } -std::string SegmentIterator::_gen_predicate_sign(ColumnPredicateInfo* predicate_info) { +std::string SegmentIterator::_gen_predicate_result_sign(ColumnPredicateInfo* predicate_info) { std::string pred_result_sign; - pred_result_sign = predicate_info->column_name + "_" + predicate_info->query_op + "_" + - predicate_info->query_value; + pred_result_sign = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + predicate_info->column_name + "_" + + predicate_info->query_op + "_" + predicate_info->query_value; return pred_result_sign; } +bool SegmentIterator::_is_handle_predicate_by_fulltext(ColumnPredicate* predicate) { + auto column_id = predicate->column_id(); + int32_t unique_id = _schema.unique_id(column_id); + bool handle_by_fulltext = + (_inverted_index_iterators[unique_id] != nullptr) && + (is_string_type(_schema.column(column_id)->type())) && + ((_inverted_index_iterators[unique_id]->get_inverted_index_analyser_type() == + InvertedIndexParserType::PARSER_ENGLISH) || + (_inverted_index_iterators[unique_id]->get_inverted_index_analyser_type() == + InvertedIndexParserType::PARSER_STANDARD)); + + return handle_by_fulltext; +} + +Status SegmentIterator::_apply_inverted_index() { + SCOPED_RAW_TIMER(&_opts.stats->inverted_index_filter_timer); + size_t input_rows = _row_bitmap.cardinality(); + std::vector remaining_predicates; + + for (auto pred : _col_predicates) { + bool handle_by_fulltext = _is_handle_predicate_by_fulltext(pred); + int32_t unique_id = _schema.unique_id(pred->column_id()); + if (_inverted_index_iterators.count(unique_id) < 1 || + _inverted_index_iterators[unique_id] == nullptr || + (pred->type() != PredicateType::MATCH && handle_by_fulltext)) { + // 1. this column no inverted index + // 2. equal or range for fulltext index + remaining_predicates.push_back(pred); + } else { + roaring::Roaring bitmap = _row_bitmap; + Status res = pred->evaluate(_schema, _inverted_index_iterators[unique_id], num_rows(), + &bitmap); + if (!res.ok()) { + LOG(WARNING) << "failed to evaluate index" + << ", column predicate type: " << pred->pred_type_string(pred->type()) + << ", error msg: " << res.code_as_string(); + return res; + } + + auto pred_type = pred->type(); + if (pred_type == PredicateType::MATCH) { + std::string pred_result_sign = _gen_predicate_result_sign(pred); + _rowid_result_for_index.emplace( + std::make_pair(pred_result_sign, std::make_pair(false, bitmap))); + } + + _row_bitmap &= bitmap; + if (_row_bitmap.isEmpty()) { + break; // all rows have been pruned, no need to process further predicates + } + } + } + _col_predicates = std::move(remaining_predicates); + _opts.stats->rows_inverted_index_filtered += (input_rows - _row_bitmap.cardinality()); + return Status::OK(); +} + Status SegmentIterator::_init_return_column_iterators() { if (_cur_rowid >= num_rows()) { return Status::OK(); @@ -591,6 +678,21 @@ Status SegmentIterator::_init_bitmap_index_iterators() { return Status::OK(); } +Status SegmentIterator::_init_inverted_index_iterators() { + if (_cur_rowid >= num_rows()) { + return Status::OK(); + } + for (auto cid : _schema.column_ids()) { + int32_t unique_id = _schema.unique_id(cid); + if (_inverted_index_iterators.count(unique_id) < 1) { + RETURN_IF_ERROR(_segment->new_inverted_index_iterator( + _opts.tablet_schema->column(cid), _opts.tablet_schema->get_inverted_index(cid), + &_inverted_index_iterators[unique_id])); + } + } + return Status::OK(); +} + Status SegmentIterator::_lookup_ordinal(const RowCursor& key, bool is_include, rowid_t upper_bound, rowid_t* rowid) { if (_segment->_tablet_schema->keys_type() == UNIQUE_KEYS && @@ -1040,9 +1142,7 @@ Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32 nrows_read += rows_to_read; } - auto next_range = RowRanges::create_single(range_from, range_to); - auto next_bm_in_row_bitmap = RowRanges::ranges_to_roaring(next_range); - _split_row_ranges.emplace_back(next_bm_in_row_bitmap); + _split_row_ranges.emplace_back(std::pair {range_from, range_to}); // if _opts.read_orderby_key_reverse is true, only read one range for fast reverse purpose } while (nrows_read < nrows_read_limit && !_opts.read_orderby_key_reverse); return Status::OK(); @@ -1189,6 +1289,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { nrows_read_limit = 100; } _split_row_ranges.clear(); + _split_row_ranges.reserve(nrows_read_limit / 2); _read_columns_by_index(nrows_read_limit, _current_batch_rows_read, _lazy_materialization_read || _opts.record_rowids); @@ -1293,6 +1394,7 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { void SegmentIterator::_output_index_result_column(uint16_t* sel_rowid_idx, uint16_t select_size, vectorized::Block* block) { + SCOPED_RAW_TIMER(&_opts.stats->output_index_result_column_timer); if (block->rows() == 0) { return; } @@ -1302,6 +1404,8 @@ void SegmentIterator::_output_index_result_column(uint16_t* sel_rowid_idx, uint1 std::make_shared(), iter.first}); if (!iter.second.first) { // predicate not in compound query + block->get_by_name(iter.first).column = + vectorized::DataTypeUInt8().create_column_const(block->rows(), 1u); continue; } _build_index_result_column(sel_rowid_idx, select_size, block, iter.first, @@ -1319,8 +1423,12 @@ void SegmentIterator::_build_index_result_column(uint16_t* sel_rowid_idx, uint16 size_t idx_in_block = 0; size_t idx_in_row_range = 0; size_t idx_in_selected = 0; + // _split_row_ranges store multiple ranges which split in function _read_columns_by_index(), + // index_result is a column predicate apply result in a whole segement, + // but a scanner thread one time can read max rows limit by block_row_max, + // so split _row_bitmap by one time scan range, in order to match size of one scanner thread read rows. for (auto origin_row_range : _split_row_ranges) { - for (auto rowid : origin_row_range) { + for (size_t rowid = origin_row_range.first; rowid < origin_row_range.second; ++rowid) { if (sel_rowid_idx == nullptr || (idx_in_selected < select_size && idx_in_row_range == sel_rowid_idx[idx_in_selected])) { if (index_result.contains(rowid)) { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h index ee056a2753..2e60393186 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.h +++ b/be/src/olap/rowset/segment_v2/segment_iterator.h @@ -27,6 +27,7 @@ #include "olap/olap_common.h" #include "olap/row_cursor.h" #include "olap/rowset/segment_v2/common.h" +#include "olap/rowset/segment_v2/inverted_index_reader.h" #include "olap/rowset/segment_v2/row_ranges.h" #include "olap/rowset/segment_v2/segment.h" #include "olap/schema.h" @@ -106,6 +107,7 @@ private: Status _init_return_column_iterators(); Status _init_bitmap_index_iterators(); + Status _init_inverted_index_iterators(); // calculate row ranges that fall into requested key ranges using short key index Status _get_row_ranges_by_keys(); @@ -123,11 +125,14 @@ private: Status _get_row_ranges_by_column_conditions(); Status _get_row_ranges_from_conditions(RowRanges* condition_row_ranges); Status _apply_bitmap_index(); + Status _apply_inverted_index(); Status _apply_index_except_leafnode_of_andnode(); Status _apply_bitmap_index_except_leafnode_of_andnode(ColumnPredicate* pred, roaring::Roaring* output_result); - + Status _apply_inverted_index_except_leafnode_of_andnode(ColumnPredicate* pred, + roaring::Roaring* output_result); + bool _is_handle_predicate_by_fulltext(ColumnPredicate* predicate); bool _can_filter_by_preds_except_leafnode_of_andnode(); Status _execute_predicates_except_leafnode_of_andnode(vectorized::VExpr* expr); Status _execute_compound_fn(const std::string& function_name); @@ -185,9 +190,10 @@ private: void _update_max_row(const vectorized::Block* block); bool _check_apply_by_bitmap_index(ColumnPredicate* pred); + bool _check_apply_by_inverted_index(ColumnPredicate* pred); - std::string _gen_predicate_sign(ColumnPredicate* predicate); - std::string _gen_predicate_sign(ColumnPredicateInfo* predicate_info); + std::string _gen_predicate_result_sign(ColumnPredicate* predicate); + std::string _gen_predicate_result_sign(ColumnPredicateInfo* predicate_info); void _build_index_result_column(uint16_t* sel_rowid_idx, uint16_t select_size, vectorized::Block* block, const std::string& pred_result_sign, @@ -259,11 +265,12 @@ private: // can use _schema get unique_id by cid std::map _column_iterators; std::map _bitmap_index_iterators; + std::map _inverted_index_iterators; // after init(), `_row_bitmap` contains all rowid to scan roaring::Roaring _row_bitmap; // "column_name+operator+value-> - std::unordered_map > _rowid_result_for_index; - std::vector _split_row_ranges; + std::unordered_map> _rowid_result_for_index; + std::vector> _split_row_ranges; // an iterator for `_row_bitmap` that can be used to extract row range to scan std::unique_ptr _range_iter; // the next rowid to read diff --git a/be/src/olap/utils.h b/be/src/olap/utils.h index 8998a82a42..964b973569 100644 --- a/be/src/olap/utils.h +++ b/be/src/olap/utils.h @@ -272,6 +272,22 @@ bool valid_datetime(const std::string& value_str, const uint32_t scale); bool valid_bool(const std::string& value_str); +constexpr bool is_string_type(const FieldType& field_type) { + return field_type == OLAP_FIELD_TYPE_VARCHAR || field_type == OLAP_FIELD_TYPE_CHAR || + field_type == OLAP_FIELD_TYPE_STRING; +} + +constexpr bool is_numeric_type(const FieldType& field_type) { + return field_type == OLAP_FIELD_TYPE_INT || field_type == OLAP_FIELD_TYPE_UNSIGNED_INT || + field_type == OLAP_FIELD_TYPE_BIGINT || field_type == OLAP_FIELD_TYPE_SMALLINT || + field_type == OLAP_FIELD_TYPE_UNSIGNED_TINYINT || + field_type == OLAP_FIELD_TYPE_UNSIGNED_SMALLINT || + field_type == OLAP_FIELD_TYPE_TINYINT || field_type == OLAP_FIELD_TYPE_DOUBLE || + field_type == OLAP_FIELD_TYPE_FLOAT || field_type == OLAP_FIELD_TYPE_DATE || + field_type == OLAP_FIELD_TYPE_DATETIME || field_type == OLAP_FIELD_TYPE_LARGEINT || + field_type == OLAP_FIELD_TYPE_DECIMAL || field_type == OLAP_FIELD_TYPE_BOOL; +} + // Util used to get string name of thrift enum item #define EnumToString(enum_type, index, out) \ do { \ diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 0d845b19dc..621bd2af2c 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -227,6 +227,7 @@ set(VEC_FILES functions/functions_multi_string_position.cpp functions/functions_multi_string_search.cpp functions/function_running_difference.cpp + functions/match.cpp olap/vgeneric_iterators.cpp olap/vcollect_iterator.cpp olap/block_reader.cpp diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index 891d53cf53..a84443d38d 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -109,6 +109,12 @@ Status NewOlapScanNode::_init_profile() { ADD_COUNTER(_segment_profile, "RowsBitmapIndexFiltered", TUnit::UNIT); _bitmap_index_filter_timer = ADD_TIMER(_segment_profile, "BitmapIndexFilterTimer"); + _inverted_index_filter_counter = + ADD_COUNTER(_segment_profile, "RowsInvertedIndexFiltered", TUnit::UNIT); + _inverted_index_filter_timer = ADD_TIMER(_segment_profile, "InvertedIndexFilterTimer"); + + _output_index_result_column_timer = ADD_TIMER(_segment_profile, "OutputIndexResultColumnTimer"); + _filtered_segment_counter = ADD_COUNTER(_segment_profile, "NumSegmentFiltered", TUnit::UNIT); _total_segment_counter = ADD_COUNTER(_segment_profile, "NumSegmentTotal", TUnit::UNIT); diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h index 3d37f2c5d8..af3a7d0ea5 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.h +++ b/be/src/vec/exec/scan/new_olap_scan_node.h @@ -126,6 +126,12 @@ private: RuntimeProfile::Counter* _bitmap_index_filter_counter = nullptr; // time fro bitmap inverted index read and filter RuntimeProfile::Counter* _bitmap_index_filter_timer = nullptr; + + RuntimeProfile::Counter* _inverted_index_filter_counter = nullptr; + RuntimeProfile::Counter* _inverted_index_filter_timer = nullptr; + + RuntimeProfile::Counter* _output_index_result_column_timer = nullptr; + // number of created olap scanners RuntimeProfile::Counter* _num_scanners = nullptr; diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 93919e2d41..2f5aa41ab7 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -177,6 +177,12 @@ Status NewOlapScanner::_init_tablet_reader_params( // Condition for (auto& filter : filters) { + if (is_match_condition(filter.condition_op) && + !_tablet_schema->has_inverted_index( + _tablet_schema->column(filter.column_name).unique_id())) { + return Status::NotSupported("Match query must with inverted index, column `" + + filter.column_name + "` is not inverted index column"); + } _tablet_reader_params.conditions.push_back(filter); } @@ -428,6 +434,12 @@ void NewOlapScanner::_update_counters_before_close() { COUNTER_UPDATE(olap_parent->_bitmap_index_filter_counter, stats.rows_bitmap_index_filtered); COUNTER_UPDATE(olap_parent->_bitmap_index_filter_timer, stats.bitmap_index_filter_timer); + COUNTER_UPDATE(olap_parent->_inverted_index_filter_counter, stats.rows_inverted_index_filtered); + COUNTER_UPDATE(olap_parent->_inverted_index_filter_timer, stats.inverted_index_filter_timer); + + COUNTER_UPDATE(olap_parent->_output_index_result_column_timer, + stats.output_index_result_column_timer); + COUNTER_UPDATE(olap_parent->_filtered_segment_counter, stats.filtered_segment_number); COUNTER_UPDATE(olap_parent->_total_segment_counter, stats.total_segment_number); diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index 2445a5d9c9..b88a2d9546 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -17,9 +17,11 @@ #include "vec/exec/scan/vscan_node.h" +#include "common/consts.h" #include "common/status.h" #include "exprs/hybrid_set.h" #include "runtime/runtime_filter_mgr.h" +#include "util/defer_op.h" #include "util/runtime_profile.h" #include "vec/columns/column_const.h" #include "vec/exec/scan/pip_scanner_context.h" @@ -124,6 +126,18 @@ Status VScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool* SCOPED_TIMER(_get_next_timer); SCOPED_TIMER(_runtime_profile->total_time_counter()); SCOPED_CONSUME_MEM_TRACKER(mem_tracker_growh()); + // in inverted index apply logic, in order to optimize query performance, + // we built some temporary columns into block, these columns only used in scan node level, + // remove them when query leave scan node to avoid other nodes use block->columns() to make a wrong decision + Defer drop_block_temp_column {[&]() { + auto all_column_names = block->get_names(); + for (auto& name : all_column_names) { + if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) { + block->erase(name); + } + } + }}; + if (state->is_cancelled()) { _scanner_ctx->set_status_on_error(Status::Cancelled("query cancelled")); return _scanner_ctx->status(); @@ -491,6 +505,9 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output RETURN_IF_PUSH_DOWN(_normalize_noneq_binary_predicate( cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range, &pdt)); + RETURN_IF_PUSH_DOWN(_normalize_match_predicate( + cur_expr, *(_vconjunct_ctx_ptr.get()), slot, value_range, + &pdt)); if (_is_key_column(slot->col_name())) { RETURN_IF_PUSH_DOWN(_normalize_bitmap_filter( cur_expr, *(_vconjunct_ctx_ptr.get()), slot, &pdt)); @@ -1013,7 +1030,48 @@ Status VScanNode::_normalize_binary_in_compound_predicate(vectorized::VExpr* exp *pdt = PushDownType::ACCEPTABLE; } } + return Status::OK(); +} +template +Status VScanNode::_normalize_match_predicate(VExpr* expr, VExprContext* expr_ctx, + SlotDescriptor* slot, ColumnValueRange& range, + PushDownType* pdt) { + if (TExprNodeType::MATCH_PRED == expr->node_type()) { + DCHECK(expr->children().size() == 2); + + // create empty range as temp range, temp range should do intersection on range + auto temp_range = ColumnValueRange::create_empty_column_value_range( + slot->type().precision, slot->type().scale); + // Normalize match conjuncts like 'where col match value' + + auto match_checker = [](const std::string& fn_name) { return fn_name == "match"; }; + StringRef value; + int slot_ref_child = -1; + PushDownType temp_pdt; + RETURN_IF_ERROR(_should_push_down_binary_predicate( + reinterpret_cast(expr), expr_ctx, &value, &slot_ref_child, + match_checker, temp_pdt)); + if (temp_pdt != PushDownType::UNACCEPTABLE) { + DCHECK(slot_ref_child >= 0); + if (value.data != nullptr) { + using CppType = typename PrimitiveTypeTraits::CppType; + if constexpr (T == TYPE_CHAR || T == TYPE_VARCHAR || T == TYPE_STRING || + T == TYPE_HLL) { + auto val = StringValue(value.data, value.size); + ColumnValueRange::add_match_value_range(temp_range, + to_match_type(expr->op()), + reinterpret_cast(&val)); + } else { + ColumnValueRange::add_match_value_range( + temp_range, to_match_type(expr->op()), + reinterpret_cast(const_cast(value.data))); + } + range.intersection(temp_range); + } + *pdt = temp_pdt; + } + } return Status::OK(); } diff --git a/be/src/vec/exec/scan/vscan_node.h b/be/src/vec/exec/scan/vscan_node.h index 836d6a67da..69089f0e41 100644 --- a/be/src/vec/exec/scan/vscan_node.h +++ b/be/src/vec/exec/scan/vscan_node.h @@ -339,6 +339,11 @@ private: SlotDescriptor* slot, ColumnValueRange& range, PushDownType* pdt); + template + Status _normalize_match_predicate(vectorized::VExpr* expr, VExprContext* expr_ctx, + SlotDescriptor* slot, ColumnValueRange& range, + PushDownType* pdt); + template static Status _change_value_range(ColumnValueRange& range, void* value, const ChangeFixedValueRangeFunc& func, diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index cfb2657ff5..dc5d0c1eec 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -19,6 +19,7 @@ #include +#include "common/consts.h" #include "common/status.h" #include "exprs/anyval_util.h" #include "exprs/rpc_fn.h" @@ -125,7 +126,8 @@ bool VectorizedFnCall::fast_execute(FunctionContext* context, Block& block, size_t input_rows_count) { auto query_value = block.get_by_position(arguments[1]).to_string(0); std::string column_name = block.get_by_position(arguments[0]).name; - auto result_column_name = column_name + "_" + _function->get_name() + "_" + query_value; + auto result_column_name = BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_" + + _function->get_name() + "_" + query_value; if (!block.has(result_column_name)) { return false; } diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 12af55f060..9ad3ccc7e8 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -45,6 +45,7 @@ using doris::TypeDescriptor; VExpr::VExpr(const doris::TExprNode& node) : _node_type(node.node_type), + _opcode(node.__isset.opcode ? node.opcode : TExprOpcode::INVALID_OPCODE), _type(TypeDescriptor::from_thrift(node.type)), _fn_context_index(-1), _prepared(false) { @@ -61,6 +62,7 @@ VExpr::VExpr(const doris::TExprNode& node) VExpr::VExpr(const VExpr& vexpr) : _node_type(vexpr._node_type), + _opcode(vexpr._opcode), _type(vexpr._type), _data_type(vexpr._data_type), _children(vexpr._children), @@ -70,7 +72,10 @@ VExpr::VExpr(const VExpr& vexpr) _prepared(vexpr._prepared) {} VExpr::VExpr(const TypeDescriptor& type, bool is_slotref, bool is_nullable) - : _type(type), _fn_context_index(-1), _prepared(false) { + : _opcode(TExprOpcode::INVALID_OPCODE), + _type(type), + _fn_context_index(-1), + _prepared(false) { if (is_slotref) { _node_type = TExprNodeType::SLOT_REF; } @@ -130,7 +135,8 @@ Status VExpr::create_expr(doris::ObjectPool* pool, const doris::TExprNode& texpr case doris::TExprNodeType::ARITHMETIC_EXPR: case doris::TExprNodeType::BINARY_PRED: case doris::TExprNodeType::FUNCTION_CALL: - case doris::TExprNodeType::COMPUTE_FUNCTION_CALL: { + case doris::TExprNodeType::COMPUTE_FUNCTION_CALL: + case doris::TExprNodeType::MATCH_PRED: { *expr = pool->add(new VectorizedFnCall(texpr_node)); break; } diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index b1f37477c4..9d0e5ce42d 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -101,6 +101,8 @@ public: TExprNodeType::type node_type() const { return _node_type; } + TExprOpcode::type op() const { return _opcode; } + void add_child(VExpr* expr) { _children.push_back(expr); } static Status create_expr_tree(ObjectPool* pool, const TExpr& texpr, VExprContext** ctx); @@ -203,6 +205,8 @@ protected: const FunctionBasePtr& function) const; TExprNodeType::type _node_type; + // Used to check what opcode + TExprOpcode::type _opcode; TypeDescriptor _type; DataTypePtr _data_type; std::vector _children; diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp new file mode 100644 index 0000000000..bd9ad66503 --- /dev/null +++ b/be/src/vec/functions/match.cpp @@ -0,0 +1,141 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "common/consts.h" +#include "common/logging.h" +#include "vec/columns/column_string.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris::vectorized { + +class FunctionMatchBase : public IFunction { +public: + size_t get_number_of_arguments() const override { return 2; } + + String get_name() const override { return "match"; } + + /// Get result types by argument types. If the function does not apply to these arguments, throw an exception. + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + auto match_query_str = block.get_by_position(arguments[1]).to_string(0); + std::string column_name = block.get_by_position(arguments[0]).name; + auto match_pred_column_name = + BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_match_" + match_query_str; + if (!block.has(match_pred_column_name)) { + if (!config::enable_storage_vectorization) { + return Status::Cancelled( + "please check whether turn on the configuration " + "'enable_storage_vectorization'"); + } + LOG(WARNING) << "execute match query meet error, block no column: " + << match_pred_column_name; + return Status::InternalError( + "match query meet error, no match predicate evaluate result column in block."); + } + auto match_pred_column = + block.get_by_name(match_pred_column_name).column->convert_to_full_column_if_const(); + + block.replace_by_position(result, std::move(match_pred_column)); + return Status::OK(); + } +}; + +class FunctionMatchAny : public FunctionMatchBase { +public: + static constexpr auto name = "match_any"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } +}; + +class FunctionMatchAll : public FunctionMatchBase { +public: + static constexpr auto name = "match_all"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } +}; + +class FunctionMatchPhrase : public FunctionMatchBase { +public: + static constexpr auto name = "match_phrase"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } +}; + +class FunctionMatchElementEQ : public FunctionMatchBase { +public: + static constexpr auto name = "match_element_eq"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } +}; + +class FunctionMatchElementLT : public FunctionMatchBase { +public: + static constexpr auto name = "match_element_lt"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } +}; + +class FunctionMatchElementGT : public FunctionMatchBase { +public: + static constexpr auto name = "match_element_gt"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } +}; + +class FunctionMatchElementLE : public FunctionMatchBase { +public: + static constexpr auto name = "match_element_le"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } +}; + +class FunctionMatchElementGE : public FunctionMatchBase { +public: + static constexpr auto name = "match_element_ge"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } +}; + +void register_function_match(SimpleFunctionFactory& factory) { + factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); + factory.register_function(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index 0839870a67..e311c725a5 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -86,6 +86,7 @@ void register_function_multi_string_search(SimpleFunctionFactory& factory); void register_function_encryption(SimpleFunctionFactory& factory); void register_function_regexp_extract(SimpleFunctionFactory& factory); void register_function_hex_variadic(SimpleFunctionFactory& factory); +void register_function_match(SimpleFunctionFactory& factory); void register_function_url(SimpleFunctionFactory& factory); @@ -228,6 +229,7 @@ public: register_function_url(instance); register_function_multi_string_position(instance); register_function_multi_string_search(instance); + register_function_match(instance); }); return instance; } diff --git a/be/src/vec/olap/vcollect_iterator.cpp b/be/src/vec/olap/vcollect_iterator.cpp index fafb653715..30f3924f72 100644 --- a/be/src/vec/olap/vcollect_iterator.cpp +++ b/be/src/vec/olap/vcollect_iterator.cpp @@ -468,6 +468,11 @@ Status VCollectIterator::Level1Iterator::_merge_next(Block* block) { IteratorRowRef cur_row = _ref; IteratorRowRef pre_row_ref = _ref; + // append extra columns (eg. MATCH pred result column) from src_block to block + for (size_t i = block->columns(); i < cur_row.block->columns(); ++i) { + block->insert(cur_row.block->get_by_position(i).clone_empty()); + } + if (UNLIKELY(_reader->_reader_context.record_rowids)) { _block_row_locations.resize(_batch_size); } diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 8f6aaa63b8..b8b4ce56b3 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -446,6 +446,15 @@ terminal String KW_MINUTE, KW_MODIFY, KW_MONTH, + KW_MATCH, + KW_MATCH_ANY, + KW_MATCH_ALL, + KW_MATCH_PHRASE, + KW_MATCH_ELEMENT_EQ, + KW_MATCH_ELEMENT_LT, + KW_MATCH_ELEMENT_GT, + KW_MATCH_ELEMENT_LE, + KW_MATCH_ELEMENT_GE, KW_NAME, KW_NAMES, KW_NATURAL, @@ -698,7 +707,7 @@ nonterminal Expr where_clause; nonterminal Expr delete_on_clause; nonterminal String sequence_col_clause; nonterminal Predicate predicate, between_predicate, comparison_predicate, - compound_predicate, in_predicate, like_predicate, exists_predicate; + compound_predicate, in_predicate, like_predicate, exists_predicate, match_predicate; nonterminal ArrayList opt_partition_by_clause; nonterminal Expr having_clause; nonterminal ArrayList order_by_elements, order_by_clause; @@ -899,6 +908,7 @@ precedence left KW_AND; precedence left KW_NOT, NOT; precedence left KW_BETWEEN, KW_IN, KW_IS, KW_EXISTS; precedence left KW_LIKE, KW_REGEXP; +precedence left KW_MATCH_ANY, KW_MATCH_ALL, KW_MATCH_PHRASE, KW_MATCH, KW_MATCH_ELEMENT_EQ, KW_MATCH_ELEMENT_LT, KW_MATCH_ELEMENT_GT, KW_MATCH_ELEMENT_LE, KW_MATCH_ELEMENT_GE; precedence left EQUAL, LESSTHAN, GREATERTHAN; precedence left ADD, SUBTRACT; precedence left AT, STAR, DIVIDE, MOD, KW_DIV; @@ -6150,6 +6160,8 @@ predicate ::= {: RESULT = p; :} | like_predicate:p {: RESULT = p; :} + | match_predicate:p + {: RESULT = p; :} | LPAREN predicate:p RPAREN {: p.setPrintSqlInParens(true); @@ -6189,6 +6201,27 @@ like_predicate ::= new LikePredicate(LikePredicate.Operator.REGEXP, e1, e2), null); :} ; +match_predicate ::= + expr:e1 KW_MATCH_ANY expr:e2 + {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ANY, e1, e2); :} + | expr:e1 KW_MATCH expr:e2 + {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ANY, e1, e2); :} + | expr:e1 KW_MATCH_ALL expr:e2 + {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ALL, e1, e2); :} + | expr:e1 KW_MATCH_PHRASE expr:e2 + {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_PHRASE, e1, e2); :} + | expr:e1 KW_MATCH_ELEMENT_EQ expr:e2 + {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ELEMENT_EQ, e1, e2); :} + | expr:e1 KW_MATCH_ELEMENT_LT expr:e2 + {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ELEMENT_LT, e1, e2); :} + | expr:e1 KW_MATCH_ELEMENT_GT expr:e2 + {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ELEMENT_GT, e1, e2); :} + | expr:e1 KW_MATCH_ELEMENT_LE expr:e2 + {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ELEMENT_LE, e1, e2); :} + | expr:e1 KW_MATCH_ELEMENT_GE expr:e2 + {: RESULT = new MatchPredicate(MatchPredicate.Operator.MATCH_ELEMENT_GE, e1, e2); :} + ; + // Avoid a reduce/reduce conflict with compound_predicate by explicitly // using non_pred_expr and predicate separately instead of expr. between_predicate ::= diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java index 946718b2c6..6dac70dbe9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java @@ -52,6 +52,7 @@ import org.apache.doris.rewrite.ExprRewriter; import org.apache.doris.rewrite.ExtractCommonFactorsRule; import org.apache.doris.rewrite.FoldConstantsRule; import org.apache.doris.rewrite.InferFiltersRule; +import org.apache.doris.rewrite.MatchPredicateRule; import org.apache.doris.rewrite.NormalizeBinaryPredicatesRule; import org.apache.doris.rewrite.RewriteAliasFunctionRule; import org.apache.doris.rewrite.RewriteBinaryPredicatesRule; @@ -414,6 +415,7 @@ public class Analyzer { rules.add(RewriteEncryptKeyRule.INSTANCE); rules.add(RewriteInPredicateRule.INSTANCE); rules.add(RewriteAliasFunctionRule.INSTANCE); + rules.add(MatchPredicateRule.INSTANCE); List onceRules = Lists.newArrayList(); onceRules.add(ExtractCommonFactorsRule.INSTANCE); onceRules.add(InferFiltersRule.INSTANCE); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java new file mode 100644 index 0000000000..ad6a6968a7 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java @@ -0,0 +1,243 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.analysis; + +import org.apache.doris.catalog.ArrayType; +import org.apache.doris.catalog.Function; +import org.apache.doris.catalog.FunctionSet; +import org.apache.doris.catalog.ScalarFunction; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.thrift.TExprNode; +import org.apache.doris.thrift.TExprNodeType; +import org.apache.doris.thrift.TExprOpcode; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.util.Objects; + +/** + * filed MATCH query_str + */ +public class MatchPredicate extends Predicate { + private static final Logger LOG = LogManager.getLogger(MatchPredicate.class); + + public enum Operator { + MATCH_ANY("MATCH_ANY", "match_any", TExprOpcode.MATCH_ANY), + MATCH_ALL("MATCH_ALL", "match_all", TExprOpcode.MATCH_ALL), + MATCH_PHRASE("MATCH_PHRASE", "match_phrase", TExprOpcode.MATCH_PHRASE), + MATCH_ELEMENT_EQ("MATCH_ELEMENT_EQ", "match_element_eq", TExprOpcode.MATCH_ELEMENT_EQ), + MATCH_ELEMENT_LT("MATCH_ELEMENT_LT", "match_element_lt", TExprOpcode.MATCH_ELEMENT_LT), + MATCH_ELEMENT_GT("MATCH_ELEMENT_GT", "match_element_gt", TExprOpcode.MATCH_ELEMENT_GT), + MATCH_ELEMENT_LE("MATCH_ELEMENT_LE", "match_element_le", TExprOpcode.MATCH_ELEMENT_LE), + MATCH_ELEMENT_GE("MATCH_ELEMENT_GE", "match_element_ge", TExprOpcode.MATCH_ELEMENT_GE); + + + private final String description; + private final String name; + private final TExprOpcode opcode; + + Operator(String description, + String name, + TExprOpcode opcode) { + this.description = description; + this.name = name; + this.opcode = opcode; + } + + @Override + public String toString() { + return description; + } + + public String getName() { + return name; + } + + public TExprOpcode getOpcode() { + return opcode; + } + } + + public static void initBuiltins(FunctionSet functionSet) { + String symbolNotUsed = "symbol_not_used"; + + for (Type t : Type.getNumericDateTimeTypes()) { + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ELEMENT_EQ.getName(), + symbolNotUsed, + Lists.newArrayList(new ArrayType(t), t), + Type.BOOLEAN)); + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ELEMENT_LT.getName(), + symbolNotUsed, + Lists.newArrayList(new ArrayType(t), t), + Type.BOOLEAN)); + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ELEMENT_GT.getName(), + symbolNotUsed, + Lists.newArrayList(new ArrayType(t), t), + Type.BOOLEAN)); + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ELEMENT_LE.getName(), + symbolNotUsed, + Lists.newArrayList(new ArrayType(t), t), + Type.BOOLEAN)); + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ELEMENT_GE.getName(), + symbolNotUsed, + Lists.newArrayList(new ArrayType(t), t), + Type.BOOLEAN)); + } + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ANY.getName(), + symbolNotUsed, + Lists.newArrayList(Type.VARCHAR, Type.VARCHAR), + Type.BOOLEAN)); + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ANY.getName(), + symbolNotUsed, + Lists.newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR), + Type.BOOLEAN)); + + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ALL.getName(), + symbolNotUsed, + Lists.newArrayList(Type.VARCHAR, Type.VARCHAR), + Type.BOOLEAN)); + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_ALL.getName(), + symbolNotUsed, + Lists.newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR), + Type.BOOLEAN)); + + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_PHRASE.getName(), + symbolNotUsed, + Lists.newArrayList(Type.VARCHAR, Type.VARCHAR), + Type.BOOLEAN)); + functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltinOperator( + Operator.MATCH_PHRASE.getName(), + symbolNotUsed, + Lists.newArrayList(new ArrayType(Type.VARCHAR), Type.VARCHAR), + Type.BOOLEAN)); + } + + private final Operator op; + + public MatchPredicate(Operator op, Expr e1, Expr e2) { + super(); + this.op = op; + Preconditions.checkNotNull(e1); + children.add(e1); + Preconditions.checkNotNull(e2); + children.add(e2); + // TODO: Calculate selectivity + selectivity = Expr.DEFAULT_SELECTIVITY; + } + + public Boolean isMatchElement(Operator op) { + return Objects.equals(op.getName(), Operator.MATCH_ELEMENT_EQ.getName()) + || Objects.equals(op.getName(), Operator.MATCH_ELEMENT_LT.getName()) + || Objects.equals(op.getName(), Operator.MATCH_ELEMENT_GT.getName()) + || Objects.equals(op.getName(), Operator.MATCH_ELEMENT_LE.getName()) + || Objects.equals(op.getName(), Operator.MATCH_ELEMENT_GE.getName()); + } + + protected MatchPredicate(MatchPredicate other) { + super(other); + op = other.op; + } + + @Override + public Expr clone() { + return new MatchPredicate(this); + } + + public Operator getOp() { + return this.op; + } + + @Override + public boolean equals(Object obj) { + if (!super.equals(obj)) { + return false; + } + return ((MatchPredicate) obj).op == op; + } + + @Override + public String toSqlImpl() { + return getChild(0).toSql() + " " + op.toString() + " " + getChild(1).toSql(); + } + + @Override + protected void toThrift(TExprNode msg) { + msg.node_type = TExprNodeType.MATCH_PRED; + msg.setOpcode(op.getOpcode()); + } + + @Override + public void analyzeImpl(Analyzer analyzer) throws AnalysisException { + super.analyzeImpl(analyzer); + if (isMatchElement(op) && !getChild(0).getType().isArrayType()) { + throw new AnalysisException( + "left operand of " + op.toString() + " must be Array: " + toSql()); + } + if (getChild(0).getType().isObjectStored()) { + throw new AnalysisException( + "left operand of " + op.toString() + " must not be Bitmap or HLL: " + toSql()); + } + if (!isMatchElement(op) && !getChild(1).getType().isStringType() && !getChild(1).getType().isNull()) { + throw new AnalysisException("right operand of " + op.toString() + " must be of type STRING: " + toSql()); + } + + if (!getChild(0).getType().isStringType() && !getChild(0).getType().isArrayType()) { + throw new AnalysisException( + "left operand of " + op.toString() + " must be of type STRING or ARRAY: " + toSql()); + } + + fn = getBuiltinFunction(op.toString(), + collectChildReturnTypes(), Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF); + if (fn == null) { + throw new AnalysisException( + "no function found for " + op.toString() + " " + toSql()); + } + Expr e1 = getChild(0); + Expr e2 = getChild(1); + // Here we cast match_element_xxx value type from string to array item type. + // Because be need to know the actual TExprNodeType when doing Expr Literal transform + if (isMatchElement(op) && e1.type.isArrayType() && (e2 instanceof StringLiteral)) { + Type itemType = ((ArrayType) e1.type).getItemType(); + try { + setChild(1, e2.castTo(itemType)); + } catch (NumberFormatException nfe) { + throw new AnalysisException("Invalid number format literal: " + e2.getStringValue()); + } + } + } + + @Override + public int hashCode() { + return 31 * super.hashCode() + Objects.hashCode(op); + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java index 364ba262e6..1d93829d83 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java @@ -24,6 +24,7 @@ import org.apache.doris.analysis.CompoundPredicate; import org.apache.doris.analysis.InPredicate; import org.apache.doris.analysis.IsNullPredicate; import org.apache.doris.analysis.LikePredicate; +import org.apache.doris.analysis.MatchPredicate; import org.apache.doris.builtins.ScalarBuiltins; import org.apache.doris.catalog.Function.NullableMode; @@ -83,6 +84,7 @@ public class FunctionSet { IsNullPredicate.initBuiltins(this); ScalarBuiltins.initBuiltins(this); LikePredicate.initBuiltins(this); + MatchPredicate.initBuiltins(this); InPredicate.initBuiltins(this); AliasFunction.initBuiltins(this); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java index 774ead88c9..8c5517d01c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java @@ -108,6 +108,7 @@ public abstract class Type { private static final Logger LOG = LogManager.getLogger(Type.class); private static final ArrayList integerTypes; private static final ArrayList numericTypes; + private static final ArrayList numericDateTimeTypes; private static final ArrayList supportedTypes; private static final ArrayList arraySubTypes; private static final ArrayList trivialTypes; @@ -129,6 +130,11 @@ public abstract class Type { numericTypes.add(DECIMAL64); numericTypes.add(DECIMAL128); + numericDateTimeTypes = Lists.newArrayList(); + numericDateTimeTypes.add(DATE); + numericDateTimeTypes.add(DATETIME); + numericDateTimeTypes.addAll(numericTypes); + trivialTypes = Lists.newArrayList(); trivialTypes.addAll(numericTypes); trivialTypes.add(BOOLEAN); @@ -176,6 +182,10 @@ public abstract class Type { return numericTypes; } + public static ArrayList getNumericDateTimeTypes() { + return numericDateTimeTypes; + } + public static ArrayList getTrivialTypes() { return trivialTypes; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/MatchPredicateRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/MatchPredicateRule.java new file mode 100644 index 0000000000..109fb24d4a --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/MatchPredicateRule.java @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.rewrite; + +import org.apache.doris.analysis.Analyzer; +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.MatchPredicate; +import org.apache.doris.common.AnalysisException; + +/** + * MatchPredicate only support in WHERE_CLAUSE + */ +public final class MatchPredicateRule implements ExprRewriteRule { + public static ExprRewriteRule INSTANCE = new MatchPredicateRule(); + + @Override + public Expr apply(Expr expr, Analyzer analyzer, ExprRewriter.ClauseType clauseType) throws AnalysisException { + if (expr instanceof MatchPredicate && clauseType != ExprRewriter.ClauseType.WHERE_CLAUSE) { + throw new AnalysisException("Not support in " + clauseType.toString() + + ", only support in WHERE_CLAUSE, expression: " + expr.toSql()); + } + return expr; + } + + private MatchPredicateRule() {} +} diff --git a/fe/fe-core/src/main/jflex/sql_scanner.flex b/fe/fe-core/src/main/jflex/sql_scanner.flex index e481039c35..d9c89907de 100644 --- a/fe/fe-core/src/main/jflex/sql_scanner.flex +++ b/fe/fe-core/src/main/jflex/sql_scanner.flex @@ -295,6 +295,15 @@ import org.apache.doris.qe.SqlModeHelper; keywordMap.put("lock", new Integer(SqlParserSymbols.KW_LOCK)); keywordMap.put("low_priority", new Integer(SqlParserSymbols.KW_LOW_PRIORITY)); keywordMap.put("map", new Integer(SqlParserSymbols.KW_MAP)); + keywordMap.put("match", new Integer(SqlParserSymbols.KW_MATCH)); + keywordMap.put("match_any", new Integer(SqlParserSymbols.KW_MATCH_ANY)); + keywordMap.put("match_all", new Integer(SqlParserSymbols.KW_MATCH_ALL)); + keywordMap.put("match_phrase", new Integer(SqlParserSymbols.KW_MATCH_PHRASE)); + keywordMap.put("element_eq", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_EQ)); + keywordMap.put("element_lt", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_LT)); + keywordMap.put("element_gt", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_GT)); + keywordMap.put("element_le", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_LE)); + keywordMap.put("element_ge", new Integer(SqlParserSymbols.KW_MATCH_ELEMENT_GE)); keywordMap.put("materialized", new Integer(SqlParserSymbols.KW_MATERIALIZED)); keywordMap.put("max", new Integer(SqlParserSymbols.KW_MAX)); keywordMap.put("maxvalue", new Integer(SqlParserSymbols.KW_MAX_VALUE)); diff --git a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift index ddef7fa94e..230deb51ff 100644 --- a/gensrc/thrift/Exprs.thrift +++ b/gensrc/thrift/Exprs.thrift @@ -57,6 +57,9 @@ enum TExprNodeType { // only used in runtime filter BITMAP_PRED, + + // for fulltext search + MATCH_PRED, } //enum TAggregationOp { diff --git a/gensrc/thrift/Opcodes.thrift b/gensrc/thrift/Opcodes.thrift index d55989fc64..905858016f 100644 --- a/gensrc/thrift/Opcodes.thrift +++ b/gensrc/thrift/Opcodes.thrift @@ -84,4 +84,12 @@ enum TExprOpcode { LAST_OPCODE, EQ_FOR_NULL, RT_FILTER, + MATCH_ANY, + MATCH_ALL, + MATCH_PHRASE, + MATCH_ELEMENT_EQ, + MATCH_ELEMENT_LT, + MATCH_ELEMENT_GT, + MATCH_ELEMENT_LE, + MATCH_ELEMENT_GE, }