diff --git a/be/src/olap/inverted_index_parser.h b/be/src/olap/inverted_index_parser.h index d36950e514..aeb3232c39 100644 --- a/be/src/olap/inverted_index_parser.h +++ b/be/src/olap/inverted_index_parser.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include namespace doris { @@ -30,6 +31,13 @@ enum class InvertedIndexParserType { PARSER_CHINESE = 4, }; +struct InvertedIndexCtx { + InvertedIndexParserType parser_type; + std::string parser_mode; +}; + +using InvertedIndexCtxSPtr = std::shared_ptr; + const std::string INVERTED_INDEX_PARSER_MODE_KEY = "parser_mode"; const std::string INVERTED_INDEX_PARSER_FINE_GRANULARITY = "fine_grained"; const std::string INVERTED_INDEX_PARSER_COARSE_GRANULARITY = "coarse_grained"; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index 544620e68f..f1c9a30c0f 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -84,6 +84,66 @@ bool InvertedIndexReader::indexExists(io::Path& index_file_path) { return exists; } +std::vector InvertedIndexReader::get_analyse_result( + const std::string& field_name, const std::string& value, InvertedIndexQueryType query_type, + InvertedIndexCtx* inverted_index_ctx) { + std::vector analyse_result; + std::shared_ptr analyzer; + std::unique_ptr reader; + auto analyser_type = inverted_index_ctx->parser_type; + if (analyser_type == InvertedIndexParserType::PARSER_STANDARD) { + analyzer = std::make_shared(); + reader.reset( + (new lucene::util::StringReader(std::wstring(value.begin(), value.end()).c_str()))); + } else if (analyser_type == InvertedIndexParserType::PARSER_CHINESE) { + auto chinese_analyzer = + std::make_shared(L"chinese", false); + chinese_analyzer->initDict(config::inverted_index_dict_path); + auto mode = inverted_index_ctx->parser_mode; + if (mode == INVERTED_INDEX_PARSER_COARSE_GRANULARITY) { + 
chinese_analyzer->setMode(lucene::analysis::AnalyzerMode::Default); + } else { + chinese_analyzer->setMode(lucene::analysis::AnalyzerMode::All); + } + analyzer = chinese_analyzer; + reader.reset(_CLNEW lucene::util::SStringReader(value.c_str(), strlen(value.c_str()), + false)); + //reader.reset(new lucene::util::SimpleInputStreamReader( + // new lucene::util::AStringReader(value.c_str()), + // lucene::util::SimpleInputStreamReader::UTF8)); + } else { + // default + analyzer = std::make_shared>(); + reader.reset( + (new lucene::util::StringReader(std::wstring(value.begin(), value.end()).c_str()))); + } + + std::wstring field_ws = std::wstring(field_name.begin(), field_name.end()); + std::unique_ptr token_stream( + analyzer->tokenStream(field_ws.c_str(), reader.get())); + + lucene::analysis::Token token; + + while (token_stream->next(&token)) { + if (token.termLength() != 0) { + analyse_result.emplace_back( + std::wstring(token.termBuffer(), token.termLength())); + } + } + + if (token_stream != nullptr) { + token_stream->close(); + } + + if (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY || + query_type == InvertedIndexQueryType::MATCH_ALL_QUERY) { + std::set unrepeated_result(analyse_result.begin(), analyse_result.end()); + analyse_result.assign(unrepeated_result.begin(), unrepeated_result.end()); + } + + return analyse_result; +} + Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cache_handle, lucene::store::Directory* dir) { lucene::store::IndexInput* null_bitmap_in = nullptr; @@ -139,66 +199,6 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach return Status::OK(); } -std::vector FullTextIndexReader::get_analyse_result( - const std::wstring& field_name, const std::string& value, - InvertedIndexQueryType query_type) { - std::vector analyse_result; - std::shared_ptr analyzer; - std::unique_ptr reader; - auto analyser_type = get_inverted_index_parser_type_from_string( - 
get_parser_string_from_properties(_index_meta.properties())); - if (analyser_type == InvertedIndexParserType::PARSER_STANDARD) { - analyzer = std::make_shared(); - reader.reset( - (new lucene::util::StringReader(std::wstring(value.begin(), value.end()).c_str()))); - } else if (analyser_type == InvertedIndexParserType::PARSER_CHINESE) { - auto chinese_analyzer = - std::make_shared(L"chinese", false); - chinese_analyzer->initDict(config::inverted_index_dict_path); - auto mode = get_parser_mode_string_from_properties(_index_meta.properties()); - if (mode == INVERTED_INDEX_PARSER_COARSE_GRANULARITY) { - chinese_analyzer->setMode(lucene::analysis::AnalyzerMode::Default); - } else { - chinese_analyzer->setMode(lucene::analysis::AnalyzerMode::All); - } - analyzer = chinese_analyzer; - reader.reset(_CLNEW lucene::util::SStringReader(value.c_str(), strlen(value.c_str()), - false)); - //reader.reset(new lucene::util::SimpleInputStreamReader( - // new lucene::util::AStringReader(value.c_str()), - // lucene::util::SimpleInputStreamReader::UTF8)); - } else { - // default - analyzer = std::make_shared>(); - reader.reset( - (new lucene::util::StringReader(std::wstring(value.begin(), value.end()).c_str()))); - } - - std::unique_ptr token_stream( - analyzer->tokenStream(field_name.c_str(), reader.get())); - - lucene::analysis::Token token; - - while (token_stream->next(&token)) { - if (token.termLength() != 0) { - analyse_result.emplace_back( - std::wstring(token.termBuffer(), token.termLength())); - } - } - - if (token_stream != nullptr) { - token_stream->close(); - } - - if (query_type == InvertedIndexQueryType::MATCH_ANY_QUERY || - query_type == InvertedIndexQueryType::MATCH_ALL_QUERY) { - std::set unrepeated_result(analyse_result.begin(), analyse_result.end()); - analyse_result.assign(unrepeated_result.begin(), unrepeated_result.end()); - } - - return analyse_result; -} - Status FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, InvertedIndexIterator** iterator) { 
*iterator = new InvertedIndexIterator(stats, this); @@ -219,13 +219,14 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, const std::string auto index_file_name = InvertedIndexDescriptor::get_index_file_name(path.filename(), _index_meta.index_id()); auto index_file_path = index_dir / index_file_name; - - std::unique_ptr query; - std::wstring field_ws = std::wstring(column_name.begin(), column_name.end()); - + InvertedIndexCtxSPtr inverted_index_ctx = std::make_shared(); + inverted_index_ctx->parser_type = get_inverted_index_parser_type_from_string( + get_parser_string_from_properties(_index_meta.properties())); + inverted_index_ctx->parser_mode = + get_parser_mode_string_from_properties(_index_meta.properties()); try { std::vector analyse_result = - get_analyse_result(field_ws, search_str, query_type); + get_analyse_result(column_name, search_str, query_type, inverted_index_ctx.get()); if (analyse_result.empty()) { LOG(WARNING) << "invalid input query_str: " << search_str @@ -233,12 +234,13 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, const std::string return Status::Error(); } + std::unique_ptr query; + std::wstring field_ws = std::wstring(column_name.begin(), column_name.end()); roaring::Roaring query_match_bitmap; bool first = true; bool null_bitmap_already_read = false; for (auto token_ws : analyse_result) { std::shared_ptr term_match_bitmap = nullptr; - // try to get term bitmap match result from cache to avoid query index on cache hit auto cache = InvertedIndexQueryCache::instance(); // use EQUAL_QUERY type here since cache is for each term/token diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index 80c653f418..66cef27154 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -99,6 +99,11 @@ public: uint32_t get_index_id() const { return _index_meta.index_id(); } + static 
std::vector get_analyse_result(const std::string& field_name, + const std::string& value, + InvertedIndexQueryType query_type, + InvertedIndexCtx* inverted_index_ctx); + protected: bool _is_match_query(InvertedIndexQueryType query_type); friend class InvertedIndexIterator; @@ -125,9 +130,6 @@ public: } InvertedIndexReaderType type() override; - std::vector get_analyse_result(const std::wstring& field_name, - const std::string& value, - InvertedIndexQueryType query_type); }; class StringTypeInvertedIndexReader : public InvertedIndexReader { diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 1e708e9e6d..a8755296dd 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -803,14 +803,12 @@ Status SegmentIterator::_apply_inverted_index_on_column_predicate( Status res = pred->evaluate(*_schema, _inverted_index_iterators[unique_id].get(), num_rows(), &bitmap); if (!res.ok()) { - if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND && - pred->type() != PredicateType::MATCH) || + if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND || res.code() == ErrorCode::INVERTED_INDEX_FILE_HIT_LIMIT || (res.code() == ErrorCode::INVERTED_INDEX_NO_TERMS && need_remaining_after_evaluate)) { // 1. INVERTED_INDEX_FILE_NOT_FOUND means index file has not been built, - // usually occurs when creating a new index, because match query must - // need index file, queries other than match query can be downgraded + // usually occurs when creating a new index, queries can be downgraded // without index. // 2. 
INVERTED_INDEX_FILE_HIT_LIMIT means the hit of condition by index // has reached the optimal limit, downgrade without index query can @@ -991,7 +989,7 @@ Status SegmentIterator::_init_inverted_index_iterators() { return Status::OK(); } for (auto cid : _schema->column_ids()) { - int32_t unique_id = _schema->unique_id(cid); + int32_t unique_id = _opts.tablet_schema->column(cid).unique_id(); if (_inverted_index_iterators.count(unique_id) < 1) { RETURN_IF_ERROR(_segment->new_inverted_index_iterator( _opts.tablet_schema->column(cid), _opts.tablet_schema->get_inverted_index(cid), diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index 933d7d6c56..7e1e0d3706 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -162,6 +162,7 @@ set(VEC_FILES exprs/vliteral.cpp exprs/varray_literal.cpp exprs/vmap_literal.cpp + exprs/vmatch_predicate.cpp exprs/vstruct_literal.cpp exprs/vin_predicate.cpp exprs/vbloom_predicate.cpp diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 763480ae9a..f87c247ea9 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -284,24 +284,9 @@ Status NewOlapScanner::_init_tablet_reader_params( // Condition for (auto& filter : filters) { - if (is_match_condition(filter.condition_op) && - !_tablet_schema->has_inverted_index( - _tablet_schema->column(filter.column_name).unique_id())) { - return Status::NotSupported("Match query must with inverted index, column `" + - filter.column_name + "` is not inverted index column"); - } _tablet_reader_params.conditions.push_back(filter); } - for (auto& filter : _compound_filters) { - if (is_match_condition(filter.condition_op) && - !_tablet_schema->has_inverted_index( - _tablet_schema->column(filter.column_name).unique_id())) { - return Status::NotSupported("Match query must with inverted index, column `" + - filter.column_name + "` is not inverted index column"); - } - } - 
std::copy(_compound_filters.cbegin(), _compound_filters.cend(), std::inserter(_tablet_reader_params.conditions_except_leafnode_of_andnode, _tablet_reader_params.conditions_except_leafnode_of_andnode.begin())); diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index a98cf8444c..cf5516471c 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -609,6 +609,14 @@ Status VScanNode::_normalize_predicate(const VExprSPtr& conjunct_expr_root, VExp return Status::OK(); } + if (pdt == PushDownType::ACCEPTABLE && + TExprNodeType::MATCH_PRED == cur_expr->node_type()) { + // remaining it in the expr tree, in order to filter by function if the pushdown + // match_predicate failed to apply inverted index in the storage layer + output_expr = conjunct_expr_root; // remaining in conjunct tree + return Status::OK(); + } + if (pdt == PushDownType::ACCEPTABLE && _is_key_column(slot->col_name())) { output_expr = nullptr; return Status::OK(); diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 593715cd62..fd3b2fbd1e 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -45,6 +45,7 @@ #include "vec/exprs/vlambda_function_expr.h" #include "vec/exprs/vliteral.h" #include "vec/exprs/vmap_literal.h" +#include "vec/exprs/vmatch_predicate.h" #include "vec/exprs/vschema_change_expr.h" #include "vec/exprs/vslot_ref.h" #include "vec/exprs/vstruct_literal.h" @@ -177,11 +178,14 @@ Status VExpr::create_expr(const doris::TExprNode& expr_node, VExprSPtr& expr) { case doris::TExprNodeType::ARITHMETIC_EXPR: case doris::TExprNodeType::BINARY_PRED: case doris::TExprNodeType::FUNCTION_CALL: - case doris::TExprNodeType::COMPUTE_FUNCTION_CALL: - case doris::TExprNodeType::MATCH_PRED: { + case doris::TExprNodeType::COMPUTE_FUNCTION_CALL: { expr = VectorizedFnCall::create_shared(expr_node); break; } + case TExprNodeType::MATCH_PRED: { + expr = VMatchPredicate::create_shared(expr_node); + 
break; + } case doris::TExprNodeType::CAST_EXPR: { expr = VCastExpr::create_shared(expr_node); break; diff --git a/be/src/vec/exprs/vmatch_predicate.cpp b/be/src/vec/exprs/vmatch_predicate.cpp new file mode 100644 index 0000000000..e47a14f779 --- /dev/null +++ b/be/src/vec/exprs/vmatch_predicate.cpp @@ -0,0 +1,141 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "vec/exprs/vmatch_predicate.h" + +#include +#include // IWYU pragma: keep +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "common/status.h" +#include "vec/core/block.h" +#include "vec/core/column_numbers.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/core/columns_with_type_and_name.h" +#include "vec/exprs/vexpr_context.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris { +class RowDescriptor; +class RuntimeState; +} // namespace doris + +namespace doris::vectorized { + +VMatchPredicate::VMatchPredicate(const TExprNode& node) : VExpr(node) { + _inverted_index_ctx = std::make_shared(); + _inverted_index_ctx->parser_type = + get_inverted_index_parser_type_from_string(node.match_predicate.parser_type); + _inverted_index_ctx->parser_mode = node.match_predicate.parser_mode; +} + +Status VMatchPredicate::prepare(RuntimeState* state, const RowDescriptor& desc, + VExprContext* context) { + RETURN_IF_ERROR_OR_PREPARED(VExpr::prepare(state, desc, context)); + + ColumnsWithTypeAndName argument_template; + argument_template.reserve(_children.size()); + std::vector child_expr_name; + for (auto child : _children) { + argument_template.emplace_back(nullptr, child->data_type(), child->expr_name()); + child_expr_name.emplace_back(child->expr_name()); + } + + _function = SimpleFunctionFactory::instance().get_function(_fn.name.function_name, + argument_template, _data_type); + if (_function == nullptr) { + std::string type_str; + for (auto arg : argument_template) { + type_str = type_str + " " + arg.type->get_name(); + } + return Status::NotSupported( + "Function {} is not implemented, input param type is {}, " + "and return type is {}.", + _fn.name.function_name, type_str, _data_type->get_name()); + } + + VExpr::register_function_context(state, context); + _expr_name = fmt::format("{}({})", _fn.name.function_name, child_expr_name); + _function_name = _fn.name.function_name; + + return 
Status::OK(); +} + +Status VMatchPredicate::open(RuntimeState* state, VExprContext* context, + FunctionContext::FunctionStateScope scope) { + RETURN_IF_ERROR(VExpr::open(state, context, scope)); + RETURN_IF_ERROR(VExpr::init_function_context(context, scope, _function)); + if (scope == FunctionContext::THREAD_LOCAL) { + context->fn_context(_fn_context_index)->set_function_state(scope, _inverted_index_ctx); + } + return Status::OK(); +} + +void VMatchPredicate::close(RuntimeState* state, VExprContext* context, + FunctionContext::FunctionStateScope scope) { + VExpr::close_function_context(context, scope, _function); + VExpr::close(state, context, scope); +} + +Status VMatchPredicate::execute(VExprContext* context, Block* block, int* result_column_id) { + // TODO: not execute const expr again, but use the const column in function context + doris::vectorized::ColumnNumbers arguments(_children.size()); + for (int i = 0; i < _children.size(); ++i) { + int column_id = -1; + RETURN_IF_ERROR(_children[i]->execute(context, block, &column_id)); + arguments[i] = column_id; + } + // call function + size_t num_columns_without_result = block->columns(); + // prepare a column to save result + block->insert({nullptr, _data_type, _expr_name}); + RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, arguments, + num_columns_without_result, block->rows(), false)); + *result_column_id = num_columns_without_result; + return Status::OK(); +} + +const std::string& VMatchPredicate::expr_name() const { + return _expr_name; +} + +const std::string& VMatchPredicate::function_name() const { + return _function_name; +} + +std::string VMatchPredicate::debug_string() const { + std::stringstream out; + out << "MatchPredicate(" << children()[0]->debug_string() << ",["; + int num_children = children().size(); + + for (int i = 1; i < num_children; ++i) { + out << (i == 1 ? 
"" : " ") << children()[i]->debug_string(); + } + + out << "])"; + return out.str(); +} + +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/exprs/vmatch_predicate.h b/be/src/vec/exprs/vmatch_predicate.h new file mode 100644 index 0000000000..0c0a9d8ea8 --- /dev/null +++ b/be/src/vec/exprs/vmatch_predicate.h @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include + +#include "common/object_pool.h" +#include "common/status.h" +#include "olap/inverted_index_parser.h" +#include "udf/udf.h" +#include "vec/exprs/vexpr.h" +#include "vec/functions/function.h" + +namespace doris { +class RowDescriptor; +class RuntimeState; +class TExprNode; +namespace vectorized { +class Block; +class VExprContext; +} // namespace vectorized +} // namespace doris + +namespace doris::vectorized { + +class VMatchPredicate final : public VExpr { + ENABLE_FACTORY_CREATOR(VMatchPredicate); + +public: + VMatchPredicate(const TExprNode& node); + ~VMatchPredicate() override = default; + doris::Status execute(VExprContext* context, doris::vectorized::Block* block, + int* result_column_id) override; + doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc, + VExprContext* context) override; + doris::Status open(doris::RuntimeState* state, VExprContext* context, + FunctionContext::FunctionStateScope scope) override; + void close(doris::RuntimeState* state, VExprContext* context, + FunctionContext::FunctionStateScope scope) override; + VExprSPtr clone() const override { return VMatchPredicate::create_shared(*this); } + const std::string& expr_name() const override; + const std::string& function_name() const; + + std::string debug_string() const override; + + const FunctionBasePtr function() { return _function; } + +private: + FunctionBasePtr _function; + std::string _expr_name; + std::string _function_name; + InvertedIndexCtxSPtr _inverted_index_ctx; +}; +} // namespace doris::vectorized \ No newline at end of file diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp index 1e69699d1b..b9511bb0b8 100644 --- a/be/src/vec/functions/match.cpp +++ b/be/src/vec/functions/match.cpp @@ -15,134 +15,182 @@ // specific language governing permissions and limitations // under the License. 
-#include +#include "vec/functions/match.h" -#include -#include -#include -#include -#include -#include - -#include "common/config.h" -#include "common/consts.h" -#include "common/logging.h" -#include "common/status.h" -#include "vec/aggregate_functions/aggregate_function.h" -#include "vec/columns/column.h" -#include "vec/core/block.h" -#include "vec/core/column_numbers.h" -#include "vec/core/column_with_type_and_name.h" -#include "vec/core/types.h" -#include "vec/data_types/data_type_number.h" -#include "vec/functions/function.h" -#include "vec/functions/simple_function_factory.h" - -namespace doris { -class FunctionContext; -} // namespace doris +#include "olap/rowset/segment_v2/inverted_index_reader.h" +#include "runtime/query_context.h" +#include "runtime/runtime_state.h" namespace doris::vectorized { -class FunctionMatchBase : public IFunction { -public: - size_t get_number_of_arguments() const override { return 2; } +Status FunctionMatchBase::execute_impl(FunctionContext* context, Block& block, + const ColumnNumbers& arguments, size_t result, + size_t input_rows_count) { + auto match_query_str = block.get_by_position(arguments[1]).to_string(0); + std::string column_name = block.get_by_position(arguments[0]).name; + auto match_pred_column_name = + BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_match_" + match_query_str; + if (!block.has(match_pred_column_name)) { + VLOG_DEBUG << "begin to execute match directly, column_name=" << column_name + << ", match_query_str=" << match_query_str; + InvertedIndexCtx* inverted_index_ctx = reinterpret_cast( + context->get_function_state(FunctionContext::THREAD_LOCAL)); - String get_name() const override { return "match"; } - - /// Get result types by argument types. If the function does not apply to these arguments, throw an exception. 
- DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { - return std::make_shared(); - } - - Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, - size_t result, size_t input_rows_count) override { - auto match_query_str = block.get_by_position(arguments[1]).to_string(0); - std::string column_name = block.get_by_position(arguments[0]).name; - auto match_pred_column_name = - BeConsts::BLOCK_TEMP_COLUMN_PREFIX + column_name + "_match_" + match_query_str; - if (!block.has(match_pred_column_name)) { - if (!config::enable_index_apply_preds_except_leafnode_of_andnode) { - return Status::Cancelled( - "please check whether turn on the configuration " - "'enable_index_apply_preds_except_leafnode_of_andnode'"); - } - LOG(WARNING) << "execute match query meet error, block no column: " - << match_pred_column_name; - return Status::InternalError( - "match query meet error, no match predicate evaluate result column in block."); + const auto values_col = + block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + const auto* values = check_and_get_column(values_col.get()); + if (!values) { + return Status::InternalError("Not supported input arguments types"); } + // result column + auto res = ColumnUInt8::create(); + ColumnUInt8::Container& vec_res = res->get_data(); + // set default value to 0, and match functions only need to set 1/true + vec_res.resize_fill(input_rows_count); + RETURN_IF_ERROR(execute_match(column_name, match_query_str, input_rows_count, values, + inverted_index_ctx, vec_res)); + block.replace_by_position(result, std::move(res)); + } else { auto match_pred_column = block.get_by_name(match_pred_column_name).column->convert_to_full_column_if_const(); - block.replace_by_position(result, std::move(match_pred_column)); - return Status::OK(); } -}; -class FunctionMatchAny : public FunctionMatchBase { -public: - static constexpr auto name = "match_any"; - static FunctionPtr 
create() { return std::make_shared(); } + return Status::OK(); +} - String get_name() const override { return name; } -}; +Status FunctionMatchAny::execute_match(const std::string& column_name, + const std::string& match_query_str, size_t input_rows_count, + const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) { + doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN; + if (inverted_index_ctx) { + parser_type = inverted_index_ctx->parser_type; + } + VLOG_DEBUG << "begin to run FunctionMatchAny::execute_match, parser_type: " + << inverted_index_parser_type_to_string(parser_type); + std::vector query_tokens = + doris::segment_v2::InvertedIndexReader::get_analyse_result( + column_name, match_query_str, + doris::segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY, inverted_index_ctx); + for (int i = 0; i < input_rows_count; i++) { + const auto& str_ref = datas->get_data_at(i); + std::vector data_tokens = + doris::segment_v2::InvertedIndexReader::get_analyse_result( + column_name, str_ref.to_string(), + doris::segment_v2::InvertedIndexQueryType::MATCH_ANY_QUERY, + inverted_index_ctx); + // TODO: more efficient impl + for (auto& token : query_tokens) { + auto it = std::find(data_tokens.begin(), data_tokens.end(), token); + if (it != data_tokens.end()) { + result[i] = true; + break; + } + } + } -class FunctionMatchAll : public FunctionMatchBase { -public: - static constexpr auto name = "match_all"; - static FunctionPtr create() { return std::make_shared(); } + return Status::OK(); +} - String get_name() const override { return name; } -}; +Status FunctionMatchAll::execute_match(const std::string& column_name, + const std::string& match_query_str, size_t input_rows_count, + const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) { + doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN; + if 
(inverted_index_ctx) { + parser_type = inverted_index_ctx->parser_type; + } + VLOG_DEBUG << "begin to run FunctionMatchAll::execute_match, parser_type: " + << inverted_index_parser_type_to_string(parser_type); + std::vector query_tokens = + doris::segment_v2::InvertedIndexReader::get_analyse_result( + column_name, match_query_str, + doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY, inverted_index_ctx); -class FunctionMatchPhrase : public FunctionMatchBase { -public: - static constexpr auto name = "match_phrase"; - static FunctionPtr create() { return std::make_shared(); } + for (int i = 0; i < input_rows_count; i++) { + const auto& str_ref = datas->get_data_at(i); + std::vector data_tokens = + doris::segment_v2::InvertedIndexReader::get_analyse_result( + column_name, str_ref.to_string(), + doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY, + inverted_index_ctx); + // TODO: more efficient impl + auto find_count = 0; + for (auto& token : query_tokens) { + auto it = std::find(data_tokens.begin(), data_tokens.end(), token); + if (it != data_tokens.end()) { + ++find_count; + } else { + break; + } + } - String get_name() const override { return name; } -}; + if (find_count == query_tokens.size()) { + result[i] = true; + } + } -class FunctionMatchElementEQ : public FunctionMatchBase { -public: - static constexpr auto name = "match_element_eq"; - static FunctionPtr create() { return std::make_shared(); } + return Status::OK(); +} - String get_name() const override { return name; } -}; +Status FunctionMatchPhrase::execute_match(const std::string& column_name, + const std::string& match_query_str, + size_t input_rows_count, const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) { + doris::InvertedIndexParserType parser_type = doris::InvertedIndexParserType::PARSER_UNKNOWN; + if (inverted_index_ctx) { + parser_type = inverted_index_ctx->parser_type; + } + VLOG_DEBUG << "begin to run 
FunctionMatchPhrase::execute_match, parser_type: " + << inverted_index_parser_type_to_string(parser_type); + std::vector query_tokens = + doris::segment_v2::InvertedIndexReader::get_analyse_result( + column_name, match_query_str, + doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY, + inverted_index_ctx); -class FunctionMatchElementLT : public FunctionMatchBase { -public: - static constexpr auto name = "match_element_lt"; - static FunctionPtr create() { return std::make_shared(); } + for (int i = 0; i < input_rows_count; i++) { + const auto& str_ref = datas->get_data_at(i); + std::vector data_tokens = + doris::segment_v2::InvertedIndexReader::get_analyse_result( + column_name, str_ref.to_string(), + doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY, + inverted_index_ctx); + // TODO: more efficient impl + bool matched = false; + auto it = data_tokens.begin(); + while (it != data_tokens.end()) { + // find position of first token + it = std::find(it, data_tokens.end(), query_tokens[0]); + if (it != data_tokens.end()) { + matched = true; + it++; + auto it_more = it; + // compare query_tokens after the first to data_tokens one by one + for (size_t idx = 1; idx < query_tokens.size(); idx++) { + if (it_more == data_tokens.end() || *it_more != query_tokens[idx]) { + matched = false; + } + it_more++; + } + if (matched) { + break; + } + } + } - String get_name() const override { return name; } -}; + // check matched + if (matched) { + result[i] = true; + } + } -class FunctionMatchElementGT : public FunctionMatchBase { -public: - static constexpr auto name = "match_element_gt"; - static FunctionPtr create() { return std::make_shared(); } - - String get_name() const override { return name; } -}; - -class FunctionMatchElementLE : public FunctionMatchBase { -public: - static constexpr auto name = "match_element_le"; - static FunctionPtr create() { return std::make_shared(); } - - String get_name() const override { return name; } -}; - -class 
FunctionMatchElementGE : public FunctionMatchBase { -public: - static constexpr auto name = "match_element_ge"; - static FunctionPtr create() { return std::make_shared(); } - - String get_name() const override { return name; } -}; + return Status::OK(); +} void register_function_match(SimpleFunctionFactory& factory) { factory.register_function(); diff --git a/be/src/vec/functions/match.h b/be/src/vec/functions/match.h new file mode 100644 index 0000000000..3fcce4ebf1 --- /dev/null +++ b/be/src/vec/functions/match.h @@ -0,0 +1,185 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +#include "common/config.h" +#include "common/consts.h" +#include "common/logging.h" +#include "common/status.h" +#include "olap/inverted_index_parser.h" +#include "vec/aggregate_functions/aggregate_function.h" +#include "vec/columns/column.h" +#include "vec/core/block.h" +#include "vec/core/column_numbers.h" +#include "vec/core/column_with_type_and_name.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_number.h" +#include "vec/exprs/vmatch_predicate.h" +#include "vec/functions/function.h" +#include "vec/functions/simple_function_factory.h" + +namespace doris { +class FunctionContext; +} // namespace doris + +namespace doris::vectorized { + +class FunctionMatchBase : public IFunction { +public: + size_t get_number_of_arguments() const override { return 2; } + + String get_name() const override { return "match"; } + + /// Get result types by argument types. If the function does not apply to these arguments, throw an exception. 
+ DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return std::make_shared(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override; + + virtual Status execute_match(const std::string& column_name, const std::string& match_query_str, + size_t input_rows_count, const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) = 0; +}; + +class FunctionMatchAny : public FunctionMatchBase { +public: + static constexpr auto name = "match_any"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + virtual Status execute_match(const std::string& column_name, const std::string& match_query_str, + size_t input_rows_count, const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) override; +}; + +class FunctionMatchAll : public FunctionMatchBase { +public: + static constexpr auto name = "match_all"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + virtual Status execute_match(const std::string& column_name, const std::string& match_query_str, + size_t input_rows_count, const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) override; +}; + +class FunctionMatchPhrase : public FunctionMatchBase { +public: + static constexpr auto name = "match_phrase"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + virtual Status execute_match(const std::string& column_name, const std::string& match_query_str, + size_t input_rows_count, const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) override; +}; + +class FunctionMatchElementEQ : public FunctionMatchBase { +public: + static constexpr auto 
name = "match_element_eq"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + virtual Status execute_match(const std::string& column_name, const std::string& match_query_str, + size_t input_rows_count, const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) override { + return Status::Error(); + } +}; + +class FunctionMatchElementLT : public FunctionMatchBase { +public: + static constexpr auto name = "match_element_lt"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + virtual Status execute_match(const std::string& column_name, const std::string& match_query_str, + size_t input_rows_count, const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) override { + return Status::Error(); + } +}; + +class FunctionMatchElementGT : public FunctionMatchBase { +public: + static constexpr auto name = "match_element_gt"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + virtual Status execute_match(const std::string& column_name, const std::string& match_query_str, + size_t input_rows_count, const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) override { + return Status::Error(); + } +}; + +class FunctionMatchElementLE : public FunctionMatchBase { +public: + static constexpr auto name = "match_element_le"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + virtual Status execute_match(const std::string& column_name, const std::string& match_query_str, + size_t input_rows_count, const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) override { + return Status::Error(); + } +}; + +class FunctionMatchElementGE : public 
FunctionMatchBase { +public: + static constexpr auto name = "match_element_ge"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + virtual Status execute_match(const std::string& column_name, const std::string& match_query_str, + size_t input_rows_count, const ColumnString* datas, + InvertedIndexCtx* inverted_index_ctx, + ColumnUInt8::Container& result) override { + return Status::Error(); + } +}; + +} // namespace doris::vectorized diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java index 412935ab91..8b8c57b95b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InvertedIndexUtil.java @@ -31,12 +31,22 @@ public class InvertedIndexUtil { public static String INVERTED_INDEX_PARSER_ENGLISH = "english"; public static String INVERTED_INDEX_PARSER_CHINESE = "chinese"; + public static String INVERTED_INDEX_PARSER_MODE_KEY = "parser_mode"; + public static String INVERTED_INDEX_PARSER_FINE_GRANULARITY = "fine_grained"; + public static String INVERTED_INDEX_PARSER_COARSE_GRANULARITY = "coarse_grained"; + public static String getInvertedIndexParser(Map properties) { String parser = properties == null ? null : properties.get(INVERTED_INDEX_PARSER_KEY); // default is "none" if not set return parser != null ? parser : INVERTED_INDEX_PARSER_NONE; } + public static String getInvertedIndexParserMode(Map properties) { + String mode = properties == null ? null : properties.get(INVERTED_INDEX_PARSER_MODE_KEY); + // default is "fine_grained" if not set + return mode != null ? 
mode : INVERTED_INDEX_PARSER_FINE_GRANULARITY; + } + public static void checkInvertedIndexParser(String indexColName, PrimitiveType colType, Map properties) throws AnalysisException { String parser = getInvertedIndexParser(properties); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java index f1da5a3eea..e5196cdcee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java @@ -20,18 +20,22 @@ package org.apache.doris.analysis; import org.apache.doris.catalog.ArrayType; import org.apache.doris.catalog.Function; import org.apache.doris.catalog.FunctionSet; +import org.apache.doris.catalog.Index; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.ScalarFunction; import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.thrift.TExprNode; import org.apache.doris.thrift.TExprNodeType; import org.apache.doris.thrift.TExprOpcode; +import org.apache.doris.thrift.TMatchPredicate; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.util.List; import java.util.Objects; /** @@ -145,6 +149,8 @@ public class MatchPredicate extends Predicate { } private final Operator op; + private String invertedIndexParser; + private String invertedIndexParserMode; public MatchPredicate(Operator op, Expr e1, Expr e2) { super(); @@ -155,6 +161,8 @@ public class MatchPredicate extends Predicate { children.add(e2); // TODO: Calculate selectivity selectivity = Expr.DEFAULT_SELECTIVITY; + invertedIndexParser = InvertedIndexUtil.INVERTED_INDEX_PARSER_UNKNOWN; + invertedIndexParserMode = InvertedIndexUtil.INVERTED_INDEX_PARSER_FINE_GRANULARITY; } public Boolean isMatchElement(Operator op) { @@ 
-168,6 +176,8 @@ public class MatchPredicate extends Predicate { protected MatchPredicate(MatchPredicate other) { super(other); op = other.op; + invertedIndexParser = other.invertedIndexParser; + invertedIndexParserMode = other.invertedIndexParserMode; } @Override @@ -196,6 +206,7 @@ public class MatchPredicate extends Predicate { protected void toThrift(TExprNode msg) { msg.node_type = TExprNodeType.MATCH_PRED; msg.setOpcode(op.getOpcode()); + msg.match_predicate = new TMatchPredicate(invertedIndexParser, invertedIndexParserMode); } @Override @@ -236,6 +247,26 @@ public class MatchPredicate extends Predicate { throw new AnalysisException("Invalid number format literal: " + e2.getStringValue()); } } + + if (e1 instanceof SlotRef) { + SlotRef slotRef = (SlotRef) e1; + SlotDescriptor slotDesc = slotRef.getDesc(); + if (slotDesc != null && slotDesc.isScanSlot()) { + TupleDescriptor slotParent = slotDesc.getParent(); + OlapTable olapTbl = (OlapTable) slotParent.getTable(); + List indexes = olapTbl.getIndexes(); + for (Index index : indexes) { + if (index.getIndexType() == IndexDef.IndexType.INVERTED) { + List columns = index.getColumns(); + if (slotRef.getColumnName().equals(columns.get(0))) { + invertedIndexParser = index.getInvertedIndexParser(); + invertedIndexParserMode = index.getInvertedIndexParserMode(); + break; + } + } + } + } + } } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java index b7058d2d20..e743087d26 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Index.java @@ -131,6 +131,10 @@ public class Index implements Writable { return InvertedIndexUtil.getInvertedIndexParser(properties); } + public String getInvertedIndexParserMode() { + return InvertedIndexUtil.getInvertedIndexParserMode(properties); + } + public String getComment() { return comment; } diff --git 
a/gensrc/thrift/Exprs.thrift b/gensrc/thrift/Exprs.thrift index 9c6720a53f..e211ded8ce 100644 --- a/gensrc/thrift/Exprs.thrift +++ b/gensrc/thrift/Exprs.thrift @@ -139,6 +139,11 @@ struct TLikePredicate { 1: required string escape_char; } +struct TMatchPredicate { + 1: required string parser_type; + 2: required string parser_mode; +} + struct TLiteralPredicate { 1: required bool value 2: required bool is_null @@ -235,6 +240,7 @@ struct TExprNode { 31: optional TSchemaChangeExpr schema_change_expr 32: optional TColumnRef column_ref + 33: optional TMatchPredicate match_predicate } // A flattened representation of a tree of Expr nodes, obtained by depth-first diff --git a/regression-test/suites/inverted_index_p0/test_add_drop_index_ignore_case_column.groovy b/regression-test/suites/inverted_index_p0/test_add_drop_index_ignore_case_column.groovy index cd04866dcf..0efbcc0b52 100644 --- a/regression-test/suites/inverted_index_p0/test_add_drop_index_ignore_case_column.groovy +++ b/regression-test/suites/inverted_index_p0/test_add_drop_index_ignore_case_column.groovy @@ -114,14 +114,14 @@ suite("test_add_drop_index_with_ignore_case_column", "inverted_index"){ assertEquals(select_result[0][2], "desc 2") // query rows where description match 'desc', should fail without index - def success = false - try { - sql "select * from ${indexTbName1} where description match 'desc'" - success = true - } catch(Exception ex) { - logger.info("sql exception: " + ex) - } - assertEquals(success, false) + select_result = sql "select * from ${indexTbName1} where description match 'desc' order by id" + assertEquals(select_result.size(), 2) + assertEquals(select_result[0][0], 1) + assertEquals(select_result[0][1], "name1") + assertEquals(select_result[0][2], "desc 1") + assertEquals(select_result[1][0], 2) + assertEquals(select_result[1][1], "name2") + assertEquals(select_result[1][2], "desc 2") // add index on column description sql "create index idx_desc on ${indexTbName1}(description) USING 
INVERTED PROPERTIES(\"parser\"=\"standard\");" @@ -165,15 +165,15 @@ suite("test_add_drop_index_with_ignore_case_column", "inverted_index"){ sql "drop index idx_desc on ${indexTbName1}" wait_for_latest_op_on_table_finish(indexTbName1, timeout) - // query rows where description match 'desc', should fail without index - success = false - try { - sql "select * from ${indexTbName1} where description match 'desc'" - success = true - } catch(Exception ex) { - logger.info("sql exception: " + ex) - } - assertEquals(success, false) + // query rows where description match 'desc' without index + select_result = sql "select * from ${indexTbName1} where description match 'desc' order by id" + assertEquals(select_result.size(), 2) + assertEquals(select_result[0][0], 1) + assertEquals(select_result[0][1], "name1") + assertEquals(select_result[0][2], "desc 1") + assertEquals(select_result[1][0], 2) + assertEquals(select_result[1][1], "name2") + assertEquals(select_result[1][2], "desc 2") // query rows where name='name1' select_result = sql "select * from ${indexTbName1} where name='name1'" diff --git a/regression-test/suites/inverted_index_p0/test_add_drop_index_with_data.groovy b/regression-test/suites/inverted_index_p0/test_add_drop_index_with_data.groovy index 2f5d8f7e43..7184a72c69 100644 --- a/regression-test/suites/inverted_index_p0/test_add_drop_index_with_data.groovy +++ b/regression-test/suites/inverted_index_p0/test_add_drop_index_with_data.groovy @@ -114,14 +114,14 @@ suite("test_add_drop_index_with_data", "inverted_index"){ assertEquals(select_result[0][2], "desc 2") // query rows where description match 'desc', should fail without index - def success = false - try { - sql "select * from ${indexTbName1} where description match 'desc'" - success = true - } catch(Exception ex) { - logger.info("sql exception: " + ex) - } - assertEquals(success, false) + select_result = sql "select * from ${indexTbName1} where description match 'desc' order by id" + 
assertEquals(select_result.size(), 2) + assertEquals(select_result[0][0], 1) + assertEquals(select_result[0][1], "name1") + assertEquals(select_result[0][2], "desc 1") + assertEquals(select_result[1][0], 2) + assertEquals(select_result[1][1], "name2") + assertEquals(select_result[1][2], "desc 2") // add index on column description sql "create index idx_desc on ${indexTbName1}(description) USING INVERTED PROPERTIES(\"parser\"=\"standard\");" @@ -165,14 +165,14 @@ suite("test_add_drop_index_with_data", "inverted_index"){ wait_for_latest_op_on_table_finish(indexTbName1, timeout) // query rows where description match 'desc', should fail without index - success = false - try { - sql "select * from ${indexTbName1} where description match 'desc'" - success = true - } catch(Exception ex) { - logger.info("sql exception: " + ex) - } - assertEquals(success, false) + select_result = sql "select * from ${indexTbName1} where description match 'desc' order by id" + assertEquals(select_result.size(), 2) + assertEquals(select_result[0][0], 1) + assertEquals(select_result[0][1], "name1") + assertEquals(select_result[0][2], "desc 1") + assertEquals(select_result[1][0], 2) + assertEquals(select_result[1][1], "name2") + assertEquals(select_result[1][2], "desc 2") // query rows where name='name1' select_result = sql "select * from ${indexTbName1} where name='name1'" @@ -216,15 +216,12 @@ suite("test_add_drop_index_with_data", "inverted_index"){ sql "drop index idx_name on ${indexTbName1}" wait_for_latest_op_on_table_finish(indexTbName1, timeout) - // query rows where name match 'name1', should fail without index - success = false - try { - sql "select * from ${indexTbName1} where name match 'name1'" - success = true - } catch(Exception ex) { - logger.info("sql exception: " + ex) - } - assertEquals(success, false) + // query rows where name match 'name1' without index + select_result = sql "select * from ${indexTbName1} where name match 'name1'" + assertEquals(select_result.size(), 1) + 
assertEquals(select_result[0][0], 1) + assertEquals(select_result[0][1], "name1") + assertEquals(select_result[0][2], "desc 1") // show index of create table show_result = sql "show index from ${indexTbName1}" @@ -300,23 +297,20 @@ suite("test_add_drop_index_with_data", "inverted_index"){ logger.info("show index from " + indexTbName1 + " result: " + show_result) assertEquals(show_result.size(), 0) - // query rows where name match 'name1', should fail without index - success = false - try { - sql "select * from ${indexTbName1} where name match 'name1'" - success = true - } catch(Exception ex) { - logger.info("sql exception: " + ex) - } - assertEquals(success, false) + // query rows where name match 'name1' without index + select_result = sql "select * from ${indexTbName1} where name match 'name1'" + assertEquals(select_result.size(), 1) + assertEquals(select_result[0][0], 1) + assertEquals(select_result[0][1], "name1") + assertEquals(select_result[0][2], "desc 1") - // query rows where description match 'desc', should fail without index - success = false - try { - sql "select * from ${indexTbName1} where description match 'desc'" - success = true - } catch(Exception ex) { - logger.info("sql exception: " + ex) - } - assertEquals(success, false) + // query rows where description match 'desc' without index + select_result = sql "select * from ${indexTbName1} where description match 'desc' order by id" + assertEquals(select_result.size(), 2) + assertEquals(select_result[0][0], 1) + assertEquals(select_result[0][1], "name1") + assertEquals(select_result[0][2], "desc 1") + assertEquals(select_result[1][0], 2) + assertEquals(select_result[1][1], "name2") + assertEquals(select_result[1][2], "desc 2") } \ No newline at end of file diff --git a/regression-test/suites/inverted_index_p0/test_add_drop_index_with_delete_data.groovy b/regression-test/suites/inverted_index_p0/test_add_drop_index_with_delete_data.groovy index 927a31dfc9..0d8a03a5f7 100644 --- 
a/regression-test/suites/inverted_index_p0/test_add_drop_index_with_delete_data.groovy +++ b/regression-test/suites/inverted_index_p0/test_add_drop_index_with_delete_data.groovy @@ -115,14 +115,20 @@ suite("test_add_drop_index_with_delete_data", "inverted_index"){ assertEquals(select_result[4][2], "desc world") // query rows where description match 'desc', should fail without index - def success = false - try { - sql "select * from ${indexTbName1} where description match 'desc'" - success = true - } catch(Exception ex) { - logger.info("sql exception: " + ex) - } - assertEquals(success, false) + select_result = sql "select * from ${indexTbName1} where description match 'desc' order by id" + assertEquals(select_result.size(), 4) + assertEquals(select_result[0][0], 1) + assertEquals(select_result[0][1], "name1") + assertEquals(select_result[0][2], "desc test hello") + assertEquals(select_result[1][0], 2) + assertEquals(select_result[1][1], "name2") + assertEquals(select_result[1][2], "desc hello ok") + assertEquals(select_result[2][0], 4) + assertEquals(select_result[2][1], "name4") + assertEquals(select_result[2][2], "desc ok world test") + assertEquals(select_result[3][0], 5) + assertEquals(select_result[3][1], "name5") + assertEquals(select_result[3][2], "desc world") // add index on column description sql "create index idx_desc on ${indexTbName1}(description) USING INVERTED PROPERTIES(\"parser\"=\"standard\");" @@ -192,15 +198,21 @@ suite("test_add_drop_index_with_delete_data", "inverted_index"){ // drop index sql "drop index idx_desc on ${indexTbName1}" wait_for_latest_op_on_table_finish(indexTbName1, timeout) - // query rows where description match 'desc', should fail without index - success = false - try { - sql "select * from ${indexTbName1} where description match 'desc'" - success = true - } catch(Exception ex) { - logger.info("sql exception: " + ex) - } - assertEquals(success, false) + // query rows where description match 'desc' without index + 
select_result = sql "select * from ${indexTbName1} where description match 'desc' order by id" + assertEquals(select_result.size(), 4) + assertEquals(select_result[0][0], 1) + assertEquals(select_result[0][1], "name1") + assertEquals(select_result[0][2], "desc test hello") + assertEquals(select_result[1][0], 2) + assertEquals(select_result[1][1], "name2") + assertEquals(select_result[1][2], "desc hello ok") + assertEquals(select_result[2][0], 4) + assertEquals(select_result[2][1], "name4") + assertEquals(select_result[2][2], "desc ok world test") + assertEquals(select_result[3][0], 5) + assertEquals(select_result[3][1], "name5") + assertEquals(select_result[3][2], "desc world") show_result = sql "show index from ${indexTbName1}" logger.info("show index from " + indexTbName1 + " result: " + show_result)