From d629369d105284a895b56d8a1728f3a8a98c896b Mon Sep 17 00:00:00 2001 From: saltonz Date: Mon, 13 May 2024 04:20:36 +0000 Subject: [PATCH] [fulltext] enable text retrieval query with no-index-back and no-relevance-calculation opt hint Co-authored-by: JinmaoLi --- deps/oblib/src/lib/container/ob_mask_set2.h | 12 +- .../code_generator/ob_static_engine_cg.cpp | 3 +- src/sql/code_generator/ob_tsc_cg_service.cpp | 19 ++- src/sql/das/ob_das_scan_op.cpp | 86 +++++++---- src/sql/das/ob_text_retrieval_op.cpp | 42 ++++-- src/sql/optimizer/ob_join_order.cpp | 24 +-- src/sql/optimizer/ob_join_order.h | 1 + src/sql/optimizer/ob_log_plan.cpp | 41 ++++- src/sql/optimizer/ob_log_table_scan.cpp | 5 + src/sql/optimizer/ob_log_table_scan.h | 8 +- src/sql/resolver/dml/ob_dml_stmt.cpp | 19 ++- src/sql/resolver/dml/ob_dml_stmt.h | 4 +- src/sql/resolver/expr/ob_raw_expr.h | 3 +- src/sql/resolver/expr/ob_raw_expr_util.cpp | 4 +- src/sql/rewrite/ob_transform_pre_process.cpp | 9 ++ src/sql/rewrite/ob_transform_utils.cpp | 141 ++++++++++++++++++ src/sql/rewrite/ob_transform_utils.h | 15 ++ .../fts/ob_text_retrieval_iterator.cpp | 30 ++-- 18 files changed, 372 insertions(+), 94 deletions(-) diff --git a/deps/oblib/src/lib/container/ob_mask_set2.h b/deps/oblib/src/lib/container/ob_mask_set2.h index d3a7763ebe..3a09bab154 100644 --- a/deps/oblib/src/lib/container/ob_mask_set2.h +++ b/deps/oblib/src/lib/container/ob_mask_set2.h @@ -19,9 +19,9 @@ namespace oceanbase { namespace common -{ - -template +{ + +template class ObMaskSet2 { public: @@ -147,9 +147,9 @@ public: } } } - } - return ret; - } + } + return ret; + } bool is_mask(const T &key) { bool bool_ret = false; diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index e540492404..8a67b9a05d 100644 --- a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -361,7 +361,8 @@ int ObStaticEngineCG::disable_use_rich_format(const 
ObLogicalOperator &op, ObOpS || is_virtual_table(tsc.get_ref_table_id()) || (NULL != spec.get_parent() && PHY_UPDATE == spec.get_parent()->type_) || (NULL != spec.get_parent() && PHY_DELETE == spec.get_parent()->type_) - || (static_cast(spec)).tsc_ctdef_.scan_ctdef_.is_get_) { + || (static_cast(spec)).tsc_ctdef_.scan_ctdef_.is_get_ + || tsc.is_text_retrieval_scan()) { use_rich_format = false; LOG_DEBUG("tsc disable use rich format", K(tsc.get_index_back()), K(tsc.use_batch()), K(is_virtual_table(tsc.get_ref_table_id()))); diff --git a/src/sql/code_generator/ob_tsc_cg_service.cpp b/src/sql/code_generator/ob_tsc_cg_service.cpp index 62b725c44d..8b18ec2d6c 100644 --- a/src/sql/code_generator/ob_tsc_cg_service.cpp +++ b/src/sql/code_generator/ob_tsc_cg_service.cpp @@ -1206,6 +1206,7 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op, } if (OB_SUCC(ret)) { + root_ctdef = ir_scan_ctdef; if (OB_FAIL(generate_text_ir_spec_exprs(op, *ir_scan_ctdef))) { LOG_WARN("failed to generate text ir spec exprs", K(ret), KPC(match_against)); } else { @@ -1235,10 +1236,12 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op, ir_scan_ctdef, sort_ctdef))) { LOG_WARN("generate sort ctdef failed", K(ret)); + } else { + root_ctdef = sort_ctdef; } } - if (OB_SUCC(ret)) { + if (OB_SUCC(ret) && op.get_index_back()) { ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr; ObDASBaseCtDef *ir_output_ctdef = nullptr == sort_ctdef ? 
static_cast(ir_scan_ctdef) : static_cast(sort_ctdef); @@ -1359,9 +1362,7 @@ int ObTscCgService::generate_text_ir_pushdown_expr_ctdef( int ret = OB_SUCCESS; const uint64_t scan_table_id = scan_ctdef.ref_table_id_; const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info(); - if (OB_FAIL(cg_.mark_expr_self_produced(tr_info.match_expr_))) { - LOG_WARN("failed to mark raw agg expr", K(ret), KPC(tr_info.match_expr_)); - } else if (!scan_ctdef.pd_expr_spec_.pd_storage_flag_.is_aggregate_pushdown()) { + if (!scan_ctdef.pd_expr_spec_.pd_storage_flag_.is_aggregate_pushdown()) { // this das scan do not need aggregate pushdown } else { ObSEArray agg_expr_arr; @@ -1441,6 +1442,8 @@ int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op, OB_ISNULL(tr_info.doc_id_column_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(cg_.mark_expr_self_produced(tr_info.match_expr_))) { + LOG_WARN("failed to mark raw agg expr", K(ret), KPC(tr_info.match_expr_)); } else if (OB_FAIL(cg_.generate_rt_expr(*tr_info.match_expr_->get_search_key(), text_ir_scan_ctdef.search_text_))) { LOG_WARN("cg rt expr for search text failed", K(ret)); } else if (OB_ISNULL(tr_info.pushdown_match_filter_)) { @@ -1483,13 +1486,17 @@ int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op, LOG_WARN("unexpected null relevance expr", K(ret)); } else if (OB_FAIL(cg_.generate_rt_expr(*tr_info.relevance_expr_, text_ir_scan_ctdef.relevance_expr_))) { LOG_WARN("cg rt expr for relevance expr failed", K(ret)); - } else if (OB_FAIL(cg_.generate_rt_expr(*tr_info.match_expr_, + } + } + + if (OB_SUCC(ret) && (op.need_text_retrieval_calc_relevance() || nullptr != tr_info.pushdown_match_filter_)) { + if (OB_FAIL(cg_.generate_rt_expr(*tr_info.match_expr_, text_ir_scan_ctdef.relevance_proj_col_))) { LOG_WARN("cg rt expr for relevance score proejction failed", K(ret)); } else if (OB_ISNULL(text_ir_scan_ctdef.relevance_proj_col_)) { ret = OB_ERR_UNEXPECTED; 
LOG_WARN("unexpected relevance pseudo score colum not found", K(ret)); - } else if (OB_FAIL(result_output.push_back(text_ir_scan_ctdef.relevance_expr_))) { + } else if (OB_FAIL(result_output.push_back(text_ir_scan_ctdef.relevance_proj_col_))) { LOG_WARN("failed to append relevance expr", K(ret)); } } diff --git a/src/sql/das/ob_das_scan_op.cpp b/src/sql/das/ob_das_scan_op.cpp index 4161c95aaa..c1b3f75272 100644 --- a/src/sql/das/ob_das_scan_op.cpp +++ b/src/sql/das/ob_das_scan_op.cpp @@ -936,6 +936,7 @@ int ObDASScanOp::do_text_retrieve(common::ObNewRowIterator *&retrieval_iter) ObDASIRAuxLookupRtDef *aux_lookup_rtdef = nullptr; const ObDASSortCtDef *sort_ctdef = nullptr; ObDASSortRtDef *sort_rtdef = nullptr; + const bool has_lookup = nullptr != get_lookup_ctdef(); if (OB_ISNULL(retrieval_op = OB_NEWx(ObTextRetrievalOp, &op_alloc_))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to allocate text retrieval op", K(ret)); @@ -948,15 +949,28 @@ int ObDASScanOp::do_text_retrieve(common::ObNewRowIterator *&retrieval_iter) ir_scan_ctdef, ir_scan_rtdef))) { LOG_WARN("find ir scan definition failed", K(ret)); - } else if (OB_FAIL(ObDASUtils::find_target_das_def(attach_ctdef_, - attach_rtdef_, - DAS_OP_IR_AUX_LOOKUP, - aux_lookup_ctdef, - aux_lookup_rtdef))) { + } else if (has_lookup && OB_FAIL(ObDASUtils::find_target_das_def(attach_ctdef_, + attach_rtdef_, + DAS_OP_IR_AUX_LOOKUP, + aux_lookup_ctdef, + aux_lookup_rtdef))) { LOG_WARN("find aux lookup definition failed", K(ret)); - } else if (DAS_OP_SORT == aux_lookup_ctdef->get_doc_id_scan_ctdef()->op_type_) { - sort_ctdef = static_cast(aux_lookup_ctdef->get_doc_id_scan_ctdef()); - sort_rtdef = static_cast(aux_lookup_rtdef->get_doc_id_scan_rtdef()); + } + + if (OB_SUCC(ret)) { + if (has_lookup) { + // relevance sort would be child of aux lookup if aux lookup exists + if (DAS_OP_SORT == aux_lookup_ctdef->get_doc_id_scan_ctdef()->op_type_) { + sort_ctdef = static_cast(aux_lookup_ctdef->get_doc_id_scan_ctdef()); + 
sort_rtdef = static_cast(aux_lookup_rtdef->get_doc_id_scan_rtdef()); + } + } else { + // relevance sort would be the root attach ctdef if no aux/table lookup + if (DAS_OP_SORT == attach_ctdef_->op_type_) { + sort_ctdef = static_cast(attach_ctdef_); + sort_rtdef = static_cast(attach_rtdef_); + } + } } if (FAILEDx(retrieval_op->init(ls_id_, @@ -977,8 +991,8 @@ int ObDASScanOp::do_text_retrieve(common::ObNewRowIterator *&retrieval_iter) int ObDASScanOp::do_text_retrieve_rescan() { int ret = OB_SUCCESS; - ObTextRetrievalOp *text_retrieval_op = nullptr; - if (nullptr == result_ || result_->get_type() != ObNewRowIterator::IterType::ObLocalIndexLookupIterator) { + if (nullptr == result_ || (result_->get_type() != ObNewRowIterator::IterType::ObLocalIndexLookupIterator + && result_->get_type() != ObNewRowIterator::IterType::ObTextRetrievalOp)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected text retrieve rescan status", K(ret), KP_(result)); } else { @@ -992,32 +1006,54 @@ int ObDASScanOp::do_text_retrieve_rescan() ObDASIRAuxLookupRtDef *aux_lookup_rtdef = nullptr; const ObDASSortCtDef *sort_ctdef = nullptr; ObDASSortRtDef *sort_rtdef = nullptr; - ObFullTextIndexLookupOp *text_lookup_op = static_cast(result_); - if (OB_FAIL(get_aux_lookup_tablet_id(aux_lookup_tablet_id))) { - LOG_WARN("failed to get doc id idx tablet id", K(ret), K_(related_tablet_ids)); - } else if (FALSE_IT(text_retrieval_op = static_cast( - text_lookup_op->get_text_retrieval_iter()))) { - } else if (OB_FAIL(get_text_ir_tablet_ids(inv_idx_tablet_id, fwd_idx_tablet_id, doc_id_idx_tablet_id))) { + const bool has_lookup = nullptr != get_lookup_ctdef(); + ObFullTextIndexLookupOp *text_lookup_op = has_lookup + ? static_cast(result_) + : nullptr; + ObTextRetrievalOp * text_retrieval_op = has_lookup + ? 
static_cast(text_lookup_op->get_text_retrieval_iter()) + : static_cast(result_); + if (OB_FAIL(get_text_ir_tablet_ids(inv_idx_tablet_id, fwd_idx_tablet_id, doc_id_idx_tablet_id))) { LOG_WARN("failed to get text ir tablet ids", K(ret)); - } else if (OB_UNLIKELY(doc_id_idx_tablet_id != aux_lookup_tablet_id)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected aux lookup tablet id is not doc id idx tablet id", K(ret), - K(doc_id_idx_tablet_id), K(aux_lookup_tablet_id)); } else if (OB_FAIL(ObDASUtils::find_target_das_def(attach_ctdef_, attach_rtdef_, DAS_OP_IR_SCAN, ir_scan_ctdef, ir_scan_rtdef))) { LOG_WARN("find ir scan definition failed", K(ret)); + } else if (!has_lookup) { + // skip + } else if (OB_ISNULL(text_lookup_op)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to text lookup op", K(ret), KPC(result_)); + } else if (OB_FAIL(get_aux_lookup_tablet_id(aux_lookup_tablet_id))) { + LOG_WARN("failed to get doc id idx tablet id", K(ret), K_(related_tablet_ids)); } else if (OB_FAIL(ObDASUtils::find_target_das_def(attach_ctdef_, attach_rtdef_, DAS_OP_IR_AUX_LOOKUP, aux_lookup_ctdef, aux_lookup_rtdef))) { LOG_WARN("find aux lookup definition failed", K(ret)); - } else if (DAS_OP_SORT == aux_lookup_ctdef->get_doc_id_scan_ctdef()->op_type_) { - sort_ctdef = static_cast(aux_lookup_ctdef->get_doc_id_scan_ctdef()); - sort_rtdef = static_cast(aux_lookup_rtdef->get_doc_id_scan_rtdef()); + } else { + text_lookup_op->set_tablet_id(get_table_lookup_tablet_id()); + text_lookup_op->set_ls_id(ls_id_); + text_lookup_op->set_doc_id_idx_tablet_id(aux_lookup_tablet_id); + } + + if (OB_SUCC(ret)) { + if (has_lookup) { + // relevance sort would be child of aux lookup if aux lookup exists + if (DAS_OP_SORT == aux_lookup_ctdef->get_doc_id_scan_ctdef()->op_type_) { + sort_ctdef = static_cast(aux_lookup_ctdef->get_doc_id_scan_ctdef()); + sort_rtdef = static_cast(aux_lookup_rtdef->get_doc_id_scan_rtdef()); + } + } else { + // relevance sort would be the root attach ctdef if no 
aux/table lookup + if (DAS_OP_SORT == attach_ctdef_->op_type_) { + sort_ctdef = static_cast(attach_ctdef_); + sort_rtdef = static_cast(attach_rtdef_); + } + } } if (OB_FAIL(ret)) { @@ -1033,10 +1069,6 @@ int ObDASScanOp::do_text_retrieve_rescan() trans_desc_, snapshot_))) { LOG_WARN("failed to do text retrieval rescan", K(ret)); - } else { - text_lookup_op->set_tablet_id(get_table_lookup_tablet_id()); - text_lookup_op->set_ls_id(ls_id_); - text_lookup_op->set_doc_id_idx_tablet_id(aux_lookup_tablet_id); } } return ret; diff --git a/src/sql/das/ob_text_retrieval_op.cpp b/src/sql/das/ob_text_retrieval_op.cpp index 5e88b01dc3..f951591dd3 100644 --- a/src/sql/das/ob_text_retrieval_op.cpp +++ b/src/sql/das/ob_text_retrieval_op.cpp @@ -227,7 +227,8 @@ int ObTextRetrievalMerge::get_next_row(ObNewRow *&row) } bool got_valid_document = false; - ObExpr *match_filter = retrieval_param_.get_ir_ctdef()->match_filter_; + ObExpr *match_filter = retrieval_param_.need_relevance() + ? retrieval_param_.get_ir_ctdef()->match_filter_ : nullptr; ObDatum *filter_res = nullptr; while (OB_SUCC(ret) && !got_valid_document) { clear_evaluated_infos(); @@ -509,8 +510,9 @@ int ObTextRetrievalMerge::next_disjunctive_document() } } - if (OB_SUCC(ret) && retrieval_param_.get_ir_ctdef()->need_proj_relevance_score()) { - if (OB_FAIL(project_result(*top_item, cur_doc_relevance))) { + if (OB_SUCC(ret)) { + const double project_relevance = retrieval_param_.need_relevance() ? 
cur_doc_relevance : 1; + if (OB_FAIL(project_result(*top_item, project_relevance))) { LOG_WARN("failed to project relevance", K(ret)); } } @@ -521,7 +523,6 @@ int ObTextRetrievalMerge::next_disjunctive_document() int ObTextRetrievalMerge::project_result(const ObIRIterLoserTreeItem &item, const double relevance) { int ret = OB_SUCCESS; - ObExpr *relevance_proj_col = retrieval_param_.get_ir_ctdef()->relevance_proj_col_; // TODO: usage of doc id column is somehow weird here, since in single token retrieval iterators, // we use doc id expr to scan doc_id column for scan document. But here after DaaT processing, we use this expr // to record current disjunctive documents. Though current implementation can make sure lifetime is @@ -529,15 +530,23 @@ int ObTextRetrievalMerge::project_result(const ObIRIterLoserTreeItem &item, cons // P.S we cannot allocate multiple doc id expr at cg for every query token since tokenization now is an runtime operation ObExpr *doc_id_col = retrieval_param_.get_ir_ctdef()->inv_scan_doc_id_col_; ObEvalCtx *eval_ctx = retrieval_param_.get_ir_rtdef()->eval_ctx_; - if (OB_ISNULL(relevance_proj_col) || OB_ISNULL(doc_id_col) || OB_ISNULL(eval_ctx)) { + if (OB_ISNULL(doc_id_col) || OB_ISNULL(eval_ctx)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected nullptr to relevance proejction column", - K(ret), KP(relevance_proj_col), KP(doc_id_col), KP(eval_ctx)); + K(ret), KP(doc_id_col), KP(eval_ctx)); } else { - ObDatum &relevance_proj_datum = relevance_proj_col->locate_datum_for_write(*eval_ctx); ObDatum &doc_id_proj_datum = doc_id_col->locate_datum_for_write(*eval_ctx); - relevance_proj_datum.set_double(relevance); doc_id_proj_datum.set_string(item.doc_id_.get_string()); + if (retrieval_param_.get_ir_ctdef()->need_proj_relevance_score()) { + ObExpr *relevance_proj_col = retrieval_param_.get_ir_ctdef()->relevance_proj_col_; + if (OB_ISNULL(relevance_proj_col)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null relevance proj col", K(ret)); + } else { 
+ ObDatum &relevance_proj_datum = relevance_proj_col->locate_datum_for_write(*eval_ctx); + relevance_proj_datum.set_double(relevance); + } + } LOG_DEBUG("project one fulltext search result", K(ret), K(item)); } return ret; @@ -550,13 +559,14 @@ int ObTextRetrievalMerge::fill_loser_tree_item( { int ret = OB_SUCCESS; item.iter_idx_ = iter_idx; - ObExpr *relevance_expr = retrieval_param_.get_ir_ctdef()->relevance_expr_; ObExpr *doc_id_expr = retrieval_param_.get_ir_ctdef()->inv_scan_doc_id_col_; - const ObDatum &relevance_datum = relevance_expr->locate_expr_datum(*retrieval_param_.get_ir_rtdef()->eval_ctx_); const ObDatum &doc_id_datum = doc_id_expr->locate_expr_datum(*retrieval_param_.get_ir_rtdef()->eval_ctx_); - item.relevance_ = relevance_datum.get_double(); if (OB_FAIL(item.doc_id_.from_string(doc_id_datum.get_string()))) { - LOG_WARN("failed to get ObDocId from string", K(ret)); + LOG_WARN("failed to get ObDocId from string", K(ret), K(doc_id_datum), KPC(doc_id_expr)); + } else if (retrieval_param_.need_relevance()) { + ObExpr *relevance_expr = retrieval_param_.get_ir_ctdef()->relevance_expr_; + const ObDatum &relevance_datum = relevance_expr->locate_expr_datum(*retrieval_param_.get_ir_rtdef()->eval_ctx_); + item.relevance_ = relevance_datum.get_double(); } return ret; } @@ -568,7 +578,9 @@ int ObTextRetrievalMerge::init_total_doc_cnt_param( int ret = OB_SUCCESS; const ObDASScanCtDef *ctdef = retrieval_param_.get_doc_id_idx_agg_ctdef(); ObDASScanRtDef *rtdef = retrieval_param_.get_ir_rtdef()->get_doc_id_idx_agg_rtdef(); - if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) { + if (!retrieval_param_.need_relevance()) { + // no need to do total doc cnt + } else if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected scan descriptor", K(ret)); } else { @@ -629,7 +641,9 @@ int ObTextRetrievalMerge::do_total_doc_cnt() { int ret = OB_SUCCESS; - if (retrieval_param_.get_ir_ctdef()->need_do_total_doc_cnt()) { + if 
(!retrieval_param_.need_relevance()) { + // skip + } else if (retrieval_param_.get_ir_ctdef()->need_do_total_doc_cnt()) { // When estimation info not exist, or we found estimation info not accurate, calculate document count by scan ObITabletScan *tsc_service = MTL(ObAccessService *); if (OB_ISNULL(tsc_service)) { diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index ee3a258dd3..d7686112bc 100644 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -12007,6 +12007,7 @@ int ObJoinOrder::check_subquery_in_join_condition(const ObJoinType join_type, int ObJoinOrder::extract_used_columns(const uint64_t table_id, const uint64_t ref_table_id, bool only_normal_ref_expr, + bool consider_rowkey, ObIArray &column_ids, ObIArray &columns) { @@ -12032,15 +12033,18 @@ int ObJoinOrder::extract_used_columns(const uint64_t table_id, ret = OB_ERR_UNEXPECTED; LOG_WARN("null table item", K(ret)); } else { - // add all rowkey info, always used when merge ss-table and mem-table - const ObRowkeyInfo &rowkey_info = table_schema->get_rowkey_info(); - uint64_t column_id = OB_INVALID_ID; - for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_info.get_size(); ++i) { - if (OB_FAIL(rowkey_info.get_column_id(i, column_id))) { - LOG_WARN("Fail to get column id", K(ret)); - } else if (OB_FAIL(column_ids.push_back(column_id))) { - LOG_WARN("Fail to add column id", K(ret)); - } else { /*do nothing*/ } + if (consider_rowkey) { + // for normal index, add all rowkey info, always used when merge ss-table and mem-table + // for fulltext index, rowkey info is not necessary + const ObRowkeyInfo &rowkey_info = table_schema->get_rowkey_info(); + uint64_t column_id = OB_INVALID_ID; + for (int64_t i = 0; OB_SUCC(ret) && i < rowkey_info.get_size(); ++i) { + if (OB_FAIL(rowkey_info.get_column_id(i, column_id))) { + LOG_WARN("Fail to get column id", K(ret)); + } else if (OB_FAIL(column_ids.push_back(column_id))) { + LOG_WARN("Fail to add column id", 
K(ret)); + } else { /*do nothing*/ } + } } // add common column ids for (int64_t i = 0; OB_SUCC(ret) && i < stmt->get_column_size(); ++i) { @@ -12097,6 +12101,7 @@ int ObJoinOrder::get_simple_index_info(const uint64_t table_id, } else if (OB_FAIL(extract_used_columns(table_id, ref_table_id, true, + !index_schema->is_fts_index_aux(), column_ids, dummy_columns))) { LOG_WARN("failed to extract column ids", K(table_id), K(ref_table_id), K(ret)); @@ -12501,6 +12506,7 @@ int ObJoinOrder::fill_path_index_meta_info(const uint64_t table_id, if (OB_FAIL(extract_used_columns(table_id, ref_table_id, index_id != ref_table_id && !ap->est_cost_info_.index_meta_info_.is_index_back_, + !index_meta_info.is_fulltext_index_, ap->est_cost_info_.access_columns_, dummy_columns))) { LOG_WARN("failed to extract used column ids", K(ret)); diff --git a/src/sql/optimizer/ob_join_order.h b/src/sql/optimizer/ob_join_order.h index 744dd501b6..b597448f3a 100644 --- a/src/sql/optimizer/ob_join_order.h +++ b/src/sql/optimizer/ob_join_order.h @@ -1384,6 +1384,7 @@ struct NullAwareAntiJoinInfo { int extract_used_columns(const uint64_t table_id, const uint64_t ref_table_id, bool only_normal_ref_expr, + bool consider_rowkey, ObIArray &column_ids, ObIArray &columns); diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index 3be190f29e..398c44ce59 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -6815,6 +6815,7 @@ int ObLogPlan::try_push_aggr_into_table_scan(ObLogicalOperator *top, } else if (is_get || has_npd_filter || scan_op->get_index_back() || + scan_op->is_text_retrieval_scan() || scan_op->is_sample_scan() || (is_descending_direction(scan_op->get_scan_direction()) && !groupby_columns.empty())) { //aggr func cannot be pushed down to the storage layer in these scenarios: @@ -6822,6 +6823,7 @@ int ObLogPlan::try_push_aggr_into_table_scan(ObLogicalOperator *top, //2. TSC is sample scan operator //3. 
TSC contains filters that cannot be pushed down to the storage //4. TSC is point get + //5. TSC is text retrieval scan } else if (OB_FAIL(scan_op->get_pushdown_aggr_exprs().assign(aggr_items))) { LOG_WARN("failed to assign group exprs", K(ret)); } else if (OB_FAIL(scan_op->get_pushdown_groupby_columns().assign(groupby_columns))) { @@ -7286,7 +7288,7 @@ int ObLogPlan::check_storage_groupby_pushdown(const ObIArray is_virtual_table(table_item->ref_id_) || EXTERNAL_TABLE == table_item->table_type_) { /*do nothing*/ - } else if (OB_FAIL(stmt->has_virtual_generated_column(table_item->table_id_, has_virtual_col))) { + } else if (OB_FAIL(stmt->has_virtual_generated_column(table_item->table_id_, has_virtual_col, true))) { LOG_WARN("failed to check has virtual generated column", K(ret), K(*table_item)); } else if (has_virtual_col) { /* do not push down when exists virtual generated column */ @@ -12738,9 +12740,16 @@ int ObLogPlan::collect_location_related_info(ObLogicalOperator &op) } else if (tsc_op.need_doc_id_index_back() && OB_FAIL(rel_info.related_ids_.push_back(tsc_op.get_doc_id_index_table_id()))) { LOG_WARN("store doc id index back aux tid failed", K(ret)); - } else if (tsc_op.is_text_retrieval_scan() && - OB_FAIL(rel_info.related_ids_.push_back(tsc_op.get_text_retrieval_info().fwd_idx_tid_))) { - LOG_WARN("store forward index id for text retrieval failed", K(ret)); + } + } + + if (OB_SUCC(ret) && tsc_op.is_text_retrieval_scan()) { + if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, tsc_op.get_text_retrieval_info().fwd_idx_tid_))) { + LOG_WARN("failed to append forward index table id", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, tsc_op.get_text_retrieval_info().doc_id_idx_tid_))) { + LOG_WARN("failed to append doc id idx table id", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, tsc_op.get_real_ref_table_id()))) { + LOG_WARN("failed to append main table id", K(ret)); } } @@ -15187,9 +15196,15 @@ 
int ObLogPlan::prepare_text_retrieval_scan(const ObIArray &exprs, O uint64_t fwd_idx_tid = OB_INVALID_ID; uint64_t inv_idx_tid = OB_INVALID_ID; ObSEArray index_infos; + bool need_calc_relevance = true; + ObSEArray constraints; - if (OB_UNLIKELY(1 != exprs.count()) || OB_ISNULL(match_pred = exprs.at(0)) || OB_ISNULL(scan) || - OB_ISNULL(get_stmt())) { + if (OB_UNLIKELY(1 != exprs.count())) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("multi match filters not supported yet", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "more than one match filter"); + } else if (OB_ISNULL(match_pred = exprs.at(0)) || OB_ISNULL(scan) || + OB_ISNULL(get_stmt()) || OB_ISNULL(get_optimizer_context().get_query_ctx())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argumsnts", K(ret), KPC(match_pred), KP(scan)); } else if (OB_ISNULL(get_stmt()) @@ -15274,14 +15289,26 @@ int ObLogPlan::prepare_text_retrieval_scan(const ObIArray &exprs, O } } if (OB_SUCC(ret)) { + /* + if (OB_FAIL(ObTransformUtils::check_need_calc_match_score(get_optimizer_context().get_exec_ctx(), + get_stmt(), + match_against, + need_calc_relevance, + constraints))) { + LOG_WARN("failed to check need calc relevance", K(ret)); + } else if (!need_calc_relevance && + OB_FAIL(append_array_no_dup(get_optimizer_context().get_query_ctx()->all_expr_constraints_, constraints))) { + LOG_WARN("failed to append array no dup", K(ret)); + } + */ ObTextRetrievalInfo &tr_info = table_scan->get_text_retrieval_info(); tr_info.match_expr_ = match_against; tr_info.inv_idx_tid_ = inv_idx_tid; tr_info.fwd_idx_tid_ = fwd_idx_tid; tr_info.doc_id_idx_tid_ = doc_id_rowkey_tid; tr_info.pushdown_match_filter_ = match_pred; + tr_info.need_calc_relevance_ = need_calc_relevance; table_scan->set_doc_id_index_table_id(doc_id_rowkey_tid); - table_scan->set_index_back(true); } return ret; } diff --git a/src/sql/optimizer/ob_log_table_scan.cpp b/src/sql/optimizer/ob_log_table_scan.cpp index d3bc84f93a..972d84a0bb 100644 --- a/src/sql/optimizer/ob_log_table_scan.cpp 
+++ b/src/sql/optimizer/ob_log_table_scan.cpp @@ -2336,10 +2336,15 @@ int ObLogTableScan::print_text_retrieval_annotation(char *buf, int64_t buf_len, ObRawExpr *limit = tr_info.topk_limit_expr_; ObRawExpr *offset = tr_info.topk_offset_expr_; ObSEArray sort_keys; + bool calc_relevance = tr_info.need_calc_relevance_; if (OB_FAIL(BUF_PRINTF(", "))) { LOG_WARN("BUF_PRINTF fails", K(ret)); } else if (OB_FAIL(BUF_PRINTF("\n "))) { LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF("calc_relevance=%s", calc_relevance ? "true" : "false"))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } else if (OB_FAIL(BUF_PRINTF(", "))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); } else if (FALSE_IT(EXPLAIN_PRINT_EXPR(match_expr, type))) { } if (OB_SUCC(ret) && OB_NOT_NULL(pushdown_match_filter)) { diff --git a/src/sql/optimizer/ob_log_table_scan.h b/src/sql/optimizer/ob_log_table_scan.h index 4740db1eb7..22137ebc27 100644 --- a/src/sql/optimizer/ob_log_table_scan.h +++ b/src/sql/optimizer/ob_log_table_scan.h @@ -35,6 +35,7 @@ struct ObTextRetrievalInfo topk_limit_expr_(NULL), topk_offset_expr_(NULL), with_ties_(false), + need_calc_relevance_(true), inv_idx_tid_(OB_INVALID_ID), fwd_idx_tid_(OB_INVALID_ID), doc_id_idx_tid_(OB_INVALID_ID), @@ -50,7 +51,8 @@ struct ObTextRetrievalInfo ~ObTextRetrievalInfo() {} TO_STRING_KV(K_(match_expr), K_(pushdown_match_filter), K_(sort_key), K_(topk_limit_expr), - K_(topk_offset_expr), K_(with_ties), K_(inv_idx_tid), K_(fwd_idx_tid), K_(doc_id_idx_tid)); + K_(topk_offset_expr), K_(with_ties), K_(need_calc_relevance), K_(inv_idx_tid), + K_(fwd_idx_tid), K_(doc_id_idx_tid)); bool need_sort() const { return sort_key_.expr_ != nullptr; } @@ -60,6 +62,7 @@ struct ObTextRetrievalInfo ObRawExpr *topk_limit_expr_; ObRawExpr *topk_offset_expr_; bool with_ties_; + bool need_calc_relevance_; // match expr just for retireval (accurate score is not required) uint64_t inv_idx_tid_; // choosed aux inverted index table id (word-doc) uint64_t 
fwd_idx_tid_; // choosed aux forward index table id (doc-word) uint64_t doc_id_idx_tid_; // choosed aux doc_id index table id (doc-rowkey) @@ -547,8 +550,7 @@ public: inline ObTextRetrievalInfo &get_text_retrieval_info() { return text_retrieval_info_; } inline const ObTextRetrievalInfo &get_text_retrieval_info() const { return text_retrieval_info_; } int prepare_text_retrieval_dep_exprs(); - // jinmao TODO: 之后要判断这个标,一期统一设置为 true - inline bool need_text_retrieval_calc_relevance() const { return true; } + inline bool need_text_retrieval_calc_relevance() const { return text_retrieval_info_.need_calc_relevance_; } inline bool need_doc_id_index_back() const { return is_text_retrieval_scan() || is_multivalue_index_scan() ; } inline void set_doc_id_index_table_id(const uint64_t doc_id_index_table_id) { doc_id_table_id_ = doc_id_index_table_id; } inline uint64_t get_doc_id_index_table_id() const { return doc_id_table_id_; } diff --git a/src/sql/resolver/dml/ob_dml_stmt.cpp b/src/sql/resolver/dml/ob_dml_stmt.cpp index 22a915003f..81dd6e0ee4 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.cpp +++ b/src/sql/resolver/dml/ob_dml_stmt.cpp @@ -1997,7 +1997,8 @@ int ObDMLStmt::set_sharable_expr_reference(ObRawExpr &expr, ExplicitedRefType re int ret = OB_SUCCESS; if (expr.is_column_ref_expr() || expr.is_aggr_expr() || expr.is_win_func_expr() || expr.is_query_ref_expr() || - ObRawExprUtils::is_pseudo_column_like_expr(expr)) { + ObRawExprUtils::is_pseudo_column_like_expr(expr) || + expr.is_match_against_expr()) { expr.set_explicited_reference(ref_type); if (expr.is_column_ref_expr()) { ObColumnRefRawExpr &column_expr = static_cast(expr); @@ -2054,6 +2055,7 @@ int ObDMLStmt::set_sharable_expr_reference(ObRawExpr &expr, ExplicitedRefType re expr.has_flag(CNT_ROWNUM) || expr.has_flag(CNT_SEQ_EXPR) || expr.has_flag(CNT_PSEUDO_COLUMN) || expr.has_flag(CNT_ONETIME) || expr.has_flag(CNT_DYNAMIC_PARAM) || expr.has_flag(CNT_MATCH_EXPR))) { + ref_type = expr.is_match_against_expr() ? 
ExplicitedRefType::REF_BY_MATCH_EXPR : ref_type; for (int64_t i = 0; OB_SUCC(ret) && i < expr.get_param_count(); i++) { if (OB_ISNULL(expr.get_param_expr(i))) { ret = OB_ERR_UNEXPECTED; @@ -4376,7 +4378,9 @@ int ObDMLStmt::check_and_get_same_rowid_expr(const ObRawExpr *expr, ObRawExpr *& return ret; } -int ObDMLStmt::has_virtual_generated_column(int64_t table_id, bool &has_virtual_col) const +int ObDMLStmt::has_virtual_generated_column(int64_t table_id, + bool &has_virtual_col, + bool ignore_fulltext_gen_col/*=false*/) const { int ret = OB_SUCCESS; const ObColumnRefRawExpr *col_expr = NULL; @@ -4385,9 +4389,14 @@ int ObDMLStmt::has_virtual_generated_column(int64_t table_id, bool &has_virtual_ if (OB_ISNULL(col_expr = column_items_.at(i).expr_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(col_expr)); - } else if (table_id == col_expr->get_table_id() && - col_expr->is_virtual_generated_column()) { - has_virtual_col = true; + } else if (table_id == col_expr->get_table_id() && col_expr->is_virtual_generated_column()) { + if (col_expr->is_fulltext_column() && ignore_fulltext_gen_col) { + // columns that are additionally dependent on the full-text index auxiliary table are + // defined as virtual generated columns on the main table. 
+ has_virtual_col = false; + } else { + has_virtual_col = true; + } } } return ret; diff --git a/src/sql/resolver/dml/ob_dml_stmt.h b/src/sql/resolver/dml/ob_dml_stmt.h index c62ad2e6eb..b057a415a8 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.h +++ b/src/sql/resolver/dml/ob_dml_stmt.h @@ -1141,7 +1141,9 @@ public: virtual bool is_returning() const { return false; } virtual bool has_instead_of_trigger() const { return false; } int has_lob_column(int64_t table_id, bool &has_lob)const; - int has_virtual_generated_column(int64_t table_id, bool &has_virtual_col) const; + int has_virtual_generated_column(int64_t table_id, + bool &has_virtual_col, + bool ignore_fulltext_gen_col = false) const; struct TempTableInfo { TempTableInfo() diff --git a/src/sql/resolver/expr/ob_raw_expr.h b/src/sql/resolver/expr/ob_raw_expr.h index 2da64136b8..bd735f61bd 100644 --- a/src/sql/resolver/expr/ob_raw_expr.h +++ b/src/sql/resolver/expr/ob_raw_expr.h @@ -1708,7 +1708,8 @@ enum ExplicitedRefType { REF_BY_NORMAL = 1 << 0, REF_BY_PART_EXPR = 1 << 1, REF_BY_VIRTUAL_GEN_COL = 1<< 2, - REF_BY_STORED_GEN_COL = 1 << 3 + REF_BY_STORED_GEN_COL = 1 << 3, + REF_BY_MATCH_EXPR = 1 << 4 }; class ObRawExpr : virtual public jit::expr::ObIRawExpr { diff --git a/src/sql/resolver/expr/ob_raw_expr_util.cpp b/src/sql/resolver/expr/ob_raw_expr_util.cpp index 77127c0a57..5e60ba95c0 100644 --- a/src/sql/resolver/expr/ob_raw_expr_util.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_util.cpp @@ -9495,10 +9495,10 @@ int ObRawExprUtils::extract_match_against_filters(const ObIArray &f ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null expr", K(ret)); } else if (expr->has_flag(CNT_MATCH_EXPR)) { - if (OB_FAIL(match_filters.push_back(expr))) { + if (OB_FAIL(add_var_to_array_no_dup(match_filters, expr))) { LOG_WARN("failed to push text ir filters", K(ret)); } - } else if (OB_FAIL(other_filters.push_back(expr))) { + } else if (OB_FAIL(add_var_to_array_no_dup(other_filters, expr))) { LOG_WARN("failed to push other filters", 
K(ret)); } } diff --git a/src/sql/rewrite/ob_transform_pre_process.cpp b/src/sql/rewrite/ob_transform_pre_process.cpp index bfc5fd783c..e72eb814d9 100644 --- a/src/sql/rewrite/ob_transform_pre_process.cpp +++ b/src/sql/rewrite/ob_transform_pre_process.cpp @@ -9856,6 +9856,14 @@ int ObTransformPreProcess::preserve_order_for_fulltext_search(ObDMLStmt *stmt, b LOG_WARN("unexpected null", K(ret)); } else if (stmt->get_table_items().count() != 1 || stmt->get_order_item_size() != 0) { // do nothing + } else if (stmt->is_select_stmt() && + (static_cast(stmt)->has_order_by() || + static_cast(stmt)->has_group_by() || + static_cast(stmt)->has_distinct() || + static_cast(stmt)->get_aggr_item_size() != 0 || + static_cast(stmt)->has_window_function() || + static_cast(stmt)->get_table_items().count() != 1)) { + // do nothing } else if (OB_ISNULL(table_item = stmt->get_table_item(0))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null", K(ret)); @@ -9871,6 +9879,7 @@ int ObTransformPreProcess::preserve_order_for_fulltext_search(ObDMLStmt *stmt, b if (OB_FAIL(stmt->add_order_item(item))) { LOG_WARN("failed to add order item", K(ret), K(item)); } + trans_happened = true; } return ret; } diff --git a/src/sql/rewrite/ob_transform_utils.cpp b/src/sql/rewrite/ob_transform_utils.cpp index ab8365ec1b..8fbaa3d0cf 100644 --- a/src/sql/rewrite/ob_transform_utils.cpp +++ b/src/sql/rewrite/ob_transform_utils.cpp @@ -15422,5 +15422,146 @@ bool ObTransformUtils::is_full_group_by(ObSelectStmt& stmt, ObSQLMode mode) return !stmt.has_order_by() && is_only_full_group_by_on(mode); } +int ObTransformUtils::check_need_calc_match_score(ObExecContext *exec_ctx, + const ObDMLStmt* stmt, + ObRawExpr* match_expr, + bool &need_calc, + ObIArray &constraints) +{ + int ret = OB_SUCCESS; + ObSEArray relation_exprs; + need_calc = false; + if (OB_ISNULL(match_expr) || OB_ISNULL(stmt) || OB_ISNULL(exec_ctx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("match expr is null", K(ret)); + } else if 
(OB_FAIL(stmt->get_relation_exprs(relation_exprs))) { + LOG_WARN("failed to get relation exprs", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && !need_calc && i < relation_exprs.count(); i++) { + bool tmp_need_calc = false; + if (OB_ISNULL(relation_exprs.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("relation expr is null", K(ret)); + } else if (!relation_exprs.at(i)->has_flag(CNT_MATCH_EXPR)) { + /* do nothing */ + } else if (OB_FAIL(inner_check_need_calc_match_score(exec_ctx, + relation_exprs.at(i), + match_expr, + tmp_need_calc, + constraints))) { + LOG_WARN("failed to check need calc match score", K(ret)); + } else if (tmp_need_calc) { + need_calc = true; + } + } + } + return ret; +} + +int ObTransformUtils::inner_check_need_calc_match_score(ObExecContext *exec_ctx, + ObRawExpr* expr, + ObRawExpr* match_expr, + bool &need_calc, + ObIArray &constraints) +{ + int ret = OB_SUCCESS; + need_calc = false; + bool need_check_child = true; + if (OB_ISNULL(expr) || OB_ISNULL(match_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("param has null", K(ret), KP(expr), KP(match_expr)); + } else if (expr == match_expr) { + need_calc = true; + need_check_child = false; + } else if (expr->get_expr_type() == T_OP_GT || expr->get_expr_type() == T_OP_LT) { + ObRawExpr *gt_param = NULL; + ObRawExpr *lt_param = NULL; + bool is_param_zero = false; + if (expr->get_param_count() != 2 || OB_ISNULL(expr->get_param_expr(0)) || + OB_ISNULL(expr->get_param_expr(1))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret)); + } else if (OB_FALSE_IT(gt_param = expr->get_expr_type() == T_OP_GT ? expr->get_param_expr(0) : + expr->get_param_expr(1))) { + } else if (OB_FALSE_IT(lt_param = expr->get_expr_type() == T_OP_GT ? 
expr->get_param_expr(1) : + expr->get_param_expr(0))) { + } else if (gt_param != match_expr) { + /* do nothing */ + } else if (OB_FAIL(check_expr_eq_zero(exec_ctx, lt_param, is_param_zero, constraints))) { + LOG_WARN("failed to check param eq zero", K(ret)); + } else if (is_param_zero) { + need_calc = false; + need_check_child = false; + } + } else if (expr->get_expr_type() == T_OP_BOOL) { + if (expr->get_param_count() == 1 && expr->get_param_expr(0) == match_expr) { + need_calc = false; + need_check_child = false; + } + } + if (OB_SUCC(ret) && need_check_child) { + for (int64_t i = 0; OB_SUCC(ret) && !need_calc && i < expr->get_param_count(); i++) { + bool tmp_need_calc = false; + if (OB_FAIL(SMART_CALL(inner_check_need_calc_match_score(exec_ctx, + expr->get_param_expr(i), + match_expr, + tmp_need_calc, + constraints)))) { + LOG_WARN("failed to inner check need calc match score", K(ret)); + } else if (tmp_need_calc) { + need_calc = true; + } + } + } + return ret; +} + +int ObTransformUtils::check_expr_eq_zero(ObExecContext *ctx, + ObRawExpr *expr, + bool &eq_zero, + ObIArray &constraints) +{ + int ret = OB_SUCCESS; + ObConstRawExpr *zero_expr = NULL; + ObRawExpr *eq_zero_expr = NULL; + bool got_result = false; + ObObj result; + eq_zero = false; + ObPhysicalPlanCtx *phy_ctx = NULL; + if (OB_ISNULL(expr) || OB_ISNULL(ctx) || OB_ISNULL(ctx->get_my_session()) || + OB_ISNULL(ctx->get_expr_factory()) || OB_ISNULL(phy_ctx = ctx->get_physical_plan_ctx())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (!expr->is_static_const_expr()) { + // do nothing + } else if (OB_FAIL(ObRawExprUtils::build_const_double_expr(*ctx->get_expr_factory(), + ObDoubleType, + 0.0, + zero_expr))) { + LOG_WARN("failed to build double expr", K(ret)); + } else if (OB_FAIL(ObRawExprUtils::create_double_op_expr(*ctx->get_expr_factory(), + ctx->get_my_session(), + T_OP_EQ, + eq_zero_expr, + expr, + zero_expr))) { + LOG_WARN("failed to build cmp expr", K(ret)); + } else 
if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(ctx, + eq_zero_expr, + result, + got_result, + ctx->get_allocator()))) { // OB_FAIL keeps the status in ret so the failure is returned, not swallowed + LOG_WARN("failed to calc cosnt or calculable expr", K(ret)); + } else if (!got_result || result.is_false() || result.is_null()) { + // do nothing + } else if (OB_FAIL(constraints.push_back( + ObExprConstraint(eq_zero_expr, PreCalcExprExpectResult::PRE_CALC_RESULT_TRUE)))) { + LOG_WARN("failed to push back constraint", K(ret)); + } else { + eq_zero = true; + } + return ret; +} + } // namespace sql } // namespace oceanbase diff --git a/src/sql/rewrite/ob_transform_utils.h b/src/sql/rewrite/ob_transform_utils.h index 1309a32ec3..fa2f3a54e3 100644 --- a/src/sql/rewrite/ob_transform_utils.h +++ b/src/sql/rewrite/ob_transform_utils.h @@ -1942,6 +1942,16 @@ public: bool &is_ref); static int check_contain_correlated_lateral_table(ObDMLStmt *stmt, bool &is_contain); + // check whether the score calculated by match expr is actually utilized + static int check_need_calc_match_score(ObExecContext *exec_ctx, + const ObDMLStmt* stmt, + ObRawExpr* match_expr, + bool &need_calc, + ObIArray &constraints); + static int check_expr_eq_zero(ObExecContext *ctx, + ObRawExpr *expr, + bool &eq_zero, + ObIArray &constraints); private: static int inner_get_lazy_left_join(ObDMLStmt *stmt, TableItem *table, @@ -1991,6 +2001,11 @@ private: static int check_convert_string_safely(const ObRawExpr *expr, const ObRawExpr *src_expr, bool &is_safe); + static int inner_check_need_calc_match_score(ObExecContext *exec_ctx, + ObRawExpr* expr, + ObRawExpr* match_expr, + bool &need_calc, + ObIArray &constraints); }; class StmtUniqueKeyProvider diff --git a/src/storage/fts/ob_text_retrieval_iterator.cpp b/src/storage/fts/ob_text_retrieval_iterator.cpp index 91ad928bcf..df9dbfe238 100644 --- a/src/storage/fts/ob_text_retrieval_iterator.cpp +++ b/src/storage/fts/ob_text_retrieval_iterator.cpp @@ -123,7 +123,7 
@@ int ObTextRetrievalIterator::init( retrieval_param_ = &retrieval_param; tx_desc_
= tx_desc; snapshot_ = snapshot; - need_fwd_idx_agg_ = retrieval_param.get_ir_ctdef()->has_fwd_agg_; + need_fwd_idx_agg_ = retrieval_param.get_ir_ctdef()->has_fwd_agg_ && retrieval_param.need_relevance(); need_inv_idx_agg_ = retrieval_param.need_relevance(); if (OB_ISNULL(mem_context_)) { @@ -206,21 +206,25 @@ int ObTextRetrievalIterator::get_next_row() if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("retrieval iterator not inited", K(ret)); - } else if (!inv_idx_agg_evaluated_) { + } else if (!inv_idx_agg_evaluated_ && retrieval_param_->need_relevance()) { if (OB_FAIL(do_doc_cnt_agg())) { if (OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("Fail to do document count aggregation", K(ret), K_(inv_idx_agg_param)); } } else if (OB_FAIL(tsc_service->revert_scan_iter(inverted_idx_iter_))) { LOG_WARN("Fail to revert inverted index scan iterator after count aggregation", K(ret)); - } else if (FALSE_IT(inverted_idx_iter_ = nullptr)) { - } else if (OB_FAIL(tsc_service->table_scan(inv_idx_scan_param_, inverted_idx_iter_))) { - LOG_WARN("failed to init inverted index scan iterator", K(ret)); } else { + inverted_idx_iter_ = nullptr; inv_idx_agg_evaluated_ = true; } } + if (OB_SUCC(ret) && nullptr == inverted_idx_iter_) { + if (OB_FAIL(tsc_service->table_scan(inv_idx_scan_param_, inverted_idx_iter_))) { + LOG_WARN("failed to init inverted index scan iterator", K(ret)); + } + } + if (OB_FAIL(ret)) { } else if (OB_FAIL(get_next_single_row(inv_idx_scan_param_.op_->is_vectorized(), inverted_idx_iter_))) { if (OB_UNLIKELY(OB_ITER_END != ret)) { @@ -230,13 +234,15 @@ int ObTextRetrievalIterator::get_next_row() LOG_DEBUG("get one invert index scan row", "row", ROWEXPR2STR(*retrieval_param_->get_ir_rtdef()->get_inv_idx_scan_rtdef()->eval_ctx_, *inv_idx_scan_param_.output_exprs_)); - clear_row_wise_evaluated_flag(); - if (OB_FAIL(get_next_doc_token_cnt(need_fwd_idx_agg_))) { - LOG_WARN("failed to get next doc token count", K(ret)); - } else if (OB_FAIL(fill_token_doc_cnt())) { - 
LOG_WARN("failed to get token doc cnt", K(ret)); - } else if (OB_FAIL(project_relevance_expr())) { - LOG_WARN("failed to evaluate simarity expr", K(ret)); + if (retrieval_param_->need_relevance()) { + clear_row_wise_evaluated_flag(); + if (OB_FAIL(get_next_doc_token_cnt(need_fwd_idx_agg_))) { + LOG_WARN("failed to get next doc token count", K(ret)); + } else if (OB_FAIL(fill_token_doc_cnt())) { + LOG_WARN("failed to get token doc cnt", K(ret)); + } else if (OB_FAIL(project_relevance_expr())) { + LOG_WARN("failed to evaluate simarity expr", K(ret)); + } } } return ret;