diff --git a/deps/oblib/src/lib/ob_define.h b/deps/oblib/src/lib/ob_define.h index 39f37fb79..5e2e858bb 100644 --- a/deps/oblib/src/lib/ob_define.h +++ b/deps/oblib/src/lib/ob_define.h @@ -216,6 +216,13 @@ const int32_t NOT_CHECK_FLAG = 0; const int64_t MAX_SERVER_COUNT = 4095; const uint64_t OB_SERVER_USER_ID = 0; const int64_t OB_MAX_INDEX_PER_TABLE = 128; + +// Indicates the maximum number of aux tables required to build OB_MAX_INDEX_PER_TABLE indexes. +// Supports 128 vector indexes (vec index aux tables are the most numerous). +// OB_MAX_AUX_TABLE_PER_MAIN_TABLE = 2 + 3 * OB_MAX_INDEX_PER_TABLE + 2 (aux lob meta + aux lob piece) + 1 (mlog). +// The first 2 represents the shared aux tables, 3 represents the private aux tables of each vec index. +// If there are indexes with more auxiliary tables than vec index, this value needs to be adapted. +const int64_t OB_MAX_AUX_TABLE_PER_MAIN_TABLE = 389; const int64_t OB_MAX_SSTABLE_PER_TABLE = OB_MAX_INDEX_PER_TABLE + 1; const int64_t OB_MAX_SQL_LENGTH = 64 * 1024; const int64_t OB_TINY_SQL_LENGTH = 128; diff --git a/src/observer/table/ob_table_context.cpp b/src/observer/table/ob_table_context.cpp index 622c09445..332cb0f49 100644 --- a/src/observer/table/ob_table_context.cpp +++ b/src/observer/table/ob_table_context.cpp @@ -1849,23 +1849,23 @@ int ObTableCtx::init_trans(transaction::ObTxDesc *trans_desc, int ObTableCtx::init_index_info(const ObString &index_name, const uint64_t arg_table_id) { int ret = OB_SUCCESS; - uint64_t tids[OB_MAX_INDEX_PER_TABLE]; - int64_t index_cnt = OB_MAX_INDEX_PER_TABLE; + uint64_t tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; + int64_t index_aux_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; if (OB_FAIL(schema_guard_->get_can_read_index_array(tenant_id_, ref_table_id_, tids, - index_cnt, + index_aux_cnt, false))) { LOG_WARN("fail to get can read index", K(ret), K_(tenant_id), K_(ref_table_id)); - } else if (index_cnt > OB_MAX_INDEX_PER_TABLE) { + } else if (index_aux_cnt > OB_MAX_AUX_TABLE_PER_MAIN_TABLE) { ret = 
OB_ERR_UNEXPECTED; - LOG_WARN("index count is bigger than OB_MAX_INDEX_PER_TABLE", K(ret), K(index_cnt)); + LOG_WARN("index aux count is bigger than OB_MAX_AUX_TABLE_PER_MAIN_TABLE", K(ret), K(index_aux_cnt)); } else { const share::schema::ObTableSchema *index_schema = nullptr; ObString this_index_name; bool is_found = false; - for (int64_t i = 0; OB_SUCC(ret) && i < index_cnt && !is_found; i++) { + for (int64_t i = 0; OB_SUCC(ret) && i < index_aux_cnt && !is_found; i++) { if (OB_FAIL(schema_guard_->get_table_schema(tenant_id_, tids[i], index_schema))) { LOG_WARN("fail to get index schema", K(ret), K_(tenant_id), K(tids[i])); } else if (OB_ISNULL(index_schema)) { diff --git a/src/observer/table/ob_table_schema_cache.cpp b/src/observer/table/ob_table_schema_cache.cpp index b65b15bb3..6fab20486 100644 --- a/src/observer/table/ob_table_schema_cache.cpp +++ b/src/observer/table/ob_table_schema_cache.cpp @@ -82,26 +82,26 @@ int ObKvSchemaCacheObj::cons_index_info(ObSchemaGetterGuard *schema_guard, common::ObTableID table_id) { int ret = OB_SUCCESS; - int64_t index_cnt = OB_MAX_INDEX_PER_TABLE; - uint64_t tids[OB_MAX_INDEX_PER_TABLE]; + int64_t index_aux_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; + uint64_t tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; if (OB_ISNULL(schema_guard) || !schema_guard->is_inited()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("schema guard is NULL or not inited", K(ret)); } else if (OB_FAIL(schema_guard->get_can_write_index_array(tenant_id, table_id, tids, - index_cnt, + index_aux_cnt, false /*only global*/))) { LOG_WARN("fail to get can write index array", K(ret), K(table_id)); - } else if (OB_FAIL(local_index_tids_.init(index_cnt))) { + } else if (OB_FAIL(local_index_tids_.init(index_aux_cnt))) { LOG_WARN("fail to init local index tids", K(ret)); - } else if (OB_FAIL(global_index_tids_.init(index_cnt))) { + } else if (OB_FAIL(global_index_tids_.init(index_aux_cnt))) { LOG_WARN("fail to init global index tids", K(ret)); } else { - for (int64_t i = 0; 
OB_SUCC(ret) && i < index_cnt; i++) { + for (int64_t i = 0; OB_SUCC(ret) && i < index_aux_cnt; i++) { const ObTableSchema *index_schema = nullptr; if (OB_FAIL(schema_guard->get_table_schema(tenant_id_, tids[i], index_schema))) { - LOG_WARN("fail to get index schema", K(ret), K(tids[i]), K(i), K(index_cnt)); + LOG_WARN("fail to get index schema", K(ret), K(tids[i]), K(i), K(index_aux_cnt)); } else if (OB_ISNULL(index_schema)) { ret = OB_SCHEMA_ERROR; LOG_WARN("null index schema", K(ret)); diff --git a/src/pl/sys_package/ob_dbms_stats.cpp b/src/pl/sys_package/ob_dbms_stats.cpp index 7b3a9e82d..6174402dc 100644 --- a/src/pl/sys_package/ob_dbms_stats.cpp +++ b/src/pl/sys_package/ob_dbms_stats.cpp @@ -457,8 +457,8 @@ int ObDbmsStats::fast_gather_index_stats(ObExecContext &ctx, { int ret = OB_SUCCESS; is_all_fast_gather = true; - uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1]; - int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1; + uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1]; + int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1; share::schema::ObSchemaGetterGuard *schema_guard = ctx.get_virtual_table_ctx().schema_guard_; if (OB_FAIL(get_table_index_infos(schema_guard, ctx.get_my_session()->get_effective_tenant_id(), @@ -1129,8 +1129,8 @@ int ObDbmsStats::delete_table_index_stats(sql::ObExecContext &ctx, const ObTableStatParam data_param) { int ret = OB_SUCCESS; - uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1]; - int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1; + uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1]; + int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1; if (OB_FAIL(get_table_index_infos(ctx.get_virtual_table_ctx().schema_guard_, ctx.get_my_session()->get_effective_tenant_id(), data_param.table_id_, @@ -1614,8 +1614,8 @@ int ObDbmsStats::export_table_index_stats(sql::ObExecContext &ctx, const ObTableStatParam data_param) { int ret = OB_SUCCESS; - uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1]; - int64_t index_count = 
OB_MAX_INDEX_PER_TABLE + 1; + uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1]; + int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1; if (OB_FAIL(get_table_index_infos(ctx.get_virtual_table_ctx().schema_guard_, ctx.get_my_session()->get_effective_tenant_id(), data_param.table_id_, @@ -2013,8 +2013,8 @@ int ObDbmsStats::import_table_index_stats(sql::ObExecContext &ctx, const ObTableStatParam data_param) { int ret = OB_SUCCESS; - uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1]; - int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1; + uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1]; + int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1; if (OB_FAIL(get_table_index_infos(ctx.get_virtual_table_ctx().schema_guard_, ctx.get_my_session()->get_effective_tenant_id(), data_param.table_id_, @@ -2235,8 +2235,8 @@ int ObDbmsStats::lock_or_unlock_index_stats(sql::ObExecContext &ctx, bool is_lock_stats) { int ret = OB_SUCCESS; - uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1]; - int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1; + uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1]; + int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1; if (OB_FAIL(get_table_index_infos(ctx.get_virtual_table_ctx().schema_guard_, ctx.get_my_session()->get_effective_tenant_id(), data_param.table_id_, @@ -3686,24 +3686,24 @@ int ObDbmsStats::init_column_stat_params(ObIAllocator &allocator, } } } - uint64_t tids[OB_MAX_INDEX_PER_TABLE]; - int64_t index_count = OB_MAX_INDEX_PER_TABLE; + uint64_t tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; + int64_t index_aux_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; const ObTableSchema *index_schema = NULL; const uint64_t tenant_id = table_schema.get_tenant_id(); if (OB_FAIL(ret)) {//do nothing } else if (OB_FAIL(schema_guard.get_can_read_index_array(tenant_id, table_schema.get_table_id(), tids, - index_count, + index_aux_count, false, /*with_mv*/ true, /*with_global_index*/ false /*domain index*/))) { LOG_WARN("failed to get can read 
index", K(table_schema.get_table_id()), K(ret)); - } else if (index_count > OB_MAX_INDEX_PER_TABLE) { + } else if (index_aux_count > OB_MAX_AUX_TABLE_PER_MAIN_TABLE) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("Invalid index count", K(table_schema.get_table_id()), K(index_count), K(ret)); + LOG_WARN("Invalid index count", K(table_schema.get_table_id()), K(index_aux_count), K(ret)); } else { - for (int64_t i = 0; OB_SUCC(ret) && i < index_count; ++i) { + for (int64_t i = 0; OB_SUCC(ret) && i < index_aux_count; ++i) { if (OB_FAIL(schema_guard.get_table_schema(tenant_id, tids[i], index_schema))) { LOG_WARN("failed to get index schema", K(ret), K(tenant_id), K(tids[i])); } else if (OB_ISNULL(index_schema)) { @@ -6369,8 +6369,8 @@ int ObDbmsStats::get_index_schema(sql::ObExecContext &ctx, int ret = OB_SUCCESS; share::schema::ObSchemaGetterGuard *schema_guard = ctx.get_virtual_table_ctx().schema_guard_; index_schema = NULL; - uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1]; - int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1; + uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1]; + int64_t index_aux_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1; if (OB_ISNULL(schema_guard)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); @@ -6378,11 +6378,11 @@ int ObDbmsStats::get_index_schema(sql::ObExecContext &ctx, ctx.get_my_session()->get_effective_tenant_id(), data_table_id, index_tids, - index_count))) { + index_aux_count))) { LOG_WARN("failed to get table index infos", K(ret)); } else { bool found_it = false; - for (int64_t i = 0; OB_SUCC(ret) && !found_it && i < index_count; ++i) { + for (int64_t i = 0; OB_SUCC(ret) && !found_it && i < index_aux_count; ++i) { const share::schema::ObTableSchema *cur_index_schema = NULL; ObString cur_index_name; if (index_tids[i] == data_table_id) { diff --git a/src/rootserver/ddl_task/ob_ddl_scheduler.cpp b/src/rootserver/ddl_task/ob_ddl_scheduler.cpp index f91491b61..79c67e430 100755 --- 
a/src/rootserver/ddl_task/ob_ddl_scheduler.cpp +++ b/src/rootserver/ddl_task/ob_ddl_scheduler.cpp @@ -1953,7 +1953,7 @@ int ObDDLScheduler::create_drop_fts_index_task( } else if (OB_ISNULL(index_schema) || OB_ISNULL(drop_index_arg)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), KP(index_schema), KP(drop_index_arg)); - } else if (FALSE_IT(is_fts_index = index_schema->is_fts_index_aux())) { + } else if (FALSE_IT(is_fts_index = (index_schema->is_fts_index_aux() || drop_index_arg->is_parent_task_dropping_fts_index_))) { } else if (OB_UNLIKELY(schema_version <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), KP(index_schema), K(schema_version)); @@ -2000,7 +2000,6 @@ int ObDDLScheduler::create_drop_fts_index_task( const ObFTSDDLChildTaskInfo rowkey_doc(rowkey_doc_name, rowkey_doc_table_id, 0/*task_id*/); const ObFTSDDLChildTaskInfo doc_rowkey(doc_rowkey_name, doc_rowkey_table_id, 0/*task_id*/); const ObDDLType ddl_type = is_fts_index ? DDL_DROP_FTS_INDEX : DDL_DROP_MULVALUE_INDEX; - if (OB_FAIL(ret)) { } else if (OB_FAIL(index_task.init(index_schema->get_tenant_id(), task_id, diff --git a/src/rootserver/ddl_task/ob_ddl_task.cpp b/src/rootserver/ddl_task/ob_ddl_task.cpp index 44b700ea4..13433b00f 100644 --- a/src/rootserver/ddl_task/ob_ddl_task.cpp +++ b/src/rootserver/ddl_task/ob_ddl_task.cpp @@ -906,6 +906,15 @@ int ObDDLTask::get_ddl_type_str(const int64_t ddl_type, const char *&ddl_type_st case DDL_DROP_INDEX: ddl_type_str = "drop index"; break; + case DDL_DROP_FTS_INDEX: + ddl_type_str = "drop fts index"; + break; + case DDL_DROP_MULVALUE_INDEX: + ddl_type_str = "drop mulvalue index"; + break; + case DDL_DROP_VEC_INDEX: + ddl_type_str = "drop vec index"; + break; case DDL_ALTER_COLUMN_GROUP: ddl_type_str = "alter column group"; break; @@ -1096,7 +1105,9 @@ bool ObDDLTask::is_ddl_task_can_be_cancelled() const { bool can_be_cancelled = true; if (task_type_ == ObDDLType::DDL_DROP_INDEX || - task_type_ == 
ObDDLType::DDL_DROP_VEC_INDEX) { + task_type_ == ObDDLType::DDL_DROP_VEC_INDEX || + task_type_ == ObDDLType::DDL_DROP_FTS_INDEX || + task_type_ == ObDDLType::DDL_DROP_MULVALUE_INDEX) { can_be_cancelled = false; } return can_be_cancelled; diff --git a/src/rootserver/ddl_task/ob_drop_fts_index_task.cpp b/src/rootserver/ddl_task/ob_drop_fts_index_task.cpp index f003f7736..cb211fa15 100644 --- a/src/rootserver/ddl_task/ob_drop_fts_index_task.cpp +++ b/src/rootserver/ddl_task/ob_drop_fts_index_task.cpp @@ -561,7 +561,11 @@ int ObDropFTSIndexTask::succ() int ObDropFTSIndexTask::fail() { - return cleanup(); + int ret = OB_SUCCESS; + if (OB_FAIL(cleanup())) { + LOG_WARN("cleanup task failed", K(ret)); + } + return ret; } int ObDropFTSIndexTask::cleanup_impl() diff --git a/src/rootserver/ddl_task/ob_drop_fts_index_task.h b/src/rootserver/ddl_task/ob_drop_fts_index_task.h index 7cb946835..9ffd5c929 100644 --- a/src/rootserver/ddl_task/ob_drop_fts_index_task.h +++ b/src/rootserver/ddl_task/ob_drop_fts_index_task.h @@ -92,6 +92,12 @@ private: int succ(); int fail(); virtual int cleanup_impl() override; + virtual bool is_error_need_retry(const int ret_code) override + { + UNUSED(ret_code); + // the error code is ignored: retry on any error while task_status_ is below WAIT_CHILD_TASK_FINISH + return task_status_ < share::ObDDLTaskStatus::WAIT_CHILD_TASK_FINISH; + } bool is_fts_task() const { return share::ObDDLType::DDL_DROP_FTS_INDEX == task_type_; } private: diff --git a/src/rootserver/ddl_task/ob_drop_vec_index_task.h b/src/rootserver/ddl_task/ob_drop_vec_index_task.h index 1fd59100a..e82c4dae7 100644 --- a/src/rootserver/ddl_task/ob_drop_vec_index_task.h +++ b/src/rootserver/ddl_task/ob_drop_vec_index_task.h @@ -101,7 +101,12 @@ private: int send_build_single_replica_request(); int check_build_single_replica(bool &is_end); virtual int cleanup_impl() override; - + virtual bool is_error_need_retry(const int ret_code) override + { + UNUSED(ret_code); + // the error code is ignored: retry on any error while task_status_ is below DROP_AUX_INDEX_TABLE + return task_status_ < 
share::ObDDLTaskStatus::DROP_AUX_INDEX_TABLE; + } private: ObRootService *root_service_; ObVecIndexDDLChildTaskInfo rowkey_vid_; diff --git a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp index 491272318..10703effc 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp +++ b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp @@ -296,7 +296,7 @@ int ObMajorMergeProgressChecker::prepare_unfinish_table_ids() } const ObSimpleTableSchemaV2 *index_simple_schema = nullptr; ObTableCompactionInfo table_compaction_info; - ObSEArray index_schemas; + ObSEArray index_schemas; ObSEArray not_validate_index_ids; int64_t start_idx = 0; int64_t end_idx = 0; diff --git a/src/rootserver/ob_ddl_service.cpp b/src/rootserver/ob_ddl_service.cpp index 8adc12607..e9fb2d709 100755 --- a/src/rootserver/ob_ddl_service.cpp +++ b/src/rootserver/ob_ddl_service.cpp @@ -4155,7 +4155,8 @@ int ObDDLService::check_can_add_column_use_instant_(const bool is_oracle_mode, return ret; } -int ObDDLService::check_is_add_column_online_(const ObTableSchema &table_schema, +int ObDDLService::check_is_add_column_online_(const AlterTableSchema &alter_table_schema, + const ObTableSchema &table_schema, const AlterColumnSchema &alter_column_schema, const obrpc::ObAlterTableArg::AlterAlgorithm &algorithm, const bool is_oracle_mode, @@ -4168,7 +4169,7 @@ int ObDDLService::check_is_add_column_online_(const ObTableSchema &table_schema, bool is_change_column_order = false; if (OB_DDL_ADD_COLUMN != alter_column_schema.alter_type_) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("alter_type is not add column", KR(ret), K(alter_column_schema.alter_type_)); + LOG_WARN("alter_type is not add column", KR(ret), K(alter_column_schema)); } else if (algorithm == obrpc::ObAlterTableArg::AlterAlgorithm::INSTANT) { if (OB_FAIL(check_can_add_column_use_instant_(is_oracle_mode, tenant_data_version, @@ -4176,11 +4177,34 @@ int 
ObDDLService::check_is_add_column_online_(const ObTableSchema &table_schema, LOG_WARN("fail to check can add column use instant algorithm", KR(ret), K(is_oracle_mode), K(table_schema)); } } + if (OB_SUCC(ret)) { if (alter_column_schema.is_autoincrement_ || alter_column_schema.is_primary_key_ || alter_column_schema.has_not_null_constraint()) { tmp_ddl_type = ObDDLType::DDL_TABLE_REDEFINITION; } else if (nullptr != table_schema.get_column_schema(alter_column_schema.get_column_name())) { - tmp_ddl_type = ObDDLType::DDL_TABLE_REDEFINITION; + ObTableSchema::const_column_iterator it_begin = alter_table_schema.column_begin(); + ObTableSchema::const_column_iterator it_end = alter_table_schema.column_end(); + for (; OB_SUCC(ret) && it_begin != it_end; it_begin++) { + const AlterColumnSchema *column_schema = nullptr; + if (OB_ISNULL(column_schema = static_cast(*it_begin))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("*it_begin is NULL", K(ret)); + } else if (ObSchemaOperationType::OB_DDL_DROP_COLUMN == column_schema->alter_type_) { + lib::Worker::CompatMode compat_mode = (is_oracle_mode ? 
+ lib::Worker::CompatMode::ORACLE : lib::Worker::CompatMode::MYSQL); + lib::CompatModeGuard guard(compat_mode); + const ObString &drop_column_name = column_schema->get_origin_column_name(); + const ObString &add_column_name = alter_column_schema.get_column_name(); + if (ObColumnNameHashWrapper(drop_column_name) == ObColumnNameHashWrapper(add_column_name)) { + tmp_ddl_type = ObDDLType::DDL_TABLE_REDEFINITION; + } + } + } + } + } + + if (OB_SUCC(ret)) { + if (ObDDLType::DDL_INVALID != tmp_ddl_type) { } else if (alter_column_schema.is_stored_generated_column()) { tmp_ddl_type = ObDDLType::DDL_ADD_COLUMN_OFFLINE; } else if (OB_FAIL(check_is_change_column_order(table_schema, alter_column_schema, is_change_column_order))) { @@ -4301,7 +4325,7 @@ int ObDDLService::check_can_add_column_instant_(const ObTableSchema &orig_table_ switch (op_type) { case OB_DDL_ADD_COLUMN: { ObDDLType tmp_ddl_type = ObDDLType::DDL_INVALID; - if (!add_column_instant && OB_FAIL(check_is_add_column_online_(orig_table_schema, *alter_column_schema, algorithm, + if (!add_column_instant && OB_FAIL(check_is_add_column_online_(alter_table_schema, orig_table_schema, *alter_column_schema, algorithm, is_oracle_mode, tenant_data_version, tmp_ddl_type))) { LOG_WARN("fail to check is add column online", KR(ret)); } else if (ObDDLType::DDL_ADD_COLUMN_INSTANT == tmp_ddl_type) { @@ -4368,7 +4392,7 @@ int ObDDLService::check_alter_table_column(obrpc::ObAlterTableArg &alter_table_a switch (op_type) { case OB_DDL_ADD_COLUMN: { ObDDLType tmp_ddl_type = ObDDLType::DDL_INVALID; - if (OB_FAIL(check_is_add_column_online_(orig_table_schema, *alter_column_schema, algorithm, + if (OB_FAIL(check_is_add_column_online_(alter_table_schema, orig_table_schema, *alter_column_schema, algorithm, is_oracle_mode, tenant_data_version, tmp_ddl_type))) { LOG_WARN("fail to check is add column online", K(ret)); } else if (tmp_ddl_type == ObDDLType::DDL_ADD_COLUMN_ONLINE) { @@ -5441,7 +5465,8 @@ int 
ObDDLService::alter_table_primary_key(obrpc::ObAlterTableArg &alter_table_ar const uint64_t tenant_data_version) { int ret = OB_SUCCESS; - int64_t index_count = new_table_schema.get_index_tid_count(); + int64_t index_count = new_table_schema.get_index_count(); + int64_t index_aux_count = new_table_schema.get_index_tid_count(); const ObSArray &index_arg_list = alter_table_arg.index_arg_list_; for (int64_t i = 0; OB_SUCC(ret) && i < index_arg_list.size(); ++i) { ObIndexArg *index_arg = const_cast(index_arg_list.at(i)); @@ -5477,10 +5502,11 @@ int ObDDLService::alter_table_primary_key(obrpc::ObAlterTableArg &alter_table_ar case ObIndexArg::ADD_PRIMARY_KEY: case ObIndexArg::ALTER_PRIMARY_KEY: { if (ObIndexArg::ADD_PRIMARY_KEY == type) { - if (OB_MAX_INDEX_PER_TABLE <= index_count) { + if (OB_MAX_AUX_TABLE_PER_MAIN_TABLE <= index_aux_count || OB_MAX_INDEX_PER_TABLE <= index_count) { ret = OB_ERR_TOO_MANY_KEYS; LOG_USER_ERROR(OB_ERR_TOO_MANY_KEYS, OB_MAX_INDEX_PER_TABLE); - LOG_WARN("too many index for table!", K(index_count), K(OB_MAX_INDEX_PER_TABLE)); + LOG_WARN("too many index or index aux for table!", + K(index_count), K(OB_MAX_INDEX_PER_TABLE), K(index_aux_count), K(OB_MAX_AUX_TABLE_PER_MAIN_TABLE)); } else if (!new_table_schema.is_heap_table()) { ret = OB_ERR_MULTIPLE_PRI_KEY; LOG_WARN("multiple primary key defined", K(ret)); @@ -7267,9 +7293,10 @@ int ObDDLService::alter_table_index(obrpc::ObAlterTableArg &alter_table_arg, HEAP_VAR(RenameIndexNameHashSet, rename_ori_index_name_set) { HEAP_VAR(RenameIndexNameHashSet, rename_new_index_name_set) { HEAP_VAR(AlterIndexNameHashSet, alter_index_name_set) { - int64_t index_count = new_table_schema.get_index_tid_count(); for (int64_t i = 0; OB_SUCC(ret) && i < index_arg_list.size(); ++i) { ObIndexArg *index_arg = index_arg_list.at(i); + int64_t index_count = new_table_schema.get_index_count(); + int64_t index_aux_count = new_table_schema.get_index_tid_count(); if (OB_ISNULL(index_arg)) { ret = OB_INVALID_ARGUMENT; 
LOG_WARN("index arg should not be null", K(ret)); @@ -7284,10 +7311,11 @@ int ObDDLService::alter_table_index(obrpc::ObAlterTableArg &alter_table_arg, } } else if (OB_FAIL(GET_MIN_DATA_VERSION(create_index_arg->tenant_id_, tenant_data_version))) { LOG_WARN("get min data version failed", K(ret), KPC(create_index_arg)); - } else if (OB_MAX_INDEX_PER_TABLE <= index_count) { + } else if (OB_MAX_AUX_TABLE_PER_MAIN_TABLE <= index_aux_count || OB_MAX_INDEX_PER_TABLE <= index_count) { ret = OB_ERR_TOO_MANY_KEYS; LOG_USER_ERROR(OB_ERR_TOO_MANY_KEYS, OB_MAX_INDEX_PER_TABLE); - LOG_WARN("too many index for table!", K(index_count), K(OB_MAX_INDEX_PER_TABLE)); + LOG_WARN("too many index or index aux for table!", + K(index_count), K(OB_MAX_INDEX_PER_TABLE), K(index_aux_count), K(OB_MAX_AUX_TABLE_PER_MAIN_TABLE)); } if (!new_table_schema.is_partitioned_table() && !new_table_schema.is_auto_partitioned_table() @@ -7454,8 +7482,6 @@ int ObDDLService::alter_table_index(obrpc::ObAlterTableArg &alter_table_arg, } else if (OB_FAIL(add_index_name_set.set_refactored(index_key))) { LOG_WARN("set index name to hash set failed", K(create_index_arg->index_name_), K(ret)); - } else { - ++index_count; } } } @@ -7519,8 +7545,6 @@ int ObDDLService::alter_table_index(obrpc::ObAlterTableArg &alter_table_arg, new_table_schema, trans))) { LOG_WARN("failed to alter table drop index", K(*drop_index_arg), K(ret)); - } else { - --index_count; } } } @@ -8500,7 +8524,7 @@ int ObDDLService::get_dropping_domain_index_invisiable_aux_table_schema( { int ret = OB_SUCCESS; const share::schema::ObTableSchema *data_table_schema = nullptr; - ObSEArray indexs; + ObSEArray indexs; if (OB_UNLIKELY(OB_INVALID_ID == data_table_id || OB_INVALID_ID == index_table_id || OB_INVALID_TENANT_ID == tenant_id @@ -8577,7 +8601,7 @@ int ObDDLService::get_dropping_vec_index_invisiable_table_schema_( { int ret = OB_SUCCESS; const share::schema::ObTableSchema *data_table_schema = nullptr; - ObSEArray indexs; + ObSEArray indexs; if 
(OB_UNLIKELY(OB_INVALID_ID == data_table_id || OB_INVALID_ID == index_table_id || OB_INVALID_TENANT_ID == tenant_id @@ -15014,6 +15038,7 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg, } if (OB_SUCC(ret) && is_double_table_long_running_ddl(ddl_type)) { bool has_index_operation = false; + bool will_be_having_domain_index_operation = false; bool has_fts_or_multivalue_or_vec_index = false; bool is_adding_constraint = false; bool is_column_store = false; @@ -15042,11 +15067,17 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg, table_id, has_fts_or_multivalue_or_vec_index))) { LOG_WARN("check has fts index failed", K(ret)); + } else if (OB_FAIL(check_will_be_having_domain_index_operation(alter_table_arg, + will_be_having_domain_index_operation))) { + LOG_WARN("check will be having domain index operation failed", K(ret)); } else if (OB_FAIL(check_is_adding_constraint(tenant_id, table_id, is_adding_constraint))) { LOG_WARN("failed to call check_is_adding_constraint", K(ret)); } else if (has_index_operation) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "The DDL cannot be run concurrently with creating index."); + } else if (will_be_having_domain_index_operation) { + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "The DDL cannot be run, as creating/dropping fulltext/multivalue/vector index."); } else if (has_fts_or_multivalue_or_vec_index) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "Run this DDL operation on table with fulltext/multivalue/vector index."); @@ -15125,6 +15156,30 @@ int ObDDLService::check_has_domain_index( return ret; } +int ObDDLService::check_will_be_having_domain_index_operation( + const obrpc::ObAlterTableArg &alter_table_arg, + bool &will_be_having_domain_index_operation/*false*/) +{ + int ret = OB_SUCCESS; + will_be_having_domain_index_operation = false; + const ObSArray &index_arg_list = alter_table_arg.index_arg_list_; + for (int64_t i = 0; OB_SUCC(ret) && i 
< index_arg_list.size(); ++i) { + ObIndexArg *index_arg = index_arg_list.at(i); + if (OB_ISNULL(index_arg)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("index arg should not be null", K(ret)); + } else { + ObCreateIndexArg *create_index_arg = static_cast(index_arg); + if (share::schema::is_fts_or_multivalue_index(create_index_arg->index_type_) || + share::schema::is_vec_index(create_index_arg->index_type_)) { + will_be_having_domain_index_operation = true; + break; + } + } + } + return ret; +} + int ObDDLService::check_is_oracle_mode_add_column_not_null_ddl(const obrpc::ObAlterTableArg &alter_table_arg, ObSchemaGetterGuard &schema_guard, bool &is_oracle_mode_add_column_not_null_ddl, @@ -20130,18 +20185,20 @@ int ObDDLService::add_new_index_schema(obrpc::ObAlterTableArg &alter_table_arg, HEAP_VAR(AddIndexNameHashSet, add_index_name_set) { HEAP_VAR(DropIndexNameHashSet, drop_index_name_set) { const ObSArray &index_arg_list = alter_table_arg.index_arg_list_; - int64_t index_count = new_table_schema.get_index_tid_count(); for (int64_t i = 0; OB_SUCC(ret) && i < index_arg_list.size(); ++i) { ObIndexArg *index_arg = const_cast(index_arg_list.at(i)); + int64_t index_count = new_table_schema.get_index_count(); + int64_t index_aux_count = new_table_schema.get_index_tid_count(); if (OB_ISNULL(index_arg)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("index arg should not be null", K(ret)); } else { if (index_arg->index_action_type_ == ObIndexArg::ADD_INDEX) { - if (OB_MAX_INDEX_PER_TABLE <= index_count) { + if (OB_MAX_AUX_TABLE_PER_MAIN_TABLE <= index_aux_count || OB_MAX_INDEX_PER_TABLE <= index_count) { ret = OB_ERR_TOO_MANY_KEYS; LOG_USER_ERROR(OB_ERR_TOO_MANY_KEYS, OB_MAX_INDEX_PER_TABLE); - LOG_WARN("too many index for table!", K(index_count), K(OB_MAX_INDEX_PER_TABLE)); + LOG_WARN("too many index or index aux for table!", + K(index_count), K(OB_MAX_INDEX_PER_TABLE), K(index_aux_count), K(OB_MAX_AUX_TABLE_PER_MAIN_TABLE)); } ObCreateIndexArg *create_index_arg = 
static_cast(index_arg); if (!new_table_schema.is_partitioned_table() @@ -20275,8 +20332,6 @@ int ObDDLService::add_new_index_schema(obrpc::ObAlterTableArg &alter_table_arg, LOG_WARN("set index name to hash set failed", K(create_index_arg->index_name_), K(ret)); - } else { - ++index_count; } } } diff --git a/src/rootserver/ob_ddl_service.h b/src/rootserver/ob_ddl_service.h index ae03bcd29..833fea4bd 100644 --- a/src/rootserver/ob_ddl_service.h +++ b/src/rootserver/ob_ddl_service.h @@ -1336,6 +1336,9 @@ private: const uint64_t tenant_id, const uint64_t data_table_id, bool &domain_index_exist); +int check_will_be_having_domain_index_operation( + const obrpc::ObAlterTableArg &alter_table_arg, + bool &will_be_having_domain_index_operation); int check_has_index_operation( ObSchemaGetterGuard &schema_guard, const uint64_t teannt_id, @@ -1730,7 +1733,8 @@ private: const share::schema::ObColumnSchemaV2 &orig_column_schema, share::schema::AlterColumnSchema &alter_column_schema, bool &is_offline) const; - int check_is_add_column_online_(const share::schema::ObTableSchema &table_schema, + int check_is_add_column_online_(const AlterTableSchema &alter_table_schema, + const share::schema::ObTableSchema &table_schema, const share::schema::AlterColumnSchema &alter_column_schema, const obrpc::ObAlterTableArg::AlterAlgorithm &algorithm, const bool is_oracle_mode, diff --git a/src/rootserver/ob_index_builder.cpp b/src/rootserver/ob_index_builder.cpp index b810ff588..e4c8cc5f9 100644 --- a/src/rootserver/ob_index_builder.cpp +++ b/src/rootserver/ob_index_builder.cpp @@ -1275,11 +1275,14 @@ int ObIndexBuilder::do_create_index( LOG_WARN("can not add index on table in recyclebin", K(ret), K(arg)); } else if (OB_FAIL(ddl_service_.check_restore_point_allow(tenant_id, *table_schema))) { LOG_WARN("failed to check restore point allow.", K(ret), K(tenant_id), K(table_id)); - } else if (table_schema->get_index_tid_count() >= OB_MAX_INDEX_PER_TABLE) { + } else if 
(table_schema->get_index_tid_count() >= OB_MAX_AUX_TABLE_PER_MAIN_TABLE + || table_schema->get_index_count() >= OB_MAX_INDEX_PER_TABLE) { ret = OB_ERR_TOO_MANY_KEYS; LOG_USER_ERROR(OB_ERR_TOO_MANY_KEYS, OB_MAX_INDEX_PER_TABLE); - int64_t index_count = table_schema->get_index_tid_count(); - LOG_WARN("too many index for table", K(OB_MAX_INDEX_PER_TABLE), K(index_count), K(ret)); + int64_t index_aux_count = table_schema->get_index_tid_count(); + int64_t index_count = table_schema->get_index_count(); + LOG_WARN("too many index or index aux for table", + K(index_count), K(OB_MAX_INDEX_PER_TABLE), K(index_aux_count), K(OB_MAX_AUX_TABLE_PER_MAIN_TABLE), K(ret)); } else if (OB_FAIL(ddl_service_.check_fk_related_table_ddl(*table_schema, ObDDLType::DDL_CREATE_INDEX))) { LOG_WARN("check whether the foreign key related table is executing ddl failed", K(ret)); } else if (INDEX_TYPE_NORMAL_LOCAL == arg.index_type_ @@ -1355,11 +1358,6 @@ int ObIndexBuilder::generate_schema( } if (OB_SUCC(ret)) { - if (arg.index_columns_.count() <= 0) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("index columns can't be empty", "index columns", arg.index_columns_, K(ret)); - } else {} - //do some check if (OB_SUCC(ret)) { if (!GCONF.enable_sys_table_ddl) { @@ -1380,6 +1378,14 @@ int ObIndexBuilder::generate_schema( } } + if (OB_FAIL(ret)) { + } else if (arg.index_columns_.count() <= 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("index columns can't be empty", "index columns", arg.index_columns_, K(ret)); + } else { + // do something + } + if (OB_FAIL(ret)) { } else if (share::schema::is_fts_index(arg.index_type_)) { uint64_t tenant_data_version = 0; @@ -1912,7 +1918,7 @@ int ObIndexBuilder::check_has_none_shared_index_tables_for_fts_or_multivalue_ind bool &has_fts_or_multivalue_index) { int ret = OB_SUCCESS; - ObSEArray indexs; + ObSEArray indexs; has_fts_or_multivalue_index = false; if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id || OB_INVALID_ID == data_table_id)) { ret = OB_INVALID_ARGUMENT; @@ 
-1949,7 +1955,7 @@ int ObIndexBuilder::check_has_none_shared_index_tables_for_vector_index_( bool &has_none_share_vector_index) { int ret = OB_SUCCESS; - ObSEArray indexs; + ObSEArray indexs; has_none_share_vector_index = false; if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id || OB_INVALID_ID == data_table_id)) { ret = OB_INVALID_ARGUMENT; diff --git a/src/share/schema/ob_schema_getter_guard.cpp b/src/share/schema/ob_schema_getter_guard.cpp index c4ed7de57..7a6974ab8 100644 --- a/src/share/schema/ob_schema_getter_guard.cpp +++ b/src/share/schema/ob_schema_getter_guard.cpp @@ -1550,10 +1550,10 @@ int ObSchemaGetterGuard::get_can_write_index_array( LOG_WARN("cannot get index table schema for table ", KR(ret), K(tenant_id), K(index_id)); } else if (OB_UNLIKELY(index_schema->is_final_invalid_index())) { //invalid index status, need ingore - } else if (OB_MAX_INDEX_PER_TABLE <= can_write_count) { + } else if (OB_MAX_AUX_TABLE_PER_MAIN_TABLE <= can_write_count) { ret = OB_ERR_TOO_MANY_KEYS; LOG_USER_ERROR(OB_ERR_TOO_MANY_KEYS, OB_MAX_INDEX_PER_TABLE); - LOG_WARN("too many index or mlog for table!", K(can_write_count), K(OB_MAX_INDEX_PER_TABLE)); + LOG_WARN("too many index, index aux or mlog for table!", K(can_write_count), K(OB_MAX_AUX_TABLE_PER_MAIN_TABLE)); } else if (index_schema->is_mlog_table()) { index_tid_array[can_write_count] = simple_index_infos.at(i).table_id_; ++can_write_count; @@ -1594,8 +1594,8 @@ int ObSchemaGetterGuard::column_is_key( } else if (column_schema->is_rowkey_column() || column_schema->is_tbl_part_key_column()) { is_key = true; } else { - int64_t index_tid_array_size = OB_MAX_INDEX_PER_TABLE; - uint64_t index_tid_array[OB_MAX_INDEX_PER_TABLE]; + int64_t index_tid_array_size = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; + uint64_t index_tid_array[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; if (OB_FAIL(get_can_write_index_array(tenant_id, table_id, index_tid_array, index_tid_array_size))) { LOG_WARN("get index tid array failed", K(ret), K(tenant_id), 
K(index_tid_array_size)); } diff --git a/src/share/schema/ob_schema_struct.h b/src/share/schema/ob_schema_struct.h index 6ff22c64c..d6bb08599 100755 --- a/src/share/schema/ob_schema_struct.h +++ b/src/share/schema/ob_schema_struct.h @@ -308,11 +308,14 @@ const int64_t OB_AUX_LOB_TABLE_CNT = 2; // aux lob meta + aux lob piece // The max count of aux tables that can be created for each index. // Some special indexes such as full-text index(FTS), multi-value index, vector index, etc., have multiple aux tables. // The current index with max aux tables: vector index -const int64_t OB_MAX_TABLE_CNT_PER_INDEX = 5; +// The two constants below describe that index type and must be changed together (presumably: use the index type that yields the larger OB_MAX_AUX_TABLE_PER_TABLE; TODO confirm). NOTE(review): ob_define.h hard-codes the same total as OB_MAX_AUX_TABLE_PER_MAIN_TABLE (389), so keep both in sync. +const int64_t OB_MAX_SHARED_TABLE_CNT_PER_INDEX_TYPE = 2; // number of aux tables shared by all vector indexes of a table. +const int64_t OB_MAX_TABLE_CNT_PER_INDEX = 3; // number of aux tables private to each vector index. // The max count of aux tables with physical tablets per user data table. -const int64_t OB_MAX_AUX_TABLE_PER_TABLE = OB_MAX_INDEX_PER_TABLE * OB_MAX_TABLE_CNT_PER_INDEX + OB_AUX_LOB_TABLE_CNT + OB_MLOG_TABLE_CNT; // 643 +const int64_t OB_MAX_AUX_TABLE_PER_TABLE = OB_MAX_INDEX_PER_TABLE * OB_MAX_TABLE_CNT_PER_INDEX + + OB_MAX_SHARED_TABLE_CNT_PER_INDEX_TYPE + OB_AUX_LOB_TABLE_CNT + OB_MLOG_TABLE_CNT; // 389 // The max tablet count of a transfer is one data table tablet with max aux tablets bound together. 
-const int64_t OB_MAX_TRANSFER_BINDING_TABLET_CNT = OB_MAX_AUX_TABLE_PER_TABLE + 1; // 644 +const int64_t OB_MAX_TRANSFER_BINDING_TABLET_CNT = OB_MAX_AUX_TABLE_PER_TABLE + 1; // 390 // Note: When adding new index type, you should modifiy "tools/obtest/t/quick/partition_balance.test" and // "tools/obtest/t/shared_storage/partition_balance.test" to verify that all aux tables of the new index diff --git a/src/share/schema/ob_table_schema.cpp b/src/share/schema/ob_table_schema.cpp index 37b9c1717..0384dc7e3 100644 --- a/src/share/schema/ob_table_schema.cpp +++ b/src/share/schema/ob_table_schema.cpp @@ -8414,6 +8414,57 @@ const ObConstraint *ObTableSchema::get_constraint(const uint64_t constraint_id) }); } +int64_t ObTableSchema::get_index_count() const +{ + int64_t index_count = 0; + bool is_rowkey_doc_id_exist = false; + bool is_doc_id_rowkey_exist = false; + int64_t fts_index_aux_count = 0; + int64_t fts_doc_word_aux_count = 0; + int64_t multivalue_index_aux_count = 0; + bool is_vec_rowkey_vid_exist = false; + bool is_vec_vid_rowkey_exist = false; + int64_t vec_delta_buffer_count = 0; + int64_t vec_index_id_count = 0; + int64_t vec_index_snapshot_data_count = 0; + for (int64_t i = 0; i < get_index_tid_count(); ++i) { + ObIndexType index_type = simple_index_infos_.at(i).index_type_; + // Classify every entry of simple_index_infos_ by index type; the per-type counters are combined after the loop into the number of logical indexes. + // When a new index type that owns several aux tables is introduced, add a branch for it here; otherwise each of its aux tables falls into the final else and is counted as one index. 
+ if (share::schema::is_rowkey_doc_aux(index_type)) { + is_rowkey_doc_id_exist = true; + } else if (share::schema::is_doc_rowkey_aux(index_type)) { + is_doc_id_rowkey_exist = true; + } else if (share::schema::is_fts_index_aux(index_type)) { + ++fts_index_aux_count; + } else if (share::schema::is_fts_doc_word_aux(index_type)) { + ++fts_doc_word_aux_count; + } else if (share::schema::is_multivalue_index_aux(index_type)) { + ++multivalue_index_aux_count; + } else if (share::schema::is_vec_rowkey_vid_type(index_type)) { + is_vec_rowkey_vid_exist = true; + } else if (share::schema::is_vec_vid_rowkey_type(index_type)) { + is_vec_vid_rowkey_exist = true; + } else if (share::schema::is_vec_delta_buffer_type(index_type)) { + ++vec_delta_buffer_count; + } else if (share::schema::is_vec_index_id_type(index_type)) { + ++vec_index_id_count; + } else if (share::schema::is_vec_index_snapshot_data_type(index_type)) { + ++vec_index_snapshot_data_count; + } else { + ++index_count; + } + } + // An FTS index is counted once per (fts_index_aux, fts_doc_word_aux) pair and a vector index once per (delta_buffer, index_id, snapshot_data) triple, hence OB_MIN; every multivalue aux table counts as one index. + // The FTS/multivalue group contributes only when both the rowkey_doc and doc_rowkey aux tables exist, the vector group only when both the rowkey_vid and vid_rowkey aux tables exist. + // The result is the logical index count to compare with OB_MAX_INDEX_PER_TABLE; it does not bound the number of aux tables, so callers also check get_index_tid_count() against OB_MAX_AUX_TABLE_PER_MAIN_TABLE. + index_count += (is_rowkey_doc_id_exist && is_doc_id_rowkey_exist) ? + OB_MIN(fts_index_aux_count, fts_doc_word_aux_count) + multivalue_index_aux_count : 0; + index_count += (is_vec_rowkey_vid_exist && is_vec_vid_rowkey_exist) ? 
+ OB_MIN(vec_delta_buffer_count, OB_MIN(vec_index_id_count, vec_index_snapshot_data_count)) : 0; + return index_count; +} + const ObConstraint *ObTableSchema::get_constraint(const ObString &constraint_name) const { return get_constraint_internal( @@ -8801,7 +8852,7 @@ int ObTableSchema::add_simple_index_info(const ObAuxTableMetaInfo &simple_index_ { int ret = OB_SUCCESS; bool need_add = true; - int64_t N = simple_index_infos_.count(); + int64_t N = get_index_tid_count(); // we are sure that index_tid are added in sorted order if (simple_index_info.table_id_ == OB_INVALID_ID) { @@ -8819,7 +8870,7 @@ int ObTableSchema::add_simple_index_info(const ObAuxTableMetaInfo &simple_index_ } if (OB_SUCC(ret) && need_add) { const int64_t last_pos = N - 1; - if (N >= common::OB_MAX_INDEX_PER_TABLE) { + if (N >= OB_MAX_AUX_TABLE_PER_MAIN_TABLE || get_index_count() >= common::OB_MAX_INDEX_PER_TABLE) { ret = OB_SIZE_OVERFLOW; LOG_WARN("index num in table is more than limited num", K(ret)); } else if ((last_pos >= 0) diff --git a/src/share/schema/ob_table_schema.h b/src/share/schema/ob_table_schema.h index 2a8e54cef..aca6758c3 100644 --- a/src/share/schema/ob_table_schema.h +++ b/src/share/schema/ob_table_schema.h @@ -1432,7 +1432,7 @@ public: const ObConstraint *get_constraint(const common::ObString &constraint_name) const; int get_pk_constraint_name(common::ObString &pk_name) const; const ObConstraint *get_pk_constraint() const; - + int64_t get_index_count() const; int64_t get_column_idx(const uint64_t column_id, const bool ignore_hidden_column = false) const; int64_t get_replica_num() const; int64_t get_tablet_size() const { return tablet_size_; } diff --git a/src/share/vector_index/ob_vector_index_util.cpp b/src/share/vector_index/ob_vector_index_util.cpp index 5e5527313..0f54546c8 100644 --- a/src/share/vector_index/ob_vector_index_util.cpp +++ b/src/share/vector_index/ob_vector_index_util.cpp @@ -275,7 +275,7 @@ int ObVectorIndexUtil::check_table_has_vector_of_fts_index( 
LOG_WARN("index table schema should not be null", K(ret), K(simple_index_infos.at(i).table_id_)); } else if (index_table_schema->is_vec_index()) { has_vec_index = true; - } else if (index_table_schema->is_fts_index()) { + } else if (index_table_schema->is_fts_index_aux() || index_table_schema->is_fts_doc_word_aux()) { has_fts_index = true; } } diff --git a/src/sql/CMakeLists.txt b/src/sql/CMakeLists.txt index 206753ead..4fb92280a 100644 --- a/src/sql/CMakeLists.txt +++ b/src/sql/CMakeLists.txt @@ -82,6 +82,9 @@ ob_set_subtarget(ob_sql das das/iter/ob_das_doc_id_merge_iter.cpp das/iter/ob_das_vid_merge_iter.cpp das/iter/ob_das_index_merge_iter.cpp + das/iter/ob_das_func_data_iter.cpp + das/iter/ob_das_functional_lookup_iter.cpp + das/iter/ob_das_cache_lookup_iter.cpp das/iter/ob_das_mvi_lookup_iter.cpp das/iter/ob_das_spatial_scan_iter.cpp ) diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index 4d95bb1ea..8b4cb2da9 100644 --- a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -399,7 +399,8 @@ int ObStaticEngineCG::disable_use_rich_format(const ObLogicalOperator &op, ObOpS || (static_cast(spec)).tsc_ctdef_.scan_ctdef_.is_get_ || tsc.is_text_retrieval_scan() || tsc.is_tsc_with_doc_id() - || tsc.is_tsc_with_vid()) { + || tsc.is_tsc_with_vid() + || tsc.has_func_lookup()) { use_rich_format = false; LOG_DEBUG("tsc disable use rich format", K(tsc.get_index_back()), K(tsc.use_batch()), K(is_virtual_table(tsc.get_ref_table_id()))); diff --git a/src/sql/code_generator/ob_tsc_cg_service.cpp b/src/sql/code_generator/ob_tsc_cg_service.cpp index 2b36c1085..66263f6a6 100644 --- a/src/sql/code_generator/ob_tsc_cg_service.cpp +++ b/src/sql/code_generator/ob_tsc_cg_service.cpp @@ -111,7 +111,8 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc if (op.is_text_retrieval_scan() || op.is_vec_idx_scan()) { scan_ctdef.ir_scan_type_ = 
ObTSCIRScanType::OB_IR_INV_IDX_SCAN; } - if (OB_FAIL(generate_das_scan_ctdef(op, scan_ctdef, has_rowscn))) { + DASScanCGCtx cg_ctx; + if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, scan_ctdef, has_rowscn))) { LOG_WARN("generate das scan ctdef failed", K(ret), K(scan_ctdef.ref_table_id_)); } else { tsc_ctdef.flashback_item_.need_scn_ |= has_rowscn; @@ -173,7 +174,8 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc bool need_attach = false; if (OB_SUCC(ret) && op.is_text_retrieval_scan()) { - if (OB_FAIL(generate_text_ir_ctdef(op, tsc_ctdef, root_ctdef))) { + DASScanCGCtx cg_ctx; + if (OB_FAIL(generate_text_ir_ctdef(op, cg_ctx, tsc_ctdef, root_ctdef))) { LOG_WARN("failed to generate text ir ctdef", K(ret)); } else { need_attach = true; @@ -260,6 +262,22 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc } } + if (OB_SUCC(ret) && op.has_func_lookup()) { + ObDASBaseCtDef *rowkey_scan_ctdef = nullptr; + ObDASBaseCtDef *main_lookup_ctdef = nullptr; + if (op.get_index_back()) { + rowkey_scan_ctdef = static_cast(root_ctdef)->children_[0]; + main_lookup_ctdef = tsc_ctdef.lookup_ctdef_; + } else { + rowkey_scan_ctdef = root_ctdef; + } + if (OB_FAIL(generate_functional_lookup_ctdef(op, tsc_ctdef, rowkey_scan_ctdef, main_lookup_ctdef, root_ctdef))) { + LOG_WARN("failed to generate functional lookup ctdef", K(ret)); + } else { + need_attach = true; + } + } + if (OB_SUCC(ret) && need_attach) { if (!op.get_is_index_global()) { tsc_ctdef.lookup_ctdef_ = nullptr; @@ -274,6 +292,7 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc } int ObTscCgService::generate_table_param(const ObLogTableScan &op, + const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef, common::ObIArray &tsc_out_cols) { @@ -305,7 +324,7 @@ int ObTscCgService::generate_table_param(const ObLogTableScan &op, } else if (table_schema->is_multivalue_index_aux() && 
FALSE_IT(scan_ctdef.table_param_.set_is_multivalue_index(true))) { } else if (table_schema->is_vec_index() && FALSE_IT(scan_ctdef.table_param_.set_is_vec_index(true))) { } else if (FALSE_IT(scan_ctdef.table_param_.set_is_partition_table(table_schema->is_partitioned_table()))) { - } else if (OB_FAIL(extract_das_output_column_ids(op, scan_ctdef, *table_schema, tsc_out_cols))) { + } else if (OB_FAIL(extract_das_output_column_ids(op, scan_ctdef, *table_schema, cg_ctx, tsc_out_cols))) { LOG_WARN("extract tsc output column ids failed", K(ret)); } else if (OB_FAIL(session_info->get_sys_variable(SYS_VAR_OB_ROUTE_POLICY, route_policy))) { LOG_WARN("get route policy failed", K(ret)); @@ -710,20 +729,27 @@ int ObTscCgService::generate_pd_storage_flag(const ObLogPlan *log_plan, //2. all columns required by TSC operator filters //3. all columns required by pushdown aggr expr int ObTscCgService::extract_das_access_exprs(const ObLogTableScan &op, + const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef, ObIArray &access_exprs) { int ret = OB_SUCCESS; const ObTableID &scan_table_id = scan_ctdef.ref_table_id_; const bool use_index_merge = scan_ctdef.is_index_merge_; - if (scan_table_id != op.get_rowkey_doc_table_id() + if (cg_ctx.is_func_lookup_ && scan_table_id != op.get_rowkey_doc_table_id()) { + const ObTextRetrievalInfo &tr_info = op.get_lookup_tr_infos().at(cg_ctx.curr_func_lookup_idx_); + if (OB_FAIL(extract_text_ir_access_columns(op, tr_info, scan_ctdef, access_exprs))) { + LOG_WARN("failed to extract text ir access columns for functional lookup", K(ret)); + } + } else if (scan_table_id != op.get_rowkey_doc_table_id() && ((op.is_text_retrieval_scan() && scan_table_id != op.get_ref_table_id()) || (op.is_multivalue_index_scan() && scan_table_id == op.get_doc_id_index_table_id()))) { // non main table scan in text retrieval - if (OB_FAIL(extract_text_ir_access_columns(op, scan_ctdef, access_exprs))) { + if (OB_FAIL(extract_text_ir_access_columns(op, 
op.get_text_retrieval_info(), scan_ctdef, access_exprs))) { LOG_WARN("failed to extract text ir access columns", K(ret)); } - } else if (op.is_tsc_with_doc_id() && scan_table_id == op.get_rowkey_doc_table_id()) { + } else if ((op.is_tsc_with_doc_id() || cg_ctx.is_func_lookup_) + && scan_table_id == op.get_rowkey_doc_table_id()) { if (OB_FAIL(extract_rowkey_doc_access_columns(op, scan_ctdef, access_exprs))) { LOG_WARN("fail to extract rowkey doc access columns", K(ret)); } @@ -877,13 +903,14 @@ int ObTscCgService::extract_tsc_access_columns(const ObLogTableScan &op, ObArray tsc_exprs; ObArray scan_pushdown_filters; ObArray lookup_pushdown_filters; + const bool need_filter_out_match_expr = op.is_text_retrieval_scan() || op.has_func_lookup(); if (OB_FAIL(const_cast(op).extract_pushdown_filters(tsc_exprs, //non-pushdown filters scan_pushdown_filters, lookup_pushdown_filters))) { LOG_WARN("extract pushdown filters failed", K(ret)); } else if (OB_FAIL(append_array_no_dup(tsc_exprs, op.get_output_exprs()))) { LOG_WARN("append output exprs failed", K(ret)); - } else if (op.is_text_retrieval_scan() && OB_FAIL(filter_out_match_exprs(tsc_exprs))) { + } else if (need_filter_out_match_expr && OB_FAIL(filter_out_match_exprs(tsc_exprs))) { // the matching columns of match expr are only used as semantic identifiers and are not actually accessed LOG_WARN("failed to filter out fts exprs", K(ret)); } else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(tsc_exprs, access_exprs, true))) { @@ -924,6 +951,7 @@ int ObTscCgService::generate_geo_access_ctdef(const ObLogTableScan &op, const Ob } int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op, + const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef, common::ObIArray &doc_id_expr, common::ObIArray &vec_vid_expr, @@ -944,7 +972,7 @@ int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op, } else if (OB_ISNULL(table_schema)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected nullptr to table schema", K(ret)); - } 
else if (OB_FAIL(extract_das_access_exprs(op, scan_ctdef, access_exprs))) { + } else if (OB_FAIL(extract_das_access_exprs(op, cg_ctx, scan_ctdef, access_exprs))) { LOG_WARN("extract das access exprs failed", K(ret)); } else if (table_schema->is_spatial_index() && OB_FAIL(generate_geo_access_ctdef(op, *table_schema, access_exprs))) { @@ -984,7 +1012,7 @@ int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op, ObColumnRefRawExpr* col_expr = static_cast(expr); bool is_mapping_vt_table = op.get_real_ref_table_id() != op.get_ref_table_id(); ObTableID real_table_id = is_mapping_vt_table ? op.get_real_ref_table_id() : op.get_table_id(); - const bool doc_id_in_rowkey_doc = op.is_tsc_with_doc_id() && table_schema->is_rowkey_doc_id(); + const bool doc_id_in_rowkey_doc = (op.is_tsc_with_doc_id() || cg_ctx.is_func_lookup_) && table_schema->is_rowkey_doc_id(); const bool vec_id_in_rowkey_vid = op.is_tsc_with_vid() && table_schema->is_vec_rowkey_vid_type(); real_table_id = doc_id_in_rowkey_doc || vec_id_in_rowkey_vid ? 
table_id : real_table_id; if (!col_expr->has_flag(IS_COLUMN) || (col_expr->get_table_id() != real_table_id && !(col_expr->is_doc_id_column() || col_expr->is_vec_vid_column()))) { @@ -1032,6 +1060,7 @@ int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op, } int ObTscCgService::generate_pushdown_aggr_ctdef(const ObLogTableScan &op, + const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef) { int ret = OB_SUCCESS; @@ -1039,9 +1068,12 @@ int ObTscCgService::generate_pushdown_aggr_ctdef(const ObLogTableScan &op, const uint64_t aggregate_output_count = pushdown_aggr_exprs.count(); const ObIArray &group_by_columns = op.get_pushdown_groupby_columns(); const uint64_t group_by_column_count = group_by_columns.count(); - if (op.is_text_retrieval_scan()) { + if (op.is_text_retrieval_scan() || cg_ctx.is_func_lookup_) { // text retrieval scan on fulltext index - if (OB_FAIL(generate_text_ir_pushdown_expr_ctdef(op, scan_ctdef))) { + const ObTextRetrievalInfo &tr_info = cg_ctx.is_func_lookup_ + ? op.get_lookup_tr_infos().at(cg_ctx.curr_func_lookup_idx_) + : op.get_text_retrieval_info(); + if (OB_FAIL(generate_text_ir_pushdown_expr_ctdef(tr_info, op, scan_ctdef))) { LOG_WARN("failed to generate text ir pushdown aggregate ctdef", K(ret), K(op)); } } else if (op.get_index_back() && aggregate_output_count > 0) { @@ -1119,6 +1151,7 @@ int ObTscCgService::generate_pushdown_aggr_ctdef(const ObLogTableScan &op, } int ObTscCgService::generate_das_scan_ctdef(const ObLogTableScan &op, + const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef, bool &has_rowscn) { @@ -1126,12 +1159,12 @@ int ObTscCgService::generate_das_scan_ctdef(const ObLogTableScan &op, ObSEArray doc_id_expr; ObSEArray vec_vid_expr; // 1. 
add basic column - if (OB_FAIL(generate_access_ctdef(op, scan_ctdef, doc_id_expr, vec_vid_expr, has_rowscn))) { + if (OB_FAIL(generate_access_ctdef(op, cg_ctx, scan_ctdef, doc_id_expr, vec_vid_expr, has_rowscn))) { LOG_WARN("generate access ctdef failed", K(ret), K(scan_ctdef.ref_table_id_)); } //2. generate pushdown aggr column if (OB_SUCC(ret)) { - if (OB_FAIL(generate_pushdown_aggr_ctdef(op, scan_ctdef))) { + if (OB_FAIL(generate_pushdown_aggr_ctdef(op, cg_ctx, scan_ctdef))) { LOG_WARN("generate pushdown aggr ctdef failed", K(ret)); } } @@ -1180,7 +1213,7 @@ int ObTscCgService::generate_das_scan_ctdef(const ObLogTableScan &op, //6. generate table param ObArray tsc_out_cols; if (OB_SUCC(ret)) { - if (OB_FAIL(generate_table_param(op, scan_ctdef, tsc_out_cols))) { + if (OB_FAIL(generate_table_param(op, cg_ctx, scan_ctdef, tsc_out_cols))) { LOG_WARN("generate table param failed", K(ret)); } } @@ -1233,6 +1266,7 @@ int ObTscCgService::generate_das_scan_ctdef(const ObLogTableScan &op, int ObTscCgService::extract_das_output_column_ids(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef, const ObTableSchema &index_schema, + const DASScanCGCtx &cg_ctx, ObIArray &output_cids) { int ret = OB_SUCCESS; @@ -1240,15 +1274,22 @@ int ObTscCgService::extract_das_output_column_ids(const ObLogTableScan &op, const ObTableID &table_id = scan_ctdef.ref_table_id_; const bool use_index_merge = scan_ctdef.is_index_merge_; - if (table_id != op.get_rowkey_doc_table_id() - && ((op.is_text_retrieval_scan() && table_id != op.get_ref_table_id()) - || (op.is_multivalue_index_scan() && table_id == op.get_doc_id_index_table_id()))) { - // non main table scan in text retrieval - if (OB_FAIL(extract_text_ir_das_output_column_ids(op, scan_ctdef, output_cids))) { + if (op.need_doc_id_index_back() && table_id == op.get_doc_id_index_table_id()) { + if (OB_FAIL(extract_doc_id_index_back_output_column_ids(op, output_cids))) { + LOG_WARN("failed to extract doc id index back output column ids", K(ret)); + 
} + } else if ((op.is_text_retrieval_scan() && table_id != op.get_ref_table_id() && table_id != op.get_rowkey_doc_table_id()) + || (cg_ctx.is_func_lookup_ && table_id != op.get_rowkey_doc_table_id())) { + const ObTextRetrievalInfo &tr_info = cg_ctx.is_func_lookup_ + ? op.get_lookup_tr_infos().at(cg_ctx.curr_func_lookup_idx_) + : op.get_text_retrieval_info(); + if (OB_FAIL(extract_text_ir_das_output_column_ids(tr_info, scan_ctdef, output_cids))) { LOG_WARN("failed to extract text retrieval das output column ids", K(ret)); } - } else if (op.is_tsc_with_doc_id() && table_id == op.get_rowkey_doc_table_id()) { - if (OB_FAIL(extract_rowkey_doc_output_columns_ids(index_schema, op, scan_ctdef, output_cids))) { + } else if ((op.is_tsc_with_doc_id() || cg_ctx.is_func_lookup_) + && table_id == op.get_rowkey_doc_table_id()) { + const bool output_rowkey = !cg_ctx.is_func_lookup_; + if (OB_FAIL(extract_rowkey_doc_output_columns_ids(index_schema, op, scan_ctdef, output_rowkey, output_cids))) { LOG_WARN("fail to extract rowkey doc output columns ids", K(ret)); } } else if (op.is_vec_idx_scan() && @@ -1342,6 +1383,20 @@ int ObTscCgService::extract_das_output_column_ids(const ObLogTableScan &op, LOG_WARN("store group id expr failed", K(ret)); } else if (OB_FAIL(extract_das_column_ids(das_output_cols, output_cids))) { LOG_WARN("extract column ids failed", K(ret)); + } else if (op.has_func_lookup() && op.get_real_index_table_id() == table_id) { + // main scan in functional lookup, need to output extra rowkey exprs for further lookup on functional index + ObArray rowkey_column_ids; + const ObTableSchema *table_schema = nullptr; + if (OB_FAIL(cg_.opt_ctx_->get_schema_guard()->get_table_schema(MTL_ID(), op.get_real_ref_table_id(), table_schema))) { + LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id())); + } else if (OB_ISNULL(table_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to table schema", K(ret)); + } else if 
(OB_FAIL(table_schema->get_rowkey_column_ids(rowkey_column_ids))) { + LOG_WARN("get rowkey column ids failed", K(ret)); + } else if (OB_FAIL(append_array_no_dup(output_cids, rowkey_column_ids))) { + LOG_WARN("fail to append rowkey cids to output cids for functional lookup", K(ret)); + } } else if (op.is_tsc_with_doc_id() && index_schema.is_user_table()) { uint64_t doc_id_col_id = OB_INVALID_ID; uint64_t ft_col_id = OB_INVALID_ID; @@ -1529,12 +1584,13 @@ int ObTscCgService::generate_vec_ir_ctdef(const ObLogTableScan &op, ObDASScanCtDef *snapshot_ctdef = nullptr; ObDASScanCtDef *com_aux_ctdef = nullptr; bool has_rowscn = false; + DASScanCGCtx cg_ctx; if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, delta_ctdef))) { LOG_WARN("allocate delta buf table ctdef failed", K(ret)); } else { delta_ctdef->ref_table_id_ = op.get_vector_index_info().delta_buffer_tid_; delta_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_DELTA_BUF_SCAN; - if (OB_FAIL(generate_das_scan_ctdef(op, *delta_ctdef, has_rowscn))) { + if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *delta_ctdef, has_rowscn))) { LOG_WARN("failed to generate das scan ctdef", K(ret)); } } @@ -1545,7 +1601,7 @@ int ObTscCgService::generate_vec_ir_ctdef(const ObLogTableScan &op, } else { index_id_ctdef->ref_table_id_ = op.get_vector_index_info().index_id_tid_; index_id_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_IDX_ID_SCAN; - if (OB_FAIL(generate_das_scan_ctdef(op, *index_id_ctdef, has_rowscn))) { + if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *index_id_ctdef, has_rowscn))) { LOG_WARN("failed to generate das scan ctdef", K(ret)); } } @@ -1557,7 +1613,7 @@ int ObTscCgService::generate_vec_ir_ctdef(const ObLogTableScan &op, } else { snapshot_ctdef->ref_table_id_ =op.get_vector_index_info().index_snapshot_data_tid_; snapshot_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_SNAPSHOT_SCAN; - if (OB_FAIL(generate_das_scan_ctdef(op, *snapshot_ctdef, has_rowscn))) { + if 
(OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *snapshot_ctdef, has_rowscn))) { LOG_WARN("generate das scan ctdef failed", K(ret)); } } @@ -1569,7 +1625,7 @@ int ObTscCgService::generate_vec_ir_ctdef(const ObLogTableScan &op, } else { com_aux_ctdef->ref_table_id_ = op.get_vector_index_info().main_table_tid_; com_aux_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_COM_AUX_SCAN; - if (OB_FAIL(generate_das_scan_ctdef(op, *com_aux_ctdef, has_rowscn))) { + if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *com_aux_ctdef, has_rowscn))) { LOG_WARN("generate das scan ctdef failed", K(ret)); } } @@ -1727,42 +1783,60 @@ int ObTscCgService::generate_gis_ir_ctdef(const ObLogTableScan &op, } int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op, + const DASScanCGCtx &cg_ctx, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef) { int ret = OB_SUCCESS; - ObMatchFunRawExpr *match_against = op.get_text_retrieval_info().match_expr_; + const ObTextRetrievalInfo &tr_info = cg_ctx.is_func_lookup_ + ? 
op.get_lookup_tr_infos().at(cg_ctx.curr_func_lookup_idx_) + : op.get_text_retrieval_info(); + ObMatchFunRawExpr *match_against = tr_info.match_expr_; ObIAllocator &ctdef_alloc = cg_.phy_plan_->get_allocator(); ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); ObDASIRScanCtDef *ir_scan_ctdef = nullptr; ObDASSortCtDef *sort_ctdef = nullptr; + ObDASScanCtDef *inv_idx_scan_ctdef = nullptr; ObExpr *index_back_doc_id_column = nullptr; + bool has_rowscn = false; const bool use_approx_pre_agg = true; // TODO: support differentiate use approx agg or not if (OB_ISNULL(match_against) || OB_ISNULL(schema_guard)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null pointer", K(ret), KP(match_against), KP(schema_guard)); - } else if (OB_UNLIKELY(OB_INVALID_ID == op.get_text_retrieval_info().inv_idx_tid_ - || (op.need_text_retrieval_calc_relevance() && OB_INVALID_ID == op.get_text_retrieval_info().fwd_idx_tid_))) { + } else if (OB_UNLIKELY(OB_INVALID_ID == tr_info.inv_idx_tid_ + || (tr_info.need_calc_relevance_ && OB_INVALID_ID == tr_info.fwd_idx_tid_))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid fulltext index table id", K(ret), KPC(match_against)); - } else if (OB_UNLIKELY(ObTSCIRScanType::OB_IR_INV_IDX_SCAN != tsc_ctdef.scan_ctdef_.ir_scan_type_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected ir scan type for inverted index scan", K(ret), K(tsc_ctdef.scan_ctdef_)); } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_IR_SCAN, ctdef_alloc, ir_scan_ctdef))) { LOG_WARN("allocate ir scan ctdef failed", K(ret)); - } else if (op.need_text_retrieval_calc_relevance()) { - ObDASScanCtDef *inv_idx_scan_ctdef = &tsc_ctdef.scan_ctdef_; + } else if (OB_UNLIKELY(!cg_ctx.is_func_lookup_ && ObTSCIRScanType::OB_IR_INV_IDX_SCAN != tsc_ctdef.scan_ctdef_.ir_scan_type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected ir scan type for inverted index scan", K(ret), K(tsc_ctdef.scan_ctdef_)); + } else { + if (!cg_ctx.is_func_lookup_) { + inv_idx_scan_ctdef = 
&tsc_ctdef.scan_ctdef_; + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, inv_idx_scan_ctdef))) { + LOG_WARN("allocate inv idx_scan_ctdef_failed", K(ret)); + } else { + inv_idx_scan_ctdef->ref_table_id_ = tr_info.inv_idx_tid_; + inv_idx_scan_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_IR_INV_IDX_SCAN; + if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *inv_idx_scan_ctdef, has_rowscn))) { + LOG_WARN("failed to generate das scan ctdef", K(ret)); + } + } + } + + if (OB_SUCC(ret) && tr_info.need_calc_relevance_) { ObDASScanCtDef *inv_idx_agg_ctdef = nullptr; ObDASScanCtDef *doc_id_idx_agg_ctdef = nullptr; ObDASScanCtDef *fwd_idx_agg_ctdef = nullptr; - bool has_rowscn = false; if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, inv_idx_agg_ctdef))) { LOG_WARN("allocate inv idx agg ctdef failed", K(ret)); } else { - inv_idx_agg_ctdef->ref_table_id_ = op.get_text_retrieval_info().inv_idx_tid_; + inv_idx_agg_ctdef->ref_table_id_ = tr_info.inv_idx_tid_; inv_idx_agg_ctdef->pd_expr_spec_.pd_storage_flag_.set_aggregate_pushdown(true); inv_idx_agg_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_IR_INV_IDX_AGG; - if (OB_FAIL(generate_das_scan_ctdef(op, *inv_idx_agg_ctdef, has_rowscn))) { + if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *inv_idx_agg_ctdef, has_rowscn))) { LOG_WARN("failed to generate das scan ctdef", K(ret)); } } @@ -1771,10 +1845,10 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op, if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, doc_id_idx_agg_ctdef))) { LOG_WARN("allocate doc id idx agg ctdef failed", K(ret)); } else { - doc_id_idx_agg_ctdef->ref_table_id_ = op.get_text_retrieval_info().doc_id_idx_tid_; + doc_id_idx_agg_ctdef->ref_table_id_ = tr_info.doc_id_idx_tid_; doc_id_idx_agg_ctdef->pd_expr_spec_.pd_storage_flag_.set_aggregate_pushdown(true); doc_id_idx_agg_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_IR_DOC_ID_IDX_AGG; - if 
(OB_FAIL(generate_das_scan_ctdef(op, *doc_id_idx_agg_ctdef, has_rowscn))) { + if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *doc_id_idx_agg_ctdef, has_rowscn))) { LOG_WARN("failed to generate das scan ctdef", K(ret)); } } @@ -1784,10 +1858,10 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op, if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, fwd_idx_agg_ctdef))) { LOG_WARN("allocate fwd idx agg ctdef failed", K(ret)); } else { - fwd_idx_agg_ctdef->ref_table_id_ = op.get_text_retrieval_info().fwd_idx_tid_; + fwd_idx_agg_ctdef->ref_table_id_ = tr_info.fwd_idx_tid_; fwd_idx_agg_ctdef->pd_expr_spec_.pd_storage_flag_.set_aggregate_pushdown(true); fwd_idx_agg_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_IR_FWD_IDX_AGG; - if (OB_FAIL(generate_das_scan_ctdef(op, *fwd_idx_agg_ctdef, has_rowscn))) { + if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *fwd_idx_agg_ctdef, has_rowscn))) { LOG_WARN("generate das scan ctdef failed", K(ret)); } } @@ -1830,7 +1904,7 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op, if (OB_SUCC(ret)) { root_ctdef = ir_scan_ctdef; - if (OB_FAIL(generate_text_ir_spec_exprs(op, *ir_scan_ctdef))) { + if (OB_FAIL(generate_text_ir_spec_exprs(tr_info, *ir_scan_ctdef))) { LOG_WARN("failed to generate text ir spec exprs", K(ret), KPC(match_against)); } else { const ObCostTableScanInfo *est_cost_info = op.get_est_cost_info(); @@ -1848,15 +1922,15 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op, } } - if (OB_SUCC(ret) && op.get_text_retrieval_info().need_sort()) { + if (OB_SUCC(ret) && tr_info.need_sort()) { ObSEArray order_items; - if (OB_FAIL(order_items.push_back(op.get_text_retrieval_info().sort_key_))) { + if (OB_FAIL(order_items.push_back(tr_info.sort_key_))) { LOG_WARN("append order item array failed", K(ret)); } else if (OB_FAIL(generate_das_sort_ctdef( order_items, - op.get_text_retrieval_info().with_ties_, - op.get_text_retrieval_info().topk_limit_expr_, - 
op.get_text_retrieval_info().topk_offset_expr_, + tr_info.with_ties_, + tr_info.topk_limit_expr_, + tr_info.topk_offset_expr_, ir_scan_ctdef, sort_ctdef))) { LOG_WARN("generate sort ctdef failed", K(ret)); @@ -1865,7 +1939,7 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op, } } - if (OB_SUCC(ret) && op.get_index_back()) { + if (OB_SUCC(ret) && op.get_index_back() && !cg_ctx.is_func_lookup_) { ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr; ObDASBaseCtDef *ir_output_ctdef = nullptr == sort_ctdef ? static_cast(ir_scan_ctdef) : static_cast(sort_ctdef); @@ -1908,6 +1982,7 @@ int ObTscCgService::generate_index_merge_node_ctdef(const ObLogTableScan &op, ObDASBaseCtDef *&node_ctdef) { int ret = OB_SUCCESS; + DASScanCGCtx cg_ctx; bool has_rowscn = false; if (OB_ISNULL(node) || !node->is_valid()) { ret = OB_ERR_UNEXPECTED; @@ -1922,7 +1997,7 @@ int ObTscCgService::generate_index_merge_node_ctdef(const ObLogTableScan &op, } else if (FALSE_IT(scan_ctdef->ref_table_id_ = node->index_tid_)) { } else if (FALSE_IT(scan_ctdef->index_merge_idx_ = node->idx_)) { } else if (FALSE_IT(scan_ctdef->is_index_merge_ = true)) { - } else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) { + } else if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *scan_ctdef, has_rowscn))) { LOG_WARN("failed to generate das scan ctdef", KPC(scan_ctdef), K(ret)); } else if (OB_NOT_NULL(node->ap_->pre_query_range_) && OB_FAIL(scan_ctdef->pre_query_range_.deep_copy(*node->ap_->pre_query_range_))) { @@ -2079,15 +2154,18 @@ int ObTscCgService::extract_vec_ir_access_columns( int ObTscCgService::extract_text_ir_access_columns( const ObLogTableScan &op, + const ObTextRetrievalInfo &tr_info, const ObDASScanCtDef &scan_ctdef, ObIArray &access_exprs) { int ret = OB_SUCCESS; - const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info(); if (scan_ctdef.ref_table_id_ == op.get_doc_id_index_table_id()) { if (OB_FAIL(extract_doc_id_index_back_access_columns(op, access_exprs))) { 
LOG_WARN("failed to extract doc id index back access columns", K(ret)); } + } else if (OB_UNLIKELY(scan_ctdef.ref_table_id_ == op.get_rowkey_doc_table_id())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected text ir access table", K(ret)); } else { switch (scan_ctdef.ir_scan_type_) { case ObTSCIRScanType::OB_IR_INV_IDX_SCAN: @@ -2148,17 +2226,12 @@ int ObTscCgService::extract_vector_das_output_column_ids( } int ObTscCgService::extract_text_ir_das_output_column_ids( - const ObLogTableScan &op, + const ObTextRetrievalInfo &tr_info, const ObDASScanCtDef &scan_ctdef, ObIArray &output_cids) { int ret = OB_SUCCESS; - const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info(); - if (scan_ctdef.ref_table_id_ == op.get_doc_id_index_table_id()) { - if (OB_FAIL(extract_doc_id_index_back_output_column_ids(op, output_cids))) { - LOG_WARN("failed to get doc id index back cids", K(ret), K(scan_ctdef.ref_table_id_)); - } - } else if (ObTSCIRScanType::OB_IR_INV_IDX_SCAN == scan_ctdef.ir_scan_type_) { + if (ObTSCIRScanType::OB_IR_INV_IDX_SCAN == scan_ctdef.ir_scan_type_) { if (OB_FAIL(output_cids.push_back( static_cast(tr_info.token_cnt_column_)->get_column_id()))) { LOG_WARN("failed to push output token cnt col id", K(ret)); @@ -2174,12 +2247,12 @@ int ObTscCgService::extract_text_ir_das_output_column_ids( } int ObTscCgService::generate_text_ir_pushdown_expr_ctdef( + const ObTextRetrievalInfo &tr_info, const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef) { int ret = OB_SUCCESS; const uint64_t scan_table_id = scan_ctdef.ref_table_id_; - const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info(); if (!scan_ctdef.pd_expr_spec_.pd_storage_flag_.is_aggregate_pushdown()) { // this das scan do not need aggregate pushdown } else { @@ -2289,12 +2362,11 @@ int ObTscCgService::generate_vec_ir_spec_exprs(const ObLogTableScan &op, return ret; } -int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op, +int ObTscCgService::generate_text_ir_spec_exprs(const 
ObTextRetrievalInfo &tr_info, ObDASIRScanCtDef &text_ir_scan_ctdef) { int ret = OB_SUCCESS; ObSEArray result_output; - const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info(); if (OB_ISNULL(tr_info.match_expr_) || OB_ISNULL(tr_info.relevance_expr_) || OB_ISNULL(tr_info.doc_id_column_)) { ret = OB_ERR_UNEXPECTED; @@ -2303,11 +2375,6 @@ int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op, LOG_WARN("failed to mark raw agg expr", K(ret), KPC(tr_info.match_expr_)); } else if (OB_FAIL(cg_.generate_rt_expr(*tr_info.match_expr_->get_search_key(), text_ir_scan_ctdef.search_text_))) { LOG_WARN("cg rt expr for search text failed", K(ret)); - } else if (OB_ISNULL(tr_info.pushdown_match_filter_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null match filter", K(ret)); - } else if (OB_FAIL(cg_.generate_rt_expr(*tr_info.pushdown_match_filter_, text_ir_scan_ctdef.match_filter_))) { - LOG_WARN("cg rt expr for match filter failed", K(ret)); } else { const UIntFixedArray &inv_scan_col_id = text_ir_scan_ctdef.get_inv_idx_scan_ctdef()->access_column_ids_; const ObColumnRefRawExpr *doc_id_column = static_cast(tr_info.doc_id_column_); @@ -2337,7 +2404,23 @@ int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op, } } - if (OB_SUCC(ret) && op.need_text_retrieval_calc_relevance()) { + if (OB_SUCC(ret)) { + // mark match columns in match_expr produced + ObIArray &match_columns = tr_info.match_expr_->get_match_columns(); + for (int64_t i = 0; OB_SUCC(ret) && i < match_columns.count(); ++i) { + if (OB_FAIL(cg_.mark_expr_self_produced(match_columns.at(i)))) { + LOG_WARN("failed to mark match column expr as produced", K(ret)); + } + } + } + + if (OB_SUCC(ret) && nullptr != tr_info.pushdown_match_filter_) { + if (OB_FAIL(cg_.generate_rt_expr(*tr_info.pushdown_match_filter_, text_ir_scan_ctdef.match_filter_))) { + LOG_WARN("cg rt expr for match filter failed", K(ret)); + } + } + + if (OB_SUCC(ret) && tr_info.need_calc_relevance_) { if 
(OB_ISNULL(tr_info.relevance_expr_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null relevance expr", K(ret)); @@ -2346,7 +2429,7 @@ int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op, } } - if (OB_SUCC(ret) && (op.need_text_retrieval_calc_relevance() || nullptr != tr_info.pushdown_match_filter_)) { + if (OB_SUCC(ret) && (tr_info.need_calc_relevance_ || nullptr != tr_info.pushdown_match_filter_)) { if (OB_FAIL(cg_.generate_rt_expr(*tr_info.match_expr_, text_ir_scan_ctdef.relevance_proj_col_))) { LOG_WARN("cg rt expr for relevance score proejction failed", K(ret)); @@ -2405,6 +2488,7 @@ int ObTscCgService::generate_vec_id_lookup_ctdef(const ObLogTableScan &op, LOG_WARN("allocate memory failed", K(ret)); } else { bool has_rowscn = false; + DASScanCGCtx cg_ctx; ObArray result_outputs; scan_ctdef->ref_table_id_ = vec_id_index_tid; aux_lookup_ctdef->children_cnt_ = 2; @@ -2412,7 +2496,7 @@ int ObTscCgService::generate_vec_id_lookup_ctdef(const ObLogTableScan &op, if (OB_ISNULL(scan_loc_meta)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate scan location meta failed", K(ret)); - } else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) { + } else if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *scan_ctdef, has_rowscn))) { LOG_WARN("generate das lookup scan ctdef failed", K(ret)); } else if (OB_FAIL(result_outputs.assign(scan_ctdef->result_output_))) { LOG_WARN("construct aux lookup ctdef failed", K(ret)); @@ -2462,7 +2546,7 @@ int ObTscCgService::generate_doc_id_lookup_ctdef(const ObLogTableScan &op, ret = OB_ERR_UNEXPECTED; LOG_WARN("failed to get data table schema", K(ret)); } else if (OB_FAIL(data_schema->get_doc_id_rowkey_tid(doc_id_index_tid))) { - LOG_WARN("failed to get doc id rowkey index tid", K(ret), KPC(data_schema)); + LOG_WARN("failed to get doc id rowkey index tid", K(ret), KPC(data_schema)); } else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(), doc_id_index_tid, op.get_stmt(), @@ -2480,6 
+2564,7 @@ int ObTscCgService::generate_doc_id_lookup_ctdef(const ObLogTableScan &op, LOG_WARN("allocate memory failed", K(ret)); } else { bool has_rowscn = false; + DASScanCGCtx cg_ctx; ObArray result_outputs; scan_ctdef->ref_table_id_ = doc_id_index_tid; aux_lookup_ctdef->children_cnt_ = 2; @@ -2487,7 +2572,7 @@ int ObTscCgService::generate_doc_id_lookup_ctdef(const ObLogTableScan &op, if (OB_ISNULL(scan_loc_meta)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate scan location meta failed", K(ret)); - } else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) { + } else if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *scan_ctdef, has_rowscn))) { LOG_WARN("generate das lookup scan ctdef failed", K(ret)); } else if (OB_FAIL(result_outputs.assign(scan_ctdef->result_output_))) { LOG_WARN("construct aux lookup ctdef failed", K(ret)); @@ -2566,13 +2651,14 @@ int ObTscCgService::extract_rowkey_doc_output_columns_ids( const share::schema::ObTableSchema &schema, const ObLogTableScan &op, const ObDASScanCtDef &scan_ctdef, + const bool need_output_rowkey, ObIArray &output_cids) { int ret = OB_SUCCESS; bool doc_id_is_found = false; const ObIArray &exprs = op.get_rowkey_id_exprs(); ObArray access_exprs; - for (int64_t i = 0; OB_SUCC(ret) && !doc_id_is_found && i < exprs.count(); ++i) { + for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); ++i) { ObRawExpr *expr = exprs.at(i); if (OB_ISNULL(expr)) { ret = OB_ERR_UNEXPECTED; @@ -2584,7 +2670,7 @@ int ObTscCgService::extract_rowkey_doc_output_columns_ids( if (OB_FAIL(access_exprs.push_back(expr))) { LOG_WARN("fail to add doc id access expr", K(ret), KPC(expr)); } - } else if (static_cast(expr)->is_rowkey_column()) { + } else if (need_output_rowkey && static_cast(expr)->is_rowkey_column()) { if (OB_FAIL(access_exprs.push_back(expr))) { LOG_WARN("fail to add doc id access expr", K(ret), KPC(expr)); } @@ -2602,6 +2688,7 @@ int ObTscCgService::extract_rowkey_doc_output_columns_ids( int 
ObTscCgService::generate_rowkey_doc_ctdef( const ObLogTableScan &op, + const DASScanCGCtx &cg_ctx, ObTableScanCtDef &tsc_ctdef, ObDASScanCtDef *&rowkey_doc_scan_ctdef) { @@ -2610,18 +2697,11 @@ int ObTscCgService::generate_rowkey_doc_ctdef( const ObTableSchema *rowkey_doc_schema = nullptr; ObDASScanCtDef *scan_ctdef = nullptr; ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); - uint64_t rowkey_doc_tid = OB_INVALID_ID; + uint64_t rowkey_doc_tid = op.get_rowkey_doc_table_id(); if (OB_ISNULL(schema_guard)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error, schema guard is nullptr", K(ret), KP(cg_.opt_ctx_)); - } else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(), data_schema))) { - LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id())); - } else if (OB_ISNULL(data_schema)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to get data table schema", K(ret)); - } else if (OB_FAIL(data_schema->get_rowkey_doc_tid(rowkey_doc_tid))) { - LOG_WARN("failed to get rowkey doc tid", K(ret), KPC(data_schema)); } else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(), rowkey_doc_tid, op.get_stmt(), @@ -2640,7 +2720,7 @@ int ObTscCgService::generate_rowkey_doc_ctdef( if (OB_ISNULL(scan_loc_meta)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate scan location meta failed", K(ret)); - } else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) { + } else if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *scan_ctdef, has_rowscn))) { LOG_WARN("generate das lookup scan ctdef failed", K(ret)); } else if (OB_FAIL(generate_table_loc_meta(op.get_table_id(), *op.get_stmt(), @@ -2666,6 +2746,7 @@ int ObTscCgService::generate_das_scan_ctdef_with_doc_id( int ret = OB_SUCCESS; ObArray result_outputs; ObDASScanCtDef *rowkey_doc_scan_ctdef = nullptr; + DASScanCGCtx cg_ctx; if (OB_ISNULL(scan_ctdef)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", K(ret), KPC(scan_ctdef)); @@ -2675,7 +2756,7 @@ int 
ObTscCgService::generate_das_scan_ctdef_with_doc_id( } else if (OB_ISNULL(doc_id_merge_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &cg_.phy_plan_->get_allocator(), 2))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to allocate doc id merge ctdef child array memory", K(ret)); - } else if (OB_FAIL(generate_rowkey_doc_ctdef(op, tsc_ctdef, rowkey_doc_scan_ctdef))) { + } else if (OB_FAIL(generate_rowkey_doc_ctdef(op, cg_ctx, tsc_ctdef, rowkey_doc_scan_ctdef))) { LOG_WARN("fail to generate rowkey doc ctdef", K(ret)); } else if (OB_FAIL(result_outputs.assign(scan_ctdef->result_output_))) { LOG_WARN("construct aux lookup ctdef failed", K(ret)); @@ -2800,13 +2881,14 @@ int ObTscCgService::generate_rowkey_vid_ctdef( LOG_WARN("alloc das ctdef failed", K(ret)); } else { bool has_rowscn = false; + DASScanCGCtx cg_ctx; scan_ctdef->ref_table_id_ = rowkey_vid_tid; ObDASTableLocMeta *scan_loc_meta = OB_NEWx(ObDASTableLocMeta, &cg_.phy_plan_->get_allocator(), cg_.phy_plan_->get_allocator()); if (OB_ISNULL(scan_loc_meta)) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("allocate scan location meta failed", K(ret)); - } else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) { + } else if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *scan_ctdef, has_rowscn))) { LOG_WARN("generate das lookup scan ctdef failed", K(ret)); } else if (OB_FAIL(generate_table_loc_meta(op.get_table_id(), *op.get_stmt(), @@ -2878,11 +2960,12 @@ int ObTscCgService::generate_table_lookup_ctdef(const ObLogTableScan &op, LOG_WARN("alloc das ctdef failed", K(ret)); } else { bool has_rowscn = false; + DASScanCGCtx cg_ctx; const ObTableSchema *table_schema = nullptr; ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard(); tsc_ctdef.lookup_ctdef_->ref_table_id_ = op.get_real_ref_table_id(); - if (OB_FAIL(generate_das_scan_ctdef(op, *tsc_ctdef.lookup_ctdef_, has_rowscn))) { + if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *tsc_ctdef.lookup_ctdef_, has_rowscn))) { LOG_WARN("generate
das lookup scan ctdef failed", K(ret)); } else if (OB_FAIL(schema_guard->get_table_schema(op.get_table_id(), op.get_ref_table_id(), @@ -3291,5 +3374,141 @@ int ObTscCgService::generate_mr_mv_scan_flag(const ObLogTableScan &op, ObQueryFl return ret; } +int ObTscCgService::generate_functional_lookup_ctdef(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASBaseCtDef *rowkey_scan_ctdef, + ObDASBaseCtDef *main_lookup_ctdef, + ObDASBaseCtDef *&root_ctdef) +{ + // Functional lookup will scan rowkey from one table (main table or secondary index) first, + // and then do functional lookup on specific secondary index to calculate index-related exprs. + // Can also do main table lookup after rowkey scan if needed. + int ret = OB_SUCCESS; + const ObIArray &lookup_tr_infos = op.get_lookup_tr_infos(); + const bool has_main_lookup = nullptr != main_lookup_ctdef; + ObIAllocator &ctdef_alloc = cg_.phy_plan_->get_allocator(); + ObDASFuncLookupCtDef *tmp_func_lookup_ctdef = nullptr; + ObDASIndexProjLookupCtDef *root_lookup_ctdef = nullptr; + ObArray func_lookup_result_outputs; + ObArray final_result_outputs; + DASScanCGCtx cg_ctx; + if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_FUNC_LOOKUP, ctdef_alloc, tmp_func_lookup_ctdef))) { + LOG_WARN("allocate functional lookup ctdef failed", K(ret)); + } else { + tmp_func_lookup_ctdef->main_lookup_cnt_ = has_main_lookup ? 1 : 0; + tmp_func_lookup_ctdef->func_lookup_cnt_ = lookup_tr_infos.count(); + tmp_func_lookup_ctdef->doc_id_lookup_cnt_ = lookup_tr_infos.count() > 0 ?
1 : 0; + tmp_func_lookup_ctdef->children_cnt_ = tmp_func_lookup_ctdef->main_lookup_cnt_ + + tmp_func_lookup_ctdef->func_lookup_cnt_ + tmp_func_lookup_ctdef->doc_id_lookup_cnt_; /* children_ layout: [main lookup, if any][rowkey->doc_id lookup, if any][one IR scan per lookup_tr_infos entry] */ + if (OB_ISNULL(tmp_func_lookup_ctdef->children_ + = OB_NEW_ARRAY(ObDASBaseCtDef *, &ctdef_alloc, tmp_func_lookup_ctdef->children_cnt_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate functional lookup ctdef children failed", K(ret)); + } else { + if (has_main_lookup) { + tmp_func_lookup_ctdef->children_[0] = main_lookup_ctdef; /* slot 0 is reserved for the main table lookup */ + if (OB_UNLIKELY(main_lookup_ctdef->op_type_ != DAS_OP_TABLE_SCAN)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected main lookup ctdef type", K(ret), KPC(main_lookup_ctdef)); + } else if (OB_FAIL(func_lookup_result_outputs.assign( + static_cast(main_lookup_ctdef)->result_output_))) { + LOG_WARN("failed to append func lookup result", K(ret)); + } + } + } + } + + if (OB_SUCC(ret) && lookup_tr_infos.count() > 0) { + // generate rowkey->doc_id lookup scan + const int64_t doc_id_lookup_ctdef_idx = has_main_lookup ?
1 : 0; + ObDASScanCtDef *doc_id_lookup_scan_ctdef = nullptr; + ObArray rowkey_exprs; + cg_ctx.set_is_func_lookup(); /* mark the rowkey->doc_id scan as belonging to a functional lookup */ + if (OB_FAIL(generate_rowkey_doc_ctdef(op, cg_ctx, tsc_ctdef, doc_id_lookup_scan_ctdef))) { + LOG_WARN("generate doc_id lookup scan ctdef failed", K(ret)); + } else if (OB_FAIL(rowkey_exprs.assign(op.get_rowkey_exprs()))) { + LOG_WARN("failed to assign rowkey exprs", K(ret)); + } else if (OB_FAIL(cg_.generate_rt_exprs(rowkey_exprs, doc_id_lookup_scan_ctdef->rowkey_exprs_))) { + LOG_WARN("failed to generate rowkey exprs for doc_id lookup scan", K(ret)); + } else { + tmp_func_lookup_ctdef->children_[doc_id_lookup_ctdef_idx] = doc_id_lookup_scan_ctdef; /* placed right after the optional main lookup */ + + for (int64_t i = 0; OB_SUCC(ret) && i < doc_id_lookup_scan_ctdef->result_output_.count(); ++i) { + ObExpr *doc_id_lookup_result = doc_id_lookup_scan_ctdef->result_output_.at(i); + if (OB_ISNULL(doc_id_lookup_result)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null rowkey expr", K(ret)); + } else if (doc_id_lookup_result->type_ == T_PSEUDO_ROW_TRANS_INFO_COLUMN + || doc_id_lookup_result->type_ == T_PSEUDO_GROUP_ID) { + // skip + } else if (nullptr == tmp_func_lookup_ctdef->lookup_doc_id_expr_) { + tmp_func_lookup_ctdef->lookup_doc_id_expr_ = doc_id_lookup_result; /* the single non-pseudo output is taken as the doc id expr; a second one is rejected below */ + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("more than one doc id result expr for rowkey 2 doc_id lookup", K(ret), KPC(doc_id_lookup_scan_ctdef)); + } + } + } + + for (int64_t i = 0; OB_SUCC(ret) && i < lookup_tr_infos.count(); ++i) { + cg_ctx.reset(); + cg_ctx.set_func_lookup_idx(i); /* reset() cleared the flag; this sets is_func_lookup_ again together with the index */ + const int64_t func_lookup_base_idx = doc_id_lookup_ctdef_idx + 1; + const int64_t cur_children_idx = func_lookup_base_idx + i; /* IR scans follow the doc_id lookup slot */ + ObDASBaseCtDef *tr_lookup_scan_ctdef = nullptr; + if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_IR_SCAN, ctdef_alloc, tr_lookup_scan_ctdef))) { + LOG_WARN("allocate text retrieval lookup scan failed", K(ret)); + } else if (OB_FAIL(generate_text_ir_ctdef(op, cg_ctx, tsc_ctdef, tr_lookup_scan_ctdef))) { +
LOG_WARN("failed to generate text retrieval ctdef", K(ret)); + } else if (OB_UNLIKELY(tr_lookup_scan_ctdef->op_type_ != DAS_OP_IR_SCAN)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected lookup tr scan type", K(ret)); + } else if (OB_FAIL(append_array_no_dup( + func_lookup_result_outputs, static_cast(tr_lookup_scan_ctdef)->result_output_))) { + LOG_WARN("failed to append func lookup result", K(ret)); + } else { + tmp_func_lookup_ctdef->children_[cur_children_idx] = tr_lookup_scan_ctdef; + } + } + } + + if (FAILEDx(tmp_func_lookup_ctdef->result_output_.assign(func_lookup_result_outputs))) { + LOG_WARN("failed to assign func lookup result output", K(ret)); + } else if (OB_FAIL(final_result_outputs.assign(func_lookup_result_outputs))) { + LOG_WARN("failed to append final lookup result output", K(ret)); + } else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_INDEX_PROJ_LOOKUP, ctdef_alloc, root_lookup_ctdef))) { + LOG_WARN("failed to allocate das ctdef", K(ret)); + } else if (OB_ISNULL(root_lookup_ctdef->children_ + = OB_NEW_ARRAY(ObDASBaseCtDef *, &ctdef_alloc, 2))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate root lookup ctdef childern failed", K(ret)); + } else if (OB_FAIL(append_array_no_dup(final_result_outputs, tmp_func_lookup_ctdef->result_output_))) { + LOG_WARN("failed to append final result outputs", K(ret)); /* NOTE(review): final_result_outputs and result_output_ were both just assigned from func_lookup_result_outputs, so this append looks like a no-op -- confirm */ + } else { + root_lookup_ctdef->children_cnt_ = 2; + root_lookup_ctdef->children_[0] = rowkey_scan_ctdef; + root_lookup_ctdef->children_[1] = tmp_func_lookup_ctdef; /* child 0 is the rowkey scan, child 1 the functional lookup */ + if (!has_main_lookup) { + // no main lookup, rowkey scan will project all output columns on base table for table scan + if (rowkey_scan_ctdef->op_type_ == ObDASOpType::DAS_OP_TABLE_SCAN) { /* NOTE(review): rowkey_scan_ctdef is dereferenced without a null check in this function -- confirm callers never pass nullptr */ + if (OB_FAIL(root_lookup_ctdef->index_scan_proj_exprs_.assign( + static_cast(rowkey_scan_ctdef)->result_output_))) { + LOG_WARN("Failed to assign index scan project column exprs", K(ret)); + } else if (OB_FAIL(append_array_no_dup(final_result_outputs,
root_lookup_ctdef->index_scan_proj_exprs_))) { + LOG_WARN("failed to append final result outputs", K(ret)); + } + } + } + if (FAILEDx(root_lookup_ctdef->result_output_.assign(final_result_outputs))) { + LOG_WARN("failed to append root lookup result outputs", K(ret)); + } + } + + if (OB_SUCC(ret)) { + root_ctdef = root_lookup_ctdef; /* the out-parameter is only written on success */ + } + return ret; +} + } // namespace sql } // namespace oceanbase diff --git a/src/sql/code_generator/ob_tsc_cg_service.h b/src/sql/code_generator/ob_tsc_cg_service.h index 77e99809a..366a66391 100644 --- a/src/sql/code_generator/ob_tsc_cg_service.h +++ b/src/sql/code_generator/ob_tsc_cg_service.h @@ -55,37 +55,62 @@ public: const ObRawExpr *trans_info_expr, const bool include_agg = false); private: + // temporary context for multiple das scan in one table scan operator + struct DASScanCGCtx + { + DASScanCGCtx() + : curr_func_lookup_idx_(0), + is_func_lookup_(false) {} + void reset() + { + curr_func_lookup_idx_ = 0; + is_func_lookup_ = false; + } + void set_func_lookup_idx(const int64_t idx) + { + is_func_lookup_ = true; /* choosing an index also raises the functional lookup flag */ + curr_func_lookup_idx_ = idx; + } + void set_is_func_lookup() + { + is_func_lookup_ = true; /* raises the flag only; the index is left as it was */ + } + TO_STRING_KV(K_(curr_func_lookup_idx), K_(is_func_lookup)); + int64_t curr_func_lookup_idx_; /* value passed to set_func_lookup_idx(); 0 after construction or reset() */ + bool is_func_lookup_; /* false after construction or reset() */ + }; int generate_access_ctdef(const ObLogTableScan &op, + const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef, common::ObIArray &doc_id_expr, common::ObIArray &vec_vid_expr, bool &has_rowscn); - int generate_pushdown_aggr_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef); - int generate_das_scan_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef, bool &has_rowscn); - int generate_table_param(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef, common::ObIArray &tsc_out_cols); + int generate_pushdown_aggr_ctdef(const ObLogTableScan &op, const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef); + int generate_das_scan_ctdef(const ObLogTableScan &op, const DASScanCGCtx &cg_ctx,
ObDASScanCtDef &scan_ctdef, bool &has_rowscn); + int generate_table_param(const ObLogTableScan &op, const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef, common::ObIArray &tsc_out_cols); int extract_das_output_column_ids(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef, const ObTableSchema &index_schema, + const DASScanCGCtx &cg_ctx, common::ObIArray &output_cids); int extract_das_access_exprs(const ObLogTableScan &op, + const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef, common::ObIArray &access_exprs); //extract these column exprs need by TSC operator, these column will output by DAS scan int extract_tsc_access_columns(const ObLogTableScan &op, common::ObIArray &access_exprs); int extract_das_column_ids(const common::ObIArray &column_exprs, common::ObIArray &column_ids); int generate_geo_access_ctdef(const ObLogTableScan &op, const ObTableSchema &index_schema, ObArray &access_exprs); - int generate_text_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef); - int generate_vec_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef); - int generate_multivalue_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef); - int generate_gis_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef); + int generate_text_ir_ctdef(const ObLogTableScan &op, + const DASScanCGCtx &cg_ctx, + ObTableScanCtDef &tsc_ctdef, + ObDASBaseCtDef *&root_ctdef); int extract_text_ir_access_columns(const ObLogTableScan &op, + const ObTextRetrievalInfo &tr_info, const ObDASScanCtDef &scan_ctdef, ObIArray &access_exprs); - int extract_vec_ir_access_columns(const ObLogTableScan &op, - const ObDASScanCtDef &scan_ctdef, - ObIArray &access_exprs); - int extract_text_ir_das_output_column_ids(const ObLogTableScan &op, + int extract_text_ir_das_output_column_ids(const ObTextRetrievalInfo &tr_info, const ObDASScanCtDef &scan_ctdef, ObIArray 
&output_cids); int extract_rowkey_doc_access_columns(const ObLogTableScan &op, @@ -94,7 +119,22 @@ private: int extract_rowkey_doc_output_columns_ids(const share::schema::ObTableSchema &schema, const ObLogTableScan &op, const ObDASScanCtDef &scan_ctdef, + const bool need_output_rowkey, ObIArray &output_cids); + int generate_text_ir_pushdown_expr_ctdef(const ObTextRetrievalInfo &tr_info, + const ObLogTableScan &op, + ObDASScanCtDef &scan_ctdef); + int generate_text_ir_spec_exprs(const ObTextRetrievalInfo &tr_info, + ObDASIRScanCtDef &text_ir_scan_ctdef); + int generate_vec_ir_spec_exprs(const ObLogTableScan &op, + ObDASVecAuxScanCtDef &vec_ir_scan_ctdef); + int generate_vec_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef); + int generate_multivalue_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef); + int generate_gis_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef); + int extract_vec_ir_access_columns(const ObLogTableScan &op, + const ObDASScanCtDef &scan_ctdef, + ObIArray &access_exprs); + int extract_vector_das_output_column_ids(const ObLogTableScan &op, const ObDASScanCtDef &scan_ctdef, ObIArray &output_cids); @@ -105,17 +145,13 @@ private: const ObLogTableScan &op, const ObDASScanCtDef &scan_ctdef, ObIArray &output_cids); - int generate_text_ir_pushdown_expr_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef); - int generate_text_ir_spec_exprs(const ObLogTableScan &op, - ObDASIRScanCtDef &text_ir_scan_ctdef); - int generate_vec_ir_spec_exprs(const ObLogTableScan &op, - ObDASVecAuxScanCtDef &vec_ir_scan_ctdef); int generate_doc_id_lookup_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *ir_scan_ctdef, ObExpr *doc_id_expr, ObDASIRAuxLookupCtDef *&aux_lookup_ctdef); int generate_rowkey_doc_ctdef(const ObLogTableScan &op, + const DASScanCGCtx &cg_ctx, ObTableScanCtDef &tsc_ctdef, ObDASScanCtDef 
*&rowkey_doc_scan_ctdef); int generate_das_scan_ctdef_with_doc_id(const ObLogTableScan &op, @@ -166,6 +202,13 @@ private: ObIndexMergeNode *node, common::ObIAllocator &alloc, ObDASBaseCtDef *&node_ctdef); + + int generate_functional_lookup_ctdef(const ObLogTableScan &op, + ObTableScanCtDef &tsc_ctdef, + ObDASBaseCtDef *rowkey_scan_ctdef, + ObDASBaseCtDef *main_lookup_ctdef, + ObDASBaseCtDef *&root_ctdef); + private: ObStaticEngineCG &cg_; }; diff --git a/src/sql/das/iter/ob_das_cache_lookup_iter.cpp b/src/sql/das/iter/ob_das_cache_lookup_iter.cpp new file mode 100644 index 000000000..7fe8d769e --- /dev/null +++ b/src/sql/das/iter/ob_das_cache_lookup_iter.cpp @@ -0,0 +1,123 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#define USING_LOG_PREFIX SQL_DAS +#include "sql/das/iter/ob_das_cache_lookup_iter.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ +int ObDASCacheLookupIter::inner_get_next_rows(int64_t &count, int64_t capacity) +{ /* State machine over state_: INDEX_SCAN collects rowkeys from index_table_iter_, DO_LOOKUP calls do_index_lookup(), OUTPUT_ROWS reads data_table_iter_, FINISHED returns OB_ITER_END. The loop runs until rows are produced or ret is no longer OB_SUCCESS. */ + int ret = OB_SUCCESS; + bool get_next_rows = false; + int64_t simulate_batch_row_cnt = - EVENT_CALL(EventTable::EN_TABLE_LOOKUP_BATCH_ROW_COUNT); /* negated event value; only honoured when positive and below default_batch_row_count_ */ + const bool use_simulate_batch_row_cnt = simulate_batch_row_cnt > 0 && simulate_batch_row_cnt < default_batch_row_count_; + int64_t default_row_batch_cnt = use_simulate_batch_row_cnt ?
simulate_batch_row_cnt : default_batch_row_count_; + LOG_DEBUG("simulate lookup row batch count", K(simulate_batch_row_cnt), K(default_row_batch_cnt)); + do { + switch (state_) { + case INDEX_SCAN: { + reset_lookup_state(); + int64_t storage_count = 0; + int64_t index_capacity = 0; + // TODO: @zyx439997 support the outputs of index scan as the project columns by the deep copy { + bool need_accumulation = true; + // } + while (OB_SUCC(ret) && need_accumulation && !index_end_ && lookup_rowkey_cnt_ < default_row_batch_cnt) { + storage_count = 0; + index_capacity = std::min(capacity, std::min(max_size_, default_row_batch_cnt - lookup_rowkey_cnt_)); /* bounded by the requested capacity, max_size_ and the room left in this batch */ + index_table_iter_->clear_evaluated_flag(); + if (OB_FAIL(index_table_iter_->get_next_rows(storage_count, index_capacity))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next rows from index table", K(ret)); + } else { + if (storage_count == 0) { + index_end_ = true; + } + ret = OB_SUCCESS; /* OB_ITER_END is not an error here; rows returned with it are still added below */ + } + } else { + need_accumulation = false; /* a read that returns success ends the accumulation loop after this pass */ + } + if (OB_SUCC(ret) && storage_count > 0) { + if (OB_FAIL(add_rowkeys(storage_count))) { + LOG_WARN("failed to add row keys", K(ret)); + } else { + lookup_rowkey_cnt_ += storage_count; + } + } + } + + if (OB_SUCC(ret)) { + if (OB_LIKELY(lookup_rowkey_cnt_ > 0)) { + state_ = DO_LOOKUP; + } else { + state_ = FINISHED; /* no rowkeys were collected, so there is nothing left to look up */ + } + } + break; + } + + case DO_LOOKUP: { + if (OB_FAIL(do_index_lookup())) { + LOG_WARN("failed to do index lookup", K(ret)); + } else { + state_ = OUTPUT_ROWS; + } + break; + } + + case OUTPUT_ROWS: { + count = 0; + data_table_iter_->clear_evaluated_flag(); + if (OB_FAIL(data_table_iter_->get_next_rows(count, capacity))) { + if (OB_LIKELY(OB_ITER_END == ret)) { + ret = OB_SUCCESS; + if (count > 0) { + lookup_row_cnt_ += count; + get_next_rows = true; + } else { + if (OB_FAIL(check_index_lookup())) { + LOG_WARN("failed to check table lookup", K(ret)); + } else { + state_ = INDEX_SCAN; /* current batch is drained: go back and collect the next batch of rowkeys */ + } + } + } else { + LOG_WARN("failed to get next rows from data
table", K(ret)); + } + } else { + lookup_row_cnt_ += count; + get_next_rows = true; + } + if (OB_SUCC(ret) && OB_UNLIKELY(lookup_row_cnt_ != lookup_rowkey_cnt_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected lookup row count", K_(lookup_row_cnt), K_(lookup_rowkey_cnt), K(ret)); + } + break; + } + + case FINISHED: { + ret = OB_ITER_END; + break; + } + } + } while (!get_next_rows && OB_SUCC(ret)); + + return ret; +} + +} // namespace sql +} // namespace oceanbase \ No newline at end of file diff --git a/src/sql/das/iter/ob_das_cache_lookup_iter.h b/src/sql/das/iter/ob_das_cache_lookup_iter.h new file mode 100644 index 000000000..485980219 --- /dev/null +++ b/src/sql/das/iter/ob_das_cache_lookup_iter.h @@ -0,0 +1,55 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details.
+ */ + +#ifndef OBDEV_SRC_SQL_DAS_ITER_OB_DAS_CACHE_LOOKUP_ITER_H_ +#define OBDEV_SRC_SQL_DAS_ITER_OB_DAS_CACHE_LOOKUP_ITER_H_ + +#include "sql/das/iter/ob_das_local_lookup_iter.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ + +struct ObDASCacheLookupIterParam : public ObDASLocalLookupIterParam +{ +public: + ObDASCacheLookupIterParam() + : ObDASLocalLookupIterParam() + {} + virtual bool is_valid() const override + { + return true; /* unconditionally valid; no fields are checked here */ + } +}; + +class ObDASScanCtDef; +class ObDASScanRtDef; +class ObDASFuncLookupIter; +class ObDASCacheLookupIter : public ObDASLocalLookupIter +{ +public: + ObDASCacheLookupIter(const ObDASIterType type = ObDASIterType::DAS_ITER_LOCAL_LOOKUP) /* defaults to the local lookup iter type */ + : ObDASLocalLookupIter(type) + {} + virtual ~ObDASCacheLookupIter() {} + +protected: + virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override; /* the only member function this header overrides */ +}; + +} // namespace sql +} // namespace oceanbase + + +#endif /* OBDEV_SRC_SQL_DAS_ITER_OB_DAS_CACHE_LOOKUP_ITER_H_ */ diff --git a/src/sql/das/iter/ob_das_func_data_iter.cpp b/src/sql/das/iter/ob_das_func_data_iter.cpp new file mode 100644 index 000000000..a1ca83b47 --- /dev/null +++ b/src/sql/das/iter/ob_das_func_data_iter.cpp @@ -0,0 +1,426 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details.
+ */ + +#define USING_LOG_PREFIX SQL_DAS + +#include "sql/das/iter/ob_das_func_data_iter.h" +#include "sql/das/iter/ob_das_iter_define.h" +#include "sql/das/ob_das_scan_op.h" + +using namespace oceanbase::common; + +namespace oceanbase +{ +namespace sql +{ + +ObDASFuncDataIterParam::ObDASFuncDataIterParam() + : ObDASIterParam(ObDASIterType::DAS_ITER_FUNC_DATA), + tr_merge_iters_(nullptr), + iter_count_(0), + main_lookup_ctdef_(nullptr), + main_lookup_rtdef_(nullptr), + main_lookup_iter_(nullptr), + trans_desc_(nullptr), + snapshot_(nullptr) +{} + +ObDASFuncDataIterParam::~ObDASFuncDataIterParam() +{ +} + +ObDASFuncDataIter::ObDASFuncDataIter() + :ObDASIter(), + tr_merge_iters_(nullptr), + iter_count_(0), + main_lookup_ctdef_(nullptr), + main_lookup_rtdef_(nullptr), + main_lookup_iter_(nullptr), + main_lookup_tablet_id_(0), + main_lookup_ls_id_(0), + main_lookup_param_(), + merge_memctx_(), + doc_ids_() + {} + +ObDASFuncDataIter::~ObDASFuncDataIter() +{ +} + +int ObDASFuncDataIter::do_table_scan() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(tr_merge_iters_) || OB_UNLIKELY(iter_count_ <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, tr merge iter is nullptr", K(ret)); + } else if (OB_FAIL(build_tr_merge_iters_rangekey())) { + LOG_WARN("fail to build rowkey doc range", K(ret)); + } else { + if (nullptr != main_lookup_iter_) { + if (OB_UNLIKELY(!main_lookup_tablet_id_.is_valid() || !main_lookup_ls_id_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, main lookup tablet id or ls id is invalid", K(ret), K(main_lookup_tablet_id_), K(main_lookup_ls_id_)); + } else { + main_lookup_param_.tablet_id_ = main_lookup_tablet_id_; + main_lookup_param_.ls_id_ = main_lookup_ls_id_; + if (OB_FAIL(main_lookup_iter_->do_table_scan())) { + LOG_WARN("fail to do table scan for main lookup table", K(ret), KPC(main_lookup_iter_)); + } + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < iter_count_; i++) { + if (OB_FAIL(tr_merge_iters_[i]->do_table_scan())) 
{ + LOG_WARN("fail to do table scan for tr merge iter", K(ret), K(i), KPC(tr_merge_iters_[i])); + } + } + } + return ret; +} + +int ObDASFuncDataIter::rescan() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(tr_merge_iters_) || OB_UNLIKELY(iter_count_ <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, tr merge iter is nullptr", K(ret)); + } else if (OB_FAIL(build_tr_merge_iters_rangekey())) { + LOG_WARN("fail to build rowkey doc range", K(ret)); + } else if (nullptr != main_lookup_iter_ && OB_FAIL(main_lookup_iter_->rescan())) { + LOG_WARN("fail to do table scan for main lookup table", K(ret), KPC(main_lookup_iter_)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < iter_count_; i++) { + if (OB_FAIL(tr_merge_iters_[i]->rescan())) { + LOG_WARN("fail to do table scan for tr merge iter", K(ret), K(i), KPC(tr_merge_iters_[i])); + } + } + } + return ret; +} + +void ObDASFuncDataIter::clear_evaluated_flag() +{ + if (OB_NOT_NULL(main_lookup_iter_)) { + main_lookup_iter_->clear_evaluated_flag(); + } + for (int64_t i = 0; i < iter_count_; i++) { + if (OB_NOT_NULL(tr_merge_iters_[i])) { + tr_merge_iters_[i]->clear_evaluated_flag(); + } + } +} + +int ObDASFuncDataIter::inner_init(ObDASIterParam ¶m) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(ObDASIterType::DAS_ITER_FUNC_DATA != param.type_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("inner init das iter with bad param type", K(ret), K(param)); + } else { + ObDASFuncDataIterParam &merge_param = static_cast(param); + lib::ContextParam param; + param.set_mem_attr(MTL_ID(), "FTSMerge", ObCtxIds::DEFAULT_CTX_ID).set_properties(lib::USE_TL_PAGE_OPTIONAL); + if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(merge_memctx_, param))) { + LOG_WARN("failed to create merge memctx", K(ret)); + } else { + tr_merge_iters_ = merge_param.tr_merge_iters_; + iter_count_ = merge_param.iter_count_; + main_lookup_ctdef_ = merge_param.main_lookup_ctdef_; + main_lookup_rtdef_ = merge_param.main_lookup_rtdef_; + main_lookup_iter_ = 
merge_param.main_lookup_iter_; + read_count_ = 0; + sql::ObExprBasicFuncs *basic_funcs = ObDatumFuncs::get_basic_func(ObVarcharType, CS_TYPE_BINARY); + cmp_func_ = lib::is_oracle_mode() ? basic_funcs->null_last_cmp_ : basic_funcs->null_first_cmp_; + if (main_lookup_iter_ && OB_FAIL(init_main_lookup_scan_param(main_lookup_param_, + main_lookup_ctdef_, + main_lookup_rtdef_, + merge_param.trans_desc_, + merge_param.snapshot_))) { + LOG_WARN("fail to init rowkey doc scan param", K(ret), K(merge_param)); + } + } + } + return ret; +} + +int ObDASFuncDataIter::inner_reuse() +{ + int ret = OB_SUCCESS; + doc_ids_.reuse(); + read_count_ = 0; + if (main_lookup_iter_) { + ObDASScanIter *main_lookup_iter = static_cast(main_lookup_iter_); + storage::ObTableScanParam &main_lookup_scan_param = main_lookup_iter->get_scan_param(); + if (OB_UNLIKELY(&main_lookup_param_ != &main_lookup_iter->get_scan_param())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, main lookup param is nullptr", K(ret)); + } else { + const ObTabletID &old_tablet_id = main_lookup_param_.tablet_id_; + main_lookup_param_.need_switch_param_ = main_lookup_param_.need_switch_param_ || + ((old_tablet_id.is_valid() && old_tablet_id != main_lookup_tablet_id_) ? 
true : false); + main_lookup_param_.tablet_id_ = main_lookup_tablet_id_; + main_lookup_param_.ls_id_ = main_lookup_ls_id_; + if (!main_lookup_param_.key_ranges_.empty()) { + main_lookup_param_.key_ranges_.reuse(); + } + if (OB_FAIL(main_lookup_iter_->reuse())) { + LOG_WARN("fail to reuse data table iter", K(ret)); + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(merge_memctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("merge_memctx_ is nullptr", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < iter_count_; i++) { + if (OB_NOT_NULL(tr_merge_iters_[i])) { + tr_merge_iters_[i]->reuse(); + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tr merge iter is nullptr", K(ret), K(i)); + } + } + merge_memctx_->reset_remain_one_page(); + } + return ret; +} + +int ObDASFuncDataIter::inner_release() +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(merge_memctx_)) { + DESTROY_CONTEXT(merge_memctx_); + merge_memctx_ = nullptr; + } + if (main_lookup_iter_) { + main_lookup_iter_ = nullptr; + } + for (int64_t i = 0; i < iter_count_; i++) { + if (OB_NOT_NULL(tr_merge_iters_[i])) { + tr_merge_iters_[i] = nullptr; + } + } + doc_ids_.reset(); + main_lookup_param_.destroy_schema_guard(); + main_lookup_param_.snapshot_.reset(); + main_lookup_param_.destroy(); + read_count_ = 0; + return ret; +} + +int ObDASFuncDataIter::inner_get_next_row() +{ + int ret = OB_SUCCESS; + int64_t default_size = doc_ids_.count(); + bool iter_end = false; + if (OB_ISNULL(tr_merge_iters_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, tr merge iter is nullptr", K(ret)); + } else if (OB_UNLIKELY(1 != default_size)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, default size is not 1", K(ret), K(default_size)); + } else if (main_lookup_iter_ && OB_FAIL(main_lookup_iter_->get_next_row())) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to get next row for main lookup table", K(ret), KPC(main_lookup_iter_)); + } else { + ret = OB_SUCCESS; + } + } + + for (int64_t i = 0; 
OB_SUCC(ret) && i < iter_count_; i++) { + if (OB_FAIL(tr_merge_iters_[i]->get_next_row())) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to get next row for tr merge iter", K(ret), K(i), KPC(tr_merge_iters_[i])); + } else { + ret = OB_SUCCESS; + iter_end = true; + } + } + } + if (OB_SUCC(ret) && iter_end) { + ret = OB_ITER_END; + } + return ret; +} + +int ObDASFuncDataIter::inner_get_next_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + int64_t main_lookup_count = 0; + int64_t tr_merge_count = 0; + int64_t default_size = doc_ids_.count(); + + if (OB_ISNULL(tr_merge_iters_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, tr merge iter is nullptr", K(ret)); + } else if (main_lookup_iter_) { + int64_t storage_count = 0; + while (OB_SUCC(ret) && main_lookup_count < capacity) { + int64_t need_capacity = capacity - main_lookup_count; + if (OB_FAIL(main_lookup_iter_->get_next_rows(storage_count, need_capacity))) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to get next row for main lookup table", K(ret), KPC(main_lookup_iter_)); + } else if (storage_count > 0) { + main_lookup_count += storage_count; + } + } else { + main_lookup_count += storage_count; + } + } + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + } + if (OB_UNLIKELY(main_lookup_iter_ && + main_lookup_count != capacity && // case: limit, read once + default_size != main_lookup_count + read_count_)) { // case: limit, read more times + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, main lookup count is not equal to capacity", K(ret), K(default_size), K(main_lookup_count)); + } + + int tmp_count = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < iter_count_; i++) { + tr_merge_count = 0; + if (OB_FAIL(tr_merge_iters_[i]->get_next_rows(tr_merge_count, capacity))) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to get next row for tr merge iter", K(ret), K(i), KPC(tr_merge_iters_[i])); + } else { + ret = OB_SUCCESS; + } + } + if (OB_UNLIKELY(tmp_count != 0 && tmp_count != 
tr_merge_count)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, tr merge count is not equal to tmp count", K(ret), K(tr_merge_count), K(tmp_count), K(i)); + } else if (OB_UNLIKELY(0 != tr_merge_count && + tr_merge_count != capacity && + tr_merge_count + read_count_ != default_size)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, tr merge count is not equal to capacity", + K(ret), K(tr_merge_count), K(capacity), K(i), K_(read_count), K(default_size)); + } else { + tmp_count = tr_merge_count; + } + } + if (OB_SUCC(ret) && main_lookup_iter_ && tr_merge_count != main_lookup_count) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, tr merge count is not equal to main lookup count", K(ret), K(tr_merge_count), K(main_lookup_count)); + } + if (OB_SUCC(ret)) { + count = tr_merge_count; + if (0 == tr_merge_count) { + ret = OB_ITER_END; + } else { + read_count_ = read_count_ + count; + } + } + return ret; +} + +int ObDASFuncDataIter::build_tr_merge_iters_rangekey() +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(tr_merge_iters_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, tr merge iters is nullptr", K(ret)); + } else { + lib::ob_sort(doc_ids_.begin(), doc_ids_.end(), FtsDocIdCmp(cmp_func_, &ret)); + if (OB_FAIL(ret)) { + LOG_WARN("fail to sort doc id", K(ret)); + } + for (int64_t i = 0; OB_SUCC(ret) && i < iter_count_; i++) { + ObDASTextRetrievalMergeIter *tr_merge_iter = static_cast(tr_merge_iters_[i]); + if (OB_FAIL(tr_merge_iter->set_rangkey_and_selector(doc_ids_))) { + LOG_WARN("fail to add doc id", K(ret)); + } + } + } + return ret; +} + +int ObDASFuncDataIter::init_main_lookup_scan_param( + ObTableScanParam ¶m, + const ObDASScanCtDef *ctdef, + ObDASScanRtDef *rtdef, + transaction::ObTxDesc *trans_desc, + transaction::ObTxReadSnapshot *snapshot) +{ + int ret = OB_SUCCESS; + uint64_t tenant_id = MTL_ID(); + param.tenant_id_ = tenant_id; + param.key_ranges_.set_attr(ObMemAttr(tenant_id, "SParamKR")); + 
param.ss_key_ranges_.set_attr(ObMemAttr(tenant_id, "SParamSSKR")); + if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr ctdef or rtdef", K(ret), KPC(ctdef), KPC(rtdef)); + } else { + param.scan_allocator_ = &get_arena_allocator(); + param.allocator_ = &rtdef->stmt_allocator_; + param.tx_lock_timeout_ = rtdef->tx_lock_timeout_; + param.index_id_ = ctdef->ref_table_id_; + param.is_get_ = ctdef->is_get_; + param.is_for_foreign_check_ = rtdef->is_for_foreign_check_; + param.timeout_ = rtdef->timeout_ts_; + param.scan_flag_ = rtdef->scan_flag_; + param.reserved_cell_count_ = ctdef->access_column_ids_.count(); + param.sql_mode_ = rtdef->sql_mode_; + param.frozen_version_ = rtdef->frozen_version_; + param.force_refresh_lc_ = rtdef->force_refresh_lc_; + param.output_exprs_ = &(ctdef->pd_expr_spec_.access_exprs_); + param.aggregate_exprs_ = &(ctdef->pd_expr_spec_.pd_storage_aggregate_output_); + param.ext_file_column_exprs_ = &(ctdef->pd_expr_spec_.ext_file_column_exprs_); + param.ext_column_convert_exprs_ = &(ctdef->pd_expr_spec_.ext_column_convert_exprs_); + param.calc_exprs_ = &(ctdef->pd_expr_spec_.calc_exprs_); + param.table_param_ = &(ctdef->table_param_); + param.op_ = rtdef->p_pd_expr_op_; + param.row2exprs_projector_ = rtdef->p_row2exprs_projector_; + param.schema_version_ = ctdef->schema_version_; + param.tenant_schema_version_ = rtdef->tenant_schema_version_; + param.limit_param_ = rtdef->limit_param_; + param.need_scn_ = rtdef->need_scn_; + param.pd_storage_flag_ = ctdef->pd_expr_spec_.pd_storage_flag_.pd_flag_; + param.fb_snapshot_ = rtdef->fb_snapshot_; + param.fb_read_tx_uncommitted_ = rtdef->fb_read_tx_uncommitted_; + if (rtdef->is_for_foreign_check_) { + param.trans_desc_ = trans_desc; + } + if (OB_NOT_NULL(snapshot)) { + if (OB_FAIL(param.snapshot_.assign(*snapshot))) { + LOG_WARN("assign snapshot fail", K(ret)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null snapshot", K(ret), 
KPC(ctdef), KPC(rtdef)); + } + if (OB_NOT_NULL(trans_desc)) { + param.tx_id_ = trans_desc->get_tx_id(); + } else { + param.tx_id_.reset(); + } + if (!ctdef->pd_expr_spec_.pushdown_filters_.empty()) { + param.op_filters_ = &ctdef->pd_expr_spec_.pushdown_filters_; + } + param.pd_storage_filters_ = rtdef->p_pd_expr_op_->pd_storage_filters_; + if (OB_FAIL(param.column_ids_.assign(ctdef->access_column_ids_))) { + LOG_WARN("failed to assign column ids", K(ret)); + } + if (rtdef->sample_info_ != nullptr) { + param.sample_info_ = *rtdef->sample_info_; + } + } + + LOG_DEBUG("init rowkey doc table scan param finished", K(param), K(ret)); + return ret; +} + +} // end namespace sql +} // end namespace oceanbase diff --git a/src/sql/das/iter/ob_das_func_data_iter.h b/src/sql/das/iter/ob_das_func_data_iter.h new file mode 100644 index 000000000..d149fc815 --- /dev/null +++ b/src/sql/das/iter/ob_das_func_data_iter.h @@ -0,0 +1,148 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_DAS_FUNC_DATA_ITER_H_ +#define OB_DAS_FUNC_DATA_ITER_H_ + +#include "sql/das/iter/ob_das_iter.h" +#include "sql/das/iter/ob_das_scan_iter.h" +#include "sql/das/iter/ob_das_text_retrieval_merge_iter.h" +#include "common/ob_tablet_id.h" +#include "share/ob_ls_id.h" +#include "storage/access/ob_dml_param.h" + +namespace oceanbase +{ +namespace sql +{ + +class ObDASScanCtDef; +class ObDASScanRtDef; +class ObDASFuncDataIterParam final : public ObDASIterParam +{ +public: + ObDASFuncDataIterParam(); + ~ObDASFuncDataIterParam(); + + virtual bool is_valid() const override + { + return iter_count_ >= 1 && nullptr != tr_merge_iters_; + } +public: + ObDASIter **tr_merge_iters_; + int64_t iter_count_; + const ObDASScanCtDef *main_lookup_ctdef_; + ObDASScanRtDef *main_lookup_rtdef_; + ObDASIter *main_lookup_iter_; + transaction::ObTxDesc *trans_desc_; + transaction::ObTxReadSnapshot *snapshot_; +}; + +/** + * FTS DATA Iter: + * + * + * FTS_DATA_Iter + * / | | \ + * / | | \ + * / | | \ + * / | | \ + * TR_ITER1 TR_ITER2 TR_ITER3 ... 
MAIN_LOOKUP_ITER(may be null) + * + **/ + +class ObDASFuncDataIter final : public ObDASIter +{ +public: + ObDASFuncDataIter(); + ~ObDASFuncDataIter(); + + virtual int do_table_scan() override; + virtual int rescan() override; + virtual void clear_evaluated_flag() override; + inline int add_doc_id(const ObDocId &doc_id) + { + int ret = OB_SUCCESS; + int64_t idx = doc_ids_.count(); + if (OB_FAIL(doc_ids_.push_back(std::make_pair(doc_id, idx)))) { + LOG_WARN("fail to push back doc id", K(ret)); + } + return ret; + } + void set_tablet_id(const ObTabletID &tablet_id) { main_lookup_tablet_id_ = tablet_id; } + void set_ls_id(const share::ObLSID &ls_id) { main_lookup_ls_id_ = ls_id; } + bool has_main_lookup_iter() const { return nullptr != main_lookup_iter_; } + ObTableScanParam &get_main_lookup_scan_param() { return main_lookup_param_; } + const ObDASScanCtDef *get_main_lookup_ctdef() { return main_lookup_ctdef_; } + INHERIT_TO_STRING_KV("ObDASIter", ObDASIter, + K(main_lookup_param_), + KPC(main_lookup_iter_)); +protected: + virtual int inner_init(ObDASIterParam ¶m) override; + virtual int inner_reuse() override; + virtual int inner_release() override; + virtual int inner_get_next_row() override; + virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override; +private: + common::ObArenaAllocator &get_arena_allocator() { return merge_memctx_->get_arena_allocator(); } + int init_main_lookup_scan_param( + ObTableScanParam ¶m, + const ObDASScanCtDef *ctdef, + ObDASScanRtDef *rtdef, + transaction::ObTxDesc *trans_desc, + transaction::ObTxReadSnapshot *snapshot); + int build_tr_merge_iters_rangekey(); + struct FtsDocIdCmp + { + FtsDocIdCmp(common::ObDatumCmpFuncType cmp_func, int *ret) + { + cmp_func_ = cmp_func; + err_code_ = ret; + } + + bool operator()(const std::pair &a, const std::pair &b) const + { + int ret = OB_SUCCESS; + ObDatum l_datum; + ObDatum r_datum; + // ObDocId must be not null; ObDocIds must be not same + 
l_datum.set_string(a.first.get_string()); + r_datum.set_string(b.first.get_string()); + int tmp_ret = 0; + if (OB_FAIL(cmp_func_(l_datum, r_datum, tmp_ret))) { + LOG_WARN("failed to compare doc id by datum", K(ret)); + } + *err_code_ = *err_code_ == OB_SUCCESS ? ret : *err_code_; + return tmp_ret < 0; + } + int *err_code_; + private: + common::ObDatumCmpFuncType cmp_func_; + }; +private: + common::ObDatumCmpFuncType cmp_func_; + ObDASIter **tr_merge_iters_; + int64_t iter_count_; + const ObDASScanCtDef *main_lookup_ctdef_; + ObDASScanRtDef *main_lookup_rtdef_; + ObDASIter *main_lookup_iter_; + ObTabletID main_lookup_tablet_id_; + share::ObLSID main_lookup_ls_id_; + storage::ObTableScanParam main_lookup_param_; + lib::MemoryContext merge_memctx_; + ObSEArray, 4> doc_ids_; + int64_t read_count_; +}; + +} // end namespace sql +} // end namespace oceanbase +#endif // OB_DAS_FUNC_DATA_ITER_H_ \ No newline at end of file diff --git a/src/sql/das/iter/ob_das_functional_lookup_iter.cpp b/src/sql/das/iter/ob_das_functional_lookup_iter.cpp new file mode 100644 index 000000000..a727ceb2c --- /dev/null +++ b/src/sql/das/iter/ob_das_functional_lookup_iter.cpp @@ -0,0 +1,316 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#define USING_LOG_PREFIX SQL_DAS +#include "sql/das/iter/ob_das_functional_lookup_iter.h" +#include "sql/das/iter/ob_das_scan_iter.h" +#include "sql/das/iter/ob_das_func_data_iter.h" +#include "sql/das/ob_das_scan_op.h" +#include "sql/das/ob_das_ir_define.h" +#include "storage/concurrency_control/ob_data_validation_service.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ + +int ObDASFuncLookupIter::inner_init(ObDASIterParam ¶m) +{ + int ret = OB_SUCCESS; + if (param.type_ != ObDASIterType::DAS_ITER_FUNC_LOOKUP) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("inner init das iter with bad param type", K(param), K(ret)); + } else { + ObDASFuncLookupIterParam &lookup_param = static_cast(param); + state_ = LookupState::INDEX_SCAN; + index_end_ = false; + default_batch_row_count_ = lookup_param.default_batch_row_count_; + lookup_rowkey_cnt_ = 0; + lookup_row_cnt_ = 0; + index_table_iter_ = lookup_param.index_table_iter_; + data_table_iter_ = lookup_param.data_table_iter_; + index_ctdef_ = lookup_param.index_ctdef_; + index_rtdef_ = lookup_param.index_rtdef_; + lookup_ctdef_ = lookup_param.lookup_ctdef_; + lookup_rtdef_ = lookup_param.lookup_rtdef_; + start_table_scan_ = false; + trans_desc_ = lookup_param.trans_desc_; + snapshot_ = lookup_param.snapshot_; + lib::ContextParam param; + param.set_mem_attr(MTL_ID(), ObModIds::OB_SQL_TABLE_LOOKUP, ObCtxIds::DEFAULT_CTX_ID) + .set_properties(lib::USE_TL_PAGE_OPTIONAL); + if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(lookup_memctx_, param))) { + LOG_WARN("failed to create lookup memctx", K(ret)); + } else if (OB_FAIL(rowkey_exprs_.push_back(lookup_param.doc_id_expr_))) { + LOG_WARN("failed to assign rowkey exprs", K(ret)); + } else if (rowkey_exprs_.count() != 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected rowkey exprs count", K(rowkey_exprs_.count()), K(ret)); + } + } + return ret; +} + +void ObDASFuncLookupIter::reset_lookup_state() +{ + lookup_row_cnt_ = 0; + lookup_rowkey_cnt_ = 0; + index_end_ 
= false; + state_ = LookupState::INDEX_SCAN; + if (!is_first_lookup_) { + data_table_iter_->reuse(); + } + if (OB_NOT_NULL(lookup_memctx_)) { + lookup_memctx_->reset_remain_one_page(); + } + trans_info_array_.reuse(); +} + +int ObDASFuncLookupIter::inner_reuse() +{ + int ret = OB_SUCCESS; + ObDASScanIter *index_table_iter = static_cast(index_table_iter_); + storage::ObTableScanParam &index_scan_param = index_table_iter->get_scan_param(); + if (!index_scan_param.key_ranges_.empty()) { + index_scan_param.key_ranges_.reuse(); + } + if (start_table_scan_) { + if (OB_FAIL(index_table_iter_->reuse())) { + LOG_WARN("failed to reuse index table iter", K(ret)); + } else if (is_first_lookup_ &&OB_FAIL(data_table_iter_->reuse())) { + LOG_WARN("failed to reuse data table iter", K(ret)); + } else if (OB_FAIL(ObDASLookupIter::inner_reuse())) { + LOG_WARN("failed to reuse das lookup iter", K(ret)); + } else { + trans_info_array_.reuse(); + } + } + return ret; +} + +int ObDASFuncLookupIter::inner_release() +{ + int ret = OB_SUCCESS; + start_table_scan_ = false; + if (OB_FAIL(ObDASLocalLookupIter::inner_release())) { + LOG_WARN("failed to release lookup iter", K(ret)); + } + return ret; +} + +int ObDASFuncLookupIter::do_table_scan() +{ + int ret = OB_SUCCESS; + start_table_scan_ = true; + OB_ASSERT(index_table_iter_->get_type() == DAS_ITER_SCAN); + ObDASScanIter *index_table_iter = static_cast(index_table_iter_); + storage::ObTableScanParam &index_scan_param = index_table_iter->get_scan_param(); + const ObDASScanCtDef *index_ctdef = static_cast(index_ctdef_); + ObDASScanRtDef *index_rtdef = static_cast(index_rtdef_); + if (OB_UNLIKELY(index_scan_param.key_ranges_.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected key ranges count", K(index_scan_param.key_ranges_.count()), K(ret)); + } else if (OB_FAIL(index_table_iter_->do_table_scan())) { + if (OB_SNAPSHOT_DISCARDED == ret && index_scan_param.fb_snapshot_.is_valid()) { + ret = OB_INVALID_QUERY_TIMESTAMP; + } else if 
(OB_TRY_LOCK_ROW_CONFLICT != ret) { + LOG_WARN("failed to do partition scan", K(index_scan_param), K(ret)); + } + } + return ret; +} + +int ObDASFuncLookupIter::rescan() +{ + int ret = OB_SUCCESS; + // only rescan index table, data table will be rescan in do_lookup. + ObDASScanIter *index_table_iter = static_cast(index_table_iter_); + storage::ObTableScanParam &index_scan_param = index_table_iter->get_scan_param(); + if (OB_UNLIKELY(!start_table_scan_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected rescan, should do table scan first", K(ret)); + } else if (OB_UNLIKELY(index_scan_param.key_ranges_.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected key ranges count", K(index_scan_param.key_ranges_.count()), K(ret)); + } else if (OB_FAIL(index_table_iter_->rescan())) { + LOG_WARN("failed to rescan index table iter", K(ret)); + } + return ret; +} + +int ObDASFuncLookupIter::inner_get_next_row() +{ + int ret = OB_SUCCESS; + ObDASScanIter *index_table_iter = static_cast(index_table_iter_); + storage::ObTableScanParam &index_scan_param = index_table_iter->get_scan_param(); + OB_ASSERT(index_table_iter_->get_type() == DAS_ITER_SCAN); + int64_t simulate_batch_row_cnt = - EVENT_CALL(EventTable::EN_TABLE_LOOKUP_BATCH_ROW_COUNT); + const bool use_simulate_batch_row_cnt = simulate_batch_row_cnt > 0 && simulate_batch_row_cnt < default_batch_row_count_; + int64_t default_row_batch_cnt = use_simulate_batch_row_cnt ? 
simulate_batch_row_cnt : default_batch_row_count_; + LOG_DEBUG("simulate lookup row batch count", K(simulate_batch_row_cnt), K(default_row_batch_cnt)); + if (index_scan_param.key_ranges_.empty()) { + ret = OB_ITER_END; + } else if (OB_UNLIKELY(index_scan_param.key_ranges_.count() != 1 || default_row_batch_cnt != 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected key ranges count", K(index_scan_param.key_ranges_.count()), K(default_row_batch_cnt), K(ret)); + } else if (OB_FAIL(ObDASLocalLookupIter::inner_get_next_row())) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to get next row from function lookup iter", K(ret)); + } + } else if (OB_UNLIKELY(lookup_row_cnt_ > 1 || lookup_rowkey_cnt_ > 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected lookup row count", K_(lookup_row_cnt), K_(lookup_rowkey_cnt), K(ret)); + } + return ret; +} + +int ObDASFuncLookupIter::inner_get_next_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + ObDASScanIter *index_table_iter = static_cast(index_table_iter_); + storage::ObTableScanParam &index_scan_param = index_table_iter->get_scan_param(); + OB_ASSERT(index_table_iter_->get_type() == DAS_ITER_SCAN); + cap_ = index_scan_param.key_ranges_.count(); + int64_t simulate_batch_row_cnt = - EVENT_CALL(EventTable::EN_TABLE_LOOKUP_BATCH_ROW_COUNT); + const bool use_simulate_batch_row_cnt = simulate_batch_row_cnt > 0 && simulate_batch_row_cnt < default_batch_row_count_; + int64_t default_row_batch_cnt = use_simulate_batch_row_cnt ? 
simulate_batch_row_cnt : default_batch_row_count_; + LOG_DEBUG("simulate lookup row batch count", K(simulate_batch_row_cnt), K(default_row_batch_cnt)); + if (index_scan_param.key_ranges_.empty()) { + ret = OB_ITER_END; + } else if (OB_UNLIKELY(default_row_batch_cnt < cap_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected key ranges count", K(default_row_batch_cnt), K(capacity), + K_(cap), K(index_scan_param.key_ranges_.count()), K(ret)); + } else if (OB_FAIL(ObDASLookupIter::inner_get_next_rows(count, capacity))) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to get next row from function lookup iter", K(ret)); + } + } + if (OB_SUCC(ret) && OB_UNLIKELY(lookup_row_cnt_ != lookup_rowkey_cnt_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected lookup row count", K_(lookup_row_cnt), K_(lookup_rowkey_cnt), K(ret)); + } + return ret; +} + +int ObDASFuncLookupIter::add_rowkey() +{ + int ret = OB_SUCCESS; + OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_FUNC_DATA); + if (OB_ISNULL(eval_ctx_) || OB_UNLIKELY(1 != rowkey_exprs_.count())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid eval ctx or rowkey exprs", K_(eval_ctx), K_(rowkey_exprs), K(ret)); + } else { + ObDASScanIter *index_iter = static_cast<ObDASScanIter *>(index_table_iter_); + ObDASFuncDataIter *merge_iter = static_cast<ObDASFuncDataIter *>(data_table_iter_); + ObDocId doc_id; + const ObExpr *expr = rowkey_exprs_.at(0); + ObDatum &col_datum = expr->locate_expr_datum(*eval_ctx_); + doc_id.from_string(col_datum.get_string()); + if (OB_UNLIKELY(!doc_id.is_valid())) { + ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid doc id", K(ret), K(doc_id)); // fail here: a skipped doc id would leave the data iter's doc_ids_ short of the index rows + } else if (OB_FAIL(merge_iter->add_doc_id(doc_id))) { + LOG_WARN("failed to add doc id", K(ret)); + } + LOG_DEBUG("push doc id to tr iter", K(doc_id), K(ret)); + } + return ret; +} + +int ObDASFuncLookupIter::add_rowkeys(int64_t storage_count) +{ + int ret = OB_SUCCESS; + // for limit case, can do better, add_rowkeys(limit_count) + if (OB_UNLIKELY(storage_count != cap_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected count", 
K(storage_count), K(cap_)); + } else if (OB_FAIL(ObDASLocalLookupIter::add_rowkeys(storage_count))) { + LOG_WARN("failed to add rowkeys", K(ret)); + } + return ret; +} + +int ObDASFuncLookupIter::do_index_lookup() +{ + int ret = OB_SUCCESS; + OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_FUNC_DATA); + ObDASScanIter *index_table_iter = static_cast<ObDASScanIter *>(index_table_iter_); + storage::ObTableScanParam &index_scan_param = index_table_iter->get_scan_param(); + ObDASFuncDataIter *merge_iter = static_cast<ObDASFuncDataIter *>(data_table_iter_); + if (merge_iter->has_main_lookup_iter()) { + storage::ObTableScanParam &main_lookup_param = merge_iter->get_main_lookup_scan_param(); + int64 group_id = 0; + if (OB_UNLIKELY(!main_lookup_param.key_ranges_.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected key ranges count", K(main_lookup_param.key_ranges_.count()), K(ret)); + } else if (DAS_OP_TABLE_SCAN != index_ctdef_->op_type_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected index op type", K(index_ctdef_->op_type_), K(ret)); + } else { + const ObDASScanCtDef *index_ctdef = static_cast<const ObDASScanCtDef *>(index_ctdef_); + if (nullptr != index_ctdef->group_id_expr_) { + group_id = index_ctdef->group_id_expr_->locate_expr_datum(*eval_ctx_).get_int(); + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < index_scan_param.key_ranges_.count(); i++) { + ObRowkey row_key = index_scan_param.key_ranges_.at(i).start_key_; + ObNewRange range; + range.build_range(merge_iter->get_main_lookup_ctdef()->ref_table_id_, row_key); + int64_t group_idx = ObNewRange::get_group_idx(group_id); + range.group_idx_ = group_idx; + if (OB_FAIL(main_lookup_param.key_ranges_.push_back(range))) { LOG_WARN("failed to push back main lookup range", K(ret), K(i)); } // a failed push_back must stop the loop, not silently drop a range + } + if (OB_SUCC(ret)) { + main_lookup_param.is_get_ = true; + } + } + if (OB_FAIL(ret)) { + } else if (is_first_lookup_) { + is_first_lookup_ = false; + if (OB_FAIL(data_table_iter_->do_table_scan())) { + if (OB_SNAPSHOT_DISCARDED == ret && lookup_param_.fb_snapshot_.is_valid()) { + ret = OB_INVALID_QUERY_TIMESTAMP; + } else if (OB_TRY_LOCK_ROW_CONFLICT != 
ret) { + LOG_WARN("failed to do partition scan", K(lookup_param_), K(ret)); + } + } + } else if (OB_FAIL(data_table_iter_->rescan())) { + LOG_WARN("failed to rescan data table", K(ret)); + } + return ret; +} + +int ObDASFuncLookupIter::check_index_lookup() +{ + int ret = OB_SUCCESS; + OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_FUNC_DATA); + if (GCONF.enable_defensive_check()) { + if (OB_UNLIKELY(lookup_rowkey_cnt_ != lookup_row_cnt_)) { + ret = OB_ERR_DEFENSIVE_CHECK; + ObString func_name = ObString::make_string("check_lookup_row_cnt"); + LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr()); + LOG_ERROR("Fatal Error!!! Catch a defensive error!", + K(ret), K_(lookup_rowkey_cnt), K_(lookup_row_cnt)); + } + } + return ret; +} + +void ObDASFuncLookupIter::clear_evaluated_flag() +{ + index_table_iter_->clear_evaluated_flag(); + data_table_iter_->clear_evaluated_flag(); +} + +} // namespace sql +} // namespace oceanbase diff --git a/src/sql/das/iter/ob_das_functional_lookup_iter.h b/src/sql/das/iter/ob_das_functional_lookup_iter.h new file mode 100644 index 000000000..61f5a65d9 --- /dev/null +++ b/src/sql/das/iter/ob_das_functional_lookup_iter.h @@ -0,0 +1,130 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OBDEV_SRC_SQL_DAS_ITER_OB_DAS_FUNCTIONAL_LOOKUP_ITER_H_ +#define OBDEV_SRC_SQL_DAS_ITER_OB_DAS_FUNCTIONAL_LOOKUP_ITER_H_ + +#include "sql/das/iter/ob_das_local_lookup_iter.h" + +namespace oceanbase +{ +using namespace common; +namespace sql +{ + +struct ObDASFuncLookupIterParam : public ObDASIterParam +{ +public: + ObDASFuncLookupIterParam() + : ObDASIterParam(DAS_ITER_FUNC_LOOKUP), + default_batch_row_count_(0), + index_ctdef_(nullptr), + index_rtdef_(nullptr), + lookup_ctdef_(nullptr), + lookup_rtdef_(nullptr), + index_table_iter_(nullptr), + data_table_iter_(nullptr), + rowkey_exprs_(nullptr), + doc_id_expr_(nullptr), + trans_desc_(nullptr), + snapshot_(nullptr) + {} + int64_t default_batch_row_count_; + const ObDASBaseCtDef *index_ctdef_; + ObDASBaseRtDef *index_rtdef_; + const ObDASScanCtDef *lookup_ctdef_; + ObDASScanRtDef *lookup_rtdef_; + ObDASIter *index_table_iter_; + ObDASIter *data_table_iter_; + const ExprFixedArray *rowkey_exprs_; + ObExpr *doc_id_expr_; + transaction::ObTxDesc *trans_desc_; + transaction::ObTxReadSnapshot *snapshot_; + + virtual bool is_valid() const override + { + return ObDASIterParam::is_valid() + && index_table_iter_ != nullptr && data_table_iter_ != nullptr + && index_ctdef_ != nullptr && index_rtdef_ != nullptr && doc_id_expr_ != nullptr; + } +}; + +class ObDASScanCtDef; +class ObDASScanRtDef; + +/** + * Func Lookup Iter: + * Func Lookup Iter + * / \ + * / \ + * / \ + * / \ + * INDEX_ITER DATA_ITER = FTS_MERGE_ITER + * (ROWKEY_DOCID) + * + * Func Lookup: + * Local Lookup Iter + * / \ + * / \ + * / \ + * Local Lookup/Das Scan Func Lookup Iter + **/ + +/* + * In ObDASFuncLookupIter, the data iter is a fts merge iter which is just + * a tool iter including main lookup iter and tr merge iters. 
+ */
+class ObDASFuncLookupIter : public ObDASLocalLookupIter
+{
+public:
+  ObDASFuncLookupIter()
+    : ObDASLocalLookupIter(ObDASIterType::DAS_ITER_FUNC_LOOKUP),
+      cap_(0), start_table_scan_(false) // give every data member a defined initial value
+  {}
+  virtual ~ObDASFuncLookupIter() {}
+  void set_index_scan_param(storage::ObTableScanParam &scan_param) { static_cast<ObDASScanIter *>(index_table_iter_)->set_scan_param(scan_param);} // forwards scan_param to index_table_iter_, which is treated as an ObDASScanIter
+  ObDASScanIter *get_index_scan_iter() { return static_cast<ObDASScanIter *>(index_table_iter_); }
+  int64 get_group_id() const // datum of the T_PSEUDO_GROUP_ID access expr of index_ctdef_, 0 when there is none
+  {
+    const ExprFixedArray *exprs = &(static_cast<const ObDASScanCtDef *>(index_ctdef_))->pd_expr_spec_.access_exprs_;
+    int64 group_id = 0;
+    for (int64_t i = 0; i < exprs->count(); i++) { // no early exit: the last matching expr wins
+      if (T_PSEUDO_GROUP_ID == exprs->at(i)->type_) {
+        group_id = exprs->at(i)->locate_expr_datum(*eval_ctx_).get_int();
+      }
+    }
+    return group_id;
+  }
+  virtual void clear_evaluated_flag() override;
+protected:
+  virtual int inner_init(ObDASIterParam &param) override;
+  virtual int inner_reuse() override;
+  virtual int inner_release() override;
+  virtual int do_table_scan() override;
+  virtual int rescan() override;
+  virtual int inner_get_next_row() override;
+  virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override;
+  virtual int add_rowkey() override;
+  virtual int add_rowkeys(int64_t count) override;
+  virtual int do_index_lookup() override;
+  virtual int check_index_lookup() override;
+  virtual void reset_lookup_state() override;
+protected:
+  int64_t cap_;
+  bool start_table_scan_;
+};
+
+}  // namespace sql
+}  // namespace oceanbase
+
+
+#endif /* OBDEV_SRC_SQL_DAS_ITER_OB_DAS_FUNCTIONAL_LOOKUP_ITER_H_ */
diff --git a/src/sql/das/iter/ob_das_iter_define.h b/src/sql/das/iter/ob_das_iter_define.h
index fea90f7fd..6f1f91091 100644
--- a/src/sql/das/iter/ob_das_iter_define.h
+++ b/src/sql/das/iter/ob_das_iter_define.h
@@ -71,11 +71,27 @@ enum ObDASIterTreeType : uint32_t
   ITER_TREE_MAX
 };
 
+struct ObDASFTSTabletID // groups the inv-idx / fwd-idx / doc-id-idx tablet ids that belong together
+{
+public:
+  common::ObTabletID inv_idx_tablet_id_;
+  common::ObTabletID fwd_idx_tablet_id_;
+  common::ObTabletID doc_id_idx_tablet_id_;
+  void reset() // resets all three tablet ids
+  {
+    inv_idx_tablet_id_.reset();
+    fwd_idx_tablet_id_.reset();
+    doc_id_idx_tablet_id_.reset();
+  }
+  TO_STRING_KV(K_(inv_idx_tablet_id), K_(fwd_idx_tablet_id), K_(doc_id_idx_tablet_id));
+};
+
 #define SUPPORTED_DAS_ITER_TREE(_type) \
 ({ \
   ITER_TREE_PARTITION_SCAN == (_type) || \
   ITER_TREE_LOCAL_LOOKUP == (_type) || \
   ITER_TREE_TEXT_RETRIEVAL == (_type) || \
+  ITER_TREE_FUNC_LOOKUP == (_type) || \
   ITER_TREE_INDEX_MERGE == (_type) || \
   ITER_TREE_MVI_LOOKUP == (_type) || \
   ITER_TREE_GIS_LOOKUP == (_type); \
@@ -93,16 +109,20 @@ public:
   common::ObTabletID rowkey_doc_tablet_id_;
   common::ObTabletID rowkey_vid_tablet_id_;
 
-  /* used by fulltext index */
+  /* used by basic fulltext index */
   common::ObTabletID inv_idx_tablet_id_;
   common::ObTabletID fwd_idx_tablet_id_;
   common::ObTabletID doc_id_idx_tablet_id_;
-  /* used by fulltext index */
+  /* used by basic fulltext index */
 
   /* used by index merge */
   common::ObFixedArray index_merge_tablet_ids_;
   /* used by index merge */
 
+  /* used by function lookup index (special fulltext)*/
+  common::ObSEArray fts_tablet_ids_;
+  /* used by function lookup index (special fulltext)*/
+
   void reset()
   {
     lookup_tablet_id_.reset();
@@ -113,6 +133,7 @@ public:
     fwd_idx_tablet_id_.reset();
     doc_id_idx_tablet_id_.reset();
     index_merge_tablet_ids_.reset();
+    fts_tablet_ids_.reset();
   }
 };
 
diff --git a/src/sql/das/iter/ob_das_iter_utils.cpp b/src/sql/das/iter/ob_das_iter_utils.cpp
index 78430d806..230e01b63 100644
--- a/src/sql/das/iter/ob_das_iter_utils.cpp
+++ b/src/sql/das/iter/ob_das_iter_utils.cpp
@@ -100,6 +100,10 @@ int ObDASIterUtils::create_das_scan_iter_tree(ObDASIterTreeType tree_type,
       ret = create_index_merge_iter_tree(scan_param, alloc, attach_ctdef, attach_rtdef, related_tablet_ids, trans_desc, snapshot, iter_tree);
       break;
     }
+    case ITER_TREE_FUNC_LOOKUP: {
+      ret = create_function_lookup_tree(scan_param, alloc, attach_ctdef, attach_rtdef, related_tablet_ids, trans_desc, snapshot, iter_tree);
+      break;
+    }
     case
ITER_TREE_MVI_LOOKUP: {
       ret = create_mvi_lookup_tree(scan_param, alloc, attach_ctdef, attach_rtdef, related_tablet_ids, trans_desc, snapshot, iter_tree);
       break;
@@ -241,7 +245,11 @@ int ObDASIterUtils::set_text_retrieval_related_ids(const ObDASBaseCtDef *attach_
         } else {
           ObDASTextRetrievalMergeIter *tr_merge_iter = static_cast(root_iter);
           need_set_child = false;
-          if (OB_FAIL(tr_merge_iter->set_related_tablet_ids(ls_id, related_tablet_ids))) {
+          ObDASFTSTabletID fts_tablet_ids;
+          fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.inv_idx_tablet_id_;
+          fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fwd_idx_tablet_id_;
+          fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.doc_id_idx_tablet_id_;
+          if (OB_FAIL(tr_merge_iter->set_related_tablet_ids(ls_id, fts_tablet_ids))) {
             LOG_WARN("failed to set related tablet ids", K(ret));
           }
         }
@@ -337,6 +345,161 @@ int ObDASIterUtils::set_index_merge_related_ids(const ObDASBaseCtDef *attach_ctd
   return ret;
 }
 
+/**
+ * Recursively walks attach_ctdef and root_iter in lock-step and hands ls_id plus the matching
+ * related tablet ids to every iterator of a functional-lookup iterator tree.
+ *
+ * @param attach_ctdef        ctdef node paired with root_iter; its op_type_ selects the handling
+ * @param related_tablet_ids  source of all tablet ids assigned here
+ * @param ls_id               log stream id assigned to the iterators
+ * @param flag                only read for DAS_OP_IR_SCAN: when >= 0 it indexes
+ *                            related_tablet_ids.fts_tablet_ids_, when negative the top-level
+ *                            inv/fwd/doc-id tablet ids are used instead
+ * @param root_iter           iterator paired with attach_ctdef; its type must match op_type_
+ * @return OB_SUCCESS, or OB_ERR_UNEXPECTED on a null argument, an iterator type that does not
+ *         match the ctdef, a children-count mismatch or an out-of-range flag; errors of
+ *         set_related_tablet_ids() are passed through
+ */
+int ObDASIterUtils::set_func_lookup_iter_related_ids(const ObDASBaseCtDef *attach_ctdef,
+                                                     const ObDASRelatedTabletID &related_tablet_ids,
+                                                     const ObLSID &ls_id,
+                                                     int64_t flag,
+                                                     ObDASIter *root_iter)
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(attach_ctdef) || OB_ISNULL(root_iter)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr", K(ret), KP(attach_ctdef), KP(root_iter));
+  } else {
+    const ObDASIterType &iter_type = root_iter->get_type();
+    bool need_set_child = false;
+    switch (attach_ctdef->op_type_) {
+      case ObDASOpType::DAS_OP_INDEX_PROJ_LOOKUP: {
+        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_LOCAL_LOOKUP)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("iter type not match", K(ret), K(iter_type));
+        } else {
+          ObDASLocalLookupIter *local_lookup_iter = static_cast<ObDASLocalLookupIter *>(root_iter);
+          local_lookup_iter->set_tablet_id(related_tablet_ids.rowkey_doc_tablet_id_);
+          local_lookup_iter->set_ls_id(ls_id);
+          need_set_child = true;
+        }
+        break;
+      }
+      case ObDASOpType::DAS_OP_FUNC_LOOKUP: {
+        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_FUNC_LOOKUP)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("iter type not match with ctdef", K(ret), K(attach_ctdef->op_type_), K(iter_type));
+        } else {
+          const ObDASFuncLookupCtDef *func_lookup_ctdef = static_cast<const ObDASFuncLookupCtDef *>(attach_ctdef);
+          const int64_t func_lookup_cnt = func_lookup_ctdef->func_lookup_cnt_;
+          // children[1] is used as the func data iter; guard the array and the slot before use
+          ObDASFuncDataIter *merge_iter = OB_ISNULL(root_iter->get_children())
+              ? nullptr : static_cast<ObDASFuncDataIter *>(root_iter->get_children()[1]);
+          if (OB_ISNULL(merge_iter) || (func_lookup_cnt > 0 && OB_ISNULL(merge_iter->get_children()))) {
+            ret = OB_ERR_UNEXPECTED;
+            LOG_WARN("unexpected nullptr to func data iter or its children", K(ret), KP(merge_iter));
+          } else if (func_lookup_ctdef->has_main_table_lookup()) {
+            merge_iter->set_tablet_id(related_tablet_ids.lookup_tablet_id_);
+            merge_iter->set_ls_id(ls_id);
+          }
+          for (int64_t i = 0; OB_SUCC(ret) && i < func_lookup_cnt; ++i) {
+            if (OB_FAIL(set_func_lookup_iter_related_ids(
+                func_lookup_ctdef->get_func_lookup_scan_ctdef(i),
+                related_tablet_ids,
+                ls_id,
+                i, // the i-th func lookup reads fts_tablet_ids_[i]
+                merge_iter->get_children()[i]))) {
+              LOG_WARN("failed to set func lookup related ids", K(ret), K(i));
+            }
+          }
+          need_set_child = false;
+        }
+        break;
+      }
+      case ObDASOpType::DAS_OP_IR_AUX_LOOKUP: {
+        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_LOCAL_LOOKUP)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("iter type not match with ctdef", K(ret), K(attach_ctdef->op_type_), K(iter_type));
+        } else {
+          ObDASLocalLookupIter *aux_lookup_iter = static_cast<ObDASLocalLookupIter *>(root_iter);
+          aux_lookup_iter->set_ls_id(ls_id);
+          aux_lookup_iter->set_tablet_id(related_tablet_ids.aux_lookup_tablet_id_);
+          need_set_child = true;
+        }
+        break;
+      }
+      case ObDASOpType::DAS_OP_SORT: {
+        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_SORT)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("iter type not match with ctdef", K(ret), K(attach_ctdef->op_type_), K(iter_type));
+        } else {
+          need_set_child = true;
+        }
+        break;
+      }
+      case ObDASOpType::DAS_OP_IR_SCAN: {
+        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_TEXT_RETRIEVAL_MERGE)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("iter type not match with ctdef", K(ret), K(attach_ctdef->op_type_), K(iter_type));
+        } else if (OB_UNLIKELY(flag >= related_tablet_ids.fts_tablet_ids_.count())) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("func lookup index out of range", K(ret), K(flag), K(related_tablet_ids.fts_tablet_ids_.count()));
+        } else {
+          ObDASTextRetrievalMergeIter *tr_merge_iter = static_cast<ObDASTextRetrievalMergeIter *>(root_iter);
+          ObDASFTSTabletID fts_tablet_ids;
+          if (flag >= 0) {
+            fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[flag].inv_idx_tablet_id_;
+            fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[flag].fwd_idx_tablet_id_;
+            fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[flag].doc_id_idx_tablet_id_;
+          } else {
+            fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.inv_idx_tablet_id_;
+            fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fwd_idx_tablet_id_;
+            fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.doc_id_idx_tablet_id_;
+          }
+          if (OB_FAIL(tr_merge_iter->set_related_tablet_ids(ls_id, fts_tablet_ids))) {
+            LOG_WARN("failed to set related tablet ids", K(ret));
+          }
+          need_set_child = false;
+        }
+        break;
+      }
+      case ObDASOpType::DAS_OP_TABLE_SCAN: {
+        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_SCAN)) {
+          ret = OB_ERR_UNEXPECTED;
+          LOG_WARN("iter type not match with ctdef", K(ret), K(attach_ctdef->op_type_), K(iter_type));
+        } else {
+          need_set_child = false;
+        }
+        break;
+      }
+      default: {
+        need_set_child = false;
+        break;
+      }
+    }
+
+    if (OB_FAIL(ret) || !need_set_child) {
+    } else if (OB_UNLIKELY(attach_ctdef->children_cnt_ != root_iter->get_children_cnt())) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected iter children count not equal to ctdef children count",
+          K(ret), K(attach_ctdef->children_cnt_), K(root_iter->get_children_cnt()));
+    } else {
+      for (int64_t i = 0; OB_SUCC(ret) && i < attach_ctdef->children_cnt_; ++i) {
+        if (OB_FAIL(set_func_lookup_iter_related_ids(
+            attach_ctdef->children_[i],
+            related_tablet_ids,
+            ls_id,
+            -1, // generic children fall back to the top-level fulltext tablet ids
+            root_iter->get_children()[i]))) {
+          LOG_WARN("failed to set func lookup related ids of child", K(ret), K(i));
+        }
+      }
+    }
+  }
+  return ret;
+}
+
 /***************** PUBLIC END *****************/
 
 int ObDASIterUtils::create_partition_scan_tree(storage::ObTableScanParam &scan_param,
@@ -490,6
+631,10 @@ int ObDASIterUtils::create_text_retrieval_tree(ObTableScanParam &scan_param,
   const bool has_lookup = ObDASOpType::DAS_OP_TABLE_LOOKUP == attach_ctdef->op_type_;
   int64_t token_cnt = 0;
   bool taat_mode = false;
+  ObDASFTSTabletID fts_tablet_ids;
+  fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.inv_idx_tablet_id_;
+  fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fwd_idx_tablet_id_;
+  fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.doc_id_idx_tablet_id_;
   if (OB_UNLIKELY(attach_ctdef->op_type_ != ObDASOpType::DAS_OP_IR_SCAN
                   && attach_ctdef->op_type_ != ObDASOpType::DAS_OP_TABLE_LOOKUP
                   && attach_ctdef->op_type_ != ObDASOpType::DAS_OP_SORT)) {
@@ -507,7 +652,7 @@ int ObDASIterUtils::create_text_retrieval_tree(ObTableScanParam &scan_param,
       alloc,
       ir_scan_ctdef,
       ir_scan_rtdef,
-      related_tablet_ids,
+      fts_tablet_ids,
       trans_desc,
       snapshot,
       text_retrieval_result))) {
@@ -592,11 +737,234 @@ int ObDASIterUtils::create_text_retrieval_tree(ObTableScanParam &scan_param,
   return ret;
 }
 
+/**
+ * Builds the text retrieval sub tree of one functional (fulltext) lookup: a TAAT or DAAT
+ * lookup merge iter on top of one text retrieval iter per query token (a single one in TAAT
+ * mode), each owning its inverted-index scan and optional inverted/forward-index agg scans.
+ *
+ * @param ls_id               log stream id assigned to the created iterators
+ * @param alloc               allocator used for the query tokens, iterators and children arrays
+ * @param ir_scan_ctdef       compile-time definition of the IR scan, must not be null
+ * @param ir_scan_rtdef       runtime definition of the IR scan, must not be null
+ * @param related_tablet_ids  inv/fwd/doc-id index tablet ids of this fulltext index
+ * @param trans_desc          transaction descriptor stored in the iterator params
+ * @param snapshot            read snapshot stored in the iterator params
+ * @param retrieval_result    [out] root merge iter of the sub tree, only set on success
+ * @return OB_SUCCESS, OB_ERR_UNEXPECTED on a null definition or when the ctdef does not project
+ *         the relevance score; any error from iterator creation is passed through
+ */
+int ObDASIterUtils::create_functional_text_retrieval_sub_tree(const ObLSID &ls_id,
+                                                              common::ObIAllocator &alloc,
+                                                              const ObDASIRScanCtDef *ir_scan_ctdef,
+                                                              ObDASIRScanRtDef *ir_scan_rtdef,
+                                                              const ObDASFTSTabletID &related_tablet_ids,
+                                                              transaction::ObTxDesc *trans_desc,
+                                                              transaction::ObTxReadSnapshot *snapshot,
+                                                              ObDASIter *&retrieval_result)
+{
+  int ret = OB_SUCCESS;
+  ObDASTextRetrievalMergeIterParam merge_iter_param;
+  ObDASTextRetrievalMergeIter *tr_merge_iter = nullptr;
+  ObDASScanIterParam doc_cnt_agg_param;
+  ObDASScanIter *doc_cnt_agg_iter = nullptr;
+  bool taat_mode = false;
+  bool need_inv_idx_agg_reset = false;
+
+  if (OB_ISNULL(ir_scan_ctdef) || OB_ISNULL(ir_scan_rtdef) || OB_ISNULL(ir_scan_rtdef->eval_ctx_)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr to ir scan def", K(ret), KP(ir_scan_ctdef), KP(ir_scan_rtdef));
+  } else {
+    merge_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
+    merge_iter_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
+    merge_iter_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
+    merge_iter_param.output_ = &ir_scan_ctdef->result_output_;
+    merge_iter_param.ir_ctdef_ = ir_scan_ctdef;
+    merge_iter_param.ir_rtdef_ = ir_scan_rtdef;
+    merge_iter_param.tx_desc_ = trans_desc;
+    merge_iter_param.snapshot_ = snapshot;
+    merge_iter_param.force_return_docid_ = true;
+  }
+
+  if (0 != merge_iter_param.query_tokens_.count()) {
+    merge_iter_param.query_tokens_.reuse();
+  }
+
+  if (OB_FAIL(ret)) {
+  } else if (OB_FAIL(ObDASTextRetrievalMergeIter::build_query_tokens(ir_scan_ctdef, ir_scan_rtdef, alloc, merge_iter_param.query_tokens_))) {
+    LOG_WARN("failed to get query tokens for text retrieval", K(ret));
+  } else if (!ir_scan_ctdef->need_proj_relevance_score()) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("functional lookup without relevance score not supported", K(ret));
+  } else if (merge_iter_param.query_tokens_.count() > OB_MAX_TEXT_RETRIEVAL_TOKEN_CNT) { // too many tokens: TAAT lookup iter
+    need_inv_idx_agg_reset = true;
+    if (!ir_scan_ctdef->need_estimate_total_doc_cnt()) {
+      doc_cnt_agg_param.scan_ctdef_ = ir_scan_ctdef->get_doc_id_idx_agg_ctdef();
+      doc_cnt_agg_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
+      doc_cnt_agg_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
+      doc_cnt_agg_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
+      doc_cnt_agg_param.output_ = &ir_scan_ctdef->get_doc_id_idx_agg_ctdef()->result_output_;
+      if (OB_FAIL(create_das_iter(alloc, doc_cnt_agg_param, doc_cnt_agg_iter))) {
+        LOG_WARN("failed to create doc cnt agg scan iter", K(ret));
+      } else {
+        merge_iter_param.doc_cnt_iter_ = doc_cnt_agg_iter;
+      }
+    }
+    ObDASTRTaatLookupIter *fts_merge_iter = nullptr;
+    if (OB_FAIL(ret)) {
+    } else if (OB_FAIL(create_das_iter(alloc, merge_iter_param, fts_merge_iter))) {
+      LOG_WARN("failed to create text retrieval merge iter", K(ret));
+    } else {
+      tr_merge_iter = fts_merge_iter;
+      taat_mode = true;
+    }
+  } else { // DAAT lookup iter
+    if (ir_scan_ctdef->need_calc_relevance() && !ir_scan_ctdef->need_estimate_total_doc_cnt()) {
+      doc_cnt_agg_param.scan_ctdef_ = ir_scan_ctdef->get_doc_id_idx_agg_ctdef();
+      doc_cnt_agg_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
+      doc_cnt_agg_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
+      doc_cnt_agg_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
+      doc_cnt_agg_param.output_ = &ir_scan_ctdef->get_doc_id_idx_agg_ctdef()->result_output_;
+      if (OB_FAIL(create_das_iter(alloc, doc_cnt_agg_param, doc_cnt_agg_iter))) {
+        LOG_WARN("failed to create doc cnt agg scan iter", K(ret));
+      } else {
+        merge_iter_param.doc_cnt_iter_ = doc_cnt_agg_iter;
+      }
+    }
+    ObDASTRDaatLookupIter *fts_merge_iter = nullptr;
+    if (OB_FAIL(ret)) {
+    } else if (OB_FAIL(create_das_iter(alloc, merge_iter_param, fts_merge_iter))) {
+      LOG_WARN("failed to create text retrieval merge iter", K(ret));
+    } else {
+      tr_merge_iter = fts_merge_iter;
+      taat_mode = false;
+    }
+  }
+  if (OB_FAIL(ret)) {
+  } else {
+    ObSEArray iters; // NOTE(review): template arguments are missing in this copy of the patch - restore from upstream
+    const ObIArray &query_tokens = tr_merge_iter->get_query_tokens(); // NOTE(review): same, element type missing
+    int64_t size = taat_mode && query_tokens.count() != 0 ? 1 : query_tokens.count();
+    for (int64_t i = 0; OB_SUCC(ret) && i < size; ++i) {
+      ObDASTextRetrievalIterParam retrieval_param;
+      ObDASTextRetrievalIter *retrieval_iter = nullptr;
+      retrieval_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
+      retrieval_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
+      retrieval_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
+      retrieval_param.output_ = &ir_scan_ctdef->result_output_;
+      retrieval_param.ir_ctdef_ = ir_scan_ctdef;
+      retrieval_param.ir_rtdef_ = ir_scan_rtdef;
+      retrieval_param.tx_desc_ = trans_desc;
+      retrieval_param.snapshot_ = snapshot;
+      retrieval_param.need_inv_idx_agg_reset_ = need_inv_idx_agg_reset;
+
+      ObDASScanIterParam inv_idx_scan_iter_param;
+      ObDASScanIter *inv_idx_scan_iter = nullptr;
+      inv_idx_scan_iter_param.scan_ctdef_ = ir_scan_ctdef->get_inv_idx_scan_ctdef();
+      inv_idx_scan_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
+      inv_idx_scan_iter_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
+      inv_idx_scan_iter_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
+      inv_idx_scan_iter_param.output_ = &ir_scan_ctdef->get_inv_idx_scan_ctdef()->result_output_;
+      ObDASScanIterParam inv_idx_agg_iter_param;
+      ObDASScanIter *inv_idx_agg_iter = nullptr;
+      if (ir_scan_ctdef->need_inv_idx_agg()) {
+        inv_idx_agg_iter_param.scan_ctdef_ = ir_scan_ctdef->get_inv_idx_agg_ctdef();
+        inv_idx_agg_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
+        inv_idx_agg_iter_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
+        inv_idx_agg_iter_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
+        inv_idx_agg_iter_param.output_ = &ir_scan_ctdef->get_inv_idx_agg_ctdef()->result_output_;
+      }
+      ObDASScanIterParam fwd_idx_iter_param;
+      ObDASScanIter *fwd_idx_iter = nullptr;
+      if (ir_scan_ctdef->need_fwd_idx_agg()) {
+        fwd_idx_iter_param.scan_ctdef_ = ir_scan_ctdef->get_fwd_idx_agg_ctdef();
+        fwd_idx_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
+        fwd_idx_iter_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
+        fwd_idx_iter_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
+        fwd_idx_iter_param.output_ = &ir_scan_ctdef->get_fwd_idx_agg_ctdef()->result_output_;
+      }
+      if (OB_FAIL(create_das_iter(alloc, inv_idx_scan_iter_param, inv_idx_scan_iter))) {
+        LOG_WARN("failed to create inv idx iter", K(ret));
+      } else if (ir_scan_ctdef->need_inv_idx_agg()
+          && OB_FAIL(create_das_iter(alloc, inv_idx_agg_iter_param, inv_idx_agg_iter))) {
+        LOG_WARN("failed to create inv idx agg iter", K(ret));
+      } else if (ir_scan_ctdef->need_fwd_idx_agg()
+          && OB_FAIL(create_das_iter(alloc, fwd_idx_iter_param, fwd_idx_iter))) {
+        LOG_WARN("failed to create fwd idx iter", K(ret));
+      } else {
+        retrieval_param.inv_idx_scan_iter_ = inv_idx_scan_iter;
+        retrieval_param.inv_idx_agg_iter_ = inv_idx_agg_iter;
+        retrieval_param.fwd_idx_iter_ = fwd_idx_iter;
+        const int64_t inv_idx_iter_cnt = ir_scan_ctdef->need_inv_idx_agg() ? 2 : 1;
+        const int64_t fwd_idx_iter_cnt = ir_scan_ctdef->need_fwd_idx_agg() ? 1 : 0;
+        const int64_t tr_children_cnt = inv_idx_iter_cnt + fwd_idx_iter_cnt;
+        if (taat_mode) {
+          if (OB_FAIL(create_das_iter(alloc, retrieval_param, retrieval_iter))) {
+            LOG_WARN("failed to create text retrieval iter", K(ret));
+          }
+        } else {
+          ObDASTRCacheIter *tr_iter = nullptr;
+          if (OB_FAIL(create_das_iter(alloc, retrieval_param, tr_iter))) {
+            LOG_WARN("failed to create text retrieval iter", K(ret));
+          } else {
+            retrieval_iter = tr_iter;
+          }
+        }
+        if (OB_FAIL(ret)) {
+          // set query_token and range in do_table_scan
+        } else if (OB_FAIL(create_iter_children_array(tr_children_cnt, alloc, retrieval_iter))) {
+          LOG_WARN("failed to create iter children array", K(ret));
+        } else {
+          retrieval_iter->get_children()[0] = inv_idx_scan_iter;
+          if (ir_scan_ctdef->need_inv_idx_agg()) {
+            retrieval_iter->get_children()[1] = inv_idx_agg_iter;
+          }
+          if (ir_scan_ctdef->need_fwd_idx_agg()) {
+            retrieval_iter->get_children()[inv_idx_iter_cnt] = fwd_idx_iter; // slot 2 with an inv agg child, slot 1 without: never past tr_children_cnt
+          }
+          retrieval_iter->set_ls_tablet_ids(
+              ls_id,
+              related_tablet_ids.inv_idx_tablet_id_,
+              related_tablet_ids.fwd_idx_tablet_id_);
+          if (OB_FAIL(iters.push_back(retrieval_iter))) {
+            LOG_WARN("failed append retrieval iter to array", K(ret));
+          }
+        }
+      }
+    }
+
+    if (OB_FAIL(ret)) {
+    } else if (OB_FAIL(tr_merge_iter->set_merge_iters(iters))) {
+      LOG_WARN("failed to set merge iters for text retrieval", K(ret));
+    } else if (OB_FAIL(tr_merge_iter->set_related_tablet_ids(ls_id, related_tablet_ids))) {
+      LOG_WARN("failed to set related tabelt ids", K(ret));
+    } else {
+      ObDASIter **&tr_merge_children = tr_merge_iter->get_children();
+      const bool need_do_total_doc_cnt = (ir_scan_ctdef->need_calc_relevance()) && !ir_scan_ctdef->need_estimate_total_doc_cnt();
+      const int64_t tr_merge_children_cnt = need_do_total_doc_cnt ? iters.count() + 1 : iters.count();
+      if (0 != tr_merge_children_cnt
+          && OB_FAIL(create_iter_children_array(tr_merge_children_cnt, alloc, tr_merge_iter))) {
+        LOG_WARN("failed to alloc text retrieval merge iter children", K(ret), K(tr_merge_children_cnt));
+      } else {
+        for (int64_t i = 0; i < iters.count(); ++i) {
+          tr_merge_children[i] = iters.at(i);
+        }
+        if (need_do_total_doc_cnt) {
+          tr_merge_children[iters.count()] = doc_cnt_agg_iter; // the doc count agg scan is the last child
+        }
+        tr_merge_iter->set_doc_id_idx_tablet_id(related_tablet_ids.doc_id_idx_tablet_id_);
+        tr_merge_iter->set_ls_id(ls_id);
+        retrieval_result = tr_merge_iter;
+      }
+    }
+  }
+  return ret;
+}
+
 int ObDASIterUtils::create_text_retrieval_sub_tree(const ObLSID &ls_id,
                                                    common::ObIAllocator &alloc,
                                                    const ObDASIRScanCtDef *ir_scan_ctdef,
                                                    ObDASIRScanRtDef *ir_scan_rtdef,
-                                                   const ObDASRelatedTabletID &related_tablet_ids,
+                                                   const ObDASFTSTabletID &related_tablet_ids,
                                                    transaction::ObTxDesc *trans_desc,
                                                    transaction::ObTxReadSnapshot *snapshot,
                                                    ObDASIter *&retrieval_result)
@@ -676,6 +1022,7 @@ int ObDASIterUtils::create_text_retrieval_sub_tree(const ObLSID &ls_id,
       retrieval_param.ir_rtdef_ = ir_scan_rtdef;
       retrieval_param.tx_desc_ = trans_desc;
       retrieval_param.snapshot_ = snapshot;
+      retrieval_param.need_inv_idx_agg_reset_ = true;
 
       ObDASScanIterParam inv_idx_scan_iter_param;
       ObDASScanIter *inv_idx_scan_iter = nullptr;
@@ -683,13 +1030,14 @@ int ObDASIterUtils::create_text_retrieval_sub_tree(const ObLSID &ls_id,
       inv_idx_scan_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
       ObDASScanIterParam inv_idx_agg_iter_param;
       ObDASScanIter *inv_idx_agg_iter = nullptr;
-      init_scan_iter_param(inv_idx_agg_iter_param, ir_scan_ctdef->get_inv_idx_agg_ctdef(), ir_scan_rtdef);
-      inv_idx_agg_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
+      if (ir_scan_ctdef->need_inv_idx_agg()) {
+        init_scan_iter_param(inv_idx_agg_iter_param, ir_scan_ctdef->get_inv_idx_agg_ctdef(), ir_scan_rtdef);
+      }
       ObDASScanIterParam fwd_idx_iter_param;
ObDASScanIter *fwd_idx_iter = nullptr;
-      init_scan_iter_param(fwd_idx_iter_param, ir_scan_ctdef->get_fwd_idx_agg_ctdef(), ir_scan_rtdef);
-      fwd_idx_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
-
+      if (ir_scan_ctdef->need_fwd_idx_agg()) {
+        init_scan_iter_param(fwd_idx_iter_param, ir_scan_ctdef->get_fwd_idx_agg_ctdef(), ir_scan_rtdef);
+      }
       if (OB_FAIL(create_das_iter(alloc, inv_idx_scan_iter_param, inv_idx_scan_iter))) {
         LOG_WARN("failed to create inv idx iter", K(ret));
       } else if (ir_scan_ctdef->need_inv_idx_agg()
@@ -1009,6 +1357,479 @@ int ObDASIterUtils::create_domain_lookup_sub_tree(ObTableScanParam &scan_param,
   return ret;
 }
 
+/**
+ * Builds the iterator tree of a functional lookup plan: a rowkey scan sub tree (text retrieval
+ * with doc-id lookup, plain table scan, or sorted table scan) feeding a cache lookup iter whose
+ * data side is the functional lookup sub tree.
+ *
+ * @param scan_param          scan param bound to the rowkey table scan iter; also provides ls_id_
+ * @param alloc               allocator for all iterators and children arrays
+ * @param attach_ctdef        must be a DAS_OP_INDEX_PROJ_LOOKUP ctdef
+ * @param attach_rtdef        runtime definition paired with attach_ctdef
+ * @param related_tablet_ids  tablet ids of the auxiliary tables used by the plan
+ * @param trans_desc          transaction descriptor stored in the lookup iter params
+ * @param snapshot            read snapshot stored in the lookup iter params
+ * @param iter_tree           [out] root cache lookup iter, only set on success
+ * @return OB_SUCCESS, OB_ERR_UNEXPECTED / OB_INVALID_ARGUMENT on malformed definitions, or the
+ *         error of the failing sub tree / iterator construction
+ */
+int ObDASIterUtils::create_function_lookup_tree(ObTableScanParam &scan_param,
+                                                common::ObIAllocator &alloc,
+                                                const ObDASBaseCtDef *attach_ctdef,
+                                                ObDASBaseRtDef *attach_rtdef,
+                                                const ObDASRelatedTabletID &related_tablet_ids,
+                                                transaction::ObTxDesc *trans_desc,
+                                                transaction::ObTxReadSnapshot *snapshot,
+                                                ObDASIter *&iter_tree)
+{
+  int ret = OB_SUCCESS;
+  const ObDASIndexProjLookupCtDef *idx_proj_lookup_ctdef = nullptr;
+  ObDASIndexProjLookupRtDef *idx_proj_lookup_rtdef = nullptr;
+  const ObDASFuncLookupCtDef *func_lookup_ctdef = nullptr;
+  ObDASFuncLookupRtDef *func_lookup_rtdef = nullptr;
+
+  const ObDASBaseCtDef *rowkey_scan_ctdef = nullptr;
+  ObDASBaseRtDef *rowkey_scan_rtdef = nullptr;
+  ObDASIter *rowkey_scan_iter = nullptr;
+
+  // for check {
+  const ExprFixedArray *docid_lookup_rowkey_exprs = nullptr;
+  const ExprFixedArray *main_lookup_rowkey_exprs = nullptr;
+  const ExprFixedArray *rowkey_scan_ouput_exprs = nullptr;
+  // for check }
+
+  if (OB_ISNULL(attach_ctdef) || OB_ISNULL(attach_rtdef)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr to attach def", K(ret), KP(attach_ctdef), KP(attach_rtdef));
+  } else if (OB_UNLIKELY(attach_ctdef->op_type_ != ObDASOpType::DAS_OP_INDEX_PROJ_LOOKUP)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("unexpected text retrieval root attach def type", K(ret), KPC(attach_ctdef));
+  } else {
+    idx_proj_lookup_ctdef = static_cast<const ObDASIndexProjLookupCtDef *>(attach_ctdef);
+    idx_proj_lookup_rtdef = static_cast<ObDASIndexProjLookupRtDef *>(attach_rtdef);
+    func_lookup_ctdef = static_cast<const ObDASFuncLookupCtDef *>(idx_proj_lookup_ctdef->get_lookup_ctdef());
+    func_lookup_rtdef = static_cast<ObDASFuncLookupRtDef *>(idx_proj_lookup_rtdef->get_lookup_rtdef());
+    rowkey_scan_ctdef = idx_proj_lookup_ctdef->get_rowkey_scan_ctdef();
+    rowkey_scan_rtdef = idx_proj_lookup_rtdef->get_rowkey_scan_rtdef();
+    if (OB_ISNULL(func_lookup_ctdef) || OB_ISNULL(func_lookup_rtdef)
+        || OB_ISNULL(rowkey_scan_ctdef) || OB_ISNULL(rowkey_scan_rtdef)) { // rtdef is cast and dereferenced below as well
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected nullptr to ctdef", K(ret), KP(func_lookup_ctdef), KP(func_lookup_rtdef), KP(rowkey_scan_ctdef), KP(rowkey_scan_rtdef));
+    } else if (OB_UNLIKELY(rowkey_scan_ctdef->op_type_ != ObDASOpType::DAS_OP_IR_AUX_LOOKUP
+                           && rowkey_scan_ctdef->op_type_ != ObDASOpType::DAS_OP_TABLE_SCAN
+                           && rowkey_scan_ctdef->op_type_ != ObDASOpType::DAS_OP_SORT)) {
+      ret = OB_INVALID_ARGUMENT;
+      LOG_WARN("unexpected rowkey scan type", K(ret), KPC(rowkey_scan_ctdef));
+    }
+  }
+
+  if (OB_FAIL(ret)) {
+  } else if (ObDASOpType::DAS_OP_IR_AUX_LOOKUP == rowkey_scan_ctdef->op_type_) {
+    const ObDASIRScanCtDef *ir_scan_ctdef = nullptr;
+    ObDASIRScanRtDef *ir_scan_rtdef = nullptr;
+    const ObDASIRAuxLookupCtDef *aux_lookup_ctdef = static_cast<const ObDASIRAuxLookupCtDef *>(rowkey_scan_ctdef);
+    rowkey_scan_ouput_exprs = &aux_lookup_ctdef->get_lookup_scan_ctdef()->result_output_;
+    ObDASIRAuxLookupRtDef *aux_lookup_rtdef = static_cast<ObDASIRAuxLookupRtDef *>(rowkey_scan_rtdef);
+    ObDASLocalLookupIter *doc_id_lookup_iter = nullptr;
+    ObDASIter *text_retrieval_result = nullptr;
+    const ObDASSortCtDef *sort_ctdef = nullptr;
+    ObDASSortRtDef *sort_rtdef = nullptr;
+    ObDASIter *sort_result = nullptr;
+    bool taat_mode = false;
+
+    ObDASFTSTabletID fts_tablet_ids;
+    fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.inv_idx_tablet_id_;
+    fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fwd_idx_tablet_id_;
+    fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.doc_id_idx_tablet_id_;
+    const bool need_rewind = true;
+    const bool need_distinct = false;
+    if (OB_FAIL(ObDASUtils::find_target_das_def(
+        rowkey_scan_ctdef,
+        rowkey_scan_rtdef,
+        ObDASOpType::DAS_OP_IR_SCAN,
+        ir_scan_ctdef,
+        ir_scan_rtdef))) {
+      LOG_WARN("fail to find ir scan definition", K(ret));
+    } else if (OB_FAIL(create_text_retrieval_sub_tree(
+        scan_param.ls_id_,
+        alloc,
+        ir_scan_ctdef,
+        ir_scan_rtdef,
+        fts_tablet_ids,
+        trans_desc,
+        snapshot,
+        text_retrieval_result))) {
+      LOG_WARN("failed to create text retrieval sub tree", K(ret));
+    } else if (FALSE_IT(rowkey_scan_iter = text_retrieval_result)) {
+    } else if (aux_lookup_ctdef->get_doc_id_scan_ctdef()->op_type_ != ObDASOpType::DAS_OP_SORT) {
+      // do nothing, just skip
+    } else if (FALSE_IT(sort_ctdef = static_cast<const ObDASSortCtDef *>(aux_lookup_ctdef->get_doc_id_scan_ctdef()))) {
+    } else if (FALSE_IT(sort_rtdef = static_cast<ObDASSortRtDef *>(aux_lookup_rtdef->get_doc_id_scan_rtdef()))) {
+    } else if (OB_FAIL(create_sort_sub_tree(
+        alloc, sort_ctdef, sort_rtdef, need_rewind, need_distinct, text_retrieval_result, sort_result))) {
+      LOG_WARN("failed to create sort sub tree", K(ret));
+    } else {
+      rowkey_scan_iter = sort_result;
+    }
+    if (OB_FAIL(ret)) {
+    } else {
+      ObDASScanIter *docid_rowkey_table_iter = nullptr;
+      ObDASScanIterParam docid_rowkey_table_param;
+      const ObDASScanCtDef *lookup_ctdef = static_cast<const ObDASScanCtDef *>(aux_lookup_ctdef->get_lookup_scan_ctdef());
+      ObDASScanRtDef *lookup_rtdef = static_cast<ObDASScanRtDef *>(aux_lookup_rtdef->get_lookup_scan_rtdef());
+      docid_rowkey_table_param.scan_ctdef_ = lookup_ctdef;
+      docid_rowkey_table_param.max_size_ = lookup_rtdef->eval_ctx_->is_vectorized() ? lookup_rtdef->eval_ctx_->max_batch_size_ : 1;
+      docid_rowkey_table_param.eval_ctx_ = lookup_rtdef->eval_ctx_;
+      docid_rowkey_table_param.exec_ctx_ = &lookup_rtdef->eval_ctx_->exec_ctx_;
+      docid_rowkey_table_param.output_ = &lookup_ctdef->result_output_;
+      if (OB_FAIL(create_das_iter(alloc, docid_rowkey_table_param, docid_rowkey_table_iter))) {
+        LOG_WARN("failed to create doc id table iter", K(ret));
+      } else {
+        ObDASLocalLookupIterParam doc_id_lookup_param;
+        doc_id_lookup_param.max_size_ = aux_lookup_rtdef->eval_ctx_->is_vectorized()
+            ? aux_lookup_rtdef->eval_ctx_->max_batch_size_ : 1;
+        doc_id_lookup_param.eval_ctx_ = aux_lookup_rtdef->eval_ctx_;
+        doc_id_lookup_param.exec_ctx_ = &aux_lookup_rtdef->eval_ctx_->exec_ctx_;
+        doc_id_lookup_param.output_ = &aux_lookup_ctdef->result_output_;
+        doc_id_lookup_param.default_batch_row_count_ = doc_id_lookup_param.max_size_;
+        doc_id_lookup_param.index_ctdef_ = aux_lookup_ctdef->get_doc_id_scan_ctdef();
+        doc_id_lookup_param.index_rtdef_ = aux_lookup_rtdef->get_doc_id_scan_rtdef();
+        doc_id_lookup_param.lookup_ctdef_ = aux_lookup_ctdef->get_lookup_scan_ctdef();
+        doc_id_lookup_param.lookup_rtdef_ = aux_lookup_rtdef->get_lookup_scan_rtdef();
+        doc_id_lookup_param.index_table_iter_ = rowkey_scan_iter;
+        doc_id_lookup_param.data_table_iter_ = docid_rowkey_table_iter;
+        doc_id_lookup_param.trans_desc_ = trans_desc;
+        doc_id_lookup_param.snapshot_ = snapshot;
+        doc_id_lookup_param.rowkey_exprs_ = &aux_lookup_ctdef->get_lookup_scan_ctdef()->rowkey_exprs_;
+        ObDASTextRetrievalMergeIter *tr_merge_iter = static_cast<ObDASTextRetrievalMergeIter *>(text_retrieval_result);
+        taat_mode = tr_merge_iter->is_taat_mode();
+        if (taat_mode || sort_result) {
+          doc_id_lookup_param.lookup_rtdef_->scan_flag_.scan_order_ = ObQueryFlag::KeepOrder;
+        }
+        if (OB_FAIL(create_das_iter(alloc, doc_id_lookup_param, doc_id_lookup_iter))) {
+          LOG_WARN("failed to create doc id lookup iter", K(ret));
+        } else if (OB_FAIL(create_iter_children_array(2, alloc, doc_id_lookup_iter))) {
+          LOG_WARN("failed to create iter children array", K(ret));
+        } else {
+          doc_id_lookup_iter->get_children()[0] = rowkey_scan_iter;
+          doc_id_lookup_iter->get_children()[1] = docid_rowkey_table_iter;
+          docid_rowkey_table_iter->set_scan_param(doc_id_lookup_iter->get_lookup_param());
+          doc_id_lookup_iter->set_tablet_id(related_tablet_ids.doc_id_idx_tablet_id_); // NOTE(review): set_func_lookup_iter_related_ids() gives the DAS_OP_IR_AUX_LOOKUP iter aux_lookup_tablet_id_ - confirm the two are meant to agree
+          doc_id_lookup_iter->set_ls_id(scan_param.ls_id_);
+          rowkey_scan_iter = doc_id_lookup_iter;
+        }
+      }
+    }
+  } else if (ObDASOpType::DAS_OP_TABLE_SCAN == rowkey_scan_ctdef->op_type_) {
+    ObDASScanIter *scan_iter = nullptr;
+    ObDASScanIterParam iter_param;
+    // this code is based on the assumption that scan_param will not be released until this iter is released
+    const ObDASScanCtDef *ctdef = static_cast<const ObDASScanCtDef *>(rowkey_scan_ctdef);
+    ObDASScanRtDef *rtdef = static_cast<ObDASScanRtDef *>(rowkey_scan_rtdef);
+    iter_param.scan_ctdef_ = ctdef;
+    iter_param.max_size_ = rtdef->eval_ctx_->is_vectorized() ? rtdef->eval_ctx_->max_batch_size_ : 1;
+    iter_param.eval_ctx_ = rtdef->eval_ctx_;
+    iter_param.exec_ctx_ = &rtdef->eval_ctx_->exec_ctx_;
+    iter_param.output_ = &ctdef->result_output_;
+    if (OB_FAIL(create_das_iter(alloc, iter_param, scan_iter))) {
+      LOG_WARN("failed to create data table lookup scan iter", K(ret));
+    } else if (FALSE_IT(scan_iter->set_scan_param(scan_param))) {
+    } else {
+      rowkey_scan_iter = scan_iter;
+      rowkey_scan_ouput_exprs = &ctdef->pd_expr_spec_.access_exprs_;
+    }
+  } else if (ObDASOpType::DAS_OP_SORT == rowkey_scan_ctdef->op_type_) {
+    const ObDASScanCtDef *scan_ctdef = nullptr;
+    ObDASScanRtDef *scan_rtdef = nullptr;
+    ObDASScanIterParam iter_param;
+    const ObDASSortCtDef *sort_ctdef = nullptr;
+    ObDASSortRtDef *sort_rtdef = nullptr;
+    ObDASIter *sort_result = nullptr;
+    ObDASScanIter *scan_iter = nullptr;
+    const bool need_rewind = true;
+    const bool need_distinct = false;
+    if (OB_FAIL(ObDASUtils::find_target_das_def(
+        rowkey_scan_ctdef,
+        rowkey_scan_rtdef,
+        ObDASOpType::DAS_OP_TABLE_SCAN,
+        scan_ctdef,
+        scan_rtdef))) {
+      LOG_WARN("fail to find scan definition", K(ret));
+    } else {
+      const ObDASScanCtDef *ctdef = scan_ctdef;
+      ObDASScanRtDef *rtdef = scan_rtdef;
+      iter_param.scan_ctdef_ = ctdef;
+      iter_param.max_size_ = rtdef->eval_ctx_->is_vectorized() ? rtdef->eval_ctx_->max_batch_size_ : 1;
+      iter_param.eval_ctx_ = rtdef->eval_ctx_;
+      iter_param.exec_ctx_ = &rtdef->eval_ctx_->exec_ctx_;
+      iter_param.output_ = &ctdef->result_output_;
+    }
+    if (OB_FAIL(ret)) {
+    } else if (OB_FAIL(create_das_iter(alloc, iter_param, scan_iter))) {
+      LOG_WARN("failed to create data table lookup scan iter", K(ret));
+    } else if (FALSE_IT(scan_iter->set_scan_param(scan_param))) {
+    } else if (FALSE_IT(sort_ctdef = static_cast<const ObDASSortCtDef *>(rowkey_scan_ctdef))) {
+    } else if (FALSE_IT(sort_rtdef = static_cast<ObDASSortRtDef *>(rowkey_scan_rtdef))) {
+    } else if (OB_FAIL(create_sort_sub_tree(
+        alloc, sort_ctdef, sort_rtdef, need_rewind, need_distinct, scan_iter, sort_result))) {
+      LOG_WARN("failed to create sort sub tree", K(ret));
+    } else {
+      rowkey_scan_iter = sort_result;
+      rowkey_scan_ouput_exprs = &scan_ctdef->pd_expr_spec_.access_exprs_;
+    }
+  }
+
+  // check exprs
+  bool find = false;
+  if (OB_FAIL(ret)) { // func_lookup_ctdef may still be null here, so it is only touched on success
+  } else if (OB_ISNULL(func_lookup_ctdef->get_doc_id_lookup_scan_ctdef())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr to doc id lookup scan ctdef", K(ret));
+  } else {
+    docid_lookup_rowkey_exprs = &static_cast<const ObDASScanCtDef *>(func_lookup_ctdef->get_doc_id_lookup_scan_ctdef())->rowkey_exprs_;
+  }
+  for (int i = 0; OB_SUCC(ret) && i < docid_lookup_rowkey_exprs->count(); i++) {
+    for (int j = 0; OB_SUCC(ret) && !find && j < rowkey_scan_ouput_exprs->count(); j++) {
+      if (rowkey_scan_ouput_exprs->at(j) == docid_lookup_rowkey_exprs->at(i)) {
+        find = true;
+      }
+    }
+    if (OB_UNLIKELY(!find)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected error, rowkey scan output exprs count not equal to docid lookup rowkey exprs count", K(ret));
+    } else {
+      find = false;
+    }
+  }
+
+  if (OB_SUCC(ret) && func_lookup_ctdef->has_main_table_lookup()) {
+    find = false;
+    main_lookup_rowkey_exprs = &static_cast<const ObDASScanCtDef *>(func_lookup_ctdef->get_main_lookup_scan_ctdef())->rowkey_exprs_;
+    for (int i = 0; OB_SUCC(ret) && i < main_lookup_rowkey_exprs->count(); i++) {
+      for (int j = 0; OB_SUCC(ret) && !find && j < rowkey_scan_ouput_exprs->count(); j++) {
+        if (rowkey_scan_ouput_exprs->at(j) == main_lookup_rowkey_exprs->at(i)) {
+          find = true;
+        }
+      }
+      if (OB_UNLIKELY(!find)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected error, rowkey scan output exprs count not equal to docid lookup rowkey exprs count", K(ret));
+      } else {
+        find = false;
+      }
+    }
+  }
+
+  ObDASIter *func_lookup_result = nullptr;
+  ObDASCacheLookupIter *root_lookup_iter = nullptr;
+  if (OB_FAIL(ret)) { // an earlier step failed: build nothing and keep its error code
+  } else if (OB_FAIL(create_functional_lookup_sub_tree(
+      scan_param,
+      scan_param.ls_id_,
+      alloc,
+      func_lookup_ctdef,
+      func_lookup_rtdef,
+      related_tablet_ids,
+      true,
+      trans_desc,
+      snapshot,
+      func_lookup_result))) {
+    LOG_WARN("failed to create domain index lookup iters", K(ret));
+  } else if (OB_ISNULL(func_lookup_result) || OB_ISNULL(rowkey_scan_iter)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr to lookup sub tree", K(ret), KP(func_lookup_result), KP(rowkey_scan_iter));
+  } else {
+    ObDASCacheLookupIterParam root_lookup_param;
+    root_lookup_param.max_size_ = idx_proj_lookup_rtdef->eval_ctx_->is_vectorized()
+        ? idx_proj_lookup_rtdef->get_rowkey_scan_rtdef()->eval_ctx_->max_batch_size_ : 1;
+    root_lookup_param.eval_ctx_ = idx_proj_lookup_rtdef->eval_ctx_;
+    root_lookup_param.exec_ctx_ = &idx_proj_lookup_rtdef->eval_ctx_->exec_ctx_;
+    root_lookup_param.output_ = &idx_proj_lookup_ctdef->result_output_;
+    root_lookup_param.default_batch_row_count_ = root_lookup_param.max_size_;
+    root_lookup_param.index_ctdef_ = idx_proj_lookup_ctdef->get_rowkey_scan_ctdef();
+    root_lookup_param.index_rtdef_ = idx_proj_lookup_rtdef->get_rowkey_scan_rtdef();
+    root_lookup_param.lookup_ctdef_ = static_cast<const ObDASScanCtDef *>(func_lookup_ctdef->get_doc_id_lookup_scan_ctdef());
+    root_lookup_param.lookup_rtdef_ = static_cast<ObDASScanRtDef *>(func_lookup_rtdef->get_doc_id_lookup_scan_rtdef());
+    root_lookup_param.index_table_iter_ = rowkey_scan_iter;
+    root_lookup_param.data_table_iter_ = func_lookup_result;
+    root_lookup_param.trans_desc_ = trans_desc;
+    root_lookup_param.snapshot_ = snapshot;
+    root_lookup_param.rowkey_exprs_ = &static_cast<const ObDASScanCtDef *>(func_lookup_ctdef->get_doc_id_lookup_scan_ctdef())->rowkey_exprs_;
+    root_lookup_param.lookup_rtdef_->scan_flag_.scan_order_ = ObQueryFlag::KeepOrder;
+    if (OB_FAIL(create_das_iter(alloc, root_lookup_param, root_lookup_iter))) {
+      LOG_WARN("failed to create das iter", K(ret));
+    } else if (OB_FAIL(create_iter_children_array(2, alloc, root_lookup_iter))) {
+      LOG_WARN("failed to create iter children array", K(ret));
+    } else {
+      root_lookup_iter->get_children()[0] = rowkey_scan_iter;
+      root_lookup_iter->get_children()[1] = func_lookup_result;
+      static_cast<ObDASFuncLookupIter *>(func_lookup_result)->set_index_scan_param(root_lookup_iter->get_lookup_param());
+      root_lookup_iter->set_tablet_id(related_tablet_ids.rowkey_doc_tablet_id_);
+      root_lookup_iter->set_ls_id(scan_param.ls_id_);
+      iter_tree = root_lookup_iter;
+    }
+  }
+  return ret;
+}
+
+int ObDASIterUtils::create_functional_lookup_sub_tree(ObTableScanParam &scan_param,
+                                                      const ObLSID &ls_id,
+                                                      common::ObIAllocator &alloc,
+                                                      const ObDASFuncLookupCtDef
*func_lookup_ctdef, + ObDASFuncLookupRtDef *func_lookup_rtdef, + const ObDASRelatedTabletID &related_tablet_ids, + const bool &lookup_keep_order, + transaction::ObTxDesc *trans_desc, + transaction::ObTxReadSnapshot *snapshot, + ObDASIter *&fun_lookup_result) +{ + int ret = OB_SUCCESS; + void *buf = nullptr; + + ObDASIter **data_table_iters = nullptr; + ObDASScanIter *main_lookup_table_iter = nullptr; + + ObDASFuncDataIter *fts_merge_iter = nullptr; + ObDASScanIter *rowkey_docid_iter = nullptr; + + ObDASFuncLookupIter *func_lookup_iter = nullptr; + + // ObDASCacheLookupIter *root_local_lookup_iter = nullptr; + const int64_t func_lookup_cnt = func_lookup_ctdef->func_lookup_cnt_; + const int64_t total_lookup_cnt = func_lookup_ctdef->has_main_table_lookup() ? func_lookup_cnt + 1 : func_lookup_cnt; + + ObDASFuncDataIterParam fts_merge_iter_param; + if (OB_UNLIKELY(0 == func_lookup_cnt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, func lookup count is 0", K(ret)); + } else if (OB_ISNULL(buf = alloc.alloc(sizeof(ObDASIter *) * func_lookup_cnt))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate enough memory", K(sizeof(ObDASIter *) * func_lookup_cnt), K(ret)); + } else { + data_table_iters = static_cast(buf); + for (int64_t i = 0; OB_SUCC(ret) && i < func_lookup_cnt; i++) { + data_table_iters[i] = nullptr; + ObDASFTSTabletID fts_tablet_ids; + fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[i].inv_idx_tablet_id_; + fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[i].fwd_idx_tablet_id_; + fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[i].doc_id_idx_tablet_id_; + if (OB_FAIL(create_functional_text_retrieval_sub_tree(scan_param.ls_id_, + alloc, + static_cast(func_lookup_ctdef->get_func_lookup_scan_ctdef(i)), + static_cast(func_lookup_rtdef->get_func_lookup_scan_rtdef(i)), + fts_tablet_ids, + trans_desc, + snapshot, + data_table_iters[i]))) { + LOG_WARN("failed to create 
text retrieval sub tree", K(ret)); + } + } + if (OB_SUCC(ret)) { + fts_merge_iter_param.tr_merge_iters_ = data_table_iters; + fts_merge_iter_param.iter_count_ = func_lookup_cnt; + fts_merge_iter_param.trans_desc_ = trans_desc; + fts_merge_iter_param.snapshot_ = snapshot; + if (func_lookup_ctdef->has_main_table_lookup()) { + ObDASScanIterParam main_table_param; + const ObDASScanCtDef *ctdef = static_cast(func_lookup_ctdef->get_main_lookup_scan_ctdef()); + ObDASScanRtDef *rtdef = static_cast(func_lookup_rtdef->get_main_lookup_scan_rtdef()); + if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, ctdef or rtdef is nullptr", K(ret), KPC(ctdef), KPC(rtdef)); + } else if (ObDASOpType::DAS_OP_TABLE_SCAN != ctdef->op_type_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, ctdef is not table scan", K(ret), K(ctdef->op_type_), K(ObDASOpType::DAS_OP_TABLE_SCAN)); + } else { + main_table_param.scan_ctdef_ = ctdef; + main_table_param.max_size_ = rtdef->eval_ctx_->is_vectorized() ? 
rtdef->eval_ctx_->max_batch_size_ : 1; + main_table_param.eval_ctx_ = rtdef->eval_ctx_; + main_table_param.exec_ctx_ = &rtdef->eval_ctx_->exec_ctx_; + main_table_param.output_ = &ctdef->result_output_; + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(create_das_iter(alloc, main_table_param, main_lookup_table_iter))) { + LOG_WARN("failed to create data table lookup scan iter", K(ret)); + } else { + if (lookup_keep_order) { + rtdef->scan_flag_.scan_order_ = ObQueryFlag::KeepOrder; + } + fts_merge_iter_param.main_lookup_ctdef_ = ctdef; + fts_merge_iter_param.main_lookup_rtdef_ = rtdef; + fts_merge_iter_param.main_lookup_iter_ = main_lookup_table_iter; + } + } + } + } + + // create fts merge iter + if (OB_FAIL(ret)) { + } else if (OB_FAIL(create_das_iter(alloc, fts_merge_iter_param, fts_merge_iter))) { + LOG_WARN("failed to create fts merge iter", K(ret)); + } else if (OB_FAIL(create_iter_children_array(total_lookup_cnt, alloc, fts_merge_iter))) { + LOG_WARN("failed to create iter children array", K(ret)); + } else { + for (int64_t i = 0; i < func_lookup_cnt; ++i) { + fts_merge_iter->get_children()[i] = data_table_iters[i]; + } + if (func_lookup_ctdef->has_main_table_lookup()) { + fts_merge_iter->get_children()[func_lookup_cnt] = main_lookup_table_iter; + main_lookup_table_iter->set_scan_param(fts_merge_iter->get_main_lookup_scan_param()); + } + fts_merge_iter->set_tablet_id(related_tablet_ids.lookup_tablet_id_); // for main_lookup + fts_merge_iter->set_ls_id(ls_id); + } + + // create function lookup iter + if (OB_SUCC(ret)) { + const ObDASBaseCtDef *rowkey_docid_ctdef = func_lookup_ctdef->get_doc_id_lookup_scan_ctdef(); + ObDASBaseRtDef *rowkey_docid_rtdef = func_lookup_rtdef->get_doc_id_lookup_scan_rtdef(); + + ObDASScanIterParam rowkey_docid_param; + const ObDASScanCtDef *ctdef = static_cast(rowkey_docid_ctdef); + ObDASScanRtDef *rtdef = static_cast(rowkey_docid_rtdef); + rowkey_docid_param.scan_ctdef_ = ctdef; + rowkey_docid_param.max_size_ = 
rtdef->eval_ctx_->is_vectorized() ? rtdef->eval_ctx_->max_batch_size_ : 1; + rowkey_docid_param.eval_ctx_ = rtdef->eval_ctx_; + rowkey_docid_param.exec_ctx_ = &rtdef->eval_ctx_->exec_ctx_; + rowkey_docid_param.output_ = &ctdef->result_output_; + if (OB_FAIL(create_das_iter(alloc, rowkey_docid_param, rowkey_docid_iter))) { + LOG_WARN("failed to create data table lookup scan iter", K(ret)); + } else { + ObDASFuncLookupIterParam func_lookup_param; + func_lookup_param.max_size_ = func_lookup_rtdef->eval_ctx_->is_vectorized() ? func_lookup_rtdef->eval_ctx_->max_batch_size_ : 1; + func_lookup_param.eval_ctx_ = func_lookup_rtdef->eval_ctx_; + func_lookup_param.exec_ctx_ = &func_lookup_rtdef->eval_ctx_->exec_ctx_; + func_lookup_param.output_ = &func_lookup_ctdef->result_output_; + func_lookup_param.default_batch_row_count_ = func_lookup_param.max_size_; + func_lookup_param.index_ctdef_ = rowkey_docid_ctdef; + func_lookup_param.index_rtdef_ = rowkey_docid_rtdef; + func_lookup_param.lookup_ctdef_ = nullptr; + func_lookup_param.lookup_rtdef_ = nullptr; + func_lookup_param.index_table_iter_ = rowkey_docid_iter; + func_lookup_param.data_table_iter_ = fts_merge_iter; + func_lookup_param.trans_desc_ = trans_desc; + func_lookup_param.snapshot_ = snapshot; + func_lookup_param.doc_id_expr_ = func_lookup_ctdef->lookup_doc_id_expr_; + if (lookup_keep_order) { + static_cast(func_lookup_param.index_rtdef_)->scan_flag_.scan_order_ = ObQueryFlag::KeepOrder; + } + + if (OB_FAIL(create_das_iter(alloc, func_lookup_param, func_lookup_iter))) { + LOG_WARN("failed to create doc id lookup iter", K(ret)); + } else if (OB_FAIL(create_iter_children_array(2, alloc, func_lookup_iter))) { + LOG_WARN("failed to create iter children array", K(ret)); + } else { + func_lookup_iter->get_children()[0] = rowkey_docid_iter; + func_lookup_iter->get_children()[1] = fts_merge_iter; + } + } + } + + if (OB_SUCC(ret)) { + fun_lookup_result = func_lookup_iter; + } + return ret; +} /* local_lookup * | | @@ -1660,5 
+2462,6 @@ int ObDASIterUtils::create_iter_children_array(const int64_t children_cnt, } return ret; } + } // namespace sql } // namespace oceanbase diff --git a/src/sql/das/iter/ob_das_iter_utils.h b/src/sql/das/iter/ob_das_iter_utils.h index a00a58be7..c9f9f21dc 100644 --- a/src/sql/das/iter/ob_das_iter_utils.h +++ b/src/sql/das/iter/ob_das_iter_utils.h @@ -26,6 +26,9 @@ #include "sql/das/iter/ob_das_doc_id_merge_iter.h" #include "sql/das/iter/ob_das_vid_merge_iter.h" #include "sql/das/iter/ob_das_index_merge_iter.h" +#include "sql/das/iter/ob_das_func_data_iter.h" +#include "sql/das/iter/ob_das_functional_lookup_iter.h" +#include "sql/das/iter/ob_das_cache_lookup_iter.h" #include "sql/engine/table/ob_table_scan_op.h" #include "sql/das/iter/ob_das_mvi_lookup_iter.h" @@ -76,6 +79,11 @@ public: const ObDASRelatedTabletID &related_tablet_ids, const ObLSID &ls_id, ObDASIter *root_iter); + static int set_func_lookup_iter_related_ids(const ObDASBaseCtDef *attach_ctdef, + const ObDASRelatedTabletID &related_tablet_ids, + const ObLSID &ls_id, + int64_t flag, + ObDASIter *root_iter); static int set_index_merge_related_ids(const ObDASBaseCtDef *attach_ctdef, const ObDASRelatedTabletID &related_tablet_ids, @@ -122,6 +130,15 @@ private: transaction::ObTxReadSnapshot *snapshot, ObDASIter *&iter_tree); + static int create_function_lookup_tree(ObTableScanParam &scan_param, + common::ObIAllocator &alloc, + const ObDASBaseCtDef *attach_ctdef, + ObDASBaseRtDef *attach_rtdef, + const ObDASRelatedTabletID &related_tablet_ids, + transaction::ObTxDesc *trans_desc, + transaction::ObTxReadSnapshot *snapshot, + ObDASIter *&iter_tree); + static int create_doc_id_scan_sub_tree(ObTableScanParam &scan_param, common::ObIAllocator &alloc, const ObDASDocIdMergeCtDef *merge_ctdef, @@ -175,7 +192,16 @@ private: common::ObIAllocator &alloc, const ObDASIRScanCtDef *ir_scan_ctdef, ObDASIRScanRtDef *ir_scan_rtdef, - const ObDASRelatedTabletID &related_tablet_ids, + const ObDASFTSTabletID 
&related_tablet_ids, + transaction::ObTxDesc *trans_desc, + transaction::ObTxReadSnapshot *snapshot, + ObDASIter *&retrieval_result); + + static int create_functional_text_retrieval_sub_tree(const ObLSID &ls_id, + common::ObIAllocator &alloc, + const ObDASIRScanCtDef *ir_scan_ctdef, + ObDASIRScanRtDef *ir_scan_rtdef, + const ObDASFTSTabletID &related_tablet_ids, transaction::ObTxDesc *trans_desc, transaction::ObTxReadSnapshot *snapshot, ObDASIter *&retrieval_result); @@ -223,6 +249,16 @@ private: transaction::ObTxDesc *tx_desc, transaction::ObTxReadSnapshot *snapshot, ObDASIter *&iter); + static int create_functional_lookup_sub_tree(ObTableScanParam &scan_param, + const ObLSID &ls_id, + common::ObIAllocator &alloc, + const ObDASFuncLookupCtDef *table_lookup_ctdef, + ObDASFuncLookupRtDef *table_lookup_rtdef, + const ObDASRelatedTabletID &related_tablet_ids, + const bool &lookup_keep_order, + transaction::ObTxDesc *trans_desc, + transaction::ObTxReadSnapshot *snapshot, + ObDASIter *&fun_lookup_result); static int create_iter_children_array(const int64_t children_cnt, common::ObIAllocator &alloc, diff --git a/src/sql/das/iter/ob_das_local_lookup_iter.cpp b/src/sql/das/iter/ob_das_local_lookup_iter.cpp index 3fe957a37..8180db7c2 100644 --- a/src/sql/das/iter/ob_das_local_lookup_iter.cpp +++ b/src/sql/das/iter/ob_das_local_lookup_iter.cpp @@ -15,6 +15,7 @@ #include "sql/das/iter/ob_das_scan_iter.h" #include "sql/das/iter/ob_das_doc_id_merge_iter.h" #include "sql/das/iter/ob_das_vid_merge_iter.h" +#include "sql/das/iter/ob_das_functional_lookup_iter.h" #include "sql/das/ob_das_scan_op.h" #include "sql/das/ob_das_ir_define.h" #include "sql/das/ob_das_vec_define.h" @@ -177,13 +178,17 @@ void ObDASLocalLookupIter::reset_lookup_state() int ObDASLocalLookupIter::add_rowkey() { int ret = OB_SUCCESS; - OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN || data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE - || data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE); + 
OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN || + data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE || + data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE || + data_table_iter_->get_type() == DAS_ITER_FUNC_LOOKUP); ObDASScanIter *scan_iter = nullptr; if (data_table_iter_->get_type() == DAS_ITER_SCAN) { scan_iter = static_cast(data_table_iter_); } else if (data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE) { scan_iter = static_cast(data_table_iter_)->get_data_table_iter(); + } else if (data_table_iter_->get_type() == DAS_ITER_FUNC_LOOKUP) { + scan_iter = static_cast(data_table_iter_)->get_index_scan_iter(); } else if (data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE) { scan_iter = static_cast(data_table_iter_)->get_data_table_iter(); } @@ -207,6 +212,8 @@ int ObDASLocalLookupIter::add_rowkey() LOG_WARN("failed to push back trans info array", K(ret), KPC(datum_ptr)); } } + } else if (DAS_ITER_FUNC_LOOKUP == data_table_iter_->get_type()) { + group_id = static_cast(data_table_iter_)->get_group_id(); } int64_t group_idx = ObNewRange::get_group_idx(group_id); @@ -256,8 +263,10 @@ int ObDASLocalLookupIter::add_rowkeys(int64_t count) int ObDASLocalLookupIter::do_index_lookup() { int ret = OB_SUCCESS; - OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN || data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE - || data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE); + OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN || + data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE || + data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE || + data_table_iter_->get_type() == DAS_ITER_FUNC_LOOKUP); if (is_first_lookup_) { is_first_lookup_ = false; if (OB_FAIL(init_scan_param(lookup_param_, lookup_ctdef_, lookup_rtdef_))) { @@ -285,13 +294,17 @@ int ObDASLocalLookupIter::do_index_lookup() int ObDASLocalLookupIter::check_index_lookup() { int ret = OB_SUCCESS; - OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN || 
data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE - || data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE); + OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN || + data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE || + data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE || + data_table_iter_->get_type() == DAS_ITER_FUNC_LOOKUP); ObDASScanIter *scan_iter = nullptr; if (data_table_iter_->get_type() == DAS_ITER_SCAN) { scan_iter = static_cast(data_table_iter_); } else if (data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE) { scan_iter = static_cast(data_table_iter_)->get_data_table_iter(); + } else if (data_table_iter_->get_type() == DAS_ITER_FUNC_LOOKUP) { + scan_iter = static_cast(data_table_iter_)->get_index_scan_iter(); } else { scan_iter = static_cast(data_table_iter_)->get_data_table_iter(); } @@ -336,8 +349,10 @@ int ObDASLocalLookupIter::check_index_lookup() int ObDASLocalLookupIter::init_rowkey_exprs_for_compat() { int ret = OB_SUCCESS; - if (ObDASOpType::DAS_OP_TABLE_SCAN == index_ctdef_->op_type_ - || ObDASOpType::DAS_OP_IR_AUX_LOOKUP == index_ctdef_->op_type_) { + if (ObDASOpType::DAS_OP_IR_AUX_LOOKUP == index_ctdef_->op_type_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected ir aux lookup iter", K(ret)); + } else if (ObDASOpType::DAS_OP_TABLE_SCAN == index_ctdef_->op_type_) { const ObDASScanCtDef *scan_ctdef = static_cast(index_ctdef_); int64_t rowkey_cnt = scan_ctdef->result_output_.count(); if (nullptr != scan_ctdef->group_id_expr_) { diff --git a/src/sql/das/iter/ob_das_local_lookup_iter.h b/src/sql/das/iter/ob_das_local_lookup_iter.h index 26c555898..ab27a4fe8 100644 --- a/src/sql/das/iter/ob_das_local_lookup_iter.h +++ b/src/sql/das/iter/ob_das_local_lookup_iter.h @@ -40,6 +40,7 @@ public: class ObDASScanCtDef; class ObDASScanRtDef; +class ObDASFuncLookupIter; class ObDASLocalLookupIter : public ObDASLookupIter { public: @@ -66,16 +67,16 @@ protected: virtual int inner_release() override; virtual int do_table_scan() 
override; virtual int rescan() override; - virtual void reset_lookup_state(); + virtual void reset_lookup_state() override; virtual int add_rowkey() override; virtual int add_rowkeys(int64_t count) override; virtual int do_index_lookup() override; virtual int check_index_lookup() override; -private: +protected: int init_rowkey_exprs_for_compat(); -private: +protected: ObSEArray trans_info_array_; // Local lookup das task could rescan multiple times during execution, lookup_tablet_id_ and // lookup_ls_id_ store the lookup parameter for this time. diff --git a/src/sql/das/iter/ob_das_lookup_iter.h b/src/sql/das/iter/ob_das_lookup_iter.h index ab207d5ab..5002e617d 100644 --- a/src/sql/das/iter/ob_das_lookup_iter.h +++ b/src/sql/das/iter/ob_das_lookup_iter.h @@ -120,8 +120,6 @@ protected: int build_lookup_range(ObNewRange &range); int build_trans_info_datum(const ObExpr *trans_info_expr, ObDatum *&datum_ptr); common::ObArenaAllocator &get_arena_allocator() { return lookup_memctx_->get_arena_allocator(); } - -private: lib::MemoryContext lookup_memctx_; }; diff --git a/src/sql/das/iter/ob_das_text_retrieval_iter.cpp b/src/sql/das/iter/ob_das_text_retrieval_iter.cpp index 9201cd6f9..38125e726 100644 --- a/src/sql/das/iter/ob_das_text_retrieval_iter.cpp +++ b/src/sql/das/iter/ob_das_text_retrieval_iter.cpp @@ -47,6 +47,7 @@ ObDASTextRetrievalIter::ObDASTextRetrievalIter() need_fwd_idx_agg_(false), need_inv_idx_agg_(false), inv_idx_agg_evaluated_(false), + need_inv_idx_agg_reset_(false), not_first_fwd_agg_(false), is_inited_(false) { @@ -56,20 +57,104 @@ int ObDASTextRetrievalIter::set_query_token(const ObString &query_token) { int ret = OB_SUCCESS; ObNewRange inv_idx_scan_range; + if (OB_FAIL(check_inv_idx_scan_and_agg_param())) { + LOG_WARN("failed to check inv idx scan or agg param", K(ret)); + } else { + const ExprFixedArray *exprs = &(ir_ctdef_->get_inv_idx_scan_ctdef()->pd_expr_spec_.access_exprs_); + int64 group_id = 0; + for (int64_t i = 0; i < exprs->count(); ++i) 
{ + if (T_PSEUDO_GROUP_ID == exprs->at(i)->type_) { + group_id = exprs->at(i)->locate_expr_datum(*eval_ctx_).get_int(); + } + } + int64_t group_idx = ObNewRange::get_group_idx(group_id); + if (OB_FAIL(gen_default_inv_idx_scan_range(query_token, inv_idx_scan_range))) { + LOG_WARN("failed to generate inverted index scan range", K(ret), K(query_token)); + } else if (need_inv_idx_agg_ && OB_FAIL(add_agg_rang_key(inv_idx_scan_range))) { + LOG_WARN("failed to add scan range for inv idx agg", K(ret)); + } else if (FALSE_IT(inv_idx_scan_range.group_idx_ = group_idx)) { + } else if (OB_FAIL(add_rowkey_range_key(inv_idx_scan_range))) { + LOG_WARN("failed to add scan range for inv idx scan", K(ret)); + } + } + + return ret; +} + +int ObDASTextRetrievalIter::set_query_token_and_rangekey(const ObString &query_token, const common::ObIArray &doc_id, const int64_t &batch_size) +{ + int ret = OB_SUCCESS; + ObNewRange inv_idx_scan_range; + ObNewRange inv_idx_agg_scan_range; + if (OB_FAIL(check_inv_idx_scan_and_agg_param())) { + LOG_WARN("failed to check inv idx scan or agg param", K(ret)); + } else { + const ExprFixedArray *exprs = &(ir_ctdef_->get_inv_idx_scan_ctdef()->pd_expr_spec_.access_exprs_); + int64 group_id = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < exprs->count(); ++i) { + if (T_PSEUDO_GROUP_ID == exprs->at(i)->type_) { + group_id = exprs->at(i)->locate_expr_datum(*eval_ctx_).get_int(); + } + } + int64_t group_idx = ObNewRange::get_group_idx(group_id); + for (int64_t i = 0; OB_SUCC(ret) && i < batch_size; ++i) { + if (OB_FAIL(gen_inv_idx_scan_range(query_token, doc_id.at(i), inv_idx_scan_range))) { + LOG_WARN("failed to build inverted index scan range", K(ret), K(query_token), K(doc_id.at(i))); + } else if (FALSE_IT(inv_idx_scan_range.group_idx_ = group_idx)) { + } else if (OB_FAIL(add_rowkey_range_key(inv_idx_scan_range))) { + LOG_WARN("failed to add scan range for inv idx scan", K(ret)); + } + } + if (OB_SUCC(ret) && need_inv_idx_agg_ && (!inv_idx_agg_evaluated_ || 
need_inv_idx_agg_reset_)) { + if (OB_FAIL(gen_default_inv_idx_scan_range(query_token, inv_idx_agg_scan_range))) { + LOG_WARN("failed to generate inverted index scan range", K(ret), K(query_token)); + } else if (OB_FAIL(add_agg_rang_key(inv_idx_agg_scan_range))) { + LOG_WARN("failed to add scan range for inv idx agg", K(ret)); + } + } + } + return ret; +} + +int ObDASTextRetrievalIter::check_inv_idx_scan_and_agg_param() +{ + int ret = OB_SUCCESS; if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("text retrieval iter not inited", K(ret)); - } else if (OB_UNLIKELY(!inv_idx_scan_param_.key_ranges_.empty() || - (need_inv_idx_agg_ && !inv_idx_agg_param_.key_ranges_.empty()))) { + } else if (OB_UNLIKELY(!need_inv_idx_agg_reset_ && need_fwd_idx_agg_)) { + // TODO: try to support the case @zyx439997 ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected set query token with not null query range", K(ret), K(query_token), - K(inv_idx_scan_param_.key_ranges_), K_(need_inv_idx_agg), K(inv_idx_agg_param_.key_ranges_)); - } else if (OB_FAIL(gen_inv_idx_scan_range(query_token, inv_idx_scan_range))) { - LOG_WARN("failed to generate inverted index scan range", K(ret), K(query_token)); - } else if (OB_FAIL(inv_idx_scan_param_.key_ranges_.push_back(inv_idx_scan_range))) { - LOG_WARN("failed to add scan range for inv idx scan", K(ret)); - } else if (need_inv_idx_agg_ && OB_FAIL(inv_idx_agg_param_.key_ranges_.push_back(inv_idx_scan_range))) { - LOG_WARN("failed to add scan range for inv idx agg", K(ret)); + LOG_WARN("unexpected empty query range", K(ret), K(inv_idx_scan_param_.key_ranges_)); + } else if (OB_UNLIKELY(!inv_idx_scan_param_.key_ranges_.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected empty query range", K(ret), K(inv_idx_scan_param_.key_ranges_)); + } else if (need_inv_idx_agg_) { + if (OB_UNLIKELY(!inv_idx_agg_param_.key_ranges_.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected empty query range", K(ret), K_(need_inv_idx_agg), K_(inv_idx_agg_evaluated), 
K(inv_idx_agg_param_.key_ranges_)); + } + } + return ret; +} + +int ObDASTextRetrievalIter::add_agg_rang_key(const ObNewRange &range) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!need_inv_idx_agg_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected empty query range", K(ret), KPC(&range)); + } else if (OB_FAIL(inv_idx_agg_param_.key_ranges_.push_back(range))) { + LOG_WARN("failed to push back lookup range", K(ret)); + } + return ret; +} + +int ObDASTextRetrievalIter::add_rowkey_range_key(const ObNewRange &range) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(inv_idx_scan_param_.key_ranges_.push_back(range))) { + LOG_WARN("failed to push back lookup range", K(ret)); } return ret; } @@ -92,6 +177,7 @@ int ObDASTextRetrievalIter::inner_init(ObDASIterParam ¶m) snapshot_ = retrieval_param.snapshot_; need_fwd_idx_agg_ = ir_ctdef_->need_fwd_idx_agg(); need_inv_idx_agg_ = ir_ctdef_->need_inv_idx_agg(); + need_inv_idx_agg_reset_ = retrieval_param.need_inv_idx_agg_reset_; max_batch_size_ = ir_rtdef_->eval_ctx_->max_batch_size_; if (need_inv_idx_agg_) { @@ -134,8 +220,7 @@ int ObDASTextRetrievalIter::inner_reuse() if (nullptr != mem_context_) { mem_context_->reset_remain_one_page(); } - inv_idx_agg_evaluated_ = false; - token_doc_cnt_ = 0; + int64_t old_default_size = OB_MAX(max_batch_size_, 1); max_batch_size_ = ir_rtdef_->eval_ctx_->max_batch_size_; if (old_default_size < OB_MAX(max_batch_size_, 1)) { @@ -168,9 +253,18 @@ int ObDASTextRetrievalIter::inner_reuse() if (!inv_idx_agg_param_.key_ranges_.empty()) { inv_idx_agg_param_.key_ranges_.reuse(); } - if (OB_FAIL(inverted_idx_agg_iter_->reuse())) { - LOG_WARN("failed to reuse inverted index agg iter", K(ret)); + if (!inv_idx_agg_evaluated_ || + need_inv_idx_agg_reset_ || + inv_idx_agg_param_.need_switch_param_) { + if (OB_FAIL(inverted_idx_agg_iter_->reuse())) { + LOG_WARN("failed to reuse inverted index agg iter", K(ret)); + } + inv_idx_agg_evaluated_ = false; + token_doc_cnt_ = 0; } + } else { + inv_idx_agg_evaluated_ = 
false; + token_doc_cnt_ = 0; } if (OB_SUCC(ret) && need_fwd_idx_agg_) { @@ -221,6 +315,7 @@ int ObDASTextRetrievalIter::inner_release() need_fwd_idx_agg_ = false; need_inv_idx_agg_ = false; inv_idx_agg_evaluated_ = false; + need_inv_idx_agg_reset_ = false; not_first_fwd_agg_ = false; is_inited_ = false; return ret; @@ -256,7 +351,9 @@ int ObDASTextRetrievalIter::rescan() } if (OB_FAIL(inverted_idx_scan_iter_->rescan())) { LOG_WARN("failed to rescan inverted scan iter", K(ret)); - } else if (need_inv_idx_agg_ && OB_FAIL(inverted_idx_agg_iter_->rescan())) { + } else if (need_inv_idx_agg_ && + !inv_idx_agg_evaluated_ && + OB_FAIL(inverted_idx_agg_iter_->rescan())) { LOG_WARN("failed to rescan inverted index agg iter", K(ret)); } else { int64_t cnt = inv_idx_scan_param_.output_exprs_->count(); @@ -317,6 +414,8 @@ int ObDASTextRetrievalIter::inner_get_next_rows(int64_t &count, int64_t capacity if (OB_FAIL(do_doc_cnt_agg())) { if (OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("Fail to do document count aggregation", K(ret), K_(inv_idx_agg_param)); + } else { + inv_idx_agg_evaluated_ = true; } } else { inv_idx_agg_evaluated_ = true; @@ -354,31 +453,6 @@ int ObDASTextRetrievalIter::inner_get_next_rows(int64_t &count, int64_t capacity return ret; } -int ObDASTextRetrievalIter::get_next_row_inner() -{ - int ret = OB_SUCCESS; - if (OB_FAIL(inverted_idx_scan_iter_->get_next_row())) { - if (OB_UNLIKELY(OB_ITER_END != ret)) { - LOG_WARN("failed to get next row from inverted index", K(ret), K_(inv_idx_scan_param), KPC_(inverted_idx_scan_iter)); - } - } else { - LOG_DEBUG("get one invert index scan row", "row", - ROWEXPR2STR(*ir_rtdef_->get_inv_idx_scan_rtdef()->eval_ctx_, - *inv_idx_scan_param_.output_exprs_)); - if (ir_ctdef_->need_calc_relevance()) { - clear_row_wise_evaluated_flag(); - if (OB_FAIL(get_next_doc_token_cnt(need_fwd_idx_agg_))) { - LOG_WARN("failed to get next doc token count", K(ret)); - } else if (OB_FAIL(fill_token_doc_cnt())) { - LOG_WARN("failed to get token 
doc cnt", K(ret)); - } else if (OB_FAIL(project_relevance_expr())) { - LOG_WARN("failed to evaluate simarity expr", K(ret)); - } - } - } - return ret; -} - int ObDASTextRetrievalIter::init_inv_idx_scan_param() { int ret = OB_SUCCESS; @@ -402,6 +476,9 @@ int ObDASTextRetrievalIter::init_inv_idx_scan_param() inv_idx_agg_param_))) { LOG_WARN("fail to init inverted index count aggregate param", K(ret), KPC_(ir_ctdef)); } else { + // for some cases, the default scan_order_ may be the 'Reverse'. + inv_idx_scan_param_.scan_flag_.scan_order_ = ObQueryFlag::Forward; + if (OB_UNLIKELY(!static_cast( ir_ctdef_->get_inv_idx_agg_ctdef()->pd_expr_spec_.pd_storage_flag_).is_aggregate_pushdown())) { ret = OB_NOT_IMPLEMENT; @@ -516,6 +593,8 @@ int ObDASTextRetrievalIter::do_doc_cnt_agg() const sql::ObExpr *inv_idx_agg_expr = inv_idx_agg_param_.aggregate_exprs_->at(0); sql::ObEvalCtx *eval_ctx = ir_rtdef_->get_inv_idx_agg_rtdef()->eval_ctx_; ObDatum *doc_cnt_datum = nullptr; + ObEvalCtx::BatchInfoScopeGuard guard(*eval_ctx); + guard.set_batch_idx(0); if (OB_FAIL(inv_idx_agg_expr->eval(*eval_ctx, doc_cnt_datum))) { LOG_WARN("failed to evaluate aggregated expr", K(ret)); } else { @@ -624,7 +703,7 @@ int ObDASTextRetrievalIter::reuse_fwd_idx_iter() return ret; } -int ObDASTextRetrievalIter::gen_inv_idx_scan_range(const ObString &query_token, ObNewRange &scan_range) +int ObDASTextRetrievalIter::gen_default_inv_idx_scan_range(const ObString &query_token, ObNewRange &scan_range) { int ret = OB_SUCCESS; void *buf = nullptr; @@ -661,6 +740,37 @@ int ObDASTextRetrievalIter::gen_inv_idx_scan_range(const ObString &query_token, return ret; } +int ObDASTextRetrievalIter::gen_inv_idx_scan_range(const ObString &query_token, const ObDocId &doc_id, ObNewRange &scan_range) +{ + int ret = OB_SUCCESS; + void *buf = nullptr; + ObObj *obj_ptr = nullptr; + common::ObArenaAllocator &ctx_alloc = mem_context_->get_arena_allocator(); + constexpr int64_t obj_cnt = INV_IDX_ROWKEY_COL_CNT; + ObObj tmp_obj; + 
tmp_obj.set_string(ObVarcharType, query_token); + // We need to ensure collation type / level between query text and token column is compatible + tmp_obj.set_meta_type(ir_ctdef_->search_text_->obj_meta_); + + if (OB_ISNULL(buf = ctx_alloc.alloc(sizeof(ObObj) * obj_cnt))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory for rowkey obj", K(ret)); + } else if (OB_ISNULL(obj_ptr = new (buf) ObObj[obj_cnt])) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr", K(ret)); + } else if (OB_FAIL(ob_write_obj(ctx_alloc, tmp_obj, obj_ptr[0]))) { + LOG_WARN("failed to write obj", K(ret)); + } else { + obj_ptr[1].set_varbinary(doc_id.get_string()); + ObRowkey row_key(obj_ptr, obj_cnt); + common::ObTableID inv_table_id = ir_ctdef_->get_inv_idx_scan_ctdef()->ref_table_id_; + if (OB_FAIL(scan_range.build_range(inv_table_id, row_key))) { + LOG_WARN("failed to build lookup range", K(ret), K(inv_table_id), K(row_key)); + } + } + return ret; +} + int ObDASTextRetrievalIter::gen_fwd_idx_scan_range(const ObDocId &doc_id, ObNewRange &scan_range) { int ret = OB_SUCCESS; @@ -768,6 +878,8 @@ int ObDASTextRetrievalIter::fill_token_doc_cnt() ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null expr", K(ret), KP(inv_idx_agg_expr), KP(eval_ctx)); } else { + ObEvalCtx::BatchInfoScopeGuard guard(*eval_ctx); + guard.set_batch_idx(0); ObDatum &doc_cnt_datum = inv_idx_agg_expr->locate_datum_for_write(*eval_ctx); doc_cnt_datum.set_int(token_doc_cnt_); } @@ -946,6 +1058,8 @@ int ObDASTRCacheIter::inner_get_next_rows(int64_t &count, int64_t capacity) if (OB_FAIL(do_doc_cnt_agg())) { if (OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("Fail to do document count aggregation", K(ret), K_(inv_idx_agg_param)); + } else { + inv_idx_agg_evaluated_ = true; } } else { inv_idx_agg_evaluated_ = true; diff --git a/src/sql/das/iter/ob_das_text_retrieval_iter.h b/src/sql/das/iter/ob_das_text_retrieval_iter.h index 2ed491685..c14d5681b 100644 --- 
a/src/sql/das/iter/ob_das_text_retrieval_iter.h +++ b/src/sql/das/iter/ob_das_text_retrieval_iter.h @@ -34,7 +34,8 @@ public: inv_idx_agg_iter_(nullptr), fwd_idx_iter_(nullptr), tx_desc_(nullptr), - snapshot_(nullptr) + snapshot_(nullptr), + need_inv_idx_agg_reset_(true) {} virtual bool is_valid() const override @@ -49,6 +50,7 @@ public: ObDASIter *fwd_idx_iter_; transaction::ObTxDesc *tx_desc_; transaction::ObTxReadSnapshot *snapshot_; + bool need_inv_idx_agg_reset_; }; // single token @@ -61,6 +63,7 @@ public: virtual int rescan() override; int set_query_token(const ObString &query_token); + int set_query_token_and_rangekey(const ObString &query_token, const common::ObIArray &doc_id, const int64_t &batch_size); void set_ls_tablet_ids( const share::ObLSID &ls_id, const ObTabletID &inv_tablet_id, @@ -100,7 +103,9 @@ protected: int project_relevance_expr(); int batch_project_relevance_expr(const int64_t &count); int reuse_fwd_idx_iter(); - int gen_inv_idx_scan_range(const ObString &query_token, ObNewRange &scan_range); + int gen_default_inv_idx_scan_range(const ObString &query_token, ObNewRange &scan_range); + int gen_inv_idx_scan_range(const ObString &query_token, const ObDocId &doc_id, ObNewRange &scan_range); + int gen_fwd_idx_scan_range(const ObDocId &doc_id, ObNewRange &scan_range); inline bool need_calc_relevance() { return true; } // TODO: reduce tsc ops if no need to calc relevance int init_calc_exprs(); @@ -118,6 +123,10 @@ protected: } return ret; } + + int add_rowkey_range_key(const ObNewRange &range); + int add_agg_rang_key(const ObNewRange &range); + int check_inv_idx_scan_and_agg_param(); protected: static const int64_t FWD_IDX_ROWKEY_COL_CNT = 2; static const int64_t INV_IDX_ROWKEY_COL_CNT = 2; @@ -146,6 +155,7 @@ protected: bool need_fwd_idx_agg_; bool need_inv_idx_agg_; bool inv_idx_agg_evaluated_; + bool need_inv_idx_agg_reset_; bool not_first_fwd_agg_; bool is_inited_; }; diff --git a/src/sql/das/iter/ob_das_text_retrieval_merge_iter.cpp 
b/src/sql/das/iter/ob_das_text_retrieval_merge_iter.cpp index 2b414837b..7c59f4a18 100644 --- a/src/sql/das/iter/ob_das_text_retrieval_merge_iter.cpp +++ b/src/sql/das/iter/ob_das_text_retrieval_merge_iter.cpp @@ -94,6 +94,7 @@ ObDASTextRetrievalMergeIter::ObDASTextRetrievalMergeIter() limit_param_(), input_row_cnt_(0), output_row_cnt_(0), + force_return_docid_(false), doc_cnt_calculated_(false), doc_cnt_iter_acquired_(false), is_inited_(false) @@ -115,24 +116,20 @@ int ObDASTextRetrievalMergeIter::rescan() { int ret = OB_SUCCESS; if (0 == query_tokens_.count()) { - } else if (nullptr != whole_doc_cnt_iter_ && OB_FAIL(whole_doc_cnt_iter_->rescan())) { + } else if (nullptr != whole_doc_cnt_iter_ && + (!force_return_docid_ || whole_doc_agg_param_.need_switch_param_) && + OB_FAIL(whole_doc_cnt_iter_->rescan())) { // for force_return_docid_ mdoe, we just read the cnt once. LOG_WARN("failed to rescan doc count iter", K(ret)); } else { next_written_idx_ = 0; limit_param_ = ir_rtdef_->get_inv_idx_scan_rtdef()->limit_param_; - int64_t size = ir_ctdef_->inv_scan_doc_id_col_->is_batch_result() ? 
ir_rtdef_->eval_ctx_->max_batch_size_ : 1; - if (OB_FAIL(cache_doc_ids_.init(size))) { - LOG_WARN("failed to init cache_doc_ids_ array", K(ret)); - } else if (OB_FAIL(cache_doc_ids_.prepare_allocate(size))) { - LOG_WARN("failed to prepare allocate cache_doc_ids_ array", K(ret)); - } } return ret; } int ObDASTextRetrievalMergeIter::set_related_tablet_ids( const ObLSID &ls_id, - const ObDASRelatedTabletID &related_tablet_ids) + const ObDASFTSTabletID &related_tablet_ids) { int ret = OB_SUCCESS; ls_id_ = ls_id; @@ -256,6 +253,7 @@ int ObDASTextRetrievalMergeIter::inner_init(ObDASIterParam ¶m) snapshot_ = retrieval_param.snapshot_; relation_type_ = TokenRelationType::DISJUNCTIVE; + force_return_docid_ = retrieval_param.force_return_docid_; // from param if (OB_ISNULL(mem_context_)) { lib::ContextParam param; @@ -280,6 +278,25 @@ int ObDASTextRetrievalMergeIter::inner_init(ObDASIterParam ¶m) } else { limit_param_ = ir_rtdef_->get_inv_idx_scan_rtdef()->limit_param_; } + if (OB_FAIL(ret)) { + } else if (force_return_docid_) { + if (FALSE_IT(hints_.set_allocator(&mem_context_->get_arena_allocator()))) { + } else if (FALSE_IT(relevances_.set_allocator(&mem_context_->get_arena_allocator()))) { + } else if (FALSE_IT(reverse_hints_.set_allocator(&mem_context_->get_arena_allocator()))) { + } else if (OB_FAIL(hints_.init(size))) { + LOG_WARN("failed to init hints array", K(ret)); + } else if (OB_FAIL(hints_.prepare_allocate(size))) { + LOG_WARN("failed to prepare allocate hints array", K(ret)); + } else if (OB_FAIL(relevances_.init(size))) { + LOG_WARN("failed to init relevances array", K(ret)); + } else if (OB_FAIL(relevances_.prepare_allocate(size))) { + LOG_WARN("failed to prepare allocate relevances array", K(ret)); + } else if (OB_FAIL(reverse_hints_.init(size))) { + LOG_WARN("failed to init hints array", K(ret)); + } else if (OB_FAIL(reverse_hints_.prepare_allocate(size))) { + LOG_WARN("failed to prepare allocate hints array", K(ret)); + } + } } LOG_DEBUG("init text 
retrieval op", K(ret), KPC_(ir_ctdef), KPC_(ir_rtdef)); @@ -290,20 +307,55 @@ int ObDASTextRetrievalMergeIter::inner_init(ObDASIterParam ¶m) int ObDASTextRetrievalMergeIter::inner_reuse() { int ret = OB_SUCCESS; - cache_doc_ids_.reuse(); + int64_t size = ir_ctdef_->inv_scan_doc_id_col_->is_batch_result() ? ir_rtdef_->eval_ctx_->max_batch_size_ : 1; + if (0 == token_iters_.count()) { + // do nothing + } else if (size <= cache_doc_ids_.count()) { + // do nothing + } else { + cache_doc_ids_.reuse(); + hints_.reuse(); + relevances_.reuse(); + reverse_hints_.reuse(); + if (OB_FAIL(cache_doc_ids_.init(size))) { + LOG_WARN("failed to init cache_doc_ids_ array", K(ret)); + } else if (OB_FAIL(cache_doc_ids_.prepare_allocate(size))) { + LOG_WARN("failed to prepare allocate cache_doc_ids_ array", K(ret)); + } else if (force_return_docid_) { + if (OB_FAIL(hints_.init(size))) { + LOG_WARN("failed to init hints array", K(ret)); + } else if (OB_FAIL(hints_.prepare_allocate(size))) { + LOG_WARN("failed to prepare allocate hints array", K(ret)); + } else if (OB_FAIL(relevances_.init(size))) { + LOG_WARN("failed to init relevances array", K(ret)); + } else if (OB_FAIL(relevances_.prepare_allocate(size))) { + LOG_WARN("failed to prepare allocate relevances array", K(ret)); + } else if (OB_FAIL(reverse_hints_.init(size))) { + LOG_WARN("failed to init relevances array", K(ret)); + } else if (OB_FAIL(reverse_hints_.prepare_allocate(size))) { + LOG_WARN("failed to prepare allocate relevances array", K(ret)); + } + } + } next_written_idx_ = 0; - doc_cnt_calculated_ = false; + if (!force_return_docid_) { + doc_cnt_calculated_ = false; + } input_row_cnt_ = 0; output_row_cnt_ = 0; const ObTabletID &old_doc_id_tablet_id = whole_doc_agg_param_.tablet_id_; whole_doc_agg_param_.need_switch_param_ = whole_doc_agg_param_.need_switch_param_ || ((old_doc_id_tablet_id.is_valid() && old_doc_id_tablet_id != doc_id_idx_tablet_id_) ? 
true : false); - if (nullptr != whole_doc_cnt_iter_) { - whole_doc_cnt_iter_->set_scan_param(whole_doc_agg_param_); - if (OB_FAIL(whole_doc_cnt_iter_->reuse())) { - LOG_WARN("failed to reuse whole doc cnt iter", K(ret)); + if (!force_return_docid_ || whole_doc_agg_param_.need_switch_param_) { + if (nullptr != whole_doc_cnt_iter_) { + whole_doc_cnt_iter_->set_scan_param(whole_doc_agg_param_); + if (OB_FAIL(whole_doc_cnt_iter_->reuse())) { + LOG_WARN("failed to reuse whole doc cnt iter", K(ret)); + } } + doc_cnt_calculated_ = false; } + for (int64_t i = 0; OB_SUCC(ret) && i < token_iters_.count(); ++i) { if (OB_FAIL(token_iters_.at(i)->reuse())) { LOG_WARN("failed to reuse token iters", K(ret)); @@ -322,6 +374,9 @@ int ObDASTextRetrievalMergeIter::inner_release() whole_doc_cnt_iter_ = nullptr; token_iters_.reset(); cache_doc_ids_.reset(); + hints_.reset(); + relevances_.reset(); + reverse_hints_.reset(); if (nullptr != mem_context_) { mem_context_->reset_remain_one_page(); DESTROY_CONTEXT(mem_context_); @@ -332,6 +387,7 @@ int ObDASTextRetrievalMergeIter::inner_release() output_row_cnt_ = 0; limit_param_.offset_ = 0; limit_param_.limit_ = -1; + force_return_docid_ = false; doc_cnt_calculated_ = false; doc_cnt_iter_acquired_ = false; is_inited_ = false; @@ -352,6 +408,42 @@ int ObDASTextRetrievalMergeIter::inner_get_next_rows(int64_t &count, int64_t cap return ret; } +int ObDASTextRetrievalMergeIter::set_rangkey_and_selector(const common::ObIArray> &virtual_rangkeys) +{ + int ret = OB_SUCCESS; + rangekey_size_ = virtual_rangkeys.count(); + if (OB_UNLIKELY(!force_return_docid_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected mode", K(ret)); + } else if (rangekey_size_ > OB_MAX(ir_rtdef_->eval_ctx_->max_batch_size_, 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected rangekey size", K(ret), K_(rangekey_size)); + } else if (0 != token_iters_.count()) { + int64_t max_size = ir_ctdef_->inv_scan_doc_id_col_->is_batch_result() ? 
ir_rtdef_->eval_ctx_->max_batch_size_ : 1; + if (rangekey_size_ > cache_doc_ids_.count() || rangekey_size_ > max_size) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected size", K(ret), K(rangekey_size_), K(cache_doc_ids_.count())); + } + for (int64_t i = 0; OB_SUCC(ret) && i < virtual_rangkeys.count(); ++i) { + cache_doc_ids_[i].from_string(virtual_rangkeys.at(i).first.get_string()); + hints_[i] = virtual_rangkeys.at(i).second; + relevances_[i] = 0.0; + if (virtual_rangkeys.at(i).second >= max_size) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected size", K(ret), K(virtual_rangkeys.at(i).second), K(max_size)); + } else { + reverse_hints_[virtual_rangkeys.at(i).second] = i; + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < token_iters_.count(); ++i) { + if (OB_FAIL(token_iters_.at(i)->set_query_token_and_rangekey(query_tokens_.at(i), cache_doc_ids_, rangekey_size_))) { + LOG_WARN("failed to set token and rangekey", K(ret), K_(rangekey_size)); + } + } + } + return ret; +} + int ObDASTextRetrievalMergeIter::check_and_prepare() { int ret = OB_SUCCESS; @@ -371,6 +463,8 @@ int ObDASTextRetrievalMergeIter::check_and_prepare() } else if (OB_FAIL(do_total_doc_cnt())) { if (OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("failed to do total document count", K(ret), KPC_(ir_ctdef)); + } else { + doc_cnt_calculated_ = true; } } else { doc_cnt_calculated_ = true; @@ -379,7 +473,7 @@ int ObDASTextRetrievalMergeIter::check_and_prepare() return ret; } -int ObDASTextRetrievalMergeIter::project_result(const ObIRIterLoserTreeItem &item, const double relevance) +int ObDASTextRetrievalMergeIter::project_result(const ObDocId &docid, const double relevance) { int ret = OB_SUCCESS; // TODO: usage of doc id column is somehow weird here, since in single token retrieval iterators, @@ -395,7 +489,7 @@ int ObDASTextRetrievalMergeIter::project_result(const ObIRIterLoserTreeItem &ite K(ret), KP(doc_id_col), KP(eval_ctx)); } else { ObDatum &doc_id_proj_datum = 
doc_id_col->locate_datum_for_write(*eval_ctx); - doc_id_proj_datum.set_string(item.doc_id_.get_string()); + doc_id_proj_datum.set_string(docid.get_string()); if (ir_ctdef_->need_proj_relevance_score()) { ObExpr *relevance_proj_col = ir_ctdef_->relevance_proj_col_; if (OB_ISNULL(relevance_proj_col)) { @@ -406,12 +500,12 @@ int ObDASTextRetrievalMergeIter::project_result(const ObIRIterLoserTreeItem &ite relevance_proj_datum.set_double(relevance); } } - LOG_DEBUG("project one fulltext search result", K(ret), K(item)); + LOG_DEBUG("project one fulltext search result", K(ret), K(docid), K(relevance)); } return ret; } -int ObDASTextRetrievalMergeIter::project_relevance(const ObIRIterLoserTreeItem &item, const double relevance) +int ObDASTextRetrievalMergeIter::project_relevance(const ObDocId &docid, const double relevance) { int ret = OB_SUCCESS; // TODO: usage of doc id column is somehow weird here, since in single token retrieval iterators, @@ -426,7 +520,7 @@ int ObDASTextRetrievalMergeIter::project_relevance(const ObIRIterLoserTreeItem & LOG_WARN("unexpected nullptr to relevance proejction column", K(ret), KP(doc_id_col), KP(eval_ctx)); } else { - cache_doc_ids_[next_written_idx_] = item.doc_id_; + cache_doc_ids_[next_written_idx_] = docid; if (ir_ctdef_->need_proj_relevance_score()) { ObExpr *relevance_proj_col = ir_ctdef_->relevance_proj_col_; if (OB_ISNULL(relevance_proj_col)) { @@ -558,6 +652,7 @@ int ObDASTextRetrievalMergeIter::do_total_doc_cnt() ObEvalCtx::BatchInfoScopeGuard guard(*ir_rtdef_->eval_ctx_); guard.set_batch_idx(0); if (!ir_ctdef_->need_estimate_total_doc_cnt()) { + bool get_next = false; // When estimation info not exist, or we found estimation info not accurate, calculate document count by scan if (!doc_cnt_iter_acquired_) { if (OB_FAIL(init_total_doc_cnt_param(tx_desc_, snapshot_))) { @@ -567,6 +662,7 @@ int ObDASTextRetrievalMergeIter::do_total_doc_cnt() LOG_WARN("failed to do table scan for document count aggregation", K(ret)); } else { 
doc_cnt_iter_acquired_ = true; + get_next = true; } } else { const ObTabletID old_tablet_id = whole_doc_agg_param_.tablet_id_; @@ -574,13 +670,17 @@ int ObDASTextRetrievalMergeIter::do_total_doc_cnt() || ((old_tablet_id.is_valid() && old_tablet_id != doc_id_idx_tablet_id_ ) ? true : false); whole_doc_agg_param_.tablet_id_ = doc_id_idx_tablet_id_; whole_doc_agg_param_.ls_id_ = ls_id_; - if (OB_FAIL(whole_doc_cnt_iter_->reuse())) { - LOG_WARN("failed to reuse whole doc cnt iter", K(ret)); - } else if (OB_FAIL(whole_doc_cnt_iter_->rescan())) { - LOG_WARN("failed to rescan whole doc cnt iter", K(ret)); + if (!force_return_docid_ || whole_doc_agg_param_.need_switch_param_) { + if (OB_FAIL(whole_doc_cnt_iter_->reuse())) { + LOG_WARN("failed to reuse whole doc cnt iter", K(ret)); + } else if (OB_FAIL(whole_doc_cnt_iter_->rescan())) { + LOG_WARN("failed to rescan whole doc cnt iter", K(ret)); + } else { + get_next = true; + } } } - if (OB_SUCC(ret)) { + if (OB_SUCC(ret) && get_next) { if (OB_UNLIKELY(!static_cast(whole_doc_agg_param_.pd_storage_flag_).is_aggregate_pushdown())) { ret = OB_NOT_IMPLEMENT; LOG_ERROR("aggregate without pushdown not implemented", K(ret)); @@ -690,7 +790,7 @@ int ObDASTRTaatIter::inner_reuse() int ret = OB_SUCCESS; if (hash_maps_) { for (int64_t i = 0; i < hash_map_size_; ++i) { - hash_maps_[i]->reuse(); + hash_maps_[i]->destroy(); } hash_maps_ = nullptr; } @@ -709,7 +809,9 @@ int ObDASTRTaatIter::inner_reuse() hash_map_size_ = 0; cur_map_iter_ = nullptr; next_clear_map_idx_ = 0; - total_doc_cnt_ = -1; + if (!force_return_docid_) { + total_doc_cnt_ = -1; + } cur_map_idx_= -1; cache_first_docid_.reset(); is_chunk_store_inited_ = false; @@ -772,6 +874,8 @@ int ObDASTRTaatIter::check_and_prepare() if (OB_FAIL(do_total_doc_cnt())) { if (OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("failed to do total document count", K(ret), KPC_(ir_ctdef)); + } else { + doc_cnt_calculated_ = true; } } else { doc_cnt_calculated_ = true; @@ -861,7 +965,8 @@ int 
ObDASTRTaatIter::get_next_batch_rows(int64_t &count, int64_t capacity) next_written_idx_ = 0; count = 0; while (OB_SUCC(ret) && next_written_idx_ != real_capacity) { - if (cur_map_idx_!= -1 && (cur_map_iter_ != nullptr && (*cur_map_iter_) != hash_maps_[cur_map_idx_]->end())) { + if (cur_map_idx_!= -1 && + (force_return_docid_ || (cur_map_iter_ != nullptr && (*cur_map_iter_) != hash_maps_[cur_map_idx_]->end()))) { if (OB_UNLIKELY(next_written_idx_ > real_capacity)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected next_written_idx", K(ret), K(next_written_idx_), K(real_capacity)); @@ -871,12 +976,14 @@ int ObDASTRTaatIter::get_next_batch_rows(int64_t &count, int64_t capacity) } } } else if (OB_FAIL(load_next_hashmap())) { // cache data - LOG_WARN("failed to load next hashmap", K(ret)); + if (OB_ITER_END != ret) { + LOG_WARN("failed to load next hashmap", K(ret), K_(cur_map_idx), K_(next_written_idx), K(count), K(real_capacity)); + } } } if (OB_SUCC(ret) || OB_ITER_END == ret) { - if (next_written_idx_ > 0) { + if (next_written_idx_ > 0 && !force_return_docid_) { ObExpr *doc_id_col = ir_ctdef_->inv_scan_doc_id_col_; ObEvalCtx *eval_ctx = ir_rtdef_->eval_ctx_; ObDatum *doc_id_proj_datum = doc_id_col->locate_batch_datums(*eval_ctx); @@ -973,7 +1080,7 @@ int ObDASTRTaatIter::init_stores_by_partition() ret = OB_ERR_UNEXPECTED; LOG_WARN("total doc cnt is not set", K(ret), K_(is_hashmap_inited)); } else { - int64_t partition_cnt = OB_MIN((total_doc_cnt_- 1) / OB_HASHMAP_DEFAULT_SIZE + 1, OB_MAX_HASHMAP_COUNT); + int64_t partition_cnt = force_return_docid_ ? 
1 : OB_MIN((total_doc_cnt_- 1) / OB_HASHMAP_DEFAULT_SIZE + 1, OB_MAX_HASHMAP_COUNT); hash_map_size_ = partition_cnt; void *buf = nullptr; if (nullptr == hash_maps_ && OB_SUCC(ret)) { @@ -1186,7 +1293,7 @@ int ObDASTRTaatIter::fill_chunk_store_by_tr_iter() if (token_idx + 1 < query_tokens_.count()) { if (OB_FAIL(token_iters_.at(0)->reuse())) { LOG_WARN("failed to reuse tr iter", K(ret)); - } else if (OB_FAIL(token_iters_.at(0)->set_query_token(query_tokens_.at(token_idx + 1)))) { + } else if (OB_FAIL(reset_query_token(query_tokens_.at(token_idx + 1)))) { LOG_WARN("failed to set query token", K(ret)); } else if (OB_FAIL(token_iters_.at(0)->rescan())) { LOG_WARN("failed to rescan tr iter", K(ret)); @@ -1227,6 +1334,8 @@ int ObDASTRTaatIter::load_next_hashmap() } else if (FALSE_IT(++cur_map_idx_)) { } else if (OB_FAIL(inner_load_next_hashmap())) { LOG_WARN("failed to load next hashmap", K(ret)); + } else if (force_return_docid_) { + // do nothing } else { hash::ObHashMap::iterator iter = hash_maps_[cur_map_idx_]->begin(); void *buf = nullptr; @@ -1315,6 +1424,249 @@ int ObDASTRTaatIter::inner_load_next_hashmap() return ret; } +int ObDASTRTaatIter::reset_query_token(const ObString &query_token) +{ + int ret = OB_SUCCESS; + if (!force_return_docid_) { + if (OB_FAIL(token_iters_.at(0)->set_query_token(query_token))) { + LOG_WARN("failed to set query token", K(ret)); + } + } else { + if (OB_FAIL(token_iters_.at(0)->set_query_token_and_rangekey(query_token, cache_doc_ids_, rangekey_size_))) { + LOG_WARN("failed to set token and rangekey", K(ret), K_(rangekey_size)); + } + } + return ret; +} + +ObDASTRTaatLookupIter::ObDASTRTaatLookupIter() + : ObDASTRTaatIter() +{ +} + +int ObDASTRTaatLookupIter::rescan() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObDASTextRetrievalMergeIter::rescan())) { + LOG_WARN("failed to rescan iter", K(ret)); + } else if (OB_UNLIKELY(token_iters_.count() > 1)) { + ret= OB_ERR_UNEXPECTED; + LOG_WARN("unexpected iter count mismatch with query tokens", + 
K(ret), K_(query_tokens), K_(token_iters)); + } else if (0 != query_tokens_.count()) { + ObDASTextRetrievalIter *iter = token_iters_.at(0); + if (OB_FAIL(iter->rescan())) { + LOG_WARN("failed to append token iter to array", K(ret)); + } + is_hashmap_inited_ = false; + cur_map_idx_= -1; + next_clear_map_idx_ = 0; + } + return ret; +} + +int ObDASTRTaatLookupIter::fill_output_exprs(int64_t &count, int64_t safe_capacity) +{ + int ret = OB_SUCCESS; + const bool need_relevance = ir_ctdef_->need_proj_relevance_score(); + ObDatum *filter_res = nullptr; + ObExpr *match_filter = need_relevance ? ir_ctdef_->match_filter_ : nullptr; + hash::ObHashMap *map = hash_maps_[cur_map_idx_]; + ObEvalCtx *eval_ctx = ir_rtdef_->eval_ctx_; + ObExpr *relevance_proj_col = ir_ctdef_->relevance_proj_col_; + ObDatum *relevance_proj_datum = nullptr; + ObExpr *doc_id_col = ir_ctdef_->inv_scan_doc_id_col_; + ObDatum *doc_id_proj_datum = doc_id_col->locate_batch_datums(*eval_ctx); + bool filter_valid = false; + + if (need_relevance) { + relevance_proj_datum = relevance_proj_col->locate_datums_for_update(*eval_ctx, safe_capacity); + } + + if (OB_UNLIKELY(count != 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected size", K(ret), K_(rangekey_size), K(safe_capacity), K(count)); + } else if (OB_UNLIKELY(hash_map_size_ != 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected size", K(ret), K_(hash_map_size)); + } else if (OB_UNLIKELY(nullptr != match_filter)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected match filter", K(ret)); + } + + hash::ObHashMap *first_map = hash_maps_[0]; + ObEvalCtx *ctx = ir_rtdef_->eval_ctx_; + for (int64_t i = 0; OB_SUCC(ret) && i < rangekey_size_; ++i) { + double cur_relevance = 0; + if (OB_FAIL(first_map->get_refactored(cache_doc_ids_[i], cur_relevance))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("fail to get relevance", K(ret), K(cur_relevance)); + } else { + ret = OB_SUCCESS; + } + } + if (OB_SUCC(ret)) { + int64_t pos = hints_[i]; + if (pos < 
safe_capacity) { + ObEvalCtx::BatchInfoScopeGuard guard(*ctx); + guard.set_batch_idx(pos); + doc_id_proj_datum[pos].set_string(cache_doc_ids_[i].get_string()); + if (need_relevance) { + relevance_proj_datum[pos].set_double(cur_relevance); + relevance_proj_col->set_evaluated_flag(*eval_ctx); + } + output_row_cnt_ ++; + input_row_cnt_ ++; + count ++; + } else { + relevances_[pos] = cur_relevance; + } + next_written_idx_++; + } + } + if (OB_SUCC(ret)) { + next_written_idx_ = count; + if (need_relevance) { + relevance_proj_col->set_evaluated_projected(*eval_ctx); + } + if (count != OB_MIN(safe_capacity, rangekey_size_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected size", K(ret), K(count), K_(rangekey_size), K(safe_capacity)); + } else if (count == rangekey_size_) { + ret = OB_ITER_END; + } + } + return ret; +} + +int ObDASTRTaatLookupIter::inner_get_next_row() +{ + int ret = OB_SUCCESS; + bool need_fill_doc_cnt = !doc_cnt_calculated_; + if (OB_UNLIKELY(1 != rangekey_size_)) { // if rangekey_size_ > 1, UNSUPPORTED + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected rangekey size", K(ret), K_(rangekey_size)); + } else if (next_written_idx_ >= rangekey_size_) { + ret = OB_ITER_END; + } else if (OB_FAIL(check_and_prepare())) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to prepare to get next row", K(ret)); + } else { + ObDocId default_docid; + ObEvalCtx *ctx = ir_rtdef_->eval_ctx_; + ObEvalCtx::BatchInfoScopeGuard guard(*ctx); + guard.set_batch_idx(0); + if (OB_FAIL(project_result(default_docid,0))) { + LOG_WARN("failed to project result", K(ret)); + } else { + next_written_idx_++; + } + } + } else if (need_fill_doc_cnt && OB_FAIL(fill_total_doc_cnt())) { + LOG_WARN("failed to fill total document count", K(ret), K(total_doc_cnt_)); + } else if (0 == total_doc_cnt_) { + ret = OB_ITER_END; + } else if (total_doc_cnt_ < 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected total doc cnt", K(ret), K(total_doc_cnt_)); + } else { + int64_t count = 0; + const int64_t 
cap = 1; + if (OB_FAIL(get_next_batch_rows(count, cap))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next row with taat", K(ret)); + } else if (OB_UNLIKELY(count != 0)) { + ret = OB_SUCCESS; + } + } else if (OB_UNLIKELY(count != 1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected row count", K(ret), K(count)); + } + } + return ret; +} + +int ObDASTRTaatLookupIter::inner_get_next_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + count = 0; + bool need_fill_doc_cnt = !doc_cnt_calculated_; + if (OB_UNLIKELY(capacity == 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected capacity size", K(ret), K(capacity)); + } else if (OB_FAIL(check_and_prepare())) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to prepare to get next row", K(ret)); + } else if (next_written_idx_ == rangekey_size_) { + // do nothing + } else if (next_written_idx_ != 0 ) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected next_written_idx", K(ret), K_(next_written_idx)); + } else { + ret = OB_SUCCESS; + ObEvalCtx *ctx = ir_rtdef_->eval_ctx_; + ObExpr *relevance_proj_col = ir_ctdef_->relevance_proj_col_; + while (OB_SUCC(ret) && next_written_idx_ < rangekey_size_) { + // fill the remaining results with the relevance value of '0' + // Note: if we need calculate the docid expr, fix the code and cache the hints. 
+ ObDocId default_docid; + ObEvalCtx::BatchInfoScopeGuard guard(*ctx); + guard.set_batch_idx(next_written_idx_); + if (OB_FAIL(project_result(default_docid, 0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to project result", K(ret)); + } + next_written_idx_ ++; + } + if (OB_SUCC(ret)) { + for (int i = 0; i < rangekey_size_; i++) { + relevance_proj_col->get_evaluated_flags(*ctx).set(i); + } + ret = OB_ITER_END; + } + relevance_proj_col->set_evaluated_projected(*ctx); + count = OB_MIN(rangekey_size_, capacity); + next_written_idx_ = OB_MIN(rangekey_size_, capacity); + } + } else if (need_fill_doc_cnt && OB_FAIL(fill_total_doc_cnt())) { + LOG_WARN("failed to fill total document count", K(ret), K(total_doc_cnt_)); + } else if (0 == total_doc_cnt_) { + ret = OB_ITER_END; + } else if (total_doc_cnt_ < 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected total doc cnt", K(ret), K(total_doc_cnt_)); + } else if (next_written_idx_ == rangekey_size_) { + ret = OB_ITER_END; + } else if (next_written_idx_ > rangekey_size_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected capacity size", K(ret), K(capacity)); + } else if (next_written_idx_ == 0 &&OB_FAIL(get_next_batch_rows(count, capacity))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to get next rows with taat", K(ret)); + } + } else if (next_written_idx_ < rangekey_size_) { + int remain_size = rangekey_size_ - next_written_idx_; + int return_size = OB_MIN(remain_size, capacity); + // next_written_idx_ is the output idx + ObEvalCtx *ctx = ir_rtdef_->eval_ctx_; + while (count < return_size && OB_SUCC(ret)) { + int64_t pos = reverse_hints_[next_written_idx_]; + ObEvalCtx::BatchInfoScopeGuard guard(*ctx); + guard.set_batch_idx(count); + if (OB_FAIL(project_result(cache_doc_ids_[pos], relevances_[next_written_idx_]))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to project result", K(ret)); + } + next_written_idx_++; + count ++; + } + if (OB_SUCC(ret) && next_written_idx_ == rangekey_size_) { + ret = 
OB_ITER_END; + } + } + return ret; +} + ObDASTRDaatIter::ObDASTRDaatIter() : ObDASTextRetrievalMergeIter(), loser_tree_cmp_(), @@ -1375,27 +1727,41 @@ int ObDASTRDaatIter::set_merge_iters(const ObIArray &retrieval_iter ret = OB_ERR_UNEXPECTED; LOG_WARN("processing type unexpected", K(ret)); } else { - if (FALSE_IT(next_batch_iter_idxes_.set_allocator(&mem_context_->get_arena_allocator()))) { - } else if (OB_FAIL(next_batch_iter_idxes_.init(query_tokens_.count()))) { - LOG_WARN("failed to init next batch iter idxes array", K(ret)); - } else if (OB_FAIL(next_batch_iter_idxes_.prepare_allocate(query_tokens_.count()))) { - LOG_WARN("failed to prepare allocate next batch iter idxes array", K(ret)); - } else { - next_batch_cnt_ = query_tokens_.count(); - for (int64_t i = 0; OB_SUCC(ret) && i < query_tokens_.count(); ++i) { - const ObString &query_token = query_tokens_.at(i); - ObDASTextRetrievalIter *iter = static_cast(retrieval_iters.at(i)); - if (OB_FAIL(token_iters_.push_back(iter))) { - LOG_WARN("failed to append token iter to array", K(ret)); - } else { - next_batch_iter_idxes_[i] = i; - } + next_batch_cnt_ = query_tokens_.count(); + for (int64_t i = 0; OB_SUCC(ret) && i < query_tokens_.count(); ++i) { + const ObString &query_token = query_tokens_.at(i); + ObDASTextRetrievalIter *iter = static_cast(retrieval_iters.at(i)); + if (OB_FAIL(token_iters_.push_back(iter))) { + LOG_WARN("failed to append token iter to array", K(ret)); } } } return ret; } +int ObDASTRDaatIter::do_table_scan() +{ + int ret = OB_SUCCESS; + if (FALSE_IT(next_batch_iter_idxes_.set_allocator(&mem_context_->get_arena_allocator()))) { + } else if (OB_FAIL(next_batch_iter_idxes_.init(query_tokens_.count()))) { + LOG_WARN("failed to init next batch iter idxes array", K(ret)); + } else if (OB_FAIL(next_batch_iter_idxes_.prepare_allocate(query_tokens_.count()))) { + LOG_WARN("failed to prepare allocate next batch iter idxes array", K(ret)); + } else if (query_tokens_.count()!= 0 && 
OB_FAIL(iter_row_heap_->open(query_tokens_.count()))) { + LOG_WARN("failed to open iter row heap", K(ret), K_(query_tokens)); + } else { + next_batch_cnt_ = query_tokens_.count(); + for (int64_t i = 0; i < next_batch_cnt_; ++i) { + next_batch_iter_idxes_[i] = i; + } + } + if (OB_FAIL(ret)) { + } else if(OB_FAIL(ObDASTextRetrievalMergeIter::do_table_scan())) { + LOG_WARN("failed to do table scan", K(ret)); + } + return ret; +} + int ObDASTRDaatIter::inner_init(ObDASIterParam ¶m) { int ret = OB_SUCCESS; @@ -1580,7 +1946,7 @@ int ObDASTRDaatIter::pull_next_batch_rows() if (OB_SUCC(ret)) { if (iter_row_heap_->empty()) { ret = OB_ITER_END; - } else if (OB_FAIL(iter_row_heap_->rebuild())) { + } else if (0 != next_batch_cnt_ && OB_FAIL(iter_row_heap_->rebuild())) { LOG_WARN("fail to rebuild loser tree", K(ret), K_(next_batch_cnt)); } else { next_batch_cnt_ = 0; @@ -1621,7 +1987,7 @@ int ObDASTRDaatIter::pull_next_batch_rows_with_batch_mode() if (OB_SUCC(ret)) { if (iter_row_heap_->empty()) { ret = OB_ITER_END; - } else if (OB_FAIL(iter_row_heap_->rebuild())) { + } else if (0 != next_batch_cnt_ && OB_FAIL(iter_row_heap_->rebuild())) { LOG_WARN("fail to rebuild loser tree", K(ret), K_(next_batch_cnt)); } else { next_batch_cnt_ = 0; @@ -1675,14 +2041,279 @@ int ObDASTRDaatIter::next_disjunctive_document(bool is_batch) if (OB_SUCC(ret)) { const double relevance_score = ir_ctdef_->need_calc_relevance() ? 
cur_doc_relevance : 1; - if (!is_batch && OB_FAIL(project_result(*top_item, relevance_score))) { + if (!is_batch && OB_FAIL(project_result(top_item->doc_id_, relevance_score))) { LOG_WARN("failed to project result", K(ret)); - } else if (is_batch && OB_FAIL(project_relevance(*top_item, relevance_score))) { + } else if (is_batch && OB_FAIL(project_relevance(top_item->doc_id_, relevance_score))) { LOG_WARN("failed to project relevance", K(ret)); } } + return ret; +} + +ObDASTRDaatLookupIter::ObDASTRDaatLookupIter() + : ObDASTRDaatIter() +{ +} + +int ObDASTRDaatLookupIter::rescan() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObDASTextRetrievalMergeIter::rescan())) { + LOG_WARN("failed to rescan iter", K(ret)); + } else if (OB_UNLIKELY(query_tokens_.count() != token_iters_.count())) { + ret= OB_ERR_UNEXPECTED; + LOG_WARN("unexpected iter count mismatch with query tokens", + K(ret), K_(query_tokens), K_(token_iters)); + } else if (0 != query_tokens_.count()) { + if (OB_FAIL(next_batch_iter_idxes_.init(query_tokens_.count()))) { + LOG_WARN("failed to init next batch iter idxes array", K(ret)); + } else if (OB_FAIL(next_batch_iter_idxes_.prepare_allocate(query_tokens_.count()))) { + LOG_WARN("failed to prepare allocate next batch iter idxes array", K(ret)); + } else if (OB_FAIL(iter_row_heap_->open(query_tokens_.count()))) { + LOG_WARN("failed to open iter row heap", K(ret), K_(query_tokens)); + } else { + next_batch_cnt_ = token_iters_.count(); + for (int64_t i = 0; OB_SUCC(ret) && i < token_iters_.count(); ++i) { + ObDASTextRetrievalIter *iter = token_iters_.at(i); + if (OB_FAIL(iter->rescan())) { + LOG_WARN("failed to append token iter to array", K(ret)); + } else { + next_batch_iter_idxes_[i] = i; + } + } + } + } + return ret; +} + +int ObDASTRDaatLookupIter::inner_get_next_row() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(1 != rangekey_size_)) { // if rangekey_size_ > 1, UNSUPPORTED + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected rangekey size", K(ret), 
K_(rangekey_size)); + } else if (next_written_idx_ >= rangekey_size_) { + ret = OB_ITER_END; + } else if (OB_FAIL(check_and_prepare())) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to prepare to get next rows", K(ret)); + } else { + ObEvalCtx *ctx = ir_rtdef_->eval_ctx_; + ObEvalCtx::BatchInfoScopeGuard guard(*ctx); + guard.set_batch_idx(0); + ObDocId default_doc_id; + if (OB_FAIL(project_result(default_doc_id, 0))) { + LOG_WARN("failed to project result", K(ret)); + } else { + next_written_idx_++; + } + } + } else if (next_written_idx_ == rangekey_size_) { + ret = OB_ITER_END; + } else if (next_written_idx_ > rangekey_size_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected capacity size", K(ret)); + } else if (next_written_idx_ == 0) { + clear_evaluated_infos(); + int capacity = 1; + if (OB_FAIL(pull_next_batch_rows())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to pull next batch rows from iterator", K(ret)); + } else if (OB_FAIL(project_result(cache_doc_ids_[0], 0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to project result", K(ret)); + } else { + next_written_idx_ ++; + } + } else if (OB_FAIL(next_disjunctive_document(capacity))) { + LOG_WARN("failed to get next document with disjunctive tokens", K(ret)); + } else { + next_written_idx_ ++; + } + } + return ret; +} + +int ObDASTRDaatLookupIter::inner_get_next_rows(int64_t &count, int64_t capacity) +{ + int ret = OB_SUCCESS; + count = 0; + if (OB_UNLIKELY(capacity == 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected capacity size", K(ret), K(capacity)); + } else if (OB_FAIL(check_and_prepare())) { + if (OB_ITER_END != ret) { + LOG_WARN("failed to prepare to get next rows", K(ret)); + } else if (next_written_idx_ == rangekey_size_) { + // do nothing + } else if (next_written_idx_ != 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected next_written_idx", K(ret), K_(next_written_idx)); + } else { + ObEvalCtx *ctx = ir_rtdef_->eval_ctx_; + ObExpr *relevance_proj_col = 
ir_ctdef_->relevance_proj_col_; + ret = OB_SUCCESS; + while (OB_SUCC(ret) && next_written_idx_ < rangekey_size_) { + // fill the remaining results with the relevance value of '0' + // Note: if we need calculate the docid expr, fix the code and cache the hints. + ObDocId default_docid; + ObEvalCtx::BatchInfoScopeGuard guard(*ctx); + guard.set_batch_idx(next_written_idx_); + if (OB_FAIL(project_result(default_docid, 0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to project result", K(ret)); + } + next_written_idx_ ++; + } + if (OB_SUCC(ret)) { + for (int i = 0; i < rangekey_size_; i++) { + relevance_proj_col->get_evaluated_flags(*ctx).set(i); + } + ret = OB_ITER_END; + } + relevance_proj_col->set_evaluated_projected(*ctx); + count = OB_MIN(rangekey_size_, capacity); + next_written_idx_ = OB_MIN(rangekey_size_, capacity); + } + } else if (next_written_idx_ == rangekey_size_) { + ret = OB_ITER_END; + } else if (next_written_idx_ > rangekey_size_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected capacity size", K(ret), K(capacity)); + } else if (next_written_idx_ == 0) { + // for normal case + ObExpr *match_filter = ir_ctdef_->need_calc_relevance() ? 
ir_ctdef_->match_filter_ : nullptr; + const bool is_batch = true; + next_written_idx_ = 0; + bool filter_valid = false; + // fill the all result in the range of rangkey_size_ + while (OB_SUCC(ret) && next_written_idx_ < rangekey_size_) { + if (OB_FAIL(pull_next_batch_rows_with_batch_mode())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to pull next batch rows from iterator", K(ret)); + } + } else if (OB_FAIL(next_disjunctive_document(capacity))) { + LOG_WARN("failed to get next document with disjunctive tokens", K(ret)); + } else { + next_written_idx_ ++; + } + } + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + // fill the remaining results with the relevance value of '0' + if (OB_LIKELY(next_written_idx_ < rangekey_size_)) { + ObEvalCtx *ctx = ir_rtdef_->eval_ctx_; + while (next_written_idx_ < rangekey_size_ && OB_SUCC(ret)) { + int64_t pos = hints_[next_written_idx_]; + if (pos < capacity) { + ObEvalCtx::BatchInfoScopeGuard guard(*ctx); + guard.set_batch_idx(pos); + if (OB_FAIL(project_result(cache_doc_ids_[next_written_idx_], 0))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to project result", K(ret)); + } + } + next_written_idx_++; + } + } + // output the part result in the range of capacity + if (OB_SUCC(ret)) { + ObEvalCtx *ctx = ir_rtdef_->eval_ctx_; + ObExpr *relevance_proj_col = ir_ctdef_->relevance_proj_col_; + for (int i = 0; i < OB_MIN(rangekey_size_, capacity); i++) { + relevance_proj_col->get_evaluated_flags(*ctx).set(i); + } + relevance_proj_col->set_evaluated_projected(*ctx); + next_written_idx_ = OB_MIN(rangekey_size_, capacity); + count = OB_MIN(rangekey_size_, capacity); + } + } else if (next_written_idx_ < rangekey_size_) { + int remain_size = rangekey_size_ - next_written_idx_; + int return_size = OB_MIN(remain_size, capacity); + // next_written_idx_ is the output idx + ObEvalCtx *ctx = ir_rtdef_->eval_ctx_; + while (count < return_size && OB_SUCC(ret)) { + int64_t pos = reverse_hints_[next_written_idx_]; + 
ObEvalCtx::BatchInfoScopeGuard guard(*ctx); + guard.set_batch_idx(count); + if (OB_FAIL(project_result(cache_doc_ids_[pos], relevances_[next_written_idx_]))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to project result", K(ret)); + } + next_written_idx_++; + count ++; + } + if (OB_SUCC(ret) && next_written_idx_ == rangekey_size_) { + ret = OB_ITER_END; + } + } return ret; } + +int ObDASTRDaatLookupIter::next_disjunctive_document(const int capacity) +{ + int ret = OB_SUCCESS; + const ObIRIterLoserTreeItem *top_item = nullptr; + ObEvalCtx *ctx = ir_rtdef_->eval_ctx_; + if (!iter_row_heap_->empty()) { + if (OB_FAIL(iter_row_heap_->top(top_item))) { + LOG_WARN("failed to get top item from heap", K(ret)); + } else if (cache_doc_ids_[next_written_idx_] != top_item->doc_id_) { + // fill some unexit results with the relevance value of '0' + int64_t pos = hints_[next_written_idx_]; + if (pos < capacity) { + ObEvalCtx::BatchInfoScopeGuard guard(*ctx); + guard.set_batch_idx(pos); + if (OB_FAIL(project_result(cache_doc_ids_[next_written_idx_], 0))) { + LOG_WARN("failed to project result", K(ret)); + } + } else { + // cache it + relevances_[pos] = 0; + } + } else { + int64_t doc_cnt = 0; + bool curr_doc_end = false; + // Do we need to use ObExpr to collect relevance? + double cur_doc_relevance = 0.0; + while (OB_SUCC(ret) && !iter_row_heap_->empty() && !curr_doc_end) { + if (iter_row_heap_->is_unique_champion()) { + curr_doc_end = true; + } + if (OB_FAIL(iter_row_heap_->top(top_item))) { + LOG_WARN("failed to get top item from heap", K(ret)); + } else { + // consider to add an expr for collectiong conjunction result between query tokens here? + cur_doc_relevance += top_item->relevance_; + next_batch_iter_idxes_[next_batch_cnt_++] = top_item->iter_idx_; + if (OB_FAIL(iter_row_heap_->pop())) { + LOG_WARN("failed to pop top item in heap", K(ret)); + } + } + } + + if (OB_SUCC(ret)) { + const double relevance_score = ir_ctdef_->need_calc_relevance() ? 
cur_doc_relevance : 0; + int64_t pos = hints_[next_written_idx_]; + if (pos < capacity) { + ObEvalCtx::BatchInfoScopeGuard guard(*ctx); + guard.set_batch_idx(pos); + if (OB_FAIL(project_result(top_item->doc_id_, relevance_score))) { + LOG_WARN("failed to project result", K(ret)); + } + } else { + // cache it + relevances_[pos] = relevance_score; + } + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iter_row_heap_ is empty", K(ret)); + } + return ret; +} } // namespace sql } // namespace oceanbase diff --git a/src/sql/das/iter/ob_das_text_retrieval_merge_iter.h b/src/sql/das/iter/ob_das_text_retrieval_merge_iter.h index c9d10c381..b5519c005 100644 --- a/src/sql/das/iter/ob_das_text_retrieval_merge_iter.h +++ b/src/sql/das/iter/ob_das_text_retrieval_merge_iter.h @@ -65,7 +65,8 @@ public: doc_cnt_iter_(nullptr), tx_desc_(nullptr), snapshot_(nullptr), - query_tokens_() + query_tokens_(), + force_return_docid_(false) {} virtual bool is_valid() const override @@ -79,6 +80,7 @@ public: transaction::ObTxDesc *tx_desc_; transaction::ObTxReadSnapshot *snapshot_; ObArray query_tokens_; + bool force_return_docid_; }; class ObDASTextRetrievalMergeIter : public ObDASIter @@ -107,11 +109,13 @@ public: void set_doc_id_idx_tablet_id(const ObTabletID &tablet_id) { doc_id_idx_tablet_id_ = tablet_id; } void set_ls_id(const ObLSID &ls_id) { ls_id_ = ls_id; } storage::ObTableScanParam &get_doc_agg_param() { return whole_doc_agg_param_; } - int set_related_tablet_ids(const ObLSID &ls_id, const ObDASRelatedTabletID &related_tablet_ids); + int set_related_tablet_ids(const ObLSID &ls_id, const ObDASFTSTabletID &related_tablet_ids); virtual int set_merge_iters(const ObIArray &retrieval_iters); const ObIArray &get_query_tokens() { return query_tokens_; } bool is_taat_mode() { return RetrievalProcType::TAAT == processing_type_; } static int build_query_tokens(const ObDASIRScanCtDef *ir_ctdef, ObDASIRScanRtDef *ir_rtdef, common::ObIAllocator &alloc, ObArray &query_tokens); + virtual int 
set_rangkey_and_selector(const common::ObIArray> &virtual_rangkeys); + protected: virtual int inner_init(ObDASIterParam ¶m) override; virtual int inner_reuse() override; @@ -120,8 +124,8 @@ protected: virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override; protected: virtual int check_and_prepare(); - int project_result(const ObIRIterLoserTreeItem &item, const double relevance); - int project_relevance(const ObIRIterLoserTreeItem &item, const double relevance); + int project_result(const ObDocId &docid, const double relevance); + int project_relevance(const ObDocId &docid, const double relevance); int project_docid(); void clear_evaluated_infos(); int init_iters( @@ -136,6 +140,7 @@ protected: static const int64_t OB_DEFAULT_QUERY_TOKEN_ITER_CNT = 4; typedef ObSEArray ObDASTokenRetrievalIterArray; lib::MemoryContext mem_context_; + ObArenaAllocator allocator_; TokenRelationType relation_type_; RetrievalProcType processing_type_; const ObDASIRScanCtDef *ir_ctdef_; @@ -147,12 +152,17 @@ protected: ObArray query_tokens_; ObDASTokenRetrievalIterArray token_iters_; ObFixedArray cache_doc_ids_; + ObFixedArray hints_; // the postion of the cur idx cache doc in output exprs + ObFixedArray relevances_; + ObFixedArray reverse_hints_; // the postion of the cur output doc in cache doc + int64_t rangekey_size_; int64_t next_written_idx_; ObDASScanIter *whole_doc_cnt_iter_; ObTableScanParam whole_doc_agg_param_; common::ObLimitParam limit_param_; int64_t input_row_cnt_; int64_t output_row_cnt_; + bool force_return_docid_; // for function lookup bool doc_cnt_calculated_; bool doc_cnt_iter_acquired_; bool is_inited_; @@ -172,15 +182,16 @@ protected: virtual int inner_get_next_row() override; virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override; virtual int check_and_prepare() override; -private: +protected: int get_next_batch_rows(int64_t &count, int64_t capacity); - int fill_output_exprs(int64_t &count, int64_t safe_capacity); + virtual 
// Lookup variant of the TaaT text-retrieval iterator.
// Derives from ObDASTRTaatIter and overrides rescan(), both row-fetch entry
// points and fill_output_exprs(); the implementations live in the .cpp file.
class ObDASTRTaatLookupIter : public ObDASTRTaatIter
{
public:
  ObDASTRTaatLookupIter();
  virtual ~ObDASTRTaatLookupIter() {}
  virtual int rescan() override;
protected:
  // single-row fetch
  virtual int inner_get_next_row() override;
  // batch fetch: `count` is an output parameter, `capacity` is the batch limit
  virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override;
  // overrides the base-class hook that fills the output expressions
  virtual int fill_output_exprs(int64_t &count, int64_t safe_capacity) override;
};
next_batch_cnt_; }; +class ObDASTRDaatLookupIter : public ObDASTRDaatIter +{ +public: + ObDASTRDaatLookupIter(); + virtual ~ObDASTRDaatLookupIter() {} + virtual int rescan() override; +protected: + virtual int inner_get_next_row() override; + virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override; + int next_disjunctive_document(const int capacity); +}; + } // namespace sql } // namespace oceanbase diff --git a/src/sql/das/ob_das_attach_define.cpp b/src/sql/das/ob_das_attach_define.cpp index a7a1d75fb..afbaefc7a 100644 --- a/src/sql/das/ob_das_attach_define.cpp +++ b/src/sql/das/ob_das_attach_define.cpp @@ -65,6 +65,11 @@ ObDASScanRtDef *ObDASTableLookupRtDef::get_lookup_scan_rtdef() return scan_rtdef; } +OB_SERIALIZE_MEMBER((ObDASIndexProjLookupCtDef, ObDASTableLookupCtDef), + index_scan_proj_exprs_); + +OB_SERIALIZE_MEMBER((ObDASIndexProjLookupRtDef, ObDASTableLookupRtDef)); + OB_SERIALIZE_MEMBER((ObDASSortCtDef, ObDASAttachCtDef), sort_exprs_, sort_collations_, diff --git a/src/sql/das/ob_das_attach_define.h b/src/sql/das/ob_das_attach_define.h index 035ec8553..3e46125b9 100644 --- a/src/sql/das/ob_das_attach_define.h +++ b/src/sql/das/ob_das_attach_define.h @@ -55,8 +55,8 @@ struct ObDASTableLookupCtDef : ObDASAttachCtDef { OB_UNIS_VERSION(1); public: - ObDASTableLookupCtDef(common::ObIAllocator &alloc) - : ObDASAttachCtDef(alloc, DAS_OP_TABLE_LOOKUP), + ObDASTableLookupCtDef(common::ObIAllocator &alloc, const ObDASOpType &op_type = DAS_OP_TABLE_LOOKUP) + : ObDASAttachCtDef(alloc, op_type), is_global_index_(false) { } @@ -75,8 +75,8 @@ struct ObDASTableLookupRtDef : ObDASAttachRtDef { OB_UNIS_VERSION(1); public: - ObDASTableLookupRtDef() - : ObDASAttachRtDef(DAS_OP_TABLE_LOOKUP) + ObDASTableLookupRtDef(const ObDASOpType &op_type = DAS_OP_TABLE_LOOKUP) + : ObDASAttachRtDef(op_type) {} virtual ~ObDASTableLookupRtDef() {} @@ -89,6 +89,41 @@ public: ObDASScanRtDef *get_lookup_scan_rtdef(); }; +struct ObDASIndexProjLookupCtDef : 
ObDASTableLookupCtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASIndexProjLookupCtDef(common::ObIAllocator &alloc) + : ObDASTableLookupCtDef(alloc, DAS_OP_INDEX_PROJ_LOOKUP), + index_scan_proj_exprs_(alloc) + {} + virtual ~ObDASIndexProjLookupCtDef() {} + + const ObDASBaseCtDef *get_lookup_ctdef() const + { + OB_ASSERT(2 == children_cnt_ && children_ != nullptr); + return children_[1]; + } +public: + ExprFixedArray index_scan_proj_exprs_; +}; + +struct ObDASIndexProjLookupRtDef : ObDASTableLookupRtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASIndexProjLookupRtDef() + : ObDASTableLookupRtDef(DAS_OP_INDEX_PROJ_LOOKUP) + {} + virtual ~ObDASIndexProjLookupRtDef() {} + + ObDASBaseRtDef *get_lookup_rtdef() + { + OB_ASSERT(2 == children_cnt_ && children_ != nullptr); + return children_[1]; + } +}; + struct ObDASSortCtDef : ObDASAttachCtDef { OB_UNIS_VERSION(1); diff --git a/src/sql/das/ob_das_def_reg.h b/src/sql/das/ob_das_def_reg.h index 046c5460c..f17cf1f1d 100644 --- a/src/sql/das/ob_das_def_reg.h +++ b/src/sql/das/ob_das_def_reg.h @@ -150,11 +150,18 @@ struct ObDASVIdMergeCtDef; struct ObDASVIdMergeRtDef; REGISTER_DAS_ATTACH_OP(DAS_OP_VID_MERGE, ObDASVIdMergeCtDef, ObDASVIdMergeRtDef); +struct ObDASFuncLookupCtDef; +struct ObDASFuncLookupRtDef; +REGISTER_DAS_ATTACH_OP(DAS_OP_FUNC_LOOKUP, ObDASFuncLookupCtDef, ObDASFuncLookupRtDef); struct ObDASIndexMergeCtDef; struct ObDASIndexMergeRtDef; REGISTER_DAS_ATTACH_OP(DAS_OP_INDEX_MERGE, ObDASIndexMergeCtDef, ObDASIndexMergeRtDef); +struct ObDASIndexProjLookupCtDef; +struct ObDASIndexProjLookupRtDef; +REGISTER_DAS_ATTACH_OP(DAS_OP_INDEX_PROJ_LOOKUP, ObDASIndexProjLookupCtDef, ObDASIndexProjLookupRtDef); + #undef REGISTER_DAS_ATTACH_OP } // namespace sql } // namespace oceanbase diff --git a/src/sql/das/ob_das_domain_utils.cpp b/src/sql/das/ob_das_domain_utils.cpp index e13882c97..554434974 100644 --- a/src/sql/das/ob_das_domain_utils.cpp +++ b/src/sql/das/ob_das_domain_utils.cpp @@ -33,6 +33,9 @@ namespace sql ObObjDatumMapType 
ObFTIndexRowCache::FTS_INDEX_TYPES[] = {OBJ_DATUM_STRING, OBJ_DATUM_STRING, OBJ_DATUM_8BYTE_DATA, OBJ_DATUM_8BYTE_DATA}; ObObjDatumMapType ObFTIndexRowCache::FTS_DOC_WORD_TYPES[] = {OBJ_DATUM_STRING, OBJ_DATUM_STRING, OBJ_DATUM_8BYTE_DATA, OBJ_DATUM_8BYTE_DATA}; +ObExprOperatorType ObFTIndexRowCache::FTS_INDEX_EXPR_TYPE[] = {T_FUN_SYS_WORD_SEGMENT, T_FUN_SYS_DOC_ID, T_FUN_SYS_WORD_COUNT, T_FUN_SYS_DOC_LENGTH}; +ObExprOperatorType ObFTIndexRowCache::FTS_DOC_WORD_EXPR_TYPE[] = {T_FUN_SYS_DOC_ID, T_FUN_SYS_WORD_SEGMENT, T_FUN_SYS_WORD_COUNT, T_FUN_SYS_DOC_LENGTH}; + ObFTIndexRowCache::ObFTIndexRowCache() : rows_(), row_idx_(0), diff --git a/src/sql/das/ob_das_domain_utils.h b/src/sql/das/ob_das_domain_utils.h index 8604d1edd..5b4decc31 100644 --- a/src/sql/das/ob_das_domain_utils.h +++ b/src/sql/das/ob_das_domain_utils.h @@ -28,6 +28,8 @@ class ObFTIndexRowCache final public: static ObObjDatumMapType FTS_INDEX_TYPES[4]; static ObObjDatumMapType FTS_DOC_WORD_TYPES[4]; + static ObExprOperatorType FTS_INDEX_EXPR_TYPE[4]; + static ObExprOperatorType FTS_DOC_WORD_EXPR_TYPE[4]; ObFTIndexRowCache(); ~ObFTIndexRowCache(); diff --git a/src/sql/das/ob_das_ir_define.cpp b/src/sql/das/ob_das_ir_define.cpp index ab7448829..a34bbf995 100644 --- a/src/sql/das/ob_das_ir_define.cpp +++ b/src/sql/das/ob_das_ir_define.cpp @@ -35,5 +35,13 @@ OB_SERIALIZE_MEMBER((ObDASIRAuxLookupCtDef, ObDASAttachCtDef), OB_SERIALIZE_MEMBER((ObDASIRAuxLookupRtDef, ObDASAttachRtDef)); +OB_SERIALIZE_MEMBER((ObDASFuncLookupCtDef, ObDASAttachCtDef), + main_lookup_cnt_, + doc_id_lookup_cnt_, + func_lookup_cnt_, + lookup_doc_id_expr_); + +OB_SERIALIZE_MEMBER((ObDASFuncLookupRtDef, ObDASAttachRtDef)); + } // sql } // oceanbase diff --git a/src/sql/das/ob_das_ir_define.h b/src/sql/das/ob_das_ir_define.h index c9f7c30b3..0850e63f4 100644 --- a/src/sql/das/ob_das_ir_define.h +++ b/src/sql/das/ob_das_ir_define.h @@ -33,9 +33,7 @@ public: relevance_expr_(nullptr), relevance_proj_col_(nullptr), 
estimated_total_doc_cnt_(0), - flags_(0) - { - } + flags_(0) {} bool need_calc_relevance() const { return nullptr != relevance_expr_; } bool need_proj_relevance_score() const { return nullptr != relevance_proj_col_; } bool need_fwd_idx_agg() const { return has_fwd_agg_ && need_calc_relevance(); } @@ -177,8 +175,7 @@ struct ObDASIRAuxLookupCtDef : ObDASAttachCtDef public: ObDASIRAuxLookupCtDef(common::ObIAllocator &alloc) : ObDASAttachCtDef(alloc, DAS_OP_IR_AUX_LOOKUP), - relevance_proj_col_(nullptr) - { } + relevance_proj_col_(nullptr) {} const ObDASBaseCtDef *get_doc_id_scan_ctdef() const { @@ -199,8 +196,7 @@ struct ObDASIRAuxLookupRtDef : ObDASAttachRtDef OB_UNIS_VERSION(1); public: ObDASIRAuxLookupRtDef() - : ObDASAttachRtDef(DAS_OP_IR_AUX_LOOKUP) - {} + : ObDASAttachRtDef(DAS_OP_IR_AUX_LOOKUP) {} virtual ~ObDASIRAuxLookupRtDef() {} @@ -216,6 +212,125 @@ public: } }; +struct ObDASFuncLookupCtDef : ObDASAttachCtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASFuncLookupCtDef(common::ObIAllocator &alloc) + : ObDASAttachCtDef(alloc, DAS_OP_FUNC_LOOKUP), + main_lookup_cnt_(0), + doc_id_lookup_cnt_(0), + func_lookup_cnt_(0), + lookup_doc_id_expr_(nullptr) {} + + virtual ~ObDASFuncLookupCtDef() {} + + bool has_main_table_lookup() const { return main_lookup_cnt_ > 0; } + bool has_doc_id_lookup() const { return doc_id_lookup_cnt_ > 0; } + int64_t get_func_lookup_scan_idx(const int64_t idx) const + { + OB_ASSERT(children_cnt_ == (main_lookup_cnt_ + doc_id_lookup_cnt_ + func_lookup_cnt_)); + return (idx < func_lookup_cnt_) ? (idx + doc_id_lookup_cnt_ + main_lookup_cnt_) : -1; + } + + int64_t get_doc_id_lookup_scan_idx() const + { + OB_ASSERT(children_cnt_ == (main_lookup_cnt_ + doc_id_lookup_cnt_ + func_lookup_cnt_)); + const int64_t ret_idx = has_doc_id_lookup() ? 
(main_lookup_cnt_) : -1; + return ret_idx; + } + + int64_t get_main_lookup_scan_idx() const + { + OB_ASSERT(children_cnt_ == (main_lookup_cnt_ + doc_id_lookup_cnt_ + func_lookup_cnt_)); + const int64_t ret_idx = has_main_table_lookup() ? 0 : -1; + return ret_idx; + } + + const ObDASBaseCtDef *get_func_lookup_scan_ctdef(const int64_t idx) const + { + const ObDASBaseCtDef *ctdef = nullptr; + const int64_t children_idx = get_func_lookup_scan_idx(idx); + if (children_idx >= 0 && children_idx < children_cnt_ && nullptr != children_) { + ctdef = children_[children_idx]; + } + return ctdef; + } + + const ObDASBaseCtDef *get_doc_id_lookup_scan_ctdef() const + { + ObDASBaseCtDef *doc_id_lookup_scan_ctdef = nullptr; + const int64_t children_idx = get_doc_id_lookup_scan_idx(); + if (children_idx >= 0 && children_idx < children_cnt_ && nullptr != children_) { + doc_id_lookup_scan_ctdef = children_[children_idx]; + } + return doc_id_lookup_scan_ctdef; + } + + const ObDASBaseCtDef *get_main_lookup_scan_ctdef() const + { + ObDASBaseCtDef *main_lookup_ctdef = nullptr; + const int64_t children_idx = get_main_lookup_scan_idx(); + if (children_idx >= 0 && children_idx < children_cnt_ && nullptr != children_) { + main_lookup_ctdef = children_[children_idx]; + } + return main_lookup_ctdef; + } + + int64_t main_lookup_cnt_; + int64_t doc_id_lookup_cnt_; + int64_t func_lookup_cnt_; + ObExpr *lookup_doc_id_expr_; +}; + +struct ObDASFuncLookupRtDef : ObDASAttachRtDef +{ + OB_UNIS_VERSION(1); +public: + ObDASFuncLookupRtDef() + : ObDASAttachRtDef(DAS_OP_FUNC_LOOKUP) {} + + virtual ~ObDASFuncLookupRtDef() {} + + int64_t get_func_lookup_count() const + { + return static_cast(ctdef_)->func_lookup_cnt_; + } + + ObDASBaseRtDef *get_func_lookup_scan_rtdef(const int64_t idx) const + { + const ObDASFuncLookupCtDef *ctdef = static_cast(ctdef_); + ObDASBaseRtDef *rtdef = nullptr; + const int64_t children_idx = ctdef->get_func_lookup_scan_idx(idx); + if (children_idx >= 0 && children_idx < 
children_cnt_ && nullptr != children_) { + rtdef = children_[children_idx]; + } + return rtdef; + } + + ObDASBaseRtDef *get_doc_id_lookup_scan_rtdef() const + { + const ObDASFuncLookupCtDef *ctdef = static_cast(ctdef_); + ObDASBaseRtDef *rtdef = nullptr; + const int64_t children_idx = ctdef->get_doc_id_lookup_scan_idx(); + if (children_idx >= 0 && children_idx < children_cnt_ && nullptr != children_) { + rtdef = children_[children_idx]; + } + return rtdef; + } + + ObDASBaseRtDef *get_main_lookup_scan_rtdef() const + { + const ObDASFuncLookupCtDef *ctdef = static_cast(ctdef_); + ObDASBaseRtDef *rtdef = nullptr; + const int64_t children_idx = ctdef->get_main_lookup_scan_idx(); + if (children_idx >= 0 && children_idx < children_cnt_ && nullptr != children_) { + rtdef = children_[children_idx]; + } + return rtdef; + } +}; + } // namespace sql } // namespace oceanbase diff --git a/src/sql/das/ob_das_scan_op.cpp b/src/sql/das/ob_das_scan_op.cpp index 91c00544f..75e7bc8fe 100644 --- a/src/sql/das/ob_das_scan_op.cpp +++ b/src/sql/das/ob_das_scan_op.cpp @@ -380,7 +380,10 @@ ObDASIterTreeType ObDASScanOp::get_iter_tree_type() const bool is_spatial_index = scan_param_.table_param_->is_spatial_index(); bool is_multivalue_index = scan_param_.table_param_->is_multivalue_index(); bool is_vector_index = scan_param_.table_param_->is_vec_index(); - if (is_fts_index) { + + if (is_func_lookup(attach_ctdef_)) { + tree_type = ObDASIterTreeType::ITER_TREE_FUNC_LOOKUP; + } else if (is_fts_index) { tree_type = ObDASIterTreeType::ITER_TREE_TEXT_RETRIEVAL; } else if (is_spatial_index) { tree_type = ObDASIterTreeType::ITER_TREE_GIS_LOOKUP; @@ -415,6 +418,18 @@ bool ObDASScanOp::is_index_merge(const ObDASBaseCtDef *attach_ctdef) const return bret; } +bool ObDASScanOp::is_func_lookup(const ObDASBaseCtDef *attach_ctdef) const +{ + bool bret = false; + if (nullptr != attach_ctdef && attach_ctdef->op_type_ == ObDASOpType::DAS_OP_INDEX_PROJ_LOOKUP) { + const ObDASBaseCtDef *lookup_ctdef = 
static_cast(attach_ctdef)->get_lookup_ctdef(); + if (OB_NOT_NULL(lookup_ctdef)) { + bret = lookup_ctdef->op_type_ == ObDASOpType::DAS_OP_FUNC_LOOKUP; + } + } + return bret; +} + int ObDASScanOp::init_related_tablet_ids(ObDASRelatedTabletID &related_tablet_ids) { int ret = OB_SUCCESS; @@ -428,12 +443,14 @@ int ObDASScanOp::init_related_tablet_ids(ObDASRelatedTabletID &related_tablet_id LOG_WARN("fail to get rowkey vid tablet id", K(ret)); } else if (!scan_param_.table_param_->is_spatial_index() && OB_FAIL(get_aux_lookup_tablet_id(related_tablet_ids.aux_lookup_tablet_id_))) { LOG_WARN("failed to get aux lookup tablet id", K(ret)); - } else if (OB_FAIL(get_text_ir_tablet_ids(related_tablet_ids.inv_idx_tablet_id_, - related_tablet_ids.fwd_idx_tablet_id_, - related_tablet_ids.doc_id_idx_tablet_id_))) { + } else if (OB_FAIL(get_base_text_ir_tablet_ids(related_tablet_ids.inv_idx_tablet_id_, + related_tablet_ids.fwd_idx_tablet_id_, + related_tablet_ids.doc_id_idx_tablet_id_))) { LOG_WARN("failed to get text ir tablet ids", K(ret)); } else if (OB_FAIL(get_index_merge_tablet_ids(related_tablet_ids.index_merge_tablet_ids_))) { LOG_WARN("failed to get index merge tablet ids", K(ret)); + } else if (OB_FAIL(get_func_lookup_tablet_ids(related_tablet_ids))) { + LOG_WARN("failed to get func lookup tablet ids", K(ret)); } return ret; } @@ -1079,6 +1096,14 @@ int ObDASScanOp::reuse_iter() } break; } + case ITER_TREE_FUNC_LOOKUP: { + ObDASIter *result_iter = static_cast(result_); + if (OB_FAIL(ObDASIterUtils::set_func_lookup_iter_related_ids( + attach_ctdef_, tablet_ids_, ls_id_, -1, result_iter))) { + LOG_WARN("failed to set text retrieval related ids", K(ret)); + } + break; + } case ITER_TREE_MVI_LOOKUP: { if (OB_NOT_NULL(get_lookup_ctdef())) { ObDASLocalLookupIter *lookup_iter = static_cast(result_); @@ -1398,7 +1423,7 @@ int ObDASScanOp::get_vec_ir_tablet_ids( return ret; } -int ObDASScanOp::get_text_ir_tablet_ids( +int ObDASScanOp::get_base_text_ir_tablet_ids( common::ObTabletID 
&inv_idx_tablet_id, common::ObTabletID &fwd_idx_tablet_id, common::ObTabletID &doc_id_idx_tablet_id) @@ -1410,30 +1435,130 @@ int ObDASScanOp::get_text_ir_tablet_ids( if (OB_UNLIKELY(related_ctdefs_.count() != related_tablet_ids_.count())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected related scan array not match", K(ret), K_(related_ctdefs), K_(related_tablet_ids)); + } else if (nullptr == attach_ctdef_ || ObDASOpType::DAS_OP_FUNC_LOOKUP == attach_ctdef_->op_type_) { + // do nothing + } else { + for (int64_t i= 0; OB_SUCC(ret) && i < related_ctdefs_.count(); ++i) { + const ObDASScanCtDef *ctdef = static_cast(related_ctdefs_.at(i)); + switch (ctdef->ir_scan_type_) { + case ObTSCIRScanType::OB_NOT_A_SPEC_SCAN: { + break; + } + case ObTSCIRScanType::OB_IR_INV_IDX_SCAN: + case ObTSCIRScanType::OB_IR_INV_IDX_AGG: { + inv_idx_tablet_id = related_tablet_ids_.at(i); + break; + } + case ObTSCIRScanType::OB_IR_DOC_ID_IDX_AGG: { + doc_id_idx_tablet_id = related_tablet_ids_.at(i); + break; + } + case ObTSCIRScanType::OB_IR_FWD_IDX_AGG: { + fwd_idx_tablet_id = related_tablet_ids_.at(i); + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted ir scan type", K(ret), KPC(ctdef)); + } + } + } } - for (int64_t i= 0; OB_SUCC(ret) && i < related_ctdefs_.count(); ++i) { - const ObDASScanCtDef *ctdef = static_cast(related_ctdefs_.at(i)); - switch (ctdef->ir_scan_type_) { - case ObTSCIRScanType::OB_NOT_A_SPEC_SCAN: { - break; - } - case ObTSCIRScanType::OB_IR_INV_IDX_SCAN: - case ObTSCIRScanType::OB_IR_INV_IDX_AGG: { - inv_idx_tablet_id = related_tablet_ids_.at(i); - break; - } - case ObTSCIRScanType::OB_IR_DOC_ID_IDX_AGG: { - doc_id_idx_tablet_id = related_tablet_ids_.at(i); - break; - } - case ObTSCIRScanType::OB_IR_FWD_IDX_AGG: { - fwd_idx_tablet_id = related_tablet_ids_.at(i); - break; - } - default: { + return ret; +} + +int ObDASScanOp::get_func_lookup_tablet_ids(ObDASRelatedTabletID &related_tablet_ids) +{ + int ret = OB_SUCCESS; + if 
(OB_UNLIKELY(related_ctdefs_.count() != related_tablet_ids_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected related scan array not match", K(ret), K_(related_ctdefs), K_(related_tablet_ids)); + } else if (nullptr == attach_ctdef_ + || ObDASOpType::DAS_OP_INDEX_PROJ_LOOKUP != attach_ctdef_->op_type_ + || static_cast(attach_ctdef_)->get_lookup_ctdef()->op_type_ != ObDASOpType::DAS_OP_FUNC_LOOKUP) { + // do nothing + } else { + related_tablet_ids.reset(); + const ObDASIndexProjLookupCtDef *root_lookup_ctdef = static_cast(attach_ctdef_); + ObDASIndexProjLookupRtDef *root_lookup_rtdef = static_cast(attach_rtdef_); + const ObDASFuncLookupCtDef *func_lookup_ctdef = static_cast(root_lookup_ctdef->get_lookup_ctdef()); + ObDASFuncLookupRtDef *func_lookup_rtdef = static_cast(root_lookup_rtdef->get_lookup_rtdef()); + const ObDASScanCtDef *rowkey_ctdef = static_cast(root_lookup_ctdef->get_rowkey_scan_ctdef()); + const ObDASScanCtDef *scan_ctdef = nullptr; + if (ObDASOpType::DAS_OP_IR_AUX_LOOKUP == rowkey_ctdef->op_type_) { + const ObDASIRAuxLookupCtDef *aux_lookup_ctdef = static_cast(root_lookup_ctdef->get_rowkey_scan_ctdef()); + ObDASIRAuxLookupRtDef *aux_lookup_rtdef = static_cast(root_lookup_rtdef->get_rowkey_scan_rtdef()); + const ObDASIRScanCtDef * ir_scan_ctdef = nullptr; + ObDASIRScanRtDef * ir_scan_rtdef = nullptr; + if (OB_FAIL(ObDASUtils::find_target_das_def( + aux_lookup_ctdef, + aux_lookup_rtdef, + ObDASOpType::DAS_OP_IR_SCAN, + ir_scan_ctdef, + ir_scan_rtdef))) { + LOG_WARN("fail to find ir scan definition", K(ret)); + } else { + int exit_flag = 0; + int flag_size = nullptr == ir_scan_ctdef->get_fwd_idx_agg_ctdef() ? 
4 : 5; + for (int i = 0; exit_flag < flag_size && i < related_ctdefs_.count(); ++i) { + if (aux_lookup_ctdef->get_lookup_scan_ctdef() == related_ctdefs_.at(i)) { + related_tablet_ids.aux_lookup_tablet_id_ = related_tablet_ids_.at(i); + exit_flag++; + } else if (ir_scan_ctdef->get_inv_idx_agg_ctdef() == related_ctdefs_.at(i) || ir_scan_ctdef->get_inv_idx_scan_ctdef() == related_ctdefs_.at(i)) { + related_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids_.at(i); + exit_flag++; + } else if (ir_scan_ctdef->get_doc_id_idx_agg_ctdef() == related_ctdefs_.at(i)) { + related_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids_.at(i); + exit_flag++; + } else if (ir_scan_ctdef->get_fwd_idx_agg_ctdef() == related_ctdefs_.at(i)) { + related_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids_.at(i); + exit_flag++; + } + } + } + } else if (ObDASOpType::DAS_OP_SORT == rowkey_ctdef->op_type_ && FALSE_IT(scan_ctdef = static_cast(rowkey_ctdef->children_[0]))) { + } else if (ObDASOpType::DAS_OP_TABLE_SCAN == rowkey_ctdef->op_type_ && + FALSE_IT(scan_ctdef = static_cast(root_lookup_ctdef->get_rowkey_scan_ctdef()))) { + } else if (ObDASOpType::DAS_OP_SORT == rowkey_ctdef->op_type_ || ObDASOpType::DAS_OP_TABLE_SCAN == rowkey_ctdef->op_type_) { + // do nothing + } else { ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpeted ir scan type", K(ret), KPC(ctdef)); + LOG_WARN("unexpected rowkey scan type", K(ret), KPC(rowkey_ctdef)); } + + if (OB_FAIL(ret)) { + } else { + const ObDASScanCtDef *rowkey_docid_ctdef = static_cast(func_lookup_ctdef->get_doc_id_lookup_scan_ctdef()); + const ObDASScanCtDef *main_lookup_ctdef = static_cast(func_lookup_ctdef->get_main_lookup_scan_ctdef()); + + for (int i = 0; i < related_ctdefs_.count(); ++i) { + if (rowkey_docid_ctdef == related_ctdefs_.at(i)) { + related_tablet_ids.rowkey_doc_tablet_id_ = related_tablet_ids_.at(i); + } else if (nullptr != main_lookup_ctdef && main_lookup_ctdef == related_ctdefs_.at(i)) { + related_tablet_ids.lookup_tablet_id_ = 
related_tablet_ids_.at(i); + } + } + for (int j = 0; j < func_lookup_ctdef->func_lookup_cnt_; ++j) { + const ObDASIRScanCtDef *tr_merger_ctdef = static_cast(func_lookup_ctdef->get_func_lookup_scan_ctdef(j)); + int exit_flag = 0; + ObDASFTSTabletID fts_tablet_id; + int flag_size = nullptr == tr_merger_ctdef->get_fwd_idx_agg_ctdef() ? 3 : 4; + for (int i = 0; exit_flag < flag_size && i < related_ctdefs_.count(); ++i) { + if (tr_merger_ctdef->get_inv_idx_agg_ctdef() == related_ctdefs_.at(i) || tr_merger_ctdef->get_inv_idx_scan_ctdef() == related_ctdefs_.at(i)) { + fts_tablet_id.inv_idx_tablet_id_ = related_tablet_ids_.at(i); + exit_flag++; + } else if (tr_merger_ctdef->get_doc_id_idx_agg_ctdef() == related_ctdefs_.at(i)) { + fts_tablet_id.doc_id_idx_tablet_id_ = related_tablet_ids_.at(i); + exit_flag++; + } else if (tr_merger_ctdef->get_fwd_idx_agg_ctdef() == related_ctdefs_.at(i)) { + fts_tablet_id.fwd_idx_tablet_id_ = related_tablet_ids_.at(i); + exit_flag++; + } + } + if (OB_FAIL(related_tablet_ids.fts_tablet_ids_.push_back(fts_tablet_id))) { + LOG_WARN("failed to push fts_tablet_id", K(ret)); + } + } } } return ret; diff --git a/src/sql/das/ob_das_scan_op.h b/src/sql/das/ob_das_scan_op.h index 1a26cd6a1..e3df9f7b1 100644 --- a/src/sql/das/ob_das_scan_op.h +++ b/src/sql/das/ob_das_scan_op.h @@ -337,7 +337,7 @@ public: bool is_contain_trans_info() {return NULL != scan_ctdef_->trans_info_expr_; } int do_table_scan(); int do_domain_index_lookup(); - int get_text_ir_tablet_ids( + int get_base_text_ir_tablet_ids( common::ObTabletID &inv_idx_tablet_id, common::ObTabletID &fwd_idx_tablet_id, common::ObTabletID &doc_id_idx_tablet_id); @@ -348,6 +348,7 @@ public: common::ObTabletID &snapshot_tid, common::ObTabletID &com_aux_vec_tid); int get_index_merge_tablet_ids(common::ObIArray &index_merge_tablet_ids); + int get_func_lookup_tablet_ids(ObDASRelatedTabletID &related_tablet_ids); bool enable_rich_format() const { return scan_rtdef_->enable_rich_format(); } 
INHERIT_TO_STRING_KV("parent", ObIDASTaskOp, KPC_(scan_ctdef), @@ -362,6 +363,7 @@ protected: common::ObNewRowIterator *get_output_result_iter() { return result_; } ObDASIterTreeType get_iter_tree_type() const; bool is_index_merge(const ObDASBaseCtDef *attach_ctdef) const; + bool is_func_lookup(const ObDASBaseCtDef *attach_ctdef) const; public: ObSEArray trans_info_array_; protected: diff --git a/src/sql/engine/table/ob_table_scan_op.cpp b/src/sql/engine/table/ob_table_scan_op.cpp index 20b5e900b..9194e8fc8 100644 --- a/src/sql/engine/table/ob_table_scan_op.cpp +++ b/src/sql/engine/table/ob_table_scan_op.cpp @@ -266,6 +266,15 @@ ObDASScanCtDef *ObTableScanCtDef::get_lookup_ctdef() OB_ASSERT(2 == vid_merge_ctdef->children_cnt_ && vid_merge_ctdef->children_ != nullptr); lookup_ctdef = static_cast(vid_merge_ctdef->children_[0]); } + } else if (DAS_OP_INDEX_PROJ_LOOKUP == attach_ctdef->op_type_) { + OB_ASSERT(2 == attach_ctdef->children_cnt_ && attach_ctdef->children_ != nullptr); + if (DAS_OP_FUNC_LOOKUP == attach_ctdef->children_[1]->op_type_) { + ObDASFuncLookupCtDef *func_lookup_ctdef = static_cast(attach_ctdef->children_[1]); + if (func_lookup_ctdef->has_main_table_lookup()) { + const int64_t lookup_child_idx = func_lookup_ctdef->get_main_lookup_scan_idx(); + lookup_ctdef = static_cast(func_lookup_ctdef->children_[lookup_child_idx]); + } + } } return lookup_ctdef; } @@ -299,6 +308,15 @@ const ObDASScanCtDef *ObTableScanCtDef::get_lookup_ctdef() const OB_ASSERT(2 == vid_merge_ctdef->children_cnt_ && vid_merge_ctdef->children_ != nullptr); lookup_ctdef = static_cast(vid_merge_ctdef->children_[0]); } + } else if (DAS_OP_INDEX_PROJ_LOOKUP == attach_ctdef->op_type_) { + OB_ASSERT(2 == attach_ctdef->children_cnt_ && attach_ctdef->children_ != nullptr); + if (DAS_OP_FUNC_LOOKUP == attach_ctdef->children_[1]->op_type_) { + ObDASFuncLookupCtDef *func_lookup_ctdef = static_cast(attach_ctdef->children_[1]); + if (func_lookup_ctdef->has_main_table_lookup()) { + const int64_t 
lookup_child_idx = func_lookup_ctdef->get_main_lookup_scan_idx(); + lookup_ctdef = static_cast(func_lookup_ctdef->children_[lookup_child_idx]); + } + } } return lookup_ctdef; } @@ -3887,41 +3905,40 @@ int ObTableScanOp::inner_get_next_fts_index_row() int ObTableScanOp::fetch_next_fts_index_rows() { int ret = OB_SUCCESS; + ObExpr *ft_expr = nullptr; + ObExpr *doc_id_expr = nullptr; + ObDatum *ft_datum = nullptr; + ObDatum *doc_id_datum = nullptr; if (OB_FAIL(ObTableScanOp::inner_get_next_row_implement())) { if (OB_ITER_END != ret) { LOG_WARN("fail to get next row implement", K(ret)); } + } else if (OB_FAIL(get_output_fts_col_expr_by_type(T_FUN_SYS_DOC_ID, doc_id_expr))) { + LOG_WARN("fail to get doc id column expr from output", K(ret)); + } else if (OB_FAIL(get_output_fts_col_expr_by_type(T_FUN_SYS_WORD_SEGMENT, ft_expr))) { + LOG_WARN("fail to get word segment column expr from output", K(ret)); + } else if (OB_ISNULL(ft_expr) || OB_ISNULL(doc_id_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, ft or doc id expr is nullptr", K(ret), KP(ft_expr), KP(doc_id_expr)); + } else if (OB_FAIL(ft_expr->eval(eval_ctx_, ft_datum))) { + LOG_WARN("fail to evaluate fulltext expr", K(ret)); + } else if (OB_FAIL(doc_id_expr->eval(eval_ctx_, doc_id_datum))) { + LOG_WARN("fail to evaluate doc id expr", K(ret)); + } else if (OB_ISNULL(ft_datum) || OB_ISNULL(doc_id_datum)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpeted error, ft or doc id datum is nullptr", K(ret), KP(ft_datum), KP(doc_id_datum)); } else { - const int64_t part_count = get_part_dep_col_cnt(); - const int64_t word_col_idx = (MY_SPEC.is_fts_index_aux_ ? 0 : 1) + part_count; - const int64_t doc_id_col_idx = (MY_SPEC.is_fts_index_aux_ ? 
1 : 0) + part_count; - ObExpr *ft_expr = MY_SPEC.output_.at(word_col_idx); - ObExpr *doc_id_expr = MY_SPEC.output_.at(doc_id_col_idx); - ObDatum *ft_datum = nullptr; - ObDatum *doc_id_datum = nullptr; - if (OB_ISNULL(ft_expr) || OB_ISNULL(doc_id_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpeted error, ft or doc id expr is nullptr", K(ret), KP(ft_expr), KP(doc_id_expr)); - } else if (OB_FAIL(ft_expr->eval(eval_ctx_, ft_datum))) { - LOG_WARN("fail to evaluate fulltext expr", K(ret)); - } else if (OB_FAIL(doc_id_expr->eval(eval_ctx_, doc_id_datum))) { - LOG_WARN("fail to evaluate doc id expr", K(ret)); - } else if (OB_ISNULL(ft_datum) || OB_ISNULL(doc_id_datum)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpeted error, ft or doc id datum is nullptr", K(ret), KP(ft_datum), KP(doc_id_datum)); - } else { - ObString ft = ft_datum->get_string(); - const ObString &doc_id = doc_id_datum->get_string(); - ObArenaAllocator tmp_allocator(ObModIds::OB_LOB_ACCESS_BUFFER, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); - if (OB_FAIL(ObTextStringHelper::read_real_string_data(tmp_allocator, *ft_datum, ft_expr->datum_meta_, - ft_expr->obj_meta_.has_lob_header(), ft))) { - LOG_WARN("fail to read real string data", K(ret)); - } else if (OB_UNLIKELY(doc_id.length() != sizeof(ObDocId)) || OB_ISNULL(doc_id.ptr())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid binary document id", K(ret), K(doc_id)); - } else if (OB_FAIL(fts_index_.segment(ft_expr->obj_meta_, doc_id, ft))) { - LOG_WARN("fail to segment fulltext", K(ret), K(doc_id), K(ft)); - } + ObString ft = ft_datum->get_string(); + const ObString &doc_id = doc_id_datum->get_string(); + ObArenaAllocator tmp_allocator(ObModIds::OB_LOB_ACCESS_BUFFER, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + if (OB_FAIL(ObTextStringHelper::read_real_string_data(tmp_allocator, *ft_datum, ft_expr->datum_meta_, + ft_expr->obj_meta_.has_lob_header(), ft))) { + LOG_WARN("fail to read real string data", K(ret)); + } else if (OB_UNLIKELY(doc_id.length() != 
sizeof(ObDocId)) || OB_ISNULL(doc_id.ptr())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid binary document id", K(ret), K(doc_id)); + } else if (OB_FAIL(fts_index_.segment(ft_expr->obj_meta_, doc_id, ft))) { + LOG_WARN("fail to segment fulltext", K(ret), K(doc_id), K(ft)); } } return ret; @@ -3931,6 +3948,8 @@ int ObTableScanOp::fill_generated_fts_cols(blocksstable::ObDatumRow *row) { int ret = OB_SUCCESS; const int64_t part_count = get_part_dep_col_cnt(); + const ObObjDatumMapType *types = MY_SPEC.is_fts_index_aux_ ? ObFTIndexRowCache::FTS_INDEX_TYPES : ObFTIndexRowCache::FTS_DOC_WORD_TYPES; + const ObExprOperatorType *expr_types = MY_SPEC.is_fts_index_aux_ ? ObFTIndexRowCache::FTS_INDEX_EXPR_TYPE : ObFTIndexRowCache::FTS_DOC_WORD_EXPR_TYPE; if (OB_ISNULL(row)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument, row is nullptr", K(ret), KP(row)); @@ -3939,16 +3958,19 @@ int ObTableScanOp::fill_generated_fts_cols(blocksstable::ObDatumRow *row) ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected output column count", K(ret), K(MY_SPEC.output_), KPC(row), K(part_count)); } else { - ObObjDatumMapType *types =MY_SPEC.is_fts_index_aux_ ? 
ObFTIndexRowCache::FTS_INDEX_TYPES : ObFTIndexRowCache::FTS_DOC_WORD_TYPES; - for (int64_t i = part_count; OB_SUCC(ret) && i < share::ObFtsIndexBuilderUtil::OB_FTS_INDEX_OR_DOC_WORD_TABLE_COL_CNT + part_count; ++i) { - ObExpr *expr = MY_SPEC.output_.at(i); - ObDatum &datum = expr->locate_datum_for_write(eval_ctx_); - ObEvalInfo &eval_info = expr->get_eval_info(eval_ctx_); - if (OB_FAIL(datum.from_storage_datum(row->storage_datums_[i - part_count], types[i - part_count]))) { - LOG_WARN("fail to fill fulltext index row", K(ret), K(i), K(MY_SPEC.output_), KPC(row)); + for (int64_t i = 0; OB_SUCC(ret) && i < share::ObFtsIndexBuilderUtil::OB_FTS_INDEX_OR_DOC_WORD_TABLE_COL_CNT; ++i) { + ObExpr *expr = nullptr; + if (OB_FAIL(get_output_fts_col_expr_by_type(expr_types[i], expr))) { + LOG_WARN("fail to get fts column expr", K(ret), K(i), K(expr_types[i])); } else { - eval_info.evaluated_ = true; - eval_info.projected_ = true; + ObDatum &datum = expr->locate_datum_for_write(eval_ctx_); + ObEvalInfo &eval_info = expr->get_eval_info(eval_ctx_); + if (OB_FAIL(datum.from_storage_datum(row->storage_datums_[i], types[i]))) { + LOG_WARN("fail to fill fulltext index row", K(ret), K(i), K(MY_SPEC.output_), KPC(row)); + } else { + eval_info.evaluated_ = true; + eval_info.projected_ = true; + } } } } @@ -3976,5 +3998,51 @@ int64_t ObTableScanOp::get_part_dep_col_cnt() return part_dep_col_cnt; } +int ObTableScanOp::get_output_fts_col_expr_by_type( + const ObExprOperatorType &type, + ObExpr *&expr) +{ + int ret = OB_SUCCESS; + expr = nullptr; + if (OB_UNLIKELY(T_FUN_SYS_WORD_SEGMENT != type + && T_FUN_SYS_DOC_ID != type + && T_FUN_SYS_WORD_COUNT != type + && T_FUN_SYS_DOC_LENGTH != type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid fts column expr type", K(ret), "type", get_type_name(type)); + } else if (T_FUN_SYS_DOC_ID == type) { + for (int64_t i = 0; OB_SUCC(ret) && OB_ISNULL(expr) && i < MY_SPEC.output_.count(); ++i) { + ObExpr *tmp_expr = MY_SPEC.output_.at(i); + if 
(OB_ISNULL(tmp_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, expr in output is nullptr", K(ret), K(i)); + } else if (T_FUN_SYS_WORD_SEGMENT == tmp_expr->type_) { + const int64_t idx = MY_SPEC.is_fts_index_aux_ ? i+1 : i-1; + if (OB_UNLIKELY(idx < 0 || idx >= MY_SPEC.output_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, invalid doc id idx", K(ret), K(idx), K(i), K(MY_SPEC.output_)); + } else { + expr = MY_SPEC.output_.at(idx); + } + } + } + } else { + for (int64_t i = 0; OB_SUCC(ret) && OB_ISNULL(expr) && i < MY_SPEC.output_.count(); ++i) { + ObExpr *tmp_expr = MY_SPEC.output_.at(i); + if (OB_ISNULL(tmp_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, expr in output is nullptr", K(ret), K(i)); + } else if (type == tmp_expr->type_) { + expr = tmp_expr; + } + } + } + if (OB_SUCC(ret) && OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, fts column expr isn't found", K(ret), "type", get_type_name(type), K(MY_SPEC.output_)); + } + return ret; +} + } // end namespace sql } // end namespace oceanbase diff --git a/src/sql/engine/table/ob_table_scan_op.h b/src/sql/engine/table/ob_table_scan_op.h index 956dff113..1067d6b3a 100644 --- a/src/sql/engine/table/ob_table_scan_op.h +++ b/src/sql/engine/table/ob_table_scan_op.h @@ -711,6 +711,7 @@ private: int inner_get_next_fts_index_row(); int fetch_next_fts_index_rows(); int fill_generated_fts_cols(ObDatumRow *row); + int get_output_fts_col_expr_by_type(const ObExprOperatorType &type, ObExpr *&expr); int64_t get_part_dep_col_cnt(); protected: DASOpResultIter scan_result_; diff --git a/src/sql/optimizer/ob_access_path_estimation.cpp b/src/sql/optimizer/ob_access_path_estimation.cpp index 2ef037c04..30a887db2 100644 --- a/src/sql/optimizer/ob_access_path_estimation.cpp +++ b/src/sql/optimizer/ob_access_path_estimation.cpp @@ -786,7 +786,10 @@ int ObAccessPathEstimation::add_storage_estimation_task(ObOptimizerContext &ctx, index_partitions, 
chosen_partitions))) { LOG_WARN("failed to choose partitions", K(ret)); - } else if (OB_FAIL(choose_storage_estimation_ranges(range_limit, ap, chosen_scan_ranges))) { + } else if (OB_FAIL(choose_storage_estimation_ranges(range_limit, + ap.est_cost_info_.ranges_, + ap.est_cost_info_.index_meta_info_.is_geo_index_, + chosen_scan_ranges))) { LOG_WARN("failed to choose scan ranges", K(ret)); } else { result_helper.est_scan_range_count_ = chosen_scan_ranges.count(); @@ -846,7 +849,10 @@ int ObAccessPathEstimation::add_storage_estimation_task_by_ranges(ObOptimizerCon OB_UNLIKELY(ori_partitions.empty())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected param", K(ret), K(table_meta), K(ori_partitions), K(index_partitions)); - } else if (OB_FAIL(choose_storage_estimation_ranges(range_limit, ap, chosen_scan_ranges))) { + } else if (OB_FAIL(choose_storage_estimation_ranges(range_limit, + ap.est_cost_info_.ranges_, + ap.est_cost_info_.index_meta_info_.is_geo_index_, + chosen_scan_ranges))) { LOG_WARN("failed to choose scan ranges", K(ret)); } else if (OB_FAIL(result_helper.range_result_.prepare_allocate(chosen_scan_ranges.count()))) { LOG_WARN("failed to prepare allocate", K(ret)); @@ -1315,16 +1321,17 @@ int ObAccessPathEstimation::choose_storage_estimation_partitions(const int64_t p } int ObAccessPathEstimation::choose_storage_estimation_ranges(const int64_t range_limit, - AccessPath &ap, + const ObRangesArray &ranges, + bool is_geo_index, ObIArray &scan_ranges) { int ret = OB_SUCCESS; ObSEArray get_ranges; ObSEArray valid_ranges; - if (ap.est_cost_info_.ranges_.empty()) { + if (ranges.empty()) { // do nothing - } else if (ap.est_cost_info_.index_meta_info_.is_geo_index_) { - ObIArray &geo_ranges = ap.est_cost_info_.ranges_; + } else if (is_geo_index) { + const ObIArray &geo_ranges = ranges; int64_t total_cnt = geo_ranges.count(); if (geo_ranges.at(0).get_start_key().get_obj_cnt() < SPATIAL_ROWKEY_MIN_NUM) { ret = OB_ERR_UNEXPECTED; @@ -1364,7 +1371,7 @@ int 
ObAccessPathEstimation::choose_storage_estimation_ranges(const int64_t range } } else { if (OB_FAIL(ObOptimizerUtil::classify_get_scan_ranges( - ap.est_cost_info_.ranges_, + ranges, get_ranges, scan_ranges))) { LOG_WARN("failed to clasiffy get scan ranges", K(ret)); @@ -1829,7 +1836,7 @@ int ObAccessPathEstimation::estimate_full_table_rowcount(ObOptimizerContext &ctx LOG_TRACE("succeed to storage estimate full table rowcount", K(meta)); } } else if (part_loc_info_array.count() > 1 && partition_limit >= 0) { - if (OB_FAIL(storage_estimate_full_table_rowcount(ctx, part_loc_info_array, meta))) { + if (OB_FAIL(storage_estimate_range_rowcount(ctx, part_loc_info_array, true, NULL, meta))) { LOG_WARN("failed to storage estimate full table rowcount", K(ret)); } else { LOG_TRACE("succeed to storage estimate full table rowcount", K(meta)); @@ -1945,19 +1952,24 @@ int ObAccessPathEstimation::storage_estimate_full_table_rowcount(ObOptimizerCont return ret; } -int ObAccessPathEstimation::storage_estimate_full_table_rowcount(ObOptimizerContext &ctx, - const ObCandiTabletLocIArray &part_loc_infos, - ObTableMetaInfo &meta) +int ObAccessPathEstimation::storage_estimate_range_rowcount(ObOptimizerContext &ctx, + const ObCandiTabletLocIArray &part_loc_infos, + bool estimate_whole_range, + const ObRangesArray *ranges, + ObTableMetaInfo &meta) { int ret = OB_SUCCESS; ObArenaAllocator arena("CardEstimation"); ObArray tasks; ObArray prefer_addrs; ObCandiTabletLocSEArray chosen_partitions; + ObSEArray chosen_scan_ranges; + ObRangesArray whole_range; bool need_fallback = false; int64_t partition_limit = 0; + int64_t range_limit = 0; int64_t total_part_cnt = part_loc_infos.count(); - if (OB_ISNULL(ctx.get_session_info())) { + if (OB_ISNULL(ctx.get_session_info()) || (!estimate_whole_range && OB_ISNULL(ranges))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); } else if ((is_virtual_table(meta.ref_table_id_) && @@ -1969,12 +1981,48 @@ int 
ObAccessPathEstimation::storage_estimate_full_table_rowcount(ObOptimizerCont share::SYS_VAR_PARTITION_INDEX_DIVE_LIMIT, partition_limit))) { LOG_WARN("failed to get hint system variable", K(ret)); + } else if (OB_FAIL(ctx.get_global_hint().opt_params_.get_sys_var(ObOptParamHint::RANGE_INDEX_DIVE_LIMIT, + ctx.get_session_info(), + share::SYS_VAR_RANGE_INDEX_DIVE_LIMIT, + range_limit))) { + LOG_WARN("failed to get hint system variable", K(ret)); + } else { + if (partition_limit < 0 && range_limit < 0) { + partition_limit = 1; + range_limit = ObOptEstCost::MAX_STORAGE_RANGE_ESTIMATION_NUM; + } + // make whole range if need + if (estimate_whole_range) { + ObNewRange *range = NULL; + if (OB_FAIL(ObSQLUtils::make_whole_range(arena, + meta.ref_table_id_, + meta.table_rowkey_count_, + range))) { + LOG_WARN("failed to make whole range", K(ret)); + } else if (OB_ISNULL(range)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("null range", K(ret)); + } else if (OB_FAIL(whole_range.push_back(*range))) { + LOG_WARN("failed to push back range", K(ret)); + } else { + ranges = &whole_range; + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(ranges)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ranges is null", K(ret)); + } else if (OB_FAIL(choose_storage_estimation_ranges(range_limit, *ranges, false, chosen_scan_ranges))) { + LOG_WARN("failed to choose scan ranges", K(ret)); } else if (OB_FAIL(choose_storage_estimation_partitions(partition_limit, part_loc_infos, chosen_partitions))) { LOG_WARN("failed to choose partitions", K(ret)); } else { LOG_TRACE("choose partitions to estimate rowcount", K(chosen_partitions)); + LOG_TRACE("choose ranges to estimate rowcount", K(chosen_scan_ranges)); } for (int64_t i = 0; OB_SUCC(ret) && !need_fallback && i < chosen_partitions.count(); i ++) { EstimatedPartition best_index_part; @@ -1990,7 +2038,6 @@ int ObAccessPathEstimation::storage_estimate_full_table_rowcount(ObOptimizerCont LOG_WARN("failed to get task", K(ret)); } else if (NULL != task) { 
obrpc::ObEstPartArgElement path_arg; - ObNewRange *range = NULL; task->addr_ = best_index_part.addr_; path_arg.scan_flag_.index_back_ = 0; path_arg.index_id_ = meta.ref_table_id_; @@ -2000,14 +2047,8 @@ int ObAccessPathEstimation::storage_estimate_full_table_rowcount(ObOptimizerCont path_arg.ls_id_ = best_index_part.ls_id_; path_arg.tenant_id_ = ctx.get_session_info()->get_effective_tenant_id(); path_arg.tx_id_ = ctx.get_session_info()->get_tx_id(); - if (OB_FAIL(ObSQLUtils::make_whole_range(arena, - meta.ref_table_id_, - meta.table_rowkey_count_, - range))) { - LOG_WARN("failed to make whole range", K(ret)); - } else if (OB_ISNULL(path_arg.batch_.range_ = range)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to generate whole range", K(ret), K(range)); + if (OB_FAIL(construct_scan_range_batch(ctx.get_allocator(), chosen_scan_ranges, path_arg.batch_))) { + LOG_WARN("failed to construct scan range batch", K(ret)); } else if (OB_FAIL(task->arg_.index_params_.push_back(path_arg))) { LOG_WARN("failed to add primary key estimation arg", K(ret)); } diff --git a/src/sql/optimizer/ob_access_path_estimation.h b/src/sql/optimizer/ob_access_path_estimation.h index dcbf63f41..93d586b4b 100644 --- a/src/sql/optimizer/ob_access_path_estimation.h +++ b/src/sql/optimizer/ob_access_path_estimation.h @@ -110,6 +110,11 @@ public: uint64_t table_id, uint64_t ref_table_id, bool &can_use); + static int storage_estimate_range_rowcount(ObOptimizerContext &ctx, + const ObCandiTabletLocIArray &part_loc_infos, + bool estimate_whole_range, + const ObRangesArray *ranges, + ObTableMetaInfo &meta); private: static const int STORAGE_EST_SAMPLE_SEED = 1; static int inner_estimate_rowcount(ObOptimizerContext &ctx, @@ -230,7 +235,8 @@ private: const ObCandiTabletLocIArray &partitions, ObCandiTabletLocIArray &chosen_partitions); static int choose_storage_estimation_ranges(const int64_t range_limit, - AccessPath &ap, + const ObRangesArray &ranges, + bool is_geo_index, ObIArray &scan_ranges); static 
int process_dynamic_sampling_estimation(ObOptimizerContext &ctx, @@ -317,9 +323,6 @@ private: static int storage_estimate_full_table_rowcount(ObOptimizerContext &ctx, const ObCandiTabletLoc &part_loc_info, ObTableMetaInfo &meta); - static int storage_estimate_full_table_rowcount(ObOptimizerContext &ctx, - const ObCandiTabletLocIArray &part_loc_infos, - ObTableMetaInfo &meta); static int estimate_full_table_rowcount_by_meta_table(ObOptimizerContext &ctx, const ObIArray &all_tablet_ids, diff --git a/src/sql/optimizer/ob_del_upd_log_plan.cpp b/src/sql/optimizer/ob_del_upd_log_plan.cpp index 1e70bc286..41668422c 100644 --- a/src/sql/optimizer/ob_del_upd_log_plan.cpp +++ b/src/sql/optimizer/ob_del_upd_log_plan.cpp @@ -1793,8 +1793,8 @@ int ObDelUpdLogPlan::collect_related_local_index_ids(IndexDMLInfo &primary_dml_i const ObTableSchema *index_schema = nullptr; ObSchemaGetterGuard *schema_guard = nullptr; const ObDelUpdStmt *stmt = get_stmt(); - int64_t index_tid_array_size = OB_MAX_INDEX_PER_TABLE; - uint64_t index_tid_array[OB_MAX_INDEX_PER_TABLE]; + int64_t index_tid_array_size = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; + uint64_t index_tid_array[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; ObArray base_column_ids; const uint64_t tenant_id = optimizer_context_.get_session_info()->get_effective_tenant_id(); ObInsertLogPlan *insert_plan = dynamic_cast(this); @@ -1956,8 +1956,8 @@ int ObDelUpdLogPlan::prepare_table_dml_info_basic(const ObDmlTableInfo& table_in } } if (OB_SUCC(ret) && !has_tg) { - uint64_t index_tid[OB_MAX_INDEX_PER_TABLE]; - int64_t index_cnt = OB_MAX_INDEX_PER_TABLE; + uint64_t index_tid[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; + int64_t index_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; ObInsertLogPlan *insert_plan = dynamic_cast(this); if (NULL != insert_plan && get_optimizer_context().get_direct_load_optimizer_ctx().use_direct_load()) { index_cnt = 0; // no need building index diff --git a/src/sql/optimizer/ob_index_info_cache.cpp b/src/sql/optimizer/ob_index_info_cache.cpp index 
f59e7cf86..f338858b1 100644 --- a/src/sql/optimizer/ob_index_info_cache.cpp +++ b/src/sql/optimizer/ob_index_info_cache.cpp @@ -121,10 +121,10 @@ int ObIndexInfoCache::add_index_info_entry(IndexInfoEntry *entry) // update index info entry old_entry->~IndexInfoEntry(); index_entrys_[idx] = entry; - } else if (entry_count_ >= common::OB_MAX_INDEX_PER_TABLE + 1) { + } else if (entry_count_ >= common::OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid entry count", K(ret), K_(entry_count), - K(common::OB_MAX_INDEX_PER_TABLE)); + K(common::OB_MAX_AUX_TABLE_PER_MAIN_TABLE)); } else { index_entrys_[entry_count_] = entry; ++entry_count_; diff --git a/src/sql/optimizer/ob_index_info_cache.h b/src/sql/optimizer/ob_index_info_cache.h index d01c601e6..c3d8dc38a 100644 --- a/src/sql/optimizer/ob_index_info_cache.h +++ b/src/sql/optimizer/ob_index_info_cache.h @@ -255,7 +255,7 @@ private: uint64_t table_id_; uint64_t base_table_id_; int64_t entry_count_; - IndexInfoEntry *index_entrys_[common::OB_MAX_INDEX_PER_TABLE + 1]; //including table and index table + IndexInfoEntry *index_entrys_[common::OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1]; //including table and index table DISALLOW_COPY_AND_ASSIGN(ObIndexInfoCache); }; diff --git a/src/sql/optimizer/ob_join_order.cpp b/src/sql/optimizer/ob_join_order.cpp index 8804e09fa..de5b438f8 100755 --- a/src/sql/optimizer/ob_join_order.cpp +++ b/src/sql/optimizer/ob_join_order.cpp @@ -986,11 +986,13 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, ObExecContext *exec_ctx = NULL; ObQueryRangeProvider *query_range_provider = NULL; const share::schema::ObTableSchema *index_schema = NULL; + const share::schema::ObTableSchema *table_schema = NULL; ObQueryRangeArray &ranges = range_info.get_ranges(); ObQueryRangeArray &ss_ranges = range_info.get_ss_ranges(); ObIArray &range_columns = range_info.get_range_columns(); bool is_geo_index = false; bool is_multi_index = false; + bool is_fts_index = false; bool 
is_domain_index = false; ObWrapperAllocator wrap_allocator(*allocator_); ColumnIdInfoMapAllocer map_alloc(OB_MALLOC_NORMAL_BLOCK_SIZE, wrap_allocator); @@ -1004,7 +1006,10 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, } else if (OB_FAIL(schema_guard->get_table_schema(index_id, index_schema, ObSqlSchemaGuard::is_link_table(get_plan()->get_stmt(), table_id)))) { LOG_WARN("fail to get table schema", K(index_id), K(ret)); - } else if (OB_ISNULL(index_schema)) { + } else if (OB_FAIL(schema_guard->get_table_schema(base_table_id, table_schema, + ObSqlSchemaGuard::is_link_table(get_plan()->get_stmt(), table_id)))) { + LOG_WARN("fail to get table schema", K(index_id), K(ret)); + } else if (OB_ISNULL(index_schema) || OB_ISNULL(table_schema)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(index_schema), K(ret)); } else if (OB_FAIL(get_plan()->get_index_column_items(opt_ctx->get_expr_factory(), @@ -1018,10 +1023,10 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, domain_columnInfo_map))) { LOG_WARN("failed to extract geometry schema info", K(ret), K(table_id), K(index_id)); } else if (FALSE_IT(is_multi_index = index_schema->is_multivalue_index())) { + } else if (FALSE_IT(is_fts_index = index_schema->is_fts_index_aux())) { } else { const ObSQLSessionInfo *session = opt_ctx->get_session_info(); const ObDataTypeCastParams dtc_params = ObBasicSessionInfo::create_dtc_params(session); - bool all_single_value_range = false; int64_t equal_prefix_count = 0; int64_t equal_prefix_null_count = 0; @@ -1029,7 +1034,7 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, bool contain_always_false = false; bool has_exec_param = false; int64_t out_index_prefix = -1; - bool is_domain_index = (is_geo_index || is_multi_index); + bool is_domain_index = (is_geo_index || is_multi_index || is_fts_index); common::ObSEArray agent_table_filter; bool is_oracle_inner_index_table = 
share::is_oracle_mapping_real_virtual_table(index_schema->get_table_id()); @@ -1060,6 +1065,15 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id, agent_table_filter : helper.filters_, query_range_provider))) { LOG_WARN("failed to extract query range", K(ret), K(index_id)); + } else if(is_fts_index && OB_FAIL(extract_fts_preliminary_query_range(range_columns, + is_oracle_inner_index_table + ? agent_table_filter + : helper.filters_, + table_schema, + index_schema, + helper, + query_range_provider))) { + LOG_WARN("failed to extract query range", K(ret), K(index_id)); } else if (OB_ISNULL(query_range_provider)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(query_range_provider), K(ret)); @@ -1733,7 +1747,9 @@ int ObJoinOrder::create_one_access_path(const uint64_t table_id, ap->contain_das_op_ = ap->use_das_; ap->is_ror_ = (ref_id == index_id) ? true : range_info.get_equal_prefix_count() >= range_info.get_index_column_count(); - if (OB_FAIL(init_sample_info_for_access_path(ap, table_id, table_item))) { + if (OB_FAIL(process_index_for_match_expr(table_id, ref_id, index_id, helper, *ap))) { + LOG_WARN("failed to process index for match expr", K(ret)); + } else if (OB_FAIL(init_sample_info_for_access_path(ap, table_id, table_item))) { LOG_WARN("failed to init sample info", K(ret)); } else if (OB_FAIL(add_access_filters(ap, ordering_info.get_index_keys(), @@ -1775,6 +1791,7 @@ int ObJoinOrder::create_one_access_path(const uint64_t table_id, if (OB_FAIL(fill_filters(ap->filter_, ap->get_query_range_provider(), ap->est_cost_info_, + ap->tr_idx_info_, is_nl_with_extended_range, ObSqlSchemaGuard::is_link_table(get_plan()->get_stmt(), table_id), OptSkipScanState::SS_DISABLE != use_skip_scan))) { @@ -2336,14 +2353,19 @@ int ObJoinOrder::check_and_extract_query_range(const uint64_t table_id, //do some quick check bool expr_match = false; //some condition on index contain_always_false = false; - bool is_multivlaue_idx = 
index_info_entry.is_multivalue_index(); - if (is_multivlaue_idx && + bool is_special_index = index_info_entry.is_index_geo() || + index_info_entry.is_multivalue_index() || + index_info_entry.is_fulltext_index(); + if (index_info_entry.is_multivalue_index() && OB_FAIL(check_exprs_overlap_multivalue_index(table_id, index_table_id, restrict_infos, index_keys, expr_match))) { LOG_WARN("get_range_columns failed", K(ret)); - } else if (!is_multivlaue_idx && !index_info_entry.is_index_geo() && OB_FAIL(check_exprs_overlap_index(restrict_infos, index_keys, expr_match))) { - LOG_WARN("check quals match index error", K(restrict_infos), K(index_keys)); - } else if (index_info_entry.is_index_geo() && OB_FAIL(check_exprs_overlap_gis_index(restrict_infos, index_keys, expr_match))) { + } else if (index_info_entry.is_index_geo() && + OB_FAIL(check_exprs_overlap_gis_index(restrict_infos, index_keys, expr_match))) { LOG_WARN("check quals match gis index error", K(restrict_infos), K(index_keys)); + } else if (index_info_entry.is_fulltext_index() && + OB_FALSE_IT(expr_match = index_info_entry.get_range_info().is_valid())) { + } else if (!is_special_index && OB_FAIL(check_exprs_overlap_index(restrict_infos, index_keys, expr_match))) { + LOG_WARN("check quals match index error", K(restrict_infos), K(index_keys)); } else if (expr_match) { prefix_range_ids.reset(); const QueryRangeInfo *query_range_info = NULL; @@ -2598,7 +2620,7 @@ int ObJoinOrder::fill_index_info_entry(const uint64_t table_id, entry->set_is_index_geo(is_index_geo); entry->set_is_index_back(is_index_back); entry->set_is_unique_index(is_unique_index); - entry->set_is_fulltext_index(index_schema->is_fts_index()); + entry->set_is_fulltext_index(index_schema->is_fts_index_aux()); entry->set_is_multivalue_index(index_schema->is_multivalue_index_aux()); entry->set_is_vector_index(index_schema->is_vec_index()); entry->get_ordering_info().set_scan_direction(direction); @@ -2715,6 +2737,10 @@ int 
ObJoinOrder::create_access_paths(const uint64_t table_id, } else if (OB_FAIL(get_generated_col_index_qual(table_id, helper.filters_, helper))) { LOG_WARN("get prefix index qual failed"); + } else if (OB_FAIL(init_basic_text_retrieval_info(table_id, + ref_table_id, + helper))) { + LOG_WARN("failed to init basic text retrieval info", K(ret)); } else if (OB_FAIL(check_can_use_index_merge(table_id, ref_table_id, helper, @@ -2734,6 +2760,7 @@ int ObJoinOrder::create_access_paths(const uint64_t table_id, } } else if (OB_FAIL(get_valid_index_ids(table_id, ref_table_id, + helper, candi_index_ids))) { LOG_WARN("failed to get valid index ids", K(ret)); } else if (OB_FAIL(fill_index_info_cache(table_id, ref_table_id, @@ -3210,6 +3237,7 @@ int ObJoinOrder::compute_table_rowcount_info() int ObJoinOrder::get_valid_index_ids(const uint64_t table_id, const uint64_t ref_table_id, + PathHelper &helper, ObIArray &valid_index_ids) { int ret = OB_SUCCESS; @@ -3217,10 +3245,9 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id, const TableItem *table_item = NULL; ObSqlSchemaGuard *schema_guard = NULL; ObSQLSessionInfo *session_info = NULL; - uint64_t tids[OB_MAX_INDEX_PER_TABLE + 1]; - int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1; + uint64_t tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1]; + int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1; const LogTableHint *log_table_hint = NULL; - ObMatchFunRawExpr *match_expr = NULL; ObRawExpr *vector_expr = NULL; const ObSelectStmt *select_stmt = NULL; bool has_aggr = false; // defend aggr for ann search @@ -3238,20 +3265,6 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id, } else if (OB_ISNULL(table_item = stmt->get_table_item_by_id(table_id))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Table item should not be NULL", K(table_id), K(table_item), K(ret)); - } else if (OB_FAIL(stmt->get_match_expr_on_table(table_id, match_expr))) { - LOG_WARN("failed to check has fulltext search on table", K(ret)); - } else if 
(OB_NOT_NULL(match_expr)) { - // If there is a full-text search requirement on current base table, We can only choose the - // path that accesses the word-doc inverted index for now. - uint64_t inv_idx_tid = OB_INVALID_ID; - if (OB_FAIL(get_matched_inv_index_tid(match_expr, ref_table_id, inv_idx_tid))) { - LOG_WARN("failed to get matched inverted index table id", K(ret)); - } else if (inv_idx_tid == OB_INVALID_ID) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected table id", K(ret)); - } else if (OB_FAIL(valid_index_ids.push_back(inv_idx_tid))) { - LOG_WARN("failed to assign index ids", K(ret)); - } } else if (stmt->is_select_stmt() && FALSE_IT(select_stmt = static_cast(stmt))) { } else if (nullptr != select_stmt && FALSE_IT(has_aggr = select_stmt->get_aggr_item_size() > 0)) { } else if (stmt->has_vec_approx() @@ -3286,7 +3299,11 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id, } else if (FALSE_IT(log_table_hint = get_plan()->get_log_plan_hint().get_index_hint(table_id))) { } else if (NULL != log_table_hint && log_table_hint->is_use_index_hint()) { // for use index hint, get index ids from hint. 
- if (OB_FAIL(valid_index_ids.assign(log_table_hint->index_list_))) { + ObSEArray valid_hint_index_list; + const bool is_link = ObSqlSchemaGuard::is_link_table(stmt, table_id); + if (OB_FAIL(get_valid_hint_index_list(log_table_hint->index_list_, is_link, schema_guard, helper, valid_hint_index_list))) { + LOG_WARN("failed to get valid hint index list", K(ret)); + } else if (OB_FAIL(valid_index_ids.assign(valid_hint_index_list))) { LOG_WARN("failed to assign index ids", K(ret)); } } else if (OB_FAIL(schema_guard->get_can_read_index_array(ref_table_id, @@ -3298,7 +3315,9 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id, false /*spatial index*/, false /*vector index*/))) { LOG_WARN("failed to get can read index", K(ref_table_id), K(ret)); - } else if (index_count > OB_MAX_INDEX_PER_TABLE + 1) { + } else if (OB_FAIL(add_valid_fts_index_ids(helper, tids, index_count))) { + LOG_WARN("failed to add valid fts index ids", K(ret)); + } else if (index_count > OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Invalid index count", K(ref_table_id), K(index_count), K(ret)); } else if (NULL != log_table_hint && @@ -3582,8 +3601,8 @@ int ObJoinOrder::fill_opt_info_index_name(const uint64_t table_id, { int ret = OB_SUCCESS; const ObTableSchema *table_schema = NULL; - uint64_t index_ids[OB_MAX_INDEX_PER_TABLE + 3]; - int64_t index_count = OB_MAX_INDEX_PER_TABLE + 3; + uint64_t index_ids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 3]; + int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 3; ObSqlSchemaGuard *schema_guard = NULL; const ObDMLStmt *stmt = NULL; if (OB_ISNULL(table_opt_info) || OB_ISNULL(get_plan()) @@ -3602,9 +3621,9 @@ int ObJoinOrder::fill_opt_info_index_name(const uint64_t table_id, index_count, false, true /*global index*/, - false /*domain index*/))) { + true /*domain index*/))) { LOG_WARN("failed to get can read index", K(base_table_id), K(ret)); - } else if (index_count > OB_MAX_INDEX_PER_TABLE + 1) { + } else if (index_count > 
OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1) {
     ret = OB_ERR_UNEXPECTED;
     LOG_WARN("Invalid index count", K(base_table_id), K(index_count), K(ret));
   } else if (OB_FAIL(table_opt_info->available_index_id_.assign(available_index_id))) {
@@ -3620,6 +3639,8 @@ int ObJoinOrder::fill_opt_info_index_name(const uint64_t table_id,
   } else if (OB_ISNULL(table_schema)) {
     ret = OB_ERR_UNEXPECTED;
     LOG_WARN("index schema should not be null", K(ret), K(index_id));
+  } else if (table_schema->is_built_in_fts_index()) {
+    // do nothing
   } else if (base_table_id == index_id) {
     name = table_schema->get_table_name_str();
   } else if (OB_FAIL(table_schema->get_index_name(name))) {
@@ -3627,12 +3648,16 @@ int ObJoinOrder::fill_opt_info_index_name(const uint64_t table_id,
   } else { /*do nothing*/ }
 
   if (OB_SUCC(ret)) {
-    if (OB_FAIL(table_opt_info->available_index_name_.push_back(name))) {
+    if (name.empty()) {
+      // do nothing
+    } else if (OB_FAIL(table_opt_info->available_index_name_.push_back(name))) {
       LOG_WARN("failed to push back index name", K(name), K(ret));
     } else { /* do nothing */ }
   }
   if (OB_FAIL(ret)) {
+  } else if (name.empty()) {
+    // do nothing
   } else if (ObOptimizerUtil::find_item(available_index_id, index_id)) {
     //do nothing
   } else if (ObOptimizerUtil::find_item(unstable_index_id, index_id)) {
@@ -4387,6 +4412,95 @@ int ObJoinOrder::extract_multivalue_preliminary_query_range(const ObIArray &range_columns,
+                                                     const ObIArray &predicates,
+                                                     const ObTableSchema *table_schema,
+                                                     const ObTableSchema *index_schema,
+                                                     PathHelper &helper,
+                                                     ObQueryRangeProvider *&query_range)
+{
+  // Picks the preliminary query range provider for one access path and returns it
+  // through query_range:
+  //  - index_schema is a different table than table_schema: reuse the range stored in
+  //    helper.match_expr_infos_ for the MATCH filter that
+  //    find_least_selective_expr_on_index() selects on this index;
+  //  - index_schema is table_schema itself: extract the range from `predicates`, with
+  //    ObPreRangeGraph when OPT_CTX.enable_new_query_range() is set, else ObQueryRange.
+  // Returns OB_INVALID_ARGUMENT when a required pointer is NULL.
+  // NOTE(review): the line carrying this definition's name is missing from the text
+  // above; the parameter list matches extract_fts_preliminary_query_range as declared
+  // in ob_join_order.h -- confirm.
+  int ret = OB_SUCCESS;
+  bool direct_query_on_index = false;
+  const ParamStore *params = NULL;
+  if (OB_ISNULL(OPT_CTX.get_exec_ctx()) || OB_ISNULL(allocator_) || OB_ISNULL(table_schema) ||
+      OB_ISNULL(index_schema) || OB_ISNULL(OPT_CTX.get_exec_ctx()->get_expr_factory()) ||
+      OB_ISNULL(params = OPT_CTX.get_params())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("get unexpected null", K(OPT_CTX.get_exec_ctx()), K(allocator_),
+        K(ret));
+  } else if (OB_FALSE_IT(direct_query_on_index = index_schema->get_table_id() == table_schema->get_table_id())) {
+  } else if (!direct_query_on_index) {
+    // the range was already built per match expr; look it up instead of extracting again
+    ObArray match_exprs;
+    ObArray match_filters;
+    const MatchExprInfo *match_expr_info = NULL;
+    if (OB_FAIL(extract_scan_match_expr_candidates(predicates, match_exprs, match_filters))) {
+      LOG_WARN("failed to extract match expr candidates", K(ret));
+    } else if (OB_FAIL(find_least_selective_expr_on_index(match_exprs,
+                                                          helper.match_expr_infos_,
+                                                          index_schema->get_table_id(),
+                                                          match_expr_info))) {
+      LOG_WARN("failed to find most selective expr on index", K(ret));
+    } else if (OB_ISNULL(match_expr_info) || OB_ISNULL(match_expr_info->query_range_)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get unexpected null", K(ret));
+    } else {
+      query_range = match_expr_info->query_range_;
+    }
+  } else if (OPT_CTX.enable_new_query_range()) {
+    void *ptr = allocator_->alloc(sizeof(ObPreRangeGraph));
+    ObPreRangeGraph *pre_range_graph = NULL;
+    if (OB_ISNULL(ptr)) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("failed to allocate memory for pre range graph", K(ret));
+    } else {
+      pre_range_graph = new(ptr)ObPreRangeGraph(*allocator_);
+      if (OB_FAIL(pre_range_graph->preliminary_extract_query_range(range_columns, predicates,
+                                                                   OPT_CTX.get_exec_ctx(),
+                                                                   nullptr,
+                                                                   params))) {
+        LOG_WARN("failed to preliminary extract query range", K(ret));
+      }
+    }
+    if (OB_SUCC(ret)) {
+      query_range = pre_range_graph;
+    } else {
+      // the object was placement-new'ed, so on failure only its destructor is run here
+      if (NULL != pre_range_graph) {
+        pre_range_graph->~ObPreRangeGraph();
+        pre_range_graph = NULL;
+      }
+    }
+  } else {
+    void *tmp_ptr = allocator_->alloc(sizeof(ObQueryRange));
+    ObQueryRange *tmp_qr = NULL;
+    if (OB_ISNULL(tmp_ptr)) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("failed to allocate memory for query range", K(ret));
+    } else {
+      tmp_qr = new(tmp_ptr)ObQueryRange(*allocator_);
+      const ObDataTypeCastParams dtc_params =
+          ObBasicSessionInfo::create_dtc_params(OPT_CTX.get_exec_ctx()->get_my_session());
+      if (OB_FAIL(tmp_qr->preliminary_extract_query_range(range_columns, predicates,
+                                                          dtc_params, OPT_CTX.get_exec_ctx(),
+                                                          OPT_CTX.get_query_ctx(),
+                                                          NULL, params))) {
+        LOG_WARN("failed to preliminary extract query range", K(ret));
+      }
+    }
+    if (OB_SUCC(ret)) {
+      query_range = tmp_qr;
+    } else {
+      // the object was placement-new'ed, so on failure only its destructor is run here
+      if (NULL != tmp_qr) {
+        tmp_qr->~ObQueryRange();
+        tmp_qr = NULL;
+      }
+    }
+  }
+  return ret;
+}
+
 int ObJoinOrder::get_candi_range_expr(const ObIArray &range_columns,
                                       const ObIArray &predicates,
                                       ObIArray &range_predicates)
@@ -6632,6 +6746,8 @@ int AccessPath::compute_access_path_batch_rescan()
     LOG_WARN("failed to extract ir fitler from filters", K(ret), K(filter_));
   } else if (is_virtual_table(ref_table_id_)
              || est_cost_info_.index_meta_info_.is_geo_index_
+             || tr_idx_info_.has_ir_scan()
+             || tr_idx_info_.has_func_lookup()
              || for_update_
              || !subquery_exprs_.empty()
              || table_item->is_link_table()
@@ -13485,6 +13601,7 @@ int ObJoinOrder::get_simple_index_info(const uint64_t table_id,
 int ObJoinOrder::fill_filters(const ObIArray &all_filters,
                               const ObQueryRangeProvider *query_range_provider,
                               ObCostTableScanInfo &est_cost_info,
+                              const TRIndexAccessInfo &tr_index_info,
                               bool &is_nl_with_extended_range,
                               bool is_link,
                               bool use_skip_scan)
@@ -13536,6 +13653,9 @@ int ObJoinOrder::fill_filters(const ObIArray &all_filters,
     } else if (use_skip_scan &&
                OB_FAIL(est_cost_info.ss_postfix_range_filters_.assign(query_range_provider->get_ss_range_exprs()))) {
       LOG_WARN("failed to assign exprs", K(ret));
+    } else if (est_cost_info.index_meta_info_.is_fulltext_index_ &&
+               OB_FAIL(append_array_no_dup(est_cost_info.prefix_filters_, tr_index_info.index_scan_filters_))) {
+      LOG_WARN("failed to assign exprs", K(ret));
     }
   }
 
@@ -15249,8 +15369,8 @@ int ObJoinOrder::compute_fd_item_set_for_table_scan(const uint64_t table_id,
   int ret = OB_SUCCESS;
   ObSqlSchemaGuard *schema_guard = NULL;
   const ObDMLStmt *stmt = NULL;
-  uint64_t index_tids[OB_MAX_INDEX_PER_TABLE];
-  int64_t index_count =
OB_MAX_INDEX_PER_TABLE;
+  uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
+  int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
   if (OB_ISNULL(get_plan()) || OB_ISNULL(stmt = get_plan()->get_stmt()) ||
       OB_ISNULL(schema_guard = get_plan()->get_optimizer_context().get_sql_schema_guard())) {
     ret = OB_ERR_UNEXPECTED;
@@ -17633,7 +17753,7 @@ int ObJoinOrder::get_matched_inv_index_tid(ObMatchFunRawExpr *match_expr,
                                                          inv_idx_schema,
                                                          found_matched_index))) {
         LOG_WARN("failed to check fulltext index match column", K(ret));
-      } else if (found_matched_index) {
+      } else if (found_matched_index && inv_idx_schema->can_read_index() && inv_idx_schema->is_index_visible()) {
         inv_idx_tid = index_info.table_id_;
       }
     }
@@ -17641,6 +17761,564 @@ int ObJoinOrder::get_matched_inv_index_tid(ObMatchFunRawExpr *match_expr,
   return ret;
 }
 
+// Collects the filters of the form T_OP_BOOL(<match expr>): for each such filter the
+// match expr is appended to scan_match_exprs and the filter itself to
+// scan_match_filters, so both arrays are filled pairwise (same index = same filter).
+// NOTE(review): the `filters` argument is never read; the loop always walks
+// get_restrict_infos(). One caller passes `predicates` here -- confirm whether the
+// restrict infos are really the intended source before changing either side.
+int ObJoinOrder::extract_scan_match_expr_candidates(const ObIArray &filters,
+                                                    ObIArray &scan_match_exprs,
+                                                    ObIArray &scan_match_filters)
+{
+  int ret = OB_SUCCESS;
+  for (int64_t i = 0; OB_SUCC(ret) && i < get_restrict_infos().count(); ++i) {
+    ObRawExpr *filter = get_restrict_infos().at(i);
+    if (OB_ISNULL(filter)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected nullptr to filter expr", K(ret), K(i), KPC(filter));
+    } else if (filter->get_expr_type() == T_OP_BOOL && filter->has_flag(CNT_MATCH_EXPR)) {
+      // only a match expr that is the direct child of the bool op qualifies
+      ObRawExpr *param_expr = filter->get_param_expr(0);
+      if (OB_ISNULL(param_expr)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected null param expr for bool op", K(ret));
+      } else if (param_expr->has_flag(IS_MATCH_EXPR)) {
+        if (OB_FAIL(scan_match_exprs.push_back(static_cast(param_expr)))) {
+          LOG_WARN("failed to append match expr to array", K(ret));
+        } else if (OB_FAIL(scan_match_filters.push_back(filter))) {
+          LOG_WARN("failed to append match filter to array", K(ret));
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+// classify index scan and functional lookup match exprs
+// Fills access_path.tr_idx_info_ for the path on index_id:
+//  - when index_id is an FTS index, exactly one scan candidate on that index (chosen by
+//    find_least_selective_expr_on_index) is recorded as the index scan expr together
+//    with its filter and inverted index id; finding none is OB_ERR_UNEXPECTED;
+//  - every other match expr on table_id is recorded as a functional lookup expr with
+//    the inverted index id stored in helper.match_expr_infos_.
+// Nothing is recorded when the statement has no match expr on table_id.
+// ref_table_id is not used in this function.
+int ObJoinOrder::process_index_for_match_expr(const uint64_t table_id,
+                                              const uint64_t ref_table_id,
+                                              const uint64_t index_id,
+                                              PathHelper &helper,
+                                              AccessPath &access_path)
+{
+  int ret = OB_SUCCESS;
+  ObSEArray all_match_exprs;
+  ObMatchFunRawExpr *match_expr_for_index_scan = nullptr;
+  ObSqlSchemaGuard *schema_guard = nullptr;
+  const ObTableSchema *index_schema = nullptr;
+  if (OB_ISNULL(schema_guard = OPT_CTX.get_sql_schema_guard()) || OB_ISNULL(get_plan()) ||
+      OB_ISNULL(get_plan()->get_stmt())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr", K(ret));
+  } else if (OB_FAIL(get_plan()->get_stmt()->get_match_expr_on_table(table_id, all_match_exprs))) {
+    LOG_WARN("failed to get match exprs by table id", K(ret), K(table_id));
+  } else if (all_match_exprs.empty()) {
+    // do nothing
+  } else if (OB_FAIL(schema_guard->get_table_schema(index_id, index_schema))) {
+    LOG_WARN("failed to get index table schema", K(ret), K(index_id));
+  } else if (OB_ISNULL(index_schema)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected nullptr to index schema", K(ret));
+  } else if (index_schema->is_fts_index()) {
+    ObSEArray scan_match_exprs;
+    ObSEArray scan_match_filters;
+    const MatchExprInfo *match_expr_info = NULL;
+    int64_t idx = -1;
+    if (OB_FAIL(extract_scan_match_expr_candidates(get_restrict_infos(),
+                                                   scan_match_exprs,
+                                                   scan_match_filters))) {
+      LOG_WARN("failed to extract scan match expr", K(ret));
+    } else if (OB_FAIL(find_least_selective_expr_on_index(scan_match_exprs,
+                                                          helper.match_expr_infos_,
+                                                          index_id,
+                                                          match_expr_info))) {
+      LOG_WARN("failed to find most selective expr on index", K(ret));
+    } else if (OB_ISNULL(match_expr_info)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get unexpected null", K(ret));
+    } else if (!ObOptimizerUtil::find_item(scan_match_exprs, match_expr_info->match_expr_, &idx)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get unexpected match expr", K(ret));
+    } else if (OB_UNLIKELY(idx < 0 || idx >= scan_match_filters.count())) {
+      // idx is reused to fetch the filter paired with the chosen match expr
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get unexpected idx", K(ret), K(idx), K(scan_match_filters.count()));
+    } else if (OB_FAIL(access_path.tr_idx_info_.index_scan_exprs_.push_back(match_expr_info->match_expr_))) {
+      LOG_WARN("failed to append match expr", K(ret));
+    } else if (OB_FAIL(access_path.tr_idx_info_.index_scan_filters_.push_back(scan_match_filters.at(idx)))) {
+      LOG_WARN("failed to append scan match filter expr", K(ret));
+    } else if (OB_FAIL(access_path.tr_idx_info_.index_scan_index_ids_.push_back(match_expr_info->inv_idx_id_))) {
+      LOG_WARN("failed to append inverted index table id", K(ret));
+    } else {
+      match_expr_for_index_scan = match_expr_info->match_expr_;
+    }
+  }
+
+  // all match exprs except the one used for the index scan become functional lookups
+  for (int64_t i = 0; OB_SUCC(ret) && i < all_match_exprs.count(); ++i) {
+    ObMatchFunRawExpr *curr_expr = static_cast(all_match_exprs.at(i));
+    const MatchExprInfo *match_expr_info = NULL;
+    if (OB_ISNULL(curr_expr)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected null ");
+    } else if (curr_expr == match_expr_for_index_scan) {
+      // skip
+    } else if (OB_FAIL(find_match_expr_info(helper.match_expr_infos_, curr_expr, match_expr_info))) {
+      LOG_WARN("failed to find match expr info", K(ret), KPC(curr_expr));
+    } else if (OB_ISNULL(match_expr_info)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected null match expr info", K(ret));
+    } else if (OB_FAIL(access_path.tr_idx_info_.func_lookup_exprs_.push_back(curr_expr))) {
+      LOG_WARN("failed to append func lookup exprs", K(ret), KPC(curr_expr));
+    } else if (OB_FAIL(access_path.tr_idx_info_.func_lookup_index_ids_.push_back(match_expr_info->inv_idx_id_))) {
+      LOG_WARN("failed to append func lookup index id", K(ret));
+    }
+  }
+  return ret;
+}
+
+// Rebuilds helper.match_expr_infos_ with one entry (matched inverted index id, query
+// range, estimated row count and selectivity) per match expr found on table_id, and
+// appends each expr's selectivity to the plan's predicate selectivities.
+int ObJoinOrder::init_basic_text_retrieval_info(uint64_t table_id,
+                                                uint64_t ref_table_id,
+                                                PathHelper &helper)
+{
+  int ret = OB_SUCCESS;
+  helper.match_expr_infos_.reuse();
+  ObSEArray match_exprs;
+  ObSqlSchemaGuard *schema_guard = NULL;
+  ObSEArray query_tokens;
+  if (OB_ISNULL(get_plan()) || OB_ISNULL(get_plan()->get_stmt()) ||
+      OB_ISNULL(schema_guard =
OPT_CTX.get_sql_schema_guard())) {
+    ret = OB_ERR_UNEXPECTED;
+    // get_plan() itself may be the NULL pointer here, so it must not be dereferenced
+    // just to log the stmt pointer
+    LOG_WARN("unexpected null plan or stmt", K(ret), KP(get_plan()), KP(schema_guard));
+  } else if (OB_FAIL(get_plan()->get_stmt()->get_match_expr_on_table(table_id, match_exprs))) {
+    LOG_WARN("failed to get match exprs", K(ret), K(table_id));
+  } else {
+    // generate selectivity info for each match against expr
+    for (int64_t i = 0; OB_SUCC(ret) && i < match_exprs.count(); ++i) {
+      ObMatchFunRawExpr *match_expr = NULL;
+      uint64_t index_id = OB_INVALID_ID;
+      const ObTableSchema *index_schema = NULL;
+      MatchExprInfo match_expr_info;
+      ObSEArray range_columns;
+      // query_tokens lives outside the loop and get_query_tokens() only appends to it;
+      // clear it so this expr's range and selectivity are built from its own search
+      // key only, not from the tokens of the exprs handled before it
+      query_tokens.reuse();
+      if (OB_ISNULL(match_exprs.at(i)) || OB_UNLIKELY(!match_exprs.at(i)->is_match_against_expr())) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected null match expr", K(ret));
+      } else if (OB_FALSE_IT(match_expr = static_cast(match_exprs.at(i)))) {
+      } else if (OB_FAIL(get_matched_inv_index_tid(match_expr, ref_table_id, index_id))) {
+        LOG_WARN("failed to get matched inverted index table id", K(ret), KPC(match_expr));
+      } else if (OB_FAIL(schema_guard->get_table_schema(index_id, index_schema))) {
+        LOG_WARN("failed to get index schema", K(ret), K(index_id));
+      } else if (OB_ISNULL(index_schema)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("unexpected null index schema", K(ret), K(index_id));
+      } else if (OB_FAIL(get_plan()->get_index_column_items(OPT_CTX.get_expr_factory(),
+                                                            table_id,
+                                                            *index_schema,
+                                                            range_columns))) {
+        LOG_WARN("failed to generate rowkey column items", K(ret));
+      } else if (OB_FAIL(get_query_tokens(match_expr, index_schema, query_tokens))) {
+        LOG_WARN("failed to get query tokens", K(ret));
+      } else if (OB_FAIL(get_range_of_query_tokens(query_tokens,
+                                                   *index_schema,
+                                                   range_columns,
+                                                   match_expr_info.query_range_))) {
+        LOG_WARN("failed to get range of query tokens", K(ret));
+      } else if (OB_FAIL(estimate_fts_index_scan(table_id,
+                                                 ref_table_id,
+                                                 index_id,
+                                                 index_schema,
+                                                 match_expr_info.query_range_,
+                                                 match_expr_info.query_range_row_count_,
+                                                 match_expr_info.selectivity_))) {
+        LOG_WARN("failed to estimate fts index scan", K(ret));
+      } else if (OB_FALSE_IT(match_expr_info.match_expr_ = match_expr)) {
+      } else if (OB_FALSE_IT(match_expr_info.inv_idx_id_ = index_id)) {
+      } else if (OB_FAIL(helper.match_expr_infos_.push_back(match_expr_info))) {
+        LOG_WARN("failed to push back match expr info", K(ret));
+      // add selectivity infos of match against exprs to LogPlan
+      } else if (OB_FAIL(get_plan()->get_predicate_selectivities().
+                         push_back(ObExprSelPair(match_expr, match_expr_info.selectivity_)))) {
+        LOG_WARN("failed to push back predicate selectivities", K(ret));
+      }
+    }
+    LOG_TRACE("OPT: selectivity infos of match exprs", K(helper.match_expr_infos_));
+  }
+  return ret;
+}
+
+// Evaluates the search key of match_expr as a constant, segments the resulting text
+// with the parser named by index_schema, and appends one const varchar expr per
+// token_map entry to query_tokens. Nothing is appended when the key cannot be
+// evaluated, is NULL, or is an empty string. query_tokens is only appended to, never
+// cleared, so the caller decides whether tokens accumulate.
+int ObJoinOrder::get_query_tokens(ObMatchFunRawExpr *match_expr,
+                                  const ObTableSchema *index_schema,
+                                  ObIArray &query_tokens)
+{
+  int ret = OB_SUCCESS;
+  ObObj result;
+  bool got_result = false;
+  // match_expr, the exec ctx and its expr factory are all dereferenced below
+  if (OB_ISNULL(allocator_) || OB_ISNULL(index_schema) || OB_ISNULL(match_expr) ||
+      OB_ISNULL(OPT_CTX.get_exec_ctx()) || OB_ISNULL(OPT_CTX.get_exec_ctx()->get_expr_factory())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("unexpected null", K(ret), KP(allocator_), KP(index_schema), KP(match_expr));
+  } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(OPT_CTX.get_exec_ctx(),
+                                                              match_expr->get_search_key(),
+                                                              result,
+                                                              got_result,
+                                                              *allocator_))) {
+    LOG_WARN("fail to calc prefix pattern expr", K(ret));
+  } else if (!got_result || result.is_null() || (is_oracle_mode() && result.is_null_oracle())) {
+    // do nothing
+  } else {
+    const ObString &search_text_string = result.get_string();
+    const ObString &parser_name = index_schema->get_parser_name_str();
+    const ObCollationType &cs_type = match_expr->get_search_key()->get_collation_type();
+    storage::ObFTParseHelper tokenize_helper;
+    hash::ObHashMap token_map;
+    int64_t doc_length = 0;
+    // bucket count scales with the text length, with a floor of 2
+    const int64_t ft_word_bkt_cnt = MAX(search_text_string.length() / 10, 2);
+    if (search_text_string.length() == 0) {
+      // do nothing
+    } else if
(OB_FAIL(tokenize_helper.init(allocator_, parser_name))) {
+      LOG_WARN("failed to init tokenize helper", K(ret));
+    } else if (OB_FAIL(token_map.create(ft_word_bkt_cnt, common::ObMemAttr(MTL_ID(), "FTWordMap")))) {
+      LOG_WARN("failed to create token map", K(ret));
+    } else if (OB_FAIL(tokenize_helper.segment(
+        cs_type, search_text_string.ptr(), search_text_string.length(), doc_length, token_map))) {
+      // log the error code like every other failure branch of this function
+      LOG_WARN("failed to segment", K(ret));
+    } else {
+      // turn every key of token_map into a const varchar expr
+      for (hash::ObHashMap::const_iterator iter = token_map.begin();
+           OB_SUCC(ret) && iter != token_map.end();
+           ++iter) {
+        const ObFTWord &token = iter->first;
+        ObString token_string;
+        ObConstRawExpr *token_expr = NULL;
+        // deep copy: the expr must not point into memory owned by token_map
+        if (OB_FAIL(ob_write_string(*allocator_, token.get_word(), token_string))) {
+          LOG_WARN("failed to deep copy query token", K(ret));
+        } else if (OB_FAIL(ObRawExprUtils::build_const_string_expr(*OPT_CTX.get_exec_ctx()->get_expr_factory(),
+                                                                  ObVarcharType,
+                                                                  token_string,
+                                                                  cs_type,
+                                                                  token_expr))) {
+          LOG_WARN("failed to build const string expr", K(ret));
+        } else if (OB_FAIL(query_tokens.push_back(token_expr))) {
+          LOG_WARN("failed to append query token", K(ret));
+        }
+      }
+    }
+  }
+  return ret;
+}
+
+// Builds the query range that covers query_tokens on the word segment column of the
+// FTS index index_schema and returns it through query_range:
+//  - non-empty query_tokens: range of `word_col IN (token, ...)`;
+//  - empty query_tokens: range of `word_col = ''`.
+// Fails with OB_ERR_UNEXPECTED when range_columns holds no word segment column.
+int ObJoinOrder::get_range_of_query_tokens(ObIArray &query_tokens,
+                                           const ObTableSchema &index_schema,
+                                           ObIArray &range_columns,
+                                           ObQueryRangeProvider *&query_range)
+{
+  // jinmao TODO: construct the query range directly instead of generating an IN
+  // expression and extracting the range from it indirectly
+  int ret = OB_SUCCESS;
+  ObColumnRefRawExpr *word_col = NULL;
+  ObOpRawExpr *in_expr = NULL;
+  ObOpRawExpr *in_list_expr = NULL;
+  ObSEArray tmp_range_exprs;
+  const ParamStore *params = NULL;
+  // find word segment column on fts index
+  for (int64_t i = 0; OB_SUCC(ret) && OB_ISNULL(word_col) && i < range_columns.count(); i++) {
+    const ObColumnSchemaV2 *col_schema = index_schema.get_column_schema(range_columns.at(i).column_id_);
+    if (OB_ISNULL(col_schema)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get unexpected null", K(col_schema), K(ret));
+    } else if
(col_schema->is_word_segment_column()) {
+      word_col = range_columns.at(i).expr_;
+    }
+  }
+
+  // construct in expr to integrate all tokens
+  if (OB_FAIL(ret)) {
+  } else if (OB_ISNULL(allocator_) || OB_ISNULL(OPT_CTX.get_exec_ctx()) ||
+             OB_ISNULL(OPT_CTX.get_exec_ctx()->get_expr_factory())) {
+    // all three are dereferenced unconditionally below
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null", KP(allocator_), KP(OPT_CTX.get_exec_ctx()), K(ret));
+  } else if (OB_ISNULL(word_col)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("failed to get word segment column", K(ret));
+  } else if (!query_tokens.empty()) {
+    if (OB_FAIL(OPT_CTX.get_exec_ctx()->get_expr_factory()->create_raw_expr(T_OP_ROW, in_list_expr))) {
+      LOG_WARN("failed to create row expr for in list", K(ret));
+    } else if (OB_FAIL(OPT_CTX.get_exec_ctx()->get_expr_factory()->create_raw_expr(T_OP_IN, in_expr))) {
+      LOG_WARN("failed to create in expr", K(ret));
+    } else if (OB_ISNULL(in_list_expr) || OB_ISNULL(in_expr)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get unexpected null", K(in_list_expr), K(in_expr), K(ret));
+    } else {
+      for (int64_t i = 0; OB_SUCC(ret) && i < query_tokens.count(); i++) {
+        if (OB_FAIL(in_list_expr->add_param_expr(query_tokens.at(i)))) {
+          LOG_WARN("failed to add param expr", K(ret));
+        }
+      }
+      if (OB_FAIL(ret)) {
+      } else if (OB_FAIL(in_expr->set_param_exprs(word_col, in_list_expr))) {
+        LOG_WARN("failed to set param exprs", K(ret));
+      } else if (OB_FAIL(in_expr->formalize(OPT_CTX.get_exec_ctx()->get_my_session()))) {
+        LOG_WARN("failed to formalize expr", K(ret));
+      } else if (OB_FAIL(tmp_range_exprs.push_back(in_expr))) {
+        LOG_WARN("failed to push back range expr", K(ret));
+      }
+    }
+  } else {
+    // build an always false expr for empty query tokens
+    ObRawExpr *eq_expr = NULL;
+    ObConstRawExpr *empty_string_expr = NULL;
+    if (OB_FAIL(ObRawExprUtils::build_const_string_expr(*OPT_CTX.get_exec_ctx()->get_expr_factory(),
+                                                       ObVarcharType,
+                                                       ObString(),
+                                                       word_col->get_collation_type(),
+                                                       empty_string_expr))) {
+      LOG_WARN("failed to build const string expr", K(ret));
+    } else if (OB_FAIL(ObRawExprUtils::build_common_binary_op_expr(*OPT_CTX.get_exec_ctx()->get_expr_factory(),
+                                                                  T_OP_EQ,
+                                                                  word_col,
+                                                                  empty_string_expr,
+                                                                  eq_expr))) {
+      LOG_WARN("failed to build common binary op expr", K(ret));
+    } else if (OB_ISNULL(eq_expr)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get unexpected null", K(eq_expr), K(ret));
+    } else if (OB_FAIL(eq_expr->formalize(OPT_CTX.get_exec_ctx()->get_my_session()))) {
+      LOG_WARN("failed to formalize expr", K(ret));
+    } else if (OB_FAIL(tmp_range_exprs.push_back(eq_expr))) {
+      LOG_WARN("failed to push back range expr", K(ret));
+    }
+  }
+
+  // generate query range
+  if (OB_FAIL(ret)) {
+  } else if (OB_ISNULL(params = OPT_CTX.get_params())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null", K(params), K(ret));
+  } else if (OPT_CTX.enable_new_query_range()) {
+    void *ptr = allocator_->alloc(sizeof(ObPreRangeGraph));
+    ObPreRangeGraph *pre_range_graph = NULL;
+    if (OB_ISNULL(ptr)) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("failed to allocate memory for pre range graph", K(ret));
+    } else {
+      pre_range_graph = new(ptr)ObPreRangeGraph(*allocator_);
+      if (OB_FAIL(pre_range_graph->preliminary_extract_query_range(range_columns, tmp_range_exprs,
+                                                                   OPT_CTX.get_exec_ctx(),
+                                                                   nullptr,
+                                                                   params))) {
+        LOG_WARN("failed to preliminary extract query range", K(ret));
+      }
+    }
+    if (OB_SUCC(ret)) {
+      query_range = pre_range_graph;
+      // reset range exprs which should be invisible after query range extraction
+      pre_range_graph->reset_range_exprs();
+    } else {
+      // the object was placement-new'ed, so on failure only its destructor is run here
+      if (NULL != pre_range_graph) {
+        pre_range_graph->~ObPreRangeGraph();
+        pre_range_graph = NULL;
+      }
+    }
+  } else {
+    void *tmp_ptr = allocator_->alloc(sizeof(ObQueryRange));
+    ObQueryRange *tmp_qr = NULL;
+    if (OB_ISNULL(tmp_ptr)) {
+      ret = OB_ALLOCATE_MEMORY_FAILED;
+      LOG_WARN("failed to allocate memory for query range", K(ret));
+    } else {
+      tmp_qr = new(tmp_ptr)ObQueryRange(*allocator_);
+      const ObDataTypeCastParams dtc_params =
+          ObBasicSessionInfo::create_dtc_params(OPT_CTX.get_exec_ctx()->get_my_session());
+      if (OB_FAIL(tmp_qr->preliminary_extract_query_range(range_columns, tmp_range_exprs,
+                                                          dtc_params, OPT_CTX.get_exec_ctx(),
+                                                          OPT_CTX.get_query_ctx(),
+                                                          NULL, params))) {
+        LOG_WARN("failed to preliminary extract query range", K(ret));
+      }
+    }
+    if (OB_SUCC(ret)) {
+      query_range = tmp_qr;
+      // reset range exprs which should be invisible after query range extraction
+      tmp_qr->reset_range_exprs();
+    } else {
+      // the object was placement-new'ed, so on failure only its destructor is run here
+      if (NULL != tmp_qr) {
+        tmp_qr->~ObQueryRange();
+        tmp_qr = NULL;
+      }
+    }
+  }
+  return ret;
+}
+
+// Estimates how many rows of FTS index index_id fall into query_range by storage
+// estimation over the index's partitions. Outputs:
+//  - query_range_row_count: the estimated row count;
+//  - selectivity: that count divided by get_table_meta().table_row_count_, capped at
+//    1.0, and 0 when that table row count is 0.
+// A global index table is rejected with OB_ERR_UNEXPECTED.
+// ref_table_id is not used in this function.
+int ObJoinOrder::estimate_fts_index_scan(uint64_t table_id,
+                                         uint64_t ref_table_id,
+                                         uint64_t index_id,
+                                         const ObTableSchema *index_schema,
+                                         ObQueryRangeProvider *query_range,
+                                         int64_t &query_range_row_count,
+                                         double &selectivity)
+{
+  int ret = OB_SUCCESS;
+  ObTablePartitionInfo *table_partition_info = NULL;
+  ObTableMetaInfo table_meta_range(index_id);
+  const ObSQLSessionInfo *session = OPT_CTX.get_session_info();
+  const ObDataTypeCastParams dtc_params = ObBasicSessionInfo::create_dtc_params(session);
+  ObQueryRangeArray range_array;
+  ObRangesArray ranges;
+  bool dummy_all_single_value_ranges = true;
+  if (OB_ISNULL(index_schema) || OB_ISNULL(query_range) || OB_ISNULL(OPT_CTX.get_exec_ctx()) ||
+      OB_UNLIKELY(index_schema->is_global_index_table())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null", K(index_schema), K(query_range), K(ret));
+  } else if (OB_FAIL(compute_table_location(table_id, index_id, false, table_partition_info))) {
+    LOG_WARN("failed to compute table location", K(ret));
+  } else if (OB_ISNULL(table_partition_info)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null", K(table_partition_info), K(ret));
+  } else if (OB_FAIL(query_range->get_tablet_ranges(OPT_CTX.get_allocator(),
+                                                    *OPT_CTX.get_exec_ctx(),
+                                                    range_array,
+                                                    dummy_all_single_value_ranges,
+                                                    dtc_params))) {
+    LOG_WARN("failed to get tablet ranges", K(ret));
+  } else {
+    // copy the range pointers into the by-value array handed to the estimator
+    for(int64_t i = 0; OB_SUCC(ret) && i < range_array.count(); ++i) {
+      if (OB_ISNULL(range_array.at(i))) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("range is null", K(ret));
+      } else if (OB_FAIL(ranges.push_back(*range_array.at(i)))) {
+        LOG_WARN("failed to add range", K(ret));
+      }
+    }
+    // init table meta info
+    table_meta_range.ref_table_id_ = index_id;
+    // the remaining meta fields are copied from the index schema and the partition info
+    table_meta_range.table_rowkey_count_ = index_schema->get_rowkey_info().get_size();
+    table_meta_range.table_column_count_ = index_schema->get_column_count();
+    table_meta_range.micro_block_size_ = index_schema->get_block_size();
+    table_meta_range.part_count_ =
+        table_partition_info->get_phy_tbl_location_info().get_phy_part_loc_info_list().count();
+    table_meta_range.schema_version_ = index_schema->get_schema_version();
+    table_meta_range.is_broadcast_table_ = index_schema->is_broadcast_table();
+  }
+
+  if (OB_FAIL(ret)) {
+  } else if (OB_FAIL(ObAccessPathEstimation::storage_estimate_range_rowcount(OPT_CTX,
+                       table_partition_info->get_phy_tbl_location_info().get_phy_part_loc_info_list(),
+                       false,
+                       &ranges,
+                       table_meta_range))) {
+    LOG_WARN("failed to estimate table range rowcount", K(ret));
+  } else {
+    // the estimate is read back from table_meta_range.table_row_count_
+    query_range_row_count = table_meta_range.table_row_count_;
+    // guard the division: a zero table row count yields selectivity 0
+    selectivity = get_table_meta().table_row_count_ == 0 ? 0 :
+                  table_meta_range.table_row_count_ * 1.0 / get_table_meta().table_row_count_;
+    // refine selectivity
+    selectivity = std::min(selectivity, 1.0);
+  }
+  return ret;
+}
+
+// Appends to index_tid_array (current length `size`, updated in place) the inverted
+// index ids recorded in helper.match_expr_infos_ for the scan match candidates found in
+// the restrict infos. Each id is collected once; whether an id is already present in
+// index_tid_array is not checked here.
+int ObJoinOrder::add_valid_fts_index_ids(PathHelper &helper, uint64_t *index_tid_array, int64_t &size)
+{
+  int ret = OB_SUCCESS;
+  ObSEArray scan_match_exprs;
+  ObSEArray scan_match_filters;
+  ObSEArray fts_index_ids;
+  if (OB_FAIL(extract_scan_match_expr_candidates(get_restrict_infos(),
+                                                 scan_match_exprs,
+                                                 scan_match_filters))) {
+    LOG_WARN("failed to extract scan match expr candidates", K(ret));
+  } else if (!scan_match_exprs.empty()) {
+    for (int64_t i = 0; OB_SUCC(ret) && i < scan_match_exprs.count(); ++i) {
+      const MatchExprInfo *match_expr_info = NULL;
+      if (OB_FAIL(find_match_expr_info(helper.match_expr_infos_, scan_match_exprs.at(i), match_expr_info))) {
+        LOG_WARN("failed to find match expr info", K(ret));
+      } else if (OB_ISNULL(match_expr_info)) {
+        ret = OB_ERR_UNEXPECTED;
+        LOG_WARN("get unexpected null", K(ret));
+      } else if
(OB_FAIL(add_var_to_array_no_dup(fts_index_ids, match_expr_info->inv_idx_id_))) {
+        LOG_WARN("failed to add var to array no dup", K(ret));
+      }
+    }
+    if (OB_SUCC(ret)) {
+      // ids that do not fit under the bound are dropped silently
+      // NOTE(review): the bound is OB_MAX_INDEX_PER_TABLE + 1 while other index id
+      // arrays in this change are sized by OB_MAX_AUX_TABLE_PER_MAIN_TABLE -- confirm
+      // it matches the capacity of the caller's index_tid_array.
+      for (int64_t i = 0; i < fts_index_ids.count() && size < OB_MAX_INDEX_PER_TABLE + 1; ++i) {
+        index_tid_array[size++] = fts_index_ids.at(i);
+      }
+    }
+  }
+  return ret;
+}
+
+// Looks up the entry of match_expr_infos whose match_expr_ is match_expr.
+// match_expr_info is set to that entry, or to NULL when there is none or when
+// match_expr is not a match-against expr. A NULL match_expr is OB_ERR_UNEXPECTED.
+int ObJoinOrder::find_match_expr_info(const ObIArray &match_expr_infos,
+                                      ObRawExpr *match_expr,
+                                      const MatchExprInfo *&match_expr_info)
+{
+  int ret = OB_SUCCESS;
+  // the loop below stops at the first non-NULL result, so a stale pointer passed in by
+  // the caller would otherwise be returned untouched as if it were the match
+  match_expr_info = NULL;
+  if (OB_ISNULL(match_expr)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("get unexpected null", K(ret));
+  } else if (OB_UNLIKELY(!match_expr->is_match_against_expr())) {
+    // do nothing
+  } else {
+    for (int64_t i = 0; OB_SUCC(ret) && OB_ISNULL(match_expr_info) && i < match_expr_infos.count(); ++i) {
+      if (match_expr_infos.at(i).match_expr_ == match_expr) {
+        match_expr_info = &match_expr_infos.at(i);
+      }
+    }
+  }
+  return ret;
+}
+
+// Among match_exprs whose recorded inverted index is index_id, returns through
+// match_expr_info the entry with the smallest selectivity_ value; the first one wins on
+// ties. match_expr_info is NULL when no expr is on index_id. An expr without an entry
+// in match_expr_infos is OB_ERR_UNEXPECTED.
+// Note that the function name says "least selective" while the callers' log text says
+// "most selective"; the code picks the minimum selectivity_.
+int ObJoinOrder::find_least_selective_expr_on_index(const ObIArray &match_exprs,
+                                                    const ObIArray &match_expr_infos,
+                                                    uint64_t index_id,
+                                                    const MatchExprInfo *&match_expr_info)
+{
+  int ret = OB_SUCCESS;
+  // start above the largest valid selectivity so that any entry can be selected
+  double min_selectivity = 1.1;
+  // do not hand a caller-provided stale pointer back when nothing matches
+  match_expr_info = NULL;
+  for (int64_t i = 0; OB_SUCC(ret) && i < match_exprs.count(); ++i) {
+    const MatchExprInfo *tmp_match_expr_info = NULL;
+    if (OB_FAIL(find_match_expr_info(match_expr_infos, match_exprs.at(i), tmp_match_expr_info))) {
+      LOG_WARN("failed to find match expr info", K(ret));
+    } else if (OB_ISNULL(tmp_match_expr_info)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("get unexpected null", K(ret));
+    } else if (tmp_match_expr_info->inv_idx_id_ == index_id &&
+               tmp_match_expr_info->selectivity_ < min_selectivity) {
+      min_selectivity = tmp_match_expr_info->selectivity_;
+      match_expr_info = tmp_match_expr_info;
+    }
+  }
+  return ret;
+}
+
+// Copies hint_index_ids into valid_hint_index_ids, leaving out every hinted FTS index
+// that no entry of helper.match_expr_infos_ refers to.
+int ObJoinOrder::get_valid_hint_index_list(const ObIArray &hint_index_ids,
+                                           const bool is_link_table,
+                                           ObSqlSchemaGuard *schema_guard,
+                                           PathHelper &helper,
+                                           ObIArray
&valid_hint_index_ids) const
+{
+  int ret = OB_SUCCESS;
+  if (OB_ISNULL(schema_guard)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), KP(schema_guard));
+  }
+
+  // the loop condition checks ret, so it is skipped entirely when schema_guard is NULL
+  for (int64_t i = 0; OB_SUCC(ret) && i < hint_index_ids.count(); ++i) {
+    const ObTableSchema *index_hint_table_schema = nullptr;
+    const uint64_t tid = hint_index_ids.at(i);
+    if (OB_FAIL(schema_guard->get_table_schema(tid, index_hint_table_schema, is_link_table))) {
+      LOG_WARN("failed to get table schema", K(ret), K(tid));
+    } else if (OB_ISNULL(index_hint_table_schema)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("unexpected nullptr to index hint table schema", K(ret), K(tid));
+    } else if (index_hint_table_schema->is_fts_index()
+               && !has_match_expr_on_index(tid, helper.match_expr_infos_)) {
+      // skip index hint on fulltext index without match expr on fulltext index
+    } else if (OB_FAIL(valid_hint_index_ids.push_back(tid))) {
+      LOG_WARN("failed to append valid hint index list", K(ret), K(tid));
+    }
+  }
+
+  return ret;
+}
+
+// Returns true when at least one entry of match_expr_infos has inv_idx_id_ == index_id.
+bool ObJoinOrder::has_match_expr_on_index(const uint64_t index_id,
+                                          const ObIArray &match_expr_infos) const
+{
+  bool bret = false;
+  for (int64_t i = 0; !bret && i < match_expr_infos.count(); ++i) {
+    const MatchExprInfo &match_info = match_expr_infos.at(i);
+    if (match_info.inv_idx_id_ == index_id) {
+      bret = true;
+    }
+  }
+  return bret;
+}
+
 int ObJoinOrder::get_better_index_prefix(const ObIArray &range_exprs,
                                          const ObIArray &range_expr_max_offsets,
                                          const ObIArray &total_range_counts,
diff --git a/src/sql/optimizer/ob_join_order.h b/src/sql/optimizer/ob_join_order.h
index 41ecd73fd..824c3b82d 100755
--- a/src/sql/optimizer/ob_join_order.h
+++ b/src/sql/optimizer/ob_join_order.h
@@ -277,6 +277,37 @@ struct EstimateCostInfo {
   bool override_;
 };
 
+// Text retrieval (fulltext) bookkeeping of one access path: which match exprs are
+// evaluated by the index scan and which by functional lookup.
+struct TRIndexAccessInfo
+{
+  TRIndexAccessInfo()
+    : index_scan_exprs_(),
+      index_scan_filters_(),
+      index_scan_index_ids_(),
+      func_lookup_exprs_(),
+      func_lookup_index_ids_() {}
+
+  // empties all five arrays
+  void reset()
+  {
+    index_scan_exprs_.reset();
+    index_scan_filters_.reset();
+    index_scan_index_ids_.reset();
+    func_lookup_exprs_.reset();
+    func_lookup_index_ids_.reset();
+  }
+
+  // true when at least one match expr is assigned to the index scan
+  bool has_ir_scan() const { return index_scan_exprs_.count() != 0; }
+  // true when at least one match expr is assigned to functional lookup
+  bool has_func_lookup() const { return func_lookup_exprs_.count() != 0; }
+
+  TO_STRING_KV(K_(index_scan_exprs), K_(index_scan_filters), K_(index_scan_index_ids),
+      K_(func_lookup_exprs), K_(func_lookup_index_ids));
+
+  // the three index_scan_* arrays are appended to together by
+  // ObJoinOrder::process_index_for_match_expr, as are the two func_lookup_* arrays
+  common::ObSEArray index_scan_exprs_;       // match exprs used for the index scan
+  common::ObSEArray index_scan_filters_;     // filters wrapping those match exprs
+  common::ObSEArray index_scan_index_ids_;   // inverted index id per index scan expr
+  common::ObSEArray func_lookup_exprs_;      // match exprs evaluated by functional lookup
+  common::ObSEArray func_lookup_index_ids_;  // inverted index id per functional lookup expr
+};
+
 class Path
 {
 public:
@@ -549,6 +580,7 @@ struct EstimateCostInfo {
         est_records_(),
         range_prefix_count_(0),
         table_opt_info_(),
+        tr_idx_info_(),
         for_update_(false),
         use_skip_scan_(OptSkipScanState::SS_UNSET),
         use_column_store_(false),
@@ -672,6 +704,7 @@ struct EstimateCostInfo {
                  K_(est_cost_info),
                  K_(sample_info),
                  K_(range_prefix_count),
+                 K_(tr_idx_info),
                  K_(for_update),
                  K_(use_das),
                  K_(use_skip_scan),
@@ -701,6 +734,7 @@ struct EstimateCostInfo {
     SampleInfo sample_info_; // sample scan info
     int64_t range_prefix_count_; // prefix count
     BaseTableOptInfo *table_opt_info_;
+    TRIndexAccessInfo tr_idx_info_;
     bool for_update_;
     OptSkipScanState use_skip_scan_;
     bool use_column_store_;
@@ -1344,6 +1378,30 @@ struct NullAwareAntiJoinInfo {
     static const int64_t TABLE_HEURISTIC_UNIQUE_KEY_RANGE_THRESHOLD = 10000;
     static const int64_t PRUNING_ROW_COUNT_THRESHOLD = 1000;
 
+    // Per match expr record built by init_basic_text_retrieval_info: the matched
+    // inverted index, the query range of the search tokens on that index, and the
+    // estimate derived from that range.
+    struct MatchExprInfo {
+      // row count and selectivity start at -1 and are filled in by the estimation
+      MatchExprInfo()
+        : match_expr_(NULL),
+          inv_idx_id_(common::OB_INVALID_ID),
+          query_range_(NULL),
+          query_range_row_count_(-1),
+          selectivity_(-1.0)
+      {}
+
+      ObMatchFunRawExpr *match_expr_;       // the match expr this record describes
+      uint64_t inv_idx_id_;                 // table id of the matched inverted index
+      ObQueryRangeProvider *query_range_;   // range of the search tokens on that index
+      int64_t query_range_row_count_;       // estimated rows inside query_range_
+      double selectivity_;                  // estimated rows / table row count, at most 1.0
+
+      TO_STRING_KV(
+        K_(match_expr),
+        K_(inv_idx_id),
+        K_(query_range),
+        K_(query_range_row_count),
+        K_(selectivity)
+      );
+    };
+
   struct PathHelper {
     PathHelper()
     : is_inner_path_(false),
@@ -1378,6 +1436,8 @@ struct NullAwareAntiJoinInfo {
     ObBaseTableEstMethod est_method_;
     // include nl params and onetime params
     ObSEArray exec_params_;
+    // record basic index and selectivity info for all match exprs
+    ObSEArray match_expr_infos_;
   };
 
   struct DeducedExprInfo {
@@ -2425,6 +2485,7 @@ struct NullAwareAntiJoinInfo {
 
   int get_valid_index_ids(const uint64_t table_id,
                           const uint64_t ref_table_id,
+                          PathHelper &helper,
                           ObIArray &valid_index_id);
   int get_valid_index_ids_with_no_index_hint(ObSqlSchemaGuard &schema_guard,
                                              const uint64_t ref_table_id,
@@ -2558,6 +2619,7 @@ struct NullAwareAntiJoinInfo {
   int fill_filters(const common::ObIArray &all_filters,
                    const ObQueryRangeProvider* query_range,
                    ObCostTableScanInfo &est_scan_cost_info,
+                   const TRIndexAccessInfo &tr_idx_info,
                    bool &is_nl_with_extended_range,
                    bool is_link = false,
                    bool use_skip_scan = false);
@@ -2630,6 +2692,37 @@ struct NullAwareAntiJoinInfo {
   int get_join_output_exprs(ObIArray &output_exprs);
   int get_excluded_condition_exprs(ObIArray &excluded_conditions);
   static double calc_single_parallel_rows(double rows, int64_t parallel);
+  int init_basic_text_retrieval_info(uint64_t table_id,
+                                     uint64_t ref_table_id,
+                                     PathHelper &helper);
+  int extract_fts_preliminary_query_range(const ObIArray &range_columns,
+                                          const ObIArray &predicates,
+                                          const ObTableSchema *table_schema,
+                                          const ObTableSchema *index_schema,
+                                          PathHelper &helper,
+                                          ObQueryRangeProvider *&query_range);
+  int get_query_tokens(ObMatchFunRawExpr *match_expr,
+                       const ObTableSchema *index_schema,
+                       ObIArray &query_tokens);
+  int get_range_of_query_tokens(ObIArray &query_tokens,
+                                const ObTableSchema &index_schema,
+                                ObIArray &range_columns,
+                                ObQueryRangeProvider *&query_range);
+  int estimate_fts_index_scan(uint64_t table_id,
+                              uint64_t ref_table_id,
+                              uint64_t index_id,
+                              const ObTableSchema *index_schema,
+                              ObQueryRangeProvider *query_range,
+                              int64_t &query_range_row_count,
+ double &selectivity); + int add_valid_fts_index_ids(PathHelper &helper, uint64_t *index_tid_array, int64_t &size); + int find_match_expr_info(const ObIArray &match_expr_infos, + ObRawExpr *match_expr, + const MatchExprInfo *&match_expr_info); + int find_least_selective_expr_on_index(const ObIArray &match_exprs, + const ObIArray &match_expr_infos, + uint64_t index_id, + const MatchExprInfo *&match_expr_info); private: static int check_and_remove_is_null_qual(ObLogPlan *plan, const ObJoinType join_type, @@ -2794,6 +2887,20 @@ struct NullAwareAntiJoinInfo { int compute_sharding_info_for_index_info_entry(const uint64_t table_id, const uint64_t base_table_id, IndexInfoEntry *index_info_entry); + int process_index_for_match_expr(const uint64_t table_id, + const uint64_t ref_table_id, + const uint64_t index_id, + PathHelper &helper, + AccessPath &access_path); + int extract_scan_match_expr_candidates(const ObIArray &filters, + ObIArray &scan_match_exprs, + ObIArray &scan_match_filters); + int get_valid_hint_index_list(const ObIArray &hint_index_ids, + const bool is_link_table, + ObSqlSchemaGuard *schema_guard, + PathHelper &helper, + ObIArray &valid_hint_index_ids) const; + bool has_match_expr_on_index(const uint64_t index_id, const ObIArray &match_expr_infos) const; friend class ::test::TestJoinOrder_ob_join_order_param_check_Test; friend class ::test::TestJoinOrder_ob_join_order_src_Test; private: diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp index 6080ea6bf..491a8bf93 100644 --- a/src/sql/optimizer/ob_log_plan.cpp +++ b/src/sql/optimizer/ob_log_plan.cpp @@ -2923,16 +2923,27 @@ int ObLogPlan::allocate_access_path(AccessPath *ap, } if (OB_SUCC(ret)) { - ObSEArray non_match_filters; - ObSEArray match_filters; - if (OB_FAIL(ObRawExprUtils::extract_match_against_filters(ap->filter_, - non_match_filters, - match_filters))) { - LOG_WARN("failed to extract ir fitler from filters", K(ret), K(ap->filter_)); - } else if 
(match_filters.count() > 0) { - if (OB_FAIL(prepare_text_retrieval_scan(match_filters, scan))) { + if (ap->tr_idx_info_.has_ir_scan()) { + // For functional lookup with multiple match filters, use only one filter + // as index scan and other filters eval after functional lookup + // TODO: enable multiple fulltext index scan after index merge supported + ObSEArray non_match_filters; + ObSEArray match_filters; + ObSEArray table_scan_filters; + if (OB_FAIL(ObRawExprUtils::extract_match_against_filters(ap->filter_, + non_match_filters, + match_filters))) { + LOG_WARN("failed to extract ir fitler from filters", K(ret), K(ap->filter_)); + } else if (OB_FAIL(table_scan_filters.assign(non_match_filters))) { + LOG_WARN("failed to assign non match filters to scan filters", K(ret)); + } else if (OB_FAIL(prepare_text_retrieval_scan( + ap->tr_idx_info_.index_scan_exprs_, + ap->tr_idx_info_.index_scan_filters_, + match_filters, + table_scan_filters, + scan))) { LOG_WARN("failed to allocate text ir scan", K(ret)); - } else if (OB_FAIL(scan->set_table_scan_filters(non_match_filters))) { + } else if (OB_FAIL(scan->set_table_scan_filters(table_scan_filters))) { LOG_WARN("failed to set filters", K(ret)); } else if (OB_FAIL(append(scan->get_pushdown_filter_exprs(), ap->pushdown_filters_))) { LOG_WARN("failed to append pushdown filters", K(ret)); @@ -2954,6 +2965,15 @@ int ObLogPlan::allocate_access_path(AccessPath *ap, } } + if (OB_SUCC(ret) && ap->tr_idx_info_.has_func_lookup()) { + // init push-down calc exprs for functional lookup + if (OB_FAIL(prepare_text_retrieval_lookup(ap->tr_idx_info_.func_lookup_exprs_, + ap->tr_idx_info_.func_lookup_index_ids_, + scan))) { + LOG_WARN("failed to prepare text retrieval lookup", K(ret), KPC(ap)); + } + } + //init part/subpart expr for query range prune if (OB_SUCC(ret)) { ObRawExpr *part_expr = NULL; @@ -11620,7 +11640,25 @@ int ObLogPlan::collect_location_related_info(ObLogicalOperator &op) LOG_WARN("failed to append main table id", K(ret)); 
} } + LOG_TRACE("collect location related info", K(rel_info)); + if (OB_SUCC(ret) && tsc_op.has_func_lookup()) { + for (int64_t i = 0; OB_SUCC(ret) && i < tsc_op.get_lookup_tr_infos().count(); ++i) { + const ObTextRetrievalInfo &curr_tr_info = tsc_op.get_lookup_tr_infos().at(i); + if (tsc_op.is_index_scan() + && OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, tsc_op.get_real_ref_table_id()))) { + LOG_WARN("failed to append real table id", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, curr_tr_info.inv_idx_tid_))) { + LOG_WARN("failed to append inverted index table id", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, curr_tr_info.fwd_idx_tid_))) { + LOG_WARN("failed to append foward index table id", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, curr_tr_info.doc_id_idx_tid_))) { + LOG_WARN("failed to append doc_id index table id", K(ret)); + } else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, curr_tr_info.rowkey_idx_tid_))) { + LOG_WARN("failed to append rowkey index table id", K(ret)); + } + } + } if (OB_SUCC(ret) && OB_FAIL(optimizer_context_.get_loc_rel_infos().push_back(rel_info))) { LOG_WARN("store location related info failed", K(ret)); @@ -11883,6 +11921,10 @@ int ObLogPlan::check_das_need_scan_with_domain_id(ObLogicalOperator *op) ObLogTableScan *scan = static_cast(op); if (OB_FAIL(scan->check_das_need_scan_with_domain_id())) { LOG_WARN("failed to check das scan with doc id", K(ret)); + } else if (OB_UNLIKELY(scan->has_func_lookup() && (scan->is_tsc_with_doc_id() || scan->is_tsc_with_vid()))) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("functional lookup with dml on fulltext index / vector index not supported", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "filter that can not imply match_score not equal to 0 in dml"); } } for (int i = 0; OB_SUCC(ret) && i < op->get_num_of_child(); ++i) { @@ -14418,38 +14460,30 @@ int 
ObLogPlan::compute_duplicate_table_replicas(ObLogicalOperator *op) return ret; } -int ObLogPlan::prepare_text_retrieval_scan(const ObIArray &exprs, ObLogicalOperator *scan) +int ObLogPlan::prepare_text_retrieval_scan(const ObIArray &scan_match_exprs, + const ObIArray &scan_match_filters, + const ObIArray &all_match_filters, + ObIArray &scan_filters, + ObLogicalOperator *scan) { - // TODO: only support one match against expr as filter for now int ret = OB_SUCCESS; ObLogTableScan *table_scan = static_cast(scan); ObRawExpr *match_pred = NULL; ObMatchFunRawExpr *match_against = NULL; - ObSchemaGetterGuard *schema_guard = NULL; - ObSQLSessionInfo *session = NULL; - const ObTableSchema *table_schema = NULL; - const ObTableSchema *inv_idx_schema = NULL; - const ObTableSchema *fwd_idx_schema = NULL; - uint64_t doc_id_rowkey_tid = OB_INVALID_ID; - uint64_t fwd_idx_tid = OB_INVALID_ID; - uint64_t inv_idx_tid = OB_INVALID_ID; - ObSEArray index_infos; - bool need_calc_relevance = true; - ObSEArray constraints; + ObMatchFunRawExpr *scan_match_expr = nullptr; - if (OB_UNLIKELY(1 != exprs.count())) { + if (OB_UNLIKELY(1 != scan_match_exprs.count())) { ret = OB_NOT_SUPPORTED; LOG_WARN("multi match filters not supported yet", K(ret)); LOG_USER_ERROR(OB_NOT_SUPPORTED, "more than one match filter"); - } else if (OB_ISNULL(match_pred = exprs.at(0)) || OB_ISNULL(scan) || - OB_ISNULL(get_stmt()) || OB_ISNULL(get_optimizer_context().get_query_ctx())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argumsnts", K(ret), KPC(match_pred), KP(scan)); - } else if (OB_ISNULL(get_stmt()) - || OB_ISNULL(schema_guard = get_optimizer_context().get_schema_guard()) - || OB_ISNULL(session = get_optimizer_context().get_session_info())) { + } else if (OB_UNLIKELY(scan_match_filters.count() < 1)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null pointers", K(ret), KP(get_stmt()), KP(schema_guard), KP(session)); + LOG_WARN("unexpected text retrieval scan without match filters", K(ret)); + } else if 
(OB_ISNULL(match_pred = scan_match_filters.at(0)) + || OB_ISNULL(scan_match_expr = static_cast(scan_match_exprs.at(0))) + || OB_ISNULL(scan)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argumsnts", K(ret), KPC(match_pred), KPC(scan_match_expr), KP(scan)); } else if (OB_UNLIKELY(!match_pred->has_flag(CNT_MATCH_EXPR) || LOG_TABLE_SCAN != scan->get_type() || 0 == match_pred->get_param_count())) { @@ -14474,11 +14508,97 @@ int ObLogPlan::prepare_text_retrieval_scan(const ObIArray &exprs, O } if (OB_FAIL(ret)) { - } else if (OB_ISNULL(match_against)) { + } else if (OB_UNLIKELY(match_against != static_cast(scan_match_exprs.at(0)))) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null match against expr", K(ret), KPC(match_pred), KPC(match_against)); + LOG_WARN("unexpected match against expr in match filter is not the match expr for scan", + K(ret), KPC(match_against), K(scan_match_exprs)); + } else if (OB_FAIL(prepare_text_retrieval_info(table_scan->get_real_ref_table_id(), + table_scan->get_index_table_id(), + match_against, + table_scan->get_text_retrieval_info()))) { + LOG_WARN("failed to prepare text retrieval info", K(ret)); + } else { + ObTextRetrievalInfo &tr_info = table_scan->get_text_retrieval_info(); + tr_info.match_expr_ = match_against; + tr_info.pushdown_match_filter_ = match_pred; + table_scan->set_doc_id_index_table_id(tr_info.doc_id_idx_tid_); + } + + for (int64_t i = 0; OB_SUCC(ret) && i < all_match_filters.count(); ++i) { + ObRawExpr *curr_filter = all_match_filters.at(i); + if (curr_filter != match_pred) { + if (OB_FAIL(scan_filters.push_back(curr_filter))) { + LOG_WARN("failed to append match filter after functional lookup", K(ret)); + } + } + } + return ret; +} + +int ObLogPlan::prepare_text_retrieval_lookup(const ObIArray &lookup_match_exprs, + const ObIArray &lookup_index_ids, + ObLogicalOperator *scan) +{ + int ret = OB_SUCCESS; + ObLogTableScan *table_scan = static_cast(scan); + if (OB_ISNULL(table_scan) || 
OB_UNLIKELY(lookup_match_exprs.count() != lookup_index_ids.count())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KPC(table_scan), K(lookup_match_exprs), K(lookup_index_ids)); + } + + for (int64_t i = 0; OB_SUCC(ret) && i < lookup_match_exprs.count(); ++i) { + ObTextRetrievalInfo tr_info; + ObMatchFunRawExpr *curr_match_expr = nullptr; + if (OB_ISNULL(curr_match_expr = static_cast(lookup_match_exprs.at(i)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to lookup match exprs", K(ret), K(i), K(lookup_match_exprs)); + } else if (OB_FAIL(prepare_text_retrieval_info(table_scan->get_real_ref_table_id(), + lookup_index_ids.at(i), + curr_match_expr, + tr_info))) { + LOG_WARN("failed to prepare text retrieval info", K(ret)); + } else if (OB_FAIL(table_scan->get_lookup_tr_infos().push_back(tr_info))) { + LOG_WARN("failed to append lookup text retrieval infos", K(ret)); + } + } + + if (OB_SUCC(ret) && table_scan->get_lookup_tr_infos().count() > 0) { + // has text retrieval lookup, need do rowkey->doc_id lookup + const uint64_t rowkey_doc_tid = table_scan->get_lookup_tr_infos().at(0).rowkey_idx_tid_; + table_scan->set_rowkey_doc_table_id(rowkey_doc_tid); + } + return ret; +} + +int ObLogPlan::prepare_text_retrieval_info(const uint64_t ref_table_id, + const uint64_t index_table_id, + ObMatchFunRawExpr *match_against, + ObTextRetrievalInfo &tr_info) +{ + int ret = OB_SUCCESS; + ObSchemaGetterGuard *schema_guard = NULL; + ObSQLSessionInfo *session = NULL; + const ObTableSchema *table_schema = NULL; + const ObTableSchema *inv_idx_schema = NULL; + const ObTableSchema *fwd_idx_schema = NULL; + uint64_t doc_id_rowkey_tid = OB_INVALID_ID; + uint64_t rowkey_doc_tid = OB_INVALID_ID; + uint64_t fwd_idx_tid = OB_INVALID_ID; + uint64_t inv_idx_tid = OB_INVALID_ID; + ObSEArray index_infos; + bool need_calc_relevance = true; + ObSEArray constraints; + + if (OB_ISNULL(match_against) || OB_ISNULL(get_stmt()) || 
OB_ISNULL(get_optimizer_context().get_query_ctx())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", K(ret), KPC(match_against)); + } else if (OB_ISNULL(get_stmt()) + || OB_ISNULL(schema_guard = get_optimizer_context().get_schema_guard()) + || OB_ISNULL(session = get_optimizer_context().get_session_info())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null pointers", K(ret), KP(get_stmt()), KP(schema_guard), KP(session)); } else if (OB_FAIL(schema_guard->get_table_schema(session->get_effective_tenant_id(), - table_scan->get_real_ref_table_id(), + ref_table_id, table_schema))) { LOG_WARN("failed to get table schema", K(ret)); } else if (OB_ISNULL(table_schema)) { @@ -14488,7 +14608,9 @@ int ObLogPlan::prepare_text_retrieval_scan(const ObIArray &exprs, O LOG_WARN("failed to get index infos", K(ret)); } else if (OB_FAIL(table_schema->get_doc_id_rowkey_tid(doc_id_rowkey_tid))) { LOG_WARN("failed to get doc_id_rowkey table id", K(ret)); - } else if (OB_FALSE_IT(inv_idx_tid = table_scan->get_index_table_id())) { + } else if (OB_FAIL(table_schema->get_rowkey_doc_tid(rowkey_doc_tid))) { + LOG_WARN("failed to get rowkey doc table id", K(ret), KPC(table_schema)); + } else if (OB_FALSE_IT(inv_idx_tid = index_table_id)) { } else if (OB_FAIL(schema_guard->get_table_schema(session->get_effective_tenant_id(), inv_idx_tid, inv_idx_schema))) { @@ -14539,14 +14661,13 @@ int ObLogPlan::prepare_text_retrieval_scan(const ObIArray &exprs, O LOG_WARN("failed to append array no dup", K(ret)); } */ - ObTextRetrievalInfo &tr_info = table_scan->get_text_retrieval_info(); tr_info.match_expr_ = match_against; tr_info.inv_idx_tid_ = inv_idx_tid; tr_info.fwd_idx_tid_ = fwd_idx_tid; tr_info.doc_id_idx_tid_ = doc_id_rowkey_tid; - tr_info.pushdown_match_filter_ = match_pred; + tr_info.rowkey_idx_tid_ = rowkey_doc_tid; + tr_info.pushdown_match_filter_ = nullptr; tr_info.need_calc_relevance_ = need_calc_relevance; - table_scan->set_doc_id_index_table_id(doc_id_rowkey_tid); } 
return ret; } diff --git a/src/sql/optimizer/ob_log_plan.h b/src/sql/optimizer/ob_log_plan.h index 6ab30a32b..2bab28f7e 100644 --- a/src/sql/optimizer/ob_log_plan.h +++ b/src/sql/optimizer/ob_log_plan.h @@ -87,6 +87,7 @@ struct IndexDMLInfo; class ValuesTablePath; class ObSelectLogPlan; class ObThreeStageAggrInfo; +struct ObTextRetrievalInfo; class ObHashRollupInfo; struct TableDependInfo { @@ -1461,7 +1462,14 @@ public: int construct_startup_filter_for_limit(ObRawExpr *limit_expr, ObLogicalOperator *log_op); int prepare_vector_index_info(ObLogicalOperator *scan); - int prepare_text_retrieval_scan(const ObIArray &exprs, ObLogicalOperator *scan); + int prepare_text_retrieval_scan(const ObIArray &scan_match_exprs, + const ObIArray &scan_match_filters, + const ObIArray &all_match_filters, + ObIArray &scan_filters, + ObLogicalOperator *scan); + int prepare_text_retrieval_lookup(const ObIArray &lookup_match_exprs, + const ObIArray &lookup_index_ids, + ObLogicalOperator *scan); int prepare_multivalue_retrieval_scan(ObLogicalOperator *scan); int try_push_topn_into_domain_scan(ObLogicalOperator *&top, ObRawExpr *topn_expr, @@ -1781,6 +1789,10 @@ private: // member functions int adjust_expr_properties_for_external_table(ObRawExpr *col_expr, ObRawExpr *&expr) const; int compute_duplicate_table_replicas(ObLogicalOperator *op); + int prepare_text_retrieval_info(const uint64_t ref_table_id, + const uint64_t index_table_id, + ObMatchFunRawExpr *ma_expr, + ObTextRetrievalInfo &tr_info); public: const ObLogPlanHint &get_log_plan_hint() const { return log_plan_hint_; } bool has_join_order_hint() { return !log_plan_hint_.join_order_.leading_tables_.is_empty(); } diff --git a/src/sql/optimizer/ob_log_table_scan.cpp b/src/sql/optimizer/ob_log_table_scan.cpp index 47c25d815..f2a156218 100644 --- a/src/sql/optimizer/ob_log_table_scan.cpp +++ b/src/sql/optimizer/ob_log_table_scan.cpp @@ -219,12 +219,15 @@ int ObLogTableScan::get_op_exprs(ObIArray &all_exprs) LOG_WARN("failed to push back 
expr", K(ret)); } else if (NULL != group_id_expr_ && OB_FAIL(all_exprs.push_back(group_id_expr_))) { LOG_WARN("failed to push back expr", K(ret)); - } else if (is_text_retrieval_scan() && OB_FAIL(get_text_retrieval_calc_exprs(all_exprs))) { + } else if (is_text_retrieval_scan() + && OB_FAIL(get_text_retrieval_calc_exprs(get_text_retrieval_info(), all_exprs))) { LOG_WARN("failed to get text retrieval exprs", K(ret)); } else if (is_vec_idx_scan() && OB_FAIL(get_vec_idx_calc_exprs(all_exprs))) { LOG_WARN("failed to get text retrieval exprs", K(ret)); } else if (OB_FAIL(append(all_exprs, rowkey_id_exprs_))) { LOG_WARN("failed to append rowkey doc exprs", K(ret)); + } else if (has_func_lookup() && OB_FAIL(get_func_lookup_calc_exprs(all_exprs))) { + LOG_WARN("failed to get functional lookup exprs", K(ret)); } else if (OB_FAIL(append(all_exprs, access_exprs_))) { LOG_WARN("failed to append exprs", K(ret)); } else if (OB_FAIL(append(all_exprs, pushdown_aggr_exprs_))) { @@ -271,19 +274,15 @@ int ObLogTableScan::allocate_expr_post(ObAllocExprContext &ctx) LOG_WARN("failed to mark expr as produced", K(*expr), K(branch_id_), K(id_), K(ret)); } } - if (OB_SUCC(ret) && is_text_retrieval_scan()) { + if (OB_SUCC(ret)) { // match against relevance expr will be calculated in storage ObSEArray tmp_exprs; - if (OB_FAIL(ObRawExprUtils::extract_column_exprs(get_text_retrieval_info().relevance_expr_, tmp_exprs))) { - LOG_WARN("failed to extract column exprs", K(ret)); - } else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().doc_token_cnt_))) { - LOG_WARN("failed to append tmp exprs", K(ret)); - } else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().total_doc_cnt_))) { - LOG_WARN("failed to append tmp exprs", K(ret)); - } else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().related_doc_cnt_))) { - LOG_WARN("failed to append tmp exprs", K(ret)); - } else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().match_expr_))) { - LOG_WARN("failed to append tmp 
exprs", K(ret)); + if (is_text_retrieval_scan() + && OB_FAIL(get_text_retrieval_calc_exprs(get_text_retrieval_info(), tmp_exprs))) { + LOG_WARN("failed to get text retrieval calc exprs", K(ret)); + } else if (has_func_lookup() + && OB_FAIL(get_func_lookup_calc_exprs(tmp_exprs))) { + LOG_WARN("failed to get func lookup exprs", K(ret)); } for (int64_t i = 0; OB_SUCC(ret) && i < tmp_exprs.count(); ++i) { ObRawExpr *expr = tmp_exprs.at(i); @@ -485,12 +484,14 @@ int ObLogTableScan::generate_access_exprs() LOG_WARN("get unexpected null", K(get_plan()), K(get_stmt()), K(ret)); } else if (OB_FAIL(copy_filter_before_index_back())) { LOG_WARN("failed to copy filter before index back", K(ret)); - } else if (is_text_retrieval_scan() && OB_FAIL(prepare_text_retrieval_dep_exprs())) { + } else if (is_text_retrieval_scan() && OB_FAIL(prepare_text_retrieval_dep_exprs(get_text_retrieval_info()))) { LOG_WARN("failed to copy text retrieval aggr exprs", K(ret)); } else if (is_vec_idx_scan() && OB_FAIL(prepare_vector_access_exprs())) { LOG_WARN("failed to copy vec idx scan exprs", K(ret)); - } else if ((is_tsc_with_doc_id() || is_tsc_with_vid()) && OB_FAIL(prepare_rowkey_domain_id_dep_exprs())) { + } else if (need_rowkey_doc_expr() && OB_FAIL(prepare_rowkey_domain_id_dep_exprs())) { LOG_WARN("failed to prepare table scan with doc id info", K(ret)); + } else if (has_func_lookup() && OB_FAIL(prepare_func_lookup_dep_exprs())) { + LOG_WARN("failed to prepare functional lookup dependent exprs", K(ret)); } else if (OB_FAIL(generate_necessary_rowkey_and_partkey_exprs())) { LOG_WARN("failed to generate rowkey and part exprs", K(ret)); } else if (OB_FAIL(allocate_group_id_expr())) { @@ -759,9 +760,16 @@ int ObLogTableScan::extract_pushdown_filters(ObIArray &nonpushdown_f if (OB_FAIL(nonpushdown_filters.push_back(filters.at(i)))) { LOG_WARN("push variable assign filter store non-pushdown filter failed", K(ret), K(i)); } + } else if (has_func_lookup() && + (filters.at(i)->has_flag(CNT_MATCH_EXPR) 
|| !flags.at(i))) { + // for filter with match expr in functional lookup, need to be evaluated after func lookup + // push-down filter on main-table lookup with functional lookup not supported by executor + if (OB_FAIL(nonpushdown_filters.push_back(filters.at(i)))) { + LOG_WARN("push func-lookup match filter to non-pushdown array failed", K(ret), K(i)); + } } else if (is_text_retrieval_scan() && need_text_retrieval_calc_relevance()) { if (OB_FAIL(nonpushdown_filters.push_back(filters.at(i)))) { - LOG_WARN("push variable assign filter store non-pushdown filter failed", K(ret), K(i)); + LOG_WARN("push text retrieval scan store non-pushdown filter failed", K(ret), K(i)); } } else if (ignore_pd_filter) { //ignore_pd_filter: only extract non-pushdown filters, ignore others @@ -1091,14 +1099,18 @@ int ObLogTableScan::generate_necessary_rowkey_and_partkey_exprs() LOG_WARN("failed to check whether stmt has mbr column", K(ret)); } else if (need_doc_id_index_back() && OB_FAIL(extract_doc_id_index_back_expr(domain_exprs_, is_vec_idx_scan()))) { LOG_WARN("failed to extract doc id index back exprs", K(ret)); - } else if (is_text_retrieval_scan() && OB_FAIL(extract_text_retrieval_access_expr(domain_exprs_))) { + } else if (is_text_retrieval_scan() + && OB_FAIL(extract_text_retrieval_access_expr(get_text_retrieval_info(), domain_exprs_))) { LOG_WARN("failed to extract text retrieval access exprs", K(ret)); } else if (is_vec_idx_scan() && OB_FAIL(extract_vec_idx_access_expr(domain_exprs_))) { LOG_WARN("failed to extract vector index access exprs", K(ret)); - }else if (is_heap_table && is_index_global_ && index_back_ && + } else if (has_func_lookup() + && OB_FAIL(extract_func_lookup_access_exprs(domain_exprs_))) { + LOG_WARN("failed to extract functional lookup access exprs", K(ret)); + } else if (is_heap_table && is_index_global_ && index_back_ && OB_FAIL(get_part_column_exprs(table_id_, ref_table_id_, part_exprs_))) { LOG_WARN("failed to get part column exprs", K(ret)); - } else 
if ((has_lob_column || index_back_) && + } else if ((has_lob_column || index_back_ || has_func_lookup()) && OB_FAIL(get_plan()->get_rowkey_exprs(table_id_, ref_table_id_, rowkey_exprs_))) { LOG_WARN("failed to generate rowkey exprs", K(ret)); } else { /*do nothing*/ } @@ -1720,6 +1732,15 @@ int ObLogTableScan::get_plan_item_info(PlanText &plan_text, LOG_WARN("BUF_PRINTF fails", K(ret)); } } + + if (OB_SUCC(ret) && has_func_lookup()) { + if (OB_FAIL(BUF_PRINTF(", "))) { + LOG_WARN("BUF_PRINTF failed", K(ret)); + } else if (OB_FAIL(BUF_PRINTF("has_functional_lookup=true"))) { + LOG_WARN("BUF_PRINTF fails", K(ret)); + } + } + END_BUF_PRINT(plan_item.special_predicates_, plan_item.special_predicates_len_); } @@ -2964,10 +2985,10 @@ int ObLogTableScan::extract_doc_id_index_back_expr(ObIArray &exprs, return ret; } -int ObLogTableScan::extract_text_retrieval_access_expr(ObIArray &exprs) +int ObLogTableScan::extract_text_retrieval_access_expr(ObTextRetrievalInfo &tr_info, + ObIArray &exprs) { int ret = OB_SUCCESS; - ObTextRetrievalInfo &tr_info = get_text_retrieval_info(); if (OB_ISNULL(tr_info.match_expr_) || OB_ISNULL(tr_info.total_doc_cnt_) || OB_ISNULL(tr_info.doc_token_cnt_) || OB_ISNULL(tr_info.related_doc_cnt_)) { ret = OB_ERR_UNEXPECTED; @@ -3065,34 +3086,62 @@ int ObLogTableScan::get_vec_idx_calc_exprs(ObIArray &all_exprs) return ret; } -int ObLogTableScan::get_text_retrieval_calc_exprs(ObIArray &all_exprs) +int ObLogTableScan::get_text_retrieval_calc_exprs(ObTextRetrievalInfo &tr_info, + ObIArray &all_exprs) { int ret = OB_SUCCESS; - if (OB_ISNULL(get_text_retrieval_info().match_expr_)) { + if (OB_ISNULL(tr_info.match_expr_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null match against expr", K(ret)); - } else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().related_doc_cnt_))) { + } else if (OB_FAIL(all_exprs.push_back(tr_info.related_doc_cnt_))) { LOG_WARN("failed to append relevanced doc cnt expr", K(ret)); - } else if 
(OB_FAIL(all_exprs.push_back(get_text_retrieval_info().doc_token_cnt_))) { + } else if (OB_FAIL(all_exprs.push_back(tr_info.doc_token_cnt_))) { LOG_WARN("failed to append doc token cnt expr", K(ret)); - } else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().total_doc_cnt_))) { + } else if (OB_FAIL(all_exprs.push_back(tr_info.total_doc_cnt_))) { LOG_WARN("failed to append total doc cnt expr", K(ret)); - } else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().relevance_expr_))) { + } else if (OB_FAIL(all_exprs.push_back(tr_info.relevance_expr_))) { LOG_WARN("failed to append relevance expr", K(ret)); - } else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().match_expr_))) { + } else if (OB_FAIL(all_exprs.push_back(tr_info.match_expr_))) { LOG_WARN("failed to append text retrieval expr", K(ret)); - } else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().pushdown_match_filter_))) { + } else if (nullptr != tr_info.pushdown_match_filter_ + && OB_FAIL(all_exprs.push_back(tr_info.pushdown_match_filter_))) { LOG_WARN("failed to append match filter", K(ret)); - } else if (OB_NOT_NULL(get_text_retrieval_info().topk_limit_expr_) && - OB_FAIL(all_exprs.push_back(get_text_retrieval_info().topk_limit_expr_))) { + } else if (nullptr != tr_info.topk_limit_expr_ + && OB_FAIL(all_exprs.push_back(tr_info.topk_limit_expr_))) { LOG_WARN("failed to append limit expr", K(ret)); - } else if (OB_NOT_NULL(get_text_retrieval_info().topk_offset_expr_) && - OB_FAIL(all_exprs.push_back(get_text_retrieval_info().topk_offset_expr_))) { + } else if (nullptr != tr_info.topk_offset_expr_ + && OB_FAIL(all_exprs.push_back(tr_info.topk_offset_expr_))) { LOG_WARN("failed to append offset expr", K(ret)); } return ret; } +int ObLogTableScan::extract_func_lookup_access_exprs(ObIArray &all_exprs) +{ + int ret = OB_SUCCESS; + + for (int64_t i = 0; OB_SUCC(ret) && i < lookup_tr_infos_.count(); ++i) { + if (OB_FAIL(extract_text_retrieval_access_expr(lookup_tr_infos_.at(i), 
all_exprs))) { + LOG_WARN("failed to extract text retrieval access expr", K(ret), K(i), K(lookup_tr_infos_.at(i))); + } + } + + return ret; +} + +int ObLogTableScan::get_func_lookup_calc_exprs(ObIArray &all_exprs) +{ + int ret = OB_SUCCESS; + + for (int64_t i = 0; OB_SUCC(ret) && i < lookup_tr_infos_.count(); ++i) { + if (OB_FAIL(get_text_retrieval_calc_exprs(lookup_tr_infos_.at(i), all_exprs))) { + LOG_WARN("failed to get text retrieval calc expr", K(ret), K(i), K(lookup_tr_infos_.at(i))); + } + } + + return ret; +} + int ObLogTableScan::print_text_retrieval_annotation(char *buf, int64_t buf_len, int64_t &pos, ExplainType type) { int ret = OB_SUCCESS; @@ -3413,7 +3462,7 @@ int ObLogTableScan::prepare_vector_access_exprs() return ret; } -int ObLogTableScan::prepare_text_retrieval_dep_exprs() +int ObLogTableScan::prepare_text_retrieval_dep_exprs(ObTextRetrievalInfo &tr_info) { int ret = OB_SUCCESS; const ObTableSchema *table_schema; @@ -3433,7 +3482,6 @@ int ObLogTableScan::prepare_text_retrieval_dep_exprs() ObAggFunRawExpr *total_doc_cnt = nullptr; ObAggFunRawExpr *doc_token_cnt = nullptr; ObOpRawExpr *relevance_expr = nullptr; - ObTextRetrievalInfo &tr_info = get_text_retrieval_info(); if (OB_NOT_NULL(tr_info.doc_id_column_) && OB_NOT_NULL(tr_info.doc_length_column_) && OB_NOT_NULL(tr_info.token_column_) && OB_NOT_NULL(tr_info.token_cnt_column_) && OB_NOT_NULL(tr_info.doc_token_cnt_) && OB_NOT_NULL(tr_info.total_doc_cnt_) && @@ -3546,8 +3594,11 @@ int ObLogTableScan::prepare_text_retrieval_dep_exprs() // Copy column ref expr referenced by aggregation in different index table scan // to avoid share expression } else if (OB_FAIL(copier.copy(related_doc_cnt->get_param_expr(0)))) { + LOG_WARN("failed to copy related_doc_cnt expr", K(ret)); } else if (OB_FAIL(copier.copy(total_doc_cnt->get_param_expr(0)))) { + LOG_WARN("failed to copy total_doc_cnt expr", K(ret)); } else if (OB_FAIL(copier.copy(doc_token_cnt->get_param_expr(0)))) { + LOG_WARN("failed to copy 
doc_token_cnt expr", K(ret)); } else { tr_info.token_column_ = token_column; tr_info.token_cnt_column_ = token_cnt_column; @@ -3562,6 +3613,20 @@ int ObLogTableScan::prepare_text_retrieval_dep_exprs() return ret; } +int ObLogTableScan::prepare_func_lookup_dep_exprs() +{ + int ret = OB_SUCCESS; + + for (int64_t i = 0; OB_SUCC(ret) && i < lookup_tr_infos_.count(); ++i) { + if (OB_FAIL(prepare_text_retrieval_dep_exprs(lookup_tr_infos_.at(i)))) { + LOG_WARN("failed to prepare text retrieval dependent exprs", + K(ret), K(i), K(lookup_tr_infos_.at(i))); + } + } + + return ret; +} + int ObLogTableScan::get_card_without_filter(double &card) { int ret = OB_SUCCESS; @@ -3976,7 +4041,7 @@ int ObLogTableScan::prepare_rowkey_domain_id_dep_exprs() ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error, table schema is nullptr", K(ret)); } else { - if (is_tsc_with_doc_id()) { + if (is_tsc_with_doc_id() || has_func_lookup()) { if (OB_FAIL(schema_guard->get_table_schema(rowkey_doc_tid_, rowkey_doc_schema))) { LOG_WARN("fail toprint_ranges get rowkey doc table schema", K(ret), K(rowkey_doc_tid_)); } else if (OB_ISNULL(rowkey_doc_schema)) { diff --git a/src/sql/optimizer/ob_log_table_scan.h b/src/sql/optimizer/ob_log_table_scan.h index 741475592..1467d13d6 100644 --- a/src/sql/optimizer/ob_log_table_scan.h +++ b/src/sql/optimizer/ob_log_table_scan.h @@ -63,9 +63,10 @@ struct ObTextRetrievalInfo ObRawExpr *topk_offset_expr_; bool with_ties_; bool need_calc_relevance_; // match expr just for retireval (accurate score is not required) - uint64_t inv_idx_tid_; // choosed aux inverted index table id (word-doc) - uint64_t fwd_idx_tid_; // choosed aux forward index table id (doc-word) - uint64_t doc_id_idx_tid_; // choosed aux doc_id index table id (doc-rowkey) + uint64_t inv_idx_tid_; // chosen aux inverted index table id (word-doc) + uint64_t fwd_idx_tid_; // chosen aux forward index table id (doc-word) + uint64_t doc_id_idx_tid_; // chosen aux doc_id index table id (doc-rowkey) + uint64_t 
rowkey_idx_tid_; // chosen aux rowkey index table id (rowkey-doc) // the following exprs are used for intermediate calculation of relevance score ObColumnRefRawExpr *token_column_; ObColumnRefRawExpr *token_cnt_column_; @@ -690,13 +691,17 @@ public: inline bool is_spatial_index_scan() const { return is_spatial_index_; } inline ObTextRetrievalInfo &get_text_retrieval_info() { return text_retrieval_info_; } inline const ObTextRetrievalInfo &get_text_retrieval_info() const { return text_retrieval_info_; } - int prepare_text_retrieval_dep_exprs(); int prepare_vector_access_exprs(); + inline bool has_func_lookup() const { return 0 != lookup_tr_infos_.count(); } + inline ObIArray &get_lookup_tr_infos() { return lookup_tr_infos_; } + inline const ObIArray &get_lookup_tr_infos() const { return lookup_tr_infos_; } inline bool need_text_retrieval_calc_relevance() const { return text_retrieval_info_.need_calc_relevance_; } inline bool need_doc_id_index_back() const { return is_text_retrieval_scan() || is_multivalue_index_scan() || is_vec_idx_scan(); } + inline bool need_rowkey_doc_expr() const { return is_tsc_with_doc_id() || is_tsc_with_vid() || has_func_lookup(); } inline void set_doc_id_index_table_id(const uint64_t doc_id_index_table_id) { doc_id_table_id_ = doc_id_index_table_id; } inline uint64_t get_doc_id_index_table_id() const { return doc_id_table_id_; } inline uint64_t get_rowkey_vid_table_id() const { return rowkey_vid_tid_; } + inline void set_rowkey_doc_table_id(const uint64_t tid) { rowkey_doc_tid_ = tid; } inline uint64_t get_rowkey_doc_table_id() const { return rowkey_doc_tid_; } inline uint64_t get_multivalue_col_idx() const { return multivalue_col_idx_; } inline int32_t get_multivalue_type() const { return multivalue_type_; } @@ -759,11 +764,15 @@ private: // member functions const share::schema::ObTableSchema &table_schema, bool &need_filter); int allocate_group_id_expr(); - int extract_doc_id_index_back_expr(ObIArray &exprs, bool is_vec_scan = false); - 
int extract_text_retrieval_access_expr(ObIArray &exprs); int extract_vec_idx_access_expr(ObIArray &exprs); - int get_text_retrieval_calc_exprs(ObIArray &all_exprs); int get_vec_idx_calc_exprs(ObIArray &all_exprs); + int extract_doc_id_index_back_expr(ObIArray &exprs, bool is_vec_scan = false); + int extract_text_retrieval_access_expr(ObTextRetrievalInfo &tr_info, ObIArray &exprs); + int get_text_retrieval_calc_exprs(ObTextRetrievalInfo &tr_info, ObIArray &all_exprs); + int prepare_text_retrieval_dep_exprs(ObTextRetrievalInfo &tr_info); + int extract_func_lookup_access_exprs(ObIArray &all_exprs); + int get_func_lookup_calc_exprs(ObIArray &all_exprs); + int prepare_func_lookup_dep_exprs(); int print_text_retrieval_annotation(char *buf, int64_t buf_len, int64_t &pos, ExplainType type); int find_nearest_rcte_op(ObLogSet *&rcte_op); int generate_filter_monotonicity(); @@ -908,9 +917,11 @@ protected: // memeber variables share::schema::ObTableType table_type_; bool use_column_store_; uint64_t doc_id_table_id_; // used for rowkey lookup of fulltext, JSON multi-value and vector index + // text retrieval as index scan ObTextRetrievalInfo text_retrieval_info_; + // text retrieval as functional lookup + common::ObSEArray lookup_tr_infos_; ObVectorIndexInfo vector_index_info_; - ObPxRFStaticInfo px_rf_info_; bool das_keep_ordering_; typedef common::ObSEArray FilterMonotonicity; diff --git a/src/sql/optimizer/ob_opt_cost_model_parameter.cpp b/src/sql/optimizer/ob_opt_cost_model_parameter.cpp index 9eae875e4..2226e0db2 100644 --- a/src/sql/optimizer/ob_opt_cost_model_parameter.cpp +++ b/src/sql/optimizer/ob_opt_cost_model_parameter.cpp @@ -428,3 +428,13 @@ double ObOptCostModelParameter::get_cmp_err_handle_expr_cost(const OptSystemStat return CMP_ERR_HANDLE_EXPR_COST / stat.get_cpu_speed(); } } + +double ObOptCostModelParameter::get_functional_lookup_per_row_cost(const OptSystemStat& stat) const +{ + // jinmao TODO: 这里需要再考虑一下 + if (stat.get_cpu_speed() <= 0) { + return 
FUNCTIONAL_LOOKUP_PER_ROW_COST; + } else { + return FUNCTIONAL_LOOKUP_PER_ROW_COST / stat.get_cpu_speed(); + } +} diff --git a/src/sql/optimizer/ob_opt_cost_model_parameter.h b/src/sql/optimizer/ob_opt_cost_model_parameter.h index bd877f47e..9c750a787 100644 --- a/src/sql/optimizer/ob_opt_cost_model_parameter.h +++ b/src/sql/optimizer/ob_opt_cost_model_parameter.h @@ -74,6 +74,7 @@ public: const double DEFAULT_CMP_UDF_COST, const double DEFAULT_CMP_LOB_COST, const double DEFAULT_CMP_ERR_HANDLE_EXPR_COST, + const double DEFAULT_FUNCTIONAL_LOOKUP_PER_ROW_COST, const double (&comparison_params)[common::ObMaxTC + 1], const double (&hash_params)[common::ObMaxTC + 1], const double (&project_params)[2][2][MAX_PROJECT_TYPE] @@ -119,6 +120,7 @@ public: CMP_UDF_COST(DEFAULT_CMP_UDF_COST), CMP_LOB_COST(DEFAULT_CMP_LOB_COST), CMP_ERR_HANDLE_EXPR_COST(DEFAULT_CMP_ERR_HANDLE_EXPR_COST), + FUNCTIONAL_LOOKUP_PER_ROW_COST(DEFAULT_FUNCTIONAL_LOOKUP_PER_ROW_COST), comparison_params_(comparison_params), hash_params_(hash_params), project_params_(project_params) @@ -172,6 +174,7 @@ public: double get_cmp_lob_cost(const OptSystemStat& stat) const; double get_cmp_udf_cost(const OptSystemStat& stat) const; double get_cmp_err_handle_expr_cost(const OptSystemStat& stat) const; + double get_functional_lookup_per_row_cost(const OptSystemStat& stat) const; protected: /** 读取一行的CPU开销,基本上只包括get_next_row()操作 */ @@ -253,6 +256,8 @@ protected: double CMP_LOB_COST; //计算一个需处理异常的表达式的代价 double CMP_ERR_HANDLE_EXPR_COST; + //计算一个全文索引 functional lookup 表达式的代价 + double FUNCTIONAL_LOOKUP_PER_ROW_COST; const double (&comparison_params_)[common::ObMaxTC + 1]; const double (&hash_params_)[common::ObMaxTC + 1]; /* diff --git a/src/sql/optimizer/ob_opt_est_cost_model.cpp b/src/sql/optimizer/ob_opt_est_cost_model.cpp index e33ddc661..5863baf2a 100644 --- a/src/sql/optimizer/ob_opt_est_cost_model.cpp +++ b/src/sql/optimizer/ob_opt_est_cost_model.cpp @@ -1617,14 +1617,12 @@ int 
ObOptEstCostModel::cost_row_store_index_scan(const ObCostTableScanInfo &est_ // 1. 以 [token, token] 为 range 扫描 inv_index 两次,计算一个聚合函数; // 2. 全表扫描 doc_id_rowkey_index, 计算一个聚合函数; // 3. 用过滤后的 doc_id 对 doc_id_rowkey_index 做回表 - int token_count = 1; // 此处先假设 search query 只有一个 token,后续要调整 - double token_sel = DEFAULT_SEL; double inv_index_range_scan_cost = 0; double doc_id_full_scan_cost = 0; double doc_id_index_back_cost = 0; if (OB_FAIL(cost_range_scan(est_cost_info, true, - row_count * token_sel, + row_count, inv_index_range_scan_cost))) { LOG_WARN("Failed to estimate scan cost", K(ret)); } else if (OB_FAIL(cost_range_scan(est_cost_info, @@ -1634,14 +1632,14 @@ int ObOptEstCostModel::cost_row_store_index_scan(const ObCostTableScanInfo &est_ LOG_WARN("Failed to estimate scan cost", K(ret)); } else if (OB_FAIL(cost_range_get(est_cost_info, true, - row_count * token_sel, + row_count, doc_id_index_back_cost))) { LOG_WARN("Failed to estimate get cost", K(ret)); } - double aggregation_cost = (row_count * token_sel + row_count) * cost_params_.get_per_aggr_func_cost(sys_stat_); + double aggregation_cost = (row_count + row_count) * cost_params_.get_per_aggr_func_cost(sys_stat_); double fulltext_scan_cost = 2 * inv_index_range_scan_cost + doc_id_full_scan_cost + aggregation_cost + doc_id_index_back_cost; - index_scan_cost = token_count * fulltext_scan_cost; + index_scan_cost = fulltext_scan_cost; LOG_TRACE("OPT::[COST FULLTEXT INDEX SCAN]", K(fulltext_scan_cost), K(ret)); } //add index skip scan cost @@ -2381,6 +2379,11 @@ double ObOptEstCostModel::cost_quals(double rows, const ObIArray &q if (need_scale) { factor /= 25.0; } + } else if (qual->has_flag(CNT_MATCH_EXPR)) { + cost_per_row += cost_params_.get_functional_lookup_per_row_cost(sys_stat_) * factor; + if (need_scale) { + factor /= 10.0; + } } else { ObObjTypeClass calc_type = qual->get_result_type().get_calc_type_class(); cost_per_row += cost_params_.get_comparison_cost(sys_stat_, calc_type) * factor; @@ -2496,6 +2499,9 
@@ int ObOptEstCostModel::calc_pred_cost_per_row(const ObRawExpr *expr, cost += (expr->get_param_expr(1)->get_param_count() + 1) * cost_params_.get_comparison_cost(sys_stat_,ObIntTC) / rows; } need_calc_child_cost = false; + } else if (T_FUN_MATCH_AGAINST == expr->get_expr_type()) { + cost += cost_params_.get_functional_lookup_per_row_cost(sys_stat_) / rows; + need_calc_child_cost = false; } else { cost += cost_params_.get_comparison_cost(sys_stat_,ObIntTC) / rows; } diff --git a/src/sql/optimizer/ob_opt_est_parameter_normal.h b/src/sql/optimizer/ob_opt_est_parameter_normal.h index 72c2524c0..48bf6bc41 100644 --- a/src/sql/optimizer/ob_opt_est_parameter_normal.h +++ b/src/sql/optimizer/ob_opt_est_parameter_normal.h @@ -92,6 +92,8 @@ const static double NORMAL_INVALID_HASH_COST = -1; const static double NORMAL_CMP_UDF_COST = 100.0 * DEFAULT_CPU_SPEED; const static double NORMAL_CMP_LOB_COST = 9.707028746051587301587301588 * DEFAULT_CPU_SPEED; //NORMAL_CMP_CHAR_COST * 100 const static double NORMAL_CMP_ERR_HANDLE_EXPR_COST = 1.00087103407539 * DEFAULT_CPU_SPEED; //NORMAL_CMP_INT_COST * 100 +// jinmao TODO: 系数要测算后再填 +const static double NORMAL_FUNCTIONAL_LOOKUP_PER_ROW_COST = 100.0 * DEFAULT_CPU_SPEED; const static double comparison_params_normal[ObMaxTC+1] = { NORMAL_CMP_INT_COST, // null @@ -229,6 +231,7 @@ const static ObOptCostModelParameter cost_params_normal( NORMAL_CMP_UDF_COST, NORMAL_CMP_LOB_COST, NORMAL_CMP_ERR_HANDLE_EXPR_COST, + NORMAL_FUNCTIONAL_LOOKUP_PER_ROW_COST, comparison_params_normal, hash_params_normal, project_params_normal diff --git a/src/sql/optimizer/ob_opt_est_parameter_vector.h b/src/sql/optimizer/ob_opt_est_parameter_vector.h index de49c0cc9..453d8d51a 100644 --- a/src/sql/optimizer/ob_opt_est_parameter_vector.h +++ b/src/sql/optimizer/ob_opt_est_parameter_vector.h @@ -92,6 +92,8 @@ const static double VECTOR_INVALID_HASH_COST = -1; const static double VECTOR_CMP_UDF_COST = 100.0 * DEFAULT_CPU_SPEED; const static double VECTOR_CMP_LOB_COST 
= 9.707028746051587301587301588 * DEFAULT_CPU_SPEED; //NORMAL_CMP_CHAR_COST * 100 const static double VECTOR_CMP_ERR_HANDLE_EXPR_COST = 1.00087103407539 * DEFAULT_CPU_SPEED; //NORMAL_CMP_INT_COST * 100 +//jinmao TODO: 系数要测算后再填 +const static double VECTOR_FUNCTIONAL_LOOKUP_PER_ROW_COST = 100.0 * DEFAULT_CPU_SPEED; const static double comparison_params_vector[ObMaxTC+1] = { VECTOR_CMP_INT_COST, // null @@ -224,6 +226,7 @@ const static ObOptCostModelParameter cost_params_vector( VECTOR_CMP_UDF_COST, VECTOR_CMP_LOB_COST, VECTOR_CMP_ERR_HANDLE_EXPR_COST, + VECTOR_FUNCTIONAL_LOOKUP_PER_ROW_COST, comparison_params_vector, hash_params_vector, project_params_vector diff --git a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp index 45e33b449..8329cf0d9 100644 --- a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp +++ b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp @@ -1591,17 +1591,29 @@ int ObAlterTableResolver::resolve_index_column_list(const ParseNode &node, ret = OB_NOT_SUPPORTED; LOG_WARN("experimental feature: build multivalue index afterward is experimental feature", K(ret)); LOG_USER_ERROR(OB_NOT_SUPPORTED, "build multivalue index afterward"); + } else if (table_schema_->is_materialized_view()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("create fulltext/multivalue/vector index on materialized view not supported", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create fulltext/multivalue/vector index on materialized view"); } } else if (index_keyname_ == FTS_KEY) { - if (!GCONF._enable_add_fulltext_index_to_existing_table) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("experimental feature: build fulltext index afterward is experimental feature", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "build fulltext index afterward"); + uint64_t tenant_data_version = 0; + if (OB_ISNULL(session_info_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if 
(OB_FAIL(GET_MIN_DATA_VERSION(session_info_->get_effective_tenant_id(), tenant_data_version))) { + LOG_WARN("get tenant data version failed", K(ret)); + } else if (tenant_data_version < DATA_VERSION_4_3_5_0) { + LOG_WARN("there are the observers with version lower than 4.3.5 in cluster, build fulltext index afterward not supported", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "there are the observers with version lower than 4.3.5 in cluster, build fulltext index afterward"); } else if (OB_FAIL(resolve_fts_index_constraint(*table_schema_, - sort_item.column_name_, - index_name_value))) { - SQL_RESV_LOG(WARN, "check fts index constraint fail",K(ret), - K(sort_item.column_name_)); + sort_item.column_name_, + index_name_value))) { + SQL_RESV_LOG(WARN, "check fts index constraint fail", K(ret), K(sort_item.column_name_)); + } else if (table_schema_->is_materialized_view()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("create fulltext/multivalue/vector index on materialized view not supported", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create fulltext/multivalue/vector index on materialized view"); } } else if (index_keyname_ == VEC_KEY) { // TODO@xiajin @@ -1733,7 +1745,8 @@ int ObAlterTableResolver::resolve_add_index(const ParseNode &node) ParseNode *colulmn_group_node = nullptr; bool is_index_part_specified = false; CHECK_COMPATIBILITY_MODE(session_info_); - if (is_unique_key && lib::is_oracle_mode()) { + if (OB_FAIL(ret)) { + } else if (is_unique_key && lib::is_oracle_mode()) { // oracle mode if (node.num_child_ != 2) { ret = OB_ERR_UNEXPECTED; @@ -2495,6 +2508,7 @@ int ObAlterTableResolver::generate_index_arg(obrpc::ObCreateIndexArg &index_arg, index_arg.index_option_.storage_format_version_ = storage_format_version_; index_arg.index_option_.comment_ = comment_; index_arg.with_rowid_ = with_rowid_; + index_arg.index_option_.parser_name_ = parser_name_; if (OB_SUCC(ret)) { ObIndexType type = INDEX_TYPE_IS_NOT; if (OB_NOT_NULL(table_schema_) && 
table_schema_->is_oracle_tmp_table()) { diff --git a/src/sql/resolver/ddl/ob_alter_table_resolver.h b/src/sql/resolver/ddl/ob_alter_table_resolver.h index 7ac6bc25d..3786a7be8 100644 --- a/src/sql/resolver/ddl/ob_alter_table_resolver.h +++ b/src/sql/resolver/ddl/ob_alter_table_resolver.h @@ -20,8 +20,7 @@ namespace oceanbase { namespace sql { - -typedef common::hash::ObPlacementHashSet ObReducedVisibleColSet; +typedef common::hash::ObPlacementHashSet ObReducedVisibleColSet; typedef common::hash::ObPlacementHashSet ObColumnNameSet; /* #define ADD_COLUMN_NOT_NULL (1UL << 0) diff --git a/src/sql/resolver/ddl/ob_create_index_resolver.cpp b/src/sql/resolver/ddl/ob_create_index_resolver.cpp index 3257f81c7..4bbc2ea6d 100644 --- a/src/sql/resolver/ddl/ob_create_index_resolver.cpp +++ b/src/sql/resolver/ddl/ob_create_index_resolver.cpp @@ -267,15 +267,23 @@ int ObCreateIndexResolver::resolve_index_column_node( LOG_USER_ERROR(OB_NOT_SUPPORTED, "build multivalue index afterward"); } } else if (index_keyname_ == FTS_KEY) { - if (!GCONF._enable_add_fulltext_index_to_existing_table) { - ret = OB_NOT_SUPPORTED; - LOG_WARN("build fulltext index afterward is experimental feature", K(ret)); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "experimental feature: build fulltext index afterward"); + uint64_t tenant_data_version = 0; + if (OB_ISNULL(session_info_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(session_info_->get_effective_tenant_id(), tenant_data_version))) { + LOG_WARN("get tenant data version failed", K(ret)); + } else if (tenant_data_version < DATA_VERSION_4_3_5_0) { + LOG_WARN("there are the observers with version lower than 4.3.5 in cluster, build fulltext index afterward not supported", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "there are the observers with version lower than 4.3.5 in cluster, build fulltext index afterward"); } else if (OB_FAIL(resolve_fts_index_constraint(*tbl_schema, - sort_item.column_name_, 
- index_keyname_value))) { - SQL_RESV_LOG(WARN, "check fts index constraint fail",K(ret), - K(sort_item.column_name_)); + sort_item.column_name_, + index_keyname_value))) { + SQL_RESV_LOG(WARN, "check fts index constraint fail", K(ret), K(sort_item.column_name_)); + } else if (OB_UNLIKELY(tbl_schema->mv_container_table())) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("create fulltext index on materialized view not supported", K(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create fulltext index on materialized view"); } } else if (index_keyname_ == INDEX_KEYNAME::VEC_KEY) { if (sort_item.is_func_index_) { diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.h b/src/sql/resolver/ddl/ob_ddl_resolver.h index 58b97cd49..83b28f5d3 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.h +++ b/src/sql/resolver/ddl/ob_ddl_resolver.h @@ -1075,7 +1075,7 @@ protected: common::hash::ObPlacementHashSet storing_column_set_; common::hash::ObPlacementHashSet current_foreign_key_name_set_; + common::OB_MAX_AUX_TABLE_PER_MAIN_TABLE> current_foreign_key_name_set_; common::ObBitSet<> alter_table_bitset_; bool has_index_using_type_; share::schema::ObIndexUsingType index_using_type_; diff --git a/src/sql/resolver/dml/ob_del_upd_resolver.cpp b/src/sql/resolver/dml/ob_del_upd_resolver.cpp index 7a36aac0f..3b8f9836e 100644 --- a/src/sql/resolver/dml/ob_del_upd_resolver.cpp +++ b/src/sql/resolver/dml/ob_del_upd_resolver.cpp @@ -2089,8 +2089,8 @@ int ObDelUpdResolver::add_index_related_columns_to_stmt(const TableItem &table_i LOG_DEBUG("add all column to stmt due to the update column is primary key"); } } else { - uint64_t index_tids[OB_MAX_INDEX_PER_TABLE]; - int64_t index_count = OB_MAX_INDEX_PER_TABLE; + uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; + int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; // get all the indexes if (OB_FAIL(schema_checker_->get_can_write_index_array(tenant_id, base_table_id, @@ -2123,8 +2123,8 @@ int ObDelUpdResolver::add_all_index_rowkey_to_stmt(const TableItem 
&table_item, { int ret = OB_SUCCESS; const ObTableSchema *index_schema = NULL; - uint64_t idx_tids[OB_MAX_INDEX_PER_TABLE]; - int64_t idx_count = OB_MAX_INDEX_PER_TABLE; + uint64_t idx_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; + int64_t idx_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; if (OB_ISNULL(schema_checker_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret)); @@ -4025,8 +4025,8 @@ int ObDelUpdResolver::generate_insert_table_info(const TableItem &table_item, ObDelUpdStmt *del_upd_stmt = get_del_upd_stmt(); const TableItem &base_table_item = table_item.get_base_table_item(); const ObTableSchema *table_schema = NULL; - uint64_t index_tid[OB_MAX_INDEX_PER_TABLE]; - int64_t gindex_cnt = OB_MAX_INDEX_PER_TABLE; + uint64_t index_tid[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; + int64_t gindex_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; if (OB_ISNULL(del_upd_stmt) || OB_ISNULL(schema_checker_) || OB_ISNULL(session_info_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected null", K(ret), K(del_upd_stmt), K(schema_checker_), K(session_info_)); diff --git a/src/sql/resolver/dml/ob_delete_resolver.cpp b/src/sql/resolver/dml/ob_delete_resolver.cpp index bd65c8c24..6b169ba35 100644 --- a/src/sql/resolver/dml/ob_delete_resolver.cpp +++ b/src/sql/resolver/dml/ob_delete_resolver.cpp @@ -396,8 +396,8 @@ int ObDeleteResolver::generate_delete_table_info(const TableItem &table_item) const ObTableSchema *table_schema = NULL; ObDeleteStmt *delete_stmt = get_delete_stmt(); ObDeleteTableInfo *table_info = NULL; - uint64_t index_tid[OB_MAX_INDEX_PER_TABLE]; - int64_t gindex_cnt = OB_MAX_INDEX_PER_TABLE; + uint64_t index_tid[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; + int64_t gindex_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; int64_t binlog_row_image = ObBinlogRowImage::FULL; if (OB_ISNULL(schema_checker_) || OB_ISNULL(params_.session_info_) || OB_ISNULL(allocator_) || OB_ISNULL(delete_stmt)) { diff --git a/src/sql/resolver/dml/ob_dml_resolver.cpp b/src/sql/resolver/dml/ob_dml_resolver.cpp index 
878ff5a58..a1d664fbf 100755 --- a/src/sql/resolver/dml/ob_dml_resolver.cpp +++ b/src/sql/resolver/dml/ob_dml_resolver.cpp @@ -18864,16 +18864,12 @@ int ObDMLResolver::resolve_match_against_exprs(ObRawExpr *&expr, if (OB_ISNULL(stmt) || OB_ISNULL(expr) || OB_ISNULL(params_.query_ctx_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null", K(ret), K(stmt), K(expr)); - } else if (match_exprs.count() > 1) { - // jinmao TODO: 之后存储层支持返回未匹配行,并且 SQL 层支持计算之后可以删掉这里的一系列限制 - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "match expr can only be used in simple filter for now"); - LOG_WARN("match expr can only be used in simple filter for now", K(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < match_exprs.count(); i++) { uint64_t table_id = OB_INVALID_ID; ObMatchFunRawExpr *cur_match_expr = NULL; ObMatchFunRawExpr *match_expr_on_table = NULL; + ObSEArray match_exprs_on_table; bool table_on_null_side = false; bool is_simple_filter = false; ObSEArray constraints; @@ -18882,38 +18878,31 @@ int ObDMLResolver::resolve_match_against_exprs(ObRawExpr *&expr, LOG_WARN("unexpected null", K(ret)); } else if (OB_FAIL(cur_match_expr->get_table_id(table_id))) { LOG_WARN("failed to get table id", K(ret)); - } else if (OB_FAIL(stmt->get_match_expr_on_table(table_id, match_expr_on_table))) { + } else if (OB_FAIL(stmt->get_match_expr_on_table(table_id, match_exprs_on_table))) { LOG_WARN("failed to get fulltext search expr on table", K(ret), K(table_id)); } else if (OB_FAIL(resolve_match_against_expr(*cur_match_expr))) { LOG_WARN("failed to resolve match index", K(ret)); - } else if (OB_ISNULL(match_expr_on_table)) { - if (scope != T_WHERE_SCOPE) { - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "fulltext search expr defined beyond where clause"); - LOG_WARN("fulltext search expr not found in condition expr", K(ret)); - } else if (OB_FAIL(ObOptimizerUtil::is_table_on_null_side(stmt, table_id, table_on_null_side))) { + } else { + for (int64_t match_idx = 0; 
match_idx < match_exprs_on_table.count(); ++match_idx) { + if (match_exprs_on_table.at(match_idx)->same_as(*cur_match_expr, &check_ctx)) { + match_expr_on_table = static_cast(match_exprs_on_table.at(match_idx)); + break; + } + } + } + + if (OB_FAIL(ret)) { + } else if (nullptr == match_expr_on_table) { + // same expr not found in stmt + if (OB_FAIL(ObOptimizerUtil::is_table_on_null_side(stmt, table_id, table_on_null_side))) { LOG_WARN("failed to check table on null side", K(ret)); } else if (table_on_null_side) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "fulltext search on null side of joined table"); LOG_WARN("fulltext search on null side of joined table is not supported", K(ret)); - } else if (OB_FAIL(check_fulltext_search_simple_filter(expr, cur_match_expr, is_simple_filter, constraints))) { - LOG_WARN("failed to check fulltext search simple filter", K(ret)); - } else if (is_simple_filter) { - if (OB_FAIL(stmt->get_match_exprs().push_back(cur_match_expr))) { - LOG_WARN("failed to push back expr", K(ret)); - } else if (OB_FAIL(append(params_.query_ctx_->all_expr_constraints_, constraints))) { - LOG_WARN("failed to append constraints", K(ret)); - } - } else { - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "filter that can't imply match_score not equal to 0"); - LOG_WARN("filter that can't imply match_score not equal to 0 is not supported", K(ret), KPC(expr)); + } else if (OB_FAIL(stmt->get_match_exprs().push_back(cur_match_expr))) { + LOG_WARN("failed to push back expr", K(ret)); } - } else if (!cur_match_expr->same_as(*match_expr_on_table, &check_ctx)) { - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "non-shareable match exprs on same base table"); - LOG_WARN("non-shareable match exprs on same base table are not supported", K(ret), KPC(cur_match_expr), KPC(match_expr_on_table)); } else if (OB_FAIL(replacer.add_replace_expr(cur_match_expr, match_expr_on_table))) { LOG_WARN("failed to add replace expr", K(ret)); } else if 
(OB_FAIL(replacer.replace(expr))) { @@ -19073,74 +19062,6 @@ int ObDMLResolver::resolve_match_index( return ret; } -// check that the fulltext search filter can imply a condition where match_score is not equal to zero. -int ObDMLResolver::check_fulltext_search_simple_filter(ObRawExpr *expr, - ObRawExpr *match_expr, - bool &is_simple_filter, - ObIArray &constraints) -{ - int ret = OB_SUCCESS; - is_simple_filter = false; - if (expr->get_expr_type() == T_FUN_MATCH_AGAINST) { - // bool expr will be added above in where scope - is_simple_filter = true; - } else { - ObRawExprCopier copier(*params_.expr_factory_); - ObSEArray match_exprs; - ObSEArray zero_exprs; - ObConstRawExpr *zero_expr = NULL; - ObObj obj_zero; - obj_zero.set_double(ObDoubleType, 0); - ObRawExpr *false_null_expr = NULL; - ObRawExpr *lnnvl_expr = NULL; - bool got_result = false; - ObObj result; - if (OB_ISNULL(params_.expr_factory_) || OB_ISNULL(params_.session_info_) || OB_ISNULL(allocator_) || - OB_ISNULL(params_.session_info_->get_cur_exec_ctx())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null", K(ret)); - } else if (OB_FAIL(params_.expr_factory_->create_raw_expr(T_DOUBLE, zero_expr))) { - LOG_WARN("create raw expr fail", K(ret)); - } else if (OB_ISNULL(zero_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null expr", K(ret)); - } else if (OB_FALSE_IT(zero_expr->set_value(obj_zero))) { - } else if (OB_FAIL(match_exprs.push_back(match_expr))) { - LOG_WARN("failed to push back expr", K(ret)); - } else if (OB_FAIL(zero_exprs.push_back(zero_expr))) { - LOG_WARN("failed to push back expr", K(ret)); - } else if (OB_FAIL(copier.add_replaced_expr(match_exprs, zero_exprs))) { - LOG_WARN("failed to add replace pair", K(ret)); - } else if (OB_FAIL(copier.copy_on_replace(expr, false_null_expr))) { - LOG_WARN("failed to do expr copy on replace", K(ret)); - } else if (OB_FAIL(ObRawExprUtils::build_lnnvl_expr(*params_.expr_factory_, false_null_expr, lnnvl_expr))) { - LOG_WARN("failed to build 
lnnvl expr", K(ret)); - } else if (OB_ISNULL(lnnvl_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null", K(ret)); - } else if (OB_FAIL(lnnvl_expr->formalize(params_.session_info_))) { - LOG_WARN("failed to formalize lnnvl expr", K(ret)); - } else if (!lnnvl_expr->is_static_const_expr()) { - is_simple_filter = false; - } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(params_.session_info_->get_cur_exec_ctx(), - lnnvl_expr, - result, - got_result, - *allocator_))) { - LOG_WARN("failed to calc cosnt or calculable expr", K(ret)); - } else if (!got_result || result.is_false() || result.is_null()) { - is_simple_filter = false; - } else { - is_simple_filter = true; - ObExprConstraint true_constraint(lnnvl_expr, PreCalcExprExpectResult::PRE_CALC_RESULT_TRUE); - if (OB_FAIL(constraints.push_back(true_constraint))) { - LOG_WARN("failed to push back true constraint", K(ret)); - } - } - } - return ret; -} - int ObDMLResolver::add_udt_dependency(const pl::ObUserDefinedType &udt_type) { int ret = OB_SUCCESS; diff --git a/src/sql/resolver/dml/ob_dml_resolver.h b/src/sql/resolver/dml/ob_dml_resolver.h index 393e6853a..d64e8b053 100644 --- a/src/sql/resolver/dml/ob_dml_resolver.h +++ b/src/sql/resolver/dml/ob_dml_resolver.h @@ -1034,10 +1034,6 @@ private: int resolve_match_index(const ColumnReferenceSet &match_column_set, const ObTableSchema &table_schema, ObMatchFunRawExpr &match_against); - int check_fulltext_search_simple_filter(ObRawExpr *expr, - ObRawExpr *match_expr, - bool &is_simple_filter, - ObIArray &constraints); int build_and_check_true_expr(ObRawExpr *const_expr, ObItemType compare_op, bool &is_true, diff --git a/src/sql/resolver/dml/ob_dml_stmt.cpp b/src/sql/resolver/dml/ob_dml_stmt.cpp index 30c5bde0a..23d016009 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.cpp +++ b/src/sql/resolver/dml/ob_dml_stmt.cpp @@ -5426,6 +5426,25 @@ int ObDMLStmt::do_formalize_lateral_derived_table_post() return ret; } +int ObDMLStmt::get_match_expr_on_table(uint64_t 
table_id, ObIArray &match_exprs) const +{ + int ret = OB_SUCCESS; + for (int64_t i = 0; OB_SUCC(ret) && i < get_match_exprs().count(); i++) { + uint64_t cur_tid = OB_INVALID_ID; + if (OB_ISNULL(get_match_exprs().at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null", K(ret)); + } else if (OB_FAIL(get_match_exprs().at(i)->get_table_id(cur_tid))) { + LOG_WARN("failed to get fulltext search exprs", K(ret)); + } else if (cur_tid != table_id) { + // skip + } else if (OB_FAIL(add_var_to_array_no_dup(match_exprs, static_cast(get_match_exprs().at(i))))) { + LOG_WARN("failed to append match expr to array", K(ret), K(table_id)); + } else { /*do nothing*/ } + } + return ret; +} + ObJtColBaseInfo::ObJtColBaseInfo() : col_type_(0), truncate_(0), @@ -5660,27 +5679,6 @@ int ObValuesTableDef::deep_copy(const ObValuesTableDef &other, return ret; } -int ObDMLStmt::get_match_expr_on_table(uint64_t table_id, ObMatchFunRawExpr *&match_expr) const -{ - int ret = OB_SUCCESS; - match_expr = NULL; - for (int64_t i = 0; OB_SUCC(ret) && i < get_match_exprs().count(); i++) { - uint64_t cur_tid = OB_INVALID_ID; - if (OB_ISNULL(get_match_exprs().at(i))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null", K(ret)); - } else if (OB_FAIL(get_match_exprs().at(i)->get_table_id(cur_tid))) { - LOG_WARN("failed to get fulltext search exprs", K(ret)); - } else if (OB_NOT_NULL(match_expr) && cur_tid == table_id) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument, find more than one match expr on current table", K(ret), K(table_id)); - } else if (cur_tid == table_id) { - match_expr = get_match_exprs().at(i); - } else { /*do nothing*/ } - } - return ret; -} - /** * 获取指定index的分区列/分区生成列 * e.g. 
create table (c1 int, c2 int generated always as (c1 + 1)) partition by hash (c2) diff --git a/src/sql/resolver/dml/ob_dml_stmt.h b/src/sql/resolver/dml/ob_dml_stmt.h index 0b7cfe161..f5f43d147 100644 --- a/src/sql/resolver/dml/ob_dml_stmt.h +++ b/src/sql/resolver/dml/ob_dml_stmt.h @@ -1030,7 +1030,7 @@ public: { return match_exprs_; } common::ObIArray &get_match_exprs() { return match_exprs_; } - int get_match_expr_on_table(uint64_t table_id, ObMatchFunRawExpr *&match_expr) const; + int get_match_expr_on_table(uint64_t table_id, ObIArray &match_exprs) const; int get_table_pseudo_column_like_exprs(uint64_t table_id, ObIArray &pseudo_columns); int get_table_pseudo_column_like_exprs(ObIArray &table_id, ObIArray &pseudo_columns); int rebuild_tables_hash(); diff --git a/src/sql/resolver/dml/ob_sql_hint.cpp b/src/sql/resolver/dml/ob_sql_hint.cpp index 36a64a477..6d1b8247c 100644 --- a/src/sql/resolver/dml/ob_sql_hint.cpp +++ b/src/sql/resolver/dml/ob_sql_hint.cpp @@ -2439,8 +2439,9 @@ int LogTableHint::assign(const LogTableHint &other) int LogTableHint::init_index_hints(ObSqlSchemaGuard &schema_guard) { int ret = OB_SUCCESS; - uint64_t tids[OB_MAX_INDEX_PER_TABLE + 1]; - int64_t table_index_count = OB_MAX_INDEX_PER_TABLE + 1; + uint64_t tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1]; + int64_t table_index_aux_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1; + const share::schema::ObTableSchema *data_table_schema = nullptr; if (OB_ISNULL(table_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected log index hint", K(ret), K(this)); @@ -2448,26 +2449,33 @@ int LogTableHint::init_index_hints(ObSqlSchemaGuard &schema_guard) /* do nothing */ } else if (OB_FAIL(schema_guard.get_can_read_index_array(table_->ref_id_, tids, - table_index_count, + table_index_aux_count, false, table_->access_all_part(), - false /*domain index*/, + true /*domain index*/, false /*spatial index*/))) { LOG_WARN("failed to get can read index", K(ret)); - } else if (table_index_count > OB_MAX_INDEX_PER_TABLE) { + } 
else if (OB_FAIL(schema_guard.get_table_schema(table_->ref_id_, data_table_schema))) { + LOG_WARN("failed to get data table schema", K(ret), K(table_->ref_id_)); + } else if (OB_ISNULL(data_table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("data table schema is null", K(ret), K(table_->ref_id_)); + } else if (table_index_aux_count > OB_MAX_AUX_TABLE_PER_MAIN_TABLE + || data_table_schema->get_index_count() > OB_MAX_INDEX_PER_TABLE) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("Table index count is bigger than OB_MAX_INDEX_PER_TABLE", K(ret), K(table_index_count)); + int64_t table_index_count = data_table_schema->get_index_count(); + LOG_WARN("Table index or index aux count is invalid", K(ret), K(table_index_count), K(table_index_aux_count)); } else if (union_merge_hint_ != nullptr && OB_FAIL(merge_index_list_.prepare_allocate(union_merge_hint_->get_index_name_list().count()))) { LOG_WARN("failed to prepare allocate merge index list", KPC(union_merge_hint_), K(ret)); } else { - LOG_TRACE("get readable index", K(table_index_count)); + LOG_TRACE("get readable index", K(table_index_aux_count)); const share::schema::ObTableSchema *index_schema = NULL; ObSEArray index_list; ObSEArray no_index_list; ObSEArray index_hints; ObSEArray no_index_hints; - for (int64_t i = -1; OB_SUCC(ret) && i < table_index_count; ++i) { + for (int64_t i = -1; OB_SUCC(ret) && i < table_index_aux_count; ++i) { uint64_t index_id = -1 == i ? 
table_->ref_id_ : tids[i]; ObString index_name; bool is_primary_key = false; @@ -2478,7 +2486,7 @@ int LogTableHint::init_index_hints(ObSqlSchemaGuard &schema_guard) OB_ISNULL(index_schema)) { ret = OB_SCHEMA_ERROR; LOG_WARN("fail to get table schema", K(index_id), K(ret)); - } else if (index_schema->is_fts_index() || index_schema->is_vec_index()) { + } else if (index_schema->is_built_in_fts_index() || index_schema->is_vec_index()) { // just ignore fts && vector index } else if (OB_FAIL(index_schema->get_index_name(index_name))) { LOG_WARN("fail to get index name", K(index_name), K(ret)); diff --git a/src/sql/resolver/dml/ob_update_resolver.cpp b/src/sql/resolver/dml/ob_update_resolver.cpp index c67f19837..e0ed8dc2a 100644 --- a/src/sql/resolver/dml/ob_update_resolver.cpp +++ b/src/sql/resolver/dml/ob_update_resolver.cpp @@ -489,8 +489,8 @@ int ObUpdateResolver::generate_update_table_info(ObTableAssignment &table_assign const ObTableSchema *table_schema = NULL; const TableItem *table_item = NULL; ObUpdateTableInfo *table_info = NULL; - uint64_t index_tid[OB_MAX_INDEX_PER_TABLE]; - int64_t gindex_cnt = OB_MAX_INDEX_PER_TABLE; + uint64_t index_tid[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; + int64_t gindex_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; int64_t binlog_row_image = ObBinlogRowImage::FULL; if (OB_ISNULL(schema_checker_) || OB_ISNULL(params_.session_info_) || OB_ISNULL(allocator_) || OB_ISNULL(update_stmt)) { diff --git a/src/sql/resolver/expr/ob_raw_expr_util.cpp b/src/sql/resolver/expr/ob_raw_expr_util.cpp index a5ebf321f..9d3831a6e 100644 --- a/src/sql/resolver/expr/ob_raw_expr_util.cpp +++ b/src/sql/resolver/expr/ob_raw_expr_util.cpp @@ -10021,6 +10021,29 @@ int ObRawExprUtils::extract_match_against_filters(const ObIArray &f return ret; } +int ObRawExprUtils::extract_match_exprs(ObRawExpr *expr, + ObIArray &match_exprs) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("expr is null", K(ret)); + } else if 
(!expr->is_match_against_expr()) { + // do nothing + } else if (OB_FAIL(add_var_to_array_no_dup(match_exprs, static_cast(expr)))) { + LOG_WARN("failed to push back match expr", K(ret)); + } + + if (OB_SUCC(ret) && expr->has_flag(CNT_MATCH_EXPR)) { + for (int64_t i = 0; OB_SUCC(ret) && i < expr->get_param_count(); ++i) { + if (OB_FAIL(SMART_CALL(extract_match_exprs(expr->get_param_expr(i), match_exprs)))) { + LOG_WARN("failed to extract match exprs", K(ret)); + } + } + } + return ret; +} + int ObRawExprUtils::build_dummy_count_expr(ObRawExprFactory &expr_factory, const ObSQLSessionInfo *session_info, ObAggFunRawExpr *&expr) diff --git a/src/sql/resolver/expr/ob_raw_expr_util.h b/src/sql/resolver/expr/ob_raw_expr_util.h index fa716d5d2..41ec2719f 100644 --- a/src/sql/resolver/expr/ob_raw_expr_util.h +++ b/src/sql/resolver/expr/ob_raw_expr_util.h @@ -1267,6 +1267,7 @@ public: static int extract_match_against_filters(const ObIArray &filters, ObIArray &other_filters, ObIArray &match_filters); + static int extract_match_exprs(ObRawExpr *expr, ObIArray &match_exprs); static int build_dummy_count_expr(ObRawExprFactory &expr_factory, const ObSQLSessionInfo *session_info, ObAggFunRawExpr *&expr); diff --git a/src/sql/resolver/ob_schema_checker.cpp b/src/sql/resolver/ob_schema_checker.cpp index 5c29bf44a..9bcb54d07 100644 --- a/src/sql/resolver/ob_schema_checker.cpp +++ b/src/sql/resolver/ob_schema_checker.cpp @@ -1143,8 +1143,8 @@ int ObSchemaChecker::check_column_has_index(const uint64_t tenant_id, uint64_t t { int ret = OB_SUCCESS; const ObColumnSchemaV2 *col_schema = NULL; - uint64_t index_tid_array[OB_MAX_INDEX_PER_TABLE]; - int64_t index_cnt = OB_MAX_INDEX_PER_TABLE; + uint64_t index_tid_array[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; + int64_t index_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; has_index = false; if (IS_NOT_INIT) { diff --git a/src/sql/rewrite/ob_query_range.h b/src/sql/rewrite/ob_query_range.h index 4242018f9..6164b40bd 100644 --- a/src/sql/rewrite/ob_query_range.h 
+++ b/src/sql/rewrite/ob_query_range.h @@ -487,6 +487,7 @@ public: } virtual int is_get(bool &is_get) const; int is_get(int64_t column_count, bool &is_get) const; + inline void reset_range_exprs() { range_exprs_.reset(); } virtual bool is_precise_get() const { return table_graph_.is_precise_get_; } static common::ObDomainOpType get_geo_relation(ObItemType type); static common::ObDomainOpType get_domain_op_type(ObItemType type); diff --git a/src/sql/rewrite/ob_query_range_define.h b/src/sql/rewrite/ob_query_range_define.h index dae5f278c..52cfdc806 100644 --- a/src/sql/rewrite/ob_query_range_define.h +++ b/src/sql/rewrite/ob_query_range_define.h @@ -322,6 +322,7 @@ public: } virtual inline bool has_range() const { return column_count_ > 0; } virtual bool is_contain_geo_filters() const { return contain_geo_filters_; } + inline void reset_range_exprs() { range_exprs_.reset(); } virtual const common::ObIArray &get_range_exprs() const { return range_exprs_; } virtual const common::ObIArray &get_ss_range_exprs() const { return ss_range_exprs_; } virtual const common::ObIArray &get_unprecise_range_exprs() const { return unprecise_range_exprs_; } diff --git a/src/sql/rewrite/ob_transform_pre_process.cpp b/src/sql/rewrite/ob_transform_pre_process.cpp index a5cad6119..c5e198401 100644 --- a/src/sql/rewrite/ob_transform_pre_process.cpp +++ b/src/sql/rewrite/ob_transform_pre_process.cpp @@ -10487,7 +10487,7 @@ int ObTransformPreProcess::preserve_order_for_fulltext_search(ObDMLStmt *stmt, b int ret = OB_SUCCESS; trans_happened = false; TableItem *table_item = NULL; - ObMatchFunRawExpr *match_expr = NULL; + ObRawExpr *match_expr = nullptr; if (OB_ISNULL(stmt)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected null", K(ret)); @@ -10506,18 +10506,57 @@ int ObTransformPreProcess::preserve_order_for_fulltext_search(ObDMLStmt *stmt, b LOG_WARN("unexpected null", K(ret)); } else if (!table_item->is_basic_table()) { // do nothing - } else if 
(OB_FAIL(stmt->get_match_expr_on_table(table_item->table_id_, match_expr))) { - LOG_WARN("failed to get fulltext search expr on table", K(table_item->table_id_), K(ret)); - } else if (OB_ISNULL(match_expr)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected null", K(ret)); + } else if (0 == stmt->get_match_exprs().count()) { + // do nothing } else { + const common::ObIArray &condition_exprs = stmt->get_condition_exprs(); + bool found = false; + for (int64_t i = 0; OB_SUCC(ret) && !found && i < condition_exprs.count(); ++i) { + ObRawExpr *filter = nullptr; + if (OB_ISNULL(filter = condition_exprs.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected nullptr to where condition filter", K(ret), K(i), KP(filter)); + } else if (filter->has_flag(IS_MATCH_EXPR)) { + match_expr = filter; + found = true; + } else if (!filter->has_flag(CNT_MATCH_EXPR) + || filter->has_flag(CNT_OR)) { + // skip + } else if (IS_RANGE_CMP_OP(filter->get_expr_type())) { + ObRawExpr *param_expr0 = filter->get_param_expr(0); + ObRawExpr *param_expr1 = filter->get_param_expr(1); + if (OB_ISNULL(param_expr0) || OB_ISNULL(param_expr1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpecter null param expr for range cmp op", K(ret), KP(param_expr0), KP(param_expr1)); + } else if (param_expr0->is_const_expr() && param_expr1->has_flag(IS_MATCH_EXPR)) { + match_expr = param_expr1; + found = true; + } else if (param_expr1->is_const_expr() && param_expr0->has_flag(IS_MATCH_EXPR)) { + match_expr = param_expr0; + found = true; + } + } else if (filter->get_expr_type() == T_OP_BOOL) { + ObRawExpr *param_expr = filter->get_param_expr(0); + if (OB_ISNULL(param_expr)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null param expr for bool op", K(ret)); + } else if (param_expr->has_flag(IS_MATCH_EXPR)) { + found = true; + match_expr = param_expr; + } + } + } + } + + if (OB_SUCC(ret) && nullptr != match_expr) { OrderItem item(match_expr, default_desc_direction()); if (OB_FAIL(stmt->add_order_item(item))) { 
LOG_WARN("failed to add order item", K(ret), K(item)); + } else { + trans_happened = true; } - trans_happened = true; } + return ret; } diff --git a/src/sql/rewrite/ob_transform_utils.cpp b/src/sql/rewrite/ob_transform_utils.cpp index b0e07a6bb..b8117d301 100644 --- a/src/sql/rewrite/ob_transform_utils.cpp +++ b/src/sql/rewrite/ob_transform_utils.cpp @@ -4811,8 +4811,8 @@ int ObTransformUtils::compute_basic_table_property(const ObDMLStmt *stmt, ObSEArray cur_cond_exprs; ObSqlBitSet<> table_set; ObSqlSchemaGuard *schema_guard = NULL; - uint64_t index_tids[OB_MAX_INDEX_PER_TABLE]; - int64_t index_count = OB_MAX_INDEX_PER_TABLE; + uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE]; + int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE; if (OB_ISNULL(stmt) || OB_ISNULL(table) || OB_ISNULL(check_helper.alloc_) || OB_ISNULL(check_helper.fd_factory_) || OB_ISNULL(check_helper.schema_checker_) diff --git a/src/storage/high_availability/ob_ls_transfer_info.h b/src/storage/high_availability/ob_ls_transfer_info.h index 0a211c813..db5394b48 100644 --- a/src/storage/high_availability/ob_ls_transfer_info.h +++ b/src/storage/high_availability/ob_ls_transfer_info.h @@ -101,7 +101,7 @@ public: return pos; } private: - static const int64_t MAX_TABLET_COUNT = 200; + static const int64_t MAX_TABLET_COUNT = share::schema::OB_MAX_TRANSFER_BINDING_TABLET_CNT; int64_t count_; common::ObTabletID id_array_[MAX_TABLET_COUNT]; };