[FEAT MERGE] Fulltext index feature for version 435

Co-authored-by: Tyshawn <tuyunshan@gmail.com>
Co-authored-by: wz-WillZheng <18701736737@163.com>
Co-authored-by: SuperYoko <lipeng.yk@gmail.com>
This commit is contained in:
saltonz 2024-11-26 10:15:07 +00:00 committed by ob-robot
parent 57bc2f94ac
commit 648e6d4047
85 changed files with 5446 additions and 722 deletions

View File

@ -216,6 +216,13 @@ const int32_t NOT_CHECK_FLAG = 0;
const int64_t MAX_SERVER_COUNT = 4095;
const uint64_t OB_SERVER_USER_ID = 0;
const int64_t OB_MAX_INDEX_PER_TABLE = 128;
// The maximum number of auxiliary tables required to build OB_MAX_INDEX_PER_TABLE indexes.
// Sized to support 128 vector indexes (vector indexes need the most auxiliary tables per index).
// OB_MAX_AUX_TABLE_PER_MAIN_TABLE = 2 + 3 * OB_MAX_INDEX_PER_TABLE + 2 (aux lob meta + aux lob piece) + 1 (mlog) = 389.
// The leading 2 represents the aux tables shared by all vec indexes; the 3 represents the aux tables private to each vec index.
// If an index type with more auxiliary tables than the vec index is introduced, this value needs to be adapted.
const int64_t OB_MAX_AUX_TABLE_PER_MAIN_TABLE = 389;
const int64_t OB_MAX_SSTABLE_PER_TABLE = OB_MAX_INDEX_PER_TABLE + 1;
const int64_t OB_MAX_SQL_LENGTH = 64 * 1024;
const int64_t OB_TINY_SQL_LENGTH = 128;

View File

@ -1849,23 +1849,23 @@ int ObTableCtx::init_trans(transaction::ObTxDesc *trans_desc,
int ObTableCtx::init_index_info(const ObString &index_name, const uint64_t arg_table_id)
{
int ret = OB_SUCCESS;
uint64_t tids[OB_MAX_INDEX_PER_TABLE];
int64_t index_cnt = OB_MAX_INDEX_PER_TABLE;
uint64_t tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
int64_t index_aux_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
if (OB_FAIL(schema_guard_->get_can_read_index_array(tenant_id_,
ref_table_id_,
tids,
index_cnt,
index_aux_cnt,
false))) {
LOG_WARN("fail to get can read index", K(ret), K_(tenant_id), K_(ref_table_id));
} else if (index_cnt > OB_MAX_INDEX_PER_TABLE) {
} else if (index_aux_cnt > OB_MAX_AUX_TABLE_PER_MAIN_TABLE) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("index count is bigger than OB_MAX_INDEX_PER_TABLE", K(ret), K(index_cnt));
LOG_WARN("index aux count is bigger than OB_MAX_AUX_TABLE_PER_MAIN_TABLE", K(ret), K(index_aux_cnt));
} else {
const share::schema::ObTableSchema *index_schema = nullptr;
ObString this_index_name;
bool is_found = false;
for (int64_t i = 0; OB_SUCC(ret) && i < index_cnt && !is_found; i++) {
for (int64_t i = 0; OB_SUCC(ret) && i < index_aux_cnt && !is_found; i++) {
if (OB_FAIL(schema_guard_->get_table_schema(tenant_id_, tids[i], index_schema))) {
LOG_WARN("fail to get index schema", K(ret), K_(tenant_id), K(tids[i]));
} else if (OB_ISNULL(index_schema)) {

View File

@ -82,26 +82,26 @@ int ObKvSchemaCacheObj::cons_index_info(ObSchemaGetterGuard *schema_guard,
common::ObTableID table_id)
{
int ret = OB_SUCCESS;
int64_t index_cnt = OB_MAX_INDEX_PER_TABLE;
uint64_t tids[OB_MAX_INDEX_PER_TABLE];
int64_t index_aux_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
uint64_t tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
if (OB_ISNULL(schema_guard) || !schema_guard->is_inited()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("schema guard is NULL or not inited", K(ret));
} else if (OB_FAIL(schema_guard->get_can_write_index_array(tenant_id,
table_id,
tids,
index_cnt,
index_aux_cnt,
false /*only global*/))) {
LOG_WARN("fail to get can write index array", K(ret), K(table_id));
} else if (OB_FAIL(local_index_tids_.init(index_cnt))) {
} else if (OB_FAIL(local_index_tids_.init(index_aux_cnt))) {
LOG_WARN("fail to init local index tids", K(ret));
} else if (OB_FAIL(global_index_tids_.init(index_cnt))) {
} else if (OB_FAIL(global_index_tids_.init(index_aux_cnt))) {
LOG_WARN("fail to init global index tids", K(ret));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < index_cnt; i++) {
for (int64_t i = 0; OB_SUCC(ret) && i < index_aux_cnt; i++) {
const ObTableSchema *index_schema = nullptr;
if (OB_FAIL(schema_guard->get_table_schema(tenant_id_, tids[i], index_schema))) {
LOG_WARN("fail to get index schema", K(ret), K(tids[i]), K(i), K(index_cnt));
LOG_WARN("fail to get index schema", K(ret), K(tids[i]), K(i), K(index_aux_cnt));
} else if (OB_ISNULL(index_schema)) {
ret = OB_SCHEMA_ERROR;
LOG_WARN("null index schema", K(ret));

View File

@ -457,8 +457,8 @@ int ObDbmsStats::fast_gather_index_stats(ObExecContext &ctx,
{
int ret = OB_SUCCESS;
is_all_fast_gather = true;
uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1];
int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1;
uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1];
int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1;
share::schema::ObSchemaGetterGuard *schema_guard = ctx.get_virtual_table_ctx().schema_guard_;
if (OB_FAIL(get_table_index_infos(schema_guard,
ctx.get_my_session()->get_effective_tenant_id(),
@ -1129,8 +1129,8 @@ int ObDbmsStats::delete_table_index_stats(sql::ObExecContext &ctx,
const ObTableStatParam data_param)
{
int ret = OB_SUCCESS;
uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1];
int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1;
uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1];
int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1;
if (OB_FAIL(get_table_index_infos(ctx.get_virtual_table_ctx().schema_guard_,
ctx.get_my_session()->get_effective_tenant_id(),
data_param.table_id_,
@ -1614,8 +1614,8 @@ int ObDbmsStats::export_table_index_stats(sql::ObExecContext &ctx,
const ObTableStatParam data_param)
{
int ret = OB_SUCCESS;
uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1];
int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1;
uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1];
int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1;
if (OB_FAIL(get_table_index_infos(ctx.get_virtual_table_ctx().schema_guard_,
ctx.get_my_session()->get_effective_tenant_id(),
data_param.table_id_,
@ -2013,8 +2013,8 @@ int ObDbmsStats::import_table_index_stats(sql::ObExecContext &ctx,
const ObTableStatParam data_param)
{
int ret = OB_SUCCESS;
uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1];
int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1;
uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1];
int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1;
if (OB_FAIL(get_table_index_infos(ctx.get_virtual_table_ctx().schema_guard_,
ctx.get_my_session()->get_effective_tenant_id(),
data_param.table_id_,
@ -2235,8 +2235,8 @@ int ObDbmsStats::lock_or_unlock_index_stats(sql::ObExecContext &ctx,
bool is_lock_stats)
{
int ret = OB_SUCCESS;
uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1];
int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1;
uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1];
int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1;
if (OB_FAIL(get_table_index_infos(ctx.get_virtual_table_ctx().schema_guard_,
ctx.get_my_session()->get_effective_tenant_id(),
data_param.table_id_,
@ -3686,24 +3686,24 @@ int ObDbmsStats::init_column_stat_params(ObIAllocator &allocator,
}
}
}
uint64_t tids[OB_MAX_INDEX_PER_TABLE];
int64_t index_count = OB_MAX_INDEX_PER_TABLE;
uint64_t tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
int64_t index_aux_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
const ObTableSchema *index_schema = NULL;
const uint64_t tenant_id = table_schema.get_tenant_id();
if (OB_FAIL(ret)) {//do nothing
} else if (OB_FAIL(schema_guard.get_can_read_index_array(tenant_id,
table_schema.get_table_id(),
tids,
index_count,
index_aux_count,
false, /*with_mv*/
true, /*with_global_index*/
false /*domain index*/))) {
LOG_WARN("failed to get can read index", K(table_schema.get_table_id()), K(ret));
} else if (index_count > OB_MAX_INDEX_PER_TABLE) {
} else if (index_aux_count > OB_MAX_AUX_TABLE_PER_MAIN_TABLE) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("Invalid index count", K(table_schema.get_table_id()), K(index_count), K(ret));
LOG_WARN("Invalid index count", K(table_schema.get_table_id()), K(index_aux_count), K(ret));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < index_count; ++i) {
for (int64_t i = 0; OB_SUCC(ret) && i < index_aux_count; ++i) {
if (OB_FAIL(schema_guard.get_table_schema(tenant_id, tids[i], index_schema))) {
LOG_WARN("failed to get index schema", K(ret), K(tenant_id), K(tids[i]));
} else if (OB_ISNULL(index_schema)) {
@ -6369,8 +6369,8 @@ int ObDbmsStats::get_index_schema(sql::ObExecContext &ctx,
int ret = OB_SUCCESS;
share::schema::ObSchemaGetterGuard *schema_guard = ctx.get_virtual_table_ctx().schema_guard_;
index_schema = NULL;
uint64_t index_tids[OB_MAX_INDEX_PER_TABLE + 1];
int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1;
uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1];
int64_t index_aux_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1;
if (OB_ISNULL(schema_guard)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret));
@ -6378,11 +6378,11 @@ int ObDbmsStats::get_index_schema(sql::ObExecContext &ctx,
ctx.get_my_session()->get_effective_tenant_id(),
data_table_id,
index_tids,
index_count))) {
index_aux_count))) {
LOG_WARN("failed to get table index infos", K(ret));
} else {
bool found_it = false;
for (int64_t i = 0; OB_SUCC(ret) && !found_it && i < index_count; ++i) {
for (int64_t i = 0; OB_SUCC(ret) && !found_it && i < index_aux_count; ++i) {
const share::schema::ObTableSchema *cur_index_schema = NULL;
ObString cur_index_name;
if (index_tids[i] == data_table_id) {

View File

@ -1953,7 +1953,7 @@ int ObDDLScheduler::create_drop_fts_index_task(
} else if (OB_ISNULL(index_schema) || OB_ISNULL(drop_index_arg)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), KP(index_schema), KP(drop_index_arg));
} else if (FALSE_IT(is_fts_index = index_schema->is_fts_index_aux())) {
} else if (FALSE_IT(is_fts_index = (index_schema->is_fts_index_aux() || drop_index_arg->is_parent_task_dropping_fts_index_))) {
} else if (OB_UNLIKELY(schema_version <= 0)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), KP(index_schema), K(schema_version));
@ -2000,7 +2000,6 @@ int ObDDLScheduler::create_drop_fts_index_task(
const ObFTSDDLChildTaskInfo rowkey_doc(rowkey_doc_name, rowkey_doc_table_id, 0/*task_id*/);
const ObFTSDDLChildTaskInfo doc_rowkey(doc_rowkey_name, doc_rowkey_table_id, 0/*task_id*/);
const ObDDLType ddl_type = is_fts_index ? DDL_DROP_FTS_INDEX : DDL_DROP_MULVALUE_INDEX;
if (OB_FAIL(ret)) {
} else if (OB_FAIL(index_task.init(index_schema->get_tenant_id(),
task_id,

View File

@ -906,6 +906,15 @@ int ObDDLTask::get_ddl_type_str(const int64_t ddl_type, const char *&ddl_type_st
case DDL_DROP_INDEX:
ddl_type_str = "drop index";
break;
case DDL_DROP_FTS_INDEX:
ddl_type_str = "drop fts index";
break;
case DDL_DROP_MULVALUE_INDEX:
ddl_type_str = "drop mulvalue index";
break;
case DDL_DROP_VEC_INDEX:
ddl_type_str = "drop vec index";
break;
case DDL_ALTER_COLUMN_GROUP:
ddl_type_str = "alter column group";
break;
@ -1096,7 +1105,9 @@ bool ObDDLTask::is_ddl_task_can_be_cancelled() const
{
bool can_be_cancelled = true;
if (task_type_ == ObDDLType::DDL_DROP_INDEX ||
task_type_ == ObDDLType::DDL_DROP_VEC_INDEX) {
task_type_ == ObDDLType::DDL_DROP_VEC_INDEX ||
task_type_ == ObDDLType::DDL_DROP_FTS_INDEX ||
task_type_ == ObDDLType::DDL_DROP_MULVALUE_INDEX) {
can_be_cancelled = false;
}
return can_be_cancelled;

View File

@ -561,7 +561,11 @@ int ObDropFTSIndexTask::succ()
int ObDropFTSIndexTask::fail()
{
return cleanup();
int ret = OB_SUCCESS;
if (OB_FAIL(cleanup())) {
LOG_WARN("cleanup task failed", K(ret));
}
return ret;
}
int ObDropFTSIndexTask::cleanup_impl()

View File

@ -92,6 +92,12 @@ private:
int succ();
int fail();
virtual int cleanup_impl() override;
// Decides whether a failed step of this task should be retried.
// The concrete error code is intentionally not consulted: the answer depends
// only on the current task status, and is true while the status is still
// ordered before WAIT_CHILD_TASK_FINISH and false from that status onwards.
virtual bool is_error_need_retry(const int ret_code) override
{
  UNUSED(ret_code);
  const bool before_wait_child_finish =
      (task_status_ < share::ObDDLTaskStatus::WAIT_CHILD_TASK_FINISH);
  return before_wait_child_finish;
}
bool is_fts_task() const { return share::ObDDLType::DDL_DROP_FTS_INDEX == task_type_; }
private:

View File

@ -101,7 +101,12 @@ private:
int send_build_single_replica_request();
int check_build_single_replica(bool &is_end);
virtual int cleanup_impl() override;
// Decides whether a failed step of this task should be retried.
// The concrete error code is intentionally not consulted: the answer depends
// only on the current task status, and is true while the status is still
// ordered before DROP_AUX_INDEX_TABLE and false from that status onwards.
virtual bool is_error_need_retry(const int ret_code) override
{
  UNUSED(ret_code);
  const bool before_drop_aux_table =
      (task_status_ < share::ObDDLTaskStatus::DROP_AUX_INDEX_TABLE);
  return before_drop_aux_table;
}
private:
ObRootService *root_service_;
ObVecIndexDDLChildTaskInfo rowkey_vid_;

View File

@ -296,7 +296,7 @@ int ObMajorMergeProgressChecker::prepare_unfinish_table_ids()
}
const ObSimpleTableSchemaV2 *index_simple_schema = nullptr;
ObTableCompactionInfo table_compaction_info;
ObSEArray<const ObSimpleTableSchemaV2 *, OB_MAX_INDEX_PER_TABLE> index_schemas;
ObSEArray<const ObSimpleTableSchemaV2 *, OB_MAX_AUX_TABLE_PER_MAIN_TABLE> index_schemas;
ObSEArray<uint64_t, 50> not_validate_index_ids;
int64_t start_idx = 0;
int64_t end_idx = 0;

View File

@ -4155,7 +4155,8 @@ int ObDDLService::check_can_add_column_use_instant_(const bool is_oracle_mode,
return ret;
}
int ObDDLService::check_is_add_column_online_(const ObTableSchema &table_schema,
int ObDDLService::check_is_add_column_online_(const AlterTableSchema &alter_table_schema,
const ObTableSchema &table_schema,
const AlterColumnSchema &alter_column_schema,
const obrpc::ObAlterTableArg::AlterAlgorithm &algorithm,
const bool is_oracle_mode,
@ -4168,7 +4169,7 @@ int ObDDLService::check_is_add_column_online_(const ObTableSchema &table_schema,
bool is_change_column_order = false;
if (OB_DDL_ADD_COLUMN != alter_column_schema.alter_type_) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("alter_type is not add column", KR(ret), K(alter_column_schema.alter_type_));
LOG_WARN("alter_type is not add column", KR(ret), K(alter_column_schema));
} else if (algorithm == obrpc::ObAlterTableArg::AlterAlgorithm::INSTANT) {
if (OB_FAIL(check_can_add_column_use_instant_(is_oracle_mode,
tenant_data_version,
@ -4176,11 +4177,34 @@ int ObDDLService::check_is_add_column_online_(const ObTableSchema &table_schema,
LOG_WARN("fail to check can add column use instant algorithm", KR(ret), K(is_oracle_mode), K(table_schema));
}
}
if (OB_SUCC(ret)) {
if (alter_column_schema.is_autoincrement_ || alter_column_schema.is_primary_key_ || alter_column_schema.has_not_null_constraint()) {
tmp_ddl_type = ObDDLType::DDL_TABLE_REDEFINITION;
} else if (nullptr != table_schema.get_column_schema(alter_column_schema.get_column_name())) {
tmp_ddl_type = ObDDLType::DDL_TABLE_REDEFINITION;
ObTableSchema::const_column_iterator it_begin = alter_table_schema.column_begin();
ObTableSchema::const_column_iterator it_end = alter_table_schema.column_end();
for (; OB_SUCC(ret) && it_begin != it_end; it_begin++) {
const AlterColumnSchema *column_schema = nullptr;
if (OB_ISNULL(column_schema = static_cast<AlterColumnSchema *>(*it_begin))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("*it_begin is NULL", K(ret));
} else if (ObSchemaOperationType::OB_DDL_DROP_COLUMN == column_schema->alter_type_) {
lib::Worker::CompatMode compat_mode = (is_oracle_mode ?
lib::Worker::CompatMode::ORACLE : lib::Worker::CompatMode::MYSQL);
lib::CompatModeGuard guard(compat_mode);
const ObString &drop_column_name = column_schema->get_origin_column_name();
const ObString &add_column_name = alter_column_schema.get_column_name();
if (ObColumnNameHashWrapper(drop_column_name) == ObColumnNameHashWrapper(add_column_name)) {
tmp_ddl_type = ObDDLType::DDL_TABLE_REDEFINITION;
}
}
}
}
}
if (OB_SUCC(ret)) {
if (ObDDLType::DDL_INVALID != tmp_ddl_type) {
} else if (alter_column_schema.is_stored_generated_column()) {
tmp_ddl_type = ObDDLType::DDL_ADD_COLUMN_OFFLINE;
} else if (OB_FAIL(check_is_change_column_order(table_schema, alter_column_schema, is_change_column_order))) {
@ -4301,7 +4325,7 @@ int ObDDLService::check_can_add_column_instant_(const ObTableSchema &orig_table_
switch (op_type) {
case OB_DDL_ADD_COLUMN: {
ObDDLType tmp_ddl_type = ObDDLType::DDL_INVALID;
if (!add_column_instant && OB_FAIL(check_is_add_column_online_(orig_table_schema, *alter_column_schema, algorithm,
if (!add_column_instant && OB_FAIL(check_is_add_column_online_(alter_table_schema, orig_table_schema, *alter_column_schema, algorithm,
is_oracle_mode, tenant_data_version, tmp_ddl_type))) {
LOG_WARN("fail to check is add column online", KR(ret));
} else if (ObDDLType::DDL_ADD_COLUMN_INSTANT == tmp_ddl_type) {
@ -4368,7 +4392,7 @@ int ObDDLService::check_alter_table_column(obrpc::ObAlterTableArg &alter_table_a
switch (op_type) {
case OB_DDL_ADD_COLUMN: {
ObDDLType tmp_ddl_type = ObDDLType::DDL_INVALID;
if (OB_FAIL(check_is_add_column_online_(orig_table_schema, *alter_column_schema, algorithm,
if (OB_FAIL(check_is_add_column_online_(alter_table_schema, orig_table_schema, *alter_column_schema, algorithm,
is_oracle_mode, tenant_data_version, tmp_ddl_type))) {
LOG_WARN("fail to check is add column online", K(ret));
} else if (tmp_ddl_type == ObDDLType::DDL_ADD_COLUMN_ONLINE) {
@ -5441,7 +5465,8 @@ int ObDDLService::alter_table_primary_key(obrpc::ObAlterTableArg &alter_table_ar
const uint64_t tenant_data_version)
{
int ret = OB_SUCCESS;
int64_t index_count = new_table_schema.get_index_tid_count();
int64_t index_count = new_table_schema.get_index_count();
int64_t index_aux_count = new_table_schema.get_index_tid_count();
const ObSArray<ObIndexArg *> &index_arg_list = alter_table_arg.index_arg_list_;
for (int64_t i = 0; OB_SUCC(ret) && i < index_arg_list.size(); ++i) {
ObIndexArg *index_arg = const_cast<ObIndexArg *>(index_arg_list.at(i));
@ -5477,10 +5502,11 @@ int ObDDLService::alter_table_primary_key(obrpc::ObAlterTableArg &alter_table_ar
case ObIndexArg::ADD_PRIMARY_KEY:
case ObIndexArg::ALTER_PRIMARY_KEY: {
if (ObIndexArg::ADD_PRIMARY_KEY == type) {
if (OB_MAX_INDEX_PER_TABLE <= index_count) {
if (OB_MAX_AUX_TABLE_PER_MAIN_TABLE <= index_aux_count || OB_MAX_INDEX_PER_TABLE <= index_count) {
ret = OB_ERR_TOO_MANY_KEYS;
LOG_USER_ERROR(OB_ERR_TOO_MANY_KEYS, OB_MAX_INDEX_PER_TABLE);
LOG_WARN("too many index for table!", K(index_count), K(OB_MAX_INDEX_PER_TABLE));
LOG_WARN("too many index or index aux for table!",
K(index_count), K(OB_MAX_INDEX_PER_TABLE), K(index_aux_count), K(OB_MAX_AUX_TABLE_PER_MAIN_TABLE));
} else if (!new_table_schema.is_heap_table()) {
ret = OB_ERR_MULTIPLE_PRI_KEY;
LOG_WARN("multiple primary key defined", K(ret));
@ -7267,9 +7293,10 @@ int ObDDLService::alter_table_index(obrpc::ObAlterTableArg &alter_table_arg,
HEAP_VAR(RenameIndexNameHashSet, rename_ori_index_name_set) {
HEAP_VAR(RenameIndexNameHashSet, rename_new_index_name_set) {
HEAP_VAR(AlterIndexNameHashSet, alter_index_name_set) {
int64_t index_count = new_table_schema.get_index_tid_count();
for (int64_t i = 0; OB_SUCC(ret) && i < index_arg_list.size(); ++i) {
ObIndexArg *index_arg = index_arg_list.at(i);
int64_t index_count = new_table_schema.get_index_count();
int64_t index_aux_count = new_table_schema.get_index_tid_count();
if (OB_ISNULL(index_arg)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("index arg should not be null", K(ret));
@ -7284,10 +7311,11 @@ int ObDDLService::alter_table_index(obrpc::ObAlterTableArg &alter_table_arg,
}
} else if (OB_FAIL(GET_MIN_DATA_VERSION(create_index_arg->tenant_id_, tenant_data_version))) {
LOG_WARN("get min data version failed", K(ret), KPC(create_index_arg));
} else if (OB_MAX_INDEX_PER_TABLE <= index_count) {
} else if (OB_MAX_AUX_TABLE_PER_MAIN_TABLE <= index_aux_count || OB_MAX_INDEX_PER_TABLE <= index_count) {
ret = OB_ERR_TOO_MANY_KEYS;
LOG_USER_ERROR(OB_ERR_TOO_MANY_KEYS, OB_MAX_INDEX_PER_TABLE);
LOG_WARN("too many index for table!", K(index_count), K(OB_MAX_INDEX_PER_TABLE));
LOG_WARN("too many index or index aux for table!",
K(index_count), K(OB_MAX_INDEX_PER_TABLE), K(index_aux_count), K(OB_MAX_AUX_TABLE_PER_MAIN_TABLE));
}
if (!new_table_schema.is_partitioned_table()
&& !new_table_schema.is_auto_partitioned_table()
@ -7454,8 +7482,6 @@ int ObDDLService::alter_table_index(obrpc::ObAlterTableArg &alter_table_arg,
} else if (OB_FAIL(add_index_name_set.set_refactored(index_key))) {
LOG_WARN("set index name to hash set failed",
K(create_index_arg->index_name_), K(ret));
} else {
++index_count;
}
}
}
@ -7519,8 +7545,6 @@ int ObDDLService::alter_table_index(obrpc::ObAlterTableArg &alter_table_arg,
new_table_schema,
trans))) {
LOG_WARN("failed to alter table drop index", K(*drop_index_arg), K(ret));
} else {
--index_count;
}
}
}
@ -8500,7 +8524,7 @@ int ObDDLService::get_dropping_domain_index_invisiable_aux_table_schema(
{
int ret = OB_SUCCESS;
const share::schema::ObTableSchema *data_table_schema = nullptr;
ObSEArray<const ObSimpleTableSchemaV2 *, OB_MAX_INDEX_PER_TABLE> indexs;
ObSEArray<const ObSimpleTableSchemaV2 *, OB_MAX_AUX_TABLE_PER_MAIN_TABLE> indexs;
if (OB_UNLIKELY(OB_INVALID_ID == data_table_id
|| OB_INVALID_ID == index_table_id
|| OB_INVALID_TENANT_ID == tenant_id
@ -8577,7 +8601,7 @@ int ObDDLService::get_dropping_vec_index_invisiable_table_schema_(
{
int ret = OB_SUCCESS;
const share::schema::ObTableSchema *data_table_schema = nullptr;
ObSEArray<const ObSimpleTableSchemaV2 *, OB_MAX_INDEX_PER_TABLE> indexs;
ObSEArray<const ObSimpleTableSchemaV2 *, OB_MAX_AUX_TABLE_PER_MAIN_TABLE> indexs;
if (OB_UNLIKELY(OB_INVALID_ID == data_table_id
|| OB_INVALID_ID == index_table_id
|| OB_INVALID_TENANT_ID == tenant_id
@ -15014,6 +15038,7 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg,
}
if (OB_SUCC(ret) && is_double_table_long_running_ddl(ddl_type)) {
bool has_index_operation = false;
bool will_be_having_domain_index_operation = false;
bool has_fts_or_multivalue_or_vec_index = false;
bool is_adding_constraint = false;
bool is_column_store = false;
@ -15042,11 +15067,17 @@ int ObDDLService::check_is_offline_ddl(ObAlterTableArg &alter_table_arg,
table_id,
has_fts_or_multivalue_or_vec_index))) {
LOG_WARN("check has fts index failed", K(ret));
} else if (OB_FAIL(check_will_be_having_domain_index_operation(alter_table_arg,
will_be_having_domain_index_operation))) {
LOG_WARN("check will be having domain index operation failed", K(ret));
} else if (OB_FAIL(check_is_adding_constraint(tenant_id, table_id, is_adding_constraint))) {
LOG_WARN("failed to call check_is_adding_constraint", K(ret));
} else if (has_index_operation) {
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "The DDL cannot be run concurrently with creating index.");
} else if (will_be_having_domain_index_operation) {
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "The DDL cannot be run, as creating/dropping fulltext/multivalue/vector index.");
} else if (has_fts_or_multivalue_or_vec_index) {
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "Run this DDL operation on table with fulltext/multivalue/vector index.");
@ -15125,6 +15156,30 @@ int ObDDLService::check_has_domain_index(
return ret;
}
// Scans the index argument list of an ALTER TABLE request and reports whether
// it asks for the creation of a fulltext / multivalue / vector index.
//
// @param [in]  alter_table_arg  request whose index_arg_list_ is scanned.
// @param [out] will_be_having_domain_index_operation
//              reset to false on entry; set to true as soon as one matching
//              entry is found (the scan stops at the first match).
// @return OB_SUCCESS, or OB_INVALID_ARGUMENT if the list holds a null entry.
int ObDDLService::check_will_be_having_domain_index_operation(
    const obrpc::ObAlterTableArg &alter_table_arg,
    bool &will_be_having_domain_index_operation/*false*/)
{
  int ret = OB_SUCCESS;
  will_be_having_domain_index_operation = false;
  const ObSArray<ObIndexArg *> &index_arg_list = alter_table_arg.index_arg_list_;
  for (int64_t i = 0; OB_SUCC(ret) && i < index_arg_list.size(); ++i) {
    ObIndexArg *index_arg = index_arg_list.at(i);
    if (OB_ISNULL(index_arg)) {
      ret = OB_INVALID_ARGUMENT;
      LOG_WARN("index arg should not be null", K(ret));
    } else if (ObIndexArg::ADD_INDEX != index_arg->index_action_type_) {
      // Only ADD_INDEX entries are inspected. The list also carries entries
      // with other action types, and index_type_ is only reachable through the
      // ObCreateIndexArg downcast below; performing that downcast on an entry
      // of another concrete type would read unrelated memory.
      // NOTE(review): entries that drop a domain index are therefore not
      // reported by this function — confirm that callers reject that case by
      // other means (e.g. a check for existing domain indexes on the table).
    } else {
      // Same guard-then-cast pattern the ADD_INDEX branches of this file use.
      const ObCreateIndexArg *create_index_arg = static_cast<const ObCreateIndexArg *>(index_arg);
      if (share::schema::is_fts_or_multivalue_index(create_index_arg->index_type_) ||
          share::schema::is_vec_index(create_index_arg->index_type_)) {
        will_be_having_domain_index_operation = true;
        break;
      }
    }
  }
  return ret;
}
int ObDDLService::check_is_oracle_mode_add_column_not_null_ddl(const obrpc::ObAlterTableArg &alter_table_arg,
ObSchemaGetterGuard &schema_guard,
bool &is_oracle_mode_add_column_not_null_ddl,
@ -20130,18 +20185,20 @@ int ObDDLService::add_new_index_schema(obrpc::ObAlterTableArg &alter_table_arg,
HEAP_VAR(AddIndexNameHashSet, add_index_name_set) {
HEAP_VAR(DropIndexNameHashSet, drop_index_name_set) {
const ObSArray<ObIndexArg *> &index_arg_list = alter_table_arg.index_arg_list_;
int64_t index_count = new_table_schema.get_index_tid_count();
for (int64_t i = 0; OB_SUCC(ret) && i < index_arg_list.size(); ++i) {
ObIndexArg *index_arg = const_cast<ObIndexArg *>(index_arg_list.at(i));
int64_t index_count = new_table_schema.get_index_count();
int64_t index_aux_count = new_table_schema.get_index_tid_count();
if (OB_ISNULL(index_arg)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("index arg should not be null", K(ret));
} else {
if (index_arg->index_action_type_ == ObIndexArg::ADD_INDEX) {
if (OB_MAX_INDEX_PER_TABLE <= index_count) {
if (OB_MAX_AUX_TABLE_PER_MAIN_TABLE <= index_aux_count || OB_MAX_INDEX_PER_TABLE <= index_count) {
ret = OB_ERR_TOO_MANY_KEYS;
LOG_USER_ERROR(OB_ERR_TOO_MANY_KEYS, OB_MAX_INDEX_PER_TABLE);
LOG_WARN("too many index for table!", K(index_count), K(OB_MAX_INDEX_PER_TABLE));
LOG_WARN("too many index or index aux for table!",
K(index_count), K(OB_MAX_INDEX_PER_TABLE), K(index_aux_count), K(OB_MAX_AUX_TABLE_PER_MAIN_TABLE));
}
ObCreateIndexArg *create_index_arg = static_cast<ObCreateIndexArg *>(index_arg);
if (!new_table_schema.is_partitioned_table()
@ -20275,8 +20332,6 @@ int ObDDLService::add_new_index_schema(obrpc::ObAlterTableArg &alter_table_arg,
LOG_WARN("set index name to hash set failed",
K(create_index_arg->index_name_), K(ret));
} else {
++index_count;
}
}
}

View File

@ -1336,6 +1336,9 @@ private:
const uint64_t tenant_id,
const uint64_t data_table_id,
bool &domain_index_exist);
int check_will_be_having_domain_index_operation(
const obrpc::ObAlterTableArg &alter_table_arg,
bool &will_be_having_domain_index_operation);
int check_has_index_operation(
ObSchemaGetterGuard &schema_guard,
const uint64_t teannt_id,
@ -1730,7 +1733,8 @@ private:
const share::schema::ObColumnSchemaV2 &orig_column_schema,
share::schema::AlterColumnSchema &alter_column_schema,
bool &is_offline) const;
int check_is_add_column_online_(const share::schema::ObTableSchema &table_schema,
int check_is_add_column_online_(const AlterTableSchema &alter_table_schema,
const share::schema::ObTableSchema &table_schema,
const share::schema::AlterColumnSchema &alter_column_schema,
const obrpc::ObAlterTableArg::AlterAlgorithm &algorithm,
const bool is_oracle_mode,

View File

@ -1275,11 +1275,14 @@ int ObIndexBuilder::do_create_index(
LOG_WARN("can not add index on table in recyclebin", K(ret), K(arg));
} else if (OB_FAIL(ddl_service_.check_restore_point_allow(tenant_id, *table_schema))) {
LOG_WARN("failed to check restore point allow.", K(ret), K(tenant_id), K(table_id));
} else if (table_schema->get_index_tid_count() >= OB_MAX_INDEX_PER_TABLE) {
} else if (table_schema->get_index_tid_count() >= OB_MAX_AUX_TABLE_PER_MAIN_TABLE
|| table_schema->get_index_count() >= OB_MAX_INDEX_PER_TABLE) {
ret = OB_ERR_TOO_MANY_KEYS;
LOG_USER_ERROR(OB_ERR_TOO_MANY_KEYS, OB_MAX_INDEX_PER_TABLE);
int64_t index_count = table_schema->get_index_tid_count();
LOG_WARN("too many index for table", K(OB_MAX_INDEX_PER_TABLE), K(index_count), K(ret));
int64_t index_aux_count = table_schema->get_index_tid_count();
int64_t index_count = table_schema->get_index_count();
LOG_WARN("too many index or index aux for table",
K(index_count), K(OB_MAX_INDEX_PER_TABLE), K(index_aux_count), K(OB_MAX_AUX_TABLE_PER_MAIN_TABLE), K(ret));
} else if (OB_FAIL(ddl_service_.check_fk_related_table_ddl(*table_schema, ObDDLType::DDL_CREATE_INDEX))) {
LOG_WARN("check whether the foreign key related table is executing ddl failed", K(ret));
} else if (INDEX_TYPE_NORMAL_LOCAL == arg.index_type_
@ -1355,11 +1358,6 @@ int ObIndexBuilder::generate_schema(
}
if (OB_SUCC(ret)) {
if (arg.index_columns_.count() <= 0) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("index columns can't be empty", "index columns", arg.index_columns_, K(ret));
} else {}
//do some check
if (OB_SUCC(ret)) {
if (!GCONF.enable_sys_table_ddl) {
@ -1380,6 +1378,14 @@ int ObIndexBuilder::generate_schema(
}
}
if (OB_FAIL(ret)) {
} else if (arg.index_columns_.count() <= 0) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("index columns can't be empty", "index columns", arg.index_columns_, K(ret));
} else {
// do something
}
if (OB_FAIL(ret)) {
} else if (share::schema::is_fts_index(arg.index_type_)) {
uint64_t tenant_data_version = 0;
@ -1912,7 +1918,7 @@ int ObIndexBuilder::check_has_none_shared_index_tables_for_fts_or_multivalue_ind
bool &has_fts_or_multivalue_index)
{
int ret = OB_SUCCESS;
ObSEArray<const ObSimpleTableSchemaV2 *, OB_MAX_INDEX_PER_TABLE> indexs;
ObSEArray<const ObSimpleTableSchemaV2 *, OB_MAX_AUX_TABLE_PER_MAIN_TABLE> indexs;
has_fts_or_multivalue_index = false;
if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id || OB_INVALID_ID == data_table_id)) {
ret = OB_INVALID_ARGUMENT;
@ -1949,7 +1955,7 @@ int ObIndexBuilder::check_has_none_shared_index_tables_for_vector_index_(
bool &has_none_share_vector_index)
{
int ret = OB_SUCCESS;
ObSEArray<const ObSimpleTableSchemaV2 *, OB_MAX_INDEX_PER_TABLE> indexs;
ObSEArray<const ObSimpleTableSchemaV2 *, OB_MAX_AUX_TABLE_PER_MAIN_TABLE> indexs;
has_none_share_vector_index = false;
if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id || OB_INVALID_ID == data_table_id)) {
ret = OB_INVALID_ARGUMENT;

View File

@ -1550,10 +1550,10 @@ int ObSchemaGetterGuard::get_can_write_index_array(
LOG_WARN("cannot get index table schema for table ", KR(ret), K(tenant_id), K(index_id));
} else if (OB_UNLIKELY(index_schema->is_final_invalid_index())) {
//invalid index status, need ingore
} else if (OB_MAX_INDEX_PER_TABLE <= can_write_count) {
} else if (OB_MAX_AUX_TABLE_PER_MAIN_TABLE <= can_write_count) {
ret = OB_ERR_TOO_MANY_KEYS;
LOG_USER_ERROR(OB_ERR_TOO_MANY_KEYS, OB_MAX_INDEX_PER_TABLE);
LOG_WARN("too many index or mlog for table!", K(can_write_count), K(OB_MAX_INDEX_PER_TABLE));
LOG_WARN("too many index, index aux or mlog for table!", K(can_write_count), K(OB_MAX_AUX_TABLE_PER_MAIN_TABLE));
} else if (index_schema->is_mlog_table()) {
index_tid_array[can_write_count] = simple_index_infos.at(i).table_id_;
++can_write_count;
@ -1594,8 +1594,8 @@ int ObSchemaGetterGuard::column_is_key(
} else if (column_schema->is_rowkey_column() || column_schema->is_tbl_part_key_column()) {
is_key = true;
} else {
int64_t index_tid_array_size = OB_MAX_INDEX_PER_TABLE;
uint64_t index_tid_array[OB_MAX_INDEX_PER_TABLE];
int64_t index_tid_array_size = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
uint64_t index_tid_array[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
if (OB_FAIL(get_can_write_index_array(tenant_id, table_id, index_tid_array, index_tid_array_size))) {
LOG_WARN("get index tid array failed", K(ret), K(tenant_id), K(index_tid_array_size));
}

View File

@ -308,11 +308,14 @@ const int64_t OB_AUX_LOB_TABLE_CNT = 2; // aux lob meta + aux lob piece
// The max count of aux tables that can be created for each index.
// Some special indexes such as full-text index(FTS), multi-value index, vector index, etc., have multiple aux tables.
// The current index with max aux tables: vector index
const int64_t OB_MAX_TABLE_CNT_PER_INDEX = 5;
// They need to be changed at the same time, choosing OB_MAX_AUX_TABLE_PER_TABLE is larger.
const int64_t OB_MAX_SHARED_TABLE_CNT_PER_INDEX_TYPE = 2; // number of common aux tables for all vect indexes in a table.
const int64_t OB_MAX_TABLE_CNT_PER_INDEX = 3; // number of aux tables private per vec index.
// The max count of aux tables with physical tablets per user data table.
const int64_t OB_MAX_AUX_TABLE_PER_TABLE = OB_MAX_INDEX_PER_TABLE * OB_MAX_TABLE_CNT_PER_INDEX + OB_AUX_LOB_TABLE_CNT + OB_MLOG_TABLE_CNT; // 643
const int64_t OB_MAX_AUX_TABLE_PER_TABLE = OB_MAX_INDEX_PER_TABLE * OB_MAX_TABLE_CNT_PER_INDEX +
OB_MAX_SHARED_TABLE_CNT_PER_INDEX_TYPE + OB_AUX_LOB_TABLE_CNT + OB_MLOG_TABLE_CNT; // 389
// The max tablet count of a transfer is one data table tablet with max aux tablets bound together.
const int64_t OB_MAX_TRANSFER_BINDING_TABLET_CNT = OB_MAX_AUX_TABLE_PER_TABLE + 1; // 644
const int64_t OB_MAX_TRANSFER_BINDING_TABLET_CNT = OB_MAX_AUX_TABLE_PER_TABLE + 1; // 390
// Note: When adding a new index type, you should modify "tools/obtest/t/quick/partition_balance.test" and
// "tools/obtest/t/shared_storage/partition_balance.test" to verify that all aux tables of the new index

View File

@ -8414,6 +8414,57 @@ const ObConstraint *ObTableSchema::get_constraint(const uint64_t constraint_id)
});
}
// Returns the number of logical indexes on this table, derived from the index type of
// every entry in simple_index_infos_.
//
// Entries are classified by index type:
//   - rowkey-doc / doc-rowkey and vec rowkey-vid / vid-rowkey entries are only recorded
//     as "present"; they are never counted themselves.
//   - fts index / fts doc-word / multivalue / vec delta-buffer / vec index-id /
//     vec snapshot-data entries are tallied per kind.
//   - every other entry counts as one index.
//
// The fts + multivalue tallies contribute only when both rowkey-doc and doc-rowkey entries
// are present; the vec tallies contribute only when both vec rowkey-vid and vid-rowkey
// entries are present.
//
// Note: if another index kind with several aux tables is introduced, a new branch has to be
// added to the classification below.
int64_t ObTableSchema::get_index_count() const
{
  // Presence flags for the aux tables that only gate the fts/multivalue and vec tallies.
  bool has_rowkey_doc = false;
  bool has_doc_rowkey = false;
  bool has_vec_rowkey_vid = false;
  bool has_vec_vid_rowkey = false;

  // Per-kind tallies of the remaining multi-aux-table index entries.
  int64_t fts_index_cnt = 0;
  int64_t fts_doc_word_cnt = 0;
  int64_t multivalue_cnt = 0;
  int64_t vec_delta_buffer_cnt = 0;
  int64_t vec_index_id_cnt = 0;
  int64_t vec_snapshot_data_cnt = 0;

  // Entries that match none of the special kinds: one entry == one index.
  int64_t total_index_cnt = 0;

  const int64_t entry_cnt = get_index_tid_count();
  for (int64_t pos = 0; pos < entry_cnt; ++pos) {
    const ObIndexType type = simple_index_infos_.at(pos).index_type_;
    if (share::schema::is_rowkey_doc_aux(type)) {
      has_rowkey_doc = true;
    } else if (share::schema::is_doc_rowkey_aux(type)) {
      has_doc_rowkey = true;
    } else if (share::schema::is_fts_index_aux(type)) {
      ++fts_index_cnt;
    } else if (share::schema::is_fts_doc_word_aux(type)) {
      ++fts_doc_word_cnt;
    } else if (share::schema::is_multivalue_index_aux(type)) {
      ++multivalue_cnt;
    } else if (share::schema::is_vec_rowkey_vid_type(type)) {
      has_vec_rowkey_vid = true;
    } else if (share::schema::is_vec_vid_rowkey_type(type)) {
      has_vec_vid_rowkey = true;
    } else if (share::schema::is_vec_delta_buffer_type(type)) {
      ++vec_delta_buffer_cnt;
    } else if (share::schema::is_vec_index_id_type(type)) {
      ++vec_index_id_cnt;
    } else if (share::schema::is_vec_index_snapshot_data_type(type)) {
      ++vec_snapshot_data_cnt;
    } else {
      ++total_index_cnt;
    }
  }

  // Taking the minimum over the per-kind tallies keeps the resulting index number from
  // exceeding OB_MAX_INDEX_PER_TABLE, but it says nothing about whether the number of aux
  // tables stays within OB_MAX_AUX_TABLE_PER_MAIN_TABLE. Callers therefore usually check
  // this value together with the OB_MAX_AUX_TABLE_PER_MAIN_TABLE limit.
  if (has_rowkey_doc && has_doc_rowkey) {
    total_index_cnt += OB_MIN(fts_index_cnt, fts_doc_word_cnt) + multivalue_cnt;
  }
  if (has_vec_rowkey_vid && has_vec_vid_rowkey) {
    const int64_t vec_id_or_snapshot_cnt = OB_MIN(vec_index_id_cnt, vec_snapshot_data_cnt);
    total_index_cnt += OB_MIN(vec_delta_buffer_cnt, vec_id_or_snapshot_cnt);
  }
  return total_index_cnt;
}
const ObConstraint *ObTableSchema::get_constraint(const ObString &constraint_name) const
{
return get_constraint_internal(
@ -8801,7 +8852,7 @@ int ObTableSchema::add_simple_index_info(const ObAuxTableMetaInfo &simple_index_
{
int ret = OB_SUCCESS;
bool need_add = true;
int64_t N = simple_index_infos_.count();
int64_t N = get_index_tid_count();
// we are sure that index_tid are added in sorted order
if (simple_index_info.table_id_ == OB_INVALID_ID) {
@ -8819,7 +8870,7 @@ int ObTableSchema::add_simple_index_info(const ObAuxTableMetaInfo &simple_index_
}
if (OB_SUCC(ret) && need_add) {
const int64_t last_pos = N - 1;
if (N >= common::OB_MAX_INDEX_PER_TABLE) {
if (N >= OB_MAX_AUX_TABLE_PER_MAIN_TABLE || get_index_count() >= common::OB_MAX_INDEX_PER_TABLE) {
ret = OB_SIZE_OVERFLOW;
LOG_WARN("index num in table is more than limited num", K(ret));
} else if ((last_pos >= 0)

View File

@ -1432,7 +1432,7 @@ public:
const ObConstraint *get_constraint(const common::ObString &constraint_name) const;
int get_pk_constraint_name(common::ObString &pk_name) const;
const ObConstraint *get_pk_constraint() const;
int64_t get_index_count() const;
int64_t get_column_idx(const uint64_t column_id, const bool ignore_hidden_column = false) const;
int64_t get_replica_num() const;
int64_t get_tablet_size() const { return tablet_size_; }

View File

@ -275,7 +275,7 @@ int ObVectorIndexUtil::check_table_has_vector_of_fts_index(
LOG_WARN("index table schema should not be null", K(ret), K(simple_index_infos.at(i).table_id_));
} else if (index_table_schema->is_vec_index()) {
has_vec_index = true;
} else if (index_table_schema->is_fts_index()) {
} else if (index_table_schema->is_fts_index_aux() || index_table_schema->is_fts_doc_word_aux()) {
has_fts_index = true;
}
}

View File

@ -82,6 +82,9 @@ ob_set_subtarget(ob_sql das
das/iter/ob_das_doc_id_merge_iter.cpp
das/iter/ob_das_vid_merge_iter.cpp
das/iter/ob_das_index_merge_iter.cpp
das/iter/ob_das_func_data_iter.cpp
das/iter/ob_das_functional_lookup_iter.cpp
das/iter/ob_das_cache_lookup_iter.cpp
das/iter/ob_das_mvi_lookup_iter.cpp
das/iter/ob_das_spatial_scan_iter.cpp
)

View File

@ -399,7 +399,8 @@ int ObStaticEngineCG::disable_use_rich_format(const ObLogicalOperator &op, ObOpS
|| (static_cast<ObTableScanSpec &>(spec)).tsc_ctdef_.scan_ctdef_.is_get_
|| tsc.is_text_retrieval_scan()
|| tsc.is_tsc_with_doc_id()
|| tsc.is_tsc_with_vid()) {
|| tsc.is_tsc_with_vid()
|| tsc.has_func_lookup()) {
use_rich_format = false;
LOG_DEBUG("tsc disable use rich format", K(tsc.get_index_back()), K(tsc.use_batch()),
K(is_virtual_table(tsc.get_ref_table_id())));

View File

@ -111,7 +111,8 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc
if (op.is_text_retrieval_scan() || op.is_vec_idx_scan()) {
scan_ctdef.ir_scan_type_ = ObTSCIRScanType::OB_IR_INV_IDX_SCAN;
}
if (OB_FAIL(generate_das_scan_ctdef(op, scan_ctdef, has_rowscn))) {
DASScanCGCtx cg_ctx;
if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, scan_ctdef, has_rowscn))) {
LOG_WARN("generate das scan ctdef failed", K(ret), K(scan_ctdef.ref_table_id_));
} else {
tsc_ctdef.flashback_item_.need_scn_ |= has_rowscn;
@ -173,7 +174,8 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc
bool need_attach = false;
if (OB_SUCC(ret) && op.is_text_retrieval_scan()) {
if (OB_FAIL(generate_text_ir_ctdef(op, tsc_ctdef, root_ctdef))) {
DASScanCGCtx cg_ctx;
if (OB_FAIL(generate_text_ir_ctdef(op, cg_ctx, tsc_ctdef, root_ctdef))) {
LOG_WARN("failed to generate text ir ctdef", K(ret));
} else {
need_attach = true;
@ -260,6 +262,22 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc
}
}
if (OB_SUCC(ret) && op.has_func_lookup()) {
ObDASBaseCtDef *rowkey_scan_ctdef = nullptr;
ObDASBaseCtDef *main_lookup_ctdef = nullptr;
if (op.get_index_back()) {
rowkey_scan_ctdef = static_cast<ObDASTableLookupCtDef *>(root_ctdef)->children_[0];
main_lookup_ctdef = tsc_ctdef.lookup_ctdef_;
} else {
rowkey_scan_ctdef = root_ctdef;
}
if (OB_FAIL(generate_functional_lookup_ctdef(op, tsc_ctdef, rowkey_scan_ctdef, main_lookup_ctdef, root_ctdef))) {
LOG_WARN("failed to generate functional lookup ctdef", K(ret));
} else {
need_attach = true;
}
}
if (OB_SUCC(ret) && need_attach) {
if (!op.get_is_index_global()) {
tsc_ctdef.lookup_ctdef_ = nullptr;
@ -274,6 +292,7 @@ int ObTscCgService::generate_tsc_ctdef(ObLogTableScan &op, ObTableScanCtDef &tsc
}
int ObTscCgService::generate_table_param(const ObLogTableScan &op,
const DASScanCGCtx &cg_ctx,
ObDASScanCtDef &scan_ctdef,
common::ObIArray<uint64_t> &tsc_out_cols)
{
@ -305,7 +324,7 @@ int ObTscCgService::generate_table_param(const ObLogTableScan &op,
} else if (table_schema->is_multivalue_index_aux() && FALSE_IT(scan_ctdef.table_param_.set_is_multivalue_index(true))) {
} else if (table_schema->is_vec_index() && FALSE_IT(scan_ctdef.table_param_.set_is_vec_index(true))) {
} else if (FALSE_IT(scan_ctdef.table_param_.set_is_partition_table(table_schema->is_partitioned_table()))) {
} else if (OB_FAIL(extract_das_output_column_ids(op, scan_ctdef, *table_schema, tsc_out_cols))) {
} else if (OB_FAIL(extract_das_output_column_ids(op, scan_ctdef, *table_schema, cg_ctx, tsc_out_cols))) {
LOG_WARN("extract tsc output column ids failed", K(ret));
} else if (OB_FAIL(session_info->get_sys_variable(SYS_VAR_OB_ROUTE_POLICY, route_policy))) {
LOG_WARN("get route policy failed", K(ret));
@ -710,20 +729,27 @@ int ObTscCgService::generate_pd_storage_flag(const ObLogPlan *log_plan,
//2. all columns required by TSC operator filters
//3. all columns required by pushdown aggr expr
int ObTscCgService::extract_das_access_exprs(const ObLogTableScan &op,
const DASScanCGCtx &cg_ctx,
ObDASScanCtDef &scan_ctdef,
ObIArray<ObRawExpr*> &access_exprs)
{
int ret = OB_SUCCESS;
const ObTableID &scan_table_id = scan_ctdef.ref_table_id_;
const bool use_index_merge = scan_ctdef.is_index_merge_;
if (scan_table_id != op.get_rowkey_doc_table_id()
if (cg_ctx.is_func_lookup_ && scan_table_id != op.get_rowkey_doc_table_id()) {
const ObTextRetrievalInfo &tr_info = op.get_lookup_tr_infos().at(cg_ctx.curr_func_lookup_idx_);
if (OB_FAIL(extract_text_ir_access_columns(op, tr_info, scan_ctdef, access_exprs))) {
LOG_WARN("failed to extract text ir access columns for functional lookup", K(ret));
}
} else if (scan_table_id != op.get_rowkey_doc_table_id()
&& ((op.is_text_retrieval_scan() && scan_table_id != op.get_ref_table_id())
|| (op.is_multivalue_index_scan() && scan_table_id == op.get_doc_id_index_table_id()))) {
// non main table scan in text retrieval
if (OB_FAIL(extract_text_ir_access_columns(op, scan_ctdef, access_exprs))) {
if (OB_FAIL(extract_text_ir_access_columns(op, op.get_text_retrieval_info(), scan_ctdef, access_exprs))) {
LOG_WARN("failed to extract text ir access columns", K(ret));
}
} else if (op.is_tsc_with_doc_id() && scan_table_id == op.get_rowkey_doc_table_id()) {
} else if ((op.is_tsc_with_doc_id() || cg_ctx.is_func_lookup_)
&& scan_table_id == op.get_rowkey_doc_table_id()) {
if (OB_FAIL(extract_rowkey_doc_access_columns(op, scan_ctdef, access_exprs))) {
LOG_WARN("fail to extract rowkey doc access columns", K(ret));
}
@ -877,13 +903,14 @@ int ObTscCgService::extract_tsc_access_columns(const ObLogTableScan &op,
ObArray<ObRawExpr*> tsc_exprs;
ObArray<ObRawExpr*> scan_pushdown_filters;
ObArray<ObRawExpr*> lookup_pushdown_filters;
const bool need_filter_out_match_expr = op.is_text_retrieval_scan() || op.has_func_lookup();
if (OB_FAIL(const_cast<ObLogTableScan &>(op).extract_pushdown_filters(tsc_exprs, //non-pushdown filters
scan_pushdown_filters,
lookup_pushdown_filters))) {
LOG_WARN("extract pushdown filters failed", K(ret));
} else if (OB_FAIL(append_array_no_dup(tsc_exprs, op.get_output_exprs()))) {
LOG_WARN("append output exprs failed", K(ret));
} else if (op.is_text_retrieval_scan() && OB_FAIL(filter_out_match_exprs(tsc_exprs))) {
} else if (need_filter_out_match_expr && OB_FAIL(filter_out_match_exprs(tsc_exprs))) {
// the matching columns of match expr are only used as semantic identifiers and are not actually accessed
LOG_WARN("failed to filter out fts exprs", K(ret));
} else if (OB_FAIL(ObRawExprUtils::extract_column_exprs(tsc_exprs, access_exprs, true))) {
@ -924,6 +951,7 @@ int ObTscCgService::generate_geo_access_ctdef(const ObLogTableScan &op, const Ob
}
int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op,
const DASScanCGCtx &cg_ctx,
ObDASScanCtDef &scan_ctdef,
common::ObIArray<ObExpr *> &doc_id_expr,
common::ObIArray<ObExpr *> &vec_vid_expr,
@ -944,7 +972,7 @@ int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op,
} else if (OB_ISNULL(table_schema)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected nullptr to table schema", K(ret));
} else if (OB_FAIL(extract_das_access_exprs(op, scan_ctdef, access_exprs))) {
} else if (OB_FAIL(extract_das_access_exprs(op, cg_ctx, scan_ctdef, access_exprs))) {
LOG_WARN("extract das access exprs failed", K(ret));
} else if (table_schema->is_spatial_index()
&& OB_FAIL(generate_geo_access_ctdef(op, *table_schema, access_exprs))) {
@ -984,7 +1012,7 @@ int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op,
ObColumnRefRawExpr* col_expr = static_cast<ObColumnRefRawExpr *>(expr);
bool is_mapping_vt_table = op.get_real_ref_table_id() != op.get_ref_table_id();
ObTableID real_table_id = is_mapping_vt_table ? op.get_real_ref_table_id() : op.get_table_id();
const bool doc_id_in_rowkey_doc = op.is_tsc_with_doc_id() && table_schema->is_rowkey_doc_id();
const bool doc_id_in_rowkey_doc = (op.is_tsc_with_doc_id() || cg_ctx.is_func_lookup_) && table_schema->is_rowkey_doc_id();
const bool vec_id_in_rowkey_vid = op.is_tsc_with_vid() && table_schema->is_vec_rowkey_vid_type();
real_table_id = doc_id_in_rowkey_doc || vec_id_in_rowkey_vid ? table_id : real_table_id;
if (!col_expr->has_flag(IS_COLUMN) || (col_expr->get_table_id() != real_table_id && !(col_expr->is_doc_id_column() || col_expr->is_vec_vid_column()))) {
@ -1032,6 +1060,7 @@ int ObTscCgService::generate_access_ctdef(const ObLogTableScan &op,
}
int ObTscCgService::generate_pushdown_aggr_ctdef(const ObLogTableScan &op,
const DASScanCGCtx &cg_ctx,
ObDASScanCtDef &scan_ctdef)
{
int ret = OB_SUCCESS;
@ -1039,9 +1068,12 @@ int ObTscCgService::generate_pushdown_aggr_ctdef(const ObLogTableScan &op,
const uint64_t aggregate_output_count = pushdown_aggr_exprs.count();
const ObIArray<ObRawExpr*> &group_by_columns = op.get_pushdown_groupby_columns();
const uint64_t group_by_column_count = group_by_columns.count();
if (op.is_text_retrieval_scan()) {
if (op.is_text_retrieval_scan() || cg_ctx.is_func_lookup_) {
// text retrieval scan on fulltext index
if (OB_FAIL(generate_text_ir_pushdown_expr_ctdef(op, scan_ctdef))) {
const ObTextRetrievalInfo &tr_info = cg_ctx.is_func_lookup_
? op.get_lookup_tr_infos().at(cg_ctx.curr_func_lookup_idx_)
: op.get_text_retrieval_info();
if (OB_FAIL(generate_text_ir_pushdown_expr_ctdef(tr_info, op, scan_ctdef))) {
LOG_WARN("failed to generate text ir pushdown aggregate ctdef", K(ret), K(op));
}
} else if (op.get_index_back() && aggregate_output_count > 0) {
@ -1119,6 +1151,7 @@ int ObTscCgService::generate_pushdown_aggr_ctdef(const ObLogTableScan &op,
}
int ObTscCgService::generate_das_scan_ctdef(const ObLogTableScan &op,
const DASScanCGCtx &cg_ctx,
ObDASScanCtDef &scan_ctdef,
bool &has_rowscn)
{
@ -1126,12 +1159,12 @@ int ObTscCgService::generate_das_scan_ctdef(const ObLogTableScan &op,
ObSEArray<ObExpr *, 1> doc_id_expr;
ObSEArray<ObExpr *, 1> vec_vid_expr;
// 1. add basic column
if (OB_FAIL(generate_access_ctdef(op, scan_ctdef, doc_id_expr, vec_vid_expr, has_rowscn))) {
if (OB_FAIL(generate_access_ctdef(op, cg_ctx, scan_ctdef, doc_id_expr, vec_vid_expr, has_rowscn))) {
LOG_WARN("generate access ctdef failed", K(ret), K(scan_ctdef.ref_table_id_));
}
//2. generate pushdown aggr column
if (OB_SUCC(ret)) {
if (OB_FAIL(generate_pushdown_aggr_ctdef(op, scan_ctdef))) {
if (OB_FAIL(generate_pushdown_aggr_ctdef(op, cg_ctx, scan_ctdef))) {
LOG_WARN("generate pushdown aggr ctdef failed", K(ret));
}
}
@ -1180,7 +1213,7 @@ int ObTscCgService::generate_das_scan_ctdef(const ObLogTableScan &op,
//6. generate table param
ObArray<uint64_t> tsc_out_cols;
if (OB_SUCC(ret)) {
if (OB_FAIL(generate_table_param(op, scan_ctdef, tsc_out_cols))) {
if (OB_FAIL(generate_table_param(op, cg_ctx, scan_ctdef, tsc_out_cols))) {
LOG_WARN("generate table param failed", K(ret));
}
}
@ -1233,6 +1266,7 @@ int ObTscCgService::generate_das_scan_ctdef(const ObLogTableScan &op,
int ObTscCgService::extract_das_output_column_ids(const ObLogTableScan &op,
ObDASScanCtDef &scan_ctdef,
const ObTableSchema &index_schema,
const DASScanCGCtx &cg_ctx,
ObIArray<uint64_t> &output_cids)
{
int ret = OB_SUCCESS;
@ -1240,15 +1274,22 @@ int ObTscCgService::extract_das_output_column_ids(const ObLogTableScan &op,
const ObTableID &table_id = scan_ctdef.ref_table_id_;
const bool use_index_merge = scan_ctdef.is_index_merge_;
if (table_id != op.get_rowkey_doc_table_id()
&& ((op.is_text_retrieval_scan() && table_id != op.get_ref_table_id())
|| (op.is_multivalue_index_scan() && table_id == op.get_doc_id_index_table_id()))) {
// non main table scan in text retrieval
if (OB_FAIL(extract_text_ir_das_output_column_ids(op, scan_ctdef, output_cids))) {
if (op.need_doc_id_index_back() && table_id == op.get_doc_id_index_table_id()) {
if (OB_FAIL(extract_doc_id_index_back_output_column_ids(op, output_cids))) {
LOG_WARN("failed to extract doc id index back output column ids", K(ret));
}
} else if ((op.is_text_retrieval_scan() && table_id != op.get_ref_table_id() && table_id != op.get_rowkey_doc_table_id())
|| (cg_ctx.is_func_lookup_ && table_id != op.get_rowkey_doc_table_id())) {
const ObTextRetrievalInfo &tr_info = cg_ctx.is_func_lookup_
? op.get_lookup_tr_infos().at(cg_ctx.curr_func_lookup_idx_)
: op.get_text_retrieval_info();
if (OB_FAIL(extract_text_ir_das_output_column_ids(tr_info, scan_ctdef, output_cids))) {
LOG_WARN("failed to extract text retrieval das output column ids", K(ret));
}
} else if (op.is_tsc_with_doc_id() && table_id == op.get_rowkey_doc_table_id()) {
if (OB_FAIL(extract_rowkey_doc_output_columns_ids(index_schema, op, scan_ctdef, output_cids))) {
} else if ((op.is_tsc_with_doc_id() || cg_ctx.is_func_lookup_)
&& table_id == op.get_rowkey_doc_table_id()) {
const bool output_rowkey = !cg_ctx.is_func_lookup_;
if (OB_FAIL(extract_rowkey_doc_output_columns_ids(index_schema, op, scan_ctdef, output_rowkey, output_cids))) {
LOG_WARN("fail to extract rowkey doc output columns ids", K(ret));
}
} else if (op.is_vec_idx_scan() &&
@ -1342,6 +1383,20 @@ int ObTscCgService::extract_das_output_column_ids(const ObLogTableScan &op,
LOG_WARN("store group id expr failed", K(ret));
} else if (OB_FAIL(extract_das_column_ids(das_output_cols, output_cids))) {
LOG_WARN("extract column ids failed", K(ret));
} else if (op.has_func_lookup() && op.get_real_index_table_id() == table_id) {
// main scan in functional lookup, need to output extra rowkey exprs for further lookup on functional index
ObArray<uint64_t> rowkey_column_ids;
const ObTableSchema *table_schema = nullptr;
if (OB_FAIL(cg_.opt_ctx_->get_schema_guard()->get_table_schema(MTL_ID(), op.get_real_ref_table_id(), table_schema))) {
LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id()));
} else if (OB_ISNULL(table_schema)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected nullptr to table schema", K(ret));
} else if (OB_FAIL(table_schema->get_rowkey_column_ids(rowkey_column_ids))) {
LOG_WARN("get rowkey column ids failed", K(ret));
} else if (OB_FAIL(append_array_no_dup(output_cids, rowkey_column_ids))) {
LOG_WARN("fail to append rowkey cids to output cids for functional lookup", K(ret));
}
} else if (op.is_tsc_with_doc_id() && index_schema.is_user_table()) {
uint64_t doc_id_col_id = OB_INVALID_ID;
uint64_t ft_col_id = OB_INVALID_ID;
@ -1529,12 +1584,13 @@ int ObTscCgService::generate_vec_ir_ctdef(const ObLogTableScan &op,
ObDASScanCtDef *snapshot_ctdef = nullptr;
ObDASScanCtDef *com_aux_ctdef = nullptr;
bool has_rowscn = false;
DASScanCGCtx cg_ctx;
if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, delta_ctdef))) {
LOG_WARN("allocate delta buf table ctdef failed", K(ret));
} else {
delta_ctdef->ref_table_id_ = op.get_vector_index_info().delta_buffer_tid_;
delta_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_DELTA_BUF_SCAN;
if (OB_FAIL(generate_das_scan_ctdef(op, *delta_ctdef, has_rowscn))) {
if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *delta_ctdef, has_rowscn))) {
LOG_WARN("failed to generate das scan ctdef", K(ret));
}
}
@ -1545,7 +1601,7 @@ int ObTscCgService::generate_vec_ir_ctdef(const ObLogTableScan &op,
} else {
index_id_ctdef->ref_table_id_ = op.get_vector_index_info().index_id_tid_;
index_id_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_IDX_ID_SCAN;
if (OB_FAIL(generate_das_scan_ctdef(op, *index_id_ctdef, has_rowscn))) {
if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *index_id_ctdef, has_rowscn))) {
LOG_WARN("failed to generate das scan ctdef", K(ret));
}
}
@ -1557,7 +1613,7 @@ int ObTscCgService::generate_vec_ir_ctdef(const ObLogTableScan &op,
} else {
snapshot_ctdef->ref_table_id_ =op.get_vector_index_info().index_snapshot_data_tid_;
snapshot_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_SNAPSHOT_SCAN;
if (OB_FAIL(generate_das_scan_ctdef(op, *snapshot_ctdef, has_rowscn))) {
if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *snapshot_ctdef, has_rowscn))) {
LOG_WARN("generate das scan ctdef failed", K(ret));
}
}
@ -1569,7 +1625,7 @@ int ObTscCgService::generate_vec_ir_ctdef(const ObLogTableScan &op,
} else {
com_aux_ctdef->ref_table_id_ = op.get_vector_index_info().main_table_tid_;
com_aux_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_VEC_COM_AUX_SCAN;
if (OB_FAIL(generate_das_scan_ctdef(op, *com_aux_ctdef, has_rowscn))) {
if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *com_aux_ctdef, has_rowscn))) {
LOG_WARN("generate das scan ctdef failed", K(ret));
}
}
@ -1727,42 +1783,60 @@ int ObTscCgService::generate_gis_ir_ctdef(const ObLogTableScan &op,
}
int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op,
const DASScanCGCtx &cg_ctx,
ObTableScanCtDef &tsc_ctdef,
ObDASBaseCtDef *&root_ctdef)
{
int ret = OB_SUCCESS;
ObMatchFunRawExpr *match_against = op.get_text_retrieval_info().match_expr_;
const ObTextRetrievalInfo &tr_info = cg_ctx.is_func_lookup_
? op.get_lookup_tr_infos().at(cg_ctx.curr_func_lookup_idx_)
: op.get_text_retrieval_info();
ObMatchFunRawExpr *match_against = tr_info.match_expr_;
ObIAllocator &ctdef_alloc = cg_.phy_plan_->get_allocator();
ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard();
ObDASIRScanCtDef *ir_scan_ctdef = nullptr;
ObDASSortCtDef *sort_ctdef = nullptr;
ObDASScanCtDef *inv_idx_scan_ctdef = nullptr;
ObExpr *index_back_doc_id_column = nullptr;
bool has_rowscn = false;
const bool use_approx_pre_agg = true; // TODO: support differentiate use approx agg or not
if (OB_ISNULL(match_against) || OB_ISNULL(schema_guard)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null pointer", K(ret), KP(match_against), KP(schema_guard));
} else if (OB_UNLIKELY(OB_INVALID_ID == op.get_text_retrieval_info().inv_idx_tid_
|| (op.need_text_retrieval_calc_relevance() && OB_INVALID_ID == op.get_text_retrieval_info().fwd_idx_tid_))) {
} else if (OB_UNLIKELY(OB_INVALID_ID == tr_info.inv_idx_tid_
|| (tr_info.need_calc_relevance_ && OB_INVALID_ID == tr_info.fwd_idx_tid_))) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid fulltext index table id", K(ret), KPC(match_against));
} else if (OB_UNLIKELY(ObTSCIRScanType::OB_IR_INV_IDX_SCAN != tsc_ctdef.scan_ctdef_.ir_scan_type_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected ir scan type for inverted index scan", K(ret), K(tsc_ctdef.scan_ctdef_));
} else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_IR_SCAN, ctdef_alloc, ir_scan_ctdef))) {
LOG_WARN("allocate ir scan ctdef failed", K(ret));
} else if (op.need_text_retrieval_calc_relevance()) {
ObDASScanCtDef *inv_idx_scan_ctdef = &tsc_ctdef.scan_ctdef_;
} else if (OB_UNLIKELY(!cg_ctx.is_func_lookup_ && ObTSCIRScanType::OB_IR_INV_IDX_SCAN != tsc_ctdef.scan_ctdef_.ir_scan_type_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected ir scan type for inverted index scan", K(ret), K(tsc_ctdef.scan_ctdef_));
} else {
if (!cg_ctx.is_func_lookup_) {
inv_idx_scan_ctdef = &tsc_ctdef.scan_ctdef_;
} else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, inv_idx_scan_ctdef))) {
LOG_WARN("allocate inv idx_scan_ctdef_failed", K(ret));
} else {
inv_idx_scan_ctdef->ref_table_id_ = tr_info.inv_idx_tid_;
inv_idx_scan_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_IR_INV_IDX_SCAN;
if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *inv_idx_scan_ctdef, has_rowscn))) {
LOG_WARN("failed to generate das scan ctdef", K(ret));
}
}
}
if (OB_SUCC(ret) && tr_info.need_calc_relevance_) {
ObDASScanCtDef *inv_idx_agg_ctdef = nullptr;
ObDASScanCtDef *doc_id_idx_agg_ctdef = nullptr;
ObDASScanCtDef *fwd_idx_agg_ctdef = nullptr;
bool has_rowscn = false;
if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, inv_idx_agg_ctdef))) {
LOG_WARN("allocate inv idx agg ctdef failed", K(ret));
} else {
inv_idx_agg_ctdef->ref_table_id_ = op.get_text_retrieval_info().inv_idx_tid_;
inv_idx_agg_ctdef->ref_table_id_ = tr_info.inv_idx_tid_;
inv_idx_agg_ctdef->pd_expr_spec_.pd_storage_flag_.set_aggregate_pushdown(true);
inv_idx_agg_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_IR_INV_IDX_AGG;
if (OB_FAIL(generate_das_scan_ctdef(op, *inv_idx_agg_ctdef, has_rowscn))) {
if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *inv_idx_agg_ctdef, has_rowscn))) {
LOG_WARN("failed to generate das scan ctdef", K(ret));
}
}
@ -1771,10 +1845,10 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op,
if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, doc_id_idx_agg_ctdef))) {
LOG_WARN("allocate doc id idx agg ctdef failed", K(ret));
} else {
doc_id_idx_agg_ctdef->ref_table_id_ = op.get_text_retrieval_info().doc_id_idx_tid_;
doc_id_idx_agg_ctdef->ref_table_id_ = tr_info.doc_id_idx_tid_;
doc_id_idx_agg_ctdef->pd_expr_spec_.pd_storage_flag_.set_aggregate_pushdown(true);
doc_id_idx_agg_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_IR_DOC_ID_IDX_AGG;
if (OB_FAIL(generate_das_scan_ctdef(op, *doc_id_idx_agg_ctdef, has_rowscn))) {
if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *doc_id_idx_agg_ctdef, has_rowscn))) {
LOG_WARN("failed to generate das scan ctdef", K(ret));
}
}
@ -1784,10 +1858,10 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op,
if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_TABLE_SCAN, ctdef_alloc, fwd_idx_agg_ctdef))) {
LOG_WARN("allocate fwd idx agg ctdef failed", K(ret));
} else {
fwd_idx_agg_ctdef->ref_table_id_ = op.get_text_retrieval_info().fwd_idx_tid_;
fwd_idx_agg_ctdef->ref_table_id_ = tr_info.fwd_idx_tid_;
fwd_idx_agg_ctdef->pd_expr_spec_.pd_storage_flag_.set_aggregate_pushdown(true);
fwd_idx_agg_ctdef->ir_scan_type_ = ObTSCIRScanType::OB_IR_FWD_IDX_AGG;
if (OB_FAIL(generate_das_scan_ctdef(op, *fwd_idx_agg_ctdef, has_rowscn))) {
if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *fwd_idx_agg_ctdef, has_rowscn))) {
LOG_WARN("generate das scan ctdef failed", K(ret));
}
}
@ -1830,7 +1904,7 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op,
if (OB_SUCC(ret)) {
root_ctdef = ir_scan_ctdef;
if (OB_FAIL(generate_text_ir_spec_exprs(op, *ir_scan_ctdef))) {
if (OB_FAIL(generate_text_ir_spec_exprs(tr_info, *ir_scan_ctdef))) {
LOG_WARN("failed to generate text ir spec exprs", K(ret), KPC(match_against));
} else {
const ObCostTableScanInfo *est_cost_info = op.get_est_cost_info();
@ -1848,15 +1922,15 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op,
}
}
if (OB_SUCC(ret) && op.get_text_retrieval_info().need_sort()) {
if (OB_SUCC(ret) && tr_info.need_sort()) {
ObSEArray<OrderItem, 2> order_items;
if (OB_FAIL(order_items.push_back(op.get_text_retrieval_info().sort_key_))) {
if (OB_FAIL(order_items.push_back(tr_info.sort_key_))) {
LOG_WARN("append order item array failed", K(ret));
} else if (OB_FAIL(generate_das_sort_ctdef(
order_items,
op.get_text_retrieval_info().with_ties_,
op.get_text_retrieval_info().topk_limit_expr_,
op.get_text_retrieval_info().topk_offset_expr_,
tr_info.with_ties_,
tr_info.topk_limit_expr_,
tr_info.topk_offset_expr_,
ir_scan_ctdef,
sort_ctdef))) {
LOG_WARN("generate sort ctdef failed", K(ret));
@ -1865,7 +1939,7 @@ int ObTscCgService::generate_text_ir_ctdef(const ObLogTableScan &op,
}
}
if (OB_SUCC(ret) && op.get_index_back()) {
if (OB_SUCC(ret) && op.get_index_back() && !cg_ctx.is_func_lookup_) {
ObDASIRAuxLookupCtDef *aux_lookup_ctdef = nullptr;
ObDASBaseCtDef *ir_output_ctdef = nullptr == sort_ctdef ?
static_cast<ObDASBaseCtDef *>(ir_scan_ctdef) : static_cast<ObDASBaseCtDef *>(sort_ctdef);
@ -1908,6 +1982,7 @@ int ObTscCgService::generate_index_merge_node_ctdef(const ObLogTableScan &op,
ObDASBaseCtDef *&node_ctdef)
{
int ret = OB_SUCCESS;
DASScanCGCtx cg_ctx;
bool has_rowscn = false;
if (OB_ISNULL(node) || !node->is_valid()) {
ret = OB_ERR_UNEXPECTED;
@ -1922,7 +1997,7 @@ int ObTscCgService::generate_index_merge_node_ctdef(const ObLogTableScan &op,
} else if (FALSE_IT(scan_ctdef->ref_table_id_ = node->index_tid_)) {
} else if (FALSE_IT(scan_ctdef->index_merge_idx_ = node->idx_)) {
} else if (FALSE_IT(scan_ctdef->is_index_merge_ = true)) {
} else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) {
} else if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *scan_ctdef, has_rowscn))) {
LOG_WARN("failed to generate das scan ctdef", KPC(scan_ctdef), K(ret));
} else if (OB_NOT_NULL(node->ap_->pre_query_range_) &&
OB_FAIL(scan_ctdef->pre_query_range_.deep_copy(*node->ap_->pre_query_range_))) {
@ -2079,15 +2154,18 @@ int ObTscCgService::extract_vec_ir_access_columns(
int ObTscCgService::extract_text_ir_access_columns(
const ObLogTableScan &op,
const ObTextRetrievalInfo &tr_info,
const ObDASScanCtDef &scan_ctdef,
ObIArray<ObRawExpr*> &access_exprs)
{
int ret = OB_SUCCESS;
const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info();
if (scan_ctdef.ref_table_id_ == op.get_doc_id_index_table_id()) {
if (OB_FAIL(extract_doc_id_index_back_access_columns(op, access_exprs))) {
LOG_WARN("failed to extract doc id index back access columns", K(ret));
}
} else if (OB_UNLIKELY(scan_ctdef.ref_table_id_ == op.get_rowkey_doc_table_id())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected text ir access table", K(ret));
} else {
switch (scan_ctdef.ir_scan_type_) {
case ObTSCIRScanType::OB_IR_INV_IDX_SCAN:
@ -2148,17 +2226,12 @@ int ObTscCgService::extract_vector_das_output_column_ids(
}
int ObTscCgService::extract_text_ir_das_output_column_ids(
const ObLogTableScan &op,
const ObTextRetrievalInfo &tr_info,
const ObDASScanCtDef &scan_ctdef,
ObIArray<uint64_t> &output_cids)
{
int ret = OB_SUCCESS;
const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info();
if (scan_ctdef.ref_table_id_ == op.get_doc_id_index_table_id()) {
if (OB_FAIL(extract_doc_id_index_back_output_column_ids(op, output_cids))) {
LOG_WARN("failed to get doc id index back cids", K(ret), K(scan_ctdef.ref_table_id_));
}
} else if (ObTSCIRScanType::OB_IR_INV_IDX_SCAN == scan_ctdef.ir_scan_type_) {
if (ObTSCIRScanType::OB_IR_INV_IDX_SCAN == scan_ctdef.ir_scan_type_) {
if (OB_FAIL(output_cids.push_back(
static_cast<ObColumnRefRawExpr *>(tr_info.token_cnt_column_)->get_column_id()))) {
LOG_WARN("failed to push output token cnt col id", K(ret));
@ -2174,12 +2247,12 @@ int ObTscCgService::extract_text_ir_das_output_column_ids(
}
int ObTscCgService::generate_text_ir_pushdown_expr_ctdef(
const ObTextRetrievalInfo &tr_info,
const ObLogTableScan &op,
ObDASScanCtDef &scan_ctdef)
{
int ret = OB_SUCCESS;
const uint64_t scan_table_id = scan_ctdef.ref_table_id_;
const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info();
if (!scan_ctdef.pd_expr_spec_.pd_storage_flag_.is_aggregate_pushdown()) {
// this das scan do not need aggregate pushdown
} else {
@ -2289,12 +2362,11 @@ int ObTscCgService::generate_vec_ir_spec_exprs(const ObLogTableScan &op,
return ret;
}
int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op,
int ObTscCgService::generate_text_ir_spec_exprs(const ObTextRetrievalInfo &tr_info,
ObDASIRScanCtDef &text_ir_scan_ctdef)
{
int ret = OB_SUCCESS;
ObSEArray<ObExpr *, 4> result_output;
const ObTextRetrievalInfo &tr_info = op.get_text_retrieval_info();
if (OB_ISNULL(tr_info.match_expr_) || OB_ISNULL(tr_info.relevance_expr_) ||
OB_ISNULL(tr_info.doc_id_column_)) {
ret = OB_ERR_UNEXPECTED;
@ -2303,11 +2375,6 @@ int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op,
LOG_WARN("failed to mark raw agg expr", K(ret), KPC(tr_info.match_expr_));
} else if (OB_FAIL(cg_.generate_rt_expr(*tr_info.match_expr_->get_search_key(), text_ir_scan_ctdef.search_text_))) {
LOG_WARN("cg rt expr for search text failed", K(ret));
} else if (OB_ISNULL(tr_info.pushdown_match_filter_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null match filter", K(ret));
} else if (OB_FAIL(cg_.generate_rt_expr(*tr_info.pushdown_match_filter_, text_ir_scan_ctdef.match_filter_))) {
LOG_WARN("cg rt expr for match filter failed", K(ret));
} else {
const UIntFixedArray &inv_scan_col_id = text_ir_scan_ctdef.get_inv_idx_scan_ctdef()->access_column_ids_;
const ObColumnRefRawExpr *doc_id_column = static_cast<ObColumnRefRawExpr *>(tr_info.doc_id_column_);
@ -2337,7 +2404,23 @@ int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op,
}
}
if (OB_SUCC(ret) && op.need_text_retrieval_calc_relevance()) {
if (OB_SUCC(ret)) {
// mark match columns in match_expr produced
ObIArray<ObRawExpr*> &match_columns = tr_info.match_expr_->get_match_columns();
for (int64_t i = 0; OB_SUCC(ret) && i < match_columns.count(); ++i) {
if (OB_FAIL(cg_.mark_expr_self_produced(match_columns.at(i)))) {
LOG_WARN("failed to mark match column expr as produced", K(ret));
}
}
}
if (OB_SUCC(ret) && nullptr != tr_info.pushdown_match_filter_) {
if (OB_FAIL(cg_.generate_rt_expr(*tr_info.pushdown_match_filter_, text_ir_scan_ctdef.match_filter_))) {
LOG_WARN("cg rt expr for match filter failed", K(ret));
}
}
if (OB_SUCC(ret) && tr_info.need_calc_relevance_) {
if (OB_ISNULL(tr_info.relevance_expr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null relevance expr", K(ret));
@ -2346,7 +2429,7 @@ int ObTscCgService::generate_text_ir_spec_exprs(const ObLogTableScan &op,
}
}
if (OB_SUCC(ret) && (op.need_text_retrieval_calc_relevance() || nullptr != tr_info.pushdown_match_filter_)) {
if (OB_SUCC(ret) && (tr_info.need_calc_relevance_ || nullptr != tr_info.pushdown_match_filter_)) {
if (OB_FAIL(cg_.generate_rt_expr(*tr_info.match_expr_,
text_ir_scan_ctdef.relevance_proj_col_))) {
LOG_WARN("cg rt expr for relevance score proejction failed", K(ret));
@ -2405,6 +2488,7 @@ int ObTscCgService::generate_vec_id_lookup_ctdef(const ObLogTableScan &op,
LOG_WARN("allocate memory failed", K(ret));
} else {
bool has_rowscn = false;
DASScanCGCtx cg_ctx;
ObArray<ObExpr*> result_outputs;
scan_ctdef->ref_table_id_ = vec_id_index_tid;
aux_lookup_ctdef->children_cnt_ = 2;
@ -2412,7 +2496,7 @@ int ObTscCgService::generate_vec_id_lookup_ctdef(const ObLogTableScan &op,
if (OB_ISNULL(scan_loc_meta)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate scan location meta failed", K(ret));
} else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) {
} else if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *scan_ctdef, has_rowscn))) {
LOG_WARN("generate das lookup scan ctdef failed", K(ret));
} else if (OB_FAIL(result_outputs.assign(scan_ctdef->result_output_))) {
LOG_WARN("construct aux lookup ctdef failed", K(ret));
@ -2462,7 +2546,7 @@ int ObTscCgService::generate_doc_id_lookup_ctdef(const ObLogTableScan &op,
ret = OB_ERR_UNEXPECTED;
LOG_WARN("failed to get data table schema", K(ret));
} else if (OB_FAIL(data_schema->get_doc_id_rowkey_tid(doc_id_index_tid))) {
LOG_WARN("failed to get doc id rowkey index tid", K(ret), KPC(data_schema));
LOG_WARN("failed to get doc id rowkey index tid", K(ret), KPC(data_schema));
} else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(),
doc_id_index_tid,
op.get_stmt(),
@ -2480,6 +2564,7 @@ int ObTscCgService::generate_doc_id_lookup_ctdef(const ObLogTableScan &op,
LOG_WARN("allocate memory failed", K(ret));
} else {
bool has_rowscn = false;
DASScanCGCtx cg_ctx;
ObArray<ObExpr*> result_outputs;
scan_ctdef->ref_table_id_ = doc_id_index_tid;
aux_lookup_ctdef->children_cnt_ = 2;
@ -2487,7 +2572,7 @@ int ObTscCgService::generate_doc_id_lookup_ctdef(const ObLogTableScan &op,
if (OB_ISNULL(scan_loc_meta)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate scan location meta failed", K(ret));
} else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) {
} else if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *scan_ctdef, has_rowscn))) {
LOG_WARN("generate das lookup scan ctdef failed", K(ret));
} else if (OB_FAIL(result_outputs.assign(scan_ctdef->result_output_))) {
LOG_WARN("construct aux lookup ctdef failed", K(ret));
@ -2566,13 +2651,14 @@ int ObTscCgService::extract_rowkey_doc_output_columns_ids(
const share::schema::ObTableSchema &schema,
const ObLogTableScan &op,
const ObDASScanCtDef &scan_ctdef,
const bool need_output_rowkey,
ObIArray<uint64_t> &output_cids)
{
int ret = OB_SUCCESS;
bool doc_id_is_found = false;
const ObIArray<ObRawExpr *> &exprs = op.get_rowkey_id_exprs();
ObArray<ObRawExpr *> access_exprs;
for (int64_t i = 0; OB_SUCC(ret) && !doc_id_is_found && i < exprs.count(); ++i) {
for (int64_t i = 0; OB_SUCC(ret) && i < exprs.count(); ++i) {
ObRawExpr *expr = exprs.at(i);
if (OB_ISNULL(expr)) {
ret = OB_ERR_UNEXPECTED;
@ -2584,7 +2670,7 @@ int ObTscCgService::extract_rowkey_doc_output_columns_ids(
if (OB_FAIL(access_exprs.push_back(expr))) {
LOG_WARN("fail to add doc id access expr", K(ret), KPC(expr));
}
} else if (static_cast<ObColumnRefRawExpr *>(expr)->is_rowkey_column()) {
} else if (need_output_rowkey && static_cast<ObColumnRefRawExpr *>(expr)->is_rowkey_column()) {
if (OB_FAIL(access_exprs.push_back(expr))) {
LOG_WARN("fail to add doc id access expr", K(ret), KPC(expr));
}
@ -2602,6 +2688,7 @@ int ObTscCgService::extract_rowkey_doc_output_columns_ids(
int ObTscCgService::generate_rowkey_doc_ctdef(
const ObLogTableScan &op,
const DASScanCGCtx &cg_ctx,
ObTableScanCtDef &tsc_ctdef,
ObDASScanCtDef *&rowkey_doc_scan_ctdef)
{
@ -2610,18 +2697,11 @@ int ObTscCgService::generate_rowkey_doc_ctdef(
const ObTableSchema *rowkey_doc_schema = nullptr;
ObDASScanCtDef *scan_ctdef = nullptr;
ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard();
uint64_t rowkey_doc_tid = OB_INVALID_ID;
uint64_t rowkey_doc_tid = op.get_rowkey_doc_table_id();
if (OB_ISNULL(schema_guard)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, schema guard is nullptr", K(ret), KP(cg_.opt_ctx_));
} else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(), data_schema))) {
LOG_WARN("get table schema failed", K(ret), K(op.get_ref_table_id()));
} else if (OB_ISNULL(data_schema)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("failed to get data table schema", K(ret));
} else if (OB_FAIL(data_schema->get_rowkey_doc_tid(rowkey_doc_tid))) {
LOG_WARN("failed to get rowkey doc tid", K(ret), KPC(data_schema));
} else if (OB_FAIL(schema_guard->get_table_schema(op.get_ref_table_id(),
rowkey_doc_tid,
op.get_stmt(),
@ -2640,7 +2720,7 @@ int ObTscCgService::generate_rowkey_doc_ctdef(
if (OB_ISNULL(scan_loc_meta)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate scan location meta failed", K(ret));
} else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) {
} else if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *scan_ctdef, has_rowscn))) {
LOG_WARN("generate das lookup scan ctdef failed", K(ret));
} else if (OB_FAIL(generate_table_loc_meta(op.get_table_id(),
*op.get_stmt(),
@ -2666,6 +2746,7 @@ int ObTscCgService::generate_das_scan_ctdef_with_doc_id(
int ret = OB_SUCCESS;
ObArray<ObExpr*> result_outputs;
ObDASScanCtDef *rowkey_doc_scan_ctdef = nullptr;
DASScanCGCtx cg_ctx;
if (OB_ISNULL(scan_ctdef)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid arguments", K(ret), KPC(scan_ctdef));
@ -2675,7 +2756,7 @@ int ObTscCgService::generate_das_scan_ctdef_with_doc_id(
} else if (OB_ISNULL(doc_id_merge_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &cg_.phy_plan_->get_allocator(), 2))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to allocate doc id merge ctdef child array memory", K(ret));
} else if (OB_FAIL(generate_rowkey_doc_ctdef(op, tsc_ctdef, rowkey_doc_scan_ctdef))) {
} else if (OB_FAIL(generate_rowkey_doc_ctdef(op, cg_ctx, tsc_ctdef, rowkey_doc_scan_ctdef))) {
LOG_WARN("fail to generate rowkey doc ctdef", K(ret));
} else if (OB_FAIL(result_outputs.assign(scan_ctdef->result_output_))) {
LOG_WARN("construct aux lookup ctdef failed", K(ret));
@ -2800,13 +2881,14 @@ int ObTscCgService::generate_rowkey_vid_ctdef(
LOG_WARN("alloc das ctdef failed", K(ret));
} else {
bool has_rowscn = false;
DASScanCGCtx cg_ctx;
scan_ctdef->ref_table_id_ = rowkey_vid_tid;
ObDASTableLocMeta *scan_loc_meta =
OB_NEWx(ObDASTableLocMeta, &cg_.phy_plan_->get_allocator(), cg_.phy_plan_->get_allocator());
if (OB_ISNULL(scan_loc_meta)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate scan location meta failed", K(ret));
} else if (OB_FAIL(generate_das_scan_ctdef(op, *scan_ctdef, has_rowscn))) {
} else if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *scan_ctdef, has_rowscn))) {
LOG_WARN("generate das lookup scan ctdef failed", K(ret));
} else if (OB_FAIL(generate_table_loc_meta(op.get_table_id(),
*op.get_stmt(),
@ -2878,11 +2960,12 @@ int ObTscCgService::generate_table_lookup_ctdef(const ObLogTableScan &op,
LOG_WARN("alloc das ctdef failed", K(ret));
} else {
bool has_rowscn = false;
DASScanCGCtx cg_ctx;
const ObTableSchema *table_schema = nullptr;
ObSqlSchemaGuard *schema_guard = cg_.opt_ctx_->get_sql_schema_guard();
tsc_ctdef.lookup_ctdef_->ref_table_id_ = op.get_real_ref_table_id();
if (OB_FAIL(generate_das_scan_ctdef(op, *tsc_ctdef.lookup_ctdef_, has_rowscn))) {
if (OB_FAIL(generate_das_scan_ctdef(op, cg_ctx, *tsc_ctdef.lookup_ctdef_, has_rowscn))) {
LOG_WARN("generate das lookup scan ctdef failed", K(ret));
} else if (OB_FAIL(schema_guard->get_table_schema(op.get_table_id(),
op.get_ref_table_id(),
@ -3291,5 +3374,141 @@ int ObTscCgService::generate_mr_mv_scan_flag(const ObLogTableScan &op, ObQueryFl
return ret;
}
// Build the ctdef tree for a functional lookup and return its root through root_ctdef.
//
// Resulting tree (as assembled below):
//   root (DAS_OP_INDEX_PROJ_LOOKUP, 2 children)
//     children_[0] = rowkey_scan_ctdef
//     children_[1] = functional lookup ctdef (DAS_OP_FUNC_LOOKUP), whose children are laid out as
//       [main_lookup_ctdef (only if non-null)] [rowkey->doc_id scan (only if there are tr infos)]
//       [one text retrieval scan per entry of op.get_lookup_tr_infos()]
//
// @param op                 logical table scan operator being code-generated
// @param tsc_ctdef          table scan ctdef forwarded to the child ctdef generators
// @param rowkey_scan_ctdef  ctdef of the scan that produces rowkeys; dereferenced without a null check
// @param main_lookup_ctdef  optional main table lookup; must be a DAS_OP_TABLE_SCAN when non-null
// @param root_ctdef         output, assigned only when the whole generation succeeded
// @return OB_SUCCESS or the first error code raised by allocation / child generation
int ObTscCgService::generate_functional_lookup_ctdef(const ObLogTableScan &op,
ObTableScanCtDef &tsc_ctdef,
ObDASBaseCtDef *rowkey_scan_ctdef,
ObDASBaseCtDef *main_lookup_ctdef,
ObDASBaseCtDef *&root_ctdef)
{
// Functional lookup will scan rowkey from one table (main table or secondary index) first,
// and then do functional lookup on specific secondary index to calculate index-related exprs.
// Can also do main table lookup after rowkey scan if needed.
int ret = OB_SUCCESS;
const ObIArray<ObTextRetrievalInfo> &lookup_tr_infos = op.get_lookup_tr_infos();
const bool has_main_lookup = nullptr != main_lookup_ctdef;
ObIAllocator &ctdef_alloc = cg_.phy_plan_->get_allocator();
ObDASFuncLookupCtDef *tmp_func_lookup_ctdef = nullptr;
ObDASIndexProjLookupCtDef *root_lookup_ctdef = nullptr;
// outputs collected from the main lookup and every text retrieval lookup
ObArray<ObExpr *> func_lookup_result_outputs;
// outputs exposed by the root lookup ctdef
ObArray<ObExpr *> final_result_outputs;
DASScanCGCtx cg_ctx;
if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_FUNC_LOOKUP, ctdef_alloc, tmp_func_lookup_ctdef))) {
LOG_WARN("allocate functional lookup ctdef failed", K(ret));
} else {
// children count = optional main lookup + one scan per tr info + one shared doc_id lookup (if any tr info)
tmp_func_lookup_ctdef->main_lookup_cnt_ = has_main_lookup ? 1 : 0;
tmp_func_lookup_ctdef->func_lookup_cnt_ = lookup_tr_infos.count();
tmp_func_lookup_ctdef->doc_id_lookup_cnt_ = lookup_tr_infos.count() > 0 ? 1 : 0;
tmp_func_lookup_ctdef->children_cnt_ = tmp_func_lookup_ctdef->main_lookup_cnt_
+ tmp_func_lookup_ctdef->func_lookup_cnt_ + tmp_func_lookup_ctdef->doc_id_lookup_cnt_;
if (OB_ISNULL(tmp_func_lookup_ctdef->children_
= OB_NEW_ARRAY(ObDASBaseCtDef *, &ctdef_alloc, tmp_func_lookup_ctdef->children_cnt_))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate functional lookup ctdef children failed", K(ret));
} else {
if (has_main_lookup) {
// the main lookup always occupies slot 0; its outputs seed the functional lookup outputs
tmp_func_lookup_ctdef->children_[0] = main_lookup_ctdef;
if (OB_UNLIKELY(main_lookup_ctdef->op_type_ != DAS_OP_TABLE_SCAN)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected main lookup ctdef type", K(ret), KPC(main_lookup_ctdef));
} else if (OB_FAIL(func_lookup_result_outputs.assign(
static_cast<ObDASScanCtDef *>(main_lookup_ctdef)->result_output_))) {
LOG_WARN("failed to append func lookup result", K(ret));
}
}
}
}
if (OB_SUCC(ret) && lookup_tr_infos.count() > 0) {
// generate rowkey->doc_id lookup scan
const int64_t doc_id_lookup_ctdef_idx = has_main_lookup ? 1 : 0;
ObDASScanCtDef *doc_id_lookup_scan_ctdef = nullptr;
ObArray<ObRawExpr *> rowkey_exprs;
// flag the context as functional lookup (no specific lookup index) for the rowkey->doc_id scan
cg_ctx.set_is_func_lookup();
if (OB_FAIL(generate_rowkey_doc_ctdef(op, cg_ctx, tsc_ctdef, doc_id_lookup_scan_ctdef))) {
LOG_WARN("generate doc_id lookup scan ctdef failed", K(ret));
} else if (OB_FAIL(rowkey_exprs.assign(op.get_rowkey_exprs()))) {
LOG_WARN("failed to assign rowkey exprs", K(ret));
} else if (OB_FAIL(cg_.generate_rt_exprs(rowkey_exprs, doc_id_lookup_scan_ctdef->rowkey_exprs_))) {
LOG_WARN("failed to generate rowkey exprs for doc_id lookup scan", K(ret));
} else {
tmp_func_lookup_ctdef->children_[doc_id_lookup_ctdef_idx] = doc_id_lookup_scan_ctdef;
// pick the single non-pseudo output of the doc_id scan as the doc id expr;
// a second candidate is treated as an error
for (int64_t i = 0; OB_SUCC(ret) && i < doc_id_lookup_scan_ctdef->result_output_.count(); ++i) {
ObExpr *doc_id_lookup_result = doc_id_lookup_scan_ctdef->result_output_.at(i);
if (OB_ISNULL(doc_id_lookup_result)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null rowkey expr", K(ret));
} else if (doc_id_lookup_result->type_ == T_PSEUDO_ROW_TRANS_INFO_COLUMN
|| doc_id_lookup_result->type_ == T_PSEUDO_GROUP_ID) {
// skip
} else if (nullptr == tmp_func_lookup_ctdef->lookup_doc_id_expr_) {
tmp_func_lookup_ctdef->lookup_doc_id_expr_ = doc_id_lookup_result;
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("more than one doc id result expr for rowkey 2 doc_id lookup", K(ret), KPC(doc_id_lookup_scan_ctdef));
}
}
}
// one text retrieval scan per tr info, stored right after the doc_id lookup slot
for (int64_t i = 0; OB_SUCC(ret) && i < lookup_tr_infos.count(); ++i) {
// rebuild the context so it carries the index of the tr info being generated
cg_ctx.reset();
cg_ctx.set_func_lookup_idx(i);
const int64_t func_lookup_base_idx = doc_id_lookup_ctdef_idx + 1;
const int64_t cur_children_idx = func_lookup_base_idx + i;
ObDASBaseCtDef *tr_lookup_scan_ctdef = nullptr;
if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_IR_SCAN, ctdef_alloc, tr_lookup_scan_ctdef))) {
LOG_WARN("allocate text retrieval lookup scan failed", K(ret));
} else if (OB_FAIL(generate_text_ir_ctdef(op, cg_ctx, tsc_ctdef, tr_lookup_scan_ctdef))) {
LOG_WARN("failed to generate text retrieval ctdef", K(ret));
} else if (OB_UNLIKELY(tr_lookup_scan_ctdef->op_type_ != DAS_OP_IR_SCAN)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected lookup tr scan type", K(ret));
} else if (OB_FAIL(append_array_no_dup(
func_lookup_result_outputs, static_cast<ObDASIRScanCtDef *>(tr_lookup_scan_ctdef)->result_output_))) {
LOG_WARN("failed to append func lookup result", K(ret));
} else {
tmp_func_lookup_ctdef->children_[cur_children_idx] = tr_lookup_scan_ctdef;
}
}
}
// FAILEDx is used as the head of this chain so that it is skipped when an earlier step already failed
// NOTE(review): relies on FAILEDx short-circuiting on a failed ret - confirm against the macro definition
if (FAILEDx(tmp_func_lookup_ctdef->result_output_.assign(func_lookup_result_outputs))) {
LOG_WARN("failed to assign func lookup result output", K(ret));
} else if (OB_FAIL(final_result_outputs.assign(func_lookup_result_outputs))) {
LOG_WARN("failed to append final lookup result output", K(ret));
} else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_INDEX_PROJ_LOOKUP, ctdef_alloc, root_lookup_ctdef))) {
LOG_WARN("failed to allocate das ctdef", K(ret));
} else if (OB_ISNULL(root_lookup_ctdef->children_
= OB_NEW_ARRAY(ObDASBaseCtDef *, &ctdef_alloc, 2))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("allocate root lookup ctdef childern failed", K(ret));
} else if (OB_FAIL(append_array_no_dup(final_result_outputs, tmp_func_lookup_ctdef->result_output_))) {
LOG_WARN("failed to append final result outputs", K(ret));
} else {
// root: child 0 produces rowkeys, child 1 performs the functional lookups
root_lookup_ctdef->children_cnt_ = 2;
root_lookup_ctdef->children_[0] = rowkey_scan_ctdef;
root_lookup_ctdef->children_[1] = tmp_func_lookup_ctdef;
if (!has_main_lookup) {
// no main lookup, rowkey scan will project all output columns on base table for table scan
if (rowkey_scan_ctdef->op_type_ == ObDASOpType::DAS_OP_TABLE_SCAN) {
if (OB_FAIL(root_lookup_ctdef->index_scan_proj_exprs_.assign(
static_cast<ObDASScanCtDef *>(rowkey_scan_ctdef)->result_output_))) {
LOG_WARN("Failed to assign index scan project column exprs", K(ret));
} else if (OB_FAIL(append_array_no_dup(final_result_outputs, root_lookup_ctdef->index_scan_proj_exprs_))) {
LOG_WARN("failed to append final result outputs", K(ret));
}
}
}
if (FAILEDx(root_lookup_ctdef->result_output_.assign(final_result_outputs))) {
LOG_WARN("failed to append root lookup result outputs", K(ret));
}
}
// publish the root only on success, so root_ctdef is left untouched on failure
if (OB_SUCC(ret)) {
root_ctdef = root_lookup_ctdef;
}
return ret;
}
} // namespace sql
} // namespace oceanbase

View File

@ -55,37 +55,62 @@ public:
const ObRawExpr *trans_info_expr,
const bool include_agg = false);
private:
// Temporary code-generation context used when one table scan operator
// produces several DAS scans. It records whether the scan being generated
// belongs to a functional lookup and, if so, which lookup it is.
struct DASScanCGCtx
{
  // Starts in the "not a functional lookup" state with lookup index 0.
  DASScanCGCtx()
  {
    reset();
  }
  // Back to the initial state: index 0, not a functional lookup.
  void reset()
  {
    is_func_lookup_ = false;
    curr_func_lookup_idx_ = 0;
  }
  // Marks the context as a functional lookup without touching the index.
  void set_is_func_lookup()
  {
    is_func_lookup_ = true;
  }
  // Selects the functional lookup at position idx; this also marks the
  // context as a functional lookup.
  void set_func_lookup_idx(const int64_t idx)
  {
    curr_func_lookup_idx_ = idx;
    is_func_lookup_ = true;
  }
  TO_STRING_KV(K_(curr_func_lookup_idx), K_(is_func_lookup));
  int64_t curr_func_lookup_idx_;  // index of the functional lookup being generated
  bool is_func_lookup_;           // true while generating a functional lookup scan
};
int generate_access_ctdef(const ObLogTableScan &op,
const DASScanCGCtx &cg_ctx,
ObDASScanCtDef &scan_ctdef,
common::ObIArray<ObExpr *> &doc_id_expr,
common::ObIArray<ObExpr *> &vec_vid_expr,
bool &has_rowscn);
int generate_pushdown_aggr_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef);
int generate_das_scan_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef, bool &has_rowscn);
int generate_table_param(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef, common::ObIArray<uint64_t> &tsc_out_cols);
int generate_pushdown_aggr_ctdef(const ObLogTableScan &op, const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef);
int generate_das_scan_ctdef(const ObLogTableScan &op, const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef, bool &has_rowscn);
int generate_table_param(const ObLogTableScan &op, const DASScanCGCtx &cg_ctx, ObDASScanCtDef &scan_ctdef, common::ObIArray<uint64_t> &tsc_out_cols);
int extract_das_output_column_ids(const ObLogTableScan &op,
ObDASScanCtDef &scan_ctdef,
const ObTableSchema &index_schema,
const DASScanCGCtx &cg_ctx,
common::ObIArray<uint64_t> &output_cids);
int extract_das_access_exprs(const ObLogTableScan &op,
const DASScanCGCtx &cg_ctx,
ObDASScanCtDef &scan_ctdef,
common::ObIArray<ObRawExpr*> &access_exprs);
//extract these column exprs need by TSC operator, these column will output by DAS scan
int extract_tsc_access_columns(const ObLogTableScan &op, common::ObIArray<ObRawExpr*> &access_exprs);
int extract_das_column_ids(const common::ObIArray<ObRawExpr*> &column_exprs, common::ObIArray<uint64_t> &column_ids);
int generate_geo_access_ctdef(const ObLogTableScan &op, const ObTableSchema &index_schema, ObArray<ObRawExpr*> &access_exprs);
int generate_text_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef);
int generate_vec_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef);
int generate_multivalue_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef);
int generate_gis_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef);
int generate_text_ir_ctdef(const ObLogTableScan &op,
const DASScanCGCtx &cg_ctx,
ObTableScanCtDef &tsc_ctdef,
ObDASBaseCtDef *&root_ctdef);
int extract_text_ir_access_columns(const ObLogTableScan &op,
const ObTextRetrievalInfo &tr_info,
const ObDASScanCtDef &scan_ctdef,
ObIArray<ObRawExpr*> &access_exprs);
int extract_vec_ir_access_columns(const ObLogTableScan &op,
const ObDASScanCtDef &scan_ctdef,
ObIArray<ObRawExpr*> &access_exprs);
int extract_text_ir_das_output_column_ids(const ObLogTableScan &op,
int extract_text_ir_das_output_column_ids(const ObTextRetrievalInfo &tr_info,
const ObDASScanCtDef &scan_ctdef,
ObIArray<uint64_t> &output_cids);
int extract_rowkey_doc_access_columns(const ObLogTableScan &op,
@ -94,7 +119,22 @@ private:
int extract_rowkey_doc_output_columns_ids(const share::schema::ObTableSchema &schema,
const ObLogTableScan &op,
const ObDASScanCtDef &scan_ctdef,
const bool need_output_rowkey,
ObIArray<uint64_t> &output_cids);
int generate_text_ir_pushdown_expr_ctdef(const ObTextRetrievalInfo &tr_info,
const ObLogTableScan &op,
ObDASScanCtDef &scan_ctdef);
int generate_text_ir_spec_exprs(const ObTextRetrievalInfo &tr_info,
ObDASIRScanCtDef &text_ir_scan_ctdef);
int generate_vec_ir_spec_exprs(const ObLogTableScan &op,
ObDASVecAuxScanCtDef &vec_ir_scan_ctdef);
int generate_vec_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef);
int generate_multivalue_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef);
int generate_gis_ir_ctdef(const ObLogTableScan &op, ObTableScanCtDef &tsc_ctdef, ObDASBaseCtDef *&root_ctdef);
int extract_vec_ir_access_columns(const ObLogTableScan &op,
const ObDASScanCtDef &scan_ctdef,
ObIArray<ObRawExpr*> &access_exprs);
int extract_vector_das_output_column_ids(const ObLogTableScan &op,
const ObDASScanCtDef &scan_ctdef,
ObIArray<uint64_t> &output_cids);
@ -105,17 +145,13 @@ private:
const ObLogTableScan &op,
const ObDASScanCtDef &scan_ctdef,
ObIArray<uint64_t> &output_cids);
int generate_text_ir_pushdown_expr_ctdef(const ObLogTableScan &op, ObDASScanCtDef &scan_ctdef);
int generate_text_ir_spec_exprs(const ObLogTableScan &op,
ObDASIRScanCtDef &text_ir_scan_ctdef);
int generate_vec_ir_spec_exprs(const ObLogTableScan &op,
ObDASVecAuxScanCtDef &vec_ir_scan_ctdef);
int generate_doc_id_lookup_ctdef(const ObLogTableScan &op,
ObTableScanCtDef &tsc_ctdef,
ObDASBaseCtDef *ir_scan_ctdef,
ObExpr *doc_id_expr,
ObDASIRAuxLookupCtDef *&aux_lookup_ctdef);
int generate_rowkey_doc_ctdef(const ObLogTableScan &op,
const DASScanCGCtx &cg_ctx,
ObTableScanCtDef &tsc_ctdef,
ObDASScanCtDef *&rowkey_doc_scan_ctdef);
int generate_das_scan_ctdef_with_doc_id(const ObLogTableScan &op,
@ -166,6 +202,13 @@ private:
ObIndexMergeNode *node,
common::ObIAllocator &alloc,
ObDASBaseCtDef *&node_ctdef);
int generate_functional_lookup_ctdef(const ObLogTableScan &op,
ObTableScanCtDef &tsc_ctdef,
ObDASBaseCtDef *rowkey_scan_ctdef,
ObDASBaseCtDef *main_lookup_ctdef,
ObDASBaseCtDef *&root_ctdef);
private:
ObStaticEngineCG &cg_;
};

View File

@ -0,0 +1,123 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL_DAS
#include "sql/das/iter/ob_das_cache_lookup_iter.h"
namespace oceanbase
{
using namespace common;
namespace sql
{
// Produce the next batch of looked-up rows by driving a four-state machine:
//   INDEX_SCAN  -> DO_LOOKUP (rowkeys collected) or FINISHED (none collected)
//   DO_LOOKUP   -> OUTPUT_ROWS
//   OUTPUT_ROWS -> returns rows, or goes back to INDEX_SCAN once the data table iterator is drained
//   FINISHED    -> returns OB_ITER_END
//
// @param count     output, number of rows fetched from the data table iterator in this call
// @param capacity  upper bound on rows requested from the child iterators per call
// @return OB_SUCCESS when rows were produced, OB_ITER_END when finished, other codes on error
int ObDASCacheLookupIter::inner_get_next_rows(int64_t &count, int64_t capacity)
{
int ret = OB_SUCCESS;
// set once rows are ready to be returned; terminates the do/while loop below
bool get_next_rows = false;
// the event value is negated; it only overrides the batch size when the result is
// positive and smaller than default_batch_row_count_
int64_t simulate_batch_row_cnt = - EVENT_CALL(EventTable::EN_TABLE_LOOKUP_BATCH_ROW_COUNT);
const bool use_simulate_batch_row_cnt = simulate_batch_row_cnt > 0 && simulate_batch_row_cnt < default_batch_row_count_;
int64_t default_row_batch_cnt = use_simulate_batch_row_cnt ? simulate_batch_row_cnt : default_batch_row_count_;
LOG_DEBUG("simulate lookup row batch count", K(simulate_batch_row_cnt), K(default_row_batch_cnt));
do {
switch (state_) {
case INDEX_SCAN: {
reset_lookup_state();
int64_t storage_count = 0;
int64_t index_capacity = 0;
// TODO: @zyx439997 support the outputs of index scan as the project columns by the deep copy {
bool need_accumulation = true;
// }
// keep reading the index until one call succeeds, the index is exhausted,
// or the rowkey batch is full
while (OB_SUCC(ret) && need_accumulation && !index_end_ && lookup_rowkey_cnt_ < default_row_batch_cnt) {
storage_count = 0;
// never request more than the caller's capacity, max_size_, or the room left in the batch
index_capacity = std::min(capacity, std::min(max_size_, default_row_batch_cnt - lookup_rowkey_cnt_));
index_table_iter_->clear_evaluated_flag();
if (OB_FAIL(index_table_iter_->get_next_rows(storage_count, index_capacity))) {
if (OB_UNLIKELY(OB_ITER_END != ret)) {
LOG_WARN("failed to get next rows from index table", K(ret));
} else {
// OB_ITER_END may still carry rows; the index is only marked exhausted
// when the call returned no rows at all
if (storage_count == 0) {
index_end_ = true;
}
ret = OB_SUCCESS;
}
} else {
// a successful call ends the accumulation for this round
need_accumulation = false;
}
if (OB_SUCC(ret) && storage_count > 0) {
if (OB_FAIL(add_rowkeys(storage_count))) {
LOG_WARN("failed to add row keys", K(ret));
} else {
lookup_rowkey_cnt_ += storage_count;
}
}
}
if (OB_SUCC(ret)) {
// nothing collected means there is nothing left to look up
if (OB_LIKELY(lookup_rowkey_cnt_ > 0)) {
state_ = DO_LOOKUP;
} else {
state_ = FINISHED;
}
}
break;
}
case DO_LOOKUP: {
if (OB_FAIL(do_index_lookup())) {
LOG_WARN("failed to do index lookup", K(ret));
} else {
state_ = OUTPUT_ROWS;
}
break;
}
case OUTPUT_ROWS: {
count = 0;
data_table_iter_->clear_evaluated_flag();
if (OB_FAIL(data_table_iter_->get_next_rows(count, capacity))) {
if (OB_LIKELY(OB_ITER_END == ret)) {
ret = OB_SUCCESS;
if (count > 0) {
// last batch of this lookup round: return it to the caller
lookup_row_cnt_ += count;
get_next_rows = true;
} else {
// data table iterator drained: validate the round and scan the index again
if (OB_FAIL(check_index_lookup())) {
LOG_WARN("failed to check table lookup", K(ret));
} else {
state_ = INDEX_SCAN;
}
}
} else {
LOG_WARN("failed to get next rows from data table", K(ret));
}
} else {
lookup_row_cnt_ += count;
get_next_rows = true;
}
// the number of rows read back must match the number of rowkeys collected
if (OB_SUCC(ret) && OB_UNLIKELY(lookup_row_cnt_ != lookup_rowkey_cnt_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected lookup row count", K_(lookup_row_cnt), K_(lookup_rowkey_cnt), K(ret));
}
break;
}
case FINISHED: {
ret = OB_ITER_END;
break;
}
}
} while (!get_next_rows && OB_SUCC(ret));
return ret;
}
} // namespace sql
} // namespace oceanbase

View File

@ -0,0 +1,55 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OBDEV_SRC_SQL_DAS_ITER_OB_DAS_CACHE_LOOKUP_ITER_H_
#define OBDEV_SRC_SQL_DAS_ITER_OB_DAS_CACHE_LOOKUP_ITER_H_
#include "sql/das/iter/ob_das_local_lookup_iter.h"
namespace oceanbase
{
using namespace common;
namespace sql
{
struct ObDASCacheLookupIterParam : public ObDASLocalLookupIterParam
{
public:
ObDASCacheLookupIterParam()
: ObDASLocalLookupIterParam()
{}
virtual bool is_valid() const override
{
return true;
}
};
class ObDASScanCtDef;
class ObDASScanRtDef;
class ObDASFuncLookupIter;
class ObDASCacheLookupIter : public ObDASLocalLookupIter
{
public:
ObDASCacheLookupIter(const ObDASIterType type = ObDASIterType::DAS_ITER_LOCAL_LOOKUP)
: ObDASLocalLookupIter(type)
{}
virtual ~ObDASCacheLookupIter() {}
protected:
virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override;
};
} // namespace sql
} // namespace oceanbase
#endif /* OBDEV_SRC_SQL_DAS_ITER_OB_DAS_CACHE_LOOKUP_ITER_H_ */

View File

@ -0,0 +1,426 @@
/**
* Copyright (c) 2024 OceanBase
* OceanBase is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL_DAS
#include "sql/das/iter/ob_das_func_data_iter.h"
#include "sql/das/iter/ob_das_iter_define.h"
#include "sql/das/ob_das_scan_op.h"
using namespace oceanbase::common;
namespace oceanbase
{
namespace sql
{
// Default-construct an empty parameter object tagged as DAS_ITER_FUNC_DATA:
// every pointer starts as nullptr and the iterator count as 0.
ObDASFuncDataIterParam::ObDASFuncDataIterParam()
: ObDASIterParam(ObDASIterType::DAS_ITER_FUNC_DATA),
tr_merge_iters_(nullptr),
iter_count_(0),
main_lookup_ctdef_(nullptr),
main_lookup_rtdef_(nullptr),
main_lookup_iter_(nullptr),
trans_desc_(nullptr),
snapshot_(nullptr)
{}
// Empty destructor: none of the pointers held by the parameter object are released here.
ObDASFuncDataIterParam::~ObDASFuncDataIterParam()
{
}
// Construct an unbound iterator: child iterator pointers are nullptr, the
// count is 0, and the tablet / ls ids are built from 0. The real values are
// copied from the parameter object in inner_init().
// NOTE(review): read_count_ and cmp_func_ are not part of this initializer
// list; they are first assigned in inner_init() - confirm the header gives
// them default values.
ObDASFuncDataIter::ObDASFuncDataIter()
:ObDASIter(),
tr_merge_iters_(nullptr),
iter_count_(0),
main_lookup_ctdef_(nullptr),
main_lookup_rtdef_(nullptr),
main_lookup_iter_(nullptr),
main_lookup_tablet_id_(0),
main_lookup_ls_id_(0),
main_lookup_param_(),
merge_memctx_(),
doc_ids_()
{}
// Empty destructor: the memory context and scan parameter are torn down in
// inner_release(), not here.
// NOTE(review): presumably inner_release() runs before destruction - confirm against ObDASIter.
ObDASFuncDataIter::~ObDASFuncDataIter()
{
}
int ObDASFuncDataIter::do_table_scan()
{
int ret = OB_SUCCESS;
if (OB_ISNULL(tr_merge_iters_) || OB_UNLIKELY(iter_count_ <= 0)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpeted error, tr merge iter is nullptr", K(ret));
} else if (OB_FAIL(build_tr_merge_iters_rangekey())) {
LOG_WARN("fail to build rowkey doc range", K(ret));
} else {
if (nullptr != main_lookup_iter_) {
if (OB_UNLIKELY(!main_lookup_tablet_id_.is_valid() || !main_lookup_ls_id_.is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, main lookup tablet id or ls id is invalid", K(ret), K(main_lookup_tablet_id_), K(main_lookup_ls_id_));
} else {
main_lookup_param_.tablet_id_ = main_lookup_tablet_id_;
main_lookup_param_.ls_id_ = main_lookup_ls_id_;
if (OB_FAIL(main_lookup_iter_->do_table_scan())) {
LOG_WARN("fail to do table scan for main lookup table", K(ret), KPC(main_lookup_iter_));
}
}
}
for (int64_t i = 0; OB_SUCC(ret) && i < iter_count_; i++) {
if (OB_FAIL(tr_merge_iters_[i]->do_table_scan())) {
LOG_WARN("fail to do table scan for tr merge iter", K(ret), K(i), KPC(tr_merge_iters_[i]));
}
}
}
return ret;
}
int ObDASFuncDataIter::rescan()
{
int ret = OB_SUCCESS;
if (OB_ISNULL(tr_merge_iters_) || OB_UNLIKELY(iter_count_ <= 0)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpeted error, tr merge iter is nullptr", K(ret));
} else if (OB_FAIL(build_tr_merge_iters_rangekey())) {
LOG_WARN("fail to build rowkey doc range", K(ret));
} else if (nullptr != main_lookup_iter_ && OB_FAIL(main_lookup_iter_->rescan())) {
LOG_WARN("fail to do table scan for main lookup table", K(ret), KPC(main_lookup_iter_));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < iter_count_; i++) {
if (OB_FAIL(tr_merge_iters_[i]->rescan())) {
LOG_WARN("fail to do table scan for tr merge iter", K(ret), K(i), KPC(tr_merge_iters_[i]));
}
}
}
return ret;
}
void ObDASFuncDataIter::clear_evaluated_flag()
{
if (OB_NOT_NULL(main_lookup_iter_)) {
main_lookup_iter_->clear_evaluated_flag();
}
for (int64_t i = 0; i < iter_count_; i++) {
if (OB_NOT_NULL(tr_merge_iters_[i])) {
tr_merge_iters_[i]->clear_evaluated_flag();
}
}
}
// Initialize the iterator from an ObDASFuncDataIterParam.
//
// Creates the "FTSMerge" memory context, copies the child iterators and the
// main lookup ctdef / rtdef out of the parameter object, selects the datum
// compare function for the current compatibility mode, and prepares the main
// lookup scan parameter when a main lookup iterator was supplied.
//
// @param param  must carry type DAS_ITER_FUNC_DATA
// @return OB_SUCCESS, OB_ERR_UNEXPECTED on a wrong parameter type, or the
//         error of the failing sub-step
int ObDASFuncDataIter::inner_init(ObDASIterParam &param)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(ObDASIterType::DAS_ITER_FUNC_DATA != param.type_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("inner init das iter with bad param type", K(ret), K(param));
  } else {
    ObDASFuncDataIterParam &merge_param = static_cast<ObDASFuncDataIterParam &>(param);
    // named differently from the function argument so the two are not confused
    lib::ContextParam memctx_param;
    memctx_param.set_mem_attr(MTL_ID(), "FTSMerge", ObCtxIds::DEFAULT_CTX_ID).set_properties(lib::USE_TL_PAGE_OPTIONAL);
    if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(merge_memctx_, memctx_param))) {
      LOG_WARN("failed to create merge memctx", K(ret));
    } else {
      tr_merge_iters_ = merge_param.tr_merge_iters_;
      iter_count_ = merge_param.iter_count_;
      main_lookup_ctdef_ = merge_param.main_lookup_ctdef_;
      main_lookup_rtdef_ = merge_param.main_lookup_rtdef_;
      main_lookup_iter_ = merge_param.main_lookup_iter_;
      read_count_ = 0;
      // binary varchar comparison; null ordering follows the compatibility mode
      sql::ObExprBasicFuncs *basic_funcs = ObDatumFuncs::get_basic_func(ObVarcharType, CS_TYPE_BINARY);
      if (lib::is_oracle_mode()) {
        cmp_func_ = basic_funcs->null_last_cmp_;
      } else {
        cmp_func_ = basic_funcs->null_first_cmp_;
      }
      if (main_lookup_iter_) {
        if (OB_FAIL(init_main_lookup_scan_param(main_lookup_param_,
                                                main_lookup_ctdef_,
                                                main_lookup_rtdef_,
                                                merge_param.trans_desc_,
                                                merge_param.snapshot_))) {
          LOG_WARN("fail to init rowkey doc scan param", K(ret), K(merge_param));
        }
      }
    }
  }
  return ret;
}
int ObDASFuncDataIter::inner_reuse()
{
int ret = OB_SUCCESS;
doc_ids_.reuse();
read_count_ = 0;
if (main_lookup_iter_) {
ObDASScanIter *main_lookup_iter = static_cast<ObDASScanIter *>(main_lookup_iter_);
storage::ObTableScanParam &main_lookup_scan_param = main_lookup_iter->get_scan_param();
if (OB_UNLIKELY(&main_lookup_param_ != &main_lookup_iter->get_scan_param())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, main lookup param is nullptr", K(ret));
} else {
const ObTabletID &old_tablet_id = main_lookup_param_.tablet_id_;
main_lookup_param_.need_switch_param_ = main_lookup_param_.need_switch_param_ ||
((old_tablet_id.is_valid() && old_tablet_id != main_lookup_tablet_id_) ? true : false);
main_lookup_param_.tablet_id_ = main_lookup_tablet_id_;
main_lookup_param_.ls_id_ = main_lookup_ls_id_;
if (!main_lookup_param_.key_ranges_.empty()) {
main_lookup_param_.key_ranges_.reuse();
}
if (OB_FAIL(main_lookup_iter_->reuse())) {
LOG_WARN("fail to reuse data table iter", K(ret));
}
}
}
if (OB_FAIL(ret)) {
} else if (OB_ISNULL(merge_memctx_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("merge_memctx_ is nullptr", K(ret));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < iter_count_; i++) {
if (OB_NOT_NULL(tr_merge_iters_[i])) {
tr_merge_iters_[i]->reuse();
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tr merge iter is nullptr", K(ret), K(i));
}
}
merge_memctx_->reset_remain_one_page();
}
return ret;
}
int ObDASFuncDataIter::inner_release()
{
int ret = OB_SUCCESS;
if (OB_NOT_NULL(merge_memctx_)) {
DESTROY_CONTEXT(merge_memctx_);
merge_memctx_ = nullptr;
}
if (main_lookup_iter_) {
main_lookup_iter_ = nullptr;
}
for (int64_t i = 0; i < iter_count_; i++) {
if (OB_NOT_NULL(tr_merge_iters_[i])) {
tr_merge_iters_[i] = nullptr;
}
}
doc_ids_.reset();
main_lookup_param_.destroy_schema_guard();
main_lookup_param_.snapshot_.reset();
main_lookup_param_.destroy();
read_count_ = 0;
return ret;
}
// Single-row path: exactly one doc id must have been added beforehand. Advances the main
// lookup iter (if any) and then every tr merge iter by one row.
//
// OB_ITER_END from the main lookup iter is swallowed. OB_ITER_END from any tr merge iter
// is remembered and reported once all iterators have been advanced.
int ObDASFuncDataIter::inner_get_next_row()
{
  int ret = OB_SUCCESS;
  bool iter_end = false;
  int64_t default_size = doc_ids_.count();

  if (OB_ISNULL(tr_merge_iters_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected error, tr merge iter is nullptr", K(ret));
  } else if (OB_UNLIKELY(1 != default_size)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected error, default size is not 1", K(ret), K(default_size));
  } else if (nullptr != main_lookup_iter_) {
    if (OB_FAIL(main_lookup_iter_->get_next_row())) {
      if (OB_ITER_END == ret) {
        // end of the main lookup is not an error for this iterator
        ret = OB_SUCCESS;
      } else {
        LOG_WARN("fail to get next row for main lookup table", K(ret), KPC(main_lookup_iter_));
      }
    }
  }

  for (int64_t i = 0; OB_SUCC(ret) && i < iter_count_; ++i) {
    if (OB_FAIL(tr_merge_iters_[i]->get_next_row())) {
      if (OB_ITER_END == ret) {
        iter_end = true;
        ret = OB_SUCCESS;
      } else {
        LOG_WARN("fail to get next row for tr merge iter", K(ret), K(i), KPC(tr_merge_iters_[i]));
      }
    }
  }

  if (OB_SUCC(ret) && iter_end) {
    ret = OB_ITER_END;
  }
  return ret;
}
// Batch path: reads up to `capacity` rows from the main lookup iter (if any) and from
// every tr merge iter, and verifies that all of them produced the same number of rows.
//
// @param count     [out] number of rows produced in this call (set only on success)
// @param capacity  [in]  maximum number of rows requested
// @return OB_SUCCESS with count > 0, OB_ITER_END when no row was produced,
//         OB_ERR_UNEXPECTED when the row counts are inconsistent, or the first error
//         reported by a child iterator.
//
// The consistency checks are only evaluated while ret is still OB_SUCCESS, so a real
// failure from a child iterator is no longer replaced by OB_ERR_UNEXPECTED.
int ObDASFuncDataIter::inner_get_next_rows(int64_t &count, int64_t capacity)
{
  int ret = OB_SUCCESS;
  int64_t main_lookup_count = 0;
  int64_t tr_merge_count = 0;
  int64_t default_size = doc_ids_.count();
  if (OB_ISNULL(tr_merge_iters_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected error, tr merge iter is nullptr", K(ret));
  } else if (nullptr != main_lookup_iter_) {
    int64_t storage_count = 0;
    // Keep pulling until the batch is full or the main lookup iter is exhausted.
    while (OB_SUCC(ret) && main_lookup_count < capacity) {
      int64_t need_capacity = capacity - main_lookup_count;
      if (OB_FAIL(main_lookup_iter_->get_next_rows(storage_count, need_capacity))) {
        if (OB_ITER_END != ret) {
          LOG_WARN("fail to get next row for main lookup table", K(ret), KPC(main_lookup_iter_));
        } else if (storage_count > 0) {
          // the last batch may still carry rows together with OB_ITER_END
          main_lookup_count += storage_count;
        }
      } else {
        main_lookup_count += storage_count;
      }
    }
    if (OB_ITER_END == ret) {
      ret = OB_SUCCESS;
    }
  }
  if (OB_SUCC(ret) && OB_UNLIKELY(nullptr != main_lookup_iter_ &&
                                  main_lookup_count != capacity && // case: limit, read once
                                  default_size != main_lookup_count + read_count_)) { // case: limit, read more times
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected error, main lookup count is not equal to capacity", K(ret), K(default_size), K(main_lookup_count));
  }
  // Row count of the previous tr merge iter; same width as tr_merge_count to avoid narrowing.
  int64_t tmp_count = 0;
  for (int64_t i = 0; OB_SUCC(ret) && i < iter_count_; i++) {
    tr_merge_count = 0;
    if (OB_FAIL(tr_merge_iters_[i]->get_next_rows(tr_merge_count, capacity))) {
      if (OB_ITER_END != ret) {
        LOG_WARN("fail to get next row for tr merge iter", K(ret), K(i), KPC(tr_merge_iters_[i]));
      } else {
        ret = OB_SUCCESS;
      }
    }
    if (OB_FAIL(ret)) {
      // keep the error reported by the child iterator
    } else if (OB_UNLIKELY(tmp_count != 0 && tmp_count != tr_merge_count)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected error, tr merge count is not equal to tmp count", K(ret), K(tr_merge_count), K(tmp_count), K(i));
    } else if (OB_UNLIKELY(0 != tr_merge_count &&
                           tr_merge_count != capacity &&
                           tr_merge_count + read_count_ != default_size)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected error, tr merge count is not equal to capacity",
               K(ret), K(tr_merge_count), K(capacity), K(i), K_(read_count), K(default_size));
    } else {
      tmp_count = tr_merge_count;
    }
  }
  if (OB_SUCC(ret) && nullptr != main_lookup_iter_ && tr_merge_count != main_lookup_count) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected error, tr merge count is not equal to main lookup count", K(ret), K(tr_merge_count), K(main_lookup_count));
  }
  if (OB_SUCC(ret)) {
    count = tr_merge_count;
    if (0 == tr_merge_count) {
      ret = OB_ITER_END;
    } else {
      read_count_ = read_count_ + count;
    }
  }
  return ret;
}
// Sorts the collected doc ids with FtsDocIdCmp and hands the sorted list to every
// tr merge iter through set_rangkey_and_selector().
int ObDASFuncDataIter::build_tr_merge_iters_rangekey()
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(tr_merge_iters_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected error, tr merge iters is nullptr", K(ret));
  } else {
    // The comparator reports comparison failures through &ret.
    lib::ob_sort(doc_ids_.begin(), doc_ids_.end(), FtsDocIdCmp(cmp_func_, &ret));
    if (OB_FAIL(ret)) {
      LOG_WARN("fail to sort doc id", K(ret));
    } else {
      for (int64_t i = 0; OB_SUCC(ret) && i < iter_count_; ++i) {
        ObDASTextRetrievalMergeIter *tr_merge_iter =
            static_cast<ObDASTextRetrievalMergeIter *>(tr_merge_iters_[i]);
        if (OB_FAIL(tr_merge_iter->set_rangkey_and_selector(doc_ids_))) {
          LOG_WARN("fail to add doc id", K(ret));
        }
      }
    }
  }
  return ret;
}
// Fills the table scan param used by the main lookup iter from the compile-time (ctdef)
// and run-time (rtdef) scan definitions.
//
// @param param       [out] scan param to initialise
// @param ctdef       [in]  compile-time scan definition, must not be null
// @param rtdef       [in]  run-time scan definition, must not be null
// @param trans_desc  [in]  transaction descriptor, may be null
// @param snapshot    [in]  read snapshot, must not be null
// @return OB_SUCCESS, OB_ERR_UNEXPECTED for a null ctdef/rtdef/snapshot, or the error of
//         a failed snapshot / column id assignment.
//
// The column id assignment is skipped once an error has been recorded; previously it
// overwrote ret, so a null snapshot could be reported as OB_SUCCESS.
int ObDASFuncDataIter::init_main_lookup_scan_param(
    ObTableScanParam &param,
    const ObDASScanCtDef *ctdef,
    ObDASScanRtDef *rtdef,
    transaction::ObTxDesc *trans_desc,
    transaction::ObTxReadSnapshot *snapshot)
{
  int ret = OB_SUCCESS;
  uint64_t tenant_id = MTL_ID();
  param.tenant_id_ = tenant_id;
  param.key_ranges_.set_attr(ObMemAttr(tenant_id, "SParamKR"));
  param.ss_key_ranges_.set_attr(ObMemAttr(tenant_id, "SParamSSKR"));
  if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected nullptr ctdef or rtdef", K(ret), KPC(ctdef), KPC(rtdef));
  } else {
    param.scan_allocator_ = &get_arena_allocator();
    param.allocator_ = &rtdef->stmt_allocator_;
    param.tx_lock_timeout_ = rtdef->tx_lock_timeout_;
    param.index_id_ = ctdef->ref_table_id_;
    param.is_get_ = ctdef->is_get_;
    param.is_for_foreign_check_ = rtdef->is_for_foreign_check_;
    param.timeout_ = rtdef->timeout_ts_;
    param.scan_flag_ = rtdef->scan_flag_;
    param.reserved_cell_count_ = ctdef->access_column_ids_.count();
    param.sql_mode_ = rtdef->sql_mode_;
    param.frozen_version_ = rtdef->frozen_version_;
    param.force_refresh_lc_ = rtdef->force_refresh_lc_;
    param.output_exprs_ = &(ctdef->pd_expr_spec_.access_exprs_);
    param.aggregate_exprs_ = &(ctdef->pd_expr_spec_.pd_storage_aggregate_output_);
    param.ext_file_column_exprs_ = &(ctdef->pd_expr_spec_.ext_file_column_exprs_);
    param.ext_column_convert_exprs_ = &(ctdef->pd_expr_spec_.ext_column_convert_exprs_);
    param.calc_exprs_ = &(ctdef->pd_expr_spec_.calc_exprs_);
    param.table_param_ = &(ctdef->table_param_);
    param.op_ = rtdef->p_pd_expr_op_;
    param.row2exprs_projector_ = rtdef->p_row2exprs_projector_;
    param.schema_version_ = ctdef->schema_version_;
    param.tenant_schema_version_ = rtdef->tenant_schema_version_;
    param.limit_param_ = rtdef->limit_param_;
    param.need_scn_ = rtdef->need_scn_;
    param.pd_storage_flag_ = ctdef->pd_expr_spec_.pd_storage_flag_.pd_flag_;
    param.fb_snapshot_ = rtdef->fb_snapshot_;
    param.fb_read_tx_uncommitted_ = rtdef->fb_read_tx_uncommitted_;
    // The transaction descriptor is only attached for foreign key checks.
    if (rtdef->is_for_foreign_check_) {
      param.trans_desc_ = trans_desc;
    }
    if (OB_ISNULL(snapshot)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected null snapshot", K(ret), KPC(ctdef), KPC(rtdef));
    } else if (OB_FAIL(param.snapshot_.assign(*snapshot))) {
      LOG_WARN("assign snapshot fail", K(ret));
    }
    if (OB_NOT_NULL(trans_desc)) {
      param.tx_id_ = trans_desc->get_tx_id();
    } else {
      param.tx_id_.reset();
    }
    if (!ctdef->pd_expr_spec_.pushdown_filters_.empty()) {
      param.op_filters_ = &ctdef->pd_expr_spec_.pushdown_filters_;
    }
    param.pd_storage_filters_ = rtdef->p_pd_expr_op_->pd_storage_filters_;
    if (OB_FAIL(ret)) {
      // keep the snapshot error instead of overwriting it below
    } else if (OB_FAIL(param.column_ids_.assign(ctdef->access_column_ids_))) {
      LOG_WARN("failed to assign column ids", K(ret));
    }
    if (rtdef->sample_info_ != nullptr) {
      param.sample_info_ = *rtdef->sample_info_;
    }
  }
  LOG_DEBUG("init rowkey doc table scan param finished", K(param), K(ret));
  return ret;
}
} // end namespace sql
} // end namespace oceanbase

View File

@ -0,0 +1,148 @@
/**
* Copyright (c) 2024 OceanBase
* OceanBase is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OB_DAS_FUNC_DATA_ITER_H_
#define OB_DAS_FUNC_DATA_ITER_H_
#include "sql/das/iter/ob_das_iter.h"
#include "sql/das/iter/ob_das_scan_iter.h"
#include "sql/das/iter/ob_das_text_retrieval_merge_iter.h"
#include "common/ob_tablet_id.h"
#include "share/ob_ls_id.h"
#include "storage/access/ob_dml_param.h"
namespace oceanbase
{
namespace sql
{
class ObDASScanCtDef;
class ObDASScanRtDef;
// Construction parameters for ObDASFuncDataIter.
// The parameter is considered valid when at least one tr merge iter is supplied; the
// main lookup members are not part of that check.
class ObDASFuncDataIterParam final : public ObDASIterParam
{
public:
ObDASFuncDataIterParam();
~ObDASFuncDataIterParam();
// True when iter_count_ >= 1 and the tr merge iter array is not null.
virtual bool is_valid() const override
{
return iter_count_ >= 1 && nullptr != tr_merge_iters_;
}
public:
// Array of child iterators (one per text retrieval), iter_count_ entries.
ObDASIter **tr_merge_iters_;
// Number of entries in tr_merge_iters_.
int64_t iter_count_;
// Scan definitions and iterator of the main table lookup.
const ObDASScanCtDef *main_lookup_ctdef_;
ObDASScanRtDef *main_lookup_rtdef_;
ObDASIter *main_lookup_iter_;
// Transaction descriptor and read snapshot for the main lookup scan.
transaction::ObTxDesc *trans_desc_;
transaction::ObTxReadSnapshot *snapshot_;
};
/**
* FTS DATA Iter:
*
*
* FTS_DATA_Iter
* / | | \
* / | | \
* / | | \
* / | | \
* TR_ITER1 TR_ITER2 TR_ITER3 ... MAIN_LOOKUP_ITER(may be null)
*
**/
// Data-side iterator of a function lookup: it drives a set of tr merge iters and an
// optional main lookup iter for the doc ids collected through add_doc_id().
class ObDASFuncDataIter final : public ObDASIter
{
public:
ObDASFuncDataIter();
~ObDASFuncDataIter();
virtual int do_table_scan() override;
virtual int rescan() override;
virtual void clear_evaluated_flag() override;
// Appends a doc id together with its insertion index (the number of doc ids stored
// before this call). Returns the error of push_back on failure.
inline int add_doc_id(const ObDocId &doc_id)
{
int ret = OB_SUCCESS;
int64_t idx = doc_ids_.count();
if (OB_FAIL(doc_ids_.push_back(std::make_pair(doc_id, idx)))) {
LOG_WARN("fail to push back doc id", K(ret));
}
return ret;
}
// Tablet / log stream the main lookup scan param is bound to on the next reuse.
void set_tablet_id(const ObTabletID &tablet_id) { main_lookup_tablet_id_ = tablet_id; }
void set_ls_id(const share::ObLSID &ls_id) { main_lookup_ls_id_ = ls_id; }
// True when a main lookup iter was supplied.
bool has_main_lookup_iter() const { return nullptr != main_lookup_iter_; }
ObTableScanParam &get_main_lookup_scan_param() { return main_lookup_param_; }
const ObDASScanCtDef *get_main_lookup_ctdef() { return main_lookup_ctdef_; }
INHERIT_TO_STRING_KV("ObDASIter", ObDASIter,
K(main_lookup_param_),
KPC(main_lookup_iter_));
protected:
virtual int inner_init(ObDASIterParam &param) override;
virtual int inner_reuse() override;
virtual int inner_release() override;
virtual int inner_get_next_row() override;
virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override;
private:
// Dereferences merge_memctx_ without a null check, so the memory context must exist
// before this is called.
common::ObArenaAllocator &get_arena_allocator() { return merge_memctx_->get_arena_allocator(); }
int init_main_lookup_scan_param(
ObTableScanParam &param,
const ObDASScanCtDef *ctdef,
ObDASScanRtDef *rtdef,
transaction::ObTxDesc *trans_desc,
transaction::ObTxReadSnapshot *snapshot);
int build_tr_merge_iters_rangekey();
// Strict-weak-order functor used to sort doc_ids_ by doc id.
// A comparator cannot return an error code, so the first comparison failure is written
// to *err_code_ and the caller inspects it after sorting.
struct FtsDocIdCmp
{
FtsDocIdCmp(common::ObDatumCmpFuncType cmp_func, int *ret)
{
cmp_func_ = cmp_func;
err_code_ = ret;
}
bool operator()(const std::pair<ObDocId, int> &a, const std::pair<ObDocId, int> &b) const
{
int ret = OB_SUCCESS;
ObDatum l_datum;
ObDatum r_datum;
// ObDocId must be not null; ObDocIds must be not same
l_datum.set_string(a.first.get_string());
r_datum.set_string(b.first.get_string());
int tmp_ret = 0;
if (OB_FAIL(cmp_func_(l_datum, r_datum, tmp_ret))) {
LOG_WARN("failed to compare doc id by datum", K(ret));
}
// Only the first failure is kept; later results never overwrite an existing error.
*err_code_ = *err_code_ == OB_SUCCESS ? ret : *err_code_;
return tmp_ret < 0;
}
// Caller-owned error slot.
int *err_code_;
private:
common::ObDatumCmpFuncType cmp_func_;
};
private:
// Datum compare function handed to FtsDocIdCmp.
common::ObDatumCmpFuncType cmp_func_;
// Child tr merge iters, iter_count_ entries.
ObDASIter **tr_merge_iters_;
int64_t iter_count_;
const ObDASScanCtDef *main_lookup_ctdef_;
ObDASScanRtDef *main_lookup_rtdef_;
// Optional main table lookup iter; may be null.
ObDASIter *main_lookup_iter_;
ObTabletID main_lookup_tablet_id_;
share::ObLSID main_lookup_ls_id_;
// Scan param of the main lookup iter.
storage::ObTableScanParam main_lookup_param_;
// Private memory context; its arena is returned by get_arena_allocator().
lib::MemoryContext merge_memctx_;
// Collected (doc id, insertion index) pairs.
ObSEArray<std::pair<ObDocId, int>, 4> doc_ids_;
// Rows returned through inner_get_next_rows() since the last reuse.
int64_t read_count_;
};
} // end namespace sql
} // end namespace oceanbase
#endif // OB_DAS_FUNC_DATA_ITER_H_

View File

@ -0,0 +1,316 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SQL_DAS
#include "sql/das/iter/ob_das_functional_lookup_iter.h"
#include "sql/das/iter/ob_das_scan_iter.h"
#include "sql/das/iter/ob_das_func_data_iter.h"
#include "sql/das/ob_das_scan_op.h"
#include "sql/das/ob_das_ir_define.h"
#include "storage/concurrency_control/ob_data_validation_service.h"
namespace oceanbase
{
using namespace common;
namespace sql
{
// Initialises the function lookup iter from an ObDASFuncLookupIterParam: copies the
// child iterators and scan definitions, creates the lookup memory context and registers
// the doc id expr as the single rowkey expr.
int ObDASFuncLookupIter::inner_init(ObDASIterParam &param)
{
  int ret = OB_SUCCESS;
  if (ObDASIterType::DAS_ITER_FUNC_LOOKUP != param.type_) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("inner init das iter with bad param type", K(param), K(ret));
  } else {
    ObDASFuncLookupIterParam &lookup_param = static_cast<ObDASFuncLookupIterParam&>(param);

    // lookup state
    state_ = LookupState::INDEX_SCAN;
    index_end_ = false;
    default_batch_row_count_ = lookup_param.default_batch_row_count_;
    lookup_rowkey_cnt_ = 0;
    lookup_row_cnt_ = 0;

    // child iterators and their definitions
    index_table_iter_ = lookup_param.index_table_iter_;
    data_table_iter_ = lookup_param.data_table_iter_;
    index_ctdef_ = lookup_param.index_ctdef_;
    index_rtdef_ = lookup_param.index_rtdef_;
    lookup_ctdef_ = lookup_param.lookup_ctdef_;
    lookup_rtdef_ = lookup_param.lookup_rtdef_;

    start_table_scan_ = false;
    trans_desc_ = lookup_param.trans_desc_;
    snapshot_ = lookup_param.snapshot_;

    // Named differently from the function argument to avoid shadowing it.
    lib::ContextParam context_param;
    context_param.set_mem_attr(MTL_ID(), ObModIds::OB_SQL_TABLE_LOOKUP, ObCtxIds::DEFAULT_CTX_ID)
                 .set_properties(lib::USE_TL_PAGE_OPTIONAL);
    if (OB_FAIL(CURRENT_CONTEXT->CREATE_CONTEXT(lookup_memctx_, context_param))) {
      LOG_WARN("failed to create lookup memctx", K(ret));
    } else if (OB_FAIL(rowkey_exprs_.push_back(lookup_param.doc_id_expr_))) {
      LOG_WARN("failed to assign rowkey exprs", K(ret));
    } else if (1 != rowkey_exprs_.count()) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected rowkey exprs count", K(rowkey_exprs_.count()), K(ret));
    }
  }
  return ret;
}
// Puts the lookup state machine back to INDEX_SCAN and clears the per-batch counters.
// The data table iter is reused here only after the first lookup has happened.
void ObDASFuncLookupIter::reset_lookup_state()
{
  state_ = LookupState::INDEX_SCAN;
  index_end_ = false;
  lookup_rowkey_cnt_ = 0;
  lookup_row_cnt_ = 0;

  if (!is_first_lookup_) {
    data_table_iter_->reuse();
  }
  if (OB_NOT_NULL(lookup_memctx_)) {
    lookup_memctx_->reset_remain_one_page();
  }
  trans_info_array_.reuse();
}
// Clears the index scan key ranges and, once a table scan has been started, reuses the
// index iter, the data iter (only while no lookup has happened yet) and the base
// lookup iter state.
int ObDASFuncLookupIter::inner_reuse()
{
  int ret = OB_SUCCESS;
  storage::ObTableScanParam &index_scan_param =
      static_cast<ObDASScanIter *>(index_table_iter_)->get_scan_param();
  if (!index_scan_param.key_ranges_.empty()) {
    index_scan_param.key_ranges_.reuse();
  }

  if (!start_table_scan_) {
    // nothing was scanned yet, so there is nothing else to reuse
  } else if (OB_FAIL(index_table_iter_->reuse())) {
    LOG_WARN("failed to reuse index table iter", K(ret));
  } else if (is_first_lookup_ && OB_FAIL(data_table_iter_->reuse())) {
    LOG_WARN("failed to reuse data table iter", K(ret));
  } else if (OB_FAIL(ObDASLookupIter::inner_reuse())) {
    LOG_WARN("failed to reuse das lookup iter", K(ret));
  } else {
    trans_info_array_.reuse();
  }
  return ret;
}
// Marks the table scan as not started and delegates the actual release to
// ObDASLocalLookupIter.
int ObDASFuncLookupIter::inner_release()
{
  start_table_scan_ = false;

  int ret = ObDASLocalLookupIter::inner_release();
  if (OB_SUCCESS != ret) {
    LOG_WARN("failed to release lookup iter", K(ret));
  }
  return ret;
}
// Starts the scan on the index table iter only. The index scan param must already
// carry at least one key range.
//
// OB_SNAPSHOT_DISCARDED is translated to OB_INVALID_QUERY_TIMESTAMP when a flashback
// snapshot is in use; OB_TRY_LOCK_ROW_CONFLICT is returned without a warning log.
int ObDASFuncLookupIter::do_table_scan()
{
  int ret = OB_SUCCESS;
  start_table_scan_ = true;
  OB_ASSERT(index_table_iter_->get_type() == DAS_ITER_SCAN);
  ObDASScanIter *index_table_iter = static_cast<ObDASScanIter *>(index_table_iter_);
  storage::ObTableScanParam &index_scan_param = index_table_iter->get_scan_param();
  if (OB_UNLIKELY(index_scan_param.key_ranges_.empty())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected key ranges count", K(index_scan_param.key_ranges_.count()), K(ret));
  } else if (OB_FAIL(index_table_iter_->do_table_scan())) {
    if (OB_SNAPSHOT_DISCARDED == ret && index_scan_param.fb_snapshot_.is_valid()) {
      ret = OB_INVALID_QUERY_TIMESTAMP;
    } else if (OB_TRY_LOCK_ROW_CONFLICT != ret) {
      LOG_WARN("failed to do partition scan", K(index_scan_param), K(ret));
    }
  }
  return ret;
}
// Rescans the index table iter. The data table iter is not touched here; it is
// rescanned later from do_index_lookup().
int ObDASFuncLookupIter::rescan()
{
  int ret = OB_SUCCESS;
  storage::ObTableScanParam &index_scan_param =
      static_cast<ObDASScanIter *>(index_table_iter_)->get_scan_param();
  if (OB_UNLIKELY(!start_table_scan_)) {
    // rescan is only legal after do_table_scan()
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected rescan, should do table scan first", K(ret));
  } else if (OB_UNLIKELY(index_scan_param.key_ranges_.empty())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected key ranges count", K(index_scan_param.key_ranges_.count()), K(ret));
  } else if (OB_FAIL(index_table_iter_->rescan())) {
    LOG_WARN("failed to rescan index table iter", K(ret));
  }
  return ret;
}
int ObDASFuncLookupIter::inner_get_next_row()
{
int ret = OB_SUCCESS;
ObDASScanIter *index_table_iter = static_cast<ObDASScanIter *>(index_table_iter_);
storage::ObTableScanParam &index_scan_param = index_table_iter->get_scan_param();
OB_ASSERT(index_table_iter_->get_type() == DAS_ITER_SCAN);
int64_t simulate_batch_row_cnt = - EVENT_CALL(EventTable::EN_TABLE_LOOKUP_BATCH_ROW_COUNT);
const bool use_simulate_batch_row_cnt = simulate_batch_row_cnt > 0 && simulate_batch_row_cnt < default_batch_row_count_;
int64_t default_row_batch_cnt = use_simulate_batch_row_cnt ? simulate_batch_row_cnt : default_batch_row_count_;
LOG_DEBUG("simulate lookup row batch count", K(simulate_batch_row_cnt), K(default_row_batch_cnt));
if (index_scan_param.key_ranges_.empty()) {
ret = OB_ITER_END;
} else if (OB_UNLIKELY(index_scan_param.key_ranges_.count() != 1 || default_row_batch_cnt != 1)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected key ranges count", K(index_scan_param.key_ranges_.count()), K(default_row_batch_cnt), K(ret));
} else if (OB_FAIL(ObDASLocalLookupIter::inner_get_next_row())) {
if (OB_ITER_END != ret) {
LOG_WARN("failed to get next row from function lookup iter", K(ret));
}
} else if (OB_UNLIKELY(lookup_row_cnt_ > 1 || lookup_rowkey_cnt_ > 1)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected lookup row count", K_(lookup_row_cnt), K_(lookup_rowkey_cnt), K(ret));
}
return ret;
}
// Batch path. Records the number of index key ranges in cap_, requires the effective
// batch row count to be at least cap_, and then delegates to
// ObDASLookupIter::inner_get_next_rows(). On success the number of looked-up rows must
// equal the number of rowkeys.
int ObDASFuncLookupIter::inner_get_next_rows(int64_t &count, int64_t capacity)
{
  int ret = OB_SUCCESS;
  ObDASScanIter *index_table_iter = static_cast<ObDASScanIter *>(index_table_iter_);
  storage::ObTableScanParam &index_scan_param = index_table_iter->get_scan_param();
  OB_ASSERT(index_table_iter_->get_type() == DAS_ITER_SCAN);
  cap_ = index_scan_param.key_ranges_.count();

  // The batch size can be lowered by the EN_TABLE_LOOKUP_BATCH_ROW_COUNT event.
  int64_t simulate_batch_row_cnt = - EVENT_CALL(EventTable::EN_TABLE_LOOKUP_BATCH_ROW_COUNT);
  int64_t default_row_batch_cnt = default_batch_row_count_;
  if (simulate_batch_row_cnt > 0 && simulate_batch_row_cnt < default_batch_row_count_) {
    default_row_batch_cnt = simulate_batch_row_cnt;
  }
  LOG_DEBUG("simulate lookup row batch count", K(simulate_batch_row_cnt), K(default_row_batch_cnt));

  if (index_scan_param.key_ranges_.empty()) {
    ret = OB_ITER_END;
  } else if (OB_UNLIKELY(default_row_batch_cnt < cap_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected key ranges count", K(default_row_batch_cnt), K(capacity),
             K_(cap), K(index_scan_param.key_ranges_.count()), K(ret));
  } else if (OB_FAIL(ObDASLookupIter::inner_get_next_rows(count, capacity))) {
    if (OB_ITER_END != ret) {
      LOG_WARN("failed to get next row from function lookup iter", K(ret));
    }
  }

  if (OB_SUCC(ret) && OB_UNLIKELY(lookup_row_cnt_ != lookup_rowkey_cnt_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected lookup row count", K_(lookup_row_cnt), K_(lookup_rowkey_cnt), K(ret));
  }
  return ret;
}
int ObDASFuncLookupIter::add_rowkey()
{
int ret = OB_SUCCESS;
OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_FUNC_DATA);
if (OB_ISNULL(eval_ctx_) || OB_UNLIKELY(1 != rowkey_exprs_.count())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid eval ctx or rowkey exprs", K_(eval_ctx), K_(rowkey_exprs), K(ret));
} else {
ObDASScanIter *index_iter = static_cast<ObDASScanIter *>(index_table_iter_);
ObDASFuncDataIter *merge_iter = static_cast<ObDASFuncDataIter *>(data_table_iter_);
ObDocId doc_id;
const ObExpr *expr = rowkey_exprs_.at(0);
ObDatum &col_datum = expr->locate_expr_datum(*eval_ctx_);
doc_id.from_string(col_datum.get_string());
if (OB_UNLIKELY(!doc_id.is_valid())) {
LOG_WARN("invalid doc id", K(doc_id));
} else if (OB_FAIL(merge_iter->add_doc_id(doc_id))) {
LOG_WARN("failed to add doc id", K(ret));
}
LOG_DEBUG("push doc id to tr iter", K(doc_id), K(ret));
}
return ret;
}
// Adds the rowkeys of one index batch. The batch size must equal cap_ (the number of
// index key ranges recorded by inner_get_next_rows()); the work itself is done by
// ObDASLocalLookupIter::add_rowkeys().
int ObDASFuncLookupIter::add_rowkeys(int64_t storage_count)
{
  int ret = OB_SUCCESS;
  // for limit case, can do better, add_rowkeys(limit_count)
  if (storage_count == cap_) {
    if (OB_FAIL(ObDASLocalLookupIter::add_rowkeys(storage_count))) {
      LOG_WARN("failed to add rowkeys", K(ret));
    }
  } else {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected count", K(storage_count), K(cap_));
  }
  return ret;
}
int ObDASFuncLookupIter::do_index_lookup()
{
int ret = OB_SUCCESS;
OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_FUNC_DATA);
ObDASScanIter *index_table_iter = static_cast<ObDASScanIter *>(index_table_iter_);
storage::ObTableScanParam &index_scan_param = index_table_iter->get_scan_param();
ObDASFuncDataIter *merge_iter = static_cast<ObDASFuncDataIter *>(data_table_iter_);
if (merge_iter->has_main_lookup_iter()) {
storage::ObTableScanParam &main_lookup_param = merge_iter->get_main_lookup_scan_param();
int64 group_id = 0;
if (OB_UNLIKELY(!main_lookup_param.key_ranges_.empty())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected key ranges count", K(main_lookup_param.key_ranges_.count()), K(ret));
} else if (DAS_OP_TABLE_SCAN != index_ctdef_->op_type_) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected index op type", K(index_ctdef_->op_type_), K(ret));
} else {
const ObDASScanCtDef *index_ctdef = static_cast<const ObDASScanCtDef*>(index_ctdef_);
if (nullptr != index_ctdef->group_id_expr_) {
group_id = index_ctdef->group_id_expr_->locate_expr_datum(*eval_ctx_).get_int();
}
}
for (int64_t i = 0; OB_SUCC(ret) && i < index_scan_param.key_ranges_.count(); i++) {
ObRowkey row_key = index_scan_param.key_ranges_.at(i).start_key_;
ObNewRange range;
range.build_range(merge_iter->get_main_lookup_ctdef()->ref_table_id_, row_key);
int64_t group_idx = ObNewRange::get_group_idx(group_id);
range.group_idx_ = group_idx;
main_lookup_param.key_ranges_.push_back(range);
}
if (OB_SUCC(ret)) {
main_lookup_param.is_get_ = true;
}
}
if (OB_FAIL(ret)) {
} else if (is_first_lookup_) {
is_first_lookup_ = false;
if (OB_FAIL(data_table_iter_->do_table_scan())) {
if (OB_SNAPSHOT_DISCARDED == ret && lookup_param_.fb_snapshot_.is_valid()) {
ret = OB_INVALID_QUERY_TIMESTAMP;
} else if (OB_TRY_LOCK_ROW_CONFLICT != ret) {
LOG_WARN("failed to do partition scan", K(lookup_param_), K(ret));
}
}
} else if (OB_FAIL(data_table_iter_->rescan())) {
LOG_WARN("failed to rescan data table", K(ret));
}
return ret;
}
// Defensive check, active only when enable_defensive_check is on: the number of rows
// returned by the lookup must equal the number of rowkeys that were looked up.
int ObDASFuncLookupIter::check_index_lookup()
{
  int ret = OB_SUCCESS;
  OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_FUNC_DATA);
  if (GCONF.enable_defensive_check() && OB_UNLIKELY(lookup_rowkey_cnt_ != lookup_row_cnt_)) {
    ret = OB_ERR_DEFENSIVE_CHECK;
    ObString func_name = ObString::make_string("check_lookup_row_cnt");
    LOG_USER_ERROR(OB_ERR_DEFENSIVE_CHECK, func_name.length(), func_name.ptr());
    LOG_ERROR("Fatal Error!!! Catch a defensive error!",
              K(ret), K_(lookup_rowkey_cnt), K_(lookup_row_cnt));
  }
  return ret;
}
void ObDASFuncLookupIter::clear_evaluated_flag()
{
index_table_iter_->clear_evaluated_flag();
data_table_iter_->clear_evaluated_flag();
}
} // namespace sql
} // namespace oceanbase

View File

@ -0,0 +1,130 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OBDEV_SRC_SQL_DAS_ITER_OB_DAS_FUNCTIONAL_LOOKUP_ITER_H_
#define OBDEV_SRC_SQL_DAS_ITER_OB_DAS_FUNCTIONAL_LOOKUP_ITER_H_
#include "sql/das/iter/ob_das_local_lookup_iter.h"
namespace oceanbase
{
using namespace common;
namespace sql
{
// Construction parameters for ObDASFuncLookupIter.
// Every pointer member starts as nullptr and default_batch_row_count_ starts as 0.
struct ObDASFuncLookupIterParam : public ObDASIterParam
{
public:
ObDASFuncLookupIterParam()
: ObDASIterParam(DAS_ITER_FUNC_LOOKUP),
default_batch_row_count_(0),
index_ctdef_(nullptr),
index_rtdef_(nullptr),
lookup_ctdef_(nullptr),
lookup_rtdef_(nullptr),
index_table_iter_(nullptr),
data_table_iter_(nullptr),
rowkey_exprs_(nullptr),
doc_id_expr_(nullptr),
trans_desc_(nullptr),
snapshot_(nullptr)
{}
// Upper bound of rows handled per lookup batch.
int64_t default_batch_row_count_;
// Definitions of the index scan.
const ObDASBaseCtDef *index_ctdef_;
ObDASBaseRtDef *index_rtdef_;
// Definitions of the lookup scan; not part of is_valid().
const ObDASScanCtDef *lookup_ctdef_;
ObDASScanRtDef *lookup_rtdef_;
// Child iterators: index side and data side.
ObDASIter *index_table_iter_;
ObDASIter *data_table_iter_;
// NOTE(review): not read by ObDASFuncLookupIter::inner_init, which only uses
// doc_id_expr_ - confirm whether this member is still needed.
const ExprFixedArray *rowkey_exprs_;
// Expr that yields the doc id; becomes the single rowkey expr of the iter.
ObExpr *doc_id_expr_;
transaction::ObTxDesc *trans_desc_;
transaction::ObTxReadSnapshot *snapshot_;
// Valid when the base param is valid and both child iterators, the index ctdef/rtdef
// and the doc id expr are set.
virtual bool is_valid() const override
{
return ObDASIterParam::is_valid()
&& index_table_iter_ != nullptr && data_table_iter_ != nullptr
&& index_ctdef_ != nullptr && index_rtdef_ != nullptr && doc_id_expr_ != nullptr;
}
};
class ObDASScanCtDef;
class ObDASScanRtDef;
/**
* Func Lookup Iter:
* Func Lookup Iter
* / \
* / \
* / \
* / \
* INDEX_ITER DATA_ITER = FTS_MERGE_ITER
* (ROWKEY_DOCID)
*
* Func Lookup:
* Local Lookup Iter
* / \
* / \
* / \
* Local Lookup/Das Scan Func Lookup Iter
**/
/*
* In ObDASFuncLookupIter, the data iter is an FTS merge iter (ObDASFuncDataIter). It is only
* a helper iter that bundles the optional main lookup iter together with the tr merge iters.
*/
// Lookup iter whose index side is a scan iter producing doc ids and whose data side is
// an ObDASFuncDataIter.
class ObDASFuncLookupIter : public ObDASLocalLookupIter
{
public:
  // start_table_scan_ is initialised here as well, so that it never holds an
  // indeterminate value if it is read before inner_init() has run.
  ObDASFuncLookupIter()
    : ObDASLocalLookupIter(ObDASIterType::DAS_ITER_FUNC_LOOKUP),
      cap_(0),
      start_table_scan_(false)
  {}
  virtual ~ObDASFuncLookupIter() {}

  // Replaces the scan param of the index scan iter.
  void set_index_scan_param(storage::ObTableScanParam &scan_param) { static_cast<ObDASScanIter *>(index_table_iter_)->set_scan_param(scan_param);}
  ObDASScanIter *get_index_scan_iter() { return static_cast<ObDASScanIter *>(index_table_iter_); }

  // Returns the value of the T_PSEUDO_GROUP_ID access expr of the index scan, or 0 when
  // the index scan has no such expr. If several exprs match, the last one wins.
  int64 get_group_id() const
  {
    const ExprFixedArray *exprs = &(static_cast<const ObDASScanCtDef *>(index_ctdef_))->pd_expr_spec_.access_exprs_;
    int64 group_id = 0;
    for (int64_t i = 0; i < exprs->count(); i++) {
      if (T_PSEUDO_GROUP_ID == exprs->at(i)->type_) {
        group_id = exprs->at(i)->locate_expr_datum(*eval_ctx_).get_int();
      }
    }
    return group_id;
  }
  virtual void clear_evaluated_flag() override;

protected:
  virtual int inner_init(ObDASIterParam &param) override;
  virtual int inner_reuse() override;
  virtual int inner_release() override;
  virtual int do_table_scan() override;
  virtual int rescan() override;
  virtual int inner_get_next_row() override;
  virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override;
  virtual int add_rowkey() override;
  virtual int add_rowkeys(int64_t count) override;
  virtual int do_index_lookup() override;
  virtual int check_index_lookup() override;
  virtual void reset_lookup_state() override;

protected:
  // Number of index key ranges of the current batch.
  int64_t cap_;
  // True once do_table_scan() has been called; cleared by inner_init()/inner_release().
  bool start_table_scan_;
};
} // namespace sql
} // namespace oceanbase
#endif /* OBDEV_SRC_SQL_DAS_ITER_OB_DAS_FUNCTIONAL_LOOKUP_ITER_H_ */

View File

@ -71,11 +71,27 @@ enum ObDASIterTreeType : uint32_t
ITER_TREE_MAX
};
// Groups the inverted-index, forward-index and doc-id-index tablet ids.
struct ObDASFTSTabletID
{
  common::ObTabletID inv_idx_tablet_id_;
  common::ObTabletID fwd_idx_tablet_id_;
  common::ObTabletID doc_id_idx_tablet_id_;

  // Resets all three tablet ids.
  void reset()
  {
    inv_idx_tablet_id_.reset();
    fwd_idx_tablet_id_.reset();
    doc_id_idx_tablet_id_.reset();
  }

  TO_STRING_KV(K_(inv_idx_tablet_id), K_(fwd_idx_tablet_id), K_(doc_id_idx_tablet_id));
};
#define SUPPORTED_DAS_ITER_TREE(_type) \
({ \
ITER_TREE_PARTITION_SCAN == (_type) || \
ITER_TREE_LOCAL_LOOKUP == (_type) || \
ITER_TREE_TEXT_RETRIEVAL == (_type) || \
ITER_TREE_FUNC_LOOKUP == (_type) || \
ITER_TREE_INDEX_MERGE == (_type) || \
ITER_TREE_MVI_LOOKUP == (_type) || \
ITER_TREE_GIS_LOOKUP == (_type); \
@ -93,16 +109,20 @@ public:
common::ObTabletID rowkey_doc_tablet_id_;
common::ObTabletID rowkey_vid_tablet_id_;
/* used by fulltext index */
/* used by basic fulltext index */
common::ObTabletID inv_idx_tablet_id_;
common::ObTabletID fwd_idx_tablet_id_;
common::ObTabletID doc_id_idx_tablet_id_;
/* used by fulltext index */
/* used by basic fulltext index */
/* used by index merge */
common::ObFixedArray<common::ObTabletID, ObIAllocator> index_merge_tablet_ids_;
/* used by index merge */
/* used by function lookup index (special fulltext)*/
common::ObSEArray<ObDASFTSTabletID, 2> fts_tablet_ids_;
/* used by function lookup index (special fulltext)*/
void reset()
{
lookup_tablet_id_.reset();
@ -113,6 +133,7 @@ public:
fwd_idx_tablet_id_.reset();
doc_id_idx_tablet_id_.reset();
index_merge_tablet_ids_.reset();
fts_tablet_ids_.reset();
}
};

View File

@ -100,6 +100,10 @@ int ObDASIterUtils::create_das_scan_iter_tree(ObDASIterTreeType tree_type,
ret = create_index_merge_iter_tree(scan_param, alloc, attach_ctdef, attach_rtdef, related_tablet_ids, trans_desc, snapshot, iter_tree);
break;
}
case ITER_TREE_FUNC_LOOKUP: {
ret = create_function_lookup_tree(scan_param, alloc, attach_ctdef, attach_rtdef, related_tablet_ids, trans_desc, snapshot, iter_tree);
break;
}
case ITER_TREE_MVI_LOOKUP: {
ret = create_mvi_lookup_tree(scan_param, alloc, attach_ctdef, attach_rtdef, related_tablet_ids, trans_desc, snapshot, iter_tree);
break;
@ -241,7 +245,11 @@ int ObDASIterUtils::set_text_retrieval_related_ids(const ObDASBaseCtDef *attach_
} else {
ObDASTextRetrievalMergeIter *tr_merge_iter = static_cast<ObDASTextRetrievalMergeIter *>(root_iter);
need_set_child = false;
if (OB_FAIL(tr_merge_iter->set_related_tablet_ids(ls_id, related_tablet_ids))) {
ObDASFTSTabletID fts_tablet_ids;
fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.inv_idx_tablet_id_;
fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fwd_idx_tablet_id_;
fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.doc_id_idx_tablet_id_;
if (OB_FAIL(tr_merge_iter->set_related_tablet_ids(ls_id, fts_tablet_ids))) {
LOG_WARN("failed to set related tablet ids", K(ret));
}
}
@ -337,6 +345,139 @@ int ObDASIterUtils::set_index_merge_related_ids(const ObDASBaseCtDef *attach_ctd
return ret;
}
/**
 * Recursively walk the attached ctdef tree of a functional lookup plan together with the matching
 * iterator tree, and assign the log stream id and the tablet ids found in related_tablet_ids to
 * every iterator that needs them.
 *
 * @param attach_ctdef        ctdef node that describes root_iter; must not be null.
 * @param related_tablet_ids  tablet ids of the auxiliary tables involved in the plan.
 * @param ls_id               log stream id forwarded to the iterators.
 * @param flag                only read for DAS_OP_IR_SCAN nodes: a value >= 0 selects
 *                            related_tablet_ids.fts_tablet_ids_[flag], a negative value selects the
 *                            top level inv/fwd/doc-id index tablet ids.
 * @param root_iter           iterator that corresponds to attach_ctdef; must not be null.
 * @return OB_SUCCESS, or OB_ERR_UNEXPECTED when an argument is null or the iterator tree does not
 *         match the shape of the ctdef tree.
 */
int ObDASIterUtils::set_func_lookup_iter_related_ids(const ObDASBaseCtDef *attach_ctdef,
                                                     const ObDASRelatedTabletID &related_tablet_ids,
                                                     const ObLSID &ls_id,
                                                     int64_t flag,
                                                     ObDASIter *root_iter)
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(attach_ctdef) || OB_ISNULL(root_iter)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected nullptr", K(ret), KP(attach_ctdef), KP(root_iter));
  } else {
    const ObDASIterType &iter_type = root_iter->get_type();
    // whether the generic "recurse into every child" step below has to run for this node
    bool need_set_child = false;
    switch (attach_ctdef->op_type_) {
      case ObDASOpType::DAS_OP_INDEX_PROJ_LOOKUP: {
        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_LOCAL_LOOKUP)) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("iter type not match", K(ret), K(iter_type));
        } else {
          ObDASLocalLookupIter *local_lookup_iter = static_cast<ObDASLocalLookupIter *>(root_iter);
          local_lookup_iter->set_tablet_id(related_tablet_ids.rowkey_doc_tablet_id_);
          local_lookup_iter->set_ls_id(ls_id);
          need_set_child = true;
        }
        break;
      }
      case ObDASOpType::DAS_OP_FUNC_LOOKUP: {
        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_FUNC_LOOKUP)) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("iter type not match with ctdef", K(ret), K(attach_ctdef->op_type_), K(iter_type));
        } else if (OB_UNLIKELY(root_iter->get_children_cnt() < 2)
                   || OB_ISNULL(root_iter->get_children()[1])) {
          // the func data (merge) iter is read from child slot 1, so that slot has to exist
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("unexpected func lookup iter children", K(ret), K(root_iter->get_children_cnt()));
        } else {
          const ObDASFuncLookupCtDef *func_lookup_ctdef = static_cast<const ObDASFuncLookupCtDef *>(attach_ctdef);
          const int64_t func_lookup_cnt = func_lookup_ctdef->func_lookup_cnt_;
          ObDASFuncDataIter *merge_iter = static_cast<ObDASFuncDataIter *>(root_iter->get_children()[1]);
          if (OB_UNLIKELY(static_cast<int64_t>(merge_iter->get_children_cnt()) < func_lookup_cnt)) {
            // children [0, func_lookup_cnt) of the merge iter are indexed below
            ret = OB_ERR_UNEXPECTED;
            LOG_WARN("func data iter children count less than func lookup count",
                K(ret), K(func_lookup_cnt), K(merge_iter->get_children_cnt()));
          } else {
            if (func_lookup_ctdef->has_main_table_lookup()) {
              merge_iter->set_tablet_id(related_tablet_ids.lookup_tablet_id_);
              merge_iter->set_ls_id(ls_id);
            }
            // the i-th functional lookup scan uses the i-th entry of fts_tablet_ids_, hence flag = i
            for (int64_t i = 0; OB_SUCC(ret) && i < func_lookup_cnt; ++i) {
              if (OB_FAIL(set_func_lookup_iter_related_ids(
                  func_lookup_ctdef->get_func_lookup_scan_ctdef(i),
                  related_tablet_ids,
                  ls_id,
                  i,
                  merge_iter->get_children()[i]))) {
                LOG_WARN("failed to set text retrieval related ids", K(ret));
              }
            }
          }
          need_set_child = false;
        }
        break;
      }
      case ObDASOpType::DAS_OP_IR_AUX_LOOKUP: {
        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_LOCAL_LOOKUP)) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("iter type not match with ctdef", K(ret), K(attach_ctdef->op_type_), K(iter_type));
        } else {
          ObDASLocalLookupIter *aux_lookup_iter = static_cast<ObDASLocalLookupIter *>(root_iter);
          aux_lookup_iter->set_ls_id(ls_id);
          aux_lookup_iter->set_tablet_id(related_tablet_ids.aux_lookup_tablet_id_);
          need_set_child = true;
        }
        break;
      }
      case ObDASOpType::DAS_OP_SORT: {
        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_SORT)) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("iter type not match with ctdef", K(ret), K(attach_ctdef->op_type_), K(iter_type));
        } else {
          // the sort iter itself carries no tablet id, only its children do
          need_set_child = true;
        }
        break;
      }
      case ObDASOpType::DAS_OP_IR_SCAN: {
        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_TEXT_RETRIEVAL_MERGE)) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("iter type not match with ctdef", K(ret), K(attach_ctdef->op_type_), K(iter_type));
        } else {
          ObDASTextRetrievalMergeIter *tr_merge_iter = static_cast<ObDASTextRetrievalMergeIter *>(root_iter);
          ObDASFTSTabletID fts_tablet_ids;
          if (flag >= 0) {
            // scan that belongs to the flag-th functional lookup
            fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[flag].inv_idx_tablet_id_;
            fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[flag].fwd_idx_tablet_id_;
            fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[flag].doc_id_idx_tablet_id_;
          } else {
            // scan reached through the generic child recursison (flag == -1): use the top level ids
            fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.inv_idx_tablet_id_;
            fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fwd_idx_tablet_id_;
            fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.doc_id_idx_tablet_id_;
          }
          if (OB_FAIL(tr_merge_iter->set_related_tablet_ids(ls_id, fts_tablet_ids))) {
            LOG_WARN("failed to set related tablet ids", K(ret));
          }
          need_set_child = false;
        }
        break;
      }
      case ObDASOpType::DAS_OP_TABLE_SCAN: {
        if (OB_UNLIKELY(iter_type != ObDASIterType::DAS_ITER_SCAN)) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("iter type not match with ctdef", K(ret), K(attach_ctdef->op_type_), K(iter_type));
        } else {
          need_set_child = false;
        }
        break;
      }
      default: {
        need_set_child = false;
        break;
      }
    }
    if (OB_FAIL(ret) || !need_set_child) {
    } else if (OB_UNLIKELY(attach_ctdef->children_cnt_ != root_iter->get_children_cnt())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected iter children count not equal to ctdef children count",
          K(ret), K(attach_ctdef->children_cnt_), K(root_iter->get_children_cnt()));
    } else {
      // generic recursion: children are paired by position; -1 means "no fts_tablet_ids_ slot"
      for (int64_t i = 0; OB_SUCC(ret) && i < attach_ctdef->children_cnt_; ++i) {
        if (OB_FAIL(set_func_lookup_iter_related_ids(
            attach_ctdef->children_[i],
            related_tablet_ids,
            ls_id,
            -1,
            root_iter->get_children()[i]))) {
          LOG_WARN("failed to set text retrieval related ids", K(ret));
        }
      }
    }
  }
  return ret;
}
/***************** PUBLIC END *****************/
int ObDASIterUtils::create_partition_scan_tree(storage::ObTableScanParam &scan_param,
@ -490,6 +631,10 @@ int ObDASIterUtils::create_text_retrieval_tree(ObTableScanParam &scan_param,
const bool has_lookup = ObDASOpType::DAS_OP_TABLE_LOOKUP == attach_ctdef->op_type_;
int64_t token_cnt = 0;
bool taat_mode = false;
ObDASFTSTabletID fts_tablet_ids;
fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.inv_idx_tablet_id_;
fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fwd_idx_tablet_id_;
fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.doc_id_idx_tablet_id_;
if (OB_UNLIKELY(attach_ctdef->op_type_ != ObDASOpType::DAS_OP_IR_SCAN
&& attach_ctdef->op_type_ != ObDASOpType::DAS_OP_TABLE_LOOKUP
&& attach_ctdef->op_type_ != ObDASOpType::DAS_OP_SORT)) {
@ -507,7 +652,7 @@ int ObDASIterUtils::create_text_retrieval_tree(ObTableScanParam &scan_param,
alloc,
ir_scan_ctdef,
ir_scan_rtdef,
related_tablet_ids,
fts_tablet_ids,
trans_desc,
snapshot,
text_retrieval_result))) {
@ -592,11 +737,212 @@ int ObDASIterUtils::create_text_retrieval_tree(ObTableScanParam &scan_param,
return ret;
}
int ObDASIterUtils::create_functional_text_retrieval_sub_tree(const ObLSID &ls_id,
common::ObIAllocator &alloc,
const ObDASIRScanCtDef *ir_scan_ctdef,
ObDASIRScanRtDef *ir_scan_rtdef,
const ObDASFTSTabletID &related_tablet_ids,
transaction::ObTxDesc *trans_desc,
transaction::ObTxReadSnapshot *snapshot,
ObDASIter *&retrieval_result)
{
int ret = OB_SUCCESS;
ObDASTextRetrievalMergeIterParam merge_iter_param;
ObDASTextRetrievalMergeIter *tr_merge_iter = nullptr;
ObDASScanIterParam doc_cnt_agg_param;
ObDASScanIter *doc_cnt_agg_iter = nullptr;
bool taat_mode = false;
bool need_inv_idx_agg_reset = false;
merge_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
merge_iter_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
merge_iter_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
merge_iter_param.output_ = &ir_scan_ctdef->result_output_;
merge_iter_param.ir_ctdef_ = ir_scan_ctdef;
merge_iter_param.ir_rtdef_ = ir_scan_rtdef;
merge_iter_param.tx_desc_ = trans_desc;
merge_iter_param.snapshot_ = snapshot;
merge_iter_param.force_return_docid_ = true;
if (0 != merge_iter_param.query_tokens_.count()) {
merge_iter_param.query_tokens_.reuse();
}
if (OB_FAIL(ObDASTextRetrievalMergeIter::build_query_tokens(ir_scan_ctdef, ir_scan_rtdef, alloc, merge_iter_param.query_tokens_))) {
LOG_WARN("failed to get query tokens for text retrieval", K(ret));
} else if (!ir_scan_ctdef->need_proj_relevance_score()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("functional lookup without relevance score not supported", K(ret));
} else if (merge_iter_param.query_tokens_.count() > OB_MAX_TEXT_RETRIEVAL_TOKEN_CNT) {
need_inv_idx_agg_reset = true;
if (!ir_scan_ctdef->need_estimate_total_doc_cnt()) {
doc_cnt_agg_param.scan_ctdef_ = ir_scan_ctdef->get_doc_id_idx_agg_ctdef();
doc_cnt_agg_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
doc_cnt_agg_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
doc_cnt_agg_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
doc_cnt_agg_param.output_ = &ir_scan_ctdef->get_doc_id_idx_agg_ctdef()->result_output_;
if (OB_FAIL(create_das_iter(alloc, doc_cnt_agg_param, doc_cnt_agg_iter))) {
LOG_WARN("failed to create doc cnt agg scan iter", K(ret));
} else {
merge_iter_param.doc_cnt_iter_ = doc_cnt_agg_iter;
}
}
ObDASTRTaatLookupIter *fts_merge_iter = nullptr;
if (OB_FAIL(ret)) {
} else if (OB_FAIL(create_das_iter(alloc, merge_iter_param, fts_merge_iter))) {
LOG_WARN("failed to create text retrieval merge iter", K(ret));
} else {
tr_merge_iter = fts_merge_iter;
taat_mode = true;
}
} else {
if (ir_scan_ctdef->need_calc_relevance() && !ir_scan_ctdef->need_estimate_total_doc_cnt()) {
doc_cnt_agg_param.scan_ctdef_ = ir_scan_ctdef->get_doc_id_idx_agg_ctdef();
doc_cnt_agg_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
doc_cnt_agg_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
doc_cnt_agg_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
doc_cnt_agg_param.output_ = &ir_scan_ctdef->get_doc_id_idx_agg_ctdef()->result_output_;
if (OB_FAIL(create_das_iter(alloc, doc_cnt_agg_param, doc_cnt_agg_iter))) {
LOG_WARN("failed to create doc cnt agg scan iter", K(ret));
} else {
merge_iter_param.doc_cnt_iter_ = doc_cnt_agg_iter;
}
}
ObDASTRDaatLookupIter *fts_merge_iter = nullptr;
if (OB_FAIL(ret)) {
} else if (OB_FAIL(create_das_iter(alloc, merge_iter_param, fts_merge_iter))) {
LOG_WARN("failed to create text retrieval merge iter", K(ret));
} else {
tr_merge_iter = fts_merge_iter;
taat_mode = false;
}
}
if (OB_FAIL(ret)) {
} else {
ObSEArray<ObDASIter *, 16> iters;
const ObIArray<ObString> &query_tokens = tr_merge_iter->get_query_tokens();
int64_t size = taat_mode && query_tokens.count() != 0 ? 1 : query_tokens.count();
for (int64_t i = 0; OB_SUCC(ret) && i < size; ++i) {
ObDASTextRetrievalIterParam retrieval_param;
ObDASTextRetrievalIter *retrieval_iter = nullptr;
retrieval_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
retrieval_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
retrieval_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
retrieval_param.output_ = &ir_scan_ctdef->result_output_;
retrieval_param.ir_ctdef_ = ir_scan_ctdef;
retrieval_param.ir_rtdef_ = ir_scan_rtdef;
retrieval_param.tx_desc_ = trans_desc;
retrieval_param.snapshot_ = snapshot;
retrieval_param.need_inv_idx_agg_reset_ = need_inv_idx_agg_reset;
ObDASScanIterParam inv_idx_scan_iter_param;
ObDASScanIter *inv_idx_scan_iter = nullptr;
inv_idx_scan_iter_param.scan_ctdef_ = ir_scan_ctdef->get_inv_idx_scan_ctdef();
inv_idx_scan_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
inv_idx_scan_iter_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
inv_idx_scan_iter_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
inv_idx_scan_iter_param.output_ = &ir_scan_ctdef->get_inv_idx_scan_ctdef()->result_output_;
ObDASScanIterParam inv_idx_agg_iter_param;
ObDASScanIter *inv_idx_agg_iter = nullptr;
if (ir_scan_ctdef->need_inv_idx_agg()) {
inv_idx_agg_iter_param.scan_ctdef_ = ir_scan_ctdef->get_inv_idx_agg_ctdef();
inv_idx_agg_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
inv_idx_agg_iter_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
inv_idx_agg_iter_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
inv_idx_agg_iter_param.output_ = &ir_scan_ctdef->get_inv_idx_agg_ctdef()->result_output_;
}
ObDASScanIterParam fwd_idx_iter_param;
ObDASScanIter *fwd_idx_iter = nullptr;
if (ir_scan_ctdef->need_fwd_idx_agg()) {
fwd_idx_iter_param.scan_ctdef_ = ir_scan_ctdef->get_fwd_idx_agg_ctdef();
fwd_idx_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
fwd_idx_iter_param.eval_ctx_ = ir_scan_rtdef->eval_ctx_;
fwd_idx_iter_param.exec_ctx_ = &ir_scan_rtdef->eval_ctx_->exec_ctx_;
fwd_idx_iter_param.output_ = &ir_scan_ctdef->get_fwd_idx_agg_ctdef()->result_output_;
}
if (OB_FAIL(create_das_iter(alloc, inv_idx_scan_iter_param, inv_idx_scan_iter))) {
LOG_WARN("failed to create inv idx iter", K(ret));
} else if (ir_scan_ctdef->need_inv_idx_agg()
&& OB_FAIL(create_das_iter(alloc, inv_idx_agg_iter_param, inv_idx_agg_iter))) {
LOG_WARN("failed to create inv idx agg iter", K(ret));
} else if (ir_scan_ctdef->need_fwd_idx_agg()
&& OB_FAIL(create_das_iter(alloc, fwd_idx_iter_param, fwd_idx_iter))) {
LOG_WARN("failed to create fwd idx iter", K(ret));
} else {
retrieval_param.inv_idx_scan_iter_ = inv_idx_scan_iter;
retrieval_param.inv_idx_agg_iter_ = inv_idx_agg_iter;
retrieval_param.fwd_idx_iter_ = fwd_idx_iter;
const int64_t inv_idx_iter_cnt = ir_scan_ctdef->need_inv_idx_agg() ? 2 : 1;
const int64_t fwd_idx_iter_cnt = ir_scan_ctdef->need_fwd_idx_agg() ? 1 : 0;
const int64_t tr_children_cnt = inv_idx_iter_cnt + fwd_idx_iter_cnt;
if (taat_mode) {
if (OB_FAIL(create_das_iter(alloc, retrieval_param, retrieval_iter))) {
LOG_WARN("failed to create text retrieval iter", K(ret));
}
} else {
ObDASTRCacheIter *tr_iter = nullptr;
if (OB_FAIL(create_das_iter(alloc, retrieval_param, tr_iter))) {
LOG_WARN("failed to create text retrieval iter", K(ret));
} else {
retrieval_iter = tr_iter;
}
}
if (OB_FAIL(ret)) {
// set query_token and range in do_table_scan
} else if (OB_FAIL(create_iter_children_array(tr_children_cnt, alloc, retrieval_iter))) {
LOG_WARN("failed to create iter children array", K(ret));
} else {
retrieval_iter->get_children()[0] = inv_idx_scan_iter;
if (ir_scan_ctdef->need_inv_idx_agg()) {
retrieval_iter->get_children()[1] = inv_idx_agg_iter;
}
if (ir_scan_ctdef->need_fwd_idx_agg()) {
retrieval_iter->get_children()[2] = fwd_idx_iter;
}
retrieval_iter->set_ls_tablet_ids(
ls_id,
related_tablet_ids.inv_idx_tablet_id_,
related_tablet_ids.fwd_idx_tablet_id_);
if (OB_FAIL(iters.push_back(retrieval_iter))) {
LOG_WARN("failed append retrieval iter to array", K(ret));
}
}
}
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(tr_merge_iter->set_merge_iters(iters))) {
LOG_WARN("failed to set merge iters for text retrieval", K(ret));
} else if (OB_FAIL(tr_merge_iter->set_related_tablet_ids(ls_id, related_tablet_ids))) {
LOG_WARN("failed to set related tabelt ids", K(ret));
} else {
ObDASIter **&tr_merge_children = tr_merge_iter->get_children();
const bool need_do_total_doc_cnt = (ir_scan_ctdef->need_calc_relevance()) && !ir_scan_ctdef->need_estimate_total_doc_cnt();
const int64_t tr_merge_children_cnt = need_do_total_doc_cnt ? iters.count() + 1 : iters.count();
if (0 != tr_merge_children_cnt
&& OB_FAIL(create_iter_children_array(tr_merge_children_cnt, alloc, tr_merge_iter))) {
LOG_WARN("failed to alloc text retrieval merge iter children", K(ret), K(tr_merge_children_cnt));
} else {
for (int64_t i = 0; i < iters.count(); ++i) {
tr_merge_children[i] = iters.at(i);
}
if (need_do_total_doc_cnt) {
tr_merge_children[iters.count()] = doc_cnt_agg_iter;
}
tr_merge_iter->set_doc_id_idx_tablet_id(related_tablet_ids.doc_id_idx_tablet_id_);
tr_merge_iter->set_ls_id(ls_id);
retrieval_result = tr_merge_iter;
}
}
}
return ret;
}
int ObDASIterUtils::create_text_retrieval_sub_tree(const ObLSID &ls_id,
common::ObIAllocator &alloc,
const ObDASIRScanCtDef *ir_scan_ctdef,
ObDASIRScanRtDef *ir_scan_rtdef,
const ObDASRelatedTabletID &related_tablet_ids,
const ObDASFTSTabletID &related_tablet_ids,
transaction::ObTxDesc *trans_desc,
transaction::ObTxReadSnapshot *snapshot,
ObDASIter *&retrieval_result)
@ -676,6 +1022,7 @@ int ObDASIterUtils::create_text_retrieval_sub_tree(const ObLSID &ls_id,
retrieval_param.ir_rtdef_ = ir_scan_rtdef;
retrieval_param.tx_desc_ = trans_desc;
retrieval_param.snapshot_ = snapshot;
retrieval_param.need_inv_idx_agg_reset_ = true;
ObDASScanIterParam inv_idx_scan_iter_param;
ObDASScanIter *inv_idx_scan_iter = nullptr;
@ -683,13 +1030,14 @@ int ObDASIterUtils::create_text_retrieval_sub_tree(const ObLSID &ls_id,
inv_idx_scan_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
ObDASScanIterParam inv_idx_agg_iter_param;
ObDASScanIter *inv_idx_agg_iter = nullptr;
init_scan_iter_param(inv_idx_agg_iter_param, ir_scan_ctdef->get_inv_idx_agg_ctdef(), ir_scan_rtdef);
inv_idx_agg_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
if (ir_scan_ctdef->need_inv_idx_agg()) {
init_scan_iter_param(inv_idx_agg_iter_param, ir_scan_ctdef->get_inv_idx_agg_ctdef(), ir_scan_rtdef);
}
ObDASScanIterParam fwd_idx_iter_param;
ObDASScanIter *fwd_idx_iter = nullptr;
init_scan_iter_param(fwd_idx_iter_param, ir_scan_ctdef->get_fwd_idx_agg_ctdef(), ir_scan_rtdef);
fwd_idx_iter_param.max_size_ = ir_scan_rtdef->eval_ctx_->max_batch_size_;
if (ir_scan_ctdef->need_fwd_idx_agg()) {
init_scan_iter_param(fwd_idx_iter_param, ir_scan_ctdef->get_fwd_idx_agg_ctdef(), ir_scan_rtdef);
}
if (OB_FAIL(create_das_iter(alloc, inv_idx_scan_iter_param, inv_idx_scan_iter))) {
LOG_WARN("failed to create inv idx iter", K(ret));
} else if (ir_scan_ctdef->need_inv_idx_agg()
@ -1009,6 +1357,460 @@ int ObDASIterUtils::create_domain_lookup_sub_tree(ObTableScanParam &scan_param,
return ret;
}
/**
 * Build the iterator tree of a functional lookup plan whose root ctdef is
 * DAS_OP_INDEX_PROJ_LOOKUP.
 *
 * The resulting root is an ObDASCacheLookupIter with two children:
 *   [0] the rowkey scan sub tree, built from the rowkey scan ctdef which must be one of
 *       DAS_OP_IR_AUX_LOOKUP (text retrieval, optional sort, doc-id -> rowkey lookup),
 *       DAS_OP_TABLE_SCAN (plain scan) or DAS_OP_SORT (scan wrapped in a sort);
 *   [1] the functional lookup sub tree created by create_functional_lookup_sub_tree().
 * Before the lookup sub tree is built, every rowkey expr of the doc-id lookup (and of the main
 * table lookup, if any) is checked to be part of the rowkey scan output exprs.
 *
 * @param scan_param          scan param of the current scan; also supplies the log stream id.
 * @param alloc               allocator used for every iterator.
 * @param attach_ctdef        root attached ctdef; must be a DAS_OP_INDEX_PROJ_LOOKUP node.
 * @param attach_rtdef        root attached rtdef matching attach_ctdef.
 * @param related_tablet_ids  tablet ids of the auxiliary tables involved in the plan.
 * @param trans_desc          transaction descriptor forwarded to the iterators.
 * @param snapshot            read snapshot forwarded to the iterators.
 * @param iter_tree           [out] root of the created tree, only assigned on success.
 * @return OB_SUCCESS on success; OB_ERR_UNEXPECTED / OB_INVALID_ARGUMENT for malformed
 *         definitions; otherwise the error of the failing step.
 */
int ObDASIterUtils::create_function_lookup_tree(ObTableScanParam &scan_param,
                                                common::ObIAllocator &alloc,
                                                const ObDASBaseCtDef *attach_ctdef,
                                                ObDASBaseRtDef *attach_rtdef,
                                                const ObDASRelatedTabletID &related_tablet_ids,
                                                transaction::ObTxDesc *trans_desc,
                                                transaction::ObTxReadSnapshot *snapshot,
                                                ObDASIter *&iter_tree)
{
  int ret = OB_SUCCESS;
  const ObDASIndexProjLookupCtDef *idx_proj_lookup_ctdef = nullptr;
  ObDASIndexProjLookupRtDef *idx_proj_lookup_rtdef = nullptr;
  const ObDASFuncLookupCtDef *func_lookup_ctdef = nullptr;
  ObDASFuncLookupRtDef *func_lookup_rtdef = nullptr;
  const ObDASBaseCtDef *rowkey_scan_ctdef = nullptr;
  ObDASBaseRtDef *rowkey_scan_rtdef = nullptr;
  ObDASIter *rowkey_scan_iter = nullptr;
  // for check {
  const ExprFixedArray *docid_lookup_rowkey_exprs = nullptr;
  const ExprFixedArray *main_lookup_rowkey_exprs = nullptr;
  const ExprFixedArray *rowkey_scan_ouput_exprs = nullptr;
  // for check }

  if (OB_ISNULL(attach_ctdef) || OB_ISNULL(attach_rtdef)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected nullptr to attach def", K(ret), KP(attach_ctdef), KP(attach_rtdef));
  } else if (OB_UNLIKELY(attach_ctdef->op_type_ != ObDASOpType::DAS_OP_INDEX_PROJ_LOOKUP)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("unexpected text retrieval root attach def type", K(ret), KPC(attach_ctdef));
  } else {
    idx_proj_lookup_ctdef = static_cast<const ObDASIndexProjLookupCtDef *>(attach_ctdef);
    idx_proj_lookup_rtdef = static_cast<ObDASIndexProjLookupRtDef *>(attach_rtdef);
    func_lookup_ctdef = static_cast<const ObDASFuncLookupCtDef *>(idx_proj_lookup_ctdef->get_lookup_ctdef());
    func_lookup_rtdef = static_cast<ObDASFuncLookupRtDef *>(idx_proj_lookup_rtdef->get_lookup_rtdef());
    rowkey_scan_ctdef = idx_proj_lookup_ctdef->get_rowkey_scan_ctdef();
    rowkey_scan_rtdef = idx_proj_lookup_rtdef->get_rowkey_scan_rtdef();
    if (OB_ISNULL(func_lookup_ctdef) || OB_ISNULL(func_lookup_rtdef)
        || OB_ISNULL(rowkey_scan_ctdef) || OB_ISNULL(rowkey_scan_rtdef)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected nullptr to ctdef", K(ret), KP(func_lookup_ctdef), KP(func_lookup_rtdef),
          KP(rowkey_scan_ctdef), KP(rowkey_scan_rtdef));
    } else if (OB_UNLIKELY(rowkey_scan_ctdef->op_type_ != ObDASOpType::DAS_OP_IR_AUX_LOOKUP
                           && rowkey_scan_ctdef->op_type_ != ObDASOpType::DAS_OP_TABLE_SCAN
                           && rowkey_scan_ctdef->op_type_ != ObDASOpType::DAS_OP_SORT)) {
      ret = OB_INVALID_ARGUMENT;
      LOG_WARN("unexpected rowkey scan type", K(ret), KPC(rowkey_scan_ctdef));
    }
  }

  // step 1: build the rowkey scan sub tree
  if (OB_FAIL(ret)) {
  } else if (ObDASOpType::DAS_OP_IR_AUX_LOOKUP == rowkey_scan_ctdef->op_type_) {
    const ObDASIRScanCtDef *ir_scan_ctdef = nullptr;
    ObDASIRScanRtDef *ir_scan_rtdef = nullptr;
    const ObDASIRAuxLookupCtDef *aux_lookup_ctdef = static_cast<const ObDASIRAuxLookupCtDef *>(rowkey_scan_ctdef);
    rowkey_scan_ouput_exprs = &aux_lookup_ctdef->get_lookup_scan_ctdef()->result_output_;
    ObDASIRAuxLookupRtDef *aux_lookup_rtdef = static_cast<ObDASIRAuxLookupRtDef *>(rowkey_scan_rtdef);
    ObDASLocalLookupIter *doc_id_lookup_iter = nullptr;
    ObDASIter *text_retrieval_result = nullptr;
    const ObDASSortCtDef *sort_ctdef = nullptr;
    ObDASSortRtDef *sort_rtdef = nullptr;
    ObDASIter *sort_result = nullptr;
    bool taat_mode = false;
    ObDASFTSTabletID fts_tablet_ids;
    fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.inv_idx_tablet_id_;
    fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fwd_idx_tablet_id_;
    fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.doc_id_idx_tablet_id_;
    const bool need_rewind = true;
    const bool need_distinct = false;
    if (OB_FAIL(ObDASUtils::find_target_das_def(
        rowkey_scan_ctdef,
        rowkey_scan_rtdef,
        ObDASOpType::DAS_OP_IR_SCAN,
        ir_scan_ctdef,
        ir_scan_rtdef))) {
      LOG_WARN("fail to find ir scan definition", K(ret));
    } else if (OB_FAIL(create_text_retrieval_sub_tree(
        scan_param.ls_id_,
        alloc,
        ir_scan_ctdef,
        ir_scan_rtdef,
        fts_tablet_ids,
        trans_desc,
        snapshot,
        text_retrieval_result))) {
      LOG_WARN("failed to create text retrieval sub tree", K(ret));
    } else if (FALSE_IT(rowkey_scan_iter = text_retrieval_result)) {
    } else if (aux_lookup_ctdef->get_doc_id_scan_ctdef()->op_type_ != ObDASOpType::DAS_OP_SORT) {
      // do nothing, just skip
    } else if (FALSE_IT(sort_ctdef = static_cast<const ObDASSortCtDef *>(aux_lookup_ctdef->get_doc_id_scan_ctdef()))) {
    } else if (FALSE_IT(sort_rtdef = static_cast<ObDASSortRtDef *>(aux_lookup_rtdef->get_doc_id_scan_rtdef()))) {
    } else if (OB_FAIL(create_sort_sub_tree(
        alloc, sort_ctdef, sort_rtdef, need_rewind, need_distinct, text_retrieval_result, sort_result))) {
      LOG_WARN("failed to create sort sub tree", K(ret));
    } else {
      rowkey_scan_iter = sort_result;
    }

    if (OB_FAIL(ret)) {
    } else {
      ObDASScanIter *docid_rowkey_table_iter = nullptr;
      ObDASScanIterParam docid_rowkey_table_param;
      const ObDASScanCtDef *lookup_ctdef = static_cast<const ObDASScanCtDef*>(aux_lookup_ctdef->get_lookup_scan_ctdef());
      ObDASScanRtDef *lookup_rtdef = static_cast<ObDASScanRtDef*>(aux_lookup_rtdef->get_lookup_scan_rtdef());
      docid_rowkey_table_param.scan_ctdef_ = lookup_ctdef;
      docid_rowkey_table_param.max_size_ = lookup_rtdef->eval_ctx_->is_vectorized() ? lookup_rtdef->eval_ctx_->max_batch_size_ : 1;
      docid_rowkey_table_param.eval_ctx_ = lookup_rtdef->eval_ctx_;
      docid_rowkey_table_param.exec_ctx_ = &lookup_rtdef->eval_ctx_->exec_ctx_;
      docid_rowkey_table_param.output_ = &lookup_ctdef->result_output_;
      if (OB_FAIL(create_das_iter(alloc, docid_rowkey_table_param, docid_rowkey_table_iter))) {
        LOG_WARN("failed to create doc id table iter", K(ret));
      } else {
        ObDASLocalLookupIterParam doc_id_lookup_param;
        doc_id_lookup_param.max_size_ = aux_lookup_rtdef->eval_ctx_->is_vectorized()
            ? aux_lookup_rtdef->eval_ctx_->max_batch_size_ : 1;
        doc_id_lookup_param.eval_ctx_ = aux_lookup_rtdef->eval_ctx_;
        doc_id_lookup_param.exec_ctx_ = &aux_lookup_rtdef->eval_ctx_->exec_ctx_;
        doc_id_lookup_param.output_ = &aux_lookup_ctdef->result_output_;
        doc_id_lookup_param.default_batch_row_count_ = doc_id_lookup_param.max_size_;
        doc_id_lookup_param.index_ctdef_ = aux_lookup_ctdef->get_doc_id_scan_ctdef();
        doc_id_lookup_param.index_rtdef_ = aux_lookup_rtdef->get_doc_id_scan_rtdef();
        doc_id_lookup_param.lookup_ctdef_ = aux_lookup_ctdef->get_lookup_scan_ctdef();
        doc_id_lookup_param.lookup_rtdef_ = aux_lookup_rtdef->get_lookup_scan_rtdef();
        doc_id_lookup_param.index_table_iter_ = rowkey_scan_iter;
        doc_id_lookup_param.data_table_iter_ = docid_rowkey_table_iter;
        doc_id_lookup_param.trans_desc_ = trans_desc;
        doc_id_lookup_param.snapshot_ = snapshot;
        doc_id_lookup_param.rowkey_exprs_ = &aux_lookup_ctdef->get_lookup_scan_ctdef()->rowkey_exprs_;
        ObDASTextRetrievalMergeIter *tr_merge_iter = static_cast<ObDASTextRetrievalMergeIter *>(text_retrieval_result);
        taat_mode = tr_merge_iter->is_taat_mode();
        // the doc-id -> rowkey lookup keeps the input order in TAAT mode or below a sort
        if (taat_mode || nullptr != sort_result) {
          doc_id_lookup_param.lookup_rtdef_->scan_flag_.scan_order_ = ObQueryFlag::KeepOrder;
        }
        if (OB_FAIL(create_das_iter(alloc, doc_id_lookup_param, doc_id_lookup_iter))) {
          LOG_WARN("failed to create doc id lookup iter", K(ret));
        } else if (OB_FAIL(create_iter_children_array(2, alloc, doc_id_lookup_iter))) {
          LOG_WARN("failed to create iter children array", K(ret));
        } else {
          doc_id_lookup_iter->get_children()[0] = rowkey_scan_iter;
          doc_id_lookup_iter->get_children()[1] = docid_rowkey_table_iter;
          docid_rowkey_table_iter->set_scan_param(doc_id_lookup_iter->get_lookup_param());
          doc_id_lookup_iter->set_tablet_id(related_tablet_ids.doc_id_idx_tablet_id_);
          doc_id_lookup_iter->set_ls_id(scan_param.ls_id_);
          rowkey_scan_iter = doc_id_lookup_iter;
        }
      }
    }
  } else if (ObDASOpType::DAS_OP_TABLE_SCAN == rowkey_scan_ctdef->op_type_) {
    ObDASScanIter *scan_iter = nullptr;
    ObDASScanIterParam iter_param;
    // this code is based on the assumption that scan_param will not be released until this iter is released
    const ObDASScanCtDef *ctdef = static_cast<const ObDASScanCtDef*>(rowkey_scan_ctdef);
    ObDASScanRtDef *rtdef = static_cast<ObDASScanRtDef*>(rowkey_scan_rtdef);
    iter_param.scan_ctdef_ = ctdef;
    iter_param.max_size_ = rtdef->eval_ctx_->is_vectorized() ? rtdef->eval_ctx_->max_batch_size_ : 1;
    iter_param.eval_ctx_ = rtdef->eval_ctx_;
    iter_param.exec_ctx_ = &rtdef->eval_ctx_->exec_ctx_;
    iter_param.output_ = &ctdef->result_output_;
    if (OB_FAIL(create_das_iter(alloc, iter_param, scan_iter))) {
      LOG_WARN("failed to create data table lookup scan iter", K(ret));
    } else {
      scan_iter->set_scan_param(scan_param);
      rowkey_scan_iter = scan_iter;
      rowkey_scan_ouput_exprs = &ctdef->pd_expr_spec_.access_exprs_;
    }
  } else if (ObDASOpType::DAS_OP_SORT == rowkey_scan_ctdef->op_type_) {
    const ObDASScanCtDef *scan_ctdef = nullptr;
    ObDASScanRtDef *scan_rtdef = nullptr;
    ObDASScanIterParam iter_param;
    const ObDASSortCtDef *sort_ctdef = nullptr;
    ObDASSortRtDef *sort_rtdef = nullptr;
    ObDASIter *sort_result = nullptr;
    ObDASScanIter *scan_iter = nullptr;
    const bool need_rewind = true;
    const bool need_distinct = false;
    if (OB_FAIL(ObDASUtils::find_target_das_def(
        rowkey_scan_ctdef,
        rowkey_scan_rtdef,
        ObDASOpType::DAS_OP_TABLE_SCAN,
        scan_ctdef,
        scan_rtdef))) {
      LOG_WARN("fail to find scan definition", K(ret));
    } else {
      iter_param.scan_ctdef_ = scan_ctdef;
      iter_param.max_size_ = scan_rtdef->eval_ctx_->is_vectorized() ? scan_rtdef->eval_ctx_->max_batch_size_ : 1;
      iter_param.eval_ctx_ = scan_rtdef->eval_ctx_;
      iter_param.exec_ctx_ = &scan_rtdef->eval_ctx_->exec_ctx_;
      iter_param.output_ = &scan_ctdef->result_output_;
    }
    if (OB_FAIL(ret)) {
    } else if (OB_FAIL(create_das_iter(alloc, iter_param, scan_iter))) {
      LOG_WARN("failed to create data table lookup scan iter", K(ret));
    } else if (FALSE_IT(scan_iter->set_scan_param(scan_param))) {
    } else if (FALSE_IT(sort_ctdef = static_cast<const ObDASSortCtDef *>(rowkey_scan_ctdef))) {
    } else if (FALSE_IT(sort_rtdef = static_cast<ObDASSortRtDef *>(rowkey_scan_rtdef))) {
    } else if (OB_FAIL(create_sort_sub_tree(
        alloc, sort_ctdef, sort_rtdef, need_rewind, need_distinct, scan_iter, sort_result))) {
      LOG_WARN("failed to create sort sub tree", K(ret));
    } else {
      rowkey_scan_iter = sort_result;
      rowkey_scan_ouput_exprs = &scan_ctdef->pd_expr_spec_.access_exprs_;
    }
  }

  // step 2: check exprs, every lookup rowkey expr must be produced by the rowkey scan.
  // Only run on success: after an early failure func_lookup_ctdef may still be nullptr.
  if (OB_SUCC(ret)) {
    const ObDASScanCtDef *docid_lookup_ctdef =
        static_cast<const ObDASScanCtDef *>(func_lookup_ctdef->get_doc_id_lookup_scan_ctdef());
    if (OB_ISNULL(docid_lookup_ctdef) || OB_ISNULL(rowkey_scan_ouput_exprs)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected nullptr for rowkey exprs check", K(ret), KP(docid_lookup_ctdef), KP(rowkey_scan_ouput_exprs));
    } else {
      docid_lookup_rowkey_exprs = &docid_lookup_ctdef->rowkey_exprs_;
      for (int64_t i = 0; OB_SUCC(ret) && i < docid_lookup_rowkey_exprs->count(); i++) {
        bool find = false;
        for (int64_t j = 0; !find && j < rowkey_scan_ouput_exprs->count(); j++) {
          if (rowkey_scan_ouput_exprs->at(j) == docid_lookup_rowkey_exprs->at(i)) {
            find = true;
          }
        }
        if (OB_UNLIKELY(!find)) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("unexpected error, rowkey scan output exprs count not equal to docid lookup rowkey exprs count", K(ret));
        }
      }
    }
  }
  if (OB_SUCC(ret) && func_lookup_ctdef->has_main_table_lookup()) {
    const ObDASScanCtDef *main_lookup_ctdef =
        static_cast<const ObDASScanCtDef *>(func_lookup_ctdef->get_main_lookup_scan_ctdef());
    if (OB_ISNULL(main_lookup_ctdef)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected nullptr to main lookup ctdef", K(ret));
    } else {
      main_lookup_rowkey_exprs = &main_lookup_ctdef->rowkey_exprs_;
      for (int64_t i = 0; OB_SUCC(ret) && i < main_lookup_rowkey_exprs->count(); i++) {
        bool find = false;
        for (int64_t j = 0; !find && j < rowkey_scan_ouput_exprs->count(); j++) {
          if (rowkey_scan_ouput_exprs->at(j) == main_lookup_rowkey_exprs->at(i)) {
            find = true;
          }
        }
        if (OB_UNLIKELY(!find)) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("unexpected error, rowkey scan output exprs count not equal to docid lookup rowkey exprs count", K(ret));
        }
      }
    }
  }

  // step 3: build the functional lookup sub tree and hook both sub trees under the root lookup iter
  ObDASIter *func_lookup_result = nullptr;
  ObDASCacheLookupIter *root_lookup_iter = nullptr;
  if (FAILEDx(create_functional_lookup_sub_tree(
      scan_param,
      scan_param.ls_id_,
      alloc,
      func_lookup_ctdef,
      func_lookup_rtdef,
      related_tablet_ids,
      true, /*lookup_keep_order*/
      trans_desc,
      snapshot,
      func_lookup_result))) {
    LOG_WARN("failed to create domain index lookup iters", K(ret));
  } else {
    ObDASCacheLookupIterParam root_lookup_param;
    root_lookup_param.max_size_ = idx_proj_lookup_rtdef->eval_ctx_->is_vectorized()
        ? idx_proj_lookup_rtdef->get_rowkey_scan_rtdef()->eval_ctx_->max_batch_size_ : 1;
    root_lookup_param.eval_ctx_ = idx_proj_lookup_rtdef->eval_ctx_;
    root_lookup_param.exec_ctx_ = &idx_proj_lookup_rtdef->eval_ctx_->exec_ctx_;
    root_lookup_param.output_ = &idx_proj_lookup_ctdef->result_output_;
    root_lookup_param.default_batch_row_count_ = root_lookup_param.max_size_;
    root_lookup_param.index_ctdef_ = idx_proj_lookup_ctdef->get_rowkey_scan_ctdef();
    root_lookup_param.index_rtdef_ = idx_proj_lookup_rtdef->get_rowkey_scan_rtdef();
    root_lookup_param.lookup_ctdef_ = static_cast<const ObDASScanCtDef *>(func_lookup_ctdef->get_doc_id_lookup_scan_ctdef());
    root_lookup_param.lookup_rtdef_ = static_cast<ObDASScanRtDef *>(func_lookup_rtdef->get_doc_id_lookup_scan_rtdef());
    root_lookup_param.index_table_iter_ = rowkey_scan_iter;
    root_lookup_param.data_table_iter_ = func_lookup_result;
    root_lookup_param.trans_desc_ = trans_desc;
    root_lookup_param.snapshot_ = snapshot;
    root_lookup_param.rowkey_exprs_ = &static_cast<const ObDASScanCtDef *>(func_lookup_ctdef->get_doc_id_lookup_scan_ctdef())->rowkey_exprs_;
    root_lookup_param.lookup_rtdef_->scan_flag_.scan_order_ = ObQueryFlag::KeepOrder;
    if (OB_FAIL(create_das_iter(alloc, root_lookup_param, root_lookup_iter))) {
      LOG_WARN("failed to create das iter", K(ret));
    } else if (OB_FAIL(create_iter_children_array(2, alloc, root_lookup_iter))) {
      LOG_WARN("failed to create iter children array", K(ret));
    } else {
      root_lookup_iter->get_children()[0] = rowkey_scan_iter;
      root_lookup_iter->get_children()[1] = func_lookup_result;
      static_cast<ObDASFuncLookupIter *>(func_lookup_result)->set_index_scan_param(root_lookup_iter->get_lookup_param());
      root_lookup_iter->set_tablet_id(related_tablet_ids.rowkey_doc_tablet_id_);
      root_lookup_iter->set_ls_id(scan_param.ls_id_);
      iter_tree = root_lookup_iter;
    }
  }
  return ret;
}
int ObDASIterUtils::create_functional_lookup_sub_tree(ObTableScanParam &scan_param,
const ObLSID &ls_id,
common::ObIAllocator &alloc,
const ObDASFuncLookupCtDef *func_lookup_ctdef,
ObDASFuncLookupRtDef *func_lookup_rtdef,
const ObDASRelatedTabletID &related_tablet_ids,
const bool &lookup_keep_order,
transaction::ObTxDesc *trans_desc,
transaction::ObTxReadSnapshot *snapshot,
ObDASIter *&fun_lookup_result)
{
int ret = OB_SUCCESS;
void *buf = nullptr;
ObDASIter **data_table_iters = nullptr;
ObDASScanIter *main_lookup_table_iter = nullptr;
ObDASFuncDataIter *fts_merge_iter = nullptr;
ObDASScanIter *rowkey_docid_iter = nullptr;
ObDASFuncLookupIter *func_lookup_iter = nullptr;
// ObDASCacheLookupIter *root_local_lookup_iter = nullptr;
const int64_t func_lookup_cnt = func_lookup_ctdef->func_lookup_cnt_;
const int64_t total_lookup_cnt = func_lookup_ctdef->has_main_table_lookup() ? func_lookup_cnt + 1 : func_lookup_cnt;
ObDASFuncDataIterParam fts_merge_iter_param;
if (OB_UNLIKELY(0 == func_lookup_cnt)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, func lookup count is 0", K(ret));
} else if (OB_ISNULL(buf = alloc.alloc(sizeof(ObDASIter *) * func_lookup_cnt))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("failed to allocate enough memory", K(sizeof(ObDASIter *) * func_lookup_cnt), K(ret));
} else {
data_table_iters = static_cast<ObDASIter **>(buf);
for (int64_t i = 0; OB_SUCC(ret) && i < func_lookup_cnt; i++) {
data_table_iters[i] = nullptr;
ObDASFTSTabletID fts_tablet_ids;
fts_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[i].inv_idx_tablet_id_;
fts_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[i].fwd_idx_tablet_id_;
fts_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids.fts_tablet_ids_[i].doc_id_idx_tablet_id_;
if (OB_FAIL(create_functional_text_retrieval_sub_tree(scan_param.ls_id_,
alloc,
static_cast<const ObDASIRScanCtDef *>(func_lookup_ctdef->get_func_lookup_scan_ctdef(i)),
static_cast<ObDASIRScanRtDef *>(func_lookup_rtdef->get_func_lookup_scan_rtdef(i)),
fts_tablet_ids,
trans_desc,
snapshot,
data_table_iters[i]))) {
LOG_WARN("failed to create text retrieval sub tree", K(ret));
}
}
if (OB_SUCC(ret)) {
fts_merge_iter_param.tr_merge_iters_ = data_table_iters;
fts_merge_iter_param.iter_count_ = func_lookup_cnt;
fts_merge_iter_param.trans_desc_ = trans_desc;
fts_merge_iter_param.snapshot_ = snapshot;
if (func_lookup_ctdef->has_main_table_lookup()) {
ObDASScanIterParam main_table_param;
const ObDASScanCtDef *ctdef = static_cast<const ObDASScanCtDef *>(func_lookup_ctdef->get_main_lookup_scan_ctdef());
ObDASScanRtDef *rtdef = static_cast<ObDASScanRtDef *>(func_lookup_rtdef->get_main_lookup_scan_rtdef());
if (OB_ISNULL(ctdef) || OB_ISNULL(rtdef)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpeted error, ctdef or rtdef is nullptr", K(ret), KPC(ctdef), KPC(rtdef));
} else if (ObDASOpType::DAS_OP_TABLE_SCAN != ctdef->op_type_) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpeted error, ctdef is not table scan", K(ret), K(ctdef->op_type_), K(ObDASOpType::DAS_OP_TABLE_SCAN));
} else {
main_table_param.scan_ctdef_ = ctdef;
main_table_param.max_size_ = rtdef->eval_ctx_->is_vectorized() ? rtdef->eval_ctx_->max_batch_size_ : 1;
main_table_param.eval_ctx_ = rtdef->eval_ctx_;
main_table_param.exec_ctx_ = &rtdef->eval_ctx_->exec_ctx_;
main_table_param.output_ = &ctdef->result_output_;
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(create_das_iter(alloc, main_table_param, main_lookup_table_iter))) {
LOG_WARN("failed to create data table lookup scan iter", K(ret));
} else {
if (lookup_keep_order) {
rtdef->scan_flag_.scan_order_ = ObQueryFlag::KeepOrder;
}
fts_merge_iter_param.main_lookup_ctdef_ = ctdef;
fts_merge_iter_param.main_lookup_rtdef_ = rtdef;
fts_merge_iter_param.main_lookup_iter_ = main_lookup_table_iter;
}
}
}
}
// create fts merge iter
if (OB_FAIL(ret)) {
} else if (OB_FAIL(create_das_iter(alloc, fts_merge_iter_param, fts_merge_iter))) {
LOG_WARN("failed to create fts merge iter", K(ret));
} else if (OB_FAIL(create_iter_children_array(total_lookup_cnt, alloc, fts_merge_iter))) {
LOG_WARN("failed to create iter children array", K(ret));
} else {
for (int64_t i = 0; i < func_lookup_cnt; ++i) {
fts_merge_iter->get_children()[i] = data_table_iters[i];
}
if (func_lookup_ctdef->has_main_table_lookup()) {
fts_merge_iter->get_children()[func_lookup_cnt] = main_lookup_table_iter;
main_lookup_table_iter->set_scan_param(fts_merge_iter->get_main_lookup_scan_param());
}
fts_merge_iter->set_tablet_id(related_tablet_ids.lookup_tablet_id_); // for main_lookup
fts_merge_iter->set_ls_id(ls_id);
}
// create function lookup iter
if (OB_SUCC(ret)) {
const ObDASBaseCtDef *rowkey_docid_ctdef = func_lookup_ctdef->get_doc_id_lookup_scan_ctdef();
ObDASBaseRtDef *rowkey_docid_rtdef = func_lookup_rtdef->get_doc_id_lookup_scan_rtdef();
ObDASScanIterParam rowkey_docid_param;
const ObDASScanCtDef *ctdef = static_cast<const ObDASScanCtDef *>(rowkey_docid_ctdef);
ObDASScanRtDef *rtdef = static_cast<ObDASScanRtDef*>(rowkey_docid_rtdef);
rowkey_docid_param.scan_ctdef_ = ctdef;
rowkey_docid_param.max_size_ = rtdef->eval_ctx_->is_vectorized() ? rtdef->eval_ctx_->max_batch_size_ : 1;
rowkey_docid_param.eval_ctx_ = rtdef->eval_ctx_;
rowkey_docid_param.exec_ctx_ = &rtdef->eval_ctx_->exec_ctx_;
rowkey_docid_param.output_ = &ctdef->result_output_;
if (OB_FAIL(create_das_iter(alloc, rowkey_docid_param, rowkey_docid_iter))) {
LOG_WARN("failed to create data table lookup scan iter", K(ret));
} else {
ObDASFuncLookupIterParam func_lookup_param;
func_lookup_param.max_size_ = func_lookup_rtdef->eval_ctx_->is_vectorized() ? func_lookup_rtdef->eval_ctx_->max_batch_size_ : 1;
func_lookup_param.eval_ctx_ = func_lookup_rtdef->eval_ctx_;
func_lookup_param.exec_ctx_ = &func_lookup_rtdef->eval_ctx_->exec_ctx_;
func_lookup_param.output_ = &func_lookup_ctdef->result_output_;
func_lookup_param.default_batch_row_count_ = func_lookup_param.max_size_;
func_lookup_param.index_ctdef_ = rowkey_docid_ctdef;
func_lookup_param.index_rtdef_ = rowkey_docid_rtdef;
func_lookup_param.lookup_ctdef_ = nullptr;
func_lookup_param.lookup_rtdef_ = nullptr;
func_lookup_param.index_table_iter_ = rowkey_docid_iter;
func_lookup_param.data_table_iter_ = fts_merge_iter;
func_lookup_param.trans_desc_ = trans_desc;
func_lookup_param.snapshot_ = snapshot;
func_lookup_param.doc_id_expr_ = func_lookup_ctdef->lookup_doc_id_expr_;
if (lookup_keep_order) {
static_cast<ObDASScanRtDef *>(func_lookup_param.index_rtdef_)->scan_flag_.scan_order_ = ObQueryFlag::KeepOrder;
}
if (OB_FAIL(create_das_iter(alloc, func_lookup_param, func_lookup_iter))) {
LOG_WARN("failed to create doc id lookup iter", K(ret));
} else if (OB_FAIL(create_iter_children_array(2, alloc, func_lookup_iter))) {
LOG_WARN("failed to create iter children array", K(ret));
} else {
func_lookup_iter->get_children()[0] = rowkey_docid_iter;
func_lookup_iter->get_children()[1] = fts_merge_iter;
}
}
}
if (OB_SUCC(ret)) {
fun_lookup_result = func_lookup_iter;
}
return ret;
}
/* local_lookup
* | |
@ -1660,5 +2462,6 @@ int ObDASIterUtils::create_iter_children_array(const int64_t children_cnt,
}
return ret;
}
} // namespace sql
} // namespace oceanbase

View File

@ -26,6 +26,9 @@
#include "sql/das/iter/ob_das_doc_id_merge_iter.h"
#include "sql/das/iter/ob_das_vid_merge_iter.h"
#include "sql/das/iter/ob_das_index_merge_iter.h"
#include "sql/das/iter/ob_das_func_data_iter.h"
#include "sql/das/iter/ob_das_functional_lookup_iter.h"
#include "sql/das/iter/ob_das_cache_lookup_iter.h"
#include "sql/engine/table/ob_table_scan_op.h"
#include "sql/das/iter/ob_das_mvi_lookup_iter.h"
@ -76,6 +79,11 @@ public:
const ObDASRelatedTabletID &related_tablet_ids,
const ObLSID &ls_id,
ObDASIter *root_iter);
static int set_func_lookup_iter_related_ids(const ObDASBaseCtDef *attach_ctdef,
const ObDASRelatedTabletID &related_tablet_ids,
const ObLSID &ls_id,
int64_t flag,
ObDASIter *root_iter);
static int set_index_merge_related_ids(const ObDASBaseCtDef *attach_ctdef,
const ObDASRelatedTabletID &related_tablet_ids,
@ -122,6 +130,15 @@ private:
transaction::ObTxReadSnapshot *snapshot,
ObDASIter *&iter_tree);
static int create_function_lookup_tree(ObTableScanParam &scan_param,
common::ObIAllocator &alloc,
const ObDASBaseCtDef *attach_ctdef,
ObDASBaseRtDef *attach_rtdef,
const ObDASRelatedTabletID &related_tablet_ids,
transaction::ObTxDesc *trans_desc,
transaction::ObTxReadSnapshot *snapshot,
ObDASIter *&iter_tree);
static int create_doc_id_scan_sub_tree(ObTableScanParam &scan_param,
common::ObIAllocator &alloc,
const ObDASDocIdMergeCtDef *merge_ctdef,
@ -175,7 +192,16 @@ private:
common::ObIAllocator &alloc,
const ObDASIRScanCtDef *ir_scan_ctdef,
ObDASIRScanRtDef *ir_scan_rtdef,
const ObDASRelatedTabletID &related_tablet_ids,
const ObDASFTSTabletID &related_tablet_ids,
transaction::ObTxDesc *trans_desc,
transaction::ObTxReadSnapshot *snapshot,
ObDASIter *&retrieval_result);
static int create_functional_text_retrieval_sub_tree(const ObLSID &ls_id,
common::ObIAllocator &alloc,
const ObDASIRScanCtDef *ir_scan_ctdef,
ObDASIRScanRtDef *ir_scan_rtdef,
const ObDASFTSTabletID &related_tablet_ids,
transaction::ObTxDesc *trans_desc,
transaction::ObTxReadSnapshot *snapshot,
ObDASIter *&retrieval_result);
@ -223,6 +249,16 @@ private:
transaction::ObTxDesc *tx_desc,
transaction::ObTxReadSnapshot *snapshot,
ObDASIter *&iter);
// Builds the functional lookup iterator sub tree and returns its root through fun_lookup_result.
// NOTE(review): judging from the definition, the root is an ObDASFuncLookupIter with two children,
// a rowkey/doc-id scan iter and an ObDASFuncDataIter; the latter owns one text retrieval sub tree
// per functional lookup plus an optional main table scan iter. Confirm against the .cpp.
// lookup_keep_order: when true, the definition switches the main lookup rtdef and the doc-id
// lookup rtdef to ObQueryFlag::KeepOrder.
// trans_desc / snapshot are forwarded to the iterator params that are created.
static int create_functional_lookup_sub_tree(ObTableScanParam &scan_param,
const ObLSID &ls_id,
common::ObIAllocator &alloc,
const ObDASFuncLookupCtDef *table_lookup_ctdef,
ObDASFuncLookupRtDef *table_lookup_rtdef,
const ObDASRelatedTabletID &related_tablet_ids,
const bool &lookup_keep_order,
transaction::ObTxDesc *trans_desc,
transaction::ObTxReadSnapshot *snapshot,
ObDASIter *&fun_lookup_result);
static int create_iter_children_array(const int64_t children_cnt,
common::ObIAllocator &alloc,

View File

@ -15,6 +15,7 @@
#include "sql/das/iter/ob_das_scan_iter.h"
#include "sql/das/iter/ob_das_doc_id_merge_iter.h"
#include "sql/das/iter/ob_das_vid_merge_iter.h"
#include "sql/das/iter/ob_das_functional_lookup_iter.h"
#include "sql/das/ob_das_scan_op.h"
#include "sql/das/ob_das_ir_define.h"
#include "sql/das/ob_das_vec_define.h"
@ -177,13 +178,17 @@ void ObDASLocalLookupIter::reset_lookup_state()
int ObDASLocalLookupIter::add_rowkey()
{
int ret = OB_SUCCESS;
OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN || data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE
|| data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE);
OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN ||
data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE ||
data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE ||
data_table_iter_->get_type() == DAS_ITER_FUNC_LOOKUP);
ObDASScanIter *scan_iter = nullptr;
if (data_table_iter_->get_type() == DAS_ITER_SCAN) {
scan_iter = static_cast<ObDASScanIter *>(data_table_iter_);
} else if (data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE) {
scan_iter = static_cast<ObDASDocIdMergeIter *>(data_table_iter_)->get_data_table_iter();
} else if (data_table_iter_->get_type() == DAS_ITER_FUNC_LOOKUP) {
scan_iter = static_cast<ObDASFuncLookupIter *>(data_table_iter_)->get_index_scan_iter();
} else if (data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE) {
scan_iter = static_cast<ObDASVIdMergeIter *>(data_table_iter_)->get_data_table_iter();
}
@ -207,6 +212,8 @@ int ObDASLocalLookupIter::add_rowkey()
LOG_WARN("failed to push back trans info array", K(ret), KPC(datum_ptr));
}
}
} else if (DAS_ITER_FUNC_LOOKUP == data_table_iter_->get_type()) {
group_id = static_cast<ObDASFuncLookupIter *>(data_table_iter_)->get_group_id();
}
int64_t group_idx = ObNewRange::get_group_idx(group_id);
@ -256,8 +263,10 @@ int ObDASLocalLookupIter::add_rowkeys(int64_t count)
int ObDASLocalLookupIter::do_index_lookup()
{
int ret = OB_SUCCESS;
OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN || data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE
|| data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE);
OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN ||
data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE ||
data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE ||
data_table_iter_->get_type() == DAS_ITER_FUNC_LOOKUP);
if (is_first_lookup_) {
is_first_lookup_ = false;
if (OB_FAIL(init_scan_param(lookup_param_, lookup_ctdef_, lookup_rtdef_))) {
@ -285,13 +294,17 @@ int ObDASLocalLookupIter::do_index_lookup()
int ObDASLocalLookupIter::check_index_lookup()
{
int ret = OB_SUCCESS;
OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN || data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE
|| data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE);
OB_ASSERT(data_table_iter_->get_type() == DAS_ITER_SCAN ||
data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE ||
data_table_iter_->get_type() == DAS_ITER_VEC_VID_MERGE ||
data_table_iter_->get_type() == DAS_ITER_FUNC_LOOKUP);
ObDASScanIter *scan_iter = nullptr;
if (data_table_iter_->get_type() == DAS_ITER_SCAN) {
scan_iter = static_cast<ObDASScanIter*>(data_table_iter_);
} else if (data_table_iter_->get_type() == DAS_ITER_DOC_ID_MERGE) {
scan_iter = static_cast<ObDASDocIdMergeIter *>(data_table_iter_)->get_data_table_iter();
} else if (data_table_iter_->get_type() == DAS_ITER_FUNC_LOOKUP) {
scan_iter = static_cast<ObDASFuncLookupIter *>(data_table_iter_)->get_index_scan_iter();
} else {
scan_iter = static_cast<ObDASVIdMergeIter *>(data_table_iter_)->get_data_table_iter();
}
@ -336,8 +349,10 @@ int ObDASLocalLookupIter::check_index_lookup()
int ObDASLocalLookupIter::init_rowkey_exprs_for_compat()
{
int ret = OB_SUCCESS;
if (ObDASOpType::DAS_OP_TABLE_SCAN == index_ctdef_->op_type_
|| ObDASOpType::DAS_OP_IR_AUX_LOOKUP == index_ctdef_->op_type_) {
if (ObDASOpType::DAS_OP_IR_AUX_LOOKUP == index_ctdef_->op_type_) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected ir aux lookup iter", K(ret));
} else if (ObDASOpType::DAS_OP_TABLE_SCAN == index_ctdef_->op_type_) {
const ObDASScanCtDef *scan_ctdef = static_cast<const ObDASScanCtDef*>(index_ctdef_);
int64_t rowkey_cnt = scan_ctdef->result_output_.count();
if (nullptr != scan_ctdef->group_id_expr_) {

View File

@ -40,6 +40,7 @@ public:
class ObDASScanCtDef;
class ObDASScanRtDef;
class ObDASFuncLookupIter;
class ObDASLocalLookupIter : public ObDASLookupIter
{
public:
@ -66,16 +67,16 @@ protected:
virtual int inner_release() override;
virtual int do_table_scan() override;
virtual int rescan() override;
virtual void reset_lookup_state();
virtual void reset_lookup_state() override;
virtual int add_rowkey() override;
virtual int add_rowkeys(int64_t count) override;
virtual int do_index_lookup() override;
virtual int check_index_lookup() override;
private:
protected:
int init_rowkey_exprs_for_compat();
private:
protected:
ObSEArray<ObDatum *, 4> trans_info_array_;
// Local lookup das task could rescan multiple times during execution, lookup_tablet_id_ and
// lookup_ls_id_ store the lookup parameter for this time.

View File

@ -120,8 +120,6 @@ protected:
int build_lookup_range(ObNewRange &range);
int build_trans_info_datum(const ObExpr *trans_info_expr, ObDatum *&datum_ptr);
common::ObArenaAllocator &get_arena_allocator() { return lookup_memctx_->get_arena_allocator(); }
private:
lib::MemoryContext lookup_memctx_;
};

View File

@ -47,6 +47,7 @@ ObDASTextRetrievalIter::ObDASTextRetrievalIter()
need_fwd_idx_agg_(false),
need_inv_idx_agg_(false),
inv_idx_agg_evaluated_(false),
need_inv_idx_agg_reset_(false),
not_first_fwd_agg_(false),
is_inited_(false)
{
@ -56,20 +57,104 @@ int ObDASTextRetrievalIter::set_query_token(const ObString &query_token)
{
  // Registers the scan ranges for one query token: after check_inv_idx_scan_and_agg_param()
  // succeeds, the default range of the token is generated and added to the inverted index scan
  // and, when need_inv_idx_agg_ is set, to the inverted index aggregation as well.
  int ret = OB_SUCCESS;
  ObNewRange inv_idx_scan_range;
  if (OB_FAIL(check_inv_idx_scan_and_agg_param())) {
    LOG_WARN("failed to check inv idx scan or agg param", K(ret));
  } else {
    const ExprFixedArray *exprs = &(ir_ctdef_->get_inv_idx_scan_ctdef()->pd_expr_spec_.access_exprs_);
    // Standard fixed-width type, consistent with group_idx below.
    int64_t group_id = 0;
    // Pick up the group id from the pseudo group-id access expr, if there is one.
    for (int64_t i = 0; i < exprs->count(); ++i) {
      if (T_PSEUDO_GROUP_ID == exprs->at(i)->type_) {
        group_id = exprs->at(i)->locate_expr_datum(*eval_ctx_).get_int();
      }
    }
    int64_t group_idx = ObNewRange::get_group_idx(group_id);
    if (OB_FAIL(gen_default_inv_idx_scan_range(query_token, inv_idx_scan_range))) {
      LOG_WARN("failed to generate inverted index scan range", K(ret), K(query_token));
    } else if (need_inv_idx_agg_ && OB_FAIL(add_agg_rang_key(inv_idx_scan_range))) {
      // The aggregation range is added before group_idx_ is stamped on the range below.
      LOG_WARN("failed to add scan range for inv idx agg", K(ret));
    } else if (FALSE_IT(inv_idx_scan_range.group_idx_ = group_idx)) {
    } else if (OB_FAIL(add_rowkey_range_key(inv_idx_scan_range))) {
      LOG_WARN("failed to add scan range for inv idx scan", K(ret));
    }
  }
  return ret;
}
// Registers one inverted index scan range per (query_token, doc id) pair for the first
// batch_size entries of doc_id. When the doc-count aggregation is needed and has not been
// evaluated yet (or must be reset), the default range of the token is also added to the
// aggregation scan.
// @param query_token  token used as the first rowkey column of every range
// @param doc_id       candidate doc ids; only entries [0, batch_size) are read
// @param batch_size   number of doc ids to use; must not exceed doc_id.count()
// @return OB_SUCCESS, OB_INVALID_ARGUMENT when batch_size exceeds doc_id.count(),
//         or the error code of the first failing step
int ObDASTextRetrievalIter::set_query_token_and_rangekey(const ObString &query_token, const common::ObIArray<ObDocId> &doc_id, const int64_t &batch_size)
{
  int ret = OB_SUCCESS;
  ObNewRange inv_idx_scan_range;
  ObNewRange inv_idx_agg_scan_range;
  if (OB_UNLIKELY(batch_size > doc_id.count())) {
    // doc_id.at(i) below is indexed up to batch_size, so reject an oversized batch up front.
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("batch size exceeds doc id count", K(ret), K(batch_size), K(doc_id.count()));
  } else if (OB_FAIL(check_inv_idx_scan_and_agg_param())) {
    LOG_WARN("failed to check inv idx scan or agg param", K(ret));
  } else {
    const ExprFixedArray *exprs = &(ir_ctdef_->get_inv_idx_scan_ctdef()->pd_expr_spec_.access_exprs_);
    // Standard fixed-width type, consistent with group_idx below.
    int64_t group_id = 0;
    // Pick up the group id from the pseudo group-id access expr, if there is one.
    for (int64_t i = 0; OB_SUCC(ret) && i < exprs->count(); ++i) {
      if (T_PSEUDO_GROUP_ID == exprs->at(i)->type_) {
        group_id = exprs->at(i)->locate_expr_datum(*eval_ctx_).get_int();
      }
    }
    int64_t group_idx = ObNewRange::get_group_idx(group_id);
    // One range per candidate doc id, each stamped with the group index.
    for (int64_t i = 0; OB_SUCC(ret) && i < batch_size; ++i) {
      if (OB_FAIL(gen_inv_idx_scan_range(query_token, doc_id.at(i), inv_idx_scan_range))) {
        LOG_WARN("failed to build inverted index scan range", K(ret), K(query_token), K(doc_id.at(i)));
      } else if (FALSE_IT(inv_idx_scan_range.group_idx_ = group_idx)) {
      } else if (OB_FAIL(add_rowkey_range_key(inv_idx_scan_range))) {
        LOG_WARN("failed to add scan range for inv idx scan", K(ret));
      }
    }
    // The aggregation range is only added when the aggregation still has to run.
    if (OB_SUCC(ret) && need_inv_idx_agg_ && (!inv_idx_agg_evaluated_ || need_inv_idx_agg_reset_)) {
      if (OB_FAIL(gen_default_inv_idx_scan_range(query_token, inv_idx_agg_scan_range))) {
        LOG_WARN("failed to generate inverted index scan range", K(ret), K(query_token));
      } else if (OB_FAIL(add_agg_rang_key(inv_idx_agg_scan_range))) {
        LOG_WARN("failed to add scan range for inv idx agg", K(ret));
      }
    }
  }
  return ret;
}
int ObDASTextRetrievalIter::check_inv_idx_scan_and_agg_param()
{
int ret = OB_SUCCESS;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("text retrieval iter not inited", K(ret));
} else if (OB_UNLIKELY(!inv_idx_scan_param_.key_ranges_.empty() ||
(need_inv_idx_agg_ && !inv_idx_agg_param_.key_ranges_.empty()))) {
} else if (OB_UNLIKELY(!need_inv_idx_agg_reset_ && need_fwd_idx_agg_)) {
// TODO: try to support the case @zyx439997
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected set query token with not null query range", K(ret), K(query_token),
K(inv_idx_scan_param_.key_ranges_), K_(need_inv_idx_agg), K(inv_idx_agg_param_.key_ranges_));
} else if (OB_FAIL(gen_inv_idx_scan_range(query_token, inv_idx_scan_range))) {
LOG_WARN("failed to generate inverted index scan range", K(ret), K(query_token));
} else if (OB_FAIL(inv_idx_scan_param_.key_ranges_.push_back(inv_idx_scan_range))) {
LOG_WARN("failed to add scan range for inv idx scan", K(ret));
} else if (need_inv_idx_agg_ && OB_FAIL(inv_idx_agg_param_.key_ranges_.push_back(inv_idx_scan_range))) {
LOG_WARN("failed to add scan range for inv idx agg", K(ret));
LOG_WARN("unexpected empty query range", K(ret), K(inv_idx_scan_param_.key_ranges_));
} else if (OB_UNLIKELY(!inv_idx_scan_param_.key_ranges_.empty())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected empty query range", K(ret), K(inv_idx_scan_param_.key_ranges_));
} else if (need_inv_idx_agg_) {
if (OB_UNLIKELY(!inv_idx_agg_param_.key_ranges_.empty())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected empty query range", K(ret), K_(need_inv_idx_agg), K_(inv_idx_agg_evaluated), K(inv_idx_agg_param_.key_ranges_));
}
}
return ret;
}
// Appends `range` to the key ranges of the inverted index aggregation scan param.
// Returns OB_ERR_UNEXPECTED when this iterator does not need inverted index aggregation,
// otherwise the result of the push_back.
int ObDASTextRetrievalIter::add_agg_rang_key(const ObNewRange &range)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(!need_inv_idx_agg_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected empty query range", K(ret), KPC(&range));
  } else {
    ret = inv_idx_agg_param_.key_ranges_.push_back(range);
    if (OB_UNLIKELY(OB_SUCCESS != ret)) {
      LOG_WARN("failed to push back lookup range", K(ret));
    }
  }
  return ret;
}
int ObDASTextRetrievalIter::add_rowkey_range_key(const ObNewRange &range)
{
int ret = OB_SUCCESS;
if (OB_FAIL(inv_idx_scan_param_.key_ranges_.push_back(range))) {
LOG_WARN("failed to push back lookup range", K(ret));
}
return ret;
}
@ -92,6 +177,7 @@ int ObDASTextRetrievalIter::inner_init(ObDASIterParam &param)
snapshot_ = retrieval_param.snapshot_;
need_fwd_idx_agg_ = ir_ctdef_->need_fwd_idx_agg();
need_inv_idx_agg_ = ir_ctdef_->need_inv_idx_agg();
need_inv_idx_agg_reset_ = retrieval_param.need_inv_idx_agg_reset_;
max_batch_size_ = ir_rtdef_->eval_ctx_->max_batch_size_;
if (need_inv_idx_agg_) {
@ -134,8 +220,7 @@ int ObDASTextRetrievalIter::inner_reuse()
if (nullptr != mem_context_) {
mem_context_->reset_remain_one_page();
}
inv_idx_agg_evaluated_ = false;
token_doc_cnt_ = 0;
int64_t old_default_size = OB_MAX(max_batch_size_, 1);
max_batch_size_ = ir_rtdef_->eval_ctx_->max_batch_size_;
if (old_default_size < OB_MAX(max_batch_size_, 1)) {
@ -168,9 +253,18 @@ int ObDASTextRetrievalIter::inner_reuse()
if (!inv_idx_agg_param_.key_ranges_.empty()) {
inv_idx_agg_param_.key_ranges_.reuse();
}
if (OB_FAIL(inverted_idx_agg_iter_->reuse())) {
LOG_WARN("failed to reuse inverted index agg iter", K(ret));
if (!inv_idx_agg_evaluated_ ||
need_inv_idx_agg_reset_ ||
inv_idx_agg_param_.need_switch_param_) {
if (OB_FAIL(inverted_idx_agg_iter_->reuse())) {
LOG_WARN("failed to reuse inverted index agg iter", K(ret));
}
inv_idx_agg_evaluated_ = false;
token_doc_cnt_ = 0;
}
} else {
inv_idx_agg_evaluated_ = false;
token_doc_cnt_ = 0;
}
if (OB_SUCC(ret) && need_fwd_idx_agg_) {
@ -221,6 +315,7 @@ int ObDASTextRetrievalIter::inner_release()
need_fwd_idx_agg_ = false;
need_inv_idx_agg_ = false;
inv_idx_agg_evaluated_ = false;
need_inv_idx_agg_reset_ = false;
not_first_fwd_agg_ = false;
is_inited_ = false;
return ret;
@ -256,7 +351,9 @@ int ObDASTextRetrievalIter::rescan()
}
if (OB_FAIL(inverted_idx_scan_iter_->rescan())) {
LOG_WARN("failed to rescan inverted scan iter", K(ret));
} else if (need_inv_idx_agg_ && OB_FAIL(inverted_idx_agg_iter_->rescan())) {
} else if (need_inv_idx_agg_ &&
!inv_idx_agg_evaluated_ &&
OB_FAIL(inverted_idx_agg_iter_->rescan())) {
LOG_WARN("failed to rescan inverted index agg iter", K(ret));
} else {
int64_t cnt = inv_idx_scan_param_.output_exprs_->count();
@ -317,6 +414,8 @@ int ObDASTextRetrievalIter::inner_get_next_rows(int64_t &count, int64_t capacity
if (OB_FAIL(do_doc_cnt_agg())) {
if (OB_UNLIKELY(OB_ITER_END != ret)) {
LOG_WARN("Fail to do document count aggregation", K(ret), K_(inv_idx_agg_param));
} else {
inv_idx_agg_evaluated_ = true;
}
} else {
inv_idx_agg_evaluated_ = true;
@ -354,31 +453,6 @@ int ObDASTextRetrievalIter::inner_get_next_rows(int64_t &count, int64_t capacity
return ret;
}
// Pulls the next row from the inverted index scan iterator. When the ctdef requires relevance,
// it then fetches the doc token count, fills the token doc count and projects the relevance
// expr for that row. OB_ITER_END from the scan iterator is returned without a warning.
int ObDASTextRetrievalIter::get_next_row_inner()
{
  int ret = inverted_idx_scan_iter_->get_next_row();
  if (OB_UNLIKELY(OB_SUCCESS != ret)) {
    if (OB_UNLIKELY(OB_ITER_END != ret)) {
      LOG_WARN("failed to get next row from inverted index", K(ret), K_(inv_idx_scan_param), KPC_(inverted_idx_scan_iter));
    }
  } else {
    LOG_DEBUG("get one invert index scan row", "row",
        ROWEXPR2STR(*ir_rtdef_->get_inv_idx_scan_rtdef()->eval_ctx_,
                    *inv_idx_scan_param_.output_exprs_));
    if (!ir_ctdef_->need_calc_relevance()) {
      // no relevance requested: the scanned row is returned as is
    } else {
      clear_row_wise_evaluated_flag();
      if (OB_FAIL(get_next_doc_token_cnt(need_fwd_idx_agg_))) {
        LOG_WARN("failed to get next doc token count", K(ret));
      } else if (OB_FAIL(fill_token_doc_cnt())) {
        LOG_WARN("failed to get token doc cnt", K(ret));
      } else if (OB_FAIL(project_relevance_expr())) {
        LOG_WARN("failed to evaluate simarity expr", K(ret));
      }
    }
  }
  return ret;
}
int ObDASTextRetrievalIter::init_inv_idx_scan_param()
{
int ret = OB_SUCCESS;
@ -402,6 +476,9 @@ int ObDASTextRetrievalIter::init_inv_idx_scan_param()
inv_idx_agg_param_))) {
LOG_WARN("fail to init inverted index count aggregate param", K(ret), KPC_(ir_ctdef));
} else {
// In some cases the default scan_order_ may be 'Reverse', so force a forward scan here.
inv_idx_scan_param_.scan_flag_.scan_order_ = ObQueryFlag::Forward;
if (OB_UNLIKELY(!static_cast<sql::ObStoragePushdownFlag>(
ir_ctdef_->get_inv_idx_agg_ctdef()->pd_expr_spec_.pd_storage_flag_).is_aggregate_pushdown())) {
ret = OB_NOT_IMPLEMENT;
@ -516,6 +593,8 @@ int ObDASTextRetrievalIter::do_doc_cnt_agg()
const sql::ObExpr *inv_idx_agg_expr = inv_idx_agg_param_.aggregate_exprs_->at(0);
sql::ObEvalCtx *eval_ctx = ir_rtdef_->get_inv_idx_agg_rtdef()->eval_ctx_;
ObDatum *doc_cnt_datum = nullptr;
ObEvalCtx::BatchInfoScopeGuard guard(*eval_ctx);
guard.set_batch_idx(0);
if (OB_FAIL(inv_idx_agg_expr->eval(*eval_ctx, doc_cnt_datum))) {
LOG_WARN("failed to evaluate aggregated expr", K(ret));
} else {
@ -624,7 +703,7 @@ int ObDASTextRetrievalIter::reuse_fwd_idx_iter()
return ret;
}
int ObDASTextRetrievalIter::gen_inv_idx_scan_range(const ObString &query_token, ObNewRange &scan_range)
int ObDASTextRetrievalIter::gen_default_inv_idx_scan_range(const ObString &query_token, ObNewRange &scan_range)
{
int ret = OB_SUCCESS;
void *buf = nullptr;
@ -661,6 +740,37 @@ int ObDASTextRetrievalIter::gen_inv_idx_scan_range(const ObString &query_token,
return ret;
}
int ObDASTextRetrievalIter::gen_inv_idx_scan_range(const ObString &query_token, const ObDocId &doc_id, ObNewRange &scan_range)
{
int ret = OB_SUCCESS;
void *buf = nullptr;
ObObj *obj_ptr = nullptr;
common::ObArenaAllocator &ctx_alloc = mem_context_->get_arena_allocator();
constexpr int64_t obj_cnt = INV_IDX_ROWKEY_COL_CNT;
ObObj tmp_obj;
tmp_obj.set_string(ObVarcharType, query_token);
// We need to ensure collation type / level between query text and token column is compatible
tmp_obj.set_meta_type(ir_ctdef_->search_text_->obj_meta_);
if (OB_ISNULL(buf = ctx_alloc.alloc(sizeof(ObObj) * obj_cnt))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("failed to allocate memory for rowkey obj", K(ret));
} else if (OB_ISNULL(obj_ptr = new (buf) ObObj[obj_cnt])) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected nullptr", K(ret));
} else if (OB_FAIL(ob_write_obj(ctx_alloc, tmp_obj, obj_ptr[0]))) {
LOG_WARN("failed to write obj", K(ret));
} else {
obj_ptr[1].set_varbinary(doc_id.get_string());
ObRowkey row_key(obj_ptr, obj_cnt);
common::ObTableID inv_table_id = ir_ctdef_->get_inv_idx_scan_ctdef()->ref_table_id_;
if (OB_FAIL(scan_range.build_range(inv_table_id, row_key))) {
LOG_WARN("failed to build lookup range", K(ret), K(inv_table_id), K(row_key));
}
}
return ret;
}
int ObDASTextRetrievalIter::gen_fwd_idx_scan_range(const ObDocId &doc_id, ObNewRange &scan_range)
{
int ret = OB_SUCCESS;
@ -768,6 +878,8 @@ int ObDASTextRetrievalIter::fill_token_doc_cnt()
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null expr", K(ret), KP(inv_idx_agg_expr), KP(eval_ctx));
} else {
ObEvalCtx::BatchInfoScopeGuard guard(*eval_ctx);
guard.set_batch_idx(0);
ObDatum &doc_cnt_datum = inv_idx_agg_expr->locate_datum_for_write(*eval_ctx);
doc_cnt_datum.set_int(token_doc_cnt_);
}
@ -946,6 +1058,8 @@ int ObDASTRCacheIter::inner_get_next_rows(int64_t &count, int64_t capacity)
if (OB_FAIL(do_doc_cnt_agg())) {
if (OB_UNLIKELY(OB_ITER_END != ret)) {
LOG_WARN("Fail to do document count aggregation", K(ret), K_(inv_idx_agg_param));
} else {
inv_idx_agg_evaluated_ = true;
}
} else {
inv_idx_agg_evaluated_ = true;

View File

@ -34,7 +34,8 @@ public:
inv_idx_agg_iter_(nullptr),
fwd_idx_iter_(nullptr),
tx_desc_(nullptr),
snapshot_(nullptr)
snapshot_(nullptr),
need_inv_idx_agg_reset_(true)
{}
virtual bool is_valid() const override
@ -49,6 +50,7 @@ public:
ObDASIter *fwd_idx_iter_;
transaction::ObTxDesc *tx_desc_;
transaction::ObTxReadSnapshot *snapshot_;
bool need_inv_idx_agg_reset_;
};
// single token
@ -61,6 +63,7 @@ public:
virtual int rescan() override;
int set_query_token(const ObString &query_token);
int set_query_token_and_rangekey(const ObString &query_token, const common::ObIArray<ObDocId> &doc_id, const int64_t &batch_size);
void set_ls_tablet_ids(
const share::ObLSID &ls_id,
const ObTabletID &inv_tablet_id,
@ -100,7 +103,9 @@ protected:
int project_relevance_expr();
int batch_project_relevance_expr(const int64_t &count);
int reuse_fwd_idx_iter();
int gen_inv_idx_scan_range(const ObString &query_token, ObNewRange &scan_range);
int gen_default_inv_idx_scan_range(const ObString &query_token, ObNewRange &scan_range);
int gen_inv_idx_scan_range(const ObString &query_token, const ObDocId &doc_id, ObNewRange &scan_range);
int gen_fwd_idx_scan_range(const ObDocId &doc_id, ObNewRange &scan_range);
inline bool need_calc_relevance() { return true; } // TODO: reduce tsc ops if no need to calc relevance
int init_calc_exprs();
@ -118,6 +123,10 @@ protected:
}
return ret;
}
int add_rowkey_range_key(const ObNewRange &range);
int add_agg_rang_key(const ObNewRange &range);
int check_inv_idx_scan_and_agg_param();
protected:
static const int64_t FWD_IDX_ROWKEY_COL_CNT = 2;
static const int64_t INV_IDX_ROWKEY_COL_CNT = 2;
@ -146,6 +155,7 @@ protected:
bool need_fwd_idx_agg_;
bool need_inv_idx_agg_;
bool inv_idx_agg_evaluated_;
bool need_inv_idx_agg_reset_;
bool not_first_fwd_agg_;
bool is_inited_;
};

View File

@ -94,6 +94,7 @@ ObDASTextRetrievalMergeIter::ObDASTextRetrievalMergeIter()
limit_param_(),
input_row_cnt_(0),
output_row_cnt_(0),
force_return_docid_(false),
doc_cnt_calculated_(false),
doc_cnt_iter_acquired_(false),
is_inited_(false)
@ -115,24 +116,20 @@ int ObDASTextRetrievalMergeIter::rescan()
{
int ret = OB_SUCCESS;
if (0 == query_tokens_.count()) {
} else if (nullptr != whole_doc_cnt_iter_ && OB_FAIL(whole_doc_cnt_iter_->rescan())) {
} else if (nullptr != whole_doc_cnt_iter_ &&
(!force_return_docid_ || whole_doc_agg_param_.need_switch_param_) &&
OB_FAIL(whole_doc_cnt_iter_->rescan())) { // for force_return_docid_ mode, we just read the cnt once.
LOG_WARN("failed to rescan doc count iter", K(ret));
} else {
next_written_idx_ = 0;
limit_param_ = ir_rtdef_->get_inv_idx_scan_rtdef()->limit_param_;
int64_t size = ir_ctdef_->inv_scan_doc_id_col_->is_batch_result() ? ir_rtdef_->eval_ctx_->max_batch_size_ : 1;
if (OB_FAIL(cache_doc_ids_.init(size))) {
LOG_WARN("failed to init cache_doc_ids_ array", K(ret));
} else if (OB_FAIL(cache_doc_ids_.prepare_allocate(size))) {
LOG_WARN("failed to prepare allocate cache_doc_ids_ array", K(ret));
}
}
return ret;
}
int ObDASTextRetrievalMergeIter::set_related_tablet_ids(
const ObLSID &ls_id,
const ObDASRelatedTabletID &related_tablet_ids)
const ObDASFTSTabletID &related_tablet_ids)
{
int ret = OB_SUCCESS;
ls_id_ = ls_id;
@ -256,6 +253,7 @@ int ObDASTextRetrievalMergeIter::inner_init(ObDASIterParam &param)
snapshot_ = retrieval_param.snapshot_;
relation_type_ = TokenRelationType::DISJUNCTIVE;
force_return_docid_ = retrieval_param.force_return_docid_; // from param
if (OB_ISNULL(mem_context_)) {
lib::ContextParam param;
@ -280,6 +278,25 @@ int ObDASTextRetrievalMergeIter::inner_init(ObDASIterParam &param)
} else {
limit_param_ = ir_rtdef_->get_inv_idx_scan_rtdef()->limit_param_;
}
if (OB_FAIL(ret)) {
} else if (force_return_docid_) {
if (FALSE_IT(hints_.set_allocator(&mem_context_->get_arena_allocator()))) {
} else if (FALSE_IT(relevances_.set_allocator(&mem_context_->get_arena_allocator()))) {
} else if (FALSE_IT(reverse_hints_.set_allocator(&mem_context_->get_arena_allocator()))) {
} else if (OB_FAIL(hints_.init(size))) {
LOG_WARN("failed to init hints array", K(ret));
} else if (OB_FAIL(hints_.prepare_allocate(size))) {
LOG_WARN("failed to prepare allocate hints array", K(ret));
} else if (OB_FAIL(relevances_.init(size))) {
LOG_WARN("failed to init relevances array", K(ret));
} else if (OB_FAIL(relevances_.prepare_allocate(size))) {
LOG_WARN("failed to prepare allocate relevances array", K(ret));
} else if (OB_FAIL(reverse_hints_.init(size))) {
LOG_WARN("failed to init hints array", K(ret));
} else if (OB_FAIL(reverse_hints_.prepare_allocate(size))) {
LOG_WARN("failed to prepare allocate hints array", K(ret));
}
}
}
LOG_DEBUG("init text retrieval op", K(ret), KPC_(ir_ctdef), KPC_(ir_rtdef));
@ -290,20 +307,55 @@ int ObDASTextRetrievalMergeIter::inner_init(ObDASIterParam &param)
int ObDASTextRetrievalMergeIter::inner_reuse()
{
int ret = OB_SUCCESS;
cache_doc_ids_.reuse();
int64_t size = ir_ctdef_->inv_scan_doc_id_col_->is_batch_result() ? ir_rtdef_->eval_ctx_->max_batch_size_ : 1;
if (0 == token_iters_.count()) {
// do nothing
} else if (size <= cache_doc_ids_.count()) {
// do nothing
} else {
cache_doc_ids_.reuse();
hints_.reuse();
relevances_.reuse();
reverse_hints_.reuse();
if (OB_FAIL(cache_doc_ids_.init(size))) {
LOG_WARN("failed to init cache_doc_ids_ array", K(ret));
} else if (OB_FAIL(cache_doc_ids_.prepare_allocate(size))) {
LOG_WARN("failed to prepare allocate cache_doc_ids_ array", K(ret));
} else if (force_return_docid_) {
if (OB_FAIL(hints_.init(size))) {
LOG_WARN("failed to init hints array", K(ret));
} else if (OB_FAIL(hints_.prepare_allocate(size))) {
LOG_WARN("failed to prepare allocate hints array", K(ret));
} else if (OB_FAIL(relevances_.init(size))) {
LOG_WARN("failed to init relevances array", K(ret));
} else if (OB_FAIL(relevances_.prepare_allocate(size))) {
LOG_WARN("failed to prepare allocate relevances array", K(ret));
} else if (OB_FAIL(reverse_hints_.init(size))) {
LOG_WARN("failed to init relevances array", K(ret));
} else if (OB_FAIL(reverse_hints_.prepare_allocate(size))) {
LOG_WARN("failed to prepare allocate relevances array", K(ret));
}
}
}
next_written_idx_ = 0;
doc_cnt_calculated_ = false;
if (!force_return_docid_) {
doc_cnt_calculated_ = false;
}
input_row_cnt_ = 0;
output_row_cnt_ = 0;
const ObTabletID &old_doc_id_tablet_id = whole_doc_agg_param_.tablet_id_;
whole_doc_agg_param_.need_switch_param_ = whole_doc_agg_param_.need_switch_param_ ||
((old_doc_id_tablet_id.is_valid() && old_doc_id_tablet_id != doc_id_idx_tablet_id_) ? true : false);
if (nullptr != whole_doc_cnt_iter_) {
whole_doc_cnt_iter_->set_scan_param(whole_doc_agg_param_);
if (OB_FAIL(whole_doc_cnt_iter_->reuse())) {
LOG_WARN("failed to reuse whole doc cnt iter", K(ret));
if (!force_return_docid_ || whole_doc_agg_param_.need_switch_param_) {
if (nullptr != whole_doc_cnt_iter_) {
whole_doc_cnt_iter_->set_scan_param(whole_doc_agg_param_);
if (OB_FAIL(whole_doc_cnt_iter_->reuse())) {
LOG_WARN("failed to reuse whole doc cnt iter", K(ret));
}
}
doc_cnt_calculated_ = false;
}
for (int64_t i = 0; OB_SUCC(ret) && i < token_iters_.count(); ++i) {
if (OB_FAIL(token_iters_.at(i)->reuse())) {
LOG_WARN("failed to reuse token iters", K(ret));
@ -322,6 +374,9 @@ int ObDASTextRetrievalMergeIter::inner_release()
whole_doc_cnt_iter_ = nullptr;
token_iters_.reset();
cache_doc_ids_.reset();
hints_.reset();
relevances_.reset();
reverse_hints_.reset();
if (nullptr != mem_context_) {
mem_context_->reset_remain_one_page();
DESTROY_CONTEXT(mem_context_);
@ -332,6 +387,7 @@ int ObDASTextRetrievalMergeIter::inner_release()
output_row_cnt_ = 0;
limit_param_.offset_ = 0;
limit_param_.limit_ = -1;
force_return_docid_ = false;
doc_cnt_calculated_ = false;
doc_cnt_iter_acquired_ = false;
is_inited_ = false;
@ -352,6 +408,42 @@ int ObDASTextRetrievalMergeIter::inner_get_next_rows(int64_t &count, int64_t cap
return ret;
}
// Installs the doc ids to look up (function-lookup mode only) together with the
// output position of each doc id, then hands the doc ids to every token iterator.
//
// @param virtual_rangkeys  pairs of (doc id, output position). Entry i is stored in
//                          cache_doc_ids_[i]; hints_[i] records its output position and
//                          reverse_hints_[output position] records i.
// @return OB_SUCCESS; OB_ERR_UNEXPECTED when the iterator is not in force_return_docid_
//         mode, when there are more keys than the cached arrays / batch size allow, or
//         when an output position is out of range; otherwise the error reported by
//         set_query_token_and_rangekey().
int ObDASTextRetrievalMergeIter::set_rangkey_and_selector(const common::ObIArray<std::pair<ObDocId, int>> &virtual_rangkeys)
{
  int ret = OB_SUCCESS;
  rangekey_size_ = virtual_rangkeys.count();
  if (OB_UNLIKELY(!force_return_docid_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected mode", K(ret));
  } else if (rangekey_size_ > OB_MAX(ir_rtdef_->eval_ctx_->max_batch_size_, 1)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected rangekey size", K(ret), K_(rangekey_size));
  } else if (0 != token_iters_.count()) {
    const int64_t max_size = ir_ctdef_->inv_scan_doc_id_col_->is_batch_result() ? ir_rtdef_->eval_ctx_->max_batch_size_ : 1;
    if (rangekey_size_ > cache_doc_ids_.count() || rangekey_size_ > max_size) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected size", K(ret), K(rangekey_size_), K(cache_doc_ids_.count()));
    }
    for (int64_t i = 0; OB_SUCC(ret) && i < virtual_rangkeys.count(); ++i) {
      const int64_t output_pos = virtual_rangkeys.at(i).second;
      // Validate the output position BEFORE touching any array: it is used as an index
      // into reverse_hints_, so a negative or too-large value must be rejected.
      if (OB_UNLIKELY(output_pos < 0 || output_pos >= max_size || output_pos >= reverse_hints_.count())) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("unexpected size", K(ret), K(virtual_rangkeys.at(i).second), K(max_size));
      } else {
        cache_doc_ids_[i].from_string(virtual_rangkeys.at(i).first.get_string());
        hints_[i] = output_pos;
        relevances_[i] = 0.0;
        reverse_hints_[output_pos] = i;
      }
    }
    for (int64_t i = 0; OB_SUCC(ret) && i < token_iters_.count(); ++i) {
      if (OB_FAIL(token_iters_.at(i)->set_query_token_and_rangekey(query_tokens_.at(i), cache_doc_ids_, rangekey_size_))) {
        LOG_WARN("failed to set token and rangekey", K(ret), K_(rangekey_size));
      }
    }
  }
  return ret;
}
int ObDASTextRetrievalMergeIter::check_and_prepare()
{
int ret = OB_SUCCESS;
@ -371,6 +463,8 @@ int ObDASTextRetrievalMergeIter::check_and_prepare()
} else if (OB_FAIL(do_total_doc_cnt())) {
if (OB_UNLIKELY(OB_ITER_END != ret)) {
LOG_WARN("failed to do total document count", K(ret), KPC_(ir_ctdef));
} else {
doc_cnt_calculated_ = true;
}
} else {
doc_cnt_calculated_ = true;
@ -379,7 +473,7 @@ int ObDASTextRetrievalMergeIter::check_and_prepare()
return ret;
}
int ObDASTextRetrievalMergeIter::project_result(const ObIRIterLoserTreeItem &item, const double relevance)
int ObDASTextRetrievalMergeIter::project_result(const ObDocId &docid, const double relevance)
{
int ret = OB_SUCCESS;
// TODO: usage of doc id column is somehow weird here, since in single token retrieval iterators,
@ -395,7 +489,7 @@ int ObDASTextRetrievalMergeIter::project_result(const ObIRIterLoserTreeItem &ite
K(ret), KP(doc_id_col), KP(eval_ctx));
} else {
ObDatum &doc_id_proj_datum = doc_id_col->locate_datum_for_write(*eval_ctx);
doc_id_proj_datum.set_string(item.doc_id_.get_string());
doc_id_proj_datum.set_string(docid.get_string());
if (ir_ctdef_->need_proj_relevance_score()) {
ObExpr *relevance_proj_col = ir_ctdef_->relevance_proj_col_;
if (OB_ISNULL(relevance_proj_col)) {
@ -406,12 +500,12 @@ int ObDASTextRetrievalMergeIter::project_result(const ObIRIterLoserTreeItem &ite
relevance_proj_datum.set_double(relevance);
}
}
LOG_DEBUG("project one fulltext search result", K(ret), K(item));
LOG_DEBUG("project one fulltext search result", K(ret), K(docid), K(relevance));
}
return ret;
}
int ObDASTextRetrievalMergeIter::project_relevance(const ObIRIterLoserTreeItem &item, const double relevance)
int ObDASTextRetrievalMergeIter::project_relevance(const ObDocId &docid, const double relevance)
{
int ret = OB_SUCCESS;
// TODO: usage of doc id column is somehow weird here, since in single token retrieval iterators,
@ -426,7 +520,7 @@ int ObDASTextRetrievalMergeIter::project_relevance(const ObIRIterLoserTreeItem &
LOG_WARN("unexpected nullptr to relevance proejction column",
K(ret), KP(doc_id_col), KP(eval_ctx));
} else {
cache_doc_ids_[next_written_idx_] = item.doc_id_;
cache_doc_ids_[next_written_idx_] = docid;
if (ir_ctdef_->need_proj_relevance_score()) {
ObExpr *relevance_proj_col = ir_ctdef_->relevance_proj_col_;
if (OB_ISNULL(relevance_proj_col)) {
@ -558,6 +652,7 @@ int ObDASTextRetrievalMergeIter::do_total_doc_cnt()
ObEvalCtx::BatchInfoScopeGuard guard(*ir_rtdef_->eval_ctx_);
guard.set_batch_idx(0);
if (!ir_ctdef_->need_estimate_total_doc_cnt()) {
bool get_next = false;
// When estimation info not exist, or we found estimation info not accurate, calculate document count by scan
if (!doc_cnt_iter_acquired_) {
if (OB_FAIL(init_total_doc_cnt_param(tx_desc_, snapshot_))) {
@ -567,6 +662,7 @@ int ObDASTextRetrievalMergeIter::do_total_doc_cnt()
LOG_WARN("failed to do table scan for document count aggregation", K(ret));
} else {
doc_cnt_iter_acquired_ = true;
get_next = true;
}
} else {
const ObTabletID old_tablet_id = whole_doc_agg_param_.tablet_id_;
@ -574,13 +670,17 @@ int ObDASTextRetrievalMergeIter::do_total_doc_cnt()
|| ((old_tablet_id.is_valid() && old_tablet_id != doc_id_idx_tablet_id_ ) ? true : false);
whole_doc_agg_param_.tablet_id_ = doc_id_idx_tablet_id_;
whole_doc_agg_param_.ls_id_ = ls_id_;
if (OB_FAIL(whole_doc_cnt_iter_->reuse())) {
LOG_WARN("failed to reuse whole doc cnt iter", K(ret));
} else if (OB_FAIL(whole_doc_cnt_iter_->rescan())) {
LOG_WARN("failed to rescan whole doc cnt iter", K(ret));
if (!force_return_docid_ || whole_doc_agg_param_.need_switch_param_) {
if (OB_FAIL(whole_doc_cnt_iter_->reuse())) {
LOG_WARN("failed to reuse whole doc cnt iter", K(ret));
} else if (OB_FAIL(whole_doc_cnt_iter_->rescan())) {
LOG_WARN("failed to rescan whole doc cnt iter", K(ret));
} else {
get_next = true;
}
}
}
if (OB_SUCC(ret)) {
if (OB_SUCC(ret) && get_next) {
if (OB_UNLIKELY(!static_cast<sql::ObStoragePushdownFlag>(whole_doc_agg_param_.pd_storage_flag_).is_aggregate_pushdown())) {
ret = OB_NOT_IMPLEMENT;
LOG_ERROR("aggregate without pushdown not implemented", K(ret));
@ -690,7 +790,7 @@ int ObDASTRTaatIter::inner_reuse()
int ret = OB_SUCCESS;
if (hash_maps_) {
for (int64_t i = 0; i < hash_map_size_; ++i) {
hash_maps_[i]->reuse();
hash_maps_[i]->destroy();
}
hash_maps_ = nullptr;
}
@ -709,7 +809,9 @@ int ObDASTRTaatIter::inner_reuse()
hash_map_size_ = 0;
cur_map_iter_ = nullptr;
next_clear_map_idx_ = 0;
total_doc_cnt_ = -1;
if (!force_return_docid_) {
total_doc_cnt_ = -1;
}
cur_map_idx_= -1;
cache_first_docid_.reset();
is_chunk_store_inited_ = false;
@ -772,6 +874,8 @@ int ObDASTRTaatIter::check_and_prepare()
if (OB_FAIL(do_total_doc_cnt())) {
if (OB_UNLIKELY(OB_ITER_END != ret)) {
LOG_WARN("failed to do total document count", K(ret), KPC_(ir_ctdef));
} else {
doc_cnt_calculated_ = true;
}
} else {
doc_cnt_calculated_ = true;
@ -861,7 +965,8 @@ int ObDASTRTaatIter::get_next_batch_rows(int64_t &count, int64_t capacity)
next_written_idx_ = 0;
count = 0;
while (OB_SUCC(ret) && next_written_idx_ != real_capacity) {
if (cur_map_idx_!= -1 && (cur_map_iter_ != nullptr && (*cur_map_iter_) != hash_maps_[cur_map_idx_]->end())) {
if (cur_map_idx_!= -1 &&
(force_return_docid_ || (cur_map_iter_ != nullptr && (*cur_map_iter_) != hash_maps_[cur_map_idx_]->end()))) {
if (OB_UNLIKELY(next_written_idx_ > real_capacity)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected next_written_idx", K(ret), K(next_written_idx_), K(real_capacity));
@ -871,12 +976,14 @@ int ObDASTRTaatIter::get_next_batch_rows(int64_t &count, int64_t capacity)
}
}
} else if (OB_FAIL(load_next_hashmap())) { // cache data
LOG_WARN("failed to load next hashmap", K(ret));
if (OB_ITER_END != ret) {
LOG_WARN("failed to load next hashmap", K(ret), K_(cur_map_idx), K_(next_written_idx), K(count), K(real_capacity));
}
}
}
if (OB_SUCC(ret) || OB_ITER_END == ret) {
if (next_written_idx_ > 0) {
if (next_written_idx_ > 0 && !force_return_docid_) {
ObExpr *doc_id_col = ir_ctdef_->inv_scan_doc_id_col_;
ObEvalCtx *eval_ctx = ir_rtdef_->eval_ctx_;
ObDatum *doc_id_proj_datum = doc_id_col->locate_batch_datums(*eval_ctx);
@ -973,7 +1080,7 @@ int ObDASTRTaatIter::init_stores_by_partition()
ret = OB_ERR_UNEXPECTED;
LOG_WARN("total doc cnt is not set", K(ret), K_(is_hashmap_inited));
} else {
int64_t partition_cnt = OB_MIN((total_doc_cnt_- 1) / OB_HASHMAP_DEFAULT_SIZE + 1, OB_MAX_HASHMAP_COUNT);
int64_t partition_cnt = force_return_docid_ ? 1 : OB_MIN((total_doc_cnt_- 1) / OB_HASHMAP_DEFAULT_SIZE + 1, OB_MAX_HASHMAP_COUNT);
hash_map_size_ = partition_cnt;
void *buf = nullptr;
if (nullptr == hash_maps_ && OB_SUCC(ret)) {
@ -1186,7 +1293,7 @@ int ObDASTRTaatIter::fill_chunk_store_by_tr_iter()
if (token_idx + 1 < query_tokens_.count()) {
if (OB_FAIL(token_iters_.at(0)->reuse())) {
LOG_WARN("failed to reuse tr iter", K(ret));
} else if (OB_FAIL(token_iters_.at(0)->set_query_token(query_tokens_.at(token_idx + 1)))) {
} else if (OB_FAIL(reset_query_token(query_tokens_.at(token_idx + 1)))) {
LOG_WARN("failed to set query token", K(ret));
} else if (OB_FAIL(token_iters_.at(0)->rescan())) {
LOG_WARN("failed to rescan tr iter", K(ret));
@ -1227,6 +1334,8 @@ int ObDASTRTaatIter::load_next_hashmap()
} else if (FALSE_IT(++cur_map_idx_)) {
} else if (OB_FAIL(inner_load_next_hashmap())) {
LOG_WARN("failed to load next hashmap", K(ret));
} else if (force_return_docid_) {
// do nothing
} else {
hash::ObHashMap<ObDocId, double>::iterator iter = hash_maps_[cur_map_idx_]->begin();
void *buf = nullptr;
@ -1315,6 +1424,249 @@ int ObDASTRTaatIter::inner_load_next_hashmap()
return ret;
}
int ObDASTRTaatIter::reset_query_token(const ObString &query_token)
{
int ret = OB_SUCCESS;
if (!force_return_docid_) {
if (OB_FAIL(token_iters_.at(0)->set_query_token(query_token))) {
LOG_WARN("failed to set query token", K(ret));
}
} else {
if (OB_FAIL(token_iters_.at(0)->set_query_token_and_rangekey(query_token, cache_doc_ids_, rangekey_size_))) {
LOG_WARN("failed to set token and rangekey", K(ret), K_(rangekey_size));
}
}
return ret;
}
// Default constructor: performs no initialization beyond the ObDASTRTaatIter base constructor.
ObDASTRTaatLookupIter::ObDASTRTaatLookupIter() : ObDASTRTaatIter() {}
int ObDASTRTaatLookupIter::rescan()
{
int ret = OB_SUCCESS;
if (OB_FAIL(ObDASTextRetrievalMergeIter::rescan())) {
LOG_WARN("failed to rescan iter", K(ret));
} else if (OB_UNLIKELY(token_iters_.count() > 1)) {
ret= OB_ERR_UNEXPECTED;
LOG_WARN("unexpected iter count mismatch with query tokens",
K(ret), K_(query_tokens), K_(token_iters));
} else if (0 != query_tokens_.count()) {
ObDASTextRetrievalIter *iter = token_iters_.at(0);
if (OB_FAIL(iter->rescan())) {
LOG_WARN("failed to append token iter to array", K(ret));
}
is_hashmap_inited_ = false;
cur_map_idx_= -1;
next_clear_map_idx_ = 0;
}
return ret;
}
// Writes the relevance of every cached doc id into the output expressions.
//
// Each cached doc id i is looked up in the first (and only) hash map; a doc id that is
// absent from the map gets relevance 0. The result is written at output position
// hints_[i] when that position fits into safe_capacity, otherwise the relevance is
// stashed in relevances_[position] for a later call to emit.
//
// @param count          in: must be 0; out: number of rows written to the output exprs
// @param safe_capacity  number of output rows that may be written in this call
// @return OB_ITER_END when all rangekey_size_ rows were written, OB_SUCCESS when only
//         safe_capacity rows fit, OB_ERR_UNEXPECTED on inconsistent state, or the
//         hash map error code.
int ObDASTRTaatLookupIter::fill_output_exprs(int64_t &count, int64_t safe_capacity)
{
  int ret = OB_SUCCESS;
  const bool need_relevance = ir_ctdef_->need_proj_relevance_score();
  ObExpr *match_filter = need_relevance ? ir_ctdef_->match_filter_ : nullptr;
  ObEvalCtx *eval_ctx = ir_rtdef_->eval_ctx_;
  ObExpr *relevance_proj_col = ir_ctdef_->relevance_proj_col_;
  ObDatum *relevance_proj_datum = nullptr;
  ObExpr *doc_id_col = ir_ctdef_->inv_scan_doc_id_col_;
  ObDatum *doc_id_proj_datum = doc_id_col->locate_batch_datums(*eval_ctx);
  hash::ObHashMap<ObDocId, double> *first_map = nullptr;
  if (need_relevance) {
    relevance_proj_datum = relevance_proj_col->locate_datums_for_update(*eval_ctx, safe_capacity);
  }

  if (OB_UNLIKELY(count != 0)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected size", K(ret), K_(rangekey_size), K(safe_capacity), K(count));
  } else if (OB_UNLIKELY(hash_map_size_ != 1)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected size", K(ret), K_(hash_map_size));
  } else if (OB_UNLIKELY(nullptr != match_filter)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected match filter", K(ret));
  } else if (OB_ISNULL(hash_maps_) || OB_ISNULL(first_map = hash_maps_[0])) {
    // Only touch hash_maps_ once the state has been validated.
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected null hash map", K(ret), K_(hash_map_size));
  }

  for (int64_t i = 0; OB_SUCC(ret) && i < rangekey_size_; ++i) {
    double cur_relevance = 0;
    if (OB_FAIL(first_map->get_refactored(cache_doc_ids_[i], cur_relevance))) {
      if (OB_HASH_NOT_EXIST != ret) {
        LOG_WARN("fail to get relevance", K(ret), K(cur_relevance));
      } else {
        // doc id matched no token: keep the default relevance of 0
        ret = OB_SUCCESS;
      }
    }
    if (OB_SUCC(ret)) {
      const int64_t pos = hints_[i];
      if (pos < safe_capacity) {
        ObEvalCtx::BatchInfoScopeGuard guard(*eval_ctx);
        guard.set_batch_idx(pos);
        doc_id_proj_datum[pos].set_string(cache_doc_ids_[i].get_string());
        if (need_relevance) {
          relevance_proj_datum[pos].set_double(cur_relevance);
          relevance_proj_col->set_evaluated_flag(*eval_ctx);
        }
        output_row_cnt_ ++;
        input_row_cnt_ ++;
        count ++;
      } else {
        // does not fit into this batch: remember it for a later call
        relevances_[pos] = cur_relevance;
      }
      // progress marker; replaced by `count` below once the whole loop succeeded
      next_written_idx_++;
    }
  }

  if (OB_SUCC(ret)) {
    next_written_idx_ = count;
    if (need_relevance) {
      relevance_proj_col->set_evaluated_projected(*eval_ctx);
    }
    if (count != OB_MIN(safe_capacity, rangekey_size_)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected size", K(ret), K(count), K_(rangekey_size), K(safe_capacity));
    } else if (count == rangekey_size_) {
      ret = OB_ITER_END;
    }
  }
  return ret;
}
int ObDASTRTaatLookupIter::inner_get_next_row()
{
int ret = OB_SUCCESS;
bool need_fill_doc_cnt = !doc_cnt_calculated_;
if (OB_UNLIKELY(1 != rangekey_size_)) { // if rangekey_size_ > 1, UNSUPPORTED
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected rangekey size", K(ret), K_(rangekey_size));
} else if (next_written_idx_ >= rangekey_size_) {
ret = OB_ITER_END;
} else if (OB_FAIL(check_and_prepare())) {
if (OB_ITER_END != ret) {
LOG_WARN("failed to prepare to get next row", K(ret));
} else {
ObDocId default_docid;
ObEvalCtx *ctx = ir_rtdef_->eval_ctx_;
ObEvalCtx::BatchInfoScopeGuard guard(*ctx);
guard.set_batch_idx(0);
if (OB_FAIL(project_result(default_docid,0))) {
LOG_WARN("failed to project result", K(ret));
} else {
next_written_idx_++;
}
}
} else if (need_fill_doc_cnt && OB_FAIL(fill_total_doc_cnt())) {
LOG_WARN("failed to fill total document count", K(ret), K(total_doc_cnt_));
} else if (0 == total_doc_cnt_) {
ret = OB_ITER_END;
} else if (total_doc_cnt_ < 0) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected total doc cnt", K(ret), K(total_doc_cnt_));
} else {
int64_t count = 0;
const int64_t cap = 1;
if (OB_FAIL(get_next_batch_rows(count, cap))) {
if (OB_UNLIKELY(OB_ITER_END != ret)) {
LOG_WARN("failed to get next row with taat", K(ret));
} else if (OB_UNLIKELY(count != 0)) {
ret = OB_SUCCESS;
}
} else if (OB_UNLIKELY(count != 1)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected row count", K(ret), K(count));
}
}
return ret;
}
// Batch lookup: produces relevance rows for the cached doc ids.
//
// Three situations are handled:
//  - check_and_prepare() reports OB_ITER_END before anything was written: every row is
//    filled with a default doc id and relevance 0;
//  - nothing has been written yet (next_written_idx_ == 0): rows are produced through
//    get_next_batch_rows();
//  - a previous call already wrote some rows: the remaining rows are emitted from the
//    relevances stashed in relevances_ (indexed by output position; reverse_hints_
//    maps an output position back to its cached doc id).
//
// @param count     out: number of rows produced by this call
// @param capacity  maximum number of rows the caller accepts; must not be 0
// @return OB_SUCCESS, OB_ITER_END once all rangekey_size_ rows have been handed out,
//         OB_ERR_UNEXPECTED on inconsistent state, or the error of an underlying call.
int ObDASTRTaatLookupIter::inner_get_next_rows(int64_t &count, int64_t capacity)
{
  int ret = OB_SUCCESS;
  count = 0;
  // must be sampled before check_and_prepare(), which may flip doc_cnt_calculated_
  bool need_fill_doc_cnt = !doc_cnt_calculated_;
  if (OB_UNLIKELY(capacity == 0)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected capacity size", K(ret), K(capacity));
  } else if (OB_FAIL(check_and_prepare())) {
    if (OB_ITER_END != ret) {
      LOG_WARN("failed to prepare to get next row", K(ret));
    } else if (next_written_idx_ == rangekey_size_) {
      // do nothing
    } else if (next_written_idx_ != 0 ) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected next_written_idx", K(ret), K_(next_written_idx));
    } else {
      ret = OB_SUCCESS;
      ObEvalCtx *ctx = ir_rtdef_->eval_ctx_;
      ObExpr *relevance_proj_col = ir_ctdef_->relevance_proj_col_;
      while (OB_SUCC(ret) && next_written_idx_ < rangekey_size_) {
        // fill the remaining results with the relevance value of '0'
        // Note: if we need calculate the docid expr, fix the code and cache the hints.
        ObDocId default_docid;
        ObEvalCtx::BatchInfoScopeGuard guard(*ctx);
        guard.set_batch_idx(next_written_idx_);
        if (OB_FAIL(project_result(default_docid, 0))) {
          // keep reporting OB_ERR_UNEXPECTED to the caller, but log the real cause
          const int project_ret = ret;
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("failed to project result", K(ret), K(project_ret));
        }
        next_written_idx_ ++;
      }
      // project_result() fails when the relevance column is required but null, so the
      // column must not be dereferenced unconditionally afterwards.
      if (OB_SUCC(ret)) {
        if (nullptr != relevance_proj_col) {
          for (int64_t i = 0; i < rangekey_size_; i++) {
            relevance_proj_col->get_evaluated_flags(*ctx).set(i);
          }
        }
        ret = OB_ITER_END;
      }
      if (nullptr != relevance_proj_col) {
        relevance_proj_col->set_evaluated_projected(*ctx);
      }
      count = OB_MIN(rangekey_size_, capacity);
      next_written_idx_ = OB_MIN(rangekey_size_, capacity);
    }
  } else if (need_fill_doc_cnt && OB_FAIL(fill_total_doc_cnt())) {
    LOG_WARN("failed to fill total document count", K(ret), K(total_doc_cnt_));
  } else if (0 == total_doc_cnt_) {
    ret = OB_ITER_END;
  } else if (total_doc_cnt_ < 0) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected total doc cnt", K(ret), K(total_doc_cnt_));
  } else if (next_written_idx_ == rangekey_size_) {
    ret = OB_ITER_END;
  } else if (next_written_idx_ > rangekey_size_) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected capacity size", K(ret), K(capacity));
  } else if (next_written_idx_ == 0 &&OB_FAIL(get_next_batch_rows(count, capacity))) {
    if (OB_UNLIKELY(OB_ITER_END != ret)) {
      LOG_WARN("failed to get next rows with taat", K(ret));
    }
  } else if (next_written_idx_ < rangekey_size_) {
    // int64_t throughout: rangekey_size_, next_written_idx_ and capacity are all int64_t
    const int64_t remain_size = rangekey_size_ - next_written_idx_;
    const int64_t return_size = OB_MIN(remain_size, capacity);
    // next_written_idx_ is the output idx
    ObEvalCtx *ctx = ir_rtdef_->eval_ctx_;
    while (count < return_size && OB_SUCC(ret)) {
      int64_t pos = reverse_hints_[next_written_idx_];
      ObEvalCtx::BatchInfoScopeGuard guard(*ctx);
      guard.set_batch_idx(count);
      if (OB_FAIL(project_result(cache_doc_ids_[pos], relevances_[next_written_idx_]))) {
        // keep reporting OB_ERR_UNEXPECTED to the caller, but log the real cause
        const int project_ret = ret;
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("failed to project result", K(ret), K(project_ret));
      }
      next_written_idx_++;
      count ++;
    }
    if (OB_SUCC(ret) && next_written_idx_ == rangekey_size_) {
      ret = OB_ITER_END;
    }
  }
  return ret;
}
ObDASTRDaatIter::ObDASTRDaatIter()
: ObDASTextRetrievalMergeIter(),
loser_tree_cmp_(),
@ -1375,27 +1727,41 @@ int ObDASTRDaatIter::set_merge_iters(const ObIArray<ObDASIter *> &retrieval_iter
ret = OB_ERR_UNEXPECTED;
LOG_WARN("processing type unexpected", K(ret));
} else {
if (FALSE_IT(next_batch_iter_idxes_.set_allocator(&mem_context_->get_arena_allocator()))) {
} else if (OB_FAIL(next_batch_iter_idxes_.init(query_tokens_.count()))) {
LOG_WARN("failed to init next batch iter idxes array", K(ret));
} else if (OB_FAIL(next_batch_iter_idxes_.prepare_allocate(query_tokens_.count()))) {
LOG_WARN("failed to prepare allocate next batch iter idxes array", K(ret));
} else {
next_batch_cnt_ = query_tokens_.count();
for (int64_t i = 0; OB_SUCC(ret) && i < query_tokens_.count(); ++i) {
const ObString &query_token = query_tokens_.at(i);
ObDASTextRetrievalIter *iter = static_cast<ObDASTextRetrievalIter *>(retrieval_iters.at(i));
if (OB_FAIL(token_iters_.push_back(iter))) {
LOG_WARN("failed to append token iter to array", K(ret));
} else {
next_batch_iter_idxes_[i] = i;
}
next_batch_cnt_ = query_tokens_.count();
for (int64_t i = 0; OB_SUCC(ret) && i < query_tokens_.count(); ++i) {
const ObString &query_token = query_tokens_.at(i);
ObDASTextRetrievalIter *iter = static_cast<ObDASTextRetrievalIter *>(retrieval_iters.at(i));
if (OB_FAIL(token_iters_.push_back(iter))) {
LOG_WARN("failed to append token iter to array", K(ret));
}
}
}
return ret;
}
int ObDASTRDaatIter::do_table_scan()
{
int ret = OB_SUCCESS;
if (FALSE_IT(next_batch_iter_idxes_.set_allocator(&mem_context_->get_arena_allocator()))) {
} else if (OB_FAIL(next_batch_iter_idxes_.init(query_tokens_.count()))) {
LOG_WARN("failed to init next batch iter idxes array", K(ret));
} else if (OB_FAIL(next_batch_iter_idxes_.prepare_allocate(query_tokens_.count()))) {
LOG_WARN("failed to prepare allocate next batch iter idxes array", K(ret));
} else if (query_tokens_.count()!= 0 && OB_FAIL(iter_row_heap_->open(query_tokens_.count()))) {
LOG_WARN("failed to open iter row heap", K(ret), K_(query_tokens));
} else {
next_batch_cnt_ = query_tokens_.count();
for (int64_t i = 0; i < next_batch_cnt_; ++i) {
next_batch_iter_idxes_[i] = i;
}
}
if (OB_FAIL(ret)) {
} else if(OB_FAIL(ObDASTextRetrievalMergeIter::do_table_scan())) {
LOG_WARN("failed to do table scan", K(ret));
}
return ret;
}
int ObDASTRDaatIter::inner_init(ObDASIterParam &param)
{
int ret = OB_SUCCESS;
@ -1580,7 +1946,7 @@ int ObDASTRDaatIter::pull_next_batch_rows()
if (OB_SUCC(ret)) {
if (iter_row_heap_->empty()) {
ret = OB_ITER_END;
} else if (OB_FAIL(iter_row_heap_->rebuild())) {
} else if (0 != next_batch_cnt_ && OB_FAIL(iter_row_heap_->rebuild())) {
LOG_WARN("fail to rebuild loser tree", K(ret), K_(next_batch_cnt));
} else {
next_batch_cnt_ = 0;
@ -1621,7 +1987,7 @@ int ObDASTRDaatIter::pull_next_batch_rows_with_batch_mode()
if (OB_SUCC(ret)) {
if (iter_row_heap_->empty()) {
ret = OB_ITER_END;
} else if (OB_FAIL(iter_row_heap_->rebuild())) {
} else if (0 != next_batch_cnt_ && OB_FAIL(iter_row_heap_->rebuild())) {
LOG_WARN("fail to rebuild loser tree", K(ret), K_(next_batch_cnt));
} else {
next_batch_cnt_ = 0;
@ -1675,14 +2041,279 @@ int ObDASTRDaatIter::next_disjunctive_document(bool is_batch)
if (OB_SUCC(ret)) {
const double relevance_score = ir_ctdef_->need_calc_relevance() ? cur_doc_relevance : 1;
if (!is_batch && OB_FAIL(project_result(*top_item, relevance_score))) {
if (!is_batch && OB_FAIL(project_result(top_item->doc_id_, relevance_score))) {
LOG_WARN("failed to project result", K(ret));
} else if (is_batch && OB_FAIL(project_relevance(*top_item, relevance_score))) {
} else if (is_batch && OB_FAIL(project_relevance(top_item->doc_id_, relevance_score))) {
LOG_WARN("failed to project relevance", K(ret));
}
}
return ret;
}
// Default constructor: performs no initialization beyond the ObDASTRDaatIter base constructor.
ObDASTRDaatLookupIter::ObDASTRDaatLookupIter() : ObDASTRDaatIter() {}
int ObDASTRDaatLookupIter::rescan()
{
int ret = OB_SUCCESS;
if (OB_FAIL(ObDASTextRetrievalMergeIter::rescan())) {
LOG_WARN("failed to rescan iter", K(ret));
} else if (OB_UNLIKELY(query_tokens_.count() != token_iters_.count())) {
ret= OB_ERR_UNEXPECTED;
LOG_WARN("unexpected iter count mismatch with query tokens",
K(ret), K_(query_tokens), K_(token_iters));
} else if (0 != query_tokens_.count()) {
if (OB_FAIL(next_batch_iter_idxes_.init(query_tokens_.count()))) {
LOG_WARN("failed to init next batch iter idxes array", K(ret));
} else if (OB_FAIL(next_batch_iter_idxes_.prepare_allocate(query_tokens_.count()))) {
LOG_WARN("failed to prepare allocate next batch iter idxes array", K(ret));
} else if (OB_FAIL(iter_row_heap_->open(query_tokens_.count()))) {
LOG_WARN("failed to open iter row heap", K(ret), K_(query_tokens));
} else {
next_batch_cnt_ = token_iters_.count();
for (int64_t i = 0; OB_SUCC(ret) && i < token_iters_.count(); ++i) {
ObDASTextRetrievalIter *iter = token_iters_.at(i);
if (OB_FAIL(iter->rescan())) {
LOG_WARN("failed to append token iter to array", K(ret));
} else {
next_batch_iter_idxes_[i] = i;
}
}
}
}
return ret;
}
int ObDASTRDaatLookupIter::inner_get_next_row()
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(1 != rangekey_size_)) { // if rangekey_size_ > 1, UNSUPPORTED
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected rangekey size", K(ret), K_(rangekey_size));
} else if (next_written_idx_ >= rangekey_size_) {
ret = OB_ITER_END;
} else if (OB_FAIL(check_and_prepare())) {
if (OB_ITER_END != ret) {
LOG_WARN("failed to prepare to get next rows", K(ret));
} else {
ObEvalCtx *ctx = ir_rtdef_->eval_ctx_;
ObEvalCtx::BatchInfoScopeGuard guard(*ctx);
guard.set_batch_idx(0);
ObDocId default_doc_id;
if (OB_FAIL(project_result(default_doc_id, 0))) {
LOG_WARN("failed to project result", K(ret));
} else {
next_written_idx_++;
}
}
} else if (next_written_idx_ == rangekey_size_) {
ret = OB_ITER_END;
} else if (next_written_idx_ > rangekey_size_) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected capacity size", K(ret));
} else if (next_written_idx_ == 0) {
clear_evaluated_infos();
int capacity = 1;
if (OB_FAIL(pull_next_batch_rows())) {
if (OB_UNLIKELY(OB_ITER_END != ret)) {
LOG_WARN("failed to pull next batch rows from iterator", K(ret));
} else if (OB_FAIL(project_result(cache_doc_ids_[0], 0))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("failed to project result", K(ret));
} else {
next_written_idx_ ++;
}
} else if (OB_FAIL(next_disjunctive_document(capacity))) {
LOG_WARN("failed to get next document with disjunctive tokens", K(ret));
} else {
next_written_idx_ ++;
}
}
return ret;
}
// Batch lookup: produces relevance rows for the cached doc ids.
//
// Three situations are handled:
//  - check_and_prepare() reports OB_ITER_END before anything was written: every row is
//    filled with a default doc id and relevance 0;
//  - nothing has been written yet (next_written_idx_ == 0): all rangekey_size_ cached
//    doc ids are evaluated; results whose output position (hints_) fits into capacity
//    are projected, the rest are kept in relevances_;
//  - a previous call already wrote some rows: the remaining rows are emitted from
//    relevances_ (indexed by output position; reverse_hints_ maps an output position
//    back to its cached doc id).
//
// @param count     out: number of rows produced by this call
// @param capacity  maximum number of rows the caller accepts; must not be 0
// @return OB_SUCCESS, OB_ITER_END once all rows have been handed out,
//         OB_ERR_UNEXPECTED on inconsistent state, or the error of an underlying call.
int ObDASTRDaatLookupIter::inner_get_next_rows(int64_t &count, int64_t capacity)
{
  int ret = OB_SUCCESS;
  count = 0;
  if (OB_UNLIKELY(capacity == 0)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected capacity size", K(ret), K(capacity));
  } else if (OB_FAIL(check_and_prepare())) {
    if (OB_ITER_END != ret) {
      LOG_WARN("failed to prepare to get next rows", K(ret));
    } else if (next_written_idx_ == rangekey_size_) {
      // do nothing
    } else if (next_written_idx_ != 0) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected next_written_idx", K(ret), K_(next_written_idx));
    } else {
      ObEvalCtx *ctx = ir_rtdef_->eval_ctx_;
      ObExpr *relevance_proj_col = ir_ctdef_->relevance_proj_col_;
      ret = OB_SUCCESS;
      while (OB_SUCC(ret) && next_written_idx_ < rangekey_size_) {
        // fill the remaining results with the relevance value of '0'
        // Note: if we need calculate the docid expr, fix the code and cache the hints.
        ObDocId default_docid;
        ObEvalCtx::BatchInfoScopeGuard guard(*ctx);
        guard.set_batch_idx(next_written_idx_);
        if (OB_FAIL(project_result(default_docid, 0))) {
          // keep reporting OB_ERR_UNEXPECTED to the caller, but log the real cause
          const int project_ret = ret;
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("failed to project result", K(ret), K(project_ret));
        }
        next_written_idx_ ++;
      }
      // project_result() fails when the relevance column is required but null, so the
      // column must not be dereferenced unconditionally afterwards.
      if (OB_SUCC(ret)) {
        if (nullptr != relevance_proj_col) {
          for (int64_t i = 0; i < rangekey_size_; i++) {
            relevance_proj_col->get_evaluated_flags(*ctx).set(i);
          }
        }
        ret = OB_ITER_END;
      }
      if (nullptr != relevance_proj_col) {
        relevance_proj_col->set_evaluated_projected(*ctx);
      }
      count = OB_MIN(rangekey_size_, capacity);
      next_written_idx_ = OB_MIN(rangekey_size_, capacity);
    }
  } else if (next_written_idx_ == rangekey_size_) {
    ret = OB_ITER_END;
  } else if (next_written_idx_ > rangekey_size_) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected capacity size", K(ret), K(capacity));
  } else if (next_written_idx_ == 0) {
    // for normal case
    ObEvalCtx *ctx = ir_rtdef_->eval_ctx_;
    // fill the all result in the range of rangkey_size_
    while (OB_SUCC(ret) && next_written_idx_ < rangekey_size_) {
      if (OB_FAIL(pull_next_batch_rows_with_batch_mode())) {
        if (OB_UNLIKELY(OB_ITER_END != ret)) {
          LOG_WARN("failed to pull next batch rows from iterator", K(ret));
        }
      } else if (OB_FAIL(next_disjunctive_document(capacity))) {
        LOG_WARN("failed to get next document with disjunctive tokens", K(ret));
      } else {
        next_written_idx_ ++;
      }
    }
    if (OB_ITER_END == ret) {
      // token iterators are exhausted; the remaining cached doc ids matched nothing
      ret = OB_SUCCESS;
    }
    // fill the remaining results with the relevance value of '0'
    while (next_written_idx_ < rangekey_size_ && OB_SUCC(ret)) {
      int64_t pos = hints_[next_written_idx_];
      if (pos < capacity) {
        ObEvalCtx::BatchInfoScopeGuard guard(*ctx);
        guard.set_batch_idx(pos);
        if (OB_FAIL(project_result(cache_doc_ids_[next_written_idx_], 0))) {
          // keep reporting OB_ERR_UNEXPECTED to the caller, but log the real cause
          const int project_ret = ret;
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("failed to project result", K(ret), K(project_ret));
        }
      }
      next_written_idx_++;
    }
    // output the part result in the range of capacity
    if (OB_SUCC(ret)) {
      const int64_t output_cnt = OB_MIN(rangekey_size_, capacity);
      ObExpr *relevance_proj_col = ir_ctdef_->relevance_proj_col_;
      if (nullptr != relevance_proj_col) {
        for (int64_t i = 0; i < output_cnt; i++) {
          relevance_proj_col->get_evaluated_flags(*ctx).set(i);
        }
        relevance_proj_col->set_evaluated_projected(*ctx);
      }
      next_written_idx_ = output_cnt;
      count = output_cnt;
    }
  } else if (next_written_idx_ < rangekey_size_) {
    // int64_t throughout: rangekey_size_, next_written_idx_ and capacity are all int64_t
    const int64_t remain_size = rangekey_size_ - next_written_idx_;
    const int64_t return_size = OB_MIN(remain_size, capacity);
    // next_written_idx_ is the output idx
    ObEvalCtx *ctx = ir_rtdef_->eval_ctx_;
    while (count < return_size && OB_SUCC(ret)) {
      int64_t pos = reverse_hints_[next_written_idx_];
      ObEvalCtx::BatchInfoScopeGuard guard(*ctx);
      guard.set_batch_idx(count);
      if (OB_FAIL(project_result(cache_doc_ids_[pos], relevances_[next_written_idx_]))) {
        // keep reporting OB_ERR_UNEXPECTED to the caller, but log the real cause
        const int project_ret = ret;
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("failed to project result", K(ret), K(project_ret));
      }
      next_written_idx_++;
      count ++;
    }
    if (OB_SUCC(ret) && next_written_idx_ == rangekey_size_) {
      ret = OB_ITER_END;
    }
  }
  return ret;
}
// Evaluates the cached doc id at next_written_idx_ against the current heap top.
//
// If the heap top is a different document, the cached doc id matched no token and is
// reported with relevance 0 (the heap is left untouched). Otherwise all heap entries
// of that document are popped, their relevances are summed, and the iterators they
// came from are queued in next_batch_iter_idxes_ for the next pull.
// The result is projected at output position hints_[next_written_idx_] when that
// position is below capacity; otherwise it is kept in relevances_[position].
//
// @param capacity  number of output rows that may be projected in this call
// @return OB_SUCCESS, OB_ERR_UNEXPECTED when the heap is empty, or the error of an
//         underlying heap / projection call.
int ObDASTRDaatLookupIter::next_disjunctive_document(const int capacity)
{
  int ret = OB_SUCCESS;
  const ObIRIterLoserTreeItem *top_item = nullptr;
  ObEvalCtx *ctx = ir_rtdef_->eval_ctx_;
  if (!iter_row_heap_->empty()) {
    if (OB_FAIL(iter_row_heap_->top(top_item))) {
      LOG_WARN("failed to get top item from heap", K(ret));
    } else if (cache_doc_ids_[next_written_idx_] != top_item->doc_id_) {
      // fill non-existent results with the relevance value of '0'
      int64_t pos = hints_[next_written_idx_];
      if (pos < capacity) {
        ObEvalCtx::BatchInfoScopeGuard guard(*ctx);
        guard.set_batch_idx(pos);
        if (OB_FAIL(project_result(cache_doc_ids_[next_written_idx_], 0))) {
          LOG_WARN("failed to project result", K(ret));
        }
      } else {
        // cache it
        relevances_[pos] = 0;
      }
    } else {
      bool curr_doc_end = false;
      // Do we need to use ObExpr to collect relevance?
      double cur_doc_relevance = 0.0;
      while (OB_SUCC(ret) && !iter_row_heap_->empty() && !curr_doc_end) {
        if (iter_row_heap_->is_unique_champion()) {
          curr_doc_end = true;
        }
        if (OB_FAIL(iter_row_heap_->top(top_item))) {
          LOG_WARN("failed to get top item from heap", K(ret));
        } else {
          // consider to add an expr for collectiong conjunction result between query tokens here?
          cur_doc_relevance += top_item->relevance_;
          next_batch_iter_idxes_[next_batch_cnt_++] = top_item->iter_idx_;
          if (OB_FAIL(iter_row_heap_->pop())) {
            LOG_WARN("failed to pop top item in heap", K(ret));
          }
        }
      }
      if (OB_SUCC(ret)) {
        const double relevance_score = ir_ctdef_->need_calc_relevance() ? cur_doc_relevance : 0;
        int64_t pos = hints_[next_written_idx_];
        if (pos < capacity) {
          ObEvalCtx::BatchInfoScopeGuard guard(*ctx);
          guard.set_batch_idx(pos);
          // Project the cached doc id (equal to the popped document, as checked above)
          // instead of reading through top_item, which refers to an already popped
          // heap entry at this point.
          if (OB_FAIL(project_result(cache_doc_ids_[next_written_idx_], relevance_score))) {
            LOG_WARN("failed to project result", K(ret));
          }
        } else {
          // cache it
          relevances_[pos] = relevance_score;
        }
      }
    }
  } else {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("iter_row_heap_ is empty", K(ret));
  }
  return ret;
}
} // namespace sql
} // namespace oceanbase

View File

@ -65,7 +65,8 @@ public:
doc_cnt_iter_(nullptr),
tx_desc_(nullptr),
snapshot_(nullptr),
query_tokens_()
query_tokens_(),
force_return_docid_(false)
{}
virtual bool is_valid() const override
@ -79,6 +80,7 @@ public:
transaction::ObTxDesc *tx_desc_;
transaction::ObTxReadSnapshot *snapshot_;
ObArray<ObString> query_tokens_;
bool force_return_docid_;
};
class ObDASTextRetrievalMergeIter : public ObDASIter
@ -107,11 +109,13 @@ public:
void set_doc_id_idx_tablet_id(const ObTabletID &tablet_id) { doc_id_idx_tablet_id_ = tablet_id; }
void set_ls_id(const ObLSID &ls_id) { ls_id_ = ls_id; }
storage::ObTableScanParam &get_doc_agg_param() { return whole_doc_agg_param_; }
int set_related_tablet_ids(const ObLSID &ls_id, const ObDASRelatedTabletID &related_tablet_ids);
int set_related_tablet_ids(const ObLSID &ls_id, const ObDASFTSTabletID &related_tablet_ids);
virtual int set_merge_iters(const ObIArray<ObDASIter *> &retrieval_iters);
const ObIArray<ObString> &get_query_tokens() { return query_tokens_; }
bool is_taat_mode() { return RetrievalProcType::TAAT == processing_type_; }
static int build_query_tokens(const ObDASIRScanCtDef *ir_ctdef, ObDASIRScanRtDef *ir_rtdef, common::ObIAllocator &alloc, ObArray<ObString> &query_tokens);
virtual int set_rangkey_and_selector(const common::ObIArray<std::pair<ObDocId, int>> &virtual_rangkeys);
protected:
virtual int inner_init(ObDASIterParam &param) override;
virtual int inner_reuse() override;
@ -120,8 +124,8 @@ protected:
virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override;
protected:
virtual int check_and_prepare();
int project_result(const ObIRIterLoserTreeItem &item, const double relevance);
int project_relevance(const ObIRIterLoserTreeItem &item, const double relevance);
int project_result(const ObDocId &docid, const double relevance);
int project_relevance(const ObDocId &docid, const double relevance);
int project_docid();
void clear_evaluated_infos();
int init_iters(
@ -136,6 +140,7 @@ protected:
static const int64_t OB_DEFAULT_QUERY_TOKEN_ITER_CNT = 4;
typedef ObSEArray<ObDASTextRetrievalIter *, OB_DEFAULT_QUERY_TOKEN_ITER_CNT> ObDASTokenRetrievalIterArray;
lib::MemoryContext mem_context_;
ObArenaAllocator allocator_;
TokenRelationType relation_type_;
RetrievalProcType processing_type_;
const ObDASIRScanCtDef *ir_ctdef_;
@ -147,12 +152,17 @@ protected:
ObArray<ObString> query_tokens_;
ObDASTokenRetrievalIterArray token_iters_;
ObFixedArray<ObDocId, ObIAllocator> cache_doc_ids_;
ObFixedArray<int64_t, ObIAllocator> hints_; // the postion of the cur idx cache doc in output exprs
ObFixedArray<double, ObIAllocator> relevances_;
ObFixedArray<int64_t, ObIAllocator> reverse_hints_; // the postion of the cur output doc in cache doc
int64_t rangekey_size_;
int64_t next_written_idx_;
ObDASScanIter *whole_doc_cnt_iter_;
ObTableScanParam whole_doc_agg_param_;
common::ObLimitParam limit_param_;
int64_t input_row_cnt_;
int64_t output_row_cnt_;
bool force_return_docid_; // for function lookup
bool doc_cnt_calculated_;
bool doc_cnt_iter_acquired_;
bool is_inited_;
@ -172,15 +182,16 @@ protected:
virtual int inner_get_next_row() override;
virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override;
virtual int check_and_prepare() override;
private:
protected:
int get_next_batch_rows(int64_t &count, int64_t capacity);
int fill_output_exprs(int64_t &count, int64_t safe_capacity);
virtual int fill_output_exprs(int64_t &count, int64_t safe_capacity);
int load_next_hashmap();
int inner_load_next_hashmap();
int fill_total_doc_cnt();
int init_stores_by_partition();
int fill_chunk_store_by_tr_iter();
private:
int reset_query_token(const ObString &query_token);
protected:
static const int64_t OB_MAX_HASHMAP_COUNT = 20;
static const int64_t OB_HASHMAP_DEFAULT_SIZE = 1000;
hash::ObHashMap<ObDocId, double> **hash_maps_;
@ -196,12 +207,25 @@ private:
bool is_hashmap_inited_;
};
// Specialization of ObDASTRTaatIter that replaces rescan(), the row/batch
// fetch entry points and fill_output_exprs() with its own implementations.
// NOTE(review): judging by the name, this is presumably the variant used when
// relevance is looked up for a caller-supplied set of doc ids (function
// lookup) -- confirm against the implementation file.
class ObDASTRTaatLookupIter : public ObDASTRTaatIter
{
public:
ObDASTRTaatLookupIter();
virtual ~ObDASTRTaatLookupIter() {}
// Overrides the base rescan().
virtual int rescan() override;
protected:
// Single-row fetch hook.
virtual int inner_get_next_row() override;
// Batch fetch hook; `capacity` is the requested row count, `count` is an output.
virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override;
// Overrides the output-projection step declared virtual in ObDASTRTaatIter.
virtual int fill_output_exprs(int64_t &count, int64_t safe_capacity) override;
};
class ObDASTRDaatIter : public ObDASTextRetrievalMergeIter
{
public:
ObDASTRDaatIter();
virtual ~ObDASTRDaatIter() {}
virtual int rescan() override;
virtual int do_table_scan() override;
virtual int set_merge_iters(const ObIArray<ObDASIter *> &retrieval_iters) override;
protected:
virtual int inner_init(ObDASIterParam &param) override;
@ -209,21 +233,32 @@ protected:
virtual int inner_release() override;
virtual int inner_get_next_row() override;
virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override;
private:
int pull_next_batch_rows();
int pull_next_batch_rows_with_batch_mode();
int fill_loser_tree_item(
ObDASTextRetrievalIter &iter,
const int64_t iter_idx,
ObIRIterLoserTreeItem &item);
virtual int pull_next_batch_rows();
virtual int pull_next_batch_rows_with_batch_mode();
virtual int fill_loser_tree_item(
ObDASTextRetrievalIter &iter,
const int64_t iter_idx,
ObIRIterLoserTreeItem &item);
int next_disjunctive_document(bool batch_mode);
private:
protected:
ObIRIterLoserTreeCmp loser_tree_cmp_;
ObIRIterLoserTree *iter_row_heap_;
ObFixedArray<int64_t, ObIAllocator> next_batch_iter_idxes_;
int64_t next_batch_cnt_;
};
// Specialization of ObDASTRDaatIter that replaces rescan() and the row/batch
// fetch entry points with its own implementations.
// NOTE(review): judging by the name, this is presumably the variant used when
// relevance is looked up for a caller-supplied set of doc ids (function
// lookup) -- confirm against the implementation file.
class ObDASTRDaatLookupIter : public ObDASTRDaatIter
{
public:
ObDASTRDaatLookupIter();
virtual ~ObDASTRDaatLookupIter() {}
// Overrides the base rescan().
virtual int rescan() override;
protected:
// Single-row fetch hook.
virtual int inner_get_next_row() override;
// Batch fetch hook; `capacity` is the requested row count, `count` is an output.
virtual int inner_get_next_rows(int64_t &count, int64_t capacity) override;
// NOTE(review): this is NOT an override. It shares its name with the
// non-virtual ObDASTRDaatIter::next_disjunctive_document(bool batch_mode) and
// therefore hides that declaration inside this class: an unqualified call
// with a bool argument made from this class would bind here (bool converts to
// int). Qualify the call with ObDASTRDaatIter:: when the base version is meant.
int next_disjunctive_document(const int capacity);
};
} // namespace sql
} // namespace oceanbase

View File

@ -65,6 +65,11 @@ ObDASScanRtDef *ObDASTableLookupRtDef::get_lookup_scan_rtdef()
return scan_rtdef;
}
OB_SERIALIZE_MEMBER((ObDASIndexProjLookupCtDef, ObDASTableLookupCtDef),
index_scan_proj_exprs_);
OB_SERIALIZE_MEMBER((ObDASIndexProjLookupRtDef, ObDASTableLookupRtDef));
OB_SERIALIZE_MEMBER((ObDASSortCtDef, ObDASAttachCtDef),
sort_exprs_,
sort_collations_,

View File

@ -55,8 +55,8 @@ struct ObDASTableLookupCtDef : ObDASAttachCtDef
{
OB_UNIS_VERSION(1);
public:
ObDASTableLookupCtDef(common::ObIAllocator &alloc)
: ObDASAttachCtDef(alloc, DAS_OP_TABLE_LOOKUP),
ObDASTableLookupCtDef(common::ObIAllocator &alloc, const ObDASOpType &op_type = DAS_OP_TABLE_LOOKUP)
: ObDASAttachCtDef(alloc, op_type),
is_global_index_(false)
{
}
@ -75,8 +75,8 @@ struct ObDASTableLookupRtDef : ObDASAttachRtDef
{
OB_UNIS_VERSION(1);
public:
ObDASTableLookupRtDef()
: ObDASAttachRtDef(DAS_OP_TABLE_LOOKUP)
ObDASTableLookupRtDef(const ObDASOpType &op_type = DAS_OP_TABLE_LOOKUP)
: ObDASAttachRtDef(op_type)
{}
virtual ~ObDASTableLookupRtDef() {}
@ -89,6 +89,41 @@ public:
ObDASScanRtDef *get_lookup_scan_rtdef();
};
// Compile-time definition of the DAS_OP_INDEX_PROJ_LOOKUP attach operator.
// It is a table-lookup definition that additionally carries the list of
// expressions projected by the index scan.
struct ObDASIndexProjLookupCtDef : ObDASTableLookupCtDef
{
  OB_UNIS_VERSION(1);
public:
  // alloc: allocator handed to the base definition and to the projection array.
  ObDASIndexProjLookupCtDef(common::ObIAllocator &alloc)
    : ObDASTableLookupCtDef(alloc, DAS_OP_INDEX_PROJ_LOOKUP),
      index_scan_proj_exprs_(alloc)
  {
  }

  virtual ~ObDASIndexProjLookupCtDef() {}

  // Returns the lookup-side child, i.e. the second of exactly two children.
  // The two-children layout is asserted, not reported as an error code.
  const ObDASBaseCtDef *get_lookup_ctdef() const
  {
    OB_ASSERT(2 == children_cnt_ && children_ != nullptr);
    const ObDASBaseCtDef *lookup_child = children_[1];
    return lookup_child;
  }

  // Expressions projected by the index scan; serialized with this definition.
  ExprFixedArray index_scan_proj_exprs_;
};
// Run-time counterpart of ObDASIndexProjLookupCtDef (DAS_OP_INDEX_PROJ_LOOKUP).
struct ObDASIndexProjLookupRtDef : ObDASTableLookupRtDef
{
  OB_UNIS_VERSION(1);
public:
  ObDASIndexProjLookupRtDef()
    : ObDASTableLookupRtDef(DAS_OP_INDEX_PROJ_LOOKUP)
  {
  }

  virtual ~ObDASIndexProjLookupRtDef() {}

  // Returns the lookup-side child, i.e. the second of exactly two children.
  // The two-children layout is asserted, not reported as an error code.
  ObDASBaseRtDef *get_lookup_rtdef()
  {
    OB_ASSERT(2 == children_cnt_ && children_ != nullptr);
    ObDASBaseRtDef *lookup_child = children_[1];
    return lookup_child;
  }
};
struct ObDASSortCtDef : ObDASAttachCtDef
{
OB_UNIS_VERSION(1);

View File

@ -150,11 +150,18 @@ struct ObDASVIdMergeCtDef;
struct ObDASVIdMergeRtDef;
REGISTER_DAS_ATTACH_OP(DAS_OP_VID_MERGE, ObDASVIdMergeCtDef, ObDASVIdMergeRtDef);
struct ObDASFuncLookupCtDef;
struct ObDASFuncLookupRtDef;
REGISTER_DAS_ATTACH_OP(DAS_OP_FUNC_LOOKUP, ObDASFuncLookupCtDef, ObDASFuncLookupRtDef);
struct ObDASIndexMergeCtDef;
struct ObDASIndexMergeRtDef;
REGISTER_DAS_ATTACH_OP(DAS_OP_INDEX_MERGE, ObDASIndexMergeCtDef, ObDASIndexMergeRtDef);
struct ObDASIndexProjLookupCtDef;
struct ObDASIndexProjLookupRtDef;
REGISTER_DAS_ATTACH_OP(DAS_OP_INDEX_PROJ_LOOKUP, ObDASIndexProjLookupCtDef, ObDASIndexProjLookupRtDef);
#undef REGISTER_DAS_ATTACH_OP
} // namespace sql
} // namespace oceanbase

View File

@ -33,6 +33,9 @@ namespace sql
// Per-column descriptor tables for generated fulltext rows. The *_TYPES and
// *_EXPR_TYPE arrays are parallel: entry i of each pair describes the same
// column (datum mapping type and the expr type that produces it).
// The FTS_INDEX_* tables put the word-segment column first and the doc-id
// column second; the FTS_DOC_WORD_* tables swap those two. Word count and
// document length follow in both layouts.
ObObjDatumMapType ObFTIndexRowCache::FTS_INDEX_TYPES[] = {OBJ_DATUM_STRING, OBJ_DATUM_STRING, OBJ_DATUM_8BYTE_DATA, OBJ_DATUM_8BYTE_DATA};
ObObjDatumMapType ObFTIndexRowCache::FTS_DOC_WORD_TYPES[] = {OBJ_DATUM_STRING, OBJ_DATUM_STRING, OBJ_DATUM_8BYTE_DATA, OBJ_DATUM_8BYTE_DATA};
ObExprOperatorType ObFTIndexRowCache::FTS_INDEX_EXPR_TYPE[] = {T_FUN_SYS_WORD_SEGMENT, T_FUN_SYS_DOC_ID, T_FUN_SYS_WORD_COUNT, T_FUN_SYS_DOC_LENGTH};
ObExprOperatorType ObFTIndexRowCache::FTS_DOC_WORD_EXPR_TYPE[] = {T_FUN_SYS_DOC_ID, T_FUN_SYS_WORD_SEGMENT, T_FUN_SYS_WORD_COUNT, T_FUN_SYS_DOC_LENGTH};
ObFTIndexRowCache::ObFTIndexRowCache()
: rows_(),
row_idx_(0),

View File

@ -28,6 +28,8 @@ class ObFTIndexRowCache final
public:
static ObObjDatumMapType FTS_INDEX_TYPES[4];
static ObObjDatumMapType FTS_DOC_WORD_TYPES[4];
static ObExprOperatorType FTS_INDEX_EXPR_TYPE[4];
static ObExprOperatorType FTS_DOC_WORD_EXPR_TYPE[4];
ObFTIndexRowCache();
~ObFTIndexRowCache();

View File

@ -35,5 +35,13 @@ OB_SERIALIZE_MEMBER((ObDASIRAuxLookupCtDef, ObDASAttachCtDef),
OB_SERIALIZE_MEMBER((ObDASIRAuxLookupRtDef, ObDASAttachRtDef));
OB_SERIALIZE_MEMBER((ObDASFuncLookupCtDef, ObDASAttachCtDef),
main_lookup_cnt_,
doc_id_lookup_cnt_,
func_lookup_cnt_,
lookup_doc_id_expr_);
OB_SERIALIZE_MEMBER((ObDASFuncLookupRtDef, ObDASAttachRtDef));
} // sql
} // oceanbase

View File

@ -33,9 +33,7 @@ public:
relevance_expr_(nullptr),
relevance_proj_col_(nullptr),
estimated_total_doc_cnt_(0),
flags_(0)
{
}
flags_(0) {}
bool need_calc_relevance() const { return nullptr != relevance_expr_; }
bool need_proj_relevance_score() const { return nullptr != relevance_proj_col_; }
bool need_fwd_idx_agg() const { return has_fwd_agg_ && need_calc_relevance(); }
@ -177,8 +175,7 @@ struct ObDASIRAuxLookupCtDef : ObDASAttachCtDef
public:
ObDASIRAuxLookupCtDef(common::ObIAllocator &alloc)
: ObDASAttachCtDef(alloc, DAS_OP_IR_AUX_LOOKUP),
relevance_proj_col_(nullptr)
{ }
relevance_proj_col_(nullptr) {}
const ObDASBaseCtDef *get_doc_id_scan_ctdef() const
{
@ -199,8 +196,7 @@ struct ObDASIRAuxLookupRtDef : ObDASAttachRtDef
OB_UNIS_VERSION(1);
public:
ObDASIRAuxLookupRtDef()
: ObDASAttachRtDef(DAS_OP_IR_AUX_LOOKUP)
{}
: ObDASAttachRtDef(DAS_OP_IR_AUX_LOOKUP) {}
virtual ~ObDASIRAuxLookupRtDef() {}
@ -216,6 +212,125 @@ public:
}
};
// Compile-time definition of the DAS_OP_FUNC_LOOKUP attach operator.
//
// Children are laid out in three consecutive groups:
//   [0, main_lookup_cnt_)                          main-table lookup
//   [main_lookup_cnt_, +doc_id_lookup_cnt_)        doc-id lookup
//   [main_lookup_cnt_ + doc_id_lookup_cnt_, ...)   function lookups
// Every index helper asserts that children_cnt_ equals the sum of the three
// counters. A helper returns -1 (or nullptr for the ctdef getters) when the
// requested child does not exist.
struct ObDASFuncLookupCtDef : ObDASAttachCtDef
{
  OB_UNIS_VERSION(1);
public:
  ObDASFuncLookupCtDef(common::ObIAllocator &alloc)
    : ObDASAttachCtDef(alloc, DAS_OP_FUNC_LOOKUP),
      main_lookup_cnt_(0),
      doc_id_lookup_cnt_(0),
      func_lookup_cnt_(0),
      lookup_doc_id_expr_(nullptr) {}

  virtual ~ObDASFuncLookupCtDef() {}

  bool has_main_table_lookup() const { return main_lookup_cnt_ > 0; }
  bool has_doc_id_lookup() const { return doc_id_lookup_cnt_ > 0; }

  // Maps the idx-th function lookup to its position in children_.
  // Returns -1 when idx is outside [0, func_lookup_cnt_). The lower bound is
  // checked explicitly: a negative idx would otherwise resolve to a slot in the
  // main/doc-id groups and be handed out as a function-lookup child.
  int64_t get_func_lookup_scan_idx(const int64_t idx) const
  {
    OB_ASSERT(children_cnt_ == (main_lookup_cnt_ + doc_id_lookup_cnt_ + func_lookup_cnt_));
    return (idx >= 0 && idx < func_lookup_cnt_) ? (idx + doc_id_lookup_cnt_ + main_lookup_cnt_) : -1;
  }

  // Position of the doc-id lookup child in children_, or -1 if there is none.
  int64_t get_doc_id_lookup_scan_idx() const
  {
    OB_ASSERT(children_cnt_ == (main_lookup_cnt_ + doc_id_lookup_cnt_ + func_lookup_cnt_));
    const int64_t ret_idx = has_doc_id_lookup() ? (main_lookup_cnt_) : -1;
    return ret_idx;
  }

  // Position of the main-table lookup child in children_, or -1 if there is none.
  int64_t get_main_lookup_scan_idx() const
  {
    OB_ASSERT(children_cnt_ == (main_lookup_cnt_ + doc_id_lookup_cnt_ + func_lookup_cnt_));
    const int64_t ret_idx = has_main_table_lookup() ? 0 : -1;
    return ret_idx;
  }

  // Returns the idx-th function-lookup child, or nullptr if idx is out of
  // range or no children are attached.
  const ObDASBaseCtDef *get_func_lookup_scan_ctdef(const int64_t idx) const
  {
    const ObDASBaseCtDef *ctdef = nullptr;
    const int64_t children_idx = get_func_lookup_scan_idx(idx);
    if (children_idx >= 0 && children_idx < children_cnt_ && nullptr != children_) {
      ctdef = children_[children_idx];
    }
    return ctdef;
  }

  // Returns the doc-id lookup child, or nullptr if this definition has none.
  const ObDASBaseCtDef *get_doc_id_lookup_scan_ctdef() const
  {
    const ObDASBaseCtDef *doc_id_lookup_scan_ctdef = nullptr;
    const int64_t children_idx = get_doc_id_lookup_scan_idx();
    if (children_idx >= 0 && children_idx < children_cnt_ && nullptr != children_) {
      doc_id_lookup_scan_ctdef = children_[children_idx];
    }
    return doc_id_lookup_scan_ctdef;
  }

  // Returns the main-table lookup child, or nullptr if this definition has none.
  const ObDASBaseCtDef *get_main_lookup_scan_ctdef() const
  {
    const ObDASBaseCtDef *main_lookup_ctdef = nullptr;
    const int64_t children_idx = get_main_lookup_scan_idx();
    if (children_idx >= 0 && children_idx < children_cnt_ && nullptr != children_) {
      main_lookup_ctdef = children_[children_idx];
    }
    return main_lookup_ctdef;
  }

  int64_t main_lookup_cnt_;      // number of main-table lookup children (tested with > 0)
  int64_t doc_id_lookup_cnt_;    // number of doc-id lookup children (tested with > 0)
  int64_t func_lookup_cnt_;      // number of function-lookup children
  ObExpr *lookup_doc_id_expr_;   // serialized with the definition; nullptr by default
};
// Run-time counterpart of ObDASFuncLookupCtDef (DAS_OP_FUNC_LOOKUP).
// Child positions are not stored here: every getter asks the compile-time
// definition (ctdef_) for the position and then bounds-checks it against this
// object's own children_ array. A getter yields nullptr when the child is
// absent.
struct ObDASFuncLookupRtDef : ObDASAttachRtDef
{
  OB_UNIS_VERSION(1);
public:
  ObDASFuncLookupRtDef()
    : ObDASAttachRtDef(DAS_OP_FUNC_LOOKUP) {}

  virtual ~ObDASFuncLookupRtDef() {}

  // Number of function-lookup children, as recorded in the ctdef.
  int64_t get_func_lookup_count() const
  {
    const ObDASFuncLookupCtDef *func_ctdef = static_cast<const ObDASFuncLookupCtDef *>(ctdef_);
    return func_ctdef->func_lookup_cnt_;
  }

  // Run-time definition of the idx-th function lookup, or nullptr.
  ObDASBaseRtDef *get_func_lookup_scan_rtdef(const int64_t idx) const
  {
    const ObDASFuncLookupCtDef *func_ctdef = static_cast<const ObDASFuncLookupCtDef *>(ctdef_);
    const int64_t pos = func_ctdef->get_func_lookup_scan_idx(idx);
    const bool pos_is_valid = (pos >= 0) && (pos < children_cnt_) && (nullptr != children_);
    return pos_is_valid ? children_[pos] : nullptr;
  }

  // Run-time definition of the doc-id lookup, or nullptr.
  ObDASBaseRtDef *get_doc_id_lookup_scan_rtdef() const
  {
    const ObDASFuncLookupCtDef *func_ctdef = static_cast<const ObDASFuncLookupCtDef *>(ctdef_);
    const int64_t pos = func_ctdef->get_doc_id_lookup_scan_idx();
    const bool pos_is_valid = (pos >= 0) && (pos < children_cnt_) && (nullptr != children_);
    return pos_is_valid ? children_[pos] : nullptr;
  }

  // Run-time definition of the main-table lookup, or nullptr.
  ObDASBaseRtDef *get_main_lookup_scan_rtdef() const
  {
    const ObDASFuncLookupCtDef *func_ctdef = static_cast<const ObDASFuncLookupCtDef *>(ctdef_);
    const int64_t pos = func_ctdef->get_main_lookup_scan_idx();
    const bool pos_is_valid = (pos >= 0) && (pos < children_cnt_) && (nullptr != children_);
    return pos_is_valid ? children_[pos] : nullptr;
  }
};
} // namespace sql
} // namespace oceanbase

View File

@ -380,7 +380,10 @@ ObDASIterTreeType ObDASScanOp::get_iter_tree_type() const
bool is_spatial_index = scan_param_.table_param_->is_spatial_index();
bool is_multivalue_index = scan_param_.table_param_->is_multivalue_index();
bool is_vector_index = scan_param_.table_param_->is_vec_index();
if (is_fts_index) {
if (is_func_lookup(attach_ctdef_)) {
tree_type = ObDASIterTreeType::ITER_TREE_FUNC_LOOKUP;
} else if (is_fts_index) {
tree_type = ObDASIterTreeType::ITER_TREE_TEXT_RETRIEVAL;
} else if (is_spatial_index) {
tree_type = ObDASIterTreeType::ITER_TREE_GIS_LOOKUP;
@ -415,6 +418,18 @@ bool ObDASScanOp::is_index_merge(const ObDASBaseCtDef *attach_ctdef) const
return bret;
}
bool ObDASScanOp::is_func_lookup(const ObDASBaseCtDef *attach_ctdef) const
{
bool bret = false;
if (nullptr != attach_ctdef && attach_ctdef->op_type_ == ObDASOpType::DAS_OP_INDEX_PROJ_LOOKUP) {
const ObDASBaseCtDef *lookup_ctdef = static_cast<const ObDASIndexProjLookupCtDef *>(attach_ctdef)->get_lookup_ctdef();
if (OB_NOT_NULL(lookup_ctdef)) {
bret = lookup_ctdef->op_type_ == ObDASOpType::DAS_OP_FUNC_LOOKUP;
}
}
return bret;
}
int ObDASScanOp::init_related_tablet_ids(ObDASRelatedTabletID &related_tablet_ids)
{
int ret = OB_SUCCESS;
@ -428,12 +443,14 @@ int ObDASScanOp::init_related_tablet_ids(ObDASRelatedTabletID &related_tablet_id
LOG_WARN("fail to get rowkey vid tablet id", K(ret));
} else if (!scan_param_.table_param_->is_spatial_index() && OB_FAIL(get_aux_lookup_tablet_id(related_tablet_ids.aux_lookup_tablet_id_))) {
LOG_WARN("failed to get aux lookup tablet id", K(ret));
} else if (OB_FAIL(get_text_ir_tablet_ids(related_tablet_ids.inv_idx_tablet_id_,
related_tablet_ids.fwd_idx_tablet_id_,
related_tablet_ids.doc_id_idx_tablet_id_))) {
} else if (OB_FAIL(get_base_text_ir_tablet_ids(related_tablet_ids.inv_idx_tablet_id_,
related_tablet_ids.fwd_idx_tablet_id_,
related_tablet_ids.doc_id_idx_tablet_id_))) {
LOG_WARN("failed to get text ir tablet ids", K(ret));
} else if (OB_FAIL(get_index_merge_tablet_ids(related_tablet_ids.index_merge_tablet_ids_))) {
LOG_WARN("failed to get index merge tablet ids", K(ret));
} else if (OB_FAIL(get_func_lookup_tablet_ids(related_tablet_ids))) {
LOG_WARN("failed to get func lookup tablet ids", K(ret));
}
return ret;
}
@ -1079,6 +1096,14 @@ int ObDASScanOp::reuse_iter()
}
break;
}
case ITER_TREE_FUNC_LOOKUP: {
ObDASIter *result_iter = static_cast<ObDASIter *>(result_);
if (OB_FAIL(ObDASIterUtils::set_func_lookup_iter_related_ids(
attach_ctdef_, tablet_ids_, ls_id_, -1, result_iter))) {
LOG_WARN("failed to set text retrieval related ids", K(ret));
}
break;
}
case ITER_TREE_MVI_LOOKUP: {
if (OB_NOT_NULL(get_lookup_ctdef())) {
ObDASLocalLookupIter *lookup_iter = static_cast<ObDASLocalLookupIter*>(result_);
@ -1398,7 +1423,7 @@ int ObDASScanOp::get_vec_ir_tablet_ids(
return ret;
}
int ObDASScanOp::get_text_ir_tablet_ids(
int ObDASScanOp::get_base_text_ir_tablet_ids(
common::ObTabletID &inv_idx_tablet_id,
common::ObTabletID &fwd_idx_tablet_id,
common::ObTabletID &doc_id_idx_tablet_id)
@ -1410,30 +1435,130 @@ int ObDASScanOp::get_text_ir_tablet_ids(
if (OB_UNLIKELY(related_ctdefs_.count() != related_tablet_ids_.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected related scan array not match", K(ret), K_(related_ctdefs), K_(related_tablet_ids));
} else if (nullptr == attach_ctdef_ || ObDASOpType::DAS_OP_FUNC_LOOKUP == attach_ctdef_->op_type_) {
// do nothing
} else {
for (int64_t i= 0; OB_SUCC(ret) && i < related_ctdefs_.count(); ++i) {
const ObDASScanCtDef *ctdef = static_cast<const ObDASScanCtDef *>(related_ctdefs_.at(i));
switch (ctdef->ir_scan_type_) {
case ObTSCIRScanType::OB_NOT_A_SPEC_SCAN: {
break;
}
case ObTSCIRScanType::OB_IR_INV_IDX_SCAN:
case ObTSCIRScanType::OB_IR_INV_IDX_AGG: {
inv_idx_tablet_id = related_tablet_ids_.at(i);
break;
}
case ObTSCIRScanType::OB_IR_DOC_ID_IDX_AGG: {
doc_id_idx_tablet_id = related_tablet_ids_.at(i);
break;
}
case ObTSCIRScanType::OB_IR_FWD_IDX_AGG: {
fwd_idx_tablet_id = related_tablet_ids_.at(i);
break;
}
default: {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpeted ir scan type", K(ret), KPC(ctdef));
}
}
}
}
for (int64_t i= 0; OB_SUCC(ret) && i < related_ctdefs_.count(); ++i) {
const ObDASScanCtDef *ctdef = static_cast<const ObDASScanCtDef *>(related_ctdefs_.at(i));
switch (ctdef->ir_scan_type_) {
case ObTSCIRScanType::OB_NOT_A_SPEC_SCAN: {
break;
}
case ObTSCIRScanType::OB_IR_INV_IDX_SCAN:
case ObTSCIRScanType::OB_IR_INV_IDX_AGG: {
inv_idx_tablet_id = related_tablet_ids_.at(i);
break;
}
case ObTSCIRScanType::OB_IR_DOC_ID_IDX_AGG: {
doc_id_idx_tablet_id = related_tablet_ids_.at(i);
break;
}
case ObTSCIRScanType::OB_IR_FWD_IDX_AGG: {
fwd_idx_tablet_id = related_tablet_ids_.at(i);
break;
}
default: {
return ret;
}
int ObDASScanOp::get_func_lookup_tablet_ids(ObDASRelatedTabletID &related_tablet_ids)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(related_ctdefs_.count() != related_tablet_ids_.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected related scan array not match", K(ret), K_(related_ctdefs), K_(related_tablet_ids));
} else if (nullptr == attach_ctdef_
|| ObDASOpType::DAS_OP_INDEX_PROJ_LOOKUP != attach_ctdef_->op_type_
|| static_cast<const ObDASIndexProjLookupCtDef *>(attach_ctdef_)->get_lookup_ctdef()->op_type_ != ObDASOpType::DAS_OP_FUNC_LOOKUP) {
// do nothing
} else {
related_tablet_ids.reset();
const ObDASIndexProjLookupCtDef *root_lookup_ctdef = static_cast<const ObDASIndexProjLookupCtDef *>(attach_ctdef_);
ObDASIndexProjLookupRtDef *root_lookup_rtdef = static_cast<ObDASIndexProjLookupRtDef *>(attach_rtdef_);
const ObDASFuncLookupCtDef *func_lookup_ctdef = static_cast<const ObDASFuncLookupCtDef*>(root_lookup_ctdef->get_lookup_ctdef());
ObDASFuncLookupRtDef *func_lookup_rtdef = static_cast<ObDASFuncLookupRtDef*>(root_lookup_rtdef->get_lookup_rtdef());
const ObDASScanCtDef *rowkey_ctdef = static_cast<const ObDASScanCtDef*>(root_lookup_ctdef->get_rowkey_scan_ctdef());
const ObDASScanCtDef *scan_ctdef = nullptr;
if (ObDASOpType::DAS_OP_IR_AUX_LOOKUP == rowkey_ctdef->op_type_) {
const ObDASIRAuxLookupCtDef *aux_lookup_ctdef = static_cast<const ObDASIRAuxLookupCtDef *>(root_lookup_ctdef->get_rowkey_scan_ctdef());
ObDASIRAuxLookupRtDef *aux_lookup_rtdef = static_cast<ObDASIRAuxLookupRtDef *>(root_lookup_rtdef->get_rowkey_scan_rtdef());
const ObDASIRScanCtDef * ir_scan_ctdef = nullptr;
ObDASIRScanRtDef * ir_scan_rtdef = nullptr;
if (OB_FAIL(ObDASUtils::find_target_das_def(
aux_lookup_ctdef,
aux_lookup_rtdef,
ObDASOpType::DAS_OP_IR_SCAN,
ir_scan_ctdef,
ir_scan_rtdef))) {
LOG_WARN("fail to find ir scan definition", K(ret));
} else {
int exit_flag = 0;
int flag_size = nullptr == ir_scan_ctdef->get_fwd_idx_agg_ctdef() ? 4 : 5;
for (int i = 0; exit_flag < flag_size && i < related_ctdefs_.count(); ++i) {
if (aux_lookup_ctdef->get_lookup_scan_ctdef() == related_ctdefs_.at(i)) {
related_tablet_ids.aux_lookup_tablet_id_ = related_tablet_ids_.at(i);
exit_flag++;
} else if (ir_scan_ctdef->get_inv_idx_agg_ctdef() == related_ctdefs_.at(i) || ir_scan_ctdef->get_inv_idx_scan_ctdef() == related_ctdefs_.at(i)) {
related_tablet_ids.inv_idx_tablet_id_ = related_tablet_ids_.at(i);
exit_flag++;
} else if (ir_scan_ctdef->get_doc_id_idx_agg_ctdef() == related_ctdefs_.at(i)) {
related_tablet_ids.doc_id_idx_tablet_id_ = related_tablet_ids_.at(i);
exit_flag++;
} else if (ir_scan_ctdef->get_fwd_idx_agg_ctdef() == related_ctdefs_.at(i)) {
related_tablet_ids.fwd_idx_tablet_id_ = related_tablet_ids_.at(i);
exit_flag++;
}
}
}
} else if (ObDASOpType::DAS_OP_SORT == rowkey_ctdef->op_type_ && FALSE_IT(scan_ctdef = static_cast<const ObDASScanCtDef *>(rowkey_ctdef->children_[0]))) {
} else if (ObDASOpType::DAS_OP_TABLE_SCAN == rowkey_ctdef->op_type_ &&
FALSE_IT(scan_ctdef = static_cast<const ObDASScanCtDef *>(root_lookup_ctdef->get_rowkey_scan_ctdef()))) {
} else if (ObDASOpType::DAS_OP_SORT == rowkey_ctdef->op_type_ || ObDASOpType::DAS_OP_TABLE_SCAN == rowkey_ctdef->op_type_) {
// do nothing
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpeted ir scan type", K(ret), KPC(ctdef));
LOG_WARN("unexpected rowkey scan type", K(ret), KPC(rowkey_ctdef));
}
if (OB_FAIL(ret)) {
} else {
const ObDASScanCtDef *rowkey_docid_ctdef = static_cast<const ObDASScanCtDef*>(func_lookup_ctdef->get_doc_id_lookup_scan_ctdef());
const ObDASScanCtDef *main_lookup_ctdef = static_cast<const ObDASScanCtDef *>(func_lookup_ctdef->get_main_lookup_scan_ctdef());
for (int i = 0; i < related_ctdefs_.count(); ++i) {
if (rowkey_docid_ctdef == related_ctdefs_.at(i)) {
related_tablet_ids.rowkey_doc_tablet_id_ = related_tablet_ids_.at(i);
} else if (nullptr != main_lookup_ctdef && main_lookup_ctdef == related_ctdefs_.at(i)) {
related_tablet_ids.lookup_tablet_id_ = related_tablet_ids_.at(i);
}
}
for (int j = 0; j < func_lookup_ctdef->func_lookup_cnt_; ++j) {
const ObDASIRScanCtDef *tr_merger_ctdef = static_cast<const ObDASIRScanCtDef *>(func_lookup_ctdef->get_func_lookup_scan_ctdef(j));
int exit_flag = 0;
ObDASFTSTabletID fts_tablet_id;
int flag_size = nullptr == tr_merger_ctdef->get_fwd_idx_agg_ctdef() ? 3 : 4;
for (int i = 0; exit_flag < flag_size && i < related_ctdefs_.count(); ++i) {
if (tr_merger_ctdef->get_inv_idx_agg_ctdef() == related_ctdefs_.at(i) || tr_merger_ctdef->get_inv_idx_scan_ctdef() == related_ctdefs_.at(i)) {
fts_tablet_id.inv_idx_tablet_id_ = related_tablet_ids_.at(i);
exit_flag++;
} else if (tr_merger_ctdef->get_doc_id_idx_agg_ctdef() == related_ctdefs_.at(i)) {
fts_tablet_id.doc_id_idx_tablet_id_ = related_tablet_ids_.at(i);
exit_flag++;
} else if (tr_merger_ctdef->get_fwd_idx_agg_ctdef() == related_ctdefs_.at(i)) {
fts_tablet_id.fwd_idx_tablet_id_ = related_tablet_ids_.at(i);
exit_flag++;
}
}
if (OB_FAIL(related_tablet_ids.fts_tablet_ids_.push_back(fts_tablet_id))) {
LOG_WARN("failed to push fts_tablet_id", K(ret));
}
}
}
}
return ret;

View File

@ -337,7 +337,7 @@ public:
bool is_contain_trans_info() {return NULL != scan_ctdef_->trans_info_expr_; }
int do_table_scan();
int do_domain_index_lookup();
int get_text_ir_tablet_ids(
int get_base_text_ir_tablet_ids(
common::ObTabletID &inv_idx_tablet_id,
common::ObTabletID &fwd_idx_tablet_id,
common::ObTabletID &doc_id_idx_tablet_id);
@ -348,6 +348,7 @@ public:
common::ObTabletID &snapshot_tid,
common::ObTabletID &com_aux_vec_tid);
int get_index_merge_tablet_ids(common::ObIArray<common::ObTabletID> &index_merge_tablet_ids);
int get_func_lookup_tablet_ids(ObDASRelatedTabletID &related_tablet_ids);
bool enable_rich_format() const { return scan_rtdef_->enable_rich_format(); }
INHERIT_TO_STRING_KV("parent", ObIDASTaskOp,
KPC_(scan_ctdef),
@ -362,6 +363,7 @@ protected:
common::ObNewRowIterator *get_output_result_iter() { return result_; }
ObDASIterTreeType get_iter_tree_type() const;
bool is_index_merge(const ObDASBaseCtDef *attach_ctdef) const;
bool is_func_lookup(const ObDASBaseCtDef *attach_ctdef) const;
public:
ObSEArray<ObDatum *, 4> trans_info_array_;
protected:

View File

@ -266,6 +266,15 @@ ObDASScanCtDef *ObTableScanCtDef::get_lookup_ctdef()
OB_ASSERT(2 == vid_merge_ctdef->children_cnt_ && vid_merge_ctdef->children_ != nullptr);
lookup_ctdef = static_cast<ObDASScanCtDef*>(vid_merge_ctdef->children_[0]);
}
} else if (DAS_OP_INDEX_PROJ_LOOKUP == attach_ctdef->op_type_) {
OB_ASSERT(2 == attach_ctdef->children_cnt_ && attach_ctdef->children_ != nullptr);
if (DAS_OP_FUNC_LOOKUP == attach_ctdef->children_[1]->op_type_) {
ObDASFuncLookupCtDef *func_lookup_ctdef = static_cast<ObDASFuncLookupCtDef *>(attach_ctdef->children_[1]);
if (func_lookup_ctdef->has_main_table_lookup()) {
const int64_t lookup_child_idx = func_lookup_ctdef->get_main_lookup_scan_idx();
lookup_ctdef = static_cast<ObDASScanCtDef *>(func_lookup_ctdef->children_[lookup_child_idx]);
}
}
}
return lookup_ctdef;
}
@ -299,6 +308,15 @@ const ObDASScanCtDef *ObTableScanCtDef::get_lookup_ctdef() const
OB_ASSERT(2 == vid_merge_ctdef->children_cnt_ && vid_merge_ctdef->children_ != nullptr);
lookup_ctdef = static_cast<ObDASScanCtDef*>(vid_merge_ctdef->children_[0]);
}
} else if (DAS_OP_INDEX_PROJ_LOOKUP == attach_ctdef->op_type_) {
OB_ASSERT(2 == attach_ctdef->children_cnt_ && attach_ctdef->children_ != nullptr);
if (DAS_OP_FUNC_LOOKUP == attach_ctdef->children_[1]->op_type_) {
ObDASFuncLookupCtDef *func_lookup_ctdef = static_cast<ObDASFuncLookupCtDef *>(attach_ctdef->children_[1]);
if (func_lookup_ctdef->has_main_table_lookup()) {
const int64_t lookup_child_idx = func_lookup_ctdef->get_main_lookup_scan_idx();
lookup_ctdef = static_cast<ObDASScanCtDef *>(func_lookup_ctdef->children_[lookup_child_idx]);
}
}
}
return lookup_ctdef;
}
@ -3887,41 +3905,40 @@ int ObTableScanOp::inner_get_next_fts_index_row()
int ObTableScanOp::fetch_next_fts_index_rows()
{
int ret = OB_SUCCESS;
ObExpr *ft_expr = nullptr;
ObExpr *doc_id_expr = nullptr;
ObDatum *ft_datum = nullptr;
ObDatum *doc_id_datum = nullptr;
if (OB_FAIL(ObTableScanOp::inner_get_next_row_implement())) {
if (OB_ITER_END != ret) {
LOG_WARN("fail to get next row implement", K(ret));
}
} else if (OB_FAIL(get_output_fts_col_expr_by_type(T_FUN_SYS_DOC_ID, doc_id_expr))) {
LOG_WARN("fail to get doc id column expr from output", K(ret));
} else if (OB_FAIL(get_output_fts_col_expr_by_type(T_FUN_SYS_WORD_SEGMENT, ft_expr))) {
LOG_WARN("fail to get word segment column expr from output", K(ret));
} else if (OB_ISNULL(ft_expr) || OB_ISNULL(doc_id_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpeted error, ft or doc id expr is nullptr", K(ret), KP(ft_expr), KP(doc_id_expr));
} else if (OB_FAIL(ft_expr->eval(eval_ctx_, ft_datum))) {
LOG_WARN("fail to evaluate fulltext expr", K(ret));
} else if (OB_FAIL(doc_id_expr->eval(eval_ctx_, doc_id_datum))) {
LOG_WARN("fail to evaluate doc id expr", K(ret));
} else if (OB_ISNULL(ft_datum) || OB_ISNULL(doc_id_datum)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpeted error, ft or doc id datum is nullptr", K(ret), KP(ft_datum), KP(doc_id_datum));
} else {
const int64_t part_count = get_part_dep_col_cnt();
const int64_t word_col_idx = (MY_SPEC.is_fts_index_aux_ ? 0 : 1) + part_count;
const int64_t doc_id_col_idx = (MY_SPEC.is_fts_index_aux_ ? 1 : 0) + part_count;
ObExpr *ft_expr = MY_SPEC.output_.at(word_col_idx);
ObExpr *doc_id_expr = MY_SPEC.output_.at(doc_id_col_idx);
ObDatum *ft_datum = nullptr;
ObDatum *doc_id_datum = nullptr;
if (OB_ISNULL(ft_expr) || OB_ISNULL(doc_id_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpeted error, ft or doc id expr is nullptr", K(ret), KP(ft_expr), KP(doc_id_expr));
} else if (OB_FAIL(ft_expr->eval(eval_ctx_, ft_datum))) {
LOG_WARN("fail to evaluate fulltext expr", K(ret));
} else if (OB_FAIL(doc_id_expr->eval(eval_ctx_, doc_id_datum))) {
LOG_WARN("fail to evaluate doc id expr", K(ret));
} else if (OB_ISNULL(ft_datum) || OB_ISNULL(doc_id_datum)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpeted error, ft or doc id datum is nullptr", K(ret), KP(ft_datum), KP(doc_id_datum));
} else {
ObString ft = ft_datum->get_string();
const ObString &doc_id = doc_id_datum->get_string();
ObArenaAllocator tmp_allocator(ObModIds::OB_LOB_ACCESS_BUFFER, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID());
if (OB_FAIL(ObTextStringHelper::read_real_string_data(tmp_allocator, *ft_datum, ft_expr->datum_meta_,
ft_expr->obj_meta_.has_lob_header(), ft))) {
LOG_WARN("fail to read real string data", K(ret));
} else if (OB_UNLIKELY(doc_id.length() != sizeof(ObDocId)) || OB_ISNULL(doc_id.ptr())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid binary document id", K(ret), K(doc_id));
} else if (OB_FAIL(fts_index_.segment(ft_expr->obj_meta_, doc_id, ft))) {
LOG_WARN("fail to segment fulltext", K(ret), K(doc_id), K(ft));
}
ObString ft = ft_datum->get_string();
const ObString &doc_id = doc_id_datum->get_string();
ObArenaAllocator tmp_allocator(ObModIds::OB_LOB_ACCESS_BUFFER, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID());
if (OB_FAIL(ObTextStringHelper::read_real_string_data(tmp_allocator, *ft_datum, ft_expr->datum_meta_,
ft_expr->obj_meta_.has_lob_header(), ft))) {
LOG_WARN("fail to read real string data", K(ret));
} else if (OB_UNLIKELY(doc_id.length() != sizeof(ObDocId)) || OB_ISNULL(doc_id.ptr())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid binary document id", K(ret), K(doc_id));
} else if (OB_FAIL(fts_index_.segment(ft_expr->obj_meta_, doc_id, ft))) {
LOG_WARN("fail to segment fulltext", K(ret), K(doc_id), K(ft));
}
}
return ret;
@ -3931,6 +3948,8 @@ int ObTableScanOp::fill_generated_fts_cols(blocksstable::ObDatumRow *row)
{
int ret = OB_SUCCESS;
const int64_t part_count = get_part_dep_col_cnt();
const ObObjDatumMapType *types = MY_SPEC.is_fts_index_aux_ ? ObFTIndexRowCache::FTS_INDEX_TYPES : ObFTIndexRowCache::FTS_DOC_WORD_TYPES;
const ObExprOperatorType *expr_types = MY_SPEC.is_fts_index_aux_ ? ObFTIndexRowCache::FTS_INDEX_EXPR_TYPE : ObFTIndexRowCache::FTS_DOC_WORD_EXPR_TYPE;
if (OB_ISNULL(row)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument, row is nullptr", K(ret), KP(row));
@ -3939,16 +3958,19 @@ int ObTableScanOp::fill_generated_fts_cols(blocksstable::ObDatumRow *row)
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected output column count", K(ret), K(MY_SPEC.output_), KPC(row), K(part_count));
} else {
ObObjDatumMapType *types =MY_SPEC.is_fts_index_aux_ ? ObFTIndexRowCache::FTS_INDEX_TYPES : ObFTIndexRowCache::FTS_DOC_WORD_TYPES;
for (int64_t i = part_count; OB_SUCC(ret) && i < share::ObFtsIndexBuilderUtil::OB_FTS_INDEX_OR_DOC_WORD_TABLE_COL_CNT + part_count; ++i) {
ObExpr *expr = MY_SPEC.output_.at(i);
ObDatum &datum = expr->locate_datum_for_write(eval_ctx_);
ObEvalInfo &eval_info = expr->get_eval_info(eval_ctx_);
if (OB_FAIL(datum.from_storage_datum(row->storage_datums_[i - part_count], types[i - part_count]))) {
LOG_WARN("fail to fill fulltext index row", K(ret), K(i), K(MY_SPEC.output_), KPC(row));
for (int64_t i = 0; OB_SUCC(ret) && i < share::ObFtsIndexBuilderUtil::OB_FTS_INDEX_OR_DOC_WORD_TABLE_COL_CNT; ++i) {
ObExpr *expr = nullptr;
if (OB_FAIL(get_output_fts_col_expr_by_type(expr_types[i], expr))) {
LOG_WARN("fail to get fts column expr", K(ret), K(i), K(expr_types[i]));
} else {
eval_info.evaluated_ = true;
eval_info.projected_ = true;
ObDatum &datum = expr->locate_datum_for_write(eval_ctx_);
ObEvalInfo &eval_info = expr->get_eval_info(eval_ctx_);
if (OB_FAIL(datum.from_storage_datum(row->storage_datums_[i], types[i]))) {
LOG_WARN("fail to fill fulltext index row", K(ret), K(i), K(MY_SPEC.output_), KPC(row));
} else {
eval_info.evaluated_ = true;
eval_info.projected_ = true;
}
}
}
}
@ -3976,5 +3998,51 @@ int64_t ObTableScanOp::get_part_dep_col_cnt()
return part_dep_col_cnt;
}
int ObTableScanOp::get_output_fts_col_expr_by_type(
const ObExprOperatorType &type,
ObExpr *&expr)
{
int ret = OB_SUCCESS;
expr = nullptr;
if (OB_UNLIKELY(T_FUN_SYS_WORD_SEGMENT != type
&& T_FUN_SYS_DOC_ID != type
&& T_FUN_SYS_WORD_COUNT != type
&& T_FUN_SYS_DOC_LENGTH != type)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid fts column expr type", K(ret), "type", get_type_name(type));
} else if (T_FUN_SYS_DOC_ID == type) {
for (int64_t i = 0; OB_SUCC(ret) && OB_ISNULL(expr) && i < MY_SPEC.output_.count(); ++i) {
ObExpr *tmp_expr = MY_SPEC.output_.at(i);
if (OB_ISNULL(tmp_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, expr in output is nullptr", K(ret), K(i));
} else if (T_FUN_SYS_WORD_SEGMENT == tmp_expr->type_) {
const int64_t idx = MY_SPEC.is_fts_index_aux_ ? i+1 : i-1;
if (OB_UNLIKELY(idx < 0 || idx >= MY_SPEC.output_.count())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, invalid doc id idx", K(ret), K(idx), K(i), K(MY_SPEC.output_));
} else {
expr = MY_SPEC.output_.at(idx);
}
}
}
} else {
for (int64_t i = 0; OB_SUCC(ret) && OB_ISNULL(expr) && i < MY_SPEC.output_.count(); ++i) {
ObExpr *tmp_expr = MY_SPEC.output_.at(i);
if (OB_ISNULL(tmp_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, expr in output is nullptr", K(ret), K(i));
} else if (type == tmp_expr->type_) {
expr = tmp_expr;
}
}
}
if (OB_SUCC(ret) && OB_ISNULL(expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, fts column expr isn't found", K(ret), "type", get_type_name(type), K(MY_SPEC.output_));
}
return ret;
}
} // end namespace sql
} // end namespace oceanbase

View File

@ -711,6 +711,7 @@ private:
int inner_get_next_fts_index_row();
int fetch_next_fts_index_rows();
int fill_generated_fts_cols(ObDatumRow *row);
// Find the output expr for a fulltext generated column of the given type (word segment,
// doc id, word count or doc length). `expr` is reset to nullptr first; a non-success code
// is returned when the type is not one of those four or no matching output expr exists.
int get_output_fts_col_expr_by_type(const ObExprOperatorType &type, ObExpr *&expr);
int64_t get_part_dep_col_cnt();
protected:
DASOpResultIter scan_result_;

View File

@ -786,7 +786,10 @@ int ObAccessPathEstimation::add_storage_estimation_task(ObOptimizerContext &ctx,
index_partitions,
chosen_partitions))) {
LOG_WARN("failed to choose partitions", K(ret));
} else if (OB_FAIL(choose_storage_estimation_ranges(range_limit, ap, chosen_scan_ranges))) {
} else if (OB_FAIL(choose_storage_estimation_ranges(range_limit,
ap.est_cost_info_.ranges_,
ap.est_cost_info_.index_meta_info_.is_geo_index_,
chosen_scan_ranges))) {
LOG_WARN("failed to choose scan ranges", K(ret));
} else {
result_helper.est_scan_range_count_ = chosen_scan_ranges.count();
@ -846,7 +849,10 @@ int ObAccessPathEstimation::add_storage_estimation_task_by_ranges(ObOptimizerCon
OB_UNLIKELY(ori_partitions.empty())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected param", K(ret), K(table_meta), K(ori_partitions), K(index_partitions));
} else if (OB_FAIL(choose_storage_estimation_ranges(range_limit, ap, chosen_scan_ranges))) {
} else if (OB_FAIL(choose_storage_estimation_ranges(range_limit,
ap.est_cost_info_.ranges_,
ap.est_cost_info_.index_meta_info_.is_geo_index_,
chosen_scan_ranges))) {
LOG_WARN("failed to choose scan ranges", K(ret));
} else if (OB_FAIL(result_helper.range_result_.prepare_allocate(chosen_scan_ranges.count()))) {
LOG_WARN("failed to prepare allocate", K(ret));
@ -1315,16 +1321,17 @@ int ObAccessPathEstimation::choose_storage_estimation_partitions(const int64_t p
}
int ObAccessPathEstimation::choose_storage_estimation_ranges(const int64_t range_limit,
AccessPath &ap,
const ObRangesArray &ranges,
bool is_geo_index,
ObIArray<common::ObNewRange> &scan_ranges)
{
int ret = OB_SUCCESS;
ObSEArray<common::ObNewRange, 4> get_ranges;
ObSEArray<common::ObNewRange, 4> valid_ranges;
if (ap.est_cost_info_.ranges_.empty()) {
if (ranges.empty()) {
// do nothing
} else if (ap.est_cost_info_.index_meta_info_.is_geo_index_) {
ObIArray<common::ObNewRange> &geo_ranges = ap.est_cost_info_.ranges_;
} else if (is_geo_index) {
const ObIArray<common::ObNewRange> &geo_ranges = ranges;
int64_t total_cnt = geo_ranges.count();
if (geo_ranges.at(0).get_start_key().get_obj_cnt() < SPATIAL_ROWKEY_MIN_NUM) {
ret = OB_ERR_UNEXPECTED;
@ -1364,7 +1371,7 @@ int ObAccessPathEstimation::choose_storage_estimation_ranges(const int64_t range
}
} else {
if (OB_FAIL(ObOptimizerUtil::classify_get_scan_ranges(
ap.est_cost_info_.ranges_,
ranges,
get_ranges,
scan_ranges))) {
LOG_WARN("failed to clasiffy get scan ranges", K(ret));
@ -1829,7 +1836,7 @@ int ObAccessPathEstimation::estimate_full_table_rowcount(ObOptimizerContext &ctx
LOG_TRACE("succeed to storage estimate full table rowcount", K(meta));
}
} else if (part_loc_info_array.count() > 1 && partition_limit >= 0) {
if (OB_FAIL(storage_estimate_full_table_rowcount(ctx, part_loc_info_array, meta))) {
if (OB_FAIL(storage_estimate_range_rowcount(ctx, part_loc_info_array, true, NULL, meta))) {
LOG_WARN("failed to storage estimate full table rowcount", K(ret));
} else {
LOG_TRACE("succeed to storage estimate full table rowcount", K(meta));
@ -1945,19 +1952,24 @@ int ObAccessPathEstimation::storage_estimate_full_table_rowcount(ObOptimizerCont
return ret;
}
int ObAccessPathEstimation::storage_estimate_full_table_rowcount(ObOptimizerContext &ctx,
const ObCandiTabletLocIArray &part_loc_infos,
ObTableMetaInfo &meta)
int ObAccessPathEstimation::storage_estimate_range_rowcount(ObOptimizerContext &ctx,
const ObCandiTabletLocIArray &part_loc_infos,
bool estimate_whole_range,
const ObRangesArray *ranges,
ObTableMetaInfo &meta)
{
int ret = OB_SUCCESS;
ObArenaAllocator arena("CardEstimation");
ObArray<ObBatchEstTasks *> tasks;
ObArray<ObAddr> prefer_addrs;
ObCandiTabletLocSEArray chosen_partitions;
ObSEArray<ObNewRange, 4> chosen_scan_ranges;
ObRangesArray whole_range;
bool need_fallback = false;
int64_t partition_limit = 0;
int64_t range_limit = 0;
int64_t total_part_cnt = part_loc_infos.count();
if (OB_ISNULL(ctx.get_session_info())) {
if (OB_ISNULL(ctx.get_session_info()) || (!estimate_whole_range && OB_ISNULL(ranges))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret));
} else if ((is_virtual_table(meta.ref_table_id_) &&
@ -1969,12 +1981,48 @@ int ObAccessPathEstimation::storage_estimate_full_table_rowcount(ObOptimizerCont
share::SYS_VAR_PARTITION_INDEX_DIVE_LIMIT,
partition_limit))) {
LOG_WARN("failed to get hint system variable", K(ret));
} else if (OB_FAIL(ctx.get_global_hint().opt_params_.get_sys_var(ObOptParamHint::RANGE_INDEX_DIVE_LIMIT,
ctx.get_session_info(),
share::SYS_VAR_RANGE_INDEX_DIVE_LIMIT,
range_limit))) {
LOG_WARN("failed to get hint system variable", K(ret));
} else {
if (partition_limit < 0 && range_limit < 0) {
partition_limit = 1;
range_limit = ObOptEstCost::MAX_STORAGE_RANGE_ESTIMATION_NUM;
}
// make whole range if need
if (estimate_whole_range) {
ObNewRange *range = NULL;
if (OB_FAIL(ObSQLUtils::make_whole_range(arena,
meta.ref_table_id_,
meta.table_rowkey_count_,
range))) {
LOG_WARN("failed to make whole range", K(ret));
} else if (OB_ISNULL(range)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("null range", K(ret));
} else if (OB_FAIL(whole_range.push_back(*range))) {
LOG_WARN("failed to push back range", K(ret));
} else {
ranges = &whole_range;
}
}
}
if (OB_FAIL(ret)) {
} else if (OB_ISNULL(ranges)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ranges is null", K(ret));
} else if (OB_FAIL(choose_storage_estimation_ranges(range_limit, *ranges, false, chosen_scan_ranges))) {
LOG_WARN("failed to choose scan ranges", K(ret));
} else if (OB_FAIL(choose_storage_estimation_partitions(partition_limit,
part_loc_infos,
chosen_partitions))) {
LOG_WARN("failed to choose partitions", K(ret));
} else {
LOG_TRACE("choose partitions to estimate rowcount", K(chosen_partitions));
LOG_TRACE("choose ranges to estimate rowcount", K(chosen_scan_ranges));
}
for (int64_t i = 0; OB_SUCC(ret) && !need_fallback && i < chosen_partitions.count(); i ++) {
EstimatedPartition best_index_part;
@ -1990,7 +2038,6 @@ int ObAccessPathEstimation::storage_estimate_full_table_rowcount(ObOptimizerCont
LOG_WARN("failed to get task", K(ret));
} else if (NULL != task) {
obrpc::ObEstPartArgElement path_arg;
ObNewRange *range = NULL;
task->addr_ = best_index_part.addr_;
path_arg.scan_flag_.index_back_ = 0;
path_arg.index_id_ = meta.ref_table_id_;
@ -2000,14 +2047,8 @@ int ObAccessPathEstimation::storage_estimate_full_table_rowcount(ObOptimizerCont
path_arg.ls_id_ = best_index_part.ls_id_;
path_arg.tenant_id_ = ctx.get_session_info()->get_effective_tenant_id();
path_arg.tx_id_ = ctx.get_session_info()->get_tx_id();
if (OB_FAIL(ObSQLUtils::make_whole_range(arena,
meta.ref_table_id_,
meta.table_rowkey_count_,
range))) {
LOG_WARN("failed to make whole range", K(ret));
} else if (OB_ISNULL(path_arg.batch_.range_ = range)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("failed to generate whole range", K(ret), K(range));
if (OB_FAIL(construct_scan_range_batch(ctx.get_allocator(), chosen_scan_ranges, path_arg.batch_))) {
LOG_WARN("failed to construct scan range batch", K(ret));
} else if (OB_FAIL(task->arg_.index_params_.push_back(path_arg))) {
LOG_WARN("failed to add primary key estimation arg", K(ret));
}

View File

@ -110,6 +110,11 @@ public:
uint64_t table_id,
uint64_t ref_table_id,
bool &can_use);
// Storage-layer row count estimation over a set of candidate partitions and ranges.
// @param ctx                   optimizer context; its session info must not be null.
// @param part_loc_infos        candidate tablet locations; a subset is chosen for estimation.
// @param estimate_whole_range  when true, a whole range is built from meta.ref_table_id_ and
//                              meta.table_rowkey_count_ and used in place of `ranges`.
// @param ranges                ranges to estimate; may be NULL only when estimate_whole_range
//                              is true, otherwise OB_ERR_UNEXPECTED is returned.
// @param meta                  table meta info of the estimated table.
//                              NOTE(review): presumably receives the estimated row count —
//                              confirm against the definition.
static int storage_estimate_range_rowcount(ObOptimizerContext &ctx,
const ObCandiTabletLocIArray &part_loc_infos,
bool estimate_whole_range,
const ObRangesArray *ranges,
ObTableMetaInfo &meta);
private:
static const int STORAGE_EST_SAMPLE_SEED = 1;
static int inner_estimate_rowcount(ObOptimizerContext &ctx,
@ -230,7 +235,8 @@ private:
const ObCandiTabletLocIArray &partitions,
ObCandiTabletLocIArray &chosen_partitions);
static int choose_storage_estimation_ranges(const int64_t range_limit,
AccessPath &ap,
const ObRangesArray &ranges,
bool is_geo_index,
ObIArray<common::ObNewRange> &scan_ranges);
static int process_dynamic_sampling_estimation(ObOptimizerContext &ctx,
@ -317,9 +323,6 @@ private:
static int storage_estimate_full_table_rowcount(ObOptimizerContext &ctx,
const ObCandiTabletLoc &part_loc_info,
ObTableMetaInfo &meta);
static int storage_estimate_full_table_rowcount(ObOptimizerContext &ctx,
const ObCandiTabletLocIArray &part_loc_infos,
ObTableMetaInfo &meta);
static int estimate_full_table_rowcount_by_meta_table(ObOptimizerContext &ctx,
const ObIArray<ObTabletID> &all_tablet_ids,

View File

@ -1793,8 +1793,8 @@ int ObDelUpdLogPlan::collect_related_local_index_ids(IndexDMLInfo &primary_dml_i
const ObTableSchema *index_schema = nullptr;
ObSchemaGetterGuard *schema_guard = nullptr;
const ObDelUpdStmt *stmt = get_stmt();
int64_t index_tid_array_size = OB_MAX_INDEX_PER_TABLE;
uint64_t index_tid_array[OB_MAX_INDEX_PER_TABLE];
int64_t index_tid_array_size = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
uint64_t index_tid_array[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
ObArray<uint64_t> base_column_ids;
const uint64_t tenant_id = optimizer_context_.get_session_info()->get_effective_tenant_id();
ObInsertLogPlan *insert_plan = dynamic_cast<ObInsertLogPlan*>(this);
@ -1956,8 +1956,8 @@ int ObDelUpdLogPlan::prepare_table_dml_info_basic(const ObDmlTableInfo& table_in
}
}
if (OB_SUCC(ret) && !has_tg) {
uint64_t index_tid[OB_MAX_INDEX_PER_TABLE];
int64_t index_cnt = OB_MAX_INDEX_PER_TABLE;
uint64_t index_tid[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
int64_t index_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
ObInsertLogPlan *insert_plan = dynamic_cast<ObInsertLogPlan*>(this);
if (NULL != insert_plan && get_optimizer_context().get_direct_load_optimizer_ctx().use_direct_load()) {
index_cnt = 0; // no need building index

View File

@ -121,10 +121,10 @@ int ObIndexInfoCache::add_index_info_entry(IndexInfoEntry *entry)
// update index info entry
old_entry->~IndexInfoEntry();
index_entrys_[idx] = entry;
} else if (entry_count_ >= common::OB_MAX_INDEX_PER_TABLE + 1) {
} else if (entry_count_ >= common::OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid entry count", K(ret), K_(entry_count),
K(common::OB_MAX_INDEX_PER_TABLE));
K(common::OB_MAX_AUX_TABLE_PER_MAIN_TABLE));
} else {
index_entrys_[entry_count_] = entry;
++entry_count_;

View File

@ -255,7 +255,7 @@ private:
uint64_t table_id_;
uint64_t base_table_id_;
int64_t entry_count_;
IndexInfoEntry *index_entrys_[common::OB_MAX_INDEX_PER_TABLE + 1]; //including table and index table
IndexInfoEntry *index_entrys_[common::OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1]; //including table and index table
DISALLOW_COPY_AND_ASSIGN(ObIndexInfoCache);
};

View File

@ -986,11 +986,13 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id,
ObExecContext *exec_ctx = NULL;
ObQueryRangeProvider *query_range_provider = NULL;
const share::schema::ObTableSchema *index_schema = NULL;
const share::schema::ObTableSchema *table_schema = NULL;
ObQueryRangeArray &ranges = range_info.get_ranges();
ObQueryRangeArray &ss_ranges = range_info.get_ss_ranges();
ObIArray<ColumnItem> &range_columns = range_info.get_range_columns();
bool is_geo_index = false;
bool is_multi_index = false;
bool is_fts_index = false;
bool is_domain_index = false;
ObWrapperAllocator wrap_allocator(*allocator_);
ColumnIdInfoMapAllocer map_alloc(OB_MALLOC_NORMAL_BLOCK_SIZE, wrap_allocator);
@ -1004,7 +1006,10 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id,
} else if (OB_FAIL(schema_guard->get_table_schema(index_id, index_schema,
ObSqlSchemaGuard::is_link_table(get_plan()->get_stmt(), table_id)))) {
LOG_WARN("fail to get table schema", K(index_id), K(ret));
} else if (OB_ISNULL(index_schema)) {
} else if (OB_FAIL(schema_guard->get_table_schema(base_table_id, table_schema,
ObSqlSchemaGuard::is_link_table(get_plan()->get_stmt(), table_id)))) {
LOG_WARN("fail to get table schema", K(index_id), K(ret));
} else if (OB_ISNULL(index_schema) || OB_ISNULL(table_schema)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(index_schema), K(ret));
} else if (OB_FAIL(get_plan()->get_index_column_items(opt_ctx->get_expr_factory(),
@ -1018,10 +1023,10 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id,
domain_columnInfo_map))) {
LOG_WARN("failed to extract geometry schema info", K(ret), K(table_id), K(index_id));
} else if (FALSE_IT(is_multi_index = index_schema->is_multivalue_index())) {
} else if (FALSE_IT(is_fts_index = index_schema->is_fts_index_aux())) {
} else {
const ObSQLSessionInfo *session = opt_ctx->get_session_info();
const ObDataTypeCastParams dtc_params = ObBasicSessionInfo::create_dtc_params(session);
bool all_single_value_range = false;
int64_t equal_prefix_count = 0;
int64_t equal_prefix_null_count = 0;
@ -1029,7 +1034,7 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id,
bool contain_always_false = false;
bool has_exec_param = false;
int64_t out_index_prefix = -1;
bool is_domain_index = (is_geo_index || is_multi_index);
bool is_domain_index = (is_geo_index || is_multi_index || is_fts_index);
common::ObSEArray<ObRawExpr *, 4> agent_table_filter;
bool is_oracle_inner_index_table = share::is_oracle_mapping_real_virtual_table(index_schema->get_table_id());
@ -1060,6 +1065,15 @@ int ObJoinOrder::get_query_range_info(const uint64_t table_id,
agent_table_filter : helper.filters_,
query_range_provider))) {
LOG_WARN("failed to extract query range", K(ret), K(index_id));
} else if(is_fts_index && OB_FAIL(extract_fts_preliminary_query_range(range_columns,
is_oracle_inner_index_table
? agent_table_filter
: helper.filters_,
table_schema,
index_schema,
helper,
query_range_provider))) {
LOG_WARN("failed to extract query range", K(ret), K(index_id));
} else if (OB_ISNULL(query_range_provider)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(query_range_provider), K(ret));
@ -1733,7 +1747,9 @@ int ObJoinOrder::create_one_access_path(const uint64_t table_id,
ap->contain_das_op_ = ap->use_das_;
ap->is_ror_ = (ref_id == index_id) ? true
: range_info.get_equal_prefix_count() >= range_info.get_index_column_count();
if (OB_FAIL(init_sample_info_for_access_path(ap, table_id, table_item))) {
if (OB_FAIL(process_index_for_match_expr(table_id, ref_id, index_id, helper, *ap))) {
LOG_WARN("failed to process index for match expr", K(ret));
} else if (OB_FAIL(init_sample_info_for_access_path(ap, table_id, table_item))) {
LOG_WARN("failed to init sample info", K(ret));
} else if (OB_FAIL(add_access_filters(ap,
ordering_info.get_index_keys(),
@ -1775,6 +1791,7 @@ int ObJoinOrder::create_one_access_path(const uint64_t table_id,
if (OB_FAIL(fill_filters(ap->filter_,
ap->get_query_range_provider(),
ap->est_cost_info_,
ap->tr_idx_info_,
is_nl_with_extended_range,
ObSqlSchemaGuard::is_link_table(get_plan()->get_stmt(), table_id),
OptSkipScanState::SS_DISABLE != use_skip_scan))) {
@ -2336,14 +2353,19 @@ int ObJoinOrder::check_and_extract_query_range(const uint64_t table_id,
//do some quick check
bool expr_match = false; //some condition on index
contain_always_false = false;
bool is_multivlaue_idx = index_info_entry.is_multivalue_index();
if (is_multivlaue_idx &&
bool is_special_index = index_info_entry.is_index_geo() ||
index_info_entry.is_multivalue_index() ||
index_info_entry.is_fulltext_index();
if (index_info_entry.is_multivalue_index() &&
OB_FAIL(check_exprs_overlap_multivalue_index(table_id, index_table_id, restrict_infos, index_keys, expr_match))) {
LOG_WARN("get_range_columns failed", K(ret));
} else if (!is_multivlaue_idx && !index_info_entry.is_index_geo() && OB_FAIL(check_exprs_overlap_index(restrict_infos, index_keys, expr_match))) {
LOG_WARN("check quals match index error", K(restrict_infos), K(index_keys));
} else if (index_info_entry.is_index_geo() && OB_FAIL(check_exprs_overlap_gis_index(restrict_infos, index_keys, expr_match))) {
} else if (index_info_entry.is_index_geo() &&
OB_FAIL(check_exprs_overlap_gis_index(restrict_infos, index_keys, expr_match))) {
LOG_WARN("check quals match gis index error", K(restrict_infos), K(index_keys));
} else if (index_info_entry.is_fulltext_index() &&
OB_FALSE_IT(expr_match = index_info_entry.get_range_info().is_valid())) {
} else if (!is_special_index && OB_FAIL(check_exprs_overlap_index(restrict_infos, index_keys, expr_match))) {
LOG_WARN("check quals match index error", K(restrict_infos), K(index_keys));
} else if (expr_match) {
prefix_range_ids.reset();
const QueryRangeInfo *query_range_info = NULL;
@ -2598,7 +2620,7 @@ int ObJoinOrder::fill_index_info_entry(const uint64_t table_id,
entry->set_is_index_geo(is_index_geo);
entry->set_is_index_back(is_index_back);
entry->set_is_unique_index(is_unique_index);
entry->set_is_fulltext_index(index_schema->is_fts_index());
entry->set_is_fulltext_index(index_schema->is_fts_index_aux());
entry->set_is_multivalue_index(index_schema->is_multivalue_index_aux());
entry->set_is_vector_index(index_schema->is_vec_index());
entry->get_ordering_info().set_scan_direction(direction);
@ -2715,6 +2737,10 @@ int ObJoinOrder::create_access_paths(const uint64_t table_id,
} else if (OB_FAIL(get_generated_col_index_qual(table_id,
helper.filters_, helper))) {
LOG_WARN("get prefix index qual failed");
} else if (OB_FAIL(init_basic_text_retrieval_info(table_id,
ref_table_id,
helper))) {
LOG_WARN("failed to init basic text retrieval info", K(ret));
} else if (OB_FAIL(check_can_use_index_merge(table_id,
ref_table_id,
helper,
@ -2734,6 +2760,7 @@ int ObJoinOrder::create_access_paths(const uint64_t table_id,
}
} else if (OB_FAIL(get_valid_index_ids(table_id,
ref_table_id,
helper,
candi_index_ids))) {
LOG_WARN("failed to get valid index ids", K(ret));
} else if (OB_FAIL(fill_index_info_cache(table_id, ref_table_id,
@ -3210,6 +3237,7 @@ int ObJoinOrder::compute_table_rowcount_info()
int ObJoinOrder::get_valid_index_ids(const uint64_t table_id,
const uint64_t ref_table_id,
PathHelper &helper,
ObIArray<uint64_t> &valid_index_ids)
{
int ret = OB_SUCCESS;
@ -3217,10 +3245,9 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id,
const TableItem *table_item = NULL;
ObSqlSchemaGuard *schema_guard = NULL;
ObSQLSessionInfo *session_info = NULL;
uint64_t tids[OB_MAX_INDEX_PER_TABLE + 1];
int64_t index_count = OB_MAX_INDEX_PER_TABLE + 1;
uint64_t tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1];
int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1;
const LogTableHint *log_table_hint = NULL;
ObMatchFunRawExpr *match_expr = NULL;
ObRawExpr *vector_expr = NULL;
const ObSelectStmt *select_stmt = NULL;
bool has_aggr = false; // defend aggr for ann search
@ -3238,20 +3265,6 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id,
} else if (OB_ISNULL(table_item = stmt->get_table_item_by_id(table_id))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("Table item should not be NULL", K(table_id), K(table_item), K(ret));
} else if (OB_FAIL(stmt->get_match_expr_on_table(table_id, match_expr))) {
LOG_WARN("failed to check has fulltext search on table", K(ret));
} else if (OB_NOT_NULL(match_expr)) {
// If there is a full-text search requirement on current base table, We can only choose the
// path that accesses the word-doc inverted index for now.
uint64_t inv_idx_tid = OB_INVALID_ID;
if (OB_FAIL(get_matched_inv_index_tid(match_expr, ref_table_id, inv_idx_tid))) {
LOG_WARN("failed to get matched inverted index table id", K(ret));
} else if (inv_idx_tid == OB_INVALID_ID) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected table id", K(ret));
} else if (OB_FAIL(valid_index_ids.push_back(inv_idx_tid))) {
LOG_WARN("failed to assign index ids", K(ret));
}
} else if (stmt->is_select_stmt() && FALSE_IT(select_stmt = static_cast<const ObSelectStmt*>(stmt))) {
} else if (nullptr != select_stmt && FALSE_IT(has_aggr = select_stmt->get_aggr_item_size() > 0)) {
} else if (stmt->has_vec_approx()
@ -3286,7 +3299,11 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id,
} else if (FALSE_IT(log_table_hint = get_plan()->get_log_plan_hint().get_index_hint(table_id))) {
} else if (NULL != log_table_hint && log_table_hint->is_use_index_hint()) {
// for use index hint, get index ids from hint.
if (OB_FAIL(valid_index_ids.assign(log_table_hint->index_list_))) {
ObSEArray<uint64_t, 4> valid_hint_index_list;
const bool is_link = ObSqlSchemaGuard::is_link_table(stmt, table_id);
if (OB_FAIL(get_valid_hint_index_list(log_table_hint->index_list_, is_link, schema_guard, helper, valid_hint_index_list))) {
LOG_WARN("failed to get valid hint index list", K(ret));
} else if (OB_FAIL(valid_index_ids.assign(valid_hint_index_list))) {
LOG_WARN("failed to assign index ids", K(ret));
}
} else if (OB_FAIL(schema_guard->get_can_read_index_array(ref_table_id,
@ -3298,7 +3315,9 @@ int ObJoinOrder::get_valid_index_ids(const uint64_t table_id,
false /*spatial index*/,
false /*vector index*/))) {
LOG_WARN("failed to get can read index", K(ref_table_id), K(ret));
} else if (index_count > OB_MAX_INDEX_PER_TABLE + 1) {
} else if (OB_FAIL(add_valid_fts_index_ids(helper, tids, index_count))) {
LOG_WARN("failed to add valid fts index ids", K(ret));
} else if (index_count > OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("Invalid index count", K(ref_table_id), K(index_count), K(ret));
} else if (NULL != log_table_hint &&
@ -3582,8 +3601,8 @@ int ObJoinOrder::fill_opt_info_index_name(const uint64_t table_id,
{
int ret = OB_SUCCESS;
const ObTableSchema *table_schema = NULL;
uint64_t index_ids[OB_MAX_INDEX_PER_TABLE + 3];
int64_t index_count = OB_MAX_INDEX_PER_TABLE + 3;
uint64_t index_ids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 3];
int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 3;
ObSqlSchemaGuard *schema_guard = NULL;
const ObDMLStmt *stmt = NULL;
if (OB_ISNULL(table_opt_info) || OB_ISNULL(get_plan())
@ -3602,9 +3621,9 @@ int ObJoinOrder::fill_opt_info_index_name(const uint64_t table_id,
index_count,
false,
true /*global index*/,
false /*domain index*/))) {
true /*domain index*/))) {
LOG_WARN("failed to get can read index", K(base_table_id), K(ret));
} else if (index_count > OB_MAX_INDEX_PER_TABLE + 1) {
} else if (index_count > OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("Invalid index count", K(base_table_id), K(index_count), K(ret));
} else if (OB_FAIL(table_opt_info->available_index_id_.assign(available_index_id))) {
@ -3620,6 +3639,8 @@ int ObJoinOrder::fill_opt_info_index_name(const uint64_t table_id,
} else if (OB_ISNULL(table_schema)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("index schema should not be null", K(ret), K(index_id));
} else if (table_schema->is_built_in_fts_index()) {
// do nothing
} else if (base_table_id == index_id) {
name = table_schema->get_table_name_str();
} else if (OB_FAIL(table_schema->get_index_name(name))) {
@ -3627,12 +3648,16 @@ int ObJoinOrder::fill_opt_info_index_name(const uint64_t table_id,
} else { /*do nothing*/ }
if (OB_SUCC(ret)) {
if (OB_FAIL(table_opt_info->available_index_name_.push_back(name))) {
if (name.empty()) {
// do nothing
} else if (OB_FAIL(table_opt_info->available_index_name_.push_back(name))) {
LOG_WARN("failed to push back index name", K(name), K(ret));
} else { /* do nothing */ }
}
if (OB_FAIL(ret)) {
} else if (name.empty()) {
// do nothing
} else if (ObOptimizerUtil::find_item(available_index_id, index_id)) {
//do nothing
} else if (ObOptimizerUtil::find_item(unstable_index_id, index_id)) {
@ -4387,6 +4412,95 @@ int ObJoinOrder::extract_multivalue_preliminary_query_range(const ObIArray<Colum
return ret;
}
// If there is a fulltext search on main tabe,
// range for fts index is only used for storage estimation, not actually used on execution stage.
// If it's a direct query on inverted index table,
// range for fts index follows normal usage.
int ObJoinOrder::extract_fts_preliminary_query_range(const ObIArray<ColumnItem> &range_columns,
const ObIArray<ObRawExpr*> &predicates,
const ObTableSchema *table_schema,
const ObTableSchema *index_schema,
PathHelper &helper,
ObQueryRangeProvider *&query_range)
{
int ret = OB_SUCCESS;
bool direct_query_on_index = false;
const ParamStore *params = NULL;
if (OB_ISNULL(OPT_CTX.get_exec_ctx()) || OB_ISNULL(allocator_) || OB_ISNULL(table_schema) ||
OB_ISNULL(index_schema) || OB_ISNULL(OPT_CTX.get_exec_ctx()->get_expr_factory()) ||
OB_ISNULL(params = OPT_CTX.get_params())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("get unexpected null", K(OPT_CTX.get_exec_ctx()), K(allocator_), K(ret));
} else if (OB_FALSE_IT(direct_query_on_index = index_schema->get_table_id() == table_schema->get_table_id())) {
} else if (!direct_query_on_index) {
ObArray<ObMatchFunRawExpr*> match_exprs;
ObArray<ObRawExpr*> match_filters;
const MatchExprInfo *match_expr_info = NULL;
if (OB_FAIL(extract_scan_match_expr_candidates(predicates, match_exprs, match_filters))) {
LOG_WARN("failed to extract match expr candidates", K(ret));
} else if (OB_FAIL(find_least_selective_expr_on_index(match_exprs,
helper.match_expr_infos_,
index_schema->get_table_id(),
match_expr_info))) {
LOG_WARN("failed to find most selective expr on index", K(ret));
} else if (OB_ISNULL(match_expr_info) || OB_ISNULL(match_expr_info->query_range_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret));
} else {
query_range = match_expr_info->query_range_;
}
} else if (OPT_CTX.enable_new_query_range()) {
void *ptr = allocator_->alloc(sizeof(ObPreRangeGraph));
ObPreRangeGraph *pre_range_graph = NULL;
if (OB_ISNULL(ptr)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("failed to allocate memory for pre range graph", K(ret));
} else {
pre_range_graph = new(ptr)ObPreRangeGraph(*allocator_);
if (OB_FAIL(pre_range_graph->preliminary_extract_query_range(range_columns, predicates,
OPT_CTX.get_exec_ctx(),
nullptr,
params))) {
LOG_WARN("failed to preliminary extract query range", K(ret));
}
}
if (OB_SUCC(ret)) {
query_range = pre_range_graph;
} else {
if (NULL != pre_range_graph) {
pre_range_graph->~ObPreRangeGraph();
pre_range_graph = NULL;
}
}
} else {
void *tmp_ptr = allocator_->alloc(sizeof(ObQueryRange));
ObQueryRange *tmp_qr = NULL;
if (OB_ISNULL(tmp_ptr)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("failed to allocate memory for query range", K(ret));
} else {
tmp_qr = new(tmp_ptr)ObQueryRange(*allocator_);
const ObDataTypeCastParams dtc_params =
ObBasicSessionInfo::create_dtc_params(OPT_CTX.get_exec_ctx()->get_my_session());
if (OB_FAIL(tmp_qr->preliminary_extract_query_range(range_columns, predicates,
dtc_params, OPT_CTX.get_exec_ctx(),
OPT_CTX.get_query_ctx(),
NULL, params))) {
LOG_WARN("failed to preliminary extract query range", K(ret));
}
}
if (OB_SUCC(ret)) {
query_range = tmp_qr;
} else {
if (NULL != tmp_qr) {
tmp_qr->~ObQueryRange();
tmp_qr = NULL;
}
}
}
return ret;
}
int ObJoinOrder::get_candi_range_expr(const ObIArray<ColumnItem> &range_columns,
const ObIArray<ObRawExpr*> &predicates,
ObIArray<ObRawExpr*> &range_predicates)
@ -6632,6 +6746,8 @@ int AccessPath::compute_access_path_batch_rescan()
LOG_WARN("failed to extract ir fitler from filters", K(ret), K(filter_));
} else if (is_virtual_table(ref_table_id_)
|| est_cost_info_.index_meta_info_.is_geo_index_
|| tr_idx_info_.has_ir_scan()
|| tr_idx_info_.has_func_lookup()
|| for_update_
|| !subquery_exprs_.empty()
|| table_item->is_link_table()
@ -13485,6 +13601,7 @@ int ObJoinOrder::get_simple_index_info(const uint64_t table_id,
int ObJoinOrder::fill_filters(const ObIArray<ObRawExpr*> &all_filters,
const ObQueryRangeProvider *query_range_provider,
ObCostTableScanInfo &est_cost_info,
const TRIndexAccessInfo &tr_index_info,
bool &is_nl_with_extended_range,
bool is_link,
bool use_skip_scan)
@ -13536,6 +13653,9 @@ int ObJoinOrder::fill_filters(const ObIArray<ObRawExpr*> &all_filters,
} else if (use_skip_scan &&
OB_FAIL(est_cost_info.ss_postfix_range_filters_.assign(query_range_provider->get_ss_range_exprs()))) {
LOG_WARN("failed to assign exprs", K(ret));
} else if (est_cost_info.index_meta_info_.is_fulltext_index_ &&
OB_FAIL(append_array_no_dup(est_cost_info.prefix_filters_, tr_index_info.index_scan_filters_))) {
LOG_WARN("failed to assign exprs", K(ret));
}
}
@ -15249,8 +15369,8 @@ int ObJoinOrder::compute_fd_item_set_for_table_scan(const uint64_t table_id,
int ret = OB_SUCCESS;
ObSqlSchemaGuard *schema_guard = NULL;
const ObDMLStmt *stmt = NULL;
uint64_t index_tids[OB_MAX_INDEX_PER_TABLE];
int64_t index_count = OB_MAX_INDEX_PER_TABLE;
uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
if (OB_ISNULL(get_plan()) || OB_ISNULL(stmt = get_plan()->get_stmt()) ||
OB_ISNULL(schema_guard = get_plan()->get_optimizer_context().get_sql_schema_guard())) {
ret = OB_ERR_UNEXPECTED;
@ -17633,7 +17753,7 @@ int ObJoinOrder::get_matched_inv_index_tid(ObMatchFunRawExpr *match_expr,
inv_idx_schema,
found_matched_index))) {
LOG_WARN("failed to check fulltext index match column", K(ret));
} else if (found_matched_index) {
} else if (found_matched_index && inv_idx_schema->can_read_index() && inv_idx_schema->is_index_visible()) {
inv_idx_tid = index_info.table_id_;
}
}
@ -17641,6 +17761,564 @@ int ObJoinOrder::get_matched_inv_index_tid(ObMatchFunRawExpr *match_expr,
return ret;
}
int ObJoinOrder::extract_scan_match_expr_candidates(const ObIArray<ObRawExpr *> &filters,
ObIArray<ObMatchFunRawExpr *> &scan_match_exprs,
ObIArray<ObRawExpr *> &scan_match_filters)
{
int ret = OB_SUCCESS;
for (int64_t i = 0; OB_SUCC(ret) && i < get_restrict_infos().count(); ++i) {
ObRawExpr *filter = get_restrict_infos().at(i);
if (OB_ISNULL(filter)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected nullptr to filter expr", K(ret), K(i), KPC(filter));
} else if (filter->get_expr_type() == T_OP_BOOL && filter->has_flag(CNT_MATCH_EXPR)) {
ObRawExpr *param_expr = filter->get_param_expr(0);
if (OB_ISNULL(param_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null param expr for bool op", K(ret));
} else if (param_expr->has_flag(IS_MATCH_EXPR)) {
if (OB_FAIL(scan_match_exprs.push_back(static_cast<ObMatchFunRawExpr*>(param_expr)))) {
LOG_WARN("failed to append match expr to array", K(ret));
} else if (OB_FAIL(scan_match_filters.push_back(filter))) {
LOG_WARN("failed to append match filter to array", K(ret));
}
}
}
}
return ret;
}
// Classify the match exprs on `table_id` into index-scan and functional-lookup
// exprs for an access path over `index_id`, recording the result in
// access_path.tr_idx_info_.
//
// - If the stmt has no match expr on `table_id`, nothing is recorded.
// - If the schema of `index_id` reports is_fts_index(), one scan candidate from
//   get_restrict_infos() that is bound to `index_id` is selected through
//   find_least_selective_expr_on_index() and stored, with its enclosing filter
//   and inverted index id, in the index_scan_* arrays. Failing to find such a
//   candidate is reported as OB_ERR_UNEXPECTED.
// - Every match expr on the table other than the one chosen for the index scan
//   is stored in the func_lookup_* arrays with its inverted index id.
//
// `ref_table_id` is not read by this function.
int ObJoinOrder::process_index_for_match_expr(const uint64_t table_id,
const uint64_t ref_table_id,
const uint64_t index_id,
PathHelper &helper,
AccessPath &access_path)
{
int ret = OB_SUCCESS;
ObSEArray<ObRawExpr *, 4> all_match_exprs;
// stays nullptr unless an fts index scan expr is chosen below
ObMatchFunRawExpr *match_expr_for_index_scan = nullptr;
ObSqlSchemaGuard *schema_guard = nullptr;
const ObTableSchema *index_schema = nullptr;
if (OB_ISNULL(schema_guard = OPT_CTX.get_sql_schema_guard()) || OB_ISNULL(get_plan()) ||
OB_ISNULL(get_plan()->get_stmt())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected nullptr", K(ret));
} else if (OB_FAIL(get_plan()->get_stmt()->get_match_expr_on_table(table_id, all_match_exprs))) {
LOG_WARN("failed to get match exprs by table id", K(ret), K(table_id));
} else if (all_match_exprs.empty()) {
// do nothing
} else if (OB_FAIL(schema_guard->get_table_schema(index_id, index_schema))) {
LOG_WARN("failed to get index table schema", K(ret), K(index_id));
} else if (OB_ISNULL(index_schema)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected nullptr to index schema", K(ret));
} else if (index_schema->is_fts_index()) {
// pick the single match filter that drives the fulltext index scan
ObSEArray<ObMatchFunRawExpr *, 4> scan_match_exprs;
ObSEArray<ObRawExpr *, 4> scan_match_filters;
const MatchExprInfo *match_expr_info = NULL;
int64_t idx = -1;
if (OB_FAIL(extract_scan_match_expr_candidates(get_restrict_infos(),
scan_match_exprs,
scan_match_filters))) {
LOG_WARN("failed to extract scan match expr", K(ret));
} else if (OB_FAIL(find_least_selective_expr_on_index(scan_match_exprs,
helper.match_expr_infos_,
index_id,
match_expr_info))) {
LOG_WARN("failed to find most selective expr on index", K(ret));
} else if (OB_ISNULL(match_expr_info)) {
// no scan candidate is bound to this fts index
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret));
} else if (!ObOptimizerUtil::find_item(scan_match_exprs, match_expr_info->match_expr_, &idx)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected match expr", K(ret));
} else if (OB_UNLIKELY(idx < 0 || idx >= scan_match_filters.count())) {
// idx is used to fetch the filter paired with the chosen expr, so it must be in range
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected idx", K(ret), K(idx), K(scan_match_filters.count()));
} else if (OB_FAIL(access_path.tr_idx_info_.index_scan_exprs_.push_back(match_expr_info->match_expr_))) {
LOG_WARN("failed to append match expr", K(ret));
} else if (OB_FAIL(access_path.tr_idx_info_.index_scan_filters_.push_back(scan_match_filters.at(idx)))) {
LOG_WARN("failed to append scan match filter expr", K(ret));
} else if (OB_FAIL(access_path.tr_idx_info_.index_scan_index_ids_.push_back(match_expr_info->inv_idx_id_))) {
LOG_WARN("failed to append inverted index table id", K(ret));
} else {
match_expr_for_index_scan = match_expr_info->match_expr_;
}
}
// all remaining match exprs are evaluated by functional lookup
for (int64_t i = 0; OB_SUCC(ret) && i < all_match_exprs.count(); ++i) {
ObMatchFunRawExpr *curr_expr = static_cast<ObMatchFunRawExpr *>(all_match_exprs.at(i));
const MatchExprInfo *match_expr_info = NULL;
if (OB_ISNULL(curr_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null ");
} else if (curr_expr == match_expr_for_index_scan) {
// skip
} else if (OB_FAIL(find_match_expr_info(helper.match_expr_infos_, curr_expr, match_expr_info))) {
LOG_WARN("failed to find match expr info", K(ret), KPC(curr_expr));
} else if (OB_ISNULL(match_expr_info)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null match expr info", K(ret));
} else if (OB_FAIL(access_path.tr_idx_info_.func_lookup_exprs_.push_back(curr_expr))) {
LOG_WARN("failed to append func lookup exprs", K(ret), KPC(curr_expr));
} else if (OB_FAIL(access_path.tr_idx_info_.func_lookup_index_ids_.push_back(match_expr_info->inv_idx_id_))) {
LOG_WARN("failed to append func lookup index id", K(ret));
}
}
return ret;
}
// Rebuild helper.match_expr_infos_ with one MatchExprInfo per match expr that
// the stmt holds on `table_id`.
//
// For each match expr this resolves its inverted index table, tokenizes the
// search key, builds a query range over the index for those tokens, estimates
// the range row count and selectivity, and finally records the selectivity in
// the plan's predicate selectivities.
//
// @param table_id      table id used to look up match exprs and column items
// @param ref_table_id  table id passed on to index resolution and estimation
// @param helper        [in/out] match_expr_infos_ is cleared first, then refilled
// @return OB_SUCCESS, OB_ERR_UNEXPECTED on a null plan / stmt / schema guard /
//         index schema or a non match-against expr, or the first error
//         returned by a helper.
int ObJoinOrder::init_basic_text_retrieval_info(uint64_t table_id,
                                                uint64_t ref_table_id,
                                                PathHelper &helper)
{
  int ret = OB_SUCCESS;
  helper.match_expr_infos_.reuse();
  ObSEArray<ObRawExpr*, 4> match_exprs;
  ObSqlSchemaGuard *schema_guard = NULL;
  if (OB_ISNULL(get_plan()) || OB_ISNULL(get_plan()->get_stmt()) ||
      OB_ISNULL(schema_guard = OPT_CTX.get_sql_schema_guard())) {
    ret = OB_ERR_UNEXPECTED;
    // get_plan() may be the null pointer that led here, so it must not be
    // dereferenced while building the log line.
    LOG_WARN("unexpected null plan or stmt", K(ret), KP(get_plan()), KP(schema_guard));
  } else if (OB_FAIL(get_plan()->get_stmt()->get_match_expr_on_table(table_id, match_exprs))) {
    LOG_WARN("failed to get match exprs", K(ret), K(table_id));
  } else {
    // generate selectivity info for each match against expr
    for (int64_t i = 0; OB_SUCC(ret) && i < match_exprs.count(); ++i) {
      ObMatchFunRawExpr *match_expr = NULL;
      uint64_t index_id = OB_INVALID_ID;
      const ObTableSchema *index_schema = NULL;
      MatchExprInfo match_expr_info;
      ObSEArray<ColumnItem, 4> range_columns;
      // Tokens belong to exactly one match expr. Keeping this array per
      // iteration prevents tokens of earlier exprs from being folded into the
      // range (and hence the row count / selectivity) of later ones.
      ObSEArray<ObConstRawExpr*, 4> query_tokens;
      if (OB_ISNULL(match_exprs.at(i)) || OB_UNLIKELY(!match_exprs.at(i)->is_match_against_expr())) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("unexpected null match expr", K(ret));
      } else if (OB_FALSE_IT(match_expr = static_cast<ObMatchFunRawExpr *>(match_exprs.at(i)))) {
      } else if (OB_FAIL(get_matched_inv_index_tid(match_expr, ref_table_id, index_id))) {
        LOG_WARN("failed to get matched inverted index table id", K(ret), KPC(match_expr));
      } else if (OB_FAIL(schema_guard->get_table_schema(index_id, index_schema))) {
        LOG_WARN("failed to get index schema", K(ret), K(index_id));
      } else if (OB_ISNULL(index_schema)) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("unexpected null index schema", K(ret), K(index_id));
      } else if (OB_FAIL(get_plan()->get_index_column_items(OPT_CTX.get_expr_factory(),
                                                            table_id,
                                                            *index_schema,
                                                            range_columns))) {
        LOG_WARN("failed to generate rowkey column items", K(ret));
      } else if (OB_FAIL(get_query_tokens(match_expr, index_schema, query_tokens))) {
        LOG_WARN("failed to get query tokens", K(ret));
      } else if (OB_FAIL(get_range_of_query_tokens(query_tokens,
                                                   *index_schema,
                                                   range_columns,
                                                   match_expr_info.query_range_))) {
        LOG_WARN("failed to get range of query tokens", K(ret));
      } else if (OB_FAIL(estimate_fts_index_scan(table_id,
                                                 ref_table_id,
                                                 index_id,
                                                 index_schema,
                                                 match_expr_info.query_range_,
                                                 match_expr_info.query_range_row_count_,
                                                 match_expr_info.selectivity_))) {
        LOG_WARN("failed to estimate fts index scan", K(ret));
      } else if (OB_FALSE_IT(match_expr_info.match_expr_ = match_expr)) {
      } else if (OB_FALSE_IT(match_expr_info.inv_idx_id_ = index_id)) {
      } else if (OB_FAIL(helper.match_expr_infos_.push_back(match_expr_info))) {
        LOG_WARN("failed to push back match expr info", K(ret));
      // add selectivity infos of match against exprs to LogPlan
      } else if (OB_FAIL(get_plan()->get_predicate_selectivities().
                         push_back(ObExprSelPair(match_expr, match_expr_info.selectivity_)))) {
        LOG_WARN("failed to push back predicate selectivities", K(ret));
      }
    }
    LOG_TRACE("OPT: selectivity infos of match exprs", K(helper.match_expr_infos_));
  }
  return ret;
}
// Tokenize the search key of `match_expr` and append one const varchar expr per
// distinct token to `query_tokens`.
//
// The search key is evaluated through ObSQLUtils::calc_const_or_calculable_expr.
// Nothing is appended when no result is produced, the result is null, or the
// search text is empty. Tokenization uses the parser named by
// index_schema->get_parser_name_str() and the collation of the search key;
// every token string is deep-copied with allocator_ before the expr is built.
//
// @param match_expr    match expr whose search key is tokenized; must not be null
// @param index_schema  fulltext index schema supplying the parser name; must not be null
// @param query_tokens  [out] token exprs are appended; existing entries are kept
// @return OB_SUCCESS, OB_ERR_UNEXPECTED on a null allocator_ / index_schema /
//         match_expr, or the first error returned by a helper.
int ObJoinOrder::get_query_tokens(ObMatchFunRawExpr *match_expr,
                                  const ObTableSchema *index_schema,
                                  ObIArray<ObConstRawExpr*> &query_tokens)
{
  int ret = OB_SUCCESS;
  ObObj result;
  bool got_result = false;
  // match_expr is dereferenced below, so validate it together with the other pointers
  if (OB_ISNULL(allocator_) || OB_ISNULL(index_schema) || OB_ISNULL(match_expr)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected null", K(ret), KP(allocator_), KP(index_schema), KP(match_expr));
  } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(OPT_CTX.get_exec_ctx(),
                                                               match_expr->get_search_key(),
                                                               result,
                                                               got_result,
                                                               *allocator_))) {
    LOG_WARN("fail to calc prefix pattern expr", K(ret));
  } else if (!got_result || result.is_null() || (is_oracle_mode() && result.is_null_oracle())) {
    // do nothing
  } else {
    const ObString &search_text_string = result.get_string();
    const ObString &parser_name = index_schema->get_parser_name_str();
    const ObCollationType &cs_type = match_expr->get_search_key()->get_collation_type();
    storage::ObFTParseHelper tokenize_helper;
    common::ObSEArray<ObFTWord, 16> tokens;
    hash::ObHashMap<ObFTWord, int64_t> token_map;
    int64_t doc_length = 0;
    // bucket count scales with the text length, with a floor of 2
    const int64_t ft_word_bkt_cnt = MAX(search_text_string.length() / 10, 2);
    if (search_text_string.length() == 0) {
      // do nothing
    } else if (OB_FAIL(tokenize_helper.init(allocator_, parser_name))) {
      LOG_WARN("failed to init tokenize helper", K(ret));
    } else if (OB_FAIL(token_map.create(ft_word_bkt_cnt, common::ObMemAttr(MTL_ID(), "FTWordMap")))) {
      LOG_WARN("failed to create token map", K(ret));
    } else if (OB_FAIL(tokenize_helper.segment(
        cs_type, search_text_string.ptr(), search_text_string.length(), doc_length, token_map))) {
      LOG_WARN("failed to segment", K(ret));
    } else {
      for (hash::ObHashMap<ObFTWord, int64_t>::const_iterator iter = token_map.begin();
           OB_SUCC(ret) && iter != token_map.end();
           ++iter) {
        const ObFTWord &token = iter->first;
        ObString token_string;
        ObConstRawExpr *token_expr = NULL;
        // copy the word so the expr does not point into the token map's storage
        if (OB_FAIL(ob_write_string(*allocator_, token.get_word(), token_string))) {
          LOG_WARN("failed to deep copy query token", K(ret));
        } else if (OB_FAIL(ObRawExprUtils::build_const_string_expr(*OPT_CTX.get_exec_ctx()->get_expr_factory(),
                                                                   ObVarcharType,
                                                                   token_string,
                                                                   cs_type,
                                                                   token_expr))) {
          LOG_WARN("failed to build const string expr", K(ret));
        } else if (OB_FAIL(query_tokens.push_back(token_expr))) {
          LOG_WARN("failed to append query token", K(ret));
        }
      }
    }
  }
  return ret;
}
// Build a query range provider over the word-segment column of a fulltext
// index for the given query tokens.
//
// Steps:
//   1. locate the word-segment column among `range_columns`;
//   2. build `word_col IN (token, ...)`, or `word_col = ''` when `query_tokens`
//      is empty;
//   3. extract a preliminary query range from that expr, using ObPreRangeGraph
//      when OPT_CTX.enable_new_query_range() is true and ObQueryRange otherwise.
//
// The range object is placement-constructed in memory taken from allocator_.
// On success `query_range` points to it and its range exprs have been reset.
// On failure the object (if constructed) is destructed and `query_range` is
// left unassigned.
//
// @param query_tokens   token exprs to put into the IN list
// @param index_schema   fulltext index schema used to identify the word-segment column
// @param range_columns  column items of the index, also passed to range extraction
// @param query_range    [out] resulting range provider
int ObJoinOrder::get_range_of_query_tokens(ObIArray<ObConstRawExpr*> &query_tokens,
const ObTableSchema &index_schema,
ObIArray<ColumnItem> &range_columns,
ObQueryRangeProvider *&query_range)
{
// TODO(jinmao): construct the query range directly instead of generating an IN expr and extracting the range from it
int ret = OB_SUCCESS;
ObColumnRefRawExpr *word_col = NULL;
ObOpRawExpr *in_expr = NULL;
ObOpRawExpr *in_list_expr = NULL;
ObSEArray<ObRawExpr*,2> tmp_range_exprs;
const ParamStore *params = NULL;
// find word segment column on fts index
for (int64_t i = 0; OB_SUCC(ret) && OB_ISNULL(word_col) && i < range_columns.count(); i++) {
const ObColumnSchemaV2 *col_schema = index_schema.get_column_schema(range_columns.at(i).column_id_);
if (OB_ISNULL(col_schema)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(col_schema), K(ret));
} else if (col_schema->is_word_segment_column()) {
word_col = range_columns.at(i).expr_;
}
}
// construct in expr to integrate all tokens
if (OB_FAIL(ret)) {
} else if (OB_ISNULL(word_col)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("failed to get word segment column", K(ret));
} else if (!query_tokens.empty()) {
// T_OP_ROW holds the token list, T_OP_IN compares word_col against it
if (OB_FAIL(OPT_CTX.get_exec_ctx()->get_expr_factory()->create_raw_expr(T_OP_ROW, in_list_expr))) {
LOG_WARN("create to_type expr failed", K(ret));
} else if (OB_FAIL(OPT_CTX.get_exec_ctx()->get_expr_factory()->create_raw_expr(T_OP_IN, in_expr))) {
LOG_WARN("create to_type expr failed", K(ret));
} else if (OB_ISNULL(in_list_expr) || OB_ISNULL(in_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(in_list_expr), K(in_expr), K(ret));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < query_tokens.count(); i++) {
if (OB_FAIL(in_list_expr->add_param_expr(query_tokens.at(i)))) {
LOG_WARN("failed to add param expr", K(ret));
}
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(in_expr->set_param_exprs(word_col, in_list_expr))) {
LOG_WARN("failed to set param exprs", K(ret));
} else if (OB_FAIL(in_expr->formalize(OPT_CTX.get_exec_ctx()->get_my_session()))) {
LOG_WARN("failed to formalize expr", K(ret));
} else if (OB_FAIL(tmp_range_exprs.push_back(in_expr))) {
LOG_WARN("failed to push back range expr", K(ret));
}
}
} else {
// build an always false expr for empty query tokens
ObRawExpr *eq_expr = NULL;
ObConstRawExpr *empty_string_expr = NULL;
if (OB_FAIL(ObRawExprUtils::build_const_string_expr(*OPT_CTX.get_exec_ctx()->get_expr_factory(),
ObVarcharType,
ObString(),
word_col->get_collation_type(),
empty_string_expr))) {
LOG_WARN("failed to build const int expr", K(ret));
} else if (OB_FAIL(ObRawExprUtils::build_common_binary_op_expr(*OPT_CTX.get_exec_ctx()->get_expr_factory(),
T_OP_EQ,
word_col,
empty_string_expr,
eq_expr))) {
LOG_WARN("failed to build common binary op expr", K(ret));
} else if (OB_FAIL(eq_expr->formalize(OPT_CTX.get_exec_ctx()->get_my_session()))) {
LOG_WARN("failed to formalize expr", K(ret));
} else if (OB_FAIL(tmp_range_exprs.push_back(eq_expr))) {
LOG_WARN("failed to push back range expr", K(ret));
}
}
// generate query range
if (OB_FAIL(ret)) {
} else if (OB_ISNULL(params = OPT_CTX.get_params())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(params), K(ret));
} else if (OPT_CTX.enable_new_query_range()) {
// new query range path: ObPreRangeGraph
void *ptr = allocator_->alloc(sizeof(ObPreRangeGraph));
ObPreRangeGraph *pre_range_graph = NULL;
if (OB_ISNULL(ptr)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("failed to allocate memory for pre range graph", K(ret));
} else {
pre_range_graph = new(ptr)ObPreRangeGraph(*allocator_);
if (OB_FAIL(pre_range_graph->preliminary_extract_query_range(range_columns, tmp_range_exprs,
OPT_CTX.get_exec_ctx(),
nullptr,
params))) {
LOG_WARN("failed to preliminary extract query range", K(ret));
}
}
if (OB_SUCC(ret)) {
query_range = pre_range_graph;
// reset range exprs which should be invisible after query range extraction
pre_range_graph->reset_range_exprs();
} else {
// extraction failed: run the destructor explicitly since the object was placement-constructed
if (NULL != pre_range_graph) {
pre_range_graph->~ObPreRangeGraph();
pre_range_graph = NULL;
}
}
} else {
// legacy query range path: ObQueryRange
void *tmp_ptr = allocator_->alloc(sizeof(ObQueryRange));
ObQueryRange *tmp_qr = NULL;
if (OB_ISNULL(tmp_ptr)) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("failed to allocate memory for query range", K(ret));
} else {
tmp_qr = new(tmp_ptr)ObQueryRange(*allocator_);
const ObDataTypeCastParams dtc_params =
ObBasicSessionInfo::create_dtc_params(OPT_CTX.get_exec_ctx()->get_my_session());
if (OB_FAIL(tmp_qr->preliminary_extract_query_range(range_columns, tmp_range_exprs,
dtc_params, OPT_CTX.get_exec_ctx(),
OPT_CTX.get_query_ctx(),
NULL, params))) {
LOG_WARN("failed to preliminary extract query range", K(ret));
}
}
if (OB_SUCC(ret)) {
query_range = tmp_qr;
// reset range exprs which should be invisible after query range extraction
tmp_qr->reset_range_exprs();
} else {
// extraction failed: run the destructor explicitly since the object was placement-constructed
if (NULL != tmp_qr) {
tmp_qr->~ObQueryRange();
tmp_qr = NULL;
}
}
}
return ret;
}
// Estimate how many rows of the fulltext index `index_id` fall inside
// `query_range`, and derive a selectivity from it.
//
// The tablet ranges of `query_range` are materialized, a table meta info for
// the index is filled from `index_schema` and the computed table location, and
// ObAccessPathEstimation::storage_estimate_range_rowcount() is asked for the
// row count. Selectivity is that row count divided by
// get_table_meta().table_row_count_, capped at 1.0, and 0 when the table row
// count is 0.
//
// @param table_id               table id used to compute the table location
// @param ref_table_id           not read by this function
// @param index_id               fulltext index table id
// @param index_schema           schema of the index; a global index table is rejected
// @param query_range            range provider to estimate
// @param query_range_row_count  [out] estimated row count inside the range
// @param selectivity            [out] estimated selectivity, at most 1.0
// @return OB_SUCCESS, OB_ERR_UNEXPECTED on null inputs / global index / null
//         partition info / null range, or the first error returned by a helper.
int ObJoinOrder::estimate_fts_index_scan(uint64_t table_id,
uint64_t ref_table_id,
uint64_t index_id,
const ObTableSchema *index_schema,
ObQueryRangeProvider *query_range,
int64_t &query_range_row_count,
double &selectivity)
{
int ret = OB_SUCCESS;
ObTablePartitionInfo *table_partition_info = NULL;
ObTableMetaInfo table_meta_range(index_id);
const ObSQLSessionInfo *session = OPT_CTX.get_session_info();
const ObDataTypeCastParams dtc_params = ObBasicSessionInfo::create_dtc_params(session);
ObQueryRangeArray range_array;
ObRangesArray ranges;
// required by get_tablet_ranges(); its value is not used afterwards
bool dummy_all_single_value_ranges = true;
if (OB_ISNULL(index_schema) || OB_ISNULL(query_range) || OB_ISNULL(OPT_CTX.get_exec_ctx()) ||
OB_UNLIKELY(index_schema->is_global_index_table())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(index_schema), K(query_range), K(ret));
} else if (OB_FAIL(compute_table_location(table_id, index_id, false, table_partition_info))) {
LOG_WARN("failed to compute table location", K(ret));
} else if (OB_ISNULL(table_partition_info)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(table_partition_info), K(ret));
} else if (OB_FAIL(query_range->get_tablet_ranges(OPT_CTX.get_allocator(),
*OPT_CTX.get_exec_ctx(),
range_array,
dummy_all_single_value_ranges,
dtc_params))) {
LOG_WARN("failed to get tablet ranges", K(ret));
} else {
// copy the range objects out of the pointer array into `ranges`
for(int64_t i = 0; OB_SUCC(ret) && i < range_array.count(); ++i) {
if (OB_ISNULL(range_array.at(i))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("range is null", K(ret));
} else if (OB_FAIL(ranges.push_back(*range_array.at(i)))) {
LOG_WARN("failed to add range", K(ret));
}
}
// init table meta info
table_meta_range.ref_table_id_ = index_id;
table_meta_range.table_rowkey_count_ = index_schema->get_rowkey_info().get_size();
table_meta_range.table_column_count_ = index_schema->get_column_count();
table_meta_range.micro_block_size_ = index_schema->get_block_size();
table_meta_range.part_count_ =
table_partition_info->get_phy_tbl_location_info().get_phy_part_loc_info_list().count();
table_meta_range.schema_version_ = index_schema->get_schema_version();
table_meta_range.is_broadcast_table_ = index_schema->is_broadcast_table();
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(ObAccessPathEstimation::storage_estimate_range_rowcount(OPT_CTX,
table_partition_info->get_phy_tbl_location_info().get_phy_part_loc_info_list(),
false,
&ranges,
table_meta_range))) {
LOG_WARN("failed to estimate table range rowcount", K(ret));
} else {
// the estimated row count is read back from table_meta_range
query_range_row_count = table_meta_range.table_row_count_;
selectivity = get_table_meta().table_row_count_ == 0 ? 0 :
table_meta_range.table_row_count_ * 1.0 / get_table_meta().table_row_count_;
// refine selectivity
selectivity = std::min(selectivity, 1.0);
}
return ret;
}
// Append the inverted index ids of all scan-candidate match filters in
// get_restrict_infos() to `index_tid_array`.
//
// Ids are de-duplicated among themselves before being written. Writing stops
// once `size` reaches OB_MAX_INDEX_PER_TABLE + 1; ids that do not fit are
// dropped without an error.
// NOTE(review): assumes index_tid_array has room for OB_MAX_INDEX_PER_TABLE + 1
// entries - confirm against the caller.
//
// @param helper           supplies match_expr_infos_ for the id lookup
// @param index_tid_array  [in/out] array the ids are appended to
// @param size             [in/out] number of valid entries in index_tid_array
int ObJoinOrder::add_valid_fts_index_ids(PathHelper &helper, uint64_t *index_tid_array, int64_t &size)
{
  int ret = OB_SUCCESS;
  ObSEArray<ObMatchFunRawExpr *, 4> candidate_exprs;
  ObSEArray<ObRawExpr *, 4> candidate_filters;
  ObSEArray<uint64_t, 4> inv_index_ids;
  if (OB_FAIL(extract_scan_match_expr_candidates(get_restrict_infos(),
                                                 candidate_exprs,
                                                 candidate_filters))) {
    LOG_WARN("failed to extract scan match expr candidates", K(ret));
  }
  // gather the distinct inverted index ids behind the candidates
  for (int64_t pos = 0; OB_SUCC(ret) && pos < candidate_exprs.count(); ++pos) {
    const MatchExprInfo *info = NULL;
    if (OB_FAIL(find_match_expr_info(helper.match_expr_infos_, candidate_exprs.at(pos), info))) {
      LOG_WARN("failed to find match expr info", K(ret));
    } else if (OB_ISNULL(info)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("get unexpected null", K(ret));
    } else if (OB_FAIL(add_var_to_array_no_dup(inv_index_ids, info->inv_idx_id_))) {
      LOG_WARN("failed to add var to array no dup", K(ret));
    }
  }
  if (OB_SUCC(ret)) {
    const int64_t max_size = OB_MAX_INDEX_PER_TABLE + 1;
    for (int64_t pos = 0; pos < inv_index_ids.count() && size < max_size; ++pos) {
      index_tid_array[size] = inv_index_ids.at(pos);
      ++size;
    }
  }
  return ret;
}
int ObJoinOrder::find_match_expr_info(const ObIArray<MatchExprInfo> &match_expr_infos,
ObRawExpr *match_expr,
const MatchExprInfo *&match_expr_info)
{
int ret = OB_SUCCESS;
if (OB_ISNULL(match_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret));
} else if (OB_UNLIKELY(!match_expr->is_match_against_expr())) {
// do nothing
} else {
for (int64_t i = 0; OB_SUCC(ret) && OB_ISNULL(match_expr_info) && i < match_expr_infos.count(); ++i) {
if (match_expr_infos.at(i).match_expr_ == match_expr) {
match_expr_info = &match_expr_infos.at(i);
}
}
}
return ret;
}
int ObJoinOrder::find_least_selective_expr_on_index(const ObIArray<ObMatchFunRawExpr*> &match_exprs,
const ObIArray<MatchExprInfo> &match_expr_infos,
uint64_t index_id,
const MatchExprInfo *&match_expr_info)
{
int ret = OB_SUCCESS;
double min_selectivity = 1.1;
for (int64_t i = 0; OB_SUCC(ret) && i < match_exprs.count(); ++i) {
const MatchExprInfo *tmp_match_expr_info = NULL;
if (OB_FAIL(find_match_expr_info(match_expr_infos, match_exprs.at(i), tmp_match_expr_info))) {
LOG_WARN("failed to find match expr info", K(ret));
} else if (OB_ISNULL(tmp_match_expr_info)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret));
} else if (tmp_match_expr_info->inv_idx_id_ == index_id &&
tmp_match_expr_info->selectivity_ < min_selectivity) {
min_selectivity = tmp_match_expr_info->selectivity_;
match_expr_info = tmp_match_expr_info;
}
}
return ret;
}
int ObJoinOrder::get_valid_hint_index_list(const ObIArray<uint64_t> &hint_index_ids,
const bool is_link_table,
ObSqlSchemaGuard *schema_guard,
PathHelper &helper,
ObIArray<uint64_t> &valid_hint_index_ids) const
{
int ret = OB_SUCCESS;
if (OB_ISNULL(schema_guard)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), KP(schema_guard));
}
for (int64_t i = 0; OB_SUCC(ret) && i < hint_index_ids.count(); ++i) {
const ObTableSchema *index_hint_table_schema = nullptr;
const uint64_t tid = hint_index_ids.at(i);
if (OB_FAIL(schema_guard->get_table_schema(tid, index_hint_table_schema, is_link_table))) {
LOG_WARN("failed to get table schema", K(ret), K(tid));
} else if (OB_ISNULL(index_hint_table_schema)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected nullptr to index hint table schema", K(ret), K(tid));
} else if (index_hint_table_schema->is_fts_index()
&& !has_match_expr_on_index(tid, helper.match_expr_infos_)) {
// skip index hint on fulltext index without match expr on fulltext index
} else if (OB_FAIL(valid_hint_index_ids.push_back(tid))) {
LOG_WARN("failed to append valid hint index list", K(ret), K(tid));
}
}
return ret;
}
bool ObJoinOrder::has_match_expr_on_index(const uint64_t index_id,
const ObIArray<MatchExprInfo> &match_expr_infos) const
{
bool bret = false;
for (int64_t i = 0; !bret && i < match_expr_infos.count(); ++i) {
const MatchExprInfo &match_info = match_expr_infos.at(i);
if (match_info.inv_idx_id_ == index_id) {
bret = true;
}
}
return bret;
}
int ObJoinOrder::get_better_index_prefix(const ObIArray<ObRawExpr*> &range_exprs,
const ObIArray<int64_t> &range_expr_max_offsets,
const ObIArray<uint64_t> &total_range_counts,

View File

@ -277,6 +277,37 @@ struct EstimateCostInfo {
bool override_;
};
// Text-retrieval index usage recorded for an access path: which match exprs
// are served by an index scan and which by functional lookup.
struct TRIndexAccessInfo
{
  // all arrays start out empty
  TRIndexAccessInfo() {}

  // Drop every recorded expr, filter and index id.
  void reset()
  {
    func_lookup_index_ids_.reset();
    func_lookup_exprs_.reset();
    index_scan_index_ids_.reset();
    index_scan_filters_.reset();
    index_scan_exprs_.reset();
  }

  // true when at least one match expr is recorded for the index scan
  bool has_ir_scan() const { return !index_scan_exprs_.empty(); }
  // true when at least one match expr is recorded for functional lookup
  bool has_func_lookup() const { return !func_lookup_exprs_.empty(); }

  TO_STRING_KV(K_(index_scan_exprs), K_(index_scan_filters), K_(index_scan_index_ids),
               K_(func_lookup_exprs), K_(func_lookup_index_ids));

  // match exprs evaluated by the index scan
  common::ObSEArray<ObRawExpr *, 2, common::ModulePageAllocator, true> index_scan_exprs_;
  // filters enclosing the index scan match exprs
  common::ObSEArray<ObRawExpr *, 2, common::ModulePageAllocator, true> index_scan_filters_;
  // inverted index table ids of the index scan match exprs
  common::ObSEArray<uint64_t, 2, common::ModulePageAllocator, true> index_scan_index_ids_;
  // match exprs evaluated by functional lookup
  common::ObSEArray<ObRawExpr *, 4, common::ModulePageAllocator, true> func_lookup_exprs_;
  // inverted index table ids of the functional lookup match exprs
  common::ObSEArray<uint64_t, 4, common::ModulePageAllocator, true> func_lookup_index_ids_;
};
class Path
{
public:
@ -549,6 +580,7 @@ struct EstimateCostInfo {
est_records_(),
range_prefix_count_(0),
table_opt_info_(),
tr_idx_info_(),
for_update_(false),
use_skip_scan_(OptSkipScanState::SS_UNSET),
use_column_store_(false),
@ -672,6 +704,7 @@ struct EstimateCostInfo {
K_(est_cost_info),
K_(sample_info),
K_(range_prefix_count),
K_(tr_idx_info),
K_(for_update),
K_(use_das),
K_(use_skip_scan),
@ -701,6 +734,7 @@ struct EstimateCostInfo {
SampleInfo sample_info_; // sample scan info
int64_t range_prefix_count_; // prefix count
BaseTableOptInfo *table_opt_info_;
TRIndexAccessInfo tr_idx_info_;
bool for_update_;
OptSkipScanState use_skip_scan_;
bool use_column_store_;
@ -1344,6 +1378,30 @@ struct NullAwareAntiJoinInfo {
static const int64_t TABLE_HEURISTIC_UNIQUE_KEY_RANGE_THRESHOLD = 10000;
static const int64_t PRUNING_ROW_COUNT_THRESHOLD = 1000;
// Index binding and estimation results kept for one match expr.
struct MatchExprInfo {
  // members carry their initial values below
  MatchExprInfo() {}

  // the match expr this entry describes
  ObMatchFunRawExpr *match_expr_ = NULL;
  // table id of the inverted index the expr is bound to
  uint64_t inv_idx_id_ = common::OB_INVALID_ID;
  // query range built for the expr's tokens
  ObQueryRangeProvider *query_range_ = NULL;
  // estimated row count inside query_range_; -1 until estimated
  int64_t query_range_row_count_ = -1;
  // estimated selectivity of the expr; -1.0 until estimated
  double selectivity_ = -1.0;

  TO_STRING_KV(K_(match_expr),
               K_(inv_idx_id),
               K_(query_range),
               K_(query_range_row_count),
               K_(selectivity));
};
struct PathHelper {
PathHelper()
: is_inner_path_(false),
@ -1378,6 +1436,8 @@ struct NullAwareAntiJoinInfo {
ObBaseTableEstMethod est_method_;
// include nl params and onetime params
ObSEArray<ObExecParamRawExpr *, 2> exec_params_;
// record basic index and selectivity info for all match exprs
ObSEArray<MatchExprInfo, 4> match_expr_infos_;
};
struct DeducedExprInfo {
@ -2425,6 +2485,7 @@ struct NullAwareAntiJoinInfo {
int get_valid_index_ids(const uint64_t table_id,
const uint64_t ref_table_id,
PathHelper &helper,
ObIArray<uint64_t> &valid_index_id);
int get_valid_index_ids_with_no_index_hint(ObSqlSchemaGuard &schema_guard,
const uint64_t ref_table_id,
@ -2558,6 +2619,7 @@ struct NullAwareAntiJoinInfo {
int fill_filters(const common::ObIArray<ObRawExpr *> &all_filters,
const ObQueryRangeProvider* query_range,
ObCostTableScanInfo &est_scan_cost_info,
const TRIndexAccessInfo &tr_idx_info,
bool &is_nl_with_extended_range,
bool is_link = false,
bool use_skip_scan = false);
@ -2630,6 +2692,37 @@ struct NullAwareAntiJoinInfo {
int get_join_output_exprs(ObIArray<ObRawExpr *> &output_exprs);
int get_excluded_condition_exprs(ObIArray<ObRawExpr *> &excluded_conditions);
static double calc_single_parallel_rows(double rows, int64_t parallel);
// Rebuild helper.match_expr_infos_ with index, query range and selectivity
// info for every match expr on the table.
int init_basic_text_retrieval_info(uint64_t table_id,
uint64_t ref_table_id,
PathHelper &helper);
// NOTE(review): no definition of this function is visible next to the other
// text retrieval helpers - confirm it is implemented and still needed.
int extract_fts_preliminary_query_range(const ObIArray<ColumnItem> &range_columns,
const ObIArray<ObRawExpr*> &predicates,
const ObTableSchema *table_schema,
const ObTableSchema *index_schema,
PathHelper &helper,
ObQueryRangeProvider *&query_range);
// Tokenize the search key of match_expr and append one const string expr per
// token to query_tokens.
int get_query_tokens(ObMatchFunRawExpr *match_expr,
const ObTableSchema *index_schema,
ObIArray<ObConstRawExpr*> &query_tokens);
// Build a query range over the word-segment column of the index for the given
// tokens.
int get_range_of_query_tokens(ObIArray<ObConstRawExpr*> &query_tokens,
const ObTableSchema &index_schema,
ObIArray<ColumnItem> &range_columns,
ObQueryRangeProvider *&query_range);
// Estimate the row count of query_range on the fulltext index and the
// resulting selectivity.
int estimate_fts_index_scan(uint64_t table_id,
uint64_t ref_table_id,
uint64_t index_id,
const ObTableSchema *index_schema,
ObQueryRangeProvider *query_range,
int64_t &query_range_row_count,
double &selectivity);
// Append the inverted index ids of scan-candidate match filters to
// index_tid_array, advancing size.
int add_valid_fts_index_ids(PathHelper &helper, uint64_t *index_tid_array, int64_t &size);
// Look up the MatchExprInfo whose match_expr_ is match_expr.
int find_match_expr_info(const ObIArray<MatchExprInfo> &match_expr_infos,
ObRawExpr *match_expr,
const MatchExprInfo *&match_expr_info);
// Pick, among match_exprs bound to index_id, the one with the smallest
// recorded selectivity_.
int find_least_selective_expr_on_index(const ObIArray<ObMatchFunRawExpr*> &match_exprs,
const ObIArray<MatchExprInfo> &match_expr_infos,
uint64_t index_id,
const MatchExprInfo *&match_expr_info);
private:
static int check_and_remove_is_null_qual(ObLogPlan *plan,
const ObJoinType join_type,
@ -2794,6 +2887,20 @@ struct NullAwareAntiJoinInfo {
int compute_sharding_info_for_index_info_entry(const uint64_t table_id,
const uint64_t base_table_id,
IndexInfoEntry *index_info_entry);
// Classify the match exprs on the table into index scan and functional lookup
// exprs and record them in access_path.tr_idx_info_.
int process_index_for_match_expr(const uint64_t table_id,
const uint64_t ref_table_id,
const uint64_t index_id,
PathHelper &helper,
AccessPath &access_path);
// Collect match exprs, and the filters enclosing them, that are candidates for
// a fulltext index scan.
int extract_scan_match_expr_candidates(const ObIArray<ObRawExpr *> &filters,
ObIArray<ObMatchFunRawExpr *> &scan_match_exprs,
ObIArray<ObRawExpr *> &scan_match_filters);
// Keep the hinted index ids that may be used; a fulltext index without any
// match expr bound to it is dropped.
int get_valid_hint_index_list(const ObIArray<uint64_t> &hint_index_ids,
const bool is_link_table,
ObSqlSchemaGuard *schema_guard,
PathHelper &helper,
ObIArray<uint64_t> &valid_hint_index_ids) const;
// Tell whether any match expr info refers to index_id as its inverted index.
bool has_match_expr_on_index(const uint64_t index_id, const ObIArray<MatchExprInfo> &match_expr_infos) const;
friend class ::test::TestJoinOrder_ob_join_order_param_check_Test;
friend class ::test::TestJoinOrder_ob_join_order_src_Test;
private:

View File

@ -2923,16 +2923,27 @@ int ObLogPlan::allocate_access_path(AccessPath *ap,
}
if (OB_SUCC(ret)) {
ObSEArray<ObRawExpr *, 8> non_match_filters;
ObSEArray<ObRawExpr *, 2> match_filters;
if (OB_FAIL(ObRawExprUtils::extract_match_against_filters(ap->filter_,
non_match_filters,
match_filters))) {
LOG_WARN("failed to extract ir fitler from filters", K(ret), K(ap->filter_));
} else if (match_filters.count() > 0) {
if (OB_FAIL(prepare_text_retrieval_scan(match_filters, scan))) {
if (ap->tr_idx_info_.has_ir_scan()) {
// For functional lookup with multiple match filters, use only one filter
// as index scan and other filters eval after functional lookup
// TODO: enable multiple fulltext index scan after index merge supported
ObSEArray<ObRawExpr *, 8> non_match_filters;
ObSEArray<ObRawExpr *, 2> match_filters;
ObSEArray<ObRawExpr *, 8> table_scan_filters;
if (OB_FAIL(ObRawExprUtils::extract_match_against_filters(ap->filter_,
non_match_filters,
match_filters))) {
LOG_WARN("failed to extract ir fitler from filters", K(ret), K(ap->filter_));
} else if (OB_FAIL(table_scan_filters.assign(non_match_filters))) {
LOG_WARN("failed to assign non match filters to scan filters", K(ret));
} else if (OB_FAIL(prepare_text_retrieval_scan(
ap->tr_idx_info_.index_scan_exprs_,
ap->tr_idx_info_.index_scan_filters_,
match_filters,
table_scan_filters,
scan))) {
LOG_WARN("failed to allocate text ir scan", K(ret));
} else if (OB_FAIL(scan->set_table_scan_filters(non_match_filters))) {
} else if (OB_FAIL(scan->set_table_scan_filters(table_scan_filters))) {
LOG_WARN("failed to set filters", K(ret));
} else if (OB_FAIL(append(scan->get_pushdown_filter_exprs(), ap->pushdown_filters_))) {
LOG_WARN("failed to append pushdown filters", K(ret));
@ -2954,6 +2965,15 @@ int ObLogPlan::allocate_access_path(AccessPath *ap,
}
}
if (OB_SUCC(ret) && ap->tr_idx_info_.has_func_lookup()) {
// init push-down calc exprs for functional lookup
if (OB_FAIL(prepare_text_retrieval_lookup(ap->tr_idx_info_.func_lookup_exprs_,
ap->tr_idx_info_.func_lookup_index_ids_,
scan))) {
LOG_WARN("failed to prepare text retrieval lookup", K(ret), KPC(ap));
}
}
//init part/subpart expr for query range prune
if (OB_SUCC(ret)) {
ObRawExpr *part_expr = NULL;
@ -11620,7 +11640,25 @@ int ObLogPlan::collect_location_related_info(ObLogicalOperator &op)
LOG_WARN("failed to append main table id", K(ret));
}
}
LOG_TRACE("collect location related info", K(rel_info));
if (OB_SUCC(ret) && tsc_op.has_func_lookup()) {
for (int64_t i = 0; OB_SUCC(ret) && i < tsc_op.get_lookup_tr_infos().count(); ++i) {
const ObTextRetrievalInfo &curr_tr_info = tsc_op.get_lookup_tr_infos().at(i);
if (tsc_op.is_index_scan()
&& OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, tsc_op.get_real_ref_table_id()))) {
LOG_WARN("failed to append real table id", K(ret));
} else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, curr_tr_info.inv_idx_tid_))) {
LOG_WARN("failed to append inverted index table id", K(ret));
} else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, curr_tr_info.fwd_idx_tid_))) {
LOG_WARN("failed to append foward index table id", K(ret));
} else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, curr_tr_info.doc_id_idx_tid_))) {
LOG_WARN("failed to append doc_id index table id", K(ret));
} else if (OB_FAIL(add_var_to_array_no_dup(rel_info.related_ids_, curr_tr_info.rowkey_idx_tid_))) {
LOG_WARN("failed to append rowkey index table id", K(ret));
}
}
}
if (OB_SUCC(ret) && OB_FAIL(optimizer_context_.get_loc_rel_infos().push_back(rel_info))) {
LOG_WARN("store location related info failed", K(ret));
@ -11883,6 +11921,10 @@ int ObLogPlan::check_das_need_scan_with_domain_id(ObLogicalOperator *op)
ObLogTableScan *scan = static_cast<ObLogTableScan*>(op);
if (OB_FAIL(scan->check_das_need_scan_with_domain_id())) {
LOG_WARN("failed to check das scan with doc id", K(ret));
} else if (OB_UNLIKELY(scan->has_func_lookup() && (scan->is_tsc_with_doc_id() || scan->is_tsc_with_vid()))) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("functional lookup with dml on fulltext index / vector index not supported", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "filter that can not imply match_score not equal to 0 in dml");
}
}
for (int i = 0; OB_SUCC(ret) && i < op->get_num_of_child(); ++i) {
@ -14418,38 +14460,30 @@ int ObLogPlan::compute_duplicate_table_replicas(ObLogicalOperator *op)
return ret;
}
int ObLogPlan::prepare_text_retrieval_scan(const ObIArray<ObRawExpr *> &exprs, ObLogicalOperator *scan)
int ObLogPlan::prepare_text_retrieval_scan(const ObIArray<ObRawExpr *> &scan_match_exprs,
const ObIArray<ObRawExpr *> &scan_match_filters,
const ObIArray<ObRawExpr *> &all_match_filters,
ObIArray<ObRawExpr *> &scan_filters,
ObLogicalOperator *scan)
{
// TODO: only support one match against expr as filter for now
int ret = OB_SUCCESS;
ObLogTableScan *table_scan = static_cast<ObLogTableScan*>(scan);
ObRawExpr *match_pred = NULL;
ObMatchFunRawExpr *match_against = NULL;
ObSchemaGetterGuard *schema_guard = NULL;
ObSQLSessionInfo *session = NULL;
const ObTableSchema *table_schema = NULL;
const ObTableSchema *inv_idx_schema = NULL;
const ObTableSchema *fwd_idx_schema = NULL;
uint64_t doc_id_rowkey_tid = OB_INVALID_ID;
uint64_t fwd_idx_tid = OB_INVALID_ID;
uint64_t inv_idx_tid = OB_INVALID_ID;
ObSEArray<ObAuxTableMetaInfo, 4> index_infos;
bool need_calc_relevance = true;
ObSEArray<ObExprConstraint, 2> constraints;
ObMatchFunRawExpr *scan_match_expr = nullptr;
if (OB_UNLIKELY(1 != exprs.count())) {
if (OB_UNLIKELY(1 != scan_match_exprs.count())) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("multi match filters not supported yet", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "more than one match filter");
} else if (OB_ISNULL(match_pred = exprs.at(0)) || OB_ISNULL(scan) ||
OB_ISNULL(get_stmt()) || OB_ISNULL(get_optimizer_context().get_query_ctx())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argumsnts", K(ret), KPC(match_pred), KP(scan));
} else if (OB_ISNULL(get_stmt())
|| OB_ISNULL(schema_guard = get_optimizer_context().get_schema_guard())
|| OB_ISNULL(session = get_optimizer_context().get_session_info())) {
} else if (OB_UNLIKELY(scan_match_filters.count() < 1)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null pointers", K(ret), KP(get_stmt()), KP(schema_guard), KP(session));
LOG_WARN("unexpected text retrieval scan without match filters", K(ret));
} else if (OB_ISNULL(match_pred = scan_match_filters.at(0))
|| OB_ISNULL(scan_match_expr = static_cast<ObMatchFunRawExpr *>(scan_match_exprs.at(0)))
|| OB_ISNULL(scan)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argumsnts", K(ret), KPC(match_pred), KPC(scan_match_expr), KP(scan));
} else if (OB_UNLIKELY(!match_pred->has_flag(CNT_MATCH_EXPR)
|| LOG_TABLE_SCAN != scan->get_type()
|| 0 == match_pred->get_param_count())) {
@ -14474,11 +14508,97 @@ int ObLogPlan::prepare_text_retrieval_scan(const ObIArray<ObRawExpr *> &exprs, O
}
if (OB_FAIL(ret)) {
} else if (OB_ISNULL(match_against)) {
} else if (OB_UNLIKELY(match_against != static_cast<ObMatchFunRawExpr *>(scan_match_exprs.at(0)))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null match against expr", K(ret), KPC(match_pred), KPC(match_against));
LOG_WARN("unexpected match against expr in match filter is not the match expr for scan",
K(ret), KPC(match_against), K(scan_match_exprs));
} else if (OB_FAIL(prepare_text_retrieval_info(table_scan->get_real_ref_table_id(),
table_scan->get_index_table_id(),
match_against,
table_scan->get_text_retrieval_info()))) {
LOG_WARN("failed to prepare text retrieval info", K(ret));
} else {
ObTextRetrievalInfo &tr_info = table_scan->get_text_retrieval_info();
tr_info.match_expr_ = match_against;
tr_info.pushdown_match_filter_ = match_pred;
table_scan->set_doc_id_index_table_id(tr_info.doc_id_idx_tid_);
}
for (int64_t i = 0; OB_SUCC(ret) && i < all_match_filters.count(); ++i) {
ObRawExpr *curr_filter = all_match_filters.at(i);
if (curr_filter != match_pred) {
if (OB_FAIL(scan_filters.push_back(curr_filter))) {
LOG_WARN("failed to append match filter after functional lookup", K(ret));
}
}
}
return ret;
}
int ObLogPlan::prepare_text_retrieval_lookup(const ObIArray<ObRawExpr *> &lookup_match_exprs,
const ObIArray<uint64_t> &lookup_index_ids,
ObLogicalOperator *scan)
{
int ret = OB_SUCCESS;
ObLogTableScan *table_scan = static_cast<ObLogTableScan *>(scan);
if (OB_ISNULL(table_scan) || OB_UNLIKELY(lookup_match_exprs.count() != lookup_index_ids.count())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), KPC(table_scan), K(lookup_match_exprs), K(lookup_index_ids));
}
for (int64_t i = 0; OB_SUCC(ret) && i < lookup_match_exprs.count(); ++i) {
ObTextRetrievalInfo tr_info;
ObMatchFunRawExpr *curr_match_expr = nullptr;
if (OB_ISNULL(curr_match_expr = static_cast<ObMatchFunRawExpr *>(lookup_match_exprs.at(i)))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected nullptr to lookup match exprs", K(ret), K(i), K(lookup_match_exprs));
} else if (OB_FAIL(prepare_text_retrieval_info(table_scan->get_real_ref_table_id(),
lookup_index_ids.at(i),
curr_match_expr,
tr_info))) {
LOG_WARN("failed to prepare text retrieval info", K(ret));
} else if (OB_FAIL(table_scan->get_lookup_tr_infos().push_back(tr_info))) {
LOG_WARN("failed to append lookup text retrieval infos", K(ret));
}
}
if (OB_SUCC(ret) && table_scan->get_lookup_tr_infos().count() > 0) {
// has text retrieval lookup, need do rowkey->doc_id lookup
const uint64_t rowkey_doc_tid = table_scan->get_lookup_tr_infos().at(0).rowkey_idx_tid_;
table_scan->set_rowkey_doc_table_id(rowkey_doc_tid);
}
return ret;
}
int ObLogPlan::prepare_text_retrieval_info(const uint64_t ref_table_id,
const uint64_t index_table_id,
ObMatchFunRawExpr *match_against,
ObTextRetrievalInfo &tr_info)
{
int ret = OB_SUCCESS;
ObSchemaGetterGuard *schema_guard = NULL;
ObSQLSessionInfo *session = NULL;
const ObTableSchema *table_schema = NULL;
const ObTableSchema *inv_idx_schema = NULL;
const ObTableSchema *fwd_idx_schema = NULL;
uint64_t doc_id_rowkey_tid = OB_INVALID_ID;
uint64_t rowkey_doc_tid = OB_INVALID_ID;
uint64_t fwd_idx_tid = OB_INVALID_ID;
uint64_t inv_idx_tid = OB_INVALID_ID;
ObSEArray<ObAuxTableMetaInfo, 4> index_infos;
bool need_calc_relevance = true;
ObSEArray<ObExprConstraint, 2> constraints;
if (OB_ISNULL(match_against) || OB_ISNULL(get_stmt()) || OB_ISNULL(get_optimizer_context().get_query_ctx())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid arguments", K(ret), KPC(match_against));
} else if (OB_ISNULL(get_stmt())
|| OB_ISNULL(schema_guard = get_optimizer_context().get_schema_guard())
|| OB_ISNULL(session = get_optimizer_context().get_session_info())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null pointers", K(ret), KP(get_stmt()), KP(schema_guard), KP(session));
} else if (OB_FAIL(schema_guard->get_table_schema(session->get_effective_tenant_id(),
table_scan->get_real_ref_table_id(),
ref_table_id,
table_schema))) {
LOG_WARN("failed to get table schema", K(ret));
} else if (OB_ISNULL(table_schema)) {
@ -14488,7 +14608,9 @@ int ObLogPlan::prepare_text_retrieval_scan(const ObIArray<ObRawExpr *> &exprs, O
LOG_WARN("failed to get index infos", K(ret));
} else if (OB_FAIL(table_schema->get_doc_id_rowkey_tid(doc_id_rowkey_tid))) {
LOG_WARN("failed to get doc_id_rowkey table id", K(ret));
} else if (OB_FALSE_IT(inv_idx_tid = table_scan->get_index_table_id())) {
} else if (OB_FAIL(table_schema->get_rowkey_doc_tid(rowkey_doc_tid))) {
LOG_WARN("failed to get rowkey doc table id", K(ret), KPC(table_schema));
} else if (OB_FALSE_IT(inv_idx_tid = index_table_id)) {
} else if (OB_FAIL(schema_guard->get_table_schema(session->get_effective_tenant_id(),
inv_idx_tid,
inv_idx_schema))) {
@ -14539,14 +14661,13 @@ int ObLogPlan::prepare_text_retrieval_scan(const ObIArray<ObRawExpr *> &exprs, O
LOG_WARN("failed to append array no dup", K(ret));
}
*/
ObTextRetrievalInfo &tr_info = table_scan->get_text_retrieval_info();
tr_info.match_expr_ = match_against;
tr_info.inv_idx_tid_ = inv_idx_tid;
tr_info.fwd_idx_tid_ = fwd_idx_tid;
tr_info.doc_id_idx_tid_ = doc_id_rowkey_tid;
tr_info.pushdown_match_filter_ = match_pred;
tr_info.rowkey_idx_tid_ = rowkey_doc_tid;
tr_info.pushdown_match_filter_ = nullptr;
tr_info.need_calc_relevance_ = need_calc_relevance;
table_scan->set_doc_id_index_table_id(doc_id_rowkey_tid);
}
return ret;
}

View File

@ -87,6 +87,7 @@ struct IndexDMLInfo;
class ValuesTablePath;
class ObSelectLogPlan;
class ObThreeStageAggrInfo;
struct ObTextRetrievalInfo;
class ObHashRollupInfo;
struct TableDependInfo {
@ -1461,7 +1462,14 @@ public:
int construct_startup_filter_for_limit(ObRawExpr *limit_expr, ObLogicalOperator *log_op);
int prepare_vector_index_info(ObLogicalOperator *scan);
int prepare_text_retrieval_scan(const ObIArray<ObRawExpr*> &exprs, ObLogicalOperator *scan);
int prepare_text_retrieval_scan(const ObIArray<ObRawExpr *> &scan_match_exprs,
const ObIArray<ObRawExpr *> &scan_match_filters,
const ObIArray<ObRawExpr *> &all_match_filters,
ObIArray<ObRawExpr *> &scan_filters,
ObLogicalOperator *scan);
int prepare_text_retrieval_lookup(const ObIArray<ObRawExpr *> &lookup_match_exprs,
const ObIArray<uint64_t> &lookup_index_ids,
ObLogicalOperator *scan);
int prepare_multivalue_retrieval_scan(ObLogicalOperator *scan);
int try_push_topn_into_domain_scan(ObLogicalOperator *&top,
ObRawExpr *topn_expr,
@ -1781,6 +1789,10 @@ private: // member functions
int adjust_expr_properties_for_external_table(ObRawExpr *col_expr, ObRawExpr *&expr) const;
int compute_duplicate_table_replicas(ObLogicalOperator *op);
int prepare_text_retrieval_info(const uint64_t ref_table_id,
const uint64_t index_table_id,
ObMatchFunRawExpr *ma_expr,
ObTextRetrievalInfo &tr_info);
public:
const ObLogPlanHint &get_log_plan_hint() const { return log_plan_hint_; }
bool has_join_order_hint() { return !log_plan_hint_.join_order_.leading_tables_.is_empty(); }

View File

@ -219,12 +219,15 @@ int ObLogTableScan::get_op_exprs(ObIArray<ObRawExpr*> &all_exprs)
LOG_WARN("failed to push back expr", K(ret));
} else if (NULL != group_id_expr_ && OB_FAIL(all_exprs.push_back(group_id_expr_))) {
LOG_WARN("failed to push back expr", K(ret));
} else if (is_text_retrieval_scan() && OB_FAIL(get_text_retrieval_calc_exprs(all_exprs))) {
} else if (is_text_retrieval_scan()
&& OB_FAIL(get_text_retrieval_calc_exprs(get_text_retrieval_info(), all_exprs))) {
LOG_WARN("failed to get text retrieval exprs", K(ret));
} else if (is_vec_idx_scan() && OB_FAIL(get_vec_idx_calc_exprs(all_exprs))) {
LOG_WARN("failed to get text retrieval exprs", K(ret));
} else if (OB_FAIL(append(all_exprs, rowkey_id_exprs_))) {
LOG_WARN("failed to append rowkey doc exprs", K(ret));
} else if (has_func_lookup() && OB_FAIL(get_func_lookup_calc_exprs(all_exprs))) {
LOG_WARN("failed to get functional lookup exprs", K(ret));
} else if (OB_FAIL(append(all_exprs, access_exprs_))) {
LOG_WARN("failed to append exprs", K(ret));
} else if (OB_FAIL(append(all_exprs, pushdown_aggr_exprs_))) {
@ -271,19 +274,15 @@ int ObLogTableScan::allocate_expr_post(ObAllocExprContext &ctx)
LOG_WARN("failed to mark expr as produced", K(*expr), K(branch_id_), K(id_), K(ret));
}
}
if (OB_SUCC(ret) && is_text_retrieval_scan()) {
if (OB_SUCC(ret)) {
// match against relevance expr will be calculated in storage
ObSEArray<ObRawExpr *, 8> tmp_exprs;
if (OB_FAIL(ObRawExprUtils::extract_column_exprs(get_text_retrieval_info().relevance_expr_, tmp_exprs))) {
LOG_WARN("failed to extract column exprs", K(ret));
} else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().doc_token_cnt_))) {
LOG_WARN("failed to append tmp exprs", K(ret));
} else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().total_doc_cnt_))) {
LOG_WARN("failed to append tmp exprs", K(ret));
} else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().related_doc_cnt_))) {
LOG_WARN("failed to append tmp exprs", K(ret));
} else if (OB_FAIL(tmp_exprs.push_back(get_text_retrieval_info().match_expr_))) {
LOG_WARN("failed to append tmp exprs", K(ret));
if (is_text_retrieval_scan()
&& OB_FAIL(get_text_retrieval_calc_exprs(get_text_retrieval_info(), tmp_exprs))) {
LOG_WARN("failed to get text retrieval calc exprs", K(ret));
} else if (has_func_lookup()
&& OB_FAIL(get_func_lookup_calc_exprs(tmp_exprs))) {
LOG_WARN("failed to get func lookup exprs", K(ret));
}
for (int64_t i = 0; OB_SUCC(ret) && i < tmp_exprs.count(); ++i) {
ObRawExpr *expr = tmp_exprs.at(i);
@ -485,12 +484,14 @@ int ObLogTableScan::generate_access_exprs()
LOG_WARN("get unexpected null", K(get_plan()), K(get_stmt()), K(ret));
} else if (OB_FAIL(copy_filter_before_index_back())) {
LOG_WARN("failed to copy filter before index back", K(ret));
} else if (is_text_retrieval_scan() && OB_FAIL(prepare_text_retrieval_dep_exprs())) {
} else if (is_text_retrieval_scan() && OB_FAIL(prepare_text_retrieval_dep_exprs(get_text_retrieval_info()))) {
LOG_WARN("failed to copy text retrieval aggr exprs", K(ret));
} else if (is_vec_idx_scan() && OB_FAIL(prepare_vector_access_exprs())) {
LOG_WARN("failed to copy vec idx scan exprs", K(ret));
} else if ((is_tsc_with_doc_id() || is_tsc_with_vid()) && OB_FAIL(prepare_rowkey_domain_id_dep_exprs())) {
} else if (need_rowkey_doc_expr() && OB_FAIL(prepare_rowkey_domain_id_dep_exprs())) {
LOG_WARN("failed to prepare table scan with doc id info", K(ret));
} else if (has_func_lookup() && OB_FAIL(prepare_func_lookup_dep_exprs())) {
LOG_WARN("failed to prepare functional lookup dependent exprs", K(ret));
} else if (OB_FAIL(generate_necessary_rowkey_and_partkey_exprs())) {
LOG_WARN("failed to generate rowkey and part exprs", K(ret));
} else if (OB_FAIL(allocate_group_id_expr())) {
@ -759,9 +760,16 @@ int ObLogTableScan::extract_pushdown_filters(ObIArray<ObRawExpr*> &nonpushdown_f
if (OB_FAIL(nonpushdown_filters.push_back(filters.at(i)))) {
LOG_WARN("push variable assign filter store non-pushdown filter failed", K(ret), K(i));
}
} else if (has_func_lookup() &&
(filters.at(i)->has_flag(CNT_MATCH_EXPR) || !flags.at(i))) {
// for filter with match expr in functional lookup, need to be evaluated after func lookup
// push-down filter on main-table lookup with functional lookup not supported by executor
if (OB_FAIL(nonpushdown_filters.push_back(filters.at(i)))) {
LOG_WARN("push func-lookup match filter to non-pushdown array failed", K(ret), K(i));
}
} else if (is_text_retrieval_scan() && need_text_retrieval_calc_relevance()) {
if (OB_FAIL(nonpushdown_filters.push_back(filters.at(i)))) {
LOG_WARN("push variable assign filter store non-pushdown filter failed", K(ret), K(i));
LOG_WARN("push text retrieval scan store non-pushdown filter failed", K(ret), K(i));
}
} else if (ignore_pd_filter) {
//ignore_pd_filter: only extract non-pushdown filters, ignore others
@ -1091,14 +1099,18 @@ int ObLogTableScan::generate_necessary_rowkey_and_partkey_exprs()
LOG_WARN("failed to check whether stmt has mbr column", K(ret));
} else if (need_doc_id_index_back() && OB_FAIL(extract_doc_id_index_back_expr(domain_exprs_, is_vec_idx_scan()))) {
LOG_WARN("failed to extract doc id index back exprs", K(ret));
} else if (is_text_retrieval_scan() && OB_FAIL(extract_text_retrieval_access_expr(domain_exprs_))) {
} else if (is_text_retrieval_scan()
&& OB_FAIL(extract_text_retrieval_access_expr(get_text_retrieval_info(), domain_exprs_))) {
LOG_WARN("failed to extract text retrieval access exprs", K(ret));
} else if (is_vec_idx_scan() && OB_FAIL(extract_vec_idx_access_expr(domain_exprs_))) {
LOG_WARN("failed to extract vector index access exprs", K(ret));
}else if (is_heap_table && is_index_global_ && index_back_ &&
} else if (has_func_lookup()
&& OB_FAIL(extract_func_lookup_access_exprs(domain_exprs_))) {
LOG_WARN("failed to extract functional lookup access exprs", K(ret));
} else if (is_heap_table && is_index_global_ && index_back_ &&
OB_FAIL(get_part_column_exprs(table_id_, ref_table_id_, part_exprs_))) {
LOG_WARN("failed to get part column exprs", K(ret));
} else if ((has_lob_column || index_back_) &&
} else if ((has_lob_column || index_back_ || has_func_lookup()) &&
OB_FAIL(get_plan()->get_rowkey_exprs(table_id_, ref_table_id_, rowkey_exprs_))) {
LOG_WARN("failed to generate rowkey exprs", K(ret));
} else { /*do nothing*/ }
@ -1720,6 +1732,15 @@ int ObLogTableScan::get_plan_item_info(PlanText &plan_text,
LOG_WARN("BUF_PRINTF fails", K(ret));
}
}
if (OB_SUCC(ret) && has_func_lookup()) {
if (OB_FAIL(BUF_PRINTF(", "))) {
LOG_WARN("BUF_PRINTF failed", K(ret));
} else if (OB_FAIL(BUF_PRINTF("has_functional_lookup=true"))) {
LOG_WARN("BUF_PRINTF fails", K(ret));
}
}
END_BUF_PRINT(plan_item.special_predicates_,
plan_item.special_predicates_len_);
}
@ -2964,10 +2985,10 @@ int ObLogTableScan::extract_doc_id_index_back_expr(ObIArray<ObRawExpr *> &exprs,
return ret;
}
int ObLogTableScan::extract_text_retrieval_access_expr(ObIArray<ObRawExpr *> &exprs)
int ObLogTableScan::extract_text_retrieval_access_expr(ObTextRetrievalInfo &tr_info,
ObIArray<ObRawExpr *> &exprs)
{
int ret = OB_SUCCESS;
ObTextRetrievalInfo &tr_info = get_text_retrieval_info();
if (OB_ISNULL(tr_info.match_expr_) || OB_ISNULL(tr_info.total_doc_cnt_) ||
OB_ISNULL(tr_info.doc_token_cnt_) || OB_ISNULL(tr_info.related_doc_cnt_)) {
ret = OB_ERR_UNEXPECTED;
@ -3065,34 +3086,62 @@ int ObLogTableScan::get_vec_idx_calc_exprs(ObIArray<ObRawExpr *> &all_exprs)
return ret;
}
int ObLogTableScan::get_text_retrieval_calc_exprs(ObIArray<ObRawExpr *> &all_exprs)
int ObLogTableScan::get_text_retrieval_calc_exprs(ObTextRetrievalInfo &tr_info,
ObIArray<ObRawExpr *> &all_exprs)
{
int ret = OB_SUCCESS;
if (OB_ISNULL(get_text_retrieval_info().match_expr_)) {
if (OB_ISNULL(tr_info.match_expr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null match against expr", K(ret));
} else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().related_doc_cnt_))) {
} else if (OB_FAIL(all_exprs.push_back(tr_info.related_doc_cnt_))) {
LOG_WARN("failed to append relevanced doc cnt expr", K(ret));
} else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().doc_token_cnt_))) {
} else if (OB_FAIL(all_exprs.push_back(tr_info.doc_token_cnt_))) {
LOG_WARN("failed to append doc token cnt expr", K(ret));
} else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().total_doc_cnt_))) {
} else if (OB_FAIL(all_exprs.push_back(tr_info.total_doc_cnt_))) {
LOG_WARN("failed to append total doc cnt expr", K(ret));
} else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().relevance_expr_))) {
} else if (OB_FAIL(all_exprs.push_back(tr_info.relevance_expr_))) {
LOG_WARN("failed to append relevance expr", K(ret));
} else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().match_expr_))) {
} else if (OB_FAIL(all_exprs.push_back(tr_info.match_expr_))) {
LOG_WARN("failed to append text retrieval expr", K(ret));
} else if (OB_FAIL(all_exprs.push_back(get_text_retrieval_info().pushdown_match_filter_))) {
} else if (nullptr != tr_info.pushdown_match_filter_
&& OB_FAIL(all_exprs.push_back(tr_info.pushdown_match_filter_))) {
LOG_WARN("failed to append match filter", K(ret));
} else if (OB_NOT_NULL(get_text_retrieval_info().topk_limit_expr_) &&
OB_FAIL(all_exprs.push_back(get_text_retrieval_info().topk_limit_expr_))) {
} else if (nullptr != tr_info.topk_limit_expr_
&& OB_FAIL(all_exprs.push_back(tr_info.topk_limit_expr_))) {
LOG_WARN("failed to append limit expr", K(ret));
} else if (OB_NOT_NULL(get_text_retrieval_info().topk_offset_expr_) &&
OB_FAIL(all_exprs.push_back(get_text_retrieval_info().topk_offset_expr_))) {
} else if (nullptr != tr_info.topk_offset_expr_
&& OB_FAIL(all_exprs.push_back(tr_info.topk_offset_expr_))) {
LOG_WARN("failed to append offset expr", K(ret));
}
return ret;
}
// Runs extract_text_retrieval_access_expr() on every entry of lookup_tr_infos_,
// passing all_exprs through as the output array. Stops at the first failing
// entry and returns its error code.
int ObLogTableScan::extract_func_lookup_access_exprs(ObIArray<ObRawExpr *> &all_exprs)
{
  int ret = OB_SUCCESS;
  int64_t i = 0;
  while (OB_SUCC(ret) && i < lookup_tr_infos_.count()) {
    ObTextRetrievalInfo &curr_info = lookup_tr_infos_.at(i);
    if (OB_FAIL(extract_text_retrieval_access_expr(curr_info, all_exprs))) {
      LOG_WARN("failed to extract text retrieval access expr", K(ret), K(i), K(lookup_tr_infos_.at(i)));
    }
    ++i;
  }
  return ret;
}
// Runs get_text_retrieval_calc_exprs() on every entry of lookup_tr_infos_,
// passing all_exprs through as the output array. Stops at the first failing
// entry and returns its error code.
int ObLogTableScan::get_func_lookup_calc_exprs(ObIArray<ObRawExpr *> &all_exprs)
{
  int ret = OB_SUCCESS;
  int64_t i = 0;
  while (OB_SUCC(ret) && i < lookup_tr_infos_.count()) {
    ObTextRetrievalInfo &curr_info = lookup_tr_infos_.at(i);
    if (OB_FAIL(get_text_retrieval_calc_exprs(curr_info, all_exprs))) {
      LOG_WARN("failed to get text retrieval calc expr", K(ret), K(i), K(lookup_tr_infos_.at(i)));
    }
    ++i;
  }
  return ret;
}
int ObLogTableScan::print_text_retrieval_annotation(char *buf, int64_t buf_len, int64_t &pos, ExplainType type)
{
int ret = OB_SUCCESS;
@ -3413,7 +3462,7 @@ int ObLogTableScan::prepare_vector_access_exprs()
return ret;
}
int ObLogTableScan::prepare_text_retrieval_dep_exprs()
int ObLogTableScan::prepare_text_retrieval_dep_exprs(ObTextRetrievalInfo &tr_info)
{
int ret = OB_SUCCESS;
const ObTableSchema *table_schema;
@ -3433,7 +3482,6 @@ int ObLogTableScan::prepare_text_retrieval_dep_exprs()
ObAggFunRawExpr *total_doc_cnt = nullptr;
ObAggFunRawExpr *doc_token_cnt = nullptr;
ObOpRawExpr *relevance_expr = nullptr;
ObTextRetrievalInfo &tr_info = get_text_retrieval_info();
if (OB_NOT_NULL(tr_info.doc_id_column_) && OB_NOT_NULL(tr_info.doc_length_column_) &&
OB_NOT_NULL(tr_info.token_column_) && OB_NOT_NULL(tr_info.token_cnt_column_) &&
OB_NOT_NULL(tr_info.doc_token_cnt_) && OB_NOT_NULL(tr_info.total_doc_cnt_) &&
@ -3546,8 +3594,11 @@ int ObLogTableScan::prepare_text_retrieval_dep_exprs()
// Copy column ref expr referenced by aggregation in different index table scan
// to avoid share expression
} else if (OB_FAIL(copier.copy(related_doc_cnt->get_param_expr(0)))) {
LOG_WARN("failed to copy related_doc_cnt expr", K(ret));
} else if (OB_FAIL(copier.copy(total_doc_cnt->get_param_expr(0)))) {
LOG_WARN("failed to copy total_doc_cnt expr", K(ret));
} else if (OB_FAIL(copier.copy(doc_token_cnt->get_param_expr(0)))) {
LOG_WARN("failed to copy doc_token_cnt expr", K(ret));
} else {
tr_info.token_column_ = token_column;
tr_info.token_cnt_column_ = token_cnt_column;
@ -3562,6 +3613,20 @@ int ObLogTableScan::prepare_text_retrieval_dep_exprs()
return ret;
}
int ObLogTableScan::prepare_func_lookup_dep_exprs()
{
int ret = OB_SUCCESS;
for (int64_t i = 0; OB_SUCC(ret) && i < lookup_tr_infos_.count(); ++i) {
if (OB_FAIL(prepare_text_retrieval_dep_exprs(lookup_tr_infos_.at(i)))) {
LOG_WARN("failed to prepare text retrieval dependent exprs",
K(ret), K(i), K(lookup_tr_infos_.at(i)));
}
}
return ret;
}
int ObLogTableScan::get_card_without_filter(double &card)
{
int ret = OB_SUCCESS;
@ -3976,7 +4041,7 @@ int ObLogTableScan::prepare_rowkey_domain_id_dep_exprs()
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, table schema is nullptr", K(ret));
} else {
if (is_tsc_with_doc_id()) {
if (is_tsc_with_doc_id() || has_func_lookup()) {
if (OB_FAIL(schema_guard->get_table_schema(rowkey_doc_tid_, rowkey_doc_schema))) {
LOG_WARN("fail toprint_ranges get rowkey doc table schema", K(ret), K(rowkey_doc_tid_));
} else if (OB_ISNULL(rowkey_doc_schema)) {

View File

@ -63,9 +63,10 @@ struct ObTextRetrievalInfo
ObRawExpr *topk_offset_expr_;
bool with_ties_;
bool need_calc_relevance_; // match expr just for retireval (accurate score is not required)
uint64_t inv_idx_tid_; // choosed aux inverted index table id (word-doc)
uint64_t fwd_idx_tid_; // choosed aux forward index table id (doc-word)
uint64_t doc_id_idx_tid_; // choosed aux doc_id index table id (doc-rowkey)
uint64_t inv_idx_tid_; // chosen aux inverted index table id (word-doc)
uint64_t fwd_idx_tid_; // chosen aux forward index table id (doc-word)
uint64_t doc_id_idx_tid_; // chosen aux doc_id index table id (doc-rowkey)
uint64_t rowkey_idx_tid_; // chosen aux rowkey index table id (rowkey-doc)
// the following exprs are used for intermediate calculation of relevance score
ObColumnRefRawExpr *token_column_;
ObColumnRefRawExpr *token_cnt_column_;
@ -690,13 +691,17 @@ public:
inline bool is_spatial_index_scan() const { return is_spatial_index_; }
inline ObTextRetrievalInfo &get_text_retrieval_info() { return text_retrieval_info_; }
inline const ObTextRetrievalInfo &get_text_retrieval_info() const { return text_retrieval_info_; }
int prepare_text_retrieval_dep_exprs();
int prepare_vector_access_exprs();
// True when lookup_tr_infos_ holds at least one text retrieval info.
inline bool has_func_lookup() const { return 0 != lookup_tr_infos_.count(); }
// Mutable and read-only accessors for lookup_tr_infos_.
inline ObIArray<ObTextRetrievalInfo> &get_lookup_tr_infos() { return lookup_tr_infos_; }
inline const ObIArray<ObTextRetrievalInfo> &get_lookup_tr_infos() const { return lookup_tr_infos_; }
inline bool need_text_retrieval_calc_relevance() const { return text_retrieval_info_.need_calc_relevance_; }
inline bool need_doc_id_index_back() const { return is_text_retrieval_scan() || is_multivalue_index_scan() || is_vec_idx_scan(); }
// True if any of is_tsc_with_doc_id(), is_tsc_with_vid() or has_func_lookup() holds.
inline bool need_rowkey_doc_expr() const { return is_tsc_with_doc_id() || is_tsc_with_vid() || has_func_lookup(); }
inline void set_doc_id_index_table_id(const uint64_t doc_id_index_table_id) { doc_id_table_id_ = doc_id_index_table_id; }
inline uint64_t get_doc_id_index_table_id() const { return doc_id_table_id_; }
inline uint64_t get_rowkey_vid_table_id() const { return rowkey_vid_tid_; }
inline void set_rowkey_doc_table_id(const uint64_t tid) { rowkey_doc_tid_ = tid; }
inline uint64_t get_rowkey_doc_table_id() const { return rowkey_doc_tid_; }
inline uint64_t get_multivalue_col_idx() const { return multivalue_col_idx_; }
inline int32_t get_multivalue_type() const { return multivalue_type_; }
@ -759,11 +764,15 @@ private: // member functions
const share::schema::ObTableSchema &table_schema,
bool &need_filter);
int allocate_group_id_expr();
int extract_doc_id_index_back_expr(ObIArray<ObRawExpr *> &exprs, bool is_vec_scan = false);
int extract_text_retrieval_access_expr(ObIArray<ObRawExpr *> &exprs);
int extract_vec_idx_access_expr(ObIArray<ObRawExpr *> &exprs);
int get_text_retrieval_calc_exprs(ObIArray<ObRawExpr *> &all_exprs);
int get_vec_idx_calc_exprs(ObIArray<ObRawExpr *> &all_exprs);
int extract_doc_id_index_back_expr(ObIArray<ObRawExpr *> &exprs, bool is_vec_scan = false);
int extract_text_retrieval_access_expr(ObTextRetrievalInfo &tr_info, ObIArray<ObRawExpr *> &exprs);
int get_text_retrieval_calc_exprs(ObTextRetrievalInfo &tr_info, ObIArray<ObRawExpr *> &all_exprs);
int prepare_text_retrieval_dep_exprs(ObTextRetrievalInfo &tr_info);
int extract_func_lookup_access_exprs(ObIArray<ObRawExpr *> &all_exprs);
int get_func_lookup_calc_exprs(ObIArray<ObRawExpr *> &all_exprs);
int prepare_func_lookup_dep_exprs();
int print_text_retrieval_annotation(char *buf, int64_t buf_len, int64_t &pos, ExplainType type);
int find_nearest_rcte_op(ObLogSet *&rcte_op);
int generate_filter_monotonicity();
@ -908,9 +917,11 @@ protected: // memeber variables
share::schema::ObTableType table_type_;
bool use_column_store_;
uint64_t doc_id_table_id_; // used for rowkey lookup of fulltext, JSON multi-value and vector index
// text retrieval as index scan
ObTextRetrievalInfo text_retrieval_info_;
// text retrieval as functional lookup
common::ObSEArray<ObTextRetrievalInfo, 2, common::ModulePageAllocator, true> lookup_tr_infos_;
ObVectorIndexInfo vector_index_info_;
ObPxRFStaticInfo px_rf_info_;
bool das_keep_ordering_;
typedef common::ObSEArray<ObRawFilterMonotonicity, 4, common::ModulePageAllocator, true> FilterMonotonicity;

View File

@ -428,3 +428,13 @@ double ObOptCostModelParameter::get_cmp_err_handle_expr_cost(const OptSystemStat
return CMP_ERR_HANDLE_EXPR_COST / stat.get_cpu_speed();
}
}
// Returns FUNCTIONAL_LOOKUP_PER_ROW_COST divided by the CPU speed reported by
// stat. When the reported CPU speed is not positive, the unscaled constant is
// returned instead, so the division never uses a zero or negative divisor.
double ObOptCostModelParameter::get_functional_lookup_per_row_cost(const OptSystemStat& stat) const
{
  // jinmao TODO: this needs further consideration
  const auto cpu_speed = stat.get_cpu_speed();
  return cpu_speed <= 0 ? FUNCTIONAL_LOOKUP_PER_ROW_COST
                        : FUNCTIONAL_LOOKUP_PER_ROW_COST / cpu_speed;
}

View File

@ -74,6 +74,7 @@ public:
const double DEFAULT_CMP_UDF_COST,
const double DEFAULT_CMP_LOB_COST,
const double DEFAULT_CMP_ERR_HANDLE_EXPR_COST,
const double DEFAULT_FUNCTIONAL_LOOKUP_PER_ROW_COST,
const double (&comparison_params)[common::ObMaxTC + 1],
const double (&hash_params)[common::ObMaxTC + 1],
const double (&project_params)[2][2][MAX_PROJECT_TYPE]
@ -119,6 +120,7 @@ public:
CMP_UDF_COST(DEFAULT_CMP_UDF_COST),
CMP_LOB_COST(DEFAULT_CMP_LOB_COST),
CMP_ERR_HANDLE_EXPR_COST(DEFAULT_CMP_ERR_HANDLE_EXPR_COST),
FUNCTIONAL_LOOKUP_PER_ROW_COST(DEFAULT_FUNCTIONAL_LOOKUP_PER_ROW_COST),
comparison_params_(comparison_params),
hash_params_(hash_params),
project_params_(project_params)
@ -172,6 +174,7 @@ public:
double get_cmp_lob_cost(const OptSystemStat& stat) const;
double get_cmp_udf_cost(const OptSystemStat& stat) const;
double get_cmp_err_handle_expr_cost(const OptSystemStat& stat) const;
double get_functional_lookup_per_row_cost(const OptSystemStat& stat) const;
protected:
/** CPU cost of reading one row; essentially only covers the get_next_row() operation */
@ -253,6 +256,8 @@ protected:
double CMP_LOB_COST;
// cost of evaluating one expression that requires error handling
double CMP_ERR_HANDLE_EXPR_COST;
// cost of evaluating one full-text index functional lookup expression
double FUNCTIONAL_LOOKUP_PER_ROW_COST;
const double (&comparison_params_)[common::ObMaxTC + 1];
const double (&hash_params_)[common::ObMaxTC + 1]; /*

View File

@ -1617,14 +1617,12 @@ int ObOptEstCostModel::cost_row_store_index_scan(const ObCostTableScanInfo &est_
// 1. scan inv_index twice using [token, token] as the range, computing one aggregate function;
// 2. full-table scan of doc_id_rowkey_index, computing one aggregate function;
// 3. look up doc_id_rowkey_index using the filtered doc_ids
int token_count = 1; // 此处先假设 search query 只有一个 token,后续要调整
double token_sel = DEFAULT_SEL;
double inv_index_range_scan_cost = 0;
double doc_id_full_scan_cost = 0;
double doc_id_index_back_cost = 0;
if (OB_FAIL(cost_range_scan(est_cost_info,
true,
row_count * token_sel,
row_count,
inv_index_range_scan_cost))) {
LOG_WARN("Failed to estimate scan cost", K(ret));
} else if (OB_FAIL(cost_range_scan(est_cost_info,
@ -1634,14 +1632,14 @@ int ObOptEstCostModel::cost_row_store_index_scan(const ObCostTableScanInfo &est_
LOG_WARN("Failed to estimate scan cost", K(ret));
} else if (OB_FAIL(cost_range_get(est_cost_info,
true,
row_count * token_sel,
row_count,
doc_id_index_back_cost))) {
LOG_WARN("Failed to estimate get cost", K(ret));
}
double aggregation_cost = (row_count * token_sel + row_count) * cost_params_.get_per_aggr_func_cost(sys_stat_);
double aggregation_cost = (row_count + row_count) * cost_params_.get_per_aggr_func_cost(sys_stat_);
double fulltext_scan_cost = 2 * inv_index_range_scan_cost + doc_id_full_scan_cost +
aggregation_cost + doc_id_index_back_cost;
index_scan_cost = token_count * fulltext_scan_cost;
index_scan_cost = fulltext_scan_cost;
LOG_TRACE("OPT::[COST FULLTEXT INDEX SCAN]", K(fulltext_scan_cost), K(ret));
}
//add index skip scan cost
@ -2381,6 +2379,11 @@ double ObOptEstCostModel::cost_quals(double rows, const ObIArray<ObRawExpr *> &q
if (need_scale) {
factor /= 25.0;
}
} else if (qual->has_flag(CNT_MATCH_EXPR)) {
cost_per_row += cost_params_.get_functional_lookup_per_row_cost(sys_stat_) * factor;
if (need_scale) {
factor /= 10.0;
}
} else {
ObObjTypeClass calc_type = qual->get_result_type().get_calc_type_class();
cost_per_row += cost_params_.get_comparison_cost(sys_stat_, calc_type) * factor;
@ -2496,6 +2499,9 @@ int ObOptEstCostModel::calc_pred_cost_per_row(const ObRawExpr *expr,
cost += (expr->get_param_expr(1)->get_param_count() + 1) * cost_params_.get_comparison_cost(sys_stat_,ObIntTC) / rows;
}
need_calc_child_cost = false;
} else if (T_FUN_MATCH_AGAINST == expr->get_expr_type()) {
cost += cost_params_.get_functional_lookup_per_row_cost(sys_stat_) / rows;
need_calc_child_cost = false;
} else {
cost += cost_params_.get_comparison_cost(sys_stat_,ObIntTC) / rows;
}

View File

@ -92,6 +92,8 @@ const static double NORMAL_INVALID_HASH_COST = -1;
const static double NORMAL_CMP_UDF_COST = 100.0 * DEFAULT_CPU_SPEED;
const static double NORMAL_CMP_LOB_COST = 9.707028746051587301587301588 * DEFAULT_CPU_SPEED; //NORMAL_CMP_CHAR_COST * 100
const static double NORMAL_CMP_ERR_HANDLE_EXPR_COST = 1.00087103407539 * DEFAULT_CPU_SPEED; //NORMAL_CMP_INT_COST * 100
// jinmao TODO: fill in this coefficient after it has been measured
const static double NORMAL_FUNCTIONAL_LOOKUP_PER_ROW_COST = 100.0 * DEFAULT_CPU_SPEED;
const static double comparison_params_normal[ObMaxTC+1] = {
NORMAL_CMP_INT_COST, // null
@ -229,6 +231,7 @@ const static ObOptCostModelParameter cost_params_normal(
NORMAL_CMP_UDF_COST,
NORMAL_CMP_LOB_COST,
NORMAL_CMP_ERR_HANDLE_EXPR_COST,
NORMAL_FUNCTIONAL_LOOKUP_PER_ROW_COST,
comparison_params_normal,
hash_params_normal,
project_params_normal

View File

@ -92,6 +92,8 @@ const static double VECTOR_INVALID_HASH_COST = -1;
const static double VECTOR_CMP_UDF_COST = 100.0 * DEFAULT_CPU_SPEED;
const static double VECTOR_CMP_LOB_COST = 9.707028746051587301587301588 * DEFAULT_CPU_SPEED; //NORMAL_CMP_CHAR_COST * 100
const static double VECTOR_CMP_ERR_HANDLE_EXPR_COST = 1.00087103407539 * DEFAULT_CPU_SPEED; //NORMAL_CMP_INT_COST * 100
// jinmao TODO: fill in this coefficient after it has been measured
const static double VECTOR_FUNCTIONAL_LOOKUP_PER_ROW_COST = 100.0 * DEFAULT_CPU_SPEED;
const static double comparison_params_vector[ObMaxTC+1] = {
VECTOR_CMP_INT_COST, // null
@ -224,6 +226,7 @@ const static ObOptCostModelParameter cost_params_vector(
VECTOR_CMP_UDF_COST,
VECTOR_CMP_LOB_COST,
VECTOR_CMP_ERR_HANDLE_EXPR_COST,
VECTOR_FUNCTIONAL_LOOKUP_PER_ROW_COST,
comparison_params_vector,
hash_params_vector,
project_params_vector

View File

@ -1591,17 +1591,29 @@ int ObAlterTableResolver::resolve_index_column_list(const ParseNode &node,
ret = OB_NOT_SUPPORTED;
LOG_WARN("experimental feature: build multivalue index afterward is experimental feature", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "build multivalue index afterward");
} else if (table_schema_->is_materialized_view()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("create fulltext/multivalue/vector index on materialized view not supported", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "create fulltext/multivalue/vector index on materialized view");
}
} else if (index_keyname_ == FTS_KEY) {
if (!GCONF._enable_add_fulltext_index_to_existing_table) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("experimental feature: build fulltext index afterward is experimental feature", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "build fulltext index afterward");
uint64_t tenant_data_version = 0;
if (OB_ISNULL(session_info_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null", K(ret));
} else if (OB_FAIL(GET_MIN_DATA_VERSION(session_info_->get_effective_tenant_id(), tenant_data_version))) {
LOG_WARN("get tenant data version failed", K(ret));
} else if (tenant_data_version < DATA_VERSION_4_3_5_0) {
// Fail the statement explicitly: logging and reporting the user error alone leaves ret at
// OB_SUCCESS, so the resolver would carry on as if the fulltext index were supported.
ret = OB_NOT_SUPPORTED;
LOG_WARN("there are the observers with version lower than 4.3.5 in cluster, build fulltext index afterward not supported", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "there are the observers with version lower than 4.3.5 in cluster, build fulltext index afterward");
} else if (OB_FAIL(resolve_fts_index_constraint(*table_schema_,
sort_item.column_name_,
index_name_value))) {
SQL_RESV_LOG(WARN, "check fts index constraint fail",K(ret),
K(sort_item.column_name_));
sort_item.column_name_,
index_name_value))) {
SQL_RESV_LOG(WARN, "check fts index constraint fail", K(ret), K(sort_item.column_name_));
} else if (table_schema_->is_materialized_view()) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("create fulltext/multivalue/vector index on materialized view not supported", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "create fulltext/multivalue/vector index on materialized view");
}
} else if (index_keyname_ == VEC_KEY) {
// TODO@xiajin
@ -1733,7 +1745,8 @@ int ObAlterTableResolver::resolve_add_index(const ParseNode &node)
ParseNode *colulmn_group_node = nullptr;
bool is_index_part_specified = false;
CHECK_COMPATIBILITY_MODE(session_info_);
if (is_unique_key && lib::is_oracle_mode()) {
if (OB_FAIL(ret)) {
} else if (is_unique_key && lib::is_oracle_mode()) {
// oracle mode
if (node.num_child_ != 2) {
ret = OB_ERR_UNEXPECTED;
@ -2495,6 +2508,7 @@ int ObAlterTableResolver::generate_index_arg(obrpc::ObCreateIndexArg &index_arg,
index_arg.index_option_.storage_format_version_ = storage_format_version_;
index_arg.index_option_.comment_ = comment_;
index_arg.with_rowid_ = with_rowid_;
index_arg.index_option_.parser_name_ = parser_name_;
if (OB_SUCC(ret)) {
ObIndexType type = INDEX_TYPE_IS_NOT;
if (OB_NOT_NULL(table_schema_) && table_schema_->is_oracle_tmp_table()) {

View File

@ -20,8 +20,7 @@ namespace oceanbase
{
namespace sql
{
typedef common::hash::ObPlacementHashSet<share::schema::ObColumnNameHashWrapper, common::OB_MAX_INDEX_PER_TABLE> ObReducedVisibleColSet;
typedef common::hash::ObPlacementHashSet<share::schema::ObColumnNameHashWrapper, common::OB_MAX_AUX_TABLE_PER_MAIN_TABLE> ObReducedVisibleColSet;
typedef common::hash::ObPlacementHashSet<share::schema::ObColumnNameHashWrapper, common::OB_MAX_COLUMN_NUMBER> ObColumnNameSet;
/*
#define ADD_COLUMN_NOT_NULL (1UL << 0)

View File

@ -267,15 +267,23 @@ int ObCreateIndexResolver::resolve_index_column_node(
LOG_USER_ERROR(OB_NOT_SUPPORTED, "build multivalue index afterward");
}
} else if (index_keyname_ == FTS_KEY) {
if (!GCONF._enable_add_fulltext_index_to_existing_table) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("build fulltext index afterward is experimental feature", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "experimental feature: build fulltext index afterward");
uint64_t tenant_data_version = 0;
if (OB_ISNULL(session_info_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null", K(ret));
} else if (OB_FAIL(GET_MIN_DATA_VERSION(session_info_->get_effective_tenant_id(), tenant_data_version))) {
LOG_WARN("get tenant data version failed", K(ret));
} else if (tenant_data_version < DATA_VERSION_4_3_5_0) {
// Fail the statement explicitly: logging and reporting the user error alone leaves ret at
// OB_SUCCESS, so the resolver would carry on as if the fulltext index were supported.
ret = OB_NOT_SUPPORTED;
LOG_WARN("there are the observers with version lower than 4.3.5 in cluster, build fulltext index afterward not supported", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "there are the observers with version lower than 4.3.5 in cluster, build fulltext index afterward");
} else if (OB_FAIL(resolve_fts_index_constraint(*tbl_schema,
sort_item.column_name_,
index_keyname_value))) {
SQL_RESV_LOG(WARN, "check fts index constraint fail",K(ret),
K(sort_item.column_name_));
sort_item.column_name_,
index_keyname_value))) {
SQL_RESV_LOG(WARN, "check fts index constraint fail", K(ret), K(sort_item.column_name_));
} else if (OB_UNLIKELY(tbl_schema->mv_container_table())) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("create fulltext index on materialized view not supported", K(ret));
LOG_USER_ERROR(OB_NOT_SUPPORTED, "create fulltext index on materialized view");
}
} else if (index_keyname_ == INDEX_KEYNAME::VEC_KEY) {
if (sort_item.is_func_index_) {

View File

@ -1075,7 +1075,7 @@ protected:
common::hash::ObPlacementHashSet<share::schema::ObColumnNameHashWrapper,
common::OB_MAX_COLUMN_NUMBER> storing_column_set_;
common::hash::ObPlacementHashSet<share::schema::ObForeignKeyNameHashWrapper,
common::OB_MAX_INDEX_PER_TABLE> current_foreign_key_name_set_;
common::OB_MAX_AUX_TABLE_PER_MAIN_TABLE> current_foreign_key_name_set_;
common::ObBitSet<> alter_table_bitset_;
bool has_index_using_type_;
share::schema::ObIndexUsingType index_using_type_;

View File

@ -2089,8 +2089,8 @@ int ObDelUpdResolver::add_index_related_columns_to_stmt(const TableItem &table_i
LOG_DEBUG("add all column to stmt due to the update column is primary key");
}
} else {
uint64_t index_tids[OB_MAX_INDEX_PER_TABLE];
int64_t index_count = OB_MAX_INDEX_PER_TABLE;
uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
// get all the indexes
if (OB_FAIL(schema_checker_->get_can_write_index_array(tenant_id,
base_table_id,
@ -2123,8 +2123,8 @@ int ObDelUpdResolver::add_all_index_rowkey_to_stmt(const TableItem &table_item,
{
int ret = OB_SUCCESS;
const ObTableSchema *index_schema = NULL;
uint64_t idx_tids[OB_MAX_INDEX_PER_TABLE];
int64_t idx_count = OB_MAX_INDEX_PER_TABLE;
uint64_t idx_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
int64_t idx_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
if (OB_ISNULL(schema_checker_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret));
@ -4025,8 +4025,8 @@ int ObDelUpdResolver::generate_insert_table_info(const TableItem &table_item,
ObDelUpdStmt *del_upd_stmt = get_del_upd_stmt();
const TableItem &base_table_item = table_item.get_base_table_item();
const ObTableSchema *table_schema = NULL;
uint64_t index_tid[OB_MAX_INDEX_PER_TABLE];
int64_t gindex_cnt = OB_MAX_INDEX_PER_TABLE;
uint64_t index_tid[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
int64_t gindex_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
if (OB_ISNULL(del_upd_stmt) || OB_ISNULL(schema_checker_) || OB_ISNULL(session_info_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null", K(ret), K(del_upd_stmt), K(schema_checker_), K(session_info_));

View File

@ -396,8 +396,8 @@ int ObDeleteResolver::generate_delete_table_info(const TableItem &table_item)
const ObTableSchema *table_schema = NULL;
ObDeleteStmt *delete_stmt = get_delete_stmt();
ObDeleteTableInfo *table_info = NULL;
uint64_t index_tid[OB_MAX_INDEX_PER_TABLE];
int64_t gindex_cnt = OB_MAX_INDEX_PER_TABLE;
uint64_t index_tid[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
int64_t gindex_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
int64_t binlog_row_image = ObBinlogRowImage::FULL;
if (OB_ISNULL(schema_checker_) || OB_ISNULL(params_.session_info_) ||
OB_ISNULL(allocator_) || OB_ISNULL(delete_stmt)) {

View File

@ -18864,16 +18864,12 @@ int ObDMLResolver::resolve_match_against_exprs(ObRawExpr *&expr,
if (OB_ISNULL(stmt) || OB_ISNULL(expr) || OB_ISNULL(params_.query_ctx_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null", K(ret), K(stmt), K(expr));
} else if (match_exprs.count() > 1) {
// TODO(jinmao): once the storage layer can return unmatched rows and the SQL layer can evaluate them, the restrictions here can be removed.
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "match expr can only be used in simple filter for now");
LOG_WARN("match expr can only be used in simple filter for now", K(ret));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < match_exprs.count(); i++) {
uint64_t table_id = OB_INVALID_ID;
ObMatchFunRawExpr *cur_match_expr = NULL;
ObMatchFunRawExpr *match_expr_on_table = NULL;
ObSEArray<ObRawExpr *, 4> match_exprs_on_table;
bool table_on_null_side = false;
bool is_simple_filter = false;
ObSEArray<ObExprConstraint, 1> constraints;
@ -18882,38 +18878,31 @@ int ObDMLResolver::resolve_match_against_exprs(ObRawExpr *&expr,
LOG_WARN("unexpected null", K(ret));
} else if (OB_FAIL(cur_match_expr->get_table_id(table_id))) {
LOG_WARN("failed to get table id", K(ret));
} else if (OB_FAIL(stmt->get_match_expr_on_table(table_id, match_expr_on_table))) {
} else if (OB_FAIL(stmt->get_match_expr_on_table(table_id, match_exprs_on_table))) {
LOG_WARN("failed to get fulltext search expr on table", K(ret), K(table_id));
} else if (OB_FAIL(resolve_match_against_expr(*cur_match_expr))) {
LOG_WARN("failed to resolve match index", K(ret));
} else if (OB_ISNULL(match_expr_on_table)) {
if (scope != T_WHERE_SCOPE) {
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "fulltext search expr defined beyond where clause");
LOG_WARN("fulltext search expr not found in condition expr", K(ret));
} else if (OB_FAIL(ObOptimizerUtil::is_table_on_null_side(stmt, table_id, table_on_null_side))) {
} else {
for (int64_t match_idx = 0; match_idx < match_exprs_on_table.count(); ++match_idx) {
if (match_exprs_on_table.at(match_idx)->same_as(*cur_match_expr, &check_ctx)) {
match_expr_on_table = static_cast<ObMatchFunRawExpr *>(match_exprs_on_table.at(match_idx));
break;
}
}
}
if (OB_FAIL(ret)) {
} else if (nullptr == match_expr_on_table) {
// same expr not found in stmt
if (OB_FAIL(ObOptimizerUtil::is_table_on_null_side(stmt, table_id, table_on_null_side))) {
LOG_WARN("failed to check table on null side", K(ret));
} else if (table_on_null_side) {
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "fulltext search on null side of joined table");
LOG_WARN("fulltext search on null side of joined table is not supported", K(ret));
} else if (OB_FAIL(check_fulltext_search_simple_filter(expr, cur_match_expr, is_simple_filter, constraints))) {
LOG_WARN("failed to check fulltext search simple filter", K(ret));
} else if (is_simple_filter) {
if (OB_FAIL(stmt->get_match_exprs().push_back(cur_match_expr))) {
LOG_WARN("failed to push back expr", K(ret));
} else if (OB_FAIL(append(params_.query_ctx_->all_expr_constraints_, constraints))) {
LOG_WARN("failed to append constraints", K(ret));
}
} else {
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "filter that can't imply match_score not equal to 0");
LOG_WARN("filter that can't imply match_score not equal to 0 is not supported", K(ret), KPC(expr));
} else if (OB_FAIL(stmt->get_match_exprs().push_back(cur_match_expr))) {
LOG_WARN("failed to push back expr", K(ret));
}
} else if (!cur_match_expr->same_as(*match_expr_on_table, &check_ctx)) {
ret = OB_NOT_SUPPORTED;
LOG_USER_ERROR(OB_NOT_SUPPORTED, "non-shareable match exprs on same base table");
LOG_WARN("non-shareable match exprs on same base table are not supported", K(ret), KPC(cur_match_expr), KPC(match_expr_on_table));
} else if (OB_FAIL(replacer.add_replace_expr(cur_match_expr, match_expr_on_table))) {
LOG_WARN("failed to add replace expr", K(ret));
} else if (OB_FAIL(replacer.replace(expr))) {
@ -19073,74 +19062,6 @@ int ObDMLResolver::resolve_match_index(
return ret;
}
// Check that the fulltext search filter can imply a condition where match_score is not equal to zero.
//
// A filter that is itself the MATCH ... AGAINST expr is accepted directly. Otherwise a copy of
// `expr` is built in which `match_expr` is replaced by the double constant 0, the copy is wrapped
// in an lnnvl expr and formalized. The filter is accepted only when that lnnvl expr is a static
// const expr whose calculated result is available and is neither false nor null; in that case a
// PRE_CALC_RESULT_TRUE constraint on the lnnvl expr is appended to `constraints`.
//
// @param expr             filter expr to examine
// @param match_expr       match expr that is substituted with 0 inside the copy of `expr`
// @param is_simple_filter [out] true only when the filter is accepted
// @param constraints      [out] receives the constraint described above
// @return OB_SUCCESS, OB_ERR_UNEXPECTED when a required pointer is null, or the error code of
//         the first helper that fails
int ObDMLResolver::check_fulltext_search_simple_filter(ObRawExpr *expr,
                                                       ObRawExpr *match_expr,
                                                       bool &is_simple_filter,
                                                       ObIArray<ObExprConstraint> &constraints)
{
  int ret = OB_SUCCESS;
  is_simple_filter = false;
  if (OB_ISNULL(expr)) {
    // expr is dereferenced right below, so reject a null filter up front
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected null", K(ret));
  } else if (expr->get_expr_type() == T_FUN_MATCH_AGAINST) {
    // bool expr will be added above in where scope
    is_simple_filter = true;
  } else if (OB_ISNULL(params_.expr_factory_) || OB_ISNULL(params_.session_info_) || OB_ISNULL(allocator_) ||
             OB_ISNULL(params_.session_info_->get_cur_exec_ctx())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected null", K(ret));
  } else {
    // The copier is constructed from *params_.expr_factory_, so it must only be built once the
    // factory pointer has been verified by the check above.
    ObRawExprCopier copier(*params_.expr_factory_);
    ObSEArray<ObRawExpr*, 1> match_exprs;
    ObSEArray<ObRawExpr*, 1> zero_exprs;
    ObConstRawExpr *zero_expr = NULL;
    ObObj obj_zero;
    obj_zero.set_double(ObDoubleType, 0);
    ObRawExpr *false_null_expr = NULL;
    ObRawExpr *lnnvl_expr = NULL;
    bool got_result = false;
    ObObj result;
    if (OB_FAIL(params_.expr_factory_->create_raw_expr(T_DOUBLE, zero_expr))) {
      LOG_WARN("create raw expr fail", K(ret));
    } else if (OB_ISNULL(zero_expr)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected null expr", K(ret));
    } else if (OB_FALSE_IT(zero_expr->set_value(obj_zero))) {
    } else if (OB_FAIL(match_exprs.push_back(match_expr))) {
      LOG_WARN("failed to push back expr", K(ret));
    } else if (OB_FAIL(zero_exprs.push_back(zero_expr))) {
      LOG_WARN("failed to push back expr", K(ret));
    } else if (OB_FAIL(copier.add_replaced_expr(match_exprs, zero_exprs))) {
      LOG_WARN("failed to add replace pair", K(ret));
    } else if (OB_FAIL(copier.copy_on_replace(expr, false_null_expr))) {
      LOG_WARN("failed to do expr copy on replace", K(ret));
    } else if (OB_FAIL(ObRawExprUtils::build_lnnvl_expr(*params_.expr_factory_, false_null_expr, lnnvl_expr))) {
      LOG_WARN("failed to build lnnvl expr", K(ret));
    } else if (OB_ISNULL(lnnvl_expr)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected null", K(ret));
    } else if (OB_FAIL(lnnvl_expr->formalize(params_.session_info_))) {
      LOG_WARN("failed to formalize lnnvl expr", K(ret));
    } else if (!lnnvl_expr->is_static_const_expr()) {
      // cannot be evaluated up front, so nothing can be concluded about the filter
      is_simple_filter = false;
    } else if (OB_FAIL(ObSQLUtils::calc_const_or_calculable_expr(params_.session_info_->get_cur_exec_ctx(),
                                                                 lnnvl_expr,
                                                                 result,
                                                                 got_result,
                                                                 *allocator_))) {
      LOG_WARN("failed to calc cosnt or calculable expr", K(ret));
    } else if (!got_result || result.is_false() || result.is_null()) {
      is_simple_filter = false;
    } else {
      is_simple_filter = true;
      // record the assumption the decision relies on as a pre-calc constraint
      ObExprConstraint true_constraint(lnnvl_expr, PreCalcExprExpectResult::PRE_CALC_RESULT_TRUE);
      if (OB_FAIL(constraints.push_back(true_constraint))) {
        LOG_WARN("failed to push back true constraint", K(ret));
      }
    }
  }
  return ret;
}
int ObDMLResolver::add_udt_dependency(const pl::ObUserDefinedType &udt_type)
{
int ret = OB_SUCCESS;

View File

@ -1034,10 +1034,6 @@ private:
int resolve_match_index(const ColumnReferenceSet &match_column_set,
const ObTableSchema &table_schema,
ObMatchFunRawExpr &match_against);
int check_fulltext_search_simple_filter(ObRawExpr *expr,
ObRawExpr *match_expr,
bool &is_simple_filter,
ObIArray<ObExprConstraint> &constraints);
int build_and_check_true_expr(ObRawExpr *const_expr,
ObItemType compare_op,
bool &is_true,

View File

@ -5426,6 +5426,25 @@ int ObDMLStmt::do_formalize_lateral_derived_table_post()
return ret;
}
// Collect the match exprs of this stmt whose table id equals `table_id`.
// Matching exprs are added to `match_exprs` through add_var_to_array_no_dup; the output array
// is not cleared beforehand. Fails with OB_ERR_UNEXPECTED on a null entry in the stmt's list.
int ObDMLStmt::get_match_expr_on_table(uint64_t table_id, ObIArray<ObRawExpr *> &match_exprs) const
{
  int ret = OB_SUCCESS;
  for (int64_t idx = 0; OB_SUCC(ret) && idx < get_match_exprs().count(); ++idx) {
    uint64_t expr_tid = OB_INVALID_ID;
    ObMatchFunRawExpr *candidate = get_match_exprs().at(idx);
    if (OB_ISNULL(candidate)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected null", K(ret));
    } else if (OB_FAIL(candidate->get_table_id(expr_tid))) {
      LOG_WARN("failed to get fulltext search exprs", K(ret));
    } else if (table_id == expr_tid
               && OB_FAIL(add_var_to_array_no_dup(match_exprs, static_cast<ObRawExpr *>(candidate)))) {
      // only exprs on the requested table are added; all others fall through untouched
      LOG_WARN("failed to append match expr to array", K(ret), K(table_id));
    }
  }
  return ret;
}
ObJtColBaseInfo::ObJtColBaseInfo()
: col_type_(0),
truncate_(0),
@ -5660,27 +5679,6 @@ int ObValuesTableDef::deep_copy(const ObValuesTableDef &other,
return ret;
}
// Look up the match expr of this stmt that is defined on `table_id`.
// `match_expr` is reset to NULL first and stays NULL when no expr is on that table.
// Finding a second expr on the same table is reported as OB_INVALID_ARGUMENT.
int ObDMLStmt::get_match_expr_on_table(uint64_t table_id, ObMatchFunRawExpr *&match_expr) const
{
  int ret = OB_SUCCESS;
  match_expr = NULL;
  for (int64_t idx = 0; OB_SUCC(ret) && idx < get_match_exprs().count(); ++idx) {
    uint64_t expr_tid = OB_INVALID_ID;
    if (OB_ISNULL(get_match_exprs().at(idx))) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected null", K(ret));
    } else if (OB_FAIL(get_match_exprs().at(idx)->get_table_id(expr_tid))) {
      LOG_WARN("failed to get fulltext search exprs", K(ret));
    } else if (expr_tid != table_id) {
      // expr belongs to another table, keep scanning
    } else if (OB_NOT_NULL(match_expr)) {
      ret = OB_INVALID_ARGUMENT;
      LOG_WARN("invalid argument, find more than one match expr on current table", K(ret), K(table_id));
    } else {
      match_expr = get_match_exprs().at(idx);
    }
  }
  return ret;
}
/**
* index的分区列/
* e.g. create table (c1 int, c2 int generated always as (c1 + 1)) partition by hash (c2)

View File

@ -1030,7 +1030,7 @@ public:
{ return match_exprs_; }
common::ObIArray<ObMatchFunRawExpr *> &get_match_exprs()
{ return match_exprs_; }
int get_match_expr_on_table(uint64_t table_id, ObMatchFunRawExpr *&match_expr) const;
int get_match_expr_on_table(uint64_t table_id, ObIArray<ObRawExpr *> &match_exprs) const;
int get_table_pseudo_column_like_exprs(uint64_t table_id, ObIArray<ObRawExpr *> &pseudo_columns);
int get_table_pseudo_column_like_exprs(ObIArray<uint64_t> &table_id, ObIArray<ObRawExpr *> &pseudo_columns);
int rebuild_tables_hash();

View File

@ -2439,8 +2439,9 @@ int LogTableHint::assign(const LogTableHint &other)
int LogTableHint::init_index_hints(ObSqlSchemaGuard &schema_guard)
{
int ret = OB_SUCCESS;
uint64_t tids[OB_MAX_INDEX_PER_TABLE + 1];
int64_t table_index_count = OB_MAX_INDEX_PER_TABLE + 1;
uint64_t tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1];
int64_t table_index_aux_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE + 1;
const share::schema::ObTableSchema *data_table_schema = nullptr;
if (OB_ISNULL(table_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected log index hint", K(ret), K(this));
@ -2448,26 +2449,33 @@ int LogTableHint::init_index_hints(ObSqlSchemaGuard &schema_guard)
/* do nothing */
} else if (OB_FAIL(schema_guard.get_can_read_index_array(table_->ref_id_,
tids,
table_index_count,
table_index_aux_count,
false,
table_->access_all_part(),
false /*domain index*/,
true /*domain index*/,
false /*spatial index*/))) {
LOG_WARN("failed to get can read index", K(ret));
} else if (table_index_count > OB_MAX_INDEX_PER_TABLE) {
} else if (OB_FAIL(schema_guard.get_table_schema(table_->ref_id_, data_table_schema))) {
LOG_WARN("failed to get data table schema", K(ret), K(table_->ref_id_));
} else if (OB_ISNULL(data_table_schema)) {
ret = OB_TABLE_NOT_EXIST;
LOG_WARN("data table schema is null", K(ret), K(table_->ref_id_));
} else if (table_index_aux_count > OB_MAX_AUX_TABLE_PER_MAIN_TABLE
|| data_table_schema->get_index_count() > OB_MAX_INDEX_PER_TABLE) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("Table index count is bigger than OB_MAX_INDEX_PER_TABLE", K(ret), K(table_index_count));
int64_t table_index_count = data_table_schema->get_index_count();
LOG_WARN("Table index or index aux count is invalid", K(ret), K(table_index_count), K(table_index_aux_count));
} else if (union_merge_hint_ != nullptr &&
OB_FAIL(merge_index_list_.prepare_allocate(union_merge_hint_->get_index_name_list().count()))) {
LOG_WARN("failed to prepare allocate merge index list", KPC(union_merge_hint_), K(ret));
} else {
LOG_TRACE("get readable index", K(table_index_count));
LOG_TRACE("get readable index", K(table_index_aux_count));
const share::schema::ObTableSchema *index_schema = NULL;
ObSEArray<uint64_t, 4> index_list;
ObSEArray<uint64_t, 4> no_index_list;
ObSEArray<const ObIndexHint*, 4> index_hints;
ObSEArray<const ObIndexHint*, 4> no_index_hints;
for (int64_t i = -1; OB_SUCC(ret) && i < table_index_count; ++i) {
for (int64_t i = -1; OB_SUCC(ret) && i < table_index_aux_count; ++i) {
uint64_t index_id = -1 == i ? table_->ref_id_ : tids[i];
ObString index_name;
bool is_primary_key = false;
@ -2478,7 +2486,7 @@ int LogTableHint::init_index_hints(ObSqlSchemaGuard &schema_guard)
OB_ISNULL(index_schema)) {
ret = OB_SCHEMA_ERROR;
LOG_WARN("fail to get table schema", K(index_id), K(ret));
} else if (index_schema->is_fts_index() || index_schema->is_vec_index()) {
} else if (index_schema->is_built_in_fts_index() || index_schema->is_vec_index()) {
// just ignore fts && vector index
} else if (OB_FAIL(index_schema->get_index_name(index_name))) {
LOG_WARN("fail to get index name", K(index_name), K(ret));

View File

@ -489,8 +489,8 @@ int ObUpdateResolver::generate_update_table_info(ObTableAssignment &table_assign
const ObTableSchema *table_schema = NULL;
const TableItem *table_item = NULL;
ObUpdateTableInfo *table_info = NULL;
uint64_t index_tid[OB_MAX_INDEX_PER_TABLE];
int64_t gindex_cnt = OB_MAX_INDEX_PER_TABLE;
uint64_t index_tid[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
int64_t gindex_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
int64_t binlog_row_image = ObBinlogRowImage::FULL;
if (OB_ISNULL(schema_checker_) || OB_ISNULL(params_.session_info_) ||
OB_ISNULL(allocator_) || OB_ISNULL(update_stmt)) {

View File

@ -10021,6 +10021,29 @@ int ObRawExprUtils::extract_match_against_filters(const ObIArray<ObRawExpr *> &f
return ret;
}
// Recursively gather the match-against exprs found in the tree rooted at `expr`.
// `expr` itself is added (without duplicates) when it is a match-against expr, and its
// parameters are visited only when `expr` carries the CNT_MATCH_EXPR flag.
// Returns OB_ERR_UNEXPECTED when `expr` is null.
int ObRawExprUtils::extract_match_exprs(ObRawExpr *expr,
                                        ObIArray<ObMatchFunRawExpr*> &match_exprs)
{
  int ret = OB_SUCCESS;
  if (OB_ISNULL(expr)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("expr is null", K(ret));
  } else {
    if (expr->is_match_against_expr()
        && OB_FAIL(add_var_to_array_no_dup(match_exprs, static_cast<ObMatchFunRawExpr*>(expr)))) {
      LOG_WARN("failed to push back match expr", K(ret));
    }
    if (OB_SUCC(ret) && expr->has_flag(CNT_MATCH_EXPR)) {
      // descend into every parameter; SMART_CALL wraps the recursive call
      for (int64_t param_idx = 0; OB_SUCC(ret) && param_idx < expr->get_param_count(); ++param_idx) {
        if (OB_FAIL(SMART_CALL(extract_match_exprs(expr->get_param_expr(param_idx), match_exprs)))) {
          LOG_WARN("failed to extract match exprs", K(ret));
        }
      }
    }
  }
  return ret;
}
int ObRawExprUtils::build_dummy_count_expr(ObRawExprFactory &expr_factory,
const ObSQLSessionInfo *session_info,
ObAggFunRawExpr *&expr)

View File

@ -1267,6 +1267,7 @@ public:
static int extract_match_against_filters(const ObIArray<ObRawExpr *> &filters,
ObIArray<ObRawExpr *> &other_filters,
ObIArray<ObRawExpr *> &match_filters);
static int extract_match_exprs(ObRawExpr *expr, ObIArray<ObMatchFunRawExpr*> &match_exprs);
static int build_dummy_count_expr(ObRawExprFactory &expr_factory,
const ObSQLSessionInfo *session_info,
ObAggFunRawExpr *&expr);

View File

@ -1143,8 +1143,8 @@ int ObSchemaChecker::check_column_has_index(const uint64_t tenant_id, uint64_t t
{
int ret = OB_SUCCESS;
const ObColumnSchemaV2 *col_schema = NULL;
uint64_t index_tid_array[OB_MAX_INDEX_PER_TABLE];
int64_t index_cnt = OB_MAX_INDEX_PER_TABLE;
uint64_t index_tid_array[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
int64_t index_cnt = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
has_index = false;
if (IS_NOT_INIT) {

View File

@ -487,6 +487,7 @@ public:
}
virtual int is_get(bool &is_get) const;
int is_get(int64_t column_count, bool &is_get) const;
// Resets range_exprs_ by calling its reset().
inline void reset_range_exprs()
{
  range_exprs_.reset();
}
virtual bool is_precise_get() const { return table_graph_.is_precise_get_; }
static common::ObDomainOpType get_geo_relation(ObItemType type);
static common::ObDomainOpType get_domain_op_type(ObItemType type);

View File

@ -322,6 +322,7 @@ public:
}
virtual inline bool has_range() const { return column_count_ > 0; }
virtual bool is_contain_geo_filters() const { return contain_geo_filters_; }
// Resets range_exprs_ by calling its reset().
inline void reset_range_exprs()
{
  range_exprs_.reset();
}
virtual const common::ObIArray<ObRawExpr*> &get_range_exprs() const { return range_exprs_; }
virtual const common::ObIArray<ObRawExpr*> &get_ss_range_exprs() const { return ss_range_exprs_; }
virtual const common::ObIArray<ObRawExpr*> &get_unprecise_range_exprs() const { return unprecise_range_exprs_; }

View File

@ -10487,7 +10487,7 @@ int ObTransformPreProcess::preserve_order_for_fulltext_search(ObDMLStmt *stmt, b
int ret = OB_SUCCESS;
trans_happened = false;
TableItem *table_item = NULL;
ObMatchFunRawExpr *match_expr = NULL;
ObRawExpr *match_expr = nullptr;
if (OB_ISNULL(stmt)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null", K(ret));
@ -10506,18 +10506,57 @@ int ObTransformPreProcess::preserve_order_for_fulltext_search(ObDMLStmt *stmt, b
LOG_WARN("unexpected null", K(ret));
} else if (!table_item->is_basic_table()) {
// do nothing
} else if (OB_FAIL(stmt->get_match_expr_on_table(table_item->table_id_, match_expr))) {
LOG_WARN("failed to get fulltext search expr on table", K(table_item->table_id_), K(ret));
} else if (OB_ISNULL(match_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null", K(ret));
} else if (0 == stmt->get_match_exprs().count()) {
// do nothing
} else {
const common::ObIArray<ObRawExpr *> &condition_exprs = stmt->get_condition_exprs();
bool found = false;
for (int64_t i = 0; OB_SUCC(ret) && !found && i < condition_exprs.count(); ++i) {
ObRawExpr *filter = nullptr;
if (OB_ISNULL(filter = condition_exprs.at(i))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected nullptr to where condition filter", K(ret), K(i), KP(filter));
} else if (filter->has_flag(IS_MATCH_EXPR)) {
match_expr = filter;
found = true;
} else if (!filter->has_flag(CNT_MATCH_EXPR)
|| filter->has_flag(CNT_OR)) {
// skip
} else if (IS_RANGE_CMP_OP(filter->get_expr_type())) {
ObRawExpr *param_expr0 = filter->get_param_expr(0);
ObRawExpr *param_expr1 = filter->get_param_expr(1);
if (OB_ISNULL(param_expr0) || OB_ISNULL(param_expr1)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpecter null param expr for range cmp op", K(ret), KP(param_expr0), KP(param_expr1));
} else if (param_expr0->is_const_expr() && param_expr1->has_flag(IS_MATCH_EXPR)) {
match_expr = param_expr1;
found = true;
} else if (param_expr1->is_const_expr() && param_expr0->has_flag(IS_MATCH_EXPR)) {
match_expr = param_expr0;
found = true;
}
} else if (filter->get_expr_type() == T_OP_BOOL) {
ObRawExpr *param_expr = filter->get_param_expr(0);
if (OB_ISNULL(param_expr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected null param expr for bool op", K(ret));
} else if (param_expr->has_flag(IS_MATCH_EXPR)) {
found = true;
match_expr = param_expr;
}
}
}
}
if (OB_SUCC(ret) && nullptr != match_expr) {
OrderItem item(match_expr, default_desc_direction());
if (OB_FAIL(stmt->add_order_item(item))) {
LOG_WARN("failed to add order item", K(ret), K(item));
} else {
trans_happened = true;
}
trans_happened = true;
}
return ret;
}

View File

@ -4811,8 +4811,8 @@ int ObTransformUtils::compute_basic_table_property(const ObDMLStmt *stmt,
ObSEArray<ObRawExpr*, 8> cur_cond_exprs;
ObSqlBitSet<> table_set;
ObSqlSchemaGuard *schema_guard = NULL;
uint64_t index_tids[OB_MAX_INDEX_PER_TABLE];
int64_t index_count = OB_MAX_INDEX_PER_TABLE;
uint64_t index_tids[OB_MAX_AUX_TABLE_PER_MAIN_TABLE];
int64_t index_count = OB_MAX_AUX_TABLE_PER_MAIN_TABLE;
if (OB_ISNULL(stmt) || OB_ISNULL(table) || OB_ISNULL(check_helper.alloc_)
|| OB_ISNULL(check_helper.fd_factory_)
|| OB_ISNULL(check_helper.schema_checker_)

View File

@ -101,7 +101,7 @@ public:
return pos;
}
private:
static const int64_t MAX_TABLET_COUNT = 200;
static const int64_t MAX_TABLET_COUNT = share::schema::OB_MAX_TRANSFER_BINDING_TABLET_CNT;
int64_t count_;
common::ObTabletID id_array_[MAX_TABLET_COUNT];
};