[obkv] fix core when scan with doc_id in obkv

This commit is contained in:
GroundWu
2025-02-26 03:45:25 +00:00
committed by ob-robot
parent c0338edb5a
commit 8bb3ebc227
5 changed files with 117 additions and 32 deletions

View File

@ -451,12 +451,16 @@ int ObTableFtsDmlCgService::generate_scan_with_doc_id_ctdef_if_need(ObTableCtx &
int ret = OB_SUCCESS;
bool need_doc_id_merge_iter = ctx.has_fts_index();
ObArray<int64_t> domain_types;
ObArray<uint64_t> domain_tids;
ObArray<ObExpr*> result_outputs;
ObDASDocIdMergeCtDef *doc_id_merge_ctdef = nullptr;
ObDASDomainIdMergeCtDef *doc_id_merge_ctdef = nullptr;
ObDASScanCtDef *rowkey_doc_scan_ctdef = nullptr;
if (!need_doc_id_merge_iter) {
// just skip, nothing to do
} else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_DOC_ID_MERGE, allocator, doc_id_merge_ctdef))) {
} else if (OB_FAIL(ObTableFtsUtil::check_domain_index(ctx, domain_types, domain_tids))) {
LOG_WARN("fail to check need domain id merge iter", K(ret));
} else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_DOMAIN_ID_MERGE, allocator, doc_id_merge_ctdef))) {
LOG_WARN("fail to allocate to doc id merge ctdef", K(ret));
} else if (OB_ISNULL(doc_id_merge_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &allocator, 2))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
@ -468,9 +472,12 @@ int ObTableFtsDmlCgService::generate_scan_with_doc_id_ctdef_if_need(ObTableCtx &
} else if (OB_UNLIKELY(result_outputs.empty())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, result outputs is nullptr", K(ret));
} else if (OB_FAIL(doc_id_merge_ctdef->domain_types_.prepare_allocate(1))) {
LOG_WARN("fail to allocate domain types array memory", K(ret));
} else {
doc_id_merge_ctdef->children_cnt_ = 2;
doc_id_merge_ctdef->children_[0] = &scan_ctdef;
doc_id_merge_ctdef->domain_types_.at(0) = domain_types.at(0);
doc_id_merge_ctdef->children_[1] = rowkey_doc_scan_ctdef;
if (OB_FAIL(doc_id_merge_ctdef->result_output_.assign(result_outputs))) {
LOG_WARN("fail to assign result output", K(ret));
@ -1135,19 +1142,25 @@ int ObTableFtsTscCgService::generate_das_scan_ctdef_with_doc_id(ObIAllocator &al
const ObTableCtx &ctx,
ObTableApiScanCtDef &tsc_ctdef,
ObDASScanCtDef *scan_ctdef,
ObDASDocIdMergeCtDef *&doc_id_merge_ctdef)
ObDASDomainIdMergeCtDef *&doc_id_merge_ctdef)
{
int ret = OB_SUCCESS;
ObArray<ObExpr*> result_outputs;
ObDASScanCtDef *rowkey_doc_scan_ctdef = nullptr;
ObArray<int64_t> domain_types;
ObArray<uint64_t> domain_tids;
if (OB_ISNULL(scan_ctdef)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid arguments", K(ret), KPC(scan_ctdef));
} else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_DOC_ID_MERGE, alloc, doc_id_merge_ctdef))) {
} else if (OB_FAIL(ObTableFtsUtil::check_domain_index(ctx, domain_types, domain_tids))) {
LOG_WARN("fail to check need domain id merge iter", K(ret));
} else if (OB_FAIL(ObDASTaskFactory::alloc_das_ctdef(DAS_OP_DOMAIN_ID_MERGE, alloc, doc_id_merge_ctdef))) {
LOG_WARN("fail to allocate to doc id merge ctdef", K(ret));
} else if (OB_ISNULL(doc_id_merge_ctdef->children_ = OB_NEW_ARRAY(ObDASBaseCtDef*, &alloc, 2))) {
ret = OB_ALLOCATE_MEMORY_FAILED;
LOG_WARN("fail to allocate doc id merge ctdef child array memory", K(ret));
} else if (OB_FAIL(doc_id_merge_ctdef->domain_types_.prepare_allocate(1))) {
LOG_WARN("fail to allocate domain types array memory", K(ret));
} else if (OB_FAIL(generate_rowkey_doc_ctdef(alloc, ctx, tsc_ctdef, rowkey_doc_scan_ctdef))) {
LOG_WARN("fail to generate rowkey doc ctdef", K(ret));
} else if (OB_FAIL(result_outputs.assign(scan_ctdef->result_output_))) {
@ -1158,6 +1171,7 @@ int ObTableFtsTscCgService::generate_das_scan_ctdef_with_doc_id(ObIAllocator &al
} else {
doc_id_merge_ctdef->children_cnt_ = 2;
doc_id_merge_ctdef->children_[0] = scan_ctdef;
doc_id_merge_ctdef->domain_types_.at(0) = domain_types.at(0);
doc_id_merge_ctdef->children_[1] = rowkey_doc_scan_ctdef;
if (OB_FAIL(doc_id_merge_ctdef->result_output_.assign(result_outputs))) {
LOG_WARN("fail to assign result output", K(ret));
@ -1274,5 +1288,84 @@ int ObTableFtsTscCgService::extract_doc_rowkey_exprs(const ObTableCtx &ctx,
return ret;
}
int ObTableFtsUtil::check_domain_index(const ObTableCtx &ctx,
ObIArray<int64_t> &domain_types,
ObIArray<uint64_t> &domain_tids)
{
int ret = OB_SUCCESS;
const ObTableSchema *table_schema = ctx.get_table_schema();
domain_types.reset();
domain_tids.reset();
if (OB_ISNULL(table_schema)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("table schema is null", K(ret), K(table_schema));
} else if (OB_FAIL(ObDomainIdUtils::check_has_domain_index(table_schema, domain_types, domain_tids))) {
LOG_WARN("failed to check has domain index", K(ret));
} else if (domain_types.count() != domain_tids.count()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected domain types and tids", K(ret), K(domain_types), K(domain_tids));
} else if (domain_types.count() != 1 ||
domain_types.at(0) != ObDomainIdUtils::ObDomainIDType::DOC_ID) {
ret = OB_NOT_SUPPORTED;
LOG_WARN("has unsupported domain index types", K(ret), K(domain_types));
}
return ret;
}
int ObTableFtsUtil::init_fts_domain_info(const ObTableCtx &ctx,
const ObIArray<uint64_t> &tsc_out_cols,
ObDASScanCtDef &das_tsc_ctdef)
{
int ret = OB_SUCCESS;
uint64_t doc_id_col_id = OB_INVALID_ID;
uint64_t ft_col_id = OB_INVALID_ID;
const ObTableSchema *rowkey_doc_schema = nullptr;
const ObTableFtsCtx *fts_ctx = ctx.get_fts_ctx();
const ObTableSchema *table_schema = ctx.get_table_schema();
if (OB_ISNULL(table_schema)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("table schema is NULL", K(ret));
} else if (OB_ISNULL(fts_ctx)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fts_ctx is NULL", K(ret));
} else if (OB_ISNULL(rowkey_doc_schema = fts_ctx->get_rowkey_doc_schema())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("rowkey doc schema is NULL", K(ret));
} else if (OB_FAIL(das_tsc_ctdef.domain_tids_.init(1))) {
LOG_WARN("fail to init domain tid", K(ret));
} else if (OB_FAIL(das_tsc_ctdef.domain_types_.init(1))) {
LOG_WARN("fail to init domain type", K(ret));
} else if (OB_FAIL(das_tsc_ctdef.domain_id_idxs_.init(1))) {
LOG_WARN("fail to init domain id idx", K(ret));
} else if (OB_FAIL(das_tsc_ctdef.domain_tids_.push_back(rowkey_doc_schema->get_table_id()))) {
LOG_WARN("fail to push back table id", K(ret));
} else if (OB_FAIL(das_tsc_ctdef.domain_types_.push_back(ObDomainIdUtils::ObDomainIDType::DOC_ID))) {
LOG_WARN("fail to push back domain type", K(ret));
} else if (OB_FAIL(table_schema->get_fulltext_column_ids(doc_id_col_id, ft_col_id))) {
LOG_WARN("fail to get fulltext column ids", K(ret));
} else if (OB_INVALID_ID == doc_id_col_id) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get doc id column", K(ret));
} else {
bool is_found = false;
DomainIdxs domain_idx;
for (int64_t i = 0; OB_SUCC(ret) && i < tsc_out_cols.count() && !is_found; ++i) {
if (tsc_out_cols.at(i) == doc_id_col_id) {
if (OB_FAIL(domain_idx.push_back(i))) {
LOG_WARN("fail to push back domain idx", K(ret));
}
is_found = true;
}
}
if (is_found && OB_SUCC(ret)) {
if (OB_FAIL(das_tsc_ctdef.domain_id_idxs_.push_back(domain_idx))) {
LOG_WARN("fail to push back domain idx", K(ret));
}
}
}
return ret;
}
} // namespace table
} // namespace oceanbase

View File

@ -71,7 +71,7 @@ public:
const ObTableCtx &ctx,
ObTableApiScanCtDef &tsc_ctdef,
ObDASScanCtDef *scan_ctdef,
ObDASDocIdMergeCtDef *&doc_id_merge_ctdef);
ObDASDomainIdMergeCtDef *&doc_id_merge_ctdef);
static int get_fts_schema(const ObTableCtx &ctx, uint64_t table_id, const ObTableSchema *&index_schema);
static int extract_text_ir_access_columns(const ObTableCtx &ctx,
ObDASScanCtDef &scan_ctdef,
@ -104,6 +104,17 @@ private:
DISALLOW_COPY_AND_ASSIGN(ObTableFtsTscCgService);
};
class ObTableFtsUtil
{
public:
static int check_domain_index(const ObTableCtx &ctx,
ObIArray<int64_t> &domain_types,
ObIArray<uint64_t> &domain_tids);
static int init_fts_domain_info(const ObTableCtx &ctx,
const ObIArray<uint64_t> &tsc_out_cols,
ObDASScanCtDef &das_tsc_ctdef);
};
} // end namespace table
} // end namespace oceanbase

View File

@ -2338,14 +2338,8 @@ int ObTableDmlCgService::generate_tsc_ctdef(ObTableCtx &ctx,
LOG_WARN("fail to get column ids", K(ret));
} else if (OB_FAIL(tsc_ctdef.access_column_ids_.assign(column_ids))) {
LOG_WARN("fail to assign column ids", K(ret), K(column_ids));
} else if (need_doc_id) {
uint64_t rowkey_doc_col_id = OB_INVALID_ID;
uint64_t ft_col_id = OB_INVALID_ID;
if (OB_FAIL(table_schema->get_fulltext_column_ids(rowkey_doc_col_id, ft_col_id))) {
LOG_WARN("fail to get fulltext column ids", K(ret));
} else {
tsc_ctdef.doc_id_idx_ = table_schema->get_column_idx(rowkey_doc_col_id);
}
} else if (need_doc_id && OB_FAIL(ObTableFtsUtil::init_fts_domain_info(ctx, column_ids, tsc_ctdef))) {
LOG_WARN("fail to fts domain info", K(ret), K(column_ids));
}
if (OB_SUCC(ret)) {
@ -3395,21 +3389,8 @@ int ObTableTscCgService::extract_select_output_column_ids(const ObTableCtx &ctx,
ret = OB_ERR_UNEXPECTED;
LOG_WARN("table schema is NULL", K(ret));
} else if (ctx.is_tsc_with_doc_id() && index_schema->is_user_table()) {
uint64_t doc_id_col_id = OB_INVALID_ID;
uint64_t ft_col_id = OB_INVALID_ID;
if (OB_FAIL(index_schema->get_fulltext_column_ids(doc_id_col_id, ft_col_id))) {
LOG_WARN("fail to get fulltext column ids", K(ret));
} else if (OB_INVALID_ID == doc_id_col_id) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get doc id column", K(ret));
} else {
bool is_found = false;
for (int64_t i = 0; OB_SUCC(ret) && i < tsc_out_cols.count() && !is_found; ++i) {
if (tsc_out_cols.at(i) == doc_id_col_id) {
das_tsc_ctdef.doc_id_idx_ = i;
is_found = true;
}
}
if (OB_FAIL(ObTableFtsUtil::init_fts_domain_info(ctx, tsc_out_cols, das_tsc_ctdef))) {
LOG_WARN("fail to fts domain info", K(ret), K(tsc_out_cols));
}
}
LOG_DEBUG("[table cg service] extract das output column ids", K(ret), K(table_id),
@ -3567,7 +3548,7 @@ int ObTableTscCgService::generate_tsc_ctdef(const ObTableCtx &ctx,
const int64_t filter_exprs_cnt = ctx.get_filter_exprs().count();
bool query_cs_replica = false;
ObDASBaseCtDef *root_ctdef = nullptr;
ObDASDocIdMergeCtDef *doc_id_merge_ctdef = nullptr;
ObDASDomainIdMergeCtDef *doc_id_merge_ctdef = nullptr;
bool need_attach = false;
// init scan_ctdef_.ref_table_id_
// when scan primary index, index_table_id == ref_table_id

View File

@ -2456,7 +2456,7 @@ const ObDASScanCtDef* ObTableApiScanCtDef::get_lookup_ctdef() const
const ObDASBaseCtDef *attach_ctdef = attach_spec_.attach_ctdef_;
if (nullptr == attach_ctdef) {
lookup_ctdef = lookup_ctdef_;
} else if (DAS_OP_DOC_ID_MERGE == attach_ctdef->op_type_) {
} else if (DAS_OP_DOMAIN_ID_MERGE == attach_ctdef->op_type_) {
OB_ASSERT(2 == attach_ctdef->children_cnt_ && attach_ctdef->children_ != nullptr);
if (OB_NOT_NULL(lookup_ctdef_)) {
lookup_ctdef = static_cast<ObDASScanCtDef*>(attach_ctdef->children_[0]);
@ -2465,7 +2465,7 @@ const ObDASScanCtDef* ObTableApiScanCtDef::get_lookup_ctdef() const
OB_ASSERT(2 == attach_ctdef->children_cnt_ && attach_ctdef->children_ != nullptr);
if (DAS_OP_TABLE_SCAN == attach_ctdef->children_[1]->op_type_) {
lookup_ctdef = static_cast<ObDASScanCtDef*>(attach_ctdef->children_[1]);
} else if (DAS_OP_DOC_ID_MERGE == attach_ctdef->children_[1]->op_type_) {
} else if (DAS_OP_DOMAIN_ID_MERGE == attach_ctdef->children_[1]->op_type_) {
ObDASDocIdMergeCtDef *doc_id_merge_ctdef = static_cast<ObDASDocIdMergeCtDef *>(attach_ctdef->children_[1]);
OB_ASSERT(2 == doc_id_merge_ctdef->children_cnt_ && doc_id_merge_ctdef->children_ != nullptr);
lookup_ctdef = static_cast<ObDASScanCtDef*>(doc_id_merge_ctdef->children_[0]);

View File

@ -215,7 +215,7 @@ int ObTableApiScanExecutor::pushdown_attach_task_to_das(ObDASScanOp &target_op)
ObDASAttachRtInfo *attach_rtinfo = tsc_rtdef_.attach_rtinfo_;
const ObTableApiScanCtDef &tsc_ctdef = scan_spec_.get_ctdef();
if (tb_ctx_.is_global_index_scan() && nullptr != tsc_ctdef.lookup_ctdef_
&& DAS_OP_DOC_ID_MERGE == tsc_ctdef.attach_spec_.attach_ctdef_->op_type_) {
&& DAS_OP_DOMAIN_ID_MERGE == tsc_ctdef.attach_spec_.attach_ctdef_->op_type_) {
// just skip, and doc id merge will be attach into global lookup iter.
} else if (OB_FAIL(target_op.reserve_related_buffer(attach_rtinfo->related_scan_cnt_))) {
LOG_WARN("reserve related buffer failed", K(ret), K(attach_rtinfo->related_scan_cnt_));