From 774fd029391a464bb6ffac5f26f499d338dbf58f Mon Sep 17 00:00:00 2001 From: skylhd Date: Mon, 21 Oct 2024 03:13:51 +0000 Subject: [PATCH] [CP][BUGFIX] not fill vid when doing sampling --- src/sql/das/iter/ob_das_vid_merge_iter.cpp | 65 ++++++++++++++++++---- src/sql/das/iter/ob_das_vid_merge_iter.h | 5 +- 2 files changed, 58 insertions(+), 12 deletions(-) diff --git a/src/sql/das/iter/ob_das_vid_merge_iter.cpp b/src/sql/das/iter/ob_das_vid_merge_iter.cpp index df585c7ca..dcbce8a69 100644 --- a/src/sql/das/iter/ob_das_vid_merge_iter.cpp +++ b/src/sql/das/iter/ob_das_vid_merge_iter.cpp @@ -45,6 +45,7 @@ ObDASVIdMergeIterParam::~ObDASVIdMergeIterParam() ObDASVIdMergeIter::ObDASVIdMergeIter() : ObDASIter(), need_filter_rowkey_vid_(true), + is_block_sample_(false), rowkey_vid_scan_param_(), rowkey_vid_iter_(nullptr), data_table_iter_(nullptr), @@ -190,6 +191,7 @@ int ObDASVIdMergeIter::inner_release() data_table_iter_ = nullptr; rowkey_vid_iter_ = nullptr; need_filter_rowkey_vid_ = true; + is_block_sample_ = false; return ret; } @@ -303,6 +305,7 @@ int ObDASVIdMergeIter::init_rowkey_vid_scan_param( } if (rtdef->sample_info_ != nullptr) { rowkey_vid_scan_param_.sample_info_ = *rtdef->sample_info_; + is_block_sample_ = (rowkey_vid_scan_param_.sample_info_.method_ != common::SampleInfo::NO_SAMPLE); } } @@ -348,8 +351,10 @@ int ObDASVIdMergeIter::build_rowkey_vid_range() data_table_iter_->get_scan_param().sample_info_.method_ = common::SampleInfo::ROW_SAMPLE; rowkey_vid_scan_param_.sample_info_.method_ = common::SampleInfo::ROW_SAMPLE; } + is_block_sample_ = (rowkey_vid_scan_param_.sample_info_.method_ != common::SampleInfo::NO_SAMPLE); + rowkey_vid_scan_param_.scan_flag_.scan_order_ = data_table_iter_->get_scan_param().scan_flag_.scan_order_; } - LOG_INFO("build rowkey vid range", K(ret), K(need_filter_rowkey_vid_), K(rowkey_vid_scan_param_.key_ranges_), + LOG_INFO("build rowkey vid range", K(ret), K(need_filter_rowkey_vid_), K(is_block_sample_), K(rowkey_vid_scan_param_.key_ranges_), K(rowkey_vid_scan_param_.ss_key_ranges_), K(rowkey_vid_scan_param_.sample_info_)); return ret; } @@ -358,7 +363,15 @@ int ObDASVIdMergeIter::concat_row() { int ret = OB_SUCCESS; int64_t vid_id; - if (OB_FAIL(data_table_iter_->get_next_row())) { + if (is_block_sample_) { + if (OB_FAIL(data_table_iter_->get_next_row())) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("fail to get next rows", K(ret)); + } + } else if (OB_FAIL(fill_vid_id_in_data_table(vid_id, true))) { + LOG_WARN("fail to fill null vid id in data table", K(ret), K(vid_id)); + } + } else if (OB_FAIL(data_table_iter_->get_next_row())) { if (OB_ITER_END == ret) { if (OB_FAIL(rowkey_vid_iter_->get_next_row())) { if (OB_UNLIKELY(OB_ITER_END != ret)) { @@ -372,6 +385,7 @@ int ObDASVIdMergeIter::concat_row() } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("row count isn't equal between data table and rowkey doc", K(ret), K(rowkey), + K(is_block_sample_), K(rowkey_vid_iter_->get_scan_param()), K(data_table_iter_->get_scan_param())); } } @@ -401,6 +415,12 @@ int ObDASVIdMergeIter::concat_rows(int64_t &count, int64_t capacity) } } if (OB_FAIL(ret) && ret != OB_ITER_END) { + } else if (is_block_sample_) { + for (int64_t i = 0; i < data_row_cnt && OB_SUCC(ret); i++) { + if (OB_FAIL(vid_ids.push_back(0))) { + LOG_WARN("fail to push back mock vid into array", K(ret), K(i), K(data_row_cnt)); + } + } } else { // whatever succ or iter_end, we should get from rowkey_vid_iter bool expect_iter_end = (ret == OB_ITER_END); int64_t real_cap = (data_row_cnt > 0 && !expect_iter_end) ? data_row_cnt : capacity; @@ -435,19 +455,19 @@ int ObDASVIdMergeIter::concat_rows(int64_t &count, int64_t capacity) int tmp_ret = ret; ret = OB_ERR_UNEXPECTED; LOG_WARN("row count isn't equal between data table and rowkey vid", - K(ret), K(tmp_ret), K(capacity), K(vid_ids.count()), K(data_row_cnt)); + K(ret), K(is_block_sample_), K(tmp_ret), K(capacity), K(vid_ids.count()), K(data_row_cnt)); } } if (OB_FAIL(ret) && OB_ITER_END != ret) { } else if (OB_UNLIKELY(data_row_cnt != vid_ids.count())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("The row count of data table isn't equal to rowkey vid", K(ret), K(data_row_cnt), K(vid_ids), - K(data_table_iter_->get_scan_param()), K(rowkey_vid_iter_->get_scan_param())); + K(is_block_sample_), K(data_table_iter_->get_scan_param()), K(rowkey_vid_iter_->get_scan_param())); } else { count = data_row_cnt; if (count > 0 && data_table_ctdef_->vec_vid_idx_ != -1) { const int tmp_ret = ret; - if (OB_FAIL(fill_vid_ids_in_data_table(vid_ids))) { + if (OB_FAIL(fill_vid_ids_in_data_table(vid_ids, is_block_sample_))) { LOG_WARN("fail to fill vid ids in data table", K(ret), K(tmp_ret), K(vid_ids)); } else { ret = tmp_ret; @@ -455,7 +475,7 @@ int ObDASVIdMergeIter::concat_rows(int64_t &count, int64_t capacity) } } LOG_TRACE("concat rows in data table and rowkey vid", K(ret), K(data_row_cnt), K(vid_ids), K(count), - K(capacity)); + K(capacity), K(is_block_sample_)); return ret; } @@ -466,6 +486,11 @@ int ObDASVIdMergeIter::sorted_merge_join_row() common::ObRowkey data_table_rowkey; if (OB_FAIL(data_table_iter_->get_next_row()) && OB_ITER_END != ret) { LOG_WARN("fail to get next data table row", K(ret)); + } else if (is_block_sample_) { + int64_t vid_id = 0; + if (OB_FAIL(fill_vid_id_in_data_table(vid_id, true))) { + LOG_WARN("fail to fill null vid id in data table", K(ret), K(vid_id)); + } } else if (OB_ITER_END == ret) { while (OB_SUCC(rowkey_vid_iter_->get_next_row())); if (OB_ITER_END != ret) { @@ -517,6 +542,18 @@ int ObDASVIdMergeIter::sorted_merge_join_rows(int64_t &count, int64_t capacity) } else if (OB_UNLIKELY(0 == data_table_cnt && OB_SUCCESS == ret)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error, data table row count is 0, but ret code is success", K(ret), KPC(data_table_iter_)); + } else if (is_block_sample_) { + for (int64_t i = 0; i < data_table_cnt && OB_SUCC(ret); i++) { + if (OB_FAIL(vid_ids.push_back(0))) { + LOG_WARN("fail to push back mock vid into array", K(ret), K(i), K(data_table_cnt)); + } + } + if (FAILEDx(fill_vid_ids_in_data_table(vid_ids, true))) { + LOG_WARN("fail to fill null vid ids in data table", K(ret), K(vid_ids)); + } else { + count = data_table_cnt; + ret = is_iter_end ? OB_ITER_END : ret; + } } else if (OB_ITER_END == ret && FALSE_IT(is_iter_end = true)) { } else if (OB_FAIL(get_rowkeys(data_table_cnt, allocator, data_table_ctdef_, data_table_rtdef_, rowkeys_in_data_table))) { @@ -723,7 +760,7 @@ int ObDASVIdMergeIter::get_rowkeys_and_vid_ids( return ret; } -int ObDASVIdMergeIter::fill_vid_id_in_data_table(const int64_t &vid_id) +int ObDASVIdMergeIter::fill_vid_id_in_data_table(const int64_t &vid_id, bool set_null) { int ret = OB_SUCCESS; // if (OB_UNLIKELY(!vid_id.is_valid())) { @@ -753,7 +790,11 @@ int ObDASVIdMergeIter::fill_vid_id_in_data_table(const int64_t &vid_id) LOG_WARN("fail to allocate memory", K(ret), KP(buf)); } else { // ObDocId *vid_id_ptr = new (buf) ObDocId(vid_id); - datum.set_int(vid_id); + if (set_null) { + datum.set_null(); + } else { + datum.set_int(vid_id); + } vid_id_expr->set_evaluated_projected(*data_table_rtdef_->eval_ctx_); LOG_INFO("Doc id merge fill a vidument id", K(vid_id)); } @@ -765,7 +806,7 @@ int ObDASVIdMergeIter::fill_vid_id_in_data_table(const int64_t &vid_id) } -int ObDASVIdMergeIter::fill_vid_ids_in_data_table(const common::ObIArray &vid_ids) +int ObDASVIdMergeIter::fill_vid_ids_in_data_table(const common::ObIArray &vid_ids, bool set_null) { int ret = OB_SUCCESS; if (OB_UNLIKELY(0 == vid_ids.count())) { @@ -792,7 +833,11 @@ int ObDASVIdMergeIter::fill_vid_ids_in_data_table(const common::ObIArrayset_evaluated_projected(*data_table_rtdef_->eval_ctx_); diff --git a/src/sql/das/iter/ob_das_vid_merge_iter.h b/src/sql/das/iter/ob_das_vid_merge_iter.h index 648b107c5..7c7eb02e1 100644 --- a/src/sql/das/iter/ob_das_vid_merge_iter.h +++ b/src/sql/das/iter/ob_das_vid_merge_iter.h @@ -145,10 +145,11 @@ protected: ObDASScanRtDef *rtdef, common::ObIArray &rowkeys, common::ObIArray &vid_ids); - int fill_vid_id_in_data_table(const int64_t &vid_id); - int fill_vid_ids_in_data_table(const common::ObIArray &vid_ids); + int fill_vid_id_in_data_table(const int64_t &vid_id, bool set_null = false); + int fill_vid_ids_in_data_table(const common::ObIArray &vid_ids, bool set_null = false); private: bool need_filter_rowkey_vid_; + bool is_block_sample_; storage::ObTableScanParam rowkey_vid_scan_param_; ObDASScanIter *rowkey_vid_iter_; ObDASScanIter *data_table_iter_;