From 4c5dc275aae3343f59d616c69cbe7c10013fa3e0 Mon Sep 17 00:00:00 2001 From: obdev Date: Wed, 18 Sep 2024 05:24:21 +0000 Subject: [PATCH] bugfix for tmp file flush return -4016 and could not remove file --- .../tmp_file/ob_shared_nothing_tmp_file.cpp | 2 ++ .../tmp_file/ob_tmp_file_flush_manager.cpp | 10 ++++-- .../tmp_file/ob_tmp_file_thread_wrapper.cpp | 36 +++++++++++++++++++ .../tmp_file/ob_tmp_file_thread_wrapper.h | 1 + 4 files changed, 47 insertions(+), 2 deletions(-) diff --git a/src/storage/tmp_file/ob_shared_nothing_tmp_file.cpp b/src/storage/tmp_file/ob_shared_nothing_tmp_file.cpp index f72b24fb8..a49e14cdf 100644 --- a/src/storage/tmp_file/ob_shared_nothing_tmp_file.cpp +++ b/src/storage/tmp_file/ob_shared_nothing_tmp_file.cpp @@ -2583,6 +2583,7 @@ int ObSharedNothingTmpFile::copy_flush_data_from_wbp_( LOG_WARN("invalid buf or write_offset", KR(ret), KP(buf), K(write_offset), K(flush_task), KPC(this)); } else if (OB_FAIL(inner_flush_ctx_.data_flush_infos_.push_back(InnerFlushInfo()))) { LOG_WARN("fail to push back empty flush info", KR(ret), K(fd_), K(info), K(flush_task), KPC(this)); + ret = OB_ITER_END; // override error code, we will handle this err code in flush mgr } while (OB_SUCC(ret) && cur_page_id != copy_end_page_id && write_offset < OB_STORAGE_OBJECT_MGR.get_macro_object_size()) { if (need_flush_tail && cur_page_id == end_page_id_ && file_size_ % ObTmpFileGlobal::PAGE_SIZE != 0) { @@ -2707,6 +2708,7 @@ int ObSharedNothingTmpFile::generate_meta_flush_info_( LOG_WARN("invalid buf or write_offset", KR(ret), KP(buf), K(write_offset), K(flush_task), KPC(this)); } else if (OB_FAIL(flush_infos_.push_back(InnerFlushInfo()))) { LOG_WARN("fail to push back empty flush info", KR(ret), K(fd_), K(info), K(flush_task), KPC(this)); + ret = OB_ITER_END; // override error code, we will handle this err code in flush mgr } else if (OB_FAIL(meta_tree_.flush_meta_pages_for_block(block_index, flush_type, buf, write_offset, meta_flush_context, info.flush_meta_page_array_))) { LOG_WARN("fail to flush meta pages for block", KR(ret), K(fd_), K(flush_task), K(meta_flush_context), KPC(this)); diff --git a/src/storage/tmp_file/ob_tmp_file_flush_manager.cpp b/src/storage/tmp_file/ob_tmp_file_flush_manager.cpp index 0216a04ae..4359a86e1 100644 --- a/src/storage/tmp_file/ob_tmp_file_flush_manager.cpp +++ b/src/storage/tmp_file/ob_tmp_file_flush_manager.cpp @@ -291,7 +291,7 @@ int ObTmpFileFlushManager::flush(ObSpLinkQueue &flushing_queue, flush_ctx_.record_flush_task(flush_task->get_data_length()); // maintain statistics } if (flush_task->get_is_fast_flush_tree()) { - if (OB_FAIL(ret)) { + if (OB_FAIL(ret) && flush_task->get_data_length() > 0) { STORAGE_LOG(ERROR, "fail to execute fast_flush_tree_page flush task to TFFT_WAIT", KR(ret), KPC(flush_task)); } break; // generate only one fast_flush_tree_page_ task to avoid excessive flushing of the meta @@ -665,7 +665,13 @@ void ObTmpFileFlushManager::try_remove_unused_flush_info_(ObTmpFileFlushTask &fl STORAGE_LOG(INFO, "the file is deleting, abort this flush info", KR(ret), K(flush_info), K(flush_task)); flush_info.reset(); - flush_infos.remove(i); + // manually move and reset flush_info to avoid file handle not released + int64_t last_idx = flush_infos.count() - 1; + for (int64_t j = i; j < last_idx; ++j) { + flush_infos.at(j) = flush_infos.at(j + 1); + } + flush_infos.at(last_idx).reset(); + flush_infos.remove(last_idx); --i; } } diff --git a/src/storage/tmp_file/ob_tmp_file_thread_wrapper.cpp b/src/storage/tmp_file/ob_tmp_file_thread_wrapper.cpp index 2cb92e15e..51c656346 100644 --- a/src/storage/tmp_file/ob_tmp_file_thread_wrapper.cpp +++ b/src/storage/tmp_file/ob_tmp_file_thread_wrapper.cpp @@ -228,6 +228,7 @@ int ObTmpFileFlushTG::do_work_() if (is_fast_flush_meta_) { check_flush_task_io_finished_(); + retry_fast_flush_meta_task_(); } else { if (RUNNING_MODE::FAST == mode_) { flush_fast_(); @@ -381,6 +382,38 @@ int ObTmpFileFlushTG::wash_(const int64_t expect_flush_size, const RUNNING_MODE return ret; } +int ObTmpFileFlushTG::retry_fast_flush_meta_task_() +{ + int ret = OB_SUCCESS; + for (int64_t cnt = retry_list_size_; cnt > 0 && !retry_list_.is_empty(); --cnt) { + ObTmpFileFlushTask *flush_task = nullptr; + pop_retry_list_(flush_task); + if (OB_ISNULL(flush_task)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "flush task is nullptr", KR(ret)); + } else if (!flush_task->get_is_fast_flush_tree()) { + push_retry_list_(flush_task); + } else { + // only retry is_fast_flush_tree tasks + STORAGE_LOG(DEBUG, "retry is_fast_flush_tree flush task", KPC(flush_task)); + if (OB_FAIL(flush_mgr_.retry(*flush_task))) { + STORAGE_LOG(WARN, "fail to retry flush task", KR(ret), KPC(flush_task)); + } + + FlushState state = flush_task->get_state(); + if (FlushState::TFFT_WAIT == state) { + push_wait_list_(flush_task); + } else if (FlushState::TFFT_FILL_BLOCK_BUF < state) { + push_retry_list_(flush_task); + } else if (FlushState::TFFT_FILL_BLOCK_BUF >= state) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("unexpected flush task status in retry phase", KR(ret), KPC(flush_task)); + } + } + } + return ret; +} + int ObTmpFileFlushTG::retry_task_() { int ret = OB_SUCCESS; @@ -416,6 +449,9 @@ int ObTmpFileFlushTG::retry_task_() } break; } + } else if (FlushState::TFFT_FILL_BLOCK_BUF >= state) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("unexpected flush task status in retry phase", KR(ret), KPC(flush_task)); } } } diff --git a/src/storage/tmp_file/ob_tmp_file_thread_wrapper.h b/src/storage/tmp_file/ob_tmp_file_thread_wrapper.h index 780b731ed..cfff4d380 100644 --- a/src/storage/tmp_file/ob_tmp_file_thread_wrapper.h +++ b/src/storage/tmp_file/ob_tmp_file_thread_wrapper.h @@ -65,6 +65,7 @@ private: int handle_generated_flush_tasks_(ObSpLinkQueue &flushing_list, int64_t &task_num); int wash_(const int64_t expect_flush_size, const RUNNING_MODE mode); int check_flush_task_io_finished_(); + int retry_fast_flush_meta_task_(); int retry_task_(); int special_flush_meta_tree_page_(); void flush_fast_();