From acf96e359c2d2462583e5a86639649bd6a117a4c Mon Sep 17 00:00:00 2001
From: Handora
Date: Mon, 21 Oct 2024 05:43:50 +0000
Subject: [PATCH] [TRANSFER] retry submit log during transfer if 6268

---
 .../high_availability/ob_transfer_handler.cpp |  3 +--
 .../high_availability/ob_transfer_struct.cpp  | 29 +++++++++++++++++++++++++++++
 .../high_availability/ob_transfer_struct.h    |  3 +++
 src/storage/ob_storage_rpc.cpp                |  5 ++---
 4 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/src/storage/high_availability/ob_transfer_handler.cpp b/src/storage/high_availability/ob_transfer_handler.cpp
index 58947320d..1f5c516d9 100644
--- a/src/storage/high_availability/ob_transfer_handler.cpp
+++ b/src/storage/high_availability/ob_transfer_handler.cpp
@@ -1119,7 +1119,6 @@ int ObTransferHandler::do_trans_transfer_start_prepare_(
 {
   int ret = OB_SUCCESS;
   ObLSHandle src_ls_handle;
-  ObTransID failed_tx_id;
   ObStorageHASrcInfo addr_info;
   addr_info.cluster_id_ = GCONF.cluster_id;
   ObAddr dest_ls_leader;
@@ -1142,7 +1141,7 @@ int ObTransferHandler::do_trans_transfer_start_prepare_(
     // submit active tx redo log before block tablet write to optimise system interrupt time
   } else if (OB_FAIL(MTL(ObLSService*)->get_ls(task_info.src_ls_id_, src_ls_handle, ObLSGetMod::HA_MOD))) {
     LOG_WARN("failed to get ls", K(ret), K(task_info));
-  } else if (OB_FAIL(src_ls_handle.get_ls()->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id))) {
+  } else if (OB_FAIL(ObTXTransferUtils::traverse_trans_to_submit_redo_log_with_retry(*src_ls_handle.get_ls(), 100_ms))) {
     LOG_WARN("failed to submit tx log", K(ret), K(task_info));
     // submit dest_ls active tx redo log
   } else if (OB_FAIL(storage_rpc_->submit_tx_log(task_info.tenant_id_, addr_info, task_info.dest_ls_id_, data_scn))) {
diff --git a/src/storage/high_availability/ob_transfer_struct.cpp b/src/storage/high_availability/ob_transfer_struct.cpp
index bd7f3c4a3..d723f8178 100644
--- a/src/storage/high_availability/ob_transfer_struct.cpp
+++ b/src/storage/high_availability/ob_transfer_struct.cpp
@@ -417,6 +417,35 @@ int ObTXTransferUtils::set_tablet_freeze_flag(storage::ObLS &ls, ObTablet *table
   return ret;
 }
 
+// Submit the redo logs of the transactions on `ls`, retrying for as long as
+// the tx service answers OB_TX_NOLOGCB and the retry budget is not used up.
+//
+// @param ls       log stream whose tx service is traversed
+// @param timeout  retry budget, in the unit of ObTimeUtil::current_time()
+// @return the code of the last traverse_trans_to_submit_redo_log() call
+int ObTXTransferUtils::traverse_trans_to_submit_redo_log_with_retry(
+    storage::ObLS &ls,
+    const int64_t timeout)
+{
+  int ret = OB_SUCCESS;
+  ObTransID failed_tx_id;
+  const int64_t start_time = ObTimeUtil::current_time();
+  bool need_retry = false;
+
+  // do-while so that one attempt is always made, even when timeout <= 0
+  do {
+    ret = ls.get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id);
+    // only back off when another attempt still fits into the budget
+    need_retry = (OB_TX_NOLOGCB == ret
+                  && ObTimeUtil::current_time() - start_time < timeout);
+    if (need_retry) {
+      usleep(10_ms);
+    }
+  } while (need_retry);
+
+  return ret;
+}
+
 int ObTXTransferUtils::create_empty_minor_sstable(
     const common::ObTabletID &tablet_id,
     const SCN start_scn,
diff --git a/src/storage/high_availability/ob_transfer_struct.h b/src/storage/high_availability/ob_transfer_struct.h
index 65eb9a04e..d35df2881 100644
--- a/src/storage/high_availability/ob_transfer_struct.h
+++ b/src/storage/high_availability/ob_transfer_struct.h
@@ -165,6 +165,9 @@ struct ObTXTransferUtils
       common::ObArenaAllocator &allocator,
       ObTableHandleV2 &table_handle);
   static int set_tablet_freeze_flag(storage::ObLS &ls, ObTablet *tablet);
+  static int traverse_trans_to_submit_redo_log_with_retry(
+    storage::ObLS &ls,
+    const int64_t timeout);
 
 private:
   static int get_tablet_status_(
diff --git a/src/storage/ob_storage_rpc.cpp b/src/storage/ob_storage_rpc.cpp
index 7f22d189f..d2f9cb4df 100644
--- a/src/storage/ob_storage_rpc.cpp
+++ b/src/storage/ob_storage_rpc.cpp
@@ -3480,7 +3480,6 @@ int ObStorageSubmitTxLogP::process()
   MTL_SWITCH(tenant_id) {
     ObLSHandle ls_handle;
     ObLS *ls = NULL;
-    transaction::ObTransID failed_tx_id;
     SCN scn;
     if (!arg_.is_valid()) {
       ret = OB_INVALID_ARGUMENT;
@@ -3490,8 +3489,8 @@ int ObStorageSubmitTxLogP::process()
     } else if (OB_ISNULL(ls = ls_handle.get_ls())) {
       ret = OB_ERR_UNEXPECTED;
       LOG_WARN("ls is NULL", KR(ret), K(ls_handle));
-    } else if (OB_FAIL(ls->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id))) {
-      LOG_WARN("failed to submit tx log", K(ret), KPC(ls), K(failed_tx_id));
+    } else if (OB_FAIL(ObTXTransferUtils::traverse_trans_to_submit_redo_log_with_retry(*ls, 100_ms))) {
+      LOG_WARN("failed to submit tx log", K(ret), KPC(ls));
     } else if (OB_FAIL(ls->get_log_handler()->get_max_scn(scn))) {
       LOG_WARN("log_handler get_max_scn failed", K(ret), K(ls_id));
     } else {