diff --git a/deps/oblib/src/lib/profile/ob_trace_id.h b/deps/oblib/src/lib/profile/ob_trace_id.h index a5f9a06a2a..931c585172 100644 --- a/deps/oblib/src/lib/profile/ob_trace_id.h +++ b/deps/oblib/src/lib/profile/ob_trace_id.h @@ -266,7 +266,7 @@ struct ObCurTraceId } } #ifdef COMPILE_DLL_MODE -private: +private: static TLOCAL(TraceId, trace_id_); #endif }; diff --git a/src/logservice/applyservice/ob_log_apply_service.h b/src/logservice/applyservice/ob_log_apply_service.h index 73741b3d1b..8c9cc6bfdc 100644 --- a/src/logservice/applyservice/ob_log_apply_service.h +++ b/src/logservice/applyservice/ob_log_apply_service.h @@ -65,8 +65,13 @@ struct LSApplyStat struct ApplyDiagnoseInfo { + ApplyDiagnoseInfo() { reset(); } + ~ApplyDiagnoseInfo() { reset(); } share::SCN max_applied_scn_; TO_STRING_KV(K(max_applied_scn_)); + void reset() { + max_applied_scn_.reset(); + } }; class ObApplyFsCb : public palf::PalfFSCb diff --git a/src/logservice/ob_garbage_collector.h b/src/logservice/ob_garbage_collector.h index 656123e773..d1f5eacc38 100644 --- a/src/logservice/ob_garbage_collector.h +++ b/src/logservice/ob_garbage_collector.h @@ -96,10 +96,16 @@ int gc_state_to_string(const LSGCState gc_state, struct GCDiagnoseInfo { + GCDiagnoseInfo() { reset(); } + ~GCDiagnoseInfo() { reset(); } LSGCState gc_state_; int64_t gc_start_ts_; TO_STRING_KV(K(gc_state_), K(gc_start_ts_)); + void reset() { + gc_state_ = LSGCState::INVALID_LS_GC_STATE; + gc_start_ts_ = OB_INVALID_TIMESTAMP; + } }; class ObGCLSLog diff --git a/src/logservice/ob_log_handler.h b/src/logservice/ob_log_handler.h index 2d9ca8cfe0..12c75b41ed 100644 --- a/src/logservice/ob_log_handler.h +++ b/src/logservice/ob_log_handler.h @@ -50,8 +50,14 @@ class ObApplyStatus; class ObLogReplayService; class AppendCb; struct LogHandlerDiagnoseInfo { + LogHandlerDiagnoseInfo() { reset(); } + ~LogHandlerDiagnoseInfo() { reset(); } common::ObRole log_handler_role_; int64_t log_handler_proposal_id_; + void reset() { + log_handler_role_ = 
FOLLOWER; + log_handler_proposal_id_ = palf::INVALID_PROPOSAL_ID; + } TO_STRING_KV(K(log_handler_role_), K(log_handler_proposal_id_)); }; diff --git a/src/logservice/palf/palf_handle_impl.h b/src/logservice/palf/palf_handle_impl.h index 1c87b36985..c1a6e7b9ee 100644 --- a/src/logservice/palf/palf_handle_impl.h +++ b/src/logservice/palf/palf_handle_impl.h @@ -99,11 +99,20 @@ public: }; struct PalfDiagnoseInfo { + PalfDiagnoseInfo() { reset(); } + ~PalfDiagnoseInfo() { reset(); } common::ObRole election_role_; int64_t election_epoch_; common::ObRole palf_role_; palf::ObReplicaState palf_state_; int64_t palf_proposal_id_; + void reset() { + election_role_ = FOLLOWER; + election_epoch_ = 0; + palf_role_ = FOLLOWER; + palf_state_ = ObReplicaState::INVALID_STATE; + palf_proposal_id_ = INVALID_PROPOSAL_ID; + } TO_STRING_KV(K(election_role_), K(election_epoch_), K(palf_role_), diff --git a/src/logservice/rcservice/ob_role_change_handler.h b/src/logservice/rcservice/ob_role_change_handler.h index c91037b1f3..6a5d994bc3 100644 --- a/src/logservice/rcservice/ob_role_change_handler.h +++ b/src/logservice/rcservice/ob_role_change_handler.h @@ -56,6 +56,7 @@ int takeover_state_to_string(const TakeOverState log_type, struct RCDiagnoseInfo { RCDiagnoseInfo() { reset(); } + ~RCDiagnoseInfo() { reset(); } void reset(); int64_t id_; TakeOverState state_; diff --git a/src/logservice/replayservice/ob_replay_status.h b/src/logservice/replayservice/ob_replay_status.h index 75df5ee364..17bf1b594a 100644 --- a/src/logservice/replayservice/ob_replay_status.h +++ b/src/logservice/replayservice/ob_replay_status.h @@ -78,11 +78,18 @@ struct LSReplayStat struct ReplayDiagnoseInfo { + ReplayDiagnoseInfo() { reset(); } + ~ReplayDiagnoseInfo() { reset(); } palf::LSN max_replayed_lsn_; share::SCN max_replayed_scn_; ObSqlString diagnose_str_; TO_STRING_KV(K(max_replayed_lsn_), K(max_replayed_scn_)); + void reset() { + max_replayed_lsn_.reset(); + max_replayed_scn_.reset(); + diagnose_str_.reset(); /* keep reset() complete: clear the free-form diagnose text as well */ + } }; 
//此类型为前向barrier日志专用, 与ObLogReplayTask分开分配 diff --git a/src/logservice/restoreservice/ob_log_restore_handler.cpp b/src/logservice/restoreservice/ob_log_restore_handler.cpp index 691f762880..7f89463ac7 100644 --- a/src/logservice/restoreservice/ob_log_restore_handler.cpp +++ b/src/logservice/restoreservice/ob_log_restore_handler.cpp @@ -22,6 +22,7 @@ #include "lib/time/ob_time_utility.h" // ObTimeUtility #include "lib/utility/ob_macro_utils.h" #include "logservice/ob_log_service.h" // ObLogService +#include "logservice/palf/log_define.h" #include "logservice/palf/log_group_entry.h" #include "logservice/palf/palf_env.h" // PalfEnv #include "logservice/palf/palf_iterator.h" @@ -700,5 +701,38 @@ int ObLogRestoreHandler::get_next_sorted_task(ObFetchLogTask *&task) return ret; } +int ObLogRestoreHandler::diagnose(RestoreDiagnoseInfo &diagnose_info) +{ + int ret = OB_SUCCESS; + diagnose_info.restore_context_info_.reset(); + diagnose_info.restore_err_context_info_.reset(); /* clear the error-context string too, so append_fmt below never extends stale text */ + const int64_t MAX_TRACE_ID_LENGTH = 64; + char trace_id[MAX_TRACE_ID_LENGTH]; + RLockGuard guard(lock_); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + } else if (FALSE_IT(diagnose_info.restore_role_ = role_)) { + } else if (FALSE_IT(diagnose_info.restore_proposal_id_ = proposal_id_)) { + } else if (OB_FAIL(diagnose_info.restore_context_info_.append_fmt("issue_task_num:%ld; " + "last_fetch_ts:%ld; " + "max_submit_lsn:%ld; " + "max_fetch_lsn:%ld; " + "max_fetch_scn:%ld; ", + context_.issue_task_num_, + context_.last_fetch_ts_, + context_.max_submit_lsn_.val_, + context_.max_fetch_lsn_.val_, + context_.max_fetch_scn_.convert_to_ts()))) { + CLOG_LOG(WARN, "append restore_context_info failed", K(ret), K(context_)); + } else if (FALSE_IT(context_.error_context_.trace_id_.to_string(trace_id, sizeof(trace_id)))) { + } else if (OB_FAIL(diagnose_info.restore_err_context_info_.append_fmt("ret_code:%d; " + "trace_id:%s; ", + context_.error_context_.ret_code_, + trace_id))) { + CLOG_LOG(WARN, "append restore_err_context_info 
failed", K(ret), K(context_)); + } + return ret; +} + } // namespace logservice } // namespace oceanbase diff --git a/src/logservice/restoreservice/ob_log_restore_handler.h b/src/logservice/restoreservice/ob_log_restore_handler.h index 23f4d57b57..95fdfbb225 100644 --- a/src/logservice/restoreservice/ob_log_restore_handler.h +++ b/src/logservice/restoreservice/ob_log_restore_handler.h @@ -57,6 +57,24 @@ using oceanbase::palf::PalfEnv; using oceanbase::common::ObString; using oceanbase::common::ObAddr; +struct RestoreDiagnoseInfo +{ + RestoreDiagnoseInfo() { reset(); } + ~RestoreDiagnoseInfo() { reset(); } + common::ObRole restore_role_; + int64_t restore_proposal_id_; + ObSqlString restore_context_info_; + ObSqlString restore_err_context_info_; + TO_STRING_KV(K(restore_role_), + K(restore_proposal_id_)); + void reset() { + restore_role_ = FOLLOWER; + restore_proposal_id_ = palf::INVALID_PROPOSAL_ID; + restore_context_info_.reset(); + restore_err_context_info_.reset(); + } +}; + // The interface to submit log for physical restore and physical standby class ObLogRestoreHandler : public ObLogHandlerBase { @@ -154,6 +172,7 @@ public: // OB_SUCCESS get task successfully, but maybe no task in turn exists // other code unexpected ret_code int get_next_sorted_task(ObFetchLogTask *&task); + int diagnose(RestoreDiagnoseInfo &diagnose_info); TO_STRING_KV(K_(is_inited), K_(is_in_stop_state), K_(id), K_(proposal_id), K_(role), KP_(parent), K_(context), K_(restore_context)); diff --git a/src/observer/virtual_table/ob_all_virtual_ha_diagnose.cpp b/src/observer/virtual_table/ob_all_virtual_ha_diagnose.cpp index 3ab08d69d0..fc3f3804d3 100644 --- a/src/observer/virtual_table/ob_all_virtual_ha_diagnose.cpp +++ b/src/observer/virtual_table/ob_all_virtual_ha_diagnose.cpp @@ -164,24 +164,20 @@ int ObAllVirtualHADiagnose::insert_stat_(storage::DiagnoseInfo &diagnose_info) ObCharset::get_default_charset())); } break; - case MAX_APPLIED_SCN: { + case MAX_APPLIED_SCN: 
cur_row_.cells_[i].set_uint64(diagnose_info.apply_diagnose_info_.max_applied_scn_.get_val_for_inner_table_field()); break; - } - case MAX_REPALYED_LSN: { + case MAX_REPALYED_LSN: cur_row_.cells_[i].set_uint64(diagnose_info.replay_diagnose_info_.max_replayed_lsn_.val_); break; - } - case MAX_REPLAYED_SCN: { + case MAX_REPLAYED_SCN: cur_row_.cells_[i].set_uint64(diagnose_info.replay_diagnose_info_.max_replayed_scn_.get_val_for_inner_table_field()); break; - } - case REPLAY_DIAGNOSE_INFO: { + case REPLAY_DIAGNOSE_INFO: cur_row_.cells_[i].set_varchar((diagnose_info.replay_diagnose_info_.diagnose_str_.string())); cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation( ObCharset::get_default_charset())); break; - } case GC_STATE: if (OB_FAIL(gc_state_to_string(diagnose_info.gc_diagnose_info_.gc_state_, gc_state_str_, @@ -193,24 +189,20 @@ int ObAllVirtualHADiagnose::insert_stat_(storage::DiagnoseInfo &diagnose_info) ObCharset::get_default_charset())); } break; - case GC_START_TS: { + case GC_START_TS: cur_row_.cells_[i].set_int(diagnose_info.gc_diagnose_info_.gc_start_ts_); break; - } //TODO: @keqing.llt archive_scn列目前只占位 - case ARCHIVE_SCN: { + case ARCHIVE_SCN: cur_row_.cells_[i].set_uint64(0); break; - } - case CHECKPOINT_SCN: { + case CHECKPOINT_SCN: cur_row_.cells_[i].set_uint64(diagnose_info.checkpoint_diagnose_info_.checkpoint_.get_val_for_inner_table_field()); break; - } - case MIN_REC_SCN: { + case MIN_REC_SCN: cur_row_.cells_[i].set_uint64(diagnose_info.checkpoint_diagnose_info_.min_rec_scn_.get_val_for_inner_table_field()); break; - } - case MIN_REC_SCN_LOG_TYPE: { + case MIN_REC_SCN_LOG_TYPE: if (OB_FAIL(log_base_type_to_string(diagnose_info.checkpoint_diagnose_info_.log_type_, min_rec_log_scn_log_type_str_, sizeof(min_rec_log_scn_log_type_str_)))) { @@ -221,7 +213,29 @@ int ObAllVirtualHADiagnose::insert_stat_(storage::DiagnoseInfo &diagnose_info) ObCharset::get_default_charset())); } break; - } + case RESTORE_HANDLER_ROLE: + if 
(OB_FAIL(role_to_string(diagnose_info.restore_diagnose_info_.restore_role_, + restore_handler_role_str_, sizeof(restore_handler_role_str_)))) { + SERVER_LOG(WARN, "role_to_string failed", K(ret), K(diagnose_info)); + } else { + cur_row_.cells_[i].set_varchar(ObString::make_string(restore_handler_role_str_)); /* same buffer role_to_string() filled above */ + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation( + ObCharset::get_default_charset())); + } + break; + case RESTORE_HANDLER_PROPOSAL_ID: + cur_row_.cells_[i].set_int(diagnose_info.restore_diagnose_info_.restore_proposal_id_); + break; + case RESTORE_CONTEXT_INFO: + cur_row_.cells_[i].set_varchar((diagnose_info.restore_diagnose_info_.restore_context_info_.string())); + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation( + ObCharset::get_default_charset())); + break; + case RESTORE_ERR_CONTEXT_INFO: + cur_row_.cells_[i].set_varchar((diagnose_info.restore_diagnose_info_.restore_err_context_info_.string())); + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation( + ObCharset::get_default_charset())); + break; default: ret = OB_ERR_UNEXPECTED; SERVER_LOG(WARN, "unkown column"); @@ -230,5 +244,6 @@ int ObAllVirtualHADiagnose::insert_stat_(storage::DiagnoseInfo &diagnose_info) } return ret; } + } // namespace observer } // namespace oceanbase diff --git a/src/observer/virtual_table/ob_all_virtual_ha_diagnose.h b/src/observer/virtual_table/ob_all_virtual_ha_diagnose.h index 2aed4c8c37..8f92465c0c 100644 --- a/src/observer/virtual_table/ob_all_virtual_ha_diagnose.h +++ b/src/observer/virtual_table/ob_all_virtual_ha_diagnose.h @@ -48,6 +48,10 @@ enum IOStatColumn CHECKPOINT_SCN, MIN_REC_SCN, MIN_REC_SCN_LOG_TYPE, + RESTORE_HANDLER_ROLE, + RESTORE_HANDLER_PROPOSAL_ID, + RESTORE_CONTEXT_INFO, + RESTORE_ERR_CONTEXT_INFO, }; class ObAllVirtualHADiagnose : public common::ObVirtualTableScannerIterator @@ -68,6 +72,7 @@ private: char log_handler_takeover_log_type_str_[VARCHAR_32] = {'\0'}; char gc_state_str_[VARCHAR_32] = {'\0'}; char 
min_rec_log_scn_log_type_str_[VARCHAR_32] = {'\0'}; + char restore_handler_role_str_[VARCHAR_32] = {'\0'}; omt::ObMultiTenant *omt_; }; } // namespace observer diff --git a/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp b/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp index 931194fd8a..a1bbd51d36 100644 --- a/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp +++ b/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp @@ -8389,6 +8389,66 @@ int ObInnerTableSchema::all_virtual_ha_diagnose_schema(ObTableSchema &table_sche false, //is_nullable false); //is_autoincrement } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("restore_handler_role", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + 32, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("restore_proposal_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("restore_context_info", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + 1024, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("restore_err_context_info", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + 1024, //column_length + -1, //column_precision + 
-1, //column_scale + false, //is_nullable + false); //is_autoincrement + } if (OB_SUCC(ret)) { table_schema.get_part_option().set_part_num(1); table_schema.set_part_level(PARTITION_LEVEL_ONE); diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index ba69b97181..1afe51eae5 100644 --- a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -11250,7 +11250,11 @@ def_table_schema( ('archive_scn', 'uint'), ('checkpoint_scn', 'uint'), ('min_rec_scn', 'uint'), - ('min_rec_scn_log_type', 'varchar:32') + ('min_rec_scn_log_type', 'varchar:32'), + ('restore_handler_role', 'varchar:32'), + ('restore_proposal_id', 'int'), + ('restore_context_info', 'varchar:1024'), + ('restore_err_context_info', 'varchar:1024') ], partition_columns = ['svr_ip', 'svr_port'], diff --git a/src/share/io/ob_io_struct.cpp b/src/share/io/ob_io_struct.cpp index de1aed7058..8faedc86aa 100644 --- a/src/share/io/ob_io_struct.cpp +++ b/src/share/io/ob_io_struct.cpp @@ -888,14 +888,14 @@ void ObIOSender::stop() stop_submit(); if (tg_id_ >= 0) { TG_STOP(tg_id_); - } + } } void ObIOSender::wait() { if (tg_id_ >= 0) { TG_WAIT(tg_id_); - } + } } void ObIOSender::destroy() @@ -1014,7 +1014,7 @@ int ObIOSender::enqueue_request(ObIORequest &req) } if (OB_SUCC(ret)) { if (tmp_phy_queue->req_list_.is_empty()) { - //new request + //new request if (OB_FAIL(io_queue_->remove_from_heap(tmp_phy_queue))) { LOG_WARN("remove phy queue from heap failed", K(ret), K(index)); } else { @@ -1026,7 +1026,7 @@ int ObIOSender::enqueue_request(ObIORequest &req) LOG_WARN("push new req into phy queue failed", K(ret)); } else { ATOMIC_INC(&sender_req_count_); - req.time_log_.enqueue_ts_ = ObTimeUtility::fast_current_time(); + req.time_log_.enqueue_ts_ = ObTimeUtility::fast_current_time(); //calc ts_ if (OB_NOT_NULL(req.tenant_io_mgr_.get_ptr())) { ObTenantIOClock *io_clock = 
static_cast<ObTenantIOClock *>(req.tenant_io_mgr_.get_ptr()->get_io_clock()); @@ -1749,14 +1749,14 @@ void ObAsyncIOChannel::stop() { if (tg_id_ >= 0) { TG_STOP(tg_id_); - } + } } void ObAsyncIOChannel::wait() { if (tg_id_ >= 0) { TG_WAIT(tg_id_); - } + } } void ObAsyncIOChannel::destroy() @@ -2386,7 +2386,7 @@ void ObIORunner::stop() { if (tg_id_ >= 0) { TG_STOP(tg_id_); - } + } } void ObIORunner::wait() diff --git a/src/storage/checkpoint/ob_checkpoint_executor.h b/src/storage/checkpoint/ob_checkpoint_executor.h index 4f5ba8f1aa..ce81ddd53f 100644 --- a/src/storage/checkpoint/ob_checkpoint_executor.h +++ b/src/storage/checkpoint/ob_checkpoint_executor.h @@ -39,13 +39,19 @@ struct ObCheckpointVTInfo struct CheckpointDiagnoseInfo { + CheckpointDiagnoseInfo() { reset(); } + ~CheckpointDiagnoseInfo() { reset(); } share::SCN checkpoint_; share::SCN min_rec_scn_; logservice::ObLogBaseType log_type_; - TO_STRING_KV(K(checkpoint_), K(min_rec_scn_), K(log_type_)); + void reset() { + checkpoint_.reset(); + min_rec_scn_.reset(); + log_type_ = logservice::ObLogBaseType::INVALID_LOG_BASE_TYPE; + } }; class ObCheckpointExecutor diff --git a/src/storage/ls/ob_ls.h b/src/storage/ls/ob_ls.h index 87136e555e..aaf20742e4 100644 --- a/src/storage/ls/ob_ls.h +++ b/src/storage/ls/ob_ls.h @@ -93,6 +93,8 @@ struct ObLSVTInfo // 诊断虚表统计信息 struct DiagnoseInfo { + DiagnoseInfo() { reset(); } + ~DiagnoseInfo() { reset(); } bool is_role_sync() { return ((palf_diagnose_info_.election_role_ == palf_diagnose_info_.palf_role_) && (palf_diagnose_info_.palf_role_ == log_handler_diagnose_info_.log_handler_role_)); @@ -105,6 +107,7 @@ struct DiagnoseInfo logservice::ReplayDiagnoseInfo replay_diagnose_info_; logservice::GCDiagnoseInfo gc_diagnose_info_; checkpoint::CheckpointDiagnoseInfo checkpoint_diagnose_info_; + logservice::RestoreDiagnoseInfo restore_diagnose_info_; TO_STRING_KV(K(ls_id_), K(log_handler_diagnose_info_), K(palf_diagnose_info_), @@ -112,7 +115,19 @@ struct DiagnoseInfo K(apply_diagnose_info_), 
K(replay_diagnose_info_), K(gc_diagnose_info_), - K(checkpoint_diagnose_info_)); + K(checkpoint_diagnose_info_), + K(restore_diagnose_info_)); + void reset() { + ls_id_ = -1; + log_handler_diagnose_info_.reset(); + palf_diagnose_info_.reset(); + rc_diagnose_info_.reset(); + apply_diagnose_info_.reset(); + replay_diagnose_info_.reset(); + gc_diagnose_info_.reset(); + checkpoint_diagnose_info_.reset(); + restore_diagnose_info_.reset(); + } }; class ObIComponentFactory;