diff --git a/src/rootserver/freeze/ob_checksum_validator.cpp b/src/rootserver/freeze/ob_checksum_validator.cpp index a49afa4f52..fa1b7a6d54 100644 --- a/src/rootserver/freeze/ob_checksum_validator.cpp +++ b/src/rootserver/freeze/ob_checksum_validator.cpp @@ -16,6 +16,7 @@ #include "rootserver/freeze/ob_freeze_info_manager.h" #include "rootserver/freeze/ob_zone_merge_manager.h" #include "rootserver/freeze/ob_major_freeze_util.h" +#include "rootserver/freeze/ob_major_merge_progress_checker.h" #include "rootserver/ob_root_utils.h" #include "lib/mysqlclient/ob_mysql_proxy.h" #include "lib/mysqlclient/ob_isql_client.h" @@ -103,6 +104,7 @@ int ObChecksumValidatorBase::validate_checksum( const hash::ObHashMap &tablet_compaction_map, int64_t &table_count, hash::ObHashMap &table_compaction_map, + ObMergeTimeStatistics &merge_time_statistics, const int64_t expected_epoch) { int ret = OB_SUCCESS; @@ -116,7 +118,7 @@ int ObChecksumValidatorBase::validate_checksum( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K_(tenant_id), K(frozen_scn)); } else if (OB_FAIL(check_all_table_verification_finished(stop, frozen_scn, tablet_compaction_map, - table_count, table_compaction_map, expected_epoch))) { + table_count, table_compaction_map, merge_time_statistics, expected_epoch))) { LOG_WARN("fail to check all table verification finished", KR(ret), K_(tenant_id), K(frozen_scn)); } return ret; @@ -199,12 +201,14 @@ int ObTabletChecksumValidator::check_all_table_verification_finished( const hash::ObHashMap &tablet_compaction_map, int64_t &table_count, hash::ObHashMap &table_compaction_map, + ObMergeTimeStatistics &merge_time_statistics, const int64_t expected_epoch) { int ret = OB_SUCCESS; int check_ret = OB_SUCCESS; UNUSED(expected_epoch); + const int64_t start_time_us = ObTimeUtil::current_time(); if (OB_UNLIKELY(!frozen_scn.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K_(tenant_id), K(frozen_scn)); @@ -254,6 +258,8 @@ int ObTabletChecksumValidator::check_all_table_verification_finished( if (OB_CHECKSUM_ERROR == check_ret) { ret = check_ret; } + const int64_t cost_time_us = ObTimeUtil::current_time() - start_time_us; + merge_time_statistics.update_merge_status_us_.tablet_validator_us_ = cost_time_us; return ret; } @@ -413,11 +419,13 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished const hash::ObHashMap &tablet_compaction_map, int64_t &table_count, hash::ObHashMap &table_compaction_map, + ObMergeTimeStatistics &merge_time_statistics, const int64_t expected_epoch) { int ret = OB_SUCCESS; int check_ret = OB_SUCCESS; + const int64_t start_time_us = ObTimeUtil::current_time(); if (OB_UNLIKELY(!frozen_scn.is_valid() || tablet_compaction_map.empty())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K_(tenant_id), K(frozen_scn), @@ -456,7 +464,7 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished } else if (cur_compaction_info.is_verified()) { // already finished verification, skip it! } else if (table_schema->has_tablet()) { if (cur_compaction_info.is_index_ckm_verified()) { - if (need_validate()) { // need to validate cross-cluster checksum + if (need_validate()) { // need to validate cross-cluster checksum // check whether waiting all tablet checksum has timed out bool is_wait_tablet_checksum_timeout = check_waiting_tablet_checksum_timeout(); if (OB_UNLIKELY(is_wait_tablet_checksum_timeout)) { @@ -481,7 +489,7 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished K(frozen_scn), KPC(table_schema)); } } else if (OB_FAIL(handle_table_verification_finished(stop, table_schema, frozen_scn, - table_compaction_map, expected_epoch))) { + table_compaction_map, merge_time_statistics, expected_epoch))) { LOG_WARN("fail to handle table verification finished", KR(ret), K_(tenant_id), K(frozen_scn), KPC(table_schema)); } @@ -492,7 +500,7 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished } } else { // no need to validate cross-cluster checksum if (OB_FAIL(handle_table_verification_finished(stop, table_schema, frozen_scn, - table_compaction_map, expected_epoch))) { + table_compaction_map, merge_time_statistics, expected_epoch))) { LOG_WARN("fail to handle table verification finished", KR(ret), K_(tenant_id), K(frozen_scn), KPC(table_schema)); } @@ -501,7 +509,7 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished } else { // like VIEW that has no tablet, update report_scn for this table and mark it as VERIFIED if (cur_compaction_info.is_index_ckm_verified()) { if (OB_FAIL(handle_table_verification_finished(stop, table_schema, frozen_scn, - table_compaction_map, expected_epoch))) { + table_compaction_map, merge_time_statistics, expected_epoch))) { LOG_WARN("fail to handle table verification finished", KR(ret), K_(tenant_id), K(frozen_scn), KPC(table_schema)); } @@ -526,6 +534,8 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished if (OB_CHECKSUM_ERROR == check_ret) { ret = check_ret; } + const int64_t cost_time_us = ObTimeUtil::current_time() - start_time_us; + merge_time_statistics.update_merge_status_us_.cross_cluster_validator_us_ = cost_time_us; return ret; } @@ -656,6 +666,7 @@ int ObCrossClusterTabletChecksumValidator::handle_table_verification_finished( const ObTableSchema *table_schema, const SCN &frozen_scn, hash::ObHashMap &table_compaction_map, + ObMergeTimeStatistics &merge_time_statistics, const int64_t expected_epoch) { int ret = OB_SUCCESS; @@ -684,6 +695,7 @@ int ObCrossClusterTabletChecksumValidator::handle_table_verification_finished( LOG_WARN("fail to get tablet_ls pairs of current table schema", KR(ret), K_(tenant_id), K(table_id)); } else { if (is_primary_service_) { // only primary major_freeze_service need to write tablet checksum + const int64_t write_start_time_us = ObTimeUtil::current_time(); if (OB_FAIL(contains_first_tablet_in_sys_ls(pairs, is_containing))) { LOG_WARN("fail to check if contains first tablet in sys ls", KR(ret), K_(tenant_id), K(pairs)); } else if (is_containing) { @@ -697,12 +709,17 @@ int ObCrossClusterTabletChecksumValidator::handle_table_verification_finished( cur_compaction_info, table_id, expected_epoch))) { LOG_WARN("fail to write tablet checksum at table level", KR(ret), K_(tenant_id), K(pairs)); } + const int64_t write_cost_time_us = ObTimeUtil::current_time() - write_start_time_us; + merge_time_statistics.update_merge_status_us_.write_tablet_checksum_us_ += write_cost_time_us; } + const int64_t update_start_time_us = ObTimeUtil::current_time(); if (FAILEDx(ObTabletMetaTableCompactionOperator::batch_update_report_scn( tenant_id_, frozen_scn.get_val_for_tx(), pairs, ObTabletReplica::ScnStatus::SCN_STATUS_ERROR))) { LOG_WARN("fail to batch update report_scn", KR(ret), K_(tenant_id), K(pairs)); } + const int64_t update_cost_time_us = ObTimeUtil::current_time() - update_start_time_us; + merge_time_statistics.update_merge_status_us_.update_report_scn_us_ += update_cost_time_us; } } } @@ -925,11 +942,13 @@ int ObIndexChecksumValidator::check_all_table_verification_finished( const hash::ObHashMap &tablet_compaction_map, int64_t &table_count, hash::ObHashMap &table_compaction_map, + ObMergeTimeStatistics &merge_time_statistics, const int64_t expected_epoch) { int ret = OB_SUCCESS; int check_ret = OB_SUCCESS; + const int64_t start_time_us = ObTimeUtil::current_time(); if (OB_UNLIKELY(!frozen_scn.is_valid() || tablet_compaction_map.empty())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K_(tenant_id), K(frozen_scn), @@ -1080,6 +1099,8 @@ int ObIndexChecksumValidator::check_all_table_verification_finished( if (OB_CHECKSUM_ERROR == check_ret) { ret = check_ret; } + const int64_t cost_time_us = ObTimeUtil::current_time() - start_time_us; + merge_time_statistics.update_merge_status_us_.index_validator_us_ = cost_time_us; return ret; } diff --git a/src/rootserver/freeze/ob_checksum_validator.h b/src/rootserver/freeze/ob_checksum_validator.h index 92d4db50e9..42f2aef74d 100644 --- a/src/rootserver/freeze/ob_checksum_validator.h +++ b/src/rootserver/freeze/ob_checksum_validator.h @@ -29,6 +29,7 @@ namespace rootserver class ObZoneMergeManager; class ObFreezeInfoManager; class ObServerManager; +struct ObMergeTimeStatistics; class ObMergeErrorCallback { @@ -69,6 +70,7 @@ public: const hash::ObHashMap &tablet_compaction_map, int64_t &table_count, hash::ObHashMap &table_compaction_map, + ObMergeTimeStatistics &merge_time_statistics, const int64_t expected_epoch); static const int64_t MIN_CHECK_INTERVAL = 10 * 1000 * 1000LL; @@ -91,6 +93,7 @@ private: const hash::ObHashMap &tablet_compaction_map, int64_t &table_count, hash::ObHashMap &table_compaction_map, + ObMergeTimeStatistics &merge_time_statistics, const int64_t expected_epoch) = 0; protected: @@ -118,6 +121,7 @@ private: const hash::ObHashMap &tablet_compaction_map, int64_t &table_count, hash::ObHashMap &table_compaction_map, + ObMergeTimeStatistics &merge_time_statistics, const int64_t expected_epoch) override; // check whether all tablets of this table finished compaction or not, // and execute tablet replica checksum verification if this table has tablet. @@ -154,6 +158,7 @@ private: const hash::ObHashMap &tablet_compaction_map, int64_t &table_count, hash::ObHashMap &table_compaction_map, + ObMergeTimeStatistics &merge_time_statistics, const int64_t expected_epoch) override; int check_need_validate(const bool is_primary_service, const share::SCN &frozen_scn, @@ -169,6 +174,7 @@ private: const share::schema::ObTableSchema *table_schema, const share::SCN &frozen_scn, hash::ObHashMap &table_compaction_map, + ObMergeTimeStatistics &merge_time_statistics, const int64_t expected_epoch); int try_update_tablet_checksum_items(const volatile bool &stop, const ObArray &pairs, @@ -200,6 +206,7 @@ private: const hash::ObHashMap &tablet_compaction_map, int64_t &table_count, hash::ObHashMap &table_compaction_map, + ObMergeTimeStatistics &merge_time_statistics, const int64_t expected_epoch) override; void check_need_validate(const bool is_primary_service, bool &need_validate) const; // handle data table which has tablet and index table(s). its all index tables may finish virification or not diff --git a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp index 1d5f17bbd4..d56dfac2f6 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp +++ b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp @@ -34,7 +34,7 @@ using namespace oceanbase::share; using namespace oceanbase::share::schema; ObMajorMergeProgressChecker::ObMajorMergeProgressChecker() - : is_inited_(false), tenant_id_(OB_INVALID_ID), sql_proxy_(nullptr), + : merge_time_statistics_(), is_inited_(false), tenant_id_(OB_INVALID_ID), sql_proxy_(nullptr), schema_service_(nullptr), zone_merge_mgr_(nullptr), lst_operator_(nullptr), server_trace_(nullptr), tablet_compaction_map_(), table_count_(0), table_ids_(), table_compaction_map_(), tablet_validator_(), index_validator_(), cross_cluster_validator_() @@ -274,6 +274,7 @@ int ObMajorMergeProgressChecker::check_merge_progress( } const int64_t cost_us = ObTimeUtility::current_time() - start_time; + merge_time_statistics_.update_merge_status_us_.check_merge_progress_us_ = cost_us; if (OB_SUCC(ret)) { LOG_INFO("succ to check merge progress", K_(tenant_id), K(global_broadcast_scn), K(cost_us)); } else { @@ -563,14 +564,14 @@ int ObMajorMergeProgressChecker::check_verification( } else if (OB_FAIL(cross_cluster_validator_.check_and_set_validate(is_primary_service, global_broadcast_scn))) { LOG_WARN("fail to check and set validate for cross_cluster_validator", KR(ret), K(global_broadcast_scn)); } else if (OB_FAIL(tablet_validator_.validate_checksum(stop, global_broadcast_scn, tablet_compaction_map_, - table_count_, table_compaction_map_, expected_epoch))) { + table_count_, table_compaction_map_, merge_time_statistics_, expected_epoch))) { LOG_WARN("fail to validate checksum of tablet validator", KR(ret), K(global_broadcast_scn)); } else if (!tablet_compaction_map_.empty()) { if (OB_FAIL(index_validator_.validate_checksum(stop, global_broadcast_scn, - tablet_compaction_map_, table_count_, table_compaction_map_, expected_epoch))) { + tablet_compaction_map_, table_count_, table_compaction_map_, merge_time_statistics_, expected_epoch))) { LOG_WARN("fail to validate checksum of index validator", KR(ret), K(global_broadcast_scn)); } else if (OB_FAIL(cross_cluster_validator_.validate_checksum(stop, global_broadcast_scn, - tablet_compaction_map_, table_count_, table_compaction_map_, expected_epoch))) { + tablet_compaction_map_, table_count_, table_compaction_map_, merge_time_statistics_, expected_epoch))) { LOG_WARN("fail to validate checksum of cross cluster validator", KR(ret), K(global_broadcast_scn)); } } else { diff --git a/src/rootserver/freeze/ob_major_merge_progress_checker.h b/src/rootserver/freeze/ob_major_merge_progress_checker.h index 63728de6f1..9c0089dd48 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_checker.h +++ b/src/rootserver/freeze/ob_major_merge_progress_checker.h @@ -43,6 +43,82 @@ namespace rootserver { class ObZoneMergeManager; +struct ObUpdateMergeStatusTime +{ +public: + ObUpdateMergeStatusTime() + : check_merge_progress_us_(0), tablet_validator_us_(0), index_validator_us_(0), + cross_cluster_validator_us_(0), update_report_scn_us_(0), write_tablet_checksum_us_(0) + {} + + void reset() + { + check_merge_progress_us_ = 0; + tablet_validator_us_ = 0; + index_validator_us_ = 0; + cross_cluster_validator_us_ = 0; + update_report_scn_us_ = 0; + write_tablet_checksum_us_ = 0; + } + + int64_t get_total_time_us() const + { + // Note: update_report_scn_us_ and write_tablet_checksum_us_ are included in + // cross_cluster_validator_us_ now (may be excluded later). + return (check_merge_progress_us_ + tablet_validator_us_ + + index_validator_us_ + cross_cluster_validator_us_); + } + + ObUpdateMergeStatusTime &operator+=(const ObUpdateMergeStatusTime &o) + { + check_merge_progress_us_ += o.check_merge_progress_us_; + tablet_validator_us_ += o.tablet_validator_us_; + index_validator_us_ += o.index_validator_us_; + cross_cluster_validator_us_ += o.cross_cluster_validator_us_; + update_report_scn_us_ += o.update_report_scn_us_; + write_tablet_checksum_us_ += o.write_tablet_checksum_us_; + return *this; + } + + TO_STRING_KV("total_us", get_total_time_us(), K_(check_merge_progress_us), + K_(tablet_validator_us), K_(index_validator_us), K_(cross_cluster_validator_us), + K_(update_report_scn_us), K_(write_tablet_checksum_us)); + + int64_t check_merge_progress_us_; + int64_t tablet_validator_us_; + int64_t index_validator_us_; + int64_t cross_cluster_validator_us_; + int64_t update_report_scn_us_; + int64_t write_tablet_checksum_us_; +}; + +struct ObMergeTimeStatistics +{ +public: + ObMergeTimeStatistics() + : update_merge_status_us_(), idle_us_(0) + {} + + void reset() + { + update_merge_status_us_.reset(); + idle_us_ = 0; + } + + ObMergeTimeStatistics &operator+=(const ObMergeTimeStatistics &o) + { + update_merge_status_us_ += o.update_merge_status_us_; + idle_us_ += o.idle_us_; + return *this; + } + + TO_STRING_KV("total_us", update_merge_status_us_.get_total_time_us() + idle_us_, + K_(update_merge_status_us), K_(idle_us)); + + ObUpdateMergeStatusTime update_merge_status_us_; + int64_t idle_us_; +}; + class ObMajorMergeProgressChecker { public: @@ -80,6 +156,9 @@ public: void set_major_merge_start_time(const int64_t major_merge_start_us); +public: + ObMergeTimeStatistics merge_time_statistics_; + private: int check_tablet(const share::ObTabletInfo &tablet_info, const common::hash::ObHashMap &tablet_map, diff --git a/src/rootserver/freeze/ob_major_merge_scheduler.cpp b/src/rootserver/freeze/ob_major_merge_scheduler.cpp index 18beb2f408..b5db9636cc 100644 --- a/src/rootserver/freeze/ob_major_merge_scheduler.cpp +++ b/src/rootserver/freeze/ob_major_merge_scheduler.cpp @@ -27,6 +27,7 @@ #include "lib/container/ob_se_array_iterator.h" #include "lib/allocator/page_arena.h" #include "lib/profile/ob_trace_id.h" +#include "lib/time/ob_time_utility.h" #include "share/ob_errno.h" #include "share/config/ob_server_config.h" #include "share/tablet/ob_tablet_table_iterator.h" @@ -168,6 +169,7 @@ int ObMajorMergeScheduler::try_idle( const int64_t IMMEDIATE_RETRY_CNT = 3; int64_t idle_time_us = ori_idle_time_us; int64_t merger_check_interval = idling_.get_idle_interval_us(); + const int64_t start_time_us = ObTimeUtil::current_time(); if (OB_SUCC(work_ret)) { fail_count_ = 0; @@ -203,6 +205,8 @@ int ObMajorMergeScheduler::try_idle( } else if (OB_FAIL(idling_.idle(idle_time_us))) { LOG_WARN("fail to idle", KR(ret), K(idle_time_us)); } + const int64_t cost_time_us = ObTimeUtil::current_time() - start_time_us; + progress_checker_.merge_time_statistics_.idle_us_ = cost_time_us; return ret; } @@ -286,6 +290,7 @@ int ObMajorMergeScheduler::do_one_round_major_merge(const int64_t expected_epoch int ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS; + ObMergeTimeStatistics total_merge_time_statistics; HEAP_VARS_2((ObZoneMergeInfoArray, info_array), (ObGlobalMergeInfo, global_info)) { LOG_INFO("start to do one round major_merge", K(expected_epoch)); // loop until 'this round major merge finished' or 'epoch changed' @@ -293,6 +298,7 @@ int ObMajorMergeScheduler::do_one_round_major_merge(const int64_t expected_epoch update_last_run_timestamp(); ObCurTraceId::init(GCONF.self_addr_); ObZoneArray to_merge_zone; + progress_checker_.merge_time_statistics_.reset(); // Place is_last_merge_complete() to the head of this while loop. // So as to break this loop at once, when the last merge is complete. // Otherwise, may run one extra loop that should not run, and thus incur error. @@ -331,8 +337,12 @@ int ObMajorMergeScheduler::do_one_round_major_merge(const int64_t expected_epoch tmp_ret = OB_SUCCESS; // treat as is_merging = true, even though last merge complete check_merge_interval_time(true); + total_merge_time_statistics += progress_checker_.merge_time_statistics_; + LOG_INFO("finish one round of loop in do_one_round_major_merge", K(expected_epoch), + "merge_time_statistics", progress_checker_.merge_time_statistics_); } } + LOG_INFO("finish do_one_round_major_merge", K(expected_epoch), K(total_merge_time_statistics)); return ret; }