fix obtest cases about the max value of SCN and add merge time statistics

This commit is contained in:
obdev
2023-02-07 20:02:29 +08:00
committed by ob-robot
parent 7c3f4eddc5
commit 6dd22cf5f4
5 changed files with 127 additions and 9 deletions

View File

@ -16,6 +16,7 @@
#include "rootserver/freeze/ob_freeze_info_manager.h"
#include "rootserver/freeze/ob_zone_merge_manager.h"
#include "rootserver/freeze/ob_major_freeze_util.h"
#include "rootserver/freeze/ob_major_merge_progress_checker.h"
#include "rootserver/ob_root_utils.h"
#include "lib/mysqlclient/ob_mysql_proxy.h"
#include "lib/mysqlclient/ob_isql_client.h"
@ -103,6 +104,7 @@ int ObChecksumValidatorBase::validate_checksum(
const hash::ObHashMap<ObTabletLSPair, ObTabletCompactionStatus> &tablet_compaction_map,
int64_t &table_count,
hash::ObHashMap<uint64_t, ObTableCompactionInfo> &table_compaction_map,
ObMergeTimeStatistics &merge_time_statistics,
const int64_t expected_epoch)
{
int ret = OB_SUCCESS;
@ -116,7 +118,7 @@ int ObChecksumValidatorBase::validate_checksum(
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K_(tenant_id), K(frozen_scn));
} else if (OB_FAIL(check_all_table_verification_finished(stop, frozen_scn, tablet_compaction_map,
table_count, table_compaction_map, expected_epoch))) {
table_count, table_compaction_map, merge_time_statistics, expected_epoch))) {
LOG_WARN("fail to check all table verification finished", KR(ret), K_(tenant_id), K(frozen_scn));
}
return ret;
@ -199,12 +201,14 @@ int ObTabletChecksumValidator::check_all_table_verification_finished(
const hash::ObHashMap<ObTabletLSPair, ObTabletCompactionStatus> &tablet_compaction_map,
int64_t &table_count,
hash::ObHashMap<uint64_t, ObTableCompactionInfo> &table_compaction_map,
ObMergeTimeStatistics &merge_time_statistics,
const int64_t expected_epoch)
{
int ret = OB_SUCCESS;
int check_ret = OB_SUCCESS;
UNUSED(expected_epoch);
const int64_t start_time_us = ObTimeUtil::current_time();
if (OB_UNLIKELY(!frozen_scn.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K_(tenant_id), K(frozen_scn));
@ -254,6 +258,8 @@ int ObTabletChecksumValidator::check_all_table_verification_finished(
if (OB_CHECKSUM_ERROR == check_ret) {
ret = check_ret;
}
const int64_t cost_time_us = ObTimeUtil::current_time() - start_time_us;
merge_time_statistics.update_merge_status_us_.tablet_validator_us_ = cost_time_us;
return ret;
}
@ -413,11 +419,13 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished
const hash::ObHashMap<share::ObTabletLSPair, share::ObTabletCompactionStatus> &tablet_compaction_map,
int64_t &table_count,
hash::ObHashMap<uint64_t, share::ObTableCompactionInfo> &table_compaction_map,
ObMergeTimeStatistics &merge_time_statistics,
const int64_t expected_epoch)
{
int ret = OB_SUCCESS;
int check_ret = OB_SUCCESS;
const int64_t start_time_us = ObTimeUtil::current_time();
if (OB_UNLIKELY(!frozen_scn.is_valid() || tablet_compaction_map.empty())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K_(tenant_id), K(frozen_scn),
@ -456,7 +464,7 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished
} else if (cur_compaction_info.is_verified()) { // already finished verification, skip it!
} else if (table_schema->has_tablet()) {
if (cur_compaction_info.is_index_ckm_verified()) {
if (need_validate()) { // need to validate cross-cluster checksum
if (need_validate()) { // need to validate cross-cluster checksum
// check whether waiting all tablet checksum has timed out
bool is_wait_tablet_checksum_timeout = check_waiting_tablet_checksum_timeout();
if (OB_UNLIKELY(is_wait_tablet_checksum_timeout)) {
@ -481,7 +489,7 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished
K(frozen_scn), KPC(table_schema));
}
} else if (OB_FAIL(handle_table_verification_finished(stop, table_schema, frozen_scn,
table_compaction_map, expected_epoch))) {
table_compaction_map, merge_time_statistics, expected_epoch))) {
LOG_WARN("fail to handle table verification finished", KR(ret), K_(tenant_id),
K(frozen_scn), KPC(table_schema));
}
@ -492,7 +500,7 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished
}
} else { // no need to validate cross-cluster checksum
if (OB_FAIL(handle_table_verification_finished(stop, table_schema, frozen_scn,
table_compaction_map, expected_epoch))) {
table_compaction_map, merge_time_statistics, expected_epoch))) {
LOG_WARN("fail to handle table verification finished", KR(ret), K_(tenant_id),
K(frozen_scn), KPC(table_schema));
}
@ -501,7 +509,7 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished
} else { // like VIEW that has no tablet, update report_scn for this table and mark it as VERIFIED
if (cur_compaction_info.is_index_ckm_verified()) {
if (OB_FAIL(handle_table_verification_finished(stop, table_schema, frozen_scn,
table_compaction_map, expected_epoch))) {
table_compaction_map, merge_time_statistics, expected_epoch))) {
LOG_WARN("fail to handle table verification finished", KR(ret), K_(tenant_id),
K(frozen_scn), KPC(table_schema));
}
@ -526,6 +534,8 @@ int ObCrossClusterTabletChecksumValidator::check_all_table_verification_finished
if (OB_CHECKSUM_ERROR == check_ret) {
ret = check_ret;
}
const int64_t cost_time_us = ObTimeUtil::current_time() - start_time_us;
merge_time_statistics.update_merge_status_us_.cross_cluster_validator_us_ = cost_time_us;
return ret;
}
@ -656,6 +666,7 @@ int ObCrossClusterTabletChecksumValidator::handle_table_verification_finished(
const ObTableSchema *table_schema,
const SCN &frozen_scn,
hash::ObHashMap<uint64_t, ObTableCompactionInfo> &table_compaction_map,
ObMergeTimeStatistics &merge_time_statistics,
const int64_t expected_epoch)
{
int ret = OB_SUCCESS;
@ -684,6 +695,7 @@ int ObCrossClusterTabletChecksumValidator::handle_table_verification_finished(
LOG_WARN("fail to get tablet_ls pairs of current table schema", KR(ret), K_(tenant_id), K(table_id));
} else {
if (is_primary_service_) { // only primary major_freeze_service need to write tablet checksum
const int64_t write_start_time_us = ObTimeUtil::current_time();
if (OB_FAIL(contains_first_tablet_in_sys_ls(pairs, is_containing))) {
LOG_WARN("fail to check if contains first tablet in sys ls", KR(ret), K_(tenant_id), K(pairs));
} else if (is_containing) {
@ -697,12 +709,17 @@ int ObCrossClusterTabletChecksumValidator::handle_table_verification_finished(
cur_compaction_info, table_id, expected_epoch))) {
LOG_WARN("fail to write tablet checksum at table level", KR(ret), K_(tenant_id), K(pairs));
}
const int64_t write_cost_time_us = ObTimeUtil::current_time() - write_start_time_us;
merge_time_statistics.update_merge_status_us_.write_tablet_checksum_us_ += write_cost_time_us;
}
const int64_t update_start_time_us = ObTimeUtil::current_time();
if (FAILEDx(ObTabletMetaTableCompactionOperator::batch_update_report_scn(
tenant_id_, frozen_scn.get_val_for_tx(),
pairs, ObTabletReplica::ScnStatus::SCN_STATUS_ERROR))) {
LOG_WARN("fail to batch update report_scn", KR(ret), K_(tenant_id), K(pairs));
}
const int64_t update_cost_time_us = ObTimeUtil::current_time() - update_start_time_us;
merge_time_statistics.update_merge_status_us_.update_report_scn_us_ += update_cost_time_us;
}
}
}
@ -925,11 +942,13 @@ int ObIndexChecksumValidator::check_all_table_verification_finished(
const hash::ObHashMap<ObTabletLSPair, ObTabletCompactionStatus> &tablet_compaction_map,
int64_t &table_count,
hash::ObHashMap<uint64_t, ObTableCompactionInfo> &table_compaction_map,
ObMergeTimeStatistics &merge_time_statistics,
const int64_t expected_epoch)
{
int ret = OB_SUCCESS;
int check_ret = OB_SUCCESS;
const int64_t start_time_us = ObTimeUtil::current_time();
if (OB_UNLIKELY(!frozen_scn.is_valid() || tablet_compaction_map.empty())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K_(tenant_id), K(frozen_scn),
@ -1080,6 +1099,8 @@ int ObIndexChecksumValidator::check_all_table_verification_finished(
if (OB_CHECKSUM_ERROR == check_ret) {
ret = check_ret;
}
const int64_t cost_time_us = ObTimeUtil::current_time() - start_time_us;
merge_time_statistics.update_merge_status_us_.index_validator_us_ = cost_time_us;
return ret;
}

View File

@ -29,6 +29,7 @@ namespace rootserver
class ObZoneMergeManager;
class ObFreezeInfoManager;
class ObServerManager;
struct ObMergeTimeStatistics;
class ObMergeErrorCallback
{
@ -69,6 +70,7 @@ public:
const hash::ObHashMap<share::ObTabletLSPair, share::ObTabletCompactionStatus> &tablet_compaction_map,
int64_t &table_count,
hash::ObHashMap<uint64_t, share::ObTableCompactionInfo> &table_compaction_map,
ObMergeTimeStatistics &merge_time_statistics,
const int64_t expected_epoch);
static const int64_t MIN_CHECK_INTERVAL = 10 * 1000 * 1000LL;
@ -91,6 +93,7 @@ private:
const hash::ObHashMap<share::ObTabletLSPair, share::ObTabletCompactionStatus> &tablet_compaction_map,
int64_t &table_count,
hash::ObHashMap<uint64_t, share::ObTableCompactionInfo> &table_compaction_map,
ObMergeTimeStatistics &merge_time_statistics,
const int64_t expected_epoch) = 0;
protected:
@ -118,6 +121,7 @@ private:
const hash::ObHashMap<share::ObTabletLSPair, share::ObTabletCompactionStatus> &tablet_compaction_map,
int64_t &table_count,
hash::ObHashMap<uint64_t, share::ObTableCompactionInfo> &table_compaction_map,
ObMergeTimeStatistics &merge_time_statistics,
const int64_t expected_epoch) override;
// check whether all tablets of this table finished compaction or not,
// and execute tablet replica checksum verification if this table has tablet.
@ -154,6 +158,7 @@ private:
const hash::ObHashMap<share::ObTabletLSPair, share::ObTabletCompactionStatus> &tablet_compaction_map,
int64_t &table_count,
hash::ObHashMap<uint64_t, share::ObTableCompactionInfo> &table_compaction_map,
ObMergeTimeStatistics &merge_time_statistics,
const int64_t expected_epoch) override;
int check_need_validate(const bool is_primary_service,
const share::SCN &frozen_scn,
@ -169,6 +174,7 @@ private:
const share::schema::ObTableSchema *table_schema,
const share::SCN &frozen_scn,
hash::ObHashMap<uint64_t, share::ObTableCompactionInfo> &table_compaction_map,
ObMergeTimeStatistics &merge_time_statistics,
const int64_t expected_epoch);
int try_update_tablet_checksum_items(const volatile bool &stop,
const ObArray<share::ObTabletLSPair> &pairs,
@ -200,6 +206,7 @@ private:
const hash::ObHashMap<share::ObTabletLSPair, share::ObTabletCompactionStatus> &tablet_compaction_map,
int64_t &table_count,
hash::ObHashMap<uint64_t, share::ObTableCompactionInfo> &table_compaction_map,
ObMergeTimeStatistics &merge_time_statistics,
const int64_t expected_epoch) override;
void check_need_validate(const bool is_primary_service, bool &need_validate) const;
// handle data table which has tablet and index table(s). its all index tables may finish virification or not

View File

@ -34,7 +34,7 @@ using namespace oceanbase::share;
using namespace oceanbase::share::schema;
ObMajorMergeProgressChecker::ObMajorMergeProgressChecker()
: is_inited_(false), tenant_id_(OB_INVALID_ID), sql_proxy_(nullptr),
: merge_time_statistics_(), is_inited_(false), tenant_id_(OB_INVALID_ID), sql_proxy_(nullptr),
schema_service_(nullptr), zone_merge_mgr_(nullptr), lst_operator_(nullptr),
server_trace_(nullptr), tablet_compaction_map_(), table_count_(0), table_ids_(),
table_compaction_map_(), tablet_validator_(), index_validator_(), cross_cluster_validator_()
@ -274,6 +274,7 @@ int ObMajorMergeProgressChecker::check_merge_progress(
}
const int64_t cost_us = ObTimeUtility::current_time() - start_time;
merge_time_statistics_.update_merge_status_us_.check_merge_progress_us_ = cost_us;
if (OB_SUCC(ret)) {
LOG_INFO("succ to check merge progress", K_(tenant_id), K(global_broadcast_scn), K(cost_us));
} else {
@ -563,14 +564,14 @@ int ObMajorMergeProgressChecker::check_verification(
} else if (OB_FAIL(cross_cluster_validator_.check_and_set_validate(is_primary_service, global_broadcast_scn))) {
LOG_WARN("fail to check and set validate for cross_cluster_validator", KR(ret), K(global_broadcast_scn));
} else if (OB_FAIL(tablet_validator_.validate_checksum(stop, global_broadcast_scn, tablet_compaction_map_,
table_count_, table_compaction_map_, expected_epoch))) {
table_count_, table_compaction_map_, merge_time_statistics_, expected_epoch))) {
LOG_WARN("fail to validate checksum of tablet validator", KR(ret), K(global_broadcast_scn));
} else if (!tablet_compaction_map_.empty()) {
if (OB_FAIL(index_validator_.validate_checksum(stop, global_broadcast_scn,
tablet_compaction_map_, table_count_, table_compaction_map_, expected_epoch))) {
tablet_compaction_map_, table_count_, table_compaction_map_, merge_time_statistics_, expected_epoch))) {
LOG_WARN("fail to validate checksum of index validator", KR(ret), K(global_broadcast_scn));
} else if (OB_FAIL(cross_cluster_validator_.validate_checksum(stop, global_broadcast_scn,
tablet_compaction_map_, table_count_, table_compaction_map_, expected_epoch))) {
tablet_compaction_map_, table_count_, table_compaction_map_, merge_time_statistics_, expected_epoch))) {
LOG_WARN("fail to validate checksum of cross cluster validator", KR(ret), K(global_broadcast_scn));
}
} else {

View File

@ -43,6 +43,82 @@ namespace rootserver
{
class ObZoneMergeManager;
struct ObUpdateMergeStatusTime
{
public:
ObUpdateMergeStatusTime()
: check_merge_progress_us_(0), tablet_validator_us_(0), index_validator_us_(0),
cross_cluster_validator_us_(0), update_report_scn_us_(0), write_tablet_checksum_us_(0)
{}
void reset()
{
check_merge_progress_us_ = 0;
tablet_validator_us_ = 0;
index_validator_us_ = 0;
cross_cluster_validator_us_ = 0;
update_report_scn_us_ = 0;
write_tablet_checksum_us_ = 0;
}
int64_t get_total_time_us() const
{
// Note: update_report_scn_us_ and write_tablet_checksum_us_ are included in
// cross_cluster_validator_us_ now (may be excluded later).
return (check_merge_progress_us_ + tablet_validator_us_ +
index_validator_us_ + cross_cluster_validator_us_);
}
ObUpdateMergeStatusTime &operator+=(const ObUpdateMergeStatusTime &o)
{
check_merge_progress_us_ += o.check_merge_progress_us_;
tablet_validator_us_ += o.tablet_validator_us_;
index_validator_us_ += o.index_validator_us_;
cross_cluster_validator_us_ += o.cross_cluster_validator_us_;
update_report_scn_us_ += o.update_report_scn_us_;
write_tablet_checksum_us_ += o.write_tablet_checksum_us_;
return *this;
}
TO_STRING_KV("total_us", get_total_time_us(), K_(check_merge_progress_us),
K_(tablet_validator_us), K_(index_validator_us), K_(cross_cluster_validator_us),
K_(update_report_scn_us), K_(write_tablet_checksum_us));
int64_t check_merge_progress_us_;
int64_t tablet_validator_us_;
int64_t index_validator_us_;
int64_t cross_cluster_validator_us_;
int64_t update_report_scn_us_;
int64_t write_tablet_checksum_us_;
};
struct ObMergeTimeStatistics
{
public:
ObMergeTimeStatistics()
: update_merge_status_us_(), idle_us_(0)
{}
void reset()
{
update_merge_status_us_.reset();
idle_us_ = 0;
}
ObMergeTimeStatistics &operator+=(const ObMergeTimeStatistics &o)
{
update_merge_status_us_ += o.update_merge_status_us_;
idle_us_ += o.idle_us_;
return *this;
}
TO_STRING_KV("total_us", update_merge_status_us_.get_total_time_us() + idle_us_,
K_(update_merge_status_us), K_(idle_us));
ObUpdateMergeStatusTime update_merge_status_us_;
int64_t idle_us_;
};
class ObMajorMergeProgressChecker
{
public:
@ -80,6 +156,9 @@ public:
void set_major_merge_start_time(const int64_t major_merge_start_us);
public:
ObMergeTimeStatistics merge_time_statistics_;
private:
int check_tablet(const share::ObTabletInfo &tablet_info,
const common::hash::ObHashMap<ObTabletID, uint64_t> &tablet_map,

View File

@ -27,6 +27,7 @@
#include "lib/container/ob_se_array_iterator.h"
#include "lib/allocator/page_arena.h"
#include "lib/profile/ob_trace_id.h"
#include "lib/time/ob_time_utility.h"
#include "share/ob_errno.h"
#include "share/config/ob_server_config.h"
#include "share/tablet/ob_tablet_table_iterator.h"
@ -168,6 +169,7 @@ int ObMajorMergeScheduler::try_idle(
const int64_t IMMEDIATE_RETRY_CNT = 3;
int64_t idle_time_us = ori_idle_time_us;
int64_t merger_check_interval = idling_.get_idle_interval_us();
const int64_t start_time_us = ObTimeUtil::current_time();
if (OB_SUCC(work_ret)) {
fail_count_ = 0;
@ -203,6 +205,8 @@ int ObMajorMergeScheduler::try_idle(
} else if (OB_FAIL(idling_.idle(idle_time_us))) {
LOG_WARN("fail to idle", KR(ret), K(idle_time_us));
}
const int64_t cost_time_us = ObTimeUtil::current_time() - start_time_us;
progress_checker_.merge_time_statistics_.idle_us_ = cost_time_us;
return ret;
}
@ -286,6 +290,7 @@ int ObMajorMergeScheduler::do_one_round_major_merge(const int64_t expected_epoch
int ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
ObMergeTimeStatistics total_merge_time_statistics;
HEAP_VARS_2((ObZoneMergeInfoArray, info_array), (ObGlobalMergeInfo, global_info)) {
LOG_INFO("start to do one round major_merge", K(expected_epoch));
// loop until 'this round major merge finished' or 'epoch changed'
@ -293,6 +298,7 @@ int ObMajorMergeScheduler::do_one_round_major_merge(const int64_t expected_epoch
update_last_run_timestamp();
ObCurTraceId::init(GCONF.self_addr_);
ObZoneArray to_merge_zone;
progress_checker_.merge_time_statistics_.reset();
// Place is_last_merge_complete() to the head of this while loop.
// So as to break this loop at once, when the last merge is complete.
// Otherwise, may run one extra loop that should not run, and thus incur error.
@ -331,8 +337,12 @@ int ObMajorMergeScheduler::do_one_round_major_merge(const int64_t expected_epoch
tmp_ret = OB_SUCCESS;
// treat as is_merging = true, even though last merge complete
check_merge_interval_time(true);
total_merge_time_statistics += progress_checker_.merge_time_statistics_;
LOG_INFO("finish one round of loop in do_one_round_major_merge", K(expected_epoch),
"merge_time_statistics", progress_checker_.merge_time_statistics_);
}
}
LOG_INFO("finish do_one_round_major_merge", K(expected_epoch), K(total_merge_time_statistics));
return ret;
}