diff --git a/src/storage/concurrency_control/ob_multi_version_garbage_collector.cpp b/src/storage/concurrency_control/ob_multi_version_garbage_collector.cpp index ec4916d388..56ce301836 100644 --- a/src/storage/concurrency_control/ob_multi_version_garbage_collector.cpp +++ b/src/storage/concurrency_control/ob_multi_version_garbage_collector.cpp @@ -32,6 +32,7 @@ ObMultiVersionGarbageCollector::ObMultiVersionGarbageCollector() last_study_timestamp_(0), last_refresh_timestamp_(0), last_reclaim_timestamp_(0), + last_sstable_overflow_timestamp_(0), has_error_when_study_(false), refresh_error_too_long_(false), has_error_when_reclaim_(false), @@ -56,6 +57,7 @@ int ObMultiVersionGarbageCollector::init() last_study_timestamp_ = 0; last_refresh_timestamp_ = 0; last_reclaim_timestamp_ = 0; + last_sstable_overflow_timestamp_ = 0; has_error_when_study_ = false; refresh_error_too_long_ = false; has_error_when_reclaim_ = false; @@ -72,6 +74,7 @@ void ObMultiVersionGarbageCollector::cure() last_study_timestamp_ = 0; last_refresh_timestamp_ = 0; last_reclaim_timestamp_ = 0; + last_sstable_overflow_timestamp_ = 0; has_error_when_study_ = false; refresh_error_too_long_ = false; has_error_when_reclaim_ = false; @@ -134,6 +137,7 @@ int ObMultiVersionGarbageCollector::stop() last_study_timestamp_ = 0; last_refresh_timestamp_ = 0; last_reclaim_timestamp_ = 0; + last_sstable_overflow_timestamp_ = 0; has_error_when_study_ = false; refresh_error_too_long_ = false; has_error_when_reclaim_ = false; @@ -536,7 +540,8 @@ int ObMultiVersionGarbageCollector::refresh_() // Step3: cache the reserved snapshot of active txn for future use. 
// NB: be care of the lower value and maximum value which is not reasonable - decide_reserved_snapshot_version_(collector.get_reserved_snapshot_version()); + decide_reserved_snapshot_version_(collector.get_reserved_snapshot_version(), + collector.get_reserved_snapshot_type()); timeguard.click("decide_reserved_snapshot_"); @@ -561,7 +566,8 @@ void ObMultiVersionGarbageCollector::decide_gc_status_(const ObMultiVersionGCSta } void ObMultiVersionGarbageCollector::decide_reserved_snapshot_version_( - const share::SCN reserved_snapshot) + const share::SCN reserved_snapshot, + const ObMultiVersionSnapshotType reserved_type) { int ret = OB_SUCCESS; @@ -575,12 +581,26 @@ void ObMultiVersionGarbageCollector::decide_reserved_snapshot_version_( // We ignore the reserved snapshot with too late snapshot and report WARN // because there may be servers offline and online suddenly and report a // stale txn version. And we report error for a too too old snapshot. - if ((global_reserved_snapshot_.get_val_for_tx() - - reserved_snapshot.get_val_for_tx()) / 1000 > 100 * 1_min) { + // NB: The WRS service may have monotonic weak read disabled, which can + // cause the timestamp to go back, so we should ignore it. 
+ if (ObMultiVersionSnapshotType::MIN_UNALLOCATED_WRS == reserved_type + && !transaction::ObWeakReadUtil::enable_monotonic_weak_read(MTL_ID())) { + MVCC_LOG(WARN, "update a smaller reserved snapshot with wrs disable monotonic weak read", + K(ret), KPC(this), K(global_reserved_snapshot_), K(reserved_snapshot)); + } else if (ObMultiVersionSnapshotType::MIN_UNALLOCATED_WRS == reserved_type + && ((global_reserved_snapshot_.get_val_for_tx() - + reserved_snapshot.get_val_for_tx()) / 1000 > + MAX(transaction::ObWeakReadUtil::max_stale_time_for_weak_consistency(MTL_ID()), + 100 * 1_min))) { + MVCC_LOG(ERROR, "update a too too smaller reserved snapshot with wrs!!!", + K(ret), KPC(this), K(global_reserved_snapshot_), K(reserved_snapshot), + K(transaction::ObWeakReadUtil::max_stale_time_for_weak_consistency(MTL_ID()))); + } else if ((global_reserved_snapshot_.get_val_for_tx() - + reserved_snapshot.get_val_for_tx()) / 1000 > 100 * 1_min) { MVCC_LOG(ERROR, "update a too too smaller reserved snapshot!!!", K(ret), KPC(this), K(global_reserved_snapshot_), K(reserved_snapshot)); } else { - MVCC_LOG(WARN, "update a too too smaller reserved snapshot!", K(ret), KPC(this), + MVCC_LOG(WARN, "update a too smaller reserved snapshot!", K(ret), KPC(this), K(global_reserved_snapshot_), K(reserved_snapshot)); } } else { @@ -1131,7 +1151,9 @@ int ObMultiVersionGarbageCollector::is_disk_almost_full_(bool &is_almost_full) is_almost_full = false; const int64_t required_size = 0; - if (OB_FAIL(THE_IO_DEVICE->check_space_full(required_size))) { + // Case1: io device is almost full + if (!is_almost_full + && OB_FAIL(THE_IO_DEVICE->check_space_full(required_size))) { if (OB_SERVER_OUTOF_DISK_SPACE == ret) { ret = OB_SUCCESS; is_almost_full = true; @@ -1141,9 +1163,38 @@ int ObMultiVersionGarbageCollector::is_disk_almost_full_(bool &is_almost_full) } } + // Case2: sstable is overflow during merge + if (!is_almost_full + && is_sstable_overflow_()) { + is_almost_full = true; + MVCC_LOG(WARN, "disk is 
almost full, we should give up", KPC(this)); + } + return ret; } +void ObMultiVersionGarbageCollector::report_sstable_overflow() +{ + const int64_t current_timestamp = common::ObTimeUtility::current_time(); + ATOMIC_STORE(&last_sstable_overflow_timestamp_, current_timestamp); + MVCC_LOG_RET(WARN, OB_SIZE_OVERFLOW, "sstable is alomost overflow, we should give up", KPC(this)); +} + +bool ObMultiVersionGarbageCollector::is_sstable_overflow_() +{ + bool b_ret = false; + const int64_t current_timestamp = common::ObTimeUtility::current_time(); + const int64_t last_sstable_overflow_timestamp = ATOMIC_LOAD(&last_sstable_overflow_timestamp_); + if (0 != last_sstable_overflow_timestamp + && current_timestamp >= last_sstable_overflow_timestamp + // We currently think that there may be a disk full problem if there exists + // an sstable overflow error within 5 minutes + && current_timestamp - last_sstable_overflow_timestamp <= 5 * 1_min) { + b_ret = true; + } + return b_ret; +} + ObMultiVersionGCSnapshotCalculator::ObMultiVersionGCSnapshotCalculator() : reserved_snapshot_version_(share::SCN::max_scn()), reserved_snapshot_type_(ObMultiVersionSnapshotType::MIN_SNAPSHOT_TYPE), @@ -1211,6 +1262,11 @@ share::SCN ObMultiVersionGCSnapshotCalculator::get_reserved_snapshot_version() c return reserved_snapshot_version_; } +ObMultiVersionSnapshotType ObMultiVersionGCSnapshotCalculator::get_reserved_snapshot_type() const +{ + return reserved_snapshot_type_; +} + ObMultiVersionGCStatus ObMultiVersionGCSnapshotCalculator::get_status() const { return status_; diff --git a/src/storage/concurrency_control/ob_multi_version_garbage_collector.h b/src/storage/concurrency_control/ob_multi_version_garbage_collector.h index 8cc89a57b4..c8c3a44c10 100644 --- a/src/storage/concurrency_control/ob_multi_version_garbage_collector.h +++ b/src/storage/concurrency_control/ob_multi_version_garbage_collector.h @@ -179,6 +179,7 @@ public: const int64_t create_time, const ObAddr addr); share::SCN 
get_reserved_snapshot_version() const; + ObMultiVersionSnapshotType get_reserved_snapshot_type() const; ObMultiVersionGCStatus get_status() const; bool is_this_server_disabled() const { return is_this_server_disabled_; } @@ -312,6 +313,9 @@ public: // get_reserved_snapshot_for_active_txn fetch the cached globally reserved // snapshot if updated in time, otherwise max_scn() is used for available share::SCN get_reserved_snapshot_for_active_txn() const; + // report_sstable_overflow marks the last sstable's overflow events and we + // will use it to disable mvcc gc + void report_sstable_overflow(); // is_gc_disabled shows the global gc status of whether the gc is disabled bool is_gc_disabled() const; @@ -319,6 +323,7 @@ public: K_(last_study_timestamp), K_(last_refresh_timestamp), K_(last_reclaim_timestamp), + K_(last_sstable_overflow_timestamp), K_(has_error_when_study), K_(refresh_error_too_long), K_(has_error_when_reclaim), @@ -342,8 +347,10 @@ private: int study_max_committed_txn_version(share::SCN &max_committed_txn_version); int study_min_active_txn_version(share::SCN &min_active_txn_version); int is_disk_almost_full_(bool &is_almost_full); + bool is_sstable_overflow_(); void decide_gc_status_(const ObMultiVersionGCStatus gc_status); - void decide_reserved_snapshot_version_(const share::SCN reserved_snapshot); + void decide_reserved_snapshot_version_(const share::SCN reserved_snapshot, + const ObMultiVersionSnapshotType reserved_type); // ============== for test ================ OB_NOINLINE bool can_report(); @@ -355,6 +362,8 @@ private: int64_t last_study_timestamp_; int64_t last_refresh_timestamp_; int64_t last_reclaim_timestamp_; + // last timestamp sstable reports overflow during merge + int64_t last_sstable_overflow_timestamp_; bool has_error_when_study_; // refresh too long without contacting inner table successfully. // It may be caused by inner table majority crash or network issues. 
diff --git a/src/storage/tablet/ob_tablet_table_store.cpp b/src/storage/tablet/ob_tablet_table_store.cpp index 5d7eac46a5..43c22c77e6 100644 --- a/src/storage/tablet/ob_tablet_table_store.cpp +++ b/src/storage/tablet/ob_tablet_table_store.cpp @@ -23,6 +23,7 @@ #include "lib/container/ob_array_iterator.h" #include "storage/meta_mem/ob_tablet_pointer.h" #include "storage/ddl/ob_tablet_ddl_kv.h" +#include "storage/concurrency_control/ob_multi_version_garbage_collector.h" using namespace oceanbase; using namespace oceanbase::blocksstable; @@ -1238,9 +1239,11 @@ int ObTabletTableStore::check_ready_for_read() } else if (minor_tables_.count() + 1 > MAX_SSTABLE_CNT_IN_STORAGE) { ret = OB_SIZE_OVERFLOW; LOG_WARN("Too Many sstables in table store", K(ret), KPC(this), KPC(tablet_ptr_)); + MTL(concurrency_control::ObMultiVersionGarbageCollector *)->report_sstable_overflow(); } else if (get_table_count() > ObTabletTableStore::MAX_SSTABLE_CNT) { ret = OB_SIZE_OVERFLOW; LOG_WARN("Too Many sstables, cannot add another sstable any more", K(ret), KPC(this), KPC(tablet_ptr_)); + MTL(concurrency_control::ObMultiVersionGarbageCollector *)->report_sstable_overflow(); ObPartitionMergePolicy::diagnose_table_count_unsafe(MAJOR_MERGE, *tablet_ptr_); } else if (minor_tables_.empty()) { is_ready_for_read_ = true;