Add protection for too much sstable and wrs
This commit is contained in:
parent
caf2783ed1
commit
d2f227903e
@ -32,6 +32,7 @@ ObMultiVersionGarbageCollector::ObMultiVersionGarbageCollector()
|
||||
last_study_timestamp_(0),
|
||||
last_refresh_timestamp_(0),
|
||||
last_reclaim_timestamp_(0),
|
||||
last_sstable_overflow_timestamp_(0),
|
||||
has_error_when_study_(false),
|
||||
refresh_error_too_long_(false),
|
||||
has_error_when_reclaim_(false),
|
||||
@ -56,6 +57,7 @@ int ObMultiVersionGarbageCollector::init()
|
||||
last_study_timestamp_ = 0;
|
||||
last_refresh_timestamp_ = 0;
|
||||
last_reclaim_timestamp_ = 0;
|
||||
last_sstable_overflow_timestamp_ = 0;
|
||||
has_error_when_study_ = false;
|
||||
refresh_error_too_long_ = false;
|
||||
has_error_when_reclaim_ = false;
|
||||
@ -72,6 +74,7 @@ void ObMultiVersionGarbageCollector::cure()
|
||||
last_study_timestamp_ = 0;
|
||||
last_refresh_timestamp_ = 0;
|
||||
last_reclaim_timestamp_ = 0;
|
||||
last_sstable_overflow_timestamp_ = 0;
|
||||
has_error_when_study_ = false;
|
||||
refresh_error_too_long_ = false;
|
||||
has_error_when_reclaim_ = false;
|
||||
@ -134,6 +137,7 @@ int ObMultiVersionGarbageCollector::stop()
|
||||
last_study_timestamp_ = 0;
|
||||
last_refresh_timestamp_ = 0;
|
||||
last_reclaim_timestamp_ = 0;
|
||||
last_sstable_overflow_timestamp_ = 0;
|
||||
has_error_when_study_ = false;
|
||||
refresh_error_too_long_ = false;
|
||||
has_error_when_reclaim_ = false;
|
||||
@ -536,7 +540,8 @@ int ObMultiVersionGarbageCollector::refresh_()
|
||||
|
||||
// Step3: cache the reserved snapshot of active txn for future use.
|
||||
// NB: be careful of the lower value and maximum value, which are not reasonable
|
||||
decide_reserved_snapshot_version_(collector.get_reserved_snapshot_version());
|
||||
decide_reserved_snapshot_version_(collector.get_reserved_snapshot_version(),
|
||||
collector.get_reserved_snapshot_type());
|
||||
|
||||
timeguard.click("decide_reserved_snapshot_");
|
||||
|
||||
@ -561,7 +566,8 @@ void ObMultiVersionGarbageCollector::decide_gc_status_(const ObMultiVersionGCSta
|
||||
}
|
||||
|
||||
void ObMultiVersionGarbageCollector::decide_reserved_snapshot_version_(
|
||||
const share::SCN reserved_snapshot)
|
||||
const share::SCN reserved_snapshot,
|
||||
const ObMultiVersionSnapshotType reserved_type)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
||||
@ -575,12 +581,26 @@ void ObMultiVersionGarbageCollector::decide_reserved_snapshot_version_(
|
||||
// We ignore a reserved snapshot that is too stale and only report WARN,
|
||||
// because servers may go offline and come back online suddenly and report a
|
||||
// stale txn version. We report ERROR for a snapshot that is far too old.
|
||||
if ((global_reserved_snapshot_.get_val_for_tx() -
|
||||
reserved_snapshot.get_val_for_tx()) / 1000 > 100 * 1_min) {
|
||||
// NB: There may be WRS service which disables the monotonic weak read and
|
||||
// finally causes the timestamp to go back, so we should ignore it.
|
||||
if (ObMultiVersionSnapshotType::MIN_UNALLOCATED_WRS == reserved_type
|
||||
&& !transaction::ObWeakReadUtil::enable_monotonic_weak_read(MTL_ID())) {
|
||||
MVCC_LOG(WARN, "update a smaller reserved snapshot with wrs disable monotonic weak read",
|
||||
K(ret), KPC(this), K(global_reserved_snapshot_), K(reserved_snapshot));
|
||||
} else if (ObMultiVersionSnapshotType::MIN_UNALLOCATED_WRS == reserved_type
|
||||
&& ((global_reserved_snapshot_.get_val_for_tx() -
|
||||
reserved_snapshot.get_val_for_tx()) / 1000 >
|
||||
MAX(transaction::ObWeakReadUtil::max_stale_time_for_weak_consistency(MTL_ID()),
|
||||
100 * 1_min))) {
|
||||
MVCC_LOG(ERROR, "update a too too smaller reserved snapshot with wrs!!!",
|
||||
K(ret), KPC(this), K(global_reserved_snapshot_), K(reserved_snapshot),
|
||||
K(transaction::ObWeakReadUtil::max_stale_time_for_weak_consistency(MTL_ID())));
|
||||
} else if ((global_reserved_snapshot_.get_val_for_tx() -
|
||||
reserved_snapshot.get_val_for_tx()) / 1000 > 100 * 1_min) {
|
||||
MVCC_LOG(ERROR, "update a too too smaller reserved snapshot!!!", K(ret), KPC(this),
|
||||
K(global_reserved_snapshot_), K(reserved_snapshot));
|
||||
} else {
|
||||
MVCC_LOG(WARN, "update a too too smaller reserved snapshot!", K(ret), KPC(this),
|
||||
MVCC_LOG(WARN, "update a too smaller reserved snapshot!", K(ret), KPC(this),
|
||||
K(global_reserved_snapshot_), K(reserved_snapshot));
|
||||
}
|
||||
} else {
|
||||
@ -1131,7 +1151,9 @@ int ObMultiVersionGarbageCollector::is_disk_almost_full_(bool &is_almost_full)
|
||||
is_almost_full = false;
|
||||
const int64_t required_size = 0;
|
||||
|
||||
if (OB_FAIL(THE_IO_DEVICE->check_space_full(required_size))) {
|
||||
// Case1: io device is almost full
|
||||
if (!is_almost_full
|
||||
&& OB_FAIL(THE_IO_DEVICE->check_space_full(required_size))) {
|
||||
if (OB_SERVER_OUTOF_DISK_SPACE == ret) {
|
||||
ret = OB_SUCCESS;
|
||||
is_almost_full = true;
|
||||
@ -1141,9 +1163,38 @@ int ObMultiVersionGarbageCollector::is_disk_almost_full_(bool &is_almost_full)
|
||||
}
|
||||
}
|
||||
|
||||
// Case2: sstable is overflow during merge
|
||||
if (!is_almost_full
|
||||
&& is_sstable_overflow_()) {
|
||||
is_almost_full = true;
|
||||
MVCC_LOG(WARN, "disk is almost full, we should give up", KPC(this));
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void ObMultiVersionGarbageCollector::report_sstable_overflow()
|
||||
{
|
||||
const int64_t current_timestamp = common::ObTimeUtility::current_time();
|
||||
ATOMIC_STORE(&last_sstable_overflow_timestamp_, current_timestamp);
|
||||
MVCC_LOG_RET(WARN, OB_SIZE_OVERFLOW, "sstable is alomost overflow, we should give up", KPC(this));
|
||||
}
|
||||
|
||||
bool ObMultiVersionGarbageCollector::is_sstable_overflow_()
|
||||
{
|
||||
bool b_ret = false;
|
||||
const int64_t current_timestamp = common::ObTimeUtility::current_time();
|
||||
const int64_t last_sstable_overflow_timestamp = ATOMIC_LOAD(&last_sstable_overflow_timestamp_);
|
||||
if (0 != last_sstable_overflow_timestamp
|
||||
&& current_timestamp >= last_sstable_overflow_timestamp
|
||||
// We currenly think that there may be a disk full problem if there exists
|
||||
// an sstable overflow error within 5 minutes
|
||||
&& current_timestamp - last_sstable_overflow_timestamp <= 5 * 1_min) {
|
||||
b_ret = true;
|
||||
}
|
||||
return b_ret;
|
||||
}
|
||||
|
||||
ObMultiVersionGCSnapshotCalculator::ObMultiVersionGCSnapshotCalculator()
|
||||
: reserved_snapshot_version_(share::SCN::max_scn()),
|
||||
reserved_snapshot_type_(ObMultiVersionSnapshotType::MIN_SNAPSHOT_TYPE),
|
||||
@ -1211,6 +1262,11 @@ share::SCN ObMultiVersionGCSnapshotCalculator::get_reserved_snapshot_version() c
|
||||
return reserved_snapshot_version_;
|
||||
}
|
||||
|
||||
// Accessor: returns the value currently held in reserved_snapshot_type_.
ObMultiVersionSnapshotType ObMultiVersionGCSnapshotCalculator::get_reserved_snapshot_type() const
|
||||
{
|
||||
return reserved_snapshot_type_;
|
||||
}
|
||||
|
||||
ObMultiVersionGCStatus ObMultiVersionGCSnapshotCalculator::get_status() const
|
||||
{
|
||||
return status_;
|
||||
|
@ -179,6 +179,7 @@ public:
|
||||
const int64_t create_time,
|
||||
const ObAddr addr);
|
||||
share::SCN get_reserved_snapshot_version() const;
|
||||
ObMultiVersionSnapshotType get_reserved_snapshot_type() const;
|
||||
ObMultiVersionGCStatus get_status() const;
|
||||
bool is_this_server_disabled() const
|
||||
{ return is_this_server_disabled_; }
|
||||
@ -312,6 +313,9 @@ public:
|
||||
// get_reserved_snapshot_for_active_txn fetches the cached globally reserved
|
||||
// snapshot if it was updated in time; otherwise max_scn() is used for availability
|
||||
share::SCN get_reserved_snapshot_for_active_txn() const;
|
||||
// report_sstable_overflow records the time of the latest sstable overflow
|
||||
// event, which we then use to disable mvcc gc
|
||||
void report_sstable_overflow();
|
||||
// is_gc_disabled shows the global gc status of whether the gc is disabled
|
||||
bool is_gc_disabled() const;
|
||||
|
||||
@ -319,6 +323,7 @@ public:
|
||||
K_(last_study_timestamp),
|
||||
K_(last_refresh_timestamp),
|
||||
K_(last_reclaim_timestamp),
|
||||
K_(last_sstable_overflow_timestamp),
|
||||
K_(has_error_when_study),
|
||||
K_(refresh_error_too_long),
|
||||
K_(has_error_when_reclaim),
|
||||
@ -342,8 +347,10 @@ private:
|
||||
int study_max_committed_txn_version(share::SCN &max_committed_txn_version);
|
||||
int study_min_active_txn_version(share::SCN &min_active_txn_version);
|
||||
int is_disk_almost_full_(bool &is_almost_full);
|
||||
bool is_sstable_overflow_();
|
||||
void decide_gc_status_(const ObMultiVersionGCStatus gc_status);
|
||||
void decide_reserved_snapshot_version_(const share::SCN reserved_snapshot);
|
||||
void decide_reserved_snapshot_version_(const share::SCN reserved_snapshot,
|
||||
const ObMultiVersionSnapshotType reserved_type);
|
||||
|
||||
// ============== for test ================
|
||||
OB_NOINLINE bool can_report();
|
||||
@ -355,6 +362,8 @@ private:
|
||||
int64_t last_study_timestamp_;
|
||||
int64_t last_refresh_timestamp_;
|
||||
int64_t last_reclaim_timestamp_;
|
||||
// the last timestamp at which an sstable reported overflow during merge
|
||||
int64_t last_sstable_overflow_timestamp_;
|
||||
bool has_error_when_study_;
|
||||
// refresh too long without contacting inner table successfully.
|
||||
// It may be caused by inner table majority crash or network issues.
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "lib/container/ob_array_iterator.h"
|
||||
#include "storage/meta_mem/ob_tablet_pointer.h"
|
||||
#include "storage/ddl/ob_tablet_ddl_kv.h"
|
||||
#include "storage/concurrency_control/ob_multi_version_garbage_collector.h"
|
||||
|
||||
using namespace oceanbase;
|
||||
using namespace oceanbase::blocksstable;
|
||||
@ -1238,9 +1239,11 @@ int ObTabletTableStore::check_ready_for_read()
|
||||
} else if (minor_tables_.count() + 1 > MAX_SSTABLE_CNT_IN_STORAGE) {
|
||||
ret = OB_SIZE_OVERFLOW;
|
||||
LOG_WARN("Too Many sstables in table store", K(ret), KPC(this), KPC(tablet_ptr_));
|
||||
MTL(concurrency_control::ObMultiVersionGarbageCollector *)->report_sstable_overflow();
|
||||
} else if (get_table_count() > ObTabletTableStore::MAX_SSTABLE_CNT) {
|
||||
ret = OB_SIZE_OVERFLOW;
|
||||
LOG_WARN("Too Many sstables, cannot add another sstable any more", K(ret), KPC(this), KPC(tablet_ptr_));
|
||||
MTL(concurrency_control::ObMultiVersionGarbageCollector *)->report_sstable_overflow();
|
||||
ObPartitionMergePolicy::diagnose_table_count_unsafe(MAJOR_MERGE, *tablet_ptr_);
|
||||
} else if (minor_tables_.empty()) {
|
||||
is_ready_for_read_ = true;
|
||||
|
Loading…
x
Reference in New Issue
Block a user