Fix some problems in checkpoint_mgr

This commit is contained in:
obdev
2023-01-10 10:11:54 +00:00
committed by ob-robot
parent 2e53238aa9
commit 15a782fe2b
9 changed files with 113 additions and 37 deletions

View File

@ -289,11 +289,11 @@ WAIT_EVENT_DEF(TENANT_MEM_USAGE_LOCK_WAIT, 15244, "latch: tenant memory usage lo
WAIT_EVENT_DEF(TX_TABLE_LOCK_WAIT, 15245, "rwlock: tx table lock wait", "address", "number", "tries", CONCURRENCY, "rwlock: tx table lock wait", true)
WAIT_EVENT_DEF(MEMTABLE_STAT_LOCK_WAIT, 15246, "spinlock: memtable stat lock wait", "address", "number", "tries", CONCURRENCY, "spinlock: memtable stat lock wait", true)
WAIT_EVENT_DEF(DEADLOCK_DETECT_LOCK_WAIT, 15247, "spinlock: deadlock detect lock wait", "address", "number", "tries", CONCURRENCY, "spinlock: deadlock detect lock wait", true)
WAIT_EVENT_DEF(CLOG_CKPT_RWLOCK_WAIT, 15248, "rwlock: clog checkpoint rwlock wait", "address", "number", "tries", CONCURRENCY, "rwlock: clog checkpoint rwlock wait", true)
//WAIT_EVENT_DEF(BACKUP_DATA_SERVICE_COND_WAIT, 15248, "backup data service condition wait", "address", "", "", CONCURRENCY, "backup data service condition wait", true) used by backup
//WAIT_EVENT_DEF(BACKUP_CLEAN_SERVICE_COND_WAIT, 15249, "backup clean service condition wait", "address", "", "", CONCURRENCY, "backup clean service condition wait", true)
//WAIT_EVENT_DEF(BACKUP_ARCHIVE_SERVICE_COND_WAIT, 15250, "backup archive service condition wait", "address", "", "", CONCURRENCY, "backup archive service condition wait", true)
WAIT_EVENT_DEF(SRS_LOCK_WAIT, 15251, "latch: srs lock wait", "address", "number", "tries", CONCURRENCY, "latch: srs lock wait", true)
WAIT_EVENT_DEF(CLOG_CKPT_RWLOCK_WAIT, 15252, "rwlock: clog checkpoint rwlock wait", "address", "number", "tries", CONCURRENCY, "rwlock: clog checkpoint rwlock wait", true)
//transaction
WAIT_EVENT_DEF(END_TRANS_WAIT, 16001, "wait end trans", "rollback", "trans_hash_value", "participant_count", COMMIT,"wait end trans", false)

View File

@ -104,7 +104,7 @@ public:
{}
~TestMemtable()
{
ObFreezeCheckpoint::remove_from_data_checkpoint(true);
ObFreezeCheckpoint::remove_from_data_checkpoint();
}
void set_rec_scn(share::SCN rec_scn)

View File

@ -91,7 +91,7 @@ public:
{}
~TestMemtable()
{
ObFreezeCheckpoint::remove_from_data_checkpoint(true);
ObFreezeCheckpoint::remove_from_data_checkpoint();
}
void set_rec_scn(share::SCN rec_scn)

View File

@ -122,6 +122,23 @@ ObFreezeCheckpoint *ObCheckpointDList::get_first_greater(const SCN rec_scn)
return cur;
}
int ObCheckpointDList::get_need_freeze_checkpoints(const SCN rec_scn,
ObIArray<ObFreezeCheckpoint*> &freeze_checkpoints)
{
int ret = OB_SUCCESS;
ObFreezeCheckpoint *head = checkpoint_list_.get_header();
ObFreezeCheckpoint *cur = head->get_next();
while (cur != head && cur->get_rec_scn() <= rec_scn) {
if (OB_FAIL(freeze_checkpoints.push_back(cur))) {
STORAGE_LOG(WARN, "push_back into freeze_checkpoints failed");
break;
} else {
cur = cur->get_next();
}
}
return ret;
}
int ObCheckpointDList::get_freezecheckpoint_info(
ObIArray<checkpoint::ObFreezeCheckpointVTInfo> &freeze_checkpoint_array)
{
@ -191,7 +208,6 @@ int ObDataCheckpoint::safe_to_destroy(bool &is_safe_destroy)
ls_frozen_list_.reset();
active_list_.reset();
prepare_list_.reset();
ls_ = nullptr;
if (OB_FAIL(ret)) {
is_safe_destroy = false;
@ -228,12 +244,9 @@ int ObDataCheckpoint::flush(SCN recycle_scn, bool need_freeze)
{
int ret = OB_SUCCESS;
if (need_freeze) {
if (get_rec_scn() <= recycle_scn) {
if (!is_flushing() &&
!has_prepared_flush_checkpoint() &&
OB_FAIL(ls_->logstream_freeze())) {
STORAGE_LOG(WARN, "minor freeze failed", K(ret), K(ls_->get_ls_id()));
}
if (OB_FAIL(freeze_base_on_needs_(recycle_scn))) {
STORAGE_LOG(WARN, "freeze_base_on_needs failed",
K(ret), K(ls_->get_ls_id()), K(recycle_scn));
}
} else if (OB_FAIL(traversal_flush_())) {
STORAGE_LOG(WARN, "traversal_flush failed", K(ret), K(ls_->get_ls_id()));
@ -248,7 +261,7 @@ int ObDataCheckpoint::ls_freeze(SCN rec_scn)
ObCheckPointService *checkpoint_srv = MTL(ObCheckPointService *);
set_ls_freeze_finished_(false);
if (OB_FAIL(checkpoint_srv->add_ls_freeze_task(this, rec_scn))) {
STORAGE_LOG(ERROR, "ls_freeze add task failed", K(ret));
STORAGE_LOG(WARN, "ls_freeze add task failed", K(ret));
set_ls_freeze_finished_(true);
}
return ret;
@ -536,11 +549,6 @@ int ObDataCheckpoint::unlink_from_prepare(ObFreezeCheckpoint *ob_freeze_checkpoi
return ret;
}
bool ObDataCheckpoint::has_prepared_flush_checkpoint()
{
return !prepare_list_.is_empty();
}
int ObDataCheckpoint::get_freezecheckpoint_info(
ObIArray<checkpoint::ObFreezeCheckpointVTInfo> &freeze_checkpoint_array)
{
@ -704,6 +712,67 @@ int ObDataCheckpoint::transfer_from_active_to_prepare_(ObFreezeCheckpoint *ob_fr
return ret;
}
// Gathers the tablet ids of the checkpoints that need to be frozen for
// recycle_scn.
//
// While holding lock_, candidates are collected first from new_create_list_
// and then from active_list_ via get_need_freeze_checkpoints(); the tablet id
// of every candidate is then appended to flush_tablets.
//
// @param recycle_scn    bound handed to get_need_freeze_checkpoints()
// @param flush_tablets  output array receiving one tablet id per candidate
// @return OB_SUCCESS, or the first error hit while collecting or appending
int ObDataCheckpoint::get_need_flush_tablets_(const share::SCN recycle_scn,
                                              ObIArray<ObTabletID> &flush_tablets)
{
  int ret = OB_SUCCESS;
  ObSpinLockGuard guard(lock_);
  ObSArray<ObFreezeCheckpoint*> candidates;

  // Step 1: collect candidate checkpoints from both lists.
  if (OB_FAIL(new_create_list_.get_need_freeze_checkpoints(recycle_scn, candidates))) {
    STORAGE_LOG(WARN, "get_need_freeze_checkpoints failed", K(ret));
  } else if (OB_FAIL(active_list_.get_need_freeze_checkpoints(recycle_scn, candidates))) {
    STORAGE_LOG(WARN, "get_need_freeze_checkpoints failed", K(ret));
  }

  // Step 2: translate candidates into tablet ids; skipped if step 1 failed.
  for (int idx = 0; OB_SUCC(ret) && idx < candidates.count(); ++idx) {
    ObFreezeCheckpoint *checkpoint = candidates[idx];
    if (OB_FAIL(flush_tablets.push_back(checkpoint->get_tablet_id()))) {
      STORAGE_LOG(WARN, "get_flush_tablets failed", K(ret));
    }
  }
  return ret;
}
// Triggers the freeze needed so that the checkpoint can advance to recycle_scn.
//
// Nothing is done unless get_rec_scn() <= recycle_scn, is_flushing() is false
// and prepare_list_ is empty.
//
// Strategy:
//  - If the number of checkpoints in new_create_list_ plus active_list_ does
//    not exceed MAX_FREEZE_CHECKPOINT_NUM, do a logstream freeze directly.
//  - Otherwise compute which tablets actually need flushing. If they make up
//    more than TABLET_FREEZE_PERCENT percent of the waiting checkpoints, do a
//    logstream freeze; if they are only a small share, freeze just those
//    tablets one by one.
//  - If computing the tablets fails, fall back to a logstream freeze.
//
// @param recycle_scn  the scn the checkpoint is expected to advance to
// @return OB_SUCCESS, or the error of the freeze call that failed
int ObDataCheckpoint::freeze_base_on_needs_(share::SCN recycle_scn)
{
  int ret = OB_SUCCESS;
  if (get_rec_scn() <= recycle_scn) {
    if (!is_flushing() && prepare_list_.is_empty()) {
      const int64_t wait_flush_num =
        new_create_list_.checkpoint_list_.get_size()
        + active_list_.checkpoint_list_.get_size();
      bool logstream_freeze = true;
      ObSArray<ObTabletID> need_flush_tablets;
      if (wait_flush_num > MAX_FREEZE_CHECKPOINT_NUM) {
        if (OB_FAIL(get_need_flush_tablets_(recycle_scn, need_flush_tablets))) {
          // Best effort: the callee has already logged the failure. Keep
          // logstream_freeze == true; ret is overwritten by the freeze below.
        } else {
          // 64-bit arithmetic so that "* 100" cannot overflow an int.
          const int64_t need_flush_num = need_flush_tablets.count();
          // Freeze the whole log stream only when a large share of the waiting
          // memtables has to be flushed; a small share is handled by
          // per-tablet freezes to relieve pressure on mini minor merge.
          logstream_freeze =
            need_flush_num * 100 / wait_flush_num > TABLET_FREEZE_PERCENT;
        }
      }

      if (logstream_freeze) {
        if (OB_FAIL(ls_->logstream_freeze())) {
          STORAGE_LOG(WARN, "minor freeze failed", K(ret), K(ls_->get_ls_id()));
        }
      } else {
        // Stops at the first tablet whose freeze fails.
        for (int64_t i = 0; OB_SUCC(ret) && i < need_flush_tablets.count(); i++) {
          if (OB_FAIL(ls_->tablet_freeze(need_flush_tablets[i]))) {
            STORAGE_LOG(WARN, "tablet freeze failed",
                        K(ret), K(ls_->get_ls_id()), K(need_flush_tablets[i]));
          }
        }
      }
    }
  }
  return ret;
}
} // namespace checkpoint
} // namespace storage
} // namespace oceanbase

View File

@ -44,6 +44,8 @@ struct ObCheckpointDList
ObFreezeCheckpoint *get_first_greater(const share::SCN rec_scn);
int get_freezecheckpoint_info(
ObIArray<checkpoint::ObFreezeCheckpointVTInfo> &freeze_checkpoint_array);
int get_need_freeze_checkpoints(const share::SCN rec_scn,
ObIArray<ObFreezeCheckpoint*> &freeze_checkpoints);
ObDList<ObFreezeCheckpoint> checkpoint_list_;
};
@ -82,7 +84,7 @@ public:
ls_frozen_list_lock_(common::ObLatchIds::CLOG_CKPT_LOCK),
ls_freeze_finished_(true)
{}
~ObDataCheckpoint() {}
~ObDataCheckpoint() { ls_ = nullptr; }
// used for virtual table
static const uint64_t LS_DATA_CHECKPOINT_TABLET_ID = 40000;
@ -116,8 +118,6 @@ public:
bool is_flushing() const;
bool has_prepared_flush_checkpoint();
bool is_empty();
private:
@ -146,8 +146,18 @@ private:
void ls_frozen_to_prepare_(int64_t &last_time);
void print_list_(ObCheckpointDList &list);
void set_ls_freeze_finished_(bool is_finished);
int get_need_flush_tablets_(const share::SCN recycle_scn,
common::ObIArray<ObTabletID> &flush_tablets);
int freeze_base_on_needs_(share::SCN recycle_scn);
static const int64_t LOOP_TRAVERSAL_INTERVAL_US = 1000L * 50; // 50ms
// when the memtables that need to be frozen account for less than TABLET_FREEZE_PERCENT
// percent of all memtables waiting to be frozen, tablet_freeze is used instead of
// logstream_freeze to relieve the pressure on mini minor merge
static const int64_t TABLET_FREEZE_PERCENT = 10;
// when the number of memtables waiting to be frozen does not exceed MAX_FREEZE_CHECKPOINT_NUM,
// logstream_freeze is performed directly without calling get_need_flush_tablets_
static const int64_t MAX_FREEZE_CHECKPOINT_NUM = 50;
bool is_inited_;
// avoid leaving out ObFreezeCheckpoint that unlinking and not in any list
common::ObSpinLock lock_;

View File

@ -22,24 +22,21 @@ namespace storage
namespace checkpoint
{
// need_lock_data_checkpoint = false when release head empty memtable
void ObFreezeCheckpoint::remove_from_data_checkpoint(bool need_lock_data_checkpoint)
void ObFreezeCheckpoint::remove_from_data_checkpoint()
{
if (OUT != location_) {
int ret = OB_SUCCESS;
if (!need_lock_data_checkpoint) {
if(OB_FAIL(unlink_())) {
STORAGE_LOG(WARN, "ObFreezeCheckpoint Unlink From DataCheckpoint Failed", K(ret));
}
} else {
ObSpinLockGuard ls_frozen_list_guard(data_checkpoint_->ls_frozen_list_lock_);
ObSpinLockGuard guard(data_checkpoint_->lock_);
int ret = OB_SUCCESS;
if(OB_FAIL(unlink_())) {
STORAGE_LOG(WARN, "ObFreezeCheckpoint Unlink From DataCheckpoint Failed", K(ret));
}
}
}
// Resets this checkpoint unit by clearing data_checkpoint_ (set to nullptr).
// location_ is not modified here.
void ObFreezeCheckpoint::reset()
{
data_checkpoint_ = nullptr;
}
}
int ObFreezeCheckpoint::unlink_()

View File

@ -86,7 +86,8 @@ class ObFreezeCheckpoint : public common::ObDLinkBase<ObFreezeCheckpoint>
public:
ObFreezeCheckpoint() : location_(OUT), data_checkpoint_(nullptr) {}
virtual ~ObFreezeCheckpoint() {}
virtual void remove_from_data_checkpoint(bool need_lock_data_checkpoint = true);
void reset();
virtual void remove_from_data_checkpoint();
virtual share::SCN get_rec_scn() = 0;
virtual int flush(share::ObLSID ls_id) = 0;
// judge rec_scn of the checkpoint unit won't get smaller

View File

@ -231,6 +231,7 @@ void ObMemtable::destroy()
}
}
ObITable::reset();
ObFreezeCheckpoint::reset();
mvcc_engine_.destroy();
time_guard.click();
query_engine_.destroy();

View File

@ -62,7 +62,7 @@ void ObTabletMemtableMgr::destroy()
STORAGE_LOG(WARN, "memtable is nullptr", K(ret), KP(imemtable), K(pos));
} else if (imemtable->is_data_memtable()) {
memtable::ObMemtable *memtable = static_cast<memtable::ObMemtable *>(imemtable);
memtable->remove_from_data_checkpoint(true);
memtable->remove_from_data_checkpoint();
}
}
reset_tables();
@ -627,9 +627,7 @@ int ObTabletMemtableMgr::release_head_memtable_(memtable::ObIMemtable *imemtable
if (!memtable->is_empty()) {
memtable->set_read_barrier();
}
// if empty memtable in prepare_list in data_checkpoint
// have been removed from data_checkpoint in release_head_empty_memtable
memtable->remove_from_data_checkpoint(true);
memtable->remove_from_data_checkpoint();
memtable->set_is_flushed();
memtable->set_freeze_state(ObMemtableFreezeState::RELEASED);
release_head_memtable();