add some log for bug detect

This commit is contained in:
obdev 2023-04-04 01:44:49 +00:00 committed by ob-robot
parent 8b00a6d6be
commit 0c2d063f4e
5 changed files with 71 additions and 62 deletions

View File

@ -383,7 +383,7 @@ void ObDataCheckpoint::ls_frozen_to_active_(int64_t &last_time)
{
int64_t read_lock = LSLOCKALL - LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_ls(ls_->lock_, read_lock, write_lock);
ObLSLockGuard lock_ls(ls_, ls_->lock_, read_lock, write_lock);
if (OB_UNLIKELY(ls_->is_stopped_)) {
ret = OB_NOT_RUNNING;
@ -445,7 +445,7 @@ void ObDataCheckpoint::ls_frozen_to_prepare_(int64_t &last_time)
{
int64_t read_lock = LSLOCKALL - LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_ls(ls_->lock_, read_lock, write_lock);
ObLSLockGuard lock_ls(ls_, ls_->lock_, read_lock, write_lock);
if (OB_UNLIKELY(ls_->is_stopped_)) {
ret = OB_NOT_RUNNING;

View File

@ -540,7 +540,7 @@ int ObFreezer::ls_freeze_task()
if (need_resubmit_log()) {
int64_t read_lock = LSLOCKALL - LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_ls(ls_->lock_, read_lock, write_lock);
ObLSLockGuard lock_ls(ls_, ls_->lock_, read_lock, write_lock);
if (OB_FAIL(check_ls_state())) {
} else {
submit_log_for_freeze();
@ -782,7 +782,7 @@ int ObFreezer::tablet_freeze_task(ObTableHandleV2 handle)
} else {
int64_t read_lock = LSLOCKALL - LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_ls(ls_->lock_, read_lock, write_lock);
ObLSLockGuard lock_ls(ls_, ls_->lock_, read_lock, write_lock);
if (OB_FAIL(check_ls_state())) {
} else if (OB_FAIL(memtable->finish_freeze())) {
TRANS_LOG(ERROR, "[Freezer] memtable cannot be flushed",
@ -813,7 +813,7 @@ int ObFreezer::wait_memtable_ready_for_flush_with_ls_lock(memtable::ObMemtable *
do {
int64_t read_lock = LSLOCKALL - LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_ls(ls_->lock_, read_lock, write_lock);
ObLSLockGuard lock_ls(ls_, ls_->lock_, read_lock, write_lock);
if (OB_FAIL(check_ls_state())) {
} else if (!memtable->ready_for_flush()) {

View File

@ -440,7 +440,7 @@ int ObLS::stop()
int64_t read_lock = 0;
int64_t write_lock = LSLOCKALL;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
int ret = OB_SUCCESS;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
@ -490,7 +490,7 @@ void ObLS::wait()
do {
retry_times++;
{
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
ls_migration_handler_.wait(wait_finished);
}
if (!wait_finished) {
@ -756,7 +756,7 @@ int ObLS::offline()
do {
retry_times++;
{
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
// only follower can do this.
if (OB_FAIL(offline_())) {
LOG_WARN("ls offline failed", K(ret), K(ls_meta_));
@ -820,7 +820,7 @@ int ObLS::online()
int ret = OB_SUCCESS;
int64_t read_lock = 0;
int64_t write_lock = LSLOCKALL;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -857,7 +857,7 @@ int ObLS::enable_for_restore()
int ret = OB_SUCCESS;
int64_t read_lock = 0;
int64_t write_lock = LSLOCKALL;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -954,7 +954,7 @@ int ObLS::get_replica_status(ObReplicaStatus &replica_status)
ObMigrationStatus migration_status;
int64_t read_lock = LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -984,7 +984,7 @@ int ObLS::get_ls_role(ObRole &role)
} else {
int64_t read_lock = LSLOCKLOG;
int64_t write_lock = 0;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (OB_FAIL(log_handler_.get_role(role, proposal_id))) {
LOG_WARN("get ls role failed", K(ret), KPC(this));
}
@ -1086,7 +1086,7 @@ int ObLS::block_tx_start()
} else {
int64_t read_lock = 0;
int64_t write_lock = LSLOCKSTORAGE | LSLOCKTX;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
// protect with lock_ to make sure there is no tablet transfer in process doing.
// transfer in must use this lock too.
if (OB_FAIL(block_tx())) {
@ -1105,7 +1105,7 @@ int ObLS::tablet_transfer_in(const ObTabletID &tablet_id)
} else {
int64_t read_lock = 0;
int64_t write_lock = LSLOCKSTORAGE;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
// make sure there is no block on going.
logservice::LSGCState gc_state = logservice::LSGCState::INVALID_LS_GC_STATE;
if (OB_FAIL(get_gc_state(gc_state))) {
@ -1128,7 +1128,7 @@ int ObLS::update_tablet_table_store(
int ret = OB_SUCCESS;
const int64_t read_lock = LSLOCKLOGMETA;
const int64_t write_lock = 0;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -1156,7 +1156,7 @@ int ObLS::update_tablet_table_store(
int ret = OB_SUCCESS;
const int64_t read_lock = LSLOCKLOGMETA;
const int64_t write_lock = 0;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls hasn't been inited", K(ret));
@ -1182,7 +1182,7 @@ int ObLS::build_ha_tablet_new_table_store(
int ret = OB_SUCCESS;
int64_t read_lock = LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
const int64_t rebuild_seq = ls_meta_.get_rebuild_seq();
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
@ -1209,7 +1209,7 @@ int ObLS::finish_slog_replay()
ObMigrationStatus new_migration_status;
int64_t read_lock = 0;
int64_t write_lock = LSLOCKALL - LSLOCKLOGMETA;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (OB_FAIL(get_migration_status(current_migration_status))) {
LOG_WARN("failed to get migration status", K(ret), KPC(this));
@ -1325,7 +1325,7 @@ int ObLS::logstream_freeze(bool is_sync)
{
int64_t read_lock = LSLOCKALL - LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -1357,7 +1357,7 @@ int ObLS::tablet_freeze(const ObTabletID &tablet_id, bool is_sync)
{
int64_t read_lock = LSLOCKALL - LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -1386,7 +1386,7 @@ int ObLS::force_tablet_freeze(const ObTabletID &tablet_id)
int ret = OB_SUCCESS;
int64_t read_lock = LSLOCKALL - LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -1408,7 +1408,7 @@ int ObLS::advance_checkpoint_by_flush(SCN recycle_scn)
{
int64_t read_lock = LSLOCKALL;
int64_t write_lock = 0;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
return checkpoint_executor_.advance_checkpoint_by_flush(recycle_scn);
}
@ -1421,7 +1421,7 @@ int ObLS::get_ls_meta_package_and_tablet_ids(const bool check_archive,
int64_t write_lock = 0;
const bool need_initial_state = false;
ObHALSTabletIDIterator iter(ls_meta_.ls_id_, need_initial_state);
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -1456,7 +1456,7 @@ int ObLS::disable_sync()
int ret = OB_SUCCESS;
int64_t read_lock = LSLOCKLS;
int64_t write_lock = LSLOCKLOG;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -1473,7 +1473,7 @@ int ObLS::enable_replay()
int ret = OB_SUCCESS;
int64_t read_lock = LSLOCKLS;
int64_t write_lock = LSLOCKLOG;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -1504,7 +1504,7 @@ int ObLS::disable_replay()
int ret = OB_SUCCESS;
int64_t read_lock = LSLOCKLS;
int64_t write_lock = LSLOCKLOG;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -1536,7 +1536,7 @@ int ObLS::flush_if_need(const bool need_flush)
int64_t read_lock = LSLOCKALL - LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ls is not inited", K(ret));
@ -1574,7 +1574,7 @@ int ObLS::try_update_uppder_trans_version()
int tmp_ret = OB_SUCCESS;
int64_t read_lock = LSLOCKLOGMETA;
int64_t write_lock = 0;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
@ -1623,7 +1623,7 @@ int ObLS::set_tablet_change_checkpoint_scn(const SCN &scn)
int64_t read_lock = 0;
int64_t write_lock = LSLOCKLOGMETA;
const bool try_lock = true; // the upper layer should deal with try lock fail.
ObLSLockGuard lock_myself(lock_, read_lock, write_lock, try_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock, try_lock);
if (!lock_myself.locked()) {
ret = OB_EAGAIN;
LOG_WARN("try lock failed, please retry later", K(ret), K(ls_meta_));
@ -1710,7 +1710,7 @@ int ObLS::set_migration_status(
share::ObLSRestoreStatus restore_status;
int64_t read_lock = LSLOCKLS;
int64_t write_lock = LSLOCKLOGMETA;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
@ -1748,7 +1748,7 @@ int ObLS::set_restore_status(
ObMigrationStatus migration_status = ObMigrationStatus::OB_MIGRATION_STATUS_MAX;
int64_t read_lock = LSLOCKLS;
int64_t write_lock = LSLOCKLOGMETA;
ObLSLockGuard lock_myself(lock_, read_lock, write_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
@ -1784,7 +1784,7 @@ int ObLS::set_ls_rebuild()
int64_t read_lock = LSLOCKLS;
int64_t write_lock = LSLOCKLOGMETA;
const bool try_lock = true; // the upper layer should deal with try lock fail.
ObLSLockGuard lock_myself(lock_, read_lock, write_lock, try_lock);
ObLSLockGuard lock_myself(this, lock_, read_lock, write_lock, try_lock);
if (!lock_myself.locked()) {
ret = OB_EAGAIN;

View File

@ -10,6 +10,8 @@
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX STORAGE
#include "ob_ls_lock.h"
#include "lib/stat/ob_latch_define.h"
@ -33,7 +35,7 @@ ObLSLock::~ObLSLock()
{
}
int64_t ObLSLock::lock(int64_t hold, int64_t change)
int64_t ObLSLock::lock(const ObLS *ls, int64_t hold, int64_t change)
{
int ret = OB_SUCCESS; // tmp_ret, rewrite every time
int64_t pos = 0;
@ -41,17 +43,17 @@ int64_t ObLSLock::lock(int64_t hold, int64_t change)
hold &= LSLOCKMASK;
change &= LSLOCKMASK;
ObTimeGuard tg("ObLSLock::lock", 100 * 1000);
ObTimeGuard tg("ObLSLock::lock", LOCK_CONFLICT_WARN_TIME);
while (hold | change) {
if (change & 1) {
if (OB_FAIL(locks_[pos].wrlock(ObLatchIds::LS_LOCK))) {
STORAGE_LOG(ERROR, "wrlock error", K(ret), K(pos));
LOG_ERROR("wrlock error", K(ret), K(pos));
} else {
res |= 1L << pos;
}
} else if (hold & 1) {
if (OB_FAIL(locks_[pos].rdlock(ObLatchIds::LS_LOCK))) {
STORAGE_LOG(ERROR, "rdlock error", K(ret), K(pos));
LOG_ERROR("rdlock error", K(ret), K(pos));
} else {
res |= 1L << pos;
}
@ -63,11 +65,13 @@ int64_t ObLSLock::lock(int64_t hold, int64_t change)
change >>= 1;
pos++;
}
if (tg.get_diff() >= LOCK_CONFLICT_WARN_TIME) {
LOG_WARN("get lock cost too much time", KP(ls), "ls_id", OB_ISNULL(ls) ? ObLSID(0) : ls->get_ls_id());
}
return res;
}
int64_t ObLSLock::try_lock(int64_t hold, int64_t change)
int64_t ObLSLock::try_lock(const ObLS *ls, int64_t hold, int64_t change)
{
int ret = OB_SUCCESS; // tmp_ret, rewrite every time
int64_t pos = 0;
@ -75,7 +79,7 @@ int64_t ObLSLock::try_lock(int64_t hold, int64_t change)
hold &= LSLOCKMASK;
change &= LSLOCKMASK;
ObTimeGuard tg("ObLSLock::try_lock", 100 * 1000);
ObTimeGuard tg("ObLSLock::try_lock", LOCK_CONFLICT_WARN_TIME);
while (hold | change) {
if (change & 1) {
if (OB_FAIL(locks_[pos].try_wrlock(ObLatchIds::LS_LOCK))) {
@ -97,6 +101,9 @@ int64_t ObLSLock::try_lock(int64_t hold, int64_t change)
change >>= 1;
pos++;
}
if (tg.get_diff() >= LOCK_CONFLICT_WARN_TIME) {
LOG_WARN("get lock cost too much time", KP(ls), "ls_id", OB_ISNULL(ls) ? ObLSID(0) : ls->get_ls_id());
}
return res;
}
@ -109,7 +116,7 @@ void ObLSLock::unlock(int64_t target)
target &= LSLOCKMASK;
while (target) {
if ((target & 1) && OB_FAIL(locks_[pos].unlock())) {
STORAGE_LOG(ERROR, "unlock error", K(ret), K(pos));
LOG_ERROR("unlock error", K(ret), K(pos));
}
target >>= 1;
@ -119,13 +126,15 @@ void ObLSLock::unlock(int64_t target)
// <=================== lock guard =======================>
ObLSLockGuard::ObLSLockGuard(ObLSLock &lock,
ObLSLockGuard::ObLSLockGuard(ObLS *ls,
ObLSLock &lock,
int64_t hold,
int64_t change,
const bool trylock)
: lock_(lock),
mark_(0),
start_ts_(INT64_MAX)
start_ts_(INT64_MAX),
ls_(ls)
{
hold &= LSLOCKMASK;
change &= LSLOCKMASK;
@ -133,10 +142,10 @@ ObLSLockGuard::ObLSLockGuard(ObLSLock &lock,
hold ^= hold & change;
if (!trylock) {
mark_ = lock_.lock(hold, change);
mark_ = lock_.lock(ls, hold, change);
start_ts_ = ObTimeUtility::current_time();
} else {
if ((hold | change) != (mark_ = lock_.try_lock(hold, change))) {
if ((hold | change) != (mark_ = lock_.try_lock(ls, hold, change))) {
// try lock failed
lock_.unlock(mark_);
mark_ = 0;
@ -147,7 +156,8 @@ ObLSLockGuard::ObLSLockGuard(ObLSLock &lock,
ObLSLockGuard::ObLSLockGuard(ObLS *ls, const bool rdlock)
: lock_(ls->lock_),
mark_(0),
start_ts_(INT64_MAX)
start_ts_(INT64_MAX),
ls_(ls)
{
int64_t wrlock_mask = LSLOCKALL;
int64_t rdlock_mask = 0;
@ -157,7 +167,7 @@ ObLSLockGuard::ObLSLockGuard(ObLS *ls, const bool rdlock)
}
wrlock_mask &= LSLOCKMASK;
rdlock_mask &= LSLOCKMASK;
mark_ = lock_.lock(rdlock_mask, wrlock_mask);
mark_ = lock_.lock(ls, rdlock_mask, wrlock_mask);
start_ts_ = ObTimeUtility::current_time();
}
@ -166,12 +176,12 @@ ObLSLockGuard::~ObLSLockGuard()
lock_.unlock(mark_);
const int64_t end_ts = ObTimeUtility::current_time();
if (end_ts - start_ts_ > 5 * 1000 * 1000) {
STORAGE_LOG(INFO, "ls lock cost too much time",
K_(start_ts),
"cost_us", end_ts - start_ts_,
K(lbt()));
FLOG_INFO("ls lock cost too much time", K_(start_ts),
"cost_us", end_ts - start_ts_, KP(ls_),
"ls_id", OB_ISNULL(ls_) ? ObLSID(0) : ls_->get_ls_id(), K(lbt()));
}
start_ts_ = INT64_MAX;
ls_ = nullptr;
}
void ObLSLockGuard::unlock(int64_t target)
@ -181,10 +191,9 @@ void ObLSLockGuard::unlock(int64_t target)
lock_.unlock(target);
const int64_t end_ts = ObTimeUtility::current_time();
if (end_ts - start_ts_ > 5 * 1000 * 1000) {
STORAGE_LOG(INFO, "ls lock cost too much time",
K_(start_ts),
"cost_us", end_ts - start_ts_,
K(lbt()));
FLOG_INFO("ls lock cost too much time", K_(start_ts),
"cost_us", end_ts - start_ts_, KP(ls_),
"ls_id", OB_ISNULL(ls_) ? ObLSID(0) : ls_->get_ls_id(), K(lbt()));
}
}
@ -207,11 +216,8 @@ ObLSLockWithPendingReplayGuard::~ObLSLockWithPendingReplayGuard()
// TODO: cxf262476 unlock
const int64_t end_ts = ObTimeUtility::current_time();
if (end_ts - start_ts_ > 5 * 1000 * 1000) {
STORAGE_LOG(INFO, "ls lock cost too much time",
K_(start_ts),
"cost_us", end_ts - start_ts_,
K(ls_id_),
K(lbt()));
LOG_INFO("ls lock cost too much time", K_(start_ts),
"cost_us", end_ts - start_ts_, K(ls_id_), K(lbt()));
}
start_ts_ = INT64_MAX;
}

View File

@ -56,6 +56,7 @@ class ObLSLock
friend ObLSLockGuard;
friend ObLSTryLockGuard;
friend ObLSLockWithPendingReplayGuard;
static const int64_t LOCK_CONFLICT_WARN_TIME = 100 * 1000; // 100 ms
public:
typedef common::ObLatch RWLock;
@ -65,8 +66,8 @@ public:
ObLSLock(const ObLSLock&) = delete;
ObLSLock& operator=(const ObLSLock&) = delete;
private:
int64_t lock(int64_t hold, int64_t change);
int64_t try_lock(int64_t hold, int64_t change);
int64_t lock(const ObLS *ls, int64_t hold, int64_t change);
int64_t try_lock(const ObLS *ls, int64_t hold, int64_t change);
void unlock(int64_t target);
RWLock locks_[LSLOCKSIZE];
@ -75,7 +76,8 @@ private:
class ObLSLockGuard
{
public:
ObLSLockGuard(ObLSLock &lock,
ObLSLockGuard(ObLS *ls,
ObLSLock &lock,
int64_t hold,
int64_t change,
const bool trylock = false);
@ -93,6 +95,7 @@ private:
ObLSLock &lock_;
int64_t mark_;
int64_t start_ts_;
const ObLS *ls_;
};
// ATTENTION: