[GC] GC needs to wait for read-only transactions to be cleaned up
This commit is contained in:
@ -354,7 +354,8 @@ ObGCHandler::ObGCHandler() : is_inited_(false),
|
||||
rwlock_(common::ObLatchIds::GC_HANDLER_LOCK),
|
||||
ls_(NULL),
|
||||
gc_seq_invalid_member_(-1),
|
||||
gc_start_ts_(OB_INVALID_TIMESTAMP)
|
||||
gc_start_ts_(OB_INVALID_TIMESTAMP),
|
||||
block_tx_ts_(OB_INVALID_TIMESTAMP)
|
||||
{
|
||||
}
|
||||
|
||||
@ -369,6 +370,7 @@ void ObGCHandler::reset()
|
||||
gc_seq_invalid_member_ = -1;
|
||||
ls_ = NULL;
|
||||
gc_start_ts_ = OB_INVALID_TIMESTAMP;
|
||||
block_tx_ts_ = OB_INVALID_TIMESTAMP;
|
||||
is_inited_ = false;
|
||||
}
|
||||
|
||||
@ -389,6 +391,67 @@ int ObGCHandler::init(ObLS *ls)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObGCHandler::execute_pre_remove()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
CLOG_LOG(WARN, "GC handler not init");
|
||||
} else {
|
||||
WLockGuard wlock_guard(rwlock_);
|
||||
int64_t ls_id = ls_->get_ls_id().id();
|
||||
bool is_tenant_dropping_or_dropped = false;
|
||||
bool need_check_readonly_tx = true;
|
||||
|
||||
const uint64_t tenant_id = MTL_ID();
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
if (OB_SUCCESS != (tmp_ret = check_if_tenant_is_dropping_or_dropped_(tenant_id, is_tenant_dropping_or_dropped))) {
|
||||
CLOG_LOG(WARN, "check_if_tenant_has_been_dropped_ failed", K(tmp_ret), K(tenant_id), K(ls_id));
|
||||
} else if (is_tenant_dropping_or_dropped) {
|
||||
need_check_readonly_tx = false;
|
||||
CLOG_LOG(INFO, "tenant is dropping or dropped, no longer need to check read_only tx", K(ls_id), K(tenant_id));
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret) && need_check_readonly_tx) {
|
||||
//follower or not in member list replica need block_tx here
|
||||
if (OB_INVALID_TIMESTAMP == block_tx_ts_) {
|
||||
if (OB_FAIL(ls_->block_tx_start())) {
|
||||
CLOG_LOG(WARN, "failed to block_tx_start", K(ls_id), KPC(this));
|
||||
} else {
|
||||
block_tx_ts_ = ObClockGenerator::getClock();
|
||||
}
|
||||
}
|
||||
|
||||
if (OB_SUCC(ret)) {
|
||||
if (OB_FAIL(ls_->check_all_readonly_tx_clean_up())) {
|
||||
if (OB_EAGAIN == ret) {
|
||||
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id));
|
||||
if (! tenant_config.is_valid()) {
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
CLOG_LOG(WARN, "tenant_config is not valid", K(ret), K(tenant_id));
|
||||
} else {
|
||||
const int64_t ls_gc_wait_readonly_tx_time = tenant_config->_ls_gc_wait_readonly_tx_time;
|
||||
const int64_t cur_time = ObClockGenerator::getClock();
|
||||
|
||||
if (block_tx_ts_ + ls_gc_wait_readonly_tx_time < cur_time) {
|
||||
CLOG_LOG(WARN, "Attention!!! Wait enough time before readonly tx been cleaned up", K(ls_id), KPC(this));
|
||||
ret = OB_SUCCESS;
|
||||
} else {
|
||||
CLOG_LOG(WARN, "[WAIT_REASEON]need wait before readonly tx been cleaned up", K(ls_id), KPC(this));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
CLOG_LOG(WARN, "check_all_readonly_tx_clean_up failed", K(ls_id), K(ret));
|
||||
}
|
||||
} else {
|
||||
CLOG_LOG(INFO, "check_all_readonly_tx_clean_up success", K(ls_id), K(ret));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void ObGCHandler::execute_pre_gc_process(ObGarbageCollector::LSStatus &ls_status)
|
||||
{
|
||||
switch (ls_status)
|
||||
@ -409,6 +472,8 @@ void ObGCHandler::execute_pre_gc_process(ObGarbageCollector::LSStatus &ls_status
|
||||
|
||||
int ObGCHandler::check_ls_can_offline(const share::ObLSStatus &ls_status)
|
||||
{
|
||||
//the inspection should be performed by leader,and get_gc_state should be invoked before get_palf_role
|
||||
//to guarantee correctness
|
||||
int ret = OB_SUCCESS;
|
||||
if (IS_NOT_INIT) {
|
||||
ret = OB_NOT_INIT;
|
||||
@ -416,12 +481,18 @@ int ObGCHandler::check_ls_can_offline(const share::ObLSStatus &ls_status)
|
||||
} else {
|
||||
RLockGuard rlock_guard(rwlock_);
|
||||
ObLSID ls_id = ls_->get_ls_id();
|
||||
ObRole role;
|
||||
LSGCState gc_state = INVALID_LS_GC_STATE;
|
||||
if (OB_FAIL(ls_->get_gc_state(gc_state))) {
|
||||
CLOG_LOG(WARN, "get_gc_state failed", K(ls_id), K(gc_state));
|
||||
} else if (!is_valid_ls_gc_state(gc_state)) {
|
||||
ret = OB_STATE_NOT_MATCH;
|
||||
CLOG_LOG(WARN, "ls check gc state invalid", K(ls_id), K(gc_state));
|
||||
} else if (OB_FAIL(get_palf_role_(role))) {
|
||||
CLOG_LOG(WARN, "get_palf_role_ failed", K(ls_id));
|
||||
} else if (ObRole::LEADER != role) {
|
||||
ret = OB_STATE_NOT_MATCH;
|
||||
CLOG_LOG(WARN, "follower can not advance gc state", K(ls_id), K(gc_state));
|
||||
} else if (is_ls_offline_finished_(gc_state)) {
|
||||
CLOG_LOG(INFO, "ls check_ls_can_offline success", K(ls_id), K(gc_state));
|
||||
} else if (is_ls_blocked_state_(gc_state)) {
|
||||
@ -817,7 +888,7 @@ int ObGCHandler::check_if_tenant_in_archive_(bool &in_archive)
|
||||
return MTL(ObArchiveService*)->check_tenant_in_archive(in_archive);
|
||||
}
|
||||
|
||||
void ObGCHandler::submit_log_(const ObGCLSLOGType log_type)
|
||||
int ObGCHandler::submit_log_(const ObGCLSLOGType log_type, bool &is_success)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
ObGCLSLog gc_log(log_type);
|
||||
@ -826,6 +897,7 @@ void ObGCHandler::submit_log_(const ObGCLSLOGType log_type)
|
||||
int64_t buffer_size = gc_log.get_serialize_size();
|
||||
ObGCLSLogCb cb;
|
||||
const bool need_nonblock = false;
|
||||
is_success = false;
|
||||
SCN ref_scn;
|
||||
palf::LSN lsn;
|
||||
SCN scn;
|
||||
@ -857,6 +929,7 @@ void ObGCHandler::submit_log_(const ObGCLSLOGType log_type)
|
||||
if (cb.is_succeed()) {
|
||||
(void)update_ls_gc_state_after_submit_log_(log_type, scn);
|
||||
is_finished = true;
|
||||
is_success = true;
|
||||
CLOG_LOG(INFO, "write GC ls log success", K(ret), K(log_type));
|
||||
} else if (cb.is_failed()) {
|
||||
is_finished = true;
|
||||
@ -874,6 +947,7 @@ void ObGCHandler::submit_log_(const ObGCLSLOGType log_type)
|
||||
mtl_free(buffer);
|
||||
buffer = nullptr;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void ObGCHandler::update_ls_gc_state_after_submit_log_(const ObGCLSLOGType log_type,
|
||||
@ -908,6 +982,7 @@ void ObGCHandler::block_ls_transfer_in_(const SCN &block_scn)
|
||||
//TODO: @keqing.llt until the transfer feature is complete, kill transactions as a substitute for transfer out
|
||||
} else if (OB_FAIL(ls_->block_tx_start())) {
|
||||
CLOG_LOG(WARN, "block_tx_start failed", K(ls_id), K(ret));
|
||||
} else if (FALSE_IT(block_tx_ts_ = ObClockGenerator::getClock())) {
|
||||
} else if (OB_FAIL(ls_->set_gc_state(LSGCState::LS_BLOCKED))) {
|
||||
CLOG_LOG(WARN, "set_gc_state block failed", K(ls_id), K(ret));
|
||||
} else {
|
||||
@ -966,6 +1041,7 @@ void ObGCHandler::handle_gc_ls_dropping_(const ObGarbageCollector::LSStatus &ls_
|
||||
CLOG_LOG(WARN, "GC handler not init");
|
||||
} else {
|
||||
WLockGuard wlock_guard(rwlock_);
|
||||
bool is_success = false;
|
||||
ObRole role;
|
||||
ObLSID ls_id = ls_->get_ls_id();
|
||||
LSGCState gc_state = INVALID_LS_GC_STATE;
|
||||
@ -983,13 +1059,21 @@ void ObGCHandler::handle_gc_ls_dropping_(const ObGarbageCollector::LSStatus &ls_
|
||||
} else if (!is_valid_ls_gc_state(gc_state)) {
|
||||
CLOG_LOG(WARN, "ls check gc state invalid", K(ls_id), K(gc_state));
|
||||
} else if (is_ls_offline_finished_(gc_state)) {
|
||||
(void)set_block_tx_if_necessary_();
|
||||
CLOG_LOG(INFO, "handle_gc_ls_dropping already finished", K(ls_id), K(gc_state));
|
||||
} else if (is_ls_blocked_state_(gc_state)) {
|
||||
(void)set_block_tx_if_necessary_();
|
||||
// trigger kill all tx
|
||||
(void)is_tablet_clear_(ls_status);
|
||||
} else {
|
||||
(void)submit_log_(ObGCLSLOGType::BLOCK_TABLET_TRANSFER_IN);
|
||||
(void)is_tablet_clear_(ls_status);
|
||||
if (OB_FAIL(submit_log_(ObGCLSLOGType::BLOCK_TABLET_TRANSFER_IN, is_success))) {
|
||||
CLOG_LOG(WARN, "failed to submit BLOCK_TABLET_TRANSFER_IN log", K(ls_id), K(gc_state));
|
||||
} else if (is_success) {
|
||||
(void)is_tablet_clear_(ls_status);
|
||||
CLOG_LOG(INFO, "BLOCK_TABLET_TRANSFER_IN log has callback on_success", K(ls_id), K(gc_state));
|
||||
} else {
|
||||
CLOG_LOG(WARN, "BLOCK_TABLET_TRANSFER_IN log has not callback on_success", K(ls_id), K(gc_state));
|
||||
}
|
||||
}
|
||||
CLOG_LOG(INFO, "ls handle_gc_ls_dropping_ finished", K(ls_id), K(role), K(gc_state));
|
||||
}
|
||||
@ -1011,6 +1095,8 @@ void ObGCHandler::handle_gc_ls_offline_(ObGarbageCollector::LSStatus &ls_status)
|
||||
gc_start_ts_ = ObTimeUtility::current_time();
|
||||
}
|
||||
|
||||
bool is_success = false;
|
||||
(void)set_block_tx_if_necessary_();
|
||||
if (OB_FAIL(get_palf_role_(role))) {
|
||||
CLOG_LOG(WARN, "get_palf_role_ failed", K(ls_id));
|
||||
} else if (ObRole::LEADER != role) {
|
||||
@ -1028,10 +1114,16 @@ void ObGCHandler::handle_gc_ls_offline_(ObGarbageCollector::LSStatus &ls_status)
|
||||
} else if (is_ls_offline_state_(gc_state)) {
|
||||
(void)try_check_and_set_wait_gc_(ls_status);
|
||||
} else {
|
||||
(void)submit_log_(ObGCLSLOGType::OFFLINE_LS);
|
||||
(void)try_check_and_set_wait_gc_(ls_status);
|
||||
if (OB_FAIL(submit_log_(ObGCLSLOGType::OFFLINE_LS, is_success))) {
|
||||
CLOG_LOG(WARN, "failed to submit OFFLINE_LS log", K(ls_id), K(gc_state));
|
||||
} else if (is_success) {
|
||||
CLOG_LOG(INFO, "OFFLINE_LS has callback on_success", K(ls_id), K(gc_state));
|
||||
(void)try_check_and_set_wait_gc_(ls_status);
|
||||
} else {
|
||||
CLOG_LOG(WARN, "OFFLINE_LS has not callback on_success", K(ls_id), K(gc_state));
|
||||
}
|
||||
}
|
||||
CLOG_LOG(INFO, "ls handle_gc_ls_offline finished", K(ls_id), K(role), K(gc_state));
|
||||
CLOG_LOG(INFO, "ls handle_gc_ls_offline finished", K(ls_id), K(role), K(gc_state), K(is_success));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1051,6 +1143,14 @@ int ObGCHandler::diagnose(GCDiagnoseInfo &diagnose_info) const
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
void ObGCHandler::set_block_tx_if_necessary_()
|
||||
{
|
||||
//for restart or switch_leader, block_tx_ts_ in memory may be cleaned
|
||||
if (OB_INVALID_TIMESTAMP == block_tx_ts_) {
|
||||
block_tx_ts_ = ObClockGenerator::getClock();
|
||||
}
|
||||
}
|
||||
//---------------ObGarbageCollector---------------//
|
||||
void ObGarbageCollector::GCCandidate::set_ls_status(const share::ObLSStatus &ls_status)
|
||||
{
|
||||
@ -1187,25 +1287,27 @@ void ObGarbageCollector::run1()
|
||||
CLOG_LOG(INFO, "Garbage Collector start to run");
|
||||
lib::set_thread_name("GCCollector");
|
||||
|
||||
const int64_t gc_interval = GC_INTERVAL;
|
||||
while (!has_set_stop()) {
|
||||
if (!ObServerCheckpointSlogHandler::get_instance().is_started()) {
|
||||
// tablets are not ready for read
|
||||
usleep(5000 * 1000); // 5s
|
||||
} else if (!stop_create_new_gc_task_) {
|
||||
ObGCCandidateArray gc_candidates;
|
||||
int64_t gc_interval = GC_INTERVAL;
|
||||
CLOG_LOG(INFO, "Garbage Collector is running", K(seq_), K(gc_interval));
|
||||
gc_candidates.reset();
|
||||
(void)gc_check_member_list_(gc_candidates);
|
||||
(void)execute_gc_(gc_candidates);
|
||||
gc_candidates.reset();
|
||||
(void)gc_check_ls_status_(gc_candidates);
|
||||
(void)execute_gc_(gc_candidates);
|
||||
ob_usleep(gc_interval);
|
||||
seq_++;
|
||||
if (ObServerCheckpointSlogHandler::get_instance().is_started()) {
|
||||
if (!stop_create_new_gc_task_) {
|
||||
CLOG_LOG(INFO, "Garbage Collector is running", K(seq_), K(gc_interval));
|
||||
ObGCCandidateArray gc_candidates;
|
||||
gc_candidates.reset();
|
||||
(void)gc_check_member_list_(gc_candidates);
|
||||
(void)execute_gc_(gc_candidates);
|
||||
gc_candidates.reset();
|
||||
(void)gc_check_ls_status_(gc_candidates);
|
||||
(void)execute_gc_(gc_candidates);
|
||||
seq_++;
|
||||
}
|
||||
// safe destroy task
|
||||
(void) safe_destroy_handler_.handle();
|
||||
} else {
|
||||
CLOG_LOG(INFO, "Garbage Collector is not running, waiting for ObServerCheckpointSlogHandler",
|
||||
K(seq_), K(gc_interval));
|
||||
}
|
||||
// safe destroy task
|
||||
(void) safe_destroy_handler_.handle();
|
||||
ob_usleep(gc_interval);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1507,18 +1609,20 @@ void ObGarbageCollector::execute_gc_(ObGCCandidateArray &gc_candidates)
|
||||
} else if (OB_ISNULL(ls = ls_handle.get_ls())) {
|
||||
tmp_ret = OB_ERR_UNEXPECTED;
|
||||
CLOG_LOG(ERROR, "ls not exist", K(tmp_ret), K(id));
|
||||
} else if (OB_ISNULL(gc_handler = ls->get_gc_handler())) {
|
||||
tmp_ret = OB_ERR_UNEXPECTED;
|
||||
CLOG_LOG(ERROR, "gc_handler is NULL", K(tmp_ret), K(id));
|
||||
} else if (is_need_gc_ls_status_(ls_status)) {
|
||||
ObSwitchLeaderAdapter switch_leader_adapter;
|
||||
if (OB_FAIL(switch_leader_adapter.remove_from_election_blacklist(id.id(), self_addr_))) {
|
||||
CLOG_LOG(WARN, "remove_from_election_blacklist failed", K(ret), K(id), K_(self_addr));
|
||||
if (OB_SUCCESS != (tmp_ret = (gc_handler->execute_pre_remove()))) {
|
||||
CLOG_LOG(WARN, "failed to execute_pre_remove", K(tmp_ret), K(id), K_(self_addr));
|
||||
} else if (OB_SUCCESS != (tmp_ret = switch_leader_adapter.remove_from_election_blacklist(id.id(), self_addr_))) {
|
||||
CLOG_LOG(WARN, "remove_from_election_blacklist failed", K(tmp_ret), K(id), K_(self_addr));
|
||||
} else if (OB_SUCCESS != (tmp_ret = ls_service_->remove_ls(id, false))) {
|
||||
CLOG_LOG(WARN, "remove_ls failed", K(tmp_ret), K(id));
|
||||
} else {
|
||||
CLOG_LOG(INFO, "remove_ls success", K(id), K(gc_reason));
|
||||
}
|
||||
} else if (OB_ISNULL(gc_handler = ls->get_gc_handler())) {
|
||||
tmp_ret = OB_ERR_UNEXPECTED;
|
||||
CLOG_LOG(ERROR, "gc_handler is NULL", K(tmp_ret), K(id));
|
||||
} else {
|
||||
CLOG_LOG(INFO, "begin execute_pre_gc_process", K(id), K(ls_status));
|
||||
(void)gc_handler->execute_pre_gc_process(ls_status);
|
||||
|
||||
@ -102,11 +102,14 @@ struct GCDiagnoseInfo
|
||||
~GCDiagnoseInfo() { reset(); }
|
||||
LSGCState gc_state_;
|
||||
int64_t gc_start_ts_;
|
||||
int64_t block_tx_ts_;
|
||||
TO_STRING_KV(K(gc_state_),
|
||||
K(gc_start_ts_));
|
||||
K(gc_start_ts_),
|
||||
K(block_tx_ts_));
|
||||
void reset() {
|
||||
gc_state_ = LSGCState::INVALID_LS_GC_STATE;
|
||||
gc_start_ts_ = OB_INVALID_TIMESTAMP;
|
||||
block_tx_ts_ = OB_INVALID_TIMESTAMP;
|
||||
}
|
||||
};
|
||||
|
||||
@ -244,9 +247,11 @@ public:
|
||||
int init(storage::ObLS *ls);
|
||||
void reset();
|
||||
void execute_pre_gc_process(ObGarbageCollector::LSStatus &ls_status);
|
||||
int execute_pre_remove();
|
||||
int check_ls_can_offline(const share::ObLSStatus &ls_status);
|
||||
int gc_check_invalid_member_seq(const int64_t gc_seq, bool &need_gc);
|
||||
static bool is_valid_ls_gc_state(const LSGCState &state);
|
||||
|
||||
int diagnose(GCDiagnoseInfo &diagnose_info) const;
|
||||
|
||||
// for replay
|
||||
@ -266,7 +271,9 @@ public:
|
||||
virtual int flush(share::SCN &scn) override;
|
||||
|
||||
TO_STRING_KV(K(is_inited_),
|
||||
K(gc_seq_invalid_member_));
|
||||
K(gc_seq_invalid_member_),
|
||||
K(gc_start_ts_),
|
||||
K(block_tx_ts_));
|
||||
|
||||
private:
|
||||
typedef common::SpinRWLock RWLock;
|
||||
@ -302,6 +309,7 @@ private:
|
||||
};
|
||||
|
||||
private:
|
||||
const int64_t MAX_WAIT_TIME_US_FOR_READONLY_TX = 10 * 60 * 1000 * 1000L;//10 min
|
||||
const int64_t LS_CLOG_ALIVE_TIMEOUT_US = 100 * 1000; //100ms
|
||||
const int64_t GET_GTS_TIMEOUT_US = 10L * 1000 * 1000; //10s
|
||||
int get_gts_(const int64_t timeout_us, share::SCN >s_scn);
|
||||
@ -321,7 +329,7 @@ private:
|
||||
bool &is_tenant_dropping_or_dropped);
|
||||
int get_tenant_readable_scn_(share::SCN &readable_scn);
|
||||
int check_if_tenant_in_archive_(bool &in_archive);
|
||||
void submit_log_(const ObGCLSLOGType log_type);
|
||||
int submit_log_(const ObGCLSLOGType log_type, bool &is_success);
|
||||
void update_ls_gc_state_after_submit_log_(const ObGCLSLOGType log_type,
|
||||
const share::SCN &scn);
|
||||
void block_ls_transfer_in_(const share::SCN &block_scn);
|
||||
@ -329,12 +337,14 @@ private:
|
||||
int get_palf_role_(common::ObRole &role);
|
||||
void handle_gc_ls_dropping_(const ObGarbageCollector::LSStatus &ls_status);
|
||||
void handle_gc_ls_offline_(ObGarbageCollector::LSStatus &ls_status);
|
||||
void set_block_tx_if_necessary_();
|
||||
private:
|
||||
bool is_inited_;
|
||||
RWLock rwlock_; //for leader revoke/takeover submit log
|
||||
storage::ObLS *ls_;
|
||||
int64_t gc_seq_invalid_member_; //缓存gc检查当前ls不在成员列表时的轮次
|
||||
int64_t gc_start_ts_;
|
||||
int64_t block_tx_ts_;
|
||||
};
|
||||
|
||||
} // namespace logservice
|
||||
|
||||
Reference in New Issue
Block a user