[GC] GC needs to wait until read-only transactions are cleaned up

This commit is contained in:
yyy-hust
2023-07-25 14:48:36 +00:00
committed by ob-robot
parent 7b37f56021
commit 8c16a5e83c
12 changed files with 205 additions and 112 deletions

View File

@ -354,7 +354,8 @@ ObGCHandler::ObGCHandler() : is_inited_(false),
rwlock_(common::ObLatchIds::GC_HANDLER_LOCK),
ls_(NULL),
gc_seq_invalid_member_(-1),
gc_start_ts_(OB_INVALID_TIMESTAMP)
gc_start_ts_(OB_INVALID_TIMESTAMP),
block_tx_ts_(OB_INVALID_TIMESTAMP)
{
}
@ -369,6 +370,7 @@ void ObGCHandler::reset()
gc_seq_invalid_member_ = -1;
ls_ = NULL;
gc_start_ts_ = OB_INVALID_TIMESTAMP;
block_tx_ts_ = OB_INVALID_TIMESTAMP;
is_inited_ = false;
}
@ -389,6 +391,67 @@ int ObGCHandler::init(ObLS *ls)
return ret;
}
// Pre-check executed before the log stream is removed by the garbage collector.
//
// Unless the tenant is dropping or already dropped, this blocks new transactions
// on the LS (once, recording the time in block_tx_ts_) and then checks whether all
// read-only transactions have been cleaned up.
//
// @return OB_SUCCESS           removal may proceed: the tenant is dropping/dropped,
//                              all read-only tx are cleaned up, or the wait budget
//                              (_ls_gc_wait_readonly_tx_time) since block_tx_ts_
//                              has been exceeded.
//         OB_NOT_INIT          the handler is not initialized.
//         OB_EAGAIN            read-only tx still exist and the wait budget has not
//                              been used up yet; the caller is expected to retry.
//         OB_INVALID_ARGUMENT  the tenant config could not be obtained.
//         other                error code returned by block_tx_start() or
//                              check_all_readonly_tx_clean_up().
int ObGCHandler::execute_pre_remove()
{
  int ret = OB_SUCCESS;
  if (IS_NOT_INIT) {
    ret = OB_NOT_INIT;
    CLOG_LOG(WARN, "GC handler not init");
  } else {
    WLockGuard wlock_guard(rwlock_);
    const int64_t ls_id = ls_->get_ls_id().id();
    const uint64_t tenant_id = MTL_ID();
    bool is_tenant_dropping_or_dropped = false;
    bool need_check_readonly_tx = true;
    // A failure of the tenant-status probe is only logged (tmp_ret, not ret):
    // in that case the read-only tx check below is still performed.
    int tmp_ret = OB_SUCCESS;
    if (OB_SUCCESS != (tmp_ret = check_if_tenant_is_dropping_or_dropped_(tenant_id, is_tenant_dropping_or_dropped))) {
      CLOG_LOG(WARN, "check_if_tenant_is_dropping_or_dropped_ failed", K(tmp_ret), K(tenant_id), K(ls_id));
    } else if (is_tenant_dropping_or_dropped) {
      need_check_readonly_tx = false;
      CLOG_LOG(INFO, "tenant is dropping or dropped, no longer need to check read_only tx", K(ls_id), K(tenant_id));
    }
    // ret is still OB_SUCCESS here, so only the flag needs to be tested.
    if (need_check_readonly_tx) {
      // A follower, or a replica that is not in the member list, needs to block tx here.
      // block_tx_ts_ doubles as the "already blocked" marker, so this runs at most once
      // until the timestamp is reset.
      if (OB_INVALID_TIMESTAMP == block_tx_ts_) {
        if (OB_FAIL(ls_->block_tx_start())) {
          CLOG_LOG(WARN, "failed to block_tx_start", K(ret), K(ls_id), KPC(this));
        } else {
          block_tx_ts_ = ObClockGenerator::getClock();
        }
      }
      if (OB_SUCC(ret)) {
        if (OB_FAIL(ls_->check_all_readonly_tx_clean_up())) {
          if (OB_EAGAIN == ret) {
            omt::ObTenantConfigGuard tenant_config(TENANT_CONF(tenant_id));
            if (! tenant_config.is_valid()) {
              ret = OB_INVALID_ARGUMENT;
              CLOG_LOG(WARN, "tenant_config is not valid", K(ret), K(tenant_id));
            } else {
              // NOTE(review): the config value is added directly to a getClock() timestamp,
              // so both are presumably in the same time unit -- confirm.
              const int64_t ls_gc_wait_readonly_tx_time = tenant_config->_ls_gc_wait_readonly_tx_time;
              const int64_t cur_time = ObClockGenerator::getClock();
              if (block_tx_ts_ + ls_gc_wait_readonly_tx_time < cur_time) {
                // Waited longer than the configured budget: stop waiting and let the
                // removal continue even though read-only tx are not cleaned up.
                CLOG_LOG(WARN, "Attention!!! Wait enough time before readonly tx been cleaned up", K(ls_id), KPC(this));
                ret = OB_SUCCESS;
              } else {
                // Still within the budget: keep OB_EAGAIN as the return code.
                CLOG_LOG(WARN, "[WAIT_REASEON]need wait before readonly tx been cleaned up", K(ls_id), KPC(this));
              }
            }
          } else {
            CLOG_LOG(WARN, "check_all_readonly_tx_clean_up failed", K(ls_id), K(ret));
          }
        } else {
          CLOG_LOG(INFO, "check_all_readonly_tx_clean_up success", K(ls_id), K(ret));
        }
      }
    }
  }
  return ret;
}
void ObGCHandler::execute_pre_gc_process(ObGarbageCollector::LSStatus &ls_status)
{
switch (ls_status)
@ -409,6 +472,8 @@ void ObGCHandler::execute_pre_gc_process(ObGarbageCollector::LSStatus &ls_status
int ObGCHandler::check_ls_can_offline(const share::ObLSStatus &ls_status)
{
// The check must be performed by the leader, and get_gc_state must be invoked
// before get_palf_role to guarantee correctness.
int ret = OB_SUCCESS;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
@ -416,12 +481,18 @@ int ObGCHandler::check_ls_can_offline(const share::ObLSStatus &ls_status)
} else {
RLockGuard rlock_guard(rwlock_);
ObLSID ls_id = ls_->get_ls_id();
ObRole role;
LSGCState gc_state = INVALID_LS_GC_STATE;
if (OB_FAIL(ls_->get_gc_state(gc_state))) {
CLOG_LOG(WARN, "get_gc_state failed", K(ls_id), K(gc_state));
} else if (!is_valid_ls_gc_state(gc_state)) {
ret = OB_STATE_NOT_MATCH;
CLOG_LOG(WARN, "ls check gc state invalid", K(ls_id), K(gc_state));
} else if (OB_FAIL(get_palf_role_(role))) {
CLOG_LOG(WARN, "get_palf_role_ failed", K(ls_id));
} else if (ObRole::LEADER != role) {
ret = OB_STATE_NOT_MATCH;
CLOG_LOG(WARN, "follower can not advance gc state", K(ls_id), K(gc_state));
} else if (is_ls_offline_finished_(gc_state)) {
CLOG_LOG(INFO, "ls check_ls_can_offline success", K(ls_id), K(gc_state));
} else if (is_ls_blocked_state_(gc_state)) {
@ -817,7 +888,7 @@ int ObGCHandler::check_if_tenant_in_archive_(bool &in_archive)
return MTL(ObArchiveService*)->check_tenant_in_archive(in_archive);
}
void ObGCHandler::submit_log_(const ObGCLSLOGType log_type)
int ObGCHandler::submit_log_(const ObGCLSLOGType log_type, bool &is_success)
{
int ret = OB_SUCCESS;
ObGCLSLog gc_log(log_type);
@ -826,6 +897,7 @@ void ObGCHandler::submit_log_(const ObGCLSLOGType log_type)
int64_t buffer_size = gc_log.get_serialize_size();
ObGCLSLogCb cb;
const bool need_nonblock = false;
is_success = false;
SCN ref_scn;
palf::LSN lsn;
SCN scn;
@ -857,6 +929,7 @@ void ObGCHandler::submit_log_(const ObGCLSLOGType log_type)
if (cb.is_succeed()) {
(void)update_ls_gc_state_after_submit_log_(log_type, scn);
is_finished = true;
is_success = true;
CLOG_LOG(INFO, "write GC ls log success", K(ret), K(log_type));
} else if (cb.is_failed()) {
is_finished = true;
@ -874,6 +947,7 @@ void ObGCHandler::submit_log_(const ObGCLSLOGType log_type)
mtl_free(buffer);
buffer = nullptr;
}
return ret;
}
void ObGCHandler::update_ls_gc_state_after_submit_log_(const ObGCLSLOGType log_type,
@ -908,6 +982,7 @@ void ObGCHandler::block_ls_transfer_in_(const SCN &block_scn)
//TODO: @keqing.llt until the transfer feature is complete, kill transactions instead of doing transfer out
} else if (OB_FAIL(ls_->block_tx_start())) {
CLOG_LOG(WARN, "block_tx_start failed", K(ls_id), K(ret));
} else if (FALSE_IT(block_tx_ts_ = ObClockGenerator::getClock())) {
} else if (OB_FAIL(ls_->set_gc_state(LSGCState::LS_BLOCKED))) {
CLOG_LOG(WARN, "set_gc_state block failed", K(ls_id), K(ret));
} else {
@ -966,6 +1041,7 @@ void ObGCHandler::handle_gc_ls_dropping_(const ObGarbageCollector::LSStatus &ls_
CLOG_LOG(WARN, "GC handler not init");
} else {
WLockGuard wlock_guard(rwlock_);
bool is_success = false;
ObRole role;
ObLSID ls_id = ls_->get_ls_id();
LSGCState gc_state = INVALID_LS_GC_STATE;
@ -983,13 +1059,21 @@ void ObGCHandler::handle_gc_ls_dropping_(const ObGarbageCollector::LSStatus &ls_
} else if (!is_valid_ls_gc_state(gc_state)) {
CLOG_LOG(WARN, "ls check gc state invalid", K(ls_id), K(gc_state));
} else if (is_ls_offline_finished_(gc_state)) {
(void)set_block_tx_if_necessary_();
CLOG_LOG(INFO, "handle_gc_ls_dropping already finished", K(ls_id), K(gc_state));
} else if (is_ls_blocked_state_(gc_state)) {
(void)set_block_tx_if_necessary_();
// trigger kill all tx
(void)is_tablet_clear_(ls_status);
} else {
(void)submit_log_(ObGCLSLOGType::BLOCK_TABLET_TRANSFER_IN);
(void)is_tablet_clear_(ls_status);
if (OB_FAIL(submit_log_(ObGCLSLOGType::BLOCK_TABLET_TRANSFER_IN, is_success))) {
CLOG_LOG(WARN, "failed to submit BLOCK_TABLET_TRANSFER_IN log", K(ls_id), K(gc_state));
} else if (is_success) {
(void)is_tablet_clear_(ls_status);
CLOG_LOG(INFO, "BLOCK_TABLET_TRANSFER_IN log has callback on_success", K(ls_id), K(gc_state));
} else {
CLOG_LOG(WARN, "BLOCK_TABLET_TRANSFER_IN log has not callback on_success", K(ls_id), K(gc_state));
}
}
CLOG_LOG(INFO, "ls handle_gc_ls_dropping_ finished", K(ls_id), K(role), K(gc_state));
}
@ -1011,6 +1095,8 @@ void ObGCHandler::handle_gc_ls_offline_(ObGarbageCollector::LSStatus &ls_status)
gc_start_ts_ = ObTimeUtility::current_time();
}
bool is_success = false;
(void)set_block_tx_if_necessary_();
if (OB_FAIL(get_palf_role_(role))) {
CLOG_LOG(WARN, "get_palf_role_ failed", K(ls_id));
} else if (ObRole::LEADER != role) {
@ -1028,10 +1114,16 @@ void ObGCHandler::handle_gc_ls_offline_(ObGarbageCollector::LSStatus &ls_status)
} else if (is_ls_offline_state_(gc_state)) {
(void)try_check_and_set_wait_gc_(ls_status);
} else {
(void)submit_log_(ObGCLSLOGType::OFFLINE_LS);
(void)try_check_and_set_wait_gc_(ls_status);
if (OB_FAIL(submit_log_(ObGCLSLOGType::OFFLINE_LS, is_success))) {
CLOG_LOG(WARN, "failed to submit OFFLINE_LS log", K(ls_id), K(gc_state));
} else if (is_success) {
CLOG_LOG(INFO, "OFFLINE_LS has callback on_success", K(ls_id), K(gc_state));
(void)try_check_and_set_wait_gc_(ls_status);
} else {
CLOG_LOG(WARN, "OFFLINE_LS has not callback on_success", K(ls_id), K(gc_state));
}
}
CLOG_LOG(INFO, "ls handle_gc_ls_offline finished", K(ls_id), K(role), K(gc_state));
CLOG_LOG(INFO, "ls handle_gc_ls_offline finished", K(ls_id), K(role), K(gc_state), K(is_success));
}
}
@ -1051,6 +1143,14 @@ int ObGCHandler::diagnose(GCDiagnoseInfo &diagnose_info) const
return ret;
}
// Records the current clock value in block_tx_ts_ when no timestamp is stored yet.
// The in-memory timestamp may have been cleared (e.g. by a restart or a leader
// switch), so it is re-stamped here; an already recorded timestamp is left untouched.
void ObGCHandler::set_block_tx_if_necessary_()
{
  const bool block_ts_missing = (OB_INVALID_TIMESTAMP == block_tx_ts_);
  if (block_ts_missing) {
    block_tx_ts_ = ObClockGenerator::getClock();
  }
}
//---------------ObGarbageCollector---------------//
void ObGarbageCollector::GCCandidate::set_ls_status(const share::ObLSStatus &ls_status)
{
@ -1187,25 +1287,27 @@ void ObGarbageCollector::run1()
CLOG_LOG(INFO, "Garbage Collector start to run");
lib::set_thread_name("GCCollector");
const int64_t gc_interval = GC_INTERVAL;
while (!has_set_stop()) {
if (!ObServerCheckpointSlogHandler::get_instance().is_started()) {
// tablets are not ready for read
usleep(5000 * 1000); // 5s
} else if (!stop_create_new_gc_task_) {
ObGCCandidateArray gc_candidates;
int64_t gc_interval = GC_INTERVAL;
CLOG_LOG(INFO, "Garbage Collector is running", K(seq_), K(gc_interval));
gc_candidates.reset();
(void)gc_check_member_list_(gc_candidates);
(void)execute_gc_(gc_candidates);
gc_candidates.reset();
(void)gc_check_ls_status_(gc_candidates);
(void)execute_gc_(gc_candidates);
ob_usleep(gc_interval);
seq_++;
if (ObServerCheckpointSlogHandler::get_instance().is_started()) {
if (!stop_create_new_gc_task_) {
CLOG_LOG(INFO, "Garbage Collector is running", K(seq_), K(gc_interval));
ObGCCandidateArray gc_candidates;
gc_candidates.reset();
(void)gc_check_member_list_(gc_candidates);
(void)execute_gc_(gc_candidates);
gc_candidates.reset();
(void)gc_check_ls_status_(gc_candidates);
(void)execute_gc_(gc_candidates);
seq_++;
}
// safe destroy task
(void) safe_destroy_handler_.handle();
} else {
CLOG_LOG(INFO, "Garbage Collector is not running, waiting for ObServerCheckpointSlogHandler",
K(seq_), K(gc_interval));
}
// safe destroy task
(void) safe_destroy_handler_.handle();
ob_usleep(gc_interval);
}
}
@ -1507,18 +1609,20 @@ void ObGarbageCollector::execute_gc_(ObGCCandidateArray &gc_candidates)
} else if (OB_ISNULL(ls = ls_handle.get_ls())) {
tmp_ret = OB_ERR_UNEXPECTED;
CLOG_LOG(ERROR, "ls not exist", K(tmp_ret), K(id));
} else if (OB_ISNULL(gc_handler = ls->get_gc_handler())) {
tmp_ret = OB_ERR_UNEXPECTED;
CLOG_LOG(ERROR, "gc_handler is NULL", K(tmp_ret), K(id));
} else if (is_need_gc_ls_status_(ls_status)) {
ObSwitchLeaderAdapter switch_leader_adapter;
if (OB_FAIL(switch_leader_adapter.remove_from_election_blacklist(id.id(), self_addr_))) {
CLOG_LOG(WARN, "remove_from_election_blacklist failed", K(ret), K(id), K_(self_addr));
if (OB_SUCCESS != (tmp_ret = (gc_handler->execute_pre_remove()))) {
CLOG_LOG(WARN, "failed to execute_pre_remove", K(tmp_ret), K(id), K_(self_addr));
} else if (OB_SUCCESS != (tmp_ret = switch_leader_adapter.remove_from_election_blacklist(id.id(), self_addr_))) {
CLOG_LOG(WARN, "remove_from_election_blacklist failed", K(tmp_ret), K(id), K_(self_addr));
} else if (OB_SUCCESS != (tmp_ret = ls_service_->remove_ls(id, false))) {
CLOG_LOG(WARN, "remove_ls failed", K(tmp_ret), K(id));
} else {
CLOG_LOG(INFO, "remove_ls success", K(id), K(gc_reason));
}
} else if (OB_ISNULL(gc_handler = ls->get_gc_handler())) {
tmp_ret = OB_ERR_UNEXPECTED;
CLOG_LOG(ERROR, "gc_handler is NULL", K(tmp_ret), K(id));
} else {
CLOG_LOG(INFO, "begin execute_pre_gc_process", K(id), K(ls_status));
(void)gc_handler->execute_pre_gc_process(ls_status);