fix switchover during upgrade
This commit is contained in:
@ -1747,6 +1747,7 @@ int ObNewTableTabletAllocator::choose_new_ls_(
|
||||
}
|
||||
}
|
||||
} else if (old_ls_attr.get_ls_flag().is_block_tablet_in()) {
|
||||
//only in 4200 can be block tablet in, no need to process data_version
|
||||
if (OB_FAIL(ObBalanceTaskTableOperator::get_merge_task_dest_ls_by_src_ls(
|
||||
*sql_proxy_,
|
||||
tenant_id,
|
||||
|
||||
@ -194,6 +194,9 @@ int ObLSRecoveryStatHandler::do_get_ls_level_readable_scn_(SCN &read_scn)
|
||||
// scn get order: read_scn before replayable_scn before sync_scn
|
||||
} else if (OB_FAIL(ls_->get_max_decided_scn(read_scn))) {
|
||||
LOG_WARN("failed to get_max_decided_scn", KR(ret), KPC_(ls));
|
||||
} else if (GET_MIN_CLUSTER_VERSION() < CLUSTER_VERSION_4_2_0_0) {
|
||||
//Before the cluster version is pushed up, the majority is not counted,
|
||||
//and this RPC is only supported in version 4.2
|
||||
} else if (OB_FAIL(get_majority_readable_scn_(read_scn /* leader_readable_scn */, majority_min_readable_scn))) {
|
||||
LOG_WARN("failed to get_majority_readable_scn_", KR(ret), K(read_scn), KPC_(ls));
|
||||
} else {
|
||||
|
||||
@ -23,6 +23,7 @@
|
||||
#include "share/ob_rpc_struct.h"//ObLSAccessModeInfo
|
||||
#include "observer/ob_server_struct.h"//GCTX
|
||||
#include "share/location_cache/ob_location_service.h"//get ls leader
|
||||
#include "share/ob_global_stat_proxy.h"//ObGlobalStatProxy
|
||||
#include "share/ob_schema_status_proxy.h"//set_schema_status
|
||||
#include "storage/tx/ob_timestamp_service.h" // ObTimestampService
|
||||
#include "share/ob_primary_standby_service.h" // ObPrimaryStandbyService
|
||||
@ -485,8 +486,17 @@ int ObTenantRoleTransitionService::get_tenant_ref_scn_(const share::SCN &sync_sc
|
||||
int ObTenantRoleTransitionService::wait_ls_balance_task_finish_()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
uint64_t compat_version = 0;
|
||||
|
||||
if (OB_FAIL(check_inner_stat())) {
|
||||
LOG_WARN("error unexpected", KR(ret), K(tenant_id_), KP(sql_proxy_), KP(rpc_proxy_));
|
||||
} else {
|
||||
ObGlobalStatProxy global_proxy(*sql_proxy_, gen_meta_tenant_id(tenant_id_));
|
||||
if (OB_FAIL(global_proxy.get_current_data_version(compat_version))) {
|
||||
LOG_WARN("failed to get current data version", KR(ret), K(tenant_id_));
|
||||
} else if (compat_version < DATA_VERSION_4_2_0_0) {
|
||||
//if tenant version is less than 4200, no need to check
|
||||
//Regardless of the data_version change and switchover concurrency scenario
|
||||
} else {
|
||||
bool is_finish = false;
|
||||
ObBalanceTaskHelper ls_balance_task;
|
||||
@ -508,6 +518,7 @@ int ObTenantRoleTransitionService::wait_ls_balance_task_finish_()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@ -973,7 +973,8 @@ int ObLSStatusOperator::inner_get_ls_status_(
|
||||
LOG_WARN("failed to get result", KR(ret), K(init_learner_list_str));
|
||||
} else if (init_learner_list_str.empty()) {
|
||||
// maybe
|
||||
} else if (OB_FAIL(set_list_with_hex_str_(init_learner_list_str, learner_list, arb_member))) {
|
||||
} else if (OB_FAIL(set_list_with_hex_str_(init_learner_list_str,
|
||||
learner_list, arb_member))) {
|
||||
LOG_WARN("failed to set learner list", KR(ret), K(init_learner_list_str));
|
||||
}
|
||||
}
|
||||
@ -1602,8 +1603,25 @@ int ObLSStatusOperator::create_abort_ls_in_switch_tenant(
|
||||
share::ObLSStatusInfoArray status_info_array;
|
||||
ObLSStatusOperator status_op;
|
||||
ObAllTenantInfo tenant_info;
|
||||
const uint64_t exec_tenant_id = ObLSLifeIAgent::get_exec_tenant_id(tenant_id);
|
||||
if (OB_FAIL(trans.start(&client, exec_tenant_id))) {
|
||||
bool is_compatible_with_readonly_replica = false;
|
||||
int tmp_ret = OB_SUCCESS;
|
||||
ObSqlString sub_string;
|
||||
const uint64_t exec_tenant_id = get_exec_tenant_id(tenant_id);
|
||||
if (OB_SUCCESS != (tmp_ret = ObShareUtil::check_compat_version_for_readonly_replica(
|
||||
exec_tenant_id, is_compatible_with_readonly_replica))) {
|
||||
LOG_WARN("fail to check tenant compat version with readonly replica",
|
||||
KR(tmp_ret), K(exec_tenant_id));
|
||||
} else if (is_compatible_with_readonly_replica
|
||||
&& OB_SUCCESS != (tmp_ret = sub_string.assign(", init_learner_list = '', b_init_learner_list = ''"))) {
|
||||
LOG_WARN("fail to construct substring for learner list", KR(tmp_ret));
|
||||
sub_string.reset();
|
||||
//Ignore the fact that data_version has been changed to 4.2,
|
||||
//but the local observer configuration item has not been refreshed.
|
||||
//If the learner_list is not cleaned up, there will be no logical problems.
|
||||
//It is just not very good-looking, and it can be cleaned up eventually.
|
||||
}
|
||||
|
||||
if (FAILEDx(trans.start(&client, exec_tenant_id))) {
|
||||
LOG_WARN("failed to start trans", KR(ret), K(exec_tenant_id), K(tenant_id));
|
||||
} else if (OB_FAIL(ObAllTenantInfoProxy::load_tenant_info(tenant_id, &trans, true, tenant_info))) {
|
||||
LOG_WARN("failed to load tenant info", KR(ret), K(tenant_id));
|
||||
@ -1611,13 +1629,13 @@ int ObLSStatusOperator::create_abort_ls_in_switch_tenant(
|
||||
|| status != tenant_info.get_switchover_status())) {
|
||||
ret = OB_NEED_RETRY;
|
||||
LOG_WARN("switchover may concurrency, need retry", KR(ret), K(switchover_epoch), K(status), K(tenant_info));
|
||||
} else if (OB_FAIL(sql.assign_fmt("UPDATE %s set status = '%s',init_member_list = '', b_init_member_list = ''"
|
||||
", init_learner_list = '', b_init_learner_list = ''"
|
||||
} else if (OB_FAIL(sql.assign_fmt("UPDATE %s set status = '%s',init_member_list = '', b_init_member_list = ''%.*s"
|
||||
" where tenant_id = %lu and status in ('%s', '%s')",
|
||||
OB_ALL_LS_STATUS_TNAME,
|
||||
ls_status_to_str(share::OB_LS_CREATE_ABORT),
|
||||
static_cast<int>(sub_string.length()), sub_string.ptr(),
|
||||
tenant_id, ls_status_to_str(OB_LS_CREATED), ls_status_to_str(OB_LS_CREATING)))) {
|
||||
LOG_WARN("failed to assign sql", KR(ret), K(tenant_id), K(sql));
|
||||
LOG_WARN("failed to assign sql", KR(ret), K(tenant_id), K(sql), K(sub_string));
|
||||
} else if (OB_FAIL(exec_write(tenant_id, sql, this, trans, true))) {
|
||||
LOG_WARN("failed to exec write", KR(ret), K(tenant_id), K(sql));
|
||||
}
|
||||
@ -1630,7 +1648,7 @@ int ObLSStatusOperator::create_abort_ls_in_switch_tenant(
|
||||
}
|
||||
}
|
||||
LOG_INFO("finish create abort ls", KR(ret), K(tenant_id), K(sql));
|
||||
|
||||
ALL_LS_EVENT_ADD(tenant_id, SYS_LS, "create abort ls for switchover", ret, sql);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@ -25,6 +25,7 @@
|
||||
#include "share/ls/ob_ls_operator.h" //ObLSAttr
|
||||
#include "share/schema/ob_multi_version_schema_service.h" // for GSCHEMASERVICE
|
||||
#include "share/ob_standby_upgrade.h" // ObStandbyUpgrade
|
||||
#include "share/ob_global_stat_proxy.h"//ObGlobalStatProxy
|
||||
#include "share/backup/ob_backup_config.h" // ObBackupConfigParserMgr
|
||||
#include "observer/ob_inner_sql_connection.h"//ObInnerSQLConnection
|
||||
#include "storage/tx/ob_trans_service.h" //ObTransService
|
||||
@ -482,13 +483,20 @@ int ObPrimaryStandbyService::switch_to_standby(
|
||||
if (OB_FAIL(ret)) {
|
||||
} else {
|
||||
ObTenantRoleTransitionService role_transition_service(tenant_id, sql_proxy_, GCTX.srv_rpc_proxy_, switch_optype);
|
||||
|
||||
uint64_t compat_version = 0;
|
||||
ObGlobalStatProxy global_proxy(*sql_proxy_, gen_meta_tenant_id(tenant_id));
|
||||
(void)role_transition_service.set_switchover_epoch(tenant_info.get_switchover_epoch());
|
||||
if (OB_FAIL(role_transition_service.do_switch_access_mode_to_raw_rw(tenant_info))) {
|
||||
LOG_WARN("failed to do_switch_access_mode", KR(ret), K(tenant_id), K(tenant_info));
|
||||
} else if (OB_FAIL(global_proxy.get_current_data_version(compat_version))) {
|
||||
LOG_WARN("failed to get current data version", KR(ret), K(tenant_id));
|
||||
} else if (compat_version < DATA_VERSION_4_2_0_0) {
|
||||
//Regardless of the data_version change and switchover concurrency scenario,
|
||||
//if there is concurrency, the member_list lock that has not been released by the operation and maintenance process
|
||||
} else if (OB_FAIL(ObMemberListLockUtils::unlock_member_list_when_switch_to_standby(tenant_id, *sql_proxy_))) {
|
||||
LOG_WARN("failed to unlock member list when switch to standby", K(ret), K(tenant_id));
|
||||
} else if (OB_FAIL(role_transition_service.switchover_update_tenant_status(tenant_id,
|
||||
}
|
||||
if (FAILEDx(role_transition_service.switchover_update_tenant_status(tenant_id,
|
||||
false /* switch_to_standby */,
|
||||
share::STANDBY_TENANT_ROLE,
|
||||
tenant_info.get_switchover_status(),
|
||||
|
||||
Reference in New Issue
Block a user