Files
oceanbase/src/rootserver/ob_server_zone_op_service.cpp
2023-05-24 06:41:30 +00:00

757 lines
31 KiB
C++

/**
* Copyright (c) 2022 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX RS
#include "ob_server_zone_op_service.h"
#include "share/ob_zone_table_operation.h"
#include "share/ob_service_epoch_proxy.h"
#include "share/ob_max_id_fetcher.h"
#include "lib/mysqlclient/ob_mysql_transaction.h" // ObMySQLTransaction
#include "lib/utility/ob_tracepoint.h" // ERRSIM
#include "rootserver/ob_root_service.h" // callback
#include "share/ob_all_server_tracer.h"
#include "rootserver/ob_server_manager.h"
namespace oceanbase
{
using namespace common;
using namespace share;
using namespace obrpc;
namespace rootserver
{
ObServerZoneOpService::ObServerZoneOpService()
: is_inited_(false),
server_change_callback_(NULL),
rpc_proxy_(NULL),
sql_proxy_(NULL),
lst_operator_(NULL),
unit_manager_(NULL)
{
}
ObServerZoneOpService::~ObServerZoneOpService()
{
}
int ObServerZoneOpService::init(
ObIServerChangeCallback &server_change_callback,
ObSrvRpcProxy &rpc_proxy,
ObLSTableOperator &lst_operator,
ObUnitManager &unit_manager,
ObMySQLProxy &sql_proxy
)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(is_inited_)) {
ret = OB_INIT_TWICE;
LOG_WARN("server zone operation service has been inited already", KR(ret), K(is_inited_));
} else if (OB_FAIL(st_operator_.init(&sql_proxy))) {
LOG_WARN("fail to init server table operator", KR(ret));
} else {
server_change_callback_ = &server_change_callback;
rpc_proxy_ = &rpc_proxy;
sql_proxy_ = &sql_proxy;
lst_operator_ = &lst_operator;
unit_manager_ = &unit_manager;
is_inited_ = true;
}
return ret;
}
int ObServerZoneOpService::add_servers(const ObIArray<ObAddr> &servers, const ObZone &zone, bool is_bootstrap)
{
int ret = OB_SUCCESS;
uint64_t sys_tenant_data_version = 0;
ObCheckServerForAddingServerArg rpc_arg;
ObCheckServerForAddingServerResult rpc_result;
ObZone picked_zone;
ObTimeoutCtx ctx;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_FAIL(GET_MIN_DATA_VERSION(OB_SYS_TENANT_ID, sys_tenant_data_version))) {
LOG_WARN("fail to get sys tenant's min data version", KR(ret));
} else if (OB_ISNULL(rpc_proxy_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("rpc_proxy_ is null", KR(ret), KP(rpc_proxy_));
} else if (OB_FAIL(rootserver::ObRootUtils::get_rs_default_timeout_ctx(ctx))) {
LOG_WARN("fail to get timeout ctx", KR(ret), K(ctx));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) {
const ObAddr &addr = servers.at(i);
int64_t timeout = ctx.get_timeout();
uint64_t server_id = OB_INVALID_ID;
const int64_t ERR_MSG_BUF_LEN = OB_MAX_SERVER_ADDR_SIZE + 100;
char non_empty_server_err_msg[ERR_MSG_BUF_LEN] = "";
int64_t pos = 0;
rpc_arg.reset();
if (OB_UNLIKELY(timeout <= 0)) {
ret = OB_TIMEOUT;
LOG_WARN("ctx time out", KR(ret), K(timeout));
} else if (OB_FAIL(databuff_printf(
non_empty_server_err_msg,
ERR_MSG_BUF_LEN,
pos,
"add non-empty server %s",
to_cstring(addr)))) {
LOG_WARN("fail to execute databuff_printf", KR(ret), K(addr));
} else if (OB_FAIL(fetch_new_server_id_(server_id))) {
// fetch a new server id and insert the server into __all_server table
LOG_WARN("fail to fetch new server id", KR(ret));
} else if (OB_UNLIKELY(!is_valid_server_id(server_id))) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("server id is invalid", KR(ret), K(server_id));
} else if (OB_FAIL(rpc_arg.init(
ObCheckServerForAddingServerArg::ADD_SERVER,
sys_tenant_data_version,
server_id))) {
LOG_WARN("fail to init rpc arg", KR(ret), K(sys_tenant_data_version), K(server_id));
} else if (OB_FAIL(rpc_proxy_->to(addr)
.timeout(timeout)
.check_server_for_adding_server(rpc_arg, rpc_result))) {
LOG_WARN("fail to check whether the server is empty", KR(ret), K(addr));
} else if (!rpc_result.get_is_server_empty()) {
ret = OB_OP_NOT_ALLOW;
LOG_WARN("adding non-empty server is not allowed", KR(ret));
LOG_USER_ERROR(OB_OP_NOT_ALLOW, non_empty_server_err_msg);
} else if (OB_FAIL(zone_checking_for_adding_server_(zone, rpc_result.get_zone(), picked_zone))) {
LOG_WARN("zone checking for adding server is failed", KR(ret), K(zone), K(rpc_result.get_zone()));
} else if (OB_FAIL(add_server_(
addr,
server_id,
picked_zone,
rpc_result.get_sql_port(),
rpc_result.get_build_version()))) {
LOG_WARN("add_server failed", KR(ret), K(addr), K(server_id), K(picked_zone), "sql_port",
rpc_result.get_sql_port(), "build_version", rpc_result.get_build_version());
} else {}
}
}
return ret;
}
int ObServerZoneOpService::delete_servers(
const ObIArray<ObAddr> &servers,
const ObZone &zone)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_ISNULL(GCTX.root_service_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("root_service_ is null", KR(ret), KP(GCTX.root_service_));
} else if (OB_UNLIKELY(servers.count() <= 0)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(servers));
} else if (OB_FAIL(check_server_have_enough_resource_for_delete_server_(servers, zone))) {
LOG_WARN("not enough resource, cannot delete servers", KR(ret), K(servers), K(zone));
} else if (OB_FAIL(GCTX.root_service_->check_all_ls_has_leader("delete server"))) {
LOG_WARN("fail to check all ls has leader", KR(ret));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) {
if (OB_FAIL(delete_server_(servers.at(i), zone))) {
LOG_WARN("delete_server failed", "server", servers.at(i), "zone", zone, KR(ret));
}
}
}
return ret;
}
int ObServerZoneOpService::cancel_delete_servers(
const ObIArray<ObAddr> &servers,
const ObZone &zone)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_ISNULL(unit_manager_) || OB_ISNULL(sql_proxy_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unit_manager_ or sql_proxy_ or server_change_callback_ is null", KR(ret),
KP(unit_manager_), KP(sql_proxy_));
} else {
ObServerInfoInTable server_info_in_table;
for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) {
const ObAddr &server = servers.at(i);
const int64_t now = ObTimeUtility::current_time();
ObMySQLTransaction trans;
server_info_in_table.reset();
if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) {
LOG_WARN("fail to start trans", KR(ret));
} else if (OB_FAIL(check_and_end_delete_server_(trans, server, zone, true /* is_cancel */, server_info_in_table))) {
LOG_WARN("fail to check and end delete server", KR(ret), K(server), K(zone));
} else if (OB_FAIL(ObServerTableOperator::update_status(
trans,
server,
ObServerStatus::OB_SERVER_DELETING,
server_info_in_table.is_alive() ? ObServerStatus::OB_SERVER_ACTIVE : ObServerStatus::OB_SERVER_INACTIVE))) {
LOG_WARN("fail to update status in __all_server table", KR(ret),
K(server), K(server_info_in_table));
} else if (OB_FAIL(unit_manager_->cancel_migrate_out_units(server))) {
LOG_WARN("unit_manager_ cancel_migrate_out_units failed", KR(ret), K(server));
}
(void) end_trans_and_on_server_change_(ret, trans, "cancel_delete_server", server, server_info_in_table.get_zone(), now);
}
}
return ret;
}
int ObServerZoneOpService::finish_delete_server(
const ObAddr &server,
const ObZone &zone)
{
int ret = OB_SUCCESS;
ObServerInfoInTable server_info_in_table;
const int64_t now = ObTimeUtility::current_time();
ObMySQLTransaction trans;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_ISNULL(sql_proxy_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("sql_proxy_ is null", KR(ret), KP(sql_proxy_));
} else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) {
LOG_WARN("fail to start trans", KR(ret));
} else if (OB_FAIL(check_and_end_delete_server_(trans, server, zone, false /* is_cancel */, server_info_in_table))) {
LOG_WARN("fail to check and end delete server", KR(ret), K(server), K(zone));
} else if (OB_FAIL(ObServerManager::try_delete_server_working_dir(
server_info_in_table.get_zone(),
server,
server_info_in_table.get_server_id()))) {
LOG_WARN("fail to delete server working dir", KR(ret), K(server_info_in_table));
} else if (OB_FAIL(st_operator_.remove(server, trans))) {
LOG_WARN("fail to remove this server from __all_server table", KR(ret), K(server));
}
(void) end_trans_and_on_server_change_(ret, trans, "finish_delete_server", server, server_info_in_table.get_zone(), now);
return ret;
}
int ObServerZoneOpService::stop_servers(
const ObIArray<ObAddr> &servers,
const ObZone &zone,
const obrpc::ObAdminServerArg::AdminServerOp &op)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_FAIL(stop_server_precheck(servers, op))) {
LOG_WARN("fail to precheck stop server", KR(ret), K(servers), K(zone));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); i++) {
const ObAddr &server = servers.at(i);
if (OB_FAIL(start_or_stop_server_(server, zone, op))) {
LOG_WARN("fail to stop server", KR(ret), K(server), K(zone));
}
}
}
return ret;
}
int ObServerZoneOpService::start_servers(
const ObIArray<ObAddr> &servers,
const ObZone &zone)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_UNLIKELY(servers.count() <= 0)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("servers' count is zero", KR(ret), K(servers));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) {
const ObAddr &server = servers.at(i);
if (OB_FAIL(start_or_stop_server_(server, zone, ObAdminServerArg::START))) {
LOG_WARN("fail to start server", KR(ret), K(server), K(zone));
}
}
}
return ret;
}
int ObServerZoneOpService::stop_server_precheck(
const ObIArray<ObAddr> &servers,
const obrpc::ObAdminServerArg::AdminServerOp &op)
{
int ret = OB_SUCCESS;
ObZone zone;
bool is_same_zone = false;
bool is_all_stopped = false;
ObArray<ObServerInfoInTable> all_servers_info_in_table;
ObServerInfoInTable server_info;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_UNLIKELY(servers.count() <= 0)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("servers' count is zero", KR(ret), K(servers));
} else if (OB_ISNULL(GCTX.root_service_) || OB_ISNULL(sql_proxy_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("GCTX.root_service_ or sql_proxy_ is null", KR(ret), KP(GCTX.root_service_), KP(sql_proxy_));
} else if (OB_FAIL(ObServerTableOperator::get(*sql_proxy_, all_servers_info_in_table))) {
LOG_WARN("fail to read __all_server table", KR(ret), KP(sql_proxy_));
} else if (OB_FAIL(check_zone_and_server_(
all_servers_info_in_table,
servers,
is_same_zone,
is_all_stopped))) {
LOG_WARN("fail to check zone and server", KR(ret), K(all_servers_info_in_table), K(servers));
} else if (is_all_stopped) {
//nothing todo
} else if (!is_same_zone) {
ret = OB_STOP_SERVER_IN_MULTIPLE_ZONES;
LOG_WARN("can not stop servers in multiple zones", KR(ret), K(server_info), K(servers));
} else if (OB_FAIL((ObRootUtils::find_server_info(all_servers_info_in_table, servers.at(0), server_info)))) {
LOG_WARN("fail to find server info", KR(ret), K(all_servers_info_in_table), K(servers.at(0)));
} else {
const ObZone &zone = server_info.get_zone();
if (ObAdminServerArg::ISOLATE == op) {
//"Isolate server" does not need to check the total number and status of replicas; it cannot be restarted later;
if (OB_FAIL(GCTX.root_service_->check_can_stop(zone, servers, false /*is_stop_zone*/))) {
LOG_WARN("fail to check can stop", KR(ret), K(zone), K(servers), K(op));
if (OB_OP_NOT_ALLOW == ret) {
LOG_USER_ERROR(OB_OP_NOT_ALLOW, "Stop all servers in primary region is");
}
}
} else {
if (ObRootUtils::have_other_stop_task(zone)) {
ret = OB_STOP_SERVER_IN_MULTIPLE_ZONES;
LOG_WARN("can not stop servers in multiple zones", KR(ret), K(zone), K(servers), K(op));
LOG_USER_ERROR(OB_STOP_SERVER_IN_MULTIPLE_ZONES,
"cannot stop server or stop zone in multiple zones");
} else if (OB_FAIL(GCTX.root_service_->check_majority_and_log_in_sync(
servers,
ObAdminServerArg::FORCE_STOP == op,/*skip_log_sync_check*/
"stop server"))) {
LOG_WARN("fail to check majority and log in-sync", KR(ret), K(zone), K(servers), K(op));
}
}
}
return ret;
}
int ObServerZoneOpService::zone_checking_for_adding_server_(
const ObZone &command_zone,
const ObZone &rpc_zone,
ObZone &picked_zone)
{
int ret = OB_SUCCESS;
// command_zone: the zone specified in the system command ADD SERVER
// rpc_zone: the zone specified in the server's local config and send to rs via rpc
// picked_zone: the zone we will use in add_server
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_UNLIKELY(rpc_zone.is_empty())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("rpc_zone cannot be empty. It implies that server's local config zone is empty.",
KR(ret), K(rpc_zone));
} else if (!command_zone.is_empty() && command_zone != rpc_zone) {
ret = OB_SERVER_ZONE_NOT_MATCH;
LOG_WARN("the zone specified in the server's local config is not the same as"
" the zone specified in the command", KR(ret), K(command_zone), K(rpc_zone));
} else if (OB_FAIL(picked_zone.assign(rpc_zone))) {
LOG_WARN("fail to assign picked_zone", KR(ret), K(rpc_zone));
} else {}
return ret;
}
int ObServerZoneOpService::add_server_(
const ObAddr &server,
const uint64_t server_id,
const ObZone &zone,
const int64_t sql_port,
const ObServerInfoInTable::ObBuildVersion &build_version)
{
int ret = OB_SUCCESS;
bool is_active = false;
const int64_t now = ObTimeUtility::current_time();
ObServerInfoInTable server_info_in_table;
ObMySQLTransaction trans;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_UNLIKELY(!server.is_valid()
|| !is_valid_server_id(server_id)
|| zone.is_empty()
|| sql_port <= 0
|| build_version.is_empty())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(server), K(server_id), K(zone), K(sql_port), K(build_version));
} else if (OB_ISNULL(sql_proxy_) || OB_ISNULL(server_change_callback_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("sql_proxy_ or server_change_callback_ is null", KR(ret),
KP(sql_proxy_), KP(server_change_callback_));
} else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) {
LOG_WARN("fail to start trans", KR(ret));
} else if (OB_FAIL(check_and_update_service_epoch_(trans))) {
LOG_WARN("fail to check and update service epoch", KR(ret));
} else if (OB_FAIL(ObZoneTableOperation::check_zone_active(trans, zone, is_active))){
// we do not need to lock the zone info in __all_zone table
// all server/zone operations are mutually exclusive since we locked the service epoch
LOG_WARN("fail to check whether the zone is active", KR(ret), K(zone));
} else if (OB_UNLIKELY(!is_active)) {
ret = OB_ZONE_NOT_ACTIVE;
LOG_WARN("the zone is not active", KR(ret), K(zone), K(is_active));
} else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info_in_table))) {
if (OB_SERVER_NOT_IN_WHITE_LIST == ret) {
ret = OB_SUCCESS;
} else {
LOG_WARN("fail to get server_info in table", KR(ret), K(server));
}
} else {
ret = OB_ENTRY_EXIST;
LOG_WARN("server exists", KR(ret), K(server_info_in_table));
}
if (FAILEDx(server_info_in_table.init(
server,
server_id,
zone,
sql_port,
false, /* with_rootserver */
ObServerStatus::OB_SERVER_ACTIVE,
build_version,
0, /* stop_time */
0, /* start_service_time */
0 /* last_offline_time */))) {
LOG_WARN("fail to init server info in table", KR(ret), K(server), K(server_id), K(zone),
K(sql_port), K(build_version), K(now));
} else if (OB_FAIL(ObServerTableOperator::insert(trans, server_info_in_table))) {
LOG_WARN("fail to insert server info into __all_server table", KR(ret), K(server_info_in_table));
}
(void) end_trans_and_on_server_change_(ret, trans, "add_server", server, zone, now);
return ret;
}
int ObServerZoneOpService::delete_server_(
const common::ObAddr &server,
const ObZone &zone)
{
int ret = OB_SUCCESS;
ObServerInfoInTable server_info_in_table;
const int64_t now = ObTimeUtility::current_time();
char ip[OB_MAX_SERVER_ADDR_SIZE] = "";
ObMySQLTransaction trans;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_UNLIKELY(!server.is_valid() || !server.ip_to_string(ip, sizeof(ip)))) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(server));
} else if (OB_ISNULL(sql_proxy_) || OB_ISNULL(server_change_callback_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("sql_proxy_ or server_change_callback_ is null", KR(ret),
KP(sql_proxy_), KP(server_change_callback_));
} else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) {
LOG_WARN("fail to start trans", KR(ret));
} else if (OB_FAIL(check_and_update_service_epoch_(trans))) {
LOG_WARN("fail to check and update service epoch", KR(ret));
} else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info_in_table))) {
LOG_WARN("fail to get server_info in table", KR(ret), K(server));
} else if (!zone.is_empty() && zone != server_info_in_table.get_zone()) {
ret = OB_SERVER_ZONE_NOT_MATCH;
LOG_WARN("zone not matches", KR(ret), K(server), K(zone), K(server_info_in_table));
} else if (OB_UNLIKELY(server_info_in_table.is_deleting())) {
ret = OB_SERVER_ALREADY_DELETED;
LOG_WARN("the server has been deleted", KR(ret), K(server_info_in_table));
} else {
int64_t job_id = RS_JOB_CREATE(DELETE_SERVER, trans, "svr_ip", ip, "svr_port", server.get_port());
if (job_id < 1) {
ret = OB_SQL_OPT_ERROR;
LOG_WARN("insert into all_rootservice_job failed ", K(ret));
} else if (OB_FAIL(ObServerTableOperator::update_status(
trans,
server,
server_info_in_table.get_status(),
ObServerStatus::OB_SERVER_DELETING))) {
LOG_WARN("fail to update status", KR(ret), K(server), K(server_info_in_table));
}
}
(void) end_trans_and_on_server_change_(ret, trans, "delete_server", server, server_info_in_table.get_zone(), now);
return ret;
}
int ObServerZoneOpService::check_and_end_delete_server_(
common::ObMySQLTransaction &trans,
const common::ObAddr &server,
const ObZone &zone,
const bool is_cancel,
share::ObServerInfoInTable &server_info)
{
int ret = OB_SUCCESS;
server_info.reset();
char ip[OB_MAX_SERVER_ADDR_SIZE] = "";
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_UNLIKELY(!server.is_valid() || !server.ip_to_string(ip, sizeof(ip)))) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(server));
} else if (OB_FAIL(check_and_update_service_epoch_(trans))) {
LOG_WARN("fail to check and update service epoch", KR(ret));
} else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info))) {
LOG_WARN("fail to get server_info in table", KR(ret), K(server));
} else if (!zone.is_empty() && zone != server_info.get_zone()) {
ret = OB_SERVER_ZONE_NOT_MATCH;
LOG_WARN("zone not matches", KR(ret), K(server), K(zone), K(server_info));
} else if (OB_UNLIKELY(!server_info.is_deleting())) {
ret = OB_SERVER_NOT_DELETING;
LOG_ERROR("server is not in deleting status, cannot be removed from __all_server table",
KR(ret), K(server_info));
} else {
ObRsJobInfo job_info;
ret = RS_JOB_FIND(job_info, trans, "job_type", "DELETE_SERVER",
"job_status", "INPROGRESS",
"svr_ip", ip, "svr_port", server.get_port());
if (OB_SUCC(ret) && job_info.job_id_ > 0) {
int tmp_ret = is_cancel ? OB_CANCELED : OB_SUCCESS;
if (OB_FAIL(RS_JOB_COMPLETE(job_info.job_id_, tmp_ret, trans))) {
LOG_WARN("fail to all_rootservice_job" , KR(ret), K(server));
}
} else {
LOG_WARN("failed to find job", KR(ret), K(server));
}
}
return ret;
}
int ObServerZoneOpService::start_or_stop_server_(
const common::ObAddr &server,
const ObZone &zone,
const obrpc::ObAdminServerArg::AdminServerOp &op)
{
int ret = OB_SUCCESS;
const int64_t now = ObTimeUtility::current_time();
ObServerInfoInTable server_info;
ObMySQLTransaction trans;
bool is_start = (ObAdminServerArg::START == op);
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_UNLIKELY(!server.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(server));
} else if (OB_ISNULL(sql_proxy_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("sql_proxy_ is null", KR(ret), KP(sql_proxy_));
} else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) {
LOG_WARN("fail to start trans", KR(ret));
} else if (OB_FAIL(check_and_update_service_epoch_(trans))) {
LOG_WARN("fail to check and update service epoch", KR(ret));
} else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info))) {
LOG_WARN("fail to get server_info", KR(ret), K(server));
} else if (!zone.is_empty() && zone != server_info.get_zone()) {
ret = OB_SERVER_ZONE_NOT_MATCH;
LOG_WARN("zone not matches", KR(ret), K(server), K(zone), K(server_info));
} else if (ObAdminServerArg::STOP == op || ObAdminServerArg::FORCE_STOP == op) {
// check again, if there exists stopped servers in other zones
if (ObRootUtils::have_other_stop_task(server_info.get_zone())) {
ret = OB_STOP_SERVER_IN_MULTIPLE_ZONES;
LOG_WARN("can not stop servers in multiple zones", KR(ret), K(server_info.get_zone()));
LOG_USER_ERROR(OB_STOP_SERVER_IN_MULTIPLE_ZONES,
"cannot stop server or stop zone in multiple zones");
}
}
if (OB_SUCC(ret)) {
int64_t new_stop_time = is_start ? 0 : now;
int64_t old_stop_time = server_info.get_stop_time();
if ((is_start && 0 != old_stop_time) || (!is_start && 0 == old_stop_time)) {
if (OB_FAIL(ObServerTableOperator::update_stop_time(
trans,
server,
old_stop_time,
new_stop_time))) {
LOG_WARN("fail to update stop_time", KR(ret), K(server), K(old_stop_time), K(new_stop_time));
}
}
LOG_INFO("update stop time", KR(ret), K(server_info),
K(old_stop_time), K(new_stop_time), K(op), K(is_start));
}
const char *op_print_str = is_start ? "start_server" : "stop_server";
(void) end_trans_and_on_server_change_(ret, trans, op_print_str, server, server_info.get_zone(), now);
return ret;
}
int ObServerZoneOpService::construct_rs_list_arg(ObRsListArg &rs_list_arg)
{
int ret = OB_SUCCESS;
ObLSInfo ls_info;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_ISNULL(lst_operator_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("lst operator is null", KR(ret), KP(lst_operator_));
} else if (OB_FAIL(lst_operator_->get(
GCONF.cluster_id,
OB_SYS_TENANT_ID,
SYS_LS,
share::ObLSTable::DEFAULT_MODE,
ls_info))) {
LOG_WARN("fail to get ls info", KR(ret));
} else {
rs_list_arg.master_rs_ = GCONF.self_addr_;
FOREACH_CNT_X(replica, ls_info.get_replicas(), OB_SUCC(ret)) {
if (replica->get_server() == GCONF.self_addr_
|| (replica->is_in_service()
&& ObReplicaTypeCheck::is_paxos_replica_V2(replica->get_replica_type()))) {
if (OB_FAIL(rs_list_arg.rs_list_.push_back(replica->get_server()))) {
LOG_WARN("fail to push a server into rs list", KR(ret), K(replica->get_server()));
}
}
}
}
return ret;
}
int ObServerZoneOpService::check_and_update_service_epoch_(ObMySQLTransaction &trans)
{
int ret = OB_SUCCESS;
int64_t service_epoch_in_table = palf::INVALID_PROPOSAL_ID;
int64_t proposal_id = palf::INVALID_PROPOSAL_ID;
ObRole role;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_FAIL(ObRootUtils::get_proposal_id_from_sys_ls(proposal_id, role))) {
LOG_WARN("fail to get proposal id from sys ls", KR(ret));
} else if (ObRole::LEADER != role) {
ret = OB_NOT_MASTER;
LOG_WARN("not leader ls", KR(ret), K(proposal_id), K(service_epoch_in_table), K(role));
} else if (palf::INVALID_PROPOSAL_ID == proposal_id) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid proposal id", KR(ret), K(proposal_id));
} else if (OB_FAIL(ObServiceEpochProxy::check_and_update_service_epoch(
trans,
OB_SYS_TENANT_ID,
ObServiceEpochProxy::SERVER_ZONE_OP_SERVICE_EPOCH,
proposal_id))) {
LOG_WARN("fail to check and update server zone op service epoch", KR(ret), K(proposal_id));
} else {}
return ret;
}
int ObServerZoneOpService::fetch_new_server_id_(uint64_t &server_id)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_ISNULL(sql_proxy_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid sql proxy", KR(ret), KP(sql_proxy_));
} else {
uint64_t new_max_id = OB_INVALID_ID;
ObMaxIdFetcher id_fetcher(*sql_proxy_);
if (OB_FAIL(id_fetcher.fetch_new_max_id(
OB_SYS_TENANT_ID,
OB_MAX_USED_SERVER_ID_TYPE,
new_max_id))) {
LOG_WARN("fetch_new_max_id failed", KR(ret));
} else {
server_id = new_max_id;
}
}
return ret;
}
int ObServerZoneOpService::check_server_have_enough_resource_for_delete_server_(
const ObIArray<ObAddr> &servers,
const ObZone &zone)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_ISNULL(unit_manager_) || OB_ISNULL(sql_proxy_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unit_manager_ or sql_proxy_ is null", KR(ret), KP(unit_manager_), KP(sql_proxy_));
} else {
ObServerInfoInTable server_info;
FOREACH_CNT_X(server, servers, OB_SUCC(ret)) {
server_info.reset();
if (OB_FAIL(ObServerTableOperator::get(*sql_proxy_, *server, server_info))) {
LOG_WARN("fail to get server_info in table", KR(ret), KP(sql_proxy_), KPC(server));
} else if (!zone.is_empty() && server_info.get_zone() != zone) {
ret = OB_SERVER_ZONE_NOT_MATCH;
LOG_WARN("the arg zone is not the same as the server's zone in __all_server table", KR(ret),
K(zone), K(server_info));
} else if (OB_FAIL(unit_manager_->check_enough_resource_for_delete_server(
*server, server_info.get_zone()))) {
LOG_WARN("fail to check enouch resource", KR(ret), KPC(server), K(server_info));
}
}//end for each
}
return ret;
}
int ObServerZoneOpService::check_zone_and_server_(
const ObIArray<share::ObServerInfoInTable> &servers_info,
const ObIArray<ObAddr> &servers,
bool &is_same_zone,
bool &is_all_stopped)
{
int ret = OB_SUCCESS;
is_same_zone = true;
is_all_stopped = true;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else {
ObServerInfoInTable server_info;
ObZone zone;
for (int64_t i = 0; i < servers.count() && OB_SUCC(ret) && (is_same_zone || is_all_stopped); i++) {
const ObAddr &server = servers.at(i);
server_info.reset();
if (OB_FAIL(ObRootUtils::find_server_info(servers_info, server, server_info))) {
LOG_WARN("fail to get server info", KR(ret), K(servers_info), K(server));
} else if (0 == i) {
if (OB_FAIL(zone.assign(server_info.get_zone()))) {
LOG_WARN("fail to assign zone", KR(ret), K(server_info.get_zone()));
}
} else if (zone != server_info.get_zone()) {
is_same_zone = false;
LOG_WARN("server zone not same", K(zone), K(server_info), K(servers));
}
if (OB_FAIL(ret)) {
} else if (!server_info.is_stopped()) {
is_all_stopped = false;
}
}
}
return ret;
}
ERRSIM_POINT_DEF(ALL_SERVER_LIST_ERROR);
void ObServerZoneOpService::end_trans_and_on_server_change_(
int &ret,
common::ObMySQLTransaction &trans,
const char *op_print_str,
const common::ObAddr &server,
const ObZone &zone,
const int64_t start_time)
{
int tmp_ret = OB_SUCCESS;
LOG_INFO("start execute end_trans_and_on_server_change_", KR(ret),
K(op_print_str), K(server), K(zone), K(start_time));
if (OB_UNLIKELY(!trans.is_started())) {
LOG_WARN("the transaction is not started");
} else {
if (OB_TMP_FAIL(trans.end(OB_SUCC(ret)))) {
LOG_WARN("fail to commit the transaction", KR(ret), KR(tmp_ret), K(server), K(zone));
ret = OB_SUCC(ret) ? tmp_ret : ret;
}
}
if (OB_TMP_FAIL(SVR_TRACER.refresh())) {
LOG_WARN("fail to refresh server tracer", KR(ret), KR(tmp_ret));
}
bool no_on_server_change = ALL_SERVER_LIST_ERROR ? true : false;
if (OB_ISNULL(server_change_callback_)) {
tmp_ret = OB_ERR_UNEXPECTED;
LOG_WARN("server_change_callback_ is null", KR(ret), KR(tmp_ret), KP(server_change_callback_));
ret = OB_SUCC(ret) ? tmp_ret : ret;
} else if (no_on_server_change) {
} else if (OB_TMP_FAIL(server_change_callback_->on_server_change())) {
LOG_WARN("fail to callback on server change", KR(ret), KR(tmp_ret));
}
int64_t time_cost = ::oceanbase::common::ObTimeUtility::current_time() - start_time;
FLOG_INFO(op_print_str, K(server), K(zone), "time cost", time_cost, KR(ret));
ROOTSERVICE_EVENT_ADD("server", op_print_str, K(server), K(ret));
}
}
}