/** * Copyright (c) 2022 OceanBase * OceanBase CE is licensed under Mulan PubL v2. * You can use this software according to the terms and conditions of the Mulan PubL v2. * You may obtain a copy of Mulan PubL v2 at: * http://license.coscl.org.cn/MulanPubL-2.0 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ #define USING_LOG_PREFIX RS #include "ob_server_zone_op_service.h" #include "share/ob_zone_table_operation.h" #include "share/ob_service_epoch_proxy.h" #include "share/ob_max_id_fetcher.h" #include "lib/mysqlclient/ob_mysql_transaction.h" // ObMySQLTransaction #include "lib/utility/ob_tracepoint.h" // ERRSIM #include "rootserver/ob_root_service.h" // callback #include "share/ob_all_server_tracer.h" #include "rootserver/ob_server_manager.h" namespace oceanbase { using namespace common; using namespace share; using namespace obrpc; namespace rootserver { ObServerZoneOpService::ObServerZoneOpService() : is_inited_(false), server_change_callback_(NULL), rpc_proxy_(NULL), sql_proxy_(NULL), lst_operator_(NULL), unit_manager_(NULL) { } ObServerZoneOpService::~ObServerZoneOpService() { } int ObServerZoneOpService::init( ObIServerChangeCallback &server_change_callback, ObSrvRpcProxy &rpc_proxy, ObLSTableOperator &lst_operator, ObUnitManager &unit_manager, ObMySQLProxy &sql_proxy ) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("server zone operation service has been inited already", KR(ret), K(is_inited_)); } else if (OB_FAIL(st_operator_.init(&sql_proxy))) { LOG_WARN("fail to init server table operator", KR(ret)); } else { server_change_callback_ = &server_change_callback; rpc_proxy_ = &rpc_proxy; sql_proxy_ = &sql_proxy; lst_operator_ = &lst_operator; unit_manager_ = &unit_manager; is_inited_ = true; } return ret; } int ObServerZoneOpService::add_servers(const ObIArray &servers, const ObZone &zone, bool is_bootstrap) { int ret = OB_SUCCESS; uint64_t sys_tenant_data_version = 0; ObCheckServerForAddingServerArg rpc_arg; ObCheckServerForAddingServerResult rpc_result; ObZone picked_zone; ObTimeoutCtx ctx; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_FAIL(GET_MIN_DATA_VERSION(OB_SYS_TENANT_ID, sys_tenant_data_version))) { LOG_WARN("fail to get sys tenant's min data version", KR(ret)); } else if (OB_ISNULL(rpc_proxy_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("rpc_proxy_ is null", KR(ret), KP(rpc_proxy_)); } else if (OB_FAIL(rootserver::ObRootUtils::get_rs_default_timeout_ctx(ctx))) { LOG_WARN("fail to get timeout ctx", KR(ret), K(ctx)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) { const ObAddr &addr = servers.at(i); int64_t timeout = ctx.get_timeout(); uint64_t server_id = OB_INVALID_ID; const int64_t ERR_MSG_BUF_LEN = OB_MAX_SERVER_ADDR_SIZE + 100; char non_empty_server_err_msg[ERR_MSG_BUF_LEN] = ""; int64_t pos = 0; rpc_arg.reset(); if (OB_UNLIKELY(timeout <= 0)) { ret = OB_TIMEOUT; LOG_WARN("ctx time out", KR(ret), K(timeout)); } else if (OB_FAIL(databuff_printf( non_empty_server_err_msg, ERR_MSG_BUF_LEN, pos, "add non-empty server %s", to_cstring(addr)))) { LOG_WARN("fail to execute databuff_printf", KR(ret), K(addr)); } else if (OB_FAIL(fetch_new_server_id_(server_id))) { // fetch a new server id and insert the server into __all_server table LOG_WARN("fail to fetch new server id", KR(ret)); } else if (OB_UNLIKELY(!is_valid_server_id(server_id))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("server id is invalid", KR(ret), K(server_id)); } else if (OB_FAIL(rpc_arg.init( ObCheckServerForAddingServerArg::ADD_SERVER, sys_tenant_data_version, server_id))) { LOG_WARN("fail to init rpc arg", KR(ret), K(sys_tenant_data_version), K(server_id)); } else if (OB_FAIL(rpc_proxy_->to(addr) .timeout(timeout) .check_server_for_adding_server(rpc_arg, rpc_result))) { LOG_WARN("fail to check whether the server is empty", KR(ret), K(addr)); } else if (!rpc_result.get_is_server_empty()) { ret = OB_OP_NOT_ALLOW; LOG_WARN("adding non-empty server is not allowed", KR(ret)); LOG_USER_ERROR(OB_OP_NOT_ALLOW, non_empty_server_err_msg); } else if (OB_FAIL(zone_checking_for_adding_server_(zone, rpc_result.get_zone(), picked_zone))) { LOG_WARN("zone checking for adding server is failed", KR(ret), K(zone), K(rpc_result.get_zone())); } else if (OB_FAIL(add_server_( addr, server_id, picked_zone, rpc_result.get_sql_port(), rpc_result.get_build_version()))) { LOG_WARN("add_server failed", KR(ret), K(addr), K(server_id), K(picked_zone), "sql_port", rpc_result.get_sql_port(), "build_version", rpc_result.get_build_version()); } else {} } } return ret; } int ObServerZoneOpService::delete_servers( const ObIArray &servers, const ObZone &zone) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_ISNULL(GCTX.root_service_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("root_service_ is null", KR(ret), KP(GCTX.root_service_)); } else if (OB_UNLIKELY(servers.count() <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(servers)); } else if (OB_FAIL(check_server_have_enough_resource_for_delete_server_(servers, zone))) { LOG_WARN("not enough resource, cannot delete servers", KR(ret), K(servers), K(zone)); } else if (OB_FAIL(GCTX.root_service_->check_all_ls_has_leader("delete server"))) { LOG_WARN("fail to check all ls has leader", KR(ret)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) { if (OB_FAIL(delete_server_(servers.at(i), zone))) { LOG_WARN("delete_server failed", "server", servers.at(i), "zone", zone, KR(ret)); } } } return ret; } int ObServerZoneOpService::cancel_delete_servers( const ObIArray &servers, const ObZone &zone) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_ISNULL(unit_manager_) || OB_ISNULL(sql_proxy_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unit_manager_ or sql_proxy_ or server_change_callback_ is null", KR(ret), KP(unit_manager_), KP(sql_proxy_)); } else { ObServerInfoInTable server_info_in_table; for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) { const ObAddr &server = servers.at(i); const int64_t now = ObTimeUtility::current_time(); ObMySQLTransaction trans; server_info_in_table.reset(); if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) { LOG_WARN("fail to start trans", KR(ret)); } else if (OB_FAIL(check_and_end_delete_server_(trans, server, zone, true /* is_cancel */, server_info_in_table))) { LOG_WARN("fail to check and end delete server", KR(ret), K(server), K(zone)); } else if (OB_FAIL(ObServerTableOperator::update_status( trans, server, ObServerStatus::OB_SERVER_DELETING, server_info_in_table.is_alive() ? ObServerStatus::OB_SERVER_ACTIVE : ObServerStatus::OB_SERVER_INACTIVE))) { LOG_WARN("fail to update status in __all_server table", KR(ret), K(server), K(server_info_in_table)); } else if (OB_FAIL(unit_manager_->cancel_migrate_out_units(server))) { LOG_WARN("unit_manager_ cancel_migrate_out_units failed", KR(ret), K(server)); } (void) end_trans_and_on_server_change_(ret, trans, "cancel_delete_server", server, server_info_in_table.get_zone(), now); } } return ret; } int ObServerZoneOpService::finish_delete_server( const ObAddr &server, const ObZone &zone) { int ret = OB_SUCCESS; ObServerInfoInTable server_info_in_table; const int64_t now = ObTimeUtility::current_time(); ObMySQLTransaction trans; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_ISNULL(sql_proxy_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("sql_proxy_ is null", KR(ret), KP(sql_proxy_)); } else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) { LOG_WARN("fail to start trans", KR(ret)); } else if (OB_FAIL(check_and_end_delete_server_(trans, server, zone, false /* is_cancel */, server_info_in_table))) { LOG_WARN("fail to check and end delete server", KR(ret), K(server), K(zone)); } else if (OB_FAIL(ObServerManager::try_delete_server_working_dir( server_info_in_table.get_zone(), server, server_info_in_table.get_server_id()))) { LOG_WARN("fail to delete server working dir", KR(ret), K(server_info_in_table)); } else if (OB_FAIL(st_operator_.remove(server, trans))) { LOG_WARN("fail to remove this server from __all_server table", KR(ret), K(server)); } (void) end_trans_and_on_server_change_(ret, trans, "finish_delete_server", server, server_info_in_table.get_zone(), now); return ret; } int ObServerZoneOpService::stop_servers( const ObIArray &servers, const ObZone &zone, const obrpc::ObAdminServerArg::AdminServerOp &op) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_FAIL(stop_server_precheck(servers, op))) { LOG_WARN("fail to precheck stop server", KR(ret), K(servers), K(zone)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); i++) { const ObAddr &server = servers.at(i); if (OB_FAIL(start_or_stop_server_(server, zone, op))) { LOG_WARN("fail to stop server", KR(ret), K(server), K(zone)); } } } return ret; } int ObServerZoneOpService::start_servers( const ObIArray &servers, const ObZone &zone) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_UNLIKELY(servers.count() <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("servers' count is zero", KR(ret), K(servers)); } else { for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) { const ObAddr &server = servers.at(i); if (OB_FAIL(start_or_stop_server_(server, zone, ObAdminServerArg::START))) { LOG_WARN("fail to start server", KR(ret), K(server), K(zone)); } } } return ret; } int ObServerZoneOpService::stop_server_precheck( const ObIArray &servers, const obrpc::ObAdminServerArg::AdminServerOp &op) { int ret = OB_SUCCESS; ObZone zone; bool is_same_zone = false; bool is_all_stopped = false; ObArray all_servers_info_in_table; ObServerInfoInTable server_info; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_UNLIKELY(servers.count() <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("servers' count is zero", KR(ret), K(servers)); } else if (OB_ISNULL(GCTX.root_service_) || OB_ISNULL(sql_proxy_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("GCTX.root_service_ or sql_proxy_ is null", KR(ret), KP(GCTX.root_service_), KP(sql_proxy_)); } else if (OB_FAIL(ObServerTableOperator::get(*sql_proxy_, all_servers_info_in_table))) { LOG_WARN("fail to read __all_server table", KR(ret), KP(sql_proxy_)); } else if (OB_FAIL(check_zone_and_server_( all_servers_info_in_table, servers, is_same_zone, is_all_stopped))) { LOG_WARN("fail to check zone and server", KR(ret), K(all_servers_info_in_table), K(servers)); } else if (is_all_stopped) { //nothing todo } else if (!is_same_zone) { ret = OB_STOP_SERVER_IN_MULTIPLE_ZONES; LOG_WARN("can not stop servers in multiple zones", KR(ret), K(server_info), K(servers)); } else if (OB_FAIL((ObRootUtils::find_server_info(all_servers_info_in_table, servers.at(0), server_info)))) { LOG_WARN("fail to find server info", KR(ret), K(all_servers_info_in_table), K(servers.at(0))); } else { const ObZone &zone = server_info.get_zone(); if (ObAdminServerArg::ISOLATE == op) { //"Isolate server" does not need to check the total number and status of replicas; it cannot be restarted later; if (OB_FAIL(GCTX.root_service_->check_can_stop(zone, servers, false /*is_stop_zone*/))) { LOG_WARN("fail to check can stop", KR(ret), K(zone), K(servers), K(op)); if (OB_OP_NOT_ALLOW == ret) { LOG_USER_ERROR(OB_OP_NOT_ALLOW, "Stop all servers in primary region is"); } } } else { if (ObRootUtils::have_other_stop_task(zone)) { ret = OB_STOP_SERVER_IN_MULTIPLE_ZONES; LOG_WARN("can not stop servers in multiple zones", KR(ret), K(zone), K(servers), K(op)); LOG_USER_ERROR(OB_STOP_SERVER_IN_MULTIPLE_ZONES, "cannot stop server or stop zone in multiple zones"); } else if (OB_FAIL(GCTX.root_service_->check_majority_and_log_in_sync( servers, ObAdminServerArg::FORCE_STOP == op,/*skip_log_sync_check*/ "stop server"))) { LOG_WARN("fail to check majority and log in-sync", KR(ret), K(zone), K(servers), K(op)); } } } return ret; } int ObServerZoneOpService::zone_checking_for_adding_server_( const ObZone &command_zone, const ObZone &rpc_zone, ObZone &picked_zone) { int ret = OB_SUCCESS; // command_zone: the zone specified in the system command ADD SERVER // rpc_zone: the zone specified in the server's local config and send to rs via rpc // picked_zone: the zone we will use in add_server if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_UNLIKELY(rpc_zone.is_empty())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("rpc_zone cannot be empty. It implies that server's local config zone is empty.", KR(ret), K(rpc_zone)); } else if (!command_zone.is_empty() && command_zone != rpc_zone) { ret = OB_SERVER_ZONE_NOT_MATCH; LOG_WARN("the zone specified in the server's local config is not the same as" " the zone specified in the command", KR(ret), K(command_zone), K(rpc_zone)); } else if (OB_FAIL(picked_zone.assign(rpc_zone))) { LOG_WARN("fail to assign picked_zone", KR(ret), K(rpc_zone)); } else {} return ret; } int ObServerZoneOpService::add_server_( const ObAddr &server, const uint64_t server_id, const ObZone &zone, const int64_t sql_port, const ObServerInfoInTable::ObBuildVersion &build_version) { int ret = OB_SUCCESS; bool is_active = false; const int64_t now = ObTimeUtility::current_time(); ObServerInfoInTable server_info_in_table; ObMySQLTransaction trans; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_UNLIKELY(!server.is_valid() || !is_valid_server_id(server_id) || zone.is_empty() || sql_port <= 0 || build_version.is_empty())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(server), K(server_id), K(zone), K(sql_port), K(build_version)); } else if (OB_ISNULL(sql_proxy_) || OB_ISNULL(server_change_callback_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("sql_proxy_ or server_change_callback_ is null", KR(ret), KP(sql_proxy_), KP(server_change_callback_)); } else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) { LOG_WARN("fail to start trans", KR(ret)); } else if (OB_FAIL(check_and_update_service_epoch_(trans))) { LOG_WARN("fail to check and update service epoch", KR(ret)); } else if (OB_FAIL(ObZoneTableOperation::check_zone_active(trans, zone, is_active))){ // we do not need to lock the zone info in __all_zone table // all server/zone operations are mutually exclusive since we locked the service epoch LOG_WARN("fail to check whether the zone is active", KR(ret), K(zone)); } else if (OB_UNLIKELY(!is_active)) { ret = OB_ZONE_NOT_ACTIVE; LOG_WARN("the zone is not active", KR(ret), K(zone), K(is_active)); } else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info_in_table))) { if (OB_SERVER_NOT_IN_WHITE_LIST == ret) { ret = OB_SUCCESS; } else { LOG_WARN("fail to get server_info in table", KR(ret), K(server)); } } else { ret = OB_ENTRY_EXIST; LOG_WARN("server exists", KR(ret), K(server_info_in_table)); } if (FAILEDx(server_info_in_table.init( server, server_id, zone, sql_port, false, /* with_rootserver */ ObServerStatus::OB_SERVER_ACTIVE, build_version, 0, /* stop_time */ 0, /* start_service_time */ 0 /* last_offline_time */))) { LOG_WARN("fail to init server info in table", KR(ret), K(server), K(server_id), K(zone), K(sql_port), K(build_version), K(now)); } else if (OB_FAIL(ObServerTableOperator::insert(trans, server_info_in_table))) { LOG_WARN("fail to insert server info into __all_server table", KR(ret), K(server_info_in_table)); } (void) end_trans_and_on_server_change_(ret, trans, "add_server", server, zone, now); return ret; } int ObServerZoneOpService::delete_server_( const common::ObAddr &server, const ObZone &zone) { int ret = OB_SUCCESS; ObServerInfoInTable server_info_in_table; const int64_t now = ObTimeUtility::current_time(); char ip[OB_MAX_SERVER_ADDR_SIZE] = ""; ObMySQLTransaction trans; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_UNLIKELY(!server.is_valid() || !server.ip_to_string(ip, sizeof(ip)))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(server)); } else if (OB_ISNULL(sql_proxy_) || OB_ISNULL(server_change_callback_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("sql_proxy_ or server_change_callback_ is null", KR(ret), KP(sql_proxy_), KP(server_change_callback_)); } else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) { LOG_WARN("fail to start trans", KR(ret)); } else if (OB_FAIL(check_and_update_service_epoch_(trans))) { LOG_WARN("fail to check and update service epoch", KR(ret)); } else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info_in_table))) { LOG_WARN("fail to get server_info in table", KR(ret), K(server)); } else if (!zone.is_empty() && zone != server_info_in_table.get_zone()) { ret = OB_SERVER_ZONE_NOT_MATCH; LOG_WARN("zone not matches", KR(ret), K(server), K(zone), K(server_info_in_table)); } else if (OB_UNLIKELY(server_info_in_table.is_deleting())) { ret = OB_SERVER_ALREADY_DELETED; LOG_WARN("the server has been deleted", KR(ret), K(server_info_in_table)); } else { int64_t job_id = RS_JOB_CREATE(DELETE_SERVER, trans, "svr_ip", ip, "svr_port", server.get_port()); if (job_id < 1) { ret = OB_SQL_OPT_ERROR; LOG_WARN("insert into all_rootservice_job failed ", K(ret)); } else if (OB_FAIL(ObServerTableOperator::update_status( trans, server, server_info_in_table.get_status(), ObServerStatus::OB_SERVER_DELETING))) { LOG_WARN("fail to update status", KR(ret), K(server), K(server_info_in_table)); } } (void) end_trans_and_on_server_change_(ret, trans, "delete_server", server, server_info_in_table.get_zone(), now); return ret; } int ObServerZoneOpService::check_and_end_delete_server_( common::ObMySQLTransaction &trans, const common::ObAddr &server, const ObZone &zone, const bool is_cancel, share::ObServerInfoInTable &server_info) { int ret = OB_SUCCESS; server_info.reset(); char ip[OB_MAX_SERVER_ADDR_SIZE] = ""; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_UNLIKELY(!server.is_valid() || !server.ip_to_string(ip, sizeof(ip)))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(server)); } else if (OB_FAIL(check_and_update_service_epoch_(trans))) { LOG_WARN("fail to check and update service epoch", KR(ret)); } else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info))) { LOG_WARN("fail to get server_info in table", KR(ret), K(server)); } else if (!zone.is_empty() && zone != server_info.get_zone()) { ret = OB_SERVER_ZONE_NOT_MATCH; LOG_WARN("zone not matches", KR(ret), K(server), K(zone), K(server_info)); } else if (OB_UNLIKELY(!server_info.is_deleting())) { ret = OB_SERVER_NOT_DELETING; LOG_ERROR("server is not in deleting status, cannot be removed from __all_server table", KR(ret), K(server_info)); } else { ObRsJobInfo job_info; ret = RS_JOB_FIND(job_info, trans, "job_type", "DELETE_SERVER", "job_status", "INPROGRESS", "svr_ip", ip, "svr_port", server.get_port()); if (OB_SUCC(ret) && job_info.job_id_ > 0) { int tmp_ret = is_cancel ? OB_CANCELED : OB_SUCCESS; if (OB_FAIL(RS_JOB_COMPLETE(job_info.job_id_, tmp_ret, trans))) { LOG_WARN("fail to all_rootservice_job" , KR(ret), K(server)); } } else { LOG_WARN("failed to find job", KR(ret), K(server)); } } return ret; } int ObServerZoneOpService::start_or_stop_server_( const common::ObAddr &server, const ObZone &zone, const obrpc::ObAdminServerArg::AdminServerOp &op) { int ret = OB_SUCCESS; const int64_t now = ObTimeUtility::current_time(); ObServerInfoInTable server_info; ObMySQLTransaction trans; bool is_start = (ObAdminServerArg::START == op); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_UNLIKELY(!server.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(server)); } else if (OB_ISNULL(sql_proxy_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("sql_proxy_ is null", KR(ret), KP(sql_proxy_)); } else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) { LOG_WARN("fail to start trans", KR(ret)); } else if (OB_FAIL(check_and_update_service_epoch_(trans))) { LOG_WARN("fail to check and update service epoch", KR(ret)); } else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info))) { LOG_WARN("fail to get server_info", KR(ret), K(server)); } else if (!zone.is_empty() && zone != server_info.get_zone()) { ret = OB_SERVER_ZONE_NOT_MATCH; LOG_WARN("zone not matches", KR(ret), K(server), K(zone), K(server_info)); } else if (ObAdminServerArg::STOP == op || ObAdminServerArg::FORCE_STOP == op) { // check again, if there exists stopped servers in other zones if (ObRootUtils::have_other_stop_task(server_info.get_zone())) { ret = OB_STOP_SERVER_IN_MULTIPLE_ZONES; LOG_WARN("can not stop servers in multiple zones", KR(ret), K(server_info.get_zone())); LOG_USER_ERROR(OB_STOP_SERVER_IN_MULTIPLE_ZONES, "cannot stop server or stop zone in multiple zones"); } } if (OB_SUCC(ret)) { int64_t new_stop_time = is_start ? 0 : now; int64_t old_stop_time = server_info.get_stop_time(); if ((is_start && 0 != old_stop_time) || (!is_start && 0 == old_stop_time)) { if (OB_FAIL(ObServerTableOperator::update_stop_time( trans, server, old_stop_time, new_stop_time))) { LOG_WARN("fail to update stop_time", KR(ret), K(server), K(old_stop_time), K(new_stop_time)); } } LOG_INFO("update stop time", KR(ret), K(server_info), K(old_stop_time), K(new_stop_time), K(op), K(is_start)); } const char *op_print_str = is_start ? "start_server" : "stop_server"; (void) end_trans_and_on_server_change_(ret, trans, op_print_str, server, server_info.get_zone(), now); return ret; } int ObServerZoneOpService::construct_rs_list_arg(ObRsListArg &rs_list_arg) { int ret = OB_SUCCESS; ObLSInfo ls_info; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_ISNULL(lst_operator_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("lst operator is null", KR(ret), KP(lst_operator_)); } else if (OB_FAIL(lst_operator_->get( GCONF.cluster_id, OB_SYS_TENANT_ID, SYS_LS, share::ObLSTable::DEFAULT_MODE, ls_info))) { LOG_WARN("fail to get ls info", KR(ret)); } else { rs_list_arg.master_rs_ = GCONF.self_addr_; FOREACH_CNT_X(replica, ls_info.get_replicas(), OB_SUCC(ret)) { if (replica->get_server() == GCONF.self_addr_ || (replica->is_in_service() && ObReplicaTypeCheck::is_paxos_replica_V2(replica->get_replica_type()))) { if (OB_FAIL(rs_list_arg.rs_list_.push_back(replica->get_server()))) { LOG_WARN("fail to push a server into rs list", KR(ret), K(replica->get_server())); } } } } return ret; } int ObServerZoneOpService::check_and_update_service_epoch_(ObMySQLTransaction &trans) { int ret = OB_SUCCESS; int64_t service_epoch_in_table = palf::INVALID_PROPOSAL_ID; int64_t proposal_id = palf::INVALID_PROPOSAL_ID; ObRole role; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_FAIL(ObRootUtils::get_proposal_id_from_sys_ls(proposal_id, role))) { LOG_WARN("fail to get proposal id from sys ls", KR(ret)); } else if (ObRole::LEADER != role) { ret = OB_NOT_MASTER; LOG_WARN("not leader ls", KR(ret), K(proposal_id), K(service_epoch_in_table), K(role)); } else if (palf::INVALID_PROPOSAL_ID == proposal_id) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid proposal id", KR(ret), K(proposal_id)); } else if (OB_FAIL(ObServiceEpochProxy::check_and_update_service_epoch( trans, OB_SYS_TENANT_ID, ObServiceEpochProxy::SERVER_ZONE_OP_SERVICE_EPOCH, proposal_id))) { LOG_WARN("fail to check and update server zone op service epoch", KR(ret), K(proposal_id)); } else {} return ret; } int ObServerZoneOpService::fetch_new_server_id_(uint64_t &server_id) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_ISNULL(sql_proxy_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid sql proxy", KR(ret), KP(sql_proxy_)); } else { uint64_t new_max_id = OB_INVALID_ID; ObMaxIdFetcher id_fetcher(*sql_proxy_); if (OB_FAIL(id_fetcher.fetch_new_max_id( OB_SYS_TENANT_ID, OB_MAX_USED_SERVER_ID_TYPE, new_max_id))) { LOG_WARN("fetch_new_max_id failed", KR(ret)); } else { server_id = new_max_id; } } return ret; } int ObServerZoneOpService::check_server_have_enough_resource_for_delete_server_( const ObIArray &servers, const ObZone &zone) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else if (OB_ISNULL(unit_manager_) || OB_ISNULL(sql_proxy_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unit_manager_ or sql_proxy_ is null", KR(ret), KP(unit_manager_), KP(sql_proxy_)); } else { ObServerInfoInTable server_info; FOREACH_CNT_X(server, servers, OB_SUCC(ret)) { server_info.reset(); if (OB_FAIL(ObServerTableOperator::get(*sql_proxy_, *server, server_info))) { LOG_WARN("fail to get server_info in table", KR(ret), KP(sql_proxy_), KPC(server)); } else if (!zone.is_empty() && server_info.get_zone() != zone) { ret = OB_SERVER_ZONE_NOT_MATCH; LOG_WARN("the arg zone is not the same as the server's zone in __all_server table", KR(ret), K(zone), K(server_info)); } else if (OB_FAIL(unit_manager_->check_enough_resource_for_delete_server( *server, server_info.get_zone()))) { LOG_WARN("fail to check enouch resource", KR(ret), KPC(server), K(server_info)); } }//end for each } return ret; } int ObServerZoneOpService::check_zone_and_server_( const ObIArray &servers_info, const ObIArray &servers, bool &is_same_zone, bool &is_all_stopped) { int ret = OB_SUCCESS; is_same_zone = true; is_all_stopped = true; if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K(is_inited_)); } else { ObServerInfoInTable server_info; ObZone zone; for (int64_t i = 0; i < servers.count() && OB_SUCC(ret) && (is_same_zone || is_all_stopped); i++) { const ObAddr &server = servers.at(i); server_info.reset(); if (OB_FAIL(ObRootUtils::find_server_info(servers_info, server, server_info))) { LOG_WARN("fail to get server info", KR(ret), K(servers_info), K(server)); } else if (0 == i) { if (OB_FAIL(zone.assign(server_info.get_zone()))) { LOG_WARN("fail to assign zone", KR(ret), K(server_info.get_zone())); } } else if (zone != server_info.get_zone()) { is_same_zone = false; LOG_WARN("server zone not same", K(zone), K(server_info), K(servers)); } if (OB_FAIL(ret)) { } else if (!server_info.is_stopped()) { is_all_stopped = false; } } } return ret; } ERRSIM_POINT_DEF(ALL_SERVER_LIST_ERROR); void ObServerZoneOpService::end_trans_and_on_server_change_( int &ret, common::ObMySQLTransaction &trans, const char *op_print_str, const common::ObAddr &server, const ObZone &zone, const int64_t start_time) { int tmp_ret = OB_SUCCESS; LOG_INFO("start execute end_trans_and_on_server_change_", KR(ret), K(op_print_str), K(server), K(zone), K(start_time)); if (OB_UNLIKELY(!trans.is_started())) { LOG_WARN("the transaction is not started"); } else { if (OB_TMP_FAIL(trans.end(OB_SUCC(ret)))) { LOG_WARN("fail to commit the transaction", KR(ret), KR(tmp_ret), K(server), K(zone)); ret = OB_SUCC(ret) ? tmp_ret : ret; } } if (OB_TMP_FAIL(SVR_TRACER.refresh())) { LOG_WARN("fail to refresh server tracer", KR(ret), KR(tmp_ret)); } bool no_on_server_change = ALL_SERVER_LIST_ERROR ? true : false; if (OB_ISNULL(server_change_callback_)) { tmp_ret = OB_ERR_UNEXPECTED; LOG_WARN("server_change_callback_ is null", KR(ret), KR(tmp_ret), KP(server_change_callback_)); ret = OB_SUCC(ret) ? tmp_ret : ret; } else if (no_on_server_change) { } else if (OB_TMP_FAIL(server_change_callback_->on_server_change())) { LOG_WARN("fail to callback on server change", KR(ret), KR(tmp_ret)); } int64_t time_cost = ::oceanbase::common::ObTimeUtility::current_time() - start_time; FLOG_INFO(op_print_str, K(server), K(zone), "time cost", time_cost, KR(ret)); ROOTSERVICE_EVENT_ADD("server", op_print_str, K(server), K(ret)); } } }