[FEAT MERGE] server manager 微服务化改造

Co-authored-by: maosy <630014370@qq.com>
This commit is contained in:
obdev 2023-04-27 16:13:23 +08:00 committed by ob-robot
parent 57f1c6e7ee
commit 6b8425d8b0
138 changed files with 7492 additions and 3310 deletions

View File

@ -309,6 +309,8 @@ LATCH_DEF(SQL_WF_PARTICIPATOR_COND_LOCK, 296, "window function participator lock
LATCH_DEF(ARB_SERVER_CONFIG_LOCK, 297, "arbserver config lock", LATCH_FIFO, 2000, 0, ARB_SERVER_CONFIG_WAIT, "arbserver config lock")
LATCH_DEF(CDC_SERVICE_LS_CTX_LOCK, 298, "cdcservice clientlsctx lock", LATCH_FIFO, 2000, 0, CDC_SERVICE_LS_CTX_LOCK_WAIT, "cdcservice clientlsctx lock")
LATCH_DEF(MAJOR_FREEZE_DIAGNOSE_LOCK, 299, "major freeze diagnose lock", LATCH_READ_PREFER, 2000, 0, MAJOR_FREEZE_DIAGNOSE_LOCK_WAIT, "major freeze diagnose lock")
LATCH_DEF(HB_RESPONSES_LOCK, 300, "hb responses lock", LATCH_READ_PREFER, 2000, 0, HB_RESPONSES_LOCK_WAIT, "hb responses lock")
LATCH_DEF(ALL_SERVERS_INFO_IN_TABLE_LOCK, 301, "all servers info in table lock", LATCH_READ_PREFER, 2000, 0, ALL_SERVERS_INFO_IN_TABLE_LOCK_WAIT, "all servers info in table lock")
LATCH_DEF(LATCH_END, 99999, "latch end", LATCH_FIFO, 2000, 0, WAIT_EVENT_END, "latch end")
#endif

View File

@ -300,6 +300,8 @@ WAIT_EVENT_DEF(TENANT_IO_CONFIG_WAIT, 15254, "rwlock: tenant io config wait", "a
WAIT_EVENT_DEF(SQL_WF_PARTICIPATOR_LOCK_WAIT, 15255, "latch: window function participator cond lock wait", "address", "", "", CONCURRENCY, "window function participator cond lock wait", true)
WAIT_EVENT_DEF(SQL_WF_PARTICIPATOR_COND_WAIT, 15256, "mutex: window function participator cond wait", "address", "", "", CONCURRENCY, "window function participator cond wait", true)
WAIT_EVENT_DEF(MAJOR_FREEZE_DIAGNOSE_LOCK_WAIT, 15257, "latch: major_freeze diagnose lock wait", "address", "number", "tries", CONCURRENCY, "latch: major_freeze diagnose lock wait", true)
WAIT_EVENT_DEF(HB_RESPONSES_LOCK_WAIT, 15258, "latch: hb responses lock wait", "address", "number", "tries", CONCURRENCY, "latch: hb responses lock wait", true)
WAIT_EVENT_DEF(ALL_SERVERS_INFO_IN_TABLE_LOCK_WAIT, 15259, "latch: all servers info in table lock wait", "address", "number", "tries", CONCURRENCY, "latch: all servers info in table lock wait", true)
//transaction
WAIT_EVENT_DEF(END_TRANS_WAIT, 16001, "wait end trans", "rollback", "trans_hash_value", "participant_count", COMMIT,"wait end trans", false)

View File

@ -315,3 +315,9 @@ The read and write operation on configs in gc handler should be mutually exclusi
## latch: cdcservice clientlsctx lock wait
The read and write operation on source(RemoteLogParent) in ClientLSCtx should be mutually exclusive.
## latch: hb responses lock wait
The read and write operation on hb_responses_ should be mutually exclusive.
## latch: all servers info in table lock wait
The read and write operation on all_servers_info_in_table_ should be mutually exclusive.

View File

@ -53,7 +53,7 @@ PCODE_DEF(OB_START_ZONE, 0x110)
PCODE_DEF(OB_STOP_ZONE, 0x111)
PCODE_DEF(OB_TENANT_MGR, 0x113)
PCODE_DEF(OB_MERGE_FINISH, 0x114)
// PCODE_DEF(OB_MERGE_FINISH, 0x114)
//PCODE_DEF(OB_MERGE_ERROR, 0x115) // 4.0 not supported
PCODE_DEF(OB_START_SERVER, 0x116)
PCODE_DEF(OB_STOP_SERVER, 0x117)
@ -85,7 +85,7 @@ PCODE_DEF(OB_CHECK_DEPLOYMENT_MODE, 0x143)
//PCODE_DEF(OB_GET_CLUSTER_STATS, 0x146)// 4.0 not supported
PCODE_DEF(OB_WAIT_MASTER_KEY_IN_SYNC, 0x147)
PCODE_DEF(OB_GET_REGION_BW, 0x148)
PCODE_DEF(OB_FETCH_ACTIVE_SERVER_STATUS, 0x149)
// PCODE_DEF(OB_FETCH_ACTIVE_SERVER_STATUS, 0x149)
PCODE_DEF(OB_DETECT_MASTER_RS_LS, 0x14A)
//PCODE_DEF(OB_DETECT_MASTER_RS_LOG_STREAM, 0x14B) // for rpc_log_stream_table's get
PCODE_DEF(OB_BATCH_BROADCAST_SCHEMA, 0x14C)
@ -185,7 +185,7 @@ PCODE_DEF(OB_DO_SEQUENCE_DDL, 0x245)
PCODE_DEF(OB_CREATE_TENANT_END, 0x248)
//PCODE_DEF(OB_REACH_PARTITION_LIMIT, 0x24A)
//PCODE_DEF(OB_ALTER_CLUSTER_INFO, 0x24B)// 4.0 not supported
PCODE_DEF(OB_CHECK_MERGE_FINISH, 0x24C)
//PCODE_DEF(OB_CHECK_MERGE_FINISH, 0x24C)
//PCODE_DEF(OB_CHECK_CLUSTER_VALID_TO_ADD, 0x24D)// 4.0 not supported
PCODE_DEF(OB_FLASHBACK_TABLE_TO_SCN, 0x24E)
//PCODE_DEF(OB_GET_STANDBY_CLUSTER_STATISTIC, 0x24F)// 4.0 not supported

View File

@ -46,4 +46,5 @@ ob_unittest_observer(test_fast_commit_report fast_commit_report.cpp)
ob_unittest_observer(test_mvcc_gc test_mvcc_gc.cpp)
ob_unittest_observer(test_ob_simple_rto test_ob_simple_rto.cpp)
ob_unittest_observer(test_all_virtual_proxy_partition_info_default_value test_all_virtual_proxy_partition_info_default_value.cpp)
ob_unittest_observer(test_get_stopped_zone_list test_get_stopped_zone_list.cpp)
ob_unittest_observer(test_lock_table_with_tx test_lock_table_with_tx.cpp)

View File

@ -0,0 +1,102 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SHARE
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include "lib/string/ob_sql_string.h" // ObSqlString
#include "lib/mysqlclient/ob_mysql_proxy.h" // ObISqlClient, SMART_VAR
#include "observer/ob_sql_client_decorator.h" // ObSQLClientRetryWeak
#include "env/ob_simple_cluster_test_base.h"
#include "lib/ob_errno.h"
#include "lib/oblog/ob_log.h"
#include "rootserver/ob_root_utils.h"
#include "share/ob_server_table_operator.h"
#include "share/ob_zone_table_operation.h"
#define SQL_PROXY (get_curr_simple_server().get_observer().get_mysql_proxy())
namespace oceanbase
{
using namespace unittest;
namespace share
{
using ::testing::_;
using ::testing::Invoke;
using ::testing::Return;
using namespace schema;
using namespace common;
// Test fixture: boots a simple single-node cluster (named after the test)
// via ObSimpleClusterTestBase so the TEST_F body can issue real SQL.
class TestGetStoppedZoneList : public unittest::ObSimpleClusterTestBase
{
public:
  TestGetStoppedZoneList() : unittest::ObSimpleClusterTestBase("test_get_stopped_zone_list") {}
};
TEST_F(TestGetStoppedZoneList, GetStoppedZoneList)
{
  // Scenario under test:
  //   - empty zone z3 is stopped (added but never started)
  //   - server2 in z2 is stopped
  //   - expect stopped_zone_list == {z2, z3}, stopped_server_list == {server2}
  // have_other_stop_task is also tested along the way.
  ObServerInfoInTable server_info_in_table;
  ObAddr server2;
  ObServerTableOperator st_operator;
  int64_t affected_rows = 0;
  ObZone z2("z2");
  ObZone z3("z3");
  ObSqlString sql;
  ASSERT_EQ(OB_SUCCESS, st_operator.init(&SQL_PROXY));
  ASSERT_TRUE(server2.set_ip_addr("127.0.0.1", 11111));
  // Baseline: no stop task exists outside the local zone yet.
  ASSERT_FALSE(rootserver::ObRootUtils::have_other_stop_task(GCONF.zone.str()));
  // A freshly added (not yet started) zone counts as a stop task.
  ASSERT_EQ(OB_SUCCESS, sql.assign_fmt("alter system add zone z2"));
  ASSERT_EQ(OB_SUCCESS, SQL_PROXY.write(OB_SYS_TENANT_ID, sql.ptr(), affected_rows));
  sql.reset();
  ASSERT_TRUE(rootserver::ObRootUtils::have_other_stop_task(GCONF.zone.str()));
  // Starting z2 clears that stop task again.
  ASSERT_EQ(OB_SUCCESS, sql.assign_fmt("alter system start zone z2"));
  ASSERT_EQ(OB_SUCCESS, SQL_PROXY.write(OB_SYS_TENANT_ID, sql.ptr(), affected_rows));
  sql.reset();
  ASSERT_FALSE(rootserver::ObRootUtils::have_other_stop_task(GCONF.zone.str()));
  // Insert server2 into __all_server as a stopped server in z2.
  // NOTE(review): positional argument meanings inferred from names here;
  // confirm against ObServerInfoInTable::init's declaration.
  int ret = server_info_in_table.init(server2, 2, "z2", 15432, false, ObServerStatus::OB_SERVER_ACTIVE, "test_version", 5558888, 55555, 0);
  ASSERT_EQ(OB_SUCCESS, ret);
  ret = st_operator.insert(SQL_PROXY, server_info_in_table);
  ASSERT_EQ(OB_SUCCESS, ret);
  // The stopped server now registers as an outstanding stop task.
  ASSERT_TRUE(rootserver::ObRootUtils::have_other_stop_task(GCONF.zone.str()));
  // Add z3 and never start it: it stays inactive (a stopped, empty zone).
  ASSERT_EQ(OB_SUCCESS, sql.assign_fmt("alter system add zone z3"));
  ASSERT_EQ(OB_SUCCESS, SQL_PROXY.write(OB_SYS_TENANT_ID, sql.ptr(), affected_rows));
  ObArray<ObZone> active_zone_list;
  ObArray<ObZone> inactive_zone_list;
  ASSERT_EQ(OB_SUCCESS, ObZoneTableOperation::get_active_zone_list(SQL_PROXY, active_zone_list));
  ASSERT_EQ(OB_SUCCESS, ObZoneTableOperation::get_inactive_zone_list(SQL_PROXY, inactive_zone_list));
  ASSERT_EQ(z3, inactive_zone_list.at(0));
  ASSERT_EQ(2, active_zone_list.count());
  // Function under test: aggregate stopped zones and stopped servers.
  ObArray<ObZone> stopped_zone_list;
  ObArray<ObAddr> stopped_server_list;
  ret = rootserver::ObRootUtils::get_stopped_zone_list(stopped_zone_list, stopped_server_list);
  ASSERT_EQ(OB_SUCCESS, ret);
  ASSERT_EQ(1, stopped_server_list.count());
  ASSERT_EQ(server2, stopped_server_list.at(0));
  ASSERT_EQ(2, stopped_zone_list.count());
  ASSERT_TRUE(has_exist_in_array(stopped_zone_list, z2));
  ASSERT_TRUE(has_exist_in_array(stopped_zone_list, z3));
}
} // share
} // oceanbase
// Entry point: set up logging, hand argv to gtest, run every registered test.
int main(int argc, char **argv)
{
  init_log_and_gtest(argc, argv);
  OB_LOGGER.set_log_level("INFO");
  ::testing::InitGoogleTest(&argc, argv);
  const int rc = RUN_ALL_TESTS();
  return rc;
}

View File

@ -75,6 +75,8 @@ enum ObLogBaseType
// for arbitration service
ARBITRATION_SERVICE_LOG_BASE_TYPE = 21,
HEARTBEAT_SERVICE_LOG_BASE_TYPE = 22,
// pay attention!!!
// add log type in log_base_type_to_string
// max value
@ -133,6 +135,8 @@ int log_base_type_to_string(const ObLogBaseType log_type,
strncpy(str ,"DATA_DICTIONARY_SERVICE", str_len);
} else if (log_type == ARBITRATION_SERVICE_LOG_BASE_TYPE) {
strncpy(str ,"ARBITRATION_SERVICE", str_len);
} else if (log_type == HEARTBEAT_SERVICE_LOG_BASE_TYPE) {
strncpy(str ,"HEARTBEAT_SERVICE", str_len);
} else {
ret = OB_INVALID_ARGUMENT;
}

View File

@ -6,6 +6,7 @@ ob_set_subtarget(ob_server ALONE
ob_set_subtarget(ob_server common
ob_dump_task_generator.cpp
ob_heartbeat.cpp
ob_heartbeat_handler.cpp
ob_inner_sql_rpc_proxy.cpp
ob_inner_sql_rpc_processor.cpp
ob_inner_sql_connection.cpp

View File

@ -25,6 +25,7 @@
#include "lib/profile/ob_trace_id.h"
#include "share/partition_table/ob_partition_location.h"
#include "share/ob_all_server_tracer.h"
#include "observer/ob_server_struct.h"
namespace oceanbase
@ -235,8 +236,7 @@ ObDBMSJobMaster &ObDBMSJobMaster::get_instance()
return master_;
}
int ObDBMSJobMaster::init(ObServerManager *server_mgr,
ObISQLClient *sql_client,
int ObDBMSJobMaster::init(ObISQLClient *sql_client,
ObMultiVersionSchemaService *schema_service)
{
int ret = OB_SUCCESS;
@ -247,16 +247,12 @@ int ObDBMSJobMaster::init(ObServerManager *server_mgr,
if (inited_) {
ret = OB_INIT_TWICE;
LOG_WARN("dbms job master already inited", K(ret), K(inited_));
} else if (OB_ISNULL(server_mgr)
|| OB_ISNULL(sql_client)
} else if (OB_ISNULL(sql_client)
|| OB_ISNULL(schema_service)
|| OB_ISNULL(GCTX.dbms_job_rpc_proxy_)
) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("null ptr", K(ret), K(server_mgr), K(sql_client), K(schema_service));
} else if (!server_mgr->is_inited()) {
ret = OB_NOT_INIT;
LOG_WARN("server manager not init yet", K(ret));
LOG_WARN("null ptr", K(ret), K(sql_client), K(schema_service));
} else if (OB_FAIL(ready_queue_.init(ready_queue_size))) {
LOG_WARN("fail to init ready job queue for all jobs", K(ret));
} else if (OB_FAIL(scheduler_task_.init(&ready_queue_))) {
@ -273,7 +269,6 @@ int ObDBMSJobMaster::init(ObServerManager *server_mgr,
} else {
trace_id_ = ObCurTraceId::get();
self_addr_ = GCONF.self_addr_;
server_mgr_ = server_mgr;
schema_service_ = schema_service;
job_rpc_proxy_ = GCTX.dbms_job_rpc_proxy_;
inited_ = true;
@ -481,9 +476,6 @@ int ObDBMSJobMaster::get_all_servers(int64_t tenant_id, ObString &pick_zone, ObI
} else if (OB_INVALID_ID == tenant_id) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid job id", K(ret), K(tenant_id));
} else if (!server_mgr_->is_inited()) {
ret = OB_NOT_INIT;
LOG_WARN("server manager not init yet!", K(ret));
} else if (OB_FAIL(schema_service_->get_tenant_schema_guard(OB_SYS_TENANT_ID, schema_guard))) {
LOG_WARN("fail get schema guard", K(ret));
} else if (OB_FAIL(schema_guard.get_tenant_info(tenant_id, tenant_info))) {
@ -500,8 +492,8 @@ int ObDBMSJobMaster::get_all_servers(int64_t tenant_id, ObString &pick_zone, ObI
if (pick_zone.empty()
|| 0 == pick_zone.case_compare(dbms_job::ObDBMSJobInfo::__ALL_SERVER_BC)
|| 0 == pick_zone.case_compare(zone.str())) {
if (OB_FAIL(server_mgr_->get_alive_servers(zone, server_list))) {
LOG_WARN("fail to get zone server list", K(ret));
if (OB_FAIL(SVR_TRACER.get_alive_servers(zone, server_list))) {
LOG_WARN("fail to get zone server list", KR(ret), K(zone));
} else {
for (int64_t j = 0; OB_SUCC(ret) && j < server_list.count(); j++) {
if (common::is_contain(servers, server_list.at(j))) {
@ -537,12 +529,16 @@ int ObDBMSJobMaster::server_random_pick(int64_t tenant_id, ObString &pick_zone,
while (OB_SUCC(ret) && cnt < total_server.count()) {
pos = (pos + 1) % total_server.count();
pick = total_server.at(pos);
server_mgr_->check_server_alive(pick, is_alive);
server_mgr_->check_server_active(pick, is_active);
if (is_alive && is_active) {
break;
if (OB_FAIL(SVR_TRACER.check_server_alive(pick, is_alive))) {
LOG_WARN("fail to check server alive", KR(ret), K(pick));
} else if (OB_FAIL(SVR_TRACER.check_server_active(pick, is_active))) {
LOG_WARN("fail to check server active", KR(ret), K(pick));
} else {
if (is_alive && is_active) {
break;
}
cnt++;
}
cnt++;
}
if (OB_FAIL(ret)) {
} else if (cnt >= total_server.count()) {

View File

@ -29,7 +29,6 @@
#include "share/schema/ob_schema_service.h"
#include "share/schema/ob_multi_version_schema_service.h"
#include "rootserver/ob_server_manager.h"
#include "rootserver/ob_ddl_service.h"
@ -159,7 +158,6 @@ public:
running_(false),
trace_id_(NULL),
rand_(),
server_mgr_(NULL),
schema_service_(NULL),
job_rpc_proxy_(NULL),
self_addr_(),
@ -172,8 +170,7 @@ public:
bool is_inited() { return inited_; }
int init(rootserver::ObServerManager *server_mgr,
common::ObISQLClient *sql_client,
int init(common::ObISQLClient *sql_client,
share::schema::ObMultiVersionSchemaService *schema_service);
int start();
@ -211,7 +208,6 @@ private:
const uint64_t *trace_id_;
common::ObRandom rand_; // for random pick server
rootserver::ObServerManager *server_mgr_;
share::schema::ObMultiVersionSchemaService *schema_service_; // for got all tenant info
obrpc::ObDBMSJobRpcProxy *job_rpc_proxy_;

View File

@ -25,6 +25,7 @@
#include "lib/profile/ob_trace_id.h"
#include "share/partition_table/ob_partition_location.h"
#include "share/ob_all_server_tracer.h"
#include "observer/ob_server_struct.h"
#include "rootserver/ob_root_service.h"
@ -242,8 +243,7 @@ ObDBMSSchedJobMaster &ObDBMSSchedJobMaster::get_instance()
return master_;
}
int ObDBMSSchedJobMaster::init(ObServerManager *server_mgr,
ObUnitManager *unit_mgr,
int ObDBMSSchedJobMaster::init(ObUnitManager *unit_mgr,
ObISQLClient *sql_client,
ObMultiVersionSchemaService *schema_service)
{
@ -251,17 +251,13 @@ int ObDBMSSchedJobMaster::init(ObServerManager *server_mgr,
if (inited_) {
ret = OB_INIT_TWICE;
LOG_WARN("dbms sched job master already inited", K(ret), K(inited_));
} else if (OB_ISNULL(server_mgr)
|| OB_ISNULL(unit_mgr)
} else if (OB_ISNULL(unit_mgr)
|| OB_ISNULL(sql_client)
|| OB_ISNULL(schema_service)
|| OB_ISNULL(GCTX.dbms_sched_job_rpc_proxy_)
) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("null ptr", K(ret), K(server_mgr), K(unit_mgr), K(sql_client), K(schema_service));
} else if (!server_mgr->is_inited()) {
ret = OB_NOT_INIT;
LOG_WARN("server manager not init yet", K(ret));
LOG_WARN("null ptr", K(ret), K(unit_mgr), K(sql_client), K(schema_service));
} else if (OB_FAIL(ready_queue_.init(MAX_READY_JOBS_CAPACITY))) {
LOG_WARN("fail to init ready job queue for all jobs", K(ret));
} else if (OB_FAIL(scheduler_task_.init())) {
@ -278,7 +274,6 @@ int ObDBMSSchedJobMaster::init(ObServerManager *server_mgr,
} else {
trace_id_ = ObCurTraceId::get();
self_addr_ = GCONF.self_addr_;
server_mgr_ = server_mgr;
unit_mgr_ = unit_mgr;
schema_service_ = schema_service;
job_rpc_proxy_ = GCTX.dbms_sched_job_rpc_proxy_;
@ -486,16 +481,16 @@ int ObDBMSSchedJobMaster::server_random_pick(int64_t tenant_id, ObString &pick_z
} else if (OB_INVALID_ID == tenant_id) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid job id", K(ret), K(tenant_id));
} else if (!server_mgr_->is_inited()) {
ret = OB_NOT_INIT;
LOG_WARN("server manager not init yet!", K(ret));
} else if (OB_ISNULL(schema_service_) || OB_ISNULL(unit_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("schema_service_ or unit_mgr_ is null", KR(ret), KP(schema_service_), KP(unit_mgr_));
} else if (OB_FAIL(schema_service_->get_tenant_schema_guard(OB_SYS_TENANT_ID, schema_guard))) {
LOG_WARN("fail get schema guard", K(ret));
} else if (OB_FAIL(schema_guard.get_tenant_info(tenant_id, tenant_info))) {
LOG_WARN("fail to get tenant info", K(ret), K(tenant_id));
} else if (OB_ISNULL(tenant_info)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("null ptr", K(ret), K(tenant_info));
LOG_WARN("null ptr", K(ret), KP(tenant_info));
} else if (OB_FAIL(tenant_info->get_zone_list(zone_list))) {
LOG_WARN("fail to get zone list", K(ret));
} else {
@ -503,8 +498,8 @@ int ObDBMSSchedJobMaster::server_random_pick(int64_t tenant_id, ObString &pick_z
common::ObZone zone = zone_list.at(i);
common::ObArray<ObAddr> server_list;
if (pick_zone.empty() || 0 == pick_zone.case_compare(zone.str())) {
if (OB_FAIL(server_mgr_->get_alive_servers(zone, server_list))) {
LOG_WARN("fail to get zone server list", K(ret));
if (OB_FAIL(SVR_TRACER.get_alive_servers(zone, server_list))) {
LOG_WARN("fail to get zone server list", KR(ret), K(zone));
} else {
for (int64_t j = 0; OB_SUCC(ret) && j < server_list.count(); j++) {
if (OB_FAIL(total_server.push_back(server_list.at(j)))) {
@ -529,13 +524,18 @@ int ObDBMSSchedJobMaster::server_random_pick(int64_t tenant_id, ObString &pick_z
do {
pos = (pos + 1) % total_server.count();
pick = total_server.at(pos);
server_mgr_->check_server_alive(pick, is_alive);
server_mgr_->check_server_active(pick, is_active);
unit_mgr_->check_tenant_on_server(tenant_id, pick, on_server);
if (is_alive && is_active && on_server) {
break;
if (OB_FAIL(SVR_TRACER.check_server_alive(pick, is_alive))) {
LOG_WARN("fail to check server alive", KR(ret), K(pick));
} else if (OB_FAIL(SVR_TRACER.check_server_active(pick, is_active))) {
LOG_WARN("fail to check server active", KR(ret), K(pick));
} else if (OB_FAIL(unit_mgr_->check_tenant_on_server(tenant_id, pick, on_server))) {
LOG_WARN("fail to check tenant on server", KR(ret), K(tenant_id), K(pick));
} else {
if (is_alive && is_active && on_server) {
break;
}
cnt++;
}
cnt++;
} while (cnt < total_server.count());
if (cnt >= total_server.count()) {
ret = OB_ERR_UNEXPECTED;

View File

@ -30,7 +30,6 @@
#include "share/schema/ob_schema_service.h"
#include "share/schema/ob_multi_version_schema_service.h"
#include "rootserver/ob_server_manager.h"
#include "rootserver/ob_ddl_service.h"
@ -168,7 +167,6 @@ public:
running_(false),
trace_id_(NULL),
rand_(),
server_mgr_(NULL),
schema_service_(NULL),
job_rpc_proxy_(NULL),
self_addr_(),
@ -181,8 +179,7 @@ public:
bool is_inited() { return inited_; }
int init(rootserver::ObServerManager *server_mgr,
rootserver::ObUnitManager *unit_mgr,
int init(rootserver::ObUnitManager *unit_mgr,
common::ObISQLClient *sql_client,
share::schema::ObMultiVersionSchemaService *schema_service);
@ -224,7 +221,6 @@ private:
const uint64_t *trace_id_;
common::ObRandom rand_; // for random pick server
rootserver::ObServerManager *server_mgr_;
rootserver::ObUnitManager *unit_mgr_;
share::schema::ObMultiVersionSchemaService *schema_service_; // for got all tenant info
obrpc::ObDBMSSchedJobRpcProxy *job_rpc_proxy_;

View File

@ -27,6 +27,7 @@
#include "observer/ob_server_schema_updater.h"
#include "observer/ob_server.h"
#include "observer/omt/ob_tenant_config_mgr.h"
#include "observer/ob_heartbeat_handler.h"
#include "common/ob_timeout_ctx.h"
#include "storage/slog/ob_storage_logger_manager.h"
@ -84,40 +85,22 @@ int ObHeartBeatProcess::init()
int ObHeartBeatProcess::init_lease_request(ObLeaseRequest &lease_request)
{
int ret = OB_SUCCESS;
omt::ObTenantNodeBalancer::ServerResource svr_res_assigned;
common::ObArray<std::pair<uint64_t, uint64_t> > max_stored_versions;
int64_t clog_free_size_byte = 0;
int64_t clog_total_size_byte = 0;
logservice::ObServerLogBlockMgr *log_block_mgr = GCTX.log_block_mgr_;
if (!inited_ || OB_ISNULL(log_block_mgr)) {
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init or log_block_mgr is null", KR(ret), K(inited_), K(GCTX.log_block_mgr_));
} else if (OB_FAIL(omt::ObTenantNodeBalancer::get_instance().get_server_allocated_resource(svr_res_assigned))) {
LOG_WARN("fail to get server allocated resource", KR(ret));
} else if (OB_FAIL(log_block_mgr->get_disk_usage(clog_free_size_byte, clog_total_size_byte))) {
LOG_WARN("Failed to get clog stat ", KR(ret));
LOG_WARN("not init", KR(ret), K(inited_));
} else if (OB_ISNULL(GCTX.ob_service_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("GCTX.ob_service_ is null", KR(ret), KP(GCTX.ob_service_));
} else if (OB_FAIL((GCTX.ob_service_->get_server_resource_info(lease_request.resource_info_)))) {
LOG_WARN("fail to get server resource info", KR(ret));
} else {
int64_t reserved_size = 4 * 1024 * 1024 * 1024L; // default RESERVED_DISK_SIZE -> 4G
(void) SLOGGERMGR.get_reserved_size(reserved_size);
lease_request.request_lease_time_ = 0; // this is not a valid member
lease_request.version_ = ObLeaseRequest::LEASE_VERSION;
lease_request.zone_ = gctx_.config_->zone.str();
lease_request.server_ = gctx_.self_addr();
lease_request.sql_port_ = gctx_.config_->mysql_port;
lease_request.resource_info_.cpu_ = get_cpu_count();
lease_request.resource_info_.report_cpu_assigned_ = svr_res_assigned.min_cpu_;
lease_request.resource_info_.report_cpu_max_assigned_ = svr_res_assigned.max_cpu_;
lease_request.resource_info_.report_mem_assigned_ = svr_res_assigned.memory_size_;
lease_request.resource_info_.mem_in_use_ = 0;
lease_request.resource_info_.mem_total_ = GMEMCONF.get_server_memory_avail();
lease_request.resource_info_.disk_total_
= OB_SERVER_BLOCK_MGR.get_max_macro_block_count(reserved_size) * OB_SERVER_BLOCK_MGR.get_macro_block_size();
lease_request.resource_info_.disk_in_use_
= OB_SERVER_BLOCK_MGR.get_used_macro_block_count() * OB_SERVER_BLOCK_MGR.get_macro_block_size();
lease_request.resource_info_.log_disk_total_ = clog_total_size_byte;
lease_request.resource_info_.report_log_disk_assigned_ = svr_res_assigned.log_disk_size_;
get_package_and_svn(lease_request.build_version_, sizeof(lease_request.build_version_));
OTC_MGR.get_lease_request(lease_request);
lease_request.start_service_time_ = gctx_.start_service_time_;
@ -174,8 +157,8 @@ int ObHeartBeatProcess::do_heartbeat_event(const ObLeaseResponse &lease_response
if (OB_INVALID_ID != lease_response.server_id_) {
if (GCTX.server_id_ != lease_response.server_id_) {
LOG_INFO("receive new server id",
"old_id", GCTX.server_id_,
"new_id", lease_response.server_id_);
"old_id", GCTX.server_id_,
"new_id", lease_response.server_id_);
GCTX.server_id_ = lease_response.server_id_;
GCONF.server_id = lease_response.server_id_;
const int64_t delay = 0;
@ -189,13 +172,16 @@ int ObHeartBeatProcess::do_heartbeat_event(const ObLeaseResponse &lease_response
}
}
// update server status if needed
if (RSS_INVALID != lease_response.rs_server_status_) {
if (GCTX.rs_server_status_ != lease_response.rs_server_status_) {
LOG_INFO("receive new server status recorded in rs",
"old_status", GCTX.rs_server_status_,
"new_status", lease_response.rs_server_status_);
GCTX.rs_server_status_ = lease_response.rs_server_status_;
if (!ObHeartbeatHandler::is_rs_epoch_id_valid()) {
///// if the new heartbeat service has not started, this heartbeat is responsible for
//// update server_id_ and rs_server_status_
if (RSS_INVALID != lease_response.rs_server_status_) {
if (GCTX.rs_server_status_ != lease_response.rs_server_status_) {
LOG_INFO("receive new server status recorded in rs",
"old_status", GCTX.rs_server_status_,
"new_status", lease_response.rs_server_status_);
GCTX.rs_server_status_ = lease_response.rs_server_status_;
}
}
}
// even try reload schema failed, we should continue do following things
@ -203,10 +189,10 @@ int ObHeartBeatProcess::do_heartbeat_event(const ObLeaseResponse &lease_response
if (OB_SUCCESS != schema_ret) {
LOG_WARN("try reload schema failed", "schema_version", lease_response.schema_version_,
"refresh_schema_info", lease_response.refresh_schema_info_, K(schema_ret));
"refresh_schema_info", lease_response.refresh_schema_info_, K(schema_ret));
} else {
LOG_INFO("try reload schema success", "schema_version", lease_response.schema_version_,
"refresh_schema_info", lease_response.refresh_schema_info_, K(schema_ret));
"refresh_schema_info", lease_response.refresh_schema_info_, K(schema_ret));
}
const int64_t delay = 0;

View File

@ -0,0 +1,193 @@
/**
* Copyright (c) 2022 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX SERVER
#include "observer/ob_heartbeat_handler.h"
#include "observer/ob_server.h"
#include "share/ob_version.h"
#include "observer/ob_service.h"
namespace oceanbase
{
namespace observer
{
static const char *OB_DATA_DISK_STATUS_STR[] = {"INVALID", "NORMAL", "ERROR"};
OB_SERIALIZE_MEMBER(
ObServerHealthStatus,
data_disk_status_
)
// Starts in the INVALID state; init() must be called before the status is used.
ObServerHealthStatus::ObServerHealthStatus() : data_disk_status_(DATA_DISK_STATUS_INVALID)
{
}

ObServerHealthStatus::~ObServerHealthStatus() {}
// Set the data disk status.
// Returns OB_INVALID_ARGUMENT when the value is outside (INVALID, MAX).
int ObServerHealthStatus::init(ObDataDiskStatus data_disk_status)
{
  int ret = OB_SUCCESS;
  const bool in_range = (data_disk_status > DATA_DISK_STATUS_INVALID)
      && (data_disk_status < DATA_DISK_STATUS_MAX);
  if (in_range) {
    data_disk_status_ = data_disk_status;
  } else {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", KR(ret), K(data_disk_status));
  }
  return ret;
}
int ObServerHealthStatus::assign(const ObServerHealthStatus server_health_status)
{
int ret = OB_SUCCESS;
data_disk_status_ = server_health_status.data_disk_status_;
return ret;
}
void ObServerHealthStatus::reset()
{
data_disk_status_ = ObDataDiskStatus::DATA_DISK_STATUS_INVALID;
}
bool ObServerHealthStatus::is_valid() const
{
return data_disk_status_ > ObDataDiskStatus::DATA_DISK_STATUS_INVALID
&& data_disk_status_ < ObDataDiskStatus::DATA_DISK_STATUS_MAX;
}
bool ObServerHealthStatus::is_healthy() const
{
return ObDataDiskStatus::DATA_DISK_STATUS_NORMAL == data_disk_status_;
}
// Map a data disk status enum value to its display string.
// Out-of-range input logs an error and yields "UNKNOWN".
const char *ObServerHealthStatus::data_disk_status_to_str(const ObDataDiskStatus data_disk_status)
{
  // Keep the string table in lockstep with the enum.
  STATIC_ASSERT(ARRAYSIZEOF(OB_DATA_DISK_STATUS_STR) == DATA_DISK_STATUS_MAX, "array size mismatch");
  const char *str = "UNKNOWN";
  const bool index_ok = (data_disk_status >= DATA_DISK_STATUS_INVALID)
      && (data_disk_status < ARRAYSIZEOF(OB_DATA_DISK_STATUS_STR));
  if (index_ok) {
    str = OB_DATA_DISK_STATUS_STR[data_disk_status];
  } else {
    LOG_ERROR_RET(OB_ERR_UNEXPECTED, "fatal error, unknown data disk status", K(data_disk_status));
  }
  return str;
}
ObHeartbeatHandler::ObHeartbeatHandler()
{
}
ObHeartbeatHandler::~ObHeartbeatHandler()
{
}
// Latest RS epoch (palf proposal id) observed via heartbeat requests.
// Stays INVALID_PROPOSAL_ID until the first heartbeat arrives; written
// with ATOMIC_CAS in handle_heartbeat() and read with ATOMIC_LOAD.
int64_t ObHeartbeatHandler::rs_epoch_id_ = palf::INVALID_PROPOSAL_ID;
// True once handle_heartbeat() has recorded a valid RS epoch — i.e. the
// new heartbeat service has started sending requests to this server.
bool ObHeartbeatHandler::is_rs_epoch_id_valid()
{
  return palf::INVALID_PROPOSAL_ID != ATOMIC_LOAD(&rs_epoch_id_);
}
// Process one heartbeat request from the RS heartbeat service:
// record the sender's epoch, remember the master RS address, then fill
// hb_response with this server's identity and health status.
// Returns OB_RS_NOT_MASTER when the request carries a stale epoch, and
// OB_NEED_RETRY when a concurrent heartbeat won the epoch-update race.
int ObHeartbeatHandler::handle_heartbeat(
    const share::ObHBRequest &hb_request,
    share::ObHBResponse &hb_response)
{
  int ret = OB_SUCCESS;
  hb_response.reset();
  int64_t rs_epoch_id = ATOMIC_LOAD(&rs_epoch_id_);
  if (OB_UNLIKELY(!hb_request.is_valid())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("receive an invalid heartbeat request", KR(ret), K(hb_request));
  } else if (OB_ISNULL(GCTX.rs_mgr_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("rs manager is null", KR(ret), KP(GCTX.rs_mgr_));
  } else {
    const int64_t epoch_id = hb_request.get_epoch_id();
    if (rs_epoch_id < epoch_id || palf::INVALID_PROPOSAL_ID == rs_epoch_id) {
      // Newer (or first) epoch: publish it. ATOMIC_CAS returns the value
      // previously stored; if it differs from what we read above, a
      // concurrent heartbeat updated the epoch first — ask the RS to retry.
      LOG_INFO("receive new rs epoch", "old rs_epoch_id", rs_epoch_id, "new rs_epoch_id", epoch_id);
      int64_t current_epoch_id = ATOMIC_CAS(&rs_epoch_id_, rs_epoch_id, epoch_id);
      if (rs_epoch_id != current_epoch_id) {
        ret = OB_NEED_RETRY;
        LOG_WARN("set rs_epoch_id_failed", KR(ret), K(rs_epoch_id), K(epoch_id), K(current_epoch_id));
      }
    } else if (rs_epoch_id > epoch_id) {
      // Stale sender: reject so the outdated RS can step down.
      ret = OB_RS_NOT_MASTER;
      LOG_WARN("this rs is not the newest leader", KR(ret), K(rs_epoch_id), K(epoch_id));
    }
  }
  if (FAILEDx(GCTX.rs_mgr_->force_set_master_rs(hb_request.get_rs_addr()))) {
    LOG_WARN("fail to set master rs", KR(ret), K(hb_request.get_rs_addr()));
  } else if (OB_FAIL(init_hb_response_(hb_response))) {
    LOG_WARN("fail to init hb response", KR(ret));
  } else {
    // const uint64_t server_id = hb_request.get_server_id();
    const share::RSServerStatus rs_server_status = hb_request.get_rs_server_status();
    // if (GCTX.server_id_ != server_id) {
    //   LOG_INFO("receive new server id", "old server_id_", GCTX.server_id_, "new server_id_", server_id);
    //   GCTX.server_id_ = server_id;
    // }
    // Adopt the server status the RS has recorded for us, if it changed.
    if (GCTX.rs_server_status_ != rs_server_status) {
      LOG_INFO("receive new server status recorded in rs",
          "old_status", GCTX.rs_server_status_,
          "new_status", rs_server_status);
      GCTX.rs_server_status_ = rs_server_status;
    }
  }
  return ret;
}
// Probe local data disk health via ObIOManager and fill server_health_status.
// Best-effort by design: if the health query itself fails (tmp_ret), dhs keeps
// its NORMAL default and the status is reported NORMAL — ret stays OB_SUCCESS.
int ObHeartbeatHandler::check_disk_status_(ObServerHealthStatus &server_health_status)
{
  int ret = OB_SUCCESS;
  int tmp_ret = OB_SUCCESS;
  ObDeviceHealthStatus dhs = DEVICE_HEALTH_NORMAL;
  int64_t abnormal_time = 0;
  server_health_status.reset();
  if (OB_TMP_FAIL(ObIOManager::get_instance().get_device_health_status(dhs, abnormal_time))) {
    LOG_WARN("fail to get device health status", KR(ret), KR(tmp_ret));
  } else if (OB_UNLIKELY(DEVICE_HEALTH_ERROR == dhs)) {
    // Throttle the disk-error warning to at most once per minute.
    const int64_t PRINT_LOG_INTERVAL_IN_US = 60 * 1000 * 1000; // 1min
    if (REACH_TIME_INTERVAL(PRINT_LOG_INTERVAL_IN_US)) {
      LOG_WARN("error occurs on data disk, ",
          "data_disk_health_status", device_health_status_to_str(dhs), K(abnormal_time));
    }
  }
  const bool is_data_disk_error = (DEVICE_HEALTH_ERROR == dhs);
  // init() only fails on out-of-range input; ERROR/NORMAL are always in
  // range, so its return value is intentionally ignored here.
  if (is_data_disk_error) {
    server_health_status.init(ObServerHealthStatus::DATA_DISK_STATUS_ERROR);
  } else {
    server_health_status.init(ObServerHealthStatus::DATA_DISK_STATUS_NORMAL);
  }
  return ret;
}
// Error-simulation switch: when tripped, the server with server_id 2
// pretends its data disk is broken (see test_id below).
ERRSIM_POINT_DEF(ERRSIM_DISK_ERROR);
// Fill hb_response with this server's zone, address, sql port, build
// version, start_service_time and current health status.
int ObHeartbeatHandler::init_hb_response_(share::ObHBResponse &hb_response)
{
  int ret = OB_SUCCESS;
  ObServerHealthStatus server_health_status;
  if (OB_FAIL(check_disk_status_(server_health_status))) {
    LOG_WARN("fail to check disk status", KR(ret));
  } else {
    int64_t sql_port = GCONF.mysql_port;
    share::ObServerInfoInTable::ObBuildVersion build_version;
    common::ObZone zone;
    // Under errsim, force a DATA_DISK_STATUS_ERROR report for server_id 2.
    int64_t test_id = ERRSIM_DISK_ERROR ? 2 : OB_INVALID_ID;
    if (test_id == GCTX.server_id_) {
      server_health_status.reset();
      server_health_status.init(ObServerHealthStatus::DATA_DISK_STATUS_ERROR);
    }
    if (OB_FAIL(zone.assign(GCONF.zone.str()))) {
      LOG_WARN("fail to assign zone", KR(ret), K(GCONF.zone.str()));
    } else if (OB_FAIL(ObService::get_build_version(build_version))) {
      LOG_WARN("fail to get build_version", KR(ret), K(build_version));
    } else if (OB_FAIL(hb_response.init(
        zone,
        GCTX.self_addr(),
        sql_port,
        build_version,
        GCTX.start_service_time_,
        server_health_status))) {
      LOG_WARN("fail to init the heartbeat response", KR(ret), K(zone), K(GCTX.self_addr()),
          K(sql_port), K(build_version), K(GCTX.start_service_time_), K(server_health_status));
    } else {}
  }
  return ret;
}
} // observer
} // oceanbase

View File

@ -0,0 +1,75 @@
/**
* Copyright (c) 2022 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_OBSERVER_OB_HEARTBEAT_HANDLER_H_
#define OCEANBASE_OBSERVER_OB_HEARTBEAT_HANDLER_H_
#include "lib/utility/ob_print_utils.h"
#include "lib/utility/ob_unify_serialize.h"
namespace oceanbase
{
namespace share
{
struct ObHBRequest;
struct ObHBResponse;
}
namespace observer
{
// currently, server health status only covers data disk status.
struct ObServerHealthStatus
{
  OB_UNIS_VERSION(1);
public:
  // Health of the local data disk as observed by the server itself.
  enum ObDataDiskStatus
  {
    DATA_DISK_STATUS_INVALID = 0, // unset / unknown
    DATA_DISK_STATUS_NORMAL = 1,  // data disk works normally
    DATA_DISK_STATUS_ERROR = 2,   // a data disk error has been detected
    DATA_DISK_STATUS_MAX = 3      // guard value for range checks
  };
  explicit ObServerHealthStatus();
  virtual ~ObServerHealthStatus();
  // Set the wrapped status value.
  int init(ObDataDiskStatus data_disk_status);
  // NOTE(review): parameter is passed by const value rather than const
  // reference — presumably const& was intended (cheap either way).
  int assign(const ObServerHealthStatus server_health_status);
  void reset();
  bool is_valid() const;
  // presumably true iff data_disk_status_ == DATA_DISK_STATUS_NORMAL —
  // see the .cpp for the exact definition.
  bool is_healthy() const;
  static const char *data_disk_status_to_str(const ObDataDiskStatus data_disk_status);
  inline bool operator ==(const ObServerHealthStatus &other) const
  {
    return data_disk_status_ == other.data_disk_status_;
  }
  inline bool operator !=(const ObServerHealthStatus &other) const
  {
    return data_disk_status_ != other.data_disk_status_;
  }
  // Prints the status twice: raw enum value and its string form.
  TO_STRING_KV(K(data_disk_status_), "data_disk_status", data_disk_status_to_str(data_disk_status_));
private:
  ObDataDiskStatus data_disk_status_;
};
// Observer-side handler for heartbeat requests sent by the rootservice
// heartbeat service. All entry points are static.
class ObHeartbeatHandler
{
public:
  explicit ObHeartbeatHandler();
  virtual ~ObHeartbeatHandler();
  // Process one heartbeat request and fill in the response.
  static int handle_heartbeat(
      const share::ObHBRequest &hb_request,
      share::ObHBResponse &hb_response);
  static bool is_rs_epoch_id_valid();
private:
  // Probe the local data disk and summarize it as a health status.
  static int check_disk_status_(ObServerHealthStatus &server_health_status);
  // Fill the response with zone/address/port/build version/health status.
  static int init_hb_response_(share::ObHBResponse &hb_response);
  // rs epoch id tracked across heartbeats — presumably updated from
  // incoming requests; see the .cpp.
  static int64_t rs_epoch_id_;
private:
  DISALLOW_COPY_AND_ASSIGN(ObHeartbeatHandler);
};
} // observer
} // oceanbase
#endif

View File

@ -811,6 +811,18 @@ int ObRpcIsEmptyServerP::process()
return ret;
}
int ObRpcCheckServerForAddingServerP::process()
{
int ret = OB_SUCCESS;
if (OB_ISNULL(gctx_.ob_service_)) {
ret = OB_INVALID_ARGUMENT;
LOG_ERROR("invalid argument", KR(ret), KP(gctx_.ob_service_));
} else if (OB_FAIL(gctx_.ob_service_->check_server_for_adding_server(arg_, result_))) {
LOG_WARN("fail to call check_server_for_adding_server", KR(ret), K(arg_));
} else {}
return ret;
}
int ObRpcCheckDeploymentModeP::process()
{
int ret = OB_SUCCESS;
@ -2334,9 +2346,33 @@ int ObSyncRewriteRulesP::process()
} else if (OB_FAIL(rule_mgr->sync_rule_from_inner_table())) {
LOG_WARN("failed to sync rewrite rules from inner table", K(ret));
}
}
return ret;
}
// RPC processor for OB_SEND_HEARTBEAT: forward the heartbeat request from
// the heartbeat service to ObService and return its response.
int ObRpcSendHeartbeatP::process()
{
  int ret = OB_SUCCESS;
  auto *const ob_service = gctx_.ob_service_;
  if (OB_ISNULL(ob_service)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_ERROR("invalid argument", KR(ret), KP(gctx_.ob_service_));
  } else if (OB_FAIL(ob_service->handle_heartbeat(arg_, result_))) {
    LOG_WARN("fail to call handle_heartbeat in ob service", KR(ret), K(arg_));
  }
  return ret;
}
// RPC processor for OB_GET_SERVER_RESOURCE_INFO: delegate the resource
// query to ObService via the global context.
int ObRpcGetServerResourceInfoP::process()
{
  int ret = OB_SUCCESS;
  auto *const ob_service = gctx_.ob_service_;
  if (OB_ISNULL(ob_service)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_ERROR("invalid argument", KR(ret), KP(gctx_.ob_service_));
  } else if (OB_FAIL(ob_service->get_server_resource_info(arg_, result_))) {
    LOG_WARN("fail to call get_server_resource_info in ob service", KR(ret), K(arg_));
  }
  return ret;
}
} // end of namespace observer
} // end of namespace oceanbase

View File

@ -132,6 +132,7 @@ OB_DEFINE_PROCESSOR_S(Srv, OB_REFRESH_MEMORY_STAT, ObRpcRefreshMemStatP);
OB_DEFINE_PROCESSOR_S(Srv, OB_WASH_MEMORY_FRAGMENTATION, ObRpcWashMemFragmentationP);
OB_DEFINE_PROCESSOR_S(Srv, OB_BOOTSTRAP, ObRpcBootstrapP);
OB_DEFINE_PROCESSOR_S(Srv, OB_IS_EMPTY_SERVER, ObRpcIsEmptyServerP);
OB_DEFINE_PROCESSOR_S(Srv, OB_CHECK_SERVER_FOR_ADDING_SERVER, ObRpcCheckServerForAddingServerP);
OB_DEFINE_PROCESSOR_S(Srv, OB_CHECK_DEPLOYMENT_MODE, ObRpcCheckDeploymentModeP);
OB_DEFINE_PROCESSOR_S(Srv, OB_REFRESH_SYNC_VALUE, ObRpcSyncAutoincValueP);
OB_DEFINE_PROCESSOR_S(Srv, OB_CLEAR_AUTOINC_CACHE, ObRpcClearAutoincCacheP);
@ -217,6 +218,8 @@ OB_DEFINE_PROCESSOR_S(Srv, OB_ESTIMATE_TABLET_BLOCK_COUNT, ObEstimateTabletBlock
OB_DEFINE_PROCESSOR_S(Srv, OB_DDL_CHECK_TABLET_MERGE_STATUS, ObRpcDDLCheckTabletMergeStatusP);
OB_DEFINE_PROCESSOR_S(Srv, OB_REFRESH_TENANT_INFO, ObRefreshTenantInfoP);
OB_DEFINE_PROCESSOR_S(Srv, OB_SYNC_REWRITE_RULES, ObSyncRewriteRulesP);
OB_DEFINE_PROCESSOR_S(Srv, OB_SEND_HEARTBEAT, ObRpcSendHeartbeatP);
OB_DEFINE_PROCESSOR_S(Srv, OB_GET_SERVER_RESOURCE_INFO, ObRpcGetServerResourceInfoP);
OB_DEFINE_PROCESSOR_S(Srv, OB_UPDATE_TENANT_INFO_CACHE, ObUpdateTenantInfoCacheP);
} // end of namespace observer
} // end of namespace oceanbase

View File

@ -73,6 +73,8 @@
#include "storage/compaction/ob_tenant_tablet_scheduler.h"
#include "share/ob_cluster_event_history_table_operator.h"//CLUSTER_EVENT_INSTANCE
#include "storage/ddl/ob_tablet_ddl_kv_mgr.h"
#include "observer/ob_heartbeat_handler.h"
#include "storage/slog/ob_storage_logger_manager.h"
namespace oceanbase
{
@ -1484,8 +1486,124 @@ int ObService::is_empty_server(const obrpc::ObCheckServerEmptyArg &arg, obrpc::B
}
return ret;
}
// Pre-check executed on a server that RS wants to add to the cluster:
// 1) reject if this server's sys-tenant min data version is newer than the
//    one carried by the caller (a newer server cannot join an older
//    cluster);
// 2) otherwise report whether this server is empty, plus its zone,
//    sql port and build version, so RS can validate the ADD SERVER command.
int ObService::check_server_for_adding_server(
    const obrpc::ObCheckServerForAddingServerArg &arg,
    obrpc::ObCheckServerForAddingServerResult &result)
{
  int ret = OB_SUCCESS;
  uint64_t sys_tenant_data_version = 0;
  if (OB_UNLIKELY(!inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(inited_));
  } else if (OB_FAIL(GET_MIN_DATA_VERSION(OB_SYS_TENANT_ID, sys_tenant_data_version))) {
    LOG_WARN("fail to get sys tenant data version", KR(ret));
  } else if (arg.get_sys_tenant_data_version() > 0
      && sys_tenant_data_version > arg.get_sys_tenant_data_version()) {
    // a zero version in arg means the caller carried no version
    // (presumably an older RS); the compatibility gate is skipped then.
    ret = OB_NOT_SUPPORTED;
    LOG_WARN("adding server with larger sys tenant data version is not supported",
        KR(ret), K(arg), K(sys_tenant_data_version), K(arg.get_sys_tenant_data_version()));
  } else {
    bool server_empty = false;
    ObCheckServerEmptyArg check_server_empty_arg;
    check_server_empty_arg.mode_ = ObCheckServerEmptyArg::ADD_SERVER;
    // NOTE(review): mode_ was just set to ADD_SERVER, so this expression is
    // always false; kept to mirror the BOOTSTRAP code path — confirm intent.
    const bool wait_log_scan = ObCheckServerEmptyArg::BOOTSTRAP == check_server_empty_arg.mode_;
    if (OB_FAIL(check_server_empty(check_server_empty_arg, wait_log_scan, server_empty))) {
      LOG_WARN("check_server_empty failed", KR(ret), K(check_server_empty_arg), K(wait_log_scan));
    } else {
      char build_version[common::OB_SERVER_VERSION_LENGTH] = {0};
      ObServerInfoInTable::ObBuildVersion build_version_string;
      ObZone zone;
      int64_t sql_port = GCONF.mysql_port;
      get_package_and_svn(build_version, sizeof(build_version));
      if (OB_FAIL(zone.assign(GCONF.zone.str()))) {
        LOG_WARN("fail to assign zone", KR(ret), K(GCONF.zone.str()));
      } else if (OB_FAIL(build_version_string.assign(build_version))) {
        LOG_WARN("fail to assign build version", KR(ret), K(build_version));
      } else if (OB_FAIL(result.init(
          server_empty,
          zone,
          sql_port,
          build_version_string))) {
        LOG_WARN("fail to init result", KR(ret), K(server_empty), K(zone), K(sql_port),
            K(build_version_string));
      } else {}
    }
  }
  LOG_INFO("generate result", KR(ret), K(arg), K(result));
  return ret;
}
// RPC-facing wrapper: fill `result` with this server's address and a fresh
// resource snapshot (collected by the single-argument overload below).
int ObService::get_server_resource_info(
    const obrpc::ObGetServerResourceInfoArg &arg,
    obrpc::ObGetServerResourceInfoResult &result)
{
  int ret = OB_SUCCESS;
  const ObAddr &my_addr = GCONF.self_addr_;
  share::ObServerResourceInfo resource_info;
  // reset up-front so a failed path never leaves a stale result behind
  result.reset();
  if (OB_UNLIKELY(!inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(inited_));
  } else if (OB_UNLIKELY(!arg.is_valid())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", KR(ret), K(arg));
  } else if (OB_FAIL(get_server_resource_info(resource_info))) {
    LOG_WARN("fail to get server resource info", KR(ret));
  } else if (OB_FAIL(result.init(my_addr, resource_info))) {
    LOG_WARN("fail to init result", KR(ret), K(my_addr), K(resource_info));
  }
  // always log the outcome, including failures
  FLOG_INFO("get server resource info", KR(ret), K(arg), K(result));
  return ret;
}
// Collect a snapshot of this server's physical and assigned resources:
// cpu, memory, data disk and log disk. Assigned values come from the
// tenant node balancer; disk stats come from the server block manager
// and the log block manager.
int ObService::get_server_resource_info(share::ObServerResourceInfo &resource_info)
{
  int ret = OB_SUCCESS;
  omt::ObTenantNodeBalancer::ServerResource svr_res_assigned;
  int64_t clog_free_size_byte = 0;
  int64_t clog_total_size_byte = 0;
  logservice::ObServerLogBlockMgr *log_block_mgr = GCTX.log_block_mgr_;
  resource_info.reset();
  if (OB_UNLIKELY(!inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(inited_));
  } else if (OB_ISNULL(log_block_mgr)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("log_block_mgr is null", KR(ret), K(GCTX.log_block_mgr_));
  } else if (OB_FAIL(omt::ObTenantNodeBalancer::get_instance().get_server_allocated_resource(svr_res_assigned))) {
    LOG_WARN("fail to get server allocated resource", KR(ret));
  } else if (OB_FAIL(log_block_mgr->get_disk_usage(clog_free_size_byte, clog_total_size_byte))) {
    LOG_WARN("Failed to get clog stat ", KR(ret));
  } else {
    // default RESERVED_DISK_SIZE -> 4G; best effort: if reading the
    // configured value fails, the default is kept (return value ignored).
    int64_t reserved_size = 4 * 1024 * 1024 * 1024L;
    (void) SLOGGERMGR.get_reserved_size(reserved_size);
    resource_info.cpu_ = get_cpu_count();
    resource_info.report_cpu_assigned_ = svr_res_assigned.min_cpu_;
    resource_info.report_cpu_max_assigned_ = svr_res_assigned.max_cpu_;
    resource_info.report_mem_assigned_ = svr_res_assigned.memory_size_;
    resource_info.mem_in_use_ = 0;  // NOTE(review): hard-coded 0, not reported yet — confirm
    resource_info.mem_total_ = GMEMCONF.get_server_memory_avail();
    // data disk capacity — presumably excludes reserved_size (confirm
    // against get_max_macro_block_count's contract)
    resource_info.disk_total_
        = OB_SERVER_BLOCK_MGR.get_max_macro_block_count(reserved_size) * OB_SERVER_BLOCK_MGR.get_macro_block_size();
    resource_info.disk_in_use_
        = OB_SERVER_BLOCK_MGR.get_used_macro_block_count() * OB_SERVER_BLOCK_MGR.get_macro_block_size();
    resource_info.log_disk_total_ = clog_total_size_byte;
    resource_info.report_log_disk_assigned_ = svr_res_assigned.log_disk_size_;
  }
  return ret;
}
// Fill `build_version` with this binary's package/SVN version string.
int ObService::get_build_version(share::ObServerInfoInTable::ObBuildVersion &build_version)
{
  int ret = OB_SUCCESS;
  char build_version_char_array[common::OB_SERVER_VERSION_LENGTH] = {0};
  build_version.reset();
  // pass the destination buffer's own size: sizeof(build_version) would be
  // the size of the ObBuildVersion object, not of the char array written to
  get_package_and_svn(build_version_char_array, sizeof(build_version_char_array));
  if (OB_FAIL(build_version.assign(build_version_char_array))) {
    LOG_WARN("fail to assign build_version", KR(ret), K(build_version_char_array));
  }
  return ret;
}
int ObService::get_partition_count(obrpc::ObGetPartitionCountResult &result)
{
UNUSEDx(result);
@ -1504,6 +1622,7 @@ int ObService::get_partition_count(obrpc::ObGetPartitionCountResult &result)
int ObService::check_server_empty(const ObCheckServerEmptyArg &arg, const bool wait_log_scan, bool &is_empty)
{
// **TODO (linqiucen.lqc): if rs_epoch has been already valid, this server is not empty
int ret = OB_SUCCESS;
is_empty = true;
UNUSED(wait_log_scan);
@ -1735,18 +1854,6 @@ int ObService::sync_partition_table(const obrpc::Int64 &arg)
return OB_NOT_SUPPORTED;
}
// Report when this server's heartbeat lease with the rootservice expires
// (for ObPartitionService::check_mc_allowed_by_server_lease).
int ObService::get_server_heartbeat_expire_time(int64_t &lease_expire_time)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(!inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", K(ret));
  } else {
    // lease_state_mgr_ tracks the heartbeat lease granted by rootservice
    lease_expire_time = lease_state_mgr_.get_heartbeat_expire_time();
  }
  return ret;
}
int ObService::set_tracepoint(const obrpc::ObAdminSetTPArg &arg)
{
int ret = OB_SUCCESS;
@ -2680,6 +2787,23 @@ int ObService::init_tenant_config(
return OB_SUCCESS;
}
// Entry point for heartbeat RPCs from the rootservice heartbeat service.
// Delegates the real work to ObHeartbeatHandler and logs the latency.
int ObService::handle_heartbeat(
    const share::ObHBRequest &hb_request,
    share::ObHBResponse &hb_response)
{
  int ret = OB_SUCCESS;
  LOG_TRACE("receive a heartbeat request from heartbeat service", K(hb_request));
  const int64_t now = ::oceanbase::common::ObTimeUtility::current_time();
  if (OB_UNLIKELY(!inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("ObService is not inited", KR(ret), K(inited_));
  } else if (OB_FAIL(ObHeartbeatHandler::handle_heartbeat(hb_request, hb_response))) {
    LOG_WARN("fail to handle heartbeat", KR(ret), K(hb_request));
  }
  // always log the outcome and processing cost, including failed requests
  const int64_t time_cost = ::oceanbase::common::ObTimeUtility::current_time() - now;
  FLOG_INFO("handle_heartbeat", KR(ret), K(hb_request), K(hb_response), K(time_cost));
  return ret;
}
int ObService::update_tenant_info_cache(
const ObUpdateTenantInfoCacheArg &arg,
ObUpdateTenantInfoCacheRes &result)

View File

@ -181,6 +181,14 @@ public:
////////////////////////////////////////////////////////////////
// ObRpcBootstrapP @RS bootstrap
int bootstrap(const obrpc::ObBootstrapArg &arg);
// ObRpcCheckServerForAddingServerP @RS add server
int check_server_for_adding_server(
const obrpc::ObCheckServerForAddingServerArg &arg,
obrpc::ObCheckServerForAddingServerResult &result);
// ObRpcGetServerStatusP @RS
int get_server_resource_info(const obrpc::ObGetServerResourceInfoArg &arg, obrpc::ObGetServerResourceInfoResult &result);
int get_server_resource_info(share::ObServerResourceInfo &resource_info);
static int get_build_version(share::ObServerInfoInTable::ObBuildVersion &build_version);
// ObRpcIsEmptyServerP @RS bootstrap
int is_empty_server(const obrpc::ObCheckServerEmptyArg &arg, obrpc::Bool &is_empty);
// ObRpcCheckDeploymentModeP
@ -209,8 +217,6 @@ public:
int sync_partition_table(const obrpc::Int64 &arg);
// ObRpcSetTPP @RS::admin to set tracepoint
int set_tracepoint(const obrpc::ObAdminSetTPArg &arg);
// for ObPartitionService::check_mc_allowed_by_server_lease
int get_server_heartbeat_expire_time(int64_t &lease_expire_time);
int cancel_sys_task(const share::ObTaskId &task_id);
int refresh_memory_stat();
int wash_memory_fragmentation();
@ -231,6 +237,9 @@ public:
int init_tenant_config(
const obrpc::ObInitTenantConfigArg &arg,
obrpc::ObInitTenantConfigRes &result);
int handle_heartbeat(
const share::ObHBRequest &hb_request,
share::ObHBResponse &hb_response);
private:
int get_role_from_palf_(
logservice::ObLogService &log_service,

View File

@ -72,6 +72,7 @@ void oceanbase::observer::init_srv_xlator_for_sys(ObSrvRpcXlator *xlator) {
RPC_PROCESSOR(ObInitTenantConfigP, gctx_);
RPC_PROCESSOR(ObGetLeaderLocationsP, gctx_);
RPC_PROCESSOR(ObBatchBroadcastSchemaP, gctx_);
RPC_PROCESSOR(ObRpcSendHeartbeatP, gctx_);
RPC_PROCESSOR(ObRpcNotifySwitchLeaderP, gctx_);
// interrupt
@ -109,6 +110,8 @@ void oceanbase::observer::init_srv_xlator_for_sys(ObSrvRpcXlator *xlator) {
//dbms_scheduler
RPC_PROCESSOR(ObRpcRunDBMSSchedJobP, gctx_);
RPC_PROCESSOR(ObRpcGetServerResourceInfoP, gctx_);
}
void oceanbase::observer::init_srv_xlator_for_schema_test(ObSrvRpcXlator *xlator) {

View File

@ -60,11 +60,11 @@ void oceanbase::observer::init_srv_xlator_for_rootserver(ObSrvRpcXlator *xlator)
RPC_PROCESSOR(rootserver::ObRpcReportSysLSP, *gctx_.root_service_);
RPC_PROCESSOR(rootserver::ObRpcRemoveSysLSP, *gctx_.root_service_);
RPC_PROCESSOR(rootserver::ObRpcFetchLocationP, *gctx_.root_service_);
RPC_PROCESSOR(rootserver::ObRpcMergeFinishP, *gctx_.root_service_);
// RPC_PROCESSOR(rootserver::ObRpcMergeFinishP, *gctx_.root_service_);
RPC_PROCESSOR(rootserver::ObBroadcastDSActionP, *gctx_.root_service_);
RPC_PROCESSOR(rootserver::ObRpcFetchAliveServerP, *gctx_.root_service_);
RPC_PROCESSOR(rootserver::ObRpcFetchActiveServerStatusP, *gctx_.root_service_);
// RPC_PROCESSOR(rootserver::ObRpcFetchActiveServerStatusP, *gctx_.root_service_);
RPC_PROCESSOR(rootserver::ObRpcRefreshTimeZoneInfoP, *gctx_.root_service_);
RPC_PROCESSOR(rootserver::ObRpcRequestTimeZoneInfoP, *gctx_.root_service_);
RPC_PROCESSOR(rootserver::ObCheckDanglingReplicaFinishP, *gctx_.root_service_);
@ -238,7 +238,6 @@ void oceanbase::observer::init_srv_xlator_for_rootserver(ObSrvRpcXlator *xlator)
//for upgrade
RPC_PROCESSOR(ObGetTenantSchemaVersionP, gctx_);
RPC_PROCESSOR(rootserver::ObCheckMergeFinishP, *gctx_.root_service_);
RPC_PROCESSOR(rootserver::ObRpcFlashBackTableToScnP, *gctx_.root_service_);
RPC_PROCESSOR(rootserver::ObRpcCreateRestorePointP, *gctx_.root_service_);

View File

@ -68,6 +68,7 @@ void oceanbase::observer::init_srv_xlator_for_storage(ObSrvRpcXlator *xlator) {
RPC_PROCESSOR(ObRpcWashMemFragmentationP, gctx_);
RPC_PROCESSOR(ObRpcBootstrapP, gctx_);
RPC_PROCESSOR(ObRpcIsEmptyServerP, gctx_);
RPC_PROCESSOR(ObRpcCheckServerForAddingServerP, gctx_);
RPC_PROCESSOR(ObRpcCheckDeploymentModeP, gctx_);
RPC_PROCESSOR(ObRpcSyncAutoincValueP, gctx_);
RPC_PROCESSOR(ObRpcClearAutoincCacheP, gctx_);

View File

@ -102,6 +102,7 @@
#include "observer/table_load/ob_table_load_service.h"
#include "sql/plan_cache/ob_plan_cache.h"
#include "sql/plan_cache/ob_ps_cache.h"
#include "rootserver/ob_heartbeat_service.h"
using namespace oceanbase;
using namespace oceanbase::lib;
@ -452,6 +453,7 @@ int ObMultiTenant::init(ObAddr myaddr,
// MTL_BIND2(nullptr, nullptr, start_sql_nio_server, mtl_stop_default,
// mtl_wait_default, mtl_destroy_default);
}
MTL_BIND2(mtl_new_default, rootserver::ObHeartbeatService::mtl_init, nullptr, rootserver::ObHeartbeatService::mtl_stop, rootserver::ObHeartbeatService::mtl_wait, mtl_destroy_default);
}
if (OB_SUCC(ret)) {

View File

@ -3,6 +3,7 @@ ob_set_subtarget(ob_rootserver ALONE
ob_root_service.cpp
ddl_task/ob_ddl_redefinition_task.cpp
ob_unit_manager.cpp
ob_heartbeat_service.cpp
)
ob_set_subtarget(ob_rootserver backup
@ -54,6 +55,7 @@ ob_set_subtarget(ob_rootserver common
ob_empty_server_checker.cpp
ob_lost_replica_checker.cpp
ob_server_manager.cpp
ob_server_zone_op_service.cpp
ob_snapshot_info_manager.cpp
ob_tablet_creator.cpp
ob_tablet_drop.cpp

View File

@ -13,7 +13,6 @@
#define USING_LOG_PREFIX ARCHIVE
#include "rootserver/backup/ob_archive_scheduler_service.h"
#include "rootserver/backup/ob_tenant_archive_scheduler.h"
#include "rootserver/ob_server_manager.h"
#include "rootserver/ob_rs_event_history_table_operator.h"
#include "rootserver/ob_unit_manager.h"
#include "storage/tx/ob_ts_mgr.h"
@ -73,14 +72,13 @@ void ObArchiveThreadIdling::set_checkpoint_interval(const int64_t interval_us)
*/
ObArchiveSchedulerService::ObArchiveSchedulerService()
: is_inited_(false), is_working_(false), idling_(stop_),
server_mgr_(nullptr), zone_mgr_(nullptr), unit_mgr_(nullptr),
zone_mgr_(nullptr), unit_mgr_(nullptr),
rpc_proxy_(nullptr), sql_proxy_(nullptr), schema_service_(nullptr), backup_lease_service_(nullptr)
{
}
int ObArchiveSchedulerService::init(
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
ObUnitManager &unit_manager,
share::schema::ObMultiVersionSchemaService *schema_service,
@ -100,7 +98,6 @@ int ObArchiveSchedulerService::init(
} else if (OB_FAIL(create(thread_cnt, "LOG_ARCHIVE_SERVICE"))) {
LOG_WARN("failed to create log archive thread", K(ret));
} else {
server_mgr_ = &server_mgr;
zone_mgr_ = &zone_mgr;
unit_mgr_ = &unit_manager;
schema_service_ = schema_service;
@ -311,7 +308,7 @@ int ObArchiveSchedulerService::start_tenant_archive_(const uint64_t tenant_id)
ObArchiveHandler archive_handler;
// Only one dest is supported.
const int64_t dest_no = 0;
if (OB_FAIL(archive_handler.init(tenant_id, *server_mgr_, *zone_mgr_, *unit_mgr_, schema_service_, *rpc_proxy_, *sql_proxy_))) {
if (OB_FAIL(archive_handler.init(tenant_id, *zone_mgr_, *unit_mgr_, schema_service_, *rpc_proxy_, *sql_proxy_))) {
LOG_WARN("failed to init archive_handler", K(ret));
} else if (OB_FAIL(archive_handler.enable_archive(dest_no))) {
LOG_WARN("failed to enable archive tenant", K(ret), K(tenant_id), K(dest_no));
@ -328,7 +325,7 @@ int ObArchiveSchedulerService::stop_tenant_archive_(const uint64_t tenant_id)
ObArchiveHandler archive_handler;
// Only one dest is supported.
const int64_t dest_no = 0;
if (OB_FAIL(archive_handler.init(tenant_id, *server_mgr_, *zone_mgr_, *unit_mgr_, schema_service_, *rpc_proxy_, *sql_proxy_))) {
if (OB_FAIL(archive_handler.init(tenant_id, *zone_mgr_, *unit_mgr_, schema_service_, *rpc_proxy_, *sql_proxy_))) {
LOG_WARN("failed to init archive_handler", K(ret), K(tenant_id));
} else if (OB_FAIL(archive_handler.disable_archive(dest_no))) {
LOG_WARN("failed to disable tenant archive", K(ret), K(tenant_id), K(dest_no));
@ -375,7 +372,7 @@ int ObArchiveSchedulerService::inner_process_(const uint64_t tenant_id)
bool no_round = false;
ObArchiveHandler tenant_scheduler;
if (OB_FAIL(tenant_scheduler.init(tenant_id, *server_mgr_, *zone_mgr_, *unit_mgr_, schema_service_, *rpc_proxy_, *sql_proxy_))) {
if (OB_FAIL(tenant_scheduler.init(tenant_id, *zone_mgr_, *unit_mgr_, schema_service_, *rpc_proxy_, *sql_proxy_))) {
LOG_WARN("failed to init tenant archive scheduler", K(ret), K(tenant_id));
} else if (OB_TMP_FAIL(tenant_scheduler.checkpoint())) {
LOG_WARN("failed to checkpoint", K(tmp_ret), K(tenant_id));
@ -526,7 +523,7 @@ int ObArchiveSchedulerService::open_tenant_archive_mode_(const uint64_t tenant_i
{
int ret = OB_SUCCESS;
ObArchiveHandler tenant_scheduler;
if (OB_FAIL(tenant_scheduler.init(tenant_id, *server_mgr_, *zone_mgr_, *unit_mgr_, schema_service_, *rpc_proxy_, *sql_proxy_))) {
if (OB_FAIL(tenant_scheduler.init(tenant_id, *zone_mgr_, *unit_mgr_, schema_service_, *rpc_proxy_, *sql_proxy_))) {
LOG_WARN("failed to init tenant archive scheduler", K(ret), K(tenant_id));
} else if (OB_FAIL(tenant_scheduler.open_archive_mode())) {
LOG_WARN("failed to open archive mode", K(ret), K(tenant_id));
@ -588,7 +585,7 @@ int ObArchiveSchedulerService::close_tenant_archive_mode_(const uint64_t tenant_
{
int ret = OB_SUCCESS;
ObArchiveHandler tenant_scheduler;
if (OB_FAIL(tenant_scheduler.init(tenant_id, *server_mgr_, *zone_mgr_, *unit_mgr_, schema_service_, *rpc_proxy_, *sql_proxy_))) {
if (OB_FAIL(tenant_scheduler.init(tenant_id, *zone_mgr_, *unit_mgr_, schema_service_, *rpc_proxy_, *sql_proxy_))) {
LOG_WARN("failed to init tenant archive scheduler", K(ret), K(tenant_id));
} else if (OB_FAIL(tenant_scheduler.close_archive_mode())) {
LOG_WARN("failed to close archive mode", K(ret), K(tenant_id));

View File

@ -38,7 +38,6 @@ namespace share {
namespace rootserver
{
class ObServerManager;
class ObZoneManager;
class ObUnitManager;
@ -66,7 +65,6 @@ public:
~ObArchiveSchedulerService() {}
int init(
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
ObUnitManager &unit_manager,
share::schema::ObMultiVersionSchemaService *schema_service,
@ -129,7 +127,6 @@ private:
bool is_inited_;
bool is_working_;
mutable ObArchiveThreadIdling idling_;
ObServerManager *server_mgr_;
ObZoneManager *zone_mgr_;
ObUnitManager *unit_mgr_;
obrpc::ObSrvRpcProxy *rpc_proxy_;

View File

@ -32,7 +32,6 @@ namespace rootserver
ObBackupCleanScheduler::ObBackupCleanScheduler()
: ObIBackupJobScheduler(BackupJobType::BACKUP_CLEAN_JOB),
is_inited_(false),
server_mgr_(nullptr),
sql_proxy_(nullptr),
rpc_proxy_(nullptr),
schema_service_(nullptr),
@ -43,7 +42,6 @@ ObBackupCleanScheduler::ObBackupCleanScheduler()
}
int ObBackupCleanScheduler::init(
ObServerManager &server_mgr,
common::ObMySQLProxy &sql_proxy,
obrpc::ObSrvRpcProxy &rpc_proxy,
share::schema::ObMultiVersionSchemaService &schema_service,
@ -56,7 +54,6 @@ int ObBackupCleanScheduler::init(
ret = OB_INIT_TWICE;
LOG_WARN("init twice", K(ret));
} else {
server_mgr_ = &server_mgr;
sql_proxy_ = &sql_proxy;
rpc_proxy_ = &rpc_proxy;
schema_service_ = &schema_service;
@ -1914,7 +1911,6 @@ int ObSysTenantBackupDeleteMgr::advance_status_(
ObBackupAutoObsoleteDeleteTrigger::ObBackupAutoObsoleteDeleteTrigger()
: ObIBackupTrigger(BackupTriggerType::BACKUP_AUTO_DELETE_TRIGGER),
is_inited_(false),
server_mgr_(nullptr),
sql_proxy_(nullptr),
rpc_proxy_(nullptr),
schema_service_(nullptr),
@ -1925,7 +1921,6 @@ ObBackupAutoObsoleteDeleteTrigger::ObBackupAutoObsoleteDeleteTrigger()
}
int ObBackupAutoObsoleteDeleteTrigger::init(
ObServerManager &server_mgr,
common::ObMySQLProxy &sql_proxy,
obrpc::ObSrvRpcProxy &rpc_proxy,
share::schema::ObMultiVersionSchemaService &schema_service,
@ -1938,7 +1933,6 @@ int ObBackupAutoObsoleteDeleteTrigger::init(
ret = OB_INIT_TWICE;
LOG_WARN("init twice", K(ret));
} else {
server_mgr_ = &server_mgr;
sql_proxy_ = &sql_proxy;
rpc_proxy_ = &rpc_proxy;
schema_service_ = &schema_service;

View File

@ -25,7 +25,6 @@ class ObISQLClient;
}
namespace rootserver
{
class ObServerManager;
class ObIBackupDeleteMgr;
class ObBackupCleanScheduler : public ObIBackupJobScheduler
{
@ -43,7 +42,6 @@ public:
virtual int get_need_reload_task(common::ObIAllocator &allocator, common::ObIArray<ObBackupScheduleTask *> &tasks) override; // reload tasks after switch master happend
public:
int init(
ObServerManager &server_mgr,
common::ObMySQLProxy &sql_proxy,
obrpc::ObSrvRpcProxy &rpc_proxy,
share::schema::ObMultiVersionSchemaService &schema_service,
@ -94,7 +92,6 @@ private:
int handle_failed_job_(const uint64_t tenant_id, const int64_t result, ObIBackupDeleteMgr &job_mgr, share::ObBackupCleanJobAttr &job_attr);
private:
bool is_inited_;
ObServerManager *server_mgr_;
common::ObMySQLProxy *sql_proxy_;
obrpc::ObSrvRpcProxy *rpc_proxy_;
share::schema::ObMultiVersionSchemaService *schema_service_;
@ -259,7 +256,6 @@ public:
virtual int process() override;
public:
int init(
ObServerManager &server_mgr,
common::ObMySQLProxy &sql_proxy,
obrpc::ObSrvRpcProxy &rpc_proxy,
share::schema::ObMultiVersionSchemaService &schema_service,
@ -274,7 +270,6 @@ private:
int parse_time_interval_(const char *str, int64_t &val);
private:
bool is_inited_;
ObServerManager *server_mgr_;
common::ObMySQLProxy *sql_proxy_;
obrpc::ObSrvRpcProxy *rpc_proxy_;
share::schema::ObMultiVersionSchemaService *schema_service_;

View File

@ -20,7 +20,6 @@
#include "rootserver/ob_root_utils.h"
#include "share/backup/ob_tenant_archive_mgr.h"
#include "share/backup/ob_backup_helper.h"
#include "rootserver/ob_server_manager.h"
#include "observer/ob_sql_client_decorator.h"
#include "share/ob_tenant_info_proxy.h"
#include "share/backup/ob_backup_connectivity.h"

View File

@ -31,8 +31,6 @@ class ObISQLClient;
namespace rootserver
{
class ObServerManager;
class ObSysTenantBackupJobMgr;
class ObBackupSetTaskMgr;
class ObIBackupJobMgr;

View File

@ -19,7 +19,6 @@
#include "rootserver/ob_root_utils.h"
#include "observer/omt/ob_tenant_config_mgr.h"
#include "share/backup/ob_tenant_archive_mgr.h"
#include "rootserver/ob_server_manager.h"
#include "observer/ob_sql_client_decorator.h"
#include "storage/ls/ob_ls.h"
#include "share/ls/ob_ls_operator.h"

View File

@ -16,7 +16,6 @@
#include "ob_backup_schedule_task.h"
#include "ob_backup_task_scheduler.h"
#include "rootserver/ob_root_utils.h"
#include "rootserver/ob_server_manager.h"
namespace oceanbase
{
@ -50,7 +49,6 @@ ObBackupService::ObBackupService()
}
int ObBackupService::init(
ObServerManager &server_mgr,
common::ObMySQLProxy &sql_proxy,
obrpc::ObSrvRpcProxy &rpc_proxy,
schema::ObMultiVersionSchemaService &schema_service,
@ -68,12 +66,12 @@ int ObBackupService::init(
} else if (OB_FAIL(register_job_(&backup_data_scheduler_))) {
LOG_WARN("fail to regist job", K(ret), "job_type", backup_data_scheduler_.get_job_type());
} else if (OB_FAIL(backup_clean_scheduler_.init(
server_mgr, sql_proxy, rpc_proxy, schema_service, lease_service, task_scheduler, *this))) {
sql_proxy, rpc_proxy, schema_service, lease_service, task_scheduler, *this))) {
LOG_WARN("fail to init backup clean scheduler", K(ret));
} else if (OB_FAIL(register_job_(&backup_clean_scheduler_))) {
LOG_WARN("fail to regist job", K(ret), "job_type", backup_clean_scheduler_.get_job_type());
} else if (OB_FAIL(backup_auto_obsolete_delete_trigger_.init(
server_mgr, sql_proxy, rpc_proxy, schema_service, lease_service, task_scheduler, *this))) {
sql_proxy, rpc_proxy, schema_service, lease_service, task_scheduler, *this))) {
LOG_WARN("fail to init backup auto obsolete delete trigger", K(ret));
} else if (OB_FAIL(register_trigger_(&backup_auto_obsolete_delete_trigger_))) {
LOG_WARN("fail to regist job", K(ret), "job_type", backup_auto_obsolete_delete_trigger_.get_trigger_type());

View File

@ -35,7 +35,7 @@ class ObBackupService : public ObRsReentrantThread
public:
ObBackupService();
virtual ~ObBackupService() {};
int init(ObServerManager &server_mgr, common::ObMySQLProxy &sql_proxy, obrpc::ObSrvRpcProxy &rpc_proxy,
int init(common::ObMySQLProxy &sql_proxy, obrpc::ObSrvRpcProxy &rpc_proxy,
share::schema::ObMultiVersionSchemaService &schema_service, ObBackupLeaseService &lease_service,
ObBackupTaskScheduler &task_scheduler);
virtual void run3() override;

View File

@ -17,12 +17,12 @@
#include "lib/lock/ob_mutex.h"
#include "lib/stat/ob_diagnose_info.h"
#include "lib/profile/ob_trace_id.h"
#include "rootserver/ob_server_manager.h"
#include "lib/alloc/ob_malloc_allocator.h"
#include "lib/oblog/ob_log_module.h"
#include "share/ob_rpc_struct.h"
#include "rootserver/ob_rs_event_history_table_operator.h"
#include "share/ob_srv_rpc_proxy.h"
#include "share/ob_all_server_tracer.h"
namespace oceanbase
{
using namespace common;
@ -43,7 +43,6 @@ ObBackupTaskSchedulerQueue::ObBackupTaskSchedulerQueue()
task_map_(),
rpc_proxy_(nullptr),
task_scheduler_(nullptr),
server_mgr_(nullptr),
zone_mgr_(nullptr),
backup_service_(nullptr),
sql_proxy_(nullptr),
@ -97,7 +96,6 @@ void ObBackupTaskSchedulerQueue::reset()
int ObBackupTaskSchedulerQueue::init(
ObTenantBackupScheduleTaskStatMap &tenant_stat_map,
ObServerBackupScheduleTaskStatMap &server_stat_map,
ObServerManager &server_manager,
ObZoneManager &zone_manager,
ObBackupService &backup_service,
const int64_t bucket_num,
@ -124,7 +122,6 @@ int ObBackupTaskSchedulerQueue::init(
max_size_ = max_size;
tenant_stat_map_ = &tenant_stat_map;
server_stat_map_ = &server_stat_map;
server_mgr_ = &server_manager;
zone_mgr_ = &zone_manager;
rpc_proxy_ = rpc_proxy;
task_scheduler_ = task_scheduler;
@ -406,7 +403,8 @@ int ObBackupTaskSchedulerQueue::get_all_servers_(
tmp_server_list.reuse();
const ObZone &zone = all_zones.at(i).zone_;
const int64_t priority = all_zones.at(i).priority_;
if (OB_FAIL(server_mgr_->get_alive_servers(zone, tmp_server_list))) {
// **FIXME (linqiucen.lqc): temp. solution, this will be replaced when transfer branch is merged
if (OB_FAIL(SVR_TRACER.get_alive_servers(zone, tmp_server_list))) {
LOG_WARN("failed to get alive servers", KR(ret), K(zone));
} else {
for (int64_t j = 0; OB_SUCC(ret) && j < tmp_server_list.count(); ++j) {
@ -1084,7 +1082,6 @@ ObBackupTaskScheduler::ObBackupTaskScheduler()
server_stat_map_(),
queue_(),
self_(),
server_mgr_(nullptr),
zone_mgr_(nullptr),
rpc_proxy_(nullptr),
backup_service_(nullptr),
@ -1094,7 +1091,6 @@ ObBackupTaskScheduler::ObBackupTaskScheduler()
}
int ObBackupTaskScheduler::init(
ObServerManager *server_mgr,
ObZoneManager *zone_mgr,
obrpc::ObSrvRpcProxy *rpc_proxy,
ObBackupService *backup_mgr,
@ -1108,13 +1104,12 @@ int ObBackupTaskScheduler::init(
if (IS_INIT) {
ret = OB_INIT_TWICE;
LOG_WARN("init twice", K(ret));
} else if (OB_UNLIKELY(nullptr == server_mgr || nullptr == rpc_proxy || nullptr == zone_mgr || nullptr == service)) {
} else if (OB_UNLIKELY(nullptr == rpc_proxy || nullptr == zone_mgr || nullptr == service)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(server_mgr), K(rpc_proxy), K(zone_mgr));
LOG_WARN("invalid argument", K(ret), K(rpc_proxy), K(zone_mgr));
} else if (OB_FAIL(create(backup_task_scheduler_thread_cnt, BACKUPTASKSCHEDULER))) {
LOG_WARN("create backup task scheduler thread failed", K(ret), K(backup_task_scheduler_thread_cnt));
} else {
server_mgr_ = server_mgr;
zone_mgr_ = zone_mgr;
rpc_proxy_ = rpc_proxy;
backup_service_ = backup_mgr;
@ -1124,7 +1119,7 @@ int ObBackupTaskScheduler::init(
LOG_WARN("init tenant stat failed", K(ret), LITERAL_K(MAX_BACKUP_TASK_QUEUE_LIMIT));
} else if (OB_FAIL(server_stat_map_.init(MAX_BACKUP_TASK_QUEUE_LIMIT))) {
LOG_WARN("init server stat failed", K(ret), LITERAL_K(MAX_BACKUP_TASK_QUEUE_LIMIT));
} else if (OB_FAIL(queue_.init(tenant_stat_map_, server_stat_map_, *server_mgr,
} else if (OB_FAIL(queue_.init(tenant_stat_map_, server_stat_map_,
*zone_mgr, *backup_mgr, MAX_BACKUP_TASK_QUEUE_LIMIT, rpc_proxy_, this, MAX_BACKUP_TASK_QUEUE_LIMIT, sql_proxy, lease_service))) {
LOG_WARN("init rebalance task queue failed", K(ret), LITERAL_K(MAX_BACKUP_TASK_QUEUE_LIMIT));
} else {
@ -1289,19 +1284,21 @@ int ObBackupTaskScheduler::check_alive_(int64_t &last_check_task_on_server_ts, b
bool is_exist = true;
ObBackupScheduleTask *task = schedule_tasks.at(i);
const ObAddr dst = task->get_dst();
share::ObServerStatus server_status;
share::ObServerInfoInTable server_info;
obrpc::ObBackupCheckTaskArg check_task_arg;
check_task_arg.tenant_id_ = task->get_tenant_id();
check_task_arg.trace_id_ = task->get_trace_id();
if ((now - task->get_generate_time() < backup_task_keep_alive_interval) && !reload_flag) {
// no need to check alive, wait next turn
} else if (OB_FAIL(server_mgr_->is_server_exist(dst, is_exist))) {
// **FIXME (linqiucen.lqc): temp. solution, this will be replaced when transfer branch is merged
} else if (OB_FAIL(SVR_TRACER.is_server_exist(dst, is_exist))) {
LOG_WARN("fail to check server exist", K(ret), K(dst));
} else if (!is_exist) {
LOG_WARN("backup dest server is not exist", K(ret), K(dst));
} else if (OB_FAIL(server_mgr_->get_server_status(dst, server_status))) {
LOG_WARN("fail to get server status", K(ret), K(dst));
} else if (!server_status.is_active() || !server_status.in_service()) {
// **FIXME (linqiucen.lqc): temp. solution, this will be replaced when transfer branch is merged
} else if (OB_FAIL(SVR_TRACER.get_server_info(dst, server_info))) {
LOG_WARN("fail to get server_info", K(ret), K(dst));
} else if (!server_info.is_active() || !server_info.in_service()) {
is_exist = false;
LOG_WARN("server status may not active or in service", K(ret), K(dst));
} else if (OB_FAIL(rpc_proxy_->to(dst).check_backup_task_exist(check_task_arg, res))) {
@ -1383,12 +1380,14 @@ int ObBackupTaskScheduler::do_execute_(const ObBackupScheduleTask &task)
bool is_alive = false;
bool in_service = false;
common::ObAddr leader;
if (OB_FAIL(server_mgr_->check_server_alive(online_server, is_alive))) {
// **FIXME (linqiucen.lqc): temp. solution, this will be replaced when transfer branch is merged
if (OB_FAIL(SVR_TRACER.check_server_alive(online_server, is_alive))) {
LOG_WARN("check server alive failed", K(ret), K(online_server));
} else if (!is_alive) {
ret = OB_REBALANCE_TASK_CANT_EXEC;
LOG_WARN("dst server not alive", K(ret), K(online_server));
} else if (OB_FAIL(server_mgr_->check_in_service(online_server, in_service))) {
// **FIXME (linqiucen.lqc): temp. solution, this will be replaced when transfer branch is merged
} else if (OB_FAIL(SVR_TRACER.check_in_service(online_server, in_service))) {
LOG_WARN("check in service failed", K(ret), K(online_server));
} else if (!in_service) {
ret = OB_REBALANCE_TASK_CANT_EXEC;

View File

@ -27,8 +27,6 @@ class ObMutex;
}
namespace rootserver
{
class ObServerManager;
class ObZoneManager;
class ObBackupTaskScheduler;
class ObBackupService;
@ -43,8 +41,7 @@ public:
virtual ~ObBackupTaskSchedulerQueue();
int init(ObTenantBackupScheduleTaskStatMap &tenant_stat_map,
ObServerBackupScheduleTaskStatMap &server_stat_map,
ObServerManager &server_manager,
ObServerBackupScheduleTaskStatMap &server_stat_map,
ObZoneManager &zone_manager,
ObBackupService &backup_mgr,
const int64_t bucket_num,
@ -129,7 +126,6 @@ private:
TaskMap task_map_;
obrpc::ObSrvRpcProxy *rpc_proxy_;
ObBackupTaskScheduler *task_scheduler_;
ObServerManager *server_mgr_;
ObZoneManager *zone_mgr_;
ObBackupService *backup_service_;
common::ObMySQLProxy *sql_proxy_;
@ -155,8 +151,7 @@ public:
public:
ObBackupTaskScheduler();
int init(ObServerManager *server_mgr,
ObZoneManager *zone_mgr_,
int init(ObZoneManager *zone_mgr_,
obrpc::ObSrvRpcProxy *rpc_proxy,
ObBackupService *backup_mgr,
common::ObMySQLProxy &sql_proxy,
@ -202,7 +197,6 @@ private:
ObBackupTaskSchedulerQueue queue_;
// scheduler's self server addr
common::ObAddr self_;
ObServerManager *server_mgr_;
ObZoneManager *zone_mgr_;
obrpc::ObSrvRpcProxy *rpc_proxy_;
ObBackupService *backup_service_;

View File

@ -12,7 +12,6 @@
#define USING_LOG_PREFIX ARCHIVE
#include "rootserver/backup/ob_tenant_archive_scheduler.h"
#include "rootserver/ob_server_manager.h"
#include "rootserver/ob_rs_event_history_table_operator.h"
#include "rootserver/ob_unit_manager.h"
#include "storage/tx/ob_ts_mgr.h"
@ -385,7 +384,7 @@ static int round_checkpoint_cb(
*/
ObArchiveHandler::ObArchiveHandler()
: is_inited_(false), tenant_id_(OB_INVALID_TENANT_ID),
server_mgr_(nullptr), zone_mgr_(nullptr), unit_mgr_(nullptr), rpc_proxy_(nullptr),
zone_mgr_(nullptr), unit_mgr_(nullptr), rpc_proxy_(nullptr),
sql_proxy_(nullptr), schema_service_(nullptr), round_handler_(),
archive_table_op_()
{
@ -394,7 +393,6 @@ ObArchiveHandler::ObArchiveHandler()
int ObArchiveHandler::init(
const uint64_t tenant_id,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
ObUnitManager &unit_manager,
share::schema::ObMultiVersionSchemaService *schema_service,
@ -415,7 +413,6 @@ int ObArchiveHandler::init(
LOG_WARN("failed to init archive round", K(ret), K(tenant_id));
} else {
tenant_id_ = tenant_id;
server_mgr_ = &server_mgr;
zone_mgr_ = &zone_mgr;
unit_mgr_ = &unit_manager;
schema_service_ = schema_service;

View File

@ -32,7 +32,6 @@ namespace common {
namespace rootserver
{
class ObServerManager;
class ObZoneManager;
class ObUnitManager;
@ -44,7 +43,6 @@ public:
int init(
const uint64_t tenant_id,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
ObUnitManager &unit_manager,
share::schema::ObMultiVersionSchemaService *schema_service,
@ -77,7 +75,6 @@ private:
private:
bool is_inited_;
uint64_t tenant_id_; // user tenant id
ObServerManager *server_mgr_;
ObZoneManager *zone_mgr_;
ObUnitManager *unit_mgr_;
obrpc::ObSrvRpcProxy *rpc_proxy_;

View File

@ -34,7 +34,6 @@
#include "share/ob_max_id_fetcher.h"
#include "share/ob_freeze_info_proxy.h"
#include "share/scheduler/ob_sys_task_stat.h"
#include "rootserver/ob_server_manager.h"
#include "rootserver/ob_zone_manager.h"
#include "rootserver/ob_ddl_service.h"
#include "rootserver/ob_root_service.h"

View File

@ -29,7 +29,6 @@ namespace rootserver
{
class ObZoneMergeManager;
class ObFreezeInfoManager;
class ObServerManager;
struct ObMergeTimeStatistics;
class ObMergeErrorCallback

View File

@ -16,6 +16,7 @@
#include "share/config/ob_server_config.h"
#include "rootserver/ob_server_manager.h"
#include "rootserver/ob_heartbeat_service.h"
namespace oceanbase
{
@ -149,8 +150,12 @@ ObCheckServerTask::ObCheckServerTask(common::ObWorkQueue &work_queue,
int ObCheckServerTask::process()
{
int ret = OB_SUCCESS;
if (OB_FAIL(checker_.check_all_server())) {
LOG_WARN("checker all server failed", K(ret));
if (!ObHeartbeatService::is_service_enabled()) {
if (OB_FAIL(checker_.check_all_server())) {
LOG_WARN("checker all server failed", K(ret));
}
} else {
LOG_TRACE("no need to do ObCheckServerTask in version >= 4.2");
}
return ret;
}

View File

@ -21,6 +21,8 @@
#include "rootserver/ob_disaster_recovery_task_mgr.h"
#include "rootserver/ob_root_utils.h"
#include "observer/ob_server_struct.h"
#include "share/ob_all_server_tracer.h"
#include "ob_heartbeat_service.h"
namespace oceanbase
{
@ -47,18 +49,25 @@ ObAllServerTask::~ObAllServerTask()
int ObAllServerTask::process()
{
int ret = OB_SUCCESS;
if (OB_ISNULL(ObCurTraceId::get_trace_id())) {
//Prevent the current trace_id from being overwritten
ObCurTraceId::init(GCONF.self_addr_);
}
THIS_WORKER.set_timeout_ts(INT64_MAX);
if (!ObRootServiceRoleChecker::is_rootserver()) {
ret = OB_NOT_MASTER;
LOG_WARN("not master", K(ret));
} else if (OB_FAIL(server_manager_.adjust_server_status(
server_, disaster_recovery_task_mgr_, with_rootserver_))) {
LOG_WARN("fail to adjust server status", K(ret), K(server_));
int tmp_ret = OB_SUCCESS;
if (!ObHeartbeatService::is_service_enabled()) {
if (OB_ISNULL(ObCurTraceId::get_trace_id())) {
//Prevent the current trace_id from being overwritten
ObCurTraceId::init(GCONF.self_addr_);
}
THIS_WORKER.set_timeout_ts(INT64_MAX);
if (!ObRootServiceRoleChecker::is_rootserver()) {
ret = OB_NOT_MASTER;
LOG_WARN("not master", K(ret));
} else if (OB_FAIL(server_manager_.adjust_server_status(
server_, disaster_recovery_task_mgr_, with_rootserver_))) {
LOG_WARN("fail to adjust server status", K(ret), K(server_));
}
if (OB_TMP_FAIL(SVR_TRACER.refresh())) {
LOG_WARN("fail to refresh all server tracer", KR(ret), KR(tmp_ret));
}
} else {
LOG_TRACE("no need to do ObAllServerTask in version >= 4.2");
}
return ret;
}

View File

@ -34,7 +34,6 @@ ObAlterLocalityFinishChecker::ObAlterLocalityFinishChecker(volatile bool &stop)
common_rpc_proxy_(NULL),
self_(),
unit_mgr_(NULL),
server_mgr_(NULL),
zone_mgr_(NULL),
sql_proxy_(NULL),
stop_(stop)
@ -50,7 +49,6 @@ int ObAlterLocalityFinishChecker::init(
obrpc::ObCommonRpcProxy &common_rpc_proxy,
common::ObAddr &addr,
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
common::ObMySQLProxy &sql_proxy,
share::ObLSTableOperator &lst_operator)
@ -67,7 +65,6 @@ int ObAlterLocalityFinishChecker::init(
common_rpc_proxy_ = &common_rpc_proxy;
self_ = addr;
unit_mgr_ = &unit_mgr;
server_mgr_ = &server_mgr;
zone_mgr_ = &zone_mgr;
sql_proxy_ = &sql_proxy;
lst_operator_ = &lst_operator;
@ -89,11 +86,9 @@ int ObAlterLocalityFinishChecker::check()
} else if (OB_ISNULL(schema_service_)
|| OB_ISNULL(unit_mgr_)
|| OB_ISNULL(zone_mgr_)
|| OB_ISNULL(server_mgr_)
|| !self_.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), KP_(schema_service), KP_(unit_mgr),
KP_(zone_mgr), KP_(server_mgr), K_(self));
LOG_WARN("invalid argument", KR(ret), KP_(schema_service), KP_(unit_mgr), KP_(zone_mgr), K_(self));
} else if (OB_FAIL(check_stop())) {
LOG_WARN("ObAlterLocalityFinishChecker stopped", KR(ret));
} else if (OB_FAIL(schema_service_->get_tenant_schema_guard(OB_SYS_TENANT_ID, schema_guard))) {
@ -123,7 +118,6 @@ int ObAlterLocalityFinishChecker::check()
} else if (OB_SUCCESS != (tmp_ret = ObDRWorker::check_tenant_locality_match(
tenant_id,
*unit_mgr_,
*server_mgr_,
*zone_mgr_,
alter_locality_finish))){
LOG_WARN("fail to check tenant locality match", KR(tmp_ret), K(tenant_id), K(alter_locality_finish));
@ -131,7 +125,6 @@ int ObAlterLocalityFinishChecker::check()
&& OB_SUCCESS != (tmp_ret = ObDRWorker::check_tenant_locality_match(
gen_meta_tenant_id(tenant_id),
*unit_mgr_,
*server_mgr_,
*zone_mgr_,
meta_alter_locality_finish))){
LOG_WARN("fail to check tenant locality match", KR(tmp_ret), "meta_tenant_id",

View File

@ -33,7 +33,6 @@ class ObLSTableOperator;
namespace rootserver
{
class ObUnitManager;
class ObServerManager;
class ObZoneManager;
class DRLSInfo;
class LocalityMap;
@ -60,7 +59,6 @@ public:
obrpc::ObCommonRpcProxy &common_rpc_proxy,
common::ObAddr &self,
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
common::ObMySQLProxy &sql_proxy,
share::ObLSTableOperator &lst_operator);
@ -76,7 +74,6 @@ private:
obrpc::ObCommonRpcProxy *common_rpc_proxy_; //use GCTX.rs_rpc_proxy_
common::ObAddr self_;
ObUnitManager *unit_mgr_;
ObServerManager *server_mgr_;
ObZoneManager *zone_mgr_;
common::ObMySQLProxy *sql_proxy_;
share::ObLSTableOperator *lst_operator_;

View File

@ -23,7 +23,6 @@
#include "share/schema/ob_table_schema.h"
#include "share/schema/ob_schema_getter_guard.h"
#include "share/schema/ob_part_mgr_util.h"
#include "ob_server_manager.h"
#include "ob_unit_manager.h"
#include "ob_zone_manager.h"
#include "ob_root_utils.h"

View File

@ -41,7 +41,7 @@ class ObSchemaGetterGuard;
namespace rootserver
{
class ObUnitManager;
class ObServerManager;
class ObZoneManager;
class ObDataSourceCandidateChecker

View File

@ -44,15 +44,19 @@
#include "storage/ob_file_system_router.h"
#include "share/ls/ob_ls_creator.h"//ObLSCreator
#include "share/ls/ob_ls_life_manager.h"//ObLSLifeAgentManager
#include "share/ob_all_server_tracer.h"
#include "rootserver/ob_rs_event_history_table_operator.h"
#include "rootserver/ob_rs_async_rpc_proxy.h"
#include "rootserver/ob_ddl_operator.h"
#include "rootserver/ob_locality_util.h"
#include "rootserver/ob_rs_async_rpc_proxy.h"
#include "rootserver/ob_server_zone_op_service.h"
#include "observer/ob_server_struct.h"
#include "rootserver/freeze/ob_freeze_info_manager.h"
#include "rootserver/ob_table_creator.h"
#include "share/scn.h"
#include "rootserver/ob_heartbeat_service.h"
#include "rootserver/ob_root_service.h"
namespace oceanbase
{
@ -503,11 +507,10 @@ ObBootstrap::ObBootstrap(
{
}
int ObBootstrap::execute_bootstrap()
int ObBootstrap::execute_bootstrap(rootserver::ObServerZoneOpService &server_zone_op_service)
{
int ret = OB_SUCCESS;
bool already_bootstrap = true;
uint64_t server_id = OB_INIT_SERVER_ID;
ObSArray<ObTableSchema> table_schemas;
begin_ts_ = ObTimeUtility::current_time();
@ -522,12 +525,8 @@ int ObBootstrap::execute_bootstrap()
LOG_WARN("ob system is already bootstrap, cannot bootstrap again", K(ret));
} else if (OB_FAIL(check_bootstrap_rs_list(rs_list_))) {
LOG_WARN("failed to check_bootstrap_rs_list", K_(rs_list), K(ret));
} else if (OB_FAIL(add_rs_list(server_id))) {
LOG_WARN("fail to add rs list to server manager", K(ret));
} else if (OB_FAIL(create_all_core_table_partition())) {
LOG_WARN("fail to create all core_table partition", KR(ret));
} else if (OB_FAIL(wait_all_rs_online())) {
LOG_WARN("failed to wait all rs online", K(ret));
} else if (OB_FAIL(set_in_bootstrap())) {
LOG_WARN("failed to set in bootstrap", K(ret));
} else if (OB_FAIL(init_global_stat())) {
@ -545,24 +544,23 @@ int ObBootstrap::execute_bootstrap()
ObMultiVersionSchemaService &schema_service = ddl_service_.get_schema_service();
if (OB_SUCC(ret)) {
if (OB_FAIL(init_system_data(server_id))) {
LOG_WARN("failed to init system data", K(server_id), K(ret));
if (OB_FAIL(init_system_data())) {
LOG_WARN("failed to init system data", KR(ret));
} else if (OB_FAIL(ddl_service_.refresh_schema(OB_SYS_TENANT_ID))) {
LOG_WARN("failed to refresh_schema", K(ret));
}
}
BOOTSTRAP_CHECK_SUCCESS_V2("refresh_schema");
if (OB_SUCC(ret)) {
if (OB_FAIL(wait_all_rs_in_service())) {
LOG_WARN("failed to wait all rs in service", KR(ret));
} else if (OB_FAIL(init_backup_inner_table())) {
LOG_WARN("failed to init backup inner table", KR(ret));
} else if (OB_FAIL(init_backup_data())) {
LOG_WARN("failed to init backup inner table version", KR(ret));
} else {
ROOTSERVICE_EVENT_ADD("bootstrap", "bootstrap_succeed");
}
if (FAILEDx(add_servers_in_rs_list(server_zone_op_service))) {
LOG_WARN("fail to add servers in rs_list_", KR(ret));
} else if (OB_FAIL(wait_all_rs_in_service())) {
LOG_WARN("failed to wait all rs in service", KR(ret));
} else if (OB_FAIL(init_backup_inner_table())) {
LOG_WARN("failed to init backup inner table", KR(ret));
} else if (OB_FAIL(init_backup_data())) {
LOG_WARN("failed to init backup inner table version", KR(ret));
} else {
ROOTSERVICE_EVENT_ADD("bootstrap", "bootstrap_succeed");
}
BOOTSTRAP_CHECK_SUCCESS();
@ -884,12 +882,7 @@ int ObBootstrap::broadcast_sys_schema(const ObSArray<ObTableSchema> &table_schem
if (INT64_MAX != THIS_WORKER.get_timeout_ts()) {
rpc_timeout = max(rpc_timeout, THIS_WORKER.get_timeout_remain());
}
if (OB_FAIL(ddl_service_.get_server_manager().check_server_active(rs->server_, is_active))) {
LOG_WARN("check_server_active failed", KR(ret), "server", rs->server_);
} else if (!is_active) {
ret = OB_SERVER_NOT_ACTIVE;
LOG_WARN("server not active", KR(ret), "server", rs->server_);
} else if (OB_FAIL(proxy.call(rs->server_, rpc_timeout, arg))) {
if (OB_FAIL(proxy.call(rs->server_, rpc_timeout, arg))) {
LOG_WARN("broadcast_sys_schema failed", KR(ret), K(rpc_timeout),
"server", rs->server_);
}
@ -1045,60 +1038,39 @@ int ObBootstrap::construct_schema(
return ret;
}
int ObBootstrap::add_rs_list(uint64_t &server_id)
{
int ObBootstrap::add_servers_in_rs_list(rootserver::ObServerZoneOpService &server_zone_op_service) {
int ret = OB_SUCCESS;
ObServerManager &server_mgr = ddl_service_.get_server_manager();
if (OB_FAIL(check_inner_stat())) {
LOG_WARN("check_inner_stat failed", K(ret));
} else if (OB_FAIL(server_mgr.add_server_list(rs_list_, server_id))) {
LOG_WARN("add_server_list failed", K_(rs_list), K(ret));
}
BOOTSTRAP_CHECK_SUCCESS();
return ret;
}
int ObBootstrap::wait_all_rs_online()
{
int ret = OB_SUCCESS;
int64_t left_time_can_sleep = 0;
ObServerManager &server_mgr = ddl_service_.get_server_manager();
if (OB_FAIL(check_inner_stat())) {
LOG_WARN("check_inner_stat failed", K(ret));
} else if (OB_FAIL(server_mgr.get_lease_duration(left_time_can_sleep))) {
LOG_WARN("get_lease_duration failed", K(ret));
ObArray<ObAddr> servers;
if (OB_ISNULL(GCTX.root_service_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("GCTX.root_service_ is null", KR(ret), KP(GCTX.root_service_));
} else {
while (OB_SUCC(ret)) {
if (!ObRootServiceRoleChecker::is_rootserver()) {
ret = OB_RS_SHUTDOWN;
LOG_WARN("wait all rs online fail, self is not master rootservice any more, check SYS LS leader revoke infos",
KR(ret), K(left_time_can_sleep));
break;
}
bool all_alive = true;
if (INT64_MAX != THIS_WORKER.get_timeout_ts()) {
left_time_can_sleep = max(left_time_can_sleep, THIS_WORKER.get_timeout_remain());
}
for (int64_t i = 0; OB_SUCC(ret) && i < rs_list_.count(); ++i) {
bool is_alive = false;
if (OB_FAIL(server_mgr.check_server_alive(rs_list_.at(i).server_, is_alive))) {
LOG_WARN("check_server_alive failed", "server", rs_list_.at(i).server_, K(ret));
} else if (!is_alive) {
LOG_WARN("server is not alive", "server", rs_list_.at(i).server_, K(is_alive));
all_alive = false;
break;
if (!ObHeartbeatService::is_service_enabled()) {
for (int64_t i = 0; OB_SUCC(ret) && i < rs_list_.count(); i++) {
const ObAddr &server = rs_list_.at(i).server_;
const ObZone &zone = rs_list_.at(i).zone_;
if (OB_FAIL(GCTX.root_service_->add_server_for_bootstrap_in_version_smaller_than_4_2_0(server, zone))) {
LOG_WARN("fail to add server in version < 4.2", KR(ret), K(server), K(zone));
}
FLOG_INFO("add servers in rs_list_ in version < 4.2", KR(ret), K(server), K(zone));
}
if (OB_FAIL(ret)) {
} else if (all_alive) {
break;
} else if (left_time_can_sleep > 0) {
USLEEP(min(left_time_can_sleep, 200 * 1000));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < rs_list_.count(); i++) {
servers.reuse();
const ObAddr &server = rs_list_.at(i).server_;
const ObZone &zone = rs_list_.at(i).zone_;
if (OB_FAIL(servers.push_back(server))) {
LOG_WARN("fail to push an element into servers", KR(ret), K(server));
} else if (OB_FAIL(server_zone_op_service.add_servers(servers, zone, true /* is_bootstrap */))) {
LOG_WARN("fail to add servers", KR(ret), K(servers), K(zone));
}
FLOG_INFO("add servers in rs_list_ in version >= 4.2", KR(ret), K(servers), K(zone));
}
if (FAILEDx(GCTX.root_service_->load_server_manager())) {
LOG_WARN("fail to load server manager", KR(ret), KP(GCTX.root_service_));
}
}
}
BOOTSTRAP_CHECK_SUCCESS();
return ret;
}
@ -1124,8 +1096,13 @@ int ObBootstrap::wait_all_rs_in_service()
if (INT64_MAX != THIS_WORKER.get_timeout_ts()) {
left_time_can_sleep = max(left_time_can_sleep, THIS_WORKER.get_timeout_remain());
}
if (OB_FAIL(ddl_service_.get_server_manager().check_in_service(rs->server_, in_service))) {
// mark
if (OB_FAIL(SVR_TRACER.check_in_service(rs->server_, in_service))) {
LOG_WARN("check_in_service failed", "server", rs->server_, K(ret));
if (OB_ENTRY_NOT_EXIST == ret) {
ret = OB_SUCCESS;
all_in_service = false;
}
} else if (!in_service) {
LOG_WARN("server is not in_service ", "server", rs->server_);
all_in_service = false;
@ -1405,14 +1382,11 @@ int ObBootstrap::insert_sys_ls_(const share::schema::ObTenantSchema &tenant_sche
}
int ObBootstrap::init_system_data(const uint64_t server_id)
int ObBootstrap::init_system_data()
{
int ret = OB_SUCCESS;
if (OB_FAIL(check_inner_stat())) {
LOG_WARN("check_inner_stat failed", KR(ret));
} else if (OB_INVALID_ID == server_id) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid server_id", K(server_id), KR(ret));
} else if (OB_FAIL(unit_mgr_.load())) {
LOG_WARN("unit_mgr load failed", KR(ret));
} else if (OB_FAIL(create_sys_unit_config())) {
@ -1421,8 +1395,6 @@ int ObBootstrap::init_system_data(const uint64_t server_id)
LOG_WARN("create sys resource pool failed", KR(ret));
} else if (OB_FAIL(create_sys_tenant())) {
LOG_WARN("create system tenant failed", KR(ret));
} else if (OB_FAIL(init_server_id(server_id))) {
LOG_WARN("init server id failed", K(server_id), KR(ret));
} else if (OB_FAIL(init_all_zone_table())) {
LOG_WARN("failed to init all zone table", KR(ret));
}
@ -1461,7 +1433,9 @@ int ObBootstrap::init_backup_data()
LOG_WARN("failed to init backup inner table version", K(ret));
} else if (OB_FAIL(ObBackupInfoOperator::set_backup_leader_epoch(ddl_service_.get_sql_proxy(), 1))) {
LOG_WARN("failed to init backup leader epoch", K(ret));
} else if (OB_FAIL(ObBackupInfoOperator::set_backup_leader(ddl_service_.get_sql_proxy(), ddl_service_.get_server_manager().get_rs_addr()))) {
// mark
} else if (OB_FAIL(ObBackupInfoOperator::set_backup_leader(ddl_service_.get_sql_proxy(), GCTX.self_addr()))) {
//
LOG_WARN("failed to init backup leader", K(ret));
}
@ -1631,24 +1605,6 @@ int ObBootstrap::init_all_zone_table()
return ret;
}
//FIXME:it need to write in new table, if table name changes after splitting
int ObBootstrap::init_server_id(const uint64_t server_id)
{
int ret = OB_SUCCESS;
ObMaxIdFetcher fetcher(ddl_service_.get_sql_proxy());
if (OB_FAIL(check_inner_stat())) {
LOG_WARN("check_inner_stat failed", K(ret));
} else if (OB_INVALID_ID == server_id) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid server_id", K(server_id), K(ret));
} else if (OB_FAIL(fetcher.update_max_id(ddl_service_.get_sql_proxy(),
OB_SYS_TENANT_ID, OB_MAX_USED_SERVER_ID_TYPE, server_id))) {
LOG_WARN("update max used server id failed", K(server_id), K(ret));
}
BOOTSTRAP_CHECK_SUCCESS();
return ret;
}
template<typename SCHEMA>
int ObBootstrap::set_replica_options(SCHEMA &schema)
{

View File

@ -52,6 +52,7 @@ namespace rootserver
class ObRsGtsManager;
struct ObSysStat;
class ObTableCreator;
class ObServerZoneOpService;
class ObBaseBootstrap
{
@ -138,7 +139,7 @@ public:
obrpc::ObCommonRpcProxy &rs_rpc_proxy);
virtual ~ObBootstrap() {}
virtual int execute_bootstrap();
virtual int execute_bootstrap(rootserver::ObServerZoneOpService &server_zone_op_service);
static int create_all_schema(
ObDDLService &ddl_service,
common::ObIArray<share::schema::ObTableSchema> &table_schemas);
@ -170,12 +171,10 @@ private:
virtual int check_is_already_bootstrap(bool &is_bootstrap);
virtual int init_global_stat();
virtual int init_sequence_id();
virtual int init_system_data(const uint64_t server_id);
virtual int init_system_data();
virtual int init_all_zone_table();
virtual int init_multiple_zone_deployment_table(common::ObISQLClient &sql_client);
virtual int init_server_id(const uint64_t server_id);
virtual int add_rs_list(uint64_t &server_id);
virtual int wait_all_rs_online();
virtual int add_servers_in_rs_list(rootserver::ObServerZoneOpService &server_zone_op_service);
virtual int wait_all_rs_in_service();
int init_backup_inner_table();
int init_backup_data();

View File

@ -126,7 +126,7 @@ int ObSysStat::set_initial_values(const uint64_t tenant_id)
ob_max_used_unit_config_id_.value_.set_int(OB_USER_UNIT_CONFIG_ID);
ob_max_used_resource_pool_id_.value_.set_int(OB_USER_RESOURCE_POOL_ID);
ob_max_used_unit_id_.value_.set_int(OB_USER_UNIT_ID);
ob_max_used_server_id_.value_.set_int(OB_INIT_SERVER_ID);
ob_max_used_server_id_.value_.set_int(OB_INIT_SERVER_ID - 1);
ob_max_used_ddl_task_id_.value_.set_int(OB_INIT_DDL_TASK_ID);
ob_max_used_unit_group_id_.value_.set_int(OB_USER_UNIT_GROUP_ID);
} else {

View File

@ -55,7 +55,7 @@
#include "sql/resolver/ddl/ob_ddl_resolver.h"
#include "sql/resolver/expr/ob_raw_expr_modify_column_name.h"
#include "sql/resolver/expr/ob_raw_expr_printer.h"
#include "ob_server_manager.h"
#include "share/ob_all_server_tracer.h"
#include "ob_zone_manager.h"
#include "rootserver/ob_schema2ddl_sql.h"
#include "rootserver/ob_unit_manager.h"
@ -161,7 +161,6 @@ ObDDLService::ObDDLService()
sql_proxy_(NULL),
schema_service_(NULL),
lst_operator_(NULL),
server_mgr_(NULL),
zone_mgr_(NULL),
unit_mgr_(NULL),
snapshot_mgr_(NULL)
@ -173,7 +172,6 @@ int ObDDLService::init(obrpc::ObSrvRpcProxy &rpc_proxy,
common::ObMySQLProxy &sql_proxy,
share::schema::ObMultiVersionSchemaService &schema_service,
share::ObLSTableOperator &lst_operator,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
ObUnitManager &unit_mgr,
ObSnapshotInfoManager &snapshot_mgr)
@ -188,7 +186,6 @@ int ObDDLService::init(obrpc::ObSrvRpcProxy &rpc_proxy,
sql_proxy_ = &sql_proxy;
schema_service_ = &schema_service;
lst_operator_ = &lst_operator;
server_mgr_ = &server_mgr;
zone_mgr_ = &zone_mgr;
unit_mgr_ = &unit_mgr;
snapshot_mgr_ = &snapshot_mgr;
@ -1494,9 +1491,9 @@ int ObDDLService::check_inner_stat() const
|| OB_ISNULL(rpc_proxy_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("schema_service_,sql_proxy_ or rpc_proxy_ is null", K(ret));
} else if (OB_ISNULL(server_mgr_) || OB_ISNULL(lst_operator_)) {
} else if (OB_ISNULL(lst_operator_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server_mgr_ or pt_operator_ or lst_operator_ is null", KR(ret));
LOG_WARN("lst_operator_ is null", KR(ret));
} else if (OB_ISNULL(unit_mgr_) || OB_ISNULL(zone_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unit_mgr_ or zone_mgr_ is null", K(ret));
@ -11274,50 +11271,6 @@ int ObDDLService::check_restore_point_allow(const int64_t tenant_id, const ObTab
return ret;
}
int ObDDLService::check_all_server_frozen_scn(const SCN &frozen_scn)
{
int ret = OB_SUCCESS;
if (OB_ISNULL(rpc_proxy_) || OB_ISNULL(server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ptr is null", K(ret), KP_(rpc_proxy), KP_(server_mgr));
} else {
ObCheckFrozenScnProxy check_frozen_scn_proxy(*rpc_proxy_, &obrpc::ObSrvRpcProxy::check_frozen_scn);
ObZone zone;
ObArray<share::ObServerStatus> server_statuses;
ObCheckFrozenScnArg arg;
arg.frozen_scn_ = frozen_scn;
if (OB_FAIL(server_mgr_->get_server_statuses(zone, server_statuses))) {
LOG_WARN("fail to get server statuses", K(ret));
} else if (server_statuses.count() <= 0) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid server cnt", K(ret));
}
// check server alive
for (int64_t i = 0; OB_SUCC(ret) && i < server_statuses.count(); i++) {
if (!server_statuses[i].is_alive()) {
ret = OB_SERVER_NOT_ALIVE;
LOG_WARN("server not alive", K(ret), "server", server_statuses[i]);
}
}
if (OB_SUCC(ret)) {
// send async rpc
for (int64_t i = 0; OB_SUCC(ret) && i < server_statuses.count(); i++) {
const int64_t rpc_timeout_us = THIS_WORKER.get_timeout_remain();
const ObAddr &addr = server_statuses[i].server_;
if (OB_FAIL(check_frozen_scn_proxy.call(addr, rpc_timeout_us, arg))) {
LOG_WARN("fail to check frozen version", K(ret), K(addr), K(rpc_timeout_us));
}
}
int tmp_ret = OB_SUCCESS;
// all server should success;
if (OB_SUCCESS != (tmp_ret = check_frozen_scn_proxy.wait())) {
LOG_WARN("fail to execute rpc", K(tmp_ret));
}
ret = OB_SUCC(ret) ? tmp_ret : ret;
}
}
return ret;
}
// This code will be used for partition operations of table and tablegroup
// 1. for table, parameter is_drop_truncate_and_alter_index parameter avoids the drop/truncate partition
@ -20129,8 +20082,13 @@ int ObDDLService::create_sys_tenant(
LOG_WARN("init tenant env failed", K(tenant_schema), K(ret));
} else if (OB_FAIL(ddl_operator.insert_tenant_merge_info(OB_DDL_ADD_TENANT, tenant_schema, trans))) {
LOG_WARN("fail to insert tenant merge info", KR(ret));
} else if (OB_FAIL(ObServiceEpochProxy::init_service_epoch(trans, OB_SYS_TENANT_ID,
0/*freeze_service_epoch*/ ,0/*arbitration_service_epoch*/))) {
} else if (OB_FAIL(ObServiceEpochProxy::init_service_epoch(
trans,
OB_SYS_TENANT_ID,
0, /*freeze_service_epoch*/
0, /*arbitration_service_epoch*/
0, /*server_zone_op_service_epoch*/
0 /*heartbeat_service_epoch*/))) {
LOG_WARN("fail to init service epoch", KR(ret));
}
if (trans.is_started()) {
@ -21188,9 +21146,14 @@ int ObDDLService::init_tenant_schema(
LOG_WARN("init tenant env failed", KR(ret), K(tenant_role), K(recovery_until_scn), K(tenant_schema));
} else if (OB_FAIL(ddl_operator.insert_tenant_merge_info(OB_DDL_ADD_TENANT_START, tenant_schema, trans))) {
LOG_WARN("fail to insert tenant merge info", KR(ret), K(tenant_schema));
} else if (is_meta_tenant(tenant_id) && OB_FAIL(ObServiceEpochProxy::init_service_epoch(trans, tenant_id,
0/*freeze_service_epoch*/, 0/*arbitration_service_epoch*/))) {
LOG_WARN("fail to init service epoch", KR(ret), K(tenant_id));
} else if (is_meta_tenant(tenant_id) && OB_FAIL(ObServiceEpochProxy::init_service_epoch(
trans,
tenant_id,
0, /*freeze_service_epoch*/
0, /*arbitration_service_epoch*/
0, /*server_zone_op_service_epoch*/
0 /*heartbeat_service_epoch*/))) {
LOG_WARN("fail to init service epoch", KR(ret));
}
if (trans.is_started()) {
@ -24331,19 +24294,25 @@ int ObDDLService::notify_refresh_schema(const ObAddrIArray &addrs)
{
int ret = OB_SUCCESS;
const ObZone zone;
ObServerManager::ObServerArray server_list;
ObArray<ObAddr> server_list;
ObSwitchSchemaProxy proxy(*rpc_proxy_, &ObSrvRpcProxy::switch_schema);
ObSwitchSchemaArg arg;
ObRefreshSchemaInfo local_schema_info;
ObRefreshSchemaInfo &schema_info = arg.schema_info_;
int64_t schema_version = OB_INVALID_VERSION;
ObAddr rs_addr;
if (OB_FAIL(check_inner_stat())) {
LOG_WARN("variable is not init");
} else if (OB_FAIL(server_mgr_->get_alive_servers(zone, server_list))) {
LOG_WARN("get alive server failed", KR(ret));
} else if (OB_ISNULL(schema_service_)) {
} else if (OB_ISNULL(GCTX.rs_mgr_) || OB_ISNULL(schema_service_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("schema_service is null", KR(ret));
LOG_WARN("GCTX.rs_mgr_ or schema_service_ is null", KR(ret), KP(GCTX.rs_mgr_), KP(schema_service_));
} else if (OB_FAIL(GCTX.rs_mgr_->get_master_root_server(rs_addr))) {
LOG_WARN("fail to get master root servcer", KR(ret));
} else if (OB_UNLIKELY(!rs_addr.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("rs_addr is invalid", KR(ret), K(rs_addr));
} else if (OB_FAIL(SVR_TRACER.get_alive_servers(zone, server_list))) {
LOG_WARN("get alive server failed", KR(ret), K(zone));
} else if (OB_FAIL(schema_service_->get_refresh_schema_info(local_schema_info))) {
LOG_WARN("fail to get schema info", KR(ret));
} else if (OB_FAIL(schema_service_->get_tenant_schema_version(OB_SYS_TENANT_ID, schema_version))) {
@ -24366,7 +24335,7 @@ int ObDDLService::notify_refresh_schema(const ObAddrIArray &addrs)
if (OB_ISNULL(s)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("s is null", K(ret));
} else if (server_mgr_->get_rs_addr() == *s) {
} else if (rs_addr == *s) {
continue;
} else {
bool found = false;

View File

@ -83,7 +83,6 @@ namespace palf
namespace rootserver
{
class ObDDLOperator;
class ObServerManager;
class ObZoneManager;
class ObUnitManager;
class ObCommitAlterTenantLocalityArg;
@ -131,7 +130,6 @@ public:
common::ObMySQLProxy &sql_proxy,
share::schema::ObMultiVersionSchemaService &schema_service,
share::ObLSTableOperator &lst_operator,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
ObUnitManager &unit_mgr,
ObSnapshotInfoManager &snapshot_mgr);
@ -142,7 +140,6 @@ public:
// these functions should be called after ddl_service has been inited
share::schema::ObMultiVersionSchemaService &get_schema_service() { return *schema_service_; }
common::ObMySQLProxy &get_sql_proxy() { return *sql_proxy_; }
ObServerManager &get_server_manager() { return *server_mgr_; }
ObZoneManager &get_zone_mgr() { return *zone_mgr_; }
ObSnapshotInfoManager &get_snapshot_mgr() { return *snapshot_mgr_; }
share::ObLSTableOperator &get_lst_operator() { return *lst_operator_; }
@ -1781,7 +1778,6 @@ public:
common::ObIArray<share::schema::ObZoneRegion> &zone_region_list,
const common::ObIArray<common::ObZone> &zone_list);
int check_all_server_frozen_scn(const share::SCN &frozen_scn);
int handle_security_audit(const obrpc::ObSecurityAuditArg &arg);
static int check_and_get_object_name(share::schema::ObSchemaGetterGuard &schema_guard,
@ -2300,7 +2296,6 @@ private:
share::schema::ObMultiVersionSchemaService *schema_service_;
share::ObLSTableOperator *lst_operator_;
//TODO(jingqian): used to choose partition server, use load balancer finnally
ObServerManager *server_mgr_;
ObZoneManager *zone_mgr_;
ObUnitManager *unit_mgr_;
ObSnapshotInfoManager *snapshot_mgr_;

View File

@ -17,8 +17,8 @@
#include "lib/container/ob_se_array.h"
#include "lib/container/ob_se_array_iterator.h"
#include "ob_unit_manager.h"
#include "ob_server_manager.h"
#include "ob_zone_manager.h"
#include "share/ob_all_server_tracer.h"
using namespace oceanbase::common;
using namespace oceanbase::share;
@ -272,35 +272,39 @@ int DRLSInfo::fill_servers()
{
int ret = OB_SUCCESS;
common::ObZone zone;
ObServerManager::ObServerStatusArray server_status_array;
if (OB_UNLIKELY(nullptr == server_mgr_)) {
ObArray<ObServerInfoInTable> servers_info;
if (OB_FAIL(SVR_TRACER.get_servers_info(zone, servers_info))) {
LOG_WARN("fail to get all servers_info", KR(ret));
} else if (OB_ISNULL(zone_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server mgr ptr is null", KR(ret), KP(server_mgr_));
} else if (OB_FAIL(server_mgr_->get_server_statuses(zone, server_status_array))) {
LOG_WARN("fail to get all server status", KR(ret));
LOG_WARN("zone_mgr_ is null", KR(ret), KP(zone_mgr_));
} else {
server_stat_info_map_.reuse();
FOREACH_X(s, server_status_array, OB_SUCC(ret)) {
FOREACH_X(s, servers_info, OB_SUCC(ret)) {
ServerStatInfoMap::Item *item = nullptr;
bool zone_active = false;
if (OB_UNLIKELY(nullptr == s)) {
if (OB_ISNULL(s)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server ptr is null", KR(ret));
} else if (OB_FAIL(zone_mgr_->check_zone_active(s->zone_, zone_active))) {
LOG_WARN("fail to check zone active", KR(ret), "zone", s->zone_);
} else if (OB_FAIL(server_stat_info_map_.locate(s->server_, item))) {
LOG_WARN("fail to locate server status", KR(ret), "server", s->server_);
} else if (OB_UNLIKELY(nullptr == item)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("item ptr is null", KR(ret), "server", s->server_);
} else if (OB_FAIL(item->v_.init(
s->server_,
s->is_alive(),
s->is_active(),
s->is_permanent_offline(),
s->is_migrate_in_blocked(),
(s->is_stopped() || !zone_active)))) {
LOG_WARN("fail to init server item", KR(ret));
} else {
const ObAddr &server = s->get_server();
const ObZone &zone = s->get_zone();
if (OB_FAIL(zone_mgr_->check_zone_active(zone, zone_active))) {
LOG_WARN("fail to check zone active", KR(ret), "zone", zone);
} else if (OB_FAIL(server_stat_info_map_.locate(server, item))) {
LOG_WARN("fail to locate server status", KR(ret), "server", server);
} else if (OB_UNLIKELY(nullptr == item)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("item ptr is null", KR(ret), "server", server);
} else if (OB_FAIL(item->v_.init(
server,
s->is_alive(),
s->is_active(),
s->is_permanent_offline(),
s->is_migrate_in_blocked(),
(s->is_stopped() || !zone_active)))) {
LOG_WARN("fail to init server item", KR(ret));
}
}
}
}

View File

@ -37,7 +37,6 @@ namespace rootserver
{
class ObUnitManager;
class ObServerManager;
class ObZoneManager;
struct DRServerStatInfo
@ -141,13 +140,11 @@ class DRLSInfo
public:
DRLSInfo(const uint64_t resource_tenant_id,
ObUnitManager *unit_mgr,
ObServerManager *server_mgr,
ObZoneManager *zone_mgr,
share::schema::ObMultiVersionSchemaService *schema_service)
: resource_tenant_id_(resource_tenant_id),
sys_schema_guard_(),
unit_mgr_(unit_mgr),
server_mgr_(server_mgr),
zone_mgr_(zone_mgr),
schema_service_(schema_service),
unit_stat_info_map_(),
@ -238,7 +235,6 @@ private:
uint64_t resource_tenant_id_;
share::schema::ObSchemaGetterGuard sys_schema_guard_;
ObUnitManager *unit_mgr_;
ObServerManager *server_mgr_;
ObZoneManager *zone_mgr_;
share::schema::ObMultiVersionSchemaService *schema_service_;
UnitStatInfoMap unit_stat_info_map_;

View File

@ -24,7 +24,6 @@
#include "rootserver/ob_root_balancer.h"
#include "rootserver/ob_root_service.h"
#include "ob_rs_event_history_table_operator.h"
#include "ob_server_manager.h"
#include "share/ob_rpc_struct.h"
#include "observer/ob_server_struct.h"
#include "observer/ob_server.h"
@ -331,22 +330,6 @@ int ObDRTask::set_task_key(
return ret;
}
int ObDRTask::update_with_partition(
const common::ObAddr &dst_server) const
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!dst_server.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(dst_server));
} else if (OB_UNLIKELY(nullptr == GCTX.root_service_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("rootservice ptr is null", KR(ret));
} else if (OB_FAIL(GCTX.root_service_->get_server_mgr().set_with_partition(dst_server))) {
LOG_WARN("fail to set with partition", KR(ret), K(dst_server));
}
return ret;
}
void ObDRTask::set_schedule()
{
set_schedule_time(ObTimeUtility::current_time());
@ -579,9 +562,6 @@ int ObMigrateLSReplicaTask::check_before_execute(
LOG_WARN("fail to check paxos replica number", KR(ret), K(ls_info));
} else if (OB_FAIL(check_online(ls_info, ret_comment))) {
LOG_WARN("fail to check online", KR(ret), K(ls_info));
} else if (OB_FAIL(update_with_partition(dst_replica_.get_server()))) {
LOG_WARN("fail to update with partition", KR(ret),
"server", dst_replica_.get_server());
}
return ret;
}
@ -1032,9 +1012,6 @@ int ObAddLSReplicaTask::check_before_execute(
LOG_WARN("fail to check online", KR(ret), K(ls_info));
} else if (OB_FAIL(check_paxos_member(ls_info, ret_comment))) {
LOG_WARN("fail to check paxos member", KR(ret), K(ls_info));
} else if (OB_FAIL(update_with_partition(dst_replica_.get_server()))) {
LOG_WARN("fail to update with partition", KR(ret),
"server", dst_replica_.get_server());
}
return ret;
}
@ -1505,9 +1482,6 @@ int ObLSTypeTransformTask::check_before_execute(
LOG_WARN("fail to check online", KR(ret), K(ls_info));
} else if (OB_FAIL(check_paxos_member(ls_info, ret_comment))) {
LOG_WARN("fail to check paxos member", KR(ret), K(ls_info));
} else if (OB_FAIL(update_with_partition(dst_replica_.get_server()))) {
LOG_WARN("fail to update with partition", KR(ret),
"server", dst_replica_.get_server());
}
return ret;
}

View File

@ -331,8 +331,6 @@ public:
virtual int clone(void *input_ptr, ObDRTask *&output_task) const = 0;
int deep_copy(const ObDRTask &that);
public:
int update_with_partition(
const common::ObAddr &dst_server) const;
void set_schedule();
bool is_manual_task() const { return obrpc::ObAdminClearDRTaskArg::TaskType::MANUAL == invoked_source_; }
public:

View File

@ -20,11 +20,11 @@
#include "share/ls/ob_ls_table_operator.h"
#include "share/ob_cluster_version.h"
#include "ob_rs_event_history_table_operator.h"
#include "ob_server_manager.h"
#include "ob_disaster_recovery_task_mgr.h"
#include "ob_disaster_recovery_task.h"
#include "observer/ob_server.h"
#include "lib/utility/ob_tracepoint.h"
#include "share/ob_all_server_tracer.h"
namespace oceanbase
{
@ -35,8 +35,7 @@ namespace rootserver
int ObDRTaskExecutor::init(
share::ObLSTableOperator &lst_operator,
obrpc::ObSrvRpcProxy &rpc_proxy,
ObServerManager &server_mgr)
obrpc::ObSrvRpcProxy &rpc_proxy)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(inited_)) {
@ -45,7 +44,6 @@ int ObDRTaskExecutor::init(
} else {
lst_operator_ = &lst_operator;
rpc_proxy_ = &rpc_proxy;
server_mgr_ = &server_mgr;
inited_ = true;
}
return ret;
@ -57,18 +55,16 @@ int ObDRTaskExecutor::execute(
ObDRTaskRetComment &ret_comment) const
{
int ret = OB_SUCCESS;
const ObAddr &dst_server = task.get_dst_server();
ObServerInfoInTable server_info;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret));
} else if (OB_ISNULL(server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server mgr ptr is null", KR(ret), KP(server_mgr_));
} else if (OB_FAIL(SVR_TRACER.get_server_info(dst_server, server_info))) {
LOG_WARN("fail to get server_info", KR(ret), K(dst_server));
} else {
const ObAddr &dst_server = task.get_dst_server();
bool is_dst_server_alive = false;
if (OB_FAIL(server_mgr_->check_server_alive(dst_server, is_dst_server_alive))) {
LOG_WARN("fail to check server alive", KR(ret), K(dst_server));
} else if (!is_dst_server_alive) {
const bool is_dst_server_alive = server_info.is_alive();
if (!is_dst_server_alive) {
ret = OB_REBALANCE_TASK_CANT_EXEC;
ret_comment = ObDRTaskRetComment::CANNOT_EXECUTE_DUE_TO_SERVER_NOT_ALIVE;
LOG_WARN("dst server not alive", KR(ret), K(dst_server));

View File

@ -14,7 +14,6 @@
#define OCEANBASE_ROOTSERVER_OB_DISASTER_RECOVERY_TASK_EXECUTOR_H_
#include "rootserver/ob_disaster_recovery_task_mgr.h"
#include "rootserver/ob_server_manager.h"
namespace oceanbase
{
@ -35,7 +34,6 @@ class ObMultiVersionSchemaService;
namespace rootserver
{
class ObServerManager;
class ObDRTask;
class ObDRTaskExecutor
@ -44,18 +42,15 @@ public:
ObDRTaskExecutor()
: inited_(false),
lst_operator_(nullptr),
rpc_proxy_(nullptr),
server_mgr_(nullptr) {}
rpc_proxy_(nullptr) {}
virtual ~ObDRTaskExecutor() {}
public:
// init a ObDRTaskExecutor
// param [in] lst_operator, to check task
// param [in] rpc_proxy, to send task execution to dst server
// param [in] server_mgr, to check whether dst server alive
int init(
share::ObLSTableOperator &lst_operator,
obrpc::ObSrvRpcProxy &rpc_proxy,
ObServerManager &server_mgr);
obrpc::ObSrvRpcProxy &rpc_proxy);
// do previous check and execute a task
// @param [in] task, the task to execute
@ -68,7 +63,6 @@ private:
bool inited_;
share::ObLSTableOperator *lst_operator_;
obrpc::ObSrvRpcProxy *rpc_proxy_;
ObServerManager *server_mgr_;
private:
DISALLOW_COPY_AND_ASSIGN(ObDRTaskExecutor);
};

View File

@ -24,16 +24,15 @@
#include "ob_disaster_recovery_task_executor.h"
#include "rootserver/ob_root_balancer.h"
#include "ob_rs_event_history_table_operator.h"
#include "ob_server_manager.h"
#include "share/ob_rpc_struct.h"
#include "observer/ob_server_struct.h"
#include "share/ob_server_status.h"
#include "sql/executor/ob_executor_rpc_proxy.h"
#include "rootserver/ob_disaster_recovery_task.h" // for ObDRTaskType
#include "share/ob_share_util.h" // for ObShareUtil
#include "lib/lock/ob_tc_rwlock.h" // for common::RWLock
#include "rootserver/ob_disaster_recovery_task.h"
#include "share/inner_table/ob_inner_table_schema_constants.h"
#include "share/ob_all_server_tracer.h"
namespace oceanbase
{
@ -49,7 +48,6 @@ ObDRTaskQueue::ObDRTaskQueue() : inited_(false),
schedule_list_(),
task_map_(),
rpc_proxy_(nullptr),
server_mgr_(nullptr),
priority_(ObDRTaskPriority::MAX_PRI)
{
}
@ -101,7 +99,6 @@ int ObDRTaskQueue::init(
common::ObServerConfig &config,
const int64_t bucket_num,
obrpc::ObSrvRpcProxy *rpc_proxy,
ObServerManager *server_mgr,
ObDRTaskPriority priority)
{
int ret = OB_SUCCESS;
@ -110,10 +107,9 @@ int ObDRTaskQueue::init(
LOG_WARN("init twice", KR(ret));
} else if (OB_UNLIKELY(bucket_num <= 0)
|| OB_ISNULL(rpc_proxy)
|| OB_ISNULL(server_mgr)
|| (ObDRTaskPriority::LOW_PRI != priority && ObDRTaskPriority::HIGH_PRI != priority)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(bucket_num), KP(rpc_proxy), KP(server_mgr), K(priority));
LOG_WARN("invalid argument", KR(ret), K(bucket_num), KP(rpc_proxy), K(priority));
} else if (OB_FAIL(task_map_.create(bucket_num, "DRTaskMap"))) {
LOG_WARN("fail to create task map", KR(ret), K(bucket_num));
} else if (OB_FAIL(task_alloc_.init(
@ -123,7 +119,6 @@ int ObDRTaskQueue::init(
} else {
config_ = &config;
rpc_proxy_ = rpc_proxy;
server_mgr_ = server_mgr;
priority_ = priority;
inited_ = true;
}
@ -414,32 +409,30 @@ int ObDRTaskQueue::check_task_need_cleaning_(
// (3) rpc ls_check_dr_task_exist successfully told us task not exist
// (4) task is timeout while any failure during whole procedure
need_cleanning = false;
share::ObServerStatus server_status;
Bool task_exist = false;
bool server_exist = true;
const ObAddr &dst_server = task.get_dst_server();
share::ObServerInfoInTable server_info;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("task queue not init", KR(ret));
} else if (OB_ISNULL(server_mgr_) || OB_ISNULL(rpc_proxy_)) {
} else if (OB_ISNULL(rpc_proxy_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("some ptr is null", KR(ret), KP(server_mgr_), KP(rpc_proxy_));
} else if (OB_FAIL(server_mgr_->is_server_exist(task.get_dst_server(), server_exist))) {
LOG_WARN("fail to check is server exist", KR(ret), "server", task.get_dst_server());
} else if (!server_exist) {
// case 1. server not exist
FLOG_INFO("the reason to clean this task: server not exist", K(task));
need_cleanning = true;
ret_comment = ObDRTaskRetComment::CLEAN_TASK_DUE_TO_SERVER_NOT_EXIST;
} else if (OB_FAIL(server_mgr_->get_server_status(task.get_dst_server(), server_status))) {
// we only care about HeartBeatStatus in server_status
LOG_WARN("fail to get server status", KR(ret), "server", task.get_dst_server());
} else if (server_status.is_permanent_offline()) {
// case 2. server status is permanant offline
FLOG_INFO("the reason to clean this task: server permanent offline", K(task), K(server_status));
LOG_WARN("some ptr is null", KR(ret), KP(rpc_proxy_));
} else if (OB_FAIL(SVR_TRACER.get_server_info(dst_server, server_info))) {
LOG_WARN("fail to get server_info", KR(ret), "server", dst_server);
// case 1. server not exist
if (OB_ENTRY_NOT_EXIST == ret) {
ret = OB_SUCCESS;
FLOG_INFO("the reason to clean this task: server not exist", K(task));
need_cleanning = true;
ret_comment = ObDRTaskRetComment::CLEAN_TASK_DUE_TO_SERVER_NOT_EXIST;
}
} else if (server_info.is_permanent_offline()) {
// case 2. server is permanant offline
FLOG_INFO("the reason to clean this task: server permanent offline", K(task), K(server_info));
need_cleanning = true;
ret_comment = ObDRTaskRetComment::CLEAN_TASK_DUE_TO_SERVER_PERMANENT_OFFLINE;
} else if (server_status.is_alive()) {
} else if (server_info.is_alive()) {
ObDRTaskExistArg arg;
arg.task_id_ = task.get_task_id();
arg.tenant_id_ = task.get_tenant_id();
@ -454,12 +447,12 @@ int ObDRTaskQueue::check_task_need_cleaning_(
need_cleanning = true;
ret_comment = ObDRTaskRetComment::CLEAN_TASK_DUE_TO_TASK_NOT_RUNNING;
}
} else if (server_status.is_temporary_offline()) {
} else if (server_info.is_temporary_offline()) {
ret = OB_SERVER_NOT_ALIVE;
LOG_WARN("server status is not alive, task may be cleanned later", KR(ret), "server", task.get_dst_server(), K(server_status), K(task));
LOG_WARN("server status is not alive, task may be cleanned later", KR(ret), "server", task.get_dst_server(), K(server_info), K(task));
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected server status", KR(ret), "server", task.get_dst_server(), K(server_status), K(task));
LOG_WARN("unexpected server status", KR(ret), "server", task.get_dst_server(), K(server_info), K(task));
}
// case 4. task is timeout while any OB_FAIL occurs
@ -608,7 +601,6 @@ int ObDRTaskMgr::init(
const common::ObAddr &server,
common::ObServerConfig &config,
ObDRTaskExecutor &task_executor,
ObServerManager *server_mgr,
obrpc::ObSrvRpcProxy *rpc_proxy,
common::ObMySQLProxy *sql_proxy,
share::schema::ObMultiVersionSchemaService *schema_service)
@ -619,12 +611,11 @@ int ObDRTaskMgr::init(
ret = OB_INIT_TWICE;
LOG_WARN("init twice", KR(ret), K(inited_), K_(stopped));
} else if (OB_UNLIKELY(!server.is_valid())
|| OB_ISNULL(server_mgr)
|| OB_ISNULL(rpc_proxy)
|| OB_ISNULL(sql_proxy)
|| OB_ISNULL(schema_service)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(server), KP(server_mgr), KP(rpc_proxy),
LOG_WARN("invalid argument", KR(ret), K(server), KP(rpc_proxy),
KP(sql_proxy), KP(schema_service));
} else if (OB_FAIL(cond_.init(ObWaitEventIds::REBALANCE_TASK_MGR_COND_WAIT))) {
LOG_WARN("fail to init cond", KR(ret));
@ -634,15 +625,14 @@ int ObDRTaskMgr::init(
config_ = &config;
self_ = server;
task_executor_ = &task_executor;
server_mgr_ = server_mgr;
rpc_proxy_ = rpc_proxy;
sql_proxy_ = sql_proxy;
schema_service_ = schema_service;
if (OB_FAIL(high_task_queue_.init(
config, TASK_QUEUE_LIMIT, rpc_proxy_, server_mgr_, ObDRTaskPriority::HIGH_PRI))) {
config, TASK_QUEUE_LIMIT, rpc_proxy_, ObDRTaskPriority::HIGH_PRI))) {
LOG_WARN("fail to init high priority task queue", KR(ret));
} else if (OB_FAIL(low_task_queue_.init(
config, TASK_QUEUE_LIMIT, rpc_proxy_, server_mgr_, ObDRTaskPriority::LOW_PRI))) {
config, TASK_QUEUE_LIMIT, rpc_proxy_, ObDRTaskPriority::LOW_PRI))) {
LOG_WARN("fail to init low priority task queue", KR(ret));
} else if (OB_FAIL(disaster_recovery_task_table_updater_.init(sql_proxy, this))) {
LOG_WARN("fail to init a ObDRTaskTableUpdater", KR(ret));

View File

@ -33,7 +33,6 @@ struct ObDRTaskReplyResult;
namespace rootserver
{
class ObDRTaskExecutor;
class ObServerManager;
class ObDRTaskMgr;
class ObDRTaskQueue
@ -53,12 +52,10 @@ public:
// @param [in] config, server config
// @param [in] bucket_num, the size of task_map
// @param [in] rpc_proxy, to send rpc
// @param [in] server_mgr, server manager to get server infos
int init(
common::ObServerConfig &config,
const int64_t bucket_num,
obrpc::ObSrvRpcProxy *rpc_proxy,
ObServerManager *server_mgr,
ObDRTaskPriority priority);
public:
@ -190,7 +187,6 @@ private:
TaskList schedule_list_;
TaskMap task_map_;
obrpc::ObSrvRpcProxy *rpc_proxy_;
ObServerManager *server_mgr_;
ObDRTaskPriority priority_;
private:
DISALLOW_COPY_AND_ASSIGN(ObDRTaskQueue);
@ -218,7 +214,6 @@ public:
low_task_queue_(queues_[1]),
self_(),
task_executor_(nullptr),
server_mgr_(nullptr),
rpc_proxy_(nullptr),
sql_proxy_(nullptr),
schema_service_(nullptr) {}
@ -229,7 +224,6 @@ public:
// @param [in] server, local server address
// @param [in] config, local server config
// @param [in] task_executor, to execute a task
// @param [in] server_mgr, to check server status for task queue
// @param [in] rpc_proxy, to send rpc for task queue
// @param [in] sql_proxy, to send sql for updater
// @param [in] schema_service, to get infos about objects
@ -237,7 +231,6 @@ public:
const common::ObAddr &server,
common::ObServerConfig &config,
ObDRTaskExecutor &task_executor,
ObServerManager *server_mgr,
obrpc::ObSrvRpcProxy *rpc_proxy,
common::ObMySQLProxy *sql_proxy,
share::schema::ObMultiVersionSchemaService *schema_service);
@ -427,7 +420,6 @@ private:
ObDRTaskQueue &low_task_queue_; // queues_[1]
common::ObAddr self_;
ObDRTaskExecutor *task_executor_;
ObServerManager *server_mgr_;
obrpc::ObSrvRpcProxy *rpc_proxy_;
common::ObMySQLProxy *sql_proxy_;
share::schema::ObMultiVersionSchemaService *schema_service_;

View File

@ -222,13 +222,11 @@ int ObDRWorker::LocalityAlignment::locate_zone_locality(
}
ObDRWorker::LocalityAlignment::LocalityAlignment(ObUnitManager *unit_mgr,
ObServerManager *server_mgr,
ObZoneManager *zone_mgr,
DRLSInfo &dr_ls_info)
: task_idx_(0),
add_replica_task_(),
unit_mgr_(unit_mgr),
server_mgr_(server_mgr),
zone_mgr_(zone_mgr),
dr_ls_info_(dr_ls_info),
task_array_(),
@ -1077,11 +1075,9 @@ int ObDRWorker::LocalityAlignment::build()
int ret = OB_SUCCESS;
uint64_t tenant_id = OB_INVALID_ID;
if (OB_UNLIKELY(nullptr == unit_mgr_
|| nullptr == server_mgr_
|| nullptr == zone_mgr_)) {
ret = OB_NOT_INIT;
LOG_WARN("LocalityAlignment not init",
KR(ret), KP(unit_mgr_), KP(server_mgr_), KP(zone_mgr_));
LOG_WARN("LocalityAlignment not init", KR(ret), KP(unit_mgr_), KP(zone_mgr_));
} else if (OB_FAIL(locality_map_.create(LOCALITY_MAP_BUCKET_NUM, "LocAlign"))) {
LOG_WARN("fail to create locality map", KR(ret));
} else if (OB_FAIL(generate_paxos_replica_number())) {
@ -1098,7 +1094,7 @@ int ObDRWorker::LocalityAlignment::build()
LOG_WARN("fail to create unit set", KR(ret));
} else if (OB_FAIL(init_unit_set(unit_set_))) {
LOG_WARN("fail to init unit set", KR(ret));
} else if (OB_FAIL(unit_provider_.init(gen_user_tenant_id(tenant_id), unit_mgr_, server_mgr_))) {
} else if (OB_FAIL(unit_provider_.init(gen_user_tenant_id(tenant_id), unit_mgr_))) {
LOG_WARN("fail to init unit provider", KR(ret), K(tenant_id));
}
return ret;
@ -1521,25 +1517,18 @@ int ObDRWorker::LocalityAlignment::get_next_locality_alignment_task(
int ObDRWorker::UnitProvider::init(
const uint64_t tenant_id,
ObUnitManager *unit_mgr,
ObServerManager *server_mgr)
ObUnitManager *unit_mgr)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(inited_)) {
ret = OB_INIT_TWICE;
LOG_WARN("init twice", KR(ret));
} else if (OB_UNLIKELY(OB_INVALID_ID == tenant_id
|| nullptr == unit_mgr
|| nullptr == server_mgr)) {
} else if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || nullptr == unit_mgr)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret),
K(tenant_id),
KP(unit_mgr),
KP(server_mgr));
LOG_WARN("invalid argument", KR(ret), K(tenant_id), KP(unit_mgr));
} else {
tenant_id_ = tenant_id;
unit_mgr_ = unit_mgr;
server_mgr_ = server_mgr;
inited_ = true;
}
return ret;
@ -1554,9 +1543,9 @@ int ObDRWorker::UnitProvider::get_unit(
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret));
} else if (OB_ISNULL(unit_mgr_) || OB_ISNULL(server_mgr_)) {
} else if (OB_ISNULL(unit_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unit mgr ptr is null", KR(ret), KP(unit_mgr_), KP(server_mgr_));
LOG_WARN("unit_mgr is null", KR(ret), KP(unit_mgr_));
} else {
common::ObArray<ObUnitInfo> unit_array;
bool found = false;
@ -1571,7 +1560,7 @@ int ObDRWorker::UnitProvider::get_unit(
int hash_ret = OB_SUCCESS;
if (this_info.unit_.zone_ != zone) {
// bypass, because we only support migrate in same zone
} else if (OB_FAIL(server_mgr_->check_server_active(this_info.unit_.server_, is_active))) {
} else if (OB_FAIL(SVR_TRACER.check_server_active(this_info.unit_.server_, is_active))) {
LOG_WARN("fail to check server active", KR(ret), "server", this_info.unit_.server_);
} else if (!is_active) {
LOG_INFO("server is not active", "server", this_info.unit_.server_, K(is_active));
@ -1603,7 +1592,7 @@ int ObDRWorker::UnitProvider::get_unit(
int hash_ret = OB_SUCCESS;
if (this_info.unit_.zone_ != zone) {
// bypass, because only support migrate in same zone
} else if (OB_FAIL(server_mgr_->check_server_active(this_info.unit_.server_, is_active))) {
} else if (OB_FAIL(SVR_TRACER.check_server_active(this_info.unit_.server_, is_active))) {
LOG_WARN("fail to check server active", KR(ret), "server", this_info.unit_.server_);
} else if (!is_active) {
LOG_INFO("server is not active", "server", this_info.unit_.server_, K(is_active));
@ -1655,7 +1644,6 @@ ObDRWorker::ObDRWorker(volatile bool &stop)
self_addr_(),
config_(nullptr),
unit_mgr_(nullptr),
server_mgr_(nullptr),
zone_mgr_(nullptr),
disaster_recovery_task_mgr_(nullptr),
lst_operator_(nullptr),
@ -1675,7 +1663,6 @@ int ObDRWorker::init(
common::ObAddr &self_addr,
common::ObServerConfig &config,
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
ObDRTaskMgr &task_mgr,
share::ObLSTableOperator &lst_operator,
@ -1694,7 +1681,6 @@ int ObDRWorker::init(
self_addr_ = self_addr;
config_ = &config;
unit_mgr_ = &unit_mgr;
server_mgr_ = &server_mgr;
zone_mgr_ = &zone_mgr;
disaster_recovery_task_mgr_ = &task_mgr;
lst_operator_ = &lst_operator;
@ -1789,7 +1775,6 @@ void ObDRWorker::statistic_total_dr_task(const int64_t task_cnt)
int ObDRWorker::check_tenant_locality_match(
const uint64_t tenant_id,
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
bool &locality_is_matched)
{
@ -1816,7 +1801,6 @@ int ObDRWorker::check_tenant_locality_match(
share::ObLSStatusInfo &ls_status_info = ls_status_info_array.at(i);
DRLSInfo dr_ls_info(gen_user_tenant_id(tenant_id),
&unit_mgr,
&server_mgr,
&zone_mgr,
GCTX.schema_service_);
if (ls_status_info.ls_is_creating()) {
@ -1834,7 +1818,7 @@ int ObDRWorker::check_tenant_locality_match(
ls_info, ls_status_info))) {
LOG_WARN("fail to generate dr log stream info", KR(ret));
} else if (OB_FAIL(check_ls_locality_match_(
dr_ls_info, unit_mgr, server_mgr, zone_mgr, locality_is_matched))) {
dr_ls_info, unit_mgr, zone_mgr, locality_is_matched))) {
LOG_WARN("fail to try log stream disaster recovery", KR(ret));
}
}
@ -1846,7 +1830,6 @@ int ObDRWorker::check_tenant_locality_match(
int ObDRWorker::check_ls_locality_match_(
DRLSInfo &dr_ls_info,
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
bool &locality_is_matched)
{
@ -1855,7 +1838,6 @@ int ObDRWorker::check_ls_locality_match_(
locality_is_matched = false;
LOG_INFO("start to check ls locality match", K(dr_ls_info));
LocalityAlignment locality_alignment(&unit_mgr,
&server_mgr,
&zone_mgr,
dr_ls_info);
if (!dr_ls_info.has_leader()) {
@ -1948,7 +1930,6 @@ int ObDRWorker::try_tenant_disaster_recovery(
share::ObLSStatusInfo &ls_status_info = ls_status_info_array.at(i);
DRLSInfo dr_ls_info(gen_user_tenant_id(tenant_id),
unit_mgr_,
server_mgr_,
zone_mgr_,
schema_service_);
int64_t ls_acc_dr_task = 0;
@ -2409,13 +2390,12 @@ int ObDRWorker::do_single_replica_permanent_offline_(
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret));
} else if (OB_ISNULL(server_mgr_)
|| OB_UNLIKELY(!member_to_remove.is_valid()
} else if (OB_UNLIKELY(!member_to_remove.is_valid()
|| OB_INVALID_TENANT_ID == tenant_id
|| !ls_id.is_valid_with_tenant(tenant_id))) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), K(member_to_remove), K(tenant_id), K(ls_id), KP(server_mgr_));
} else if (OB_FAIL(server_mgr_->check_server_permanent_offline(member_to_remove.get_server(), is_offline))) {
LOG_WARN("invalid argument", KR(ret), K(member_to_remove), K(tenant_id), K(ls_id));
} else if (OB_FAIL(SVR_TRACER.check_server_permanent_offline(member_to_remove.get_server(), is_offline))) {
LOG_WARN("fail to check server permanent offline", KR(ret), K(member_to_remove));
} else if (is_offline) {
FLOG_INFO("found ls replica need to permanent offline", K(member_to_remove));
@ -2565,7 +2545,6 @@ int ObDRWorker::construct_extra_infos_to_build_migrate_task(
//shall never be here
} else if (OB_FAIL(choose_disaster_recovery_data_source(
zone_mgr_,
server_mgr_,
dr_ls_info,
dst_member,
src_member,
@ -2907,7 +2886,6 @@ int ObDRWorker::try_generate_add_replica_locality_alignment_task(
//shall never be here
} else if (OB_FAIL(choose_disaster_recovery_data_source(
zone_mgr_,
server_mgr_,
dr_ls_info,
dst_member,
ObReplicaMember(),/*empty*/
@ -2992,7 +2970,6 @@ int ObDRWorker::try_generate_type_transform_locality_alignment_task(
LOG_INFO("may has no leader while member change", K(dr_ls_info));
} else if (OB_FAIL(choose_disaster_recovery_data_source(
zone_mgr_,
server_mgr_,
dr_ls_info,
dst_member,
src_member,
@ -3242,7 +3219,6 @@ int ObDRWorker::record_task_plan_for_locality_alignment(
my_task->memstore_percent_);
if (OB_FAIL(choose_disaster_recovery_data_source(
zone_mgr_,
server_mgr_,
dr_ls_info,
dst_member,
ObReplicaMember(),/*empty*/
@ -3275,7 +3251,6 @@ int ObDRWorker::record_task_plan_for_locality_alignment(
my_task->dst_memstore_percent_);
if (OB_FAIL(choose_disaster_recovery_data_source(
zone_mgr_,
server_mgr_,
dr_ls_info,
dst_member,
ObReplicaMember(),/*empty*/
@ -3347,10 +3322,7 @@ int ObDRWorker::try_locality_alignment(
int ret = OB_SUCCESS;
DEBUG_SYNC(BEFORE_TRY_LOCALITY_ALIGNMENT);
LOG_INFO("try locality alignment", K(dr_ls_info), K(only_for_display));
LocalityAlignment locality_alignment(unit_mgr_,
server_mgr_,
zone_mgr_,
dr_ls_info);
LocalityAlignment locality_alignment(unit_mgr_, zone_mgr_, dr_ls_info);
const LATask *task = nullptr;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
@ -3784,7 +3756,6 @@ int ObDRWorker::construct_extra_infos_for_generate_migrate_to_unit_task(
//shall never be here
} else if (OB_FAIL(choose_disaster_recovery_data_source(
zone_mgr_,
server_mgr_,
dr_ls_info,
dst_member,
src_member,
@ -4131,7 +4102,6 @@ int ObDRWorker::generate_disaster_recovery_paxos_replica_number(
int ObDRWorker::choose_disaster_recovery_data_source(
ObZoneManager *zone_mgr,
ObServerManager *server_mgr,
DRLSInfo &dr_ls_info,
const ObReplicaMember &dst_member,
const ObReplicaMember &src_member,
@ -4139,7 +4109,7 @@ int ObDRWorker::choose_disaster_recovery_data_source(
int64_t &data_size)
{
int ret = OB_SUCCESS;
ObZone dst_zone;
ObServerInfoInTable server_info;
ObRegion dst_region;
ObDataSourceCandidateChecker type_checker(dst_member.get_replica_type());
int64_t replica_cnt = 0;
@ -4147,12 +4117,13 @@ int ObDRWorker::choose_disaster_recovery_data_source(
DRServerStatInfo *server_stat_info = nullptr;
DRUnitStatInfo *unit_stat_info = nullptr;
DRUnitStatInfo *unit_in_group_stat_info = nullptr;
ObZone dst_zone;
if (OB_ISNULL(zone_mgr) || OB_ISNULL(server_mgr)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", KR(ret), KP(zone_mgr), KP(server_mgr));
} else if (OB_FAIL(server_mgr->get_server_zone(dst_member.get_server(), dst_zone))) {
LOG_WARN("fail to get server zone", KR(ret), "server", dst_member.get_server());
if (OB_ISNULL(zone_mgr)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("zone_mgr is null", KR(ret), KP(zone_mgr));
} else if (OB_FAIL(SVR_TRACER.get_server_zone(dst_member.get_server(), dst_zone))) {
LOG_WARN("fail to get server zone", KR(ret), K(dst_member.get_server()));
} else if (OB_FAIL(zone_mgr->get_region(dst_zone, dst_region))) {
LOG_WARN("fail to get region", KR(ret), K(dst_zone));
} else if (OB_FAIL(dr_ls_info.get_replica_cnt(replica_cnt))) {
@ -4161,8 +4132,8 @@ int ObDRWorker::choose_disaster_recovery_data_source(
ObLSReplica *src_replica = nullptr;
// try task offline src
for (int64_t i = 0;
OB_SUCC(ret) && i < replica_cnt && src_member.is_valid() && nullptr == src_replica;
++i) {
OB_SUCC(ret) && i < replica_cnt && src_member.is_valid() && nullptr == src_replica;
++i) {
if (OB_FAIL(dr_ls_info.get_replica_stat(
i,
ls_replica,

View File

@ -36,7 +36,6 @@ class ObLSReplica;
namespace rootserver
{
class ObUnitManager;
class ObServerManager;
class ObZoneManager;
class ObDRTaskMgr;
class DRLSInfo;
@ -111,14 +110,12 @@ public:
common::ObAddr &self_addr,
common::ObServerConfig &cfg,
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
ObDRTaskMgr &task_mgr,
share::ObLSTableOperator &lst_operator,
share::schema::ObMultiVersionSchemaService &schema_service,
obrpc::ObSrvRpcProxy &rpc_proxy,
common::ObMySQLProxy &sql_proxy);
int try_disaster_recovery();
int try_tenant_disaster_recovery(
const uint64_t tenant_id,
@ -127,7 +124,6 @@ public:
static int check_tenant_locality_match(
const uint64_t tenant_id,
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
bool &locality_is_matched);
@ -172,7 +168,6 @@ private:
static int choose_disaster_recovery_data_source(
ObZoneManager *zone_mgr,
ObServerManager *server_mgr,
DRLSInfo &dr_ls_info,
const ObReplicaMember &dst_member,
const ObReplicaMember &src_member,
@ -556,12 +551,8 @@ private:
: inited_(false),
tenant_id_(OB_INVALID_ID),
unit_mgr_(nullptr),
server_mgr_(nullptr),
unit_set_(unit_set) {}
int init(
const uint64_t tenant_id,
ObUnitManager *unit_mgr,
ObServerManager *server_mgr);
int init(const uint64_t tenant_id, ObUnitManager *unit_mgr);
int get_unit(
const common::ObZone &zone,
const uint64_t unit_group_id,
@ -570,7 +561,6 @@ private:
bool inited_;
uint64_t tenant_id_;
ObUnitManager *unit_mgr_;
ObServerManager *server_mgr_;
common::hash::ObHashSet<int64_t> &unit_set_;
};
@ -583,10 +573,7 @@ private:
class LocalityAlignment
{
public:
LocalityAlignment(ObUnitManager *unit_mgr,
ObServerManager *server_mgr,
ObZoneManager *zone_mgr,
DRLSInfo &dr_ls_info);
LocalityAlignment(ObUnitManager *unit_mgr, ObZoneManager *zone_mgr, DRLSInfo &dr_ls_info);
virtual ~LocalityAlignment();
int build();
int get_next_locality_alignment_task(
@ -678,7 +665,6 @@ private:
int64_t task_idx_;
AddReplicaLATask add_replica_task_;
ObUnitManager *unit_mgr_;
ObServerManager *server_mgr_;
ObZoneManager *zone_mgr_;
DRLSInfo &dr_ls_info_;
common::ObArray<LATask *> task_array_;
@ -696,7 +682,6 @@ private:
static int check_ls_locality_match_(
DRLSInfo &dr_ls_info,
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
bool &locality_is_matched);
@ -960,7 +945,6 @@ private:
common::ObAddr self_addr_;
common::ObServerConfig *config_;
ObUnitManager *unit_mgr_;
ObServerManager *server_mgr_;
ObZoneManager *zone_mgr_;
ObDRTaskMgr *disaster_recovery_task_mgr_;
share::ObLSTableOperator *lst_operator_;

View File

@ -24,12 +24,15 @@
#include "share/ob_rpc_struct.h"//GetLSReportCnt
#include "share/ls/ob_ls_table_iterator.h"//ObAllLSTableIterator
#include "share/ls/ob_ls_info.h"//ObLSInfo
#include "share/ob_all_server_tracer.h"
#include "observer/ob_server_struct.h"
#include "ob_server_manager.h"
#include "ob_unit_manager.h"//ObUnitManager
#include "ob_server_zone_op_service.h"
#include "rootserver/ob_rs_async_rpc_proxy.h"//ObGetLSReportCntProxy
#include "rootserver/ob_heartbeat_service.h"
namespace oceanbase
{
@ -43,7 +46,8 @@ int ObEmptyServerChecker::init(
ObServerManager &server_mgr,
ObUnitManager &unit_mgr,
share::ObLSTableOperator &lst_operator,
schema::ObMultiVersionSchemaService &schema_service)
schema::ObMultiVersionSchemaService &schema_service,
ObServerZoneOpService &server_zone_op_service)
{
int ret = OB_SUCCESS;
const int64_t empty_server_checker_thread_cnt = 1;
@ -60,6 +64,7 @@ int ObEmptyServerChecker::init(
lst_operator_ = &lst_operator;
schema_service_ = &schema_service;
unit_mgr_ = &unit_mgr;
server_zone_op_service_ = &server_zone_op_service;
empty_servers_.reset();
need_check_ = true;
inited_ = true;
@ -98,47 +103,31 @@ int ObEmptyServerChecker::try_delete_server_()
{
int ret = OB_SUCCESS;
ObZone zone; // empty means all zones
ObArray<ObServerStatus> statuses;
ObArray<ObServerInfoInTable> servers_info;
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret));
} else if (OB_ISNULL(server_mgr_) || OB_ISNULL(unit_mgr_)) {
} else if (OB_ISNULL(server_mgr_) || OB_ISNULL(unit_mgr_) || OB_ISNULL(server_zone_op_service_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error", KR(ret), KP(server_mgr_), KP(unit_mgr_));
} else if (OB_FAIL(server_mgr_->get_server_statuses(zone, statuses))) {
LOG_WARN("get_server_statuses failed", K(zone), KR(ret));
LOG_WARN("unexpected error", KR(ret), KP(server_mgr_), KP(unit_mgr_), KP(server_zone_op_service_));
} else if (OB_FAIL(SVR_TRACER.get_servers_info(zone, servers_info))) {
LOG_WARN("get_servers_info failed", KR(ret), K(zone));
} else {
int first_error_ret = OB_SUCCESS;
need_check_ = false;
empty_servers_.reset();
FOREACH_CNT_X(status, statuses, OB_SUCCESS == ret) {
if (ObServerStatus::OB_SERVER_ADMIN_DELETING == status->admin_status_) {
FOREACH_CNT_X(server_info, servers_info, OB_SUCC(ret)) {
if (server_info->is_deleting()) {
need_check_ = true;
bool server_empty = false;
if (OB_FAIL(unit_mgr_->check_server_empty(status->server_, server_empty))) {
LOG_WARN("check_server_empty failed", "server", status->server_, K(ret));
} else if (server_empty && !(status->force_stop_hb_)) {
// stop server's heartbeat
bool force_stop_hb = true;
if (OB_FAIL(server_mgr_->set_force_stop_hb(status->server_, force_stop_hb))) {
LOG_WARN("set force stop hb failed", K(status->server_), K(ret));
} else {
LOG_INFO("force set stop hb", KR(ret), K(status->server_));
}
DEBUG_SYNC(SET_FORCE_STOP_HB_DONE);
const ObAddr &addr= server_info->get_server();
if (OB_FAIL(unit_mgr_->check_server_empty(addr, server_empty))) {
LOG_WARN("check_server_empty failed", "server", addr, KR(ret));
} else if (server_empty && OB_FAIL(empty_servers_.push_back(addr))) {
LOG_WARN("failed to push back empty server", KR(ret), KPC(server_info));
}
if (OB_FAIL(ret)) {
} else if (server_empty && OB_FAIL(empty_servers_.push_back(status->server_))) {
LOG_WARN("failed to push back empty server", KR(ret), KPC(status));
}
}
// ignore single server error
if (OB_FAIL(ret)) {
first_error_ret = OB_SUCC(first_error_ret) ? ret : first_error_ret;
ret = OB_SUCCESS;
}
}
ret = OB_SUCC(first_error_ret) ? ret : first_error_ret;
DEBUG_SYNC(END_DELETE_SERVER_BEFORE_CHECK_META_TABLE);
if (OB_SUCC(ret) && empty_servers_.count() > 0) {
//need check empty
if (OB_FAIL(check_server_empty_())) {
@ -149,10 +138,20 @@ int ObEmptyServerChecker::try_delete_server_()
const bool commit = true;
for (int64_t i = 0; OB_SUCC(ret) && i < empty_servers_.count(); ++i) {
const ObAddr &addr = empty_servers_.at(i);
if (OB_FAIL(server_mgr_->end_delete_server(addr, zone, commit))) {
LOG_WARN("server_mgr end_delete_server failed", KR(ret), K(addr), K(zone));
if (!ObHeartbeatService::is_service_enabled()) { // the old logic
LOG_INFO("sys tenant data version < 4.2, server manager executes end_delete_server");
if (OB_FAIL(server_mgr_->end_delete_server(addr, zone, commit))) {
LOG_WARN("server_mgr end_delete_server failed", KR(ret), K(addr), K(zone));
}
} else {
LOG_INFO("sys tenant data version >= 4.2, server zone op service executes finish_delete_server");
if (OB_FAIL(server_zone_op_service_->finish_delete_server(addr, zone))) {
LOG_WARN("server_zone_op_service finish_delete_server failed", KR(ret), K(addr), K(zone));
} else if (OB_FAIL(server_mgr_->load_server_manager())) {
LOG_WARN("fail to load server manager", KR(ret));
}
}
}
}
}
}
return ret;

View File

@ -38,7 +38,7 @@ namespace rootserver
{
class ObServerManager;
class ObUnitManager;
class ObServerZoneOpService;
/// Empty server checker thread.
class ObEmptyServerChecker : public ObRsReentrantThread
{
@ -50,7 +50,8 @@ public:
server_mgr_(NULL),
unit_mgr_(NULL),
lst_operator_(NULL),
schema_service_(NULL) {};
schema_service_(NULL),
server_zone_op_service_(NULL) {};
virtual ~ObEmptyServerChecker() {};
virtual void run3() override;
@ -59,7 +60,8 @@ public:
int init(ObServerManager &server_mgr,
ObUnitManager &unit_mgr,
share::ObLSTableOperator &lst_operator,
share::schema::ObMultiVersionSchemaService &schema_service);
share::schema::ObMultiVersionSchemaService &schema_service,
ObServerZoneOpService &server_zone_op_service);
virtual void wakeup();
virtual void stop();
@ -79,6 +81,7 @@ private:
ObUnitManager *unit_mgr_;
share::ObLSTableOperator *lst_operator_;
share::schema::ObMultiVersionSchemaService *schema_service_;
ObServerZoneOpService *server_zone_op_service_;
DISALLOW_COPY_AND_ASSIGN(ObEmptyServerChecker);
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,148 @@
/**
* Copyright (c) 2022 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_ROOTSERVER_OB_HEARTBEAT_SERVICE_H
#define OCEANBASE_ROOTSERVER_OB_HEARTBEAT_SERVICE_H
#include "lib/container/ob_array.h" // ObArray
#include "lib/net/ob_addr.h" // ObAddr
#include "lib/utility/ob_unify_serialize.h"
#include "common/ob_zone.h" // ObZone
#include "share/ob_server_table_operator.h" // ObServerTableOperator
#include "share/ob_heartbeat_struct.h"
#include "rootserver/ob_primary_ls_service.h" // ObTenantThreadHelper
#include "rootserver/ob_rs_async_rpc_proxy.h" // ObSendHeartbeatProxy
#include "observer/ob_heartbeat_handler.h" // ObServerHealthStatus
namespace oceanbase
{
namespace rootserver
{
// Heartbeat service thread helper. Periodically sends heartbeat RPCs to the servers
// recorded in the __all_server table (the "whitelist") and processes the responses
// to keep server liveness/state information up to date.
// NOTE(review): registered via DEFINE_MTL_FUNC, so presumably one instance per tenant
// context (MTL) — confirm against the MTL registration site.
class ObHeartbeatService : public ObTenantThreadHelper,
                           public logservice::ObICheckpointSubHandler,
                           public logservice::ObIReplaySubHandler
{
public:
  typedef common::ObArray<share::ObHBRequest> ObHBRequestArray;
  typedef common::ObArray<share::ObHBResponse> ObHBResponseArray;
  typedef common::hash::ObHashMap<common::ObAddr, share::ObServerHBInfo> ObServerHBInfoMap;
  typedef common::ObArray<share::ObServerInfoInTable> ObServerInfoInTableArray;
  static const int64_t THREAD_COUNT = 2;
  ObHeartbeatService();
  virtual ~ObHeartbeatService();
  int init();
  void destroy();
  virtual void do_work() override;
  virtual int switch_to_leader() override;
  // Checkpoint/replay sub-handlers are effectively no-ops for this service:
  // nothing is persisted through the log, so the recoverable SCN is max and
  // flush/replay succeed unconditionally.
  virtual share::SCN get_rec_scn() override { return share::SCN::max_scn(); }
  virtual int flush(share::SCN &rec_scn) override { return OB_SUCCESS; }
  int replay(const void *buffer, const int64_t nbytes, const palf::LSN &lsn, const share::SCN &scn) override
  {
    int ret = OB_SUCCESS;
    UNUSEDx(buffer, nbytes, lsn, scn);
    return ret;
  }
  // True once the new (>= 4.2) heartbeat service is enabled; read by callers such as
  // ObEmptyServerChecker to choose between the legacy server-manager path and this service.
  static bool is_service_enabled() { return is_service_enabled_; }
  DEFINE_MTL_FUNC(ObHeartbeatService)
private:
  static const int64_t HB_IDLE_TIME_US = 2 * 1000 * 1000L; // 2s
  // 0.5s; written as an integral constant expression (the former 0.5 * 1000 * 1000L
  // floating-point initializer is non-conforming for a static const int64_t in-class
  // initializer and only compiles as a GCC extension). Same value: 500000.
  static const int64_t HB_FAILED_IDLE_TIME_US = 500 * 1000L; // 0.5s
  // (based on the whitelist) generate & send hb_requests and receive hb_responses
  int check_is_service_enabled_();
  int64_t get_epoch_id_() const { return ATOMIC_LOAD(&epoch_id_); }
  void set_epoch_id_(int64_t epoch_id) { ATOMIC_SET(&epoch_id_, epoch_id); }
  int check_upgrade_compat_();
  int send_heartbeat_();
  int prepare_hb_requests_(ObHBRequestArray &hb_requests, int64_t &whitelist_epoch_id);
  // generate the whitelist and process received hb_responses
  int set_hb_responses_(const int64_t whitelist_epoch_id, ObSendHeartbeatProxy *proxy);
  int get_and_reset_hb_responses_(ObHBResponseArray &hb_responses, int64_t &hb_responses_epoch_id);
  int manage_heartbeat_();
  // read __all_server table and generate whitelist
  int prepare_whitelist_();
  int check_or_update_service_epoch_(const int64_t epoch_id);
  int process_hb_responses_();
  int check_server_(
      const ObHBResponseArray &hb_responses,
      const share::ObServerInfoInTable &server_info_in_table,
      const common::ObArray<common::ObZone> &zone_list,
      const int64_t now,
      const int64_t hb_responses_epoch_id);
  int init_server_hb_info_(
      const int64_t now,
      const share::ObServerInfoInTable &server_info_in_table,
      share::ObServerHBInfo &server_hb_info);
  int check_server_without_hb_response_(
      const int64_t now,
      const share::ObServerInfoInTable &server_info_in_table,
      const int64_t hb_responses_epoch_id,
      share::ObServerHBInfo &server_hb_info);
  int update_table_for_online_to_offline_server_(
      const share::ObServerInfoInTable &server_info_in_table,
      const int64_t now,
      const int64_t hb_responses_epoch_id);
  int check_server_with_hb_response_(
      const share::ObHBResponse &hb_response,
      const share::ObServerInfoInTable &server_info_in_table,
      const common::ObArray<common::ObZone> &zone_list,
      const int64_t now,
      const int64_t hb_responses_epoch_id,
      share::ObServerHBInfo &server_hb_info);
  int check_if_hb_response_can_be_processed_(
      const share::ObHBResponse &hb_response,
      const share::ObServerInfoInTable &server_info_in_table,
      const common::ObArray<common::ObZone> &zone_list) const;
  // a common func. for all servers to update server_hb_info
  // if a server has hb_response, server_hb_info.server_health_status_ will be updated at
  // check_server_with_hb_response()
  int update_server_hb_info_(
      const int64_t now,
      const bool hb_response_exists,
      share::ObServerHBInfo &server_hb_info);
  int check_and_execute_start_or_stop_server_(
      const share::ObHBResponse &hb_response,
      const share::ObServerHBInfo &server_hb_info,
      const share::ObServerInfoInTable &server_info_in_table);
  int clear_deleted_servers_in_all_servers_hb_info_();
  int update_table_for_server_with_hb_response_(
      const share::ObHBResponse &hb_response,
      const share::ObServerInfoInTable &server_info_in_table,
      const int64_t hb_responses_epoch_id);
  // Linear search for `server` in `array`; on success `idx` is its position.
  template <typename T>
  bool has_server_exist_in_array_(
      const ObIArray<T> &array,
      const common::ObAddr &server,
      int64_t &idx);
  bool is_inited_;
  common::ObMySQLProxy *sql_proxy_;
  obrpc::ObSrvRpcProxy *srv_rpc_proxy_;
  int64_t epoch_id_; // the leader epoch, only be updated when the ls becomes leader
  int64_t whitelist_epoch_id_; // the leader epoch when we read prepare whitelist
  int64_t hb_responses_epoch_id_; // It indicates that current hb_responses are based on which epoch of the whitelist
  common::SpinRWLock hb_responses_rwlock_; // when we read/write hb_responses_, need_process_hb_responses_
                                           // and hb_responses_epoch_id_, we should use this lock
  common::SpinRWLock all_servers_info_in_table_rwlock_; // when we read/write all_servers_info_in_table_
                                                        // and whitelist_epoch_id_, we should use this lock
  ObServerHBInfoMap all_servers_hb_info_; // only used in manage_heartbeat()
  ObServerInfoInTableArray all_servers_info_in_table_; // whitelist, send_heartbeat() read it and manage_heartbeat() write it
  common::ObArray<common::ObZone> inactive_zone_list_;
  ObHBResponseArray hb_responses_; // send_heartbeat() write it and manage_heartbeat() read it
  bool need_process_hb_responses_; // true if send rpc, and will be reset if responses are processed
  bool need_update_server_tracer_;
  bool is_rs_server_info_updated_;
  static bool is_service_enabled_;
private:
  DISALLOW_COPY_AND_ASSIGN(ObHeartbeatService);
}; // end class ObHeartbeatService
} // end namespace rootserver
} // end namespace oceanbase
#endif

View File

@ -30,7 +30,6 @@
#include "share/ob_index_builder_util.h"
#include "observer/ob_server_struct.h"
#include "sql/resolver/ddl/ob_ddl_resolver.h"
#include "ob_server_manager.h"
#include "ob_zone_manager.h"
#include "ob_ddl_service.h"
#include "ob_root_service.h"

View File

@ -44,7 +44,6 @@ class ObSrvRpcProxy;
namespace rootserver
{
class ObServerManager;
class ObZoneManager;
class ObDDLService;
class ObDDLTaskRecord;

View File

@ -23,7 +23,7 @@
#include "share/schema/ob_schema_getter_guard.h"
#include "share/ls/ob_ls_table_iterator.h"//ObTenantLSTableIterator
#include "share/ls/ob_ls_info.h"//ObLSInfo
#include "rootserver/ob_server_manager.h"
#include "share/ob_all_server_tracer.h"
#include "observer/ob_server_struct.h"
#include "rootserver/ob_root_service.h"
namespace oceanbase
@ -37,7 +37,6 @@ namespace rootserver
ObLostReplicaChecker::ObLostReplicaChecker()
: inited_(false), cond_(),
server_manager_(NULL),
lst_operator_(NULL),
schema_service_(NULL)
{
@ -65,9 +64,7 @@ int ObLostReplicaChecker::check_cancel_()
return ret;
}
int ObLostReplicaChecker::init(ObServerManager &server_manager,
ObLSTableOperator &lst_operator,
ObMultiVersionSchemaService &schema_service)
int ObLostReplicaChecker::init(ObLSTableOperator &lst_operator, ObMultiVersionSchemaService &schema_service)
{
int ret = OB_SUCCESS;
const int64_t thread_cnt = 1;
@ -79,7 +76,6 @@ int ObLostReplicaChecker::init(ObServerManager &server_manager,
} else if (OB_FAIL(create(thread_cnt, "LostRepCheck"))) {
LOG_WARN("create empty server checker thread failed", K(ret), K(thread_cnt));
} else {
server_manager_ = &server_manager;
lst_operator_ = &lst_operator;
schema_service_ = &schema_service;
inited_ = true;
@ -293,21 +289,18 @@ int ObLostReplicaChecker::check_lost_server_(const ObAddr &server, bool &is_lost
} else if (!server.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid server", K(server), K(ret));
} else if (OB_ISNULL(server_manager_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server mgr is null", KR(ret), KP(server_manager_));
} else if (!server_manager_->has_build()) {
} else if (!SVR_TRACER.has_build()) {
is_lost_server = false;
} else {
ObServerStatus status;
ret = server_manager_->get_server_status(server, status);
ObServerInfoInTable server_info;
ret = SVR_TRACER.get_server_info(server, server_info);
if (OB_ENTRY_NOT_EXIST != ret && OB_SUCCESS != ret) {
LOG_WARN("get_server_status failed", K(server), K(ret));
} else if (OB_ENTRY_NOT_EXIST == ret) {
ret = OB_SUCCESS;
is_lost_server = true;
LOG_INFO("server not exist", K(server));
} else if (status.is_permanent_offline()) {
} else if (server_info.is_permanent_offline()) {
is_lost_server = true;
}
}

View File

@ -34,7 +34,6 @@ class ObMultiVersionSchemaService;
namespace rootserver
{
class ObServerManager;
class ObLostReplicaChecker : public ObRsReentrantThread
{
@ -42,9 +41,7 @@ public:
ObLostReplicaChecker();
virtual ~ObLostReplicaChecker();
int init(ObServerManager &server_manager,
share::ObLSTableOperator &lst_operator,
share::schema::ObMultiVersionSchemaService &schema_service);
int init(share::ObLSTableOperator &lst_operator, share::schema::ObMultiVersionSchemaService &schema_service);
int check_lost_replicas();
virtual void run3() override;
virtual int blocking_run() {
@ -66,7 +63,6 @@ private:
private:
bool inited_;
common::ObThreadCond cond_;
ObServerManager *server_manager_;
share::ObLSTableOperator *lst_operator_;
share::schema::ObMultiVersionSchemaService *schema_service_;
private:

View File

@ -19,7 +19,6 @@
#include "share/ls/ob_ls_status_operator.h"
#include "share/ls/ob_ls_table_operator.h"
#include "ob_unit_manager.h"
#include "ob_server_manager.h"
#include "ob_zone_manager.h"
using namespace oceanbase::common;
@ -29,7 +28,6 @@ using namespace oceanbase::rootserver;
ObMigrateUnitFinishChecker::ObMigrateUnitFinishChecker(volatile bool &stop)
: inited_(false),
unit_mgr_(nullptr),
server_mgr_(nullptr),
zone_mgr_(nullptr),
schema_service_(nullptr),
sql_proxy_(nullptr),
@ -54,7 +52,6 @@ int ObMigrateUnitFinishChecker::check_stop() const
int ObMigrateUnitFinishChecker::init(
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
share::schema::ObMultiVersionSchemaService &schema_service,
common::ObMySQLProxy &sql_proxy,
@ -66,7 +63,6 @@ int ObMigrateUnitFinishChecker::init(
LOG_WARN("init twice", KR(ret));
} else {
unit_mgr_ = &unit_mgr;
server_mgr_ = &server_mgr;
zone_mgr_ = &zone_mgr;
schema_service_ = &schema_service;
sql_proxy_ = &sql_proxy;
@ -211,7 +207,6 @@ int ObMigrateUnitFinishChecker::try_check_migrate_unit_finish_by_tenant(
LOG_INFO("try check migrate unit finish by tenant", K(tenant_id));
DRLSInfo dr_ls_info(gen_user_tenant_id(tenant_id),
unit_mgr_,
server_mgr_,
zone_mgr_,
schema_service_);
common::ObArray<share::ObLSStatusInfo> ls_status_info_array;

View File

@ -33,7 +33,6 @@ class ObLSTableOperator;
namespace rootserver
{
class ObUnitManager;
class ObServerManager;
class ObZoneManager;
class DRLSInfo;
@ -45,7 +44,6 @@ public:
public:
int init(
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
share::schema::ObMultiVersionSchemaService &schema_service,
common::ObMySQLProxy &sql_proxy,
@ -72,7 +70,6 @@ private:
// data members
bool inited_;
ObUnitManager *unit_mgr_;
ObServerManager *server_mgr_;
ObZoneManager *zone_mgr_;
share::schema::ObMultiVersionSchemaService *schema_service_;
common::ObMySQLProxy *sql_proxy_;

View File

@ -81,11 +81,11 @@ int ObRootBalancer::init(common::ObServerConfig &cfg,
} else if (OB_FAIL(create(root_balancer_thread_cnt, "RootBalance"))) {
LOG_WARN("create root balancer thread failed", K(ret), K(root_balancer_thread_cnt));
} else if (OB_FAIL(disaster_recovery_worker_.init(
self_addr, cfg, unit_mgr, server_mgr, zone_mgr,
self_addr, cfg, unit_mgr, zone_mgr,
dr_task_mgr, *GCTX.lst_operator_, schema_service, rpc_proxy, sql_proxy))) {
LOG_WARN("fail to init disaster recovery worker", KR(ret));
} else if (OB_FAIL(rootservice_util_checker_.init(
unit_mgr, server_mgr, zone_mgr, *GCTX.rs_rpc_proxy_, self_addr, schema_service, sql_proxy, *GCTX.lst_operator_))) {
unit_mgr, zone_mgr, *GCTX.rs_rpc_proxy_, self_addr, schema_service, sql_proxy, *GCTX.lst_operator_))) {
LOG_WARN("fail to init rootservice util checker", KR(ret));
} else {
inited_ = true;

View File

@ -47,7 +47,6 @@ namespace rootserver
class ObZoneManager;
class ObRootService;
class ObFetchPrimaryDDLOperator;
class ObServerManager;
// Interface of all the inspection task
class ObInspectionTask

View File

@ -16,9 +16,9 @@
#include "share/ob_srv_rpc_proxy.h"
#include "share/location_cache/ob_location_service.h"
#include "share/ob_all_server_tracer.h"
#include "lib/container/ob_se_array.h"
#include "rootserver/ddl_task/ob_ddl_scheduler.h"
#include "rootserver/ob_server_manager.h"
#include "rootserver/ob_unit_manager.h"
#include "rootserver/ob_rs_async_rpc_proxy.h"
@ -35,7 +35,6 @@ ObRootMinorFreeze::ObRootMinorFreeze()
:inited_(false),
stopped_(false),
rpc_proxy_(NULL),
server_manager_(NULL),
unit_manager_(NULL)
{
}
@ -49,7 +48,6 @@ ObRootMinorFreeze::~ObRootMinorFreeze()
}
int ObRootMinorFreeze::init(ObSrvRpcProxy &rpc_proxy,
ObServerManager &server_manager,
ObUnitManager &unit_manager)
{
int ret = OB_SUCCESS;
@ -58,7 +56,6 @@ int ObRootMinorFreeze::init(ObSrvRpcProxy &rpc_proxy,
LOG_WARN("init twice", K(ret));
} else {
rpc_proxy_ = &rpc_proxy;
server_manager_ = &server_manager;
unit_manager_ = &unit_manager;
stopped_ = false;
inited_ = true;
@ -105,7 +102,7 @@ bool ObRootMinorFreeze::is_server_alive(const ObAddr &server) const
bool is_alive = false;
if (OB_LIKELY(server.is_valid())) {
if (OB_FAIL(server_manager_->check_server_alive(server, is_alive))) {
if (OB_FAIL(SVR_TRACER.check_server_alive(server, is_alive))) {
LOG_WARN("fail to check whether server is alive, ", K(server), K(ret));
is_alive = false;
}
@ -242,13 +239,10 @@ int ObRootMinorFreeze::is_server_belongs_to_zone(const ObAddr &addr,
int ret = OB_SUCCESS;
ObZone server_zone;
if (OB_ISNULL(server_manager_)) {
ret = OB_NOT_INIT;
LOG_WARN("server_manager_ is NULL", K(ret));
} else if (0 == zone.size()) {
if (0 == zone.size()) {
server_in_zone = true;
} else if (OB_FAIL(server_manager_->get_server_zone(addr, server_zone))) {
LOG_WARN("fail to get server zone", K(ret));
} else if (OB_FAIL(SVR_TRACER.get_server_zone(addr, server_zone))) {
LOG_WARN("fail to get server zone", KR(ret), K(addr));
} else if (server_zone == zone) {
server_in_zone = true;
} else {
@ -362,8 +356,8 @@ int ObRootMinorFreeze::init_params_by_zone(const ObZone &zone,
if (OB_UNLIKELY(0 == zone.size())) {
ret = OB_ERR_UNEXPECTED;
} else {
if (OB_FAIL(server_manager_->get_servers_of_zone(zone, target_server_list))) {
LOG_WARN("fail to get tenant server list, ", K(ret));
if (OB_FAIL(SVR_TRACER.get_servers_of_zone(zone, target_server_list))) {
LOG_WARN("fail to get tenant server list, ", KR(ret), K(zone));
} else if (0 == target_server_list.count()) {
ret = OB_ZONE_NOT_ACTIVE;
LOG_WARN("empty zone or invalid", K(zone), K(ret));
@ -398,8 +392,8 @@ int ObRootMinorFreeze::init_params_by_server(const ObIArray<ObAddr> &server_list
ObSEArray<ObAddr, 256> target_server_list;
// get all alive server
if (OB_FAIL(server_manager_->get_alive_servers(zone, target_server_list))) {
LOG_WARN("fail to get alive servers, ", K(ret));
if (OB_FAIL(SVR_TRACER.get_alive_servers(zone, target_server_list))) {
LOG_WARN("fail to get alive servers, ", KR(ret), K(zone));
} else {
for (int i = 0; i < target_server_list.count() && OB_SUCC(ret); ++i) {
if (OB_FAIL(params.push_back_param(target_server_list.at(i)))) {

View File

@ -31,7 +31,6 @@ class ObSrvRpcProxy;
namespace rootserver
{
class ObServerManager;
class ObUnitManager;
class ObRootMinorFreeze
@ -41,7 +40,6 @@ public:
virtual ~ObRootMinorFreeze();
int init(obrpc::ObSrvRpcProxy &rpc_proxy,
ObServerManager &server_manager,
ObUnitManager &unit_manager);
void start();
void stop();
@ -106,7 +104,6 @@ private:
bool inited_;
bool stopped_;
obrpc::ObSrvRpcProxy *rpc_proxy_;
ObServerManager *server_manager_;
ObUnitManager *unit_manager_;
};

File diff suppressed because it is too large Load Diff

View File

@ -60,6 +60,7 @@
#include "rootserver/ob_disaster_recovery_task_executor.h"
#include "rootserver/ob_empty_server_checker.h"
#include "rootserver/ob_lost_replica_checker.h"
#include "rootserver/ob_server_zone_op_service.h"
namespace oceanbase
{
@ -455,9 +456,6 @@ public:
int fetch_location(const obrpc::ObFetchLocationArg &arg,
obrpc::ObFetchLocationResult &res);
int merge_finish(const obrpc::ObMergeFinishArg &arg);
int try_block_server(int rc, const common::ObAddr &server);
// 4.0 backup
// balance over
int receive_backup_over(const obrpc::ObBackupTaskRes &res);
@ -467,8 +465,6 @@ public:
int check_dangling_replica_finish(const obrpc::ObCheckDanglingReplicaFinishArg &arg);
int fetch_alive_server(const obrpc::ObFetchAliveServerArg &arg,
obrpc::ObFetchAliveServerResult &result);
int fetch_active_server_status(const obrpc::ObFetchAliveServerArg &arg,
obrpc::ObFetchActiveServerAddrResult &result);
int get_tenant_schema_versions(const obrpc::ObGetSchemaArg &arg,
obrpc::ObTenantSchemaVersions &tenant_schema_versions);
@ -650,18 +646,39 @@ public:
//----End of functions for managing row level security----
// server related
int load_server_manager();
ObStatusChangeCallback &get_status_change_cb() { return status_change_cb_; }
int add_server(const obrpc::ObAdminServerArg &arg);
int add_server_for_bootstrap_in_version_smaller_than_4_2_0(
const common::ObAddr &server,
const common::ObZone &zone);
int delete_server(const obrpc::ObAdminServerArg &arg);
int cancel_delete_server(const obrpc::ObAdminServerArg &arg);
int start_server(const obrpc::ObAdminServerArg &arg);
int stop_server(const obrpc::ObAdminServerArg &arg);
// Check if all ls has leader
// @param [in] print_str: string of operation. Used to print LOG_USER_ERROR "'print_str' not allowed".
int check_all_ls_has_leader(const char *print_str);
// zone related
int add_zone(const obrpc::ObAdminZoneArg &arg);
int delete_zone(const obrpc::ObAdminZoneArg &arg);
int start_zone(const obrpc::ObAdminZoneArg &arg);
int stop_zone(const obrpc::ObAdminZoneArg &arg);
int alter_zone(const obrpc::ObAdminZoneArg &arg);
int check_can_stop(
const common::ObZone &zone,
const common::ObIArray<common::ObAddr> &servers,
const bool is_stop_zone);
// Check if all ls has leader, enough member and if log is in sync.
// @param [in] to_stop_servers: server_list to be stopped.
// @param [in] skip_log_sync_check: whether skip log_sync check.
// @param [in] print_str: string of operation. Used to print LOG_USER_ERROR "'print_str' not allowed".
// @return: OB_SUCCESS if all check is passed.
// OB_OP_NOT_ALLOW if ls doesn't have leader/enough member or ls' log is not in sync.
int check_majority_and_log_in_sync(
const ObIArray<ObAddr> &to_stop_servers,
const bool skip_log_sync_check,
const char *print_str);
// system admin command (alter system ...)
int admin_switch_replica_role(const obrpc::ObAdminSwitchReplicaRoleArg &arg);
@ -760,8 +777,6 @@ public:
int broadcast_schema(const obrpc::ObBroadcastSchemaArg &arg);
ObDDLService &get_ddl_service() { return ddl_service_; }
ObDDLScheduler &get_ddl_scheduler() { return ddl_scheduler_; }
int check_merge_finish(const obrpc::ObCheckMergeFinishArg &arg);
int get_recycle_schema_versions(
const obrpc::ObGetRecycleSchemaVersionsArg &arg,
obrpc::ObGetRecycleSchemaVersionsResult &result);
@ -794,23 +809,15 @@ private:
int refresh_server(const bool fast_recover, const bool need_retry);
int refresh_schema(const bool fast_recover);
int init_sequence_id();
int load_server_manager();
int start_timer_tasks();
int stop_timer_tasks();
int request_heartbeats();
int self_check();
int update_all_server_and_rslist();
int check_zone_and_server(const ObIArray<ObAddr> &servers, bool &is_same_zone, bool &is_all_stopped);
int check_can_stop(const common::ObZone &zone,
const common::ObIArray<common::ObAddr> &servers,
const bool is_stop_zone);
bool have_other_stop_task(const ObZone &zone);
int init_sys_admin_ctx(ObSystemAdminCtx &ctx);
int set_cluster_version();
bool is_replica_count_reach_rs_limit(int64_t replica_count) { return replica_count > OB_MAX_CLUSTER_REPLICA_COUNT; }
int update_all_server_config();
int get_readwrite_servers(const common::ObIArray<common::ObAddr> &input_servers,
common::ObIArray<common::ObAddr> &readwrite_servers);
int generate_table_schema_in_tenant_space(
const obrpc::ObCreateTableArg &arg,
share::schema::ObTableSchema &table_schema);
@ -856,27 +863,14 @@ private:
const share::ObLeaseRequest &lease_request,
share::ObLeaseResponse &lease_response,
const share::ObServerStatus &server_status);
// Check if all ls has leader, enough member and if log is in sync.
// @param [in] to_stop_servers: server_list to be stopped.
// @param [in] skip_log_sync_check: whether skip log_sync check.
// @param [in] print_str: string of operation. Used to print LOG_USER_ERROR "'print_str' not allowed".
// @return: OB_SUCCESS if all check is passed.
// OB_OP_NOT_ALLOW if ls doesn't have leader/enough member or ls' log is not in sync.
int check_majority_and_log_in_sync_(
const ObIArray<ObAddr> &to_stop_servers,
const bool skip_log_sync_check,
const char *print_str);
// Check if all ls has leader
// @param [in] print_str: string of operation. Used to print LOG_USER_ERROR "'print_str' not allowed".
int check_all_ls_has_leader_(const char *print_str);
void update_cpu_quota_concurrency_in_memory_();
int set_cpu_quota_concurrency_config_();
int try_notify_switch_leader(const obrpc::ObNotifySwitchLeaderArg::SwitchLeaderComment &comment);
private:
int construct_rs_list_arg(obrpc::ObRsListArg &rs_list_arg);
int precheck_interval_part(const obrpc::ObAlterTableArg &arg);
int old_add_server(const obrpc::ObAdminServerArg &arg);
int old_delete_server(const obrpc::ObAdminServerArg &arg);
int old_cancel_delete_server(const obrpc::ObAdminServerArg &arg);
private:
static const int64_t OB_MAX_CLUSTER_REPLICA_COUNT = 10000000;
static const int64_t OB_ROOT_SERVICE_START_FAIL_COUNT_UPPER_LIMIT = 5;
@ -904,6 +898,7 @@ private:
ObHeartbeatChecker hb_checker_;
ObAllServerChecker server_checker_;
RsListChangeCb rs_list_change_cb_;
ObServerZoneOpService server_zone_op_service_;
// minor freeze
ObRootMinorFreeze root_minor_freeze_;

View File

@ -18,6 +18,7 @@
#include "lib/json/ob_json.h"
#include "lib/string/ob_sql_string.h"
#include "lib/hash/ob_hashset.h"
#include "lib/mysqlclient/ob_mysql_result.h"
#include "share/ob_rpc_struct.h"
#include "share/ob_share_util.h"
#include "share/ob_common_rpc_proxy.h"
@ -35,6 +36,8 @@
#include "logservice/ob_log_service.h"
#include "share/system_variable/ob_system_variable_alias.h"
#include "share/ob_primary_zone_util.h" // ObPrimaryZoneUtil
#include "share/ob_server_table_operator.h"
#include "share/ob_zone_table_operation.h"
using namespace oceanbase::rootserver;
using namespace oceanbase::share;
@ -1629,30 +1632,26 @@ int ObRootUtils::get_rs_default_timeout_ctx(ObTimeoutCtx &ctx)
}
//get all observer that is stopeed, start_service_time<=0 and lease expire
int ObRootUtils::get_invalid_server_list(const ObZoneManager &zone_mgr,
const ObServerManager &server_mgr,
ObIArray<ObAddr> &invalid_server_list)
int ObRootUtils::get_invalid_server_list(
const ObIArray<ObServerInfoInTable> &servers_info,
ObIArray<ObAddr> &invalid_server_list)
{
int ret = OB_SUCCESS;
invalid_server_list.reset();
ObArray<ObAddr> stopped_server_list;
ObArray<ObZone> stopped_zone_list;
ObArray<ObServerStatus> server_list;
ObZone empty_zone;
if (OB_FAIL(get_stopped_zone_list(zone_mgr, server_mgr, stopped_zone_list,
stopped_server_list))) {
if (OB_FAIL(get_stopped_zone_list(stopped_zone_list, stopped_server_list))) {
LOG_WARN("fail to get stopped zone list", KR(ret));
} else if (OB_FAIL(invalid_server_list.assign(stopped_server_list))) {
LOG_WARN("fail to assign array", KR(ret), K(stopped_zone_list));
} else if (OB_FAIL(server_mgr.get_server_statuses(empty_zone, server_list))) {
LOG_WARN("fail to get servers of zone", KR(ret));
} else {
for (int64_t i = 0; i < server_list.count() && OB_SUCC(ret); i++) {
const ObServerStatus &status = server_list.at(i);
if ((!status.is_alive() || !status.in_service())
&& !has_exist_in_array(invalid_server_list, status.server_)) {
if (OB_FAIL(invalid_server_list.push_back(status.server_))) {
LOG_WARN("fail to push back", KR(ret), K(status));
for (int64_t i = 0; i < servers_info.count() && OB_SUCC(ret); i++) {
const ObServerInfoInTable &server_info = servers_info.at(i);
if ((!server_info.is_alive() || !server_info.in_service())
&& !has_exist_in_array(invalid_server_list, server_info.get_server())) {
if (OB_FAIL(invalid_server_list.push_back(server_info.get_server()))) {
LOG_WARN("fail to push back", KR(ret), K(server_info));
}
}
}
@ -1660,72 +1659,248 @@ int ObRootUtils::get_invalid_server_list(const ObZoneManager &zone_mgr,
return ret;
}
int ObRootUtils::get_stopped_zone_list(const ObZoneManager &zone_mgr,
const ObServerManager &server_mgr,
ObIArray<ObZone> &stopped_zone_list,
ObIArray<ObAddr> &stopped_server_list)
int ObRootUtils::find_server_info(
const ObIArray<share::ObServerInfoInTable> &servers_info,
const common::ObAddr &server,
share::ObServerInfoInTable &server_info)
{
int ret = OB_SUCCESS;
ObServerManager::ObServerStatusArray server_array;
ObZone empty_zone;
if (OB_FAIL(server_mgr.get_server_statuses(empty_zone, server_array))) {
LOG_WARN("fail to get server status", KR(ret));
bool server_exists = false;
server_info.reset();
if (OB_UNLIKELY(!server.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid server", KR(ret), K(server));
} else {
for (int64_t i = 0; i < server_array.count() && OB_SUCC(ret); i++) {
if (!server_array.at(i).is_stopped()) {
//nothing todo
} else {
if (has_exist_in_array(stopped_zone_list, server_array.at(i).zone_)) {
//nothing todo
} else if (OB_FAIL(stopped_zone_list.push_back(server_array.at(i).zone_))) {
LOG_WARN("fail to push back", KR(ret), "zone", server_array.at(i).zone_);
}
if (OB_FAIL(ret)) {
} else if (has_exist_in_array(stopped_server_list, server_array.at(i).server_)) {
//nothing todo
} else if (OB_FAIL(stopped_server_list.push_back(server_array.at(i).server_))) {
LOG_WARN("fail to push back", KR(ret), "server", server_array.at(i).server_);
for (int64_t i = 0; OB_SUCC(ret) && !server_exists && i < servers_info.count(); i++) {
const ObServerInfoInTable & server_info_i = servers_info.at(i);
if (OB_UNLIKELY(!server_info_i.is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("server_info_i is not valid", KR(ret), K(server_info_i));
} else if (server == server_info_i.get_server()) {
server_exists = true;
if (OB_FAIL(server_info.assign(server_info_i))) {
LOG_WARN("fail to assign server_info", KR(ret), K(server_info_i));
}
}
}
}
LOG_INFO("get stop observer", KR(ret), K(stopped_zone_list), K(stopped_server_list));
//get stopped zone;
ObArray<ObZoneInfo> zone_infos;
if (OB_FAIL(ret)) {
} else if (OB_FAIL(zone_mgr.get_zone(zone_infos))) {
LOG_WARN("fail to get zone", K(ret));
if (OB_SUCC(ret) && !server_exists) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("server not exists", KR(ret), K(server));
}
return ret;
}
int ObRootUtils::get_servers_of_zone(
const ObIArray<share::ObServerInfoInTable> &servers_info,
const common::ObZone &zone,
ObIArray<common::ObAddr> &servers,
bool only_active_servers)
{
int ret = OB_SUCCESS;
servers.reset();
if (OB_UNLIKELY(zone.is_empty())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid zone", KR(ret), K(zone));
} else {
for (int64_t i = 0; i < zone_infos.count() && OB_SUCC(ret); i++) {
if (ObZoneStatus::ACTIVE == zone_infos.at(i).status_) {
//nothing todo
for (int64_t i = 0; OB_SUCC(ret) && i < servers_info.count(); i++) {
const ObServerInfoInTable &server_info = servers_info.at(i);
if (OB_UNLIKELY(!server_info.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid server_info", KR(ret), K(server_info));
} else if (zone != server_info.get_zone() || (only_active_servers && !server_info.is_active())) {
// do nothing
} else if (OB_FAIL(servers.push_back(server_info.get_server()))) {
LOG_WARN("fail to push an element into servers", KR(ret), K(server_info));
}
}
}
return ret;
}
int ObRootUtils::get_server_count(
const ObIArray<share::ObServerInfoInTable> &servers_info,
const ObZone &zone,
int64_t &alive_count,
int64_t &not_alive_count)
{
int ret = OB_SUCCESS;
alive_count = 0;
not_alive_count = 0;
for (int64_t i = 0; OB_SUCC(ret) && i < servers_info.count(); ++i) {
const ObServerInfoInTable &server_info = servers_info.at(i);
if (server_info.get_zone() == zone || zone.is_empty()) {
if (server_info.is_alive()) {
++alive_count;
} else {
if (has_exist_in_array(stopped_zone_list, zone_infos.at(i).zone_)) {
//nothing todo
} else if (OB_FAIL(stopped_zone_list.push_back(zone_infos.at(i).zone_))) {
LOG_WARN("fail to push back", KR(ret));
}
ObArray<common::ObAddr> server_list;
if (OB_FAIL(ret)) {
} else if (OB_FAIL(server_mgr.get_servers_of_zone(zone_infos.at(i).zone_,
server_list))) {
LOG_WARN("fail to get server of zone", KR(ret), K(i), "zone", zone_infos.at(i).zone_);
} else {
for (int64_t j = 0; j < server_list.count() && OB_SUCC(ret); j++) {
if (has_exist_in_array(stopped_server_list, server_list.at(j))) {
//nothing todo
} else if (OB_FAIL(stopped_server_list.push_back(server_list.at(j)))) {
LOG_WARN("fail to push back", KR(ret), K(j));
++not_alive_count;
}
}
}
return ret;
}
int ObRootUtils::check_server_alive(
const ObIArray<ObServerInfoInTable> &servers_info,
const ObAddr &server,
bool &is_alive)
{
int ret = OB_SUCCESS;
is_alive = false;
ObServerInfoInTable server_info;
if (OB_UNLIKELY(!server.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid server", KR(ret), K(server));
} else if (OB_FAIL(find_server_info(servers_info, server, server_info))) {
LOG_WARN("fail to find server_info", KR(ret), K(servers_info), K(server));
} else {
is_alive = server_info.is_alive();
}
return ret;
}
int ObRootUtils::get_server_resource_info(
const ObIArray<obrpc::ObGetServerResourceInfoResult> &server_resources_info,
const ObAddr &server,
share::ObServerResourceInfo &resource_info)
{
int ret = OB_SUCCESS;
bool server_exists = false;
resource_info.reset();
if (OB_UNLIKELY(!server.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid server", KR(ret), K(server));
} else {
for (int64_t i = 0; OB_SUCC(ret) && !server_exists && i < server_resources_info.count(); i++) {
const obrpc::ObGetServerResourceInfoResult &server_resource_info_i = server_resources_info.at(i);
if (OB_UNLIKELY(!server_resource_info_i.is_valid())){
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("server_resource_info_i is not valid", KR(ret), K(server_resource_info_i));
} else if (server == server_resource_info_i.get_server()) {
server_exists = true;
resource_info = server_resource_info_i.get_resource_info();
}
}
}
if (OB_SUCC(ret) && !server_exists) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("server not exists", KR(ret), K(server));
}
return ret;
}
int ObRootUtils::get_stopped_zone_list(
ObIArray<ObZone> &stopped_zone_list,
ObIArray<ObAddr> &stopped_server_list)
{
int ret = OB_SUCCESS;
ObSqlString sql;
ObTimeoutCtx ctx;
stopped_zone_list.reset();
stopped_server_list.reset();
if (OB_ISNULL(GCTX.sql_proxy_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("GCTX.sql_proxy_ is null", KR(ret), KP(GCTX.sql_proxy_));
} else if (OB_FAIL(ObRootUtils::get_rs_default_timeout_ctx(ctx))) {
LOG_WARN("fail to get timeout ctx", K(ret), K(ctx));
} else if (OB_FAIL(sql.assign_fmt("SELECT s.svr_ip, s.svr_port, s.zone "
"FROM %s AS s JOIN (SELECT zone, info FROM %s WHERE name = 'status') AS z "
"ON s.zone = z.zone WHERE s.stop_time > 0 OR z.info = 'INACTIVE'",
OB_ALL_SERVER_TNAME, OB_ALL_ZONE_TNAME))) {
LOG_WARN("fail to append sql", KR(ret));
} else if (OB_FAIL(ObZoneTableOperation::get_inactive_zone_list(*GCTX.sql_proxy_, stopped_zone_list))) {
LOG_WARN("fail to get inactive zone_list", KR(ret), KP(GCTX.sql_proxy_));
} else {
SMART_VAR(ObMySQLProxy::MySQLResult, res) {
int tmp_ret = OB_SUCCESS;
ObMySQLResult *result = NULL;
if (OB_FAIL(GCTX.sql_proxy_->read(res, OB_SYS_TENANT_ID, sql.ptr()))) {
LOG_WARN("fail to execute sql", KR(ret), K(sql));
} else if (OB_ISNULL(result = res.get_result())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("fail to get sql result", KR(ret), K(sql));
} else {
ObZone zone;
ObAddr server;
ObString tmp_zone;
ObString svr_ip;
while (OB_SUCC(ret)) {
if (OB_FAIL(result->next())) {
if (OB_ITER_END != ret) {
LOG_WARN("result next failed", KR(ret));
} else {
ret = OB_SUCCESS;
break;
}
} else {
int64_t svr_port = 0;
server.reset();
zone.reset();
svr_ip.reset();
tmp_zone.reset();
EXTRACT_VARCHAR_FIELD_MYSQL(*result, "svr_ip", svr_ip);
EXTRACT_INT_FIELD_MYSQL(*result, "svr_port", svr_port, int64_t);
EXTRACT_VARCHAR_FIELD_MYSQL(*result, "zone", tmp_zone);
if (OB_UNLIKELY(!server.set_ip_addr(svr_ip, static_cast<int32_t>(svr_port)))) {
ret = OB_INVALID_DATA;
LOG_WARN("fail to set ip addr", KR(ret), K(svr_ip), K(svr_port));
} else if (OB_FAIL(zone.assign(tmp_zone))) {
LOG_WARN("fail to assign zone", KR(ret), K(tmp_zone));
} else if (OB_FAIL(stopped_server_list.push_back(server))) {
LOG_WARN("fail to push an element into stopped_server_list", KR(ret), K(server));
} else if (has_exist_in_array(stopped_zone_list, zone)) {
// do nothing
} else if (OB_FAIL(stopped_zone_list.push_back(zone))) {
LOG_WARN("fail to push an element into stopped_zone_list", KR(ret), K(zone));
}
}
}
} //end else ACTIVE
} //end for zone_infos
}
}
}
LOG_INFO("get stopped zone list", KR(ret), K(stopped_server_list), K(stopped_zone_list));
return ret;
}
// Check whether any stop task exists outside `zone`: either a stopped server
// (stop_time > 0 in __all_server) or an inactive zone (status INACTIVE in
// __all_zone) whose zone differs from `zone`.
// Fail-safe: `bret` defaults to true, so any SQL/proxy error conservatively
// reports "other stop task exists".
// @param [in] zone  the zone to exclude from the check
// @return true if another stop task exists or the check itself failed;
//         false only when the query succeeded and counted zero rows.
bool ObRootUtils::have_other_stop_task(const ObZone &zone)
{
  int ret = OB_SUCCESS;
  bool bret = true;
  int64_t cnt = 0;
  ObSqlString sql;
  ObTimeoutCtx ctx;
  if (OB_ISNULL(GCTX.sql_proxy_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("GCTX.sql_proxy_ is null", KR(ret), KP(GCTX.sql_proxy_));
  } else if (OB_FAIL(ObRootUtils::get_rs_default_timeout_ctx(ctx))) {
    LOG_WARN("fail to get timeout ctx", KR(ret), K(ctx));
  } else if (OB_FAIL(sql.assign_fmt("SELECT COUNT(*) AS cnt FROM "
      "(SELECT zone FROM %s WHERE stop_time > 0 AND zone != '%s' UNION "
      "SELECT zone FROM %s WHERE name = 'status' AND info = 'INACTIVE' AND zone != '%s') AS stopped_zones",
      // NOTE: the derived table needs an alias ("AS stopped_zones") — MySQL
      // rejects an unaliased subquery in FROM ("every derived table must have
      // its own alias").
      OB_ALL_SERVER_TNAME, zone.ptr(), OB_ALL_ZONE_TNAME, zone.ptr()))) {
    LOG_WARN("fail to append sql", KR(ret), K(zone));
  } else {
    SMART_VAR(ObMySQLProxy::MySQLResult, res) {
      int tmp_ret = OB_SUCCESS;
      ObMySQLResult *result = NULL;
      if (OB_FAIL(GCTX.sql_proxy_->read(res, OB_SYS_TENANT_ID, sql.ptr()))) {
        LOG_WARN("fail to execute sql", KR(ret), K(sql));
      } else if (OB_ISNULL(result = res.get_result())) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("fail to get sql result", KR(ret), K(sql));
      } else if (OB_FAIL(result->next())) {
        LOG_WARN("fail to get next", KR(ret), K(sql));
      } else {
        EXTRACT_INT_FIELD_MYSQL(*result, "cnt", cnt, int64_t);
      }
      // A COUNT(*) query must yield exactly one row; more rows is unexpected.
      if (OB_SUCC(ret) && (OB_ITER_END != (tmp_ret = result->next()))) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("get more row than one", KR(ret), KR(tmp_ret), K(sql));
      }
    }
  }
  if (OB_SUCC(ret) && 0 == cnt) {
    bret = false;
  }
  LOG_INFO("have other stop task", KR(ret), K(bret), K(zone), K(cnt));
  return bret;
}
int ObRootUtils::get_tenant_intersection(ObUnitManager &unit_mgr,
ObIArray<ObAddr> &this_server_list,
ObIArray<ObAddr> &other_server_list,
@ -1961,6 +2136,27 @@ int ObRootUtils::check_left_f_in_primary_zone(ObZoneManager &zone_mgr,
return ret;
}
// Fetch the current role and palf proposal id of SYS LS (log stream 1) of the
// sys tenant, by switching into the sys tenant's MTL context and querying the
// LS's log handler.
// @param [out] proposal_id  the palf proposal id reported by the log handler
// @param [out] role         the replica's role (leader/follower)
// @return OB_SUCCESS on success; errors from MTL switch, get_ls or get_role.
int ObRootUtils::get_proposal_id_from_sys_ls(int64_t &proposal_id, ObRole &role)
{
  int ret = OB_SUCCESS;
  storage::ObLSHandle ls_handle;
  logservice::ObLogHandler *handler = nullptr;
  MTL_SWITCH(OB_SYS_TENANT_ID) {
    if (OB_FAIL(MTL(ObLSService*)->get_ls(SYS_LS, ls_handle, ObLSGetMod::RS_MOD))) {
      LOG_WARN("fail to get ls", KR(ret));
    } else if (OB_UNLIKELY(!ls_handle.is_valid())
        || OB_ISNULL(ls_handle.get_ls())
        || OB_ISNULL(handler = ls_handle.get_ls()->get_log_handler())) {
      ret = OB_ERR_UNEXPECTED;
      // Log KP(handler) rather than KP(ls_handle.get_ls()->get_log_handler()):
      // when get_ls() is NULL (one of the conditions above), re-evaluating the
      // expression inside LOG_WARN would dereference a null pointer.
      LOG_WARN("unexpected error", KR(ret), KP(ls_handle.get_ls()), KP(handler));
    } else if (OB_FAIL(handler->get_role(role, proposal_id))) {
      LOG_WARN("fail to get role", KR(ret));
    }
  }
  return ret;
}
int ObRootUtils::try_notify_switch_ls_leader(
obrpc::ObSrvRpcProxy *rpc_proxy,
const share::ObLSInfo &ls_info,

View File

@ -46,7 +46,6 @@ namespace rootserver
{
class ObDDLService;
class ObUnitManager;
class ObServerManager;
class ObZoneManager;
class ObLocalityDistribution;
template <typename T>
@ -604,14 +603,34 @@ public:
virtual ~ObRootUtils() {}
static int get_rs_default_timeout_ctx(ObTimeoutCtx &ctx);
static int get_invalid_server_list(const ObZoneManager &zone_mgr,
const ObServerManager &server_mgr,
common::ObIArray<common::ObAddr> &invalid_server_list);
static int get_stopped_zone_list(const ObZoneManager &zone_mgr,
const ObServerManager &server_mgr,
common::ObIArray<common::ObZone> &stopped_zone_list,
static int get_invalid_server_list(
const ObIArray<share::ObServerInfoInTable> &servers_info,
common::ObIArray<common::ObAddr> &invalid_server_list);
static int find_server_info(
const ObIArray<share::ObServerInfoInTable> &servers_info,
const common::ObAddr &server,
share::ObServerInfoInTable &server_info);
static int get_servers_of_zone(
const ObIArray<share::ObServerInfoInTable> &servers_info,
const common::ObZone &zone,
ObIArray<common::ObAddr> &servers,
bool only_active_servers = false);
static int get_server_count(
const ObIArray<share::ObServerInfoInTable> &servers_info,
const ObZone &zone,
int64_t &alive_count,
int64_t &not_alive_count);
static int check_server_alive(
const ObIArray<share::ObServerInfoInTable> &servers_info,
const common::ObAddr &server,
bool &is_alive);
static int get_server_resource_info(
const ObIArray<obrpc::ObGetServerResourceInfoResult> &server_resources_info,
const ObAddr &server,
share::ObServerResourceInfo &resource_info);
static int get_stopped_zone_list(common::ObIArray<common::ObZone> &stopped_zone_list,
common::ObIArray<common::ObAddr> &stopped_server_list);
static bool have_other_stop_task(const ObZone &zone);
static int check_primary_region_in_zonelist(share::schema::ObMultiVersionSchemaService *schema_service,
ObDDLService *ddl_service,
ObUnitManager &unit_mgr,
@ -643,6 +662,7 @@ public:
common::ObIArray<common::ObAddr> &this_server_list,
common::ObIArray<common::ObAddr> &other_server_list,
common::ObIArray<uint64_t> &tenant_ids);
static int get_proposal_id_from_sys_ls(int64_t &proposal_id, ObRole &role);
static int notify_switch_leader(
obrpc::ObSrvRpcProxy *rpc_proxy,

View File

@ -30,7 +30,6 @@ ObRootServiceUtilChecker::~ObRootServiceUtilChecker()
int ObRootServiceUtilChecker::init(
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
obrpc::ObCommonRpcProxy &common_rpc_proxy,
common::ObAddr &self,
@ -44,7 +43,6 @@ int ObRootServiceUtilChecker::init(
LOG_WARN("init twice", KR(ret));
} else if (OB_FAIL(migrate_unit_finish_checker_.init(
unit_mgr,
server_mgr,
zone_mgr,
schema_service,
sql_proxy,
@ -55,7 +53,6 @@ int ObRootServiceUtilChecker::init(
common_rpc_proxy,
self,
unit_mgr,
server_mgr,
zone_mgr,
sql_proxy,
lst_operator))) {

View File

@ -28,7 +28,6 @@ public:
public:
int init(
ObUnitManager &unit_mgr,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
obrpc::ObCommonRpcProxy &common_rpc_proxy,
common::ObAddr &self,

View File

@ -19,6 +19,7 @@
#include "rpc/obrpc/ob_rpc_packet.h"
#include "rpc/obrpc/ob_rpc_result_code.h"
#include "rpc/obrpc/ob_rpc_proxy.h"
#include "share/ob_heartbeat_struct.h"
namespace oceanbase
{
@ -69,6 +70,8 @@ RPC_F(obrpc::OB_GET_LEADER_LOCATIONS, obrpc::ObGetLeaderLocationsArg,
RPC_F(obrpc::OB_DDL_CHECK_TABLET_MERGE_STATUS, obrpc::ObDDLCheckTabletMergeStatusArg,
obrpc::ObDDLCheckTabletMergeStatusResult, ObCheckTabletMergeStatusProxy);
RPC_F(obrpc::OB_REFRESH_TENANT_INFO, obrpc::ObRefreshTenantInfoArg, obrpc::ObRefreshTenantInfoRes, ObRefreshTenantInfoProxy);
RPC_F(obrpc::OB_SEND_HEARTBEAT, share::ObHBRequest, share::ObHBResponse, ObSendHeartbeatProxy);
RPC_F(obrpc::OB_GET_SERVER_RESOURCE_INFO, obrpc::ObGetServerResourceInfoArg, obrpc::ObGetServerResourceInfoResult, ObGetServerResourceInfoProxy);
RPC_F(obrpc::OB_NOTIFY_SWITCH_LEADER, obrpc::ObNotifySwitchLeaderArg,
obrpc::ObSrvRpcProxy::ObRpc<obrpc::OB_NOTIFY_SWITCH_LEADER>::Response, ObNotifySwitchLeaderProxy);
RPC_F(obrpc::OB_UPDATE_TENANT_INFO_CACHE, obrpc::ObUpdateTenantInfoCacheArg, obrpc::ObUpdateTenantInfoCacheRes, ObUpdateTenantInfoCacheProxy);

View File

@ -60,6 +60,7 @@ public:
int start();
void stop();
void wait();
void reset_last_run_timestamp() { ATOMIC_STORE(&last_run_timestamp_, 0); }
TO_STRING_KV("name", get_thread_name());
private:

View File

@ -297,8 +297,8 @@ DEFINE_LIMITED_RS_RPC_PROCESSOR(obrpc::OB_EXECUTE_BOOTSTRAP, ObRpcExecuteBootstr
// check server_refreshed_ flag in rootservice
DEFINE_LIMITED_RS_RPC_PROCESSOR(obrpc::OB_FETCH_ALIVE_SERVER, ObRpcFetchAliveServerP, fetch_alive_server(arg_, result_));
DEFINE_RS_RPC_PROCESSOR(obrpc::OB_MERGE_FINISH, ObRpcMergeFinishP, merge_finish(arg_));
DEFINE_RS_RPC_PROCESSOR(obrpc::OB_FETCH_ACTIVE_SERVER_STATUS, ObRpcFetchActiveServerStatusP, fetch_active_server_status(arg_, result_));
// DEFINE_RS_RPC_PROCESSOR(obrpc::OB_MERGE_FINISH, ObRpcMergeFinishP, merge_finish(arg_));
// DEFINE_RS_RPC_PROCESSOR(obrpc::OB_FETCH_ACTIVE_SERVER_STATUS, ObRpcFetchActiveServerStatusP, fetch_active_server_status(arg_, result_));
DEFINE_RS_RPC_PROCESSOR(obrpc::OB_DISASTER_RECOVERY_TASK_REPLY, ObRpcDisasterRecoveryTaskReplyP, disaster_recovery_task_reply(arg_));
DEFINE_RS_RPC_PROCESSOR(obrpc::OB_BACKUP_LS_DATA_RES, ObRpcBackupDataResP, receive_backup_over(arg_));
@ -492,7 +492,6 @@ protected:
DEFINE_DDL_RS_RPC_PROCESSOR(obrpc::OB_DO_SEQUENCE_DDL, ObRpcDoSequenceDDLP, do_sequence_ddl(arg_));
DEFINE_RS_RPC_PROCESSOR(obrpc::OB_BROADCAST_SCHEMA, ObBroadcastSchemaP, broadcast_schema(arg_));
// only for upgrade
DEFINE_RS_RPC_PROCESSOR(obrpc::OB_CHECK_MERGE_FINISH, ObCheckMergeFinishP, check_merge_finish(arg_));
DEFINE_RS_RPC_PROCESSOR(obrpc::OB_GET_RECYCLE_SCHEMA_VERSIONS, ObGetRecycleSchemaVersionsP, get_recycle_schema_versions(arg_, result_));
DEFINE_DDL_RS_RPC_PROCESSOR(obrpc::OB_UPGRADE_TABLE_SCHEMA, ObRpcUpgradeTableSchemaP, upgrade_table_schema(arg_));
//label security ddl

View File

@ -24,6 +24,7 @@
#include "share/ob_freeze_info_proxy.h"
#include "share/ob_global_merge_table_operator.h"
#include "share/ob_zone_merge_info.h"
#include "share/ob_all_server_tracer.h"
namespace oceanbase
{
@ -87,8 +88,7 @@ int64_t ObSchemaHistoryRecyclerIdling::get_idle_interval_us()
ObSchemaHistoryRecycler::ObSchemaHistoryRecycler()
: inited_(false), idling_(stop_), schema_service_(NULL),
/*freeze_info_mgr_(NULL),*/ zone_mgr_(NULL), sql_proxy_(NULL),
server_mgr_(NULL), recycle_schema_versions_()
/*freeze_info_mgr_(NULL),*/ zone_mgr_(NULL), sql_proxy_(NULL), recycle_schema_versions_()
{
}
@ -104,8 +104,7 @@ int ObSchemaHistoryRecycler::init(
ObMultiVersionSchemaService &schema_service,
//ObFreezeInfoManager &freeze_info_manager,
ObZoneManager &zone_manager,
ObMySQLProxy &sql_proxy,
ObServerManager &server_mgr)
ObMySQLProxy &sql_proxy)
{
int ret = OB_SUCCESS;
const int schema_history_recycler_thread_cnt = 1;
@ -122,7 +121,6 @@ int ObSchemaHistoryRecycler::init(
//freeze_info_mgr_ = &freeze_info_manager;
zone_mgr_ = &zone_manager;
sql_proxy_ = &sql_proxy;
server_mgr_ = &server_mgr;
inited_ = true;
}
return ret;
@ -359,15 +357,13 @@ int ObSchemaHistoryRecycler::get_recycle_schema_version_by_server(
int ret = OB_SUCCESS;
ObArray<ObAddr> server_list;
obrpc::ObGetMinSSTableSchemaVersionArg arg;
ObZone zone;
if (OB_FAIL(check_inner_stat())) {
LOG_WARN("fail to check inner stat", KR(ret));
} else if (OB_ISNULL(server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ptr is null", KR(ret), KP_(server_mgr));
} else if (OB_FAIL(arg.tenant_id_arg_list_.assign(tenant_ids))) {
LOG_WARN("fail to assign arg", KR(ret));
} else if (OB_FAIL(server_mgr_->get_all_server_list(server_list))) {
LOG_WARN("fail to get all server list", KR(ret));
} else if (OB_FAIL(SVR_TRACER.get_servers_of_zone(zone, server_list))) {
LOG_WARN("fail to get server_list", KR(ret));
} else {
rootserver::ObGetMinSSTableSchemaVersionProxy proxy_batch(
*(GCTX.srv_rpc_proxy_), &obrpc::ObSrvRpcProxy::get_min_sstable_schema_version);

View File

@ -18,7 +18,6 @@
#include "rootserver/ob_thread_idling.h"
//#include "rootserver/ob_freeze_info_manager.h"
#include "rootserver/ob_zone_manager.h"
#include "rootserver/ob_server_manager.h"
#include "share/schema/ob_multi_version_schema_service.h"
#include "share/config/ob_server_config.h"
@ -162,8 +161,7 @@ public:
int init(share::schema::ObMultiVersionSchemaService &schema_service,
//ObFreezeInfoManager &freeze_info_manager,
ObZoneManager &zone_manager,
common::ObMySQLProxy &sql_proxy,
ObServerManager &server_mgr);
common::ObMySQLProxy &sql_proxy);
virtual void run3() override;
void wakeup();
void stop();
@ -211,7 +209,6 @@ private:
//ObFreezeInfoManager *freeze_info_mgr_;
ObZoneManager *zone_mgr_;
common::ObMySQLProxy *sql_proxy_;
ObServerManager *server_mgr_;
common::hash::ObHashMap<uint64_t, int64_t, common::hash::ReadWriteDefendMode> recycle_schema_versions_;
DISALLOW_COPY_AND_ASSIGN(ObSchemaHistoryRecycler);
};

View File

@ -21,6 +21,9 @@
#include "rootserver/ob_root_utils.h"
#include "rootserver/ob_root_service.h"
#include "storage/ob_file_system_router.h"
#include "share/ob_all_server_tracer.h"
#include "share/ob_server_table_operator.h"
#include "rootserver/ob_heartbeat_service.h"
using namespace oceanbase::common;
using namespace oceanbase::common::hash;
@ -62,6 +65,41 @@ int ObServerBalancer::init(
return ret;
}
// Collect, for every active server in `zone`, its info row (from the server
// tracer snapshot) and its resource info (from server_mgr_), packaged as
// ObGetServerResourceInfoResult entries in the same order as `servers_info`.
// @param [in]  zone                   target zone (empty zone semantics are
//                                     delegated to SVR_TRACER — presumably
//                                     "all zones"; confirm against
//                                     get_active_servers_info)
// @param [out] servers_info           active servers' info rows; reset on entry
// @param [out] server_resources_info  one resource-info result per active server
// @return OB_SUCCESS on success; errors from the tracer, server_mgr_ lookup,
//         or result construction. On error the out-arrays may be partially filled.
int ObServerBalancer::get_active_servers_info_and_resource_info_of_zone(
    const ObZone &zone,
    ObIArray<share::ObServerInfoInTable> &servers_info,
    ObIArray<obrpc::ObGetServerResourceInfoResult> &server_resources_info)
{
  int ret = OB_SUCCESS;
  servers_info.reset();
  server_resources_info.reset();
  // Scratch objects reused (and reset) for each server in the loop below.
  ObServerResourceInfo resource_info_in_server_status;
  obrpc::ObGetServerResourceInfoResult resource_info_result;
  if (OB_FAIL(SVR_TRACER.get_active_servers_info(zone, servers_info))) {
    LOG_WARN("fail to execute get_active_servers_info", KR(ret), K(zone));
  } else if (OB_ISNULL(server_mgr_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("server_mgr_ is null", KR(ret), KP(server_mgr_));
  } else {
    for (int64_t i = 0; OB_SUCC(ret) && i < servers_info.count(); i++) {
      const ObAddr &server = servers_info.at(i).get_server();
      resource_info_result.reset();
      resource_info_in_server_status.reset();
      // Resource info comes from server_mgr_'s in-memory status, not the table.
      if (OB_FAIL(server_mgr_->get_server_resource_info(server, resource_info_in_server_status))) {
        LOG_WARN("fail to get resource_info_in_server_status", KR(ret), K(server));
      } else if (OB_UNLIKELY(!resource_info_in_server_status.is_valid())) {
        ret = OB_INVALID_ARGUMENT;
        LOG_WARN("invalid resource_info_in_server_status", KR(ret), K(server), K(resource_info_in_server_status));
      } else if (OB_FAIL(resource_info_result.init(server,resource_info_in_server_status))) {
        LOG_WARN("fail to init", KR(ret), K(server), K(resource_info_in_server_status));
      } else if (OB_FAIL(server_resources_info.push_back(resource_info_result))) {
        LOG_WARN("fail to push an element into server_resources_info", KR(ret), K(resource_info_result));
      }
    }
  }
  return ret;
}
int ObServerBalancer::tenant_group_balance()
{
int ret = OB_SUCCESS;
@ -99,82 +137,6 @@ int ObServerBalancer::tenant_group_balance()
return ret;
}
// Decide whether this (OFS-mode) RS node currently hosts no sys-tenant unit
// for the given unit. Output is true only when the unit belongs to the sys
// tenant AND none of the unit loads on this RS's own address (GCTX.self_addr())
// is a sys-tenant unit residing on, or migrating from, this address.
// NOTE(review): "OFS" presumably refers to the shared-storage deployment mode;
// confirm against the caller in distribute_zone_unit.
// @param [in]  status                   unused (kept for signature compatibility)
// @param [in]  unit_info                the unit being considered
// @param [out] ofs_rs_without_sys_unit  see above; only meaningful on OB_SUCCESS
// @return OB_SUCCESS on success (including "no loads on this server", which is
//         treated as success); otherwise an error code.
int ObServerBalancer::check_if_ofs_rs_without_sys_unit(
    const share::ObServerStatus &status,
    const share::ObUnitInfo &unit_info,
    bool &ofs_rs_without_sys_unit)
{
  int ret = OB_SUCCESS;
  UNUSED(status);
  common::ObArray<ObUnitManager::ObUnitLoad> *unit_load_array;
  if (!check_inner_stat()) {
    ret = OB_INNER_STAT_ERROR;
    LOG_WARN("fail to check inner stat", KR(ret));
  } else if (OB_UNLIKELY(nullptr == unit_mgr_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unit_mgr ptr is null", KR(ret), KP(unit_mgr_));
  } else {
    // Start optimistic; any disqualifying condition below flips it to false.
    ofs_rs_without_sys_unit = true;
    if (OB_SYS_TENANT_ID != unit_info.pool_.tenant_id_) {
      // Only sys-tenant units are relevant to this check.
      ofs_rs_without_sys_unit = false;
    } else if (OB_FAIL(unit_mgr_->get_loads_by_server(GCTX.self_addr(), unit_load_array))) {
      if (OB_ENTRY_NOT_EXIST == ret) {
        // No unit load on this server at all: the "without sys unit" verdict
        // (true) stands, and the error is deliberately swallowed.
        ret = OB_SUCCESS; // server load empty, no need to distribute
      } else {
        LOG_WARN("fail to get loads by server", KR(ret));
      }
    } else if (OB_UNLIKELY(nullptr == unit_load_array)) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unit load array ptr is null", KR(ret));
    } else {
      // Scan all loads on this server; stop early once a sys unit is found here.
      for (int64_t i = 0;
           OB_SUCC(ret) && i < unit_load_array->count() && ofs_rs_without_sys_unit;
           ++i) {
        const ObUnit *unit = unit_load_array->at(i).unit_;
        const share::ObResourcePool *pool = unit_load_array->at(i).pool_;
        // some certain unit exists on observer with rs
        if (OB_UNLIKELY(nullptr == unit || nullptr == pool)) {
          ret = OB_ERR_UNEXPECTED;
          LOG_WARN("unit ptr is null", KR(ret), KP(unit), K(pool));
        } else if (OB_SYS_TENANT_ID != pool->tenant_id_) {
          // bypass
        } else if (unit->server_ == GCTX.self_addr()
            || unit->migrate_from_server_ == GCTX.self_addr()) {
          // A sys unit lives on (or is migrating from) this RS address.
          ofs_rs_without_sys_unit = false;
        }
      }
    }
  }
  return ret;
}
// Migrate a sys-tenant unit onto this RS node (GCTX.self_addr()). Intended for
// the OFS path where the RS has no sys unit locally; preconditions are
// validated strictly and any mismatch is an error.
// @param [in] status     server status of the unit's current server; must be
//                        in "taken over by rs" state
// @param [in] unit_info  the unit to migrate; must belong to the sys tenant
// @return OB_SUCCESS on success; OB_INVALID_ARGUMENT / OB_ERR_UNEXPECTED /
//         OB_STATE_NOT_MATCH on precondition failures; otherwise the error
//         from unit_mgr_->migrate_unit.
int ObServerBalancer::distribute_for_ofs_sys_unit(
    const share::ObServerStatus &status,
    const share::ObUnitInfo &unit_info)
{
  int ret = OB_SUCCESS;
  if (!check_inner_stat()) {
    ret = OB_INNER_STAT_ERROR;
    LOG_WARN("fail to check inner stat", KR(ret));
  } else if (!status.is_valid() || !unit_info.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", KR(ret), K(status), K(unit_info));
  } else if (OB_UNLIKELY(nullptr == unit_mgr_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unit mgr ptr is null", KR(ret));
  } else if (OB_SYS_TENANT_ID != unit_info.pool_.tenant_id_) {
    // Only sys-tenant units may be pulled onto the RS via this path.
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unit info tenant unexpected", KR(ret), "tenant_id", unit_info.pool_.tenant_id_);
  } else if (!status.is_taken_over_by_rs()) {
    ret = OB_STATE_NOT_MATCH;
    LOG_WARN("server not taken over by rs", KR(ret), K(status));
  } else if (OB_FAIL(unit_mgr_->migrate_unit(
      unit_info.unit_.unit_id_, GCTX.self_addr(), false/*not manual*/))) {
    // false => this is an automatic (balancer-driven) migration, not a manual one.
    LOG_WARN("fail to migrate unit", KR(ret), K(unit_info));
  }
  return ret;
}
int ObServerBalancer::check_has_unit_in_migration(
const common::ObIArray<ObUnitManager::ObUnitLoad> *unit_load_array,
bool &has_unit_in_migration)
@ -268,28 +230,34 @@ int ObServerBalancer::distribute_pool_for_standalone_sys_unit(
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unit config ptr is null", K(ret), K(pool));
} else {
ObServerStatus status;
ObServerInfoInTable server_info;
ObUnitStat unit_stat;
ObArray<ObUnitStat> in_migrate_unit_stat;
common::ObArray<common::ObAddr> excluded_servers;
common::ObAddr migrate_server;
std::string resource_not_enough_reason;
ObArray<ObServerInfoInTable> servers_info_of_zone;
ObArray<ObServerInfoInTable> active_servers_info_of_zone;
ObArray<obrpc::ObGetServerResourceInfoResult> active_servers_resource_info_of_zone;
for (int64_t i = 0; OB_SUCC(ret) && i < pool_unit_array->count(); ++i) {
excluded_servers.reset();
status.reset();
server_info.reset();
unit_stat.reset();
migrate_server.reset();
servers_info_of_zone.reset();
active_servers_resource_info_of_zone.reset();
active_servers_info_of_zone.reset();
share::ObUnit *unit = pool_unit_array->at(i);
if (OB_UNLIKELY(nullptr == unit)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unit ptr is null", K(ret));
} else if (unit->migrate_from_server_.is_valid()) {
// unit in migrate, bypass
} else if (OB_FAIL(server_mgr_->get_server_status(unit->server_, status))) {
} else if (OB_FAIL(SVR_TRACER.get_server_info(unit->server_, server_info))) {
LOG_WARN("fail to get server status", K(ret), "server", unit->server_);
} else if (!status.is_active()) {
} else if (!server_info.is_active()) {
// Only process servers that are active, skip non-active servers
LOG_INFO("unit server status not active", K(ret), K(status), K(*unit));
LOG_INFO("unit server status not active", K(ret), K(server_info), K(*unit));
} else if (!has_exist_in_array(sys_unit_server_array, unit->server_)) {
// bypass
} else if (OB_FAIL(unit_stat_mgr_->get_unit_stat(
@ -297,14 +265,32 @@ int ObServerBalancer::distribute_pool_for_standalone_sys_unit(
unit->zone_,
unit_stat))) {
LOG_WARN("fail to locate unit", K(ret), "unit", *unit);
} else if (OB_FAIL(unit_mgr_->get_excluded_servers(*unit, unit_stat, module, excluded_servers))) {
LOG_WARN("fail to get exclude servers", K(ret), "unit", *unit);
} else if (OB_FAIL(SVR_TRACER.get_servers_info(unit->zone_, servers_info_of_zone))) {
LOG_WARN("fail to servers_info_of_zone", KR(ret), K(unit->zone_));
} else if (OB_FAIL(get_active_servers_info_and_resource_info_of_zone(
unit->zone_,
active_servers_info_of_zone,
active_servers_resource_info_of_zone))) {
LOG_WARN("fail to execute get_active_servers_info_and_resource_info_of_zone", KR(ret), K(unit->zone_));
} else if (OB_FAIL(unit_mgr_->get_excluded_servers(
*unit,
unit_stat,
module,
servers_info_of_zone,
active_servers_resource_info_of_zone,
excluded_servers))) {
LOG_WARN("fail to get exclude servers", K(ret), KPC(unit), K(servers_info_of_zone),
K(active_servers_resource_info_of_zone));
} else if (OB_FAIL(append(excluded_servers, sys_unit_server_array))) {
LOG_WARN("fail tp append sys unit server array", K(ret));
} else if (OB_FAIL(unit_mgr_->choose_server_for_unit(unit_config->unit_resource(), unit->zone_,
excluded_servers, module,
migrate_server,
resource_not_enough_reason))) {
} else if (OB_FAIL(unit_mgr_->choose_server_for_unit(
unit_config->unit_resource(), unit->zone_,
excluded_servers,
module,
active_servers_info_of_zone,
active_servers_resource_info_of_zone,
migrate_server,
resource_not_enough_reason))) {
if (OB_ZONE_RESOURCE_NOT_ENOUGH == ret || OB_ZONE_SERVER_NOT_ENOUGH == ret) {
LOG_WARN("has no place to migrate unit", K(module), KR(ret), "unit", *unit,
K(excluded_servers), "resource_not_enough_reason", resource_not_enough_reason.c_str());
@ -479,34 +465,21 @@ int ObServerBalancer::distribute_zone_unit(const ObUnitManager::ZoneUnit &zone_u
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(zone_unit), K(ret));
} else {
ObServerStatus status;
ObServerInfoInTable server_info;
FOREACH_CNT_X(unit_info, zone_unit.unit_infos_, OB_SUCCESS == ret) {
status.reset();
server_info.reset();
if (ObUnit::UNIT_STATUS_ACTIVE != unit_info->unit_.status_) {
// ignore the unit that is in deleting
} else if (OB_FAIL(server_mgr_->get_server_status(unit_info->unit_.server_, status))) {
LOG_WARN("get_server_status failed", "server", unit_info->unit_.server_, K(ret));
} else if (status.is_active()) {
if (OB_FAIL(distribute_for_active(status, *unit_info))) {
LOG_WARN("distribute_for_active failed", K(status), "unit_info", *unit_info, K(ret));
} else if (OB_FAIL(SVR_TRACER.get_server_info(unit_info->unit_.server_, server_info))) {
LOG_WARN("get_server_info failed", "server", unit_info->unit_.server_, KR(ret));
} else if (server_info.is_active()) {
if (OB_FAIL(distribute_for_active(server_info, *unit_info))) {
LOG_WARN("distribute_for_active failed", K(server_info), "unit_info", *unit_info, K(ret));
}
} else if (status.is_permanent_offline()
|| status.is_deleting()
|| status.is_taken_over_by_rs()) {
bool ofs_rs_without_sys_unit = false;
if (OB_FAIL(check_if_ofs_rs_without_sys_unit(
status, *unit_info, ofs_rs_without_sys_unit))) {
LOG_WARN("fail to check if rs without sys unit", KR(ret));
} else if (ofs_rs_without_sys_unit) {
if (OB_FAIL(distribute_for_ofs_sys_unit(status, *unit_info))) {
LOG_WARN("distribute for ofs sys unit", KR(ret),
K(status), "unit_info", *unit_info);
}
} else {
if (OB_FAIL(distribute_for_permanent_offline_or_delete(status, *unit_info))) {
LOG_WARN("distribute for permanent offline or delete failed",
K(status), "unit_info", *unit_info, K(ret));
}
} else if (server_info.is_permanent_offline() || server_info.is_deleting()) {
if (OB_FAIL(distribute_for_permanent_offline_or_delete(server_info, *unit_info))) {
LOG_WARN("distribute for permanent offline or delete failed",
K(server_info), "unit_info", *unit_info, KR(ret));
}
}
}
@ -514,20 +487,24 @@ int ObServerBalancer::distribute_zone_unit(const ObUnitManager::ZoneUnit &zone_u
return ret;
}
int ObServerBalancer::distribute_for_active(const ObServerStatus &status,
const ObUnitInfo &unit_info)
int ObServerBalancer::distribute_for_active(
const ObServerInfoInTable &server_info,
const ObUnitInfo &unit_info)
{
int ret = OB_SUCCESS;
if (!check_inner_stat()) {
ret = OB_INNER_STAT_ERROR;
LOG_WARN("check inner stat failed", K_(inited), K(ret));
} else if (!status.is_valid() || !status.is_active() || !unit_info.is_valid()) {
} else if (!server_info.is_valid()
|| !server_info.is_active()
|| !unit_info.is_valid()
|| unit_info.unit_.server_ != server_info.get_server()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(status), K(unit_info), K(ret));
LOG_WARN("invalid argument", K(server_info), K(unit_info), K(ret));
} else {
//When the destination is blocked, cancel this migration
//Temporary offline does not cancel the task, need to wait for permanent offline
if ((status.is_migrate_in_blocked())
if ((server_info.is_migrate_in_blocked())
&& unit_info.unit_.migrate_from_server_.is_valid()) {
LOG_INFO("find unit server active but can't migrate in, "
"migrate_from_server is set", "unit", unit_info.unit_);
@ -539,55 +516,34 @@ int ObServerBalancer::distribute_for_active(const ObServerStatus &status,
return ret;
}
// Validate a unit for the OFS-zone "zombie unit" check. In its current form
// the function only performs argument/state validation and always reports
// false: no zombie-detection condition is implemented (yet).
// @param [in]  unit_info                the unit to examine; must be valid and
//                                       carry a non-empty zone
// @param [out] is_ofs_zone_zombie_unit  always set (false), including on the
//                                       error paths, so callers never read an
//                                       uninitialized flag
// @return OB_SUCCESS on success; OB_INNER_STAT_ERROR / OB_INVALID_ARGUMENT
//         on validation failure.
int ObServerBalancer::check_is_ofs_zone_zombie_unit(
    const ObUnitInfo &unit_info,
    bool &is_ofs_zone_zombie_unit)
{
  int ret = OB_SUCCESS;
  // Give the out-param a defined value up front: the original only assigned it
  // after validation, leaving it untouched on early error returns.
  is_ofs_zone_zombie_unit = false;
  if (!check_inner_stat()) {
    ret = OB_INNER_STAT_ERROR;
    LOG_WARN("fail to check inner stat", K(ret), K_(inited));
  } else if (OB_UNLIKELY(!unit_info.is_valid())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", K(ret), K(unit_info));
  } else {
    // The previously declared dst/src server locals were never read; removed
    // to silence unused-variable warnings. Only the zone is actually checked.
    const common::ObZone &zone = unit_info.unit_.zone_;
    if (OB_UNLIKELY(zone.is_empty())) {
      ret = OB_INVALID_ARGUMENT;
      LOG_WARN("invalid argument", K(ret), K(zone));
    }
  }
  return ret;
}
//When the migration destination is permanently offline,
//need to change to another destination
//Need to make sure that the member has been kicked out after being permanently offline
int ObServerBalancer::distribute_for_permanent_offline_or_delete(
const ObServerStatus &status,
const share::ObServerInfoInTable &server_info,
const ObUnitInfo &unit_info)
{
int ret = OB_SUCCESS;
const char *module = "UNIT_BALANCE_FOR_SERVER_PERMANENT_OFFLINE_OR_DELETE";
LOG_INFO("find unit server permanent offline or delete, need distribute unit",
K(module), "unit", unit_info.unit_, "server", status.server_);
K(module), "unit", unit_info.unit_, K(server_info));
const bool enable_sys_unit_standalone = GCONF.enable_sys_unit_standalone;
bool need_migrate_unit = false;
if (!check_inner_stat()) {
ret = OB_INNER_STAT_ERROR;
LOG_WARN("check inner stat failed", K_(inited), K(ret));
} else if (!status.is_valid() || !unit_info.is_valid()) {
} else if (!server_info.is_valid()
|| !unit_info.is_valid()
|| unit_info.unit_.server_ != server_info.get_server()
|| (!server_info.is_deleting() && !server_info.is_permanent_offline())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(status), K(unit_info), K(ret));
LOG_WARN("invalid argument", K(server_info), K(unit_info), KR(ret));
} else if (!unit_info.unit_.migrate_from_server_.is_valid()) {
//The current unit is in a stable state, move it out
need_migrate_unit = true;
LOG_INFO("server is permanent offline or in deleting status, need migrate unit",
K(unit_info), K(status));
K(unit_info), K(server_info));
} else {
//Currently moving in, try to cancel
bool is_canceled = false;
@ -597,19 +553,21 @@ int ObServerBalancer::distribute_for_permanent_offline_or_delete(
//If cancel fails, wait for the result of the check-in process
//If the move-in process cannot be ended,
//the delete server lasts for too long, and manual intervention should be required
if (!status.is_with_partition()) {
//If there is no local replica, cancel this migration directly
const ObUnitManager::EndMigrateOp op = ObUnitManager::ABORT;
if (OB_FAIL(unit_mgr_->end_migrate_unit(unit_info.unit_.unit_id_, op))) {
LOG_WARN("end_migrate_unit failed", "unit_id", unit_info.unit_.unit_id_, K(op), K(ret));
} else {
need_migrate_unit = true;
LOG_INFO("unit has no partition, abort the migration",
K(ret), K(unit_info), K(op), K(status));
}
}
// ** FIXME (linqiucen): now we do not do the following commented process due to the deprecated variable with_partition
// ** FIXME (linqiucen): in the future, we can do this process again by directly looking up the related table
// if (!status.is_with_partition()) {
// //If there is no local replica, cancel this migration directly
// const ObUnitManager::EndMigrateOp op = ObUnitManager::ABORT;
// if (OB_FAIL(unit_mgr_->end_migrate_unit(unit_info.unit_.unit_id_, op))) {
// LOG_WARN("end_migrate_unit failed", "unit_id", unit_info.unit_.unit_id_, K(op), K(ret));
// } else {
// need_migrate_unit = true;
// LOG_INFO("unit has no partition, abort the migration",
// K(ret), K(unit_info), K(op), K(status));
// }
// }
} else {
LOG_INFO("revert migrate unit success", K(ret), K(unit_info), K(status));
LOG_INFO("revert migrate unit success", K(ret), K(unit_info), K(server_info));
}
}
ObUnitStat unit_stat;
@ -617,17 +575,44 @@ int ObServerBalancer::distribute_for_permanent_offline_or_delete(
const ObZone zone = unit_info.unit_.zone_;
ObAddr migrate_server;
std::string resource_not_enough_reason;
ObArray<ObServerInfoInTable> servers_info_of_zone;
ObArray<ObServerInfoInTable> active_servers_info_of_zone;
ObArray<obrpc::ObGetServerResourceInfoResult> active_servers_resource_info_of_zone;
if (OB_FAIL(ret) || !need_migrate_unit) {
//nothing todo
} else if (OB_ISNULL(unit_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unit_mgr_ is null", KR(ret), KP(unit_mgr_));
} else if (OB_FAIL(unit_stat_mgr_->get_unit_stat(
unit_info.unit_.unit_id_,
unit_info.unit_.zone_,
unit_stat))) {
LOG_WARN("fail to locate unit", K(ret), "unit", unit_info.unit_);
} else if (OB_FAIL(unit_mgr_->get_excluded_servers(unit_info.unit_, unit_stat, module, excluded_servers))) {
LOG_WARN("get_excluded_servers failed", "unit", unit_info.unit_, K(ret));
} else if (OB_FAIL(unit_mgr_->choose_server_for_unit(unit_info.config_.unit_resource(), zone, excluded_servers, module,
migrate_server, resource_not_enough_reason))) {
} else if (OB_FAIL(SVR_TRACER.get_servers_info(unit_info.unit_.zone_, servers_info_of_zone))) {
LOG_WARN("fail to servers_info_of_zone", KR(ret), K(unit_info.unit_.zone_));
} else if (OB_FAIL(get_active_servers_info_and_resource_info_of_zone(
unit_info.unit_.zone_,
active_servers_info_of_zone,
active_servers_resource_info_of_zone))) {
LOG_WARN("fail to execute get_active_servers_info_and_resource_info_of_zone", KR(ret), K(unit_info.unit_.zone_));
} else if (OB_FAIL(unit_mgr_->get_excluded_servers(
unit_info.unit_,
unit_stat,
module,
servers_info_of_zone,
active_servers_resource_info_of_zone,
excluded_servers))) {
LOG_WARN("get_excluded_servers failed", "unit", unit_info.unit_, KR(ret), K(servers_info_of_zone),
K(active_servers_resource_info_of_zone));
} else if (OB_FAIL(unit_mgr_->choose_server_for_unit(
unit_info.config_.unit_resource(),
zone,
excluded_servers,
module,
active_servers_info_of_zone,
active_servers_resource_info_of_zone,
migrate_server,
resource_not_enough_reason))) {
if (OB_ZONE_RESOURCE_NOT_ENOUGH == ret || OB_ZONE_SERVER_NOT_ENOUGH == ret) {
LOG_WARN("has no place to migrate unit", K(module), KR(ret), K(zone), K(excluded_servers),
K(unit_info), "resource_not_enough_reason", resource_not_enough_reason.c_str());
@ -650,7 +635,7 @@ int ObServerBalancer::distribute_for_permanent_offline_or_delete(
migrate_server))) {
LOG_WARN("fail to try migrate unit", "unit", unit_info.unit_, K(migrate_server), K(ret));
} else {
LOG_INFO("migrate unit success", K(module), K(unit_info), K(status), "dest_server", migrate_server);
LOG_INFO("migrate unit success", K(module), K(unit_info), K(server_info), "dest_server", migrate_server);
}
}
return ret;
@ -664,21 +649,21 @@ int ObServerBalancer::distribute_for_permanent_offline_or_delete(
int ObServerBalancer::distribute_for_migrate_in_blocked(const ObUnitInfo &unit_info)
{
int ret = OB_SUCCESS;
ObServerStatus status;
ObServerInfoInTable server_info;
if (!check_inner_stat()) {
ret = OB_INNER_STAT_ERROR;
LOG_WARN("check inner stat failed", K_(inited), K(ret));
} else if (!unit_info.is_valid() || !unit_info.unit_.migrate_from_server_.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(unit_info), K(ret));
} else if (OB_FAIL(server_mgr_->get_server_status(
unit_info.unit_.migrate_from_server_, status))) {
} else if (OB_FAIL(SVR_TRACER.get_server_info(
unit_info.unit_.migrate_from_server_, server_info))) {
LOG_WARN("get_server_status failed",
"server", unit_info.unit_.migrate_from_server_, K(ret));
} else if (ObUnit::UNIT_STATUS_ACTIVE != unit_info.unit_.status_) {
// ignore the unit which is in deleting
} else {
if (status.can_migrate_in()) {
if (server_info.can_migrate_in()) {
LOG_INFO("unit migrate_from_server can migrate in, "
"migrate unit back to migrate_from_server", "unit", unit_info.unit_);
const ObUnitManager::EndMigrateOp op = ObUnitManager::REVERSE;
@ -690,7 +675,7 @@ int ObServerBalancer::distribute_for_migrate_in_blocked(const ObUnitInfo &unit_i
//nothing todo
LOG_WARN("NOTICE: unit migration is hung. dest server is blocked "
"and source server can not migrate in. NEED to be involved manually.",
"unit", unit_info.unit_, "migrate_from_server", status);
"unit", unit_info.unit_, "migrate_from_server", server_info);
}
/*
@ -781,11 +766,23 @@ int ObServerBalancer::try_migrate_unit(const uint64_t unit_id,
const ObAddr &dst)
{
int ret = OB_SUCCESS;
ObServerResourceInfo dst_resource_info;
if (!inited_) {
ret = OB_NOT_INIT;
LOG_WARN("server balancer not init", K_(inited), K(ret));
} else if (OB_ISNULL(server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server_mgr_ is null", KR(ret), KP(server_mgr_));
} else if (OB_FAIL(server_mgr_->get_server_resource_info(dst, dst_resource_info))) {
LOG_WARN("fail to get dst_resource_info", KR(ret), K(dst));
} else {
ret = unit_mgr_->try_migrate_unit(unit_id, tenant_id, unit_stat, migrating_unit_stat, dst);
ret = unit_mgr_->try_migrate_unit(
unit_id,
tenant_id,
unit_stat,
migrating_unit_stat,
dst,
dst_resource_info);
unit_migrated_ = true;
}
return ret;
@ -843,17 +840,15 @@ int ObServerBalancer::check_can_execute_rebalance(
} else if (OB_UNLIKELY(zone.is_empty())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(zone));
} else if (OB_UNLIKELY(NULL == server_mgr_
|| NULL == unit_mgr_
|| NULL == zone_mgr_)) {
} else if (OB_ISNULL(unit_mgr_) || OB_ISNULL(zone_mgr_) || OB_ISNULL(server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server_mgr_ or unit_mgr_ ptr is null", K(ret), KP(server_mgr_), KP(unit_mgr_));
LOG_WARN("unit_mgr_, zone_mgr_ or server_mgr_ is null", KR(ret), KP(unit_mgr_), KP(zone_mgr_), KP(server_mgr_));
} else if (OB_FAIL(zone_mgr_->get_zone(zone, zone_info))) {
LOG_WARN("fail to get zone info", K(ret), K(zone));
} else if (ObZoneStatus::ACTIVE != zone_info.status_) {
can_execute_rebalance = false;
LOG_INFO("cannot execute server rebalance since zone inactive", K(zone));
} else if (OB_FAIL(server_mgr_->get_servers_of_zone(zone, server_list))) {
} else if (OB_FAIL(SVR_TRACER.get_servers_of_zone(zone, server_list))) {
LOG_WARN("fail to get servers of zone", K(ret), K(zone));
} else if (OB_FAIL(unit_mgr_->inner_get_unit_ids(unit_ids))) {
LOG_WARN("fail to get unit ids", K(ret));
@ -862,7 +857,8 @@ int ObServerBalancer::check_can_execute_rebalance(
share::ObUnitConfig sum_load;
for (int64_t i = 0; can_execute_rebalance && OB_SUCC(ret) && i < server_list.count(); ++i) {
const common::ObAddr &server = server_list.at(i);
share::ObServerStatus server_status;
ObServerInfoInTable server_info;
ObServerResourceInfo resource_info;
ObArray<ObUnitManager::ObUnitLoad> *unit_loads = nullptr;
sum_load.reset();
if (OB_FAIL(unit_mgr_->get_loads_by_server(server, unit_loads))) {
@ -879,18 +875,18 @@ int ObServerBalancer::check_can_execute_rebalance(
}
if (OB_FAIL(ret)) {
// failed
} else if (OB_FAIL(server_mgr_->get_server_status(server, server_status))) {
} else if (OB_FAIL(SVR_TRACER.get_server_info(server, server_info))) {
LOG_WARN("fail to get server status", K(ret));
} else if (server_status.is_temporary_offline()
|| server_status.is_stopped()
|| ObServerStatus::OB_SERVER_ADMIN_TAKENOVER_BY_RS == server_status.admin_status_) {
} else if (server_info.is_temporary_offline() || server_info.is_stopped()) {
can_execute_rebalance = false;
LOG_INFO("cannot execute server rebalance", K(server_status));
} else if (fabs(server_status.resource_info_.report_cpu_assigned_ - sum_load.min_cpu()) > CPU_EPSILON
|| fabs(server_status.resource_info_.report_cpu_max_assigned_ - sum_load.max_cpu()) > CPU_EPSILON
|| server_status.resource_info_.report_mem_assigned_ != sum_load.memory_size()) {
LOG_INFO("cannot execute server rebalance", K(server_info));
} else if (OB_FAIL(server_mgr_->get_server_resource_info(server_info.get_server(), resource_info))) {
LOG_WARN("fail to execute get_server_resource_info", KR(ret), K(server_info.get_server()));
} else if (fabs(resource_info.report_cpu_assigned_ - sum_load.min_cpu()) > CPU_EPSILON
|| fabs(resource_info.report_cpu_max_assigned_ - sum_load.max_cpu()) > CPU_EPSILON
|| resource_info.report_mem_assigned_ != sum_load.memory_size()) {
can_execute_rebalance = false;
LOG_INFO("cannot execute server rebalance", K(server_status), K(sum_load));
LOG_INFO("cannot execute server rebalance", K(resource_info), K(sum_load));
} else {} // no more to do
}
for (int64_t j = 0; can_execute_rebalance && OB_SUCC(ret) && j < unit_ids.count(); ++j) {
@ -1225,15 +1221,15 @@ int ObServerBalancer::generate_available_servers(
} else if (OB_UNLIKELY(zone.is_empty())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(zone));
} else if (OB_UNLIKELY(NULL == server_mgr_ || NULL == zone_mgr_ || NULL == unit_mgr_)) {
} else if (OB_ISNULL(zone_mgr_) || OB_ISNULL(unit_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server_mgr_ or zone_mgr_ ptr is null", K(ret), KP(server_mgr_), KP(zone_mgr_));
LOG_WARN("zone_mgr_ or unit_mgr_ is null", K(ret), KP(unit_mgr_), KP(zone_mgr_));
} else if (OB_FAIL(zone_mgr_->get_zone(zone, zone_info))) {
LOG_WARN("fail to get zone info", K(ret), K(zone));
} else if (ObZoneStatus::ACTIVE != zone_info.status_) {
ret = OB_STATE_NOT_MATCH;
LOG_WARN("zone is not in active", K(ret), K(zone_info));
} else if (OB_FAIL(server_mgr_->get_servers_of_zone(zone, server_list))) {
} else if (OB_FAIL(SVR_TRACER.get_servers_of_zone(zone, server_list))) {
LOG_WARN("fail to get servers of zone", K(ret), K(zone));
} else if (OB_FAIL(unit_mgr_->get_tenant_unit_servers(
OB_SYS_TENANT_ID, zone, sys_unit_server_array))) {
@ -1241,19 +1237,18 @@ int ObServerBalancer::generate_available_servers(
} else {
available_servers.reset();
for (int64_t i = 0; OB_SUCC(ret) && i < server_list.count(); ++i) {
share::ObServerStatus server_status;
if (OB_FAIL(server_mgr_->get_server_status(server_list.at(i), server_status))) {
share::ObServerInfoInTable server_info;
if (OB_FAIL(SVR_TRACER.get_server_info(server_list.at(i), server_info))) {
LOG_WARN("fail to get server status", K(ret));
} else if (server_status.is_temporary_offline()
|| server_status.is_stopped()) {
} else if (server_info.is_temporary_offline() || server_info.is_stopped()) {
ret = OB_STATE_NOT_MATCH;
LOG_WARN("server in zone is not stable, stop balance servers", K(ret), K(server_status),
"is_temporary_offline", server_status.is_temporary_offline(),
"is_stopped", server_status.is_stopped());
LOG_WARN("server in zone is not stable, stop balance servers", K(ret), K(server_info),
"is_temporary_offline", server_info.is_temporary_offline(),
"is_stopped", server_info.is_stopped());
} else if (excluded_sys_unit_server
&& has_exist_in_array(sys_unit_server_array, server_list.at(i))) {
// bypass
} else if (server_status.is_active()) {
} else if (server_info.is_active()) {
if (OB_FAIL(available_servers.push_back(server_list.at(i)))) {
LOG_WARN("fail to push back", K(ret));
}
@ -2698,9 +2693,9 @@ int ObServerBalancer::calc_inter_ttg_weights(
|| NULL == info_need_amend)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), KP(info_need_amend));
} else if (OB_UNLIKELY(NULL == server_mgr_)) {
} else if (OB_ISNULL(server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server_mgr_ ptr is null", K(ret));
LOG_WARN("server_mgr_ is null", KR(ret), KP(server_mgr_));
} else {
LoadSum load_sum;
for (int64_t i = 0;
@ -2747,10 +2742,10 @@ int ObServerBalancer::calc_inter_ttg_weights(
ResourceSum resource_sum;
for (int64_t i = 0; OB_SUCC(ret) && i < available_servers.count(); ++i) {
const common::ObAddr &server = available_servers.at(i);
share::ObServerStatus server_status;
if (OB_FAIL(server_mgr_->get_server_status(server, server_status))) {
LOG_WARN("fail to get server status", K(ret), K(server));
} else if (OB_FAIL(resource_sum.append_resource(server_status.resource_info_))) {
share::ObServerResourceInfo resource_info;
if (OB_FAIL(server_mgr_->get_server_resource_info(server, resource_info))) {
LOG_WARN("fail to get server resource_info", KR(ret), K(server));
} else if (OB_FAIL(resource_sum.append_resource(resource_info))) {
LOG_WARN("fail to append resource", K(ret));
} else {} // no more to do
}
@ -3352,7 +3347,7 @@ int ObServerBalancer::do_migrate_unit_task(
if (!unit_migrate_stat.unit_load_.is_valid()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid argument", K(ret), "unit_load", unit_migrate_stat.unit_load_);
} else if (OB_FAIL(unit_mgr_->check_can_migrate_in(
} else if (OB_FAIL(SVR_TRACER.check_server_can_migrate_in(
unit_migrate_stat.arranged_pos_, can_migrate_in))) {
LOG_WARN("fail to check can migrate in", K(ret));
} else if (!can_migrate_in) {
@ -3408,8 +3403,9 @@ int ObServerBalancer::do_migrate_unit_task(
} else if (!unit_migrate_stat->unit_load_.is_valid()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("invalid argument", K(ret), "unit_load", unit_migrate_stat->unit_load_);
} else if (OB_FAIL(unit_mgr_->check_can_migrate_in(
unit_migrate_stat->arranged_pos_, can_migrate_in))) {
} else if (OB_FAIL(SVR_TRACER.check_server_can_migrate_in(
unit_migrate_stat->arranged_pos_,
can_migrate_in))) {
LOG_WARN("fail to check can migrate in", K(ret));
} else if (!can_migrate_in) {
// bypass
@ -3574,9 +3570,9 @@ int ObServerBalancer::check_servers_resource_enough(
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", K(ret));
} else if (OB_UNLIKELY(NULL == unit_mgr_ || NULL == server_mgr_)) {
} else if (OB_ISNULL(unit_mgr_) || OB_ISNULL(server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unit_mgr_ or server_mgr_ or unit_stat_mgr_ is null", K(ret));
LOG_WARN("unit_mgr_ or server_mgr_ is null", KR(ret), KP(unit_mgr_), KP(server_mgr_));
} else if (OB_FAIL(unit_mgr_->get_hard_limit(hard_limit))) {
LOG_WARN("fail to hard limit", K(ret));
} else if (OB_FAIL(get_server_balance_critical_disk_waterlevel(disk_waterlevel))) {
@ -3585,15 +3581,16 @@ int ObServerBalancer::check_servers_resource_enough(
enough = true;
for (int64_t i = 0; OB_SUCC(ret) && enough && i < server_load_sums.count(); ++i) {
ObArray<ObUnitManager::ObUnitLoad> *unit_loads = NULL;
share::ObServerStatus server_status;
share::ObServerResourceInfo server_resource_info;
const common::ObAddr &server = server_load_sums.at(i).server_;
LoadSum load_sum = server_load_sums.at(i).load_sum_;
int64_t disk_in_use = server_load_sums.at(i).disk_in_use_;
ServerDiskStatistic disk_statistic;
if (OB_FAIL(zone_disk_statistic_.get_server_disk_statistic(server, disk_statistic))) {
LOG_WARN("fail to get disk statistic", K(ret), K(server));
} else if (OB_FAIL(server_mgr_->get_server_status(server, server_status))) {
LOG_WARN("fail to get server status", K(ret));
} else if (OB_FAIL(server_mgr_->get_server_resource_info(server, server_resource_info))) {
// **TODO (linqiucen.lqc): temp.solution
LOG_WARN("fail to get server resource info", KR(ret), K(server));
} else if (OB_FAIL(unit_mgr_->get_loads_by_server(server, unit_loads))) {
if (OB_ENTRY_NOT_EXIST != ret) {
LOG_WARN("fail to get loads by server", K(ret));
@ -3617,13 +3614,13 @@ int ObServerBalancer::check_servers_resource_enough(
}
if (OB_SUCC(ret)) {
if (load_sum.load_sum_.max_cpu()
> server_status.resource_info_.cpu_ * hard_limit
> server_resource_info.cpu_ * hard_limit
|| load_sum.load_sum_.min_cpu()
> server_status.resource_info_.cpu_
> server_resource_info.cpu_
|| static_cast<double>(load_sum.load_sum_.memory_size())
> static_cast<double>(server_status.resource_info_.mem_total_)
> static_cast<double>(server_resource_info.mem_total_)
|| static_cast<double>(load_sum.load_sum_.log_disk_size())
> static_cast<double>(server_status.resource_info_.log_disk_total_)
> static_cast<double>(server_resource_info.log_disk_total_)
|| static_cast<double>(disk_in_use + disk_statistic.disk_in_use_)
> static_cast<double>(disk_statistic.disk_total_) * disk_waterlevel) {
enough = false;
@ -4433,25 +4430,22 @@ int ObServerBalancer::generate_complete_server_loads(
|| zone.is_empty())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), KP(resource_weights), K(weights_count));
} else if (OB_UNLIKELY(NULL == unit_mgr_ || NULL == server_mgr_)) {
} else if (OB_ISNULL(unit_mgr_) || OB_ISNULL(server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unit_mgr_ or server_mgr_ ptr is null", K(ret), KP(unit_mgr_), KP(server_mgr_));
} else if (OB_FAIL(server_mgr_->get_servers_of_zone(zone, zone_servers))) {
LOG_WARN("unit_mgr_ or server_mgr_ is null", K(ret), KP(unit_mgr_), KP(server_mgr_));
} else if (OB_FAIL(SVR_TRACER.get_servers_of_zone(zone, zone_servers))) {
LOG_WARN("fail to get servers of zone", K(ret), K(zone));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < zone_servers.count(); ++i) {
const common::ObAddr &server = zone_servers.at(i);
ServerTotalLoad server_load;
server_load.server_ = server;
share::ObServerStatus server_status;
share::ObServerResourceInfo server_resource_info;
ObArray<ObUnitManager::ObUnitLoad> *unit_loads = NULL;
LoadSum load_sum;
ResourceSum resource_sum;
server_load.wild_server_ = !has_exist_in_array(available_servers, server);
if (OB_FAIL(server_mgr_->get_server_status(server, server_status))) {
if (OB_FAIL(server_mgr_->get_server_resource_info(server, server_resource_info))) {
LOG_WARN("fail to get server status", K(ret), K(server));
} else if (OB_FAIL(resource_sum.append_resource(server_status.resource_info_))) {
LOG_WARN("fail to append resource", K(ret));
} else if (OB_FAIL(unit_mgr_->get_loads_by_server(server, unit_loads))) {
if (OB_ENTRY_NOT_EXIST != ret) {
LOG_WARN("get loads by server failed", K(ret), K(server));
@ -4469,7 +4463,7 @@ int ObServerBalancer::generate_complete_server_loads(
server_load.resource_weights_[i] = resource_weights[i];
}
server_load.load_sum_ = load_sum;
server_load.resource_info_ = server_status.resource_info_;
server_load.resource_info_ = server_resource_info;
if (OB_FAIL(server_load.update_load_value())) {
LOG_WARN("fail to update load value", K(ret));
} else if (OB_FAIL(server_loads.push_back(server_load))) {
@ -5585,10 +5579,10 @@ int ObServerBalancer::calc_global_balance_resource_weights(
|| RES_MAX != weights_count)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(zone));
} else if (OB_UNLIKELY(NULL == unit_mgr_ || NULL == server_mgr_)) {
} else if (OB_ISNULL(unit_mgr_) || OB_ISNULL(server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unit_mgr_ or server_mgr_ is null", K(ret));
} else if (OB_FAIL(server_mgr_->get_servers_of_zone(zone, zone_servers))) {
LOG_WARN("unit_mgr_ or server_mgr_ is null", KR(ret), KP(unit_mgr_), KP(server_mgr_));
} else if (OB_FAIL(SVR_TRACER.get_servers_of_zone(zone, zone_servers))) {
LOG_WARN("fail to get zone servers", K(ret), K(zone));
} else {
LoadSum load_sum;
@ -5610,10 +5604,10 @@ int ObServerBalancer::calc_global_balance_resource_weights(
ResourceSum resource_sum;
for (int64_t i = 0; OB_SUCC(ret) && i < available_servers.count(); ++i) {
const common::ObAddr &server = available_servers.at(i);
share::ObServerStatus server_status;
if (OB_FAIL(server_mgr_->get_server_status(server, server_status))) {
LOG_WARN("fail to get server status", K(ret), K(server));
} else if (OB_FAIL(resource_sum.append_resource(server_status.resource_info_))) {
share::ObServerResourceInfo resource_info;
if (OB_FAIL(server_mgr_->get_server_resource_info(server, resource_info))) {
LOG_WARN("fail to get resource_info", KR(ret), K(server));
} else if (OB_FAIL(resource_sum.append_resource(resource_info))) {
LOG_WARN("fail to append resource", K(ret));
} else {} // no more to do
}
@ -6262,30 +6256,31 @@ int ObServerBalancer::generate_server_load(
if (OB_UNLIKELY(!inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", K(ret));
} else if (OB_UNLIKELY(NULL == server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server_mgr_ ptr is null", K(ret), KP(server_mgr_));
} else if (available_servers.count() <= 0) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(available_servers));
} else if (OB_ISNULL(server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server_mgr_ is null", KR(ret), KP(server_mgr_));
} else {
// Place the generated unitgroup load into the corresponding server load
server_loads.reset();
ServerLoad server_load;
share::ObServerStatus server_status;
share::ObServerResourceInfo resource_info;
share::ObServerResourceInfo intra_ttg_resource_info;
// Pre-fill the server first, and fill in the server resource info
for (int64_t i = 0; OB_SUCC(ret) && i < available_servers.count(); ++i) {
server_load.reset();
server_status.reset();
resource_info.reset();
server_load.server_ = available_servers.at(i);
if (OB_FAIL(server_mgr_->get_server_status(server_load.server_, server_status))) {
LOG_WARN("fail to get server status", K(ret));
if (OB_FAIL(server_mgr_->get_server_resource_info(server_load.server_, resource_info))) {
LOG_WARN("fail to get server status", KR(ret), K(server_load.server_));
} else if (OB_FAIL(try_regulate_intra_ttg_resource_info(
server_status.resource_info_, intra_ttg_resource_info))) {
resource_info,
intra_ttg_resource_info))) {
LOG_WARN("fail to try regulate intra resource info", K(ret));
} else {
server_load.resource_info_ = server_status.resource_info_;
server_load.resource_info_ = resource_info;
if (OB_FAIL(server_loads.push_back(server_load))) {
LOG_WARN("fail to push back", K(ret));
} else {} // no more to do
@ -7866,11 +7861,11 @@ int ObServerBalancer::generate_zone_server_disk_statistic(
} else if (OB_UNLIKELY(zone.is_empty())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ret), K(zone));
} else if (OB_UNLIKELY(NULL == server_mgr_)) {
} else if (OB_ISNULL(server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server_mgr_ ptr is null", K(ret));
} else if (OB_FAIL(server_mgr_->get_servers_of_zone(zone, server_list))) {
LOG_WARN("fail to get servers of zone", K(ret));
LOG_WARN("server_mgr_ ptr is null", KR(ret), KP(server_mgr_));
} else if (OB_FAIL(SVR_TRACER.get_servers_of_zone(zone, server_list))) {
LOG_WARN("fail to get servers of zone", KR(ret), K(zone));
} else if (OB_FAIL(get_server_balance_critical_disk_waterlevel(disk_waterlevel))) {
LOG_WARN("fail to get server balance disk water level", K(ret));
} else {
@ -7878,36 +7873,36 @@ int ObServerBalancer::generate_zone_server_disk_statistic(
zone_disk_statistic_.zone_ = zone;
for (int64_t i = 0; OB_SUCC(ret) && i < server_list.count(); ++i) {
const common::ObAddr &server = server_list.at(i);
share::ObServerStatus server_status;
share::ObServerResourceInfo server_resource_info;
share::ObServerInfoInTable server_info;
ServerDiskStatistic disk_statistic;
if (OB_FAIL(server_mgr_->get_server_status(server, server_status))) {
LOG_WARN("fail to get server status", K(ret));
} else if (server_status.is_temporary_offline()
|| server_status.is_stopped()) {
if (OB_FAIL(SVR_TRACER.get_server_info(server, server_info))) {
LOG_WARN("fail to get server info", KR(ret), K(server));
} else if (server_info.is_temporary_offline() || server_info.is_stopped()) {
ret = OB_STATE_NOT_MATCH;
LOG_WARN("server is not stable, stop balance servers in this zone",
K(ret), K(server), K(zone),
"is_temporary_offline", server_status.is_temporary_offline(),
"is_stopped", server_status.is_stopped());
} else if (server_status.is_active()) {
KR(ret), K(server), K(zone),
"is_temporary_offline", server_info.is_temporary_offline(),
"is_stopped", server_info.is_stopped());
} else if (OB_FAIL(server_mgr_->get_server_resource_info(server, server_resource_info))) {
LOG_WARN("fail to get server resource info", KR(ret), K(server));
} else if (server_info.is_active()) {
disk_statistic.server_ = server;
disk_statistic.wild_server_ = false;
disk_statistic.disk_in_use_ = server_status.resource_info_.disk_in_use_;
disk_statistic.disk_total_ = server_status.resource_info_.disk_total_;
disk_statistic.disk_in_use_ = server_resource_info.disk_in_use_;
disk_statistic.disk_total_ = server_resource_info.disk_total_;
if (static_cast<double>(disk_statistic.disk_in_use_)
> disk_waterlevel * static_cast<double>(disk_statistic.disk_total_)) {
zone_disk_statistic_.over_disk_waterlevel_ = true;
}
} else if (ObServerStatus::OB_SERVER_ADMIN_DELETING == server_status.admin_status_
|| ObServerStatus::OB_SERVER_ADMIN_TAKENOVER_BY_RS == server_status.admin_status_
|| server_status.is_permanent_offline()) {
} else if (server_info.is_deleting() || server_info.is_permanent_offline()) {
disk_statistic.server_ = server;
disk_statistic.wild_server_ = true;
disk_statistic.disk_in_use_ = server_status.resource_info_.disk_in_use_;
disk_statistic.disk_total_ = server_status.resource_info_.disk_total_;
disk_statistic.disk_in_use_ = server_resource_info.disk_in_use_;
disk_statistic.disk_total_ = server_resource_info.disk_total_;
} else {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unknow server status", K(ret), K(server_status));
LOG_WARN("unknow server_info", K(ret), K(server_info));
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(zone_disk_statistic_.append(disk_statistic))) {

View File

@ -268,6 +268,7 @@ public:
ObZoneManager &zone_mgr,
ObServerManager &server_mgr,
ObUnitStatManager &unit_stat_mgr);
int build_active_servers_resource_info();
// 1. migrate units to balance the load
// 2. migrate units from offline servers
int balance_servers();
@ -285,19 +286,9 @@ public:
common::ObIArray<ObTenantGroupParser::TenantNameGroup> &tenant_groups,
bool &legal);
private:
int check_is_ofs_zone_zombie_unit(
const share::ObUnitInfo &unit_info,
bool &is_ofs_zone_zombie_unit);
bool check_inner_stat() const { return inited_; }
// distribute for server online/permanent_offline/migrate_in_blocked
int distribute_for_server_status_change();
int check_if_ofs_rs_without_sys_unit(
const share::ObServerStatus &status,
const share::ObUnitInfo &unit_info,
bool &ofs_rs_without_sys_unit);
int distribute_for_ofs_sys_unit(
const share::ObServerStatus &status,
const share::ObUnitInfo &unit_info);
int check_has_unit_in_migration(
const common::ObIArray<ObUnitManager::ObUnitLoad> *unit_load_array,
bool &has_unit_in_migration);
@ -307,10 +298,11 @@ private:
int distribute_by_pool(share::ObResourcePool *pool);
int distribute_for_migrate_in_blocked(const share::ObUnitInfo &unit_info);
int distribute_zone_unit(const ObUnitManager::ZoneUnit &zone_unit);
int distribute_for_active(const share::ObServerStatus &status,
const share::ObUnitInfo &unit_info);
int distribute_for_active(
const share::ObServerInfoInTable &server_info,
const share::ObUnitInfo &unit_info);
int distribute_for_permanent_offline_or_delete(
const share::ObServerStatus &status,
const share::ObServerInfoInTable &server_info,
const share::ObUnitInfo &unit_info);
int distribute_for_standalone_sys_unit();
@ -330,6 +322,10 @@ private:
const common::ObAddr &dst);
int try_cancel_migrate_unit(const share::ObUnit &unit, bool &is_canceled);
int get_active_servers_info_and_resource_info_of_zone(
const ObZone &zone,
ObIArray<share::ObServerInfoInTable> &servers_info,
ObIArray<obrpc::ObGetServerResourceInfoResult> &server_resources_info);
// the new version server balance
private:
@ -1421,7 +1417,7 @@ protected:
ObUnitStatManager *unit_stat_mgr_;
CountBalanceStrategy count_balance_strategy_;
InnerTenantGroupBalanceStrategy &inner_ttg_balance_strategy_;
// Each time the unit balance between servers is executed,
// Each time the unit balance between servers is executed,
// the disk information of each server in the zone is calculated
ZoneServerDiskStatistic zone_disk_statistic_;

File diff suppressed because it is too large Load Diff

View File

@ -82,9 +82,6 @@ public:
const bool commit = true);
virtual int start_server_list(const obrpc::ObServerList &server_list, const common::ObZone &zone);
virtual int stop_server_list(const obrpc::ObServerList &server_list, const common::ObZone &zone);
// only add to memory, only used by bootstrap
int add_server_list(const obrpc::ObServerInfoList &server_list,
uint64_t &server_id);
// server_id is OB_INVALID_ID before build server manager from __all_server
int receive_hb(const share::ObLeaseRequest &lease_request,
@ -94,10 +91,6 @@ public:
const common::ObAddr &server,
uint64_t &server_id) const;
int expend_server_lease(
const common::ObAddr &server,
const int64_t new_lease_end);
// if server not exist or server's status is not serving, return false
// otherwise, return true
virtual int check_server_alive(const common::ObAddr &server, bool &is_alive) const;
@ -106,27 +99,6 @@ public:
virtual int check_server_stopped(const common::ObAddr &server, bool &is_stopped) const;
virtual int check_server_permanent_offline(const common::ObAddr &server, bool &is_offline) const;
virtual int check_migrate_in_blocked(const common::ObAddr &addr, bool &blocked) const;
virtual int check_server_takenover_by_rs(const common::ObAddr &addr, bool &taken_over_by_rs) const;
virtual int check_server_valid_for_partition(const common::ObAddr &server, bool &is_valid) const;
virtual int check_server_with_id_exist(
const common::ObAddr &server,
const uint64_t server_id,
bool &exist) const;
virtual int get_alive_servers(const common::ObZone &zone, ObIServerArray &server_list) const;
virtual int get_servers_by_status(ObIServerArray &active_server_list,
ObIServerArray &inactive_server_list) const;
virtual int get_servers_by_status(const common::ObZone &zone,
ObIServerArray &active_server_list,
ObIServerArray &inactive_server_list) const;
virtual int get_alive_server_count(const common::ObZone &zone, int64_t &count) const;
virtual int get_zone_max_unit_num(const common::ObZone &zone, int64_t &count) const;
virtual int get_active_server_array(const common::ObZone &zone, ObIServerArray &server_list) const;
virtual int get_servers_takenover_by_rs(
const common::ObZone &zone,
ObIServerArray &server_list) const;
virtual int finish_server_recovery(const common::ObAddr &server);
void clear_in_recovery_server_takenover_by_rs(const common::ObAddr &server);
virtual int get_servers_of_zone(
const common::ObZone &zone,
ObServerArray &server_list) const;
@ -143,16 +115,20 @@ public:
// get ObServerStatus through server addr, return OB_ENTRY_NOT_EXIST if not exist
virtual int get_server_status(const common::ObAddr &server,
share::ObServerStatus &server_status) const;
int get_server_resource_info(
const common::ObAddr &server,
share::ObServerResourceInfo &resource_info);
int update_server_status(const share::ObServerStatus &server_status);
// build ObServerManager from __all_server table
int load_server_manager();
int load_server_statuses(const ObServerStatusArray &server_status);
virtual bool has_build() const;
virtual int get_all_server_list(common::ObIArray<common::ObAddr> &server_list);
// get server infos of zone, if zone is empty, get all server_infos
virtual int get_server_statuses(const common::ObZone &zone,
ObServerStatusIArray &server_statuses,
bool include_permanent_offline = true) const;
virtual int build_server_resource_info_result(
const common::ObZone &zone,
ObIArray<obrpc::ObGetServerResourceInfoResult> &active_servers_resource_info);
virtual int get_server_statuses(const ObServerArray &servers,
ObServerStatusArray &server_statuses) const;
int get_persist_server_statuses(ObServerStatusArray &server_statuses);
@ -160,36 +136,18 @@ public:
const ObAddr &server,
ObDRTaskMgr &disaster_recovery_task_mgr,
const bool with_rootserver);
int get_lease_duration(int64_t &lease_duration_time) const;
virtual int get_server_zone(const common::ObAddr &addr, common::ObZone &zone) const;
inline ObIStatusChangeCallback &get_status_change_callback() const;
inline const common::ObAddr &get_rs_addr() const { return rs_addr_; }
void reset();
// set %zone_merged to true if servers in the same zone of %addr merged to %frozen_version
virtual int update_merged_version(
const common::ObAddr &addr, int64_t frozen_version, bool &zone_merged);
int get_merged_version(const common::ObAddr &addr, int64_t &merged_version) const;
int block_migrate_in(const common::ObAddr &addr);
int unblock_migrate_in(const common::ObAddr &addr);
int64_t to_string(char *buf, const int64_t buf_len) const;
virtual int set_with_partition(const common::ObAddr &server);
virtual int clear_with_partiton(const common::ObAddr &server, const int64_t last_hb_time);
virtual int set_force_stop_hb(const common::ObAddr &server, const bool &force_stop_hb);
virtual int is_server_stopped(const common::ObAddr &server, bool &is_stopped) const;
virtual int get_server_leader_cnt(const common::ObAddr &server, int64_t &leader_cnt) const;
int check_other_zone_stopped(const common::ObZone &zone, bool &stopped);
int have_server_stopped(const common::ObZone &zone, bool &is_stopped) const;
int get_min_server_version(char min_server_version[OB_SERVER_VERSION_LENGTH]);
bool have_server_deleting() const;
int check_all_server_active(bool &all_active) const;
int try_modify_recovery_server_takenover_by_rs(
const common::ObAddr &server,
const common::ObZone &zone);
int get_server_id(const ObZone &zone, const common::ObAddr &server, uint64_t &server_id) const;
static int try_delete_server_working_dir(
const common::ObZone &zone,
const common::ObAddr &server,
const int64_t svr_seq);
protected:
int construct_not_empty_server_set(
common::hash::ObHashSet<common::ObAddr> &not_empty_server_set);
@ -201,21 +159,13 @@ protected:
const bool with_rootserver,
share::ObServerStatus &server_status);
int reset_existing_rootserver();
int try_delete_server_working_dir(
const common::ObZone &zone,
const common::ObAddr &server,
const int64_t svr_seq);
int update_admin_status(const common::ObAddr &server,
const share::ObServerStatus::ServerAdminStatus status,
const bool remove);
int set_migrate_in_blocked(const common::ObAddr &addr, const bool block);
int find(const common::ObAddr &server, const share::ObServerStatus *&status) const;
int find(const common::ObAddr &server, share::ObServerStatus *&status);
int fetch_new_server_id(uint64_t &server_id);
int check_server_id_used(const uint64_t server_id, bool &server_id_used);
int start_or_stop_server(const common::ObAddr &server,
const common::ObZone &zone, const bool is_start);
virtual int start_server(const common::ObAddr &server, const common::ObZone &zone);

View File

@ -0,0 +1,759 @@
/**
* Copyright (c) 2022 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX RS
#include "ob_server_zone_op_service.h"
#include "share/ob_zone_table_operation.h"
#include "share/ob_service_epoch_proxy.h"
#include "share/ob_max_id_fetcher.h"
#include "lib/mysqlclient/ob_mysql_transaction.h" // ObMySQLTransaction
#include "rootserver/ob_root_service.h" // callback
#include "share/ob_all_server_tracer.h"
#include "rootserver/ob_server_manager.h"
namespace oceanbase
{
using namespace common;
using namespace share;
using namespace obrpc;
namespace rootserver
{
// Default-construct the service in an uninitialized state; all collaborator
// pointers are non-owning and are wired up later by init().
ObServerZoneOpService::ObServerZoneOpService()
    : is_inited_(false),
      server_change_callback_(NULL),
      rpc_proxy_(NULL),
      sql_proxy_(NULL),
      lst_operator_(NULL),
      unit_manager_(NULL)
{
}
// Nothing to release: the service only holds non-owning pointers injected
// via init().
ObServerZoneOpService::~ObServerZoneOpService()
{
}
// Wire up the service's collaborators and initialize the __all_server table
// operator. Expected to be called exactly once during rootservice startup;
// not thread-safe.
// @param server_change_callback  notified after every server-list mutation
// @param rpc_proxy               used to probe candidate servers on ADD SERVER
// @param lst_operator            used to read SYS LS replicas for the rs list
// @param unit_manager            used for resource checks on delete/cancel
// @param sql_proxy               backing connection for __all_server access
// @return OB_INIT_TWICE if already initialized, otherwise the result of
//         initializing the server table operator
int ObServerZoneOpService::init(
    ObIServerChangeCallback &server_change_callback,
    ObSrvRpcProxy &rpc_proxy,
    ObLSTableOperator &lst_operator,
    ObUnitManager &unit_manager,
    ObMySQLProxy &sql_proxy
)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(is_inited_)) {
    ret = OB_INIT_TWICE;
    LOG_WARN("server zone operation service has been inited already", KR(ret), K(is_inited_));
  } else if (OB_FAIL(st_operator_.init(&sql_proxy))) {
    LOG_WARN("fail to init server table operator", KR(ret));
  } else {
    server_change_callback_ = &server_change_callback;
    rpc_proxy_ = &rpc_proxy;
    sql_proxy_ = &sql_proxy;
    lst_operator_ = &lst_operator;
    unit_manager_ = &unit_manager;
    // mark inited only after every collaborator is in place
    is_inited_ = true;
  }
  return ret;
}
// Handle ADD SERVER: for each candidate address, rpc the server itself to
// verify it is empty and to learn its locally configured zone, validate that
// zone against the one named in the command, then insert the server into
// __all_server via add_server_.
// @param servers       addresses to add
// @param zone          zone named in the command; empty means "use the
//                      server's locally configured zone"
// @param is_bootstrap  NOTE(review): currently unused in this body — confirm
//                      whether the bootstrap path should skip the emptiness
//                      check or use a different rpc mode
int ObServerZoneOpService::add_servers(const ObIArray<ObAddr> &servers, const ObZone &zone, bool is_bootstrap)
{
  int ret = OB_SUCCESS;
  uint64_t sys_tenant_data_version = 0;
  ObCheckServerForAddingServerArg rpc_arg;
  ObCheckServerForAddingServerResult rpc_result;
  ObZone picked_zone;
  ObTimeoutCtx ctx;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_FAIL(GET_MIN_DATA_VERSION(OB_SYS_TENANT_ID, sys_tenant_data_version))) {
    LOG_WARN("fail to get sys tenant's min data version", KR(ret));
  } else if (OB_ISNULL(rpc_proxy_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("rpc_proxy_ is null", KR(ret), KP(rpc_proxy_));
  } else if (OB_FAIL(rootserver::ObRootUtils::get_rs_default_timeout_ctx(ctx))) {
    LOG_WARN("fail to get timeout ctx", KR(ret), K(ctx));
  } else if (OB_FAIL(rpc_arg.init(
      ObCheckServerForAddingServerArg::ADD_SERVER,
      sys_tenant_data_version))) {
    LOG_WARN("fail to init rpc arg", KR(ret), K(sys_tenant_data_version));
  } else {
    for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) {
      const ObAddr &addr = servers.at(i);
      // the whole batch shares one timeout budget; stop once it is exhausted
      int64_t timeout = ctx.get_timeout();
      if (OB_UNLIKELY(timeout <= 0)) {
        ret = OB_TIMEOUT;
        LOG_WARN("ctx time out", KR(ret), K(timeout));
      } else if (OB_FAIL(rpc_proxy_->to(addr)
          .timeout(timeout)
          .check_server_for_adding_server(rpc_arg, rpc_result))) {
        LOG_WARN("fail to check whether the server is empty", KR(ret), K(addr));
      } else if (!rpc_result.get_is_server_empty()) {
        ret = OB_OP_NOT_ALLOW;
        LOG_WARN("adding non-empty server is not allowed", KR(ret));
        LOG_USER_ERROR(OB_OP_NOT_ALLOW, "add non-empty server");
      } else if (OB_FAIL(zone_checking_for_adding_server_(zone, rpc_result.get_zone(), picked_zone))) {
        LOG_WARN("zone checking for adding server is failed", KR(ret), K(zone), K(rpc_result.get_zone()));
      } else if (OB_FAIL(add_server_(
          addr,
          picked_zone,
          rpc_result.get_sql_port(),
          rpc_result.get_build_version()))) {
        LOG_WARN("add_server failed", "server", addr, "zone", picked_zone, "sql_port",
            rpc_result.get_sql_port(), "build_version", rpc_result.get_build_version(), KR(ret));
      } else {}
    }
  }
  // best-effort refresh of the cached server list; failure does not override ret
  int tmp_ret = OB_SUCCESS;
  if (OB_TMP_FAIL(SVR_TRACER.refresh())) {
    LOG_WARN("fail to refresh server tracer", KR(ret), KR(tmp_ret));
  }
  return ret;
}
// Handle DELETE SERVER: verify the remaining cluster has enough unit
// resource to absorb the deleted servers' load and that every LS still has
// a leader, then mark each server OB_SERVER_DELETING via delete_server_.
// @param servers  addresses to delete (must be non-empty)
// @param zone     zone named in the command; empty means "no zone check"
int ObServerZoneOpService::delete_servers(
    const ObIArray<ObAddr> &servers,
    const ObZone &zone)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_ISNULL(GCTX.root_service_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("root_service_ is null", KR(ret), KP(GCTX.root_service_));
  } else if (OB_UNLIKELY(servers.count() <= 0)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", KR(ret), K(servers));
  } else if (OB_FAIL(check_server_have_enough_resource_for_delete_server_(servers, zone))) {
    LOG_WARN("not enough resource, cannot delete servers", KR(ret), K(servers), K(zone));
  } else if (OB_FAIL(GCTX.root_service_->check_all_ls_has_leader("delete server"))) {
    LOG_WARN("fail to check all ls has leader", KR(ret));
  } else {
    for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) {
      if (OB_FAIL(delete_server_(servers.at(i), zone))) {
        LOG_WARN("delete_server failed", "server", servers.at(i), "zone", zone, KR(ret));
      }
    }
  }
  // best-effort refresh of the cached server list; failure does not override ret
  int tmp_ret = OB_SUCCESS;
  if (OB_TMP_FAIL(SVR_TRACER.refresh())) {
    LOG_WARN("fail to refresh server tracer", KR(ret), KR(tmp_ret));
  }
  return ret;
}
// Handle CANCEL DELETE SERVER: for each server, in its own transaction,
// close the in-progress DELETE_SERVER rs job as canceled, flip the status
// back from OB_SERVER_DELETING to ACTIVE/INACTIVE (depending on liveness),
// and ask the unit manager to cancel any units migrating off the server.
// Each per-server transaction is committed/rolled back (and the server
// change callback fired) by end_trans_and_on_server_change_.
int ObServerZoneOpService::cancel_delete_servers(
    const ObIArray<ObAddr> &servers,
    const ObZone &zone)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_ISNULL(unit_manager_) || OB_ISNULL(sql_proxy_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unit_manager_ or sql_proxy_ or server_change_callback_ is null", KR(ret),
        KP(unit_manager_), KP(sql_proxy_));
  } else {
    ObServerInfoInTable server_info_in_table;
    for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) {
      const ObAddr &server = servers.at(i);
      const int64_t now = ObTimeUtility::current_time();
      ObMySQLTransaction trans;
      server_info_in_table.reset();
      if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) {
        LOG_WARN("fail to start trans", KR(ret));
      } else if (OB_FAIL(check_and_end_delete_server_(trans, server, zone, true /* is_cancel */, server_info_in_table))) {
        LOG_WARN("fail to check and end delete server", KR(ret), K(server), K(zone));
      } else if (OB_FAIL(ObServerTableOperator::update_status(
          trans,
          server,
          ObServerStatus::OB_SERVER_DELETING,
          server_info_in_table.is_alive() ? ObServerStatus::OB_SERVER_ACTIVE : ObServerStatus::OB_SERVER_INACTIVE))) {
        LOG_WARN("fail to update status in __all_server table", KR(ret),
            K(server), K(server_info_in_table));
      } else if (OB_FAIL(unit_manager_->cancel_migrate_out_units(server))) {
        LOG_WARN("unit_manager_ cancel_migrate_out_units failed", KR(ret), K(server));
      }
      (void) end_trans_and_on_server_change_(ret, trans, "cancel_delete_server", server, server_info_in_table.get_zone(), now);
    }
  }
  // best-effort refresh of the cached server list; failure does not override ret
  int tmp_ret = OB_SUCCESS;
  if (OB_TMP_FAIL(SVR_TRACER.refresh())) {
    LOG_WARN("fail to refresh server tracer", KR(ret), KR(tmp_ret));
  }
  return ret;
}
// Finish an in-progress DELETE SERVER: in one transaction, complete the
// DELETE_SERVER rs job, remove the server's working directory, and delete
// its row from __all_server. The server must currently be in deleting
// status (enforced by check_and_end_delete_server_).
// NOTE(review): try_delete_server_working_dir runs inside the transaction
// but is not itself transactional — if the row removal later fails and the
// transaction rolls back, the working dir is already gone; confirm intended.
int ObServerZoneOpService::finish_delete_server(
    const ObAddr &server,
    const ObZone &zone)
{
  int ret = OB_SUCCESS;
  ObServerInfoInTable server_info_in_table;
  const int64_t now = ObTimeUtility::current_time();
  ObMySQLTransaction trans;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_ISNULL(sql_proxy_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("sql_proxy_ is null", KR(ret), KP(sql_proxy_));
  } else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) {
    LOG_WARN("fail to start trans", KR(ret));
  } else if (OB_FAIL(check_and_end_delete_server_(trans, server, zone, false /* is_cancel */, server_info_in_table))) {
    LOG_WARN("fail to check and end delete server", KR(ret), K(server), K(zone));
  } else if (OB_FAIL(ObServerManager::try_delete_server_working_dir(
      server_info_in_table.get_zone(),
      server,
      server_info_in_table.get_server_id()))) {
    LOG_WARN("fail to delete server working dir", KR(ret), K(server_info_in_table));
  } else if (OB_FAIL(st_operator_.remove(server, trans))) {
    LOG_WARN("fail to remove this server from __all_server table", KR(ret), K(server));
  }
  (void) end_trans_and_on_server_change_(ret, trans, "finish_delete_server", server, server_info_in_table.get_zone(), now);
  // best-effort refresh of the cached server list; failure does not override ret
  int tmp_ret = OB_SUCCESS;
  if (OB_TMP_FAIL(SVR_TRACER.refresh())) {
    LOG_WARN("fail to refresh server tracer", KR(ret), KR(tmp_ret));
  }
  return ret;
}
// Handle STOP/FORCE STOP/ISOLATE SERVER: run the availability prechecks
// (stop_server_precheck), then stamp each server's stop_time via
// start_or_stop_server_.
// @param op  STOP, FORCE_STOP or ISOLATE (see stop_server_precheck for the
//            differences in validation)
int ObServerZoneOpService::stop_servers(
    const ObIArray<ObAddr> &servers,
    const ObZone &zone,
    const obrpc::ObAdminServerArg::AdminServerOp &op)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_FAIL(stop_server_precheck(servers, op))) {
    LOG_WARN("fail to precheck stop server", KR(ret), K(servers), K(zone));
  } else {
    for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); i++) {
      const ObAddr &server = servers.at(i);
      if (OB_FAIL(start_or_stop_server_(server, zone, op))) {
        LOG_WARN("fail to stop server", KR(ret), K(server), K(zone));
      }
    }
  }
  // best-effort refresh of the cached server list; failure does not override ret
  int tmp_ret = OB_SUCCESS;
  if (OB_TMP_FAIL(SVR_TRACER.refresh())) {
    LOG_WARN("fail to refresh server tracer", KR(ret), KR(tmp_ret));
  }
  return ret;
}
int ObServerZoneOpService::start_servers(
const ObIArray<ObAddr> &servers,
const ObZone &zone)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret), K(is_inited_));
} else if (OB_UNLIKELY(servers.count() <= 0)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("servers' count is zero", KR(ret), K(servers));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < servers.count(); ++i) {
const ObAddr &server = servers.at(i);
if (OB_FAIL(start_or_stop_server_(server, zone, ObAdminServerArg::START))) {
LOG_WARN("fail to start server", KR(ret), K(server), K(zone));
}
}
}
int tmp_ret = OB_SUCCESS;
if (OB_TMP_FAIL(SVR_TRACER.refresh())) {
LOG_WARN("fail to refresh server tracer", KR(ret), KR(tmp_ret));
}
return ret;
}
// Validate a stop/isolate request before any state is mutated:
// - every target server must exist in __all_server,
// - all targets must be in one zone (otherwise OB_STOP_SERVER_IN_MULTIPLE_ZONES),
// - if every target is already stopped there is nothing more to check,
// - ISOLATE only checks that the primary region keeps enough live servers,
// - STOP/FORCE_STOP additionally require that no other zone has a pending
//   stop task and that majority/log-sync would survive (log-sync check is
//   skipped for FORCE_STOP).
// Fixes vs. previous version: removed the dead file-scope `ObZone zone;`
// local (it was shadowed by the reference declared in the else branch) and
// dropped `K(server_info)` from the multi-zone warning, where server_info
// was still default-constructed and logged nothing meaningful.
int ObServerZoneOpService::stop_server_precheck(
    const ObIArray<ObAddr> &servers,
    const obrpc::ObAdminServerArg::AdminServerOp &op)
{
  int ret = OB_SUCCESS;
  bool is_same_zone = false;
  bool is_all_stopped = false;
  ObArray<ObServerInfoInTable> all_servers_info_in_table;
  ObServerInfoInTable server_info;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_UNLIKELY(servers.count() <= 0)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("servers' count is zero", KR(ret), K(servers));
  } else if (OB_ISNULL(GCTX.root_service_) || OB_ISNULL(sql_proxy_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("GCTX.root_service_ or sql_proxy_ is null", KR(ret), KP(GCTX.root_service_), KP(sql_proxy_));
  } else if (OB_FAIL(ObServerTableOperator::get(*sql_proxy_, all_servers_info_in_table))) {
    LOG_WARN("fail to read __all_server table", KR(ret), KP(sql_proxy_));
  } else if (OB_FAIL(check_zone_and_server_(
      all_servers_info_in_table,
      servers,
      is_same_zone,
      is_all_stopped))) {
    LOG_WARN("fail to check zone and server", KR(ret), K(all_servers_info_in_table), K(servers));
  } else if (is_all_stopped) {
    //nothing todo
  } else if (!is_same_zone) {
    ret = OB_STOP_SERVER_IN_MULTIPLE_ZONES;
    LOG_WARN("can not stop servers in multiple zones", KR(ret), K(servers));
  } else if (OB_FAIL((ObRootUtils::find_server_info(all_servers_info_in_table, servers.at(0), server_info)))) {
    LOG_WARN("fail to find server info", KR(ret), K(all_servers_info_in_table), K(servers.at(0)));
  } else {
    const ObZone &zone = server_info.get_zone();
    if (ObAdminServerArg::ISOLATE == op) {
      //"Isolate server" does not need to check the total number and status of replicas; it cannot be restarted later;
      if (OB_FAIL(GCTX.root_service_->check_can_stop(zone, servers, false /*is_stop_zone*/))) {
        LOG_WARN("fail to check can stop", KR(ret), K(zone), K(servers), K(op));
        if (OB_OP_NOT_ALLOW == ret) {
          LOG_USER_ERROR(OB_OP_NOT_ALLOW, "Stop all servers in primary region is");
        }
      }
    } else {
      if (ObRootUtils::have_other_stop_task(zone)) {
        ret = OB_STOP_SERVER_IN_MULTIPLE_ZONES;
        LOG_WARN("can not stop servers in multiple zones", KR(ret), K(zone), K(servers), K(op));
        LOG_USER_ERROR(OB_STOP_SERVER_IN_MULTIPLE_ZONES,
            "cannot stop server or stop zone in multiple zones");
      } else if (OB_FAIL(GCTX.root_service_->check_majority_and_log_in_sync(
          servers,
          ObAdminServerArg::FORCE_STOP == op,/*skip_log_sync_check*/
          "stop server"))) {
        LOG_WARN("fail to check majority and log in-sync", KR(ret), K(zone), K(servers), K(op));
      }
    }
  }
  return ret;
}
// Decide which zone a newly added server belongs to.
// command_zone: the zone named in the ADD SERVER command (may be empty);
// rpc_zone: the zone from the server's local config, reported via rpc —
// it must be non-empty; picked_zone: the output, always rpc_zone on success.
// A non-empty command_zone that disagrees with rpc_zone is rejected with
// OB_SERVER_ZONE_NOT_MATCH.
int ObServerZoneOpService::zone_checking_for_adding_server_(
    const ObZone &command_zone,
    const ObZone &rpc_zone,
    ObZone &picked_zone)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_UNLIKELY(rpc_zone.is_empty())) {
    // an empty rpc_zone means the server's local config has no zone set
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("rpc_zone cannot be empty. It implies that server's local config zone is empty.",
        KR(ret), K(rpc_zone));
  } else if (command_zone != rpc_zone && !command_zone.is_empty()) {
    ret = OB_SERVER_ZONE_NOT_MATCH;
    LOG_WARN("the zone specified in the server's local config is not the same as"
        " the zone specified in the command", KR(ret), K(command_zone), K(rpc_zone));
  } else if (OB_FAIL(picked_zone.assign(rpc_zone))) {
    LOG_WARN("fail to assign picked_zone", KR(ret), K(rpc_zone));
  }
  return ret;
}
// Insert one server into __all_server inside a transaction that holds the
// server-zone-op service epoch (serializing all server/zone operations):
// verify the zone is active, verify the server is not already registered,
// fetch a fresh server id, and insert the row with ACTIVE status.
// The transaction end + server-change callback is handled by
// end_trans_and_on_server_change_.
int ObServerZoneOpService::add_server_(
    const ObAddr &server,
    const ObZone &zone,
    const int64_t sql_port,
    const ObServerInfoInTable::ObBuildVersion &build_version)
{
  int ret = OB_SUCCESS;
  bool is_active = false;
  uint64_t server_id = OB_INVALID_ID;
  const int64_t now = ObTimeUtility::current_time();
  ObServerInfoInTable server_info_in_table;
  ObMySQLTransaction trans;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_UNLIKELY(!server.is_valid()
      || zone.is_empty()
      || sql_port <= 0
      || build_version.is_empty())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", KR(ret), K(server), K(zone),
        K(sql_port), K(build_version));
  } else if (OB_ISNULL(sql_proxy_) || OB_ISNULL(server_change_callback_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("sql_proxy_ or server_change_callback_ is null", KR(ret),
        KP(sql_proxy_), KP(server_change_callback_));
  } else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) {
    LOG_WARN("fail to start trans", KR(ret));
  } else if (OB_FAIL(check_and_update_service_epoch_(trans))) {
    LOG_WARN("fail to check and update service epoch", KR(ret));
  } else if (OB_FAIL(ObZoneTableOperation::check_zone_active(trans, zone, is_active))){
    // we do not need to lock the zone info in __all_zone table
    // all server/zone operations are mutually exclusive since we locked the service epoch
    LOG_WARN("fail to check whether the zone is active", KR(ret), K(zone));
  } else if (OB_UNLIKELY(!is_active)) {
    ret = OB_ZONE_NOT_ACTIVE;
    LOG_WARN("the zone is not active", KR(ret), K(zone), K(is_active));
  } else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info_in_table))) {
    if (OB_SERVER_NOT_IN_WHITE_LIST == ret) {
      // treated as "server not registered yet", which is the expected case here
      ret = OB_SUCCESS;
    } else {
      LOG_WARN("fail to get server_info in table", KR(ret), K(server));
    }
  } else {
    // get() succeeded, so the server is already registered: refuse re-adding
    ret = OB_ENTRY_EXIST;
    LOG_WARN("server exists", KR(ret), K(server_info_in_table));
  }
  if (FAILEDx(fetch_new_server_id_(server_id))) {
    // fetch a new server id and insert the server into __all_server table
    LOG_WARN("fail to fetch new server id", KR(ret));
  } else if (OB_UNLIKELY(OB_INVALID_ID == server_id)) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("server id is invalid", KR(ret), K(server_id));
  } else if (OB_FAIL(server_info_in_table.init(
      server,
      server_id,
      zone,
      sql_port,
      false, /* with_rootserver */
      ObServerStatus::OB_SERVER_ACTIVE,
      build_version,
      0, /* stop_time */
      0, /* start_service_time */
      0 /* last_offline_time */))) {
    LOG_WARN("fail to init server info in table", KR(ret), K(server), K(server_id), K(zone),
        K(sql_port), K(build_version), K(now));
  } else if (OB_FAIL(ObServerTableOperator::insert(trans, server_info_in_table))) {
    LOG_WARN("fail to insert server info into __all_server table", KR(ret), K(server_info_in_table));
  }
  (void) end_trans_and_on_server_change_(ret, trans, "add_server", server, zone, now);
  return ret;
}
// Mark one server as deleting inside an epoch-locked transaction: verify
// the server exists, matches the requested zone (when given), and is not
// already deleting; create a DELETE_SERVER rs job and flip the status to
// OB_SERVER_DELETING. The actual removal happens later in
// finish_delete_server once migration drains the server.
int ObServerZoneOpService::delete_server_(
    const common::ObAddr &server,
    const ObZone &zone)
{
  int ret = OB_SUCCESS;
  ObServerInfoInTable server_info_in_table;
  const int64_t now = ObTimeUtility::current_time();
  char ip[OB_MAX_SERVER_ADDR_SIZE] = "";
  ObMySQLTransaction trans;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_UNLIKELY(!server.is_valid() || !server.ip_to_string(ip, sizeof(ip)))) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", KR(ret), K(server));
  } else if (OB_ISNULL(sql_proxy_) || OB_ISNULL(server_change_callback_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("sql_proxy_ or server_change_callback_ is null", KR(ret),
        KP(sql_proxy_), KP(server_change_callback_));
  } else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) {
    LOG_WARN("fail to start trans", KR(ret));
  } else if (OB_FAIL(check_and_update_service_epoch_(trans))) {
    LOG_WARN("fail to check and update service epoch", KR(ret));
  } else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info_in_table))) {
    LOG_WARN("fail to get server_info in table", KR(ret), K(server));
  } else if (!zone.is_empty() && zone != server_info_in_table.get_zone()) {
    ret = OB_SERVER_ZONE_NOT_MATCH;
    LOG_WARN("zone not matches", KR(ret), K(server), K(zone), K(server_info_in_table));
  } else if (OB_UNLIKELY(server_info_in_table.is_deleting())) {
    ret = OB_SERVER_ALREADY_DELETED;
    LOG_WARN("the server has been deleted", KR(ret), K(server_info_in_table));
  } else {
    // record the operation as an rs job so it can be completed or canceled later
    int64_t job_id = RS_JOB_CREATE(DELETE_SERVER, trans, "svr_ip", ip, "svr_port", server.get_port());
    if (job_id < 1) {
      ret = OB_SQL_OPT_ERROR;
      LOG_WARN("insert into all_rootservice_job failed ", K(ret));
    } else if (OB_FAIL(ObServerTableOperator::update_status(
        trans,
        server,
        server_info_in_table.get_status(),
        ObServerStatus::OB_SERVER_DELETING))) {
      LOG_WARN("fail to update status", KR(ret), K(server), K(server_info_in_table));
    }
  }
  (void) end_trans_and_on_server_change_(ret, trans, "delete_server", server, server_info_in_table.get_zone(), now);
  return ret;
}
// Shared tail of cancel_delete_servers/finish_delete_server: within the
// caller's transaction, take the service epoch, load the server's row
// (returned via `server_info`), verify zone match and deleting status, and
// complete the in-progress DELETE_SERVER rs job — with OB_CANCELED when
// `is_cancel` is true, OB_SUCCESS otherwise.
// @param server_info  [out] the server's current __all_server row
int ObServerZoneOpService::check_and_end_delete_server_(
    common::ObMySQLTransaction &trans,
    const common::ObAddr &server,
    const ObZone &zone,
    const bool is_cancel,
    share::ObServerInfoInTable &server_info)
{
  int ret = OB_SUCCESS;
  server_info.reset();
  char ip[OB_MAX_SERVER_ADDR_SIZE] = "";
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_UNLIKELY(!server.is_valid() || !server.ip_to_string(ip, sizeof(ip)))) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", KR(ret), K(server));
  } else if (OB_FAIL(check_and_update_service_epoch_(trans))) {
    LOG_WARN("fail to check and update service epoch", KR(ret));
  } else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info))) {
    LOG_WARN("fail to get server_info in table", KR(ret), K(server));
  } else if (!zone.is_empty() && zone != server_info.get_zone()) {
    ret = OB_SERVER_ZONE_NOT_MATCH;
    LOG_WARN("zone not matches", KR(ret), K(server), K(zone), K(server_info));
  } else if (OB_UNLIKELY(!server_info.is_deleting())) {
    ret = OB_SERVER_NOT_DELETING;
    LOG_ERROR("server is not in deleting status, cannot be removed from __all_server table",
        KR(ret), K(server_info));
  } else {
    // locate the matching in-progress rs job and close it
    ObRsJobInfo job_info;
    ret = RS_JOB_FIND(job_info, trans, "job_type", "DELETE_SERVER",
        "job_status", "INPROGRESS",
        "svr_ip", ip, "svr_port", server.get_port());
    if (OB_SUCC(ret) && job_info.job_id_ > 0) {
      // the job's final result: canceled for cancel, success for finish
      int tmp_ret = is_cancel ? OB_CANCELED : OB_SUCCESS;
      if (OB_FAIL(RS_JOB_COMPLETE(job_info.job_id_, tmp_ret, trans))) {
        LOG_WARN("fail to all_rootservice_job" , KR(ret), K(server));
      }
    } else {
      LOG_WARN("failed to find job", KR(ret), K(server));
    }
  }
  return ret;
}
// Start or stop one server inside an epoch-locked transaction by updating
// its stop_time in __all_server: START clears it to 0, STOP/FORCE_STOP
// stamp the current time. stop_time == 0 is used as "not stopped", so the
// row is only updated when the state actually flips. STOP/FORCE_STOP also
// re-verify (under the epoch lock) that no other zone has a stop task.
int ObServerZoneOpService::start_or_stop_server_(
    const common::ObAddr &server,
    const ObZone &zone,
    const obrpc::ObAdminServerArg::AdminServerOp &op)
{
  int ret = OB_SUCCESS;
  const int64_t now = ObTimeUtility::current_time();
  ObServerInfoInTable server_info;
  ObMySQLTransaction trans;
  bool is_start = (ObAdminServerArg::START == op);
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_UNLIKELY(!server.is_valid())) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("invalid argument", KR(ret), K(server));
  } else if (OB_ISNULL(sql_proxy_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("sql_proxy_ is null", KR(ret), KP(sql_proxy_));
  } else if (OB_FAIL(trans.start(sql_proxy_, OB_SYS_TENANT_ID))) {
    LOG_WARN("fail to start trans", KR(ret));
  } else if (OB_FAIL(check_and_update_service_epoch_(trans))) {
    LOG_WARN("fail to check and update service epoch", KR(ret));
  } else if (OB_FAIL(ObServerTableOperator::get(trans, server, server_info))) {
    LOG_WARN("fail to get server_info", KR(ret), K(server));
  } else if (!zone.is_empty() && zone != server_info.get_zone()) {
    ret = OB_SERVER_ZONE_NOT_MATCH;
    LOG_WARN("zone not matches", KR(ret), K(server), K(zone), K(server_info));
  } else if (ObAdminServerArg::STOP == op || ObAdminServerArg::FORCE_STOP == op) {
    // check again, if there exists stopped servers in other zones
    if (ObRootUtils::have_other_stop_task(server_info.get_zone())) {
      ret = OB_STOP_SERVER_IN_MULTIPLE_ZONES;
      LOG_WARN("can not stop servers in multiple zones", KR(ret), K(server_info.get_zone()));
      LOG_USER_ERROR(OB_STOP_SERVER_IN_MULTIPLE_ZONES,
          "cannot stop server or stop zone in multiple zones");
    }
  }
  if (OB_SUCC(ret)) {
    int64_t new_stop_time = is_start ? 0 : now;
    int64_t old_stop_time = server_info.get_stop_time();
    // only write the row when the stopped/started state actually changes
    if ((is_start && 0 != old_stop_time) || (!is_start && 0 == old_stop_time)) {
      if (OB_FAIL(ObServerTableOperator::update_stop_time(
          trans,
          server,
          old_stop_time,
          new_stop_time))) {
        LOG_WARN("fail to update stop_time", KR(ret), K(server), K(old_stop_time), K(new_stop_time));
      }
    }
    LOG_INFO("update stop time", KR(ret), K(server_info),
        K(old_stop_time), K(new_stop_time), K(op), K(is_start));
  }
  const char *op_print_str = is_start ? "start_server" : "stop_server";
  (void) end_trans_and_on_server_change_(ret, trans, op_print_str, server, server_info.get_zone(), now);
  return ret;
}
// Build an ObRsListArg from the SYS LS replica list: master_rs_ is this
// process's address, and rs_list_ contains every in-service paxos replica
// of the SYS LS. Note the first disjunct: a replica matching self_addr_ is
// always included, even if it is not in service.
int ObServerZoneOpService::construct_rs_list_arg(ObRsListArg &rs_list_arg)
{
  int ret = OB_SUCCESS;
  ObLSInfo ls_info;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_ISNULL(lst_operator_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("lst operator is null", KR(ret), KP(lst_operator_));
  } else if (OB_FAIL(lst_operator_->get(
      GCONF.cluster_id,
      OB_SYS_TENANT_ID,
      SYS_LS,
      share::ObLSTable::DEFAULT_MODE,
      ls_info))) {
    LOG_WARN("fail to get ls info", KR(ret));
  } else {
    rs_list_arg.master_rs_ = GCONF.self_addr_;
    FOREACH_CNT_X(replica, ls_info.get_replicas(), OB_SUCC(ret)) {
      if (replica->get_server() == GCONF.self_addr_
          || (replica->is_in_service()
          && ObReplicaTypeCheck::is_paxos_replica_V2(replica->get_replica_type()))) {
        if (OB_FAIL(rs_list_arg.rs_list_.push_back(replica->get_server()))) {
          LOG_WARN("fail to push a server into rs list", KR(ret), K(replica->get_server()));
        }
      }
    }
  }
  return ret;
}
// Serialize server/zone operations across rootservers: verify this node is
// currently the SYS LS leader, then check-and-update the
// SERVER_ZONE_OP_SERVICE_EPOCH row (keyed by the leader's proposal id)
// inside `trans`. A stale leader fails with OB_NOT_MASTER or inside the
// epoch proxy, so at most one leader's operations can commit.
int ObServerZoneOpService::check_and_update_service_epoch_(ObMySQLTransaction &trans)
{
  int ret = OB_SUCCESS;
  int64_t service_epoch_in_table = palf::INVALID_PROPOSAL_ID;
  int64_t proposal_id = palf::INVALID_PROPOSAL_ID;
  ObRole role;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_FAIL(ObRootUtils::get_proposal_id_from_sys_ls(proposal_id, role))) {
    LOG_WARN("fail to get proposal id from sys ls", KR(ret));
  } else if (ObRole::LEADER != role) {
    ret = OB_NOT_MASTER;
    LOG_WARN("not leader ls", KR(ret), K(proposal_id), K(service_epoch_in_table), K(role));
  } else if (palf::INVALID_PROPOSAL_ID == proposal_id) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("invalid proposal id", KR(ret), K(proposal_id));
  } else if (OB_FAIL(ObServiceEpochProxy::check_and_update_service_epoch(
      trans,
      OB_SYS_TENANT_ID,
      ObServiceEpochProxy::SERVER_ZONE_OP_SERVICE_EPOCH,
      proposal_id))) {
    LOG_WARN("fail to check and update server zone op service epoch", KR(ret), K(proposal_id));
  } else {}
  return ret;
}
// Allocate a fresh, unique server id by bumping the sys tenant's
// max-used-server-id row through ObMaxIdFetcher.
// @param server_id  [out] the newly fetched id; untouched on failure
int ObServerZoneOpService::fetch_new_server_id_(uint64_t &server_id)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_ISNULL(sql_proxy_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("invalid sql proxy", KR(ret), KP(sql_proxy_));
  } else {
    ObMaxIdFetcher id_fetcher(*sql_proxy_);
    uint64_t fetched_id = OB_INVALID_ID;
    if (OB_FAIL(id_fetcher.fetch_new_max_id(
        OB_SYS_TENANT_ID,
        OB_MAX_USED_SERVER_ID_TYPE,
        fetched_id))) {
      LOG_WARN("fetch_new_max_id failed", KR(ret));
    } else {
      server_id = fetched_id;
    }
  }
  return ret;
}
// For every server to be deleted: verify it exists in __all_server, matches
// the requested zone (when given), and that the unit manager confirms the
// rest of its zone has enough spare resource to re-host its units.
int ObServerZoneOpService::check_server_have_enough_resource_for_delete_server_(
    const ObIArray<ObAddr> &servers,
    const ObZone &zone)
{
  int ret = OB_SUCCESS;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else if (OB_ISNULL(unit_manager_) || OB_ISNULL(sql_proxy_)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unit_manager_ or sql_proxy_ is null", KR(ret), KP(unit_manager_), KP(sql_proxy_));
  } else {
    ObServerInfoInTable server_info;
    FOREACH_CNT_X(server, servers, OB_SUCC(ret)) {
      server_info.reset();
      if (OB_FAIL(ObServerTableOperator::get(*sql_proxy_, *server, server_info))) {
        LOG_WARN("fail to get server_info in table", KR(ret), KP(sql_proxy_), KPC(server));
      } else if (!zone.is_empty() && server_info.get_zone() != zone) {
        ret = OB_SERVER_ZONE_NOT_MATCH;
        LOG_WARN("the arg zone is not the same as the server's zone in __all_server table", KR(ret),
            K(zone), K(server_info));
      } else if (OB_FAIL(unit_manager_->check_enough_resource_for_delete_server(
          *server, server_info.get_zone()))) {
        LOG_WARN("fail to check enouch resource", KR(ret), KPC(server), K(server_info));
      }
    }//end for each
  }
  return ret;
}
// Scan `servers` against the cached __all_server rows `servers_info`:
// - is_same_zone: true iff every server lives in the zone of servers[0]
// - is_all_stopped: true iff every server is already stopped
// The loop exits early once both flags are false; it fails on the first
// server that cannot be found in `servers_info`.
int ObServerZoneOpService::check_zone_and_server_(
    const ObIArray<share::ObServerInfoInTable> &servers_info,
    const ObIArray<ObAddr> &servers,
    bool &is_same_zone,
    bool &is_all_stopped)
{
  int ret = OB_SUCCESS;
  is_same_zone = true;
  is_all_stopped = true;
  if (OB_UNLIKELY(!is_inited_)) {
    ret = OB_NOT_INIT;
    LOG_WARN("not init", KR(ret), K(is_inited_));
  } else {
    ObServerInfoInTable server_info;
    ObZone zone;
    for (int64_t i = 0; i < servers.count() && OB_SUCC(ret) && (is_same_zone || is_all_stopped); i++) {
      const ObAddr &server = servers.at(i);
      server_info.reset();
      if (OB_FAIL(ObRootUtils::find_server_info(servers_info, server, server_info))) {
        LOG_WARN("fail to get server info", KR(ret), K(servers_info), K(server));
      } else if (0 == i) {
        // the first server's zone is the reference for the same-zone check
        if (OB_FAIL(zone.assign(server_info.get_zone()))) {
          LOG_WARN("fail to assign zone", KR(ret), K(server_info.get_zone()));
        }
      } else if (zone != server_info.get_zone()) {
        is_same_zone = false;
        LOG_WARN("server zone not same", K(zone), K(server_info), K(servers));
      }
      // the stopped check runs for every server, including the first
      if (OB_FAIL(ret)) {
      } else if (!server_info.is_stopped()) {
        is_all_stopped = false;
      }
    }
  }
  return ret;
}
// Common epilogue for server mutations: commit `trans` when `ret` is
// OB_SUCCESS (roll back otherwise), fire the server-change callback, and
// record the operation in the rootservice event history.
// @param ret  [in/out] overall result; failures from commit or from a null
//             callback are folded into it only when it was OB_SUCCESS on
//             entry, so an earlier error is never masked
void ObServerZoneOpService::end_trans_and_on_server_change_(
    int &ret,
    common::ObMySQLTransaction &trans,
    const char *op_print_str,
    const common::ObAddr &server,
    const ObZone &zone,
    const int64_t start_time)
{
  int tmp_ret = OB_SUCCESS;
  LOG_INFO("start execute end_trans_and_on_server_change_", KR(ret),
      K(op_print_str), K(server), K(zone), K(start_time));
  if (OB_UNLIKELY(!trans.is_started())) {
    // trans.start() itself failed earlier; nothing to end
    LOG_WARN("the transaction is not started");
  } else {
    // commit on success, abort on failure
    if (OB_TMP_FAIL(trans.end(OB_SUCC(ret)))) {
      LOG_WARN("fail to commit the transaction", KR(ret), KR(tmp_ret), K(server), K(zone));
      ret = OB_SUCC(ret) ? tmp_ret : ret;
    }
  }
  // the callback runs even when the transaction failed (best-effort refresh)
  if (OB_ISNULL(server_change_callback_)) {
    tmp_ret = OB_ERR_UNEXPECTED;
    LOG_WARN("server_change_callback_ is null", KR(ret), KR(tmp_ret), KP(server_change_callback_));
    ret = OB_SUCC(ret) ? tmp_ret : ret;
  } else if (OB_TMP_FAIL(server_change_callback_->on_server_change())) {
    LOG_WARN("fail to callback on server change", KR(ret), KR(tmp_ret));
  }
  int64_t time_cost = ::oceanbase::common::ObTimeUtility::current_time() - start_time;
  FLOG_INFO(op_print_str, K(server), K(zone), "time cost", time_cost, KR(ret));
  ROOTSERVICE_EVENT_ADD("server", op_print_str, K(server), K(ret));
}
}
}

View File

@ -0,0 +1,216 @@
/**
 * Copyright (c) 2022 OceanBase
 * OceanBase CE is licensed under Mulan PubL v2.
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 * You may obtain a copy of Mulan PubL v2 at:
 * http://license.coscl.org.cn/MulanPubL-2.0
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PubL v2 for more details.
 */
#ifndef OCEANBASE_ROOTSERVER_OB_SERVER_ZONE_OP_SERVICE_H
#define OCEANBASE_ROOTSERVER_OB_SERVER_ZONE_OP_SERVICE_H
#include "share/ob_server_table_operator.h"
#include "share/ob_rpc_struct.h"
namespace oceanbase
{
namespace obrpc
{
class ObSrvRpcProxy;
struct ObRsListArg;
// struct ObAdminServerArg;
}
namespace share
{
class ObLSTableOperator;
class ObAllServerTracer;
}
namespace rootserver
{
class ObIServerChangeCallback;
class ObUnitManager;
// Service that executes server/zone management commands
// (ADD/DELETE/CANCEL DELETE/START/STOP SERVER) by operating on __all_server table.
class ObServerZoneOpService
{
public:
ObServerZoneOpService();
virtual ~ObServerZoneOpService();
// Initialize the service with its dependencies; must be called before any other member.
int init(
ObIServerChangeCallback &server_change_callback,
obrpc::ObSrvRpcProxy &rpc_proxy,
share::ObLSTableOperator &lst_operator,
ObUnitManager &unit_manager,
ObMySQLProxy &sql_proxy
);
// Add new servers to a specified(optional) zone in the cluster.
// The servers should be empty and the zone should be active.
// This operation is successful
// if the servers' info are inserted into __all_server table successfully.
//
// @param[in] servers the servers which we want to add
// @param[in] zone the zone in which the servers will be located. If it's empty,
// the zone specified in the servers' local config will be picked
//
// @ret OB_SUCCESS add successfully
// @ret OB_ZONE_NOT_ACTIVE the specified zone is not active
// @ret OB_SERVER_ZONE_NOT_MATCH the zone specified in the server's local config is not the same
// as the zone specified in the system command ADD SERVER
// or both are empty
// @ret OB_ENTRY_EXIST there exists servers which are already added
//
// @ret other error code failure
int add_servers(const ObIArray<ObAddr> &servers, const ObZone &zone, bool is_bootstrap = false);
int construct_rs_list_arg(obrpc::ObRsListArg &rs_list_arg);
// Try to delete the given servers from the cluster (logically).
// In this func, we only set their statuses in __all_server table be OB_SERVER_DELETING.
// Root balancer will detect servers with such statuses
// and start to migrate units on these servers to other servers.
// Once a server with status OB_SERVER_DELETING has no units and no records in __all_ls_meta_table,
// this server will be deleted from __all_server table, which means this server is no longer in the cluster
// (see related mechanism in ObEmptyServerChecker).
//
// @param[in] servers the servers which we try to delete
// @param[in] zone the zone in which the servers are located
//
// @ret OB_SUCCESS set status be OB_SERVER_DELETING in __all_server table successfully
// @ret OB_SERVER_ZONE_NOT_MATCH the arg zone is not the same as the server's zone in __all_server table
// @ret OB_SERVER_ALREADY_DELETED the server's status has been OB_SERVER_DELETING already
// @ret OB_SERVER_NOT_IN_WHITE_LIST the server is not in the cluster
// @ret OB_NOT_MASTER not rs leader, cannot execute the command
//
// @ret other error code failure
int delete_servers(
const ObIArray<common::ObAddr> &servers,
const common::ObZone &zone);
// Revoke the delete operation for the given servers from the cluster (logically).
// What we do in this func is to set servers' status be OB_SERVER_ACTIVE
// or OB_SERVER_INACTIVE in __all_server table
// and prevent units on this server be migrated to other servers.
//
// @param[in] servers the servers for which we want to revoke the delete operation
// @param[in] zone the zone in which the servers are located
//
// @ret OB_SUCCESS set status be OB_SERVER_ACTIVE or OB_SERVER_INACTIVE in __all_server table successfully
// @ret OB_SERVER_ZONE_NOT_MATCH the arg zone is not the same as the server's zone in __all_server table
// @ret OB_SERVER_NOT_DELETING the server's status is not OB_SERVER_DELETING, we cannot cancel delete
// @ret OB_SERVER_NOT_IN_WHITE_LIST the server is not in the cluster
// @ret OB_NOT_MASTER not rs leader, cannot execute the command
// @ret other error code failure
int cancel_delete_servers(
const ObIArray<common::ObAddr> &servers,
const common::ObZone &zone);
// Delete the given server from the cluster
// In this func, we delete the server from __all_server table.
// @param[in] server the server which we want to delete
// @param[in] zone the zone in which the server is located
// @ret OB_SUCCESS delete the server from __all_server table successfully
// @ret OB_SERVER_NOT_DELETING the server's status is not OB_SERVER_DELETING, we cannot remove it
// @ret OB_SERVER_NOT_IN_WHITE_LIST the server is not in the cluster
// @ret OB_NOT_MASTER not rs leader, cannot execute the command
// @ret other error code failure
int finish_delete_server(
const common::ObAddr &server,
const common::ObZone &zone);
// stop the given servers
// In this func, we set the server's stop_time be now in __all_server table
// Stopping server should guarantee that there is no other zone's server is stopped.
// Isolating server should guarantee that there still exists started server in primary region after isolating
// In addition, stop server will check majority and log sync.
//
// @param[in] servers the servers which we want to stop
// @param[in] zone the zone in which the servers are located
// @param[in] op the operation type: stop, force stop or isolate
//
// @ret OB_SUCCESS stop the server successfully
// @ret OB_INVALID_ARGUMENT an invalid server
// @ret OB_SERVER_ZONE_NOT_MATCH the arg zone is not the same as the server's zone in __all_server table
// @ret OB_NOT_MASTER not rs leader, cannot execute the command
// @ret OB_SERVER_NOT_IN_WHITE_LIST the server is not in the cluster
// @ret other error code failure
int stop_servers(
const ObIArray<ObAddr> &servers,
const ObZone &zone,
const obrpc::ObAdminServerArg::AdminServerOp &op);
// start the given servers
// In this func, we set the server's stop_time be zero in __all_server table
//
// @param[in] servers the servers which we want to start
// @param[in] zone the zone in which the servers are located
//
// @ret OB_SUCCESS start the server successfully
// @ret OB_INVALID_ARGUMENT an invalid server
// @ret OB_SERVER_ZONE_NOT_MATCH the arg zone is not the same as the server's zone in __all_server table
// @ret OB_NOT_MASTER not rs leader, cannot execute the command
// @ret OB_SERVER_NOT_IN_WHITE_LIST the server is not in the cluster
// @ret other error code failure
int start_servers(
const ObIArray<ObAddr> &servers,
const ObZone &zone);
// Precheck for a stop/isolate command before it is actually executed.
int stop_server_precheck(
const ObIArray<ObAddr> &servers,
const obrpc::ObAdminServerArg::AdminServerOp &op);
private:
// Pick the zone a new server joins: presumably the command zone when specified,
// otherwise the zone from the server's local (rpc) config — see add_servers doc.
int zone_checking_for_adding_server_(
const common::ObZone &command_zone,
const common::ObZone &rpc_zone,
ObZone &picked_zone);
// Insert one server's record into __all_server table.
int add_server_(
const common::ObAddr &server,
const common::ObZone &zone,
const int64_t sql_port,
const share::ObServerInfoInTable::ObBuildVersion &build_version);
// Mark one server's status as deleting in __all_server table.
int delete_server_(
const common::ObAddr &server,
const common::ObZone &zone);
// Validate the server's status and end (is_cancel = false) or cancel
// (is_cancel = true) the delete inside the given transaction.
int check_and_end_delete_server_(
common::ObMySQLTransaction &trans,
const common::ObAddr &server,
const common::ObZone &zone,
const bool is_cancel,
share::ObServerInfoInTable &server_info);
// Update the server's stop_time in __all_server table according to op.
int start_or_stop_server_(
const common::ObAddr &server,
const ObZone &zone,
const obrpc::ObAdminServerArg::AdminServerOp &op);
// Check and update the service epoch within the given transaction.
int check_and_update_service_epoch_(common::ObMySQLTransaction &trans);
// Allocate a unique id for a newly added server.
int fetch_new_server_id_(uint64_t &server_id);
// Check the remaining servers have enough resource to take over the units
// of the servers to be deleted.
int check_server_have_enough_resource_for_delete_server_(
const ObIArray<common::ObAddr> &servers,
const common::ObZone &zone);
// Check whether all given servers are in one zone and whether all of them are stopped.
int check_zone_and_server_(
const ObIArray<share::ObServerInfoInTable> &servers_info,
const ObIArray<ObAddr> &servers,
bool &is_same_zone,
bool &is_all_stopped);
// End the transaction, trigger the server-change callback and record the op event.
void end_trans_and_on_server_change_(
int &ret,
common::ObMySQLTransaction &trans,
const char *op_print_str,
const common::ObAddr &server,
const common::ObZone &zone,
const int64_t start_time);
bool is_inited_;
ObIServerChangeCallback *server_change_callback_; // not owned
obrpc::ObSrvRpcProxy *rpc_proxy_; // not owned
ObMySQLProxy *sql_proxy_; // not owned
share::ObLSTableOperator *lst_operator_; // not owned
share::ObServerTableOperator st_operator_;
ObUnitManager *unit_manager_; // not owned
private:
DISALLOW_COPY_AND_ASSIGN(ObServerZoneOpService);
};
} // rootserver
} // oceanbase
#endif

View File

@ -255,7 +255,7 @@ int ObAdminSwitchReplicaRole::get_tenants_of_zone(const ObZone &zone,
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(zone),
"tenant_id_set created", tenant_id_set.created(), KR(ret));
} else if (OB_FAIL(ctx_.server_mgr_->get_alive_servers(zone, server_array))) {
} else if (OB_FAIL(SVR_TRACER.get_alive_servers(zone, server_array))) {
LOG_WARN("get alive servers failed", K(zone), KR(ret));
} else {
FOREACH_CNT_X(server, server_array, OB_SUCCESS == ret) {
@ -320,7 +320,7 @@ int ObAdminCallServer::get_server_list(const ObServerZoneArg &arg, ObIArray<ObAd
LOG_WARN("invalid arg", K(arg), KR(ret));
} else if (arg.server_.is_valid()) {
bool is_alive = false;
if (OB_FAIL(ctx_.server_mgr_->check_server_alive(arg.server_, is_alive))) {
if (OB_FAIL(SVR_TRACER.check_server_alive(arg.server_, is_alive))) {
LOG_WARN("fail to check server alive", KR(ret), "server", arg.server_);
} else if (!is_alive) {
ret = OB_INVALID_ARGUMENT;
@ -335,7 +335,7 @@ int ObAdminCallServer::get_server_list(const ObServerZoneArg &arg, ObIArray<ObAd
} else if (!zone_exist) {
ret = OB_ZONE_INFO_NOT_EXIST;
LOG_WARN("zone info not exist", KR(ret), K(arg.zone_));
} else if (OB_FAIL(ctx_.server_mgr_->get_alive_servers(arg.zone_, server_list))) {
} else if (OB_FAIL(SVR_TRACER.get_alive_servers(arg.zone_, server_list))) {
LOG_WARN("get alive servers failed", KR(ret), K(arg));
}
}
@ -474,6 +474,9 @@ int ObAdminReloadServer::execute()
if (!ctx_.is_inited()) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret));
} else if (OB_ISNULL(ctx_.server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ctx_.server_mgr_ is null", KR(ret), KP(ctx_.server_mgr_));
} else if (OB_FAIL(ctx_.server_mgr_->load_server_manager())) {
LOG_WARN("build server status failed", KR(ret));
}
@ -932,7 +935,7 @@ int ObAdminSetConfig::update_config(obrpc::ObAdminSetConfigArg &arg, int64_t new
if (false == addr.set_ip_addr(svr_ip, static_cast<int32_t>(svr_port))){
ret = OB_ERR_UNEXPECTED;
LOG_WARN("set addr fail", KR(ret), "svr_ip", svr_ip, K(svr_port));
} else if (OB_FAIL(ctx_.server_mgr_->is_server_exist(addr, is_server_exist))) {
} else if (OB_FAIL(SVR_TRACER.is_server_exist(addr, is_server_exist))) {
LOG_WARN("check server exist fail", K(addr));
} else if (!is_server_exist) {
ret = OB_INVALID_ARGUMENT;
@ -1495,10 +1498,7 @@ int ObAdminRollingUpgradeCmd::execute(const obrpc::ObAdminRollingUpgradeArg &arg
} else if (obrpc::OB_UPGRADE_STAGE_POSTUPGRADE == arg.stage_) {
// end rolling upgrade, should raise min_observer_version
const char *min_obs_version_name = "min_observer_version";
if (OB_ISNULL(ctx_.server_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("server_mgr is null", KR(ret));
} else if (OB_FAIL(ctx_.server_mgr_->get_min_server_version(min_server_version))) {
if (OB_FAIL(SVR_TRACER.get_min_server_version(min_server_version))) {
LOG_WARN("failed to get the min server version", KR(ret));
} else if (OB_FAIL(item.name_.assign(min_obs_version_name))) {
LOG_WARN("assign min_observer_version config name failed",
@ -1751,6 +1751,7 @@ int ObAdminRootInspection::execute(const obrpc::ObRunJobArg &arg)
{
int ret = OB_SUCCESS;
LOG_INFO("execute root inspection request", K(arg));
ObAddr rs_addr;
if (!ctx_.is_inited()) {
ret = OB_NOT_INIT;
LOG_WARN("not init", KR(ret));
@ -1760,19 +1761,24 @@ int ObAdminRootInspection::execute(const obrpc::ObRunJobArg &arg)
} else if (ROOT_INSPECTION != get_inner_job_value(arg.job_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("job to run not root inspection", K(arg), KR(ret));
} else if (!ctx_.server_mgr_->is_inited()) {
ret = OB_INNER_STAT_ERROR;
LOG_WARN("server_mgr_ not inited", KR(ret));
} else if (OB_ISNULL(GCTX.rs_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("GCTX.rs_mgr_ is null", KR(ret), KP(GCTX.rs_mgr_));
} else if (OB_FAIL(GCTX.rs_mgr_->get_master_root_server(rs_addr))) {
LOG_WARN("fail to get master root server", KR(ret));
} else if (OB_UNLIKELY(!rs_addr.is_valid())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("rs_addr is invalid", KR(ret), K(rs_addr));
} else if (!ctx_.root_inspection_->is_inited()) {
ret = OB_INNER_STAT_ERROR;
LOG_WARN("root_inspection not inited", KR(ret));
} else if (!arg.zone_.is_empty()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("root inspection can't execute by zone", K(arg), KR(ret));
} else if (arg.server_.is_valid() && arg.server_ != ctx_.server_mgr_->get_rs_addr()) {
} else if (arg.server_.is_valid() && arg.server_ != rs_addr) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("only rs can execute root inspection", K(arg),
"rs", ctx_.server_mgr_->get_rs_addr(), KR(ret));
"rs", rs_addr, KR(ret));
} else if (OB_FAIL(ctx_.root_inspection_->check_all())) {
LOG_WARN("root_inspection check_all failed", KR(ret));
}
@ -1890,13 +1896,12 @@ int ObTenantServerAdminUtil::get_tenant_servers(const uint64_t tenant_id, common
}
} else {
ObArray<uint64_t> pool_ids;
if (OB_ISNULL(ctx_.server_mgr_) || OB_ISNULL(ctx_.unit_mgr_)) {
if (OB_ISNULL(ctx_.unit_mgr_)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ctx_.server_mgr_), K(ctx_.unit_mgr_), KR(ret));
} else if (!ctx_.server_mgr_->has_build() || !ctx_.unit_mgr_->check_inner_stat()) {
LOG_WARN("invalid argument", K(ctx_.unit_mgr_), KR(ret));
} else if (!SVR_TRACER.has_build() || !ctx_.unit_mgr_->check_inner_stat()) {
ret = OB_SERVER_IS_INIT;
LOG_WARN("server manager or unit manager hasn't built",
"server_mgr built", ctx_.server_mgr_->has_build(),
"unit_mgr built", ctx_.unit_mgr_->check_inner_stat(), KR(ret));
} else if (OB_FAIL(ctx_.unit_mgr_->get_pool_ids_of_tenant(tenant_id, pool_ids))) {
LOG_WARN("get_pool_ids_of_tenant failed", K(tenant_id), KR(ret));
@ -1910,7 +1915,7 @@ int ObTenantServerAdminUtil::get_tenant_servers(const uint64_t tenant_id, common
for (int64_t j = 0; OB_SUCC(ret) && j < unit_infos.count(); ++j) {
bool is_alive = false;
const ObUnit &unit = unit_infos.at(j).unit_;
if (OB_FAIL(ctx_.server_mgr_->check_server_alive(unit.server_, is_alive))) {
if (OB_FAIL(SVR_TRACER.check_server_alive(unit.server_, is_alive))) {
LOG_WARN("check_server_alive failed", "server", unit.server_, KR(ret));
} else if (is_alive) {
if (OB_FAIL(servers.push_back(unit.server_))) {
@ -1919,7 +1924,7 @@ int ObTenantServerAdminUtil::get_tenant_servers(const uint64_t tenant_id, common
}
if (OB_SUCC(ret)) {
if (unit.migrate_from_server_.is_valid()) {
if (OB_FAIL(ctx_.server_mgr_->check_server_alive(
if (OB_FAIL(SVR_TRACER.check_server_alive(
unit.migrate_from_server_, is_alive))) {
LOG_WARN("check_server_alive failed", "server",
unit.migrate_from_server_, KR(ret));
@ -1943,10 +1948,7 @@ int ObTenantServerAdminUtil::get_all_servers(common::ObIArray<ObAddr> &servers)
{
int ret = OB_SUCCESS;
ObZone empty_zone;
if (OB_ISNULL(ctx_.server_mgr_)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("invalid argument", K(ctx_.server_mgr_), KR(ret));
} else if (OB_FAIL(ctx_.server_mgr_->get_alive_servers(empty_zone, servers))){
if (OB_FAIL(SVR_TRACER.get_alive_servers(empty_zone, servers))) {
//if zone is empty, get all servers
LOG_WARN("fail to get all servers", KR(ret));
}

View File

@ -26,7 +26,6 @@ class SCN;
}
namespace rootserver
{
class ObServerManager;
class ObTableCreator
{
public:

View File

@ -33,7 +33,6 @@ class ObLSTableOperator;
}
namespace rootserver
{
class ObServerManager;
struct ObTabletCreatorArg
{
public:

View File

@ -304,26 +304,26 @@ void ObTenantInfoLoader::broadcast_tenant_info_content_()
if (OB_FAIL(tenant_info_cache_.get_tenant_info(tenant_info, last_sql_update_time, ora_rowscn))) {
LOG_WARN("failed to get tenant info", KR(ret));
} else if (OB_FAIL(share::ObAllServerTracer::get_instance().for_each_server_status(
[&rpc_count, &tenant_info, &proxy, ora_rowscn](const share::ObServerStatus &status) -> int {
} else if (OB_FAIL(share::ObAllServerTracer::get_instance().for_each_server_info(
[&rpc_count, &tenant_info, &proxy, ora_rowscn](const share::ObServerInfoInTable &server_info) -> int {
int ret = OB_SUCCESS;
obrpc::ObUpdateTenantInfoCacheArg arg;
if (!status.is_valid()) {
LOG_WARN("skip invalid status", KR(ret), K(status));
} else if (!status.is_alive()) {
if (!server_info.is_valid()) {
LOG_WARN("skip invalid server_info", KR(ret), K(server_info));
} else if (!server_info.is_alive()) {
//not send to alive
} else if (OB_FAIL(arg.init(tenant_info.get_tenant_id(), tenant_info, ora_rowscn))) {
LOG_WARN("failed to init arg", KR(ret), K(tenant_info), K(ora_rowscn));
// use meta rpc process thread
} else if (OB_FAIL(proxy.call(status.server_, DEFAULT_TIMEOUT_US, gen_meta_tenant_id(tenant_info.get_tenant_id()), arg))) {
LOG_WARN("failed to send rpc", KR(ret), K(status), K(tenant_info), K(arg));
} else if (OB_FAIL(proxy.call(server_info.get_server(), DEFAULT_TIMEOUT_US, gen_meta_tenant_id(tenant_info.get_tenant_id()), arg))) {
LOG_WARN("failed to send rpc", KR(ret), K(server_info), K(tenant_info), K(arg));
} else {
rpc_count++;
}
return ret;
}))) {
LOG_WARN("for each server status failed", KR(ret));
LOG_WARN("for each server_info failed", KR(ret));
}
int tmp_ret = OB_SUCCESS;

File diff suppressed because it is too large Load Diff

View File

@ -414,10 +414,6 @@ public:
const common::ObIArray<common::ObZone> &schema_zone_list,
const common::ObIArray<share::ObZoneReplicaNumSet> &zone_locality,
bool &is_legal);
// get all server loads
int get_server_loads(const common::ObZone &zone,
common::ObArray<ObServerLoad> &server_loads,
double *weights, int64_t weights_count);
static int calc_sum_load(const common::ObArray<ObUnitLoad> *unit_loads,
share::ObUnitConfig &sum_load);
// get hard limit
@ -528,25 +524,32 @@ protected:
const share::ObUnitStat &unit_stat,
const common::ObIArray<share::ObUnitStat> &migrating_unit_stat,
const common::ObAddr &dst,
const share::ObServerResourceInfo &dst_resource_info,
const bool is_manual = false);
int get_zone_units(const common::ObArray<share::ObResourcePool *> &pools,
common::ObArray<ZoneUnit> &zone_units) const;
virtual int end_migrate_unit(const uint64_t unit_id, const EndMigrateOp end_migrate_op = COMMIT);
int get_excluded_servers(const share::ObUnit &unit,
const share::ObUnitStat &unit_stat,
const char *module,
common::ObIArray<common::ObAddr> &servers) const;
int get_excluded_servers(
const share::ObUnit &unit,
const share::ObUnitStat &unit_stat,
const char *module,
const ObIArray<share::ObServerInfoInTable> &servers_info, // servers info in unit.zone_
const ObIArray<obrpc::ObGetServerResourceInfoResult> &report_servers_resource_info, // active servers' resource info in unit.zone_
common::ObIArray<common::ObAddr> &servers) const;
int get_excluded_servers(const uint64_t resource_pool_id,
const common::ObZone &zone,
const char *module,
const bool new_allocate_pool,
common::ObIArray<common::ObAddr> &excluded_servers) const;
int choose_server_for_unit(const share::ObUnitResource &config,
const common::ObZone &zone,
const common::ObArray<common::ObAddr> &excluded_servers,
const char *module,
common::ObAddr &server,
std::string &resource_not_enough_reason) const;
int choose_server_for_unit(
const share::ObUnitResource &config,
const common::ObZone &zone,
const common::ObArray<common::ObAddr> &excluded_servers,
const char *module,
const ObIArray<share::ObServerInfoInTable> &active_servers_info, // active_servers_info of the given zone,
const ObIArray<obrpc::ObGetServerResourceInfoResult> &active_servers_resource_info, // active_servers_resource_info of the given zone
common::ObAddr &server,
std::string &resource_not_enough_reason) const;
int inner_choose_server_for_unit(const share::ObUnitConfig &config,
const common::ObZone &zone,
const common::ObArray<common::ObAddr> &excluded_servers,
@ -569,14 +572,7 @@ protected:
const uint64_t tenant_id,
const int64_t unit_group_num,
common::ObIArray<uint64_t> &new_unit_group_id_array);
int get_server_loads_internal(const common::ObZone &zone,
const bool only_active,
common::ObArray<ObServerLoad> &server_loads,
double &sum_load,
int64_t &alive_server_count,
double *weights, int64_t weights_count);
int check_unit_group_normal(const share::ObUnit &unit, bool &normal);
int check_can_migrate_in(const common::ObAddr &server, bool &can_migrate_in) const;
int get_migrate_units_by_server(const ObAddr &server,
common::ObIArray<uint64_t> &migrate_units) const;
int try_cancel_migrate_unit(const share::ObUnit &unit, bool &is_canceled);
@ -590,7 +586,7 @@ protected:
int check_has_intersect_pg(const share::ObUnit &a,
const share::ObUnit &b,
bool &intersect);
int have_enough_resource(const share::ObServerStatus &server_status,
int have_enough_resource(const obrpc::ObGetServerResourceInfoResult &report_server_resource_info,
const share::ObUnitResource &unit_resource,
const double limit,
bool &is_enough,
@ -1047,7 +1043,10 @@ protected:
common::ObPooledAllocator<common::ObArray<share::ObResourcePool *> > &allocator,
const uint64_t id,
share::ObResourcePool *resource_pool);
int cancel_migrate_unit(const share::ObUnit &unit, const bool is_gts_unit);
int cancel_migrate_unit(
const share::ObUnit &unit,
const bool migrate_from_server_can_migrate_in,
const bool is_gts_unit);
int check_split_pool_name_condition(
const common::ObIArray<share::ObResourcePoolName> &split_pool_name_list);
int check_split_pool_zone_condition(
@ -1133,6 +1132,12 @@ protected:
const uint64_t tenant_id,
const bool is_active,
common::ObIArray<uint64_t> &unit_group_id_array);
int get_servers_resource_info_via_rpc(
const ObIArray<share::ObServerInfoInTable> &servers_info,
ObIArray<obrpc::ObGetServerResourceInfoResult> &report_server_resource_info) const;
int get_server_resource_info_via_rpc(
const share::ObServerInfoInTable &server_info,
obrpc::ObGetServerResourceInfoResult &report_servers_resource_info) const ;
private:
int check_shrink_resource_(const common::ObIArray<share::ObResourcePool *> &pools,
@ -1146,7 +1151,8 @@ private:
const common::ObIArray<share::ObResourcePool *> &pools,
const share::ObUnitResource &old_resource,
const share::ObUnitResource &new_resource) const;
int check_expand_resource_(const common::ObAddr &server,
int check_expand_resource_(
const share::ObServerInfoInTable &server_info,
const share::ObUnitResource &expand_resource,
bool &can_expand,
AlterResourceErr &err_index) const;
@ -1170,21 +1176,35 @@ private:
int expand_pool_unit_num_(
share::ObResourcePool *pool,
const int64_t unit_num);
int check_enough_resource_for_delete_server_(
const ObAddr &server,
const ObZone &zone,
const ObIArray<share::ObServerInfoInTable> &servers_info,
const ObIArray<obrpc::ObGetServerResourceInfoResult> &report_servers_resource_info);
int get_servers_resource_info_via_rpc_(
const ObIArray<share::ObServerInfoInTable> &servers_info,
ObIArray<obrpc::ObGetServerResourceInfoResult> &report_servers_resource_info);
static int order_report_servers_resource_info_(
const ObIArray<share::ObServerInfoInTable> &servers_info,
const ObIArray<obrpc::ObGetServerResourceInfoResult> &report_servers_resource_info,
ObIArray<obrpc::ObGetServerResourceInfoResult> &ordered_report_servers_resource_info);
int check_server_have_enough_resource_for_delete_server_(
const ObUnitLoad &unit_load,
const common::ObZone &zone,
const ObIArray<share::ObServerStatus> &statuses,
const ObIArray<share::ObServerInfoInTable> &servers_info,
ObIArray<ObUnitPlacementStrategy::ObServerResource> &initial_servers_resource,
std::string &resource_not_enough_reason);
int compute_server_resource_(const share::ObServerStatus &server_status,
ObUnitPlacementStrategy::ObServerResource &server_resource) const;
int compute_server_resource_(
const obrpc::ObGetServerResourceInfoResult &report_server_resource_info,
ObUnitPlacementStrategy::ObServerResource &server_resource) const;
int build_server_resources_(
const ObIArray<share::ObServerStatus> &statuses,
const ObIArray<obrpc::ObGetServerResourceInfoResult> &report_servers_resource_info,
ObIArray<ObUnitPlacementStrategy::ObServerResource> &initial_server_resource) const;
int do_choose_server_for_unit_(const share::ObUnitResource &config,
const ObZone &zone,
const ObArray<ObAddr> &excluded_servers,
const ObIArray<share::ObServerStatus> &statuses,
const ObIArray<share::ObServerInfoInTable> &servers_info,
const ObIArray<ObUnitPlacementStrategy::ObServerResource> &server_resources,
const char *module,
ObAddr &server,

View File

@ -19,7 +19,7 @@
#include "share/ls/ob_ls_table_operator.h"
#include "share/ob_root_addr_agent.h"
#include "share/ob_debug_sync.h"
#include "rootserver/ob_server_manager.h"
#include "share/ob_all_server_tracer.h"
#include "rootserver/ob_root_utils.h"
#include "rootserver/ob_root_service.h"
#include "observer/ob_server_struct.h"
@ -33,7 +33,7 @@ using namespace share;
ObUpdateRsListTask::ObUpdateRsListTask()
: inited_(false), lst_operator_(NULL),
root_addr_agent_(NULL), server_mgr_(NULL), zone_mgr_(NULL),
root_addr_agent_(NULL), zone_mgr_(NULL),
lock_(NULL), force_update_(false), self_addr_()
{
}
@ -71,7 +71,6 @@ void ObUpdateRsListTask::clear_lock()
int ObUpdateRsListTask::init(ObLSTableOperator &lst_operator,
ObRootAddrAgent *agent,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
SpinRWLock &lock,
const bool force_update,
@ -90,7 +89,6 @@ int ObUpdateRsListTask::init(ObLSTableOperator &lst_operator,
} else {
lst_operator_ = &lst_operator;
root_addr_agent_ = agent;
server_mgr_ = &server_mgr;
zone_mgr_ = &zone_mgr;
lock_ = &lock;
force_update_ = force_update;
@ -115,11 +113,10 @@ int ObUpdateRsListTask::process_without_lock()
if (!inited_) {
ret = OB_NOT_INIT;
LOG_WARN("not init", K(ret));
} else if (OB_ISNULL(lst_operator_)
|| OB_ISNULL(server_mgr_)) {
} else if (OB_ISNULL(lst_operator_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("is null", KP(lst_operator_), KP(server_mgr_));
} else if (OB_FAIL(get_rs_list(*lst_operator_, *server_mgr_, self_addr_,
LOG_WARN("lst_operator_ is null", KP(lst_operator_));
} else if (OB_FAIL(get_rs_list(*lst_operator_, self_addr_,
new_rs_list, new_readonly_rs_list, rs_list_diff_member_list))) {
LOG_WARN("get_rs_list failed", K(ret));
} else if (common::INVALID_CLUSTER_ROLE == cluster_role) {
@ -208,10 +205,10 @@ ObAsyncTask *ObUpdateRsListTask::deep_copy(char *buf, const int64_t buf_size) co
} else {
task = new(buf) ObUpdateRsListTask();
if (OB_FAIL(static_cast<ObUpdateRsListTask *>(task)->init(
*lst_operator_, root_addr_agent_, *server_mgr_,
*lst_operator_, root_addr_agent_,
*zone_mgr_, *lock_, force_update_, self_addr_))) {
LOG_WARN("init task failed", KP(lst_operator_), KP(root_addr_agent_),
KP(server_mgr_), KP(zone_mgr_), KP(lock_), K(ret));
KP(zone_mgr_), KP(lock_), K(ret));
}
if (OB_FAIL(ret)) {
@ -224,7 +221,6 @@ ObAsyncTask *ObUpdateRsListTask::deep_copy(char *buf, const int64_t buf_size) co
int ObUpdateRsListTask::get_rs_list(
ObLSTableOperator &lst,
ObServerManager &server_mgr,
const ObAddr &self_addr,
share::ObIAddrList &rs_list,
share::ObIAddrList &readonly_rs_list,
@ -241,8 +237,8 @@ int ObUpdateRsListTask::get_rs_list(
ObRootAddr rs;
FOREACH_CNT_X(replica, ls_info.get_replicas(), OB_SUCCESS == ret) {
bool is_server_alive = false;
if (server_mgr.has_build()) {
if (OB_FAIL(server_mgr.check_server_alive(replica->get_server(), is_server_alive))) {
if (SVR_TRACER.has_build()) {
if (OB_FAIL(SVR_TRACER.check_server_alive(replica->get_server(), is_server_alive))) {
LOG_WARN("check_server_alive failed", "server", replica->get_server(), KR(ret));
}
} else {

View File

@ -29,7 +29,6 @@ class ObLSTableOperator;
namespace rootserver
{
class ObRootService;
class ObServerManager;
class ObZoneManager;
class ObUpdateRsListTask : public share::ObAsyncTask
{
@ -39,7 +38,6 @@ public:
int init(share::ObLSTableOperator &lst_operator,
share::ObRootAddrAgent *addr_agent_,
ObServerManager &server_mgr,
ObZoneManager &zone_mgr,
common::SpinRWLock &lock,
const bool force_update,
@ -49,7 +47,6 @@ public:
int64_t get_deep_copy_size() const;
share::ObAsyncTask *deep_copy(char *buf, const int64_t buf_size) const;
static int get_rs_list(share::ObLSTableOperator &lst,
ObServerManager &server_mgr,
const common::ObAddr &self_addr,
share::ObIAddrList &rs_list,
share::ObIAddrList &readonly_rs_list,
@ -81,7 +78,6 @@ private:
bool inited_;
share::ObLSTableOperator *lst_operator_;
share::ObRootAddrAgent *root_addr_agent_;
ObServerManager *server_mgr_;
ObZoneManager *zone_mgr_;
common::SpinRWLock *lock_;
bool force_update_;

View File

@ -16,8 +16,9 @@
#include "lib/container/ob_array_serialization.h"
#include "lib/container/ob_array_iterator.h"
#include "rootserver/ob_server_manager.h"
#include "rootserver/ob_unit_manager.h"
#include "share/ob_all_server_tracer.h"
#include "rootserver/ob_root_utils.h"
namespace oceanbase
{
@ -25,10 +26,7 @@ using namespace common;
using namespace share;
namespace rootserver
{
ObVTableLocationGetter::ObVTableLocationGetter(ObServerManager &server_mgr,
ObUnitManager &unit_mgr)
: server_mgr_(server_mgr),
unit_mgr_(unit_mgr)
ObVTableLocationGetter::ObVTableLocationGetter(ObUnitManager &unit_mgr) : unit_mgr_(unit_mgr)
{
}
@ -37,6 +35,8 @@ ObVTableLocationGetter::~ObVTableLocationGetter()
{
}
// **FIXME (linqiucen.lqc): in the future, we can remove unit_mgr_,
// ** then this func can be executed locally on observers
int ObVTableLocationGetter::get(const ObVtableLocationType &vtable_type,
ObSArray<common::ObAddr> &servers)
{
@ -64,7 +64,7 @@ int ObVTableLocationGetter::get(const ObVtableLocationType &vtable_type,
}
if (OB_SUCC(ret) && OB_UNLIKELY(servers.count() <= 0)) {
ret = OB_LOCATION_NOT_EXIST;
LOG_WARN("servers from server_mgr_ are empty", KR(ret), K(vtable_type), K(servers));
LOG_WARN("servers are empty", KR(ret), K(vtable_type), K(servers));
}
return ret;
}
@ -75,11 +75,20 @@ int ObVTableLocationGetter::get_only_rs_vtable_location_(
{
int ret = OB_SUCCESS;
servers.reuse();
ObAddr rs_addr;
if (OB_UNLIKELY(!vtable_type.is_only_rs())) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("vtable_type is invalid", K(vtable_type), KR(ret));
} else if (OB_FAIL(servers.push_back(server_mgr_.get_rs_addr()))) {
LOG_WARN("push_back failed", KR(ret));
} else if (OB_ISNULL(GCTX.rs_mgr_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("GCTX.rs_mgr_ is null", KP(GCTX.rs_mgr_));
} else if (OB_FAIL(GCTX.rs_mgr_->get_master_root_server(rs_addr))) {
LOG_WARN("fail to get master root server", KR(ret), KP(GCTX.rs_mgr_));
} else if (OB_UNLIKELY(!rs_addr.is_valid() || rs_addr != GCTX.self_addr())) {
ret = OB_ENTRY_NOT_EXIST;
LOG_WARN("rs_addr is invalid or not equal to self_addr", KR(ret), K(rs_addr), K(GCTX.self_addr()));
} else if (OB_FAIL(servers.push_back(rs_addr))) {
LOG_WARN("push_back failed", KR(ret), K(rs_addr));
}
return ret;
}
@ -94,12 +103,8 @@ int ObVTableLocationGetter::get_global_vtable_location_(
if (OB_UNLIKELY(!(vtable_type.is_cluster_distributed()))) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("vtable_type is invalid", K(vtable_type), KR(ret));
} else if (!server_mgr_.has_build()) {
ret = OB_SERVER_IS_INIT;
LOG_WARN("server manager hasn't built",
"server_mgr built", server_mgr_.has_build(), KR(ret));
} else if (OB_FAIL(server_mgr_.get_alive_servers(zone, servers))) {
LOG_WARN("get_alive_servers failed", KR(ret));
} else if (OB_FAIL(SVR_TRACER.get_alive_servers(zone, servers))) {
LOG_WARN("get_alive_servers failed", KR(ret), KP(GCTX.sql_proxy_));
}
return ret;
}
@ -112,16 +117,15 @@ int ObVTableLocationGetter::get_tenant_vtable_location_(
servers.reuse();
ObArray<ObAddr> unit_servers;
ObArray<uint64_t> pool_ids;
bool unit_mgr_check = unit_mgr_.check_inner_stat();
if (OB_UNLIKELY(!vtable_type.is_valid()
|| !vtable_type.is_tenant_distributed()
|| is_sys_tenant(vtable_type.get_tenant_id()))) { // sys_tenant should get cluster location
ret = OB_INVALID_ARGUMENT;
LOG_WARN("vtable_type is invalid", KR(ret), K(vtable_type));
} else if (!server_mgr_.has_build() || !unit_mgr_.check_inner_stat()) {
} else if (OB_UNLIKELY(!unit_mgr_check)) {
ret = OB_SERVER_IS_INIT;
LOG_WARN("server manager or unit manager hasn't built",
"server_mgr built", server_mgr_.has_build(),
"unit_mgr built", unit_mgr_.check_inner_stat(), KR(ret));
LOG_WARN("unit manager hasn't built", "unit_mgr built", unit_mgr_check, KR(ret));
} else if (OB_FAIL(unit_mgr_.get_pool_ids_of_tenant(vtable_type.get_tenant_id(), pool_ids))) {
LOG_WARN("get_pool_ids_of_tenant failed", KR(ret), K(vtable_type));
} else {
@ -134,8 +138,8 @@ int ObVTableLocationGetter::get_tenant_vtable_location_(
for (int64_t j = 0; OB_SUCC(ret) && j < unit_infos.count(); ++j) {
bool is_alive = false;
const ObUnit &unit = unit_infos.at(j).unit_;
if (OB_FAIL(server_mgr_.check_server_alive(unit.server_, is_alive))) {
LOG_WARN("check_server_alive failed", "server", unit.server_, KR(ret));
if (OB_FAIL(SVR_TRACER.check_server_alive(unit.server_, is_alive))) {
LOG_WARN("check_server_alive failed", KR(ret), K(unit.server_));
} else if (is_alive) {
if (OB_FAIL(unit_servers.push_back(unit.server_))) {
LOG_WARN("push_back failed", KR(ret));
@ -144,10 +148,8 @@ int ObVTableLocationGetter::get_tenant_vtable_location_(
if (OB_SUCC(ret)) {
if (unit.migrate_from_server_.is_valid()) {
if (OB_FAIL(server_mgr_.check_server_alive(
unit.migrate_from_server_, is_alive))) {
LOG_WARN("check_server_alive failed", "server",
unit.migrate_from_server_, KR(ret));
if (OB_FAIL(SVR_TRACER.check_server_alive(unit.migrate_from_server_, is_alive))) {
LOG_WARN("check_server_alive failed", KR(ret), K(unit.migrate_from_server_));
} else if (is_alive) {
if (OB_FAIL(unit_servers.push_back(unit.migrate_from_server_))) {
LOG_WARN("push_back failed", KR(ret));

Some files were not shown because too many files have changed in this diff Show More