diff --git a/deps/oblib/src/common/ob_learner_list.h b/deps/oblib/src/common/ob_learner_list.h index 684866e7ed..9d72ce9b6d 100644 --- a/deps/oblib/src/common/ob_learner_list.h +++ b/deps/oblib/src/common/ob_learner_list.h @@ -15,6 +15,7 @@ #include "lib/container/ob_se_array.h" // SEArray #include "lib/container/ob_se_array_iterator.h" // SEArrayIterator +#include "lib/string/ob_sql_string.h" // ObSqlString #include "lib/utility/ob_unify_serialize.h" // serialize #include "common/ob_member.h" #include @@ -54,6 +55,7 @@ public: int deep_copy(const BaseLearnerList &learner_list); template int deep_copy_to(BaseLearnerList &learner_list) const; + int transform_to_string(common::ObSqlString &output_string) const; TO_STRING_KV("learner_num", learner_array_.count(), K_(learner_array)); // by operator == int64_t get_index_by_learner(const T &learner) const; diff --git a/deps/oblib/src/common/ob_learner_list.ipp b/deps/oblib/src/common/ob_learner_list.ipp index bca847bf8e..ca8e35e074 100644 --- a/deps/oblib/src/common/ob_learner_list.ipp +++ b/deps/oblib/src/common/ob_learner_list.ipp @@ -320,5 +320,37 @@ int64_t BaseLearnerList<MAX_SIZE, T>::get_serialize_size() const size += learner_array_.get_serialize_size(); return size; } + +template <int64_t MAX_SIZE, typename T> +int BaseLearnerList<MAX_SIZE, T>::transform_to_string( + common::ObSqlString &output_string) const +{ + int ret = OB_SUCCESS; + output_string.reset(); + if (0 > get_member_number()) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "invalid argument", K(ret), "learner count", get_member_number()); + } else if (0 == get_member_number()) { + output_string.reset(); + } else { + bool need_comma = false; + common::ObMember learner; + char ip_port[MAX_IP_PORT_LENGTH] = ""; + for (int i = 0; OB_SUCC(ret) && i < get_member_number(); i++) { + if (OB_FAIL(get_member_by_index(i, learner))) { + COMMON_LOG(WARN, "failed to get learner from learner list", K(ret), K(i)); + } else if (OB_FAIL(learner.get_server().ip_port_to_string(ip_port, sizeof(ip_port)))) { + COMMON_LOG(WARN, "convert server to string failed", K(ret), K(learner)); + } else if (need_comma && OB_FAIL(output_string.append(","))) { + COMMON_LOG(WARN, "failed to append comma to string", K(ret)); + } else if (OB_FAIL(output_string.append_fmt("%.*s:%ld", static_cast<int>(sizeof(ip_port)), ip_port, learner.get_timestamp()))) { + COMMON_LOG(WARN, "failed to append ip_port to string", K(ret), K(learner)); + } else { + need_comma = true; + } + } + } + return ret; +} } // namespace common end } // namespace oceanbase end diff --git a/deps/oblib/src/lib/ob_define.h b/deps/oblib/src/lib/ob_define.h index 302399c926..8e38b3161f 100644 --- a/deps/oblib/src/lib/ob_define.h +++ b/deps/oblib/src/lib/ob_define.h @@ -166,6 +166,7 @@ const uint64_t OB_DEFAULT_COLUMN_SRS_ID = 0xffffffffffffffe0; const int64_t OB_MAX_SPAN_LENGTH = 1024; const int64_t OB_MAX_SPAN_TAG_LENGTH = 8 * 1024L; const int64_t OB_MAX_REF_TYPE_LENGTH = 10; +const int64_t OB_MAX_LS_FLAG_LENGTH = 2048; // See ObDeviceHealthStatus for more information const int64_t OB_MAX_DEVICE_HEALTH_STATUS_STR_LENGTH = 20; @@ -281,6 +282,8 @@ const int64_t OB_MAX_COMMAND_LENGTH = 4096; const int64_t OB_MAX_SESSION_STATE_LENGTH = 128; const int64_t OB_MAX_SESSION_INFO_LENGTH = 128; const int64_t OB_MAX_TRANS_STATE_LENGTH = 32; +const int64_t OB_MAX_DUP_TABLE_TABLET_SET_ATTR_LENGTH = 16; +const int64_t OB_MAX_DUP_TABLE_TABLET_SET_STATE_LENGTH = 16; const int64_t OB_MAX_VERSION_LENGTH = 256; const int64_t COLUMN_CHECKSUM_LENGTH = 8 * 1024; const int64_t OB_MAX_SYS_PARAM_INFO_LENGTH = 1024; @@ -1962,9 +1965,7
@@ public: static bool is_replica_type_valid(const int32_t replica_type) { return REPLICA_TYPE_FULL == replica_type - || REPLICA_TYPE_LOGONLY == replica_type - || REPLICA_TYPE_READONLY == replica_type - || REPLICA_TYPE_ENCRYPTION_LOGONLY == replica_type; + || REPLICA_TYPE_READONLY == replica_type; } static bool is_can_elected_replica(const int32_t replica_type) { @@ -2027,14 +2028,13 @@ public: { bool bool_ret = false; - if (REPLICA_TYPE_FULL == source) { + if (REPLICA_TYPE_LOGONLY == source || REPLICA_TYPE_LOGONLY == target) { + bool_ret = false; + } else if (REPLICA_TYPE_FULL == source) { bool_ret = true; } else if (REPLICA_TYPE_READONLY == source && REPLICA_TYPE_FULL == target) { bool_ret = true; - } else if (REPLICA_TYPE_LOGONLY == source && REPLICA_TYPE_FULL == target) { - bool_ret=false; } - return bool_ret; } }; diff --git a/deps/oblib/src/lib/oblog/ob_log_module.h b/deps/oblib/src/lib/oblog/ob_log_module.h index 576b6823c0..f92a4def36 100644 --- a/deps/oblib/src/lib/oblog/ob_log_module.h +++ b/deps/oblib/src/lib/oblog/ob_log_module.h @@ -156,6 +156,7 @@ DEFINE_LOG_SUB_MOD(TRANS) // transaction DEFINE_LOG_SUB_MOD(RU) // transaction DEFINE_LOG_SUB_MOD(REPLAY) // replay engine DEFINE_LOG_SUB_MOD(IMC) +DEFINE_LOG_SUB_MOD(DUP_TABLE) DEFINE_LOG_SUB_MOD(TABLELOCK) // tablelock DEFINE_LOG_SUB_MOD(BLKMGR) // block manager LOG_MOD_END(STORAGE) @@ -424,6 +425,8 @@ LOG_MOD_END(PL) #define TX_LOG(level, info_string, args...) OB_SUB_MOD_LOG(STORAGE, TX, level, info_string, ##args) #define TRANS_LOG(level, info_string, args...) OB_SUB_MOD_LOG(STORAGE, TRANS, level, info_string, ##args) #define _TRANS_LOG(level, _fmt_, args...) _OB_SUB_MOD_LOG(STORAGE, TRANS, level, _fmt_, ##args) +#define DUP_TABLE_LOG(level, info_string,args...) OB_SUB_MOD_LOG(STORAGE, DUP_TABLE, level, info_string, ##args) +#define _DUP_TABLE_LOG(level, _fmt_ , args...) _OB_SUB_MOD_LOG(STORAGE, DUP_TABLE, level, _fmt_, ##args) #define TABLELOCK_LOG(level, info_string, args...) OB_SUB_MOD_LOG(STORAGE, TABLELOCK, level, info_string, ##args) #define _TABLELOCK_LOG(level, _fmt_, args...) OB_SUB_MOD_LOG(STORAGE, TABLELOCK, level, _fmt_, ##args) #define RU_LOG(level, info_string, args...) OB_SUB_MOD_LOG(STORAGE, RU, level, info_string, ##args) @@ -902,6 +905,8 @@ LOG_MOD_END(PL) #define TX_LOG_RET(level, errcode, args...) { int ret = errcode; TX_LOG(level, ##args); } #define TRANS_LOG_RET(level, errcode, args...) { int ret = errcode; TRANS_LOG(level, ##args); } #define _TRANS_LOG_RET(level, errcode, args...) { int ret = errcode; _TRANS_LOG(level, ##args); } +#define DUP_TABLE_LOG_RET(level, errcode, args...) { int ret = errcode; DUP_TABLE_LOG(level, ##args); } +#define _DUP_TABLE_LOG_RET(level, errcode, args...) { int ret = errcode; _DUP_TABLE_LOG(level, ##args); } #define TABLELOCK_LOG_RET(level, errcode, args...) { int ret = errcode; TABLELOCK_LOG(level, ##args); } #define _TABLELOCK_LOG_RET(level, errcode, args...) { int ret = errcode; _TABLELOCK_LOG(level, ##args); } #define RU_LOG_RET(level, errcode, args...) 
{ int ret = errcode; RU_LOG(level, ##args); } diff --git a/deps/oblib/src/lib/oblog/ob_log_module.ipp b/deps/oblib/src/lib/oblog/ob_log_module.ipp index 751264e3dd..89e63cd44a 100644 --- a/deps/oblib/src/lib/oblog/ob_log_module.ipp +++ b/deps/oblib/src/lib/oblog/ob_log_module.ipp @@ -117,6 +117,7 @@ REG_LOG_SUB_MOD(STORAGE, COMPACTION) REG_LOG_SUB_MOD(STORAGE, BSST) REG_LOG_SUB_MOD(STORAGE, MEMT) REG_LOG_SUB_MOD(STORAGE, TRANS) +REG_LOG_SUB_MOD(STORAGE, DUP_TABLE) REG_LOG_SUB_MOD(STORAGE, REPLAY) REG_LOG_SUB_MOD(STORAGE, IMC) REG_LOG_SUB_MOD(STORAGE, TABLELOCK) diff --git a/deps/oblib/src/lib/stat/ob_latch_define.h b/deps/oblib/src/lib/stat/ob_latch_define.h index 832496f0ef..cbc3caf5d6 100644 --- a/deps/oblib/src/lib/stat/ob_latch_define.h +++ b/deps/oblib/src/lib/stat/ob_latch_define.h @@ -312,6 +312,7 @@ LATCH_DEF(MAJOR_FREEZE_DIAGNOSE_LOCK, 299, "major freeze diagnose lock", LATCH_R LATCH_DEF(HB_RESPONSES_LOCK, 300, "hb responses lock", LATCH_READ_PREFER, 2000, 0, HB_RESPONSES_LOCK_WAIT, "hb responses lock") LATCH_DEF(ALL_SERVERS_INFO_IN_TABLE_LOCK, 301, "all servers info in table lock", LATCH_READ_PREFER, 2000, 0, ALL_SERVERS_INFO_IN_TABLE_LOCK_WAIT, "all servers info in table lock") LATCH_DEF(OPT_STAT_GATHER_STAT_LOCK, 302, "optimizer stat gather stat lock", LATCH_FIFO, 2000, 0, OPT_STAT_GATHER_STAT_LOCK_WAIT, "optimizer stat gather stat lock") +LATCH_DEF(DUP_TABLET_LOCK, 303, "dup tablet lock", LATCH_FIFO, 2000, 0, DUP_TABLET_LOCK_WAIT, "dup tablet lock") LATCH_DEF(LATCH_END, 99999, "latch end", LATCH_FIFO, 2000, 0, WAIT_EVENT_END, "latch end") #endif diff --git a/deps/oblib/src/lib/wait_event/ob_wait_event.h b/deps/oblib/src/lib/wait_event/ob_wait_event.h index 244048ebf4..897e8283d6 100644 --- a/deps/oblib/src/lib/wait_event/ob_wait_event.h +++ b/deps/oblib/src/lib/wait_event/ob_wait_event.h @@ -363,6 +363,7 @@ WAIT_EVENT_DEF(MAX_APPLY_SCN_WAIT, 16053, "max apply scn lock wait", "", "", "", WAIT_EVENT_DEF(GC_HANDLER_WAIT, 16054, "gc handler lock wait", "", "", "", CONCURRENCY, "GC_HANDLER_WAIT", true) WAIT_EVENT_DEF(FREEZE_THREAD_POOL_WAIT, 16055, "freeze thread pool wait", "", "", "", CONCURRENCY, "FREEZE_THREAD_POOL_WAIT", true) WAIT_EVENT_DEF(DDL_EXECUTE_LOCK_WAIT, 16056, "ddl execute lock wait", "", "", "", CONCURRENCY, "DDL_EXECUTE_LOCK_WAIT", true) +WAIT_EVENT_DEF(DUP_TABLET_LOCK_WAIT, 16057, "dup tablet lock wait", "", "", "", CONCURRENCY, "DUP_TABLET_LOCK_WAIT", true) // WAIT_EVENT_DEF(TENANT_MGR_TENANT_BUCKET_LOCK_WAIT, 16056, "tenant mgr tenant bucket lock wait", "", "", "", CONCURRENCY, "TENANT_MGR_TENANT_BUCKET_LOCK_WAIT", true) diff --git a/deps/oblib/src/rpc/obrpc/ob_rpc_packet_list.h b/deps/oblib/src/rpc/obrpc/ob_rpc_packet_list.h index c7e2c1ae8e..17957c1226 100644 --- a/deps/oblib/src/rpc/obrpc/ob_rpc_packet_list.h +++ b/deps/oblib/src/rpc/obrpc/ob_rpc_packet_list.h @@ -617,9 +617,9 @@ PCODE_DEF(OB_CHANGE_LEADER, 0x708) PCODE_DEF(OB_GET_GTS_REQUEST, 0x710) PCODE_DEF(OB_GET_GTS_RESPONSE, 0x711) PCODE_DEF(OB_GET_GTS_ERR_RESPONSE, 0x712) -PCODE_DEF(OB_REDO_LOG_SYNC_REQUEST, 0x713) -PCODE_DEF(OB_REDO_LOG_SYNC_RESPONSE, 0x714) -PCODE_DEF(OB_DUP_TABLE_LEASE_RESPONSE, 0x715) +PCODE_DEF(OB_DUP_TABLE_TS_SYNC_REQUEST, 0x713) +PCODE_DEF(OB_DUP_TABLE_TS_SYNC_RESPONSE, 0x714) +PCODE_DEF(OB_DUP_TABLE_BEFORE_PREPARE_REQUEST, 0x715) PCODE_DEF(OB_WRS_GET_CLUSTER_VERSION, 0x716) PCODE_DEF(OB_WRS_CLUSTER_HEARTBEAT, 0x717) PCODE_DEF(OB_HA_GTS_PING_REQUEST, 0x718) diff --git a/mittest/CMakeLists.txt b/mittest/CMakeLists.txt index 7b3a6bb374..dbefd6a913 100644 --- 
a/mittest/CMakeLists.txt +++ b/mittest/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(logservice) add_subdirectory(simple_server) add_subdirectory(mtlenv) +add_subdirectory(multi_replica) diff --git a/mittest/logservice/env/ob_simple_log_cluster_env.cpp b/mittest/logservice/env/ob_simple_log_cluster_env.cpp old mode 100644 new mode 100755 index e19f2ed1c7..30439b9493 --- a/mittest/logservice/env/ob_simple_log_cluster_env.cpp +++ b/mittest/logservice/env/ob_simple_log_cluster_env.cpp @@ -269,7 +269,8 @@ int ObSimpleLogClusterTestEnv::create_paxos_group(const int64_t id, const PalfBa } else { handle->set_location_cache_cb(loc_cb); const ObMemberList &member_list = get_member_list(); - handle->set_initial_member_list(member_list, member_list.get_member_number()); + GlobalLearnerList learner_list; + handle->set_initial_member_list(member_list, member_list.get_member_number(), learner_list); handle->set_paxos_member_region_map(get_member_region_map()); CLOG_LOG(INFO, "set_initial_member_list success", K(id), "addr", svr->get_addr(), K(member_list)); } @@ -306,6 +307,7 @@ int ObSimpleLogClusterTestEnv::create_paxos_group_with_arb( // if member_cnt_ is 3, arb_replica_idx should be 0,1,2 ObMemberList member_list = get_member_list(); ObMember arb_replica; + GlobalLearnerList learner_list; arb_replica_idx = 2; for (int i = 0; i < get_cluster().size(); i++) { auto svr = get_cluster()[i]; @@ -331,7 +333,7 @@ int ObSimpleLogClusterTestEnv::create_paxos_group_with_arb( break; } else if (OB_FAIL(svr->get_palf_env()->create_palf_handle_impl(id, palf::AccessMode::APPEND, palf_base_info, handle))) { CLOG_LOG(WARN, "create_palf_handle_impl failed", K(ret), K(id), KPC(svr)); - } else if (!svr->is_arb_server() && OB_FAIL(handle->set_initial_member_list(member_list, arb_replica, get_member_cnt()-1))) { + } else if (!svr->is_arb_server() && OB_FAIL(handle->set_initial_member_list(member_list, arb_replica, get_member_cnt()-1, learner_list))) { CLOG_LOG(ERROR, "set_initial_member_list failed", K(ret), K(id), KPC(svr)); } else { handle->set_location_cache_cb(loc_cb); diff --git a/mittest/logservice/test_ob_simple_arb_server_mutil_replica.cpp b/mittest/logservice/test_ob_simple_arb_server_mutil_replica.cpp index edd38ca0c8..01ba23aad5 100644 --- a/mittest/logservice/test_ob_simple_arb_server_mutil_replica.cpp +++ b/mittest/logservice/test_ob_simple_arb_server_mutil_replica.cpp @@ -103,6 +103,7 @@ TEST_F(TestObSimpleMutilArbServer, create_mutil_tenant) obrpc::ObSetMemberListArgV2 memberlist_result; const ObMemberList member_list = get_arb_member_list(); const ObMember arb_member = get_arb_member(); + const GlobalLearnerList learner_list; EXPECT_EQ(true, arb_member.is_valid()); rootserver::ObSetMemberListProxy proxy(rpc_proxy, &obrpc::ObSrvRpcProxy::set_member_list); @@ -112,7 +113,8 @@ TEST_F(TestObSimpleMutilArbServer, create_mutil_tenant) ObLSID(1), 2, member_list, - arb_member)); + arb_member, + learner_list)); proxy.call(dst_addr, 1000*1000, cluster_id, 1001, memberlist_arg); proxy.wait(); diff --git a/mittest/logservice/test_ob_simple_arb_server_single_replica.cpp b/mittest/logservice/test_ob_simple_arb_server_single_replica.cpp old mode 100644 new mode 100755 index eb893382c7..f1eecdac78 --- a/mittest/logservice/test_ob_simple_arb_server_single_replica.cpp +++ b/mittest/logservice/test_ob_simple_arb_server_single_replica.cpp @@ -143,12 +143,13 @@ TEST_F(TestObSimpleMutilArbServer, out_interface) ObSimpleArbServer *arb_server = dynamic_cast(iserver); ObTenantRole tenant_role(ObTenantRole::PRIMARY_TENANT); 
int64_t cluster_id = 1; + GlobalLearnerList learner_list; arbserver::GCMsgEpoch epoch = arbserver::GCMsgEpoch(1, 1); EXPECT_EQ(OB_ARBITRATION_SERVICE_ALREADY_EXIST, arb_server->palf_env_mgr_.add_cluster( iserver->get_addr(), cluster_id, "arbserver_test", epoch)); EXPECT_EQ(OB_ENTRY_NOT_EXIST, arb_server->palf_env_mgr_.set_initial_member_list( palflite::PalfEnvKey(cluster_id, 1), arb_server->self_, - 1000, get_member_list(), member, get_member_cnt())); + 1000, get_member_list(), member, get_member_cnt(), learner_list)); EXPECT_EQ(OB_SUCCESS, arb_server->palf_env_mgr_.create_arbitration_instance( palflite::PalfEnvKey(cluster_id, 1), arb_server->self_, 1000, tenant_role)); @@ -157,12 +158,12 @@ TEST_F(TestObSimpleMutilArbServer, out_interface) 1000, tenant_role)); EXPECT_EQ(OB_NOT_SUPPORTED, arb_server->palf_env_mgr_.set_initial_member_list( palflite::PalfEnvKey(cluster_id, 1), arb_server->self_, - 1000, get_member_list(), member, get_member_cnt())); + 1000, get_member_list(), member, get_member_cnt(), learner_list)); ObMemberList member_list = get_member_list(); member_list.add_server(arb_server->self_); EXPECT_EQ(OB_NOT_SUPPORTED, arb_server->palf_env_mgr_.set_initial_member_list( palflite::PalfEnvKey(cluster_id, 1), arb_server->self_, - 1000, member_list, member, get_member_cnt())); + 1000, member_list, member, get_member_cnt(), learner_list)); EXPECT_EQ(OB_SUCCESS, arb_server->palf_env_mgr_.delete_arbitration_instance( palflite::PalfEnvKey(cluster_id, 1), arb_server->self_, 1000)); palflite::PalfEnvLite *palf_env_lite = NULL; diff --git a/mittest/logservice/test_ob_simple_log_access_mode.cpp b/mittest/logservice/test_ob_simple_log_access_mode.cpp index 0502f7c64d..fb261ebd8c 100644 --- a/mittest/logservice/test_ob_simple_log_access_mode.cpp +++ b/mittest/logservice/test_ob_simple_log_access_mode.cpp @@ -87,6 +87,15 @@ TEST_F(TestObSimpleLogClusterAccessMode, basic_change_access_mode) EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_access_mode(mode_version, curr_access_mode)); EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->get_role(unused_role, curr_proposal_id, state)); EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->change_access_mode(curr_proposal_id, mode_version, AccessMode::APPEND, ref_scn)); + // check all member's applied access_mode + sleep(1); + std::vector<PalfHandleImplGuard*> palf_list; + EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list)); + EXPECT_EQ(palf::AccessMode::APPEND, palf_list[0]->palf_handle_impl_->mode_mgr_.applied_mode_meta_.access_mode_); + EXPECT_EQ(palf::AccessMode::APPEND, palf_list[1]->palf_handle_impl_->mode_mgr_.applied_mode_meta_.access_mode_); + EXPECT_EQ(palf::AccessMode::APPEND, palf_list[2]->palf_handle_impl_->mode_mgr_.applied_mode_meta_.access_mode_); + revert_cluster_palf_handle_guard(palf_list); + std::vector<LSN> lsn_array; std::vector<SCN> scn_arrary; EXPECT_EQ(OB_SUCCESS, submit_log(leader, 50, id, lsn_array, scn_arrary)); @@ -223,7 +232,7 @@ TEST_F(TestObSimpleLogClusterAccessMode, add_member) EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_learner(ObMember(get_cluster()[3]->get_addr(), 1), CONFIG_CHANGE_TIMEOUT)); sleep(2); - EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_learner_to_acceptor(ObMember(get_cluster()[3]->get_addr(), 1), CONFIG_CHANGE_TIMEOUT)); + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_learner_to_acceptor(ObMember(get_cluster()[3]->get_addr(), 1), 4, CONFIG_CHANGE_TIMEOUT)); unblock_net(leader_idx, follower2_idx); revert_cluster_palf_handle_guard(palf_list); } diff --git
a/mittest/logservice/test_ob_simple_log_arb.cpp b/mittest/logservice/test_ob_simple_log_arb.cpp index 7c99c9e58e..cbc622e561 100644 --- a/mittest/logservice/test_ob_simple_log_arb.cpp +++ b/mittest/logservice/test_ob_simple_log_arb.cpp @@ -358,8 +358,13 @@ TEST_F(TestObSimpleLogClusterArbService, test_2f1a_config_change) CONFIG_CHANGE_TIMEOUT)); // switch learner + EXPECT_EQ(OB_INVALID_ARGUMENT, leader.palf_handle_impl_->switch_learner_to_acceptor( + ObMember(palf_list[4]->palf_handle_impl_->self_, 1), + 2, + CONFIG_CHANGE_TIMEOUT)); EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_learner_to_acceptor( ObMember(palf_list[4]->palf_handle_impl_->self_, 1), + 3, CONFIG_CHANGE_TIMEOUT)); revert_cluster_palf_handle_guard(palf_list); leader.reset(); diff --git a/mittest/logservice/test_ob_simple_log_config_change.cpp b/mittest/logservice/test_ob_simple_log_config_change.cpp index 15960b3fdc..b29138eb0f 100644 --- a/mittest/logservice/test_ob_simple_log_config_change.cpp +++ b/mittest/logservice/test_ob_simple_log_config_change.cpp @@ -333,7 +333,7 @@ TEST_F(TestObSimpleLogClusterConfigChange, test_basic_config_change) CONFIG_CHANGE_TIMEOUT)); // switch acceptor to learner - EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_acceptor_to_learner(ObMember(palf_list[5]->palf_handle_impl_->self_, 1), CONFIG_CHANGE_TIMEOUT)); + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_acceptor_to_learner(ObMember(palf_list[5]->palf_handle_impl_->self_, 1), 3, CONFIG_CHANGE_TIMEOUT)); // add learner EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_learner(ObMember(palf_list[3]->palf_handle_impl_->self_, 1), CONFIG_CHANGE_TIMEOUT)); EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_learner(ObMember(palf_list[4]->palf_handle_impl_->self_, 1), CONFIG_CHANGE_TIMEOUT)); @@ -391,7 +391,7 @@ TEST_F(TestObSimpleLogClusterConfigChange, test_replace_member) CONFIG_CHANGE_TIMEOUT)); // switch acceptor to learner - EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_acceptor_to_learner(ObMember(palf_list[5]->palf_handle_impl_->self_, 1), CONFIG_CHANGE_TIMEOUT)); + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_acceptor_to_learner(ObMember(palf_list[5]->palf_handle_impl_->self_, 1), 3, CONFIG_CHANGE_TIMEOUT)); // add learner EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_learner(ObMember(palf_list[3]->palf_handle_impl_->self_, 1), CONFIG_CHANGE_TIMEOUT)); EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_learner(ObMember(palf_list[4]->palf_handle_impl_->self_, 1), CONFIG_CHANGE_TIMEOUT)); @@ -404,8 +404,6 @@ TEST_F(TestObSimpleLogClusterConfigChange, test_replace_member) PALF_LOG(INFO, "end test replace_member", K(id)); } -// TODO: config_mgr need support location_cb to get leader for learner -/* TEST_F(TestObSimpleLogClusterConfigChange, learner) { SET_CASE_LOG_FILE(TEST_NAME, "learner"); @@ -424,15 +422,13 @@ TEST_F(TestObSimpleLogClusterConfigChange, learner) region_list.push_back(ObRegion("SHANGHAI")); region_list.push_back(ObRegion("TIANJIN")); region_list.push_back(ObRegion("SHENZHEN")); + region_list.push_back(ObRegion("GUANGZHOU")); const ObMemberList &node_list = get_node_list(); const int64_t CONFIG_CHANGE_TIMEOUT = 10 * 1000 * 1000L; // 10s EXPECT_EQ(OB_SUCCESS, create_paxos_group(id, &loc_cb, leader_idx, leader)); EXPECT_EQ(OB_SUCCESS, get_cluster_palf_handle_guard(id, palf_list)); - PalfHandleGuard new_leader; - int64_t new_leader_idx; - EXPECT_EQ(OB_SUCCESS, get_leader(id, new_leader, new_leader_idx)); - loc_cb.leader_ = get_cluster()[new_leader_idx]->get_addr(); - 
PALF_LOG(INFO, "set leader for loc_cb", "leader", get_cluster()[new_leader_idx]->get_addr()); + loc_cb.leader_ = get_cluster()[leader_idx]->get_addr(); + PALF_LOG(INFO, "set leader for loc_cb", "leader", get_cluster()[leader_idx]->get_addr()); EXPECT_EQ(OB_SUCCESS, submit_log(leader, 100, id)); // case 1: set region and switch_acceptor_to_learner @@ -449,9 +445,11 @@ TEST_F(TestObSimpleLogClusterConfigChange, learner) while (false == check_children_valid(palf_list, all_learner)) { sleep(1); + PALF_LOG(INFO, "check_children_valid 1"); } // change region of one follower bool has_change_region = false; + int64_t diff_region_follower_idx = -1; int64_t another_follower_idx = -1; for (int i = 0; i < ObSimpleLogClusterTestBase::member_cnt_; i++) { const bool not_leader = palf_list[i]->palf_handle_impl_->self_ != leader.palf_handle_impl_->self_; @@ -459,6 +457,7 @@ TEST_F(TestObSimpleLogClusterConfigChange, learner) EXPECT_EQ(OB_SUCCESS, palf_list[i]->palf_handle_impl_->set_region(region_list[0])); region_map.insert(palf_list[i]->palf_handle_impl_->self_, region_list[0]); has_change_region = true; + diff_region_follower_idx = i; } else { if (not_leader) { another_follower_idx = i; @@ -472,6 +471,7 @@ TEST_F(TestObSimpleLogClusterConfigChange, learner) while (false == check_children_valid(palf_list, all_learner)) { sleep(1); + PALF_LOG(INFO, "check_children_valid 2"); } // after setting region of a follower, parents of all learners should be another follower EXPECT_GE(another_follower_idx, 0); @@ -480,6 +480,7 @@ TEST_F(TestObSimpleLogClusterConfigChange, learner) while (false == check_parent(palf_list, all_learner, curr_parent)) { sleep(1); + PALF_LOG(INFO, "check_parent 1"); } // continue submitting log EXPECT_EQ(OB_SUCCESS, submit_log(leader, 20, id)); @@ -488,12 +489,13 @@ TEST_F(TestObSimpleLogClusterConfigChange, learner) // switch current unique parent to learner EXPECT_EQ(OB_SUCCESS, all_learner.add_learner(LogLearner(curr_parent, 1))); - EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_acceptor_to_learner(ObMember(curr_parent, 1), CONFIG_CHANGE_TIMEOUT)); + EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->switch_acceptor_to_learner(ObMember(curr_parent, 1), 2, CONFIG_CHANGE_TIMEOUT)); // after switch follower 1 to learner, a learner will be registered to leader, and other learners will // be registerd to this learner while (false == check_children_valid(palf_list, all_learner)) { sleep(1); + PALF_LOG(INFO, "check_children_valid 3"); } // check learner topology ObAddr leaderschild; @@ -507,35 +509,47 @@ TEST_F(TestObSimpleLogClusterConfigChange, learner) EXPECT_TRUE(expect_children.learner_addr_equal(leaderschild_handle.palf_handle_impl_->config_mgr_.children_)); EXPECT_EQ(OB_SUCCESS, submit_log(leader, 20, id)); // EXPECT_EQ(OB_SUCCESS, check_log_sync(palf_list, get_member_list(), all_learner, leader)); + // learners' regions are different from paxos member, so parent of all learners is leader + // set regions + for (int64_t i = 3; i < ObSimpleLogClusterTestBase::node_cnt_; ++i) { + PalfHandleImplGuard tmp_handle; + common::ObMember learner; + EXPECT_EQ(OB_SUCCESS, node_list.get_member_by_index(i, learner)); + EXPECT_EQ(OB_SUCCESS, get_palf_handle_guard(palf_list, learner.get_server(), tmp_handle)); + EXPECT_EQ(OB_SUCCESS, tmp_handle.palf_handle_impl_->set_region(region_list[i-2])); + } + sleep(1); + // check children_cnt + while (false == check_children_valid(palf_list, all_learner)) + { + sleep(1); + PALF_LOG(INFO, "check_children_valid 4"); + } + while (false == 
check_parent(palf_list, all_learner, leader.palf_handle_impl_->self_)) + { + sleep(1); + PALF_LOG(INFO, "check_parent 2"); + } + + // switch leader, after switching leader, the parent of all learners is the new leader + const int64_t new_leader_idx = diff_region_follower_idx; + PalfHandleImplGuard new_leader; + EXPECT_EQ(OB_SUCCESS, switch_leader(id, 0, new_leader)); + + while (false == check_children_valid(palf_list, all_learner)) + { + sleep(1); + PALF_LOG(INFO, "check_children_valid 5"); + } + while (false == check_parent(palf_list, all_learner, new_leader.palf_handle_impl_->self_)) + { + sleep(1); + PALF_LOG(INFO, "check_parent 3"); + } + revert_cluster_palf_handle_guard(palf_list); PALF_LOG(INFO, "end test learner", K(id)); - // TODO by yunlong:: after mit test supports sync RPC and switch_leader func become stable, - // add switch_acceptor_to_learner case and switch_leader case - // // case 2: switch leader - // // learners' regions are different from paxos member, so parent of all learners is leader - // // set regions - // for (int64_t i = 3; i < 7; ++i) { - // PalfHandleImplGuard tmp_handle; - // common::ObMember added_learner; - // EXPECT_EQ(OB_SUCCESS, node_list.get_member_by_index(i, added_learner)); - // EXPECT_EQ(OB_SUCCESS, all_learner.add_learner(LogLearner(added_learner.get_server(), region_list[i - 3]))); - // EXPECT_EQ(OB_SUCCESS, get_palf_handle_guard(id, added_learner.get_server(), tmp_handle)); - // EXPECT_EQ(OB_SUCCESS, tmp_handle.set_region(region_list[i-3])); - // EXPECT_EQ(OB_SUCCESS, leader.palf_handle_impl_->add_learner(added_learner, CONFIG_CHANGE_TIMEOUT)); - // } - // sleep(1); - // // check children_cnt - // check_children_valid(palf_list, all_learner); - // check_parent(palf_list, leader.palf_handle_impl_->self_); - // // switch leader - // const int64_t new_leader_idx = 1; - // PalfHandleImplGuard new_leader; - // switch_leader(id, new_leader_idx, new_leader); - // sleep(5); - // check_children_valid(palf_list, all_learner); - // check_parent(palf_list, new_leader.palf_handle_impl_->self_); } -*/ } // end unittest } // end oceanbase diff --git a/mittest/logservice/test_ob_simple_log_flashback.cpp b/mittest/logservice/test_ob_simple_log_flashback.cpp index 9c377d2279..3150faf90c 100644 --- a/mittest/logservice/test_ob_simple_log_flashback.cpp +++ b/mittest/logservice/test_ob_simple_log_flashback.cpp @@ -64,7 +64,8 @@ int ObLogFlashbackService::get_ls_list_(const uint64_t tenant_id, share::ObLSStatusInfo ls_status; int64_t palf_id = -1; palf_handle.get_palf_id(palf_id); - if (OB_FAIL(ls_status.init(tenant_id, share::ObLSID(palf_id), 1, share::ObLSStatus::OB_LS_NORMAL, 1, "z1"))) { + share::ObLSFlag flag(share::ObLSFlag::NORMAL_FLAG); + if (OB_FAIL(ls_status.init(tenant_id, share::ObLSID(palf_id), 1, share::ObLSStatus::OB_LS_NORMAL, 1, "z1", flag))) { CLOG_LOG(WARN, "ls_status init failed", K(ret), K(palf_id)); } else if (OB_FAIL(ls_array.push_back(ls_status))) { CLOG_LOG(WARN, "ls_array push_back failed", K(ret), K(palf_id)); diff --git a/mittest/logservice/test_ob_simple_log_replay.cpp b/mittest/logservice/test_ob_simple_log_replay.cpp index 88d68bc460..34443299a7 100644 --- a/mittest/logservice/test_ob_simple_log_replay.cpp +++ b/mittest/logservice/test_ob_simple_log_replay.cpp @@ -133,7 +133,7 @@ TEST_F(TestObSimpleLogReplayFunc, basic_replay) rp_sv.init(palf_env, &ls_adapter, get_cluster()[0]->get_allocator()); rp_sv.start(); get_cluster()[0]->get_tenant_base()->update_thread_cnt(10); - EXPECT_EQ(OB_SUCCESS, rp_sv.add_ls(ls_id, 
ObReplicaType::REPLICA_TYPE_FULL)); + EXPECT_EQ(OB_SUCCESS, rp_sv.add_ls(ls_id)); EXPECT_EQ(OB_SUCCESS, rp_sv.enable(ls_id, basic_lsn, basic_scn)); { ObReplayStatusGuard guard; @@ -178,7 +178,7 @@ TEST_F(TestObSimpleLogReplayFunc, basic_replay) PalfHandleImplGuard leader_shadow; SCN first_new_scn = SCN::min_scn(); EXPECT_EQ(OB_SUCCESS, create_paxos_group(id_shadow, leader_idx, leader_shadow)); - EXPECT_EQ(OB_SUCCESS, rp_sv.add_ls(ls_id_shadow, ObReplicaType::REPLICA_TYPE_FULL)); + EXPECT_EQ(OB_SUCCESS, rp_sv.add_ls(ls_id_shadow)); EXPECT_EQ(OB_ITER_END, read_log(leader)); EXPECT_EQ(OB_SUCCESS, change_access_mode_to_raw_write(leader_shadow)); EXPECT_EQ(OB_ITER_END, read_and_submit_group_log(leader, leader_shadow)); @@ -239,7 +239,7 @@ TEST_F(TestObSimpleLogReplayFunc, test_flashback_to_padding) CLOG_LOG(INFO, "get_file_end_lsn", K(*iterator_end_lsn_ptr)); return *iterator_end_lsn_ptr; }; - EXPECT_EQ(OB_SUCCESS, rp_sv.add_ls(ls_id, ObReplicaType::REPLICA_TYPE_FULL)); + EXPECT_EQ(OB_SUCCESS, rp_sv.add_ls(ls_id)); EXPECT_EQ(OB_SUCCESS, rp_sv.enable(ls_id, basic_lsn, basic_scn)); { ObReplayStatusGuard guard; @@ -404,7 +404,7 @@ TEST_F(TestObSimpleLogReplayFunc, test_wait_replay_done) return *iterator_end_lsn_ptr; }; iterator_end_lsn = LSN(PALF_BLOCK_SIZE); - EXPECT_EQ(OB_SUCCESS, rp_sv.add_ls(ls_id, ObReplicaType::REPLICA_TYPE_FULL)); + EXPECT_EQ(OB_SUCCESS, rp_sv.add_ls(ls_id)); EXPECT_EQ(OB_SUCCESS, rp_sv.enable(ls_id, basic_lsn, basic_scn)); { ObReplayStatusGuard guard; diff --git a/mittest/mtlenv/storage/test_ls_restore_task_mgr.cpp b/mittest/mtlenv/storage/test_ls_restore_task_mgr.cpp index 57472e9733..2858add377 100644 --- a/mittest/mtlenv/storage/test_ls_restore_task_mgr.cpp +++ b/mittest/mtlenv/storage/test_ls_restore_task_mgr.cpp @@ -109,7 +109,6 @@ public: FakeLS(const int64_t &ls_id) { ls_meta_.tenant_id_ = 1001; ls_meta_.ls_id_ = ObLSID(ls_id); - ls_meta_.replica_type_ = common::ObReplicaType::REPLICA_TYPE_FULL; ls_meta_.migration_status_ = storage::ObMigrationStatus::OB_MIGRATION_STATUS_NONE; ls_meta_.gc_state_ = logservice::LSGCState::NORMAL; ls_meta_.restore_status_ = ObLSRestoreStatus::Status::RESTORE_TABLETS_META; diff --git a/mittest/mtlenv/storage/test_ls_service.cpp b/mittest/mtlenv/storage/test_ls_service.cpp index 3027a9048e..a2618426f8 100644 --- a/mittest/mtlenv/storage/test_ls_service.cpp +++ b/mittest/mtlenv/storage/test_ls_service.cpp @@ -189,8 +189,10 @@ TEST_F(TestLSService, tablet_test) EXPECT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD)); ls = handle.get_ls(); ASSERT_NE(nullptr, ls); + GlobalLearnerList learner_list; ASSERT_EQ(OB_SUCCESS, ls->set_initial_member_list(member_list, - paxos_replica_num)); + paxos_replica_num, + learner_list)); for (int i=0;i<15;i++) { ObRole role; diff --git a/mittest/mtlenv/storage/test_memtable_v2.cpp b/mittest/mtlenv/storage/test_memtable_v2.cpp index e472e992bd..761a663cdb 100644 --- a/mittest/mtlenv/storage/test_memtable_v2.cpp +++ b/mittest/mtlenv/storage/test_memtable_v2.cpp @@ -139,7 +139,7 @@ public: // mock sequence no ObClockGenerator::init(); // mock tx table - ObTxPalfParam palf_param((logservice::ObLogHandler *)(0x01)); + ObTxPalfParam palf_param((logservice::ObLogHandler *)(0x01), (ObDupTableLSHandler *)(0x02)); EXPECT_EQ(OB_SUCCESS, ls_tx_ctx_mgr_.init(tenant_id_, /*tenant_id*/ ls_id_, diff --git a/mittest/mtlenv/storage/test_table_scan_pure_data_table.cpp b/mittest/mtlenv/storage/test_table_scan_pure_data_table.cpp index 57aa47f872..9ce21dfef8 100644 --- 
a/mittest/mtlenv/storage/test_table_scan_pure_data_table.cpp +++ b/mittest/mtlenv/storage/test_table_scan_pure_data_table.cpp @@ -21,6 +21,13 @@ namespace oceanbase { +namespace transaction { + int ObTransService::gen_trans_id_(ObTransID &trans_id) { + trans_id = ObTransID(1001); + return OB_SUCCESS; + } +} + namespace storage { class TestTableScanPureDataTable : public ::testing::Test @@ -53,8 +60,8 @@ TestTableScanPureDataTable::TestTableScanPureDataTable() void TestTableScanPureDataTable::SetUpTestCase() { ASSERT_EQ(OB_SUCCESS, MockTenantModuleEnv::get_instance().init()); - MTL(transaction::ObTransService*)->tx_desc_mgr_.tx_id_allocator_ = - [](transaction::ObTransID &tx_id) { tx_id = transaction::ObTransID(1001); return OB_SUCCESS; }; + // MTL(transaction::ObTransService*)->tx_desc_mgr_.tx_id_allocator_ = + // [](transaction::ObTransID &tx_id) { tx_id = transaction::ObTransID(1001); return OB_SUCCESS; }; SAFE_DESTROY_INSTANCE.init(); SAFE_DESTROY_INSTANCE.start(); ObServerCheckpointSlogHandler::get_instance().is_started_ = true; diff --git a/mittest/mtlenv/storage/test_trans.cpp b/mittest/mtlenv/storage/test_trans.cpp index 60b7eb0ba7..ece2256b41 100644 --- a/mittest/mtlenv/storage/test_trans.cpp +++ b/mittest/mtlenv/storage/test_trans.cpp @@ -138,8 +138,10 @@ void TestTrans::create_ls(uint64_t tenant_id, ObLSID &ls_id, ObLS *&ls) ObMemberList member_list; int64_t paxos_replica_num = 1; (void) member_list.add_server(MockTenantModuleEnv::get_instance().self_addr_); + GlobalLearnerList learner_list; ASSERT_EQ(OB_SUCCESS, ls->set_initial_member_list(member_list, - paxos_replica_num)); + paxos_replica_num, + learner_list)); // check leader LOG_INFO("check leader"); diff --git a/mittest/mtlenv/test_tx_data_table.cpp b/mittest/mtlenv/test_tx_data_table.cpp index d4fabd1103..1195a01008 100644 --- a/mittest/mtlenv/test_tx_data_table.cpp +++ b/mittest/mtlenv/test_tx_data_table.cpp @@ -674,7 +674,6 @@ void TestTxDataTable::fake_ls_(ObLS &ls) ls.ls_meta_.gc_state_ = logservice::LSGCState::NORMAL; ls.ls_meta_.migration_status_ = ObMigrationStatus::OB_MIGRATION_STATUS_NONE; ls.ls_meta_.restore_status_ = ObLSRestoreStatus::RESTORE_NONE; - ls.ls_meta_.replica_type_ = ObReplicaType::REPLICA_TYPE_FULL; ls.ls_meta_.rebuild_seq_ = 0; } diff --git a/mittest/multi_replica/CMakeLists.txt b/mittest/multi_replica/CMakeLists.txt new file mode 100644 index 0000000000..22c82cba36 --- /dev/null +++ b/mittest/multi_replica/CMakeLists.txt @@ -0,0 +1,25 @@ +set(OBSERVER_TEST_SRCS + env/ob_simple_replica.cpp + # env/ob_simple_server_restart_helper.cpp + env/ob_multi_replica_test_base.cpp + ) + +add_library(simple_replica_test ${OBSERVER_TEST_SRCS}) + +target_include_directories( + simple_replica_test PUBLIC + ${CMAKE_SOURCE_DIR}/unittest ${CMAKE_SOURCE_DIR}/mittest) + target_link_libraries(simple_replica_test PUBLIC + oceanbase + ) + +function(ob_unittest_multi_replica case) + ob_unittest(${ARGV}) + target_link_libraries(${case} PRIVATE gtest gmock simple_replica_test oceanbase) + set_tests_properties(${case} PROPERTIES TIMEOUT 360) + set_tests_properties(${case} PROPERTIES LABELS "simple_replica") +endfunction() + +ob_unittest_multi_replica(test_ob_multi_replica_basic) +ob_unittest_multi_replica(test_ob_dup_table_basic) +ob_unittest_multi_replica(test_ob_dup_table_leader_switch) diff --git a/mittest/multi_replica/env/ob_fast_bootstrap.h b/mittest/multi_replica/env/ob_fast_bootstrap.h new file mode 100644 index 0000000000..eb0e6d8ca7 --- /dev/null +++ b/mittest/multi_replica/env/ob_fast_bootstrap.h @@ -0,0 +1,212 @@ 
+#pragma once + +#include "lib/ob_define.h" +#include "lib/profile/ob_trace_id.h" +#include "rootserver/ob_bootstrap.h" +#include "share/schema/ob_schema_service.h" +#include "share/schema/ob_schema_getter_guard.h" +#include "share/schema/ob_multi_version_schema_service.h" +#include "share/schema/ob_ddl_sql_service.h" +#include "share/schema/ob_schema_service_sql_impl.h" + +#include <thread> + +namespace oceanbase +{ +namespace rootserver +{ + +int batch_create_schema_local(uint64_t tenant_id, + ObDDLService &ddl_service, + ObIArray<ObTableSchema> &table_schemas, + const int64_t begin, const int64_t end) +{ + int ret = OB_SUCCESS; + const int64_t begin_time = ObTimeUtility::current_time(); + if (begin < 0 || begin >= end || end > table_schemas.count()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(begin), K(end), "table count", table_schemas.count()); + } else { + ObDDLOperator ddl_operator(ddl_service.get_schema_service(), ddl_service.get_sql_proxy()); + ObMySQLTransaction trans(true); + if (OB_FAIL(trans.start(&ddl_service.get_sql_proxy(), tenant_id))) { + LOG_WARN("start transaction failed", KR(ret)); + } else { + for (int64_t idx = begin; idx < end && OB_SUCC(ret); idx++) { + ObTableSchema &table = table_schemas.at(idx); + const ObString *ddl_stmt = NULL; + bool need_sync_schema_version = !(ObSysTableChecker::is_sys_table_index_tid(table.get_table_id()) || + is_sys_lob_table(table.get_table_id())); + int64_t start_time = ObTimeUtility::current_time(); + if (OB_FAIL(ddl_operator.create_table(table, trans, ddl_stmt, + need_sync_schema_version, + false))) { + LOG_WARN("add table schema failed", K(ret), + "table_id", table.get_table_id(), + "table_name", table.get_table_name()); + } else { + int64_t end_time = ObTimeUtility::current_time(); + LOG_INFO("add table schema succeed", K(idx), + "table_id", table.get_table_id(), + "table_name", table.get_table_name(), "core_table", is_core_table(table.get_table_id()), "cost", end_time-start_time); + } + } + } + if (trans.is_started()) { + const bool is_commit = (OB_SUCCESS == ret); + int tmp_ret = trans.end(is_commit); + if (OB_SUCCESS != tmp_ret) { + LOG_WARN("end trans failed", K(tmp_ret), K(is_commit)); + ret = (OB_SUCCESS == ret) ?
tmp_ret : ret; + } else { + } + } + } + + const int64_t now = ObTimeUtility::current_time(); + LOG_INFO("batch create schema finish", K(ret), "table_count", end - begin, "total_time_used", now - begin_time); + //BOOTSTRAP_CHECK_SUCCESS(); + return ret; +} + +int parallel_create_table_schema(uint64_t tenant_id, ObDDLService &ddl_service, ObIArray<ObTableSchema> &table_schemas) +{ + int ret = OB_SUCCESS; + int64_t begin = 0; + int64_t batch_count = table_schemas.count() / 16; + const int64_t MAX_RETRY_TIMES = 10; + int64_t finish_cnt = 0; + std::vector<std::thread> ths; + ObCurTraceId::TraceId *cur_trace_id = ObCurTraceId::get_trace_id(); + for (int64_t i = 0; OB_SUCC(ret) && i < table_schemas.count(); ++i) { + if (table_schemas.count() == (i + 1) || (i + 1 - begin) >= batch_count) { + std::thread th([&, begin, i, cur_trace_id] () { + int ret = OB_SUCCESS; + ObCurTraceId::set(*cur_trace_id); + int64_t retry_times = 1; + while (OB_SUCC(ret)) { + if (OB_FAIL(batch_create_schema_local(tenant_id, ddl_service, table_schemas, begin, i + 1))) { + LOG_WARN("batch create schema failed", K(ret), "table count", i + 1 - begin); + // bugfix: + if (retry_times <= MAX_RETRY_TIMES) { + retry_times++; + ret = OB_SUCCESS; + LOG_INFO("schema error while create table, need retry", KR(ret), K(retry_times)); + usleep(1 * 1000 * 1000L); // 1s + } + } else { + ATOMIC_AAF(&finish_cnt, i + 1 - begin); + break; + } + } + LOG_INFO("worker job", K(begin), K(i), K(i-begin), K(ret)); + }); + ths.push_back(std::move(th)); + if (OB_SUCC(ret)) { + begin = i + 1; + } + } + } + for (auto &th : ths) { + th.join(); + } + if (finish_cnt != table_schemas.count()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("parallel_create_table_schema fail", K(finish_cnt), K(table_schemas.count()), K(ret), K(tenant_id)); + } + return ret; +} + +int ObBootstrap::create_all_schema(ObDDLService &ddl_service, + ObIArray<ObTableSchema> &table_schemas) +{ + int ret = OB_SUCCESS; + const int64_t begin_time = ObTimeUtility::current_time(); + LOG_INFO("start create all schemas", "table count", table_schemas.count()); + if (table_schemas.count() <= 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("table_schemas is empty", K(table_schemas), K(ret)); + } else { + // persist __all_core_table's schema in inner table, which is only used for sys views.
+ HEAP_VAR(ObTableSchema, core_table) { + ObArray<ObTableSchema> tmp_tables; + if (OB_FAIL(share::ObInnerTableSchema::all_core_table_schema(core_table))) { + LOG_WARN("fail to construct __all_core_table's schema", KR(ret), K(core_table)); + } else if (OB_FAIL(tmp_tables.push_back(core_table))) { + LOG_WARN("fail to push back __all_core_table's schema", KR(ret), K(core_table)); + } else if (OB_FAIL(batch_create_schema_local(OB_SYS_TENANT_ID, ddl_service, tmp_tables, 0, 1))) { + LOG_WARN("fail to create __all_core_table's schema", KR(ret), K(core_table)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(parallel_create_table_schema(OB_SYS_TENANT_ID, ddl_service, table_schemas))) { + LOG_WARN("create_all_schema", K(ret)); + } + } + LOG_INFO("end create all schemas", K(ret), "table count", table_schemas.count(), + "time_used", ObTimeUtility::current_time() - begin_time); + return ret; +} + +/* +int ObDDLService::create_sys_table_schemas( + ObDDLOperator &ddl_operator, + ObMySQLTransaction &trans, + common::ObIArray<ObTableSchema> &tables) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(check_inner_stat())) { + LOG_WARN("variable is not init", KR(ret)); + } else if (OB_ISNULL(sql_proxy_) || OB_ISNULL(schema_service_) || tables.count() == 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ptr is null", KR(ret), KP_(sql_proxy), KP_(schema_service)); + } else if (OB_FAIL(parallel_create_table_schema(tables.at(0).get_tenant_id(), *this, tables))) { + LOG_WARN("create_sys_table_schemas", K(ret)); + } + return ret; +} +*/ + + +} // end rootserver + +namespace share +{ +namespace schema +{ + +common::SpinRWLock lock_for_schema_version; +int ObSchemaServiceSQLImpl::gen_new_schema_version( + uint64_t tenant_id, + int64_t refreshed_schema_version, + int64_t &schema_version) +{ + int ret = OB_SUCCESS; + schema_version = OB_INVALID_VERSION; + SpinWLockGuard guard(lock_for_schema_version); + if (OB_INVALID_TENANT_ID == tenant_id) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid tenant_id", K(ret), K(tenant_id)); + } else { + if (is_sys_tenant(tenant_id)) { + if (OB_FAIL(gen_leader_sys_schema_version(tenant_id, schema_version))) { + LOG_WARN("failed to gen leader sys tenant_id schema version", K(ret), K(tenant_id)); + } + } else { + // normal tenant + if (OB_FAIL(gen_leader_normal_schema_version(tenant_id, refreshed_schema_version, schema_version))) { + LOG_WARN("failed to gen leader normal schema version", K(ret), K(tenant_id), K(refreshed_schema_version)); + } + + } + } + if (OB_FAIL(ret)) { + } else { + LOG_INFO("new schema version", K(schema_version), "this", OB_P(this)); + } + return ret; +} +} +} + +} // end oceanbase diff --git a/mittest/multi_replica/env/ob_multi_replica_test_base.cpp b/mittest/multi_replica/env/ob_multi_replica_test_base.cpp new file mode 100644 index 0000000000..c9acc4a25c --- /dev/null +++ b/mittest/multi_replica/env/ob_multi_replica_test_base.cpp @@ -0,0 +1,709 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details.
+ */ + +#include +#include "ob_multi_replica_test_base.h" +#include "lib/ob_errno.h" +#include "lib/oblog/ob_log.h" +#include "lib/profile/ob_trace_id.h" +#include "lib/time/ob_time_utility.h" +#include "lib/utility/ob_defer.h" +#include "logservice/palf/election/utils/election_common_define.h" + +namespace oceanbase +{ +namespace unittest +{ + +int set_trace_id(char *buf) { return ObCurTraceId::get_trace_id()->set(buf); } + +void init_log_and_gtest(int argc, char **argv) +{ + if (argc < 1) { + abort(); + } + + std::string app_name = argv[0]; + app_name = app_name.substr(app_name.find_last_of("/\\") + 1); + std::string app_log_name = app_name + ".log"; + std::string app_rs_log_name = app_name + "_rs.log"; + std::string app_ele_log_name = app_name + "_election.log"; + std::string app_gtest_log_name = app_name + "_gtest.log"; + std::string app_trace_log_name = app_name + "_trace.log"; + + system(("rm -rf " + app_log_name + "*").c_str()); + system(("rm -rf " + app_rs_log_name + "*").c_str()); + system(("rm -rf " + app_ele_log_name + "*").c_str()); + system(("rm -rf " + app_gtest_log_name + "*").c_str()); + system(("rm -rf " + app_trace_log_name + "*").c_str()); + system(("rm -rf " + app_name + "_*").c_str()); + + init_gtest_output(app_gtest_log_name); + OB_LOGGER.set_file_name(app_log_name.c_str(), true, false, app_rs_log_name.c_str(), + app_ele_log_name.c_str(), app_trace_log_name.c_str()); +} + +void init_gtest_output(std::string &gtest_log_name) +{ + // check whether the test is running in the Farm environment + char *mit_network_start_port_env = getenv("mit_network_start_port"); + char *mit_network_port_num_env = getenv("mit_network_port_num"); + if (mit_network_start_port_env != nullptr && mit_network_port_num_env != nullptr) { + std::string gtest_file_name = gtest_log_name; + int fd = open(gtest_file_name.c_str(), O_RDWR | O_CREAT, 0666); + if (fd == 0) { + ob_abort(); + } + dup2(fd, STDOUT_FILENO); + dup2(fd, STDERR_FILENO); + } +} + +uint32_t get_local_addr(const char *dev_name) +{ + int fd, intrface; + struct ifreq buf[16]; + struct ifconf ifc; + + if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { + return 0; + } + + ifc.ifc_len = sizeof(buf); + ifc.ifc_buf = (caddr_t)buf; + if (ioctl(fd, SIOCGIFCONF, (char *)&ifc) != 0) { + close(fd); + return 0; + } + + intrface = static_cast<int>(ifc.ifc_len / sizeof(struct ifreq)); + while (intrface-- > 0) { + if (ioctl(fd, SIOCGIFFLAGS, (char *)&buf[intrface]) != 0) { + continue; + } + if ((buf[intrface].ifr_flags & IFF_LOOPBACK) != 0) + continue; + if (!(buf[intrface].ifr_flags & IFF_UP)) + continue; + if (dev_name != NULL && strcmp(dev_name, buf[intrface].ifr_name)) + continue; + if (!(ioctl(fd, SIOCGIFADDR, (char *)&buf[intrface]))) { + close(fd); + return ((struct sockaddr_in *)(&buf[intrface].ifr_addr))->sin_addr.s_addr; + } + } + close(fd); + return 0; +} + +std::string get_local_ip() +{ + uint32_t ip = get_local_addr("bond0"); + if (ip == 0) { + ip = get_local_addr("eth0"); + } + if (ip == 0) { + return ""; + } + return inet_ntoa(*(struct in_addr *)(&ip)); +} + +const char *ObMultiReplicaTestBase::log_disk_size_ = "10G"; +const char *ObMultiReplicaTestBase::memory_size_ = "10G"; +std::shared_ptr<observer::ObSimpleServerReplica> ObMultiReplicaTestBase::replica_ = nullptr; +bool ObMultiReplicaTestBase::is_started_ = false; +bool ObMultiReplicaTestBase::is_inited_ = false; +std::string ObMultiReplicaTestBase::env_prefix_; +std::string ObMultiReplicaTestBase::curr_dir_; +std::string ObMultiReplicaTestBase::env_prefix_path_; +std::string ObMultiReplicaTestBase::event_file_path_; +bool ObMultiReplicaTestBase::enable_env_warn_log_
= false; + +std::vector ObMultiReplicaTestBase::rpc_ports_; +ObServerInfoList ObMultiReplicaTestBase::server_list_; +std::string ObMultiReplicaTestBase::rs_list_; + +std::string ObMultiReplicaTestBase::local_ip_; +int ObMultiReplicaTestBase::child_pid_ = -1; +int ObMultiReplicaTestBase::child_pid2_ = -1; +int ObMultiReplicaTestBase::cur_zone_id_ = 0; + +bool ObMultiReplicaTestBase::block_msg_ = false; + +ObMultiReplicaTestBase::ObMultiReplicaTestBase() +{ +} + +ObMultiReplicaTestBase::~ObMultiReplicaTestBase() {} + +int ObMultiReplicaTestBase::bootstrap_multi_replica(const std::string &env_prefix) +{ + int ret = OB_SUCCESS; + + if (!is_inited_) { + env_prefix_ = + env_prefix + "_test_data"; //+ std::to_string(ObTimeUtility::current_time()) + "_"; + curr_dir_ = get_current_dir_name(); + env_prefix_path_ = curr_dir_ + "/" + env_prefix_; + event_file_path_ = env_prefix_path_ + "/" + CLUSTER_EVENT_FILE_NAME; + if (OB_FAIL(init_replicas_())) { + SERVER_LOG(WARN, "init multi replica failed.", KR(ret)); + } + } + + if (OB_FAIL(ret)) { + // do nothing + } else if (!is_started_) { + if (OB_FAIL(start())) { + SERVER_LOG(WARN, "start multi replica failed.", KR(ret)); + sleep(5); + abort(); + } + } + return ret; +} + +int ObMultiReplicaTestBase::wait_all_test_completed() +{ + int ret = OB_SUCCESS; + std::string zone_str = "ZONE" + std::to_string(cur_zone_id_); + if (OB_FAIL(finish_event(TEST_CASE_FINSH_EVENT_PREFIX + zone_str, zone_str))) { + + } else { + for (int i = 1; i <= MAX_ZONE_COUNT && OB_SUCC(ret); i++) { + zone_str = "ZONE" + std::to_string(i); + if (OB_FAIL( + wait_event_finish(TEST_CASE_FINSH_EVENT_PREFIX + zone_str, zone_str, INT64_MAX))) { + + fprintf(stdout, "[WAIT EVENT] wait target event failed : ret = %d, zone_str = %s\n", ret, + zone_str.c_str()); + } + } + SERVER_LOG(INFO, "ObMultiReplicaTestBase [WAIT EVENT] find all finish event", K(ret), + K(cur_zone_id_), K(TEST_CASE_FINSH_EVENT_PREFIX)); + fprintf(stdout, + "[WAIT EVENT] wait all test case successfully, ret = %d, cur_zone_id = %d, " + "MAX_ZONE_COUNT = %d\n", + ret, cur_zone_id_, MAX_ZONE_COUNT); + } + if (cur_zone_id_ == 1) { + int status = 0; + int status2 = 0; + waitpid(child_pid_, &status, 0); + waitpid(child_pid2_, &status2, 0); + if (0 != status || 0 != status2) { + fprintf(stdout, + "Child process exit with error code : [%d]%d, [%d]%d\n", + child_pid_, status, child_pid2_, status2); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] Child process exit with error code", K(child_pid_), + K(status), K(child_pid2_), K(status2)); + ret = status; + return ret; + } else { + fprintf(stdout, + "Child process run all test cases done. 
[%d]%d, [%d]%d\n", + child_pid_, status, child_pid2_, status2); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] Child process run all test cases done", + K(child_pid_), K(status), K(child_pid2_), K(status2)); + } + } + return ret; +} + +void ObMultiReplicaTestBase::SetUp() +{ + std::string cur_test_case_name = ::testing::UnitTest::GetInstance()->current_test_case()->name(); + std::string cur_test_info_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] SetUp", K(cur_test_case_name.c_str()), + K(cur_test_info_name.c_str())); +} + +void ObMultiReplicaTestBase::TearDown() +{ + std::string cur_test_case_name = ::testing::UnitTest::GetInstance()->current_test_case()->name(); + std::string cur_test_info_name = ::testing::UnitTest::GetInstance()->current_test_info()->name(); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] TearDown", K(cur_test_case_name.c_str()), + K(cur_test_info_name.c_str())); +} + +void ObMultiReplicaTestBase::SetUpTestCase() +{ + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] SetUpTestCase"); +} + +void ObMultiReplicaTestBase::TearDownTestCase() +{ + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] TearDownTestCase"); + + int ret = OB_SUCCESS; + + // fprintf(stdout, ">>>>>>> AFTER RUN TEST: pid = %d\n", getpid()); + if (OB_FAIL(oceanbase::unittest::ObMultiReplicaTestBase::wait_all_test_completed())) { + fprintf(stdout, "wait test case completed failed. ret = %d", ret); + } + if (OB_NOT_NULL(replica_)) { + // ret = close(); + // ASSERT_EQ(ret, OB_SUCCESS); + } + int fail_cnt = ::testing::UnitTest::GetInstance()->failed_test_case_count(); + if (chdir(curr_dir_.c_str()) == 0) { + bool to_delete = true; + if (to_delete) { + // system((std::string("rm -rf ") + env_prefix_ + std::string("*")).c_str()); + } + } + _Exit(fail_cnt); +} + +int ObMultiReplicaTestBase::init_replicas_() +{ + SERVER_LOG(INFO, "init simple cluster test base"); + int ret = OB_SUCCESS; + + system(("rm -rf " + env_prefix_).c_str()); + + SERVER_LOG(INFO, "create dir and change work dir start.", K(env_prefix_.c_str())); + if (OB_FAIL(mkdir(env_prefix_.c_str(), 0777))) { + } else if (OB_FAIL(chdir(env_prefix_.c_str()))) { + } else { + const char *current_dir = env_prefix_.c_str(); + SERVER_LOG(INFO, "create dir and change work dir done.", K(current_dir)); + } + + if (OB_SUCC(ret)) { + local_ip_ = get_local_ip(); + if (local_ip_ == "") { + SERVER_LOG(WARN, "get_local_ip failed"); + return -666666666; + } + } + + // mkdir + std::vector dirs; + rs_list_.clear(); + rpc_ports_.clear(); + server_list_.reset(); + + int server_fd = 0; + for (int i = 1; i <= MAX_ZONE_COUNT && OB_SUCC(ret); i++) { + std::string zone_dir = "zone" + std::to_string(i); + ret = mkdir(zone_dir.c_str(), 0777); + std::string data_dir = zone_dir + "/store"; + dirs.push_back(data_dir); + dirs.push_back(zone_dir + "/run"); + dirs.push_back(zone_dir + "/etc"); + dirs.push_back(zone_dir + "/log"); + dirs.push_back(zone_dir + "/wallet"); + + dirs.push_back(data_dir + "/clog"); + dirs.push_back(data_dir + "/slog"); + dirs.push_back(data_dir + "/sstable"); + + int64_t tmp_port = observer::ObSimpleServerReplica::get_rpc_port(server_fd); + rpc_ports_.push_back(tmp_port); + + rs_list_ += local_ip_ + ":" + std::to_string(rpc_ports_[i - 1]) + ":" + + std::to_string(rpc_ports_[i - 1] + 1); + + if (i < MAX_ZONE_COUNT) { + rs_list_ += ";"; + } + + obrpc::ObServerInfo server_info; + server_info.zone_ = zone_dir.c_str(); + server_info.server_ = + common::ObAddr(common::ObAddr::IPV4, local_ip_.c_str(), rpc_ports_[i - 
1]); + server_info.region_ = "sys_region"; + server_list_.push_back(server_info); + } + + if (OB_SUCC(ret)) { + for (auto &dir : dirs) { + ret = mkdir(dir.c_str(), 0777); + if (OB_FAIL(ret)) { + SERVER_LOG(ERROR, "ObSimpleServerReplica mkdir", K(ret), K(dir.c_str())); + return ret; + } + } + } + + if (OB_SUCC(ret)) { + + child_pid_ = fork(); + child_pid2_ = -1; + + if (child_pid_ < 0) { + perror("fork"); + exit(EXIT_FAILURE); + } else if (child_pid_ > 0) { + child_pid2_ = fork(); + if (child_pid2_ > 0) { + ret = init_test_replica_(1); + } else if (child_pid2_ == 0) { + ret = init_test_replica_(3); + } + } else if (child_pid_ == 0) { + ret = init_test_replica_(2); + } + } + + is_inited_ = true; + return ret; +} + +int ObMultiReplicaTestBase::init_test_replica_(const int zone_id) +{ + int ret = OB_SUCCESS; + + ::testing::GTEST_FLAG(filter) = ObMultiReplicaTestBase::ZONE_TEST_CASE_NAME[zone_id - 1] + "*"; + // std::string output_file_path = + // env_prefix_path_ + "/" + "ZONE" + std::to_string(zone_id) + ".output"; + // ::testing::GTEST_FLAG(output) = output_file_path; + fprintf(stdout, "zone %d test_case_name = %s\n", zone_id, + ObMultiReplicaTestBase::ZONE_TEST_CASE_NAME[zone_id - 1].c_str()); + + if (replica_ == nullptr) { + cur_zone_id_ = zone_id; + replica_ = std::make_shared( + "zone" + std::to_string(zone_id), zone_id, rpc_ports_[zone_id - 1], rs_list_, server_list_, + oceanbase::observer::ObServer::get_instance(), "./store", log_disk_size_, memory_size_); + } else { + SERVER_LOG(ERROR, "construct ObSimpleServerReplica repeatedlly", K(ret), K(zone_id), + K(rpc_ports_[zone_id - 1]), K(rs_list_.c_str())); + } + + if (replica_ != nullptr) { + int ret = replica_->simple_init(); + if (OB_FAIL(ret)) { + SERVER_LOG(ERROR, "init replica failed", K(ret), K(zone_id)); + } + } + return ret; +} + +int ObMultiReplicaTestBase::read_cur_json_document_(rapidjson::Document &json_doc) +{ + int ret = OB_SUCCESS; + FILE *fp = fopen(event_file_path_.c_str(), "r"); + if (fp == NULL) { + if(json_doc.IsObject()) + { + fprintf(stdout, "Fail to open file! 
file_path = %s\n", event_file_path_.c_str()); + } + ret = OB_ENTRY_NOT_EXIST; + return ret; + } + + char read_buffer[2 * 1024 * 1024]; + rapidjson::FileReadStream rs(fp, read_buffer, sizeof(read_buffer)); + + json_doc.ParseStream(rs); + + fclose(fp); + + return OB_SUCCESS; +} + +int ObMultiReplicaTestBase::wait_event_finish(const std::string &event_name, + std::string &event_content, + int64_t wait_timeout_ms, + int64_t retry_interval_ms) +{ + int ret = OB_SUCCESS; + + bool find_event = false; + int64_t start_time = ObTimeUtility::fast_current_time(); + + while (OB_SUCC(ret) && !find_event) { + + rapidjson::Document json_doc; + + if (OB_FAIL(read_cur_json_document_(json_doc))) { + SERVER_LOG(WARN, "read existed json document failed", K(ret)); + if (ret == OB_ENTRY_NOT_EXIST) { + ret = OB_SUCCESS; + } + } else { + rapidjson::Value::ConstMemberIterator iter = json_doc.FindMember(event_name.c_str()); + if (iter == json_doc.MemberEnd()) { + + SERVER_LOG(WARN, "[ObMultiReplicaTestBase] [WAIT EVENT] not find target event", K(ret), + K(event_name.c_str())); + ret = OB_SUCCESS; + } else { + find_event = true; + event_content = std::string(iter->value.GetString(), iter->value.GetStringLength()); + fprintf(stdout, "[WAIT EVENT] find target event : EVENT_KEY = %s; EVENT_VAL = %s\n", + event_name.c_str(), iter->value.GetString()); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] [WAIT EVENT] find target event", + K(event_name.c_str()), K(iter->value.GetString())); + } + } + + if (!find_event) { + if (wait_timeout_ms != INT64_MAX + && ObTimeUtility::fast_current_time() - start_time > wait_timeout_ms * 1000) { + ret = OB_TIMEOUT; + break; + } else { + ob_usleep(retry_interval_ms * 1000); + } + } else { + break; + } + } + + return ret; +} + +int ObMultiReplicaTestBase::finish_event(const std::string &event_name, + const std::string &event_content) +{ + int ret = OB_SUCCESS; + + rapidjson::Document json_doc; + json_doc.Parse("{}"); + + if (OB_FAIL(read_cur_json_document_(json_doc))) { + SERVER_LOG(WARN, "read existed json document failed", K(ret)); + if (ret == OB_ENTRY_NOT_EXIST) { + ret = OB_SUCCESS; + } + } + + if (OB_SUCC(ret)) { + FILE *fp = fopen(event_file_path_.c_str(), "w"); + char write_buffer[2 * 1024 * 1024]; + rapidjson::FileWriteStream file_w_stream(fp, write_buffer, sizeof(write_buffer)); + rapidjson::PrettyWriter prettywriter(file_w_stream); + json_doc.AddMember(rapidjson::StringRef(event_name.c_str(), event_name.size()), + rapidjson::StringRef(event_content.c_str(), event_content.size()), + json_doc.GetAllocator()); + json_doc.Accept(prettywriter); + fclose(fp); + } + + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] [WAIT EVENT] write target event", K(event_name.c_str()), + K(event_content.c_str())); + return ret; +} + +int ObMultiReplicaTestBase::start() +{ + SERVER_LOG(INFO, "start simple cluster test base"); + OB_LOGGER.set_enable_log_limit(false); + // oceanbase::palf::election::GLOBAL_INIT_ELECTION_MODULE(); + // oceanbase::palf::election::INIT_TS = 1; + // oceanbase::palf::election::MAX_TST = 100 * 1000; + GCONF.enable_perf_event = false; + GCONF.enable_sql_audit = true; + GCONF.enable_record_trace_log = false; + GMEMCONF.set_server_memory_limit(10 * 1024 * 1024 * 1024ul); + + int32_t log_level; + bool change_log_level = false; + if (enable_env_warn_log_) { + if (OB_LOGGER.get_log_level() > OB_LOG_LEVEL_WARN) { + change_log_level = true; + log_level = OB_LOGGER.get_log_level(); + OB_LOGGER.set_log_level("WARN"); + } + } + + int ret = replica_->simple_start(); + is_started_ = true; + if 
(change_log_level) { + OB_LOGGER.set_log_level(log_level); + } + return ret; +} + +int ObMultiReplicaTestBase::close() +{ + int ret = OB_SUCCESS; + if (OB_NOT_NULL(replica_)) { + ret = replica_->simple_close(); + } + return ret; +} + +int ObMultiReplicaTestBase::create_tenant(const char *tenant_name, + const char *memory_size, + const char *log_disk_size, + const bool oracle_mode) +{ + SERVER_LOG(INFO, "create tenant start"); + int32_t log_level; + bool change_log_level = false; + if (enable_env_warn_log_) { + if (OB_LOGGER.get_log_level() > OB_LOG_LEVEL_WARN) { + change_log_level = true; + log_level = OB_LOGGER.get_log_level(); + OB_LOGGER.set_log_level("WARN"); + } + } + int ret = OB_SUCCESS; + common::ObMySQLProxy &sql_proxy = replica_->get_sql_proxy(); + int64_t affected_rows = 0; + { + ObSqlString sql; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(sql.assign_fmt("set session ob_trx_timeout=1000000000000;"))) { + SERVER_LOG(WARN, "set session", K(ret)); + } else if (OB_FAIL(sql_proxy.write(sql.ptr(), affected_rows))) { + SERVER_LOG(WARN, "set session", K(ret)); + } + } + { + ObSqlString sql; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(sql.assign_fmt("set session ob_query_timeout=1000000000000;"))) { + SERVER_LOG(WARN, "set session", K(ret)); + } else if (OB_FAIL(sql_proxy.write(sql.ptr(), affected_rows))) { + SERVER_LOG(WARN, "set session", K(ret)); + } + } + { + ObSqlString sql; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(sql.assign_fmt("create resource unit box_ym_%s max_cpu 2, memory_size '%s', " + "log_disk_size='%s';", + tenant_name, memory_size, log_disk_size))) { + SERVER_LOG(WARN, "create_tenant", K(ret)); + } else if (OB_FAIL(sql_proxy.write(sql.ptr(), affected_rows))) { + SERVER_LOG(WARN, "create_tenant", K(ret)); + } + } + { + ObSqlString sql; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(sql.assign_fmt("create resource pool pool_ym_%s unit = 'box_ym_%s', " + "unit_num = 1, zone_list = ('zone1', 'zone2', 'zone3');", + tenant_name, tenant_name))) { + SERVER_LOG(WARN, "create_tenant", K(ret)); + } else if (OB_FAIL(sql_proxy.write(sql.ptr(), affected_rows))) { + SERVER_LOG(WARN, "create_tenant", K(ret)); + } + } + { + ObSqlString sql; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(sql.assign_fmt( + "create tenant %s replica_num = 3, primary_zone='zone1', " + "resource_pool_list=('pool_ym_%s') set ob_tcp_invited_nodes='%%'%s", + tenant_name, tenant_name, + oracle_mode ? 
", ob_compatibility_mode='oracle'" : ";"))) { + SERVER_LOG(WARN, "create_tenant", K(ret)); + } else if (OB_FAIL(sql_proxy.write(sql.ptr(), affected_rows))) { + SERVER_LOG(WARN, "create_tenant", K(ret)); + } + } + if (change_log_level) { + OB_LOGGER.set_log_level(log_level); + } + SERVER_LOG(INFO, "create tenant finish", K(ret)); + return ret; +} + +int ObMultiReplicaTestBase::delete_tenant(const char *tenant_name) +{ + ObSqlString sql; + common::ObMySQLProxy &sql_proxy = replica_->get_sql_proxy(); + sql.assign_fmt("drop tenant %s force", tenant_name); + + int64_t affected_rows = 0; + return sql_proxy.write(sql.ptr(), affected_rows); +} + +int ObMultiReplicaTestBase::get_tenant_id(uint64_t &tenant_id, const char *tenant_name) +{ + SERVER_LOG(INFO, "get_tenant_id"); + int ret = OB_SUCCESS; + ObSqlString sql; + common::ObMySQLProxy &sql_proxy = replica_->get_sql_proxy(); + sql.assign_fmt("select tenant_id from oceanbase.__all_tenant where tenant_name = '%s'", + tenant_name); + SMART_VAR(ObMySQLProxy::MySQLResult, res) + { + if (OB_FAIL(sql_proxy.read(res, sql.ptr()))) { + SERVER_LOG(WARN, "get_tenant_id", K(ret)); + } else { + sqlclient::ObMySQLResult *result = res.get_result(); + if (result != nullptr && OB_SUCC(result->next())) { + ret = result->get_uint("tenant_id", tenant_id); + SERVER_LOG(WARN, "get_tenant_id", K(ret)); + } else { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "get_tenant_id", K(ret)); + } + } + } + return ret; +} + +int ObMultiReplicaTestBase::exec_write_sql_sys(const char *sql_str, int64_t &affected_rows) +{ + int ret = OB_SUCCESS; + ObSqlString sql; + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy(); + return sql_proxy.write(sql_str, affected_rows); +} + +int ObMultiReplicaTestBase::check_tenant_exist(bool &bool_ret, const char *tenant_name) +{ + int ret = OB_SUCCESS; + bool_ret = true; + uint64_t tenant_id; + if (OB_FAIL(get_tenant_id(tenant_id, tenant_name))) { + SERVER_LOG(WARN, "get_tenant_id failed", K(ret)); + } else { + ObSqlString sql; + common::ObMySQLProxy &sql_proxy = replica_->get_sql_proxy(); + sql.assign_fmt("select tenant_id from oceanbase.gv$ob_units where tenant_id= '%" PRIu64 "' ", + tenant_id); + SMART_VAR(ObMySQLProxy::MySQLResult, res) + { + if (OB_FAIL(sql_proxy.read(res, sql.ptr()))) { + SERVER_LOG(WARN, "get gv$ob_units", K(ret)); + } else { + sqlclient::ObMySQLResult *result = res.get_result(); + if (result != nullptr && OB_SUCC(result->next())) { + bool_ret = true; + } else if (result == nullptr) { + bool_ret = false; + } else { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "get_tenant_id", K(ret)); + } + } + } + } + return ret; +} + + +} // namespace unittest +} // namespace oceanbase + + +int ::oceanbase::omt::ObWorkerProcessor::process_err_test() +{ + int ret = OB_SUCCESS; + + if(ATOMIC_LOAD(&::oceanbase::unittest::ObMultiReplicaTestBase::block_msg_)) + { + ret =OB_EAGAIN; + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] block msg process",K(ret)); + } + + return ret; +} diff --git a/mittest/multi_replica/env/ob_multi_replica_test_base.h b/mittest/multi_replica/env/ob_multi_replica_test_base.h new file mode 100644 index 0000000000..7fe1c22d4d --- /dev/null +++ b/mittest/multi_replica/env/ob_multi_replica_test_base.h @@ -0,0 +1,111 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. 
+ * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#pragma once + +#include + +#include "ob_simple_replica.h" +#include +#include +#include +#include +#include + +#define MAX_ZONE_COUNT 3 +#define CLUSTER_EVENT_FILE_NAME "CLUSTER_EVENT" +#define EVENT_KV_SEPARATOR " = " +#define TEST_CASE_FINSH_EVENT_PREFIX "FINISH_TEST_CASE_FOR_" + +namespace oceanbase +{ +namespace unittest +{ + +int set_trace_id(char *buf); +void init_log_and_gtest(int argc, char **argv); +void init_gtest_output(std::string &gtest_log_name); + +class ObMultiReplicaTestBase : public testing::Test +{ +public: + static const int64_t TRANS_TIMEOUT = 5 * 1000 * 1000; + // set_bootstrap_and_create_tenant_warn_log: by default, bootstrap and tenant creation use WARN-level logging to speed up startup + ObMultiReplicaTestBase(); + virtual ~ObMultiReplicaTestBase(); + + static int bootstrap_multi_replica(const std::string &env_prefix = "run_"); + static int wait_all_test_completed(); + static int start(); + static int close(); + observer::ObServer &get_curr_observer() { return replica_->get_observer(); } + observer::ObSimpleServerReplica &get_curr_simple_server() { return *replica_; } + + static int read_cur_json_document_(rapidjson::Document & json_doc); + static int wait_event_finish(const std::string &event_name, + std::string &event_content, + int64_t wait_timeout_ms, + int64_t retry_interval_ms = 1 * 1000); + static int finish_event(const std::string &event_name, const std::string &event_content); + + int create_tenant(const char *tenant_name = DEFAULT_TEST_TENANT_NAME, + const char *memory_size = "2G", + const char *log_disk_size = "2G", + const bool oracle_mode = false); + int delete_tenant(const char *tenant_name = DEFAULT_TEST_TENANT_NAME); + int get_tenant_id(uint64_t &tenant_id, const char *tenant_name = DEFAULT_TEST_TENANT_NAME); + int exec_write_sql_sys(const char *sql_str, int64_t &affected_rows); + int check_tenant_exist(bool &bool_ret, const char *tenant_name = DEFAULT_TEST_TENANT_NAME); + + static std::string ZONE_TEST_CASE_NAME[MAX_ZONE_COUNT]; + +protected: + static int init_replicas_(); + static int init_test_replica_(const int zone_id); + +protected: + virtual void SetUp(); + virtual void TearDown(); + static void SetUpTestCase(); + static void TearDownTestCase(); + +protected: + // Because of how ObServer is used in ob_server.h, only a single observer instance can be started per process for now + static std::shared_ptr replica_; + static bool is_started_; + static bool is_inited_; + static std::thread th_; + static std::string env_prefix_; + static std::string curr_dir_; + static std::string event_file_path_; + static std::string env_prefix_path_; + static bool enable_env_warn_log_; + + static const char *log_disk_size_; + static const char *memory_size_; + + static std::string local_ip_; + + static int cur_zone_id_; + static int child_pid_; + static int child_pid2_; + + static std::vector rpc_ports_; + static ObServerInfoList server_list_; + static std::string rs_list_; + +public: + static bool block_msg_; +}; + +} // namespace unittest +} // namespace oceanbase diff --git a/mittest/multi_replica/env/ob_multi_replica_util.h b/mittest/multi_replica/env/ob_multi_replica_util.h new file mode 100644 index 0000000000..62f17f37fd --- /dev/null +++ b/mittest/multi_replica/env/ob_multi_replica_util.h @@ -0,0 +1,287 @@ +/** + * Copyright (c) 2021 
OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "ob_multi_replica_test_base.h" +#include "storage/tx/ob_trans_define.h" +#include "storage/tx/ob_trans_part_ctx.h" +#include "storage/tx_storage/ob_ls_service.h" + +#ifndef OCEANBASE_MULTI_REPLICA_TEST_UTIL +#define OCEANBASE_MULTI_REPLICA_TEST_UTIL + +// need define TEST_CASE_NAME + +#define ZONE_TEST_CASE_CALSS_NAME_INNER(TEST_CASE_NAME, ZONE_ID) TEST_CASE_NAME##_ZONE##ZONE_ID +#define ZONE_TEST_CASE_CALSS_NAME(TEST_CASE_NAME, ZONE_ID) \ + ZONE_TEST_CASE_CALSS_NAME_INNER(TEST_CASE_NAME, ZONE_ID) + +#define GET_ZONE_TEST_CLASS_NAME(ZONE_ID) ZONE_TEST_CASE_CALSS_NAME(CUR_TEST_CASE_NAME, ZONE_ID) + +#define STR_NAME_INNER(x) #x +#define STR_NAME(x) STR_NAME_INNER(x) + +#define GET_ZONE_TEST_CLASS_STR(ZONE_ID) \ + STR_NAME(ZONE_TEST_CASE_CALSS_NAME(CUR_TEST_CASE_NAME, ZONE_ID)) + +#define DEFINE_MULTI_ZONE_TEST_CASE_CLASS \ + namespace oceanbase \ + { \ + namespace unittest \ + { \ + std::string ObMultiReplicaTestBase::ZONE_TEST_CASE_NAME[MAX_ZONE_COUNT] = { \ + GET_ZONE_TEST_CLASS_STR(1), GET_ZONE_TEST_CLASS_STR(2), GET_ZONE_TEST_CLASS_STR(3)}; \ + \ + class GET_ZONE_TEST_CLASS_NAME(1) : public ObMultiReplicaTestBase \ + { \ + public: \ + GET_ZONE_TEST_CLASS_NAME(1)() : ObMultiReplicaTestBase() {} \ + }; \ + \ + class GET_ZONE_TEST_CLASS_NAME(2) : public ObMultiReplicaTestBase \ + { \ + public: \ + GET_ZONE_TEST_CLASS_NAME(2)() : ObMultiReplicaTestBase() {} \ + }; \ + class GET_ZONE_TEST_CLASS_NAME(3) : public ObMultiReplicaTestBase \ + { \ + public: \ + GET_ZONE_TEST_CLASS_NAME(3)() : ObMultiReplicaTestBase() {} \ + }; \ + TEST_F(GET_ZONE_TEST_CLASS_NAME(1), start_observer) {} \ + TEST_F(GET_ZONE_TEST_CLASS_NAME(2), start_observer) {} \ + TEST_F(GET_ZONE_TEST_CLASS_NAME(3), start_observer) {} \ + } \ + } + +#define MULTI_REPLICA_TEST_MAIN_FUNCTION(TEST_DIR_PREFIX) \ + int main(int argc, char **argv) \ + { \ + int ret = OB_SUCCESS; \ + char *log_level = (char *)"INFO"; \ + oceanbase::unittest::init_log_and_gtest(argc, argv); \ + OB_LOGGER.set_log_level(log_level); \ + ::testing::InitGoogleTest(&argc, argv); \ + if (OB_FAIL(oceanbase::unittest::ObMultiReplicaTestBase::bootstrap_multi_replica( \ + #TEST_DIR_PREFIX))) { \ + fprintf(stdout, "init test case failed. 
ret = %d", ret); \ + return ret; \ + } \ + return RUN_ALL_TESTS(); \ + } + +namespace oceanbase +{ +namespace unittest +{ + +#define CREATE_TEST_TENANT(tenant_id) \ + uint64_t tenant_id; \ + SERVER_LOG(INFO, "create_tenant start"); \ + ASSERT_EQ(OB_SUCCESS, create_tenant()); \ + ASSERT_EQ(OB_SUCCESS, get_tenant_id(tenant_id)); \ + ASSERT_EQ(OB_SUCCESS, get_curr_simple_server().init_sql_proxy2()); \ + SERVER_LOG(INFO, "create_tenant end", K(tenant_id)); + +#define GET_LS(tenant_id, ls_id_num, ls_handle) \ + ObLSHandle ls_handle; \ + { \ + share::ObTenantSwitchGuard tenant_guard; \ + ASSERT_EQ(OB_SUCCESS, tenant_guard.switch_to(tenant_id)); \ + ObLSService *ls_svr = MTL(ObLSService *); \ + ASSERT_NE(nullptr, ls_svr); \ + share::ObLSID ls_id(ls_id_num); \ + ASSERT_EQ(OB_SUCCESS, ls_svr->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD)); \ + ASSERT_NE(nullptr, ls_handle.get_ls()); \ + } + +#define BLOCK_MSG_PROCESSOR(timeout_us) \ + { \ + int ret = OB_SUCCESS; \ + ATOMIC_STORE(&block_msg_, true); \ + ob_usleep(timeout_us); \ + ATOMIC_STORE(&block_msg_, false); \ + } + +#define ACQUIRE_CONN_FROM_SQL_PROXY(CONN_NAME, SQL_PROXY) \ + sqlclient::ObISQLConnection *CONN_NAME = nullptr; \ + ASSERT_EQ(OB_SUCCESS, SQL_PROXY.acquire(CONN_NAME)); \ + ASSERT_NE(nullptr, CONN_NAME); + +#define WRITE_SQL_BY_CONN_INNER(conn, sql_str, ret) \ + { \ + ObSqlString sql; \ + int64_t affected_rows = 0; \ + ASSERT_EQ(OB_SUCCESS, sql.assign(sql_str)); \ + ret = conn->execute_write(OB_SYS_TENANT_ID, sql.ptr(), affected_rows); \ + SERVER_LOG(INFO, "TEST WRITE SQL: ", K(ret), K(sql)); \ + } + +#define WRITE_SQL_BY_CONN(conn, sql_str) \ + { \ + int res_ret = OB_SUCCESS; \ + WRITE_SQL_BY_CONN_INNER(conn, sql_str, res_ret); \ + ASSERT_EQ(res_ret, OB_SUCCESS); \ + } + +#define READ_SQL_BY_CONN(conn, result, sql_str) \ + sqlclient::ObMySQLResult *result = nullptr; \ + ObISQLClient::ReadResult read_res_##result; \ + { \ + ObSqlString sql; \ + ASSERT_EQ(OB_SUCCESS, sql.assign(sql_str)); \ + SERVER_LOG(INFO, "TEST READ SQL: ", K(sql)); \ + ASSERT_EQ(OB_SUCCESS, conn->execute_read(OB_SYS_TENANT_ID, sql.ptr(), read_res_##result)); \ + result = read_res_##result.get_result(); \ + ASSERT_EQ(true, OB_NOT_NULL(result)); \ + } + +#define GET_RUNNGING_TRX_ID(conn, tx_id) \ + { \ + ASSERT_EQ(true, conn != nullptr); \ + std::string sql_str = "select TRANS_ID from oceanbase.V$OB_PROCESSLIST where ID = " \ + + std::to_string(conn->get_sessid()); \ + READ_SQL_BY_CONN(conn, process_result, sql_str.c_str()); \ + ASSERT_EQ(OB_SUCCESS, process_result->next()); \ + ASSERT_EQ(OB_SUCCESS, process_result->get_int("TRANS_ID", tx_id)); \ + ASSERT_EQ(true, ::oceanbase::transaction::ObTransID(tx_id).is_valid()); \ + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] get trans_id in current_connection", \ + K(conn->get_sessid()), K(tx_id)); \ + } + +#define GET_TX_ID_FROM_SQL_AUDIT(conn, sql, tx_id) \ + { \ + common::ObString trace_id; \ + common::ObString query_sql; \ + int64_t request_time = 0; \ + int64_t ret_code = OB_SUCCESS; \ + int64_t retry_cnt = 0; \ + ASSERT_EQ(true, conn != nullptr); \ + std::string sql_str = \ + "select TX_ID, TRACE_ID, REQUEST_TIME, RET_CODE, RETRY_CNT, QUERY_SQL from " \ + "oceanbase.V$OB_SQL_AUDIT where QUERY_SQL like " \ + + std::string(" \"") + std::string(sql) + std::string("\" order by REQUEST_TIME DESC"); \ + READ_SQL_BY_CONN(conn, process_result, sql_str.c_str()); \ + ASSERT_EQ(OB_SUCCESS, process_result->next()); \ + ASSERT_EQ(OB_SUCCESS, process_result->get_int("TX_ID", tx_id)); \ + ASSERT_EQ(OB_SUCCESS, 
process_result->get_varchar("TRACE_ID", trace_id)); \ + ASSERT_EQ(OB_SUCCESS, process_result->get_int("REQUEST_TIME", request_time)); \ + ASSERT_EQ(OB_SUCCESS, process_result->get_int("RET_CODE", ret_code)); \ + ASSERT_EQ(OB_SUCCESS, process_result->get_int("RETRY_CNT", retry_cnt)); \ + ASSERT_EQ(OB_SUCCESS, process_result->get_varchar("QUERY_SQL", query_sql)); \ + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] query sql_audit for tx_id", K(trace_id), K(tx_id), \ + K(request_time), K(ret_code), K(retry_cnt), K(query_sql)); \ + } + +#define PREPARE_CONN_ENV(conn) \ + WRITE_SQL_BY_CONN(connection, "set ob_trx_timeout = 3000000000"); \ + WRITE_SQL_BY_CONN(connection, "set ob_trx_idle_timeout = 3000000000"); \ + WRITE_SQL_BY_CONN(connection, "set ob_query_timeout = 3000000000"); \ + WRITE_SQL_BY_CONN(connection, "set autocommit=0"); + +#define RETRY_UNTIL_TIMEOUT(condition, timeout_us, retry_interval_us) \ + { \ + int64_t start_time = ObTimeUtility::fast_current_time(); \ + while (!(condition)) { \ + if (ObTimeUtility::fast_current_time() - start_time > timeout_us) { \ + ret = OB_TIMEOUT; \ + break; \ + } \ + SERVER_LOG(INFO, "retry one time until timeout", K(condition), K(start_time), \ + K(timeout_us)); \ + ob_usleep(retry_interval_us); \ + } \ + } + +template +class EventArgSerTool +{ +public: + static int serialize_arg(const T &arg, std::string &event_arg) + { + int ret = OB_SUCCESS; + + char tmp_buf[2048]; + memset(tmp_buf, 0, 2048); + int64_t pos = 0; + if (OB_FAIL(arg.serialize(tmp_buf, 2047, pos))) { + + } else { + event_arg = std::string(tmp_buf, pos); + } + SERVER_LOG(INFO, "serialize event arg", K(ret), K(arg)); + return ret; + } + + static int deserialize_arg(T &arg, const std::string &event_arg) + { + int ret = OB_SUCCESS; + + int64_t pos = 0; + if (OB_FAIL(arg.deserialize(event_arg.c_str(), event_arg.size(), pos))) { + } + SERVER_LOG(INFO, "deserialize event arg", K(ret), K(arg)); + return ret; + } +}; + +class TestTxCtxGuard +{ +public: + static bool is_trx_abort_sql_ret(int ret) + { + return ret == OB_TRANS_ROLLBACKED || ret == OB_TRANS_NEED_ROLLBACK || ret == OB_TRANS_KILLED; + } + +public: + TestTxCtxGuard(int64_t tx_id_num, storage::ObLS *ls_ptr) + : tx_id_(tx_id_num), tx_ctx_(nullptr), ls_(ls_ptr) + {} + + ~TestTxCtxGuard() + { + int ret = OB_SUCCESS; + if (tx_ctx_ != nullptr && ls_ != nullptr) { + ret = ls_->revert_tx_ctx(tx_ctx_); + if (ret != OB_SUCCESS) { + TRANS_LOG(ERROR, "revert tx ctx failed", K(ret), KPC(this)); + } + } + } + + int init(bool for_replay) + { + int ret = OB_SUCCESS; + if (OB_ISNULL(ls_) || !tx_id_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "invalid argument", K(ret), K(tx_id_), KPC(ls_)); + } else if (OB_NOT_NULL(tx_ctx_)) { + ret = OB_INIT_TWICE; + TRANS_LOG(WARN, "init test tx ctx guard twice", K(ret), KPC(this)); + + } else if (OB_FAIL(ls_->get_tx_ctx(tx_id_, for_replay, tx_ctx_))) { + TRANS_LOG(WARN, "get tx ctx failed", K(ret), K(for_replay), KPC(this)); + } + + return ret; + } + + TO_STRING_KV(K(tx_id_), KPC(tx_ctx_), KPC(ls_)); + + transaction::ObTransID tx_id_; + transaction::ObPartTransCtx *tx_ctx_; + storage::ObLS *ls_; +}; + +} // namespace unittest +} // namespace oceanbase + +#endif diff --git a/mittest/multi_replica/env/ob_simple_replica.cpp b/mittest/multi_replica/env/ob_simple_replica.cpp new file mode 100644 index 0000000000..087a8509c6 --- /dev/null +++ b/mittest/multi_replica/env/ob_simple_replica.cpp @@ -0,0 +1,439 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#include +#include +#include +#include + +#define private public +#define protected public + +#include "lib/oblog/ob_log.h" +#include "observer/ob_server.h" +#include "storage/tx_storage/ob_ls_service.h" + +#undef private +#undef protected + +#include "ob_simple_replica.h" +#include "lib/allocator/ob_libeasy_mem_pool.h" +#include "ob_mittest_utils.h" + + + +namespace oceanbase +{ +namespace observer +{ + +uint32_t get_local_addr(const char *dev_name) +{ + int fd, intrface; + struct ifreq buf[16]; + struct ifconf ifc; + + if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { + return 0; + } + + ifc.ifc_len = sizeof(buf); + ifc.ifc_buf = (caddr_t)buf; + if (ioctl(fd, SIOCGIFCONF, (char *)&ifc) != 0) { + close(fd); + return 0; + } + + intrface = static_cast(ifc.ifc_len / sizeof(struct ifreq)); + while (intrface-- > 0) { + if (ioctl(fd, SIOCGIFFLAGS, (char *)&buf[intrface]) != 0) { + continue; + } + if ((buf[intrface].ifr_flags & IFF_LOOPBACK) != 0) + continue; + if (!(buf[intrface].ifr_flags & IFF_UP)) + continue; + if (dev_name != NULL && strcmp(dev_name, buf[intrface].ifr_name)) + continue; + if (!(ioctl(fd, SIOCGIFADDR, (char *)&buf[intrface]))) { + close(fd); + return ((struct sockaddr_in *)(&buf[intrface].ifr_addr))->sin_addr.s_addr; + } + } + close(fd); + return 0; +} + +int64_t ObSimpleServerReplica::get_rpc_port(int &server_fd) +{ + return unittest::get_rpc_port(server_fd); +} + +ObSimpleServerReplica::ObSimpleServerReplica(const std::string &env_prefix, + const int zone_id, + const int rpc_port, + const string &rs_list, + const ObServerInfoList &server_list, + ObServer &server , + const std::string &dir_prefix, + const char *log_disk_size, + const char *memory_limit) + : server_(server), + zone_id_(zone_id), + rpc_port_(rpc_port), + rs_list_(rs_list), + server_info_list_(server_list), + data_dir_(dir_prefix), + run_dir_(env_prefix), + log_disk_size_(log_disk_size), + memory_limit_(memory_limit) +{ + // if (ObSimpleServerReplicaRestartHelper::is_restart_) { + // std::string port_file_name = run_dir_ + std::string("/port.txt"); + // FILE *infile = nullptr; + // if (nullptr == (infile = fopen(port_file_name.c_str(), "r"))) { + // ob_abort(); + // } + // fscanf(infile, "%d\n", &rpc_port_); + // } else { + // rpc_port_ = unittest::get_rpc_port(server_fd_); + // } + mysql_port_ = rpc_port_ + 1; +} + +std::string ObSimpleServerReplica::get_local_ip() +{ + uint32_t ip = get_local_addr("bond0"); + if (ip == 0) { + ip = get_local_addr("eth0"); + } + if (ip == 0) { + return ""; + } + return inet_ntoa(*(struct in_addr *)(&ip)); +} + +int ObSimpleServerReplica::simple_init() +{ + int ret = OB_SUCCESS; + + local_ip_ = get_local_ip(); + if (local_ip_ == "") { + SERVER_LOG(WARN, "get_local_ip failed"); + return -666666666; + } + + easy_pool_set_allocator(ob_easy_realloc); + ev_set_allocator(ob_easy_realloc); + + std::string zone_str = "zone" + std::to_string(zone_id_); + + ObServerOptions opts; + opts.cluster_id_ = 1; + opts.rpc_port_ = rpc_port_; + opts.mysql_port_ = mysql_port_; + opts.data_dir_ = data_dir_.c_str(); + opts.zone_ = zone_str.c_str(); + 
opts.appname_ = "test_ob"; + opts.rs_list_ = rs_list_.c_str(); + // NOTE: memory_limit must be kept the same as log_disk_size + optstr_ = std::string(); + optstr_ = optstr_ + "log_disk_size=" + std::string(log_disk_size_) + + ",memory_limit=" + std::string(memory_limit_) + + ",cache_wash_threshold=1G,net_thread_count=4,cpu_count=16,schema_history_expire_time=" + "1d,workers_per_cpu_quota=10,datafile_disk_percentage=2,__min_full_resource_pool_" + "memory=1073741824,system_memory=5G,trace_log_slow_query_watermark=100ms,datafile_" + "size=10G,stack_size=512K"; + opts.optstr_ = optstr_.c_str(); + // opts.devname_ = "eth0"; + opts.use_ipv6_ = false; + + char *curr_dir = get_current_dir_name(); + + if (OB_FAIL(chdir(run_dir_.c_str()))) { + SERVER_LOG(WARN, "change dir failed.", KR(ret), K(curr_dir), K(run_dir_.c_str()), K(errno)); + } else { + SERVER_LOG(INFO, "change dir done.", K(curr_dir), K(run_dir_.c_str())); + } + fprintf(stdout, + "[PID:%d] init opt : zone_id = %d, rpc_port = %d, mysql_port = %d, zone = %s, " + "all_server_count = " + "%ld, rs_list = %s\n", + getpid(), zone_id_, rpc_port_, mysql_port_, zone_str.c_str(), server_info_list_.count(), + rs_list_.c_str()); + + // The working directory has changed, so convert log file paths to absolute paths + for (int i = 0; i < MAX_FD_FILE; i++) { + int len = strlen(OB_LOGGER.log_file_[i].filename_); + if (len > 0) { + std::string ab_file = std::string(curr_dir) + "/" + run_dir_ + "/" + + std::string(OB_LOGGER.log_file_[i].filename_); + SERVER_LOG(INFO, "convert ab file", K(ab_file.c_str())); + MEMCPY(OB_LOGGER.log_file_[i].filename_, ab_file.c_str(), ab_file.size()); + } + } + + ObPLogWriterCfg log_cfg; + ret = server_.init(opts, log_cfg); + if (OB_FAIL(ret)) { + return ret; + } + ret = init_sql_proxy(); + + if (OB_SUCC(ret)) { + if (OB_FAIL(bootstrap_client_.init())) { + SERVER_LOG(WARN, "client init failed", K(ret)); + } else if (OB_FAIL(bootstrap_client_.get_proxy(bootstrap_srv_proxy_))) { + SERVER_LOG(WARN, "get_proxy failed", K(ret)); + } + } + return ret; +} + +int ObSimpleServerReplica::init_sql_proxy() +{ + int ret = OB_SUCCESS; + sql_conn_pool_.set_db_param("root@sys", "", "test"); + common::ObAddr db_addr; + db_addr.set_ip_addr(local_ip_.c_str(), mysql_port_); + + ObConnPoolConfigParam param; + //param.sqlclient_wait_timeout_ = 10; // 10s + // turned up to make the unittest pass + param.sqlclient_wait_timeout_ = 1000; // 1000s + param.long_query_timeout_ = 300*1000*1000; // 300s + param.connection_refresh_interval_ = 200*1000; // 200ms + param.connection_pool_warn_time_ = 10*1000*1000; // 10s + param.sqlclient_per_observer_conn_limit_ = 1000; + ret = sql_conn_pool_.init(db_addr, param); + if (OB_SUCC(ret)) { + sql_conn_pool_.set_mode(common::sqlclient::ObMySQLConnection::DEBUG_MODE); + ret = sql_proxy_.init(&sql_conn_pool_); + } + + return ret; +} + +int ObSimpleServerReplica::init_sql_proxy2(const char *tenant_name, const char *db_name, const bool oracle_mode) +{ + int ret = OB_SUCCESS; + std::string user = oracle_mode ? 
"sys@" : "root@"; + sql_conn_pool2_.set_db_param((user + std::string(tenant_name)).c_str(), "", db_name); + common::ObAddr db_addr; + db_addr.set_ip_addr(local_ip_.c_str(), mysql_port_); + + ObConnPoolConfigParam param; + //param.sqlclient_wait_timeout_ = 10; // 10s + // turn up it, make unittest pass + param.sqlclient_wait_timeout_ = 1000; // 100s + param.long_query_timeout_ = 300*1000*1000; // 120s + param.connection_refresh_interval_ = 200*1000; // 200ms + param.connection_pool_warn_time_ = 10*1000*1000; // 1s + param.sqlclient_per_observer_conn_limit_ = 1000; + ret = sql_conn_pool2_.init(db_addr, param); + if (OB_SUCC(ret)) { + sql_conn_pool2_.set_mode(common::sqlclient::ObMySQLConnection::DEBUG_MODE); + ret = sql_proxy2_.init(&sql_conn_pool2_); + } + + return ret; +} + +int ObSimpleServerReplica::init_sql_proxy_with_short_wait() +{ + int ret = OB_SUCCESS; + sql_conn_pool_with_short_wait_.set_db_param("root@sys", "", "test"); + common::ObAddr db_addr; + db_addr.set_ip_addr(local_ip_.c_str(), mysql_port_); + + ObConnPoolConfigParam param; + //param.sqlclient_wait_timeout_ = 10; // 10s + // turn up it, make unittest pass + param.sqlclient_wait_timeout_ = 3; // 3s + param.long_query_timeout_ = 3*1000*1000; // 3s + param.connection_refresh_interval_ = 200*1000; // 200ms + param.connection_pool_warn_time_ = 10*1000*1000; // 1s + param.sqlclient_per_observer_conn_limit_ = 1000; + ret = sql_conn_pool_with_short_wait_.init(db_addr, param); + if (OB_SUCC(ret)) { + sql_conn_pool_with_short_wait_.set_mode(common::sqlclient::ObMySQLConnection::DEBUG_MODE); + ret = sql_proxy_with_short_wait_.init(&sql_conn_pool_with_short_wait_); + } + + return ret; +} + +int ObSimpleServerReplica::simple_start() +{ + int ret = OB_SUCCESS; + // bootstrap + if (zone_id_ == 1) { + std::thread th([this]() { + int64_t start_time = ObTimeUtility::current_time(); + int ret = OB_SUCCESS; + int64_t curr_time = ObTimeUtility::current_time(); + while (curr_time - start_time < 5 * 60 * 1000 * 1000) { + ret = this->bootstrap(); + if (OB_SUCC(ret)) { + break; + } + ::usleep(200 * 1000); + curr_time = ObTimeUtility::current_time(); + } + SERVER_LOG(INFO, "ObSimpleServerReplica bootstrap th exit", K(ret), K(zone_id_), K(rpc_port_), + K(mysql_port_)); + }); + th_ = std::move(th); + } + SERVER_LOG(INFO, "ObSimpleServerReplica init succ prepare to start...", K(zone_id_), K(rpc_port_), + K(mysql_port_)); + ret = server_.start(); + if (zone_id_ == 1) { + th_.join(); + fprintf(stdout, "[BOOTSTRAP SUCC] zone_id = %d, rpc_port = %d, mysql_port = %d\n", zone_id_, + rpc_port_, mysql_port_); + } + if (OB_SUCC(ret)) { + SERVER_LOG(INFO, "ObSimpleServerReplica start succ", K(zone_id_), K(rpc_port_), K(mysql_port_)); + fprintf(stdout, "[START OBSERVER SUCC] zone_id = %d, rpc_port = %d, mysql_port = %d\n", + zone_id_, rpc_port_, mysql_port_); + } else { + SERVER_LOG(WARN, "ObSimpleServerReplica start failed", K(ret), K(zone_id_), K(rpc_port_), + K(mysql_port_)); + // fprintf(stdout, "start failed. 
ret = %d\n", ret); + ob_abort(); + } + return ret; +} + +int ObSimpleServerReplica::bootstrap() +{ + SERVER_LOG(INFO, "ObSimpleServerReplica::bootstrap start", K(zone_id_), K(rpc_port_), K(mysql_port_)); + int ret = OB_SUCCESS; + /* + if (server_.get_gctx().ob_service_ == nullptr) { + ret = -66666666; + SERVER_LOG(INFO, "observice is nullptr"); + } else { + // observer内部有线程的检查, 这里在新建线程下调用会有问题 + obrpc::ObServerInfo server_info; + server_info.zone_ = "zone1"; + server_info.server_ = common::ObAddr(common::ObAddr::IPV4, local_ip_.c_str(), rpc_port_); + server_info.region_ = "sys_region"; + obrpc::ObBootstrapArg arg; + arg.cluster_role_ = common::PRIMARY_CLUSTER; + arg.server_list_.push_back(server_info); + SERVER_LOG(INFO, "observice.bootstrap call", K(arg), K(ret)); + ret = server_.get_gctx().ob_service_->bootstrap(arg); + SERVER_LOG(INFO, "observice.bootstrap return", K(arg), K(ret)); + } + */ + + // obrpc::ObNetClient client; + // obrpc::ObSrvRpcProxy srv_proxy; + + // } else { + const int64_t timeout = 180 * 1000 * 1000; //180s + common::ObAddr dst_server(common::ObAddr::IPV4, local_ip_.c_str(), rpc_port_); + bootstrap_srv_proxy_.set_server(dst_server); + bootstrap_srv_proxy_.set_timeout(timeout); + // obrpc::ObServerInfo server_info; + // std::string zone_str = "zone" +std::tostrin + // server_info.zone_ = ""; + // server_info.server_ = common::ObAddr(common::ObAddr::IPV4, local_ip_.c_str(), rpc_port_); + // server_info.region_ = "sys_region"; + obrpc::ObBootstrapArg arg; + arg.cluster_role_ = common::PRIMARY_CLUSTER; + arg.server_list_.assign(server_info_list_); + if (OB_FAIL(bootstrap_srv_proxy_.bootstrap(arg))) { + SERVER_LOG(WARN, "bootstrap failed", K(arg), K(ret)); + } + // } + SERVER_LOG(INFO, "ObSimpleServerReplica::bootstrap end", K(ret), K(zone_id_), K(rpc_port_), K(mysql_port_)); + return ret; +} + +int ObSimpleServerReplica::simple_close() +{ + SERVER_LOG(INFO, "ObSimpleServerReplica::simple_close start"); + int ret = OB_SUCCESS; + + // remove ls for exit + /* + ObSEArray tenant_ids; + GCTX.omt_->get_mtl_tenant_ids(tenant_ids); + + auto do_remove_ls = [] (uint64_t tenant_id) { + int ret = OB_SUCCESS; + share::ObTenantSwitchGuard guard; + ObLS *ls; + if (OB_SUCC(guard.switch_to(tenant_id))) { + ObSEArray ls_ids; + common::ObSharedGuard ls_iter; + if (OB_SUCC(MTL(ObLSService*)->get_ls_iter(ls_iter, ObLSGetMod::STORAGE_MOD))) { + while (true) { + if (OB_SUCC(ls_iter->get_next(ls))) { + ls_ids.push_back(ls->get_ls_id()); + } else { + break; + } + } + } + ls_iter.reset(); + SERVER_LOG(INFO, "safe quit need remove ls", K(MTL_ID()), K(ls_ids)); + for (int i = 0; i < ls_ids.count(); i++) { + if (ls_ids.at(i).id() > share::ObLSID::SYS_LS_ID) { + MTL(ObLSService*)->remove_ls(ls_ids.at(i), false); + } + } + MTL(ObLSService*)->remove_ls(share::ObLSID{share::ObLSID::SYS_LS_ID}, false); + } + + }; + for (int64_t i = 0; i < tenant_ids.count(); i++) { + if (tenant_ids.at(i) != OB_SYS_TENANT_ID) { + do_remove_ls(tenant_ids.at(i)); + } + } + do_remove_ls(OB_SYS_TENANT_ID); + */ + + sql_conn_pool_.stop(); + sql_conn_pool_.close_all_connection(); + sql_conn_pool2_.stop(); + sql_conn_pool2_.close_all_connection(); + + SERVER_LOG(INFO, "ObSimpleServerReplica::simple_close set_stop"); + server_.set_stop(); + + SERVER_LOG(INFO, "ObSimpleServerReplica::simple_close wait"); + ret = server_.wait(); + + SERVER_LOG(INFO, "ObSimpleServerReplica::simple_close destroy"); + server_.destroy(); + SERVER_LOG(INFO, "ObSimpleServerReplica::simple_close destroy"); + ObKVGlobalCache::get_instance().destroy(); + 
SERVER_LOG(INFO, "ObSimpleServerReplica::simple_close destroy"); + ObVirtualTenantManager::get_instance().destroy(); + SERVER_LOG(INFO, "ObSimpleServerReplica::simple_close end", K(ret)); + + SERVER_LOG(INFO, "ObSimpleServerReplica::simple_close end", K(ret)); + return ret; +} + +void ObSimpleServerReplica::reset() +{ +} + +} // end observer +} // end oceanbase diff --git a/mittest/multi_replica/env/ob_simple_replica.h b/mittest/multi_replica/env/ob_simple_replica.h new file mode 100644 index 0000000000..64b798646e --- /dev/null +++ b/mittest/multi_replica/env/ob_simple_replica.h @@ -0,0 +1,99 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#pragma once + +#include "observer/ob_server.h" +#include "lib/net/ob_addr.h" +#include "share/ob_srv_rpc_proxy.h" +#include "rpc/obrpc/ob_net_client.h" +#include "share/ob_rpc_struct.h" +#include "lib/mysqlclient/ob_single_mysql_connection_pool.h" + +namespace oceanbase +{ +namespace observer +{ + +#define DEFAULT_TEST_TENANT_NAME "tt1" + +class ObSimpleServerReplica +{ +public: + static const int64_t MAX_WAIT_TENANT_SCHEMA_TIME = 20_s; + + static int64_t get_rpc_port(int &server_fd); +public: + ObSimpleServerReplica(const std::string &env_prefix, + const int zone_id, + const int rpc_port, + const string &rs_list, + const ObServerInfoList &server_list, + ObServer &server = ObServer::get_instance(), + const std::string &dir_prefix = "./store", + const char *log_disk_size = "10G", + const char *memory_limit = "10G"); + ~ObSimpleServerReplica() { reset(); } + ObServer &get_observer() { return server_; } + int simple_init(); + int simple_start(); + int simple_close(); + std::string get_local_ip(); + int bootstrap(); + void reset(); + common::ObMySQLProxy &get_sql_proxy() { return sql_proxy_; } + common::ObMySQLProxy &get_sql_proxy2() { return sql_proxy2_; } + common::ObMySQLProxy &get_sql_proxy_with_short_wait() { return sql_proxy_with_short_wait_; } + common::ObAddr get_addr() + { + common::ObAddr addr; + addr.set_ip_addr(local_ip_.c_str(), rpc_port_); + return addr; + } + + int init_sql_proxy2(const char *tenant_name = DEFAULT_TEST_TENANT_NAME, + const char *db_name = "test", + const bool oracle_mode = false); + int init_sql_proxy_with_short_wait(); + +protected: + int init_sql_proxy(); + +private: + ObServer &server_; + std::thread th_; + std::string local_ip_; + int zone_id_; + int rpc_port_; + int mysql_port_; + std::string rs_list_; + ObServerInfoList server_info_list_; + std::string data_dir_; + std::string optstr_; + std::string run_dir_; + const char *log_disk_size_; + const char *memory_limit_; + common::sqlclient::ObSingleMySQLConnectionPool sql_conn_pool_; + common::ObMySQLProxy sql_proxy_; + common::sqlclient::ObSingleMySQLConnectionPool sql_conn_pool2_; + common::ObMySQLProxy sql_proxy2_; + common::sqlclient::ObSingleMySQLConnectionPool sql_conn_pool_with_short_wait_; + common::ObMySQLProxy sql_proxy_with_short_wait_; + int server_fd_; + bool set_bootstrap_warn_log_; + + obrpc::ObNetClient bootstrap_client_; + obrpc::ObSrvRpcProxy bootstrap_srv_proxy_; +}; + 
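// Usage sketch (illustrative only, kept as a comment; rpc_port, rs_list and server_list are
// assumed to be prepared by the caller, as ObMultiReplicaTestBase::init_test_replica_() does):
//
//   ObSimpleServerReplica replica("zone1" /*env_prefix*/, 1 /*zone_id*/, rpc_port,
//                                 rs_list, server_list);
//   int ret = replica.simple_init();     // init observer, sys sql proxy and bootstrap rpc client
//   if (OB_SUCCESS == ret) {
//     ret = replica.simple_start();      // zone 1 additionally bootstraps the cluster in a thread
//   }
//   common::ObMySQLProxy &sys_proxy = replica.get_sql_proxy();
//   // ... run test SQL through sys_proxy ...
//   ret = replica.simple_close();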
+} // namespace observer +} // namespace oceanbase diff --git a/mittest/multi_replica/test_ob_dup_table_basic.cpp b/mittest/multi_replica/test_ob_dup_table_basic.cpp new file mode 100644 index 0000000000..4d9f101849 --- /dev/null +++ b/mittest/multi_replica/test_ob_dup_table_basic.cpp @@ -0,0 +1,396 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#define USING_LOG_PREFIX SERVER +#define protected public +#define private public + +#include "env/ob_fast_bootstrap.h" +#include "env/ob_multi_replica_util.h" +#include "lib/mysqlclient/ob_mysql_result.h" +#include "storage/tx/ob_dup_table_lease.h" + +using namespace oceanbase::transaction; +using namespace oceanbase::storage; + +#define CUR_TEST_CASE_NAME ObDupTableBasicTest + +DEFINE_MULTI_ZONE_TEST_CASE_CLASS + +MULTI_REPLICA_TEST_MAIN_FUNCTION(test_dup_table_basic_); + +namespace oceanbase +{ +namespace unittest +{ + +struct DupTableBasicArg +{ + uint64_t tenant_id_; + int64_t ls_id_num_; + int64_t table_id_; + int64_t tablet_count_; + ObSEArray tablet_id_array_; + + TO_STRING_KV(K(tenant_id_), K(ls_id_num_), K(table_id_), K(tablet_count_), K(tablet_id_array_)); + + OB_UNIS_VERSION(1); +}; + +OB_SERIALIZE_MEMBER(DupTableBasicArg, + tenant_id_, + ls_id_num_, + table_id_, + tablet_count_, + tablet_id_array_); + +static DupTableBasicArg static_basic_arg_; +sqlclient::ObISQLConnection *static_test_conn_ = nullptr; + +TEST_F(GET_ZONE_TEST_CLASS_NAME(1), create_dup_table) +{ + int ret = OB_SUCCESS; + CREATE_TEST_TENANT(test_tenant_id); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] create test tenant success", K(test_tenant_id)); + + common::ObMySQLProxy &test_tenant_sql_proxy = get_curr_simple_server().get_sql_proxy2(); + + ACQUIRE_CONN_FROM_SQL_PROXY(test_conn, test_tenant_sql_proxy); + + WRITE_SQL_BY_CONN(test_conn, + "CREATE TABLE Persons( " + "PersonID int, " + " LastName varchar(255)," + " FirstName varchar(255)," + " Address varchar(255), " + " City varchar(255), " + " PRIMARY KEY(PersonID)" + ") duplicate_scope='cluster' PARTITION BY hash(PersonID) partitions 10;"); + + READ_SQL_BY_CONN(test_conn, table_info_result, + "select table_id, duplicate_scope from " + "oceanbase.__all_table where table_name = 'Persons' "); + + ASSERT_EQ(OB_SUCCESS, table_info_result->next()); + int64_t table_id; + int64_t dup_scope; + ASSERT_EQ(OB_SUCCESS, table_info_result->get_int("table_id", table_id)); + ASSERT_EQ(OB_SUCCESS, table_info_result->get_int("duplicate_scope", dup_scope)); + ASSERT_EQ(true, table_id > 0); + ASSERT_EQ(true, dup_scope != 0); + + std::string tablet_count_sql = + "select count(*), ls_id from oceanbase.__all_tablet_to_ls where table_id = " + + std::to_string(table_id) + " group by ls_id order by count(*)"; + READ_SQL_BY_CONN(test_conn, tablet_count_result, tablet_count_sql.c_str()); + int64_t tablet_count = 0; + int64_t ls_id_num = 0; + ASSERT_EQ(OB_SUCCESS, tablet_count_result->next()); + ASSERT_EQ(OB_SUCCESS, tablet_count_result->get_int("count(*)", tablet_count)); + ASSERT_EQ(OB_SUCCESS, tablet_count_result->get_int("ls_id", ls_id_num)); + 
ASSERT_EQ(10, tablet_count); + ASSERT_EQ(true, share::ObLSID(ls_id_num).is_valid()); + + std::string tablet_id_sql = "select tablet_id from oceanbase.__all_tablet_to_ls where table_id = " + + std::to_string(table_id) + + " and ls_id = " + std::to_string(ls_id_num); + READ_SQL_BY_CONN(test_conn, tablet_id_reult, tablet_id_sql.c_str()); + while (OB_SUCC(tablet_id_reult->next())) { + int64_t id = 0; + ASSERT_EQ(OB_SUCCESS, tablet_id_reult->get_int("tablet_id", id)); + ASSERT_EQ(true, ObTabletID(id).is_valid()); + ASSERT_EQ(OB_SUCCESS, static_basic_arg_.tablet_id_array_.push_back(id)); + } + ASSERT_EQ(tablet_count, static_basic_arg_.tablet_id_array_.count()); + ASSERT_EQ(OB_ITER_END, ret); + ret = OB_SUCCESS; + + GET_LS(test_tenant_id, ls_id_num, ls_handle); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] -------- before wait dup tablet discover", K(ret), + K(ls_id_num), K(tablet_count)); + RETRY_UNTIL_TIMEOUT(ls_handle.get_ls()->dup_table_ls_handler_.get_dup_tablet_count() + == tablet_count, + 20 * 1000 * 1000, 100 * 1000); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] -------- after wait dup tablet discover", K(ret), + K(ls_id_num), K(ls_handle.get_ls()->dup_table_ls_handler_.get_dup_tablet_count())); + ASSERT_EQ(OB_SUCCESS, ret /*has_dup_tablet*/); + + // DupTableBasicArg static_basic_arg_; + static_basic_arg_.tenant_id_ = test_tenant_id; + static_basic_arg_.ls_id_num_ = ls_id_num; + static_basic_arg_.table_id_ = table_id; + static_basic_arg_.tablet_count_ = tablet_count; + + // char tmp_buf[2048]; + // memset(tmp_buf, 0, 2048); + // int64_t pos = 0; + // ASSERT_EQ(OB_SUCCESS, static_basic_arg_.serialize(tmp_buf, 2047, pos)); + std::string tmp_str; + ASSERT_EQ(OB_SUCCESS, EventArgSerTool::serialize_arg(static_basic_arg_, tmp_str)); + finish_event("CREATE_DUP_TABLE", tmp_str); +} + +void check_dup_tablet_replayed(ObMultiReplicaTestBase *multi_replica_test_ptr, int64_t zone_id) +{ + int ret = OB_SUCCESS; + + std::string tmp_event_val; + ASSERT_EQ(OB_SUCCESS, multi_replica_test_ptr->wait_event_finish("CREATE_DUP_TABLE", tmp_event_val, + 30 * 60 * 1000)); + ASSERT_EQ(OB_SUCCESS, EventArgSerTool::deserialize_arg(static_basic_arg_, tmp_event_val)); + ASSERT_EQ(true, static_basic_arg_.ls_id_num_ > 0); + + uint64_t test_tenant_id = static_basic_arg_.tenant_id_; + // ASSERT_EQ(OB_SUCCESS, multi_replica_test_ptr->get_tenant_id(test_tenant_id)); + GET_LS(test_tenant_id, static_basic_arg_.ls_id_num_, ls_handle); + ASSERT_EQ(false, ls_handle.get_ls()->dup_table_ls_handler_.is_master()); + RETRY_UNTIL_TIMEOUT(ls_handle.get_ls()->dup_table_ls_handler_.get_dup_tablet_count() + == static_basic_arg_.tablet_count_, + 20 * 1000 * 1000, 100 * 1000); + ASSERT_EQ(OB_SUCCESS, ret /*has_dup_tablet*/); + RETRY_UNTIL_TIMEOUT(ls_handle.get_ls() + ->dup_table_ls_handler_.lease_mgr_ptr_->follower_lease_info_ + .durable_lease_.request_ts_ + > 0, + 20 * 1000 * 1000, 100 * 1000); + ASSERT_EQ(OB_SUCCESS, ret /*lease request ts*/); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(2), follower_replay_dup_tablet) +{ + check_dup_tablet_replayed(this, 2); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(3), follower_replay_dup_tablet) +{ + + check_dup_tablet_replayed(this, 3); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(1), leader_lease_info) +{ + int ret = OB_SUCCESS; + + GET_LS(static_basic_arg_.tenant_id_, static_basic_arg_.ls_id_num_, ls_handle); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] -------- before wait dup lease request", K(ret), + K(static_basic_arg_)); + RETRY_UNTIL_TIMEOUT( + 
ls_handle.get_ls()->dup_table_ls_handler_.lease_mgr_ptr_->leader_lease_map_.size() == 2, + 20 * 1000 * 1000, 100 * 1000); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] -------- after wait dup lease request", K(ret), + K(static_basic_arg_), + K(ls_handle.get_ls()->dup_table_ls_handler_.lease_mgr_ptr_->leader_lease_map_.size())); + + RETRY_UNTIL_TIMEOUT( + ls_handle.get_ls()->dup_table_ls_handler_.lease_mgr_ptr_->last_lease_req_cache_handle_time_ + > 0 + && ls_handle.get_ls()->dup_table_ls_handler_.log_operator_ != nullptr + && !ls_handle.get_ls()->dup_table_ls_handler_.log_operator_->is_busy(), + 5 * 1000 * 1000, 100 * 1000); + ASSERT_EQ(OB_SUCCESS, ret /*logging*/); + DupTableLeaderLeaseMap::const_iterator iter = + ls_handle.get_ls()->dup_table_ls_handler_.lease_mgr_ptr_->leader_lease_map_.begin(); + int64_t lease_valid_cnt = 0; + while (iter + != ls_handle.get_ls()->dup_table_ls_handler_.lease_mgr_ptr_->leader_lease_map_.end()) { + EXPECT_EQ(true, iter->second.lease_expired_ts_ > ObTimeUtility::fast_current_time()); + if (iter->second.lease_expired_ts_ > ObTimeUtility::fast_current_time()) { + lease_valid_cnt++; + } + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] check dup table lease expired", K(ret), + K(static_basic_arg_), K(iter->first), K(iter->second)); + iter++; + } + ASSERT_EQ(true, lease_valid_cnt > 0); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(1), dup_table_trx_insert_exec) +{ + int ret = OB_SUCCESS; + + common::ObMySQLProxy &test_tenant_sql_proxy = get_curr_simple_server().get_sql_proxy2(); + + ACQUIRE_CONN_FROM_SQL_PROXY(test_conn, test_tenant_sql_proxy); + + static_test_conn_ = test_conn; + + WRITE_SQL_BY_CONN(test_conn, "begin;"); + + WRITE_SQL_BY_CONN(test_conn, "INSERT INTO Persons VALUES (1, 'a','aa' , 'aaa','a')"); + + // TODO check dup tx after submit redo + + // fprintf(stdout, "============== cur conn: %d", test_conn->get_sessid()); + // sleep(600); +} + +bool check_all_tablet_follower_readable(ObMultiReplicaTestBase *multi_replica_test_ptr, + ObLS *ls_ptr) +{ + int64_t readable_tablet_cnt = 0; + EXPECT_TRUE(static_basic_arg_.tablet_id_array_.count() > 0); + for (int i = 0; i < static_basic_arg_.tablet_id_array_.count(); i++) { + ObTabletID tablet_id(static_basic_arg_.tablet_id_array_[i]); + bool readable = false; + share::SCN max_replayed_scn; + EXPECT_EQ(OB_SUCCESS, ls_ptr->get_max_decided_scn(max_replayed_scn)); + EXPECT_EQ(OB_SUCCESS, ls_ptr->dup_table_ls_handler_.check_dup_tablet_readable( + tablet_id, share::SCN::min_scn(), false /*read_from_leader*/, + max_replayed_scn, readable)); + if (readable) { + readable_tablet_cnt++; + } else { + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] this dup tablet has not been readable now", + K(tablet_id), K(max_replayed_scn), K(readable)); + } + } + + ls_ptr->dup_table_ls_handler_.interface_stat_.dup_table_follower_read_succ_cnt_ -= + readable_tablet_cnt; + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] check all dup tablet readable on follower", + K(static_basic_arg_), K(readable_tablet_cnt), + K(static_basic_arg_.tablet_id_array_.count())); + + return readable_tablet_cnt == static_basic_arg_.tablet_id_array_.count(); +} + +void check_dup_table_insert_readable(ObMultiReplicaTestBase *multi_replica_test_ptr, + int64_t expected_row_cnt, + bool expected_follower_read) +{ + int ret = OB_SUCCESS; + + common::ObMySQLProxy &test_tenant_sql_proxy = + multi_replica_test_ptr->get_curr_simple_server().get_sql_proxy2(); + + ACQUIRE_CONN_FROM_SQL_PROXY(test_conn, test_tenant_sql_proxy); + + GET_LS(static_basic_arg_.tenant_id_, 
static_basic_arg_.ls_id_num_, ls_handle); + int64_t origin_follower_read_cnt = + ls_handle.get_ls()->dup_table_ls_handler_.interface_stat_.dup_table_follower_read_succ_cnt_; + + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] origin follower read cnt", + K(static_basic_arg_), K(origin_follower_read_cnt), + K(ls_handle.get_ls()->dup_table_ls_handler_.interface_stat_.dup_table_follower_read_succ_cnt_), + K(static_basic_arg_.tablet_id_array_.count())); + + if (expected_follower_read) { + RETRY_UNTIL_TIMEOUT( + check_all_tablet_follower_readable(multi_replica_test_ptr, ls_handle.get_ls()), + 30 * 1000 * 1000, 1 * 1000 * 1000); + ASSERT_EQ(OB_SUCCESS, ret /*all tablet readable*/); + } + ASSERT_EQ( + origin_follower_read_cnt, + ls_handle.get_ls()->dup_table_ls_handler_.interface_stat_.dup_table_follower_read_succ_cnt_); + + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] origin follower read cnt2", + K(static_basic_arg_), K(origin_follower_read_cnt), + K(ls_handle.get_ls()->dup_table_ls_handler_.interface_stat_.dup_table_follower_read_succ_cnt_), + K(static_basic_arg_.tablet_id_array_.count())); + + int64_t row_cnt = 0; + READ_SQL_BY_CONN(test_conn, read_insert_result, "select count(*) from Persons;"); + ASSERT_EQ(OB_SUCCESS, read_insert_result->next()); + ASSERT_EQ(OB_SUCCESS, read_insert_result->get_int("count(*)", row_cnt)); + ASSERT_EQ(row_cnt, expected_row_cnt); + + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] origin follower read cnt3", + K(static_basic_arg_), K(origin_follower_read_cnt), + K(ls_handle.get_ls()->dup_table_ls_handler_.interface_stat_.dup_table_follower_read_succ_cnt_), + K(static_basic_arg_.tablet_id_array_.count())); + + if (expected_follower_read) { + ASSERT_EQ(true, + origin_follower_read_cnt + static_basic_arg_.tablet_count_ + <= ls_handle.get_ls() + ->dup_table_ls_handler_.interface_stat_.dup_table_follower_read_succ_cnt_); + } + + ASSERT_EQ(expected_row_cnt, row_cnt); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(2), dup_table_trx_read_uncommitted) +{ + int ret = OB_SUCCESS; + ASSERT_EQ(OB_SUCCESS, get_curr_simple_server().init_sql_proxy2()); + check_dup_table_insert_readable(this, 0, true /*expected_follower_read*/); + ASSERT_EQ(OB_SUCCESS, finish_event("ZONE2_READ_INSERT_UNCOMMITTED", "")); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(3), dup_table_trx_read_uncommitted) +{ + int ret = OB_SUCCESS; + ASSERT_EQ(OB_SUCCESS, get_curr_simple_server().init_sql_proxy2()); + check_dup_table_insert_readable(this, 0, true /*expected_follower_read*/); + ASSERT_EQ(OB_SUCCESS, finish_event("ZONE3_READ_INSERT_UNCOMMITTED", "")); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(1), dup_table_trx_insert_commit) +{ + std::string tmp_event_val; + ASSERT_EQ(OB_SUCCESS, + wait_event_finish("ZONE2_READ_INSERT_UNCOMMITTED", tmp_event_val, 30 * 1000 * 1000)); + ASSERT_EQ(OB_SUCCESS, + wait_event_finish("ZONE3_READ_INSERT_UNCOMMITTED", tmp_event_val, 30 * 1000 * 1000)); + + int64_t tx_id_num = 0; + + GET_TX_ID_FROM_SQL_AUDIT(static_test_conn_, + "INSERT INTO Persons VALUES (1, 'a','aa' , 'aaa','a')", tx_id_num); + + GET_LS(static_basic_arg_.tenant_id_, static_basic_arg_.ls_id_num_, ls_handle); + TestTxCtxGuard tx_ctx_guard(tx_id_num, ls_handle.get_ls()); + ASSERT_EQ(OB_SUCCESS, tx_ctx_guard.init(true)); + + // ASSERT_EQ(OB_SUCCESS, tx_ctx_guard.tx_ctx_->submit_redo_log(false)); + WRITE_SQL_BY_CONN(static_test_conn_, "commit;"); + + ASSERT_EQ(true, tx_ctx_guard.tx_ctx_->exec_info_.is_dup_tx_); + ASSERT_EQ(transaction::TransType::DIST_TRANS, tx_ctx_guard.tx_ctx_->exec_info_.trans_type_); + + ASSERT_EQ(OB_SUCCESS, 
finish_event("INSERT_TRX_COMMIT", "")); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(2), dup_table_trx_read_committed) +{ + int ret = OB_SUCCESS; + std::string tmp_event_val; + ASSERT_EQ(OB_SUCCESS, wait_event_finish("INSERT_TRX_COMMIT", tmp_event_val, 30 * 1000 * 1000)); + check_dup_table_insert_readable(this, 1, true /*expected_follower_read*/); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(3), dup_table_trx_read_committed) +{ + int ret = OB_SUCCESS; + std::string tmp_event_val; + ASSERT_EQ(OB_SUCCESS, + wait_event_finish("INSERT_TRX_COMMIT", tmp_event_val, 30 * 1000 * 1000, 100)); + check_dup_table_insert_readable(this, 1, true /*expected_follower_read*/); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(1), remove_dup_table) +{ + int ret = OB_SUCCESS; + WRITE_SQL_BY_CONN(static_test_conn_, "drop table Persons"); + GET_LS(static_basic_arg_.tenant_id_, static_basic_arg_.ls_id_num_, ls_handle); + + oceanbase::transaction::ObLSDupTabletsMgr::GC_DUP_TABLETS_TIME_INTERVAL = 100 * 1000; + oceanbase::transaction::ObLSDupTabletsMgr::GC_DUP_TABLETS_FAILED_TIMEOUT = + 5 * oceanbase::transaction::ObLSDupTabletsMgr::GC_DUP_TABLETS_TIME_INTERVAL; + RETRY_UNTIL_TIMEOUT(ls_handle.get_ls()->dup_table_ls_handler_.get_dup_tablet_count() == 0, + 5 * 1000 * 1000, 100 * 1000); +} + +} // namespace unittest +} // namespace oceanbase diff --git a/mittest/multi_replica/test_ob_dup_table_leader_switch.cpp b/mittest/multi_replica/test_ob_dup_table_leader_switch.cpp new file mode 100644 index 0000000000..ac2397c4d8 --- /dev/null +++ b/mittest/multi_replica/test_ob_dup_table_leader_switch.cpp @@ -0,0 +1,301 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ +#include +#define USING_LOG_PREFIX SERVER +#define protected public +#define private public + +#include "env/ob_fast_bootstrap.h" +#include "env/ob_multi_replica_util.h" +#include "lib/mysqlclient/ob_mysql_result.h" +#include "storage/tx/ob_dup_table_lease.h" + +using namespace oceanbase::transaction; +using namespace oceanbase::storage; + +#define CUR_TEST_CASE_NAME ObDupTableLeaderSwitch + +DEFINE_MULTI_ZONE_TEST_CASE_CLASS + +MULTI_REPLICA_TEST_MAIN_FUNCTION(test_dup_table_leader_switch_); + +#define DEFAULT_LOAD_ROW_CNT 30 + +namespace oceanbase +{ + +namespace unittest +{ + +struct DupTableBasicArg +{ + uint64_t tenant_id_; + int64_t ls_id_num_; + int64_t table_id_; + int64_t tablet_count_; + ObSEArray tablet_id_array_; + + TO_STRING_KV(K(tenant_id_), K(ls_id_num_), K(table_id_), K(tablet_count_), K(tablet_id_array_)); + + OB_UNIS_VERSION(1); +}; + +OB_SERIALIZE_MEMBER(DupTableBasicArg, + tenant_id_, + ls_id_num_, + table_id_, + tablet_count_, + tablet_id_array_); + +static DupTableBasicArg static_basic_arg_; + +TEST_F(GET_ZONE_TEST_CLASS_NAME(1), create_dup_table_and_load_data) +{ + + int ret = OB_SUCCESS; + CREATE_TEST_TENANT(test_tenant_id); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] create test tenant success", K(test_tenant_id)); + + common::ObMySQLProxy &test_tenant_sql_proxy = get_curr_simple_server().get_sql_proxy2(); + + ACQUIRE_CONN_FROM_SQL_PROXY(test_conn, test_tenant_sql_proxy); + ACQUIRE_CONN_FROM_SQL_PROXY(sys_conn, get_curr_simple_server().get_sql_proxy()); + + WRITE_SQL_BY_CONN(test_conn, + "CREATE TABLE test_t1( " + "id_x int, " + "id_y int, " + "id_z int, " + "PRIMARY KEY(id_x)" + ") duplicate_scope='cluster' PARTITION BY hash(id_x) partitions 10;"); + + WRITE_SQL_BY_CONN(test_conn, "CREATE TABLE test_t2( " + "id_x int, " + "id_y int, " + "id_z int, " + "PRIMARY KEY(id_x)" + ") PARTITION BY hash(id_x) partitions 10;"); + + std::string primary_zone_sql = "ALTER TENANT " + std::string(DEFAULT_TEST_TENANT_NAME) + + " set primary_zone='zone1, zone3; zone2';"; + WRITE_SQL_BY_CONN(test_conn, primary_zone_sql.c_str()); + + READ_SQL_BY_CONN(test_conn, table_info_result, + "select table_id, duplicate_scope from " + "oceanbase.__all_table where table_name = 'test_t1' "); + + ASSERT_EQ(OB_SUCCESS, table_info_result->next()); + int64_t table_id; + int64_t dup_scope; + ASSERT_EQ(OB_SUCCESS, table_info_result->get_int("table_id", table_id)); + ASSERT_EQ(OB_SUCCESS, table_info_result->get_int("duplicate_scope", dup_scope)); + ASSERT_EQ(true, table_id > 0); + ASSERT_EQ(true, dup_scope != 0); + + std::string tablet_count_sql = + "select count(*), ls_id from oceanbase.__all_tablet_to_ls where table_id = " + + std::to_string(table_id) + " group by ls_id order by count(*)"; + READ_SQL_BY_CONN(test_conn, tablet_count_result, tablet_count_sql.c_str()); + int64_t tablet_count = 0; + int64_t ls_id_num = 0; + ASSERT_EQ(OB_SUCCESS, tablet_count_result->next()); + ASSERT_EQ(OB_SUCCESS, tablet_count_result->get_int("count(*)", tablet_count)); + ASSERT_EQ(OB_SUCCESS, tablet_count_result->get_int("ls_id", ls_id_num)); + ASSERT_EQ(10, tablet_count); + ASSERT_EQ(true, share::ObLSID(ls_id_num).is_valid()); + + std::string ls_id_str = std::to_string(ls_id_num); + std::string target_ip = local_ip_ + ":" + std::to_string(rpc_ports_[2]); + std::string switch_leader_sql = "alter system switch replica leader ls=" + ls_id_str + " server='" + + target_ip + "' tenant='tt1';"; + WRITE_SQL_BY_CONN(sys_conn, switch_leader_sql.c_str()); + + ObSEArray tablet_id_array; + std::string tablet_id_sql = 
"select tablet_id from oceanbase.__all_tablet_to_ls where table_id = " + + std::to_string(table_id) + + " and ls_id = " + std::to_string(ls_id_num); + READ_SQL_BY_CONN(test_conn, tablet_id_reult, tablet_id_sql.c_str()); + while (OB_SUCC(tablet_id_reult->next())) { + int64_t id = 0; + ASSERT_EQ(OB_SUCCESS, tablet_id_reult->get_int("tablet_id", id)); + ASSERT_EQ(true, ObTabletID(id).is_valid()); + ASSERT_EQ(OB_SUCCESS, tablet_id_array.push_back(id)); + } + ASSERT_EQ(tablet_count, tablet_id_array.count()); + ASSERT_EQ(OB_ITER_END, ret); + ret = OB_SUCCESS; + + GET_LS(test_tenant_id, ls_id_num, ls_handle); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] -------- before wait dup tablet discover", K(ret), + K(ls_id_num), K(tablet_count)); + RETRY_UNTIL_TIMEOUT(ls_handle.get_ls()->dup_table_ls_handler_.get_dup_tablet_count() + == tablet_count, + 20 * 1000 * 1000, 100 * 1000); + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] -------- after wait dup tablet discover", K(ret), + K(ls_id_num), K(ls_handle.get_ls()->dup_table_ls_handler_.get_dup_tablet_count())); + ASSERT_EQ(OB_SUCCESS, ret /*has_dup_tablet*/); + + WRITE_SQL_BY_CONN(test_conn, "set autocommit = true;"); + + for (int i = 1; i <= DEFAULT_LOAD_ROW_CNT; i++) { + std::string insert_sql_str = "INSERT INTO test_t1 VALUES(" + std::to_string(i) + ", 0 , 0)"; + WRITE_SQL_BY_CONN(test_conn, insert_sql_str.c_str()); + } + + for (int i = 1; i <= DEFAULT_LOAD_ROW_CNT; i++) { + std::string insert_sql_str = "INSERT INTO test_t2 VALUES(" + std::to_string(i) + ", 0 , 0)"; + WRITE_SQL_BY_CONN(test_conn, insert_sql_str.c_str()); + } + + int64_t row_cnt = 0; + READ_SQL_BY_CONN(test_conn, read_insert_result, "select count(*) from test_t1;"); + ASSERT_EQ(OB_SUCCESS, read_insert_result->next()); + ASSERT_EQ(OB_SUCCESS, read_insert_result->get_int("count(*)", row_cnt)); + ASSERT_EQ(row_cnt, DEFAULT_LOAD_ROW_CNT); + + static_basic_arg_.tenant_id_ = test_tenant_id; + static_basic_arg_.ls_id_num_ = ls_id_num; + static_basic_arg_.table_id_ = table_id; + static_basic_arg_.tablet_count_ = tablet_count; + + std::string tmp_str; + ASSERT_EQ(OB_SUCCESS, + EventArgSerTool::serialize_arg(static_basic_arg_, tmp_str)); + ASSERT_EQ(OB_SUCCESS, finish_event("CREATE_DUP_TABLE", tmp_str)); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(3), switch_to_follwer_forcedly) +{ + int ret = OB_SUCCESS; + + std::string tmp_event_val; + ASSERT_EQ(OB_SUCCESS, wait_event_finish("CREATE_DUP_TABLE", tmp_event_val, 30 * 60 * 1000)); + ASSERT_EQ(OB_SUCCESS, + EventArgSerTool::deserialize_arg(static_basic_arg_, tmp_event_val)); + + ASSERT_EQ(OB_SUCCESS, wait_event_finish("INIT_ZONE2_SQL_PROXY", tmp_event_val, 30 * 60 * 1000)); + + ASSERT_EQ(OB_SUCCESS, get_curr_simple_server().init_sql_proxy2()); + + ASSERT_EQ(OB_SUCCESS, finish_event("START_RANDOM_SWITCH_FOLLOWER_FORCEDLY", "")); + + GET_LS(static_basic_arg_.tenant_id_, static_basic_arg_.ls_id_num_, ls_handle); + + RETRY_UNTIL_TIMEOUT(ls_handle.get_ls()->dup_table_ls_handler_.is_master(), 20 * 1000 * 1000, + 100 * 1000); + + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] -------- before block msg for leader revoke", K(ret)); + + for (int i = 0; i < 10; i++) { + BLOCK_MSG_PROCESSOR(6 * 1000 * 1000L); + RETRY_UNTIL_TIMEOUT(!ls_handle.get_ls()->dup_table_ls_handler_.is_master(), 20 * 1000 * 1000, + 100 * 1000); + RETRY_UNTIL_TIMEOUT(ls_handle.get_ls()->dup_table_ls_handler_.is_master(), 20 * 1000 * 1000, + 100 * 1000); + sleep(1); + } + SERVER_LOG(INFO, "[ObMultiReplicaTestBase] -------- after block msg for leader revoke", K(ret)); + + ASSERT_EQ(OB_SUCCESS, 
finish_event("END_RANDOM_SWITCH_FOLLOWER_FORCEDLY", "")); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(3), switch_to_follwer_gracefully) +{ + int ret = OB_SUCCESS; + + ASSERT_EQ(OB_SUCCESS, finish_event("START_RANDOM_SWITCH_FOLLOWER_GRACEFULLY", "")); + + common::ObMySQLProxy &test_tenant_sql_proxy = get_curr_simple_server().get_sql_proxy(); + ACQUIRE_CONN_FROM_SQL_PROXY(test_conn, test_tenant_sql_proxy); + + GET_LS(static_basic_arg_.tenant_id_, static_basic_arg_.ls_id_num_, ls_handle); + RETRY_UNTIL_TIMEOUT(ls_handle.get_ls()->dup_table_ls_handler_.is_master(), 20 * 1000 * 1000, + 100 * 1000); + + std::string ls_id_str = std::to_string(static_basic_arg_.ls_id_num_); + std::string target_ip = local_ip_ + ":" + std::to_string(rpc_ports_[1]); + + std::string switch_leader_sql = "alter system switch replica leader ls=" + ls_id_str + " server='" + + target_ip + "' tenant='tt1';"; + + for (int i = 0; i < 10; i++) { + WRITE_SQL_BY_CONN(test_conn, switch_leader_sql.c_str()); + RETRY_UNTIL_TIMEOUT(!ls_handle.get_ls()->dup_table_ls_handler_.is_master(), 20 * 1000 * 1000, + 100 * 1000); + RETRY_UNTIL_TIMEOUT(ls_handle.get_ls()->dup_table_ls_handler_.is_master(), 20 * 1000 * 1000, + 100 * 1000); + sleep(1); + } + + ASSERT_EQ(OB_SUCCESS, finish_event("END_RANDOM_SWITCH_FOLLOWER_GRACEFULLY", "")); +} + +TEST_F(GET_ZONE_TEST_CLASS_NAME(2), update_in_leader_switch) +{ + int ret = OB_TIMEOUT; + + std::string tmp_event_val; + ASSERT_EQ(OB_SUCCESS, wait_event_finish("CREATE_DUP_TABLE", tmp_event_val, 30 * 60 * 1000)); + ASSERT_EQ(OB_SUCCESS, get_curr_simple_server().init_sql_proxy2()); + ASSERT_EQ(OB_SUCCESS, finish_event("INIT_ZONE2_SQL_PROXY", "")); + common::ObMySQLProxy &test_tenant_sql_proxy = get_curr_simple_server().get_sql_proxy2(); + ACQUIRE_CONN_FROM_SQL_PROXY(test_conn, test_tenant_sql_proxy); + + int64_t update_trx_cnt = 0; + std::string event_content; + + int res_ret_1, res_ret_2, commit_res_ret; + res_ret_1 = res_ret_2 = commit_res_ret = OB_SUCCESS; + + bool is_waiting_forcedly = true; + + while (ret == OB_TIMEOUT) { + update_trx_cnt++; + WRITE_SQL_BY_CONN(test_conn, "begin"); + for (int i = 1; i <= DEFAULT_LOAD_ROW_CNT; i++) { + std::string insert_sql_str = "UPDATE test_t1 set id_y = " + std::to_string(update_trx_cnt) + + " where id_x = " + std::to_string(i); + WRITE_SQL_BY_CONN_INNER(test_conn, insert_sql_str.c_str(), res_ret_1); + std::string insert_sql_str2 = "UPDATE test_t2 set id_y = " + std::to_string(update_trx_cnt) + + " where id_x = " + std::to_string(i); + WRITE_SQL_BY_CONN_INNER(test_conn, insert_sql_str2.c_str(), res_ret_2); + if (is_waiting_forcedly) { + EXPECT_EQ(true, res_ret_1 == OB_SUCCESS || res_ret_1 == OB_TIMEOUT); + EXPECT_EQ(true, res_ret_2 == OB_SUCCESS || res_ret_2 == OB_TIMEOUT); + } else { + EXPECT_EQ(OB_SUCCESS, res_ret_1); + EXPECT_EQ(OB_SUCCESS, res_ret_2); + } + } + WRITE_SQL_BY_CONN_INNER(test_conn, "commit", commit_res_ret); + if (is_waiting_forcedly) { + EXPECT_EQ(true, TestTxCtxGuard::is_trx_abort_sql_ret(commit_res_ret)); + } else { + EXPECT_EQ(OB_SUCCESS, commit_res_ret); + } + + if (is_waiting_forcedly) { + + ret = wait_event_finish("END_RANDOM_SWITCH_FOLLOWER_FORCEDLY", event_content, 100L, 50L); + if (OB_SUCCESS == ret) { + ret = OB_TIMEOUT; + is_waiting_forcedly = false; + } + } else { + ret = wait_event_finish("END_RANDOM_SWITCH_FOLLOWER_GRACEFULLY", event_content, 100L, 50L); + } + } + ASSERT_EQ(OB_SUCCESS, ret); +} + +} // namespace unittest +} // namespace oceanbase diff --git a/mittest/multi_replica/test_ob_multi_replica_basic.cpp 
diff --git a/mittest/multi_replica/test_ob_multi_replica_basic.cpp b/mittest/multi_replica/test_ob_multi_replica_basic.cpp new file mode 100644 index 0000000000..43cb8ff1f5 --- /dev/null +++ b/mittest/multi_replica/test_ob_multi_replica_basic.cpp @@ -0,0 +1,194 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include +#define USING_LOG_PREFIX SERVER +#define protected public +#define private public + +#include "env/ob_multi_replica_test_base.h" +#include "env/ob_fast_bootstrap.h" +#include "lib/mysqlclient/ob_mysql_result.h" + +namespace oceanbase +{ +namespace unittest +{ + +using namespace oceanbase::transaction; +using namespace oceanbase::storage; + +std::string ObMultiReplicaTestBase::ZONE_TEST_CASE_NAME[MAX_ZONE_COUNT] = { + "ObSimpleMultiReplicaExampleTest_ZONE1", "ObSimpleMultiReplicaExampleTest_ZONE2", + "ObSimpleMultiReplicaExampleTest_ZONE3"}; + +static const std::string TEST_DIR_PREFIX = "test_multi_replica_basic_"; + +class TestRunCtx +{ +public: + uint64_t tenant_id_ = 0; + int time_sec_ = 0; +}; + +TestRunCtx RunCtx; + +class ObSimpleMultiReplicaExampleTest_ZONE1 : public ObMultiReplicaTestBase +{ +public: + ObSimpleMultiReplicaExampleTest_ZONE1() : ObMultiReplicaTestBase() {} + +}; + +class ObSimpleMultiReplicaExampleTest_ZONE2 : public ObMultiReplicaTestBase +{ +public: + ObSimpleMultiReplicaExampleTest_ZONE2() : ObMultiReplicaTestBase() {} +}; + +class ObSimpleMultiReplicaExampleTest_ZONE3 : public ObMultiReplicaTestBase +{ +public: + ObSimpleMultiReplicaExampleTest_ZONE3() : ObMultiReplicaTestBase() {} +}; + +TEST_F(ObSimpleMultiReplicaExampleTest_ZONE1, observer_start) +{ + SERVER_LOG(INFO, "observer_start succ"); +} + +// Creating a tenant is not lightweight; only do it when the scenario requires it +TEST_F(ObSimpleMultiReplicaExampleTest_ZONE1, add_tenant) +{ + // Create the ordinary tenant tt1 + ASSERT_EQ(OB_SUCCESS, create_tenant()); + // Get the tenant_id of tenant tt1 + ASSERT_EQ(OB_SUCCESS, get_tenant_id(RunCtx.tenant_id_)); + ASSERT_NE(0, RunCtx.tenant_id_); + // Initialize the sql proxy of tenant tt1 + ASSERT_EQ(OB_SUCCESS, get_curr_simple_server().init_sql_proxy2()); +} + + +TEST_F(ObSimpleMultiReplicaExampleTest_ZONE1, create_table) +{ + int ret = OB_SUCCESS; + // Use the ordinary tenant tt1 + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy2(); + // Create the table + { + OB_LOG(INFO, "create_table start"); + ObSqlString sql; + sql.assign_fmt( + "create table school (sid int,sname varchar(100), primary key(sid)) " + "partition by range(sid) (partition p0 values less than (100), partition p1 values less than (200), partition p2 values less than MAXVALUE)"); + int64_t affected_rows = 0; + ASSERT_EQ(OB_SUCCESS, sql_proxy.write(sql.ptr(), affected_rows)); + OB_LOG(INFO, "create_table succ"); + } + + { + OB_LOG(INFO, "insert data start"); + for (int i = 1;i <= 1000; i++) { + ObSqlString sql; + ASSERT_EQ(OB_SUCCESS, sql.assign_fmt("insert into school values(%d, '%s')", i, "ob")); + int64_t affected_rows = 0; + ASSERT_EQ(OB_SUCCESS, sql_proxy.write(sql.ptr(), affected_rows)); + } + //check row count + OB_LOG(INFO, "check row count"); + { + int64_t row_cnt = 0; + ObSqlString sql; + ASSERT_EQ(OB_SUCCESS, sql.assign_fmt("select count(*) row_cnt from school")); +
SMART_VAR(ObMySQLProxy::MySQLResult, res) { + ASSERT_EQ(OB_SUCCESS, sql_proxy.read(res, sql.ptr())); + sqlclient::ObMySQLResult *result = res.get_result(); + ASSERT_NE(nullptr, result); + ASSERT_EQ(OB_SUCCESS, result->next()); + ASSERT_EQ(OB_SUCCESS, result->get_int("row_cnt", row_cnt)); + } + ASSERT_EQ(row_cnt, 1000); + } + + } +} + + +TEST_F(ObSimpleMultiReplicaExampleTest_ZONE1, delete_tenant) +{ + ASSERT_EQ(OB_SUCCESS, delete_tenant()); +} + + +TEST_F(ObSimpleMultiReplicaExampleTest_ZONE1, end) +{ + RunCtx.time_sec_ = 0; + if (RunCtx.time_sec_ > 0) { + ::sleep(RunCtx.time_sec_); + } +} + +TEST_F(ObSimpleMultiReplicaExampleTest_ZONE2, end) +{ + RunCtx.time_sec_ = 0; + if (RunCtx.time_sec_ > 0) { + ::sleep(RunCtx.time_sec_); + } +} + +TEST_F(ObSimpleMultiReplicaExampleTest_ZONE3, end) +{ + RunCtx.time_sec_ = 0; + if (RunCtx.time_sec_ > 0) { + ::sleep(RunCtx.time_sec_); + } +} + +} // end unittest +} // end oceanbase + + +int main(int argc, char **argv) +{ + int return_code = 0; + int ret = OB_SUCCESS; + int c = 0; + int time_sec = 0; + char *log_level = (char *)"INFO"; + while (EOF != (c = getopt(argc, argv, "t:l:"))) { + switch (c) { + case 't': + time_sec = atoi(optarg); + break; + case 'l': + log_level = optarg; + oceanbase::unittest::ObMultiReplicaTestBase::enable_env_warn_log_ = false; + break; + default: + break; + } + } + oceanbase::unittest::init_log_and_gtest(argc, argv); + OB_LOGGER.set_log_level(log_level); + + LOG_INFO("main>>>"); + oceanbase::unittest::RunCtx.time_sec_ = time_sec; + ::testing::InitGoogleTest(&argc, argv); + if (OB_FAIL(oceanbase::unittest::ObMultiReplicaTestBase::bootstrap_multi_replica( + oceanbase::unittest::TEST_DIR_PREFIX))) { + fprintf(stdout, "init test case failed. ret = %d", ret); + return ret; + } + return_code = RUN_ALL_TESTS(); + return return_code; +} diff --git a/mittest/simple_server/test_ls_recover.cpp b/mittest/simple_server/test_ls_recover.cpp index 04098e9e6a..6337d9788c 100644 --- a/mittest/simple_server/test_ls_recover.cpp +++ b/mittest/simple_server/test_ls_recover.cpp @@ -264,7 +264,6 @@ TEST_F(ObLSBeforeRestartTest, create_unfinished_ls_without_disk) ASSERT_EQ(OB_SUCCESS, gen_create_ls_arg(tenant_id, id_100, arg)); LOG_INFO("create_ls", K(arg), K(id_100)); ASSERT_EQ(OB_SUCCESS, ls_svr->inner_create_ls_(arg.get_ls_id(), - arg.get_replica_type(), migration_status, ObLSRestoreStatus(ObLSRestoreStatus::RESTORE_NONE), arg.get_create_scn(), @@ -293,7 +292,6 @@ TEST_F(ObLSBeforeRestartTest, create_unfinished_ls_with_disk) ASSERT_EQ(OB_SUCCESS, gen_create_ls_arg(tenant_id, id_101, arg)); LOG_INFO("create_ls", K(arg), K(id_101)); ASSERT_EQ(OB_SUCCESS, ls_svr->inner_create_ls_(arg.get_ls_id(), - arg.get_replica_type(), migration_status, ObLSRestoreStatus(ObLSRestoreStatus::RESTORE_NONE), arg.get_create_scn(), @@ -306,6 +304,7 @@ TEST_F(ObLSBeforeRestartTest, create_unfinished_ls_with_disk) ASSERT_EQ(OB_SUCCESS, ls_svr->write_prepare_create_ls_slog_(ls_meta)); ASSERT_EQ(OB_SUCCESS, ls->create_ls(arg.get_tenant_info().get_tenant_role(), palf_base_info, + arg.get_replica_type(), unused_allow_log_sync)); } @@ -327,7 +326,6 @@ TEST_F(ObLSBeforeRestartTest, create_unfinished_ls_with_inner_tablet) ASSERT_EQ(OB_SUCCESS, gen_create_ls_arg(tenant_id, id_102, arg)); LOG_INFO("create_ls", K(arg), K(id_102)); ASSERT_EQ(OB_SUCCESS, ls_svr->inner_create_ls_(arg.get_ls_id(), - arg.get_replica_type(), migration_status, ObLSRestoreStatus(ObLSRestoreStatus::RESTORE_NONE), arg.get_create_scn(), @@ -340,6 +338,7 @@ TEST_F(ObLSBeforeRestartTest, 
create_unfinished_ls_with_inner_tablet) ASSERT_EQ(OB_SUCCESS, ls_svr->write_prepare_create_ls_slog_(ls_meta)); ASSERT_EQ(OB_SUCCESS, ls->create_ls(arg.get_tenant_info().get_tenant_role(), palf_base_info, + arg.get_replica_type(), unused_allow_log_sync)); ASSERT_EQ(OB_SUCCESS, ls->create_ls_inner_tablet(arg.get_compat_mode(), arg.get_create_scn())); @@ -363,7 +362,6 @@ TEST_F(ObLSBeforeRestartTest, create_unfinished_ls_with_commit_slog) ASSERT_EQ(OB_SUCCESS, gen_create_ls_arg(tenant_id, id_103, arg)); LOG_INFO("create_ls", K(arg), K(id_103)); ASSERT_EQ(OB_SUCCESS, ls_svr->inner_create_ls_(arg.get_ls_id(), - arg.get_replica_type(), migration_status, ObLSRestoreStatus(ObLSRestoreStatus::RESTORE_NONE), arg.get_create_scn(), @@ -376,6 +374,7 @@ TEST_F(ObLSBeforeRestartTest, create_unfinished_ls_with_commit_slog) ASSERT_EQ(OB_SUCCESS, ls_svr->write_prepare_create_ls_slog_(ls_meta)); ASSERT_EQ(OB_SUCCESS, ls->create_ls(arg.get_tenant_info().get_tenant_role(), palf_base_info, + arg.get_replica_type(), unused_allow_log_sync)); ASSERT_EQ(OB_SUCCESS, ls->create_ls_inner_tablet(arg.get_compat_mode(), arg.get_create_scn())); diff --git a/mittest/simple_server/test_ls_status_operator.cpp b/mittest/simple_server/test_ls_status_operator.cpp index b0df079e3d..6f1e97b006 100644 --- a/mittest/simple_server/test_ls_status_operator.cpp +++ b/mittest/simple_server/test_ls_status_operator.cpp @@ -140,7 +140,8 @@ TEST_F(TestLSStatusOperator, LSLifeAgent) ret = ls_life.create_new_ls(info, create_scn, zone_priority.str(), share::NORMAL_SWITCHOVER_STATUS); ASSERT_EQ(OB_INVALID_ARGUMENT, ret); ObZone primary_zone("z1"); - ret = info.init(tenant_id_, SYS_LS, 0, share::OB_LS_CREATING, 0, primary_zone); + ObLSFlag flag(share::ObLSFlag::NORMAL_FLAG); + ret = info.init(tenant_id_, SYS_LS, 0, share::OB_LS_CREATING, 0, primary_zone, flag); ASSERT_EQ(OB_SUCCESS, ret); ret = status_operator.get_all_ls_status_by_order(OB_SYS_TENANT_ID, ls_array, get_curr_simple_server().get_observer().get_mysql_proxy()); @@ -152,7 +153,7 @@ TEST_F(TestLSStatusOperator, LSLifeAgent) //创建新日志流 ObLSID ls_id(1001); - ret = info.init(tenant_id_, ls_id, 0, share::OB_LS_CREATING, 0, primary_zone); + ret = info.init(tenant_id_, ls_id, 0, share::OB_LS_CREATING, 0, primary_zone, flag); ASSERT_EQ(OB_SUCCESS, ret); ret = ls_life.create_new_ls(info, create_scn, zone_priority.str(), share::NORMAL_SWITCHOVER_STATUS); ASSERT_EQ(OB_SUCCESS, ret); diff --git a/src/logservice/logrpc/ob_log_request_handler.cpp b/src/logservice/logrpc/ob_log_request_handler.cpp index da46b2a64e..e5d4c67b26 100644 --- a/src/logservice/logrpc/ob_log_request_handler.cpp +++ b/src/logservice/logrpc/ob_log_request_handler.cpp @@ -279,10 +279,10 @@ int ConfigChangeCmdHandler::handle_config_change_cmd(const LogConfigChangeCmd &r ret = palf_handle_->remove_learner(req.removed_member_, req.timeout_us_); break; case SWITCH_TO_ACCEPTOR_CMD: - ret = palf_handle_->switch_learner_to_acceptor(req.removed_member_, req.timeout_us_); + ret = palf_handle_->switch_learner_to_acceptor(req.removed_member_, req.new_replica_num_, req.timeout_us_); break; case SWITCH_TO_LEARNER_CMD: - ret = palf_handle_->switch_acceptor_to_learner(req.removed_member_, req.timeout_us_); + ret = palf_handle_->switch_acceptor_to_learner(req.removed_member_, req.new_replica_num_, req.timeout_us_); break; default: break; diff --git a/src/logservice/logrpc/ob_log_rpc_req.cpp b/src/logservice/logrpc/ob_log_rpc_req.cpp index cfbe7c99fb..712ef2b688 100644 --- a/src/logservice/logrpc/ob_log_rpc_req.cpp +++ 
b/src/logservice/logrpc/ob_log_rpc_req.cpp @@ -80,8 +80,7 @@ bool LogConfigChangeCmd::is_valid() const SWITCH_TO_LEARNER_CMD == cmd_type_)? added_member_.is_valid(): true); bool_ret = bool_ret && ((is_remove_member_list() || REMOVE_LEARNER_CMD == cmd_type_ || \ SWITCH_TO_ACCEPTOR_CMD == cmd_type_)? removed_member_.is_valid(): true); - bool_ret = bool_ret && ((ADD_MEMBER_CMD == cmd_type_ || REMOVE_MEMBER_CMD == cmd_type_) ? \ - is_valid_replica_num(new_replica_num_): true); + bool_ret = bool_ret && ((is_set_new_replica_num())? is_valid_replica_num(new_replica_num_): true); bool_ret = bool_ret && ((CHANGE_REPLICA_NUM_CMD == cmd_type_)? curr_member_list_.is_valid() \ && is_valid_replica_num(curr_replica_num_) && is_valid_replica_num(new_replica_num_): true); return bool_ret; @@ -101,6 +100,14 @@ bool LogConfigChangeCmd::is_add_member_list() const || SWITCH_TO_ACCEPTOR_CMD == cmd_type_; } +bool LogConfigChangeCmd::is_set_new_replica_num() const +{ + return ADD_MEMBER_CMD == cmd_type_ + || REMOVE_MEMBER_CMD == cmd_type_ + || SWITCH_TO_LEARNER_CMD == cmd_type_ + || SWITCH_TO_ACCEPTOR_CMD == cmd_type_; +} + void LogConfigChangeCmd::reset() { src_.reset(); diff --git a/src/logservice/logrpc/ob_log_rpc_req.h b/src/logservice/logrpc/ob_log_rpc_req.h index e241fdd871..8965a4577f 100644 --- a/src/logservice/logrpc/ob_log_rpc_req.h +++ b/src/logservice/logrpc/ob_log_rpc_req.h @@ -80,6 +80,7 @@ public: void reset(); bool is_remove_member_list() const; bool is_add_member_list() const; + bool is_set_new_replica_num() const; TO_STRING_KV("cmd_type", log_config_change_cmd2str(cmd_type_), K_(src), K_(palf_id), \ K_(added_member), K_(removed_member), K_(curr_member_list), K_(curr_replica_num), \ K_(new_replica_num), K_(timeout_us)); diff --git a/src/logservice/ob_garbage_collector.cpp b/src/logservice/ob_garbage_collector.cpp index 397748295f..e75af2e546 100644 --- a/src/logservice/ob_garbage_collector.cpp +++ b/src/logservice/ob_garbage_collector.cpp @@ -88,7 +88,6 @@ private: const ObLSArray &ls_array); int handle_rpc_response_(const ObAddr &leader, const obrpc::ObQueryLSIsValidMemberResponse &response); - bool is_normal_readonly_replica_(ObLS *ls) const; int try_renew_location_(const ObLSArray &ls_array); private: obrpc::ObSrvRpcProxy *rpc_proxy_; @@ -191,9 +190,6 @@ int ObGarbageCollector::QueryLSIsValidMemberFunctor::handle_rpc_response_(const } else if (OB_ISNULL(gc_handler = ls->get_gc_handler())) { tmp_ret = OB_ERR_UNEXPECTED; CLOG_LOG(WARN, "gc_handler is NULL", K(tmp_ret), K(id)); - } else if (is_normal_readonly_replica_(ls)) { - // do nothing, remove by RS - CLOG_LOG(INFO, "GC skip R replica", K(id)); } else if (!is_valid_member) { if (OB_SUCCESS != (tmp_ret = gc_handler->gc_check_invalid_member_seq(gc_seq_, need_gc))) { CLOG_LOG(WARN, "gc_check_invalid_member_seq failed", K(tmp_ret), K(id), K(leader), K(gc_seq_), K(need_gc)); @@ -219,11 +215,6 @@ int ObGarbageCollector::QueryLSIsValidMemberFunctor::handle_rpc_response_(const return ret; } -bool ObGarbageCollector::QueryLSIsValidMemberFunctor::is_normal_readonly_replica_(ObLS *ls) const -{ - return ObReplicaTypeCheck::is_readonly_replica(ls->get_replica_type()); -} - //---------------ObGCLSLog---------------// ObGCLSLog::ObGCLSLog() { diff --git a/src/logservice/ob_log_base_type.h b/src/logservice/ob_log_base_type.h index 1a38905e1b..f0639b3e2e 100644 --- a/src/logservice/ob_log_base_type.h +++ b/src/logservice/ob_log_base_type.h @@ -87,6 +87,9 @@ enum ObLogBaseType // for padding log entry PADDING_LOG_BASE_TYPE = 25, + // for dup table trans + 
DUP_TABLE_LOG_BASE_TYPE = 26, + + // pay attention!!! // add log type in log_base_type_to_string // max value @@ -153,6 +156,8 @@ int log_base_type_to_string(const ObLogBaseType log_type, strncpy(str ,"HEARTBEAT_SERVICE", str_len); } else if (log_type == PADDING_LOG_BASE_TYPE) { strncpy(str ,"PADDING_LOG_ENTRY", str_len); + } else if (log_type == DUP_TABLE_LOG_BASE_TYPE) { + strncpy(str ,"DUP_TABLE", str_len); } else { ret = OB_INVALID_ARGUMENT; } diff --git a/src/logservice/ob_log_handler.cpp b/src/logservice/ob_log_handler.cpp index f28f081edd..61a85ff385 100644 --- a/src/logservice/ob_log_handler.cpp +++ b/src/logservice/ob_log_handler.cpp @@ -333,10 +333,11 @@ int ObLogHandler::seek(const SCN &scn, palf::PalfGroupBufferIterator &iter) } int ObLogHandler::set_initial_member_list(const common::ObMemberList &member_list, - const int64_t paxos_replica_num) + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list) { RLockGuard guard(lock_); - return palf_handle_.set_initial_member_list(member_list, paxos_replica_num); + return palf_handle_.set_initial_member_list(member_list, paxos_replica_num, learner_list); } @@ -406,6 +407,14 @@ int ObLogHandler::get_paxos_member_list(common::ObMemberList &member_list, int64 return palf_handle_.get_paxos_member_list(member_list, paxos_replica_num); } +int ObLogHandler::get_paxos_member_list_and_learner_list(common::ObMemberList &member_list, + int64_t &paxos_replica_num, + common::GlobalLearnerList &learner_list) const +{ + RLockGuard guard(lock_); + return palf_handle_.get_paxos_member_list_and_learner_list(member_list, paxos_replica_num, learner_list); +} + int ObLogHandler::get_global_learner_list(common::GlobalLearnerList &learner_list) const { RLockGuard guard(lock_); @@ -796,6 +805,36 @@ int ObLogHandler::remove_learner(const common::ObMember &removed_learner, return ret; } +int ObLogHandler::replace_learner(const common::ObMember &added_learner, + const common::ObMember &removed_learner, + const int64_t timeout_us) +{ + int ret = OB_SUCCESS; + common::ObSpinLockGuard deps_guard(deps_lock_); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + } else if (is_in_stop_state_) { + ret = OB_NOT_RUNNING; + } else if (!added_learner.is_valid() || + !removed_learner.is_valid() || + timeout_us <= 0) { + ret = OB_INVALID_ARGUMENT; + CLOG_LOG(WARN, "invalid argument", KR(ret), K_(id), K(added_learner), K(removed_learner), K(timeout_us)); + } else { + common::ObMember dummy_member; + LogConfigChangeCmd add_req(self_, id_, added_learner, dummy_member, 0, ADD_LEARNER_CMD, timeout_us); + LogConfigChangeCmd remove_req(self_, id_, dummy_member, removed_learner, 0, REMOVE_LEARNER_CMD, timeout_us); + if (OB_FAIL(submit_config_change_cmd_(add_req))) { + CLOG_LOG(WARN, " submit_config_change_cmd failed", KR(ret), K_(id), K(add_req), K(timeout_us)); + } else if (OB_FAIL(submit_config_change_cmd_(remove_req))) { + CLOG_LOG(WARN, " submit_config_change_cmd failed", KR(ret), K_(id), K(remove_req), K(timeout_us)); + } else { + CLOG_LOG(INFO, "replace_learner success", KR(ret), K_(id), K(added_learner), K(removed_learner), K(timeout_us)); + } + } + return ret; +} +
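A hedged caller-side sketch of the two interfaces touched here; only the ObLogHandler signatures shown in this patch are relied on, while the function name, the 10s timeout and the replica numbers are illustrative assumptions. replace_learner() swaps one read-only replica for another by submitting ADD_LEARNER_CMD and then REMOVE_LEARNER_CMD, and switch_learner_to_acceptor() now also takes the paxos replica number that should hold after the promotion.

int demo_replace_then_promote(oceanbase::logservice::ObLogHandler &log_handler,
                              const oceanbase::common::ObAddr &new_server,
                              const oceanbase::common::ObAddr &old_server)
{
  int ret = OB_SUCCESS;
  const int64_t timeout_us = 10 * 1000 * 1000L;                    // 10s, arbitrary for the sketch
  const oceanbase::common::ObMember added_learner(new_server, 1);  // timestamp is a placeholder
  const oceanbase::common::ObMember removed_learner(old_server, 1);
  // Replace one learner (read-only replica) with another in a single call.
  if (OB_FAIL(log_handler.replace_learner(added_learner, removed_learner, timeout_us))) {
    CLOG_LOG(WARN, "replace_learner failed", KR(ret), K(added_learner), K(removed_learner));
  } else if (OB_FAIL(log_handler.switch_learner_to_acceptor(added_learner,
                                                            4 /* paxos replica num after promotion, e.g. 3 -> 4 */,
                                                            timeout_us))) {
    // Promote the new learner to a full paxos member; new_replica_num works like it does for add_member.
    CLOG_LOG(WARN, "switch_learner_to_acceptor failed", KR(ret), K(added_learner));
  }
  return ret;
}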
// @desc: switch_learner_to_acceptor interface // | 1.switch_learner_to_accetpor() // V // [ObLogHandler] // | // [any_member] <----[4. Sync LogConfigChangeCmdResp]--- | 3. one_stage_config_change_(SWITCH_LEARNER_TO_ACCEPTOR) int ObLogHandler::switch_learner_to_acceptor(const common::ObMember &learner, + const int64_t new_replica_num, const int64_t timeout_us) { int ret = OB_SUCCESS; @@ -812,15 +852,16 @@ int ObLogHandler::switch_learner_to_acceptor(const common::ObMember &learner, } else if (is_in_stop_state_) { ret = OB_NOT_RUNNING; } else if (!learner.is_valid() || + !is_valid_replica_num(new_replica_num) || timeout_us <= 0) { ret = OB_INVALID_ARGUMENT; - CLOG_LOG(WARN, "invalid argument", KR(ret), K_(id), K(learner), K(timeout_us)); + CLOG_LOG(WARN, "invalid argument", KR(ret), K_(id), K(learner), K(new_replica_num), K(timeout_us)); } else { - LogConfigChangeCmd req(self_, id_, learner, learner, 0, SWITCH_TO_ACCEPTOR_CMD, timeout_us); + LogConfigChangeCmd req(self_, id_, learner, learner, new_replica_num, SWITCH_TO_ACCEPTOR_CMD, timeout_us); if (OB_FAIL(submit_config_change_cmd_(req))) { CLOG_LOG(WARN, " submit_config_change_cmd failed", KR(ret), K_(id), K(req), K(timeout_us)); } else { - CLOG_LOG(INFO, "add_member success", KR(ret), K_(id), K(learner)); + CLOG_LOG(INFO, "switch_learner_to_acceptor success", KR(ret), K_(id), K(learner), K(new_replica_num)); } } return ret; @@ -833,6 +874,7 @@ int ObLogHandler::switch_learner_to_acceptor(const common::ObMember &learner, // | // [any_member] <----[4. Sync LogConfigChangeCmdResp]--- | 3. one_stage_config_change_(SWITCH_ACCEPTOR_TO_LEARNER) int ObLogHandler::switch_acceptor_to_learner(const common::ObMember &member, + const int64_t new_replica_num, const int64_t timeout_us) { int ret = OB_SUCCESS; @@ -842,15 +884,16 @@ int ObLogHandler::switch_acceptor_to_learner(const common::ObMember &member, } else if (is_in_stop_state_) { ret = OB_NOT_RUNNING; } else if (!member.is_valid() || + !is_valid_replica_num(new_replica_num) || timeout_us <= 0) { ret = OB_INVALID_ARGUMENT; - CLOG_LOG(WARN, "invalid argument", KR(ret), K_(id), K(member), K(timeout_us)); + CLOG_LOG(WARN, "invalid argument", KR(ret), K_(id), K(member), K(new_replica_num), K(timeout_us)); } else { - LogConfigChangeCmd req(self_, id_, member, member, 0, SWITCH_TO_LEARNER_CMD, timeout_us); + LogConfigChangeCmd req(self_, id_, member, member, new_replica_num, SWITCH_TO_LEARNER_CMD, timeout_us); if (OB_FAIL(submit_config_change_cmd_(req))) { CLOG_LOG(WARN, " submit_config_change_cmd failed", KR(ret), K_(id), K(req), K(timeout_us)); } else { - CLOG_LOG(INFO, "add_member success", KR(ret), K_(id), K(member)); + CLOG_LOG(INFO, "switch_acceptor_to_learner success", KR(ret), K_(id), K(member), K(new_replica_num)); } } return ret; @@ -964,6 +1007,7 @@ int ObLogHandler::is_valid_member(const common::ObAddr &addr, int64_t new_proposal_id; common::ObMemberList member_list; int64_t paxos_replica_num = 0; + GlobalLearnerList learner_list; bool is_pending_state = false; RLockGuard guard(lock_); if (IS_NOT_INIT) { @@ -980,15 +1024,16 @@ int ObLogHandler::is_valid_member(const common::ObAddr &addr, CLOG_LOG(ERROR, "get_role failed", K(ret), KPC(this)); } else if (LEADER != role) { ret = OB_NOT_MASTER; - } else if (OB_FAIL(palf_handle_.get_paxos_member_list(member_list, paxos_replica_num))) { + } else if (OB_FAIL(palf_handle_.get_paxos_member_list_and_learner_list(member_list, + paxos_replica_num, learner_list))) { ret = OB_ERR_UNEXPECTED; - CLOG_LOG(ERROR, "get_paxos_member_list failed", K(ret), KPC(this)); + CLOG_LOG(ERROR, "get_paxos_member_list_and_learner_list failed", K(ret), KPC(this)); } else if (OB_FAIL(palf_handle_.get_role(new_role, new_proposal_id, is_pending_state))) { ret =
OB_ERR_UNEXPECTED; CLOG_LOG(ERROR, "get_role failed", K(ret), KPC(this)); } else { if (role == new_role && proposal_id == new_proposal_id) { - is_valid = member_list.contains(addr); + is_valid = member_list.contains(addr) || learner_list.contains(addr); } else { ret = OB_NOT_MASTER; CLOG_LOG(INFO, "role changed during is_valid_member", K(ret), KPC(this), K(role), diff --git a/src/logservice/ob_log_handler.h b/src/logservice/ob_log_handler.h index 57d3a627bc..c91420e473 100644 --- a/src/logservice/ob_log_handler.h +++ b/src/logservice/ob_log_handler.h @@ -86,7 +86,8 @@ public: virtual int seek(const palf::LSN &lsn, palf::PalfGroupBufferIterator &iter) = 0; virtual int seek(const share::SCN &scn, palf::PalfGroupBufferIterator &iter) = 0; virtual int set_initial_member_list(const common::ObMemberList &member_list, - const int64_t paxos_replica_num) = 0; + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list) = 0; virtual int set_region(const common::ObRegion ®ion) = 0; virtual int set_election_priority(palf::election::ElectionPriority *priority) = 0; virtual int reset_election_priority() = 0; @@ -101,6 +102,9 @@ public: virtual int get_max_scn(share::SCN &scn) const = 0; virtual int get_end_scn(share::SCN &scn) const = 0; virtual int get_paxos_member_list(common::ObMemberList &member_list, int64_t &paxos_replica_num) const = 0; + virtual int get_paxos_member_list_and_learner_list(common::ObMemberList &member_list, + int64_t &paxos_replica_num, + common::GlobalLearnerList &learner_list) const = 0; virtual int get_global_learner_list(common::GlobalLearnerList &learner_list) const = 0; virtual int get_election_leader(common::ObAddr &addr) const = 0; virtual int change_replica_num(const common::ObMemberList &member_list, @@ -119,8 +123,15 @@ public: const int64_t timeout_us) = 0; virtual int add_learner(const common::ObMember &added_learner, const int64_t timeout_us) = 0; virtual int remove_learner(const common::ObMember &removed_learner, const int64_t timeout_us) = 0; - virtual int switch_learner_to_acceptor(const common::ObMember &learner, const int64_t timeout_us) = 0; - virtual int switch_acceptor_to_learner(const common::ObMember &member, const int64_t timeout_us) = 0; + virtual int replace_learner(const common::ObMember &added_learner, + const common::ObMember &removed_learner, + const int64_t timeout_us) = 0; + virtual int switch_learner_to_acceptor(const common::ObMember &learner, + const int64_t paxos_replica_num, + const int64_t timeout_us) = 0; + virtual int switch_acceptor_to_learner(const common::ObMember &member, + const int64_t paxos_replica_num, + const int64_t timeout_us) = 0; virtual int get_palf_base_info(const palf::LSN &base_lsn, palf::PalfBaseInfo &palf_base_info) = 0; virtual int is_in_sync(bool &is_log_sync, bool &is_need_rebuild) const = 0; virtual int enable_sync() = 0; @@ -245,11 +256,13 @@ public: // @brief set the initial member list of paxos group // @param[in] ObMemberList, the initial member list // @param[in] int64_t, the paxos relica num + // @param[in] GlobalLearnerList, the initial learner list // @retval // return OB_SUCCESS if success // else return other errno int set_initial_member_list(const common::ObMemberList &member_list, - const int64_t paxos_replica_num) override final; + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list) override final; int set_region(const common::ObRegion ®ion) override final; int set_election_priority(palf::election::ElectionPriority *priority) override final; int 
reset_election_priority() override final; @@ -297,6 +310,13 @@ public: // @param[out] common::ObMemberList& // @param[out] int64_t& int get_paxos_member_list(common::ObMemberList &member_list, int64_t &paxos_replica_num) const override final; + // @brief, get paxos member list and global learner list of this paxos group atomically + // @param[out] common::ObMemberList& + // @param[out] int64_t& + // @param[out] common::GlobalLearnerList& + int get_paxos_member_list_and_learner_list(common::ObMemberList &member_list, + int64_t &paxos_replica_num, + common::GlobalLearnerList &learner_list) const override final; // @brief, get global learner list of this paxos group // @param[out] common::GlobalLearnerList& int get_global_learner_list(common::GlobalLearnerList &learner_list) const override final; @@ -403,6 +423,19 @@ public: int add_learner(const common::ObMember &added_learner, const int64_t timeout_us) override final; + // @brief, replace removed_learner with added_learner, can be called in any member + // @param[in] const common::ObMember &added_learner: learner will be added + // @param[in] const common::ObMember &removed_learner: learner will be removed + // @param[in] const int64_t timeout_us + // @return + // - OB_SUCCESS: replace learner successfully + // - OB_INVALID_ARGUMENT: invalid argument or not supported config change + // - OB_TIMEOUT: replace learner timeout + // - other: bug + int replace_learner(const common::ObMember &added_learner, + const common::ObMember &removed_learner, + const int64_t timeout_us) override final; + // @brief: remove a learner(read only replica) in this cluster // @param[in] const common::ObMember &removed_learner: learner will be removed // @param[in] const int64_t timeout_us @@ -415,22 +448,28 @@ public: // @brief: switch a learner(read only replica) to acceptor(full replica) in this cluster // @param[in] const common::ObMember &learner: learner will be switched to acceptor + // @param[in] const int64_t new_replica_num: replica number of paxos group after switching + // learner to acceptor (similar to add_member) // @param[in] const int64_t timeout_us // @return // - OB_SUCCESS // - OB_INVALID_ARGUMENT: invalid argument // - OB_TIMEOUT: switch_learner_to_acceptor timeout int switch_learner_to_acceptor(const common::ObMember &learner, + const int64_t new_replica_num, const int64_t timeout_us) override final; // @brief: switch an acceptor(full replica) to learner(read only replica) in this cluster // @param[in] const common::ObMember &member: acceptor will be switched to learner + // @param[in] const int64_t new_replica_num: replica number of paxos group after switching + // acceptor to learner (similar to remove_member) // @param[in] const int64_t timeout_us // @return // - OB_SUCCESS // - OB_INVALID_ARGUMENT: invalid argument // - OB_TIMEOUT: switch_acceptor_to_learner timeout int switch_acceptor_to_learner(const common::ObMember &member, + const int64_t new_replica_num, const int64_t timeout_us) override final; diff --git a/src/logservice/ob_log_service.cpp b/src/logservice/ob_log_service.cpp index 2f6db39185..3b89a0a9c8 100644 --- a/src/logservice/ob_log_service.cpp +++ b/src/logservice/ob_log_service.cpp @@ -370,7 +370,6 @@ int ObLogService::check_palf_exist(const ObLSID &id, bool &exist) const } int ObLogService::add_ls(const ObLSID &id, - const ObReplicaType &replica_type, ObLogHandler &log_handler, ObLogRestoreHandler &restore_handler) { @@ -386,8 +385,7 @@ int ObLogService::add_ls(const ObLSID &id, CLOG_LOG(WARN, "failed to get palf_handle", K(ret),
K(id)); } else if (OB_FAIL(apply_service_.add_ls(id))) { CLOG_LOG(WARN, "failed to add_ls for apply_service", K(ret), K(id)); - } else if (OB_FAIL(replay_service_.add_ls(id, - replica_type))) { + } else if (OB_FAIL(replay_service_.add_ls(id))) { CLOG_LOG(WARN, "failed to add_ls for replay_service", K(ret), K(id)); } else if (OB_FAIL(log_handler.init(id.id(), self_, &apply_service_, &replay_service_, &role_change_service_, palf_handle, palf_env_, loc_cache_cb, &rpc_proxy_))) { @@ -399,7 +397,7 @@ int ObLogService::add_ls(const ObLSID &id, } else if (OB_FAIL(log_handler_palf_handle.set_location_cache_cb(loc_cache_cb))) { CLOG_LOG(WARN, "set_location_cache_cb failed", K(ret), K(id)); } else { - FLOG_INFO("add_ls success", K(ret), K(id), K(replica_type), KP(this)); + FLOG_INFO("add_ls success", K(ret), K(id), KP(this)); } if (OB_FAIL(ret)) { @@ -621,7 +619,7 @@ int ObLogService::create_ls_(const share::ObLSID &id, CLOG_LOG(WARN, "failed to disable_sync", K(ret), K(id)); } else if (OB_FAIL(apply_service_.add_ls(id))) { CLOG_LOG(WARN, "failed to add_ls for apply engine", K(ret), K(id)); - } else if (OB_FAIL(replay_service_.add_ls(id, replica_type))) { + } else if (OB_FAIL(replay_service_.add_ls(id))) { CLOG_LOG(WARN, "failed to add_ls", K(ret), K(id)); } else if (OB_FAIL(log_handler.init(id.id(), self_, &apply_service_, &replay_service_, &role_change_service_, palf_handle, palf_env_, loc_cache_cb, &rpc_proxy_))) { diff --git a/src/logservice/ob_log_service.h b/src/logservice/ob_log_service.h index 2ed9aaf785..9f171ca108 100644 --- a/src/logservice/ob_log_service.h +++ b/src/logservice/ob_log_service.h @@ -124,11 +124,9 @@ public: int check_palf_exist(const share::ObLSID &id, bool &exist) const; //宕机重启恢复日志流接口,包括生成并初始化对应的ObReplayStatus结构 // @param [in] id,日志流标识符 - // @param [in] replica_type,日志流的副本类型 // @param [out] log_handler,新建日志流以ObLogHandler形式返回,保证上层使用日志流时的生命周期 // @param [out] restore_handler,新建日志流以ObLogRestoreHandler形式返回,用于备库同步日志 int add_ls(const share::ObLSID &id, - const common::ObReplicaType &replica_type, ObLogHandler &log_handler, ObLogRestoreHandler &restore_handler); diff --git a/src/logservice/palf/log_config_mgr.cpp b/src/logservice/palf/log_config_mgr.cpp index e5cc3083b7..3ea4308147 100644 --- a/src/logservice/palf/log_config_mgr.cpp +++ b/src/logservice/palf/log_config_mgr.cpp @@ -194,6 +194,7 @@ void LogConfigMgr::destroy() int LogConfigMgr::set_initial_member_list(const ObMemberList &member_list, const int64_t replica_num, + const common::GlobalLearnerList &learner_list, const int64_t proposal_id, LogConfigVersion &init_config_version) { @@ -213,6 +214,7 @@ int LogConfigMgr::set_initial_member_list(const ObMemberList &member_list, LogConfigInfo config_info = log_ms_meta_.curr_; config_info.log_sync_memberlist_ = member_list; config_info.log_sync_replica_num_ = replica_num; + config_info.learnerlist_ = learner_list; if (OB_FAIL(set_initial_config_info_(config_info, proposal_id, init_config_version))) { PALF_LOG(WARN, "set_initial_config_info failed", K(ret), K_(palf_id), K_(self), K(config_info), K(proposal_id)); } else { @@ -225,6 +227,7 @@ int LogConfigMgr::set_initial_member_list(const ObMemberList &member_list, int LogConfigMgr::set_initial_member_list(const common::ObMemberList &member_list, const common::ObMember &arb_member, const int64_t replica_num, + const common::GlobalLearnerList &learner_list, const int64_t proposal_id, LogConfigVersion &init_config_version) { @@ -247,6 +250,7 @@ int LogConfigMgr::set_initial_member_list(const common::ObMemberList &member_lis 
config_info.log_sync_memberlist_ = member_list; config_info.log_sync_replica_num_ = replica_num; config_info.arbitration_member_ = arb_member; + config_info.learnerlist_ = learner_list; if (OB_FAIL(set_initial_config_info_(config_info, proposal_id, init_config_version))) { PALF_LOG(WARN, "set_initial_config_info failed", K(ret), K_(palf_id), K_(self), K(config_info), K(proposal_id)); } else { @@ -263,11 +267,8 @@ int LogConfigMgr::set_initial_config_info_(const LogConfigInfo &config_info, int ret = OB_SUCCESS; const int64_t initial_config_seq = 1; LogReplicaType replica_type = state_mgr_->get_replica_type(); - // TODO by haofan: Mittest case creates arb member by same interface with F member, - // so its replica_type maybe NORMAL_REPLICA too. We just skip type check here temporarily. -// const bool valid_replica_type = (config_info.arbitration_member_.get_server() == self_) ? \ -// (replica_type == ARBITRATION_REPLICA) : (replica_type == NORMAL_REPLICA); - const bool valid_replica_type = true; + const bool valid_replica_type = (config_info.arbitration_member_.get_server() == self_) ? \ + (replica_type == ARBITRATION_REPLICA) : true; if (false == valid_replica_type) { ret = OB_NOT_SUPPORTED; PALF_LOG(WARN, "set_initial_member_list don't match with replica_type", KR(ret), K_(palf_id), K_(self), K(replica_type), K(config_info)); @@ -276,7 +277,10 @@ int LogConfigMgr::set_initial_config_info_(const LogConfigInfo &config_info, } else { LogConfigInfo init_config_info = config_info; init_config_info.config_version_ = init_config_version; - if (false == check_need_update_memberlist_without_lock_(init_config_version)) { + if (false == init_config_info.is_valid()) { + ret = OB_INVALID_ARGUMENT; + PALF_LOG(WARN, "initial config info is invalid", K_(palf_id), K(config_info), K(proposal_id)); + } else if (false == check_need_update_memberlist_without_lock_(init_config_version)) { PALF_LOG(INFO, "persistent_config_version_ has been greater than or equal to config_version, \ no need set_initial_config_info_", K(ret), K_(palf_id), K_(self), K_(log_ms_meta), K_(persistent_config_version), K(init_config_version)); } else if (OB_FAIL(log_ms_meta_.generate(proposal_id, init_config_info, init_config_info, @@ -1112,8 +1116,14 @@ int LogConfigMgr::check_config_change_args_(const LogConfigChangeArgs &args, boo PALF_LOG(WARN, "can not upgrade a normal learner", KR(ret), K_(palf_id), K_(self), K_(log_ms_meta), K(args)); } } else if (!is_in_learnerlist && !is_in_degraded_learnerlist && is_in_log_sync_memberlist) { - is_already_finished = true; - PALF_LOG(INFO, "learner_to_acceptor is already finished", KR(ret), K_(palf_id), K_(self), K_(log_ms_meta), K(member)); + if (args.type_ == UPGRADE_LEARNER_TO_ACCEPTOR || new_replica_num == curr_replica_num) { + is_already_finished = true; + PALF_LOG(INFO, "learner_to_acceptor is already finished", KR(ret), K_(palf_id), K_(self), K_(log_ms_meta), K(member)); + } else { + ret = OB_INVALID_ARGUMENT; + PALF_LOG(INFO, "member already exists, but new_replica_num not equal to curr val", KR(ret), K_(palf_id), K_(self), + K_(log_ms_meta), K(member), K(new_replica_num), K_(alive_paxos_replica_num)); + } } else { ret = OB_INVALID_ARGUMENT; PALF_LOG(WARN, "server is neither in memberlist nor in learnerlist", KR(ret), K_(palf_id), K_(self), K_(log_ms_meta), K(member)); @@ -1137,8 +1147,14 @@ int LogConfigMgr::check_config_change_args_(const LogConfigChangeArgs &args, boo ret = OB_INVALID_ARGUMENT; PALF_LOG(WARN, "server has been degraded, can't switch to learner", KR(ret), K_(palf_id), 
K_(self), K_(log_ms_meta), K(member)); } else { - is_already_finished = true; - PALF_LOG(INFO, "acceptor_to_learner is already finished", KR(ret), K_(palf_id), K_(self), K_(log_ms_meta), K(member)); + if (args.type_ == DEGRADE_ACCEPTOR_TO_LEARNER || new_replica_num == curr_replica_num) { + is_already_finished = true; + PALF_LOG(INFO, "acceptor_to_learner is already finished", KR(ret), K_(palf_id), K_(self), K_(log_ms_meta), K(member)); + } else { + ret = OB_INVALID_ARGUMENT; + PALF_LOG(INFO, "member is already removed, but new_replica_num not equal to curr val", KR(ret), K_(palf_id), K_(self), + K_(log_ms_meta), K(member), K(new_replica_num), K_(alive_paxos_replica_num)); + } } } else if (!is_in_degraded_learnerlist && is_in_log_sync_memberlist) { // degrade operation can only be done when there is arbitration replica in paxos group @@ -2228,7 +2244,8 @@ bool LogConfigMgr::is_registering_() const int LogConfigMgr::get_register_leader_(common::ObAddr &leader) const { int ret = OB_SUCCESS; - leader = state_mgr_->get_leader(); + // TODO by yunlong, get leader from location cache temporarily, need remove + sw_->get_leader_from_cache(leader); if (!leader.is_valid()) { ret = OB_EAGAIN; } diff --git a/src/logservice/palf/log_config_mgr.h b/src/logservice/palf/log_config_mgr.h index 7a37e25e3d..b9aaf60eef 100644 --- a/src/logservice/palf/log_config_mgr.h +++ b/src/logservice/palf/log_config_mgr.h @@ -116,7 +116,8 @@ inline bool is_upgrade_or_degrade(const LogConfigChangeType type) inline bool is_use_replica_num_args(const LogConfigChangeType type) { - return ADD_MEMBER == type || REMOVE_MEMBER == type || CHANGE_REPLICA_NUM == type || FORCE_SINGLE_MEMBER == type; + return ADD_MEMBER == type || REMOVE_MEMBER == type || CHANGE_REPLICA_NUM == type || + FORCE_SINGLE_MEMBER == type || SWITCH_LEARNER_TO_ACCEPTOR == type || SWITCH_ACCEPTOR_TO_LEARNER == type; } inline bool need_exec_on_leader_(const LogConfigChangeType type) @@ -222,12 +223,14 @@ public: // require caller holds WLock in PalfHandleImpl virtual int set_initial_member_list(const common::ObMemberList &member_list, const int64_t replica_num, + const common::GlobalLearnerList &learner_list, const int64_t proposal_id, LogConfigVersion &config_version); // require caller holds WLock in PalfHandleImpl virtual int set_initial_member_list(const common::ObMemberList &member_list, const common::ObMember &arb_member, const int64_t replica_num, + const common::GlobalLearnerList &learner_list, const int64_t proposal_id, LogConfigVersion &config_version); // set region for self diff --git a/src/logservice/palf/log_engine.cpp b/src/logservice/palf/log_engine.cpp index 43554e903e..c485d11487 100644 --- a/src/logservice/palf/log_engine.cpp +++ b/src/logservice/palf/log_engine.cpp @@ -964,22 +964,6 @@ int LogEngine::submit_change_config_meta_resp(const common::ObAddr &server, return ret; } -int LogEngine::submit_change_mode_meta_req( - const common::ObMemberList &member_list, - const int64_t &msg_proposal_id, - const bool is_applied_mode_meta, - const LogModeMeta &mode_meta) -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - } else { - ret = log_net_service_.submit_change_mode_meta_req(member_list, msg_proposal_id, - is_applied_mode_meta, mode_meta); - } - return ret; -} - int LogEngine::submit_change_mode_meta_resp(const common::ObAddr &server, const int64_t &msg_proposal_id) { diff --git a/src/logservice/palf/log_engine.h b/src/logservice/palf/log_engine.h index 02c823ffa0..45fe1fc70c 100644 --- a/src/logservice/palf/log_engine.h +++ 
b/src/logservice/palf/log_engine.h @@ -73,6 +73,16 @@ class PurgeThrottlingCbCtx; return submit_prepare_meta_req_(member_list, log_proposal_id); \ } +#define OVERLOAD_SUBMIT_CHANGE_MODE_META_REQ(type) \ + virtual int submit_change_mode_meta_req(const type &member_list, \ + const int64_t &msg_proposal_id, \ + const bool is_applied_mode_meta, \ + const LogModeMeta &mode_meta) \ + { \ + return submit_change_mode_meta_req_(member_list, msg_proposal_id, \ + is_applied_mode_meta, mode_meta); \ + } + class LogEngine { friend class PalfHandleImpl; // need get net_service to init election @@ -281,21 +291,35 @@ public: return ret; } + template + int submit_change_mode_meta_req_( + const List &member_list, + const int64_t &msg_proposal_id, + const bool is_applied_mode_meta, + const LogModeMeta &mode_meta) + { + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + } else { + ret = log_net_service_.submit_change_mode_meta_req(member_list, msg_proposal_id, + is_applied_mode_meta, mode_meta); + } + return ret; + } + OVERLOAD_SUBMIT_CHANGE_CONFIG_META_REQ(common::ObMemberList); OVERLOAD_SUBMIT_CHANGE_CONFIG_META_REQ(common::GlobalLearnerList); - OVERLOAD_SUBMIT_CHANGE_CONFIG_META_REQ(ResendConfigLogList); + OVERLOAD_SUBMIT_CHANGE_CONFIG_META_REQ(common::ResendConfigLogList); OVERLOAD_SUBMIT_PREPARE_META_REQ(common::ObMemberList); OVERLOAD_SUBMIT_PREPARE_META_REQ(common::GlobalLearnerList); + OVERLOAD_SUBMIT_CHANGE_MODE_META_REQ(common::ObMemberList); + OVERLOAD_SUBMIT_CHANGE_MODE_META_REQ(common::ResendConfigLogList); virtual int submit_change_config_meta_resp(const common::ObAddr &server, const int64_t msg_proposal_id, const LogConfigVersion &config_version); - virtual int submit_change_mode_meta_req(const common::ObMemberList &member_list, - const int64_t &msg_proposal_id, - const bool is_applied_mode_meta, - const LogModeMeta &mode_meta); - virtual int submit_change_mode_meta_resp(const common::ObAddr &server, const int64_t &msg_proposal_id); diff --git a/src/logservice/palf/log_meta_info.cpp b/src/logservice/palf/log_meta_info.cpp index bf5fa0245e..81ffc4a432 100644 --- a/src/logservice/palf/log_meta_info.cpp +++ b/src/logservice/palf/log_meta_info.cpp @@ -384,14 +384,45 @@ LogConfigInfo::~LogConfigInfo() bool LogConfigInfo::is_valid() const { - const bool is_arb_in_log_sync = log_sync_memberlist_.contains(arbitration_member_.get_server()); + const bool is_all_list_unique = this->is_all_list_unique(); return true == log_sync_memberlist_.is_valid() && - false == is_arb_in_log_sync && + true == is_all_list_unique && 0 < log_sync_replica_num_ && common::OB_MAX_MEMBER_NUMBER >= log_sync_replica_num_ && config_version_.is_valid(); } +bool LogConfigInfo::is_all_list_unique() const +{ + int ret = OB_SUCCESS; + bool is_all_list_unique = true; + GlobalLearnerList server_list; + server_list = learnerlist_; + if (OB_ENTRY_EXIST == (ret = server_list.append(degraded_learnerlist_))) { + is_all_list_unique = false; + PALF_LOG(WARN, "learnerlist_ should not overlap with degraded_learnerlist_", + K_(learnerlist), K_(degraded_learnerlist)); + } else if (arbitration_member_.is_valid() && + OB_ENTRY_EXIST == (ret = server_list.add_learner(arbitration_member_))) { + is_all_list_unique = false; + PALF_LOG(WARN, "learnerlist should not overlap with arb_member", + K_(learnerlist), K_(degraded_learnerlist), K_(arbitration_member)); + } else { + for (int i = 0; i < log_sync_memberlist_.get_member_number(); i++) { + common::ObMember member; + if (OB_FAIL(log_sync_memberlist_.get_member_by_index(i, member))) { + 
PALF_LOG(WARN, "get_server_by_index failed", K_(log_sync_memberlist)); + } else if (OB_ENTRY_EXIST == (ret = server_list.add_learner(member))) { + is_all_list_unique = false; + PALF_LOG(WARN, "serverlist should not overlap with log_sync_member_list", K_(learnerlist), + K_(degraded_learnerlist), K_(log_sync_memberlist), K_(arbitration_member)); + break; + } + } + } + return is_all_list_unique; +} + void LogConfigInfo::reset() { log_sync_memberlist_.reset(); diff --git a/src/logservice/palf/log_meta_info.h b/src/logservice/palf/log_meta_info.h index ea0e0f9889..f474efaa8b 100644 --- a/src/logservice/palf/log_meta_info.h +++ b/src/logservice/palf/log_meta_info.h @@ -132,6 +132,7 @@ public: int convert_to_complete_config(common::ObMemberList &alive_paxos_memberlist, int64_t &alive_paxos_replica_num, GlobalLearnerList &all_learners) const; + bool is_all_list_unique() const; // For unittest bool operator==(const LogConfigInfo &config_info) const; TO_STRING_KV(K_(config_version), diff --git a/src/logservice/palf/log_mode_mgr.cpp b/src/logservice/palf/log_mode_mgr.cpp index a020f1f3a6..4c1f5820b3 100644 --- a/src/logservice/palf/log_mode_mgr.cpp +++ b/src/logservice/palf/log_mode_mgr.cpp @@ -45,6 +45,7 @@ LogModeMgr::LogModeMgr() local_max_log_pid_(INVALID_PROPOSAL_ID), max_majority_accepted_pid_(INVALID_PROPOSAL_ID), max_majority_lsn_(), + resend_mode_meta_list_(), state_mgr_(NULL), log_engine_(NULL), config_mgr_(NULL), @@ -100,6 +101,7 @@ void LogModeMgr::destroy() accepted_mode_meta_.reset(); last_submit_mode_meta_.reset(); reset_status_(); + resend_mode_meta_list_.reset(); state_mgr_ = NULL; log_engine_ = NULL; config_mgr_ = NULL; @@ -328,6 +330,7 @@ void LogModeMgr::reset_status() { common::ObSpinLockGuard guard(lock_); reset_status_(); + resend_mode_meta_list_.reset(); } void LogModeMgr::reset_status_() @@ -465,9 +468,11 @@ int LogModeMgr::switch_state_(const AccessMode &access_mode, change_done = (true == is_reconfirm)? 
true: can_finish_change_mode_(); if (change_done) { applied_mode_meta_ = accepted_mode_meta_; - const bool is_applied_mode_meta = true; - (void) submit_accept_req_(new_proposal_id_, is_applied_mode_meta, applied_mode_meta_); - if (applied_mode_meta_.ref_scn_.is_valid() && AccessMode::APPEND == applied_mode_meta_.access_mode_ && + if (OB_FAIL(set_resend_mode_meta_list_())) { + PALF_LOG(WARN, "set_resend_mode_meta_list_ failed", K(ret), K_(palf_id), K_(self)); + } else if (OB_FAIL(resend_applied_mode_meta_())) { + PALF_LOG(WARN, "resend_applied_mode_meta_ failed", K(ret), K_(palf_id), K_(self)); + } else if (applied_mode_meta_.ref_scn_.is_valid() && AccessMode::APPEND == applied_mode_meta_.access_mode_ && OB_FAIL(sw_->inc_update_scn_base(applied_mode_meta_.ref_scn_))) { PALF_LOG(ERROR, "inc_update_base_log_ts failed", KR(ret), K_(palf_id), K_(self), K_(applied_mode_meta)); @@ -515,6 +520,64 @@ int LogModeMgr::switch_state_(const AccessMode &access_mode, return ret; } +int LogModeMgr::set_resend_mode_meta_list_() +{ + int ret = OB_SUCCESS; + common::ObMemberList member_list; + common::GlobalLearnerList learner_list; + int64_t replica_num; + resend_mode_meta_list_.reset(); + if (OB_FAIL(config_mgr_->get_alive_member_list_with_arb(member_list, replica_num))) { + PALF_LOG(WARN, "get_alive_member_list_with_arb failed", K(ret), K_(palf_id), K_(self)); + } else if (OB_FAIL(config_mgr_->get_global_learner_list(learner_list))) { + PALF_LOG(WARN, "get_global_learner_list failed", K(ret), K_(palf_id), K_(self)); + } else { + member_list.remove_server(self_); + (void) learner_list.deep_copy_to(resend_mode_meta_list_); + const int64_t member_number = member_list.get_member_number(); + for (int64_t idx = 0; idx < member_number && OB_SUCC(ret); ++idx) { + common::ObAddr server; + if (OB_FAIL(member_list.get_server_by_index(idx, server))) { + PALF_LOG(WARN, "get_server_by_index failed", K(ret), K(idx)); + } else if (OB_FAIL(resend_mode_meta_list_.add_learner(ObMember(server, 1)))) { + PALF_LOG(WARN, "add_learner failed", K(ret), K(server)); + } + } + } + return ret; +} + +int LogModeMgr::leader_do_loop_work() +{ + int ret = OB_SUCCESS; + common::ObSpinLockGuard guard(lock_); + const bool is_leader = (self_ == state_mgr_->get_leader()); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + } else if (true == is_leader && + ModeChangeState::MODE_INIT == state_ && + 0 != resend_mode_meta_list_.get_member_number() && + is_need_retry_()) { + ret = resend_applied_mode_meta_(); + } + return ret; +} + +int LogModeMgr::resend_applied_mode_meta_() +{ + int ret = OB_SUCCESS; + const int64_t proposal_id = state_mgr_->get_proposal_id(); + const bool is_applied_mode_meta = true; + if (OB_FAIL(log_engine_->submit_change_mode_meta_req(resend_mode_meta_list_, proposal_id, + is_applied_mode_meta, applied_mode_meta_))) { + PALF_LOG(WARN, "submit_prepare_meta_req failed", K(ret), K_(palf_id), K_(self), + K_(resend_mode_meta_list), K(proposal_id), K(is_applied_mode_meta), K_(applied_mode_meta)); + } else { + last_submit_req_ts_ = common::ObTimeUtility::current_time(); + } + return ret; +} + // require wlock in PalfHandleImpl int LogModeMgr::submit_prepare_req_(const bool need_inc_pid, const bool need_send_and_handle_prepare) { @@ -715,6 +778,7 @@ int LogModeMgr::ack_mode_meta(const common::ObAddr &server, const int64_t propos PALF_LOG(INFO, "ack_mode_meta success", K(ret), K_(palf_id), K_(self), K(server), K(proposal_id), K_(follower_list), K_(majority_cnt), K_(ack_list)); } + (void) resend_mode_meta_list_.remove_learner(server); return 
ret; } diff --git a/src/logservice/palf/log_mode_mgr.h b/src/logservice/palf/log_mode_mgr.h index ecd7697f5b..ab680e2267 100644 --- a/src/logservice/palf/log_mode_mgr.h +++ b/src/logservice/palf/log_mode_mgr.h @@ -89,6 +89,7 @@ public: virtual int submit_fetch_mode_meta_resp(const common::ObAddr &server, const int64_t msg_proposal_id, const int64_t accepted_mode_pid); + int leader_do_loop_work(); TO_STRING_KV(K_(palf_id), K_(self), K_(applied_mode_meta), K_(accepted_mode_meta), K_(last_submit_mode_meta), "state", state2str_(state_), K_(new_proposal_id), K_(local_max_lsn), K_(local_max_log_pid), K_(max_majority_accepted_pid), K_(max_majority_lsn), @@ -112,6 +113,8 @@ private: const int64_t proposal_id, const bool is_applied_mode_meta, const LogModeMeta &mode_meta); + int set_resend_mode_meta_list_(); + int resend_applied_mode_meta_(); private: static const int64_t PREPARE_RETRY_INTERVAL_US = 2 * 1000 * 1000; // 2s @@ -160,6 +163,7 @@ private: int64_t local_max_log_pid_; int64_t max_majority_accepted_pid_; LSN max_majority_lsn_; + ResendConfigLogList resend_mode_meta_list_; mutable int64_t wait_committed_log_slide_warn_ts_; // =========access_mode changing state============ LogStateMgr *state_mgr_; diff --git a/src/logservice/palf/log_net_service.cpp b/src/logservice/palf/log_net_service.cpp index 83ba5c2ff5..f6a249d771 100644 --- a/src/logservice/palf/log_net_service.cpp +++ b/src/logservice/palf/log_net_service.cpp @@ -223,23 +223,6 @@ int LogNetService::submit_change_config_meta_resp( return ret; } -int LogNetService::submit_change_mode_meta_req( - const common::ObMemberList &member_list, - const int64_t &msg_proposal_id, - const bool is_applied_mode_meta, - const LogModeMeta &mode_meta) -{ - int ret = OB_SUCCESS; - int64_t pos = 0; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - } else { - LogChangeModeMetaReq req(msg_proposal_id, mode_meta, is_applied_mode_meta); - ret = post_request_to_member_list_(member_list, req); - } - return ret; -} - int LogNetService::submit_change_mode_meta_resp( const common::ObAddr &server, const int64_t &msg_proposal_id) diff --git a/src/logservice/palf/log_net_service.h b/src/logservice/palf/log_net_service.h index 9a1c0f0e9d..b9da819371 100644 --- a/src/logservice/palf/log_net_service.h +++ b/src/logservice/palf/log_net_service.h @@ -162,11 +162,24 @@ public: const int64_t msg_proposal_id, const LogConfigVersion &config_version); + template int submit_change_mode_meta_req( - const common::ObMemberList &member_list, + const List &member_list, const int64_t &msg_proposal_id, const bool is_applied_mode_meta, - const LogModeMeta &mode_meta); + const LogModeMeta &mode_meta) + { + int ret = OB_SUCCESS; + int64_t pos = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + } else { + LogChangeModeMetaReq req(msg_proposal_id, mode_meta, is_applied_mode_meta); + ret = post_request_to_member_list_(member_list, req); + } + return ret; + } + int submit_change_mode_meta_resp( const common::ObAddr &server, const int64_t &msg_proposal_id); diff --git a/src/logservice/palf/log_sliding_window.cpp b/src/logservice/palf/log_sliding_window.cpp index 21cd3b69a1..fd101d67d8 100644 --- a/src/logservice/palf/log_sliding_window.cpp +++ b/src/logservice/palf/log_sliding_window.cpp @@ -2037,19 +2037,21 @@ int LogSlidingWindow::do_fetch_log_(const FetchTriggerType &trigger_type, return ret; } -int LogSlidingWindow::get_fetch_log_dst_(common::ObAddr &fetch_dst) const +int LogSlidingWindow::get_leader_from_cache(common::ObAddr &leader) const +{ + return get_leader_from_cache_(leader); +} + +int 
LogSlidingWindow::get_leader_from_cache_(common::ObAddr &leader) const { int ret = OB_SUCCESS; const common::ObAddr state_mgr_leader = state_mgr_->get_leader(); - const common::ObAddr parent = mm_->get_parent(); - if (parent.is_valid()) { - fetch_dst = parent; - } else if (state_mgr_leader.is_valid()) { - fetch_dst = state_mgr_leader; + if (state_mgr_leader.is_valid()) { + leader = state_mgr_leader; } else if (palf_reach_time_interval(PALF_FETCH_LOG_RENEW_LEADER_INTERVAL_US, last_fetch_log_renew_leader_ts_us_) && OB_FAIL(plugins_->nonblock_renew_leader(palf_id_))) { PALF_LOG(WARN, "nonblock_renew_leader failed", KR(ret), K_(palf_id), K_(self)); - } else if (OB_FAIL(plugins_->nonblock_get_leader(palf_id_, fetch_dst))) { + } else if (OB_FAIL(plugins_->nonblock_get_leader(palf_id_, leader))) { if (palf_reach_time_interval(5 * 1000 * 1000, lc_cb_get_warn_time_)) { PALF_LOG(WARN, "nonblock_get_leader failed", KR(ret), K_(palf_id), K_(self)); } @@ -2057,6 +2059,17 @@ int LogSlidingWindow::get_fetch_log_dst_(common::ObAddr &fetch_dst) const return ret; } +int LogSlidingWindow::get_fetch_log_dst_(common::ObAddr &fetch_dst) const +{ + int ret = OB_SUCCESS; + const common::ObAddr parent = mm_->get_parent(); + if (parent.is_valid()) { + fetch_dst = parent; + } else if (OB_FAIL(get_leader_from_cache_(fetch_dst))) { + } + return ret; +} + bool LogSlidingWindow::is_all_committed_log_slided_out(LSN &prev_lsn, int64_t &prev_log_id, LSN &committed_end_lsn) const { return is_all_committed_log_slided_out_(prev_lsn, prev_log_id, committed_end_lsn); diff --git a/src/logservice/palf/log_sliding_window.h b/src/logservice/palf/log_sliding_window.h old mode 100644 new mode 100755 index 62d7931878..6075215292 --- a/src/logservice/palf/log_sliding_window.h +++ b/src/logservice/palf/log_sliding_window.h @@ -255,11 +255,14 @@ public: const LSN &log_lsn, const LSN &log_end_lsn, const int64_t &log_proposal_id); + + virtual int get_leader_from_cache(common::ObAddr &leader) const; virtual int read_data_from_buffer(const LSN &read_begin_lsn, const int64_t in_read_size, char *buf, int64_t &out_read_size) const; int64_t get_last_slide_log_id() const; + TO_STRING_KV(K_(palf_id), K_(self), K_(lsn_allocator), K_(group_buffer), \ K_(last_submit_lsn), K_(last_submit_end_lsn), K_(last_submit_log_id), K_(last_submit_log_pid), \ K_(max_flushed_lsn), K_(max_flushed_end_lsn), K_(max_flushed_log_pid), K_(committed_end_lsn), \ @@ -271,6 +274,7 @@ private: const PalfBaseInfo &palf_base_info, common::ObILogAllocator *alloc_mgr); int get_fetch_log_dst_(common::ObAddr &leader) const; + int get_leader_from_cache_(common::ObAddr &leader) const; int clean_log_(); int reset_match_lsn_map_(); bool is_all_log_flushed_(); diff --git a/src/logservice/palf/palf_handle.cpp b/src/logservice/palf/palf_handle.cpp index 80c3dcdcf5..d779a4bdf5 100644 --- a/src/logservice/palf/palf_handle.cpp +++ b/src/logservice/palf/palf_handle.cpp @@ -78,10 +78,12 @@ bool PalfHandle::operator==(const PalfHandle &rhs) const return palf_handle_impl_ == rhs.palf_handle_impl_; } -int PalfHandle::set_initial_member_list(const common::ObMemberList &member_list, const int64_t paxos_replica_num) +int PalfHandle::set_initial_member_list(const common::ObMemberList &member_list, + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list) { CHECK_VALID; - return palf_handle_impl_->set_initial_member_list(member_list, paxos_replica_num); + return palf_handle_impl_->set_initial_member_list(member_list, paxos_replica_num, learner_list); } @@ -294,6 +296,14 @@ 
int PalfHandle::get_paxos_member_list(common::ObMemberList &member_list, int64_t return palf_handle_impl_->get_paxos_member_list(member_list, paxos_replica_num); } +int PalfHandle::get_paxos_member_list_and_learner_list(common::ObMemberList &member_list, + int64_t &paxos_replica_num, + GlobalLearnerList &learner_list) const +{ + CHECK_VALID; + return palf_handle_impl_->get_paxos_member_list_and_learner_list(member_list, paxos_replica_num, learner_list); +} + int PalfHandle::get_election_leader(common::ObAddr &addr) const { CHECK_VALID; @@ -356,16 +366,20 @@ int PalfHandle::remove_learner(const common::ObMember &removed_learner, const in return palf_handle_impl_->remove_learner(removed_learner, timeout_us); } -int PalfHandle::switch_learner_to_acceptor(const common::ObMember &learner, const int64_t timeout_us) +int PalfHandle::switch_learner_to_acceptor(const common::ObMember &learner, + const int64_t new_replica_num, + const int64_t timeout_us) { CHECK_VALID; - return palf_handle_impl_->switch_learner_to_acceptor(learner, timeout_us); + return palf_handle_impl_->switch_learner_to_acceptor(learner, new_replica_num, timeout_us); } -int PalfHandle::switch_acceptor_to_learner(const common::ObMember &member, const int64_t timeout_us) +int PalfHandle::switch_acceptor_to_learner(const common::ObMember &member, + const int64_t new_replica_num, + const int64_t timeout_us) { CHECK_VALID; - return palf_handle_impl_->switch_acceptor_to_learner(member, timeout_us); + return palf_handle_impl_->switch_acceptor_to_learner(member, new_replica_num, timeout_us); } diff --git a/src/logservice/palf/palf_handle.h b/src/logservice/palf/palf_handle.h index 23f0494278..a2936dd9ba 100644 --- a/src/logservice/palf/palf_handle.h +++ b/src/logservice/palf/palf_handle.h @@ -63,7 +63,8 @@ public: // return OB_SUCCESS if success // else return other errno int set_initial_member_list(const common::ObMemberList &member_list, - const int64_t paxos_replica_num); + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list); int set_region(const common::ObRegion &region); int set_paxos_member_region_map(const common::ObArrayHashMap &region_map); //================ File access interfaces ======================= @@ -181,6 +182,9 @@ public: int get_global_learner_list(common::GlobalLearnerList &learner_list) const; int get_paxos_member_list(common::ObMemberList &member_list, int64_t &paxos_replica_num) const; + int get_paxos_member_list_and_learner_list(common::ObMemberList &member_list, + int64_t &paxos_replica_num, + GlobalLearnerList &learner_list) const; int get_election_leader(common::ObAddr &addr) const; // @brief: a special config change interface, change replica number of paxos group @@ -269,6 +273,8 @@ public: // @brief: switch a learner(read only replica) to acceptor(full replica) in this cluster // @param[in] const common::ObMember &learner: learner will be switched to acceptor + // @param[in] const int64_t new_replica_num: replica number of paxos group after switching + // learner to acceptor (similar to add_member) // @param[in] const int64_t timeout_us // @return // - OB_SUCCESS @@ -276,10 +282,13 @@ public: // - OB_TIMEOUT: switch_learner_to_acceptor timeout // - OB_NOT_MASTER: not leader or rolechange during membership changing int switch_learner_to_acceptor(const common::ObMember &learner, + const int64_t new_replica_num, const int64_t timeout_us); // @brief: switch an acceptor(full replica) to learner(read only replica) in this cluster // @param[in] const common::ObMember &member: acceptor will be switched to 
learner + // @param[in] const int64_t new_replica_num: replica number of paxos group after switching + // acceptor to learner (similar to remove_member) // @param[in] const int64_t timeout_us // @return // - OB_SUCCESS @@ -287,6 +296,7 @@ public: // - OB_TIMEOUT: switch_acceptor_to_learner timeout // - OB_NOT_MASTER: not leader or rolechange during membership changing int switch_acceptor_to_learner(const common::ObMember &member, + const int64_t new_replica_num, const int64_t timeout_us); int revoke_leader(const int64_t proposal_id); int change_leader_to(const common::ObAddr &dst_addr); diff --git a/src/logservice/palf/palf_handle_impl.cpp b/src/logservice/palf/palf_handle_impl.cpp index 008c455d8b..f80bc95f18 100644 --- a/src/logservice/palf/palf_handle_impl.cpp +++ b/src/logservice/palf/palf_handle_impl.cpp @@ -54,7 +54,6 @@ PalfHandleImpl::PalfHandleImpl() rebuild_cb_wrapper_(), plugins_(), last_locate_scn_(), - last_send_mode_meta_time_us_(OB_INVALID_TIMESTAMP), last_locate_block_(LOG_INVALID_BLOCK_ID), cannot_recv_log_warn_time_(OB_INVALID_TIMESTAMP), cannot_handle_committed_info_time_(OB_INVALID_TIMESTAMP), @@ -259,7 +258,8 @@ int PalfHandleImpl::start() int PalfHandleImpl::set_initial_member_list( const common::ObMemberList &member_list, - const int64_t paxos_replica_num) + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; LogConfigVersion config_version; @@ -270,7 +270,8 @@ int PalfHandleImpl::set_initial_member_list( { WLockGuard guard(lock_); const int64_t proposal_id = state_mgr_.get_proposal_id(); - if (OB_FAIL(config_mgr_.set_initial_member_list(member_list, paxos_replica_num, proposal_id, config_version))) { + if (OB_FAIL(config_mgr_.set_initial_member_list(member_list, paxos_replica_num, learner_list, + proposal_id, config_version))) { PALF_LOG(WARN, "LogConfigMgr set_initial_member_list failed", K(ret), KPC(this)); } } @@ -278,7 +279,8 @@ int PalfHandleImpl::set_initial_member_list( } else if (OB_FAIL(config_mgr_.wait_config_log_persistence(config_version))) { PALF_LOG(WARN, "want_config_log_persistence failed", K(ret), KPC(this)); } else { - PALF_EVENT("set_initial_member_list success", palf_id_, K(ret), K(member_list), K(paxos_replica_num)); + PALF_EVENT("set_initial_member_list success", palf_id_, K(ret), K(member_list), + K(learner_list), K(paxos_replica_num)); } } return ret; @@ -511,6 +513,24 @@ int PalfHandleImpl::get_paxos_member_list( return ret; } +int PalfHandleImpl::get_paxos_member_list_and_learner_list( + common::ObMemberList &member_list, + int64_t &paxos_replica_num, + GlobalLearnerList &learner_list) const +{ + int ret = OB_SUCCESS; + RLockGuard guard(lock_); + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + PALF_LOG(ERROR, "PalfHandleImpl has not inited", K(ret)); + } else if (OB_FAIL(config_mgr_.get_curr_member_list(member_list, paxos_replica_num))) { + PALF_LOG(WARN, "get_curr_member_list failed", K(ret), KPC(this)); + } else if (OB_FAIL(config_mgr_.get_global_learner_list(learner_list))) { + PALF_LOG(WARN, "get_global_learner_list failed", K(ret), KPC(this)); + } else {} + return ret; +} + int PalfHandleImpl::get_election_leader(ObAddr &addr) const { int ret = OB_SUCCESS; @@ -760,7 +780,9 @@ int PalfHandleImpl::remove_learner(const common::ObMember &removed_learner, cons return ret; } -int PalfHandleImpl::switch_learner_to_acceptor(const common::ObMember &learner, const int64_t timeout_us) +int PalfHandleImpl::switch_learner_to_acceptor(const common::ObMember &learner, + const int64_t new_replica_num, + 
const int64_t timeout_us) { int ret = OB_SUCCESS; if (IS_NOT_INIT) { @@ -768,7 +790,7 @@ int PalfHandleImpl::switch_learner_to_acceptor(const common::ObMember &learner, } else if (!learner.is_valid() || timeout_us <= 0) { ret = OB_INVALID_ARGUMENT; } else { - LogConfigChangeArgs args(learner, 0, SWITCH_LEARNER_TO_ACCEPTOR); + LogConfigChangeArgs args(learner, new_replica_num, SWITCH_LEARNER_TO_ACCEPTOR); if (OB_FAIL(one_stage_config_change_(args, timeout_us))) { PALF_LOG(WARN, "switch_learner_to_acceptor failed", KR(ret), KPC(this), K(args), K(timeout_us)); } else { @@ -778,7 +800,9 @@ int PalfHandleImpl::switch_learner_to_acceptor(const common::ObMember &learner, return ret; } -int PalfHandleImpl::switch_acceptor_to_learner(const common::ObMember &member, const int64_t timeout_us) +int PalfHandleImpl::switch_acceptor_to_learner(const common::ObMember &member, + const int64_t new_replica_num, + const int64_t timeout_us) { int ret = OB_SUCCESS; if (IS_NOT_INIT) { @@ -786,7 +810,7 @@ int PalfHandleImpl::switch_acceptor_to_learner(const common::ObMember &member, c } else if (!member.is_valid() || timeout_us <= 0) { ret = OB_INVALID_ARGUMENT; } else { - LogConfigChangeArgs args(member, 0, SWITCH_ACCEPTOR_TO_LEARNER); + LogConfigChangeArgs args(member, new_replica_num, SWITCH_ACCEPTOR_TO_LEARNER); if (OB_FAIL(one_stage_config_change_(args, timeout_us))) { PALF_LOG(WARN, "switch_acceptor_to_learner failed", KR(ret), KPC(this), K(args), K(timeout_us)); } else { @@ -861,13 +885,13 @@ int PalfHandleImpl::change_access_mode(const int64_t proposal_id, if (OB_SUCCESS != (tmp_ret = role_change_cb_wrpper_.on_role_change(palf_id_))) { PALF_LOG(WARN, "on_role_change failed", K(tmp_ret), K_(palf_id), K_(self)); } - PALF_EVENT("change_access_mode success", palf_id_, K(ret), KPC(this), - K(proposal_id), K(access_mode), K(ref_scn), K(time_guard), K_(sw)); } if (OB_EAGAIN == ret) { ob_usleep(1000); } } + PALF_EVENT("change_access_mode finish", palf_id_, K(ret), KPC(this), + K(proposal_id), K(access_mode), K(ref_scn), K(time_guard), K_(sw)); config_change_lock_.unlock(); mode_change_lock_.unlock(); } @@ -2140,32 +2164,6 @@ bool PalfHandleImpl::is_in_period_freeze_mode() const return sw_.is_in_period_freeze_mode(); } -int PalfHandleImpl::leader_sync_mode_meta_to_arb_member_() -{ - // caller need hold rdlock - int ret = OB_SUCCESS; - ObMember arb_member; - (void) config_mgr_.get_arbitration_member(arb_member); - const int64_t curr_time_us = common::ObTimeUtility::current_time(); - if (arb_member.is_valid() - && self_ == state_mgr_.get_leader() - && curr_time_us - last_send_mode_meta_time_us_ >= 3 * 1000 * 1000ll) { - // send mode meta to arb member periodically by 3s - int64_t curr_proposal_id = state_mgr_.get_proposal_id(); - int64_t mock_accepted_mode_pid = -1; - if (OB_FAIL(mode_mgr_.submit_fetch_mode_meta_resp(arb_member.get_server(), \ - curr_proposal_id, mock_accepted_mode_pid))) { - PALF_LOG(WARN, "submit_fetch_mode_meta_resp failed", K(ret), K_(palf_id), K_(self), - K(curr_proposal_id), K(mock_accepted_mode_pid)); - } else { - last_send_mode_meta_time_us_ = curr_time_us; - PALF_LOG(INFO, "submit_fetch_mode_meta_resp to arb_member", K(arb_member), K_(palf_id), K_(self), - K(curr_proposal_id), K(mock_accepted_mode_pid)); - } - } - return ret; -} - int PalfHandleImpl::period_freeze_last_log() { int ret = OB_SUCCESS; @@ -2175,8 +2173,6 @@ int PalfHandleImpl::period_freeze_last_log() } else { RLockGuard guard(lock_); sw_.period_freeze_last_log(); - // TODO by yunlong: replaced by mode_meta resend logic. 
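+    // NOTE: periodic resend of the applied mode meta is handled by LogModeMgr::leader_do_loop_work(), driven from PalfHandleImpl::check_and_switch_state().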
- (void) leader_sync_mode_meta_to_arb_member_(); } return ret; } @@ -2203,7 +2199,9 @@ int PalfHandleImpl::check_and_switch_state() do { RLockGuard guard(lock_); if (OB_FAIL(config_mgr_.leader_do_loop_work(config_state_changed))) { - PALF_LOG(WARN, "leader_do_loop_work", KR(ret), K_(self), K_(palf_id)); + PALF_LOG(WARN, "LogConfigMgr::leader_do_loop_work failed", KR(ret), K_(self), K_(palf_id)); + } else if (OB_FAIL(mode_mgr_.leader_do_loop_work())) { + PALF_LOG(WARN, "LogModeMgr::leader_do_loop_work failed", KR(ret), K_(self), K_(palf_id)); } } while (0); if (OB_UNLIKELY(config_state_changed)) { @@ -4246,6 +4244,7 @@ int PalfHandleImpl::stat(PalfStat &palf_stat) (void)config_mgr_.get_curr_member_list(palf_stat.paxos_member_list_, palf_stat.paxos_replica_num_); (void)config_mgr_.get_arbitration_member(palf_stat.arbitration_member_); (void)config_mgr_.get_degraded_learner_list(palf_stat.degraded_list_); + (void)config_mgr_.get_global_learner_list(palf_stat.learner_list_); palf_stat.allow_vote_ = state_mgr_.is_allow_vote(); palf_stat.replica_type_ = state_mgr_.get_replica_type(); palf_stat.base_lsn_ = log_engine_.get_log_meta().get_log_snapshot_meta().base_lsn_; @@ -4390,6 +4389,7 @@ void PalfStat::reset() access_mode_ = AccessMode::INVALID_ACCESS_MODE; paxos_member_list_.reset(); paxos_replica_num_ = -1; + learner_list_.reset(); arbitration_member_.reset(); degraded_list_.reset(); allow_vote_ = true; @@ -4430,7 +4430,7 @@ int PalfHandleImpl::read_data_from_buffer(const LSN &read_begin_lsn, OB_SERIALIZE_MEMBER(PalfStat, self_, palf_id_, role_, log_proposal_id_, config_version_, mode_version_, access_mode_, paxos_member_list_, paxos_replica_num_, allow_vote_, replica_type_, begin_lsn_, begin_scn_, base_lsn_, end_lsn_, end_scn_, max_lsn_, max_scn_, - arbitration_member_, degraded_list_, is_in_sync_, is_need_rebuild_); + arbitration_member_, degraded_list_, is_in_sync_, is_need_rebuild_, learner_list_); } // end namespace palf } // end namespace oceanbase diff --git a/src/logservice/palf/palf_handle_impl.h b/src/logservice/palf/palf_handle_impl.h old mode 100644 new mode 100755 index 2f81c18da6..ccfa6edd92 --- a/src/logservice/palf/palf_handle_impl.h +++ b/src/logservice/palf/palf_handle_impl.h @@ -82,6 +82,7 @@ public: int64_t paxos_replica_num_; common::ObMember arbitration_member_; common::GlobalLearnerList degraded_list_; + common::GlobalLearnerList learner_list_; bool allow_vote_; LogReplicaType replica_type_; LSN begin_lsn_; @@ -94,7 +95,7 @@ public: bool is_in_sync_; bool is_need_rebuild_; TO_STRING_KV(K_(self), K_(palf_id), K_(role), K_(log_proposal_id), K_(config_version), K_(mode_version), - K_(access_mode), K_(paxos_member_list), K_(paxos_replica_num), K_(allow_vote), K_(replica_type), + K_(access_mode), K_(paxos_member_list), K_(paxos_replica_num), K_(learner_list), K_(allow_vote), K_(replica_type), K_(begin_lsn), K_(begin_scn), K_(base_lsn), K_(end_lsn), K_(end_scn), K_(max_lsn), K_(max_scn), K_(is_in_sync), K_(is_need_rebuild)); }; @@ -234,10 +235,12 @@ public: // // @param [in] member_list, paxos memberlist // @param [in] paxos_replica_num, number of paxos replicas + // @param [in] learner_list, learner_list // // @return :TODO virtual int set_initial_member_list(const common::ObMemberList &member_list, - const int64_t paxos_replica_num) = 0; + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list) = 0; // set region for self // @param [common::ObRegion] region virtual int set_region(const common::ObRegion ®ion) = 0; @@ -307,6 +310,9 @@ public: virtual 
int get_global_learner_list(common::GlobalLearnerList &learner_list) const = 0; virtual int get_paxos_member_list(common::ObMemberList &member_list, int64_t &paxos_replica_num) const = 0; + virtual int get_paxos_member_list_and_learner_list(common::ObMemberList &member_list, + int64_t &paxos_replica_num, + common::GlobalLearnerList &learner_list) const = 0; virtual int get_election_leader(common::ObAddr &addr) const = 0; // @brief: a special config change interface, change replica number of paxos group @@ -391,23 +397,31 @@ public: // @brief: switch a learner(read only replica) to acceptor(full replica) in this cluster // @param[in] const common::ObMember &learner: learner will be switched to acceptor + // @param[in] const int64_t new_replica_num: replica number of paxos group after switching + // learner to acceptor (similar to add_member) // @param[in] const int64_t timeout_us // @return // - OB_SUCCESS // - OB_INVALID_ARGUMENT: invalid argument // - OB_TIMEOUT: switch_learner_to_acceptor timeout // - OB_NOT_MASTER: not leader or rolechange during membership changing - virtual int switch_learner_to_acceptor(const common::ObMember &learner, const int64_t timeout_us) = 0; + virtual int switch_learner_to_acceptor(const common::ObMember &learner, + const int64_t new_replica_num, + const int64_t timeout_us) = 0; // @brief: switch an acceptor(full replica) to learner(read only replica) in this cluster // @param[in] const common::ObMember &member: acceptor will be switched to learner + // @param[in] const int64_t new_replica_num: replica number of paxos group after switching + // acceptor to learner (similar to remove_member) // @param[in] const int64_t timeout_us // @return // - OB_SUCCESS // - OB_INVALID_ARGUMENT: invalid argument // - OB_TIMEOUT: switch_acceptor_to_learner timeout // - OB_NOT_MASTER: not leader or rolechange during membership changing - virtual int switch_acceptor_to_learner(const common::ObMember &member, const int64_t timeout_us) = 0; + virtual int switch_acceptor_to_learner(const common::ObMember &member, + const int64_t new_replica_num, + const int64_t timeout_us) = 0; // Set the recyclable point of log files: log files whose LSN is less than or equal to lsn can be recycled safely @@ -706,7 +720,8 @@ public: void destroy(); int start(); int set_initial_member_list(const common::ObMemberList &member_list, - const int64_t paxos_replica_num) override final; + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list) override final; int set_region(const common::ObRegion &region) override final; int set_paxos_member_region_map(const LogMemberRegionMap &region_map) override final; int submit_log(const PalfAppendOptions &opts, @@ -727,6 +742,9 @@ public: int change_leader_to(const common::ObAddr &dest_addr) override final; int get_global_learner_list(common::GlobalLearnerList &learner_list) const override final; int get_paxos_member_list(common::ObMemberList &member_list, int64_t &paxos_replica_num) const override final; + int get_paxos_member_list_and_learner_list(common::ObMemberList &member_list, + int64_t &paxos_replica_num, + common::GlobalLearnerList &learner_list) const override final; int get_election_leader(common::ObAddr &addr) const; int force_set_as_single_replica() override final; int change_replica_num(const common::ObMemberList &member_list, @@ -747,8 +765,10 @@ public: int remove_learner(const common::ObMember &removed_learner, const int64_t timeout_us) override final; int switch_learner_to_acceptor(const common::ObMember &learner, + const int64_t new_replica_num, const int64_t timeout_us) override final; int 
switch_acceptor_to_learner(const common::ObMember &member, + const int64_t new_replica_num, const int64_t timeout_us) override final; int set_base_lsn(const LSN &lsn) override final; int enable_sync() override final; @@ -1128,7 +1148,6 @@ private: // ======optimization for locate_by_scn_coarsely========= mutable SpinLock last_locate_lock_; share::SCN last_locate_scn_; - int64_t last_send_mode_meta_time_us_; block_id_t last_locate_block_; // ======optimization for locate_by_scn_coarsely========= int64_t cannot_recv_log_warn_time_; diff --git a/src/logservice/replayservice/ob_log_replay_service.cpp b/src/logservice/replayservice/ob_log_replay_service.cpp index 744cab58a6..7cd302d401 100644 --- a/src/logservice/replayservice/ob_log_replay_service.cpp +++ b/src/logservice/replayservice/ob_log_replay_service.cpp @@ -363,8 +363,7 @@ void ObLogReplayService::handle(void *task) } } -int ObLogReplayService::add_ls(const share::ObLSID &id, - const common::ObReplicaType &replica_type) +int ObLogReplayService::add_ls(const share::ObLSID &id) { int ret = OB_SUCCESS; ObReplayStatus *replay_status = NULL; @@ -377,7 +376,7 @@ int ObLogReplayService::add_ls(const share::ObLSID &id, CLOG_LOG(WARN, "failed to alloc replay status", K(ret), K(id)); } else { new (replay_status) ObReplayStatus(); - if (OB_FAIL(replay_status->init(id, replica_type, palf_env_, this))) { + if (OB_FAIL(replay_status->init(id, palf_env_, this))) { mtl_free(replay_status); replay_status = NULL; CLOG_LOG(WARN, "failed to init replay status", K(ret), K(id), K(palf_env_), K(this)); diff --git a/src/logservice/replayservice/ob_log_replay_service.h b/src/logservice/replayservice/ob_log_replay_service.h index 1d86dcf247..a2e07b9b88 100644 --- a/src/logservice/replayservice/ob_log_replay_service.h +++ b/src/logservice/replayservice/ob_log_replay_service.h @@ -132,8 +132,7 @@ public: }; public: void handle(void *task); - int add_ls(const share::ObLSID &id, - const common::ObReplicaType &replica_type); + int add_ls(const share::ObLSID &id); int remove_ls(const share::ObLSID &id); int enable(const share::ObLSID &id, const palf::LSN &base_lsn, diff --git a/src/logservice/replayservice/ob_replay_status.cpp b/src/logservice/replayservice/ob_replay_status.cpp index 83aff0c5b3..cb5571cf17 100644 --- a/src/logservice/replayservice/ob_replay_status.cpp +++ b/src/logservice/replayservice/ob_replay_status.cpp @@ -544,12 +544,10 @@ ObReplayStatus::~ObReplayStatus() } int ObReplayStatus::init(const share::ObLSID &id, - const common::ObReplicaType &replica_type, PalfEnv *palf_env, ObLogReplayService *rp_sv) { //TODO: use replica type init need_replay - UNUSED(replica_type); int ret = OB_SUCCESS; if (is_inited_) { ret = OB_INIT_TWICE; diff --git a/src/logservice/replayservice/ob_replay_status.h b/src/logservice/replayservice/ob_replay_status.h index 81017fb72e..2cea2c19cd 100644 --- a/src/logservice/replayservice/ob_replay_status.h +++ b/src/logservice/replayservice/ob_replay_status.h @@ -449,7 +449,6 @@ public: ObReplayStatus(); ~ObReplayStatus(); int init(const share::ObLSID &id, - const common::ObReplicaType &replica_type, palf::PalfEnv *palf_env, ObLogReplayService *rp_sv); void destroy(); diff --git a/src/observer/CMakeLists.txt b/src/observer/CMakeLists.txt index 21b1e3b013..c665414c56 100644 --- a/src/observer/CMakeLists.txt +++ b/src/observer/CMakeLists.txt @@ -223,6 +223,9 @@ ob_set_subtarget(ob_server virtual_table virtual_table/ob_all_virtual_dtl_first_cached_buffer.cpp virtual_table/ob_all_virtual_dtl_memory.cpp 
virtual_table/ob_all_virtual_dump_tenant_info.cpp + virtual_table/ob_all_virtual_dup_ls_lease_mgr.cpp + virtual_table/ob_all_virtual_dup_ls_tablets.cpp + virtual_table/ob_all_virtual_dup_ls_tablet_set.cpp virtual_table/ob_all_virtual_engine_table.cpp virtual_table/ob_all_virtual_files_table.cpp virtual_table/ob_all_virtual_id_service.cpp diff --git a/src/observer/mysql/ob_query_retry_ctrl.cpp b/src/observer/mysql/ob_query_retry_ctrl.cpp index f2071dc965..c1ea672517 100644 --- a/src/observer/mysql/ob_query_retry_ctrl.cpp +++ b/src/observer/mysql/ob_query_retry_ctrl.cpp @@ -88,7 +88,7 @@ void ObRetryPolicy::sleep_before_local_retry(ObRetryParam &v, } if (sleep_us > 0) { LOG_INFO("will sleep", K(sleep_us), K(remain_us), K(base_sleep_us), - K(retry_sleep_type), K(v.stmt_retry_times_), K(timeout_timestamp)); + K(retry_sleep_type), K(v.stmt_retry_times_), K(v.err_), K(timeout_timestamp)); THIS_WORKER.sched_wait(); ob_usleep(static_cast(sleep_us)); THIS_WORKER.sched_run(); @@ -113,10 +113,7 @@ public: ~ObRefreshLocationCachePolicy() = default; virtual void test(ObRetryParam &v) const override { - int refresh_err = v.result_.refresh_location_cache(is_async); - if (OB_SUCCESS != refresh_err) { - LOG_WARN_RET(OB_ERR_UNEXPECTED, "fail to refresh location cache", K(is_async), K(refresh_err), K(v)); - } + v.result_.refresh_location_cache(is_async, v.err_); } }; @@ -225,10 +222,7 @@ public: K(v.session_.get_retry_info().get_last_query_retry_err())); if (v.session_.get_retry_info().is_rpc_timeout() || is_transaction_rpc_timeout_err(v.err_)) { // rpc超时了,可能是location cache不对,异步刷新location cache - int err1 = v.result_.refresh_location_cache(true); // 非阻塞 - if (OB_SUCCESS != err1) { - LOG_WARN("fail to nonblock refresh location cache", K(v), K(err1)); - } + v.result_.refresh_location_cache(true, v.err_); // 非阻塞 LOG_WARN("sql rpc timeout, or trans rpc timeout, maybe location is changed, " "refresh location cache non blockly", K(v), K(v.session_.get_retry_info().is_rpc_timeout())); @@ -655,6 +649,7 @@ void ObQueryRetryCtrl::location_error_proc(ObRetryParam &v) ObFastFailRetryPolicy fast_fail; ObCommonRetryIndexLongWaitPolicy retry_long_wait; retry_obj.test(fast_fail).test(retry_long_wait); + if (RETRY_TYPE_LOCAL == v.retry_type_) { ObRefreshLocationCacheBlockPolicy block_refresh; // FIXME: why block? 
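+      // block_refresh triggers a synchronous (blocking) refresh of the location cache before the local retry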
retry_obj.test(block_refresh); diff --git a/src/observer/mysql/ob_sync_plan_driver.cpp b/src/observer/mysql/ob_sync_plan_driver.cpp index cac10161ff..576380bdc1 100644 --- a/src/observer/mysql/ob_sync_plan_driver.cpp +++ b/src/observer/mysql/ob_sync_plan_driver.cpp @@ -114,7 +114,7 @@ int ObSyncPlanDriver::response_result(ObMySQLResultSet &result) K(ret), K(cli_ret), K(retry_ctrl_.need_retry())); ret = cli_ret; } else { - ObResultSet::refresh_location_cache(result.get_exec_context().get_task_exec_ctx(), true, ret); + result.refresh_location_cache(true, ret); } int cret = result.close(); if (cret != OB_SUCCESS) { diff --git a/src/observer/mysql/obmp_query.cpp b/src/observer/mysql/obmp_query.cpp index 9cce6decee..d01380dbe2 100644 --- a/src/observer/mysql/obmp_query.cpp +++ b/src/observer/mysql/obmp_query.cpp @@ -841,13 +841,6 @@ OB_INLINE int ObMPQuery::do_process(ObSQLSessionInfo &session, plan_ctx->get_timeout_timestamp()); } } - } else { - // end trans结束事务之后不能再改错误码 - // 只有成功的时候才检查是否需要刷新location cache,否则已经在重试逻辑里面刷新了,不用刷新 - int refresh_ret = OB_SUCCESS; - if (OB_UNLIKELY(OB_SUCCESS != (refresh_ret = result.check_and_nonblock_refresh_location_cache()))) { - LOG_WARN("fail to check and nonblock refresh location cache", K(ret), K(refresh_ret)); - } } } diff --git a/src/observer/ob_inner_sql_result.cpp b/src/observer/ob_inner_sql_result.cpp index a53a958d1e..3972631c54 100644 --- a/src/observer/ob_inner_sql_result.cpp +++ b/src/observer/ob_inner_sql_result.cpp @@ -149,7 +149,7 @@ int ObInnerSQLResult::open() ret = OB_INIT_TWICE; LOG_WARN("result set already open", K(ret)); } else if (has_tenant_resource() && OB_FAIL(result_set_->open())) { - ObResultSet::refresh_location_cache(result_set_->get_exec_context().get_task_exec_ctx(), true, ret); + result_set_->refresh_location_cache(true, ret); LOG_WARN("open result set failed", K(ret)); // move after precess_retry(). 
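+    // refresh_location_cache(true, ret) above performs an asynchronous (non-blocking) location cache refresh using the open() failure code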
// result_set_->close(); @@ -214,7 +214,7 @@ int ObInnerSQLResult::inner_close() } else { WITH_CONTEXT(mem_context_) { if (has_tenant_resource() && OB_FAIL(result_set_->close())) { - ObResultSet::refresh_location_cache(result_set_->get_exec_context().get_task_exec_ctx(), true, ret); + result_set_->refresh_location_cache(true, ret); LOG_WARN("result set close failed", K(ret)); } else if(!has_tenant_resource() && OB_FAIL(remote_result_set_->close())) { LOG_WARN("remote_result_set close failed", K(ret)); @@ -248,7 +248,7 @@ int ObInnerSQLResult::next() WITH_CONTEXT(mem_context_) { if (has_tenant_resource() && OB_FAIL(result_set_->get_next_row(row_))) { if (OB_ITER_END != ret) { - ObResultSet::refresh_location_cache(result_set_->get_exec_context().get_task_exec_ctx(), true, ret); + result_set_->refresh_location_cache(true, ret); LOG_WARN("get next row failed", K(ret)); } } else if (!has_tenant_resource() && OB_FAIL(remote_result_set_->get_next_row(row_))) { diff --git a/src/observer/ob_rpc_processor_simple.cpp b/src/observer/ob_rpc_processor_simple.cpp index ba81bf3d1d..c2d7e81d7b 100644 --- a/src/observer/ob_rpc_processor_simple.cpp +++ b/src/observer/ob_rpc_processor_simple.cpp @@ -57,6 +57,7 @@ #include "observer/ob_server_event_history_table_operator.h" #include "sql/udr/ob_udr_mgr.h" #include "sql/plan_cache/ob_ps_cache.h" +#include "rootserver/ob_primary_ls_service.h" // for ObPrimaryLSService #include "sql/session/ob_sql_session_info.h" #include "sql/session/ob_sess_info_verify.h" @@ -212,7 +213,40 @@ int ObRpcLSAddReplicaP::process() int ObRpcLSTypeTransformP::process() { int ret = OB_SUCCESS; - //TODO(muwei.ym) FIX IT later ObRpcLSTypeTransformP::process + uint64_t tenant_id = arg_.tenant_id_; + MAKE_TENANT_SWITCH_SCOPE_GUARD(guard); + ObLSService *ls_service = nullptr; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + + if (tenant_id != MTL_ID()) { + if (OB_FAIL(guard.switch_to(tenant_id))) { + LOG_WARN("failed to switch to tenant", K(ret), K(tenant_id)); + } + } + if (OB_SUCC(ret)) { + SERVER_EVENT_ADD("storage_ha", "ls_type_transform start", "tenant_id", arg_.tenant_id_, "ls_id", arg_.ls_id_.id(), + "dest", arg_.src_.get_server()); + LOG_INFO("start do ls type transform", K(arg_)); + + ls_service = MTL(ObLSService*); + if (OB_ISNULL(ls_service)) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(ERROR, "mtl ObLSService should not be null", K(ret)); + } else if (OB_FAIL(ls_service->get_ls(arg_.ls_id_, ls_handle, ObLSGetMod::OBSERVER_MOD))) { + LOG_WARN("failed to get ls", K(ret), K(arg_)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls should not be NULL", K(ret), K(arg_)); + } else if (OB_FAIL(ls->get_ls_remove_member_handler()->transform_member(arg_))) { + LOG_WARN("failed to transform member", K(ret), K(arg_)); + } + } + + if (OB_FAIL(ret)) { + SERVER_EVENT_ADD("storage_ha", "ls_type_transform failed", "tenant_id", + arg_.tenant_id_, "ls_id", arg_.ls_id_.id(), "result", ret); + } return ret; } @@ -1359,7 +1393,7 @@ int ObRpcCreateLSP::process() COMMON_LOG(WARN, "failed create log stream", KR(ret), K(arg_)); } } - (void)result_.init(ret, GCTX.self_addr()); + (void)result_.init(ret, GCTX.self_addr(), arg_.get_replica_type()); return ret; } @@ -1557,13 +1591,12 @@ int ObRpcSetMemberListP::process() } else if (OB_ISNULL(ls = handle.get_ls())) { ret = OB_ERR_UNEXPECTED; COMMON_LOG(ERROR, "ls should not be null", K(ret)); - } else { - if (OB_FAIL(ls->set_initial_member_list(arg_.get_member_list(), - arg_.get_paxos_replica_num()))) { - COMMON_LOG(WARN, 
"failed to set member list", KR(ret), K(arg_)); - } + } else if (OB_FAIL(ls->set_initial_member_list(arg_.get_member_list(), + arg_.get_paxos_replica_num(), + arg_.get_learner_list()))) { + COMMON_LOG(WARN, "failed to set member list", KR(ret), K(arg_)); } - result_.set_result(ret); + result_.init(ret); return ret; } @@ -2364,6 +2397,28 @@ int ObRpcSendHeartbeatP::process() return ret; } +int ObRpcCreateDuplicateLSP::process() +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = arg_.get_tenant_id(); + if (OB_UNLIKELY(!is_user_tenant(tenant_id))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id)); + } else { + MTL_SWITCH(tenant_id) { + rootserver::ObPrimaryLSService* primary_ls_service = MTL(rootserver::ObPrimaryLSService*); + if (OB_ISNULL(primary_ls_service)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("primary ls service is null", KR(ret), K(tenant_id)); + } else if (OB_FAIL(primary_ls_service->create_duplicate_ls())) { + LOG_WARN("failed to create duplicate log stream", KR(ret), K(tenant_id)); + } + } + } + (void)result_.init(ret); + return ret; +} + int ObSessInfoVerificationP::process() { int ret = OB_SUCCESS; diff --git a/src/observer/ob_rpc_processor_simple.h b/src/observer/ob_rpc_processor_simple.h index e19d1a255f..4f393aa292 100644 --- a/src/observer/ob_rpc_processor_simple.h +++ b/src/observer/ob_rpc_processor_simple.h @@ -152,6 +152,7 @@ OB_DEFINE_PROCESSOR_S(Srv, OB_BACKUP_BUILD_INDEX, ObRpcBackupBuildIndexP); OB_DEFINE_PROCESSOR_S(Srv, OB_DELETE_BACKUP_LS_TASK, ObRpcBackupLSCleanP); OB_DEFINE_PROCESSOR_S(Srv, OB_BACKUP_META, ObRpcBackupMetaP); OB_DEFINE_PROCESSOR_S(Srv, OB_BACKUP_CHECK_TABLET_CREATE_TS, ObRpcBackupCheckTabletP); +OB_DEFINE_PROCESSOR_S(Srv, OB_NOTIFY_CREATE_DUPLICATE_LS, ObRpcCreateDuplicateLSP); OB_DEFINE_PROCESSOR_S(Srv, OB_LS_MIGRATE_REPLICA, ObRpcLSMigrateReplicaP); OB_DEFINE_PROCESSOR_S(Srv, OB_LS_ADD_REPLICA, ObRpcLSAddReplicaP); diff --git a/src/observer/ob_service.cpp b/src/observer/ob_service.cpp index a8ce94cdb4..e0d28c01da 100644 --- a/src/observer/ob_service.cpp +++ b/src/observer/ob_service.cpp @@ -2407,6 +2407,7 @@ int ObService::fill_ls_replica( share::ObLSReplica &replica) { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; uint64_t unit_id = common::OB_INVALID_ID; if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; @@ -2426,10 +2427,13 @@ int ObService::fill_ls_replica( common::ObRole role = FOLLOWER; ObMemberList ob_member_list; ObLSReplica::MemberList member_list; + GlobalLearnerList learner_list; int64_t proposal_id = 0; int64_t paxos_replica_number = 0; ObLSRestoreStatus restore_status; ObReplicaStatus replica_status = REPLICA_STATUS_NORMAL; + ObReplicaType replica_type = REPLICA_TYPE_FULL; + bool is_compatible_with_readonly_replica = false; if (OB_ISNULL(ls_svr = MTL(ObLSService*))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("MTL ObLSService is null", KR(ret), K(tenant_id)); @@ -2437,8 +2441,8 @@ int ObService::fill_ls_replica( ObLSID(ls_id), ls_handle, ObLSGetMod::OBSERVER_MOD))) { LOG_WARN("get ls handle failed", KR(ret)); - } else if (OB_FAIL(ls_handle.get_ls()->get_paxos_member_list(ob_member_list, paxos_replica_number))) { - LOG_WARN("get paxos_member_list from ObLS failed", KR(ret)); + } else if (OB_FAIL(ls_handle.get_ls()->get_paxos_member_list_and_learner_list(ob_member_list, paxos_replica_number, learner_list))) { + LOG_WARN("get member list and learner list from ObLS failed", KR(ret)); } else if (OB_FAIL(ls_handle.get_ls()->get_restore_status(restore_status))) { LOG_WARN("get restore status failed", 
KR(ret)); } else if (OB_FAIL(ls_handle.get_ls()->get_replica_status(replica_status))) { @@ -2448,6 +2452,23 @@ int ObService::fill_ls_replica( LOG_WARN("MTL ObLogService is null", KR(ret), K(tenant_id)); } else if (OB_FAIL(get_role_from_palf_(*log_service, ls_id, role, proposal_id))) { LOG_WARN("failed to get role from palf", KR(ret), K(tenant_id), K(ls_id)); + } else if (OB_SUCCESS != (tmp_ret = ObShareUtil::check_compat_version_for_readonly_replica( + tenant_id, is_compatible_with_readonly_replica))) { + LOG_WARN("fail to check data version for read-only replica", KR(ret), K(tenant_id)); + } + + if (OB_FAIL(ret)) { + } else if (!is_compatible_with_readonly_replica) { + replica_type = REPLICA_TYPE_FULL; + } else if (learner_list.contains(gctx_.self_addr())) { + // if replica exists in learner_list, report it as R-replica. + // Otherwise, report as F-replica + replica_type = REPLICA_TYPE_READONLY; + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObLSReplica::transform_ob_member_list(ob_member_list, member_list))) { + LOG_WARN("fail to transfrom ob_member_list into member_list", KR(ret), K(ob_member_list)); } else if (OB_FAIL(replica.init( 0, /*create_time_us*/ 0, /*modify_time_us*/ @@ -2456,7 +2477,7 @@ int ObService::fill_ls_replica( gctx_.self_addr(), /*server*/ gctx_.config_->mysql_port, /*sql_port*/ role, /*role*/ - REPLICA_TYPE_FULL, /*replica_type*/ + replica_type, /*replica_type*/ proposal_id, /*proposal_id*/ is_strong_leader(role) ? REPLICA_STATUS_NORMAL : replica_status,/*replica_status*/ restore_status, /*restore_status*/ @@ -2465,13 +2486,11 @@ int ObService::fill_ls_replica( gctx_.config_->zone.str(), /*zone*/ paxos_replica_number, /*paxos_replica_number*/ 0, /*data_size*/ - 0))) { /*required_size*/ + 0, + member_list, + learner_list))) { /*required_size*/ LOG_WARN("fail to init a ls replica", KR(ret), K(tenant_id), K(ls_id), K(role), - K(proposal_id), K(unit_id), K(paxos_replica_number)); - } else if (OB_FAIL(ObLSReplica::transform_ob_member_list(ob_member_list, member_list))) { - LOG_WARN("fail to transfrom ob_member_list into member_list", KR(ret), K(ob_member_list)); - } else if (OB_FAIL(replica.set_member_list(member_list))) { - LOG_WARN("fail to set member_list", KR(ret), K(member_list), K(replica)); + K(proposal_id), K(unit_id), K(paxos_replica_number), K(member_list), K(learner_list)); } else { LOG_TRACE("finish fill ls replica", KR(ret), K(tenant_id), K(ls_id), K(replica)); } diff --git a/src/observer/ob_srv_xlator_primary.cpp b/src/observer/ob_srv_xlator_primary.cpp index 0b1159f452..eafb786e71 100644 --- a/src/observer/ob_srv_xlator_primary.cpp +++ b/src/observer/ob_srv_xlator_primary.cpp @@ -29,7 +29,8 @@ #include "sql/das/ob_das_rpc_processor.h" #include "storage/tx/ob_trans_rpc.h" #include "storage/tx/ob_gts_rpc.h" -#include "storage/tx/ob_dup_table_rpc.h" +// #include "storage/tx/ob_dup_table_rpc.h" +#include "storage/tx/ob_dup_table_base.h" #include "storage/tx/ob_ts_response_handler.h" #include "storage/tx/wrs/ob_weak_read_service_rpc_define.h" // weak_read_service #include "observer/ob_rpc_processor_simple.h" @@ -134,12 +135,17 @@ void oceanbase::observer::init_srv_xlator_for_transaction(ObSrvRpcXlator *xlator RPC_PROCESSOR(ObTxRollbackSPP); RPC_PROCESSOR(ObTxKeepaliveP); RPC_PROCESSOR(ObTxKeepaliveRespP); - RPC_PROCESSOR(ObDupTableLeaseRequestMsgP, gctx_); - RPC_PROCESSOR(ObDupTableLeaseResponseMsgP, gctx_); - RPC_PROCESSOR(ObRedoLogSyncRequestP, gctx_); - RPC_PROCESSOR(ObRedoLogSyncResponseP, gctx_); - RPC_PROCESSOR(ObPreCommitRequestP, gctx_); - 
RPC_PROCESSOR(ObPreCommitResponseP, gctx_); + //for dup_table + // RPC_PROCESSOR(ObDupTableLeaseRequestMsgP, gctx_); + // RPC_PROCESSOR(ObDupTableLeaseResponseMsgP, gctx_); + // RPC_PROCESSOR(ObRedoLogSyncRequestP, gctx_); + // RPC_PROCESSOR(ObRedoLogSyncResponseP, gctx_); + RPC_PROCESSOR(ObDupTableLeaseRequestP); + RPC_PROCESSOR(ObDupTableTsSyncRequestP); + RPC_PROCESSOR(ObDupTableTsSyncResponseP); + RPC_PROCESSOR(ObDupTableBeforePrepareRequestP); + // RPC_PROCESSOR(ObPreCommitRequestP, gctx_); + // RPC_PROCESSOR(ObPreCommitResponseP, gctx_); // for xa RPC_PROCESSOR(ObTxSubPrepareP); RPC_PROCESSOR(ObTxSubPrepareRespP); diff --git a/src/observer/ob_srv_xlator_storage.cpp b/src/observer/ob_srv_xlator_storage.cpp index aae50b5cb8..62c1817d34 100644 --- a/src/observer/ob_srv_xlator_storage.cpp +++ b/src/observer/ob_srv_xlator_storage.cpp @@ -107,4 +107,5 @@ void oceanbase::observer::init_srv_xlator_for_storage(ObSrvRpcXlator *xlator) { RPC_PROCESSOR(ObRpcLSModifyPaxosReplicaNumberP, gctx_); RPC_PROCESSOR(ObRpcLSCheckDRTaskExistP, gctx_); RPC_PROCESSOR(ObRpcDDLCheckTabletMergeStatusP, gctx_); + RPC_PROCESSOR(ObRpcCreateDuplicateLSP, gctx_); } diff --git a/src/observer/omt/ob_worker_processor.cpp b/src/observer/omt/ob_worker_processor.cpp index 6ea7a19147..1ee4b4992a 100644 --- a/src/observer/omt/ob_worker_processor.cpp +++ b/src/observer/omt/ob_worker_processor.cpp @@ -49,12 +49,28 @@ void ObWorkerProcessor::th_destroy() translator_.th_destroy(); } +#ifdef ERRSIM +ERRSIM_POINT_DEF(EN_WORKER_PROCESS_REQUEST) +#endif + +int ObWorkerProcessor::process_err_test() +{ + int ret = OB_SUCCESS; + +#ifdef ERRSIM + ret = EN_WORKER_PROCESS_REQUEST; +#endif + return ret; +} + inline int ObWorkerProcessor::process_one(rpc::ObRequest &req) { int ret = OB_SUCCESS; ObReqProcessor *processor = NULL; - if (OB_FAIL(translator_.translate(req, processor))) { + if (OB_FAIL(process_err_test())) { + LOG_WARN("ignore request with err_test", K(ret)); + } else if (OB_FAIL(translator_.translate(req, processor))) { LOG_WARN("translate request fail", K(ret)); on_translate_fail(&req, ret); } else if (OB_ISNULL(processor)) { diff --git a/src/observer/omt/ob_worker_processor.h b/src/observer/omt/ob_worker_processor.h index 7de50bd829..685c75c990 100644 --- a/src/observer/omt/ob_worker_processor.h +++ b/src/observer/omt/ob_worker_processor.h @@ -35,6 +35,8 @@ public: virtual int process(rpc::ObRequest &req); +public: + OB_NOINLINE int process_err_test(); private: int process_one(rpc::ObRequest &req); diff --git a/src/observer/virtual_table/ob_all_virtual_arbitration_member_info.h b/src/observer/virtual_table/ob_all_virtual_arbitration_member_info.h index 26b3fc163c..da25d9e560 100644 --- a/src/observer/virtual_table/ob_all_virtual_arbitration_member_info.h +++ b/src/observer/virtual_table/ob_all_virtual_arbitration_member_info.h @@ -42,7 +42,7 @@ private: char role_str_[VARCHAR_32] = {'\0'}; char access_mode_str_[VARCHAR_32] = {'\0'}; char ip_[common::OB_IP_PORT_STR_BUFF] = {'\0'}; - char member_list_buf_[MAX_MEMBER_LIST_LENGTH] = {'\0'}; + ObSqlString member_list_buf_; char arbitration_member_buf_[MAX_SINGLE_MEMBER_LENGTH] = {'\0'}; char degraded_list_buf_[MAX_LEARNER_LIST_LENGTH] = {'\0'}; char config_version_buf_[VARCHAR_128] = {'\0'}; diff --git a/src/observer/virtual_table/ob_all_virtual_dup_ls_lease_mgr.cpp b/src/observer/virtual_table/ob_all_virtual_dup_ls_lease_mgr.cpp new file mode 100644 index 0000000000..da06d683ec --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_dup_ls_lease_mgr.cpp @@ -0,0 +1,227 @@ 
+/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "observer/virtual_table/ob_all_virtual_dup_ls_lease_mgr.h" +#include "observer/ob_server.h" + +using namespace oceanbase::common; +using namespace oceanbase::transaction; + +namespace oceanbase +{ +namespace observer +{ +void ObAllVirtualDupLSLeaseMgr::reset() +{ + memset(ip_buffer_, 0, sizeof(ip_buffer_)); + memset(follower_ip_buffer_, 0, sizeof(follower_ip_buffer_)); + + ObVirtualTableScannerIterator::reset(); + dup_ls_lease_mgr_stat_iter_.reset(); + all_tenants_.reset(); + self_addr_.reset(); + init_ = false; +} + +void ObAllVirtualDupLSLeaseMgr::destroy() +{ + reset(); +} + +int ObAllVirtualDupLSLeaseMgr::prepare_start_to_read_() +{ + int ret = OB_SUCCESS; + dup_ls_lease_mgr_stat_iter_.reset(); + if (OB_ISNULL(allocator_)) { + SERVER_LOG(WARN, "invalid argument, allocator_ is null", KP(allocator_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(fill_tenant_ids_())) { + SERVER_LOG(WARN, "fail to fill tenant ids", K(ret)); + } else { + for (int i = 0; i < all_tenants_.count() && OB_SUCC(ret); i++) { + int64_t cur_tenant_id = all_tenants_.at(i); + MTL_SWITCH(cur_tenant_id) { + transaction::ObTransService *txs = MTL(transaction::ObTransService*); + if (OB_ISNULL(txs)) { + ret = OB_INVALID_ARGUMENT; + SERVER_LOG(WARN, "invalid argument, txs is null", KP(txs)); + } else if (OB_FAIL(txs->get_dup_table_loop_worker(). 
+ iterate_dup_ls(dup_ls_lease_mgr_stat_iter_))) { + if (OB_NOT_INIT == ret ) { + ret = OB_SUCCESS; + } else { + SERVER_LOG(WARN, "collect dup ls lease mgr failed", K(ret), K(cur_tenant_id)); + } + } + } + } + } + if (OB_FAIL(ret)) { + + } else if (OB_FAIL(dup_ls_lease_mgr_stat_iter_.set_ready())) { // set ready for the first count + SERVER_LOG(WARN, "dup_ls_iter set ready error", K(ret)); + } else { + start_to_read_ = true; + } + + return ret; +} + +int ObAllVirtualDupLSLeaseMgr::init(const common::ObAddr &addr) +{ + int ret = OB_SUCCESS; + + if (init_) { + ret = OB_INIT_TWICE; + SERVER_LOG(WARN, "init twice", K(ret)); + } else { + init_ = true; + self_addr_ = addr; + + if (false == self_addr_.ip_to_string(ip_buffer_, common::OB_IP_STR_BUFF)) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "fail to execute ip_to_string", K(ret)); + } + } + + return ret; +} + +int ObAllVirtualDupLSLeaseMgr::fill_tenant_ids_() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(!init_)) { + ret = OB_NOT_INIT; + SERVER_LOG(WARN, "not init", K(ret)); + } else if (OB_INVALID_TENANT_ID == effective_tenant_id_) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(ERROR, "invalid tenant id", K(ret), K_(effective_tenant_id)); + } else if (OB_ISNULL(GCTX.omt_)) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "fail to get multi tenant from GCTX", K(ret)); + } else { + omt::TenantIdList tmp_all_tenants; + tmp_all_tenants.set_label(ObModIds::OB_TENANT_ID_LIST); + GCTX.omt_->get_tenant_ids(tmp_all_tenants); + for (int64_t i = 0; OB_SUCC(ret) && i < tmp_all_tenants.size(); ++i) { + uint64_t tenant_id = tmp_all_tenants[i]; + if (!is_virtual_tenant_id(tenant_id) && // skip virtual tenant + (is_sys_tenant(effective_tenant_id_) || tenant_id == effective_tenant_id_)) { + if (OB_FAIL(all_tenants_.push_back(tenant_id))) { + SERVER_LOG(WARN, "fail to push back tenant id", K(ret), K(tenant_id)); + } + } + } + SERVER_LOG(INFO, "succeed to get tenant ids", K(all_tenants_)); + } + + return ret; +} + +int ObAllVirtualDupLSLeaseMgr::inner_get_next_row(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + ObDupTableLSLeaseMgrStat lease_mgr_stat; + + if (!start_to_read_ && OB_FAIL(prepare_start_to_read_())) { + SERVER_LOG(WARN, "prepare start to read error", K(ret), K(start_to_read_)); + } else if (OB_FAIL(dup_ls_lease_mgr_stat_iter_.get_next(lease_mgr_stat))) { + if (OB_ITER_END != ret) { + SERVER_LOG(WARN, "ObAllVirtualDupLSLeaseMgr iter end", K(ret)); + } + } else { + const int64_t col_count = output_column_ids_.count(); + for (int64_t i = 0; OB_SUCC(ret) && i < col_count; ++i) { + uint64_t col_id = output_column_ids_.at(i); + switch (col_id) { + case TENANT_ID: + cur_row_.cells_[i].set_int(lease_mgr_stat.get_tenant_id()); + break; + case LS_ID: + cur_row_.cells_[i].set_int(lease_mgr_stat.get_ls_id().id()); + break; + case SVR_IP: + cur_row_.cells_[i].set_varchar(ip_buffer_); + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + case SVR_PORT: + cur_row_.cells_[i].set_int(self_addr_.get_port()); + break; + case FOLLOWER_IP: + if (lease_mgr_stat.get_follower_addr().ip_to_string(follower_ip_buffer_, common::OB_IP_STR_BUFF)) { + cur_row_.cells_[i].set_varchar(follower_ip_buffer_); + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + } else { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "fail to execute ip_to_string", K(ret)); + } + break; + case FOLLOWER_PORT: + cur_row_.cells_[i].set_int(lease_mgr_stat.get_follower_addr().get_port()); + 
break; + case GRANT_TIMESTAMP: + if (is_valid_timestamp_(lease_mgr_stat.get_grant_ts())) { + cur_row_.cells_[i].set_timestamp(lease_mgr_stat.get_grant_ts()); + } + break; + case EXPIRED_TIMESTAMP: + if (is_valid_timestamp_(lease_mgr_stat.get_expired_ts())) { + cur_row_.cells_[i].set_timestamp(lease_mgr_stat.get_expired_ts()); + } + break; + case REMAIN_US: + cur_row_.cells_[i].set_int(lease_mgr_stat.get_remain_us()); + break; + case GRANT_REQ_TS: + cur_row_.cells_[i].set_int(lease_mgr_stat.get_grant_req_ts()); + break; + case CACHED_REQ_TS: + cur_row_.cells_[i].set_int(lease_mgr_stat.get_cached_req_ts()); + break; + case LEASE_INTERVAL_US: + cur_row_.cells_[i].set_int(lease_mgr_stat.get_lease_interval()); + break; + case MAX_REPLAYED_LOG_SCN: + cur_row_.cells_[i].set_int(lease_mgr_stat.get_max_replayed_scn().convert_to_ts(true /* ignore invalid */)); + break; + case MAX_READ_VERSION: + cur_row_.cells_[i].set_int(lease_mgr_stat.get_max_read_version()); + break; + case MAX_COMMIT_VERSION: + cur_row_.cells_[i].set_int(lease_mgr_stat.get_max_commit_version()); + break; + default: + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "invalid coloum_id", K(ret), K(col_id)); + break; + } + } + } + if (OB_SUCC(ret)) { + row = &cur_row_; + } + + return ret; +} + +bool ObAllVirtualDupLSLeaseMgr::is_valid_timestamp_(const int64_t timestamp) const +{ + bool ret_bool = true; + if (INT64_MAX == timestamp || 0 > timestamp) { + ret_bool = false; + } + return ret_bool; +} + +}/* ns observer*/ +}/* ns oceanbase */ diff --git a/src/observer/virtual_table/ob_all_virtual_dup_ls_lease_mgr.h b/src/observer/virtual_table/ob_all_virtual_dup_ls_lease_mgr.h new file mode 100644 index 0000000000..4d8805bf87 --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_dup_ls_lease_mgr.h @@ -0,0 +1,75 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_ALL_VIRTUAL_DUP_LS_LEASE_MGR_H_ +#define OB_ALL_VIRTUAL_DUP_LS_LEASE_MGR_H_ + +#include "share/ob_virtual_table_scanner_iterator.h" +#include "share/ob_scanner.h" +#include "common/row/ob_row.h" +#include "lib/container/ob_se_array.h" +#include "common/ob_simple_iterator.h" +#include "storage/tx/ob_dup_table_stat.h" + +namespace oceanbase +{ +namespace observer +{ +class ObAllVirtualDupLSLeaseMgr: public common::ObVirtualTableScannerIterator +{ +public: + explicit ObAllVirtualDupLSLeaseMgr() { reset(); } + virtual ~ObAllVirtualDupLSLeaseMgr() { destroy(); } +public: + int init(const common::ObAddr &addr); + virtual int inner_get_next_row(common::ObNewRow *&row); + virtual void reset(); + virtual void destroy(); + +private: + int prepare_start_to_read_(); + int fill_tenant_ids_(); + bool is_valid_timestamp_(const int64_t timestamp) const; +private: + enum + { + TENANT_ID = common::OB_APP_MIN_COLUMN_ID, + LS_ID, + SVR_IP, + SVR_PORT, + FOLLOWER_IP, + FOLLOWER_PORT, + GRANT_TIMESTAMP, + EXPIRED_TIMESTAMP, + REMAIN_US, + LEASE_INTERVAL_US, + GRANT_REQ_TS, + CACHED_REQ_TS, + MAX_REPLAYED_LOG_SCN, + MAX_READ_VERSION, + MAX_COMMIT_VERSION, + }; + char ip_buffer_[common::OB_IP_STR_BUFF]; + char follower_ip_buffer_[common::OB_IP_STR_BUFF]; + +private: + bool init_; + transaction::ObDupLSLeaseMgrStatIterator dup_ls_lease_mgr_stat_iter_; + common::ObArray all_tenants_; + common::ObAddr self_addr_; +private: + DISALLOW_COPY_AND_ASSIGN(ObAllVirtualDupLSLeaseMgr); +}; + +} //observer +} //oceanbase +#endif /* OB_ALL_VIRTUAL_DUP_LS_LEASE_MGR_H_ */ \ No newline at end of file diff --git a/src/observer/virtual_table/ob_all_virtual_dup_ls_tablet_set.cpp b/src/observer/virtual_table/ob_all_virtual_dup_ls_tablet_set.cpp new file mode 100644 index 0000000000..9c6f7817f5 --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_dup_ls_tablet_set.cpp @@ -0,0 +1,218 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include "observer/virtual_table/ob_all_virtual_dup_ls_tablet_set.h" + +#include "observer/ob_server.h" +#include "storage/tx/ob_trans_service.h" + +using namespace oceanbase::common; +using namespace oceanbase::transaction; + +namespace oceanbase +{ +namespace observer +{ +void ObAllVirtualDupLSTabletSet::reset() +{ + memset(ip_buffer_, 0, sizeof(ip_buffer_)); + + ObVirtualTableScannerIterator::reset(); + dup_ls_tablet_set_stat_iter_.reset(); + all_tenants_.reset(); + self_addr_.reset(); + init_ = false; +} + +void ObAllVirtualDupLSTabletSet::destroy() +{ + reset(); +} + +int ObAllVirtualDupLSTabletSet::prepare_start_to_read_() +{ + int ret = OB_SUCCESS; + dup_ls_tablet_set_stat_iter_.reset(); + if (OB_ISNULL(allocator_)) { + SERVER_LOG(WARN, "invalid argument, allocator_", KP(allocator_)); + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(fill_tenant_ids_())) { + SERVER_LOG(WARN, "fail to fill tenant ids", K(ret)); + } else { + for (int i = 0; i < all_tenants_.count() && OB_SUCC(ret); i++) { + int64_t cur_tenant_id = all_tenants_.at(i); + MTL_SWITCH(cur_tenant_id) { + transaction::ObTransService *txs = MTL(transaction::ObTransService*); + if (OB_FAIL(txs->get_dup_table_loop_worker(). + iterate_dup_ls(dup_ls_tablet_set_stat_iter_))) { + if (OB_NOT_INIT == ret) { + ret = OB_SUCCESS; + } + SERVER_LOG(WARN, "collect tablet set info failed", KR(ret)); + } + } + } + } + if (OB_FAIL(ret)) { + SERVER_LOG(WARN, "prepare start to read failed", KR(ret)); + } else if (OB_FAIL(dup_ls_tablet_set_stat_iter_.set_ready())) { // set ready for the first count + SERVER_LOG(WARN, "ObTransStatIterator set ready error", KR(ret)); + } else { + start_to_read_ = true; + } + + return ret; +} + +int ObAllVirtualDupLSTabletSet::init(const common::ObAddr &addr) +{ + int ret = OB_SUCCESS; + if (init_) { + ret = OB_INIT_TWICE; + SERVER_LOG(WARN, "init twice", K(ret)); + } else { + init_ = true; + self_addr_ = addr; + // init addr + if (false == self_addr_.ip_to_string(ip_buffer_, common::OB_IP_STR_BUFF)) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "fail to execute ip_to_string", K(ret)); + } + } + + return ret; +} + +int ObAllVirtualDupLSTabletSet::fill_tenant_ids_() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(!init_)) { + ret = OB_NOT_INIT; + SERVER_LOG(WARN, "not init", K(ret)); + } else if (OB_INVALID_TENANT_ID == effective_tenant_id_) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(ERROR, "invalid tenant id", K(ret), K_(effective_tenant_id)); + } else if (OB_ISNULL(GCTX.omt_)) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "fail to get multi tenant from GCTX", K(ret)); + } else { + omt::TenantIdList tmp_all_tenants; + tmp_all_tenants.set_label(ObModIds::OB_TENANT_ID_LIST); + GCTX.omt_->get_tenant_ids(tmp_all_tenants); + for (int64_t i = 0; OB_SUCC(ret) && i < tmp_all_tenants.size(); ++i) { + uint64_t tenant_id = tmp_all_tenants[i]; + if (!is_virtual_tenant_id(tenant_id) && // skip virtual tenant + (is_sys_tenant(effective_tenant_id_) || tenant_id == effective_tenant_id_)) { + if (OB_FAIL(all_tenants_.push_back(tenant_id))) { + SERVER_LOG(WARN, "fail to push back tenant id", K(ret), K(tenant_id)); + } + } + } + SERVER_LOG(INFO, "succeed to get tenant ids", K(all_tenants_)); + } + + return ret; +} + +int ObAllVirtualDupLSTabletSet::inner_get_next_row(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + ObDupTableLSTabletSetStat tablet_set_stat; + + if (!start_to_read_ && OB_FAIL(prepare_start_to_read_())) { + SERVER_LOG(WARN, "prepare start to read error", K(ret), K(start_to_read_)); + } else if 
(OB_FAIL(dup_ls_tablet_set_stat_iter_.get_next(tablet_set_stat))) { + if (OB_ITER_END != ret) { + SERVER_LOG(WARN, "iter error", KR(ret)); + } + } else { + const int64_t col_count = output_column_ids_.count(); + int64_t tmp_ts_us = 0; + + for (int64_t i = 0; OB_SUCC(ret) && i < col_count; ++i) { + uint64_t col_id = output_column_ids_.at(i); + switch (col_id) { + case TENANT_ID: + cur_row_.cells_[i].set_int(tablet_set_stat.get_tenant_id()); + break; + case LS_ID: + cur_row_.cells_[i].set_int(tablet_set_stat.get_ls_id().id()); + break; + case SVR_IP: + cur_row_.cells_[i].set_varchar(ip_buffer_); + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + case SVR_PORT: + cur_row_.cells_[i].set_int(self_addr_.get_port()); + break; + case LS_STATE: + cur_row_.cells_[i].set_varchar(tablet_set_stat.get_ls_state_str().ptr()); + break; + case UNIQUE_ID: + cur_row_.cells_[i].set_int(tablet_set_stat.get_unique_id()); + break; + case ATTRIBUTE: + cur_row_.cells_[i].set_varchar(tablet_set_stat.get_tablet_set_attr_str().ptr()); + break; + case COUNT: + cur_row_.cells_[i].set_int(tablet_set_stat.get_count()); + break; + case READABLE_SCN: + tmp_ts_us = tablet_set_stat.get_readable_scn().convert_to_ts(true /*ignore invalid*/); + if (is_valid_timestamp_(tmp_ts_us)) { + cur_row_.cells_[i].set_int(tmp_ts_us); + } + break; + case CHANGE_SCN: + tmp_ts_us = tablet_set_stat.get_change_scn().convert_to_ts(true /*ignore invalid*/); + if (is_valid_timestamp_(tmp_ts_us)) { + cur_row_.cells_[i].set_int(tmp_ts_us); + } + break; + case NEED_CONFIRM_SCN: + tmp_ts_us = tablet_set_stat.get_need_confirm_scn().convert_to_ts(true /*ignore invalid*/); + if (is_valid_timestamp_(tmp_ts_us)) { + cur_row_.cells_[i].set_int(tmp_ts_us); + } + break; + case STATE: + cur_row_.cells_[i].set_varchar(tablet_set_stat.get_tablet_set_state_str().ptr()); + break; + case TRX_REF: + cur_row_.cells_[i].set_int(tablet_set_stat.get_trx_ref()); + break; + default: + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "invalid coloum_id", K(ret), K(col_id)); + break; + } + } + } + if (OB_SUCC(ret)) { + row = &cur_row_; + } + + return ret; +} + +bool ObAllVirtualDupLSTabletSet::is_valid_timestamp_(const int64_t timestamp) const +{ + bool ret_bool = true; + if (INT64_MAX == timestamp || 0 > timestamp) { + ret_bool = false; + } + return ret_bool; +} + +}/* ns observer*/ +}/* ns oceanbase */ diff --git a/src/observer/virtual_table/ob_all_virtual_dup_ls_tablet_set.h b/src/observer/virtual_table/ob_all_virtual_dup_ls_tablet_set.h new file mode 100644 index 0000000000..6474880765 --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_dup_ls_tablet_set.h @@ -0,0 +1,72 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#ifndef OB_ALL_VIRTUAL_DUP_LS_TABLET_SET_ +#define OB_ALL_VIRTUAL_DUP_LS_TABLET_SET_ + +#include "share/ob_virtual_table_scanner_iterator.h" +#include "share/ob_scanner.h" +#include "common/row/ob_row.h" +#include "lib/container/ob_se_array.h" +#include "common/ob_simple_iterator.h" +#include "storage/tx/ob_trans_ctx_mgr_v4.h" +#include "storage/tx/ob_dup_table_stat.h" + +namespace oceanbase +{ +namespace observer +{ +class ObAllVirtualDupLSTabletSet: public common::ObVirtualTableScannerIterator +{ +public: + explicit ObAllVirtualDupLSTabletSet() { reset(); } + virtual ~ObAllVirtualDupLSTabletSet() { destroy(); } +public: + int init(const common::ObAddr &addr); + virtual int inner_get_next_row(common::ObNewRow *&row); + virtual void reset(); + virtual void destroy(); +private: + int prepare_start_to_read_(); + int fill_tenant_ids_(); + bool is_valid_timestamp_(const int64_t timestamp) const; +private: + enum + { + TENANT_ID = common::OB_APP_MIN_COLUMN_ID, + LS_ID, + SVR_IP, + SVR_PORT, + LS_STATE, + UNIQUE_ID, + ATTRIBUTE, + COUNT, + READABLE_SCN, + CHANGE_SCN, + NEED_CONFIRM_SCN, + STATE, + TRX_REF, + }; + char ip_buffer_[common::OB_IP_STR_BUFF]; + +private: + bool init_; + transaction::ObDupLSTabletSetStatIterator dup_ls_tablet_set_stat_iter_; + common::ObArray all_tenants_; + common::ObAddr self_addr_; +private: + DISALLOW_COPY_AND_ASSIGN(ObAllVirtualDupLSTabletSet); +}; + +} //observer +} //oceanbase +#endif /* OB_ALL_VIRTUAL_DUP_LS_TABLET_SET_ */ \ No newline at end of file diff --git a/src/observer/virtual_table/ob_all_virtual_dup_ls_tablets.cpp b/src/observer/virtual_table/ob_all_virtual_dup_ls_tablets.cpp new file mode 100644 index 0000000000..a68b717b7d --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_dup_ls_tablets.cpp @@ -0,0 +1,195 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include "observer/virtual_table/ob_all_virtual_dup_ls_tablets.h" +#include "observer/ob_server.h" + +using namespace oceanbase::common; +using namespace oceanbase::transaction; + +namespace oceanbase +{ +namespace observer +{ +void ObAllVirtualDupLSTablets::reset() +{ + memset(ip_buffer_, 0, sizeof(ip_buffer_)); + + ObVirtualTableScannerIterator::reset(); + dup_ls_tablets_iter_.reset(); + all_tenants_.reset(); + self_addr_.reset(); + init_ = false; +} + +void ObAllVirtualDupLSTablets::destroy() +{ + reset(); +} + +int ObAllVirtualDupLSTablets::prepare_start_to_read_() +{ + int ret = OB_SUCCESS; + dup_ls_tablets_iter_.reset(); + if (OB_ISNULL(allocator_)) { + ret = OB_INVALID_ARGUMENT; + SERVER_LOG(WARN, "invalid argument, allocator_ is null", K(ret), KP(allocator_)); + } else if (OB_FAIL(fill_tenant_ids_())) { + SERVER_LOG(WARN, "fail to fill tenant ids", K(ret)); + } else { + for (int i = 0; i < all_tenants_.count() && OB_SUCC(ret); i++) { + int64_t cur_tenant_id = all_tenants_.at(i); + MTL_SWITCH(cur_tenant_id) { + transaction::ObTransService *txs = MTL(transaction::ObTransService*); + if (OB_ISNULL(txs)) { + ret = OB_INVALID_ARGUMENT; + SERVER_LOG(WARN, "invalid argument, txs is null", KR(ret), KP(txs)); + } else if (OB_FAIL(txs->get_dup_table_loop_worker(). + iterate_dup_ls(dup_ls_tablets_iter_))) { + SERVER_LOG(WARN, "iterate dup ls for collect tablets stat failed", KR(ret), KP(txs)); + } + } + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(dup_ls_tablets_iter_.set_ready())) { // set ready for the first count + SERVER_LOG(WARN, "Iterator set ready error", KR(ret)); + } else { + start_to_read_ = true; + } + + return ret; +} + +int ObAllVirtualDupLSTablets::init(const common::ObAddr &addr) +{ + int ret = OB_SUCCESS; + if (init_) { + ret = OB_INIT_TWICE; + SERVER_LOG(WARN, "init twice", KR(ret)); + } else { + init_ = true; + self_addr_ = addr; + + if (false == self_addr_.ip_to_string(ip_buffer_, common::OB_IP_STR_BUFF)) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "fail to execute ip_to_string", K(ret)); + } + } + return ret; +} + +int ObAllVirtualDupLSTablets::fill_tenant_ids_() +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(!init_)) { + ret = OB_NOT_INIT; + SERVER_LOG(WARN, "not init", KR(ret)); + } else if (OB_INVALID_TENANT_ID == effective_tenant_id_) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(ERROR, "invalid tenant id", KR(ret), K_(effective_tenant_id)); + } else if (OB_ISNULL(GCTX.omt_)) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "fail to get multi tenant from GCTX", KR(ret)); + } else { + omt::TenantIdList tmp_all_tenants; + tmp_all_tenants.set_label(ObModIds::OB_TENANT_ID_LIST); + GCTX.omt_->get_tenant_ids(tmp_all_tenants); + for (int64_t i = 0; OB_SUCC(ret) && i < tmp_all_tenants.size(); ++i) { + uint64_t tenant_id = tmp_all_tenants[i]; + if (!is_virtual_tenant_id(tenant_id) && // skip virtual tenant + (is_sys_tenant(effective_tenant_id_) || tenant_id == effective_tenant_id_)) { + if (OB_FAIL(all_tenants_.push_back(tenant_id))) { + SERVER_LOG(WARN, "fail to push back tenant id", K(ret), K(tenant_id)); + } + } + } + SERVER_LOG(INFO, "succeed to get tenant ids", K(all_tenants_)); + } + + return ret; +} + +int ObAllVirtualDupLSTablets::inner_get_next_row(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + ObDupTableLSTabletsStat tablet_stat; + + if (!start_to_read_ && OB_FAIL(prepare_start_to_read_())) { + SERVER_LOG(WARN, "prepare start to read error", K(ret), K(start_to_read_)); + } else if (OB_FAIL(dup_ls_tablets_iter_.get_next(tablet_stat))) { + if (OB_ITER_END !=
ret) { + SERVER_LOG(WARN, "ObAllVirtualDupLSTablets iter error", KR(ret)); + } + } else { + const int64_t col_count = output_column_ids_.count(); + + for (int64_t i = 0; OB_SUCC(ret) && i < col_count; ++i) { + uint64_t col_id = output_column_ids_.at(i); + switch (col_id) { + case TENANT_ID: + cur_row_.cells_[i].set_int(tablet_stat.get_tenant_id()); + break; + case LS_ID: + cur_row_.cells_[i].set_int(tablet_stat.get_ls_id().id()); + break; + case SVR_IP: + cur_row_.cells_[i].set_varchar(ip_buffer_); + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + case SVR_PORT: + cur_row_.cells_[i].set_int(self_addr_.get_port()); + break; + case LS_STATE: + cur_row_.cells_[i].set_varchar(tablet_stat.get_ls_state_str().ptr()); + break; + case TABLET_ID: + cur_row_.cells_[i].set_uint64(tablet_stat.get_tablet_id().id()); + break; + case UNIQUE_ID: + cur_row_.cells_[i].set_int(tablet_stat.get_unique_id()); + break; + case ATTRIBUTE: + cur_row_.cells_[i].set_varchar(tablet_stat.get_tablet_set_attr_str().ptr()); + break; + case REFRESH_SCHEMA_TIMESTAMP: + if (is_valid_timestamp_(tablet_stat.get_refresh_schema_ts())) { + cur_row_.cells_[i].set_timestamp(tablet_stat.get_refresh_schema_ts()); + } + break; + default: + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "invalid column_id", K(ret), K(col_id)); + break; + } + } + } + if (OB_SUCC(ret)) { + row = &cur_row_; + } + + return ret; +} + +bool ObAllVirtualDupLSTablets::is_valid_timestamp_(const int64_t timestamp) const +{ + bool ret_bool = true; + if (INT64_MAX == timestamp || 0 > timestamp) { + ret_bool = false; + } + return ret_bool; +} + +}/* ns observer*/ +}/* ns oceanbase */ diff --git a/src/observer/virtual_table/ob_all_virtual_dup_ls_tablets.h b/src/observer/virtual_table/ob_all_virtual_dup_ls_tablets.h new file mode 100644 index 0000000000..8138ddcc94 --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_dup_ls_tablets.h @@ -0,0 +1,67 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details.
+ */ + +#ifndef OB_ALL_VIRTUAL_DUP_LS_TABLETS_H_ +#define OB_ALL_VIRTUAL_DUP_LS_TABLETS_H_ + +#include "share/ob_virtual_table_scanner_iterator.h" +#include "share/ob_scanner.h" +#include "common/row/ob_row.h" +#include "lib/container/ob_se_array.h" +#include "common/ob_simple_iterator.h" +#include "storage/tx/ob_dup_table_stat.h" + +namespace oceanbase +{ +namespace observer +{ +class ObAllVirtualDupLSTablets: public common::ObVirtualTableScannerIterator +{ +public: + explicit ObAllVirtualDupLSTablets() { reset(); } + virtual ~ObAllVirtualDupLSTablets() { destroy(); } +public: + int init(const common::ObAddr &addr); + virtual int inner_get_next_row(common::ObNewRow *&row); + virtual void reset(); + virtual void destroy(); +private: + int prepare_start_to_read_(); + int fill_tenant_ids_(); + bool is_valid_timestamp_(const int64_t timestamp) const; +private: + enum + { + TENANT_ID = common::OB_APP_MIN_COLUMN_ID, + LS_ID, + SVR_IP, + SVR_PORT, + LS_STATE, + TABLET_ID, + UNIQUE_ID, + ATTRIBUTE, + REFRESH_SCHEMA_TIMESTAMP, + }; + char ip_buffer_[common::OB_IP_STR_BUFF]; + +private: + bool init_; + transaction::ObDupLSTabletsStatIterator dup_ls_tablets_iter_; + common::ObArray<uint64_t> all_tenants_; + common::ObAddr self_addr_; +private: + DISALLOW_COPY_AND_ASSIGN(ObAllVirtualDupLSTablets); +}; + +} //observer +} //oceanbase +#endif /* OB_ALL_VIRTUAL_DUP_LS_TABLETS_H_ */ \ No newline at end of file diff --git a/src/observer/virtual_table/ob_all_virtual_log_stat.cpp b/src/observer/virtual_table/ob_all_virtual_log_stat.cpp index 1810656a1e..0b3438b6f6 100644 --- a/src/observer/virtual_table/ob_all_virtual_log_stat.cpp +++ b/src/observer/virtual_table/ob_all_virtual_log_stat.cpp @@ -165,7 +165,7 @@ int ObAllVirtualPalfStat::insert_log_stat_(const logservice::ObLogStat &log_stat if (OB_FAIL(member_list_to_string_(palf_stat.paxos_member_list_))) { SERVER_LOG(WARN, "memberlist to_string failed", K(ret), K(palf_stat)); } else { - cur_row_.cells_[i].set_varchar(ObString::make_string(member_list_buf_)); + cur_row_.cells_[i].set_varchar(member_list_buf_.string()); cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation( ObCharset::get_default_charset())); } @@ -225,7 +225,7 @@ int ObAllVirtualPalfStat::insert_log_stat_(const logservice::ObLogStat &log_stat break; } case OB_APP_MIN_COLUMN_ID + 19: { - if (OB_FAIL(learner_list_to_string_(palf_stat.degraded_list_))) { + if (OB_FAIL(learner_list_to_string_(palf_stat.degraded_list_, degraded_list_buf_))) { SERVER_LOG(WARN, "learner list to_string failed", K(ret), K(palf_stat)); } else { cur_row_.cells_[i].set_varchar(ObString::make_string(degraded_list_buf_)); @@ -234,6 +234,17 @@ int ObAllVirtualPalfStat::insert_log_stat_(const logservice::ObLogStat &log_stat } break; } + case OB_APP_MIN_COLUMN_ID + 20: { + if (OB_FAIL(learner_list_to_string_(palf_stat.learner_list_, learner_list_buf_))) { + SERVER_LOG(WARN, "learner list to_string failed", K(ret), K(palf_stat)); + } else { + ObString learner_list_str = ObString::make_string(learner_list_buf_); + cur_row_.cells_[i].set_lob_value(ObLongTextType, learner_list_str.ptr(), static_cast<int32_t>(learner_list_str.length())); + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation( + ObCharset::get_default_charset())); + } + break; + } } } return ret; @@ -284,49 +295,46 @@ int ObAllVirtualPalfStat::member_list_to_string_( SERVER_LOG(WARN, "fail to transform member_list", KR(ret), K(member_list)); } else if (OB_FAIL(share::ObLSReplica::member_list2text( tmp_member_list, - member_list_buf_, -
MAX_MEMBER_LIST_LENGTH))) { + member_list_buf_))) { SERVER_LOG(WARN, "member_list2text failed", KR(ret), - K(member_list), K(tmp_member_list), K_(member_list_buf)); + K(member_list), K(tmp_member_list), K(member_list_buf_)); } return ret; } int ObAllVirtualPalfStat::learner_list_to_string_( - const common::GlobalLearnerList &learner_list) + const common::GlobalLearnerList &learner_list, + char *output_buf) { int ret = OB_SUCCESS; int64_t pos = 0; char buf[MAX_IP_PORT_LENGTH]; - if (learner_list.get_member_number() == 0) { - memset(degraded_list_buf_, 0, MAX_LEARNER_LIST_LENGTH); - } else { - const int64_t count = learner_list.get_member_number(); - ObMember tmp_learner; - for (int64_t i = 0; i < count && (OB_SUCCESS == ret); ++i) { - if (OB_FAIL(learner_list.get_learner(i, tmp_learner))) { - SERVER_LOG(WARN, "get_learner failed", KR(ret), K(i)); - } - if (0 != pos) { - if (pos + 1 < MAX_LEARNER_LIST_LENGTH) { - degraded_list_buf_[pos++] = ','; - } else { - ret = OB_BUF_NOT_ENOUGH; - SERVER_LOG(WARN, "buffer not enough", KR(ret), K(pos)); - } - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(tmp_learner.get_server().ip_port_to_string(buf, sizeof(buf)))) { - SERVER_LOG(WARN, "convert server to string failed", KR(ret), K(tmp_learner)); + memset(output_buf, 0, MAX_LEARNER_LIST_LENGTH); + const int64_t count = learner_list.get_member_number(); + ObMember tmp_learner; + for (int64_t i = 0; i < count && (OB_SUCCESS == ret); ++i) { + if (OB_FAIL(learner_list.get_learner(i, tmp_learner))) { + SERVER_LOG(WARN, "get_learner failed", KR(ret), K(i)); + } + if (0 != pos) { + if (pos + 1 < MAX_LEARNER_LIST_LENGTH) { + output_buf[pos++] = ','; } else { - int n = snprintf(degraded_list_buf_ + pos, MAX_LEARNER_LIST_LENGTH - pos, \ - "%s:%ld", buf, tmp_learner.get_timestamp()); - if (n < 0 || n >= MAX_LEARNER_LIST_LENGTH - pos) { - ret = OB_BUF_NOT_ENOUGH; - SERVER_LOG(WARN, "snprintf error or buf not enough", KR(ret), K(n), K(pos)); - } else { - pos += n; - } + ret = OB_BUF_NOT_ENOUGH; + SERVER_LOG(WARN, "buffer not enough", KR(ret), K(pos)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(tmp_learner.get_server().ip_port_to_string(buf, sizeof(buf)))) { + SERVER_LOG(WARN, "convert server to string failed", KR(ret), K(tmp_learner)); + } else { + int n = snprintf(output_buf + pos, MAX_LEARNER_LIST_LENGTH - pos, \ + "%s:%ld", buf, tmp_learner.get_timestamp()); + if (n < 0 || n >= MAX_LEARNER_LIST_LENGTH - pos) { + ret = OB_BUF_NOT_ENOUGH; + SERVER_LOG(WARN, "snprintf error or buf not enough", KR(ret), K(n), K(pos)); + } else { + pos += n; } } } diff --git a/src/observer/virtual_table/ob_all_virtual_log_stat.h b/src/observer/virtual_table/ob_all_virtual_log_stat.h index 2e4de61225..8ef38e5950 100644 --- a/src/observer/virtual_table/ob_all_virtual_log_stat.h +++ b/src/observer/virtual_table/ob_all_virtual_log_stat.h @@ -39,7 +39,7 @@ private: int insert_log_stat_(const logservice::ObLogStat &log_stat, common::ObNewRow *row); int get_log_handler_stat_(const palf::PalfStat &palf_stat, logservice::ObLogStat &log_stat); int member_list_to_string_(const common::ObMemberList &member_list); - int learner_list_to_string_(const common::GlobalLearnerList &learner_list); + int learner_list_to_string_(const common::GlobalLearnerList &learner_list, char *output_buf); private: static const int64_t VARCHAR_32 = 32; static const int64_t VARCHAR_64 = 64; @@ -47,11 +47,12 @@ private: char role_str_[VARCHAR_32] = {'\0'}; char access_mode_str_[VARCHAR_32] = {'\0'}; char ip_[common::OB_IP_PORT_STR_BUFF] = {'\0'}; - char 
member_list_buf_[MAX_MEMBER_LIST_LENGTH] = {'\0'}; + ObSqlString member_list_buf_; char arbitration_member_buf_[MAX_SINGLE_MEMBER_LENGTH] = {'\0'}; char degraded_list_buf_[MAX_LEARNER_LIST_LENGTH] = {'\0'}; char config_version_buf_[VARCHAR_128] = {'\0'}; char replica_type_str_[VARCHAR_32] = {'\0'}; + char learner_list_buf_[MAX_LEARNER_LIST_LENGTH] = {'\0'}; omt::ObMultiTenant *omt_; }; }//namespace observer diff --git a/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp b/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp index 3764a2d7b1..591e231be6 100644 --- a/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp +++ b/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp @@ -199,8 +199,11 @@ #include "observer/virtual_table/ob_all_virtual_archive_dest_status.h" #include "observer/virtual_table/ob_virtual_show_trace.h" #include "observer/virtual_table/ob_all_virtual_sql_plan.h" +#include "observer/virtual_table/ob_all_virtual_dup_ls_lease_mgr.h" +#include "observer/virtual_table/ob_all_virtual_dup_ls_tablets.h" #include "observer/virtual_table/ob_all_virtual_opt_stat_gather_monitor.h" #include "observer/virtual_table/ob_all_virtual_thread.h" +#include "observer/virtual_table/ob_all_virtual_dup_ls_tablet_set.h" #include "observer/virtual_table/ob_all_virtual_px_p2p_datahub.h" #include "observer/virtual_table/ob_all_virtual_ls_log_restore_status.h" @@ -291,7 +294,7 @@ int ObVirtualTableIteratorFactory::create_virtual_table_iterator(ObVTableScanPar int ObVirtualTableIteratorFactory::revert_virtual_table_iterator(ObVirtualTableIterator *vt_iter) { - int ret = OB_SUCCESS; +int ret = OB_SUCCESS; if (OB_UNLIKELY(NULL == vt_iter)) { ret = OB_ERR_UNEXPECTED; SERVER_LOG(WARN, "vt_iter is NULL, can not free it"); @@ -862,6 +865,42 @@ int ObVTIterCreator::create_vt_iter(ObVTableScanParam &params, END_CREATE_VT_ITER_SWITCH_LAMBDA BEGIN_CREATE_VT_ITER_SWITCH_LAMBDA + case OB_ALL_VIRTUAL_DUP_LS_LEASE_MGR_TID: { + ObAllVirtualDupLSLeaseMgr *dup_ls_lease_mgr = NULL; + if (OB_FAIL(NEW_VIRTUAL_TABLE(ObAllVirtualDupLSLeaseMgr, + dup_ls_lease_mgr))) { + SERVER_LOG(ERROR, "ObAllVirtualDupLSLeaseMgr construct failed", K(ret)); + } else if (OB_FAIL(dup_ls_lease_mgr->init(addr_))) { + SERVER_LOG(WARN, "all_virtual_dup_ls_lease_mgr init failed", K(ret)); + } else { + vt_iter = static_cast<ObVirtualTableIterator *>(dup_ls_lease_mgr); + } + break; + } + case OB_ALL_VIRTUAL_DUP_LS_TABLETS_TID: { + ObAllVirtualDupLSTablets *dup_ls_tablets = NULL; + if (OB_FAIL(NEW_VIRTUAL_TABLE(ObAllVirtualDupLSTablets, + dup_ls_tablets))) { + SERVER_LOG(ERROR, "ObAllVirtualDupLSTablets construct failed", K(ret)); + } else if (OB_FAIL(dup_ls_tablets->init(addr_))) { + SERVER_LOG(WARN, "fail to init all_virtual_dup_ls_tablets", K(ret)); + } else { + vt_iter = static_cast<ObVirtualTableIterator *>(dup_ls_tablets); + } + break; + } + case OB_ALL_VIRTUAL_DUP_LS_TABLET_SET_TID: { + ObAllVirtualDupLSTabletSet *dup_ls_tablet_set = NULL; + if (OB_FAIL(NEW_VIRTUAL_TABLE(ObAllVirtualDupLSTabletSet, + dup_ls_tablet_set))) { + SERVER_LOG(ERROR, "ObAllVirtualDupLSTabletSet construct failed", K(ret)); + } else if (OB_FAIL(dup_ls_tablet_set->init(addr_))) { + SERVER_LOG(WARN, "fail to init all_virtual_dup_ls_tablet_set", K(ret)); + } else { + vt_iter = static_cast<ObVirtualTableIterator *>(dup_ls_tablet_set); + } + break; + } case OB_ALL_VIRTUAL_TRANS_STAT_TID: { ObGVTxStat *gv_tx_stat = NULL; if (OB_FAIL(NEW_VIRTUAL_TABLE(ObGVTxStat, gv_tx_stat))) { diff --git a/src/rootserver/ob_balance_group_ls_stat_operator.cpp b/src/rootserver/ob_balance_group_ls_stat_operator.cpp index
7374760822..43116dd9da 100644 --- a/src/rootserver/ob_balance_group_ls_stat_operator.cpp +++ b/src/rootserver/ob_balance_group_ls_stat_operator.cpp @@ -18,13 +18,18 @@ #include "lib/oblog/ob_log_module.h" #include "lib/utility/ob_print_utils.h" #include "common/ob_timeout_ctx.h" +#include "observer/ob_server_struct.h" // for GCTX #include "share/schema/ob_table_schema.h" #include "share/schema/ob_schema_struct.h" #include "share/schema/ob_schema_getter_guard.h" #include "share/ob_share_util.h" #include "share/inner_table/ob_inner_table_schema_constants.h" +#include "share/ob_srv_rpc_proxy.h" // ObSrvRpcProxy #include "share/tablet/ob_tablet_to_ls_operator.h" - +#include "share/ls/ob_ls_table.h" // ObLSTable +#include "share/ls/ob_ls_table_operator.h" // ObLSTableOperator +#include "share/location_cache/ob_location_service.h" // ObLocationService +#include "share/ob_rpc_struct.h" // ObCreateDupLSArg & ObCreateDupLSResult namespace oceanbase { @@ -424,6 +429,10 @@ int ObNewTableTabletAllocator::prepare( if (OB_FAIL(alloc_ls_for_meta_or_sys_tenant_tablet(table_schema))) { LOG_WARN("fail to alloc ls for meta or sys tenant tablet", KR(ret)); } + } else if (table_schema.is_duplicate_table()) { + if (OB_FAIL(alloc_ls_for_duplicate_table_(table_schema))) { + LOG_WARN("fail to alloc ls for duplicate tablet", KR(ret), K(table_schema)); + } } else { if (table_schema.is_index_table()) { if (table_schema.is_index_local_storage()) { @@ -633,7 +642,7 @@ int ObNewTableTabletAllocator::get_available_ls( } else { for (int64_t i = 0; OB_SUCC(ret) && i < my_ls_array.count(); ++i) { share::ObLSStatusInfo &ls_status_info = my_ls_array.at(i); - if (ls_status_info.ls_is_normal() && SYS_LS != ls_status_info.ls_id_) { + if (ls_status_info.ls_is_normal() && SYS_LS != ls_status_info.ls_id_ && !ls_status_info.is_duplicate_ls()) { if (OB_FAIL(ls_status_info_array.push_back(ls_status_info))) { LOG_WARN("fail to push back", KR(ret)); } @@ -1191,6 +1200,119 @@ int ObNewTableTabletAllocator::alloc_ls_for_normal_table_tablet( return ret; } +int ObNewTableTabletAllocator::wait_ls_elect_leader_( + const uint64_t tenant_id, + const ObLSID &ls_id) +{ + int ret = OB_SUCCESS; + ObTimeoutCtx ctx; + if (OB_UNLIKELY(!inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObNewTableTabletAllocator not init", KR(ret), K(tenant_id), K(ls_id)); + } else if (OB_ISNULL(GCTX.location_service_) + || OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id || !ls_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(ls_id)); + } else if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(ctx, GCONF.internal_sql_execute_timeout))) { + LOG_WARN("failed to set default timeout", KR(ret)); + } else { + bool has_leader = false; + ObAddr ls_leader; + while (OB_SUCC(ret) && !has_leader) { + int tmp_ret = OB_SUCCESS; + ls_leader.reset(); + const share::ObLSReplica *leader_replica = nullptr; + if (0 > ctx.get_timeout()) { + ret = OB_TIMEOUT; + LOG_WARN("wait ls elect leader timeout", KR(ret)); + } else if (OB_TMP_FAIL(GCTX.location_service_->nonblock_get_leader(GCONF.cluster_id, tenant_id, ls_id, ls_leader))) { + LOG_WARN("fail to get ls leader", KR(ret), K(tenant_id), K(ls_id), K(ls_leader)); + } else { + has_leader = true; + } + if (OB_SUCC(ret) && !has_leader) { + LOG_WARN("fail to wait log stream elect leader, need retry", K(tenant_id), K(ls_id), K(ls_leader)); + ob_usleep(WAIT_INTERVAL_US); + } + } + } + return ret; +} + +int ObNewTableTabletAllocator::alloc_ls_for_duplicate_table_( + const share::schema::ObTableSchema 
&table_schema) +{ + int ret = OB_SUCCESS; + uint64_t tenant_id = table_schema.get_tenant_id(); + LOG_INFO("alloc ls for duplicate table tablet", + "tenant_id", table_schema.get_tenant_id(), + "table_id", table_schema.get_table_id()); + share::ObLSStatusOperator ls_status_operator; + share::ObLSStatusInfo duplicate_ls_status_info; + ObTimeoutCtx ctx; + if (OB_UNLIKELY(!inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("ObNewTableTabletAllocator not init", KR(ret)); + } else if (OB_ISNULL(GCTX.sql_proxy_) + || OB_ISNULL(GCTX.location_service_) + || OB_ISNULL(GCTX.srv_rpc_proxy_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret)); + } else if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(ctx, GCONF.internal_sql_execute_timeout))) { + LOG_WARN("failed to set default timeout", KR(ret)); + } else { + obrpc::ObCreateDupLSArg arg; + obrpc::ObCreateDupLSResult result; + while (OB_SUCC(ret)) { + int tmp_ret = OB_SUCCESS; + duplicate_ls_status_info.reset(); + if (0 > ctx.get_timeout()) { + ret = OB_TIMEOUT; + LOG_WARN("wait creating duplicate log stream timeout", KR(ret)); + } else if (OB_TMP_FAIL(ls_status_operator.get_duplicate_ls_status_info( + tenant_id, + *GCTX.sql_proxy_, + duplicate_ls_status_info))) { + if (OB_ENTRY_NOT_EXIST == tmp_ret) { + LOG_INFO("duplicate log stream not exist, should create one duplicate log stream"); + tmp_ret = OB_SUCCESS; + // create duplicate ls + ObAddr leader; + const int64_t timeout = ctx.get_timeout(); + if (OB_TMP_FAIL(GCTX.location_service_->get_leader(GCONF.cluster_id, tenant_id, + SYS_LS, FALSE, leader))) { + LOG_WARN("failed to get leader", KR(tmp_ret), K(tenant_id)); + } else if (OB_TMP_FAIL(arg.init(tenant_id))) { + LOG_WARN("failed to init arg", KR(ret), K(tenant_id)); + } else if (OB_TMP_FAIL(GCTX.srv_rpc_proxy_->to(leader).timeout(timeout).notify_create_duplicate_ls(arg, result))) { + LOG_WARN("failed to create tenant duplicate ls", KR(tmp_ret), K(tenant_id), K(leader), K(arg), K(timeout)); + } + } else { + LOG_WARN("fail to get duplicate log stream from table", KR(tmp_ret), K(tenant_id)); + } + } else if (!duplicate_ls_status_info.ls_is_normal()) { + LOG_TRACE("duplicate log stream is not in normal status", K(duplicate_ls_status_info)); + } else if (OB_FAIL(wait_ls_elect_leader_( + duplicate_ls_status_info.tenant_id_, + duplicate_ls_status_info.ls_id_))) { + LOG_WARN("fail to wait duplicate ls elect leader", KR(ret), K(duplicate_ls_status_info)); + } else { + for (int64_t i = 0; i < table_schema.get_all_part_num() && OB_SUCC(ret); i++) { + if (OB_FAIL(ls_id_array_.push_back(duplicate_ls_status_info.ls_id_))) { + LOG_WARN("failed to push_back", KR(ret), K(i), K(duplicate_ls_status_info)); + } + } + break; + } + if (OB_SUCC(ret)) { + LOG_WARN("fail to get duplicate log stream, need retry", K(tenant_id), K(duplicate_ls_status_info)); + ob_usleep(WAIT_INTERVAL_US); + } + } + } + return ret; +} + int ObNewTableTabletAllocator::get_non_partitioned_bg_info( const uint64_t tenant_id, ObBalanceGroupName &bg_name, diff --git a/src/rootserver/ob_balance_group_ls_stat_operator.h b/src/rootserver/ob_balance_group_ls_stat_operator.h index 08b8d40938..1235c67b71 100644 --- a/src/rootserver/ob_balance_group_ls_stat_operator.h +++ b/src/rootserver/ob_balance_group_ls_stat_operator.h @@ -199,7 +199,12 @@ private: const share::schema::ObTableSchema &table_schema); int alloc_ls_for_normal_table_tablet( const share::schema::ObTableSchema &table_schema); + int alloc_ls_for_duplicate_table_( + const share::schema::ObTableSchema &table_schema); private: + int 
wait_ls_elect_leader_( + const uint64_t tenant_id, + const share::ObLSID &ls_id); int get_tablet_id_array( const share::schema::ObTableSchema &table_schema, common::ObIArray &ls_id_array); @@ -234,6 +239,7 @@ private: const int64_t partition_num); private: static const int64_t MAX_TENANT_LS_CNT = 1024; + static const int64_t WAIT_INTERVAL_US = 1000 * 1000; // 1s enum class MyStatus : int64_t { WAIT_TO_PREPARE = 0, WAIT_TO_OUTPUT, diff --git a/src/rootserver/ob_bootstrap.cpp b/src/rootserver/ob_bootstrap.cpp index cb9d718297..81ce855aed 100644 --- a/src/rootserver/ob_bootstrap.cpp +++ b/src/rootserver/ob_bootstrap.cpp @@ -1378,9 +1378,10 @@ int ObBootstrap::insert_sys_ls_(const share::schema::ObTenantSchema &tenant_sche share::ObLSStatusInfo status_info; const uint64_t unit_group_id = 0; const uint64_t ls_group_id = 0; + share::ObLSFlag flag(share::ObLSFlag::NORMAL_FLAG); if (OB_FAIL(status_info.init(OB_SYS_TENANT_ID, SYS_LS, ls_group_id, - share::OB_LS_NORMAL, unit_group_id, primary_zone))) { - LOG_WARN("failed to init ls info", KR(ret), K(primary_zone)); + share::OB_LS_NORMAL, unit_group_id, primary_zone, flag))) { + LOG_WARN("failed to init ls info", KR(ret), K(primary_zone), K(flag)); } else if (OB_FAIL(life_agent.create_new_ls(status_info, SCN::base_scn(), primary_zone_str.string(), share::NORMAL_SWITCHOVER_STATUS))) { LOG_WARN("failed to get init member list", KR(ret), K(status_info), K(primary_zone_str)); diff --git a/src/rootserver/ob_ddl_service.cpp b/src/rootserver/ob_ddl_service.cpp index 6ec1fd6c08..8daf743037 100644 --- a/src/rootserver/ob_ddl_service.cpp +++ b/src/rootserver/ob_ddl_service.cpp @@ -337,6 +337,8 @@ int ObDDLService::create_user_tables( int ret = OB_SUCCESS; RS_TRACE(create_user_tables_begin); uint64_t tenant_id = OB_INVALID_TENANT_ID; + bool have_duplicate_table = false; + bool is_compatible = false; if (OB_FAIL(check_inner_stat())) { LOG_WARN("not init", K(ret)); } else if (table_schemas.count() < 1) { @@ -344,6 +346,7 @@ int ObDDLService::create_user_tables( LOG_WARN("table_schemas have no element", K(ret)); } else { tenant_id = table_schemas.at(0).get_tenant_id(); + have_duplicate_table = table_schemas.at(0).is_duplicate_table(); // for checking unique index name duplicate when create user table in oracle mode bool is_oracle_mode = false; if (OB_FAIL(table_schemas.at(0).check_if_oracle_compat_mode(is_oracle_mode))) { @@ -379,6 +382,18 @@ int ObDDLService::create_user_tables( } } + if (OB_FAIL(ret)) { + //do nothing + } else if (!have_duplicate_table) { + // do nothing + } else if (OB_FAIL(ObShareUtil::check_compat_version_for_readonly_replica(tenant_id, is_compatible))) { + LOG_WARN("fail to check compat version for duplicate log stream", KR(ret), K(tenant_id)); + } else if (!is_compatible) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("duplicate table is not supported below 4.2", KR(ret), K(tenant_id)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create duplicate table below 4.2"); + } + if (OB_FAIL(ret)) { //do nothing } else if (OB_FAIL(create_tables_in_trans(if_not_exist, ddl_stmt_str, error_info, table_schemas, @@ -1626,10 +1641,10 @@ int ObDDLService::set_tablegroup_id(ObTableSchema &table_schema) if (ObDuplicateScope::DUPLICATE_SCOPE_NONE != table_schema.get_duplicate_scope() && OB_INVALID_ID != table_schema.get_tablegroup_id()) { ret = OB_NOT_SUPPORTED; - LOG_WARN("replicated table in tablegroup is not supported", K(ret), + LOG_WARN("duplicated table in tablegroup is not supported", K(ret), "table_id", table_schema.get_table_id(), "tablegroup_id", 
table_schema.get_tablegroup_id()); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "replicated table in tablegroup"); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "duplicated table in tablegroup"); } } @@ -2172,10 +2187,10 @@ int ObDDLService::set_new_table_options( } else if (ObDuplicateScope::DUPLICATE_SCOPE_NONE != new_table_schema.get_duplicate_scope() && OB_INVALID_ID != new_table_schema.get_tablegroup_id()) { ret = OB_NOT_SUPPORTED; - LOG_WARN("replicated table in tablegroup is not supported", K(ret), + LOG_WARN("duplicated table in tablegroup is not supported", K(ret), "table_id", new_table_schema.get_table_id(), "tablegroup_id", new_table_schema.get_tablegroup_id()); - LOG_USER_ERROR(OB_NOT_SUPPORTED, "replicated table in tablegroup"); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "duplicated table in tablegroup"); } else { if (OB_SUCC(ret) && alter_table_schema.alter_option_bitset_.has_member(obrpc::ObAlterTableArg::TABLEGROUP_NAME)) { @@ -2549,6 +2564,34 @@ int ObDDLService::set_raw_table_options( return ret; } +int ObDDLService::check_locality_compatible_( + ObTenantSchema &schema) +{ + int ret = OB_SUCCESS; + common::ObArray zone_locality; + bool is_compatible_with_readonly_replica = false; + if (OB_FAIL(ObShareUtil::check_compat_version_for_readonly_replica( + schema.get_tenant_id(), is_compatible_with_readonly_replica))) { + LOG_WARN("fail to check compatible with readonly replica", KR(ret), K(schema)); + } else if (is_compatible_with_readonly_replica) { + } else if (OB_FAIL(schema.get_zone_replica_attr_array(zone_locality))) { + LOG_WARN("fail to get locality from schema", K(ret), K(schema)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < zone_locality.count(); ++i) { + const share::ObZoneReplicaAttrSet &this_set = zone_locality.at(i); + if (this_set.zone_set_.count() <= 0) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("zone set count unexpected", K(ret), "zone_set_cnt", this_set.zone_set_.count()); + } else if (0 != this_set.get_readonly_replica_num()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("can not create tenant with read-only replica below data version 4.2", KR(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "Create tenant with R-replica in locality below data version 4.2"); + } + } + } + return ret; +} + int ObDDLService::parse_and_set_create_tenant_new_locality_options( share::schema::ObSchemaGetterGuard &schema_guard, ObTenantSchema &schema, @@ -12223,6 +12266,7 @@ int ObDDLService::alter_table(obrpc::ObAlterTableArg &alter_table_arg, int64_t cost_usec = 0; start_usec = ObTimeUtility::current_time(); bool is_alter_sess_active_time = false; + bool is_alter_duplicate_scope = false; const AlterTableSchema &alter_table_schema = alter_table_arg.alter_table_schema_; const uint64_t tenant_id = alter_table_schema.get_tenant_id(); int64_t &task_id = res.task_id_; @@ -12238,8 +12282,13 @@ int ObDDLService::alter_table(obrpc::ObAlterTableArg &alter_table_arg, schema_guard.set_session_id(alter_table_arg.session_id_); const ObTableSchema *orig_table_schema = NULL; is_alter_sess_active_time = alter_table_schema.alter_option_bitset_.has_member(obrpc::ObAlterTableArg::SESSION_ACTIVE_TIME); + is_alter_duplicate_scope = alter_table_schema.alter_option_bitset_.has_member(obrpc::ObAlterTableArg::DUPLICATE_SCOPE); ObTZMapWrap tz_map_wrap; if (OB_FAIL(ret)) { + } else if (is_alter_duplicate_scope) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("alter table duplicate scope not supported", KR(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "alter table duplicate scope"); } else if (OB_FAIL(OTTZ_MGR.get_tenant_tz(tenant_id, tz_map_wrap))) { 
LOG_WARN("get tenant timezone map failed", K(ret), K(tenant_id)); } else if (FALSE_IT(alter_table_arg.set_tz_info_map(tz_map_wrap.get_tz_map()))) { @@ -21483,7 +21532,7 @@ int ObDDLService::set_sys_ls_status(const uint64_t tenant_id) LOG_WARN("invalid tenant_id", KR(ret), K(tenant_id)); } else { share::ObLSAttr new_ls; - share::ObLSFlag flag;//TODO + share::ObLSFlag flag(share::ObLSFlag::NORMAL_FLAG); int64_t ls_group_id = 0; SCN create_scn = SCN::base_scn(); share::ObLSAttrOperator ls_operator(tenant_id, sql_proxy_); @@ -21850,6 +21899,8 @@ int ObDDLService::set_new_tenant_options( } else if (OB_FAIL(parse_and_set_create_tenant_new_locality_options( schema_guard, new_tenant_schema, resource_pool_names, zones_in_pool, zone_region_list))) { LOG_WARN("fail to parse and set new locality option", K(ret)); + } else if (OB_FAIL(check_locality_compatible_(new_tenant_schema))) { + LOG_WARN("fail to check locality with data version", KR(ret), K(new_tenant_schema)); } else if (OB_FAIL(check_alter_tenant_locality_type( schema_guard, orig_tenant_schema, new_tenant_schema, alter_locality_type))) { LOG_WARN("fail to check alter tenant locality allowed", K(ret)); @@ -29945,7 +29996,7 @@ int ObDDLService::set_schema_replica_num_options( } if (full_replica_num <= 0) { ret = OB_INVALID_ARGUMENT; - LOG_USER_ERROR(OB_INVALID_ARGUMENT, "locality"); + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "locality, should have at least one paxos replica"); LOG_WARN("full replica num is zero", K(ret), K(full_replica_num), K(schema)); } } diff --git a/src/rootserver/ob_ddl_service.h b/src/rootserver/ob_ddl_service.h index b25250cf41..f9d534d28f 100644 --- a/src/rootserver/ob_ddl_service.h +++ b/src/rootserver/ob_ddl_service.h @@ -2313,6 +2313,8 @@ private: const ObIArray &orig_table_schemas, const ObIArray &new_table_schemas, ObMySQLTransaction &trans); +private: + int check_locality_compatible_(ObTenantSchema &schema); private: bool inited_; volatile bool stopped_; diff --git a/src/rootserver/ob_disaster_recovery_info.cpp b/src/rootserver/ob_disaster_recovery_info.cpp index 99ec9adc58..08a9919053 100644 --- a/src/rootserver/ob_disaster_recovery_info.cpp +++ b/src/rootserver/ob_disaster_recovery_info.cpp @@ -192,9 +192,9 @@ int DRLSInfo::append_replica_server_unit_stat( if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret)); - } else if (OB_UNLIKELY(nullptr == server_stat_info - || nullptr == unit_stat_info - || nullptr == unit_in_group_stat_info)) { + } else if (OB_ISNULL(server_stat_info) + || OB_ISNULL(unit_stat_info) + || OB_ISNULL(unit_in_group_stat_info)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), @@ -377,6 +377,34 @@ void DRLSInfo::reset_last_disaster_recovery_ls() has_leader_ = false; } +int DRLSInfo::construct_filtered_ls_info_to_use_( + const share::ObLSInfo &input_ls_info, + share::ObLSInfo &output_ls_info) +{ + int ret = OB_SUCCESS; + output_ls_info.reset(); + if (OB_UNLIKELY(!input_ls_info.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(input_ls_info)); + } else if (OB_FAIL(output_ls_info.init( + input_ls_info.get_tenant_id(), + input_ls_info.get_ls_id()))) { + LOG_WARN("fail to init ls info", KR(ret), K(input_ls_info)); + } else { + uint64_t tenant_id = input_ls_info.get_tenant_id(); + ObLSID ls_id = input_ls_info.get_ls_id(); + for (int64_t i = 0; OB_SUCC(ret) && i < input_ls_info.get_replicas().count(); i++) { + const ObLSReplica &ls_replica = input_ls_info.get_replicas().at(i); + if (ls_replica.get_in_member_list() || 
ls_replica.get_in_learner_list()) { + if (OB_FAIL(output_ls_info.add_replica(ls_replica))) { + LOG_WARN("fail to add replica to new ls_info", KR(ret), K(ls_replica)); + } + } + } + } + return ret; +} + int DRLSInfo::build_disaster_ls_info( const share::ObLSInfo &ls_info, const share::ObLSStatusInfo &ls_status_info) @@ -388,15 +416,15 @@ int DRLSInfo::build_disaster_ls_info( if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; LOG_WARN("DRWorker not init", KR(ret)); - } else if (OB_UNLIKELY(nullptr == schema_service_ || nullptr == unit_mgr_)) { + } else if (OB_ISNULL(schema_service_) || OB_ISNULL(unit_mgr_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("schema service ptr is null", KR(ret), KP(schema_service_), KP(unit_mgr_)); } else if (resource_tenant_id_ != gen_user_tenant_id(ls_info.get_tenant_id())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("tenant id not match", KR(ret), K(resource_tenant_id_), "ls_tenant_id", ls_info.get_tenant_id()); - } else if (OB_FAIL(inner_ls_info_.assign(ls_info))) { - LOG_WARN("fail to assign inner_ls_info", KR(ret)); + } else if (OB_FAIL(construct_filtered_ls_info_to_use_(ls_info, inner_ls_info_))) { + LOG_WARN("fail to filter replicas not in both member_list and learner_list", KR(ret), K(ls_info)); } else if (OB_FAIL(ls_status_info_.assign(ls_status_info))) { LOG_WARN("fail to assign ls_status_info", KR(ret)); } else if (OB_FAIL(sys_schema_guard_.get_tenant_info( @@ -404,7 +432,7 @@ int DRLSInfo::build_disaster_ls_info( tenant_schema))) { LOG_WARN("fail to get tenant schema", KR(ret), "tenant_id", inner_ls_info_.get_tenant_id()); - } else if (OB_UNLIKELY(nullptr == tenant_schema)) { + } else if (OB_ISNULL(tenant_schema)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tenant schema ptr is null", KR(ret), KP(tenant_schema), K(inner_ls_info_), K(ls_info)); @@ -425,7 +453,9 @@ int DRLSInfo::build_disaster_ls_info( UnitStatInfoMap::Item *unit_in_group = nullptr; share::ObUnitInfo unit_info; share::ObLSReplica &ls_replica = inner_ls_info_.get_replicas().at(i); - if (OB_FAIL(server_stat_info_map_.locate(ls_replica.get_server(), server))) { + if (!ls_replica.get_in_member_list() && !ls_replica.get_in_learner_list()) { + LOG_INFO("replica is neither in member list nor in learner list", K(ls_replica)); + } else if (OB_FAIL(server_stat_info_map_.locate(ls_replica.get_server(), server))) { LOG_WARN("fail to locate server", KR(ret), "server", ls_replica.get_server()); } else if (OB_FAIL(unit_stat_info_map_.locate(ls_replica.get_unit_id(), unit))) { LOG_WARN("fail to locate unit", KR(ret), "unit_id", ls_replica.get_unit_id()); @@ -441,16 +471,16 @@ int DRLSInfo::build_disaster_ls_info( } else if (OB_FAIL(unit_stat_info_map_.locate(unit_info.unit_.unit_id_, unit_in_group))) { LOG_WARN("fail to locate unit", KR(ret), "unit_id", unit_info.unit_.unit_id_); } - } - if (OB_SUCC(ret)) { - if (OB_UNLIKELY(nullptr == server || nullptr == unit || nullptr == unit_in_group)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unit or server ptr is null", KR(ret), KP(server), KP(unit), K(ls_replica)); - } else if (OB_FAIL(append_replica_server_unit_stat( - &server->v_, &unit->v_, &unit_in_group->v_))) { - LOG_WARN("fail to append replica server/unit stat", KR(ret), - "server_stat_info", server->v_, "unit_stat_info", unit->v_); + if (OB_SUCC(ret)) { + if (OB_ISNULL(server) || OB_ISNULL(unit) || OB_ISNULL(unit_in_group)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unit or server ptr is null", KR(ret), KP(server), KP(unit), K(ls_replica)); + } else if (OB_FAIL(append_replica_server_unit_stat( + &server->v_, &unit->v_, 
&unit_in_group->v_))) { + LOG_WARN("fail to append replica server/unit stat", KR(ret), + "server_stat_info", server->v_, "unit_stat_info", unit->v_); + } } } } @@ -485,10 +515,13 @@ int DRLSInfo::get_leader( int DRLSInfo::get_leader_and_member_list( common::ObAddr &leader_addr, - common::ObMemberList &member_list) + common::ObMemberList &member_list, + GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; const ObLSReplica *leader_replica = nullptr; + member_list.reset(); + learner_list.reset(); if (OB_FAIL(inner_ls_info_.find_leader(leader_replica))) { LOG_WARN("fail to find leader", KR(ret)); } else if (OB_ISNULL(leader_replica)) { @@ -496,6 +529,7 @@ int DRLSInfo::get_leader_and_member_list( LOG_WARN("leader replica ptr is null", KR(ret), KP(leader_replica)); } else { leader_addr = leader_replica->get_server(); + // construct member list FOREACH_CNT_X(m, leader_replica->get_member_list(), OB_SUCC(ret)) { if (OB_ISNULL(m)) { ret = OB_INVALID_ARGUMENT; @@ -504,6 +538,15 @@ int DRLSInfo::get_leader_and_member_list( LOG_WARN("fail to add server to member list", KR(ret), KPC(m)); } } + // construct learner list + for (int64_t index = 0; OB_SUCC(ret) && index < leader_replica->get_learner_list().get_member_number(); ++index) { + ObMember learner; + if (OB_FAIL(leader_replica->get_learner_list().get_member_by_index(index, learner))) { + LOG_WARN("fail to get learner by index", KR(ret), K(index)); + } else if (OB_FAIL(learner_list.add_learner(learner))) { + LOG_WARN("fail to add learner to learner list", KR(ret), K(learner)); + } + } } return ret; } diff --git a/src/rootserver/ob_disaster_recovery_info.h b/src/rootserver/ob_disaster_recovery_info.h index 5c9cdfc88a..bd446a1c8a 100644 --- a/src/rootserver/ob_disaster_recovery_info.h +++ b/src/rootserver/ob_disaster_recovery_info.h @@ -183,6 +183,7 @@ public: int64_t get_member_list_cnt() const { return member_list_cnt_; } int64_t get_paxos_replica_number() const { return paxos_replica_number_; } bool has_leader() const { return has_leader_; } + bool is_duplicate_ls() const { return ls_status_info_.is_duplicate_ls(); } int get_tenant_id( uint64_t &tenant_id) const; int get_ls_id( @@ -203,8 +204,12 @@ public: common::ObAddr &leader_addr) const; int get_leader_and_member_list( common::ObAddr &leader_addr, - common::ObMemberList &member_list); + common::ObMemberList &member_list, + GlobalLearnerList &learner_list); private: + int construct_filtered_ls_info_to_use_( + const share::ObLSInfo &input_ls_info, + share::ObLSInfo &output_ls_info); // init related private func int gather_server_unit_stat(); int fill_servers(); diff --git a/src/rootserver/ob_disaster_recovery_task.cpp b/src/rootserver/ob_disaster_recovery_task.cpp index 3909f0aa3f..1cdc0b9b5d 100644 --- a/src/rootserver/ob_disaster_recovery_task.cpp +++ b/src/rootserver/ob_disaster_recovery_task.cpp @@ -135,7 +135,7 @@ const char* ob_disaster_recovery_task_priority_strs(const rootserver::ObDRTaskPr } else { LOG_WARN_RET(OB_INVALID_ARGUMENT, "invalid ObDRTask priority", K(task_priority)); } - return str; + return str; } static const char* disaster_recovery_task_type_strs[] = { @@ -510,7 +510,7 @@ int ObMigrateLSReplicaTask::log_execute_start() const "src_member", src_member_.get_server(), "data_src_member", data_src_member_.get_server()); } else { - ROOTSERVICE_EVENT_ADD("disaster_recovery", "start_migrate_ls_replica", + ROOTSERVICE_EVENT_ADD("disaster_recovery", get_log_start_str(), "tenant_id", get_tenant_id(), "ls_id", get_ls_id().id(), "task_id", get_task_id(), @@ -531,7 +531,7 @@ int 
ObMigrateLSReplicaTask::log_execute_result( if (OB_FAIL(build_execute_result(ret_code, ret_comment, execute_result))) { LOG_WARN("fail to build execute result", KR(ret), K(ret_code), K(ret_comment)); } else { - ROOTSERVICE_EVENT_ADD("disaster_recovery", "finish_migrate_ls_replica", + ROOTSERVICE_EVENT_ADD("disaster_recovery", get_log_finish_str(), "tenant_id", get_tenant_id(), "ls_id", get_ls_id().id(), "task_id", get_task_id(), @@ -961,7 +961,7 @@ int ObAddLSReplicaTask::get_virtual_disaster_recovery_task_stat( int ObAddLSReplicaTask::log_execute_start() const { int ret = OB_SUCCESS; - ROOTSERVICE_EVENT_ADD("disaster_recovery", "start_add_ls_replica", + ROOTSERVICE_EVENT_ADD("disaster_recovery", get_log_start_str(), "tenant_id", get_tenant_id(), "ls_id", get_ls_id().id(), "task_id", get_task_id(), @@ -981,7 +981,7 @@ int ObAddLSReplicaTask::log_execute_result( if (OB_FAIL(build_execute_result(ret_code, ret_comment, execute_result))) { LOG_WARN("fail to build execute result", KR(ret), K(ret_code), K(ret_comment)); } else { - ROOTSERVICE_EVENT_ADD("disaster_recovery", "finish_add_ls_replica", + ROOTSERVICE_EVENT_ADD("disaster_recovery", get_log_finish_str(), "tenant_id", get_tenant_id(), "ls_id", get_ls_id().id(), "task_id", get_task_id(), @@ -1432,7 +1432,7 @@ int ObLSTypeTransformTask::get_virtual_disaster_recovery_task_stat( int ObLSTypeTransformTask::log_execute_start() const { int ret = OB_SUCCESS; - ROOTSERVICE_EVENT_ADD("disaster_recovery", "start_type_transform_ls_replica", + ROOTSERVICE_EVENT_ADD("disaster_recovery", get_log_start_str(), "tenant_id", get_tenant_id(), "ls_id", get_ls_id().id(), "task_id", get_task_id(), @@ -1451,7 +1451,7 @@ int ObLSTypeTransformTask::log_execute_result( if (OB_FAIL(build_execute_result(ret_code, ret_comment, execute_result))) { LOG_WARN("fail to build execute result", KR(ret), K(ret_code), K(ret_comment)); } else { - ROOTSERVICE_EVENT_ADD("disaster_recovery", "finish_type_transform_ls_replica", + ROOTSERVICE_EVENT_ADD("disaster_recovery", get_log_finish_str(), "tenant_id", get_tenant_id(), "ls_id", get_ls_id().id(), "task_id", get_task_id(), @@ -1886,8 +1886,8 @@ int ObLSTypeTransformTask::build_task_from_sql_result( return ret; } -// ================================== ObRemoveLSPaxosReplicaTask ================================== -int ObRemoveLSPaxosReplicaTask::get_execute_transmit_size( +// ======================================== ObRemoveLSReplicaTask ====================================== +int ObRemoveLSReplicaTask::get_execute_transmit_size( int64_t &execute_transmit_size) const { int ret = OB_SUCCESS; @@ -1895,7 +1895,7 @@ int ObRemoveLSPaxosReplicaTask::get_execute_transmit_size( return ret; } -int ObRemoveLSPaxosReplicaTask::get_virtual_disaster_recovery_task_stat( +int ObRemoveLSReplicaTask::get_virtual_disaster_recovery_task_stat( common::ObAddr &src, common::ObAddr &data_src, common::ObAddr &dst, @@ -1909,10 +1909,10 @@ int ObRemoveLSPaxosReplicaTask::get_virtual_disaster_recovery_task_stat( return ret; } -int ObRemoveLSPaxosReplicaTask::log_execute_start() const +int ObRemoveLSReplicaTask::log_execute_start() const { int ret = OB_SUCCESS; - ROOTSERVICE_EVENT_ADD("disaster_recovery", "start_remove_ls_paxos_replica", + ROOTSERVICE_EVENT_ADD("disaster_recovery", get_log_start_str(), "tenant_id", get_tenant_id(), "ls_id", get_ls_id().id(), "task_id", get_task_id(), @@ -1923,7 +1923,7 @@ int ObRemoveLSPaxosReplicaTask::log_execute_start() const } -int ObRemoveLSPaxosReplicaTask::log_execute_result( +int 
ObRemoveLSReplicaTask::log_execute_result( const int ret_code, const ObDRTaskRetComment &ret_comment) const { @@ -1932,7 +1932,7 @@ int ObRemoveLSPaxosReplicaTask::log_execute_result( if (OB_FAIL(build_execute_result(ret_code, ret_comment, execute_result))) { LOG_WARN("fail to build execute result", KR(ret), K(ret_code), K(ret_comment)); } else { - ROOTSERVICE_EVENT_ADD("disaster_recovery", "finish_remove_ls_paxos_replica", + ROOTSERVICE_EVENT_ADD("disaster_recovery", get_log_finish_str(), "tenant_id", get_tenant_id(), "ls_id", get_ls_id().id(), "task_id", get_task_id(), @@ -1944,43 +1944,63 @@ int ObRemoveLSPaxosReplicaTask::log_execute_result( return ret; } -int ObRemoveLSPaxosReplicaTask::check_before_execute( +int ObRemoveLSReplicaTask::check_before_execute( share::ObLSTableOperator &lst_operator, ObDRTaskRetComment &ret_comment) const -{ +{ int ret = OB_SUCCESS; UNUSED(lst_operator); return ret; } -int ObRemoveLSPaxosReplicaTask::execute( +int ObRemoveLSReplicaTask::execute( obrpc::ObSrvRpcProxy &rpc_proxy, int &ret_code, ObDRTaskRetComment &ret_comment) const { int ret = OB_SUCCESS; - - ObLSDropPaxosReplicaArg arg; - if (OB_FAIL(arg.init( - get_task_id(), - get_tenant_id(), - get_ls_id(), - get_remove_server(), - get_orig_paxos_replica_number(), - get_paxos_replica_number()))) { - LOG_WARN("fail to init arg", KR(ret)); - } else if (OB_FAIL(rpc_proxy.to(get_dst_server()) - .by(get_tenant_id()).ls_remove_paxos_replica(arg))) { - ret_code = ret; - ret_comment = ObDRTaskRetComment::FAIL_TO_SEND_RPC; - LOG_WARN("fail to send ls remove paxos replica rpc", KR(ret), K(arg)); + if (ObDRTaskType::LS_REMOVE_PAXOS_REPLICA == get_disaster_recovery_task_type()) { + ObLSDropPaxosReplicaArg arg; + if (OB_FAIL(arg.init( + get_task_id(), + get_tenant_id(), + get_ls_id(), + get_remove_server(), + get_orig_paxos_replica_number(), + get_paxos_replica_number()))) { + LOG_WARN("fail to init arg", KR(ret)); + } else if (OB_FAIL(rpc_proxy.to(get_dst_server()) + .by(get_tenant_id()).ls_remove_paxos_replica(arg))) { + ret_code = ret; + ret_comment = ObDRTaskRetComment::FAIL_TO_SEND_RPC; + LOG_WARN("fail to send ls remove paxos replica rpc", KR(ret), K(arg)); + } else { + LOG_INFO("start to execute ls remove paxos replica", K(arg)); + } + } else if (ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA == get_disaster_recovery_task_type()) { + ObLSDropNonPaxosReplicaArg arg; + if (OB_FAIL(arg.init( + get_task_id(), + get_tenant_id(), + get_ls_id(), + get_remove_server()))) { + LOG_WARN("fail to init arg", KR(ret)); + } else if (OB_FAIL(rpc_proxy.to(get_dst_server()) + .by(get_tenant_id()).ls_remove_nonpaxos_replica(arg))) { + ret_code = ret; + ret_comment = ObDRTaskRetComment::FAIL_TO_SEND_RPC; + LOG_WARN("fail to send ls remove nonpaxos replica", KR(ret), K(arg)); + } else { + LOG_INFO("start to execute ls remove nonpaxos replica", K(arg)); + } } else { - LOG_INFO("start to execute ls remove paxos replica", K(arg)); + ret = OB_STATE_NOT_MATCH; + LOG_WARN("task type not expected", KR(ret), "task_type", get_disaster_recovery_task_type()); } return ret; } -int ObRemoveLSPaxosReplicaTask::fill_dml_splicer( +int ObRemoveLSReplicaTask::fill_dml_splicer( ObDMLSqlSplicer &dml_splicer) const { int ret = OB_SUCCESS; @@ -1988,15 +2008,15 @@ int ObRemoveLSPaxosReplicaTask::fill_dml_splicer( char dest_ip[OB_MAX_SERVER_ADDR_SIZE] = ""; char target_ip[OB_MAX_SERVER_ADDR_SIZE] = ""; char task_id[OB_TRACE_STAT_BUFFER_SIZE] = ""; - char task_type[MAX_DISASTER_RECOVERY_TASK_TYPE_LENGTH] = "REMOVE PAXOS REPLICA"; int64_t transmit_data_size = 
0; + const char *task_type_to_set = ob_disaster_recovery_task_type_strs(get_disaster_recovery_task_type()); if (OB_UNLIKELY(!is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid task", KR(ret)); } else if (false == get_leader().ip_to_string(dest_ip, sizeof(dest_ip))) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("convert dest_server ip to string failed", KR(ret), "dest_server", get_leader()); + LOG_WARN("convert dest_server ip to string failed", KR(ret), "dest_server", get_dst_server()); } else if (false == get_remove_server().get_server().ip_to_string(target_ip, sizeof(target_ip))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("convert target_server ip to string failed", KR(ret), "target_server", get_remove_server().get_server()); @@ -2005,7 +2025,7 @@ int ObRemoveLSPaxosReplicaTask::fill_dml_splicer( } else { if (OB_FAIL(dml_splicer.add_pk_column("tenant_id", tenant_id_)) || OB_FAIL(dml_splicer.add_pk_column("ls_id", ls_id_.id())) - || OB_FAIL(dml_splicer.add_pk_column("task_type", task_type)) + || OB_FAIL(dml_splicer.add_pk_column("task_type", task_type_to_set)) || OB_FAIL(dml_splicer.add_pk_column("task_id", task_id_)) || OB_FAIL(dml_splicer.add_column("task_status", TASK_STATUS)) || OB_FAIL(dml_splicer.add_column("priority", static_cast(ObDRTaskPriority::HIGH_PRI))) @@ -2028,22 +2048,22 @@ int ObRemoveLSPaxosReplicaTask::fill_dml_splicer( return ret; } -int64_t ObRemoveLSPaxosReplicaTask::get_clone_size() const +int64_t ObRemoveLSReplicaTask::get_clone_size() const { return sizeof(*this); } -int ObRemoveLSPaxosReplicaTask::clone( +int ObRemoveLSReplicaTask::clone( void *input_ptr, ObDRTask *&output_task) const { int ret = OB_SUCCESS; - if (OB_UNLIKELY(nullptr == input_ptr)) { + if (OB_ISNULL(input_ptr)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret)); } else { - ObRemoveLSPaxosReplicaTask *my_task = new (input_ptr) ObRemoveLSPaxosReplicaTask(); - if (OB_UNLIKELY(nullptr == my_task)) { + ObRemoveLSReplicaTask *my_task = new (input_ptr) ObRemoveLSReplicaTask(); + if (OB_ISNULL(my_task)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("fail to construct", KR(ret)); } else if (OB_FAIL(my_task->deep_copy(*this))) { @@ -2053,13 +2073,14 @@ int ObRemoveLSPaxosReplicaTask::clone( my_task->set_remove_server(get_remove_server()); my_task->set_orig_paxos_replica_number(get_orig_paxos_replica_number()); my_task->set_paxos_replica_number(get_paxos_replica_number()); + my_task->set_replica_type(get_replica_type()); output_task = my_task; } } return ret; } -int ObRemoveLSPaxosReplicaTask::build( +int ObRemoveLSReplicaTask::build( const ObDRTaskKey &task_key, const uint64_t tenant_id, const share::ObLSID &ls_id, @@ -2075,7 +2096,8 @@ int ObRemoveLSPaxosReplicaTask::build( const common::ObAddr &leader, const common::ObReplicaMember &remove_server, const int64_t orig_paxos_replica_number, - const int64_t paxos_replica_number) + const int64_t paxos_replica_number, + const ObReplicaType &replica_type) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!leader.is_valid() @@ -2113,14 +2135,16 @@ int ObRemoveLSPaxosReplicaTask::build( set_remove_server(remove_server); orig_paxos_replica_number_ = orig_paxos_replica_number; paxos_replica_number_ = paxos_replica_number; + replica_type_ = replica_type; } return ret; } -int ObRemoveLSPaxosReplicaTask::build_task_from_sql_result( +int ObRemoveLSReplicaTask::build_task_from_sql_result( const sqlclient::ObMySQLResult &res) { int ret = OB_SUCCESS; + common::ObString task_type; uint64_t tenant_id = OB_INVALID_TENANT_ID; int64_t ls_id = ObLSID::INVALID_LS_ID; common::ObString 
task_id; @@ -2135,6 +2159,7 @@ int ObRemoveLSPaxosReplicaTask::build_task_from_sql_result( int64_t schedule_time_us = 0; int64_t generate_time_us = 0; common::ObString comment; + ObReplicaType replica_type = REPLICA_TYPE_MAX; //STEP1_0: read certain members from sql result EXTRACT_INT_FIELD_MYSQL(res, "tenant_id", tenant_id, uint64_t); { @@ -2147,6 +2172,7 @@ int ObRemoveLSPaxosReplicaTask::build_task_from_sql_result( } (void)GET_COL_IGNORE_NULL(res.get_int, "ls_id", ls_id); (void)GET_COL_IGNORE_NULL(res.get_varchar, "task_id", task_id); + (void)GET_COL_IGNORE_NULL(res.get_varchar, "task_type", task_type); (void)GET_COL_IGNORE_NULL(res.get_int, "priority", priority); (void)GET_COL_IGNORE_NULL(res.get_varchar, "task_exec_svr_ip", dest_ip); (void)GET_COL_IGNORE_NULL(res.get_int, "task_exec_svr_port", dest_port); @@ -2190,6 +2216,14 @@ int ObRemoveLSPaxosReplicaTask::build_task_from_sql_result( } else { priority_to_set = ObDRTaskPriority::MAX_PRI; } + //transform task_type(string) -> replica_type(ObReplicaType) + if (0 == task_type.case_compare(ob_disaster_recovery_task_type_strs(ObDRTaskType::LS_REMOVE_PAXOS_REPLICA))) { + replica_type_ = ObReplicaType::REPLICA_TYPE_FULL; + } else if (0 == task_type.case_compare(ob_disaster_recovery_task_type_strs(ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA))) { + replica_type_ = ObReplicaType::REPLICA_TYPE_READONLY; + } else { + replica_type_ = ObReplicaType::REPLICA_TYPE_MAX; + } } //STEP3_0: to build a task if (OB_FAIL(ret)) { @@ -2207,310 +2241,13 @@ int ObRemoveLSPaxosReplicaTask::build_task_from_sql_result( priority_to_set, //(not used) comment_to_set.ptr(), //comment dest_server, //(in used)leader - ObReplicaMember(target_server, 0), //(in used)target_server + ObReplicaMember(target_server, 0), //(in used)target_server src_paxos_replica_number, //(in used) - dest_paxos_replica_number))) { //(in used) - LOG_WARN("fail to build a ObRemoveLSPaxosReplicaTask", KR(ret)); + dest_paxos_replica_number, //(in used) + replica_type_))) { //(in used) + LOG_WARN("fail to build a ObRemoveLSReplicaTask", KR(ret)); } else { - LOG_INFO("success to build a ObRemoveLSPaxosReplicaTask", KPC(this)); - } - return ret; -} - -// ================================== ObRemoveLSNonPaxosReplicaTask ================================== -int ObRemoveLSNonPaxosReplicaTask::get_execute_transmit_size( - int64_t &execute_transmit_size) const -{ - int ret = OB_SUCCESS; - execute_transmit_size = 0; - return ret; -} - -int ObRemoveLSNonPaxosReplicaTask::get_virtual_disaster_recovery_task_stat( - common::ObAddr &src, - common::ObAddr &data_src, - common::ObAddr &dst, - common::ObAddr &offline) const -{ - int ret = OB_SUCCESS; - UNUSED(src); - UNUSED(data_src); - dst = remove_server_.get_server(); - UNUSED(offline); - return ret; -} - -int ObRemoveLSNonPaxosReplicaTask::log_execute_start() const -{ - int ret = OB_SUCCESS; - ROOTSERVICE_EVENT_ADD("disaster_recovery", "start_remove_non_ls_paxos_replica", - "tenant_id", get_tenant_id(), - "ls_id", get_ls_id().id(), - "task_id", get_task_id(), - "destination", remove_server_.get_server(), - "comment", get_comment().ptr()); - return ret; -} - - -int ObRemoveLSNonPaxosReplicaTask::log_execute_result( - const int ret_code, - const ObDRTaskRetComment &ret_comment) const -{ - int ret = OB_SUCCESS; - ObSqlString execute_result; - if (OB_FAIL(build_execute_result(ret_code, ret_comment, execute_result))) { - LOG_WARN("fail to build execute result", KR(ret), K(ret_code), K(ret_comment)); - } else { - ROOTSERVICE_EVENT_ADD("disaster_recovery", 
"finish_remove_non_ls_paxos_replica", - "tenant_id", get_tenant_id(), - "ls_id", get_ls_id().id(), - "task_id", get_task_id(), - "destination", remove_server_.get_server(), - "execute_result", execute_result, - "comment", get_comment().ptr()); - } - return ret; -} - -int ObRemoveLSNonPaxosReplicaTask::check_before_execute( - share::ObLSTableOperator &lst_operator, - ObDRTaskRetComment &ret_comment) const -{ - int ret = OB_SUCCESS; - UNUSED(lst_operator); - return ret; -} - -int ObRemoveLSNonPaxosReplicaTask::execute( - obrpc::ObSrvRpcProxy &rpc_proxy, - int &ret_code, - ObDRTaskRetComment &ret_comment) const -{ - int ret = OB_SUCCESS; - - ObLSDropNonPaxosReplicaArg arg; - if (OB_FAIL(arg.init( - get_task_id(), - get_tenant_id(), - get_ls_id(), - get_remove_server()))) { - LOG_WARN("fail to init arg", KR(ret)); - } else if (OB_FAIL(rpc_proxy.to(get_dst_server()) - .by(get_tenant_id()).ls_remove_nonpaxos_replica(arg))) { - ret_code = ret; - ret_comment = ObDRTaskRetComment::FAIL_TO_SEND_RPC; - LOG_WARN("fail to send ls remove nonpaxos replica", KR(ret), K(arg)); - } else { - LOG_INFO("start to execute ls remove nonpaxos replica", K(arg)); - } - return ret; -} - -int ObRemoveLSNonPaxosReplicaTask::fill_dml_splicer( - ObDMLSqlSplicer &dml_splicer) const -{ - int ret = OB_SUCCESS; - char src_ip[OB_MAX_SERVER_ADDR_SIZE] = ""; - char dest_ip[OB_MAX_SERVER_ADDR_SIZE] = ""; - char target_ip[OB_MAX_SERVER_ADDR_SIZE] = ""; - char task_id[OB_TRACE_STAT_BUFFER_SIZE] = ""; - char task_type[MAX_DISASTER_RECOVERY_TASK_TYPE_LENGTH] = "REMOVE NON PAXOS REPLICA"; - int64_t transmit_data_size = 0; - - if (OB_UNLIKELY(!is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid task", KR(ret)); - } else if (false == get_dst_server().ip_to_string(dest_ip, sizeof(dest_ip))) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("convert dest_server ip to string failed", KR(ret), "dest_server", get_dst_server()); - } else if (false == get_remove_server().get_server().ip_to_string(target_ip, sizeof(target_ip))) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("convert target_server ip to string failed", KR(ret), "target_server", get_remove_server().get_server()); - } else if (OB_FAIL(get_execute_transmit_size(transmit_data_size))) { - LOG_WARN("fail to get transmit_data_size", KR(ret), K(transmit_data_size)); - } else { - if (OB_FAIL(dml_splicer.add_pk_column("tenant_id", tenant_id_)) - || OB_FAIL(dml_splicer.add_pk_column("ls_id", ls_id_.id())) - || OB_FAIL(dml_splicer.add_pk_column("task_type", task_type)) - || OB_FAIL(dml_splicer.add_pk_column("task_id", task_id_)) - || OB_FAIL(dml_splicer.add_column("task_status", TASK_STATUS)) - || OB_FAIL(dml_splicer.add_column("priority", static_cast(ObDRTaskPriority::LOW_PRI))) - || OB_FAIL(dml_splicer.add_column("target_replica_svr_ip", target_ip)) - || OB_FAIL(dml_splicer.add_column("target_replica_svr_port", get_remove_server().get_server().get_port())) - || OB_FAIL(dml_splicer.add_column("target_paxos_replica_number", 0)) - || OB_FAIL(dml_splicer.add_column("target_replica_type", ob_replica_type_strs(get_remove_server().get_replica_type()))) - || OB_FAIL(dml_splicer.add_column("source_replica_svr_ip", src_ip)) - || OB_FAIL(dml_splicer.add_column("source_replica_svr_port", 0)) - || OB_FAIL(dml_splicer.add_column("source_paxos_replica_number", 0)) - || OB_FAIL(dml_splicer.add_column("source_replica_type", "")) - || OB_FAIL(dml_splicer.add_column("task_exec_svr_ip", dest_ip)) - || OB_FAIL(dml_splicer.add_column("task_exec_svr_port", get_dst_server().get_port())) - || 
OB_FAIL(dml_splicer.add_time_column("generate_time", generate_time_)) - || OB_FAIL(dml_splicer.add_time_column("schedule_time", schedule_time_)) - || OB_FAIL(dml_splicer.add_column("comment", comment_.ptr()))) { - LOG_WARN("add column failed", KR(ret)); - } - } - return ret; -} - -int64_t ObRemoveLSNonPaxosReplicaTask::get_clone_size() const -{ - return sizeof(*this); -} - -int ObRemoveLSNonPaxosReplicaTask::clone( - void *input_ptr, - ObDRTask *&output_task) const -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(nullptr == input_ptr)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret)); - } else { - ObRemoveLSNonPaxosReplicaTask *my_task = new (input_ptr) ObRemoveLSNonPaxosReplicaTask(); - if (OB_UNLIKELY(nullptr == my_task)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("fail to construct", KR(ret)); - } else if (OB_FAIL(my_task->deep_copy(*this))) { - LOG_WARN("fail to deep copy", KR(ret)); - } else { - my_task->set_remove_server(get_remove_server()); - output_task = my_task; - } - } - return ret; -} - -int ObRemoveLSNonPaxosReplicaTask::build( - const ObDRTaskKey &task_key, - const uint64_t tenant_id, - const share::ObLSID &ls_id, - const share::ObTaskId &task_id, - const int64_t schedule_time_us, - const int64_t generate_time_us, - const int64_t cluster_id, - const int64_t transmit_data_size, - const obrpc::ObAdminClearDRTaskArg::TaskType invoked_source, - const bool skip_change_member_list, - const ObDRTaskPriority priority, - const ObString &comment, - const common::ObReplicaMember &dst_server) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!dst_server.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), - K(dst_server)); - } else if (OB_FAIL(ObDRTask::build( - task_key, - tenant_id, - ls_id, - task_id, - schedule_time_us, - generate_time_us, - cluster_id, - transmit_data_size, - invoked_source, - skip_change_member_list, - priority, - comment))) { - LOG_WARN("fail to build ObDRTask", KR(ret), - K(task_key), - K(tenant_id), - K(ls_id), - K(task_id), - K(transmit_data_size), - K(invoked_source), - K(priority)); - } else { - set_remove_server(dst_server); - } - return ret; -} - -int ObRemoveLSNonPaxosReplicaTask::build_task_from_sql_result( - const sqlclient::ObMySQLResult &res) -{ - int ret = OB_SUCCESS; - uint64_t tenant_id = OB_INVALID_TENANT_ID; - int64_t ls_id = ObLSID::INVALID_LS_ID; - common::ObString task_id; - int64_t priority = 2; - common::ObString target_ip; - int64_t target_port = OB_INVALID_INDEX; - int64_t transmit_data_size = 0; - int64_t schedule_time_us = 0; - int64_t generate_time_us = 0; - common::ObString comment; - //STEP1_0: read certain members from sql result - EXTRACT_INT_FIELD_MYSQL(res, "tenant_id", tenant_id, uint64_t); - { - ObTimeZoneInfoWrap tz_info_wrap; - ObTZMapWrap tz_map_wrap; - OZ(OTTZ_MGR.get_tenant_tz(tenant_id, tz_map_wrap)); - tz_info_wrap.set_tz_info_map(tz_map_wrap.get_tz_map()); - (void)GET_COL_IGNORE_NULL(res.get_timestamp, "generate_time", tz_info_wrap.get_time_zone_info(), generate_time_us); - (void)GET_COL_IGNORE_NULL(res.get_timestamp, "schedule_time", tz_info_wrap.get_time_zone_info(), schedule_time_us); - } - (void)GET_COL_IGNORE_NULL(res.get_int, "ls_id", ls_id); - (void)GET_COL_IGNORE_NULL(res.get_varchar, "task_id", task_id); - (void)GET_COL_IGNORE_NULL(res.get_int, "priority", priority); - (void)GET_COL_IGNORE_NULL(res.get_varchar, "target_replica_svr_ip", target_ip); - (void)GET_COL_IGNORE_NULL(res.get_int, "target_replica_svr_port", target_port); - (void)GET_COL_IGNORE_NULL(res.get_varchar, 
"comment", comment); - //STEP2_0: make necessary members to build a task - ObDRTaskKey task_key; - common::ObAddr target_server; - rootserver::ObDRTaskPriority priority_to_set; - share::ObTaskId task_id_to_set; - ObSqlString comment_to_set; - - if (OB_FAIL(ret)) { - } else if (OB_FAIL(comment_to_set.assign(comment))) { - LOG_WARN("fai to assign a ObString to ObSqlString", KR(ret), K(comment)); - } else if (OB_FAIL(task_id_to_set.set(task_id.ptr()))) { - LOG_WARN("fail to init a task_id", KR(ret), K(task_id)); - } else if (OB_FAIL(task_key.init( - tenant_id, - ls_id, - 0/* set to 0 */, - 0/* set to 0 */, - ObDRTaskKeyType::FORMAL_DR_KEY))) { - LOG_WARN("fail to init a ObDRTaskKey", KR(ret), K(tenant_id), K(ls_id)); - } else if (false == target_server.set_ip_addr(target_ip, static_cast(target_port))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid server address", K(target_ip), K(target_port)); - } else { - //transform priority(int) -> priority_to_set(ObDRTaskPriority) - if (priority == 0) { - priority_to_set = ObDRTaskPriority::HIGH_PRI; - } else if (priority == 1) { - priority_to_set = ObDRTaskPriority::LOW_PRI; - } else { - priority_to_set = ObDRTaskPriority::MAX_PRI; - } - } - //STEP3_0: to build a task - if (OB_FAIL(ret)) { - } else if (OB_FAIL(build( - task_key, //(in used) - tenant_id, //(in used) - ObLSID(ls_id), //(in used) - task_id_to_set, //(in used) - schedule_time_us, - generate_time_us, - GCONF.cluster_id, //(not used)cluster_id - transmit_data_size, //(not used) - obrpc::ObAdminClearDRTaskArg::TaskType::AUTO,//(not used)invoked_source - true, //(not used)skip_change_member_list - priority_to_set, //(not used) - comment_to_set.ptr(), //comment - ObReplicaMember(target_server, 0)))) { //(in used)target_server - LOG_WARN("fail to build a ObRemoveLSNonPaxosReplicaTask", KR(ret)); - } else { - LOG_INFO("success to build a ObRemoveLSNonPaxosReplicaTask", KPC(this)); + LOG_INFO("success to build a ObRemoveLSReplicaTask", KPC(this)); } return ret; } @@ -2548,7 +2285,7 @@ int ObLSModifyPaxosReplicaNumberTask::log_execute_start() const LOG_WARN("fail to append to paxos_replica_number", KR(ret), K(orig_paxos_replica_number_), K(paxos_replica_number_)); } else { - ROOTSERVICE_EVENT_ADD("disaster_recovery", "start_modify_paxos_replica_number", + ROOTSERVICE_EVENT_ADD("disaster_recovery", get_log_start_str(), "tenant_id", get_tenant_id(), "ls_id", get_ls_id().id(), "task_id", get_task_id(), @@ -2569,7 +2306,7 @@ int ObLSModifyPaxosReplicaNumberTask::log_execute_result( if (OB_FAIL(build_execute_result(ret_code, ret_comment, execute_result))) { LOG_WARN("fail to build execute result", KR(ret), K(ret_code), K(ret_comment)); } else { - ROOTSERVICE_EVENT_ADD("disaster_recovery", "finish_modify_paxos_replica_number", + ROOTSERVICE_EVENT_ADD("disaster_recovery", get_log_finish_str(), "tenant_id", get_tenant_id(), "ls_id", get_ls_id().id(), "task_id", get_task_id(), diff --git a/src/rootserver/ob_disaster_recovery_task.h b/src/rootserver/ob_disaster_recovery_task.h index a6fd303c32..202fb69a57 100644 --- a/src/rootserver/ob_disaster_recovery_task.h +++ b/src/rootserver/ob_disaster_recovery_task.h @@ -37,6 +37,23 @@ class ObLSTableOperator; namespace rootserver { + +namespace drtask +{ + const static char * const REMOVE_LOCALITY_PAXOS_REPLICA = "remove redundant paxos replica according to locality"; + const static char * const REMOVE_LOCALITY_NON_PAXOS_REPLICA = "remove redundant non-paxos replica according to locality"; + const static char * const ADD_LOCALITY_PAXOS_REPLICA = "add paxos replica 
according to locality"; + const static char * const ADD_LOCALITY_NON_PAXOS_REPLICA = "add non-paxos replica according to locality"; + const static char * const TRANSFORM_LOCALITY_REPLICA_TYPE = "type transform according to locality"; + const static char * const MODIFY_PAXOS_REPLICA_NUMBER = "modify paxos replica number according to locality"; + const static char * const REMOVE_PERMANENT_OFFLINE_REPLICA = "remove permanent offline replica"; + const static char * const REPLICATE_REPLICA = "replicate to unit task"; + const static char * const CANCEL_MIGRATE_UNIT_WITH_PAXOS_REPLICA = "cancel migrate unit remove paxos replica"; + const static char * const CANCEL_MIGRATE_UNIT_WITH_NON_PAXOS_REPLICA = "cancel migrate unit remove non-paxos replica"; + const static char * const MIGRATE_REPLICA_DUE_TO_UNIT_GROUP_NOT_MATCH = "migrate replica due to unit group not match"; + const static char * const MIGRATE_REPLICA_DUE_TO_UNIT_NOT_MATCH = "migrate replica due to unit not match"; +}; + enum class ObDRTaskType : int64_t; enum class ObDRTaskPriority : int64_t; @@ -320,6 +337,8 @@ public: // operations of comments ObString get_comment() const { return comment_.string(); } int set_comment(const ObString comment) { return comment_.assign(comment); } + virtual const char* get_log_start_str() const = 0; + virtual const char* get_log_finish_str() const = 0; // operations of schedule_time_ int64_t get_schedule_time() const { return schedule_time_; } void set_schedule_time(const int64_t schedule_time) { schedule_time_ = schedule_time; } @@ -432,8 +451,9 @@ public: virtual int fill_dml_splicer( share::ObDMLSqlSplicer &dml_splicer) const override; + virtual const char* get_log_start_str() const override { return "start_migrate_ls_replica"; } + virtual const char* get_log_finish_str() const override { return "finish_migrate_ls_replica"; } virtual int64_t get_clone_size() const override; - virtual int clone( void *input_ptr, ObDRTask *&output_task) const override; @@ -541,8 +561,9 @@ public: virtual int fill_dml_splicer( share::ObDMLSqlSplicer &dml_splicer) const override; + virtual const char* get_log_start_str() const override { return "start_add_ls_replica"; } + virtual const char* get_log_finish_str() const override { return "finish_add_ls_replica"; } virtual int64_t get_clone_size() const override; - virtual int clone( void *input_ptr, ObDRTask *&output_task) const override; @@ -653,8 +674,9 @@ public: virtual int fill_dml_splicer( share::ObDMLSqlSplicer &dml_splicer) const override; + virtual const char* get_log_start_str() const override { return "start_type_transform_ls_replica"; } + virtual const char* get_log_finish_str() const override { return "finish_type_transform_ls_replica"; } virtual int64_t get_clone_size() const override; - virtual int clone( void *input_ptr, ObDRTask *&output_task) const override; @@ -696,15 +718,16 @@ private: int64_t paxos_replica_number_; }; -class ObRemoveLSPaxosReplicaTask : public ObDRTask +class ObRemoveLSReplicaTask : public ObDRTask { public: - ObRemoveLSPaxosReplicaTask() : ObDRTask(), + ObRemoveLSReplicaTask() : ObDRTask(), leader_(), remove_server_(), orig_paxos_replica_number_(0), - paxos_replica_number_(0) {} - virtual ~ObRemoveLSPaxosReplicaTask() {} + paxos_replica_number_(0), + replica_type_(REPLICA_TYPE_FULL) {} + virtual ~ObRemoveLSReplicaTask() {} public: int build( const ObDRTaskKey &task_key, @@ -722,9 +745,10 @@ public: const common::ObAddr &leader, const common::ObReplicaMember &remove_server, const int64_t orig_paxos_replica_number, - const int64_t 
paxos_replica_number); + const int64_t paxos_replica_number, + const ObReplicaType &replica_type); - // build a ObRemoveLSPaxosReplicaTask from sql result read from inner table + // build a ObRemoveLSReplicaTask from sql result read from inner table // @param [in] res, sql result read from inner table int build_task_from_sql_result(const sqlclient::ObMySQLResult &res); public: @@ -733,14 +757,17 @@ public: } virtual ObDRTaskType get_disaster_recovery_task_type() const override { - return ObDRTaskType::LS_REMOVE_PAXOS_REPLICA; + return ObReplicaTypeCheck::is_paxos_replica_V2(replica_type_) + ? ObDRTaskType::LS_REMOVE_PAXOS_REPLICA + : ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA; } virtual INHERIT_TO_STRING_KV("ObDRTask", ObDRTask, K(leader_), K(remove_server_), K(orig_paxos_replica_number_), - K(paxos_replica_number_)); + K(paxos_replica_number_), + K(replica_type_)); virtual int get_execute_transmit_size( int64_t &execute_transmit_size) const override; @@ -750,7 +777,7 @@ public: common::ObAddr &data_src, common::ObAddr &dest, common::ObAddr &offline) const override; - + virtual int log_execute_start() const override; virtual int log_execute_result(const int ret_code, const ObDRTaskRetComment &ret_comment) const override; @@ -767,8 +794,20 @@ public: virtual int fill_dml_splicer( share::ObDMLSqlSplicer &dml_splicer) const override; + virtual const char* get_log_start_str() const override + { + return ObDRTaskType::LS_REMOVE_PAXOS_REPLICA == get_disaster_recovery_task_type() + ? "start_remove_ls_paxos_replica" + : "start_remove_ls_non_paxos_replica"; + } + virtual const char* get_log_finish_str() const override + { + return ObDRTaskType::LS_REMOVE_PAXOS_REPLICA == get_disaster_recovery_task_type() + ? "finish_remove_ls_paxos_replica" + : "finish_remove_ls_non_paxos_replica"; + } + virtual int64_t get_clone_size() const override; - virtual int clone( void *input_ptr, ObDRTask *&output_task) const override; @@ -785,86 +824,15 @@ public: // operations of paxos_replica_number_ void set_paxos_replica_number(const int64_t q) { paxos_replica_number_ = q; } int64_t get_paxos_replica_number() const { return paxos_replica_number_; } + // operations of replica_type_ + void set_replica_type(const ObReplicaType &replica_type) { replica_type_ = replica_type; } + const ObReplicaType &get_replica_type() const { return replica_type_; } private: common::ObAddr leader_; common::ObReplicaMember remove_server_; int64_t orig_paxos_replica_number_; int64_t paxos_replica_number_; -}; - -class ObRemoveLSNonPaxosReplicaTask : public ObDRTask -{ -public: - ObRemoveLSNonPaxosReplicaTask() : ObDRTask(), - remove_server_() {} - virtual ~ObRemoveLSNonPaxosReplicaTask() {} -public: - int build( - const ObDRTaskKey &task_key, - const uint64_t tenant_id, - const share::ObLSID &ls_id, - const share::ObTaskId &task_id, - const int64_t schedule_time_us, - const int64_t generate_time_us, - const int64_t cluster_id, - const int64_t transmit_data_size, - const obrpc::ObAdminClearDRTaskArg::TaskType invoked_source, - const bool skip_change_member_list, - const ObDRTaskPriority priority, - const ObString &comment, - const common::ObReplicaMember &dst_server); - - // build a ObRemoveLSNonPaxosReplicaTask from sql result read from inner table - // @param [in] res, sql result read from inner table - int build_task_from_sql_result(const sqlclient::ObMySQLResult &res); -public: - virtual const common::ObAddr &get_dst_server() const override { - return remove_server_.get_server(); - } - - virtual ObDRTaskType 
get_disaster_recovery_task_type() const override { - return ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA; - } - - virtual INHERIT_TO_STRING_KV("ObDRTask", ObDRTask, - K(remove_server_)); - - virtual int get_execute_transmit_size( - int64_t &execute_transmit_size) const override; - - virtual int get_virtual_disaster_recovery_task_stat( - common::ObAddr &src, - common::ObAddr &data_src, - common::ObAddr &dest, - common::ObAddr &offline) const override; - - virtual int log_execute_start() const override; - - virtual int log_execute_result(const int ret_code, const ObDRTaskRetComment &ret_comment) const override; - - virtual int check_before_execute( - share::ObLSTableOperator &lst_operator, - ObDRTaskRetComment &ret_comment) const override; - - virtual int execute( - obrpc::ObSrvRpcProxy &rpc_proxy, - int &ret_code, - ObDRTaskRetComment &ret_comment) const override; - - virtual int fill_dml_splicer( - share::ObDMLSqlSplicer &dml_splicer) const override; - - virtual int64_t get_clone_size() const override; - - virtual int clone( - void *input_ptr, - ObDRTask *&output_task) const override; -public: - // operations of server_ - void set_remove_server(const common::ObReplicaMember &d) { remove_server_ = d; } - const common::ObReplicaMember &get_remove_server() const { return remove_server_; } -private: - common::ObReplicaMember remove_server_; + ObReplicaType replica_type_; }; class ObLSModifyPaxosReplicaNumberTask : public ObDRTask @@ -937,8 +905,9 @@ public: virtual int fill_dml_splicer( share::ObDMLSqlSplicer &dml_splicer) const override; + virtual const char* get_log_start_str() const override { return "start_modify_paxos_replica_number"; } + virtual const char* get_log_finish_str() const override { return "finish_modify_paxos_replica_number"; } virtual int64_t get_clone_size() const override; - virtual int clone( void *input_ptr, ObDRTask *&output_task) const override; diff --git a/src/rootserver/ob_disaster_recovery_task_mgr.cpp b/src/rootserver/ob_disaster_recovery_task_mgr.cpp index aa54f08745..787f7af40d 100644 --- a/src/rootserver/ob_disaster_recovery_task_mgr.cpp +++ b/src/rootserver/ob_disaster_recovery_task_mgr.cpp @@ -711,7 +711,7 @@ void ObDRTaskMgr::run3() if (!loaded_ && OB_FAIL(load_task_to_schedule_list_())) { LOG_WARN("fail to load task infos into schedule list, will retry until success", KR(ret)); } else { - update_last_run_timestamp(); + update_last_run_timestamp(); common::ObArenaAllocator allocator; ObDRTask *task = nullptr; @@ -1152,20 +1152,13 @@ int ObDRTaskMgr::load_task_info_( LOG_WARN("fail to load ObLSTypeTransformTask into schedule list", KR(ret)); } } - } else if (task_type == common::ObString("REMOVE PAXOS REPLICA")) { - SMART_VAR(ObRemoveLSPaxosReplicaTask, tmp_task) { + } else if (0 == task_type.case_compare(ob_disaster_recovery_task_type_strs(ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA)) + || 0 == task_type.case_compare(ob_disaster_recovery_task_type_strs(ObDRTaskType::LS_REMOVE_PAXOS_REPLICA))) { + SMART_VAR(ObRemoveLSReplicaTask, tmp_task) { if (OB_FAIL(tmp_task.build_task_from_sql_result(res))) { - LOG_WARN("fail to build ObRemoveLSPaxosReplicaTask from res", KR(ret)); + LOG_WARN("fail to build ObRemoveLSReplicaTask from res", KR(ret)); } else if (OB_FAIL(queues_[priority].push_task_in_schedule_list(tmp_task))) { - LOG_WARN("fail to load ObRemoveLSPaxosReplicaTask into schedule list", KR(ret)); - } - } - } else if (task_type == common::ObString("REMOVE NON PAXOS REPLICA")) { - SMART_VAR(ObRemoveLSNonPaxosReplicaTask, tmp_task) { - if 
(OB_FAIL(tmp_task.build_task_from_sql_result(res))) { - LOG_WARN("fail to build ObRemoveLSNonPaxosReplicaTask from res", KR(ret)); - } else if (OB_FAIL(queues_[priority].push_task_in_schedule_list(tmp_task))) { - LOG_WARN("fail to load ObRemoveLSNonPaxosReplicaTask into schedule list", KR(ret)); + LOG_WARN("fail to load ObRemoveLSReplicaTask into schedule list", KR(ret)); } } } else if (task_type == common::ObString("MODIFY PAXOS REPLICA NUMBER")) { diff --git a/src/rootserver/ob_disaster_recovery_worker.cpp b/src/rootserver/ob_disaster_recovery_worker.cpp index 8bb3270918..8f8049df42 100644 --- a/src/rootserver/ob_disaster_recovery_worker.cpp +++ b/src/rootserver/ob_disaster_recovery_worker.cpp @@ -190,7 +190,7 @@ int ObDRWorker::LocalityAlignment::locate_zone_locality( ReplicaDescArray *my_desc_array = nullptr; int tmp_ret = locality_map_.get_refactored(zone, my_desc_array); if (OB_SUCCESS == tmp_ret) { - if (OB_UNLIKELY(nullptr == my_desc_array)) { + if (OB_ISNULL(my_desc_array)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("my_desc_array ptr is null", KR(ret)); } else { @@ -210,7 +210,7 @@ int ObDRWorker::LocalityAlignment::locate_zone_locality( } else { replica_desc_array = my_desc_array; } - if (OB_FAIL(ret) && nullptr != my_desc_array) { + if (OB_FAIL(ret) && OB_NOT_NULL(my_desc_array)) { my_desc_array->~ReplicaDescArray(); } } else { @@ -275,7 +275,7 @@ int ObDRWorker::LocalityAlignment::build_locality_stat_map() int ret = OB_SUCCESS; uint64_t tenant_id = OB_INVALID_ID; share::ObLSID ls_id; - if (OB_UNLIKELY(nullptr == unit_mgr_)) { + if (OB_ISNULL(unit_mgr_)) { ret = OB_NOT_INIT; LOG_WARN("LocalityAlignment not init", KR(ret), KP(unit_mgr_)); } else if (OB_FAIL(dr_ls_info_.get_ls_id(tenant_id, ls_id))) { @@ -298,10 +298,10 @@ int ObDRWorker::LocalityAlignment::build_locality_stat_map() // readonly locality const ObIArray &readonly_locality = zone_locality.replica_attr_set_.get_readonly_replica_attr_array(); - + if (OB_FAIL(locate_zone_locality(zone, zone_replica_desc))) { LOG_WARN("fail to locate zone locality", KR(ret), K(zone)); - } else if (OB_UNLIKELY(nullptr == zone_replica_desc)) { + } else if (OB_ISNULL(zone_replica_desc)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("fail to locate zone locality", KR(ret), K(zone)); } else { @@ -341,31 +341,23 @@ int ObDRWorker::LocalityAlignment::build_locality_stat_map() LOG_WARN("fail to push back", KR(ret)); } } - // readonly replica, normal - for (int64_t j = 0; OB_SUCC(ret) && j < readonly_locality.count(); ++j) { - const ReplicaAttr &replica_attr = readonly_locality.at(j); - if (replica_attr.num_ <= 0) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("replica num unexpected", KR(ret), K(zone), K(readonly_locality)); - } else if (ObLocalityDistribution::ALL_SERVER_CNT == replica_attr.num_) { - // bypass, postpone processing - } else if (OB_FAIL(zone_replica_desc->push_back(ReplicaDesc(REPLICA_TYPE_READONLY, - replica_attr.memstore_percent_, - replica_attr.num_)))) { - LOG_WARN("fail to push back", KR(ret)); - } - } // readonly replica, all_server - for (int64_t j = 0; OB_SUCC(ret) && j < readonly_locality.count(); ++j) { - const ReplicaAttr &replica_attr = readonly_locality.at(j); - if (replica_attr.num_ <= 0) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("replica num unexpected", KR(ret), K(zone), K(readonly_locality)); - } else if (ObLocalityDistribution::ALL_SERVER_CNT != replica_attr.num_) { - // bypass, processed before - } else { - zone_replica_desc->is_readonly_all_server_ = true; - zone_replica_desc->readonly_memstore_percent_ = 
replica_attr.memstore_percent_; + if (dr_ls_info_.is_duplicate_ls()) { + // duplicate ls, should has R-replica all_server + zone_replica_desc->is_readonly_all_server_ = true; + zone_replica_desc->readonly_memstore_percent_ = 100; + } else { + // readonly replica, normal + for (int64_t j = 0; OB_SUCC(ret) && j < readonly_locality.count(); ++j) { + const ReplicaAttr &replica_attr = readonly_locality.at(j); + if (0 >= replica_attr.num_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("replica num unexpected", KR(ret), K(zone), K(readonly_locality)); + } else if (OB_FAIL(zone_replica_desc->push_back(ReplicaDesc(REPLICA_TYPE_READONLY, + replica_attr.memstore_percent_, + replica_attr.num_)))) { + LOG_WARN("fail to push back", KR(ret)); + } } } } @@ -394,10 +386,10 @@ int ObDRWorker::LocalityAlignment::build_replica_stat_map() unit_stat_info, unit_in_group_stat_info))) { LOG_WARN("fail to get replica stat", KR(ret)); - } else if (OB_UNLIKELY(nullptr == replica - || nullptr == server_stat_info - || nullptr == unit_stat_info - || nullptr == unit_in_group_stat_info)) { + } else if (OB_ISNULL(replica) + || OB_ISNULL(server_stat_info) + || OB_ISNULL(unit_stat_info) + || OB_ISNULL(unit_in_group_stat_info)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replica related ptrs are null", KR(ret), KP(replica), @@ -446,7 +438,7 @@ int ObDRWorker::LocalityAlignment::try_remove_match( if (OB_HASH_NOT_EXIST == tmp_ret) { // zone not exist, not match } else if (OB_SUCCESS == tmp_ret) { - if (OB_UNLIKELY(nullptr == zone_replica_desc)) { + if (OB_ISNULL(zone_replica_desc)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("zone replica desc ptr is null", KR(ret), K(zone)); } else { @@ -507,23 +499,8 @@ int ObDRWorker::LocalityAlignment::prepare_generate_locality_task() if (OB_UNLIKELY(!replica_stat_desc.is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replica stat desc unexpected", KR(ret)); - } else { - const ObReplicaType replica_type = replica_stat_desc.replica_->get_replica_type(); - if (ObReplicaTypeCheck::is_paxos_replica_V2(replica_type)) { - if (!replica_stat_desc.replica_->get_in_member_list()) { - if (OB_FAIL(replica_stat_map_.remove(i))) { - LOG_WARN("fail to remove task", KR(ret)); - } - } else { - if (OB_FAIL(try_remove_match(replica_stat_desc, i))) { - LOG_WARN("fail to try remove match", KR(ret)); - } - } - } else { - if (OB_FAIL(try_remove_match(replica_stat_desc, i))) { - LOG_WARN("fail to try remove match", KR(ret)); - } - } + } else if (OB_FAIL(try_remove_match(replica_stat_desc, i))) { + LOG_WARN("fail to try remove match", KR(ret)); } } return ret; @@ -539,12 +516,12 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_full_replica( if (REPLICA_TYPE_FULL != replica.get_replica_type()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replica type unexpected", KR(ret), K(replica)); - } else { + } else { ReplicaDescArray *zone_replica_desc = nullptr; int tmp_ret = locality_map_.get_refactored(zone, zone_replica_desc); if (OB_HASH_NOT_EXIST == tmp_ret) { - if (OB_FAIL(generate_remove_paxos_task(replica_stat_desc))) { - LOG_WARN("fail to generate remove paxos task", KR(ret)); + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret)); } else if (OB_FAIL(replica_stat_map_.remove(index))) { LOG_WARN("fail to remove", KR(ret)); } @@ -587,8 +564,8 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_full_replica( LOG_WARN("fail to remove", KR(ret), K(index), K(replica), K(replica_stat_map_)); } } else { - if 
(OB_FAIL(generate_remove_paxos_task(replica_stat_desc))) { - LOG_WARN("fail to generate remove paxos task", KR(ret)); + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret)); } else if (OB_FAIL(replica_stat_map_.remove(index))) { LOG_WARN("fail to remove", KR(ret)); } @@ -611,12 +588,12 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_logonly_replic if (REPLICA_TYPE_LOGONLY != replica.get_replica_type()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replica type unexpected", KR(ret), K(replica)); - } else { + } else { ReplicaDescArray *zone_replica_desc = nullptr; int tmp_ret = locality_map_.get_refactored(zone, zone_replica_desc); if (OB_HASH_NOT_EXIST == tmp_ret) { - if (OB_FAIL(generate_remove_paxos_task(replica_stat_desc))) { - LOG_WARN("fail to generate remove paxos task", KR(ret)); + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret)); } else if (OB_FAIL(replica_stat_map_.remove(index))) { LOG_WARN("fail to remove", KR(ret)); } @@ -633,8 +610,8 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_logonly_replic } // normal routine if (OB_SUCC(ret)) { - if (OB_FAIL(generate_remove_paxos_task(replica_stat_desc))) { - LOG_WARN("fail to generate remove paxos task", KR(ret)); + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret)); } else if (OB_FAIL(replica_stat_map_.remove(index))) { LOG_WARN("fail to remove", KR(ret)); } @@ -657,12 +634,12 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_encryption_log if (REPLICA_TYPE_ENCRYPTION_LOGONLY != replica.get_replica_type()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replica type unexpected", KR(ret), K(replica)); - } else { + } else { ReplicaDescArray *zone_replica_desc = nullptr; int tmp_ret = locality_map_.get_refactored(zone, zone_replica_desc); if (OB_HASH_NOT_EXIST == tmp_ret) { - if (OB_FAIL(generate_remove_paxos_task(replica_stat_desc))) { - LOG_WARN("fail to generate remove paxos task", KR(ret)); + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret)); } else if (OB_FAIL(replica_stat_map_.remove(index))) { LOG_WARN("fail to remove", KR(ret)); } @@ -679,8 +656,8 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_encryption_log } // normal routine if (OB_SUCC(ret)) { - if (OB_FAIL(generate_remove_paxos_task(replica_stat_desc))) { - LOG_WARN("fail to generate remove paxos task", KR(ret)); + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret)); } else if (OB_FAIL(replica_stat_map_.remove(index))) { LOG_WARN("fail to remove", KR(ret)); } @@ -703,12 +680,12 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_readonly_repli if (REPLICA_TYPE_READONLY != replica.get_replica_type()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replica type unexpected", KR(ret), K(replica)); - } else { + } else { ReplicaDescArray *zone_replica_desc = nullptr; int tmp_ret = locality_map_.get_refactored(zone, zone_replica_desc); if (OB_HASH_NOT_EXIST == tmp_ret) { - if (OB_FAIL(generate_remove_nonpaxos_task(replica_stat_desc))) { - LOG_WARN("fail to generate remove paxos task", KR(ret)); + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret)); } else if 
(OB_FAIL(replica_stat_map_.remove(index))) { LOG_WARN("fail to remove", KR(ret)); } @@ -721,17 +698,30 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_readonly_repli ret = OB_ERR_UNEXPECTED; LOG_WARN("replica type unexpected", KR(ret), K(dr_ls_info_)); } else if (REPLICA_TYPE_FULL == replica_desc.replica_type_) { - if (OB_FAIL(generate_type_transform_task( - replica_stat_desc, - replica_desc.replica_type_, - replica_desc.memstore_percent_))) { - LOG_WARN("fail to generate type transform task", KR(ret), K(replica_stat_desc)); - } else if (OB_FAIL(zone_replica_desc->remove(i))) { - LOG_WARN("fail to remove", KR(ret), K(i), K(replica), K(zone_replica_desc)); - } else if (OB_FAIL(replica_stat_map_.remove(index))) { - LOG_WARN("fail to remove", KR(ret), K(index), K(replica), K(replica_stat_map_)); + if (OB_ISNULL(replica_stat_desc.unit_stat_info_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(replica_stat_desc)); } else { - found = true; + const share::ObUnitInfo &unit_info = replica_stat_desc.unit_stat_info_->get_unit_info(); + bool server_is_active = false; + if (!unit_info.unit_.is_active_status()) { + FLOG_INFO("unit status is not normal, can not generate type transform task", K(unit_info)); + } else if (OB_FAIL(SVR_TRACER.check_server_active(unit_info.unit_.server_, server_is_active))) { + LOG_WARN("fail to check server is active", KR(ret), K(unit_info)); + } else if (!server_is_active) { + FLOG_INFO("server status is not active, can not generate type transform task", K(unit_info)); + } else if (OB_FAIL(generate_type_transform_task( + replica_stat_desc, + replica_desc.replica_type_, + replica_desc.memstore_percent_))) { + LOG_WARN("fail to generate type transform task", KR(ret), K(replica_stat_desc)); + } else if (OB_FAIL(zone_replica_desc->remove(i))) { + LOG_WARN("fail to remove", KR(ret), K(i), K(replica), K(zone_replica_desc)); + } else if (OB_FAIL(replica_stat_map_.remove(index))) { + LOG_WARN("fail to remove", KR(ret), K(index), K(replica), K(replica_stat_map_)); + } else { + found = true; + } } } } @@ -745,8 +735,8 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task_from_readonly_repli LOG_WARN("fail to remove", KR(ret), K(index), K(replica), K(replica_stat_map_)); } } else { - if (OB_FAIL(generate_remove_nonpaxos_task(replica_stat_desc))) { - LOG_WARN("fail to generate remove paxos task", KR(ret)); + if (OB_FAIL(generate_remove_replica_task(replica_stat_desc))) { + LOG_WARN("fail to generate remove replica task", KR(ret)); } else if (OB_FAIL(replica_stat_map_.remove(index))) { LOG_WARN("fail to remove", KR(ret)); } @@ -765,7 +755,7 @@ int ObDRWorker::LocalityAlignment::try_generate_locality_task_from_locality_map( LocalityMap::iterator iter = locality_map_.begin(); for (; iter != locality_map_.end(); ++iter) { ReplicaDescArray *replica_desc_array = iter->second; - if (OB_UNLIKELY(nullptr == replica_desc_array)) { + if (OB_ISNULL(replica_desc_array)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("zone locality ptr is null", KR(ret), "zone", iter->first); } else { @@ -816,7 +806,7 @@ int ObDRWorker::LocalityAlignment::do_generate_locality_task() if (OB_UNLIKELY(!replica_stat_desc.is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replica stat desc unexpected", KR(ret)); - } else if (OB_UNLIKELY(nullptr == replica)) { + } else if (OB_ISNULL(replica)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replica ptr is null", KR(ret)); } else if (REPLICA_TYPE_FULL == replica->get_replica_type()) { @@ -845,7 +835,7 @@ int 
ObDRWorker::LocalityAlignment::do_generate_locality_task() replica_stat_desc, *replica, i))) { - LOG_WARN("fail to generate locality task from logonly replica", KR(ret)); + LOG_WARN("fail to generate locality task from readonly replica", KR(ret)); } } else { ret = OB_ERR_UNEXPECTED; @@ -884,7 +874,7 @@ int ObDRWorker::LocalityAlignment::generate_locality_task() return ret; } -int ObDRWorker::LocalityAlignment::generate_remove_paxos_task( +int ObDRWorker::LocalityAlignment::generate_remove_replica_task( ReplicaStatDesc &replica_stat_desc) { int ret = OB_SUCCESS; @@ -893,15 +883,15 @@ int ObDRWorker::LocalityAlignment::generate_remove_paxos_task( LOG_WARN("invalid argument", KR(ret), K(replica_stat_desc)); } else { void *raw_ptr = nullptr; - RemovePaxosLATask *task = nullptr; + RemoveReplicaLATask *task = nullptr; ObLSReplica *replica = replica_stat_desc.replica_; - if (OB_UNLIKELY(nullptr == replica)) { + if (OB_ISNULL(replica)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("replica ptr is null", KR(ret)); - } else if (nullptr == (raw_ptr = allocator_.alloc(sizeof(RemovePaxosLATask)))) { + LOG_WARN("replica ptr is null", KR(ret), KP(replica)); + } else if (nullptr == (raw_ptr = allocator_.alloc(sizeof(RemoveReplicaLATask)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc memory", KR(ret)); - } else if (nullptr == (task = new (raw_ptr) RemovePaxosLATask())) { + } else if (nullptr == (task = new (raw_ptr) RemoveReplicaLATask())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("construct task failed", KR(ret)); } else { @@ -909,54 +899,17 @@ int ObDRWorker::LocalityAlignment::generate_remove_paxos_task( task->replica_type_ = replica->get_replica_type(); task->memstore_percent_ = replica->get_memstore_percent(); task->member_time_us_ = replica->get_member_time_us(); + task->orig_paxos_replica_number_ = replica->get_paxos_replica_number(); + task->paxos_replica_number_ = replica->get_paxos_replica_number(); if (OB_FAIL(task_array_.push_back(task))) { - LOG_WARN("fail to push back", KR(ret)); + LOG_WARN("fail to push back", KR(ret), KPC(task)); } else { - LOG_INFO("success to push a remove paxos task to task_array", KR(ret), KPC(task)); + LOG_INFO("success to push a remove replica task to task_array", KR(ret), KPC(task)); } } // btw: no need to free memory when failed for arena, just destruct - if (OB_FAIL(ret) && nullptr != task) { - task->~RemovePaxosLATask(); - } - } - return ret; -} - -int ObDRWorker::LocalityAlignment::generate_remove_nonpaxos_task( - ReplicaStatDesc &replica_stat_desc) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!replica_stat_desc.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), K(replica_stat_desc)); - } else { - void *raw_ptr = nullptr; - RemoveNonPaxosLATask *task = nullptr; - ObLSReplica *replica = replica_stat_desc.replica_; - if (OB_UNLIKELY(nullptr == replica)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("replica ptr is null", KR(ret)); - } else if (nullptr == (raw_ptr = allocator_.alloc(sizeof(RemoveNonPaxosLATask)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to alloc memory", KR(ret)); - } else if (nullptr == (task = new (raw_ptr) RemoveNonPaxosLATask())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("construct task failed", KR(ret)); - } else { - task->remove_server_ = replica->get_server(); - task->replica_type_ = replica->get_replica_type(); - task->memstore_percent_ = replica->get_memstore_percent(); - task->member_time_us_ = replica->get_member_time_us(); - if (OB_FAIL(task_array_.push_back(task))) { - LOG_WARN("fail to push 
back", KR(ret)); - } else { - LOG_INFO("success to push a remove non paxos task to task_array", KR(ret), KPC(task)); - } - } - // btw: no need to free memory when failed for arena, just destruct - if (OB_FAIL(ret) && nullptr != task) { - task->~RemoveNonPaxosLATask(); + if (OB_FAIL(ret) && OB_NOT_NULL(task)) { + task->~RemoveReplicaLATask(); } } return ret; @@ -1142,7 +1095,7 @@ int ObDRWorker::LocalityAlignment::init_unit_set( return ret; } -int ObDRWorker::LocalityAlignment::try_review_remove_paxos_task( +int ObDRWorker::LocalityAlignment::try_review_remove_replica_task( UnitProvider &unit_provider, LATask *this_task, const LATask *&output_task, @@ -1150,10 +1103,16 @@ int ObDRWorker::LocalityAlignment::try_review_remove_paxos_task( { int ret = OB_SUCCESS; UNUSED(unit_provider); - RemovePaxosLATask *my_task = reinterpret_cast(this_task); - if (OB_UNLIKELY(nullptr == this_task || nullptr == my_task)) { + RemoveReplicaLATask *my_task = reinterpret_cast(this_task); + if (OB_ISNULL(this_task) || OB_ISNULL(my_task)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), KP(this_task), KP(my_task)); + } else if (REPLICA_TYPE_FULL != my_task->replica_type_) { + // no need to check when remove non-paxos replica + my_task->orig_paxos_replica_number_ = curr_paxos_replica_number_; + my_task->paxos_replica_number_ = curr_paxos_replica_number_; + output_task = my_task; + found = true; } else { found = false; int64_t new_paxos_replica_number = 0; @@ -1177,25 +1136,6 @@ int ObDRWorker::LocalityAlignment::try_review_remove_paxos_task( return ret; } -int ObDRWorker::LocalityAlignment::try_review_remove_nonpaxos_task( - UnitProvider &unit_provider, - LATask *this_task, - const LATask *&output_task, - bool &found) -{ - int ret = OB_SUCCESS; - UNUSED(unit_provider); - RemoveNonPaxosLATask *my_task = reinterpret_cast(this_task); - if (OB_UNLIKELY(nullptr == this_task || nullptr == my_task)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), KP(this_task), KP(my_task)); - } else { - output_task = my_task; - found = true; - } - return ret; -} - int ObDRWorker::LocalityAlignment::try_review_add_replica_task( UnitProvider &unit_provider, LATask *this_task, @@ -1364,31 +1304,21 @@ int ObDRWorker::LocalityAlignment::try_get_normal_locality_alignment_task( int64_t index = 0; for (index = task_idx_; !found && OB_SUCC(ret) && index < task_array_.count(); ++index) { LATask *this_task = task_array_.at(index); - if (OB_UNLIKELY(nullptr == this_task)) { + if (OB_ISNULL(this_task)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("this task ptr is null", KR(ret)); } else { switch (this_task->get_task_type()) { case RemovePaxos: - if (OB_FAIL(try_review_remove_paxos_task( - unit_provider, - this_task, - task, - found))) { - LOG_WARN("fail to try review remove paxos task", KR(ret), KPC(this_task), K(found)); - } else { - LOG_INFO("success to try review remove paxos task", KR(ret), KPC(this_task), K(found)); - } - break; case RemoveNonPaxos: - if (OB_FAIL(try_review_remove_nonpaxos_task( + if (OB_FAIL(try_review_remove_replica_task( unit_provider, this_task, task, found))) { - LOG_WARN("fail to try review remove nonpaxos task", KR(ret), KPC(this_task), K(found)); + LOG_WARN("fail to try review remove replica task", KR(ret), KPC(this_task), K(found)); } else { - LOG_INFO("success to try review remove nonpaxos task", KR(ret), KPC(this_task), K(found)); + LOG_INFO("success to try review remove replica task", KR(ret), KPC(this_task), K(found)); } break; case AddReplica: @@ -1534,6 +1464,52 @@ int 
ObDRWorker::UnitProvider::init( return ret; } +int ObDRWorker::UnitProvider::inner_get_valid_unit_( + const common::ObZone &zone, + const common::ObArray &unit_array, + share::ObUnitInfo &output_unit_info, + bool &found) +{ + int ret = OB_SUCCESS; + output_unit_info.reset(); + found = false; + if (OB_UNLIKELY(!inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not init", KR(ret)); + } else if (OB_ISNULL(unit_mgr_) || OB_UNLIKELY(0 >= unit_array.count())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("unit mgr ptr is null", KR(ret), KP(unit_mgr_), "unit_count", unit_array.count()); + } else { + bool server_is_active = false; + for (int64_t i = 0; OB_SUCC(ret) && i < unit_array.count(); ++i) { + server_is_active = false; + const share::ObUnitInfo &unit_info = unit_array.at(i); + const uint64_t unit_id = unit_info.unit_.unit_id_; + int hash_ret = OB_SUCCESS; + if (unit_info.unit_.zone_ != zone) { + // bypass, because we do not support operation between different zones + } else if (OB_FAIL(SVR_TRACER.check_server_active(unit_info.unit_.server_, server_is_active))) { + LOG_WARN("fail to check server active", KR(ret), "server", unit_info.unit_.server_); + } else if (!server_is_active) { + FLOG_INFO("server is not active", "server", unit_info.unit_.server_, K(server_is_active)); + } else if (!unit_info.unit_.is_active_status()) { + FLOG_INFO("unit status is not normal", K(unit_info)); + } else if (OB_HASH_EXIST == (hash_ret = unit_set_.exist_refactored(unit_id))) { + FLOG_INFO("unit existed", K(unit_id)); + } else if (OB_HASH_NOT_EXIST != hash_ret) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("exist refactored failed", KR(ret), KR(hash_ret)); + } else if (OB_FAIL(output_unit_info.assign(unit_info))) { + LOG_WARN("fail to assign unit info", KR(ret), K(unit_info)); + } else { + found = true; + break; + } + } + } + return ret; +} + int ObDRWorker::UnitProvider::get_unit( const common::ObZone &zone, const uint64_t unit_group_id, @@ -1543,76 +1519,29 @@ int ObDRWorker::UnitProvider::get_unit( if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret)); - } else if (OB_ISNULL(unit_mgr_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unit_mgr is null", KR(ret), KP(unit_mgr_)); } else { common::ObArray unit_array; bool found = false; + // 1. 
if unit_group_id is valid, try get valid unit in this unit group if (unit_group_id > 0) { if (OB_FAIL(unit_mgr_->get_unit_group(tenant_id_, unit_group_id, unit_array))) { LOG_WARN("fail to get unit group", KR(ret), K(tenant_id_), K(unit_group_id)); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < unit_array.count(); ++i) { - bool is_active = false; - const share::ObUnitInfo &this_info = unit_array.at(i); - const uint64_t unit_id = this_info.unit_.unit_id_; - int hash_ret = OB_SUCCESS; - if (this_info.unit_.zone_ != zone) { - // bypass, because we only support migrate in same zone - } else if (OB_FAIL(SVR_TRACER.check_server_active(this_info.unit_.server_, is_active))) { - LOG_WARN("fail to check server active", KR(ret), "server", this_info.unit_.server_); - } else if (!is_active) { - LOG_INFO("server is not active", "server", this_info.unit_.server_, K(is_active)); - break; // server not active - } else if (OB_HASH_EXIST == (hash_ret = unit_set_.exist_refactored(unit_id))) { - LOG_INFO("unit existed", K(unit_id)); - break; - } else if (OB_HASH_NOT_EXIST != hash_ret) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("exist refactored failed", KR(ret), KR(hash_ret)); - } else if (OB_FAIL(unit_info.assign(this_info))) { - LOG_WARN("fail to assign unit info", KR(ret)); - } else { - found = true; - break; - } - } + } else if (OB_FAIL(inner_get_valid_unit_(zone, unit_array, unit_info, found))) { + LOG_WARN("fail to get valid unit from certain unit group", KR(ret), K(zone), K(unit_array)); } } + // 2. if unit_group_id = 0 or no valid unit foudn in certain unit group, try get from all units if (OB_SUCC(ret) && !found) { unit_array.reset(); if (OB_FAIL(unit_mgr_->get_all_unit_infos_by_tenant(tenant_id_, unit_array))) { LOG_WARN("fail to get ll unit infos by tenant", KR(ret), K(tenant_id_)); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < unit_array.count(); ++i) { - bool is_active = false; - const share::ObUnitInfo &this_info = unit_array.at(i); - const uint64_t unit_id = this_info.unit_.unit_id_; - int hash_ret = OB_SUCCESS; - if (this_info.unit_.zone_ != zone) { - // bypass, because only support migrate in same zone - } else if (OB_FAIL(SVR_TRACER.check_server_active(this_info.unit_.server_, is_active))) { - LOG_WARN("fail to check server active", KR(ret), "server", this_info.unit_.server_); - } else if (!is_active) { - LOG_INFO("server is not active", "server", this_info.unit_.server_, K(is_active)); - } else if (OB_HASH_EXIST == (hash_ret = unit_set_.exist_refactored(unit_id))) { - LOG_INFO("unit existed", K(unit_id)); - } else if (OB_HASH_NOT_EXIST != hash_ret) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("exist refactored failed", KR(ret), KR(hash_ret)); - } else if (OB_FAIL(unit_info.assign(this_info))) { - LOG_WARN("fail to assign unit info", KR(ret)); - } else { - found = true; - break; - } - } + } else if (OB_FAIL(inner_get_valid_unit_(zone, unit_array, unit_info, found))) { + LOG_WARN("fail to get valid unit from all units in tenant", KR(ret), K(zone), K(unit_array)); } } if (OB_SUCC(ret) && !found) { ret = OB_ITER_END; - LOG_WARN("fail to get valid unit", KR(ret), K(found)); + LOG_WARN("fail to get valid unit", KR(ret), K(zone), K(found)); } } return ret; @@ -2259,30 +2188,34 @@ int ObDRWorker::check_can_generate_task( return ret; } -int ObDRWorker::construct_extra_infos_to_build_remove_paxos_replica_task( +int ObDRWorker::construct_extra_infos_to_build_remove_replica_task( const DRLSInfo &dr_ls_info, share::ObTaskId &task_id, int64_t &new_paxos_replica_number, int64_t 
&old_paxos_replica_number, - common::ObAddr &leader_addr) + common::ObAddr &leader_addr, + const ObReplicaType &replica_type) { int ret = OB_SUCCESS; bool found_new_paxos_replica_number = false; + bool is_paxos_replica = ObReplicaTypeCheck::is_paxos_replica_V2(replica_type); if (FALSE_IT(task_id.init(self_addr_))) { - } else if (OB_FAIL(generate_disaster_recovery_paxos_replica_number( + } else if (is_paxos_replica + && OB_FAIL(generate_disaster_recovery_paxos_replica_number( dr_ls_info, dr_ls_info.get_paxos_replica_number(), dr_ls_info.get_schema_replica_cnt(), MEMBER_CHANGE_SUB, new_paxos_replica_number, found_new_paxos_replica_number))) { - LOG_WARN("fail to generate disaster recovery paxos_replica_number", KR(ret), K(found_new_paxos_replica_number)); - } else if (!found_new_paxos_replica_number) { + LOG_WARN("fail to generate disaster recovery paxos_replica_number", KR(ret), K(found_new_paxos_replica_number)); + } else if (is_paxos_replica && !found_new_paxos_replica_number) { LOG_WARN("paxos_replica_number not found", K(dr_ls_info)); } else if (OB_FAIL(dr_ls_info.get_leader(leader_addr))) { LOG_WARN("fail to get leader", KR(ret)); } else { old_paxos_replica_number = dr_ls_info.get_paxos_replica_number(); + new_paxos_replica_number = is_paxos_replica ? new_paxos_replica_number : old_paxos_replica_number; } return ret; } @@ -2296,14 +2229,15 @@ int ObDRWorker::generate_remove_permanent_offline_replicas_and_push_into_task_ma const ObReplicaMember &remove_member, const int64_t &old_paxos_replica_number, const int64_t &new_paxos_replica_number, - int64_t &acc_dr_task) + int64_t &acc_dr_task, + const ObReplicaType &replica_type) { int ret = OB_SUCCESS; - ObRemoveLSPaxosReplicaTask remove_member_task; + ObRemoveLSReplicaTask remove_replica_task; if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret)); - } else if (OB_FAIL(remove_member_task.build( + } else if (OB_FAIL(remove_replica_task.build( task_key, tenant_id, ls_id, @@ -2315,14 +2249,16 @@ int ObDRWorker::generate_remove_permanent_offline_replicas_and_push_into_task_ma obrpc::ObAdminClearDRTaskArg::TaskType::AUTO, false/*skip change member list*/, ObDRTaskPriority::HIGH_PRI, - "remove permanent offline replica", + ObString(drtask::REMOVE_PERMANENT_OFFLINE_REPLICA), leader_addr, remove_member, old_paxos_replica_number, - new_paxos_replica_number))) { - LOG_WARN("fail to build remove member task", KR(ret)); - } else if (OB_FAIL(disaster_recovery_task_mgr_->add_task(remove_member_task))) { - LOG_WARN("fail to add task", KR(ret), K(remove_member_task)); + new_paxos_replica_number, + replica_type))) { + LOG_WARN("fail to build remove member task", KR(ret), K(task_key), K(tenant_id), K(ls_id), K(leader_addr), + K(remove_member), K(old_paxos_replica_number), K(new_paxos_replica_number), K(replica_type)); + } else if (OB_FAIL(disaster_recovery_task_mgr_->add_task(remove_replica_task))) { + LOG_WARN("fail to add task", KR(ret), K(remove_replica_task)); } else { acc_dr_task++; } @@ -2337,6 +2273,7 @@ int ObDRWorker::try_remove_permanent_offline_replicas( int ret = OB_SUCCESS; common::ObAddr leader; common::ObMemberList member_list; + GlobalLearnerList learner_list; uint64_t tenant_id = OB_INVALID_TENANT_ID; share::ObLSID ls_id; if (OB_UNLIKELY(!inited_)) { @@ -2346,7 +2283,7 @@ int ObDRWorker::try_remove_permanent_offline_replicas( LOG_WARN("has no leader, maybe not report yet", KR(ret), K(dr_ls_info)); } else if (dr_ls_info.get_paxos_replica_number() <= 0) { LOG_WARN("paxos_replica_number is invalid, maybe not report yet", 
KR(ret), K(dr_ls_info)); - } else if (OB_FAIL(dr_ls_info.get_leader_and_member_list(leader, member_list))) { + } else if (OB_FAIL(dr_ls_info.get_leader_and_member_list(leader, member_list, learner_list))) { LOG_WARN("fail to get leader and member list", KR(ret), K(dr_ls_info)); } else if (OB_UNLIKELY(0 >= member_list.get_member_number())) { ret = OB_ERR_UNEXPECTED; @@ -2371,6 +2308,24 @@ int ObDRWorker::try_remove_permanent_offline_replicas( K(dr_ls_info), K(only_for_display), K(replica_type), K(member_to_remove), K(acc_dr_task)); } } + // try generate permanent offline task for readonly replicas + for (int64_t index = 0; OB_SUCC(ret) && index < learner_list.get_member_number(); ++index) { + ObMember learner_to_remove; + common::ObReplicaType replica_type = REPLICA_TYPE_READONLY; + if (OB_FAIL(learner_list.get_member_by_index(index, learner_to_remove))) { + LOG_WARN("fail to get learner by index", KR(ret), K(index)); + } else if (OB_FAIL(do_single_replica_permanent_offline_( + tenant_id, + ls_id, + dr_ls_info, + only_for_display, + replica_type, + learner_to_remove, + acc_dr_task))) { + LOG_WARN("fail to do single replica permanent offline task for readonly replica", KR(ret), K(tenant_id), + K(ls_id), K(dr_ls_info), K(only_for_display), K(replica_type), K(learner_to_remove), K(acc_dr_task)); + } + } } FLOG_INFO("finish try remove permanent offline replica", KR(ret), K(tenant_id), K(ls_id), K(acc_dr_task)); return ret; @@ -2398,7 +2353,7 @@ int ObDRWorker::do_single_replica_permanent_offline_( } else if (OB_FAIL(SVR_TRACER.check_server_permanent_offline(member_to_remove.get_server(), is_offline))) { LOG_WARN("fail to check server permanent offline", KR(ret), K(member_to_remove)); } else if (is_offline) { - FLOG_INFO("found ls replica need to permanent offline", K(member_to_remove)); + FLOG_INFO("found ls replica need to permanent offline", K(tenant_id), K(ls_id), K(member_to_remove), K(replica_type), K(dr_ls_info)); share::ObTaskId task_id; int64_t new_paxos_replica_number; int64_t old_paxos_replica_number; @@ -2416,12 +2371,13 @@ int ObDRWorker::do_single_replica_permanent_offline_( ObDRTaskType task_type = ObReplicaTypeCheck::is_paxos_replica_V2(replica_type) ? 
ObDRTaskType::LS_REMOVE_PAXOS_REPLICA : ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA; - if (OB_FAIL(construct_extra_infos_to_build_remove_paxos_replica_task( + if (OB_FAIL(construct_extra_infos_to_build_remove_replica_task( dr_ls_info, task_id, new_paxos_replica_number, old_paxos_replica_number, - leader_addr))) { + leader_addr, + replica_type))) { LOG_WARN("fail to construct extra infos to build remove replica task"); } else if (only_for_display) { // only for display, no need to execute this task @@ -2468,8 +2424,10 @@ int ObDRWorker::do_single_replica_permanent_offline_( remove_member, old_paxos_replica_number, new_paxos_replica_number, - acc_dr_task))) { - LOG_WARN("fail to generate remove permanent offline task", KR(ret)); + acc_dr_task, + replica_type))) { + LOG_WARN("fail to generate remove permanent offline task", KR(ret), K(tenant_id), K(ls_id), K(leader_addr), + K(remove_member), K(old_paxos_replica_number), K(new_paxos_replica_number), K(replica_type)); } } } @@ -2601,7 +2559,7 @@ int ObDRWorker::generate_replicate_to_unit_and_push_into_task_manager( obrpc::ObAdminClearDRTaskArg::TaskType::AUTO, skip_change_member_list, ObDRTaskPriority::HIGH_PRI, - "replicate to unit task", + ObString(drtask::REPLICATE_REPLICA), dst_replica, src_member, data_source, @@ -2700,7 +2658,7 @@ int ObDRWorker::try_replicate_to_unit( ls_replica->get_replica_type(), old_paxos_replica_number, unit_stat_info->get_unit_info().unit_.server_, - "replicate to unit task"))) { + ObString(drtask::REPLICATE_REPLICA)))) { LOG_WARN("fail to init a ObLSReplicaTaskDisplayInfo", KR(ret)); } else if (OB_FAIL(add_display_info(display_info))) { LOG_WARN("fail to add display info", KR(ret), K(display_info)); @@ -2739,7 +2697,7 @@ int ObDRWorker::try_replicate_to_unit( return ret; } -int ObDRWorker::try_generate_remove_paxos_locality_alignment_task( +int ObDRWorker::try_generate_remove_replica_locality_alignment_task( DRLSInfo &dr_ls_info, const ObDRTaskKey &task_key, const LATask *task, @@ -2750,21 +2708,30 @@ int ObDRWorker::try_generate_remove_paxos_locality_alignment_task( bool sibling_task_executing = false; uint64_t tenant_id = OB_INVALID_ID; share::ObLSID ls_id; - if (OB_UNLIKELY(!task_key.is_valid() || nullptr == task)) { + if (OB_UNLIKELY(!task_key.is_valid()) || OB_ISNULL(task)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(task_key), KP(task)); } else if (OB_FAIL(dr_ls_info.get_ls_id(tenant_id, ls_id))) { LOG_WARN("fail to get ls id", KR(ret)); } else { - const RemovePaxosLATask *my_task = reinterpret_cast(task); + const RemoveReplicaLATask *my_task = reinterpret_cast(task); ObReplicaMember remove_member(my_task->remove_server_, my_task->member_time_us_, my_task->replica_type_, my_task->memstore_percent_); - ObRemoveLSPaxosReplicaTask remove_paxos_task; + ObRemoveLSReplicaTask remove_paxos_task; bool has_leader = false; common::ObAddr leader_addr; share::ObTaskId task_id; + ObString comment_to_set = ""; + if (ObReplicaTypeCheck::is_paxos_replica_V2(my_task->replica_type_)) { + comment_to_set.assign_ptr(drtask::REMOVE_LOCALITY_PAXOS_REPLICA, + strlen(drtask::REMOVE_LOCALITY_PAXOS_REPLICA)); + } else { + comment_to_set.assign_ptr(drtask::REMOVE_LOCALITY_NON_PAXOS_REPLICA, + strlen(drtask::REMOVE_LOCALITY_NON_PAXOS_REPLICA)); + } + if (FALSE_IT(task_id.init(self_addr_))) { //shall never be here } else if (OB_FAIL(check_has_leader_while_remove_replica( @@ -2788,67 +2755,18 @@ int ObDRWorker::try_generate_remove_paxos_locality_alignment_task( obrpc::ObAdminClearDRTaskArg::TaskType::AUTO, 
false,/*skip change member list*/ ObDRTaskPriority::HIGH_PRI, - "remove redundant paxos replica according to locality", + comment_to_set, leader_addr, remove_member, my_task->orig_paxos_replica_number_, - my_task->paxos_replica_number_))) { - LOG_WARN("fail to build task", KR(ret)); + my_task->paxos_replica_number_, + my_task->replica_type_))) { + LOG_WARN("fail to build task", KR(ret), K(task_key), K(tenant_id), K(ls_id), K(task_id), + K(leader_addr), K(remove_member), KPC(my_task)); } else if (OB_FAIL(disaster_recovery_task_mgr_->add_task(remove_paxos_task))) { LOG_WARN("fail to add task", KR(ret)); } else { - LOG_INFO("success to add a ObRemoveLSPaxosReplicaTask to task manager", KR(ret), K(remove_paxos_task)); - acc_dr_task++; - } - } - return ret; -} - -int ObDRWorker::try_generate_remove_non_paxos_locality_alignment_task( - DRLSInfo &dr_ls_info, - const ObDRTaskKey &task_key, - const LATask *task, - int64_t &acc_dr_task) -{ - int ret = OB_SUCCESS; - bool task_exist = false; - bool sibling_task_executing = false; - uint64_t tenant_id = OB_INVALID_ID; - share::ObLSID ls_id; - if (OB_UNLIKELY(!task_key.is_valid() || nullptr == task)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), K(task_key), KP(task)); - } else if (OB_FAIL(dr_ls_info.get_ls_id(tenant_id, ls_id))) { - LOG_WARN("fail to get ls id", KR(ret)); - } else { - const RemoveNonPaxosLATask *my_task = reinterpret_cast(task); - ObReplicaMember remove_member(my_task->remove_server_, - my_task->member_time_us_, - my_task->replica_type_, - my_task->memstore_percent_); - ObRemoveLSNonPaxosReplicaTask remove_non_paxos_task; - share::ObTaskId task_id; - if (FALSE_IT(task_id.init(self_addr_))) { - //shall never be here - } else if (OB_FAIL(remove_non_paxos_task.build( - task_key, - tenant_id, - ls_id, - task_id, - 0,/*schedule_time*/ - 0,/*generate_time*/ - GCONF.cluster_id, - 0,/*transmit data size*/ - obrpc::ObAdminClearDRTaskArg::TaskType::AUTO, - true,/*skip change member list*/ - ObDRTaskPriority::LOW_PRI, - "remove redundant non paxos replica according to locality", - remove_member))) { - LOG_WARN("fail to build task", KR(ret)); - } else if (OB_FAIL(disaster_recovery_task_mgr_->add_task(remove_non_paxos_task))) { - LOG_WARN("fail to add task", KR(ret)); - } else { - LOG_INFO("success to add a ObRemoveLSNonPaxosReplicaTask to task manager", KR(ret), K(remove_non_paxos_task)); + LOG_INFO("success to add a ObRemoveLSReplicaTask to task manager", KR(ret), K(remove_paxos_task)); acc_dr_task++; } } @@ -2882,6 +2800,15 @@ int ObDRWorker::try_generate_add_replica_locality_alignment_task( my_task->memstore_percent_); ObAddLSReplicaTask add_replica_task; share::ObTaskId task_id; + ObString comment_to_set; + if (ObReplicaTypeCheck::is_paxos_replica_V2(my_task->replica_type_)) { + comment_to_set.assign_ptr(drtask::ADD_LOCALITY_PAXOS_REPLICA, + strlen(drtask::ADD_LOCALITY_PAXOS_REPLICA)); + } else { + comment_to_set.assign_ptr(drtask::ADD_LOCALITY_NON_PAXOS_REPLICA, + strlen(drtask::ADD_LOCALITY_NON_PAXOS_REPLICA)); + } + if (FALSE_IT(task_id.init(self_addr_))) { //shall never be here } else if (OB_FAIL(choose_disaster_recovery_data_source( @@ -2910,7 +2837,7 @@ int ObDRWorker::try_generate_add_replica_locality_alignment_task( obrpc::ObAdminClearDRTaskArg::TaskType::AUTO, false,/*skip change member list*/ ObDRTaskPriority::HIGH_PRI, - "add paxos replica according to locality", + comment_to_set, dst_replica, data_source, my_task->orig_paxos_replica_number_, @@ -3000,7 +2927,7 @@ int 
ObDRWorker::try_generate_type_transform_locality_alignment_task( obrpc::ObAdminClearDRTaskArg::TaskType::AUTO, false,/*skip change member list*/ ObDRTaskPriority::HIGH_PRI, - "type transform according to locality", + ObString(drtask::TRANSFORM_LOCALITY_REPLICA_TYPE), dst_replica, src_member, data_source, @@ -3028,6 +2955,7 @@ int ObDRWorker::try_generate_modify_paxos_replica_number_locality_alignment_task bool sibling_task_executing = false; uint64_t tenant_id = OB_INVALID_ID; share::ObLSID ls_id; + GlobalLearnerList learner_list; if (OB_UNLIKELY(!task_key.is_valid() || nullptr == task)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(task_key), KP(task)); @@ -3043,7 +2971,8 @@ int ObDRWorker::try_generate_modify_paxos_replica_number_locality_alignment_task //shall never be here } else if (OB_FAIL(dr_ls_info.get_leader_and_member_list( leader_addr, - member_list))) { + member_list, + learner_list))) { LOG_WARN("fail to get leader", KR(ret)); } else if (OB_FAIL(modify_paxos_replica_number_task.build( task_key, @@ -3057,7 +2986,7 @@ int ObDRWorker::try_generate_modify_paxos_replica_number_locality_alignment_task obrpc::ObAdminClearDRTaskArg::TaskType::AUTO, true,/*skip change member list*/ ObDRTaskPriority::HIGH_PRI, - "modify paxos replica number according to locality", + ObString(drtask::MODIFY_PAXOS_REPLICA_NUMBER), leader_addr, my_task->orig_paxos_replica_number_, my_task->paxos_replica_number_, @@ -3080,31 +3009,21 @@ int ObDRWorker::try_generate_locality_alignment_task( { int ret = OB_SUCCESS; ObDRTaskKey task_key; - if (OB_UNLIKELY(nullptr == task)) { + if (OB_ISNULL(task)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), KP(task)); } else if (OB_FAIL(generate_task_key(dr_ls_info, task_key))) { LOG_WARN("fail to generate task key", KR(ret)); } else { switch (task->get_task_type()) { - case RemovePaxos: { - if (OB_FAIL(try_generate_remove_paxos_locality_alignment_task( - dr_ls_info, - task_key, - task, - acc_dr_task_cnt))) { - LOG_WARN("fail to try generate remove paxos task", - KR(ret), K(task_key), KPC(task)); - } - break; - } + case RemovePaxos: case RemoveNonPaxos: { - if (OB_FAIL(try_generate_remove_non_paxos_locality_alignment_task( + if (OB_FAIL(try_generate_remove_replica_locality_alignment_task( dr_ls_info, task_key, task, acc_dr_task_cnt))) { - LOG_WARN("fail to try generate remove non paxos task", + LOG_WARN("fail to try generate remove replica task", KR(ret), K(task_key), KPC(task)); } break; @@ -3185,30 +3104,22 @@ int ObDRWorker::record_task_plan_for_locality_alignment( } else { ObLSReplicaTaskDisplayInfo display_info; switch (task->get_task_type()) { - case RemovePaxos: { - const RemovePaxosLATask *my_task = reinterpret_cast(task); - task_type = ObDRTaskType::LS_REMOVE_PAXOS_REPLICA; + case RemovePaxos: + case RemoveNonPaxos: { + const RemoveReplicaLATask *my_task = reinterpret_cast(task); + task_type = RemovePaxos == task->get_task_type() ? ObDRTaskType::LS_REMOVE_PAXOS_REPLICA : ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA; source_replica_type = REPLICA_TYPE_MAX; target_replica_type = my_task->replica_type_; - task_priority = ObDRTaskPriority::HIGH_PRI; + task_priority = task_type == ObDRTaskType::LS_REMOVE_PAXOS_REPLICA ? 
ObDRTaskPriority::HIGH_PRI : ObDRTaskPriority::LOW_PRI; target_svr = my_task->remove_server_; execute_svr = leader_addr; source_replica_paxos_replica_number = my_task->orig_paxos_replica_number_; target_replica_paxos_replica_number = my_task->paxos_replica_number_; - comment = "remove redundant paxos replica according to locality"; - break; - } - case RemoveNonPaxos: { - const RemoveNonPaxosLATask *my_task = reinterpret_cast(task); - task_type = ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA; - source_replica_type = REPLICA_TYPE_MAX; - target_replica_type = my_task->replica_type_; - task_priority = ObDRTaskPriority::LOW_PRI; - target_svr = my_task->remove_server_; - execute_svr = my_task->remove_server_; - source_replica_paxos_replica_number = OB_INVALID_COUNT; - target_replica_paxos_replica_number = OB_INVALID_COUNT; - comment = "remove redundant non paxos replica according to locality"; + if (task_type == ObDRTaskType::LS_REMOVE_PAXOS_REPLICA) { + comment.assign_ptr(drtask::REMOVE_LOCALITY_PAXOS_REPLICA, strlen(drtask::REMOVE_LOCALITY_PAXOS_REPLICA)); + } else { + comment.assign_ptr(drtask::REMOVE_LOCALITY_NON_PAXOS_REPLICA, strlen(drtask::REMOVE_LOCALITY_NON_PAXOS_REPLICA)); + } break; } case AddReplica: { @@ -3235,7 +3146,11 @@ int ObDRWorker::record_task_plan_for_locality_alignment( execute_svr = my_task->dst_server_; source_replica_paxos_replica_number = my_task->orig_paxos_replica_number_; target_replica_paxos_replica_number = my_task->paxos_replica_number_; - comment = "add paxos replica according to locality"; + if (ObReplicaTypeCheck::is_paxos_replica_V2(target_replica_type)) { + comment.assign_ptr(drtask::ADD_LOCALITY_PAXOS_REPLICA, strlen(drtask::ADD_LOCALITY_PAXOS_REPLICA)); + } else { + comment.assign_ptr(drtask::ADD_LOCALITY_NON_PAXOS_REPLICA, strlen(drtask::ADD_LOCALITY_NON_PAXOS_REPLICA)); + } } break; } @@ -3267,7 +3182,7 @@ int ObDRWorker::record_task_plan_for_locality_alignment( execute_svr = my_task->dst_server_; source_replica_paxos_replica_number = my_task->orig_paxos_replica_number_; target_replica_paxos_replica_number = my_task->paxos_replica_number_; - comment = "type transform according to locality"; + comment.assign_ptr(drtask::TRANSFORM_LOCALITY_REPLICA_TYPE, strlen(drtask::TRANSFORM_LOCALITY_REPLICA_TYPE)); } break; } @@ -3281,7 +3196,7 @@ int ObDRWorker::record_task_plan_for_locality_alignment( execute_svr = leader_addr; source_replica_paxos_replica_number = my_task->orig_paxos_replica_number_; target_replica_paxos_replica_number = my_task->paxos_replica_number_; - comment = "modify paxos replica number according to locality"; + comment.assign_ptr(drtask::MODIFY_PAXOS_REPLICA_NUMBER, strlen(drtask::MODIFY_PAXOS_REPLICA_NUMBER)); break; } default: { @@ -3304,11 +3219,13 @@ int ObDRWorker::record_task_plan_for_locality_alignment( source_replica_paxos_replica_number, execute_svr, comment))) { - LOG_WARN("fail to init a ObLSReplicaTaskDisplayInfo", KR(ret)); + LOG_WARN("fail to init a ObLSReplicaTaskDisplayInfo", KR(ret), K(tenant_id), K(ls_id), K(task_type), + K(task_priority), K(target_svr), K(target_replica_type), K(target_replica_paxos_replica_number), + K(source_svr), K(source_replica_type), K(source_replica_paxos_replica_number), K(execute_svr), K(comment)); } else if (OB_FAIL(add_display_info(display_info))) { FLOG_WARN("fail to add display info", KR(ret), K(display_info)); } else { - FLOG_INFO("success to add display info", KR(ret), K(display_info)); + FLOG_INFO("success to add display info", KR(ret), K(display_info)); } } return ret; @@ -3489,13 +3406,21 @@ 
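Several hunks above collapse the separate paxos and non-paxos remove paths into one, deriving the task type, priority and comment from the replica category at run time instead of keeping two copies of the code. A compact stand-alone model of that dispatch, with simplified enums standing in for ObReplicaType, ObDRTaskType and ObDRTaskPriority (FULL is treated as the paxos case here for illustration):

// Simplified stand-ins for the real enums.
enum class ReplicaType { FULL, READONLY };
enum class TaskType { RemovePaxos, RemoveNonPaxos };
enum class TaskPrio { HIGH, LOW };

struct RemoveReplicaTaskModel {
  ReplicaType replica_type = ReplicaType::FULL;

  static bool is_paxos(const ReplicaType type) { return ReplicaType::FULL == type; }

  // Mirrors the ternaries above: the category is computed, not stored twice.
  TaskType task_type() const {
    return is_paxos(replica_type) ? TaskType::RemovePaxos : TaskType::RemoveNonPaxos;
  }
  // Paxos removals stay high priority, non-paxos removals low priority.
  TaskPrio priority() const {
    return is_paxos(replica_type) ? TaskPrio::HIGH : TaskPrio::LOW;
  }
};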
int ObDRWorker::generate_cancel_unit_migration_task( int64_t &acc_dr_task) { int ret = OB_SUCCESS; + ObRemoveLSReplicaTask remove_member_task; + ObString comment_to_set = ""; + ObReplicaType replica_type = is_paxos_replica_related ? REPLICA_TYPE_FULL : REPLICA_TYPE_READONLY; + if (is_paxos_replica_related) { + comment_to_set.assign_ptr(drtask::CANCEL_MIGRATE_UNIT_WITH_PAXOS_REPLICA, + strlen(drtask::CANCEL_MIGRATE_UNIT_WITH_PAXOS_REPLICA)); + } else { + comment_to_set.assign_ptr(drtask::CANCEL_MIGRATE_UNIT_WITH_NON_PAXOS_REPLICA, + strlen(drtask::CANCEL_MIGRATE_UNIT_WITH_NON_PAXOS_REPLICA)); + } + if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret)); - } else { - if (is_paxos_replica_related) { - ObRemoveLSPaxosReplicaTask remove_member_task; - if (OB_FAIL(remove_member_task.build( + } else if (OB_FAIL(remove_member_task.build( task_key, tenant_id, ls_id, @@ -3507,40 +3432,17 @@ int ObDRWorker::generate_cancel_unit_migration_task( obrpc::ObAdminClearDRTaskArg::TaskType::AUTO, false/*skip change member list*/, ObDRTaskPriority::HIGH_PRI, - "cancel migrate unit remove paxos replica", + comment_to_set, leader_addr, remove_member, old_paxos_replica_number, - new_paxos_replica_number))) { - LOG_WARN("fail to build remove member task", KR(ret)); - } else if (OB_FAIL(disaster_recovery_task_mgr_->add_task(remove_member_task))) { - LOG_WARN("fail to add task", KR(ret), K(remove_member_task)); - } else { - ++acc_dr_task; - } - } else { - ObRemoveLSNonPaxosReplicaTask remove_non_paxos_task; - if (OB_FAIL(remove_non_paxos_task.build( - task_key, - tenant_id, - ls_id, - task_id, - 0,/*schedule_time*/ - 0,/*generate_time*/ - GCONF.cluster_id, - 0/*transmit_data_size*/, - obrpc::ObAdminClearDRTaskArg::TaskType::AUTO, - true,/*skip change member list*/ - ObDRTaskPriority::LOW_PRI, - "cancel migrate unit remove non paxos replica", - remove_member))) { - LOG_WARN("fail to build remove member task", KR(ret)); - } else if (OB_FAIL(disaster_recovery_task_mgr_->add_task(remove_non_paxos_task))) { - LOG_WARN("fail to add task", KR(ret), K(remove_non_paxos_task)); - } else { - ++acc_dr_task; - } - } + new_paxos_replica_number, + replica_type))) { + LOG_WARN("fail to build remove member task", KR(ret)); + } else if (OB_FAIL(disaster_recovery_task_mgr_->add_task(remove_member_task))) { + LOG_WARN("fail to add task", KR(ret), K(remove_member_task)); + } else { + ++acc_dr_task; } return ret; } @@ -3607,6 +3509,21 @@ int ObDRWorker::try_cancel_unit_migration( ls_replica->get_member_time_us(), ls_replica->get_replica_type(), ls_replica->get_memstore_percent()); + ObDRTaskType task_type = is_paxos_replica_related + ? ObDRTaskType::LS_REMOVE_PAXOS_REPLICA + : ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA; + ObDRTaskPriority task_priority = is_paxos_replica_related + ? ObDRTaskPriority::HIGH_PRI + : ObDRTaskPriority::LOW_PRI; + ObString comment_to_set = ""; + if (is_paxos_replica_related) { + comment_to_set.assign_ptr(drtask::CANCEL_MIGRATE_UNIT_WITH_PAXOS_REPLICA, + strlen(drtask::CANCEL_MIGRATE_UNIT_WITH_PAXOS_REPLICA)); + } else { + comment_to_set.assign_ptr(drtask::CANCEL_MIGRATE_UNIT_WITH_NON_PAXOS_REPLICA, + strlen(drtask::CANCEL_MIGRATE_UNIT_WITH_NON_PAXOS_REPLICA)); + } + if (OB_FAIL(construct_extra_info_to_build_cancael_migration_task( is_paxos_replica_related, dr_ls_info, @@ -3623,12 +3540,8 @@ int ObDRWorker::try_cancel_unit_migration( if (OB_FAIL(display_info.init( tenant_id, ls_id, - is_paxos_replica_related - ? 
ObDRTaskType::LS_REMOVE_PAXOS_REPLICA - : ObDRTaskType::LS_REMOVE_NON_PAXOS_REPLICA, - is_paxos_replica_related - ? ObDRTaskPriority::HIGH_PRI - : ObDRTaskPriority::LOW_PRI, + task_type, + task_priority, ls_replica->get_server(), ls_replica->get_replica_type(), new_paxos_replica_number, @@ -3636,15 +3549,13 @@ int ObDRWorker::try_cancel_unit_migration( REPLICA_TYPE_MAX, old_paxos_replica_number, leader_addr, - is_paxos_replica_related - ? "cancel migrate unit remove paxos replica" - : "cancel migrate unit remove non paxos replica"))) { + comment_to_set))) { LOG_WARN("fail to init a ObLSReplicaTaskDisplayInfo", KR(ret)); } else if (OB_FAIL(add_display_info(display_info))) { LOG_WARN("fail to add display info", KR(ret), K(display_info)); } else { LOG_INFO("success to add display info", KR(ret), K(display_info)); - } + } } else if (OB_FAIL(check_can_generate_task( acc_dr_task, need_check_has_leader_while_remove_replica, @@ -3800,6 +3711,15 @@ int ObDRWorker::generate_migrate_to_unit_task( { int ret = OB_SUCCESS; ObMigrateLSReplicaTask migrate_task; + ObString comment_to_set; + if (is_unit_in_group_related) { + comment_to_set.assign_ptr(drtask::MIGRATE_REPLICA_DUE_TO_UNIT_GROUP_NOT_MATCH, + strlen(drtask::MIGRATE_REPLICA_DUE_TO_UNIT_GROUP_NOT_MATCH)); + } else { + comment_to_set.assign_ptr(drtask::MIGRATE_REPLICA_DUE_TO_UNIT_NOT_MATCH, + strlen(drtask::MIGRATE_REPLICA_DUE_TO_UNIT_NOT_MATCH)); + } + if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret)); @@ -3815,9 +3735,7 @@ int ObDRWorker::generate_migrate_to_unit_task( obrpc::ObAdminClearDRTaskArg::TaskType::AUTO, skip_change_member_list, ObDRTaskPriority::LOW_PRI, - is_unit_in_group_related - ? "migrate replica due to unit group not match" - : "migrate replica due to unit not match", + comment_to_set, dst_replica, src_member, data_source, @@ -3900,6 +3818,15 @@ int ObDRWorker::try_migrate_to_unit( ObTimeUtility::current_time(), ls_replica->get_replica_type(), ls_replica->get_memstore_percent()); + ObString comment_to_set = ""; + + if (is_unit_in_group_related) { + comment_to_set.assign_ptr(drtask::MIGRATE_REPLICA_DUE_TO_UNIT_GROUP_NOT_MATCH, + strlen(drtask::MIGRATE_REPLICA_DUE_TO_UNIT_GROUP_NOT_MATCH)); + } else { + comment_to_set.assign_ptr(drtask::MIGRATE_REPLICA_DUE_TO_UNIT_NOT_MATCH, + strlen(drtask::MIGRATE_REPLICA_DUE_TO_UNIT_NOT_MATCH)); + } if (OB_FAIL(construct_extra_infos_for_generate_migrate_to_unit_task( dr_ls_info, *ls_replica, @@ -3935,9 +3862,7 @@ int ObDRWorker::try_migrate_to_unit( is_unit_in_group_related ? unit_in_group_stat_info->get_unit_info().unit_.server_ : unit_stat_info->get_unit_info().unit_.server_, - is_unit_in_group_related - ? 
"migrate replica due to unit group not match" - : "migrate replica due to unit not match"))) { + comment_to_set))) { LOG_WARN("fail to init a ObLSReplicaTaskDisplayInfo", KR(ret)); } else if (OB_FAIL(add_display_info(display_info))) { LOG_WARN("fail to add display info", KR(ret), K(display_info)); @@ -4297,6 +4222,7 @@ int ObDRWorker::check_ls_only_in_member_list_( ret = OB_ERR_UNEXPECTED; LOG_WARN("leader member list has no member", KR(ret), "member_lsit", leader_replica->get_member_list()); } else { + // check member list for (int64_t i = 0; OB_SUCC(ret) && i < leader_replica->get_member_list().count(); ++i) { const share::ObLSReplica *replica = nullptr; const common::ObAddr &server = leader_replica->get_member_list().at(i).get_server(); @@ -4305,6 +4231,16 @@ int ObDRWorker::check_ls_only_in_member_list_( LOG_WARN("fail to find replica", KR(ret), K(inner_ls_info), K(server)); } } + // check learner list + for (int64_t index = 0; OB_SUCC(ret) && index < leader_replica->get_learner_list().get_member_number(); ++index) { + common::ObAddr server_to_check; + const share::ObLSReplica *replica = nullptr; + if (OB_FAIL(leader_replica->get_learner_list().get_server_by_index(index,server_to_check))) { + LOG_WARN("fail to get learner by index", KR(ret), K(index)); + } else if (OB_FAIL(inner_ls_info.find(server_to_check, replica))) { + LOG_WARN("fail to find read only replica", KR(ret), K(inner_ls_info), K(server_to_check)); + } + } } return ret; } diff --git a/src/rootserver/ob_disaster_recovery_worker.h b/src/rootserver/ob_disaster_recovery_worker.h index ddd1f3fece..671be185ff 100644 --- a/src/rootserver/ob_disaster_recovery_worker.h +++ b/src/rootserver/ob_disaster_recovery_worker.h @@ -69,7 +69,6 @@ public: int assign(const ObLSReplicaTaskDisplayInfo &other); int64_t to_string(char *buf, const int64_t buf_len) const; - inline const uint64_t &get_tenant_id() const { return tenant_id_; } inline const share::ObLSID &get_ls_id() const { return ls_id_; } inline const ObDRTaskType &get_task_type() const { return task_type_; } @@ -222,10 +221,10 @@ private: int ret_; }; - struct RemovePaxosLATask : public LATask + struct RemoveReplicaLATask : public LATask { public: - RemovePaxosLATask() + RemoveReplicaLATask() : LATask(), remove_server_(), replica_type_(REPLICA_TYPE_MAX), @@ -233,22 +232,24 @@ private: member_time_us_(-1), orig_paxos_replica_number_(0), paxos_replica_number_(0) {} - virtual ~RemovePaxosLATask() {} + virtual ~RemoveReplicaLATask() {} public: - virtual LATaskType get_task_type() const override { return RemovePaxos; } - + virtual LATaskType get_task_type() const override { return ObReplicaTypeCheck::is_paxos_replica_V2(replica_type_) + ? RemovePaxos + : RemoveNonPaxos; } virtual LATaskPrio get_task_priority() const override { - LATaskPrio priority = LATaskPrio::LA_P_REMOVE_PAXOS; + LATaskPrio priority = ObReplicaTypeCheck::is_paxos_replica_V2(replica_type_) + ? 
LATaskPrio::LA_P_REMOVE_PAXOS + : LATaskPrio::LA_P_REMOVE_NON_PAXOS; return priority; } - VIRTUAL_TO_STRING_KV("task_type", get_task_type(), - K(remove_server_), - K(replica_type_), - K(memstore_percent_), - K(member_time_us_), - K(orig_paxos_replica_number_), - K(paxos_replica_number_)); + K_(remove_server), + K_(replica_type), + K_(memstore_percent), + K_(member_time_us), + K_(orig_paxos_replica_number), + K_(paxos_replica_number)); public: common::ObAddr remove_server_; ObReplicaType replica_type_; @@ -258,36 +259,6 @@ private: int64_t paxos_replica_number_; }; - struct RemoveNonPaxosLATask : public LATask - { - public: - RemoveNonPaxosLATask() - : LATask(), - remove_server_(), - replica_type_(REPLICA_TYPE_MAX), - memstore_percent_(100), - member_time_us_(-1) {} - virtual ~RemoveNonPaxosLATask() {} - public: - virtual LATaskType get_task_type() const override { return RemoveNonPaxos; } - - virtual LATaskPrio get_task_priority() const override { - LATaskPrio priority = LATaskPrio::LA_P_REMOVE_NON_PAXOS; - return priority; - } - - VIRTUAL_TO_STRING_KV("task_type", get_task_type(), - K(remove_server_), - K(replica_type_), - K(memstore_percent_), - K(member_time_us_)); - public: - common::ObAddr remove_server_; - ObReplicaType replica_type_; - int64_t memstore_percent_; - int64_t member_time_us_; - }; - struct AddReplicaLATask : public LATask { public: @@ -305,7 +276,6 @@ private: virtual ~AddReplicaLATask() {} public: virtual LATaskType get_task_type() const override{ return AddReplica; } - virtual LATaskPrio get_task_priority() const override { LATaskPrio priority = LATaskPrio::LA_P_MAX; if (common::REPLICA_TYPE_FULL == replica_type_) { @@ -362,7 +332,6 @@ private: virtual ~TypeTransformLATask() {} public: virtual LATaskType get_task_type() const override { return TypeTransform; } - virtual LATaskPrio get_task_priority() const override { LATaskPrio priority = LATaskPrio::LA_P_MAX; if (common::REPLICA_TYPE_FULL == dst_replica_type_ @@ -377,7 +346,6 @@ private: } else {} // default priority value return priority; } - VIRTUAL_TO_STRING_KV("task_type", get_task_type(), K(zone_), K(dst_server_), @@ -415,7 +383,6 @@ private: virtual ~ModifyPaxosReplicaNumberLATask() {} public: virtual LATaskType get_task_type() const override { return ModifyPaxosReplicaNumber; } - virtual LATaskPrio get_task_priority() const override { LATaskPrio priority = LATaskPrio::LA_P_MODIFY_PAXOS_REPLICA_NUMBER; return priority; @@ -557,6 +524,12 @@ private: const common::ObZone &zone, const uint64_t unit_group_id, share::ObUnitInfo &unit_info); + private: + int inner_get_valid_unit_( + const common::ObZone &zone, + const common::ObArray &unit_array, + share::ObUnitInfo &output_unit_info, + bool &found); private: bool inited_; uint64_t tenant_id_; @@ -569,7 +542,7 @@ private: ReplicaDescArray *, common::hash::NoPthreadDefendMode> LocalityMap; typedef common::ObArray ReplicaStatMap; - + class LocalityAlignment { public: @@ -612,9 +585,7 @@ private: ReplicaStatDesc &replica_stat_desc, const int64_t index); // generate specific task - int generate_remove_paxos_task( - ReplicaStatDesc &replica_stat_desc); - int generate_remove_nonpaxos_task( + int generate_remove_replica_task( ReplicaStatDesc &replica_stat_desc); int generate_type_transform_task( ReplicaStatDesc &replica_stat_desc, @@ -633,12 +604,7 @@ private: int try_get_normal_locality_alignment_task( UnitProvider &unit_provider, const LATask *&task); - int try_review_remove_paxos_task( - UnitProvider &unit_provider, - LATask *my_task, - const LATask *&output_task, - bool 
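The learner-list pass added a little earlier to check_ls_only_in_member_list_() walks the leader's learner list in addition to its member list. A simplified, self-contained model of that extra check, with std::string and std::set standing in for ObAddr, GlobalLearnerList and the meta-table snapshot (the real code calls get_server_by_index() and inner_ls_info.find()):

#include <set>
#include <string>
#include <vector>

using Addr = std::string;  // stand-in for common::ObAddr

// Returns true when every learner reported by the leader also has a replica row
// in the meta-table snapshot; mirrors the new R-replica check in that loop.
bool all_learners_have_replica(const std::vector<Addr> &learner_list,
                               const std::set<Addr> &servers_with_replica)
{
  for (const Addr &server : learner_list) {
    if (servers_with_replica.count(server) == 0) {
      return false;  // corresponds to inner_ls_info.find() failing for the learner
    }
  }
  return true;
}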
&found); - int try_review_remove_nonpaxos_task( + int try_review_remove_replica_task( UnitProvider &unit_provider, LATask *my_task, const LATask *&output_task, @@ -738,12 +704,13 @@ private: ObDRTaskKey &task_key, bool &can_generate); - int construct_extra_infos_to_build_remove_paxos_replica_task( + int construct_extra_infos_to_build_remove_replica_task( const DRLSInfo &dr_ls_info, share::ObTaskId &task_id, int64_t &new_paxos_replica_number, int64_t &old_paxos_replica_number, - common::ObAddr &leader_addr); + common::ObAddr &leader_addr, + const ObReplicaType &replica_type); int generate_remove_permanent_offline_replicas_and_push_into_task_manager( const ObDRTaskKey task_key, @@ -754,7 +721,8 @@ private: const ObReplicaMember &remove_member, const int64_t &old_paxos_replica_number, const int64_t &new_paxos_replica_number, - int64_t &acc_dr_task); + int64_t &acc_dr_task, + const ObReplicaType &replica_type); int try_replicate_to_unit( const bool only_for_display, @@ -803,7 +771,7 @@ private: const bool only_for_display, DRLSInfo &dr_ls_info, int64_t &acc_dr_task); - + int try_shrink_resource_pools( const bool &only_for_display, DRLSInfo &dr_ls_info, @@ -896,7 +864,7 @@ private: int generate_task_key( const DRLSInfo &dr_ls_info, ObDRTaskKey &task_key) const; - + int add_display_info(const ObLSReplicaTaskDisplayInfo &display_info); int record_task_plan_for_locality_alignment( @@ -908,24 +876,18 @@ private: const LATask *task, int64_t &acc_dr_task); - int try_generate_remove_paxos_locality_alignment_task( + int try_generate_remove_replica_locality_alignment_task( DRLSInfo &dr_ls_info, const ObDRTaskKey &task_key, const LATask *task, int64_t &acc_dr_task); - - int try_generate_remove_non_paxos_locality_alignment_task( - DRLSInfo &dr_ls_info, - const ObDRTaskKey &task_key, - const LATask *task, - int64_t &acc_dr_task); - + int try_generate_add_replica_locality_alignment_task( DRLSInfo &dr_ls_info, const ObDRTaskKey &task_key, const LATask *task, - int64_t &acc_dr_task); - + int64_t &acc_dr_task); + int try_generate_type_transform_locality_alignment_task( DRLSInfo &dr_ls_info, const ObDRTaskKey &task_key, diff --git a/src/rootserver/ob_index_builder.cpp b/src/rootserver/ob_index_builder.cpp index afc4ac9747..603bf15464 100644 --- a/src/rootserver/ob_index_builder.cpp +++ b/src/rootserver/ob_index_builder.cpp @@ -764,6 +764,7 @@ int ObIndexBuilder::generate_schema( const bool need_generate_index_schema_column = (is_index_local_storage || global_index_without_column_info); schema.set_table_mode(data_schema.get_table_mode_flag()); schema.set_table_state_flag(data_schema.get_table_state_flag()); + schema.set_duplicate_scope(data_schema.get_duplicate_scope()); if (OB_FAIL(set_basic_infos(arg, data_schema, schema))) { LOG_WARN("set_basic_infos failed", K(arg), K(data_schema), K(ret)); } else if (need_generate_index_schema_column diff --git a/src/rootserver/ob_lob_meta_builder.cpp b/src/rootserver/ob_lob_meta_builder.cpp index 935ee89ae5..e8e9f4b8df 100644 --- a/src/rootserver/ob_lob_meta_builder.cpp +++ b/src/rootserver/ob_lob_meta_builder.cpp @@ -141,6 +141,7 @@ int ObLobMetaBuilder::set_basic_infos( aux_lob_meta_schema.set_pctfree(data_schema.get_pctfree()); aux_lob_meta_schema.set_storage_format_version(data_schema.get_storage_format_version()); aux_lob_meta_schema.set_progressive_merge_round(data_schema.get_progressive_merge_round()); + aux_lob_meta_schema.set_duplicate_scope(data_schema.get_duplicate_scope()); if 
(OB_FAIL(aux_lob_meta_schema.set_compress_func_name(data_schema.get_compress_func_name()))) { LOG_WARN("set_compress_func_name failed", K(data_schema)); } diff --git a/src/rootserver/ob_lob_piece_builder.cpp b/src/rootserver/ob_lob_piece_builder.cpp index de49c2fb40..6e95f2a24c 100644 --- a/src/rootserver/ob_lob_piece_builder.cpp +++ b/src/rootserver/ob_lob_piece_builder.cpp @@ -140,6 +140,7 @@ int ObLobPieceBuilder::set_basic_infos( aux_lob_piece_schema.set_pctfree(data_schema.get_pctfree()); aux_lob_piece_schema.set_storage_format_version(data_schema.get_storage_format_version()); aux_lob_piece_schema.set_progressive_merge_round(data_schema.get_progressive_merge_round()); + aux_lob_piece_schema.set_duplicate_scope(data_schema.get_duplicate_scope()); if (OB_FAIL(aux_lob_piece_schema.set_compress_func_name(data_schema.get_compress_func_name()))) { LOG_WARN("set_compress_func_name failed", K(data_schema)); } diff --git a/src/rootserver/ob_locality_util.cpp b/src/rootserver/ob_locality_util.cpp index 361a8a0d2b..ab953741ff 100644 --- a/src/rootserver/ob_locality_util.cpp +++ b/src/rootserver/ob_locality_util.cpp @@ -657,18 +657,17 @@ int ObLocalityDistribution::RawLocalityIter::get_replica_arrangements( while (OB_SUCC(ret) && OB_SUCC(get_next_replica_arrangement( cursor, end, replica_type, replica_num, memstore_percent))) { - if (OB_UNLIKELY(replica_type != FULL_REPLICA)) { - // TODO: in 4.0 we only support f-replica in locality, other types will be supported later + if (OB_UNLIKELY(FULL_REPLICA != replica_type + && READONLY_REPLICA != replica_type)) { + // TODO: F-replica is supported since 4.0, + // R-replica is supported since 4.2, + // other types will be supported later INVALID_LOCALITY(); switch (replica_type) { case LOGONLY_REPLICA: ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "logonly-replica"); break; - case READONLY_REPLICA: - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "readonly-replica"); - break; case ENCRYPTION_LOGONLY_REPLICA: ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "encryption-logonly-replica"); break; @@ -833,6 +832,10 @@ int ObLocalityDistribution::RawLocalityIter::get_replica_attribute_recursively( // failed, } else if (OB_FAIL(check_right_brace_and_afterwards_syntax(cursor, end))) { LOG_WARN("fail to check right brace and afterwards syntax", K(ret)); + } else if (replica_num > 1) { + // each zone should have only one replica + INVALID_LOCALITY(); + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "locality, each zone should have only one replica"); + } } else { int64_t remain = end - cursor; @@ -925,15 +928,9 @@ int ObLocalityDistribution::RawLocalityIter::get_replica_attribute( replica_num = 1; memstore_percent = MAX_MEMSTORE_PERCENT; } else if (COMMA_TOKEN == locality_str_[cursor]) { - // the keywords 'replica_num' and 'memstore_percent' are not specified - replica_num = 1; - memstore_percent = MAX_MEMSTORE_PERCENT; - inc_cursor(cursor); // pass comma - jump_over_blanks(cursor, end); - if (cursor >= end) { - INVALID_LOCALITY(); // a ',' token before the '@' token is not allowed - LOG_USER_ERROR(OB_INVALID_ARGUMENT, "locality, illegal , before @ token"); - } + // in 4.x, we support only one replica in each zone as the locality describes + INVALID_LOCALITY(); // a ',' token before the '@' token is not allowed + LOG_USER_ERROR(OB_INVALID_ARGUMENT, "locality, each zone should have only one replica type"); } else if (LEFT_BRACE_TOKEN == locality_str_[cursor]) { inc_cursor(cursor); // pass left brace token jump_over_blanks(cursor, end); diff --git 
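The ob_locality_util.cpp changes above tighten locality parsing in two ways: only F and R replica types are accepted, and a zone may describe at most one replica (a replica_num greater than one, or a second comma-separated type, is rejected). A stand-alone sketch of those checks under simplified types; the real parser reports errors through INVALID_LOCALITY(), OB_NOT_SUPPORTED and LOG_USER_ERROR rather than exceptions:

#include <stdexcept>

enum class LocalityReplicaType { FULL, READONLY, LOGONLY, ENCRYPTION_LOGONLY };

// Validates one zone's replica arrangement the way the patched parser does.
void check_zone_locality(const LocalityReplicaType type, const long replica_num)
{
  if (LocalityReplicaType::FULL != type && LocalityReplicaType::READONLY != type) {
    throw std::invalid_argument("only F and R replicas are supported in locality");
  }
  if (replica_num > 1) {
    throw std::invalid_argument("each zone should have only one replica");
  }
}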
a/src/rootserver/ob_lost_replica_checker.cpp b/src/rootserver/ob_lost_replica_checker.cpp index 1fc20ed0c4..ef6594d3eb 100644 --- a/src/rootserver/ob_lost_replica_checker.cpp +++ b/src/rootserver/ob_lost_replica_checker.cpp @@ -227,37 +227,22 @@ int ObLostReplicaChecker::check_lost_replica_(const ObLSInfo &ls_info, LOG_WARN("check lost server failed", "server", replica.get_server(), K(ret)); } else if (is_lost_server) { /* - * 下面的逻辑处理了两种宕机情况: - * 1. paxos replica 不在 member list 中,永久下线 - * 2. nonpaxos replica 永久下线 - * - * 发生永久下线之前,副本可能都已经迁移,对应内部表记录也被清理。但可能存在异常: - * - 内部表记录清理失败 - * - 发生永久下线之前,副本没有被迁移 - * - * 为了应对这两种异常,需要在这里做内部表记录回收清理。 - * - * Knonw Issue: 迁移失败、又在这里做了副本回收,会出现少副本的情况。 - * 稍后走补副本逻辑补充副本。R@region 时补副本可能补充到其它 zone。 + * determine whether a replica is lost; a lost replica can be cleaned from the meta table + * a replica is lost if it satisfies the conditions below: + * 1. server is lost. + * 2. replica is not in service. + * (F-replica is in service when it exists in member_list; + * R-replica is in service when it exists in learner_list) + * 3. replica is in service but does not exist in __all_ls_status + * (if it exists, let remove_member handle this replica; + * if not, this replica is lost, maybe the GC module didn't clean it) * */ - /* - * 该逻辑功能是判断一个副本是否需要删除。前提都是该server已经处于永久下线 - * 1.首先根据是否在leader的member_list中,如果不在member_list中,那么该replica是需要被删除的。 - * 非paxos副本或者非in_service中的副本需要直接删除 - * 2.如果在member_list中则需要判断在日志流状态表存在, - * 当日志流状态表中存在,这里不处理,交给remove_member处理。 - * 当日志流状态表中不存在,则可以直接处理了,这种属于GC的残留。 - * - */ - if (!replica.is_in_service() - || !ObReplicaTypeCheck::is_paxos_replica_V2(replica.get_replica_type())) { + if (!replica.is_in_service()) { is_lost_replica = true; - LOG_INFO("replica not in service or not paxos replica", K(replica)); + LOG_INFO("replica not in service", K(replica)); } else { // go on check ls_status - } - if (OB_SUCC(ret) && !is_lost_replica) { ObLSStatusOperator status_op; share::ObLSStatusInfo status_info; if (OB_ISNULL(GCTX.sql_proxy_)) { diff --git a/src/rootserver/ob_primary_ls_service.cpp b/src/rootserver/ob_primary_ls_service.cpp index fa9128a17b..c5eb57302f 100755 --- a/src/rootserver/ob_primary_ls_service.cpp +++ b/src/rootserver/ob_primary_ls_service.cpp @@ -710,16 +710,18 @@ int ObTenantLSInfo::create_new_ls_for_recovery( const share::ObLSID &ls_id, const uint64_t ls_group_id, const SCN &create_scn, - ObMySQLTransaction &trans) + ObMySQLTransaction &trans, + const share::ObLSFlag &ls_flag) { int ret = OB_SUCCESS; const bool is_recovery = true; int64_t info_index = OB_INVALID_INDEX_INT64; if (OB_UNLIKELY(!ls_id.is_valid() || OB_INVALID_ID == ls_group_id - || !create_scn.is_valid())) { + || !create_scn.is_valid() + || !ls_flag.is_valid())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("ls id is invalid", KR(ret), K(ls_id), K(ls_group_id), K(create_scn)); + LOG_WARN("ls id is invalid", KR(ret), K(ls_id), K(ls_group_id), K(create_scn), K(ls_flag)); } else if (OB_UNLIKELY(!is_valid()) || OB_ISNULL(tenant_schema_) || OB_ISNULL(sql_proxy_)) { @@ -760,7 +762,8 @@ int ObTenantLSInfo::create_new_ls_for_recovery( } else if (OB_FAIL(new_info.init(tenant_id, ls_id, ls_group_id, share::OB_LS_CREATING, - group_info.unit_group_id_, primary_zone))) { + ls_flag.is_duplicate_ls() ? 
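The rewritten check_lost_replica_() above boils down to a small decision: once the server is permanently offline, a replica row can be reclaimed if the replica is no longer in service (F-replicas are in service while in the member_list, R-replicas while in the learner_list); if it is still in service, the row is reclaimed only when the log stream is missing from __all_ls_status, otherwise remove_member is left to handle it. A boolean model of that decision, with the three inputs reduced to plain flags for illustration:

// Returns true when the meta-table row for this replica can be cleaned up.
bool is_lost_replica(const bool server_is_lost,
                     const bool replica_in_service,
                     const bool ls_exists_in_all_ls_status)
{
  if (!server_is_lost) {
    return false;                      // only permanently offline servers are considered
  }
  if (!replica_in_service) {
    return true;                       // not in member_list / learner_list: safe to clean
  }
  return !ls_exists_in_all_ls_status;  // in service but LS row gone: GC leftover
}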
0 : group_info.unit_group_id_, + primary_zone, ls_flag))) { LOG_WARN("failed to init new info", KR(ret), K(tenant_id), K(ls_id), K(ls_group_id), K(group_info), K(primary_zone)); } else if (OB_FAIL(get_zone_priority(primary_zone, *tenant_schema_, zone_priority))) { @@ -826,8 +829,8 @@ int ObTenantLSInfo::add_ls_status_info_( LOG_WARN("failed to remove status", KR(ret), K(ls_info)); } else if (OB_FAIL(status_map_.set_refactored(ls_info.ls_id_, index))) { LOG_WARN("failed to remove ls from map", KR(ret), K(ls_info), K(index)); - } else if (ls_info.ls_id_.is_sys_ls()) { - //sys ls no ls group + } else if (ls_info.ls_id_.is_sys_ls() || ls_info.is_duplicate_ls()) { + //sys ls and duplicate ls have no ls group } else if (OB_FAIL(add_ls_to_ls_group_(ls_info))) { LOG_WARN("failed to add ls info", KR(ret), K(ls_info)); } @@ -1034,6 +1037,7 @@ int ObTenantLSInfo::create_new_ls_for_empty_unit_group_(const uint64_t unit_grou share::ObLSID new_id; share::ObLSStatusInfo new_info; uint64_t new_ls_group_id = OB_INVALID_INDEX_INT64; + share::ObLSFlag flag(share::ObLSFlag::NORMAL_FLAG); if (OB_FAIL(fetch_new_ls_group_id(tenant_id, new_ls_group_id))) { LOG_WARN("failed to fetch new id", KR(ret), K(tenant_id)); } @@ -1044,9 +1048,9 @@ int ObTenantLSInfo::create_new_ls_for_empty_unit_group_(const uint64_t unit_grou LOG_WARN("failed to get new id", KR(ret), K(tenant_id)); } else if (OB_FAIL(new_info.init(tenant_id, new_id, new_ls_group_id, share::OB_LS_CREATING, unit_group_id, - zone))) { + zone, flag))) { LOG_WARN("failed to init new info", KR(ret), K(new_id), - K(new_ls_group_id), K(unit_group_id), K(zone), K(tenant_id)); + K(new_ls_group_id), K(unit_group_id), K(zone), K(tenant_id), K(flag)); } else if (OB_FAIL(create_new_ls_(new_info, share::NORMAL_SWITCHOVER_STATUS))) { LOG_WARN("failed to add ls info", KR(ret), K(new_info)); } @@ -1142,6 +1146,7 @@ int ObTenantLSInfo::check_ls_match_primary_zone() ObZone zone; share::ObLSID new_id; share::ObLSStatusInfo new_info; + share::ObLSFlag flag(share::ObLSFlag::NORMAL_FLAG); for (int64_t i = 0; OB_SUCC(ret) && i < ls_group_array_.count(); ++i) { ObLSGroupInfo &group_info = ls_group_array_.at(i); //check the unit group is active @@ -1160,9 +1165,9 @@ int ObTenantLSInfo::check_ls_match_primary_zone() LOG_WARN("failed to get new id", KR(ret), K(tenant_id)); } else if (OB_FAIL(new_info.init(tenant_id, new_id, group_info.ls_group_id_, share::OB_LS_CREATING, - group_info.unit_group_id_, zone))) { + group_info.unit_group_id_, zone, flag))) { LOG_WARN("failed to init new info", KR(ret), K(new_id), - K(group_info), K(zone), K(tenant_id)); + K(group_info), K(zone), K(tenant_id), K(flag)); } else if (OB_FAIL(create_new_ls_(new_info, share::NORMAL_SWITCHOVER_STATUS))) { LOG_WARN("failed to create new ls", KR(ret), K(new_info)); } @@ -1208,7 +1213,7 @@ int ObTenantLSInfo::create_new_ls_(const share::ObLSStatusInfo &status_info, } else { share::ObLSLifeAgentManager ls_life_agent(*sql_proxy_); share::ObLSAttr ls_info; - share::ObLSFlag flag = share::OB_LS_FLAG_NORMAL;//TODO + share::ObLSFlag flag = status_info.get_flag(); SCN create_scn; ObSqlString zone_priority; if (OB_FAIL(ObLSAttrOperator::get_tenant_gts(status_info.tenant_id_, create_scn))) { @@ -1926,7 +1931,33 @@ int ObTenantLSInfo::balance_ls_primary_zone( return ret; } - +int ObTenantLSInfo::create_duplicate_ls() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_valid()) || OB_ISNULL(tenant_schema_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tenant ls info not valid", KR(ret), KP(tenant_schema_)); + } else if 
(OB_UNLIKELY(0 == primary_zone_.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("primary zone is invalid", KR(ret)); + } else { + const uint64_t tenant_id = tenant_schema_->get_tenant_id(); + const ObZone &zone = primary_zone_.at(0); + share::ObLSID new_id; + share::ObLSStatusInfo new_info; + ObLSFlag flag(ObLSFlag::DUPLICATE_FLAG); + if (OB_FAIL(fetch_new_ls_id(tenant_id, new_id))) { + LOG_WARN("failed to get new id", KR(ret), K(tenant_id)); + } else if (OB_FAIL(new_info.init(tenant_id, new_id, 0/*ls_group_id*/, share::OB_LS_CREATING, + 0/*unit_group_id*/, zone, flag))) { + LOG_WARN("failed to init new info", KR(ret), K(new_id), K(zone), K(tenant_id), K(flag)); + } else if (OB_FAIL(create_new_ls_(new_info, share::NORMAL_SWITCHOVER_STATUS))) { + LOG_WARN("failed to create duplicate ls", KR(ret), K(new_info)); + } + LOG_INFO("[LS_MGR] create duplicate ls", KR(ret), K(new_info)); + } + return ret; +} //////////////ObTenantThreadHelper int ObTenantThreadHelper::create( const char* thread_name, int tg_def_id, ObTenantThreadHelper &tenant_thread) @@ -2393,6 +2424,34 @@ int ObPrimaryLSService::gather_tenant_recovery_stat_() } return ret; } -} -} +int ObPrimaryLSService::create_duplicate_ls() +{ + int ret = OB_SUCCESS; + share::schema::ObSchemaGetterGuard schema_guard; + const share::schema::ObTenantSchema *tenant_schema = NULL; + if (OB_ISNULL(GCTX.schema_service_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error", KR(ret)); + } else if (OB_FAIL(GCTX.schema_service_->get_tenant_schema_guard( + OB_SYS_TENANT_ID, schema_guard))) { + LOG_WARN("fail to get schema guard", KR(ret)); + } else if (OB_FAIL(schema_guard.get_tenant_info(tenant_id_, tenant_schema))) { + LOG_WARN("failed to get tenant info", KR(ret), K(tenant_id_)); + } else if (OB_ISNULL(tenant_schema)) { + ret = OB_TENANT_NOT_EXIST; + LOG_WARN("tenant not exist", KR(ret), K(tenant_id_)); + } else { + const uint64_t tenant_id = tenant_schema->get_tenant_id(); + ObTenantLSInfo tenant_stat(GCTX.sql_proxy_, tenant_schema, tenant_id, + GCTX.srv_rpc_proxy_, GCTX.lst_operator_); + if (OB_FAIL(tenant_stat.gather_stat(false))) { + LOG_WARN("failed to gather stat", KR(ret)); + } else if (OB_FAIL(tenant_stat.create_duplicate_ls())) { + LOG_WARN("failed to create duplicate ls", KR(ret)); + } + } + return ret; +} +}//end of rootserver +} diff --git a/src/rootserver/ob_primary_ls_service.h b/src/rootserver/ob_primary_ls_service.h index f6af3c1642..fc025c546a 100644 --- a/src/rootserver/ob_primary_ls_service.h +++ b/src/rootserver/ob_primary_ls_service.h @@ -168,7 +168,8 @@ public: int create_new_ls_for_recovery(const share::ObLSID &ls_id, const uint64_t ls_group_id, const share::SCN &create_scn, - common::ObMySQLTransaction &trans); + common::ObMySQLTransaction &trans, + const share::ObLSFlag &ls_flag); //for recovery tenant, if ls is in creating in __all_ls_status, create the ls int process_ls_stats_for_recovery(); @@ -196,6 +197,8 @@ public: const share::ObLSAttrIArray &ls_array, common::ObIArray &status_machine_array); + int create_duplicate_ls(); + private: int fix_ls_status_(const ObLSStatusMachineParameter &status_machine, const share::ObTenantSwitchoverStatus &working_sw_status); @@ -362,6 +365,7 @@ public: return OB_SUCCESS; } + int create_duplicate_ls(); private: int process_user_tenant_(const share::schema::ObTenantSchema &tenant_schema); diff --git a/src/rootserver/ob_recovery_ls_service.cpp b/src/rootserver/ob_recovery_ls_service.cpp index 54c0a86327..201f15f60e 100644 --- a/src/rootserver/ob_recovery_ls_service.cpp +++ 
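create_duplicate_ls() above assembles a status entry whose flag is DUPLICATE and whose ls_group_id and unit_group_id are both 0, pinned to the tenant's first primary zone, before reusing the normal create_new_ls_() path. A sketch of that assembly step with a plain struct standing in for share::ObLSStatusInfo (field names here are illustrative, not the real class members):

#include <cstdint>
#include <string>

struct LSStatusInfoModel {
  uint64_t tenant_id = 0;
  int64_t ls_id = 0;
  uint64_t ls_group_id = 0;
  uint64_t unit_group_id = 0;
  std::string status;
  std::string primary_zone;
  bool duplicate_flag = false;
};

LSStatusInfoModel make_duplicate_ls_info(const uint64_t tenant_id,
                                         const int64_t new_ls_id,
                                         const std::string &first_primary_zone)
{
  LSStatusInfoModel info;
  info.tenant_id = tenant_id;
  info.ls_id = new_ls_id;
  info.ls_group_id = 0;        // a duplicate ls belongs to no ls group
  info.unit_group_id = 0;      // and to no unit group
  info.status = "CREATING";    // mirrors share::OB_LS_CREATING
  info.primary_zone = first_primary_zone;
  info.duplicate_flag = true;  // mirrors ObLSFlag::DUPLICATE_FLAG
  return info;
}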
b/src/rootserver/ob_recovery_ls_service.cpp @@ -748,10 +748,11 @@ int ObRecoveryLSService::create_new_ls_(const share::ObLSAttr &ls_attr, } else { ObTenantLSInfo tenant_stat(GCTX.sql_proxy_, tenant_schema, tenant_id_, GCTX.srv_rpc_proxy_, GCTX.lst_operator_); + ObLSFlag ls_flag = ls_attr.get_ls_flag(); if (OB_FAIL(tenant_stat.gather_stat(true))) { LOG_WARN("failed to gather stat", KR(ret)); } else if (OB_FAIL(tenant_stat.create_new_ls_for_recovery(ls_attr.get_ls_id(), - ls_attr.get_ls_group_id(), ls_attr.get_create_scn(), trans))) { + ls_attr.get_ls_group_id(), ls_attr.get_create_scn(), trans, ls_flag))) { LOG_WARN("failed to add new ls status info", KR(ret), K(ls_attr), K(sync_scn)); } } diff --git a/src/rootserver/restore/ob_restore_scheduler.cpp b/src/rootserver/restore/ob_restore_scheduler.cpp index a39eaa369e..864f2f6fc9 100644 --- a/src/rootserver/restore/ob_restore_scheduler.cpp +++ b/src/rootserver/restore/ob_restore_scheduler.cpp @@ -1017,6 +1017,7 @@ int ObRestoreService::create_all_ls_( } else { for (int64_t i = 0; OB_SUCC(ret) && i < ls_attr_array.count(); ++i) { const ObLSAttr &ls_info = ls_attr_array.at(i); + ObLSFlag ls_flag = ls_info.get_ls_flag(); if (ls_info.get_ls_id().is_sys_ls()) { } else if (OB_SUCC(status_op.get_ls_status_info(tenant_id_, ls_info.get_ls_id(), status_info, trans))) { @@ -1025,7 +1026,7 @@ int ObRestoreService::create_all_ls_( LOG_WARN("failed to get ls status info", KR(ret), K(tenant_id_), K(ls_info)); } else if (OB_FAIL(tenant_stat.create_new_ls_for_recovery( ls_info.get_ls_id(), ls_info.get_ls_group_id(), ls_info.get_create_scn(), - trans))) { + trans, ls_flag))) { LOG_WARN("failed to add new ls status info", KR(ret), K(ls_info)); } LOG_INFO("create init ls", KR(ret), K(ls_info)); diff --git a/src/rootserver/virtual_table/ob_core_meta_table.cpp b/src/rootserver/virtual_table/ob_core_meta_table.cpp index 0ba1f254fb..2ebfe6aa1a 100644 --- a/src/rootserver/virtual_table/ob_core_meta_table.cpp +++ b/src/rootserver/virtual_table/ob_core_meta_table.cpp @@ -17,6 +17,7 @@ #include "share/schema/ob_schema_getter_guard.h" #include "share/schema/ob_table_schema.h" #include "share/schema/ob_column_schema.h" +#include "lib/string/ob_string.h" // for ObString namespace oceanbase { @@ -113,7 +114,10 @@ int ObCoreMetaTable::get_full_row(const ObTableSchema *table, int ret = OB_SUCCESS; char *ip = NULL; char *zone = NULL; - char *member_list = NULL; + ObString member_list; + ObString learner_list; + ObSqlString member_list_str; + ObSqlString learner_list_str; const char* replica_status = ob_replica_status_str(replica.get_replica_status()); if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; @@ -131,20 +135,20 @@ int ObCoreMetaTable::get_full_row(const ObTableSchema *table, } else if (NULL == (zone = static_cast(allocator_->alloc(MAX_ZONE_LENGTH)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_ERROR("alloc zone buf failed", "size", MAX_ZONE_LENGTH, KR(ret)); - } else if (NULL == (member_list = - static_cast(allocator_->alloc(MAX_MEMBER_LIST_LENGTH)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_ERROR("alloc member_list failed", "size", OB_MAX_SERVER_ADDR_SIZE, KR(ret)); } else if (false == replica.get_server().ip_to_string(ip, OB_MAX_SERVER_ADDR_SIZE)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("convert server ip to string failed", KR(ret), "server", replica.get_server()); } else if (OB_FAIL(databuff_printf(zone, MAX_ZONE_LENGTH, "%s", replica.get_zone().ptr()))) { ret = OB_BUF_NOT_ENOUGH; - LOG_WARN("snprintf failed", "buf_len", MAX_ZONE_LENGTH, - "src_len", 
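Both the recovery path (ob_recovery_ls_service.cpp) and the restore path (ob_restore_scheduler.cpp) above now read the flag stored with the LS attr and pass it into create_new_ls_for_recovery(), so a duplicate log stream keeps its flag when it is rebuilt; as seen earlier in this patch, a duplicate flag also forces the rebuilt status row onto unit group 0. A minimal sketch of that rule with stand-in types:

#include <cstdint>

struct LSFlagModel {
  bool duplicate = false;  // stand-in for ObLSFlag::is_duplicate_ls()
};

// Mirrors: ls_flag.is_duplicate_ls() ? 0 : group_info.unit_group_id_
uint64_t unit_group_for_recovery(const LSFlagModel &flag, const uint64_t group_unit_group_id)
{
  return flag.duplicate ? 0 : group_unit_group_id;
}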
strlen(replica.get_zone().ptr()), KR(ret)); - } else if (OB_FAIL(ObLSReplica::member_list2text( - replica.get_member_list(), member_list, MAX_MEMBER_LIST_LENGTH))) { + LOG_WARN("snprintf failed", "buf_len", MAX_ZONE_LENGTH, "src_len", strlen(replica.get_zone().ptr()), KR(ret)); + } else if (OB_FAIL(ObLSReplica::member_list2text(replica.get_member_list(), member_list_str))) { LOG_WARN("member_list2text failed", K(replica), KR(ret)); + } else if (OB_FAIL(replica.get_learner_list().transform_to_string(learner_list_str))) { + LOG_WARN("failed to transform GlobalLearnerList to ObString", KR(ret), K(replica)); + } else if (OB_FAIL(ob_write_string(*allocator_, member_list_str.string(), member_list))) { + LOG_WARN("failed to construct member list", KR(ret), K(member_list_str)); + } else if (OB_FAIL(ob_write_string(*allocator_, learner_list_str.string(), learner_list))) { + LOG_WARN("failed to construct learner list", KR(ret), K(learner_list_str)); } else { ADD_COLUMN(set_int, table, "tenant_id", static_cast(OB_SYS_TENANT_ID), columns); ADD_COLUMN(set_int, table, "ls_id", ObLSID::SYS_LS_ID, columns); @@ -163,6 +167,7 @@ int ObCoreMetaTable::get_full_row(const ObTableSchema *table, ADD_COLUMN(set_int, table, "paxos_replica_number", replica.get_paxos_replica_number(), columns); ADD_COLUMN(set_int, table, "data_size", replica.get_data_size(), columns); ADD_COLUMN(set_int, table, "required_size", replica.get_required_size(), columns); + ADD_TEXT_COLUMN(ObLongTextType, table, "learner_list", learner_list, columns); } if (OB_FAIL(ret)) { @@ -174,10 +179,6 @@ int ObCoreMetaTable::get_full_row(const ObTableSchema *table, allocator_->free(zone); zone = NULL; } - if (NULL != member_list) { - allocator_->free(member_list); - member_list = NULL; - } } } return ret; diff --git a/src/share/config/ob_server_config.h b/src/share/config/ob_server_config.h index fd9751004b..6f129c507a 100644 --- a/src/share/config/ob_server_config.h +++ b/src/share/config/ob_server_config.h @@ -21,6 +21,7 @@ namespace oceanbase namespace unittest { class ObSimpleClusterTestBase; + class ObMultiReplicaTestBase; } namespace common { @@ -149,6 +150,7 @@ public: HIDDEN_SYS_MEMORY, }; friend class unittest::ObSimpleClusterTestBase; + friend class unittest::ObMultiReplicaTestBase; ObServerMemoryConfig(); static ObServerMemoryConfig &get_instance(); int reload_config(const ObServerConfig& server_config); diff --git a/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp b/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp index e35a90b1d1..f26a3f773e 100644 --- a/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp +++ b/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp @@ -341,6 +341,21 @@ int ObInnerTableSchema::all_virtual_core_meta_table_schema(ObTableSchema &table_ required_size_default, required_size_default); //default_value } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("learner_list", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } table_schema.set_index_using_type(USING_HASH); table_schema.set_row_store_type(ENCODING_ROW_STORE); table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); diff --git a/src/share/inner_table/ob_inner_table_schema.12201_12250.cpp b/src/share/inner_table/ob_inner_table_schema.12201_12250.cpp index 0581c5b352..41af83532d 100644 --- 
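In get_full_row() above, the member list (and now the learner list) is rendered into an ObSqlString and then deep-copied into an allocator-backed ObString with ob_write_string, instead of being printed into a fixed char buffer. Assuming a comma-separated "ip:port:timestamp" rendering for each entry, a self-contained sketch of that formatting step in plain C++ (Endpoint is an illustrative stand-in for the member and learner entries):

#include <cstdint>
#include <sstream>
#include <string>
#include <vector>

struct Endpoint {
  std::string ip_port;    // e.g. "10.0.0.1:2882"
  int64_t timestamp = 0;  // member/learner timestamp
};

// Builds the text form that ends up in the member_list / learner_list columns.
std::string list_to_text(const std::vector<Endpoint> &list)
{
  std::ostringstream out;
  for (size_t i = 0; i < list.size(); ++i) {
    if (i > 0) {
      out << ",";
    }
    out << list[i].ip_port << ":" << list[i].timestamp;
  }
  return out.str();
}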
a/src/share/inner_table/ob_inner_table_schema.12201_12250.cpp +++ b/src/share/inner_table/ob_inner_table_schema.12201_12250.cpp @@ -8474,6 +8474,55 @@ int ObInnerTableSchema::all_virtual_ls_status_schema(ObTableSchema &table_schema true, //is_nullable false); //is_autoincrement } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("init_learner_list", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("b_init_learner_list", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ObObj flag_default; + flag_default.set_varchar(ObString::make_string("")); + ADD_COLUMN_SCHEMA_T("flag", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_MAX_LS_FLAG_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + flag_default, + flag_default); //default_value + } table_schema.set_index_using_type(USING_BTREE); table_schema.set_row_store_type(ENCODING_ROW_STORE); table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); @@ -8619,7 +8668,7 @@ int ObInnerTableSchema::all_virtual_ls_schema(ObTableSchema &table_schema) 0, //part_key_pos ObVarcharType, //column_type CS_TYPE_INVALID, //column_collation_type - 100, //column_length + OB_MAX_LS_FLAG_LENGTH, //column_length -1, //column_precision -1, //column_scale false, //is_nullable @@ -8999,6 +9048,21 @@ int ObInnerTableSchema::all_virtual_ls_meta_table_schema(ObTableSchema &table_sc required_size_default, required_size_default); //default_value } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("learner_list", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } table_schema.set_index_using_type(USING_BTREE); table_schema.set_row_store_type(ENCODING_ROW_STORE); table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); diff --git a/src/share/inner_table/ob_inner_table_schema.12251_12300.cpp b/src/share/inner_table/ob_inner_table_schema.12251_12300.cpp index 04c51e4005..a9c526d08b 100644 --- a/src/share/inner_table/ob_inner_table_schema.12251_12300.cpp +++ b/src/share/inner_table/ob_inner_table_schema.12251_12300.cpp @@ -1148,6 +1148,21 @@ int ObInnerTableSchema::all_virtual_log_stat_schema(ObTableSchema &table_schema) false, //is_nullable false); //is_autoincrement } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("learner_list", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } if (OB_SUCC(ret)) { table_schema.get_part_option().set_part_num(1); 
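The generated schema files above add the new learner_list / init_learner_list columns as nullable long-text columns outside the rowkey and partition key, plus a flag varchar column with an empty-string default sized by OB_MAX_LS_FLAG_LENGTH. A rough stand-alone model of the fields each ADD_COLUMN_SCHEMA() call pins down (the struct is illustrative, not the real ObColumnSchemaV2):

#include <cstdint>
#include <string>

struct ColumnDefModel {
  std::string name;
  int64_t rowkey_pos = 0;    // 0: not part of the rowkey
  int64_t part_key_pos = 0;  // 0: not a partition key
  std::string type;          // e.g. "longtext" or "varchar"
  int64_t length = 0;        // 0: use the type's default length
  bool nullable = true;
  std::string default_value;
};

ColumnDefModel learner_list_column()
{
  return ColumnDefModel{"learner_list", 0, 0, "longtext", 0, true, ""};
}

ColumnDefModel ls_flag_column(const int64_t max_ls_flag_length)
{
  // mirrors the varchar flag column with an empty-string default
  return ColumnDefModel{"flag", 0, 0, "varchar", max_ls_flag_length, false, ""};
}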
table_schema.set_part_level(PARTITION_LEVEL_ONE); diff --git a/src/share/inner_table/ob_inner_table_schema.12351_12400.cpp b/src/share/inner_table/ob_inner_table_schema.12351_12400.cpp index 4fe9a841aa..8dbe5514d2 100644 --- a/src/share/inner_table/ob_inner_table_schema.12351_12400.cpp +++ b/src/share/inner_table/ob_inner_table_schema.12351_12400.cpp @@ -3678,6 +3678,726 @@ int ObInnerTableSchema::all_virtual_external_table_file_schema(ObTableSchema &ta return ret; } +int ObInnerTableSchema::all_virtual_dup_ls_lease_mgr_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_DUP_LS_LEASE_MGR_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(6); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_DUP_LS_LEASE_MGR_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tenant_id", //column_name + ++column_id, //column_id + 1, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ls_id", //column_name + ++column_id, //column_id + 2, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_ip", //column_name + ++column_id, //column_id + 3, //rowkey_id + 0, //index_id + 1, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + MAX_IP_ADDR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_port", //column_name + ++column_id, //column_id + 4, //rowkey_id + 0, //index_id + 2, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("follower_ip", //column_name + ++column_id, //column_id + 5, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + MAX_IP_ADDR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale 
+ false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("follower_port", //column_name + ++column_id, //column_id + 6, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA_TS("grant_timestamp", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(ObPreciseDateTime), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + false); //is_on_update_for_timestamp + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA_TS("expired_timestamp", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(ObPreciseDateTime), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + false); //is_on_update_for_timestamp + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("remain_us", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("lease_interval_us", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("grant_req_ts", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("cached_req_ts", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("max_replayed_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("max_read_version", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("max_commit_version", //column_name + ++column_id, //column_id + 0, 
//rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + if (OB_SUCC(ret)) { + table_schema.get_part_option().set_part_num(1); + table_schema.set_part_level(PARTITION_LEVEL_ONE); + table_schema.get_part_option().set_part_func_type(PARTITION_FUNC_TYPE_LIST_COLUMNS); + if (OB_FAIL(table_schema.get_part_option().set_part_expr("svr_ip, svr_port"))) { + LOG_WARN("set_part_expr failed", K(ret)); + } else if (OB_FAIL(table_schema.mock_list_partition_array())) { + LOG_WARN("mock list partition array failed", K(ret)); + } + } + table_schema.set_index_using_type(USING_HASH); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + +int ObInnerTableSchema::all_virtual_dup_ls_tablet_set_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_DUP_LS_TABLET_SET_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(6); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_DUP_LS_TABLET_SET_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tenant_id", //column_name + ++column_id, //column_id + 1, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ls_id", //column_name + ++column_id, //column_id + 2, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_ip", //column_name + ++column_id, //column_id + 3, //rowkey_id + 0, //index_id + 1, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + MAX_IP_ADDR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + 
ADD_COLUMN_SCHEMA("svr_port", //column_name + ++column_id, //column_id + 4, //rowkey_id + 0, //index_id + 2, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ls_state", //column_name + ++column_id, //column_id + 5, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + MAX_LS_STATE_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("unique_id", //column_name + ++column_id, //column_id + 6, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("attribute", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_MAX_DUP_TABLE_TABLET_SET_ATTR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("count", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("readbale_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("change_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("need_confirm_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("state", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_MAX_DUP_TABLE_TABLET_SET_STATE_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("trx_ref", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, 
//column_scale + false, //is_nullable + false); //is_autoincrement + } + if (OB_SUCC(ret)) { + table_schema.get_part_option().set_part_num(1); + table_schema.set_part_level(PARTITION_LEVEL_ONE); + table_schema.get_part_option().set_part_func_type(PARTITION_FUNC_TYPE_LIST_COLUMNS); + if (OB_FAIL(table_schema.get_part_option().set_part_expr("svr_ip, svr_port"))) { + LOG_WARN("set_part_expr failed", K(ret)); + } else if (OB_FAIL(table_schema.mock_list_partition_array())) { + LOG_WARN("mock list partition array failed", K(ret)); + } + } + table_schema.set_index_using_type(USING_HASH); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + +int ObInnerTableSchema::all_virtual_dup_ls_tablets_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_DUP_LS_TABLETS_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(6); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_DUP_LS_TABLETS_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tenant_id", //column_name + ++column_id, //column_id + 1, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ls_id", //column_name + ++column_id, //column_id + 2, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_ip", //column_name + ++column_id, //column_id + 3, //rowkey_id + 0, //index_id + 1, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + MAX_IP_ADDR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_port", //column_name + ++column_id, //column_id + 4, //rowkey_id + 0, //index_id + 2, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), 
//column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ls_state", //column_name + ++column_id, //column_id + 5, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + MAX_LS_STATE_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tablet_id", //column_name + ++column_id, //column_id + 6, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObUInt64Type, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(uint64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("unique_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("attribute", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_MAX_DUP_TABLE_TABLET_SET_ATTR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA_TS("refresh_schema_timestamp", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObTimestampType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(ObPreciseDateTime), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + false); //is_on_update_for_timestamp + } + if (OB_SUCC(ret)) { + table_schema.get_part_option().set_part_num(1); + table_schema.set_part_level(PARTITION_LEVEL_ONE); + table_schema.get_part_option().set_part_func_type(PARTITION_FUNC_TYPE_LIST_COLUMNS); + if (OB_FAIL(table_schema.get_part_option().set_part_expr("svr_ip, svr_port"))) { + LOG_WARN("set_part_expr failed", K(ret)); + } else if (OB_FAIL(table_schema.mock_list_partition_array())) { + LOG_WARN("mock list partition array failed", K(ret)); + } + } + table_schema.set_index_using_type(USING_HASH); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + int ObInnerTableSchema::all_virtual_tx_data_schema(ObTableSchema &table_schema) { int ret = OB_SUCCESS; diff --git a/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp b/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp index b52f7370c5..bb7b85c1b5 100644 --- a/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp +++ b/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp @@ -4534,6 +4534,21 @@ int ObInnerTableSchema::all_virtual_ls_meta_table_ora_schema(ObTableSchema &tabl false, //is_nullable false); //is_autoincrement } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("LEARNER_LIST", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, 
//part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } table_schema.set_index_using_type(USING_BTREE); table_schema.set_row_store_type(ENCODING_ROW_STORE); table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); diff --git a/src/share/inner_table/ob_inner_table_schema.15251_15300.cpp b/src/share/inner_table/ob_inner_table_schema.15251_15300.cpp index da792a6f0a..de65a71d9f 100644 --- a/src/share/inner_table/ob_inner_table_schema.15251_15300.cpp +++ b/src/share/inner_table/ob_inner_table_schema.15251_15300.cpp @@ -3274,6 +3274,21 @@ int ObInnerTableSchema::all_virtual_log_stat_ora_schema(ObTableSchema &table_sch false, //is_nullable false); //is_autoincrement } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("LEARNER_LIST", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } if (OB_SUCC(ret)) { table_schema.get_part_option().set_part_num(1); table_schema.set_part_level(PARTITION_LEVEL_ONE); @@ -4255,6 +4270,51 @@ int ObInnerTableSchema::all_virtual_ls_status_ora_schema(ObTableSchema &table_sc true, //is_nullable false); //is_autoincrement } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("INIT_LEARNER_LIST", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("B_INIT_LEARNER_LIST", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("FLAG", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_UTF8MB4_BIN, //column_collation_type + OB_MAX_LS_FLAG_LENGTH, //column_length + 2, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } table_schema.set_index_using_type(USING_BTREE); table_schema.set_row_store_type(ENCODING_ROW_STORE); table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); diff --git a/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp b/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp index a911d4c3cd..603bb39c9b 100644 --- a/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp +++ b/src/share/inner_table/ob_inner_table_schema.21151_21200.cpp @@ -910,7 +910,7 @@ int ObInnerTableSchema::dba_ob_ls_locations_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( ( SELECT NOW(6) AS CREATE_TIME, NOW(6) AS MODIFY_TIME, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL 
END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE FROM OCEANBASE.__ALL_VIRTUAL_CORE_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() ) UNION ALL ( SELECT GMT_CREATE AS CREATE_TIME, GMT_MODIFIED AS MODIFY_TIME, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE FROM OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() AND TENANT_ID != 1 ) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( ( SELECT NOW(6) AS CREATE_TIME, NOW(6) AS MODIFY_TIME, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST FROM OCEANBASE.__ALL_VIRTUAL_CORE_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() ) UNION ALL ( SELECT GMT_CREATE AS CREATE_TIME, GMT_MODIFIED AS MODIFY_TIME, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST FROM OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() AND TENANT_ID != 1 ) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -960,7 +960,7 @@ int ObInnerTableSchema::cdb_ob_ls_locations_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( ( SELECT NOW(6) AS CREATE_TIME, NOW(6) AS MODIFY_TIME, TENANT_ID, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE FROM OCEANBASE.__ALL_VIRTUAL_CORE_META_TABLE ) UNION ALL ( SELECT GMT_CREATE AS CREATE_TIME, GMT_MODIFIED AS MODIFY_TIME, TENANT_ID, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE FROM 
OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID != 1 ) )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( ( SELECT NOW(6) AS CREATE_TIME, NOW(6) AS MODIFY_TIME, TENANT_ID, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST FROM OCEANBASE.__ALL_VIRTUAL_CORE_META_TABLE ) UNION ALL ( SELECT GMT_CREATE AS CREATE_TIME, GMT_MODIFIED AS MODIFY_TIME, TENANT_ID, LS_ID, SVR_IP, SVR_PORT, SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN "LEADER" ELSE "FOLLOWER" END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, (CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN "FULL" WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST FROM OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID != 1 ) )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.21301_21350.cpp b/src/share/inner_table/ob_inner_table_schema.21301_21350.cpp index 4db84cca5d..1f78aa113d 100644 --- a/src/share/inner_table/ob_inner_table_schema.21301_21350.cpp +++ b/src/share/inner_table/ob_inner_table_schema.21301_21350.cpp @@ -60,7 +60,7 @@ int ObInnerTableSchema::gv_ob_log_stat_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT TENANT_ID, LS_ID, SVR_IP, SVR_PORT, ROLE, PROPOSAL_ID, CONFIG_VERSION, ACCESS_MODE, PAXOS_MEMBER_LIST, PAXOS_REPLICA_NUM, CASE in_sync WHEN 1 THEN 'YES' ELSE 'NO' END AS IN_SYNC, BASE_LSN, BEGIN_LSN, BEGIN_SCN, END_LSN, END_SCN, MAX_LSN, MAX_SCN, ARBITRATION_MEMBER, DEGRADED_LIST FROM oceanbase.__all_virtual_log_stat )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT TENANT_ID, LS_ID, SVR_IP, SVR_PORT, ROLE, PROPOSAL_ID, CONFIG_VERSION, ACCESS_MODE, PAXOS_MEMBER_LIST, PAXOS_REPLICA_NUM, CASE in_sync WHEN 1 THEN 'YES' ELSE 'NO' END AS IN_SYNC, BASE_LSN, BEGIN_LSN, BEGIN_SCN, END_LSN, END_SCN, MAX_LSN, MAX_SCN, ARBITRATION_MEMBER, DEGRADED_LIST, LEARNER_LIST FROM oceanbase.__all_virtual_log_stat )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -667,7 +667,7 @@ int ObInnerTableSchema::dba_ob_ls_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT A.LS_ID, A.STATUS, C.ZONE_PRIORITY AS PRIMARY_ZONE, A.UNIT_GROUP_ID, A.LS_GROUP_ID, /* SYS LS's CREATE_SCN always is NULL, it means nothing */ (CASE A.LS_ID WHEN 1 THEN NULL ELSE B.CREATE_SCN END) AS CREATE_SCN, /* show NULL if not dropped */ (CASE B.DROP_SCN WHEN 1 THEN NULL ELSE B.DROP_SCN END) AS DROP_SCN, /* SYS tenant and Meta tenant always show NULL */ (CASE WHEN A.TENANT_ID = 1 THEN NULL WHEN (A.TENANT_ID & 0x1) = 1 THEN NULL ELSE B.SYNC_SCN END) AS SYNC_SCN, /* SYS tenant and Meta tenant always show NULL */ (CASE 
WHEN A.TENANT_ID = 1 THEN NULL WHEN (A.TENANT_ID & 0x1) = 1 THEN NULL ELSE B.READABLE_SCN END) AS READABLE_SCN FROM OCEANBASE.__ALL_VIRTUAL_LS_STATUS AS A JOIN OCEANBASE.__ALL_VIRTUAL_LS_RECOVERY_STAT AS B JOIN OCEANBASE.__ALL_VIRTUAL_LS_ELECTION_REFERENCE_INFO AS C ON A.TENANT_ID = B.TENANT_ID AND A.LS_ID = B.LS_ID AND A.TENANT_ID = C.TENANT_ID AND A.LS_ID = C.LS_ID WHERE A.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT A.LS_ID, A.STATUS, C.ZONE_PRIORITY AS PRIMARY_ZONE, A.UNIT_GROUP_ID, A.LS_GROUP_ID, /* SYS LS's CREATE_SCN always is NULL, it means nothing */ (CASE A.LS_ID WHEN 1 THEN NULL ELSE B.CREATE_SCN END) AS CREATE_SCN, /* show NULL if not dropped */ (CASE B.DROP_SCN WHEN 1 THEN NULL ELSE B.DROP_SCN END) AS DROP_SCN, /* SYS tenant and Meta tenant always show NULL */ (CASE WHEN A.TENANT_ID = 1 THEN NULL WHEN (A.TENANT_ID & 0x1) = 1 THEN NULL ELSE B.SYNC_SCN END) AS SYNC_SCN, /* SYS tenant and Meta tenant always show NULL */ (CASE WHEN A.TENANT_ID = 1 THEN NULL WHEN (A.TENANT_ID & 0x1) = 1 THEN NULL ELSE B.READABLE_SCN END) AS READABLE_SCN, FLAG FROM OCEANBASE.__ALL_VIRTUAL_LS_STATUS AS A JOIN OCEANBASE.__ALL_VIRTUAL_LS_RECOVERY_STAT AS B JOIN OCEANBASE.__ALL_VIRTUAL_LS_ELECTION_REFERENCE_INFO AS C ON A.TENANT_ID = B.TENANT_ID AND A.LS_ID = B.LS_ID AND A.TENANT_ID = C.TENANT_ID AND A.LS_ID = C.LS_ID WHERE A.TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -717,7 +717,7 @@ int ObInnerTableSchema::cdb_ob_ls_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT A.TENANT_ID, A.LS_ID, A.STATUS, C.ZONE_PRIORITY AS PRIMARY_ZONE, A.UNIT_GROUP_ID, A.LS_GROUP_ID, /* SYS LS's CREATE_SCN always is NULL, it means nothing */ (CASE A.LS_ID WHEN 1 THEN NULL ELSE B.CREATE_SCN END) AS CREATE_SCN, /* show NULL if not dropped */ (CASE B.DROP_SCN WHEN 1 THEN NULL ELSE B.DROP_SCN END) AS DROP_SCN, /* SYS tenant and Meta tenant always show NULL */ (CASE WHEN A.TENANT_ID = 1 THEN NULL WHEN (A.TENANT_ID & 0x1) = 1 THEN NULL ELSE B.SYNC_SCN END) AS SYNC_SCN, /* SYS tenant and Meta tenant always show NULL */ (CASE WHEN A.TENANT_ID = 1 THEN NULL WHEN (A.TENANT_ID & 0x1) = 1 THEN NULL ELSE B.READABLE_SCN END) AS READABLE_SCN FROM OCEANBASE.__ALL_VIRTUAL_LS_STATUS AS A JOIN OCEANBASE.__ALL_VIRTUAL_LS_RECOVERY_STAT AS B JOIN OCEANBASE.__ALL_VIRTUAL_LS_ELECTION_REFERENCE_INFO AS C ON A.TENANT_ID = B.TENANT_ID AND A.LS_ID = B.LS_ID AND A.TENANT_ID = C.TENANT_ID AND A.LS_ID = C.LS_ID )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT A.TENANT_ID, A.LS_ID, A.STATUS, C.ZONE_PRIORITY AS PRIMARY_ZONE, A.UNIT_GROUP_ID, A.LS_GROUP_ID, /* SYS LS's CREATE_SCN always is NULL, it means nothing */ (CASE A.LS_ID WHEN 1 THEN NULL ELSE B.CREATE_SCN END) AS CREATE_SCN, /* show NULL if not dropped */ (CASE B.DROP_SCN WHEN 1 THEN NULL ELSE B.DROP_SCN END) AS DROP_SCN, /* SYS tenant and Meta tenant always show NULL */ (CASE WHEN A.TENANT_ID = 1 THEN NULL WHEN (A.TENANT_ID & 0x1) = 1 THEN NULL ELSE B.SYNC_SCN END) AS SYNC_SCN, /* SYS tenant and Meta tenant always show NULL */ (CASE WHEN A.TENANT_ID = 1 THEN NULL WHEN (A.TENANT_ID & 0x1) = 1 THEN NULL ELSE B.READABLE_SCN END) AS READABLE_SCN, FLAG FROM OCEANBASE.__ALL_VIRTUAL_LS_STATUS AS A JOIN OCEANBASE.__ALL_VIRTUAL_LS_RECOVERY_STAT AS B JOIN OCEANBASE.__ALL_VIRTUAL_LS_ELECTION_REFERENCE_INFO AS C 
ON A.TENANT_ID = B.TENANT_ID AND A.LS_ID = B.LS_ID AND A.TENANT_ID = C.TENANT_ID AND A.LS_ID = C.LS_ID )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -767,7 +767,7 @@ int ObInnerTableSchema::dba_ob_table_locations_schema(ObTableSchema &table_schem table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT D.DATABASE_NAME, A.TABLE_NAME, A.TABLE_ID, CASE WHEN A.TABLE_TYPE IN (0) THEN 'SYSTEM TABLE' WHEN A.TABLE_TYPE IN (3,6,8,9) THEN 'USER TABLE' WHEN A.TABLE_TYPE IN (5) THEN 'INDEX' WHEN A.TABLE_TYPE IN (12,13) THEN 'LOB AUX TABLE' ELSE NULL END AS TABLE_TYPE, A.PARTITION_NAME, A.SUBPARTITION_NAME, /* INDEX_NAME is valid when table is index */ CASE WHEN A.TABLE_TYPE != 5 THEN NULL WHEN D.DATABASE_NAME != '__recyclebin' THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) ELSE TABLE_NAME END AS INDEX_NAME, CASE WHEN DATA_TABLE_ID = 0 THEN NULL ELSE DATA_TABLE_ID END AS DATA_TABLE_ID, A.TABLET_ID, C.LS_ID, C.ZONE, C.SVR_IP AS SVR_IP, C.SVR_PORT AS SVR_PORT, C.ROLE, C.REPLICA_TYPE FROM ( SELECT DATABASE_ID, TABLE_NAME, TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM OCEANBASE.__ALL_VIRTUAL_CORE_ALL_TABLE WHERE TABLET_ID != 0 AND TENANT_ID = EFFECTIVE_TENANT_ID() UNION ALL SELECT DATABASE_ID, TABLE_NAME, TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM OCEANBASE.__ALL_TABLE WHERE TABLET_ID != 0 AND PART_LEVEL = 0 AND TENANT_ID = 0 UNION ALL SELECT T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, P.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM OCEANBASE.__ALL_TABLE T JOIN OCEANBASE.__ALL_PART P ON T.TABLE_ID = P.TABLE_ID AND T.TENANT_ID = P.TENANT_ID WHERE T.PART_LEVEL = 1 AND T.TENANT_ID = 0 UNION ALL SELECT T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, Q.SUB_PART_NAME AS SUBPARTITION_NAME, Q.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_PART P,OCEANBASE.__ALL_SUB_PART Q WHERE T.TABLE_ID =P.TABLE_ID AND P.TABLE_ID=Q.TABLE_ID AND P.PART_ID = Q.PART_ID AND T.TENANT_ID = P.TENANT_ID AND P.TENANT_ID = Q.TENANT_ID AND T.PART_LEVEL = 2 AND T.TENANT_ID = 0 ) A JOIN OCEANBASE.DBA_OB_TABLET_TO_LS B ON A.TABLET_ID = B.TABLET_ID JOIN OCEANBASE.DBA_OB_LS_LOCATIONS C ON B.LS_ID = C.LS_ID JOIN OCEANBASE.__ALL_DATABASE D ON A.DATABASE_ID = D.DATABASE_ID WHERE D.TENANT_ID = 0 ORDER BY A.TABLE_ID, A.TABLET_ID, C.ZONE, SVR_IP, SVR_PORT )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT D.DATABASE_NAME, A.TABLE_NAME, A.TABLE_ID, CASE WHEN A.TABLE_TYPE IN (0) THEN 'SYSTEM TABLE' WHEN A.TABLE_TYPE IN (3,6,8,9) THEN 'USER TABLE' WHEN A.TABLE_TYPE IN (5) THEN 'INDEX' WHEN A.TABLE_TYPE IN (12,13) THEN 'LOB AUX TABLE' ELSE NULL END AS TABLE_TYPE, A.PARTITION_NAME, A.SUBPARTITION_NAME, /* INDEX_NAME is valid when table is index */ CASE WHEN A.TABLE_TYPE != 5 THEN NULL WHEN D.DATABASE_NAME != '__recyclebin' THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) ELSE TABLE_NAME END AS INDEX_NAME, CASE WHEN DATA_TABLE_ID = 0 THEN NULL ELSE DATA_TABLE_ID END AS DATA_TABLE_ID, A.TABLET_ID, C.LS_ID, C.ZONE, C.SVR_IP AS SVR_IP, C.SVR_PORT AS SVR_PORT, C.ROLE, C.REPLICA_TYPE, CASE WHEN 
A.DUPLICATE_SCOPE = 1 THEN 'CLUSTER' ELSE 'NONE' END AS DUPLICATE_SCOPE FROM ( SELECT DATABASE_ID, TABLE_NAME, TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM OCEANBASE.__ALL_VIRTUAL_CORE_ALL_TABLE WHERE TABLET_ID != 0 AND TENANT_ID = EFFECTIVE_TENANT_ID() UNION ALL SELECT DATABASE_ID, TABLE_NAME, TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM OCEANBASE.__ALL_TABLE WHERE TABLET_ID != 0 AND PART_LEVEL = 0 AND TENANT_ID = 0 UNION ALL SELECT T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, P.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM OCEANBASE.__ALL_TABLE T JOIN OCEANBASE.__ALL_PART P ON T.TABLE_ID = P.TABLE_ID AND T.TENANT_ID = P.TENANT_ID WHERE T.PART_LEVEL = 1 AND T.TENANT_ID = 0 UNION ALL SELECT T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, Q.SUB_PART_NAME AS SUBPARTITION_NAME, Q.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_PART P,OCEANBASE.__ALL_SUB_PART Q WHERE T.TABLE_ID =P.TABLE_ID AND P.TABLE_ID=Q.TABLE_ID AND P.PART_ID = Q.PART_ID AND T.TENANT_ID = P.TENANT_ID AND P.TENANT_ID = Q.TENANT_ID AND T.PART_LEVEL = 2 AND T.TENANT_ID = 0 ) A JOIN OCEANBASE.DBA_OB_TABLET_TO_LS B ON A.TABLET_ID = B.TABLET_ID JOIN OCEANBASE.DBA_OB_LS_LOCATIONS C ON B.LS_ID = C.LS_ID JOIN OCEANBASE.__ALL_DATABASE D ON A.DATABASE_ID = D.DATABASE_ID WHERE D.TENANT_ID = 0 ORDER BY A.TABLE_ID, A.TABLET_ID, C.ZONE, SVR_IP, SVR_PORT )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } @@ -817,7 +817,7 @@ int ObInnerTableSchema::cdb_ob_table_locations_schema(ObTableSchema &table_schem table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT A.TENANT_ID, D.DATABASE_NAME, A.TABLE_NAME, A.TABLE_ID, CASE WHEN A.TABLE_TYPE IN (0) THEN 'SYSTEM TABLE' WHEN A.TABLE_TYPE IN (3,6,8,9) THEN 'USER TABLE' WHEN A.TABLE_TYPE IN (5) THEN 'INDEX' WHEN A.TABLE_TYPE IN (12,13) THEN 'LOB AUX TABLE' ELSE NULL END AS TABLE_TYPE, A.PARTITION_NAME, A.SUBPARTITION_NAME, /* INDEX_NAME is valid when table is index */ CASE WHEN A.TABLE_TYPE != 5 THEN NULL WHEN D.DATABASE_NAME != '__recyclebin' THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) ELSE TABLE_NAME END AS INDEX_NAME, CASE WHEN DATA_TABLE_ID = 0 THEN NULL ELSE DATA_TABLE_ID END AS DATA_TABLE_ID, A.TABLET_ID, C.LS_ID, C.ZONE, C.SVR_IP AS SVR_IP, C.SVR_PORT AS SVR_PORT, C.ROLE, C.REPLICA_TYPE FROM ( SELECT TENANT_ID, DATABASE_ID, TABLE_NAME, TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM OCEANBASE.__ALL_VIRTUAL_CORE_ALL_TABLE WHERE TABLET_ID != 0 UNION ALL SELECT TENANT_ID, DATABASE_ID, TABLE_NAME, TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM OCEANBASE.__ALL_VIRTUAL_TABLE WHERE TABLET_ID != 0 AND PART_LEVEL = 0 UNION ALL SELECT P.TENANT_ID AS TENANT_ID, T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, P.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM 
OCEANBASE.__ALL_VIRTUAL_TABLE T JOIN OCEANBASE.__ALL_VIRTUAL_PART P ON T.TABLE_ID = P.TABLE_ID WHERE T.TENANT_ID = P.TENANT_ID AND T.PART_LEVEL = 1 UNION ALL SELECT T.TENANT_ID AS TENANT_ID, T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, Q.SUB_PART_NAME AS SUBPARTITION_NAME, Q.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_PART P,OCEANBASE.__ALL_VIRTUAL_SUB_PART Q WHERE T.TABLE_ID =P.TABLE_ID AND P.TABLE_ID=Q.TABLE_ID AND P.PART_ID =Q.PART_ID AND T.TENANT_ID = P.TENANT_ID AND P.TENANT_ID = Q.TENANT_ID AND T.PART_LEVEL = 2 ) A JOIN OCEANBASE.CDB_OB_TABLET_TO_LS B ON A.TABLET_ID = B.TABLET_ID AND A.TENANT_ID = B.TENANT_ID JOIN OCEANBASE.CDB_OB_LS_LOCATIONS C ON B.LS_ID = C.LS_ID AND A.TENANT_ID = C.TENANT_ID JOIN OCEANBASE.__ALL_VIRTUAL_DATABASE D ON A.TENANT_ID = D.TENANT_ID AND A.DATABASE_ID = D.DATABASE_ID ORDER BY A.TENANT_ID, A.TABLE_ID, A.TABLET_ID, C.ZONE, SVR_IP, SVR_PORT )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT A.TENANT_ID, D.DATABASE_NAME, A.TABLE_NAME, A.TABLE_ID, CASE WHEN A.TABLE_TYPE IN (0) THEN 'SYSTEM TABLE' WHEN A.TABLE_TYPE IN (3,6,8,9) THEN 'USER TABLE' WHEN A.TABLE_TYPE IN (5) THEN 'INDEX' WHEN A.TABLE_TYPE IN (12,13) THEN 'LOB AUX TABLE' ELSE NULL END AS TABLE_TYPE, A.PARTITION_NAME, A.SUBPARTITION_NAME, /* INDEX_NAME is valid when table is index */ CASE WHEN A.TABLE_TYPE != 5 THEN NULL WHEN D.DATABASE_NAME != '__recyclebin' THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) ELSE TABLE_NAME END AS INDEX_NAME, CASE WHEN DATA_TABLE_ID = 0 THEN NULL ELSE DATA_TABLE_ID END AS DATA_TABLE_ID, A.TABLET_ID, C.LS_ID, C.ZONE, C.SVR_IP AS SVR_IP, C.SVR_PORT AS SVR_PORT, C.ROLE, C.REPLICA_TYPE, CASE WHEN A.DUPLICATE_SCOPE = 1 THEN 'CLUSTER' ELSE 'NONE' END AS DUPLICATE_SCOPE FROM ( SELECT TENANT_ID, DATABASE_ID, TABLE_NAME, TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM OCEANBASE.__ALL_VIRTUAL_CORE_ALL_TABLE WHERE TABLET_ID != 0 UNION ALL SELECT TENANT_ID, DATABASE_ID, TABLE_NAME, TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM OCEANBASE.__ALL_VIRTUAL_TABLE WHERE TABLET_ID != 0 AND PART_LEVEL = 0 UNION ALL SELECT P.TENANT_ID AS TENANT_ID, T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, P.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM OCEANBASE.__ALL_VIRTUAL_TABLE T JOIN OCEANBASE.__ALL_VIRTUAL_PART P ON T.TABLE_ID = P.TABLE_ID WHERE T.TENANT_ID = P.TENANT_ID AND T.PART_LEVEL = 1 UNION ALL SELECT T.TENANT_ID AS TENANT_ID, T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, Q.SUB_PART_NAME AS SUBPARTITION_NAME, Q.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_PART P,OCEANBASE.__ALL_VIRTUAL_SUB_PART Q WHERE T.TABLE_ID =P.TABLE_ID AND P.TABLE_ID=Q.TABLE_ID AND P.PART_ID =Q.PART_ID AND T.TENANT_ID = P.TENANT_ID AND P.TENANT_ID = Q.TENANT_ID AND T.PART_LEVEL = 2 ) A JOIN OCEANBASE.CDB_OB_TABLET_TO_LS B ON A.TABLET_ID = B.TABLET_ID AND A.TENANT_ID = B.TENANT_ID JOIN OCEANBASE.CDB_OB_LS_LOCATIONS C ON B.LS_ID = C.LS_ID AND A.TENANT_ID = C.TENANT_ID JOIN OCEANBASE.__ALL_VIRTUAL_DATABASE D 
ON A.TENANT_ID = D.TENANT_ID AND A.DATABASE_ID = D.DATABASE_ID ORDER BY A.TENANT_ID, A.TABLE_ID, A.TABLET_ID, C.ZONE, SVR_IP, SVR_PORT )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp b/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp index 67f15761da..79b7536646 100644 --- a/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp +++ b/src/share/inner_table/ob_inner_table_schema.25151_25200.cpp @@ -710,7 +710,7 @@ int ObInnerTableSchema::dba_ob_ls_locations_ora_schema(ObTableSchema &table_sche table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(TO_CHAR(GMT_CREATE) AS VARCHAR2(19)) AS CREATE_TIME, CAST(TO_CHAR(GMT_MODIFIED) AS VARCHAR2(19)) AS MODIFY_TIME, CAST(LS_ID AS NUMBER) AS LS_ID, SVR_IP, CAST(SVR_PORT AS NUMBER) AS SVR_PORT, CAST(SQL_PORT AS NUMBER) AS SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN 'LEADER' ELSE 'FOLLOWER' END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, CAST((CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS NUMBER) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN 'FULL' WHEN 5 THEN 'LOGONLY' WHEN 16 THEN 'READONLY' WHEN 261 THEN 'ENCRYPTION LOGONLY' ELSE NULL END) AS REPLICA_TYPE FROM SYS.ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT CAST(TO_CHAR(GMT_CREATE) AS VARCHAR2(19)) AS CREATE_TIME, CAST(TO_CHAR(GMT_MODIFIED) AS VARCHAR2(19)) AS MODIFY_TIME, CAST(LS_ID AS NUMBER) AS LS_ID, SVR_IP, CAST(SVR_PORT AS NUMBER) AS SVR_PORT, CAST(SQL_PORT AS NUMBER) AS SQL_PORT, ZONE, (CASE ROLE WHEN 1 THEN 'LEADER' ELSE 'FOLLOWER' END) AS ROLE, (CASE ROLE WHEN 1 THEN MEMBER_LIST ELSE NULL END) AS MEMBER_LIST, CAST((CASE ROLE WHEN 1 THEN PAXOS_REPLICA_NUMBER ELSE NULL END) AS NUMBER) AS PAXOS_REPLICA_NUMBER, (CASE REPLICA_TYPE WHEN 0 THEN 'FULL' WHEN 5 THEN 'LOGONLY' WHEN 16 THEN 'READONLY' WHEN 261 THEN 'ENCRYPTION LOGONLY' ELSE NULL END) AS REPLICA_TYPE, (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE NULL END) AS LEARNER_LIST FROM SYS.ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.28101_28150.cpp b/src/share/inner_table/ob_inner_table_schema.28101_28150.cpp index 5f780068fb..c734f865a2 100644 --- a/src/share/inner_table/ob_inner_table_schema.28101_28150.cpp +++ b/src/share/inner_table/ob_inner_table_schema.28101_28150.cpp @@ -2410,7 +2410,7 @@ int ObInnerTableSchema::gv_ob_log_stat_ora_schema(ObTableSchema &table_schema) table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT TENANT_ID, LS_ID, SVR_IP, SVR_PORT, ROLE, PROPOSAL_ID, CONFIG_VERSION, ACCESS_MODE, PAXOS_MEMBER_LIST, PAXOS_REPLICA_NUM, CASE in_sync WHEN 1 THEN 'YES' ELSE 'NO' END AS IN_SYNC, BASE_LSN, BEGIN_LSN, BEGIN_SCN, END_LSN, END_SCN, MAX_LSN, MAX_SCN, ARBITRATION_MEMBER, DEGRADED_LIST FROM SYS.ALL_VIRTUAL_LOG_STAT )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT TENANT_ID, LS_ID, SVR_IP, SVR_PORT, ROLE, PROPOSAL_ID, CONFIG_VERSION, ACCESS_MODE, PAXOS_MEMBER_LIST, PAXOS_REPLICA_NUM, CASE in_sync WHEN 1 THEN 'YES' ELSE 'NO' END AS IN_SYNC, BASE_LSN, BEGIN_LSN, BEGIN_SCN, 
END_LSN, END_SCN, MAX_LSN, MAX_SCN, ARBITRATION_MEMBER, DEGRADED_LIST, LEARNER_LIST FROM SYS.ALL_VIRTUAL_LOG_STAT )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.28151_28200.cpp b/src/share/inner_table/ob_inner_table_schema.28151_28200.cpp index 8f58a86d5e..2e1a845238 100644 --- a/src/share/inner_table/ob_inner_table_schema.28151_28200.cpp +++ b/src/share/inner_table/ob_inner_table_schema.28151_28200.cpp @@ -160,7 +160,7 @@ int ObInnerTableSchema::dba_ob_table_locations_ora_schema(ObTableSchema &table_s table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); if (OB_SUCC(ret)) { - if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT D.DATABASE_NAME, A.TABLE_NAME, A.TABLE_ID, CASE WHEN A.TABLE_TYPE IN (0) THEN 'SYSTEM TABLE' WHEN A.TABLE_TYPE IN (3,6,8,9) THEN 'USER TABLE' WHEN A.TABLE_TYPE IN (5) THEN 'INDEX' WHEN A.TABLE_TYPE IN (12,13) THEN 'LOB AUX TABLE' ELSE NULL END AS TABLE_TYPE, A.PARTITION_NAME, A.SUBPARTITION_NAME, /* INDEX_NAME is valid when table is index */ CASE WHEN A.TABLE_TYPE != 5 THEN NULL WHEN D.DATABASE_NAME != '__recyclebin' THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) ELSE TABLE_NAME END AS INDEX_NAME, CASE WHEN DATA_TABLE_ID = 0 THEN NULL ELSE DATA_TABLE_ID END AS DATA_TABLE_ID, A.TABLET_ID, C.LS_ID, C.ZONE, C.SVR_IP AS SVR_IP, C.SVR_PORT AS SVR_PORT, C.ROLE, C.REPLICA_TYPE FROM ( SELECT TENANT_ID, DATABASE_ID, TABLE_NAME, TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM SYS.ALL_VIRTUAL_CORE_ALL_TABLE WHERE TABLET_ID != 0 AND TENANT_ID = EFFECTIVE_TENANT_ID() UNION ALL SELECT T.TENANT_ID AS TENANT_ID, T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T WHERE T.TABLET_ID != 0 AND T.PART_LEVEL = 0 AND T.TENANT_ID = EFFECTIVE_TENANT_ID() UNION ALL SELECT T.TENANT_ID AS TENANT_ID, T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, P.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TABLE_ID = P.TABLE_ID AND T.TENANT_ID = P.TENANT_ID WHERE T.PART_LEVEL = 1 AND T.TENANT_ID = EFFECTIVE_TENANT_ID() UNION ALL SELECT T.TENANT_ID AS TENANT_ID, T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, Q.SUB_PART_NAME AS SUBPARTITION_NAME, Q.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID FROM SYS.ALL_VIRTUAL_SUB_PART_REAL_AGENT Q JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON P.PART_ID =Q.PART_ID AND Q.TENANT_ID = P.TENANT_ID JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID =P.TABLE_ID AND T.TENANT_ID = Q.TENANT_ID WHERE T.PART_LEVEL = 2 AND T.TENANT_ID = EFFECTIVE_TENANT_ID() ) A JOIN SYS.DBA_OB_TABLET_TO_LS B ON A.TABLET_ID = B.TABLET_ID JOIN SYS.DBA_OB_LS_LOCATIONS C ON B.LS_ID = C.LS_ID JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D ON A.DATABASE_ID = D.DATABASE_ID AND A.TENANT_ID = D.TENANT_ID ORDER BY A.TABLE_ID, A.TABLET_ID, C.ZONE, SVR_IP, SVR_PORT )__"))) { + if (OB_FAIL(table_schema.set_view_definition(R"__( SELECT D.DATABASE_NAME, A.TABLE_NAME, A.TABLE_ID, CASE WHEN A.TABLE_TYPE IN (0) THEN 'SYSTEM TABLE' WHEN A.TABLE_TYPE IN (3,6,8,9) THEN 'USER TABLE' WHEN 
A.TABLE_TYPE IN (5) THEN 'INDEX' WHEN A.TABLE_TYPE IN (12,13) THEN 'LOB AUX TABLE' ELSE NULL END AS TABLE_TYPE, A.PARTITION_NAME, A.SUBPARTITION_NAME, /* INDEX_NAME is valid when table is index */ CASE WHEN A.TABLE_TYPE != 5 THEN NULL WHEN D.DATABASE_NAME != '__recyclebin' THEN SUBSTR(TABLE_NAME, 7 + INSTR(SUBSTR(TABLE_NAME, 7), '_')) ELSE TABLE_NAME END AS INDEX_NAME, CASE WHEN DATA_TABLE_ID = 0 THEN NULL ELSE DATA_TABLE_ID END AS DATA_TABLE_ID, A.TABLET_ID, C.LS_ID, C.ZONE, C.SVR_IP AS SVR_IP, C.SVR_PORT AS SVR_PORT, C.ROLE, C.REPLICA_TYPE, CASE WHEN A.DUPLICATE_SCOPE = 1 THEN 'CLUSTER' ELSE 'NONE' END AS DUPLICATE_SCOPE FROM ( SELECT TENANT_ID, DATABASE_ID, TABLE_NAME, TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM SYS.ALL_VIRTUAL_CORE_ALL_TABLE WHERE TABLET_ID != 0 AND TENANT_ID = EFFECTIVE_TENANT_ID() UNION ALL SELECT T.TENANT_ID AS TENANT_ID, T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, 'NULL' AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T WHERE T.TABLET_ID != 0 AND T.PART_LEVEL = 0 AND T.TENANT_ID = EFFECTIVE_TENANT_ID() UNION ALL SELECT T.TENANT_ID AS TENANT_ID, T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, 'NULL' AS SUBPARTITION_NAME, P.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TABLE_ID = P.TABLE_ID AND T.TENANT_ID = P.TENANT_ID WHERE T.PART_LEVEL = 1 AND T.TENANT_ID = EFFECTIVE_TENANT_ID() UNION ALL SELECT T.TENANT_ID AS TENANT_ID, T.DATABASE_ID AS DATABASE_ID, T.TABLE_NAME AS TABLE_NAME, T.TABLE_ID AS TABLE_ID, P.PART_NAME AS PARTITION_NAME, Q.SUB_PART_NAME AS SUBPARTITION_NAME, Q.TABLET_ID AS TABLET_ID, TABLE_TYPE, DATA_TABLE_ID, DUPLICATE_SCOPE FROM SYS.ALL_VIRTUAL_SUB_PART_REAL_AGENT Q JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON P.PART_ID =Q.PART_ID AND Q.TENANT_ID = P.TENANT_ID JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID =P.TABLE_ID AND T.TENANT_ID = Q.TENANT_ID WHERE T.PART_LEVEL = 2 AND T.TENANT_ID = EFFECTIVE_TENANT_ID() ) A JOIN SYS.DBA_OB_TABLET_TO_LS B ON A.TABLET_ID = B.TABLET_ID JOIN SYS.DBA_OB_LS_LOCATIONS C ON B.LS_ID = C.LS_ID JOIN SYS.ALL_VIRTUAL_DATABASE_REAL_AGENT D ON A.DATABASE_ID = D.DATABASE_ID AND A.TENANT_ID = D.TENANT_ID ORDER BY A.TABLE_ID, A.TABLET_ID, C.ZONE, SVR_IP, SVR_PORT )__"))) { LOG_ERROR("fail to set view_definition", K(ret)); } } diff --git a/src/share/inner_table/ob_inner_table_schema.301_350.cpp b/src/share/inner_table/ob_inner_table_schema.301_350.cpp index 5e3313f936..6f931b49dd 100644 --- a/src/share/inner_table/ob_inner_table_schema.301_350.cpp +++ b/src/share/inner_table/ob_inner_table_schema.301_350.cpp @@ -7680,6 +7680,21 @@ int ObInnerTableSchema::all_ls_meta_table_schema(ObTableSchema &table_schema) required_size_default, required_size_default); //default_value } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("learner_list", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } table_schema.set_index_using_type(USING_BTREE); table_schema.set_row_store_type(ENCODING_ROW_STORE); 
table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); @@ -8283,6 +8298,55 @@ int ObInnerTableSchema::all_ls_status_schema(ObTableSchema &table_schema) true, //is_nullable false); //is_autoincrement } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("init_learner_list", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("b_init_learner_list", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObLongTextType, //column_type + CS_TYPE_INVALID, //column_collation_type + 0, //column_length + -1, //column_precision + -1, //column_scale + true, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ObObj flag_default; + flag_default.set_varchar(ObString::make_string("")); + ADD_COLUMN_SCHEMA_T("flag", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_MAX_LS_FLAG_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + flag_default, + flag_default); //default_value + } table_schema.set_index_using_type(USING_BTREE); table_schema.set_row_store_type(ENCODING_ROW_STORE); table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); diff --git a/src/share/inner_table/ob_inner_table_schema.351_400.cpp b/src/share/inner_table/ob_inner_table_schema.351_400.cpp index b4698cd2e0..af864ce923 100644 --- a/src/share/inner_table/ob_inner_table_schema.351_400.cpp +++ b/src/share/inner_table/ob_inner_table_schema.351_400.cpp @@ -990,7 +990,7 @@ int ObInnerTableSchema::all_ls_schema(ObTableSchema &table_schema) 0, //part_key_pos ObVarcharType, //column_type CS_TYPE_INVALID, //column_collation_type - 100, //column_length + OB_MAX_LS_FLAG_LENGTH, //column_length -1, //column_precision -1, //column_scale false, //is_nullable diff --git a/src/share/inner_table/ob_inner_table_schema.h b/src/share/inner_table/ob_inner_table_schema.h index 27c6afbd4d..5e49edceff 100644 --- a/src/share/inner_table/ob_inner_table_schema.h +++ b/src/share/inner_table/ob_inner_table_schema.h @@ -875,6 +875,9 @@ public: static int all_virtual_archive_dest_status_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_io_scheduler_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_external_table_file_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_dup_ls_lease_mgr_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_dup_ls_tablet_set_schema(share::schema::ObTableSchema &table_schema); + static int all_virtual_dup_ls_tablets_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_tx_data_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_task_opt_stat_gather_history_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_table_opt_stat_gather_history_schema(share::schema::ObTableSchema &table_schema); @@ -3084,6 +3087,9 @@ const schema_create_func virtual_table_schema_creators [] = { ObInnerTableSchema::all_virtual_archive_dest_status_schema, ObInnerTableSchema::all_virtual_io_scheduler_schema, ObInnerTableSchema::all_virtual_external_table_file_schema, + 
ObInnerTableSchema::all_virtual_dup_ls_lease_mgr_schema, + ObInnerTableSchema::all_virtual_dup_ls_tablet_set_schema, + ObInnerTableSchema::all_virtual_dup_ls_tablets_schema, ObInnerTableSchema::all_virtual_tx_data_schema, ObInnerTableSchema::all_virtual_task_opt_stat_gather_history_schema, ObInnerTableSchema::all_virtual_table_opt_stat_gather_history_schema, @@ -4609,6 +4615,9 @@ const uint64_t tenant_space_tables [] = { OB_ALL_VIRTUAL_LS_ARB_REPLICA_TASK_TID, OB_ALL_VIRTUAL_LS_ARB_REPLICA_TASK_HISTORY_TID, OB_ALL_VIRTUAL_ARCHIVE_DEST_STATUS_TID, + OB_ALL_VIRTUAL_DUP_LS_LEASE_MGR_TID, + OB_ALL_VIRTUAL_DUP_LS_TABLET_SET_TID, + OB_ALL_VIRTUAL_DUP_LS_TABLETS_TID, OB_ALL_VIRTUAL_TASK_OPT_STAT_GATHER_HISTORY_TID, OB_ALL_VIRTUAL_TABLE_OPT_STAT_GATHER_HISTORY_TID, OB_ALL_VIRTUAL_OPT_STAT_GATHER_MONITOR_TID, @@ -6710,6 +6719,9 @@ const char* const tenant_space_table_names [] = { OB_ALL_VIRTUAL_LS_ARB_REPLICA_TASK_TNAME, OB_ALL_VIRTUAL_LS_ARB_REPLICA_TASK_HISTORY_TNAME, OB_ALL_VIRTUAL_ARCHIVE_DEST_STATUS_TNAME, + OB_ALL_VIRTUAL_DUP_LS_LEASE_MGR_TNAME, + OB_ALL_VIRTUAL_DUP_LS_TABLET_SET_TNAME, + OB_ALL_VIRTUAL_DUP_LS_TABLETS_TNAME, OB_ALL_VIRTUAL_TASK_OPT_STAT_GATHER_HISTORY_TNAME, OB_ALL_VIRTUAL_TABLE_OPT_STAT_GATHER_HISTORY_TNAME, OB_ALL_VIRTUAL_OPT_STAT_GATHER_MONITOR_TNAME, @@ -8302,6 +8314,9 @@ const uint64_t tenant_distributed_vtables [] = { OB_ALL_VIRTUAL_TABLET_COMPACTION_INFO_TID, OB_ALL_VIRTUAL_SQL_PLAN_TID, OB_ALL_VIRTUAL_MALLOC_SAMPLE_INFO_TID, + OB_ALL_VIRTUAL_DUP_LS_LEASE_MGR_TID, + OB_ALL_VIRTUAL_DUP_LS_TABLET_SET_TID, + OB_ALL_VIRTUAL_DUP_LS_TABLETS_TID, OB_ALL_VIRTUAL_OPT_STAT_GATHER_MONITOR_TID, OB_ALL_VIRTUAL_THREAD_TID, OB_ALL_VIRTUAL_ARBITRATION_MEMBER_INFO_TID, @@ -10543,11 +10558,11 @@ static inline int get_sys_table_lob_aux_schema(const uint64_t tid, const int64_t OB_CORE_TABLE_COUNT = 4; const int64_t OB_SYS_TABLE_COUNT = 233; -const int64_t OB_VIRTUAL_TABLE_COUNT = 673; +const int64_t OB_VIRTUAL_TABLE_COUNT = 676; const int64_t OB_SYS_VIEW_COUNT = 716; -const int64_t OB_SYS_TENANT_TABLE_COUNT = 1627; +const int64_t OB_SYS_TENANT_TABLE_COUNT = 1630; const int64_t OB_CORE_SCHEMA_VERSION = 1; -const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 1630; +const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 1633; } // end namespace share } // end namespace oceanbase diff --git a/src/share/inner_table/ob_inner_table_schema_constants.h b/src/share/inner_table/ob_inner_table_schema_constants.h index bc91bf2c66..321ab2a55e 100644 --- a/src/share/inner_table/ob_inner_table_schema_constants.h +++ b/src/share/inner_table/ob_inner_table_schema_constants.h @@ -617,6 +617,9 @@ const uint64_t OB_ALL_VIRTUAL_LS_ARB_REPLICA_TASK_HISTORY_TID = 12365; // "__all const uint64_t OB_ALL_VIRTUAL_ARCHIVE_DEST_STATUS_TID = 12366; // "__all_virtual_archive_dest_status" const uint64_t OB_ALL_VIRTUAL_IO_SCHEDULER_TID = 12369; // "__all_virtual_io_scheduler" const uint64_t OB_ALL_VIRTUAL_EXTERNAL_TABLE_FILE_TID = 12371; // "__all_virtual_external_table_file" +const uint64_t OB_ALL_VIRTUAL_DUP_LS_LEASE_MGR_TID = 12376; // "__all_virtual_dup_ls_lease_mgr" +const uint64_t OB_ALL_VIRTUAL_DUP_LS_TABLET_SET_TID = 12378; // "__all_virtual_dup_ls_tablet_set" +const uint64_t OB_ALL_VIRTUAL_DUP_LS_TABLETS_TID = 12379; // "__all_virtual_dup_ls_tablets" const uint64_t OB_ALL_VIRTUAL_TX_DATA_TID = 12380; // "__all_virtual_tx_data" const uint64_t OB_ALL_VIRTUAL_TASK_OPT_STAT_GATHER_HISTORY_TID = 12381; // "__all_virtual_task_opt_stat_gather_history" const uint64_t OB_ALL_VIRTUAL_TABLE_OPT_STAT_GATHER_HISTORY_TID = 12382; // 
"__all_virtual_table_opt_stat_gather_history" @@ -2810,6 +2813,9 @@ const char *const OB_ALL_VIRTUAL_LS_ARB_REPLICA_TASK_HISTORY_TNAME = "__all_virt const char *const OB_ALL_VIRTUAL_ARCHIVE_DEST_STATUS_TNAME = "__all_virtual_archive_dest_status"; const char *const OB_ALL_VIRTUAL_IO_SCHEDULER_TNAME = "__all_virtual_io_scheduler"; const char *const OB_ALL_VIRTUAL_EXTERNAL_TABLE_FILE_TNAME = "__all_virtual_external_table_file"; +const char *const OB_ALL_VIRTUAL_DUP_LS_LEASE_MGR_TNAME = "__all_virtual_dup_ls_lease_mgr"; +const char *const OB_ALL_VIRTUAL_DUP_LS_TABLET_SET_TNAME = "__all_virtual_dup_ls_tablet_set"; +const char *const OB_ALL_VIRTUAL_DUP_LS_TABLETS_TNAME = "__all_virtual_dup_ls_tablets"; const char *const OB_ALL_VIRTUAL_TX_DATA_TNAME = "__all_virtual_tx_data"; const char *const OB_ALL_VIRTUAL_TASK_OPT_STAT_GATHER_HISTORY_TNAME = "__all_virtual_task_opt_stat_gather_history"; const char *const OB_ALL_VIRTUAL_TABLE_OPT_STAT_GATHER_HISTORY_TNAME = "__all_virtual_table_opt_stat_gather_history"; diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index ebd769d672..851134efda 100644 --- a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -3411,6 +3411,7 @@ def_table_schema( ('paxos_replica_number', 'int', 'false', '-1'), ('data_size', 'int'), ('required_size', 'int', 'false', '0'), + ('learner_list', 'longtext', 'true'), ], ) @@ -3480,6 +3481,9 @@ def_table_schema( ('ls_group_id', 'int'), ('unit_group_id', 'int'), ('primary_zone', 'varchar:MAX_ZONE_LENGTH', 'true'), + ('init_learner_list', 'longtext', 'true'), + ('b_init_learner_list', 'longtext', 'true'), + ('flag', 'varchar:OB_MAX_LS_FLAG_LENGTH', 'false', ''), ], ) @@ -3649,7 +3653,7 @@ def_table_schema( normal_columns = [ ('ls_group_id', 'int'), ('status', 'varchar:100'), - ('flag', 'varchar:100'), + ('flag', 'varchar:OB_MAX_LS_FLAG_LENGTH'), ('create_scn', 'uint'), ], ) @@ -5958,6 +5962,7 @@ def_table_schema( ('paxos_replica_number', 'int', 'false', '-1'), ('data_size', 'int'), ('required_size', 'int', 'false', '0'), + ('learner_list', 'longtext', 'true'), ], ) @@ -10243,6 +10248,7 @@ def_table_schema( ('max_scn', 'uint'), ('arbitration_member', 'varchar:128'), ('degraded_list', 'varchar:1024'), + ('learner_list', 'longtext') ], partition_columns = ['svr_ip', 'svr_port'], @@ -11575,10 +11581,92 @@ def_table_schema(**gen_iterate_virtual_table_def( # 12373: __all_virtual_mds_node_stat # 12374: __all_virtual_mds_event_history # 12375: __all_virtual_time_guard_slow_history -# 12376: __all_virtual_dup_ls_lease_mgr +def_table_schema( + owner = 'wyh329796', + table_name = '__all_virtual_dup_ls_lease_mgr', + table_id = '12376', + table_type = 'VIRTUAL_TABLE', + gm_columns = [], + in_tenant_space = True, + rowkey_columns = [ + ('tenant_id', 'int'), + ('ls_id', 'int'), + ('svr_ip', 'varchar:MAX_IP_ADDR_LENGTH'), + ('svr_port', 'int'), + ('follower_ip', 'varchar:MAX_IP_ADDR_LENGTH'), + ('follower_port', 'int'), + ], + + normal_columns = [ + ('grant_timestamp', 'timestamp'), + ('expired_timestamp', 'timestamp'), + ('remain_us', 'int'), + ('lease_interval_us', 'int'), + ('grant_req_ts', 'int'), + ('cached_req_ts', 'int'), + ('max_replayed_scn', 'int'), + ('max_read_version', 'int'), + ('max_commit_version', 'int'), + ], + + partition_columns = ['svr_ip', 'svr_port'], + vtable_route_policy = 'distributed', +) # 12377: __all_virtual_dup_ls_follower_lease_info -# 12378: __all_virtual_dup_ls_tablet_set -# 12379: 
__all_virtual_dup_ls_tablets +def_table_schema( + owner = 'wyh329796', + table_name = '__all_virtual_dup_ls_tablet_set', + table_id = '12378', + table_type = 'VIRTUAL_TABLE', + gm_columns = [], + in_tenant_space = True, + rowkey_columns = [ + ('tenant_id', 'int'), + ('ls_id', 'int'), + ('svr_ip', 'varchar:MAX_IP_ADDR_LENGTH'), + ('svr_port', 'int'), + ('ls_state', 'varchar:MAX_LS_STATE_LENGTH'), + ('unique_id', 'int'), + ], + + normal_columns = [ + ('attribute', 'varchar:OB_MAX_DUP_TABLE_TABLET_SET_ATTR_LENGTH'), # length:16 + ('count', 'int'), + ('readbale_scn', 'int'), + ('change_scn', 'int'), + ('need_confirm_scn', 'int'), + ('state', 'varchar:OB_MAX_DUP_TABLE_TABLET_SET_STATE_LENGTH'), # length:16 + ('trx_ref', 'int'), + ], + + partition_columns = ['svr_ip', 'svr_port'], + vtable_route_policy = 'distributed', +) +def_table_schema( + owner = 'wyh329796', + table_name = '__all_virtual_dup_ls_tablets', + table_id = '12379', + table_type = 'VIRTUAL_TABLE', + gm_columns = [], + in_tenant_space = True, + rowkey_columns = [ + ('tenant_id', 'int'), + ('ls_id', 'int'), + ('svr_ip', 'varchar:MAX_IP_ADDR_LENGTH'), + ('svr_port', 'int'), + ('ls_state', 'varchar:MAX_LS_STATE_LENGTH'), + ('tablet_id', 'uint'), + ], + + normal_columns = [ + ('unique_id', 'int'), + ('attribute', 'varchar:OB_MAX_DUP_TABLE_TABLET_SET_ATTR_LENGTH'), + ('refresh_schema_timestamp', 'timestamp'), + ], + + partition_columns = ['svr_ip', 'svr_port'], + vtable_route_policy = 'distributed', +) def_table_schema( owner = 'gengli.wzy', @@ -16299,7 +16387,8 @@ def_table_schema( WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" - ELSE NULL END) AS REPLICA_TYPE + ELSE NULL END) AS REPLICA_TYPE, + (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST FROM OCEANBASE.__ALL_VIRTUAL_CORE_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() @@ -16321,7 +16410,8 @@ def_table_schema( WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" - ELSE NULL END) AS REPLICA_TYPE + ELSE NULL END) AS REPLICA_TYPE, + (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST FROM OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() AND TENANT_ID != 1 @@ -16356,7 +16446,8 @@ def_table_schema( WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" - ELSE NULL END) AS REPLICA_TYPE + ELSE NULL END) AS REPLICA_TYPE, + (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST FROM OCEANBASE.__ALL_VIRTUAL_CORE_META_TABLE ) UNION ALL @@ -16377,7 +16468,8 @@ def_table_schema( WHEN 5 THEN "LOGONLY" WHEN 16 THEN "READONLY" WHEN 261 THEN "ENCRYPTION LOGONLY" - ELSE NULL END) AS REPLICA_TYPE + ELSE NULL END) AS REPLICA_TYPE, + (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE "" END) AS LEARNER_LIST FROM OCEANBASE.__ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID != 1 ) @@ -23732,7 +23824,8 @@ def_table_schema( MAX_LSN, MAX_SCN, ARBITRATION_MEMBER, - DEGRADED_LIST + DEGRADED_LIST, + LEARNER_LIST FROM oceanbase.__all_virtual_log_stat """.replace("\n", " "), ) @@ -24092,7 +24185,8 @@ def_table_schema( WHEN A.TENANT_ID = 1 THEN NULL WHEN (A.TENANT_ID & 0x1) = 1 THEN NULL ELSE B.READABLE_SCN - END) AS READABLE_SCN + END) AS READABLE_SCN, + FLAG FROM OCEANBASE.__ALL_VIRTUAL_LS_STATUS AS A JOIN OCEANBASE.__ALL_VIRTUAL_LS_RECOVERY_STAT AS B JOIN OCEANBASE.__ALL_VIRTUAL_LS_ELECTION_REFERENCE_INFO AS C @@ -24142,7 +24236,8 @@ def_table_schema( WHEN A.TENANT_ID = 1 THEN NULL WHEN (A.TENANT_ID & 0x1) = 1 THEN NULL ELSE B.READABLE_SCN - END) AS READABLE_SCN + END) 
AS READABLE_SCN, + FLAG FROM OCEANBASE.__ALL_VIRTUAL_LS_STATUS AS A JOIN OCEANBASE.__ALL_VIRTUAL_LS_RECOVERY_STAT AS B JOIN OCEANBASE.__ALL_VIRTUAL_LS_ELECTION_REFERENCE_INFO AS C @@ -24194,7 +24289,10 @@ SELECT C.SVR_IP AS SVR_IP, C.SVR_PORT AS SVR_PORT, C.ROLE, - C.REPLICA_TYPE + C.REPLICA_TYPE, + CASE WHEN A.DUPLICATE_SCOPE = 1 THEN 'CLUSTER' + ELSE 'NONE' + END AS DUPLICATE_SCOPE FROM ( SELECT DATABASE_ID, TABLE_NAME, @@ -24203,7 +24301,8 @@ FROM ( 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM OCEANBASE.__ALL_VIRTUAL_CORE_ALL_TABLE WHERE TABLET_ID != 0 AND TENANT_ID = EFFECTIVE_TENANT_ID() @@ -24217,7 +24316,8 @@ FROM ( 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM OCEANBASE.__ALL_TABLE WHERE TABLET_ID != 0 AND PART_LEVEL = 0 AND TENANT_ID = 0 @@ -24231,7 +24331,8 @@ FROM ( 'NULL' AS SUBPARTITION_NAME, P.TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM OCEANBASE.__ALL_TABLE T JOIN OCEANBASE.__ALL_PART P ON T.TABLE_ID = P.TABLE_ID AND T.TENANT_ID = P.TENANT_ID WHERE T.PART_LEVEL = 1 AND T.TENANT_ID = 0 @@ -24246,7 +24347,8 @@ FROM ( Q.SUB_PART_NAME AS SUBPARTITION_NAME, Q.TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM OCEANBASE.__ALL_TABLE T, OCEANBASE.__ALL_PART P,OCEANBASE.__ALL_SUB_PART Q WHERE T.TABLE_ID =P.TABLE_ID AND P.TABLE_ID=Q.TABLE_ID AND P.PART_ID = Q.PART_ID AND T.TENANT_ID = P.TENANT_ID AND P.TENANT_ID = Q.TENANT_ID AND T.PART_LEVEL = 2 @@ -24303,7 +24405,10 @@ SELECT C.SVR_IP AS SVR_IP, C.SVR_PORT AS SVR_PORT, C.ROLE, - C.REPLICA_TYPE + C.REPLICA_TYPE, + CASE WHEN A.DUPLICATE_SCOPE = 1 THEN 'CLUSTER' + ELSE 'NONE' + END AS DUPLICATE_SCOPE FROM ( SELECT TENANT_ID, DATABASE_ID, @@ -24313,7 +24418,8 @@ FROM ( 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM OCEANBASE.__ALL_VIRTUAL_CORE_ALL_TABLE WHERE TABLET_ID != 0 @@ -24328,7 +24434,8 @@ FROM ( 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM OCEANBASE.__ALL_VIRTUAL_TABLE WHERE TABLET_ID != 0 AND PART_LEVEL = 0 @@ -24343,7 +24450,8 @@ FROM ( 'NULL' AS SUBPARTITION_NAME, P.TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM OCEANBASE.__ALL_VIRTUAL_TABLE T JOIN OCEANBASE.__ALL_VIRTUAL_PART P ON T.TABLE_ID = P.TABLE_ID WHERE T.TENANT_ID = P.TENANT_ID AND T.PART_LEVEL = 1 @@ -24358,7 +24466,8 @@ FROM ( Q.SUB_PART_NAME AS SUBPARTITION_NAME, Q.TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM OCEANBASE.__ALL_VIRTUAL_TABLE T, OCEANBASE.__ALL_VIRTUAL_PART P,OCEANBASE.__ALL_VIRTUAL_SUB_PART Q WHERE T.TABLE_ID =P.TABLE_ID AND P.TABLE_ID=Q.TABLE_ID AND P.PART_ID =Q.PART_ID AND T.TENANT_ID = P.TENANT_ID AND P.TENANT_ID = Q.TENANT_ID AND T.PART_LEVEL = 2 @@ -41580,7 +41689,8 @@ def_table_schema( WHEN 5 THEN 'LOGONLY' WHEN 16 THEN 'READONLY' WHEN 261 THEN 'ENCRYPTION LOGONLY' - ELSE NULL END) AS REPLICA_TYPE + ELSE NULL END) AS REPLICA_TYPE, + (CASE ROLE WHEN 1 THEN LEARNER_LIST ELSE NULL END) AS LEARNER_LIST FROM SYS.ALL_VIRTUAL_LS_META_TABLE WHERE TENANT_ID = EFFECTIVE_TENANT_ID() @@ -49863,7 +49973,8 @@ def_table_schema( MAX_LSN, MAX_SCN, ARBITRATION_MEMBER, - DEGRADED_LIST + DEGRADED_LIST, + LEARNER_LIST FROM SYS.ALL_VIRTUAL_LOG_STAT """.replace("\n", " "), ) @@ -50042,7 
+50153,10 @@ SELECT C.SVR_IP AS SVR_IP, C.SVR_PORT AS SVR_PORT, C.ROLE, - C.REPLICA_TYPE + C.REPLICA_TYPE, + CASE WHEN A.DUPLICATE_SCOPE = 1 THEN 'CLUSTER' + ELSE 'NONE' + END AS DUPLICATE_SCOPE FROM ( SELECT TENANT_ID, DATABASE_ID, @@ -50052,7 +50166,8 @@ FROM ( 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM SYS.ALL_VIRTUAL_CORE_ALL_TABLE WHERE TABLET_ID != 0 AND TENANT_ID = EFFECTIVE_TENANT_ID() @@ -50067,7 +50182,8 @@ FROM ( 'NULL' AS SUBPARTITION_NAME, TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T WHERE T.TABLET_ID != 0 AND T.PART_LEVEL = 0 AND T.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -50082,7 +50198,8 @@ FROM ( 'NULL' AS SUBPARTITION_NAME, P.TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON T.TABLE_ID = P.TABLE_ID AND T.TENANT_ID = P.TENANT_ID WHERE T.PART_LEVEL = 1 AND T.TENANT_ID = EFFECTIVE_TENANT_ID() @@ -50098,7 +50215,8 @@ FROM ( Q.SUB_PART_NAME AS SUBPARTITION_NAME, Q.TABLET_ID AS TABLET_ID, TABLE_TYPE, - DATA_TABLE_ID + DATA_TABLE_ID, + DUPLICATE_SCOPE FROM SYS.ALL_VIRTUAL_SUB_PART_REAL_AGENT Q JOIN SYS.ALL_VIRTUAL_PART_REAL_AGENT P ON P.PART_ID =Q.PART_ID AND Q.TENANT_ID = P.TENANT_ID JOIN SYS.ALL_VIRTUAL_TABLE_REAL_AGENT T ON T.TABLE_ID =P.TABLE_ID AND T.TENANT_ID = Q.TENANT_ID diff --git a/src/share/location_cache/ob_location_struct.cpp b/src/share/location_cache/ob_location_struct.cpp index a8da133830..b0c2d5ea65 100644 --- a/src/share/location_cache/ob_location_struct.cpp +++ b/src/share/location_cache/ob_location_struct.cpp @@ -372,6 +372,22 @@ bool ObLSLocation::operator!=(const ObLSLocation &other) const return !(*this == other); } +int ObLSLocation::get_replica_count(int64_t &full_replica_cnt, int64_t &readonly_replica_cnt) +{ + int ret = OB_SUCCESS; + full_replica_cnt = 0; + readonly_replica_cnt = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < replica_locations_.count(); ++i) { + const ObLSReplicaLocation &replica = replica_locations_.at(i); + if (REPLICA_TYPE_FULL == replica.get_replica_type()) { + full_replica_cnt++; + } else if (REPLICA_TYPE_READONLY == replica.get_replica_type()) { + readonly_replica_cnt++; + } + } + return ret; +} + int ObLSLocation::get_leader(common::ObAddr &leader) const { int ret = OB_LS_LOCATION_LEADER_NOT_EXIST; diff --git a/src/share/location_cache/ob_location_struct.h b/src/share/location_cache/ob_location_struct.h index 4b0fc18224..6fc4499aab 100644 --- a/src/share/location_cache/ob_location_struct.h +++ b/src/share/location_cache/ob_location_struct.h @@ -190,6 +190,7 @@ public: inline uint64_t get_tenant_id() const { return cache_key_.get_tenant_id(); } inline ObLSID get_ls_id() const { return cache_key_.get_ls_id(); } const ObLSLocationCacheKey &get_cache_key() const { return cache_key_; } + int get_replica_count(int64_t &full_replica_cnt, int64_t &readonly_replica_cnt); inline const common::ObIArray &get_replica_locations() const { return replica_locations_; diff --git a/src/share/ls/ob_ls_creator.cpp b/src/share/ls/ob_ls_creator.cpp old mode 100644 new mode 100755 index 9a91b89878..a5a49dc7cf --- a/src/share/ls/ob_ls_creator.cpp +++ b/src/share/ls/ob_ls_creator.cpp @@ -109,6 +109,7 @@ int ObLSCreator::create_sys_tenant_ls( const SCN create_scn = SCN::base_scn();//SYS_LS no need create_scn palf::PalfBaseInfo palf_base_info; common::ObMember arbitration_service; + 
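For illustration, a minimal standalone sketch of the tally performed by the new ObLSLocation::get_replica_count() shown above (plain C++ stand-ins for the replica types; not the actual ObLSLocation/ObLSReplicaLocation API):

  #include <cstdint>
  #include <vector>

  enum class ReplicaType { FULL, READONLY, OTHER };

  struct ReplicaLocation { ReplicaType type; };

  // One pass over the replica array, two counters; every other replica
  // type is ignored, mirroring the loop in ObLSLocation::get_replica_count().
  void get_replica_count(const std::vector<ReplicaLocation> &replicas,
                         int64_t &full_replica_cnt, int64_t &readonly_replica_cnt)
  {
    full_replica_cnt = 0;
    readonly_replica_cnt = 0;
    for (const ReplicaLocation &r : replicas) {
      if (ReplicaType::FULL == r.type) {
        ++full_replica_cnt;
      } else if (ReplicaType::READONLY == r.type) {
        ++readonly_replica_cnt;
      }
    }
  }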
common::GlobalLearnerList learner_list; for (int64_t i = 0; OB_SUCC(ret) && i < rs_list.count(); ++i) { replica_addr.reset(); if (rs_list.at(i).zone_ != unit_array.at(i).zone_) { @@ -131,11 +132,11 @@ int ObLSCreator::create_sys_tenant_ls( if (OB_FAIL(ret)) { } else if (OB_FAIL(create_ls_(addr, paxos_replica_num, tenant_info, create_scn, compat_mode, false/*create_with_palf*/, palf_base_info, - member_list, arbitration_service))) { + member_list, arbitration_service, learner_list))) { LOG_WARN("failed to create log stream", KR(ret), K_(id), K_(tenant_id), K(addr), K(paxos_replica_num), K(tenant_info), K(create_scn), K(compat_mode), K(palf_base_info)); - } else if (OB_FAIL(set_member_list_(member_list, arbitration_service, paxos_replica_num))) { + } else if (OB_FAIL(set_member_list_(member_list, arbitration_service, paxos_replica_num, learner_list))) { LOG_WARN("failed to set member list", KR(ret), K(member_list), K(arbitration_service), K(paxos_replica_num)); } } @@ -147,14 +148,14 @@ int ObLSCreator::create_sys_tenant_ls( if (OB_FAIL(ret)) { \ } else if (0 >= member_list.get_member_number()) { \ if (OB_FAIL(do_create_ls_(addr, arbitration_service, status_info, paxos_replica_num, \ - create_scn, compat_mode, member_list, create_with_palf, palf_base_info))) { \ + create_scn, compat_mode, member_list, create_with_palf, palf_base_info, learner_list))) { \ LOG_WARN("failed to create log stream", KR(ret), K_(id), \ K_(tenant_id), K(addr), K(paxos_replica_num), \ K(status_info), K(create_scn), K(palf_base_info)); \ } \ } \ if (FAILEDx(process_after_has_member_list_(member_list, arbitration_service, \ - paxos_replica_num))) { \ + paxos_replica_num, learner_list))) { \ LOG_WARN("failed to process after has member list", KR(ret), \ K(member_list), K(paxos_replica_num)); \ } \ @@ -192,11 +193,21 @@ int ObLSCreator::create_user_ls( share::ObLSStatusInfo exist_status_info; share::ObLSStatusOperator ls_operator; ObMember arbitration_service; - if (OB_FAIL(alloc_user_ls_addr(tenant_id_, status_info.unit_group_id_, - zone_locality, addr))) { + common::GlobalLearnerList learner_list; + if (status_info.is_duplicate_ls()) { + if (OB_FAIL(alloc_duplicate_ls_addr_(tenant_id_, zone_locality, addr))) { + LOG_WARN("failed to alloc duplicate ls addr", KR(ret), K_(tenant_id)); + } else { + LOG_INFO("finish alloc duplicate ls addr", K_(tenant_id), K(addr)); + } + } else if (OB_FAIL(alloc_user_ls_addr(tenant_id_, status_info.unit_group_id_, + zone_locality, addr))) { LOG_WARN("failed to alloc user ls addr", KR(ret), K(tenant_id_), K(status_info)); + } + + if (OB_FAIL(ret)) { } else if (OB_FAIL(ls_operator.get_ls_init_member_list(tenant_id_, id_, member_list, - exist_status_info, *proxy_, arbitration_service))) { + exist_status_info, *proxy_, arbitration_service, learner_list))) { LOG_WARN("failed to get ls init member list", KR(ret), K(tenant_id_), K(id_)); } else if (status_info.ls_is_created()) { } else if (status_info.ls_group_id_ != exist_status_info.ls_group_id_ @@ -225,7 +236,7 @@ int ObLSCreator::create_tenant_sys_ls( { int ret = OB_SUCCESS; LOG_INFO("start to create log stream", K_(id), K_(tenant_id)); - const int64_t start_time = ObTimeUtility::current_time(); + const int64_t start_time = ObTimeUtility::current_time(); share::ObLSStatusInfo status_info; if (OB_UNLIKELY(!is_valid())) { ret = OB_INVALID_ARGUMENT; @@ -247,15 +258,17 @@ int ObLSCreator::create_tenant_sys_ls( const SCN create_scn = SCN::base_scn(); share::ObLSStatusOperator ls_operator; ObMember arbitration_service; + common::GlobalLearnerList 
learner_list; + ObLSFlag flag(ObLSFlag::NORMAL_FLAG); // TODO: sys ls should be duplicate if (OB_FAIL(status_info.init(tenant_id_, id_, 0, share::OB_LS_CREATING, 0, - primary_zone))) { + primary_zone, flag))) { LOG_WARN("failed to init ls info", KR(ret), K(id_), K(primary_zone), - K(tenant_id_)); + K(tenant_id_), K(flag)); } else if (OB_FAIL(alloc_sys_ls_addr(tenant_id_, pool_list, zone_locality, addr))) { LOG_WARN("failed to alloc user ls addr", KR(ret), K(tenant_id_), K(pool_list)); } else { - ret = ls_operator.get_ls_init_member_list(tenant_id_, id_, member_list, exist_status_info, *proxy_, arbitration_service); + ret = ls_operator.get_ls_init_member_list(tenant_id_, id_, member_list, exist_status_info, *proxy_, arbitration_service, learner_list); if (OB_FAIL(ret) && OB_ENTRY_NOT_EXIST != ret) { LOG_WARN("failed to get log stream member list", KR(ret), K_(id), K(tenant_id_)); } else if (OB_SUCC(ret) && status_info.ls_is_created()) { @@ -288,7 +301,8 @@ int ObLSCreator::do_create_ls_(const ObLSAddr &addr, const common::ObCompatibilityMode &compat_mode, ObMemberList &member_list, const bool create_with_palf, - const palf::PalfBaseInfo &palf_base_info) + const palf::PalfBaseInfo &palf_base_info, + common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; ObAllTenantInfo tenant_info; @@ -304,12 +318,12 @@ int ObLSCreator::do_create_ls_(const ObLSAddr &addr, } else if (OB_FAIL(ObAllTenantInfoProxy::load_tenant_info(tenant_id_, proxy_, false, tenant_info))) { LOG_WARN("failed to load tenant info", KR(ret), K_(tenant_id)); } else if (OB_FAIL(create_ls_(addr, paxos_replica_num, tenant_info, create_scn, - compat_mode, create_with_palf, palf_base_info, member_list, arbitration_service))) { + compat_mode, create_with_palf, palf_base_info, member_list, arbitration_service, learner_list))) { LOG_WARN("failed to create log stream", KR(ret), K_(id), K_(tenant_id), K(create_with_palf), - K(addr), K(paxos_replica_num), K(tenant_info), K(create_scn), K(compat_mode), K(palf_base_info)); - } else if (OB_FAIL(persist_ls_member_list_(member_list, arbitration_service))) { + K(addr), K(paxos_replica_num), K(tenant_info), K(create_scn), K(compat_mode), K(palf_base_info), K(learner_list)); + } else if (OB_FAIL(persist_ls_member_list_(member_list, arbitration_service, learner_list))) { LOG_WARN("failed to persist log stream member list", KR(ret), - K(member_list), K(arbitration_service)); + K(member_list), K(arbitration_service), K(learner_list)); } return ret; } @@ -317,15 +331,16 @@ int ObLSCreator::do_create_ls_(const ObLSAddr &addr, int ObLSCreator::process_after_has_member_list_( const common::ObMemberList &member_list, const common::ObMember &arbitration_service, - const int64_t paxos_replica_num) + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret)); - } else if (OB_FAIL(set_member_list_(member_list, arbitration_service, paxos_replica_num))) { + } else if (OB_FAIL(set_member_list_(member_list, arbitration_service, paxos_replica_num, learner_list))) { LOG_WARN("failed to set member list", KR(ret), K_(id), K_(tenant_id), - K(member_list), K(arbitration_service), K(paxos_replica_num)); + K(member_list), K(arbitration_service), K(paxos_replica_num), K(learner_list)); } else if (OB_ISNULL(proxy_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("sql proxy is null", KR(ret)); @@ -334,7 +349,7 @@ int ObLSCreator::process_after_has_member_list_( 
DEBUG_SYNC(BEFORE_PROCESS_AFTER_HAS_MEMBER_LIST); share::ObLSStatusOperator ls_operator; if (OB_FAIL(ls_operator.update_ls_status( - tenant_id_, id_, share::OB_LS_CREATING, share::OB_LS_CREATED, + tenant_id_, id_, share::OB_LS_CREATING, share::OB_LS_CREATED, share::NORMAL_SWITCHOVER_STATUS, *proxy_))) { LOG_WARN("failed to update ls status", KR(ret), K(id_)); } else if (id_.is_sys_ls()) { @@ -356,7 +371,8 @@ int ObLSCreator::create_ls_(const ObILSAddr &addrs, const bool create_with_palf, const palf::PalfBaseInfo &palf_base_info, common::ObMemberList &member_list, - common::ObMember &arbitration_service) + common::ObMember &arbitration_service, + common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_valid())) { @@ -403,8 +419,8 @@ int ObLSCreator::create_ls_(const ObILSAddr &addrs, ret = OB_SUCC(ret) ? tmp_ret : ret; LOG_WARN("failed to wait all async rpc", KR(ret), KR(tmp_ret), K(rpc_count)); } - if (FAILEDx(check_create_ls_result_(rpc_count, paxos_replica_num, return_code_array, member_list))) { - LOG_WARN("failed to check ls result", KR(ret), K(rpc_count), K(paxos_replica_num), K(return_code_array)); + if (FAILEDx(check_create_ls_result_(rpc_count, paxos_replica_num, return_code_array, member_list, learner_list))) { + LOG_WARN("failed to check ls result", KR(ret), K(rpc_count), K(paxos_replica_num), K(return_code_array), K(learner_list)); } } @@ -416,10 +432,12 @@ int ObLSCreator::create_ls_(const ObILSAddr &addrs, int ObLSCreator::check_create_ls_result_(const int64_t rpc_count, const int64_t paxos_replica_num, const ObIArray &return_code_array, - common::ObMemberList &member_list) + common::ObMemberList &member_list, + common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; member_list.reset(); + learner_list.reset(); if (OB_UNLIKELY(!is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret)); @@ -454,8 +472,14 @@ int ObLSCreator::check_create_ls_result_(const int64_t rpc_count, } else if (OB_UNLIKELY(!addr.is_valid())) { ret = OB_NEED_RETRY; LOG_WARN("addr is invalid, ls create failed", KR(ret), K(addr)); - } else if (OB_FAIL(member_list.add_member(ObMember(addr, timestamp)))) { - LOG_WARN("failed to add member", KR(ret), K(addr)); + } else if (result->get_replica_type() == REPLICA_TYPE_FULL) { + if (OB_FAIL(member_list.add_member(ObMember(addr, timestamp)))) { + LOG_WARN("failed to add member", KR(ret), K(addr)); + } + } else if (result->get_replica_type() == REPLICA_TYPE_READONLY) { + if (OB_FAIL(learner_list.add_learner(ObMember(addr, timestamp)))) { + LOG_WARN("failed to add member", KR(ret), K(addr)); + } } LOG_TRACE("create ls result", KR(ret), K(i), K(addr), KPC(result), K(rpc_count)); } @@ -471,7 +495,8 @@ int ObLSCreator::check_create_ls_result_(const int64_t rpc_count, } int ObLSCreator::persist_ls_member_list_(const common::ObMemberList &member_list, - const ObMember &arb_member) + const ObMember &arb_member, + const common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; DEBUG_SYNC(BEFORE_SET_LS_MEMBER_LIST); @@ -486,18 +511,20 @@ int ObLSCreator::persist_ls_member_list_(const common::ObMemberList &member_list LOG_WARN("sql proxy is null", KR(ret)); } else { share::ObLSStatusOperator ls_operator; - if (OB_FAIL(ls_operator.update_init_member_list(tenant_id_, id_, member_list, *proxy_, arb_member))) { - LOG_WARN("failed to insert ls", KR(ret), K(member_list), K(arb_member)); + if (OB_FAIL(ls_operator.update_init_member_list(tenant_id_, id_, member_list, *proxy_, arb_member, learner_list))) { + 
LOG_WARN("failed to insert ls", KR(ret), K(member_list), K(arb_member), K(learner_list)); } } return ret; } -int ObLSCreator::check_member_list_all_in_meta_table_(const common::ObMemberList &member_list) +int ObLSCreator::check_member_list_and_learner_list_all_in_meta_table_( + const common::ObMemberList &member_list, + const common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; - bool has_replica_only_in_member_list = true; + bool has_replica_only_in_member_list_or_learner_list = true; ObLSInfo ls_info_to_check; const int64_t retry_interval_us = 1000l * 1000l; // 1s ObTimeoutCtx ctx; @@ -508,8 +535,8 @@ int ObLSCreator::check_member_list_all_in_meta_table_(const common::ObMemberList } else if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(ctx, GCONF.internal_sql_execute_timeout))) { LOG_WARN("failed to set default timeout", KR(ret)); } else { - while (OB_SUCC(ret) && has_replica_only_in_member_list) { - has_replica_only_in_member_list = false; + while (OB_SUCC(ret) && has_replica_only_in_member_list_or_learner_list) { + has_replica_only_in_member_list_or_learner_list = false; if (ctx.is_timeouted()) { ret = OB_TIMEOUT; LOG_WARN("wait member list all reported to meta table timeout", KR(ret), K(member_list), K_(tenant_id), K_(id)); @@ -526,13 +553,29 @@ int ObLSCreator::check_member_list_all_in_meta_table_(const common::ObMemberList if (OB_SUCCESS == tmp_ret) { // replica exists, bypass } else { - has_replica_only_in_member_list = true; + has_replica_only_in_member_list_or_learner_list = true; LOG_INFO("has replica only in member list", KR(tmp_ret), K(member_list), K(ls_info_to_check), K(i), K(server)); break; } } } - if (OB_SUCC(ret) && has_replica_only_in_member_list) { + for (int64_t i = 0; OB_SUCC(ret) && i < learner_list.get_member_number(); ++i) { + const share::ObLSReplica *replica = nullptr; + common::ObAddr server; + if (OB_FAIL(learner_list.get_server_by_index(i, server))) { + LOG_WARN("fail to get server by index", KR(ret), K(i), K(learner_list)); + } else { + int tmp_ret = ls_info_to_check.find(server, replica); + if (OB_SUCCESS == tmp_ret) { + // replica exists, bypass + } else { + has_replica_only_in_member_list_or_learner_list = true; + LOG_INFO("has replica only in learner list", KR(tmp_ret), K(learner_list), K(ls_info_to_check), K(i), K(server)); + break; + } + } + } + if (OB_SUCC(ret) && has_replica_only_in_member_list_or_learner_list) { ob_usleep(retry_interval_us); } } @@ -543,7 +586,8 @@ int ObLSCreator::check_member_list_all_in_meta_table_(const common::ObMemberList int ObLSCreator::set_member_list_(const common::ObMemberList &member_list, const common::ObMember &arbitration_service, - const int64_t paxos_replica_num) + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!is_valid())) { @@ -553,25 +597,24 @@ int ObLSCreator::set_member_list_(const common::ObMemberList &member_list, || member_list.get_member_number() < rootserver::majority(paxos_replica_num))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(member_list), K(paxos_replica_num)); - } else if (!is_sys_tenant(tenant_id_) && OB_FAIL(check_member_list_all_in_meta_table_(member_list))) { - LOG_WARN("fail to check member_list all in meta table", KR(ret), K(member_list), K_(tenant_id), K_(id)); + } else if (!is_sys_tenant(tenant_id_) && OB_FAIL(check_member_list_and_learner_list_all_in_meta_table_(member_list, learner_list))) { + LOG_WARN("fail to check member_list all in meta table", KR(ret), K(member_list), 
K(learner_list), K_(tenant_id), K_(id)); } else { ObTimeoutCtx ctx; if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(ctx, GCONF.rpc_timeout))) { LOG_WARN("fail to set timeout ctx", KR(ret)); } else { - ObSetMemberListArgV2 arg; int64_t rpc_count = 0; int tmp_ret = OB_SUCCESS; ObArray return_code_array; - if (OB_FAIL(arg.init(tenant_id_, id_, paxos_replica_num, member_list, arbitration_service))) { - LOG_WARN("failed to init set member list arg", KR(ret), K_(id), K_(tenant_id), - K(paxos_replica_num), K(member_list), K(arbitration_service)); - } for (int64_t i = 0; OB_SUCC(ret) && i < member_list.get_member_number(); ++i) { ObAddr addr; rpc_count++; - if (OB_FAIL(member_list.get_server_by_index(i, addr))) { + ObSetMemberListArgV2 arg; + if (OB_FAIL(arg.init(tenant_id_, id_, paxos_replica_num, member_list, arbitration_service, learner_list))) { + LOG_WARN("failed to init set member list arg", KR(ret), K_(id), K_(tenant_id), + K(paxos_replica_num), K(member_list), K(arbitration_service), K(learner_list)); + } else if (OB_FAIL(member_list.get_server_by_index(i, addr))) { LOG_WARN("failed to get member by index", KR(ret), K(i), K(member_list)); } else if (OB_TMP_FAIL(set_member_list_proxy_.call(addr, ctx.get_timeout(), GCONF.cluster_id, tenant_id_, arg))) { @@ -716,6 +759,95 @@ int ObLSCreator::alloc_user_ls_addr( return ret; } +int ObLSCreator::alloc_duplicate_ls_addr_( + const uint64_t tenant_id, + const share::schema::ZoneLocalityIArray &zone_locality_array, + ObILSAddr &ls_addr) +{ + //TODO: alloc_sys_ls_addr and alloc_duplicate_ls_addr should merge into one function + int ret = OB_SUCCESS; + ObUnitTableOperator unit_operator; + common::ObArray unit_info_array; + + if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id + || zone_locality_array.count() <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(zone_locality_array)); + } else if (OB_ISNULL(proxy_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("proxy ptr is null", KR(ret)); + } else if (OB_FAIL(unit_operator.init(*proxy_))) { + LOG_WARN("unit operator init failed", KR(ret)); + } else if (OB_FAIL(unit_operator.get_units_by_tenant(tenant_id, unit_info_array))) { + LOG_WARN("fail to get unit info array", KR(ret), K(tenant_id)); + } else { + ls_addr.reset(); + const bool is_duplicate_ls = true; + for (int64_t i = 0; OB_SUCC(ret) && i < zone_locality_array.count(); ++i) { + const share::ObZoneReplicaAttrSet &zone_locality = zone_locality_array.at(i); + ObLSReplicaAddr replica_addr; + if (OB_FAIL(alloc_zone_ls_addr(is_duplicate_ls, zone_locality, unit_info_array, replica_addr))) { + LOG_WARN("fail to alloc zone ls addr", KR(ret), K(zone_locality), K(unit_info_array)); + } else if (OB_FAIL(ls_addr.push_back(replica_addr))) { + LOG_WARN("fail to push back", KR(ret)); + } else if (OB_FAIL(compensate_zone_readonly_replica_( + zone_locality, + replica_addr, + unit_info_array, + ls_addr))) { + LOG_WARN("fail to compensate readonly replica", KR(ret), + K(zone_locality), K(replica_addr), K(ls_addr)); + } + } + } + return ret; +} + +int ObLSCreator::compensate_zone_readonly_replica_( + const share::ObZoneReplicaAttrSet &zlocality, + const ObLSReplicaAddr &exclude_replica, + const common::ObIArray &unit_info_array, + ObILSAddr &ls_addr) +{ + int ret = OB_SUCCESS; + const common::ObZone &locality_zone = zlocality.zone_; + const uint64_t unit_group_id = 0; // duplicate log stream + if (OB_UNLIKELY(0 >= unit_info_array.count())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(unit_info_array)); + 
} else { + for (int64_t i = 0; OB_SUCC(ret) && i < unit_info_array.count(); ++i) { + const share::ObUnit &unit = unit_info_array.at(i); + if (locality_zone != unit.zone_) { + // not match + } else if (exclude_replica.unit_id_ == unit.unit_id_) { + // already exists in ls_addr + } else if (ObUnit::UNIT_STATUS_DELETING == unit.status_) { + // unit may be deleting + LOG_TRACE("unit is not active", K(unit)); + } else { + ObLSReplicaAddr ls_replica_addr; + const int64_t m_percent = 100; + ObReplicaProperty replica_property; + replica_property.set_memstore_percent(m_percent); + if (OB_FAIL(ls_replica_addr.init( + unit.server_, + ObReplicaType::REPLICA_TYPE_READONLY, + replica_property, + unit_group_id, + unit.unit_id_, + locality_zone))) { + LOG_WARN("fail to init ls replica addr", KR(ret), K(unit), K(replica_property), + K(unit_group_id), K(locality_zone)); + } else if (OB_FAIL(ls_addr.push_back(ls_replica_addr))) { + LOG_WARN("fail to push back", KR(ret), K(ls_replica_addr)); + } + } + } + } + return ret; +} + int ObLSCreator::alloc_zone_ls_addr( const bool is_sys_ls, const share::ObZoneReplicaAttrSet &zlocality, @@ -723,7 +855,7 @@ int ObLSCreator::alloc_zone_ls_addr( ObLSReplicaAddr &ls_replica_addr) { int ret = OB_SUCCESS; - + bool found = false; const common::ObZone &locality_zone = zlocality.zone_; ls_replica_addr.reset(); @@ -787,6 +919,23 @@ int ObLSCreator::alloc_zone_ls_addr( KR(ret), K(unit), K(replica_property), K(unit_group_id), K(locality_zone)); } + } else if (zlocality.replica_attr_set_.get_readonly_replica_attr_array().count() > 0) { + const int64_t m_percent = zlocality.replica_attr_set_ + .get_readonly_replica_attr_array().at(0) + .memstore_percent_; + ObReplicaProperty replica_property; + replica_property.set_memstore_percent(m_percent); + if (OB_FAIL(ls_replica_addr.init( + unit.server_, + ObReplicaType::REPLICA_TYPE_READONLY, + replica_property, + unit_group_id, + unit.unit_id_, + locality_zone))) { + LOG_WARN("fail to init ls replica addr", + KR(ret), K(unit), K(replica_property), K(unit_group_id), + K(locality_zone)); + } } else { // zone locality shall has a paxos replica in 4.0 by // now(2021.10.25) ret = OB_NOT_SUPPORTED; diff --git a/src/share/ls/ob_ls_creator.h b/src/share/ls/ob_ls_creator.h old mode 100644 new mode 100755 index 53801d211a..8159d08957 --- a/src/share/ls/ob_ls_creator.h +++ b/src/share/ls/ob_ls_creator.h @@ -120,10 +120,12 @@ private: const common::ObCompatibilityMode &compat_mode, common::ObMemberList &member_list, const bool create_with_palf, - const palf::PalfBaseInfo &palf_base_info); + const palf::PalfBaseInfo &palf_base_info, + common::GlobalLearnerList &learner_list); int process_after_has_member_list_(const common::ObMemberList &member_list, const common::ObMember &arbitration_service, - const int64_t paxos_replica_num); + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list); int create_ls_(const ObILSAddr &addr, const int64_t paxos_replica_num, const share::ObAllTenantInfo &tenant_info, const SCN &create_scn, @@ -131,13 +133,18 @@ private: const bool create_with_palf, const palf::PalfBaseInfo &palf_base_info, common::ObMemberList &member_list, - common::ObMember &arbitration_service); - int check_member_list_all_in_meta_table_(const common::ObMemberList &member_list); + common::ObMember &arbitration_service, + common::GlobalLearnerList &learner_list); + int check_member_list_and_learner_list_all_in_meta_table_( + const common::ObMemberList &member_list, + const common::GlobalLearnerList &learner_list); int 
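For a duplicate log stream, alloc_duplicate_ls_addr_ places one replica per zone and then compensate_zone_readonly_replica_ turns every remaining active unit of that zone into a READONLY address. A simplified standalone model of that compensation step (plain structs instead of ObUnit/ObLSReplicaAddr; illustrative only):

  #include <cstdint>
  #include <string>
  #include <vector>

  struct Unit {
    uint64_t unit_id;
    std::string zone;
    std::string server;
    bool deleting;  // stands in for ObUnit::UNIT_STATUS_DELETING
  };

  struct ReplicaAddr {
    std::string server;
    bool readonly;  // true => REPLICA_TYPE_READONLY
    uint64_t unit_id;
  };

  // For one zone: skip units of other zones, skip the unit that already
  // carries the replica chosen by alloc_zone_ls_addr, skip deleting units,
  // and turn every remaining unit into a READONLY address.
  void compensate_zone_readonly_replica(const std::string &zone,
                                        const ReplicaAddr &exclude_replica,
                                        const std::vector<Unit> &units,
                                        std::vector<ReplicaAddr> &ls_addr)
  {
    for (const Unit &unit : units) {
      if (unit.zone != zone) {
        // not this zone
      } else if (unit.unit_id == exclude_replica.unit_id) {
        // already holds the allocated replica
      } else if (unit.deleting) {
        // unit is being removed, do not place a replica on it
      } else {
        ls_addr.push_back(ReplicaAddr{unit.server, true, unit.unit_id});
      }
    }
  }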
set_member_list_(const common::ObMemberList &member_list, const common::ObMember &arbitration_service, - const int64_t paxos_replica_num); + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list); int persist_ls_member_list_(const common::ObMemberList &member_list, - const ObMember &arb_member); + const ObMember &arb_member, + const common::GlobalLearnerList &learner_list); // interface for oceanbase 4.0 int alloc_sys_ls_addr(const uint64_t tenant_id, @@ -156,11 +163,32 @@ private: int check_create_ls_result_(const int64_t rpc_count, const int64_t paxos_replica_num, const ObIArray &return_code_array, - common::ObMemberList &member_list); + common::ObMemberList &member_list, + common::GlobalLearnerList &learner_list); int check_set_memberlist_result_(const int64_t rpc_count, const ObIArray &return_code_array, const int64_t paxos_replica_num); + // alloc ls addr for duplicate log stream + // @params[in] tenant_id, which tenant's log stream + // @params[in] zone_locality_array, locality describtion + // @params[out] ls_addr, which server to create this log stream + int alloc_duplicate_ls_addr_( + const uint64_t tenant_id, + const share::schema::ZoneLocalityIArray &zone_locality_array, + ObILSAddr &ls_addr); + + // compensate readonly replica for duplicate ls + // @params[in] zlocality, locality describtion in one zone + // @params[in] exclude_replica, already allocated-replica in locality + // @params[in] unit_info_array, tenant's all unit + // @params[out] ls_addr, which server to create this lpg stream + int compensate_zone_readonly_replica_( + const share::ObZoneReplicaAttrSet &zlocality, + const ObLSReplicaAddr &exclude_replica, + const common::ObIArray &unit_info_array, + ObILSAddr &ls_addr); + private: rootserver::ObLSCreatorProxy create_ls_proxy_; rootserver::ObSetMemberListProxy set_member_list_proxy_; diff --git a/src/share/ls/ob_ls_info.cpp b/src/share/ls/ob_ls_info.cpp index 364554e040..fc6622bfcf 100644 --- a/src/share/ls/ob_ls_info.cpp +++ b/src/share/ls/ob_ls_info.cpp @@ -15,6 +15,8 @@ #include "share/ls/ob_ls_info.h" // for decalrations of functions in this cpp #include "share/config/ob_server_config.h" // for KR(), common::ob_error_name(x) #include "share/ls/ob_ls_replica_filter.h" // ObLSReplicaFilter +#include "share/ob_share_util.h" // ObShareUtils +#include "lib/string/ob_sql_string.h" // ObSqlString namespace oceanbase { @@ -119,7 +121,9 @@ ObLSReplica::ObLSReplica() data_size_(0), required_size_(0), in_member_list_(false), - member_time_us_(0) + member_time_us_(0), + learner_list_(), + in_learner_list_(false) { } @@ -151,6 +155,8 @@ void ObLSReplica::reset() required_size_ = 0; in_member_list_ = false; member_time_us_ = 0; + learner_list_.reset(); + in_learner_list_ = false; } int ObLSReplica::init( @@ -170,7 +176,9 @@ int ObLSReplica::init( const ObString &zone, const int64_t paxos_replica_number, const int64_t data_size, - const int64_t required_size) + const int64_t required_size, + const MemberList &member_list, + const GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; reset(); @@ -182,6 +190,10 @@ int ObLSReplica::init( LOG_WARN("fail to assign memstore_percent", KR(ret), K(memstore_percent)); } else if (OB_FAIL(zone_.assign(zone))) { LOG_WARN("fail to assign zone", KR(ret), K(zone)); + } else if (OB_FAIL(member_list_.assign(member_list))) { + LOG_WARN("failed to assign member list", KR(ret), K(member_list)); + } else if (OB_FAIL(learner_list_.deep_copy(learner_list))) { + LOG_WARN("failed to deep copy learner list", KR(ret), 
K(learner_list)); } else { create_time_us_ = create_time_us; modify_time_us_ = modify_time_us; @@ -209,6 +221,8 @@ int ObLSReplica::assign(const ObLSReplica &other) reset(); if (OB_FAIL(copy_assign(member_list_, other.member_list_))) { LOG_WARN("failed to assign member_list_", KR(ret)); + } else if (OB_FAIL(copy_assign(learner_list_, other.learner_list_))) { + LOG_WARN("failed to assign learner_list_", KR(ret)); } else if (OB_FAIL(copy_assign(property_, other.property_))) { LOG_WARN("fail to assign property", KR(ret)); } else if (OB_FAIL(zone_.assign(other.zone_))) { @@ -233,6 +247,7 @@ int ObLSReplica::assign(const ObLSReplica &other) required_size_ = other.required_size_; in_member_list_ = other.in_member_list_; member_time_us_ = other.member_time_us_; + in_learner_list_ = other.in_learner_list_; } } return ret; @@ -249,7 +264,6 @@ bool ObLSReplica::is_equal_for_report(const ObLSReplica &other) const && sql_port_ == other.sql_port_ && role_ == other.role_ && member_list_is_equal(member_list_, other.member_list_) - && replica_type_ == other.replica_type_ && replica_status_ == other.replica_status_ && restore_status_ == other.restore_status_ && property_ == other.property_ @@ -258,11 +272,45 @@ bool ObLSReplica::is_equal_for_report(const ObLSReplica &other) const && paxos_replica_number_ == other.paxos_replica_number_) { is_equal = true; } + // only proposal_id of leader is meaningful // proposal_id of follower will be set to 0 in reporting process if (is_equal && ObRole::LEADER == role_) { is_equal = (proposal_id_ == other.proposal_id_); } + + // check replica_type and learner_list if necessary + bool is_compatible_with_readonly_replica = false; + int ret = OB_SUCCESS; + if (is_equal && OB_FAIL(ObShareUtil::check_compat_version_for_readonly_replica( + tenant_id_, + is_compatible_with_readonly_replica))) { + LOG_WARN("failed to check compat version for readonly replica", KR(ret), K_(tenant_id)); + } else if (is_equal && is_compatible_with_readonly_replica) { + is_equal = learner_list_is_equal(learner_list_, other.learner_list_) + && replica_type_ == other.replica_type_; + } + + return is_equal; +} + +bool ObLSReplica::learner_list_is_equal(const common::GlobalLearnerList &a, const common::GlobalLearnerList &b) const +{ + bool is_equal = true; + if (a.get_member_number() != b.get_member_number()) { + is_equal = false; + } else { + for (int i = 0; is_equal && i < a.get_member_number(); ++i) { + ObAddr learner; + int ret = OB_SUCCESS; + if (OB_FAIL(a.get_server_by_index(i, learner))) { + is_equal = false; + LOG_WARN("failed to get server by index", KR(ret), K(i), K(a), K(b)); + } else { + is_equal = b.contains(learner); + } + } + } return is_equal; } @@ -311,7 +359,9 @@ int64_t ObLSReplica::to_string(char *buf, const int64_t buf_len) const K_(data_size), K_(required_size), K_(in_member_list), - K_(member_time_us)); + K_(member_time_us), + K_(learner_list), + K_(in_learner_list)); J_OBJ_END(); return pos; } @@ -338,44 +388,78 @@ OB_SERIALIZE_MEMBER(ObLSReplica, data_size_, required_size_, in_member_list_, - member_time_us_); + member_time_us_, + learner_list_, + in_learner_list_); -int ObLSReplica::member_list2text(const MemberList &member_list, - char *text, - const int64_t length) +int ObLSReplica::member_list2text( + const MemberList &member_list, + ObSqlString &text) { int ret = OB_SUCCESS; - int64_t pos = 0; - char buf[MAX_IP_PORT_LENGTH]; - if (OB_ISNULL(text) || length <= 0 || member_list.count() < 0) { + text.reset(); + if (0 > member_list.count()) { ret = OB_INVALID_ARGUMENT; - 
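learner_list_is_equal compares by size plus containment, so two learner lists holding the same members in different order compare equal (assuming no duplicate entries). A minimal sketch of that semantics, with std::vector standing in for GlobalLearnerList:

  #include <algorithm>
  #include <string>
  #include <vector>

  // Same size and every element of a contained in b => equal as sets.
  // Mirrors the size + contains() check in ObLSReplica::learner_list_is_equal.
  bool learner_list_is_equal(const std::vector<std::string> &a,
                             const std::vector<std::string> &b)
  {
    if (a.size() != b.size()) {
      return false;
    }
    for (const std::string &learner : a) {
      if (std::find(b.begin(), b.end(), learner) == b.end()) {
        return false;
      }
    }
    return true;
  }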
LOG_WARN("invalid argument", KR(ret), KP(text), K(length), - "member count", member_list.count()); - } else if (member_list.count() == 0) { - memset(text, 0, length); - } - FOREACH_CNT_X(m, member_list, OB_SUCCESS == ret) { - if (0 != pos) { - if (pos + 1 < length) { - text[pos++] = ','; + LOG_WARN("invalid argument", KR(ret), "member count", member_list.count()); + } else if (0 == member_list.count()) { + text.reset(); + } else { + bool need_comma = false; + char ip_port[MAX_IP_PORT_LENGTH]; + FOREACH_CNT_X(m, member_list, OB_SUCC(ret)) { + if (OB_FAIL(m->get_server().ip_port_to_string(ip_port, sizeof(ip_port)))) { + LOG_WARN("convert server to string failed", KR(ret), "member", *m); + } else if (need_comma && OB_FAIL(text.append(","))) { + LOG_WARN("failed to append comma to string", KR(ret)); + } else if (OB_FAIL(text.append_fmt("%.*s:%ld", static_cast(sizeof(ip_port)), ip_port, m->get_timestamp()))) { + LOG_WARN("failed to append ip_port to string", KR(ret), "member", *m); } else { - ret = OB_BUF_NOT_ENOUGH; - LOG_WARN("buffer not enough", KR(ret), K(pos), K(length)); + need_comma = true; } } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(m->get_server().ip_port_to_string(buf, sizeof(buf)))) { - LOG_WARN("convert server to string failed", KR(ret), "member", *m); + } + return ret; +} + +int ObLSReplica::text2learner_list(const char *text, GlobalLearnerList &learner_list) +{ + int ret = OB_SUCCESS; + char *learner_text = nullptr; + char *save_ptr1 = nullptr; + learner_list.reset(); + if (nullptr == text) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), KP(text)); + } + while (OB_SUCC(ret)) { + learner_text = strtok_r((nullptr == learner_text ? const_cast(text) : nullptr), ",", &save_ptr1); + /* + * ipv4 format: a.b.c.d:port:timestamp,... + * ipv6 format: [a:b:c:d:e:f:g:h]:port:timestamp,... 
+ */ + if (nullptr != learner_text) { + char *timestamp_str = nullptr; + char *end_ptr = nullptr; + ObAddr learner_addr; + if (OB_NOT_NULL(timestamp_str = strrchr(learner_text, ':'))) { + *timestamp_str++ = '\0'; + int64_t timestamp_val = strtoll(timestamp_str, &end_ptr, 10); + if (end_ptr == timestamp_str || *end_ptr != '\0') { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("strtoll failed", KR(ret)); + } else if (OB_FAIL(learner_addr.parse_from_cstring(learner_text))) { + LOG_ERROR("parse from cstring failed", KR(ret), K(learner_text)); + } else if (OB_FAIL(learner_list.add_learner(ObMember(learner_addr, timestamp_val)))) { + LOG_WARN("push back failed", KR(ret), K(learner_addr), K(timestamp_val)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("parse learner text failed", KR(ret), K(learner_text)); + } } else { - int n = snprintf(text + pos, length - pos, "%s:%ld", buf, m->get_timestamp()); - if (n < 0 || n >= length - pos) { - ret = OB_BUF_NOT_ENOUGH; - LOG_WARN("snprintf error or buf not enough", KR(ret), K(n), K(length), K(pos)); - } else { - pos += n; - } + break; } - } + } // while return ret; } @@ -444,15 +528,6 @@ int ObLSReplica::transform_ob_member_list( return ret; } -int ObLSReplica::set_member_list(const MemberList &member_list) -{ - int ret = OB_SUCCESS; - if (OB_FAIL(member_list_.assign(member_list))) { - LOG_WARN("fail to assign member_list", KR(ret), K(member_list), KPC(this)); - } - return ret; -} - ObLSInfo::ObLSInfo() : tenant_id_(OB_INVALID_TENANT_ID), ls_id_(), @@ -689,9 +764,11 @@ int ObLSInfo::update_replica_status() LOG_WARN("invalid argument", KR(ret), "ls", *this); } else { const ObLSReplica::MemberList *member_list = NULL; + const common::GlobalLearnerList *learner_list = NULL; FOREACH_CNT_X(r, replicas_, OB_ISNULL(member_list) && OB_SUCCESS == ret) { if (r->is_strong_leader()) { member_list = &r->get_member_list(); + learner_list = &r->get_learner_list(); } } @@ -699,7 +776,22 @@ int ObLSInfo::update_replica_status() bool in_leader_member_list = (OB_ISNULL(member_list) && ObReplicaTypeCheck::is_paxos_replica_V2(r->get_replica_type())); int64_t in_member_time_us = 0; - if (NULL != member_list) { + bool in_leader_learner_list = false; + ObMember learner; + // rectify replica_type_ + if (OB_NOT_NULL(learner_list) && learner_list->contains(r->get_server())) { + r->set_replica_type(REPLICA_TYPE_READONLY); + in_leader_learner_list = true; + if (OB_FAIL(learner_list->get_learner_by_addr(r->get_server(), learner))) { + LOG_WARN("fail to get learner by addr", KR(ret)); + } else if (in_leader_learner_list) { + in_member_time_us = learner.get_timestamp(); + } + } else { + r->set_replica_type(REPLICA_TYPE_FULL); + } + // rectify in_member_list_ and in_member_list_time_ + if (OB_NOT_NULL(member_list)) { ARRAY_FOREACH_X(*member_list, idx, cnt, !in_leader_member_list) { if (r->get_server() == member_list->at(idx)) { in_leader_member_list = true; @@ -708,24 +800,19 @@ int ObLSInfo::update_replica_status() } } r->update_in_member_list_status(in_leader_member_list, in_member_time_us); - // replica_status_ rules: - // 1 paxos replicas (FULL,LOGONLY),NORMAL when in leader's member_list otherwise offline. - // 2 non_paxos replicas (READONLY),NORMAL all the time + r->update_in_learner_list_status(in_leader_learner_list, in_member_time_us); + // rectify replica_status_ + // follow these rules below: + // 1 paxos replicas (FULL),NORMAL when in leader's member_list otherwise offline. 
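The text form produced and consumed above is a comma-separated list of ip:port:timestamp entries (IPv6 servers rendered as [a:b:...]:port), and text2learner_list recovers the timestamp by splitting at the last ':' so the colons inside an IPv6 address are untouched. A standalone round-trip sketch using only the standard library (illustrative addresses; not the ObSqlString/ObAddr API):

  #include <cstddef>
  #include <cstdint>
  #include <cstdio>
  #include <cstdlib>
  #include <string>
  #include <vector>

  struct Member {
    std::string ip_port;   // e.g. "10.0.0.1:2882" or "[fe80::1]:2882" (example values)
    int64_t timestamp;
  };

  // Build "ip:port:timestamp,ip:port:timestamp,..."
  std::string members_to_text(const std::vector<Member> &members)
  {
    std::string text;
    for (const Member &m : members) {
      if (!text.empty()) {
        text += ",";
      }
      text += m.ip_port + ":" + std::to_string(m.timestamp);
    }
    return text;
  }

  // Parse one entry: the timestamp is everything after the *last* ':',
  // the same split the strrchr() call performs in text2learner_list.
  Member parse_entry(const std::string &entry)
  {
    const std::size_t pos = entry.rfind(':');
    return Member{entry.substr(0, pos),
                  static_cast<int64_t>(std::strtoll(entry.c_str() + pos + 1, nullptr, 10))};
  }

  int main()
  {
    const std::vector<Member> members = {{"10.0.0.1:2882", 1}, {"[fe80::1]:2882", 2}};
    const std::string text = members_to_text(members);
    const Member back = parse_entry(text.substr(0, text.find(',')));
    std::printf("%s -> %s:%lld\n", text.c_str(), back.ip_port.c_str(),
                static_cast<long long>(back.timestamp));
    return 0;
  }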
+ // 2 non_paxos replicas (READONLY),NORMAL when in leader's learner_list otherwise offline // 3 if non_paxos replicas are deleted by partition service, status in meta table is set to REPLICA_STATUS_OFFLINE, // then set replica_status to REPLICA_STATUS_OFFLINE if (REPLICA_STATUS_OFFLINE == r->get_replica_status()) { // do nothing - } else if (in_leader_member_list) { + } else if (in_leader_member_list || in_leader_learner_list) { r->set_replica_status(REPLICA_STATUS_NORMAL); - } else if (!ObReplicaTypeCheck::is_replica_type_valid(r->get_replica_type())) { - // invalid replicas - r->set_replica_status(REPLICA_STATUS_OFFLINE); - } else if (ObReplicaTypeCheck::is_paxos_replica_V2(r->get_replica_type())) { - // FULL, LOGONLY. - r->set_replica_status(REPLICA_STATUS_OFFLINE); } else { - // READONLY and so on. - r->set_replica_status(REPLICA_STATUS_NORMAL); + r->set_replica_status(REPLICA_STATUS_OFFLINE); } } } diff --git a/src/share/ls/ob_ls_info.h b/src/share/ls/ob_ls_info.h index 6918ffe1c2..02e415d400 100644 --- a/src/share/ls/ob_ls_info.h +++ b/src/share/ls/ob_ls_info.h @@ -20,6 +20,7 @@ #include "common/ob_role.h" // for ObRole #include "common/ob_member_list.h" // for ObMemberList #include "share/restore/ob_ls_restore_status.h" +#include "common/ob_learner_list.h" // for GlobalLearnerList namespace oceanbase { @@ -104,7 +105,9 @@ public: const ObString &zone, const int64_t paxos_replica_number, const int64_t data_size, - const int64_t required_size); + const int64_t required_size, + const MemberList &member_list, + const GlobalLearnerList &learner_list); // check-related functions inline bool is_valid() const; inline bool is_strong_leader() const { return common::is_strong_leader(role_); } @@ -114,7 +117,8 @@ public: || common::REPLICA_TYPE_LOGONLY == replica_type_; } inline bool is_in_restore() const { return !restore_status_.is_restore_none(); } // format-related functions - static int member_list2text(const MemberList &member_list, char *text, const int64_t length); + static int member_list2text(const MemberList &member_list, ObSqlString &text); + static int text2learner_list(const char *text, GlobalLearnerList &learner_list); static int text2member_list(const char *text, MemberList &member_list); // transform ObMemberList into MemberList static int transform_ob_member_list( @@ -145,9 +149,11 @@ public: inline common::ObZone get_zone() const { return zone_; } inline int64_t get_paxos_replica_number() const { return paxos_replica_number_; } inline bool get_in_member_list() const { return in_member_list_; } + inline bool get_in_learner_list() const { return in_learner_list_; } inline int64_t get_member_time_us() const { return member_time_us_; } inline int64_t get_data_size() const { return data_size_; } inline int64_t get_required_size() const { return required_size_; } + inline const common::GlobalLearnerList &get_learner_list() const { return learner_list_; } // functions to set values // ATTENTION:we use set_x() in cases for special needs below @@ -161,12 +167,13 @@ public: inline void set_replica_type(const common::ObReplicaType &replica_type) { replica_type_ = replica_type; } inline int add_member(SimpleMember m); + inline int add_learner(const ObMember &m); inline void update_in_member_list_status(const bool in_member_list, const int64_t member_time_us); + inline void update_in_learner_list_status(const bool in_learner_list, const int64_t learner_time_us) { in_learner_list_ = in_learner_list; member_time_us_ = learner_time_us; } //set replica role(FOLLOWER), proposal_id(0), 
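With this change update_replica_status rectifies replica_type from the leader's learner_list (present there means READONLY, otherwise FULL) and then keeps a replica NORMAL only if it appears in the leader's member_list or learner_list, unless storage already marked it OFFLINE. A compact decision sketch of the status part (plain enums; the leader's lists are assumed authoritative):

  enum class Status { NORMAL, OFFLINE };

  // in_member_list / in_learner_list come from the leader replica's lists;
  // already_offline preserves a status previously set on the storage side.
  Status rectify_status(bool already_offline, bool in_member_list, bool in_learner_list)
  {
    if (already_offline) {
      return Status::OFFLINE;              // rule 3: keep REPLICA_STATUS_OFFLINE
    }
    if (in_member_list || in_learner_list) {
      return Status::NORMAL;               // rules 1 and 2
    }
    return Status::OFFLINE;                // not tracked by the leader
  }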
modify_time(now) inline void update_to_follower_role(); + bool learner_list_is_equal(const common::GlobalLearnerList &a, const common::GlobalLearnerList &b) const; bool member_list_is_equal(const MemberList &a, const MemberList &b) const; - int set_member_list(const MemberList &member_list); - private: int64_t create_time_us_; // store utc time int64_t modify_time_us_; // store utc time @@ -192,6 +199,8 @@ private: // no need to SERIALIZE, can be constructd by ObLSInfo::update_replica_status() bool in_member_list_; // whether in member_list int64_t member_time_us_; // member_time_us + common::GlobalLearnerList learner_list_; // list to record R-replicas + bool in_learner_list_; // whether in learner_list }; // [class_full_name] class ObLSInfo @@ -264,6 +273,11 @@ inline int ObLSReplica::add_member(SimpleMember m) return member_list_.push_back(m); } +inline int ObLSReplica::add_learner(const ObMember &m) +{ + return learner_list_.add_learner(m); +} + inline void ObLSReplica::update_in_member_list_status( const bool in_member_list, const int64_t member_time_us) diff --git a/src/share/ls/ob_ls_operator.cpp b/src/share/ls/ob_ls_operator.cpp index 4c461361b9..7c7ee70fac 100644 --- a/src/share/ls/ob_ls_operator.cpp +++ b/src/share/ls/ob_ls_operator.cpp @@ -32,6 +32,7 @@ #include "share/ob_global_stat_proxy.h"//get gc #include "logservice/palf/log_define.h"//SCN #include "share/scn.h"//SCN +#include "share/ls/ob_ls_status_operator.h" using namespace oceanbase; using namespace oceanbase::common; @@ -41,6 +42,92 @@ using namespace transaction; using namespace palf; namespace share { +int ObLSFlag::assign(const ObLSFlag &ls_flag) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!ls_flag.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(ls_flag)); + } else { + flag_ = ls_flag.flag_; + } + return ret; +} + +int ObLSFlag::flag_to_str(ObLSFlagStr &str) const +{ + STATIC_ASSERT(ARRAYSIZEOF(LS_FLAG_ARRAY) == (int64_t)MAX_FLAG, + "ls flag string array size mismatch enum LSFlag count"); + int ret = OB_SUCCESS; + str.reset(); + int64_t pos = 0; + const int64_t len = str.capacity(); + if (is_normal_flag()) { + if (OB_FAIL(databuff_printf(str.ptr(), len, pos, "%s", LS_FLAG_ARRAY[0]))) { + LOG_WARN("failed to databuf printf", KR(ret), K(len), K(pos)); + } + } else { + bool need_append_split_char = false; + for (int64_t i = 1; i < ARRAYSIZEOF(LS_FLAG_ARRAY) && OB_SUCC(ret); i++) { + int64_t flag_bit_to_compare = 1 << (i - 1); + if (flag_ & flag_bit_to_compare) { + if (need_append_split_char && OB_FAIL(databuff_printf(str.ptr(), len, pos, "|"))) { + LOG_WARN("failed to databuff print", KR(ret), K(pos), K(len)); + } else if (OB_FAIL(databuff_printf(str.ptr(), len, pos, "%s", LS_FLAG_ARRAY[i]))) { + LOG_WARN("failed to databuff print", KR(ret), K(pos), K(len)); + } else { + need_append_split_char = true; + } + } + } + } + return ret; +} + +int ObLSFlag::str_to_flag(const common::ObString &sql) +{ + int ret = OB_SUCCESS; + reset(); + ObLSFlagStr sql_copy; + if (OB_UNLIKELY(sql.empty())) { + flag_ = NORMAL_FLAG; + } else if (sql.length() >= FLAG_STR_LENGTH) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("sql is larger than expected", KR(ret), K(sql)); + } else if (OB_FAIL(sql_copy.assign(sql))) { + LOG_WARN("failed to assign sql", KR(ret), K(sql)); + } else { + char *data_str = NULL; + char *save_ptr = NULL; + while (OB_SUCC(ret)) { + data_str = strtok_r((NULL == data_str ? 
sql_copy.ptr() : NULL), "|", &save_ptr); + if (NULL != data_str) { + bool found = false; + for (int64_t i = 1; i < ARRAYSIZEOF(LS_FLAG_ARRAY); i++) { + int64_t flag_bit_to_compare = 1 << (i - 1); + if (0 == ObString(data_str).case_compare(LS_FLAG_ARRAY[i])) { + if (flag_ & flag_bit_to_compare) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("can not double ls flag", KR(ret), K(data_str), K(save_ptr)); + } else { + flag_ |= flag_bit_to_compare; + found = true; + } + } + } + if (OB_SUCC(ret) && !found) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected flag string", KR(ret), K(sql)); + } + } else { + break; + } + } + } + return ret; +} + +OB_SERIALIZE_MEMBER(ObLSFlag, flag_); ////////////ObLSAttr bool ObLSAttr::is_valid() const { @@ -100,7 +187,7 @@ void ObLSAttr::reset() create_scn_.reset(); } -OB_SERIALIZE_MEMBER(ObLSAttr, id_, ls_group_id_, flag_, status_, operation_type_, create_scn_); +OB_SERIALIZE_MEMBER(ObLSAttr, id_, ls_group_id_, flag_compatible_, status_, operation_type_, create_scn_, flag_); //////////////ObLSAttrOperator bool ObLSAttrOperator::is_valid() const @@ -131,6 +218,7 @@ int ObLSAttrOperator::operator_ls_( ObMySQLTransaction trans; const bool for_update = true; ObLSAttr sys_ls_attr; + ObLSAttr duplicate_ls_attr; bool skip_sub_trans = false; ObAllTenantInfo tenant_info; if (OB_FAIL(trans.start(proxy_, tenant_id_))) { @@ -153,10 +241,23 @@ int ObLSAttrOperator::operator_ls_( } else if (OB_LS_OP_CREATE_PRE == ls_attr.get_ls_operation_type()) { if (OB_LS_NORMAL != sys_ls_attr.get_ls_status()) { //for sys ls, need insert_ls, but ls_status is normal - //delete ls may in ls is creating, use operation_type + //delete ls may in ls is creating, use operation_type ret = OB_OP_NOT_ALLOW; LOG_WARN("ls_status not expected while create ls", KR(ret), K(ls_attr), K(sys_ls_attr)); + } else if (ls_attr.get_ls_flag().is_duplicate_ls() + && OB_FAIL(get_duplicate_ls_attr(false/*for_update*/, trans, duplicate_ls_attr))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // good, duplicate ls not exist + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to get duplicate ls info", KR(ret), K(ls_attr)); + } + } else if (duplicate_ls_attr.get_ls_flag().is_duplicate_ls()) { + ret = OB_LS_EXIST; + LOG_WARN("duplicate ls already exist", KR(ret), K(duplicate_ls_attr), K(ls_attr)); + } + if (OB_FAIL(ret)) { } else { //check ls create not concurrency uint64_t max_ls_id = 0; @@ -200,19 +301,22 @@ int ObLSAttrOperator::insert_ls( const ObTenantSwitchoverStatus &working_sw_status) { int ret = OB_SUCCESS; + ObLSFlagStr flag_str; if (OB_UNLIKELY(!ls_attr.is_valid() || OB_INVALID_ID == max_ls_group_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("operation is invalid", KR(ret), K(ls_attr), K(max_ls_group_id)); } else if (OB_UNLIKELY(!is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("operation is not valid", KR(ret), "operation", *this); + } else if (OB_FAIL(ls_attr.get_ls_flag().flag_to_str(flag_str))) { + LOG_WARN("fail to convert flag to string", KR(ret), K(ls_attr)); } else { ObSqlString sql; if (FAILEDx(sql.assign_fmt( "insert into %s (ls_id, ls_group_id, status, flag, create_scn) values(%ld, " "%ld, '%s', '%s', '%lu')", OB_ALL_LS_TNAME, ls_attr.get_ls_id().id(), ls_attr.get_ls_group_id(), - ObLSStatusOperator::ls_status_to_str(ls_attr.get_ls_status()), "", + ObLSStatusOperator::ls_status_to_str(ls_attr.get_ls_status()), flag_str.ptr(), ls_attr.get_create_scn().get_val_for_inner_table_field()))) { LOG_WARN("failed to assign sql", KR(ret), K(ls_attr), K(sql)); } else if (OB_FAIL(operator_ls_(ls_attr, sql, max_ls_group_id, 
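The updated OB_SERIALIZE_MEMBER(ObLSAttr, ...) keeps the old enum field (flag_compatible_) in its original slot and appends the new ObLSFlag at the tail, the usual append-only pattern for wire compatibility: an old reader consumes the prefix it knows, a new reader also picks up the trailing field. A tiny generic sketch of that idea (not the OceanBase serialization framework):

  #include <cstdint>
  #include <vector>

  // Append-only layout: existing fields keep their slots, new fields go at the end.
  struct LSAttrV1 { int64_t id; int64_t group_id; int64_t legacy_flag; };
  struct LSAttrV2 { int64_t id; int64_t group_id; int64_t legacy_flag; int64_t new_flag; };

  std::vector<int64_t> encode_v2(const LSAttrV2 &a)
  {
    return {a.id, a.group_id, a.legacy_flag, a.new_flag};
  }

  // An old (v1) reader consumes only the prefix it understands and is
  // unaffected by the trailing field a v2 writer appended.
  LSAttrV1 decode_v1(const std::vector<int64_t> &buf)
  {
    return LSAttrV1{buf[0], buf[1], buf[2]};
  }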
working_sw_status))) { @@ -400,6 +504,40 @@ int ObLSAttrOperator::get_ls_attr(const ObLSID &id, const bool for_update, commo return ret; } +int ObLSAttrOperator::get_duplicate_ls_attr(const bool for_update, + common::ObISQLClient &client, ObLSAttr &ls_attr) +{ + int ret = OB_SUCCESS; + ls_attr.reset(); + if (OB_UNLIKELY(!is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("operation is not valid", KR(ret), "operation", *this); + } else { + common::ObSqlString sql; + int64_t affected_rows = 0; + if (OB_FAIL(sql.assign_fmt("select * from %s where flag like \"%%%s%%\"", + OB_ALL_LS_TNAME, LS_FLAG_ARRAY[ObLSFlag::DUPLICATE_FLAG]))) { + LOG_WARN("failed to assign sql", KR(ret), K(sql)); + } else if (for_update && OB_FAIL(sql.append(" for update"))) { + LOG_WARN("failed to append sql", KR(ret), K(sql), K(for_update)); + } else { + ObLSAttrArray ls_array; + if (OB_FAIL(exec_read(tenant_id_, sql, client, this, ls_array))) { + LOG_WARN("failed to get ls array", KR(ret), K(tenant_id_), K(sql)); + } else if (0 == ls_array.count()) { + ret = OB_ENTRY_NOT_EXIST; + LOG_WARN("failed to ls array", KR(ret), K_(tenant_id)); + } else if (OB_UNLIKELY(1 != ls_array.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("more than one ls is unexpected", KR(ret), K(ls_array), K(sql)); + } else if (OB_FAIL(ls_attr.assign(ls_array.at(0)))) { + LOG_WARN("failed to assign ls attr", KR(ret), K(ls_array)); + } + } + } + return ret; +} + int ObLSAttrOperator::get_all_ls_by_order( ObLSAttrIArray &ls_operation_array) { @@ -464,18 +602,22 @@ int ObLSAttrOperator::fill_cell(common::sqlclient::ObMySQLResult *result, ObLSAt int64_t id_value = OB_INVALID_ID; uint64_t ls_group_id = OB_INVALID_ID; uint64_t create_scn_val = OB_INVALID_SCN_VAL; - ObLSFlag flag; // TODO no used + ObString flag_str; + ObLSFlag flag(share::ObLSFlag::NORMAL_FLAG); SCN create_scn; EXTRACT_INT_FIELD_MYSQL(*result, "ls_id", id_value, int64_t); EXTRACT_INT_FIELD_MYSQL(*result, "ls_group_id", ls_group_id, uint64_t); EXTRACT_VARCHAR_FIELD_MYSQL(*result, "status", status_str); EXTRACT_UINT_FIELD_MYSQL(*result, "create_scn", create_scn_val, uint64_t); + EXTRACT_VARCHAR_FIELD_MYSQL(*result, "flag", flag_str); if (OB_FAIL(ret)) { LOG_WARN("failed to get result", KR(ret), K(ls_group_id), K(status_str), K(id_value)); } else if (OB_FAIL(create_scn.convert_for_inner_table_field(create_scn_val))) { LOG_WARN("failed to convert create_scn", KR(ret), K(ls_group_id), K(status_str), K(id_value), K(create_scn)); + } else if (OB_FAIL(flag.str_to_flag(flag_str))) { + LOG_WARN("failed to convert flag", KR(ret), K(flag_str)); } else { ObLSID ls_id(id_value); ObLSStatus status = ObLSStatusOperator::str_to_ls_status(status_str); diff --git a/src/share/ls/ob_ls_operator.h b/src/share/ls/ob_ls_operator.h index b10b81d6b4..4965659cdb 100644 --- a/src/share/ls/ob_ls_operator.h +++ b/src/share/ls/ob_ls_operator.h @@ -16,7 +16,6 @@ #include "share/ob_ls_id.h"//share::ObLSID #include "lib/container/ob_array.h"//ObArray #include "lib/container/ob_iarray.h"//ObIArray -#include "share/ls/ob_ls_status_operator.h" //ObLSStatus #include "share/ls/ob_ls_i_life_manager.h"//ObLSTemplateOperator #include "logservice/palf/log_define.h"//SCN #include "share/scn.h"//SCN @@ -38,12 +37,62 @@ class ObMySQLResult; } namespace share { +static const char* LS_FLAG_ARRAY[] = { ""/*NORMAL*/, "DUPLICATE", "BLOCK_TABLET_IN" }; +//maybe empty, DUPLICATE, BLOCK_TABLET_IN, DUPLICATE|BLOCK_TABLET_IN +static const int64_t FLAG_STR_LENGTH = 100; +typedef common::ObFixedLengthString ObLSFlagStr; class SCN; +bool 
ls_is_empty_status(const ObLSStatus &status); +bool ls_is_creating_status(const ObLSStatus &status); +bool ls_is_created_status(const ObLSStatus &status); +bool ls_is_normal_status(const ObLSStatus &status); +bool ls_is_tenant_dropping_status(const ObLSStatus &status); +bool ls_is_dropping_status(const ObLSStatus &status); +bool ls_is_wait_offline_status(const ObLSStatus &status); +bool is_valid_status_in_ls(const ObLSStatus &status); +bool ls_is_create_abort_status(const ObLSStatus &status); +bool ls_need_create_abort_status(const ObLSStatus &status); +bool ls_is_pre_tenant_dropping_status(const ObLSStatus &status); //TODO for duplicate ls -enum ObLSFlag +enum ObLSFlagForCompatible { OB_LS_FLAG_NORMAL = 0, }; +class ObLSFlag +{ +public: + OB_UNIS_VERSION(1); +public: + enum LSFlag + { + INVALID_TYPE = -1, + NORMAL_FLAG = 0, + //If the low 0 bit is 1, it means that this is duplicate ls + DUPLICATE_FLAG = 1, + //If the low 1 bit is 1, it means that this is block tablet in + BLOCK_TABLET_IN_FLAG = 2, + MAX_FLAG + }; + ObLSFlag() : flag_(NORMAL_FLAG) {} + ObLSFlag(const int64_t flag) : flag_(flag) {} + ~ObLSFlag() {} + void reset() {flag_ = NORMAL_FLAG;} + int assign(const ObLSFlag &ls_flag); + bool is_valid() const { return flag_ >= 0; } + void set_block_tablet_in() { flag_ |= BLOCK_TABLET_IN_FLAG; } + void clear_block_tablet_in() { flag_ &= (~BLOCK_TABLET_IN_FLAG); } + bool is_normal_flag() const { return NORMAL_FLAG == flag_; } + bool is_block_tablet_in() const {return flag_ & BLOCK_TABLET_IN_FLAG;} + void set_duplicate() { flag_ |= DUPLICATE_FLAG; } + bool is_duplicate_ls() const { return flag_ & DUPLICATE_FLAG; } + int flag_to_str(ObLSFlagStr &str) const; + int str_to_flag(const common::ObString &sql); + int64_t get_flag_value() const { return flag_; } + TO_STRING_KV(K_(flag), "is_duplicate", is_duplicate_ls(), "is_block_tablet_in", is_block_tablet_in()); + +private: + int64_t flag_; +}; enum ObLSOperationType { OB_LS_OP_INVALID_TYPE = -1, @@ -75,7 +124,8 @@ struct ObLSAttr ObLSAttr() : id_(), ls_group_id_(OB_INVALID_ID), - flag_(OB_LS_FLAG_NORMAL), + flag_compatible_(OB_LS_FLAG_NORMAL), + flag_(ObLSFlag::NORMAL_FLAG), status_(OB_LS_EMPTY), operation_type_(OB_LS_OP_INVALID_TYPE) { create_scn_.set_min();} @@ -142,6 +192,7 @@ struct ObLSAttr private: ObLSID id_; uint64_t ls_group_id_; + ObLSFlagForCompatible flag_compatible_; ObLSFlag flag_; ObLSStatus status_; ObLSOperationType operation_type_; @@ -170,6 +221,16 @@ public: int fill_cell(common::sqlclient::ObMySQLResult *result, ObLSAttr &ls_attr); public: bool is_valid() const; + + // get duplicate ls status info + // @params[in] for_update, whether to lock line + // @params[in] client, sql client to use + // @params[out] ls_attr, the result + int get_duplicate_ls_attr( + const bool for_update, + common::ObISQLClient &client, + ObLSAttr &ls_attr); + int get_all_ls_by_order( ObLSAttrIArray &ls_array); /** diff --git a/src/share/ls/ob_ls_replica_filter.cpp b/src/share/ls/ob_ls_replica_filter.cpp index 6084701fb5..9a54f06c6b 100644 --- a/src/share/ls/ob_ls_replica_filter.cpp +++ b/src/share/ls/ob_ls_replica_filter.cpp @@ -117,6 +117,5 @@ int ObLSReplicaFilterHolder::check(const ObLSReplica &replica, bool &pass) const } return ret; } - } // end namespace share -} // end namespace oceanbase \ No newline at end of file +} // end namespace oceanbase diff --git a/src/share/ls/ob_ls_replica_filter.h b/src/share/ls/ob_ls_replica_filter.h index bd297d9c89..ebc47691e4 100644 --- a/src/share/ls/ob_ls_replica_filter.h +++ 
b/src/share/ls/ob_ls_replica_filter.h @@ -61,4 +61,4 @@ private: }; } // end namespace share } // end namespace oceanbase -#endif \ No newline at end of file +#endif diff --git a/src/share/ls/ob_ls_status_operator.cpp b/src/share/ls/ob_ls_status_operator.cpp old mode 100644 new mode 100755 index 603dba335a..28fe65253c --- a/src/share/ls/ob_ls_status_operator.cpp +++ b/src/share/ls/ob_ls_status_operator.cpp @@ -28,6 +28,7 @@ #include "logservice/palf/log_define.h" // INVALID_PROPOSAL_ID #include "share/schema/ob_multi_version_schema_service.h" // ObMultiVersionSchemaService #include "share/scn.h" // SCN +#include "share/ls/ob_ls_operator.h" //ObLSFlag using namespace oceanbase; using namespace oceanbase::common; @@ -56,7 +57,8 @@ bool ObLSStatusInfo::is_valid() const && (ls_id_.is_sys_ls() || (OB_INVALID_ID != ls_group_id_ && OB_INVALID_ID != unit_group_id_)) - && share::OB_LS_EMPTY != status_; + && share::OB_LS_EMPTY != status_ + && flag_.is_valid(); } void ObLSStatusInfo::reset() @@ -66,6 +68,7 @@ void ObLSStatusInfo::reset() ls_group_id_ = OB_INVALID_ID; unit_group_id_ = OB_INVALID_ID; status_ = OB_LS_EMPTY; + flag_.reset(); } int ObLSStatusInfo::init(const uint64_t tenant_id, @@ -73,17 +76,21 @@ int ObLSStatusInfo::init(const uint64_t tenant_id, const uint64_t ls_group_id, const ObLSStatus status, const uint64_t unit_group_id, - const ObZone &primary_zone) + const ObZone &primary_zone, + const ObLSFlag &flag) { int ret = OB_SUCCESS; if (OB_UNLIKELY(!id.is_valid() - || OB_INVALID_TENANT_ID == tenant_id - || OB_LS_EMPTY == status)) { + || !flag.is_valid() + || OB_INVALID_TENANT_ID == tenant_id + || OB_LS_EMPTY == status)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(id), K(ls_group_id), - K(status), K(unit_group_id)); + K(status), K(unit_group_id), K(flag)); } else if (OB_FAIL(primary_zone_.assign(primary_zone))) { LOG_WARN("failed to assign primary zone", KR(ret), K(primary_zone)); + } else if (OB_FAIL(flag_.assign(flag))) { + LOG_WARN("failed to assign ls flag", KR(ret), K(flag)); } else { tenant_id_ = tenant_id; ls_id_ = id; @@ -100,6 +107,8 @@ int ObLSStatusInfo::assign(const ObLSStatusInfo &other) if (this != &other) { if (OB_FAIL(primary_zone_.assign(other.primary_zone_))) { LOG_WARN("failed to assign other primary zone", KR(ret), K(other)); + } else if (OB_FAIL(flag_.assign(other.flag_))) { + LOG_WARN("failed to assign ls flag", KR(ret), K(other)); } else { tenant_id_ = other.tenant_id_; ls_id_ = other.ls_id_; @@ -161,7 +170,6 @@ bool ls_is_pre_tenant_dropping_status(const ObLSStatus &status) return OB_LS_PRE_TENANT_DROPPING == status; } - bool is_valid_status_in_ls(const ObLSStatus &status) { return OB_LS_CREATING == status || OB_LS_NORMAL == status @@ -264,6 +272,7 @@ int ObLSStatusOperator::create_new_ls(const ObLSStatusInfo &ls_info, UNUSEDx(current_tenant_scn, zone_priority); int ret = OB_SUCCESS; ObAllTenantInfo tenant_info; + ObLSFlagStr flag_str; if (OB_UNLIKELY(!ls_info.is_valid() || !working_sw_status.is_valid())) { ret = OB_INVALID_ARGUMENT; @@ -274,16 +283,35 @@ int ObLSStatusOperator::create_new_ls(const ObLSStatusInfo &ls_info, } else if (working_sw_status != tenant_info.get_switchover_status()) { ret = OB_NEED_RETRY; LOG_WARN("tenant not in specified switchover status", K(ls_info), K(working_sw_status), K(tenant_info)); + } else if (OB_FAIL(ls_info.get_flag().flag_to_str(flag_str))) { + LOG_WARN("fail to convert ls flag into string", KR(ret), K(ls_info)); + } else if (ls_info.get_flag().is_duplicate_ls()) { + bool is_compatible = false; + if 
(OB_FAIL(ObShareUtil::check_compat_version_for_readonly_replica( + ls_info.tenant_id_, is_compatible))) { + LOG_WARN("fail to check data version for duplicate table", KR(ret), K(ls_info)); + } else if (!is_compatible) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("ls flag is not empty", KR(ret), K(ls_info), K(is_compatible)); + } + } + + if (OB_FAIL(ret)) { } else { + ObDMLSqlSplicer dml_splicer; common::ObSqlString sql; - if (OB_FAIL(sql.assign_fmt("INSERT into %s (tenant_id, ls_id, status, " - "ls_group_id, unit_group_id, primary_zone) " - "values (%lu, %ld, '%s', %ld, %ld, '%s')", - OB_ALL_LS_STATUS_TNAME, ls_info.tenant_id_, ls_info.ls_id_.id(), - ls_status_to_str(ls_info.status_), - ls_info.ls_group_id_, ls_info.unit_group_id_, - ls_info.primary_zone_.ptr()))) { - LOG_WARN("failed to assing sql", KR(ret), K(ls_info)); + const char *table_name = OB_ALL_LS_STATUS_TNAME; + if (OB_FAIL(dml_splicer.add_pk_column("tenant_id", ls_info.tenant_id_)) + || OB_FAIL(dml_splicer.add_pk_column("ls_id", ls_info.ls_id_.id())) + || OB_FAIL(dml_splicer.add_column("status", ls_status_to_str(ls_info.status_))) + || OB_FAIL(dml_splicer.add_column("ls_group_id", ls_info.ls_group_id_)) + || OB_FAIL(dml_splicer.add_column("unit_group_id", ls_info.unit_group_id_)) + || OB_FAIL(dml_splicer.add_column("primary_zone", ls_info.primary_zone_.ptr()))) { + LOG_WARN("add columns failed", KR(ret), K(ls_info)); + } else if (!ls_info.get_flag().is_normal_flag() && OB_FAIL(dml_splicer.add_column("flag", flag_str.ptr()))) { + LOG_WARN("add flag column failed", KR(ret), K(ls_info), K(flag_str)); + } else if (OB_FAIL(dml_splicer.splice_insert_sql(table_name, sql))) { + LOG_WARN("fail to splice insert sql", KR(ret), K(sql), K(ls_info), K(flag_str)); } else if (OB_FAIL(exec_write(ls_info.tenant_id_, sql, this, trans))) { LOG_WARN("failed to exec write", KR(ret), K(ls_info), K(sql)); } @@ -431,13 +459,27 @@ int ObLSStatusOperator::update_ls_status_in_trans_( common::ObSqlString sql; const uint64_t exec_tenant_id = ObLSLifeIAgent::get_exec_tenant_id(tenant_id); - if (OB_FAIL(sql.assign_fmt("UPDATE %s set status = '%s',init_member_list = '', b_init_member_list = ''" + common::ObSqlString sub_string; + bool is_compatible_with_readonly_replica = false; + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = ObShareUtil::check_compat_version_for_readonly_replica( + exec_tenant_id, is_compatible_with_readonly_replica))) { + LOG_WARN("fail to check tenant compat version with readonly replica", KR(tmp_ret), K(exec_tenant_id)); + } else if (is_compatible_with_readonly_replica + && OB_SUCCESS != (tmp_ret = sub_string.assign(", init_learner_list = '', b_init_learner_list = ''"))) { + LOG_WARN("fail to construct substring for learner list", KR(tmp_ret)); + sub_string.reset(); + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(sql.assign_fmt("UPDATE %s set status = '%s',init_member_list = '', b_init_member_list = ''%.*s" " where ls_id = %ld and tenant_id = %lu and status = '%s'", OB_ALL_LS_STATUS_TNAME, - ls_status_to_str(new_status), id.id(), - tenant_id, ls_status_to_str(old_status)))) { + ls_status_to_str(new_status), + static_cast(sub_string.length()), sub_string.ptr(), + id.id(), tenant_id, ls_status_to_str(old_status)))) { LOG_WARN("failed to assign sql", KR(ret), K(id), K(new_status), - K(old_status), K(tenant_id), K(sql)); + K(old_status), K(tenant_id), K(sub_string), K(sql)); } else if (OB_FAIL(exec_write(tenant_id, sql, this, trans))) { LOG_WARN("failed to exec write", KR(ret), K(tenant_id), K(id), K(sql)); } @@ -448,30 +490,58 @@ int 
ObLSStatusOperator::update_ls_status_in_trans_( int ObLSStatusOperator::update_init_member_list( const uint64_t tenant_id, const ObLSID &id, const ObMemberList &member_list, ObISQLClient &client, - const ObMember &arb_member) + const ObMember &arb_member, const common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; + bool is_compatible_with_readonly_replica = false; + ObSqlString learner_list_sub_sql; if (OB_UNLIKELY(!id.is_valid() || !member_list.is_valid() || OB_INVALID_TENANT_ID == tenant_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid_argument", KR(ret), K(id), K(member_list), K(tenant_id)); + } else if (OB_FAIL(ObShareUtil::check_compat_version_for_readonly_replica( + ObLSLifeIAgent::get_exec_tenant_id(tenant_id), + is_compatible_with_readonly_replica))) { + LOG_WARN("failed to check data version for read-only replica", KR(ret), + "exec_tenant_id", ObLSLifeIAgent::get_exec_tenant_id(tenant_id)); } else { common::ObSqlString sql; - ObSqlString visist_member_list; + ObSqlString visible_member_list; ObString hex_member_list; + ObSqlString visible_learner_list; + ObString hex_learner_list; ObArenaAllocator allocator("MemberList"); - if (OB_FAIL(get_visible_member_list_str_(member_list, allocator, visist_member_list, arb_member))) { + if (OB_FAIL(get_visible_member_list_str_(member_list, allocator, visible_member_list, arb_member))) { LOG_WARN("failed to get visible member list", KR(ret), K(member_list)); - } else if (OB_FAIL(get_member_list_hex_(member_list, allocator, hex_member_list, arb_member))) { + } else if (OB_FAIL(get_list_hex_(member_list, allocator, hex_member_list, arb_member))) { LOG_WARN("faield to get member list hex", KR(ret), K(member_list)); + } else if (learner_list.is_valid()) { + if (!is_compatible_with_readonly_replica) { + ret = OB_STATE_NOT_MATCH; + LOG_WARN("data version is below 4.2 and learner list is not null", KR(ret), + "exec_tenant_id", ObLSLifeIAgent::get_exec_tenant_id(tenant_id), K(learner_list)); + } else if (OB_FAIL(learner_list.transform_to_string(visible_learner_list))) { + LOG_WARN("failed to get visible learner list", KR(ret), K(learner_list)); + } else if (OB_FAIL(get_list_hex_(learner_list, allocator, hex_learner_list, arb_member))) { + LOG_WARN("failed to get learner list hex", KR(ret), K(learner_list)); + } else if (OB_FAIL(learner_list_sub_sql.assign_fmt(", init_learner_list = '%.*s', b_init_learner_list = '%.*s' ", + static_cast(visible_learner_list.length()), visible_learner_list.ptr(), + static_cast(hex_learner_list.length()), hex_learner_list.ptr()))) { + LOG_WARN("fail to construct learner list sub sql", KR(ret), K(visible_learner_list)); + } + } + + if (OB_FAIL(ret)) { } else if (OB_FAIL(sql.assign_fmt( - "UPDATE %s set init_member_list = '%.*s', b_init_member_list = '%.*s' " - "where ls_id = %ld and tenant_id = %lu and b_init_member_list is null", - OB_ALL_LS_STATUS_TNAME, - static_cast(visist_member_list.length()), visist_member_list.ptr(), - hex_member_list.length(), hex_member_list.ptr(), id.id(), tenant_id))) { - LOG_WARN("failed to assign sql", KR(ret), K(id), K(member_list), K(sql)); + "UPDATE %s set init_member_list = '%.*s', b_init_member_list = '%.*s'%.*s " + "where ls_id = %ld and tenant_id = %lu and b_init_member_list is null", + OB_ALL_LS_STATUS_TNAME, + static_cast(visible_member_list.length()), visible_member_list.ptr(), + hex_member_list.length(), hex_member_list.ptr(), + static_cast(learner_list_sub_sql.length()), learner_list_sub_sql.ptr(), + id.id(), tenant_id))) { + LOG_WARN("failed to assign sql", 
KR(ret), K(id), K(member_list), K(learner_list_sub_sql), K(sql)); } else if (OB_FAIL(exec_write(tenant_id, sql, this, client))) { LOG_WARN("failed to exec write", KR(ret), K(id), K(sql)); } @@ -542,17 +612,19 @@ int ObLSStatusOperator::get_ls_init_member_list( const uint64_t tenant_id, const ObLSID &id, ObMemberList &member_list, share::ObLSStatusInfo &status_info, ObISQLClient &client, - ObMember &arb_member) + ObMember &arb_member, + common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; member_list.reset(); + learner_list.reset(); status_info.reset(); arb_member.reset(); if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("tenant id is invalid", KR(ret), K(tenant_id)); } else if (OB_FAIL(get_ls_status_(tenant_id, id, true /*need_member_list*/, - member_list, status_info, client, arb_member))) { + member_list, status_info, client, arb_member, learner_list))) { LOG_WARN("failed to get ls status", KR(ret), K(id), K(tenant_id)); } return ret; @@ -564,18 +636,47 @@ int ObLSStatusOperator::get_ls_status_info( { int ret = OB_SUCCESS; ObMemberList member_list; + common::GlobalLearnerList learner_list; ObMember arb_member; status_info.reset(); if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("tenant id is invalid", KR(ret), K(tenant_id)); } else if (OB_FAIL(get_ls_status_(tenant_id, id, false /*need_member_list*/, - member_list, status_info, client, arb_member))) { + member_list, status_info, client, arb_member, learner_list))) { LOG_WARN("failed to get ls status", KR(ret), K(id), K(tenant_id)); } return ret; } +int ObLSStatusOperator::get_duplicate_ls_status_info( + const uint64_t tenant_id, + ObISQLClient &client, + share::ObLSStatusInfo &status_info) +{ + int ret = OB_SUCCESS; + status_info.reset(); + ObSqlString sql; + bool need_member_list = false; + ObMemberList member_list; + common::GlobalLearnerList learner_list; + ObMember arb_member; + if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id)); + } else if (OB_FAIL(sql.assign_fmt( + "SELECT * FROM %s where tenant_id = %lu and flag like \"%%%s%%\"", + OB_ALL_LS_STATUS_TNAME, tenant_id, + LS_FLAG_ARRAY[ObLSFlag::DUPLICATE_FLAG]))) { + LOG_WARN("failed to assign sql", KR(ret), K(sql)); + } else if (OB_FAIL(inner_get_ls_status_(sql, get_exec_tenant_id(tenant_id), need_member_list, + client, member_list, status_info, arb_member, learner_list))) { + LOG_WARN("fail to inner get ls status info", KR(ret), K(sql), K(tenant_id), "exec_tenant_id", + get_exec_tenant_id(tenant_id), K(need_member_list)); + } + return ret; +} + int ObLSStatusOperator::get_visible_member_list_str_(const ObMemberList &member_list, common::ObIAllocator &allocator, common::ObSqlString &visible_member_list_str, @@ -632,65 +733,19 @@ int ObLSStatusOperator::get_visible_member_list_str_(const ObMemberList &member_ return ret; } -int ObLSStatusOperator::get_member_list_hex_(const ObMemberList &member_list, - common::ObIAllocator &allocator, - common::ObString &hex_str, - const ObMember &arb_member) +template +int ObLSStatusOperator::set_list_with_hex_str_( + const common::ObString &str, + T &list, + ObMember &arb_member) { int ret = OB_SUCCESS; - char *serialize_buf = NULL; - ObMemberListFlag arb_flag(ObMemberListFlag::HAS_ARB_MEMBER); - const int64_t flag_size = arb_member.is_valid() ? arb_flag.get_serialize_size() : 0; - const int64_t arb_member_serialize_size = arb_member.is_valid() ? 
arb_member.get_serialize_size() : 0; - const int64_t serialize_size = member_list.get_serialize_size() + flag_size + arb_member_serialize_size; - int64_t serialize_pos = 0; - char *hex_buf = NULL; - const int64_t hex_size = 2 * serialize_size; - int64_t hex_pos = 0; - if (OB_UNLIKELY(!member_list.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("member_list is invlaid", KR(ret), K(member_list)); - } else if (OB_UNLIKELY(hex_size > OB_MAX_LONGTEXT_LENGTH + 1)) { - ret = OB_SIZE_OVERFLOW; - LOG_WARN("format str is too long", KR(ret), K(hex_size), K(member_list), K(arb_member)); - } else if (OB_ISNULL(serialize_buf = static_cast(allocator.alloc(serialize_size)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to alloc buf", KR(ret), K(serialize_size)); - } else if (OB_FAIL(member_list.serialize(serialize_buf, serialize_size, serialize_pos))) { - LOG_WARN("failed to serialize set member list arg", KR(ret), K(member_list), K(serialize_size), K(serialize_pos)); - } else if (0 != flag_size && OB_FAIL(arb_flag.serialize(serialize_buf, serialize_size, serialize_pos))) { - LOG_WARN("failed to serialize flag", KR(ret), K(arb_flag), K(serialize_size), K(serialize_pos)); - } else if (0 != arb_member_serialize_size && OB_FAIL(arb_member.serialize(serialize_buf, serialize_size, serialize_pos))) { - LOG_WARN("failed to serialize set arb member arg", KR(ret), K(arb_member), K(serialize_size), K(serialize_pos)); - } else if (OB_UNLIKELY(serialize_pos > serialize_size)) { - ret = OB_SIZE_OVERFLOW; - LOG_WARN("serialize error", KR(ret), K(serialize_pos), K(serialize_size)); - } else if (OB_ISNULL(hex_buf = static_cast(allocator.alloc(hex_size)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to alloc memory", KR(ret), K(hex_size)); - } else if (OB_FAIL(hex_print(serialize_buf, serialize_pos, hex_buf, hex_size, hex_pos))) { - LOG_WARN("fail to print hex", KR(ret), K(serialize_pos), K(hex_size), K(serialize_buf)); - } else if (OB_UNLIKELY(hex_pos > hex_size)) { - ret = OB_SIZE_OVERFLOW; - LOG_WARN("encode error", KR(ret), K(hex_pos), K(hex_size)); - } else { - hex_str.assign_ptr(hex_buf, static_cast(hex_pos)); - } - return ret; -} - -int ObLSStatusOperator::set_member_list_with_hex_str_(const common::ObString &str, - ObMemberList &member_list, - ObMember &arb_member) -{ - int ret = OB_SUCCESS; - member_list.reset(); + list.reset(); arb_member.reset(); char *deserialize_buf = NULL; const int64_t str_size = str.length(); const int64_t deserialize_size = str.length() / 2 + 1; int64_t deserialize_pos = 0; - bool has_arb_member = false; ObArenaAllocator allocator("MemberList"); if (OB_UNLIKELY(str.empty())) { ret = OB_INVALID_ARGUMENT; @@ -700,7 +755,7 @@ int ObLSStatusOperator::set_member_list_with_hex_str_(const common::ObString &st LOG_WARN("fail to alloc memory", KR(ret), K(deserialize_size)); } else if (OB_FAIL(hex_to_cstr(str.ptr(), str_size, deserialize_buf, deserialize_size))) { LOG_WARN("fail to get cstr from hex", KR(ret), K(str_size), K(deserialize_size), K(str)); - } else if (OB_FAIL(member_list.deserialize(deserialize_buf, deserialize_size, deserialize_pos))) { + } else if (OB_FAIL(list.deserialize(deserialize_buf, deserialize_size, deserialize_pos))) { LOG_WARN("fail to deserialize set member list arg", KR(ret), K(deserialize_pos), K(deserialize_size), K(str)); } else if (OB_UNLIKELY(deserialize_pos > deserialize_size)) { @@ -727,7 +782,55 @@ int ObLSStatusOperator::set_member_list_with_hex_str_(const common::ObString &st } } return ret; +} +template +int 
ObLSStatusOperator::get_list_hex_( + const T &list, + common::ObIAllocator &allocator, + common::ObString &hex_str, + const ObMember &arb_member) +{ + int ret = OB_SUCCESS; + char *serialize_buf = NULL; + ObMemberListFlag arb_flag(ObMemberListFlag::HAS_ARB_MEMBER); + const int64_t flag_size = arb_member.is_valid() ? arb_flag.get_serialize_size() : 0; + const int64_t arb_member_serialize_size = arb_member.is_valid() ? arb_member.get_serialize_size() : 0; + const int64_t serialize_size = list.get_serialize_size() + flag_size + arb_member_serialize_size; + int64_t serialize_pos = 0; + char *hex_buf = NULL; + const int64_t hex_size = 2 * serialize_size; + int64_t hex_pos = 0; + if (OB_UNLIKELY(!list.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("list is invalid", KR(ret), K(list)); + } else if (OB_UNLIKELY(hex_size > OB_MAX_LONGTEXT_LENGTH + 1)) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("format str is too long", KR(ret), K(hex_size), K(list), K(arb_member)); + } else if (OB_ISNULL(serialize_buf = static_cast(allocator.alloc(serialize_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc buf", KR(ret), K(serialize_size)); + } else if (OB_FAIL(list.serialize(serialize_buf, serialize_size, serialize_pos))) { + LOG_WARN("failed to serialize set list arg", KR(ret), K(list), K(serialize_size), K(serialize_pos)); + } else if (0 != flag_size && OB_FAIL(arb_flag.serialize(serialize_buf, serialize_size, serialize_pos))) { + LOG_WARN("failed to serialize flag", KR(ret), K(arb_flag), K(serialize_size), K(serialize_pos)); + } else if (0 != arb_member_serialize_size && OB_FAIL(arb_member.serialize(serialize_buf, serialize_size, serialize_pos))) { + LOG_WARN("failed to serialize set arb member arg", KR(ret), K(arb_member), K(serialize_size), K(serialize_pos)); + } else if (OB_UNLIKELY(serialize_pos > serialize_size)) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("serialize error", KR(ret), K(serialize_pos), K(serialize_size)); + } else if (OB_ISNULL(hex_buf = static_cast(allocator.alloc(hex_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to alloc memory", KR(ret), K(hex_size)); + } else if (OB_FAIL(hex_print(serialize_buf, serialize_pos, hex_buf, hex_size, hex_pos))) { + LOG_WARN("fail to print hex", KR(ret), K(serialize_pos), K(hex_size), K(serialize_buf)); + } else if (OB_UNLIKELY(hex_pos > hex_size)) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("encode error", KR(ret), K(hex_pos), K(hex_size)); + } else { + hex_str.assign_ptr(hex_buf, static_cast(hex_pos)); + } + return ret; } int ObLSStatusOperator::fill_cell( @@ -746,23 +849,31 @@ int ObLSStatusOperator::fill_cell( uint64_t ls_group_id = OB_INVALID_ID; uint64_t unit_group_id = OB_INVALID_ID; uint64_t tenant_id = OB_INVALID_TENANT_ID; + ObString flag_str; + ObString flag_str_default_value(""); + ObLSFlag flag(share::ObLSFlag::NORMAL_FLAG); EXTRACT_INT_FIELD_MYSQL(*result, "tenant_id", tenant_id, uint64_t); EXTRACT_INT_FIELD_MYSQL(*result, "ls_id", id_value, int64_t); EXTRACT_INT_FIELD_MYSQL(*result, "ls_group_id", ls_group_id, uint64_t); EXTRACT_INT_FIELD_MYSQL(*result, "unit_group_id", unit_group_id, uint64_t); EXTRACT_VARCHAR_FIELD_MYSQL(*result, "status", status_str); EXTRACT_VARCHAR_FIELD_MYSQL(*result, "primary_zone", primary_zone_str); + EXTRACT_VARCHAR_FIELD_MYSQL_WITH_DEFAULT_VALUE(*result, "flag", flag_str, + true /* skip_null_error */, true /* skip_column_error */, flag_str_default_value); if (OB_FAIL(ret)) { LOG_WARN("failed to get result", KR(ret), K(id_value), K(ls_group_id), K(unit_group_id), K(status_str), 
K(primary_zone_str)); } else { ObLSID ls_id(id_value); ObZone zone(primary_zone_str); - if (OB_FAIL(status_info.init(tenant_id, ls_id, ls_group_id, + if (OB_FAIL(flag.str_to_flag(flag_str))) { + // if flag_str is empty then flag is setted to normal + LOG_WARN("fail to convert string to flag", KR(ret), K(flag_str)); + } else if (OB_FAIL(status_info.init(tenant_id, ls_id, ls_group_id, str_to_ls_status(status_str), unit_group_id, - zone))) { + zone, flag))) { LOG_WARN("failed to init ls operation", KR(ret), K(tenant_id), K(zone), - K(ls_group_id), K(ls_id), K(status_str), K(unit_group_id)); + K(ls_group_id), K(ls_id), K(status_str), K(unit_group_id), K(flag)); } } } @@ -804,37 +915,29 @@ int ObLSStatusOperator::fill_cell( return ret; } - -int ObLSStatusOperator::get_ls_status_(const uint64_t tenant_id, - const ObLSID &id, - const bool need_member_list, - ObMemberList &member_list, - share::ObLSStatusInfo &status_info, - ObISQLClient &client, - ObMember &arb_member) +int ObLSStatusOperator::inner_get_ls_status_( + const ObSqlString &sql, + const uint64_t exec_tenant_id, + const bool need_member_list, + ObISQLClient &client, + ObMemberList &member_list, + share::ObLSStatusInfo &status_info, + ObMember &arb_member, + common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; member_list.reset(); status_info.reset(); + learner_list.reset(); arb_member.reset(); - if (OB_UNLIKELY(!id.is_valid() - || OB_INVALID_TENANT_ID == tenant_id)) { + if (OB_UNLIKELY(sql.empty() || OB_INVALID_TENANT_ID == exec_tenant_id)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), K(id), K(tenant_id)); + LOG_WARN("invalid argument", KR(ret), K(sql), K(exec_tenant_id)); } else { - ObSqlString sql; ObTimeoutCtx ctx; const int64_t default_timeout = GCONF.internal_sql_execute_timeout; - uint64_t exec_tenant_id = get_exec_tenant_id(tenant_id); - if (OB_UNLIKELY(OB_INVALID_TENANT_ID == exec_tenant_id)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to get exec tenant id", KR(ret), K(exec_tenant_id)); - } else if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(ctx, default_timeout))) { + if (OB_FAIL(ObShareUtil::set_default_timeout_ctx(ctx, default_timeout))) { LOG_WARN("failed to set default timeout ctx", KR(ret), K(default_timeout)); - } else if (OB_FAIL(sql.assign_fmt( - "SELECT * FROM %s where ls_id = %ld and tenant_id = %lu", - OB_ALL_LS_STATUS_TNAME, id.id(), tenant_id))) { - LOG_WARN("failed to assign sql", KR(ret), K(sql)); } else { HEAP_VAR(ObMySQLProxy::MySQLResult, res) { common::sqlclient::ObMySQLResult *result = NULL; @@ -845,9 +948,11 @@ int ObLSStatusOperator::get_ls_status_(const uint64_t tenant_id, LOG_WARN("failed to get sql result", KR(ret)); } else { ObString init_member_list_str; + ObString init_learner_list_str; ret = result->next(); if (OB_ITER_END == ret) { ret = OB_ENTRY_NOT_EXIST; + LOG_WARN("ls not exist in __all_ls_status table", KR(ret)); } else if (OB_FAIL(ret)) { LOG_WARN("failed to get ls", KR(ret), K(sql)); } else { @@ -861,10 +966,21 @@ int ObLSStatusOperator::get_ls_status_(const uint64_t tenant_id, K(init_member_list_str)); } else if (init_member_list_str.empty()) { // maybe - } else if (OB_FAIL(set_member_list_with_hex_str_( + } else if (OB_FAIL(set_list_with_hex_str_( init_member_list_str, member_list, arb_member))) { LOG_WARN("failed to set member list", KR(ret), K(init_member_list_str)); + } else { + // deal with learner list + EXTRACT_VARCHAR_FIELD_MYSQL_SKIP_RET( + *result, "b_init_learner_list", init_learner_list_str); + if (OB_FAIL(ret)) { + LOG_WARN("failed to 
get result", KR(ret), K(init_learner_list_str)); + } else if (init_learner_list_str.empty()) { + // maybe + } else if (OB_FAIL(set_list_with_hex_str_(init_learner_list_str, learner_list, arb_member))) { + LOG_WARN("failed to set learner list", KR(ret), K(init_learner_list_str)); + } } } } @@ -881,6 +997,35 @@ int ObLSStatusOperator::get_ls_status_(const uint64_t tenant_id, return ret; } +int ObLSStatusOperator::get_ls_status_(const uint64_t tenant_id, + const ObLSID &id, + const bool need_member_list, + ObMemberList &member_list, + share::ObLSStatusInfo &status_info, + ObISQLClient &client, + ObMember &arb_member, + common::GlobalLearnerList &learner_list) +{ + int ret = OB_SUCCESS; + member_list.reset(); + learner_list.reset(); + status_info.reset(); + ObSqlString sql; + if (OB_UNLIKELY(!id.is_valid() + || OB_INVALID_TENANT_ID == tenant_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(id), K(tenant_id)); + } else if (OB_FAIL(sql.assign_fmt("SELECT * FROM %s where ls_id = %ld and tenant_id = %lu", + OB_ALL_LS_STATUS_TNAME, id.id(), tenant_id))) { + LOG_WARN("failed to assign sql", KR(ret), K(sql)); + } else if (OB_FAIL(inner_get_ls_status_(sql, get_exec_tenant_id(tenant_id), need_member_list, + client, member_list, status_info, arb_member, learner_list))) { + LOG_WARN("fail to inner get ls status info", KR(ret), K(sql), K(tenant_id), "exec_tenant_id", + get_exec_tenant_id(tenant_id), K(need_member_list)); + } + return ret; +} + int ObLSStatusOperator::construct_ls_primary_info_sql_(common::ObSqlString &sql) { @@ -1396,10 +1541,11 @@ int ObLSStatusOperator::create_abort_ls_in_switch_tenant( ret = OB_NEED_RETRY; LOG_WARN("switchover may concurrency, need retry", KR(ret), K(switchover_epoch), K(status), K(tenant_info)); } else if (OB_FAIL(sql.assign_fmt("UPDATE %s set status = '%s',init_member_list = '', b_init_member_list = ''" - " where tenant_id = %lu and status in ('%s', '%s')", - OB_ALL_LS_STATUS_TNAME, - ls_status_to_str(share::OB_LS_CREATE_ABORT), - tenant_id, ls_status_to_str(OB_LS_CREATED), ls_status_to_str(OB_LS_CREATING)))) { + ", init_learner_list = '', b_init_learner_list = ''" + " where tenant_id = %lu and status in ('%s', '%s')", + OB_ALL_LS_STATUS_TNAME, + ls_status_to_str(share::OB_LS_CREATE_ABORT), + tenant_id, ls_status_to_str(OB_LS_CREATED), ls_status_to_str(OB_LS_CREATING)))) { LOG_WARN("failed to assign sql", KR(ret), K(tenant_id), K(sql)); } else if (OB_FAIL(exec_write(tenant_id, sql, this, trans, true))) { LOG_WARN("failed to exec write", KR(ret), K(tenant_id), K(sql)); diff --git a/src/share/ls/ob_ls_status_operator.h b/src/share/ls/ob_ls_status_operator.h old mode 100644 new mode 100755 index 2fe012fe11..3550724d55 --- a/src/share/ls/ob_ls_status_operator.h +++ b/src/share/ls/ob_ls_status_operator.h @@ -24,6 +24,7 @@ #include "share/ls/ob_ls_info.h" //ObLSReplica::MemberList #include "share/ls/ob_ls_log_stat_info.h" //ObLSLogStatInfo #include "share/ls/ob_ls_recovery_stat_operator.h" //ObLSRecoveryStat +#include "share/ls/ob_ls_operator.h" namespace oceanbase { @@ -55,6 +56,8 @@ namespace schema class ObMultiVersionSchemaService; } +ObLSStatus str_to_ls_status(const ObString &status_str); +const char* ls_status_to_str(const ObLSStatus &status); bool ls_is_empty_status(const ObLSStatus &status); bool ls_is_creating_status(const ObLSStatus &status); bool ls_is_created_status(const ObLSStatus &status); @@ -98,13 +101,13 @@ struct ObLSStatusInfo ObLSStatusInfo() : tenant_id_(OB_INVALID_TENANT_ID), ls_id_(), ls_group_id_(OB_INVALID_ID), 
status_(OB_LS_EMPTY), unit_group_id_(OB_INVALID_ID), - primary_zone_() {} + primary_zone_(), flag_(ObLSFlag::NORMAL_FLAG) {} virtual ~ObLSStatusInfo() {} bool is_valid() const; int init(const uint64_t tenant_id, const ObLSID &id, const uint64_t ls_group_id, const ObLSStatus status, const uint64_t unit_group_id, - const ObZone &primary_zone); + const ObZone &primary_zone, const ObLSFlag &flag); bool ls_is_creating() const { return ls_is_creating_status(status_); @@ -141,12 +144,22 @@ struct ObLSStatusInfo { return ls_is_pre_tenant_dropping_status(status_); } - + bool is_duplicate_ls() const + { + return flag_.is_duplicate_ls(); + } + bool ls_is_block_tablet_in() const + { + return flag_.is_block_tablet_in(); + } ObLSStatus get_status() const { return status_; } - + ObLSFlag get_flag() const + { + return flag_; + } int assign(const ObLSStatusInfo &other); void reset(); bool is_normal() const @@ -162,9 +175,10 @@ struct ObLSStatusInfo ObLSStatus status_; uint64_t unit_group_id_; ObZone primary_zone_; + share::ObLSFlag flag_; TO_STRING_KV(K_(tenant_id), K_(ls_id), K_(ls_group_id), K_(status), - K_(unit_group_id), K_(primary_zone)); + K_(unit_group_id), K_(primary_zone), K_(flag)); }; typedef ObArray ObLSStatusInfoArray; @@ -315,10 +329,24 @@ public: int update_init_member_list(const uint64_t tenant_id, const ObLSID &id, const ObMemberList &member_list, ObISQLClient &client, - const ObMember &arb_member); + const ObMember &arb_member, + const common::GlobalLearnerList &learner_list); + int get_all_ls_status_by_order(const uint64_t tenant_id, ObLSStatusInfoIArray &ls_array, ObISQLClient &client); + + // get duplicate ls status info + // @params[in] tenant_id, which tenant to get + // @params[in] client, client to execute sql + // @params[out] status_info, duplicate ls status info + // + // ATTENTION!!! 
+ // status_info not include visible_member_list and b_init_member_list + // @return OB_ENTRY_NOT_EXIST if duplicate log stream not exist + int get_duplicate_ls_status_info(const uint64_t tenant_id, + ObISQLClient &client, + share::ObLSStatusInfo &status_info); /** * @description: * get ls list from all_ls_status order by tenant_id, ls_id for switchover tenant @@ -348,7 +376,8 @@ public: ObMemberList &member_list, ObLSStatusInfo &status_info, ObISQLClient &client, - ObMember &arb_member); + ObMember &arb_member, + common::GlobalLearnerList &learner_list); int get_ls_status_info(const uint64_t tenant_id, const ObLSID &id, ObLSStatusInfo &status_info, ObISQLClient &client); int fill_cell(common::sqlclient::ObMySQLResult *result, @@ -448,19 +477,32 @@ public: static int check_ls_exist(const uint64_t tenant_id, const ObLSID &ls_id, ObLSExistState &state); private: + template int get_list_hex_( + const T &list, + common::ObIAllocator &allocator, + common::ObString &hex_str, + const ObMember &arb_member); + + template int set_list_with_hex_str_( + const common::ObString &str, + T &learner_list, + ObMember &arb_member); + int get_visible_member_list_str_(const ObMemberList &member_list, common::ObIAllocator &allocator, common::ObSqlString &visible_member_list_str, const ObMember &arb_member); - int get_member_list_hex_(const ObMemberList &member_list, - common::ObIAllocator &allocator, - common::ObString &hex_str, - const ObMember &arb_member); - int set_member_list_with_hex_str_(const common::ObString &str, - ObMemberList &member_list, ObMember &arb_member); + + int inner_get_ls_status_(const ObSqlString &sql, const uint64_t exec_tenant_id, + const bool need_member_list, ObISQLClient &client, + ObMemberList &member_list, share::ObLSStatusInfo &status_info, + ObMember &arb_member, common::GlobalLearnerList &learner_list); + int get_ls_status_(const uint64_t tenant_id, const ObLSID &id, const bool need_member_list, ObMemberList &member_list, - ObLSStatusInfo &status_info, ObISQLClient &client, ObMember &arb_member); + ObLSStatusInfo &status_info, ObISQLClient &client, + ObMember &arb_member, common::GlobalLearnerList &learner_list); + int construct_ls_primary_info_sql_(common::ObSqlString &sql); //////////for checking all ls log_stat_info///////// diff --git a/src/share/ls/ob_persistent_ls_table.cpp b/src/share/ls/ob_persistent_ls_table.cpp index 46e34d6e02..e0c13fa721 100644 --- a/src/share/ls/ob_persistent_ls_table.cpp +++ b/src/share/ls/ob_persistent_ls_table.cpp @@ -199,6 +199,8 @@ int ObPersistentLSTable::construct_ls_replica( int64_t paxos_replica_number = OB_INVALID_COUNT; int64_t data_size = 0; int64_t required_size = 0; + ObString learner_list; + GlobalLearnerList learner_list_to_set; // TODO: try to fetch coulmn_value by column_name // column select order defined in LSTableColNames::LSTableColNames // location related @@ -228,11 +230,17 @@ int ObPersistentLSTable::construct_ls_replica( (void)GET_COL_IGNORE_NULL_WITH_DEFAULT_VALUE(res.get_int, "paxos_replica_number", paxos_replica_number, OB_INVALID_COUNT); (void)GET_COL_IGNORE_NULL(res.get_int, "data_size", data_size); (void)GET_COL_IGNORE_NULL_WITH_DEFAULT_VALUE(res.get_int, "required_size", required_size, 0); + EXTRACT_VARCHAR_FIELD_MYSQL_SKIP_RET(res, "learner_list", learner_list); - if (OB_FAIL(ObLSReplica::text2member_list( + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObLSReplica::text2member_list( to_cstring(member_list), member_list_to_set))) { LOG_WARN("text2member_list failed", KR(ret)); + } else if 
(OB_FAIL(ObLSReplica::text2learner_list( + to_cstring(learner_list), + learner_list_to_set))) { + LOG_WARN("text2member_list for learner_list failed", KR(ret)); } else if (false == server.set_ip_addr(ip, static_cast(port))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid server address", K(ip), K(port)); @@ -265,17 +273,13 @@ int ObPersistentLSTable::construct_ls_replica( zone, paxos_replica_number, data_size, - required_size))) { + required_size, + member_list_to_set, + learner_list_to_set))) { LOG_WARN("fail to init a ls replica", KR(ret), K(create_time_us), K(modify_time_us), K(tenant_id), K(ls_id), K(server), K(sql_port), K(role), K(replica_type), K(proposal_id), K(unit_id), K(zone), - K(paxos_replica_number)); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < member_list_to_set.count(); i++) { - if (OB_FAIL(replica.add_member(member_list_to_set.at(i)))) { - LOG_WARN("push_back failed", KR(ret)); - } - } + K(paxos_replica_number), K(member_list_to_set), K(learner_list_to_set)); } LOG_DEBUG("construct log stream replica", KR(ret), K(replica)); @@ -553,7 +557,8 @@ int ObPersistentLSTable::fill_dml_splicer_( { int ret = OB_SUCCESS; char ip[OB_MAX_SERVER_ADDR_SIZE] = ""; - char member_list[MAX_MEMBER_LIST_LENGTH] = ""; + ObSqlString member_list; + ObSqlString learner_list; if (!replica.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid replica", KR(ret), K(replica)); @@ -561,15 +566,17 @@ int ObPersistentLSTable::fill_dml_splicer_( ret = OB_INVALID_ARGUMENT; LOG_WARN("convert server ip to string failed", KR(ret), "server", replica.get_server()); } else if (OB_FAIL(ObLSReplica::member_list2text( - replica.get_member_list(), member_list, MAX_MEMBER_LIST_LENGTH))) { - LOG_WARN("member_list2text failed", K(replica), KR(ret)); + replica.get_member_list(), member_list))) { + LOG_WARN("member_list2text failed", KR(ret), K(replica)); + } else if (OB_FAIL(replica.get_learner_list().transform_to_string(learner_list))) { + LOG_WARN("failed to transform GlobalLearnerList to ObSqlString", KR(ret), K(replica)); } else if (OB_FAIL(dml_splicer.add_pk_column("tenant_id", replica.get_tenant_id())) //location related || OB_FAIL(dml_splicer.add_pk_column("ls_id", replica.get_ls_id().id())) || OB_FAIL(dml_splicer.add_pk_column("svr_ip", ip)) || OB_FAIL(dml_splicer.add_pk_column("svr_port", replica.get_server().get_port())) || OB_FAIL(dml_splicer.add_column("sql_port", replica.get_sql_port())) || OB_FAIL(dml_splicer.add_column("role", replica.get_role())) - || OB_FAIL(dml_splicer.add_column("member_list", member_list)) + || OB_FAIL(dml_splicer.add_column("member_list", member_list.empty() ? 
"" : member_list.ptr())) || OB_FAIL(dml_splicer.add_column("proposal_id", replica.get_proposal_id())) || OB_FAIL(dml_splicer.add_column("replica_type", replica.get_replica_type())) || OB_FAIL(dml_splicer.add_column("replica_status", ob_replica_status_str(replica.get_replica_status()))) @@ -579,9 +586,23 @@ int ObPersistentLSTable::fill_dml_splicer_( || OB_FAIL(dml_splicer.add_column("zone", replica.get_zone().ptr())) || OB_FAIL(dml_splicer.add_column("paxos_replica_number", replica.get_paxos_replica_number())) || OB_FAIL(dml_splicer.add_column("data_size", replica.get_data_size())) - || OB_FAIL(dml_splicer.add_column("required_size", replica.get_required_size()))) { + || OB_FAIL(dml_splicer.add_column("required_size", replica.get_required_size()))){ LOG_WARN("add column failed", KR(ret), K(replica)); } + + uint64_t tenant_to_check_data_version = replica.get_tenant_id(); + bool is_compatible_with_readonly_replica = false; + int tmp_ret = OB_SUCCESS; + if (OB_FAIL(ret)) { + } else if (OB_SUCCESS != (tmp_ret = ObShareUtil::check_compat_version_for_readonly_replica( + tenant_to_check_data_version, is_compatible_with_readonly_replica))) { + LOG_WARN("fail to check compat version with readonly replica", KR(tmp_ret), K(tenant_to_check_data_version)); + } else if (is_compatible_with_readonly_replica) { + if (OB_FAIL(dml_splicer.add_column("learner_list", learner_list.empty() ? "" : learner_list.ptr()))) { + LOG_WARN("fail to add learner list column", KR(ret)); + } + } + return ret; } diff --git a/src/share/ob_locality_parser.cpp b/src/share/ob_locality_parser.cpp index d78c8e8a02..9f40422eda 100644 --- a/src/share/ob_locality_parser.cpp +++ b/src/share/ob_locality_parser.cpp @@ -39,7 +39,7 @@ const char *ObLocalityParser::E_REPLICA_STR = "E"; int ObLocalityParser::parse_type(const char *str, int64_t len, ObReplicaType &replica_type) { UNUSED(len); - // TODO: only support F-replica in 4.0 for now, will support others in the future + // TODO: only support F-replica in 4.0 and R-replica in 4.2 for now, will support others in the future int ret = OB_SUCCESS; if (OB_ISNULL(str)) { ret = OB_INVALID_ARGUMENT; @@ -75,12 +75,8 @@ int ObLocalityParser::parse_type(const char *str, int64_t len, ObReplicaType &re LOG_USER_ERROR(OB_NOT_SUPPORTED, "backup-replica"); } else if ( 0 == STRCASECMP(READONLY_REPLICA_STR, str)) { replica_type = REPLICA_TYPE_READONLY; - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "readonly-replica"); } else if ( 0 == STRCASECMP(R_REPLICA_STR, str)) { replica_type = REPLICA_TYPE_READONLY; - ret = OB_NOT_SUPPORTED; - LOG_USER_ERROR(OB_NOT_SUPPORTED, "readonly-replica"); } else if ( 0 == STRCASECMP(MEMONLY_REPLICA_STR, str)) { replica_type = REPLICA_TYPE_MEMONLY; ret = OB_NOT_SUPPORTED; diff --git a/src/share/ob_ls_id.h b/src/share/ob_ls_id.h index 7e10f359b2..cbf5883629 100644 --- a/src/share/ob_ls_id.h +++ b/src/share/ob_ls_id.h @@ -103,7 +103,7 @@ static const ObLSID MAJOR_FREEZE_LS(ObLSID::MAJOR_FREEZE_LS_ID); static const ObLSID WRS_LS_ID(ObLSID::WRS_LS_ID); static const int64_t OB_DEFAULT_LS_COUNT = 3; -typedef common::ObSEArray ObLSArray; +typedef common::ObSEArray ObLSArray; } // end namespace share } // end namespace oceanbase diff --git a/src/share/ob_rpc_struct.cpp b/src/share/ob_rpc_struct.cpp index 7b72ef2cfc..7ff177f215 100644 --- a/src/share/ob_rpc_struct.cpp +++ b/src/share/ob_rpc_struct.cpp @@ -6623,6 +6623,34 @@ OB_SERIALIZE_MEMBER((ObDropDirectoryArg, ObDDLArg), tenant_id_, directory_name_) +int ObCreateDupLSArg::assign(const ObCreateDupLSArg &arg) +{ + 
int ret = OB_SUCCESS; + tenant_id_ = arg.tenant_id_; + return ret; +} + +int ObCreateDupLSArg::init(const uint64_t tenant_id) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id)); + } else { + tenant_id_ = tenant_id; + } + return ret; +} + +DEF_TO_STRING(ObCreateDupLSArg) +{ + int64_t pos = 0; + J_KV(K_(tenant_id)); + return pos; +} + +OB_SERIALIZE_MEMBER(ObCreateDupLSArg, tenant_id_); + bool ObCreateLSArg::is_valid() const { return OB_INVALID_TENANT_ID != tenant_id_ @@ -6728,6 +6756,7 @@ void ObSetMemberListArgV2::reset() member_list_.reset(); paxos_replica_num_ = 0; arbitration_service_.reset(); + learner_list_.reset(); } int ObSetMemberListArgV2::assign(const ObSetMemberListArgV2 &arg) @@ -6738,6 +6767,8 @@ int ObSetMemberListArgV2::assign(const ObSetMemberListArgV2 &arg) LOG_WARN("arg is invalid", KR(ret), K(arg)); } else if (OB_FAIL(member_list_.deep_copy(arg.member_list_))) { LOG_WARN("failed to assign member list", KR(ret), K(arg)); + } else if (OB_FAIL(learner_list_.deep_copy(arg.learner_list_))) { + LOG_WARN("failed to assign learner list", KR(ret), K(arg)); } else if (OB_FAIL(arbitration_service_.assign(arg.arbitration_service_))) { LOG_WARN("failed to assign arbitration_service", KR(ret), K(arg)); } else { @@ -6750,7 +6781,8 @@ int ObSetMemberListArgV2::assign(const ObSetMemberListArgV2 &arg) int ObSetMemberListArgV2::init(const int64_t tenant_id, const share::ObLSID &id, const int64_t paxos_replica_num, - const ObMemberList &member_list, const ObMember &arbitration_service) + const ObMemberList &member_list, const ObMember &arbitration_service, + const common::GlobalLearnerList &learner_list) { int ret = OB_SUCCESS; if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id @@ -6761,6 +6793,8 @@ int ObSetMemberListArgV2::init(const int64_t tenant_id, LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(id), K(member_list), K(paxos_replica_num)); } else if (OB_FAIL(member_list_.deep_copy(member_list))) { LOG_WARN("failed to assign member list", KR(ret), K(member_list)); + } else if (OB_FAIL(learner_list_.deep_copy(learner_list))) { + LOG_WARN("fail ed to assign learner list", KR(ret), K(learner_list)); } else if (OB_FAIL(arbitration_service_.assign(arbitration_service))) { LOG_WARN("failed to assign arbitration service", KR(ret), K(arbitration_service)); } else { @@ -6774,11 +6808,11 @@ int ObSetMemberListArgV2::init(const int64_t tenant_id, DEF_TO_STRING(ObSetMemberListArgV2) { int64_t pos = 0; - J_KV(K_(tenant_id), K_(id), K_(paxos_replica_num), K_(member_list), K_(arbitration_service)); + J_KV(K_(tenant_id), K_(id), K_(paxos_replica_num), K_(member_list), K_(arbitration_service), K_(learner_list)); return pos; } -OB_SERIALIZE_MEMBER(ObSetMemberListArgV2, tenant_id_, id_, member_list_, paxos_replica_num_, arbitration_service_); +OB_SERIALIZE_MEMBER(ObSetMemberListArgV2, tenant_id_, id_, member_list_, paxos_replica_num_, arbitration_service_, learner_list_); bool ObGetLSAccessModeInfoArg::is_valid() const { @@ -7178,7 +7212,22 @@ DEF_TO_STRING(ObBatchCreateTabletArg) OB_SERIALIZE_MEMBER(ObBatchCreateTabletArg, id_, major_frozen_scn_, tablets_, table_schemas_, need_check_tablet_cnt_); -OB_SERIALIZE_MEMBER(ObCreateLSResult, ret_, addr_); +OB_SERIALIZE_MEMBER(ObCreateDupLSResult, ret_); +bool ObCreateDupLSResult::is_valid() const +{ + return true; +} +int ObCreateDupLSResult::assign(const ObCreateDupLSResult &other) +{ + int ret = OB_SUCCESS; + if (this == &other) { + } else { 
+ ret_ = other.ret_; + } + return ret; +} + +OB_SERIALIZE_MEMBER(ObCreateLSResult, ret_, addr_, replica_type_); bool ObCreateLSResult::is_valid() const { return true; @@ -7190,6 +7239,7 @@ int ObCreateLSResult::assign(const ObCreateLSResult &other) } else { ret_ = other.ret_; addr_ = other.addr_; + replica_type_ = other.replica_type_; } return ret; } diff --git a/src/share/ob_rpc_struct.h b/src/share/ob_rpc_struct.h index 1b337ab3c9..0e2e0f8315 100644 --- a/src/share/ob_rpc_struct.h +++ b/src/share/ob_rpc_struct.h @@ -2754,6 +2754,41 @@ public: TO_STRING_KV(K_(tenant_id), K_(purge_num), K_(expire_time), K_(auto_purge)); }; +struct ObCreateDupLSArg +{ + OB_UNIS_VERSION(1); +public: + ObCreateDupLSArg() : tenant_id_(OB_INVALID_TENANT_ID) {} + ~ObCreateDupLSArg() {} + bool is_valid() const { return OB_INVALID_TENANT_ID != tenant_id_; } + void reset() { tenant_id_ = OB_INVALID_TENANT_ID; } + int assign(const ObCreateDupLSArg &arg); + int init(const uint64_t tenant_id); + int64_t get_tenant_id() const { return tenant_id_; } + DECLARE_TO_STRING; +private: + uint64_t tenant_id_; +private: + DISALLOW_COPY_AND_ASSIGN(ObCreateDupLSArg); +}; + +struct ObCreateDupLSResult +{ + OB_UNIS_VERSION(1); +public: + ObCreateDupLSResult(): ret_(common::OB_SUCCESS) {} + ~ObCreateDupLSResult() {} + bool is_valid() const; + int assign(const ObCreateDupLSResult &other); + void init(const int ret) { ret_ = ret; } + TO_STRING_KV(K_(ret)); + int get_result() const { return ret_; } +private: + DISALLOW_COPY_AND_ASSIGN(ObCreateDupLSResult); +private: + int ret_; +}; + struct ObCreateLSArg { OB_UNIS_VERSION(1); @@ -2841,16 +2876,17 @@ struct ObCreateLSResult { OB_UNIS_VERSION(1); public: - ObCreateLSResult(): ret_(common::OB_SUCCESS), addr_() {} + ObCreateLSResult(): ret_(common::OB_SUCCESS), addr_(), replica_type_(REPLICA_TYPE_FULL) {} ~ObCreateLSResult() {} bool is_valid() const; int assign(const ObCreateLSResult &other); - void init(const int ret, const ObAddr &addr) + void init(const int ret, const ObAddr &addr, const ObReplicaType &replica_type) { ret_ = ret; addr_ = addr; + replica_type_ = replica_type; } - TO_STRING_KV(K_(ret), K_(addr)); + TO_STRING_KV(K_(ret), K_(addr), K_(replica_type)); int get_result() const { return ret_; @@ -2859,11 +2895,16 @@ public: { return addr_; } + const common::ObReplicaType &get_replica_type() const + { + return replica_type_; + } private: DISALLOW_COPY_AND_ASSIGN(ObCreateLSResult); private: int ret_; ObAddr addr_;//for async rpc, dests and results not one-by-one mapping + common::ObReplicaType replica_type_; }; @@ -2873,7 +2914,7 @@ struct ObSetMemberListArgV2 public: ObSetMemberListArgV2() : tenant_id_(OB_INVALID_TENANT_ID), id_(), member_list_(), paxos_replica_num_(0), - arbitration_service_() {} + arbitration_service_(), learner_list_() {} ~ObSetMemberListArgV2() {} bool is_valid() const; void reset(); @@ -2882,7 +2923,8 @@ public: const share::ObLSID &id, const int64_t paxos_replica_num, const ObMemberList &member_list, - const ObMember &arbitration_service); + const ObMember &arbitration_service, + const common::GlobalLearnerList &learner_list); DECLARE_TO_STRING; const ObMemberList& get_member_list() const { @@ -2904,12 +2946,17 @@ public: { return paxos_replica_num_; } + const common::GlobalLearnerList& get_learner_list() const + { + return learner_list_; + } private: int64_t tenant_id_; share::ObLSID id_; ObMemberList member_list_; int64_t paxos_replica_num_; ObMember arbitration_service_; + common::GlobalLearnerList learner_list_; private: 
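// ObCreateLSResult and ObSetMemberListArgV2 above both gain a field (replica_type_,
// learner_list_) that is appended at the end of their OB_SERIALIZE_MEMBER lists. The usual
// reason for append-only evolution is that a payload written by an older binary simply ends
// earlier, and the decoder leaves the missing trailing field at its default. A self-contained
// sketch of that principle with a hand-rolled fixed-width codec -- an illustration only, not
// the OB_UNIS encoding itself:
#include <cassert>
#include <cstdint>
#include <vector>

struct CreateResult {
  int32_t ret_ = 0;
  int32_t replica_type_ = 0;  // new trailing field; keeps its default when absent
};

static void put_i32(std::vector<uint8_t> &buf, int32_t v) {
  const uint32_t u = static_cast<uint32_t>(v);
  for (int i = 0; i < 4; ++i) buf.push_back(static_cast<uint8_t>((u >> (8 * i)) & 0xffU));
}
static bool get_i32(const std::vector<uint8_t> &buf, size_t &pos, int32_t &v) {
  if (pos + 4 > buf.size()) return false;  // field absent: caller keeps the default
  uint32_t u = 0;
  for (int i = 0; i < 4; ++i) u |= static_cast<uint32_t>(buf[pos + i]) << (8 * i);
  pos += 4;
  v = static_cast<int32_t>(u);
  return true;
}

static std::vector<uint8_t> encode_v1(const CreateResult &r) {  // old binary: no replica_type_
  std::vector<uint8_t> buf;
  put_i32(buf, r.ret_);
  return buf;
}
static std::vector<uint8_t> encode_v2(const CreateResult &r) {  // new binary: trailing field added
  std::vector<uint8_t> buf;
  put_i32(buf, r.ret_);
  put_i32(buf, r.replica_type_);
  return buf;
}
static CreateResult decode(const std::vector<uint8_t> &buf) {
  CreateResult r;
  size_t pos = 0;
  get_i32(buf, pos, r.ret_);
  get_i32(buf, pos, r.replica_type_);  // optional: absent in old payloads
  return r;
}

int main() {
  CreateResult old_style{-4018, 0};
  assert(decode(encode_v1(old_style)).replica_type_ == 0);   // trailing field defaulted
  CreateResult new_style{0, 16};
  assert(decode(encode_v2(new_style)).replica_type_ == 16);  // round-trips when present
}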
DISALLOW_COPY_AND_ASSIGN(ObSetMemberListArgV2); }; @@ -2923,7 +2970,7 @@ public: bool is_valid() const; int assign(const ObSetMemberListResult &other); TO_STRING_KV(K_(ret)); - void set_result(const int ret) + void init(const int ret) { ret_ = ret; } diff --git a/src/share/ob_share_util.cpp b/src/share/ob_share_util.cpp index f47fdd6e73..2442a6ed2f 100644 --- a/src/share/ob_share_util.cpp +++ b/src/share/ob_share_util.cpp @@ -18,6 +18,7 @@ #include "lib/oblog/ob_log_module.h" #include "share/ob_cluster_version.h" // for GET_MIN_DATA_VERSION #include "lib/mysqlclient/ob_isql_client.h" + namespace oceanbase { using namespace common; @@ -114,6 +115,31 @@ int ObShareUtil::generate_arb_replica_num( return ret; } +int ObShareUtil::check_compat_version_for_readonly_replica( + const uint64_t tenant_id, + bool &is_compatible) +{ + int ret = OB_SUCCESS; + uint64_t data_version = 0; + is_compatible = false; + if (OB_UNLIKELY(OB_INVALID_TENANT_ID == tenant_id)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(OB_SYS_TENANT_ID, data_version))) { + LOG_WARN("fail to get sys tenant data version", KR(ret)); + } else if (DATA_VERSION_4_2_0_0 > data_version) { + is_compatible = false; + } else if (!is_sys_tenant(tenant_id) + && OB_FAIL(GET_MIN_DATA_VERSION(gen_meta_tenant_id(tenant_id), data_version))) { + LOG_WARN("fail to get meta tenant data version", KR(ret), "tenant_id", gen_meta_tenant_id(tenant_id)); + } else if (!is_sys_tenant(tenant_id) && DATA_VERSION_4_2_0_0 > data_version) { + is_compatible = false; + } else { + is_compatible = true; + } + return ret; +} + int ObShareUtil::fetch_current_cluster_version( common::ObISQLClient &client, uint64_t &cluster_version) diff --git a/src/share/ob_share_util.h b/src/share/ob_share_util.h index 6d9ddbdc39..836bd51a48 100644 --- a/src/share/ob_share_util.h +++ b/src/share/ob_share_util.h @@ -44,6 +44,13 @@ public: const ObLSID &ls_id, int64_t &arb_replica_num); + // data version must up to 4.2 with read only replica + // @params[in] tenant_id, which tenant to check + // @params[out] is_compatible, whether it is up to 4.2 + static int check_compat_version_for_readonly_replica( + const uint64_t tenant_id, + bool &is_compatible); + static int fetch_current_cluster_version( common::ObISQLClient &client, uint64_t &cluster_version); diff --git a/src/share/ob_srv_rpc_proxy.h b/src/share/ob_srv_rpc_proxy.h index 9422107b13..7144a58918 100644 --- a/src/share/ob_srv_rpc_proxy.h +++ b/src/share/ob_srv_rpc_proxy.h @@ -137,6 +137,7 @@ public: RPC_S(PR5 force_disable_blacklist, OB_FORCE_DISABLE_BLACKLIST); RPC_S(PR5 force_enable_blacklist, OB_FORCE_ENABLE_BLACKLIST); RPC_S(PR5 force_clear_srv_blacklist, OB_FORCE_CLEAR_BLACKLIST); + RPC_S(PR5 notify_create_duplicate_ls, OB_NOTIFY_CREATE_DUPLICATE_LS, (obrpc::ObCreateDupLSArg), obrpc::ObCreateDupLSResult); RPC_S(PR5 update_local_stat_cache, obrpc::OB_SERVER_UPDATE_STAT_CACHE, (ObUpdateStatCacheArg)); // The optimizer estimates the number of rows diff --git a/src/share/stat/ob_basic_stats_estimator.cpp b/src/share/stat/ob_basic_stats_estimator.cpp index cb92fb3532..6b902b0b65 100644 --- a/src/share/stat/ob_basic_stats_estimator.cpp +++ b/src/share/stat/ob_basic_stats_estimator.cpp @@ -344,7 +344,7 @@ int ObBasicStatsEstimator::get_tablet_locations(ObExecContext &ctx, ObCandiTabletLocIArray &candi_tablet_locs) { int ret = OB_SUCCESS; - ObDASCtx &das_ctx = ctx.get_das_ctx(); + ObDASLocationRouter &loc_router = ctx.get_das_ctx().get_location_router(); 
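// check_compat_version_for_readonly_replica (added in ob_share_util.cpp above) is the gate
// used throughout this patch: the sys tenant must already be on data version 4.2.0.0 and,
// for a user tenant, its meta tenant must be as well, before any learner-list or duplicate-LS
// state is persisted. A compact sketch of that two-step gate; the version lookup is injected
// as a callback in place of GET_MIN_DATA_VERSION, and is_sys_tenant / gen_meta_tenant_id are
// simplified stand-ins for the real helpers:
#include <cstdint>
#include <functional>
#include <iostream>

constexpr uint64_t SYS_TENANT_ID = 1;
constexpr uint64_t DATA_VERSION_4_2_0_0 = (4ULL << 32) | (2ULL << 16);  // toy encoding

bool is_sys_tenant(uint64_t tenant_id) { return SYS_TENANT_ID == tenant_id; }
uint64_t gen_meta_tenant_id(uint64_t tenant_id) { return tenant_id + 1; }  // placeholder rule

// Returns true only when every tenant that stores the new columns already runs >= 4.2.
bool compatible_with_readonly_replica(
    uint64_t tenant_id,
    const std::function<uint64_t(uint64_t)> &get_min_data_version) {
  if (get_min_data_version(SYS_TENANT_ID) < DATA_VERSION_4_2_0_0) {
    return false;  // cluster-level system tables are not upgraded yet
  }
  if (!is_sys_tenant(tenant_id) &&
      get_min_data_version(gen_meta_tenant_id(tenant_id)) < DATA_VERSION_4_2_0_0) {
    return false;  // this tenant's meta tenant still runs an old data version
  }
  return true;
}

int main() {
  auto all_new = [](uint64_t) { return DATA_VERSION_4_2_0_0; };
  auto old_meta = [](uint64_t id) {
    return id == SYS_TENANT_ID ? DATA_VERSION_4_2_0_0 : DATA_VERSION_4_2_0_0 - 1;
  };
  std::cout << compatible_with_readonly_replica(1002, all_new) << "\n";   // 1: feature allowed
  std::cout << compatible_with_readonly_replica(1002, old_meta) << "\n";  // 0: meta tenant lags
}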
ObSQLSessionInfo *session = ctx.get_my_session(); if (OB_ISNULL(session) || OB_UNLIKELY(tablet_ids.count() != partition_ids.count())) { ret = OB_ERR_UNEXPECTED; @@ -355,29 +355,19 @@ int ObBasicStatsEstimator::get_tablet_locations(ObExecContext &ctx, LOG_WARN("Partitoin location list prepare error", K(ret)); } else { ObArenaAllocator allocator(ObModIds::OB_SQL_PARSER); + //This interface does not require the first_level_part_ids, so construct an empty array. + ObSEArray first_level_part_ids; ObDASTableLocMeta loc_meta(allocator); - ObDASLocationRouter &loc_router = das_ctx.get_location_router(); - share::ObLSLocation location; loc_meta.ref_table_id_ = ref_table_id; - for (int64_t i = 0; OB_SUCC(ret) && i < tablet_ids.count(); ++i) { - location.reset(); - ObCandiTabletLoc &candi_tablet_loc = candi_tablet_locs.at(i); - if (OB_FAIL(loc_router.get(loc_meta, tablet_ids.at(i), location))) { - LOG_WARN("failed to get location", K(ret), K(loc_meta), K(tablet_ids.at(i))); - } else if (OB_FAIL(candi_tablet_loc.set_part_loc_with_only_readable_replica( - partition_ids.at(i), - OB_INVALID_INDEX, - tablet_ids.at(i), location, - session->get_retry_info().get_invalid_servers()))) { - LOG_WARN("fail to set partition location with only readable replica", - K(ret),K(i), K(location), K(candi_tablet_locs), K(tablet_ids), K(partition_ids), - K(session->get_retry_info().get_invalid_servers())); - } else { - LOG_TRACE("succeed to get partition location with only readable replica", - K(location), K(candi_tablet_locs), K(tablet_ids), K(partition_ids), - K(session->get_retry_info().get_invalid_servers())); - } - } // for end + loc_meta.table_loc_id_ = ref_table_id; + loc_meta.select_leader_ = 0; + if (OB_FAIL(loc_router.nonblock_get_candi_tablet_locations(loc_meta, + tablet_ids, + partition_ids, + first_level_part_ids, + candi_tablet_locs))) { + LOG_WARN("nonblock get candi tablet location failed", K(ret), K(loc_meta), K(partition_ids), K(tablet_ids)); + } } } return ret; diff --git a/src/share/unit/ob_unit_info.h b/src/share/unit/ob_unit_info.h index 6ce6119bf4..e8178ffdce 100644 --- a/src/share/unit/ob_unit_info.h +++ b/src/share/unit/ob_unit_info.h @@ -44,8 +44,9 @@ public: void reset(); bool is_valid() const; bool is_manual_migrate() const { return is_manual_migrate_; } + bool is_active_status() const { return UNIT_STATUS_ACTIVE == status_; } int get_unit_status_str(const char *&status) const; - Status get_unit_status() { return status_; } + Status get_unit_status() const { return status_; } DECLARE_TO_STRING; diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index 26eb60fbf6..d98842241e 100644 --- a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -6592,6 +6592,22 @@ int ObStaticEngineCG::set_other_properties(const ObLogPlan &log_plan, ObPhysical } } + // set location cons + if (OB_SUCC(ret)) { + if (OB_ISNULL(sql_ctx)) { + // do nothing + } else if (OB_FAIL(phy_plan.set_location_constraints(sql_ctx->base_constraints_, + sql_ctx->strict_constraints_, + sql_ctx->non_strict_constraints_, + sql_ctx->dup_table_replica_cons_))) { + LOG_WARN("failed to set location constraints", K(ret), K(phy_plan), + K(sql_ctx->base_constraints_), + K(sql_ctx->strict_constraints_), + K(sql_ctx->non_strict_constraints_), + K(sql_ctx->dup_table_replica_cons_)); + } + } + // set schema version and all base table version in phy plan if (OB_SUCC(ret)) { const ObIArray *dependency_table = 
log_plan.get_stmt()->get_global_dependency_table(); diff --git a/src/sql/das/ob_das_context.cpp b/src/sql/das/ob_das_context.cpp index 8869177a6c..5700d99ddb 100644 --- a/src/sql/das/ob_das_context.cpp +++ b/src/sql/das/ob_das_context.cpp @@ -36,6 +36,8 @@ int ObDASCtx::init(const ObPhysicalPlan &plan, ObExecContext &ctx) ObDataTypeCastParams dtc_params = ObBasicSessionInfo::create_dtc_params(ctx.get_my_session()); const ObIArray &normal_locations = plan.get_table_locations(); const ObIArray &das_locations = plan.get_das_table_locations(); + location_router_.set_last_errno(ctx.get_my_session()->get_retry_info().get_last_query_retry_err()); + location_router_.set_retry_cnt(ctx.get_my_session()->get_retry_info().get_retry_cnt()); for (int64_t i = 0; OB_SUCC(ret) && i < das_locations.count(); ++i) { const ObTableLocation &das_location = das_locations.at(i); ObDASTableLoc *table_loc = nullptr; @@ -346,7 +348,9 @@ OB_INLINE int ObDASCtx::build_related_tablet_loc(ObDASTabletLoc &tablet_loc) related_tablet_loc->partition_id_ = rv->part_id_; related_tablet_loc->first_level_part_id_ = rv->first_level_part_id_; tablet_loc.next_ = related_tablet_loc; - if (OB_FAIL(related_table_loc->add_tablet_loc(related_tablet_loc))) { + if (OB_FAIL(location_router_.save_touched_tablet_id(related_tablet_loc->tablet_id_))) { + LOG_WARN("save touched tablet id failed", K(ret), KPC(related_tablet_loc)); + } else if (OB_FAIL(related_table_loc->add_tablet_loc(related_tablet_loc))) { LOG_WARN("add related tablet location failed", K(ret)); } } @@ -371,12 +375,6 @@ OB_INLINE int ObDASCtx::build_related_table_loc(ObDASTableLoc &table_loc) return ret; } -int ObDASCtx::refresh_tablet_loc(ObDASTabletLoc &tablet_loc) -{ - tablet_loc.need_refresh_ = true; - return location_router_.get_tablet_loc(*tablet_loc.loc_meta_, tablet_loc.tablet_id_, tablet_loc); -} - int ObDASCtx::extended_table_loc(const ObDASTableLocMeta &loc_meta, ObDASTableLoc *&table_loc) { int ret = OB_SUCCESS; diff --git a/src/sql/das/ob_das_context.h b/src/sql/das/ob_das_context.h index 8bb5b131b5..b80819788b 100644 --- a/src/sql/das/ob_das_context.h +++ b/src/sql/das/ob_das_context.h @@ -80,7 +80,6 @@ public: int extended_tablet_loc(ObDASTableLoc &table_loc, const ObCandiTabletLoc &candi_tablet_loc, ObDASTabletLoc *&talet_loc); - int refresh_tablet_loc(ObDASTabletLoc &tablet_loc); int extended_table_loc(const ObDASTableLocMeta &loc_meta, ObDASTableLoc *&table_loc); int add_candi_table_loc(const ObDASTableLocMeta &loc_meta, const ObCandiTableLoc &candi_table_loc); int get_das_tablet_mapper(const uint64_t ref_table_id, diff --git a/src/sql/das/ob_das_define.h b/src/sql/das/ob_das_define.h index acc66c3496..94146166e0 100644 --- a/src/sql/das/ob_das_define.h +++ b/src/sql/das/ob_das_define.h @@ -166,7 +166,7 @@ public: TO_STRING_KV(K_(tablet_id), K_(ls_id), K_(server), - K_(need_refresh), + K_(in_retry), K_(partition_id), K_(first_level_part_id)); /** @@ -186,7 +186,7 @@ public: union { uint64_t flags_; struct { - uint64_t need_refresh_ : 1; //need to refresh tablet location cache + uint64_t in_retry_ : 1; //need to refresh tablet location cache uint64_t reserved_ : 63; }; }; diff --git a/src/sql/das/ob_das_location_router.cpp b/src/sql/das/ob_das_location_router.cpp index 22eac720d9..040d9a09b9 100644 --- a/src/sql/das/ob_das_location_router.cpp +++ b/src/sql/das/ob_das_location_router.cpp @@ -20,6 +20,7 @@ #include "share/schema/ob_multi_version_schema_service.h" #include "share/schema/ob_schema_utils.h" #include "sql/das/ob_das_utils.h" +#include 
"sql/ob_sql_context.h" #include "storage/tx/wrs/ob_black_list.h" namespace oceanbase @@ -739,77 +740,67 @@ int ObDASTabletMapper::get_partition_id_map(ObObjectID partition_id, ObDASLocationRouter::ObDASLocationRouter(ObIAllocator &allocator) - : virtual_server_list_(allocator), + : last_errno_(OB_SUCCESS), + retry_cnt_(0), + all_tablet_list_(allocator), + virtual_server_list_(allocator), allocator_(allocator) { } int ObDASLocationRouter::nonblock_get_readable_replica(const uint64_t tenant_id, const ObTabletID &tablet_id, - ObDASTabletLoc &tablet_loc, - int64_t expire_renew_time) + ObDASTabletLoc &tablet_loc) { int ret = OB_SUCCESS; - bool is_cache_hit = false; - bool is_found = false; ObLSLocation ls_loc; tablet_loc.tablet_id_ = tablet_id; - if (OB_FAIL(GCTX.location_service_->get(tenant_id, - tablet_id, - expire_renew_time, - is_cache_hit, - tablet_loc.ls_id_))) { + if (OB_FAIL(GCTX.location_service_->nonblock_get(tenant_id, tablet_id, tablet_loc.ls_id_))) { LOG_WARN("nonblock get ls id failed", K(ret)); - } else if (OB_FAIL(GCTX.location_service_->get(GCONF.cluster_id, - tenant_id, - tablet_loc.ls_id_, - expire_renew_time, - is_cache_hit, - ls_loc))) { - LOG_WARN("get ls replica location failed", K(ret)); + } else if (OB_FAIL(GCTX.location_service_->nonblock_get(GCONF.cluster_id, + tenant_id, + tablet_loc.ls_id_, + ls_loc))) { + LOG_WARN("get ls replica location failed", K(ret), K(tablet_loc)); } - - if (OB_UNLIKELY(tablet_loc.need_refresh_)){ - ObAddr strong_leader; - ObBLKey bl_key; - bool in_black_list = true; - for (int64_t i = 0; OB_SUCC(ret) && !is_found && i < ls_loc.get_replica_locations().count(); ++i) { - const ObLSReplicaLocation &tmp_replica_loc = ls_loc.get_replica_locations().at(i); - if (tmp_replica_loc.is_strong_leader()) { - strong_leader = tmp_replica_loc.get_server(); - } else if (OB_SUCC(bl_key.init(tmp_replica_loc.get_server(), tenant_id, tablet_loc.ls_id_)) - && OB_SUCC(ObBLService::get_instance().check_in_black_list(bl_key, in_black_list)) - && !in_black_list) { - tablet_loc.server_ = tmp_replica_loc.get_server(); - is_found = true; + ObBLKey bl_key; + bool in_black_list = true; + ObSEArray remote_replicas; + const ObLSReplicaLocation *local_replica = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < ls_loc.get_replica_locations().count(); ++i) { + const ObLSReplicaLocation &tmp_replica_loc = ls_loc.get_replica_locations().at(i); + if (OB_FAIL(bl_key.init(tmp_replica_loc.get_server(), tenant_id, tablet_loc.ls_id_))) { + LOG_WARN("init black list key failed", K(ret)); + } else if (OB_FAIL(ObBLService::get_instance().check_in_black_list(bl_key, in_black_list))) { + LOG_WARN("check in black list failed", K(ret)); + } else if (!in_black_list) { + if (tmp_replica_loc.get_server() == GCTX.self_addr()) { + //prefer choose the local replica + local_replica = &tmp_replica_loc; + } else if (OB_FAIL(remote_replicas.push_back(&tmp_replica_loc))) { + LOG_WARN("store tmp replica failed", K(ret)); } } - if (!is_found && strong_leader.is_valid()) { - tablet_loc.server_ = strong_leader; - is_found = true; - } } - - for (int64_t i = 0; OB_SUCC(ret) && !is_found && i < ls_loc.get_replica_locations().count(); ++i) { - const ObLSReplicaLocation &tmp_replica_loc = ls_loc.get_replica_locations().at(i); - if (tmp_replica_loc.get_server() == GCTX.self_addr()) { - //prefer choose the local replica - tablet_loc.server_ = tmp_replica_loc.get_server(); - is_found = true; + if (OB_SUCC(ret)) { + if (local_replica != nullptr) { + tablet_loc.server_ = local_replica->get_server(); + } else if 
(remote_replicas.empty()) { + ret = OB_NO_READABLE_REPLICA; + LOG_WARN("there has no readable replica", K(ret), K(tablet_id), K(ls_loc)); + } else { + //no local copy, randomly select a readable replica + int64_t select_idx = rand() % remote_replicas.count(); + const ObLSReplicaLocation *remote_loc = remote_replicas.at(select_idx); + tablet_loc.server_ = remote_loc->get_server(); } } - if (OB_SUCC(ret) && OB_UNLIKELY(!is_found)) { - //no local copy, randomly select a readable replica - int64_t select_idx = rand() % ls_loc.get_replica_locations().count(); - const ObLSReplicaLocation &tmp_replica_loc = ls_loc.get_replica_locations().at(select_idx); - tablet_loc.server_ = tmp_replica_loc.get_server(); - } return ret; } -int ObDASLocationRouter::get(const ObDASTableLocMeta &loc_meta, - const common::ObTabletID &tablet_id, - ObLSLocation &location) +int ObDASLocationRouter::nonblock_get(const ObDASTableLocMeta &loc_meta, + const common::ObTabletID &tablet_id, + ObLSLocation &location) { int ret = OB_SUCCESS; uint64_t tenant_id = MTL_ID(); @@ -827,21 +818,15 @@ int ObDASLocationRouter::get(const ObDASTableLocMeta &loc_meta, } else if (loc_meta.is_external_table_) { ret = get_external_table_ls_location(location); } else { - int64_t expire_renew_time = 2 * 1000000; // 2s - bool is_cache_hit = false; ObLSID ls_id; - if (OB_FAIL(GCTX.location_service_->get(tenant_id, - tablet_id, - expire_renew_time, - is_cache_hit, - ls_id))) { + if (OB_FAIL(all_tablet_list_.push_back(tablet_id))) { + LOG_WARN("store all tablet list failed", K(ret)); + } else if (OB_FAIL(GCTX.location_service_->nonblock_get(tenant_id, tablet_id, ls_id))) { LOG_WARN("nonblock get ls id failed", K(ret)); - } else if (OB_FAIL(GCTX.location_service_->get(GCONF.cluster_id, - tenant_id, - ls_id, - expire_renew_time, - is_cache_hit, - location))) { + } else if (OB_FAIL(GCTX.location_service_->nonblock_get(GCONF.cluster_id, + tenant_id, + ls_id, + location))) { LOG_WARN("fail to get tablet locations", K(ret), K(tenant_id), K(ls_id)); } } @@ -849,6 +834,63 @@ int ObDASLocationRouter::get(const ObDASTableLocMeta &loc_meta, return ret; } +int ObDASLocationRouter::nonblock_get_candi_tablet_locations(const ObDASTableLocMeta &loc_meta, + const ObIArray &tablet_ids, + const ObIArray &partition_ids, + const ObIArray &first_level_part_ids, + ObIArray &candi_tablet_locs) +{ + int ret = OB_SUCCESS; + NG_TRACE(get_location_cache_begin); + candi_tablet_locs.reset(); + int64_t N = tablet_ids.count(); + if (OB_FAIL(candi_tablet_locs.prepare_allocate(N))) { + LOG_WARN("Partitoin location list prepare error", K(ret)); + } else { + ObLSLocation location; + int64_t i = 0; + for (; OB_SUCC(ret) && i < N; ++i) { + location.reset(); + ObCandiTabletLoc &candi_tablet_loc = candi_tablet_locs.at(i); + //after 4.1, all modules that need to access location will use nonblock_get to fetch location + //if the location has expired, DAS location router will refresh all accessed tablets + if (OB_FAIL(nonblock_get(loc_meta, tablet_ids.at(i), location))) { + LOG_WARN("Get partition error, the location cache will be renewed later", + K(ret), "tablet_id", tablet_ids.at(i), K(candi_tablet_loc)); + } else { + ObObjectID first_level_part_id = first_level_part_ids.empty() ? 
OB_INVALID_ID : first_level_part_ids.at(i);
+        if (OB_FAIL(candi_tablet_loc.set_part_loc_with_only_readable_replica(partition_ids.at(i),
+                                                                             first_level_part_id,
+                                                                             tablet_ids.at(i),
+                                                                             location))) {
+          LOG_WARN("fail to set partition location with only readable replica",
+                   K(ret), K(i), K(location), K(candi_tablet_locs), K(tablet_ids), K(partition_ids));
+        }
+        LOG_TRACE("set partition location with only readable replica",
+                  K(ret), K(i), K(location), K(candi_tablet_locs), K(tablet_ids), K(partition_ids));
+      }
+    } // for end
+    //When the OB_MAPPING_BETWEEN_TABLET_AND_LS_NOT_EXIST error is encountered,
+    //it means that the tablet mapper has been updated,
+    //and it is necessary to record all the tablet ids that have been touched by this query,
+    //and at the end of this query,
+    //the mapping relationship of these tablet ids needs to be refreshed;
+    if (OB_MAPPING_BETWEEN_TABLET_AND_LS_NOT_EXIST == ret && i < N) {
+      int save_ret = OB_SUCCESS;
+      for (; OB_SUCCESS == save_ret && i < N; i++) {
+        if (OB_SUCCESS != (save_ret = all_tablet_list_.push_back(tablet_ids.at(i)))) {
+          LOG_WARN("save the remaining tablet id failed", K(ret), K(save_ret));
+        }
+      }
+      if (save_ret != OB_SUCCESS) {
+        ret = save_ret;
+      }
+    }
+  }
+  NG_TRACE(get_location_cache_end);
+  return ret;
+}
+
 int ObDASLocationRouter::get_tablet_loc(const ObDASTableLocMeta &loc_meta,
                                         const ObTabletID &tablet_id,
                                         ObDASTabletLoc &tablet_loc)
@@ -856,39 +898,55 @@ int ObDASLocationRouter::get_tablet_loc(const ObDASTableLocMeta &loc_meta,
   int ret = OB_SUCCESS;
   uint64_t tenant_id = MTL_ID();
   bool is_vt = is_virtual_table(loc_meta.ref_table_id_);
-  const int64_t expire_renew_time = tablet_loc.need_refresh_ ? INT64_MAX : 2 * 1000000;
   if (OB_UNLIKELY(is_vt)) {
     if (OB_FAIL(get_vt_tablet_loc(loc_meta.ref_table_id_, tablet_id, tablet_loc))) {
       LOG_WARN("get virtual tablet loc failed", K(ret), K(loc_meta));
     }
-  } else if (OB_LIKELY(loc_meta.select_leader_)) {
-    ret = get_leader(tenant_id, tablet_id, tablet_loc, expire_renew_time);
+  } else if (OB_FAIL(all_tablet_list_.push_back(tablet_id))) {
+    LOG_WARN("store tablet id failed", K(ret));
   } else {
-    ret = nonblock_get_readable_replica(tenant_id, tablet_id, tablet_loc, expire_renew_time);
+    int64_t retry_cnt = 0;
+    bool need_retry = false;
+    do {
+      need_retry = false;
+      if (OB_LIKELY(loc_meta.select_leader_) || OB_UNLIKELY(last_errno_ == OB_NOT_MASTER)) {
+        //if this statement is retried because of OB_NOT_MASTER, we will choose the leader directly
+        ret = nonblock_get_leader(tenant_id, tablet_id, tablet_loc);
+      } else {
+        ret = nonblock_get_readable_replica(tenant_id, tablet_id, tablet_loc);
+      }
+      if (is_partition_change_error(ret) && OB_SUCCESS == last_errno_ && retry_cnt <= 0) {
+        /*During the execution phase, if the nonblock location interface is used to obtain the location
+         * and an exception occurs, retries are necessary.
+         * However, statement-level retries cannot roll back many execution states,
+         * so it is necessary to avoid retries in this scenario as much as possible.
+         * During the execution phase, when encountering a location exception for the first time,
+         * try to refresh the location once synchronously.
+ * If it fails, then proceed with statement-level retries.*/ + need_retry = true; + ++retry_cnt; + refresh_location_cache(tablet_id, false, ret); + ret = OB_SUCCESS; + } + } while (OB_SUCCESS == ret && need_retry); } return ret; } -int ObDASLocationRouter::get_leader(const uint64_t tenant_id, - const ObTabletID &tablet_id, - ObDASTabletLoc &tablet_loc, - int64_t expire_renew_time) +int ObDASLocationRouter::nonblock_get_leader(const uint64_t tenant_id, + const ObTabletID &tablet_id, + ObDASTabletLoc &tablet_loc) { int ret = OB_SUCCESS; bool is_cache_hit = false; tablet_loc.tablet_id_ = tablet_id; - if (OB_FAIL(GCTX.location_service_->get(tenant_id, - tablet_id, - expire_renew_time, - is_cache_hit, - tablet_loc.ls_id_))) { + if (OB_FAIL(GCTX.location_service_->nonblock_get(tenant_id, tablet_id, tablet_loc.ls_id_))) { LOG_WARN("nonblock get ls id failed", K(ret)); - } else if (OB_FAIL(GCTX.location_service_->get_leader(GCONF.cluster_id, - tenant_id, - tablet_loc.ls_id_, - false, - tablet_loc.server_))) { - LOG_WARN("nonblock get ls location failed", K(ret)); + } else if (OB_FAIL(GCTX.location_service_->nonblock_get_leader(GCONF.cluster_id, + tenant_id, + tablet_loc.ls_id_, + tablet_loc.server_))) { + LOG_WARN("nonblock get ls location failed", K(ret), K(tablet_loc)); } return ret; } @@ -925,12 +983,10 @@ int ObDASLocationRouter::get_full_ls_replica_loc(const ObObjectID &tenant_id, int ret = OB_SUCCESS; bool is_cache_hit = false; ObLSLocation ls_loc; - if (OB_FAIL(GCTX.location_service_->get(GCONF.cluster_id, - tenant_id, - tablet_loc.ls_id_, - 0, /*not force to renew*/ - is_cache_hit, - ls_loc))) { + if (OB_FAIL(GCTX.location_service_->nonblock_get(GCONF.cluster_id, + tenant_id, + tablet_loc.ls_id_, + ls_loc))) { LOG_WARN("get ls replica location failed", K(ret)); } for (int64_t i = 0; OB_SUCC(ret) && i < ls_loc.get_replica_locations().count(); ++i) { @@ -1049,6 +1105,73 @@ OB_NOINLINE int ObDASLocationRouter::get_vt_ls_location(uint64_t table_id, return ret; } +void ObDASLocationRouter::refresh_location_cache(bool is_nonblock, int err_no) +{ + NG_TRACE_TIMES(1, get_location_cache_begin); + if (is_master_changed_error(err_no) + || is_partition_change_error(err_no) + || is_get_location_timeout_error(err_no) + || is_server_down_error(err_no)) { + FOREACH(tmp_node, all_tablet_list_) { + ObTabletID tablet_id = *tmp_node; + refresh_location_cache(tablet_id, is_nonblock, err_no); + } + all_tablet_list_.clear(); + } + NG_TRACE_TIMES(1, get_location_cache_end); +} + +void ObDASLocationRouter::refresh_location_cache(const ObTabletID &tablet_id, + bool is_nonblock, + int err_no) +{ + int ret = OB_SUCCESS; + //try to refresh all tablet id, and ignore the tmp error + //all_tablet_list_ may contain duplicate tablet_id + if (is_nonblock) { + if (OB_FAIL(GCTX.location_service_->nonblock_renew(MTL_ID(), tablet_id))) { + LOG_WARN("LOCATION: fail to nonblock renew location cache", K(ret), K(tablet_id)); + } else { + LOG_INFO("LOCATION: nonblock renew success", K(tablet_id), K(err_no)); + } + } else { + const int64_t expire_renew_time = INT64_MAX; // means must renew location + bool is_cache_hit = false; + ObLSLocation dummy_loc; + ObLSID ls_id; + int64_t query_timeout_ts = THIS_WORKER.get_timeout_ts(); + int64_t now = ObTimeUtility::current_time(); + if (query_timeout_ts - now > 1 * 1000L * 1000L) { + //the timeout limit for "refresh location" is within 1s + THIS_WORKER.set_timeout_ts(now + 1 * 1000L * 1000L); + } + if (OB_FAIL(GCTX.location_service_->get(MTL_ID(), + tablet_id, + expire_renew_time, + 
is_cache_hit, + ls_id))) { + LOG_WARN("fail to get ls id", K(ret)); + } else if (OB_FAIL(GCTX.location_service_->get(GCONF.cluster_id, + MTL_ID(), + ls_id, + expire_renew_time, + is_cache_hit, + dummy_loc))) { + LOG_WARN("failed to get location", K(ls_id), K(ret)); + } else { + LOG_INFO("LOCATION: refresh table cache succ", K(tablet_id), K(err_no), K(dummy_loc)); + } + //recover query timeout ts + THIS_WORKER.set_timeout_ts(query_timeout_ts); + } +} + +void ObDASLocationRouter::set_retry_info(const ObQueryRetryInfo* retry_info) +{ + last_errno_ = retry_info->get_last_query_retry_err(); + retry_cnt_ = retry_info->get_retry_cnt(); +} + int ObDASLocationRouter::get_external_table_ls_location(ObLSLocation &location) { int ret = OB_SUCCESS; diff --git a/src/sql/das/ob_das_location_router.h b/src/sql/das/ob_das_location_router.h index de5af092c1..c981a576e7 100644 --- a/src/sql/das/ob_das_location_router.h +++ b/src/sql/das/ob_das_location_router.h @@ -33,6 +33,7 @@ namespace sql { struct ObDASTableLocMeta; struct ObDASTabletLoc; +class ObQueryRetryInfo; class ObDASCtx; typedef common::ObFixedArray AddrArray; typedef common::hash::ObHashMap ObPartitionIdMap; @@ -278,17 +279,22 @@ class ObDASLocationRouter typedef common::ObList VirtualSvrList; public: ObDASLocationRouter(common::ObIAllocator &allocator); - int get(const ObDASTableLocMeta &loc_meta, - const common::ObTabletID &tablet_id, - share::ObLSLocation &location); + int nonblock_get(const ObDASTableLocMeta &loc_meta, + const common::ObTabletID &tablet_id, + share::ObLSLocation &location); + + int nonblock_get_candi_tablet_locations(const ObDASTableLocMeta &loc_meta, + const common::ObIArray &tablet_ids, + const common::ObIArray &partition_ids, + const ObIArray &first_level_part_ids, + common::ObIArray &candi_tablet_locs); int get_tablet_loc(const ObDASTableLocMeta &loc_meta, const common::ObTabletID &tablet_id, ObDASTabletLoc &tablet_loc); - static int get_leader(const uint64_t tenant_id, - const ObTabletID &tablet_id, - ObDASTabletLoc &tablet_loc, - int64_t expire_renew_time); + static int nonblock_get_leader(const uint64_t tenant_id, + const ObTabletID &tablet_id, + ObDASTabletLoc &tablet_loc); static int get_leader(const uint64_t tenant_id, const common::ObTabletID &tablet_id, ObAddr &leader_addr, @@ -296,6 +302,14 @@ public: int get_full_ls_replica_loc(const common::ObObjectID &tenant_id, const ObDASTabletLoc &tablet_loc, share::ObLSReplicaLocation &replica_loc); + void refresh_location_cache(bool is_nonblock, int err_no); + void refresh_location_cache(const common::ObTabletID &tablet_id, bool is_nonblock, int err_no); + int save_touched_tablet_id(const common::ObTabletID &tablet_id) { return all_tablet_list_.push_back(tablet_id); } + void set_last_errno(int err_no) { last_errno_ = err_no; } + void set_retry_cnt(int64_t retry_cnt) { retry_cnt_ = retry_cnt; } + void inc_retry_cnt() { ++retry_cnt_; } + void set_retry_info(const ObQueryRetryInfo* retry_info); + int64_t get_retry_cnt() const { return retry_cnt_; } int get_external_table_ls_location(share::ObLSLocation &location); private: int get_vt_svr_pair(uint64_t vt_id, const VirtualSvrPair *&vt_svr_pair); @@ -307,9 +321,11 @@ private: share::ObLSLocation &location); int nonblock_get_readable_replica(const uint64_t tenant_id, const common::ObTabletID &tablet_id, - ObDASTabletLoc &tablet_loc, - int64_t expire_renew_time); + ObDASTabletLoc &tablet_loc); private: + int last_errno_; + int64_t retry_cnt_; + ObList all_tablet_list_; VirtualSvrList virtual_server_list_; common::ObIAllocator 
&allocator_; private: diff --git a/src/sql/das/ob_das_utils.cpp b/src/sql/das/ob_das_utils.cpp index 2530b90320..f612061784 100644 --- a/src/sql/das/ob_das_utils.cpp +++ b/src/sql/das/ob_das_utils.cpp @@ -385,5 +385,25 @@ int ObDASUtils::generate_spatial_index_rows( return ret; } +int ObDASUtils::wait_das_retry(int64_t retry_cnt) +{ + int ret = OB_SUCCESS; + uint32_t timeout_factor = static_cast((retry_cnt > 100) ? 100 : retry_cnt); + int64_t sleep_us = 1000L * timeout_factor > THIS_WORKER.get_timeout_remain() + ? THIS_WORKER.get_timeout_remain() + : 1000L * timeout_factor; + if (sleep_us > 0) { + LOG_INFO("will sleep", K(sleep_us), K(THIS_WORKER.get_timeout_remain())); + THIS_WORKER.sched_wait(); + ob_usleep(static_cast(sleep_us)); + THIS_WORKER.sched_run(); + if (THIS_WORKER.is_timeout()) { + ret = OB_TIMEOUT; + LOG_WARN("this worker is timeout after retry sleep. no more retry", K(ret)); + } + } + return ret; +} + } // namespace sql } // namespace oceanbase diff --git a/src/sql/das/ob_das_utils.h b/src/sql/das/ob_das_utils.h index e45a05067f..a7ac26808c 100644 --- a/src/sql/das/ob_das_utils.h +++ b/src/sql/das/ob_das_utils.h @@ -63,6 +63,7 @@ public: const IntFixedArray &row_projector, const ObDASWriteBuffer::DmlRow &dml_row, ObSpatIndexRow &spat_rows); + static int wait_das_retry(int64_t retry_cnt); }; } // namespace sql } // namespace oceanbase diff --git a/src/sql/das/ob_data_access_service.cpp b/src/sql/das/ob_data_access_service.cpp index 0ccdfee6a3..10f6ab597a 100644 --- a/src/sql/das/ob_data_access_service.cpp +++ b/src/sql/das/ob_data_access_service.cpp @@ -209,17 +209,27 @@ int ObDataAccessService::clear_task_exec_env(ObDASRef &das_ref, ObIDASTaskOp &ta return ret; } -int ObDataAccessService::refresh_partition_location(ObDASRef &das_ref, ObIDASTaskOp &task_op) +int ObDataAccessService::refresh_partition_location(ObDASRef &das_ref, + ObIDASTaskOp &task_op, + int err_no) { int ret = OB_SUCCESS; ObExecContext &exec_ctx = das_ref.get_exec_ctx(); ObDASBaseRtDef *das_rtdef = task_op.get_rtdef(); ObDASTableLoc *table_loc = das_rtdef->table_loc_; ObDASTabletLoc *tablet_loc = const_cast(task_op.get_tablet_loc()); - if (OB_SUCC(DAS_CTX(exec_ctx).refresh_tablet_loc(*tablet_loc))) { + int64_t retry_cnt = DAS_CTX(exec_ctx).get_location_router().get_retry_cnt(); + DAS_CTX(exec_ctx).get_location_router().refresh_location_cache(tablet_loc->tablet_id_, true, err_no); + if (OB_FAIL(ObDASUtils::wait_das_retry(retry_cnt))) { + LOG_WARN("wait das retry failed", K(ret)); + } else if (OB_FAIL(DAS_CTX(exec_ctx).get_location_router().get_tablet_loc(*tablet_loc->loc_meta_, + tablet_loc->tablet_id_, + *tablet_loc))) { + LOG_WARN("get tablet location failed", K(ret), KPC(tablet_loc)); + } else { task_op.set_ls_id(tablet_loc->ls_id_); } - LOG_INFO("LOCATION: refresh tablet cache", K(ret), KPC(tablet_loc), KPC(tablet_loc)); + LOG_INFO("LOCATION: refresh tablet cache", K(ret), KPC(table_loc), KPC(tablet_loc)); return ret; } @@ -235,12 +245,14 @@ int ObDataAccessService::retry_das_task(ObDASRef &das_ref, ObIDASTaskOp &task_op int tmp_ret = ret; if (!can_fast_fail(task_op)) { task_op.in_part_retry_ = true; - das_ref.get_exec_ctx().get_my_session()->set_session_in_retry(true, ret); + ObDASLocationRouter &location_router = DAS_CTX(das_ref.get_exec_ctx()).get_location_router(); + location_router.set_last_errno(ret); + location_router.inc_retry_cnt(); if (OB_FAIL(clear_task_exec_env(das_ref, task_op))) { LOG_WARN("clear task execution environment", K(ret)); } else if (OB_FAIL(das_ref.get_exec_ctx().check_status())) 
{ LOG_WARN("query is timeout, terminate retry", K(ret)); - } else if (OB_FAIL(refresh_partition_location(das_ref, task_op))) { + } else if (OB_FAIL(refresh_partition_location(das_ref, task_op, task_op.errcode_))) { LOG_WARN("refresh partition location failed", K(ret), "ori_err_code", tmp_ret, K(lbt())); } else if (FALSE_IT(das_task_wrapper.reuse())) { } else if (FALSE_IT(task_op.set_task_status(ObDasTaskStatus::UNSTART))) { diff --git a/src/sql/das/ob_data_access_service.h b/src/sql/das/ob_data_access_service.h index 75cba9268d..99c5583e1b 100644 --- a/src/sql/das/ob_data_access_service.h +++ b/src/sql/das/ob_data_access_service.h @@ -58,7 +58,7 @@ private: int execute_dist_das_task(ObDASRef &das_ref, ObDasAggregatedTasks &task_ops, bool async = true); int clear_task_exec_env(ObDASRef &das_ref, ObIDASTaskOp &task_op); - int refresh_partition_location(ObDASRef &das_ref, ObIDASTaskOp &task_op); + int refresh_partition_location(ObDASRef &das_ref, ObIDASTaskOp &task_op, int err_no); int do_local_das_task(ObDASRef &das_ref, ObDASTaskArg &task_arg); int do_async_remote_das_task(ObDASRef &das_ref, ObDasAggregatedTasks &aggregated_tasks, ObDASTaskArg &task_arg); int do_sync_remote_das_task(ObDASRef &das_ref, ObDasAggregatedTasks &aggregated_tasks, ObDASTaskArg &task_arg); diff --git a/src/sql/engine/cmd/ob_table_executor.cpp b/src/sql/engine/cmd/ob_table_executor.cpp index 1f24d4daa7..a9dba4dccd 100644 --- a/src/sql/engine/cmd/ob_table_executor.cpp +++ b/src/sql/engine/cmd/ob_table_executor.cpp @@ -472,6 +472,24 @@ int ObCreateTableExecutor::execute(ObExecContext &ctx, ObCreateTableStmt &stmt) LOG_WARN("session is null", K(ret)); } else if (OB_FAIL(stmt.get_first_stmt(first_stmt))) { LOG_WARN("get first statement failed", K(ret)); + } else if (table_schema.is_duplicate_table()) { + bool is_compatible = false; + uint64_t tenant_id = table_schema.get_tenant_id(); + if (OB_FAIL(ObShareUtil::check_compat_version_for_readonly_replica(tenant_id, is_compatible))) { + LOG_WARN("fail to check data version for duplicate table", KR(ret), K(tenant_id)); + } else if (!is_compatible) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("duplicate table is not supported below 4.2", KR(ret), K(table_schema), K(is_compatible)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create duplicate table below 4.2"); + } else if (is_sys_tenant(tenant_id) || is_meta_tenant(tenant_id)) { + // TODO@jingyu_cr: make sure whether sys log stream have to be duplicated + ret = OB_NOT_SUPPORTED; + LOG_USER_ERROR(OB_NOT_SUPPORTED, "create duplicate table under sys or meta tenant"); + LOG_WARN("create dup table not supported", KR(ret), K(table_schema)); + } + } + + if (OB_FAIL(ret)) { } else { create_table_arg.is_inner_ = my_session->is_inner(); create_table_arg.consumer_group_id_ = THIS_WORKER.get_group_id(); diff --git a/src/sql/engine/ob_des_exec_context.cpp b/src/sql/engine/ob_des_exec_context.cpp index 1870473e57..f7020cd32f 100644 --- a/src/sql/engine/ob_des_exec_context.cpp +++ b/src/sql/engine/ob_des_exec_context.cpp @@ -194,6 +194,8 @@ DEFINE_DESERIALIZE(ObDesExecContext) if (OB_SUCC(ret)) { if (OB_FAIL(init_expr_op(phy_plan_ctx_->get_expr_op_size()))) { LOG_WARN("init exec context expr op failed", K(ret)); + } else { + das_ctx_.get_location_router().set_retry_info(&my_session_->get_retry_info()); } } use_temp_expr_ctx_cache_ = true; diff --git a/src/sql/engine/ob_physical_plan.cpp b/src/sql/engine/ob_physical_plan.cpp index 3eeb021f19..8c841d449b 100644 --- a/src/sql/engine/ob_physical_plan.cpp +++ b/src/sql/engine/ob_physical_plan.cpp @@ -804,7 
+804,8 @@ int ObPhysicalPlan::set_table_locations(const ObTablePartitionInfoArray &infos, int ObPhysicalPlan::set_location_constraints(const ObIArray &base_constraints, const ObIArray &strict_constraints, - const ObIArray &non_strict_constraints) + const ObIArray &non_strict_constraints, + const ObIArray &dup_table_replica_cons) { // deep copy location constraints int ret = OB_SUCCESS; @@ -885,18 +886,78 @@ int ObPhysicalPlan::set_location_constraints(const ObIArray } } + if (OB_SUCC(ret) && dup_table_replica_cons.count() > 0) { + dup_table_replica_cons_.reset(); + dup_table_replica_cons_.set_allocator(&allocator_); + if (OB_FAIL(dup_table_replica_cons_.init(dup_table_replica_cons.count()))) { + LOG_WARN("failed to init duplicate table constraints", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < dup_table_replica_cons.count(); ++i) { + if(OB_FAIL(dup_table_replica_cons_.push_back(dup_table_replica_cons.at(i)))) { + LOG_WARN("failed to assign element", K(ret), K(dup_table_replica_cons.at(i))); + } else { /*do nothing*/ } + } + } + } + if (OB_FAIL(ret)) { base_constraints_.reset(); strict_constrinats_.reset(); non_strict_constrinats_.reset(); + dup_table_replica_cons_.reset(); } else { - LOG_DEBUG("deep copied location constraints", K(base_constraints_), - K(strict_constrinats_), K(non_strict_constrinats_)); + LOG_TRACE("deep copied location constraints", K(base_constraints_), K(strict_constrinats_), + K(non_strict_constrinats_), K(dup_table_replica_cons_)); } return ret; } +bool ObPhysicalPlan::has_same_location_constraints(const ObPhysicalPlan &r) const +{ + bool is_same = true; + const ObIArray& l_base_cons = get_base_constraints(); + const ObIArray& r_base_cons = r.get_base_constraints(); + const ObIArray& l_non_strict_cons = get_non_strict_constraints(); + const ObIArray& r_non_strict_cons = r.get_non_strict_constraints(); + const ObIArray& l_strict_cons = get_strict_constraints(); + const ObIArray& r_strict_cons = r.get_strict_constraints(); + const ObIArray& l_dup_rep_cons = get_dup_table_replica_constraints(); + const ObIArray& r_dup_rep_cons = r.get_dup_table_replica_constraints(); + if (l_base_cons.count() != r_base_cons.count() || + l_strict_cons.count() != r_strict_cons.count() || + l_non_strict_cons.count() != r_non_strict_cons.count()|| + l_dup_rep_cons.count() != r_dup_rep_cons.count()) { + is_same = false; + } else { + for (int64_t i = 0; is_same && i < l_base_cons.count(); i++) { + is_same = is_same && (l_base_cons.at(i) == r_base_cons.at(i)); + } + for (int64_t i = 0; is_same && i < l_strict_cons.count(); i++) { + if (l_strict_cons.at(i).count() != r_strict_cons.at(i).count()) { + is_same = false; + } else { + for (int64_t j = 0; is_same && j < l_strict_cons.at(i).count(); j++) { + is_same = (l_strict_cons.at(i).at(j) == (r_strict_cons.at(i)).at(j)); + } + } + } + for (int64_t i = 0; is_same && i < l_non_strict_cons.count(); i++) { + if (l_non_strict_cons.at(i).count() != r_non_strict_cons.at(i).count()) { + is_same = false; + } else { + for (int64_t j = 0; is_same && j < l_non_strict_cons.at(i).count(); j++) { + is_same = (l_non_strict_cons.at(i).at(j) == r_non_strict_cons.at(i).at(j)); + } + } + } + for(int64_t i = 0; is_same && i < l_dup_rep_cons.count(); i++) { + is_same = is_same && (l_dup_rep_cons.at(i) == r_dup_rep_cons.at(i)); + } + } + return is_same; +} + DEF_TO_STRING(FlashBackQueryItem) { int64_t pos = 0; diff --git a/src/sql/engine/ob_physical_plan.h b/src/sql/engine/ob_physical_plan.h index 7118c891ff..38aec2a1e7 100644 --- 
a/src/sql/engine/ob_physical_plan.h +++ b/src/sql/engine/ob_physical_plan.h @@ -58,6 +58,7 @@ class ObEvolutionPlan; typedef common::ObFixedArray, common::ObIAllocator> PhyRowParamMap; typedef common::ObFixedArray TableLocationFixedArray; typedef common::ObFixedArray PlanPwjConstraintArray; +typedef common::ObFixedArray DupTabReplicaArray; typedef common::ObFixedArray EncryptMetaCacheArray; //2.2.5版本之后已废弃 @@ -387,10 +388,17 @@ public: const ObIArray& get_strict_constraints() const { return strict_constrinats_; } ObIArray& get_non_strict_constraints() { return non_strict_constrinats_; } const ObIArray& get_non_strict_constraints() const { return non_strict_constrinats_; } - + ObIArray &get_dup_table_replica_constraints() { + return dup_table_replica_cons_; + } + const ObIArray &get_dup_table_replica_constraints() const { + return dup_table_replica_cons_; + } int set_location_constraints(const ObIArray &base_constraints, const ObIArray &strict_constraints, - const ObIArray &non_strict_constraints); + const ObIArray &non_strict_constraints, + const ObIArray &dup_table_replica_cons); + bool has_same_location_constraints(const ObPhysicalPlan &r) const; ObIArray& get_encrypt_meta_array() { return encrypt_meta_array_; } @@ -583,6 +591,9 @@ private: // 每个分组是一个array,保存了对应基表在base_table_constraints_中的偏移 // 如果t1, t2需要满足非严格约束,则对于分区裁剪后t1的每一个分区,都要求有一个t2的分区与其在相同的物理机器上 PlanPwjConstraintArray non_strict_constrinats_; + // constraint for duplicate table to choose replica + // dist plan will use this as (dup_tab_pos, advisor_tab_pos) pos is position in base constraint + DupTabReplicaArray dup_table_replica_cons_; public: ObExprFrameInfo expr_frame_info_; diff --git a/src/sql/engine/px/ob_dfo_scheduler.cpp b/src/sql/engine/px/ob_dfo_scheduler.cpp index 56cc788386..bcdde6fddc 100644 --- a/src/sql/engine/px/ob_dfo_scheduler.cpp +++ b/src/sql/engine/px/ob_dfo_scheduler.cpp @@ -1243,14 +1243,7 @@ int ObParallelDfoScheduler::deal_with_init_sqc_error(ObExecContext &exec_ctx, if (OB_ISNULL(session = GET_MY_SESSION(exec_ctx))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("session is NULL", K(ret)); - } else { - ObQueryRetryInfo &retry_info = session->get_retry_info_for_update(); - int add_ret = retry_info.add_invalid_server_distinctly(invalid_server, true); - if (OB_UNLIKELY(OB_SUCCESS != add_ret)) { - LOG_WARN("fail to add dist addr to invalid servers distinctly", - K(rc), "sqc", sqc, K(add_ret)); - } - } + } else { } } return ret; } diff --git a/src/sql/engine/px/ob_px_rpc_processor.cpp b/src/sql/engine/px/ob_px_rpc_processor.cpp index 738e415ec8..5b2433d77c 100644 --- a/src/sql/engine/px/ob_px_rpc_processor.cpp +++ b/src/sql/engine/px/ob_px_rpc_processor.cpp @@ -553,13 +553,6 @@ void ObDealWithRpcTimeoutCall::deal_with_rpc_timeout_err() LOG_DEBUG("rpc return OB_TIMEOUT, but it is actually not timeout, " "change error code to OB_CONNECT_ERROR", K(ret_), K(timeout_ts_), K(cur_timestamp)); - if (NULL != retry_info_) { - int a_ret = OB_SUCCESS; - if (OB_UNLIKELY(OB_SUCCESS != (a_ret = retry_info_->add_invalid_server_distinctly( - addr_)))) { - LOG_WARN_RET(a_ret, "fail to add invalid server distinctly", K_(trace_id), K(a_ret), K_(addr)); - } - } ret_ = OB_RPC_CONNECT_ERROR; } else { LOG_DEBUG("rpc return OB_TIMEOUT, and it is actually timeout, " diff --git a/src/sql/engine/px/ob_px_task_process.cpp b/src/sql/engine/px/ob_px_task_process.cpp index 87a7a503cf..70f60b9cef 100644 --- a/src/sql/engine/px/ob_px_task_process.cpp +++ b/src/sql/engine/px/ob_px_task_process.cpp @@ -451,7 +451,9 @@ int ObPxTaskProcess::do_process() } } } 
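// The DupTabReplicaArray added to ObPhysicalPlan above stores pairs of offsets
// into the base-table location constraints: (dup_tab_pos, advisor_tab_pos).
// Conceptually, such a pair asks the executor to place the duplicate table's
// read replica on the same server already chosen for the "advisor" base table;
// when no pair is recorded for a scan, the local replica is used. The code
// below is only a hedged, self-contained sketch of that selection rule: the
// type and function names are illustrative and are not the OceanBase API.
#include <cstdint>
#include <string>
#include <vector>

struct DupTabConstraintSketch {
  int64_t dup_tab_pos_;      // offset of the duplicate table in the base constraints
  int64_t advisor_tab_pos_;  // offset of the table whose replica should be followed
};

// base_servers[i] is the server chosen for the i-th base-table constraint.
std::string pick_dup_table_server(const std::vector<std::string> &base_servers,
                                  const std::vector<DupTabConstraintSketch> &cons,
                                  int64_t dup_tab_pos,
                                  const std::string &local_server)
{
  for (const DupTabConstraintSketch &c : cons) {
    if (c.dup_tab_pos_ == dup_tab_pos &&
        c.advisor_tab_pos_ >= 0 &&
        c.advisor_tab_pos_ < static_cast<int64_t>(base_servers.size())) {
      return base_servers[c.advisor_tab_pos_];  // follow the advisor table's replica
    }
  }
  return local_server;  // no constraint recorded: fall back to the local replica
}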
- + if (OB_NOT_NULL(arg_.exec_ctx_)) { + DAS_CTX(*arg_.exec_ctx_).get_location_router().refresh_location_cache(true, ret); + } // for transaction (void)record_tx_desc(); // for exec feedback info diff --git a/src/sql/executor/ob_direct_receive_op.cpp b/src/sql/executor/ob_direct_receive_op.cpp index 2af02861dd..a6d349bcf9 100644 --- a/src/sql/executor/ob_direct_receive_op.cpp +++ b/src/sql/executor/ob_direct_receive_op.cpp @@ -203,17 +203,6 @@ int ObDirectReceiveOp::setup_next_scanner() LOG_WARN("while fetching first scanner, the remote rcode is not OB_SUCCESS", K(ret), K(err_msg), "dst_addr", to_cstring(resp_handler->get_dst_addr())); - if (is_data_not_readable_err(ret)) { - // 读到落后太多的备机或者正在回放日志的副本了, - // 将远端的这个observer加进retry info的invalid servers中 - ObQueryRetryInfo &retry_info = my_session->get_retry_info_for_update(); - if (OB_UNLIKELY(OB_SUCCESS != ( - add_ret = retry_info.add_invalid_server_distinctly( - resp_handler->get_dst_addr(), true)))) { - LOG_WARN("fail to add remote addr to invalid servers distinctly", K(ret), K(add_ret), - K(resp_handler->get_dst_addr()), K(retry_info)); - } - } } else { scanner_ = scanner; first_request_received_ = true; @@ -257,17 +246,6 @@ int ObDirectReceiveOp::setup_next_scanner() LOG_WARN("while getting more scanner, the remote rcode is not OB_SUCCESS", K(ret), K(err_msg), "dst_addr", to_cstring(resp_handler->get_dst_addr())); - if (is_data_not_readable_err(ret)) { - // 读到落后太多的备机或者正在回放日志的副本了, - // 将远端的这个observer加进retry info的invalid servers中 - ObQueryRetryInfo &retry_info = my_session->get_retry_info_for_update(); - if (OB_UNLIKELY(OB_SUCCESS != ( - add_ret = retry_info.add_invalid_server_distinctly( - resp_handler->get_dst_addr(), true)))) { - LOG_WARN("fail to add remote addr to invalid servers distinctly", K(ret), K(add_ret), - K(resp_handler->get_dst_addr()), K(retry_info)); - } - } } else { scanner_ = result_scanner; found_rows_ += scanner_->get_found_rows(); diff --git a/src/sql/executor/ob_executor_rpc_impl.cpp b/src/sql/executor/ob_executor_rpc_impl.cpp index ce901333f0..f7942035b8 100644 --- a/src/sql/executor/ob_executor_rpc_impl.cpp +++ b/src/sql/executor/ob_executor_rpc_impl.cpp @@ -205,16 +205,6 @@ void ObExecutorRpcImpl::deal_with_rpc_timeout_err(ObExecutorRpcCtx &rpc_ctx, LOG_DEBUG("rpc return OB_TIMEOUT, but it is actually not timeout, " "change error code to OB_CONNECT_ERROR", K(err), K(timeout_timestamp), K(cur_timestamp)); - ObQueryRetryInfo *retry_info = rpc_ctx.get_retry_info_for_update(); - if (NULL != retry_info) { - int a_ret = OB_SUCCESS; - if (OB_UNLIKELY(OB_SUCCESS != (a_ret = retry_info->add_invalid_server_distinctly( - dist_server)))) { - LOG_WARN_RET(a_ret, "fail to add invalid server distinctly", K(a_ret), K(dist_server)); - } else { - //LOG_INFO("YZFDEBUG add invalid server distinctly", K(a_ret), K(dist_server), "p", &retry_info->get_invalid_servers()); - } - } err = OB_RPC_CONNECT_ERROR; } else { LOG_DEBUG("rpc return OB_TIMEOUT, and it is actually timeout, " diff --git a/src/sql/executor/ob_remote_executor_processor.cpp b/src/sql/executor/ob_remote_executor_processor.cpp index ad442cb710..3ffde14e87 100644 --- a/src/sql/executor/ob_remote_executor_processor.cpp +++ b/src/sql/executor/ob_remote_executor_processor.cpp @@ -717,16 +717,7 @@ int ObRemoteBaseExecuteP::execute_with_sql(ObRemoteTask &task) NULL, session->get_effective_tenant_id())) { ret = OB_ERR_REMOTE_SCHEMA_NOT_FULL; } - if (is_master_changed_error(ret) - || is_partition_change_error(ret) - || is_get_location_timeout_error(ret)) { - ObTaskExecutorCtx 
&task_exec_ctx = exec_ctx_.get_task_exec_ctx(); - LOG_DEBUG("remote execute failed, begin to refresh location cache nonblocking", K(ret)); - int refresh_err = ObTaskExecutorCtxUtil::refresh_location_cache(task_exec_ctx, true); - if (OB_SUCCESS != refresh_err) { - LOG_WARN("refresh location cache failed", K(ret), K(refresh_err)); - } - } + DAS_CTX(exec_ctx_).get_location_router().refresh_location_cache(true, ret); } //监控项统计结束 exec_end_timestamp_ = ObTimeUtility::current_time(); diff --git a/src/sql/executor/ob_remote_scheduler.cpp b/src/sql/executor/ob_remote_scheduler.cpp index 2db5a3cf98..d585cdc292 100644 --- a/src/sql/executor/ob_remote_scheduler.cpp +++ b/src/sql/executor/ob_remote_scheduler.cpp @@ -245,16 +245,7 @@ int ObRemoteScheduler::execute_with_sql(ObExecContext &ctx, ObPhysicalPlan *phy_ *handler, has_sent_task, has_transfer_err))) { - int add_ret = OB_SUCCESS; - if (is_data_not_readable_err(ret) || is_server_down_error(ret)) { - // 读到落后太多的备机或者正在回放日志的副本了, - // 将远端的这个observer加进retry info的invalid servers中 - if (OB_UNLIKELY(OB_SUCCESS != (add_ret = - retry_info->add_invalid_server_distinctly(task.get_runner_svr(), true)))) { - LOG_WARN("fail to add remote addr to invalid servers distinctly", - K(ret), K(add_ret), K(task), K(*retry_info)); - } - } + LOG_WARN("task execute failed", K(ret)); } // handle tx relative info if plan involved in transaction diff --git a/src/sql/executor/ob_remote_task_executor.cpp b/src/sql/executor/ob_remote_task_executor.cpp index 3859fa73d7..4b4526d376 100644 --- a/src/sql/executor/ob_remote_task_executor.cpp +++ b/src/sql/executor/ob_remote_task_executor.cpp @@ -77,17 +77,6 @@ int ObRemoteTaskExecutor::execute(ObExecContext &query_ctx, ObJob *job, ObTaskIn has_transfer_err))) { bool skip_failed_tasks = false; int check_ret = OB_SUCCESS; - int add_ret = OB_SUCCESS; - if (is_data_not_readable_err(ret) || is_server_down_error(ret)) { - // 读到落后太多的备机或者正在回放日志的副本了, - // 将远端的这个observer加进retry info的invalid servers中 - if (OB_UNLIKELY(OB_SUCCESS != ( - add_ret = retry_info->add_invalid_server_distinctly( - task_info->get_task_location().get_server(), true)))) { - LOG_WARN("fail to add remote addr to invalid servers distinctly", K(ret), K(add_ret), - K(task_info->get_task_location().get_server()), K(*retry_info)); - } - } if (OB_SUCCESS != (check_ret = should_skip_failed_tasks(*task_info, skip_failed_tasks))) { // check fail, set ret to check_ret LOG_WARN("fail to check if it should skip failed tasks", K(ret), K(check_ret), K(*job)); diff --git a/src/sql/executor/ob_task_executor_ctx.cpp b/src/sql/executor/ob_task_executor_ctx.cpp index 2ca0b7755f..113f66ff85 100644 --- a/src/sql/executor/ob_task_executor_ctx.cpp +++ b/src/sql/executor/ob_task_executor_ctx.cpp @@ -59,9 +59,6 @@ ObTaskExecutorCtx::ObTaskExecutorCtx(ObExecContext &exec_context) : task_resp_handler_(NULL), virtual_part_servers_(exec_context.get_allocator()), exec_ctx_(&exec_context), - partition_infos_(exec_context.get_allocator()), - need_renew_location_cache_(false), - need_renew_tablet_keys_(exec_context.get_allocator()), expected_worker_cnt_(0), minimal_worker_cnt_(0), admited_worker_cnt_(0), @@ -143,34 +140,6 @@ int ObTaskExecutorCtx::append_table_location(const ObCandiTableLoc &phy_location return ret; } -int ObTaskExecutorCtx::add_need_renew_tablet_keys_distinctly(const ObTabletID &tablet_id) -{ - int ret = OB_SUCCESS; - bool has_found = false; - if (OB_UNLIKELY(!tablet_id.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("partition key is invalid", K(ret), K(tablet_id)); - } - 
FOREACH_X(it, need_renew_tablet_keys_, OB_SUCC(ret) && !has_found) { - if (tablet_id == *it) { - has_found = true; - } - } - if (OB_SUCC(ret) && !has_found) { - if (OB_FAIL(need_renew_tablet_keys_.push_back(tablet_id))) { - LOG_WARN("fail to push back partition key", K(ret), K(tablet_id)); - } else { - LOG_DEBUG("add dated partition location key", K(tablet_id)); - } - } - return ret; -} - -const ObTablePartitionInfoArray &ObTaskExecutorCtx::get_partition_infos() const -{ - return partition_infos_; -} - // // // Utility @@ -289,7 +258,7 @@ int ObTaskExecutorCtxUtil::nonblock_renew( if (NULL == GCTX.location_service_) { ret = OB_NOT_INIT; LOG_WARN("loc_cache is NULL", K(ret)); - } else if (OB_FAIL(GCTX.location_service_->nonblock_renew(GET_MY_SESSION(*exec_ctx)->get_effective_tenant_id(), + } else if (OB_FAIL(GCTX.location_service_->nonblock_renew(GET_MY_SESSION(*exec_ctx)->get_effective_tenant_id(), tablet_id))) { LOG_WARN("nonblock_renew failed", K(tablet_id), K(ret)); } @@ -307,71 +276,13 @@ int ObTaskExecutorCtx::nonblock_renew_with_limiter( if (NULL == GCTX.location_service_) { ret = OB_NOT_INIT; LOG_WARN("tmp_loc_cache is NULL", K(ret)); - } else if (OB_FAIL(GCTX.location_service_->nonblock_renew(GET_MY_SESSION(*exec_ctx_)->get_effective_tenant_id(), + } else if (OB_FAIL(GCTX.location_service_->nonblock_renew(GET_MY_SESSION(*exec_ctx_)->get_effective_tenant_id(), tablet_id))) { LOG_WARN("nonblock_renew failed", K(tablet_id), K(ret)); } return ret; } -//TODO: @wangzhennan.wzn Provide new interface to refresh location according to different error codes. -// Refresh_location_cache should refresh location of ls/tablet/vtable. - -// obmp_query中重试整个SQL之前,可能需要调用本接口来刷新Location,以避免总是发给了错误的服务器 -int ObTaskExecutorCtxUtil::refresh_location_cache(ObTaskExecutorCtx &task_exec_ctx, - bool is_nonblock) -{ - int ret = OB_SUCCESS; - NG_TRACE_TIMES(1, get_location_cache_begin); - bool is_cache_hit = false; - ObLSLocation dummy_loc; - DASTableLocList &table_locs = DAS_CTX(*task_exec_ctx.get_exec_context()).get_table_loc_list(); - FOREACH_X(tmp_node, table_locs, OB_SUCC(ret)) { - ObDASTableLoc *table_loc = *tmp_node; - for (DASTabletLocListIter tablet_node = table_loc->tablet_locs_begin(); - OB_SUCC(ret) && tablet_node != table_loc->tablet_locs_end(); ++tablet_node) { - const ObDASTabletLoc *tablet_loc = *tablet_node; - if (is_nonblock) { - const int64_t expire_renew_time = 0; //表示在刷location cache之前不清空现有的location cache - if (OB_FAIL(nonblock_renew(task_exec_ctx.get_exec_context(), - tablet_loc->tablet_id_, - expire_renew_time))) { - LOG_WARN("LOCATION: fail to nonblock renew location cache", K(ret), K(tablet_loc->tablet_id_), K(expire_renew_time)); - } else { -#if !defined(NDEBUG) - LOG_INFO("LOCATION: nonblock renew success", K(tablet_loc->tablet_id_), K(expire_renew_time)); -#endif - } - } else { - const int64_t expire_renew_time = INT64_MAX; // means must renew location - bool is_cache_hit = false; - dummy_loc.reset(); - ObLSID ls_id = tablet_loc->ls_id_; - if (OB_FAIL(GCTX.location_service_->get(GET_MY_SESSION(*task_exec_ctx.get_exec_context())->get_effective_tenant_id(), - tablet_loc->tablet_id_, - expire_renew_time, - is_cache_hit, - ls_id))) { - LOG_WARN("fail to get ls id", K(ret)); - } else if (OB_FAIL(GCTX.location_service_->get(GCONF.cluster_id, - GET_MY_SESSION(*task_exec_ctx.get_exec_context())->get_effective_tenant_id(), - tablet_loc->ls_id_, - 0, /*not force to renew*/ - is_cache_hit, - dummy_loc))) { - LOG_WARN("failed to get location", K(tablet_loc->ls_id_), K(ret)); - } else { -#if 
!defined(NDEBUG) - LOG_INFO("LOCATION: refresh table cache succ", K(tablet_loc->tablet_id_), K(dummy_loc)); -#endif - } - } - } - } - NG_TRACE_TIMES(1, get_location_cache_end); - return ret; -} - void ObTaskExecutorCtx::set_self_addr(const common::ObAddr &self_addr) { UNUSED(self_addr); diff --git a/src/sql/executor/ob_task_executor_ctx.h b/src/sql/executor/ob_task_executor_ctx.h index 2801a8804d..78474ce1bd 100644 --- a/src/sql/executor/ob_task_executor_ctx.h +++ b/src/sql/executor/ob_task_executor_ctx.h @@ -73,16 +73,6 @@ public: int get_addr_by_virtual_partition_id(int64_t partition_id, common::ObAddr &addr); int set_table_locations(const ObTablePartitionInfoArray &table_partition_infos); int append_table_location(const ObCandiTableLoc &phy_location_info); - inline void set_need_renew_location_cache(bool need_renew_location_cache) - { - need_renew_location_cache_ = need_renew_location_cache; - } - inline bool is_need_renew_location_cache() const { return need_renew_location_cache_; } - inline const common::ObList &get_need_renew_tablet_keys() const - { - return need_renew_tablet_keys_; - } - int add_need_renew_tablet_keys_distinctly(const ObTabletID &tablet_id); const ObTablePartitionInfoArray &get_partition_infos() const; inline RemoteExecuteStreamHandle* get_stream_handler() @@ -202,12 +192,6 @@ private: CalcVirtualPartitionIdParams calc_params_; // ObExecContext *exec_ctx_; - // - common::ObFixedArray partition_infos_; - // 每次执行完之后是否需要刷新location cache - bool need_renew_location_cache_; - // 需要刷新location cache的partition key - common::ObList need_renew_tablet_keys_; // PX 记录执行预期整个 Query 需要的线程数,以及实际分配的线程数 int64_t expected_worker_cnt_; // query expected worker count computed by optimizer int64_t minimal_worker_cnt_; // minimal worker count to support execute this query @@ -255,7 +239,6 @@ public: static int get_stream_handler(ObExecContext &ctx, RemoteExecuteStreamHandle *&handler); static int get_task_executor_rpc(ObExecContext &ctx, ObExecutorRpcImpl *&rpc); - static int refresh_location_cache(ObTaskExecutorCtx &task_exec_ctx, bool is_nonblock); template static int merge_task_result_meta(DEST_TYPE &dest, const SRC_TYPE &task_meta); }; /* class ObTaskExecutorCtxUtil */ diff --git a/src/sql/ob_result_set.cpp b/src/sql/ob_result_set.cpp index 2339ce3d6a..84745cac10 100644 --- a/src/sql/ob_result_set.cpp +++ b/src/sql/ob_result_set.cpp @@ -68,7 +68,7 @@ ObResultSet::~ObResultSet() && OB_UNLIKELY(physical_plan->is_limited_concurrent_num())) { physical_plan->dec_concurrent_num(); } - // when ObExecContext is destroyed, it also depends on the physical plan, so need to ensure + // when ObExecContext is destroyed, it also depends on the physical plan, so need to ensure // that inner_exec_ctx_ is destroyed before cache_obj_guard_ if (NULL != inner_exec_ctx_) { inner_exec_ctx_->~ObExecContext(); @@ -226,7 +226,7 @@ int ObResultSet::open_result() SQL_LOG(DEBUG, "get affected row", K(get_stmt_type()), K(get_exec_context().get_physical_plan_ctx()->get_affected_rows())); set_affected_rows(get_exec_context().get_physical_plan_ctx()->get_affected_rows()); - } + } if (OB_SUCC(ret) && get_stmt_type() == stmt::T_ANONYMOUS_BLOCK) { // Compatible with oracle anonymous block affect rows setting set_affected_rows(1); @@ -1094,45 +1094,10 @@ int ObResultSet::init_cmd_exec_context(ObExecContext &exec_ctx) return ret; } -void ObResultSet::refresh_location_cache(ObTaskExecutorCtx &task_exec_ctx, bool is_nonblock, int err) -{ - if (OB_NOT_MASTER == err || OB_PARTITION_NOT_EXIST == err || is_server_down_error(err)) 
{ - int err2 = ObTaskExecutorCtxUtil::refresh_location_cache(task_exec_ctx, - is_nonblock); - if (OB_SUCCESS != err2) { - LOG_WARN_RET(err2, "fail to refresh location cache", K(err2), K(is_nonblock), K(err)); - } - LOG_TRACE("partition change or not master or no response, refresh location cache", K(err)); - } -} - // obmp_query中重试整个SQL之前,可能需要调用本接口来刷新Location,以避免总是发给了错误的服务器 -int ObResultSet::refresh_location_cache(bool is_nonblock) +void ObResultSet::refresh_location_cache(bool is_nonblock, int err) { - return ObTaskExecutorCtxUtil::refresh_location_cache(get_exec_context().get_task_exec_ctx(), - is_nonblock); -} - -int ObResultSet::check_and_nonblock_refresh_location_cache() -{ - int ret = OB_SUCCESS; - ObTaskExecutorCtx &task_exec_ctx = get_exec_context().get_task_exec_ctx(); - if (task_exec_ctx.is_need_renew_location_cache()) { - const int64_t expire_renew_time = INT64_MAX; // 必须刷 - const common::ObList &tablet_ids - = task_exec_ctx.get_need_renew_tablet_keys(); - FOREACH_X(it, tablet_ids, OB_SUCC(ret)) { - bool is_limited = false; - if (OB_FAIL(task_exec_ctx.nonblock_renew_with_limiter(*it, expire_renew_time, is_limited))) { - LOG_WARN("LOCATION: fail to renew", K(ret), K(*it), K(expire_renew_time), K(is_limited)); - } else { -#if !defined(NDEBUG) - LOG_INFO("LOCATION: noblock renew with limiter", "key", *it); -#endif - } - } - } - return ret; + DAS_CTX(get_exec_context()).get_location_router().refresh_location_cache(is_nonblock, err); } // 告诉mysql是否要传入一个EndTransCallback diff --git a/src/sql/ob_result_set.h b/src/sql/ob_result_set.h index f02c796dac..f13d65a82f 100644 --- a/src/sql/ob_result_set.h +++ b/src/sql/ob_result_set.h @@ -190,9 +190,7 @@ public: const common::ObString& get_stmt_ps_sql() const { return ps_sql_; } common::ObString& get_stmt_ps_sql() { return ps_sql_; } int64_t get_query_string_id() const; - static void refresh_location_cache(ObTaskExecutorCtx &task_exec_ctx, bool is_nonblock, int err); - int refresh_location_cache(bool is_nonblock); - int check_and_nonblock_refresh_location_cache(); + void refresh_location_cache(bool is_nonblock, int err); bool need_execute_remote_sql_async() const { return get_exec_context().use_remote_sql() && !is_inner_result_set_; } diff --git a/src/sql/ob_sql_context.cpp b/src/sql/ob_sql_context.cpp index fff8eded25..7527ff3256 100644 --- a/src/sql/ob_sql_context.cpp +++ b/src/sql/ob_sql_context.cpp @@ -137,7 +137,6 @@ void ObQueryRetryInfo::reset() { inited_ = false; is_rpc_timeout_ = false; - invalid_servers_.reset(); last_query_retry_err_ = OB_SUCCESS; retry_cnt_ = 0; query_switch_leader_retry_timeout_ts_ = 0; @@ -147,27 +146,9 @@ void ObQueryRetryInfo::clear() { // 这里不能将inited_设为false is_rpc_timeout_ = false; - invalid_servers_.reset(); //last_query_retry_err_ = OB_SUCCESS; } -// 合并重试信息,用于主线程和调度线程的重试信息合并 -int ObQueryRetryInfo::merge(const ObQueryRetryInfo &other) -{ - int ret = OB_SUCCESS; - if (other.is_rpc_timeout_) { - is_rpc_timeout_ = other.is_rpc_timeout_; - } - for (int64_t i = 0; OB_SUCC(ret) && i < other.invalid_servers_.count(); ++i) { - if (OB_FAIL(add_invalid_server_distinctly(other.invalid_servers_.at(i)))) { - LOG_WARN("fail to add invalid server distinctly", K(ret), K(i), - K(other.invalid_servers_.at(i)), K(other.invalid_servers_), K(invalid_servers_)); - } - } - // last_query_retry_err_不会在调度线程上修改,所以这里不用管 - return ret; -} - void ObQueryRetryInfo::set_is_rpc_timeout(bool is_rpc_timeout) { is_rpc_timeout_ = is_rpc_timeout; @@ -178,27 +159,6 @@ bool ObQueryRetryInfo::is_rpc_timeout() const return is_rpc_timeout_; } -int 
ObQueryRetryInfo::add_invalid_server_distinctly(const ObAddr &invalid_server, - bool print_info_log/* = false*/) -{ - int ret = OB_SUCCESS; - bool is_found = false; - for (int64_t i = 0; OB_SUCC(ret) && !is_found && i < invalid_servers_.count(); ++i) { - if (invalid_server == invalid_servers_.at(i)) { - is_found = true; - } - } - if (OB_SUCC(ret) && !is_found) { - if (OB_FAIL(invalid_servers_.push_back(invalid_server))) { - LOG_WARN("fail to push back invalid server", K(ret), K(invalid_server)); - } - } - if (print_info_log) { - LOG_INFO("add a server to invalid server list", K(ret), K(invalid_server), K(invalid_servers_)); - } - return ret; -} - ObSqlCtx::ObSqlCtx() : session_info_(NULL), schema_guard_(NULL), @@ -309,6 +269,7 @@ void ObSqlCtx::clear() base_constraints_.reset(); strict_constraints_.reset(); non_strict_constraints_.reset(); + dup_table_replica_cons_.reset(); multi_stmt_rowkey_pos_.reset(); spm_ctx_.bl_key_.reset(); cur_stmt_ = nullptr; @@ -689,9 +650,11 @@ int ObSqlCtx::set_location_constraints(const ObLocationConstraintContext &locati base_constraints_.reset(); strict_constraints_.reset(); non_strict_constraints_.reset(); + dup_table_replica_cons_.reset(); const ObIArray &base_constraints = location_constraint.base_table_constraints_; const ObIArray &strict_constraints = location_constraint.strict_constraints_; const ObIArray &non_strict_constraints = location_constraint.non_strict_constraints_; + const ObIArray &dup_table_replica_cons = location_constraint.dup_table_replica_cons_; if (base_constraints.count() > 0) { base_constraints_.set_allocator(&allocator); if (OB_FAIL(base_constraints_.init(base_constraints.count()))) { @@ -734,6 +697,19 @@ int ObSqlCtx::set_location_constraints(const ObLocationConstraintContext &locati LOG_DEBUG("set non strict constraints", K(non_strict_constraints.count())); } } + if (OB_SUCC(ret) && dup_table_replica_cons.count() > 0) { + dup_table_replica_cons_.set_allocator(&allocator); + if (OB_FAIL(dup_table_replica_cons_.init(dup_table_replica_cons.count()))) { + LOG_WARN("init duplicate table replica constraints failed", K(ret)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < dup_table_replica_cons.count(); i++) { + if (OB_FAIL(dup_table_replica_cons_.push_back(dup_table_replica_cons.at(i)))) { + LOG_WARN("failed to push back location constraint", K(ret)); + } + } + LOG_DEBUG("set duplicate table replica constraints", K(dup_table_replica_cons.count())); + } + } return ret; } diff --git a/src/sql/ob_sql_context.h b/src/sql/ob_sql_context.h index 83174dc494..110fc2a996 100644 --- a/src/sql/ob_sql_context.h +++ b/src/sql/ob_sql_context.h @@ -66,12 +66,14 @@ struct LocationConstraint RightIsSuperior // right contains all the elements in left set }; enum ConstraintFlag { - NoExtraFlag = 0, - IsMultiPartInsert = 1, + NoExtraFlag = 0, + IsMultiPartInsert = 1, // 分区裁剪后基表只涉及到一个一级分区 - SinglePartition = 1 << 1, + SinglePartition = 1 << 1, // 分区裁剪后基表每个一级分区都只涉及一个二级分区 - SingleSubPartition= 1 << 2 + SingleSubPartition = 1 << 2, + // is duplicate table not in dml + DupTabNotInDML = 1 << 3 }; TableLocationKey key_; ObTableLocationType phy_loc_type_; @@ -90,6 +92,7 @@ struct LocationConstraint inline bool is_multi_part_insert() const { return constraint_flags_ & IsMultiPartInsert; } inline bool is_partition_single() const { return constraint_flags_ & SinglePartition; } inline bool is_subpartition_single() const { return constraint_flags_ & SingleSubPartition; } + inline bool is_dup_table_not_in_dml() const {return constraint_flags_ & DupTabNotInDML; } 
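// The new DupTabNotInDML bit above follows the existing ConstraintFlag pattern:
// each property of a base-table location constraint occupies one bit of
// constraint_flags_, set with bitwise-or and queried with bitwise-and. A
// minimal, self-contained sketch of that pattern (illustrative names only,
// not the OceanBase types):
#include <cassert>
#include <cstdint>

enum ConstraintFlagSketch : uint64_t {
  kNoExtraFlag        = 0,
  kIsMultiPartInsert  = 1,
  kSinglePartition    = 1 << 1,
  kSingleSubPartition = 1 << 2,
  kDupTabNotInDML     = 1 << 3,  // duplicate table that is only read, not written by the DML
};

struct LocationConstraintSketch {
  uint64_t constraint_flags_ = kNoExtraFlag;
  void add_constraint_flag(uint64_t flag) { constraint_flags_ |= flag; }
  bool is_dup_table_not_in_dml() const { return constraint_flags_ & kDupTabNotInDML; }
};

int main()
{
  LocationConstraintSketch cons;
  cons.add_constraint_flag(kDupTabNotInDML);  // set while generating the scan's constraint
  assert(cons.is_dup_table_not_in_dml());     // tested when choosing the duplicate table replica
  return 0;
}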
bool operator==(const LocationConstraint &other) const; bool operator!=(const LocationConstraint &other) const; @@ -111,7 +114,10 @@ struct ObLocationConstraintContext }; ObLocationConstraintContext() - : base_table_constraints_(), strict_constraints_(), non_strict_constraints_() + : base_table_constraints_(), + strict_constraints_(), + non_strict_constraints_(), + dup_table_replica_cons_() { } ~ObLocationConstraintContext() @@ -121,7 +127,10 @@ struct ObLocationConstraintContext const ObPwjConstraint *right, InclusionType &inclusion_result); - TO_STRING_KV(K_(base_table_constraints), K_(strict_constraints), K_(non_strict_constraints)); + TO_STRING_KV(K_(base_table_constraints), + K_(strict_constraints), + K_(non_strict_constraints), + K_(dup_table_replica_cons)); // 基表location约束,包括TABLE_SCAN算子上的基表和INSERT算子上的基表 ObLocationConstraint base_table_constraints_; // 严格partition wise join约束,要求同一个分组内的基表分区逻辑上和物理上都相等。 @@ -130,6 +139,9 @@ struct ObLocationConstraintContext // 严格partition wise join约束,要求用一个分组内的基表分区物理上相等。 // 每个分组是一个array,保存了对应基表在base_table_constraints_中的偏移 common::ObSEArray non_strict_constraints_; + // constraints for duplicate table's replica selection + // if not found values in this array, just use local server's replica. + common::ObSEArray dup_table_replica_cons_; }; class ObIVtScannerableFactory; @@ -243,7 +255,6 @@ public: ObQueryRetryInfo() : inited_(false), is_rpc_timeout_(false), - invalid_servers_(), last_query_retry_err_(common::OB_SUCCESS), retry_cnt_(0), query_switch_leader_retry_timeout_ts_(0) @@ -259,16 +270,10 @@ public: is_rpc_timeout_ = false; // 这里不能清除逐次重试累计的成员,如:invalid_servers_,last_query_retry_err_ } - int merge(const ObQueryRetryInfo &other); bool is_inited() const { return inited_; } void set_is_rpc_timeout(bool is_rpc_timeout); bool is_rpc_timeout() const; - int add_invalid_server_distinctly(const common::ObAddr &invalid_server, bool print_info_log = false); - const common::ObIArray &get_invalid_servers() const - { - return invalid_servers_; - } void set_last_query_retry_err(int last_query_retry_err) { last_query_retry_err_ = last_query_retry_err; @@ -299,13 +304,12 @@ public: int64_t get_retry_cnt() const { return retry_cnt_; } - TO_STRING_KV(K_(inited), K_(is_rpc_timeout), K_(invalid_servers), K_(last_query_retry_err)); + TO_STRING_KV(K_(inited), K_(is_rpc_timeout), K_(last_query_retry_err)); private: bool inited_; // 这个变量用于写一些防御性代码,基本没用 // 用于标记是否是rpc返回的timeout错误码(包括本地超时和回包中的超时错误码) bool is_rpc_timeout_; - common::ObArray invalid_servers_; // 重试阶段可以将错误码的处理分为三类: // 1.重试到超时,将timeout返回给客户端; // 2.不再重试的错误码,直接将其返回给客客户端; @@ -402,14 +406,14 @@ struct ObBaselineKey : db_id_(db_id), constructed_sql_(constructed_sql), sql_id_(sql_id) {} - + inline void reset() { db_id_ = common::OB_INVALID_ID; constructed_sql_.reset(); sql_id_.reset(); } - + TO_STRING_KV(K_(db_id), K_(constructed_sql), K_(sql_id)); @@ -497,6 +501,9 @@ public: // 严格partition wise join约束,要求用一个分组内的基表分区物理上相等。 // 每个分组是一个array,保存了对应基表在base_table_constraints_中的偏移 common::ObFixedArray non_strict_constraints_; + // constraints for duplicate table's replica selection + // if not found values in this array, just use local server's replica. 
+  common::ObFixedArray dup_table_replica_cons_;
   // wether need late compilation
   bool need_late_compile_;
diff --git a/src/sql/optimizer/ob_log_plan.cpp b/src/sql/optimizer/ob_log_plan.cpp
index c8cb485ab7..f2c27bec0e 100644
--- a/src/sql/optimizer/ob_log_plan.cpp
+++ b/src/sql/optimizer/ob_log_plan.cpp
@@ -2083,33 +2083,6 @@ int ObLogPlan::select_replicas(ObExecContext &exec_ctx,
           LOG_WARN("fail to set_follower_first_feedback", K(follower_first_feedback), K(ret));
         }
       }
-      if (OB_SUCC(ret)) {
-        // weak读如果不命中,要刷新location cache
-        task_exec_ctx.set_need_renew_location_cache(!is_hit_partition);
-        // 目前暂时没想到如何处理分布式的情况,分布式的情况一定是命中,
-        // 暂时没想到办法判断是否要刷location cache。
-        // 这里有可能会被多次递归到,但是按目前的选择策略,
-        // 要么每次递归到这里都是命中,要么每次递归到这里都是不命中或者分布式的情况,
-        // 所以只需要在不命中的时候add_need_renew_tablet_keys_distinctly就可以了。
-        if (task_exec_ctx.is_need_renew_location_cache()) {
-          for (int64_t i = 0; OB_SUCC(ret) && i < phy_tbl_loc_info_list.count(); ++i) {
-            const ObCandiTableLoc *phy_tbl_loc_info = phy_tbl_loc_info_list.at(i);
-            if (OB_ISNULL(phy_tbl_loc_info)) {
-              ret = OB_ERR_UNEXPECTED;
-              LOG_ERROR("phy tbl loc info is NULL", K(ret), K(i));
-            } else {
-              const ObCandiTabletLocIArray &phy_part_loc_info_list = phy_tbl_loc_info->get_phy_part_loc_info_list();
-              for (int64_t j = 0; OB_SUCC(ret) && j < phy_part_loc_info_list.count(); ++j) {
-                const ObCandiTabletLoc &phy_part_loc_info = phy_part_loc_info_list.at(j);
-                if (OB_FAIL(task_exec_ctx.add_need_renew_tablet_keys_distinctly(
-                    phy_part_loc_info.get_partition_location().get_tablet_id()))) {
-                  LOG_WARN("fail to add need renew partition key", K(ret));
-                }
-              }
-            }
-          }
-        }
-      }
     }
   } else {
     const bool sess_in_retry = session->get_is_in_retry_for_dup_tbl(); //重试状态下不优化复制表的副本选择
@@ -2117,7 +2090,6 @@ int ObLogPlan::select_replicas(ObExecContext &exec_ctx,
       LOG_WARN("fail to strong select replicas", K(ret), K(local_server), K(phy_tbl_loc_info_list.count()));
     } else {
       session->partition_hit().try_set_bool(is_hit_partition);
-      task_exec_ctx.set_need_renew_location_cache(false); // 含有strong的无论如何都不renew了
     }
   }
   return ret;
@@ -10571,6 +10543,8 @@ int ObLogPlan::remove_duplicate_constraint(ObLocationConstraintContext &location
     LOG_WARN("failed to remove duplicate strict pwj constraint", K(ret));
   } else if (OB_FAIL(sort_pwj_constraint(location_constraint))) {
     LOG_WARN("failed to sort pwj constraint", K(ret));
+  } else if (OB_FAIL(resolve_dup_tab_constraint(location_constraint))) {
+    LOG_WARN("failed to resolve duplicate table constraint", K(ret));
   // 将约束设置给sql_ctx
   } else if (OB_FAIL(sql_ctx.set_location_constraints(location_constraint, get_allocator()))) {
     LOG_WARN("failed to set location constraints", K(ret));
@@ -11403,6 +11377,8 @@ int ObLogPlan::do_post_plan_processing()
     LOG_WARN("failed to re est cost", K(ret));
   } else if (OB_FAIL(set_duplicated_table_location(root, OB_INVALID_INDEX))) {
     LOG_WARN("failed to set duplicated table location", K(ret));
+  } else if (OB_FAIL(set_advisor_table_id(root))) {
+    LOG_WARN("failed to set advise table id from duplicate table", K(ret));
   } else if (OB_FAIL(collect_table_location(root))) {
     LOG_WARN("failed to collect table location", K(ret));
   } else if (OB_FAIL(build_location_related_tablet_ids())) {
@@ -13255,6 +13231,70 @@ int ObLogPlan::allocate_material_for_recursive_cte_plan(ObIArrayget_sharding())) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("operator is null", K(ret), K(op));
+  } else if (op->get_sharding()->is_local() || op->get_sharding()->is_remote()) {
+    if (OB_FAIL(negotiate_advisor_table_id(op))) {
+      LOG_WARN("failed to negotiate advise table id", K(ret));
+
} + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < op->get_num_of_child(); ++i) { + if (OB_FAIL(SMART_CALL(set_advisor_table_id(op->get_child(i))))) { + LOG_WARN("failed to update advise table id", K(ret)); + } + } + } + return ret; +} +int ObLogPlan::negotiate_advisor_table_id(ObLogicalOperator *op) +{ + int ret = OB_SUCCESS; + uint64_t base_table_id = OB_INVALID_ID; + uint64_t dup_table_id = OB_INVALID_ID; + ObArray all_ops; + ObArray all_dup_tables; + for (int64_t i = -1; OB_SUCC(ret) && i < all_ops.count(); ++i) { + ObLogicalOperator *cur_op = (i == -1 ? op : all_ops.at(i)); + if (OB_ISNULL(cur_op)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("current operator is null", K(ret)); + } else if (cur_op->is_table_scan()) { + ObLogTableScan *table_scan = static_cast(cur_op); + if (table_scan->is_duplicate_table()) { + if (OB_FAIL(all_dup_tables.push_back(table_scan))) { + LOG_WARN("failed to push back duplicate table scan", K(ret)); + } else if (OB_INVALID_ID == dup_table_id) { + dup_table_id = table_scan->get_table_id(); + } + } else { + if (OB_INVALID_ID == base_table_id) { + base_table_id = table_scan->get_table_id(); + } + } + } + for (int64_t j = 0; OB_SUCC(ret) && j < cur_op->get_num_of_child(); ++j) { + if (OB_FAIL(all_ops.push_back(cur_op->get_child(j)))) { + LOG_WARN("failed to push back child operator", K(ret)); + } + } + } + + if (base_table_id != OB_INVALID_ID || dup_table_id != OB_INVALID_ID) { + uint64_t final_table_id = (base_table_id == OB_INVALID_ID ? dup_table_id : base_table_id); + for (int64_t i = 0; OB_SUCC(ret) && i < all_dup_tables.count(); ++i) { + if (final_table_id != all_dup_tables.at(i)->get_table_id()) { + all_dup_tables.at(i)->set_advisor_table_id(final_table_id); + } + // LOG_INFO("link debug", K(all_dup_tables.at(i)->get_table_id()), K(final_table_id)); + } + } + return ret; +} + int ObLogPlan::find_possible_join_filter_tables(ObLogicalOperator *op, const JoinFilterPushdownHintInfo &hint_info, ObRelIds &right_tables, @@ -13600,6 +13640,40 @@ int ObLogPlan::fill_join_filter_info(JoinFilterInfo &join_filter_info) return ret; } +int ObLogPlan::resolve_dup_tab_constraint(ObLocationConstraintContext &location_constraint) const +{ + int ret = OB_SUCCESS; + ObIArray &dup_cons = location_constraint.dup_table_replica_cons_; + ObIArray &base_cons = location_constraint.base_table_constraints_; + + for (int64_t i=0; i &from_items, ObIArray &table_items); @@ -705,6 +706,8 @@ public: int perform_simplify_win_expr(ObLogicalOperator *op); int perform_adjust_onetime_expr(ObLogicalOperator *op); int init_onetime_replaced_exprs_if_needed(); + int set_advisor_table_id(ObLogicalOperator *op); + int negotiate_advisor_table_id(ObLogicalOperator *op); int simplify_win_expr(ObLogicalOperator* child_op, ObWinFunRawExpr &win_expr); int simplify_win_partition_exprs(ObLogicalOperator* child_op, ObWinFunRawExpr &win_expr); diff --git a/src/sql/optimizer/ob_log_table_scan.cpp b/src/sql/optimizer/ob_log_table_scan.cpp index 39c17f1069..880ea55316 100644 --- a/src/sql/optimizer/ob_log_table_scan.cpp +++ b/src/sql/optimizer/ob_log_table_scan.cpp @@ -1945,6 +1945,15 @@ int ObLogTableScan::get_phy_location_type(ObTableLocationType &location_type) return ret; } +bool ObLogTableScan::is_duplicate_table() +{ + bool bret = false; + if (NULL != table_partition_info_) { + bret = table_partition_info_->get_phy_tbl_location_info().is_duplicate_table_not_in_dml(); + } + return bret; +} + int ObLogTableScan::extract_bnlj_param_idxs(ObIArray &bnlj_params) { int ret = OB_SUCCESS; diff --git 
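As an aside for readers of this hunk (the sketch below is not part of the patch): the advisor-table negotiation added to ObLogPlan above walks a local or remote subtree, remembers the first non-duplicate base table id (falling back to the first duplicate table id), and records that id on every other duplicate-table scan so replica selection can later be co-located. A minimal standalone C++ sketch of the same idea, using made-up ToyOp/negotiate_advisor names and UINT64_MAX as the invalid-id sentinel, might look like this:

#include <cstddef>
#include <cstdint>
#include <vector>

struct ToyOp {                      // stand-in for a plan operator
  bool is_table_scan = false;
  bool is_duplicate = false;        // duplicate-table scan?
  uint64_t table_id = UINT64_MAX;   // UINT64_MAX plays the role of OB_INVALID_ID
  uint64_t advisor_table_id = UINT64_MAX;
  std::vector<ToyOp*> children;
};

// Pick one anchor table for the subtree: prefer the first non-duplicate base
// table, otherwise the first duplicate table, then record it on every other
// duplicate-table scan as the advisor table id.
void negotiate_advisor(ToyOp *root) {
  const uint64_t INVALID = UINT64_MAX;
  uint64_t base_id = INVALID;
  uint64_t dup_id = INVALID;
  std::vector<ToyOp*> dup_scans;
  std::vector<ToyOp*> queue{root};
  for (std::size_t i = 0; i < queue.size(); ++i) {   // index loop doubles as a BFS queue
    ToyOp *cur = queue[i];
    if (cur->is_table_scan) {
      if (cur->is_duplicate) {
        dup_scans.push_back(cur);
        if (dup_id == INVALID) dup_id = cur->table_id;
      } else if (base_id == INVALID) {
        base_id = cur->table_id;
      }
    }
    for (ToyOp *c : cur->children) queue.push_back(c);
  }
  if (base_id != INVALID || dup_id != INVALID) {
    const uint64_t final_id = (base_id == INVALID ? dup_id : base_id);
    for (ToyOp *scan : dup_scans) {
      if (scan->table_id != final_id) {
        scan->advisor_table_id = final_id;           // remember the co-location anchor
      }
    }
  }
}

The index-based loop mirrors how the patch scans the subtree iteratively inside negotiate_advisor_table_id instead of recursing.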
a/src/sql/optimizer/ob_log_table_scan.h b/src/sql/optimizer/ob_log_table_scan.h index 285210c72c..931fae7a21 100644 --- a/src/sql/optimizer/ob_log_table_scan.h +++ b/src/sql/optimizer/ob_log_table_scan.h @@ -33,6 +33,7 @@ public: ref_table_id_(common::OB_INVALID_ID ), index_table_id_(common::OB_INVALID_ID ), session_id_(0), + advisor_table_id_(OB_INVALID_ID), is_index_global_(false), is_spatial_index_(false), use_das_(false), @@ -140,6 +141,17 @@ public: ObSchemaUtils::get_real_table_mappings_tid(index_table_id_) : index_table_id_; } + inline uint64_t get_advisor_table_id() const + { + return advisor_table_id_; + } + + inline void set_advisor_table_id(uint64_t advise_table_id) + { + advisor_table_id_ = advise_table_id; + } + + bool is_duplicate_table(); /** * Get pre query range @@ -480,6 +492,7 @@ protected: // memeber variables uint64_t ref_table_id_; //base table id uint64_t index_table_id_; uint64_t session_id_; //for temporary table, record session id + uint64_t advisor_table_id_; // used for duplicate table replica selection in the plan cache bool is_index_global_; bool is_spatial_index_; // TODO yuming: tells whether the table scan uses shared data access or not diff --git a/src/sql/optimizer/ob_logical_operator.cpp b/src/sql/optimizer/ob_logical_operator.cpp index 0d7d9e97da..d6e18f7e77 100644 --- a/src/sql/optimizer/ob_logical_operator.cpp +++ b/src/sql/optimizer/ob_logical_operator.cpp @@ -2470,6 +2470,8 @@ int ObLogicalOperator::gen_location_constraint(void *ctx) if (log_op_def::LOG_TABLE_SCAN == get_type()) { // base table constraints for TABLE SCAN LocationConstraint loc_cons; + ObDupTabConstraint dup_rep_cons; + bool found_dup_con = false; ObLogTableScan *log_scan_op = dynamic_cast(this); if (log_scan_op->get_contains_fake_cte()) { // do nothing @@ -2479,6 +2481,11 @@ int ObLogicalOperator::gen_location_constraint(void *ctx) // dblink table, execute at other cluster } else if (OB_FAIL(get_tbl_loc_cons_for_scan(loc_cons))) { LOG_WARN("failed to get location constraint for table scan op", K(ret)); + } else if (OB_FAIL(get_dup_replica_cons_for_scan(dup_rep_cons, found_dup_con))) { + LOG_WARN("failed to get duplicate table replica constraint for table scan op", K(ret)); + } else if (found_dup_con && + OB_FAIL(loc_cons_ctx->dup_table_replica_cons_.push_back(dup_rep_cons))) { + LOG_WARN("failed to push back location constraint", K(ret)); } else if (OB_FAIL(loc_cons_ctx->base_table_constraints_.push_back(loc_cons))) { LOG_WARN("failed to push back location constraint", K(ret)); } else if (OB_FAIL(strict_pwj_constraint_.push_back( @@ -2704,6 +2711,10 @@ int ObLogicalOperator::get_tbl_loc_cons_for_scan(LocationConstraint &loc_cons) loc_cons.key_.table_id_ = log_scan_op->get_table_id(); loc_cons.table_partition_info_ = log_scan_op->get_table_partition_info(); loc_cons.key_.ref_table_id_ = log_scan_op->get_real_index_table_id(); + if (NULL != sharding->get_phy_table_location_info() && + sharding->get_phy_table_location_info()->is_duplicate_table_not_in_dml()) { + loc_cons.add_constraint_flag(LocationConstraint::DupTabNotInDML); + } if (sharding->get_part_cnt() > 1 && sharding->is_distributed()) { if (sharding->is_partition_single()) { loc_cons.add_constraint_flag(LocationConstraint::SinglePartition); @@ -2717,6 +2728,30 @@ int ObLogicalOperator::get_tbl_loc_cons_for_scan(LocationConstraint &loc_cons) return ret; } +int ObLogicalOperator::get_dup_replica_cons_for_scan(ObDupTabConstraint &dup_rep_cons, + bool &found_dup_con) +{ + int ret = OB_SUCCESS; + ObLogTableScan *log_scan_op = 
dynamic_cast(this); + ObShardingInfo *sharding = NULL; + if (OB_ISNULL(log_scan_op) || + OB_ISNULL(sharding = log_scan_op->get_sharding())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(log_scan_op)); + } else if (NULL != sharding->get_phy_table_location_info()) { + // is duplicate table + if (log_scan_op->get_advisor_table_id() != OB_INVALID_ID && + sharding->get_phy_table_location_info()->is_duplicate_table_not_in_dml()) { + dup_rep_cons.first_ = log_scan_op->get_table_id(); + dup_rep_cons.second_ = log_scan_op->get_advisor_table_id(); + found_dup_con = true; + } else { + // do nothing + } + } + return ret; +} + int ObLogicalOperator::get_tbl_loc_cons_for_insert(LocationConstraint &loc_cons, bool &is_multi_part_dml) { int ret = OB_SUCCESS; diff --git a/src/sql/optimizer/ob_logical_operator.h b/src/sql/optimizer/ob_logical_operator.h index 1a1bdb41b4..2025d22eda 100644 --- a/src/sql/optimizer/ob_logical_operator.h +++ b/src/sql/optimizer/ob_logical_operator.h @@ -1348,7 +1348,8 @@ public: * Generate a table's location constraint for table scan op */ int get_tbl_loc_cons_for_scan(LocationConstraint &loc_cons); - + // generate a table location constraint for duplicate table's replica selection + int get_dup_replica_cons_for_scan(ObDupTabConstraint &dup_rep_cons, bool &found_dup_con); /** * @brief Generate a table's location constraint for insert op */ diff --git a/src/sql/optimizer/ob_optimizer_util.cpp b/src/sql/optimizer/ob_optimizer_util.cpp index e6eeef4a66..7a0bf6eeff 100644 --- a/src/sql/optimizer/ob_optimizer_util.cpp +++ b/src/sql/optimizer/ob_optimizer_util.cpp @@ -7322,6 +7322,7 @@ int ObOptimizerUtil::compute_basic_sharding_info(const ObAddr &local_addr, } else { ObAddr basic_addr; bool has_duplicated = false; + bool can_reselect_replica = true; ObShardingInfo *sharding = NULL; ObSEArray valid_addrs; ObSEArray intersect_addrs; @@ -7346,6 +7347,9 @@ int ObOptimizerUtil::compute_basic_sharding_info(const ObAddr &local_addr, } else { if (OB_FAIL(ObOptimizerUtil::intersect(valid_addrs, intersect_addrs, candidate_addrs))) { LOG_WARN("failed to intersect addrs", K(ret)); + } else if (OB_FALSE_IT(can_reselect_replica = can_reselect_replica && + valid_addrs.count() == candidate_addrs.count())) { + // do nothing } else if (OB_FAIL(intersect_addrs.assign(candidate_addrs))) { LOG_WARN("failed to assign addrs", K(ret)); } else { /*do nothing*/ } @@ -7400,6 +7404,7 @@ int ObOptimizerUtil::compute_basic_sharding_info(const ObAddr &local_addr, allocator, *input_shardings.at(i), reselected_pos.at(i), + can_reselect_replica, result_sharding))) { LOG_WARN("failed to compute duplicate table sharding", K(ret)); } else { /*do nothing*/ } @@ -7475,6 +7480,7 @@ int ObOptimizerUtil::compute_duplicate_table_sharding(const ObAddr &local_addr, ObIAllocator &allocator, const ObShardingInfo &src_sharding, const int64_t reselected_pos, + bool can_reselect_replica, ObShardingInfo *&target_sharding) { int ret = OB_SUCCESS; @@ -7503,6 +7509,7 @@ int ObOptimizerUtil::compute_duplicate_table_sharding(const ObAddr &local_addr, phy_table_loc->get_phy_part_loc_info_list_for_update().at(0); phy_part_loc.set_selected_replica_idx(reselected_pos); target_sharding->set_phy_table_location_info(phy_table_loc); + target_sharding->set_can_reselect_replica(can_reselect_replica); if (OB_FAIL(phy_part_loc.get_selected_replica(replica_loc))) { LOG_WARN("failed to get selected replica", K(ret)); } else if (replica_loc.get_server() == local_addr) { diff --git a/src/sql/optimizer/ob_optimizer_util.h 
b/src/sql/optimizer/ob_optimizer_util.h index 86bf5f0a15..abd0240b86 100644 --- a/src/sql/optimizer/ob_optimizer_util.h +++ b/src/sql/optimizer/ob_optimizer_util.h @@ -1307,6 +1307,7 @@ public: ObIAllocator &allocator, const ObShardingInfo &src_sharding, const int64_t reselected_pos, + bool can_reselect_replica, ObShardingInfo *&target_sharding); static int64_t get_join_style_parallel(const int64_t left_parallel, diff --git a/src/sql/optimizer/ob_phy_table_location_info.cpp b/src/sql/optimizer/ob_phy_table_location_info.cpp index 1fc0b7758c..9b28fbefa2 100644 --- a/src/sql/optimizer/ob_phy_table_location_info.cpp +++ b/src/sql/optimizer/ob_phy_table_location_info.cpp @@ -15,8 +15,10 @@ #include "ob_phy_table_location_info.h" #include "observer/ob_server_struct.h" #include "sql/das/ob_das_location_router.h" +#include "storage/tx/wrs/ob_black_list.h" using namespace oceanbase::common; using namespace oceanbase::share; +using namespace oceanbase::transaction; namespace oceanbase { namespace sql @@ -61,8 +63,7 @@ int ObOptTabletLoc::assign(const ObOptTabletLoc &other) int ObOptTabletLoc::assign_with_only_readable_replica(const ObObjectID &partition_id, const ObObjectID &first_level_part_id, const common::ObTabletID &tablet_id, - const ObLSLocation &ls_location, - const ObIArray &invalid_servers) + const ObLSLocation &ls_location) { int ret = OB_SUCCESS; reset(); @@ -74,27 +75,26 @@ int ObOptTabletLoc::assign_with_only_readable_replica(const ObObjectID &partitio for (int64_t i = 0; OB_SUCC(ret) && i < ls_location.get_replica_locations().count(); ++i) { const ObLSReplicaLocation &replica_loc = ls_location.get_replica_locations().at(i); if (ObReplicaTypeCheck::is_readable_replica(replica_loc.get_replica_type())) { - bool is_in_invalid_servers = false; - for (int64_t j = 0; OB_SUCC(ret) - && !is_in_invalid_servers && j < invalid_servers.count(); ++j) { - if (replica_loc.get_server() == invalid_servers.at(j)) { - is_in_invalid_servers = true; - } - } - if (OB_SUCC(ret) && !is_in_invalid_servers) { - //此处依赖了构造函数隐藏转换 + transaction::ObBLKey bl_key; + bool in_black_list = false; + if (OB_FAIL(bl_key.init(replica_loc.get_server(), ls_location.get_tenant_id(), ls_location.get_ls_id()))) { + LOG_WARN("init black list key failed", K(ret)); + } else if (OB_FAIL(ObBLService::get_instance().check_in_black_list(bl_key, in_black_list))) { + LOG_WARN("check in black list failed", K(ret)); + } else if (!in_black_list || replica_loc.is_strong_leader()) { if (OB_FAIL(replica_locations_.push_back(replica_loc))) { LOG_WARN("Failed to push back replica locations", K(ret), K(i), K(replica_loc), K(replica_locations_)); } + } else { + LOG_INFO("the replica location is invalid", K(bl_key), K(replica_loc)); } } } if (OB_SUCC(ret)) { if (OB_UNLIKELY(0 == replica_locations_.count())) { ret = OB_NO_READABLE_REPLICA; - LOG_WARN("there has no readable replica", K(ret), - K(invalid_servers), K(ls_location.get_replica_locations())); + LOG_WARN("there has no readable replica", K(ret), K(ls_location.get_replica_locations())); } } return ret; @@ -332,23 +332,22 @@ int ObCandiTabletLoc::get_selected_replica(ObRoutePolicy::CandidateReplica &repl return ret; } -int ObCandiTabletLoc::set_part_loc_with_only_readable_replica( - const ObObjectID &partition_id, - const ObObjectID &first_level_part_id, - const common::ObTabletID &tablet_id, - const ObLSLocation &partition_location, - const ObIArray &invalid_servers) +int ObCandiTabletLoc::set_part_loc_with_only_readable_replica(const ObObjectID &partition_id, + const ObObjectID 
&first_level_part_id, + const common::ObTabletID &tablet_id, + const ObLSLocation &partition_location) { int ret = OB_SUCCESS; if (OB_UNLIKELY(has_selected_replica())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("partition location has not been set yet, but replica idx has been selected", K(ret), K(*this), K(partition_location)); - } else if (OB_FAIL(opt_tablet_loc_.assign_with_only_readable_replica( - partition_id, first_level_part_id, tablet_id, - partition_location, invalid_servers))) { + } else if (OB_FAIL(opt_tablet_loc_.assign_with_only_readable_replica(partition_id, + first_level_part_id, + tablet_id, + partition_location))) { LOG_WARN("fail to assign partition location with only readable replica", - K(ret), K(partition_location), K(invalid_servers)); + K(ret), K(partition_location)); } return ret; } diff --git a/src/sql/optimizer/ob_phy_table_location_info.h b/src/sql/optimizer/ob_phy_table_location_info.h index ca208cc04d..b8ad207df8 100644 --- a/src/sql/optimizer/ob_phy_table_location_info.h +++ b/src/sql/optimizer/ob_phy_table_location_info.h @@ -38,8 +38,7 @@ public: int assign_with_only_readable_replica(const ObObjectID &partition_id, const ObObjectID &first_level_part_id, const common::ObTabletID &tablet_id, - const share::ObLSLocation &partition_location, - const common::ObIArray &invalid_servers); + const share::ObLSLocation &partition_location); bool is_valid() const; bool operator==(const ObOptTabletLoc &other) const; @@ -103,12 +102,10 @@ public: int get_priority_replica(int64_t idx, ObRoutePolicy::CandidateReplica &replica_loc) const; template int get_priority_replica_base(int64_t selected_replica_idx, T &replica_loc) const; - int set_part_loc_with_only_readable_replica( - const ObObjectID &partition_id, - const ObObjectID &first_level_part_id, - const common::ObTabletID &tablet_id, - const share::ObLSLocation &partition_location, - const common::ObIArray &invalid_servers); + int set_part_loc_with_only_readable_replica(const ObObjectID &partition_id, + const ObObjectID &first_level_part_id, + const common::ObTabletID &tablet_id, + const share::ObLSLocation &partition_location); const ObOptTabletLoc &get_partition_location() const { return opt_tablet_loc_; } ObOptTabletLoc &get_partition_location() { return opt_tablet_loc_; } const common::ObIArray &get_priority_replica_idxs() const { return priority_replica_idxs_; } diff --git a/src/sql/optimizer/ob_table_location.cpp b/src/sql/optimizer/ob_table_location.cpp index d8d720ce9e..1ecf16d8d3 100644 --- a/src/sql/optimizer/ob_table_location.cpp +++ b/src/sql/optimizer/ob_table_location.cpp @@ -1379,8 +1379,7 @@ int ObTableLocation::calculate_candi_tablet_locations( ObExecContext &exec_ctx, const ParamStore ¶ms, ObCandiTabletLocIArray &candi_tablet_locs, - const ObDataTypeCastParams &dtc_params, - bool nonblock/*false*/) const + const ObDataTypeCastParams &dtc_params) const { int ret = OB_SUCCESS; ObSEArray partition_ids; @@ -1397,14 +1396,11 @@ int ObTableLocation::calculate_candi_tablet_locations( dtc_params))) { LOG_WARN("Failed to calculate partition ids", K(ret)); } else if (OB_FAIL(get_tablet_locations(exec_ctx.get_das_ctx(), - exec_ctx.get_my_session(), - loc_meta_.ref_table_id_, tablet_ids, partition_ids, first_level_part_ids, - candi_tablet_locs, - nonblock))) { - LOG_WARN("Failed to set partition locations", K(ret), K(partition_ids)); + candi_tablet_locs))) { + LOG_WARN("Failed to set partition locations", K(ret), K(partition_ids), K(tablet_ids)); } else {}//do nothing return ret; @@ -1720,65 +1716,16 @@ int 
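As an illustration only (not part of the patch): the rewritten assign_with_only_readable_replica above keeps a readable replica unless its server sits in the transaction black list, always keeps the strong leader, and reports an error when nothing survives. A self-contained sketch of that filter, with hypothetical ToyReplica/pick_readable names standing in for ObLSReplicaLocation and ObBLService, could read:

#include <set>
#include <string>
#include <vector>

struct ToyReplica {
  std::string server;
  bool readable = true;    // readable replica type (e.g. full / read-only)
  bool is_leader = false;  // strong leader is always usable
};

// Keep readable replicas, dropping any server found in the black list unless
// it is the leader; an empty result corresponds to "no readable replica".
std::vector<ToyReplica> pick_readable(const std::vector<ToyReplica> &all,
                                      const std::set<std::string> &black_list) {
  std::vector<ToyReplica> out;
  for (const ToyReplica &r : all) {
    if (!r.readable) {
      continue;                                       // skip non-readable types
    }
    const bool black_listed = black_list.count(r.server) > 0;
    if (!black_listed || r.is_leader) {
      out.push_back(r);
    }
  }
  return out;
}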
ObTableLocation::calculate_tablet_ids(ObExecContext &exec_ctx, } int ObTableLocation::get_tablet_locations(ObDASCtx &das_ctx, - ObSQLSessionInfo *session, - const uint64_t ref_table_id, const ObIArray &tablet_ids, const ObIArray &partition_ids, const ObIArray &first_level_part_ids, - ObCandiTabletLocIArray &candi_tablet_locs, - bool nonblock /*false*/) const + ObCandiTabletLocIArray &candi_tablet_locs) const { - int ret = OB_SUCCESS; - if (OB_INVALID_ID == ref_table_id) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ref_table_id), K(tablet_ids.empty())); - } else if (PARTITION_LEVEL_TWO == part_level_ && partition_ids.count() != first_level_part_ids.count()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected log part id count", K(partition_ids.count()), K(first_level_part_ids.count())); - } else { - NG_TRACE(get_location_cache_begin); - candi_tablet_locs.reset(); - int64_t N = tablet_ids.count(); - if (OB_FAIL(candi_tablet_locs.prepare_allocate(N))) { - LOG_WARN("Partitoin location list prepare error", K(ret)); - } else { - ObDASLocationRouter &loc_router = das_ctx.get_location_router(); - ObLSLocation location; - for (int64_t i = 0; OB_SUCC(ret) && i < N; ++i) { - location.reset(); - ObCandiTabletLoc &candi_tablet_loc = candi_tablet_locs.at(i); - if (nonblock) { - //TODO shengle use nonblock after location service support nonblock interface - ret = loc_router.get(loc_meta_, tablet_ids.at(i), location); - } else { - ret = loc_router.get(loc_meta_, tablet_ids.at(i), location); - } - if (OB_FAIL(ret)) { - //TODO shengle set partition key for location cache renew - LOG_WARN("Get partition error, then set partition key for location cache renew later", - K(ret), K(ref_table_id), "tablet_id", tablet_ids.at(i), K(candi_tablet_loc), K(table_type_)); - } else { - if (OB_ISNULL(session)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(session), K(ret)); - } else if (OB_FAIL(candi_tablet_loc.set_part_loc_with_only_readable_replica( - partition_ids.at(i), - PARTITION_LEVEL_TWO == part_level_ ? 
first_level_part_ids.at(i) : OB_INVALID_ID, - tablet_ids.at(i), location, - session->get_retry_info().get_invalid_servers()))) { - LOG_WARN("fail to set partition location with only readable replica", - K(ret),K(i), K(location), K(candi_tablet_locs), K(tablet_ids), K(partition_ids), - K(session->get_retry_info().get_invalid_servers())); - } - LOG_TRACE("set partition location with only readable replica", - K(ret),K(i), K(location), K(candi_tablet_locs), K(tablet_ids), K(partition_ids), - K(session->get_retry_info().get_invalid_servers())); - } - } // for end - } - } - - return ret; + return das_ctx.get_location_router().nonblock_get_candi_tablet_locations(loc_meta_, + tablet_ids, + partition_ids, + first_level_part_ids, + candi_tablet_locs); } int ObTableLocation::get_part_col_type(const ObRawExpr *expr, @@ -5352,7 +5299,6 @@ int ObTableLocation::get_full_leader_table_loc(ObIAllocator &allocator, { int ret = OB_SUCCESS; const ObTableSchema *table_schema = NULL; - const int64_t expire_renew_time = 2 * 1000000; // 2s ObSEArray tablet_ids; ObSEArray partition_ids; ObSEArray first_level_part_ids; @@ -5388,7 +5334,7 @@ int ObTableLocation::get_full_leader_table_loc(ObIAllocator &allocator, OX(tablet_loc->loc_meta_ = loc_meta); OX(tablet_loc->partition_id_ = partition_ids.at(i)); OX(tablet_loc->first_level_part_id_ = first_level_part_ids.at(i)); - OZ(ObDASLocationRouter::get_leader(tenant_id, tablet_ids.at(i), *tablet_loc, expire_renew_time)); + OZ(ObDASLocationRouter::nonblock_get_leader(tenant_id, tablet_ids.at(i), *tablet_loc)); OZ(table_loc->add_tablet_loc(tablet_loc)); } } diff --git a/src/sql/optimizer/ob_table_location.h b/src/sql/optimizer/ob_table_location.h index 51734cfdbb..f73c0e8b69 100644 --- a/src/sql/optimizer/ob_table_location.h +++ b/src/sql/optimizer/ob_table_location.h @@ -624,8 +624,7 @@ public: ObExecContext &exec_ctx, const ParamStore ¶ms, ObCandiTabletLocIArray &candi_tablet_locs, - const common::ObDataTypeCastParams &dtc_params, - bool nonblock = false) const; + const common::ObDataTypeCastParams &dtc_params) const; /** * Calculate tablet ids from input parameters. 
*/ @@ -663,13 +662,10 @@ public: common::ObIArray &partition_ids); int get_tablet_locations(ObDASCtx &das_ctx, - ObSQLSessionInfo *session, - const uint64_t ref_table_id, const ObIArray &tablet_ids, const ObIArray &partition_ids, const ObIArray &first_level_part_ids, - ObCandiTabletLocIArray &candi_tablet_locs, - bool nonblock = false) const; + ObCandiTabletLocIArray &candi_tablet_locs) const; static int send_add_interval_partition_rpc_new_engine(ObIAllocator &allocator, ObSQLSessionInfo *session, @@ -750,7 +746,7 @@ public: (part_get_all_ && subpart_get_all_ && (part_level_ == share::schema::PARTITION_LEVEL_TWO)); } - inline bool is_part_or_subpart_all_partition() const + inline bool is_part_or_subpart_all_partition() const { return (part_level_ == share::schema::PARTITION_LEVEL_ZERO) || (part_level_ == share::schema::PARTITION_LEVEL_ONE && (part_get_all_ || !is_part_range_get_)) || diff --git a/src/sql/parser/sql_parser_mysql_mode.y b/src/sql/parser/sql_parser_mysql_mode.y index b1b90c9799..b9c30f344c 100755 --- a/src/sql/parser/sql_parser_mysql_mode.y +++ b/src/sql/parser/sql_parser_mysql_mode.y @@ -5927,6 +5927,11 @@ TABLE_MODE opt_equal_mark STRING_VALUE (void)($2); malloc_non_terminal_node($$, result->malloc_pool_, T_TABLE_MODE, 1, $3); } +| DUPLICATE_SCOPE opt_equal_mark STRING_VALUE +{ + (void)($2); + malloc_non_terminal_node($$, result->malloc_pool_, T_DUPLICATE_SCOPE, 1, $3); +} | EXPIRE_INFO opt_equal_mark '(' expr ')' { (void)($2) ; /* make bison mute */ diff --git a/src/sql/plan_cache/ob_dist_plans.cpp b/src/sql/plan_cache/ob_dist_plans.cpp index ef6ad7fadd..354ef041db 100644 --- a/src/sql/plan_cache/ob_dist_plans.cpp +++ b/src/sql/plan_cache/ob_dist_plans.cpp @@ -157,6 +157,7 @@ int ObDistPlans::add_plan(ObPhysicalPlan &plan, ObPlanMatchHelper helper(plan_set_); ObArray phy_tbl_infos; ObArray out_tbl_locations; + for (int64_t i = 0; OB_SUCC(ret) && !is_matched && i < dist_plans_.count(); i++) { //检查是否已有其他线程add该plan成功 phy_tbl_infos.reuse(); @@ -167,7 +168,11 @@ int ObDistPlans::add_plan(ObPhysicalPlan &plan, LOG_WARN("invalid argument", K(tmp_plan)); } else if (OB_FAIL(helper.match_plan(pc_ctx, tmp_plan, is_matched, phy_tbl_infos, out_tbl_locations))) { LOG_WARN("fail to match dist plan", K(ret)); - } else if (false == is_matched) { + } else { + is_matched = is_matched && tmp_plan->has_same_location_constraints(plan); + } + + if (!is_matched) { // do nothing } else { ret = OB_SQL_PC_PLAN_DUPLICATE; @@ -175,14 +180,7 @@ int ObDistPlans::add_plan(ObPhysicalPlan &plan, } if (OB_SUCC(ret) && !is_matched) { - if (OB_FAIL(plan.set_location_constraints(pc_ctx.sql_ctx_.base_constraints_, - pc_ctx.sql_ctx_.strict_constraints_, - pc_ctx.sql_ctx_.non_strict_constraints_))) { - LOG_WARN("failed to set location constraints", K(ret), K(plan), - K(pc_ctx.sql_ctx_.base_constraints_), - K(pc_ctx.sql_ctx_.strict_constraints_), - K(pc_ctx.sql_ctx_.non_strict_constraints_)); - } else if (OB_FAIL(dist_plans_.push_back(&plan))) { + if (OB_FAIL(dist_plans_.push_back(&plan))) { LOG_WARN("fail to add plan", K(ret)); } } @@ -196,7 +194,6 @@ int ObDistPlans::is_same_plan(const ObPhysicalPlan *l_plan, bool &is_same) const { int ret = OB_SUCCESS; - if (OB_ISNULL(l_plan) || OB_ISNULL(r_plan)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(l_plan), K(r_plan)); @@ -204,7 +201,6 @@ int ObDistPlans::is_same_plan(const ObPhysicalPlan *l_plan, is_same = (l_plan->get_signature() == r_plan->get_signature()); LOG_DEBUG("compare plan", K(l_plan->get_signature()), K(r_plan->get_signature()), 
K(is_same)); } - return ret; } diff --git a/src/sql/plan_cache/ob_plan_cache_util.cpp b/src/sql/plan_cache/ob_plan_cache_util.cpp index 55dfe82987..f3456f7c8f 100644 --- a/src/sql/plan_cache/ob_plan_cache_util.cpp +++ b/src/sql/plan_cache/ob_plan_cache_util.cpp @@ -285,8 +285,7 @@ int ObPhyLocationGetter::get_phy_locations(const ObIArray &tabl if (OB_FAIL(table_location.calculate_candi_tablet_locations(exec_ctx, params, candi_table_loc.get_phy_part_loc_info_list_for_update(), - dtc_params, - true /* non-block */))) { + dtc_params))) { LOG_WARN("failed to calculate partition location", K(ret)); } else { NG_TRACE(calc_partition_location_end); diff --git a/src/sql/plan_cache/ob_plan_cache_util.h b/src/sql/plan_cache/ob_plan_cache_util.h index b93652b736..f05ffe01b9 100644 --- a/src/sql/plan_cache/ob_plan_cache_util.h +++ b/src/sql/plan_cache/ob_plan_cache_util.h @@ -315,6 +315,25 @@ struct ObPCParamEqualInfo } }; +struct ObDupTabConstraint +{ + uint64_t first_; + uint64_t second_; + TO_STRING_KV(K_(first), K_(second)); + ObDupTabConstraint() + : first_(common::OB_INVALID_ID), + second_(common::OB_INVALID_ID) + {} + ObDupTabConstraint(int64_t first, int64_t second) + : first_(first), + second_(second) + {} + inline bool operator==(const ObDupTabConstraint &other) const + { + return first_ == other.first_ && second_ == other.second_; + } +}; + struct ObPCPrivInfo { share::ObRawPriv sys_priv_; diff --git a/src/sql/plan_cache/ob_plan_match_helper.cpp b/src/sql/plan_cache/ob_plan_match_helper.cpp index a831cbc935..16350b918d 100644 --- a/src/sql/plan_cache/ob_plan_match_helper.cpp +++ b/src/sql/plan_cache/ob_plan_match_helper.cpp @@ -35,6 +35,7 @@ int ObPlanMatchHelper::match_plan(const ObPlanCacheCtx &pc_ctx, const ObIArray& base_cons = plan->get_base_constraints(); const ObIArray& strict_cons = plan->get_strict_constraints(); const ObIArray& non_strict_cons = plan->get_non_strict_constraints(); + const ObIArray& dup_rep_cons = plan->get_dup_table_replica_constraints(); const ObIArray &plan_tbl_locs = plan->get_table_locations(); PWJTabletIdMap pwj_map; bool use_pwj_map = false; @@ -59,7 +60,8 @@ int ObPlanMatchHelper::match_plan(const ObPlanCacheCtx &pc_ctx, out_tbl_locations, phy_tbl_infos))) { LOG_WARN("failed to calculate table locations", K(ret), K(base_cons)); } else if (has_duplicate_table && - OB_FAIL(reselect_duplicate_table_best_replica(base_cons, server, phy_tbl_infos))) { + OB_FAIL(reselect_duplicate_table_best_replica(base_cons, server, phy_tbl_infos, + dup_rep_cons))) { LOG_WARN("failed to reselect duplicate table replica", K(ret)); } else if (OB_FAIL(cmp_table_types(base_cons, server, out_tbl_locations, phy_tbl_infos, is_matched))) { @@ -202,23 +204,66 @@ int ObPlanMatchHelper::calc_table_locations( int ObPlanMatchHelper::reselect_duplicate_table_best_replica( const ObIArray &loc_cons, const common::ObAddr &server, - const common::ObIArray &phy_tbl_infos) const + const common::ObIArray &phy_tbl_infos, + const ObIArray &dup_table_replica_cons) const { int ret = OB_SUCCESS; if (loc_cons.count() == phy_tbl_infos.count()) { for (int64_t i = 0; OB_SUCC(ret) && i < phy_tbl_infos.count(); ++i) { const ObCandiTableLoc &phy_tbl_info = phy_tbl_infos.at(i); if (phy_tbl_info.is_duplicate_table_not_in_dml()) { - for (int64_t j = 0; OB_SUCC(ret) && j < phy_tbl_info.get_partition_cnt(); ++j) { - const ObCandiTabletLoc &phy_part_loc_info = - phy_tbl_info.get_phy_part_loc_info_list().at(j); + bool selected_replica = false; + if (phy_tbl_info.get_partition_cnt() == 1) { + const ObCandiTabletLoc 
&dup_phy_part_loc = + phy_tbl_info.get_phy_part_loc_info_list().at(0); int64_t replica_idx = 0; - if (phy_part_loc_info.is_server_in_replica(server, replica_idx)) { - LOG_DEBUG("reselect replica index will happen", - K(phy_tbl_info), K(replica_idx), K(server)); - if (OB_FAIL(const_cast(phy_part_loc_info). - set_selected_replica_idx(replica_idx))) { - LOG_WARN("failed to set selected replica idx", K(ret), K(replica_idx)); + int64_t left_tbl_pos = -1; + // find first constraint + for (int64_t j = 0; OB_SUCC(ret) && j < dup_table_replica_cons.count(); ++j) { + ObDupTabConstraint con = dup_table_replica_cons.at(j); + if (con.first_ == OB_INVALID) { + // do nothing + } else if (con.first_==i) { + left_tbl_pos = con.second_; + } + } + if (left_tbl_pos != -1) { + const ObCandiTabletLoc& left_tbl_part_loc_info = + phy_tbl_infos.at(left_tbl_pos).get_phy_part_loc_info_list().at(0); + share::ObLSReplicaLocation replica_loc; + if (OB_FAIL(left_tbl_part_loc_info.get_selected_replica(replica_loc))) { + LOG_WARN("failed to set selected replica idx", K(ret), K(left_tbl_part_loc_info)); + } else if (dup_phy_part_loc.is_server_in_replica(replica_loc.get_server(), replica_idx)) { + LOG_DEBUG("reselect replica index according to pwj constraints will happen", + K(dup_phy_part_loc), K(replica_idx), K(replica_loc.get_server()), K(replica_loc)); + ObRoutePolicy::CandidateReplica replica; + if (OB_FAIL(dup_phy_part_loc.get_priority_replica(replica_idx, replica))) { + LOG_WARN("failed to get priority replica", K(ret)); + } else if (OB_FAIL(const_cast(dup_phy_part_loc). + set_selected_replica_idx(replica_idx))) { + LOG_WARN("failed to set selected replica idx", K(ret), K(replica_idx)); + } else { + selected_replica = true; + } + } + } + } + // if not found, just select local + if (!selected_replica) { + for (int64_t j = 0; OB_SUCC(ret) && j < phy_tbl_info.get_partition_cnt(); ++j) { + const ObCandiTabletLoc &phy_part_loc_info = + phy_tbl_info.get_phy_part_loc_info_list().at(j); + int64_t replica_idx = 0; + if (phy_part_loc_info.is_server_in_replica(server, replica_idx)) { + LOG_DEBUG("reselect replica index will happen", + K(phy_tbl_info), K(replica_idx), K(server)); + ObRoutePolicy::CandidateReplica replica; + if (OB_FAIL(phy_part_loc_info.get_priority_replica(replica_idx, replica))) { + LOG_WARN("failed to get priority replica", K(ret)); + } else if (OB_FAIL(const_cast(phy_part_loc_info). 
+ set_selected_replica_idx(replica_idx))) { + LOG_WARN("failed to set selected replica idx", K(ret), K(replica_idx)); + } } } } diff --git a/src/sql/plan_cache/ob_plan_match_helper.h b/src/sql/plan_cache/ob_plan_match_helper.h index a7da42cca4..d401240526 100644 --- a/src/sql/plan_cache/ob_plan_match_helper.h +++ b/src/sql/plan_cache/ob_plan_match_helper.h @@ -73,7 +73,8 @@ private: int reselect_duplicate_table_best_replica( const ObIArray &loc_cons, const common::ObAddr &server, - const common::ObIArray &phy_tbl_infos) const; + const common::ObIArray &phy_tbl_infos, + const ObIArray &dup_table_replica_cons) const; /** * @brief Compare table location types * diff --git a/src/sql/resolver/ddl/ob_ddl_resolver.cpp b/src/sql/resolver/ddl/ob_ddl_resolver.cpp index 2fa0f52149..e04d2b3895 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.cpp +++ b/src/sql/resolver/ddl/ob_ddl_resolver.cpp @@ -1814,9 +1814,9 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool duplicate_scope_ = my_duplicate_scope; } if (OB_SUCC(ret) && stmt::T_ALTER_TABLE == stmt_->get_stmt_type()) { - if (OB_FAIL(alter_table_bitset_.add_member(ObAlterTableArg::DUPLICATE_SCOPE))) { - LOG_WARN("fail to add member to bitset!", K(ret)); - } + ret = OB_NOT_SUPPORTED; + LOG_WARN("alter table duplicate scope not supported", KR(ret)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, "alter table duplicate scope"); } } break; diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index d765cebb6b..4b0f8b2858 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -271,6 +271,12 @@ ob_set_subtarget(ob_storage tx tx/ob_tx_ls_log_writer.cpp tx/ob_tx_msg.cpp tx/ob_tx_replay_executor.cpp + tx/ob_dup_table_util.cpp + tx/ob_dup_table_lease.cpp + tx/ob_dup_table_base.cpp + tx/ob_dup_table_tablets.cpp + tx/ob_dup_table_ts_sync.cpp + tx/ob_dup_table_stat.cpp tx/ob_xa_ctx.cpp tx/ob_xa_ctx_mgr.cpp tx/ob_xa_dblink_service.cpp diff --git a/src/storage/high_availability/ob_ls_complete_migration.cpp b/src/storage/high_availability/ob_ls_complete_migration.cpp index 5dbda1685d..9bd63201f8 100644 --- a/src/storage/high_availability/ob_ls_complete_migration.cpp +++ b/src/storage/high_availability/ob_ls_complete_migration.cpp @@ -1131,13 +1131,27 @@ int ObStartCompleteMigrationTask::change_member_list_() } else { if (ObMigrationOpType::ADD_LS_OP == ctx_->arg_.type_) { const int64_t change_member_list_timeout_us = GCONF.sys_bkgd_migration_change_member_list_timeout; - if (OB_FAIL(ls->add_member(ctx_->arg_.dst_, ctx_->arg_.paxos_replica_number_, change_member_list_timeout_us))) { - LOG_WARN("failed to add member", K(ret), KPC(ctx_)); + if (REPLICA_TYPE_FULL == ctx_->arg_.dst_.get_replica_type()) { + if (OB_FAIL(ls->add_member(ctx_->arg_.dst_, ctx_->arg_.paxos_replica_number_, change_member_list_timeout_us))) { + LOG_WARN("failed to add member", K(ret), KPC(ctx_)); + } + } else { + // R-replica + if (OB_FAIL(ls->add_learner(ctx_->arg_.dst_, change_member_list_timeout_us))) { + LOG_WARN("failed to add learner", K(ret), KPC(ctx_)); + } } } else if (ObMigrationOpType::MIGRATE_LS_OP == ctx_->arg_.type_) { const int64_t change_member_list_timeout_us = GCONF.sys_bkgd_migration_change_member_list_timeout; - if (OB_FAIL(ls->replace_member(ctx_->arg_.dst_, ctx_->arg_.src_, change_member_list_timeout_us))) { - LOG_WARN("failed to repalce member", K(ret), KPC(ctx_)); + if (REPLICA_TYPE_FULL == ctx_->arg_.dst_.get_replica_type()) { + if (OB_FAIL(ls->replace_member(ctx_->arg_.dst_, ctx_->arg_.src_, 
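For illustration (not part of the patch): with the new ObDupTabConstraint pairs, the plan-cache matcher above first tries to pin a single-partition duplicate table to the server already chosen for its advisor table, and only falls back to the local server when no constraint applies. A simplified standalone sketch, with invented ToyTableLoc/ToyDupCons types and index-based constraints, is shown below; it assumes the advisor table already has a selected replica:

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

struct ToyTableLoc {
  std::vector<std::string> replica_servers;  // candidate replica servers
  int64_t selected = -1;                     // index of the chosen replica
};

struct ToyDupCons {          // models the first_/second_ pair of the constraint
  int64_t dup_tbl_idx;       // position of the duplicate table
  int64_t advisor_tbl_idx;   // position of the table it should co-locate with
};

// Prefer the replica that sits on the advisor table's already-selected server;
// if no constraint matches (or that server has no replica), fall back to local.
void reselect_dup_replica(std::vector<ToyTableLoc> &tables,
                          const std::vector<ToyDupCons> &cons,
                          int64_t dup_idx,
                          const std::string &local_server) {
  ToyTableLoc &dup = tables[dup_idx];
  for (const ToyDupCons &c : cons) {
    if (c.dup_tbl_idx != dup_idx) {
      continue;
    }
    const ToyTableLoc &advisor = tables[c.advisor_tbl_idx];
    const std::string &server = advisor.replica_servers[advisor.selected];
    for (std::size_t i = 0; i < dup.replica_servers.size(); ++i) {
      if (dup.replica_servers[i] == server) {
        dup.selected = static_cast<int64_t>(i);      // co-locate with the advisor
        return;
      }
    }
  }
  for (std::size_t i = 0; i < dup.replica_servers.size(); ++i) {
    if (dup.replica_servers[i] == local_server) {
      dup.selected = static_cast<int64_t>(i);        // fallback: local replica
      return;
    }
  }
}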
change_member_list_timeout_us))) { + LOG_WARN("failed to replace member", K(ret), KPC(ctx_)); + } + } else { + // R-replica + if (OB_FAIL(ls->replace_learner(ctx_->arg_.dst_, ctx_->arg_.src_, change_member_list_timeout_us))) { + LOG_WARN("failed to replace_learner", K(ret), KPC(ctx_)); + } } } else { ret = OB_ERR_UNEXPECTED; @@ -1176,7 +1190,8 @@ int ObStartCompleteMigrationTask::check_need_wait_( || ObMigrationOpType::MIGRATE_LS_OP == ctx_->arg_.type_) { need_wait = true; } else if (ObMigrationOpType::CHANGE_LS_OP == ctx_->arg_.type_) { - if (!ObReplicaTypeCheck::is_replica_with_ssstore(ls->get_replica_type()) + // TODO: make sure this is right + if (!ObReplicaTypeCheck::is_replica_with_ssstore(REPLICA_TYPE_FULL) && ObReplicaTypeCheck::is_full_replica(ctx_->arg_.dst_.get_replica_type())) { need_wait = true; } diff --git a/src/storage/high_availability/ob_ls_migration.cpp b/src/storage/high_availability/ob_ls_migration.cpp index 7048798f67..801c8ada61 100644 --- a/src/storage/high_availability/ob_ls_migration.cpp +++ b/src/storage/high_availability/ob_ls_migration.cpp @@ -1004,12 +1004,11 @@ int ObStartMigrationTask::try_remove_member_list_() } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("log stream should not be NULL", KR(ret), K(*ctx_), KP(ls)); - } else if (ls->get_replica_type() != src_type - || self_addr != ctx_->arg_.src_.get_server() + // TODO: muwei make sure this is right + } else if (self_addr != ctx_->arg_.src_.get_server() || self_addr != ctx_->arg_.dst_.get_server()) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("replica type do not match", K(ret), K(self_addr), K(src_type), K(dest_type), - "local log stream type", ls->get_replica_type()); + LOG_WARN("replica type do not match", K(ret), K(self_addr), K(src_type), K(dest_type)); } else if (src_type == dest_type) { ret = OB_ALREADY_DONE; LOG_WARN("src type and dest type is same, no need change", K(ret), K(src_type), K(dest_type)); @@ -1083,7 +1082,8 @@ int ObStartMigrationTask::deal_with_local_ls_() ctx_->local_clog_checkpoint_scn_ = local_ls_meta.get_clog_checkpoint_scn(); ctx_->local_rebuild_seq_ = local_ls_meta.get_rebuild_seq(); common::ObReplicaType replica_type = ctx_->arg_.dst_.get_replica_type(); - common::ObReplicaType local_replica_type = ls->get_replica_type(); + // TODO: muwei make sure this is right + common::ObReplicaType local_replica_type = REPLICA_TYPE_FULL; if (local_replica_type != replica_type && !ObReplicaTypeCheck::change_replica_op_allow(local_replica_type, replica_type)) { ret = OB_OP_NOT_ALLOW; @@ -1253,6 +1253,8 @@ int ObStartMigrationTask::update_ls_() if (OB_FAIL(ls->update_ls_meta(update_restore_status, ctx_->src_ls_meta_package_.ls_meta_))) { LOG_WARN("failed to update ls meta", K(ret), KPC(ctx_)); + } else if (OB_FAIL(ls->set_dup_table_ls_meta(ctx_->src_ls_meta_package_.dup_ls_meta_))) { + LOG_WARN("failed to set dup table ls meta", K(ret), KPC(ctx_)); } else if (OB_FAIL(ls->get_end_lsn(end_lsn))) { LOG_WARN("failed to get end lsn", K(ret), KPC(ctx_)); } else if (end_lsn >= ctx_->src_ls_meta_package_.palf_meta_.curr_lsn_) { @@ -1410,7 +1412,8 @@ int ObStartMigrationTask::check_ls_need_copy_data_(bool &need_copy) } else if (OB_FAIL(ObStorageHADagUtils::get_ls(ctx_->arg_.ls_id_, ls_handle))) { LOG_WARN("failed to get ls", K(ret), KPC(ctx_)); } else if (ObMigrationOpType::CHANGE_LS_OP == ctx_->arg_.type_ && - ObReplicaTypeCheck::is_readable_replica(ls_handle.get_ls()->get_replica_type())) { + // TODO: muwei make sure this is right + 
ObReplicaTypeCheck::is_readable_replica(REPLICA_TYPE_FULL)) { //no need generate copy task, only change member need_copy = false; LOG_INFO("no need change replica no need copy task", "src_type", ctx_->arg_.src_.get_replica_type(), diff --git a/src/storage/high_availability/ob_ls_migration_handler.cpp b/src/storage/high_availability/ob_ls_migration_handler.cpp index a842392aa3..a87a74322d 100644 --- a/src/storage/high_availability/ob_ls_migration_handler.cpp +++ b/src/storage/high_availability/ob_ls_migration_handler.cpp @@ -1120,8 +1120,9 @@ int ObLSMigrationHandler::build_rebuild_task_() } else { ObTaskId task_id; task_id.init(GCONF.self_addr_); - ObReplicaMember dst_replica_member(GCONF.self_addr_, timestamp, ls_->get_replica_type()); - ObReplicaMember src_replica_member(leader_addr, timestamp, ls_->get_replica_type()); + // TODO: muwei make sure this is right + ObReplicaMember dst_replica_member(GCONF.self_addr_, timestamp, REPLICA_TYPE_FULL); + ObReplicaMember src_replica_member(leader_addr, timestamp, REPLICA_TYPE_FULL); ObMigrationOpArg arg; arg.cluster_id_ = GCONF.cluster_id; arg.data_src_ = src_replica_member; diff --git a/src/storage/high_availability/ob_ls_remove_member_dag.cpp b/src/storage/high_availability/ob_ls_remove_member_dag.cpp index fb8bc44a65..712aabb329 100644 --- a/src/storage/high_availability/ob_ls_remove_member_dag.cpp +++ b/src/storage/high_availability/ob_ls_remove_member_dag.cpp @@ -236,8 +236,8 @@ int ObLSRemoveMemberTask::process() LOG_WARN("ls remove member task do not init", K(ret)); } else { - if (OB_FAIL(remove_member_())) { - LOG_WARN("failed to remove member", K(ret), KPC(ctx_)); + if (OB_FAIL(do_change_member_())) { + LOG_WARN("failed to change member", K(ret), KPC(ctx_)); } if (OB_SUCCESS != (tmp_ret = report_to_rs_())) { @@ -252,14 +252,13 @@ int ObLSRemoveMemberTask::process() return ret; } -int ObLSRemoveMemberTask::remove_member_() +int ObLSRemoveMemberTask::do_change_member_() { int ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS; ObLSHandle ls_handle; ObLS *ls = nullptr; ObLSService *ls_service = nullptr; - const int64_t change_member_list_timeout_us = GCONF.sys_bkgd_migration_change_member_list_timeout; if (!is_inited_) { ret = OB_NOT_INIT; @@ -273,18 +272,30 @@ int ObLSRemoveMemberTask::remove_member_() } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("log stream should not be NULL", KR(ret), K(*ctx_), KP(ls)); - } else if (ctx_->arg_.member_list_.is_valid()) { - if (OB_FAIL(ls->change_replica_num(ctx_->arg_.member_list_, ctx_->arg_.orig_paxos_replica_number_, - ctx_->arg_.new_paxos_replica_number_, change_member_list_timeout_us))) { - LOG_WARN("failed to modify paxos replica number", KR(ret), KPC(ctx_)); - } - } else if (ctx_->arg_.is_paxos_member_) { - if (OB_FAIL(ls->remove_member(ctx_->arg_.remove_member_, ctx_->arg_.new_paxos_replica_number_, change_member_list_timeout_us))) { - LOG_WARN("failed to remove paxos member", K(ret), KPC(ctx_)); - } } else { - if (OB_FAIL(ls->remove_learner(ctx_->arg_.remove_member_, change_member_list_timeout_us))) { - LOG_WARN("failed to remove learner member", K(ret), KPC(ctx_)); + switch (ctx_->arg_.type_) { + case ObLSChangeMemberType::LS_REMOVE_MEMBER: { + if (OB_FAIL(remove_member_(ls))) { + LOG_WARN("failed to do remove member", K(ret), KPC(ctx_)); + } + break; + } + case ObLSChangeMemberType::LS_MODIFY_REPLICA_NUMBER : { + if (OB_FAIL(modify_member_number_(ls))) { + LOG_WARN("failed to modify member number", K(ret), KPC(ctx_)); + } + break; + } + case 
ObLSChangeMemberType::LS_TRANSFORM_MEMBER : { + if (OB_FAIL(transform_member_(ls))) { + LOG_WARN("failed to transform member", K(ret), KPC(ctx_)); + } + break; + } + default: { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("invalid type", K(ret), KPC(ctx_)); + } } } @@ -301,6 +312,71 @@ int ObLSRemoveMemberTask::remove_member_() return ret; } +int ObLSRemoveMemberTask::remove_member_(ObLS *ls) +{ + int ret = OB_SUCCESS; + const int64_t change_member_list_timeout_us = GCONF.sys_bkgd_migration_change_member_list_timeout; + if (!ctx_->arg_.type_.is_remove_member()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("remove member get invalid argument", K(ret), KPC(ctx_)); + } else { + if (ctx_->arg_.is_paxos_member_) { + if (OB_FAIL(ls->remove_member(ctx_->arg_.remove_member_, ctx_->arg_.new_paxos_replica_number_, change_member_list_timeout_us))) { + LOG_WARN("failed to remove paxos member", K(ret), KPC(ctx_)); + } + } else { + if (OB_FAIL(ls->remove_learner(ctx_->arg_.remove_member_, change_member_list_timeout_us))) { + LOG_WARN("failed to remove learner member", K(ret), KPC(ctx_)); + } + } + } + return ret; +} + +int ObLSRemoveMemberTask::modify_member_number_(ObLS *ls) +{ + int ret = OB_SUCCESS; + const int64_t change_member_list_timeout_us = GCONF.sys_bkgd_migration_change_member_list_timeout; + if (!ctx_->arg_.type_.is_modify_replica_number()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("modify member number get invalid argument", K(ret), KPC(ctx_)); + } else if (OB_FAIL(ls->change_replica_num(ctx_->arg_.member_list_, ctx_->arg_.orig_paxos_replica_number_, + ctx_->arg_.new_paxos_replica_number_, change_member_list_timeout_us))) { + LOG_WARN("failed to modify paxos replica number", KR(ret), KPC(ctx_)); + } + return ret; +} + +int ObLSRemoveMemberTask::transform_member_(ObLS *ls) +{ + int ret = OB_SUCCESS; + const int64_t change_member_list_timeout_us = GCONF.sys_bkgd_migration_change_member_list_timeout; + const ObReplicaType &src_type = ctx_->arg_.src_.get_replica_type(); + const ObReplicaType &dest_type = ctx_->arg_.dest_.get_replica_type(); + + if (!ctx_->arg_.type_.is_transform_member()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("transform member get invalid argument", K(ret), KPC(ctx_)); + } else { + if (ObReplicaTypeCheck::is_full_replica(src_type) && ObReplicaTypeCheck::is_readonly_replica(dest_type)) { + //F -> R + if (OB_FAIL(ls->switch_acceptor_to_learner(ctx_->arg_.src_, ctx_->arg_.new_paxos_replica_number_, change_member_list_timeout_us))) { + LOG_WARN("failed to switch acceptor to learner", KR(ret), KPC(ctx_)); + } + } else if (ObReplicaTypeCheck::is_readonly_replica(src_type) && ObReplicaTypeCheck::is_full_replica(dest_type)) { + //R -> F + //TODO(muwei.ym) need consider add F to member list with TRANSFER + if (OB_FAIL(ls->switch_learner_to_acceptor(ctx_->arg_.src_, ctx_->arg_.new_paxos_replica_number_, change_member_list_timeout_us))) { + LOG_WARN("failed to switch learner to acceptor", KR(ret), KPC(ctx_)); + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("replica type is unexpected", K(ret), KPC(ctx_)); + } + } + return ret; +} + int ObLSRemoveMemberTask::report_to_rs_() { int ret = OB_SUCCESS; diff --git a/src/storage/high_availability/ob_ls_remove_member_dag.h b/src/storage/high_availability/ob_ls_remove_member_dag.h index e537d2aafc..60691ae9bb 100644 --- a/src/storage/high_availability/ob_ls_remove_member_dag.h +++ b/src/storage/high_availability/ob_ls_remove_member_dag.h @@ -92,7 +92,11 @@ public: virtual int process() override; VIRTUAL_TO_STRING_KV(K("ObLSRemoveMemberTask"), KP(this), 
KPC(ctx_)); private: - int remove_member_(); + int do_change_member_(); + int remove_member_(ObLS *ls); + int modify_member_number_(ObLS *ls); + int transform_member_(ObLS *ls); + int report_to_rs_(); private: bool is_inited_; diff --git a/src/storage/high_availability/ob_ls_remove_member_handler.cpp b/src/storage/high_availability/ob_ls_remove_member_handler.cpp index 5f441ac99c..a102abac1b 100644 --- a/src/storage/high_availability/ob_ls_remove_member_handler.cpp +++ b/src/storage/high_availability/ob_ls_remove_member_handler.cpp @@ -19,15 +19,65 @@ using namespace oceanbase; using namespace share; using namespace storage; +ObLSChangeMemberType::ObLSChangeMemberType(const TYPE &type) + : type_(type) +{ +} + +const char *ObLSChangeMemberType::get_type_str(const ObLSChangeMemberType &type) +{ + const char *str = "UNKNOWN"; + const char *type_str[] = { + "LS_REMOVE_MEMBER", + "LS_MODIFY_REPLICA_NUMBER", + "LS_TRANSFORM_MEMBER", + }; + STATIC_ASSERT(MAX == ARRAYSIZEOF(type_str), "type count mismatch"); + if (type.type_ < 0 || type.type_ >= MAX) { + LOG_ERROR_RET(OB_ERR_UNEXPECTED, "invalid type", K(type)); + } else { + str = type_str[type.type_]; + } + return str; + +} + +int ObLSChangeMemberType::set_type(int32_t type) +{ + int ret = OB_SUCCESS; + if (0 > type || MAX <= type) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid type", K(ret), K(type)); + } else { + type_ = static_cast(type); + } + return ret; +} + +ObLSChangeMemberType &ObLSChangeMemberType::operator=(const TYPE &type) +{ + type_ = type; + return *this; +} + +void ObLSChangeMemberType::reset() +{ + type_ = MAX; +} + + ObLSRemoveMemberArg::ObLSRemoveMemberArg() : task_id_(), tenant_id_(OB_INVALID_ID), ls_id_(), + type_(), remove_member_(), orig_paxos_replica_number_(0), new_paxos_replica_number_(0), is_paxos_member_(false), - member_list_() + member_list_(), + src_(), + dest_() { } @@ -36,11 +86,14 @@ void ObLSRemoveMemberArg::reset() task_id_.reset(); tenant_id_ = OB_INVALID_ID; ls_id_.reset(); + type_.reset(); remove_member_.reset(); orig_paxos_replica_number_ = 0; new_paxos_replica_number_ = 0; is_paxos_member_ = false; member_list_.reset(); + src_.reset(); + dest_.reset(); } bool ObLSRemoveMemberArg::is_valid() const @@ -49,7 +102,20 @@ bool ObLSRemoveMemberArg::is_valid() const bool_ret = !task_id_.is_invalid() && OB_INVALID_ID != tenant_id_ && ls_id_.is_valid() - && (remove_member_.is_valid() || member_list_.is_valid()); + && type_.is_valid(); + + if (bool_ret) { + if (type_.is_remove_member()) { + bool_ret = remove_member_.is_valid(); + } else if (type_.is_modify_replica_number()) { + bool_ret = member_list_.is_valid(); + } else if (type_.is_transform_member()) { + bool_ret = src_.is_valid() && dest_.is_valid(); + } else { + bool_ret = false; + } + } + if (bool_ret && is_paxos_member_) { bool_ret = orig_paxos_replica_number_ > 0 && new_paxos_replica_number_ > 0; } @@ -113,6 +179,7 @@ int ObLSRemoveMemberHandler::remove_paxos_member( remove_member_arg.new_paxos_replica_number_ = arg.new_paxos_replica_number_; remove_member_arg.orig_paxos_replica_number_ = arg.orig_paxos_replica_number_; remove_member_arg.is_paxos_member_ = true; + remove_member_arg.type_ = ObLSChangeMemberType::LS_REMOVE_MEMBER; if (OB_FAIL(generate_remove_member_dag_(remove_member_arg))) { LOG_WARN("failed to generate remove member dag", K(ret), K(arg), K(remove_member_arg)); @@ -138,6 +205,7 @@ int ObLSRemoveMemberHandler::remove_learner_member(const obrpc::ObLSDropNonPaxos remove_member_arg.task_id_ = arg.task_id_; remove_member_arg.remove_member_ = 
arg.remove_member_; remove_member_arg.is_paxos_member_ = false; + remove_member_arg.type_ = ObLSChangeMemberType::LS_REMOVE_MEMBER; if (OB_FAIL(generate_remove_member_dag_(remove_member_arg))) { LOG_WARN("failed to generate remove member dag", K(ret), K(arg), K(remove_member_arg)); @@ -164,6 +232,41 @@ int ObLSRemoveMemberHandler::modify_paxos_replica_number(const obrpc::ObLSModify remove_member_arg.new_paxos_replica_number_ = arg.new_paxos_replica_number_; remove_member_arg.orig_paxos_replica_number_ = arg.orig_paxos_replica_number_; remove_member_arg.member_list_ = arg.member_list_; + remove_member_arg.is_paxos_member_ = true; + remove_member_arg.type_ = ObLSChangeMemberType::LS_MODIFY_REPLICA_NUMBER; + + if (OB_FAIL(generate_remove_member_dag_(remove_member_arg))) { + LOG_WARN("failed to generate remove member dag", KR(ret), K(arg), K(remove_member_arg)); + } + } + return ret; +} + +int ObLSRemoveMemberHandler::transform_member(const obrpc::ObLSChangeReplicaArg &arg) +{ + int ret = OB_SUCCESS; + ObLSRemoveMemberArg remove_member_arg; + + if (!is_inited_) { + ret = OB_NOT_INIT; + LOG_WARN("ls remove member handler do not init", KR(ret), K(arg)); + } else if (!arg.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("transform member get invalid argument", KR(ret), K(arg)); + } else if (arg.src_.get_replica_type() == arg.dst_.get_replica_type() + || !ObReplicaTypeCheck::change_replica_op_allow(arg.src_.get_replica_type(), arg.dst_.get_replica_type())) { + ret = OB_OP_NOT_ALLOW; + LOG_WARN("change replica op not allow", K(ret), K(arg)); + } else { + remove_member_arg.tenant_id_ = arg.tenant_id_; + remove_member_arg.ls_id_ = arg.ls_id_; + remove_member_arg.task_id_ = arg.task_id_; + remove_member_arg.new_paxos_replica_number_ = arg.new_paxos_replica_number_; + remove_member_arg.orig_paxos_replica_number_ = arg.orig_paxos_replica_number_; + remove_member_arg.src_ = arg.src_; + remove_member_arg.dest_ = arg.dst_; + remove_member_arg.is_paxos_member_ = true; + remove_member_arg.type_ = ObLSChangeMemberType::LS_TRANSFORM_MEMBER; if (OB_FAIL(generate_remove_member_dag_(remove_member_arg))) { LOG_WARN("failed to generate remove member dag", KR(ret), K(arg), K(remove_member_arg)); @@ -228,6 +331,7 @@ int ObLSRemoveMemberHandler::check_task_exist( mock_remove_member_arg.ls_id_ = ls_->get_ls_id(); mock_remove_member_arg.task_id_ = task_id; mock_remove_member_arg.is_paxos_member_ = false; + mock_remove_member_arg.type_ = ObLSChangeMemberType::LS_REMOVE_MEMBER; param.arg_ = mock_remove_member_arg; if (OB_FAIL(mock_remove_member_arg.remove_member_.set_member(mock_member))) { diff --git a/src/storage/high_availability/ob_ls_remove_member_handler.h b/src/storage/high_availability/ob_ls_remove_member_handler.h index 006a59bc9b..2770a646db 100644 --- a/src/storage/high_availability/ob_ls_remove_member_handler.h +++ b/src/storage/high_availability/ob_ls_remove_member_handler.h @@ -24,6 +24,40 @@ namespace oceanbase namespace storage { +class ObLSChangeMemberType final +{ +public: + enum TYPE : uint8_t + { + LS_REMOVE_MEMBER = 0, + LS_MODIFY_REPLICA_NUMBER = 1, + LS_TRANSFORM_MEMBER = 2, + MAX, + }; + +public: + ObLSChangeMemberType() : type_(MAX) {} + ~ObLSChangeMemberType() = default; + explicit ObLSChangeMemberType(const TYPE &type); + ObLSChangeMemberType &operator=(const TYPE &type); + bool operator ==(const ObLSChangeMemberType &other) const { return type_ == other.type_; } + bool operator !=(const ObLSChangeMemberType &other) const { return type_ != other.type_; } + operator TYPE() const { return 
type_; } + static const char *get_type_str(const ObLSChangeMemberType &type); + bool is_valid() const { return type_ >= TYPE::LS_REMOVE_MEMBER && type_ < TYPE::MAX; } + bool is_remove_member() const { return TYPE::LS_REMOVE_MEMBER == type_; } + bool is_modify_replica_number() const { return TYPE::LS_MODIFY_REPLICA_NUMBER == type_; } + bool is_transform_member() const { return TYPE::LS_TRANSFORM_MEMBER == type_; } + TYPE get_type() const { return type_; } + int set_type(int32_t type); + void reset(); + + TO_STRING_KV(K_(type)); + +private: + TYPE type_; +}; + struct ObLSRemoveMemberArg final { ObLSRemoveMemberArg(); @@ -35,20 +69,26 @@ struct ObLSRemoveMemberArg final K_(task_id), K_(tenant_id), K_(ls_id), + K_(type), K_(remove_member), K_(orig_paxos_replica_number), K_(new_paxos_replica_number), K_(is_paxos_member), - K_(member_list)); + K_(member_list), + K_(src), + K_(dest)); share::ObTaskId task_id_; uint64_t tenant_id_; share::ObLSID ls_id_; + ObLSChangeMemberType type_; common::ObReplicaMember remove_member_; int64_t orig_paxos_replica_number_; int64_t new_paxos_replica_number_; bool is_paxos_member_; common::ObMemberList member_list_; + common::ObReplicaMember src_; + common::ObReplicaMember dest_; }; class ObLSRemoveMemberHandler @@ -63,6 +103,7 @@ public: int remove_paxos_member(const obrpc::ObLSDropPaxosReplicaArg &arg); int remove_learner_member(const obrpc::ObLSDropNonPaxosReplicaArg &arg); int modify_paxos_replica_number(const obrpc::ObLSModifyPaxosReplicaNumberArg &arg); + int transform_member(const obrpc::ObLSChangeReplicaArg &arg); int check_task_exist(const share::ObTaskId &task_id, bool &is_exist); void destroy(); private: diff --git a/src/storage/high_availability/ob_ls_restore.cpp b/src/storage/high_availability/ob_ls_restore.cpp index 00826d08fa..2b7d14d28c 100644 --- a/src/storage/high_availability/ob_ls_restore.cpp +++ b/src/storage/high_availability/ob_ls_restore.cpp @@ -1083,6 +1083,8 @@ int ObStartLSRestoreTask::update_ls_meta_() } else if (OB_FAIL(ls->update_ls_meta(false/*don't update restore status*/, ctx_->src_ls_meta_package_.ls_meta_))) { LOG_WARN("fail to update ls meta", K(ret), KPC(ls), KPC(ctx_)); + } else if (OB_FAIL(ls->set_dup_table_ls_meta(ctx_->src_ls_meta_package_.dup_ls_meta_))) { + LOG_WARN("fail to set dup table ls meta", K(ret), KPC(ctx_)); } else { LOG_INFO("update ls meta succeed", KPC(ls), KPC(ctx_)); } diff --git a/src/storage/high_availability/ob_storage_ha_src_provider.cpp b/src/storage/high_availability/ob_storage_ha_src_provider.cpp index 2b48154114..d0d078b173 100644 --- a/src/storage/high_availability/ob_storage_ha_src_provider.cpp +++ b/src/storage/high_availability/ob_storage_ha_src_provider.cpp @@ -204,7 +204,8 @@ int ObStorageHASrcProvider::inner_choose_ob_src_(const uint64_t tenant_id, const } else { LOG_WARN("failed to check version", K(ret), K(tenant_id), K(ls_id), K(ls_info)); } - } else if (!ObReplicaTypeCheck::is_full_replica(ls_info.ls_meta_package_.ls_meta_.replica_type_)) { + // TODO: muwei make sure this is right + } else if (!ObReplicaTypeCheck::is_full_replica(REPLICA_TYPE_FULL)) { LOG_INFO("do not choose this src", K(tenant_id), K(ls_id), K(addr), K(ls_info)); } else if (local_clog_checkpoint_scn > ls_info.ls_meta_package_.ls_meta_.get_clog_checkpoint_scn()) { LOG_INFO("do not choose this src", K(tenant_id), K(ls_id), K(addr), K(local_clog_checkpoint_scn), K(ls_info)); diff --git a/src/storage/high_availability/ob_tablet_group_restore.cpp b/src/storage/high_availability/ob_tablet_group_restore.cpp index 
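As a side note (not part of the patch): ObLSChangeMemberType above follows a small enum-wrapper pattern, pairing a range-checked setter with a name table guarded by a static assertion. A reduced, standalone rendering of that pattern with shortened, hypothetical names might look like:

#include <cstdint>
#include <cstdio>

class ChangeMemberType {
public:
  enum TYPE : uint8_t { REMOVE_MEMBER = 0, MODIFY_REPLICA_NUMBER = 1, TRANSFORM_MEMBER = 2, MAX };
  ChangeMemberType() : type_(MAX) {}
  bool is_valid() const { return type_ < MAX; }
  bool is_transform_member() const { return type_ == TRANSFORM_MEMBER; }
  int set_type(int32_t t) {                      // reject out-of-range values
    if (t < 0 || t >= MAX) return -1;
    type_ = static_cast<TYPE>(t);
    return 0;
  }
  const char *str() const {                      // name table kept in sync with the enum
    static const char *names[] = {"REMOVE_MEMBER", "MODIFY_REPLICA_NUMBER", "TRANSFORM_MEMBER"};
    static_assert(sizeof(names) / sizeof(names[0]) == MAX, "type count mismatch");
    return is_valid() ? names[type_] : "UNKNOWN";
  }
private:
  TYPE type_;
};

int main() {
  ChangeMemberType t;
  if (0 == t.set_type(2) && t.is_transform_member()) {
    std::printf("member change type: %s\n", t.str());   // prints TRANSFORM_MEMBER
  }
  return 0;
}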
03c0c83229..263b3f2d63 100644 --- a/src/storage/high_availability/ob_tablet_group_restore.cpp +++ b/src/storage/high_availability/ob_tablet_group_restore.cpp @@ -1858,7 +1858,8 @@ int ObTabletRestoreDag::init( tablet_restore_ctx_.is_leader_ = param.is_leader_; tablet_restore_ctx_.meta_index_store_ = param.meta_index_store_; tablet_restore_ctx_.second_meta_index_store_ = param.second_meta_index_store_; - tablet_restore_ctx_.replica_type_ = ls->get_replica_type(); + // TODO: yanfeng make sure this is right + tablet_restore_ctx_.replica_type_ = REPLICA_TYPE_FULL; tablet_restore_ctx_.ha_table_info_mgr_ = param.ha_table_info_mgr_; tablet_restore_ctx_.need_check_seq_ = param.need_check_seq_; tablet_restore_ctx_.ls_rebuild_seq_ = param.ls_rebuild_seq_; diff --git a/src/storage/ls/ob_ls.cpp b/src/storage/ls/ob_ls.cpp index 68e00d0d54..54e23cb77b 100644 --- a/src/storage/ls/ob_ls.cpp +++ b/src/storage/ls/ob_ls.cpp @@ -85,7 +85,6 @@ ObLS::~ObLS() int ObLS::init(const share::ObLSID &ls_id, const uint64_t tenant_id, - const ObReplicaType replica_type, const ObMigrationStatus &migration_status, const ObLSRestoreStatus &restore_status, const SCN &create_scn, @@ -99,11 +98,10 @@ int ObLS::init(const share::ObLSID &ls_id, if (!ls_id.is_valid() || !is_valid_tenant_id(tenant_id) || - !common::ObReplicaTypeCheck::is_replica_type_valid(replica_type) || !ObMigrationStatusHelper::is_valid(migration_status) || OB_ISNULL(reporter)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ls_id), K(tenant_id), K(replica_type), K(migration_status), + LOG_WARN("invalid argument", K(ret), K(ls_id), K(tenant_id), K(migration_status), KP(reporter)); } else if (IS_INIT) { ret = OB_INIT_TWICE; @@ -114,15 +112,14 @@ int ObLS::init(const share::ObLSID &ls_id, } else if (FALSE_IT(tenant_id_ = tenant_id)) { } else if (OB_FAIL(ls_meta_.init(tenant_id, ls_id, - replica_type, migration_status, restore_status, create_scn))) { - LOG_WARN("failed to init ls meta", K(ret), K(tenant_id), K(ls_id), K(replica_type)); + LOG_WARN("failed to init ls meta", K(ret), K(tenant_id), K(ls_id)); } else { rs_reporter_ = reporter; ls_freezer_.init(this); - transaction::ObTxPalfParam tx_palf_param(get_log_handler()); + transaction::ObTxPalfParam tx_palf_param(get_log_handler(), &dup_table_ls_handler_); // tx_table_.init() should after ls_table_svr.init() if (OB_FAIL(txs_svr->create_ls(ls_id, *this, &tx_palf_param, nullptr))) { @@ -216,6 +213,10 @@ int ObLS::init(const share::ObLSID &ls_id, REGISTER_TO_RESTORESERVICE(logservice::NET_STANDBY_TNT_SERVICE_LOG_BASE_TYPE, MTL(rootserver::ObCreateStandbyFromNetActor *)); } + if (OB_SUCC(ret) && OB_FAIL(ls_init_for_dup_table_())) { + LOG_WARN("pre init for dup_table_ls_handler_ failed", K(ret), K(get_ls_id())); + } + if (OB_SUCC(ret) && !is_user_tenant(tenant_id) && ls_id.is_sys_ls()) { //sys and meta tenant REGISTER_TO_LOGSERVICE(logservice::RESTORE_SERVICE_LOG_BASE_TYPE, MTL(rootserver::ObRestoreService *)); @@ -247,6 +248,22 @@ int ObLS::init(const share::ObLSID &ls_id, return ret; } +int ObLS::ls_init_for_dup_table_() +{ + int ret = OB_SUCCESS; + REGISTER_TO_LOGSERVICE(logservice::DUP_TABLE_LOG_BASE_TYPE, &dup_table_ls_handler_); + dup_table_ls_handler_.default_init(get_ls_id(), get_log_handler()); + return ret; +} + +int ObLS::ls_destory_for_dup_table_() +{ + int ret = OB_SUCCESS; + UNREGISTER_FROM_LOGSERVICE(logservice::DUP_TABLE_LOG_BASE_TYPE, &dup_table_ls_handler_); + dup_table_ls_handler_.destroy(); + return ret; +} + int ObLS::create_ls_inner_tablet(const lib::Worker::CompatMode 
compat_mode, const SCN &create_scn) { @@ -275,6 +292,7 @@ int ObLS::load_ls_inner_tablet() int ObLS::create_ls(const share::ObTenantRole tenant_role, const palf::PalfBaseInfo &palf_base_info, + const ObReplicaType &replica_type, const bool allow_log_sync) { int ret = OB_SUCCESS; @@ -293,7 +311,7 @@ int ObLS::create_ls(const share::ObTenantRole tenant_role, ret = OB_ERR_UNEXPECTED; LOG_WARN("palf should not exist now", K(ret), K_(ls_meta)); } else if (OB_FAIL(logservice->create_ls(ls_meta_.ls_id_, - ls_meta_.replica_type_, + replica_type, tenant_role, palf_base_info, allow_log_sync, @@ -340,7 +358,6 @@ int ObLS::load_ls(const share::ObTenantRole &tenant_role, } else if (!is_palf_exist) { LOG_WARN("there is no ls at disk, skip load", K_(ls_meta)); } else if (OB_FAIL(logservice->add_ls(ls_meta_.ls_id_, - ls_meta_.replica_type_, log_handler_, restore_handler_))) { LOG_WARN("add ls failed", K(ret), K_(ls_meta)); @@ -661,6 +678,9 @@ void ObLS::destroy() rootserver::ObCreateStandbyFromNetActor* net_standby_tnt_service = MTL(rootserver::ObCreateStandbyFromNetActor*); UNREGISTER_FROM_RESTORESERVICE(logservice::NET_STANDBY_TNT_SERVICE_LOG_BASE_TYPE, net_standby_tnt_service); } + + (void)ls_destory_for_dup_table_(); + if (OB_SUCC(ret) && !is_user_tenant(MTL_ID()) && ls_meta_.ls_id_.is_sys_ls()) { rootserver::ObRestoreService * restore_service = MTL(rootserver::ObRestoreService*); UNREGISTER_FROM_LOGSERVICE(logservice::RESTORE_SERVICE_LOG_BASE_TYPE, restore_service); @@ -932,6 +952,10 @@ int ObLS::get_ls_meta_package(const bool check_archive, ObLSMetaPackage &meta_pa LOG_WARN("get palf base info failed", K(ret), K(id), K(curr_lsn), K(archive_force), K(archive_ignore), K(archive_lsn), K_(ls_meta)); } + + if (OB_SUCC(ret) && OB_FAIL(dup_table_ls_handler_.get_dup_table_ls_meta(meta_package.dup_ls_meta_))) { + LOG_WARN("get dup table ls meta failed", K(ret), K(id), K(meta_package.dup_ls_meta_)); + } } return ret; } @@ -1063,7 +1087,7 @@ int ObLS::get_ls_info(ObLSVTInfo &ls_info) LOG_WARN("get ls migrate status failed", K(ret), KPC(this)); } else { ls_info.ls_id_ = ls_meta_.ls_id_; - ls_info.replica_type_ = ls_meta_.replica_type_; + ls_info.replica_type_ = ls_meta_.get_replica_type(); ls_info.ls_state_ = role; ls_info.migrate_status_ = migrate_status; ls_info.tablet_count_ = ls_tablet_svr_.get_tablet_count(); diff --git a/src/storage/ls/ob_ls.h b/src/storage/ls/ob_ls.h index ec5d0bed1b..c83fa6cc45 100644 --- a/src/storage/ls/ob_ls.h +++ b/src/storage/ls/ob_ls.h @@ -38,6 +38,7 @@ #include "storage/checkpoint/ob_data_checkpoint.h" #include "storage/tx_table/ob_tx_table.h" #include "storage/tx/ob_keep_alive_ls_handler.h" +#include "storage/tx/ob_dup_table_util.h" #include "storage/restore/ob_ls_restore_handler.h" #include "logservice/applyservice/ob_log_apply_service.h" #include "logservice/replayservice/ob_replay_handler.h" @@ -160,7 +161,6 @@ public: virtual ~ObLS(); int init(const share::ObLSID &ls_id, const uint64_t tenant_id, - const ObReplicaType replica_type, const ObMigrationStatus &migration_status, const share::ObLSRestoreStatus &restore_status, const share::SCN &create_scn, @@ -185,7 +185,6 @@ public: ObLSTabletService *get_tablet_svr() { return &ls_tablet_svr_; } share::ObLSID get_ls_id() const { return ls_meta_.ls_id_; } bool is_sys_ls() const { return ls_meta_.ls_id_.is_sys_ls(); } - ObReplicaType get_replica_type() const { return ls_meta_.replica_type_; } int get_replica_status(ObReplicaStatus &replica_status); uint64_t get_tenant_id() const { return ls_meta_.tenant_id_; } ObFreezer 
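[Editor's note] The ObLS changes above pair ls_init_for_dup_table_() (register the dup-table handler with the log service, then default-init it) with ls_destory_for_dup_table_() (unregister first, then destroy). Below is a minimal sketch of that paired lifecycle, assuming mock LogService/DupTableHandler types; it is not the REGISTER_TO_LOGSERVICE machinery itself.

#include <set>

struct DupTableHandler {
  bool inited = false;
  void default_init() { inited = true; }
  void destroy() { inited = false; }
};

struct LogService {
  std::set<DupTableHandler *> subscribers;
  void register_handler(DupTableHandler *h) { subscribers.insert(h); }
  void unregister_handler(DupTableHandler *h) { subscribers.erase(h); }
};

struct MockLS {
  LogService *log_service = nullptr;
  DupTableHandler dup_handler;

  int init_for_dup_table(LogService *svc) {
    log_service = svc;
    log_service->register_handler(&dup_handler);  // analogous to registering with the log service
    dup_handler.default_init();
    return 0;
  }
  int destroy_for_dup_table() {
    // Unregister first so no replay/apply callback can reach a destroyed handler.
    if (log_service != nullptr) {
      log_service->unregister_handler(&dup_handler);
    }
    dup_handler.destroy();
    return 0;
  }
};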
*get_freezer() { return &ls_freezer_; } @@ -194,8 +193,8 @@ public: checkpoint::ObDataCheckpoint *get_data_checkpoint() { return &data_checkpoint_; } transaction::ObKeepAliveLSHandler *get_keep_alive_ls_handler() { return &keep_alive_ls_handler_; } ObLSRestoreHandler *get_ls_restore_handler() { return &ls_restore_handler_; } + transaction::ObDupTableLSHandler *get_dup_table_ls_handler() { return &dup_table_ls_handler_; } ObLSDDLLogHandler *get_ddl_log_handler() { return &ls_ddl_log_handler_; } - // ObObLogHandler interface: // get the log_service pointer logservice::ObLogHandler *get_log_handler() { return &log_handler_; } @@ -238,6 +237,7 @@ public: // after migrating as learner int create_ls(const share::ObTenantRole tenant_role, const palf::PalfBaseInfo &palf_base_info, + const common::ObReplicaType &replica_type, const bool allow_log_sync); // load ls info from disk // @param[in] tenant_role, role of tenant, which determains palf access mode @@ -299,6 +299,8 @@ public: TO_STRING_KV(K_(ls_meta), K_(log_handler), K_(restore_handler), K_(is_inited), K_(tablet_gc_handler)); private: + int ls_init_for_dup_table_(); + int ls_destory_for_dup_table_(); int stop_(); void wait_(); int prepare_for_safe_destroy_(); @@ -496,6 +498,11 @@ public: // @param [out] quorum, the quorum of member_list // int get_paxos_member_list(common::ObMemberList &member_list, int64_t &quorum) const; CONST_DELEGATE_WITH_RET(log_handler_, get_paxos_member_list, int); + // get paxos member list and learner list of log_service + // @param [out] member_list, the member_list of current log_service + // @param [out] quorum, the quorum of member_list + // @param [out] learner_list, the learner list of log_service + CONST_DELEGATE_WITH_RET(log_handler_, get_paxos_member_list_and_learner_list, int); // advance the base_lsn of log_handler. // @param[in] palf_base_info, the palf meta used to advance base lsn. 
// int advance_base_info(const palf::PalfBaseInfo &palf_base_info); @@ -547,14 +554,17 @@ public: DELEGATE_WITH_RET(log_handler_, disable_vote, int); DELEGATE_WITH_RET(log_handler_, add_member, int); DELEGATE_WITH_RET(log_handler_, remove_member, int); + DELEGATE_WITH_RET(log_handler_, add_learner, int); DELEGATE_WITH_RET(log_handler_, remove_learner, int); + DELEGATE_WITH_RET(log_handler_, replace_learner, int); DELEGATE_WITH_RET(log_handler_, replace_member, int); DELEGATE_WITH_RET(log_handler_, is_in_sync, int); DELEGATE_WITH_RET(log_handler_, get_end_scn, int); DELEGATE_WITH_RET(log_handler_, disable_sync, int); DELEGATE_WITH_RET(log_handler_, change_replica_num, int); DELEGATE_WITH_RET(log_handler_, get_end_lsn, int); - + DELEGATE_WITH_RET(log_handler_, switch_acceptor_to_learner, int); + DELEGATE_WITH_RET(log_handler_, switch_learner_to_acceptor, int); // Create a TxCtx whose tx_id is specified // @param [in] tx_id: transaction ID @@ -632,6 +642,10 @@ public: // int iterate_tx_obj_lock_op(ObLockOpIterator &iter) const; CONST_DELEGATE_WITH_RET(ls_tx_svr_, iterate_tx_obj_lock_op, int); + //dup table ls meta interface + CONST_DELEGATE_WITH_RET(dup_table_ls_handler_, get_dup_table_ls_meta, int); + DELEGATE_WITH_RET(dup_table_ls_handler_, set_dup_table_ls_meta, int); + // ObReplayHandler interface: DELEGATE_WITH_RET(replay_handler_, replay, int); @@ -743,6 +757,10 @@ private: ObLSDDLLogHandler ls_ddl_log_handler_; // interface for submit keep alive log transaction::ObKeepAliveLSHandler keep_alive_ls_handler_; + + // dup_table ls interface ,alloc memory when discover a dup_table_tablet + transaction::ObDupTableLSHandler dup_table_ls_handler_; + ObLSWRSHandler ls_wrs_handler_; //for migration ObLSMigrationHandler ls_migration_handler_; diff --git a/src/storage/ls/ob_ls_meta.cpp b/src/storage/ls/ob_ls_meta.cpp index 75156984b5..2d55306181 100644 --- a/src/storage/ls/ob_ls_meta.cpp +++ b/src/storage/ls/ob_ls_meta.cpp @@ -48,7 +48,7 @@ ObLSMeta::ObLSMeta() : lock_(common::ObLatchIds::LS_META_LOCK), tenant_id_(OB_INVALID_TENANT_ID), ls_id_(), - replica_type_(REPLICA_TYPE_MAX), + unused_replica_type_(REPLICA_TYPE_FULL), ls_create_status_(ObInnerLSStatus::CREATING), clog_checkpoint_scn_(ObScnRange::MIN_SCN), clog_base_lsn_(PALF_INITIAL_LSN_VAL), @@ -68,7 +68,7 @@ ObLSMeta::ObLSMeta(const ObLSMeta &ls_meta) : lock_(), tenant_id_(ls_meta.tenant_id_), ls_id_(ls_meta.ls_id_), - replica_type_(ls_meta.replica_type_), + unused_replica_type_(ls_meta.unused_replica_type_), ls_create_status_(ls_meta.ls_create_status_), clog_checkpoint_scn_(ls_meta.clog_checkpoint_scn_), clog_base_lsn_(ls_meta.clog_base_lsn_), @@ -102,7 +102,7 @@ ObLSMeta &ObLSMeta::operator=(const ObLSMeta &other) if (this != &other) { tenant_id_ = other.tenant_id_; ls_id_ = other.ls_id_; - replica_type_ = other.replica_type_; + unused_replica_type_ = other.unused_replica_type_; ls_create_status_ = other.ls_create_status_; rebuild_seq_ = other.rebuild_seq_; migration_status_ = other.migration_status_; @@ -124,7 +124,7 @@ void ObLSMeta::reset() ObSpinLockTimeGuard guard(lock_); tenant_id_ = OB_INVALID_TENANT_ID; ls_id_.reset(); - replica_type_ = REPLICA_TYPE_MAX; + unused_replica_type_ = REPLICA_TYPE_FULL; clog_base_lsn_.reset(); clog_checkpoint_scn_ = ObScnRange::MIN_SCN; rebuild_seq_ = -1; @@ -209,7 +209,6 @@ bool ObLSMeta::is_valid() const { return is_valid_id(tenant_id_) && ls_id_.is_valid() - && REPLICA_TYPE_MAX != replica_type_ && OB_MIGRATION_STATUS_MAX != migration_status_ && ObGCHandler::is_valid_ls_gc_state(gc_state_) && 
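[Editor's note] The new delegates in ob_ls.h (add_learner, replace_learner, switch_acceptor_to_learner, switch_learner_to_acceptor, and the dup-table meta getters/setters) all follow the same thin-forwarding pattern. The sketch below shows hand-written equivalents of that pattern with mock types; it is illustrative only and does not claim to reproduce the real delegate macros or the palf log-handler signatures.

#include <cstdint>

struct MockMember { int64_t id = 0; };

struct MockLogHandler {
  int add_learner(const MockMember &, int64_t /*timeout_us*/) { return 0; }
  int switch_acceptor_to_learner(const MockMember &, int64_t /*timeout_us*/) { return 0; }
};

class MockLS {
public:
  // Hand-written stand-ins for what a delegate macro would generate: forward the
  // call and the return code straight to the owned log handler.
  int add_learner(const MockMember &m, int64_t timeout_us) {
    return log_handler_.add_learner(m, timeout_us);
  }
  int switch_acceptor_to_learner(const MockMember &m, int64_t timeout_us) {
    return log_handler_.switch_acceptor_to_learner(m, timeout_us);
  }
private:
  MockLogHandler log_handler_;
};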
restore_status_.is_valid() @@ -587,23 +586,20 @@ int ObLSMeta::clear_saved_info() int ObLSMeta::init( const uint64_t tenant_id, const share::ObLSID &ls_id, - const ObReplicaType &replica_type, const ObMigrationStatus &migration_status, const share::ObLSRestoreStatus &restore_status, const SCN &create_scn) { int ret = OB_SUCCESS; if (OB_INVALID_ID == tenant_id || !ls_id.is_valid() - || !ObReplicaTypeCheck::is_replica_type_valid(replica_type) || !ObMigrationStatusHelper::is_valid(migration_status) || !restore_status.is_valid()) { ret = OB_INVALID_ARGUMENT; LOG_WARN("init ls meta get invalid argument", K(ret), K(tenant_id), K(ls_id), - K(replica_type), K(migration_status), K(restore_status)); + K(migration_status), K(restore_status)); } else { tenant_id_ = tenant_id; ls_id_ = ls_id; - replica_type_ = replica_type; ls_create_status_ = ObInnerLSStatus::CREATING; clog_checkpoint_scn_ = create_scn; clog_base_lsn_.val_ = PALF_INITIAL_LSN_VAL; @@ -699,7 +695,7 @@ ObLSMeta::ObSpinLockTimeGuard::ObSpinLockTimeGuard(common::ObSpinLock &lock, OB_SERIALIZE_MEMBER(ObLSMeta, tenant_id_, ls_id_, - replica_type_, + unused_replica_type_, ls_create_status_, clog_checkpoint_scn_, clog_base_lsn_, diff --git a/src/storage/ls/ob_ls_meta.h b/src/storage/ls/ob_ls_meta.h index 0c6bfe87b5..ce56fd5e81 100644 --- a/src/storage/ls/ob_ls_meta.h +++ b/src/storage/ls/ob_ls_meta.h @@ -43,7 +43,6 @@ public: ~ObLSMeta() {} int init(const uint64_t tenant_id, const share::ObLSID &ls_id, - const ObReplicaType &replica_type, const ObMigrationStatus &migration_status, const share::ObLSRestoreStatus &restore_status, const int64_t create_scn); @@ -96,10 +95,12 @@ public: int init( const uint64_t tenant_id, const share::ObLSID &ls_id, - const ObReplicaType &replica_type, const ObMigrationStatus &migration_status, const share::ObLSRestoreStatus &restore_status, const share::SCN &create_scn); + + ObReplicaType get_replica_type() const + { return unused_replica_type_; } class ObSpinLockTimeGuard { public: @@ -111,7 +112,7 @@ public: ObTimeGuard time_guard_; ObSpinLockGuard lock_guard_; }; - TO_STRING_KV(K_(tenant_id), K_(ls_id), K_(replica_type), K_(ls_create_status), + TO_STRING_KV(K_(tenant_id), K_(ls_id), K_(ls_create_status), K_(clog_checkpoint_scn), K_(clog_base_lsn), K_(rebuild_seq), K_(migration_status), K(gc_state_), K(offline_scn_), K_(restore_status), K_(replayable_point), K_(tablet_change_checkpoint_scn), @@ -122,8 +123,8 @@ public: mutable common::ObSpinLock lock_; uint64_t tenant_id_; share::ObLSID ls_id_; - ObReplicaType replica_type_; private: + ObReplicaType unused_replica_type_; ObInnerLSStatus ls_create_status_; typedef common::ObFunction WriteSlog; // for test diff --git a/src/storage/ls/ob_ls_meta_package.cpp b/src/storage/ls/ob_ls_meta_package.cpp index 91148267c9..e15f0b3260 100644 --- a/src/storage/ls/ob_ls_meta_package.cpp +++ b/src/storage/ls/ob_ls_meta_package.cpp @@ -18,11 +18,13 @@ namespace storage { OB_SERIALIZE_MEMBER(ObLSMetaPackage, ls_meta_, - palf_meta_); + palf_meta_, + dup_ls_meta_); ObLSMetaPackage::ObLSMetaPackage() : ls_meta_(), - palf_meta_() + palf_meta_(), + dup_ls_meta_() { } @@ -30,13 +32,25 @@ ObLSMetaPackage::ObLSMetaPackage(const ObLSMetaPackage &other) : ls_meta_(other.ls_meta_), palf_meta_(other.palf_meta_) { + int ret = OB_SUCCESS; + if (OB_FAIL(dup_ls_meta_.copy(other.dup_ls_meta_))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "copy dup ls meta failed", K(dup_ls_meta_), + K(other.dup_ls_meta_)) + } } ObLSMetaPackage &ObLSMetaPackage::operator=(const 
ObLSMetaPackage &other) { + int ret = OB_SUCCESS; if (this != &other) { ls_meta_ = other.ls_meta_; palf_meta_ = other.palf_meta_; + if (OB_FAIL(dup_ls_meta_.copy(other.dup_ls_meta_))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "copy dup ls meta failed", K(dup_ls_meta_), + K(other.dup_ls_meta_)) + } } return *this; } @@ -45,12 +59,14 @@ void ObLSMetaPackage::reset() { ls_meta_.reset(); palf_meta_.reset(); + dup_ls_meta_.reset(); } bool ObLSMetaPackage::is_valid() const { return (ls_meta_.is_valid() && - palf_meta_.is_valid()); + palf_meta_.is_valid() && + dup_ls_meta_.is_valid()); } } diff --git a/src/storage/ls/ob_ls_meta_package.h b/src/storage/ls/ob_ls_meta_package.h index a3e04d04fc..349ca7ed25 100644 --- a/src/storage/ls/ob_ls_meta_package.h +++ b/src/storage/ls/ob_ls_meta_package.h @@ -14,6 +14,7 @@ #define OCEANBASE_STORAGE_OB_LS_META_PACKAGE_ #include "storage/ls/ob_ls_meta.h" // ObLSMeta #include "logservice/palf/palf_base_info.h" // PalfBaseInfo +#include "storage/tx/ob_dup_table_base.h" namespace oceanbase { @@ -34,10 +35,11 @@ public: void reset(); bool is_valid() const; - TO_STRING_KV(K_(ls_meta), K_(palf_meta)); + TO_STRING_KV(K_(ls_meta), K_(palf_meta), K_(dup_ls_meta)); public: ObLSMeta ls_meta_; // the meta of ls palf::PalfBaseInfo palf_meta_; // the meta of palf + transaction::ObDupTableLSCheckpoint::ObLSDupTableMeta dup_ls_meta_; // the meta of dup_ls_meta_; }; } // storage diff --git a/src/storage/memtable/ob_redo_log_generator.cpp b/src/storage/memtable/ob_redo_log_generator.cpp index 854be7ad50..73f963ca73 100644 --- a/src/storage/memtable/ob_redo_log_generator.cpp +++ b/src/storage/memtable/ob_redo_log_generator.cpp @@ -206,7 +206,7 @@ int ObRedoLogGenerator::log_submitted(const ObCallbackScope &callbacks) // check dup table tx if(check_dup_tablet_(iter)) { - mem_ctx_->get_trans_ctx()->set_dup_table_tx(); + // mem_ctx_->get_trans_ctx()->set_dup_table_tx_(); } } else { TRANS_LOG(ERROR, "log_submitted error", K(ret), K(iter), K(iter->need_submit_log())); @@ -352,17 +352,18 @@ int ObRedoLogGenerator::search_unsubmitted_dup_tablet_redo() if (!is_inited_) { TRANS_LOG(WARN, "redo log generate is not inited", K(ret)); } else { - // ObTransCallbackMgr::RDLockGuard guard(callback_mgr_->get_rwlock()); - // for (cursor = generate_cursor_ + 1; OB_SUCC(ret) && callback_mgr_->end() != cursor; ++cursor) { - // ObITransCallback *iter = (ObITransCallback *)*cursor; - // - // if (!iter->need_fill_redo() || !iter->need_submit_log()) { - // } else if (check_dup_tablet_(iter)) { - // ret = OB_SUCCESS; - // mem_ctx_->get_trans_ctx()->set_dup_table_tx(); - // break; - // } - // } + ObTransCallbackMgr::RDLockGuard guard(callback_mgr_->get_rwlock()); + for (cursor = generate_cursor_ + 1; OB_SUCC(ret) && callback_mgr_->end() != cursor; ++cursor) { + ObITransCallback *iter = (ObITransCallback *)*cursor; + + if (!iter->need_fill_redo() || !iter->need_submit_log()) { + //do nothing + } else if (check_dup_tablet_(iter)) { + // ret = OB_SUCCESS; + // mem_ctx_->get_trans_ctx()->set_dup_table_tx_(); + // break; + } + } } return ret; } @@ -370,13 +371,18 @@ int ObRedoLogGenerator::search_unsubmitted_dup_tablet_redo() bool ObRedoLogGenerator::check_dup_tablet_(const ObITransCallback *callback_ptr) const { bool is_dup_tablet = false; + int64_t tmp_ret = OB_SUCCESS; // If id is a dup table tablet => true // If id is not a dup table tablet => false if (MutatorType::MUTATOR_ROW == callback_ptr->get_mutator_type()) { const ObMvccRowCallback *row_iter = 
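[Editor's note] In the ObLSMetaPackage hunk above, the copy constructor and operator= cannot simply member-copy dup_ls_meta_ because its copy() can fail (it assigns an array internally); the patch logs the failure and continues. The following is a minimal sketch of that shape with mock types, not the real ObLSMetaPackage.

#include <cstdio>
#include <vector>

struct DupLSMeta {
  std::vector<int> lease_items;
  // Returns 0 on success; a real implementation could fail on allocation.
  int copy(const DupLSMeta &other) { lease_items = other.lease_items; return 0; }
};

struct MetaPackage {
  int ls_meta = 0;
  int palf_meta = 0;
  DupLSMeta dup_ls_meta;

  MetaPackage &operator=(const MetaPackage &other) {
    if (this != &other) {
      ls_meta = other.ls_meta;
      palf_meta = other.palf_meta;
      if (dup_ls_meta.copy(other.dup_ls_meta) != 0) {
        // Mirrors the patch: log the unexpected copy failure and keep going rather than throw.
        std::fprintf(stderr, "copy dup ls meta failed\n");
      }
    }
    return *this;
  }
};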
static_cast(callback_ptr); const ObTabletID &target_tablet = row_iter->get_tablet_id(); - // check dup table + if (OB_TMP_FAIL(mem_ctx_->get_trans_ctx()->merge_tablet_modify_record_(target_tablet))) { + TRANS_LOG_RET(WARN, tmp_ret, "merge tablet modify record failed", K(tmp_ret), + K(target_tablet), KPC(row_iter)); + } + // check dup table } return is_dup_tablet; diff --git a/src/storage/ob_super_block_struct.cpp b/src/storage/ob_super_block_struct.cpp index db55da3f3d..8aa6b3f08f 100644 --- a/src/storage/ob_super_block_struct.cpp +++ b/src/storage/ob_super_block_struct.cpp @@ -256,6 +256,7 @@ ObTenantSuperBlock::ObTenantSuperBlock(const uint64_t tenant_id, const bool is_h replay_start_point_.offset_ = 0; tablet_meta_entry_ = ObServerSuperBlock::EMPTY_LIST_ENTRY_BLOCK; ls_meta_entry_ = ObServerSuperBlock::EMPTY_LIST_ENTRY_BLOCK; + ls_dup_table_entry_ = ObServerSuperBlock::EMPTY_LIST_ENTRY_BLOCK; } void ObTenantSuperBlock::reset() @@ -264,14 +265,15 @@ void ObTenantSuperBlock::reset() replay_start_point_.reset(); ls_meta_entry_.reset(); tablet_meta_entry_.reset(); + ls_dup_table_entry_ = ObServerSuperBlock::EMPTY_LIST_ENTRY_BLOCK; is_hidden_= false; } bool ObTenantSuperBlock::is_valid() const { - return OB_INVALID_TENANT_ID != tenant_id_ && - replay_start_point_.is_valid() && - ls_meta_entry_.is_valid() && tablet_meta_entry_.is_valid(); + return OB_INVALID_TENANT_ID != tenant_id_ && replay_start_point_.is_valid() + && ls_meta_entry_.is_valid() && tablet_meta_entry_.is_valid() + && ls_dup_table_entry_.is_valid(); } OB_SERIALIZE_MEMBER(ObTenantSuperBlock, @@ -279,7 +281,8 @@ OB_SERIALIZE_MEMBER(ObTenantSuperBlock, replay_start_point_, ls_meta_entry_, tablet_meta_entry_, - is_hidden_); + is_hidden_, + ls_dup_table_entry_); } // end namespace storage } // end namespace oceanbase diff --git a/src/storage/ob_super_block_struct.h b/src/storage/ob_super_block_struct.h index 15d7e752f2..4d8407f5b4 100644 --- a/src/storage/ob_super_block_struct.h +++ b/src/storage/ob_super_block_struct.h @@ -119,6 +119,7 @@ public: K_(replay_start_point), K_(ls_meta_entry), K_(tablet_meta_entry), + K_(ls_dup_table_entry), K_(is_hidden)); OB_UNIS_VERSION(TENANT_SUPER_BLOCK_VERSION); @@ -127,6 +128,7 @@ public: common::ObLogCursor replay_start_point_; blocksstable::MacroBlockId ls_meta_entry_; blocksstable::MacroBlockId tablet_meta_entry_; + blocksstable::MacroBlockId ls_dup_table_entry_; bool is_hidden_; }; diff --git a/src/storage/restore/ob_ls_restore_handler.cpp b/src/storage/restore/ob_ls_restore_handler.cpp index 9528229d1e..91de94e4b2 100644 --- a/src/storage/restore/ob_ls_restore_handler.cpp +++ b/src/storage/restore/ob_ls_restore_handler.cpp @@ -1194,8 +1194,9 @@ int ObILSRestoreState::follower_fill_tablet_group_restore_arg_( LOG_WARN("fail to set src replica type", K(ret), K(leader)); } else if (OB_FAIL(tablet_group_restore_arg.src_.set_member(ObMember(leader.get_server(), 0/*invalid timestamp is ok*/)))) { LOG_WARN("fail to set src member", K(ret)); - } else if (OB_FAIL(tablet_group_restore_arg.dst_.set_replica_type(ls_->get_replica_type()))) { - LOG_WARN("fail to set dst replica type", K(ret), "replica type", ls_->get_replica_type()); + // TODO: muwei use the right replica type + } else if (OB_FAIL(tablet_group_restore_arg.dst_.set_replica_type(REPLICA_TYPE_FULL))) { + LOG_WARN("fail to set dst replica type", K(ret)); } else if (OB_FAIL(tablet_group_restore_arg.dst_.set_member(ObMember(GCTX.self_addr(), 0/*invalid timestamp is ok*/)))) { LOG_WARN("fail to set dst member", K(ret), "server", 
GCTX.self_addr()); } else if (OB_FAIL(append(tablet_group_restore_arg.tablet_id_array_, tablet_need_restore))) { @@ -1249,16 +1250,22 @@ int ObILSRestoreState::get_follower_server_(ObIArray &follow logservice::ObLogHandler *log_handler = nullptr; int64_t paxos_replica_num = 0; common::ObMemberList member_list; + GlobalLearnerList learner_list; + int64_t full_replica_count = 0; + int64_t readonly_replica_count = 0; if (OB_ISNULL(log_handler = ls_->get_log_handler())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("log handler should not be NULL", K(ret)); - } else if (OB_FAIL(log_handler->get_paxos_member_list(member_list, paxos_replica_num))) { - LOG_WARN("failed to get paxos member list", K(ret)); + } else if (OB_FAIL(log_handler->get_paxos_member_list_and_learner_list(member_list, paxos_replica_num, learner_list))) { + LOG_WARN("failed to get paxos member list and learner list", K(ret)); } else if (OB_FAIL(location_service_->get(follower_info.cluster_id_, tenant_id, ls_->get_ls_id(), expire_renew_time, is_cache_hit, location))) { LOG_WARN("fail to get location", K(ret), KPC(ls_)); - } else if (location.get_replica_locations().count() != paxos_replica_num) { + } else if (OB_FAIL(location.get_replica_count(full_replica_count, readonly_replica_count))) { + LOG_WARN("fail to get replica count in location", KR(ret), K(location), K(full_replica_count), K(readonly_replica_count)); + } else if (full_replica_count != paxos_replica_num || readonly_replica_count != learner_list.get_member_number()) { ret = OB_REPLICA_NUM_NOT_MATCH; - LOG_WARN("replica num not match, ls may in migration", K(ret), K(location), K(member_list), K(paxos_replica_num)); + LOG_WARN("replica num not match, ls may in migration", K(ret), K(location), K(full_replica_count), + K(readonly_replica_count), K(member_list), K(paxos_replica_num), K(learner_list)); } else { const ObIArray &replica_locations = location.get_replica_locations(); for (int64_t i = 0; OB_SUCC(ret) && i < replica_locations.count(); ++i) { @@ -1954,8 +1961,9 @@ int ObLSRestoreSysTabletState::follower_fill_ls_restore_arg_(ObLSRestoreArg &arg LOG_WARN("fail to set src replica type", K(ret), K(leader)); } else if (OB_FAIL(arg.src_.set_member(ObMember(leader.get_server(), 0/*invalid timestamp is ok*/)))) { LOG_WARN("fail to set src member", K(ret)); - } else if (OB_FAIL(arg.dst_.set_replica_type(ls_->get_replica_type()))) { - LOG_WARN("fail to set dst replica type", K(ret), "replica type", ls_->get_replica_type()); + // TODO: muwei use the right replica type + } else if (OB_FAIL(arg.dst_.set_replica_type(REPLICA_TYPE_FULL))) { + LOG_WARN("fail to set dst replica type", K(ret)); } else if (OB_FAIL(arg.dst_.set_member(ObMember(GCTX.self_addr(), 0/*invalid timestamp is ok*/)))) { LOG_WARN("fail to set dst member", K(ret), "server", GCTX.self_addr()); } else if (OB_FAIL(arg.restore_base_info_.copy_from(*ls_restore_arg_))) { diff --git a/src/storage/slog/ob_storage_log.cpp b/src/storage/slog/ob_storage_log.cpp index 4c4d0ac1bc..03455e9d26 100644 --- a/src/storage/slog/ob_storage_log.cpp +++ b/src/storage/slog/ob_storage_log.cpp @@ -126,6 +126,8 @@ DEF_TO_STRING(ObLSMetaLog) OB_SERIALIZE_MEMBER(ObLSMetaLog, ls_meta_); +OB_SERIALIZE_MEMBER(ObDupTableCkptLog, dup_ls_meta_); + ObLSIDLog::ObLSIDLog(ObLSID &ls_id) : ls_id_(ls_id) { diff --git a/src/storage/slog/ob_storage_log.h b/src/storage/slog/ob_storage_log.h index 302ca4177c..68453aeed2 100644 --- a/src/storage/slog/ob_storage_log.h +++ b/src/storage/slog/ob_storage_log.h @@ -19,6 +19,7 @@ #include "observer/omt/ob_tenant_meta.h" 
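[Editor's note] get_follower_server_() above now cross-checks the location cache against the log layer: FULL replicas must match the paxos member count and READONLY replicas must match the learner count, otherwise OB_REPLICA_NUM_NOT_MATCH is returned. A small self-contained sketch of that check, with illustrative names only:

#include <cstdint>

enum RetCode { SUCCESS = 0, REPLICA_NUM_NOT_MATCH = -1 };

int check_replica_counts(int64_t full_replica_count,      // FULL replicas reported by the location cache
                         int64_t readonly_replica_count,  // READONLY replicas reported by the location cache
                         int64_t paxos_replica_num,       // from the paxos member list
                         int64_t learner_count) {         // from the learner list
  if (full_replica_count != paxos_replica_num ||
      readonly_replica_count != learner_count) {
    // The LS may be in migration; the caller is expected to retry later.
    return REPLICA_NUM_NOT_MATCH;
  }
  return SUCCESS;
}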
#include "share/ob_unit_getter.h" #include "storage/ls/ob_ls_meta.h" +#include "storage/tx/ob_dup_table_base.h" namespace oceanbase { @@ -140,6 +141,28 @@ private: ObLSMeta ls_meta_; }; +struct ObDupTableCkptLog : public ObIBaseStorageLogEntry +{ +public: + ObDupTableCkptLog() {} + int init(const transaction::ObDupTableLSCheckpoint::ObLSDupTableMeta &dup_ls_meta) + { + return dup_ls_meta_.copy(dup_ls_meta); + } + + const transaction::ObDupTableLSCheckpoint::ObLSDupTableMeta &get_dup_ls_meta() + { + return dup_ls_meta_; + } + bool is_valid() const { return dup_ls_meta_.is_valid(); } + + TO_STRING_KV(K(dup_ls_meta_)); + OB_UNIS_VERSION(1); + +private: + transaction::ObDupTableLSCheckpoint::ObLSDupTableMeta dup_ls_meta_; +}; + struct ObLSIDLog : public ObIBaseStorageLogEntry { public: diff --git a/src/storage/slog/ob_storage_log_struct.h b/src/storage/slog/ob_storage_log_struct.h index 03ba33291d..a88b2c9f7b 100644 --- a/src/storage/slog/ob_storage_log_struct.h +++ b/src/storage/slog/ob_storage_log_struct.h @@ -51,6 +51,8 @@ enum class ObRedoLogSubType OB_REDO_LOG_PUT_TABLET = 14, OB_REDO_LOG_DELETE_TABLET = 15, + OB_REDO_LOG_UPDATE_DUP_TABLE_LS = 16, + OB_REDO_LOG_MAX }; diff --git a/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.cpp b/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.cpp index fb6575d820..2978d8504f 100644 --- a/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.cpp +++ b/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.cpp @@ -24,6 +24,7 @@ #include "storage/slog/ob_storage_logger.h" #include "storage/tx/ob_timestamp_service.h" #include "storage/tx/ob_trans_id_service.h" +#include "storage/tx/ob_dup_table_base.h" #include "observer/omt/ob_tenant.h" #include "storage/tx_storage/ob_ls_service.h" #include "storage/compaction/ob_tenant_tablet_scheduler.h" @@ -187,12 +188,20 @@ int ObTenantCheckpointSlogHandler::replay_checkpoint(const ObTenantSuperBlock &s ObTenantStorageCheckpointReader::ObCheckpointMetaOp replay_tablet_op = std::bind(&ObTenantCheckpointSlogHandler::replay_tablet, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); + + ObTenantStorageCheckpointReader::ObCheckpointMetaOp replay_dup_table_ls_meta_op = + std::bind(&ObTenantCheckpointSlogHandler::replay_dup_table_ls_meta, + this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); + if (!replay_ls_op.is_valid()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replay_ls_op invalid", K(ret)); } else if (!replay_tablet_op.is_valid()) { ret = OB_ERR_UNEXPECTED; LOG_WARN("replay_tablet_op invalid", K(ret)); + } else if (!replay_dup_table_ls_meta_op.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("replay_dup_table_ls_meta_op invalid", K(ret)); } else if (OB_FAIL(tenant_storage_ckpt_reader.iter_read_checkpoint_item( super_block.ls_meta_entry_, replay_ls_op, meta_block_list))) { LOG_WARN("fail to replay ls meta checkpoint", K(ret)); @@ -203,6 +212,11 @@ int ObTenantCheckpointSlogHandler::replay_checkpoint(const ObTenantSuperBlock &s LOG_WARN("fail to replay tablet checkpoint", K(ret)); } else if (OB_FAIL(tablet_block_handle_.add_macro_blocks(meta_block_list, true /*switch handle*/))) { LOG_WARN("fail to add_macro_blocks", K(ret)); + } else if (OB_FAIL(tenant_storage_ckpt_reader.iter_read_checkpoint_item( + super_block.ls_dup_table_entry_, replay_dup_table_ls_meta_op, meta_block_list))) { + LOG_WARN("fail to replay tablet checkpoint", K(ret)); + } else if (OB_FAIL(tablet_block_handle_.add_macro_blocks(meta_block_list, true /*switch handle*/))) { + 
LOG_WARN("fail to add_macro_blocks", K(ret)); } LOG_INFO("finish replay tenant checkpoint", K(ret), K(super_block)); @@ -249,6 +263,34 @@ int ObTenantCheckpointSlogHandler::replay_tablet( return ret; } + +int ObTenantCheckpointSlogHandler::replay_dup_table_ls_meta(const ObMetaDiskAddr &addr, + const char *buf, + const int64_t buf_len) +{ + int ret = OB_SUCCESS; + UNUSED(addr); + transaction::ObDupTableLSCheckpoint::ObLSDupTableMeta dup_ls_meta; + ObLSHandle ls_handle; + ObLS *ls = nullptr; + int64_t pos = 0; + if (OB_ISNULL(buf)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret)); + } else if (OB_FAIL(dup_ls_meta.deserialize(buf, buf_len, pos))) { + LOG_WARN("fail to deserialize", K(ret)); + } else if (OB_FAIL(MTL(ObLSService *) + ->get_ls(dup_ls_meta.ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("fail to replay_put_ls", K(ret)); + } else if (OB_ISNULL(ls = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is null", K(dup_ls_meta)); + } else if (OB_FAIL(ls->set_dup_table_ls_meta(dup_ls_meta))) { + LOG_WARN("set dup ls meta failed", K(ret), K(dup_ls_meta)); + } + return ret; +} + int ObTenantCheckpointSlogHandler::replay_tenant_slog(const common::ObLogCursor &start_point) { int ret = OB_SUCCESS; @@ -568,6 +610,12 @@ int ObTenantCheckpointSlogHandler::replay(const ObRedoModuleReplayParam ¶m) } break; } + case ObRedoLogSubType::OB_REDO_LOG_UPDATE_DUP_TABLE_LS: { + if (OB_FAIL(inner_replay_dup_table_ls_slog(param))) { + LOG_WARN("fail to replay dup_table ls slog", K(param)); + } + break; + } default: { ret = OB_ERR_SYS; @@ -613,6 +661,35 @@ int ObTenantCheckpointSlogHandler::inner_replay_update_ls_slog(const ObRedoModul return ret; } +int ObTenantCheckpointSlogHandler::inner_replay_dup_table_ls_slog( + const ObRedoModuleReplayParam ¶m) +{ + int ret = OB_SUCCESS; + + ObDupTableCkptLog slog_entry; + int64_t pos = 0; + ObLSHandle ls_handle; + ObLS *ls_ptr = nullptr; + + if (OB_FAIL(slog_entry.deserialize(param.buf_, param.disk_addr_.size(), pos))) { + LOG_WARN("fail to deserialize slog", K(ret), K(param), K(pos)); + } else if (OB_FAIL(MTL(ObLSService *) + ->get_ls(slog_entry.get_dup_ls_meta().ls_id_, ls_handle, + ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("get ls failed", K(ret), K(param), K(pos)); + } else if (OB_ISNULL(ls_ptr = ls_handle.get_ls())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid ls_ptr", K(ret), K(param), K(pos)); + } else if (OB_FAIL(ls_ptr->set_dup_table_ls_meta( + slog_entry.get_dup_ls_meta()))) { + LOG_WARN("fail to replay dup table ls meta slog", K(ret), K(param), K(pos)); + } else { + LOG_INFO("successfully replay dup table ls meta slog", K(param), K(pos)); + } + + return ret; +} + int ObTenantCheckpointSlogHandler::inner_replay_create_ls_commit_slog( const ObRedoModuleReplayParam ¶m) { @@ -829,6 +906,14 @@ int ObTenantCheckpointSlogHandler::parse( } break; } + case ObRedoLogSubType::OB_REDO_LOG_UPDATE_DUP_TABLE_LS: { + ObDupTableCkptLog slog_entry; + snprintf(slog_name, ObStorageLogReplayer::MAX_SLOG_NAME_LEN, "update dup table ls meta slog: "); + if (OB_FAIL(ObStorageLogReplayer::print_slog(buf, len, slog_name, slog_entry, stream))) { + LOG_WARN("fail to print slog", K(ret), KP(buf), K(len), K(slog_name), K(slog_entry)); + } + break; + } default: { ret = OB_ERR_SYS; diff --git a/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.h b/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.h index 8b780958b2..6e9e2e057a 100644 --- a/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.h +++ 
b/src/storage/slog_ckpt/ob_tenant_checkpoint_slog_handler.h @@ -83,6 +83,7 @@ private: int replay_checkpoint(const ObTenantSuperBlock &super_block); int replay_ls_meta(const ObMetaDiskAddr &addr, const char *buf, const int64_t buf_len); int replay_tablet(const ObMetaDiskAddr &addr, const char *buf, const int64_t buf_len); + int replay_dup_table_ls_meta(const ObMetaDiskAddr &addr, const char *buf, const int64_t buf_len); int update_tablet_meta_addr_and_block_list(ObTenantStorageCheckpointWriter &ckpt_writer); int replay_tenant_slog(const common::ObLogCursor &start_point); int replay_load_tablets(); @@ -91,6 +92,7 @@ private: int inner_replay_create_ls_slog(const ObRedoModuleReplayParam ¶m); int inner_replay_create_ls_commit_slog(const ObRedoModuleReplayParam ¶m); int inner_replay_delete_ls(const ObRedoModuleReplayParam ¶m); + int inner_replay_dup_table_ls_slog(const ObRedoModuleReplayParam ¶m); int inner_replay_put_tablet(const ObRedoModuleReplayParam ¶m); int inner_replay_delete_tablet(const ObRedoModuleReplayParam ¶m); int inner_replay_gts_record(const ObRedoModuleReplayParam ¶m); diff --git a/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.cpp b/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.cpp index 8a838d1977..489cac4313 100644 --- a/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.cpp +++ b/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.cpp @@ -21,6 +21,7 @@ #include "storage/tablet/ob_tablet_iterator.h" #include "storage/tx/ob_timestamp_service.h" #include "storage/tx/ob_trans_id_service.h" +#include "storage/tx/ob_dup_table_base.h" #include "storage/tx_storage/ob_ls_service.h" #include "sql/das/ob_das_id_service.h" @@ -81,9 +82,11 @@ int ObTenantStorageCheckpointWriter::write_checkpoint(ObTenantSuperBlock &super_ LOG_WARN("ObTenantStorageCheckpointWriter not inited", K(ret)); } else if (OB_FAIL(write_ls_checkpoint(super_block.ls_meta_entry_))) { LOG_WARN("fail to write_ls_checkpoint", K(ret)); - } else if (OB_FAIL(write_tablet_checkpoint( - super_block.replay_start_point_, super_block.tablet_meta_entry_))) { + } else if (OB_FAIL(write_tablet_checkpoint(super_block.replay_start_point_, + super_block.tablet_meta_entry_))) { LOG_WARN("fail to write_tablet_checkpoint", K(ret)); + } else if (OB_FAIL(write_ls_dup_table_checkpoint(super_block.ls_dup_table_entry_))) { + LOG_WARN("fail to write dup_table ls checkpoint", K(ret)); } else if (OB_FAIL(THE_IO_DEVICE->fsync_block())) { LOG_WARN("fail to fsync_block", K(ret)); } @@ -164,6 +167,76 @@ int ObTenantStorageCheckpointWriter::write_ls_checkpoint(blocksstable::MacroBloc return ret; } +int ObTenantStorageCheckpointWriter::write_ls_dup_table_checkpoint(blocksstable::MacroBlockId &entry_block) +{ + int ret = OB_SUCCESS; + + common::ObSharedGuard ls_iter; + ObLS *ls = nullptr; + char *buf = nullptr; + int64_t buf_len = 0; + int64_t pos = 0; + int64_t count = 0; + + transaction::ObDupTableLSCheckpoint::ObLSDupTableMeta dup_ls_meta; + + ls_item_writer_.reset(); + if (OB_FAIL(MTL(ObLSService *)->get_ls_iter(ls_iter, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get log stream iter", K(ret)); + } else if (OB_FAIL(ls_item_writer_.init(false /*no need addr*/))) { + LOG_WARN("failed to init logs tream item writer", K(ret)); + } else { + while (OB_SUCC(ret)) { + if (OB_FAIL(ls_iter->get_next(ls))) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + break; + } else { + LOG_WARN("fail to get next log stream", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(ls->get_dup_table_ls_meta(dup_ls_meta))) { + 
LOG_WARN("fail to get_ls_meta", K(ret)); + } + } + + if (OB_FAIL(ret)) { + // do nothing + } else { + count++; + buf_len = dup_ls_meta.get_serialize_size(); + pos = 0; + if (OB_ISNULL(buf = static_cast(allocator_.alloc(buf_len)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("fail to allocate memory", K(ret)); + } else if (OB_FAIL(dup_ls_meta.serialize(buf, buf_len, pos))) { + LOG_WARN("fail to serialize", K(ret)); + } else if (OB_FAIL(ls_item_writer_.write_item(buf, buf_len, nullptr))) { + LOG_WARN("fail to write log stream item", K(ret)); + } + + if (OB_LIKELY(nullptr != buf)) { + allocator_.free(buf); + } + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(ls_item_writer_.close())) { + LOG_WARN("fail to close log stream item writer", K(ret)); + } else if (OB_FAIL(ls_item_writer_.get_entry_block(entry_block))) { + LOG_WARN("fail to get entry block", K(ret)); + } + } + + } + + LOG_INFO("write ls dup_table checkpoint finish", K(ret), K(count), K(entry_block)); + return ret; +} + int ObTenantStorageCheckpointWriter::write_tablet_checkpoint( const common::ObLogCursor &cursor, blocksstable::MacroBlockId &entry_block) { diff --git a/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.h b/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.h index d06f09872d..421384e3c0 100644 --- a/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.h +++ b/src/storage/slog_ckpt/ob_tenant_storage_checkpoint_writer.h @@ -51,6 +51,7 @@ private: }; int write_ls_checkpoint(blocksstable::MacroBlockId &entry_block); + int write_ls_dup_table_checkpoint(blocksstable::MacroBlockId &entry_block); int write_tablet_checkpoint(const common::ObLogCursor &cursor, blocksstable::MacroBlockId &entry_block); int copy_one_tablet_item(ObLinkedMacroBlockItemWriter &tablet_item_writer, const ObMetaDiskAddr &addr, int64_t *item_idx); diff --git a/src/storage/tx/ob_dup_table_base.cpp b/src/storage/tx/ob_dup_table_base.cpp new file mode 100644 index 0000000000..468f74d71a --- /dev/null +++ b/src/storage/tx/ob_dup_table_base.cpp @@ -0,0 +1,984 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
+ +#include "logservice/ob_log_base_header.h" +#include "ob_dup_table_base.h" +#include "ob_dup_table_lease.h" +#include "ob_dup_table_tablets.h" +#include "storage/tx/ob_trans_part_ctx.h" +#include "storage/tx/ob_trans_service.h" +#include "storage/tx_storage/ob_ls_handle.h" +#include "storage/tx_storage/ob_ls_service.h" + +namespace oceanbase +{ + +using namespace storage; + +namespace transaction +{ + +const uint64_t DupTableDiagStd::DUP_DIAG_INFO_LOG_BUF_LEN[3] = { + 1 << 12, // 4K + 1 << 16, // 64k + 1 << 12, // 4k +}; +const char *DupTableDiagStd::DUP_DIAG_INDENT_SPACE = " "; // 4 +const char *DupTableDiagStd::DUP_DIAG_COMMON_PREFIX = "DUP_TABLE_DIAG: "; +const int64_t DupTableDiagStd::DUP_DIAG_PRINT_INTERVAL[DupTableDiagStd::TypeIndex::MAX_INDEX] = { + ObDupTableLSLeaseMgr::DEFAULT_LEASE_INTERVAL, + 30 * 1000 * 1000, // 10s , tablet_print_interval + 3 * 60 * 1000 * 1000 // 3min , ts_sync_print_interval +}; + +/******************************************************* + * HashMapTool (not thread safe) + *******************************************************/ +// nothing + +/******************************************************* + * Dup_Table Lease + *******************************************************/ +OB_SERIALIZE_MEMBER(DupTableDurableLease, request_ts_, lease_interval_us_); +OB_SERIALIZE_MEMBER(DupTableLeaseItem, log_header_, durable_lease_); +OB_SERIALIZE_MEMBER(DupTableDurableLeaseLogBody, durable_lease_); +OB_SERIALIZE_MEMBER(DupTableLeaseLogHeader, addr_, lease_log_code_); + +/******************************************************* + * Dup_Table Tablets + *******************************************************/ +// nothing + +/******************************************************* + * Dup_Table Checkpoint + *******************************************************/ + +OB_SERIALIZE_MEMBER(ObDupTableLSCheckpoint::ObLSDupTableMeta, + ls_id_, + lease_item_array_, + lease_log_applied_scn_, + readable_tablets_base_scn_, + readable_tablets_min_base_applied_scn_); + +int ObDupTableLSCheckpoint::ObLSDupTableMeta::copy(const ObLSDupTableMeta &dup_ls_meta) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(this->lease_item_array_.assign(dup_ls_meta.lease_item_array_))) { + DUP_TABLE_LOG(WARN, "assign lease item array failed", K(ret)); + } else { + this->ls_id_ = dup_ls_meta.ls_id_; + this->lease_log_applied_scn_ = dup_ls_meta.lease_log_applied_scn_; + this->readable_tablets_base_scn_ = dup_ls_meta.readable_tablets_base_scn_; + this->readable_tablets_min_base_applied_scn_ = + dup_ls_meta.readable_tablets_min_base_applied_scn_; + } + + return ret; +} + +int ObDupTableLSCheckpoint::get_dup_ls_meta(ObLSDupTableMeta &dup_ls_meta_replica) const +{ + int ret = OB_SUCCESS; + + SpinRLockGuard r_guard(ckpt_rw_lock_); + + if (OB_FAIL(dup_ls_meta_replica.copy(dup_ls_meta_))) { + DUP_TABLE_LOG(WARN, "copy from dup_ls_meta_replica failed", K(ret)); + } else if (!dup_ls_meta_replica.is_valid()) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "invalid dup_ls_meta", K(ret), KPC(this), K(dup_ls_meta_replica)); + } + + return ret; +} + +int ObDupTableLSCheckpoint::set_dup_ls_meta(const ObLSDupTableMeta &dup_ls_meta_replica) +{ + int ret = OB_SUCCESS; + + SpinWLockGuard w_guard(ckpt_rw_lock_); + + if (dup_ls_meta_.ls_id_ != dup_ls_meta_replica.ls_id_) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret), KPC(this), K(dup_ls_meta_replica)); + } else if (OB_FAIL(dup_ls_meta_.copy(dup_ls_meta_replica))) { + DUP_TABLE_LOG(WARN, "copy from dup_ls_meta_replica failed", K(ret)); + } + + 
return ret; +} + +share::SCN ObDupTableLSCheckpoint::get_lease_log_rec_scn() const +{ + share::SCN rec_scn; + + SpinRLockGuard r_guard(ckpt_rw_lock_); + + if (lease_log_rec_scn_.is_valid()) { + rec_scn = lease_log_rec_scn_; + } else { + rec_scn.set_max(); + } + + return rec_scn; +} + +int ObDupTableLSCheckpoint::reserve_ckpt_memory(const DupTableLeaseItemArray &lease_log_items) +{ + int ret = OB_SUCCESS; + int64_t lease_log_array_size = lease_log_items.count(); + + SpinWLockGuard w_guard(ckpt_rw_lock_); + + if (OB_FAIL(dup_ls_meta_.lease_item_array_.reserve(lease_log_array_size))) { + DUP_TABLE_LOG(WARN, "reserve lease_item_array_ failed", K(ret), K(lease_log_array_size), + K(dup_ls_meta_)); + } + + return ret; +} + +int ObDupTableLSCheckpoint::update_ckpt_after_lease_log_synced( + const DupTableLeaseItemArray &lease_log_items, + const share::SCN &scn, + const bool modify_readable_sets, + const bool contain_all_readable, + const bool for_replay) +{ + int ret = OB_SUCCESS; + SpinWLockGuard w_guard(ckpt_rw_lock_); + + if (OB_SUCC(ret) && for_replay) { + if (!start_replay_scn_.is_valid()) { + start_replay_scn_ = scn; + DUP_TABLE_LOG(INFO, "[CKPT] replay the first dup_table log", K(ret), KPC(this), K(scn), + K(for_replay)); + } + } + + if (OB_SUCC(ret) && lease_log_items.count() > 0) { + if (OB_FAIL(dup_ls_meta_.lease_item_array_.assign(lease_log_items))) { + DUP_TABLE_LOG(WARN, "copy from lease item array failed", K(ret), K(lease_log_items), K(scn), + K(contain_all_readable), K(modify_readable_sets), K(for_replay)); + } else { + dup_ls_meta_.lease_log_applied_scn_ = scn; + + if (!lease_log_rec_scn_.is_valid()) { + DUP_TABLE_LOG(INFO, "[CKPT] set rec log scn for lease", K(ret), KPC(this), K(scn), + K(for_replay)); + lease_log_rec_scn_ = scn; + } + } + } + + if (OB_SUCC(ret) && modify_readable_sets) { + if (contain_all_readable) { + dup_ls_meta_.readable_tablets_base_scn_ = scn; + dup_ls_meta_.readable_tablets_min_base_applied_scn_.reset(); + } else if (!dup_ls_meta_.readable_tablets_min_base_applied_scn_.is_valid()) { + dup_ls_meta_.readable_tablets_min_base_applied_scn_ = scn; + } + } + + return ret; +} + +bool ObDupTableLSCheckpoint::contain_all_readable_on_replica() const +{ + bool contain_all_readable = false; + + SpinRLockGuard r_guard(ckpt_rw_lock_); + + if (!start_replay_scn_.is_valid()) { + // replay no log + if (!dup_ls_meta_.readable_tablets_base_scn_.is_valid() + && !dup_ls_meta_.readable_tablets_min_base_applied_scn_.is_valid()) { + contain_all_readable = true; + DUP_TABLE_LOG(INFO, "[CKPT] No changes to readable sets and no replay", + K(contain_all_readable), KPC(this)); + } + + } else { + if (!dup_ls_meta_.readable_tablets_base_scn_.is_valid() + && !dup_ls_meta_.readable_tablets_min_base_applied_scn_.is_valid()) { + // no changes to readable tablets + contain_all_readable = true; + } else if (dup_ls_meta_.readable_tablets_base_scn_.is_valid() + && !dup_ls_meta_.readable_tablets_min_base_applied_scn_.is_valid()) { + if (start_replay_scn_ < dup_ls_meta_.readable_tablets_base_scn_) { + contain_all_readable = true; + } + } else if (!dup_ls_meta_.readable_tablets_base_scn_.is_valid() + && dup_ls_meta_.readable_tablets_min_base_applied_scn_.is_valid()) { + if (start_replay_scn_ < dup_ls_meta_.readable_tablets_min_base_applied_scn_) { + contain_all_readable = true; + } + } else if (dup_ls_meta_.readable_tablets_base_scn_.is_valid() + && dup_ls_meta_.readable_tablets_min_base_applied_scn_.is_valid()) { + if (start_replay_scn_ < dup_ls_meta_.readable_tablets_base_scn_) { + 
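[Editor's note] get_lease_log_rec_scn() above returns the recorded SCN only when one is pending and otherwise reports "max", so the dup-table module never holds back checkpoint advancement when it has nothing to flush. A tiny sketch of that convention, with SCN mocked as a plain integer:

#include <cstdint>
#include <limits>

struct MockSCN {
  uint64_t val = 0;  // 0 == invalid in this mock
  bool is_valid() const { return val != 0; }
  static MockSCN max_scn() {
    MockSCN s;
    s.val = std::numeric_limits<uint64_t>::max();
    return s;
  }
};

MockSCN get_lease_log_rec_scn(const MockSCN &lease_log_rec_scn) {
  // Valid -> oldest lease log not yet covered by a checkpoint; invalid -> nothing pending, return max.
  return lease_log_rec_scn.is_valid() ? lease_log_rec_scn : MockSCN::max_scn();
}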
contain_all_readable = true; + } + } + + DUP_TABLE_LOG(INFO, "[CKPT] check readable sets completed after replay", + K(contain_all_readable), KPC(this)); + } + + return contain_all_readable; +} + +int ObDupTableLSCheckpoint::flush() +{ + int ret = OB_SUCCESS; + + SpinWLockGuard w_guard(ckpt_rw_lock_); + + ObDupTableCkptLog slog_entry; + + if (OB_SUCC(ret)) { + if (OB_FAIL(slog_entry.init(dup_ls_meta_))) { + DUP_TABLE_LOG(WARN, "init slog entry failed", K(ret), K(slog_entry), KPC(this)); + } + } + + if (OB_SUCC(ret)) { + ObStorageLogParam log_param; + log_param.data_ = &slog_entry; + log_param.cmd_ = ObIRedoModule::gen_cmd(ObRedoLogMainType::OB_REDO_LOG_TENANT_STORAGE, + ObRedoLogSubType::OB_REDO_LOG_UPDATE_DUP_TABLE_LS); + ObStorageLogger *slogger = nullptr; + if (OB_ISNULL(slogger = MTL(ObStorageLogger *))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "get slog service failed", K(ret)); + } else if (OB_FAIL(slogger->write_log(log_param))) { + DUP_TABLE_LOG(WARN, "fail to write ls meta slog", K(ret), K(log_param), KPC(this)); + } else { + DUP_TABLE_LOG(INFO, "Write dup_table slog successfully", K(ret), K(log_param), KPC(this)); + } + } + + if (OB_SUCC(ret)) { + lease_log_rec_scn_.reset(); + } + + return ret; +} + +/******************************************************* + * Dup_Table Log + *******************************************************/ + +// OB_SERIALIZE_MEMBER(ObDupTableLogBlockHeader, position_, remain_length_); +OB_SERIALIZE_MEMBER(DupTableLogEntryHeader, entry_type_); +OB_SERIALIZE_MEMBER(DupTableStatLog, lease_addr_cnt_, readable_cnt_, all_tablet_set_cnt_); + +// OB_SERIALIZE_MEMBER(ObLSDupTabletsMgr, max_submitted_tablet_change_ts_); + +void ObDupTableLogOperator::reuse() +{ + big_segment_buf_.reset(); + logging_tablet_set_ids_.reset(); + logging_lease_addrs_.reset(); + logging_scn_.reset(); + logging_lsn_.reset(); + stat_log_.reset(); + // durable_block_scn_arr_.reset(); +} +void ObDupTableLogOperator::reset() +{ + reuse(); + if (OB_NOT_NULL(block_buf_)) { + share::mtl_free(block_buf_); + } + block_buf_ = nullptr; + stat_log_.reset(); + + last_block_submit_us_ = 0; + last_block_sync_us_ = 0; + last_entry_submit_us_ = 0; + last_entry_sync_us_ = 0; + + total_cb_wait_time_ = 0; + append_block_count_ = 0; + log_entry_count_ = 0; + total_log_entry_wait_time_ = 0; +} + +int ObDupTableLogOperator::submit_log_entry() +{ + int ret = OB_SUCCESS; + + SpinWLockGuard guard(log_lock_); + + LOG_OPERATOR_INIT_CHECK + + int64_t max_ser_size = 0; + DupLogTypeArray type_array; + if (OB_SUCC(ret)) { + if (OB_FAIL(prepare_serialize_log_entry_(max_ser_size, type_array))) { + DUP_TABLE_LOG(WARN, "prepare serialize log entry failed", K(ret)); + } else if (!type_array.empty()) { + if (OB_FAIL(serialize_log_entry_(max_ser_size, type_array))) { + DUP_TABLE_LOG(WARN, "serialize log entry failed", K(ret)); + } else if (OB_FAIL(retry_submit_log_block_())) { + DUP_TABLE_LOG(WARN, "retry submit log block failed", K(ret), K(ls_id_), K(max_ser_size), + K(type_array), K(logging_lease_addrs_), K(logging_tablet_set_ids_), + K(logging_scn_), K(logging_lsn_)); + } else { + DUP_TABLE_LOG(INFO, "submit log entry successfully", K(ret), K(ls_id_), K(max_ser_size), + K(type_array), K(logging_lease_addrs_), K(logging_tablet_set_ids_), + K(logging_scn_), K(logging_lsn_)); + } + } else { + DUP_TABLE_LOG(INFO, "no need submit log entry", K(ret), K(ls_id_), K(max_ser_size), + K(type_array)); + } + + if (OB_FAIL(ret)) { + // reuse for serialize failed or submit_log failed + reuse(); + } + } + return ret; +} + +int 
ObDupTableLogOperator::merge_replay_block(const char *replay_buf, int64_t replay_buf_len) +{ + int ret = OB_SUCCESS; + + SpinWLockGuard guard(log_lock_); + + LOG_OPERATOR_INIT_CHECK + + if (OB_SUCC(ret)) { + logservice::ObLogBaseHeader base_header; + int64_t replay_buf_pos = 0; + if (OB_FAIL(base_header.deserialize(replay_buf, replay_buf_len, replay_buf_pos))) { + DUP_TABLE_LOG(WARN, "deserialize base log header failed", K(ret), K(replay_buf_len), + K(replay_buf_pos)); + } else if (OB_FAIL( + big_segment_buf_.collect_one_part(replay_buf, replay_buf_len, replay_buf_pos))) { + if (OB_ITER_END == ret) { + // need clear big_segment after collected all part for replay + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "Log entry is completed, can not merge new block", K(ret), + K(big_segment_buf_)); + } else if (OB_START_LOG_CURSOR_INVALID == ret) { + DUP_TABLE_LOG(INFO, "start replay from the middle of a big log entry", K(ret), + K(big_segment_buf_)); + } else if (big_segment_buf_.is_completed()) { + ret = OB_ITER_END; + } + } + } + + return ret; +} + +int ObDupTableLogOperator::deserialize_log_entry() +{ + int ret = OB_SUCCESS; + + SpinWLockGuard guard(log_lock_); + + LOG_OPERATOR_INIT_CHECK + + if (OB_SUCC(ret)) { + if (!big_segment_buf_.is_completed()) { + ret = OB_STATE_NOT_MATCH; + DUP_TABLE_LOG(WARN, "need collect more parts of log entry", K(ret), K(big_segment_buf_)); + } else if (OB_FAIL(deserialize_log_entry_())) { + DUP_TABLE_LOG(WARN, "deserialize log entry failed", K(ret), K(big_segment_buf_)); + } + } + + return ret; +} + +bool ObDupTableLogOperator::is_busy() +{ + return !logging_tablet_set_ids_.empty() || !logging_lease_addrs_.empty(); +} + +int ObDupTableLogOperator::on_success() +{ + int ret = OB_SUCCESS; + + SpinWLockGuard guard(log_lock_); + + LOG_OPERATOR_INIT_CHECK + + if (OB_SUCC(ret)) { + // It will clear scn or lsn + if (OB_FAIL(sync_log_succ_(false))) { + DUP_TABLE_LOG(WARN, "invoke sync_log_succ failed", K(ret)); + } + // if (OB_FAIL(retry_submit_log_block_())) { + // if (OB_ITER_END != ret) { + // DUP_TABLE_LOG(WARN, "retry submit log block failed", K(ret), K(big_segment_buf_)); + // } else if (OB_FAIL(sync_log_succ_(false))) { + // DUP_TABLE_LOG(WARN, "invoke sync_log_succ failed", K(ret)); + // } + // } + } + + return ret; +} + +int ObDupTableLogOperator::on_failure() +{ + int ret = OB_SUCCESS; + + bool modify_readable = false; + + SpinWLockGuard guard(log_lock_); + + LOG_OPERATOR_INIT_CHECK + + if (OB_SUCC(ret)) { + if (OB_FAIL(lease_mgr_ptr_->lease_log_synced(false /*sync_result*/, logging_scn_, + false /*for_replay*/, logging_lease_addrs_))) { + DUP_TABLE_LOG(WARN, "lease mgr on_success failed", K(ret)); + } else if (OB_FAIL(tablet_mgr_ptr_->tablet_log_synced( + false /*sync_result*/, logging_scn_, false /*for_replay*/, + logging_tablet_set_ids_, modify_readable))) { + DUP_TABLE_LOG(WARN, "tablets mgr on_failure failed", K(ret)); + } else { + reuse(); + } + } + + DUP_TABLE_LOG(INFO, "on failure", K(ret), K(logging_scn_), K(logging_lease_addrs_), + K(logging_tablet_set_ids_), K(modify_readable)); + + return ret; +} + +int ObDupTableLogOperator::replay_succ() +{ + int ret = OB_SUCCESS; + + SpinWLockGuard guard(log_lock_); + + LOG_OPERATOR_INIT_CHECK + + if (OB_SUCC(ret)) { + after_submit_log(true /*for_replay*/); + + if (OB_FAIL(sync_log_succ_(true))) { + DUP_TABLE_LOG(WARN, "invoke sync_log_succ failed", K(ret)); + } + } + return ret; +} + +int ObDupTableLogOperator::sync_log_succ_(const bool for_replay) +{ + int ret = OB_SUCCESS; + + bool modify_readable = false; 
+ bool contain_all_readable = false; + + if (OB_SUCC(ret)) { + if (stat_log_.readable_cnt_ == tablet_mgr_ptr_->get_readable_tablet_set_count()) { + contain_all_readable = true; + } + } + + if (OB_SUCC(ret)) { + + if (OB_FAIL(lease_mgr_ptr_->lease_log_synced( + true /*sync_result*/, logging_scn_, for_replay /*for_replay*/, logging_lease_addrs_))) { + DUP_TABLE_LOG(WARN, "apply lease_log failed", K(ret), K(logging_scn_), + K(logging_lease_addrs_)); + } else if (OB_FAIL(tablet_mgr_ptr_->tablet_log_synced( + true /*sync_result*/, logging_scn_, for_replay /*for_replay*/, + logging_tablet_set_ids_, contain_all_readable))) { + DUP_TABLE_LOG(WARN, "apply tablet_log failed", K(ret), K(logging_scn_), + K(logging_tablet_set_ids_)); + } else if (OB_FAIL(dup_ls_ckpt_->update_ckpt_after_lease_log_synced( + logging_lease_addrs_, logging_scn_, modify_readable /*modify_readable_sets*/, + contain_all_readable /*contain_all_readable*/, for_replay /*for_replay*/))) { + DUP_TABLE_LOG(WARN, "update lease log ckpt failed", K(ret), KPC(dup_ls_ckpt_)); + } else { + reuse(); + } + } + + return ret; +} + +void ObDupTableLogOperator::set_logging_scn(const share::SCN &scn) +{ + SpinWLockGuard guard(log_lock_); + + logging_scn_ = scn; +} + +int ObDupTableLogOperator::prepare_serialize_log_entry_(int64_t &max_ser_size, + DupLogTypeArray &type_array) +{ + int ret = OB_SUCCESS; + + int64_t origin_max_ser_size = 0; + max_ser_size = 0; + + DupTableStatLog max_stat_log; + max_stat_log.lease_addr_cnt_ = INT64_MAX; + max_stat_log.readable_cnt_ = INT64_MAX; + max_stat_log.all_tablet_set_cnt_ = INT64_MAX; + + if (big_segment_buf_.is_active()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid block buf", K(ret), K(big_segment_buf_)); + } else { + if (OB_SUCC(ret)) { + origin_max_ser_size = max_ser_size; + if (OB_FAIL(lease_mgr_ptr_->prepare_serialize(max_ser_size, logging_lease_addrs_))) { + DUP_TABLE_LOG(WARN, "prepare serialize lease_mgr failed", K(ret)); + } else if (max_ser_size > origin_max_ser_size + && OB_FAIL(type_array.push_back(DupTableLogEntryType::LeaseListLog))) { + DUP_TABLE_LOG(WARN, "push back log entry type failed", K(ret)); + } + } + if (OB_SUCC(ret)) { + origin_max_ser_size = max_ser_size; + int64_t max_log_buf_size = MAX_LOG_BLOCK_SIZE - max_stat_log.get_serialize_size(); + if (OB_FAIL(tablet_mgr_ptr_->prepare_serialize(max_ser_size, logging_tablet_set_ids_, + max_log_buf_size))) { + DUP_TABLE_LOG(WARN, "prepare serialize tablets_mgr failed", K(ret)); + } else if (max_ser_size > origin_max_ser_size + && OB_FAIL(type_array.push_back(DupTableLogEntryType::TabletChangeLog))) { + DUP_TABLE_LOG(WARN, "push back log entry_type failed", K(ret)); + } + } + + if (OB_SUCC(ret) && max_ser_size > 0) { + if (OB_FALSE_IT(max_ser_size += max_stat_log.get_serialize_size())) { + // do nothing + } else if (max_ser_size > 0 + && OB_FAIL(type_array.push_back(DupTableLogEntryType::DuptableStatLog))) { + DUP_TABLE_LOG(WARN, "push back log entry_type failed", K(ret)); + } + } + if (OB_SUCC(ret)) { + // for compute size + DupTableLogEntryHeader tmp_entry_header; + tmp_entry_header.entry_type_ = DupTableLogEntryType::MAX; + int64_t entry_log_size = 0; + + int64_t entry_header_size = type_array.count() + * (tmp_entry_header.get_serialize_size() + + serialization::encoded_length_i64(entry_log_size)); + max_ser_size += entry_header_size; + } + } + return ret; +} + +int ObDupTableLogOperator::serialize_log_entry_(const int64_t max_ser_size, + const DupLogTypeArray &type_array) +{ + int ret = OB_SUCCESS; + + if (max_ser_size > 
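[Editor's note] ObDupTableLogOperator's callbacks above follow a symmetric pattern: on_success applies the logged lease/tablet changes (and advances the checkpoint), on_failure notifies the same managers with a failed sync result, and both paths clear the in-flight logging state via reuse(). The sketch below captures only that shape, with mock managers and no locking, and is not the real implementation.

struct MockLeaseMgr  { int lease_log_synced(bool ok)  { (void)ok; return 0; } };
struct MockTabletMgr { int tablet_log_synced(bool ok) { (void)ok; return 0; } };

struct MockLogOperator {
  MockLeaseMgr lease_mgr;
  MockTabletMgr tablet_mgr;
  bool logging_in_flight = false;

  void reuse() { logging_in_flight = false; }  // clears logging scn/lsn and buffered ids in the real code

  int on_success() {
    int ret = 0;
    if ((ret = lease_mgr.lease_log_synced(true)) == 0 &&
        (ret = tablet_mgr.tablet_log_synced(true)) == 0) {
      reuse();
    }
    return ret;
  }

  int on_failure() {
    int ret = 0;
    if ((ret = lease_mgr.lease_log_synced(false)) == 0 &&
        (ret = tablet_mgr.tablet_log_synced(false)) == 0) {
      reuse();
    }
    return ret;
  }
};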
MAX_LOG_BLOCK_SIZE) { + ret = OB_LOG_TOO_LARGE; + DUP_TABLE_LOG(WARN, "serialize buf is not enough for a big log", K(ls_id_), K(max_ser_size), + K(type_array)); + } else if (OB_FAIL(big_segment_buf_.init_for_serialize(max_ser_size))) { + DUP_TABLE_LOG(WARN, "init big_segment_buf_ failed", K(ret), K(max_ser_size), + K(big_segment_buf_)); + } + + int64_t data_pos = big_segment_buf_.get_serialize_buf_pos(); + for (int i = 0; i < type_array.count() && OB_SUCC(ret); i++) { + int64_t after_header_pos = 0; + int64_t log_entry_size = 0; + const DupTableLogEntryType &entry_type = type_array[i]; + if (entry_type != DupTableLogEntryType::LeaseListLog + && entry_type != DupTableLogEntryType::TabletChangeLog + && entry_type != DupTableLogEntryType::DuptableStatLog) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret), K(entry_type)); + } else { + DupTableLogEntryHeader entry_header; + entry_header.entry_type_ = entry_type; + if (OB_FAIL(entry_header.serialize(big_segment_buf_.get_serialize_buf(), + big_segment_buf_.get_serialize_buf_len(), data_pos))) { + DUP_TABLE_LOG(WARN, "serialize entry header", K(ret), K(entry_header)); + } else if (OB_FALSE_IT(after_header_pos = data_pos)) { + // do nothing + } else if (OB_FALSE_IT(data_pos = after_header_pos + + serialization::encoded_length_i64(log_entry_size))) { + // do nothing + } else { + switch (entry_type) { + case DupTableLogEntryType::TabletChangeLog: { + if (OB_FAIL(tablet_mgr_ptr_->serialize_tablet_log( + logging_tablet_set_ids_, big_segment_buf_.get_serialize_buf(), + big_segment_buf_.get_serialize_buf_len(), data_pos))) { + DUP_TABLE_LOG(WARN, "serialize tablet log failed", K(ret), K(data_pos)); + } + break; + } + case DupTableLogEntryType::LeaseListLog: { + if (OB_FAIL(lease_mgr_ptr_->serialize_lease_log( + logging_lease_addrs_, big_segment_buf_.get_serialize_buf(), + big_segment_buf_.get_serialize_buf_len(), data_pos))) { + DUP_TABLE_LOG(WARN, "serialize lease log failed", K(ret), K(data_pos)); + } + break; + } + case DupTableLogEntryType::DuptableStatLog: { + DupTableStatLog stat_log; + stat_log.lease_addr_cnt_ = logging_lease_addrs_.count(); + stat_log.readable_cnt_ = tablet_mgr_ptr_->get_readable_tablet_set_count(); + stat_log.all_tablet_set_cnt_ = tablet_mgr_ptr_->get_all_tablet_set_count(); + if (OB_FAIL(stat_log.serialize(big_segment_buf_.get_serialize_buf(), + big_segment_buf_.get_serialize_buf_len(), data_pos))) { + DUP_TABLE_LOG(WARN, "serialize stat log failed", K(ret), K(data_pos)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected log entry type", K(ret), K(entry_header)); + break; + } + } + if (OB_SUCC(ret)) { + log_entry_size = + data_pos - after_header_pos - serialization::encoded_length_i64(log_entry_size); + if (OB_FAIL(serialization::encode_i64(big_segment_buf_.get_serialize_buf(), + big_segment_buf_.get_serialize_buf_len(), + after_header_pos, log_entry_size))) { + DUP_TABLE_LOG(WARN, "encode log entry size failed", K(ret)); + } + } + } + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(big_segment_buf_.set_serialize_pos(data_pos))) { + DUP_TABLE_LOG(WARN, "set serialize pos failed", K(ret), K(data_pos), K(big_segment_buf_)); + } + } + + return ret; +} + +int ObDupTableLogOperator::deserialize_log_entry_() +{ + int ret = OB_SUCCESS; + + int64_t data_pos = 0; + + data_pos = big_segment_buf_.get_deserialize_buf_pos(); + const int64_t segment_buf_len = big_segment_buf_.get_deserialize_buf_len(); + int64_t log_entry_size = 0; + int64_t after_header_pos = 0; + + while 
(OB_SUCC(ret) && data_pos < segment_buf_len) { + DupTableLogEntryHeader entry_header; + log_entry_size = 0; + after_header_pos = 0; + if (OB_FAIL(entry_header.deserialize(big_segment_buf_.get_deserialize_buf(), segment_buf_len, + data_pos))) { + DUP_TABLE_LOG(WARN, "serialize entry header", K(ret), K(data_pos), K(big_segment_buf_), + K(entry_header)); + } else if (OB_FAIL(serialization::decode_i64(big_segment_buf_.get_deserialize_buf(), + segment_buf_len, data_pos, &log_entry_size))) { + DUP_TABLE_LOG(WARN, "decode log entry size failed"); + } else if (OB_FALSE_IT(after_header_pos = data_pos)) { + // do nothing + } else { + switch (entry_header.entry_type_) { + case DupTableLogEntryType::TabletChangeLog: { + if (OB_ISNULL(tablet_mgr_ptr_)) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "invalid tablet mgr", K(ret)); + } else if (OB_FAIL(tablet_mgr_ptr_->deserialize_tablet_log( + logging_tablet_set_ids_, big_segment_buf_.get_deserialize_buf(), + data_pos + log_entry_size, data_pos))) { + DUP_TABLE_LOG(WARN, "deserialize tablet log failed", K(ret), K(data_pos)); + } + break; + } + case DupTableLogEntryType::LeaseListLog: { + if (OB_FAIL(lease_mgr_ptr_->deserialize_lease_log(logging_lease_addrs_, + big_segment_buf_.get_deserialize_buf(), + data_pos + log_entry_size, data_pos))) { + DUP_TABLE_LOG(WARN, "deserialize lease log failed", K(ret), K(data_pos)); + } + break; + } + case DupTableLogEntryType::DuptableStatLog: { + if (OB_FAIL(stat_log_.deserialize(big_segment_buf_.get_deserialize_buf(), + data_pos + log_entry_size, data_pos))) { + DUP_TABLE_LOG(WARN, "deserialize stat log failed", K(ret), K(data_pos)); + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected log entry type", K(ret), K(entry_header), K(data_pos)); + break; + } + } + + if (OB_SUCC(ret) && data_pos < after_header_pos + log_entry_size) { + DUP_TABLE_LOG(INFO, "try to deserialize a new version log", K(ret), K(data_pos), + K(after_header_pos), K(log_entry_size), K(entry_header)); + data_pos = after_header_pos + log_entry_size; + } + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(big_segment_buf_.set_deserialize_pos(after_header_pos + log_entry_size))) { + DUP_TABLE_LOG(WARN, "set deserialize pos failed", K(ret), K(after_header_pos), + K(log_entry_size), K(big_segment_buf_)); + } + DUP_TABLE_LOG(DEBUG, "deser log succ", K(ret), K(data_pos), K(after_header_pos), + K(log_entry_size)); + } else { + DUP_TABLE_LOG(WARN, "deser log failed", K(ret), K(data_pos)); + } + + return ret; +} + +int ObDupTableLogOperator::retry_submit_log_block_() +{ + int ret = OB_SUCCESS; + + int64_t block_buf_pos = 0; + if (OB_ISNULL(block_buf_)) { + if (OB_ISNULL(block_buf_ = static_cast( + share::mtl_malloc(MAX_LOG_BLOCK_SIZE, "DUP_LOG_BLOCK")))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + DUP_TABLE_LOG(WARN, "alloc block memory failed", K(ret)); + } + } + + if (OB_SUCC(ret)) { + logservice::ObLogBaseHeader base_header(logservice::ObLogBaseType::DUP_TABLE_LOG_BASE_TYPE, + logservice::ObReplayBarrierType::NO_NEED_BARRIER, + ls_id_.hash()); + bool unused = false; + if (!big_segment_buf_.is_active()) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "big_segment_buf_ is not active", K(ret), K(big_segment_buf_)); + } else if (big_segment_buf_.is_completed()) { + ret = OB_ITER_END; + } else if (OB_FAIL(base_header.serialize(block_buf_, MAX_LOG_BLOCK_SIZE, block_buf_pos))) { + DUP_TABLE_LOG(WARN, "serialize base header failed", K(ret), K(base_header)); + } else if (OB_FAIL(big_segment_buf_.split_one_part(block_buf_, 
MAX_LOG_BLOCK_SIZE, + block_buf_pos, unused))) { + DUP_TABLE_LOG(WARN, "split one part of segment failed", K(ret), K(big_segment_buf_), + K(block_buf_pos)); + } else if (OB_FAIL(log_handler_->append(block_buf_, block_buf_pos, share::SCN::min_scn(), false, + this, logging_lsn_, logging_scn_))) { + DUP_TABLE_LOG(WARN, "append block failed", K(ret), K(ls_id_)); + } else { + after_submit_log(false /*for_replay*/); + // DUP_TABLE_LOG(INFO, "submit one part of lease log in palf success", K(ret), K(logging_lsn_), + // K(logging_scn_), K(big_segment_buf_), K(*tablet_mgr_ptr_), K(*lease_mgr_ptr_)); + } + } + + return ret; +} + +void ObDupTableLogOperator::after_submit_log(const bool for_replay) +{ + int ret = OB_SUCCESS; + + if (!logging_tablet_set_ids_.empty()) { + if (OB_FAIL(tablet_mgr_ptr_->tablet_log_submitted(true, logging_scn_, for_replay, + logging_tablet_set_ids_))) { + DUP_TABLE_LOG(ERROR, "tablet log submitted failed", K(ret), K(ls_id_), K(logging_scn_), + K(logging_tablet_set_ids_)); + } + } + + if (OB_SUCC(ret) && !logging_lease_addrs_.empty()) { + if (OB_FAIL(lease_mgr_ptr_->lease_log_submitted(true, logging_scn_, for_replay, + logging_lease_addrs_))) { + DUP_TABLE_LOG(ERROR, "lease log submitted failed", K(ret), K(ls_id_), K(logging_scn_), + K(logging_lease_addrs_)); + } + } +} + +/******************************************************* + * Dup_Table Msg + *******************************************************/ + +OB_SERIALIZE_MEMBER(ObDupTableMsgBase, src_, dst_, proxy_, ls_id_); +OB_SERIALIZE_MEMBER_INHERIT(ObDupTableTsSyncRequest, ObDupTableMsgBase, max_commit_scn_); +OB_SERIALIZE_MEMBER_INHERIT(ObDupTableTsSyncResponse, + ObDupTableMsgBase, + max_replayed_scn_, + max_commit_scn_, + max_read_scn_); +OB_SERIALIZE_MEMBER_INHERIT(ObDupTableLeaseRequest, + ObDupTableTsSyncResponse, + request_ts_, + lease_interval_us_); +OB_SERIALIZE_MEMBER_INHERIT(ObDupTableBeforePrepareRequest, + ObDupTableMsgBase, + tx_id_, + before_prepare_version_); + +void ObDupTableMsgBase::reset() +{ + src_.reset(); + dst_.reset(); + proxy_.reset(); + ls_id_.reset(); +} + +void ObDupTableMsgBase::set_header(const ObAddr &src, + const ObAddr &dst, + const ObAddr &proxy, + const share::ObLSID &ls_id) +{ + src_ = src; + dst_ = dst; + proxy_ = proxy; + ls_id_ = ls_id; +} + +/******************************************************* + * Dup_Table RPC + *******************************************************/ +int ObDupTableRpc::init(rpc::frame::ObReqTransport *req_transport, + const oceanbase::common::ObAddr &addr) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(proxy_.init(req_transport, addr))) { + DUP_TABLE_LOG(WARN, "init dup_table rpc proxy failed", K(ret)); + } + + return ret; +} +} // namespace transaction + +namespace obrpc +{ + +int ObDupTableLeaseRequestP::process() +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + + if (!arg_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid msg", K(ret), K(arg_)); + } else if (OB_FAIL( + MTL(ObLSService *)->get_ls(arg_.get_ls_id(), ls_handle, ObLSGetMod::TRANS_MOD))) { + DUP_TABLE_LOG(WARN, "get ls failed", K(ret), K(arg_)); + } else if (OB_ISNULL(ls_handle.get_ls())) { + ret = OB_ERR_NULL_VALUE; + DUP_TABLE_LOG(WARN, "ls pointer is nullptr", K(ret)); + } else if (ls_handle.get_ls()->get_dup_table_ls_handler()->recive_lease_request(arg_)) { + DUP_TABLE_LOG(WARN, "recive_lease_request error", K(ret)); + } + + DUP_TABLE_LOG(DEBUG, "recive lease request", K(ret), K(arg_)); + return ret; +} + +int ObDupTableTsSyncRequestP::process() +{ + int ret = 
OB_SUCCESS; + + ObLSHandle ls_handle; + + if (!arg_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid msg", K(ret), K(arg_)); + } else if (OB_FAIL( + MTL(ObLSService *)->get_ls(arg_.get_ls_id(), ls_handle, ObLSGetMod::TRANS_MOD))) { + DUP_TABLE_LOG(WARN, "get ls failed", K(ret), K(arg_)); + } else if (OB_ISNULL(ls_handle.get_ls())) { + ret = OB_ERR_NULL_VALUE; + DUP_TABLE_LOG(WARN, "ls pointer is nullptr", K(ret)); + } else if (ls_handle.get_ls()->get_dup_table_ls_handler()->handle_ts_sync_request(arg_)) { + DUP_TABLE_LOG(WARN, "handle ts sync request error", K(ret)); + } + + DUP_TABLE_LOG(DEBUG, "recive ts sync request", K(ret), K(arg_)); + + return ret; +} + +int ObDupTableTsSyncResponseP::process() +{ + int ret = OB_SUCCESS; + + ObLSHandle ls_handle; + + if (!arg_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid msg", K(ret), K(arg_)); + } else if (OB_FAIL( + MTL(ObLSService *)->get_ls(arg_.get_ls_id(), ls_handle, ObLSGetMod::TRANS_MOD))) { + DUP_TABLE_LOG(WARN, "get ls failed", K(ret), K(arg_)); + } else if (OB_ISNULL(ls_handle.get_ls())) { + ret = OB_ERR_NULL_VALUE; + DUP_TABLE_LOG(WARN, "ls pointer is nullptr", K(ret)); + } else if (ls_handle.get_ls()->get_dup_table_ls_handler()->handle_ts_sync_response(arg_)) { + DUP_TABLE_LOG(WARN, "handle ts sync request error", K(ret)); + } + + DUP_TABLE_LOG(DEBUG, "recive ts sync response", K(ret), K(arg_)); + + return ret; +} + +int ObDupTableBeforePrepareRequestP::process() +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + transaction::ObPartTransCtx *part_ctx = nullptr; + + if (!arg_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid msg", K(ret), K(arg_)); + } else if (OB_FAIL( + MTL(ObLSService *)->get_ls(arg_.get_ls_id(), ls_handle, ObLSGetMod::TRANS_MOD))) { + DUP_TABLE_LOG(WARN, "get ls failed", K(ret), K(arg_)); + } else if (OB_ISNULL(ls_handle.get_ls())) { + ret = OB_ERR_NULL_VALUE; + DUP_TABLE_LOG(WARN, "ls pointer is nullptr", K(ret), K(arg_)); + } else if (OB_FAIL(ls_handle.get_ls()->get_tx_ctx(arg_.get_tx_id(), true, part_ctx))) { + DUP_TABLE_LOG(WARN, "get part ctx failed", K(ret), K(arg_)); + } else { + if (OB_ISNULL(part_ctx)) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected part ctx", K(ret), KPC(part_ctx), K(arg_)); + } else if (OB_FAIL(part_ctx->retry_dup_trx_before_prepare(arg_.get_before_prepare_version()))) { + DUP_TABLE_LOG(WARN, "retry dup trx before_prepare failed", K(ret), KPC(part_ctx), K(arg_)); + } + + ls_handle.get_ls()->revert_tx_ctx(part_ctx); + } + + DUP_TABLE_LOG(DEBUG, "recive before prepare request", K(ret), K(arg_)); + return ret; +} + +} // namespace obrpc +} // namespace oceanbase diff --git a/src/storage/tx/ob_dup_table_base.h b/src/storage/tx/ob_dup_table_base.h new file mode 100644 index 0000000000..ff29f09253 --- /dev/null +++ b/src/storage/tx/ob_dup_table_base.h @@ -0,0 +1,1225 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
+ +#ifndef OCEANBASE_TRANSACTION_DUP_TABLE_BASE_H +#define OCEANBASE_TRANSACTION_DUP_TABLE_BASE_H + +#include "lib/container/ob_se_array.h" +#include "lib/list/ob_list.h" +#include "logservice/ob_append_callback.h" +#include "observer/ob_server_struct.h" +#include "rpc/obrpc/ob_rpc_packet.h" +#include "rpc/obrpc/ob_rpc_processor.h" +#include "rpc/obrpc/ob_rpc_proxy.h" +#include "rpc/obrpc/ob_rpc_result_code.h" +#include "share/ob_ls_id.h" +#include "share/ob_rpc_struct.h" +#include "share/rc/ob_tenant_base.h" +#include "share/scn.h" +#include "storage/tx/ob_tx_big_segment_buf.h" + +namespace oceanbase +{ + +namespace logservice +{ +class ObLogHandler; +} + +namespace transaction +{ + +class ObDupTableLSHandler; +class ObDupTableLSLeaseMgr; +class ObLSDupTabletsMgr; + +typedef common::ObSEArrayImpl LogLsnArray; +typedef common::ObSEArray LogScnArray; +typedef common::ObSEArray LeaseAddrArray; + +class DupTableDiagStd +{ +public: + static const char *DUP_DIAG_INDENT_SPACE; + static const char *DUP_DIAG_COMMON_PREFIX; + + enum TypeIndex + { + LEASE_INDEX = 0, + TABLET_INDEX, + TS_SYNC_INDEX, + MAX_INDEX, + }; + static const uint64_t DUP_DIAG_INFO_LOG_BUF_LEN[TypeIndex::MAX_INDEX]; + static const int64_t DUP_DIAG_PRINT_INTERVAL[TypeIndex::MAX_INDEX]; +}; + +struct DupTableInterfaceStat +{ + int64_t dup_table_follower_read_succ_cnt_; + int64_t dup_table_follower_read_tablet_not_exist_cnt_; + int64_t dup_table_follower_read_tablet_not_ready_cnt_; + int64_t dup_table_follower_read_lease_expired_cnt_; + int64_t dup_table_redo_sync_succ_cnt_; + int64_t dup_table_redo_sync_fail_cnt_; + + void reset() + { + dup_table_follower_read_succ_cnt_ = 0; + dup_table_follower_read_tablet_not_exist_cnt_ = 0; + dup_table_follower_read_tablet_not_ready_cnt_ = 0; + dup_table_follower_read_lease_expired_cnt_ = 0; + dup_table_redo_sync_succ_cnt_ = 0; + dup_table_redo_sync_fail_cnt_ = 0; + } + + TO_STRING_KV(K(dup_table_follower_read_succ_cnt_), + K(dup_table_follower_read_tablet_not_exist_cnt_), + K(dup_table_follower_read_lease_expired_cnt_), + K(dup_table_follower_read_lease_expired_cnt_), + K(dup_table_redo_sync_succ_cnt_), + K(dup_table_redo_sync_fail_cnt_)); +}; +/******************************************************* + * HashMapTool (not thread safe) + *******************************************************/ +template +int hash_for_each_update(HashMap &hash_map, Update_CallBack &callback) +{ + int ret = OB_SUCCESS; + int64_t cnt = 0; + typename HashMap::iterator iter; + for (iter = hash_map.begin(); iter != hash_map.end(); iter++) { + // int + if (OB_FAIL(callback(*iter))) { + break; + } + cnt++; + } + // DUP_TABLE_LOG(INFO, "hash for each update", K(ret), K(cnt)); + return ret; +} + +template +int hash_for_each_remove(HashKeyType tmp_key, HashMap &hash_map, RemoveIF_CallBack &callback) +{ + int ret = OB_SUCCESS; + typename HashMap::iterator iter; + // typedef typename HashMap::_key_type HashKey2; + TransModulePageAllocator allocator; + ObList del_list(allocator); + // ObList::iterator del_iter; + int64_t cnt = 0; + + for (iter = hash_map.begin(); iter != hash_map.end(); iter++) { + // bool + if (callback(*iter)) { + if (OB_FAIL(del_list.push_back(iter->first))) { + DUP_TABLE_LOG(WARN, "insert into del_list failed", K(ret), K(del_list.size())); + break; + } + } + cnt++; + } + + if (OB_SUCC(ret)) { + if (del_list.size() > 0) { + while (OB_SUCC(del_list.pop_front(tmp_key))) { + if (OB_FAIL(hash_map.erase_refactored(tmp_key))) { + DUP_TABLE_LOG(WARN, "erase from hash map failed", K(ret), K(tmp_key)); + break; + 
} + } + if (OB_ENTRY_NOT_EXIST == ret) { + // when pop all list, rewrite ret code + DUP_TABLE_LOG(INFO, "end del in while loop", K(ret)); + ret = OB_SUCCESS; + } + // DUP_TABLE_LOG(WARN, "del item from hash map failed", K(ret), K(tmp_key)); + } + } + // DUP_TABLE_LOG(DEBUG, "hash for each remove", K(ret), K(cnt), K(del_list.size())); + return ret; +} + +template +int hash_for_each_remove_with_timeout(HashKeyType tmp_key, + HashMap &hash_map, + RemoveIF_CallBack &callback, + int64_t &remain_time) +{ + int ret = OB_SUCCESS; + typename HashMap::iterator iter; + // typedef typename HashMap::_key_type HashKey2; + TransModulePageAllocator allocator; + ObList del_list(allocator); + // ObList::iterator del_iter; + int64_t cnt = 0; + int64_t scan_start_time = ObTimeUtility::fast_current_time(); + int64_t cur_time = 0; + const int64_t update_cur_cnt = 100; + + const bool enable_timeout_check = (remain_time == INT64_MAX); + + if (OB_SUCC(ret) && enable_timeout_check && remain_time <= 0) { + ret = OB_TIMEOUT; + DUP_TABLE_LOG(WARN, "the remain timeout is not enough", K(ret), K(remain_time)); + } + + for (iter = hash_map.begin(); iter != hash_map.end() && OB_SUCC(ret); iter++) { + // bool + if (callback(*iter)) { + if (OB_FAIL(del_list.push_back(iter->first))) { + DUP_TABLE_LOG(WARN, "insert into del_list failed", K(ret), K(del_list.size())); + } + } + // check whether timeout + if (OB_SUCC(ret) && enable_timeout_check) { + if (OB_UNLIKELY(0 == ((++cnt) % update_cur_cnt))) { + cur_time = ObTimeUtility::fast_current_time(); + if ((cur_time - scan_start_time) > remain_time) { + ret = OB_TIMEOUT; + DUP_TABLE_LOG(WARN, "scan map cost too much time", K(ret), K(cnt), K(remain_time), + K(del_list.size())); + } + } + } + } + + if (OB_SUCC(ret)) { + if (del_list.size() > 0) { + while (OB_SUCC(del_list.pop_front(tmp_key))) { + if (OB_FAIL(hash_map.erase_refactored(tmp_key))) { + DUP_TABLE_LOG(WARN, "erase from hash map failed", K(ret), K(tmp_key)); + break; + } + } + if (OB_ENTRY_NOT_EXIST == ret) { + // when pop all list, rewrite ret code + DUP_TABLE_LOG(DEBUG, "end del in while loop", K(ret)); + ret = OB_SUCCESS; + } + } + } + + remain_time = remain_time - (ObTimeUtility::fast_current_time() - scan_start_time); + if (OB_SUCC(ret) && enable_timeout_check && remain_time < 0) { + ret = OB_TIMEOUT; + DUP_TABLE_LOG(WARN, "the remain timeout is not enough", K(ret), K(remain_time)); + } + // DUP_TABLE_LOG(DEBUG, "hash for each remove", K(ret), K(cnt), K(timeout), K(del_list.size())); + return ret; +} + +class IHashSerCallBack +{ +public: + IHashSerCallBack(char *buf, int64_t buf_len, int64_t pos) + { + header_pos_ = 0; + buf_ = buf; + pos_ = pos; + buf_len_ = buf_len; + } + + void reserve_header() + { + int32_t hash_size = INT32_MAX; + header_pos_ = pos_; + pos_ = pos_ + common::serialization::encoded_length_i32(hash_size); + } + + int serialize_size(int32_t hash_size) + { + int ret = OB_SUCCESS; + if (OB_FAIL(common::serialization::encode_i32(buf_, buf_len_, pos_, hash_size))) { + DUP_TABLE_LOG(WARN, "serialize tablets hash size failed", K(ret)); + } + // DUP_TABLE_LOG(INFO, "serialize hash size", K(ret), K(buf_len_), K(pos_), K(hash_size)); + return ret; + } + + int64_t get_pos() { return pos_; } + +protected: + int64_t header_pos_; + + char *buf_; + int64_t pos_; + int64_t buf_len_; +}; + +class IHashDeSerCallBack +{ +public: + IHashDeSerCallBack(const char *buf, int64_t buf_len, int64_t pos) + { + hash_size_ = 0; + + buf_ = buf; + pos_ = pos; + buf_len_ = buf_len; + } + + // virtual int insert_deser_kv() = 0; + + 
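+  // Counterpart of IHashSerCallBack: deserialize_size() decodes the i32 element
+  // count written by IHashSerCallBack::serialize_size(), after which
+  // hash_for_each_deserialize() invokes the concrete callback once per element
+  // to decode a key/value pair and insert it back into the hash structure.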
int deserialize_size(int32_t &hash_size) + { + int ret = OB_SUCCESS; + if (OB_FAIL(common::serialization::decode_i32(buf_, buf_len_, pos_, &hash_size))) { + DUP_TABLE_LOG(WARN, "deserialize tablets hash size failed", K(ret)); + } + // DUP_TABLE_LOG(INFO, "deserialize hash size", K(ret), K(buf_len_), K(pos_), K(hash_size)); + return ret; + } + + // int64_t get_hash_size() { return hash_size_; } + + int64_t get_pos() { return pos_; } + +protected: + int32_t hash_size_; + + const char *buf_; + int64_t pos_; + int64_t buf_len_; +}; + +template +int hash_for_each_serialize(HashClass &hash_map, Ser_CallBack &callback) +{ + int ret = OB_SUCCESS; + typedef typename HashClass::const_iterator ConstHashIter; + + ConstHashIter hash_iter = hash_map.begin(); + int32_t hash_size = hash_map.size(); + // callback.reserve_header(); + + if (OB_FAIL(callback.serialize_size(hash_size))) { + DUP_TABLE_LOG(WARN, "serialize hash size failed", K(ret)); + } + + if (OB_SUCC(ret)) { + for (; OB_SUCC(ret) && hash_iter != hash_map.end(); hash_iter++) { + // int + if (OB_FAIL(callback(*hash_iter))) { + break; + DUP_TABLE_LOG(WARN, "serialize hash iter failed", K(ret)); + } + } + } + + // DUP_TABLE_LOG(INFO, "serialize hash item", K(ret), K(hash_size)); + return ret; +} + +template +int hash_for_each_deserialize(HashClass &hash_map, DeSer_CallBack &callback) +{ + int ret = OB_SUCCESS; + + int32_t hash_size = 0; + common::ObTabletID tmp_tablet_id; + + if (OB_FAIL(callback.deserialize_size(hash_size))) { + DUP_TABLE_LOG(WARN, "deserialize hash_size failed", K(ret)); + } else { + for (int i = 0; OB_SUCC(ret) && i < hash_size; i++) { + // int + if (OB_FAIL(callback(hash_map))) { + DUP_TABLE_LOG(WARN, "deserialize kv failed", K(ret)); + // } else if (OB_FAIL(callback.insert_deser_kv(hash_map))) { + // DUP_TABLE_LOG(WARN, "insert into hash_struct failed", K(ret)); + } + } + } + + // DUP_TABLE_LOG(INFO, "deserialize hash item", K(ret), K(hash_size)); + return ret; +} + +template +int64_t hash_for_each_serialize_size(HashClass &hash_map, Size_CallBack &callback) +{ + + typename HashClass::const_iterator const_iter; + int64_t total_size = 0; + int32_t hash_size = 0; + + for (const_iter = hash_map.begin(); const_iter != hash_map.end(); const_iter++) { + // int64_t + hash_size++; + total_size += callback(*const_iter); + } + + total_size += common::serialization::encoded_length_i32(hash_size); + return total_size; +} + +/******************************************************* + * Dup_Table Lease + *******************************************************/ + +enum class LeaseReqCacheState +{ + INVALID = 0, + PREPARE, + READY, +}; + +struct DupTableLeaseReqCache +{ +public: + int64_t request_ts_; + int64_t lease_acquire_ts_; + int64_t lease_interval_us_; + +private: + LeaseReqCacheState state_; + +public: + // reset after log_cb + void reset() + { + request_ts_ = -1; + lease_acquire_ts_ = -1; + lease_interval_us_ = -1; + state_ = LeaseReqCacheState::INVALID; + } + void set_invalid() { state_ = LeaseReqCacheState::INVALID; } + void set_ready() { state_ = LeaseReqCacheState::READY; } + bool is_invalid() const { return state_ == LeaseReqCacheState::INVALID; } + bool is_prepare() const { return state_ == LeaseReqCacheState::PREPARE; } + bool is_ready() const { return state_ == LeaseReqCacheState::READY; } + + void renew_lease_req(const int64_t &request_ts, const int64_t &lease_interval) + { + request_ts_ = request_ts; + lease_acquire_ts_ = 0; + lease_interval_us_ = lease_interval; + state_ = LeaseReqCacheState::PREPARE; + } + + void 
grant_lease_failed() + { + state_ = LeaseReqCacheState::INVALID; + lease_acquire_ts_ = -1; + } + + void grant_lease_success(const int64_t &acquire_ts) + { + state_ = LeaseReqCacheState::READY; + lease_acquire_ts_ = acquire_ts; + } + + DupTableLeaseReqCache() { reset(); } + + TO_STRING_KV(K(request_ts_), K(lease_acquire_ts_), K(lease_interval_us_), K(state_)); +}; + +struct DupTableDurableLease +{ + int64_t request_ts_; + int64_t lease_interval_us_; + + void reset() + { + request_ts_ = -1; + lease_interval_us_ = -1; + } + + DupTableDurableLease() { reset(); } + + TO_STRING_KV(K(request_ts_), K(lease_interval_us_)); + + OB_UNIS_VERSION(1); +}; + +struct DupTableLeaderLeaseInfo +{ + DupTableDurableLease confirmed_lease_info_; + DupTableLeaseReqCache cache_lease_req_; + int64_t lease_expired_ts_; + + void reset() + { + confirmed_lease_info_.reset(); + cache_lease_req_.reset(); + lease_expired_ts_ = -1; + } + + DupTableLeaderLeaseInfo() { reset(); } + TO_STRING_KV(K(confirmed_lease_info_), K(cache_lease_req_), K(lease_expired_ts_)); +}; + +struct DupTableFollowerLeaseInfo +{ + DupTableDurableLease durable_lease_; + int64_t lease_expired_ts_; + share::SCN last_lease_scn_; + share::SCN lease_acquire_scn_; + + void reset() + { + durable_lease_.reset(); + lease_expired_ts_ = -1; + last_lease_scn_.reset(); + lease_acquire_scn_.reset(); + } + + DupTableFollowerLeaseInfo() { reset(); } + + TO_STRING_KV(K(durable_lease_), K(lease_expired_ts_), K(last_lease_scn_), K(lease_acquire_scn_)); +}; + +typedef common::hash:: + ObHashMap + DupTableLeaderLeaseMap; + +class DupTableLeaseLogHeader +{ + OB_UNIS_VERSION(1); + +public: + static const uint64_t INVALID_LEASE_LOG_TYPE = 0; + static const uint64_t DURABLE_LEASE_LOG_TYPE = 1; + +public: + DupTableLeaseLogHeader() : addr_() { reset(); } + DupTableLeaseLogHeader(const common::ObAddr &addr) + : addr_(addr), lease_log_code_(DURABLE_LEASE_LOG_TYPE) + {} + + void reset() + { + lease_log_code_ = INVALID_LEASE_LOG_TYPE; + addr_.reset(); + } + + void set_lease_owner(const common::ObAddr &addr) + { + addr_ = addr; + lease_log_code_ = DURABLE_LEASE_LOG_TYPE; + } + const common::ObAddr &get_lease_owner() const { return addr_; } + + bool is_durable_lease_log() const { return lease_log_code_ == DURABLE_LEASE_LOG_TYPE; } + + TO_STRING_KV(K(addr_), K(lease_log_code_)); + +private: + common::ObAddr addr_; + uint64_t lease_log_code_; +}; + +struct DupTableLeaseItem +{ + DupTableLeaseLogHeader log_header_; + DupTableDurableLease durable_lease_; + + DupTableLeaseItem() + { + log_header_.reset(); + durable_lease_.reset(); + } + + DupTableLeaseItem(const DupTableLeaseLogHeader &lease_log_header, + const DupTableDurableLease &durable_lease) + : log_header_(lease_log_header), durable_lease_(durable_lease) + {} + + TO_STRING_KV(K(log_header_), K(durable_lease_)); + + OB_UNIS_VERSION(1); +}; + +typedef ObSEArray DupTableLeaseItemArray; + +class DupTableDurableLeaseLogBody +{ + OB_UNIS_VERSION(1); + +public: + DupTableDurableLeaseLogBody(DupTableDurableLease &durable_lease) : durable_lease_(durable_lease) + { + // reset(); + } + void reset() { durable_lease_.reset(); } + + TO_STRING_KV(K(durable_lease_)); + +private: + DupTableDurableLease &durable_lease_; +}; + +/******************************************************* + * Dup_Table Tablets + *******************************************************/ + +class DupTabletCommonHeader +{ + OB_UNIS_VERSION(1); + +public: + static const int64_t MAX_TABLET_COUNT_IN_SINGLE_SET = 10 * 10000; + +public: + static const uint64_t INVALID_SET_TYPE 
= 0; + static const uint64_t INVALID_UNIQUE_ID = 0; + static const uint64_t INVALID_SPECIAL_OP = 0; + static const uint64_t INVALID_COMMON_CODE = 0; + +private: + static const uint64_t DUP_FREE_SET_TYPE = 1; + static const uint64_t DUP_OLD_SET_TYPE = 2; + static const uint64_t DUP_NEW_SET_TYPE = 3; + static const uint64_t DUP_READABLE_SET_TYPE = 4; + + // seralize a special op with a empty new set + // confirm it and free in tablet_log_synced + static const uint64_t DUP_SPECIAL_OP_CLEAN_ALL_READABLE_SET = 1; + static const uint64_t DUP_SPECIAL_OP_CLEAN_DATA_CONFIRMING_SET = 2; + static const uint64_t DUP_SPECIAL_OP_BLOCK_CONFIRMING = 3; + + // static const uint64_t UNIQUE_ID_BIT_COUNT = 32; + // static const uint64_t TABLET_SET_BIT_COUNT = 4; + // static const uint64_t SPECIAL_OP_BIT_COUNT = 8; + // static const uint64_t UNIQUE_ID_BIT = static_cast(0xFFFFFFFFULL); + // static const uint64_t TABLET_SET_BIT = static_cast(0xFULL) << UNIQUE_ID_BIT_COUNT; + // static const uint64_t SPECIAL_OP_BIT = static_cast(0xFFULL) << (UNIQUE_ID_BIT_COUNT + TABLET_SET_BIT_COUNT); + +public: + TO_STRING_KV(K(unique_id_), + K(tablet_set_type_), + K(sp_op_type_)); + + DupTabletCommonHeader(const uint64_t id) : unique_id_(id) + { + // set_free(); + // set_invalid_sp_op_type(); + reuse(); + } + DupTabletCommonHeader() { reset(); } + ~DupTabletCommonHeader() { reset(); } + + bool is_valid() const { + return unique_id_is_valid() && tablet_set_type_is_valid(); + } + void reset() + { + set_invalid_unique_id(); + set_invalid_tablet_set_type(); + set_invalid_sp_op_type(); + } + void reuse() + { + set_free(); + set_invalid_sp_op_type(); + } + + void set_invalid_unique_id() { set_unique_id_(INVALID_UNIQUE_ID); } + void set_invalid_tablet_set_type() { change_tablet_set_type_(INVALID_SET_TYPE); } + void set_invalid_sp_op_type() { set_special_op_(INVALID_SPECIAL_OP); } + + uint64_t get_unique_id() const { return unique_id_; } + int64_t get_special_op() const { return sp_op_type_; } + int64_t get_tablet_set_type() const { return tablet_set_type_; } + bool unique_id_is_valid() const { return INVALID_UNIQUE_ID != unique_id_; } + bool tablet_set_type_is_valid() const { return INVALID_SET_TYPE != tablet_set_type_; } + + bool is_free() const { return get_tablet_set_type_() == DUP_FREE_SET_TYPE; } + bool is_readable_set() const { return get_tablet_set_type_() == DUP_READABLE_SET_TYPE; } + bool is_new_set() const { return get_tablet_set_type_() == DUP_NEW_SET_TYPE; } + bool is_old_set() const { return get_tablet_set_type_() == DUP_OLD_SET_TYPE; } + void set_free() { change_tablet_set_type_(DUP_FREE_SET_TYPE); } + void set_readable() { change_tablet_set_type_(DUP_READABLE_SET_TYPE); } + void set_new() { change_tablet_set_type_(DUP_NEW_SET_TYPE); } + void set_old() { change_tablet_set_type_(DUP_OLD_SET_TYPE); } + + void set_op_of_clean_all_readable_set() + { + set_special_op_(DUP_SPECIAL_OP_CLEAN_ALL_READABLE_SET); + } + bool need_clean_all_readable_set() + { + return DUP_SPECIAL_OP_CLEAN_ALL_READABLE_SET == sp_op_type_; + } + void set_op_of_clean_data_confirming_set() + { + set_special_op_(DUP_SPECIAL_OP_CLEAN_DATA_CONFIRMING_SET); + } + bool need_clean_data_confirming_set() const + { + return DUP_SPECIAL_OP_CLEAN_DATA_CONFIRMING_SET == sp_op_type_; + } + void set_op_of_block_confirming() + { + set_special_op_(DUP_SPECIAL_OP_BLOCK_CONFIRMING); + } + bool need_block_confirming() const + { + return DUP_SPECIAL_OP_BLOCK_CONFIRMING == sp_op_type_; + } + // bool contain_special_op(uint64_t special_op) const { return get_special_op_() == 
special_op; } + bool no_specail_op() const { return INVALID_SPECIAL_OP == sp_op_type_; } + void copy_tablet_set_type(const DupTabletCommonHeader &src_common_header) + { + set_unique_id_(src_common_header.get_unique_id()); + set_special_op_(src_common_header.get_special_op()); + change_tablet_set_type_(src_common_header.get_tablet_set_type()); + } + +private: + void set_unique_id_(const uint64_t id) + { + uint64_t real_id = id; + if (id > UINT32_MAX) { + DUP_TABLE_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "the unique_id is too large, set invalid", K(id), + K(UINT32_MAX)); + real_id = INVALID_UNIQUE_ID; + } + unique_id_ = id; + } + void change_tablet_set_type_(const int64_t set_type) + { + tablet_set_type_ = set_type; + } + int64_t get_tablet_set_type_() const + { + return tablet_set_type_; + } + + void set_special_op_(const int64_t special_op_type) + { + sp_op_type_ = special_op_type; + } + int64_t get_special_op_() const + { + // return (common_code_ & SPECIAL_OP_BIT) >> (UNIQUE_ID_BIT_COUNT + TABLET_SET_BIT_COUNT); + return sp_op_type_; + } + +private: + uint64_t unique_id_; + int64_t tablet_set_type_; + int64_t sp_op_type_; +}; + +typedef common::ObSEArray DupTabletSetIDArray; + +/******************************************************* + * Dup_Table Checkpoint + *******************************************************/ + +class ObDupTableLSCheckpoint +{ +public: + class ObLSDupTableMeta + { + public: + share::ObLSID ls_id_; + + DupTableLeaseItemArray lease_item_array_; + share::SCN lease_log_applied_scn_; + + // the scn of a tablet log which contain all readable tablet sets. + // If we want to get all readable sets, only need replay this log. + share::SCN readable_tablets_base_scn_; + + // the scn of the first tablet log which modify any readable set + // If we want to get all readable sets, we need replay from the log to the latest log + // clear this scn after set base_scn + share::SCN readable_tablets_min_base_applied_scn_; + + TO_STRING_KV(K(ls_id_), + K(lease_log_applied_scn_), + K(readable_tablets_base_scn_), + K(readable_tablets_min_base_applied_scn_), + K(lease_item_array_)); + + ObLSDupTableMeta() { reset(); } + + bool is_valid() const { return ls_id_.is_valid(); } + void reset() + { + ls_id_.reset(); + lease_item_array_.reset(); + lease_log_applied_scn_.reset(); + readable_tablets_base_scn_.reset(); + readable_tablets_min_base_applied_scn_.reset(); + } + + int copy(const ObLSDupTableMeta &dup_ls_meta); + + DISALLOW_COPY_AND_ASSIGN(ObLSDupTableMeta); + OB_UNIS_VERSION(1); + }; + + TO_STRING_KV(K(dup_ls_meta_), K(lease_log_rec_scn_), K(start_replay_scn_)); + +public: + ObDupTableLSCheckpoint() { reset(); } + void default_init(const share::ObLSID &ls_id) { dup_ls_meta_.ls_id_ = ls_id; } + + int get_dup_ls_meta(ObLSDupTableMeta &dup_ls_meta) const; + int set_dup_ls_meta(const ObLSDupTableMeta &dup_ls_meta); + share::SCN get_lease_log_rec_scn() const; + int reserve_ckpt_memory(const DupTableLeaseItemArray &lease_log_items); + int update_ckpt_after_lease_log_synced(const DupTableLeaseItemArray &lease_log_items, + const share::SCN &scn, + const bool modify_readable_sets, + const bool contain_all_readable, + const bool for_replay); + bool contain_all_readable_on_replica() const; + + int flush(); + + void reset() + { + dup_ls_meta_.reset(); + lease_log_rec_scn_.reset(); + start_replay_scn_.reset(); + } + +private: + SpinRWLock ckpt_rw_lock_; + ObLSDupTableMeta dup_ls_meta_; + share::SCN lease_log_rec_scn_; + share::SCN start_replay_scn_; +}; + 
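+// Illustration only (the helper name below is a placeholder, not part of the
+// interface): following the member comments above, a replica that needs the full
+// readable-tablet view replays from readable_tablets_base_scn_ when it is valid
+// and otherwise falls back to readable_tablets_min_base_applied_scn_:
+//
+//   share::SCN choose_readable_replay_start(
+//       const ObDupTableLSCheckpoint::ObLSDupTableMeta &meta)
+//   {
+//     share::SCN start_scn = meta.readable_tablets_base_scn_;
+//     if (!start_scn.is_valid()) {
+//       start_scn = meta.readable_tablets_min_base_applied_scn_;
+//     }
+//     return start_scn;
+//   }
+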
+/******************************************************* + * Dup_Table Log + *******************************************************/ +enum class DupTableLogEntryType +{ + TabletChangeLog = 1, + LeaseListLog, + DuptableStatLog, + MAX +}; + +typedef common::ObSEArray DupLogTypeArray; + +// record log entry size after log entry header by int64_t +struct DupTableLogEntryHeader +{ + DupTableLogEntryType entry_type_; + + const static int64_t RESERVED_LOG_ENTRY_SIZE_SPACE = sizeof(int64_t); + + TO_STRING_KV(K(entry_type_)); + OB_UNIS_VERSION(1); +}; + +struct DupTableStatLog +{ + int64_t lease_addr_cnt_; + int64_t readable_cnt_; + int64_t all_tablet_set_cnt_; + + TO_STRING_KV(K(lease_addr_cnt_), K(readable_cnt_), K(all_tablet_set_cnt_)); + + DupTableStatLog() { reset(); } + + void reset() + { + lease_addr_cnt_ = 0; + readable_cnt_ = 0; + all_tablet_set_cnt_ = 0; + } + + OB_UNIS_VERSION(1); +}; + +class ObDupTableLogOperator : public logservice::AppendCb +{ +public: + ObDupTableLogOperator(const share::ObLSID &ls_id, + logservice::ObLogHandler *log_handler, + ObDupTableLSCheckpoint *dup_ls_ckpt, + ObDupTableLSLeaseMgr *lease_mgr, + ObLSDupTabletsMgr *tablets_mgr) + : ls_id_(ls_id), block_buf_(nullptr), log_handler_(log_handler), dup_ls_ckpt_(dup_ls_ckpt), + lease_mgr_ptr_(lease_mgr), tablet_mgr_ptr_(tablets_mgr) + { + reset(); + } + void reuse(); + void reset(); + + int submit_log_entry(); + + int merge_replay_block(const char *replay_buf, int64_t replay_buf_len); + + int deserialize_log_entry(); + + bool is_busy(); + + int on_success(); + int on_failure(); + + int replay_succ(); + + void print_statistics_log(); + +public: + void set_logging_scn(const share::SCN &scn); + + TO_STRING_KV(K(ls_id_), + K(big_segment_buf_), + K(logging_tablet_set_ids_), + K(logging_lease_addrs_), + K(logging_scn_), + K(logging_lsn_)); +private: + static const int64_t MAX_LOG_BLOCK_SIZE = common::OB_MAX_LOG_ALLOWED_SIZE; + int prepare_serialize_log_entry_(int64_t &max_ser_size, DupLogTypeArray &type_array); + int serialize_log_entry_(const int64_t max_ser_size, const DupLogTypeArray &type_array); + int deserialize_log_entry_(); + int retry_submit_log_block_(); + int sync_log_succ_(const bool for_replay); + +private: + void after_submit_log(const bool for_replay); + +#define LOG_OPERATOR_INIT_CHECK \ + if (OB_SUCC(ret)) { \ + if (OB_ISNULL(log_handler_) || OB_ISNULL(lease_mgr_ptr_) || OB_ISNULL(tablet_mgr_ptr_)) { \ + ret = OB_NOT_INIT; \ + DUP_TABLE_LOG(ERROR, "invalid log operator", K(ret), KP(log_handler_), KP(lease_mgr_ptr_), \ + KP(tablet_mgr_ptr_)); \ + } \ + } + +private: + SpinRWLock log_lock_; + + share::ObLSID ls_id_; + + char *block_buf_; + // int64_t block_buf_pos_; + + ObTxBigSegmentBuf big_segment_buf_; + + logservice::ObLogHandler *log_handler_; + + ObDupTableLSCheckpoint *dup_ls_ckpt_; + + ObDupTableLSLeaseMgr *lease_mgr_ptr_; + ObLSDupTabletsMgr *tablet_mgr_ptr_; + + DupTabletSetIDArray logging_tablet_set_ids_; + DupTableLeaseItemArray logging_lease_addrs_; + + DupTableStatLog stat_log_; + + share::SCN first_part_scn_; + + share::SCN logging_scn_; + palf::LSN logging_lsn_; + + // LogScnArray durable_block_scn_arr_; + + int64_t last_block_submit_us_; + int64_t last_block_sync_us_; + int64_t last_entry_submit_us_; + int64_t last_entry_sync_us_; + + uint64_t total_cb_wait_time_; + uint64_t append_block_count_; + uint64_t log_entry_count_; + uint64_t total_log_entry_wait_time_; +}; + +/******************************************************* + * Dup_Table Msg + 
*******************************************************/ + +class ObDupTableMsgBase +{ + OB_UNIS_VERSION(1); + +public: + ObDupTableMsgBase() { reset(); } + virtual ~ObDupTableMsgBase() {} + void reset(); + + void + set_header(const ObAddr &src, const ObAddr &dst, const ObAddr &proxy, const share::ObLSID &ls_id); + const ObAddr &get_src() const { return src_; } + const ObAddr &get_dst() const { return dst_; } + const ObAddr &get_proxy() const { return proxy_; } + const share::ObLSID &get_ls_id() const { return ls_id_; } + + bool is_valid() const + { + return src_.is_valid() && dst_.is_valid() && proxy_.is_valid() && ls_id_.is_valid(); + } + TO_STRING_KV(K_(src), K_(dst), K_(proxy), K(ls_id_)); + +protected: + ObAddr src_; + ObAddr dst_; + ObAddr proxy_; + share::ObLSID ls_id_; +}; + +class ObDupTableTsSyncRequest : public ObDupTableMsgBase +{ + OB_UNIS_VERSION(1); + +public: + ObDupTableTsSyncRequest() { reset(); } + ObDupTableTsSyncRequest(const share::SCN &commit_scn) { max_commit_scn_ = commit_scn; } + + void reset() + { + ObDupTableMsgBase::reset(); + max_commit_scn_.reset(); + } + + bool is_valid() const { return ObDupTableMsgBase::is_valid() && max_commit_scn_.is_valid(); } + + const share::SCN &get_max_commit_scn() const { return max_commit_scn_; } + INHERIT_TO_STRING_KV("ObDupTableMsgBase", ObDupTableMsgBase, K(max_commit_scn_)); + +private: + share::SCN max_commit_scn_; +}; + +class ObDupTableTsSyncResponse : public ObDupTableMsgBase +{ + OB_UNIS_VERSION(1); + +public: + ObDupTableTsSyncResponse() { reset(); } + ObDupTableTsSyncResponse(const share::SCN &replay_ts, + const share::SCN &commit_ts, + const share::SCN &read_ts) + { + max_replayed_scn_ = replay_ts; + max_commit_scn_ = commit_ts; + max_read_scn_ = read_ts; + } + + void reset() + { + ObDupTableMsgBase::reset(); + max_replayed_scn_.reset(); + max_commit_scn_.reset(); + max_read_scn_.reset(); + } + + bool is_valid() const + { + return ObDupTableMsgBase::is_valid() && max_replayed_scn_.is_valid() + && max_commit_scn_.is_valid() && max_read_scn_.is_valid(); + } + + const share::SCN &get_max_replayed_scn() const { return max_replayed_scn_; } + const share::SCN &get_max_commit_scn() const { return max_commit_scn_; } + const share::SCN &get_max_read_scn() const { return max_read_scn_; } + INHERIT_TO_STRING_KV("ObDupTableMsgBase", + ObDupTableMsgBase, + K(max_replayed_scn_), + K(max_commit_scn_), + K(max_read_scn_)); + +private: + share::SCN max_replayed_scn_; + share::SCN max_commit_scn_; + share::SCN max_read_scn_; +}; + +class ObDupTableLeaseRequest : public ObDupTableTsSyncResponse +{ + OB_UNIS_VERSION(1); + +public: + ObDupTableLeaseRequest() { reset(); } + ObDupTableLeaseRequest(const share::SCN &replay_ts, + const share::SCN &commit_ts, + const share::SCN &read_ts, + const int64_t &request_ts, + const int64_t &lease_interval_us) + : ObDupTableTsSyncResponse(replay_ts, commit_ts, read_ts), request_ts_(request_ts), + lease_interval_us_(lease_interval_us){}; + + void reset() + { + ObDupTableTsSyncResponse::reset(); + request_ts_ = lease_interval_us_ = OB_INVALID_TIMESTAMP; + }; + + bool is_valid() const + { + return ObDupTableTsSyncResponse::is_valid() && request_ts_ > 0 && lease_interval_us_ > 0; + } + + int64_t get_request_ts() const { return request_ts_; } + int64_t get_lease_interval_us() const { return lease_interval_us_; }; + + INHERIT_TO_STRING_KV("ObDupTableTsSyncResponse", + ObDupTableTsSyncResponse, + K(request_ts_), + K(lease_interval_us_)); + +private: + int64_t request_ts_; + int64_t lease_interval_us_; +}; + 
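+// Usage sketch (variable names such as max_replayed_scn, self_addr, leader_addr
+// and lease_interval_us are placeholders): a follower fills the ts-sync fields
+// plus its request timestamp and desired lease interval, then stamps the routing
+// header before handing the message to ObDupTableRpc::post_msg().
+//
+//   ObDupTableLeaseRequest req(max_replayed_scn, max_commit_scn, max_read_scn,
+//                              ObTimeUtility::current_time(), lease_interval_us);
+//   req.set_header(self_addr, leader_addr, self_addr, ls_id);
+//   if (req.is_valid()) { // also requires request_ts_ > 0 and lease_interval_us_ > 0
+//     // post req to the leader
+//   }
+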
+class ObDupTableBeforePrepareRequest : public ObDupTableMsgBase +{ + OB_UNIS_VERSION(1); + +public: + ObDupTableBeforePrepareRequest() { reset(); } + ObDupTableBeforePrepareRequest(const ObTransID &tx_id, const share::SCN &before_prepare_version) + : tx_id_(tx_id), before_prepare_version_(before_prepare_version) + {} + + void reset() + { + ObDupTableMsgBase::reset(); + tx_id_.reset(); + before_prepare_version_.reset(); + } + + bool is_valid() const + { + return ObDupTableMsgBase::is_valid() && tx_id_.is_valid() && before_prepare_version_.is_valid(); + } + + const ObTransID &get_tx_id() { return tx_id_; } + const share::SCN &get_before_prepare_version() { return before_prepare_version_; } + + INHERIT_TO_STRING_KV("ObDupTableMsgBase", + ObDupTableMsgBase, + K(tx_id_), + K(before_prepare_version_)); + +private: + ObTransID tx_id_; + share::SCN before_prepare_version_; +}; + +} // namespace transaction + +/******************************************************* + * Dup_Table RPC + *******************************************************/ +namespace obrpc +{ +class ObDupTableProxy : public obrpc::ObRpcProxy +{ +public: + DEFINE_TO(ObDupTableProxy); + + RPC_AP(PRZ post_msg, OB_DUP_TABLE_LEASE_REQUEST, (transaction::ObDupTableLeaseRequest)); + + RPC_AP(PR3 post_msg, OB_DUP_TABLE_TS_SYNC_REQUEST, (transaction::ObDupTableTsSyncRequest)); + + RPC_AP(PR3 post_msg, OB_DUP_TABLE_TS_SYNC_RESPONSE, (transaction::ObDupTableTsSyncResponse)); + + RPC_AP(PR3 post_msg, + OB_DUP_TABLE_BEFORE_PREPARE_REQUEST, + (transaction::ObDupTableBeforePrepareRequest)); +}; + +class ObDupTableLeaseRequestP + : public ObRpcProcessor> +{ +public: + explicit ObDupTableLeaseRequestP() {} + +protected: + int process(); + +private: + DISALLOW_COPY_AND_ASSIGN(ObDupTableLeaseRequestP); + +private: +}; + +class ObDupTableTsSyncRequestP + : public ObRpcProcessor> +{ +public: + explicit ObDupTableTsSyncRequestP() {} + +protected: + int process(); + +private: + DISALLOW_COPY_AND_ASSIGN(ObDupTableTsSyncRequestP); +}; + +class ObDupTableTsSyncResponseP + : public ObRpcProcessor> +{ +public: + explicit ObDupTableTsSyncResponseP() {} + +protected: + int process(); + +private: + DISALLOW_COPY_AND_ASSIGN(ObDupTableTsSyncResponseP); +}; + +class ObDupTableBeforePrepareRequestP + : public ObRpcProcessor> +{ +public: + explicit ObDupTableBeforePrepareRequestP() {} + +protected: + int process(); + +private: + DISALLOW_COPY_AND_ASSIGN(ObDupTableBeforePrepareRequestP); +}; + +} // namespace obrpc + +namespace transaction +{ + +class ObDupTableRpc +{ +public: + int init(rpc::frame::ObReqTransport *req_transport, const oceanbase::common::ObAddr &addr); + + template + int post_msg(const common::ObAddr dst, DupTableMsgType &msg); + +private: + obrpc::ObDupTableProxy proxy_; +}; + +template +int ObDupTableRpc::post_msg(const common::ObAddr dst, DupTableMsgType &msg) +{ + int ret = OB_SUCCESS; + if (!dst.is_valid() || !msg.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid msg or addr", K(dst), K(msg)); + } else if (OB_FAIL(proxy_.to(dst).by(MTL_ID()).post_msg(msg, nullptr))) { + DUP_TABLE_LOG(WARN, "post msg error", K(ret), K(msg)); + } + return ret; +} + +} // namespace transaction + +} // namespace oceanbase +#endif diff --git a/src/storage/tx/ob_dup_table_lease.cpp b/src/storage/tx/ob_dup_table_lease.cpp new file mode 100644 index 0000000000..3b8fa8e943 --- /dev/null +++ b/src/storage/tx/ob_dup_table_lease.cpp @@ -0,0 +1,1108 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. 
+// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +// #include "lib/utility/ob_print_utils.h" +#include "observer/ob_server_struct.h" +#include "share/rc/ob_tenant_base.h" +#include "storage/tx/ob_dup_table_base.h" +#include "storage/tx/ob_dup_table_lease.h" +#include "storage/tx/ob_dup_table_ts_sync.h" +#include "storage/tx/ob_dup_table_util.h" +#include "storage/tx/ob_location_adapter.h" +#include "storage/tx/ob_trans_service.h" + +namespace oceanbase +{ +namespace transaction +{ + +const int64_t ObDupTableLSLeaseMgr::LEASE_UNIT = ObDupTableLoopWorker::LOOP_INTERVAL; +const int64_t ObDupTableLSLeaseMgr::DEFAULT_LEASE_INTERVAL = ObDupTableLSLeaseMgr::LEASE_UNIT * 60; +const int64_t ObDupTableLSLeaseMgr::MIN_LEASE_INTERVAL = ObDupTableLSLeaseMgr::LEASE_UNIT * 60; + +int ObDupTableLSLeaseMgr::init(ObDupTableLSHandler *dup_ls_handle) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(dup_ls_handle)) { + ret = OB_INVALID_ARGUMENT; + } else if (OB_FAIL(leader_lease_map_.create(32, "DUP_TABLE"))) { + DUP_TABLE_LOG(WARN, "create leader_lease_map_ failed", K(ret)); + } else { + follower_lease_info_.reset(); + dup_ls_handle_ptr_ = dup_ls_handle; + is_master_ = false; + is_stopped_ = false; + ls_id_ = dup_ls_handle->get_ls_id(); + } + + return ret; +} + +void ObDupTableLSLeaseMgr::reset() +{ + ls_id_.reset(); + is_master_ = false; + is_stopped_ = true; + dup_ls_handle_ptr_ = nullptr; + leader_lease_map_.destroy(); + follower_lease_info_.reset(); + last_lease_req_post_time_ = 0; + last_lease_req_cache_handle_time_ = 0; + if (OB_NOT_NULL(lease_diag_info_log_buf_)) { + ob_free(lease_diag_info_log_buf_); + } + lease_diag_info_log_buf_ = nullptr; +} + +int ObDupTableLSLeaseMgr::recive_lease_request(const ObDupTableLeaseRequest &lease_req) +{ + int ret = OB_SUCCESS; + SpinWLockGuard guard(lease_lock_); + + DupTableLeaderLeaseInfo tmp_lease_info; + + if (ATOMIC_LOAD(&is_stopped_)) { + ret = OB_NOT_INIT; + } else if (!is_master()) { + ret = OB_NOT_MASTER; + } else if (OB_FAIL(leader_lease_map_.get_refactored(lease_req.get_src(), tmp_lease_info))) { + if (OB_HASH_NOT_EXIST != ret) { + DUP_TABLE_LOG(WARN, "get from lease_req_cache_ failed", K(ret), K(lease_req)); + } else { + DUP_TABLE_LOG(INFO, "first lease request from new dup_table follower", K(ret), K(lease_req)); + } + } else if (tmp_lease_info.cache_lease_req_.is_ready()) { + DUP_TABLE_LOG(INFO, "leader lease info is logging which can not recive new lease request", + K(lease_req.get_src())); + } else if (tmp_lease_info.cache_lease_req_.request_ts_ < lease_req.get_request_ts()) { + // renew request ts before submit lease log + ret = OB_HASH_NOT_EXIST; + } + + if (OB_HASH_NOT_EXIST == ret) { + tmp_lease_info.cache_lease_req_.renew_lease_req(lease_req.get_request_ts(), + lease_req.get_lease_interval_us()); + + if (OB_FAIL(leader_lease_map_.set_refactored(lease_req.get_src(), tmp_lease_info, 1))) { + DUP_TABLE_LOG(WARN, "insert into lease_req_cache_ failed", K(ret), K(lease_req)); + } + } + + DUP_TABLE_LOG(INFO, "cache lease request", K(ret), K(leader_lease_map_.size()), K(lease_req), + K(tmp_lease_info)); + return ret; +} + +int ObDupTableLSLeaseMgr::prepare_serialize(int64_t 
&max_ser_size, + DupTableLeaseItemArray &lease_header_array) +{ + int ret = OB_SUCCESS; + max_ser_size = 0; + lease_header_array.reuse(); + int64_t loop_start_time = ObTimeUtility::current_time(); + common::ObAddr tmp_addr; + DupTableTsInfo local_ts_info; + + SpinWLockGuard guard(lease_lock_); + + if (ATOMIC_LOAD(&is_stopped_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "the tablets mgr is not running", K(ret), K(is_stopped_), K(ls_id_)); + } else { + if (need_retry_lease_operation_(loop_start_time, last_lease_req_cache_handle_time_)) { + if (OB_FAIL(dup_ls_handle_ptr_->get_local_ts_info(local_ts_info))) { + DUP_TABLE_LOG(WARN, "get local ts info failed", K(ret), K(ls_id_), K(local_ts_info)); + } else { + LeaseReqCacheHandler req_handler(this, loop_start_time, local_ts_info.max_replayed_scn_, + lease_header_array); + if (OB_FAIL(hash_for_each_remove(tmp_addr, leader_lease_map_, req_handler))) { + DUP_TABLE_LOG(WARN, "handle lease requests failed", K(ret)); + } + if (req_handler.get_error_ret() != OB_SUCCESS) { + lease_header_array.reuse(); + ret = req_handler.get_error_ret(); + } else { + max_ser_size += req_handler.get_max_ser_size(); + } + + if (OB_SUCC(ret) && req_handler.get_renew_lease_count() > 0) { + DUP_TABLE_LOG(INFO, "renew lease list in the log", K(ret), K(req_handler), + K(lease_header_array), K(max_ser_size), K(loop_start_time), + K(last_lease_req_cache_handle_time_)); + } + + if (OB_SUCC(ret) && req_handler.get_max_ser_size() > 0) { + last_lease_req_cache_handle_time_ = loop_start_time; + } + } + } + } + + DUP_TABLE_LOG(DEBUG, "prepare serialize lease log", K(ret), K(max_ser_size), + K(lease_header_array), K(loop_start_time), K(last_lease_req_cache_handle_time_)); + return ret; +} + +int ObDupTableLSLeaseMgr::serialize_lease_log(const DupTableLeaseItemArray &lease_header_array, + char *buf, + const int64_t buf_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + + int64_t tmp_pos = pos; + // SpinRLockGuard guard(lease_lock_); + + for (int i = 0; i < lease_header_array.count() && OB_SUCC(ret); i++) { + if (OB_FAIL(lease_header_array[i].log_header_.serialize(buf, buf_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize lease log header failed", K(ret), K(lease_header_array[i])); + } else if (lease_header_array[i].log_header_.is_durable_lease_log()) { + DupTableDurableLease tmp_durable_lease = lease_header_array[i].durable_lease_; + DupTableDurableLeaseLogBody durable_log_body(tmp_durable_lease); + if (OB_FAIL(durable_log_body.serialize(buf, buf_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize durable lease log body failed", K(ret), K(durable_log_body)); + } + } + } + + if (OB_SUCC(ret)) { + pos = tmp_pos; + } + + return ret; +} + +int ObDupTableLSLeaseMgr::deserialize_lease_log(DupTableLeaseItemArray &lease_header_array, + const char *buf, + const int64_t data_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + + int64_t tmp_pos = pos; + + lease_header_array.reuse(); + + SpinWLockGuard guard(lease_lock_); + + if (OB_ISNULL(buf) || data_len <= 0 || pos <= 0) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid argument", K(ret), KP(buf), K(data_len), K(pos)); + } else { + DupTableLeaseLogHeader lease_log_header; + DupTableLeaderLeaseInfo leader_lease_info; + // DupTableDurableLeaseLogBody durable_lease_log_body; + while (OB_SUCC(ret) && tmp_pos < data_len) { + lease_log_header.reset(); + leader_lease_info.reset(); + DupTableDurableLeaseLogBody durable_lease_log_body(leader_lease_info.confirmed_lease_info_); + if (OB_FAIL(lease_log_header.deserialize(buf, data_len, 
tmp_pos))) { + DUP_TABLE_LOG(WARN, "deserialize lease log header failed", K(ret), K(lease_log_header), + K(tmp_pos), K(data_len)); + } else if (lease_log_header.is_durable_lease_log()) { + if (OB_FAIL(durable_lease_log_body.deserialize(buf, data_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "deserialize leader lease info failed", K(ret)); + } else if (OB_FAIL(lease_header_array.push_back(DupTableLeaseItem( + lease_log_header, leader_lease_info.confirmed_lease_info_)))) { + DUP_TABLE_LOG(WARN, "push back leader_lease_info failed", K(ret), K(leader_lease_info)); + } else if (OB_FALSE_IT(leader_lease_info.lease_expired_ts_ = + leader_lease_info.confirmed_lease_info_.request_ts_ + + leader_lease_info.confirmed_lease_info_.lease_interval_us_)) { + // do nothing + } else if (OB_FAIL(leader_lease_map_.set_refactored(lease_log_header.get_lease_owner(), + leader_lease_info, 1))) { + DUP_TABLE_LOG(WARN, "insert into leader_lease_map_ for replay failed", K(ret), + K(lease_log_header), K(leader_lease_info)); + } + } + } + } + return ret; +} + +int ObDupTableLSLeaseMgr::lease_log_submitted(const bool submit_result, + const share::SCN &lease_log_scn, + const bool for_replay, + const DupTableLeaseItemArray &lease_header_array) +{ + int ret = OB_SUCCESS; + + UNUSED(submit_result); + UNUSED(lease_log_scn); + UNUSED(for_replay); + UNUSED(lease_header_array); + + return ret; +} + +int ObDupTableLSLeaseMgr::lease_log_synced(const bool sync_result, + const share::SCN &lease_log_scn, + const bool for_replay, + const DupTableLeaseItemArray &lease_header_array) +{ + int ret = OB_SUCCESS; + + if (OB_SUCC(ret) && !for_replay) { + for (int i = 0; i < lease_header_array.count() && OB_SUCC(ret); i++) { + DupTableLeaderLeaseInfo *leader_lease_ptr = nullptr; + common::ObAddr lease_owner = lease_header_array[i].log_header_.get_lease_owner(); + if (OB_ISNULL(leader_lease_ptr = leader_lease_map_.get(lease_owner))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(ERROR, "null leader lease info ptr", K(ret), K(lease_header_array[i])); + } else { + if (sync_result) { + // if cache in invalid or preapre state, do nothing + if (leader_lease_ptr->cache_lease_req_.is_ready()) { + if (leader_lease_ptr->cache_lease_req_.request_ts_ + != lease_header_array[i].durable_lease_.request_ts_) { + DUP_TABLE_LOG(WARN, "lease req cache not equal to durable lease", + K(lease_header_array[i].durable_lease_), + K(leader_lease_ptr->cache_lease_req_)); + // rewrite req cache + leader_lease_ptr->cache_lease_req_.request_ts_ = + lease_header_array[i].durable_lease_.request_ts_; + leader_lease_ptr->cache_lease_req_.lease_acquire_ts_ = ObTimeUtility::current_time(); + // update leader expired lease + leader_lease_ptr->lease_expired_ts_ = + leader_lease_ptr->cache_lease_req_.lease_acquire_ts_ + DEFAULT_LEASE_INTERVAL; + // reset req cache to invalid state + leader_lease_ptr->cache_lease_req_.set_invalid(); + } else { + leader_lease_ptr->confirmed_lease_info_ = lease_header_array[i].durable_lease_; + leader_lease_ptr->lease_expired_ts_ = + leader_lease_ptr->cache_lease_req_.lease_acquire_ts_ + + leader_lease_ptr->cache_lease_req_.lease_interval_us_; + // reset req cache to invalid state + leader_lease_ptr->cache_lease_req_.set_invalid(); + } + } + } else { + // if log sync failed, allow renew lease req + leader_lease_ptr->cache_lease_req_.set_invalid(); + } + } + } + } + + if (OB_SUCC(ret) && sync_result && for_replay) { + if (OB_FAIL(follower_try_acquire_lease(lease_log_scn))) { + DUP_TABLE_LOG(WARN, "acquire lease from lease log error", K(ret), 
K(lease_log_scn)); + } + } + + if (lease_header_array.count() > 0) { + DUP_TABLE_LOG(INFO, "lease log sync", K(ret), K(sync_result), K(for_replay), K(lease_log_scn), + K(lease_header_array), K(is_master())); + } + return ret; +} + +// int ObDupTableLSLeaseMgr::log_cb_success() +// { +// int ret = OB_SUCCESS; +// SpinWLockGuard guard(lease_lock_); +// +// const bool is_success = true; +// LeaseDurableHandler durable_handler(is_success); +// +// if (OB_FAIL(hash_for_each_update(leader_lease_map_, durable_handler))) { +// DUP_TABLE_LOG(WARN, "update durable lease info", K(ret), K(is_success)); +// } +// +// return ret; +// } +// +// int ObDupTableLSLeaseMgr::log_cb_failure() +// { +// int ret = OB_SUCCESS; +// SpinWLockGuard guard(lease_lock_); +// +// const bool is_success = false; +// LeaseDurableHandler durable_handler(is_success); +// +// if (OB_FAIL(hash_for_each_update(leader_lease_map_, durable_handler))) { +// DUP_TABLE_LOG(WARN, "update durable lease info", K(ret), K(is_success)); +// } +// +// return ret; +// } + +bool ObDupTableLSLeaseMgr::can_grant_lease_(const common::ObAddr &addr, + const share::SCN &local_max_applyed_scn, + const DupTableLeaderLeaseInfo &lease_info) +{ + bool lease_success = true; + int ret = OB_SUCCESS; + const share::ObLSID ls_id = ls_id_; + + DupTableTsInfo cache_ts_info; + if (OB_FAIL(dup_ls_handle_ptr_->get_cache_ts_info(addr, cache_ts_info))) { + lease_success = false; + DUP_TABLE_LOG(WARN, "Not allowed to acquire lease - get cache ts info failed", K(ret), K(ls_id), + K(addr), K(cache_ts_info), K(local_max_applyed_scn), K(lease_info)); + } else if (!cache_ts_info.max_replayed_scn_.is_valid() || !local_max_applyed_scn.is_valid()) { + lease_success = false; + DUP_TABLE_LOG(WARN, "Not allowed to acquire lease - invalid applyed scn", K(ret), K(ls_id), + K(addr), K(cache_ts_info), K(local_max_applyed_scn), K(lease_info)); + } else { + + uint64_t local_max_applied_ts = local_max_applyed_scn.get_val_for_gts(); + uint64_t follower_replayed_ts = cache_ts_info.max_replayed_scn_.get_val_for_gts(); + const uint64_t MAX_APPLIED_SCN_INTERVAL = DEFAULT_LEASE_INTERVAL * 1000; /*us -> ns*/ + + if (local_max_applied_ts > follower_replayed_ts + && local_max_applied_ts - follower_replayed_ts >= MAX_APPLIED_SCN_INTERVAL) { + lease_success = false; + DUP_TABLE_LOG(WARN, "Not allowed to acquire lease - slow replay", K(ret), K(ls_id), K(addr), + K(local_max_applied_ts), K(follower_replayed_ts), + K(local_max_applied_ts - follower_replayed_ts), K(MAX_APPLIED_SCN_INTERVAL), + K(cache_ts_info), K(lease_info)); + } + } + return lease_success; +} + +int ObDupTableLSLeaseMgr::handle_lease_req_cache_(int64_t loop_start_time, + const share::SCN &local_max_applyed_scn, + const common::ObAddr &addr, + DupTableLeaderLeaseInfo &single_lease_info) +{ + int ret = OB_SUCCESS; + if (!single_lease_info.cache_lease_req_.is_prepare()) { + // do nothing + if (single_lease_info.cache_lease_req_.is_invalid()) { + DUP_TABLE_LOG(DEBUG, "No lease request from this follower", K(addr), K(single_lease_info)); + } else if (single_lease_info.cache_lease_req_.is_ready()) { + DUP_TABLE_LOG(DEBUG, "the lease request is still logging", K(ret), K(addr), + K(single_lease_info)); + } else { + // error state + DUP_TABLE_LOG(WARN, "unexpected lease request cache for this follower", K(addr), + K(single_lease_info)); + } + } else if (can_grant_lease_(addr, local_max_applyed_scn, single_lease_info)) { + // grant lease + single_lease_info.cache_lease_req_.grant_lease_success(loop_start_time); + } else { + 
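+    // can_grant_lease_() refused this round: the follower's cached ts info is
+    // missing, an applied/replayed scn is invalid, or the follower's replay lags
+    // the leader by more than the lease interval. Invalidate the cached request
+    // so a newer request_ts from the follower can be accepted later.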
single_lease_info.cache_lease_req_.grant_lease_failed(); + } + + DUP_TABLE_LOG(DEBUG, "handle lease request cache", K(ret), K(addr), K(loop_start_time), + K(single_lease_info)); + + return ret; +} + +// int ObDupTableLSLeaseMgr::leader_handle(bool &need_log) +// { +// int ret = OB_SUCCESS; +// +// SpinWLockGuard guard(lease_lock_); +// +// common::ObAddr tmp_addr; +// int64_t loop_start_time = ObTimeUtility::current_time(); +// DupTableTsInfo local_ts_info; +// need_log = false; +// +// if (need_retry_lease_operation_(loop_start_time, last_lease_req_cache_handle_time_)) { +// if (OB_FAIL(dup_ls_handle_ptr_->get_local_ts_info(local_ts_info))) { +// need_log = false; +// DUP_TABLE_LOG(WARN, "get local ts info failed", K(ret), K(ls_id_), K(local_ts_info)); +// } else { +// LeaseReqCacheHandler req_handler(this, loop_start_time, local_ts_info.max_replayed_scn_); +// if (OB_FAIL(hash_for_each_remove(tmp_addr, leader_lease_map_, req_handler))) { +// DUP_TABLE_LOG(WARN, "handle lease requests failed", K(ret)); +// } +// +// last_lease_req_cache_handle_time_ = loop_start_time; +// if (req_handler.get_lease_changed()) { +// need_log = true; +// } else { +// need_log = false; +// } +// } +// } else { +// need_log = false; +// } +// DUP_TABLE_LOG(DEBUG, "leader handler", K(loop_start_time), K(need_log)); +// return ret; +// } +// +int ObDupTableLSLeaseMgr::follower_handle() +{ + int ret = OB_SUCCESS; + DupTableTsInfo local_ts_info; + if (ATOMIC_LOAD(&is_stopped_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "dup table lease mgr is not inited", K(ret)); + } else if (dup_ls_handle_ptr_->get_local_ts_info(local_ts_info)) { + DUP_TABLE_LOG(WARN, "get local ts info failed", K(ret)); + } else { + SpinWLockGuard guard(lease_lock_); + + ObILocationAdapter *location_adapter = MTL(ObTransService *)->get_location_adapter(); + const share::ObLSID cur_ls_id = ls_id_; + const common::ObAddr self_addr = MTL(ObTransService *)->get_server(); + int64_t loop_start_time = ObTimeUtility::current_time(); + + if (OB_ISNULL(location_adapter) || !cur_ls_id.is_valid() || loop_start_time <= 0) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret), KP(location_adapter), K(cur_ls_id), + K(loop_start_time)); + } else if (need_post_lease_request_(loop_start_time)) { + update_request_ts_(loop_start_time); + ObDupTableLeaseRequest req(local_ts_info.max_replayed_scn_, local_ts_info.max_commit_version_, + local_ts_info.max_read_version_, + follower_lease_info_.durable_lease_.request_ts_, + DEFAULT_LEASE_INTERVAL); + common::ObAddr leader_addr; + if (OB_FAIL(location_adapter->nonblock_get_leader(GCONF.cluster_id, MTL_ID(), cur_ls_id, + leader_addr))) { + DUP_TABLE_LOG(WARN, "get ls leader failed", K(ret)); + (void)location_adapter->nonblock_renew(GCONF.cluster_id, MTL_ID(), cur_ls_id); + } else if (FALSE_IT(req.set_header(self_addr, leader_addr, self_addr, cur_ls_id))) { + DUP_TABLE_LOG(WARN, "set msg header failed", K(ret)); + } else if (OB_FAIL( + MTL(ObTransService *)->get_dup_table_rpc_impl().post_msg(leader_addr, req))) { + DUP_TABLE_LOG(WARN, "post lease request failed", K(ret), K(leader_addr)); + } else { + last_lease_req_post_time_ = loop_start_time; + DUP_TABLE_LOG(INFO, "post lease request success", K(ret), K(leader_addr), K(req), + K(cur_ls_id)); + } + } + } + return ret; +} + +int ObDupTableLSLeaseMgr::follower_try_acquire_lease(const share::SCN &lease_log_scn) +{ + int ret = OB_SUCCESS; + + // SpinRLockGuard guard(lease_lock_); + + const common::ObAddr self_addr = MTL(ObTransService 
*)->get_server(); + DupTableLeaderLeaseInfo tmp_leader_lease_info; + + if (ATOMIC_LOAD(&is_stopped_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "dup_table lease_mgr not init", K(ret)); + } else if (!lease_log_scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret), K(lease_log_scn)); + } else if (OB_FAIL(leader_lease_map_.get_refactored(self_addr, tmp_leader_lease_info))) { + if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + DUP_TABLE_LOG(DEBUG, "It can not get lease from this lease log", K(lease_log_scn), + K(self_addr)); + } else { + DUP_TABLE_LOG(WARN, "get lease info from leader_lease_map_ failed", K(ret), K(self_addr)); + } + + } else if (tmp_leader_lease_info.confirmed_lease_info_.request_ts_ + == follower_lease_info_.durable_lease_.request_ts_) { + + const share::ObLSID ls_id = ls_id_; + if (follower_lease_info_.lease_expired_ts_ + < tmp_leader_lease_info.confirmed_lease_info_.lease_interval_us_ + + tmp_leader_lease_info.confirmed_lease_info_.request_ts_) { + follower_lease_info_.last_lease_scn_ = lease_log_scn; + const bool acquire_new_lease = tmp_leader_lease_info.confirmed_lease_info_.request_ts_ + > follower_lease_info_.lease_expired_ts_; + if (acquire_new_lease) { + follower_lease_info_.lease_acquire_scn_ = lease_log_scn; + } + follower_lease_info_.durable_lease_.lease_interval_us_ = + tmp_leader_lease_info.confirmed_lease_info_.lease_interval_us_; + follower_lease_info_.lease_expired_ts_ = + follower_lease_info_.durable_lease_.request_ts_ + + follower_lease_info_.durable_lease_.lease_interval_us_; + + DUP_TABLE_LOG(INFO, "The follower can get new lease from this lease log", K(ret), K(ls_id), + K(lease_log_scn), K(self_addr), K(acquire_new_lease), K(follower_lease_info_)); + } else { + DUP_TABLE_LOG(DEBUG, "No new lease in this lease log", K(ret), K(ls_id), K(lease_log_scn), + K(self_addr), K(follower_lease_info_)); + } + } else { + DUP_TABLE_LOG( + INFO, "request_ts_ in lease log is not match, wait for new lease", + "request_ts_in_lease_log", tmp_leader_lease_info.confirmed_lease_info_.request_ts_, + "request_ts_in_memory", follower_lease_info_.durable_lease_.request_ts_, + K(follower_lease_info_.lease_expired_ts_), K(follower_lease_info_.last_lease_scn_), + K(follower_lease_info_.lease_acquire_scn_)); + } + + return ret; +} + +int ObDupTableLSLeaseMgr::get_lease_valid_array(LeaseAddrArray &lease_array) +{ + int ret = OB_SUCCESS; + + SpinRLockGuard guard(lease_lock_); + GetLeaseValidAddrFunctor functor(lease_array); + + if (ATOMIC_LOAD(&is_stopped_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "dup_table lease_mgr not init", K(ret)); + } else if (OB_FAIL(hash_for_each_update(leader_lease_map_, functor))) { + DUP_TABLE_LOG(WARN, "get lease valid array from leader_lease_map failed", K(ret)); + } + + return ret; +} + +int ObDupTableLSLeaseMgr::leader_takeover(bool is_resume) +{ + int ret = OB_SUCCESS; + + SpinWLockGuard guard(lease_lock_); + + // clear follower lease info + follower_lease_info_.reset(); + + if (!is_resume) { + // If it is new leader, we will continue to use the last lease list from the last leader. + // Each follower acquire lease at this time in leader's view. + // The lease length of leader will be larger than follower. 
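The comment above describes the takeover policy: a new leader keeps the previous leader's lease list and restarts each confirmed lease's expiration clock from the takeover time, which is why a lease looks longer from the leader's side than from the follower's. A simplified model of that policy, assuming a plain std::map keyed by a follower id in place of the real lease map:

#include <cstdint>
#include <map>
#include <string>

struct LeaseEntry {
  int64_t lease_interval_us = 0;
  int64_t lease_expired_ts = 0;
};

// Mirrors what LeaderActiveLeaseFunctor does below: expired_ts = takeover time + confirmed interval.
void extend_on_takeover(std::map<std::string, LeaseEntry> &lease_map, int64_t takeover_time_us)
{
  for (auto &kv : lease_map) {
    kv.second.lease_expired_ts = takeover_time_us + kv.second.lease_interval_us;
  }
}

int main()
{
  std::map<std::string, LeaseEntry> leases = {{"follower_a", {5000000, 0}}};
  extend_on_takeover(leases, 1700000000000000);
  return leases["follower_a"].lease_expired_ts == 1700000000000000 + 5000000 ? 0 : 1;
}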
+ LeaderActiveLeaseFunctor functor; + if (OB_FAIL(hash_for_each_update(leader_lease_map_, functor))) { + DUP_TABLE_LOG(WARN, "update lease_expired_ts_ when leader active failed", K(ret)); + } + } + + ATOMIC_STORE(&is_master_, true); + + return ret; +} + +int ObDupTableLSLeaseMgr::leader_revoke() +{ + int ret = OB_SUCCESS; + + SpinWLockGuard guard(lease_lock_); + + // only can reset follower lease, + // can not reset leader lease list + follower_lease_info_.reset(); + + ATOMIC_STORE(&is_master_, false); + return ret; +} + +bool ObDupTableLSLeaseMgr::check_follower_lease_serving(const bool is_election_leader, + const share::SCN &max_replayed_scn) +{ + SpinRLockGuard guard(lease_lock_); + bool follower_lease_serving = false; + if (is_election_leader) { + follower_lease_serving = true; + DUP_TABLE_LOG(INFO, "no need to check follower serving on a leader", K(is_election_leader), + K(max_replayed_scn)); + } else if (follower_lease_info_.lease_expired_ts_ <= ObTimeUtility::current_time()) { + follower_lease_serving = false; + DUP_TABLE_LOG(INFO, "dup table lease has been expired", K(follower_lease_serving), + K(is_election_leader), K(max_replayed_scn), K(follower_lease_info_)); + } else if (follower_lease_info_.lease_acquire_scn_.is_valid() + || follower_lease_info_.lease_acquire_scn_ <= max_replayed_scn) { + follower_lease_serving = true; + } + return follower_lease_serving; +} + +void ObDupTableLSLeaseMgr::print_lease_diag_info_log(const bool is_master) +{ + SpinRLockGuard guard(lease_lock_); + int ret = OB_SUCCESS; + + const uint64_t LEASE_PRINT_BUF_LEN = + DupTableDiagStd::DUP_DIAG_INFO_LOG_BUF_LEN[DupTableDiagStd::TypeIndex::LEASE_INDEX]; + const int64_t tenant_id = MTL_ID(); + const ObLSID ls_id = ls_id_; + const int64_t cur_time = ObTimeUtility::current_time(); + + if (OB_ISNULL(lease_diag_info_log_buf_)) { + if (OB_ISNULL(lease_diag_info_log_buf_ = + static_cast(ob_malloc(LEASE_PRINT_BUF_LEN, "DupTableDiag")))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + _DUP_TABLE_LOG(WARN, "%salloc lease diag info buf failed, ret=%d, ls_id=%lu, cur_time=%s", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id(), + common::ObTime2Str::ob_timestamp_str(cur_time)); + } + } + + if (OB_SUCC(ret)) { + if (is_master) { + + DiagInfoGenerator diag_info_gen(true, lease_diag_info_log_buf_, LEASE_PRINT_BUF_LEN, + cur_time); + if (OB_FAIL(hash_for_each_update(leader_lease_map_, diag_info_gen))) { + _DUP_TABLE_LOG(WARN, "%sprint leader lease list failed, ret=%d, ls_id=%lu, cur_time=%s", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id(), + common::ObTime2Str::ob_timestamp_str(cur_time)); + } + + lease_diag_info_log_buf_[MIN(diag_info_gen.get_buf_pos(), LEASE_PRINT_BUF_LEN - 1)] = '\0'; + + _DUP_TABLE_LOG(INFO, "[%sLeader Lease List] tenant: %lu, ls: %lu , current_time = %s\n%s", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, tenant_id, ls_id.id(), + common::ObTime2Str::ob_timestamp_str(cur_time), lease_diag_info_log_buf_); + } else { + _DUP_TABLE_LOG( + INFO, + "[%sFollower Lease Info] tenant: %lu, ls: %lu , current_time = %s\n" + "%s[%sFollower Lease] is_expired=%s, request_ts=%lu, request_ts(date)=%s, " + "lease_expired_time=%s, " + "lease_interval_us=%lu, last_lease_scn=%s, lease_acquire_scn=%s", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, tenant_id, ls_id.id(), + common::ObTime2Str::ob_timestamp_str(cur_time), DupTableDiagStd::DUP_DIAG_INDENT_SPACE, + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, + to_cstring(cur_time >= follower_lease_info_.lease_expired_ts_), + follower_lease_info_.durable_lease_.request_ts_, + 
common::ObTime2Str::ob_timestamp_str(follower_lease_info_.durable_lease_.request_ts_), + common::ObTime2Str::ob_timestamp_str(follower_lease_info_.lease_expired_ts_), + follower_lease_info_.durable_lease_.lease_interval_us_, + to_cstring(follower_lease_info_.last_lease_scn_), + to_cstring(follower_lease_info_.lease_acquire_scn_)); + } + } +} + +int ObDupTableLSLeaseMgr::get_lease_mgr_stat(FollowerLeaseMgrStatArr &collect_arr) +{ + int ret = OB_SUCCESS; + const int64_t tenant_id = MTL_ID(); + const ObAddr leader_addr = GCTX.self_addr(); + const int64_t cur_time = ObTimeUtility::current_time(); + SpinRLockGuard r_lock(lease_lock_); + + if (OB_FAIL(collect_arr.prepare_allocate(leader_lease_map_.size()))) { + DUP_TABLE_LOG(WARN, "pre allocate failed", K(ret)); + } else { + LeaderLeaseMgrStatFunctor collect_handler(collect_arr, tenant_id, cur_time, leader_addr, + dup_ls_handle_ptr_->get_ls_id()); + + if (OB_FAIL(hash_for_each_update(leader_lease_map_, collect_handler))) { + DUP_TABLE_LOG(WARN, "collect leader mgr info failed", K(ret)); + } + } + DUP_TABLE_LOG(DEBUG, "collect leader mgr info", K(ret), K(collect_arr)); + return ret; +} + +void ObDupTableLSLeaseMgr::update_request_ts_(int64_t loop_start_time) +{ + // set self_request_ts_ = 0 when replay lease log success + int64_t cur_lease_interval = follower_lease_info_.durable_lease_.lease_interval_us_ > 0 + ? follower_lease_info_.durable_lease_.lease_interval_us_ + : DEFAULT_LEASE_INTERVAL; + + if (loop_start_time - follower_lease_info_.durable_lease_.request_ts_ >= cur_lease_interval / 2) { + follower_lease_info_.durable_lease_.request_ts_ = loop_start_time; + } +} + +bool ObDupTableLSLeaseMgr::need_post_lease_request_(int64_t loop_start_time) +{ + bool need_post = false; + + if (need_retry_lease_operation_(loop_start_time, last_lease_req_post_time_) + && (loop_start_time >= follower_lease_info_.lease_expired_ts_ + || follower_lease_info_.lease_expired_ts_ - loop_start_time + <= follower_lease_info_.durable_lease_.lease_interval_us_ / 10 * 4)) { + need_post = true; + } + + return need_post; +} + +bool ObDupTableLSLeaseMgr::need_retry_lease_operation_(const int64_t cur_time, + const int64_t last_time) +{ + return cur_time - last_time >= MIN_LEASE_INTERVAL / 10 * 2; +} + +bool ObDupTableLSLeaseMgr::LeaseReqCacheHandler::operator()( + common::hash::HashMapPair &hash_pair) +{ + bool will_remove = false; + int ret = OB_SUCCESS; + + DupTableLeaseItem item; + if (OB_FAIL(lease_mgr_ptr_->handle_lease_req_cache_(loop_start_time_, local_max_applyed_scn_, + hash_pair.first, hash_pair.second))) { + error_ret = ret; + DUP_TABLE_LOG(WARN, "handle lease request failed", K(ret)); + } else if (hash_pair.second.cache_lease_req_.is_ready()) { + renew_lease_count_++; + item.log_header_.set_lease_owner(hash_pair.first); + item.durable_lease_.request_ts_ = hash_pair.second.cache_lease_req_.request_ts_; + item.durable_lease_.lease_interval_us_ = hash_pair.second.cache_lease_req_.lease_interval_us_; + } else if (loop_start_time_ >= hash_pair.second.lease_expired_ts_) { + DUP_TABLE_LOG(INFO, "remove expired lease follower from map", K(ret), K(will_remove), + K(hash_pair.first), K(hash_pair.second)); + will_remove = true; + } else { + item.log_header_.set_lease_owner(hash_pair.first); + item.durable_lease_.request_ts_ = hash_pair.second.confirmed_lease_info_.request_ts_; + item.durable_lease_.lease_interval_us_ = + hash_pair.second.confirmed_lease_info_.lease_interval_us_; + } + + if (!will_remove) { + DupTableDurableLeaseLogBody 
durable_log_body(item.durable_lease_); + max_ser_size_ += item.log_header_.get_serialize_size() + durable_log_body.get_serialize_size(); + if (OB_FAIL(lease_item_array_.push_back(item))) { + error_ret = ret; + DUP_TABLE_LOG(WARN, "push back into lease_item_array_ failed", K(ret), K(item)); + } + } + + return will_remove; +} + +// int ObDupTableLSLeaseMgr::LeaseDurableHandler::operator()( +// common::hash::HashMapPair &hash_pair) +// { +// int ret = OB_SUCCESS; +// +// if (hash_pair.second.cache_lease_req_.is_ready()) { +// if (is_success_) { +// // set durable lease info +// hash_pair.second.confirmed_lease_info_.request_ts_ = +// hash_pair.second.cache_lease_req_.request_ts_; +// +// hash_pair.second.confirmed_lease_info_.lease_interval_us_ = +// hash_pair.second.cache_lease_req_.lease_interval_us_; +// +// hash_pair.second.lease_expired_ts_ = +// hash_pair.second.cache_lease_req_.lease_acquire_ts_ +// + hash_pair.second.cache_lease_req_.lease_interval_us_; +// } +// hash_pair.second.cache_lease_req_.reset(); +// // avoid serializing older lease request than previous log +// hash_pair.second.cache_lease_req_.request_ts_ = +// hash_pair.second.confirmed_lease_info_.request_ts_; +// } +// +// return ret; +// } + +int ObDupTableLSLeaseMgr::GetLeaseValidAddrFunctor::operator()( + common::hash::HashMapPair &hash_pair) +{ + int ret = OB_SUCCESS; + + if (INT64_MAX == cur_time_) { + cur_time_ = ObTimeUtility::current_time(); + } + + /* + +----------------------------------------------------+ + | submit lease log | + +----------------------------------------------------+ + | + | lease A ready + v + +----------------------------------------------------+ + | submit commit info | + +----------------------------------------------------+ + | + | + v + +----------------------------------------------------+ + | commit_info::on_success | + +----------------------------------------------------+ + | + | + v + +----------------------------------------------------+ + | redo sync finish without A | + +----------------------------------------------------+ + | + | + v + +----------------------------------------------------+ + | lease_log::on_success | + +----------------------------------------------------+ + | + | lease A is valid + v + +----------------------------------------------------+ + | read without the trx on A | + | replay ts between (lease_log_scn, commit_info_scn) | + +----------------------------------------------------+ + */ + + if (hash_pair.second.lease_expired_ts_ > cur_time_ + // include a granted logging lease + || hash_pair.second.cache_lease_req_.is_ready()) { + if (OB_FAIL(addr_arr_.push_back(hash_pair.first))) { + DUP_TABLE_LOG(WARN, "push back lease valid array failed", K(ret)); + } + } + + return ret; +} + +int ObDupTableLSLeaseMgr::DiagInfoGenerator::operator()( + const common::hash::HashMapPair &hash_pair) +{ + int ret = OB_SUCCESS; + + const char *addr_str = to_cstring(hash_pair.first); + + ret = ::oceanbase::common::databuff_printf( + info_buf_, info_buf_len_, info_buf_pos_, + "%s[%sConfirmed Lease] owner=%s, is_expired=%s, request_ts=%lu, request_ts(date)=%s, " + "lease_expired_time=%s, lease_interval_us=%lu\n", + DupTableDiagStd::DUP_DIAG_INDENT_SPACE, DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, addr_str, + to_cstring(cur_time_ >= hash_pair.second.lease_expired_ts_), + hash_pair.second.confirmed_lease_info_.request_ts_, + common::ObTime2Str::ob_timestamp_str(hash_pair.second.confirmed_lease_info_.request_ts_), + 
common::ObTime2Str::ob_timestamp_str(hash_pair.second.lease_expired_ts_), + hash_pair.second.confirmed_lease_info_.lease_interval_us_); + + if (OB_SUCC(ret) && need_cache_) { + if (hash_pair.second.cache_lease_req_.is_invalid()) { + + ret = ::oceanbase::common::databuff_printf( + info_buf_, info_buf_len_, info_buf_pos_, + "%s[%sCached Lease] owner=%s, No Lease Request Cache\n", + DupTableDiagStd::DUP_DIAG_INDENT_SPACE, DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, + addr_str); + } else { + + ret = ::oceanbase::common::databuff_printf( + info_buf_, info_buf_len_, info_buf_pos_, + "%s[%sCached Lease] owner=%s, request_ts=%lu, request_ts(date)=%s, " + "handle_request_time=%lu, handle_request_time(date)=%s, request_lease_interval_us " + "=%lu\n", + DupTableDiagStd::DUP_DIAG_INDENT_SPACE, DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, addr_str, + hash_pair.second.cache_lease_req_.request_ts_, + common::ObTime2Str::ob_timestamp_str(hash_pair.second.cache_lease_req_.request_ts_), + hash_pair.second.cache_lease_req_.lease_acquire_ts_, + common::ObTime2Str::ob_timestamp_str(hash_pair.second.cache_lease_req_.lease_acquire_ts_), + hash_pair.second.cache_lease_req_.lease_interval_us_); + } + } + + return ret; +} + +int ObDupTableLSLeaseMgr::LeaderActiveLeaseFunctor::operator()( + common::hash::HashMapPair &hash_pair) +{ + int ret = OB_SUCCESS; + + if (INT64_MAX == cur_time_) { + cur_time_ = ObTimeUtility::current_time(); + } + + hash_pair.second.lease_expired_ts_ = + cur_time_ + hash_pair.second.confirmed_lease_info_.lease_interval_us_; + + return ret; +} + +int ObDupTableLSLeaseMgr::LeaderLeaseMgrStatFunctor::operator()( + const common::hash::HashMapPair &hash_pair) +{ + int ret = OB_SUCCESS; + + if (cnt_ > collect_arr_.count()) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(ERROR, "unexpect err", K(ret), K(cnt_), K(collect_arr_.count())); + } else { + // first get emptry stat + ObDupTableLSLeaseMgrStat &tmp_stat = collect_arr_.at(cnt_); + // second fill content + // tmp_stat.set_addr(leader_addr_); + tmp_stat.set_tenant_id(tenant_id_); + tmp_stat.set_ls_id(ls_id_); + tmp_stat.set_follower_addr(hash_pair.first); + tmp_stat.set_expired_ts(hash_pair.second.lease_expired_ts_); + tmp_stat.set_cached_req_ts(hash_pair.second.cache_lease_req_.request_ts_); + tmp_stat.set_lease_interval(hash_pair.second.confirmed_lease_info_.lease_interval_us_); + tmp_stat.set_grant_req_ts(hash_pair.second.confirmed_lease_info_.request_ts_); + tmp_stat.set_grant_ts(hash_pair.second.lease_expired_ts_ - tmp_stat.get_lease_interval()); + // set remain us + if (collect_ts_ > tmp_stat.get_grant_ts() && collect_ts_ < tmp_stat.get_expired_ts()) { + tmp_stat.set_remain_us(tmp_stat.get_expired_ts() - collect_ts_); + } else { + tmp_stat.set_remain_us(0); + } + // update cnt_ for next read + cnt_++; + + DUP_TABLE_LOG(DEBUG, "insert one row in svr list", K(ret), K(tmp_stat), K(cnt_), + K(hash_pair.second.confirmed_lease_info_), K(collect_arr_)); + } + + return ret; +} + +// OB_DEF_SERIALIZE(DupTableLeaderLeaseInfo) +// { +// int ret = OB_SUCCESS; +// int64_t tmp_pos = pos; +// +// int64_t tmp_request_ts_ = 0; +// int64_t tmp_lease_interval = 0; +// +// if (cache_lease_req_.is_ready()) { +// tmp_request_ts_ = cache_lease_req_.request_ts_; +// tmp_lease_interval = cache_lease_req_.lease_interval_us_; +// } else { +// tmp_request_ts_ = confirmed_lease_info_.request_ts_; +// tmp_lease_interval = confirmed_lease_info_.lease_interval_us_; +// } +// +// if (OB_FAIL(common::serialization::encode(buf, buf_len, tmp_pos, tmp_request_ts_))) { +// 
DUP_TABLE_LOG(WARN, "encode request_ts failed", K(ret)); +// } else if (OB_FAIL(common::serialization::encode(buf, buf_len, tmp_pos, tmp_lease_interval))) { +// DUP_TABLE_LOG(WARN, "encode lease_interval failed", K(ret)); +// } else { +// pos = tmp_pos; +// } +// +// return ret; +// } +// +// OB_DEF_DESERIALIZE(DupTableLeaderLeaseInfo) +// { +// int ret = OB_SUCCESS; +// int64_t tmp_pos = pos; +// +// if (OB_FAIL(common::serialization::decode(buf, data_len, tmp_pos, +// confirmed_lease_info_.request_ts_))) { +// DUP_TABLE_LOG(WARN, "decode request_ts failed", K(ret)); +// } else if (OB_FAIL(common::serialization::decode(buf, data_len, tmp_pos, +// confirmed_lease_info_.lease_interval_us_))) { +// DUP_TABLE_LOG(WARN, "decode lease_interval failed", K(ret)); +// } else { +// cache_lease_req_.request_ts_ = confirmed_lease_info_.request_ts_; +// pos = tmp_pos; +// } +// +// return ret; +// } +// +// OB_DEF_SERIALIZE_SIZE(DupTableLeaderLeaseInfo) +// { +// int64_t total_length = 0; +// +// int64_t tmp_request_ts_ = 0; +// int64_t tmp_lease_interval = 0; +// +// if (cache_lease_req_.is_ready()) { +// tmp_request_ts_ = cache_lease_req_.request_ts_; +// tmp_lease_interval = cache_lease_req_.lease_interval_us_; +// } else { +// tmp_request_ts_ = confirmed_lease_info_.request_ts_; +// tmp_lease_interval = confirmed_lease_info_.lease_interval_us_; +// } +// +// total_length += common::serialization::encoded_length(tmp_request_ts_); +// total_length += common::serialization::encoded_length(tmp_lease_interval); +// +// return total_length; +// } + +// int ObDupTableLSLeaseMgr::LeaderLeaseInfoSerCallBack::operator()( +// const common::hash::HashMapPair &hash_pair) +// { +// int ret = OB_SUCCESS; +// +// if (OB_FAIL(hash_pair.first.serialize(buf_, buf_len_, pos_))) { +// DUP_TABLE_LOG(WARN, "serialize key(addr) failed", K(ret), K(hash_pair.first)); +// } else if (OB_FAIL(hash_pair.second.serialize(buf_, buf_len_, pos_))) { +// DUP_TABLE_LOG(WARN, "serialize val(leader_lease_info) failed", K(ret), K(hash_pair.first)); +// } +// +// return ret; +// } +// +// int ObDupTableLSLeaseMgr::LeaderLeaseInfoDeSerCallBack::operator()( +// DupTableLeaderLeaseMap &lease_map) +// { +// int ret = OB_SUCCESS; +// +// common::ObAddr lease_addr; +// DupTableLeaderLeaseInfo lease_info; +// +// if (OB_FAIL(lease_addr.deserialize(buf_, buf_len_, pos_))) { +// DUP_TABLE_LOG(WARN, "deserialize key(addr) failed", K(ret), K(lease_addr)); +// } else if (OB_FAIL(lease_info.deserialize(buf_, buf_len_, pos_))) { +// DUP_TABLE_LOG(WARN, "deserialize val(leader_lease_info) failed", K(ret), K(lease_addr)); +// } else if (OB_FAIL(lease_map.set_refactored(lease_addr, lease_info, 1))) { +// DUP_TABLE_LOG(WARN, "insert into lease_map failed", K(ret)); +// } +// +// return ret; +// } +// +// int64_t ObDupTableLSLeaseMgr::LeaderLeaseInfoGetSizeCallBack::operator()( +// const common::hash::HashMapPair &hash_pair) +// { +// int64_t total_length = 0; +// total_length += hash_pair.first.get_serialize_size(); +// total_length += hash_pair.second.get_serialize_size(); +// return total_length; +// } +// +// OB_DEF_SERIALIZE(ObDupTableLSLeaseMgr) +// { +// int ret = OB_SUCCESS; +// +// SpinRLockGuard guard(lease_lock_); +// +// LeaderLeaseInfoSerCallBack ser_cb(buf, buf_len, pos); +// +// if (OB_FAIL(hash_for_each_serialize(leader_lease_map_, ser_cb))) { +// DUP_TABLE_LOG(WARN, "serialize leader_lease_map failed", K(ret)); +// } else { +// pos = ser_cb.get_pos(); +// } +// return ret; +// } +// +// OB_DEF_DESERIALIZE(ObDupTableLSLeaseMgr) +// { +// 
int ret = OB_SUCCESS; +// SpinWLockGuard guard(lease_lock_); +// +// LeaderLeaseInfoDeSerCallBack deser_cb(buf, data_len, pos); +// +// if (OB_FAIL(leader_lease_map_.clear())) { +// DUP_TABLE_LOG(WARN, "clear leader_lease_map_ failed", K(ret)); +// } else if (OB_FAIL(hash_for_each_deserialize(leader_lease_map_, deser_cb))) { +// DUP_TABLE_LOG(WARN, "serialize leader_lease_map failed", K(ret)); +// } else { +// pos = deser_cb.get_pos(); +// } +// return ret; +// } +// +// OB_DEF_SERIALIZE_SIZE(ObDupTableLSLeaseMgr) +// { +// int64_t serialize_size = 0; +// SpinRLockGuard guard(lease_lock_); +// +// LeaderLeaseInfoGetSizeCallBack get_size_cb; +// serialize_size += hash_for_each_serialize_size(leader_lease_map_, get_size_cb); +// +// return serialize_size; +// } + +} // namespace transaction +} // namespace oceanbase diff --git a/src/storage/tx/ob_dup_table_lease.h b/src/storage/tx/ob_dup_table_lease.h new file mode 100644 index 0000000000..0506fb7f44 --- /dev/null +++ b/src/storage/tx/ob_dup_table_lease.h @@ -0,0 +1,292 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +#ifndef OCEANBASE_TRANSACTION_DUP_TABLE_LEASE_H +#define OCEANBASE_TRANSACTION_DUP_TABLE_LEASE_H + +#include "lib/hash/ob_hashmap.h" +#include "lib/list/ob_dlist.h" +#include "lib/lock/ob_spin_rwlock.h" +#include "lib/net/ob_addr.h" +#include "storage/tx/ob_dup_table_base.h" +#include "storage/tx/ob_dup_table_stat.h" + + +namespace oceanbase +{ +namespace logservice +{ +class ObLogHandler; +} +namespace transaction +{ + +class ObDupTableLSHandler; +class ObDupTableLeaseRequest; + +class ObDupTableLSLeaseMgr +{ +public: + OB_UNIS_VERSION(1); + +public: + static const int64_t LEASE_UNIT; + static const int64_t DEFAULT_LEASE_INTERVAL; + static const int64_t MIN_LEASE_INTERVAL; + + TO_STRING_KV(K(leader_lease_map_.size()), K(follower_lease_info_)); + +public: + ObDupTableLSLeaseMgr() : lease_diag_info_log_buf_(nullptr) { reset(); } + + int init(ObDupTableLSHandler *dup_ls_handle); + void destroy() { reset(); }; + void reset(); + bool is_master() { return ATOMIC_LOAD(&is_master_); } + + // handle lease requests + int leader_handle(bool &need_log); + // post lease requests + int follower_handle(); + + int follower_try_acquire_lease(const share::SCN &lease_log_scn); + + int recive_lease_request(const ObDupTableLeaseRequest &lease_req); + + int leader_takeover(bool is_resume); + int leader_revoke(); + + int prepare_serialize(int64_t &max_ser_size, DupTableLeaseItemArray &lease_header_array); + int serialize_lease_log(const DupTableLeaseItemArray &unique_id_array, + char *buf, + const int64_t buf_len, + int64_t &pos); + int deserialize_lease_log(DupTableLeaseItemArray &lease_header_array, + const char *buf, + const int64_t data_len, + int64_t &pos); + + int lease_log_submitted(const bool submit_result, + const share::SCN &lease_log_scn, + const bool for_replay, + const DupTableLeaseItemArray &lease_header_array); + int lease_log_synced(const bool sync_result, + const share::SCN &lease_log_scn, + const bool for_replay, + const DupTableLeaseItemArray 
&lease_header_array); + + // int log_cb_success(); + // int log_cb_failure(); + + int get_lease_valid_array(LeaseAddrArray &lease_array); + + bool check_follower_lease_serving(const bool election_is_leader, + const share::SCN &max_replayed_scn); + + void print_lease_diag_info_log(const bool is_master); + + int get_lease_mgr_stat(FollowerLeaseMgrStatArr &collect_arr); + +private: + bool can_grant_lease_(const common::ObAddr &addr, + const share::SCN &local_max_applyed_scn, + const DupTableLeaderLeaseInfo &lease_info); + int update_durable_lease_info_(DupTableLeaderLeaseInfo &single_lease_info); + int handle_lease_req_cache_(int64_t loop_start_time, + const share::SCN &local_max_applyed_scn, + const common::ObAddr &addr, + DupTableLeaderLeaseInfo &single_lease_info); + + int submit_lease_log_(); + + // update request_ts_ + // 1. the follower try to get lease for th first time (request_ts_ = 0) + // 2. the follower get lease failed for a long time + void update_request_ts_(int64_t loop_start_time); + + bool need_post_lease_request_(int64_t loop_start_time); + + bool need_retry_lease_operation_(const int64_t cur_time, const int64_t last_time); + +private: + class LeaseReqCacheHandler + { + public: + LeaseReqCacheHandler(ObDupTableLSLeaseMgr *lease_mgr, + int64_t loop_start_time, + const share::SCN &max_applyed_scn, + DupTableLeaseItemArray &item_array) + : lease_item_array_(item_array) + { + lease_mgr_ptr_ = lease_mgr; + lease_item_array_.reuse(); + renew_lease_count_ = 0; + max_ser_size_ = 0; + local_max_applyed_scn_ = max_applyed_scn; + loop_start_time_ = loop_start_time; + error_ret = OB_SUCCESS; + } + bool operator()(common::hash::HashMapPair &hash_pair); + int64_t get_max_ser_size() { return max_ser_size_; } + int get_error_ret() { return error_ret; } + int64_t get_renew_lease_count() { return renew_lease_count_; } + + TO_STRING_KV(K(renew_lease_count_), + K(max_ser_size_), + K(loop_start_time_), + K(local_max_applyed_scn_), + K(error_ret)); + + private: + ObDupTableLSLeaseMgr *lease_mgr_ptr_; + DupTableLeaseItemArray &lease_item_array_; + int64_t renew_lease_count_; + int64_t max_ser_size_; + int64_t loop_start_time_; + share::SCN local_max_applyed_scn_; + int error_ret; + }; + + class GetLeaseValidAddrFunctor + { + public: + GetLeaseValidAddrFunctor(LeaseAddrArray &addr_arr) : addr_arr_(addr_arr), cur_time_(INT64_MAX) + {} + int operator()(common::hash::HashMapPair &hash_pair); + + private: + LeaseAddrArray &addr_arr_; + int64_t cur_time_; + }; + + class DiagInfoGenerator + { + public: + DiagInfoGenerator(bool need_cache, char *info_buf, int64_t info_buf_len, int64_t cur_time) + : need_cache_(need_cache), info_buf_(info_buf), info_buf_len_(info_buf_len), + info_buf_pos_(0), cur_time_(cur_time) + {} + + int64_t get_buf_pos() { return info_buf_pos_; } + + int + operator()(const common::hash::HashMapPair &hash_pair); + + private: + bool need_cache_; + char *info_buf_; + int64_t info_buf_len_; + int64_t info_buf_pos_; + int64_t cur_time_; + }; + + class LeaderActiveLeaseFunctor + { + public: + LeaderActiveLeaseFunctor() : cur_time_(INT64_MAX) {} + int operator()(common::hash::HashMapPair &hash_pair); + + private: + int64_t cur_time_; + }; + + // class LeaseDurableHandler + // { + // public: + // LeaseDurableHandler(bool success, const DupTableLeaseItemArray &lease_array) + // : is_success_(success), item_array_(lease_array) + // {} + // + // int operator()(common::hash::HashMapPair + // &hash_pair); + // + // private: + // bool is_success_; + // const DupTableLeaseItemArray &item_array_; + 
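The private helpers declared above encode the follower's lease-refresh policy: update_request_ts_ refreshes the request timestamp once half of the current interval has passed, and need_post_lease_request_ only posts a request when the retry window (20% of MIN_LEASE_INTERVAL, per need_retry_lease_operation_) has elapsed and either the lease has expired or less than 40% of the interval remains. A compact sketch of those thresholds, with the interval values passed in rather than taken from the real constants:

#include <cstdint>

// update_request_ts_: refresh once half of the lease interval has passed.
bool need_refresh_request_ts(int64_t now_us, int64_t request_ts_us, int64_t interval_us)
{
  return now_us - request_ts_us >= interval_us / 2;
}

// need_retry_lease_operation_ and need_post_lease_request_ combined.
bool need_post_lease_request(int64_t now_us, int64_t last_post_us, int64_t min_interval_us,
                             int64_t expired_ts_us, int64_t interval_us)
{
  const bool retry_window_elapsed = now_us - last_post_us >= min_interval_us / 10 * 2;
  const bool close_to_expiry =
      now_us >= expired_ts_us || expired_ts_us - now_us <= interval_us / 10 * 4;
  return retry_window_elapsed && close_to_expiry;
}

int main()
{
  // With a 5s interval: 2.1s remaining (42%) does not trigger a renewal, 1.9s (38%) does.
  const int64_t interval = 5000000;
  return (!need_post_lease_request(0, -2000000, interval, 2100000, interval)
          && need_post_lease_request(0, -2000000, interval, 1900000, interval))
             ? 0
             : 1;
}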
// }; + + class LeaderLeaseInfoSerCallBack : public IHashSerCallBack + { + public: + LeaderLeaseInfoSerCallBack(char *buf, int64_t buf_len, int64_t pos) + : IHashSerCallBack(buf, buf_len, pos) + {} + int + operator()(const common::hash::HashMapPair &hash_pair); + }; + + class LeaderLeaseInfoDeSerCallBack : public IHashDeSerCallBack + { + public: + LeaderLeaseInfoDeSerCallBack(const char *buf, int64_t buf_len, int64_t pos) + : IHashDeSerCallBack(buf, buf_len, pos) + {} + int operator()(DupTableLeaderLeaseMap &lease_map); + }; + + class LeaderLeaseInfoGetSizeCallBack + { + public: + int64_t + operator()(const common::hash::HashMapPair &hash_pair); + }; + + class LeaderLeaseMgrStatFunctor + { + public: + LeaderLeaseMgrStatFunctor(FollowerLeaseMgrStatArr &collect_arr, + const uint64_t tenant_id, + const int64_t collect_ts, + const ObAddr &leader_addr, + const share::ObLSID ls_id) + : + collect_arr_(collect_arr), + tenant_id_(tenant_id), + collect_ts_(collect_ts), + leader_addr_(leader_addr), + ls_id_(ls_id), + cnt_(0) {} + int operator()(const common::hash::HashMapPair &hash_pair); + + private: + FollowerLeaseMgrStatArr &collect_arr_; + uint64_t tenant_id_; + int64_t collect_ts_; + const ObAddr leader_addr_; + share::ObLSID ls_id_; + int cnt_; + }; + +private: + SpinRWLock lease_lock_; + + share::ObLSID ls_id_; + bool is_master_; + bool is_stopped_; + + // bool is_serializing_; // TODO use lease array to serialize + + ObDupTableLSHandler *dup_ls_handle_ptr_; + + DupTableLeaderLeaseMap leader_lease_map_; + + // int64_t self_request_ts_; + // DupTableLeaseInfo follower_lease_info_; + DupTableFollowerLeaseInfo follower_lease_info_; + + int64_t last_lease_req_post_time_; + int64_t last_lease_req_cache_handle_time_; + + char *lease_diag_info_log_buf_; +}; + +} // namespace transaction +} // namespace oceanbase +#endif diff --git a/src/storage/tx/ob_dup_table_rpc.cpp b/src/storage/tx/ob_dup_table_rpc.cpp index 6d433504e7..d111a63d90 100644 --- a/src/storage/tx/ob_dup_table_rpc.cpp +++ b/src/storage/tx/ob_dup_table_rpc.cpp @@ -219,79 +219,129 @@ bool ObRedoLogSyncResponseMsg::is_valid() const namespace obrpc { -int ObDupTableLeaseRequestMsgP::process() -{ - int ret = OB_SUCCESS; -// transaction::ObPartTransCtxMgr *ctx_mgr = NULL; -// transaction::ObTransService *trans_service = global_ctx_.par_ser_->get_trans_service(); +// int ObDupTableLeaseRequestMsgP::process() +// { +// int ret = OB_SUCCESS; +// // transaction::ObPartTransCtxMgr *ctx_mgr = NULL; +// // transaction::ObTransService *trans_service = global_ctx_.par_ser_->get_trans_service(); +// // +// // if (OB_ISNULL(trans_service)) { +// // ret = OB_ERR_UNEXPECTED; +// // TRANS_LOG(WARN, "trans service is NULL", KR(ret)); +// // } else if (OB_ISNULL(ctx_mgr = &trans_service->get_part_trans_ctx_mgr())) { +// // ret = OB_ERR_UNEXPECTED; +// // TRANS_LOG(WARN, "ObPartTransCtxMgr is NULL", K(ret)); +// // } else if (OB_FAIL(ctx_mgr->handle_dup_lease_request(arg_.get_partition(), +// // arg_))) { +// // TRANS_LOG(WARN, "handle lease request error", K(ret), K(arg_.get_partition())); +// // } else { +// // //do nothing +// // } +// // +// return ret; +// } // -// if (OB_ISNULL(trans_service)) { -// ret = OB_ERR_UNEXPECTED; -// TRANS_LOG(WARN, "trans service is NULL", KR(ret)); -// } else if (OB_ISNULL(ctx_mgr = &trans_service->get_part_trans_ctx_mgr())) { -// ret = OB_ERR_UNEXPECTED; -// TRANS_LOG(WARN, "ObPartTransCtxMgr is NULL", K(ret)); -// } else if (OB_FAIL(ctx_mgr->handle_dup_lease_request(arg_.get_partition(), -// arg_))) { -// TRANS_LOG(WARN, 
"handle lease request error", K(ret), K(arg_.get_partition())); -// } else { -// //do nothing -// } +// int ObDupTableLeaseResponseMsgP::process() +// { +// int ret = common::OB_SUCCESS; +// // transaction::ObPartTransCtxMgr *ctx_mgr = NULL; +// // transaction::ObTransService *trans_service = global_ctx_.par_ser_->get_trans_service(); +// // +// // if (OB_ISNULL(trans_service)) { +// // ret = OB_ERR_UNEXPECTED; +// // TRANS_LOG(WARN, "trans service is NULL", KR(ret)); +// // } else if (OB_ISNULL(ctx_mgr = &trans_service->get_part_trans_ctx_mgr())) { +// // ret = OB_ERR_UNEXPECTED; +// // TRANS_LOG(WARN, "ObPartTransCtxMgr is NULL", K(ret)); +// // } else if (OB_FAIL(ctx_mgr->handle_dup_lease_response(arg_.get_partition(), +// // arg_, +// // trans_service))) { +// // TRANS_LOG(WARN, "handle lease response error", K(ret), K(arg_.get_partition())); +// // } else { +// // //do nothing +// // } +// // +// return ret; +// } // - return ret; -} - -int ObDupTableLeaseResponseMsgP::process() -{ - int ret = common::OB_SUCCESS; -// transaction::ObPartTransCtxMgr *ctx_mgr = NULL; -// transaction::ObTransService *trans_service = global_ctx_.par_ser_->get_trans_service(); +// int ObRedoLogSyncRequestP::process() +// { +// int ret = OB_SUCCESS; +// // transaction::ObPartTransCtxMgr *ctx_mgr = NULL; +// // transaction::ObTransService *trans_service = global_ctx_.par_ser_->get_trans_service(); +// // +// // if (OB_ISNULL(trans_service)) { +// // ret = OB_ERR_UNEXPECTED; +// // TRANS_LOG(WARN, "trans service is NULL", KR(ret)); +// // } else if (OB_ISNULL(ctx_mgr = &trans_service->get_part_trans_ctx_mgr())) { +// // ret = OB_ERR_UNEXPECTED; +// // TRANS_LOG(WARN, "ObPartTransCtxMgr is NULL", K(ret)); +// // } else if (OB_FAIL(ctx_mgr->handle_dup_redo_log_sync_request(arg_.get_partition(), +// // arg_, +// // trans_service))) { +// // TRANS_LOG(WARN, "handle redo log sync request error", K(ret), K(arg_.get_partition())); +// // } else { +// // //do nothing +// // } +// // +// return ret; +// } // -// if (OB_ISNULL(trans_service)) { -// ret = OB_ERR_UNEXPECTED; -// TRANS_LOG(WARN, "trans service is NULL", KR(ret)); -// } else if (OB_ISNULL(ctx_mgr = &trans_service->get_part_trans_ctx_mgr())) { -// ret = OB_ERR_UNEXPECTED; -// TRANS_LOG(WARN, "ObPartTransCtxMgr is NULL", K(ret)); -// } else if (OB_FAIL(ctx_mgr->handle_dup_lease_response(arg_.get_partition(), -// arg_, -// trans_service))) { -// TRANS_LOG(WARN, "handle lease response error", K(ret), K(arg_.get_partition())); -// } else { -// //do nothing -// } +// int ObRedoLogSyncResponseP::process() +// { +// int ret = OB_SUCCESS; +// // transaction::ObPartTransCtxMgr *ctx_mgr = NULL; +// // transaction::ObTransService *trans_service = global_ctx_.par_ser_->get_trans_service(); +// // +// // if (OB_ISNULL(trans_service)) { +// // ret = OB_ERR_UNEXPECTED; +// // TRANS_LOG(WARN, "trans service is NULL", KR(ret)); +// // } else if (OB_ISNULL(ctx_mgr = &trans_service->get_part_trans_ctx_mgr())) { +// // ret = OB_ERR_UNEXPECTED; +// // TRANS_LOG(WARN, "ObPartTransCtxMgr is NULL", K(ret)); +// // } else if (OB_FAIL(ctx_mgr->handle_dup_redo_log_sync_response(arg_.get_partition(), +// // arg_))) { +// // TRANS_LOG(WARN, "handle redo log sync response error", K(ret), K(arg_.get_partition())); +// // } else { +// // //do nothing +// // } +// // +// return ret; +// } // - return ret; -} - -int ObRedoLogSyncRequestP::process() -{ - int ret = OB_SUCCESS; -// transaction::ObPartTransCtxMgr *ctx_mgr = NULL; -// transaction::ObTransService *trans_service = 
global_ctx_.par_ser_->get_trans_service(); +// int ObPreCommitRequestP::process() +// { +// int ret = OB_SUCCESS; +// transaction::ObIDupTableRpc *dup_table_rpc = NULL; +// transaction::ObTransService *trans_service = global_ctx_.par_ser_->get_trans_service(); // -// if (OB_ISNULL(trans_service)) { -// ret = OB_ERR_UNEXPECTED; -// TRANS_LOG(WARN, "trans service is NULL", KR(ret)); -// } else if (OB_ISNULL(ctx_mgr = &trans_service->get_part_trans_ctx_mgr())) { -// ret = OB_ERR_UNEXPECTED; -// TRANS_LOG(WARN, "ObPartTransCtxMgr is NULL", K(ret)); -// } else if (OB_FAIL(ctx_mgr->handle_dup_redo_log_sync_request(arg_.get_partition(), -// arg_, -// trans_service))) { -// TRANS_LOG(WARN, "handle redo log sync request error", K(ret), K(arg_.get_partition())); -// } else { -// //do nothing -// } +// if (OB_ISNULL(trans_service)) { +// ret = OB_ERR_UNEXPECTED; +// TRANS_LOG(WARN, "trans service is NULL", KR(ret)); +// } else if (OB_ISNULL(dup_table_rpc = trans_service->get_dup_table_rpc())) { +// ret = OB_ERR_UNEXPECTED; +// TRANS_LOG(WARN, "dup table rpc is null", KR(ret), K(arg_)); +// } else { +// trans_service->get_tx_version_mgr().update_max_commit_ts(arg_.get_commit_version(), false); +// // respond leader +// // TODO, whether need to check lease_expired and update +// // ObPartitionTransCtxMgr::update_max_trans_version? +// transaction::ObPreCommitResponseMsg msg; +// if (OB_FAIL(msg.init(arg_.get_partition(), +// arg_.get_trans_id(), +// trans_service->get_server(), +// OB_SUCCESS))) { +// TRANS_LOG(WARN, "init pre commit response msg failed", KR(ret), K(arg_), K(msg)); +// } else if (OB_FAIL(msg.set_header(arg_.get_dst(), arg_.get_dst(), arg_.get_src()))) { +// TRANS_LOG(WARN, "ObPreCommitResponseMsg set header error", KR(ret), K(arg_), K(msg)); +// } else if (OB_FAIL(dup_table_rpc->post_pre_commit_response(arg_.get_tenant_id(), +// arg_.get_src(), msg))) { +// TRANS_LOG(WARN, "post pre commit response failed", KR(ret), K(arg_), K(msg)); +// } +// } // - return ret; -} - -int ObRedoLogSyncResponseP::process() -{ - int ret = OB_SUCCESS; -// transaction::ObPartTransCtxMgr *ctx_mgr = NULL; -// transaction::ObTransService *trans_service = global_ctx_.par_ser_->get_trans_service(); +// return ret; +// } // // if (OB_ISNULL(trans_service)) { // ret = OB_ERR_UNEXPECTED; @@ -306,12 +356,12 @@ int ObRedoLogSyncResponseP::process() // //do nothing // } // - return ret; -} +// return ret; +// } -int ObPreCommitRequestP::process() -{ - int ret = OB_NOT_SUPPORTED; +// int ObPreCommitRequestP::process() +// { +// int ret = OB_NOT_SUPPORTED; // transaction::ObIDupTableRpc *dup_table_rpc = NULL; // transaction::ObTransService *trans_service = global_ctx_.par_ser_->get_trans_service(); @@ -340,12 +390,12 @@ int ObPreCommitRequestP::process() // } // } - return ret; -} - -int ObPreCommitResponseP::process() -{ - int ret = OB_SUCCESS; +// return ret; +// } +// +// int ObPreCommitResponseP::process() +// { +// int ret = OB_SUCCESS; // transaction::ObPartTransCtxMgr *ctx_mgr = NULL; // transaction::ObTransService *trans_service = global_ctx_.par_ser_->get_trans_service(); // if (OB_ISNULL(trans_service)) { @@ -358,37 +408,31 @@ int ObPreCommitResponseP::process() // TRANS_LOG(WARN, "handle dup pre commit response error", KR(ret), K(arg_)); // } // - return ret; -} - }// obrpc namespace transaction { -int ObDupTableRpc::init(ObTransService *trans_service, - rpc::frame::ObReqTransport *transport, - const ObAddr &addr) +int ObDupTableRpc_old::init(ObTransService *trans_service, rpc::frame::ObReqTransport 
*transport, const ObAddr &addr) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(is_inited_)) { - ret = OB_INIT_TWICE; - TRANS_LOG(WARN, "duplicate table rpc init error", KR(ret)); - } else if (OB_ISNULL(trans_service) || OB_ISNULL(transport)) { - ret = OB_INVALID_ARGUMENT; - TRANS_LOG(WARN, "invalid argument", KR(ret), KP(trans_service), KP(transport)); - } else if (OB_FAIL(rpc_proxy_.init(transport, addr))) { - TRANS_LOG(WARN, "init rpc proxy fail", K(ret)); - } else { - trans_service_ = trans_service; - is_inited_ = true; - TRANS_LOG(INFO, "dup table rpc init success"); - } + // if (OB_UNLIKELY(is_inited_)) { + // ret = OB_INIT_TWICE; + // TRANS_LOG(WARN, "duplicate table rpc init error", KR(ret)); + // } else if (OB_ISNULL(trans_service) || OB_ISNULL(transport)) { + // ret = OB_INVALID_ARGUMENT; + // TRANS_LOG(WARN, "invalid argument", KR(ret), KP(trans_service), KP(transport)); + // } else if (OB_FAIL(rpc_proxy_.init(transport, addr))) { + // TRANS_LOG(WARN, "init rpc proxy fail", K(ret)); + // } else { + // trans_service_ = trans_service; + // is_inited_ = true; + // TRANS_LOG(INFO, "dup table rpc init success"); return ret; } -int ObDupTableRpc::start() +int ObDupTableRpc_old::start() { int ret = OB_SUCCESS; if (!is_inited_) { @@ -404,7 +448,7 @@ int ObDupTableRpc::start() return ret; } -int ObDupTableRpc::stop() +int ObDupTableRpc_old::stop() { int ret = OB_SUCCESS; if (!is_inited_) { @@ -420,7 +464,7 @@ int ObDupTableRpc::stop() return ret; } -int ObDupTableRpc::wait() +int ObDupTableRpc_old::wait() { int ret = OB_SUCCESS; if (!is_inited_) { @@ -435,7 +479,7 @@ int ObDupTableRpc::wait() return ret; } -void ObDupTableRpc::destroy() +void ObDupTableRpc_old::destroy() { int tmp_ret = OB_SUCCESS; if (is_inited_) { @@ -453,136 +497,136 @@ void ObDupTableRpc::destroy() } } -int ObDupTableRpc::post_dup_table_lease_request(const uint64_t tenant_id, +int ObDupTableRpc_old::post_dup_table_lease_request(const uint64_t tenant_id, const common::ObAddr &server, const ObDupTableLeaseRequestMsg &msg) { int ret = OB_SUCCESS; - if (!is_inited_) { - ret = OB_NOT_INIT; - TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); - } else if (!is_running_) { - ret = OB_NOT_RUNNING; - TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); - } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { - ret = OB_INVALID_ARGUMENT; - TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), K(server), K(msg)); - } else if (OB_FAIL(rpc_proxy_.to(server) - .by(tenant_id) - .post_dup_table_lease_request(msg, NULL))) { - TRANS_LOG(WARN, "post dup table lease message error", KR(ret), K(server), K(msg)); - } else { - TRANS_LOG(DEBUG, "post dup table lease message success", K(server), K(msg)); - } + // if (!is_inited_) { + // ret = OB_NOT_INIT; + // TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); + // } else if (!is_running_) { + // ret = OB_NOT_RUNNING; + // TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); + // } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { + // ret = OB_INVALID_ARGUMENT; + // TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), K(server), K(msg)); + // } else if (OB_FAIL(rpc_proxy_->to(server) + // .by(tenant_id) + // .post_dup_table_lease_request(msg, NULL))) { + // TRANS_LOG(WARN, "post dup table lease message error", KR(ret), K(server), K(msg)); + // } else { + // TRANS_LOG(DEBUG, "post dup table lease message success", K(server), K(msg)); + // } return ret; } -int ObDupTableRpc::post_dup_table_lease_response(const 
uint64_t tenant_id, +int ObDupTableRpc_old::post_dup_table_lease_response(const uint64_t tenant_id, const common::ObAddr &server, const ObDupTableLeaseResponseMsg &msg) { int ret = OB_SUCCESS; - if (!is_inited_) { - ret = OB_NOT_INIT; - TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); - } else if (!is_running_) { - ret = OB_NOT_RUNNING; - TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); - } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { - ret = OB_INVALID_ARGUMENT; - TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), K(server), K(msg)); - } else if (OB_FAIL(rpc_proxy_.to(server) - .by(tenant_id) - .post_dup_table_lease_response(msg, NULL))) { - TRANS_LOG(WARN, "post dup table lease message error", KR(ret), K(server), K(msg)); - } else { - TRANS_LOG(DEBUG, "post dup table lease message success", K(server), K(msg)); - } + // if (!is_inited_) { + // ret = OB_NOT_INIT; + // TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); + // } else if (!is_running_) { + // ret = OB_NOT_RUNNING; + // TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); + // } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { + // ret = OB_INVALID_ARGUMENT; + // TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), K(server), K(msg)); + // } else if (OB_FAIL(rpc_proxy_->to(server) + // .by(tenant_id) + // .post_dup_table_lease_response(msg, NULL))) { + // TRANS_LOG(WARN, "post dup table lease message error", KR(ret), K(server), K(msg)); + // } else { + // TRANS_LOG(DEBUG, "post dup table lease message success", K(server), K(msg)); + // } return ret; } -int ObDupTableRpc::post_redo_log_sync_request(const uint64_t tenant_id, +int ObDupTableRpc_old::post_redo_log_sync_request(const uint64_t tenant_id, const common::ObAddr &server, const ObRedoLogSyncRequestMsg &msg) { int ret = OB_SUCCESS; - if (!is_inited_) { - ret = OB_NOT_INIT; - TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); - } else if (!is_running_) { - ret = OB_NOT_RUNNING; - TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); - } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { - ret = OB_INVALID_ARGUMENT; - TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), K(server), K(msg)); - } else if (OB_FAIL(rpc_proxy_.to(server).by(tenant_id).post_redo_log_sync_request(msg, NULL))) { - TRANS_LOG(WARN, "post redo log sync request message error", KR(ret), K(server), K(msg)); - } else { - TRANS_LOG(DEBUG, "post redo log sync request message success", K(server), K(msg)); - } + // if (!is_inited_) { + // ret = OB_NOT_INIT; + // TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); + // } else if (!is_running_) { + // ret = OB_NOT_RUNNING; + // TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); + // } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { + // ret = OB_INVALID_ARGUMENT; + // TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), K(server), K(msg)); + // } else if (OB_FAIL(rpc_proxy_->to(server).by(tenant_id).post_redo_log_sync_request(msg, NULL))) { + // TRANS_LOG(WARN, "post redo log sync request message error", KR(ret), K(server), K(msg)); + // } else { + // TRANS_LOG(DEBUG, "post redo log sync request message success", K(server), K(msg)); + // } return ret; } -int ObDupTableRpc::post_redo_log_sync_response(const uint64_t tenant_id, +int ObDupTableRpc_old::post_redo_log_sync_response(const uint64_t tenant_id, const common::ObAddr &server, const ObRedoLogSyncResponseMsg &msg) { 
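Each of these legacy senders followed the same guard chain before posting through the proxy: reject when not inited, reject when not running, validate the tenant id, server and message, then call rpc_proxy_.to(server).by(tenant_id).post_xxx(msg, NULL). A condensed, generic sketch of that shape, with illustrative error codes standing in for the real OB_* constants and a toy message type:

#include <cstdint>

// Illustrative error codes; the real code uses OB_NOT_INIT, OB_NOT_RUNNING, OB_INVALID_ARGUMENT.
enum RetCode : int { kSuccess = 0, kNotInit = -1, kNotRunning = -2, kInvalidArgument = -3 };

template <typename Msg, typename PostFn>
int post_dup_table_msg(bool inited, bool running, uint64_t tenant_id, const Msg &msg, PostFn post)
{
  int ret = kSuccess;
  if (!inited) {
    ret = kNotInit;
  } else if (!running) {
    ret = kNotRunning;
  } else if (0 == tenant_id || !msg.is_valid()) {
    ret = kInvalidArgument;
  } else {
    ret = post(tenant_id, msg);  // e.g. rpc_proxy_.to(server).by(tenant_id).post_xxx(msg, NULL)
  }
  return ret;
}

struct ToyMsg { bool is_valid() const { return true; } };

int main()
{
  ToyMsg msg;
  auto post = [](uint64_t, const ToyMsg &) { return kSuccess; };
  return post_dup_table_msg(true, true, 1001, msg, post);
}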
int ret = OB_SUCCESS; - if (!is_inited_) { - ret = OB_NOT_INIT; - TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); - } else if (!is_running_) { - ret = OB_NOT_RUNNING; - TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); - } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { - ret = OB_INVALID_ARGUMENT; - TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), K(server), K(msg)); - } else if (OB_FAIL(rpc_proxy_.to(server).by(tenant_id).post_redo_log_sync_response(msg, NULL))) { - TRANS_LOG(WARN, "post redo log sync response message error", KR(ret), K(server), K(msg)); - } else { - TRANS_LOG(DEBUG, "post redo log sync response message success", K(server), K(msg)); - } + // if (!is_inited_) { + // ret = OB_NOT_INIT; + // TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); + // } else if (!is_running_) { + // ret = OB_NOT_RUNNING; + // TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); + // } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { + // ret = OB_INVALID_ARGUMENT; + // TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), K(server), K(msg)); + // } else if (OB_FAIL(rpc_proxy_->to(server).by(tenant_id).post_redo_log_sync_response(msg, NULL))) { + // TRANS_LOG(WARN, "post redo log sync response message error", KR(ret), K(server), K(msg)); + // } else { + // TRANS_LOG(DEBUG, "post redo log sync response message success", K(server), K(msg)); + // } return ret; } -int ObDupTableRpc::post_pre_commit_request(const uint64_t tenant_id, +int ObDupTableRpc_old::post_pre_commit_request(const uint64_t tenant_id, const common::ObAddr &server, const ObPreCommitRequestMsg &msg) { int ret = OB_SUCCESS; - if (!is_inited_) { - ret = OB_NOT_INIT; - TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); - } else if (!is_running_) { - ret = OB_NOT_RUNNING; - TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); - } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { - ret = OB_INVALID_ARGUMENT; - TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), K(server), K(msg)); - } else if (OB_FAIL(rpc_proxy_.to(server).by(tenant_id).post_pre_commit_request(msg, NULL))) { - TRANS_LOG(WARN, "post pre commit request error", KR(ret), K(server), K(msg)); - } + // if (!is_inited_) { + // ret = OB_NOT_INIT; + // TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); + // } else if (!is_running_) { + // ret = OB_NOT_RUNNING; + // TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); + // } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { + // ret = OB_INVALID_ARGUMENT; + // TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), K(server), K(msg)); + // } else if (OB_FAIL(rpc_proxy_->to(server).by(tenant_id).post_pre_commit_request(msg, NULL))) { + // TRANS_LOG(WARN, "post pre commit request error", KR(ret), K(server), K(msg)); + // } return ret; } -int ObDupTableRpc::post_pre_commit_response(const uint64_t tenant_id, +int ObDupTableRpc_old::post_pre_commit_response(const uint64_t tenant_id, const common::ObAddr &server, const ObPreCommitResponseMsg &msg) { int ret = OB_SUCCESS; - if (!is_inited_) { - ret = OB_NOT_INIT; - TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); - } else if (!is_running_) { - ret = OB_NOT_RUNNING; - TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); - } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { - ret = OB_INVALID_ARGUMENT; - TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), 
K(server), K(msg)); - } else if (OB_FAIL(rpc_proxy_.to(server).by(tenant_id).post_pre_commit_response(msg, NULL))) { - TRANS_LOG(WARN, "post pre commit response error", KR(ret), K(server), K(msg)); - } + // if (!is_inited_) { + // ret = OB_NOT_INIT; + // TRANS_LOG(WARN, "dup table rpc not inited", KR(ret)); + // } else if (!is_running_) { + // ret = OB_NOT_RUNNING; + // TRANS_LOG(WARN, "dup table rpc not running", KR(ret)); + // } else if (!is_valid_tenant_id(tenant_id) || !server.is_valid() || !msg.is_valid()) { + // ret = OB_INVALID_ARGUMENT; + // TRANS_LOG(WARN, "invalid argument", KR(ret), K(tenant_id), K(server), K(msg)); + // } else if (OB_FAIL(rpc_proxy_->to(server).by(tenant_id).post_pre_commit_response(msg, NULL))) { + // TRANS_LOG(WARN, "post pre commit response error", KR(ret), K(server), K(msg)); + // } return ret; } diff --git a/src/storage/tx/ob_dup_table_rpc.h b/src/storage/tx/ob_dup_table_rpc.h index 4ed159f947..b6e09495f6 100644 --- a/src/storage/tx/ob_dup_table_rpc.h +++ b/src/storage/tx/ob_dup_table_rpc.h @@ -26,6 +26,7 @@ #include "share/config/ob_server_config.h" #include "observer/ob_server_struct.h" + namespace oceanbase { namespace observer @@ -281,94 +282,94 @@ class ObDupTableRpcProxy : public obrpc::ObRpcProxy public: DEFINE_TO(ObDupTableRpcProxy); - RPC_AP(PRZ post_dup_table_lease_request, OB_DUP_TABLE_LEASE_REQUEST, - (transaction::ObDupTableLeaseRequestMsg)); - RPC_AP(PRZ post_dup_table_lease_response, OB_DUP_TABLE_LEASE_RESPONSE, - (transaction::ObDupTableLeaseResponseMsg)); - RPC_AP(PR3 post_redo_log_sync_request, OB_REDO_LOG_SYNC_REQUEST, - (transaction::ObRedoLogSyncRequestMsg)); - RPC_AP(PR3 post_redo_log_sync_response, OB_REDO_LOG_SYNC_RESPONSE, - (transaction::ObRedoLogSyncResponseMsg)); - RPC_AP(PR3 post_pre_commit_request, OB_DUP_TABLE_PRE_COMMIT_REQ, - (transaction::ObPreCommitRequestMsg)); - RPC_AP(PR3 post_pre_commit_response, OB_DUP_TABLE_PRE_COMMIT_RESP, - (transaction::ObPreCommitResponseMsg)); + // RPC_AP(PRZ post_dup_table_lease_request, OB_DUP_TABLE_LEASE_REQUEST, + // (transaction::ObDupTableLeaseRequestMsg)); + // RPC_AP(PRZ post_dup_table_lease_response, OB_DUP_TABLE_LEASE_RESPONSE, + // (transaction::ObDupTableLeaseResponseMsg)); + // RPC_AP(PR3 post_redo_log_sync_request, OB_DUP_TABLE_LEASE_RESPONSE, + // (transaction::ObRedoLogSyncRequestMsg)); + // RPC_AP(PR3 post_redo_log_sync_response, OB_DUP_TABLE_LEASE_RESPONSE, + // (transaction::ObRedoLogSyncResponseMsg)); + // RPC_AP(PR3 post_pre_commit_request, OB_DUP_TABLE_PRE_COMMIT_REQ, + // (transaction::ObPreCommitRequestMsg)); + // RPC_AP(PR3 post_pre_commit_response, OB_DUP_TABLE_PRE_COMMIT_RESP, + // (transaction::ObPreCommitResponseMsg)); }; -class ObDupTableLeaseRequestMsgP : public ObRpcProcessor> -{ -public: - explicit ObDupTableLeaseRequestMsgP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) {} -protected: - int process(); - -private: - DISALLOW_COPY_AND_ASSIGN(ObDupTableLeaseRequestMsgP); -private: - const observer::ObGlobalContext &global_ctx_; -}; - -class ObDupTableLeaseResponseMsgP : public ObRpcProcessor> -{ -public: - explicit ObDupTableLeaseResponseMsgP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) {} -protected: - int process(); - -private: - DISALLOW_COPY_AND_ASSIGN(ObDupTableLeaseResponseMsgP); -private: - const observer::ObGlobalContext &global_ctx_; -}; - -class ObRedoLogSyncRequestP : public ObRpcProcessor> -{ -public: - explicit ObRedoLogSyncRequestP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) 
{} -protected: - int process(); -private: - DISALLOW_COPY_AND_ASSIGN(ObRedoLogSyncRequestP); -private: - const observer::ObGlobalContext &global_ctx_; -}; - -class ObRedoLogSyncResponseP : public ObRpcProcessor> -{ -public: - explicit ObRedoLogSyncResponseP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) {} -protected: - int process(); -private: - DISALLOW_COPY_AND_ASSIGN(ObRedoLogSyncResponseP); -private: - const observer::ObGlobalContext &global_ctx_; -}; - -class ObPreCommitRequestP : public ObRpcProcessor> -{ -public: - explicit ObPreCommitRequestP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) {} -protected: - int process(); -private: - DISALLOW_COPY_AND_ASSIGN(ObPreCommitRequestP); -private: - const observer::ObGlobalContext &global_ctx_; -}; - -class ObPreCommitResponseP : public ObRpcProcessor> -{ -public: - explicit ObPreCommitResponseP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) {} -protected: - int process(); -private: - DISALLOW_COPY_AND_ASSIGN(ObPreCommitResponseP); -private: - const observer::ObGlobalContext &global_ctx_; -}; +// class ObDupTableLeaseRequestMsgP : public ObRpcProcessor> +// { +// public: +// explicit ObDupTableLeaseRequestMsgP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) {} +// protected: +// int process(); +// +// private: +// DISALLOW_COPY_AND_ASSIGN(ObDupTableLeaseRequestMsgP); +// private: +// const observer::ObGlobalContext &global_ctx_; +// }; +// +// class ObDupTableLeaseResponseMsgP : public ObRpcProcessor> +// { +// public: +// explicit ObDupTableLeaseResponseMsgP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) {} +// protected: +// int process(); +// +// private: +// DISALLOW_COPY_AND_ASSIGN(ObDupTableLeaseResponseMsgP); +// private: +// const observer::ObGlobalContext &global_ctx_; +// }; +// class ObRedoLogSyncRequestP : public ObRpcProcessor> +// { +// public: +// explicit ObRedoLogSyncRequestP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) {} +// protected: +// int process(); +// private: +// DISALLOW_COPY_AND_ASSIGN(ObRedoLogSyncRequestP); +// private: +// const observer::ObGlobalContext &global_ctx_; +// }; +// +// class ObRedoLogSyncResponseP : public ObRpcProcessor> +// { +// public: +// explicit ObRedoLogSyncResponseP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) {} +// protected: +// int process(); +// private: +// DISALLOW_COPY_AND_ASSIGN(ObRedoLogSyncResponseP); +// private: +// const observer::ObGlobalContext &global_ctx_; +// }; +// +// class ObPreCommitRequestP : public ObRpcProcessor> +// { +// public: +// explicit ObPreCommitRequestP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) {} +// protected: +// int process(); +// private: +// DISALLOW_COPY_AND_ASSIGN(ObPreCommitRequestP); +// private: +// const observer::ObGlobalContext &global_ctx_; +// }; +// +// class ObPreCommitResponseP : public ObRpcProcessor> +// { +// public: +// explicit ObPreCommitResponseP(const observer::ObGlobalContext &global_ctx) : global_ctx_(global_ctx) {} +// protected: +// int process(); +// private: +// DISALLOW_COPY_AND_ASSIGN(ObPreCommitResponseP); +// private: +// const observer::ObGlobalContext &global_ctx_; +// }; +// }//obrpc namespace transaction @@ -403,15 +404,13 @@ public: const ObPreCommitResponseMsg &msg) = 0; }; -class ObDupTableRpc : public ObIDupTableRpc +class ObDupTableRpc_old : public ObIDupTableRpc { public: - ObDupTableRpc() : 
is_inited_(false), is_running_(false), - trans_service_(NULL), rpc_proxy_() {} - ~ObDupTableRpc() { destroy(); } - int init(ObTransService *trans_service, - rpc::frame::ObReqTransport *transport, - const common::ObAddr &addr); + ObDupTableRpc_old() : is_inited_(false), is_running_(false), + trans_service_(NULL), rpc_proxy_(NULL) {} + ~ObDupTableRpc_old() { destroy(); } + int init(ObTransService *trans_service, rpc::frame::ObReqTransport *transport, const ObAddr &addr); int start(); int stop(); int wait(); diff --git a/src/storage/tx/ob_dup_table_stat.cpp b/src/storage/tx/ob_dup_table_stat.cpp new file mode 100644 index 0000000000..05bea281d4 --- /dev/null +++ b/src/storage/tx/ob_dup_table_stat.cpp @@ -0,0 +1,144 @@ +//Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +#include "storage/tx/ob_dup_table_stat.h" +#include "storage/tx/ob_dup_table_tablets.h" + +namespace oceanbase +{ +namespace transaction +{ +constexpr const char DupTableModID::OB_VIRTUAL_DUP_LS_LEASE_MGR[]; +constexpr const char DupTableModID::OB_VIRTUAL_DUP_LS_TABLETS[]; +constexpr const char DupTableModID::OB_VIRTUAL_DUP_LS_TABLET_SET[]; + +void ObDupTableLSBaseStat::reset() +{ + tenant_id_ = 0; + ls_id_.reset(); + // addr_.reset(); +} + +void ObDupTableLSLeaseMgrStat::reset() +{ + ObDupTableLSBaseStat::reset(); + follower_addr_.reset(); + grant_ts_ = 0; + expired_ts_ = 0; + remain_us_ = 0; + grant_req_ts_ = 0; + cached_req_ts_ = 0; + lease_interval_ = 0; + max_read_version_ = 0; + max_commit_version_ = 0; + max_replayed_scn_.set_invalid(); +} + +void ObDupTableLSTabletSetStat::reset() +{ + ObDupTableLSBaseStat::reset(); + is_master_ = false; + unique_id_ = INT64_MAX; + count_ = 0; + attr_ = TabletSetAttr::INVALID; + readable_scn_.set_invalid(); + change_scn_.set_invalid(); + need_confirm_scn_.set_invalid(); + state_ = TabletSetState::INVALID; + trx_ref_ = 0; +} + +void ObDupTableLSTabletsStat::reset() +{ + ObDupTableLSBaseStat::reset(); + is_master_ = false; + unique_id_ = UINT64_MAX; + attr_ = TabletSetAttr::INVALID; + refresh_schema_ts_ = 0; + // need_gc_ = false; +} + +void ObDupTableLSTabletSetStat::set_from_change_status( + struct DupTabletSetChangeStatus *tmp_status) +{ + if (OB_NOT_NULL(tmp_status)) { + if (tmp_status->flag_ <= DupTabletSetChangeFlag::UNUSED) { + // map flag(-1) and flag(0) to TabletSetState(0)["INVALID"] + set_state(static_cast(0)); + } else { + set_state(static_cast(tmp_status->flag_)); + } + + set_trx_ref(tmp_status->trx_ref_); + set_change_scn(tmp_status->tablet_change_scn_); + set_readable_scn(tmp_status->readable_version_); + set_need_confirm_scn(tmp_status->need_confirm_scn_); + } +} + +const ObString &get_dup_ls_state_str(const bool is_master) +{ + static const ObString LSStateName[] = + { + ObString("LEADER"), + ObString("FOLLOWER") + }; + const int state = is_master ? 
0 : 1; + + return LSStateName[state]; +} + +const ObString &get_dup_tablet_set_attr_str(const TabletSetAttr attr) +{ + static const ObString LSTabletSetAttrName[] = + { + ObString("INVALID"), + ObString("DATA_SYNCING"), + ObString("READABLE"), + ObString("DELETING"), + ObString("UNKNOWN") // invalid argument, return unknown string + }; + + int8_t attr_idx = 0; + if (attr > TabletSetAttr::MAX || attr <= TabletSetAttr::INVALID) { + DUP_TABLE_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "unexpected attr", K(attr)); + attr_idx = static_cast<int8_t>(TabletSetAttr::MAX); // return unknown + } else { + attr_idx = static_cast<int8_t>(attr); + } + + return LSTabletSetAttrName[attr_idx]; +} + +const ObString &get_dup_tablet_set_state_str(const TabletSetState state) +{ + static const ObString LSTabletSetStateName[] = + { + ObString("INVALID"), + ObString("TMP"), + ObString("LOGGING"), + ObString("CONFIRMING"), + ObString("CONFIRMED"), + ObString("UNKNOWN") // invalid argument, return unknown string + }; + + int8_t state_idx = 0; + if (state > TabletSetState::MAX || state <= TabletSetState::INVALID) { + DUP_TABLE_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "unexpected state", K(state)); + state_idx = static_cast<int8_t>(TabletSetState::MAX); // return unknown + } else { + state_idx = static_cast<int8_t>(state); + } + + return LSTabletSetStateName[state_idx]; +} + +} // namespace transaction +} // namespace oceanbase diff --git a/src/storage/tx/ob_dup_table_stat.h b/src/storage/tx/ob_dup_table_stat.h new file mode 100644 index 0000000000..1343572900 --- /dev/null +++ b/src/storage/tx/ob_dup_table_stat.h @@ -0,0 +1,229 @@ +//Copyright (c) 2023 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details.
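// A minimal usage sketch, not part of this patch: it shows how the enum-to-string
// helpers defined above (get_dup_ls_state_str, get_dup_tablet_set_attr_str,
// get_dup_tablet_set_state_str) are expected to be consumed when formatting one
// tablet-set stat row. The function name format_tablet_set_stat and the
// ObSqlString output are assumptions for illustration only.
#include "lib/string/ob_sql_string.h"     // common::ObSqlString
#include "storage/tx/ob_dup_table_stat.h" // ObDupTableLSTabletSetStat and the string helpers
namespace oceanbase
{
namespace transaction
{
inline int format_tablet_set_stat(const ObDupTableLSTabletSetStat &stat,
                                  common::ObSqlString &out)
{
  // Out-of-range enum values are already mapped to the trailing "UNKNOWN" slot
  // by the helpers, so no extra validation is needed here.
  const common::ObString &ls_state = stat.get_ls_state_str();
  const common::ObString &attr = stat.get_tablet_set_attr_str();
  const common::ObString &state = stat.get_tablet_set_state_str();
  return out.append_fmt("ls_state=%.*s, attr=%.*s, state=%.*s",
                        ls_state.length(), ls_state.ptr(),
                        attr.length(), attr.ptr(),
                        state.length(), state.ptr());
}
} // namespace transaction
} // namespace oceanbase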
+ +#ifndef OCEANBASE_TRANSACTION_DUP_TABLE_STAT_H +#define OCEANBASE_TRANSACTION_DUP_TABLE_STAT_H + +#include "lib/hash/ob_hashmap.h" +#include "common/ob_simple_iterator.h" +#include "storage/tx/ob_trans_define.h" +#include "lib/allocator/ob_mod_define.h" + +namespace oceanbase +{ + +namespace transaction +{ +struct DupTabletSetChangeStatus; +// base data structure for dup table +// addr_ hold server ip and port +class ObDupTableLSBaseStat +{ +public: + ObDupTableLSBaseStat() { reset(); } + ~ObDupTableLSBaseStat() { destroy(); } + void reset(); + void destroy() { reset(); } + + void set_tenant_id(uint64_t tenant_id) { tenant_id_ = tenant_id; } + // void set_addr(const common::ObAddr &addr) { addr_ = addr; } + void set_ls_id(share::ObLSID ls_id) { ls_id_ = ls_id; } + + uint64_t get_tenant_id() const { return tenant_id_; } + // const common::ObAddr &get_addr() const { return addr_; } + share::ObLSID get_ls_id() const { return ls_id_; } + + TO_STRING_KV(K_(tenant_id), K_(ls_id)); + +private: + uint64_t tenant_id_; + share::ObLSID ls_id_; + // common::ObAddr addr_; +}; + +class ObDupTableLSLeaseMgrStat: public ObDupTableLSBaseStat +{ +public: + ObDupTableLSLeaseMgrStat() { reset(); } + ~ObDupTableLSLeaseMgrStat() { destroy(); } + + void reset(); + void destroy() { reset(); } + + OB_INLINE void set_follower_addr(const common::ObAddr &follower_addr) { follower_addr_ = follower_addr; } + OB_INLINE void set_grant_ts(const int64_t grant_ts) { grant_ts_ = grant_ts; } + OB_INLINE void set_expired_ts(const int64_t expired_ts) { expired_ts_ = expired_ts; } + OB_INLINE void set_cached_req_ts(const int64_t cached_req_ts) { cached_req_ts_ = cached_req_ts; } + OB_INLINE void set_grant_req_ts(const int64_t grant_req_ts) { grant_req_ts_ = grant_req_ts; } + OB_INLINE void set_remain_us (const int64_t remain_us) { remain_us_ = remain_us; } + OB_INLINE void set_lease_interval(const int64_t lease_interval) { lease_interval_ = lease_interval; } + OB_INLINE void set_max_replayed_scn(const share::SCN &max_replayed_scn) { max_replayed_scn_ = max_replayed_scn; } + OB_INLINE void set_max_read_version(const int64_t max_read_version) { max_read_version_ = max_read_version; } + OB_INLINE void set_max_commit_version(const int64_t max_commit_version) { max_commit_version_ = max_commit_version; } + + OB_INLINE const common::ObAddr &get_follower_addr() const { return follower_addr_; } + OB_INLINE int64_t get_grant_ts() const { return grant_ts_; } + OB_INLINE int64_t get_expired_ts() const { return expired_ts_; } + OB_INLINE int64_t get_remain_us() const { return remain_us_; } + OB_INLINE int64_t get_cached_req_ts() const { return cached_req_ts_; } + OB_INLINE int64_t get_grant_req_ts() const { return grant_req_ts_; } + OB_INLINE int64_t get_lease_interval() const { return lease_interval_; } + OB_INLINE int64_t get_max_read_version() const { return max_read_version_; } + OB_INLINE int64_t get_max_commit_version() const { return max_commit_version_; } + OB_INLINE const share::SCN &get_max_replayed_scn() const { return max_replayed_scn_; } + + INHERIT_TO_STRING_KV("ObDupTableLSLeaseMgrStat", ObDupTableLSBaseStat, K_(follower_addr), + K_(grant_ts), K_(expired_ts), K_(remain_us), K_(lease_interval), K_(grant_req_ts), + K_(cached_req_ts), K_(max_replayed_scn), K_(max_read_version), K_(max_commit_version)); + +private: + common::ObAddr follower_addr_; + int64_t grant_ts_; + int64_t expired_ts_; + int64_t remain_us_; + int64_t lease_interval_; + int64_t grant_req_ts_; + int64_t cached_req_ts_; + int64_t max_read_version_; + int64_t 
max_commit_version_; + share::SCN max_replayed_scn_; +}; + +struct DupTableModID{ + static constexpr const char OB_VIRTUAL_DUP_LS_LEASE_MGR[] {"OB_VIRTUAL_DUP_LS_LEASE_MGR"}; + static constexpr const char OB_VIRTUAL_DUP_LS_TABLETS[] {"OB_VIRTUAL_DUP_LS_TABLETS"}; + static constexpr const char OB_VIRTUAL_DUP_LS_TABLET_SET[] {"OB_VIRTUAL_DUP_LS_TABLET_SET"}; +}; + +typedef common::ObSimpleIterator ObDupLSLeaseMgrStatIterator; + +typedef ObSEArray FollowerLeaseMgrStatArr; + +enum class TabletSetAttr { + INVALID = 0, + DATA_SYNCING, + READABLE, + DELETING, + MAX, +}; + +enum class TabletSetState { + INVALID = 0, + TMP, + LOGGING, + CONFIRMING, + CONFIRMED, + MAX, +}; + +const ObString &get_dup_ls_state_str(const bool is_master); +const ObString &get_dup_tablet_set_attr_str(const TabletSetAttr attr); +// for tablet set virtual table +const ObString &get_dup_tablet_set_state_str(const TabletSetState state); + +class ObDupTableLSTabletsStat: public ObDupTableLSBaseStat +{ +public: + ObDupTableLSTabletsStat() { reset(); } + ~ObDupTableLSTabletsStat() { destroy(); } + + void reset(); + void destroy() { reset(); } + + OB_INLINE void set_is_master(const bool is_master) { is_master_ = is_master; } + OB_INLINE void set_unique_id(const uint64_t unique_id) { unique_id_ = unique_id; } + OB_INLINE void set_tablet_id(const common::ObTabletID tablet_id) { tablet_id_ = tablet_id; } + OB_INLINE void set_attr(const TabletSetAttr attr) { attr_ = attr; } + OB_INLINE void set_refresh_schema_ts(const int64_t refresh_schema_ts) { refresh_schema_ts_ = refresh_schema_ts; } + + OB_INLINE int64_t get_unique_id() const { return unique_id_; } + OB_INLINE common::ObTabletID get_tablet_id() const { return tablet_id_; } + OB_INLINE const ObString &get_ls_state_str() { return get_dup_ls_state_str(is_master_); } + OB_INLINE const ObString &get_tablet_set_attr_str() { return get_dup_tablet_set_attr_str(attr_); } + OB_INLINE int64_t get_refresh_schema_ts() const { return refresh_schema_ts_; } + + INHERIT_TO_STRING_KV("ObDupTableLSTabletsStat", ObDupTableLSBaseStat, K_(is_master), + K_(unique_id), K_(tablet_id), K_(attr), K_(refresh_schema_ts)); + +private: + bool is_master_; + int64_t unique_id_; + common::ObTabletID tablet_id_; + TabletSetAttr attr_; + int64_t refresh_schema_ts_; + // bool need_gc_; +}; + +typedef common::ObSimpleIterator ObDupLSTabletsStatIterator; + +class ObDupTableLSTabletSetStat: public ObDupTableLSBaseStat +{ +public: + ObDupTableLSTabletSetStat() { reset(); } + ~ObDupTableLSTabletSetStat() { destroy(); } + + void reset(); + void destroy() { reset(); } + OB_INLINE void set_is_master(const bool is_master) { is_master_ = is_master; } + OB_INLINE void set_unique_id(const int64_t unique_id) { unique_id_ = unique_id; } + OB_INLINE void set_count(const int64_t count) { count_ = count; } + OB_INLINE void set_attr(const TabletSetAttr attr) { attr_ = attr; } + OB_INLINE void set_readable_scn(const share::SCN &readable_scn) { readable_scn_ = readable_scn; } + OB_INLINE void set_change_scn(const share::SCN &change_scn) { change_scn_ = change_scn; } + OB_INLINE void set_need_confirm_scn(const share::SCN &need_confirm_scn) { need_confirm_scn_ = need_confirm_scn; } + OB_INLINE void set_state(const TabletSetState state) { state_ = state; } + OB_INLINE void set_trx_ref(const int64_t trx_ref) { trx_ref_ = trx_ref; } + OB_INLINE void set_basic_info(const uint64_t tenant_id, const share::ObLSID ls_id, + const bool is_master) + { + set_tenant_id(tenant_id); + set_ls_id(ls_id); + set_is_master(is_master); + } + void 
set_from_change_status(struct DupTabletSetChangeStatus *tmp_status); + // bool get_is_master() const { return is_master_; } + OB_INLINE int64_t get_unique_id() const { return unique_id_; } + OB_INLINE int64_t get_count() const { return count_; } + // tablet_set_attr get_attr() const { return attr_; } + OB_INLINE const share::SCN &get_readable_scn() const { return readable_scn_; } + OB_INLINE const share::SCN &get_change_scn() const { return change_scn_; } + OB_INLINE const share::SCN &get_need_confirm_scn() { return need_confirm_scn_; } + // tablet_set_state get_state() { return state_; } + OB_INLINE const ObString &get_ls_state_str() const { return get_dup_ls_state_str(is_master_); } + OB_INLINE const ObString &get_tablet_set_attr_str() const { return get_dup_tablet_set_attr_str(attr_); } + OB_INLINE const ObString &get_tablet_set_state_str() const { return get_dup_tablet_set_state_str(state_); } + OB_INLINE int64_t get_trx_ref() const { return trx_ref_; } + + INHERIT_TO_STRING_KV("ObDupTableLSTabletSet", ObDupTableLSBaseStat, K_(is_master), + K_(unique_id), K_(count), K_(attr), K_(readable_scn), + K_(change_scn), K_(need_confirm_scn), K_(state), K_(trx_ref)); + +private: + bool is_master_; + int64_t unique_id_; + int64_t count_; + TabletSetAttr attr_; + share::SCN readable_scn_; + share::SCN change_scn_; + share::SCN need_confirm_scn_; + TabletSetState state_; + int64_t trx_ref_; +}; + +typedef common::ObSimpleIterator ObDupLSTabletSetStatIterator; + + + +} // namespace transaction +} // namespace oceanbase +#endif diff --git a/src/storage/tx/ob_dup_table_tablets.cpp b/src/storage/tx/ob_dup_table_tablets.cpp new file mode 100644 index 0000000000..0ff4a62da1 --- /dev/null +++ b/src/storage/tx/ob_dup_table_tablets.cpp @@ -0,0 +1,2779 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
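// A minimal sketch, not part of this patch, of how the stat iterators declared in
// ob_dup_table_stat.h are expected to be drained on the consumer side, assuming the
// usual push()/set_ready()/get_next() interface of common::ObSimpleIterator (the
// producer, e.g. CollectTabletsHandler below, pushes one ObDupTableLSTabletsStat per
// tablet and then marks the iterator ready). The function name count_tablet_stats
// is hypothetical.
#include "storage/tx/ob_dup_table_stat.h"
namespace oceanbase
{
namespace transaction
{
inline int count_tablet_stats(ObDupLSTabletsStatIterator &iter, int64_t &cnt)
{
  int ret = OB_SUCCESS;
  ObDupTableLSTabletsStat stat;
  cnt = 0;
  // get_next() copies out the next pushed stat until OB_ITER_END is returned.
  while (OB_SUCC(iter.get_next(stat))) {
    cnt++;
  }
  if (OB_ITER_END == ret) {
    ret = OB_SUCCESS; // drained every stat that was pushed
  }
  return ret;
}
} // namespace transaction
} // namespace oceanbase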
+ +#include "lib/utility/ob_tracepoint.h" +#include "ob_dup_table_base.h" +#include "ob_dup_table_tablets.h" +#include "ob_dup_table_util.h" +#include "observer/ob_sql_client_decorator.h" +#include "share/inner_table/ob_inner_table_schema_constants.h" +#include "share/schema/ob_multi_version_schema_service.h" +#include "share/schema/ob_schema_struct.h" + +namespace oceanbase +{ + +using namespace common; +using namespace share; +namespace transaction +{ + +int64_t ObLSDupTabletsMgr::GC_DUP_TABLETS_TIME_INTERVAL = 5 * 60 * 1000 * 1000L; // 5 min +int64_t ObLSDupTabletsMgr::GC_DUP_TABLETS_FAILED_TIMEOUT = + 5 * GC_DUP_TABLETS_TIME_INTERVAL; // 25 min +const int64_t ObLSDupTabletsMgr::GC_TIMEOUT = 1 * 1000 * 1000L; // 1s + +const int64_t ObLSDupTabletsMgr::RESERVED_FREE_SET_COUNT = 64; +const int64_t ObLSDupTabletsMgr::MAX_FREE_SET_COUNT = 1000; +const int64_t ObLSDupTabletsMgr::MAX_CONFIRMING_TABLET_COUNT = 20000; + +OB_SERIALIZE_MEMBER(DupTabletCommonHeader, unique_id_, tablet_set_type_, sp_op_type_); +OB_SERIALIZE_MEMBER(DupTabletChangeLogTail, readable_version_, has_confirmed_); +OB_SERIALIZE_MEMBER(DupTabletSpecialOpArg, op_objects_); + +OB_SERIALIZE_MEMBER(DupTabletCommonLogBody, tablet_id_map_); +OB_SERIALIZE_MEMBER_INHERIT(DupTabletChangeLogBody, DupTabletCommonLogBody, change_tail_); +OB_SERIALIZE_MEMBER_INHERIT(DupTabletSpecialOpLogBody, DupTabletChangeLogBody, sp_op_arg_); + +//********************************************************************** +//****** Hash Callback +//********************************************************************** + +int TabletsSerCallBack::operator()( + const common::hash::HashMapPair &hash_pair) +{ + return hash_pair.first.serialize(buf_, buf_len_, pos_); +} + +int TabletsDeSerCallBack::operator()(DupTabletChangeMap &dup_tablet_map) +{ + int ret = OB_SUCCESS; + ObTabletID tablet_id; + DupTabletInfo tmp_info; + tmp_info.update_dup_schema_ts_ = deser_time_; + + if (OB_FAIL(tablet_id.deserialize(buf_, buf_len_, pos_))) { + DUP_TABLE_LOG(WARN, "deserialize tablet id failed", K(ret)); + } else if (OB_FAIL(dup_tablet_map.set_refactored(tablet_id, tmp_info, 1))) { + DUP_TABLE_LOG(WARN, "insert tablet failed", K(ret), K(tablet_id), K(tmp_info)); + } + + return ret; +} + +int64_t TabletsGetSizeCallBack::operator()( + const common::hash::HashMapPair &hash_pair) +{ + return hash_pair.first.get_serialize_size(); +} + +bool ObLSDupTabletsMgr::GcDiscardedDupTabletHandler::operator()( + common::hash::HashMapPair &hash_pair) +{ + bool will_remove = false; + int tmp_ret = OB_SUCCESS; + + if (0 > hash_pair.second.update_dup_schema_ts_ || 0 > gc_ts_) { + tmp_ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG_RET(WARN, tmp_ret, "invalid timestamp", K(hash_pair.first), + K(hash_pair.second.update_dup_schema_ts_), K(gc_ts_)); + } else if (OB_SUCCESS == ret_) { // if ret_ is failed, not need continue + if ((gc_ts_ - hash_pair.second.update_dup_schema_ts_) >= gc_time_interval_) { + if (src_common_header_.is_old_set()) { + // do nothing + } else if (src_common_header_.is_new_set()) { + will_remove = true; + gc_tablet_cnt_++; + } else if (src_common_header_.is_readable_set()) { + DupTabletInfo tmp_info = hash_pair.second; + if (!old_tablets_.get_change_status()->is_modifiable()) { + tmp_ret = OB_EAGAIN; + } else if (OB_TMP_FAIL(old_tablets_.set_refactored(hash_pair.first, tmp_info))) { + DUP_TABLE_LOG_RET(WARN, tmp_ret, "insert into old_tablets_ failed", K(tmp_ret)); + } else { + will_remove = true; + gc_tablet_cnt_++; + } + } else { + DUP_TABLE_LOG_RET(ERROR, tmp_ret, "unexpected 
src type", K(tmp_ret), K(src_common_header_)); + } + } + if (OB_TMP_FAIL(tmp_ret)) { + ret_ = tmp_ret; + } + } + DUP_TABLE_LOG(DEBUG, "gc handler", K(ret_), K(hash_pair.first), K(src_common_header_), + K(gc_tablet_cnt_), + K((gc_ts_ - hash_pair.second.update_dup_schema_ts_) >= gc_time_interval_)); + + return will_remove; +} + +int ObLSDupTabletsMgr::ConfirmedDupTabletHandler::operator()( + common::hash::HashMapPair &hash_pair) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(readable_.set_refactored(hash_pair.first, hash_pair.second, 1))) { + DUP_TABLE_LOG(WARN, "insert into readable_tablets_ failed", K(ret)); + } + return ret; +} + +int ObLSDupTabletsMgr::DiagInfoGenerator::operator()( + const common::hash::HashMapPair &hash_pair) +{ + int ret = OB_SUCCESS; + + if ((iter_count_) % 2 == 0) { + // no need \n after tablet set header + ret = ::oceanbase::common::databuff_printf( + info_buf_, info_buf_len_, info_buf_pos_, "\n%s%s[%sTablet Set Member - from %lu] ", + DupTableDiagStd::DUP_DIAG_INDENT_SPACE, DupTableDiagStd::DUP_DIAG_INDENT_SPACE, + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, tablet_set_id_); + } + if (OB_SUCC(ret)) { + ret = ::oceanbase::common::databuff_printf(info_buf_, info_buf_len_, info_buf_pos_, + "{ TabletID = %-10lu, RefreshDupSchemaTs = %-20lu} ", + hash_pair.first.id(), + hash_pair.second.update_dup_schema_ts_); + } + + iter_count_++; + return ret; +} + +int ObLSDupTabletsMgr::CollectTabletsHandler::operator()( + const common::hash::HashMapPair &hash_pair) +{ + int ret = OB_SUCCESS; + + ObDupTableLSTabletsStat tmp_stat; + tmp_stat.set_tenant_id(tenant_id_); + tmp_stat.set_ls_id(ls_id_); + // tmp_stat.set_addr(addr_); + tmp_stat.set_is_master(is_master_); + tmp_stat.set_unique_id(tablet_set_id_); + tmp_stat.set_attr(attr_); + tmp_stat.set_tablet_id(hash_pair.first); + tmp_stat.set_refresh_schema_ts(hash_pair.second.update_dup_schema_ts_); + // tmp_stat.set_need_gc(hash_pair.second.update_dup_schema_ts_ - + // collect_ts_ > tablet_gc_window_); + + if (OB_FAIL(collect_iter_.push(tmp_stat))) { + DUP_TABLE_LOG(WARN, "push into iter failed", K(tmp_stat)); + } + + return ret; +} + +//********************************************************************** +//****** DupTabletSet & DupTabletLog +//********************************************************************** + +int DupTabletChangeMap::create(int64_t bucket_num) +{ + int ret = OB_SUCCESS; + if (!common_header_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "invalid unique_id", K(ret), K(common_header_)); + } else if (OB_FAIL(DupTabletIdMap::create(bucket_num, "DupTabletHash"))) { + DUP_TABLE_LOG(WARN, "create dup tablet id map failed", K(ret), K(common_header_), + K(bucket_num)); + } + + return ret; +} + +int DupTabletChangeMap::serialize(char *buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + int64_t tmp_pos = pos; + TabletsSerCallBack ser_cb(buf, buf_len, tmp_pos); + + if (OB_FAIL(hash_for_each_serialize(*this, ser_cb))) { + DUP_TABLE_LOG(WARN, "serialize dup tablet hash map faild", K(ret)); + } else { + tmp_pos = ser_cb.get_pos(); + } + + if (OB_SUCC(ret)) { + pos = tmp_pos; + } + return ret; +} + +int DupTabletChangeMap::deserialize(const char *buf, const int64_t data_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + int64_t deser_time = ObTimeUtility::fast_current_time(); + + int64_t tmp_pos = pos; + TabletsDeSerCallBack deser_cb(buf, data_len, tmp_pos, deser_time); + if (OB_FAIL(this->clear())) { + DUP_TABLE_LOG(WARN, "clear dup tablet hash map faild", K(ret)); + } else if 
(OB_FAIL(hash_for_each_deserialize(*this, deser_cb))) { + DUP_TABLE_LOG(WARN, "deserialize dup tablet hash map faild", K(ret)); + } else { + tmp_pos = deser_cb.get_pos(); + } + + if (OB_SUCC(ret)) { + pos = tmp_pos; + } + + return ret; +} + +int64_t DupTabletChangeMap::get_serialize_size() const +{ + int64_t serialize_size = 0; + + TabletsGetSizeCallBack get_size_cb; + serialize_size += hash_for_each_serialize_size(*this, get_size_cb); + + return serialize_size; +} + +bool DupTabletChangeLogTail::is_valid() const { return readable_version_.is_valid(); } + +int DupTabletLog::serialize(char *buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + int64_t tmp_pos = pos; + + if (OB_FAIL(common_header_.serialize(buf, buf_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize dup tablet set header faild", K(ret), K(buf_len), K(tmp_pos), + K(pos)); + + } else if (!common_header_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "invalid common header", K(ret), K(common_header_), K(buf_len), K(tmp_pos), + K(pos)); + } else if (common_header_.is_readable_set()) { + if (OB_FAIL(hash_map_->serialize(buf, buf_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize readable hash map failed", K(ret), KPC(this), K(buf_len), + K(tmp_pos), K(pos)); + } + } else { + if (!change_tail_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid change header", K(ret), KPC(this)); + } else if (OB_FAIL(hash_map_->serialize(buf, buf_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize new/old hash map failed", K(ret), KPC(this), K(buf_len), + K(tmp_pos), K(pos)); + } else if (OB_FAIL(change_tail_.serialize(buf, buf_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize change header failed", K(ret), KPC(this), K(buf_len), + K(tmp_pos), K(pos)); + } + } + + if (OB_SUCC(ret)) { + pos = tmp_pos; + } + return ret; +} + +int DupTabletLog::deserialize_common_header(const char *buf, const int64_t data_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + + int64_t tmp_pos = pos; + + if (OB_FAIL(common_header_.deserialize(buf, data_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "deserialize dup tablet set header faild", K(ret), K(data_len), K(tmp_pos), + K(pos)); + + } else if (!common_header_.is_valid()) { + + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "invalid common header", K(ret), K(common_header_), K(data_len), K(tmp_pos), + K(pos)); + } + + if (OB_SUCC(ret)) { + pos = tmp_pos; + } + return ret; +} + +int DupTabletLog::deserialize_content(const char *buf, const int64_t data_len, int64_t &pos) +{ + int ret = OB_SUCCESS; + + int64_t tmp_pos = pos; + + if (!common_header_.is_valid() || OB_ISNULL(hash_map_)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret), KP(hash_map_), K(common_header_)); + } else if (hash_map_->get_common_header().get_unique_id() != common_header_.get_unique_id()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid hash map", K(ret), K(hash_map_->get_common_header()), + K(common_header_)); + } else { + + if (common_header_.is_readable_set()) { + if (OB_FAIL(hash_map_->deserialize(buf, data_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize readable hash map failed", K(ret), KPC(this), K(data_len), + K(tmp_pos), K(pos)); + } + } else { + if (OB_FAIL(hash_map_->deserialize(buf, data_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize new/old hash map failed", K(ret), KPC(this), K(data_len), + K(tmp_pos), K(pos)); + } else if (OB_FAIL(change_tail_.deserialize(buf, data_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize change header 
failed", K(ret), KPC(this), K(data_len), + K(tmp_pos), K(pos)); + } else if (!change_tail_.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid change header", K(ret), KPC(this)); + } else { + DUP_TABLE_LOG(DEBUG, "deser tablet end", K(ret), K(change_tail_)); + } + } + } + + if (OB_SUCC(ret)) { + pos = tmp_pos; + } + return ret; +} + +int64_t DupTabletLog::get_serialize_size() +{ + int64_t max_size = 0; + + if (OB_NOT_NULL(hash_map_)) { + max_size += common_header_.get_serialize_size(); + max_size += hash_map_->get_serialize_size(); + if (!common_header_.is_readable_set()) { + max_size += change_tail_.get_serialize_size(); + } + } else { + DUP_TABLE_LOG_RET(ERROR, OB_ERR_UNEXPECTED, "unexpected error"); + } + + return max_size; +} + +int DupTabletLog::set_hash_map_ptr(DupTabletChangeMap *hash_map_ptr, DupTabletSpecialOpArg *arg_ptr) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(hash_map_ptr)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid hash map", K(ret), KP(hash_map_ptr)); + } else if (hash_map_ptr->get_common_header().get_unique_id() != common_header_.get_unique_id()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "unexpected hash map", K(ret), K(hash_map_ptr->get_common_header()), + K(common_header_)); + } else { + hash_map_ = hash_map_ptr; + } + + return ret; +} + +const DupTabletCommonHeader &DupTabletLog::get_common_header() { return common_header_; } + +const DupTabletChangeLogTail &DupTabletLog::get_change_tail() { return change_tail_; } + +//********************************************************************** +//****** ObLSDupTabletsMgr +//********************************************************************** +int ObLSDupTabletsMgr::init(ObDupTableLSHandler *dup_ls_handle) +{ + int ret = OB_SUCCESS; + + SpinWLockGuard guard(dup_tablets_lock_); + + if (!ATOMIC_LOAD(&is_stopped_)) { + ret = OB_INIT_TWICE; + } else if (OB_FAIL(init_free_tablet_pool_())) { + DUP_TABLE_LOG(WARN, "init tablet change set failed", K(ret)); + } else if (OB_FAIL(op_arg_map_.create(8, "DupSpecOp"))) { + DUP_TABLE_LOG(WARN, "create spec op failed", K(ret)); + } else { + ATOMIC_STORE(&is_stopped_, false); + ls_id_ = dup_ls_handle->get_ls_id(); + ATOMIC_STORE(&is_master_, false); + } + + if (OB_FAIL(ret)) { + reset(); + } + + return ret; +} + +int ObLSDupTabletsMgr::init_free_tablet_pool_() +{ + int ret = OB_SUCCESS; + + destroy_free_tablet_pool_(); + + for (int i = 0; i < RESERVED_FREE_SET_COUNT && OB_SUCC(ret); i++) { + DupTabletChangeMap *tmp_map_ptr = nullptr; + if (OB_ISNULL(tmp_map_ptr = static_cast( + share::mtl_malloc(sizeof(DupTabletChangeMap), "DupTabletMap")))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + // } else if (OB_FALSE_IT(extra_free_set_alloc_count_++)) { + } else if (OB_FALSE_IT(new (tmp_map_ptr) DupTabletChangeMap(i + 1))) { + } else if (OB_FAIL(tmp_map_ptr->create(1024))) { + DUP_TABLE_LOG(WARN, "create dup_tablet hash map", K(ret)); + } else if (false == (free_set_pool_.add_last(tmp_map_ptr))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "push back into free_set_pool failed", K(ret), + K(free_set_pool_.get_size()), KPC(tmp_map_ptr)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_FAIL(get_free_tablet_set(removing_old_set_))) { + DUP_TABLE_LOG(WARN, "get free tablet set failed", K(ret)); + } else { + removing_old_set_->get_common_header().set_old(); + } + + DUP_TABLE_LOG(INFO, "finish init tablet map", K(ret), KPC(removing_old_set_), + K(free_set_pool_.get_size())); + return ret; +} + +int ObLSDupTabletsMgr::destroy_free_tablet_pool_() +{ + int 
ret = OB_SUCCESS; + + if (OB_NOT_NULL(removing_old_set_)) { + return_tablet_set(removing_old_set_); + removing_old_set_ = nullptr; + } + + if (OB_NOT_NULL(changing_new_set_)) { + return_tablet_set(changing_new_set_); + } + + while (!readable_tablets_list_.is_empty()) { + return_tablet_set(readable_tablets_list_.remove_last()); + } + + while (!need_confirm_new_queue_.is_empty()) { + return_tablet_set(need_confirm_new_queue_.remove_last()); + } + + while (!free_set_pool_.is_empty()) { + DupTabletChangeMap *dup_map_ptr = free_set_pool_.remove_last(); + dup_map_ptr->destroy(); + share::mtl_free(dup_map_ptr); + } + + return ret; +} + +void ObLSDupTabletsMgr::destroy() { reset(); } + +void ObLSDupTabletsMgr::reset() +{ + destroy_free_tablet_pool_(); + ls_id_.reset(); + ATOMIC_STORE(&is_stopped_, true); + ATOMIC_STORE(&is_master_, false); + last_gc_succ_time_ = 0; + last_no_free_set_time_ = 0; + extra_free_set_alloc_count_ = 0; + + if (OB_NOT_NULL(tablet_diag_info_log_buf_)) { + ob_free(tablet_diag_info_log_buf_); + } + tablet_diag_info_log_buf_ = nullptr; +} + +int ObLSDupTabletsMgr::check_readable(const common::ObTabletID &tablet_id, + bool &readable, + const share::SCN &snapshot, + DupTableInterfaceStat interface_stat) +{ + int ret = OB_SUCCESS; + readable = false; + DupTabletInfo tmp_status; + + SpinRLockGuard guard(dup_tablets_lock_); + + DLIST_FOREACH(readable_node, readable_tablets_list_) + { + ret = readable_node->get_refactored(tablet_id, tmp_status); + if (OB_SUCCESS == ret) { + readable = true; + break; + } else if (OB_HASH_NOT_EXIST == ret) { + readable = false; + ret = OB_SUCCESS; + } else { + DUP_TABLE_LOG(WARN, "check readable tablet failed", K(ret)); + } + } + + if (OB_SUCC(ret) && !readable) { + + DLIST_FOREACH_X(new_change_map_ptr, need_confirm_new_queue_, !readable && OB_SUCC(ret)) + { + share::SCN readable_version; + + if (OB_ISNULL(new_change_map_ptr->get_change_status())) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(ERROR, "unexpected tablet set", K(ret), KPC(new_change_map_ptr)); + + } else if (FALSE_IT(readable_version = + new_change_map_ptr->get_change_status()->readable_version_)) { + } else if (!readable_version.is_valid()) { + // can not read + readable = false; + } else if (readable_version < snapshot) { + // can not read + readable = false; + } else if (OB_SUCC(new_change_map_ptr->get_refactored(tablet_id, tmp_status))) { + readable = true; + } else if (OB_HASH_NOT_EXIST != ret) { + DUP_TABLE_LOG(WARN, "check dup_table new_tablets_ failed", K(ret)); + } else { + interface_stat.dup_table_follower_read_tablet_not_exist_cnt_++; + ret = OB_SUCCESS; + } + } + } + + if (!readable && OB_SUCC(ret)) { + interface_stat.dup_table_follower_read_tablet_not_ready_cnt_++; + } + + return ret; +} + +int ObLSDupTabletsMgr::find_dup_tablet_in_set(const common::ObTabletID &tablet_id, + bool &is_dup_table, + const share::SCN &from_scn, + const share::SCN &to_scn) +{ + int ret = OB_SUCCESS; + + is_dup_table = false; + DupTabletInfo tmp_status; + + SpinRLockGuard guard(dup_tablets_lock_); + + // for DEBUG + // no need to check dup_table which has not submitted + // if (OB_NOT_NULL(changing_new_set_)) { + // if (OB_SUCC(changing_new_set_->get_tablet_id_map().get_refactored(tablet_id, tmp_status))) { + // is_dup_table = true; + // } else if (OB_HASH_NOT_EXIST != ret) { + // DUP_TABLE_LOG(WARN, "check dup_table old_tablets_ failed", K(ret)); + // } else { + // ret = OB_SUCCESS; + // } + // } + if (!need_confirm_new_queue_.is_empty()) { + if 
(need_confirm_new_queue_.get_first()->get_common_header().need_clean_all_readable_set()) { + is_dup_table = true; + DUP_TABLE_LOG(INFO, "set all redo as dup_table during clean all followers' readable set", + K(ret), K(is_dup_table), K(tablet_id), K(from_scn), K(to_scn), + KPC(need_confirm_new_queue_.get_first())); + } + } + + if (OB_SUCC(ret) && !is_dup_table) { + DupTabletChangeMap *new_change_map_ptr = need_confirm_new_queue_.get_first(); + DLIST_FOREACH_X(new_change_map_ptr, need_confirm_new_queue_, !is_dup_table && OB_SUCC(ret)) + { + share::SCN tablet_change_scn; + + if (OB_ISNULL(new_change_map_ptr->get_change_status())) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(ERROR, "unexpected tablet set type", K(ret), KPC(new_change_map_ptr)); + + } else if (FALSE_IT(tablet_change_scn = + new_change_map_ptr->get_change_status()->tablet_change_scn_)) { + if (tablet_change_scn >= from_scn) { + if (tablet_change_scn > to_scn) { + break; + } else if (OB_SUCC(new_change_map_ptr->get_refactored(tablet_id, tmp_status))) { + is_dup_table = true; + } else if (OB_HASH_NOT_EXIST != ret) { + DUP_TABLE_LOG(WARN, "check dup_table new_tablets_ failed", K(ret)); + } else { + ret = OB_SUCCESS; + } + } + + new_change_map_ptr = new_change_map_ptr->get_next(); + } + } + } + + if (OB_SUCC(ret) && !is_dup_table) { + DupTabletChangeMap *old_tablet_set = nullptr; + DupTabletCommonHeader target_common_header; + target_common_header.set_old(); + target_common_header.set_invalid_unique_id(); + + if (OB_FAIL(get_target_tablet_set_(target_common_header, old_tablet_set))) { + DUP_TABLE_LOG(WARN, "get old tablet set failed", K(ret), KPC(old_tablet_set)); + } else if (OB_SUCC(old_tablet_set->get_refactored(tablet_id, tmp_status))) { + is_dup_table = true; + } else if (OB_HASH_NOT_EXIST != ret) { + DUP_TABLE_LOG(WARN, "check dup_table old_tablets_ failed", K(ret), KPC(old_tablet_set)); + } else { + ret = OB_SUCCESS; + } + } + + if (OB_SUCC(ret) && !is_dup_table) { + DLIST_FOREACH_X(readable_set_ptr, readable_tablets_list_, !is_dup_table && OB_SUCC(ret)) + { + if (OB_SUCC(readable_set_ptr->get_refactored(tablet_id, tmp_status))) { + is_dup_table = true; + } else if (OB_HASH_NOT_EXIST != ret) { + DUP_TABLE_LOG(WARN, "check dup_table old_tablets_ failed", K(ret)); + } else { + ret = OB_SUCCESS; + } + } + } + + if (is_dup_table) { + DUP_TABLE_LOG(INFO, "modify a dup tablet by redo log", K(ret), K(tablet_id), K(is_dup_table), + K(*this)); + } + + return ret; +} + +ERRSIM_POINT_DEF(ERRSIM_DUP_TABLE_GC_RIGHT_NOW); +// for gc those not refreshed tablets +int ObLSDupTabletsMgr::gc_dup_tablets(const int64_t gc_ts, const int64_t max_task_interval) +{ + int ret = OB_SUCCESS; + SpinWLockGuard guard(dup_tablets_lock_); + int gc_tablet_cnt = 0; + ObTabletID tmp_id; + + // run gc now + if (OB_FAIL(ERRSIM_DUP_TABLE_GC_RIGHT_NOW)) { + ret = OB_SUCCESS; + last_gc_succ_time_ = gc_ts - GC_DUP_TABLETS_TIME_INTERVAL; + DUP_TABLE_LOG(WARN, "use errsim to invoke gc", KR(ret), K(last_gc_succ_time_), K(gc_ts), + K(max_task_interval)); + } + + if (0 > (gc_ts - last_gc_succ_time_) || 0 > last_gc_succ_time_ || 0 > gc_ts) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "Invalid gc_ts or last_gc_time_", KR(ret), K(last_gc_succ_time_), K(gc_ts)); + } else if ((gc_ts - last_gc_succ_time_) < GC_DUP_TABLETS_TIME_INTERVAL) { + DUP_TABLE_LOG(DEBUG, "not need gc now", K(last_gc_succ_time_)); + } else { + tablet_gc_window_ = 2 + * (max_task_interval > ObDupTabletScanTask::DUP_TABLET_SCAN_INTERVAL + ? 
max_task_interval + : ObDupTabletScanTask::DUP_TABLET_SCAN_INTERVAL); + + int64_t gc_timeout = 0; + if ((gc_ts - last_gc_succ_time_) > GC_DUP_TABLETS_FAILED_TIMEOUT && last_gc_succ_time_ != 0) { + gc_timeout = INT64_MAX; + DUP_TABLE_LOG(WARN, "gc failed too much times, this time should not break", ); + } else { + gc_timeout = GC_TIMEOUT; + } + + int64_t gc_start_time = ObTimeUtility::fast_current_time(); + + /** + * Gc readable tablet set + * */ + DupTabletChangeMap *old_tablet_set = nullptr; + DupTabletCommonHeader old_tablet_common_header; + old_tablet_common_header.set_old(); + old_tablet_common_header.set_invalid_unique_id(); + if (OB_FAIL(ret)) { + } else if (OB_FAIL(get_target_tablet_set_(old_tablet_common_header, old_tablet_set))) { + DUP_TABLE_LOG(WARN, "get old tablet set failed, need skip gc readable tablets", K(ret), + KPC(old_tablet_set)); + ret = OB_SUCCESS; + } else if (!old_tablet_set->get_change_status()->is_modifiable()) { + ret = OB_EAGAIN; // should not update gc succ time to increase gc freq + DUP_TABLE_LOG(INFO, "old tablet set can not be modified, skip gc readable tablets", K(ret), + KPC(old_tablet_set)); + } else { + DLIST_FOREACH(readable_tablets_ptr, readable_tablets_list_) + { + GcDiscardedDupTabletHandler readable_gc_handler( + gc_ts, tablet_gc_window_, readable_tablets_ptr->get_common_header(), *old_tablet_set); + if (OB_FAIL(hash_for_each_remove_with_timeout(tmp_id, *readable_tablets_ptr, + readable_gc_handler, gc_timeout))) { + DUP_TABLE_LOG(WARN, "remove readable tablets failed", KR(ret), + K(readable_gc_handler.get_gc_tablet_cnt())); + } else if (OB_FAIL(readable_gc_handler.get_ret())) { + // if fail, not update last gc succ time to increase gc freqency + DUP_TABLE_LOG(WARN, "remove readable tablets failed, may need retry", KR(ret), + K(readable_gc_handler.get_gc_tablet_cnt())); + } + gc_tablet_cnt += readable_gc_handler.get_gc_tablet_cnt(); + } + } + + /** + * Gc new tablet set + * */ + DupTabletChangeMap *changing_new_set = nullptr; + DupTabletCommonHeader new_tablet_common_header; + new_tablet_common_header.set_new(); + new_tablet_common_header.set_invalid_unique_id(); + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(get_target_tablet_set_(new_tablet_common_header, changing_new_set))) { + DUP_TABLE_LOG(WARN, "get changing new set failed", K(ret), KPC(changing_new_set)); + } else if (OB_NOT_NULL(changing_new_set)) { + if (changing_new_set->empty()) { + // do nothing + DUP_TABLE_LOG(DEBUG, "changing_new_set is empty, not need gc", K(ret)); + } else { + GcDiscardedDupTabletHandler new_gc_handler( + gc_ts, tablet_gc_window_, changing_new_set->get_common_header(), *old_tablet_set); + + if (OB_FAIL(hash_for_each_remove_with_timeout(tmp_id, *changing_new_set, new_gc_handler, + gc_timeout))) { + DUP_TABLE_LOG(WARN, "remove new tablets failed", KR(ret)); + } + // collect gc in new tablets count + gc_tablet_cnt += new_gc_handler.get_gc_tablet_cnt(); + } + } + // collect gc readable tablet + if (OB_SUCC(ret)) { + last_gc_succ_time_ = gc_ts; + } else if (OB_TIMEOUT == ret) { + DUP_TABLE_LOG(WARN, "gc tablets failed, scan all tablets set cost too much time", K(ret), + K(gc_start_time), K(gc_timeout), K(gc_tablet_cnt)); + } else if (OB_EAGAIN == ret) { + ret = OB_SUCCESS; + } + + if (0 != gc_tablet_cnt) { + DUP_TABLE_LOG(INFO, "finish gc dup tablet on time", K(ret), KPC(changing_new_set_), + KPC(removing_old_set_), K(readable_tablets_list_.get_size()), K(gc_tablet_cnt)); + } + } + + return ret; +} + +int ObLSDupTabletsMgr::refresh_dup_tablet(const 
common::ObTabletID &tablet_id, + bool is_dup_table, + int64_t refresh_time) +{ + int ret = OB_SUCCESS; + + SpinWLockGuard guard(dup_tablets_lock_); + + if (!tablet_id.is_valid() || ATOMIC_LOAD(&is_stopped_)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret)); + } else if (!is_master()) { + ret = OB_NOT_MASTER; + // DUP_TABLE_LOG(INFO, "dup_table ls is not master", K(ret), + // K(dup_ls_handle_ptr_->get_ls_id())); + } else if (is_dup_table) { + // exist in readable_tablets_、new_tablets_ => do nothing + // exist in old_tablets_ => remove from old_tablets_ and insert into readable_tablets_ + // not exist => insert into new_tablets_ + + if (OB_FAIL(discover_dup_tablet_(tablet_id, refresh_time))) { + DUP_TABLE_LOG(WARN, "discover a dup tablet failed", K(tablet_id), K(refresh_time)); + } + + } else { + if (OB_FAIL(lose_dup_tablet_(tablet_id))) { + DUP_TABLE_LOG(WARN, "a dup tablet lose dup attr failed", K(tablet_id)); + } + } + + return ret; +} + +int ObLSDupTabletsMgr::prepare_serialize(int64_t &max_ser_size, + DupTabletSetIDArray &unique_id_array, + const int64_t max_log_buf_len) +{ + int ret = OB_SUCCESS; + + SpinRLockGuard guard(dup_tablets_lock_); + + // max_ser_size = 0; + unique_id_array.reuse(); + + if (OB_SUCC(ret)) { + if (OB_ISNULL(changing_new_set_)) { + // do nothing + } else if (changing_new_set_->empty()) { + // empty change map not need add to need confirm and ser + DUP_TABLE_LOG(DEBUG, "changing_new_set_ is empty", K(ret), K(changing_new_set_->empty())); + } else if (OB_FAIL(changing_new_set_->get_change_status()->prepare_serialize())) { + DUP_TABLE_LOG(WARN, "changing new set prepare serialize failed", K(ret)); + } else if (false == need_confirm_new_queue_.add_last(changing_new_set_)) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "push back change_new_set_ failed", K(ret)); + } else if (OB_FALSE_IT(changing_new_set_ = nullptr)) { + // do nothing + } + } + + if (OB_SUCC(ret)) { + bool can_be_confirmed = true; + DLIST_FOREACH(cur_map, need_confirm_new_queue_) + { + if (OB_ISNULL(cur_map->get_change_status())) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(ERROR, "unexpected tablet set type", K(ret), KPC(cur_map)); + } else if (!cur_map->get_change_status()->need_log()) { + DUP_TABLE_LOG(INFO, "no need serialize need_confirm_set in log", K(ret), KPC(cur_map)); + } else if (OB_FAIL(cal_single_set_max_ser_size_(cur_map, max_ser_size, unique_id_array))) { + DUP_TABLE_LOG(WARN, "cal new set max ser_size failed", K(ret)); + } else { + int64_t tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(cur_map->get_change_status()->try_set_confirmed(can_be_confirmed))) { + if (tmp_ret != OB_EAGAIN) { + ret = tmp_ret; + DUP_TABLE_LOG(WARN, "try to set confirmed error", K(ret), K(cur_map)); + } else { + can_be_confirmed = false; + } + } + } + } + } + + if (OB_SUCC(ret)) { + if (max_ser_size > max_log_buf_len) { + ret = OB_LOG_TOO_LARGE; + DUP_TABLE_LOG(INFO, "Too large tablet log, we will not serialize old or readable tablets", + K(ret), K(ls_id_), K(max_ser_size), K(max_log_buf_len), + K(unique_id_array.count()), K(unique_id_array)); + } + } + + if (OB_SUCC(ret)) { + DupTabletChangeMap *old_tablet_set = nullptr; + DupTabletCommonHeader old_tablets_header; + old_tablets_header.set_old(); + old_tablets_header.set_invalid_unique_id(); + if (OB_FAIL(get_target_tablet_set_(old_tablets_header, old_tablet_set))) { + DUP_TABLE_LOG(WARN, "get old tablets failed", K(ret)); + } else if (old_tablet_set->empty()) { + // do nothing + } else if 
(!old_tablet_set->get_change_status()->need_log()) { + DUP_TABLE_LOG(INFO, "no need serialize old tablets in log", K(ret), KPC(old_tablet_set)); + } else if (OB_FAIL( + cal_single_set_max_ser_size_(old_tablet_set, max_ser_size, unique_id_array))) { + DUP_TABLE_LOG(WARN, "cal old set max ser_size failed", K(ret)); + } else if (OB_FAIL(old_tablet_set->get_change_status()->prepare_serialize())) { + DUP_TABLE_LOG(WARN, "old set prepare serialize failed", K(ret)); + } else { + // try confirm old tablets + int64_t tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(old_tablet_set->get_change_status()->try_set_confirmed(true))) { + if (tmp_ret != OB_EAGAIN) { + ret = tmp_ret; + DUP_TABLE_LOG(WARN, "try to set confirmed error", K(ret), K(old_tablet_set)); + } + } + } + } + + // TODO serialize readable tablets + if (OB_SUCC(ret)) { + DLIST_FOREACH(readable_ptr, readable_tablets_list_) + { + if (OB_FAIL(cal_single_set_max_ser_size_(readable_ptr, max_ser_size, unique_id_array))) { + DUP_TABLE_LOG(WARN, "cal readable set max ser_size failed", K(ret)); + } + } + } + + if (OB_LOG_TOO_LARGE == ret) { + ret = OB_SUCCESS; + } + DUP_TABLE_LOG(WARN, "finish prepare ser", K(ret), K(unique_id_array)); + return ret; +} + +int ObLSDupTabletsMgr::serialize_tablet_log(const DupTabletSetIDArray &unique_id_array, + char *buf, + const int64_t buf_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + + int64_t tmp_pos = pos; + + SpinRLockGuard guard(dup_tablets_lock_); + + if (OB_ISNULL(buf) || buf_len <= 0 || pos <= 0) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid argument", K(ret), KP(buf), K(buf_len), K(pos)); + } else if (unique_id_array.count() <= 0) { + ret = OB_ENTRY_NOT_EXIST; + DUP_TABLE_LOG(INFO, "no need to serialize tablet log", K(ret), K(unique_id_array.count())); + } else { + for (int i = 0; i < unique_id_array.count() && OB_SUCC(ret); i++) { + DupTabletChangeMap *tablet_set_ptr = nullptr; + const DupTabletCommonHeader &seralize_common_header = unique_id_array.at(i); + if (OB_FAIL(get_target_tablet_set_(seralize_common_header, tablet_set_ptr))) { + DUP_TABLE_LOG(WARN, "get target tablet set failed", K(ret), K(i), KPC(tablet_set_ptr)); + } else if (OB_FAIL(seralize_common_header.serialize(buf, buf_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize common header failed", K(ret), K(seralize_common_header)); + } else if (seralize_common_header.is_readable_set()) { + // DupTabletLog readable_log(tablet_set_ptr); + // if (OB_FAIL(readable_log.serialize(buf, buf_len, tmp_pos))) { + // DUP_TABLE_LOG(WARN, "serialize readable log failed", K(ret)); + // } + DupTabletCommonLogBody readable_log(*tablet_set_ptr); + if (OB_FAIL(readable_log.serialize(buf, buf_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize readable tablet log failed", K(ret), KPC(tablet_set_ptr)); + } + } else if (seralize_common_header.is_new_set() || seralize_common_header.is_old_set()) { + if (seralize_common_header.no_specail_op()) { + DupTabletChangeLogBody change_log(*tablet_set_ptr); + if (OB_FAIL(change_log.serialize(buf, buf_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize new/old tablet log failed", K(ret), KPC(tablet_set_ptr)); + } + + } else { + DupTabletSpecialOpArg *sp_op_arg = nullptr; + uint64_t uid = seralize_common_header.get_unique_id(); + if (OB_ISNULL(sp_op_arg = op_arg_map_.get(uid))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "get special op arg failed", K(ret), KPC(sp_op_arg)); + } else { + DupTabletSpecialOpLogBody sp_op_log(*tablet_set_ptr, *sp_op_arg); + if 
(OB_FAIL(sp_op_log.serialize(buf, buf_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "serialize special op log failed", K(ret), KPC(tablet_set_ptr), + KPC(sp_op_arg)); + } + } + } + // DupTabletChangeLogTail log_tail(tablet_set_ptr->get_change_status()->readable_version_, + // tablet_set_ptr->get_change_status()->has_confirmed()); + // DupTabletSpecialOpArg sp_op_arg; + // if (!tablet_set_ptr->get_common_header().no_specail_op()) { + // if (OB_FAIL(op_arg_map_.get_refactored( + // tablet_set_ptr->get_common_header().get_unique_id(), sp_op_arg))) { + // DUP_TABLE_LOG(WARN, "get special op failed", K(ret), KPC(tablet_set_ptr), + // K(sp_op_arg)); + // } + // } + // + // DupTabletLog change_log(log_tail, tablet_set_ptr, &sp_op_arg); + // if (OB_FAIL(ret)) { + // } else if (OB_FAIL(change_log.serialize(buf, buf_len, tmp_pos))) { + // DUP_TABLE_LOG(WARN, "serialize tablet change log failed", K(ret)); + // } + } + } + + if (OB_SUCC(ret)) { + pos = tmp_pos; + } + } + + DUP_TABLE_LOG(DEBUG, "after ser log all", K(ret), K(buf_len), K(pos), K(tmp_pos)); + return ret; +} + +int ObLSDupTabletsMgr::deserialize_tablet_log(DupTabletSetIDArray &unique_id_array, + const char *buf, + const int64_t data_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + + int64_t tmp_pos = pos; + + unique_id_array.reset(); + + SpinWLockGuard guard(dup_tablets_lock_); + if (OB_ISNULL(buf) || data_len <= 0 || pos <= 0) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid argument", K(ret), KP(buf), K(data_len), K(pos)); + } else { + // DupTabletLog tablet_log; + DupTabletCommonHeader deser_common_header; + while (OB_SUCC(ret) && tmp_pos < data_len) { + deser_common_header.reset(); + DupTabletChangeMap *tablet_set_ptr = nullptr; + bool construct_from_free = false; + share::SCN readable_version; + bool deser_has_confirmed = false; + + /* + * 1. deserialize tablet set common header + * 2. 
find a target tablet set by common header + * */ + if (OB_FAIL(deser_common_header.deserialize(buf, data_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "deserialize common header failed", K(ret), K(tmp_pos), K(data_len)); + } else if (OB_FAIL(get_target_tablet_set_(deser_common_header, tablet_set_ptr, + true /*construct_target_set*/))) { + DUP_TABLE_LOG(WARN, "get target tablet set failed", K(ret), K(deser_common_header), + KPC(tablet_set_ptr)); + } else if (tablet_set_ptr->get_common_header().is_free()) { + tablet_set_ptr->get_common_header().copy_tablet_set_type(deser_common_header); + construct_from_free = true; + } + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(unique_id_array.push_back(deser_common_header))) { + DUP_TABLE_LOG(WARN, "push back unique_id into logging array failed", K(ret), + K(deser_common_header)); + } else if (deser_common_header.is_free()) { + /* + * free a empty readable tablet set + * */ + if (!tablet_set_ptr->get_common_header().is_readable_set()) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "can not free a tablet_set in changing", K(ret)); + } else if (OB_FAIL(return_tablet_set(tablet_set_ptr))) { + DUP_TABLE_LOG(WARN, "free a readable set because of compact", K(ret), + KPC(tablet_set_ptr)); + } + DUP_TABLE_LOG(INFO, "deserialize a free tablet set", K(ret), K(tablet_set_ptr)); + } else if (deser_common_header.is_readable_set()) { + /* + * deserialize readable tablet set + * */ + if (construct_from_free) { + if (false == readable_tablets_list_.add_last(tablet_set_ptr)) { + if (OB_FAIL(return_tablet_set(tablet_set_ptr))) { + DUP_TABLE_LOG(WARN, "return tablet set failed", K(ret), KPC(tablet_set_ptr)); + } + // rewrite ret code + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "push back into readable_tablets_list_ failed", K(ret), + KPC(tablet_set_ptr)); + } + } + if (OB_SUCC(ret)) { + DupTabletCommonLogBody readable_log_body(*tablet_set_ptr); + if (OB_FAIL(readable_log_body.deserialize(buf, data_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "deserialize dup tablet readable log failed", K(ret)); + } + } + } else { + // DUP_TABLE_LOG(INFO, "deser a change set", K(ret), K(tablet_log.get_common_header()), + // KPC(tablet_set_ptr)); + DupTabletSpecialOpArg tmp_op_arg; + if (construct_from_free && deser_common_header.is_new_set() + && (false == need_confirm_new_queue_.add_last(tablet_set_ptr))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "push back into need_confirm_new_queue_ failed", K(ret), + KPC(tablet_set_ptr)); + return_tablet_set(tablet_set_ptr); + } else if (deser_common_header.is_old_set() && removing_old_set_ != tablet_set_ptr) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected old tablets set ptr", K(ret), KPC(tablet_set_ptr), + KPC(removing_old_set_)); + } else { + if (deser_common_header.no_specail_op()) { + /* + * deserialize new/old tablet set without special op + * */ + DupTabletChangeLogBody change_log_body(*tablet_set_ptr); + if (OB_FAIL(change_log_body.deserialize(buf, data_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "deserialize new/old tablet log failed", K(ret), + K(change_log_body), KPC(tablet_set_ptr)); + } else { + readable_version = change_log_body.get_change_tail().readable_version_; + deser_has_confirmed = change_log_body.get_change_tail().has_confirmed_; + } + } else { + /* + * deserialize special op arg + * */ + DupTabletSpecialOpArg tmp_op_arg; + DupTabletSpecialOpLogBody sp_op_log_body(*tablet_set_ptr, tmp_op_arg); + if (OB_FAIL(sp_op_log_body.deserialize(buf, data_len, tmp_pos))) { + DUP_TABLE_LOG(WARN, "deserialize 
new/old tablet log failed", K(ret), + K(sp_op_log_body), KPC(tablet_set_ptr)); + } else if (OB_FAIL(op_arg_map_.set_refactored(deser_common_header.get_unique_id(), + tmp_op_arg, 1))) { + DUP_TABLE_LOG(WARN, "insert into op_arg_map_ failed", K(ret), K(deser_common_header), + K(tmp_op_arg)); + } else { + readable_version = sp_op_log_body.get_change_tail().readable_version_; + deser_has_confirmed = sp_op_log_body.get_change_tail().has_confirmed_; + } + } + } + + /* + * set tablet set state as tablet_log_submitted + * */ + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(tablet_set_ptr->get_change_status()->prepare_serialize())) { + DUP_TABLE_LOG(WARN, "prepare serialize failed", K(ret), KPC(tablet_set_ptr)); + // } else if (tablet_set_ptr->get_change_status()->is_change_logging() + // && OB_FAIL(tablet_set_ptr->get_change_status()->tablet_change_log_submitted( + // share::SCN::min_scn(), true /*submit_result*/))) { + // DUP_TABLE_LOG(WARN, "set tablet change log submitted failed", K(ret), + // KPC(tablet_set_ptr)); + // } else if (tablet_set_ptr->get_change_status()->is_change_logging() + // && OB_FAIL(tablet_set_ptr->get_change_status()->prepare_confirm( + // share::SCN::min_scn(), true /*sync _result*/))) { + // DUP_TABLE_LOG(WARN, "prepare confirm tablet_set failed", K(ret), + // KPC(tablet_set_ptr)); + // + /* Step 2 : as try_to_confirm_tablets*/ + } else if (OB_FAIL(tablet_set_ptr->get_change_status()->push_need_confirm_scn( + readable_version))) { + DUP_TABLE_LOG(WARN, "set need_confirm_scn_ failed", K(ret), K(readable_version), + KPC(tablet_set_ptr)); + } else if (OB_FAIL( + tablet_set_ptr->get_change_status()->push_readable_scn(readable_version))) { + DUP_TABLE_LOG(WARN, "set readable version failed", K(ret), K(readable_version), + KPC(tablet_set_ptr)); + } else if (tablet_set_ptr->get_change_status()->is_confirming() + && OB_FAIL(tablet_set_ptr->get_change_status()->try_set_confirmed( + deser_has_confirmed))) { + DUP_TABLE_LOG(WARN, "replay confirmed flag failed", K(ret), K(deser_has_confirmed), + KPC(tablet_set_ptr)); + } + } + // DUP_TABLE_LOG(INFO, "deser tablet log for one set", K(ret), + // K(tablet_log.get_common_header()), + // KPC(tablet_set_ptr), K(need_confirm_new_queue_.get_size())); + } + } + + DUP_TABLE_LOG(DEBUG, "after deser tablet log", K(ret), K(tmp_pos), K(data_len), K(pos)); + + if (OB_SUCC(ret)) { + pos = tmp_pos; + } + // TODO rollback if replay failed + return ret; +} + +int ObLSDupTabletsMgr::tablet_log_submitted(const bool submit_result, + const share::SCN &tablet_log_scn, + const bool for_replay, + const DupTabletSetIDArray &unique_id_array) +{ + int ret = OB_SUCCESS; + SpinWLockGuard guard(dup_tablets_lock_); + + UNUSED(for_replay); + + for (int i = 0; OB_SUCC(ret) && i < unique_id_array.count(); i++) { + const DupTabletCommonHeader logging_common_header = unique_id_array[i]; + DupTabletChangeMap *logging_tablet_set = nullptr; + if (!logging_common_header.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid common header", K(ret), K(logging_common_header)); + } else if (logging_common_header.is_readable_set()) { + // do nothing + } else if (OB_FAIL(get_target_tablet_set_(logging_common_header, logging_tablet_set))) { + DUP_TABLE_LOG(WARN, "get logging tablet set failed", K(ret), KPC(logging_tablet_set), + K(logging_common_header)); + } else if (logging_tablet_set->get_change_status()->is_change_logging() + && OB_FAIL(logging_tablet_set->get_change_status()->tablet_change_log_submitted( + tablet_log_scn, submit_result))) { + 
DUP_TABLE_LOG(WARN, "modify tablet change status failed", K(ret), KPC(logging_tablet_set)); + } + } + + return ret; +} + +int ObLSDupTabletsMgr::tablet_log_synced(const bool sync_result, + const share::SCN &scn, + const bool for_replay, + const DupTabletSetIDArray &unique_id_array, + bool &merge_confirmed) +{ + int ret = OB_SUCCESS; + + merge_confirmed = false; + SpinWLockGuard guard(dup_tablets_lock_); + + for (int i = 0; OB_SUCC(ret) && i < unique_id_array.count(); i++) { + const DupTabletCommonHeader logging_common_header = unique_id_array[i]; + DupTabletChangeMap *logging_tablet_set = nullptr; + + if (!logging_common_header.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid common header", K(ret), K(logging_common_header)); + } else if (logging_common_header.is_readable_set()) { + // do nothing + } else if (OB_FAIL(get_target_tablet_set_(logging_common_header, logging_tablet_set))) { + // set my be cleaned, rewrite ret code? + DUP_TABLE_LOG(WARN, "get target tablet set failed", K(ret), KPC(logging_tablet_set), + K(logging_common_header)); + } else if (logging_tablet_set->get_change_status()->is_change_logging()) { + if (OB_SUCC(ret) && sync_result) { + if (OB_FAIL(try_exec_special_op_(logging_tablet_set, scn, for_replay))) { + DUP_TABLE_LOG(WARN, "try to execute special opertion for dup tablet set", K(ret), + KPC(logging_tablet_set)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(logging_tablet_set->get_change_status()->prepare_confirm(scn, sync_result))) { + DUP_TABLE_LOG(WARN, "modify tablet change status failed", K(ret), + KPC(logging_tablet_set)); + } + } + } else if (logging_tablet_set->get_change_status()->has_confirmed()) { + if (OB_SUCC(ret) && sync_result) { + // if old is confirmed, clear it + if (logging_common_header.is_old_set()) { + return_tablet_set(logging_tablet_set); + // move need_confirm_queue to readable + } else if (OB_FAIL(merge_into_readable_tablets_(logging_tablet_set, for_replay))) { + DUP_TABLE_LOG(WARN, "merge into readable tablet set failed", K(ret)); + } else { + merge_confirmed = true; + } + } + } + } + + if (OB_SUCC(ret) && !for_replay && (!is_master() || sync_result == false)) { + if (OB_FAIL(clean_unlog_tablets_())) { + DUP_TABLE_LOG(WARN, "clean unlog tablets failed", K(ret), K(ls_id_), K(for_replay), + K(sync_result), K(is_master()), K(unique_id_array)); + } + } + + return ret; +} + +int ObLSDupTabletsMgr::cal_single_set_max_ser_size_(DupTabletChangeMap *hash_map, + int64_t &max_ser_size, + DupTabletSetIDArray &unique_id_array) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(hash_map)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid hash map", K(ret)); + } else { + int64_t tmp_ser_size = hash_map->get_serialize_size(); + if (hash_map->get_common_header().is_readable_set()) { + DupTabletCommonLogBody common_log_body(*hash_map); + tmp_ser_size += common_log_body.get_serialize_size(); + } else if (hash_map->get_common_header().no_specail_op()) { + DupTabletChangeLogBody change_log_body(*hash_map); + tmp_ser_size += change_log_body.get_serialize_size(); + } else { + uint64_t uid = hash_map->get_common_header().get_unique_id(); + DupTabletSpecialOpArg *op_arg = op_arg_map_.get(uid); + DupTabletSpecialOpLogBody sp_log_body(*hash_map, *op_arg); + + if (OB_ISNULL(op_arg)) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(ERROR, "invalid special op arg", K(ret), KPC(op_arg), KPC(hash_map)); + } else { + tmp_ser_size += sp_log_body.get_serialize_size(); + } + } + + if (OB_FAIL(ret)) { + // do nothing + } else if 
(OB_FAIL(unique_id_array.push_back(hash_map->get_common_header()))) { + DUP_TABLE_LOG(WARN, "push back unique_id array failed", K(ret)); + } else { + max_ser_size += tmp_ser_size; + } + } + + return ret; +} + +int ObLSDupTabletsMgr::merge_into_readable_tablets_(DupTabletChangeMap *change_map_ptr, + const bool for_replay) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(change_map_ptr)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid hash map ptr", K(ret), KP(change_map_ptr)); + } else if (!for_replay && change_map_ptr != need_confirm_new_queue_.get_first()) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "we must merge the first set into readable tablets", K(ret), + KPC(change_map_ptr), KPC(need_confirm_new_queue_.get_first())); + } else if (OB_ISNULL(need_confirm_new_queue_.remove(change_map_ptr))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "remove from need_confirm_new_queue_ failed", K(ret), KPC(change_map_ptr)); + } else if (false == (readable_tablets_list_.add_last(change_map_ptr))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "insert into readable_tablets_list_ failed", K(ret), KPC(change_map_ptr)); + } else if (OB_FALSE_IT(change_map_ptr->get_common_header().set_readable())) { + // do nothing + } + + if (OB_SUCC(ret)) { + if (!change_map_ptr->get_common_header().is_readable_set()) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected merging tablet set", K(ret), KPC(change_map_ptr)); + } else if (change_map_ptr->empty()) { + DUP_TABLE_LOG(INFO, "try to remove empty readable tablet set from list", K(ret), + KPC(change_map_ptr)); + if (nullptr == readable_tablets_list_.remove(change_map_ptr)) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "remove empty readable set from list failed", K(ret), + KPC(change_map_ptr)); + } else if (OB_FAIL(return_tablet_set(change_map_ptr))) { + DUP_TABLE_LOG(WARN, "return empty readable set failed", K(ret), KPC(change_map_ptr)); + } else { + } + } + } + + DUP_TABLE_LOG(DEBUG, "merge into readable", K(ret), KPC(change_map_ptr), + K(need_confirm_new_queue_.get_size())); + return ret; +} + +int64_t ObLSDupTabletsMgr::get_dup_tablet_count() +{ + SpinRLockGuard guard(dup_tablets_lock_); + int64_t total_size = 0; + + if (OB_NOT_NULL(changing_new_set_)) { + total_size += changing_new_set_->size(); + } + + DLIST_FOREACH_X(need_confirm_new_set, need_confirm_new_queue_, true) + { + total_size += need_confirm_new_set->size(); + } + + if (OB_NOT_NULL(removing_old_set_)) { + total_size += removing_old_set_->size(); + } + + DLIST_FOREACH_X(readable_set_ptr, readable_tablets_list_, true) + { + total_size += readable_set_ptr->size(); + } + + // total_size += readable_tablets_.size(); + DUP_TABLE_LOG(DEBUG, "has dup tablet", K(total_size)); + + return total_size; +} + +bool ObLSDupTabletsMgr::has_dup_tablet() { return 0 < get_dup_tablet_count(); } + +int64_t ObLSDupTabletsMgr::get_readable_tablet_set_count() +{ + int64_t cnt = 0; + + SpinRLockGuard guard(dup_tablets_lock_); + cnt = readable_tablets_list_.get_size(); + + return cnt; +} + +int64_t ObLSDupTabletsMgr::get_all_tablet_set_count() +{ + int64_t cnt = 0; + SpinRLockGuard guard(dup_tablets_lock_); + + if (OB_NOT_NULL(changing_new_set_)) { + cnt += 1; + } + + cnt += need_confirm_new_queue_.get_size(); + cnt += readable_tablets_list_.get_size(); + + if (OB_NOT_NULL(removing_old_set_)) { + cnt += 1; + } + + return cnt; +} + +int ObLSDupTabletsMgr::leader_takeover(const bool is_resume, + const bool recover_all_readable_from_ckpt) +{ + int ret = OB_SUCCESS; + + SpinWLockGuard 
guard(dup_tablets_lock_); + + if (!is_resume) { + if (OB_FAIL(construct_clean_confirming_set_task_())) { + DUP_TABLE_LOG(WARN, "clean new/old tablets set failed", K(ret), + K(need_confirm_new_queue_.get_size()), KPC(removing_old_set_), + KPC(changing_new_set_)); + } else if (!recover_all_readable_from_ckpt /*incomplete readable set*/) { + if (OB_FAIL(construct_clean_all_readable_set_task_())) { + DUP_TABLE_LOG(WARN, "construct clean all readable set task failed", K(ret), + K(need_confirm_new_queue_.get_size())); + } + } + } + + // TODO make replay_active_tx_count as the trx_ref of first empty need_confirm_tablet_set + // TODO check the completeness of readable_tablets_set + + ATOMIC_STORE(&is_master_, true); + return ret; +} + +int ObLSDupTabletsMgr::leader_revoke(const bool is_logging) +{ + + int ret = OB_SUCCESS; + + SpinWLockGuard guard(dup_tablets_lock_); + + if (!is_logging) { + // clean unreadable tablets to make replay from clean sets. + if (OB_FAIL(clean_unlog_tablets_())) { + DUP_TABLE_LOG(WARN, "clean unlog tablets failed", K(ret), K(ls_id_), + K(need_confirm_new_queue_.get_size()), KPC(removing_old_set_), + KPC(changing_new_set_)); + } + } + + ATOMIC_STORE(&is_master_, false); + return ret; +} + +int ObLSDupTabletsMgr::try_to_confirm_tablets( + const share::SCN &lease_valid_follower_max_replayed_scn) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + SpinRLockGuard guard(dup_tablets_lock_); + if (!lease_valid_follower_max_replayed_scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid confirm ts", KR(ret), K(lease_valid_follower_max_replayed_scn)); + } else { + // comfirm need_confirm_new_queue + if (need_confirm_new_queue_.is_empty()) { + DUP_TABLE_LOG(DEBUG, "need_confirm_new_queue_ is empty", KR(ret), + K(need_confirm_new_queue_.get_size()), K(readable_tablets_list_.get_size())); + } else { + DLIST_FOREACH_X(node, need_confirm_new_queue_, OB_SUCC(ret)) + { + if (node->get_change_status()->is_confirming()) { + // update readable scn + const share::SCN readable_scn = SCN::min(lease_valid_follower_max_replayed_scn, + node->get_change_status()->need_confirm_scn_); + if (OB_FAIL(node->get_change_status()->push_readable_scn(readable_scn))) { + DUP_TABLE_LOG(WARN, "fail to confirm succ in this tablets set", K(ret), + K(lease_valid_follower_max_replayed_scn), KPC(node), K(readable_scn)); + // @input param can_be_confirmed is true and for_replay is false + // } else if (OB_FAIL(node->get_change_status()->set_confirmed(true))) { + // if (OB_EAGAIN != ret) { + // DUP_TABLE_LOG(WARN, "fail to set confimed, may need retry", K(ret), + // K(lease_valid_follower_max_replayed_scn), KPC(node), + // K(readable_scn)); + // } else { + // ret = OB_SUCCESS; + // } + } + } + } + } + // confirm old tablets + if (OB_SUCC(ret)) { + // update old tablets readable_version for confirm, though readable_version not used + if (removing_old_set_->get_change_status()->is_confirming()) { + const share::SCN readable_scn = + SCN::min(lease_valid_follower_max_replayed_scn, + removing_old_set_->get_change_status()->need_confirm_scn_); + if (OB_FAIL(removing_old_set_->get_change_status()->push_readable_scn(readable_scn))) { + DUP_TABLE_LOG(WARN, "fail to confirm old_tablets succ", K(ret), + K(lease_valid_follower_max_replayed_scn), KPC(removing_old_set_), + K(readable_scn)); + // @input param can_be_confirmed is true and for_replay is false + // } else if (OB_FAIL(removing_old_set_->get_change_status()->set_confirmed(true))) { + // if (OB_EAGAIN != ret) { + // DUP_TABLE_LOG(WARN, "fail 
to set old_tablets confimed, may need retry", K(ret), + // K(lease_valid_follower_max_replayed_scn), KPC(removing_old_set_), + // K(readable_scn)); + // } else { + // ret = OB_SUCCESS; + // } + } + } + } + } + return ret; +} + +// bool ObLSDupTabletsMgr::need_log_tablets() +// { +// bool need_log = false; +// if (!need_confirm_new_queue_.is_empty()) { +// DLIST_FOREACH_NORET(node, need_confirm_new_queue_) +// { +// if (node->get_change_status()->need_log()) { +// need_log = true; +// break; +// } +// } +// } +// if (false == need_log) { +// if (!readable_tablets_list_.is_empty()) { +// need_log = true; +// } else if (removing_old_set_->get_change_status()->need_log()) { +// need_log = true; +// } else if (OB_NOT_NULL(changing_new_set_)) { +// if (changing_new_set_->get_change_status()->need_log()) { +// need_log = true; +// } +// } +// } +// DUP_TABLE_LOG(DEBUG, "need log tablet", K(need_log)); +// return need_log; +// } + +void ObLSDupTabletsMgr::print_tablet_diag_info_log(bool is_master) +{ + SpinRLockGuard guard(dup_tablets_lock_); + int ret = OB_SUCCESS; + + const uint64_t TABLET_PRINT_BUF_LEN = + DupTableDiagStd::DUP_DIAG_INFO_LOG_BUF_LEN[DupTableDiagStd::TypeIndex::TABLET_INDEX]; + + const int64_t tenant_id = MTL_ID(); + const ObLSID ls_id = ls_id_; + + if (OB_ISNULL(tablet_diag_info_log_buf_)) { + if (OB_ISNULL(tablet_diag_info_log_buf_ = + static_cast(ob_malloc(TABLET_PRINT_BUF_LEN, "DupTableDiag")))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + _DUP_TABLE_LOG(WARN, "%salloc tablet diag info buf failed, ret=%d, ls_id=%lu", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + } + } + + if (OB_SUCC(ret)) { + int64_t tablet_diag_pos = 0; + // new tablet print + if (OB_SUCC(ret)) { + + if (OB_SUCC(ret) && OB_NOT_NULL(changing_new_set_)) { + if (OB_FAIL(::oceanbase::common::databuff_printf( + tablet_diag_info_log_buf_, TABLET_PRINT_BUF_LEN, tablet_diag_pos, + "\n%s[%sNew Dup Tablet Set - Changing] unique_id = %lu, tablet_count = %lu", + DupTableDiagStd::DUP_DIAG_INDENT_SPACE, DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, + changing_new_set_->get_common_header().get_unique_id(), + changing_new_set_->size()))) { + _DUP_TABLE_LOG(WARN, "%sprint changing tablet list header failed, ret=%d, ls_id=%lu", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + } else { + DiagInfoGenerator diag_info_gen(tablet_diag_info_log_buf_, TABLET_PRINT_BUF_LEN, + tablet_diag_pos, + changing_new_set_->get_common_header().get_unique_id()); + if (OB_FAIL(hash_for_each_update(*changing_new_set_, diag_info_gen))) { + _DUP_TABLE_LOG(WARN, "%sprint changing tablet list failed, ret=%d, ls_id=%lu", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + } else { + tablet_diag_pos = diag_info_gen.get_buf_pos(); + } + } + } + + if (OB_SUCC(ret) && !need_confirm_new_queue_.is_empty()) { + DLIST_FOREACH_X(need_confirm_set, need_confirm_new_queue_, OB_SUCC(ret)) + { + if (OB_FAIL(::oceanbase::common::databuff_printf( + tablet_diag_info_log_buf_, TABLET_PRINT_BUF_LEN, tablet_diag_pos, + "\n%s[%sNew Dup Tablet Set - NeedConfirm] unique_id = %lu, tablet_count = %lu, " + "flag " + "= %s, change_scn = %s, readable_version = %s, trx_ref_ = %lu", + DupTableDiagStd::DUP_DIAG_INDENT_SPACE, DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, + need_confirm_set->get_common_header().get_unique_id(), need_confirm_set->size(), + get_dup_tablet_flag_str(need_confirm_set->get_change_status()->flag_), + to_cstring(need_confirm_set->get_change_status()->tablet_change_scn_), + 
to_cstring(need_confirm_set->get_change_status()->readable_version_), + need_confirm_set->get_change_status()->trx_ref_))) { + + _DUP_TABLE_LOG(WARN, + "%sprint need confirm tablet list header failed, ret=%d, ls_id=%lu", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + } else { + DiagInfoGenerator diag_info_gen(tablet_diag_info_log_buf_, TABLET_PRINT_BUF_LEN, + tablet_diag_pos, + need_confirm_set->get_common_header().get_unique_id()); + if (OB_FAIL(hash_for_each_update(*need_confirm_set, diag_info_gen))) { + _DUP_TABLE_LOG(WARN, "%sprint need confirm tablet list failed, ret=%d, ls_id=%lu", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + } else { + tablet_diag_pos = diag_info_gen.get_buf_pos(); + } + } + } + } + } + // } else { + // // for (int i = 0; i < MAX_NEW_DUP_TABLET_SET_CNT && OB_SUCC(ret); i++) { + // DLIST_FOREACH(need_confirm_set, need_confirm_new_queue_) { + // if (need_confirm_set->size() > 0) { + // + // if (OB_FAIL(::oceanbase::common::databuff_printf( + // tablet_diag_info_log_buf_, TABLET_PRINT_BUF_LEN, tablet_diag_pos, + // "\n%s[%sNew Dup Tablet Set - Replay] unique_id = %lu, tablet_count = %lu, " + // "flag " + // "= %s, change_scn = %s, readable_version = %s, trx_ref_ = %lu", + // DupTableDiagStd::DUP_DIAG_INDENT_SPACE, + // DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, + // need_confirm_set->get_common_header()->unique_id_, + // new_tablets_array_[i].tablet_change_map_.size(), + // get_dup_tablet_flag_str(new_tablets_array_[i].change_status_.flag_), + // to_cstring(new_tablets_array_[i].change_status_.tablet_change_scn_), + // to_cstring(new_tablets_array_[i].change_status_.readable_version_), + // new_tablets_array_[i].change_status_.trx_ref_))) { + // + // _DUP_TABLE_LOG(WARN, + // "%sprint replay new tablet list header failed, ret=%d, ls_id=%lu", + // DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + // } else { + // DiagInfoGenerator diag_info_gen( + // tablet_diag_info_log_buf_, TABLET_PRINT_BUF_LEN, tablet_diag_pos, + // new_tablets_array_[i].tablet_change_map_.get_common_header().unique_id_); + // if (OB_FAIL(hash_for_each_update(new_tablets_array_[i].tablet_change_map_, + // diag_info_gen))) { + // _DUP_TABLE_LOG(WARN, "%sprint replay new tablet list failed, ret=%d, ls_id=%lu", + // DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + // } else { + // tablet_diag_pos = diag_info_gen.get_buf_pos(); + // } + // } + // } + // } + // } + // + // old tablet print + if (OB_SUCC(ret) && OB_NOT_NULL(removing_old_set_) && removing_old_set_->size() > 0) { + + if (OB_FAIL(::oceanbase::common::databuff_printf( + tablet_diag_info_log_buf_, TABLET_PRINT_BUF_LEN, tablet_diag_pos, + "\n%s[%sOld Dup Tablet Set] unique_id = %lu, tablet_count = %lu, " + "flag " + "= %s, change_scn = %s, readable_version = %s, trx_ref_ = %lu", + DupTableDiagStd::DUP_DIAG_INDENT_SPACE, DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, + removing_old_set_->get_common_header().get_unique_id(), removing_old_set_->size(), + get_dup_tablet_flag_str(removing_old_set_->get_change_status()->flag_), + to_cstring(removing_old_set_->get_change_status()->tablet_change_scn_), + to_cstring(removing_old_set_->get_change_status()->readable_version_), + removing_old_set_->get_change_status()->trx_ref_))) { + + _DUP_TABLE_LOG(WARN, "%sprint need confirm tablet list header failed, ret=%d, ls_id=%lu", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + } else { + DiagInfoGenerator diag_info_gen(tablet_diag_info_log_buf_, TABLET_PRINT_BUF_LEN, + tablet_diag_pos, + 
removing_old_set_->get_common_header().get_unique_id()); + if (OB_FAIL(hash_for_each_update(*removing_old_set_, diag_info_gen))) { + _DUP_TABLE_LOG(WARN, "%sprint need confirm tablet list failed, ret=%d, ls_id=%lu", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + } else { + tablet_diag_pos = diag_info_gen.get_buf_pos(); + } + } + } + + // readable tablet print + if (OB_SUCC(ret) && !readable_tablets_list_.is_empty()) { + DLIST_FOREACH(readable_set_ptr, readable_tablets_list_) + { + if (OB_FAIL(::oceanbase::common::databuff_printf( + tablet_diag_info_log_buf_, TABLET_PRINT_BUF_LEN, tablet_diag_pos, + "\n%s[%sReadable Dup Tablet Set] unique_id = %lu, tablet_count = %lu", + DupTableDiagStd::DUP_DIAG_INDENT_SPACE, DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, + readable_set_ptr->get_common_header().get_unique_id(), readable_set_ptr->size()))) { + + _DUP_TABLE_LOG(WARN, "%sprint readable tablet list header failed, ret=%d, ls_id=%lu", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + } else { + DiagInfoGenerator diag_info_gen(tablet_diag_info_log_buf_, TABLET_PRINT_BUF_LEN, + tablet_diag_pos, + readable_set_ptr->get_common_header().get_unique_id()); + if (OB_FAIL(hash_for_each_update(*readable_set_ptr, diag_info_gen))) { + _DUP_TABLE_LOG(WARN, "%sprint readable tablet list failed, ret=%d, ls_id=%lu", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + } else { + tablet_diag_pos = diag_info_gen.get_buf_pos(); + } + } + } + } + + tablet_diag_info_log_buf_[MIN(tablet_diag_pos, TABLET_PRINT_BUF_LEN - 1)] = '\0'; + _DUP_TABLE_LOG(INFO, + "[%sDup Tablet Info] tenant: %lu, ls: %lu, is_master: %s, " + "need_confirm_new_set_count: %u, readable_set_count: %u %s", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, tenant_id, ls_id.id(), + to_cstring(is_master), need_confirm_new_queue_.get_size(), + readable_tablets_list_.get_size(), tablet_diag_info_log_buf_); + } +} + +int ObLSDupTabletsMgr::lose_dup_tablet_(const common::ObTabletID &tablet_id) +{ + int ret = OB_SUCCESS; + + /** + * no need update schema ts + * + * HASH_NOT_EXIST: no need move + * In old : no need move + * In readable: move into old + * In changing_new: remove from new set + * */ + DupTabletInfo tmp_info; + + DupTabletCommonHeader changing_new_header; + changing_new_header.set_invalid_unique_id(); + changing_new_header.set_new(); + DupTabletChangeMap *changing_new_map = nullptr; + if (OB_FAIL(get_target_tablet_set_(changing_new_header, changing_new_map))) { + DUP_TABLE_LOG(WARN, "get changing new set failed", K(ret)); + if (ret == OB_EAGAIN) { + ret = OB_SUCCESS; + } + } else if (OB_ISNULL(changing_new_map)) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected changing new map", K(ret)); + } else if (OB_SUCC(changing_new_map->get_refactored(tablet_id, tmp_info))) { + if (OB_FAIL(changing_new_map->erase_refactored(tablet_id))) { + DUP_TABLE_LOG(WARN, "remove from changing_new_set_ failed", K(ret), K(tablet_id)); + } + } else if (ret != OB_HASH_NOT_EXIST) { + DUP_TABLE_LOG(WARN, "get dup table status from new_tablets_ failed", K(ret)); + } else if (ret == OB_HASH_NOT_EXIST) { + ret = OB_SUCCESS; + } + + DupTabletCommonHeader old_set_header; + old_set_header.set_invalid_unique_id(); + old_set_header.set_old(); + DupTabletChangeMap *old_tablet_set = nullptr; + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(get_target_tablet_set_(old_set_header, old_tablet_set))) { + DUP_TABLE_LOG(WARN, "get old tablets set failed", K(ret)); + } else if (!old_tablet_set->get_change_status()->is_modifiable()) { + 
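// Editorial note (descriptive comment, not authored patch content): reaching this
// branch means the removing (old) set is already past TEMPORARY, i.e. its change is
// being logged or confirmed, so the lost tablet cannot be moved out of the readable
// sets in this round; the code below simply returns OB_SUCCESS so the move can be
// attempted again on a later pass.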
ret = OB_SUCCESS; + DUP_TABLE_LOG(DEBUG, "old tablet set can not be modified", K(ret), K(tablet_id), + KPC(old_tablet_set)); + } else { + DLIST_FOREACH(readable_set_ptr, readable_tablets_list_) + { + if (OB_SUCC(readable_set_ptr->get_refactored(tablet_id, tmp_info))) { + if (OB_FAIL(old_tablet_set->set_refactored(tablet_id, tmp_info))) { + DUP_TABLE_LOG(WARN, "insert into old tablet set failed", K(ret)); + } else if (OB_FAIL(readable_set_ptr->erase_refactored(tablet_id))) { + DUP_TABLE_LOG(WARN, "remove from readable tablet set failed", K(ret)); + } + } else if (ret != OB_HASH_NOT_EXIST) { + DUP_TABLE_LOG(WARN, "get dup table status from readable_tablets_ failed", K(ret)); + } else if (ret == OB_HASH_NOT_EXIST) { + ret = OB_SUCCESS; + } + } + } + if (OB_FAIL(ret)) { + DUP_TABLE_LOG(WARN, "lose dup tablet failed", K(ret), K(tablet_id), KPC(changing_new_set_), + KPC(removing_old_set_), K(readable_tablets_list_.get_size())); + } + return ret; +} + +int ObLSDupTabletsMgr::discover_dup_tablet_(const common::ObTabletID &tablet_id, + const int64_t update_ts) +{ + int ret = OB_SUCCESS; + + DupTabletInfo tmp_status; + int64_t confirming_tablet_cnt = 0; + bool contain_confirming_special_op = false; + + // search new dup tablet in new, readable, old + ret = OB_HASH_NOT_EXIST; + DLIST_FOREACH_X(need_confirm_new_set, need_confirm_new_queue_, OB_HASH_NOT_EXIST == ret) + { + confirming_tablet_cnt += need_confirm_new_set->size(); + if (!need_confirm_new_set->get_common_header().no_specail_op()) { + contain_confirming_special_op = true; + } + if (OB_SUCC(need_confirm_new_set->get_refactored(tablet_id, tmp_status))) { + tmp_status.update_dup_schema_ts_ = update_ts; + if (OB_FAIL(need_confirm_new_set->set_refactored(tablet_id, tmp_status, 1))) { + DUP_TABLE_LOG(WARN, "update new_tablet ts failed", K(ret)); + } + } else if (OB_HASH_NOT_EXIST != ret) { + DUP_TABLE_LOG(WARN, "get from need_confirm_new_set failed", K(ret), + KPC(need_confirm_new_set)); + } + } + + if (OB_HASH_NOT_EXIST != ret) { + if (OB_SUCC(ret)) { + DUP_TABLE_LOG(DEBUG, "tablet has existed in new_tablets_", K(ret), K(tablet_id), + K(update_ts)); + } + } else { + DLIST_FOREACH_X(readable_set_ptr, readable_tablets_list_, OB_HASH_NOT_EXIST == ret) + { + if (OB_SUCC(readable_set_ptr->get_refactored(tablet_id, tmp_status))) { + tmp_status.update_dup_schema_ts_ = update_ts; + if (OB_FAIL(readable_set_ptr->set_refactored(tablet_id, tmp_status, 1))) { + DUP_TABLE_LOG(WARN, "update readable_tablet ts failed", K(ret)); + } + // DUP_TABLE_LOG(INFO, "tablet has existed in readable_tablets_", K(ret), K(tablet_id), + // K(update_ts)); + } else if (ret != OB_HASH_NOT_EXIST) { + DUP_TABLE_LOG(WARN, "get dup table status from readable_tablets_ failed", K(ret), + KPC(readable_set_ptr)); + } + } + } + + DupTabletCommonHeader old_set_header; + old_set_header.set_invalid_unique_id(); + old_set_header.set_old(); + DupTabletChangeMap *old_tablets_ptr = nullptr; + if (OB_HASH_NOT_EXIST != ret) { + // do nothing + if (OB_SUCC(ret)) { + DUP_TABLE_LOG(DEBUG, "tablet has existed in new or readable tablets", K(ret), K(tablet_id), + K(update_ts)); + } + } else if (OB_FAIL(get_target_tablet_set_(old_set_header, old_tablets_ptr))) { + DUP_TABLE_LOG(WARN, "get old tablet set failed", K(ret), KPC(old_tablets_ptr)); + } else if (OB_SUCC(old_tablets_ptr->get_refactored(tablet_id, tmp_status))) { + tmp_status.update_dup_schema_ts_ = update_ts; + if (OB_FAIL(old_tablets_ptr->set_refactored(tablet_id, tmp_status, 1))) { + DUP_TABLE_LOG(WARN, "update old_tablets_ ts failed", 
K(ret)); + } + // DUP_TABLE_LOG(INFO, "tablet has existed in old_tablet_set", K(ret), K(tablet_id), + // K(update_ts)); + } else if (ret != OB_HASH_NOT_EXIST) { + DUP_TABLE_LOG(WARN, "get dup table status from old_tablets_set failed", K(ret)); + } + + // We can not move a tablet_id from a confirming or logging old_tablets set. + // To make it simple, we will not move a tablet_id from a temporary old tablets set right now. + if (OB_HASH_NOT_EXIST == ret) { + // search a temporary new tablets set and insert it + tmp_status.update_dup_schema_ts_ = update_ts; + DupTabletCommonHeader changing_new_set_header; + changing_new_set_header.set_invalid_unique_id(); + changing_new_set_header.set_new(); + DupTabletChangeMap *changing_new_map = nullptr; + if (OB_FAIL(get_target_tablet_set_(changing_new_set_header, changing_new_map))) { + DUP_TABLE_LOG(WARN, "get changing new set failed", K(ret), KPC(changing_new_map)); + } else if (confirming_tablet_cnt + changing_new_map->size() > MAX_CONFIRMING_TABLET_COUNT + || contain_confirming_special_op) { + DUP_TABLE_LOG( + INFO, + "Too large confirming tablet set. We will not insert new tablet into changing_new_set_.", + K(ret), K(ls_id_), K(changing_new_set_->size()), K(confirming_tablet_cnt), + K(MAX_CONFIRMING_TABLET_COUNT), K(contain_confirming_special_op)); + } else if (OB_FAIL(changing_new_map->set_refactored(tablet_id, tmp_status, 1))) { + DUP_TABLE_LOG(WARN, "insert into changing new tablets failed", K(ret)); + } + } + + DUP_TABLE_LOG(DEBUG, "finish discover dup tablet", K(ret), K(tablet_id), KPC(changing_new_set_), + K(need_confirm_new_queue_.get_size()), KPC(removing_old_set_), + K(readable_tablets_list_.get_size())); + return ret; +} + +int ObLSDupTabletsMgr::alloc_extra_free_tablet_set_() +{ + int ret = OB_SUCCESS; + + DupTabletChangeMap *tmp_map_ptr = nullptr; + if (OB_ISNULL(tmp_map_ptr = static_cast( + share::mtl_malloc(sizeof(DupTabletChangeMap), "DupTabletMap")))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + } else if (OB_FALSE_IT(extra_free_set_alloc_count_++)) { + } else if (OB_FALSE_IT(new (tmp_map_ptr) DupTabletChangeMap(RESERVED_FREE_SET_COUNT + + extra_free_set_alloc_count_))) { + } else if (OB_FAIL(tmp_map_ptr->create(1024))) { + DUP_TABLE_LOG(WARN, "create dup_tablet hash map", K(ret)); + } else if (false == (free_set_pool_.add_last(tmp_map_ptr))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "push back into free_set_pool failed", K(ret), K(free_set_pool_.get_size()), + KPC(tmp_map_ptr)); + } + DUP_TABLE_LOG(INFO, "alloc a extra free tablet set", K(ret), K(last_no_free_set_time_), + K(extra_free_set_alloc_count_), KPC(changing_new_set_), + K(free_set_pool_.get_size()), K(need_confirm_new_queue_.get_size()), + K(readable_tablets_list_.get_size())); + return ret; +} + +int ObLSDupTabletsMgr::get_free_tablet_set(DupTabletChangeMap *&free_set, const uint64_t target_id) +{ + + int ret = OB_SUCCESS; + + const int64_t changing_new_set_count = OB_ISNULL(changing_new_set_) ? 0 : 1; + const int64_t removing_old_set_count = OB_ISNULL(removing_old_set_) ? 
0 : 1; + const int64_t all_used_free_set_count = + free_set_pool_.get_size() + changing_new_set_count + removing_old_set_count + + need_confirm_new_queue_.get_size() + readable_tablets_list_.get_size(); + if (RESERVED_FREE_SET_COUNT + extra_free_set_alloc_count_ != all_used_free_set_count) { + DUP_TABLE_LOG(ERROR, "the free set may be leaked from the pool", K(ret), + K(RESERVED_FREE_SET_COUNT), K(extra_free_set_alloc_count_), + K(all_used_free_set_count), K(changing_new_set_count), K(removing_old_set_count), + K(need_confirm_new_queue_.get_size()), K(readable_tablets_list_.get_size()), + K(free_set_pool_.get_size()), KPC(removing_old_set_), KPC(changing_new_set_)); + } + + while (OB_SUCC(ret) && target_id > RESERVED_FREE_SET_COUNT + extra_free_set_alloc_count_) { + if (OB_FAIL(alloc_extra_free_tablet_set_())) { + DUP_TABLE_LOG(WARN, "alloc extra free tablet set failed", K(ret)); + } + } + + if (OB_SUCC(ret) && free_set_pool_.is_empty()) { + if (last_no_free_set_time_ < 0) { + last_no_free_set_time_ = ObTimeUtility::fast_current_time(); + } + if (extra_free_set_alloc_count_ < MAX_FREE_SET_COUNT - RESERVED_FREE_SET_COUNT + || ObTimeUtility::fast_current_time() - last_no_free_set_time_ >= 3 * 1000 * 1000) { + if (OB_FAIL(alloc_extra_free_tablet_set_())) { + DUP_TABLE_LOG(WARN, "alloc extra free tablet set failed", K(ret)); + } + } + } + + if (OB_FAIL(ret)) { + // do nothing + } else if (free_set_pool_.is_empty()) { + ret = OB_EAGAIN; + } else { + if (target_id <= 0) { + free_set = free_set_pool_.remove_first(); + last_no_free_set_time_ = -1; + } else { + DLIST_FOREACH(free_set_ptr, free_set_pool_) + { + if (free_set_ptr->get_common_header().get_unique_id() == target_id) { + free_set = free_set_ptr; + break; + } + } + if (OB_ISNULL(free_set)) { + ret = OB_ENTRY_NOT_EXIST; + DUP_TABLE_LOG(WARN, "no free set in free_set_pool_", K(ret), KPC(free_set), K(target_id), + K(free_set_pool_.get_size())); + } else if (OB_ISNULL(free_set_pool_.remove(free_set))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "remove free set from pool failed", K(ret), KPC(free_set)); + } + } + + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_ISNULL(free_set->get_change_status())) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(ERROR, "find a readable set in free_set_pool_", K(ret), KPC(free_set)); + } else { + free_set->get_change_status()->set_temporary(); + } + } + + // DUP_TABLE_LOG(DEBUG, "get a free set from pool", K(ret), K(free_set_pool_.get_size()), + // KPC(free_set), K(lbt())); + return ret; +} + +int ObLSDupTabletsMgr::get_target_tablet_set_(const DupTabletCommonHeader &target_common_header, + DupTabletChangeMap *&target_set, + const bool construct_target_set, + const bool need_changing_new_set) +{ + int ret = OB_SUCCESS; + const uint64_t unique_id = target_common_header.get_unique_id(); + + if (target_set != nullptr) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid argument", K(ret), K(target_common_header), KPC(target_set)); + // } else if (need_changing_new_set) { + // if (OB_ISNULL(changing_new_set_)) { + // if (OB_FAIL(get_free_tablet_set(changing_new_set_))) { + // DUP_TABLE_LOG(WARN, "get free tablet set failed", K(ret)); + // } + // } + // + // if (OB_SUCC(ret)) { + // target_set = changing_new_set_ + // } + } else if (unique_id == DupTabletCommonHeader::INVALID_UNIQUE_ID) { + if (target_common_header.is_free()) { + if (OB_FAIL(get_free_tablet_set(target_set, unique_id))) { + DUP_TABLE_LOG(WARN, "get free tablet set failed", K(ret)); + } + } else if (target_common_header.is_old_set()) 
{ + + if (OB_NOT_NULL(removing_old_set_)) { + target_set = removing_old_set_; + } else { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected null old_tablets_set", K(ret), KPC(removing_old_set_)); + } + } else if (target_common_header.is_new_set()) { + + if (OB_ISNULL(changing_new_set_)) { + if (OB_FAIL(get_free_tablet_set(changing_new_set_))) { + DUP_TABLE_LOG(WARN, "get free tablet set failed", K(ret)); + } else { + + changing_new_set_->get_common_header().set_new(); + } + } + + if (OB_SUCC(ret)) { + target_set = changing_new_set_; + } + } else { + ret = OB_ENTRY_NOT_EXIST; + DUP_TABLE_LOG(WARN, "invalid unique_id with readable set", K(ret), K(target_common_header)); + } + } else { + if (target_common_header.is_readable_set()) { + DLIST_FOREACH(readable_set_ptr, readable_tablets_list_) + { + if (readable_set_ptr->get_common_header().get_unique_id() + == target_common_header.get_unique_id()) { + target_set = readable_set_ptr; + break; + } + } + + } else if (target_common_header.is_new_set()) { + if (OB_NOT_NULL(changing_new_set_) + && changing_new_set_->get_common_header().get_unique_id() + == target_common_header.get_unique_id()) { + target_set = changing_new_set_; + } else { + // DUP_TABLE_LOG(INFO, "111 get need confirm tablet set", K(target_common_header)); + DLIST_FOREACH(new_set_ptr, need_confirm_new_queue_) + { + // DUP_TABLE_LOG(INFO, "222 get need confirm tablet set", + // K(target_common_header),KPC(new_set_ptr)); + if (new_set_ptr->get_common_header().get_unique_id() + == target_common_header.get_unique_id()) { + target_set = new_set_ptr; + break; + } + } + } + // DUP_TABLE_LOG(INFO, "333 get need confirm tablet set", + // K(target_common_header),KPC(target_set)); + } else if (target_common_header.is_old_set()) { + + if (OB_NOT_NULL(removing_old_set_) + && removing_old_set_->get_common_header().get_unique_id() + == target_common_header.get_unique_id()) { + target_set = removing_old_set_; + } else { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected null old_tablets_set", K(ret), KPC(removing_old_set_)); + } + } + + // DUP_TABLE_LOG(INFO, "444 get need confirm tablet set", + // K(target_common_header),K(target_common_header.is_new_set()),K(target_common_header.is_old_set()),K(target_common_header.is_readable_set())); + if (OB_SUCC(ret) && OB_ISNULL(target_set) && !target_common_header.is_old_set()) { + if (construct_target_set) { + if (OB_FAIL(get_free_tablet_set(target_set, target_common_header.get_unique_id()))) { + DUP_TABLE_LOG(WARN, "get free tablet set failed", K(ret), KPC(target_set), + K(target_common_header), K(need_confirm_new_queue_.get_size()), + K(readable_tablets_list_.get_size()), KPC(removing_old_set_)); + } + } else { + ret = OB_ENTRY_NOT_EXIST; + DUP_TABLE_LOG(WARN, "no tartget tablet set", K(ret), K(target_common_header), + KPC(target_set), K(construct_target_set), KPC(changing_new_set_), + KPC(removing_old_set_), K(need_confirm_new_queue_.get_size())); + } + } + } + + if (OB_NOT_NULL(target_set) && OB_SUCC(ret)) { + if (target_set->get_common_header().is_readable_set() + && OB_NOT_NULL(target_set->get_change_status())) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(ERROR, "unexpected readbale tablet set with valid change status", K(ret), + KPC(target_set)); + target_set = nullptr; + } else if (!target_set->get_common_header().is_readable_set() + && OB_ISNULL(target_set->get_change_status())) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(ERROR, "unexpected new/old tablet set with invalid change status", K(ret), + KPC(target_set)); + } + } + + 
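// Editorial note (descriptive comment, not authored patch content): at this point,
// on success target_set refers to the matching changing-new / need-confirm / readable
// / old set, or to a set freshly taken from the free pool when construct_target_set
// allows it; on failure ret reports the reason, e.g. OB_ENTRY_NOT_EXIST when no
// matching set exists or OB_EAGAIN when the free pool is temporarily empty.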
return ret; +} + +int ObLSDupTabletsMgr::return_tablet_set(DupTabletChangeMap *need_free_set) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(need_free_set)) { + + } else if (need_free_set->get_common_header().is_old_set()) { + need_free_set->reuse(); + need_free_set->get_common_header().set_old(); + } else { + if (!need_free_set->get_common_header().no_specail_op()) { + if (OB_FAIL( + op_arg_map_.erase_refactored(need_free_set->get_common_header().get_unique_id()))) { + DUP_TABLE_LOG(WARN, "remove from op_arg_map failed", K(ret), KPC(need_free_set)); + } + } + if (OB_FAIL(ret)) { + } else { + need_free_set->reuse(); + if (free_set_pool_.add_last(need_free_set) == false) { + ret = OB_ERR_UNEXPECTED; + } + if (need_free_set == changing_new_set_) { + changing_new_set_ = nullptr; + } + } + } + + return ret; +} + +int ObLSDupTabletsMgr::clean_readable_tablets_(const share::SCN &min_reserve_tablet_scn) +{ + int ret = OB_SUCCESS; + + if (!min_reserve_tablet_scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid argument", K(ret), K(ls_id_), K(min_reserve_tablet_scn)); + } else { + + DUP_TABLE_LOG(INFO, "try to clean durable readable tablets", K(ret), K(ls_id_), + K(min_reserve_tablet_scn), K(readable_tablets_list_.get_size())); + + DLIST_FOREACH_REMOVESAFE(readable_set, readable_tablets_list_) + { + DUP_TABLE_LOG(INFO, "try to clean one durable tablet set", K(ret), K(min_reserve_tablet_scn), + KPC(readable_set)); + if (!readable_set->need_reserve(min_reserve_tablet_scn)) { + if (OB_ISNULL(readable_tablets_list_.remove(readable_set))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "remove need_confirm_set failed", K(ret), KPC(readable_set)); + } else if (OB_FAIL(return_tablet_set(readable_set))) { + DUP_TABLE_LOG(WARN, "free need_confirm_set failed", K(ret), KPC(readable_set), + K(readable_tablets_list_.get_size()), K(free_set_pool_.get_size())); + } + } + } + } + + return ret; +} + +int ObLSDupTabletsMgr::clean_durable_confirming_tablets_(const share::SCN &min_reserve_tablet_scn) +{ + int ret = OB_SUCCESS; + + if (!min_reserve_tablet_scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid argument", K(ret), K(ls_id_), K(min_reserve_tablet_scn)); + } else { + + DUP_TABLE_LOG(INFO, "try to clean durable confirming tablets", K(ret), K(ls_id_), + K(min_reserve_tablet_scn), K(need_confirm_new_queue_.get_size())); + + DLIST_FOREACH_REMOVESAFE(need_confirm_set, need_confirm_new_queue_) + { + DUP_TABLE_LOG(INFO, "try to clean one durable tablet set", K(ret), K(min_reserve_tablet_scn), + KPC(need_confirm_set)); + if (!need_confirm_set->need_reserve(min_reserve_tablet_scn)) { + if (OB_ISNULL(need_confirm_new_queue_.remove(need_confirm_set))) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "remove need_confirm_set failed", K(ret), KPC(need_confirm_set)); + } else if (OB_FAIL(return_tablet_set(need_confirm_set))) { + DUP_TABLE_LOG(WARN, "free need_confirm_set failed", K(ret), KPC(need_confirm_set), + K(need_confirm_new_queue_.get_size()), K(free_set_pool_.get_size())); + } + } + } + } + return ret; +} + +int ObLSDupTabletsMgr::clean_unlog_tablets_() +{ + int ret = OB_SUCCESS; + + if (OB_SUCC(ret) && OB_NOT_NULL(changing_new_set_)) { + DUP_TABLE_LOG(INFO, "try to clean one unlog tablet set", K(ret), KPC(changing_new_set_)); + if (OB_FAIL(return_tablet_set(changing_new_set_))) { + DUP_TABLE_LOG(WARN, "free changing_new_set_ failed", K(ret), KPC(changing_new_set_), + K(free_set_pool_.get_size())); + } else { + changing_new_set_ = nullptr; + } + } + + if 
(OB_SUCC(ret) && OB_NOT_NULL(removing_old_set_) + && removing_old_set_->get_change_status()->is_unlog()) { + DUP_TABLE_LOG(INFO, "try to clean one unlog tablet set", K(ret), KPC(removing_old_set_)); + if (OB_FAIL(return_tablet_set(removing_old_set_))) { + DUP_TABLE_LOG(WARN, "free removing_old_set_ failed", K(ret), KPC(removing_old_set_), + K(free_set_pool_.get_size())); + } else { + } + } + + if (OB_SUCC(ret)) { + DLIST_FOREACH_REMOVESAFE(need_confirm_set, need_confirm_new_queue_) + { + if (need_confirm_set->get_change_status()->is_unlog()) { + DUP_TABLE_LOG(INFO, "try to clean one unlog tablet set", K(ret), KPC(need_confirm_set)); + + if (nullptr == need_confirm_new_queue_.remove(need_confirm_set)) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "remove need_confirm_set failed", K(ret), KPC(need_confirm_set)); + } else if (OB_FAIL(return_tablet_set(need_confirm_set))) { + DUP_TABLE_LOG(WARN, "free need_confirm_set failed", K(ret), KPC(need_confirm_set), + K(need_confirm_new_queue_.get_size()), K(free_set_pool_.get_size())); + } + } + } + } + return ret; +} + +int ObLSDupTabletsMgr::construct_empty_block_confirm_task_(const int64_t trx_ref) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + + DupTabletCommonHeader empty_new_common_header; + empty_new_common_header.set_invalid_unique_id(); + empty_new_common_header.set_free(); + DupTabletChangeMap *block_confirm_task = nullptr; + DupTabletSpecialOpArg tmp_op; + + if (trx_ref < 0) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret)); + } else if (OB_FAIL(get_target_tablet_set_(empty_new_common_header, block_confirm_task))) { + DUP_TABLE_LOG(WARN, "get free_set as empty_new_set", K(ret), KPC(block_confirm_task), + K(free_set_pool_.get_size())); + } else { + block_confirm_task->get_common_header().set_new(); + block_confirm_task->get_common_header().set_op_of_block_confirming(); + block_confirm_task->get_change_status()->trx_ref_ = trx_ref; // TODO + // set empty tablet_set as a normal tablet_set which has submit log failed + if (OB_FAIL(block_confirm_task->get_change_status()->prepare_serialize())) { + DUP_TABLE_LOG(WARN, "prepare serialize for block_confirm_task failed", K(ret)); + } else if (false == need_confirm_new_queue_.add_last(block_confirm_task)) { + DUP_TABLE_LOG(WARN, "insert into need_confirm_new_queue_ failed", K(ret), + KPC(block_confirm_task), K(need_confirm_new_queue_.get_size())); + } else if (OB_FAIL(op_arg_map_.set_refactored( + block_confirm_task->get_common_header().get_unique_id(), tmp_op))) { + DUP_TABLE_LOG(WARN, "insert into special op map failed", K(ret), KPC(block_confirm_task), + K(tmp_op)); + } + } + + if (OB_FAIL(ret)) { + if (OB_NOT_NULL(block_confirm_task)) { + if (nullptr == need_confirm_new_queue_.remove(block_confirm_task)) { + // may be error before insert into need_confirm_new_queue_ + DUP_TABLE_LOG(WARN, "remove block_confirm_task failed, it may not have been inserted", + K(ret), KPC(block_confirm_task)); + } + + if (OB_TMP_FAIL(return_tablet_set(block_confirm_task))) { + DUP_TABLE_LOG(WARN, "return block_confirm_task failed", K(ret), KPC(block_confirm_task)); + } + } + + if (OB_TMP_FAIL(op_arg_map_.erase_refactored( + block_confirm_task->get_common_header().get_unique_id()))) { + DUP_TABLE_LOG(WARN, "erase block_confirm_op from op_arg_map", K(ret), + KPC(block_confirm_task)); + } + } else { + DUP_TABLE_LOG(INFO, "construct empty block confirming set task successfully", K(ret), + KPC(block_confirm_task), K(tmp_op)); + } + + return ret; +} + +int 
ObLSDupTabletsMgr::search_special_op_(uint64_t special_op_type) +{ + int ret = OB_SUCCESS; + + return ret; +} + +int ObLSDupTabletsMgr::construct_clean_confirming_set_task_() +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + + DupTabletCommonHeader clean_confirming_common_header; + clean_confirming_common_header.set_invalid_unique_id(); + clean_confirming_common_header.set_free(); + DupTabletChangeMap *clean_confirming_task = nullptr; + DupTabletSpecialOpArg tmp_op; + + if (OB_FAIL(get_target_tablet_set_(clean_confirming_common_header, clean_confirming_task))) { + DUP_TABLE_LOG(WARN, "get free_set as empty_new_set", K(ret), KPC(clean_confirming_task), + K(free_set_pool_.get_size())); + } else { + clean_confirming_task->get_common_header().set_new(); + clean_confirming_task->get_common_header().set_op_of_clean_data_confirming_set(); + // set empty tablet_set as a normal tablet_set which has submit log failed + if (OB_FAIL(clean_confirming_task->get_change_status()->prepare_serialize())) { + DUP_TABLE_LOG(WARN, "prepare serialize for empty_new_set failed", K(ret)); + } else if (false == need_confirm_new_queue_.add_last(clean_confirming_task)) { + DUP_TABLE_LOG(WARN, "insert into need_confirm_new_queue_ failed", K(ret), + KPC(clean_confirming_task), K(need_confirm_new_queue_.get_size())); + } else { + DLIST_FOREACH(need_confirm_ptr, need_confirm_new_queue_) + { + if (need_confirm_ptr == clean_confirming_task) { + // do nothing + // } else if + // (OB_FAIL(tmp_op.op_objects_.push_back(need_confirm_ptr->get_common_header()))) { + // DUP_TABLE_LOG(WARN, "push back into special op arg failed", K(ret), + // KPC(need_confirm_ptr)); + } else { + need_confirm_ptr->get_change_status()->set_confirm_invalid(); + } + } + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(op_arg_map_.set_refactored( + clean_confirming_task->get_common_header().get_unique_id(), tmp_op))) { + DUP_TABLE_LOG(WARN, "insert into special op map failed", K(ret), KPC(clean_confirming_task), + K(tmp_op)); + } + } + } + + if (OB_FAIL(ret)) { + if (OB_NOT_NULL(clean_confirming_task)) { + if (nullptr == need_confirm_new_queue_.remove(clean_confirming_task)) { + // may be error before insert into need_confirm_new_queue_ + DUP_TABLE_LOG(WARN, "remove clean_confirming_task failed, it may not have been inserted", + K(ret), KPC(clean_confirming_task)); + } + + if (OB_TMP_FAIL(return_tablet_set(clean_confirming_task))) { + DUP_TABLE_LOG(WARN, "return clean_confirming_task failed", K(ret), + KPC(clean_confirming_task)); + } + } + + if (OB_TMP_FAIL(op_arg_map_.erase_refactored( + clean_confirming_task->get_common_header().get_unique_id()))) { + DUP_TABLE_LOG(WARN, "erase clean_confirming_op from op_arg_map", K(ret), + KPC(clean_confirming_task)); + } + } else { + DUP_TABLE_LOG(INFO, "construct clean data confirming set task successfully", K(ret), + KPC(clean_confirming_task), K(tmp_op)); + } + + return ret; +} + +int ObLSDupTabletsMgr::construct_clean_all_readable_set_task_() +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + + DupTabletCommonHeader clean_readable_common_header; + clean_readable_common_header.set_invalid_unique_id(); + clean_readable_common_header.set_free(); + DupTabletChangeMap *clean_readable_task = nullptr; + DupTabletSpecialOpArg tmp_op; + + if (OB_FAIL(get_target_tablet_set_(clean_readable_common_header, clean_readable_task))) { + DUP_TABLE_LOG(WARN, "get free_set as empty_new_set", K(ret), KPC(clean_readable_task), + K(free_set_pool_.get_size())); + } else { + 
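// Editorial note (descriptive comment, not authored patch content): the free set
// fetched above is turned into a "clean all readable" special-op task below: it is
// marked as a new set, stamped with the special op, prepared for serialization and
// appended to need_confirm_new_queue_ so the cleanup goes through the normal
// log-and-confirm path, and its op arg is registered in op_arg_map_ under the set's
// unique id.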
clean_readable_task->get_common_header().set_new(); + clean_readable_task->get_common_header().set_op_of_clean_all_readable_set(); + // set empty tablet_set as a normal tablet_set which has submit log failed + if (OB_FAIL(clean_readable_task->get_change_status()->prepare_serialize())) { + DUP_TABLE_LOG(WARN, "prepare serialize for empty_new_set failed", K(ret)); + } else if (false == need_confirm_new_queue_.add_last(clean_readable_task)) { + DUP_TABLE_LOG(WARN, "insert into need_confirm_new_queue_ failed", K(ret), + KPC(clean_readable_task), K(need_confirm_new_queue_.get_size())); + } else { + // DLIST_FOREACH(readable_ptr, readable_tablets_list_) + // { + // if (OB_FAIL(tmp_op.op_objects_.push_back(readable_ptr->get_common_header()))) { + // DUP_TABLE_LOG(WARN, "push back into special op arg failed", K(ret), + // KPC(clean_readable_task)); + // } + // } + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(op_arg_map_.set_refactored( + clean_readable_task->get_common_header().get_unique_id(), tmp_op))) { + DUP_TABLE_LOG(WARN, "insert into special op map failed", K(ret), KPC(clean_readable_task), + K(tmp_op)); + } + } + } + + if (OB_FAIL(ret)) { + if (OB_NOT_NULL(clean_readable_task)) { + if (nullptr == need_confirm_new_queue_.remove(clean_readable_task)) { + // may be error before insert into need_confirm_new_queue_ + DUP_TABLE_LOG(WARN, "remove clean_readable_task failed, it may not have been inserted", + K(ret), KPC(clean_readable_task)); + } + + if (OB_TMP_FAIL(return_tablet_set(clean_readable_task))) { + DUP_TABLE_LOG(WARN, "return clean_readable_task failed", K(ret), KPC(clean_readable_task)); + } + } + + if (OB_TMP_FAIL(op_arg_map_.erase_refactored( + clean_readable_task->get_common_header().get_unique_id()))) { + DUP_TABLE_LOG(WARN, "erase clean_readable_op from op_arg_map", K(ret), + KPC(clean_readable_task)); + } + } else { + DUP_TABLE_LOG(INFO, "construct clean all readable task successfully", K(ret), + KPC(clean_readable_task), K(tmp_op)); + } + return ret; +} + +int ObLSDupTabletsMgr::try_exec_special_op_(DupTabletChangeMap *op_tablet_set, + const share::SCN &min_reserve_tablet_scn, + const bool for_replay) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(op_tablet_set)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid argument", K(ret), KPC(op_tablet_set)); + } else if (op_tablet_set->get_common_header().no_specail_op()) { + // filter no sp op tablet set + } else if (!op_tablet_set->get_change_status()->tablet_change_scn_.is_valid() || + (op_tablet_set->get_change_status()->tablet_change_scn_.is_valid() && + min_reserve_tablet_scn > op_tablet_set->get_change_status()->tablet_change_scn_)) { + // filter those sp op set with invalid change scn or not equal to min reserve scn + // do nothing + DUP_TABLE_LOG(INFO, "not need exec sp op", K(ret), K(min_reserve_tablet_scn), KPC(op_tablet_set)); + } else if (op_tablet_set->get_common_header().need_clean_all_readable_set()) { + if (OB_FAIL(clean_readable_tablets_(min_reserve_tablet_scn))) { + DUP_TABLE_LOG(WARN, "clean readable tablets failed", K(ret), K(min_reserve_tablet_scn)); + } + // if (OB_SUCC(ret) && !readable_tablets_list_.is_empty()) { + // DLIST_FOREACH_REMOVESAFE(readable_set_ptr, readable_tablets_list_) + // { + // if (nullptr == readable_tablets_list_.remove(readable_set_ptr)) { + // ret = OB_ERR_UNEXPECTED; + // DUP_TABLE_LOG(WARN, "remove readable_set failed", K(ret), KPC(readable_set_ptr)); + // } else if (OB_FAIL(return_tablet_set(readable_set_ptr))) { + // DUP_TABLE_LOG(WARN, "free readable_set 
failed", K(ret), KPC(readable_set_ptr), + // K(readable_tablets_list_.get_size()), K(free_set_pool_.get_size())); + // } + // } + // } + } else if (op_tablet_set->get_common_header().need_clean_data_confirming_set()) { + // DupTabletSpecialOpArg tmp_arg; + // DUP_TABLE_LOG(WARN, "try clean unreadable tablets", K(ret), + // K(op_tablet_set->get_common_header())); + // if (OB_FAIL(op_arg_map_.get_refactored(op_tablet_set->get_common_header().get_unique_id(), + // tmp_arg))) { + // DUP_TABLE_LOG(WARN, "get sp op arg failed", K(ret), K(op_tablet_set->get_common_header())); + // // @param1 type clean_all + // } else + if (OB_FAIL(clean_durable_confirming_tablets_(min_reserve_tablet_scn))) { + DUP_TABLE_LOG(WARN, "clean unreadable tablets failed", K(ret), K(min_reserve_tablet_scn)); + } + } else if (op_tablet_set->get_common_header().need_block_confirming()) { + // do nothing + // only block confirm new tablet set before all_trx_ref has been clear + } + + return ret; +} +// tablet set virtual table interface +// all tablets virtual table interface +int ObLSDupTabletsMgr::get_tablets_stat(ObDupLSTabletsStatIterator &collect_iter, + const share::ObLSID &ls_id) +{ + int ret = OB_SUCCESS; + const ObAddr addr = GCTX.self_addr(); + const int64_t tenant_id = MTL_ID(); + const int64_t collect_ts = ObTimeUtility::current_time(); + SpinRLockGuard rlock(dup_tablets_lock_); + + // iter changing new + if (OB_NOT_NULL(changing_new_set_)) { + if (0 == changing_new_set_->size()) { + // do nothing + } else { + CollectTabletsHandler changing_new_handler( + collect_ts, ls_id, tenant_id, addr, is_master(), + changing_new_set_->get_common_header().get_unique_id(), TabletSetAttr::DATA_SYNCING, + // tablet_gc_window_, + collect_iter); + if (OB_FAIL(hash_for_each_update(*changing_new_set_, changing_new_handler))) { + DUP_TABLE_LOG(WARN, "push into iter failed", KPC(this)); + } + } + } + // iter need confirm + if (OB_SUCC(ret)) { + DLIST_FOREACH(need_confirm_set, need_confirm_new_queue_) + { + if (OB_NOT_NULL(need_confirm_set)) { + if (0 == need_confirm_set->size()) { + // do nothing + } else { + CollectTabletsHandler changing_new_handler( + collect_ts, ls_id, tenant_id, addr, is_master(), + need_confirm_set->get_common_header().get_unique_id(), TabletSetAttr::DATA_SYNCING, + // tablet_gc_window_, + collect_iter); + if (OB_FAIL(hash_for_each_update(*need_confirm_set, changing_new_handler))) { + DUP_TABLE_LOG(WARN, "push into iter failed", KPC(this)); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "iter need confirm failed", K(ret), KPC(this), KP(need_confirm_set)); + } + } + } + // iter readable + if (OB_SUCC(ret)) { + DLIST_FOREACH(readable_set, readable_tablets_list_) + { + if (OB_NOT_NULL(readable_set)) { + if (0 == readable_set->size()) { + // do nothing + } else { + CollectTabletsHandler changing_new_handler( + collect_ts, ls_id, tenant_id, addr, is_master(), + readable_set->get_common_header().get_unique_id(), TabletSetAttr::READABLE, + // tablet_gc_window_, + collect_iter); + if (OB_FAIL(hash_for_each_update(*readable_set, changing_new_handler))) { + DUP_TABLE_LOG(WARN, "push into iter failed", KPC(this)); + } + } + } else { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "iter readable failed", K(ret), KPC(this), KP(readable_set)); + } + } + } + // iter old + if (OB_SUCC(ret) && OB_NOT_NULL(removing_old_set_)) { + if (0 == removing_old_set_->size()) { + // do nothing + } else { + CollectTabletsHandler changing_new_handler( + collect_ts, ls_id, tenant_id, addr, is_master(), + 
removing_old_set_->get_common_header().get_unique_id(), TabletSetAttr::DELETING, + // tablet_gc_window_, + collect_iter); + if (OB_FAIL(hash_for_each_update(*removing_old_set_, changing_new_handler))) { + DUP_TABLE_LOG(WARN, "push into iter failed", KPC(this)); + } + } + } + // TODO siyu: for debug + DUP_TABLE_LOG(WARN, "collect all", K(ret), KPC(this)); + return ret; +} + +int ObLSDupTabletsMgr::get_tablet_set_stat(ObDupLSTabletSetStatIterator &collect_iter, + const share::ObLSID &ls_id) +{ + int ret = OB_SUCCESS; + // iter changing new + // const ObAddr addr = GCTX.self_addr(); + const int64_t tenant_id = MTL_ID(); + SpinRLockGuard rlock(dup_tablets_lock_); + + if (OB_NOT_NULL(changing_new_set_)) { + DupTabletSetChangeStatus *tmp_status = changing_new_set_->get_change_status(); + if (OB_NOT_NULL(tmp_status)) { + // share::SCN not_used = share::SCN::min_scn(); + ObDupTableLSTabletSetStat tmp_stat; + tmp_stat.set_basic_info(tenant_id, ls_id, is_master()); + + tmp_stat.set_unique_id(changing_new_set_->get_common_header().get_unique_id()); + tmp_stat.set_attr(TabletSetAttr::DATA_SYNCING); + // set state, trx_ref, change_scn, need_confirm_scn and readable_scn + tmp_stat.set_from_change_status(tmp_status); + tmp_stat.set_count(changing_new_set_->size()); + + if (OB_FAIL(collect_iter.push(tmp_stat))) { + DUP_TABLE_LOG(WARN, "push into iter failed", K(tmp_stat)); + } + } else { + DUP_TABLE_LOG(WARN, "change status is null", KPC(this), KP(tmp_status)); + } + } + // iter need confirm + if (OB_SUCC(ret)) { + DLIST_FOREACH(need_confirm_set, need_confirm_new_queue_) + { + if (OB_NOT_NULL(need_confirm_set)) { + DUP_TABLE_LOG(WARN, "need confirm tablets ", KPC(need_confirm_set)); + DupTabletSetChangeStatus *tmp_status = need_confirm_set->get_change_status(); + if (OB_NOT_NULL(tmp_status)) { + ObDupTableLSTabletSetStat tmp_stat; + tmp_stat.set_basic_info(tenant_id, ls_id, is_master()); + + tmp_stat.set_unique_id(need_confirm_set->get_common_header().get_unique_id()); + tmp_stat.set_attr(TabletSetAttr::READABLE); + tmp_stat.set_from_change_status(tmp_status); + tmp_stat.set_count(need_confirm_set->size()); + + if (OB_FAIL(collect_iter.push(tmp_stat))) { + DUP_TABLE_LOG(WARN, "push into iter failed", K(tmp_stat)); + } + } else { + DUP_TABLE_LOG(WARN, "change status is null", KPC(this), KP(tmp_status)); + } + } else { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "iter need confirm failed", K(ret), KPC(this), + KP(need_confirm_set)); + } + } + } + // iter readable + if (OB_SUCC(ret)) { + DLIST_FOREACH(readable_set, readable_tablets_list_) + { + if (OB_NOT_NULL(readable_set)) { + share::SCN not_used = share::SCN::min_scn(); + ObDupTableLSTabletSetStat tmp_stat; + tmp_stat.set_basic_info(tenant_id, ls_id, is_master()); + + tmp_stat.set_unique_id(readable_set->get_common_header().get_unique_id()); + tmp_stat.set_attr(TabletSetAttr::READABLE); + tmp_stat.set_state(TabletSetState::CONFIRMED); + tmp_stat.set_from_change_status(nullptr); + tmp_stat.set_count(readable_set->size()); + + if (OB_FAIL(collect_iter.push(tmp_stat))) { + DUP_TABLE_LOG(WARN, "push into iter failed", K(tmp_stat)); + } + } else { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "iter readable failed", K(ret), KPC(this), + KP(readable_set)); + } + } + } + // iter old + if (OB_SUCC(ret) && OB_NOT_NULL(removing_old_set_)) { + share::SCN not_used = share::SCN::min_scn(); + DupTabletSetChangeStatus *tmp_status = removing_old_set_->get_change_status(); + DUP_TABLE_LOG(WARN, "old tablets ", KPC(removing_old_set_), KPC(tmp_status)); + if 
(OB_NOT_NULL(tmp_status)) { + ObDupTableLSTabletSetStat tmp_stat; + tmp_stat.set_basic_info(tenant_id, ls_id, is_master()); + + tmp_stat.set_unique_id(removing_old_set_->get_common_header().get_unique_id()); + tmp_stat.set_attr(TabletSetAttr::DELETING); + tmp_stat.set_from_change_status(tmp_status); + tmp_stat.set_count(removing_old_set_->size()); + + if (OB_FAIL(collect_iter.push(tmp_stat))) { + DUP_TABLE_LOG(WARN, "push into iter failed", K(tmp_stat)); + } + } else { + DUP_TABLE_LOG(WARN, "change status is null", KPC(this), KP(tmp_status)); + } + } + // TODO siyu: for debug + DUP_TABLE_LOG(WARN, "collect all", K(ret), KPC(this)); + return ret; +} + +int ObTenantDupTabletSchemaHelper::get_all_dup_tablet_set_(TabletIDSet &tablet_set) +{ + int ret = OB_SUCCESS; + ObSchemaGetterGuard schema_guard; + ObSEArray table_schemas; + if (OB_FAIL(GSCHEMASERVICE.get_tenant_schema_guard(MTL_ID(), schema_guard))) { + DUP_TABLE_LOG(WARN, "get tenant schema guard failed", K(ret)); + } else if (OB_FAIL(schema_guard.get_table_schemas_in_tenant(MTL_ID(), table_schemas))) { + DUP_TABLE_LOG(WARN, "get table schemas in tenant failed", K(ret)); + } else { + for (int64_t i = 0; OB_SUCCESS == ret && i < table_schemas.count(); i++) { + bool is_duplicated = false; + const ObSimpleTableSchemaV2 *table_schema = table_schemas.at(i); + if (OB_FAIL(table_schema->check_is_duplicated(schema_guard, is_duplicated))) { + DUP_TABLE_LOG(WARN, "check duplicate failed", K(ret)); + } else if (is_duplicated) { + ObArray tablet_id_arr; + if (OB_FAIL(table_schema->get_tablet_ids(tablet_id_arr))) { + DUP_TABLE_LOG(WARN, "get tablet ids from tablet schema failed"); + } else { + for (int j = 0; OB_SUCCESS == ret && j < tablet_id_arr.size(); j++) { + if (OB_FAIL(tablet_set.set_refactored(tablet_id_arr[j]))) { + DUP_TABLE_LOG(WARN, "insert into dup tablet set faild", K(ret)); + } + } + } + } else { + // do nothing + } + } + } + return ret; +} + +int ObTenantDupTabletSchemaHelper::refresh_and_get_tablet_set(TabletIDSet &tenant_dup_tablet_set) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(get_all_dup_tablet_set_(tenant_dup_tablet_set))) { + DUP_TABLE_LOG(WARN, "get tenant dup tablet set faild", K(ret)); + } + + DUP_TABLE_LOG(DEBUG, "get all dup tablet ids", K(tenant_dup_tablet_set.size())); + return ret; +} + +} // namespace transaction + +} // namespace oceanbase diff --git a/src/storage/tx/ob_dup_table_tablets.h b/src/storage/tx/ob_dup_table_tablets.h new file mode 100644 index 0000000000..6c9d24556c --- /dev/null +++ b/src/storage/tx/ob_dup_table_tablets.h @@ -0,0 +1,869 @@ +//Copyrigh (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
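The header that follows defines the per-set state machine that the manager code above drives: prepare_serialize() when a tablet-change log is submitted, prepare_confirm() when that log is synced, push_readable_scn() as lease-valid followers report replay progress, and try_set_confirmed() once the transaction reference count drops to zero. As an editorial aid only, the sketch below reduces that lifecycle to a minimal, self-contained C++ snippet; SketchFlag, SketchStatus, the on_*() helpers and the plain int64_t SCNs are illustrative stand-ins, not OceanBase types or part of this patch.

#include <algorithm>
#include <cstdint>

enum class SketchFlag { UNUSED, TEMPORARY, CHANGE_LOGGING, CONFIRMING, CONFIRMED };

struct SketchStatus {
  SketchFlag flag = SketchFlag::TEMPORARY;  // a set taken from the free pool starts as TEMPORARY
  int64_t change_scn = -1;                  // scn of the tablet-change log entry
  int64_t need_confirm_scn = -1;            // followers must replay up to this scn
  int64_t readable_version = -1;            // max replayed scn collected from lease-valid followers
  int64_t trx_ref = 0;                      // active transactions still pinned on the set

  // mirrors prepare_serialize(): freeze the set before submitting the change log
  void on_submit() { if (flag == SketchFlag::TEMPORARY) flag = SketchFlag::CHANGE_LOGGING; }

  // mirrors prepare_confirm(scn, true): the change log is durable, start confirming
  void on_log_synced(int64_t scn) {
    change_scn = scn;
    need_confirm_scn = std::max(need_confirm_scn, scn);
    flag = SketchFlag::CONFIRMING;
  }

  // mirrors push_readable_scn(); the real caller takes the min against
  // need_confirm_scn_ before pushing, which is folded into this helper here
  void on_follower_replayed(int64_t follower_scn) {
    readable_version = std::max(readable_version, std::min(follower_scn, need_confirm_scn));
  }

  // mirrors try_set_confirmed(): confirm once followers caught up and no trx pins the set
  bool try_confirm() {
    if (flag == SketchFlag::CONFIRMING && trx_ref == 0 && readable_version >= need_confirm_scn) {
      flag = SketchFlag::CONFIRMED;
    }
    return flag == SketchFlag::CONFIRMED;
  }
};

// Happy path on the leader, for illustration:
//   SketchStatus s;
//   s.on_submit();               // TEMPORARY      -> CHANGE_LOGGING
//   s.on_log_synced(100);        // CHANGE_LOGGING -> CONFIRMING
//   s.on_follower_replayed(120); // readable_version caps at need_confirm_scn (100)
//   bool ok = s.try_confirm();   // CONFIRMING     -> CONFIRMED (trx_ref == 0)

Once try_confirm() succeeds, the corresponding real path (tablet_log_synced() / merge_into_readable_tablets_() above) moves the set from need_confirm_new_queue_ into readable_tablets_list_ and marks its common header readable, after which get_change_status() returns nullptr for it.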
+ +#ifndef OCEANBASE_DUP_TABLE_TABLETS_H +#define OCEANBASE_DUP_TABLE_TABLETS_H + +#include "lib/list/ob_dlist.h" +#include "lib/queue/ob_fixed_queue.h" +#include "common/ob_tablet_id.h" +#include "lib/lock/ob_spin_rwlock.h" +#include "lib/hash/ob_hashmap.h" +#include "lib/hash/ob_hashset.h" +#include "ob_dup_table_ts_sync.h" +#include "ob_dup_table_base.h" + +namespace oceanbase +{ + +namespace logservice +{ +class ObLogHandler; +} +namespace transaction +{ +class ObDupTableLSHandler; + +//********************************************************************** +//****** ObLSDupTabletsMgr +//********************************************************************** + +enum class DupTabletSetChangeFlag +{ + UNKNOWN = -1, + UNUSED, + TEMPORARY, + CHANGE_LOGGING, + CONFIRMING, + CONFIRMED, + +}; + +static const char *get_dup_tablet_flag_str(const DupTabletSetChangeFlag &flag) +{ + const char *flag_str = nullptr; + + switch (flag) { + case DupTabletSetChangeFlag::UNKNOWN: { + flag_str = "UNKNOWN"; + break; + } + + case DupTabletSetChangeFlag::UNUSED: { + flag_str = "UNUSED"; + break; + } + case DupTabletSetChangeFlag::TEMPORARY: { + flag_str = "TEMPORARY"; + break; + } + case DupTabletSetChangeFlag::CHANGE_LOGGING: { + flag_str = "CHANGE_LOGGING"; + break; + } + case DupTabletSetChangeFlag::CONFIRMING: { + flag_str = "CONFIRMING"; + break; + } + case DupTabletSetChangeFlag::CONFIRMED: { + flag_str = "CONFIRMED"; + break; + } + }; + + return flag_str; +} + +struct DupTabletSetChangeStatus +{ + DupTabletSetChangeFlag flag_; + share::SCN tablet_change_scn_; + share::SCN need_confirm_scn_; + share::SCN readable_version_; + int64_t trx_ref_; + + void init() + { + reset(); + flag_ = DupTabletSetChangeFlag::UNUSED; + } + + void reset() + { + flag_ = DupTabletSetChangeFlag::UNKNOWN; + tablet_change_scn_.reset(); + need_confirm_scn_.reset(); + readable_version_.set_min(); + trx_ref_ = 0; + } + + DupTabletSetChangeStatus() { reset(); } + + bool is_valid() const { return flag_ != DupTabletSetChangeFlag::UNKNOWN; } + bool need_log() const + { + return flag_ == DupTabletSetChangeFlag::TEMPORARY + || flag_ == DupTabletSetChangeFlag::CHANGE_LOGGING + || (flag_ == DupTabletSetChangeFlag::CONFIRMING && need_confirm_scn_ <= readable_version_) + || flag_ == DupTabletSetChangeFlag::CONFIRMED; + // TODO submit log if readable_version has changed. 
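// Editorial note (descriptive comment, not authored patch content): need_log() is
// true while the change itself is still unlogged (TEMPORARY / CHANGE_LOGGING), while
// a CONFIRMING set's readable_version_ has reached need_confirm_scn_ so the
// confirmation can be logged, or once the set is CONFIRMED and its final state still
// needs one more log round before it is merged into the readable list.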
+ } + + bool need_reserve(const share::SCN &min_reserve_scn) const + { + return !tablet_change_scn_.is_valid() + || (tablet_change_scn_.is_valid() && tablet_change_scn_ >= min_reserve_scn); + } + + bool is_unlog() const { return !tablet_change_scn_.is_valid(); } + bool is_free() const { return flag_ == DupTabletSetChangeFlag::UNUSED; } + bool is_modifiable() const { return flag_ == DupTabletSetChangeFlag::TEMPORARY; } + + bool is_change_logging() const { return flag_ == DupTabletSetChangeFlag::CHANGE_LOGGING; } + bool is_confirming() const { return flag_ == DupTabletSetChangeFlag::CONFIRMING; } + bool can_be_confirmed_anytime() const + { + return (trx_ref_ == 0 && readable_version_ >= need_confirm_scn_ + && flag_ == DupTabletSetChangeFlag::CONFIRMING) + || flag_ == DupTabletSetChangeFlag::CONFIRMED; + } + bool has_confirmed() const { return DupTabletSetChangeFlag::CONFIRMED == flag_; } + + void set_temporary() { flag_ = DupTabletSetChangeFlag::TEMPORARY; } + void set_confirm_invalid() { need_confirm_scn_.set_max(); } + int prepare_serialize() + { + int ret = OB_SUCCESS; + if (DupTabletSetChangeFlag::TEMPORARY == flag_) { + flag_ = DupTabletSetChangeFlag::CHANGE_LOGGING; + } + return ret; + } + + int tablet_change_log_submitted(const share::SCN &tablet_change_scn, const bool submit_result) + { + int ret = OB_SUCCESS; + if (!is_change_logging() || tablet_change_scn_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected flag after submitted", K(ret), KPC(this)); + } else if (submit_result) { + tablet_change_scn_ = tablet_change_scn; + } else { + // do nothing + } + + return ret; + } + + int prepare_confirm(const share::SCN &tablet_change_scn, const bool sync_result) + { + int ret = OB_SUCCESS; + if (!is_change_logging() || tablet_change_scn_ != tablet_change_scn) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected flag after submitted", K(ret), KPC(this)); + } else if (sync_result) { + flag_ = DupTabletSetChangeFlag::CONFIRMING; + need_confirm_scn_ = share::SCN::max(need_confirm_scn_, tablet_change_scn_); + } else if (is_change_logging()) { + tablet_change_scn_.set_invalid(); + } + DUP_TABLE_LOG(DEBUG, "finish prepare confirm", K(tablet_change_scn), K(tablet_change_scn_)); + return ret; + } + + int inc_active_tx() + { + int ret = OB_SUCCESS; + trx_ref_++; + return ret; + } + + int dec_active_tx() + { + int ret = OB_SUCCESS; + trx_ref_--; + return ret; + } + + int push_need_confirm_scn(const share::SCN &need_confirm_scn) + { + int ret = OB_SUCCESS; + + if (need_confirm_scn > need_confirm_scn_) { + need_confirm_scn_ = need_confirm_scn; + } + + return ret; + } + + int push_readable_scn(const share::SCN &readable_scn) + { + int ret = OB_SUCCESS; + + if (readable_scn > need_confirm_scn_) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected need_confirm_scn_", K(ret), KPC(this)); + } else if (readable_scn > readable_version_) { + readable_version_ = readable_scn; + } + + return ret; + } + + int try_set_confirmed(const bool can_be_confirmed) + { + int ret = OB_SUCCESS; + + if (can_be_confirmed) { + if (can_be_confirmed_anytime()) { + flag_ = DupTabletSetChangeFlag::CONFIRMED; + } else { + ret = OB_EAGAIN; + } + } + return ret; + } + + TO_STRING_KV(K(flag_), + K(tablet_change_scn_), + K(need_confirm_scn_), + K(readable_version_), + K(trx_ref_)); +}; + +struct DupTabletInfo +{ + int64_t update_dup_schema_ts_; + + void reset() { update_dup_schema_ts_ = 0; } + + DupTabletInfo() { reset(); } + + TO_STRING_KV(K(update_dup_schema_ts_)); +}; + +typedef 
common::hash:: + ObHashMap + DupTabletIdMap; + +// class DupTabletHashMap : public DupTabletIdMap +// { +// public: +// NEED_SERIALIZE_AND_DESERIALIZE; +// TO_STRING_KV(K(common_header_), K(size())); +// const DupTabletCommonHeader &get_common_header() { return common_header_; } +// +// int create(int64_t unique_id, int64_t bucket_num, const lib::ObLabel &bucket_label); +// void destroy() +// { +// DupTabletIdMap::destroy(); +// common_header_.reset(); +// } +// +// private: +// DupTabletCommonHeader common_header_; +// }; + +class DupTabletChangeMap : public common::ObDLinkBase, public DupTabletIdMap +{ +public: + NEED_SERIALIZE_AND_DESERIALIZE; + DupTabletChangeMap(const uint64_t set_id) : change_status_(), common_header_(set_id) { reuse(); } + + void reuse() + { + change_status_.init(); + change_status_.set_temporary(); + // common_header_.clean_sp_op(); + // common_header_.set_free(); + common_header_.reuse(); + DupTabletIdMap::clear(); + } + + void destroy() + { + reset(); + DupTabletIdMap::destroy(); + } + + int create(int64_t bucket_num); + + DupTabletSetChangeStatus *get_change_status() + { + DupTabletSetChangeStatus *change_status_ptr = nullptr; + if (common_header_.is_readable_set()) { + change_status_ptr = nullptr; + } else { + change_status_ptr = &change_status_; + } + return change_status_ptr; + } + DupTabletCommonHeader &get_common_header() { return common_header_; } + bool need_reserve(const share::SCN &scn) const + { + return change_status_.need_reserve(scn); + } + + TO_STRING_KV(K(change_status_), + K(common_header_), + K(DupTabletIdMap::size()), + K(DupTabletIdMap::created())); + +private: + DupTabletSetChangeStatus change_status_; + DupTabletCommonHeader common_header_; + // DupTabletIdMap tablet_id_map_; +}; + +class TabletsSerCallBack : public IHashSerCallBack +{ +public: + TabletsSerCallBack(char *buf, int64_t buf_len, int64_t pos) : IHashSerCallBack(buf, buf_len, pos) + {} + int operator()(const common::hash::HashMapPair &hash_pair); +}; + +class TabletsDeSerCallBack : public IHashDeSerCallBack +{ +public: + TabletsDeSerCallBack(const char *buf, + int64_t buf_len, + int64_t pos, + int64_t deser_time) + : IHashDeSerCallBack(buf, buf_len, pos), deser_time_(deser_time) + {} + int operator()(DupTabletChangeMap &dup_tablet_map); + +private: + int64_t deser_time_; +}; + +class TabletsGetSizeCallBack +{ +public: + int64_t operator()(const common::hash::HashMapPair &hash_pair); +}; + + +class DupTabletChangeLogTail +{ + OB_UNIS_VERSION(1); + +public: + DupTabletChangeLogTail() {} + DupTabletChangeLogTail(const share::SCN &readable_version, bool confirm_all) + : readable_version_(readable_version), has_confirmed_(confirm_all) + {} + bool is_valid() const; + void reset() + { + readable_version_.set_min(); + has_confirmed_ = false; + } + + share::SCN readable_version_; + bool has_confirmed_; + + TO_STRING_KV(K(readable_version_), K(has_confirmed_)); +}; + +typedef ObSEArray OpObjectArr; +class DupTabletSpecialOpArg +{ + OB_UNIS_VERSION(1); + +public: + DupTabletSpecialOpArg() {} + + void reset() { op_objects_.reset(); } + bool is_valid() { return !op_objects_.empty(); } + + TO_STRING_KV(K(op_objects_)); +public: + OpObjectArr op_objects_; +}; + +typedef common::hash::ObHashMap + SpecialOpArgMap; + +class DupTabletCommonLogBody +{ + OB_UNIS_VERSION(1); + +public: + DupTabletCommonLogBody(DupTabletChangeMap &hash_map) : tablet_id_map_(hash_map) {} + + TO_STRING_KV(K(tablet_id_map_)); + +protected: + DupTabletChangeMap &tablet_id_map_; +}; + +class DupTabletChangeLogBody : public 
DupTabletCommonLogBody +{ + OB_UNIS_VERSION(1); + +public: + DupTabletChangeLogBody(DupTabletChangeMap &hash_map) : DupTabletCommonLogBody(hash_map) + { + change_tail_.readable_version_ = + DupTabletCommonLogBody::tablet_id_map_.get_change_status()->readable_version_; + change_tail_.has_confirmed_ = tablet_id_map_.get_change_status()->has_confirmed(); + } + + const DupTabletChangeLogTail &get_change_tail() { return change_tail_; } + + INHERIT_TO_STRING_KV("common_log_body", DupTabletCommonLogBody, K(change_tail_)); + +private: + DupTabletChangeLogTail change_tail_; +}; + +class DupTabletSpecialOpLogBody : public DupTabletChangeLogBody +{ + OB_UNIS_VERSION(1); + +public: + DupTabletSpecialOpLogBody(DupTabletChangeMap &hash_map, DupTabletSpecialOpArg &op_arg) + : DupTabletChangeLogBody(hash_map), sp_op_arg_(op_arg) + {} + + INHERIT_TO_STRING_KV("change_log_body", DupTabletChangeLogBody, K(sp_op_arg_)); + +private: + DupTabletSpecialOpArg &sp_op_arg_; +}; + +class DupTabletLog +{ +public: + int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; + int deserialize_common_header(const char *buf, const int64_t data_len, int64_t &pos); + int deserialize_content(const char *buf, const int64_t data_len, int64_t &pos); + + int64_t get_serialize_size(); + +public: + DupTabletLog(DupTabletChangeMap *hash_map) : hash_map_(hash_map) + { + common_header_ = hash_map_->get_common_header(); + } + + DupTabletLog(DupTabletChangeLogTail change_tail, + DupTabletChangeMap *hash_map, + DupTabletSpecialOpArg *sp_op_arg = nullptr) + : hash_map_(hash_map), change_tail_(change_tail), special_op_arg_(sp_op_arg) + { + common_header_ = hash_map_->get_common_header(); + }; + + DupTabletLog() { reset(); }; + + void reset() + { + common_header_.reset(); + hash_map_ = nullptr; + change_tail_.reset(); + special_op_arg_ = nullptr; + } + + int set_hash_map_ptr(DupTabletChangeMap *hash_map_ptr, + DupTabletSpecialOpArg *special_op_arg_ = nullptr); + const DupTabletCommonHeader &get_common_header(); + const DupTabletChangeLogTail &get_change_tail(); + + TO_STRING_KV(K(common_header_), K(change_tail_), K(hash_map_)); + +private: + DupTabletCommonHeader common_header_; + DupTabletChangeMap *hash_map_; + DupTabletChangeLogTail change_tail_; + DupTabletSpecialOpArg *special_op_arg_; +}; + +// *********************************************************************************************** +// How dup tablet move between different sets when its dup attribute changed: +// 1. new set : store tablets which be discovered as a part of a dup table +// 2. old set : store tablets which has lost dup_table attribute +// 3. 
readable set : store tablets which can be read +// *********************************************************************************************** +// +// | +// | acquire dup attribute +// v +// discard dup attribute +------------------------------+ +// <----------------------- | new dup tablet | <+ +// +------------------------------+ | +// | | +// | confirmed by lease follower | +// v | +// +------------------------------+ | +// | readable dup tablet | | acquire dup attribute +// +------------------------------+ | +// | | +// | discard dup attribute | +// v | +// +------------------------------+ | +// | old dup tablet | -+ +// +------------------------------+ +// | +// | confirmed by lease follower +// v +// +// *********************************************************************************************** +// * How dup tablet change state when it move between dup tablet sets +// *********************************************************************************************** +// +// | +// | insert into new/old tablets +// v +// +--------------------------------------------------+ +// | TEMPORARY | +// +--------------------------------------------------+ +// | +// | mark logging but not serialize in the first log +// | set log ts after submitted log +// v +// +--------------------------------------------------+ +// | LOGGING | +// +--------------------------------------------------+ +// | +// | invoke log cb success +// v +// +--------------------------------------------------+ +// | DURABLE | +// +--------------------------------------------------+ +// | +// | confirmed replay_ts by lease follower +// | move into confirmed_new/confirmed_old tablets +// v +// +--------------------------------------------------+ +// | CONFIRMED | +// +--------------------------------------------------+ +// | +// | move into confirmed_new/confirmed_old tablets +// | serialize in the second log +// v +// +// +// +// | +// | replay or apply the second log +// v +// +// +// +// | +// | [new]move to readable/[old]remove from old +// v +// +--------------------------------------------------+ +// | READABLE | +// +--------------------------------------------------+ +// +// *********************************************************************************************** +// * If move a tablet from old to readable without confirm +// * Problem: +// * 1. Leader (A) tablet1(readable->old); submit lease log(log_ts = n); tablet1(old->new->readable); switch_to_follower +// * Follower (B) replay log n=>tablet1(readable->old); switch_to_leader +// * 2. Follower(A) tablet1(readable),replay_ts = n +// * Leaser(B) tablet1(old); confirm A replay_ts > n ; tablet1(old->delete) +// *********************************************************************************************** + +class ObLSDupTabletsMgr +{ +public: + ObLSDupTabletsMgr() + : changing_new_set_(nullptr), removing_old_set_(nullptr), tablet_diag_info_log_buf_(nullptr) + { + reset(); + } + int init(ObDupTableLSHandler *dup_ls_handle); + void destroy(); + void reset(); + + bool is_master() { return ATOMIC_LOAD(&is_master_); } + + const static int64_t MAX_CONFIRMING_TABLET_COUNT; +public: + int check_readable(const common::ObTabletID &tablet_id, + bool &readable, + const share::SCN &snapshot, + DupTableInterfaceStat interface_stat); + // For part_ctx, check_dup_table will be invoked after submit_log in LS which has dup_table + // tablets. It will bring performance effect for normal part_ctx without dup_table tablets. 
+ int find_dup_tablet_in_set(const common::ObTabletID &tablet_id, + bool &is_dup_table, + const share::SCN &from_scn, + const share::SCN &to_scn); + int gc_dup_tablets(const int64_t gc_ts, const int64_t max_task_interval); + int refresh_dup_tablet(const common::ObTabletID &tablet_id, + bool is_dup_table, + int64_t refresh_time); + + int prepare_serialize(int64_t &max_ser_size, + DupTabletSetIDArray &unique_id_array, + const int64_t max_log_buf_len); + int serialize_tablet_log(const DupTabletSetIDArray &unique_id_array, + char *buf, + const int64_t buf_len, + int64_t &pos); + int deserialize_tablet_log(DupTabletSetIDArray &unique_id_array, + const char *buf, + const int64_t data_len, + int64_t &pos); + + int tablet_log_submitted(const bool submit_result, + const share::SCN &tablet_log_scn, + const bool for_replay, + const DupTabletSetIDArray &unique_id_array); + + int tablet_log_synced(const bool sync_result, + const share::SCN &scn, + const bool for_replay, + const DupTabletSetIDArray &unique_id_array, + bool &merge_confirmed); + + int try_to_confirm_tablets(const share::SCN &confirm_scn); + // bool need_log_tablets(); + int64_t get_dup_tablet_count(); + bool has_dup_tablet(); + int64_t get_readable_tablet_set_count(); + int64_t get_all_tablet_set_count(); + + int leader_takeover(const bool is_resume, const bool recover_all_readable_from_ckpt); + int leader_revoke(const bool is_logging); + + void print_tablet_diag_info_log(bool is_master); + + // TO_STRING_KV(KPC(changing_new_set_), + // K(need_confirm_new_queue_.get_size()), + // K(old_tablets_), + // K(readable_tablets_)); + int get_tablets_stat(ObDupLSTabletsStatIterator &collect_iter, + const share::ObLSID &ls_id); + int get_tablet_set_stat(ObDupLSTabletSetStatIterator &collect_iter, + const share::ObLSID &ls_id); + +private: + class GcDiscardedDupTabletHandler + { + public: + GcDiscardedDupTabletHandler(int64_t update_ts, + int64_t gc_time_interval, + const DupTabletCommonHeader &common_header, + DupTabletChangeMap &old_tablets) + : gc_ts_(update_ts), gc_time_interval_(gc_time_interval), gc_tablet_cnt_(0), + ret_(OB_SUCCESS), src_common_header_(common_header), old_tablets_(old_tablets) + {} + bool operator()(common::hash::HashMapPair &hash_pair); + int64_t get_gc_tablet_cnt() const { return gc_tablet_cnt_; } + int get_ret() const { return ret_; } + + private: + int64_t gc_ts_; + int64_t gc_time_interval_; + int64_t gc_tablet_cnt_; + int ret_; + DupTabletCommonHeader src_common_header_; + DupTabletChangeMap &old_tablets_; + }; + + class ConfirmedDupTabletHandler + { + /** + * 1. src == new : move to readable + * 2. 
src == old : remvo from old + */ + public: + ConfirmedDupTabletHandler(DupTabletChangeMap &readable_tablets) : readable_(readable_tablets) {} + int operator()(common::hash::HashMapPair &hash_pair); + + private: + DupTabletChangeMap &readable_; + }; + + class DiagInfoGenerator + { + public: + DiagInfoGenerator(char *info_buf, + int64_t info_buf_len, + int64_t info_buf_pos, + uint64_t tablet_set_id) + : info_buf_(info_buf), info_buf_len_(info_buf_len), info_buf_pos_(info_buf_pos), + tablet_set_id_(tablet_set_id) + { + iter_count_ = 0; + } + + int64_t get_buf_pos() { return info_buf_pos_; } + + int operator()(const common::hash::HashMapPair &hash_pair); + + private: + char *info_buf_; + int64_t info_buf_len_; + int64_t info_buf_pos_; + uint64_t tablet_set_id_; + int64_t iter_count_; + }; + + class CollectTabletsHandler + { + public: + CollectTabletsHandler(const int64_t collect_ts, + const share::ObLSID ls_id, + const uint64_t tenant_id, + const ObAddr &addr, + const bool is_master, + const int64_t tablet_set_id, + const TabletSetAttr attr, + // const int64_t tablet_gc_window, + ObDupLSTabletsStatIterator &collect_iter) + : collect_ts_(collect_ts),ls_id_(ls_id), tenant_id_(tenant_id), addr_(addr), + is_master_(is_master), tablet_set_id_(tablet_set_id), attr_(attr), + //tablet_gc_window_(tablet_gc_window) + collect_iter_(collect_iter) + {} + int operator()(const common::hash::HashMapPair &hash_pair); + + private: + int64_t collect_ts_; + share::ObLSID ls_id_; + uint64_t tenant_id_; + common::ObAddr addr_; + bool is_master_; + int64_t tablet_set_id_; + TabletSetAttr attr_; + // int64_t tablet_gc_window_; + ObDupLSTabletsStatIterator &collect_iter_; + }; + +private: + int lose_dup_tablet_(const common::ObTabletID &tablet_id); + int discover_dup_tablet_(const common::ObTabletID &tablet_id, const int64_t refresh_time); + int collect_confirmed_dup_tablet_(const share::SCN &max_replayed_scn); + + int init_free_tablet_pool_(); + int destroy_free_tablet_pool_(); + + // int get_changing_new_set_(DupTabletChangeMap *&changing_new_set); + // int get_old_tablet_set_(DupTabletChangeMap *&old_tablet_set); + int alloc_extra_free_tablet_set_(); + int get_free_tablet_set(DupTabletChangeMap *&free_set, const uint64_t target_id = 0); + + // If get a free tablet set, need set tablet set type and push into queue + int get_target_tablet_set_(const DupTabletCommonHeader &target_common_header, + DupTabletChangeMap *&target_set, + const bool construct_target_set = false, + const bool need_changing_new_set = false); + + int return_tablet_set(DupTabletChangeMap *need_free_set); + + int clean_readable_tablets_(const share::SCN & min_reserve_tablet_scn); + int clean_durable_confirming_tablets_(const share::SCN & min_reserve_tablet_scn); + int clean_unlog_tablets_(); + int construct_empty_block_confirm_task_(const int64_t trx_ref); + int search_special_op_(uint64_t special_op_type); + int clear_all_special_op_(); + int construct_clean_confirming_set_task_(); + int construct_clean_all_readable_set_task_(); + int try_exec_special_op_(DupTabletChangeMap *op_tablet_set, const share::SCN &min_reserve_tablet_scn,const bool for_replay); + + bool need_seralize_readable_set() { return true; } + + int cal_single_set_max_ser_size_(DupTabletChangeMap *hash_map, + int64_t &max_ser_size, + DupTabletSetIDArray &id_array); + + int merge_into_readable_tablets_(DupTabletChangeMap *change_map_ptr, const bool for_replay); + +private: + // + static int64_t GC_DUP_TABLETS_TIME_INTERVAL; // 5 min + static int64_t GC_DUP_TABLETS_FAILED_TIMEOUT; 
// 25 min + const static int64_t GC_TIMEOUT; // 1s + + const static int64_t RESERVED_FREE_SET_COUNT; + const static int64_t MAX_FREE_SET_COUNT; + +public: + TO_STRING_KV(K(free_set_pool_.get_size()), + KPC(changing_new_set_), + K(need_confirm_new_queue_.get_size()), + K(readable_tablets_list_.get_size()), + KPC(removing_old_set_), + K(last_gc_succ_time_), + K(last_no_free_set_time_), + K(extra_free_set_alloc_count_)); + +private: + SpinRWLock dup_tablets_lock_; + + // ObDupTableLSHandler *dup_ls_handle_ptr_; + share::ObLSID ls_id_; + bool is_master_; + bool is_stopped_; + + // used for gc_handler + int64_t tablet_gc_window_; // default is 2 * ObDupTabletScanTask::DUP_TABLET_SCAN_INTERVAL; + + common::ObDList free_set_pool_; + DupTabletChangeMap *changing_new_set_; + common::ObDList need_confirm_new_queue_; + common::ObDList readable_tablets_list_; + DupTabletChangeMap *removing_old_set_; + + SpecialOpArgMap op_arg_map_; + + // gc_dup_table + int64_t last_gc_succ_time_; + + int64_t last_no_free_set_time_; + int64_t extra_free_set_alloc_count_; + + char *tablet_diag_info_log_buf_; +}; + +class ObLSDupTablets +{ +public: + void reset() + { + ls_id_.reset(); + array_.reset(); + } + share::ObLSID get_ls_id() const { return ls_id_; } + ObTabletIDArray &get_array() { return array_; } + const ObTabletIDArray &get_array() const { return array_; } + void set_ls_id(const share::ObLSID &ls_id) { ls_id_ = ls_id; } +private: + share::ObLSID ls_id_; + ObTabletIDArray array_; +}; + +class ObTenantDupTabletSchemaHelper +{ +public: + typedef common::hash::ObHashSet TabletIDSet; +public: + ObTenantDupTabletSchemaHelper() {} +public: + int refresh_and_get_tablet_set(TabletIDSet &tenant_dup_tablet_set); +private: + int get_all_dup_tablet_set_(TabletIDSet & tablets_set); +private: +}; + + +} // namespace transaction + +} // namespace oceanbase + +#endif diff --git a/src/storage/tx/ob_dup_table_ts_sync.cpp b/src/storage/tx/ob_dup_table_ts_sync.cpp new file mode 100644 index 0000000000..25e7ea9b80 --- /dev/null +++ b/src/storage/tx/ob_dup_table_ts_sync.cpp @@ -0,0 +1,450 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
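+// ObDupTableLSTsSyncMgr caches, per follower address, the timestamps reported through the
+// ts-sync RPCs: max_replayed_scn_, max_read_version_ and max_commit_version_. Newer reports
+// are merged with DupTableTsInfo::update(), which keeps the pointwise maximum of the three
+// SCNs. A minimal illustration of that merge (the concrete values are hypothetical and only
+// for clarity):
+//
+//   DupTableTsInfo cached;    // e.g. {replayed=100, read=90, commit=95}
+//   DupTableTsInfo reported;  // e.g. {replayed=98,  read=96, commit=97}
+//   cached.update(reported);  // cached becomes {replayed=100, read=96, commit=97}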
+ +#include "logservice/ob_log_service.h" +#include "observer/ob_server_struct.h" +#include "share/rc/ob_tenant_base.h" +#include "storage/tx/ob_dup_table_base.h" +#include "storage/tx/ob_dup_table_ts_sync.h" +#include "storage/tx/ob_dup_table_util.h" +#include "storage/tx/ob_trans_service.h" + +namespace oceanbase +{ + +namespace transaction +{ + +void DupTableTsInfo::update(const DupTableTsInfo &ts_info) +{ + max_replayed_scn_ = share::SCN::max(max_replayed_scn_, ts_info.max_replayed_scn_); + max_read_version_ = share::SCN::max(max_read_version_, ts_info.max_read_version_); + max_commit_version_ = share::SCN::max(max_commit_version_, ts_info.max_commit_version_); +} + +int ObDupTableLSTsSyncMgr::init(ObDupTableLSHandler *dup_ls_handle) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(ts_info_cache_.create(32, "DUP_TABLE"))) { + DUP_TABLE_LOG(WARN, "create ts info map failed", K(ret)); + } else { + ls_id_ = dup_ls_handle->get_ls_id(); + is_stopped_ = false; + is_master_ = false; + dup_ls_handle_ptr_ = dup_ls_handle; + } + + return ret; +} + +int ObDupTableLSTsSyncMgr::validate_replay_ts(const common::ObAddr &dst, + const share::SCN &target_replay_scn, + const ObTransID &tx_id, + bool &replay_all_redo, + share::SCN &max_read_version) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + + DupTableTsInfo tmp_ts_info; + max_read_version.reset(); + replay_all_redo = false; + + SpinRLockGuard r_guard(ts_sync_lock_); + + if (OB_FAIL(get_ts_info_cache_(dst, tmp_ts_info))) { + if (OB_HASH_NOT_EXIST != ret) { + DUP_TABLE_LOG(WARN, "get ts info cache failed", K(ret)); + } + } + + if (OB_SUCC(ret) && tmp_ts_info.max_replayed_scn_ >= target_replay_scn) { + replay_all_redo = true; + max_read_version = tmp_ts_info.max_read_version_; + } else if (OB_HASH_NOT_EXIST == ret + || (OB_SUCC(ret) && tmp_ts_info.max_replayed_scn_ < target_replay_scn)) { + replay_all_redo = false; + max_read_version.reset(); + if (OB_TMP_FAIL(request_ts_info_by_rpc_(dst, share::SCN::min_scn()))) { + DUP_TABLE_LOG(WARN, "request ts info by rpc failed", K(ret), K(tmp_ret), K(ls_id_)); + } + if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } + } + + if (replay_all_redo && OB_SUCC(ret)) { + DUP_TABLE_LOG(INFO, "replay all dup table redo", K(ls_id_), K(tx_id), K(dst), K(tmp_ts_info), + K(target_replay_scn), K(replay_all_redo), K(max_read_version)); + } + + return ret; +} + +int ObDupTableLSTsSyncMgr::validate_commit_version(const common::ObAddr &dst, + share::SCN target_commit_version) +{ + int ret = OB_SUCCESS; + + DupTableTsInfo tmp_ts_info; + SpinRLockGuard r_guard(ts_sync_lock_); + + if (OB_FAIL(get_ts_info_cache_(dst, tmp_ts_info))) { + if (OB_HASH_NOT_EXIST != ret) { + DUP_TABLE_LOG(WARN, "get ts info cache failed", K(ret)); + } + } + + if (OB_HASH_NOT_EXIST == ret + || (OB_SUCC(ret) && tmp_ts_info.max_commit_version_ < target_commit_version)) { + if (OB_FAIL(request_ts_info_by_rpc_(dst, target_commit_version))) { + DUP_TABLE_LOG(WARN, "request ts info by rpc failed", K(ret)); + } + } + + return ret; +} + +int ObDupTableLSTsSyncMgr::update_all_ts_info_cache() +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + int64_t err_cnt = 0; + int64_t total_cnt = 0; + + DupTableTsInfo local_ts_info; + if (OB_FAIL(get_local_ts_info(local_ts_info))) { + DUP_TABLE_LOG(WARN, "get local ts info failed", K(ret)); + } + + SpinRLockGuard r_guard(ts_sync_lock_); + + const int64_t current_ts = ObTimeUtility::current_time(); + if (current_ts - last_refresh_ts_info_cache_ts_ > TS_INFO_CACHE_REFRESH_INTERVAL) { + DupTableTsInfoMap::iterator 
cache_iter = ts_info_cache_.begin(); + while (OB_SUCC(ret) && cache_iter != ts_info_cache_.end()) { + if (OB_TMP_FAIL( + request_ts_info_by_rpc_(cache_iter->first, local_ts_info.max_commit_version_))) { + err_cnt++; + DUP_TABLE_LOG(WARN, "request ts info by rpc failed", K(tmp_ret)); + } + cache_iter++; + total_cnt++; + } + last_refresh_ts_info_cache_ts_ = current_ts; + } + // if (err_cnt > 0) { + // DUP_TABLE_LOG(WARN, "update some ts info cache failed", K(err_cnt), K(ret), K(tmp_ret)); + // } + + if (total_cnt > 0) { + DUP_TABLE_LOG(DEBUG, "update ts info cache", K(ret), K(err_cnt), K(total_cnt)); + } + + return ret; +} + +int ObDupTableLSTsSyncMgr::request_ts_info(const common::ObAddr &dst) +{ + // only post msg , not need ts_sync_lock_ + return request_ts_info_by_rpc_(dst, share::SCN::min_scn()); +} + +int ObDupTableLSTsSyncMgr::leader_takeover() +{ + int ret = OB_SUCCESS; + + SpinWLockGuard w_guard(ts_sync_lock_); + + ret = clean_ts_info_cache_(); + + ATOMIC_STORE(&is_master_, true); + + return ret; +} + +int ObDupTableLSTsSyncMgr::leader_revoke() +{ + int ret = OB_SUCCESS; + + // SpinWLockGuard w_guard(ts_sync_lock_); + ATOMIC_STORE(&is_master_, false); + return ret; +} + +int ObDupTableLSTsSyncMgr::clean_ts_info_cache_() +{ + int ret = OB_SUCCESS; + + if (!ts_info_cache_.empty()) { + ts_info_cache_.clear(); + } + + return ret; +} + +int ObDupTableLSTsSyncMgr::handle_ts_sync_request(const ObDupTableTsSyncRequest &ts_sync_req) +{ + int ret = OB_SUCCESS; + DupTableTsInfo local_ts_info; + + SpinRLockGuard r_guard(ts_sync_lock_); + + if (!ts_sync_req.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid argument", K(ts_sync_req)); + } else { + // MTL(ObTransService *) + // ->get_tx_version_mgr() + // .update_max_commit_ts(ts_sync_req.get_max_commit_scn(), false); + if (OB_FAIL(get_local_ts_info(local_ts_info))) { + DUP_TABLE_LOG(WARN, "get local ts info failed", K(ret)); + } else { + ObDupTableTsSyncResponse ts_sync_reps(local_ts_info.max_replayed_scn_, + local_ts_info.max_commit_version_, + local_ts_info.max_read_version_); + + const ObLSID &cur_ls_id = ls_id_; + ObAddr leader_addr = ts_sync_req.get_src(); + ObILocationAdapter *location_adapter = MTL(ObTransService *)->get_location_adapter(); + + if (OB_FAIL(location_adapter->nonblock_get_leader(GCONF.cluster_id, MTL_ID(), cur_ls_id, + leader_addr))) { + DUP_TABLE_LOG(WARN, "get ls leader failed", K(ret), K(leader_addr), K(cur_ls_id), + K(MTL_ID())); + (void)location_adapter->nonblock_renew(GCONF.cluster_id, MTL_ID(), cur_ls_id); + } else if (leader_addr != ts_sync_req.get_src()) { + DUP_TABLE_LOG(INFO, "The leader addr is not the src", K(leader_addr), K(ts_sync_req)); + } + + ts_sync_reps.set_header(ts_sync_req.get_dst(), leader_addr, ts_sync_req.get_dst(), cur_ls_id); + + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(MTL(ObTransService *) + ->get_dup_table_rpc_impl() + .post_msg(leader_addr, ts_sync_reps))) { + DUP_TABLE_LOG(WARN, "post ts sync response failed", K(ret)); + } else { + DUP_TABLE_LOG(DEBUG, "post ts sync response success", K(ret), K(ts_sync_reps)); + } + } + } + return ret; +} + +int ObDupTableLSTsSyncMgr::handle_ts_sync_response(const ObDupTableTsSyncResponse &ts_sync_reps) +{ + int ret = OB_SUCCESS; + + DupTableTsInfo tmp_ts_info; + tmp_ts_info.max_replayed_scn_ = ts_sync_reps.get_max_replayed_scn(); + tmp_ts_info.max_commit_version_ = ts_sync_reps.get_max_commit_scn(); + tmp_ts_info.max_read_version_ = ts_sync_reps.get_max_read_scn(); + + SpinWLockGuard w_guard(ts_sync_lock_); + + if 
(!ts_sync_reps.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid argument", K(ts_sync_reps)); + } else if (OB_FAIL(update_ts_info_(ts_sync_reps.get_src(), tmp_ts_info))) { + DUP_TABLE_LOG(WARN, "update ts info failed", K(ret), K(ts_sync_reps)); + } else { + DUP_TABLE_LOG(DEBUG, "handle ts sync response success", K(ret), K(ts_sync_reps), + K(tmp_ts_info)); + } + + return ret; +} + +int ObDupTableLSTsSyncMgr::request_ts_info_by_rpc_(const common::ObAddr &addr, + const share::SCN &leader_commit_scn) +{ + int ret = OB_SUCCESS; + + const common::ObAddr self_addr = MTL(ObTransService *)->get_server(); + ObDupTableTsSyncRequest ts_sync_req(leader_commit_scn); + ts_sync_req.set_header(self_addr, addr, self_addr, ls_id_); + + if (OB_FAIL(MTL(ObTransService *)->get_dup_table_rpc_impl().post_msg(addr, ts_sync_req))) { + DUP_TABLE_LOG(WARN, "post ts sync request failed", K(ret)); + } + return ret; +} + +int ObDupTableLSTsSyncMgr::update_ts_info_(const common::ObAddr &addr, + const DupTableTsInfo &ts_info) +{ + int ret = OB_SUCCESS; + DupTableTsInfo tmp_ts_info; + + if (OB_FAIL(ts_info_cache_.get_refactored(addr, tmp_ts_info))) { + if (OB_HASH_NOT_EXIST != ret) { + DUP_TABLE_LOG(WARN, "get ts info cache failed", K(ret), K(addr), K(ts_info)); + } else { + DUP_TABLE_LOG(INFO, "it is a new ts info which has not cached", K(ret), K(addr)); + ret = OB_SUCCESS; + } + } + + tmp_ts_info.update(ts_info); + + if (OB_FAIL(ret)) { + } else if (ts_info_cache_.set_refactored(addr, tmp_ts_info, 1)) { + DUP_TABLE_LOG(WARN, "set ts info failed", K(ret)); + } + DUP_TABLE_LOG(DEBUG, "update ts info", K(ret), K(addr), K(tmp_ts_info)); + return ret; +} + +int ObDupTableLSTsSyncMgr::get_ts_info_cache_(const common::ObAddr &addr, DupTableTsInfo &ts_info) +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(ts_info_cache_.get_refactored(addr, ts_info))) { + DUP_TABLE_LOG(WARN, "get ts info cache failed"); + } + + return ret; +} + +int ObDupTableLSTsSyncMgr::get_local_ts_info(DupTableTsInfo &ts_info) +{ + int ret = OB_SUCCESS; + + DupTableTsInfo tmp_ts_info; + + // We need get max_replayed_scn before max_read_version and max_commit_version. 
+ // Because max_read_version must be acquired after replaying before_prepare + if (OB_FAIL(dup_ls_handle_ptr_->get_log_handler()->get_max_decided_scn( + tmp_ts_info.max_replayed_scn_))) { + DUP_TABLE_LOG(WARN, "get max replayed ts failed", K(ret)); + } else if (OB_FAIL(dup_ls_handle_ptr_->check_and_update_max_replayed_scn( + tmp_ts_info.max_replayed_scn_))) { + DUP_TABLE_LOG(WARN, "invalid max replayed scn", K(ret), K(ls_id_), + K(tmp_ts_info.max_replayed_scn_)); + } else { + tmp_ts_info.max_commit_version_ = + MTL(ObTransService *)->get_tx_version_mgr().get_max_commit_ts(false); + tmp_ts_info.max_read_version_ = MTL(ObTransService *)->get_tx_version_mgr().get_max_read_ts(); + ts_info.update(tmp_ts_info); + } + + return ret; +} + +int ObDupTableLSTsSyncMgr::get_cache_ts_info(const common::ObAddr &addr, DupTableTsInfo &ts_info) +{ + int ret = OB_SUCCESS; + + SpinRLockGuard guard(ts_sync_lock_); + + if (OB_FAIL(get_ts_info_cache_(addr, ts_info))) { + DUP_TABLE_LOG(WARN, "get ts info cache failed", K(ret)); + } + DUP_TABLE_LOG(DEBUG, "get ts info cache", K(ret), K(ret), K(ts_info)); + return ret; +} + +int ObDupTableLSTsSyncMgr::get_lease_mgr_stat(ObDupLSLeaseMgrStatIterator &collect_iter, + FollowerLeaseMgrStatArr &collect_arr) +{ + int ret = OB_SUCCESS; + DupTableTsInfo tmp_info; + SpinRLockGuard r_lock(ts_sync_lock_); + + for (int i = 0; i < collect_arr.count() && OB_SUCC(ret); i++) { + ObDupTableLSLeaseMgrStat &tmp_stat = collect_arr.at(i); + const common::ObAddr follower_addr = tmp_stat.get_follower_addr(); + + if (OB_SUCC(ts_info_cache_.get_refactored(follower_addr, tmp_info))) { + // if exist, update tmp_stat + tmp_stat.set_max_replayed_scn(tmp_info.max_replayed_scn_); + tmp_stat.set_max_commit_version( + tmp_info.max_commit_version_.convert_to_ts(true /*ignore invalid*/)); + tmp_stat.set_max_read_version( + tmp_info.max_read_version_.convert_to_ts(true /*ignore invalid*/)); + } else if (OB_HASH_NOT_EXIST == ret) { + // rewrite retcode + ret = OB_SUCCESS; + } else { + DUP_TABLE_LOG(WARN, "get ts info failed", K(ret)); + } + // push into iter + if (OB_SUCC(ret)) { + if (OB_FAIL(collect_iter.push(tmp_stat))) { + DUP_TABLE_LOG(WARN, "push into virtual iter failed", K(ret)); + } + } + } + + return ret; +} + +int ObDupTableLSTsSyncMgr::DiagInfoGenerator::operator()( + const common::hash::HashMapPair &hash_pair) +{ + int ret = OB_SUCCESS; + + const char *addr_str = to_cstring(hash_pair.first); + + ret = ::oceanbase::common::databuff_printf( + info_buf_, info_buf_len_, info_buf_pos_, + "%s[%sCached Ts Info] owner=%s, max_commit_version=%s, max_read_version=%s, " + "max_replayed_scn=%s\n", + DupTableDiagStd::DUP_DIAG_INDENT_SPACE, DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, addr_str, + to_cstring(hash_pair.second.max_commit_version_), + to_cstring(hash_pair.second.max_read_version_), + to_cstring(hash_pair.second.max_replayed_scn_)); + + return ret; +} + +void ObDupTableLSTsSyncMgr::print_ts_sync_diag_info_log(const bool is_master) +{ + int ret = OB_SUCCESS; + + SpinRLockGuard guard(ts_sync_lock_); + + const uint64_t TS_SYNC_PRINT_BUF_LEN = + DupTableDiagStd::DUP_DIAG_INFO_LOG_BUF_LEN[DupTableDiagStd::TypeIndex::TS_SYNC_INDEX]; + // if (OB_NOT_NULL(dup_ls_handle_ptr_)) { + const int64_t tenant_id = MTL_ID(); + const ObLSID ls_id = ls_id_; + + if (OB_ISNULL(ts_sync_diag_info_log_buf_)) { + if (OB_ISNULL(ts_sync_diag_info_log_buf_ = + static_cast(ob_malloc(TS_SYNC_PRINT_BUF_LEN, "DupTableDiag")))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + _DUP_TABLE_LOG(WARN, "%salloc ts sync diag info buf failed, 
ret=%d, ls_id=%lu", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + } + } + + if (OB_SUCC(ret) && is_master) { + DiagInfoGenerator diag_info_gen(ts_sync_diag_info_log_buf_, TS_SYNC_PRINT_BUF_LEN); + if (OB_FAIL(hash_for_each_update(ts_info_cache_, diag_info_gen))) { + _DUP_TABLE_LOG(WARN, "%sprint ts info cache failed, ret=%d, ls_id=%lu", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, ret, ls_id.id()); + } + + ts_sync_diag_info_log_buf_[MIN(diag_info_gen.get_buf_pos(), TS_SYNC_PRINT_BUF_LEN - 1)] = '\0'; + + _DUP_TABLE_LOG(INFO, "[%sTs Sync Cache] tenant: %lu, ls: %lu , ts_info_count: %lu\n%s", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, tenant_id, ls_id.id(), + ts_info_cache_.size(), ts_sync_diag_info_log_buf_); + } + // } +} + +} // namespace transaction + +} // namespace oceanbase diff --git a/src/storage/tx/ob_dup_table_ts_sync.h b/src/storage/tx/ob_dup_table_ts_sync.h new file mode 100644 index 0000000000..00e69aaf50 --- /dev/null +++ b/src/storage/tx/ob_dup_table_ts_sync.h @@ -0,0 +1,156 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +#ifndef OCEANBASE_TRANSACTION_DUP_TABLE_TS_SYNC_H +#define OCEANBASE_TRANSACTION_DUP_TABLE_TS_SYNC_H + +#include "lib/list/ob_dlist.h" +#include "lib/lock/ob_spin_rwlock.h" +#include "storage/tx/ob_trans_define.h" +#include "storage/tx/ob_dup_table_stat.h" + +namespace oceanbase +{ + +namespace transaction +{ + +class ObDupTableTsSyncRequest; +class ObDupTableTsSyncResponse; +class ObDupTableLSHandler; + +struct DupTableTsInfo +{ + // common::ObAddr addr_; + share::SCN max_replayed_scn_; + share::SCN max_read_version_; + share::SCN max_commit_version_; + // TODO last recive time + + DupTableTsInfo() { reset(); } + void reset() + { + max_replayed_scn_.reset(); + max_read_version_.reset(); + max_commit_version_.reset(); + } + void update(const DupTableTsInfo &ts_info); + bool is_valid() + { + return max_replayed_scn_.is_valid() && max_read_version_.is_valid() + && max_commit_version_.is_valid(); + } + + TO_STRING_KV(K(max_replayed_scn_), K(max_read_version_), K(max_commit_version_)); +}; + +typedef common::hash::ObHashMap + DupTableTsInfoMap; + +class ObDupTableLSTsSyncMgr +{ +public: + const int64_t TS_INFO_CACHE_REFRESH_INTERVAL = 1 * 1000 * 1000; // 1s +public: + ObDupTableLSTsSyncMgr() : ts_sync_diag_info_log_buf_(nullptr) {} + int init(ObDupTableLSHandler *dup_ls_handle); + + void reset() + { + is_stopped_ = false; + ls_id_.reset(); + is_master_ = false; + + last_refresh_ts_info_cache_ts_ = 0; + dup_ls_handle_ptr_ = nullptr; + ts_info_cache_.destroy(); + if (OB_NOT_NULL(ts_sync_diag_info_log_buf_)) { + ob_free(ts_sync_diag_info_log_buf_); + } + ts_sync_diag_info_log_buf_ = nullptr; + } + + void destroy() { reset(); } + + bool is_master() { return ATOMIC_LOAD(&is_master_); } + + // redo sync + int validate_replay_ts(const common::ObAddr &dst, + const share::SCN &target_replay_scn, + const ObTransID &tx_id, + bool &replay_all_redo, + share::SCN &max_read_version); + // pre_commit sync + int validate_commit_version(const common::ObAddr &dst, const share::SCN 
target_commit_scn); + + // update all ts info cache + int update_all_ts_info_cache(); + + // try to sync ts info + int request_ts_info(const common::ObAddr &dst); + + int leader_takeover(); + int leader_revoke(); + + int handle_ts_sync_request(const ObDupTableTsSyncRequest &ts_sync_req); + int handle_ts_sync_response(const ObDupTableTsSyncResponse &ts_sync_reps); + + int get_local_ts_info(DupTableTsInfo &ts_info); + int get_cache_ts_info(const common::ObAddr &addr, DupTableTsInfo &ts_info); + + void print_ts_sync_diag_info_log(const bool is_master); + + int get_lease_mgr_stat(ObDupLSLeaseMgrStatIterator &collect_iter, + FollowerLeaseMgrStatArr &arr); +private: + int clean_ts_info_cache_(); + int request_ts_info_by_rpc_(const common::ObAddr &addr, const share::SCN &leader_commit_scn); + int update_ts_info_(const common::ObAddr &addr, const DupTableTsInfo &ts_info); + int get_ts_info_cache_(const common::ObAddr &addr, DupTableTsInfo &ts_info); + + class DiagInfoGenerator + { + public: + DiagInfoGenerator(char *info_buf, int64_t info_buf_len) + : info_buf_(info_buf), info_buf_len_(info_buf_len), info_buf_pos_(0) + {} + + int64_t get_buf_pos() { return info_buf_pos_; } + + int operator()(const common::hash::HashMapPair &hash_pair); + + private: + char *info_buf_; + int64_t info_buf_len_; + int64_t info_buf_pos_; + }; + +private: + SpinRWLock ts_sync_lock_; + + bool is_stopped_; + share::ObLSID ls_id_; + bool is_master_; + + ObDupTableLSHandler *dup_ls_handle_ptr_; + DupTableTsInfoMap ts_info_cache_; + + int64_t last_refresh_ts_info_cache_ts_; + + char *ts_sync_diag_info_log_buf_; + + // dup table cb list order by ts + // DupTableCbList replay_ts_list_; + // DupTableCbList commit_ts_list_; +}; +} // namespace transaction +} // namespace oceanbase + +#endif diff --git a/src/storage/tx/ob_dup_table_util.cpp b/src/storage/tx/ob_dup_table_util.cpp new file mode 100644 index 0000000000..912db99f8b --- /dev/null +++ b/src/storage/tx/ob_dup_table_util.cpp @@ -0,0 +1,1535 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
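+// ObDupTabletScanTask is a self re-arming timer task. On each expiry, execute_for_dup_ls_()
+// roughly does the following (error handling omitted):
+//   1. refresh the tenant's dup-tablet schema set and locate the duplicate LS
+//      (refresh_dup_tablet_schema_);
+//   2. iterate the tablet ids of that LS and call refresh_dup_table_tablet() on the LS's
+//      dup_table_ls_handler for each one, marking it as dup or non-dup;
+//   3. call gc_dup_tablets() so tablets that lost the dup attribute can be discarded;
+//   4. append the LS to the dup-table loop worker if it still owns dup tablets.
+// Afterwards runTimerTask() re-registers the task with DUP_TABLET_SCAN_INTERVAL.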
+ +#include "lib/container/ob_bit_set.h" +#include "ob_dup_table_lease.h" +#include "ob_dup_table_tablets.h" +#include "ob_dup_table_ts_sync.h" +#include "ob_dup_table_util.h" +#include "storage/tablet/ob_tablet_iterator.h" +#include "storage/tx/ob_trans_service.h" +#include "storage/tx/ob_tx_log_adapter.h" +#include "storage/tx_storage/ob_ls_service.h" + +namespace oceanbase +{ + +using namespace storage; +using namespace common; +using namespace share; + +namespace transaction +{ + +typedef ObSEArray TabletIDArray; + +//************************************************************************************************************* +//**** ObDupTabletScanTask +//************************************************************************************************************* +void ObDupTabletScanTask::reset() +{ + tenant_id_ = 0; + dup_table_scan_timer_ = nullptr; + dup_loop_worker_ = nullptr; + last_execute_time_ = 0; + max_execute_interval_ = 0; +} + +int ObDupTabletScanTask::make(const int64_t tenant_id, + ObDupTableLeaseTimer *scan_timer, + ObDupTableLoopWorker *loop_worker) +{ + int ret = OB_SUCCESS; + if (tenant_id <= 0 || OB_ISNULL(loop_worker) || OB_ISNULL(scan_timer)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret), K(tenant_id), KP(loop_worker), KP(scan_timer)); + } else { + tenant_id_ = tenant_id; + dup_table_scan_timer_ = scan_timer; + dup_loop_worker_ = loop_worker; + // ObTransTask::make(ObTransRetryTaskType::DUP_TABLET_SCAN_TASK); + // set_retry_interval_us(DUP_TABLET_SCAN_INTERVAL, DUP_TABLET_SCAN_INTERVAL); + } + return ret; +} + +void ObDupTabletScanTask::runTimerTask() +{ + int tmp_ret = OB_SUCCESS; + + if (tenant_id_ <= 0 || OB_ISNULL(dup_loop_worker_) || OB_ISNULL(dup_table_scan_timer_)) { + tmp_ret = OB_NOT_INIT; + DUP_TABLE_LOG_RET(WARN, tmp_ret, "invalid arguments", K(tmp_ret), K(tenant_id_), + KP(dup_loop_worker_), KP(dup_table_scan_timer_)); + } else { + if (OB_TMP_FAIL(execute_for_dup_ls_())) { + DUP_TABLE_LOG_RET(WARN, tmp_ret, "execute dup ls scan failed", K(tmp_ret)); + } + + dup_table_scan_timer_->unregister_timeout_task(*this); + dup_table_scan_timer_->register_timeout_task(*this, DUP_TABLET_SCAN_INTERVAL); + } +} + +int ObDupTabletScanTask::refresh_dup_tablet_schema_( + bool need_refresh, + ObTenantDupTabletSchemaHelper::TabletIDSet &tenant_dup_tablet_set, + share::ObLSStatusInfo &dup_ls_status_info) +{ + int ret = OB_SUCCESS; + bool has_dup_ls = false; + if (need_refresh) { + + share::ObLSStatusOperator ls_status_op; + if (OB_FAIL(ls_status_op.get_duplicate_ls_status_info(MTL_ID(), *GCTX.sql_proxy_, + dup_ls_status_info))) { + if (OB_ENTRY_NOT_EXIST == ret) { + DUP_TABLE_LOG(WARN, "no duplicate ls", K(dup_ls_status_info)); + } else { + DUP_TABLE_LOG(WARN, "get duplicate ls status info failed", K(ret), K(dup_ls_status_info)); + } + } else { + DUP_TABLE_LOG(INFO, "find a duplicate ls", K(ret), K(dup_ls_status_info)); + } + + if (OB_SUCC(ret) && dup_ls_status_info.is_duplicate_ls()) { + if (OB_FAIL(ret)) { + // do nothing + } else if (!tenant_dup_tablet_set.created()) { + if (OB_FAIL(tenant_dup_tablet_set.create(512))) { + DUP_TABLE_LOG(WARN, "init dup tablet cache failed", K(ret)); + } + } + + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(dup_schema_helper_.refresh_and_get_tablet_set(tenant_dup_tablet_set))) { + DUP_TABLE_LOG(WARN, "refresh dup tablet set failed", K(ret)); + } + } + } + return ret; +} + +int ObDupTabletScanTask::execute_() +{ + int ret = OB_SUCCESS; + int iter_ret = OB_SUCCESS; + + ObSharedGuard 
ls_iter_guard; + ObLSIterator *ls_iter_ptr = nullptr; + ObLS *cur_ls_ptr = nullptr; + TabletIDArray tablet_id_array; + ObTenantDupTabletSchemaHelper::TabletIDSet tenant_dup_tablet_set; + bool need_refreh_dup_schema = true; + share::ObLSStatusInfo dup_ls_status_info; + + // compute scan task max execute interval + const int64_t cur_time = ObTimeUtility::fast_current_time(); + if (cur_time - last_execute_time_ > 0) { + if (0 != last_execute_time_) { + max_execute_interval_ = max(max_execute_interval_, cur_time - last_execute_time_); + last_execute_time_ = cur_time; + } else { + last_execute_time_ = ObTimeUtility::fast_current_time(); + } + } + + if (OB_ISNULL(MTL(ObLSService *)) || OB_ISNULL(dup_loop_worker_) + || (OB_FAIL(MTL(ObLSService *)->get_ls_iter(ls_iter_guard, ObLSGetMod::TRANS_MOD)) + || !ls_iter_guard.is_valid())) { + if (OB_SUCC(ret)) { + ret = OB_INVALID_ARGUMENT; + } + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret)); + } else if (OB_ISNULL(ls_iter_ptr = ls_iter_guard.get_ptr())) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret)); + } else { + iter_ret = OB_SUCCESS; + cur_ls_ptr = nullptr; + // const int64_t gc_time = ObTimeUtility::fast_current_time(); + while (OB_SUCCESS == (iter_ret = ls_iter_ptr->get_next(cur_ls_ptr))) { + tablet_id_array.reset(); + + ObRole ls_role; + int64_t unused_proposal_id; + + if (OB_ISNULL(cur_ls_ptr)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid ls ptr", K(ret), KP(cur_ls_ptr)); + } else if (!cur_ls_ptr->get_dup_table_ls_handler()->is_master()) { + // do nothing + DUP_TABLE_LOG(DEBUG, "ls not leader", K(cur_ls_ptr->get_ls_id())); + } else if (OB_FAIL(refresh_dup_tablet_schema_(need_refreh_dup_schema, tenant_dup_tablet_set, dup_ls_status_info))) { + DUP_TABLE_LOG(INFO, "refresh dup table schema failed", K(ret)); + } else if (OB_FALSE_IT(need_refreh_dup_schema = false)) { + // do nothing + } else { + // TODO + // Only need all tablet_ids in LS. 
+ // No need to get tx data from tablet_meta + storage::ObHALSTabletIDIterator ls_tablet_id_iter(cur_ls_ptr->get_ls_id(), true); + if (OB_FAIL(cur_ls_ptr->build_tablet_iter(ls_tablet_id_iter))) { + DUP_TABLE_LOG(WARN, "build ls tablet iter failed", K(cur_ls_ptr->get_ls_id())); + } else if (!ls_tablet_id_iter.is_valid()) { + DUP_TABLE_LOG(WARN, "invalid tablet id iterator", K(cur_ls_ptr->get_ls_id())); + } else { + ObTabletID tmp_tablet_id; + bool is_dup_tablet = false; + int64_t refresh_time = ObTimeUtility::fast_current_time(); + while (OB_SUCC(ls_tablet_id_iter.get_next_tablet_id(tmp_tablet_id))) { + is_dup_tablet = false; + ret = tenant_dup_tablet_set.exist_refactored(tmp_tablet_id); + if (OB_HASH_EXIST == ret) { + is_dup_tablet = true; + ret = OB_SUCCESS; + } else if (OB_HASH_NOT_EXIST == ret) { + is_dup_tablet = false; + ret = OB_SUCCESS; + } else { + DUP_TABLE_LOG( + WARN, "Failed to check whether the tablet exists in the tenant_dup_tablet_set", + K(ret), K(cur_ls_ptr->get_ls_id()), K(tmp_tablet_id)); + } + + if (!cur_ls_ptr->get_dup_table_ls_handler()->is_inited() && !is_dup_tablet) { + // do nothing + } else if (OB_FAIL(cur_ls_ptr->get_dup_table_ls_handler()->init(is_dup_tablet)) + && OB_INIT_TWICE != ret) { + DUP_TABLE_LOG(WARN, "init dup tablet ls handler", K(ret)); + } else if (OB_FAIL(cur_ls_ptr->get_dup_table_ls_handler()->refresh_dup_table_tablet( + tmp_tablet_id, is_dup_tablet, refresh_time))) { + if (is_dup_tablet || OB_NOT_INIT != ret) { + DUP_TABLE_LOG(WARN, "refresh ls dup table tablets failed", K(ret), K(tmp_tablet_id), + K(is_dup_tablet)); + } else { + ret = OB_SUCCESS; + } + } + } + + if (OB_ITER_END == ret) { + // ret = OB_SUCCESS; + if (OB_FAIL(cur_ls_ptr->get_dup_table_ls_handler()->gc_dup_tablets( + refresh_time, max_execute_interval_))) { + DUP_TABLE_LOG(WARN, "ls gc dup_tablet failed", KR(ret), K(refresh_time), + K(max_execute_interval_)); + } + } + } + } + // refresh dup_table_ls on leader and follower + + if (!cur_ls_ptr->get_dup_table_ls_handler()->has_dup_tablet()) { + // do nothing + } else if (OB_FAIL(dup_loop_worker_->append_dup_table_ls(cur_ls_ptr->get_ls_id()))) { + DUP_TABLE_LOG(WARN, "refresh dup_table ls failed", K(ret)); + } + } + } + + // DUP_TABLE_LOG(INFO, "scan all ls to find dup_tablet", KR(ret), K(tenant_dup_tablet_set.size())); + if (tenant_dup_tablet_set.created()) { + tenant_dup_tablet_set.destroy(); + } + + if (OB_FAIL(ret)) { + DUP_TABLE_LOG(WARN, "scan all ls to find dup_tablet failed", KR(ret)); + } + return ret; +} + +int ObDupTabletScanTask::execute_for_dup_ls_() +{ + int ret = OB_SUCCESS; + + TabletIDArray tablet_id_array; + ObTenantDupTabletSchemaHelper::TabletIDSet tenant_dup_tablet_set; + bool need_refreh_dup_schema = true; + ObLSHandle ls_handle; + share::ObLSStatusInfo dup_ls_status_info; + + // compute scan task max execute interval + const int64_t cur_time = ObTimeUtility::fast_current_time(); + if (cur_time - last_execute_time_ > 0) { + if (0 != last_execute_time_) { + max_execute_interval_ = max(max_execute_interval_, cur_time - last_execute_time_); + last_execute_time_ = cur_time; + } else { + last_execute_time_ = ObTimeUtility::fast_current_time(); + } + } + + if (OB_ISNULL(MTL(ObLSService *)) || OB_ISNULL(dup_loop_worker_)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret)); + } else if (OB_FAIL(refresh_dup_tablet_schema_(need_refreh_dup_schema, tenant_dup_tablet_set, + dup_ls_status_info))) { + DUP_TABLE_LOG(WARN, "refresh dup table schema failed", K(ret)); + } else if 
(!dup_ls_status_info.is_duplicate_ls()) { + // do nothing + } else if (OB_FAIL(MTL(ObLSService *) + ->get_ls(dup_ls_status_info.ls_id_, ls_handle, ObLSGetMod::TRANS_MOD))) { + DUP_TABLE_LOG(WARN, "get dup ls failed", K(ret), K(dup_ls_status_info)); + } else { + + ObLS *cur_ls_ptr = ls_handle.get_ls(); + if (OB_ISNULL(cur_ls_ptr)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid ls ptr", K(ret), KP(cur_ls_ptr)); + } else if (!cur_ls_ptr->get_dup_table_ls_handler()->is_master()) { + // do nothing + DUP_TABLE_LOG(DEBUG, "ls not leader", K(cur_ls_ptr->get_ls_id())); + } else if (OB_FAIL(refresh_dup_tablet_schema_(need_refreh_dup_schema, tenant_dup_tablet_set, + dup_ls_status_info))) { + DUP_TABLE_LOG(INFO, "refresh dup table schema failed", K(ret)); + } else if (OB_FALSE_IT(need_refreh_dup_schema = false)) { + // do nothing + } else { + storage::ObHALSTabletIDIterator ls_tablet_id_iter(cur_ls_ptr->get_ls_id(), true); + if (OB_FAIL(cur_ls_ptr->build_tablet_iter(ls_tablet_id_iter))) { + DUP_TABLE_LOG(WARN, "build ls tablet iter failed", K(cur_ls_ptr->get_ls_id())); + } else if (!ls_tablet_id_iter.is_valid()) { + DUP_TABLE_LOG(WARN, "invalid tablet id iterator", K(cur_ls_ptr->get_ls_id())); + } else { + ObTabletID tmp_tablet_id; + bool is_dup_tablet = false; + int64_t refresh_time = ObTimeUtility::fast_current_time(); + while (OB_SUCC(ls_tablet_id_iter.get_next_tablet_id(tmp_tablet_id))) { + is_dup_tablet = false; + ret = tenant_dup_tablet_set.exist_refactored(tmp_tablet_id); + if (OB_HASH_EXIST == ret) { + is_dup_tablet = true; + ret = OB_SUCCESS; + } else if (OB_HASH_NOT_EXIST == ret) { + is_dup_tablet = false; + ret = OB_SUCCESS; + } else { + DUP_TABLE_LOG(WARN, + "Failed to check whether the tablet exists in the tenant_dup_tablet_set", + K(ret), K(cur_ls_ptr->get_ls_id()), K(tmp_tablet_id)); + } + + if (!cur_ls_ptr->get_dup_table_ls_handler()->is_inited() && !is_dup_tablet) { + // do nothing + } else if (OB_FAIL(cur_ls_ptr->get_dup_table_ls_handler()->init(is_dup_tablet)) + && OB_INIT_TWICE != ret) { + DUP_TABLE_LOG(WARN, "init dup tablet ls handler", K(ret)); + } else if (OB_FAIL(cur_ls_ptr->get_dup_table_ls_handler()->refresh_dup_table_tablet( + tmp_tablet_id, is_dup_tablet, refresh_time))) { + if (is_dup_tablet || OB_NOT_INIT != ret) { + DUP_TABLE_LOG(WARN, "refresh ls dup table tablets failed", K(ret), K(tmp_tablet_id), + K(is_dup_tablet)); + } else { + ret = OB_SUCCESS; + } + } + } + + if (OB_ITER_END == ret) { + // ret = OB_SUCCESS; + if (OB_FAIL(cur_ls_ptr->get_dup_table_ls_handler()->gc_dup_tablets( + refresh_time, max_execute_interval_))) { + DUP_TABLE_LOG(WARN, "ls gc dup_tablet failed", KR(ret), K(refresh_time), + K(max_execute_interval_)); + } + } + } + } + // refresh dup_table_ls on leader and follower + + if (!cur_ls_ptr->get_dup_table_ls_handler()->has_dup_tablet()) { + // do nothing + } else if (OB_FAIL(dup_loop_worker_->append_dup_table_ls(cur_ls_ptr->get_ls_id()))) { + DUP_TABLE_LOG(WARN, "refresh dup_table ls failed", K(ret)); + } + } + + if (tenant_dup_tablet_set.created()) { + tenant_dup_tablet_set.destroy(); + } + + if (OB_FAIL(ret)) { + DUP_TABLE_LOG(WARN, "scan dup ls to find dup_tablet failed", KR(ret)); + } + return ret; +} + +//************************************************************************************************************* +//**** ObDupTableLSHandler +//************************************************************************************************************* + +int ObDupTableLSHandler::init(bool is_dup_table) +{ + int ret = OB_SUCCESS; + 
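+  // The sub-managers (lease / ts-sync / tablets) are allocated lazily: init(false) is a
+  // no-op, so an LS only sets up dup-table state once the scan task actually discovers a
+  // dup tablet on it.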
if (is_dup_table) { + if (ATOMIC_LOAD(&is_inited_)) { + ret = OB_INIT_TWICE; + } else { + // init by dup_tablet_scan_task_. + lease_mgr_ptr_ = + static_cast(ob_malloc(sizeof(ObDupTableLSLeaseMgr), "DupTable")); + ts_sync_mgr_ptr_ = static_cast( + ob_malloc(sizeof(ObDupTableLSTsSyncMgr), "DupTable")); + tablets_mgr_ptr_ = + static_cast(ob_malloc(sizeof(ObLSDupTabletsMgr), "DupTable")); + + if (OB_ISNULL(lease_mgr_ptr_) || OB_ISNULL(ts_sync_mgr_ptr_) || OB_ISNULL(tablets_mgr_ptr_)) { + ret = OB_ALLOCATE_MEMORY_FAILED; + DUP_TABLE_LOG(WARN, "alloc memory in ObDupTableLSHandler::init failed", K(ret), + KP(lease_mgr_ptr_), KP(ts_sync_mgr_ptr_), KP(tablets_mgr_ptr_)); + } else { + new (lease_mgr_ptr_) ObDupTableLSLeaseMgr(); + new (ts_sync_mgr_ptr_) ObDupTableLSTsSyncMgr(); + new (tablets_mgr_ptr_) ObLSDupTabletsMgr(); + + if (OB_FAIL(lease_mgr_ptr_->init(this))) { + DUP_TABLE_LOG(WARN, "init lease_mgr failed", K(ret)); + } else if (OB_FAIL(ts_sync_mgr_ptr_->init(this))) { + DUP_TABLE_LOG(WARN, "init ts_sync_mgr failed", K(ret)); + } else if (OB_FAIL(tablets_mgr_ptr_->init(this))) { + DUP_TABLE_LOG(WARN, "init tablets_mgr failed", K(ret)); + } else if (ATOMIC_LOAD(&is_master_) && OB_FAIL(leader_takeover_(true /*is_resume*/))) { + DUP_TABLE_LOG(WARN, "leader takeover in init failed", K(ret)); + } else { + ATOMIC_STORE(&is_inited_, true); + } + } + + if (OB_FAIL(ret)) { + if (OB_NOT_NULL(lease_mgr_ptr_)) { + lease_mgr_ptr_->destroy(); + ob_free(lease_mgr_ptr_); + } + if (OB_NOT_NULL(ts_sync_mgr_ptr_)) { + ts_sync_mgr_ptr_->destroy(); + ob_free(ts_sync_mgr_ptr_); + } + if (OB_NOT_NULL(tablets_mgr_ptr_)) { + tablets_mgr_ptr_->destroy(); + ob_free(tablets_mgr_ptr_); + } + lease_mgr_ptr_ = nullptr; + ts_sync_mgr_ptr_ = nullptr; + tablets_mgr_ptr_ = nullptr; + } + DUP_TABLE_LOG(INFO, "ls handler init", K(ret), K(ls_id_), K(is_master()), K(is_follower()), + KPC(lease_mgr_ptr_), KPC(tablets_mgr_ptr_), KP(ts_sync_mgr_ptr_)); + } + } + return ret; +} + +void ObDupTableLSHandler::destroy() { reset(); } + +void ObDupTableLSHandler::reset() +{ + // ATOMIC_STORE(&is_inited_, false); + is_inited_ = false; + is_master_ = false; + + dup_ls_ckpt_.reset(); + + if (OB_NOT_NULL(lease_mgr_ptr_)) { + lease_mgr_ptr_->destroy(); + ob_free(lease_mgr_ptr_); + } + if (OB_NOT_NULL(ts_sync_mgr_ptr_)) { + ts_sync_mgr_ptr_->destroy(); + ob_free(ts_sync_mgr_ptr_); + } + if (OB_NOT_NULL(tablets_mgr_ptr_)) { + tablets_mgr_ptr_->destroy(); + ob_free(tablets_mgr_ptr_); + } + if (OB_NOT_NULL(log_operator_)) { + share::mtl_free(log_operator_); + } + + lease_mgr_ptr_ = nullptr; + ts_sync_mgr_ptr_ = nullptr; + tablets_mgr_ptr_ = nullptr; + log_operator_ = nullptr; + + total_block_confirm_ref_ = 0; + self_max_replayed_scn_.reset(); + committing_dup_trx_cnt_ = 0; + + interface_stat_.reset(); + for (int i = 0; i < DupTableDiagStd::TypeIndex::MAX_INDEX; i++) { + last_diag_info_print_us_[i] = 0; + } +} + +bool ObDupTableLSHandler::is_master() +{ + bool sub_master = true; + if (OB_NOT_NULL(ts_sync_mgr_ptr_)) { + sub_master = sub_master && ts_sync_mgr_ptr_->is_master(); + } + if (OB_NOT_NULL(lease_mgr_ptr_)) { + sub_master = sub_master && lease_mgr_ptr_->is_master(); + } + if (OB_NOT_NULL(tablets_mgr_ptr_)) { + sub_master = sub_master && tablets_mgr_ptr_->is_master(); + } + + return (ATOMIC_LOAD(&is_master_)) && sub_master; +} + +bool ObDupTableLSHandler::is_follower() +{ + bool sub_not_master = true; + if (OB_NOT_NULL(ts_sync_mgr_ptr_)) { + sub_not_master = sub_not_master && !ts_sync_mgr_ptr_->is_master(); + } + if 
(OB_NOT_NULL(lease_mgr_ptr_)) { + sub_not_master = sub_not_master && !lease_mgr_ptr_->is_master(); + } + if (OB_NOT_NULL(tablets_mgr_ptr_)) { + sub_not_master = sub_not_master && !tablets_mgr_ptr_->is_master(); + } + + return (!ATOMIC_LOAD(&is_master_)) && sub_not_master; +} + +int ObDupTableLSHandler::ls_loop_handle() +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + // TODO check stopped + if (!ATOMIC_LOAD(&is_inited_) || OB_ISNULL(lease_mgr_ptr_) || OB_ISNULL(tablets_mgr_ptr_) + || OB_ISNULL(ts_sync_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "dup table ls handle not init", K(ret)); + } else if (!has_dup_tablet()) { + ret = OB_NO_TABLET; + DUP_TABLE_LOG(INFO, "no dup tablet, no need to do loop worker", K(ret), KPC(tablets_mgr_ptr_)); + } else { + if (is_master()) { + if (OB_ISNULL(log_operator_) || !log_operator_->is_busy()) { + // handle lease request and collect follower info + DupTableTsInfo min_lease_ts_info; + if (OB_FAIL(get_min_lease_ts_info_(min_lease_ts_info))) { + DUP_TABLE_LOG(WARN, "get min lease ts info failed", K(ret), K(min_lease_ts_info)); + // try confirm tablets and check tablet need log + } else if (OB_FAIL(try_to_confirm_tablets_(min_lease_ts_info.max_replayed_scn_))) { + DUP_TABLE_LOG(WARN, "try confirm tablets failed", K(ret), K(min_lease_ts_info)); + } else { + // submit lease log + if (OB_FAIL(prepare_log_operator_())) { + DUP_TABLE_LOG(WARN, "prepare log operator failed", K(ret)); + } else if (OB_FAIL(log_operator_->submit_log_entry())) { + DUP_TABLE_LOG(WARN, "submit dup table log entry failed", K(ret)); + } + } + } + + // update ts info cache + if (OB_TMP_FAIL(ts_sync_mgr_ptr_->update_all_ts_info_cache())) { + DUP_TABLE_LOG(WARN, "update all ts info cache failed", K(tmp_ret)); + } + + } else if (is_follower()) { + if (OB_FAIL(lease_mgr_ptr_->follower_handle())) { + DUP_TABLE_LOG(WARN, "follower lease handle failed", K(ret)); + } + } + DUP_TABLE_LOG(DEBUG, "loop running : dup table ls handler", K(ret), K(ls_id_), K(is_master()), + KPC(lease_mgr_ptr_), KPC(tablets_mgr_ptr_), KPC(log_operator_)); + + const int64_t fast_cur_time = ObTimeUtility::fast_current_time(); + const bool is_leader = is_master(); + + if (fast_cur_time - last_diag_info_print_us_[DupTableDiagStd::TypeIndex::LEASE_INDEX] + >= DupTableDiagStd::DUP_DIAG_PRINT_INTERVAL[DupTableDiagStd::TypeIndex::LEASE_INDEX]) { + _DUP_TABLE_LOG(INFO, "[%sDup Interface Stat] tenant: %lu, ls: %lu, is_master: %s, %s", + DupTableDiagStd::DUP_DIAG_COMMON_PREFIX, MTL_ID(), ls_id_.id(), + to_cstring(is_leader), to_cstring(interface_stat_)); + } + + if (fast_cur_time - last_diag_info_print_us_[DupTableDiagStd::TypeIndex::LEASE_INDEX] + >= DupTableDiagStd::DUP_DIAG_PRINT_INTERVAL[DupTableDiagStd::TypeIndex::LEASE_INDEX]) { + lease_mgr_ptr_->print_lease_diag_info_log(is_leader); + last_diag_info_print_us_[DupTableDiagStd::TypeIndex::LEASE_INDEX] = fast_cur_time; + } + + if (fast_cur_time - last_diag_info_print_us_[DupTableDiagStd::TypeIndex::TABLET_INDEX] + >= DupTableDiagStd::DUP_DIAG_PRINT_INTERVAL[DupTableDiagStd::TypeIndex::TABLET_INDEX]) { + tablets_mgr_ptr_->print_tablet_diag_info_log(is_leader); + last_diag_info_print_us_[DupTableDiagStd::TypeIndex::TABLET_INDEX] = fast_cur_time; + } + + if (fast_cur_time - last_diag_info_print_us_[DupTableDiagStd::TypeIndex::TS_SYNC_INDEX] + >= DupTableDiagStd::DUP_DIAG_PRINT_INTERVAL[DupTableDiagStd::TypeIndex::TS_SYNC_INDEX]) { + ts_sync_mgr_ptr_->print_ts_sync_diag_info_log(is_leader); + last_diag_info_print_us_[DupTableDiagStd::TypeIndex::TS_SYNC_INDEX] = 
fast_cur_time; + } + } + + return ret; +} + +int ObDupTableLSHandler::refresh_dup_table_tablet(common::ObTabletID tablet_id, + bool is_dup_table, + int64_t refresh_time) +{ + int ret = OB_SUCCESS; + + if (!ATOMIC_LOAD(&is_inited_) || OB_ISNULL(tablets_mgr_ptr_)) { + ret = OB_NOT_INIT; + if (is_dup_table) { + DUP_TABLE_LOG(WARN, "ObDupTableLSHandler not init", K(ret), K(is_inited_), + KP(tablets_mgr_ptr_)); + } + } else if (OB_FAIL(tablets_mgr_ptr_->refresh_dup_tablet(tablet_id, is_dup_table, refresh_time))) { + if (ret != OB_NOT_MASTER) { + DUP_TABLE_LOG(WARN, "refresh dup table tablet failed", K(ret), K(tablet_id), K(is_dup_table)); + } else { + ret = OB_SUCCESS; + } + } + + return ret; +} + +int ObDupTableLSHandler::recive_lease_request(const ObDupTableLeaseRequest &lease_req) +{ + int ret = OB_SUCCESS; + if (!ATOMIC_LOAD(&is_inited_) || OB_ISNULL(lease_mgr_ptr_) || OB_ISNULL(ts_sync_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "DupTableLSHandler not init", K(ret), K(is_inited_), KP(lease_mgr_ptr_)); + } else if (OB_FAIL(ts_sync_mgr_ptr_->handle_ts_sync_response(lease_req))) { + DUP_TABLE_LOG(WARN, "handle ts sync response failed", K(ret)); + } else if (OB_FAIL(lease_mgr_ptr_->recive_lease_request(lease_req))) { + DUP_TABLE_LOG(WARN, "recive lease request failed", K(ret), K(lease_req)); + } + return ret; +} + +int ObDupTableLSHandler::handle_ts_sync_response(const ObDupTableTsSyncResponse &ts_sync_resp) +{ + int ret = OB_SUCCESS; + + if (!ATOMIC_LOAD(&is_inited_) || OB_ISNULL(ts_sync_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "DupTableLSHandler not init", K(ret), K(is_inited_), KP(ts_sync_mgr_ptr_)); + } else if (OB_FAIL(ts_sync_mgr_ptr_->handle_ts_sync_response(ts_sync_resp))) { + DUP_TABLE_LOG(WARN, "handle ts sync response failed", K(ret)); + } + + return ret; +} + +int ObDupTableLSHandler::handle_ts_sync_request(const ObDupTableTsSyncRequest &ts_sync_req) +{ + int ret = OB_SUCCESS; + + if (!ATOMIC_LOAD(&is_inited_) || OB_ISNULL(ts_sync_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "DupTableLSHandler not init", K(ret), K(is_inited_), KP(ts_sync_mgr_ptr_)); + } else if (OB_FAIL(ts_sync_mgr_ptr_->handle_ts_sync_request(ts_sync_req))) { + DUP_TABLE_LOG(WARN, "handle ts sync request failed", K(ret)); + } + + return ret; +} + +int ObDupTableLSHandler::check_redo_sync_completed(const ObTransID &tx_id, + const share::SCN &redo_completed_scn, + bool &redo_sync_finish, + share::SCN &total_max_read_version) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + LeaseAddrArray lease_addrs; + redo_sync_finish = false; + int64_t redo_sync_succ_cnt = 0; + total_max_read_version.set_invalid(); + share::SCN tmp_max_read_version; + tmp_max_read_version.set_invalid(); + + const int64_t GET_GTS_TIMEOUT = 1 * 1000 * 1000; // 1s + share::SCN before_prepare_gts; + before_prepare_gts.set_invalid(); + int64_t start_us = OB_INVALID_TIMESTAMP; + + if (!ATOMIC_LOAD(&is_inited_) || OB_ISNULL(lease_mgr_ptr_) || OB_ISNULL(ts_sync_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "ObDupTableLSHandler not init", K(ret), K(is_inited_), KP(lease_mgr_ptr_), + KP(ts_sync_mgr_ptr_)); + } else if (OB_FAIL(lease_mgr_ptr_->get_lease_valid_array(lease_addrs))) { + DUP_TABLE_LOG(WARN, "get lease valid array failed", K(ret)); + } else if (lease_addrs.count() == 0) { + redo_sync_finish = true; + total_max_read_version.set_min(); // min scn + DUP_TABLE_LOG(INFO, "no follower with valid lease, redo sync finish", K(ret), K(tx_id), + K(ls_id_), K(redo_completed_scn), K(redo_sync_finish), 
K(total_max_read_version)); + } else { + tmp_max_read_version.set_min(); + for (int i = 0; OB_SUCC(ret) && i < lease_addrs.count(); i++) { + bool replay_all_redo = false; + share::SCN max_read_version; + max_read_version.set_invalid(); + if (OB_FAIL(ts_sync_mgr_ptr_->validate_replay_ts(lease_addrs[i], redo_completed_scn, tx_id, + replay_all_redo, max_read_version))) { + DUP_TABLE_LOG(WARN, "validate replay ts failed", K(ret), K(lease_addrs[i]), + K(redo_completed_scn)); + } else if (replay_all_redo) { + if (!max_read_version.is_valid()) { + ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "unexpected max read version", K(ret), K(replay_all_redo), + K(max_read_version)); + } else if (tmp_max_read_version.is_valid()) { + tmp_max_read_version = share::SCN::max(tmp_max_read_version, max_read_version); + } else { + tmp_max_read_version = max_read_version; + } + if (OB_SUCC(ret)) { + redo_sync_succ_cnt++; + } + } + + // get_gts && retry to post before_prepare request + if ((OB_SUCC(ret) && !replay_all_redo) || OB_FAIL(ret)) { + int tmp_ret = OB_SUCCESS; + if (!before_prepare_gts.is_valid()) { + share::SCN tmp_gts; + tmp_gts.set_invalid(); + start_us = ObTimeUtility::fast_current_time(); + MonotonicTs rts(0); + do { + const int64_t now = ObTimeUtility::fast_current_time(); + const MonotonicTs stc = + MonotonicTs(now) - MonotonicTs(GCONF._ob_get_gts_ahead_interval); + if (now >= start_us + GET_GTS_TIMEOUT) { + tmp_ret = OB_TIMEOUT; + DUP_TABLE_LOG(WARN, "wait gts for too long time", K(now), K(start_us), + K(before_prepare_gts)); + } else if (OB_TMP_FAIL(MTL(ObTransService *) + ->get_ts_mgr() + ->get_gts(MTL_ID(), stc, NULL, tmp_gts, rts))) { + if (OB_EAGAIN == tmp_ret) { + ob_usleep(1000); + } else { + DUP_TABLE_LOG(WARN, "get gts fail", K(tmp_ret), K(now)); + } + } else if (OB_UNLIKELY(!tmp_gts.is_valid())) { + tmp_ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "invalid snapshot from gts", K(tmp_gts), K(now)); + } else { + // do nothing + } + } while (tmp_ret == OB_EAGAIN); + + if (OB_SUCCESS == tmp_ret) { + before_prepare_gts = tmp_gts; + } + } + + if (OB_SUCCESS == tmp_ret && before_prepare_gts > redo_completed_scn) { + const common::ObAddr self_addr = MTL(ObTransService *)->get_server(); + ObDupTableBeforePrepareRequest before_prepare_req(tx_id, before_prepare_gts); + before_prepare_req.set_header(self_addr, lease_addrs[i], self_addr, ls_id_); + if (OB_TMP_FAIL(MTL(ObTransService *) + ->get_dup_table_rpc_impl() + .post_msg(lease_addrs[i], before_prepare_req))) { + DUP_TABLE_LOG(WARN, "post ts sync request failed", K(tmp_ret)); + } + } + } + } + + if (OB_SUCC(ret) && redo_sync_succ_cnt == lease_addrs.count()) { + redo_sync_finish = true; + total_max_read_version = tmp_max_read_version; + + DUP_TABLE_LOG(INFO, "redo sync finish with lease valid follower", K(ret), K(ls_id_), K(tx_id), + K(redo_completed_scn), K(redo_sync_finish), K(total_max_read_version), + K(lease_addrs)); + } + } + + if (redo_sync_finish) { + interface_stat_.dup_table_redo_sync_succ_cnt_++; + } else { + interface_stat_.dup_table_redo_sync_fail_cnt_++; + } + + return ret; +} + +int ObDupTableLSHandler::block_confirm_with_dup_tablet_change_snapshot( + share::SCN &dup_tablet_change_snapshot) +{ + int ret = OB_SUCCESS; + + ATOMIC_INC(&total_block_confirm_ref_); + + if (!ATOMIC_LOAD(&is_inited_)) { + // do nothing + ret = OB_SUCCESS; + } else { + } + + return ret; +} + +int ObDupTableLSHandler::gc_dup_tablets(const int64_t gc_ts, const int64_t max_task_interval) +{ + int ret = OB_SUCCESS; + + if (!ATOMIC_LOAD(&is_inited_)) { + // do 
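[Note] check_redo_sync_completed() above blocks on the timestamp service for up to one second, sleeping 1ms whenever it answers OB_EAGAIN, and only posts a before_prepare request once the fetched GTS exceeds the redo-completed SCN. A standalone sketch of that bounded retry; the timestamp source is a caller-supplied stub rather than the real ObTsMgr API, and all error codes are illustrative:

#include <chrono>
#include <cstdint>
#include <functional>
#include <thread>

// try_get_ts: fills ts_out and returns 0 on success, 1 if the value is not ready yet,
// any other value for a hard error.
static int wait_for_ts(const std::function<int(int64_t &)> &try_get_ts,
                       int64_t timeout_us,
                       int64_t &ts_out)
{
  const auto deadline =
      std::chrono::steady_clock::now() + std::chrono::microseconds(timeout_us);
  int ret = 1;
  while (1 == ret) {
    if (std::chrono::steady_clock::now() >= deadline) {
      ret = -1;  // timed out waiting for a usable timestamp
    } else if (1 == (ret = try_get_ts(ts_out))) {
      std::this_thread::sleep_for(std::chrono::milliseconds(1));  // back off and retry
    }
  }
  return ret;
}

[End note]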
nothing + } else if (OB_ISNULL(tablets_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "ObDupTableLSHandler not init", K(ret), K(is_inited_), + KP(tablets_mgr_ptr_)); + } else if (0 > gc_ts || 0 > max_task_interval) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid gc_time", K(ret), K(gc_ts), K(max_task_interval)); + } else if (OB_FAIL(tablets_mgr_ptr_->gc_dup_tablets(gc_ts, max_task_interval))) { + DUP_TABLE_LOG(WARN, "lose dup tablet failed", KR(ret), K(gc_ts)); + } + + return ret; +} + +int ObDupTableLSHandler::try_to_confirm_tablets_(const share::SCN &confirm_scn) +{ + int ret = OB_SUCCESS; + + if (!ATOMIC_LOAD(&is_inited_) || OB_ISNULL(tablets_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "ObDupTableLSHandler not init", K(ret), K(is_inited_), + KP(tablets_mgr_ptr_)); + } else if (!confirm_scn.is_valid() || share::SCN::max_scn() == confirm_scn) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid confrim_time", K(ret), K(confirm_scn)); + } else if (OB_FAIL(tablets_mgr_ptr_->try_to_confirm_tablets(confirm_scn))) { + DUP_TABLE_LOG(WARN, "confirm tablets failed", K(ret), K(confirm_scn)); + } + // for debug + DUP_TABLE_LOG(DEBUG, "ls finish confirm tablets", K(ret), K(confirm_scn)); + return ret; +} + +int ObDupTableLSHandler::unblock_confirm_with_prepare_scn( + const share::SCN &dup_tablet_change_snapshot, + const share::SCN &redo_scn) +{ + int ret = OB_SUCCESS; + + return ret; +} + +int ObDupTableLSHandler::check_dup_tablet_in_redo(const ObTabletID &tablet_id, + bool &is_dup_tablet, + const share::SCN &base_snapshot, + const share::SCN &redo_scn) +{ + int ret = OB_SUCCESS; + is_dup_tablet = false; + + if (!tablet_id.is_valid() || !base_snapshot.is_valid() || !redo_scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid argument", K(ret), K(tablet_id), K(base_snapshot), K(redo_scn)); + } else if (OB_ISNULL(lease_mgr_ptr_) || OB_ISNULL(tablets_mgr_ptr_)) { + is_dup_tablet = false; + } else if (!has_dup_tablet()) { + is_dup_tablet = false; + } else if (OB_FAIL(tablets_mgr_ptr_->find_dup_tablet_in_set(tablet_id, is_dup_tablet, + base_snapshot, redo_scn))) { + DUP_TABLE_LOG(WARN, "check dup tablet failed", K(ret), K(tablet_id), K(base_snapshot), + K(redo_scn)); + } + return ret; +} + +int ObDupTableLSHandler::check_dup_tablet_readable(const ObTabletID &tablet_id, + const share::SCN &read_snapshot, + const bool read_from_leader, + const share::SCN &max_replayed_scn, + bool &readable) +{ + int ret = OB_SUCCESS; + + share::SCN tmp_max_replayed_scn = max_replayed_scn; + readable = false; + if (!tablet_id.is_valid() || !read_snapshot.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid argument", K(ret), K(tablet_id), K(read_snapshot), K(readable)); + } else if (OB_ISNULL(lease_mgr_ptr_) || OB_ISNULL(tablets_mgr_ptr_)) { + // no dup tablet in ls + readable = false; + } else if (!has_dup_tablet()) { + readable = false; + interface_stat_.dup_table_follower_read_tablet_not_exist_cnt_++; + // use read_from_leader to validate lease; + DUP_TABLE_LOG(INFO, "no dup tablet can be read", K(ret), KPC(tablets_mgr_ptr_), + K(read_from_leader), K(tmp_max_replayed_scn)); + } else if (!tmp_max_replayed_scn.is_valid() + && (OB_ISNULL(log_handler_) + || OB_FAIL(log_handler_->get_max_decided_scn(tmp_max_replayed_scn)))) { + DUP_TABLE_LOG(WARN, "get max replayed scn for dup table read failed", K(ret), K(ls_id_), + K(tablet_id), K(read_snapshot), KP(log_handler_), K(tmp_max_replayed_scn)); + } else if 
(OB_FAIL(check_and_update_max_replayed_scn(max_replayed_scn))) { + DUP_TABLE_LOG(WARN, "invalid max_replayed_scn", K(ret), K(tablet_id), K(read_snapshot), + K(read_from_leader)); + } else if (false + == lease_mgr_ptr_->check_follower_lease_serving(read_from_leader, + tmp_max_replayed_scn)) { + readable = false; + interface_stat_.dup_table_follower_read_lease_expired_cnt_++; + DUP_TABLE_LOG(INFO, "lease is expired for read", K(ret), K(tablet_id), K(read_snapshot), + K(read_from_leader), K(tmp_max_replayed_scn)); + } else if (OB_FAIL(tablets_mgr_ptr_->check_readable(tablet_id, readable, read_snapshot, + interface_stat_))) { + DUP_TABLE_LOG(WARN, "check dup tablet failed", K(ret), K(tablet_id), K(read_snapshot)); + } + + if (readable) { + interface_stat_.dup_table_follower_read_succ_cnt_++; + } + + return ret; +} +int64_t ObDupTableLSHandler::get_dup_tablet_count() +{ + int64_t dup_tablet_cnt = 0; + + if (OB_ISNULL(tablets_mgr_ptr_)) { + dup_tablet_cnt = 0; + } else { + dup_tablet_cnt = tablets_mgr_ptr_->get_dup_tablet_count(); + } + + return dup_tablet_cnt; +} + +bool ObDupTableLSHandler::has_dup_tablet() +{ + bool has_dup = false; + if (OB_ISNULL(tablets_mgr_ptr_)) { + has_dup = false; + } else { + has_dup = tablets_mgr_ptr_->has_dup_tablet(); + } + return has_dup; +} + +int ObDupTableLSHandler::get_local_ts_info(DupTableTsInfo &ts_info) +{ + int ret = OB_SUCCESS; + + if (!ATOMIC_LOAD(&is_inited_) || OB_ISNULL(ts_sync_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "DupTableLSHandler not init", K(ret), K(is_inited_), KP(ts_sync_mgr_ptr_)); + } else if (OB_FAIL(ts_sync_mgr_ptr_->get_local_ts_info(ts_info))) { + DUP_TABLE_LOG(WARN, "get local ts sync info failed", K(ret)); + } + + return ret; +} + +int ObDupTableLSHandler::get_cache_ts_info(const common::ObAddr &addr, DupTableTsInfo &ts_info) +{ + int ret = OB_SUCCESS; + + if (!ATOMIC_LOAD(&is_inited_) || OB_ISNULL(ts_sync_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "DupTableLSHandler not init", K(ret), K(is_inited_), KP(ts_sync_mgr_ptr_)); + } else if (OB_FAIL(ts_sync_mgr_ptr_->get_cache_ts_info(addr, ts_info))) { + DUP_TABLE_LOG(WARN, "get cache ts info failed", K(ret), K(addr), K(ts_info)); + } + return ret; +} + +int ObDupTableLSHandler::replay(const void *buffer, + const int64_t nbytes, + const palf::LSN &lsn, + const share::SCN &ts_ns) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + + const bool no_dup_tablet_before_replay = !has_dup_tablet(); + + // cover lease list and tablets list + if (!ATOMIC_LOAD(&is_inited_) && OB_FAIL(init(true))) { + DUP_TABLE_LOG(WARN, "init dup_ls_handle in replay failed", K(ret)); + } else if (OB_FAIL(prepare_log_operator_())) { + DUP_TABLE_LOG(WARN, "init dup_table log operator failed", K(ret)); + } else if (OB_FAIL( + log_operator_->merge_replay_block(static_cast(buffer), nbytes))) { + if (OB_SUCCESS == ret) { + DUP_TABLE_LOG(INFO, "merge replay buf success, may be completed", K(ret)); + } else if (OB_START_LOG_CURSOR_INVALID == ret) { + DUP_TABLE_LOG(WARN, "start replay from the middle of log entry, skip this dup_table log", + K(ts_ns), K(lsn)); + // ret = OB_SUCCESS; + } else { + DUP_TABLE_LOG(WARN, "merge replay buf failed", K(ret)); + } + } else if (OB_FAIL(log_operator_->deserialize_log_entry())) { + DUP_TABLE_LOG(WARN, "deserialize log block failed", K(ret)); + } else if (OB_FAIL(lease_mgr_ptr_->follower_try_acquire_lease(ts_ns))) { + DUP_TABLE_LOG(WARN, "acquire lease from lease log error", K(ret), K(ts_ns)); + } else { + log_operator_->set_logging_scn(ts_ns); + ret = 
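[Note] check_dup_tablet_readable() above is an ordered chain of cheap-to-expensive checks: argument validity, whether any dup tablet exists at all, whether the follower lease is still serving at the replayed position, and finally the per-tablet snapshot check. A condensed sketch of that ordering, with the predicates standing in for the real lease and tablets managers:

#include <cstdint>
#include <functional>

struct ReadablePredicates {
  std::function<bool()> has_dup_tablet;
  std::function<bool(bool /*read_from_leader*/, int64_t /*max_replayed*/)> lease_serving;
  std::function<bool(uint64_t /*tablet_id*/, int64_t /*read_snapshot*/)> snapshot_readable;
};

static bool dup_tablet_readable(const ReadablePredicates &p,
                                uint64_t tablet_id,
                                int64_t read_snapshot,
                                bool read_from_leader,
                                int64_t max_replayed)
{
  if (!p.has_dup_tablet()) {
    return false;  // nothing is replicated here, the read must go to the leader
  }
  if (!p.lease_serving(read_from_leader, max_replayed)) {
    return false;  // lease expired: the local copy may be stale
  }
  return p.snapshot_readable(tablet_id, read_snapshot);
}

[End note]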
log_operator_->replay_succ(); + DUP_TABLE_LOG(INFO, "replay dup_table log success", K(ret), K(nbytes), K(lsn), K(ts_ns), + KPC(tablets_mgr_ptr_), KPC(lease_mgr_ptr_)); + // log_operator_->reuse(); + } + + // start require lease instantly + if (OB_FAIL(ret)) { + // do nothing + } else if (no_dup_tablet_before_replay && has_dup_tablet() + && OB_TMP_FAIL( + MTL(ObTransService *)->get_dup_table_loop_worker().append_dup_table_ls(ls_id_))) { + DUP_TABLE_LOG(WARN, "refresh dup table ls failed", K(tmp_ret), K(ls_id_), K(lsn), K(ts_ns)); + } + + DUP_TABLE_LOG(DEBUG, "finish replay log", K(ret), K(ls_id_), K(lsn), K(ts_ns)); // for debug + return ret; +} + +void ObDupTableLSHandler::switch_to_follower_forcedly() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(leader_revoke_())) { + DUP_TABLE_LOG(ERROR, "switch to follower forcedly failed for dup table", K(ret), K(ls_id_), + K(is_master())); + } + ATOMIC_STORE(&is_master_, false); +} + +int ObDupTableLSHandler::switch_to_follower_gracefully() +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + + if (OB_FAIL(leader_revoke_())) { + DUP_TABLE_LOG(WARN, "switch to follower gracefully failed for dup table", K(ret), K(ls_id_), + K(is_master())); + } + + ATOMIC_STORE(&is_master_, false); + + if (OB_FAIL(ret)) { + if (OB_TMP_FAIL(resume_leader())) { + ret = OB_LS_NEED_REVOKE; + DUP_TABLE_LOG(WARN, "resume leader failed, need revoke", K(ret), K(tmp_ret), K(ls_id_)); + } else { + DUP_TABLE_LOG(WARN, "resume leader successfully, return error code", K(ret), K(tmp_ret), + K(ls_id_)); + } + } + + return ret; +} + +int ObDupTableLSHandler::resume_leader() +{ + int ret = OB_SUCCESS; + + const bool is_resume = true; + + if (OB_FAIL(leader_takeover_(is_resume))) { + DUP_TABLE_LOG(WARN, "resume leader failed for dup table", K(ret), K(ls_id_), K(is_master())); + } + + ATOMIC_STORE(&is_master_, true); + return ret; +} + +int ObDupTableLSHandler::switch_to_leader() +{ + int ret = OB_SUCCESS; + + const bool is_resume = false; + if (OB_FAIL(leader_takeover_(is_resume))) { + DUP_TABLE_LOG(WARN, "switch to leader failed for dup table", K(ret), K(ls_id_), K(is_master())); + } + ATOMIC_STORE(&is_master_, true); + return ret; +} + +int ObDupTableLSHandler::leader_revoke_() +{ + int ret = OB_SUCCESS; + + if (!is_master()) { + ret = OB_STATE_NOT_MATCH; + DUP_TABLE_LOG(ERROR, "unexpected ObDupTableLSHandler role", K(ret), K(ls_id_), K(is_master())); + } else { + // clean new/old tablet set + if (OB_NOT_NULL(log_operator_)) { + if (OB_NOT_NULL(tablets_mgr_ptr_) + && OB_FAIL(tablets_mgr_ptr_->leader_revoke(log_operator_->is_busy()))) { + DUP_TABLE_LOG(WARN, "clean unreadable tablet set failed", K(ret)); + } + } + + if (OB_SUCC(ret) && OB_NOT_NULL(ts_sync_mgr_ptr_)) { + if (OB_FAIL(ts_sync_mgr_ptr_->leader_revoke())) { + DUP_TABLE_LOG(WARN, "ts_sync_mgr switch to follower failed", K(ret)); + } + } + + if (OB_SUCC(ret) && OB_NOT_NULL(lease_mgr_ptr_)) { + if (OB_FAIL(lease_mgr_ptr_->leader_revoke())) { + DUP_TABLE_LOG(WARN, "lease_mgr switch to follower failed", K(ret)); + } + } + + } + + interface_stat_.reset(); + DUP_TABLE_LOG(INFO, "Leader Revoke", K(ret), K(ls_id_)); + return ret; +} + +int ObDupTableLSHandler::leader_takeover_(const bool is_resume) +{ + int ret = OB_SUCCESS; + + if (is_master()) { + ret = OB_STATE_NOT_MATCH; + DUP_TABLE_LOG(ERROR, "unexpected ObDupTableLSHandler role", K(ret), K(ls_id_), K(is_master())); + } else { + // clean ts info cache + if (OB_NOT_NULL(ts_sync_mgr_ptr_)) { + ts_sync_mgr_ptr_->leader_takeover(); + } + // extend lease_expired_time + if
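[Note] switch_to_follower_gracefully() above first revokes dup-table leadership; if that fails it attempts resume_leader(), and only escalates to OB_LS_NEED_REVOKE when the resume also fails. A minimal sketch of that escalation shape, with revoke/resume as stand-ins for leader_revoke_()/resume_leader() and illustrative error codes (0 for OB_SUCCESS, -2 for OB_LS_NEED_REVOKE):

static int switch_to_follower_gracefully_sketch(int (*revoke)(), int (*resume)())
{
  int ret = revoke();
  if (0 != ret) {
    if (0 != resume()) {
      ret = -2;  // leader state cannot be restored either: force a full revoke
    }
    // otherwise keep the original error so the caller can retry the graceful path
  }
  return ret;
}

[End note]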
(OB_NOT_NULL(lease_mgr_ptr_)) { + lease_mgr_ptr_->leader_takeover(is_resume); + } + + if (OB_NOT_NULL(tablets_mgr_ptr_)) { + if (OB_FAIL(tablets_mgr_ptr_->leader_takeover( + is_resume, dup_ls_ckpt_.contain_all_readable_on_replica()))) { + DUP_TABLE_LOG(WARN, "clean unreadable tablet set failed", K(ret)); + } + } + } + + interface_stat_.reset(); + DUP_TABLE_LOG(INFO, "Leader Takeover", K(ret), K(ls_id_), K(is_resume)); + return ret; +} + +int ObDupTableLSHandler::prepare_log_operator_() +{ + int ret = OB_SUCCESS; + + // need release in reset() + if (OB_ISNULL(log_operator_)) { + if (OB_ISNULL(log_operator_ = static_cast( + share::mtl_malloc(sizeof(ObDupTableLogOperator), "DUP_LOG_OP")))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + DUP_TABLE_LOG(WARN, "malloc log operator failed", K(ret)); + } else { + new (log_operator_) ObDupTableLogOperator(ls_id_, log_handler_, &dup_ls_ckpt_, lease_mgr_ptr_, + tablets_mgr_ptr_); + } + } + + return ret; +} + +int ObDupTableLSHandler::check_and_update_max_replayed_scn(const share::SCN &max_replayed_scn) +{ + int ret = OB_SUCCESS; + if (!max_replayed_scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid max_replayed_scn", K(ret), K(max_replayed_scn)); + } else if (!self_max_replayed_scn_.atomic_get().is_valid()) { + self_max_replayed_scn_.atomic_set(max_replayed_scn); + last_max_replayed_scn_update_ts_ = ObTimeUtility::fast_current_time(); + } else if (max_replayed_scn >= self_max_replayed_scn_.atomic_get()) { + self_max_replayed_scn_.atomic_set(max_replayed_scn); + last_max_replayed_scn_update_ts_ = ObTimeUtility::fast_current_time(); + } else if (max_replayed_scn < self_max_replayed_scn_.atomic_get() + && self_max_replayed_scn_.atomic_get().convert_to_ts(true) + - max_replayed_scn.convert_to_ts(true) + > 100 * 1000) { + // ret = OB_ERR_UNEXPECTED; + DUP_TABLE_LOG(WARN, "the max_replayed_scn has been rollbacked", K(ret), K(ls_id_), + K(max_replayed_scn), K(self_max_replayed_scn_), + K(last_max_replayed_scn_update_ts_)); + } + + return ret; +} + +int ObDupTableLSHandler::get_min_lease_ts_info_(DupTableTsInfo &min_ts_info) +{ + int ret = OB_SUCCESS; + + LeaseAddrArray lease_valid_array; + min_ts_info.reset(); + + if (!ATOMIC_LOAD(&is_inited_) || OB_ISNULL(lease_mgr_ptr_) || OB_ISNULL(ts_sync_mgr_ptr_)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(is_inited_), KP(lease_mgr_ptr_), + KP(ts_sync_mgr_ptr_)); + } else if (OB_FAIL(ts_sync_mgr_ptr_->get_local_ts_info(min_ts_info))) { + DUP_TABLE_LOG(WARN, "get local ts info failed", K(ret)); + } else if (!min_ts_info.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid local ts info", K(ret), K(min_ts_info)); + } else if (OB_FAIL(lease_mgr_ptr_->get_lease_valid_array(lease_valid_array))) { + DUP_TABLE_LOG(WARN, "get lease valid array failed", K(ret)); + } else { + DupTableTsInfo tmp_ts_info; + for (int64_t i = 0; OB_SUCC(ret) && i < lease_valid_array.count(); i++) { + if (OB_FAIL(ts_sync_mgr_ptr_->get_cache_ts_info(lease_valid_array[i], tmp_ts_info))) { + DUP_TABLE_LOG(WARN, "get cache ts info failed", K(ret), K(lease_valid_array[i])); + } else { + min_ts_info.max_replayed_scn_ = + share::SCN::min(min_ts_info.max_replayed_scn_, tmp_ts_info.max_replayed_scn_); + min_ts_info.max_read_version_ = + share::SCN::min(min_ts_info.max_read_version_, tmp_ts_info.max_read_version_); + min_ts_info.max_commit_version_ = + share::SCN::min(min_ts_info.max_commit_version_, tmp_ts_info.max_commit_version_); + } + } + } + + if (OB_FAIL(ret)) { + 
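[Note] check_and_update_max_replayed_scn() above only ever moves the cached replay position forward; a backward jump is tolerated but reported once it exceeds roughly 100ms. A simplified sketch with the SCN modelled as a microsecond timestamp:

#include <atomic>
#include <cstdint>
#include <cstdio>

static void update_max_replayed_us(std::atomic<int64_t> &cached_us,
                                   int64_t new_us,
                                   int64_t tolerance_us = 100 * 1000)
{
  const int64_t cur = cached_us.load();
  if (new_us >= cur) {
    cached_us.store(new_us);  // normal forward progress (also covers the first update)
  } else if (cur - new_us > tolerance_us) {
    // keep the larger cached value, but make the rollback visible
    std::fprintf(stderr, "max replayed position rolled back: cached=%lld new=%lld\n",
                 static_cast<long long>(cur), static_cast<long long>(new_us));
  }
}

[End note]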
DUP_TABLE_LOG(INFO, "get min lease ts info failed", K(ret), K(min_ts_info), + K(lease_valid_array)); + } + return ret; +} + +int ObDupTableLSHandler::get_lease_mgr_stat(ObDupLSLeaseMgrStatIterator &collect_iter) +{ + int ret = OB_SUCCESS; + FollowerLeaseMgrStatArr collect_arr; + + // collect all leader info + if (is_master()) { + if (OB_ISNULL(lease_mgr_ptr_) || OB_ISNULL(ts_sync_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "not init", K(ret), KPC(lease_mgr_ptr_), KP(ts_sync_mgr_ptr_)); + } else if(OB_FAIL(lease_mgr_ptr_->get_lease_mgr_stat(collect_arr))) { + DUP_TABLE_LOG(WARN, "get lease mgr stat from lease_mgr failed", K(ret)); + } else if(OB_FAIL(ts_sync_mgr_ptr_->get_lease_mgr_stat(collect_iter, collect_arr))) { + DUP_TABLE_LOG(WARN, "get lease mgr stat from ts_sync_mgr failed", K(ret)); + } + DUP_TABLE_LOG(DEBUG, "get lease mgr stat", K(ret), K(collect_arr)); + } + + return ret; +} + +int ObDupTableLSHandler::get_ls_tablets_stat(ObDupLSTabletsStatIterator &collect_iter) +{ + int ret = OB_SUCCESS; + const share::ObLSID ls_id = ls_id_; + + if (OB_ISNULL(tablets_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "tablets_mgr not init", K(ret), KP(tablets_mgr_ptr_)); + } else if(OB_FAIL(tablets_mgr_ptr_->get_tablets_stat(collect_iter, ls_id_))) { + DUP_TABLE_LOG(WARN, "get tablets stat failed", K(ret)); + } + + return ret; +} + +int ObDupTableLSHandler::get_ls_tablet_set_stat(ObDupLSTabletSetStatIterator &collect_iter) +{ + int ret = OB_SUCCESS; + const share::ObLSID ls_id = get_ls_id(); + + if (OB_ISNULL(tablets_mgr_ptr_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "not init", K(ret), KPC(tablets_mgr_ptr_)); + } else if (OB_FAIL(tablets_mgr_ptr_->get_tablet_set_stat(collect_iter, ls_id))) { + DUP_TABLE_LOG(WARN, "get tablet set stat failed", K(ret)); + } + + return ret; +} + +//************************************************************************************************************* +//**** ObDupTableLoopWorker +//************************************************************************************************************* + +int ObDupTableLoopWorker::init() +{ + int ret = OB_SUCCESS; + + if (is_inited_) { + ret = OB_INIT_TWICE; + DUP_TABLE_LOG(WARN, "init dup_loop_worker twice", K(ret)); + } else { + if (OB_FAIL(dup_ls_id_set_.create(8, "DUP_LS_SET", "DUP_LS_ID", MTL_ID()))) { + DUP_TABLE_LOG(WARN, "create dup_ls_map_ error", K(ret)); + } else { + is_inited_ = true; + } + } + DUP_TABLE_LOG(INFO, "init ObDupTableLoopWorker"); + return ret; +} + +int ObDupTableLoopWorker::start() +{ + int ret = OB_SUCCESS; + + if (!is_inited_) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "dup_loop_worker has not inited", K(ret)); + } else { + lib::ThreadPool::set_run_wrapper(MTL_CTX()); + ret = lib::ThreadPool::start(); + } + DUP_TABLE_LOG(INFO, "start ObDupTableLoopWorker", KR(ret)); + return ret; +} + +void ObDupTableLoopWorker::stop() +{ + if (!has_set_stop()) { + DUP_TABLE_LOG(INFO, "stop ObDupTableLoopWorker"); + } + lib::ThreadPool::stop(); +} + +void ObDupTableLoopWorker::wait() +{ + lib::ThreadPool::wait(); + DUP_TABLE_LOG(INFO, "wait ObDupTableLoopWorker"); +} + +void ObDupTableLoopWorker::destroy() +{ + lib::ThreadPool::destroy(); + (void)dup_ls_id_set_.destroy(); + DUP_TABLE_LOG(INFO, "destroy ObDupTableLoopWorker"); +} + +void ObDupTableLoopWorker::reset() +{ + int ret = OB_SUCCESS; + + if (OB_FAIL(dup_ls_id_set_.clear())) { + DUP_TABLE_LOG(WARN, "clear dup_ls_set failed", KR(ret)); + } + is_inited_ = false; +} + +void ObDupTableLoopWorker::run1() +{ + int ret = 
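[Note] get_min_lease_ts_info_() (ending just above) seeds the result with the leader's local timestamps and then lowers each field with the cached info of every follower that still holds a valid lease. A standalone sketch of that element-wise min fold, with TsInfo as a stand-in for DupTableTsInfo:

#include <algorithm>
#include <cstdint>
#include <vector>

struct TsInfo {
  int64_t max_replayed_ = 0;
  int64_t max_read_version_ = 0;
  int64_t max_commit_version_ = 0;
};

static TsInfo min_lease_ts_info(const TsInfo &local,
                                const std::vector<TsInfo> &lease_valid_followers)
{
  TsInfo min_info = local;
  for (const TsInfo &f : lease_valid_followers) {
    min_info.max_replayed_ = std::min(min_info.max_replayed_, f.max_replayed_);
    min_info.max_read_version_ = std::min(min_info.max_read_version_, f.max_read_version_);
    min_info.max_commit_version_ = std::min(min_info.max_commit_version_, f.max_commit_version_);
  }
  return min_info;
}

[End note]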
OB_SUCCESS; + int64_t start_time = 0; + int64_t time_used = 0; + DupLSIDSet_Spin::iterator iter; + ObSEArray remove_ls_list; + + lib::set_thread_name("DupLoop"); + if (!is_inited_) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(ERROR, "dup_loop_worker has not inited", K(ret)); + } else { + while (!has_set_stop()) { + start_time = ObTimeUtility::current_time(); + + remove_ls_list.reuse(); + + for (iter = dup_ls_id_set_.begin(); iter != dup_ls_id_set_.end(); iter++) { + const share::ObLSID cur_ls_id = iter->first; + ObLSHandle ls_handle; + + if (OB_ISNULL(MTL(ObLSService *)) + || (OB_FAIL(MTL(ObLSService *)->get_ls(cur_ls_id, ls_handle, ObLSGetMod::TRANS_MOD)) + || !ls_handle.is_valid())) { + if (OB_SUCC(ret)) { + ret = OB_INVALID_ARGUMENT; + } + DUP_TABLE_LOG(WARN, "get ls error", K(ret), K(cur_ls_id)); + } else if (OB_FAIL(ls_handle.get_ls()->get_dup_table_ls_handler()->ls_loop_handle())) { + DUP_TABLE_LOG(WARN, "ls loop handle error", K(ret), K(cur_ls_id)); + } + + if (OB_LS_NOT_EXIST == ret || OB_NOT_INIT == ret || OB_NO_TABLET == ret) { + remove_ls_list.push_back(cur_ls_id); + TRANS_LOG(INFO, "try to remove invalid dup ls id", K(ret), K(cur_ls_id), + K(remove_ls_list)); + } + } + + for (int index = 0; index < remove_ls_list.count(); index++) { + if (OB_FAIL(dup_ls_id_set_.erase_refactored(remove_ls_list[index]))) { + DUP_TABLE_LOG(WARN, "remove from dup_ls_id_set_ failed", K(ret), K(index), + K(remove_ls_list[index])); + } + } + + time_used = ObTimeUtility::current_time() - start_time; + if (time_used < LOOP_INTERVAL) { + usleep(LOOP_INTERVAL - time_used); + } + } + } +} + +int ObDupTableLoopWorker::append_dup_table_ls(const share::ObLSID &ls_id) +{ + int ret = OB_SUCCESS; + ObDupTableLSHandler *tmp_ls_handle = nullptr; + + if (!ls_id.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid ls id", K(ls_id)); + } else if (OB_FAIL(dup_ls_id_set_.set_refactored(ls_id, 0))) { + if (OB_HASH_EXIST == ret) { + // do nothing + } else { + DUP_TABLE_LOG(WARN, "insert dup_ls_handle into hash_set failed", K(ret)); + } + } + + if (OB_SUCC(ret)) { + DUP_TABLE_LOG(INFO, "append dup table ls success", K(ret), K(ls_id)); + } else if (OB_HASH_EXIST == ret) { + ret = OB_SUCCESS; + } + + if (OB_SUCC(ret) && !dup_ls_id_set_.empty() && has_set_stop()) { + start(); + } + + return ret; +} + +// trans service -> dup worker -> ls service -> dup ls handler -> iterate +int ObDupTableLoopWorker::iterate_dup_ls(ObDupLSLeaseMgrStatIterator &collect_iter) +{ + int ret = OB_SUCCESS; + DupLSIDSet_Spin::iterator iter; + ObLSService *ls_service = MTL(ObLSService *); + + if (OB_ISNULL(ls_service)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "get ls service failed", K(ret), KP(ls_service)); + } else { + for (iter = dup_ls_id_set_.begin(); iter != dup_ls_id_set_.end(); iter++) { + const share::ObLSID cur_ls_id = iter->first; + ObDupTableLSHandler *cur_dup_ls_handler = nullptr; + ObLSHandle ls_handle; + + if (OB_FAIL(ls_service->get_ls(cur_ls_id, ls_handle, ObLSGetMod::TRANS_MOD))) { + DUP_TABLE_LOG(WARN, "get ls handler error", K(ret), K(cur_ls_id), KPC(ls_service)); + } else if (!ls_handle.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "ls handler not valid", K(ret), K(cur_ls_id), KPC(ls_service)); + } else { + cur_dup_ls_handler = ls_handle.get_ls()->get_dup_table_ls_handler(); + if (OB_ISNULL(cur_dup_ls_handler) || !cur_dup_ls_handler->is_inited()) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "dup ls handler not init", K(ret), K(cur_ls_id), + KPC(cur_dup_ls_handler)); + } else if 
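[Note] ObDupTableLoopWorker::run1() above drives one pass over the registered log streams, collects the ids that report not-exist / not-init / no-tablet for removal, and then sleeps whatever remains of the 100ms loop interval. A reduced sketch of one pass; the container and error codes are stand-ins:

#include <chrono>
#include <cstdint>
#include <set>
#include <thread>
#include <vector>

constexpr int kLsNotExist = -1, kNotInit = -2, kNoTablet = -3;  // illustrative codes
constexpr int64_t kLoopIntervalUs = 100 * 1000;                 // 100ms, as in the worker

static void loop_once(std::set<int64_t> &ls_ids, int (*handle_ls)(int64_t))
{
  const auto start = std::chrono::steady_clock::now();
  std::vector<int64_t> to_remove;
  for (const int64_t id : ls_ids) {
    const int ret = handle_ls(id);
    if (kLsNotExist == ret || kNotInit == ret || kNoTablet == ret) {
      to_remove.push_back(id);  // this stream no longer needs the dup table loop
    }
  }
  for (const int64_t id : to_remove) {
    ls_ids.erase(id);
  }
  const int64_t used_us = std::chrono::duration_cast<std::chrono::microseconds>(
                              std::chrono::steady_clock::now() - start).count();
  if (used_us < kLoopIntervalUs) {
    std::this_thread::sleep_for(std::chrono::microseconds(kLoopIntervalUs - used_us));
  }
}

[End note]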
(OB_FAIL(cur_dup_ls_handler->get_lease_mgr_stat(collect_iter))) { + DUP_TABLE_LOG(WARN, "collect lease mgr stat failed", K(ret), K(cur_ls_id), + KPC(cur_dup_ls_handler)); + } + } + } + } + + return ret; +} + +int ObDupTableLoopWorker::iterate_dup_ls(ObDupLSTabletSetStatIterator &collect_iter) +{ + int ret = OB_SUCCESS; + DupLSIDSet_Spin::iterator iter; + ObLSService *ls_service = MTL(ObLSService *); + + if (OB_ISNULL(ls_service)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "get ls service failed", K(ret), KP(ls_service)); + } else { + for (iter = dup_ls_id_set_.begin(); iter != dup_ls_id_set_.end(); iter++) { + const share::ObLSID cur_ls_id = iter->first; + ObDupTableLSHandler *cur_dup_ls_handler = nullptr; + ObLSHandle ls_handle; + + if (OB_FAIL(ls_service->get_ls(cur_ls_id, ls_handle, ObLSGetMod::TRANS_MOD))) { + DUP_TABLE_LOG(WARN, "get ls handler error", K(ret), K(cur_ls_id), KPC(ls_service)); + } else if (!ls_handle.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "ls handler not valid", K(ret), K(cur_ls_id), KPC(ls_service)); + } else { + cur_dup_ls_handler = ls_handle.get_ls()->get_dup_table_ls_handler(); + if (OB_ISNULL(cur_dup_ls_handler) || !cur_dup_ls_handler->is_inited()) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "dup ls handler not init", K(ret), K(cur_ls_id), + KPC(cur_dup_ls_handler)); + } else if (OB_FAIL(cur_dup_ls_handler->get_ls_tablet_set_stat(collect_iter))) { + DUP_TABLE_LOG(WARN, "collect lease mgr stat failed", K(ret), K(cur_ls_id), + KPC(cur_dup_ls_handler)); + } + DUP_TABLE_LOG(WARN, "iter dup ls handler", K(ret), K(cur_ls_id), + KPC(cur_dup_ls_handler)); + } + } + } + + return ret; +} + +int ObDupTableLoopWorker::iterate_dup_ls(ObDupLSTabletsStatIterator &collect_iter) +{ + int ret = OB_SUCCESS; + DupLSIDSet_Spin::iterator iter; + ObLSService *ls_service = MTL(ObLSService *); + + if (OB_ISNULL(ls_service)) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "get ls service failed", K(ret), KP(ls_service)); + } else { + for (iter = dup_ls_id_set_.begin(); iter != dup_ls_id_set_.end(); iter++) { + const share::ObLSID cur_ls_id = iter->first; + ObDupTableLSHandler *cur_dup_ls_handler = nullptr; + ObLSHandle ls_handle; + + if (OB_FAIL(ls_service->get_ls(cur_ls_id, ls_handle, ObLSGetMod::TRANS_MOD))) { + DUP_TABLE_LOG(WARN, "get ls handler error", K(ret), K(cur_ls_id), KPC(ls_service)); + } else if (!ls_handle.is_valid()) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "ls handler not valid", K(ret), K(cur_ls_id), KPC(ls_service)); + } else { + cur_dup_ls_handler = ls_handle.get_ls()->get_dup_table_ls_handler(); + if (OB_ISNULL(cur_dup_ls_handler) || !cur_dup_ls_handler->is_inited()) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "dup ls handler not init", K(ret), K(cur_ls_id), + KPC(cur_dup_ls_handler)); + } else if (OB_FAIL(cur_dup_ls_handler->get_ls_tablets_stat(collect_iter))) { + DUP_TABLE_LOG(WARN, "collect lease mgr stat failed", K(ret), K(cur_ls_id), + KPC(cur_dup_ls_handler)); + } + } + } + } + + return ret; +} +} // namespace transaction +} // namespace oceanbase diff --git a/src/storage/tx/ob_dup_table_util.h b/src/storage/tx/ob_dup_table_util.h new file mode 100644 index 0000000000..6fcf8ebd00 --- /dev/null +++ b/src/storage/tx/ob_dup_table_util.h @@ -0,0 +1,311 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. 
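[Note] The three iterate_dup_ls() overloads above repeat the same ls_id traversal and differ only in which collector they hand to the per-LS handler. One possible consolidation, shown here only as a sketch and not as part of this patch, folds the walk into a single helper that takes a callable; the stand-in types replace ObLSService and ObDupTableLSHandler:

#include <cstdint>
#include <functional>
#include <set>

struct DupLsHandlerStub {
  bool inited = true;
};

// resolve() maps an ls id to its handler (nullptr if the stream is gone); visit()
// performs the per-LS collection. The walk continues past errors and, like the
// original loops, ends up returning the status of the last stream visited.
static int for_each_dup_ls(const std::set<int64_t> &ls_ids,
                           const std::function<DupLsHandlerStub *(int64_t)> &resolve,
                           const std::function<int(DupLsHandlerStub &)> &visit)
{
  int ret = 0;
  for (const int64_t id : ls_ids) {
    DupLsHandlerStub *handler = resolve(id);
    if (nullptr == handler || !handler->inited) {
      ret = -1;  // stands in for OB_NOT_INIT
    } else {
      ret = visit(*handler);
    }
  }
  return ret;
}

[End note]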
+// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +#ifndef OCEANBASE_TRANSACTION_DUP_TABLE_UTIL_H +#define OCEANBASE_TRANSACTION_DUP_TABLE_UTIL_H + +// #include "lib/hash/ob_hashset.h" +#include "logservice/ob_log_base_type.h" +#include "storage/tx/ob_dup_table_tablets.h" +#include "storage/tx/ob_trans_define.h" +#include "storage/tx/ob_dup_table_stat.h" + +namespace oceanbase +{ + +namespace logservice +{ +class ObLogHandler; +} + +namespace transaction +{ +class ObDupTableLSLeaseMgr; +class ObDupTableLSTsSyncMgr; +class ObLSDupTabletsMgr; +class ObDupTableLoopWorker; +class DupTableTsInfo; +// class ObDupTableICallback; +class ObITxLogAdapter; + +class ObDupTableLeaseRequest; +class ObDupTableTsSyncRequest; +class ObDupTableTsSyncResponse; + +class MockDupTableChecker +{ +public: + static int check_dup_table_tablet(common::ObTabletID id, bool &is_dup_table) + { + int ret = OB_SUCCESS; + + is_dup_table = false; + + return ret; + } +}; + +// scan all tablets to find dup_table tablets on LS leader +class ObDupTabletScanTask : public ObITimeoutTask +{ +public: + static const int64_t DUP_TABLET_SCAN_INTERVAL = 10 * 1000 * 1000; // 10s +public: + ObDupTabletScanTask() { reset(); } + ~ObDupTabletScanTask() { destroy(); } + void reset(); + void destroy() { reset(); } + int make(const int64_t tenant_id, + ObDupTableLeaseTimer *scan_timer, + ObDupTableLoopWorker *loop_worker); + + void runTimerTask(); + uint64_t hash() const { return tenant_id_; } + +private: + int execute_(); + int execute_for_dup_ls_(); + int refresh_dup_tablet_schema_(bool need_refresh, + ObTenantDupTabletSchemaHelper::TabletIDSet &tenant_dup_tablet_set, + share::ObLSStatusInfo &dup_ls_status_info); + +private: + ObTenantDupTabletSchemaHelper dup_schema_helper_; + + int64_t tenant_id_; + ObDupTableLeaseTimer *dup_table_scan_timer_; + ObDupTableLoopWorker *dup_loop_worker_; + int64_t last_execute_time_; + int64_t max_execute_interval_; +}; + +// LS-level +// manage lease and ts_sync for dup_table +// register log_handler in each ls but not alloc inside memory without dup_table tablet +// alloc inside memory when called by ObDupTabletScanTask +class ObDupTableLSHandler : public logservice::ObIReplaySubHandler, + public logservice::ObIRoleChangeSubHandler, + public logservice::ObICheckpointSubHandler +{ +public: + ObDupTableLSHandler() + : lease_mgr_ptr_(nullptr), ts_sync_mgr_ptr_(nullptr), tablets_mgr_ptr_(nullptr), + log_operator_(nullptr) + { + reset(); + } + // init by ObDupTabletScanTask or replay + int init(bool is_dup_table); + + void stop(); // TODO stop submit log before the log handler is invalid. 
+ void wait(); // TODO + void destroy(); // remove from dup_table_loop_worker + void reset(); + + const share::ObLSID &get_ls_id() { return ls_id_; } + // must set when init log_stream + void default_init(const share::ObLSID &ls_id, logservice::ObLogHandler *log_handler) + { + ls_id_ = ls_id; + log_handler_ = log_handler; + dup_ls_ckpt_.default_init(ls_id); + } + + bool is_master(); + bool is_follower(); + +public: + int ls_loop_handle(); + + //------ leader interface + // called by DupTabletScanTask => ObDupTableLoopWorker , alloc memory for first dup_table tablet + int refresh_dup_table_tablet(common::ObTabletID tablet_id, + bool is_dup_table, + int64_t refresh_time); + // called by rpc thread + int recive_lease_request(const ObDupTableLeaseRequest &lease_req); + int handle_ts_sync_response(const ObDupTableTsSyncResponse &ts_sync_reps); + // called by part_ctx + // int validate_dup_table_tablet(const ObTabletID &tablet_id, bool &is_dup_tablet); + // int validate_replay_ts(int64_t log_ts); + // int validate_commit_version(int64_t commit_version); + int check_redo_sync_completed(const ObTransID &tx_id, + const share::SCN &redo_completed_scn, + bool &redo_sync_finish, + share::SCN &total_max_read_version); + + int block_confirm_with_dup_tablet_change_snapshot(share::SCN &dup_tablet_change_snapshot); + int unblock_confirm_with_prepare_scn(const share::SCN &dup_tablet_change_snapshot, + const share::SCN &prepare_scn); + int check_dup_tablet_in_redo(const ObTabletID &tablet_id, + bool &is_dup_tablet, + const share::SCN &base_snapshot, + const share::SCN &redo_scn); + + //------ follower interface + // called by rpc thread + int handle_ts_sync_request(const ObDupTableTsSyncRequest &ts_sync_req); // post ts_sync response + // called by trans_service + int validate_readable_tablet(); + int validate_lease_valid(); + int validate_in_trans_read(); + int check_dup_tablet_readable(const ObTabletID &tablet_id, + const share::SCN &read_snapshot, + const bool read_from_leader, + const share::SCN &max_replayed_scn, + bool &readable); + +public: + bool is_inited() { return is_inited_; } + int64_t get_dup_tablet_count(); + bool has_dup_tablet(); + int gc_dup_tablets(const int64_t gc_ts, const int64_t max_task_interval); + int get_local_ts_info(DupTableTsInfo &ts_info); + int get_cache_ts_info(const common::ObAddr &addr, DupTableTsInfo &ts_info); + int get_lease_mgr_stat(ObDupLSLeaseMgrStatIterator &collect_iter); + int get_ls_tablets_stat(ObDupLSTabletsStatIterator &collect_iter); + int get_ls_tablet_set_stat(ObDupLSTabletSetStatIterator &collect_iter); + // int retry_submit_log(); + // ObLSDupTabletsMgr *get_tablets_mgr() { return tablets_mgr_ptr_; } + // ObITxLogAdapter *get_log_adapter() { return log_adapter_ptr_; } +public: + int replay(const void *buffer, + const int64_t nbytes, + const palf::LSN &lsn, + const share::SCN &ts_ns); + void switch_to_follower_forcedly(); + int switch_to_follower_gracefully(); + int resume_leader(); + int switch_to_leader(); + + int set_dup_table_ls_meta(const ObDupTableLSCheckpoint::ObLSDupTableMeta &dup_ls_meta) + { + return dup_ls_ckpt_.set_dup_ls_meta(dup_ls_meta); + } + int get_dup_table_ls_meta(ObDupTableLSCheckpoint::ObLSDupTableMeta &dup_ls_meta) const + { + return dup_ls_ckpt_.get_dup_ls_meta(dup_ls_meta); + } + + share::SCN get_rec_scn() { return dup_ls_ckpt_.get_lease_log_rec_scn(); } + int flush(share::SCN &rec) { return dup_ls_ckpt_.flush(); } + + logservice::ObLogHandler *get_log_handler() { return log_handler_; } + + void inc_committing_dup_trx_cnt() { 
ATOMIC_INC(&committing_dup_trx_cnt_); } + void dec_committing_dup_trx_cnt() { ATOMIC_DEC(&committing_dup_trx_cnt_); } + int64_t get_committing_dup_trx_cnt() { return ATOMIC_LOAD(&committing_dup_trx_cnt_); } + +public: + int64_t get_total_block_confirm_ref() { return ATOMIC_LOAD(&total_block_confirm_ref_); } + + int check_and_update_max_replayed_scn(const share::SCN &max_replayed_scn); +private: + DISALLOW_COPY_AND_ASSIGN(ObDupTableLSHandler); + + int prepare_log_operator_(); + int get_min_lease_ts_info_(DupTableTsInfo &min_ts_info); + int leader_takeover_(const bool is_resume); + int leader_revoke_(); + + int try_to_confirm_tablets_(const share::SCN &confirm_ts); + +private: + share::ObLSID ls_id_; + bool is_master_; // set by role change + bool is_inited_; // ste by replay or dup_tablet_scan_task_ + bool is_stopped_; // TODO + // TODO: is_stopped_; + + int64_t total_block_confirm_ref_; // block new dup tablet confirmed + + int64_t committing_dup_trx_cnt_; + + share::SCN self_max_replayed_scn_; + int64_t last_max_replayed_scn_update_ts_; + + ObDupTableLSCheckpoint dup_ls_ckpt_; + + // lease + ObDupTableLSLeaseMgr *lease_mgr_ptr_; + // ts sync + ObDupTableLSTsSyncMgr *ts_sync_mgr_ptr_; + // dup_table_tablets + ObLSDupTabletsMgr *tablets_mgr_ptr_; + + logservice::ObLogHandler *log_handler_; + + ObDupTableLogOperator *log_operator_; + + DupTableInterfaceStat interface_stat_; + + int64_t last_diag_info_print_us_[DupTableDiagStd::TypeIndex::MAX_INDEX]; +}; + +typedef common::hash::ObHashSet + DupTableLSHandlerSet; + +typedef common::hash:: + ObHashMap + DupTableLSHandlerMap; + +typedef common::hash::ObHashSet + DupLSIDSet_Spin; + + +// tenant-level +// a thread for handle lease request and writer log +// start by ObDupTabletScanTask +// stop by trans_service or ObDupTabletScanTask +class ObDupTableLoopWorker : public lib::ThreadPool +{ +public: + const static int64_t LOOP_INTERVAL = 100 * 1000; // 100ms +public: + ObDupTableLoopWorker() { is_inited_ = false; } + int init(); + int start(); + void stop(); + void wait(); + void destroy(); + void reset(); + void run1(); + +public: + // called by ObDupTabletScanTask + // set dup_ls_handle and dup_tablet + int refresh_dup_table_tablet(ObDupTableLSHandler *dup_ls_handle, + const common::ObTabletID tablet_id, + bool is_dup_table, + int64_t refresh_time); + int append_dup_table_ls(const share::ObLSID &ls_id); + // int control_thread(); + + // int remove_stopped_dup_ls(share::ObLSID ls_id); + // iterate all ls for collect dup ls info + int iterate_dup_ls(ObDupLSLeaseMgrStatIterator &collect_iter); + int iterate_dup_ls(ObDupLSTabletSetStatIterator &collect_iter); + int iterate_dup_ls(ObDupLSTabletsStatIterator &collect_iter); + + TO_STRING_KV(K(is_inited_), K(dup_ls_id_set_.size())); + +private: + bool is_inited_; + // SpinRWLock lock_; + // // dup_table ls map which need to handle + // // DupTableLSHandlerSet dup_ls_set_; + // DupTableLSHandlerMap dup_ls_map_; + DupLSIDSet_Spin dup_ls_id_set_; +}; + +} // namespace transaction +} // namespace oceanbase + +#endif diff --git a/src/storage/tx/ob_keep_alive_service.cpp b/src/storage/tx/ob_keep_alive_service.cpp new file mode 100644 index 0000000000..5aa43ce450 --- /dev/null +++ b/src/storage/tx/ob_keep_alive_service.cpp @@ -0,0 +1,152 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. 
+// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +#include "logservice/ob_log_base_header.h" +#include "logservice/ob_log_handler.h" +#include "storage/tx/ob_keep_alive_service.h" +#include "storage/tx_storage/ob_ls_service.h" + +namespace oceanbase +{ + +using namespace share; +using namespace logservice; + +namespace transaction +{ + +int ObKeepAliveService::mtl_init(ObKeepAliveService *& ka) +{ + return ka->init(); +} + +int ObKeepAliveService::init() +{ + int ret = OB_SUCCESS; + int64_t pos = 0; + + TRANS_LOG(INFO, "[Keep Alive] init"); + + lib::ThreadPool::set_run_wrapper(MTL_CTX()); + + return ret; +} + +int ObKeepAliveService::start() +{ + int ret = OB_SUCCESS; + + TRANS_LOG(INFO, "[Keep Alive] start"); + if (OB_FAIL(lib::ThreadPool::start())) { + TRANS_LOG(WARN, "[Keep Alive] start keep alive thread failed", K(ret)); + } else { + // TRANS_LOG(INFO, "[Keep Alive] start keep alive thread succeed", K(ret)); + } + + return ret; +} + +void ObKeepAliveService::stop() +{ + TRANS_LOG(INFO, "[Keep Alive] stop"); + lib::ThreadPool::stop(); +} + +void ObKeepAliveService::wait() +{ + TRANS_LOG(INFO, "[Keep Alive] wait"); + lib::ThreadPool::wait(); +} + +void ObKeepAliveService::destroy() +{ + TRANS_LOG(INFO, "[Keep Alive] destroy"); + lib::ThreadPool::destroy(); + reset(); +} + +void ObKeepAliveService::reset() +{ + need_print_ = false; + +} + +void ObKeepAliveService::run1() +{ + int ret = OB_SUCCESS; + int64_t start_time_us = 0; + int64_t time_used = 0; + lib::set_thread_name("TxKeepAlive"); + + // int64_t loop_cnt = 0; + while (!has_set_stop()) { + start_time_us = ObTimeUtility::current_time(); + + if (REACH_TIME_INTERVAL(KEEP_ALIVE_PRINT_INFO_INTERVAL)) { + // TRANS_LOG(INFO, "[Keep Alive LOOP]", K(loop_cnt)); + need_print_ = true; + // loop_cnt = 0; + } + + if (OB_FAIL(scan_all_ls_())) { + TRANS_LOG(WARN, "[Keep Alive] scan all ls failed", K(ret)); + } + + // loop_cnt += 1; + need_print_ = false; + + time_used = ObTimeUtility::current_time() - start_time_us; + if (time_used < KEEP_ALIVE_INTERVAL) { + usleep(KEEP_ALIVE_INTERVAL - time_used); + } + } +} + +int ObKeepAliveService::scan_all_ls_() +{ + int ret = OB_SUCCESS; + int iter_ret = OB_SUCCESS; + + ObSharedGuard ls_iter_guard; + ObLSIterator *iter_ptr = nullptr; + ObLS *cur_ls_ptr = nullptr; + + int64_t ls_cnt = 0; + + if (OB_ISNULL(MTL(ObLSService *)) || OB_FAIL(MTL(ObLSService *)->get_ls_iter(ls_iter_guard, ObLS::LSGetMod::STORAGE_MOD)) + || !ls_iter_guard.is_valid()) { + if (OB_SUCCESS == ret) { + ret = OB_INVALID_ARGUMENT; + } + TRANS_LOG(WARN, "[Keep Alive] get ls iter failed", K(ret), KP(MTL(ObLSService *))); + } else if (OB_ISNULL(iter_ptr = ls_iter_guard.get_ptr())) { + TRANS_LOG(WARN, "[Keep Alive] ls iter_ptr is nullptr", KP(iter_ptr)); + } else { + iter_ret = OB_SUCCESS; + cur_ls_ptr = nullptr; + while (OB_SUCCESS == (iter_ret = iter_ptr->get_next(cur_ls_ptr)) && OB_NOT_NULL(cur_ls_ptr)) { + if (cur_ls_ptr->get_keep_alive_ls_handler()->try_submit_log()) { + TRANS_LOG(WARN, "[Keep Alive] try submit keep alive log failed", K(ret)); + } + if (need_print_) { + ls_cnt += 1; + cur_ls_ptr->get_keep_alive_ls_handler()->print_stat_info(); + } + } + } + + if (need_print_) { + TRANS_LOG(INFO, "[Keep Alive Loop] ", "LS_CNT", ls_cnt); + } 
+ return ret; +} + +} +} diff --git a/src/storage/tx/ob_trans_define.cpp b/src/storage/tx/ob_trans_define.cpp index 9c0e2a875e..2c1605e1e3 100644 --- a/src/storage/tx/ob_trans_define.cpp +++ b/src/storage/tx/ob_trans_define.cpp @@ -1018,6 +1018,7 @@ void ObTxExecInfo::reset() prepare_log_info_arr_.reset(); xid_.reset(); need_checksum_ = true; + tablet_modify_record_.reset(); is_sub2pc_ = false; } @@ -1057,6 +1058,7 @@ OB_SERIALIZE_MEMBER(ObTxExecInfo, prepare_log_info_arr_, xid_, need_checksum_, + tablet_modify_record_, is_sub2pc_); bool ObMulSourceDataNotifyArg::is_redo_submitted() const { return redo_submitted_; } diff --git a/src/storage/tx/ob_trans_define.h b/src/storage/tx/ob_trans_define.h index df74b9fae9..f0d2754fbc 100644 --- a/src/storage/tx/ob_trans_define.h +++ b/src/storage/tx/ob_trans_define.h @@ -1077,7 +1077,7 @@ public: static const int64_t UNKNOWN = -1; static const int64_t END_TRANS_CB_TASK = 0; static const int64_t ADVANCE_LS_CKPT_TASK = 1; - static const int64_t MAX = 14; + static const int64_t MAX = 3; public: static bool is_valid(const int64_t task_type) { return task_type > UNKNOWN && task_type < MAX; } @@ -1601,7 +1601,7 @@ private: int64_t count_; }; - +static const int64_t MAX_TABLET_MODIFY_RECORD_COUNT = 16; // exec info need to be persisted by "trans context table" struct ObTxExecInfo { @@ -1670,6 +1670,7 @@ public: // for xa ObXATransID xid_; bool need_checksum_; + common::ObSEArray tablet_modify_record_; bool is_sub2pc_; }; diff --git a/src/storage/tx/ob_trans_part_ctx.cpp b/src/storage/tx/ob_trans_part_ctx.cpp index 65a8a7ad0b..518287d630 100644 --- a/src/storage/tx/ob_trans_part_ctx.cpp +++ b/src/storage/tx/ob_trans_part_ctx.cpp @@ -287,6 +287,7 @@ void ObPartTransCtx::default_init_() last_op_sn_ = 0; last_scn_ = 0; first_scn_ = 0; + dup_table_follower_max_read_version_.reset(); rec_log_ts_.reset(); prev_rec_log_ts_.reset(); big_segment_info_.reset(); @@ -516,6 +517,7 @@ int ObPartTransCtx::handle_timeout(const int64_t delay) } } } + if (exec_info_.is_dup_tx_) { if (ObTxState::REDO_COMPLETE == exec_info_.state_) { if (OB_SUCCESS != (tmp_ret = dup_table_tx_redo_sync_())) { @@ -774,7 +776,7 @@ int ObPartTransCtx::commit(const ObLSArray &parts, if (parts.count() <= 0) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(ERROR, "the size of participant is 0 when commit", KPC(this)); - } else if (parts.count() == 1 && parts[0] == ls_id_) { + } else if (parts.count() == 1 && parts[0] == ls_id_ && !exec_info_.is_dup_tx_) { exec_info_.trans_type_ = TransType::SP_TRANS; can_elr_ = (trans_service_->get_tx_elr_util().is_can_tenant_elr() ? 
true : false); if (OB_FAIL(one_phase_commit_())) { @@ -1092,6 +1094,11 @@ int ObPartTransCtx::get_gts_callback(const MonotonicTs srr, mt_ctx_.set_trans_version(gts); const SCN max_read_ts = trans_service_->get_tx_version_mgr().get_max_read_ts(); // TRANS_LOG(INFO, "get_gts_callback mid", K(*this), K(log_type)); + if (is_local_tx_() && exec_info_.is_dup_tx_) { + TRANS_LOG(ERROR, "invalid trans type for a local dup_table trx", K(ret), KPC(this)); + exec_info_.trans_type_ = TransType::DIST_TRANS; + } + if (is_local_tx_()) { if (OB_FAIL(ctx_tx_data_.set_commit_version(SCN::max(gts, max_read_ts)))) { TRANS_LOG(WARN, "set commit_version failed", K(ret)); @@ -1163,6 +1170,12 @@ int ObPartTransCtx::gts_elapse_callback(const MonotonicTs srr, const SCN >s) } else { sub_state_.clear_gts_waiting(); } + + if (is_local_tx_() && exec_info_.is_dup_tx_) { + TRANS_LOG(ERROR, "invalid trans type for a local dup_table trx", K(ret), KPC(this)); + exec_info_.trans_type_ = TransType::DIST_TRANS; + } + if (is_local_tx_()) { if (OB_FAIL(after_local_commit_succ_())) { TRANS_LOG(WARN, "terminate trx after local commit failed", KR(ret), KPC(this)); @@ -1465,6 +1478,15 @@ int ObPartTransCtx::recover_tx_ctx_table_info(ObTxCtxTableInfo &ctx_info) } } + if (exec_info_.is_dup_tx_ && get_downstream_state() == ObTxState::REDO_COMPLETE + && exec_info_.max_applying_log_ts_ == exec_info_.max_applied_log_ts_) { + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(dup_table_before_preapre_(exec_info_.max_applied_log_ts_, true/*before_replay*/))) { + TRANS_LOG(WARN, "set commit_info scn as before_prepare_version failed", K(ret), KPC(this)); + } + } + TRANS_LOG(INFO, "recover tx ctx table info succeed", K(ret), KPC(this), K(ctx_info)); } @@ -1935,8 +1957,28 @@ int ObPartTransCtx::on_success_ops_(ObTxLogCb *log_cb) } else if (ObTxLogType::TX_COMMIT_INFO_LOG == log_type) { ObTwoPhaseCommitLogType two_phase_log_type; set_durable_state_(ObTxState::REDO_COMPLETE); - if (exec_info_.is_dup_tx_ && OB_FAIL(dup_table_tx_redo_sync_())) { - TRANS_LOG(WARN, "dup table redo sync error", K(ret)); + if (exec_info_.is_dup_tx_) { + if (is_follower_()) { + if (OB_FAIL(dup_table_before_preapre_(log_ts, true/*before_replay*/))) { + TRANS_LOG(WARN, "set commit_info scn as befre_prepare_version failed", K(ret), KPC(log_cb), + KPC(this)); + } else if (OB_FAIL(clear_dup_table_redo_sync_result_())) { + TRANS_LOG(WARN, "clear redo sync result failed", K(ret)); + } + TRANS_LOG(INFO, "need set before_prepare_version in on_success after switch_to_follower", + K(ret), KPC(log_cb)); + } else { + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(dup_table_tx_redo_sync_())) { + if (OB_EAGAIN != ret) { + TRANS_LOG(WARN, "dup table redo sync error, need retry in trans_timer", K(ret), K(trans_id_), K(ls_id_)); + ret = OB_SUCCESS; + } else { + ret = OB_SUCCESS; + } + } + } } if (is_sub2pc()) { if (OB_FAIL(ret)) { @@ -2203,6 +2245,15 @@ int ObPartTransCtx::on_failure(ObTxLogCb *log_cb) log_cb->set_tx_data(nullptr); } } + if (ObTxLogType::TX_PREPARE_LOG == log_type) { + if (!exec_info_.is_dup_tx_) { + // do nothing + } else if (OB_FAIL(dup_table_before_preapre_(exec_info_.max_applied_log_ts_, true/*before_replay*/))) { + TRANS_LOG(WARN, "set commit_info scn as befre_prepare_version failed", K(ret), KPC(this)); + } else if (OB_FAIL(clear_dup_table_redo_sync_result_())) { + TRANS_LOG(WARN, "clear redo sync result failed", K(ret)); + } + } return_log_cb_(log_cb); log_cb = NULL; if (ObTxLogType::TX_COMMIT_INFO_LOG == log_type) { @@ -2311,12 +2362,12 @@ int 
ObPartTransCtx::generate_prepare_version_() { int ret = OB_SUCCESS; - if (!mt_ctx_.is_prepared()) { + if (!mt_ctx_.is_prepared() || !exec_info_.prepare_version_.is_valid()) { SCN gts = SCN::min_scn(); SCN local_max_read_version = SCN::min_scn(); bool is_gts_ok = false; // Only the root participant require to request gts - const bool need_gts = is_root(); + const bool need_gts = is_root() || exec_info_.is_dup_tx_; if (need_gts) { if (OB_FAIL(get_gts_(gts))) { @@ -2343,6 +2394,27 @@ int ObPartTransCtx::generate_prepare_version_() mt_ctx_.before_prepare(gts); if (OB_FAIL(get_local_max_read_version_(local_max_read_version))) { TRANS_LOG(WARN, "get local max read version failed", KR(ret), K(*this)); + } else if(exec_info_.is_dup_tx_) { + if (!dup_table_follower_max_read_version_.is_valid()) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "invalid dup_table_follower_max_read_version_", K(ret), + K(dup_table_follower_max_read_version_), KPC(this)); + } else { + exec_info_.prepare_version_ = SCN::max(gts, local_max_read_version); + exec_info_.prepare_version_ = + SCN::max(exec_info_.prepare_version_, dup_table_follower_max_read_version_); + TRANS_LOG(INFO, + "generate prepare version for dup table trx", + K(exec_info_.prepare_version_), + K(gts), + K(local_max_read_version), + K(dup_table_follower_max_read_version_), + K(trans_id_), + K(ls_id_)); + } + if (exec_info_.prepare_version_ > gts) { + mt_ctx_.before_prepare(exec_info_.prepare_version_); + } } else { // should not overwrite the prepare version of other participants exec_info_.prepare_version_ = SCN::max(SCN::max(gts, local_max_read_version), @@ -2624,6 +2696,8 @@ int ObPartTransCtx::submit_redo_commit_info_log_() // state log already submitted, do nothing } else if (OB_FAIL(log_block.init(replay_hint, log_block_header))) { TRANS_LOG(WARN, "init log block failed", KR(ret), K(*this)); + } else if (OB_FAIL(submit_multi_data_source_(log_block))) { + TRANS_LOG(WARN, "submit multi source data failed", KR(ret), K(*this)); } else if (OB_FAIL(submit_redo_commit_info_log_(log_block, has_redo, helper))) { TRANS_LOG(WARN, "submit redo commit state log failed", KR(ret), K(*this)); } else if (OB_FAIL(prepare_log_cb_(!NEED_FINAL_CB, log_cb))) { @@ -2677,6 +2751,8 @@ int ObPartTransCtx::submit_redo_commit_info_log_(ObTxLogBlock &log_block, // state log already submitted, do nothing } else if (OB_FAIL(submit_redo_log_(log_block, has_redo, helper))) { TRANS_LOG(WARN, "submit redo log failed", KR(ret), K(*this)); + } else if (OB_FAIL(check_dup_trx_with_submitting_all_redo(log_block, helper))) { + TRANS_LOG(WARN, "check dup trx with submitting all redo failed", K(ret)); } else { ObTxCommitInfoLog commit_info_log( exec_info_.scheduler_, exec_info_.participants_, exec_info_.upstream_, @@ -2858,7 +2934,7 @@ int ObPartTransCtx::submit_prepare_log_() ret = OB_TRANS_KILLED; TRANS_LOG(WARN, "tx has been aborting, can not submit prepare log", K(ret)); } - + if (OB_SUCC(ret)) { if (OB_FAIL(log_block.init(replay_hint, log_block_header))) { TRANS_LOG(WARN, "init log block failed", KR(ret), K(*this)); @@ -2874,6 +2950,26 @@ int ObPartTransCtx::submit_prepare_log_() } } + + if (OB_SUCC(ret)) { + if (exec_info_.is_dup_tx_ && !is_dup_table_redo_sync_completed_()) { + if (OB_FAIL(submit_pending_log_block_(log_block, helper))) { + TRANS_LOG(WARN, "submit pending log block failed", K(ret)); + } else { + ret = OB_EAGAIN; + TRANS_LOG(INFO, "need wait redo sync finish for a dup table trx", K(ret), K(trans_id_), + K(ls_id_)); + } + } + } + + + if (OB_SUCC(ret)) { + if 
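[Note] For a dup table transaction, generate_prepare_version_() above raises the prepare version to cover three sources: the freshly fetched GTS, the local max read version, and the max read version collected from dup-table followers during redo sync, then calls before_prepare() again when the result exceeds the GTS. The core rule reduces to a three-way max; SCNs are modelled as plain integers in this sketch:

#include <algorithm>
#include <cstdint>

static int64_t dup_table_prepare_version(int64_t gts,
                                         int64_t local_max_read_version,
                                         int64_t follower_max_read_version)
{
  // the prepare version must not be smaller than any version at which a reader
  // on any replica may already have been served
  return std::max({gts, local_max_read_version, follower_max_read_version});
}

[End note]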
(OB_FAIL(errism_submit_prepare_log_())) { + TRANS_LOG(WARN, "errsim for submit prepare log", K(ret), KPC(this)); + } + } + if (OB_SUCC(ret)) { ObTxLogCb *log_cb = NULL; @@ -3023,6 +3119,18 @@ int ObPartTransCtx::submit_commit_log_() } } + if (OB_SUCC(ret)) { + if (exec_info_.is_dup_tx_ && !is_dup_table_redo_sync_completed_()) { + if (OB_FAIL(submit_pending_log_block_(log_block, helper))) { + TRANS_LOG(WARN, "submit pending log block failed", K(ret)); + } else { + ret = OB_EAGAIN; + TRANS_LOG(INFO, "need wait redo sync finish for a dup table trx", K(ret), K(trans_id_), + K(ls_id_)); + } + } + } + if (OB_SUCC(ret)) { const uint64_t checksum = (exec_info_.need_checksum_ && !is_incomplete_replay_ctx_ ? mt_ctx_.calc_checksum_all() : 0); @@ -4430,7 +4538,7 @@ int ObPartTransCtx::replay_active_info(const ObTxActiveInfoLog &log, } else { exec_info_.trans_type_ = log.get_trans_type(); if (log.is_dup_tx()) { - set_dup_table_tx(); + set_dup_table_tx_(); } trans_expired_time_ = log.get_tx_expired_time(); session_id_ = log.get_session_id(); @@ -4494,6 +4602,9 @@ int ObPartTransCtx::replay_commit_info(const ObTxCommitInfoLog &commit_info_log, } else if (OB_FAIL(set_app_trace_id_(commit_info_log.get_app_trace_id()))) { TRANS_LOG(WARN, "set app trace id error", K(ret), K(commit_info_log), K(*this)); } else { + if (OB_SUCC(ret) && commit_info_log.is_dup_tx()) { + set_dup_table_tx_(); + } // NOTE that set xa variables before set trans type set_2pc_upstream_(commit_info_log.get_upstream()); exec_info_.xid_ = commit_info_log.get_xid(); @@ -4508,11 +4619,6 @@ int ObPartTransCtx::replay_commit_info(const ObTxCommitInfoLog &commit_info_log, exec_info_.trans_type_ = TransType::SP_TRANS; } - if (commit_info_log.is_dup_tx()) { - set_dup_table_tx(); - mt_ctx_.before_prepare(timestamp); - } - if (!is_local_tx_() && !commit_info_log.get_upstream().is_valid()) { set_2pc_upstream_(ls_id_); TRANS_LOG(INFO, "set upstream to self", K(*this), K(commit_info_log)); @@ -4522,16 +4628,29 @@ int ObPartTransCtx::replay_commit_info(const ObTxCommitInfoLog &commit_info_log, sub_state_.set_info_log_submitted(); reset_redo_lsns_(); ObTwoPhaseCommitLogType two_phase_log_type = ObTwoPhaseCommitLogType::OB_LOG_TX_MAX; - if (is_incomplete_replay_ctx_) { - // incomplete replay ctx will exiting by replay commit/abort/clear, no need to depend on 2PC + if (OB_FAIL(ret)) { + // do nothing } else if (is_local_tx_()) { set_durable_state_(ObTxState::REDO_COMPLETE); set_upstream_state(ObTxState::REDO_COMPLETE); + } else if (is_incomplete_replay_ctx_) { + set_durable_state_(ObTxState::REDO_COMPLETE); + set_upstream_state(ObTxState::REDO_COMPLETE); + // incomplete replay ctx will exiting by replay commit/abort/clear, no need to depend on 2PC } else if (OB_FAIL(switch_log_type_(commit_info_log.LOG_TYPE, two_phase_log_type))) { TRANS_LOG(WARN, "switch log type failed", KR(ret), KPC(this)); } else if (OB_FAIL(ObTxCycleTwoPhaseCommitter::replay_log(two_phase_log_type))) { TRANS_LOG(WARN, "replay_log failed", KR(ret), KPC(this)); } + + if (OB_SUCC(ret) && commit_info_log.is_dup_tx()) { + // 1. the tx ctx must be committed or aborted. + // 2. a new lease log ts must be larger than commit log ts. 
+ // => no need to set the before_prepare version for incomplete replay + if (OB_FAIL(dup_table_before_preapre_(timestamp))) { + TRANS_LOG(WARN, "set commit_info scn as before_prepare_version failed", K(ret), KPC(this)); + } + } } if (OB_FAIL(ret)) { @@ -4591,6 +4710,8 @@ int ObPartTransCtx::replay_prepare(const ObTxPrepareLog &prepare_log, mt_ctx_.set_prepare_version(timestamp); ObTwoPhaseCommitLogType two_phase_log_type = ObTwoPhaseCommitLogType::OB_LOG_TX_MAX; if (is_incomplete_replay_ctx_) { + set_durable_state_(ObTxState::PREPARE); + set_upstream_state(ObTxState::PREPARE); // incomplete replay ctx will exiting by replay commit/abort/clear, no need to depend on 2PC } else if (OB_FAIL(switch_log_type_(prepare_log.LOG_TYPE, two_phase_log_type))) { TRANS_LOG(WARN, "switch log type failed", KR(ret), KPC(this)); @@ -4688,6 +4809,8 @@ int ObPartTransCtx::replay_commit(const ObTxCommitLog &commit_log, } else { ObTwoPhaseCommitLogType two_phase_log_type = ObTwoPhaseCommitLogType::OB_LOG_TX_MAX; if (is_incomplete_replay_ctx_) { + set_durable_state_(ObTxState::COMMIT); + set_upstream_state(ObTxState::COMMIT); // incomplete replay ctx will exiting by replay commit/abort/clear, no need to depend on // 2PC } else if (OB_FAIL(switch_log_type_(commit_log.LOG_TYPE, two_phase_log_type))) { @@ -4791,6 +4914,8 @@ int ObPartTransCtx::replay_clear(const ObTxClearLog &clear_log, } else { ObTwoPhaseCommitLogType two_phase_log_type = ObTwoPhaseCommitLogType::OB_LOG_TX_MAX; if (is_incomplete_replay_ctx_) { + set_durable_state_(ObTxState::CLEAR); + set_upstream_state(ObTxState::CLEAR); // incomplete replay ctx will exiting by replay commit/abort/clear, no need to depend on 2PC } else if (OB_FAIL(switch_log_type_(clear_log.LOG_TYPE, two_phase_log_type))) { TRANS_LOG(WARN, "switch log type failed", KR(ret), KPC(this)); @@ -4875,6 +5000,8 @@ int ObPartTransCtx::replay_abort(const ObTxAbortLog &abort_log, } else { ObTwoPhaseCommitLogType two_phase_log_type = ObTwoPhaseCommitLogType::OB_LOG_TX_MAX; if (is_incomplete_replay_ctx_) { + set_durable_state_(ObTxState::ABORT); + set_upstream_state(ObTxState::ABORT); // incomplete replay ctx will exiting by replay commit/abort/clear, no need to depend on 2PC } else if (OB_FAIL(switch_log_type_(abort_log.LOG_TYPE, two_phase_log_type))) { TRANS_LOG(WARN, "switch log type failed", KR(ret), KPC(this)); @@ -5229,6 +5356,17 @@ int ObPartTransCtx::switch_to_follower_forcedly(ObIArray &cb TRANS_LOG(WARN, "clear unlog callbacks", KR(ret), K(*this)); } + if (OB_SUCC(ret) && exec_info_.is_dup_tx_ && get_downstream_state() == ObTxState::REDO_COMPLETE + && !sub_state_.is_state_log_submitted()) { + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(dup_table_before_preapre_(exec_info_.max_applied_log_ts_, true/*before_replay*/))) { + TRANS_LOG(WARN, "set commit_info scn as before_prepare_version failed", K(ret), KPC(this)); + } else if (OB_FAIL(clear_dup_table_redo_sync_result_())) { + TRANS_LOG(WARN, "clear redo sync result failed", K(ret)); + } + } + // special handle commit triggered by local call: coordinator colocate with scheduler // let scheduler retry commit with RPC if required if (need_callback_scheduler_()) { @@ -5345,6 +5483,17 @@ int ObPartTransCtx::switch_to_follower_gracefully(ObIArray & } } + timeguard.click(); + if (exec_info_.is_dup_tx_ && get_downstream_state() == ObTxState::REDO_COMPLETE + && !sub_state_.is_state_log_submitted()) { + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(dup_table_before_preapre_(exec_info_.max_applied_log_ts_, true/*before_replay*/)))
{ + TRANS_LOG(WARN, "set commit_info scn as befre_prepare_version failed", K(ret), KPC(this)); + } else if (OB_FAIL(clear_dup_table_redo_sync_result_())) { + TRANS_LOG(WARN, "clear redo sync result failed", K(ret)); + } + } timeguard.click(); if (OB_FAIL(ret)) { state_helper.restore_state(); @@ -5839,6 +5988,34 @@ int ObPartTransCtx::prepare_mul_data_source_tx_end_(bool is_commit) return ret; } +#ifdef ERRSIM +ERRSIM_POINT_DEF(EN_DUP_TABLE_REDO_SYNC) +ERRSIM_POINT_DEF(EN_SUBMIT_TX_PREPARE_LOG) +#endif + +int ObPartTransCtx::errism_dup_table_redo_sync_() +{ + + int ret = OB_SUCCESS; + +#ifdef ERRSIM + ret = EN_DUP_TABLE_REDO_SYNC; +#endif + return ret; +} + +int ObPartTransCtx::errism_submit_prepare_log_() +{ + + int ret = OB_SUCCESS; + +#ifdef ERRSIM + ret = EN_SUBMIT_TX_PREPARE_LOG; +#endif + + return ret; +} + int ObPartTransCtx::notify_table_lock_(const SCN &log_ts, const bool for_replay, const ObTxBufferNodeArray ¬ify_array, @@ -6069,20 +6246,172 @@ int ObPartTransCtx::del_retain_ctx() int ObPartTransCtx::search_unsubmitted_dup_table_redo_() { - return mt_ctx_.get_redo_generator().search_unsubmitted_dup_tablet_redo(); + int ret = OB_SUCCESS; + + if (ls_tx_ctx_mgr_->get_ls_log_adapter()->has_dup_tablet()) { + if (OB_FAIL(submit_log_impl_(ObTxLogType::TX_COMMIT_INFO_LOG))) { + TRANS_LOG(WARN, "submit commit info log failed", K(ret), KPC(this)); + // } else if (OB_FAIL(check_tablet_modify_record_())) { + // TRANS_LOG(WARN, "check the modify tablet failed", K(ret)); + } + } + return ret; + // return mt_ctx_.get_redo_generator().search_unsubmitted_dup_tablet_redo(); } int ObPartTransCtx::dup_table_tx_redo_sync_() { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; - // init state => submit commit info log - // redo_complete state => validate reod sync (follower replay ts) - // redo sync finish -> submit prepare log + bool redo_sync_finish = false; + share::SCN tmp_max_read_version; + tmp_max_read_version.set_invalid(); + + if (busy_cbs_.get_size() > 0 && get_downstream_state() < ObTxState::REDO_COMPLETE) { + ret = OB_EAGAIN; + TRANS_LOG(INFO, "start redo sync after the on_success of commit info log ", K(ret), KPC(this)); + } else if (get_downstream_state() != ObTxState::REDO_COMPLETE || !exec_info_.is_dup_tx_) { + ret = OB_STATE_NOT_MATCH; + TRANS_LOG(WARN, "invalid dup trx state", K(ret), KPC(this)); + } else if (is_2pc_logging_()) { + ret = OB_EAGAIN; + TRANS_LOG(WARN, "the dup table participant is 2pc logging, need retry", K(ret), KPC(this)); + } else if (is_follower_()) { + ret = OB_NOT_MASTER; + TRANS_LOG(WARN, "can not execute redo sync on a follower", KPC(this)); + } else if (OB_FAIL(errism_dup_table_redo_sync_())) { + TRANS_LOG(WARN, "errsim for dup table redo sync", K(ret), KPC(this)); + } else if (is_dup_table_redo_sync_completed_()) { + ret = OB_SUCCESS; + redo_sync_finish = true; + bool no_need_submit_log = false; + if (OB_TMP_FAIL(drive_self_2pc_phase(ObTxState::PREPARE))) { + TRANS_LOG(WARN, "do prepare failed after redo sync", K(tmp_ret), KPC(this)); + } else { + } + } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->check_redo_sync_completed( + trans_id_, exec_info_.max_applied_log_ts_, redo_sync_finish, + tmp_max_read_version))) { + TRANS_LOG(WARN, "check redo sync completed failed", K(ret), K(redo_sync_finish), + K(tmp_max_read_version), KPC(this)); + } else if (redo_sync_finish) { + if (!tmp_max_read_version.is_valid()) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "invalid dup table follower's max_read_version", K(ret), + K(tmp_max_read_version), KPC(this)); + } 
else { + dup_table_follower_max_read_version_ = tmp_max_read_version; + /* + * drive into prepare state in the next do_prepare operation + * */ + TRANS_LOG(INFO, "finish redo sync for dup table trx", K(ret), K(redo_sync_finish), + K(dup_table_follower_max_read_version_), KPC(this)); + } + } else { + ret = OB_EAGAIN; + TRANS_LOG(INFO, "redo sync need retry", K(ret), K(redo_sync_finish), K(tmp_max_read_version), + K(dup_table_follower_max_read_version_), KPC(this)); + } return ret; } +int ObPartTransCtx::submit_pending_log_block_(ObTxLogBlock &log_block, + memtable::ObRedoLogSubmitHelper &helper) +{ + int ret = OB_SUCCESS; + + if (log_block.get_cb_arg_array().empty()) { + TRANS_LOG(INFO, "no need to submit pending log block because of empty", K(ret), K(trans_id_), + K(ls_id_), K(log_block)); + } else { + bool need_final_cb = false; + if (is_contain(log_block.get_cb_arg_array(), ObTxLogType::TX_COMMIT_LOG) + || is_contain(log_block.get_cb_arg_array(), ObTxLogType::TX_ABORT_LOG)) { + need_final_cb = true; + } + + ObTxLogCb *log_cb = NULL; + if (OB_FAIL(prepare_log_cb_(need_final_cb, log_cb))) { + if (OB_UNLIKELY(OB_TX_NOLOGCB != ret)) { + TRANS_LOG(WARN, "get log cb failed", KR(ret), K(*this)); + } + } else if (log_block.get_cb_arg_array().count() == 0) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(ERROR, "cb arg array is empty", K(ret), K(log_block)); + return_log_cb_(log_cb); + log_cb = NULL; + } else if (OB_FAIL(acquire_ctx_ref_())) { + TRANS_LOG(ERROR, "acquire ctx ref failed", KR(ret), K(*this)); + } else if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->submit_log( + log_block.get_buf(), log_block.get_size(), share::SCN::min_scn(), log_cb, + false))) { + TRANS_LOG(WARN, "submit log to clog adapter failed", KR(ret), K(*this)); + return_log_cb_(log_cb); + log_cb = NULL; + release_ctx_ref_(); + } else if (OB_FAIL(after_submit_log_(log_block, log_cb, &helper))) { + } else { + // TRANS_LOG(INFO, "submit prepare log in clog adapter success", K(*log_cb)); + log_cb = NULL; + } + } + + return ret; +} + +int ObPartTransCtx::check_dup_trx_with_submitting_all_redo(ObTxLogBlock &log_block, + memtable::ObRedoLogSubmitHelper &helper) +{ + int ret = OB_SUCCESS; + + // 1. submit all redo in dup ls + // 2. check modified tablet + // 3. if not dup_tx, do nothing + // 4. if dup_tx: + // a. set is_dup_tx and upstream + // b. submit commit info and start dup_table_redo_sync in on_success + // c. 
return OB_EAGAIN + if (ls_tx_ctx_mgr_->get_ls_log_adapter()->has_dup_tablet()) { + if (!sub_state_.is_info_log_submitted() && get_downstream_state() < ObTxState::REDO_COMPLETE) { + + ret = submit_pending_log_block_(log_block, helper); + + TRANS_LOG(INFO, "submit all redo log for dup table check", K(ret), + K(exec_info_.tablet_modify_record_.count()), KPC(this)); + if (OB_FAIL(ret)) { + // do nothing + } else if (OB_FAIL(check_tablet_modify_record_())) { + TRANS_LOG(WARN, "check the modify tablet failed", K(ret)); + } + } + + } else { + // do nothing + } + + if (OB_SUCC(ret) && exec_info_.is_dup_tx_) { + + if (exec_info_.participants_.count() == 1 && !exec_info_.upstream_.is_valid()) { + set_2pc_upstream_(ls_id_); + } + } + + return ret; +} + +bool ObPartTransCtx::is_dup_table_redo_sync_completed_() +{ + bool redo_sync_completed = true; + + if (exec_info_.state_ <= ObTxState::REDO_COMPLETE) { + redo_sync_completed = (dup_table_follower_max_read_version_.is_valid()); + } + + return redo_sync_completed; +} + int ObPartTransCtx::dup_table_tx_pre_commit_() { int ret = OB_SUCCESS; @@ -6093,6 +6422,138 @@ int ObPartTransCtx::dup_table_tx_pre_commit_() return ret; } +int ObPartTransCtx::merge_tablet_modify_record_(const common::ObTabletID &tablet_id) +{ + int ret = OB_SUCCESS; + + if (exec_info_.tablet_modify_record_.count() >= MAX_TABLET_MODIFY_RECORD_COUNT) { + // do nothing + } else { + bool is_contain = false; + for (int i = 0; i < exec_info_.tablet_modify_record_.count(); i++) { + if (exec_info_.tablet_modify_record_[i] == tablet_id) { + is_contain = true; + } + } + if (!is_contain && OB_FAIL(exec_info_.tablet_modify_record_.push_back(tablet_id))) { + TRANS_LOG(WARN, "push back tablet id failed", K(ret), K(tablet_id), + K(exec_info_.tablet_modify_record_)); + } + } + + return ret; +} + +int ObPartTransCtx::check_tablet_modify_record_() +{ + int ret = OB_SUCCESS; + + if (!exec_info_.is_dup_tx_) { + bool has_dup_tablet = false; + if (!ls_tx_ctx_mgr_->get_ls_log_adapter()->has_dup_tablet()) { + has_dup_tablet = false; + TRANS_LOG(INFO, "no dup tablet in this ls", K(has_dup_tablet), K(trans_id_), K(ls_id_)); + } else if (exec_info_.tablet_modify_record_.count() >= MAX_TABLET_MODIFY_RECORD_COUNT) { + has_dup_tablet = true; + TRANS_LOG(INFO, "too much tablet, consider it as a dup trx", K(ret), K(has_dup_tablet), + K(exec_info_.tablet_modify_record_), KPC(this)); + } else { + has_dup_tablet = false; + for (int i = 0; i < exec_info_.tablet_modify_record_.count(); i++) { + + if (OB_FAIL(ls_tx_ctx_mgr_->get_ls_log_adapter()->check_dup_tablet_in_redo( + exec_info_.tablet_modify_record_[i], has_dup_tablet, share::SCN::min_scn(), + share::SCN::max_scn()))) { + TRANS_LOG(WARN, "check dup tablet failed", K(ret), K(trans_id_), K(ls_id_), + K(exec_info_.tablet_modify_record_[i])); + } else if (has_dup_tablet) { + TRANS_LOG(INFO, "modify a dup tablet, consider it as a dup trx", K(ret), + K(has_dup_tablet), K(exec_info_.tablet_modify_record_[i]), + K(exec_info_.tablet_modify_record_.count()), KPC(this)); + break; + } + } + } + if (has_dup_tablet) { + set_dup_table_tx_(); + } + } + + return ret; +} + +int ObPartTransCtx::clear_dup_table_redo_sync_result_() +{ + int ret = OB_SUCCESS; + + dup_table_follower_max_read_version_.set_invalid(); + + return ret; +} + +int ObPartTransCtx::dup_table_before_preapre_(const share::SCN &before_prepare_version, + const bool before_replay) +{ + int ret = OB_SUCCESS; + + if (get_downstream_state() != ObTxState::REDO_COMPLETE + || (!before_replay && get_upstream_state() != 
ObTxState::REDO_COMPLETE) + || (before_replay && get_upstream_state() > ObTxState::PREPARE) || !exec_info_.is_dup_tx_) { + ret = OB_STATE_NOT_MATCH; + TRANS_LOG(WARN, "unexpected dup trx state", K(ret), KPC(this)); + } else if (!before_prepare_version.is_valid()) { + ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "invalid before_prepare version", K(ret), K(before_prepare_version), + K(before_replay), KPC(this)); + } else if (mt_ctx_.get_trans_version().is_max() || !mt_ctx_.get_trans_version().is_valid() + || mt_ctx_.get_trans_version() < before_prepare_version) { + mt_ctx_.before_prepare(before_prepare_version); + } + + if (OB_SUCC(ret)) { + DUP_TABLE_LOG(INFO, "set dup_table before prepare version successfully", K(ret), + K(before_prepare_version), K(before_replay), KPC(this)); + } + + return ret; +} + +int ObPartTransCtx::retry_dup_trx_before_prepare(const share::SCN &before_prepare_version) +{ + int ret = OB_SUCCESS; + + CtxLockGuard guard(lock_); + + if (!is_follower_()) { + ret = OB_NOT_FOLLOWER; + TRANS_LOG(WARN, "leader need not handle a before_prepare retry request", K(ret), + K(before_prepare_version), K(ls_id_), K(trans_id_)); + } else if (OB_FAIL(dup_table_before_preapre_(before_prepare_version))) { + TRANS_LOG(WARN, "set dup table before_prepare_version failed", K(ret), + K(before_prepare_version), KPC(this)); + } + + return ret; +} + +int ObPartTransCtx::merge_tablet_modify_record(const common::ObTabletID &tablet_id) +{ + int ret = OB_SUCCESS; + + CtxLockGuard guard(lock_); + + if (is_exiting_) { + ret = OB_TRANS_CTX_NOT_EXIST; + TRANS_LOG(WARN, "exiting part_ctx", K(ret), K(tablet_id), KPC(this)); + } else if (!is_follower_()) { + ret = OB_NOT_FOLLOWER; + TRANS_LOG(WARN, "can not invoke on leader", K(ret), K(tablet_id), KPC(this)); + } else if (OB_FAIL(merge_tablet_modify_record_(tablet_id))) { + TRANS_LOG(WARN, "merge tablet modify record failed", K(ret)); + } + return ret; +} + int ObPartTransCtx::sub_prepare(const ObLSArray &parts, const MonotonicTs &commit_time, const int64_t &expire_ts, diff --git a/src/storage/tx/ob_trans_part_ctx.h b/src/storage/tx/ob_trans_part_ctx.h index 07b4e952b1..e5a7ddd1ff 100644 --- a/src/storage/tx/ob_trans_part_ctx.h +++ b/src/storage/tx/ob_trans_part_ctx.h @@ -95,6 +95,7 @@ class ObPartTransCtx : public ObTransCtx, friend class MockObTxCtx; friend class ObTxELRHandler; friend class ObIRetainCtxCheckFunctor; + friend class memtable::ObRedoLogGenerator; public: ObPartTransCtx() : ObTransCtx("participant", ObTransCtxType::PARTICIPANT), ObTsCbTask(), @@ -289,11 +290,8 @@ public: // ======================================================== // newly added for 4.0 - void set_dup_table_tx() - { - exec_info_.is_dup_tx_ = true; - exec_info_.trans_type_ = TransType::DIST_TRANS; - } + int retry_dup_trx_before_prepare(const share::SCN &before_prepare_version); + int merge_tablet_modify_record(const common::ObTabletID &tablet_id); int set_scheduler(const common::ObAddr &scheduler); const common::ObAddr &get_scheduler() const; int on_success(ObTxLogCb *log_cb); @@ -435,6 +433,8 @@ private: bool &has_redo, memtable::ObRedoLogSubmitHelper &helper); + int submit_pending_log_block_(ObTxLogBlock &log_block, memtable::ObRedoLogSubmitHelper &helper); + int submit_big_segment_log_(); int prepare_big_segment_submit_(ObTxLogCb *segment_cb, const share::SCN &base_scn, @@ -467,9 +467,21 @@ private: // and is callbacked via on_failure, redo lsns should be fixed int fix_redo_lsns_(const ObTxLogCb *log_cb); - int search_unsubmitted_dup_table_redo_(); + int 
search_unsubmitted_dup_table_redo_() __attribute__((__noinline__)); int dup_table_tx_redo_sync_(); + int check_dup_trx_with_submitting_all_redo(ObTxLogBlock &log_block, + memtable::ObRedoLogSubmitHelper &helper); + bool is_dup_table_redo_sync_completed_(); int dup_table_tx_pre_commit_(); + int merge_tablet_modify_record_(const common::ObTabletID &tablet_id); + int check_tablet_modify_record_(); + void set_dup_table_tx_() + { + exec_info_.is_dup_tx_ = true; + exec_info_.trans_type_ = TransType::DIST_TRANS; + } + int dup_table_before_preapre_(const share::SCN &before_prepare_version, const bool before_replay = false); + int clear_dup_table_redo_sync_result_(); int do_local_tx_end_(TxEndAction tx_end_action); // int on_local_tx_end_(TxEndAction tx_end_action); @@ -533,6 +545,8 @@ private: int prepare_mul_data_source_tx_end_(bool is_commit); + int errism_dup_table_redo_sync_(); + int errism_submit_prepare_log_(); protected: virtual int get_gts_(share::SCN &gts); virtual int wait_gts_elapse_commit_version_(bool &need_wait); @@ -818,6 +832,8 @@ private: share::SCN start_working_log_ts_; + share::SCN dup_table_follower_max_read_version_; + int16_t retain_cause_; ObTxState upstream_state_; diff --git a/src/storage/tx/ob_trans_service.cpp b/src/storage/tx/ob_trans_service.cpp index 6c9673a0f6..c3accf9c14 100644 --- a/src/storage/tx/ob_trans_service.cpp +++ b/src/storage/tx/ob_trans_service.cpp @@ -84,6 +84,8 @@ int ObTransService::mtl_init(ObTransService *&it) TRANS_LOG(ERROR, "rpc init error", KR(ret)); } else if (OB_FAIL(it->dup_table_rpc_def_.init(it, req_transport, self))) { TRANS_LOG(ERROR, "dup table rpc init error", KR(ret)); + } else if (OB_FAIL(it->dup_table_rpc_impl_.init(req_transport,self))) { + TRANS_LOG(ERROR, "dup table rpc init error", KR(ret)); } else if (OB_FAIL(it->location_adapter_def_.init(schema_service, location_service))) { TRANS_LOG(ERROR, "location adapter init error", KR(ret)); } else if (OB_FAIL(it->gti_source_def_.init(self, req_transport))) { @@ -138,8 +140,8 @@ int ObTransService::init(const ObAddr &self, ret = OB_INVALID_ARGUMENT; } else if (OB_FAIL(timer_.init("TransTimeWheel"))) { TRANS_LOG(ERROR, "timer init error", KR(ret)); - } else if (OB_FAIL(dup_table_lease_timer_.init())) { - TRANS_LOG(ERROR, "dup table lease timer init error", K(ret)); + } else if (OB_FAIL(dup_table_scan_timer_.init())) { + TRANS_LOG(ERROR, "dup table scan timer init error", K(ret)); } else if (OB_FAIL(ObSimpleThreadPool::init(2, msg_task_cnt, "TransService", tenant_id))) { TRANS_LOG(WARN, "thread pool init error", KR(ret)); } else if (OB_FAIL(tx_desc_mgr_.init(std::bind(&ObTransService::gen_trans_id_, @@ -148,6 +150,12 @@ int ObTransService::init(const ObAddr &self, TRANS_LOG(WARN, "ObTxDescMgr init error", K(ret)); } else if (OB_FAIL(tx_ctx_mgr_.init(tenant_id, ts_mgr, this))) { TRANS_LOG(WARN, "tx_ctx_mgr_ init error", KR(ret)); + } else if (OB_FAIL(dup_table_loop_worker_.init())) { + TRANS_LOG(WARN, "init dup table loop worker failed", K(ret)); + } else if (OB_FAIL(dup_tablet_scan_task_.make(tenant_id, + &dup_table_scan_timer_, + &dup_table_loop_worker_))) { + TRANS_LOG(WARN, "init dup_tablet_scan_task_ failed",K(ret)); } else { self_ = self; tenant_id_ = tenant_id; @@ -199,12 +207,16 @@ int ObTransService::start() ret = OB_ERR_UNEXPECTED; } else if (OB_FAIL(timer_.start())) { TRANS_LOG(WARN, "ObTransTimer start error", K(ret)); - } else if (OB_FAIL(dup_table_lease_timer_.start())) { - TRANS_LOG(ERROR, "dup table lease timer start error", K(ret)); + } else if 
(OB_FAIL(dup_table_scan_timer_.start())) { + TRANS_LOG(WARN, "dup_table_scan_timer_ start error", K(ret)); + } else if (OB_FAIL(dup_table_scan_timer_.register_timeout_task( + dup_tablet_scan_task_, + ObDupTabletScanTask::DUP_TABLET_SCAN_INTERVAL))) { + TRANS_LOG(WARN, "register dup table scan task error", K(ret)); } else if (OB_FAIL(rpc_->start())) { TRANS_LOG(WARN, "ObTransRpc start error", KR(ret)); - } else if (OB_FAIL(dup_table_rpc_->start())) { - TRANS_LOG(WARN, "ObDupTableRpc start error", K(ret)); + // } else if (OB_FAIL(dup_table_rpc_->start())) { + // TRANS_LOG(WARN, "ObDupTableRpc start error", K(ret)); } else if (OB_FAIL(gti_source_->start())) { TRANS_LOG(WARN, "ObGtiSource start error", KR(ret)); } else if (OB_FAIL(tx_ctx_mgr_.start())) { @@ -213,6 +225,7 @@ int ObTransService::start() TRANS_LOG(WARN, "tx_desc_mgr_ start error", KR(ret)); } else { is_running_ = true; + TRANS_LOG(INFO, "transaction service start success", KPC(this)); } @@ -235,12 +248,13 @@ void ObTransService::stop() TRANS_LOG(WARN, "tx_desc_mgr stop error", KR(ret)); } else if (OB_FAIL(timer_.stop())) { TRANS_LOG(WARN, "ObTransTimer stop error", K(ret)); - } else if (OB_FAIL(dup_table_lease_timer_.stop())) { - TRANS_LOG(ERROR, "dup table lease timer stop error", K(ret)); + } else if (OB_FAIL(dup_table_scan_timer_.stop())) { + TRANS_LOG(WARN, "dup_table_scan_timer_ stop error", K(ret)); } else { rpc_->stop(); dup_table_rpc_->stop(); gti_source_->stop(); + dup_table_loop_worker_.stop(); ObSimpleThreadPool::stop(); is_running_ = false; TRANS_LOG(INFO, "transaction service stop success", KPC(this)); @@ -263,12 +277,13 @@ int ObTransService::wait_() TRANS_LOG(WARN, "tx_desc_mgr_ wait error", KR(ret)); } else if (OB_FAIL(timer_.wait())) { TRANS_LOG(WARN, "ObTransTimer wait error", K(ret)); - } else if (OB_FAIL(dup_table_lease_timer_.wait())) { - TRANS_LOG(ERROR, "dup table lease timer wait error", K(ret)); + } else if (OB_FAIL(dup_table_scan_timer_.wait())) { + TRANS_LOG(WARN, "dup_table_scan_timer_ wait error", K(ret)); } else { rpc_->wait(); - dup_table_rpc_->wait(); + // dup_table_rpc_->wait(); gti_source_->wait(); + dup_table_loop_worker_.wait(); TRANS_LOG(INFO, "transaction service wait success", KPC(this)); } return ret; @@ -282,7 +297,9 @@ void ObTransService::destroy() wait(); } timer_.destroy(); - dup_table_lease_timer_.destroy(); + dup_table_scan_timer_.destroy(); + dup_tablet_scan_task_.destroy(); + dup_table_loop_worker_.destroy(); if (use_def_) { rpc_->destroy(); location_adapter_->destroy(); diff --git a/src/storage/tx/ob_trans_service.h b/src/storage/tx/ob_trans_service.h index 134a96d11e..c2f9488fbd 100644 --- a/src/storage/tx/ob_trans_service.h +++ b/src/storage/tx/ob_trans_service.h @@ -29,6 +29,7 @@ #include "ob_trans_rpc.h" #include "ob_trans_ctx_mgr.h" #include "ob_dup_table_rpc.h" +#include "ob_dup_table_base.h" #include "ob_trans_memory_stat.h" #include "ob_trans_event.h" #include "ob_dup_table.h" @@ -40,6 +41,7 @@ #include "observer/ob_server_struct.h" #include "common/storage/ob_sequence.h" #include "ob_tx_elr_util.h" +#include "storage/tx/ob_dup_table_util.h" #include "ob_tx_free_route.h" #include "ob_tx_free_route_msg.h" @@ -180,6 +182,8 @@ public: ObTransTimer &get_trans_timer() { return timer_; } ObITransRpc *get_trans_rpc() { return rpc_; } ObIDupTableRpc *get_dup_table_rpc() { return dup_table_rpc_; } + ObDupTableRpc &get_dup_table_rpc_impl() { return dup_table_rpc_impl_; } + ObDupTableLoopWorker &get_dup_table_loop_worker() { return dup_table_loop_worker_; } ObILocationAdapter 
*get_location_adapter() { return location_adapter_; } common::ObMySQLProxy *get_mysql_proxy() { return GCTX.sql_proxy_; } bool is_running() const { return is_running_; } @@ -254,14 +258,13 @@ protected: ObLocationAdapter location_adapter_def_; // transaction timer ObTransTimer timer_; - // dup table lease timer - ObDupTableLeaseTimer dup_table_lease_timer_; + ObDupTableLeaseTimer dup_table_scan_timer_; ObTxVersionMgr tx_version_mgr_; protected: bool use_def_; ObITransRpc *rpc_; ObIDupTableRpc *dup_table_rpc_; - ObDupTableRpc dup_table_rpc_def_; + ObDupTableRpc_old dup_table_rpc_def_; // the adapter between transaction and location cache ObILocationAdapter *location_adapter_; // the adapter between transaction and clog @@ -279,6 +282,11 @@ private: // txDesc's manager ObTxDescMgr tx_desc_mgr_; + //4.0 dup_table + ObDupTabletScanTask dup_tablet_scan_task_; + ObDupTableLoopWorker dup_table_loop_worker_; + ObDupTableRpc dup_table_rpc_impl_; + obrpc::ObSrvRpcProxy *rpc_proxy_; ObTxELRUtil elr_util_; private: diff --git a/src/storage/tx/ob_trans_service_v4.cpp b/src/storage/tx/ob_trans_service_v4.cpp index d2f9587c56..e17ecf6b82 100644 --- a/src/storage/tx/ob_trans_service_v4.cpp +++ b/src/storage/tx/ob_trans_service_v4.cpp @@ -477,6 +477,10 @@ int ObTransService::handle_tx_commit_result_(ObTxDesc &tx, max_delay = 300 * 1000; } + if (OB_EAGAIN == result) { + max_delay = 300 * 1000; + } + if (OB_FAIL(register_commit_retry_task_(tx, max_delay))) { commit_fin = true; state = ObTxDesc::State::ROLLED_BACK; @@ -983,6 +987,7 @@ int ObTransService::get_read_store_ctx(const ObTxReadSnapshot &snapshot, snapshot.source_, ls_id, store_ctx.timeout_, + store_ctx.tablet_id_, *store_ctx.ls_))) { TRANS_LOG(WARN, "replica not readable", K(ret), K(snapshot), K(ls_id), K(store_ctx)); } @@ -1348,12 +1353,17 @@ int ObTransService::check_replica_readable_(const SCN &snapshot, const ObTxReadSnapshot::SRC src, const share::ObLSID &ls_id, const int64_t expire_ts, + const ObTabletID &tablet_id, ObLS &ls) { int ret = OB_SUCCESS; bool leader = false; int64_t epoch = 0; + bool dup_table_readable = false; + share::SCN max_replayed_scn; + max_replayed_scn.reset(); bool readable = check_ls_readable_(ls, snapshot, src); + if (!readable) { if (OB_FAIL(ls.get_tx_svr()->get_tx_ls_log_adapter()->get_role(leader, epoch))) { TRANS_LOG(WARN, "get replica status fail", K(ls_id)); @@ -1363,6 +1373,27 @@ int ObTransService::check_replica_readable_(const SCN &snapshot, ObTxReadSnapshot::SRC::WEAK_READ_SERVICE == src) { // to compatible with SQL's retry-logic, trigger re-choose replica ret = OB_REPLICA_NOT_READABLE; + } else if (OB_FAIL(ls.get_max_decided_scn(max_replayed_scn))) { + TRANS_LOG(WARN, "get max decided scn failed", K(ret)); + } else if (OB_FAIL(ls.get_tx_svr()->get_tx_ls_log_adapter()->check_dup_tablet_readable( + tablet_id, + snapshot, + leader, + max_replayed_scn, + dup_table_readable))) { + TRANS_LOG(WARN, "check dup tablet readable error", K(ret)); + } else if (dup_table_readable) { + TRANS_LOG(INFO, + "the dup tablet is readable now", + K(ret), + K(tablet_id), + K(snapshot), + K(leader), + K(max_replayed_scn), + K(dup_table_readable), + K(ls_id), + K(expire_ts)); + ret = OB_SUCCESS; } else { if (OB_SUCC(wait_follower_readable_(ls, expire_ts, snapshot, src))) { TRANS_LOG(INFO, "read from follower", K(snapshot), K(snapshot), K(ls)); diff --git a/src/storage/tx/ob_trans_service_v4.h b/src/storage/tx/ob_trans_service_v4.h index 88516f52d7..def8f2f0ca 100644 --- a/src/storage/tx/ob_trans_service_v4.h +++ 
b/src/storage/tx/ob_trans_service_v4.h @@ -220,6 +220,7 @@ int check_replica_readable_(const share::SCN &snapshot, const ObTxReadSnapshot::SRC src, const share::ObLSID &ls_id, const int64_t expired_ts, + const ObTabletID &tablet_id, ObLS &ls); bool check_ls_readable_(ObLS &ls, const share::SCN &snapshot, @@ -272,7 +273,7 @@ int get_tx_state_from_tx_table_(const share::ObLSID &lsid, const ObTransID &tx_id, int &state, share::SCN &commit_version); -int gen_trans_id_(ObTransID &trans_id); +OB_NOINLINE int gen_trans_id_(ObTransID &trans_id); bool commit_need_retry_(const int ret); // for xa int build_tx_sub_prepare_msg_(const ObTxDesc &tx, ObTxSubPrepareMsg &msg); diff --git a/src/storage/tx/ob_trans_timer.cpp b/src/storage/tx/ob_trans_timer.cpp index c2f195a485..5b41a11539 100644 --- a/src/storage/tx/ob_trans_timer.cpp +++ b/src/storage/tx/ob_trans_timer.cpp @@ -330,7 +330,7 @@ int ObDupTableLeaseTimer::init() if (is_inited_) { TRANS_LOG(WARN, "ObDupTableLeaseTimer inited twice"); ret = OB_INIT_TWICE; - } else if (OB_FAIL(tw_.init(TRANS_TIMEOUT_TASK_PRECISION_US, 1, timer_name))) { + } else if (OB_FAIL(tw_.init(DUP_TABLE_TIMEOUT_TASK_PRECISION_US, 1, timer_name))) { TRANS_LOG(ERROR, "dup table lease timer init error", K(ret)); } else { TRANS_LOG(INFO, "dup table lease timer inited success"); diff --git a/src/storage/tx/ob_trans_timer.h b/src/storage/tx/ob_trans_timer.h index ea8e827144..199ee9ea26 100644 --- a/src/storage/tx/ob_trans_timer.h +++ b/src/storage/tx/ob_trans_timer.h @@ -143,6 +143,7 @@ public: virtual ~ObDupTableLeaseTimer() {} int init(); private: + static const int64_t DUP_TABLE_TIMEOUT_TASK_PRECISION_US = 3 * 1000 * 1000L; DISALLOW_COPY_AND_ASSIGN(ObDupTableLeaseTimer); }; diff --git a/src/storage/tx/ob_tx_2pc_ctx_impl.cpp b/src/storage/tx/ob_tx_2pc_ctx_impl.cpp index f24f7b6c1e..a599d13764 100644 --- a/src/storage/tx/ob_tx_2pc_ctx_impl.cpp +++ b/src/storage/tx/ob_tx_2pc_ctx_impl.cpp @@ -75,6 +75,11 @@ int ObPartTransCtx::do_prepare(bool &no_need_submit_log) int ret = OB_SUCCESS; no_need_submit_log = false; + // common operation + if (OB_FAIL(search_unsubmitted_dup_table_redo_())) { + TRANS_LOG(WARN, "search unsubmitted dup table redo", K(ret), KPC(this)); + } + if (OB_SUCC(ret)) { if (sub_state_.is_force_abort()) { if (OB_FAIL(compensate_abort_log_())) { @@ -87,7 +92,7 @@ int ObPartTransCtx::do_prepare(bool &no_need_submit_log) } if (OB_SUCC(ret)) { - if (exec_info_.is_dup_tx_ || OB_SUCC(search_unsubmitted_dup_table_redo_())) { + if (exec_info_.is_dup_tx_ && !is_dup_table_redo_sync_completed_()) { no_need_submit_log = true; if (OB_FAIL(dup_table_tx_redo_sync_())) { TRANS_LOG(WARN, "dup table tx redo sync failed", K(ret)); @@ -97,6 +102,11 @@ int ObPartTransCtx::do_prepare(bool &no_need_submit_log) } } + if (exec_info_.is_dup_tx_) { + TRANS_LOG(INFO, "do prepare for dup table", K(ret), K(dup_table_follower_max_read_version_), + K(is_dup_table_redo_sync_completed_()), KPC(this)); + } + if (OB_SUCC(ret)) { if (OB_FAIL(prepare_mul_data_source_tx_end_(true))) { TRANS_LOG(WARN, "trans commit need retry", K(ret), K(trans_id_), K(ls_id_)); diff --git a/src/storage/tx/ob_tx_big_segment_buf.cpp b/src/storage/tx/ob_tx_big_segment_buf.cpp index 4ffbbfa884..7dc0ec93ac 100644 --- a/src/storage/tx/ob_tx_big_segment_buf.cpp +++ b/src/storage/tx/ob_tx_big_segment_buf.cpp @@ -46,6 +46,102 @@ int ObTxBigSegmentBuf::init_for_serialize(int64_t segment_len) return ret; } +char *ObTxBigSegmentBuf::get_serialize_buf() +{ + char *buf = nullptr; + if (!is_inited() || !for_serialize_) { + buf = 
nullptr; + } else { + buf = segment_buf_; + } + + return buf; +} + +int64_t ObTxBigSegmentBuf::get_serialize_buf_len() +{ + int64_t buf_len = 0; + if (!is_inited() || !for_serialize_) { + buf_len = 0; + } else { + buf_len = segment_buf_len_; + } + return buf_len; +} + +int64_t ObTxBigSegmentBuf::get_serialize_buf_pos() +{ + int64_t buf_pos = 0; + if (!is_inited() || !for_serialize_) { + buf_pos = INT64_MAX; + } else { + buf_pos = segment_data_len_; + } + return buf_pos; +} + +int ObTxBigSegmentBuf::set_serialize_pos(const int64_t ser_pos) +{ + int ret = OB_SUCCESS; + + if (ser_pos < 0 || ser_pos > segment_buf_len_ || !is_inited() || !for_serialize_) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret), K(ser_pos), KPC(this)); + } else { + segment_data_len_ = ser_pos; + } + + return ret; +} + +const char *ObTxBigSegmentBuf::get_deserialize_buf() +{ + char *buf = nullptr; + if (!is_inited() || for_serialize_) { + buf = nullptr; + } else { + buf = segment_buf_; + } + + return buf; +} + +int64_t ObTxBigSegmentBuf::get_deserialize_buf_len() +{ + int64_t buf_len = 0; + if (!is_inited() || for_serialize_) { + buf_len = 0; + } else { + buf_len = segment_buf_len_; + } + return buf_len; +} + +int64_t ObTxBigSegmentBuf::get_deserialize_buf_pos() +{ + int64_t buf_pos = 0; + if (!is_inited() || for_serialize_) { + buf_pos = INT64_MAX; + } else { + buf_pos = segment_pos_; + } + return buf_pos; +} + +int ObTxBigSegmentBuf::set_deserialize_pos(const int64_t deser_pos) +{ + int ret = OB_SUCCESS; + + if (deser_pos < 0 || !is_inited() || for_serialize_) { + ret = OB_INVALID_ARGUMENT; + DUP_TABLE_LOG(WARN, "invalid arguments", K(ret), K(deser_pos), KPC(this)); + } else { + segment_pos_ = deser_pos; + } + + return ret; +} + int ObTxBigSegmentBuf::split_one_part(char *part_buf, const int64_t part_buf_len, int64_t &part_buf_pos, diff --git a/src/storage/tx/ob_tx_big_segment_buf.h b/src/storage/tx/ob_tx_big_segment_buf.h index fe4afd8f73..72ec0c5295 100644 --- a/src/storage/tx/ob_tx_big_segment_buf.h +++ b/src/storage/tx/ob_tx_big_segment_buf.h @@ -56,6 +56,16 @@ public: int serialize_object(const T &obj); template int deserialize_object(T &obj); + + char *get_serialize_buf(); + int64_t get_serialize_buf_len(); + int64_t get_serialize_buf_pos(); + int set_serialize_pos(const int64_t ser_pos); + const char *get_deserialize_buf(); + int64_t get_deserialize_buf_len(); + int64_t get_deserialize_buf_pos(); + int set_deserialize_pos(const int64_t deser_pos); + /** * OB_ITER_END : no part can be split or collect * */ diff --git a/src/storage/tx/ob_tx_log_adapter.cpp b/src/storage/tx/ob_tx_log_adapter.cpp index cd177025b0..97e3263d75 100644 --- a/src/storage/tx/ob_tx_log_adapter.cpp +++ b/src/storage/tx/ob_tx_log_adapter.cpp @@ -20,6 +20,61 @@ using namespace share; namespace transaction { +int ObITxLogAdapter::block_confirm_with_dup_tablet_change_snapshot( + share::SCN &dup_tablet_change_snapshot) +{ + dup_tablet_change_snapshot.set_invalid(); + return OB_SUCCESS; +} + +int ObITxLogAdapter::unblock_confirm_with_prepare_scn(const share::SCN &dup_tablet_change_snapshot, + const share::SCN &prepare_scn) +{ + UNUSED(dup_tablet_change_snapshot); + UNUSED(prepare_scn); + return OB_SUCCESS; +} + +int ObITxLogAdapter::check_dup_tablet_in_redo(const ObTabletID &tablet_id, + bool &is_dup_tablet, + const share::SCN &base_snapshot, + const share::SCN &redo_scn) +{ + UNUSED(tablet_id); + UNUSED(redo_scn); + UNUSED(base_snapshot); + is_dup_tablet = false; + return OB_SUCCESS; +} + +int 
ObITxLogAdapter::check_dup_tablet_readable(const ObTabletID &tablet_id, + const share::SCN &read_snapshot, + const bool read_from_leader, + const share::SCN &max_replayed_scn, + bool &readable) +{ + UNUSED(tablet_id); + UNUSED(read_snapshot); + UNUSED(read_from_leader); + UNUSED(max_replayed_scn); + readable = false; + return OB_SUCCESS; +} + +int ObITxLogAdapter::check_redo_sync_completed(const ObTransID &tx_id, + const share::SCN &redo_completed_scn, + bool &redo_sync_finish, + share::SCN &total_max_read_version) +{ + UNUSED(tx_id); + UNUSED(redo_completed_scn); + redo_sync_finish = false; + total_max_read_version.set_invalid(); + return OB_SUCCESS; +} + +int64_t ObITxLogAdapter::get_committing_dup_trx_cnt() { return 0; } + int ObLSTxLogAdapter::init(ObITxLogParam *param) { int ret = OB_SUCCESS; @@ -29,6 +84,7 @@ int ObLSTxLogAdapter::init(ObITxLogParam *param) } else { ObTxPalfParam *palf_param = static_cast(param); log_handler_ = palf_param->get_log_handler(); + dup_table_ls_handler_ = palf_param->get_dup_table_ls_handler(); } return ret; } @@ -89,6 +145,100 @@ int ObLSTxLogAdapter::get_role(bool &is_leader, int64_t &epoch) return ret; } +int ObLSTxLogAdapter::block_confirm_with_dup_tablet_change_snapshot( + share::SCN &dup_tablet_change_snapshot) +{ + int ret = OB_SUCCESS; + + dup_tablet_change_snapshot.set_invalid(); + + return ret; +} + +int ObLSTxLogAdapter::unblock_confirm_with_prepare_scn(const share::SCN &dup_tablet_change_snapshot, + const share::SCN &redo_scn) +{ + int ret = OB_SUCCESS; + + return ret; +} + +int ObLSTxLogAdapter::check_dup_tablet_in_redo(const ObTabletID &tablet_id, + bool &is_dup_tablet, + const share::SCN &base_snapshot, + const share::SCN &redo_scn) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(dup_table_ls_handler_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "invalid dup table ls handler", K(ret)); + } else if (OB_FAIL(dup_table_ls_handler_->check_dup_tablet_in_redo(tablet_id, is_dup_tablet, + base_snapshot, redo_scn))) { + DUP_TABLE_LOG(WARN, "check dup tablet readable failed", K(ret)); + } + + return ret; +} + +int ObLSTxLogAdapter::check_dup_tablet_readable(const ObTabletID &tablet_id, + const share::SCN &read_snapshot, + const bool read_from_leader, + const share::SCN &max_replayed_scn, + bool &readable) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(dup_table_ls_handler_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "invalid dup table ls handler", K(ret)); + } else if (OB_FAIL(dup_table_ls_handler_->check_dup_tablet_readable( + tablet_id, read_snapshot, read_from_leader, max_replayed_scn, readable))) { + DUP_TABLE_LOG(WARN, "check dup tablet readable failed", K(ret)); + } + return ret; +} + +int ObLSTxLogAdapter::check_redo_sync_completed(const ObTransID &tx_id, + const share::SCN &redo_completed_scn, + bool &redo_sync_finish, + share::SCN &total_max_read_version) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(dup_table_ls_handler_)) { + ret = OB_NOT_INIT; + DUP_TABLE_LOG(WARN, "invalid dup table ls handler", K(ret)); + } else if (OB_FAIL(dup_table_ls_handler_->check_redo_sync_completed( + tx_id, redo_completed_scn, redo_sync_finish, total_max_read_version))) { + DUP_TABLE_LOG(WARN, "check redo sync completed failed", K(ret)); + } + + return ret; +} + +bool ObLSTxLogAdapter::has_dup_tablet() +{ + bool has_dup = false; + if (OB_ISNULL(dup_table_ls_handler_)) { + has_dup = false; + } else { + has_dup = dup_table_ls_handler_->has_dup_tablet(); + } + return has_dup; +} + +int64_t ObLSTxLogAdapter::get_committing_dup_trx_cnt() +{ + int64_t 
committing_dup_trx_cnt = 0; + if (OB_ISNULL(dup_table_ls_handler_)) { + committing_dup_trx_cnt = 0; + } else { + committing_dup_trx_cnt = dup_table_ls_handler_->get_committing_dup_trx_cnt(); + } + return committing_dup_trx_cnt; +} + int ObLSTxLogAdapter::get_max_decided_scn(SCN &scn) { int ret = OB_SUCCESS; diff --git a/src/storage/tx/ob_tx_log_adapter.h b/src/storage/tx/ob_tx_log_adapter.h index f35a272fa4..c0d7d8de86 100644 --- a/src/storage/tx/ob_tx_log_adapter.h +++ b/src/storage/tx/ob_tx_log_adapter.h @@ -31,6 +31,7 @@ class PalfHandle; namespace transaction { +class ObDupTableLSHandler; class ObITxLogParam { public: @@ -41,10 +42,15 @@ private: class ObTxPalfParam : public ObITxLogParam { public: - ObTxPalfParam(logservice::ObLogHandler *handler) : handler_(handler) {} + ObTxPalfParam(logservice::ObLogHandler *handler, ObDupTableLSHandler *dup_tablet_ls_handler) + : handler_(handler), dup_tablet_ls_handler_(dup_tablet_ls_handler) + {} logservice::ObLogHandler *get_log_handler() { return handler_; } + ObDupTableLSHandler *get_dup_table_ls_handler() { return dup_tablet_ls_handler_; } + private: logservice::ObLogHandler *handler_; + ObDupTableLSHandler *dup_tablet_ls_handler_; }; class ObITxLogAdapter @@ -59,7 +65,27 @@ public: virtual int get_role(bool &is_leader, int64_t &epoch) = 0; virtual int get_max_decided_scn(share::SCN &scn) = 0; -private: + /** + * Dup Table Inerface + * */ + virtual int block_confirm_with_dup_tablet_change_snapshot(share::SCN &dup_tablet_change_snapshot); + virtual int unblock_confirm_with_prepare_scn(const share::SCN &dup_tablet_change_snapshot, + const share::SCN &prepare_scn); + virtual int check_dup_tablet_in_redo(const ObTabletID &tablet_id, + bool &is_dup_tablet, + const share::SCN &base_snapshot, + const share::SCN &redo_scn); + virtual int check_dup_tablet_readable(const ObTabletID &tablet_id, + const share::SCN &read_snapshot, + const bool read_from_leader, + const share::SCN &max_replayed_scn, + bool &readable); + virtual int check_redo_sync_completed(const ObTransID &tx_id, + const share::SCN &redo_completed_scn, + bool &redo_sync_finish, + share::SCN &total_max_read_version); + virtual bool has_dup_tablet() { return false; } + virtual int64_t get_committing_dup_trx_cnt(); }; class ObLSTxLogAdapter : public ObITxLogAdapter @@ -76,8 +102,30 @@ public: int get_role(bool &is_leader, int64_t &epoch); int get_max_decided_scn(share::SCN &scn); + /** + * Dup Table Inerface + * */ + int block_confirm_with_dup_tablet_change_snapshot(share::SCN &dup_tablet_change_snapshot); + int unblock_confirm_with_prepare_scn(const share::SCN &dup_tablet_change_snapshot, + const share::SCN &redo_scn); + int check_dup_tablet_in_redo(const ObTabletID &tablet_id, + bool &is_dup_tablet, + const share::SCN &base_snapshot, + const share::SCN &redo_scn); + int check_dup_tablet_readable(const ObTabletID &tablet_id, + const share::SCN &read_snapshot, + const bool read_from_leader, + const share::SCN &max_replayed_scn, + bool &readable); + int check_redo_sync_completed(const ObTransID &tx_id, + const share::SCN &redo_completed_scn, + bool &redo_sync_finish, + share::SCN &total_max_read_version); + bool has_dup_tablet(); + int64_t get_committing_dup_trx_cnt(); private: logservice::ObLogHandler *log_handler_; + ObDupTableLSHandler *dup_table_ls_handler_; }; } // namespace transaction diff --git a/src/storage/tx/ob_tx_replay_executor.cpp b/src/storage/tx/ob_tx_replay_executor.cpp index 7605d1718d..aef9d95b36 100644 --- a/src/storage/tx/ob_tx_replay_executor.cpp +++ 
b/src/storage/tx/ob_tx_replay_executor.cpp @@ -557,6 +557,8 @@ int ObTxReplayExecutor::replay_redo_in_memtable_(ObTxRedoLog &redo) } } else if (FALSE_IT(row_head = mmi_ptr_->get_row_head())) { // do nothing + } else if (OB_NOT_NULL(ctx_) && OB_FAIL(ctx_->merge_tablet_modify_record(row_head.tablet_id_))) { + TRANS_LOG(WARN, "record tablet_id in redo failed", K(ret), K(row_head)); } else if (OB_FAIL(replay_one_row_in_memtable_(row_head, mmi_ptr_))) { if (OB_MINOR_FREEZE_NOT_ALLOW == ret) { if (TC_REACH_TIME_INTERVAL(1000 * 1000)) { diff --git a/src/storage/tx_storage/ob_access_service.cpp b/src/storage/tx_storage/ob_access_service.cpp index 8845f56612..19876b9588 100644 --- a/src/storage/tx_storage/ob_access_service.cpp +++ b/src/storage/tx_storage/ob_access_service.cpp @@ -327,28 +327,6 @@ int ObAccessService::table_rescan( return ret; } -int ObAccessService::check_replica_allow_access_( - const ObStoreAccessType access_type, - const ObReplicaType replica_type) -{ - int ret = OB_SUCCESS; - if (ObAccessTypeCheck::is_write_access_type(access_type)) { - if (!ObReplicaTypeCheck::is_writable_replica(replica_type)) { - ret = OB_ERR_READ_ONLY; - LOG_WARN("replica is not writable", K(ret), K(replica_type)); - } - } else if (ObAccessTypeCheck::is_read_access_type(access_type)) { - if (!ObReplicaTypeCheck::is_readable_replica(replica_type)) { - ret = OB_REPLICA_NOT_READABLE; - LOG_WARN("replica is not readable", K(ret), K(replica_type)); - } - } else { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("unexpected access type", K(ret)); - } - return ret; -} - int ObAccessService::get_write_store_ctx_guard_( const share::ObLSID &ls_id, const int64_t timeout, @@ -505,9 +483,6 @@ int ObAccessService::check_write_allowed_( } else if (OB_ISNULL(ls = ctx_guard.get_ls_handle().get_ls())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("ls should not be null", K(ret), K(ls_id), K_(tenant_id)); - } else if (OB_FAIL(check_replica_allow_access_(access_type, ls->get_replica_type()))) { - LOG_WARN("replica can not be accessed", K(ret), K(access_type), "replica_type", - ls->get_replica_type(), K(ls_id)); } else { // TODO: this may confuse user, because of txn timeout won't notify user proactively auto lock_expired_ts = MIN(dml_param.timeout_, tx_desc.get_expire_ts()); @@ -1001,10 +976,6 @@ int ObAccessService::get_multi_ranges_cost( } else if (OB_ISNULL(ls = ls_handle.get_ls())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("ls hould not be null", K(ret), K(ls_id), K_(tenant_id)); - } else if (OB_FAIL(check_replica_allow_access_( - ObStoreAccessType::READ, - ls->get_replica_type()))) { - LOG_WARN("replica can not be accessed", K(ret), "replica", ls->get_replica_type(), K(ls_id)); } else if (OB_ISNULL(tablet_service = ls->get_tablet_svr())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("tablet service should not be null", K(ret), K(ls_id)); @@ -1080,10 +1051,6 @@ int ObAccessService::split_multi_ranges( } else if (OB_ISNULL(ls = ls_handle.get_ls())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("ls hould not be null", K(ret), K(ls_id), K_(tenant_id)); - } else if (OB_FAIL(check_replica_allow_access_( - ObStoreAccessType::READ, - ls->get_replica_type()))) { - LOG_WARN("replica can not be accessed", K(ret), "replica", ls->get_replica_type(), K(ls_id)); } else if (OB_ISNULL(tablet_service = ls->get_tablet_svr())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("tablet service should not be null", K(ret), K(ls_id)); diff --git a/src/storage/tx_storage/ob_access_service.h b/src/storage/tx_storage/ob_access_service.h index edf88e3ae8..6b7c8a7887 100644 --- 
a/src/storage/tx_storage/ob_access_service.h +++ b/src/storage/tx_storage/ob_access_service.h @@ -195,10 +195,6 @@ public: protected: int check_tenant_out_of_memstore_limit_(bool &is_out_of_mem); - int check_replica_allow_access_( - const ObStoreAccessType access_type, - const ObReplicaType replica_type); - int get_write_store_ctx_guard_( const share::ObLSID &ls_id, const int64_t timeout, diff --git a/src/storage/tx_storage/ob_ls_map.cpp b/src/storage/tx_storage/ob_ls_map.cpp index 7b157b9fd1..e927c78d40 100644 --- a/src/storage/tx_storage/ob_ls_map.cpp +++ b/src/storage/tx_storage/ob_ls_map.cpp @@ -346,117 +346,5 @@ int ObLSMap::get_all_ls_id(ObIArray &ls_id_array) return ret; } -int ObLSMap::remove_duplicate_ls() -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObLSMap has not been inited", K(ret)); - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < BUCKETS_CNT; ++i) { - ObQSyncLockWriteGuard bucket_guard(buckets_lock_[i]); - if (nullptr != ls_buckets_[i]) { - if (OB_FAIL(remove_duplicate_ls_in_linklist(ls_buckets_[i]))) { - LOG_WARN("fail to remove same ls in linklist", K(ret)); - } - } - } - } - return ret; -} - -int ObLSMap::choose_preserve_ls(ObLS *left_ls, - ObLS *right_ls, - ObLS *&result_ls) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(nullptr == left_ls || nullptr == right_ls)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), KP(left_ls), KP(right_ls)); - } else { - const ObReplicaType left_replica_type = left_ls->get_replica_type(); - const ObReplicaType right_replica_type = right_ls->get_replica_type(); - if (ObReplicaTypeCheck::is_writable_replica(left_replica_type)) { - result_ls = left_ls; - } else if (ObReplicaTypeCheck::is_writable_replica(right_replica_type)) { - result_ls = right_ls; - } else if (ObReplicaTypeCheck::is_readonly_replica(left_replica_type)) { - result_ls = left_ls; - } else if (ObReplicaTypeCheck::is_readonly_replica(right_replica_type)) { - result_ls = right_ls; - } else { - result_ls = left_ls; - } - } - return ret; -} - -int ObLSMap::remove_duplicate_ls_in_linklist(ObLS *&head) -{ - int ret = OB_SUCCESS; - common::hash::ObHashMap effective_ls_map; - const int64_t MAX_LS_CNT_IN_BUCKET = 10L; - lib::ObLabel label("LS_TMP_MAP"); - if (OB_UNLIKELY(!is_inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("ObLSMap has not been inited", K(ret)); - } else if (OB_ISNULL(head)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid arguments", K(ret), KP(head)); - } else if (OB_FAIL(effective_ls_map.create(MAX_LS_CNT_IN_BUCKET, label))) { - LOG_WARN("fail to create effetive ls hash map", K(ret)); - } else { - ObLS *curr = head; - ObLS *next = nullptr; - bool has_same_ls = false; - while (OB_SUCC(ret) && curr != nullptr) { - ObLS *ls = nullptr; - next = static_cast(curr->next_); - bool need_set = false; - if (OB_FAIL(effective_ls_map.get_refactored(curr->get_ls_id(), ls))) { - if (OB_HASH_NOT_EXIST == ret) { - if (OB_FAIL(effective_ls_map.set_refactored(curr->get_ls_id(), curr))) { - LOG_WARN("fail to set to effective ls map", K(ret)); - } - } - } else { - ObLS *choose_ls = nullptr; - has_same_ls = true; - if (OB_FAIL(choose_preserve_ls(curr, ls, choose_ls))) { - LOG_WARN("fail to choose preserve ls", K(ret)); - } else { - if (choose_ls == curr) { - if (OB_FAIL(effective_ls_map.set_refactored(curr->get_ls_id(), choose_ls, true/*overwrite*/))) { - LOG_WARN("fail to set to effective ls map", K(ret)); - } else { - del_ls_impl(ls); - } - } else { - del_ls_impl(curr); - } - } - } - curr = next; - } - if 
(OB_SUCC(ret) && has_same_ls) { - ObLS *prev = nullptr; - for (ObHashMap::iterator iter = effective_ls_map.begin(); iter != effective_ls_map.end(); ++iter) { - if (nullptr == prev) { - head = iter->second; - prev = head; - } else { - prev->next_ = iter->second; - prev = iter->second; - } - } - if (OB_SUCC(ret)) { - prev->next_ = nullptr; - } - } - effective_ls_map.destroy(); - } - return ret; -} - -}; // end namespace storage +} // end namespace storage }; // end namespace oceanbase diff --git a/src/storage/tx_storage/ob_ls_map.h b/src/storage/tx_storage/ob_ls_map.h index bd238c757d..bf1031c06a 100644 --- a/src/storage/tx_storage/ob_ls_map.h +++ b/src/storage/tx_storage/ob_ls_map.h @@ -62,8 +62,6 @@ public: static TCRef tcref(16); return tcref; } - // why there is duplicate ls? - int remove_duplicate_ls(); private: OB_INLINE void free_ls(ObLS *ls) const; void del_ls_impl(ObLS *ls); diff --git a/src/storage/tx_storage/ob_ls_service.cpp b/src/storage/tx_storage/ob_ls_service.cpp index df5c80a81e..43f3f72b79 100644 --- a/src/storage/tx_storage/ob_ls_service.cpp +++ b/src/storage/tx_storage/ob_ls_service.cpp @@ -245,7 +245,6 @@ int ObLSService::start() } int ObLSService::inner_create_ls_(const share::ObLSID &lsid, - const ObReplicaType replica_type, const ObMigrationStatus &migration_status, const ObLSRestoreStatus &restore_status, const SCN &create_scn, @@ -263,12 +262,11 @@ int ObLSService::inner_create_ls_(const share::ObLSID &lsid, } else if (OB_FAIL(ls->init(lsid, tenant_id_, - replica_type, migration_status, restore_status, create_scn, rs_reporter_))) { - LOG_WARN("fail to init ls", K(ret), K(lsid), K(replica_type)); + LOG_WARN("fail to init ls", K(ret), K(lsid)); } if (OB_FAIL(ret) && NULL != ls) { ls->~ObLS(); @@ -419,7 +417,6 @@ int ObLSService::create_ls(const obrpc::ObCreateLSArg &arg) ret = OB_EAGAIN; LOG_WARN("ls waiting for destroy, need retry later", K(ret), K(arg)); } else if (OB_FAIL(inner_create_ls_(arg.get_ls_id(), - arg.get_replica_type(), migration_status, (is_ls_to_restore_(arg) ? 
ObLSRestoreStatus(ObLSRestoreStatus::RESTORE_START) : @@ -443,6 +440,7 @@ int ObLSService::create_ls(const obrpc::ObCreateLSArg &arg) } else if (FALSE_IT(state = ObLSCreateState::CREATE_STATE_WRITE_PREPARE_SLOG)) { } else if (OB_FAIL(ls->create_ls(arg.get_tenant_info().get_tenant_role(), palf_base_info, + arg.get_replica_type(), unused_allow_log_sync))) { LOG_WARN("enable ls palf failed", K(ret), K(arg), K(palf_base_info)); // only restore ls does not need enable replay @@ -793,7 +791,6 @@ int ObLSService::replay_create_ls_(const ObLSMeta &ls_meta) } else if (OB_FAIL(ls_meta.get_restore_status(restore_status))) { LOG_WARN("failed to get restore status", K(ret), K(ls_meta)); } else if (OB_FAIL(inner_create_ls_(ls_meta.ls_id_, - ls_meta.replica_type_, migration_status, restore_status, ls_meta.get_clog_checkpoint_scn(), @@ -1012,7 +1009,6 @@ int ObLSService::create_ls_for_ha( } else if (OB_FAIL(get_restore_status_(restore_status))) { LOG_WARN("failed to get restore status", K(ret), K(arg), K(task_id)); } else if (OB_FAIL(inner_create_ls_(arg.ls_id_, - arg.dst_.get_replica_type(), migration_status, restore_status, ObScnRange::MIN_SCN, /* create scn */ @@ -1035,6 +1031,7 @@ int ObLSService::create_ls_for_ha( } else if (FALSE_IT(state = ObLSCreateState::CREATE_STATE_WRITE_PREPARE_SLOG)) { } else if (OB_FAIL(ls->create_ls(share::RESTORE_TENANT_ROLE, palf_base_info, + arg.dst_.get_replica_type(), allow_log_sync))) { LOG_WARN("enable ls palf failed", K(ret), K(ls_meta)); } else if (FALSE_IT(state = ObLSCreateState::CREATE_STATE_PALF_ENABLED)) { diff --git a/src/storage/tx_storage/ob_ls_service.h b/src/storage/tx_storage/ob_ls_service.h index 486c4aac6e..36a9d5bb60 100644 --- a/src/storage/tx_storage/ob_ls_service.h +++ b/src/storage/tx_storage/ob_ls_service.h @@ -159,7 +159,6 @@ private: CREATE_STATE_FINISH }; int inner_create_ls_(const share::ObLSID &lsid, - const ObReplicaType replica_type, const ObMigrationStatus &migration_status, const share::ObLSRestoreStatus &restore_status, const share::SCN &create_scn, diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result index 0bd47aa178..e2937b1fb9 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result @@ -589,6 +589,9 @@ select 0xffffffffff & table_id, table_name, table_type, database_id, part_num fr 12366 __all_virtual_archive_dest_status 2 201001 1 12369 __all_virtual_io_scheduler 2 201001 1 12371 __all_virtual_external_table_file 2 201001 1 +12376 __all_virtual_dup_ls_lease_mgr 2 201001 1 +12378 __all_virtual_dup_ls_tablet_set 2 201001 1 +12379 __all_virtual_dup_ls_tablets 2 201001 1 12380 __all_virtual_tx_data 2 201001 1 12381 __all_virtual_task_opt_stat_gather_history 2 201001 1 12382 __all_virtual_table_opt_stat_gather_history 2 201001 1 diff --git a/tools/deploy/mysql_test/test_suite/px/r/mysql/join_nlj.result b/tools/deploy/mysql_test/test_suite/px/r/mysql/join_nlj.result index c28b1d8746..5599173f8f 100644 --- a/tools/deploy/mysql_test/test_suite/px/r/mysql/join_nlj.result +++ b/tools/deploy/mysql_test/test_suite/px/r/mysql/join_nlj.result @@ -185,7 +185,7 @@ sid subject score tid name subject 62 EN 90 1 Miss Zhang EN 63 EN 99 1 Miss Zhang EN 64 EN 87 1 Miss Zhang EN -CREATE TABLE product (pid bigint primary key,pname varchar(50),gmt_modify timestamp(6),gmt_create timestamp(6),thread_id bigint, 
key k (gmt_modify)); +CREATE TABLE product (pid bigint primary key,pname varchar(50),gmt_modify timestamp(6),gmt_create timestamp(6),thread_id bigint, key k (gmt_modify))duplicate_scope="cluster"; CREATE TABLE orders (oid bigint primary key,pid bigint,amount int, addr varchar(20),key k (pid) local) partition by hash (oid) partitions 100; insert into product values(314265, "abcd", now(), now(), 1234); insert into orders values(314265, 314265, 678, "192.168.1.8"); diff --git a/tools/deploy/mysql_test/test_suite/px/t/join_nlj.test b/tools/deploy/mysql_test/test_suite/px/t/join_nlj.test index 272ac0720f..8b9309cb9c 100644 --- a/tools/deploy/mysql_test/test_suite/px/t/join_nlj.test +++ b/tools/deploy/mysql_test/test_suite/px/t/join_nlj.test @@ -14,7 +14,7 @@ explain select /*+ USE_PX parallel(2) */ * from score, teacher where teacher.sub --sorted_result select /*+ USE_PX parallel(2) */ * from score, teacher where teacher.subject = score.subject and teacher.tid = 1; -CREATE TABLE product (pid bigint primary key,pname varchar(50),gmt_modify timestamp(6),gmt_create timestamp(6),thread_id bigint, key k (gmt_modify)); +CREATE TABLE product (pid bigint primary key,pname varchar(50),gmt_modify timestamp(6),gmt_create timestamp(6),thread_id bigint, key k (gmt_modify))duplicate_scope="cluster"; CREATE TABLE orders (oid bigint primary key,pid bigint,amount int, addr varchar(20),key k (pid) local) partition by hash (oid) partitions 100; diff --git a/tools/ob_admin/backup_tool/ob_admin_dump_backup_data_executor.cpp b/tools/ob_admin/backup_tool/ob_admin_dump_backup_data_executor.cpp index d959fa099f..be0d5035a4 100644 --- a/tools/ob_admin/backup_tool/ob_admin_dump_backup_data_executor.cpp +++ b/tools/ob_admin/backup_tool/ob_admin_dump_backup_data_executor.cpp @@ -2150,7 +2150,7 @@ int ObAdminDumpBackupDataExecutor::dump_ls_attr_info_(const share::ObLSAttr &ls_ PrintHelper::print_dump_title("ls_attr info"); PrintHelper::print_dump_line("ls_id", ls_attr.get_ls_id().id()); PrintHelper::print_dump_line("ls_group_id", ls_attr.get_ls_group_id()); - PrintHelper::print_dump_line("flag", ls_attr.get_ls_flag()); + PrintHelper::print_dump_line("flag", ls_attr.get_ls_flag().get_flag_value()); PrintHelper::print_dump_line("status", ls_attr.get_ls_status()); PrintHelper::print_dump_line("operation_type", ls_attr.get_ls_operatin_type()); return ret; diff --git a/unittest/logservice/mock_logservice_container/mock_log_sliding_window.h b/unittest/logservice/mock_logservice_container/mock_log_sliding_window.h index 4da91a874a..d0d3dfabc1 100644 --- a/unittest/logservice/mock_logservice_container/mock_log_sliding_window.h +++ b/unittest/logservice/mock_logservice_container/mock_log_sliding_window.h @@ -16,6 +16,7 @@ #define private public #include "logservice/palf/log_sliding_window.h" #include "share/scn.h" +#include "mock_log_state_mgr.h" #undef private namespace oceanbase @@ -344,7 +345,13 @@ public: ack_info.lsn_ = LSN(PALF_INITIAL_LSN_VAL); return OB_SUCCESS; } + int get_leader_from_cache(common::ObAddr &leader) const + { + leader = state_mgr_->get_leader(); + return OB_SUCCESS; + } public: + palf::MockLogStateMgr *state_mgr_; LSN pending_end_lsn_; int64_t mock_start_id_; int64_t mock_last_submit_log_id_; diff --git a/unittest/logservice/test_log_config_mgr.cpp b/unittest/logservice/test_log_config_mgr.cpp index 7d5f46a6c7..0062c09986 100644 --- a/unittest/logservice/test_log_config_mgr.cpp +++ b/unittest/logservice/test_log_config_mgr.cpp @@ -82,6 +82,7 @@ public: mock_election_->leader_epoch_ = INIT_ELE_EPOCH; 
mock_sw_->mock_last_submit_lsn_ = LSN(PALF_INITIAL_LSN_VAL); mock_sw_->mock_last_submit_pid_ = INIT_PROPOSAL_ID; + mock_sw_->state_mgr_ = mock_state_mgr_; EXPECT_TRUE(config_info.is_valid()); PALF_LOG(INFO, "init_test_log_config_env", K(role), K(state), K(mock_state_mgr_->leader_), K(mock_state_mgr_->role_)); mock_log_engine_->reset_register_parent_resp_ret(); @@ -110,6 +111,41 @@ public: palf::LogPlugins *mock_plugins_; }; +TEST_F(TestLogConfigMgr, test_set_initial_member_list) +{ + LogConfigInfo default_config_info; + common::ObMemberList init_member_list; + GlobalLearnerList learner_list; + LogConfigVersion init_config_version; + init_config_version.generate(1, 1); + init_member_list.add_server(addr1); + init_member_list.add_server(addr2); + EXPECT_EQ(OB_SUCCESS, default_config_info.generate(init_member_list, 3, learner_list, init_config_version)); + + { + LogConfigMgr cm; + LogConfigVersion config_version; + init_test_log_config_env(addr1, default_config_info, cm); + // arb_member is self + EXPECT_EQ(OB_NOT_SUPPORTED, cm.set_initial_member_list(init_member_list, ObMember(addr1, 0), 3, learner_list, 1, config_version)); + // arb_member overlaps with member_list + EXPECT_EQ(OB_INVALID_ARGUMENT, cm.set_initial_member_list(init_member_list, ObMember(addr2, 0), 3, learner_list, 1, config_version)); + + // arb_member overlaps with learners + learner_list.add_learner(ObMember(addr4, 0)); + EXPECT_EQ(OB_INVALID_ARGUMENT, cm.set_initial_member_list(init_member_list, ObMember(addr4, 0), 3, learner_list, 1, config_version)); + // learners overlap with member_list + init_member_list.add_server(addr4); + EXPECT_EQ(OB_INVALID_ARGUMENT, cm.set_initial_member_list(init_member_list, ObMember(addr5, 0), 3, learner_list, 1, config_version)); + + init_member_list.add_server(addr3); + learner_list.reset(); + // do not reach majority + EXPECT_EQ(OB_INVALID_ARGUMENT, cm.set_initial_member_list(init_member_list, ObMember(addr5, 0), 3, learner_list, 1, config_version)); + EXPECT_EQ(OB_SUCCESS, cm.set_initial_member_list(init_member_list, ObMember(addr5, 0), 4, learner_list, 1, config_version)); + } +} + TEST_F(TestLogConfigMgr, test_remove_child_is_not_learner) { LogConfigMgr cm; @@ -233,7 +269,7 @@ TEST_F(TestLogConfigMgr, test_apply_config_meta) GlobalLearnerList learner_list; LogConfigInfo default_config_info, one_f_one_a_config_info, two_f_one_a_config_info, four_f_one_a_config_info, five_f_config_info, - four_f_config_info, two_f_config_info; + four_f_config_info, two_f_config_info, three_f_one_learner_config_info; EXPECT_EQ(OB_SUCCESS, default_config_info.generate(init_member_list, 3, learner_list, init_config_version)); EXPECT_EQ(OB_SUCCESS, one_f_one_a_config_info.generate(one_f_member_list, 1, learner_list, init_config_version)); @@ -246,6 +282,9 @@ TEST_F(TestLogConfigMgr, test_apply_config_meta) EXPECT_EQ(OB_SUCCESS, four_f_config_info.generate(four_f_member_list, 4, learner_list, init_config_version)); EXPECT_EQ(OB_SUCCESS, five_f_config_info.generate(five_f_member_list, 5, learner_list, init_config_version)); + three_f_one_learner_config_info = default_config_info; + three_f_one_learner_config_info.learnerlist_.add_learner(ObMember(addr4, -1)); + std::vector config_info_list; std::vector arg_list; std::vector expect_member_list; @@ -338,10 +377,9 @@ TEST_F(TestLogConfigMgr, test_apply_config_meta) // 14. 
acceptor -> learner config_info_list.push_back(default_config_info); arg_list.push_back(LogConfigChangeArgs(ObMember(addr3, -1), 0, SWITCH_ACCEPTOR_TO_LEARNER)); - expect_ret_list.push_back(OB_SUCCESS); + expect_ret_list.push_back(OB_INVALID_ARGUMENT); expect_finished_list.push_back(false); expect_member_list.push_back(init_member_list); - expect_member_list.back().remove_server(addr3); // 15. 3F, add_arb_member, replica_num 3 config_info_list.push_back(default_config_info); arg_list.push_back(LogConfigChangeArgs(ObMember(addr4, -1), 3, palf::ADD_ARB_MEMBER)); @@ -564,6 +602,69 @@ TEST_F(TestLogConfigMgr, test_apply_config_meta) expect_ret_list.push_back(OB_NOT_ALLOW_REMOVING_LEADER); expect_finished_list.push_back(false); expect_member_list.push_back(init_member_list); + // 44. acceptor -> learner, invalid replica_num + config_info_list.push_back(default_config_info); + arg_list.push_back(LogConfigChangeArgs(ObMember(addr3, -1), 4, SWITCH_ACCEPTOR_TO_LEARNER)); + expect_ret_list.push_back(OB_INVALID_ARGUMENT); + expect_finished_list.push_back(false); + expect_member_list.push_back(init_member_list); + // 45. acceptor -> learner + config_info_list.push_back(default_config_info); + arg_list.push_back(LogConfigChangeArgs(ObMember(addr3, -1), 2, SWITCH_ACCEPTOR_TO_LEARNER)); + expect_ret_list.push_back(OB_SUCCESS); + expect_finished_list.push_back(false); + expect_member_list.push_back(init_member_list); + expect_member_list.back().remove_server(addr3); + // 46. acceptor -> learner, member already, but replica_num do not match + config_info_list.push_back(three_f_one_learner_config_info); + arg_list.push_back(LogConfigChangeArgs(ObMember(addr4, -1), 2, SWITCH_ACCEPTOR_TO_LEARNER)); + expect_ret_list.push_back(OB_INVALID_ARGUMENT); + expect_finished_list.push_back(false); + expect_member_list.push_back(init_member_list); + // 47. acceptor -> learner, already finish + config_info_list.push_back(three_f_one_learner_config_info); + arg_list.push_back(LogConfigChangeArgs(ObMember(addr4, -1), 3, SWITCH_ACCEPTOR_TO_LEARNER)); + expect_ret_list.push_back(OB_SUCCESS); + expect_finished_list.push_back(true); + expect_member_list.push_back(init_member_list); + // 48. learner -> acceptor, invalid replica_num + config_info_list.push_back(three_f_one_learner_config_info); + arg_list.push_back(LogConfigChangeArgs(ObMember(addr4, -1), 0, SWITCH_LEARNER_TO_ACCEPTOR)); + expect_ret_list.push_back(OB_INVALID_ARGUMENT); + expect_finished_list.push_back(false); + expect_member_list.push_back(init_member_list); + // 49. learner -> acceptor, invalid replica_num + config_info_list.push_back(three_f_one_learner_config_info); + arg_list.push_back(LogConfigChangeArgs(ObMember(addr4, -1), 3, SWITCH_LEARNER_TO_ACCEPTOR)); + expect_ret_list.push_back(OB_INVALID_ARGUMENT); + expect_finished_list.push_back(false); + expect_member_list.push_back(init_member_list); + // 50. learner -> acceptor, invalid member + config_info_list.push_back(three_f_one_learner_config_info); + arg_list.push_back(LogConfigChangeArgs(ObMember(addr5, -1), 3, SWITCH_LEARNER_TO_ACCEPTOR)); + expect_ret_list.push_back(OB_INVALID_ARGUMENT); + expect_finished_list.push_back(false); + expect_member_list.push_back(init_member_list); + // 51. 
learner -> acceptor, already finish + config_info_list.push_back(default_config_info); + arg_list.push_back(LogConfigChangeArgs(ObMember(addr3, -1), 3, SWITCH_LEARNER_TO_ACCEPTOR)); + expect_ret_list.push_back(OB_SUCCESS); + expect_finished_list.push_back(true); + expect_member_list.push_back(init_member_list); + // 52. learner -> acceptor, member already exists, but replica_num do not match + config_info_list.push_back(default_config_info); + arg_list.push_back(LogConfigChangeArgs(ObMember(addr3, -1), 4, SWITCH_LEARNER_TO_ACCEPTOR)); + expect_ret_list.push_back(OB_INVALID_ARGUMENT); + expect_finished_list.push_back(false); + expect_member_list.push_back(init_member_list); + // 53. learner -> acceptor + config_info_list.push_back(three_f_one_learner_config_info); + arg_list.push_back(LogConfigChangeArgs(ObMember(addr4, -1), 4, SWITCH_LEARNER_TO_ACCEPTOR)); + expect_ret_list.push_back(OB_SUCCESS); + expect_finished_list.push_back(false); + expect_member_list.push_back(init_member_list); + expect_member_list.back().add_server(addr4); + for (int i = 0; i < arg_list.size(); ++i) { PALF_LOG(INFO, "test_check_config_change_args begin case", K(i)); LogConfigMgr cm; diff --git a/unittest/logservice/test_log_meta_info.cpp b/unittest/logservice/test_log_meta_info.cpp index 87d16db721..212a41006c 100644 --- a/unittest/logservice/test_log_meta_info.cpp +++ b/unittest/logservice/test_log_meta_info.cpp @@ -138,6 +138,15 @@ TEST(TestLogMetaInfos, test_log_config_meta) EXPECT_EQ(OB_SUCCESS, curr_config_version.generate(curr_log_proposal_id, curr_config_seq)); EXPECT_EQ(OB_SUCCESS, prev_config_info.generate(prev_member_list, prev_replica_num, prev_learner_list, prev_config_version)); EXPECT_EQ(OB_SUCCESS, curr_config_info.generate(curr_member_list, curr_replica_num, curr_learner_list, curr_config_version)); + EXPECT_TRUE(curr_config_info.is_valid()); + EXPECT_TRUE(prev_config_info.is_valid()); + + // test lists overlap + { + LogConfigInfo invalid_info = curr_config_info; + invalid_info.learnerlist_.add_learner(member2); + EXPECT_FALSE(invalid_info.is_valid()); + } // test basic serialization { diff --git a/unittest/logservice/test_palf_bench.cpp b/unittest/logservice/test_palf_bench.cpp index 3dbafb02c6..2f86981fd6 100644 --- a/unittest/logservice/test_palf_bench.cpp +++ b/unittest/logservice/test_palf_bench.cpp @@ -111,8 +111,8 @@ public: } else if (OB_FAIL(palf_env_->create(1, handle_))) { PALF_LOG(ERROR, "palf_env_ create failed", K(ret)); } - - EXPECT_EQ(OB_SUCCESS, handle_.set_initial_member_list(member_list, 1)); + GlobalLearnerList learner_list; + EXPECT_EQ(OB_SUCCESS, handle_.set_initial_member_list(member_list, 1, learner_list)); while (true) { ObRole role; diff --git a/unittest/rootserver/test_primary_ls_service.cpp b/unittest/rootserver/test_primary_ls_service.cpp index 576797bb9d..40e64edc8e 100644 --- a/unittest/rootserver/test_primary_ls_service.cpp +++ b/unittest/rootserver/test_primary_ls_service.cpp @@ -15,6 +15,7 @@ #include #define private public #include "rootserver/ob_primary_ls_service.h" +#include "share/ls/ob_ls_operator.h" namespace oceanbase { using namespace common; using namespace share; @@ -261,6 +262,110 @@ TEST_F(TestPrimaryLSService, zone_balance) ASSERT_EQ(3, count_group_by_zone.at(2)); +} +TEST_F(TestPrimaryLSService, LS_FLAG) +{ + int ret = OB_SUCCESS; + ObLSFlag flag; + ObLSFlagStr str; + ObLSFlagStr empty_str; + ObLSFlagStr str0("DUPLICATE"); + ObLSFlagStr str1("DUPLICATE "); + ObLSFlagStr str2(" DUPLICATE "); + ObLSFlagStr str3("BLOCK_TABLET_IN"); + ObLSFlagStr 
str4("BLOCK_TABLET_IN "); + ObLSFlagStr str5("BLOCK_TABLET_IN|DUPLICATE"); + ObLSFlagStr str6("DUPLICATE|BLOCK_TABLET_IN"); + ObLSFlagStr str7("BLOCK_TABLET_IN | DUPLICATE"); + ObLSFlagStr str8("BLOCK_TABLET_IN,DUPLICATE"); + ObLSFlagStr str9("BLOCK_TABLET_IN DUPLICATE"); + + ret = flag.flag_to_str(str); + ASSERT_EQ(ret, OB_SUCCESS); + ASSERT_EQ(empty_str, str); + ASSERT_EQ(0, flag.flag_); + LOG_INFO("test", K(flag), K(str)); + + flag.set_block_tablet_in(); + ASSERT_EQ(2, flag.flag_); + ret = flag.flag_to_str(str); + ASSERT_EQ(str3, str); + LOG_INFO("test", K(flag), K(str)); + + flag.clear_block_tablet_in(); + ASSERT_EQ(0, flag.flag_); + ret = flag.flag_to_str(str); + ASSERT_EQ(ret, OB_SUCCESS); + ASSERT_EQ(empty_str, str); + LOG_INFO("test", K(flag), K(str)); + + flag.set_duplicate(); + ASSERT_EQ(1, flag.flag_); + ret = flag.flag_to_str(str); + ASSERT_EQ(ret, OB_SUCCESS); + ASSERT_EQ(str0, str); + LOG_INFO("test", K(flag), K(str)); + + flag.set_block_tablet_in(); + ASSERT_EQ(3, flag.flag_); + ret = flag.flag_to_str(str); + ASSERT_EQ(ret, OB_SUCCESS); + ASSERT_EQ(str6, str); + LOG_INFO("test", K(flag), K(str)); + + flag.clear_block_tablet_in(); + ASSERT_EQ(1, flag.flag_); + ret = flag.flag_to_str(str); + ASSERT_EQ(ret, OB_SUCCESS); + ASSERT_EQ(str0, str); + LOG_INFO("test", K(flag), K(str)); + + ret = flag.str_to_flag(empty_str.str()); + ASSERT_EQ(ret, OB_SUCCESS); + ASSERT_EQ(0, flag.flag_); + LOG_INFO("test", K(flag)); + + ret = flag.str_to_flag(str0.str()); + ASSERT_EQ(ret, OB_SUCCESS); + ASSERT_EQ(1, flag.flag_); + LOG_INFO("test", K(flag)); + + ret = flag.str_to_flag(str1.str()); + ASSERT_EQ(OB_ERR_UNEXPECTED, ret); + + ret = flag.str_to_flag(str2.str()); + ASSERT_EQ(OB_ERR_UNEXPECTED, ret); + + ret = flag.str_to_flag(str3.str()); + ASSERT_EQ(ret, OB_SUCCESS); + ASSERT_EQ(2, flag.flag_); + LOG_INFO("test", K(flag)); + + ret = flag.str_to_flag(str4.str()); + ASSERT_EQ(OB_ERR_UNEXPECTED, ret); + + ret = flag.str_to_flag(str5.str()); + ASSERT_EQ(ret, OB_SUCCESS); + ASSERT_EQ(3, flag.flag_); + LOG_INFO("test", K(flag)); + + ret = flag.str_to_flag(str6.str()); + ASSERT_EQ(ret, OB_SUCCESS); + ASSERT_EQ(3, flag.flag_); + + LOG_INFO("test", K(flag)); + ret = flag.str_to_flag(str7.str()); + ASSERT_EQ(OB_ERR_UNEXPECTED, ret); + + LOG_INFO("test", K(flag)); + + ret = flag.str_to_flag(str8.str()); + ASSERT_EQ(OB_ERR_UNEXPECTED, ret); + + ret = flag.str_to_flag(str9.str()); + ASSERT_EQ(OB_ERR_UNEXPECTED, ret); + + } } } diff --git a/unittest/storage/backup/test_backup_extern_info_mgr.cpp b/unittest/storage/backup/test_backup_extern_info_mgr.cpp index 89f6eb4c4d..691736fccd 100644 --- a/unittest/storage/backup/test_backup_extern_info_mgr.cpp +++ b/unittest/storage/backup/test_backup_extern_info_mgr.cpp @@ -93,7 +93,6 @@ void TestBackupExternInfoMgr::make_ls_meta_package_(ObBackupLSMetaInfo &ls_meta_ { ls_meta_info.ls_meta_package_.ls_meta_.tenant_id_ = tenant_id_; ls_meta_info.ls_meta_package_.ls_meta_.ls_id_ = ls_id_; - ls_meta_info.ls_meta_package_.ls_meta_.replica_type_ = ObReplicaType::REPLICA_TYPE_FULL; ls_meta_info.ls_meta_package_.ls_meta_.migration_status_ = ObMigrationStatus::OB_MIGRATION_STATUS_NONE; ls_meta_info.ls_meta_package_.ls_meta_.gc_state_ = LSGCState::NORMAL; ls_meta_info.ls_meta_package_.ls_meta_.restore_status_ = ObLSRestoreStatus(ObLSRestoreStatus::RESTORE_NONE); @@ -105,7 +104,6 @@ static bool cmp_backup_ls_meta(const ObBackupLSMetaInfo &lhs, const ObBackupLSMe { return lhs.ls_meta_package_.ls_meta_.tenant_id_ == rhs.ls_meta_package_.ls_meta_.tenant_id_ && 
lhs.ls_meta_package_.ls_meta_.ls_id_ == rhs.ls_meta_package_.ls_meta_.ls_id_ && - lhs.ls_meta_package_.ls_meta_.replica_type_ == rhs.ls_meta_package_.ls_meta_.replica_type_ && lhs.ls_meta_package_.ls_meta_.migration_status_ == rhs.ls_meta_package_.ls_meta_.migration_status_ && lhs.ls_meta_package_.ls_meta_.gc_state_ == rhs.ls_meta_package_.ls_meta_.gc_state_ && lhs.ls_meta_package_.ls_meta_.restore_status_ == rhs.ls_meta_package_.ls_meta_.restore_status_ && @@ -144,4 +142,4 @@ int main(int argc, char **argv) OB_LOGGER.set_log_level("info"); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); -} \ No newline at end of file +} diff --git a/unittest/storage/mock_ob_log_handler.h b/unittest/storage/mock_ob_log_handler.h index dc1481e191..4b24ed33c3 100644 --- a/unittest/storage/mock_ob_log_handler.h +++ b/unittest/storage/mock_ob_log_handler.h @@ -133,17 +133,21 @@ public: return OB_SUCCESS; } int set_initial_member_list(const common::ObMemberList &member_list, - const int64_t paxos_replica_num) + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list) { UNUSED(member_list); UNUSED(paxos_replica_num); + UNUSED(learner_list); return OB_SUCCESS; } int set_initial_member_list(const common::ObMemberList &member_list, const common::ObMember &arb_replica, - const int64_t paxos_replica_num) + const int64_t paxos_replica_num, + const common::GlobalLearnerList &learner_list) { UNUSEDx(member_list, arb_replica, paxos_replica_num); + UNUSED(learner_list); return OB_SUCCESS; } int get_end_scn(share::SCN &scn) const @@ -167,6 +171,13 @@ public: UNUSED(paxos_replica_num); return OB_SUCCESS; } + int get_paxos_member_list_and_learner_list(common::ObMemberList &member_list, + int64_t &paxos_replica_num, + common::GlobalLearnerList &learner_list) const + { + UNUSEDx(member_list, paxos_replica_num, learner_list); + return OB_SUCCESS; + } int get_max_lsn(palf::LSN &lsn) const { UNUSED(lsn); @@ -316,20 +327,29 @@ public: return ret; } - int switch_learner_to_acceptor(const common::ObMember &learner, - const int64_t timeout_us) + int replace_learner(const common::ObMember &added_learner, + const common::ObMember &removed_learner, + const int64_t timeout_us) { int ret = OB_SUCCESS; - UNUSED(learner); - UNUSED(timeout_us); + UNUSEDx(added_learner, removed_learner, timeout_us); + return ret; + } + + int switch_learner_to_acceptor(const common::ObMember &learner, + const int64_t new_replica_num, + const int64_t timeout_us) + { + int ret = OB_SUCCESS; + UNUSEDx(learner, new_replica_num, timeout_us); return ret; } int switch_acceptor_to_learner(const common::ObMember &member, - const int64_t timeout_us) + const int64_t new_replica_num, + const int64_t timeout_us) { int ret = OB_SUCCESS; - UNUSED(member); - UNUSED(timeout_us); + UNUSEDx(member, new_replica_num, timeout_us); return ret; } int add_arbitration_member(const common::ObMember &added_member, const int64_t timeout_us) diff --git a/unittest/storage/test_dml_common.h b/unittest/storage/test_dml_common.h index c8430cdaa0..ae2eee7c70 100644 --- a/unittest/storage/test_dml_common.h +++ b/unittest/storage/test_dml_common.h @@ -173,7 +173,8 @@ int TestDmlCommon::create_ls( ObMemberList member_list; const int64_t paxos_replica_num = 1; (void) member_list.add_server(MockTenantModuleEnv::get_instance().self_addr_); - if (OB_FAIL(ls->set_initial_member_list(member_list, paxos_replica_num))) { + GlobalLearnerList learner_list; + if (OB_FAIL(ls->set_initial_member_list(member_list, paxos_replica_num, learner_list))) { STORAGE_LOG(WARN, 
"failed to set initial member list", K(ret), K(member_list), K(paxos_replica_num)); } diff --git a/unittest/storage/test_meta_pointer_map.cpp b/unittest/storage/test_meta_pointer_map.cpp index 199da4712c..bfcafbb44c 100644 --- a/unittest/storage/test_meta_pointer_map.cpp +++ b/unittest/storage/test_meta_pointer_map.cpp @@ -102,7 +102,6 @@ void TestMetaPointerMap::FakeLs(ObLS &ls) ls.ls_meta_.gc_state_ = logservice::LSGCState::NORMAL; ls.ls_meta_.migration_status_ = ObMigrationStatus::OB_MIGRATION_STATUS_NONE; ls.ls_meta_.restore_status_ = ObLSRestoreStatus::RESTORE_NONE; - ls.ls_meta_.replica_type_ = ObReplicaType::REPLICA_TYPE_FULL; ls.ls_meta_.rebuild_seq_ = 0; } diff --git a/unittest/storage/tx/CMakeLists.txt b/unittest/storage/tx/CMakeLists.txt index 50f2ed436f..7a48d15a94 100644 --- a/unittest/storage/tx/CMakeLists.txt +++ b/unittest/storage/tx/CMakeLists.txt @@ -42,3 +42,5 @@ storage_unittest(test_ob_tx_msg) storage_unittest(test_ob_id_meta) storage_unittest(test_ob_standby_read) add_subdirectory(it) + +storage_unittest(test_dup_table_lease) diff --git a/unittest/storage/tx/ob_mock_tx_ctx.cpp b/unittest/storage/tx/ob_mock_tx_ctx.cpp index 665051e880..d6dec23d14 100644 --- a/unittest/storage/tx/ob_mock_tx_ctx.cpp +++ b/unittest/storage/tx/ob_mock_tx_ctx.cpp @@ -123,6 +123,13 @@ int MockObTxCtx::submit_log(const ObTwoPhaseCommitLogType& log_type) return OB_SUCCESS; } +int ObPartTransCtx::search_unsubmitted_dup_table_redo_() +{ + int ret = OB_SUCCESS; + + return ret; +} + int MockObTxCtx::register_timeout_task_(const int64_t interval_us) { return OB_SUCCESS; diff --git a/unittest/storage/tx/ob_mock_tx_log_adapter.cpp b/unittest/storage/tx/ob_mock_tx_log_adapter.cpp index 941b6daed9..f56c25e9c0 100644 --- a/unittest/storage/tx/ob_mock_tx_log_adapter.cpp +++ b/unittest/storage/tx/ob_mock_tx_log_adapter.cpp @@ -13,12 +13,15 @@ #include "ob_mock_tx_log_adapter.h" #include "logservice/ob_log_handler.h" #include "storage/tx/ob_trans_submit_log_cb.h" +#include "logservice/ob_log_base_type.h" -namespace oceanbase { +namespace oceanbase +{ using namespace palf; using namespace logservice; -namespace transaction { +namespace transaction +{ int MockTxLogAdapter::init(ObITxLogParam *param) { int ret = OB_SUCCESS; @@ -158,7 +161,7 @@ bool MockTxLogAdapter::is_cbs_finish_() return waiting_cbs_.empty(); } -void MockTxLogAdapter::push_all_cbs_() +void MockTxLogAdapter::invoke_all_cbs() { ObSpinLockGuard cbs_guard(cbs_lock_); @@ -193,10 +196,127 @@ bool MockTxLogAdapter::get_log(int64_t log_ts, std::string &log_string) return has_log; } +int MockTxLogAdapter::get_next_log(int64_t log_ts, std::string &log_string, int64_t &next_log_ts) +{ + int ret = OB_SUCCESS; + ObSpinLockGuard file_guard(log_file_lock_); + + std::map::iterator file_iter = mock_log_file_.find(log_ts); + + if (file_iter == mock_log_file_.end()) { + file_iter = mock_log_file_.begin(); + } + + file_iter++; + + if (file_iter == mock_log_file_.end()) { + ret = OB_HASH_NOT_EXIST; + } else { + log_string = file_iter->second; + next_log_ts = file_iter->first; + } + + return ret; +} + int64_t MockTxLogAdapter::get_cb_cnt() { return ATOMIC_LOAD(&CB_CNT_); } + +// int MockReplayMgr::init(MockTxLogAdapter * log_adapter) +// { +// int ret = OB_SUCCESS; +// +// log_adapter_ptr_ = log_adapter; +// +// return ret; +// } +// +// int MockReplayMgr::start() +// { +// int ret = OB_SUCCESS; +// +// ret = lib::ThreadPool::start(); +// +// return ret; +// } +// +// void MockReplayMgr::stop() +// { +// lib::ThreadPool::stop(); +// } +// +// void 
MockReplayMgr::wait() +// { +// lib::ThreadPool::wait(); +// } +// +// void MockReplayMgr::destroy() +// { +// lib::ThreadPool::destroy(); +// } +// +// void MockReplayMgr::run1() +// { +// int ret = OB_SUCCESS; +// int64_t cur_time = 0; +// int64_t time_used = 0; +// std::string tmp_log_string; +// +// palf::LSN tmp_lsn; +// while (!has_set_stop()) { +// cur_time = ObTimeUtility::current_time(); +// +// for (auto iter = replay_target_list_.begin(); iter != replay_target_list_.end(); iter++) { +// +// while (OB_SUCC(log_adapter_ptr_->get_next_log(iter->replay_success_ts_, iter->replaying_log_, +// iter->replaying_ts_))) { +// +// if (OB_FAIL(iter->replay_target_->replay(tmp_log_string.c_str(), tmp_log_string.size(), +// tmp_lsn, iter->replaying_ts_))) { +// TRANS_LOG(WARN, "replay one log error", KP(iter->replay_target_), K(iter->replaying_ts_)); +// } else { +// iter->replay_success_ts_ = iter->replaying_ts_; +// } +// } +// +// if (ret != OB_HASH_NOT_EXIST) { +// TRANS_LOG(WARN, "replay error", K(ret), K(iter->replay_success_ts_), +// K(iter->replaying_ts_)); +// } +// } +// time_used = ObTimeUtility::current_time() - cur_time; +// if (time_used < log_adapter_ptr_->get_cb_time()) { +// usleep(log_adapter_ptr_->get_cb_time() - time_used); +// } +// } +// } +// +// void MockReplayMgr::register_replay_target(logservice::ObIReplaySubHandler *replay_target) +// { +// MockReplayInfo tmp_replay_info; +// tmp_replay_info.replay_target_ = replay_target; +// +// replay_target_list_.push_back(tmp_replay_info); +// +// } +// +// void MockReplayMgr::unregister_replay_target(logservice::ObIReplaySubHandler *replay_target) +// { +// auto iter = replay_target_list_.begin(); +// for (; iter != replay_target_list_.end(); iter++) { +// +// if (iter->replay_target_ == replay_target) { +// break; +// } +// } +// +// if (iter != replay_target_list_.end()) { +// replay_target_list_.erase(iter); +// } +// } + } // namespace transaction } // namespace oceanbase diff --git a/unittest/storage/tx/ob_mock_tx_log_adapter.h b/unittest/storage/tx/ob_mock_tx_log_adapter.h index c8539fcfaf..a9fbf98908 100644 --- a/unittest/storage/tx/ob_mock_tx_log_adapter.h +++ b/unittest/storage/tx/ob_mock_tx_log_adapter.h @@ -28,6 +28,11 @@ namespace oceanbase { +namespace logservice +{ +class ObIReplaySubHandler; +} + namespace transaction { @@ -55,8 +60,9 @@ public: void wait(); void destroy(); - int push(void * task); - void handle(void * task); + int push(void *task); + void handle(void *task); + public: int submit_log(const char *buf, const int64_t size, @@ -70,10 +76,13 @@ public: return OB_SUCCESS; } - void push_all_cbs_(); + void invoke_all_cbs(); + public: - bool get_log(int64_t log_ts ,std::string &log_string); + bool get_log(int64_t log_ts, std::string &log_string); + int get_next_log(int64_t log_ts, std::string &log_string, int64_t &next_log_ts); int64_t get_cb_cnt(); + int64_t get_cb_time() { return submit_config_.cb_time_; } private: bool is_cbs_finish_(); @@ -91,14 +100,15 @@ private: // common::ObSEArray waiting_cbs_; std::map mock_log_file_; //ts , log record std::list waiting_cbs_; - + ObTransTimer timer_; - ObITimeoutTask * task_ptr_; + ObITimeoutTask *task_ptr_; int64_t CB_CNT_; }; -class MockCbTimeoutTask : public ObITimeoutTask { +class MockCbTimeoutTask : public ObITimeoutTask +{ public: MockCbTimeoutTask() : adapter_(nullptr) {} virtual ~MockCbTimeoutTask() {} @@ -111,12 +121,51 @@ public: void reset() { adapter_ = nullptr; } public: - void runTimerTask() { adapter_->push_all_cbs_(); } + void runTimerTask() { 
adapter_->invoke_all_cbs(); } uint64_t hash() const { return 1; }; private: MockTxLogAdapter *adapter_; }; + +// struct MockReplayInfo +// { +// logservice::ObIReplaySubHandler *replay_target_; +// +// int64_t replay_success_ts_; +// int64_t replaying_ts_; +// std::string replaying_log_; +// +// void reset() +// { +// replay_target_ = nullptr; +// replay_success_ts_ = -1; +// replaying_ts_ = -1; +// replaying_log_.clear(); +// } +// +// MockReplayInfo() { reset(); } +// }; +// +// class MockReplayMgr : public lib::ThreadPool +// { +// public: +// int init(MockTxLogAdapter * log_adapter); +// int start(); +// void stop(); +// void wait(); +// void destroy(); +// +// virtual void run1(); +// +// void register_replay_target(logservice::ObIReplaySubHandler *replay_target); +// void unregister_replay_target(logservice::ObIReplaySubHandler *replay_target); +// +// private: +// MockTxLogAdapter * log_adapter_ptr_; +// std::list replay_target_list_; +// }; + // class TestTxLogSubmitter : public ObSimpleThreadPool // { // public: diff --git a/unittest/storage/tx/test_dup_table_lease.cpp b/unittest/storage/tx/test_dup_table_lease.cpp new file mode 100644 index 0000000000..66bf1b5ae0 --- /dev/null +++ b/unittest/storage/tx/test_dup_table_lease.cpp @@ -0,0 +1,49 @@ +// Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
+ +#include +// #include "ob_mock_dup_table_ls_mgr.h" +#include "storage/tx/ob_dup_table_lease.h" + +namespace oceanbase +{ + +namespace unittest +{ + +class TestDupTableLease : public ::testing::Test +{ +public: + virtual void SetUp() {} + virtual void TearDown() {} +public: +}; + +TEST_F(TestDupTableLease, test_dup_table_lease_log) +{ + +} + +} + +} // namespace oceanbase + +using namespace oceanbase; + +int main(int argc, char **argv) +{ + int ret = 1; + ObLogger &logger = ObLogger::get_logger(); + logger.set_file_name("test_dup_table_lease.log", true); + logger.set_log_level(OB_LOG_LEVEL_INFO); + testing::InitGoogleTest(&argc, argv); + ret = RUN_ALL_TESTS(); + return ret; +} diff --git a/unittest/storage/tx_table/test_tx_ctx_table.cpp b/unittest/storage/tx_table/test_tx_ctx_table.cpp index 60941deb9f..e6a6ad2856 100644 --- a/unittest/storage/tx_table/test_tx_ctx_table.cpp +++ b/unittest/storage/tx_table/test_tx_ctx_table.cpp @@ -110,7 +110,8 @@ public: protected: virtual void SetUp() override { - ObTxPalfParam palf_param((logservice::ObLogHandler *)(0x01)); + ObTxPalfParam palf_param((logservice::ObLogHandler *)(0x01), + (transaction::ObDupTableLSHandler *)(0x02)); freezer_.init(&ls_); EXPECT_EQ(OB_SUCCESS, t3m_.init()); EXPECT_EQ(OB_SUCCESS, @@ -275,7 +276,8 @@ TEST_F(TestTxCtxTable, test_tx_ctx_memtable_mgr) attr.tenant_id_ = MTL_ID(); tx_data_table.slice_allocator_.init(sizeof(ObTxData), OB_MALLOC_NORMAL_BLOCK_SIZE, common::default_blk_alloc, attr); - ObTxPalfParam palf_param((logservice::ObLogHandler *)(0x01)); + ObTxPalfParam palf_param((logservice::ObLogHandler *)(0x01), + (transaction::ObDupTableLSHandler *)(0x02)); ObTxCtxTableRecoverHelperUT recover_helper; ObLSTxCtxMgr* ls_tx_ctx_mgr_recover = &unittest::TestTxCtxTable::ls_tx_ctx_mgr2_;
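Note on the new membership checks: the test_set_initial_member_list case and the added LogConfigInfo validity test above both enforce that the paxos member list, the learner list, and the arbitration member stay pairwise disjoint (any overlap is rejected as an invalid argument, and a config whose learner also sits in the member list is no longer valid). The following standalone C++ sketch illustrates only that disjointness rule; it is not OceanBase code, and the container and function names are mine.

#include <cassert>
#include <set>
#include <string>

// Returns true if any server appears in both lists; this is the kind of
// configuration the new validity checks reject (hypothetical helper).
bool lists_overlap(const std::set<std::string> &members,
                   const std::set<std::string> &learners)
{
  for (const auto &m : members) {
    if (learners.count(m) > 0) {
      return true;
    }
  }
  return false;
}

int main()
{
  std::set<std::string> members = {"addr1", "addr2", "addr3"};
  std::set<std::string> learners = {"addr4"};
  assert(!lists_overlap(members, learners));  // disjoint lists: acceptable
  learners.insert("addr2");                   // addr2 is now both member and learner
  assert(lists_overlap(members, learners));   // overlapping lists: rejected as invalid
  return 0;
}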
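Note on cases 44-53 in test_apply_config_meta: switch_acceptor_to_learner and switch_learner_to_acceptor now carry an explicit new_replica_num, and the expectations above suggest that demoting an acceptor must shrink the paxos replica number by one while promoting a learner must grow it by one, otherwise the request fails with OB_INVALID_ARGUMENT. The sketch below is a hypothetical, simplified check of just that rule (the "already finished" fast paths of cases 46-47 and 51-52 are ignored), and none of its names are taken from the patch.

#include <cassert>
#include <cstdint>

enum ChangeType { SWITCH_ACCEPTOR_TO_LEARNER, SWITCH_LEARNER_TO_ACCEPTOR };

// Simplified check: the caller-supplied new_replica_num must equal the current
// paxos replica number minus one for a demotion, or plus one for a promotion.
bool is_valid_new_replica_num(ChangeType type, int64_t curr_replica_num, int64_t new_replica_num)
{
  const int64_t expected = (type == SWITCH_ACCEPTOR_TO_LEARNER) ? curr_replica_num - 1
                                                                : curr_replica_num + 1;
  return new_replica_num > 0 && new_replica_num == expected;
}

int main()
{
  // 3F group: demoting one acceptor with new_replica_num 2 passes (case 45), 4 fails (case 44).
  assert(is_valid_new_replica_num(SWITCH_ACCEPTOR_TO_LEARNER, 3, 2));
  assert(!is_valid_new_replica_num(SWITCH_ACCEPTOR_TO_LEARNER, 3, 4));
  // 3F + 1 learner: promoting the learner with new_replica_num 4 passes (case 53),
  // while 0 or 3 fails (cases 48 and 49).
  assert(is_valid_new_replica_num(SWITCH_LEARNER_TO_ACCEPTOR, 3, 4));
  assert(!is_valid_new_replica_num(SWITCH_LEARNER_TO_ACCEPTOR, 3, 0));
  assert(!is_valid_new_replica_num(SWITCH_LEARNER_TO_ACCEPTOR, 3, 3));
  return 0;
}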
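Note on the LS_FLAG unit test: its assertions pin down the ObLSFlag string form, with flag value 1 rendered as DUPLICATE, 2 as BLOCK_TABLET_IN, 3 as DUPLICATE|BLOCK_TABLET_IN, the empty flag rendered as an empty string, and parsing accepting only '|'-separated tokens with no extra spaces or commas. The standalone sketch below reproduces that mapping as I read the assertions; it is not the ObLSFlag implementation, and every name in it is hypothetical.

#include <cassert>
#include <cstdint>
#include <string>

// Hypothetical bit values, matching the flag_ values asserted by the test.
enum : uint64_t { kDuplicate = 1, kBlockTabletIn = 2 };

// 0 -> "", 1 -> "DUPLICATE", 2 -> "BLOCK_TABLET_IN", 3 -> "DUPLICATE|BLOCK_TABLET_IN".
std::string flag_to_str(uint64_t flag)
{
  std::string s;
  if (flag & kDuplicate) {
    s += "DUPLICATE";
  }
  if (flag & kBlockTabletIn) {
    if (!s.empty()) {
      s += "|";
    }
    s += "BLOCK_TABLET_IN";
  }
  return s;
}

// Strict parse: only known tokens separated by '|'; spaces or commas make it fail,
// mirroring the OB_ERR_UNEXPECTED expectations in the test.
bool str_to_flag(const std::string &str, uint64_t &flag)
{
  flag = 0;
  if (str.empty()) {
    return true;
  }
  size_t pos = 0;
  while (pos <= str.size()) {
    const size_t bar = str.find('|', pos);
    const std::string tok =
        str.substr(pos, bar == std::string::npos ? std::string::npos : bar - pos);
    if (tok == "DUPLICATE") {
      flag |= kDuplicate;
    } else if (tok == "BLOCK_TABLET_IN") {
      flag |= kBlockTabletIn;
    } else {
      return false;
    }
    if (bar == std::string::npos) {
      break;
    }
    pos = bar + 1;
  }
  return true;
}

int main()
{
  uint64_t f = 0;
  assert(flag_to_str(kDuplicate | kBlockTabletIn) == "DUPLICATE|BLOCK_TABLET_IN");
  assert(str_to_flag("BLOCK_TABLET_IN|DUPLICATE", f) && f == 3);
  assert(!str_to_flag(" DUPLICATE ", f));                // surrounding spaces rejected
  assert(!str_to_flag("BLOCK_TABLET_IN,DUPLICATE", f));  // ',' is not a valid separator
  return 0;
}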