[FEAT MERGE] transfer without kill tx

This commit is contained in:
Handora 2024-02-07 14:20:47 +00:00 committed by ob-robot
parent 233bf43b61
commit 46b64790bc
125 changed files with 10809 additions and 1109 deletions

View File

@ -218,12 +218,9 @@ struct ObQueryFlag
inline void set_use_fast_agg() { use_fast_agg_ = UseFastAgg; }
inline void set_iter_uncommitted_row() { iter_uncommitted_row_ = true; }
inline void set_not_iter_uncommitted_row() { iter_uncommitted_row_ = false; }
inline void set_for_foreign_key_check() { for_foreign_key_check_ = true; }
inline void set_ignore_trans_stat() { ignore_trans_stat_ = true; }
inline void set_not_ignore_trans_stat() { ignore_trans_stat_ = false; }
inline bool iter_uncommitted_row() const { return iter_uncommitted_row_; }
inline void set_for_foreign_key_check() { for_foreign_key_check_ = true; }
inline bool is_for_foreign_key_check() const { return for_foreign_key_check_; }
inline bool is_ignore_trans_stat() const { return ignore_trans_stat_; }
inline bool is_sstable_cut() const { return is_sstable_cut_; }
inline bool is_skip_read_lob() const { return skip_read_lob_; }
inline void disable_cache()

View File

@ -656,6 +656,8 @@ int MockTenantModuleEnv::init()
if (inited_) {
ret = OB_INIT_TWICE;
STORAGE_LOG(ERROR, "init twice", K(ret));
} else if (OB_FAIL(ObClockGenerator::init())) {
STORAGE_LOG(ERROR, "init ClockGenerator failed", K(ret));
} else if (FALSE_IT(init_gctx_gconf())) {
} else if (OB_FAIL(init_before_start_mtl())) {
STORAGE_LOG(ERROR, "init_before_start_mtl failed", K(ret));

View File

@ -12,6 +12,7 @@
#define USING_LOG_PREFIX STORAGE
#include <gtest/gtest.h>
#include <thread>
#include "mtlenv/mock_tenant_module_env.h"
#include "storage/mockcontainer/mock_ob_iterator.h"
#include "storage/mockcontainer/mock_ob_end_trans_callback.h"
@ -365,6 +366,79 @@ TEST_F(TestTrans, freeze)
ASSERT_EQ(OB_SUCCESS, ls->logstream_freeze());
}
*/
// Marks the participant context of an open transaction as transfer-blocking,
// clears that flag from a helper thread after 3 seconds, and expects
// commit_tx() (deadline: now + 100s) to return OB_SUCCESS.
TEST_F(TestTrans, transfer_block)
{
  int ret = OB_SUCCESS;
  ObLSID ls_id(100);
  ObTabletID tablet_id(1001);

  LOG_INFO("start transaction");
  ObTxDesc *tx_desc = NULL;
  ObTxReadSnapshot snapshot;
  prepare_tx_desc(tx_desc, snapshot);

  // prepare insert param
  const char *ins_str =
      "bigint dml \n"
      "300 T_DML_INSERT \n";
  insert_rows(ls_id, tablet_id, *tx_desc, snapshot, ins_str);

  ObTransService *tx_service = MTL(ObTransService*);
  ObPartTransCtx *part_ctx = nullptr;
  ASSERT_EQ(OB_SUCCESS, tx_service->tx_ctx_mgr_.get_tx_ctx(ls_id, tx_desc->tx_id_, false, part_ctx));
  part_ctx->sub_state_.set_transfer_blocking();
  ASSERT_EQ(OB_SUCCESS, tx_service->tx_ctx_mgr_.revert_tx_ctx(part_ctx));

  // NOTE(review): part_ctx is used by the thread after the reference was
  // reverted above; presumably the ctx manager keeps it alive while the
  // transaction is open - confirm.
  std::thread th([part_ctx] () {
    ::sleep(3);
    part_ctx->sub_state_.clear_transfer_blocking();
  });

  LOG_INFO("commit transaction");
  const int commit_ret = tx_service->commit_tx(*tx_desc, ObTimeUtility::current_time() + 100000000);
  // Join before asserting: a failing ASSERT_* returns from the test body, and
  // destroying a still-joinable std::thread calls std::terminate().
  th.join();
  ASSERT_EQ(OB_SUCCESS, commit_ret);

  LOG_INFO("release transaction");
  tx_service->release_tx(*tx_desc);
}
// Marks the participant context of an open transaction as transfer-blocking,
// clears that flag from a helper thread after 3 seconds, and expects
// rollback_tx() to return OB_SUCCESS.
TEST_F(TestTrans, transfer_block2)
{
  int ret = OB_SUCCESS;
  ObLSID ls_id(100);
  ObTabletID tablet_id(1001);

  LOG_INFO("start transaction");
  ObTxDesc *tx_desc = NULL;
  ObTxReadSnapshot snapshot;
  prepare_tx_desc(tx_desc, snapshot);

  // prepare insert param
  const char *ins_str =
      "bigint dml \n"
      "400 T_DML_INSERT \n";
  insert_rows(ls_id, tablet_id, *tx_desc, snapshot, ins_str);

  ObTransService *tx_service = MTL(ObTransService*);
  ObPartTransCtx *part_ctx = nullptr;
  ASSERT_EQ(OB_SUCCESS, tx_service->tx_ctx_mgr_.get_tx_ctx(ls_id, tx_desc->tx_id_, false, part_ctx));
  part_ctx->sub_state_.set_transfer_blocking();
  ASSERT_EQ(OB_SUCCESS, tx_service->tx_ctx_mgr_.revert_tx_ctx(part_ctx));

  // NOTE(review): part_ctx is used by the thread after the reference was
  // reverted above; presumably the ctx manager keeps it alive while the
  // transaction is open - confirm.
  std::thread th([part_ctx] () {
    ::sleep(3);
    part_ctx->sub_state_.clear_transfer_blocking();
  });

  LOG_INFO("rollback transaction");
  const int rollback_ret = tx_service->rollback_tx(*tx_desc);
  // Join before asserting: a failing ASSERT_* returns from the test body, and
  // destroying a still-joinable std::thread calls std::terminate().
  th.join();
  ASSERT_EQ(OB_SUCCESS, rollback_ret);

  LOG_INFO("release transaction");
  tx_service->release_tx(*tx_desc);
}
TEST_F(TestTrans, remove_ls)
{

View File

@ -750,9 +750,7 @@ int main(int argc, char **argv)
// TEST_LOG("GCONF.syslog_io_bandwidth_limit %ld ", GCONF.syslog_io_bandwidth_limit.get_value());
// LOG_INFO("GCONF.syslog_io_bandwidth_limit ", K(GCONF.syslog_io_bandwidth_limit.get_value()));
if (OB_SUCCESS != ObClockGenerator::init()) {
TRANS_LOG(WARN, "ObClockGenerator::init error!");
} else {
{
if (argc > 1) {
const_data_num = atoi(argv[1]);
} else {

View File

@ -341,8 +341,8 @@ TEST_F(GET_RESTART_ZONE_TEST_CLASS_NAME(2, 1), become_leader_after_restart)
transaction::ObPartTransCtx *tx_ctx = nullptr;
ASSERT_EQ(OB_SUCCESS,
ls_handle.get_ls()->get_tx_ctx(transaction::ObTransID(update_tx_id), false, tx_ctx));
share::ObLSArray fake_parts;
ASSERT_EQ(OB_SUCCESS, fake_parts.push_back(share::ObLSID(static_basic_arg_.ls_id_num_)));
ObTxCommitParts fake_parts;
ASSERT_EQ(OB_SUCCESS, fake_parts.push_back(ObTxExecPart(share::ObLSID(static_basic_arg_.ls_id_num_), -1, -1)));
tx_ctx->set_2pc_participants_(fake_parts);
tx_ctx->submit_redo_commit_info_log_();
RETRY_UNTIL_TIMEOUT(tx_ctx->busy_cbs_.is_empty(), 20 * 1000 * 1000, 100 * 1000);

View File

@ -2,6 +2,7 @@ set(OBSERVER_TEST_SRCS
env/ob_simple_server.cpp
env/ob_simple_server_restart_helper.cpp
env/ob_simple_cluster_test_base.cpp
env/ob_simple_server_helper.cpp
)
add_library(observer_test ${OBSERVER_TEST_SRCS})
@ -29,27 +30,31 @@ function(errsim_ha_unittest_observer case)
target_link_libraries(${case} PRIVATE gtest gmock observer_test oceanbase)
endfunction()
add_executable(test_simple_ob
EXCLUDE_FROM_ALL
test_ob_simple_cluster.cpp
env/ob_simple_server.cpp
env/ob_simple_server_restart_helper.cpp
env/ob_simple_cluster_test_base.cpp
)
target_include_directories(test_simple_ob PUBLIC
${CMAKE_SOURCE_DIR}/unittest ${CMAKE_SOURCE_DIR}/mittest)
target_link_libraries(test_simple_ob
PRIVATE
-Wl,--start-group
oceanbase_static
ob_sql_static
ob_storage_static
-Wl,--end-group
-static-libgcc
-static-libstdc++
gtest
gmock)
# ob_offline_observer(<case> <case_file>)
#
# Declares an EXCLUDE_FROM_ALL executable named <case>, built from <case_file>
# plus the shared OBSERVER_TEST_SRCS sources, with the unittest/ and mittest/
# directories on its include path, linked statically against the OceanBase
# archives together with gtest and gmock.
function(ob_offline_observer case case_file)
  add_executable(${case} EXCLUDE_FROM_ALL ${case_file} ${OBSERVER_TEST_SRCS})

  target_include_directories(${case}
    PUBLIC ${CMAKE_SOURCE_DIR}/unittest ${CMAKE_SOURCE_DIR}/mittest)

  # The three static archives sit inside one linker group; keep this order.
  target_link_libraries(${case} PRIVATE
    -Wl,--start-group oceanbase_static ob_sql_static ob_storage_static -Wl,--end-group
    -static-libgcc -static-libstdc++
    gtest gmock)
endfunction()
ob_offline_observer(test_simple_ob test_ob_simple_cluster.cpp)
ob_offline_observer(test_transfer_tx test_transfer_tx.cpp)
ob_unittest_observer(test_transfer_no_kill_tx test_transfer_tx.cpp)
ob_unittest_observer(test_standby_balance test_standby_balance_ls_group.cpp)
ob_unittest_observer(test_ls_recover test_ls_recover.cpp)
ob_unittest_observer(test_ob_simple_cluster test_ob_simple_cluster.cpp)

View File

@ -192,7 +192,8 @@ int ObSimpleClusterTestBase::close()
int ObSimpleClusterTestBase::create_tenant(const char *tenant_name,
const char *memory_size,
const char *log_disk_size,
const bool oracle_mode)
const bool oracle_mode,
int64_t tenant_cpu)
{
SERVER_LOG(INFO, "create tenant start");
int32_t log_level;
@ -228,8 +229,8 @@ int ObSimpleClusterTestBase::create_tenant(const char *tenant_name,
{
ObSqlString sql;
if (OB_FAIL(ret)) {
} else if (OB_FAIL(sql.assign_fmt("create resource unit %s%s max_cpu 2, memory_size '%s', log_disk_size='%s';",
UNIT_BASE, tenant_name, memory_size, log_disk_size))) {
} else if (OB_FAIL(sql.assign_fmt("create resource unit %s%s max_cpu %ld, memory_size '%s', log_disk_size='%s';",
UNIT_BASE, tenant_name, tenant_cpu, memory_size, log_disk_size))) {
SERVER_LOG(WARN, "create_tenant", K(ret));
} else if (OB_FAIL(sql_proxy.write(sql.ptr(), affected_rows))) {
SERVER_LOG(WARN, "create_tenant", K(ret));

View File

@ -43,7 +43,8 @@ public:
int create_tenant(const char *tenant_name = "tt1",
const char *memory_size = "2G",
const char *log_disk_size = "2G",
const bool oracle_mode = false);
const bool oracle_mode = false,
int64_t tenant_cpu = 2);
int delete_tenant(const char *tenant_name = "tt1");
int get_tenant_id(uint64_t &tenant_id, const char *tenant_name = "tt1");
int exec_write_sql_sys(const char *sql_str, int64_t &affected_rows);

View File

@ -251,7 +251,7 @@ int ObSimpleServer::init_sql_proxy2(const char *tenant_name, const char *db_name
param.long_query_timeout_ = 300*1000*1000; // 120s
param.connection_refresh_interval_ = 200*1000; // 200ms
param.connection_pool_warn_time_ = 10*1000*1000; // 1s
param.sqlclient_per_observer_conn_limit_ = 1000;
param.sqlclient_per_observer_conn_limit_ = 10000;
ret = sql_conn_pool2_.init(db_addr, param);
if (OB_SUCC(ret)) {
sql_conn_pool2_.set_mode(common::sqlclient::ObMySQLConnection::DEBUG_MODE);

View File

@ -0,0 +1,796 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX STORAGE
#define private public
#define protected public
#include "ob_simple_server_helper.h"
#include "storage/tx_storage/ob_ls_service.h"
#include "storage/tablet/ob_tablet.h"
#include "storage/tx/ob_trans_part_ctx.h"
#include "logservice/ob_log_service.h"
#include "unittest/storage/init_basic_struct.h"
#include "lib/profile/ob_trace_id.h"
namespace oceanbase
{
int SimpleServerHelper::create_ls(uint64_t tenant_id, ObAddr addr)
{
#define FR(x) \
if (FAILEDx(x)) { \
return ret; \
}
int ret = OB_SUCCESS;
int64_t affected_rows = 0;
static int64_t start_ls_id = 1001;
ObLSID ls_id(ATOMIC_AAF(&start_ls_id,1));
if (OB_FAIL(GCTX.sql_proxy_->write(tenant_id, "alter system set enable_rebalance=false", affected_rows))) {
}
if (OB_SUCC(ret)) {
ObSqlString sql;
sql.assign_fmt("insert into __all_ls (ls_id, ls_group_id, status, flag, create_scn) values(%ld, 1001,'NORMAL', '',0)", ls_id.id());
if (FAILEDx(GCTX.sql_proxy_->write(tenant_id, sql.ptr(), affected_rows))) {
}
sql.assign_fmt("insert into __all_ls_status (tenant_id, ls_id, status, ls_group_id, unit_group_id, primary_zone) values(%ld, %ld,'NORMAL', 1001, 1001, 'zone1')", tenant_id, ls_id.id());
if (FAILEDx(GCTX.sql_proxy_->write(gen_meta_tenant_id(tenant_id), sql.ptr(), affected_rows))) {
}
}
if (OB_FAIL(ret)) {
return ret;
}
MTL_SWITCH(tenant_id) {
ObCreateLSArg arg;
ObLSService* ls_svr = MTL(ObLSService*);
FR(gen_create_ls_arg(tenant_id, ls_id, arg));
FR(ls_svr->create_ls(arg));
LOG_INFO("set member list");
ObLSHandle handle;
ObLS *ls = nullptr;
FR(ls_svr->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD));
ls = handle.get_ls();
ObMemberList member_list;
int64_t paxos_replica_num = 1;
(void) member_list.add_server(addr);
GlobalLearnerList learner_list;
FR(ls->set_initial_member_list(member_list,
paxos_replica_num,
learner_list));
// check leader
LOG_INFO("check leader");
for (int i = 0; i < 15; i++) {
ObRole role;
int64_t leader_epoch = 0;
ls->get_log_handler()->get_role(role, leader_epoch);
if (role == ObRole::LEADER) {
break;
}
::sleep(1);
}
}
return ret;
}
// Runs `sql` through `sql_proxy` and stores column "val" of the first row in `val`.
// Returns OB_ENTRY_NOT_EXIST when the proxy yields no result object, otherwise
// the error of the failing step (read / next / get_int) or OB_SUCCESS.
// Any failure is logged once with the offending statement.
int SimpleServerHelper::select_int64(common::ObMySQLProxy &sql_proxy, const char *sql, int64_t &val)
{
  int ret = OB_SUCCESS;
  SMART_VAR(ObMySQLProxy::MySQLResult, query_res) {
    sqlclient::ObMySQLResult *rows = nullptr;
    if (OB_FAIL(sql_proxy.read(query_res, sql))) {
      // reported by the common warning below
    } else if (OB_ISNULL(rows = query_res.get_result())) {
      ret = OB_ENTRY_NOT_EXIST;
    } else if (OB_FAIL(rows->next())) {
    } else if (OB_FAIL(rows->get_int("val", val))) {
    }
  }
  if (OB_SUCCESS != ret) {
    LOG_WARN("select failed", KR(ret), K(sql));
  }
  return ret;
}
// select with sql_proxy
int SimpleServerHelper::g_select_int64(uint64_t tenant_id, const char *sql, int64_t &val)
{
int ret = OB_SUCCESS;
common::ObMySQLProxy &sql_proxy = *GCTX.sql_proxy_;
SMART_VAR(ObMySQLProxy::MySQLResult, res) {
if (OB_FAIL(sql_proxy.read(res, tenant_id, sql))) {
} else {
sqlclient::ObMySQLResult *result = res.get_result();
if (result == nullptr) {
ret = OB_ENTRY_NOT_EXIST;
} else if (OB_FAIL(result->next())) {
} else if (OB_FAIL(result->get_int("val", val))) {
}
}
}
if (OB_FAIL(ret)) {
LOG_WARN("select failed", KR(ret), K(sql));
}
return ret;
}
// Runs `sql` through `sql_proxy` and stores column "val" of the first row,
// read with get_uint(), in `val`.
// Returns OB_ENTRY_NOT_EXIST when there is no result object, otherwise the
// error of the failing step or OB_SUCCESS.
int SimpleServerHelper::select_uint64(common::ObMySQLProxy &sql_proxy, const char *sql, uint64_t &val)
{
  int ret = OB_SUCCESS;
  SMART_VAR(ObMySQLProxy::MySQLResult, query_res) {
    sqlclient::ObMySQLResult *rows = nullptr;
    if (OB_FAIL(sql_proxy.read(query_res, sql))) {
      // reported by the common warning below
    } else if (OB_ISNULL(rows = query_res.get_result())) {
      ret = OB_ENTRY_NOT_EXIST;
    } else if (OB_FAIL(rows->next())) {
    } else if (OB_FAIL(rows->get_uint("val", val))) {
    }
  }
  if (OB_SUCCESS != ret) {
    LOG_WARN("select failed", KR(ret), K(sql));
  }
  return ret;
}
// select with sql_proxy
int SimpleServerHelper::g_select_uint64(uint64_t tenant_id, const char *sql, uint64_t &val)
{
int ret = OB_SUCCESS;
common::ObMySQLProxy &sql_proxy = *GCTX.sql_proxy_;
SMART_VAR(ObMySQLProxy::MySQLResult, res) {
if (OB_FAIL(sql_proxy.read(res, tenant_id, sql))) {
} else {
sqlclient::ObMySQLResult *result = res.get_result();
if (result == nullptr) {
ret = OB_ENTRY_NOT_EXIST;
} else if (OB_FAIL(result->next())) {
} else if (OB_FAIL(result->get_uint("val", val))) {
}
}
}
if (OB_FAIL(ret)) {
LOG_WARN("select failed", KR(ret), K(sql));
}
return ret;
}
// Executes `sql` on the given connection (always with OB_SYS_TENANT_ID) and
// stores column "val" of the first row in `val`.
// Returns OB_ENTRY_NOT_EXIST when there is no result object, otherwise the
// error of the failing step or OB_SUCCESS. `conn` is dereferenced unchecked.
int SimpleServerHelper::select_int64(sqlclient::ObISQLConnection *conn, const char *sql, int64_t &val)
{
  int ret = OB_SUCCESS;
  SMART_VAR(ObMySQLProxy::MySQLResult, query_res) {
    sqlclient::ObMySQLResult *rows = nullptr;
    if (OB_FAIL(conn->execute_read(OB_SYS_TENANT_ID, sql, query_res))) {
      // reported by the common warning below
    } else if (OB_ISNULL(rows = query_res.get_result())) {
      ret = OB_ENTRY_NOT_EXIST;
    } else if (OB_FAIL(rows->next())) {
    } else if (OB_FAIL(rows->get_int("val", val))) {
    }
  }
  if (OB_SUCCESS != ret) {
    LOG_WARN("select failed", KR(ret), K(sql));
  }
  return ret;
}
int SimpleServerHelper::g_select_varchar(uint64_t tenant_id, const char *sql, ObString &val)
{
int ret = OB_SUCCESS;
common::ObMySQLProxy &sql_proxy = *GCTX.sql_proxy_;
SMART_VAR(ObMySQLProxy::MySQLResult, res) {
if (OB_FAIL(sql_proxy.read(res, tenant_id, sql))) {
} else {
sqlclient::ObMySQLResult *result = res.get_result();
if (result == nullptr) {
ret = OB_ENTRY_NOT_EXIST;
} else if (OB_FAIL(result->next())) {
} else {
EXTRACT_VARCHAR_FIELD_MYSQL(*result, "val", val);
}
}
}
if (OB_FAIL(ret)) {
LOG_WARN("select failed", KR(ret), K(sql));
}
return ret;
}
// Executes `sql` on the given connection (always with OB_SYS_TENANT_ID) and
// reads column "val" of the first row into `val` via EXTRACT_VARCHAR_FIELD_MYSQL.
// Returns OB_ENTRY_NOT_EXIST when there is no result object, otherwise the
// error of the failing step or OB_SUCCESS.
// NOTE(review): `val` is filled while the result object is alive and that
// object goes out of scope before returning; presumably ObString does not own
// its bytes - confirm whether callers may safely read `val` afterwards.
int SimpleServerHelper::select_varchar(sqlclient::ObISQLConnection *conn, const char *sql, ObString &val)
{
  int ret = OB_SUCCESS;
  SMART_VAR(ObMySQLProxy::MySQLResult, query_res) {
    sqlclient::ObMySQLResult *rows = nullptr;
    if (OB_FAIL(conn->execute_read(OB_SYS_TENANT_ID, sql, query_res))) {
      // reported by the common warning below
    } else if (OB_ISNULL(rows = query_res.get_result())) {
      ret = OB_ENTRY_NOT_EXIST;
    } else if (OB_FAIL(rows->next())) {
    } else {
      EXTRACT_VARCHAR_FIELD_MYSQL(*rows, "val", val);
    }
  }
  if (OB_SUCCESS != ret) {
    LOG_WARN("select failed", KR(ret), K(sql));
  }
  return ret;
}
int SimpleServerHelper::select_table_loc(uint64_t tenant_id, const char* table_name, ObLSID &ls_id)
{
int ret = OB_SUCCESS;
ObSqlString sql;
int64_t val = 0;
sql.assign_fmt("select a.ls_id as val from __all_tablet_to_ls a join __all_table b where a.table_id=b.table_id and b.table_name='%s'", table_name);
if (OB_FAIL(g_select_int64(tenant_id, sql.ptr(), val))) {
} else {
ls_id = ObLSID(val);
}
return ret;
}
int SimpleServerHelper::select_table_tablet(uint64_t tenant_id, const char* table_name, ObTabletID &tablet_id)
{
int ret = OB_SUCCESS;
ObSqlString sql;
int64_t val = 0;
sql.assign_fmt("select tablet_id as val from __all_table b where table_name='%s'", table_name);
if (OB_FAIL(g_select_int64(tenant_id, sql.ptr(), val))) {
} else {
tablet_id = ObTabletID(val);
}
return ret;
}
// Switches to `tenant_id`, fetches log stream `ls_id` and calls
// traverse_trans_to_submit_redo_log() on its tx service.
// The transaction id reported back by that call is discarded.
// Returns the first error encountered, or OB_SUCCESS.
int SimpleServerHelper::submit_redo(uint64_t tenant_id, ObLSID ls_id)
{
  int ret = OB_SUCCESS;
  ObTransID failed_tx_id;
  MTL_SWITCH(tenant_id) {
    ObLSHandle handle;
    ObLSService *ls_service = MTL(ObLSService*);
    if (OB_FAIL(ls_service->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD))) {
      // ret carries the error
    } else {
      ret = handle.get_ls()->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id);
    }
  }
  return ret;
}
// Submits redo for all transactions of log stream `ls_id`, records the LS end
// SCN, then repeatedly calls advance_checkpoint_by_flush(SCN::max_scn()) until
// the clog checkpoint SCN in the LS meta is no longer below that end SCN.
// Sleeps 500ms between rounds; there is no timeout.
// Returns the first error encountered, or OB_SUCCESS once caught up.
int SimpleServerHelper::wait_checkpoint_newest(uint64_t tenant_id, ObLSID ls_id)
{
  LOG_INFO("wait_checkpoint_newest", K(tenant_id), K(ls_id));
  int ret = OB_SUCCESS;
  ObTransID failed_tx_id;
  SCN end_scn;
  MTL_SWITCH(tenant_id) {
    ObLSHandle handle;
    if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD))) {
      // ret carries the error
    } else {
      ObLS *target_ls = handle.get_ls();
      if (OB_FAIL(target_ls->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id))) {
      } else if (OB_FAIL(target_ls->get_end_scn(end_scn))) {
      } else {
        SCN checkpoint_scn;
        bool caught_up = false;
        while (OB_SUCC(ret) && !caught_up) {
          if (OB_FAIL(target_ls->advance_checkpoint_by_flush(SCN::max_scn()))) {
            // leaves the loop through the OB_SUCC(ret) condition
          } else {
            checkpoint_scn = target_ls->get_ls_meta().get_clog_checkpoint_scn();
            LOG_INFO("wait ls checkpoint advance", K(tenant_id), K(ls_id), K(checkpoint_scn), K(end_scn));
            if (checkpoint_scn < end_scn) {
              ob_usleep(500 * 1000);
            } else {
              caught_up = true;
            }
          }
        }
      }
    }
  }
  LOG_INFO("wait_checkpoint_newest finish", K(tenant_id), K(ls_id));
  return ret;
}
// Switches to `tenant_id`, fetches log stream `ls_id` and calls
// tablet_freeze(tablet_id, true) on it.
// Returns the first error encountered, or OB_SUCCESS.
int SimpleServerHelper::freeze(uint64_t tenant_id, ObLSID ls_id, ObTabletID tablet_id)
{
  int ret = OB_SUCCESS;
  MTL_SWITCH(tenant_id) {
    ObLSHandle handle;
    ObLSService *ls_service = MTL(ObLSService*);
    if (OB_FAIL(ls_service->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD))) {
      // ret carries the error
    } else {
      ret = handle.get_ls()->tablet_freeze(tablet_id, true);
    }
  }
  return ret;
}
// Polls tablet `tablet_id` of log stream `ls_id` every 100ms until every
// memtable returned by get_memtables() has a non-zero release_time_ in its
// stat, or until get_memtables() reports OB_ENTRY_NOT_EXIST (treated as
// success). There is no timeout.
//
// Returns OB_SUCCESS when done, OB_ERR_UNEXPECTED when an entry of the
// memtable list is not a memtable::ObMemtable, or the first other error.
int SimpleServerHelper::wait_flush_finish(uint64_t tenant_id, ObLSID ls_id, ObTabletID tablet_id)
{
  int ret = OB_SUCCESS;
  MTL_SWITCH(tenant_id) {
    ObLSHandle ls_handle;
    if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) {
    } else {
      while (OB_SUCC(ret)) {
        ObTabletHandle handle;
        ObTablet *tablet = NULL;
        common::ObSEArray<storage::ObITable *, 1> memtables;
        if (OB_FAIL(ls_handle.get_ls()->get_tablet_svr()->direct_get_tablet(tablet_id, handle))) {
          LOG_WARN("failed to get tablet", K(ret), K(tablet_id));
        } else if (FALSE_IT(tablet = handle.get_obj())) {
        } else if (OB_FAIL(tablet->get_memtables(memtables))) {
          if (OB_ENTRY_NOT_EXIST == ret) {
            // no memtables at all: nothing left to flush
            ret = OB_SUCCESS;
            break;
          }
        } else {
          bool flush_finish = true;
          for (int64_t idx = 0; OB_SUCC(ret) && flush_finish && idx < memtables.count(); idx++) {
            memtable::ObMemtable *mt = dynamic_cast<memtable::ObMemtable*>(memtables.at(idx));
            if (OB_ISNULL(mt)) {
              // dynamic_cast yields NULL for any other table type; fail
              // explicitly instead of dereferencing it
              ret = OB_ERR_UNEXPECTED;
              LOG_WARN("table is not an ObMemtable", KR(ret), K(tablet_id), K(idx));
            } else if (mt->get_mt_stat().release_time_ == 0) {
              flush_finish = false;
            }
          }
          if (OB_FAIL(ret)) {
            // leaves the loop through the OB_SUCC(ret) condition
          } else if (flush_finish) {
            break;
          } else {
            ob_usleep(100 * 1000);
          }
        }
      }
    }
  }
  return ret;
}
// Removes the context of transaction `tx_id` from the tx-ctx map of log stream
// `ls_id` (tenant `tenant_id`) by calling del() on ls_tx_ctx_map_ directly.
// Returns the error from get_ls()/get_tx_ctx(), or OB_SUCCESS.
// The result of del() is not checked.
int SimpleServerHelper::remove_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id)
{
int ret = OB_SUCCESS;
MTL_SWITCH(tenant_id) {
ObLSHandle ls_handle;
if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) {
LOG_WARN("get ls failed", KR(ret), K(ls_id));
} else {
// Direct reference to the manager's internal map (reachable because this
// file is compiled with private/protected redefined to public).
auto &m = ls_handle.get_ls()->ls_tx_svr_.mgr_->ls_tx_ctx_map_;
ObPartTransCtx *ctx = nullptr;
if (OB_FAIL(ls_handle.get_ls()->get_tx_ctx(tx_id, false, ctx))) {
} else {
// NOTE(review): the reference obtained by get_tx_ctx() is reverted BEFORE
// ctx is locked and deleted, and ctx_lock_guard is destroyed after del().
// This presumably relies on the map still keeping ctx alive - confirm the
// lifetime before reordering anything here.
ls_handle.get_ls()->revert_tx_ctx(ctx);
CtxLockGuard ctx_lock_guard;
ctx->get_ctx_guard(ctx_lock_guard);
m.del(tx_id, ctx);
}
}
}
return ret;
}
// Aborts transaction `tx_id` on log stream `ls_id` (tenant `tenant_id`) by
// calling do_local_abort_tx_() on its participant context while holding the
// context guard.
// Returns the error from get_ls(), get_tx_ctx() or do_local_abort_tx_(), or
// OB_SUCCESS. It does not wait for the context to disappear; that wait loop is
// commented out below.
int SimpleServerHelper::abort_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id)
{
int ret = OB_SUCCESS;
MTL_SWITCH(tenant_id) {
ObLSHandle ls_handle;
ObPartTransCtx *ctx = nullptr;
if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) {
LOG_WARN("get ls failed", KR(ret), K(ls_id));
} else if (OB_FAIL(ls_handle.get_ls()->get_tx_ctx(tx_id, false, ctx))) {
} else {
// NOTE(review): the reference obtained by get_tx_ctx() is reverted BEFORE
// ctx is used; this presumably relies on the ctx manager still keeping the
// context alive - confirm before reordering.
ls_handle.get_ls()->revert_tx_ctx(ctx);
{
// Inner scope so the guard is released right after the abort call.
CtxLockGuard ctx_lock_guard;
ctx->get_ctx_guard(ctx_lock_guard);
if (OB_FAIL(ctx->do_local_abort_tx_())) {
}
}
/*
if (OB_SUCC(ret)) {
while (true) {
ret = ls_handle.get_ls()->get_tx_ctx(tx_id, false, ctx);
if (OB_SUCCESS == ret) {
ob_usleep(200* 1000);
continue;
} else if (OB_TRANS_CTX_NOT_EXIST == ret) {
ret = OB_SUCCESS;
break;
} else {
break;
}
}
}
*/
}
}
return ret;
}
// Stores the result of "select connection_id()" on `conn` in `session_id`.
// Returns whatever select_int64() returns.
int SimpleServerHelper::find_session(sqlclient::ObISQLConnection *conn,
                                     int64_t &session_id)
{
  const char *query = "select connection_id() as val";
  const int ret = select_int64(conn, query, session_id);
  return ret;
}
// Finds the transaction currently attached to the session behind `conn`:
// resolves the session id with find_session(), then reads trans_id for that
// id from __all_virtual_session_info through the sys tenant.
// `tx_id` is only written on success.
// Returns the first error encountered, or OB_SUCCESS.
int SimpleServerHelper::find_tx(sqlclient::ObISQLConnection *conn, ObTransID &tx_id)
{
  int ret = OB_SUCCESS;
  int64_t session_id = 0;
  ObSqlString sql;
  uint64_t val = 0;
  if (OB_FAIL(find_session(conn, session_id))) {
  } else if (OB_FAIL(sql.assign_fmt("select trans_id as val from __all_virtual_session_info where id=%ld", session_id))) {
    // previously ignored: a failed format would hand an unusable statement to the query
    LOG_WARN("build sql failed", KR(ret), K(session_id));
  } else if (OB_FAIL(g_select_uint64(OB_SYS_TENANT_ID, sql.ptr(), val))) {
    LOG_WARN("find tx", KR(ret), K(sql));
  } else {
    tx_id = ObTransID(val);
  }
  return ret;
}
// Stores the result of "select last_trace_id()" on `conn` in `trace_id`.
// Returns whatever select_varchar() returns.
int SimpleServerHelper::find_trace_id(sqlclient::ObISQLConnection *conn, ObString &trace_id)
{
  const char *query = "select last_trace_id() as val";
  return select_varchar(conn, query, trace_id);
}
// Reads the newest row (highest request_id) of __all_virtual_sql_audit for
// the given tenant and session and returns its request_id, transaction_id,
// trace_id and retry_cnt through the output parameters.
// Returns OB_ENTRY_NOT_EXIST when there is no result object, otherwise the
// first error encountered or OB_SUCCESS.
// NOTE(review): `trace_id` is filled while the result object is alive and that
// object goes out of scope before returning; presumably ObString does not own
// its bytes - confirm whether callers may safely read it afterwards.
int SimpleServerHelper::find_request(uint64_t tenant_id, int64_t session_id ,
                                     int64_t &request_id, ObTransID &tx_id, ObString &trace_id, int64_t &retry_cnt)
{
  int ret = OB_SUCCESS;
  ObSqlString sql;
  // tenant_id is uint64_t, hence %lu
  if (OB_FAIL(sql.assign_fmt("select request_id,transaction_id,trace_id,retry_cnt from __all_virtual_sql_audit where tenant_id=%lu and session_id=%ld order by request_id desc limit 1",
                             tenant_id, session_id))) {
    // previously ignored: a failed format would hand an unusable statement to the query
  } else {
    common::ObMySQLProxy &sql_proxy = *GCTX.sql_proxy_;
    SMART_VAR(ObMySQLProxy::MySQLResult, res) {
      if (OB_FAIL(sql_proxy.read(res, tenant_id, sql.ptr()))) {
      } else {
        sqlclient::ObMySQLResult *result = res.get_result();
        if (result == nullptr) {
          ret = OB_ENTRY_NOT_EXIST;
        } else if (OB_FAIL(result->next())) {
        } else {
          EXTRACT_INT_FIELD_MYSQL(*result, "request_id", request_id, int64_t);
          EXTRACT_INT_FIELD_MYSQL(*result, "transaction_id", tx_id.tx_id_, int64_t);
          EXTRACT_VARCHAR_FIELD_MYSQL(*result, "trace_id", trace_id);
          EXTRACT_INT_FIELD_MYSQL(*result, "retry_cnt", retry_cnt, int64_t);
        }
      }
    }
  }
  if (OB_FAIL(ret)) {
    LOG_WARN("select failed", KR(ret), K(sql));
  }
  return ret;
}
// Switches to `tenant_id`, fetches log stream `ls_id` and calls
// switch_to_follower_gracefully() followed by switch_to_leader() on its
// ls_tx_svr_. The second call is skipped when the first one fails.
// Returns the first error encountered, or OB_SUCCESS.
int SimpleServerHelper::ls_resume(uint64_t tenant_id, ObLSID ls_id)
{
  int ret = OB_SUCCESS;
  MTL_SWITCH(tenant_id) {
    ObLSHandle handle;
    if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD))) {
      // ret carries the error
    } else {
      ObLS *target_ls = handle.get_ls();
      if (OB_SUCC(target_ls->ls_tx_svr_.switch_to_follower_gracefully())) {
        ret = target_ls->ls_tx_svr_.switch_to_leader();
      }
    }
  }
  return ret;
}
// Copies a snapshot of transaction `tx_id` on log stream `ls_id` into
// `ctx_info`: trans_id_, ls_id_, epoch_ and exec_info_. Also prints one line
// with the same identifiers to stdout via LOGI.
// Returns the error from get_ls(), get_tx_ctx() or exec_info_.assign(), or
// OB_SUCCESS. The context reference is reverted on every path that obtained it.
int SimpleServerHelper::find_tx_info(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObPartTransCtx &ctx_info)
{
  int ret = OB_SUCCESS;
  MTL_SWITCH(tenant_id) {
    ObLSHandle ls_handle;
    if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) {
      LOG_WARN("get ls failed", KR(ret), K(ls_id));
    } else {
      ObPartTransCtx *ctx = nullptr;
      if (OB_FAIL(ls_handle.get_ls()->get_tx_ctx(tx_id, true, ctx))) {
      } else {
        LOGI("find_tx_info tenant_id:%ld ls_id:%ld txid:%ld epoch:%ld state:%hhu ptr:%p", tenant_id,
             ls_id.id(), tx_id.get_id(), ctx->epoch_, ctx->exec_info_.state_, ctx);
        ctx_info.trans_id_ = ctx->trans_id_;
        ctx_info.ls_id_ = ctx->ls_id_;
        ctx_info.epoch_ = ctx->epoch_;
        // previously ignored: report a failed copy instead of returning a
        // partially filled snapshot as success
        if (OB_FAIL(ctx_info.exec_info_.assign(ctx->exec_info_))) {
          LOG_WARN("copy exec info failed", KR(ret), K(ls_id), K(tx_id));
        }
        // always give the reference back, whether or not the copy worked
        ls_handle.get_ls()->revert_tx_ctx(ctx);
      }
    }
  }
  return ret;
}
// Polls transaction `tx_id` on log stream `ls_id` every 50ms until its
// exec_info_.state_ is >= `tx_state`. There is no timeout.
// Logs the context when the target is reached, and otherwise at most once per
// second. Returns OB_SUCCESS when the state was reached, or the first error
// from get_ls()/get_tx_ctx(), which also ends the wait.
int SimpleServerHelper::wait_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObTxState tx_state)
{
  LOG_INFO("wait_tx", K(tenant_id), K(ls_id), K(tx_id));
  int ret = OB_SUCCESS;
  bool reached = false;
  while (OB_SUCC(ret) && !reached) {
    MTL_SWITCH(tenant_id) {
      ObLSHandle handle;
      ObPartTransCtx *ctx = nullptr;
      if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD))) {
        LOG_WARN("get ls failed", KR(ret), K(ls_id));
      } else if (OB_FAIL(handle.get_ls()->get_tx_ctx(tx_id, true, ctx))) {
        // ends the wait through the loop condition
      } else {
        reached = (ctx->exec_info_.state_ >= tx_state);
        // keep `reached` first so the rate limiter is not consulted on the final round
        if (reached || REACH_TIME_INTERVAL(1 * 1000 * 1000)) {
          LOG_INFO("wait_tx", K(tx_state), K(*ctx), KP(ctx), K(ctx->exec_info_.state_), K(ls_id));
        }
        handle.get_ls()->revert_tx_ctx(ctx);
      }
    }
    // the pause also happens after the last round
    ob_usleep(50 * 1000);
  }
  LOG_INFO("wait_tx finish", K(tenant_id), K(ls_id), K(tx_id));
  return ret;
}
// Polls every 50ms for as long as the context of transaction `tx_id` can still
// be fetched from log stream `ls_id`. There is no timeout.
// The loop only ends when get_ls() or get_tx_ctx() fails, so the return value
// is always that error code and never OB_SUCCESS; callers have to compare it
// against the code they expect.
int SimpleServerHelper::wait_tx_exit(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id)
{
  LOG_INFO("wait_tx_end", K(tenant_id), K(ls_id), K(tx_id));
  int ret = OB_SUCCESS;
  while (OB_SUCCESS == ret) {
    MTL_SWITCH(tenant_id) {
      ObLSHandle handle;
      ObPartTransCtx *ctx = nullptr;
      if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD))) {
        LOG_WARN("get ls failed", KR(ret), K(ls_id));
      } else if (OB_FAIL(handle.get_ls()->get_tx_ctx(tx_id, true, ctx))) {
        // context can no longer be fetched: ends the wait
      } else {
        if (REACH_TIME_INTERVAL(1 * 1000 * 1000)) {
          LOG_INFO("wait_tx", K(*ctx), KP(ctx), K(ctx->exec_info_.state_));
        }
        handle.get_ls()->revert_tx_ctx(ctx);
      }
    }
    // the pause also happens after the last round
    ob_usleep(50 * 1000);
  }
  LOG_INFO("wait_tx_end finish", K(ret), K(tenant_id), K(ls_id), K(tx_id));
  return ret;
}
// Stores the end SCN reported by log stream `ls_id` of `tenant_id` in `end_scn`.
// Returns the error from get_ls()/get_end_scn(), or OB_SUCCESS.
int SimpleServerHelper::get_ls_end_scn(uint64_t tenant_id, ObLSID ls_id, SCN &end_scn)
{
  int ret = OB_SUCCESS;
  MTL_SWITCH(tenant_id) {
    ObLSHandle handle;
    ObLSService *ls_service = MTL(ObLSService*);
    if (OB_FAIL(ls_service->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD))) {
      LOG_WARN("get ls failed", KR(ret), K(ls_id));
    } else {
      ret = handle.get_ls()->get_end_scn(end_scn);
    }
  }
  return ret;
}
// Polls the log replay service every 200ms until the max replayed SCN of log
// stream `ls_id` is >= `end_scn`. There is no timeout.
// Returns OB_SUCCESS once reached, or the first error from
// get_max_replayed_scn(), which also ends the wait.
int SimpleServerHelper::wait_replay_advance(uint64_t tenant_id, ObLSID ls_id, SCN end_scn)
{
  int ret = OB_SUCCESS;
  bool replay_done = false;
  while (OB_SUCC(ret) && !replay_done) {
    MTL_SWITCH(tenant_id) {
      SCN replayed_scn;
      logservice::ObLogService *log_service = MTL(logservice::ObLogService*);
      if (OB_FAIL(log_service->get_log_replay_service()->get_max_replayed_scn(ls_id, replayed_scn))) {
        // ends the wait through the loop condition
      } else {
        replay_done = (replayed_scn >= end_scn);
        if (!replay_done) {
          ob_usleep(200 * 1000);
        }
      }
    }
  }
  return ret;
}
// Writes `enable` straight into the is_enabled_ member of the weak-read
// handler of log stream `ls_id` (tenant `tenant_id`).
// Returns the error from get_ls(), or OB_SUCCESS.
int SimpleServerHelper::enable_wrs(uint64_t tenant_id, ObLSID ls_id, bool enable)
{
  int ret = OB_SUCCESS;
  MTL_SWITCH(tenant_id) {
    ObLSHandle handle;
    ObLSService *ls_service = MTL(ObLSService*);
    if (OB_SUCC(ls_service->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD))) {
      handle.get_ls()->get_ls_wrs_handler()->is_enabled_ = enable;
    }
  }
  return ret;
}
int SimpleServerHelper::wait_weak_read_ts_advance(uint64_t tenant_id, ObLSID ls_id1, ObLSID ls_id2)
{
int ret = OB_SUCCESS;
LOG_INFO("wait_weak_read_ts_advance", K(tenant_id), K(ls_id1), K(ls_id2));
bool advance = false;
SCN ts1,ts2;
while (OB_SUCC(ret) && !advance) {
MTL_SWITCH(tenant_id) {
ObLSHandle ls_handle1;
ObLSHandle ls_handle2;
if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id1, ls_handle1, ObLSGetMod::STORAGE_MOD))) {
} else if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id2, ls_handle2, ObLSGetMod::STORAGE_MOD))) {
} else if (FALSE_IT(ts1 = ls_handle1.get_ls()->get_ls_wrs_handler()->ls_weak_read_ts_)) {
} else if (FALSE_IT(ts2 = ls_handle2.get_ls()->get_ls_wrs_handler()->ls_weak_read_ts_)) {
} else if (ts1 > ts2) {
advance = true;
} else {
ob_usleep(200 * 1000);
}
}
}
LOG_INFO("wait_weak_read_ts_advance finish", K(tenant_id), K(ls_id1), K(ts1), K(ls_id2), K(ts2));
return ret;
}
// Overwrites the ls_weak_read_ts_ of log stream `ls_id` with
// SCN::plus(current value, add_ns), writing the member in place.
// Returns the error from get_ls(), or OB_SUCCESS.
int SimpleServerHelper::modify_wrs(uint64_t tenant_id, ObLSID ls_id, int64_t add_ns)
{
  LOG_INFO("modify_wrs", K(tenant_id), K(ls_id), K(add_ns));
  int ret = OB_SUCCESS;
  MTL_SWITCH(tenant_id) {
    ObLSHandle handle;
    if (OB_SUCC(MTL(ObLSService*)->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD))) {
      // wrs_scn aliases the handler's member, so the assignment below updates it in place
      SCN &wrs_scn = handle.get_ls()->get_ls_wrs_handler()->ls_weak_read_ts_;
      const SCN old_scn = wrs_scn;
      wrs_scn = SCN::plus(old_scn, add_ns);
      LOG_INFO("modify_wrs finish", K(tenant_id), K(ls_id), K(add_ns), K(old_scn), K(wrs_scn));
    }
  }
  return ret;
}
// Takes log stream `ls_id` of `tenant_id` offline and online again.
//
// One attempt does, in order: switch_to_follower_gracefully() on ls_tx_svr_,
// read the LS end SCN, offline(), log the tx-ctx-manager state, online(), then
// wait_replay_advance() up to the recorded end SCN. A failed attempt is
// followed by a 2 second sleep; at most 10 attempts are made.
// Returns OB_SUCCESS as soon as one attempt succeeds, otherwise the error of
// the last attempt.
int SimpleServerHelper::ls_reboot(uint64_t tenant_id, ObLSID ls_id)
{
  LOG_INFO("ls_reboot", K(tenant_id), K(ls_id));
  int ret = OB_SUCCESS;

  // Logs the current state of the LS tx ctx manager.
  auto dump_ctx_mgr_state = [](ObLS *ls) {
    auto state = ls->ls_tx_svr_.mgr_->state_;
    LOG_INFO("print ls ctx mgr state:", K(ls->get_ls_id()),
             "ctx_mgr_state", state,
             K(ObLSTxCtxMgr::State::state_str(state)));
  };

  // One full offline/online cycle; returns the first error of the cycle.
  auto reboot_once = [tenant_id, ls_id, dump_ctx_mgr_state] () -> int {
    int ret = OB_SUCCESS;
    MTL_SWITCH(tenant_id) {
      ObLSHandle handle;
      SCN end_scn;
      ObLS *target_ls = nullptr;
      if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD))) {
        LOG_WARN("get ls failed", KR(ret), K(tenant_id), K(ls_id));
      } else if (FALSE_IT(target_ls = handle.get_ls())) {
      } else if (OB_FAIL(target_ls->ls_tx_svr_.switch_to_follower_gracefully())) {
        LOG_WARN("switch to follower failed", KR(ret));
      } else if (OB_FAIL(target_ls->get_end_scn(end_scn))) {
      } else if (OB_FAIL(target_ls->offline())) {
        LOG_WARN("ls offline failed", KR(ret), K(tenant_id), K(ls_id));
      } else if (FALSE_IT(dump_ctx_mgr_state(target_ls))) {
      } else if (OB_FAIL(target_ls->online())) {
        LOG_WARN("ls online failed", KR(ret), K(tenant_id), K(ls_id));
      } else if (OB_FAIL(wait_replay_advance(tenant_id, ls_id, end_scn))) {
        LOG_WARN("wait replay advance failed", KR(ret), K(tenant_id), K(ls_id), K(end_scn));
      }
      LOG_INFO("ls_reboot", KR(ret), K(tenant_id), K(ls_id));
    }
    return ret;
  };

  int attempt = 0;
  bool rebooted = false;
  while (attempt < 10 && !rebooted) {
    ret = reboot_once();
    if (OB_SUCCESS == ret) {
      rebooted = true;
    } else {
      ::sleep(2);
    }
    ++attempt;
  }
  LOG_INFO("ls_reboot finish", K(tenant_id), K(ls_id));
  return ret;
}
// Executes the write statement `sql` on `conn` with OB_SYS_TENANT_ID and
// discards the affected-row count.
// Returns whatever execute_write() returns.
int SimpleServerHelper::write(sqlclient::ObISQLConnection *conn, const char *sql)
{
  int64_t ignored_rows = 0;
  const int ret = conn->execute_write(OB_SYS_TENANT_ID, sql, ignored_rows);
  return ret;
}
// Executes the write statement `sql` on `conn` with OB_SYS_TENANT_ID and
// reports the affected-row count through `affected_rows`.
// Returns whatever execute_write() returns.
int SimpleServerHelper::write(sqlclient::ObISQLConnection *conn, const char *sql, int64_t &affected_rows)
{
  const int ret = conn->execute_write(OB_SYS_TENANT_ID, sql, affected_rows);
  return ret;
}
// Log-submit hook that can refuse selected transaction logs.
//
// 1. Decodes the block in `buf` to collect the header and the list of log
//    types it contains.
// 2. Looks up the block's tx id in tx_injects_. If an entry exists and is
//    ObTxLogType::UNKNOWN, or equals one of the log types in the block, the
//    call is turned into OB_EAGAIN and nothing is submitted.
// 3. Otherwise, provided no earlier step failed, forwards the unchanged
//    arguments to the original adapter (mgr_->log_adapter_def_).
//
// Returns OB_ERR_UNEXPECTED when no manager is attached (unless step 2
// replaces it with OB_EAGAIN), a decode error, OB_EAGAIN for an injected
// fault, or the result of the forwarded submit_log().
int InjectTxFaultHelper::submit_log(const char *buf, const int64_t size, const share::SCN &base_ts, ObTxBaseLogCb *cb, const bool need_nonblock)
{
  int ret = OB_SUCCESS;
  ObTxLogBlockHeader log_block_header;
  ObSEArray<ObTxLogType, 1> log_list;
  ObTxLogBlock log_block;
  int64_t replay_hint = 0;
  ObLSID ls_id;

  // Step 1: decode the block.
  if (OB_ISNULL(mgr_)) {
    ret = OB_ERR_UNEXPECTED;
  } else if (OB_FAIL(log_block.init_with_header(buf, size, replay_hint, log_block_header))) {
    LOG_WARN("log_block init failed", K(ret), KP(buf), K(size));
  } else {
    bool iter_end = false;
    while (OB_SUCC(ret) && !iter_end) {
      ObTxLogHeader header;
      if (OB_SUCC(log_block.get_next_log(header))) {
        ret = log_list.push_back(header.get_tx_log_type());
      } else if (OB_ITER_END == ret) {
        // normal end of the block
        ret = OB_SUCCESS;
        iter_end = true;
      } else {
        LOG_WARN("log_block get_next failed", K(ret), K(log_block_header));
      }
    }
  }

  if (OB_NOT_NULL(mgr_)) {
    ls_id = mgr_->ls_id_;
  }
  LOG_INFO("submit_log", K(ret), K(log_block_header), K(log_list), K(ls_id));

  // Step 2: fault injection. This runs even when decoding failed and may
  // replace the earlier error with OB_EAGAIN.
  ObTxLogType *inject_tx_log_type = tx_injects_.get(log_block_header.tx_id_);
  if (OB_NOT_NULL(inject_tx_log_type)) {
    if (ObTxLogType::UNKNOWN == *inject_tx_log_type) {
      ret = OB_EAGAIN;
      LOG_WARN("submit log tx inject fault", K(ret), K(log_block_header.tx_id_));
    } else {
      for (int i = 0; OB_SUCC(ret) && i < log_list.count(); i++) {
        if (log_list.at(i) == *inject_tx_log_type) {
          ret = OB_EAGAIN;
          LOG_WARN("submit log tx inject fault", K(ret), K(log_block_header.tx_id_));
        }
      }
    }
  }

  // Step 3: forward to the original adapter. mgr_ is non-null here, because a
  // null mgr_ always leaves ret failed above.
  if (OB_SUCC(ret)) {
    ret = mgr_->log_adapter_def_.submit_log(buf,
                                            size,
                                            base_ts,
                                            cb,
                                            need_nonblock);
  }
  return ret;
}
// Registers a fault for transaction `tx_id`: submit_log() on this helper will
// refuse blocks of that transaction that match `log_type`.
// On the first successful registration the helper also installs itself as the
// tx_log_adapter_ of the tx ctx manager of log stream `ls_id`, after copying
// that manager's log_handler_, dup_table_ls_handler_ and tx_table_ pointers.
// Returns the error from get_ls() or tx_injects_.set_refactored(), or OB_SUCCESS.
int InjectTxFaultHelper::inject_tx_block(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObTxLogType log_type)
{
  LOG_INFO("inject_tx_block", K(tenant_id), K(ls_id), K(tx_id), K(log_type));
  int ret = OB_SUCCESS;
  MTL_SWITCH(tenant_id) {
    ObLSHandle handle;
    if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, handle, ObLSGetMod::STORAGE_MOD))) {
    } else if (OB_FAIL(tx_injects_.set_refactored(tx_id, log_type))) {
    } else if (OB_NOT_NULL(mgr_)) {
      // already hooked into a manager; only the registration was needed
    } else {
      // replace log_adapter
      ObLSTxCtxMgr *ctx_mgr = handle.get_ls()->ls_tx_svr_.mgr_;
      log_handler_ = ctx_mgr->log_adapter_def_.log_handler_;
      dup_table_ls_handler_ = ctx_mgr->log_adapter_def_.dup_table_ls_handler_;
      tx_table_ = ctx_mgr->log_adapter_def_.tx_table_;
      ctx_mgr->tx_log_adapter_ = this;
      mgr_ = ctx_mgr;
    }
  }
  LOG_INFO("inject_tx_block finish", K(ret), K(tenant_id), K(ls_id), K(tx_id), K(log_type));
  return ret;
}
// Undoes inject_tx_block(): points the hooked manager's tx_log_adapter_ back
// at its own log_adapter_def_, forgets the manager and clears all registered
// faults. Does nothing to the manager when none is attached.
void InjectTxFaultHelper::release()
{
  if (nullptr != mgr_) {
    mgr_->tx_log_adapter_ = &mgr_->log_adapter_def_;
    mgr_ = NULL;
  }
  tx_injects_.clear();
}
}

View File

@ -0,0 +1,99 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#pragma once
#include "observer/ob_server_struct.h"
#include "share/ob_ls_id.h"
#include "storage/tx/ob_tx_log_adapter.h"
#include "storage/tx/ob_trans_ctx_mgr_v4.h"
// LOGI(format, ...): printf-style INFO line on stdout, prefixed with a local
// timestamp, file, line and function. `format` must be a string literal.
// - localtime_r() is used instead of localtime() so concurrent callers do not
//   share the static tm buffer.
// - Locals carry a logi_ prefix and trailing underscore so they cannot capture
//   identifiers (e.g. `buf`, `now`) that the caller passes as arguments.
// - The expansion stays a plain brace block, so existing call sites with or
//   without a trailing semicolon keep compiling.
#define LOGI(format, ...) {time_t logi_now_ = time(NULL); struct tm logi_tm_; char logi_ts_[128] = {0};\
if (NULL != localtime_r(&logi_now_, &logi_tm_)) { strftime(logi_ts_, sizeof(logi_ts_), "%Y-%m-%d %H:%M:%S", &logi_tm_); }\
printf("[%s] [INFO] [%s:%d] [%s] " format "\n", logi_ts_, __FILENAME__, __LINE__, __FUNCTION__, ##__VA_ARGS__);}
// LOGE(format, ...): printf-style ERROR line on stdout, prefixed with a local
// timestamp, file, line and function. `format` must be a string literal.
// - localtime_r() is used instead of localtime() so concurrent callers do not
//   share the static tm buffer.
// - Locals carry a loge_ prefix and trailing underscore so they cannot capture
//   identifiers (e.g. `buf`, `now`) that the caller passes as arguments.
// - The expansion stays a plain brace block, so existing call sites with or
//   without a trailing semicolon keep compiling.
#define LOGE(format, ...) {time_t loge_now_ = time(NULL); struct tm loge_tm_; char loge_ts_[128] = {0};\
if (NULL != localtime_r(&loge_now_, &loge_tm_)) { strftime(loge_ts_, sizeof(loge_ts_), "%Y-%m-%d %H:%M:%S", &loge_tm_); }\
printf("[%s] [ERROR] [%s:%d] [%s] " format "\n", loge_ts_, __FILENAME__, __LINE__, __FUNCTION__, ##__VA_ARGS__);}
namespace oceanbase
{
using namespace share;
using namespace transaction;
class SimpleServerHelper
{
public:
static int create_ls(uint64_t tenant_id, ObAddr add);
static int select_int64(common::ObMySQLProxy &sql_proxy, const char *sql, int64_t &val);
static int g_select_int64(uint64_t tenant_id, const char *sql, int64_t &val);
static int select_uint64(common::ObMySQLProxy &sql_proxy, const char *sql, uint64_t &val);
static int g_select_uint64(uint64_t tenant_id, const char *sql, uint64_t &val);
static int select_int64(sqlclient::ObISQLConnection *conn, const char *sql, int64_t &val);
static int select_varchar(sqlclient::ObISQLConnection *conn, const char *sql, ObString &val);
static int g_select_varchar(uint64_t tenant_id, const char *sql, ObString &val);
static int find_trace_id(sqlclient::ObISQLConnection *conn, ObString &trace_id);
static int find_request(uint64_t tenant_id, int64_t session_id,
int64_t &request_id,ObTransID &tx_id, ObString &trace_id, int64_t &retry_cnt);
static int select_table_loc(uint64_t tenant_id, const char* table_name, ObLSID &ls_id);
static int select_table_tablet(uint64_t tenant_id, const char* table_name, ObTabletID &tablet_id);
static int do_balance(uint64_t tenant_id);
static int remove_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id);
static int abort_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id);
static int submit_redo(uint64_t tenant_id, ObLSID ls_id);
static int find_session(sqlclient::ObISQLConnection *conn, int64_t &session_id);
static int find_tx(sqlclient::ObISQLConnection *conn, ObTransID &tx_id);
static int ls_resume(uint64_t tenant_id, ObLSID ls_id);
static int ls_reboot(uint64_t tenant_id, ObLSID ls_id);
static int freeze(uint64_t tenant_id, ObLSID ls_id, ObTabletID tablet_id);
static int find_tx_info(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObPartTransCtx &ctx_info);
static int get_ls_end_scn(uint64_t tenant_id, ObLSID ls_id, SCN &end_scn);
static int wait_replay_advance(uint64_t tenant_id, ObLSID ls_id, SCN end_scn);
static int wait_checkpoint_newest(uint64_t tenant_id, ObLSID ls_id);
static int wait_tx(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObTxState tx_state);
static int wait_tx_exit(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id);
static int wait_flush_finish(uint64_t tenant_id, ObLSID ls_id, ObTabletID tablet_id);
static int write(sqlclient::ObISQLConnection *conn, const char *sql);
static int write(sqlclient::ObISQLConnection *conn, const char *sql, int64_t &affected_rows);
static int wait_weak_read_ts_advance(uint64_t tenant_id, ObLSID ls_id1, ObLSID ls_id2);
static int enable_wrs(uint64_t tenant_id, ObLSID ls_id, bool enable);
static int modify_wrs(uint64_t tenant_id, ObLSID ls_id, int64_t add_ns = 10 * 1000 * 1000 * 1000L);
};
// Test-only tx log adapter that can be swapped into an ObLSTxCtxMgr in place
// of its default adapter, so that submit_log() can be intercepted per
// transaction. Rules are registered with inject_tx_block() and removed, along
// with the hook itself, by release() (also called from the destructor).
class InjectTxFaultHelper : public transaction::ObLSTxLogAdapter
{
public:
  InjectTxFaultHelper()
    : mgr_(nullptr)
  {
    // NOTE(review): the result of create() is deliberately not inspected here,
    // matching the original behaviour; a failure would only surface later.
    (void)tx_injects_.create(1024, "tx_inject");
  }

  ~InjectTxFaultHelper()
  {
    release();
  }

  // Records `log_type` for `tx_id` and hooks the tx ctx mgr of `ls_id`.
  int inject_tx_block(uint64_t tenant_id, ObLSID ls_id, ObTransID tx_id, ObTxLogType log_type);

  // Restores the hooked manager's default adapter and clears all rules.
  void release();

  // Overrides the adapter's log submission entry point.
  virtual int submit_log(const char *buf,
                         const int64_t size,
                         const share::SCN &base_ts,
                         ObTxBaseLogCb *cb,
                         const bool need_nonblock) override;

private:
  // Manager currently hooked by this helper; NULL while not installed.
  transaction::ObLSTxCtxMgr *mgr_;
  // tx id -> log type registered through inject_tx_block().
  hash::ObHashMap<ObTransID, ObTxLogType> tx_injects_;
};
// Shorthand: SSH expands to SimpleServerHelper, e.g. SSH::write(conn, sql).
#define SSH SimpleServerHelper
}

View File

@ -31,7 +31,7 @@ class TestRunCtx
{
public:
uint64_t tenant_id_ = 0;
int time_sec_ = 0;
int64_t time_sec_ = 0;
};
TestRunCtx RunCtx;
@ -125,8 +125,8 @@ TEST_F(ObSimpleClusterExampleTest, end)
int main(int argc, char **argv)
{
int c = 0;
int time_sec = 0;
int64_t c = 0;
int64_t time_sec = 0;
char *log_level = (char*)"INFO";
while(EOF != (c = getopt(argc,argv,"t:l:"))) {
switch(c) {

File diff suppressed because it is too large Load Diff

View File

@ -162,6 +162,8 @@ void oceanbase::observer::init_srv_xlator_for_migration(ObSrvRpcXlator *xlator)
RPC_PROCESSOR(ObFetchLSReplayScnP);
RPC_PROCESSOR(ObCheckTransferTabletsBackfillP);
RPC_PROCESSOR(ObStorageGetConfigVersionAndTransferScnP);
RPC_PROCESSOR(ObStorageSubmitTxLogP, gctx_.bandwidth_throttle_);
RPC_PROCESSOR(ObStorageGetTransferDestPrepareSCNP, gctx_.bandwidth_throttle_);
RPC_PROCESSOR(ObStorageLockConfigChangeP, gctx_.bandwidth_throttle_);
RPC_PROCESSOR(ObStorageUnlockConfigChangeP, gctx_.bandwidth_throttle_);
RPC_PROCESSOR(ObStorageGetLogConfigStatP, gctx_.bandwidth_throttle_);

View File

@ -287,6 +287,18 @@ int ObGVTxStat::inner_get_next_row(ObNewRow *&row)
cur_row_.cells_[i].set_int(-1);
}
break;
case START_SCN:
cur_row_.cells_[i].set_uint64(tx_stat.start_scn_.get_val_for_inner_table_field());
break;
case END_SCN:
cur_row_.cells_[i].set_uint64(tx_stat.end_scn_.get_val_for_inner_table_field());
break;
case REC_SCN:
cur_row_.cells_[i].set_uint64(tx_stat.rec_scn_.get_val_for_inner_table_field());
break;
case TRANSFER_BLOCKING:
cur_row_.cells_[i].set_bool(tx_stat.transfer_blocking_);
break;
default:
ret = OB_ERR_UNEXPECTED;
SERVER_LOG(WARN, "invalid coloum_id", K(ret), K(col_id));

View File

@ -85,6 +85,10 @@ private:
GTRID,
BQUAL,
FORMAT_ID,
START_SCN,
END_SCN,
REC_SCN,
TRANSFER_BLOCKING,
};
static const int64_t OB_MAX_BUFFER_SIZE = 1024;

View File

@ -8791,6 +8791,66 @@ int ObInnerTableSchema::all_virtual_trans_stat_schema(ObTableSchema &table_schem
format_id_default,
format_id_default); //default_value
}
if (OB_SUCC(ret)) {
ADD_COLUMN_SCHEMA("start_scn", //column_name
++column_id, //column_id
0, //rowkey_id
0, //index_id
0, //part_key_pos
ObUInt64Type, //column_type
CS_TYPE_INVALID, //column_collation_type
sizeof(uint64_t), //column_length
-1, //column_precision
-1, //column_scale
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
ADD_COLUMN_SCHEMA("end_scn", //column_name
++column_id, //column_id
0, //rowkey_id
0, //index_id
0, //part_key_pos
ObUInt64Type, //column_type
CS_TYPE_INVALID, //column_collation_type
sizeof(uint64_t), //column_length
-1, //column_precision
-1, //column_scale
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
ADD_COLUMN_SCHEMA("rec_scn", //column_name
++column_id, //column_id
0, //rowkey_id
0, //index_id
0, //part_key_pos
ObUInt64Type, //column_type
CS_TYPE_INVALID, //column_collation_type
sizeof(uint64_t), //column_length
-1, //column_precision
-1, //column_scale
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
ADD_COLUMN_SCHEMA("transfer_blocking", //column_name
++column_id, //column_id
0, //rowkey_id
0, //index_id
0, //part_key_pos
ObTinyIntType, //column_type
CS_TYPE_INVALID, //column_collation_type
1, //column_length
-1, //column_precision
-1, //column_scale
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
table_schema.get_part_option().set_part_num(1);
table_schema.set_part_level(PARTITION_LEVEL_ONE);

View File

@ -2212,6 +2212,66 @@ int ObInnerTableSchema::all_virtual_trans_stat_ora_schema(ObTableSchema &table_s
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
ADD_COLUMN_SCHEMA("START_SCN", //column_name
++column_id, //column_id
0, //rowkey_id
0, //index_id
0, //part_key_pos
ObNumberType, //column_type
CS_TYPE_INVALID, //column_collation_type
38, //column_length
38, //column_precision
0, //column_scale
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
ADD_COLUMN_SCHEMA("END_SCN", //column_name
++column_id, //column_id
0, //rowkey_id
0, //index_id
0, //part_key_pos
ObNumberType, //column_type
CS_TYPE_INVALID, //column_collation_type
38, //column_length
38, //column_precision
0, //column_scale
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
ADD_COLUMN_SCHEMA("REC_SCN", //column_name
++column_id, //column_id
0, //rowkey_id
0, //index_id
0, //part_key_pos
ObNumberType, //column_type
CS_TYPE_INVALID, //column_collation_type
38, //column_length
38, //column_precision
0, //column_scale
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
ADD_COLUMN_SCHEMA("TRANSFER_BLOCKING", //column_name
++column_id, //column_id
0, //rowkey_id
0, //index_id
0, //part_key_pos
ObNumberType, //column_type
CS_TYPE_INVALID, //column_collation_type
38, //column_length
38, //column_precision
0, //column_scale
false, //is_nullable
false); //is_autoincrement
}
if (OB_SUCC(ret)) {
table_schema.get_part_option().set_part_num(1);
table_schema.set_part_level(PARTITION_LEVEL_ONE);

View File

@ -7479,6 +7479,10 @@ def_table_schema(
('gtrid', 'varbinary:128'),
('bqual', 'varbinary:128'),
('format_id', 'int', 'false', '1'),
('start_scn', 'uint'),
('end_scn', 'uint'),
('rec_scn', 'uint'),
('transfer_blocking', 'bool'),
],
partition_columns = ['svr_ip', 'svr_port'],
vtable_route_policy = 'distributed',

View File

@ -269,6 +269,7 @@ ob_set_subtarget(ob_storage tablet
tablet/ob_tablet.cpp
tablet/ob_tablet_block_header.cpp
tablet/ob_tablet_medium_info_reader.cpp
tablet/ob_tablet_transfer_tx_ctx.cpp
tablet/ob_tablet_space_usage.cpp
tablet/ob_tablet_block_aggregated_info.cpp
tablet/ob_tablet_macro_info_iterator.cpp
@ -429,6 +430,7 @@ ob_set_subtarget(ob_storage ls
ls/ob_ls_saved_info.cpp
ls/ob_ls_reserved_snapshot_mgr.cpp
ls/ob_ls_storage_clog_handler.cpp
ls/ob_ls_transfer_status.cpp
)
ob_set_subtarget(ob_storage column_store

View File

@ -1461,19 +1461,14 @@ int ObMultiVersionMicroBlockRowScanner::inner_inner_get_next_row(
}
} else {
ObMultiVersionRowFlag flag;
int64_t trans_version = 0;
const ObRowHeader *row_header = nullptr;
int64_t trans_version = 0;
int64_t sql_sequence = 0;
bool can_read = true;
bool is_determined_state = false;
bool read_uncommitted_row = false;
bool is_ghost_row_flag = false;
const int64_t snapshot_version = context_->trans_version_range_.snapshot_version_;
memtable::ObMvccAccessCtx &acc_ctx = context_->store_ctx_->mvcc_acc_ctx_;
if (OB_UNLIKELY(context_->query_flag_.is_ignore_trans_stat())) {
version_fit = true;
} else if (OB_FAIL(reader_->get_multi_version_info(
if (OB_FAIL(reader_->get_multi_version_info(
current_,
read_info_->get_schema_rowkey_count(),
row_header,
@ -1485,28 +1480,72 @@ int ObMultiVersionMicroBlockRowScanner::inner_inner_get_next_row(
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("row header is null", K(ret));
} else if (FALSE_IT(flag = row_header->get_row_multi_version_flag())) {
} else if (flag.is_uncommitted_row()) {
have_uncommited_row = true; // TODO @lvling check transaction status instead
compaction::ObMergeCachedTransState trans_state;
transaction::ObTxSEQ tx_sequence = transaction::ObTxSEQ::cast_from_int(sql_sequence);
if (OB_NOT_NULL(context_->trans_state_mgr_) &&
OB_SUCCESS == context_->trans_state_mgr_->get_trans_state(
transaction::ObTransID(row_header->get_trans_id()), tx_sequence, trans_state)) {
can_read = trans_state.can_read_;
trans_version = trans_state.trans_version_;
is_determined_state = trans_state.is_determined_state_;
} else {
transaction::ObLockForReadArg lock_for_read_arg(acc_ctx,
transaction::ObTransID(row_header->get_trans_id()),
tx_sequence,
context_->query_flag_.read_latest_,
sstable_->get_end_scn());
} else if (OB_FAIL(ObGhostRowUtil::is_ghost_row(flag, is_ghost_row_flag))) {
LOG_WARN("fail to check ghost row", K(ret), K_(current), KPC(row_header),
K(trans_version), K(sql_sequence), K_(macro_id));
} else {
is_last_multi_version_row_ = flag.is_last_multi_version_row();
final_result = is_last_multi_version_row_;
if (OB_FAIL(lock_for_read(lock_for_read_arg,
can_read,
trans_version,
is_determined_state))) {
STORAGE_LOG(WARN, "fail to check transaction status", K(ret), KPC(row_header), K_(macro_id));
if (OB_UNLIKELY(is_ghost_row_flag)) {
// Case1: Data is ghost row, and it means no valid value for the row, so
// we can skip it
version_fit = false;
LOG_DEBUG("is ghost row", K(ret), K(current_), K(flag));
} else if (flag.is_uncommitted_row()) {
have_uncommited_row = true; // TODO @lvling check transaction status instead
transaction::ObTxSEQ tx_sequence = transaction::ObTxSEQ::cast_from_int(sql_sequence);
// Case2: Data is uncommitted, so we use the txn state cache or txn
// table to decide whether uncommitted txns are readable
compaction::ObMergeCachedTransState trans_state;
if (OB_NOT_NULL(context_->trans_state_mgr_) &&
OB_SUCCESS == context_->trans_state_mgr_->get_trans_state(
transaction::ObTransID(row_header->get_trans_id()), tx_sequence, trans_state)) {
version_fit = trans_state.can_read_;
trans_version = trans_state.trans_version_;
if (transaction::is_effective_trans_version(trans_version)
&& trans_version <= version_range_.base_version_) {
version_fit = false;
// filter multi version row whose trans version is smaller than base_version
final_result = true;
}
} else {
transaction::ObLockForReadArg lock_for_read_arg(
acc_ctx,
transaction::ObTransID(row_header->get_trans_id()),
tx_sequence,
context_->query_flag_.read_latest_,
context_->query_flag_.iter_uncommitted_row(),
// TODO(handora.qc): remove it in the future
sstable_->get_end_scn());
if (OB_FAIL(lock_for_read(lock_for_read_arg,
version_fit,
trans_version))) {
STORAGE_LOG(WARN, "fail to check transaction status",
K(ret), KPC(row_header), K_(macro_id));
} else if (transaction::is_effective_trans_version(trans_version)
&& trans_version <= version_range_.base_version_) {
version_fit = false;
// filter multi version row whose trans version is smaller than base_version
final_result = true;
}
}
} else {
// Case3: Data is committed, so we use the version on the data to decide
// whether uncommitted txns are readable
if (context_->query_flag_.iter_uncommitted_row()) {
version_fit = true;
} else if (trans_version <= version_range_.base_version_) {
// filter multi version row whose trans version is smaller than base_version
version_fit = false;
final_result = true;
} else if (trans_version > snapshot_version) {
// filter multi version row whose trans version is larger than snapshot_version
version_fit = false;
} else {
version_fit = true;
}
}
}
@ -1543,43 +1582,6 @@ int ObMultiVersionMicroBlockRowScanner::inner_inner_get_next_row(
}
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(ObGhostRowUtil::is_ghost_row(flag, is_ghost_row_flag))) {
LOG_WARN("fail to check ghost row", K(ret), K_(current), KPC(row_header),
K(trans_version), K(sql_sequence), K_(macro_id));
} else if (OB_UNLIKELY(is_ghost_row_flag)) {
can_read = false;
is_determined_state = true;
LOG_DEBUG("is ghost row", K(ret), K(current_), K(flag));
}
if (OB_SUCC(ret)) {
is_last_multi_version_row_ = flag.is_last_multi_version_row();
final_result = is_last_multi_version_row_;
if (OB_UNLIKELY(context_->query_flag_.is_ignore_trans_stat())) {
// do nothing
} else if (!can_read) {
if (!is_determined_state && context_->query_flag_.iter_uncommitted_row()) { // for mark deletion
version_fit = true;
read_uncommitted_row = true;
} else {
version_fit = false;
}
} else if (!flag.is_uncommitted_row() || is_determined_state) { // committed
if (trans_version <= version_range_.base_version_) {
version_fit = false;
// filter multi version row whose trans version is smaller than base_version
final_result = true;
} else if (trans_version > snapshot_version) { // filter multi version row whose trans version is larger than snapshot_version
version_fit = false;
} else {
version_fit = true;
}
} else {
// read rows in current transaction
version_fit = true;
}
}
if (OB_SUCC(ret)) {
if (version_fit) {
ObDatumRow *row = nullptr;
@ -1602,7 +1604,8 @@ int ObMultiVersionMicroBlockRowScanner::inner_inner_get_next_row(
}
}
if (OB_SUCC(ret) && version_fit) {
if (OB_INVALID_INDEX != read_info_->get_trans_col_index() && is_determined_state) {
if (OB_INVALID_INDEX != read_info_->get_trans_col_index()
&& transaction::is_effective_trans_version(trans_version)) {
// only uncommitted row need to be set, committed row set in row reader
int64_t trans_idx = read_info_->get_trans_col_index();
if (OB_UNLIKELY(trans_idx >= row->count_ || 0 >= trans_version)) {
@ -1614,7 +1617,8 @@ int ObMultiVersionMicroBlockRowScanner::inner_inner_get_next_row(
}
}
if (OB_SUCC(ret)) {
if (!row->mvcc_row_flag_.is_uncommitted_row() || is_determined_state) {
if (!row->mvcc_row_flag_.is_uncommitted_row()
|| transaction::is_effective_trans_version(trans_version)) {
row->snapshot_version_ = 0;
row->trans_id_.reset();
} else { // uncommitted row
@ -1762,23 +1766,23 @@ int ObMultiVersionMicroBlockRowScanner::do_compact(
int ObMultiVersionMicroBlockRowScanner::lock_for_read(
const transaction::ObLockForReadArg &lock_for_read_arg,
bool &can_read,
int64_t &trans_version,
bool &is_determined_state)
int64_t &trans_version)
{
int ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
SCN scn_trans_version = SCN::invalid_scn();
auto &tx_table_guards = context_->store_ctx_->mvcc_acc_ctx_.get_tx_table_guards();
if (OB_FAIL(tx_table_guards.lock_for_read(lock_for_read_arg, can_read, scn_trans_version,
is_determined_state))) {
if (OB_FAIL(tx_table_guards.lock_for_read(lock_for_read_arg,
can_read,
scn_trans_version))) {
LOG_WARN("failed to check transaction status", K(ret));
} else {
trans_version = scn_trans_version.get_val_for_tx();
if (OB_NOT_NULL(context_->trans_state_mgr_) &&
OB_TMP_FAIL(context_->trans_state_mgr_->add_trans_state(
lock_for_read_arg.data_trans_id_, lock_for_read_arg.data_sql_sequence_,
trans_version, ObTxData::MAX_STATE_CNT, can_read, is_determined_state))) {
trans_version, ObTxData::MAX_STATE_CNT, can_read))) {
LOG_WARN("failed to add trans state to cache", K(tmp_ret),
"trans_id", lock_for_read_arg.data_trans_id_,
"sql_seq", lock_for_read_arg.data_sql_sequence_);
@ -2693,7 +2697,7 @@ int ObMultiVersionMicroBlockMinorMergeRowScanner::check_curr_row_can_read(
LOG_WARN("check sql sequence can read failed", K(ret), K(can_read), K(trans_id), K(sql_seq));
} else if (OB_NOT_NULL(context_->trans_state_mgr_) &&
OB_TMP_FAIL(context_->trans_state_mgr_->add_trans_state(trans_id, sql_seq,
committed_trans_version_, last_trans_state_, can_read, 0))) {
committed_trans_version_, last_trans_state_, can_read))) {
LOG_WARN("failed to add minor trans state", K(tmp_ret), K(trans_id), K(sql_seq), K(can_read));
}
}

View File

@ -298,8 +298,7 @@ private:
int lock_for_read(
const transaction::ObLockForReadArg &lock_for_read_arg,
bool &can_read,
int64_t &trans_version,
bool &is_determined_state);
int64_t &trans_version);
// The store_rowkey is a decoration of the ObObj pointer,
// and it will be destroyed when the life cycle of the rowkey_helper is end.
// So we have to send it into the function to avoid this situation.

View File

@ -84,12 +84,11 @@ int ObCachedTransStateMgr::add_trans_state(
const transaction::ObTxSEQ &sql_seq,
const int64_t commited_trans_version,
const int32_t trans_state,
const int16_t can_read,
const int16_t is_determined_state)
const int16_t can_read)
{
int ret = OB_SUCCESS;
ObMergeCachedTransKey key(trans_id, sql_seq);
ObMergeCachedTransState status(trans_id, sql_seq, commited_trans_version, trans_state, can_read, is_determined_state);
ObMergeCachedTransState status(trans_id, sql_seq, commited_trans_version, trans_state, can_read);
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ObCachedTransStateMgr is not initialized", K(ret));

View File

@ -67,29 +67,26 @@ struct ObMergeCachedTransState {
: key_(),
trans_version_(INVALID_TRANS_VERSION),
trans_state_(INT32_MAX),
can_read_(INVALID_BOOL_VALUE),
is_determined_state_(INVALID_BOOL_VALUE)
can_read_(INVALID_BOOL_VALUE)
{}
ObMergeCachedTransState(
transaction::ObTransID trans_id,
transaction::ObTxSEQ sql_sequence,
int64_t trans_version,
int32_t trans_state,
int16_t can_read,
int16_t is_determined_state)
int16_t can_read)
: key_(trans_id, sql_sequence),
trans_version_(trans_version),
trans_state_(trans_state),
can_read_(can_read),
is_determined_state_(is_determined_state)
can_read_(can_read)
{}
virtual ~ObMergeCachedTransState() {}
inline bool is_valid() const
{
return key_.is_valid() && INVALID_TRANS_VERSION != trans_version_ && INT32_MAX != trans_state_ &&
INVALID_BOOL_VALUE != can_read_ && INVALID_BOOL_VALUE != is_determined_state_;
INVALID_BOOL_VALUE != can_read_;
}
TO_STRING_KV(K_(key), K_(trans_state), K_(trans_version), K_(can_read), K_(is_determined_state));
TO_STRING_KV(K_(key), K_(trans_state), K_(trans_version), K_(can_read));
static const int16_t INVALID_BOOL_VALUE = -1;
static const int64_t INVALID_TRANS_VERSION = -1;
@ -97,7 +94,6 @@ struct ObMergeCachedTransState {
int64_t trans_version_;
int32_t trans_state_;
int16_t can_read_; // 0 false; 1 true
int16_t is_determined_state_; // 0 false; 1 true
};
class ObCachedTransStateMgr {
@ -118,8 +114,7 @@ public:
const transaction::ObTxSEQ &sql_seq,
const int64_t trans_version,
const int32_t trans_state,
const int16_t can_read,
const int16_t is_determined_state);
const int16_t can_read);
private:
bool is_inited_;
int64_t max_cnt_;

View File

@ -24,6 +24,7 @@
#include "share/ob_debug_sync_point.h"
#include "lib/utility/ob_tracepoint.h"
#include "storage/tablet/ob_tablet.h"
#include "storage/high_availability/ob_transfer_handler.h"
namespace oceanbase
{
@ -1588,10 +1589,14 @@ int ObTransferReplaceTableTask::check_src_tablet_sstables_(
} else {
sstable = static_cast<ObSSTable *>(table);
if (sstable->contain_uncommitted_row()) {
if (table->get_end_scn() >= ctx_->backfill_scn_) {
if (sstable->get_filled_tx_scn() > ctx_->backfill_scn_) {
ret = OB_TRANSFER_SYS_ERROR;
LOG_ERROR("src minor still has uncommitted row, unexpected", K(ret), KPC(sstable), KPC(ctx_));
LOG_ERROR("src sstable filled_tx_scn bigger than transfer_scn, unexpected", K(ret), KPC(sstable), KPC(ctx_),
K(sstable->get_filled_tx_scn()), K(ctx_->backfill_scn_));
} else if (sstable->get_filled_tx_scn() == ctx_->backfill_scn_) {
LOG_INFO("src minor has backfill to transfer_scn, when new transfer active tx has move to dest_ls", KPC(sstable), KPC(ctx_));
} else {
// filled_tx_scn < transfer_scn
ret = OB_EAGAIN;
LOG_WARN("sstable has not yet backfilled transactions", K(ret), KPC(sstable), KPC(ctx_));
}

View File

@ -28,9 +28,11 @@
#include "storage/compaction/ob_tenant_tablet_scheduler.h"
#include "ob_rebuild_service.h"
#include "storage/tablet/ob_tablet.h"
#include "storage/tx/wrs/ob_weak_read_util.h"
using namespace oceanbase::transaction;
using namespace oceanbase::share;
using namespace oceanbase::compaction;
namespace oceanbase
{
@ -415,7 +417,10 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta
int ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
const int64_t start_ts = ObTimeUtil::current_time();
LOG_INFO("[TRANSFER] start do with start status", K(task_info));
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
// TODO lana compatible
bool new_transfer = true;
LOG_INFO("[TRANSFER] start do with start status", K(task_info), K(new_transfer));
ObTimeoutCtx timeout_ctx;
ObMySQLTransaction trans;
@ -424,11 +429,14 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta
palf::LogConfigVersion config_version;
bool is_leader = true;
bool succ_block_tx = false;
int64_t tablet_stop_begin = 0;
bool commit_succ = false;
if (!is_inited_) {
ret = OB_NOT_INIT;
LOG_WARN("transfer handler do not init", K(ret));
} else if (OB_FAIL(enable_new_transfer(new_transfer))) {
LOG_WARN("fail to fetch new transfer", K(ret));
} else if (!task_info.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("do with start status get invalid argument", K(ret), K(task_info));
@ -461,6 +469,7 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta
if (tenant_config.is_valid()) {
enable_kill_trx = tenant_config->_enable_balance_kill_transaction;
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(lock_src_and_dest_ls_member_list_(task_info, task_info.src_ls_id_, task_info.dest_ls_id_))) {
LOG_WARN("failed to lock src and dest ls member list", K(ret), K(task_info));
@ -480,17 +489,22 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta
LOG_WARN("failed to stop tablets schedule medium", K(ret), K(task_info));
} else if (OB_FAIL(check_start_status_transfer_tablets_(task_info))) {
LOG_WARN("failed to check start status transfer tablets", K(ret), K(task_info));
} else if (!enable_kill_trx && OB_FAIL(check_src_ls_has_active_trans_(task_info.src_ls_id_))) {
} else if (!new_transfer && !enable_kill_trx && OB_FAIL(check_src_ls_has_active_trans_(task_info.src_ls_id_))) {
LOG_WARN("failed to check src ls active trans", K(ret), K(task_info));
} else if (OB_FAIL(update_all_tablet_to_ls_(task_info, trans))) {
LOG_WARN("failed to update all tablet to ls", K(ret), K(task_info));
} else if (OB_FAIL(lock_tablet_on_dest_ls_for_table_lock_(task_info, trans))) {
LOG_WARN("failed to lock tablet on dest ls for table lock", KR(ret), K(task_info));
} else if (OB_FAIL(block_and_kill_tx_(task_info, enable_kill_trx, timeout_ctx, succ_block_tx))) {
} else if (!new_transfer && OB_FAIL(block_and_kill_tx_(task_info, enable_kill_trx, timeout_ctx, succ_block_tx))) {
LOG_WARN("failed to block and kill tx", K(ret), K(task_info));
} else if (new_transfer && OB_FAIL(do_trans_transfer_start_prepare_(task_info, timeout_ctx, trans))) {
LOG_WARN("failed to do trans transfer start prepare", K(ret), K(task_info));
} else if (OB_FAIL(reset_timeout_for_trans_(timeout_ctx))) {
LOG_WARN("failed to reset timeout for trans", K(ret));
} else if (OB_FAIL(do_trans_transfer_start_(task_info, config_version, timeout_ctx, trans))) {
} else if (!new_transfer && OB_FAIL(do_trans_transfer_start_(task_info, config_version, timeout_ctx, trans))) {
LOG_WARN("failed to do trans transfer start", K(ret), K(task_info));
} else if (new_transfer && FALSE_IT(tablet_stop_begin = ObTimeUtil::current_time())) {
} else if (new_transfer && OB_FAIL(do_trans_transfer_start_v2_(task_info, timeout_ctx, trans))) {
LOG_WARN("failed to do trans transfer start", K(ret), K(task_info));
} else {
#ifdef ERRSIM
@ -504,6 +518,7 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta
DEBUG_SYNC(BEFORE_TRANSFER_START_COMMIT);
}
int64_t trans_commit_begin = ObTimeUtil::current_time();
commit_succ = OB_SUCC(ret);
if (OB_TMP_FAIL(commit_trans_(ret, trans))) {
LOG_WARN("failed to commit trans", K(tmp_ret), K(ret));
@ -512,6 +527,13 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta
}
commit_succ = false;
}
int64_t trans_commit_end = ObTimeUtil::current_time();
if (new_transfer) {
// tablet write stop from transfer_out_prepare to trans end
LOG_INFO("[TRANSFER] transfer start trans commit", KR(ret), "transfer_start_trans_cost", trans_commit_end - tablet_stop_begin,
"trans_process", trans_commit_begin - tablet_stop_begin,
"trans_commit", trans_commit_end - trans_commit_begin);
}
clear_prohibit_(task_info, tablet_ids, succ_block_tx, succ_stop_medium);
}
@ -521,7 +543,9 @@ int ObTransferHandler::do_with_start_status_(const share::ObTransferTaskInfo &ta
if (!is_leader) {
} else if (can_retry_(task_info, ret)) {
LOG_INFO("transfer task can retry", K(ret), K(task_info));
if (OB_TMP_FAIL(unlock_src_and_dest_ls_member_list_(task_info))) {
if (!new_transfer && OB_TMP_FAIL(unblock_tx_(task_info.tenant_id_, task_info.src_ls_id_, gts_seq_))) {
LOG_WARN("failed to unblock tx", K(ret));
} else if (OB_TMP_FAIL(unlock_src_and_dest_ls_member_list_(task_info))) {
LOG_WARN("failed to unlock src and dest ls member list", K(tmp_ret), K(ret), K(task_info));
}
ob_usleep(INTERVAL_US);
@ -943,7 +967,7 @@ int ObTransferHandler::do_trans_transfer_start_(
} else if (!task_info.is_valid() || !config_version.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("do trans transfer start get invalid argument", K(ret), K(task_info), K(config_version));
} else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans))) {
} else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans, transaction::ObTxDataSourceType::START_TRANSFER_OUT))) {
LOG_WARN("failed to do tx start transfer out", K(ret), K(task_info));
} else if (OB_FAIL(check_config_version_(config_version))) {
LOG_WARN("failed to check config version", K(ret), K(task_info));
@ -967,6 +991,197 @@ int ObTransferHandler::do_trans_transfer_start_(
return ret;
}
// Prepare step that do_with_start_status_ runs when new_transfer is enabled,
// before the v2 transfer start itself.
//
// Steps, in order:
//   1. look up the dest LS leader and verify the task belongs to this tenant;
//   2. run do_trans_transfer_dest_prepare_() inside `trans`;
//   3. submit redo logs of the src LS transactions locally;
//   4. ask the dest LS leader (via storage rpc) to submit its tx logs;
//   5. wait for the src LS weak read timestamp to advance.
//
// @param task_info    transfer task being started; must be valid
// @param timeout_ctx  timeout context forwarded to the weak-read wait
// @param trans        transaction used by the dest prepare step
// @return OB_SUCCESS, or the first error hit by any step above.
int ObTransferHandler::do_trans_transfer_start_prepare_(
    const share::ObTransferTaskInfo &task_info,
    ObTimeoutCtx &timeout_ctx,
    ObMySQLTransaction &trans)
{
  int ret = OB_SUCCESS;
  ObLSService *ls_srv = nullptr;
  ObLSHandle src_ls_handle;
  ObTransID failed_tx_id;
  ObStorageHASrcInfo addr_info;
  addr_info.cluster_id_ = GCONF.cluster_id;
  ObAddr dest_ls_leader;
  SCN data_scn;

  if (!is_inited_) {
    ret = OB_NOT_INIT;
    LOG_WARN("transfer handler do not init", K(ret));
  } else if (!task_info.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("do trans transfer start get invalid argument", K(ret), K(task_info));
  } else if (OB_FAIL(get_ls_leader_(task_info.dest_ls_id_, dest_ls_leader))) {
    LOG_WARN("failed to get dest ls leader", K(ret), K(task_info));
  } else if (task_info.tenant_id_ != MTL_ID()) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("tenant not match", K(ret), K(task_info), K(MTL_ID()));
  } else if (OB_FAIL(do_trans_transfer_dest_prepare_(task_info, trans))) {
    LOG_WARN("failed to do transfer dest prepare", K(ret), K(task_info));
  } else if (FALSE_IT(addr_info.src_addr_ = dest_ls_leader)) {
  } else if (OB_ISNULL(ls_srv = MTL(ObLSService*))) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls srv should not be NULL", K(ret), KP(ls_srv));
  } else if (OB_FAIL(ls_srv->get_ls(task_info.src_ls_id_, src_ls_handle, ObLSGetMod::STORAGE_MOD))) {
    LOG_WARN("failed to get ls", K(ret), K(task_info));
  } else if (OB_ISNULL(src_ls_handle.get_ls())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls is NULL", K(ret), K(task_info));
  // submit active tx redo log before block tablet write to optimise system interrupt time
  } else if (OB_FAIL(src_ls_handle.get_ls()->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id))) {
    // failed_tx_id is logged as filled in by the callee.
    LOG_WARN("failed to submit tx log", K(ret), K(task_info), K(failed_tx_id));
  // submit dest_ls active tx redo log
  } else if (OB_FAIL(storage_rpc_->submit_tx_log(task_info.tenant_id_, addr_info, task_info.dest_ls_id_, data_scn))) {
    LOG_WARN("failed to submit tx log", K(ret), K(task_info));
  } else if (OB_FAIL(wait_src_ls_advance_weak_read_ts_(task_info, timeout_ctx))) {
    LOG_WARN("failed to wait src_ls advance weak_read_ts", K(ret), K(task_info));
  }
  return ret;
}
// Quiesces writes on the src LS for the tablets of `task_info` and reports the
// log position reached afterwards.
//
// Steps, in order: enable tablet-status checking on the LS lock table, wait
// for in-flight tx writes to end, submit redo logs of the LS transactions,
// freeze the transferred tablets, verify none of them still has an active
// memtable, then read the log handler's max scn.
//
// @param task_info     transfer task; supplies src_ls_id_ and tablet_list_
// @param data_end_scn  [out] set to the log handler's max scn on success only
// @param timeout_ctx   timeout context forwarded to wait_tx_write_end()
// @return OB_SUCCESS; OB_EAGAIN when a tablet still has an active memtable
//         after the freeze; otherwise the first error of any step.
int ObTransferHandler::wait_tablet_write_end_(
    const share::ObTransferTaskInfo &task_info,
    SCN &data_end_scn,
    ObTimeoutCtx &timeout_ctx)
{
  int ret = OB_SUCCESS;
  const share::ObLSID &src_ls_id = task_info.src_ls_id_;
  ObLSHandle ls_handle;
  ObLSService *ls_srv = NULL;
  ObLS *ls = NULL;
  SCN scn;

  if (OB_ISNULL(ls_srv = MTL(ObLSService*))) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls srv should not be NULL", K(ret), KP(ls_srv));
  } else if (OB_FAIL(ls_srv->get_ls(src_ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) {
    LOG_WARN("ls_srv->get_ls() fail", K(ret), K(src_ls_id));
  } else if (OB_ISNULL(ls = ls_handle.get_ls())) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls is NULL", KR(ret), K(ls_handle));
  } else {
    // Collect the plain tablet ids of the transfer task.
    ObSEArray<ObTabletID, 1> tablet_list;
    for (int64_t idx = 0; OB_SUCC(ret) && idx < task_info.tablet_list_.count(); idx++) {
      if (OB_FAIL(tablet_list.push_back(task_info.tablet_list_.at(idx).tablet_id()))) {
        LOG_WARN("push tablet to array failed", KR(ret));
      }
    }

    // wait tablet all operation stop
    // data memtable write end
    // table lock operation end
    ObTransID failed_tx_id;
    bool has_active_memtable = false;
    if (OB_FAIL(ret)) {
    } else if (OB_FAIL(ls->get_lock_table()->enable_check_tablet_status(true))) {
      LOG_WARN("failed to enable check tablet status", KR(ret), K(task_info));
    } else if (OB_FAIL(ls->wait_tx_write_end(timeout_ctx))) {
      LOG_WARN("failed to wait tx_write end", KR(ret), K(task_info));
    } else if (OB_FAIL(ls->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id))) {
      // failed_tx_id is logged as filled in by the callee.
      LOG_WARN("failed to submit tx log", KR(ret), K(task_info), K(failed_tx_id));
    } else if (OB_FAIL(ls->batch_tablet_freeze(tablet_list, true))) {
      LOG_WARN("batch tablet freeze failed", KR(ret), KPC(ls), K(task_info));
    } else if (OB_FAIL(ls->check_tablet_no_active_memtable(tablet_list, has_active_memtable))) {
      LOG_WARN("check tablet has active memtable failed", KR(ret), KPC(ls), K(task_info));
    } else if (has_active_memtable) {
      ret = OB_EAGAIN;
      LOG_WARN("tablet has active memtable need retry", KR(ret), K(tablet_list));
    } else if (OB_FAIL(ls->get_log_handler()->get_max_scn(scn))) {
      LOG_WARN("log_handler get_max_scn failed", KR(ret), K(task_info));
    } else {
      // Publish the out-parameter only once every step has succeeded.
      data_end_scn = scn;
      LOG_INFO("success to wait tablet write end", KR(ret), K(task_info));
    }
  }
  return ret;
}
// Runs the "start" phase of the v2 transfer flow inside the caller-provided
// transaction `trans`.
//
// Steps, in the order executed below:
//   1. resolve the source LS leader
//   2. register START_TRANSFER_OUT_PREPARE on the source LS
//   3. wait until writes on the transferred tablets have ended (yields data_end_scn)
//   4. register START_TRANSFER_OUT_V2 carrying data_end_scn
//   5. fetch the start-transfer-out SCN (start_scn)
//   6. wait for the source LS to replay up to start_scn
//   7. collect the tablet metas of the transferred tablets
//   8. move transaction contexts to the destination LS
//   9. register the transfer-in operation
//  10. update the task status to DOING
// After most steps the elapsed time is recorded and the timeout is checked; every
// per-step cost is printed in the final log line regardless of the outcome.
//
// @param task_info    transfer task; must be valid
// @param timeout_ctx  checked between steps and forwarded to waiting helpers
// @param trans        transaction through which all registrations are made
// @return OB_SUCCESS, OB_NOT_INIT, OB_INVALID_ARGUMENT, OB_TIMEOUT when the
//         timeout is detected between steps, OB_ERR_UNEXPECTED when data_end_scn
//         is invalid, or the first error returned by a step
int ObTransferHandler::do_trans_transfer_start_v2_(
const share::ObTransferTaskInfo &task_info,
ObTimeoutCtx &timeout_ctx,
ObMySQLTransaction &trans)
{
LOG_INFO("[TRANSFER] start do trans transfer start v2", K(task_info));
int ret = OB_SUCCESS;
SCN start_scn;
ObArray<ObMigrationTabletParam> tablet_meta_list;
const share::ObTransferStatus next_status(ObTransferStatus::DOING);
ObAddr src_ls_leader;
// NOTE(review): src_info is filled in below but never read in this function.
ObStorageHASrcInfo src_info;
src_info.cluster_id_ = GCONF.cluster_id;
// NOTE(review): tenant_config is constructed but no config item is read from it here.
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
SCN data_end_scn;
int64_t move_tx_count = 0;
int64_t start_time = ObTimeUtil::current_time();
// Per-step elapsed times in the unit returned by ObTimeUtil::current_time(); 0 means the step was not reached.
int64_t transfer_out_prepare_cost = 0;
int64_t wait_tablet_write_end_cost = 0;
int64_t transfer_out_cost = 0;
int64_t wait_src_replay_cost = 0;
int64_t get_transfer_out_scn_cost = 0;
int64_t get_tablets_meta_cost = 0;
int64_t move_tx_cost = 0;
int64_t transfer_in_cost = 0;
int64_t now_time = ObTimeUtil::current_time();
int64_t step_time = now_time;
// Stores the time since the previous step into `cost`, restarts the step timer, and
// evaluates to true only when timeout_ctx reports a timeout (setting ret = OB_TIMEOUT),
// which ends the else-if chain below. Relies on FALSE_IT(...) evaluating its argument
// and yielding false.
// NOTE(review): the macro is not #undef'd inside this function.
#define STEP_COST_AND_CHECK_TIMEOUT(cost) FALSE_IT(now_time = ObTimeUtil::current_time()) || \
FALSE_IT(cost = now_time - step_time) || \
FALSE_IT(step_time = now_time) || \
(timeout_ctx.is_timeouted() && !FALSE_IT(ret = OB_TIMEOUT))
if (!is_inited_) {
ret = OB_NOT_INIT;
LOG_WARN("transfer handler do not init", K(ret));
} else if (!task_info.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("do trans transfer start get invalid argument", K(ret), K(task_info));
// for transfer support move active tx, we use this config as tablet write blocked timeout
// NOTE(review): the remark above looks stale, no config value is consulted at this point.
} else if (OB_FAIL(get_ls_leader_(task_info.src_ls_id_, src_ls_leader))) {
LOG_WARN("failed to get src ls leader", K(ret), K(task_info));
} else if (FALSE_IT(src_info.src_addr_ = src_ls_leader)) {
// MDS transaction operation for block tablet write
} else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans,
transaction::ObTxDataSourceType::START_TRANSFER_OUT_PREPARE))) {
LOG_WARN("failed to do tx start transfer prepare", K(ret), K(task_info));
} else if (STEP_COST_AND_CHECK_TIMEOUT(transfer_out_prepare_cost)) {
// resubmit tx log promise transfer tablet redo complete
} else if (OB_FAIL(wait_tablet_write_end_(task_info, data_end_scn, timeout_ctx))) {
LOG_WARN("failed to wait tablet write end", K(ret), K(task_info));
} else if (STEP_COST_AND_CHECK_TIMEOUT(wait_tablet_write_end_cost)) {
} else if (!data_end_scn.is_valid()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("transfer data_end_scn is invalid", K(ret), K(task_info), K(data_end_scn));
// second registration on the source LS, this time carrying data_end_scn
} else if (OB_FAIL(do_tx_start_transfer_out_(task_info, trans, transaction::ObTxDataSourceType::START_TRANSFER_OUT_V2, data_end_scn))) {
LOG_WARN("failed to do tx start transfer out", K(ret), K(task_info));
} else if (STEP_COST_AND_CHECK_TIMEOUT(transfer_out_cost)) {
} else if (OB_FAIL(get_start_transfer_out_scn_(task_info, timeout_ctx, start_scn))) {
LOG_WARN("failed to get start transfer out log ts", K(ret), K(task_info));
} else if (STEP_COST_AND_CHECK_TIMEOUT(get_transfer_out_scn_cost)) {
// wait src replay
} else if (OB_FAIL(wait_src_ls_replay_to_start_scn_(task_info, start_scn, timeout_ctx))) {
LOG_WARN("failed to wait src ls replay to start scn", K(ret), K(task_info));
} else if (STEP_COST_AND_CHECK_TIMEOUT(wait_src_replay_cost)) {
} else if (OB_FAIL(get_transfer_tablets_meta_(task_info, tablet_meta_list))) {
LOG_WARN("failed to get transfer tablets meta", K(ret), K(task_info));
} else if (STEP_COST_AND_CHECK_TIMEOUT(get_tablets_meta_cost)) {
// move tx
} else if (OB_FAIL(do_move_tx_to_dest_ls_(task_info, timeout_ctx, trans, data_end_scn, start_scn, move_tx_count))) {
LOG_WARN("failed to do move tx to dest_ls", K(ret), K(task_info));
} else if (STEP_COST_AND_CHECK_TIMEOUT(move_tx_cost)) {
// transfer in
} else if (OB_FAIL(do_tx_start_transfer_in_(task_info, start_scn, tablet_meta_list, timeout_ctx, trans))) {
LOG_WARN("failed to do tx start transfer in", K(ret), K(task_info), K(start_scn), K(tablet_meta_list));
} else if (STEP_COST_AND_CHECK_TIMEOUT(transfer_in_cost)) {
// last step: record the DOING status together with start_scn
} else if (OB_FAIL(update_transfer_status_(task_info, next_status, start_scn, OB_SUCCESS, trans))) {
LOG_WARN("failed to update transfer status", K(ret), K(task_info));
}
// Always printed, on success and on failure, so the per-step costs are available either way.
LOG_INFO("[TRANSFER] finish do trans transfer start", K(ret), K(task_info), "cost", ObTimeUtil::current_time() - start_time,
K(transfer_out_prepare_cost),
K(wait_tablet_write_end_cost),
K(transfer_out_cost),
K(get_transfer_out_scn_cost),
K(wait_src_replay_cost),
K(get_tablets_meta_cost),
K(move_tx_cost),
K(transfer_in_cost),
K(move_tx_count));
return ret;
}
int ObTransferHandler::start_trans_(
ObTimeoutCtx &timeout_ctx,
ObMySQLTransaction &trans)
@ -1056,9 +1271,11 @@ int ObTransferHandler::lock_transfer_task_(
int ObTransferHandler::do_tx_start_transfer_out_(
const share::ObTransferTaskInfo &task_info,
common::ObMySQLTransaction &trans)
common::ObMySQLTransaction &trans,
const transaction::ObTxDataSourceType data_source_type,
SCN data_end_scn)
{
LOG_INFO("start do tx start transfer out", K(task_info));
LOG_INFO("[TRANSFER] register start transfer out", K(task_info), K(data_source_type));
int ret = OB_SUCCESS;
observer::ObInnerSQLConnection *conn = NULL;
ObTXStartTransferOutInfo start_transfer_out_info;
@ -1080,6 +1297,9 @@ int ObTransferHandler::do_tx_start_transfer_out_(
} else {
start_transfer_out_info.src_ls_id_ = task_info.src_ls_id_;
start_transfer_out_info.dest_ls_id_ = task_info.dest_ls_id_;
start_transfer_out_info.data_end_scn_ = data_end_scn;
// TODO lana optimise transfer_epoch value
start_transfer_out_info.transfer_epoch_ = task_info.task_id_.id();
if (OB_FAIL(start_transfer_out_info.tablet_list_.assign(task_info.tablet_list_))) {
LOG_WARN("failed to assign transfer tablet list", K(ret), K(task_info));
} else {
@ -1095,10 +1315,11 @@ int ObTransferHandler::do_tx_start_transfer_out_(
} else if (OB_FAIL(start_transfer_out_info.serialize(buf, buf_len, pos))) {
LOG_WARN("fail to serialize start transfer out info", KR(ret), K(start_transfer_out_info));
} else if (OB_FAIL(conn->register_multi_data_source(task_info.tenant_id_, task_info.src_ls_id_,
transaction::ObTxDataSourceType::START_TRANSFER_OUT, buf, buf_len, flag))) {
data_source_type, buf, buf_len, flag))) {
LOG_WARN("failed to register multi data source", K(ret), K(task_info));
} else {
LOG_INFO("[TRANSFER_BLOCK_TX] success register start transfer out", "cost", ObTimeUtil::current_time() - start_ts);
LOG_INFO("[TRANSFER] success register start transfer out", "cost", ObTimeUtil::current_time() - start_ts,
K(data_source_type));
}
#ifdef ERRSIM
ObTransferEventRecorder::record_transfer_task_event(
@ -1403,6 +1624,7 @@ int ObTransferHandler::wait_ls_replay_event_(
}
ob_usleep(OB_CHECK_START_SCN_READY_INTERVAL);
}
FLOG_INFO("[TRANSFER] wait_ls_replay_event_ finish", K(ret), K(task_info.task_id_), K(check_scn), "cost", ObTimeUtil::current_time() - start_ts);
return ret;
}
@ -2214,6 +2436,250 @@ int ObTransferHandler::clear_prohibit_medium_flag_(const ObIArray<ObTabletID> &t
}
return ret;
}
/*
 * when src_ls replica replay to latest (> transfer_scn)
 *
 * we can collect active tx info from replica, because we have set transfer_blocking on moving ctxs
 *
 * after collect we can register move_tx_ctx MDS operation on dest_ls
 *
 * The collected contexts are registered on dest_ls through `trans`, either in a
 * single TRANSFER_MOVE_TX_CTX registration (at most MOVE_TX_BATCH contexts) or
 * in consecutive batches of at most MOVE_TX_BATCH contexts each.
 *
 * @param task_info      transfer task (src/dest LS ids, task id, tablet list)
 * @param timeout_ctx    not used by this function
 * @param trans          transaction used for the MDS registrations
 * @param data_end_scn   forwarded to collect_tx_ctx on the source LS
 * @param transfer_scn   recorded in every batch and used as the MDS base scn
 * @param move_tx_count  [out] number of collected contexts; set once collection succeeded
 * @return OB_SUCCESS, OB_ERR_UNEXPECTED on a null LS/service or a count mismatch,
 *         or the first error from collection / registration
 */
int ObTransferHandler::do_move_tx_to_dest_ls_(const share::ObTransferTaskInfo &task_info,
                                              ObTimeoutCtx &timeout_ctx,
                                              ObMySQLTransaction &trans,
                                              const SCN data_end_scn,
                                              const SCN transfer_scn,
                                              int64_t &move_tx_count)
{
  LOG_INFO("[TRANSFER] do_move_tx_to_dest_ls_", K(task_info), K(data_end_scn));
  int ret = OB_SUCCESS;
  int64_t start_time = ObTimeUtility::current_time();
  ObLSService *ls_srv = NULL;
  ObLSHandle src_ls_handle;
  ObLS *src_ls = NULL;
  CollectTxCtxInfo collect_res;
  collect_res.src_ls_id_ = task_info.src_ls_id_;
  collect_res.dest_ls_id_ = task_info.dest_ls_id_;
  collect_res.task_id_ = task_info.task_id_.id();
  // TODO lana optimise transfer_epoch value
  collect_res.transfer_epoch_ = task_info.task_id_.id();
  collect_res.transfer_scn_ = transfer_scn;
  int64_t tx_count = 0;
  int64_t buf_len = 0;
  int64_t collect_count = 0;
  ObArray<ObTabletID> tablet_list;
  for (int64_t idx = 0; OB_SUCC(ret) && idx < task_info.tablet_list_.count(); idx++) {
    if (OB_FAIL(tablet_list.push_back(task_info.tablet_list_.at(idx).tablet_id()))) {
      LOG_WARN("push to array failed", KR(ret));
    }
  }
  if (OB_FAIL(ret)) {
  } else if (OB_ISNULL(ls_srv = MTL(ObLSService*))) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls srv should not be NULL", KR(ret), KP(ls_srv));
  } else if (OB_FAIL(ls_srv->get_ls(task_info.src_ls_id_, src_ls_handle, ObLSGetMod::STORAGE_MOD))) {
    LOG_WARN("get ls failed", KR(ret), K(task_info));
  } else if (OB_ISNULL(src_ls = src_ls_handle.get_ls())) {
    // Same defensive check as wait_tablet_write_end_: never dereference a null LS.
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls is NULL", KR(ret), K(task_info));
  } else if (OB_FAIL(src_ls->collect_tx_ctx(task_info.dest_ls_id_,
                                            data_end_scn,
                                            const_cast<const ObArray<ObTabletID>&>(tablet_list),
                                            tx_count,
                                            collect_count,
                                            collect_res.args_))) {
    LOG_WARN("collect tx ctx failed", KR(ret), K(task_info));
  } else if (collect_count != collect_res.args_.count()) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("collect tx ctx count mismatch", KR(ret), K(collect_count), K(collect_res));
  } else if (FALSE_IT(move_tx_count = collect_count)) {
  } else if (0 == collect_count) {
    // no active tx do nothing
  } else if (collect_res.args_.count() <= MOVE_TX_BATCH) {
    // register once
    if (OB_FAIL(register_move_tx_ctx_batch_(task_info,
                                            transfer_scn,
                                            trans,
                                            collect_res,
                                            buf_len))) {
      LOG_WARN("register move_tx_ctx batch failed", KR(ret), K(task_info));
    }
  } else {
    // register batch
    int64_t start_idx = 0;
    while (OB_SUCC(ret) && start_idx < collect_res.args_.count()) {
      int64_t batch_len = 0;
      CollectTxCtxInfo collect_batch;
      collect_batch.src_ls_id_ = task_info.src_ls_id_;
      collect_batch.dest_ls_id_ = task_info.dest_ls_id_;
      collect_batch.task_id_ = task_info.task_id_.id();
      collect_batch.transfer_epoch_ = task_info.task_id_.id();
      collect_batch.transfer_scn_ = transfer_scn;
      for (int64_t count = 0;
           OB_SUCC(ret) && count < MOVE_TX_BATCH && start_idx < collect_res.args_.count();
           count++) {
        if (OB_FAIL(collect_batch.args_.push_back(collect_res.args_.at(start_idx)))) {
          LOG_WARN("push to array fail", KR(ret));
        } else {
          // Advance only past contexts that really made it into the batch.
          start_idx++;
        }
      }
      if (FAILEDx(register_move_tx_ctx_batch_(task_info,
                                              transfer_scn,
                                              trans,
                                              collect_batch,
                                              batch_len))) {
        LOG_WARN("register move_tx_ctx batch failed", KR(ret), K(task_info));
      } else {
        buf_len += batch_len;
      }
      LOG_INFO("register move_tx_ctx batch", KR(ret), K(start_idx), K(batch_len));
    }
  }
  int64_t end_time = ObTimeUtility::current_time();
  LOG_INFO("do_move_tx_to_dest_ls_", KR(ret), "cost", end_time-start_time,
      K(task_info),
      "tx_count", collect_res.args_.count(),
      "buf_size", buf_len);
  return ret;
}
// Serializes one batch of collected transaction contexts and registers it on the
// destination LS as a TRANSFER_MOVE_TX_CTX multi-data-source operation, using
// the connection of `trans`.
//
// @param task_info      supplies tenant id and destination LS id
// @param transfer_scn   used as the MDS base scn of the registration
// @param trans          transaction whose inner connection performs the registration
// @param collect_batch  batch to serialize
// @param batch_len      [out] serialized size of the batch; written only on success
// @return OB_SUCCESS, OB_ERR_UNEXPECTED when the batch exceeds
//         OB_MAX_LOG_ALLOWED_SIZE or the connection is null,
//         OB_ALLOCATE_MEMORY_FAILED, or the serialize / register error
int ObTransferHandler::register_move_tx_ctx_batch_(const share::ObTransferTaskInfo &task_info,
                                                   const SCN transfer_scn,
                                                   ObMySQLTransaction &trans,
                                                   CollectTxCtxInfo &collect_batch,
                                                   int64_t &batch_len)
{
  int ret = OB_SUCCESS;
  int64_t buf_len = collect_batch.get_serialize_size();
  int64_t pos = 0;
  char *buf = NULL;
  ObArenaAllocator allocator;
  observer::ObInnerSQLConnection *conn = NULL;
  ObRegisterMdsFlag flag;
  flag.need_flush_redo_instantly_ = true;
  flag.mds_base_scn_ = transfer_scn;
  if (buf_len > OB_MAX_LOG_ALLOWED_SIZE) {
    // Reject oversized batches before spending memory and CPU on serialization.
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("move tx ctx batch exceed log size", KR(ret), K(buf_len));
  } else if (OB_ISNULL(buf = (char*)allocator.alloc(buf_len))) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("alloc memory failed", KR(ret), K(buf_len));
  } else if (OB_FAIL(collect_batch.serialize(buf, buf_len, pos))) {
    LOG_WARN("fail to serialize", KR(ret), K(collect_batch));
  } else if (OB_ISNULL(conn = static_cast<observer::ObInnerSQLConnection *>(trans.get_connection()))) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("conn is null", KR(ret), K(task_info));
  } else if (OB_FAIL(conn->register_multi_data_source(task_info.tenant_id_, task_info.dest_ls_id_,
          ObTxDataSourceType::TRANSFER_MOVE_TX_CTX, buf, buf_len, flag))) {
    // buf holds binary, non NUL-terminated data: log its address, never its content.
    LOG_WARN("failed to register multi data source", KR(ret), K(task_info), KP(buf), K(buf_len));
  } else {
    batch_len = buf_len;
  }
  return ret;
}
// Registers a TRANSFER_DEST_PREPARE multi-data-source operation on the
// destination LS through the connection of `trans`. The payload carries the
// task id and the source / destination LS ids.
//
// @param task_info  transfer task (tenant id, task id, src/dest LS ids)
// @param trans      transaction whose inner connection performs the registration
// @return OB_SUCCESS, OB_ALLOCATE_MEMORY_FAILED, OB_ERR_UNEXPECTED when the
//         connection is null, or the serialize / register error
int ObTransferHandler::do_trans_transfer_dest_prepare_(
    const share::ObTransferTaskInfo &task_info,
    ObMySQLTransaction &trans)
{
  LOG_INFO("do_trans_transfer_dest_prepare_", K(task_info));
  int ret = OB_SUCCESS;
  int64_t start_time = ObTimeUtil::current_time();
  ObTransferDestPrepareInfo info;
  info.task_id_ = task_info.task_id_.id();
  info.src_ls_id_ = task_info.src_ls_id_;
  info.dest_ls_id_ = task_info.dest_ls_id_;
  int64_t buf_len = info.get_serialize_size();
  int64_t pos = 0;
  char *buf = NULL;
  ObArenaAllocator allocator;
  observer::ObInnerSQLConnection *conn = NULL;
  ObRegisterMdsFlag flag;
  flag.need_flush_redo_instantly_ = true;
  if (OB_ISNULL(buf = (char*)allocator.alloc(buf_len))) {
    ret = OB_ALLOCATE_MEMORY_FAILED;
    LOG_WARN("alloc memory failed", KR(ret), K(buf_len));
  } else if (OB_FAIL(info.serialize(buf, buf_len, pos))) {
    LOG_WARN("fail to serialize", KR(ret), K(info));
  } else if (OB_ISNULL(conn = static_cast<observer::ObInnerSQLConnection *>(trans.get_connection()))) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("conn is null", KR(ret), K(task_info));
  } else if (OB_FAIL(conn->register_multi_data_source(task_info.tenant_id_, task_info.dest_ls_id_,
          ObTxDataSourceType::TRANSFER_DEST_PREPARE, buf, buf_len, flag))) {
    // buf holds binary, non NUL-terminated data: log its address, never its content.
    LOG_WARN("failed to register multi data source", KR(ret), K(task_info), KP(buf), K(buf_len));
  }
  int64_t end_time = ObTimeUtil::current_time();
  LOG_INFO("[TRANSFER] do_trans_transfer_dest_prepare_", KR(ret), "cost", end_time - start_time, K(task_info));
  return ret;
}
// Waits until the weak-read timestamp of this handler's LS (ls_) has passed the
// transfer_dest_prepare_scn reported by the destination LS leader.
//
// Phase 1 polls the destination leader over RPC until it returns a valid
// transfer_dest_prepare_scn. Phase 2 polls the local weak-read timestamp until
// it is strictly greater than that scn. Both phases share one time budget,
// taken from timeout_ctx once and measured from the start of phase 1 with a
// single clock source.
//
// @param task_info    transfer task (tenant id, task id, destination LS id)
// @param timeout_ctx  supplies the overall time budget
// @return OB_SUCCESS, OB_TIMEOUT when the budget is exhausted, or the error of
//         the leader lookup / RPC
int ObTransferHandler::wait_src_ls_advance_weak_read_ts_(
    const share::ObTransferTaskInfo &task_info,
    ObTimeoutCtx &timeout_ctx)
{
  int ret = OB_SUCCESS;
  ObAddr dest_ls_leader;
  ObStorageHASrcInfo addr_info;
  addr_info.cluster_id_ = GCONF.cluster_id;
  if (OB_FAIL(get_ls_leader_(task_info.dest_ls_id_, dest_ls_leader))) {
    LOG_WARN("failed to get dest ls leader", K(ret), K(task_info));
  } else if (FALSE_IT(addr_info.src_addr_ = dest_ls_leader)) {
  } else {
    const int64_t GET_PREPARE_SCN_RETRY_INTERVAL = 50 * 1000;
    const int64_t CHECK_WEAK_READ_TS_INTERVAL = 20 * 1000;
    const int64_t start_time = ObClockGenerator::getClock();
    share::SCN transfer_dest_prepare_scn;
    const int64_t timeout = timeout_ctx.get_timeout();
    // get dest_ls transfer_dest_prepare_scn
    while (OB_SUCC(ret)) {
      if (OB_FAIL(storage_rpc_->get_transfer_dest_prepare_scn(task_info.tenant_id_,
                                                              addr_info,
                                                              task_info.dest_ls_id_,
                                                              transfer_dest_prepare_scn))) {
        LOG_WARN("failed to get transfer_dest_prepare_scn", KR(ret), K(task_info));
      } else if (!transfer_dest_prepare_scn.is_valid()) {
        LOG_WARN("transfer_dest_prepare_scn is invalid need retry", K(task_info.task_id_));
        // Elapsed time must be measured with the same clock that produced start_time.
        if (ObClockGenerator::getClock() - start_time > timeout) {
          ret = OB_TIMEOUT;
          FLOG_WARN("failed to get transfer_dest_prepare_scn", KR(ret), K(task_info));
        } else {
          ob_usleep(GET_PREPARE_SCN_RETRY_INTERVAL);
        }
      } else {
        break;
      }
    }
    const int64_t step_time = ObClockGenerator::getClock();
    LOG_INFO("[TRANSFER] get dest_ls transfer_dest_prepare_scn", KR(ret), K(task_info.task_id_), K(transfer_dest_prepare_scn),
        "cost", step_time - start_time, K(timeout));
    // check src_ls advance weak_read_ts
    while (OB_SUCC(ret)) {
      SCN weak_read_ts = ls_->get_ls_wrs_handler()->get_ls_weak_read_ts();
      if (weak_read_ts <= transfer_dest_prepare_scn) {
        LOG_WARN("wait src_ls weak_read_ts advance", K(task_info.task_id_), K(weak_read_ts), K(transfer_dest_prepare_scn));
        if (ObClockGenerator::getClock() - start_time > timeout) {
          ret = OB_TIMEOUT;
          FLOG_WARN("failed to wait src_ls advance transfer_dest_prepare_scn", KR(ret), K(task_info), K(transfer_dest_prepare_scn));
        } else {
          ob_usleep(CHECK_WEAK_READ_TS_INTERVAL);
        }
      } else {
        break;
      }
    }
    const int64_t end_time = ObClockGenerator::getClock();
    LOG_INFO("[TRANSFER] wait src_ls weak_read_ts advance", KR(ret), K(task_info.task_id_), K(transfer_dest_prepare_scn),
        "cost", end_time - step_time, K(timeout));
  }
  return ret;
}
// TODO(handora.qc): remove it under 4.3.x later
int enable_new_transfer(bool &enable)
{
int ret = OB_SUCCESS;
uint64_t data_version = 0;
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
if (OB_FAIL(GET_MIN_DATA_VERSION(MTL_ID(), data_version))) {
LOG_INFO("[TRANSFER] get min data version failed", K(ret));
} else if (DATA_VERSION_4_3_0_0 > data_version) {
enable = false;
} else if (!tenant_config->_enable_transfer_active_tx) {
enable = false;
} else {
enable = true;
}
return ret;
}
int ObTransferHandler::clear_prohibit_(
const share::ObTransferTaskInfo &task_info,
@ -2240,6 +2706,12 @@ int ObTransferHandler::clear_prohibit_(
ob_abort();
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(ls_->get_lock_table()->enable_check_tablet_status(false))) {
LOG_WARN("failed to cancel lock table check tablet status", K(ret), K(task_info));
ob_abort();
}
LOG_INFO("[TRANSFER] clear prohibit", K(ret), "cost", ObTimeUtil::current_time() - start_ts);
return ret;
}

View File

@ -134,6 +134,31 @@ private:
const palf::LogConfigVersion &config_version,
ObTimeoutCtx &timeout_ctx,
ObMySQLTransaction &trans);
int do_trans_transfer_start_prepare_(
const share::ObTransferTaskInfo &task_info,
ObTimeoutCtx &timeout_ctx,
ObMySQLTransaction &trans);
int wait_tablet_write_end_(
const share::ObTransferTaskInfo &task_info,
SCN &data_end_scn,
ObTimeoutCtx &timeout_ctx);
int do_trans_transfer_start_v2_(
const share::ObTransferTaskInfo &task_info,
ObTimeoutCtx &timeout_ctx,
ObMySQLTransaction &trans);
int do_trans_transfer_dest_prepare_(
const share::ObTransferTaskInfo &task_info,
ObMySQLTransaction &trans);
int wait_src_ls_advance_weak_read_ts_(
const share::ObTransferTaskInfo &task_info,
ObTimeoutCtx &timeout_ctx);
int do_move_tx_to_dest_ls_(
const share::ObTransferTaskInfo &task_info,
ObTimeoutCtx &timeout_ctx,
ObMySQLTransaction &trans,
const SCN data_end_scn,
const SCN transfer_scn,
int64_t &move_tx_count);
int start_trans_(
ObTimeoutCtx &timeout_ctx,
ObMySQLTransaction &trans);
@ -143,7 +168,9 @@ private:
int do_tx_start_transfer_out_(
const share::ObTransferTaskInfo &task_info,
common::ObMySQLTransaction &trans);
common::ObMySQLTransaction &trans,
const transaction::ObTxDataSourceType data_source_type,
SCN data_end_scn = SCN::min_scn());
int lock_transfer_task_(
const share::ObTransferTaskInfo &task_info,
common::ObISQLClient &trans);
@ -258,9 +285,15 @@ private:
common::ObMemberList &member_list);
int broadcast_tablet_location_(const share::ObTransferTaskInfo &task_info);
int register_move_tx_ctx_batch_(const share::ObTransferTaskInfo &task_info,
const SCN transfer_scn,
ObMySQLTransaction &trans,
CollectTxCtxInfo &collect_batch,
int64_t &batch_len);
private:
static const int64_t INTERVAL_US = 1 * 1000 * 1000; //1s
static const int64_t KILL_TX_MAX_RETRY_TIMES = 3;
static const int64_t MOVE_TX_BATCH = 2000;
private:
bool is_inited_;
ObLS *ls_;
@ -277,6 +310,10 @@ private:
bool transfer_handler_enabled_;
DISALLOW_COPY_AND_ASSIGN(ObTransferHandler);
};
int enable_new_transfer(bool &enable);
}
}
#endif

View File

@ -29,7 +29,8 @@ using namespace storage;
// Builds an empty descriptor. transfer_epoch_ is a plain int64_t, so it is
// zeroed explicitly here; 0 is the same value reset() assigns and is rejected by
// is_valid(), which requires transfer_epoch_ > 0.
ObTXStartTransferOutInfo::ObTXStartTransferOutInfo()
  : src_ls_id_(),
    dest_ls_id_(),
    tablet_list_(),
    data_end_scn_(),
    transfer_epoch_(0)
{
}
@ -38,13 +39,17 @@ void ObTXStartTransferOutInfo::reset()
src_ls_id_.reset();
dest_ls_id_.reset();
tablet_list_.reset();
data_end_scn_.reset();
transfer_epoch_ = 0;
}
bool ObTXStartTransferOutInfo::is_valid() const
{
return src_ls_id_.is_valid()
&& dest_ls_id_.is_valid()
&& !tablet_list_.empty();
&& !tablet_list_.empty()
&& data_end_scn_.is_valid()
&& transfer_epoch_ > 0;
}
int ObTXStartTransferOutInfo::assign(const ObTXStartTransferOutInfo &start_transfer_out_info)
@ -58,12 +63,13 @@ int ObTXStartTransferOutInfo::assign(const ObTXStartTransferOutInfo &start_trans
} else {
src_ls_id_ = start_transfer_out_info.src_ls_id_;
dest_ls_id_ = start_transfer_out_info.dest_ls_id_;
data_end_scn_ = start_transfer_out_info.data_end_scn_;
transfer_epoch_ = start_transfer_out_info.transfer_epoch_;
}
return ret;
}
OB_SERIALIZE_MEMBER(ObTXStartTransferOutInfo, src_ls_id_, dest_ls_id_, tablet_list_);
OB_SERIALIZE_MEMBER(ObTXStartTransferOutInfo, src_ls_id_, dest_ls_id_, tablet_list_, data_end_scn_, transfer_epoch_);
ObTXStartTransferInInfo::ObTXStartTransferInInfo()
: src_ls_id_(),

View File

@ -37,11 +37,13 @@ public:
bool is_valid() const;
int assign(const ObTXStartTransferOutInfo &start_transfer_out_info);
TO_STRING_KV(K_(src_ls_id), K_(dest_ls_id), K_(tablet_list));
TO_STRING_KV(K_(src_ls_id), K_(dest_ls_id), K_(tablet_list), K_(data_end_scn), K_(transfer_epoch));
share::ObLSID src_ls_id_;
share::ObLSID dest_ls_id_;
common::ObSArray<share::ObTransferTabletInfo> tablet_list_;
share::SCN data_end_scn_;
int64_t transfer_epoch_;
DISALLOW_COPY_AND_ASSIGN(ObTXStartTransferOutInfo);
};

View File

@ -1062,7 +1062,7 @@ int ObFreezer::batch_tablet_freeze(const ObIArray<ObTabletID> &tablet_ids, ObFut
int ret = OB_SUCCESS;
share::ObLSID ls_id = get_ls_id();
SCN freeze_snapshot_version;
FLOG_INFO("[Freezer] batch_tablet_freeze start", K(ret), K(ls_id), K(tablet_ids));
FLOG_INFO("[Freezer] batch_tablet_freeze start", K(ls_id), K(tablet_ids));
int64_t start_time = ObTimeUtility::current_time();
bool need_freeze = true;

View File

@ -202,6 +202,8 @@ int ObLS::init(const share::ObLSID &ls_id,
LOG_WARN("failed to init member list service", K(ret));
} else if (OB_FAIL(block_tx_service_.init(this))) {
LOG_WARN("failed to init block tx service", K(ret));
} else if (OB_FAIL(ls_transfer_status_.init(this))) {
LOG_WARN("failed to init transfer status", K(ret));
} else {
REGISTER_TO_LOGSERVICE(logservice::TRANS_SERVICE_LOG_BASE_TYPE, &ls_tx_svr_);
REGISTER_TO_LOGSERVICE(logservice::STORAGE_SCHEMA_LOG_BASE_TYPE, &ls_tablet_svr_);
@ -938,6 +940,7 @@ void ObLS::destroy()
is_inited_ = false;
tenant_id_ = OB_INVALID_TENANT_ID;
startup_transfer_info_.reset();
ls_transfer_status_.reset();
}
int ObLS::offline_tx_(const int64_t start_ts)
@ -1011,6 +1014,8 @@ int ObLS::offline_(const int64_t start_ts)
LOG_WARN("tablet service offline failed", K(ret), K(ls_meta_));
} else if (OB_FAIL(tablet_empty_shell_handler_.offline())) {
LOG_WARN("tablet_empty_shell_handler failed", K(ret), K(ls_meta_));
} else if (OB_FAIL(ls_transfer_status_.offline())) {
LOG_WARN("ls transfer status offline failed", K(ret), K(ls_meta_));
} else if (OB_FAIL(running_state_.post_offline(ls_meta_.ls_id_))) {
LOG_WARN("ls post offline failed", KR(ret), K(ls_meta_));
} else {
@ -1156,6 +1161,8 @@ int ObLS::online_without_lock()
} else if (FALSE_IT(checkpoint_executor_.online())) {
} else if (FALSE_IT(tablet_gc_handler_.online())) {
} else if (FALSE_IT(tablet_empty_shell_handler_.online())) {
} else if (OB_FAIL(ls_transfer_status_.online())) {
LOG_WARN("ls transfer status online failed", K(ret), K(ls_meta_));
} else if (OB_FAIL(online_advance_epoch_())) {
} else if (OB_FAIL(running_state_.online(ls_meta_.ls_id_))) {
LOG_WARN("ls online failed", KR(ret), K(ls_meta_));

View File

@ -67,6 +67,7 @@
#include "storage/high_availability/ob_ls_block_tx_service.h"
#include "storage/high_availability/ob_ls_transfer_info.h"
#include "observer/table/ttl/ob_tenant_tablet_ttl_mgr.h"
#include "storage/ls/ob_ls_transfer_status.h"
namespace oceanbase
{
@ -283,6 +284,8 @@ public:
ObTransferHandler *get_transfer_handler() { return &transfer_handler_; }
ObLSTransferInfo &get_ls_startup_transfer_info() { return startup_transfer_info_; }
// for transfer record MDS phase
ObLSTransferStatus &get_transfer_status() { return ls_transfer_status_; }
//remove member handler
ObLSRemoveMemberHandler *get_ls_remove_member_handler() { return &ls_remove_member_handler_; }
@ -560,6 +563,8 @@ public:
DELEGATE_WITH_RET(ls_tablet_svr_, disable_to_read, void);
DELEGATE_WITH_RET(ls_tablet_svr_, get_tablet_with_timeout, int);
DELEGATE_WITH_RET(ls_tablet_svr_, get_mds_table_mgr, int);
// for transfer to check tablet no active memtable
DELEGATE_WITH_RET(ls_tablet_svr_, check_tablet_no_active_memtable, int);
// ObLockTable interface:
// check whether the lock op is conflict with exist lock.
@ -813,6 +818,19 @@ public:
CONST_DELEGATE_WITH_RET(dup_table_ls_handler_, get_dup_table_ls_meta, int);
DELEGATE_WITH_RET(dup_table_ls_handler_, set_dup_table_ls_meta, int);
// for transfer to modify active tx ctx state
DELEGATE_WITH_RET(ls_tx_svr_, transfer_out_tx_op, int);
// for transfer to wait tx write end
DELEGATE_WITH_RET(ls_tx_svr_, wait_tx_write_end, int);
// for transfer collect src_ls tx ctx
DELEGATE_WITH_RET(ls_tx_svr_, collect_tx_ctx, int);
// for transfer move tx ctx to dest_ls
DELEGATE_WITH_RET(ls_tx_svr_, move_tx_op, int);
// ObReplayHandler interface:
DELEGATE_WITH_RET(replay_handler_, replay, int);
@ -975,6 +993,8 @@ private:
ObTransferHandler transfer_handler_;
// Record the dependent transfer information when restarting
ObLSTransferInfo startup_transfer_info_;
// for transfer MDS phase
ObLSTransferStatus ls_transfer_status_;
// this is used for the meta lock, and will be removed later
RWLock meta_rwlock_;
};

View File

@ -2261,8 +2261,18 @@ int ObLSTabletService::create_memtable(
LOG_INFO("old tablet is empty shell tablet, should skip this operation", K(ret), "old_tablet", old_tablet_handle.get_obj());
} else {
time_guard.click("get tablet");
ObTabletCreateDeleteMdsUserData user_data;
bool is_committed = false;
ObTablet &old_tablet = *(old_tablet_handle.get_obj());
if (OB_FAIL(old_tablet.create_memtable(schema_version, clog_checkpoint_scn, for_replay))) {
// forbid create new memtable when transfer
if (for_replay) {
} else if (OB_FAIL(old_tablet.ObITabletMdsInterface::get_latest_tablet_status(user_data, is_committed))) {
} else if (!is_committed || (user_data.tablet_status_ != ObTabletStatus::NORMAL
&& user_data.tablet_status_ != ObTabletStatus::TRANSFER_IN)) {
ret = OB_EAGAIN;
LOG_WARN("tablet status not allow create new memtable", K(ret), K(is_committed), K(user_data));
}
if (FAILEDx(old_tablet.create_memtable(schema_version, clog_checkpoint_scn, for_replay))) {
if (OB_MINOR_FREEZE_NOT_ALLOW != ret) {
LOG_WARN("fail to create memtable", K(ret), K(new_tablet_handle), K(schema_version), K(tablet_id));
}
@ -6450,6 +6460,39 @@ int ObLSTabletService::offline_destroy_memtable_and_mds_table_()
return ret;
}
int ObLSTabletService::check_tablet_no_active_memtable(const ObIArray<ObTabletID> &tablet_list, bool &has)
{
int ret = OB_SUCCESS;
has = false;
if (OB_UNLIKELY(!is_inited_)) {
ret = OB_NOT_INIT;
LOG_WARN("not inited", K(ret), K_(is_inited));
} else {
for (int64_t idx = 0; !has && OB_SUCC(ret) && idx < tablet_list.count(); idx++) {
ObTabletID tablet_id = tablet_list.at(idx);
ObTabletHandle handle;
ObTablet *tablet = NULL;
ObTableHandleV2 table_handle;
if (OB_FAIL(direct_get_tablet(tablet_id, handle))) {
LOG_WARN("failed to get tablet", K(ret), K(tablet_id));
} else if (FALSE_IT(tablet = handle.get_obj())) {
} else if (OB_FAIL(tablet->get_active_memtable(table_handle))) {
if (OB_ENTRY_NOT_EXIST == ret) {
ret = OB_SUCCESS;
} else {
LOG_WARN("failed to get active memtable", K(ret), K(tablet_id));
}
} else if (OB_ISNULL(table_handle.get_table())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("get unexpected null table", K(ret), K(tablet_id));
} else if (table_handle.get_table()->is_active_memtable()) {
LOG_WARN("tablet has active memtable", K(tablet_id), K(table_handle));
has = true;
}
}
}
return ret;
}
} // namespace storage

View File

@ -434,6 +434,9 @@ public:
int get_all_tablet_ids(const bool except_ls_inner_tablet, common::ObIArray<ObTabletID> &tablet_id_array);
int flush_mds_table(int64_t recycle_scn);
// for transfer check tablet write stop
int check_tablet_no_active_memtable(const ObIArray<ObTabletID> &tablet_list, bool &has);
protected:
virtual int prepare_dml_running_ctx(
const common::ObIArray<uint64_t> *column_ids,

View File

@ -0,0 +1,228 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX STORAGE
#include "storage/ls/ob_ls_transfer_status.h"
#include "storage/tx_storage/ob_ls_service.h"
namespace oceanbase
{
namespace storage
{
using namespace oceanbase::transaction;
// Binds this status tracker to its owning LS.
//
// @param ls  owning log stream; must not be null because update_status()
//            dereferences it to query the log handler
// @return OB_SUCCESS, OB_INIT_TWICE when already initialised,
//         OB_INVALID_ARGUMENT when ls is null
int ObLSTransferStatus::init(ObLS *ls)
{
  int ret = OB_SUCCESS;
  if (is_inited_) {
    ret = OB_INIT_TWICE;
    STORAGE_LOG(WARN, "ObLSTransferStatus init twice", K(ret), K(is_inited_));
  } else if (OB_ISNULL(ls)) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "ls is null", K(ret), KP(ls));
  } else {
    ls_ = ls;
    is_inited_ = true;
    STORAGE_LOG(INFO, "ObLSTransferStatus init success", K(*this));
  }
  return ret;
}
// Returns the object to its uninitialised state: every recorded transfer
// operation is cleared and the LS binding is dropped.
void ObLSTransferStatus::reset()
{
  // dest-prepare operation
  transfer_prepare_scn_.reset();
  transfer_prepare_op_ = false;
  // move-tx operation
  move_tx_scn_.reset();
  move_tx_op_ = false;
  // owning transfer transaction / task
  transfer_task_id_ = 0;
  transfer_tx_id_.reset();
  // binding to the LS
  ls_ = nullptr;
  is_inited_ = false;
}
// Clears the recorded dest-prepare operation. When no operation remains
// afterwards, the owning tx id and task id are cleared as well.
void ObLSTransferStatus::reset_prepare_op()
{
  transfer_prepare_scn_.reset();
  transfer_prepare_op_ = false;
  const bool nothing_pending = is_finished();
  if (nothing_pending) {
    transfer_task_id_ = 0;
    transfer_tx_id_.reset();
  }
}
// Clears the recorded move-tx operation. When no operation remains afterwards,
// the owning tx id and task id are cleared as well.
void ObLSTransferStatus::reset_move_tx_op()
{
  move_tx_scn_.reset();
  move_tx_op_ = false;
  const bool nothing_pending = is_finished();
  if (nothing_pending) {
    transfer_task_id_ = 0;
    transfer_tx_id_.reset();
  }
}
// True when neither a dest-prepare nor a move-tx operation is recorded.
bool ObLSTransferStatus::is_finished()
{
  const bool any_op_pending = transfer_prepare_op_ || move_tx_op_;
  return !any_op_pending;
}
// Online hook: takes the lock and logs the current status. No state is
// modified.
//
// @return OB_SUCCESS, or OB_NOT_INIT when init() has not been called
int ObLSTransferStatus::online()
{
  int ret = OB_SUCCESS;
  ObSpinLockGuard guard(lock_);
  if (is_inited_) {
    STORAGE_LOG(INFO, "ObLSTransferStatus online", K(*this));
  } else {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "ObLSTransferStatus not init", K(ret), K(*this));
  }
  return ret;
}
// Offline hook: under the lock, drops both recorded operations (and with them
// the owning tx / task ids) and logs the resulting status.
//
// @return always OB_SUCCESS
int ObLSTransferStatus::offline()
{
  ObSpinLockGuard guard(lock_);
  reset_prepare_op();
  reset_move_tx_op();
  STORAGE_LOG(INFO, "ObLSTransferStatus offline", K(*this));
  return OB_SUCCESS;
}
// Entry point for recording a transfer MDS notification on this LS.
//
// Validates the arguments, asks the log handler for the current role, and then,
// under the lock, applies the notification through replay_status_inner_ when
// the role is FOLLOWER and through update_status_inner_ otherwise.
//
// @param tx_id     transfer transaction; must be valid
// @param task_id   transfer task id
// @param op_scn    scn of the notification; may be invalid only for
//                  REGISTER_SUCC and ON_ABORT
// @param op_type   notification type
// @param mds_type  must be TRANSFER_DEST_PREPARE or TRANSFER_MOVE_TX_CTX
// @return OB_SUCCESS, OB_NOT_INIT, OB_INVALID_ARGUMENT, the get_role error, or
//         the error of the inner update
int ObLSTransferStatus::update_status(const transaction::ObTransID tx_id,
                                      const int64_t task_id,
                                      const share::SCN op_scn,
                                      const transaction::NotifyType op_type,
                                      const transaction::ObTxDataSourceType mds_type)
{
  int ret = OB_SUCCESS;
  bool apply_as_follower = false;
  int64_t cur_proposal_id = 0;
  common::ObRole cur_role = common::ObRole::INVALID_ROLE;
  const bool scn_optional = (NotifyType::REGISTER_SUCC == op_type) || (NotifyType::ON_ABORT == op_type);
  const bool supported_mds_type = (ObTxDataSourceType::TRANSFER_DEST_PREPARE == mds_type)
                                  || (ObTxDataSourceType::TRANSFER_MOVE_TX_CTX == mds_type);
  if (!is_inited_) {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "ObLSTransferStatus not init", K(ret), K(*this));
  } else if (!tx_id.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "tx_id is invalid", K(ret), K(*this));
  } else if (!scn_optional && !op_scn.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "op_scn is invalid", K(ret), K(*this));
  // mds frame not pass replay flag, get it from log handler
  } else if (OB_FAIL(ls_->get_log_handler()->get_role(cur_role, cur_proposal_id))) {
    STORAGE_LOG(WARN, "get ls role fail", K(ret), K(*this));
  } else if (!supported_mds_type) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "invalid mds_type", K(ret), K(*this), K(mds_type));
  } else {
    apply_as_follower = (common::ObRole::FOLLOWER == cur_role);
  }

  if (OB_SUCC(ret)) {
    ObSpinLockGuard guard(lock_);
    ret = apply_as_follower
        ? replay_status_inner_(tx_id, task_id, op_scn, op_type, mds_type)
        : update_status_inner_(tx_id, task_id, op_scn, op_type, mds_type);
    if (OB_SUCCESS != ret) {
      STORAGE_LOG(WARN, "update transfer status", KR(ret), K(*this), K(tx_id), K(task_id));
    }
    FLOG_INFO("update_transfer_status", K(ret), K(tx_id), K(task_id), K(op_scn), K(op_type), K(mds_type), K(*this));
  }
  return ret;
}
// Non-follower path of update_status(); the caller already holds lock_.
//
// The notification is accepted when no transfer transaction is recorded yet or
// when it comes from the recorded one. ON_COMMIT / ON_ABORT clear the state
// that belongs to mds_type, any other notification records tx/task id and
// marks the matching operation with op_scn.
//
// A notification from a different transaction is only logged for ON_ABORT
// (WARN) and ON_COMMIT (ERROR); every other kind is refused with
// OB_OP_NOT_ALLOW.
int ObLSTransferStatus::update_status_inner_(const transaction::ObTransID tx_id,
                                             const int64_t task_id,
                                             const share::SCN op_scn,
                                             const transaction::NotifyType op_type,
                                             const transaction::ObTxDataSourceType mds_type)
{
  int ret = OB_SUCCESS;
  const bool is_end_notify = (NotifyType::ON_COMMIT == op_type
                              || NotifyType::ON_ABORT == op_type);
  const bool accepted = !transfer_tx_id_.is_valid() || transfer_tx_id_ == tx_id;

  if (!accepted) {
    // another transfer transaction is still recorded
    if (NotifyType::ON_ABORT == op_type) {
      TRANS_LOG(WARN, "has unfinish tx status when transfer abort can skip", K(*this), K(tx_id), K(task_id));
    } else if (NotifyType::ON_COMMIT == op_type) {
      TRANS_LOG(ERROR, "has unfinish tx status when transfer commit", K(*this), K(tx_id), K(task_id));
    } else {
      ret = OB_OP_NOT_ALLOW;
      TRANS_LOG(WARN, "has unfinish tx status", KR(ret), K(*this), K(tx_id), K(task_id));
    }
  } else if (is_end_notify) {
    // the transaction ended: drop the state owned by this mds type
    switch (mds_type) {
      case ObTxDataSourceType::TRANSFER_DEST_PREPARE:
        reset_prepare_op();
        break;
      case ObTxDataSourceType::TRANSFER_MOVE_TX_CTX:
        reset_move_tx_op();
        break;
      default:
        break;
    }
  } else {
    transfer_tx_id_ = tx_id;
    transfer_task_id_ = task_id;
    switch (mds_type) {
      case ObTxDataSourceType::TRANSFER_DEST_PREPARE:
        transfer_prepare_op_ = true;
        transfer_prepare_scn_ = op_scn;
        break;
      case ObTxDataSourceType::TRANSFER_MOVE_TX_CTX:
        move_tx_op_ = true;
        move_tx_scn_ = op_scn;
        break;
      default:
        break;
    }
  }
  return ret;
}
// Follower path of update_status(); the caller already holds lock_.
//
// Each mds type keeps its own scn. A notification is applied only when no scn
// is recorded for that type yet or when op_scn is larger than the recorded
// one; anything else is silently ignored. ON_COMMIT / ON_ABORT clear the state
// of that type, other notifications record tx/task id and op_scn.
// Always returns OB_SUCCESS.
int ObLSTransferStatus::replay_status_inner_(const transaction::ObTransID tx_id,
                                             const int64_t task_id,
                                             const share::SCN op_scn,
                                             const transaction::NotifyType op_type,
                                             const transaction::ObTxDataSourceType mds_type)
{
  int ret = OB_SUCCESS;
  const bool is_end_notify = (NotifyType::ON_COMMIT == op_type
                              || NotifyType::ON_ABORT == op_type);

  switch (mds_type) {
    case ObTxDataSourceType::TRANSFER_DEST_PREPARE: {
      const bool is_newer = !transfer_prepare_scn_.is_valid() || transfer_prepare_scn_ < op_scn;
      if (!is_newer) {
        // filtered out: not newer than what is already recorded
      } else if (is_end_notify) {
        reset_prepare_op();
      } else {
        transfer_tx_id_ = tx_id;
        transfer_task_id_ = task_id;
        transfer_prepare_op_ = true;
        transfer_prepare_scn_ = op_scn;
      }
      break;
    }
    case ObTxDataSourceType::TRANSFER_MOVE_TX_CTX: {
      const bool is_newer = !move_tx_scn_.is_valid() || move_tx_scn_ < op_scn;
      if (!is_newer) {
        // filtered out: not newer than what is already recorded
      } else if (is_end_notify) {
        reset_move_tx_op();
      } else {
        transfer_tx_id_ = tx_id;
        transfer_task_id_ = task_id;
        move_tx_op_ = true;
        move_tx_scn_ = op_scn;
      }
      break;
    }
    default:
      break;
  }
  return ret;
}
// Reads the transfer-prepare flag and its scn as one consistent pair.
//
// @param[out] enable  current value of transfer_prepare_op_
// @param[out] scn     current value of transfer_prepare_scn_
// @return OB_NOT_INIT when the object is not initialized (outputs untouched),
//         OB_SUCCESS otherwise
int ObLSTransferStatus::get_transfer_prepare_status(
    bool &enable,
    share::SCN &scn)
{
  int ret = OB_SUCCESS;
  if (is_inited_) {
    // both fields are copied under lock_ so they belong to the same update
    ObSpinLockGuard guard(lock_);
    enable = transfer_prepare_op_;
    scn = transfer_prepare_scn_;
  } else {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "ObLSTransferStatus not init", K(ret), K(*this));
  }
  return ret;
}
}
}

View File

@ -0,0 +1,73 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEABASE_STORAGE_OB_LS_TRANSFER_STATUS
#define OCEABASE_STORAGE_OB_LS_TRANSFER_STATUS
#include "lib/lock/ob_spin_lock.h"
#include "storage/tx/ob_trans_define.h"
namespace oceanbase
{
namespace storage
{
// Per-log-stream record of an in-flight transfer.
//
// Tracks the transfer transaction / task id plus two independent operations,
// "transfer prepare" and "move tx", each with an on/off flag and the scn at
// which it was recorded. Updates come through update_status(), which takes
// lock_ before touching the fields.
class ObLSTransferStatus
{
public:
// construction and destruction both go through reset()
ObLSTransferStatus() { reset(); }
~ObLSTransferStatus() { reset(); }
int init(ObLS *ls);
void reset();
int online();
// clears both operations under lock_; always returns OB_SUCCESS
int offline();
bool is_finished();
void reset_prepare_op();
void reset_move_tx_op();
// Records a transfer notification. mds_type must be TRANSFER_DEST_PREPARE or
// TRANSFER_MOVE_TX_CTX; op_scn may be invalid only for REGISTER_SUCC/ON_ABORT.
int update_status(const transaction::ObTransID tx_id,
const int64_t task_id,
const share::SCN op_scn,
const transaction::NotifyType op_type,
const transaction::ObTxDataSourceType mds_type);
// NOTE: the two inline getters below read their field without taking lock_
transaction::ObTransID get_tx_id() { return transfer_tx_id_; }
bool get_transfer_prepare_enable() { return transfer_prepare_op_; }
// returns flag and scn of the prepare operation as a pair read under lock_
int get_transfer_prepare_status(bool &enable, share::SCN &scn);
TO_STRING_KV(K_(ls), K_(transfer_tx_id), K_(transfer_task_id),
K_(transfer_prepare_op), K_(transfer_prepare_scn),
K_(move_tx_op), K_(move_tx_scn));
private:
// path taken by update_status() when the role is not FOLLOWER; expects lock_ held
int update_status_inner_(const transaction::ObTransID tx_id,
const int64_t task_id,
const share::SCN op_scn,
const transaction::NotifyType op_type,
const transaction::ObTxDataSourceType mds_type);
// path taken by update_status() when the role is FOLLOWER; expects lock_ held
int replay_status_inner_(const transaction::ObTransID tx_id,
const int64_t task_id,
const share::SCN op_scn,
const transaction::NotifyType op_type,
const transaction::ObTxDataSourceType mds_type);
private:
bool is_inited_;
// owning log stream; used to query the role through its log handler
ObLS *ls_;
// protects the transfer_* / move_tx_* fields below
common::ObSpinLock lock_;
// transaction and task of the transfer currently recorded
transaction::ObTransID transfer_tx_id_;
int64_t transfer_task_id_;
// state of the TRANSFER_DEST_PREPARE operation
bool transfer_prepare_op_;
share::SCN transfer_prepare_scn_;
// state of the TRANSFER_MOVE_TX_CTX operation
bool move_tx_op_;
share::SCN move_tx_scn_;
};
} // end storage
} // end oceanbase
#endif

View File

@ -813,6 +813,73 @@ int ObLSTxService::check_tx_blocked(bool &tx_blocked) const
}
return ret;
}
} // transaction
// Forwards the transfer-out operation to the tx ctx manager and logs how long
// the call took together with the counters it reported.
//
// @param[out] active_tx_count  filled by the manager
// @param[out] op_tx_count      filled by the manager
// @return the manager's result code
int ObLSTxService::transfer_out_tx_op(int64_t except_tx_id,
                                      const share::SCN data_end_scn,
                                      const share::SCN op_scn,
                                      transaction::NotifyType op_type,
                                      bool is_replay,
                                      share::ObLSID dest_ls_id,
                                      int64_t transfer_epoch,
                                      int64_t &active_tx_count,
                                      int64_t &op_tx_count)
{
  int ret = OB_SUCCESS;
  const int64_t begin_ts = ObTimeUtility::current_time();
  if (OB_FAIL(mgr_->transfer_out_tx_op(except_tx_id,
                                       data_end_scn,
                                       op_scn,
                                       op_type,
                                       is_replay,
                                       dest_ls_id,
                                       transfer_epoch,
                                       active_tx_count,
                                       op_tx_count))) {
    TRANS_LOG(WARN, "for each tx ctx error", KR(ret));
  }
  // the elapsed time is taken after the failure log, like the summary below
  const int64_t elapsed = ObTimeUtility::current_time() - begin_ts;
  LOG_INFO("transfer_out_tx_op", KR(ret), K(op_type), "cost", elapsed, K(active_tx_count), K(op_tx_count));
  return ret;
}
// Forwards to the tx ctx manager's wait_tx_write_end() with the caller's
// timeout context and logs the elapsed time of the call.
//
// @return the manager's result code
int ObLSTxService::wait_tx_write_end(ObTimeoutCtx &timeout_ctx)
{
  int ret = OB_SUCCESS;
  const int64_t begin_ts = ObTimeUtility::current_time();
  if (OB_FAIL(mgr_->wait_tx_write_end(timeout_ctx))) {
    TRANS_LOG(WARN, "for each tx ctx error", KR(ret));
  }
  const int64_t elapsed = ObTimeUtility::current_time() - begin_ts;
  LOG_INFO("wait_tx_write_end", KR(ret), "cost", elapsed);
  return ret;
}
int ObLSTxService::collect_tx_ctx(const ObLSID dest_ls_id,
const SCN log_scn,
const ObIArray<ObTabletID> &tablet_list,
int64_t &tx_count,
int64_t &collect_count,
ObIArray<ObTxCtxMoveArg> &res)
{
int ret = OB_SUCCESS;
int64_t start_time = ObTimeUtility::current_time();
if (OB_FAIL(mgr_->collect_tx_ctx(dest_ls_id, log_scn, tablet_list, tx_count, collect_count, res))) {
TRANS_LOG(WARN, "for each tx ctx error", KR(ret));
}
int64_t end_time = ObTimeUtility::current_time();
LOG_INFO("collect_tx_ctx", KR(ret), K(ls_id_), "cost_us", end_time - start_time,
K(tx_count), K(collect_count));
return ret;
}
int ObLSTxService::move_tx_op(const ObTransferMoveTxParam &move_tx_param,
const ObIArray<ObTxCtxMoveArg> &args)
{
int ret = OB_SUCCESS;
int64_t start_time = ObTimeUtility::current_time();
if (OB_FAIL(mgr_->move_tx_op(move_tx_param, args))) {
TRANS_LOG(WARN, "for each tx ctx error", KR(ret));
}
int64_t end_time = ObTimeUtility::current_time();
LOG_INFO("move_tx_ctx", KR(ret), K(ls_id_),"cost_us", end_time - start_time,
"count", args.count());
return ret;
}
} // transaction
} // oceanbase

View File

@ -32,6 +32,8 @@ class SCN;
namespace storage
{
class ObLS;
struct ObTxCtxMoveArg;
struct ObTransferMoveTxParam;
}
namespace transaction
@ -165,6 +167,24 @@ public:
int get_common_checkpoint_info(
ObIArray<checkpoint::ObCommonCheckpointVTInfo> &common_checkpoint_array);
int transfer_out_tx_op(int64_t except_tx_id,
const share::SCN data_end_scn,
const share::SCN op_scn,
transaction::NotifyType op_type,
bool is_replay,
share::ObLSID dest_ls_id,
int64_t transfer_epoch,
int64_t &active_tx_count,
int64_t &op_tx_count);
int wait_tx_write_end(ObTimeoutCtx &timeout_ctx);
int collect_tx_ctx(const share::ObLSID dest_ls_id,
const share::SCN log_scn,
const ObIArray<ObTabletID> &tablet_list,
int64_t &tx_count,
int64_t &collect_count,
ObIArray<ObTxCtxMoveArg> &args);
int move_tx_op(const ObTransferMoveTxParam &move_tx_param,
const ObIArray<ObTxCtxMoveArg> &arg);
public:
transaction::ObTransService *get_trans_service() { return trans_service_; }

View File

@ -173,10 +173,6 @@ public:
{
tx_table_guards_.src_tx_table_guard_ = tx_table_guard;
}
void set_transfer_scn(const share::SCN transfer_scn)
{
tx_table_guards_.transfer_start_scn_ = transfer_scn;
}
void init_replay(transaction::ObPartTransCtx &tx_ctx,
ObMemtableCtx &mem_ctx,
const transaction::ObTransID &tx_id)

View File

@ -44,10 +44,6 @@ int ObMvccValueIterator::init(ObMvccAccessCtx &ctx,
} else if (OB_ISNULL(value)) {
// row not exist
is_inited_ = true;
} else if (query_flag.iter_uncommitted_row()) {
value_ = value;
is_inited_ = true;
version_iter_ = value->get_list_head();
} else {
value_ = value;
if (OB_FAIL(lock_for_read_(query_flag))) {
@ -136,6 +132,8 @@ int ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag,
const bool read_latest = flag.is_read_latest();
const ObTransID &data_tx_id = iter->get_tx_id();
const bool read_uncommitted = flag.iter_uncommitted_row();
// NB: We need pay much attention to the order of the reads to the different
// variables. Although we update the version before the state for the tnodes
// and read the state before the version. It may appear that the compiled code
@ -148,7 +146,7 @@ int ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag,
const bool is_delayed_cleanout = iter->is_delayed_cleanout();
const SCN scn = iter->get_scn();
// Opt1: data is decided
if ((is_committed || is_aborted || is_elr)
if ((is_committed || is_aborted || (is_elr && !is_delayed_cleanout))
// Opt2: data is not decided while we donot need cleanout
|| (!is_delayed_cleanout
&& (// Opt2.1: snapshot reads the data written by snapshot
@ -160,7 +158,10 @@ int ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag,
if (is_committed || is_elr) {
// Case 2: Data is committed, so the state is decided
const SCN data_version = iter->trans_version_.atomic_load();
if (ctx_->get_snapshot_version() >= data_version) {
if (read_uncommitted) {
// Case 2.0 Read the version if we need the uncommitted version
version_iter_ = iter;
} else if (ctx_->get_snapshot_version() >= data_version) {
// Case 2.1 Read the version if it is smaller than read version
version_iter_ = iter;
} else {
@ -173,7 +174,10 @@ int ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag,
iter = iter->prev_;
} else {
// Case 4: data is during execution
if (read_latest && data_tx_id == ctx_->tx_id_) {
if (read_uncommitted) {
// Case 4.0 Read the version if we need the uncommitted version
version_iter_ = iter;
} else if (read_latest && data_tx_id == ctx_->tx_id_) {
// Case 4.1: data is written by the current txn and we also need read the
// latest data(eg: check existence), then we can read it if it
// is not undone
@ -204,8 +208,7 @@ int ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag,
// when data is delay cleanout
bool can_read = false;
SCN data_version;
data_version.set_max();
bool is_determined_state = false;
data_version.set_invalid();
// Opt3: we only cleanout tx node who is delay cleanout
ObCleanoutOp *cleanout_op;
@ -217,29 +220,32 @@ int ObMvccValueIterator::lock_for_read_inner_(const ObQueryFlag &flag,
cleanout_op = &clean_nothing_op;
}
ObReCheckTxNodeForLockForReadOperation recheck_tx_node_op(*iter, can_read, data_version, is_determined_state);
ObReCheckTxNodeForLockForReadOperation recheck_tx_node_op(*iter,
can_read,
data_version);
ObReCheckOp *recheck_op = &recheck_tx_node_op;
ObLockForReadArg lock_for_read_arg(*ctx_,
data_tx_id,
iter->get_seq_no(),
read_latest,
read_uncommitted,
scn);
if (OB_FAIL(ctx_->get_tx_table_guards().lock_for_read(lock_for_read_arg,
can_read,
data_version,
is_determined_state,
*cleanout_op,
*recheck_op))) {
can_read,
data_version,
*cleanout_op,
*recheck_op))) {
TRANS_LOG(WARN, "lock for read failed", KPC(iter), K(lock_for_read_arg));
} else if (can_read && ctx_->get_snapshot_version() >= data_version) {
} else if (can_read) {
// Case 5.1: data is cleanout by lock for read and can be read by reader's
// snapshot
int counter = 0;
while (OB_SUCC(ret)
&& !ctx_->is_standby_read_
&& is_determined_state
&& !read_uncommitted
&& transaction::is_effective_trans_version(data_version)
&& !(iter->is_committed() || iter->is_aborted() || iter->is_elr())) {
if (OB_FAIL(try_cleanout_tx_node_(iter))) {
TRANS_LOG(WARN, "cleanout tx state failed", K(ret), KPC(value_), KPC(iter));

View File

@ -226,6 +226,11 @@ void ObMemtableCtx::wait_pending_write()
WRLockGuard wrguard(rwlock_);
}
// Takes rwlock_ in write mode and releases it again as soon as the guard goes
// out of scope; nothing is done while the lock is held.
// NOTE(review): presumably this is a barrier that returns once every holder
// that entered before the call has left — confirm against the code paths that
// take rwlock_ in read mode.
void ObMemtableCtx::wait_write_end()
{
WRLockGuard wrguard(rwlock_);
}
SCN ObMemtableCtx::get_tx_end_scn() const
{
return ctx_->get_tx_end_log_ts();
@ -988,6 +993,15 @@ int ObMemtableCtx::get_table_lock_store_info(ObTableLockInfo &table_lock_info)
return ret;
}
// Thin wrapper: delegates to lock_mem_ctx_ to fill table_lock_info for the
// tablets in tablet_list and logs a warning when that fails.
//
// @return the result code of ObLockMemCtx::get_table_lock_for_transfer()
int ObMemtableCtx::get_table_lock_for_transfer(ObTableLockInfo &table_lock_info, const ObIArray<ObTabletID> &tablet_list)
{
  int ret = lock_mem_ctx_.get_table_lock_for_transfer(table_lock_info, tablet_list);
  if (OB_SUCCESS != ret) {
    TRANS_LOG(WARN, "get tablet lock for transfer failed", K(ret));
  }
  return ret;
}
int ObMemtableCtx::recover_from_table_lock_durable_info(const ObTableLockInfo &table_lock_info)
{
int ret = OB_SUCCESS;

View File

@ -356,6 +356,7 @@ public:
virtual void inc_ref();
virtual void dec_ref();
void wait_pending_write();
void wait_write_end();
virtual int write_auth(const bool exclusive);
virtual int write_done();
virtual int trans_begin();
@ -488,6 +489,7 @@ public:
const share::SCN &scn);
int recover_from_table_lock_durable_info(const ObTableLockInfo &table_lock_info);
int get_table_lock_store_info(ObTableLockInfo &table_lock_info);
int get_table_lock_for_transfer(ObTableLockInfo &table_lock_info, const ObIArray<common::ObTabletID> &tablet_list);
// for deadlock detect.
void set_table_lock_killed() { lock_mem_ctx_.set_killed(); }
bool is_table_lock_killed() const { return lock_mem_ctx_.is_killed(); }

View File

@ -891,12 +891,27 @@ int ObTenantMetaMemMgr::get_min_end_scn_from_single_tablet(ObTablet *tablet,
SCN &min_end_scn)
{
int ret = OB_SUCCESS;
bool is_committed = false;
ObTabletCreateDeleteMdsUserData user_data;
ObTabletMemberWrapper<ObTabletTableStore> table_store_wrapper;
if (OB_ISNULL(tablet)) {
ret = OB_INVALID_ARGUMENT;
STORAGE_LOG(WARN, "tablet is nullptr.", K(ret), KP(this));
} else if (OB_FAIL(tablet->fetch_table_store(table_store_wrapper))) {
LOG_WARN("fail to fetch table store", K(ret));
} else if (OB_FAIL(tablet->ObITabletMdsInterface::get_latest_tablet_status(user_data, is_committed))) {
LOG_WARN("get tablet status failed", KR(ret), KP(tablet));
} else if (ObTabletStatus::TRANSFER_IN == user_data.tablet_status_) {
/* when tablet transfer with active tx, dest_ls may recycle active transaction tx_data
* because no uncommitted data depend it, but src_ls's tablet may has uncommitted data depend this tx_data
* so we must concern src_ls's tablet boundary to stop recycle tx_data
*/
if (!user_data.transfer_scn_.is_valid()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("transfer_scn is invalid", K(ret), K(user_data));
} else {
min_end_scn = SCN::scn_dec(user_data.transfer_scn_);
}
} else {
ObITable *first_minor_mini_sstable =
table_store_wrapper.get_member()->get_minor_sstables().get_boundary_table(false /*is_last*/);

View File

@ -32,10 +32,12 @@ namespace mds
class BufferCtx
{
public:
BufferCtx() : binding_type_id_(INVALID_VALUE) {}
BufferCtx() : binding_type_id_(INVALID_VALUE),is_incomplete_replay_(false) {}
virtual ~BufferCtx() {}
void set_binding_type_id(const int64_t type_id) { binding_type_id_ = type_id; }
int64_t get_binding_type_id() const { return binding_type_id_; }
void set_incomplete_replay(const bool incomplete_replay) { is_incomplete_replay_ = incomplete_replay; }
bool is_incomplete_replay() const { return is_incomplete_replay_; }
// 允许用户重写的方法
virtual const MdsWriter get_writer() const = 0;
virtual void on_redo(const share::SCN &redo_scn) {}
@ -54,6 +56,7 @@ public:
virtual int64_t get_serialize_size(void) const = 0;
private:
int64_t binding_type_id_;
bool is_incomplete_replay_;
};
// 该结构嵌入事务上下文中,与多数据源的BufferNode一一对应,同事务状态一起持久化以及恢复
@ -89,4 +92,4 @@ private:
}
}
}
#endif
#endif

View File

@ -32,6 +32,7 @@
#include "src/storage/tablet/ob_tablet_start_transfer_mds_helper.h"
#include "src/storage/tablet/ob_tablet_finish_transfer_mds_helper.h"
#include "src/share/balance/ob_balance_task_table_operator.h"
#include "src/storage/tablet/ob_tablet_transfer_tx_ctx.h"
#endif
/**************************************************************************************************/
@ -100,6 +101,22 @@ _GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION_(HELPER_CLASS, BUFFER_CTX_TYPE, ID, ENU
::oceanbase::storage::mds::MdsCtx,\
24,\
TRANSFER_TASK)
GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION(::oceanbase::storage::ObTabletStartTransferOutPrepareHelper,\
::oceanbase::storage::mds::MdsCtx,\
25,\
START_TRANSFER_OUT_PREPARE)
GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION(::oceanbase::storage::ObTabletStartTransferOutV2Helper,\
::oceanbase::storage::ObTransferOutTxCtx,\
26,\
START_TRANSFER_OUT_V2)
GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION(::oceanbase::storage::ObStartTransferMoveTxHelper,\
::oceanbase::storage::ObTransferMoveTxCtx,\
27,\
TRANSFER_MOVE_TX_CTX)
GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION(::oceanbase::storage::ObStartTransferDestPrepareHelper,\
::oceanbase::storage::ObTransferDestPrepareTxCtx,\
28,\
TRANSFER_DEST_PREPARE)
#undef GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION
#endif
/**************************************************************************************************/

View File

@ -25,8 +25,8 @@ namespace mds
MdsCtx::MdsCtx() : state_(TwoPhaseCommitState::STATE_INIT) {}
MdsCtx::MdsCtx(const MdsWriter &writer)
: writer_(writer),
state_(TwoPhaseCommitState::STATE_INIT) {}
: state_(TwoPhaseCommitState::STATE_INIT),
writer_(writer){}
MdsCtx::~MdsCtx()
{

View File

@ -37,7 +37,7 @@ namespace storage
namespace mds
{
class MdsTableHandle;
class MdsCtx final : public BufferCtx
class MdsCtx : public BufferCtx
{
friend class MdsNode;
OB_UNIS_VERSION(1);
@ -112,12 +112,13 @@ private:
}
private:
List<MdsNode> write_list_;
MdsWriter writer_;
TwoPhaseCommitState state_;
MdsLock lock_;
protected: // for serialize in derived class
MdsWriter writer_;
};
OB_SERIALIZE_MEMBER_TEMP(inline, MdsCtx, writer_);
}
}
}
#endif
#endif

View File

@ -131,12 +131,16 @@ int MdsFactory::deep_copy_buffer_ctx(const transaction::ObTransID &trans_id,
return ret;
}
template <typename T, typename std::enable_if<std::is_same<T, MdsCtx>::value, bool>::type = true>
template <typename T, typename std::enable_if<std::is_base_of<MdsCtx, T>::value ||
std::is_same<T, ObTransferDestPrepareTxCtx>::value ||
std::is_same<T, ObTransferMoveTxCtx>::value, bool>::type = true>
void try_set_writer(T &ctx, const transaction::ObTransID &trans_id) {
ctx.set_writer(MdsWriter(trans_id));
}
template <typename T, typename std::enable_if<!std::is_same<T, MdsCtx>::value, bool>::type = true>
template <typename T, typename std::enable_if<!(std::is_base_of<MdsCtx, T>::value ||
std::is_same<T, ObTransferDestPrepareTxCtx>::value ||
std::is_same<T, ObTransferMoveTxCtx>::value), bool>::type = true>
void try_set_writer(T &ctx, const transaction::ObTransID &trans_id) {
// do nothing
}

View File

@ -866,6 +866,7 @@ bool ObGetTransferStartScnArg::is_valid() const
OB_SERIALIZE_MEMBER(ObGetTransferStartScnArg, tenant_id_, src_ls_id_, tablet_list_);
ObGetTransferStartScnRes::ObGetTransferStartScnRes()
: start_scn_()
{
@ -883,6 +884,26 @@ bool ObGetTransferStartScnRes::is_valid() const
OB_SERIALIZE_MEMBER(ObGetTransferStartScnRes, start_scn_);
// Starts out invalid: tenant id is OB_INVALID_ID and the ls id is
// default-constructed.
ObStorageTransferCommonArg::ObStorageTransferCommonArg()
    : tenant_id_(OB_INVALID_ID), ls_id_()
{}

// Puts the argument back into the state produced by the default constructor.
void ObStorageTransferCommonArg::reset()
{
  ls_id_.reset();
  tenant_id_ = OB_INVALID_ID;
}

// Valid only when a tenant id has been set and the ls id is valid.
bool ObStorageTransferCommonArg::is_valid() const
{
  bool valid = false;
  if (OB_INVALID_ID != tenant_id_) {
    valid = ls_id_.is_valid();
  }
  return valid;
}
OB_SERIALIZE_MEMBER(ObStorageTransferCommonArg, tenant_id_, ls_id_);
ObTransferTabletInfoArg::ObTransferTabletInfoArg()
: tenant_id_(OB_INVALID_ID),
src_ls_id_(),
@ -3057,6 +3078,81 @@ int ObStorageFetchLSViewP::process()
return ret;
}
// Only forwards the bandwidth throttle to the ObStorageStreamRpcP base.
ObStorageSubmitTxLogP::ObStorageSubmitTxLogP(
common::ObInOutBandwidthThrottle *bandwidth_throttle)
: ObStorageStreamRpcP(bandwidth_throttle)
{
}
// Handler of the submit-tx-log rpc.
//
// After switching to the requested tenant it looks up the log stream named in
// the argument, asks its tx service to submit redo log for the transactions it
// traverses, and answers with the log handler's max scn read afterwards.
//
// @return OB_INVALID_ARGUMENT for an invalid argument, OB_ERR_UNEXPECTED when
//         the ls handle holds no ls, otherwise the first error hit on the way
int ObStorageSubmitTxLogP::process()
{
  int ret = OB_SUCCESS;
  const uint64_t tenant_id = arg_.tenant_id_;
  const share::ObLSID &ls_id = arg_.ls_id_;

  MTL_SWITCH(tenant_id) {
    ObLSHandle ls_handle;
    ObLS *ls = NULL;
    if (!arg_.is_valid()) {
      ret = OB_INVALID_ARGUMENT;
      LOG_WARN("get invalid args", K(ret), K_(arg));
    } else if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) {
      LOG_WARN("ls_srv->get_ls() fail", K(ret), K(ls_id));
    } else if (OB_ISNULL(ls = ls_handle.get_ls())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("ls is NULL", KR(ret), K(ls_handle));
    } else {
      transaction::ObTransID failed_tx_id;
      SCN scn;
      if (OB_FAIL(ls->get_tx_svr()->traverse_trans_to_submit_redo_log(failed_tx_id))) {
        LOG_WARN("failed to submit tx log", K(ret), KPC(ls), K(failed_tx_id));
      } else if (OB_FAIL(ls->get_log_handler()->get_max_scn(scn))) {
        LOG_WARN("log_handler get_max_scn failed", K(ret), K(ls_id));
      } else {
        // the max scn is read only after the redo submission succeeded
        result_ = scn;
        LOG_INFO("success to submit tx log", K(ret), K_(arg));
      }
    }
  }
  return ret;
}
// Only forwards the bandwidth throttle to the ObStorageStreamRpcP base.
ObStorageGetTransferDestPrepareSCNP::ObStorageGetTransferDestPrepareSCNP(
common::ObInOutBandwidthThrottle *bandwidth_throttle)
: ObStorageStreamRpcP(bandwidth_throttle)
{
}
// Handler of the get-transfer-dest-prepare-scn rpc.
//
// After switching to the requested tenant it looks up the log stream named in
// the argument and answers with the scn stored in its transfer status for the
// prepare operation. The prepare operation must currently be enabled.
//
// @return OB_INVALID_ARGUMENT for an invalid argument, OB_ERR_UNEXPECTED when
//         the ls handle holds no ls or the prepare operation is not enabled,
//         otherwise the first error hit on the way
int ObStorageGetTransferDestPrepareSCNP::process()
{
  int ret = OB_SUCCESS;
  const uint64_t tenant_id = arg_.tenant_id_;
  const share::ObLSID &ls_id = arg_.ls_id_;

  MTL_SWITCH(tenant_id) {
    ObLSHandle ls_handle;
    ObLS *ls = NULL;
    if (!arg_.is_valid()) {
      ret = OB_INVALID_ARGUMENT;
      LOG_WARN("get invalid args", K(ret), K_(arg));
    } else if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) {
      LOG_WARN("ls_srv->get_ls() fail", K(ret), K(ls_id));
    } else if (OB_ISNULL(ls = ls_handle.get_ls())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("ls is NULL", KR(ret), K(ls_handle));
    } else {
      bool enable = false;
      SCN scn;
      if (OB_FAIL(ls->get_transfer_status().get_transfer_prepare_status(enable, scn))) {
        LOG_WARN("failed to get wrs handler transfer_prepare status", K(ret));
      } else if (enable) {
        result_ = scn;
        LOG_INFO("success to get wrs handler transfer_dest_prepare_scn", K(ret), K_(arg), K(scn));
      } else {
        // a stored scn is meaningless while the prepare operation is off
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("wrs handler not enter transfer_prepare status", K(ret), K_(arg));
      }
    }
  }
  return ret;
}
ObStorageLockConfigChangeP::ObStorageLockConfigChangeP(
common::ObInOutBandwidthThrottle *bandwidth_throttle)
: ObStorageStreamRpcP(bandwidth_throttle)
@ -3518,6 +3614,69 @@ int ObStorageRpc::get_transfer_start_scn(
return ret;
}
// Client side of the submit-tx-log rpc.
//
// Sends tenant id and ls id to src_info's address and returns the scn carried
// in the reply.
//
// @param[out] data_end_scn  scn from the reply; written only on success
// @return OB_NOT_INIT, OB_INVALID_ARGUMENT or the rpc result
int ObStorageRpc::submit_tx_log(
    const uint64_t tenant_id,
    const ObStorageHASrcInfo &src_info,
    const share::ObLSID &ls_id,
    SCN &data_end_scn)
{
  int ret = OB_SUCCESS;
  if (!is_inited_) {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "storage rpc is not inited", K(ret));
  } else if (OB_INVALID_ID == tenant_id || !src_info.is_valid() || !ls_id.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "invalid argument", K(ret), K(tenant_id), K(src_info), K(ls_id));
  } else {
    ObStorageTransferCommonArg arg;
    arg.ls_id_ = ls_id;
    arg.tenant_id_ = tenant_id;
    // the reply lands in a temporary so the out parameter stays untouched on failure
    SCN reply_scn;
    if (OB_FAIL(rpc_proxy_->to(src_info.src_addr_)
                           .by(tenant_id)
                           .dst_cluster_id(src_info.cluster_id_)
                           .group_id(share::OBCG_STORAGE_HA_LEVEL2)
                           .submit_tx_log(arg, reply_scn))) {
      LOG_WARN("failed to submit tx log", K(ret), K(src_info), K(arg));
    } else {
      data_end_scn = reply_scn;
    }
  }
  return ret;
}
// Client side of the get-transfer-dest-prepare-scn rpc.
//
// Sends tenant id and ls id to src_info's address and returns the scn carried
// in the reply.
//
// @param[out] scn  scn from the reply; written only on success
// @return OB_NOT_INIT, OB_INVALID_ARGUMENT or the rpc result
int ObStorageRpc::get_transfer_dest_prepare_scn(
    const uint64_t tenant_id,
    const ObStorageHASrcInfo &src_info,
    const share::ObLSID &ls_id,
    SCN &scn)
{
  int ret = OB_SUCCESS;
  if (!is_inited_) {
    ret = OB_NOT_INIT;
    STORAGE_LOG(WARN, "storage rpc is not inited", K(ret));
  } else if (OB_INVALID_ID == tenant_id || !src_info.is_valid() || !ls_id.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    STORAGE_LOG(WARN, "invalid argument", K(ret), K(tenant_id), K(src_info), K(ls_id));
  } else {
    ObStorageTransferCommonArg arg;
    arg.ls_id_ = ls_id;
    arg.tenant_id_ = tenant_id;
    // the reply lands in a temporary so the out parameter stays untouched on failure
    SCN reply_scn;
    if (OB_FAIL(rpc_proxy_->to(src_info.src_addr_)
                           .by(tenant_id)
                           .dst_cluster_id(src_info.cluster_id_)
                           .group_id(share::OBCG_STORAGE_HA_LEVEL2)
                           .get_transfer_dest_prepare_scn(arg, reply_scn))) {
      LOG_WARN("failed to get transfer_dest_prepare_scn", K(ret), K(src_info), K(arg));
    } else {
      scn = reply_scn;
    }
  }
  return ret;
}
int ObStorageRpc::lock_config_change(
const uint64_t tenant_id,
const ObStorageHASrcInfo &src_info,

View File

@ -664,6 +664,35 @@ private:
DISALLOW_COPY_AND_ASSIGN(ObCheckStartTransferTabletsRes);
};
// Serializable rpc argument that names a log stream of a tenant together with
// an scn.
// NOTE(review): judging by the names this is the request of a "block tx" rpc
// and gts_ a global timestamp — the users are not visible here, confirm.
struct ObStorageBlockTxArg final
{
OB_UNIS_VERSION(1);
public:
ObStorageBlockTxArg();
~ObStorageBlockTxArg() {}
bool is_valid() const;
void reset();
TO_STRING_KV(K_(tenant_id), K_(ls_id), K_(gts));
// tenant that owns the log stream
uint64_t tenant_id_;
// log stream the request is addressed to
share::ObLSID ls_id_;
share::SCN gts_;
};
// Serializable rpc argument that only names a log stream of a tenant.
// It is the request type of both the submit_tx_log and the
// get_transfer_dest_prepare_scn rpc.
struct ObStorageTransferCommonArg final
{
OB_UNIS_VERSION(1);
public:
ObStorageTransferCommonArg();
~ObStorageTransferCommonArg() {}
// true when tenant_id_ is not OB_INVALID_ID and ls_id_ is valid
bool is_valid() const;
void reset();
TO_STRING_KV(K_(tenant_id), K_(ls_id));
// tenant that owns the log stream
uint64_t tenant_id_;
// log stream the request is addressed to
share::ObLSID ls_id_;
};
struct ObStorageKillTxArg final
{
OB_UNIS_VERSION(1);
@ -754,6 +783,8 @@ public:
RPC_S(PR5 update_ls_meta, OB_HA_UPDATE_LS_META, (ObRestoreUpdateLSMetaArg));
RPC_S(PR5 get_ls_active_trans_count, OB_GET_LS_ACTIVE_TRANSACTION_COUNT, (ObGetLSActiveTransCountArg), ObGetLSActiveTransCountRes);
RPC_S(PR5 get_transfer_start_scn, OB_GET_TRANSFER_START_SCN, (ObGetTransferStartScnArg), ObGetTransferStartScnRes);
RPC_S(PR5 submit_tx_log, OB_HA_SUBMIT_TX_LOG, (ObStorageTransferCommonArg), share::SCN);
RPC_S(PR5 get_transfer_dest_prepare_scn, OB_HA_GET_TRANSFER_DEST_PREPARE_SCN, (ObStorageTransferCommonArg), share::SCN);
RPC_S(PR5 lock_config_change, OB_HA_LOCK_CONFIG_CHANGE, (ObStorageConfigChangeOpArg), ObStorageConfigChangeOpRes);
RPC_S(PR5 unlock_config_change, OB_HA_UNLOCK_CONFIG_CHANGE, (ObStorageConfigChangeOpArg), ObStorageConfigChangeOpRes);
RPC_S(PR5 get_config_change_lock_stat, OB_HA_GET_CONFIG_CHANGE_LOCK_STAT, (ObStorageConfigChangeOpArg), ObStorageConfigChangeOpRes);
@ -1060,6 +1091,28 @@ protected:
int64_t max_tablet_num_;
};
// Server-side processor of the OB_HA_SUBMIT_TX_LOG rpc: process() makes the
// addressed log stream submit its transactions' redo log and replies with the
// log handler's max scn.
class ObStorageSubmitTxLogP:
public ObStorageStreamRpcP<OB_HA_SUBMIT_TX_LOG>
{
public:
explicit ObStorageSubmitTxLogP(common::ObInOutBandwidthThrottle *bandwidth_throttle);
virtual ~ObStorageSubmitTxLogP() {}
protected:
int process();
private:
};
// Server-side processor of the OB_HA_GET_TRANSFER_DEST_PREPARE_SCN rpc:
// process() replies with the transfer-prepare scn recorded in the addressed
// log stream's transfer status and fails when that operation is not enabled.
class ObStorageGetTransferDestPrepareSCNP:
public ObStorageStreamRpcP<OB_HA_GET_TRANSFER_DEST_PREPARE_SCN>
{
public:
explicit ObStorageGetTransferDestPrepareSCNP(common::ObInOutBandwidthThrottle *bandwidth_throttle);
virtual ~ObStorageGetTransferDestPrepareSCNP() {}
protected:
int process();
private:
};
class ObStorageLockConfigChangeP:
public ObStorageStreamRpcP<OB_HA_LOCK_CONFIG_CHANGE>
{
@ -1170,6 +1223,19 @@ public:
const share::ObLSID &ls_id,
const common::ObIArray<share::ObTransferTabletInfo> &tablet_list,
share::SCN &transfer_start_scn) = 0;
virtual int submit_tx_log(
const uint64_t tenant_id,
const ObStorageHASrcInfo &src_info,
const share::ObLSID &ls_id,
SCN &data_end_scn) = 0;
virtual int get_transfer_dest_prepare_scn(
const uint64_t tenant_id,
const ObStorageHASrcInfo &src_info,
const share::ObLSID &ls_id,
SCN &scn) = 0;
virtual int lock_config_change(
const uint64_t tenant_id,
const ObStorageHASrcInfo &src_info,
@ -1258,6 +1324,19 @@ public:
const share::ObLSID &ls_id,
const common::ObIArray<share::ObTransferTabletInfo> &tablet_list,
share::SCN &transfer_start_scn);
virtual int submit_tx_log(
const uint64_t tenant_id,
const ObStorageHASrcInfo &src_info,
const share::ObLSID &ls_id,
SCN &data_end_scn);
virtual int get_transfer_dest_prepare_scn(
const uint64_t tenant_id,
const ObStorageHASrcInfo &src_info,
const share::ObLSID &ls_id,
SCN &scn);
virtual int lock_config_change(
const uint64_t tenant_id,
const ObStorageHASrcInfo &src_info,

View File

@ -30,6 +30,8 @@
#include "storage/tx/ob_trans_define.h"
#include "storage/tx/ob_trans_part_ctx.h"
#include "storage/compaction/ob_schedule_dag_func.h"
#include "storage/tx_storage/ob_ls_service.h"
#include "storage/tablet/ob_tablet.h"
namespace oceanbase
{
@ -52,6 +54,7 @@ ObLockMemtable::ObLockMemtable()
pre_rec_scn_(SCN::max_scn()),
max_committed_scn_(),
is_frozen_(false),
need_check_tablet_status_(false),
freezer_(nullptr),
flush_lock_(common::ObLatchIds::CLOG_CKPT_LOCK)
{
@ -105,6 +108,7 @@ void ObLockMemtable::reset()
freeze_scn_.reset();
flushed_scn_.reset();
is_frozen_ = false;
need_check_tablet_status_ = false;
freezer_ = nullptr;
is_inited_ = false;
}
@ -156,6 +160,8 @@ int ObLockMemtable::lock_(
LOG_WARN("lock timeout", K(ret), K(lock_op), K(param));
} else if (OB_FAIL(guard.write_auth(ctx))) {
LOG_WARN("not allow lock table.", K(ret), K(ctx));
} else if (OB_FAIL(check_tablet_write_allow_(lock_op))) {
LOG_WARN("check tablet write allow failed", K(ret), K(lock_op));
} else if (FALSE_IT(mem_ctx = static_cast<ObMemtableCtx *>(ctx.mvcc_acc_ctx_.mem_ctx_))) {
} else if (OB_FAIL(mem_ctx->check_lock_exist(lock_op.lock_id_,
lock_op.owner_id_,
@ -262,6 +268,44 @@ int ObLockMemtable::lock_(
return ret;
}
int ObLockMemtable::check_tablet_write_allow_(const ObTableLockOp &lock_op)
{
int ret = OB_SUCCESS;
ObTabletID tablet_id;
ObLSHandle ls_handle;
ObLS *ls = nullptr;
ObTabletHandle tablet_handle;
ObTabletStatus::Status tablet_status = ObTabletStatus::MAX;
ObTabletCreateDeleteMdsUserData data;
bool is_commited = false;
if (!need_check_tablet_status_) {
} else if (!lock_op.lock_id_.is_tablet_lock()) {
} else if (OB_FAIL(lock_op.lock_id_.convert_to(tablet_id))) {
LOG_WARN("convert lock id to tablet_id failed", K(ret), K(lock_op));
} else if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id_, ls_handle, ObLSGetMod::TABLELOCK_MOD))) {
LOG_WARN("failed to get ls", K(ret), K(ls_id_));
} else if (OB_ISNULL(ls = ls_handle.get_ls())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ls should not be NULL", K(ret), KP(ls));
} else if (OB_FAIL(ls->get_tablet(tablet_id,
tablet_handle,
0,
ObMDSGetTabletMode::READ_WITHOUT_CHECK))) {
LOG_WARN("get tablet with timeout failed", K(ret), K(ls->get_ls_id()), K(tablet_id));
} else if (OB_FAIL(tablet_handle.get_obj()->ObITabletMdsInterface::get_latest_tablet_status(
data, is_commited))) {
LOG_WARN("failed to get CreateDeleteMdsUserData", KR(ret));
} else if (FALSE_IT(tablet_status = data.get_tablet_status())) {
} else if (is_commited && (ObTabletStatus::NORMAL == tablet_status
|| ObTabletStatus::TRANSFER_IN == tablet_status)) {
// allow
} else {
ret = OB_TABLET_NOT_EXIST;
LOG_INFO("tablet status not allow", KR(ret), K(tablet_id), K(is_commited), K(data));
}
return ret;
}
int ObLockMemtable::unlock_(
ObStoreCtx &ctx,
const ObTableLockOp &unlock_op,
@ -292,6 +336,8 @@ int ObLockMemtable::unlock_(
LOG_WARN("unlock timeout", K(ret), K(unlock_op), K(expired_time));
} else if (OB_FAIL(guard.write_auth(ctx))) {
LOG_WARN("not allow unlock table.", K(ret), K(ctx));
} else if (OB_FAIL(check_tablet_write_allow_(unlock_op))) {
LOG_WARN("check tablet write allow failed", K(ret), K(unlock_op));
} else if (FALSE_IT(mem_ctx = static_cast<ObMemtableCtx *>(ctx.mvcc_acc_ctx_.mem_ctx_))) {
// check whether the unlock op exist already
} else if (OB_FAIL(mem_ctx->check_lock_exist(unlock_op.lock_id_,

View File

@ -167,6 +167,8 @@ public:
void set_flushed_scn(const share::SCN &flushed_scn) { flushed_scn_ = flushed_scn; }
void enable_check_tablet_status(const bool need_check) { ATOMIC_STORE(&need_check_tablet_status_, need_check); }
INHERIT_TO_STRING_KV("ObITable", ObITable, KP(this), K_(snapshot_version), K_(ls_id));
private:
enum ObLockStep {
@ -198,6 +200,8 @@ private:
int register_into_deadlock_detector_(const ObStoreCtx &ctx,
const ObTableLockOp &lock_op);
int unregister_from_deadlock_detector_(const ObTableLockOp &lock_op);
int check_tablet_write_allow_(const ObTableLockOp &lock_op);
private:
typedef common::SpinRWLock RWLock;
typedef common::SpinRLockGuard RLockGuard;
@ -217,6 +221,8 @@ private:
share::SCN pre_rec_scn_;
share::SCN max_committed_scn_;
bool is_frozen_;
// for tablet transfer enable check tablet status
bool need_check_tablet_status_;
storage::ObFreezer *freezer_;
RWLock flush_lock_; // lock before change ts

View File

@ -732,6 +732,27 @@ int ObLockTable::switch_to_leader()
return ret;
}
int ObLockTable::enable_check_tablet_status(const bool need_check)
{
int ret = OB_SUCCESS;
ObTableHandleV2 handle;
ObLockMemtable *lock_memtable = nullptr;
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("ObLockTable is not inited", K(ret));
} else if (OB_FAIL(get_lock_memtable(handle))) {
LOG_WARN("get lock memtable failed", K(ret));
// to disable check just skip when no active memtable
if (!need_check && OB_ENTRY_NOT_EXIST == ret) {
ret = OB_SUCCESS;
}
} else if (OB_FAIL(handle.get_lock_memtable(lock_memtable))) {
LOG_WARN("get lock memtable from lock handle failed", K(ret));
} else if (FALSE_IT(lock_memtable->enable_check_tablet_status(need_check))) {
}
return ret;
}
} // tablelock
} // transaction
} // oceanbase

View File

@ -150,6 +150,8 @@ public:
int switch_to_follower_gracefully() override { return OB_SUCCESS; }
int resume_leader() override { return OB_SUCCESS; }
int enable_check_tablet_status(const bool need_check);
private:
// We use the method to recover the lock_table for reboot.
int restore_lock_table_(storage::ObITable &sstable);

View File

@ -187,6 +187,38 @@ int ObLockMemCtx::get_table_lock_store_info(ObTableLockInfo &table_lock_info)
return ret;
}
// Appends to table_lock_info.table_lock_ops_ every valid lock op in lock_list_ that is a
// tablet lock on one of the tablets in tablet_list, and copies max_durable_scn_ into
// table_lock_info (the latter happens on failure as well).
// @return OB_ERR_UNEXPECTED if a matching lock op has not been logged yet, or the
//         error returned by push_back; iteration stops at the first error.
int ObLockMemCtx::get_table_lock_for_transfer(ObTableLockInfo &table_lock_info, const ObIArray<ObTabletID> &tablet_list)
{
  int ret = OB_SUCCESS;
  RDLockGuard guard(list_rwlock_);
  DLIST_FOREACH(curr, lock_list_) {
    if (OB_LIKELY(curr->is_valid())) {
      // Stop scanning the tablet list at the first tablet this op locks.
      bool matched = false;
      const int64_t tablet_cnt = tablet_list.count();
      for (int64_t i = 0; !matched && i < tablet_cnt; ++i) {
        matched = curr->lock_op_.is_tablet_lock(tablet_list.at(i));
      }
      if (!matched) {
        // lock op does not belong to any of the given tablets, nothing to collect
      } else if (!curr->is_logged()) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("lock op is not logged", KR(ret), K(curr));
        break;
      } else if (OB_FAIL(table_lock_info.table_lock_ops_.push_back(curr->lock_op_))) {
        LOG_WARN("fail to push back table_lock store info", K(ret));
        break;
      }
    } else {
      // invalid lock ops are only reported, never collected
      LOG_WARN("the table lock op no should not dump", K(curr->lock_op_));
    }
  }
  table_lock_info.max_durable_scn_ = max_durable_scn_;
  return ret;
}
int ObLockMemCtx::clear_table_lock(
const bool is_committed,
const SCN &commit_version,

View File

@ -106,6 +106,7 @@ public:
void *alloc_lock_op_callback();
void free_lock_op_callback(void *cb);
int get_table_lock_store_info(ObTableLockInfo &table_lock_info);
int get_table_lock_for_transfer(ObTableLockInfo &table_lock_info, const ObIArray<ObTabletID> &tablet_list);
// used by deadlock detector to kill the trans.
void set_killed()
{ is_killed_ = true; }

View File

@ -469,6 +469,10 @@ public:
is_in_trans_common_lock_op_type(op_type_));
}
bool need_replay_or_recover(const ObTableLockOp &lock_op) const;
bool is_tablet_lock(const ObTabletID &tablet_id) {
return lock_id_.is_tablet_lock() && lock_id_.obj_id_ == tablet_id.id();
}
private:
bool is_need_record_lock_mode_() const
{

View File

@ -88,6 +88,7 @@ void ObTabletCreateDeleteMdsUserData::on_redo(const share::SCN &redo_scn)
case ObTabletMdsUserDataType::NONE :
case ObTabletMdsUserDataType::CREATE_TABLET :
case ObTabletMdsUserDataType::REMOVE_TABLET :
case ObTabletMdsUserDataType::START_TRANSFER_OUT_PREPARE:
case ObTabletMdsUserDataType::START_TRANSFER_IN :
case ObTabletMdsUserDataType::FINISH_TRANSFER_OUT : {
break;
@ -124,6 +125,7 @@ void ObTabletCreateDeleteMdsUserData::on_commit(const share::SCN &commit_version
int ret = OB_SUCCESS;
switch (data_type_) {
case ObTabletMdsUserDataType::NONE :
case ObTabletMdsUserDataType::START_TRANSFER_OUT_PREPARE:
case ObTabletMdsUserDataType::FINISH_TRANSFER_IN : {
break;
}

View File

@ -40,6 +40,9 @@ enum class ObTabletMdsUserDataType : int64_t
FINISH_TRANSFER_OUT = 5,
// for finish transfer in
FINISH_TRANSFER_IN = 6,
// for start transfer out prepare
START_TRANSFER_OUT_PREPARE = 7,
MAX_TYPE,
};

View File

@ -30,12 +30,14 @@
#include "storage/high_availability/ob_transfer_service.h"
#include "storage/high_availability/ob_rebuild_service.h"
#include "storage/high_availability/ob_storage_ha_utils.h"
#include "storage/tx/ob_multi_data_source.h"
#define USING_LOG_PREFIX MDS
namespace oceanbase
{
namespace storage
{
using namespace oceanbase::transaction;
/******************ObTabletStartTransferOutReplayExecutor*********************/
class ObTabletStartTransferOutReplayExecutor final : public logservice::ObTabletReplayExecutor
@ -49,7 +51,8 @@ public:
const share::ObLSID &src_ls_id,
const share::ObLSID &dest_ls_id,
const share::ObTransferTabletInfo &tablet_info,
mds::BufferCtx &buffer_ctx);
mds::BufferCtx &buffer_ctx,
ObTxDataSourceType mds_op_type);
protected:
virtual bool is_replay_update_tablet_status_() const override
{
@ -72,6 +75,7 @@ private:
share::ObLSID dest_ls_id_;
share::ObTransferTabletInfo tablet_info_;
mds::BufferCtx *buffer_ctx_;
ObTxDataSourceType mds_op_type_;
DISALLOW_COPY_AND_ASSIGN(ObTabletStartTransferOutReplayExecutor);
};
@ -94,7 +98,8 @@ int ObTabletStartTransferOutReplayExecutor::init(
const share::ObLSID &src_ls_id,
const share::ObLSID &dest_ls_id,
const share::ObTransferTabletInfo &tablet_info,
mds::BufferCtx &buffer_ctx)
mds::BufferCtx &buffer_ctx,
ObTxDataSourceType mds_op_type)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(is_inited_)) {
@ -113,6 +118,7 @@ int ObTabletStartTransferOutReplayExecutor::init(
buffer_ctx_ = &buffer_ctx;
tablet_info_ = tablet_info;
scn_ = scn;
mds_op_type_ = mds_op_type;
is_inited_ = true;
}
return ret;
@ -137,7 +143,11 @@ int ObTabletStartTransferOutReplayExecutor::do_replay_(ObTabletHandle &tablet_ha
LOG_WARN("failed to get tx data", K(ret), KPC(tablet), K(tablet_info_));
} else {
user_data.transfer_ls_id_ = dest_ls_id_;
user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT;
if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_PREPARE) {
user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT_PREPARE;
} else {
user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT;
}
user_data.tablet_status_ = ObTabletStatus::TRANSFER_OUT;
user_data.transfer_scn_.set_min();
//user_data.transfer_scn_ will be update in user data on_redo
@ -159,6 +169,7 @@ int ObTabletStartTransferOutReplayExecutor::check_src_transfer_tablet_(
{
int ret = OB_SUCCESS;
ObTablet *tablet = nullptr;
bool is_committed = true;
ObTabletCreateDeleteMdsUserData user_data;
if (!is_inited_) {
ret = OB_NOT_INIT;
@ -166,11 +177,23 @@ int ObTabletStartTransferOutReplayExecutor::check_src_transfer_tablet_(
} else if (OB_ISNULL(tablet = tablet_handle.get_obj())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet should not be NULL", K(ret), KP(tablet), K(tablet_info_), K(src_ls_id_), K(dest_ls_id_));
} else if (OB_FAIL(tablet->ObITabletMdsInterface::get_tablet_status(share::SCN::max_scn(), user_data, ObTabletCommon::DEFAULT_GET_TABLET_DURATION_US))) {
} else if (OB_FAIL(tablet->ObITabletMdsInterface::get_latest_tablet_status(user_data, is_committed))) {
LOG_WARN("failed to get tx data", K(ret), KPC(tablet), K(tablet_info_));
} else if (scn_ <= tablet->get_tablet_meta().mds_checkpoint_scn_) {
LOG_INFO("skip replay", K(ret), K_(scn), K(tablet->get_tablet_meta()));
} else if (ObTabletStatus::NORMAL != user_data.tablet_status_) {
} else if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT && (
ObTabletStatus::NORMAL != user_data.tablet_status_ ||
!is_committed)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet status is unexpected", K(ret), KPC(tablet), K(tablet_info_), K(user_data));
} else if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_PREPARE && (
ObTabletStatus::NORMAL != user_data.tablet_status_ ||
!is_committed)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet status is unexpected", K(ret), KPC(tablet), K(tablet_info_), K(user_data));
} else if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_V2 && (
ObTabletStatus::TRANSFER_OUT != user_data.tablet_status_ ||
is_committed)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet status is unexpected", K(ret), KPC(tablet), K(tablet_info_), K(user_data));
} else if (tablet_info_.transfer_seq_ != tablet->get_tablet_meta().transfer_info_.transfer_seq_) {
@ -228,6 +251,8 @@ int ObTabletStartTransferOutHelper::on_register_success_(
"tablet_count", tx_start_transfer_out_info.tablet_list_.count());
#endif
ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT;
ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type);
if (!tx_start_transfer_out_info.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("on_register_ get invalid argument", K(ret), K(tx_start_transfer_out_info));
@ -241,7 +266,7 @@ int ObTabletStartTransferOutHelper::on_register_success_(
LOG_WARN("ls should not be NULL", KR(ret), K(tx_start_transfer_out_info), KP(ls));
} else if (CLICK_FAIL(prepare_src_transfer_tablets_(tx_start_transfer_out_info , ls))) {
LOG_WARN("failed to prepare src transfer tablets", K(ret), K(tx_start_transfer_out_info), KPC(ls));
} else if (CLICK_FAIL(update_tablets_transfer_out_(tx_start_transfer_out_info, ls, ctx))) {
} else if (CLICK_FAIL(transfer_out_helper.update_tablets_transfer_out_(tx_start_transfer_out_info, ls, ctx))) {
LOG_WARN("failed to update tables transfer out", K(ret), K(tx_start_transfer_out_info), KPC(ls));
}
@ -325,7 +350,7 @@ int ObTabletStartTransferOutHelper::check_src_transfer_tablet_(
return ret;
}
int ObTabletStartTransferOutHelper::update_tablets_transfer_out_(
int ObTabletStartTransferOutCommonHelper::update_tablets_transfer_out_(
const ObTXStartTransferOutInfo &tx_start_transfer_out_info,
ObLS *ls,
mds::BufferCtx &ctx)
@ -350,7 +375,7 @@ int ObTabletStartTransferOutHelper::update_tablets_transfer_out_(
return ret;
}
int ObTabletStartTransferOutHelper::update_tablet_transfer_out_(
int ObTabletStartTransferOutCommonHelper::update_tablet_transfer_out_(
const share::ObLSID &dest_ls_id,
const share::ObTransferTabletInfo &tablet_info,
ObLS *ls,
@ -361,26 +386,45 @@ int ObTabletStartTransferOutHelper::update_tablet_transfer_out_(
ObTabletHandle tablet_handle;
ObTablet *tablet = nullptr;
ObTabletCreateDeleteMdsUserData user_data;
bool is_committed = true;
if (!tablet_info.is_valid() || OB_ISNULL(ls)) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("check src transfer tablets get invalid argument", K(ret), K(tablet_info), KP(ls));
} else if (mds_op_type_ != ObTxDataSourceType::START_TRANSFER_OUT &&
mds_op_type_ != ObTxDataSourceType::START_TRANSFER_OUT_PREPARE &&
mds_op_type_ != ObTxDataSourceType::START_TRANSFER_OUT_V2) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected mds op type", K(ret), K(mds_op_type_));
} else if (CLICK_FAIL(ls->get_tablet(tablet_info.tablet_id_, tablet_handle, 0,
ObMDSGetTabletMode::READ_WITHOUT_CHECK))) {
LOG_WARN("failed to get tablet", K(ret), K(tablet_info));
} else if (OB_ISNULL(tablet = tablet_handle.get_obj())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet should not be NULL", K(ret), K(tablet_info));
} else if (CLICK_FAIL(tablet->ObITabletMdsInterface::get_tablet_status(share::SCN::max_scn(), user_data, ObTabletCommon::DEFAULT_GET_TABLET_DURATION_US))) {
} else if (CLICK_FAIL(tablet->ObITabletMdsInterface::get_latest_tablet_status(user_data, is_committed))) {
LOG_WARN("failed to get tx data", K(ret), KPC(tablet), K(tablet_info));
} else if (ObTabletStatus::NORMAL != user_data.tablet_status_
|| tablet->get_tablet_meta().transfer_info_.transfer_seq_ != tablet_info.transfer_seq_) {
} else if ((mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT || mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_PREPARE) && (
ObTabletStatus::NORMAL != user_data.tablet_status_ ||
tablet->get_tablet_meta().transfer_info_.transfer_seq_ != tablet_info.transfer_seq_ ||
!is_committed)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet user data is unexpected", K(ret), KPC(tablet), K(tablet_info), K(user_data));
LOG_WARN("tablet user data is unexpected", K(ret), K(mds_op_type_),KPC(tablet), K(tablet_info), K(user_data));
} else if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_V2 && (
ObTabletStatus::TRANSFER_OUT != user_data.tablet_status_ ||
tablet->get_tablet_meta().transfer_info_.transfer_seq_ != tablet_info.transfer_seq_ ||
is_committed ||
ObTabletMdsUserDataType::START_TRANSFER_OUT_PREPARE != user_data.data_type_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tablet user data is unexpected", K(ret), K(mds_op_type_), KPC(tablet), K(tablet_info), K(user_data));
} else {
mds::MdsCtx &user_ctx = static_cast<mds::MdsCtx&>(ctx);
user_data.transfer_ls_id_ = dest_ls_id;
user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT;
if (mds_op_type_ == ObTxDataSourceType::START_TRANSFER_OUT_PREPARE) {
user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT_PREPARE;
} else {
user_data.data_type_ = ObTabletMdsUserDataType::START_TRANSFER_OUT;
}
user_data.tablet_status_ = ObTabletStatus::TRANSFER_OUT;
user_data.transfer_scn_.set_min();
//user_data.transfer_scn_ will be update in user data on_redo
@ -412,6 +456,8 @@ int ObTabletStartTransferOutHelper::on_replay(
ObTXStartTransferOutInfo tx_start_transfer_out_info;
int64_t pos = 0;
const bool for_replay = true;
ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT;
ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type);
ObTransferUtils::set_transfer_module();
if (OB_ISNULL(buf) || len < 0 || !scn.is_valid()) {
@ -443,7 +489,7 @@ int ObTabletStartTransferOutHelper::on_replay(
"scn", scn);
#endif
DEBUG_SYNC(BEFORE_ON_REDO_START_TRANSFER_OUT);
if (CLICK() && FAILEDx(on_replay_success_(scn, tx_start_transfer_out_info, ctx))) {
if (CLICK() && FAILEDx(transfer_out_helper.on_replay_success_(scn, tx_start_transfer_out_info, ctx))) {
LOG_WARN("failed to on register_success_", K(ret), K(scn), K(tx_start_transfer_out_info));
}
#ifdef ERRSIM
@ -459,7 +505,7 @@ int ObTabletStartTransferOutHelper::on_replay(
return ret;
}
int ObTabletStartTransferOutHelper::try_enable_dest_ls_clog_replay(
int ObTabletStartTransferOutCommonHelper::try_enable_dest_ls_clog_replay(
const share::SCN &scn,
const share::ObLSID &dest_ls_id)
{
@ -521,7 +567,7 @@ int ObTabletStartTransferOutHelper::try_enable_dest_ls_clog_replay(
return ret;
}
int ObTabletStartTransferOutHelper::set_transfer_tablets_freeze_flag_(const ObTXStartTransferOutInfo &tx_start_transfer_out_info)
int ObTabletStartTransferOutCommonHelper::set_transfer_tablets_freeze_flag_(const ObTXStartTransferOutInfo &tx_start_transfer_out_info)
{
int ret = OB_SUCCESS;
ObLSService *ls_service = nullptr;
@ -561,7 +607,7 @@ int ObTabletStartTransferOutHelper::set_transfer_tablets_freeze_flag_(const ObTX
return ret;
}
int ObTabletStartTransferOutHelper::on_replay_success_(
int ObTabletStartTransferOutCommonHelper::on_replay_success_(
const share::SCN &scn,
const ObTXStartTransferOutInfo &tx_start_transfer_out_info,
mds::BufferCtx &ctx)
@ -585,16 +631,16 @@ int ObTabletStartTransferOutHelper::on_replay_success_(
if (!scn.is_valid() || !tx_start_transfer_out_info.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("on_replay_success_ get invalid argument", K(ret), K(scn), K(tx_start_transfer_out_info));
} else if (CLICK_FAIL(try_enable_dest_ls_clog_replay(scn, tx_start_transfer_out_info.dest_ls_id_))) {
} else if (mds_op_type_ != ObTxDataSourceType::START_TRANSFER_OUT_PREPARE && CLICK_FAIL(try_enable_dest_ls_clog_replay(scn, tx_start_transfer_out_info.dest_ls_id_))) {
LOG_WARN("failed to try enable dest ls clog replay", K(ret), K(scn), K(tx_start_transfer_out_info));
} else if (CLICK_FAIL(set_transfer_tablets_freeze_flag_(tx_start_transfer_out_info))) {
} else if (mds_op_type_ != ObTxDataSourceType::START_TRANSFER_OUT_PREPARE && CLICK_FAIL(set_transfer_tablets_freeze_flag_(tx_start_transfer_out_info))) {
LOG_WARN("failed to set transfer src tablets freeze flag", K(ret), K(scn), K(tx_start_transfer_out_info));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < tx_start_transfer_out_info.tablet_list_.count(); ++i) {
MDS_TG(10_ms);
const share::ObTransferTabletInfo &tablet_info = tx_start_transfer_out_info.tablet_list_.at(i);
ObTabletStartTransferOutReplayExecutor executor;
if (CLICK_FAIL(executor.init(scn, tx_start_transfer_out_info.src_ls_id_, tx_start_transfer_out_info.dest_ls_id_, tablet_info, ctx))) {
if (CLICK_FAIL(executor.init(scn, tx_start_transfer_out_info.src_ls_id_, tx_start_transfer_out_info.dest_ls_id_, tablet_info, ctx, mds_op_type_))) {
LOG_WARN("failed to init tablet start transfer out replay executor", K(ret), K(scn), K(tx_start_transfer_out_info), K(tablet_info));
} else if (CLICK_FAIL(executor.execute(scn, tx_start_transfer_out_info.src_ls_id_, tablet_info.tablet_id_))) {
LOG_WARN("failed to execute start transfer out replay", K(ret), K(scn), K(tx_start_transfer_out_info), K(tablet_info));
@ -611,6 +657,173 @@ int ObTabletStartTransferOutHelper::on_replay_success_(
return ret;
}
int ObTabletStartTransferOutPrepareHelper::on_register(
const char *buf,
const int64_t len,
mds::BufferCtx &ctx)
{
MDS_TG(1_s);
int ret = OB_SUCCESS;
ObTXStartTransferOutInfo tx_start_transfer_out_info;
int64_t pos = 0;
const bool for_replay = false;
ObLSHandle ls_handle;
ObLS *ls = nullptr;
ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT_PREPARE;
ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type);
if (OB_ISNULL(buf) || len < 0) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("on register start transfer out get invalid argument", K(ret), KP(buf), K(len));
} else if (CLICK_FAIL(tx_start_transfer_out_info.deserialize(buf, len, pos))) {
LOG_WARN("failed to deserialize tx start transfer out info", K(ret), K(len), K(pos));
} else if (!tx_start_transfer_out_info.is_valid()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tx start transfer out info is unexpected", K(ret), K(tx_start_transfer_out_info));
} else if (CLICK_FAIL(MTL(ObLSService *)->get_ls(tx_start_transfer_out_info.src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
LOG_WARN("fail to get ls", KR(ret), K(tx_start_transfer_out_info));
} else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ls should not be NULL", KR(ret), K(tx_start_transfer_out_info), KP(ls));
} else if (CLICK_FAIL(transfer_out_helper.update_tablets_transfer_out_(tx_start_transfer_out_info, ls, ctx))) {
LOG_WARN("failed to update tables transfer out", K(ret), K(tx_start_transfer_out_info), KPC(ls));
}
return ret;
}
// Replay handler for the START_TRANSFER_OUT_PREPARE multi-data-source op.
// Deserializes an ObTXStartTransferOutInfo from buf and, if that succeeded, replays it
// through the common helper (configured with START_TRANSFER_OUT_PREPARE).
// @param buf  serialized ObTXStartTransferOutInfo, must not be NULL
// @param len  length of buf, must be >= 0
// @param scn  scn of the replayed log, must be valid
// @param ctx  buffer ctx handed through to on_replay_success_()
// @return OB_INVALID_ARGUMENT for bad arguments, OB_ERR_UNEXPECTED for an invalid info,
//         otherwise the error of the failing step.
int ObTabletStartTransferOutPrepareHelper::on_replay(
    const char* buf,
    const int64_t len,
    const share::SCN &scn,
    mds::BufferCtx &ctx)
{
  MDS_TG(1_s);
  int ret = OB_SUCCESS;
  ObTXStartTransferOutInfo tx_start_transfer_out_info;
  int64_t pos = 0;
  ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT_PREPARE;
  ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type);
  if (OB_ISNULL(buf) || len < 0 || !scn.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("on replay start transfer out get invalid argument", K(ret), KP(buf), K(len), K(scn));
  } else if (CLICK_FAIL(tx_start_transfer_out_info.deserialize(buf, len, pos))) {
    LOG_WARN("failed to deserialize tx start transfer out info", K(ret), K(len), K(pos));
  } else if (!tx_start_transfer_out_info.is_valid()) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("tx start transfer out info is unexpected", K(ret), K(tx_start_transfer_out_info));
  }
  // FAILEDx only evaluates the call while ret is still OB_SUCCESS.
  if (CLICK() && FAILEDx(transfer_out_helper.on_replay_success_(scn, tx_start_transfer_out_info, ctx))) {
    LOG_WARN("failed to do on_replay_success_", K(ret), K(scn), K(tx_start_transfer_out_info));
  }
  return ret;
}
/******************ObTabletStartTransferOutV2Helper*********************/
// Register handler for the START_TRANSFER_OUT_V2 multi-data-source op.
// Steps, in order: deserialize ObTXStartTransferOutInfo from buf, fetch the source LS,
// record the transfer block op on the ctx, call ls->transfer_out_tx_op() with
// NotifyType::REGISTER_SUCC, then update the tablets through the common helper.
// If any step fails after the block op was recorded, transfer_out_tx_op() is called
// again with NotifyType::ON_ABORT as clean-up; its failure is only logged.
// @param buf  serialized ObTXStartTransferOutInfo, must not be NULL
// @param len  length of buf, must be >= 0
// @param ctx  buffer ctx; NOTE(review): it is static_cast to both mds::MdsCtx and
//             ObTransferOutTxCtx without a check, so it is assumed to really be an
//             ObTransferOutTxCtx — confirm against the registration of this helper.
// @return OB_SUCCESS or the error of the first failing step.
int ObTabletStartTransferOutV2Helper::on_register(
const char *buf,
const int64_t len,
mds::BufferCtx &ctx)
{
MDS_TG(1_s);
int ret = OB_SUCCESS;
ObTXStartTransferOutInfo info;
int64_t pos = 0;
ObLSHandle ls_handle;
ObLS *ls = nullptr;
int64_t active_tx_count = 0;
int64_t block_tx_count = 0;
// left default-constructed here; it is only passed on to transfer_out_tx_op()
SCN op_scn;
int64_t start_time = ObTimeUtility::current_time();
mds::MdsCtx &user_ctx = static_cast<mds::MdsCtx&>(ctx);
ObTransferOutTxCtx &transfer_tx_ctx = static_cast<ObTransferOutTxCtx&>(ctx);
ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT_V2;
ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type);
// becomes true once the block op is recorded, i.e. once clean-up is required on failure
bool start_modify = false;
if (OB_ISNULL(buf) || len < 0) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("on register start transfer out tx get invalid argument", KR(ret), KP(buf), K(len));
} else if (CLICK_FAIL(info.deserialize(buf, len, pos))) {
LOG_WARN("failed to deserialize tx start transfer out tx info", KR(ret), K(len), K(pos));
} else if (!info.is_valid()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("tx start transfer out tx info is unexpected", KR(ret), K(info));
} else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
LOG_WARN("fail to get ls", KR(ret), K(info));
} else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ls should not be NULL", KR(ret), K(info), KP(ls));
} else if (OB_FAIL(transfer_tx_ctx.record_transfer_block_op(info.src_ls_id_, info.dest_ls_id_, info.data_end_scn_, info.transfer_epoch_, false))) {
LOG_WARN("record transfer block op failed", KR(ret), K(info));
} else if (FALSE_IT(start_modify = true)) {
} else if (OB_FAIL(ls->transfer_out_tx_op(user_ctx.get_writer().writer_id_, info.data_end_scn_, op_scn,
NotifyType::REGISTER_SUCC, false, info.dest_ls_id_, info.transfer_epoch_, active_tx_count, block_tx_count))) {
LOG_WARN("transfer block tx failed", KR(ret), K(info));
} else if (OB_FAIL(transfer_out_helper.update_tablets_transfer_out_(info, ls, ctx))) {
LOG_WARN("update tablets transfer out failed", KR(ret), K(info), KP(ls));
} else {
int64_t end_time = ObTimeUtility::current_time();
LOG_INFO("[TRANSFER] start transfer out tx register succ", K(info), "cost", end_time - start_time,
K(active_tx_count), K(block_tx_count));
}
if (OB_FAIL(ret)) {
// to clean
// start_modify is only set after ls was checked to be non-NULL, so ls is safe to use here
int tmp_ret = OB_SUCCESS;
if (start_modify && OB_TMP_FAIL(ls->transfer_out_tx_op(user_ctx.get_writer().writer_id_, info.data_end_scn_, op_scn,
NotifyType::ON_ABORT, false, info.dest_ls_id_, info.transfer_epoch_, active_tx_count, block_tx_count))) {
LOG_ERROR("transfer out clean failed", K(tmp_ret), K(info), K(user_ctx.get_writer().writer_id_));
}
}
return ret;
}
// Replay handler for the START_TRANSFER_OUT_V2 multi-data-source op.
// Steps, in order: deserialize ObTXStartTransferOutInfo from buf, fetch the source LS,
// record the transfer block op on the ctx (replay mode), call ls->transfer_out_tx_op()
// with NotifyType::ON_REDO, then replay the tablet update through the common helper.
// @param buf  serialized ObTXStartTransferOutInfo, must not be NULL
// @param len  length of buf, must be >= 0
// @param scn  scn of the replayed log; rejected up front when invalid so that no state
//             is touched before on_replay_success_() would refuse it anyway
// @param ctx  buffer ctx; NOTE(review): it is static_cast to both mds::MdsCtx and
//             ObTransferOutTxCtx without a check — confirm the registered ctx type.
// @return OB_SUCCESS or the error of the first failing step.
int ObTabletStartTransferOutV2Helper::on_replay(const char *buf,
                                                const int64_t len,
                                                const share::SCN &scn,
                                                mds::BufferCtx &ctx)
{
  MDS_TG(1_s);
  int ret = OB_SUCCESS;
  ObTXStartTransferOutInfo info;
  int64_t pos = 0;
  ObLSHandle ls_handle;
  ObLS *ls = nullptr;
  int64_t active_tx_count = 0;
  int64_t block_tx_count = 0;
  mds::MdsCtx &user_ctx = static_cast<mds::MdsCtx&>(ctx);
  ObTransferOutTxCtx &transfer_tx_ctx = static_cast<ObTransferOutTxCtx&>(ctx);
  ObTxDataSourceType mds_op_type = ObTxDataSourceType::START_TRANSFER_OUT_V2;
  ObTabletStartTransferOutCommonHelper transfer_out_helper(mds_op_type);
  if (OB_ISNULL(buf) || len < 0 || !scn.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("on replay start transfer out tx get invalid argument", KR(ret), KP(buf), K(len), K(scn));
  } else if (CLICK_FAIL(info.deserialize(buf, len, pos))) {
    LOG_WARN("failed to deserialize tx start transfer out tx info", KR(ret), K(len), K(pos));
  } else if (!info.is_valid()) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("tx start transfer out tx info is unexpected", KR(ret), K(info));
  } else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
    LOG_WARN("fail to get ls", KR(ret), K(info));
  } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls should not be NULL", KR(ret), K(info), KP(ls));
  } else if (OB_FAIL(transfer_tx_ctx.record_transfer_block_op(info.src_ls_id_, info.dest_ls_id_, info.data_end_scn_, info.transfer_epoch_, true))) {
    LOG_WARN("record transfer block op failed", KR(ret), K(info));
  } else if (OB_FAIL(ls->transfer_out_tx_op(user_ctx.get_writer().writer_id_, info.data_end_scn_, scn,
      NotifyType::ON_REDO, true, info.dest_ls_id_, info.transfer_epoch_, active_tx_count, block_tx_count))) {
    LOG_WARN("transfer block tx failed", KR(ret), K(info));
  } else if (OB_FAIL(transfer_out_helper.on_replay_success_(scn, info, ctx))) {
    LOG_WARN("start transfer out on replay failed", KR(ret), K(info), KP(ls));
  } else {
    LOG_INFO("start transfer out tx replay succ", K(info), K(scn), K(active_tx_count), K(block_tx_count));
  }
  return ret;
}
/******************ObTabletStartTransferInReplayExecutor*********************/
class ObTabletStartTransferInReplayExecutor final : public logservice::ObTabletReplayExecutor
{

View File

@ -26,6 +26,9 @@ class SCN;
class ObLSID;
struct ObTransferTabletInfo;
}
namespace transaction {
enum class ObTxDataSourceType : int64_t;
}
namespace storage
{
@ -42,6 +45,34 @@ class ObTXStartTransferOutInfo;
class ObTXStartTransferInInfo;
class ObMigrationTabletParam;
class ObTabletStartTransferOutCommonHelper
{
public:
ObTabletStartTransferOutCommonHelper(transaction::ObTxDataSourceType &mds_op_type)
: mds_op_type_(mds_op_type) {}
~ObTabletStartTransferOutCommonHelper() {}
int update_tablets_transfer_out_(
const ObTXStartTransferOutInfo &tx_start_transfer_out_info,
ObLS *ls,
mds::BufferCtx &ctx);
int update_tablet_transfer_out_(
const share::ObLSID &dest_ls_id,
const share::ObTransferTabletInfo &tablet_info,
ObLS *ls,
mds::BufferCtx &ctx);
int set_transfer_tablets_freeze_flag_(const ObTXStartTransferOutInfo &tx_start_transfer_out_info);
int try_enable_dest_ls_clog_replay(
const share::SCN &scn,
const share::ObLSID &dest_ls_id);
int on_replay_success_(
const share::SCN &scn,
const ObTXStartTransferOutInfo &tx_start_transfer_out_info,
mds::BufferCtx &ctx);
private:
DISALLOW_COPY_AND_ASSIGN(ObTabletStartTransferOutCommonHelper);
transaction::ObTxDataSourceType &mds_op_type_;
};
class ObTabletStartTransferOutHelper
{
public:
@ -68,28 +99,40 @@ private:
const share::ObLSID &ls_id,
const share::ObTransferTabletInfo &tablet_info,
ObTablet *tablet);
static int update_tablets_transfer_out_(
const ObTXStartTransferOutInfo &tx_start_transfer_out_info,
ObLS *ls,
mds::BufferCtx &ctx);
static int update_tablet_transfer_out_(
const share::ObLSID &dest_ls_id,
const share::ObTransferTabletInfo &tablet_info,
ObLS *ls,
mds::BufferCtx &ctx);
static int set_transfer_tablets_freeze_flag_(const ObTXStartTransferOutInfo &tx_start_transfer_out_info);
static int on_replay_success_(
const share::SCN &scn,
const ObTXStartTransferOutInfo &tx_start_transfer_out_info,
mds::BufferCtx &ctx);
static int try_enable_dest_ls_clog_replay(
const share::SCN &scn,
const share::ObLSID &dest_ls_id);
private:
DISALLOW_COPY_AND_ASSIGN(ObTabletStartTransferOutHelper);
};
// Static entry points for the "start transfer out prepare" op.
// Both take a serialized buffer (buf, len) and a buffer ctx.
class ObTabletStartTransferOutPrepareHelper
{
public:
// Called when the op is registered.
static int on_register(
const char* buf,
const int64_t len,
mds::BufferCtx &ctx);
// Called when the op is replayed; scn is the scn passed in by the replay caller.
static int on_replay(
const char* buf,
const int64_t len,
const share::SCN &scn,
mds::BufferCtx &ctx);
};
// Static entry points for the "start transfer out v2" op.
// Both take a serialized buffer (buf, len) and a buffer ctx.
class ObTabletStartTransferOutV2Helper
{
public:
// Called when the op is registered.
static int on_register(
const char* buf,
const int64_t len,
mds::BufferCtx &ctx);
// Called when the op is replayed; scn is the scn passed in by the replay caller.
static int on_replay(
const char* buf,
const int64_t len,
const share::SCN &scn,
mds::BufferCtx &ctx);
private:
DISALLOW_COPY_AND_ASSIGN(ObTabletStartTransferOutV2Helper);
};
class ObTabletStartTransferInHelper
{
public:

View File

@ -0,0 +1,724 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX STORAGE
#include "storage/ls/ob_ls.h"
#include "storage/tx_storage/ob_ls_service.h"
#include "storage/tablet/ob_tablet_transfer_tx_ctx.h"
namespace oceanbase
{
namespace storage
{
using namespace transaction;
OB_SERIALIZE_MEMBER(CollectTxCtxInfo, src_ls_id_, dest_ls_id_, task_id_, transfer_epoch_, transfer_scn_, args_);
OB_SERIALIZE_MEMBER(ObTxCtxMoveArg, tx_id_, epoch_, session_id_, tx_state_, trans_version_, prepare_version_, commit_version_, cluster_id_, cluster_version_, scheduler_, tx_expired_time_, xid_, last_seq_no_, max_submitted_seq_no_, tx_start_scn_, tx_end_scn_, is_sub2pc_, happened_before_, table_lock_info_);
OB_SERIALIZE_MEMBER(ObTransferDestPrepareInfo, task_id_, src_ls_id_, dest_ls_id_);
OB_SERIALIZE_MEMBER(ObTransferMoveTxParam, src_ls_id_, transfer_epoch_, transfer_scn_, op_scn_, op_type_, is_replay_, is_incomplete_replay_);
// Copies other into *this. args_ is assigned first because it is the only step that
// can fail; the remaining fields are only copied once that succeeded.
int CollectTxCtxInfo::assign(const CollectTxCtxInfo &other)
{
  int ret = OB_SUCCESS;
  if (OB_FAIL(args_.assign(other.args_))) {
    LOG_WARN("collect tx ctx info assign failed", KR(ret), K(other));
  } else {
    task_id_ = other.task_id_;
    src_ls_id_ = other.src_ls_id_;
    dest_ls_id_ = other.dest_ls_id_;
    transfer_scn_ = other.transfer_scn_;
    transfer_epoch_ = other.transfer_epoch_;
  }
  return ret;
}
// Puts every field back to its cleared value.
void ObTransferMoveTxParam::reset()
{
  // scalar fields
  transfer_epoch_ = 0;
  op_type_ = NotifyType::UNKNOWN;
  is_replay_ = false;
  is_incomplete_replay_ = false;
  // fields that provide their own reset()
  src_ls_id_.reset();
  transfer_scn_.reset();
  op_scn_.reset();
}
ObTransferOutTxCtx::ObTransferOutTxCtx()
: do_transfer_block_(false),
src_ls_id_(),
dest_ls_id_(),
data_end_scn_(),
transfer_scn_(),
transfer_epoch_(0) {}
// Clears the recorded transfer block op and all values that belong to it.
void ObTransferOutTxCtx::reset()
{
  // scalar fields
  do_transfer_block_ = false;
  transfer_epoch_ = 0;
  // ids and scns provide their own reset()
  src_ls_id_.reset();
  dest_ls_id_.reset();
  data_end_scn_.reset();
  transfer_scn_.reset();
}
// True only when a block op was recorded, both LS ids and both scns are valid and the
// transfer epoch is positive. Checks run in this order and stop at the first failure.
bool ObTransferOutTxCtx::is_valid()
{
  bool valid = do_transfer_block_;
  valid = valid && src_ls_id_.is_valid();
  valid = valid && dest_ls_id_.is_valid();
  valid = valid && data_end_scn_.is_valid();
  valid = valid && transfer_scn_.is_valid();
  valid = valid && (transfer_epoch_ > 0);
  return valid;
}
int ObTransferOutTxCtx::assign(const ObTransferOutTxCtx &other)
{
int ret = OB_SUCCESS;
const mds::MdsCtx &mds_ctx = static_cast<const mds::MdsCtx&>(other);
if (OB_FAIL(MdsCtx::assign(mds_ctx))) {
LOG_WARN("transfer out tx ctx assign failed", KR(ret), K(other));
} else {
do_transfer_block_ = other.do_transfer_block_;
src_ls_id_ = other.src_ls_id_;
dest_ls_id_ = other.dest_ls_id_;
data_end_scn_ = other.data_end_scn_;
transfer_scn_ = other.transfer_scn_;
transfer_epoch_ = other.transfer_epoch_;
}
return ret;
}
// Stores the parameters of a transfer block op on this ctx and marks it as recorded.
// Outside of replay, recording a second time is rejected with OB_ERR_UNEXPECTED;
// during replay the stored values are simply overwritten.
int ObTransferOutTxCtx::record_transfer_block_op(const share::ObLSID src_ls_id,
                                                 const share::ObLSID dest_ls_id,
                                                 const share::SCN data_end_scn,
                                                 int64_t transfer_epoch,
                                                 bool is_replay)
{
  int ret = OB_SUCCESS;
  if (is_replay || !do_transfer_block_) {
    do_transfer_block_ = true;
    transfer_epoch_ = transfer_epoch;
    data_end_scn_ = data_end_scn;
    dest_ls_id_ = dest_ls_id;
    src_ls_id_ = src_ls_id;
  } else {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ctx do_transfer_block unexpectd", KR(ret), KP(this));
  }
  return ret;
}
// Redo callback: forwards to mds::MdsCtx::on_redo(), stores redo_scn as transfer_scn_ and
// then calls ls->transfer_out_tx_op() with NotifyType::ON_REDO on the source LS.
// The call is retried in an endless loop until it succeeds; the function has no way to
// report failure. NOTE(review): if is_valid() is false the loop cannot make progress
// on its own, since nothing inside the loop changes the checked fields.
void ObTransferOutTxCtx::on_redo(const share::SCN &redo_scn)
{
transaction::ObTransID tx_id = writer_.writer_id_;
LOG_INFO("transfer_out_tx on_redo", K(redo_scn), K(tx_id), KP(this), KPC(this));
mds::MdsCtx::on_redo(redo_scn);
transfer_scn_ = redo_scn;
// declared outside the loop, so these are reused (not re-initialized) across retries
ObLSHandle ls_handle;
ObLS *ls = nullptr;
int64_t active_tx_count = 0;
int64_t block_tx_count = 0;
while (true) {
// fresh error code for every attempt
int ret = OB_SUCCESS;
if (!is_valid()) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("transfer out tx ctx invalid state", KR(ret), K(tx_id), KP(this), KPC(this));
} else if (OB_FAIL(MTL(ObLSService*)->get_ls(src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
LOG_WARN("failed to get ls", KR(ret), K(tx_id), KP(this));
} else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ls should not be NULL", KR(ret), KP(this), KP(ls));
} else if (OB_FAIL(ls->transfer_out_tx_op(get_writer().writer_id_,
data_end_scn_,
redo_scn,
transaction::NotifyType::ON_REDO,
false,
dest_ls_id_,
transfer_epoch_,
active_tx_count,
block_tx_count))) {
LOG_WARN("transfer out tx failed", KR(ret), K(tx_id), KP(this));
}
if (OB_FAIL(ret)) {
// back off before the next attempt (presumably 10ms, ob_usleep taking microseconds — confirm)
ob_usleep(10 * 1000);
} else {
break;
}
}
}
// Commit callback of the transfer-out MDS ctx.
// Forwards to MdsCtx::on_commit and then applies the ON_COMMIT transfer-out
// operation on the source LS. Every failure is retried after a 10ms sleep;
// the function returns only after one attempt succeeded.
//
// @param commit_version  forwarded to MdsCtx::on_commit
// @param commit_scn      forwarded to MdsCtx::on_commit and used as the op scn
void ObTransferOutTxCtx::on_commit(const share::SCN &commit_version, const share::SCN &commit_scn)
{
  transaction::ObTransID tx_id = writer_.writer_id_;
  LOG_INFO("transfer_out_tx on_commit", K(commit_version), K(commit_scn), K(tx_id), KP(this), KPC(this));
  mds::MdsCtx::on_commit(commit_version, commit_scn);
  while (true) {
    // `ret` is scoped to a single attempt; there is no function-level result.
    int ret = OB_SUCCESS;
    ObLSHandle ls_handle;
    ObLS *ls = nullptr;
    int64_t active_tx_count = 0;
    int64_t op_tx_count = 0;
    // start_time is taken per attempt, so "cost" below covers the successful attempt only
    int64_t start_time = ObTimeUtility::current_time();
    if (!is_valid()) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("transfer out tx ctx invalid state", KR(ret), K(tx_id), KP(this), KPC(this));
    } else if (OB_FAIL(MTL(ObLSService*)->get_ls(src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
      LOG_WARN("fail to get ls", KR(ret), K(writer_), KP(this));
    } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("ls should not be NULL", KR(ret), KP(this));
    } else if (OB_FAIL(ls->transfer_out_tx_op(get_writer().writer_id_,
                                              data_end_scn_,
                                              commit_scn,
                                              transaction::NotifyType::ON_COMMIT,
                                              false,
                                              dest_ls_id_,
                                              transfer_epoch_,
                                              active_tx_count,
                                              op_tx_count))) {
      LOG_WARN("transfer out tx op failed", KR(ret), K(tx_id), KP(this));
    } else {
      int64_t end_time = ObTimeUtility::current_time();
      LOG_INFO("transfer out tx op commit", KR(ret), KP(this),
          K(active_tx_count), K(op_tx_count), "cost", end_time - start_time);
    }
    if (OB_SUCC(ret)) {
      break;
    } else {
      // back off before the next attempt
      ob_usleep(10 * 1000);
    }
  }
}
// Abort callback of the transfer-out MDS ctx.
// Forwards to MdsCtx::on_abort. If a transfer block was recorded on this ctx
// (do_transfer_block_), it additionally applies the ON_ABORT transfer-out
// operation on the source LS, retrying every 10ms until an attempt succeeds.
void ObTransferOutTxCtx::on_abort(const share::SCN &abort_scn)
{
  transaction::ObTransID tx_id = writer_.writer_id_;
  LOG_INFO("transfer_out_tx on_abort", K(abort_scn), K(tx_id), KP(this), KPC(this));
  mds::MdsCtx::on_abort(abort_scn);

  // Without a recorded transfer block the loop below is skipped entirely.
  bool done = !do_transfer_block_;
  while (!done) {
    int ret = OB_SUCCESS;
    ObLSHandle ls_handle;
    ObLS *ls = nullptr;
    int64_t active_tx_count = 0;
    int64_t op_tx_count = 0;
    if (!is_valid()) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("transfer out tx ctx invalid state", KR(ret), K(tx_id), KP(this));
    } else if (OB_FAIL(MTL(ObLSService*)->get_ls(src_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
      LOG_WARN("fail to get ls", KR(ret), K(tx_id), KP(this));
    } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("ls should not be NULL", KR(ret), KP(this));
    } else if (OB_FAIL(ls->transfer_out_tx_op(get_writer().writer_id_,
                                              data_end_scn_,
                                              abort_scn,
                                              transaction::NotifyType::ON_ABORT,
                                              false,
                                              dest_ls_id_,
                                              transfer_epoch_,
                                              active_tx_count,
                                              op_tx_count))) {
      LOG_WARN("transfer out tx op failed", KR(ret), K(tx_id), KP(this));
    }

    if (OB_FAIL(ret)) {
      // back off before the next attempt
      ob_usleep(10 * 1000);
    } else {
      done = true;
    }
  }
}
// Register callback for the transfer move-tx MDS operation.
//
// Deserializes a CollectTxCtxInfo from `buf` into the ctx, marks the transfer
// status of the destination LS as REGISTER_SUCC and then applies move_tx_op
// with NotifyType::REGISTER_SUCC. move_tx_op is retried every 10ms while it
// returns OB_NEED_RETRY, for at most 5 seconds. If anything fails after the
// destination LS was obtained, clean() is called to undo the modifications.
//
// @param buf  serialized CollectTxCtxInfo
// @param len  length of `buf`
// @param ctx  must actually be an ObTransferMoveTxCtx (it is static_cast'ed)
// @return OB_SUCCESS or the first error encountered
int ObStartTransferMoveTxHelper::on_register(const char* buf, const int64_t len, mds::BufferCtx &ctx)
{
  MDS_TG(1_s);
  int ret = OB_SUCCESS;
  int64_t pos = 0;
  ObLSHandle ls_handle;
  ObLS *ls = nullptr;
  // left default-constructed: no operation scn is supplied at register time
  SCN op_scn;
  ObTransferMoveTxCtx &transfer_move_tx_ctx = static_cast<ObTransferMoveTxCtx&>(ctx);
  CollectTxCtxInfo &collect_tx_info = transfer_move_tx_ctx.get_collect_tx_info();
  transaction::ObTransID tx_id = transfer_move_tx_ctx.get_writer().writer_id_;
  // becomes true once the LS has been obtained; from then on a failure triggers clean()
  bool start_modify = false;
  LOG_INFO("TransferMoveTx on_register", K(tx_id));
  if (OB_ISNULL(buf) || len < 0) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("on register move tx get invalid argument", KR(ret), KP(buf), K(len));
  } else if (collect_tx_info.is_valid() || transfer_move_tx_ctx.get_op_scn().is_valid()) {
    // the ctx must be pristine before registration
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ctx state is valid before register", KR(ret), K(transfer_move_tx_ctx));
  } else if (CLICK_FAIL(collect_tx_info.deserialize(buf, len, pos))) {
    LOG_WARN("failed to deserialize collect tx ctx info", KR(ret), K(len), K(pos));
  } else if (!collect_tx_info.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("on register collect_tx_info is invalid", KR(ret), K(collect_tx_info));
  } else if (OB_FAIL(MTL(ObLSService*)->get_ls(collect_tx_info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
    LOG_WARN("get ls failed", KR(ret), K(transfer_move_tx_ctx));
  } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls should not be NULL", KR(ret), K(transfer_move_tx_ctx), KP(ls));
  } else if (FALSE_IT(start_modify = true)) {
  } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, SCN(),
          NotifyType::REGISTER_SUCC, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) {
    LOG_WARN("update transfer status failed", KR(ret), K(tx_id));
  } else {
    int64_t start_time = ObTimeUtil::current_time();
    ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_,
                                        collect_tx_info.transfer_epoch_,
                                        collect_tx_info.transfer_scn_,
                                        op_scn,
                                        transaction::NotifyType::REGISTER_SUCC,
                                        false,
                                        false);
    while (OB_SUCC(ret)) {
      if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) {
        LOG_WARN("move tx op failed", KR(ret), K(tx_id), K(transfer_move_tx_ctx));
      } else {
        break;
      }
      if (ObTimeUtil::current_time() - start_time > 5 * 1000 * 1000) {
        // retry budget (5s) exhausted: leave with the last error
        break;
      } else if (OB_NEED_RETRY == ret) {
        // only OB_NEED_RETRY is retried; any other error ends the loop via its condition
        ret = OB_SUCCESS;
        ob_usleep(10 * 1000);
      }
    }
  }
  if (OB_FAIL(ret)) {
    int tmp_ret = OB_SUCCESS;
    if (start_modify && OB_TMP_FAIL(clean(ls, tx_id, collect_tx_info))) {
      LOG_ERROR("TransferMoveTx clean failed", K(tmp_ret), K(tx_id));
    }
  }
  LOG_INFO("[TRANSFER] TransferMoveTx on_register", KR(ret), K(len), K(tx_id),
      "tx_count", collect_tx_info.args_.count());
  return ret;
}
// Undoes a failed register on the destination LS: sets the transfer status to
// ON_ABORT and applies move_tx_op with NotifyType::ON_ABORT.
// Both steps are repeated (10ms apart) until they succeed, so the function only
// returns OB_SUCCESS. Once the cleanup has been running for more than 500ms,
// each further failed attempt is logged.
//
// @param ls               destination LS; dereferenced without a null check
// @param tx_id            id of the transaction that wrote the MDS record
// @param collect_tx_info  info deserialized during on_register
int ObStartTransferMoveTxHelper::clean(ObLS *ls, transaction::ObTransID tx_id, CollectTxCtxInfo &collect_tx_info)
{
  int ret = OB_SUCCESS;
  int64_t start_time = ObTimeUtil::current_time();
  ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_,
                                      collect_tx_info.transfer_epoch_,
                                      collect_tx_info.transfer_scn_,
                                      SCN(),
                                      transaction::NotifyType::ON_ABORT,
                                      false,
                                      false);
  bool cleaned = false;
  while (!cleaned) {
    if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, SCN(),
            NotifyType::ON_ABORT, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) {
      LOG_WARN("update transfer status failed", KR(ret), K(tx_id));
    } else if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) {
      LOG_WARN("move tx op failed", KR(ret), K(tx_id));
    } else {
      cleaned = true;
    }

    if (!cleaned) {
      int64_t cost = ObTimeUtil::current_time() - start_time;
      if (cost > 500 * 1000) {
        LOG_WARN("move_tx clean tool long time", KR(ret), K(ls->get_ls_id()), K(tx_id), K(cost));
      }
      // forget the error and try again after a short pause
      ret = OB_SUCCESS;
      ob_usleep(10 * 1000);
    }
  }
  return ret;
}
// Replay callback for the transfer move-tx MDS operation.
//
// Deserializes a CollectTxCtxInfo from `buf` into the ctx, sets the transfer
// status of the destination LS to ON_REDO at `scn`, and applies move_tx_op
// with NotifyType::ON_REDO and is_replay = true. Nothing is retried here; the
// first error is returned to the caller.
//
// @param buf  serialized CollectTxCtxInfo
// @param len  length of `buf`
// @param scn  scn passed to update_status and move_tx_op as the operation scn
// @param ctx  must actually be an ObTransferMoveTxCtx (it is static_cast'ed)
// @return OB_SUCCESS or the first error encountered
int ObStartTransferMoveTxHelper::on_replay(const char* buf, const int64_t len, const share::SCN &scn, mds::BufferCtx &ctx)
{
  MDS_TG(1_s);
  int ret = OB_SUCCESS;
  int64_t pos = 0;
  ObLSHandle ls_handle;
  ObLS *ls = nullptr;
  ObTransferMoveTxCtx &transfer_move_tx_ctx = static_cast<ObTransferMoveTxCtx&>(ctx);
  // reference into the ctx; used for every access below
  CollectTxCtxInfo &collect_tx_info = transfer_move_tx_ctx.get_collect_tx_info();
  transaction::ObTransID tx_id = transfer_move_tx_ctx.get_writer().writer_id_;
  LOG_INFO("TransferMoveTx on_replay", K(tx_id));
  if (OB_ISNULL(buf) || len < 0) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("move tx get invalid argument", KR(ret), KP(buf), K(len));
  } else if (CLICK_FAIL(collect_tx_info.deserialize(buf, len, pos))) {
    LOG_WARN("failed to deserialize collect tx ctx info", KR(ret), K(len), K(pos));
  } else if (!collect_tx_info.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("collect_tx_info is invalid", KR(ret));
  } else if (OB_FAIL(MTL(ObLSService*)->get_ls(collect_tx_info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
    LOG_WARN("get ls failed", KR(ret), K(collect_tx_info));
  } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls should not be NULL", KR(ret), K(collect_tx_info), KP(ls));
  } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, scn,
          NotifyType::ON_REDO, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) {
    LOG_WARN("update transfer status failed", KR(ret), K(tx_id));
  } else {
    ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_,
                                        collect_tx_info.transfer_epoch_,
                                        collect_tx_info.transfer_scn_,
                                        scn,
                                        transaction::NotifyType::ON_REDO,
                                        true,
                                        transfer_move_tx_ctx.is_incomplete_replay());
    if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) {
      LOG_WARN("move tx ctx failed", KR(ret), K(collect_tx_info));
    } else {
      LOG_INFO("[TRANSFER] TransferMoveTx on_replay", KR(ret), K(tx_id));
    }
  }
  return ret;
}
// Value-initializes every member; the ctx holds no transfer info until
// on_register/on_replay deserialize one into it.
ObTransferMoveTxCtx::ObTransferMoveTxCtx()
  : writer_(),
    op_scn_(),
    collect_tx_info_()
{
}
// Clears the collected tx info and the operation scn. writer_ is left as it is.
void ObTransferMoveTxCtx::reset()
{
  collect_tx_info_.reset();
  op_scn_.reset();
}
// Copies the id and the type of `writer` into this ctx's writer_.
void ObTransferMoveTxCtx::set_writer(const mds::MdsWriter &writer)
{
  writer_.writer_id_ = writer.writer_id_;
  writer_.writer_type_ = writer.writer_type_;
}
// Returns a copy of the writer recorded on this ctx.
const mds::MdsWriter ObTransferMoveTxCtx::get_writer() const
{
  return writer_;
}
int ObTransferMoveTxCtx::assign(const ObTransferMoveTxCtx &other)
{
int ret = OB_SUCCESS;
if (OB_FAIL(collect_tx_info_.assign(other.collect_tx_info_))) {
LOG_WARN("move_tx_ctx assign failed", KR(ret), K(other));
} else {
writer_ = other.writer_;
op_scn_ = other.op_scn_;
}
return ret;
}
// Redo callback of the move-tx MDS ctx.
// Advances op_scn_ to redo_scn when op_scn_ is invalid or smaller, updates the
// transfer status of the destination LS and applies move_tx_op with
// NotifyType::ON_REDO. Every failure is retried after a 10ms sleep; the
// function returns only after one attempt succeeded.
void ObTransferMoveTxCtx::on_redo(const share::SCN &redo_scn)
{
  transaction::ObTransID tx_id = writer_.writer_id_;
  LOG_INFO("move_tx_ctx on_redo", K(redo_scn), K(tx_id), KP(this));
  bool done = false;
  do {
    int ret = OB_SUCCESS;
    ObLSHandle ls_handle;
    ObLS *ls = nullptr;
    CollectTxCtxInfo &collect_tx_info = collect_tx_info_;
    if (!collect_tx_info.is_valid()) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("TRANSFER collect_tx_info is invalid", KR(ret), K(collect_tx_info), K(op_scn_), K(writer_), KP(this));
    } else {
      // op_scn_ is only touched once the collected info is known to be valid
      if (!op_scn_.is_valid() || op_scn_ < redo_scn) {
        op_scn_ = redo_scn;
      }
      if (OB_FAIL(MTL(ObLSService*)->get_ls(collect_tx_info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
        LOG_WARN("get ls failed", KR(ret), K(writer_), K(collect_tx_info), KP(this));
      } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
        ret = OB_ERR_UNEXPECTED;
        LOG_WARN("ls should not be NULL", KR(ret), K(collect_tx_info), KP(ls));
      // NOTE(review): the status update uses REGISTER_SUCC while move_tx_op below
      // uses ON_REDO — confirm this is intended.
      } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, redo_scn,
              NotifyType::REGISTER_SUCC, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) {
        LOG_WARN("update transfer status failed", KR(ret), K(tx_id));
      } else {
        ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_,
                                            collect_tx_info.transfer_epoch_,
                                            collect_tx_info.transfer_scn_,
                                            redo_scn,
                                            transaction::NotifyType::ON_REDO,
                                            false,
                                            is_incomplete_replay());
        if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) {
          LOG_WARN("move tx ctx failed", KR(ret), K(collect_tx_info), K(tx_id), KP(this), K(redo_scn));
        } else {
          LOG_INFO("[TRANSFER] move_tx_ctx", KR(ret), K(redo_scn), K(tx_id), KP(this));
        }
      }
    }

    if (OB_FAIL(ret)) {
      // back off before the next attempt
      ob_usleep(10 * 1000);
    } else {
      done = true;
    }
  } while (!done);
}
// Commit callback of the move-tx MDS ctx.
// Applies move_tx_op with NotifyType::ON_COMMIT on the destination LS and then
// sets its transfer status to ON_COMMIT. Every failure is retried after a 10ms
// sleep; the function returns only after one attempt succeeded.
void ObTransferMoveTxCtx::on_commit(const share::SCN &commit_version, const share::SCN &commit_scn)
{
  transaction::ObTransID tx_id = writer_.writer_id_;
  LOG_INFO("move_tx_ctx on_commit", K(commit_version), K(commit_scn), K(tx_id), KP(this));
  bool done = false;
  do {
    int ret = OB_SUCCESS;
    ObLSHandle ls_handle;
    ObLS *ls = nullptr;
    CollectTxCtxInfo &collect_tx_info = collect_tx_info_;
    // both the collected info and the op scn must have been set beforehand
    if (!(collect_tx_info.is_valid() && op_scn_.is_valid())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("TRANSFER collect_tx_info is invalid", KR(ret), K(collect_tx_info), K(op_scn_));
    } else if (OB_FAIL(MTL(ObLSService*)->get_ls(collect_tx_info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
      LOG_WARN("get ls failed", KR(ret), K(collect_tx_info));
    } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("ls should not be NULL", KR(ret), K(collect_tx_info), KP(ls));
    } else {
      ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_,
                                          collect_tx_info.transfer_epoch_,
                                          collect_tx_info.transfer_scn_,
                                          commit_scn,
                                          transaction::NotifyType::ON_COMMIT,
                                          false,
                                          is_incomplete_replay());
      if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) {
        LOG_WARN("move tx ctx failed", KR(ret), K(collect_tx_info), K(commit_scn));
      } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, commit_scn,
              NotifyType::ON_COMMIT, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) {
        LOG_WARN("update transfer status failed", KR(ret), K(tx_id));
      } else {
        LOG_INFO("[TRANSFER] move_tx_ctx", KR(ret), K(commit_version), K(commit_scn), K(writer_), KP(this));
      }
    }

    if (OB_FAIL(ret)) {
      // back off before the next attempt
      ob_usleep(10 * 1000);
    } else {
      done = true;
    }
  } while (!done);
}
// Abort callback of the move-tx MDS ctx.
// Applies move_tx_op with NotifyType::ON_ABORT on the destination LS and then
// sets its transfer status to ON_ABORT. Every failure is retried after a 10ms
// sleep; the function returns only after one attempt succeeded.
void ObTransferMoveTxCtx::on_abort(const share::SCN &abort_scn)
{
  transaction::ObTransID tx_id = writer_.writer_id_;
  LOG_INFO("move_tx_ctx on_abort", K(abort_scn), K(writer_), KP(this));
  bool done = false;
  do {
    int ret = OB_SUCCESS;
    ObLSHandle ls_handle;
    ObLS *ls = nullptr;
    CollectTxCtxInfo &collect_tx_info = collect_tx_info_;
    // NOTE(review): nothing in this loop changes op_scn_, so if it was never set
    // the loop cannot terminate — confirm on_abort is never reached in that state.
    if (!(collect_tx_info.is_valid() && op_scn_.is_valid())) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("TRANSFER collect_tx_info is invalid", KR(ret), K(collect_tx_info), K(op_scn_));
    } else if (OB_FAIL(MTL(ObLSService*)->get_ls(collect_tx_info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
      LOG_WARN("get ls failed", KR(ret), K(collect_tx_info));
    } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("ls should not be NULL", KR(ret), K(collect_tx_info), KP(ls));
    } else {
      ObTransferMoveTxParam move_tx_param(collect_tx_info.src_ls_id_,
                                          collect_tx_info.transfer_epoch_,
                                          collect_tx_info.transfer_scn_,
                                          abort_scn,
                                          transaction::NotifyType::ON_ABORT,
                                          false,
                                          is_incomplete_replay());
      if (OB_FAIL(ls->move_tx_op(move_tx_param, collect_tx_info.args_))) {
        LOG_WARN("move tx ctx failed", KR(ret), K(collect_tx_info), K(abort_scn));
      } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, collect_tx_info.task_id_, abort_scn,
              NotifyType::ON_ABORT, ObTxDataSourceType::TRANSFER_MOVE_TX_CTX))) {
        LOG_WARN("update transfer status failed", KR(ret), K(tx_id));
      } else {
        LOG_INFO("[TRANSFER] move_tx_ctx", KR(ret), K(writer_), KP(this), K(abort_scn));
      }
    }

    if (OB_FAIL(ret)) {
      // back off before the next attempt
      ob_usleep(10 * 1000);
    } else {
      done = true;
    }
  } while (!done);
}
int ObStartTransferDestPrepareHelper::on_register(
const char* buf,
const int64_t len,
mds::BufferCtx &ctx)
{
int ret = OB_SUCCESS;
int64_t pos = 0;
ObLSHandle ls_handle;
ObLS *ls = NULL;
ObTransferDestPrepareTxCtx &user_ctx = static_cast<ObTransferDestPrepareTxCtx&>(ctx);
ObTransferDestPrepareInfo &info = user_ctx.get_info();
transaction::ObTransID tx_id = user_ctx.get_writer().writer_id_;
LOG_INFO("transfer_dest_prepare register", K(tx_id));
if (OB_FAIL(info.deserialize(buf, len, pos))) {
LOG_WARN("failed to deserialize transfer dest prepare info", KR(ret), K(len), K(pos));
} else if (!info.is_valid()) {
ret = OB_INVALID_ARGUMENT;
LOG_WARN("transfer_dest_prepare invalid param", KR(ret), K(info));
} else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
LOG_WARN("get ls failed", KR(ret), K(info));
} else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("ls should not be NULL", KR(ret), K(info), KP(ls));
} else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, info.task_id_, SCN(),
NotifyType::REGISTER_SUCC, ObTxDataSourceType::TRANSFER_DEST_PREPARE))) {
LOG_WARN("update transfer status failed", KR(ret), K(tx_id));
}
return ret;
}
// Replay callback for the transfer dest-prepare MDS operation.
// Deserializes an ObTransferDestPrepareInfo from `buf` into the ctx and sets
// the transfer status of the destination LS to ON_REDO at `scn`.
//
// @param buf  serialized ObTransferDestPrepareInfo
// @param len  length of `buf`
// @param scn  scn passed to update_status
// @param ctx  must actually be an ObTransferDestPrepareTxCtx (it is static_cast'ed)
// @return OB_SUCCESS or the first error encountered; OB_ERR_UNEXPECTED if the
//         LS handle yields a null LS.
int ObStartTransferDestPrepareHelper::on_replay(
    const char* buf,
    const int64_t len,
    const share::SCN &scn,
    mds::BufferCtx &ctx)
{
  int ret = OB_SUCCESS;
  int64_t pos = 0;
  ObLSHandle ls_handle;
  ObLS *ls = nullptr;
  ObTransferDestPrepareTxCtx &user_ctx = static_cast<ObTransferDestPrepareTxCtx&>(ctx);
  ObTransferDestPrepareInfo &info = user_ctx.get_info();
  transaction::ObTransID tx_id = user_ctx.get_writer().writer_id_;
  LOG_INFO("transfer_dest_prepare on_replay", K(tx_id), K(scn));
  if (OB_FAIL(info.deserialize(buf, len, pos))) {
    LOG_WARN("failed to deserialize transfer dest prepare info", KR(ret), K(len), K(pos));
  } else if (!info.is_valid()) {
    ret = OB_INVALID_ARGUMENT;
    LOG_WARN("transfer_dest_prepare invalid param", KR(ret), K(info));
  } else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
    LOG_WARN("get ls failed", KR(ret), K(info));
  } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
    // same guard as on_register: never dereference a null LS
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("ls should not be NULL", KR(ret), K(info), KP(ls));
  } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, info.task_id_, scn,
          NotifyType::ON_REDO, ObTxDataSourceType::TRANSFER_DEST_PREPARE))) {
    LOG_WARN("update transfer status failed", KR(ret), K(tx_id));
  }
  return ret;
}
// Clears the dest-prepare info and the operation scn. writer_ is left as it is.
void ObTransferDestPrepareTxCtx::reset()
{
  transfer_dest_prepare_info_.reset();
  op_scn_.reset();
}
// Copies the task id and both LS ids from `other`. Always returns OB_SUCCESS.
int ObTransferDestPrepareInfo::assign(const ObTransferDestPrepareInfo& other)
{
  dest_ls_id_ = other.dest_ls_id_;
  src_ls_id_ = other.src_ls_id_;
  task_id_ = other.task_id_;
  return OB_SUCCESS;
}
int ObTransferDestPrepareTxCtx::assign(const ObTransferDestPrepareTxCtx &other)
{
int ret = OB_SUCCESS;
if (OB_FAIL(transfer_dest_prepare_info_.assign(other.transfer_dest_prepare_info_))) {
LOG_WARN("transfer dest prepare info assign failed", KR(ret), K(other));
} else {
writer_ = other.writer_;
op_scn_ = other.op_scn_;
}
return ret;
}
// Copies the id and the type of `writer` into this ctx's writer_.
void ObTransferDestPrepareTxCtx::set_writer(const mds::MdsWriter &writer)
{
  writer_.writer_id_ = writer.writer_id_;
  writer_.writer_type_ = writer.writer_type_;
}
// Returns a copy of the writer recorded on this ctx.
const mds::MdsWriter ObTransferDestPrepareTxCtx::get_writer() const
{
  return writer_;
}
// Redo callback of the dest-prepare MDS ctx.
// Advances op_scn_ to redo_scn when op_scn_ is invalid or smaller, and sets the
// transfer status of the destination LS to ON_REDO. Every failure (including a
// null LS from the handle) is retried after a 10ms sleep; the function returns
// only after one attempt succeeded.
void ObTransferDestPrepareTxCtx::on_redo(const share::SCN &redo_scn)
{
  transaction::ObTransID tx_id = writer_.writer_id_;
  LOG_INFO("transfer_dest_prepare on_redo", K(tx_id), K(this), K(redo_scn));
  while (true) {
    int ret = OB_SUCCESS;
    ObLSHandle ls_handle;
    ObLS *ls = nullptr;
    ObTransferDestPrepareInfo &info = get_info();
    if (!info.is_valid()) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("transfer dest prepare info is invalid", KR(ret), K(tx_id), KP(this), KPC(this));
    } else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
      LOG_WARN("get ls failed", KR(ret), K(tx_id), K(transfer_dest_prepare_info_));
    } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
      // guard against dereferencing a null LS; treated like any other retryable failure
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("ls should not be NULL", KR(ret), K(tx_id), KP(ls));
    } else if ((!op_scn_.is_valid() || op_scn_ < redo_scn) && FALSE_IT(op_scn_ = redo_scn)) {
      // FALSE_IT: performs the assignment and falls through to the next branch
    } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, info.task_id_, redo_scn,
            NotifyType::ON_REDO, ObTxDataSourceType::TRANSFER_DEST_PREPARE))) {
      LOG_WARN("update transfer status failed", KR(ret), K(tx_id));
    }
    if (OB_SUCC(ret)) {
      break;
    } else {
      // back off before the next attempt
      ob_usleep(10 * 1000);
    }
  }
}
// Commit callback of the dest-prepare MDS ctx.
// Sets the transfer status of the destination LS to ON_COMMIT. Every failure
// (including a null LS from the handle) is retried after a 10ms sleep; the
// function returns only after one attempt succeeded.
//
// TODO: the weak_read_ts advance of dest_ls could be recovered before on_commit,
// right after move_tx_ctx.
void ObTransferDestPrepareTxCtx::on_commit(const share::SCN &commit_version, const share::SCN &commit_scn)
{
  transaction::ObTransID tx_id = writer_.writer_id_;
  LOG_INFO("transfer_dest_prepare on_commit", K(tx_id), K(this), K(commit_scn));
  while (true) {
    int ret = OB_SUCCESS;
    ObLSHandle ls_handle;
    ObLS *ls = nullptr;
    ObTransferDestPrepareInfo &info = get_info();
    if (!info.is_valid()) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("transfer dest prepare info is invalid", KR(ret), K(tx_id), KP(this), KPC(this));
    } else if (OB_FAIL(MTL(ObLSService*)->get_ls(info.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
      LOG_WARN("get ls failed", KR(ret), K(tx_id), K(transfer_dest_prepare_info_));
    } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
      // guard against dereferencing a null LS; treated like any other retryable failure
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("ls should not be NULL", KR(ret), K(tx_id), KP(ls));
    } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, info.task_id_, commit_scn,
            NotifyType::ON_COMMIT, ObTxDataSourceType::TRANSFER_DEST_PREPARE))) {
      LOG_WARN("update transfer status failed", KR(ret), K(tx_id));
    }
    if (OB_SUCC(ret)) {
      break;
    } else {
      // back off before the next attempt
      ob_usleep(10 * 1000);
    }
  }
}
// Abort callback of the dest-prepare MDS ctx.
// Sets the transfer status of the destination LS to ON_ABORT. Every failure
// (including a null LS from the handle) is retried after a 10ms sleep; the
// function returns only after one attempt succeeded.
void ObTransferDestPrepareTxCtx::on_abort(const share::SCN &abort_scn)
{
  transaction::ObTransID tx_id = writer_.writer_id_;
  LOG_INFO("transfer_dest_prepare on_abort", K(tx_id), K(this), K(abort_scn));
  while (true) {
    int ret = OB_SUCCESS;
    ObLSHandle ls_handle;
    ObLS *ls = nullptr;
    ObTransferDestPrepareInfo &info = get_info();
    if (!info.is_valid()) {
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("transfer dest prepare info is invalid", KR(ret), K(tx_id), KP(this), KPC(this));
    } else if (OB_FAIL(MTL(ObLSService*)->get_ls(transfer_dest_prepare_info_.dest_ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
      LOG_WARN("get ls failed", KR(ret), K(transfer_dest_prepare_info_), K(tx_id));
    } else if (OB_UNLIKELY(nullptr == (ls = ls_handle.get_ls()))) {
      // guard against dereferencing a null LS; treated like any other retryable failure
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("ls should not be NULL", KR(ret), K(tx_id), KP(ls));
    } else if (OB_FAIL(ls->get_transfer_status().update_status(tx_id, info.task_id_, abort_scn,
            NotifyType::ON_ABORT, ObTxDataSourceType::TRANSFER_DEST_PREPARE))) {
      LOG_WARN("update transfer status failed", KR(ret), K(tx_id));
    }
    if (OB_SUCC(ret)) {
      break;
    } else {
      // back off before the next attempt
      ob_usleep(10 * 1000);
    }
  }
}
} // end storage
} // end oceanbase

View File

@ -0,0 +1,278 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_STORAGE_OB_TABLET_TRANSFER_TX_CTX
#define OCEANBASE_STORAGE_OB_TABLET_TRANSFER_TX_CTX
// Headers are included at global scope, before any namespace is opened, so that
// their declarations do not end up nested inside oceanbase::storage.
#include "share/scn.h"
#include "share/ob_ls_id.h"
#include "storage/multi_data_source/mds_ctx.h"
#include "storage/tablelock/ob_table_lock_common.h"
namespace oceanbase
{
namespace storage
{
// Per-transaction state record used by the transfer move-tx operation:
// CollectTxCtxInfo::args_ holds an array of these and it is that array which is
// handed to move_tx_op.
// NOTE(review): no constructor or default member initializers are visible here,
// so the scalar fields (epoch_, session_id_, cluster_id_, the bools, ...) rely on
// the creator/deserializer to set them — confirm they are always populated before use.
struct ObTxCtxMoveArg
{
OB_UNIS_VERSION(1);
public:
transaction::ObTransID tx_id_;
int64_t epoch_;
uint32_t session_id_;
transaction::ObTxState tx_state_;
share::SCN trans_version_;
share::SCN prepare_version_;
share::SCN commit_version_;
uint64_t cluster_id_;
uint64_t cluster_version_;
common::ObAddr scheduler_;
int64_t tx_expired_time_;
transaction::ObXATransID xid_;
transaction::ObTxSEQ last_seq_no_;
transaction::ObTxSEQ max_submitted_seq_no_;
share::SCN tx_start_scn_;
share::SCN tx_end_scn_;
bool is_sub2pc_;
bool happened_before_;
transaction::tablelock::ObTableLockInfo table_lock_info_;
TO_STRING_KV(K_(tx_id), K_(epoch), K_(session_id), K_(tx_state), K_(trans_version), K_(prepare_version), K_(commit_version),
K_(cluster_id), K_(cluster_version), K_(scheduler), K_(tx_expired_time), K_(xid), K_(last_seq_no), K_(max_submitted_seq_no),
K_(tx_start_scn), K_(tx_end_scn), K_(is_sub2pc), K_(happened_before), K_(table_lock_info));
};
// Parameter bundle describing one move_tx_op invocation: which transfer it
// belongs to (source LS, epoch, transfer scn) and which notification phase is
// being applied (op scn, op type, replay flags).
// There is no default constructor; every field is supplied at construction.
struct ObTransferMoveTxParam
{
OB_UNIS_VERSION_V(1);
public:
ObTransferMoveTxParam(share::ObLSID ls_id, int64_t transfer_epoch, share::SCN transfer_scn,
share::SCN op_scn, transaction::NotifyType op_type, bool is_replay, bool is_incomplete_replay)
: src_ls_id_(ls_id),
transfer_epoch_(transfer_epoch),
transfer_scn_(transfer_scn),
op_scn_(op_scn),
op_type_(op_type),
is_replay_(is_replay),
is_incomplete_replay_(is_incomplete_replay) {}
~ObTransferMoveTxParam() { reset(); }
// defined out of line (not visible in this header)
void reset();
TO_STRING_KV(K_(src_ls_id), K_(transfer_epoch), K_(transfer_scn),
K_(op_scn), K_(op_type), K_(is_replay), K_(is_incomplete_replay));
// source log stream of the transfer
share::ObLSID src_ls_id_;
int64_t transfer_epoch_;
share::SCN transfer_scn_;
// scn of the notification being applied (redo/commit/abort scn; default SCN at register)
share::SCN op_scn_;
// notification phase: REGISTER_SUCC / ON_REDO / ON_COMMIT / ON_ABORT
transaction::NotifyType op_type_;
// true when invoked from a replay callback
bool is_replay_;
// copied from the ctx's is_incomplete_replay() by the callers
bool is_incomplete_replay_;
};
// Payload of the transfer move-tx MDS operation: identifies the transfer
// (source/destination LS, task id, epoch, transfer scn) and carries the
// collected per-transaction move arguments.
struct CollectTxCtxInfo final
{
  OB_UNIS_VERSION(1);
public:
  CollectTxCtxInfo() { reset(); }
  ~CollectTxCtxInfo() { reset(); }
  // True when both LS ids are valid, task id and epoch are positive, the
  // transfer scn is valid and at least one tx argument is present.
  // Read-only, hence const.
  bool is_valid() const {
    return src_ls_id_.is_valid() &&
           dest_ls_id_.is_valid() &&
           task_id_ > 0 &&
           transfer_epoch_ > 0 &&
           transfer_scn_.is_valid() &&
           args_.count() > 0;
  }
  // Returns every field to its empty/invalid state.
  void reset() {
    src_ls_id_.reset();
    dest_ls_id_.reset();
    task_id_ = 0;
    transfer_epoch_ = 0;
    transfer_scn_.reset();
    args_.reset();
  }
  // defined out of line (not visible in this header)
  int assign(const CollectTxCtxInfo& other);
  share::ObLSID src_ls_id_;
  share::ObLSID dest_ls_id_;
  int64_t task_id_;
  int64_t transfer_epoch_;
  share::SCN transfer_scn_;
  // one entry per collected transaction context
  ObSArray<ObTxCtxMoveArg> args_;
  TO_STRING_KV(K_(src_ls_id), K_(dest_ls_id), K_(task_id), K_(transfer_epoch), K_(transfer_scn), K_(args));
};
// Payload of the transfer dest-prepare MDS operation: the transfer task id and
// the source/destination log streams.
struct ObTransferDestPrepareInfo
{
  OB_UNIS_VERSION(1);
public:
  ObTransferDestPrepareInfo() :task_id_(0),
                               src_ls_id_(),
                               dest_ls_id_()
  {}
  // Returns every field to its empty/invalid state.
  void reset() {
    task_id_ = 0;
    src_ls_id_.reset();
    dest_ls_id_.reset();
  }
  ~ObTransferDestPrepareInfo() {
    reset();
  }
  int assign(const ObTransferDestPrepareInfo& other);
  int64_t task_id_;
  share::ObLSID src_ls_id_;
  share::ObLSID dest_ls_id_;
  // True when the task id is positive and both LS ids are valid.
  // Read-only, hence const.
  bool is_valid() const {
    return task_id_ > 0 && src_ls_id_.is_valid() && dest_ls_id_.is_valid();
  }
  TO_STRING_KV(K_(task_id), K_(src_ls_id), K_(dest_ls_id));
};
// MDS ctx of the transfer-out operation on the source LS.
// record_transfer_block_op() stores the transfer parameters; the on_redo /
// on_commit / on_abort callbacks then apply the matching transfer-out
// operation on the source LS.
class ObTransferOutTxCtx : public mds::MdsCtx
{
  OB_UNIS_VERSION(1);
public:
  ObTransferOutTxCtx();
  ~ObTransferOutTxCtx() { reset(); }
  void reset();
  // Stores the transfer parameters and sets do_transfer_block_.
  // Fails with OB_ERR_UNEXPECTED if already recorded and is_replay is false.
  int record_transfer_block_op(const share::ObLSID src_ls_id,
                               const share::ObLSID dest_ls_id,
                               const share::SCN data_end_scn,
                               int64_t transfer_epoch,
                               bool is_replay);
  virtual void on_redo(const share::SCN &redo_scn) override;
  virtual void on_commit(const share::SCN &commit_version, const share::SCN &commit_scn) override;
  virtual void on_abort(const share::SCN &abort_scn) override;
  bool is_valid();
  int assign(const ObTransferOutTxCtx &other);
  // transfer_epoch_ is printed as well, so KPC(this) logs show the full state
  TO_STRING_KV(K_(do_transfer_block),
               K_(src_ls_id),
               K_(dest_ls_id),
               K_(data_end_scn),
               K_(transfer_scn),
               K_(transfer_epoch));
private:
  // set by record_transfer_block_op(); on_abort only acts when it is true
  bool do_transfer_block_;
  share::ObLSID src_ls_id_;
  share::ObLSID dest_ls_id_;
  share::SCN data_end_scn_;
  // set to the redo scn in on_redo
  share::SCN transfer_scn_;
  int64_t transfer_epoch_;
};
// Serialization member list of ObTransferOutTxCtx.
// writer_ is not declared in ObTransferOutTxCtx itself — presumably inherited
// from mds::MdsCtx; verify against that class.
OB_SERIALIZE_MEMBER_TEMP(inline, ObTransferOutTxCtx,
writer_,
do_transfer_block_,
src_ls_id_,
dest_ls_id_,
data_end_scn_,
transfer_scn_,
transfer_epoch_)
// MDS buffer ctx of the transfer move-tx operation on the destination LS.
// Holds the deserialized CollectTxCtxInfo plus the writer and the latest
// operation scn; the on_redo / on_commit / on_abort callbacks apply move_tx_op
// on the destination LS.
class ObTransferMoveTxCtx : public mds::BufferCtx
{
OB_UNIS_VERSION(1);
public:
ObTransferMoveTxCtx();
~ObTransferMoveTxCtx() { reset(); }
// clears op_scn_ and collect_tx_info_ (writer_ is not touched)
void reset();
int assign(const ObTransferMoveTxCtx& other);
// returns a copy of writer_
virtual const mds::MdsWriter get_writer() const override;
void set_writer(const mds::MdsWriter &writer);
virtual void on_redo(const share::SCN &redo_scn) override;
virtual void on_commit(const share::SCN &commit_version, const share::SCN &commit_scn) override;
virtual void on_abort(const share::SCN &abort_scn) override;
// mutable access: the register/replay helpers deserialize directly into it
CollectTxCtxInfo &get_collect_tx_info() { return collect_tx_info_; }
share::SCN get_op_scn() const { return op_scn_; }
TO_STRING_KV(K_(writer), K_(op_scn), K_(collect_tx_info));
private:
mds::MdsWriter writer_;
// advanced in on_redo; on_commit/on_abort require it to be valid
share::SCN op_scn_;
CollectTxCtxInfo collect_tx_info_;
};
// Serialization member list of ObTransferMoveTxCtx.
OB_SERIALIZE_MEMBER_TEMP(inline, ObTransferMoveTxCtx,
writer_,
op_scn_,
collect_tx_info_)
// Static register/replay entry points for the transfer move-tx MDS operation.
// In both, `buf` holds a serialized CollectTxCtxInfo and `ctx` is cast to
// ObTransferMoveTxCtx.
class ObStartTransferMoveTxHelper
{
public:
// Deserializes the info into ctx and applies the REGISTER_SUCC phase on the
// destination LS; calls clean() if a later step fails.
static int on_register(
const char* buf,
const int64_t len,
mds::BufferCtx &ctx);
// Deserializes the info into ctx and applies the ON_REDO phase at `scn` in replay mode.
static int on_replay(
const char* buf,
const int64_t len,
const share::SCN &scn,
mds::BufferCtx &ctx);
// Applies the ON_ABORT phase on `ls`, retrying until it succeeds.
static int clean(ObLS *ls,
transaction::ObTransID tx_id,
CollectTxCtxInfo &collect_tx_info);
};
// MDS buffer ctx of the transfer dest-prepare operation.
// Holds the deserialized ObTransferDestPrepareInfo plus the writer and the
// latest operation scn; the on_redo / on_commit / on_abort callbacks update the
// transfer status of the destination LS.
class ObTransferDestPrepareTxCtx : public mds::BufferCtx
{
OB_UNIS_VERSION(1);
public:
ObTransferDestPrepareTxCtx() {
reset();
}
~ObTransferDestPrepareTxCtx() { reset(); }
// clears op_scn_ and transfer_dest_prepare_info_ (writer_ is not touched)
void reset();
int assign(const ObTransferDestPrepareTxCtx &other);
// returns a copy of writer_
virtual const mds::MdsWriter get_writer() const override;
void set_writer(const mds::MdsWriter &writer);
virtual void on_redo(const share::SCN &redo_scn) override;
virtual void on_commit(const share::SCN &commit_version, const share::SCN &commit_scn) override;
virtual void on_abort(const share::SCN &abort_scn) override;
// mutable access: the register/replay helpers deserialize directly into it
ObTransferDestPrepareInfo &get_info() { return transfer_dest_prepare_info_; }
share::SCN get_op_scn() const { return op_scn_; }
TO_STRING_KV(K_(writer), K_(op_scn), K_(transfer_dest_prepare_info));
private:
mds::MdsWriter writer_;
// advanced in on_redo
share::SCN op_scn_;
ObTransferDestPrepareInfo transfer_dest_prepare_info_;
};
// Serialization member list of ObTransferDestPrepareTxCtx.
OB_SERIALIZE_MEMBER_TEMP(inline, ObTransferDestPrepareTxCtx,
writer_,
op_scn_,
transfer_dest_prepare_info_)
// Static register/replay entry points for the transfer dest-prepare MDS
// operation. In both, `buf` holds a serialized ObTransferDestPrepareInfo and
// `ctx` is cast to ObTransferDestPrepareTxCtx.
class ObStartTransferDestPrepareHelper
{
public:
// Deserializes the info into ctx and sets the destination LS transfer status to REGISTER_SUCC.
static int on_register(
const char* buf,
const int64_t len,
mds::BufferCtx &ctx);
// Deserializes the info into ctx and sets the destination LS transfer status to ON_REDO at `scn`.
static int on_replay(
const char* buf,
const int64_t len,
const share::SCN &scn,
mds::BufferCtx &ctx);
};
} // end storage
} // end oceanbase
#endif

View File

@ -79,6 +79,7 @@ enum class ObTxState : uint8_t
};
const int64_t OB_C2PC_UPSTREAM_ID = INT64_MAX - 1;
const int64_t OB_C2PC_SENDER_ID = INT64_MAX - 2;
/* // ObITxCommitter provides method to commit the transaction with user provided callbacks. */
/* // The interface need guarantee the atomicity of the transaction. */

View File

@ -221,6 +221,10 @@ int ObMulSourceTxDataNotifier::notify(const ObTxBufferNodeArray &array,
}
} else {
mds::TLOCAL_MDS_TRANS_NOTIFY_TYPE = notify_type;
if (arg.is_incomplete_replay_) {
// pass incomplete replay arg
const_cast<mds::BufferCtx*>(node.get_buffer_ctx_node().get_ctx())->set_incomplete_replay(arg.is_incomplete_replay_);
}
switch (node.type_) {
#define NEED_GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION
#define _GENERATE_MDS_FRAME_CODE_FOR_TRANSACTION_(HELPER_CLASS, BUFFER_CTX_TYPE, ID, ENUM_NAME) \

View File

@ -23,6 +23,7 @@
#include "storage/ls/ob_ls_tx_service.h"
#include "storage/ls/ob_ls.h"
#include "storage/tx/ob_trans_ctx_mgr_v4.h"
#include "storage/tx_storage/ob_ls_service.h"
namespace oceanbase
{
@ -434,8 +435,18 @@ int ObLSTxCtxMgr::create_tx_ctx_(const ObTxCreateArg &arg,
TRANS_LOG(WARN, "alloc transaction context error", K(arg));
ret = OB_ALLOCATE_MEMORY_FAILED;
} else {
// pack `epoch(15bit) | ts_ns(48bit)` into int64_t, set most significant bit to zero
int64_t epoch_v = ~(1UL << 63) & ((epoch << 48) | (ObTimeUtility::current_time_ns() & ~(0xFFFFUL << 48)));
int64_t epoch_v = 0;
if (arg.epoch_ > 0) {
epoch_v = arg.epoch_;
} else {
// for transfer compatibility, we need old version follower's epoch be 0, so we need not check it
if (!arg.for_replay_) {
// pack `epoch(15bit) | ts_ns(48bit)` into int64_t, set most significant bit to zero
epoch_v = ~(1UL << 63) & ((epoch << 48) | (ObTimeUtility::current_time_ns() & ~(0xFFFFUL << 48)));
} else {
epoch_v = -1;
}
}
CtxLockGuard ctx_lock_guard;
ObPartTransCtx *tmp = static_cast<ObPartTransCtx *>(tmp_ctx);
if (OB_FAIL(tmp->init(arg.tenant_id_,
@ -449,7 +460,12 @@ int ObLSTxCtxMgr::create_tx_ctx_(const ObTxCreateArg &arg,
arg.cluster_id_,
epoch_v,
this,
arg.for_replay_))) {
arg.for_replay_,
arg.xid_))) {
// when transfer move active tx ctx, we will create tx ctx when dest_ls has no this tx
// we want to promise the created ctx state new enouth before insert to dest_ls ctx_map
} else if (OB_NOT_NULL(arg.move_arg_) && OB_FAIL(tmp->init_for_transfer_move(*arg.move_arg_))) {
TRANS_LOG(WARN, "init tx ctx for transfer failed", KR(ret), K(*arg.move_arg_));
} else if (FALSE_IT(inc_total_tx_ctx_count())) {
} else if (FALSE_IT(tmp_ctx->get_ctx_guard(ctx_lock_guard))) {
} else if (OB_FAIL(ls_tx_ctx_map_.insert_and_get(arg.tx_id_, tmp_ctx, &exist_ctx))) {
@ -2469,5 +2485,190 @@ int ObLSTxCtxMgr::do_standby_cleanup()
return ret;
}
// Runs TransferOutTxOpFunctor over every tx ctx in this LS's ctx map.
//
// @param except_tx_id     forwarded to the functor
// @param data_end_scn     forwarded to the functor
// @param op_scn           scn of the notification being applied
// @param op_type          notification phase
// @param is_replay        forwarded to the functor
// @param dest_ls_id       destination LS of the transfer
// @param transfer_epoch   epoch of the transfer
// @param active_tx_count  [out] functor's count; written only on success
// @param op_tx_count      [out] functor's op-tx count; written only on success
// @return OB_SUCCESS, or the functor's own return code when the traversal failed
int ObLSTxCtxMgr::transfer_out_tx_op(int64_t except_tx_id,
                                     const SCN data_end_scn,
                                     const SCN op_scn,
                                     NotifyType op_type,
                                     bool is_replay,
                                     ObLSID dest_ls_id,
                                     int64_t transfer_epoch,
                                     int64_t& active_tx_count,
                                     int64_t &op_tx_count)
{
  int ret = OB_SUCCESS;
  // no deadline for this traversal
  const int64_t abs_expired_time = INT64_MAX;
  TransferOutTxOpFunctor fn(abs_expired_time,
                            except_tx_id,
                            data_end_scn,
                            op_scn,
                            op_type,
                            is_replay,
                            dest_ls_id,
                            transfer_epoch);
  if (OB_SUCC(ls_tx_ctx_map_.for_each(fn))) {
    active_tx_count = fn.get_count();
    op_tx_count = fn.get_op_tx_count();
  } else {
    TRANS_LOG(WARN, "for each tx ctx error", KR(ret), "manager", *this);
    // report the functor's error rather than the traversal's
    ret = fn.get_ret();
  }
  TRANS_LOG(INFO, "[TRANSFER] transfer_out_tx_op", KR(ret), K(data_end_scn), K(op_scn), K(op_type), K(is_replay), K(dest_ls_id),
      K(transfer_epoch), K(active_tx_count), K(op_tx_count), K(ls_tx_ctx_map_.count()), K(tenant_id_), K(ls_id_));
  return ret;
}
// Runs WaitTxWriteEndFunctor over every tx ctx in this LS's ctx map.
// The functor's deadline is the absolute timeout of `timeout_ctx` when that is
// positive, otherwise INT64_MAX.
// @return OB_SUCCESS, or the functor's own return code when the traversal failed
int ObLSTxCtxMgr::wait_tx_write_end(ObTimeoutCtx &timeout_ctx)
{
  int ret = OB_SUCCESS;
  int64_t active_tx_count = 0;
  int64_t abs_expired_time = (timeout_ctx.get_abs_timeout() > 0)
                             ? timeout_ctx.get_abs_timeout()
                             : INT64_MAX;
  WaitTxWriteEndFunctor fn(abs_expired_time);
  if (OB_SUCC(ls_tx_ctx_map_.for_each(fn))) {
    active_tx_count = fn.get_count();
  } else {
    TRANS_LOG(WARN, "for each tx ctx error", KR(ret), "manager", *this);
    // report the functor's error rather than the traversal's
    ret = fn.get_ret();
  }
  TRANS_LOG(INFO, "wait_tx_write_end", KR(ret), K(active_tx_count));
  return ret;
}
// Collect the move arguments of the transaction contexts that have to follow
// a transfer to dest_ls_id.
//
// Every ctx in ls_tx_ctx_map_ is visited by a CollectTxCtxFunctor, which calls
// ObPartTransCtx::collect_tx_ctx() and appends the produced ObTxCtxMoveArg to
// `res` whenever the ctx reports itself as collected.
//
// @param [in] dest_ls_id, log_scn, tablet_list: forwarded to each ctx
// @param [in,out] tx_count: incremented once per visited ctx
// @param [in,out] collect_count: incremented once per collected ctx
// @param [out] res: receives one entry per collected ctx
// @return OB_SUCCESS, the error recorded by the functor, or the traversal error
//
// NOTE: the functor holds references to tx_count / collect_count and
// increments them in place, so both counters start from whatever value the
// caller passed in and are already modified when a failure is returned.
int ObLSTxCtxMgr::collect_tx_ctx(const ObLSID dest_ls_id,
                                 const SCN log_scn,
                                 const ObIArray<ObTabletID> &tablet_list,
                                 int64_t &tx_count,
                                 int64_t &collect_count,
                                 ObIArray<ObTxCtxMoveArg> &res)
{
  int ret = OB_SUCCESS;
  // no deadline is imposed on the traversal
  const int64_t abs_expired_time = INT64_MAX;
  CollectTxCtxFunctor fn(abs_expired_time, dest_ls_id, log_scn, tablet_list, tx_count, collect_count, res);
  if (OB_FAIL(ls_tx_ctx_map_.for_each(fn))) {
    TRANS_LOG(WARN, "for each tx ctx error", KR(ret), "manager", *this);
    // Report the functor's own error when it recorded one, but never let a
    // functor that recorded nothing turn a failed traversal into OB_SUCCESS.
    if (OB_SUCCESS != fn.get_ret()) {
      ret = fn.get_ret();
    }
  } else {
    tx_count = fn.get_tx_count();
    collect_count = fn.get_collect_count();
  }
  TRANS_LOG(INFO, "collect_tx_ctx", KR(ret), K(tx_count), K(collect_count), K(tenant_id_), K(ls_id_));
  return ret;
}
// Apply one phase of a transfer "move tx" operation on this log stream.
//
// For every entry of `args` the tx ctx is looked up by arg.tx_id_ in
// ls_tx_ctx_map_, created through create_tx_ctx() when it is missing (unless
// the phase allows the entry to be skipped), and then handed to
// ObPartTransCtx::move_tx_op().
//
// @param [in] move_tx_param: phase descriptor; this function reads op_type_,
//             is_replay_ and is_incomplete_replay_ and forwards the whole
//             object to the ctx
// @param [in] args: one ObTxCtxMoveArg per transaction to move
// @return OB_SUCCESS when every entry was processed or skipped; otherwise the
//         first error encountered (the loop stops at the first failure)
int ObLSTxCtxMgr::move_tx_op(const ObTransferMoveTxParam &move_tx_param,
const ObIArray<ObTxCtxMoveArg> &args)
{
int ret = OB_SUCCESS;
bool is_replay = move_tx_param.is_replay_;
// a follower always takes the replay path, whatever the caller requested
if (!is_replay && is_follower_()) {
is_replay = true;
}
ObLSHandle ls_handle;
// get weak read ts for check
share::SCN weak_read_ts;
bool need_check_wrs = true;
// only check wrs for the register and redo phases
if (move_tx_param.op_type_ != NotifyType::REGISTER_SUCC && move_tx_param.op_type_ != NotifyType::ON_REDO) {
need_check_wrs = false;
} else if (OB_FAIL(MTL(ObLSService*)->get_ls(ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) {
TRANS_LOG(WARN, "get_ls failed", KR(ret), K(ls_id_));
} else {
weak_read_ts = ls_handle.get_ls()->get_ls_wrs_handler()->get_ls_weak_read_ts();
if (is_replay) {
// checkpoint_scn is only used in the log messages below
const SCN checkpoint_scn = ls_handle.get_ls()->get_clog_checkpoint_scn();
const bool transfer_prepare = ls_handle.get_ls()->get_transfer_status().get_transfer_prepare_enable();
if (!transfer_prepare) {
// Transfer prepare is not enabled on this LS. That is only tolerated when
// the caller flagged the replay as incomplete (original note: replay may
// start from the middle and be incomplete when a migration happens).
if (!move_tx_param.is_incomplete_replay_) {
ret = OB_ERR_UNEXPECTED;
TRANS_LOG(ERROR, "move_tx_op replay unexpected", K(ret), K(ls_id_), K(move_tx_param), K(checkpoint_scn));
} else {
TRANS_LOG(WARN, "move_tx_op replay incomplete", K(ls_id_), K(move_tx_param), K(checkpoint_scn));
}
}
}
}
for (int64_t idx = 0; OB_SUCC(ret) && idx < args.count(); idx++) {
const ObTxCtxMoveArg &arg = args.at(idx);
ObPartTransCtx *ctx = nullptr;
ObTransCtx *tmp_ctx = nullptr, *exist_ctx = nullptr;
bool is_exist = false;
bool is_created = false;
// look up an existing ctx; OB_ENTRY_NOT_EXIST is not an error here and is
// turned back into OB_SUCCESS with is_exist left false
if (OB_SUCC(ls_tx_ctx_map_.get(arg.tx_id_, tmp_ctx))) {
if (OB_ISNULL(tmp_ctx)) {
ret = OB_ERR_UNEXPECTED;
TRANS_LOG(WARN, "ctx is NULL", KR(ret), "ctx", OB_P(tmp_ctx));
} else if (FALSE_IT(ctx = static_cast<ObPartTransCtx*>(tmp_ctx))) {
} else {
is_exist = true;
}
} else if (OB_ENTRY_NOT_EXIST != ret) {
TRANS_LOG(WARN, "get tx ctx failed", KR(ret), K(arg));
} else {
ret = OB_SUCCESS;
}
// check to create
if (OB_FAIL(ret)) {
} else if (move_tx_param.op_type_ == NotifyType::ON_ABORT && !is_exist) {
// a. the transfer abort log does not implement STRICT_BARRIER yet
// b. when registration partially failed, the abort may find no ctx here
TRANS_LOG(WARN, "tx.ctx not exist when transfer on abort can skip", K(arg));
continue;
} else if (move_tx_param.is_incomplete_replay_ && !is_exist) {
TRANS_LOG(WARN, "tx.ctx not exist may incomplete replay can skip", K(arg));
continue;
} else if (!is_exist) {
// a missing ctx in the leader's redo/commit phase is only logged; the ctx
// is still created below
if (!is_replay && (move_tx_param.op_type_ == NotifyType::ON_REDO || move_tx_param.op_type_ == NotifyType::ON_COMMIT)) {
TRANS_LOG(WARN, "tx ctx not exist", K(ls_id_), K(move_tx_param), K(arg));
}
ObTxCreateArg create_arg(!is_master(),
false,
tenant_id_,
arg.tx_id_,
ls_id_,
arg.cluster_id_,
arg.cluster_version_,
arg.session_id_,
arg.scheduler_,
INT64_MAX, // tx expired time
txs_,
arg.xid_,
arg.epoch_,
&arg);
// sanity check: a ctx at or beyond PREPARE must not carry a prepare version
// at or below the weak read timestamp sampled above
if (need_check_wrs && arg.tx_state_ >= ObTxState::PREPARE && arg.prepare_version_ <= weak_read_ts) {
ret = OB_ERR_UNEXPECTED;
TRANS_LOG(ERROR, "move tx prepare_version less than dest_ls weak_read_ts", KR(ret), K(arg), K(weak_read_ts), K(ls_id_), K(move_tx_param));
} else if (OB_FAIL(create_tx_ctx(create_arg, is_exist, ctx))) {
TRANS_LOG(WARN, "create tx ctx failed", KR(ret), K(create_arg));
} else if (!is_exist) {
// presumably create_tx_ctx() sets is_exist when it found a ctx that already
// existed; only a ctx created by this call is flagged is_created
is_exist = true;
is_created = true;
}
}
// do move
if (OB_FAIL(ret)) {
} else if (!is_exist || OB_ISNULL(ctx)) {
ret = OB_ERR_UNEXPECTED;
TRANS_LOG(WARN, "ctx not found", KR(ret), K(is_exist), KP(ctx));
} else if (OB_FAIL(ctx->move_tx_op(move_tx_param,
arg,
is_created))) {
TRANS_LOG(WARN, "move tx op failed", KR(ret), K(move_tx_param), K(arg));
}
// hand the ctx back on every path that obtained one, including failures
if (OB_NOT_NULL(ctx)) {
revert_tx_ctx(ctx);
}
TRANS_LOG(INFO, "move_tx_op", KR(ret), K(arg.tx_id_), K(ls_id_), K(is_replay), K(is_created));
}
return ret;
}
}
}

View File

@ -37,6 +37,8 @@ namespace storage
class ObLSTxService;
class ObTransSubmitLogFunctor;
class ObTxCtxTable;
struct ObTxCtxMoveArg;
struct ObTransferMoveTxParam;
}
namespace memtable
@ -99,7 +101,10 @@ struct ObTxCreateArg
const uint32_t session_id,
const common::ObAddr &scheduler,
const int64_t trans_expired_time,
ObTransService *trans_service)
ObTransService *trans_service,
ObXATransID xid = ObXATransID(),
int64_t epoch = -1,
const ObTxCtxMoveArg *move_arg = NULL)
: for_replay_(for_replay),
for_special_tx_(for_special_tx),
tenant_id_(tenant_id),
@ -110,7 +115,10 @@ struct ObTxCreateArg
session_id_(session_id),
scheduler_(scheduler),
trans_expired_time_(trans_expired_time),
trans_service_(trans_service) {}
trans_service_(trans_service),
xid_(xid),
epoch_(epoch),
move_arg_(move_arg) {}
bool is_valid() const
{
return ls_id_.is_valid()
@ -121,7 +129,8 @@ struct ObTxCreateArg
TO_STRING_KV(K_(for_replay), K_(for_special_tx),
K_(tenant_id), K_(tx_id),
K_(ls_id), K_(cluster_id), K_(cluster_version),
K_(session_id), K_(scheduler), K_(trans_expired_time), KP_(trans_service));
K_(session_id), K_(scheduler), K_(trans_expired_time), KP_(trans_service),
K_(epoch), K_(xid));
bool for_replay_;
bool for_special_tx_;
uint64_t tenant_id_;
@ -133,6 +142,9 @@ struct ObTxCreateArg
const common::ObAddr &scheduler_;
int64_t trans_expired_time_;
ObTransService *trans_service_;
ObXATransID xid_;
int64_t epoch_;
const ObTxCtxMoveArg *move_arg_;
};
// Is used to store and traverse ObTxID
@ -189,6 +201,25 @@ public:
// Offline the in-memory state of the ObLSTxCtxMgr
int offline();
int transfer_out_tx_op(int64_t except_tx_id,
const SCN data_end_scn,
const SCN op_scn,
NotifyType op_type,
bool is_replay,
ObLSID dest_ls_id,
int64_t transfer_epoch,
int64_t& active_tx_count,
int64_t &op_tx_count);
int wait_tx_write_end(ObTimeoutCtx &timeout_ctx);
// Collect the ObTxCtxMoveArg of every tx ctx that has to move to dest_ls_id.
// @param [in] dest_ls_id, log_scn, tablet_list: forwarded to each tx ctx
// @param [in,out] tx_count: incremented once per visited tx ctx
// @param [in,out] collect_count: incremented once per collected tx ctx
// @param [out] res: receives one entry per collected tx ctx
int collect_tx_ctx(const share::ObLSID dest_ls_id,
                   const SCN log_scn,
                   const ObIArray<ObTabletID> &tablet_list,
                   int64_t &tx_count,
                   int64_t &collect_count,
                   ObIArray<ObTxCtxMoveArg> &res);
int move_tx_op(const ObTransferMoveTxParam &move_tx_param,
const ObIArray<ObTxCtxMoveArg> &args);
public:
// Create a TxCtx whose tx_id is specified
// @param [in] tx_id: transaction ID
@ -645,7 +676,6 @@ private:
static const int64_t ONLINE = 10;
static const int64_t UNBLOCK_NORMAL = 11;
static const int64_t MAX = 12;
public:
static bool is_valid(const int64_t op)
{ return op > INVALID && op < MAX; }

View File

@ -57,9 +57,12 @@ OB_SERIALIZE_MEMBER(ObStartTransParam, access_mode_, type_, isolation_, consiste
cluster_version_, is_inner_trans_, read_snapshot_type_);
OB_SERIALIZE_MEMBER(ObElrTransInfo, trans_id_, commit_version_, result_);
OB_SERIALIZE_MEMBER(ObLSLogInfo, id_, offset_);
OB_SERIALIZE_MEMBER(ObStateInfo, ls_id_, state_, version_, snapshot_version_);
OB_SERIALIZE_MEMBER(ObStateInfo, ls_id_, state_, version_, snapshot_version_, check_info_);
OB_SERIALIZE_MEMBER(ObTransDesc, a_);
OB_SERIALIZE_MEMBER(ObTxExecPart, ls_id_, exec_epoch_, transfer_epoch_);
OB_SERIALIZE_MEMBER(ObStandbyCheckInfo, check_info_ori_ls_id_, check_part_);
// class ObStartTransParam
void ObStartTransParam::reset()
{
@ -696,7 +699,7 @@ DEF_TO_STRING(ObLockForReadArg)
{
int64_t pos = 0;
J_OBJ_START();
J_KV(K(mvcc_acc_ctx_), K(data_trans_id_), K(data_sql_sequence_), K(read_latest_), K(scn_));
J_KV(K(mvcc_acc_ctx_), K(data_trans_id_), K(data_sql_sequence_), K(read_latest_), K(read_uncommitted_), K(scn_));
J_OBJ_END();
return pos;
}
@ -710,6 +713,8 @@ void ObTxExecInfo::reset()
upstream_.reset();
participants_.reset();
incremental_participants_.reset();
intermediate_participants_.reset();
commit_parts_.reset();
prev_record_lsn_.reset();
redo_lsns_.reset();
scheduler_.reset();
@ -731,6 +736,9 @@ void ObTxExecInfo::reset()
xid_.reset();
need_checksum_ = true;
is_sub2pc_ = false;
is_transfer_blocking_ = false;
is_empty_ctx_created_by_transfer_ = false;
exec_epoch_ = 0;
}
void ObTxExecInfo::destroy(ObTxMDSCache &mds_cache)
@ -805,6 +813,34 @@ void ObTxExecInfo::clear_buffer_ctx_in_multi_data_source()
}
}
// Replace commit_parts_ with the commit participants of another exec info.
//
// When `commit_parts` has the same number of entries as `participants` it is
// copied as is. Otherwise the source is treated as an old-version log that
// carries no commit parts, and one entry per participant is mocked with
// exec_epoch = -1 and transfer_epoch = -1.
//
// @param [in] participants: participant log streams of the source exec info
// @param [in] commit_parts: commit parts of the source exec info
// @return OB_SUCCESS, or the array error; commit_parts_ is left empty when
//         mocking fails
int ObTxExecInfo::assign_commit_parts(const share::ObLSArray &participants,
                                      const ObTxCommitParts &commit_parts)
{
  int ret = OB_SUCCESS;
  if (participants.count() != commit_parts.count()) {
    // recover old version log, we need mock the commit parts
    // Drop previous content first so that this branch replaces commit_parts_
    // exactly like the assign() branch below instead of appending to it.
    commit_parts_.reset();
    for (int64_t i = 0; OB_SUCC(ret) && i < participants.count(); i++) {
      if (OB_FAIL(commit_parts_.push_back(ObTxExecPart(participants[i],
                                                       -1, /*exec_epoch*/
                                                       -1  /*transfer_epoch*/)))) {
        TRANS_LOG(WARN, "set commit parts error", K(ret), K(*this));
      }
    }
    if (OB_FAIL(ret)) {
      // reset on failure to ensure atomicity
      commit_parts_.reset();
    }
  } else {
    if (OB_FAIL(commit_parts_.assign(commit_parts))) {
      TRANS_LOG(WARN, "set commit parts error", K(ret), K(*this));
    }
  }
  return ret;
}
int ObTxExecInfo::assign(const ObTxExecInfo &exec_info)
{
int ret = OB_SUCCESS;
@ -816,6 +852,8 @@ int ObTxExecInfo::assign(const ObTxExecInfo &exec_info)
TRANS_LOG(WARN, "participants assign error", KR(ret), K(exec_info));
} else if (OB_FAIL(incremental_participants_.assign(exec_info.incremental_participants_))) {
TRANS_LOG(WARN, "incremental participants assign error", KR(ret), K(exec_info));
} else if (OB_FAIL(intermediate_participants_.assign(exec_info.intermediate_participants_))) {
TRANS_LOG(WARN, "intermediate participants assign error", KR(ret), K(exec_info));
} else if (OB_FAIL(redo_lsns_.assign(exec_info.redo_lsns_))) {
TRANS_LOG(WARN, "redo_lsns assign error", KR(ret), K(exec_info));
} else if (OB_FAIL(multi_data_source_.assign(exec_info.multi_data_source_))) {
@ -824,6 +862,11 @@ int ObTxExecInfo::assign(const ObTxExecInfo &exec_info)
TRANS_LOG(WARN, "mds_buffer_ctx_array assign error", KR(ret), K(exec_info));
} else if (OB_FAIL(prepare_log_info_arr_.assign(exec_info.prepare_log_info_arr_))) {
TRANS_LOG(WARN, "prepare log info array assign error", KR(ret), K(exec_info));
} else if (OB_FAIL(assign_commit_parts(exec_info.participants_,
exec_info.commit_parts_))) {
TRANS_LOG(WARN, "commit parts assign error", KR(ret), K(exec_info));
} else if (OB_FAIL(transfer_parts_.assign(exec_info.transfer_parts_))) {
TRANS_LOG(WARN, "transfer_epoch assign error", KR(ret), K(exec_info));
} else {
// Prepare version should be initialized before state_
// for ObTransPartCtx::get_prepare_version_if_preapred();
@ -846,6 +889,9 @@ int ObTxExecInfo::assign(const ObTxExecInfo &exec_info)
xid_ = exec_info.xid_;
need_checksum_ = exec_info.need_checksum_;
is_sub2pc_ = exec_info.is_sub2pc_;
is_transfer_blocking_ = exec_info.is_transfer_blocking_;
is_empty_ctx_created_by_transfer_ = exec_info.is_empty_ctx_created_by_transfer_;
exec_epoch_ = exec_info.exec_epoch_;
}
return ret;
}
@ -876,7 +922,13 @@ OB_SERIALIZE_MEMBER(ObTxExecInfo,
xid_,
need_checksum_,
is_sub2pc_,
mds_buffer_ctx_array_);
mds_buffer_ctx_array_,
intermediate_participants_,
is_transfer_blocking_,
commit_parts_,
transfer_parts_,
is_empty_ctx_created_by_transfer_,
exec_epoch_);
bool ObMulSourceDataNotifyArg::is_redo_submitted() const { return redo_submitted_; }
@ -915,5 +967,60 @@ const char *trans_type_to_cstr(const TransType &trans_type)
return str;
}
// Add a participant to the rollback part list unless an entry with the same
// log stream id is already present. Only ls_id_ is compared; the epochs of an
// existing entry are left untouched.
//
// @return OB_SUCCESS, OB_ERR_UNEXPECTED when the set has no part list, or the
//         push_back error
int RollbackMaskSet::merge_part(const share::ObLSID add_ls_id, const int64_t exec_epoch, const int64_t transfer_epoch)
{
  int ret = OB_SUCCESS;
  ObSpinLockGuard guard(lock_);
  if (OB_ISNULL(rollback_parts_)) {
    ret = OB_ERR_UNEXPECTED;
    TRANS_LOG(WARN, "rollback_parts is null", K(ret), K(add_ls_id));
  } else {
    bool already_present = false;
    for (int64_t pos = 0; !already_present && pos < rollback_parts_->count(); ++pos) {
      already_present = (rollback_parts_->at(pos).ls_id_ == add_ls_id);
    }
    if (already_present) {
      // nothing to add
    } else if (OB_FAIL(rollback_parts_->push_back(ObTxExecPart(add_ls_id, exec_epoch, transfer_epoch)))) {
      TRANS_LOG(WARN, "push part to array failed", KR(ret), K(add_ls_id));
    }
  }
  return ret;
}
// Look up the rollback part registered for ls_id and copy it into `part`.
//
// Only the first entry whose ls_id_ matches is considered, and its exec_epoch_
// must equal orig_epoch.
//
// @return OB_SUCCESS on a match; OB_ENTRY_NOT_EXIST when no entry has this
//         ls_id; OB_ERR_UNEXPECTED when the part list is missing or the epoch
//         of the matching entry differs from orig_epoch
int RollbackMaskSet::find_part(const share::ObLSID ls_id,
                               const int64_t orig_epoch,
                               ObTxExecPart &part)
{
  int ret = OB_SUCCESS;
  ObSpinLockGuard guard(lock_);
  if (OB_ISNULL(rollback_parts_)) {
    ret = OB_ERR_UNEXPECTED;
    TRANS_LOG(WARN, "rollback_parts is null", K(ret), K(ls_id));
  } else {
    // position of the first entry with a matching ls id, -1 when there is none
    int64_t hit = -1;
    for (int64_t pos = 0; hit < 0 && pos < rollback_parts_->count(); ++pos) {
      if (rollback_parts_->at(pos).ls_id_ == ls_id) {
        hit = pos;
      }
    }
    if (hit < 0) {
      ret = OB_ENTRY_NOT_EXIST;
    } else if (rollback_parts_->at(hit).exec_epoch_ != orig_epoch) {
      ret = OB_ERR_UNEXPECTED;
      TRANS_LOG(WARN, "check rollback part failed", K(ret), K(rollback_parts_), K(orig_epoch));
    } else {
      part = rollback_parts_->at(hit);
    }
  }
  if (OB_FAIL(ret)) {
    TRANS_LOG(WARN, "find part", K(ret), K(ls_id), K(orig_epoch), K(rollback_parts_));
  }
  return ret;
}
} // transaction
} // oceanbase

View File

@ -387,11 +387,13 @@ struct ObLockForReadArg
ObTransID data_trans_id,
ObTxSEQ data_sql_sequence,
bool read_latest,
bool read_uncommitted,
share::SCN scn)
: mvcc_acc_ctx_(acc_ctx),
data_trans_id_(data_trans_id),
data_sql_sequence_(data_sql_sequence),
read_latest_(read_latest),
read_uncommitted_(read_uncommitted),
scn_(scn) {}
DECLARE_TO_STRING;
@ -400,7 +402,9 @@ struct ObLockForReadArg
ObTransID data_trans_id_;
ObTxSEQ data_sql_sequence_;
bool read_latest_;
share::SCN scn_; // Compare with transfer_start_scn, sstable is end_scn, and memtable is ObMvccTransNode scn
bool read_uncommitted_;
// Compare with transfer_start_scn, sstable is end_scn, and memtable is ObMvccTransNode scn
share::SCN scn_;
};
class ObTransKey final
@ -1098,6 +1102,13 @@ public:
void clear_force_abort()
{ flag_ &= ~FORCE_ABORT_BIT; }
// True when TRANSFER_BLOCKING_BIT is set in flag_.
bool is_transfer_blocking() const
{
  return 0 != (flag_ & TRANSFER_BLOCKING_BIT);
}
// Set TRANSFER_BLOCKING_BIT in flag_, leaving the other bits untouched.
void set_transfer_blocking()
{
  flag_ = flag_ | TRANSFER_BLOCKING_BIT;
}
// Clear TRANSFER_BLOCKING_BIT in flag_, leaving the other bits untouched.
void clear_transfer_blocking()
{
  flag_ = flag_ & ~TRANSFER_BLOCKING_BIT;
}
// bool is_prepare_log_submitted() const
// { return flag_ & PREPARE_LOG_SUBMITTED_BIT; }
// void set_prepare_log_submitted()
@ -1132,6 +1143,7 @@ private:
// indicate whether notified multi data source to prepare
static const int64_t PREPARE_NOTIFY_BIT = 1UL << 5;
static const int64_t FORCE_ABORT_BIT = 1UL << 6;
static const int64_t TRANSFER_BLOCKING_BIT = 1UL << 7;
private:
int64_t flag_;
};
@ -1547,10 +1559,63 @@ private:
palf::LSN offset_;
};
// A participant log stream together with the two epochs recorded for it.
// Both epochs default to -1.
struct ObTxExecPart
{
  OB_UNIS_VERSION(1);
public:
  ObTxExecPart()
    : ls_id_(), exec_epoch_(-1), transfer_epoch_(-1)
  {}
  ObTxExecPart(share::ObLSID ls_id, int64_t epoch, int64_t transfer_epoch)
    : ls_id_(ls_id), exec_epoch_(epoch), transfer_epoch_(transfer_epoch)
  {}
  // Two parts are equal when the log stream id and both epochs are equal.
  inline bool operator==(const ObTxExecPart &other) const
  {
    bool same = false;
    if (other.ls_id_ == ls_id_) {
      same = (other.exec_epoch_ == exec_epoch_) && (other.transfer_epoch_ == transfer_epoch_);
    }
    return same;
  }
  // Valid means: a valid log stream id and at least one positive epoch.
  bool is_valid() const
  {
    const bool has_epoch = (exec_epoch_ > 0) || (transfer_epoch_ > 0);
    return ls_id_.is_valid() && has_epoch;
  }
  share::ObLSID ls_id_;
  int64_t exec_epoch_;
  int64_t transfer_epoch_;
  TO_STRING_KV(K_(ls_id), K_(exec_epoch), K_(transfer_epoch));
};
// Check information attached to an ObStateInfo: the log stream the check
// originated from plus the participant (with epochs) being checked.
struct ObStandbyCheckInfo
{
  OB_UNIS_VERSION(1);
public:
  ObStandbyCheckInfo() : check_info_ori_ls_id_(-1), check_part_() {}
  ~ObStandbyCheckInfo() {}
  // Equal when both the origin log stream id and the checked part are equal.
  bool operator==(const ObStandbyCheckInfo &other) const
  {
    return check_info_ori_ls_id_ == other.check_info_ori_ls_id_
           && check_part_ == other.check_part_;
  }
  // Member-wise copy; deliberately returns void, so assignments cannot be chained.
  void operator=(const ObStandbyCheckInfo &other)
  {
    check_info_ori_ls_id_ = other.check_info_ori_ls_id_;
    check_part_ = other.check_part_;
  }
  // Valid only when both the origin log stream id and the checked part are valid.
  bool is_valid() const
  {
    bool valid = false;
    if (check_info_ori_ls_id_.is_valid()) {
      valid = check_part_.is_valid();
    }
    return valid;
  }
  share::ObLSID check_info_ori_ls_id_; // id of the log stream the check info originates from
  ObTxExecPart check_part_;
  TO_STRING_KV(K_(check_info_ori_ls_id), K_(check_part));
};
class ObStateInfo
{
public:
ObStateInfo() : state_(ObTxState::UNKNOWN), version_(), snapshot_version_() {}
ObStateInfo() : state_(ObTxState::UNKNOWN), version_(), snapshot_version_(), check_info_() {}
ObStateInfo(const share::ObLSID &ls_id,
const ObTxState &state,
const share::SCN &version,
@ -1570,15 +1635,19 @@ public:
state_ = state_info.state_;
version_ = state_info.version_;
snapshot_version_ = state_info.snapshot_version_;
check_info_ = state_info.check_info_;
}
bool need_update(const ObStateInfo &state_info);
TO_STRING_KV(K_(ls_id), K_(state), K_(version), K_(snapshot_version))
TO_STRING_KV(K_(ls_id), K_(state), K_(version), K_(snapshot_version), K_(check_info))
OB_UNIS_VERSION(1);
public:
share::ObLSID ls_id_;
ObTxState state_;
share::SCN version_;
share::SCN snapshot_version_;
// for epoch check
ObStandbyCheckInfo check_info_;
};
typedef common::ObSEArray<ObElrTransInfo, 1, TransModulePageAllocator> ObElrTransInfoArray;
@ -1616,6 +1685,24 @@ static const int64_t MAX_PART_CTX_COUNT = 700 * 1000;
static const int DUP_TABLE_LEASE_LIST_MAX_COUNT = 8;
#define TRANS_AGGRE_LOG_TIMESTAMP OB_INVALID_TIMESTAMP
typedef common::ObSEArray<ObTxExecPart, share::OB_DEFAULT_LS_COUNT> ObTxCommitParts;
typedef common::ObSEArray<ObTxExecPart, share::OB_DEFAULT_LS_COUNT> ObTxRollbackParts;
// Append the ls_id_ of every element of `commit_parts` to `parts`.
// Requires an `int ret` in the enclosing scope: the loop only runs while ret
// is successful and stops at the first push_back failure, leaving the error
// in ret. Both arguments are parenthesized so that expressions such as `*ptr`
// expand correctly.
#define CONVERT_COMMIT_PARTS_TO_PARTS(commit_parts, parts)                      \
  for (int64_t idx = 0; OB_SUCC(ret) && idx < (commit_parts).count(); idx++) {  \
    if (OB_FAIL((parts).push_back((commit_parts).at(idx).ls_id_))) {            \
      TRANS_LOG(WARN, "parts push failed", K(ret));                             \
    }                                                                           \
  }
// Append one ObTxExecPart(ls_id, -1, -1) to `commit_parts` for every log
// stream id in `parts`.
// Requires an `int ret` in the enclosing scope: the loop only runs while ret
// is successful and stops at the first push_back failure, leaving the error
// in ret. Both arguments are parenthesized so that expressions such as `*ptr`
// expand correctly.
#define CONVERT_PARTS_TO_COMMIT_PARTS(parts, commit_parts)                           \
  for (int64_t idx = 0; OB_SUCC(ret) && idx < (parts).count(); idx++) {              \
    if (OB_FAIL((commit_parts).push_back(ObTxExecPart((parts).at(idx), -1, -1)))) {  \
      TRANS_LOG(WARN, "parts push failed", K(ret));                                  \
    }                                                                                \
  }
class ObEndParticipantsRes
{
public:
@ -1650,6 +1737,7 @@ public:
// Build an exec info whose listed arrays allocate through `allocator`.
// Each array is constructed with OB_MALLOC_NORMAL_BLOCK_SIZE and a
// ModulePageAllocator made from `allocator` plus a label string.
// NOTE(review): the "INC_PART`" and "INTER_PART`" labels end with a backtick,
// unlike the other labels — confirm whether that is intended.
explicit ObTxExecInfo(TransModulePageAllocator &allocator)
: participants_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "PARTICIPANT")),
incremental_participants_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "INC_PART`")),
intermediate_participants_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "INTER_PART`")),
redo_lsns_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "REDO_LSNS")),
prepare_log_info_arr_(OB_MALLOC_NORMAL_BLOCK_SIZE, ModulePageAllocator(allocator, "PREPARE_INFO")) {}
public:
@ -1663,12 +1751,15 @@ public:
private:
ObTxExecInfo &operator=(const ObTxExecInfo &info);
int assign_commit_parts(const share::ObLSArray &participants,
const ObTxCommitParts &commit_parts);
public:
TO_STRING_KV(K_(state),
K_(upstream),
K_(participants),
K_(incremental_participants),
K_(intermediate_participants),
K_(prev_record_lsn),
K_(redo_lsns),
"redo_log_no", redo_lsns_.count(),
@ -1690,11 +1781,20 @@ public:
K_(prepare_log_info_arr),
K_(xid),
K_(need_checksum),
K_(is_sub2pc));
K_(is_sub2pc),
K_(is_transfer_blocking),
K_(commit_parts),
K_(transfer_parts),
K_(is_empty_ctx_created_by_transfer),
K_(exec_epoch));
ObTxState state_;
share::ObLSID upstream_;
share::ObLSArray participants_;
ObTxCommitParts commit_parts_;
// for tree phase commit
share::ObLSArray incremental_participants_;
ObTxCommitParts intermediate_participants_;
ObTxCommitParts transfer_parts_;
LogOffSet prev_record_lsn_;
ObRedoLSNArray redo_lsns_;
ObTxBufferNodeArray multi_data_source_;
@ -1720,6 +1820,9 @@ public:
ObXATransID xid_;
bool need_checksum_;
bool is_sub2pc_;
bool is_transfer_blocking_;
bool is_empty_ctx_created_by_transfer_;
int64_t exec_epoch_;
};
static const int64_t USEC_PER_SEC = 1000 * 1000;
@ -1739,6 +1842,8 @@ struct ObMulSourceDataNotifyArg
// force kill trans without abort scn
bool is_force_kill_;
bool is_incomplete_replay_;
ObMulSourceDataNotifyArg() { reset(); }
void reset()
@ -1751,6 +1856,7 @@ struct ObMulSourceDataNotifyArg
redo_submitted_ = false;
redo_synced_ = false;
is_force_kill_ = false;
is_incomplete_replay_ = false;
}
TO_STRING_KV(K_(tx_id),
@ -1760,7 +1866,8 @@ struct ObMulSourceDataNotifyArg
K_(notify_type),
K_(redo_submitted),
K_(redo_synced),
K_(is_force_kill));
K_(is_force_kill),
K_(is_incomplete_replay));
// The redo log of current buf_node has been submitted;
bool is_redo_submitted() const;
@ -1793,6 +1900,20 @@ inline bool IS_CORNER_IMPL(const char *func, const int64_t line, const int64_t p
#define IS_CORNER(ppm) IS_CORNER_IMPL(__FUNCTION__, __LINE__, ppm)
// An SCN is an effective transaction version when it is valid and is neither
// the minimum nor the maximum SCN.
inline bool is_effective_trans_version(const share::SCN trans_version)
{
  bool effective = false;
  if (trans_version.is_valid()) {
    effective = !(trans_version.is_min() || trans_version.is_max());
  }
  return effective;
}
// An integer transaction version is effective unless it is one of the three
// sentinel values -1, 0 or INT64_MAX.
inline bool is_effective_trans_version(const int64_t trans_version)
{
  const bool is_sentinel = (trans_version == -1)
                           || (trans_version == 0)
                           || (trans_version == INT64_MAX);
  return !is_sentinel;
}
} // transaction
} // oceanbase

View File

@ -1674,6 +1674,25 @@ void ObTxDesc::mark_part_abort(const ObTransID tx_id, const int abort_cause)
abort_cause_ = abort_cause;
}
}
int64_t ObTxDesc::get_coord_epoch() const
{
int64_t epoch = -1;
if (OB_UNLIKELY(!coord_id_.is_valid())) {
epoch = -1;
} else {
ARRAY_FOREACH_NORET(commit_parts_, i) {
const ObTxExecPart &part = commit_parts_[i];
if (coord_id_ == part.ls_id_) {
epoch = part.exec_epoch_;
}
}
}
return epoch;
}
} // transaction
} // oceanbase
#undef USING_LOG_PREFIX

View File

@ -29,6 +29,7 @@
#include "ob_trans_hashmap.h"
#include "storage/tx/ob_trans_define.h"
#include "common/ob_simple_iterator.h"
#include "share/ob_common_id.h"
namespace oceanbase
{
@ -362,6 +363,54 @@ public:
const ObSArray<ObTransIDAndAddr> &get_conflict_txs() const { return cflict_txs_; }
};
// Mask set over the participants of a savepoint rollback.
//
// Wraps an ObMaskSet2<ObTxExecPart> together with the id of the rollback
// message and a pointer to the caller-provided part array. Every operation
// except get_tx_msg_id() runs under lock_.
class RollbackMaskSet
{
public:
  RollbackMaskSet() : rollback_parts_(nullptr) {}
  // Bind the set to `parts` and remember the message id. `parts` is stored by
  // pointer, not copied.
  int init(share::ObCommonID tx_msg_id, ObTxRollbackParts &parts)
  {
    ObSpinLockGuard guard(lock_);
    tx_msg_id_ = tx_msg_id;
    rollback_parts_ = &parts;
    return mask_set_.init(&parts);
  }
  // Forward to ObMaskSet2::get_not_mask() under the lock.
  int get_not_mask(ObTxRollbackParts &remain)
  {
    ObSpinLockGuard guard(lock_);
    return mask_set_.get_not_mask(remain);
  }
  // Forward to ObMaskSet2::is_mask() under the lock.
  bool is_mask(const ObTxExecPart &part)
  {
    ObSpinLockGuard guard(lock_);
    return mask_set_.is_mask(part);
  }
  // Forward to ObMaskSet2::mask() under the lock.
  int mask(const ObTxExecPart &part)
  {
    ObSpinLockGuard guard(lock_);
    return mask_set_.mask(part);
  }
  // Forward to ObMaskSet2::is_all_mask() under the lock.
  bool is_all_mask()
  {
    ObSpinLockGuard guard(lock_);
    return mask_set_.is_all_mask();
  }
  // Returns the stored message id; this accessor does not take lock_.
  share::ObCommonID get_tx_msg_id() const
  {
    return tx_msg_id_;
  }
  // Drop the message id, the part-array pointer and the mask state.
  void reset()
  {
    ObSpinLockGuard guard(lock_);
    tx_msg_id_.reset();
    rollback_parts_ = nullptr;
    mask_set_.reset();
  }
  int merge_part(const share::ObLSID add_ls_id,
                 const int64_t exec_epoch,
                 const int64_t transfer_epoch);
  int find_part(const share::ObLSID ls_id,
                const int64_t orig_epoch,
                ObTxExecPart &part);
private:
  ObSpinLock lock_;
  share::ObCommonID tx_msg_id_;
  ObTxRollbackParts *rollback_parts_;
  common::ObMaskSet2<ObTxExecPart> mask_set_;
};
class ObTxDesc final : public ObTransHashLink<ObTxDesc>
{
static constexpr const char *OP_LABEL = "TX_DESC_VALUE";
@ -373,7 +422,6 @@ class ObTxDesc final : public ObTransHashLink<ObTxDesc>
friend class ObTxStmtInfo;
friend class IterateTxSchedulerFunctor;
friend class ObTxnFreeRouteCtx;
typedef common::ObMaskSet2<ObTxLSEpochPair> MaskSet;
OB_UNIS_VERSION(1);
protected:
uint64_t tenant_id_; // FIXME: removable
@ -495,7 +543,7 @@ protected:
// used during commit
share::ObLSID coord_id_; // coordinator ID
int64_t commit_expire_ts_; // commit operation deadline
share::ObLSArray commit_parts_; // participants to do commit
ObTxCommitParts commit_parts_; // participants to do commit
share::SCN commit_version_; // Tx commit version
int commit_out_; // the commit result
int commit_times_; // times of sent commit request
@ -510,7 +558,7 @@ private:
ObSpinLock commit_cb_lock_; // protect commit_cb_ field
ObITxCallback *commit_cb_; // async commit callback
int64_t exec_info_reap_ts_; // the time reaping incremental tx exec info
MaskSet brpc_mask_set_; // used in message driven savepoint rollback
RollbackMaskSet brpc_mask_set_; // used in message driven savepoint rollback
ObTransCond rpc_cond_; // used in message driven savepoint rollback
ObTxTimeoutTask commit_task_; // commit retry task
@ -629,7 +677,7 @@ public:
void set_with_temporary_table() { flags_.WITH_TEMP_TABLE_ = true; }
bool with_temporary_table() const { return flags_.WITH_TEMP_TABLE_; }
int64_t get_op_sn() const { return op_sn_; }
void inc_op_sn() { state_change_flags_.DYNAMIC_CHANGED_ = true; ++op_sn_; }
void inc_op_sn(const uint64_t num = 1) { state_change_flags_.DYNAMIC_CHANGED_ = true; ATOMIC_AAF(&op_sn_, num); }
share::SCN get_commit_version() const { return commit_version_; }
bool contain_savepoint(const ObString &sp);
bool is_tx_end() {
@ -733,6 +781,7 @@ LST_DO(DEF_FREE_ROUTE_DECODE, (;), static, dynamic, parts, extra);
void set_explicit() { flags_.EXPLICIT_ = true; }
void clear_interrupt() { flags_.INTERRUPTED_ = false; }
void mark_part_abort(const ObTransID tx_id, const int abort_cause);
int64_t get_coord_epoch() const;
ObTxSEQ get_and_inc_tx_seq(int16_t branch, int N) const;
ObTxSEQ inc_and_get_tx_seq(int16_t branch) const;
ObTxSEQ get_tx_seq(int64_t seq_abs = 0) const;
@ -742,6 +791,7 @@ LST_DO(DEF_FREE_ROUTE_DECODE, (;), static, dynamic, parts, extra);
typedef common::ObSimpleIterator<ObTxSchedulerStat,
ObModIds::OB_TRANS_VIRTUAL_TABLE_TRANS_STAT, 16> ObTxSchedulerStatIterator;
class ObTxDescMgr final
{
public:
@ -764,7 +814,6 @@ public:
int64_t get_alloc_count() const { return map_.alloc_cnt(); }
int64_t get_total_count() const { return map_.count(); }
int iterate_tx_scheduler_stat(ObTxSchedulerStatIterator &tx_scheduler_stat_iter);
private:
struct {
bool inited_: 1;
bool stoped_: 1;
@ -773,34 +822,34 @@ private:
{
public:
ObTxDescAlloc(): alloc_cnt_(0)
#ifndef NDEBUG
#ifndef NDEBUG
, lk_()
, list_()
#endif
{}
ObTxDesc* alloc_value()
{
ATOMIC_INC(&alloc_cnt_);
ObTxDesc *it = op_alloc(ObTxDesc);
#ifndef NDEBUG
#endif
{}
ObTxDesc* alloc_value()
{
ATOMIC_INC(&alloc_cnt_);
ObTxDesc *it = op_alloc(ObTxDesc);
#ifndef NDEBUG
ObSpinLockGuard guard(lk_);
list_.insert(it->alloc_link_);
#endif
#endif
return it;
}
void free_value(ObTxDesc *v)
{
if (NULL != v) {
ATOMIC_DEC(&alloc_cnt_);
#ifndef NDEBUG
#ifndef NDEBUG
ObSpinLockGuard guard(lk_);
v->alloc_link_.remove();
#endif
#endif
op_free(v);
}
}
int64_t get_alloc_cnt() const { return ATOMIC_LOAD(&alloc_cnt_); }
#ifndef NDEBUG
#ifndef NDEBUG
template<typename Function>
int for_each(Function &fn)
{
@ -814,13 +863,13 @@ private:
}
return ret;
}
#endif
private:
int64_t alloc_cnt_;
#ifndef NDEBUG
ObSpinLock lk_;
ObTxDesc::DLink list_;
#endif
#endif
private:
int64_t alloc_cnt_;
#ifndef NDEBUG
ObSpinLock lk_;
ObTxDesc::DLink list_;
#endif
};
ObTransHashMap<ObTransID, ObTxDesc, ObTxDescAlloc, common::SpinRWLock, 1 << 16 /*bucket_num*/> map_;
std::function<int(ObTransID&)> tx_id_allocator_;

View File

@ -31,6 +31,7 @@
#include "storage/tx/ob_trans_service.h"
#include "storage/tx/ob_keep_alive_ls_handler.h"
#include "storage/tx/ob_xa_service.h"
#include "storage/tablet/ob_tablet_transfer_tx_ctx.h"
namespace oceanbase
{
@ -357,6 +358,176 @@ private:
ObIArray<ObTxCommitCallback> &cb_array;
};
class TransferOutTxOpFunctor
{
public:
TransferOutTxOpFunctor(const int64_t abs_expired_time, int64_t except_tx_id, const SCN data_end_scn,
const SCN op_scn, NotifyType op_type, bool is_replay, ObLSID dest_ls_id, int64_t transfer_epoch)
: abs_expired_time_(abs_expired_time), except_tx_id_(except_tx_id), data_end_scn_(data_end_scn),
op_scn_(op_scn), op_type_(op_type), is_replay_(is_replay), dest_ls_id_(dest_ls_id),
transfer_epoch_(transfer_epoch), count_(0), op_tx_count_(0), ret_(OB_SUCCESS)
{
SET_EXPIRED_LIMIT(100 * 1000 /*100ms*/, 3 * 1000 * 1000 /*3s*/);
}
~TransferOutTxOpFunctor() { PRINT_FUNC_STAT; }
OPERATOR_V4(TransferOutTxOpFunctor)
{
bool bool_ret = false;
int ret = OB_SUCCESS;
if (!tx_id.is_valid() || OB_ISNULL(tx_ctx)) {
ret_ = ret = OB_INVALID_ARGUMENT;
TRANS_LOG(WARN, "invalid argument", K(tx_id), "ctx", OB_P(tx_ctx));
} else {
++count_;
if ((count_ % BATCH_CHECK_COUNT) == 0) {
const int64_t now = ObTimeUtility::current_time();
if (now >= abs_expired_time_) {
ret_ = ret = OB_TIMEOUT;
TRANS_LOG(WARN, "transfer block tx timeout", K(count_));
}
}
}
if (OB_FAIL(ret)) {
} else if (tx_id.get_id() == except_tx_id_) {
bool_ret = true;
} else {
bool is_operated = false;
if (OB_FAIL(tx_ctx->do_transfer_out_tx_op(data_end_scn_, op_scn_, op_type_, is_replay_,
dest_ls_id_, transfer_epoch_, is_operated))) {
TRANS_LOG(WARN, "do_transfer_out_tx_op failed", KR(ret), K(*tx_ctx));
ret_ = ret;
} else {
if (is_operated) {
op_tx_count_++;
}
bool_ret = true;
}
}
return bool_ret;
}
int get_ret() const { return ret_; }
int64_t get_count() const { return count_; }
int64_t get_op_tx_count() const { return op_tx_count_; }
private:
static const int64_t BATCH_CHECK_COUNT = 100;
int64_t abs_expired_time_;
int64_t except_tx_id_;
const SCN data_end_scn_;
const SCN op_scn_;
NotifyType op_type_;
bool is_replay_;
ObLSID dest_ls_id_;
int64_t transfer_epoch_;
int64_t count_;
int64_t op_tx_count_;
int ret_;
};
class WaitTxWriteEndFunctor
{
public:
WaitTxWriteEndFunctor(const int64_t abs_expired_time)
: abs_expired_time_(abs_expired_time), count_(0), ret_(OB_SUCCESS)
{
SET_EXPIRED_LIMIT(100 * 1000 /*100ms*/, 3 * 1000 * 1000 /*3s*/);
}
~WaitTxWriteEndFunctor() { PRINT_FUNC_STAT; }
OPERATOR_V4(WaitTxWriteEndFunctor)
{
bool bool_ret = false;
int ret = OB_SUCCESS;
if (!tx_id.is_valid() || OB_ISNULL(tx_ctx)) {
ret_ = ret = OB_INVALID_ARGUMENT;
TRANS_LOG(WARN, "invalid argument", K(tx_id), "ctx", OB_P(tx_ctx));
} else {
++count_;
if ((count_ % BATCH_CHECK_COUNT) == 0) {
const int64_t now = ObTimeUtility::current_time();
if (now >= abs_expired_time_) {
ret_ = ret = OB_TIMEOUT;
TRANS_LOG(WARN, "wait tx write end timeout", K(count_));
}
}
}
if (OB_FAIL(ret)) {
} else {
if (OB_FAIL(tx_ctx->wait_tx_write_end())) {
TRANS_LOG(WARN, "wait tx write end failed", KR(ret), K(*tx_ctx));
ret_ = ret;
} else {
bool_ret = true;
}
}
return bool_ret;
}
int get_ret() const { return ret_; }
int64_t get_count() const { return count_; }
private:
static const int64_t BATCH_CHECK_COUNT = 100;
int64_t abs_expired_time_;
int64_t count_;
int ret_;
};
class CollectTxCtxFunctor
{
public:
CollectTxCtxFunctor(const int64_t abs_expired_time,
share::ObLSID dest_ls_id,
SCN log_scn,
const ObIArray<common::ObTabletID> &tablet_list,
int64_t &tx_count,
int64_t &collect_count,
ObIArray<ObTxCtxMoveArg> &res)
: abs_expired_time_(abs_expired_time), dest_ls_id_(dest_ls_id), log_scn_(log_scn),
tablet_list_(tablet_list), tx_count_(tx_count), collect_count_(collect_count), res_(res), ret_(OB_SUCCESS)
{
SET_EXPIRED_LIMIT(100 * 1000 /*100ms*/, 3 * 1000 * 1000 /*3s*/);
}
~CollectTxCtxFunctor() { PRINT_FUNC_STAT; }
OPERATOR_V4(CollectTxCtxFunctor)
{
bool bool_ret = false;
int ret = OB_SUCCESS;
if (!tx_id.is_valid() || OB_ISNULL(tx_ctx)) {
ret_ = ret = OB_INVALID_ARGUMENT;
TRANS_LOG(WARN, "invalid argument", K(tx_id), "ctx", OB_P(tx_ctx));
} else {
++tx_count_;
ObTxCtxMoveArg arg;
bool is_collected = false;
if (OB_FAIL(tx_ctx->collect_tx_ctx(dest_ls_id_, log_scn_, tablet_list_, arg, is_collected))) {
TRANS_LOG(WARN, "collect_tx_ctx", KR(ret), K(*tx_ctx));
ret_ = ret;
} else if (is_collected && OB_FAIL(res_.push_back(arg))) {
TRANS_LOG(WARN, "push arg to array fail", KR(ret));
ret_ = ret;
} else {
bool_ret = true;
if (is_collected) {
collect_count_++;
}
}
}
return bool_ret;
}
int get_ret() const { return ret_; }
int64_t get_tx_count() const { return tx_count_; }
int64_t get_collect_count() const { return collect_count_; }
private:
static const int64_t BATCH_CHECK_COUNT = 100;
int64_t abs_expired_time_;
share::ObLSID dest_ls_id_;
SCN log_scn_;
const ObIArray<common::ObTabletID> &tablet_list_;
int64_t &tx_count_;
int64_t &collect_count_;
ObIArray<ObTxCtxMoveArg> &res_;
int ret_;
};
class StopLSFunctor
{
public:
@ -785,6 +956,8 @@ public:
}
if (OB_SUCC(ret)) {
share::ObLSArray participants_arr;
ObTxData *tx_data = NULL;
tx_ctx->ctx_tx_data_.get_tx_data_ptr(tx_data);
if (OB_FAIL(tx_ctx->get_2pc_participants_copy(participants_arr))) {
TRANS_LOG_RET(WARN, ret, "ObTxStat get participants copy error", K(ret));
} else if (OB_FAIL(tx_stat.init(tx_ctx->addr_,
@ -810,7 +983,11 @@ public:
tx_ctx->is_exiting_,
tx_ctx->exec_info_.xid_,
tx_ctx->exec_info_.upstream_,
tx_ctx->last_request_ts_))) {
tx_ctx->last_request_ts_,
OB_NOT_NULL(tx_data) ? tx_data->start_scn_.atomic_load() : SCN::invalid_scn(),
OB_NOT_NULL(tx_data) ? tx_data->end_scn_.atomic_load() : SCN::invalid_scn(),
tx_ctx->get_rec_log_ts_(),
tx_ctx->sub_state_.is_transfer_blocking()))) {
TRANS_LOG_RET(WARN, ret, "ObTxStat init error", K(ret), KPC(tx_ctx));
} else if (OB_FAIL(tx_stat_iter_.push(tx_stat))) {
TRANS_LOG_RET(WARN, ret, "ObTxStatIterator push trans stat error", K(ret));
@ -860,7 +1037,6 @@ public:
TRANS_LOG(WARN, "invalid argument", K(tx_id), "ctx", OB_P(tx_ctx));
ret = OB_INVALID_ARGUMENT;
} else {
ObTxCtxTableInfo ctx_info;
rec_log_ts_ = share::SCN::min(rec_log_ts_, tx_ctx->get_rec_log_ts());
}
if (OB_SUCCESS == ret) {

File diff suppressed because it is too large Load Diff

View File

@ -86,6 +86,62 @@ const static int64_t OB_TX_MAX_LOG_CBS = 15;
const static int64_t PREALLOC_LOG_CALLBACK_COUNT = 3;
const static int64_t RESERVE_LOG_CALLBACK_COUNT_FOR_FREEZING = 1;
// Linear search: returns the index of the first element of `array` for which
// `equal_func(element)` is true, or -1 when no element matches.
template<typename T, typename fn>
int64_t search(const ObIArray<T> &array, fn &equal_func)
{
  const int64_t total = array.count();
  for (int64_t pos = 0; pos < total; ++pos) {
    if (equal_func(array.at(pos))) {
      return pos;
    }
  }
  return -1;
}
// Predicate that is true for an item whose check_info_ equals the
// ObStandbyCheckInfo given at construction. Only a reference to that info is
// stored, so it has to outlive the functor.
template<typename T>
class EqualToTransferPartFunctor
{
public:
  EqualToTransferPartFunctor(const ObStandbyCheckInfo &tmp_info)
    : tmp_info_(tmp_info)
  {}
  bool operator()(const T& item) {
    return item.check_info_ == tmp_info_;
  }
private:
  const ObStandbyCheckInfo &tmp_info_;
};
// Predicate that matches an item against a reference state info:
// - ls_id_ must be equal;
// - if the reference carries a valid check_info_, check_info_ must be equal
//   too; otherwise the ls_id_ match alone is enough.
// Only a reference to the info is stored, so it has to outlive the functor.
template<typename T>
class EqualToStateInfoFunctor
{
public:
  EqualToStateInfoFunctor(const T &tmp_info)
    : tmp_info_(tmp_info)
  {}
  bool operator()(const T& item) {
    if (!(tmp_info_.ls_id_ == item.ls_id_)) {
      return false;
    }
    if (!tmp_info_.check_info_.is_valid()) {
      // for old version msg compat
      return true;
    }
    return tmp_info_.check_info_ == item.check_info_;
  }
private:
  const T &tmp_info_;
};
// participant transaction context
class ObPartTransCtx : public ObTransCtx,
public ObTsCbTask,
@ -109,7 +165,8 @@ public:
role_state_(TxCtxRoleState::FOLLOWER),
coord_prepare_info_arr_(OB_MALLOC_NORMAL_BLOCK_SIZE,
ModulePageAllocator(reserve_allocator_, "PREPARE_INFO")),
standby_part_collected_(), ask_state_info_interval_(100 * 1000), refresh_state_info_interval_(100 * 1000)
standby_part_collected_(), ask_state_info_interval_(100 * 1000), refresh_state_info_interval_(100 * 1000),
transfer_deleted_(false)
{ /*reset();*/ }
~ObPartTransCtx() { destroy(); }
void destroy();
@ -124,7 +181,8 @@ public:
const uint64_t cluster_id,
const int64_t epoch,
ObLSTxCtxMgr *ls_ctx_mgr,
const bool for_replay);
const bool for_replay,
ObXATransID xid);
void reset() { }
int construct_context(const ObTransMsg &msg);
public:
@ -136,7 +194,7 @@ public:
*/
int kill(const KillTransArg &arg, ObIArray<ObTxCommitCallback> &cb_array);
memtable::ObMemtableCtx *get_memtable_ctx() { return &mt_ctx_; }
int commit(const share::ObLSArray &parts,
int commit(const ObTxCommitParts &parts,
const MonotonicTs &commit_time,
const int64_t &expire_ts,
const common::ObString &app_trace_info,
@ -153,7 +211,7 @@ public:
uint64_t get_tenant_id() const { return tenant_id_; }
int64_t get_role_state() const { return role_state_; }
// for xa
int sub_prepare(const share::ObLSArray &parts,
int sub_prepare(const ObTxCommitParts &parts,
const MonotonicTs &commit_time,
const int64_t &expire_ts,
const common::ObString &app_trace_info,
@ -165,7 +223,7 @@ public:
const bool is_rollback);
int dump_2_text(FILE *fd);
int init_for_transfer_move(const ObTxCtxMoveArg &arg);
public:
int replay_start_working_log(const share::SCN start_working_ts);
int set_trans_app_trace_id_str(const ObString &app_trace_id_str);
@ -212,11 +270,10 @@ public:
int check_for_standby(const share::SCN &snapshot,
bool &can_read,
share::SCN &trans_version,
bool &is_determined_state);
int handle_trans_ask_state(const share::SCN &snapshot, ObAskStateRespMsg &resp);
share::SCN &trans_version);
int handle_trans_ask_state(const ObAskStateMsg &req, ObAskStateRespMsg &resp);
int handle_trans_ask_state_resp(const ObAskStateRespMsg &msg);
int handle_trans_collect_state(ObStateInfo &state_info, const SCN &snapshot);
int handle_trans_collect_state(ObCollectStateRespMsg &resp, const ObCollectStateMsg &req);
int handle_trans_collect_state_resp(const ObCollectStateRespMsg &msg);
// tx state check for 4377
@ -241,6 +298,7 @@ private:
K(start_replay_ts_),
K(start_recover_ts_),
K(is_incomplete_replay_ctx_),
K(epoch_),
K(mt_ctx_),
K(coord_prepare_info_arr_),
K_(upstream_state),
@ -294,6 +352,7 @@ private:
int common_on_success_(ObTxLogCb * log_cb);
int on_success_ops_(ObTxLogCb * log_cb);
void check_and_register_timeout_task_();
int recover_ls_transfer_status_();
// bool need_commit_barrier();
@ -631,7 +690,7 @@ protected:
private:
int apply_2pc_msg_(const ObTwoPhaseCommitMsgType msg_type);
int set_2pc_upstream_(const share::ObLSID&upstream);
int set_2pc_participants_(const share::ObLSArray &participants);
int set_2pc_participants_(const ObTxCommitParts &participants);
int set_2pc_incremental_participants_(const share::ObLSArray &participants);
int set_2pc_request_id_(const int64_t request_id);
int update_2pc_prepare_version_(const share::SCN &prepare_version);
@ -668,6 +727,41 @@ private:
int post_tx_sub_prepare_resp_(const int status);
int post_tx_sub_commit_resp_(const int status);
int post_tx_sub_rollback_resp_(const int status);
int submit_log_if_allow(const char *buf,
const int64_t size,
const share::SCN &base_ts,
ObTxBaseLogCb *cb,
const bool need_nonblock,
const ObTxCbArgArray &cb_arg_array);
// Reports whether two-phase-commit progress of this ctx is currently blocked;
// this is the case exactly when sub_state_ says the ctx is transfer-blocking.
virtual bool is_2pc_blocking() const override {
return sub_state_.is_transfer_blocking();
}
// ======================= for transfer ===============================
public:
int do_transfer_out_tx_op(const share::SCN data_end_scn,
const share::SCN op_scn,
const NotifyType op_type,
const bool is_replay,
const ObLSID dest_ls_id,
const int64_t transfer_epoch,
bool &is_operated);
int collect_tx_ctx(const share::ObLSID dest_ls_id,
const SCN data_end_scn,
const ObIArray<ObTabletID> &tablet_list,
ObTxCtxMoveArg &arg,
bool &is_collected);
int wait_tx_write_end();
int move_tx_op(const ObTransferMoveTxParam &move_tx_param,
const ObTxCtxMoveArg &arg,
const bool is_new_created);
bool is_exec_complete(ObLSID ls_id, int64_t epoch, int64_t transfer_epoch);
bool is_exec_complete_without_lock(ObLSID ls_id, int64_t epoch, int64_t transfer_epoch);
private:
int transfer_op_log_cb_(share::SCN op_scn, NotifyType op_type);
int update_tx_data_end_scn_(const share::SCN end_scn, const share::SCN transfer_scn);
protected:
virtual int post_msg_(const share::ObLSID&receiver, ObTxMsg &msg);
virtual int post_msg_(const ObAddr &server, ObTxMsg &msg);
@ -678,10 +772,7 @@ private:
// ========================== TX COMMITTER BEGIN ==========================
protected:
virtual Ob2PCRole get_2pc_role() const override;
virtual int64_t get_downstream_size() const override
{
return exec_info_.participants_.count();
};
virtual int64_t get_downstream_size() const override;
virtual int64_t get_self_id();
virtual bool is_2pc_logging() const override;
@ -740,11 +831,12 @@ public:
* end_access - end of txn protected resources access
*/
int end_access();
int rollback_to_savepoint(const int64_t op_sn, const ObTxSEQ from_scn, const ObTxSEQ to_scn);
int rollback_to_savepoint(const int64_t op_sn, const ObTxSEQ from_scn, const ObTxSEQ to_scn, ObIArray<ObTxLSEpochPair> &downstream_parts);
int set_block_frozen_memtable(memtable::ObMemtable *memtable);
void clear_block_frozen_memtable();
bool is_logging_blocked();
bool is_xa_trans() const { return !exec_info_.xid_.empty(); }
bool is_transfer_deleted() const { return transfer_deleted_; }
private:
int check_status_();
int tx_keepalive_response_(const int64_t status);
@ -753,19 +845,46 @@ private:
int rollback_to_savepoint_(const ObTxSEQ from_scn, const ObTxSEQ to_scn);
int submit_rollback_to_log_(const ObTxSEQ from_scn, const ObTxSEQ to_scn, ObTxData *tx_data);
int set_state_info_array_();
int update_state_info_array_(const ObStateInfo& state_info);
int update_state_info_array_with_transfer_parts_(const ObTxCommitParts &parts, const ObLSID &ls_id);
void build_and_post_collect_state_msg_(const share::SCN &snapshot);
int build_and_post_ask_state_msg_(const share::SCN &snapshot);
void handle_trans_ask_state_(const SCN &snapshot);
int check_ls_state_(const SCN &snapshot, const ObLSID &ls_id);
int build_and_post_ask_state_msg_(const share::SCN &snapshot,
const share::ObLSID &ori_ls_id, const ObAddr &ori_addr);
int check_ls_state_(const SCN &snapshot, const ObLSID &ls_id, const ObStandbyCheckInfo &check_info);
int get_ls_replica_readable_scn_(const ObLSID &ls_id, SCN &snapshot_version);
int check_and_submit_redo_log_(bool &try_submit);
int submit_redo_log_for_freeze_(bool &try_submit);
void print_first_mvcc_callback_();
int assign_commit_parts(const share::ObLSArray &log_participants,
const ObTxCommitParts &log_commit_parts);
protected:
// for xa
virtual bool is_sub2pc() const override
{ return exec_info_.is_sub2pc_; }
// =========================== TREE COMMITTER START ===========================
public:
// merge the intermediate_participants into participants during 2pc state transfer
virtual int merge_intermediate_participants() override;
// is_real_upstream indicates whether we are handling requests from the real
// upstream:
// - If the sender equals the upstream, it means that we are handling the
// real leader and we need to collect all responses from the downstream before
// responding to the upstream
// - If the sender is different from the upstream, it means we are handling
// requests from an upstream other than the real upstream. To prevent
// a deadlock in a cyclic commit, we only need to consider our own
// situation before responding to the upstream
// - There may be no sender during handle_timeout, which means we are retransmitting
// the requests and responses, so we only need to pay attention to the upstream
// and all downstreams for retransmitting
virtual bool is_real_upstream() override;
// add_intermediate_participants means add participant into intermediate_participants,
// which is important to ensure the consistency of participants during tree commit
int add_intermediate_participants(const ObLSID ls_id, int64_t transfer_epoch);
private:
bool is_real_upstream_(const ObLSID upstream);
private:
DISALLOW_COPY_AND_ASSIGN(ObPartTransCtx);
private:
@ -878,6 +997,9 @@ private:
// this is a temporary variable which is set to now by default
// therefore, if a follower switches to leader, the variable is set to now
int64_t last_request_ts_;
// for transfer move tx ctx to clean for abort
bool transfer_deleted_;
// ========================================================
};

View File

@ -30,7 +30,7 @@ using namespace share;
namespace obrpc
{
OB_SERIALIZE_MEMBER(ObTransRpcResult, status_, send_timestamp_, private_data_);
OB_SERIALIZE_MEMBER(ObTxRpcRollbackSPResult, status_, send_timestamp_, addr_, born_epoch_, ignore_);
OB_SERIALIZE_MEMBER(ObTxRpcRollbackSPResult, status_, send_timestamp_, addr_, born_epoch_, ignore_, downstream_parts_);
bool need_refresh_location_cache_(const int ret)
{
@ -79,7 +79,8 @@ int handle_sp_rollback_resp(const share::ObLSID &receiver_ls_id,
return OB_SUCCESS;
}
return MTL(ObTransService *)->handle_sp_rollback_resp(receiver_ls_id,
epoch, tx_id, status, request_id, result.born_epoch_, result.addr_);
epoch, tx_id, status, request_id, result.born_epoch_, result.addr_,
result.downstream_parts_);
}
void ObTransRpcResult::reset()

View File

@ -78,9 +78,10 @@ public:
// rollback response has changed to use ObTxRollbackSPRespMsg
// use this field to indicate handler ignore handle by this msg
bool ignore_;
ObSEArray<transaction::ObTxLSEpochPair, 1> downstream_parts_;
public:
int get_status() const { return status_; }
TO_STRING_KV(K_(status), K_(send_timestamp), K_(born_epoch), K_(addr), K_(ignore));
TO_STRING_KV(K_(status), K_(send_timestamp), K_(born_epoch), K_(addr), K_(ignore), K_(downstream_parts));
};
class ObTransRpcProxy : public obrpc::ObRpcProxy

View File

@ -161,6 +161,8 @@ int ObTransService::init(const ObAddr &self,
&dup_table_scan_timer_,
&dup_table_loop_worker_))) {
TRANS_LOG(WARN, "init dup_tablet_scan_task_ failed",K(ret));
} else if (OB_FAIL(rollback_sp_msg_mgr_.init(lib::ObMemAttr(tenant_id, "RollbackSPMgr")))) {
TRANS_LOG(WARN, "init rollback msg map failed", KR(ret));
} else {
self_ = self;
tenant_id_ = tenant_id;
@ -172,6 +174,7 @@ int ObTransService::init(const ObAddr &self,
schema_service_ = schema_service;
ts_mgr_ = ts_mgr;
server_tracer_ = server_tracer;
rollback_sp_msg_sequence_ = ObTimeUtil::current_time();
is_inited_ = true;
TRANS_LOG(INFO, "transaction service inited success", KP(this), K(tenant_memory_limit));
}

View File

@ -146,6 +146,44 @@ public:
ObThreadLocalTransCtxState state_;
} CACHE_ALIGNED;
// Hash-map node that ties a rollback-savepoint message id to the ObTxDesc
// that issued it. The guard takes one reference on the descriptor when it is
// constructed and gives it back on destruction; if that was the last
// reference, the descriptor is handed to ObTxDescMgr::free().
class ObRollbackSPMsgGuard final : public ObTransHashLink<ObRollbackSPMsgGuard>
{
public:
  ObRollbackSPMsgGuard(ObCommonID tx_msg_id, ObTxDesc &tx_desc, ObTxDescMgr &tx_desc_mgr)
    : tx_msg_id_(tx_msg_id),
      tx_desc_(tx_desc),
      tx_desc_mgr_(tx_desc_mgr)
  {
    // pin the descriptor for as long as this guard exists
    tx_desc_.inc_ref(1);
  }
  ~ObRollbackSPMsgGuard()
  {
    const bool was_last_ref = (0 == tx_desc_.dec_ref(1));
    if (was_last_ref) {
      tx_desc_mgr_.free(&tx_desc_);
    }
    tx_msg_id_.reset();
  }
  // descriptor pinned by this guard
  ObTxDesc &get_tx_desc() { return tx_desc_; }
  // key comparison: true when this guard was registered under tx_msg_id
  bool contain(ObCommonID tx_msg_id) { return tx_msg_id == tx_msg_id_; }
private:
  ObCommonID tx_msg_id_;
  ObTxDesc &tx_desc_;
  ObTxDescMgr &tx_desc_mgr_;
};
// Allocation policy for ObRollbackSPMsgGuard values.
// alloc_value() only obtains raw storage via ob_malloc (no constructor is run
// here); free_value() runs the destructor and releases the storage.
class ObRollbackSPMsgGuardAlloc
{
public:
  static ObRollbackSPMsgGuard* alloc_value()
  {
    void *raw = ob_malloc(sizeof(ObRollbackSPMsgGuard), "RollbackSPMsg");
    return static_cast<ObRollbackSPMsgGuard *>(raw);
  }
  static void free_value(ObRollbackSPMsgGuard *p)
  {
    if (NULL == p) {
      return;
    }
    p->~ObRollbackSPMsgGuard();
    ob_free(p);
  }
};
class ObTransService : public common::ObSimpleThreadPool
{
public:
@ -225,6 +263,7 @@ private:
const int64_t stmt_expired_time,
const uint64_t tenant_id);
int handle_batch_msg_(const int type, const char *buf, const int32_t size);
// Advances rollback_sp_msg_sequence_ by one via ATOMIC_AAF and returns the
// result (presumably the post-increment value, i.e. add-and-fetch — confirm
// against the ATOMIC_AAF definition).
int64_t fetch_rollback_sp_sequence_() { return ATOMIC_AAF(&rollback_sp_msg_sequence_, 1); }
public:
int check_dup_table_ls_readable();
int check_dup_table_tablet_readable();
@ -296,6 +335,10 @@ private:
obrpc::ObSrvRpcProxy *rpc_proxy_;
ObTxELRUtil elr_util_;
// for rollback-savepoint request-id
int64_t rollback_sp_msg_sequence_;
// for rollback-savepoint msg resp callback to find tx_desc
ObTransHashMap<ObCommonID, ObRollbackSPMsgGuard, ObRollbackSPMsgGuardAlloc, common::SpinRWLock, 1 << 16 /*bucket_num*/> rollback_sp_msg_mgr_;
private:
DISALLOW_COPY_AND_ASSIGN(ObTransService);
};

View File

@ -209,6 +209,7 @@ int ObTransService::do_commit_tx_(ObTxDesc &tx,
tx.trace_info_.get_app_trace_info(),
tx.op_sn_,
SCN::max_scn(),
tx.get_coord_epoch(),
commit_version,
self_))
|| !commit_need_retry_(ret))) {
@ -650,7 +651,9 @@ int ObTransService::decide_tx_commit_info_(ObTxDesc &tx, ObTxPart *&coord)
ARRAY_FOREACH(parts, i) {
if (parts[i].is_without_ctx()) {
// skip participant, without ctx created
} else if (OB_FAIL(tx.commit_parts_.push_back(parts[i].id_))) {
} else if (OB_FAIL(tx.commit_parts_.push_back(ObTxExecPart(parts[i].id_,
parts[i].epoch_,
-1)))) {
TRANS_LOG(WARN, "part id push fail", K(ret), K(tx));
} else if (!tx.coord_id_.is_valid() && parts[i].addr_ == self_) {
tx.coord_id_ = parts[i].id_;
@ -737,8 +740,9 @@ int ObTransService::build_tx_sub_prepare_msg_(const ObTxDesc &tx, ObTxSubPrepare
msg.cluster_id_ = tx.cluster_id_;
msg.request_id_ = tx.op_sn_;
msg.xid_ = tx.xid_;
if (OB_FAIL(msg.parts_.assign(tx.commit_parts_))) {
TRANS_LOG(WARN, "fail to assign parts", K(ret), K(tx));
CONVERT_COMMIT_PARTS_TO_PARTS(tx.commit_parts_, msg.parts_);
if (FAILEDx(msg.commit_parts_.assign(tx.commit_parts_))) {
TRANS_LOG(WARN, "assign commit parts fail", K(ret), K(tx));
}
return ret;
}
@ -1120,6 +1124,21 @@ int ObTransService::get_write_store_ctx(ObTxDesc &tx,
TRANS_LOG(WARN, "acquire tx ctx fail", K(ret), K(tx), K(ls_id), KPC(this));
} else if (OB_FAIL(tx_ctx->start_access(tx, data_scn))) {
TRANS_LOG(WARN, "tx ctx start access fail", K(ret), K(tx_ctx), K(ls_id), KPC(this));
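// During the transfer move_tx phase the src_ls tx_ctx is put into the dest_ls
// ctx_mgr; if the transfer aborts, that ctx has to be removed again.
// Accessing a tx_ctx first gets the ctx from the mgr and only afterwards
// increases pending_write, so we may be holding a ctx that the transfer has
// already deleted: check is_transfer_deleted() and retry to create a new ctx.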
if (OB_NEED_RETRY == ret && tx_ctx->is_transfer_deleted()) {
ret = OB_SUCCESS;
revert_tx_ctx_(store_ctx.ls_, tx_ctx);
ob_usleep(10 * 1000);
if (OB_FAIL(acquire_tx_ctx(ls_id, tx, tx_ctx, store_ctx.ls_, special))) {
TRANS_LOG(WARN, "acquire tx ctx fail", K(ret), K(tx), K(ls_id), KPC(this));
} else if (OB_FAIL(tx_ctx->start_access(tx, data_scn))) {
TRANS_LOG(WARN, "tx ctx start access fail", K(ret), K(tx_ctx), K(ls_id), KPC(this));
}
}
}
if (OB_FAIL(ret)) {
} else if (FALSE_IT(access_started = true)) {
} else if (OB_FAIL(get_tx_table_guard_(store_ctx.ls_, ls_id, tx_table_guard))) {
TRANS_LOG(WARN, "acquire tx table guard fail", K(ret), K(tx), K(ls_id), KPC(this));
@ -1257,15 +1276,14 @@ int ObTransService::create_tx_ctx_(const share::ObLSID &ls_id,
tx.sess_id_, /*session_id*/
tx.addr_,
tx.get_expire_ts(),
this);
this,
tx.xid_);
ret = OB_NOT_NULL(ls) ?
ls->create_tx_ctx(arg, existed, ctx) :
tx_ctx_mgr_.create_tx_ctx(arg, existed, ctx);
if (OB_FAIL(ret)) {
TRANS_LOG(WARN, "get tx ctx from mgr fail", K(ret), K(tx.tx_id_), K(ls_id), K(tx), K(arg));
ctx = NULL;
} else if (!tx.xid_.empty() && !existed) {
ctx->exec_info_.xid_ = tx.xid_;
}
TRANS_LOG(TRACE, "create tx ctx", K(ret), K(ls_id), K(tx));
return ret;
@ -1570,9 +1588,11 @@ int ObTransService::build_tx_commit_msg_(const ObTxDesc &tx, ObTxCommitMsg &msg)
msg.cluster_id_ = tx.cluster_id_;
msg.app_trace_info_ = tx.trace_info_.get_app_trace_info();
msg.request_id_ = tx.op_sn_;
msg.epoch_ = tx.get_coord_epoch();
msg.commit_start_scn_ = tx.commit_start_scn_;
if (OB_FAIL(msg.parts_.assign(tx.commit_parts_))) {
TRANS_LOG(WARN, "assign parts fail", K(ret), K(tx));
CONVERT_COMMIT_PARTS_TO_PARTS(tx.commit_parts_, msg.parts_);
if (FAILEDx(msg.commit_parts_.assign(tx.commit_parts_))) {
TRANS_LOG(WARN, "assign part epochs fail", K(ret), K(tx));
}
return ret;
}
@ -1783,7 +1803,7 @@ int ObTransService::acquire_global_snapshot__(const int64_t expire_ts,
int ObTransService::batch_post_rollback_savepoint_msg_(ObTxDesc &tx,
ObTxRollbackSPMsg &msg,
const ObIArray<ObTxLSEpochPair> &list,
const ObTxRollbackParts &list,
int &post_succ_num)
{
int ret = OB_SUCCESS;
@ -1791,13 +1811,16 @@ int ObTransService::batch_post_rollback_savepoint_msg_(ObTxDesc &tx,
post_succ_num = 0;
const ObTxDesc *msg_tx_ptr = msg.tx_ptr_;
ARRAY_FOREACH_NORET(list, idx) {
const ObTxLSEpochPair &p = list.at(idx);
msg.receiver_ = p.left_;
msg.epoch_ = p.right_;
const ObTxExecPart &p = list.at(idx);
msg.receiver_ = p.ls_id_;
msg.epoch_ = p.exec_epoch_;
if (msg.epoch_ > 0) {
msg.tx_ptr_ = NULL;
}
if (OB_FAIL(rpc_->post_msg(p.left_, msg))) {
if (p.exec_epoch_ <= 0 && p.transfer_epoch_ > 0) {
msg.set_for_transfer();
}
if (OB_FAIL(rpc_->post_msg(msg.receiver_, msg))) {
if (OB_LS_IS_DELETED == ret) {
ObSpinLockGuard lock(tx.lock_);
ObAddr fake_addr;
@ -1880,15 +1903,22 @@ int ObTransService::handle_trans_commit_request(ObTxCommitMsg &msg,
{
int ret = OB_SUCCESS;
SCN commit_version;
ret = local_ls_commit_tx_(msg.tx_id_,
msg.receiver_,
msg.parts_,
msg.expire_ts_,
msg.app_trace_info_,
msg.request_id_,
msg.commit_start_scn_,
commit_version,
msg.sender_addr_);
if (msg.commit_parts_.count() == 0) {
// for compatible
CONVERT_PARTS_TO_COMMIT_PARTS(msg.parts_, msg.commit_parts_);
}
if (OB_SUCC(ret)) {
ret = local_ls_commit_tx_(msg.tx_id_,
msg.receiver_,
msg.commit_parts_,
msg.expire_ts_,
msg.app_trace_info_,
msg.request_id_,
msg.commit_start_scn_,
msg.epoch_,
commit_version,
msg.sender_addr_);
}
result.reset();
result.init(ret, msg.get_timestamp());
result.private_data_ = commit_version;
@ -1904,11 +1934,12 @@ int ObTransService::handle_trans_commit_request(ObTxCommitMsg &msg,
int ObTransService::local_ls_commit_tx_(const ObTransID &tx_id,
const share::ObLSID &coord,
const share::ObLSArray &parts,
const ObTxCommitParts &parts,
const int64_t &expire_ts,
const common::ObString &app_trace_info,
const int64_t &request_id,
const SCN commit_start_scn,
const int64_t epoch,
SCN &commit_version,
const common::ObAddr &caller)
{
@ -1962,6 +1993,9 @@ int ObTransService::local_ls_commit_tx_(const ObTransID &tx_id,
} else if (ctx->get_scheduler() != caller) {
ret = OB_ERR_UNEXPECTED;
TRANS_LOG(WARN, "receive commit from not scheduler", K(ret), K(caller), K(ctx->get_scheduler()));
} else if (!ctx->is_exec_complete(coord, epoch, -1 /*transfer_epoch*/)) {
ret = OB_TRANS_CTX_NOT_EXIST;
TRANS_LOG(WARN, "tx exec not complete", K(ret));
} else if (OB_FAIL(ctx->commit(parts, commit_time, expire_ts, app_trace_info, request_id))) {
TRANS_LOG(WARN, "commit fail", K(ret), K(coord), K(tx_id));
}
@ -2024,7 +2058,10 @@ int ObTransService::handle_sp_rollback_request(ObTxRollbackSPMsg &msg,
msg.op_sn_,
msg.savepoint_,
ctx_born_epoch,
msg.tx_ptr_);
msg.tx_ptr_,
msg.for_transfer(),
msg.specified_from_scn_,
result.downstream_parts_);
if (msg.use_async_resp()) {
ObTxRollbackSPRespMsg resp;
resp.cluster_version_ = msg.cluster_version_;
@ -2039,7 +2076,9 @@ int ObTransService::handle_sp_rollback_request(ObTxRollbackSPMsg &msg,
resp.orig_epoch_ = msg.epoch_,
resp.epoch_ = ctx_born_epoch;
int tmp_ret = OB_SUCCESS;
if (OB_TMP_FAIL(rpc_->post_msg(msg.sender_addr_, resp))) {
if (OB_TMP_FAIL(resp.downstream_parts_.assign(result.downstream_parts_))) {
TRANS_LOG(WARN, "parts assign failed", K(tmp_ret), K(resp));
} else if (OB_TMP_FAIL(rpc_->post_msg(msg.sender_addr_, resp))) {
TRANS_LOG(WARN, "pos rollback sp resp fail", K(tmp_ret), K(resp));
}
}
@ -2068,7 +2107,8 @@ int ObTransService::handle_sp_rollback_response(ObTxRollbackSPRespMsg &msg,
msg.ret_,
msg.request_id_,
msg.epoch_,
msg.sender_addr_);
msg.sender_addr_,
msg.downstream_parts_);
result.reset();
result.init(ret, msg.get_timestamp());
return ret;
@ -2143,6 +2183,15 @@ int ObTransService::handle_tx_batch_req(int msg_type,
ret = OB_TRANS_CTX_NOT_EXIST; \
TRANS_LOG(INFO, "tx context is exiting",K(ret),K(msg)); \
(void)handle_orphan_2pc_msg_(msg, false, false); \
} else if (ctx->is_2pc_blocking()) { \
ret = OB_NEED_RETRY; \
TRANS_LOG(WARN, "ctx 2pc is blocking", K(ret), K(msg)); \
} else if ((msg_type == TX_2PC_PREPARE_REDO_REQ || \
msg_type == TX_2PC_PREPARE_REQ) && \
!ctx->is_exec_complete(msg.sender_, msg.epoch_, msg.transfer_epoch_)) { \
ret = OB_TRANS_CTX_NOT_EXIST; \
TRANS_LOG(WARN, "tx exec not complete",K(ret), K(msg)); \
(void)handle_orphan_2pc_msg_(msg, false, false); \
} else if (OB_FAIL(ctx->msg_handler__(msg))) { \
TRANS_LOG(WARN, "handle 2pc request fail", K(ret), K(msg)); \
} \
@ -2191,7 +2240,7 @@ bool ObTransService::common_retryable_error_(const int ret) {
);
}
void ObTransService::on_sp_rollback_succ_(const ObTxLSEpochPair &part,
void ObTransService::on_sp_rollback_succ_(const ObTxExecPart &part,
ObTxDesc &tx,
const int64_t born_epoch,
const ObAddr &addr)
@ -2199,26 +2248,49 @@ void ObTransService::on_sp_rollback_succ_(const ObTxLSEpochPair &part,
if (tx.brpc_mask_set_.is_mask(part)) {
TRANS_LOG(DEBUG, "has marked received", K(part));
} else {
if (part.right_ <= 0) {
tx.update_clean_part(part.left_, born_epoch, addr);
if (part.exec_epoch_ <= 0 && part.transfer_epoch_ <= 0) {
tx.update_clean_part(part.ls_id_, born_epoch, addr);
}
(void)tx.brpc_mask_set_.mask(part);
}
}
// Merges every (ls_id, epoch) pair reported as a downstream part of a
// rollback-savepoint response into tx.brpc_mask_set_.
// Stops at the first merge failure and returns its error code.
int ObTransService::merge_rollback_downstream_parts_(ObTxDesc &tx, const ObIArray<ObTxLSEpochPair> &downstream_parts)
{
  int ret = OB_SUCCESS;
  const int64_t part_cnt = downstream_parts.count();
  for (int64_t pos = 0; OB_SUCC(ret) && pos < part_cnt; ++pos) {
    const ObTxLSEpochPair &pair = downstream_parts.at(pos);
    ObLSID add_ls_id = pair.left_;
    if (OB_SUCC(tx.brpc_mask_set_.merge_part(add_ls_id, 0, pair.right_))) {
      TRANS_LOG(INFO, "merge rollback parts", K(tx.tx_id_), K(add_ls_id));
    } else {
      TRANS_LOG(WARN, "merge part failed", KR(ret), K(tx.tx_id_), K(add_ls_id));
    }
  }
  return ret;
}
int ObTransService::handle_sp_rollback_resp(const share::ObLSID &ls_id,
const int64_t orig_epoch,
const transaction::ObTransID &tx_id,
const int status,
const int64_t request_id,
const int64_t ret_epoch,
const ObAddr &ret_addr)
const ObAddr &ret_addr,
const ObIArray<ObTxLSEpochPair> &downstream_parts)
{
int ret = OB_SUCCESS;
TRANS_LOG(INFO, "handle_sp_rollback_resp", K(tx_id), K(ls_id), K(status), K(downstream_parts));
ObRollbackSPMsgGuard *rollback_sp_msg_guard = NULL;
ObTxDesc *tx = NULL;
if (OB_FAIL(tx_desc_mgr_.get(tx_id, tx))) {
// find tx_msg by request_id
ObCommonID msg_id(request_id);
if (request_id <= 0) {
ret = OB_INVALID_ARGUMENT;
TRANS_LOG(WARN, "rollback sp resp request_id is invalid", KR(ret), K(tx_id), K(request_id));
} else if (OB_FAIL(rollback_sp_msg_mgr_.get(msg_id, rollback_sp_msg_guard))) {
TRANS_LOG(WARN, "get trans_desc fail", K(ret), K(tx_id));
} else if (tx->op_sn_ > request_id || tx->tx_id_ != tx_id || tx->state_ != ObTxDesc::State::ROLLBACK_SAVEPOINT) { // fast fail
} else if (FALSE_IT(tx = &rollback_sp_msg_guard->get_tx_desc())) {
} else if (tx->tx_id_ != tx_id || tx->state_ != ObTxDesc::State::ROLLBACK_SAVEPOINT) { // fast fail
TRANS_LOG(WARN, "receive stale rollback response message",
K(status), K(request_id), K(ret_epoch), K(ret_addr), K(tx_id), K(tx->tx_id_), K(tx->op_sn_));
} else if (status == OB_TRANS_RPC_TIMEOUT || common_retryable_error_(status)) {
@ -2226,15 +2298,25 @@ int ObTransService::handle_sp_rollback_resp(const share::ObLSID &ls_id,
} else if (OB_FAIL(tx->lock_.lock(10_ms))) {
TRANS_LOG(WARN, "lock fail", K(ret), K(ls_id), K(tx_id), K(request_id), K(status));
} else {
if (tx->state_ != ObTxDesc::State::ROLLBACK_SAVEPOINT) {
// must compare tx_msg_id in tx lock
if (tx->brpc_mask_set_.get_tx_msg_id() != msg_id) {
TRANS_LOG(WARN, "receive stale rollback response message", K(tx_id), K(tx->brpc_mask_set_.get_tx_msg_id()), K(msg_id));
} else if (tx->state_ != ObTxDesc::State::ROLLBACK_SAVEPOINT) {
TRANS_LOG(WARN, "receive stale rollback response message", K(status), K(request_id), KPC(tx));
} else if (tx->tx_id_ != tx_id || tx->op_sn_ > request_id) {
} else if (tx->tx_id_ != tx_id) {
TRANS_LOG(WARN, "receive old rpc result msg", K(ret), K_(tx->op_sn), K(request_id), K(tx_id), K(tx->tx_id_));
} else if (status == OB_SUCCESS) {
ObTxLSEpochPair pair(ls_id, orig_epoch);
(void)on_sp_rollback_succ_(pair, *tx, ret_epoch, ret_addr);
if (tx->brpc_mask_set_.is_all_mask()) {
tx->rpc_cond_.notify(OB_SUCCESS);
ObTxExecPart p;
if (downstream_parts.count() > 0 && OB_FAIL(merge_rollback_downstream_parts_(*tx, downstream_parts))) {
TRANS_LOG(WARN, "merge rollback downstream parts failed", K(ret), K(tx_id), K(downstream_parts));
} else if (OB_FAIL(tx->brpc_mask_set_.find_part(ls_id, orig_epoch, p))) {
TRANS_LOG(WARN, "find part failed", K(ret), K(ls_id), K(tx_id));
} else {
// find rollback part by ls_id
(void)on_sp_rollback_succ_(p, *tx, ret_epoch, ret_addr);
if (tx->brpc_mask_set_.is_all_mask()) {
tx->rpc_cond_.notify(OB_SUCCESS);
}
}
} else { // other failure
// notify waiter, cause the savepoint rollback fail
@ -2247,7 +2329,7 @@ int ObTransService::handle_sp_rollback_resp(const share::ObLSID &ls_id,
tx->lock_.unlock();
}
if (OB_NOT_NULL(tx)) {
tx_desc_mgr_.revert(*tx);
rollback_sp_msg_mgr_.revert(rollback_sp_msg_guard);
}
return ret;
}
@ -2905,14 +2987,29 @@ int ObTransService::handle_sub_prepare_request(const ObTxSubPrepareMsg &msg,
ObTransRpcResult &result)
{
int ret = OB_SUCCESS;
if (OB_FAIL(sub_prepare_local_ls_(msg.tx_id_,
msg.receiver_,
msg.parts_,
msg.expire_ts_,
msg.app_trace_info_,
msg.request_id_,
msg.xid_))) {
TRANS_LOG(WARN, "handle tx commit request fail", K(ret), K(msg));
if (msg.commit_parts_.count () > 0) {
if (OB_FAIL(sub_prepare_local_ls_(msg.tx_id_,
msg.receiver_,
msg.commit_parts_,
msg.expire_ts_,
msg.app_trace_info_,
msg.request_id_,
msg.xid_))) {
TRANS_LOG(WARN, "handle tx commit request fail", K(ret), K(msg));
}
} else {
// for compatible: msg.commit_parts_ is empty on this path (presumably the
// sender only filled msg.parts_), so build the commit parts from msg.parts_
// and pass the converted list on. Passing msg.commit_parts_ here would hand
// an empty participant list to sub_prepare_local_ls_.
ObTxCommitParts commit_parts;
CONVERT_PARTS_TO_COMMIT_PARTS(msg.parts_, commit_parts);
if (FAILEDx(sub_prepare_local_ls_(msg.tx_id_,
                                  msg.receiver_,
                                  commit_parts,
                                  msg.expire_ts_,
                                  msg.app_trace_info_,
                                  msg.request_id_,
                                  msg.xid_))) {
  TRANS_LOG(WARN, "handle tx commit request fail", K(ret), K(msg));
}
}
result.reset();
result.init(ret, msg.get_timestamp());
@ -2922,9 +3019,9 @@ int ObTransService::handle_sub_prepare_request(const ObTxSubPrepareMsg &msg,
int ObTransService::sub_prepare_local_ls_(const ObTransID &tx_id,
const share::ObLSID &coord,
const share::ObLSArray &parts,
const ObTxCommitParts &parts,
const int64_t &expire_ts,
const common::ObString & app_trace_info,
const common::ObString &app_trace_info,
const int64_t &request_id,
const ObXATransID &xid)
{
@ -3409,13 +3506,12 @@ int ObTransService::check_for_standby(const share::ObLSID &ls_id,
const ObTransID &tx_id,
const SCN &snapshot,
bool &can_read,
SCN &trans_version,
bool &is_determined_state)
SCN &trans_version)
{
int ret = OB_SUCCESS;
ObPartTransCtx *ctx = NULL;
if (OB_SUCC(get_tx_ctx_for_standby_(ls_id, tx_id, ctx))) {
ret = ctx->check_for_standby(snapshot, can_read, trans_version, is_determined_state);
ret = ctx->check_for_standby(snapshot, can_read, trans_version);
revert_tx_ctx_(ctx);
} else {
ret = OB_ERR_SHARED_LOCK_CONFLICT;
@ -3428,31 +3524,43 @@ int ObTransService::handle_trans_ask_state(const ObAskStateMsg &msg,
{
int ret = OB_SUCCESS;
ObTransID tx_id = msg.get_trans_id();
share::ObLSID coord = msg.get_receiver();
share::ObLSID upstream_id = msg.get_receiver();
bool is_root = false;
ObPartTransCtx *ctx = NULL;
ObAskStateRespMsg resp;
if (OB_FAIL(get_tx_ctx_for_standby_(coord, tx_id, ctx))) {
TRANS_LOG(INFO, "fail to get coordinator tx context", K(ret), K(tx_id), K(coord));
if (OB_FAIL(get_tx_ctx_for_standby_(upstream_id, tx_id, ctx))) {
TRANS_LOG(INFO, "fail to get coordinator tx context", K(ret), K(tx_id), K(upstream_id));
if (OB_TRANS_CTX_NOT_EXIST == ret) {
ObStateInfo state_info;
state_info.ls_id_ = coord;
state_info.ls_id_ = upstream_id;
state_info.snapshot_version_ = msg.snapshot_;
if (OB_FAIL(check_and_fill_state_info(tx_id, state_info))) {
TRANS_LOG(WARN, "fill state info fail", K(ret), K(coord), K(tx_id), K(state_info));
TRANS_LOG(WARN, "fill state info fail", K(ret), K(upstream_id), K(tx_id), K(state_info));
} else if (OB_FAIL(resp.state_info_array_.push_back(state_info))) {
TRANS_LOG(WARN, "state info array push back fail", K(ret), K(coord), K(tx_id), K(state_info));
TRANS_LOG(WARN, "state info array push back fail", K(ret), K(upstream_id), K(tx_id), K(state_info));
}
}
} else if (OB_FAIL(ctx->handle_trans_ask_state(msg.snapshot_, resp))) {
TRANS_LOG(WARN, "fail to handle trans ask state", K(ret), K(coord), K(tx_id));
} else if (OB_FAIL(ctx->handle_trans_ask_state(msg, resp))) {
TRANS_LOG(WARN, "fail to handle trans ask state", K(ret), K(upstream_id), K(tx_id));
}
if (OB_NOT_NULL(ctx)) {
is_root = ctx->is_root();
revert_tx_ctx_(ctx);
}
if (OB_SUCC(ret)) {
build_tx_ask_state_resp_(resp, msg);
if (OB_FAIL(rpc_->post_msg(msg.sender_addr_, resp))) {
TRANS_LOG(WARN, "post ask state msg fail", K(ret), K(resp));
if (OB_ISNULL(ctx) || is_root) {
if (!resp.state_info_array_.empty()) {
build_tx_ask_state_resp_(resp, msg);
ObAddr send_to_addr; // for msg compat
if (msg.ori_addr_.is_valid()) {
send_to_addr = msg.ori_addr_;
} else {
send_to_addr = msg.sender_addr_;
}
if (OB_FAIL(rpc_->post_msg(send_to_addr, resp))) {
TRANS_LOG(WARN, "post ask state msg fail", K(ret), K(resp));
}
}
}
}
result.reset();
@ -3515,7 +3623,11 @@ void ObTransService::build_tx_ask_state_resp_(ObAskStateRespMsg &resp, const ObA
resp.sender_ = msg.receiver_;
resp.request_id_ = ObTimeUtility::current_time();
resp.cluster_id_ = msg.cluster_id_;
resp.receiver_ = msg.sender_;
if (msg.ori_ls_id_.is_valid()) { // for msg compat
resp.receiver_ = msg.ori_ls_id_;
} else {
resp.receiver_ = msg.sender_;
}
}
int ObTransService::handle_trans_ask_state_response(const ObAskStateRespMsg &msg,
@ -3559,7 +3671,7 @@ int ObTransService::handle_trans_collect_state(const ObCollectStateMsg &msg,
resp.state_info_ = state_info;
}
}
} else if (OB_FAIL(ctx->handle_trans_collect_state(resp.state_info_, msg.snapshot_))) {
} else if (OB_FAIL(ctx->handle_trans_collect_state(resp, msg))) {
TRANS_LOG(WARN, "fail to handle trans ask state", K(ret), K(ls_id), K(tx_id));
}
if (OB_NOT_NULL(ctx)) {

View File

@ -101,7 +101,8 @@ int handle_sp_rollback_resp(const share::ObLSID &ls_id,
const int status,
const int64_t request_id,
const int64_t ret_epoch,
const ObAddr &ret_addr);
const ObAddr &ret_addr,
const ObIArray<ObTxLSEpochPair> &downstream_parts);
int handle_trans_msg_callback(const share::ObLSID &sender_ls_id,
const share::ObLSID &receiver_ls_id,
const ObTransID &tx_id,
@ -176,8 +177,7 @@ int check_for_standby(const share::ObLSID &ls_id,
const ObTransID &tx_id,
const SCN &snapshot,
bool &can_read,
SCN &trans_version,
bool &is_determined_state);
SCN &trans_version);
void register_standby_cleanup_task();
int do_standby_cleanup();
void handle_defer_abort(ObTxDesc &tx);
@ -206,13 +206,16 @@ int rollback_savepoint_(ObTxDesc &tx,
const ObTxSEQ savepoint,
int64_t expire_ts);
int rollback_savepoint_slowpath_(ObTxDesc &tx,
const ObTxPartRefList &parts,
ObTxRollbackParts &rollback_parts,
const ObTxSEQ specified_from_scn,
const ObTxSEQ scn,
const int64_t expire_ts);
void on_sp_rollback_succ_(const ObTxLSEpochPair &part,
void on_sp_rollback_succ_(const ObTxExecPart &part,
ObTxDesc &tx,
const int64_t born_epoch,
const ObAddr &addr);
int merge_rollback_downstream_parts_(ObTxDesc &tx,
const ObIArray<ObTxLSEpochPair> &downstream_parts);
int create_tx_ctx_(const share::ObLSID &ls_id,
const ObTxDesc &tx,
ObPartTransCtx *&ctx);
@ -264,7 +267,7 @@ int acquire_global_snapshot__(const int64_t expire_ts,
ObFunction<bool()> interrupt_checker);
int batch_post_rollback_savepoint_msg_(ObTxDesc &tx,
ObTxRollbackSPMsg &msg,
const ObIArray<ObTxLSEpochPair> &list,
const ObTxRollbackParts &list,
int &post_succ_num);
int post_tx_commit_msg_(ObTxDesc &tx_desc,
ObTxCommitMsg &msg,
@ -291,11 +294,12 @@ int handle_tx_commit_result_(ObTxDesc &tx,
int decide_tx_commit_info_(ObTxDesc &tx, ObTxPart *&coord);
int local_ls_commit_tx_(const ObTransID &tx_id,
const share::ObLSID &coord,
const share::ObLSArray &parts,
const ObTxCommitParts &parts,
const int64_t &expire_ts,
const common::ObString &app_trace_info,
const int64_t &request_id,
const share::SCN commit_start_scn,
const int64_t epoch,
share::SCN &commit_version,
const common::ObAddr &caller);
int get_tx_state_from_tx_table_(const share::ObLSID &lsid,
@ -319,7 +323,7 @@ int build_tx_sub_commit_msg_(const ObTxDesc &tx, ObTxSubCommitMsg &msg);
int build_tx_sub_rollback_msg_(const ObTxDesc &tx, ObTxSubRollbackMsg &msg);
int sub_prepare_local_ls_(const ObTransID &tx_id,
const share::ObLSID &coord,
const share::ObLSArray &parts,
const ObTxCommitParts &parts,
const int64_t &expire_ts,
const common::ObString & app_trace_info,
const int64_t &request_id,
@ -349,10 +353,13 @@ int ls_rollback_to_savepoint_(const ObTransID &tx_id,
const ObTxSEQ savepoint,
int64_t &ctx_born_epoch,
const ObTxDesc *tx,
const bool for_transfer,
const ObTxSEQ from_scn,
ObIArray<ObTxLSEpochPair> &downstream_parts,
int64_t expire_ts = -1);
int sync_rollback_savepoint__(ObTxDesc &tx,
ObTxRollbackSPMsg &msg,
const ObTxDesc::MaskSet &mask_set,
RollbackMaskSet &mask_set,
int64_t expire_ts,
const int64_t max_retry_interval,
int &retries);
@ -373,7 +380,9 @@ int rollback_to_global_implicit_savepoint_(ObTxDesc &tx,
int ls_sync_rollback_savepoint__(ObPartTransCtx *part_ctx,
const ObTxSEQ savepoint,
const int64_t op_sn,
const int64_t expire_ts);
const int64_t expire_ts,
const ObTxSEQ specified_from_scn,
ObIArray<ObTxLSEpochPair> &downstream_parts);
void tx_post_terminate_(ObTxDesc &tx);
int start_epoch_(ObTxDesc &tx);
int tx_sanity_check_(ObTxDesc &tx);

View File

@ -294,6 +294,9 @@ public:
// and apply_prepare_log.
virtual bool is_2pc_logging() const = 0;
// means 2pc state machine stop, don't advance to next phase
virtual bool is_2pc_blocking() const = 0;
//durable state, set by applying log
virtual ObTxState get_downstream_state() const = 0;
virtual int set_downstream_state(const ObTxState state) = 0;
@ -329,11 +332,26 @@ public:
// TODO, refine in 4.1
virtual bool is_sub2pc() const = 0;
// only persist redo and commit info
//
int prepare_redo();
// continue execution of two phase commit
int continue_execution(const bool is_rollback);
// for tree phase commit
//
// Merge the intermediate_participants (created during transfer) into the
// participants to guarantee a consistent view of the 2pc (we guarantee the
// same participants in each state transfer).
// Implementers need to distinguish the participants of the current 2pc state
// from the participants created during transfer in the current 2pc state, and
// merge them in the implementation.
virtual int merge_intermediate_participants() = 0;
// Whether the 2pc msg being handled comes from my real upstream. We rely on
// this information to prevent deadlock (caused by cyclic transfer, e.g. A
// transfers to B and then B transfers to A) in the tree phase commit.
// Implementers need to remember the sender of the request and compare it with
// the real upstream. What's more, we need to consider the case where it is
// called outside of 2pc msg handling, in which case we are dealing with the
// real upstream.
virtual bool is_real_upstream() = 0;
private:
// Inner method for handle_2pc_xxx_request/response for clarity

View File

@ -128,27 +128,32 @@ int ObTxCycleTwoPhaseCommitter::replay_log(const ObTwoPhaseCommitLogType log_typ
{
int ret = OB_SUCCESS;
switch (log_type) {
case ObTwoPhaseCommitLogType::OB_LOG_TX_COMMIT_INFO:
ret = replay_commit_info_log();
break;
case ObTwoPhaseCommitLogType::OB_LOG_TX_PREPARE:
ret = replay_prepare_log();
break;
case ObTwoPhaseCommitLogType::OB_LOG_TX_COMMIT:
ret = replay_commit_log();
break;
case ObTwoPhaseCommitLogType::OB_LOG_TX_ABORT:
ret = replay_abort_log();
break;
case ObTwoPhaseCommitLogType::OB_LOG_TX_CLEAR:
ret = replay_clear_log();
break;
default:
TRANS_LOG(ERROR, "invalid log type", K(log_type));
ret = OB_TRANS_INVALID_STATE;
break;
if (OB_FAIL(merge_intermediate_participants())) {
TRANS_LOG(WARN, "fail to merge incremental participants", KPC(this));
} else {
switch (log_type) {
case ObTwoPhaseCommitLogType::OB_LOG_TX_COMMIT_INFO:
ret = replay_commit_info_log();
break;
case ObTwoPhaseCommitLogType::OB_LOG_TX_PREPARE:
ret = replay_prepare_log();
break;
case ObTwoPhaseCommitLogType::OB_LOG_TX_COMMIT:
ret = replay_commit_log();
break;
case ObTwoPhaseCommitLogType::OB_LOG_TX_ABORT:
ret = replay_abort_log();
break;
case ObTwoPhaseCommitLogType::OB_LOG_TX_CLEAR:
ret = replay_clear_log();
break;
default:
TRANS_LOG(ERROR, "invalid log type", K(log_type));
ret = OB_TRANS_INVALID_STATE;
break;
}
}
if (OB_FAIL(ret)) {
TRANS_LOG(WARN, "replay log failed", K(ret), KPC(this), K(log_type));
}
@ -244,17 +249,27 @@ int ObTxCycleTwoPhaseCommitter::retransmit_upstream_msg_(const ObTxState state)
if (get_downstream_state() > get_upstream_state()) {
ret = OB_INVALID_ARGUMENT;
TRANS_LOG(WARN, "Invalid downstream_state", K(ret), KPC(this));
} else {
switch (get_2pc_role()) {
// root do not respond
case Ob2PCRole::ROOT: {
need_respond = false;
if (!is_real_upstream()) {
// It may be the case that the ROOT is the downstream of the fake
// upstream and need to respond with the fake upstream
need_respond = true;
} else {
need_respond = false;
}
break;
}
case Ob2PCRole::INTERNAL: {
// need to respond if all downstreams have responded and the log was submitted successfully
need_respond = (all_downstream_collected_() && get_downstream_state() == state)
need_respond = ((all_downstream_collected_()
// need respond if it is not the real upstream and we
// should response just after the downstream state has
// been synced
|| !is_real_upstream())
&& get_downstream_state() == state)
// downstream_state <= upstream_state
// => state < downstream_state && state < upstream_state
// => post response for last phase
@ -319,7 +334,7 @@ int ObTxCycleTwoPhaseCommitter::retransmit_upstream_msg_(const ObTxState state)
}
if (OB_SUCC(ret) && need_respond) {
if (OB_TMP_FAIL(post_msg(msg_type, OB_C2PC_UPSTREAM_ID))) {
if (OB_TMP_FAIL(post_msg(msg_type, OB_C2PC_SENDER_ID))) {
TRANS_LOG(WARN, "post msg failed", K(tmp_ret), K(msg_type), K(*this));
}
}
@ -345,7 +360,6 @@ int ObTxCycleTwoPhaseCommitter::handle_2pc_prepare_request_impl_() {
break;
}
case Ob2PCRole::INTERNAL: {
if (OB_TMP_FAIL(post_downstream_msg(ObTwoPhaseCommitMsgType::OB_MSG_TX_PREPARE_REQ))) {
TRANS_LOG(WARN, "post prepare msg failed", KR(ret));
}
@ -616,7 +630,14 @@ int ObTxCycleTwoPhaseCommitter::handle_2pc_clear_request()
const ObTxState state = get_downstream_state();
switch (state) {
case ObTxState::INIT:
case ObTxState::INIT: {
// In the transfer case, you may still be in the init phase and fail to
// pass the epoch check of the transfer. You will then send the abort
// response back to the upstream in init state; the upstream will post
// the abort request without you, move to the clear phase, and post the
// clear request to you.
break;
}
case ObTxState::PREPARE:
case ObTxState::PRE_COMMIT: {
ret = OB_TRANS_PROTOCOL_ERROR;
@ -836,7 +857,7 @@ int ObTxCycleTwoPhaseCommitter::apply_commit_log()
} else if (all_downstream_collected_()) {
switch (get_2pc_role()) {
case Ob2PCRole::ROOT: {
if (OB_FAIL(drive_self_2pc_phase(ObTxState::CLEAR))) {
if (OB_TMP_FAIL(drive_self_2pc_phase(ObTxState::CLEAR))) {
TRANS_LOG(WARN, "enter into clear phase failed", K(ret), KPC(this));
} else if (OB_TMP_FAIL(post_downstream_msg(ObTwoPhaseCommitMsgType::OB_MSG_TX_CLEAR_REQ))) {
TRANS_LOG(WARN, "post downstream msg failed", K(tmp_ret));
@ -895,7 +916,7 @@ int ObTxCycleTwoPhaseCommitter::apply_abort_log()
} else if (all_downstream_collected_()) {
switch (get_2pc_role()) {
case Ob2PCRole::ROOT: {
if (OB_FAIL(drive_self_2pc_phase(ObTxState::CLEAR))) {
if (OB_TMP_FAIL(drive_self_2pc_phase(ObTxState::CLEAR))) {
TRANS_LOG(WARN, "enter into clear phase failed", K(ret), KPC(this));
} else if (OB_TMP_FAIL(post_downstream_msg(ObTwoPhaseCommitMsgType::OB_MSG_TX_CLEAR_REQ))) {
TRANS_LOG(WARN, "post clear request failed", K(tmp_ret), K(*this));
@ -1089,11 +1110,12 @@ int ObTxCycleTwoPhaseCommitter::recover_from_tx_table()
int ObTxCycleTwoPhaseCommitter::try_enter_pre_commit_state()
{
int ret = OB_SUCCESS;
int tmp_ret = OB_SUCCESS;
if (is_2pc_logging()) {
ret = OB_EAGAIN;
TRANS_LOG(INFO, "committer is 2pc logging", KPC(this));
} else if (OB_FAIL(drive_self_2pc_phase(ObTxState::PRE_COMMIT))) {
} else if (OB_TMP_FAIL(drive_self_2pc_phase(ObTxState::PRE_COMMIT))) {
if (OB_EAGAIN != ret) {
TRANS_LOG(WARN, "drive self 2pc pre_commit phase failed", K(ret), KPC(this));
}
@ -1122,7 +1144,7 @@ int ObTxCycleTwoPhaseCommitter::on_pre_commit()
// TODO, currently, if a trans only has one participant,
// the state can not be drived from pre commit to commit.
// Therefore, enter commit state directly.
if (OB_FAIL(drive_self_2pc_phase(ObTxState::COMMIT))) {
if (OB_TMP_FAIL(drive_self_2pc_phase(ObTxState::COMMIT))) {
TRANS_LOG(WARN, "do commit in memory failed", K(ret), KPC(this));
}
// not need post downstream msg

View File

@ -83,6 +83,9 @@ int ObTxCycleTwoPhaseCommitter::drive_self_2pc_phase(ObTxState next_phase)
ret = OB_EAGAIN;
TRANS_LOG(WARN, "can not enter next phase when logging", K(ret), KPC(this));
// TODO check state
} else if (is_2pc_blocking()) {
ret = OB_EAGAIN;
TRANS_LOG(WARN, "can not enter next phase when 2pc blocking", K(ret), KPC(this));
} else if (next_phase == get_upstream_state()) {
// do nothing about in-memory operation
} else {
@ -134,7 +137,11 @@ int ObTxCycleTwoPhaseCommitter::drive_self_2pc_phase(ObTxState next_phase)
}
}
if (OB_FAIL(ret)) {
// do nothing
// It is safe to merge the intermediate_participants because we will block
// the in-memory state machine with is_2pc_blocking. The detailed design
// can be found in the implementation of the merge_intermediate_participants.
} else if (OB_FAIL(merge_intermediate_participants())) {
TRANS_LOG(WARN, "fail to merge incremental participants", KPC(this));
} else {
collected_.reset();
set_upstream_state(next_phase);
@ -301,7 +308,10 @@ int ObTxCycleTwoPhaseCommitter::retransmit_downstream_msg_()
ObTwoPhaseCommitMsgType msg_type;
bool need_submit = true;
if (is_root() || is_internal()) {
if ((is_root() || is_internal())
// If we are handling the fake upstream, we only need to take care of
// myself without retransmitting to the downstreams
&& is_real_upstream()) {
int64_t this_part_id = get_self_id();
if (OB_FAIL(decide_downstream_msg_type_(need_submit, msg_type))) {
TRANS_LOG(WARN, "deecide downstream msg_type fail", K(ret), KPC(this));
@ -937,11 +947,14 @@ bool ObTxCycleTwoPhaseCommitter::all_downstream_collected_()
{
bool all_collected = false;
switch (get_2pc_role()) {
case Ob2PCRole::ROOT:
case Ob2PCRole::INTERNAL: {
case Ob2PCRole::ROOT: {
all_collected = collected_.num_members() == get_downstream_size() - 1;
break;
}
case Ob2PCRole::INTERNAL: {
all_collected = collected_.num_members() == get_downstream_size();
break;
}
case Ob2PCRole::LEAF: {
all_collected = true;
break;

View File

@ -21,6 +21,8 @@ using namespace share;
namespace transaction
{
// get_2pc_role depends on the current state, so a leaf may later become an
// internal node. Therefore the role can only be decided under lock at one point in time.
Ob2PCRole ObPartTransCtx::get_2pc_role() const
{
Ob2PCRole role = Ob2PCRole::UNKNOWN;
@ -28,7 +30,7 @@ Ob2PCRole ObPartTransCtx::get_2pc_role() const
if (exec_info_.upstream_.is_valid()) {
if (exec_info_.upstream_ == ls_id_) {
role = Ob2PCRole::ROOT;
} else if (exec_info_.incremental_participants_.empty()) {
} else if (exec_info_.participants_.empty()) {
// not root & downstream is empty
// root must not be leaf, because the distributed txn must be composed by
// more than one participants.
@ -41,12 +43,21 @@ Ob2PCRole ObPartTransCtx::get_2pc_role() const
return role;
}
// Returns the current element count of exec_info_.participants_.
int64_t ObPartTransCtx::get_downstream_size() const
{
  const int64_t downstream_cnt = exec_info_.participants_.count();
  return downstream_cnt;
}
int64_t ObPartTransCtx::get_self_id()
{
int ret = OB_SUCCESS;
if (self_id_ == -1) {
if (OB_FAIL(find_participant_id_(ls_id_, self_id_))) {
TRANS_LOG(ERROR, "find participant id failed", K(ret), K(*this));
if (is_root()) {
TRANS_LOG(ERROR, "find participant id failed", K(ret), K(*this));
} else {
self_id_ = -1;
}
}
}
return self_id_;
@ -403,5 +414,130 @@ int ObPartTransCtx::reply_to_scheduler_for_sub2pc(int64_t msg_type)
return ret;
}
// When to merge the intermediate_participants into the participants in a two
// phase commit needs careful consideration. One of the most critical factors is
// how to deal with concurrency with the transfer out logs.
//
// The primary rule we need to follow is that "the two phase commit state before
// the transfer out log will be relocated to the dest, and the two phase commit
// state after this log will join the src participants, progressing
// through a tree-style two phase commit." Therefore, before committing the
// transfer out log, we will first block the advancement of the two phase commit
// protocol for all txns which are required by the transfer (by blocking the
// advancement of the in-memory state through "drive_self_2pc_phase" and the
// advancement of the persistent state machine through "submit_log_if_allow"),
// ensuring the integrity of the two phase commit state. Simultaneously, we will
// add intermediate_participants for these blocked txns.
//
// The second rule is that we need to adhere to the principle that "when a
// participant of a txn enters a certain two phase commit state with a log, all
// transfer out logs before this log need to be included in the participants."
// Therefore, we must ensure that the transfer out logs before the writing of
// this two phase commit log will definitely be included in the temporary
// participants(because the transfer out logs are barrier logs), while the
// transfer out logs after that will not be included in the intermediate
// participants(because the transfer out logs block the advancement of the txn's
// state machine before being written to paxos, including both of the in-memory
// state and the persistent state, as explained above).
//
// Hence, with the protection of the blocking capability of the state machine in
// the in-memory state advancement("drive_self_2pc_phase") and the advancement
// of the persistent state machine("submit_log_if_allow"), we can safely proceed
// with the action of merging the intermediate_participant into the participants.
int ObPartTransCtx::merge_intermediate_participants()
{
int ret = OB_SUCCESS;
bool exist = false;
const int64_t participants_size = exec_info_.participants_.count();
const int64_t increase_size = exec_info_.intermediate_participants_.count();
if (increase_size > 0) {
if (participants_size != exec_info_.commit_parts_.count()) {
ret = OB_ERR_UNEXPECTED;
TRANS_LOG(WARN, "part size not match", KR(ret), KPC(this));
} else if (OB_FAIL(exec_info_.participants_.reserve(participants_size + increase_size))) {
TRANS_LOG(WARN, "part reserve failed", KR(ret), KPC(this));
} else if (OB_FAIL(exec_info_.commit_parts_.reserve(participants_size + increase_size))) {
TRANS_LOG(WARN, "part reserve failed", KR(ret), KPC(this));
}
for (int64_t i = 0; OB_SUCC(ret) && i < increase_size; i++) {
exist = false;
for (int64_t j = 0; OB_SUCC(ret) && !exist && j < participants_size; j++) {
if (exec_info_.participants_[j] == exec_info_.intermediate_participants_[i].ls_id_) {
if (exec_info_.commit_parts_.at(j).ls_id_ != exec_info_.participants_[j]) {
ret = OB_ERR_UNEXPECTED;
TRANS_LOG(WARN, "commit part ls_id not match", KR(ret), KPC(this));
} else if (exec_info_.commit_parts_.at(j).transfer_epoch_ > 0) {
// do nothing
// use first transfer_epoch to drive
} else {
exec_info_.commit_parts_.at(j).transfer_epoch_ = exec_info_.intermediate_participants_[i].transfer_epoch_;
}
exist = true;
}
}
if (OB_SUCC(ret) && !exist) {
if (OB_FAIL(exec_info_.participants_.push_back(exec_info_.intermediate_participants_[i].ls_id_))) {
TRANS_LOG(WARN, "fail to push back incremental participants", KR(ret), KPC(this));
} else if (OB_FAIL(exec_info_.commit_parts_.push_back(exec_info_.intermediate_participants_[i]))) {
TRANS_LOG(WARN, "fail to push back incremental participants", KR(ret), KPC(this));
}
}
}
TRANS_LOG(INFO, "merge participant", KR(ret),
K(trans_id_),
K(ls_id_),
KP(this),
K(exec_info_.participants_),
K(exec_info_.intermediate_participants_));
if (OB_SUCC(ret)) {
(void)exec_info_.intermediate_participants_.reuse();
}
}
return ret;
}
// True when the given ls equals the upstream recorded in exec_info_.upstream_.
bool ObPartTransCtx::is_real_upstream_(const ObLSID upstream)
{
  const bool is_recorded_upstream = (upstream == exec_info_.upstream_);
  return is_recorded_upstream;
}
// Tells whether the 2pc msg currently cached in msg_2pc_cache_ was sent by the
// recorded upstream.
bool ObPartTransCtx::is_real_upstream()
{
  // If msg_2pc_cache is empty, it is called by handle_timeout, and we only
  // need to send to real upstream during handle_timeout; otherwise compare
  // the sender of the cached msg with the recorded upstream.
  const bool from_real_upstream = OB_ISNULL(msg_2pc_cache_)
                                  ? true
                                  : is_real_upstream_(msg_2pc_cache_->sender_);
  return from_real_upstream;
}
// Records ls_id in exec_info_.intermediate_participants_ unless an entry with
// the same ls_id is already there. New entries are built as
// ObTxExecPart(ls_id, -1, transfer_epoch). Returns the push_back error code on
// failure, OB_SUCCESS otherwise.
int ObPartTransCtx::add_intermediate_participants(const share::ObLSID ls_id, int64_t transfer_epoch)
{
  int ret = OB_SUCCESS;
  bool already_recorded = false;
  const int64_t recorded_cnt = exec_info_.intermediate_participants_.count();

  // Linear scan that stops at the first entry with a matching ls_id.
  for (int64_t idx = 0; !already_recorded && idx < recorded_cnt; idx++) {
    already_recorded = (ls_id == exec_info_.intermediate_participants_[idx].ls_id_);
  }

  if (!already_recorded) {
    if (OB_FAIL(exec_info_.intermediate_participants_.push_back(ObTxExecPart(ls_id, -1, transfer_epoch)))) {
      TRANS_LOG(WARN, "fail to push back participant into intermediate participants", KR(ret), KPC(this));
    }
  }

  return ret;
}
} // end namespace transaction
} // end namespace oceanbase

View File

@ -51,6 +51,7 @@ int ObPartTransCtx::post_msg_(const ObTwoPhaseCommitMsgType& msg_type,
// for xa trans, if prepare request, convert it to prepare version request
Ob2pcPrepareVersionReqMsg prepare_version_req;
build_tx_common_msg_(receiver, prepare_version_req);
prepare_version_req.upstream_ = ls_id_;
if (OB_FAIL(post_msg_(receiver, prepare_version_req))) {
TRANS_LOG(WARN, "rpc post msg failed", K(ret), K(*this), K(receiver), K(msg_type));
}
@ -225,6 +226,13 @@ void ObPartTransCtx::build_tx_common_msg_(const ObLSID &receiver,
ls_id_,
cluster_id_,
msg);
// fill exec_epoch && transfer_epoch
for (int64_t idx = 0; idx < exec_info_.commit_parts_.count(); idx++) {
if (exec_info_.commit_parts_.at(idx).ls_id_ == receiver) {
msg.epoch_ = exec_info_.commit_parts_.at(idx).exec_epoch_;
msg.transfer_epoch_ = exec_info_.commit_parts_.at(idx).transfer_epoch_;
}
}
}
void ObPartTransCtx::build_tx_common_msg_(const ObTxMsg &recv_msg,
@ -344,20 +352,39 @@ int ObPartTransCtx::post_msg(const ObTwoPhaseCommitMsgType& msg_type,
const int64_t participant_id)
{
int ret = OB_SUCCESS;
bool need_post = true;
ObLSID receiver;
if (participant_id >= exec_info_.participants_.count()
&& OB_C2PC_UPSTREAM_ID != participant_id) {
&& OB_C2PC_UPSTREAM_ID != participant_id
&& OB_C2PC_SENDER_ID != participant_id) {
ret = OB_INVALID_ARGUMENT;
TRANS_LOG(WARN, "invalid argument", KR(ret), K(participant_id), K(*this));
} else if (OB_C2PC_UPSTREAM_ID == participant_id) {
// We should send to real upstream
receiver = exec_info_.upstream_;
need_post = true;
} else if (OB_C2PC_SENDER_ID == participant_id) {
if (msg_2pc_cache_ != NULL) {
// We should send to the sender(just the sender of the msg)
receiver = msg_2pc_cache_->sender_;
need_post = true;
} else if (exec_info_.upstream_.is_valid()) {
// We should retransmit the msg to the real upstream
receiver = exec_info_.upstream_;
need_post = true;
} else {
// there may be intermediate participant retransmits to the upstream which
// disturbs the participants in this turn.
need_post = false;
}
} else {
receiver = exec_info_.participants_[participant_id];
need_post = true;
}
if (OB_SUCC(ret)
&& need_post
&& OB_FAIL(post_msg_(msg_type, receiver))) {
TRANS_LOG(WARN, "post msg failed", KR(ret), K(*this));
}
@ -369,7 +396,11 @@ int ObPartTransCtx::set_2pc_upstream_(const ObLSID &upstream)
{
int ret = OB_SUCCESS;
exec_info_.upstream_ = upstream;
if (!exec_info_.upstream_.is_valid()) {
// upstream should be fixed during each state in 2pc in order to prevent
// the deadlock in the cycle based tree phase commit.
exec_info_.upstream_ = upstream;
}
return ret;
}
@ -384,14 +415,18 @@ int ObPartTransCtx::set_2pc_incremental_participants_(
return ret;
}
int ObPartTransCtx::set_2pc_participants_(const ObLSArray &participants)
int ObPartTransCtx::set_2pc_participants_(const ObTxCommitParts& participants)
{
int ret = OB_SUCCESS;
if (OB_FAIL(exec_info_.participants_.assign(participants))) {
TRANS_LOG(WARN, "set participants error", K(ret), K(participants), KPC(this));
if (exec_info_.participants_.count() > 0) {
TRANS_LOG(WARN, "participants has set before", KPC(this));
} else {
CONVERT_COMMIT_PARTS_TO_PARTS(participants, exec_info_.participants_);
if (FAILEDx(assign_commit_parts(exec_info_.participants_,
participants))) {
TRANS_LOG(WARN, "set participants error", K(ret), K(participants), KPC(this));
}
}
return ret;
}
@ -524,7 +559,11 @@ int ObPartTransCtx::apply_2pc_msg_(const ObTwoPhaseCommitMsgType msg_type)
TRANS_LOG(WARN, "unexpect tx flag", KR(ret), KPC(this));
} else if (is_sub2pc()) {
// prepare version for xa trans
// these actions has been done in entrance function handle_tx_2pc_prepare_version_req
const Ob2pcPrepareVersionReqMsg &msg = *(static_cast<const Ob2pcPrepareVersionReqMsg *>(msg_2pc_cache_));
if (OB_FAIL(set_2pc_upstream_(msg.upstream_))) {
TRANS_LOG(WARN, "set coordinator failed", KR(ret), K(msg), K(*this));
}
// other actions has been done in entrance function handle_tx_2pc_prepare_version_req
} else {
const Ob2pcPrepareReqMsg &msg = *(static_cast<const Ob2pcPrepareReqMsg *>(msg_2pc_cache_));
@ -561,7 +600,9 @@ int ObPartTransCtx::apply_2pc_msg_(const ObTwoPhaseCommitMsgType msg_type)
const Ob2pcPreCommitReqMsg &msg =
*(static_cast<const Ob2pcPreCommitReqMsg *>(msg_2pc_cache_));
if (OB_FAIL(set_2pc_commit_version_(msg.commit_version_))) {
if (OB_FAIL(set_2pc_upstream_(msg.sender_))) {
TRANS_LOG(WARN, "set coordinator failed", KR(ret), K(msg), K(*this));
} else if (OB_FAIL(set_2pc_commit_version_(msg.commit_version_))) {
TRANS_LOG(WARN, "set commit version failed", KR(ret), K(msg), KPC(this));
}
@ -583,7 +624,9 @@ int ObPartTransCtx::apply_2pc_msg_(const ObTwoPhaseCommitMsgType msg_type)
const Ob2pcCommitReqMsg &msg = *(static_cast<const Ob2pcCommitReqMsg *>(msg_2pc_cache_));
if (OB_FAIL(set_2pc_commit_version_(msg.commit_version_))) {
if (OB_FAIL(set_2pc_upstream_(msg.sender_))) {
TRANS_LOG(WARN, "set coordinator failed", KR(ret), K(msg), K(*this));
} else if (OB_FAIL(set_2pc_commit_version_(msg.commit_version_))) {
TRANS_LOG(WARN, "set commit version failed", KR(ret), K(msg), K(*this));
} else if (OB_FAIL(coord_prepare_info_arr_.assign(msg.prepare_info_array_))) {
TRANS_LOG(WARN, "assign prepare_log_info_arr_ failed", K(ret));
@ -621,6 +664,8 @@ int ObPartTransCtx::apply_2pc_msg_(const ObTwoPhaseCommitMsgType msg_type)
|| msg.max_commit_log_scn_ < ctx_tx_data_.get_end_log_ts())) {
ret = OB_ERR_UNEXPECTED;
TRANS_LOG(WARN, "unexpected max commit log scn in clear request", K(ret), KPC(this));
} else if (OB_FAIL(set_2pc_upstream_(msg.sender_))) {
TRANS_LOG(WARN, "set coordinator failed", KR(ret), K(msg), K(*this));
} else {
max_2pc_commit_scn_ = share::SCN::max(msg.max_commit_log_scn_, max_2pc_commit_scn_);
}

View File

@ -1028,26 +1028,48 @@ int ObTransService::rollback_to_local_implicit_savepoint_(ObTxDesc &tx,
int ret = OB_SUCCESS;
ObTxPartRefList parts;
int64_t start_ts = ObTimeUtility::current_time();
ObTxRollbackParts rollback_parts;
// when rollback local we use this from_scn to all downstream participants
ObTxSEQ from_scn = savepoint.clone_with_seq(ObSequence::inc_and_get_max_seq_no());
if (OB_FAIL(find_parts_after_sp_(tx, parts, savepoint))) {
TRANS_LOG(WARN, "find rollback parts fail", K(ret), K(savepoint), K(tx));
} else {
ARRAY_FOREACH(parts, i) {
ObPartTransCtx *ctx = NULL;
ObTxPart &p = parts[i];
ObSEArray<ObTxLSEpochPair, 1> downstream_parts;
if (OB_FAIL(get_tx_ctx_(p.id_, tx.tx_id_, ctx))) {
TRANS_LOG(WARN, "get tx ctx fail", K(ret), K_(p.id), K(tx));
} else if (p.epoch_ != ctx->epoch_) {
ret = OB_TRANS_CTX_NOT_EXIST; // FIXME more decent errno
} else if (OB_FAIL(ls_sync_rollback_savepoint__(ctx, savepoint, tx.op_sn_, expire_ts))) {
} else if (OB_FAIL(ls_sync_rollback_savepoint__(ctx, savepoint, tx.op_sn_, expire_ts, from_scn, downstream_parts))) {
TRANS_LOG(WARN, "LS rollback savepoint fail", K(ret), K(savepoint), K(tx));
} else {
p.last_scn_ = savepoint;
// merge find new downstream to tx.rollback parts
for (int64_t idx = 0; OB_SUCC(ret) && idx < downstream_parts.count(); idx++) {
if (OB_FAIL(rollback_parts.push_back(ObTxExecPart(downstream_parts.at(idx).left_, 0, downstream_parts.at(idx).right_)))) {
TRANS_LOG(WARN, "push part to array failed", K(ret), K(tx));
}
}
if (OB_SUCC(ret)) {
p.last_scn_ = savepoint;
}
}
if (OB_NOT_NULL(ctx)) {
revert_tx_ctx_(ctx);
}
}
}
if (OB_SUCC(ret) && rollback_parts.count() > 0) {
// rollback downstream participants
TRANS_LOG(INFO, "rollback local with downstream", K(tx.tx_id_), K(from_scn), K(savepoint), K(rollback_parts));
if (OB_FAIL(rollback_savepoint_slowpath_(tx, rollback_parts, from_scn, savepoint, expire_ts))) {
TRANS_LOG(WARN, "rollback slowpath", KR(ret), K(rollback_parts), K(tx));
}
TRANS_LOG(INFO, "rollback local with downstream", KR(ret), K(tx.tx_id_), K(from_scn), K(savepoint), K(rollback_parts));
}
int64_t elapsed_us = ObTimeUtility::current_time() - start_ts;
#ifndef NDEBUG
TRANS_LOG(INFO, "rollback local implicit savepoint", K(ret), K(savepoint));
@ -1196,14 +1218,21 @@ int ObTransService::rollback_to_global_implicit_savepoint_(ObTxDesc &tx,
int ObTransService::ls_sync_rollback_savepoint__(ObPartTransCtx *part_ctx,
const ObTxSEQ savepoint,
const int64_t op_sn,
const int64_t expire_ts)
const int64_t expire_ts,
const ObTxSEQ specified_from_scn,
ObIArray<ObTxLSEpochPair> &downstream_parts)
{
int ret = OB_SUCCESS;
int64_t retry_cnt = 0;
bool blockable = expire_ts > 0;
const ObTxSEQ from_scn = savepoint.clone_with_seq(ObSequence::inc_and_get_max_seq_no());
ObTxSEQ from_scn;
if (specified_from_scn.is_valid()) {
from_scn = specified_from_scn;
} else {
from_scn = savepoint.clone_with_seq(ObSequence::inc_and_get_max_seq_no());
}
do {
ret = part_ctx->rollback_to_savepoint(op_sn, from_scn, savepoint);
ret = part_ctx->rollback_to_savepoint(op_sn, from_scn, savepoint, downstream_parts);
if (OB_NEED_RETRY == ret && blockable) {
if (ObTimeUtility::current_time() >= expire_ts) {
ret = OB_TIMEOUT;
@ -1216,7 +1245,7 @@ int ObTransService::ls_sync_rollback_savepoint__(ObPartTransCtx *part_ctx,
ob_usleep(50 * 1000);
}
}
} while (OB_NEED_RETRY == ret && blockable);
} while (OB_NEED_RETRY == ret && blockable && !part_ctx->is_transfer_deleted());
#ifndef NDEBUG
TRANS_LOG(INFO, "rollback to savepoint sync", K(ret),
K(part_ctx->get_trans_id()), K(part_ctx->get_ls_id()), K(retry_cnt),
@ -1421,6 +1450,7 @@ int ObTransService::rollback_savepoint_(ObTxDesc &tx,
slowpath = false;
ObTxPart &p = parts[0];
int64_t born_epoch = 0;
ObSEArray<ObTxLSEpochPair, 1> downstream_parts;
if (OB_FAIL(ls_rollback_to_savepoint_(tx.tx_id_,
p.id_,
p.epoch_,
@ -1428,6 +1458,9 @@ int ObTransService::rollback_savepoint_(ObTxDesc &tx,
savepoint,
born_epoch,
&tx,
false,/*for transfer*/
ObTxSEQ::INVL(),
downstream_parts,
-1/*non-blocking*/))) {
if (common_retryable_error_(ret)) {
slowpath = true;
@ -1438,15 +1471,29 @@ int ObTransService::rollback_savepoint_(ObTxDesc &tx,
} else {
if (p.epoch_ <= 0) { tx.update_clean_part(p.id_, born_epoch, self_); }
TRANS_LOG(TRACE, "succ to rollback on participant", K(p), K(tx), K(savepoint));
if (downstream_parts.count() > 0) {
slowpath = true;
}
}
}
if (slowpath &&
OB_FAIL(rollback_savepoint_slowpath_(tx,
parts,
savepoint,
expire_ts))) {
TRANS_LOG(WARN, "rollback slowpath fail", K(ret),
if (slowpath) {
ObTxRollbackParts rollback_parts;
if (OB_FAIL(rollback_parts.reserve(parts.count()))) {
TRANS_LOG(WARN, "reserve space fail", K(ret), K(parts), K(tx));
} else {
ARRAY_FOREACH(parts, i) {
rollback_parts.push_back(ObTxExecPart(parts[i].id_, parts[i].epoch_, 0));
}
}
if (FAILEDx(rollback_savepoint_slowpath_(tx,
rollback_parts,
ObTxSEQ::INVL(),
savepoint,
expire_ts))) {
TRANS_LOG(WARN, "rollback slowpath fail", K(ret),
K(parts), K(savepoint), K(expire_ts), K(tx));
}
}
if (OB_TIMEOUT == ret && ObTimeUtility::current_time() >= tx.get_expire_ts()) {
ret = OB_TRANS_TIMEOUT;
@ -1482,6 +1529,9 @@ int ObTransService::ls_rollback_to_savepoint_(const ObTransID &tx_id,
const ObTxSEQ savepoint,
int64_t &ctx_born_epoch,
const ObTxDesc *tx,
const bool for_transfer,
const ObTxSEQ from_scn,
ObIArray<ObTxLSEpochPair> &downstream_parts,
int64_t expire_ts)
{
int ret = OB_SUCCESS;
@ -1489,7 +1539,7 @@ int ObTransService::ls_rollback_to_savepoint_(const ObTransID &tx_id,
ObPartTransCtx *ctx = NULL;
if (OB_FAIL(get_tx_ctx_(ls, tx_id, ctx))) {
if (OB_NOT_MASTER == ret) {
} else if (OB_TRANS_CTX_NOT_EXIST == ret && verify_epoch <= 0) {
} else if (OB_TRANS_CTX_NOT_EXIST == ret && verify_epoch <= 0 && !for_transfer) {
int tx_state = ObTxData::RUNNING;
share::SCN commit_version;
if (OB_FAIL(get_tx_state_from_tx_table_(ls, tx_id, tx_state, commit_version))) {
@ -1532,7 +1582,7 @@ int ObTransService::ls_rollback_to_savepoint_(const ObTransID &tx_id,
ret = OB_TRANS_CTX_NOT_EXIST;
TRANS_LOG(WARN, "current ctx illegal, born epoch not match", K(ret), K(ls), K(tx_id),
K(verify_epoch), KPC(ctx));
} else if(OB_FAIL(ls_sync_rollback_savepoint__(ctx, savepoint, op_sn, expire_ts))){
} else if (OB_FAIL(ls_sync_rollback_savepoint__(ctx, savepoint, op_sn, expire_ts, from_scn, downstream_parts))) {
TRANS_LOG(WARN, "LS rollback to savepoint fail", K(ret), K(tx_id), K(ls), K(op_sn), K(savepoint), KPC(ctx));
}
}
@ -1543,21 +1593,20 @@ int ObTransService::ls_rollback_to_savepoint_(const ObTransID &tx_id,
}
inline int ObTransService::rollback_savepoint_slowpath_(ObTxDesc &tx,
const ObTxPartRefList &parts,
ObTxRollbackParts &rollback_parts,
const ObTxSEQ specified_from_scn,
const ObTxSEQ savepoint,
const int64_t expire_ts)
{
int ret = OB_SUCCESS;
int64_t max_retry_intval = GCONF._ob_trans_rpc_timeout;
ObSEArray<ObTxLSEpochPair, 4> targets;
if (OB_FAIL(targets.reserve(parts.count()))) {
TRANS_LOG(WARN, "reserve space fail", K(ret), K(parts), K(tx));
int64_t tx_msg_id = fetch_rollback_sp_sequence_();
if (rollback_parts.count() == 0) {
ret = OB_ERR_UNEXPECTED;
TRANS_LOG(WARN, "rollback parts is empty", K(ret), K(tx));
} else {
ARRAY_FOREACH(parts, i) {
targets.push_back(ObTxLSEpochPair(parts[i].id_, parts[i].epoch_));
}
tx.brpc_mask_set_.reset();
if (OB_FAIL(tx.brpc_mask_set_.init(&targets))) {
if (OB_FAIL(tx.brpc_mask_set_.init(ObCommonID(tx_msg_id), rollback_parts))) {
TRANS_LOG(WARN, "init rpc mask set fail", K(ret), K(tx));
}
}
@ -1571,12 +1620,13 @@ inline int ObTransService::rollback_savepoint_slowpath_(ObTxDesc &tx,
msg.savepoint_ = savepoint;
msg.op_sn_ = tx.op_sn_;
msg.epoch_ = -1;
msg.request_id_ = tx.op_sn_;
msg.request_id_ = tx_msg_id;
msg.specified_from_scn_ = specified_from_scn;
// prepare msg.tx_ptr_ if required
// TODO(yunxing.cyx) : in 4.1 rework here, won't serialize txDesc
ObTxDesc *tmp_tx_desc = NULL;
ARRAY_FOREACH_NORET(parts, i) {
if (parts[i].epoch_ <= 0) {
ARRAY_FOREACH_NORET(rollback_parts, i) {
if (rollback_parts.at(i).exec_epoch_ <= 0 && rollback_parts.at(i).transfer_epoch_ <= 0) {
int64_t len = tx.get_serialize_size() + sizeof(ObTxDesc);
char *buf = (char*)ob_malloc(len, "TxDesc");
int64_t pos = sizeof(ObTxDesc);
@ -1636,7 +1686,7 @@ inline int ObTransService::rollback_savepoint_slowpath_(ObTxDesc &tx,
int64_t elapsed_us = ObTimeUtility::current_time() - start_ts;
TRANS_LOG(INFO, "rollback savepoint slowpath", K(ret),
K_(tx.tx_id), K(start_ts), K(retries),
K(savepoint), K(expire_ts), K(tx), K(parts.count()));
K(savepoint), K(expire_ts), K(tx), K(rollback_parts.count()));
ObTransTraceLog &tlog = tx.get_tlog();
REC_TRANS_TRACE_EXT(&tlog, rollback_savepoint_slowpath, OB_Y(ret),
OB_ID(savepoint), savepoint.cast_to_int(), OB_Y(expire_ts),
@ -1647,7 +1697,7 @@ inline int ObTransService::rollback_savepoint_slowpath_(ObTxDesc &tx,
inline int ObTransService::sync_rollback_savepoint__(ObTxDesc &tx,
ObTxRollbackSPMsg &msg,
const ObTxDesc::MaskSet &mask_set,
RollbackMaskSet &mask_set,
int64_t expire_ts,
const int64_t max_retry_intval,
int &retries)
@ -1658,11 +1708,23 @@ inline int ObTransService::sync_rollback_savepoint__(ObTxDesc &tx,
retries = 0;
int64_t min_retry_intval = 10 * 1000; // 10 ms
expire_ts = std::max(ObTimeUtility::current_time() + MIN_WAIT_TIME, expire_ts);
ObCommonID msg_id(msg.request_id_);
ObRollbackSPMsgGuard *rollback_sp_msg_guard = NULL;
bool insert_mgr = false;
share::ObTenantBase *tenant_base = MTL_CTX();
omt::ObTenant *tenant = static_cast<omt::ObTenant *>(tenant_base);
if (OB_ISNULL(tenant_base)) {
ret = OB_ERR_UNEXPECTED;
TRANS_LOG(WARN, "get tenant is null", K(ret));
} else if (OB_ISNULL(rollback_sp_msg_guard = ObRollbackSPMsgGuardAlloc::alloc_value())) {
ret = OB_ALLOCATE_MEMORY_FAILED;
TRANS_LOG(WARN, "alloc tx_msg_guard failed", KR(ret), K(msg.tx_id_));
} else if (FALSE_IT(new (rollback_sp_msg_guard) ObRollbackSPMsgGuard(msg_id, tx, tx_desc_mgr_))) {
} else if (OB_FAIL(rollback_sp_msg_mgr_.insert(msg_id, rollback_sp_msg_guard))) {
TRANS_LOG(WARN, "insert tx_desc to holder failed", KR(ret), K(msg.tx_id_));
} else {
insert_mgr = true;
}
while (OB_SUCC(ret)) {
int64_t retry_intval = std::min(min_retry_intval * (1 + retries), max_retry_intval);
@ -1671,7 +1733,7 @@ inline int ObTransService::sync_rollback_savepoint__(ObTxDesc &tx,
ret = OB_TIMEOUT;
TRANS_LOG(WARN, "tx rpc wait result timeout", K(ret), K(expire_ts), K(retries));
} else {
ObSEArray<ObTxLSEpochPair, 4> remain;
ObTxRollbackParts remain;
mask_set.get_not_mask(remain);
int64_t remain_cnt = remain.count();
TRANS_LOG(DEBUG, "unmasked parts", K(remain), K(tx), K(retries));
@ -1737,6 +1799,10 @@ inline int ObTransService::sync_rollback_savepoint__(ObTxDesc &tx,
}
++retries;
}
if (insert_mgr) {
// remove msg from mgr
rollback_sp_msg_mgr_.del(msg_id, rollback_sp_msg_guard);
}
return ret;
}

View File

@ -68,16 +68,27 @@ namespace storage
// If we follow the logic above, we can always ensure the correctness between read and write
//
int CheckSqlSequenceCanReadFunctor::operator() (const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx) {
int CheckSqlSequenceCanReadFunctor::operator() (const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx)
{
UNUSED(tx_cc_ctx);
int ret = OB_SUCCESS;
// NB: We need to pay close attention to the order of the reads of the different
// variables. Although we update the version before the state for the tnodes
// and read the state before the version, the compiled code's execution may
// rearrange that order and fail to obey the original logic (you can read the
// Dependency Definition in the ARM architecture book to understand it). So
// the synchronization primitive below is very important.
const int32_t state = ATOMIC_LOAD(&tx_data.state_);
const SCN commit_version = tx_data.commit_version_.atomic_load();
const SCN end_scn = tx_data.end_scn_.atomic_load();
const bool is_rollback = tx_data.undo_status_list_.is_contain(sql_sequence_, state);
// NB: The functor is only used during minor merge
if (ObTxData::ABORT == state) {
// Case 1: data is aborted, so we donot need it during merge
can_read_ = false;
} else if (tx_data.undo_status_list_.is_contain(sql_sequence_, state)) {
} else if (is_rollback) {
// Case 2: data is rollbacked in undo status, so we donot need it during merge
can_read_ = false;
} else {
@ -85,6 +96,10 @@ int CheckSqlSequenceCanReadFunctor::operator() (const ObTxData &tx_data, ObTxCCC
can_read_ = true;
}
if (OB_SUCC(ret)) {
(void)resolve_tx_data_check_data_(state, commit_version, end_scn, is_rollback);
}
return ret;
}
@ -92,8 +107,16 @@ int CheckRowLockedFunctor::operator() (const ObTxData &tx_data, ObTxCCCtx *tx_cc
{
UNUSED(tx_cc_ctx);
int ret = OB_SUCCESS;
// NB: We need to pay close attention to the order of the reads of the different
// variables. Although we update the version before the state for the tnodes
// and read the state before the version, the compiled code's execution may
// rearrange that order and fail to obey the original logic (you can read the
// Dependency Definition in the ARM architecture book to understand it). So
// the synchronization primitive below is very important.
const int32_t state = ATOMIC_LOAD(&tx_data.state_);
const SCN commit_version = tx_data.commit_version_.atomic_load();
const SCN end_scn = tx_data.end_scn_.atomic_load();
const bool is_rollback = tx_data.undo_status_list_.is_contain(sql_sequence_, state);
switch (state) {
case ObTxData::COMMIT: {
@ -110,13 +133,13 @@ int CheckRowLockedFunctor::operator() (const ObTxData &tx_data, ObTxCCCtx *tx_cc
// whether the lock is locked by the data depends on whether undo status
// contains the data and the tsc version is unnecessary for the running
// txn.
lock_state_.is_locked_ = !tx_data.undo_status_list_.is_contain(sql_sequence_, state);
lock_state_.is_locked_ = !is_rollback;
lock_state_.trans_version_.set_min();
} else {
// Case 3: data is during execution and it is not owned by the checker, so
// whether the lock is locked by the data depends on whether undo status
// contains the data and the tsc version is unnecessary for the running txn.
lock_state_.is_locked_ = !tx_data.undo_status_list_.is_contain(sql_sequence_, state);
lock_state_.is_locked_ = !is_rollback;
lock_state_.trans_version_.set_min();
}
break;
@ -139,6 +162,10 @@ int CheckRowLockedFunctor::operator() (const ObTxData &tx_data, ObTxCCCtx *tx_cc
lock_state_.is_delayed_cleanout_ = true;
}
if (OB_SUCC(ret)) {
(void)resolve_tx_data_check_data_(state, commit_version, end_scn, is_rollback);
}
return ret;
}
@ -147,9 +174,16 @@ int GetTxStateWithSCNFunctor::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_
{
UNUSED(tx_cc_ctx);
int ret = OB_SUCCESS;
// NB: We need to pay close attention to the order of the reads of the different
// variables. Although we update the version before the state for the tnodes
// and read the state before the version, the compiled code's execution may
// rearrange that order and fail to obey the original logic (you can read the
// Dependency Definition in the ARM architecture book to understand it). So
// the synchronization primitive below is very important.
const int32_t state = ATOMIC_LOAD(&tx_data.state_);
const SCN commit_version = tx_data.commit_version_.atomic_load();
const SCN end_scn = tx_data.end_scn_.atomic_load();
const bool is_rollback = false;
// return the transaction state_ according to the merge log ts.
// the detailed document is available as follows.
@ -179,6 +213,10 @@ int GetTxStateWithSCNFunctor::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_
STORAGE_LOG(ERROR, "unexpected transaction state_", K(ret), K(tx_data));
}
if (OB_SUCC(ret)) {
(void)resolve_tx_data_check_data_(state, commit_version, end_scn, is_rollback);
}
return ret;
}
@ -194,6 +232,7 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx *
const transaction::ObTransID data_tx_id = lock_for_read_arg_.data_trans_id_;
const transaction::ObTxSEQ data_sql_sequence = lock_for_read_arg_.data_sql_sequence_;
const bool read_latest = lock_for_read_arg_.read_latest_;
const bool read_uncommitted = lock_for_read_arg_.read_uncommitted_;
const transaction::ObTransID reader_tx_id = lock_for_read_arg_.mvcc_acc_ctx_.tx_id_;
// NB: We need pay much attention to the order of the reads to the different
@ -204,29 +243,39 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx *
// it). So the synchronization primitive below is much important.
const int32_t state = ATOMIC_LOAD(&tx_data.state_);
const SCN commit_version = tx_data.commit_version_.atomic_load();
const SCN end_scn = tx_data.end_scn_.atomic_load();
const bool is_rollback = tx_data.undo_status_list_.is_contain(data_sql_sequence, state);
can_read_ = false;
trans_version_.set_invalid();
is_determined_state_ = false;
switch (state) {
case ObTxData::COMMIT: {
// Case 1: data is committed, so the state is decided and whether we can read
// depends on whether undo status contains the data. Then we return the commit
// version as data version.
can_read_ = !tx_data.undo_status_list_.is_contain(data_sql_sequence, state);
trans_version_ = commit_version;
is_determined_state_ = true;
if (read_uncommitted) {
// Case 1.1: We need the latest version instead of multi-version search
can_read_ = !is_rollback;
trans_version_ = commit_version;
} else {
// Case 1.2: Otherwise, we get the version under mvcc
can_read_ = snapshot_version >= commit_version
&& !tx_data.undo_status_list_.is_contain(data_sql_sequence, state);
trans_version_ = commit_version;
}
break;
}
case ObTxData::RUNNING:
case ObTxData::ELR_COMMIT: {
// Case 2: data is during execution, so the state is not decided.
if (read_latest && reader_tx_id == data_tx_id) {
// Case 2.0: read the latest written of current txn
can_read_ = !tx_data.undo_status_list_.is_contain(data_sql_sequence, state);
if (read_uncommitted) {
can_read_ = !is_rollback;
trans_version_.set_min();
} else if (read_latest && reader_tx_id == data_tx_id) {
// Case 2.0: read the latest written of current txn
can_read_ = !is_rollback;
trans_version_.set_min();
is_determined_state_ = false;
} else if (snapshot_tx_id == data_tx_id) {
// Case 2.1: data is owned by the read txn
bool tmp_can_read = false;
@ -241,11 +290,9 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx *
tmp_can_read = false;
}
// Tip 2.1.1: we should skip the data if it is undone
can_read_ = tmp_can_read &&
!tx_data.undo_status_list_.is_contain(data_sql_sequence, state);
can_read_ = tmp_can_read && !is_rollback;
// Tip 2.1.2: trans version is unnecessary for the running txn
trans_version_.set_min();
is_determined_state_ = false;
} else {
// Case 2.2: data is not owned by the read txn
// NB: we need pay attention to the choice condition when issuing the
@ -261,7 +308,6 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx *
// unnecessary for the running txn
can_read_ = false;
trans_version_.set_min();
is_determined_state_ = false;
} else if (tx_cc_ctx->prepare_version_ > snapshot_version) {
// Case 2.2.2: data is at least in prepare state and the prepare
// version is bigger than the read txn's snapshot version, then the
@ -270,15 +316,12 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx *
// the running txn
can_read_ = false;
trans_version_.set_min();
is_determined_state_ = false;
} else {
// Only dml statement can read elr data
if (ObTxData::ELR_COMMIT == state
&& lock_for_read_arg_.mvcc_acc_ctx_.snapshot_.tx_id_.is_valid()) {
can_read_ = !tx_data.undo_status_list_.is_contain(data_sql_sequence, state);
can_read_ = snapshot_version >= commit_version && !is_rollback;
trans_version_ = commit_version;
// TODO(handora.qc): use better implementaion to remove it
is_determined_state_ = true;
} else {
// Case 2.2.3: data is in prepare state and the prepare version is
// smaller than the read txn's snapshot version, then the data's
@ -300,7 +343,6 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx *
// the data and the trans version is unnecessary for the aborted txn
can_read_ = false;
trans_version_.set_min();
is_determined_state_ = true;
break;
}
default:
@ -310,6 +352,10 @@ int LockForReadFunctor::inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx *
break;
}
if (OB_SUCC(ret)) {
(void)resolve_tx_data_check_data_(state, commit_version, end_scn, is_rollback);
}
return ret;
}
@ -371,11 +417,6 @@ int LockForReadFunctor::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx
}
}
if (OB_SUCC(ret) && OB_FAIL(cleanout_op_(tx_data, tx_cc_ctx))) {
TRANS_LOG(WARN, "cleanout failed", K(ret), K(cleanout_op_), KPC(this),
K(tx_data), KPC(tx_cc_ctx));
}
TRANS_LOG(DEBUG, "lock for read", K(ret), K(tx_data), KPC(tx_cc_ctx), KPC(this));
return ret;
@ -438,9 +479,11 @@ int LockForReadFunctor::check_gc_handler_()
int LockForReadFunctor::check_for_standby(const transaction::ObTransID &tx_id)
{
int ret = OB_SUCCESS;
if (OB_SUCC(MTL(transaction::ObTransService *)->check_for_standby(ls_id_, tx_id,
if (OB_SUCC(MTL(transaction::ObTransService *)->check_for_standby(ls_id_,
tx_id,
lock_for_read_arg_.mvcc_acc_ctx_.snapshot_.version_,
can_read_, trans_version_, is_determined_state_))) {
can_read_,
trans_version_))) {
lock_for_read_arg_.mvcc_acc_ctx_.is_standby_read_ = true;
}
return ret;
@ -448,7 +491,15 @@ int LockForReadFunctor::check_for_standby(const transaction::ObTransID &tx_id)
// Call operator of the cleanout check functor; invoked with a transaction's
// tx data (and an optional concurrency-control context).
// NOTE(review): this chunk looks like a rendered diff with removed and added
// lines interleaved. The immediate `return operation_(...)` below is
// presumably the pre-change body and the statements after it the post-change
// body -- confirm against the real file before relying on either.
int CleanoutTxStateFunctor::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx)
{
return operation_(tx_data, tx_cc_ctx);
int ret = OB_SUCCESS;
// Snapshot the tx data fields; the state and both SCNs are loaded atomically.
const int32_t state = ATOMIC_LOAD(&tx_data.state_);
const SCN commit_version = tx_data.commit_version_.atomic_load();
const SCN end_scn = tx_data.end_scn_.atomic_load();
// Whether seq_no_ is contained in the undo status list for the loaded state.
const bool is_rollback = tx_data.undo_status_list_.is_contain(seq_no_, state);
// Pass the snapshot to resolve_tx_data_check_data_(); its result is
// deliberately discarded.
(void)resolve_tx_data_check_data_(state, commit_version, end_scn, is_rollback);
return ret;
}
bool ObReCheckTxNodeForLockForReadOperation::operator()()
@ -458,7 +509,6 @@ bool ObReCheckTxNodeForLockForReadOperation::operator()()
if (tnode_.is_aborted()) {
can_read_ = false;
trans_version_.set_min();
is_determined_state_ = true;
ret = true;
}
@ -471,43 +521,44 @@ bool ObReCheckNothingOperation::operator()()
return ret;
}
int ObCleanoutTxNodeOperation::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx)
// Reports whether the tx node still needs its state written back: true only
// when the node is neither committed nor aborted and is flagged for delayed
// cleanout.
bool ObCleanoutTxNodeOperation::need_cleanout() const
{
  bool need = false;
  if (tnode_.is_committed() || tnode_.is_aborted()) {
    // The node already carries a decided state; nothing to clean out.
  } else {
    need = tnode_.is_delayed_cleanout();
  }
  return need;
}
int ObCleanoutTxNodeOperation::operator()(const ObTxDataCheckData &tx_data)
{
int ret = OB_SUCCESS;
const int32_t state = ATOMIC_LOAD(&tx_data.state_);
const SCN commit_version = tx_data.commit_version_.atomic_load();
const SCN end_scn = tx_data.end_scn_.atomic_load();
// NB: We need to pay close attention to the order of the reads of the different
// variables. Although we update the version before the state for the tnodes
// and read the state before the version, the compiled code's execution may
// rearrange that order and fail to obey the original logic (you can read the
// Dependency Definition in the ARM architecture book to understand it). So
// the synchronization primitive below is very important.
const int32_t state = tx_data.state_;
const SCN commit_version = tx_data.commit_version_;
const SCN end_scn = tx_data.end_scn_;
const bool is_rollback = tx_data.is_rollback_;
if (ObTxData::RUNNING == state
&& !tx_data.undo_status_list_.is_contain(tnode_.seq_no_, state)
// NB: we need pay attention to the choice condition when issuing the
// lock_for_read, we cannot only treat state in exec_info as judgement
// whether txn is prepared, because the state in exec_info will not be
// updated as prepared until log is applied and the application is
// asynchronous. So we need use version instead of state as judgement and
// mark it whenever we submit the commit/prepare log(using before_prepare)
&& tx_cc_ctx->prepare_version_.is_max()) {
if (ObTxData::RUNNING == state && !is_rollback) {
// Case 1: data is during execution, so we do not need to write back.
// This is the case for most lock-for-read scenarios, so we mainly need to
// optimize it by not latching the row
} else if (!(tnode_.is_committed() || tnode_.is_aborted())
&& tnode_.is_delayed_cleanout()) {
} else if (need_cleanout()) {
if (need_row_latch_) {
value_.latch_.lock();
}
if (!(tnode_.is_committed() || tnode_.is_aborted())
&& tnode_.is_delayed_cleanout()) {
if (tx_data.undo_status_list_.is_contain(tnode_.seq_no_, state)) {
if (need_cleanout()) {
if (is_rollback) {
// Case 2: data is rollbacked during execution, so we write back the abort state
if (OB_FAIL(value_.unlink_trans_node(tnode_))) {
TRANS_LOG(WARN, "mvcc trans ctx trans commit error", K(ret), K(value_), K(tnode_));
} else {
(void)tnode_.trans_abort(tx_data.end_scn_);
(void)tnode_.trans_abort(end_scn);
}
} else if (ObTxData::RUNNING == state) {
if (!tx_cc_ctx->prepare_version_.is_max()) {
// Case 3: data is prepared, we also donot write back the prepare state
}
} else if (ObTxData::ELR_COMMIT == state) {
// TODO: make it more clear
value_.update_max_elr_trans_version(commit_version, tnode_.tx_id_);
@ -524,7 +575,6 @@ int ObCleanoutTxNodeOperation::operator()(const ObTxData &tx_data, ObTxCCCtx *tx
}
} else if (ObTxData::ABORT == state) {
// Case 6: data is aborted, so we write back the abort state
if (OB_FAIL(value_.unlink_trans_node(tnode_))) {
TRANS_LOG(WARN, "mvcc trans ctx trans commit error", K(ret), K(value_), K(tnode_));
} else {
@ -532,7 +582,7 @@ int ObCleanoutTxNodeOperation::operator()(const ObTxData &tx_data, ObTxCCCtx *tx
}
} else {
ret = OB_ERR_UNEXPECTED;
STORAGE_LOG(WARN, "unexpected transaction state_", K(ret), K(tx_data));
STORAGE_LOG(WARN, "unexpected transaction state_", K(ret));
}
}
@ -541,15 +591,14 @@ int ObCleanoutTxNodeOperation::operator()(const ObTxData &tx_data, ObTxCCCtx *tx
}
}
TRANS_LOG(DEBUG, "cleanout tx state", K(ret), K(tx_data), KPC(tx_cc_ctx), KPC(this));
TRANS_LOG(DEBUG, "cleanout tx state", K(ret), KPC(this));
return ret;
}
// No-op cleanout: marks its argument(s) as unused and always returns
// OB_SUCCESS.
// NOTE(review): two signature lines appear back to back here (one taking
// ObTxData/ObTxCCCtx, one taking ObTxDataCheckData). This chunk looks like a
// rendered diff, so the first signature and the UNUSED(tx_cc_ctx) line are
// presumably the superseded version -- confirm against the real file.
int ObCleanoutNothingOperation::operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx)
int ObCleanoutNothingOperation::operator()(const ObTxDataCheckData &tx_data)
{
UNUSED(tx_data);
UNUSED(tx_cc_ctx);
return OB_SUCCESS;
}

View File

@ -49,18 +49,15 @@ class ObReCheckTxNodeForLockForReadOperation : public ObReCheckOp
public:
ObReCheckTxNodeForLockForReadOperation(memtable::ObMvccTransNode &tnode,
bool &can_read,
share::SCN &trans_version,
bool &is_determined_state)
share::SCN &trans_version)
: tnode_(tnode),
can_read_(can_read),
is_determined_state_(is_determined_state),
trans_version_(trans_version) {}
virtual bool operator()() override;
DECLARE_TO_STRING;
private:
memtable::ObMvccTransNode &tnode_;
bool &can_read_;
bool &is_determined_state_;
share::SCN &trans_version_;
};
@ -75,8 +72,8 @@ public:
// Abstract base for cleanout operations: the call operator is pure virtual,
// so concrete operations must implement it.
// NOTE(review): two pure-virtual operator() declarations appear here (one
// taking ObTxData/ObTxCCCtx, one taking ObTxDataCheckData). This chunk looks
// like a rendered diff, so one of them is presumably the superseded
// signature -- confirm against the real header.
class ObCleanoutOp
{
public:
virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) = 0;
virtual int operator()(const ObTxDataCheckData &tx_data) = 0;
// Default answer is "no cleanout needed"; subclasses may override.
virtual bool need_cleanout() const { return false; }
// Stub printer: ignores the buffer and reports a length of 0.
int64_t to_string(char* buf, const int64_t buf_len) const { return 0; }
};
@ -89,7 +86,8 @@ public:
: value_(value),
tnode_(tnode),
need_row_latch_(need_row_latch) {}
virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override;
virtual int operator()(const ObTxDataCheckData &tx_data) override;
virtual bool need_cleanout() const override;
DECLARE_TO_STRING;
private:
memtable::ObMvccRow &value_;
@ -101,7 +99,7 @@ class ObCleanoutNothingOperation : public ObCleanoutOp
{
public:
ObCleanoutNothingOperation() {}
virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override;
virtual int operator()(const ObTxDataCheckData &tx_data) override;
TO_STRING_KV("CleanoutOperation", "CleanoutNothing");
};
@ -115,7 +113,8 @@ public:
CheckSqlSequenceCanReadFunctor(const transaction::ObTxSEQ &sql_sequence, bool &can_read)
: sql_sequence_(sql_sequence), can_read_(can_read) {}
virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override;
TO_STRING_KV(K(sql_sequence_), K(can_read_));
INHERIT_TO_STRING_KV("ObITxDataCheckFunctor", ObITxDataCheckFunctor,
K(sql_sequence_), K(can_read_));
public:
const transaction::ObTxSEQ &sql_sequence_;
bool &can_read_;
@ -140,8 +139,9 @@ public:
sql_sequence_(sql_sequence),
lock_state_(lock_state) {}
virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override;
TO_STRING_KV(K(read_tx_id_), K(data_tx_id_), K(sql_sequence_),
K(lock_state_));
INHERIT_TO_STRING_KV("ObITxDataCheckFunctor", ObITxDataCheckFunctor,
K(read_tx_id_), K(data_tx_id_), K(sql_sequence_),
K(lock_state_));
public:
const transaction::ObTransID &read_tx_id_;
const transaction::ObTransID &data_tx_id_;
@ -157,12 +157,13 @@ class GetTxStateWithSCNFunctor : public ObITxDataCheckFunctor
{
public:
GetTxStateWithSCNFunctor(const share::SCN scn,
int64_t &state,
share::SCN &trans_version)
int64_t &state,
share::SCN &trans_version)
: scn_(scn), state_(state), trans_version_(trans_version) {}
virtual ~GetTxStateWithSCNFunctor() {}
virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override;
TO_STRING_KV(K(scn_), K(state_), K(trans_version_));
INHERIT_TO_STRING_KV("ObITxDataCheckFunctor", ObITxDataCheckFunctor, K(scn_),
K(state_), K(trans_version_));
public:
const share::SCN scn_;
int64_t &state_;
@ -181,13 +182,11 @@ public:
LockForReadFunctor(const transaction::ObLockForReadArg &lock_for_read_arg,
bool &can_read,
share::SCN &trans_version,
bool &is_determined_state,
const share::ObLSID ls_id,
ObCleanoutOp &cleanout_op,
ObReCheckOp &recheck_op)
: lock_for_read_arg_(lock_for_read_arg),
can_read_(can_read),
is_determined_state_(is_determined_state),
trans_version_(trans_version),
ls_id_(ls_id),
cleanout_op_(cleanout_op),
@ -196,8 +195,8 @@ public:
virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override;
virtual bool recheck() override;
int check_for_standby(const transaction::ObTransID &tx_id);
TO_STRING_KV(K(lock_for_read_arg_), K(can_read_), K(is_determined_state_),
K(trans_version_), K(ls_id_));
INHERIT_TO_STRING_KV("ObITxDataCheckFunctor", ObITxDataCheckFunctor, K(lock_for_read_arg_),
K(can_read_), K(trans_version_), K(ls_id_));
private:
int inner_lock_for_read(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx);
int check_clog_disk_full_();
@ -205,7 +204,6 @@ private:
public:
const transaction::ObLockForReadArg &lock_for_read_arg_;
bool &can_read_;
bool &is_determined_state_;
share::SCN &trans_version_;
share::ObLSID ls_id_;
// Cleanout the tx node if necessary
@ -221,11 +219,13 @@ public:
// Tx data check functor that carries a sequence number and a reference to an
// ObCleanoutOp.
// NOTE(review): two constructors and two to-string macros appear here. This
// chunk looks like a rendered diff, so the single-argument constructor and
// the plain TO_STRING_KV line are presumably the superseded versions --
// confirm against the real header.
class CleanoutTxStateFunctor : public ObITxDataCheckFunctor
{
public:
CleanoutTxStateFunctor(ObCleanoutOp &op)
: operation_(op) {}
CleanoutTxStateFunctor(const transaction::ObTxSEQ seq_no,
ObCleanoutOp &op)
: seq_no_(seq_no), operation_(op) {}
// Call operator overriding the ObITxDataCheckFunctor interface.
virtual int operator()(const ObTxData &tx_data, ObTxCCCtx *tx_cc_ctx = nullptr) override;
TO_STRING_KV(K_(operation));
INHERIT_TO_STRING_KV("ObITxDataCheckFunctor", ObITxDataCheckFunctor, K_(operation), K_(seq_no));
public:
// Sequence number supplied at construction (stored by value).
transaction::ObTxSEQ seq_no_;
// Cleanout operation held by reference; not owned by this functor.
ObCleanoutOp &operation_;
};

View File

@ -38,23 +38,37 @@ ObTxLogTypeChecker::need_replay_barrier(const ObTxLogType log_type,
|| data_source_type == ObTxDataSourceType::DELETE_TABLET_NEW_MDS
|| data_source_type == ObTxDataSourceType::UNBIND_TABLET_NEW_MDS
|| data_source_type == ObTxDataSourceType::START_TRANSFER_OUT
|| data_source_type == ObTxDataSourceType::START_TRANSFER_OUT_PREPARE
|| data_source_type == ObTxDataSourceType::FINISH_TRANSFER_OUT) {
barrier_flag = logservice::ObReplayBarrierType::PRE_BARRIER;
} else if (data_source_type == ObTxDataSourceType::FINISH_TRANSFER_IN) {
} else if (data_source_type == ObTxDataSourceType::FINISH_TRANSFER_IN
|| data_source_type == ObTxDataSourceType::START_TRANSFER_OUT_V2
|| data_source_type == ObTxDataSourceType::TRANSFER_MOVE_TX_CTX) {
barrier_flag = logservice::ObReplayBarrierType::STRICT_BARRIER;
}
} else if (ObTxLogType::TX_COMMIT_LOG == log_type) {
} else if (ObTxLogType::TX_COMMIT_INFO_LOG == log_type) {
if (data_source_type == ObTxDataSourceType::START_TRANSFER_IN) {
barrier_flag = logservice::ObReplayBarrierType::STRICT_BARRIER;
}
} else if (ObTxLogType::TX_COMMIT_LOG == log_type) {
if (data_source_type == ObTxDataSourceType::START_TRANSFER_IN
|| data_source_type == ObTxDataSourceType::START_TRANSFER_OUT_V2
|| data_source_type == ObTxDataSourceType::TRANSFER_MOVE_TX_CTX) {
barrier_flag = logservice::ObReplayBarrierType::STRICT_BARRIER;
}
} else if (ObTxLogType::TX_ABORT_LOG == log_type) {
if (data_source_type == ObTxDataSourceType::START_TRANSFER_IN
|| data_source_type == ObTxDataSourceType::START_TRANSFER_OUT_V2
|| data_source_type == ObTxDataSourceType::TRANSFER_MOVE_TX_CTX) {
barrier_flag = logservice::ObReplayBarrierType::STRICT_BARRIER;
}
}
return barrier_flag;
}
int ObTxLogTypeChecker::decide_final_barrier_type(
const logservice::ObReplayBarrierType tmp_log_barrier_type,
logservice::ObReplayBarrierType &final_barrier_type)
@ -255,7 +269,9 @@ OB_TX_SERIALIZE_MEMBER(ObTxCommitInfoLog,
/* 10 */ app_trace_info_,
/* 11 */ prev_record_lsn_,
/* 12 */ redo_lsns_,
/* 13 */ xid_);
/* 13 */ xid_,
/* 14 */ commit_parts_,
/* 15 */ epoch_);
OB_TX_SERIALIZE_MEMBER(ObTxPrepareLog,
compat_bytes_,
@ -335,7 +351,7 @@ int ObTxCommitInfoLog::before_serialize()
TRANS_LOG(WARN, "reset all compat_bytes_ valid failed", K(ret));
}
} else {
if (OB_FAIL(compat_bytes_.init(13))) {
if (OB_FAIL(compat_bytes_.init(15))) {
TRANS_LOG(WARN, "init compat_bytes_ failed", K(ret));
}
}
@ -354,6 +370,8 @@ int ObTxCommitInfoLog::before_serialize()
TX_NO_NEED_SER(prev_record_lsn_.is_valid() == false, 11, compat_bytes_);
TX_NO_NEED_SER(redo_lsns_.empty(), 12, compat_bytes_);
TX_NO_NEED_SER(xid_.empty(), 13, compat_bytes_);
TX_NO_NEED_SER(commit_parts_.empty(), 14, compat_bytes_);
TX_NO_NEED_SER(epoch_ == 0, 15, compat_bytes_);
}
return ret;
@ -1082,6 +1100,7 @@ void ObTxLogBlock::reset()
replay_buf_ = nullptr;
len_ = pos_ = 0;
cur_log_type_ = ObTxLogType::UNKNOWN;
cur_block_barrier_type_ = logservice::ObReplayBarrierType::NO_NEED_BARRIER;
cb_arg_array_.reset();
big_segment_buf_ = nullptr;
}
@ -1090,6 +1109,7 @@ int ObTxLogBlock::reuse(const int64_t replay_hint, const ObTxLogBlockHeader &blo
{
int ret = OB_SUCCESS;
cur_log_type_ = ObTxLogType::UNKNOWN;
cur_block_barrier_type_ = logservice::ObReplayBarrierType::NO_NEED_BARRIER;
cb_arg_array_.reset();
big_segment_buf_ = nullptr;
pos_ = 0;
@ -1100,7 +1120,8 @@ int ObTxLogBlock::reuse(const int64_t replay_hint, const ObTxLogBlockHeader &blo
}
ObTxLogBlock::ObTxLogBlock()
: replay_buf_(nullptr), len_(0), pos_(0), cur_log_type_(ObTxLogType::UNKNOWN), cb_arg_array_(),
: replay_buf_(nullptr), len_(0), pos_(0), cur_log_type_(ObTxLogType::UNKNOWN),
cur_block_barrier_type_(logservice::ObReplayBarrierType::NO_NEED_BARRIER), cb_arg_array_(),
big_segment_buf_(nullptr)
{
// do nothing
@ -1176,12 +1197,21 @@ int ObTxLogBlock::rewrite_barrier_log_block(int64_t replay_hint,
int ret = OB_SUCCESS;
int64_t tmp_pos = 0;
char *serialize_buf = nullptr;
logservice::ObLogBaseHeader header(logservice::ObLogBaseType::TRANS_SERVICE_LOG_BASE_TYPE,
barrier_type, replay_hint);
logservice::ObReplayBarrierType final_barrier_type =
logservice::ObReplayBarrierType::NO_NEED_BARRIER;
if (OB_ISNULL(fill_buf_.get_buf())
|| logservice::ObReplayBarrierType::INVALID_BARRIER == barrier_type) {
ret = OB_INVALID_ARGUMENT;
TRANS_LOG(WARN, "invalid arguments", K(ret), K(replay_hint), K(barrier_type), KPC(this));
} else if (OB_FAIL(ObTxLogTypeChecker::decide_final_barrier_type(cur_block_barrier_type_,
final_barrier_type))) {
TRANS_LOG(WARN, "decide final barrier type with the cur_block_barrier failed", K(ret),
K(barrier_type), K(final_barrier_type), K(replay_hint));
} else if (OB_FAIL(
ObTxLogTypeChecker::decide_final_barrier_type(barrier_type, final_barrier_type))) {
TRANS_LOG(WARN, "decide final barrier type with the barrier_type arg failed", K(ret),
K(barrier_type), K(final_barrier_type), K(replay_hint));
} else {
serialize_buf = fill_buf_.get_buf();
}
@ -1191,8 +1221,18 @@ int ObTxLogBlock::rewrite_barrier_log_block(int64_t replay_hint,
} else if (OB_ISNULL(serialize_buf)) {
ret = OB_ERR_UNEXPECTED;
TRANS_LOG(WARN, "unexpected empty serialize_buf", K(*this));
} else if (OB_FAIL(header.serialize(serialize_buf, len_, tmp_pos))) {
TRANS_LOG(WARN, "serialize log base header error", K(ret));
} else {
logservice::ObLogBaseHeader header(logservice::ObLogBaseType::TRANS_SERVICE_LOG_BASE_TYPE,
final_barrier_type, replay_hint);
if (final_barrier_type != barrier_type) {
TRANS_LOG(INFO, "rewrite barrier_type without the origin target", K(ret), K(replay_hint),
K(final_barrier_type), K(barrier_type), KPC(this));
}
if (OB_FAIL(header.serialize(serialize_buf, len_, tmp_pos))) {
TRANS_LOG(WARN, "serialize log base header error", K(ret));
}
}
return ret;
@ -1283,6 +1323,10 @@ int ObTxLogBlock::serialize_log_block_header_(const int64_t replay_hint,
TRANS_LOG(WARN, "serialize block header error", K(ret));
}
if (OB_SUCC(ret)) {
cur_block_barrier_type_ = barrier_type;
}
return ret;
}

View File

@ -177,6 +177,18 @@ public:
logservice::ObReplayBarrierType &final_barrier_type);
};
inline bool is_contain_stat_log(const ObTxCbArgArray &array)
{
bool bool_ret = false;
for (int64_t i = 0; i < array.count(); i++) {
if ((ObTxLogTypeChecker::is_state_log(array.at(i).get_log_type()))) {
bool_ret = true;
break;
}
}
return bool_ret;
}
// ============================== Tx Log Header ==============================
class ObTxLogHeader
{
@ -482,7 +494,7 @@ public:
incremental_participants_(temp_ref.incremental_participants_), cluster_version_(0),
app_trace_id_str_(temp_ref.app_trace_id_str_), app_trace_info_(temp_ref.app_trace_info_),
prev_record_lsn_(temp_ref.prev_record_lsn_), redo_lsns_(temp_ref.redo_lsns_),
xid_(temp_ref.xid_)
xid_(temp_ref.xid_), commit_parts_(), epoch_(0)
{
before_serialize();
}
@ -498,12 +510,14 @@ public:
ObRedoLSNArray &redo_lsns,
share::ObLSArray &incremental_participants,
uint64_t cluster_version,
const ObXATransID &xid)
const ObXATransID &xid,
const ObTxCommitParts &commit_parts,
int64_t epoch)
: scheduler_(scheduler), participants_(participants), upstream_(upstream),
is_sub2pc_(is_sub2pc), is_dup_tx_(is_dup_tx), can_elr_(is_elr),
incremental_participants_(incremental_participants), cluster_version_(cluster_version),
app_trace_id_str_(app_trace_id), app_trace_info_(app_trace_info),
prev_record_lsn_(prev_record_lsn), redo_lsns_(redo_lsns), xid_(xid)
prev_record_lsn_(prev_record_lsn), redo_lsns_(redo_lsns), xid_(xid), commit_parts_(commit_parts), epoch_(epoch)
{
before_serialize();
};
@ -521,6 +535,8 @@ public:
const share::ObLSArray &get_incremental_participants() const { return incremental_participants_; }
uint64_t get_cluster_version() const { return cluster_version_; }
const ObXATransID &get_xid() const { return xid_; }
int64_t get_epoch() const { return epoch_; }
const ObTxCommitParts &get_commit_parts() const { return commit_parts_; }
int ob_admin_dump(share::ObAdminMutatorStringArg &arg);
static const ObTxLogType LOG_TYPE;
@ -537,7 +553,9 @@ public:
K(app_trace_info_),
K(prev_record_lsn_),
K(redo_lsns_),
K(xid_))
K(xid_),
K(commit_parts_),
K(epoch_))
public:
int before_serialize();
@ -559,6 +577,8 @@ private:
ObRedoLSNArray &redo_lsns_;
// for xa
ObXATransID xid_;
ObTxCommitParts commit_parts_;
int64_t epoch_;
};
class ObTxPrepareLogTempRef
@ -1116,6 +1136,7 @@ public:
K(len_),
K(pos_),
K(cur_log_type_),
K(cur_block_barrier_type_),
K(cb_arg_array_),
KPC(big_segment_buf_));
@ -1145,6 +1166,7 @@ private:
int64_t len_;
int64_t pos_;
ObTxLogType cur_log_type_;
logservice::ObReplayBarrierType cur_block_barrier_type_;
ObTxCbArgArray cb_arg_array_;
ObTxBigSegmentBuf *big_segment_buf_;

View File

@ -27,7 +27,8 @@ OB_SERIALIZE_MEMBER(ObTxMsg,
request_id_,
timestamp_,
epoch_,
cluster_id_);
cluster_id_,
transfer_epoch_);
// NOTICE: DO NOT MODIFY THE FOLLOWING MACRO DEFINES; THEY ARE RESERVED FOR COMPATIBILITY WITH OLD VERSIONS <= 4.1.2
#define ObTxSubPrepareMsg_V1_MEMBERS expire_ts_, xid_, parts_, app_trace_info_
#define ObTxSubPrepareRespMsg_V1_MEMBERS ret_
@ -97,13 +98,13 @@ OB_SERIALIZE_MEMBER(ObTxMsg,
return len; \
}
OB_TX_MSG_SERDE(ObTxSubPrepareMsg, ObTxMsg, expire_ts_, xid_, parts_, app_trace_info_);
OB_TX_MSG_SERDE(ObTxSubPrepareMsg, ObTxMsg, expire_ts_, xid_, parts_, app_trace_info_, commit_parts_);
OB_TX_MSG_SERDE(ObTxSubPrepareRespMsg, ObTxMsg, ret_);
OB_TX_MSG_SERDE(ObTxSubCommitMsg, ObTxMsg, xid_);
OB_TX_MSG_SERDE(ObTxSubCommitRespMsg, ObTxMsg, ret_);
OB_TX_MSG_SERDE(ObTxSubRollbackMsg, ObTxMsg, xid_);
OB_TX_MSG_SERDE(ObTxSubRollbackRespMsg, ObTxMsg, ret_);
OB_TX_MSG_SERDE(ObTxCommitMsg, ObTxMsg, expire_ts_, parts_, app_trace_info_, commit_start_scn_);
OB_TX_MSG_SERDE(ObTxCommitMsg, ObTxMsg, expire_ts_, parts_, app_trace_info_, commit_start_scn_, commit_parts_);
OB_TX_MSG_SERDE(ObTxCommitRespMsg, ObTxMsg, ret_, commit_version_);
OB_TX_MSG_SERDE(ObTxAbortMsg, ObTxMsg, reason_);
OB_TX_MSG_SERDE(ObTxKeepaliveMsg, ObTxMsg, status_);
@ -120,13 +121,13 @@ OB_TX_MSG_SERDE(Ob2pcClearReqMsg, ObTxMsg, max_commit_log_scn_);
OB_TX_MSG_SERDE(Ob2pcClearRespMsg, ObTxMsg);
OB_TX_MSG_SERDE(Ob2pcPrepareRedoReqMsg, ObTxMsg, xid_, upstream_, app_trace_info_);
OB_TX_MSG_SERDE(Ob2pcPrepareRedoRespMsg, ObTxMsg);
OB_TX_MSG_SERDE(Ob2pcPrepareVersionReqMsg, ObTxMsg);
OB_TX_MSG_SERDE(Ob2pcPrepareVersionReqMsg, ObTxMsg, upstream_);
OB_TX_MSG_SERDE(Ob2pcPrepareVersionRespMsg, ObTxMsg, prepare_version_, prepare_info_array_);
OB_TX_MSG_SERDE(ObAskStateMsg, ObTxMsg, snapshot_);
OB_TX_MSG_SERDE(ObAskStateMsg, ObTxMsg, snapshot_, ori_ls_id_, ori_addr_);
OB_TX_MSG_SERDE(ObAskStateRespMsg, ObTxMsg, state_info_array_);
OB_TX_MSG_SERDE(ObCollectStateMsg, ObTxMsg, snapshot_);
OB_TX_MSG_SERDE(ObCollectStateRespMsg, ObTxMsg, state_info_);
OB_SERIALIZE_MEMBER((ObTxRollbackSPRespMsg, ObTxMsg), ret_, orig_epoch_);
OB_TX_MSG_SERDE(ObCollectStateMsg, ObTxMsg, snapshot_, check_info_);
OB_TX_MSG_SERDE(ObCollectStateRespMsg, ObTxMsg, state_info_, transfer_parts_);
OB_SERIALIZE_MEMBER((ObTxRollbackSPRespMsg, ObTxMsg), ret_, orig_epoch_, downstream_parts_);
OB_DEF_SERIALIZE_SIZE(ObTxRollbackSPMsg)
{
@ -140,6 +141,7 @@ OB_DEF_SERIALIZE_SIZE(ObTxRollbackSPMsg)
OB_UNIS_ADD_LEN(false);
}
OB_UNIS_ADD_LEN(flag_);
OB_UNIS_ADD_LEN(specified_from_scn_);
return len;
}
@ -155,6 +157,7 @@ OB_DEF_SERIALIZE(ObTxRollbackSPMsg)
OB_UNIS_ENCODE(false);
}
OB_UNIS_ENCODE(flag_);
OB_UNIS_ENCODE(specified_from_scn_);
}
return ret;
}
@ -177,6 +180,7 @@ OB_DEF_DESERIALIZE(ObTxRollbackSPMsg)
}
}
OB_UNIS_DECODE(flag_);
OB_UNIS_DECODE(specified_from_scn_);
}
return ret;
}
@ -333,8 +337,7 @@ bool Ob2pcPrepareRespMsg::is_valid() const
bool Ob2pcPreCommitReqMsg::is_valid() const
{
bool ret = false;
if (ObTxMsg::is_valid() && type_ == TX_2PC_PRE_COMMIT_REQ
&& commit_version_.is_valid()) {
if (ObTxMsg::is_valid() && type_ == TX_2PC_PRE_COMMIT_REQ) {
ret = true;
}
return ret;
@ -343,8 +346,7 @@ bool Ob2pcPreCommitReqMsg::is_valid() const
bool Ob2pcPreCommitRespMsg::is_valid() const
{
bool ret = false;
if (ObTxMsg::is_valid() && type_ == TX_2PC_PRE_COMMIT_RESP
&& commit_version_.is_valid()) {
if (ObTxMsg::is_valid() && type_ == TX_2PC_PRE_COMMIT_RESP) {
ret = true;
}
return ret;
@ -435,7 +437,9 @@ bool Ob2pcPrepareRedoRespMsg::is_valid() const
bool Ob2pcPrepareVersionReqMsg::is_valid() const
{
bool ret = false;
if (ObTxMsg::is_valid() && type_ == TX_2PC_PREPARE_VERSION_REQ) {
if (ObTxMsg::is_valid() && type_ == TX_2PC_PREPARE_VERSION_REQ
// TODO: enable the check below once there is no upgradable-from version that lacks upstream_
/*&& upstream_.is_valid()*/) {
ret = true;
}
return ret;

View File

@ -76,6 +76,7 @@ namespace transaction
tx_id_(),
receiver_(share::ObLSID::INVALID_LS_ID),
epoch_(-1),
transfer_epoch_(-1),
sender_addr_(),
sender_(share::ObLSID::INVALID_LS_ID),
request_id_(-1),
@ -90,6 +91,7 @@ namespace transaction
share::ObLSID receiver_;
/* the target participant's born epoch, used to verify its health */
int64_t epoch_;
int64_t transfer_epoch_;
/* useful when send rsp to sender */
ObAddr sender_addr_;
share::ObLSID sender_;
@ -104,6 +106,7 @@ namespace transaction
K_(sender),
K_(sender_addr),
K_(epoch),
K_(transfer_epoch),
K_(request_id),
K_(timestamp),
K_(cluster_id));
@ -135,11 +138,13 @@ namespace transaction
ObTxMsg(SUBPREPARE),
expire_ts_(OB_INVALID_TIMESTAMP),
xid_(),
parts_()
parts_(),
commit_parts_()
{}
int64_t expire_ts_;
ObXATransID xid_;
share::ObLSArray parts_;
ObTxCommitParts commit_parts_;
common::ObString app_trace_info_;
bool is_valid() const;
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(expire_ts), K_(xid), K_(parts),
@ -210,8 +215,9 @@ namespace transaction
share::SCN commit_start_scn_;
share::ObLSArray parts_;
common::ObString app_trace_info_;
ObTxCommitParts commit_parts_;
bool is_valid() const;
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(expire_ts), K_(commit_start_scn), K_(parts), K_(app_trace_info));
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(expire_ts), K_(commit_start_scn), K_(parts), K_(app_trace_info), K_(commit_parts));
OB_UNIS_VERSION(1);
};
struct ObTxCommitRespMsg : public ObTxMsg {
@ -243,7 +249,8 @@ namespace transaction
//todo: revise this once the usage of branch_id has been settled
branch_id_(-1),
tx_ptr_(NULL),
flag_(USE_ASYNC_RESP)
flag_(USE_ASYNC_RESP),
specified_from_scn_()
{}
~ObTxRollbackSPMsg() {
if (OB_NOT_NULL(tx_ptr_)) {
@ -251,6 +258,7 @@ namespace transaction
ob_free((void*)tx_ptr_);
tx_ptr_ = NULL;
}
specified_from_scn_.reset();
}
ObTxSEQ savepoint_;
int64_t op_sn_;
@ -258,11 +266,15 @@ namespace transaction
int64_t branch_id_;
const ObTxDesc *tx_ptr_;
uint8_t flag_;
ObTxSEQ specified_from_scn_;
bool use_async_resp() const { return (flag_ & USE_ASYNC_RESP) !=0; }
void set_for_transfer() { flag_ |= ROLLBACK_FOR_TRANSFER; }
bool for_transfer() const { return (flag_ & ROLLBACK_FOR_TRANSFER) !=0; }
const static uint8_t USE_ASYNC_RESP = 0x01;
const static uint8_t ROLLBACK_FOR_TRANSFER = 0x02;
bool is_valid() const;
INHERIT_TO_STRING_KV("txMsg", ObTxMsg,
K_(savepoint), K_(op_sn), K_(branch_id), K_(flag),
K_(savepoint), K_(op_sn), K_(branch_id), K_(flag), K_(specified_from_scn),
KP_(tx_ptr));
OB_UNIS_VERSION(1);
};
@ -279,7 +291,8 @@ namespace transaction
}
int ret_;
int64_t orig_epoch_;
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(ret), K_(orig_epoch));
ObSEArray<ObTxLSEpochPair, 1> downstream_parts_;
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(ret), K_(orig_epoch), K_(downstream_parts));
OB_UNIS_VERSION(1);
};
@ -471,10 +484,13 @@ namespace transaction
{
public:
Ob2pcPrepareVersionReqMsg() :
ObTxMsg(TX_2PC_PREPARE_VERSION_REQ)
ObTxMsg(TX_2PC_PREPARE_VERSION_REQ),
upstream_(share::ObLSID::INVALID_LS_ID)
{}
public:
share::ObLSID upstream_;
bool is_valid() const;
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(upstream));
OB_UNIS_VERSION(1);
};
@ -498,13 +514,16 @@ namespace transaction
public:
ObAskStateMsg() :
ObTxMsg(ASK_STATE),
snapshot_()
snapshot_(),
ori_ls_id_(),
ori_addr_()
{}
public:
share::SCN snapshot_;
share::ObLSID ori_ls_id_;
ObAddr ori_addr_;
bool is_valid() const;
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(snapshot));
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(snapshot), K_(ori_ls_id), K_(ori_addr));
OB_UNIS_VERSION(1);
};
@ -527,12 +546,14 @@ namespace transaction
public:
ObCollectStateMsg() :
ObTxMsg(COLLECT_STATE),
snapshot_()
snapshot_(),
check_info_()
{}
public:
share::SCN snapshot_;
ObStandbyCheckInfo check_info_;
bool is_valid() const;
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(snapshot));
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(snapshot), K_(check_info));
OB_UNIS_VERSION(1);
};
@ -541,12 +562,14 @@ namespace transaction
public:
ObCollectStateRespMsg() :
ObTxMsg(COLLECT_STATE_RESP),
state_info_()
state_info_(),
transfer_parts_()
{}
public:
ObStateInfo state_info_;
ObTxCommitParts transfer_parts_;
bool is_valid() const;
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(state_info));
INHERIT_TO_STRING_KV("txMsg", ObTxMsg, K_(state_info), K_(transfer_parts));
OB_UNIS_VERSION(1);
};

View File

@ -60,7 +60,8 @@ int ObTxStat::init(const common::ObAddr &addr, const ObTransID &tx_id,
const int64_t role_state,
const int64_t session_id, const common::ObAddr &scheduler,
const bool is_exiting, const ObXATransID &xid,
const share::ObLSID &coord, const int64_t last_request_ts)
const share::ObLSID &coord, const int64_t last_request_ts,
SCN start_scn, SCN end_scn, SCN rec_scn, bool transfer_blocking)
{
int ret = OB_SUCCESS;
if (is_inited_) {
@ -97,6 +98,10 @@ int ObTxStat::init(const common::ObAddr &addr, const ObTransID &tx_id,
coord_ = coord;
}
last_request_ts_ = last_request_ts;
start_scn_ = start_scn;
end_scn_ = end_scn;
rec_scn_ = rec_scn;
transfer_blocking_ = transfer_blocking;
}
return ret;
}

View File

@ -38,7 +38,8 @@ struct ObTxStat
const int64_t role_state,
const int64_t session_id, const common::ObAddr &scheduler,
const bool is_exiting, const ObXATransID &xid,
const share::ObLSID &coord, const int64_t last_request_ts);
const share::ObLSID &coord, const int64_t last_request_ts,
share::SCN start_scn, share::SCN end_scn, share::SCN rec_scn, bool transfer_blocking);
TO_STRING_KV(K_(addr), K_(tx_id), K_(tenant_id),
K_(has_decided), K_(ls_id), K_(participants),
K_(tx_ctx_create_time), K_(tx_expired_time), K_(ref_cnt),
@ -47,7 +48,7 @@ struct ObTxStat
K_(pending_log_size), K_(flushed_log_size),
K_(role_state), K_(session_id),
K_(scheduler_addr), K_(is_exiting),
K_(xid), K_(coord), K_(last_request_ts));
K_(xid), K_(coord), K_(last_request_ts), K_(start_scn), K_(end_scn), K_(rec_scn), K_(transfer_blocking));
public:
bool is_inited_;
common::ObAddr addr_;
@ -74,6 +75,10 @@ public:
ObXATransID xid_;
share::ObLSID coord_;
int64_t last_request_ts_;
share::SCN start_scn_;
share::SCN end_scn_;
share::SCN rec_scn_;
bool transfer_blocking_;
};
class ObTxLockStat

View File

@ -10,11 +10,13 @@
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX STORAGE
#include "storage/tx/wrs/ob_ls_wrs_handler.h"
#include "lib/utility/ob_print_utils.h"
#include "storage/tx/ob_trans_service.h"
#include "storage/tx_storage/ob_ls_service.h"
#include "logservice/ob_log_service.h"
#include "share/ob_force_print_log.h"
namespace oceanbase
{
@ -100,6 +102,12 @@ int ObLSWRSHandler::generate_ls_weak_read_snapshot_version(ObLS &ls,
if (REACH_TIME_INTERVAL(60 * 1000 * 1000)) {
STORAGE_LOG(INFO, "weak read handler not enabled", K(*this));
}
} else if (ls.get_transfer_status().get_transfer_prepare_enable()) {
// do nothing
need_skip = true;
if (REACH_TIME_INTERVAL(60 * 1000 * 1000)) {
STORAGE_LOG(INFO, "ls in transfer status", K(*this));
}
} else if (OB_FAIL(generate_weak_read_timestamp_(ls, max_stale_time, timestamp))) {
STORAGE_LOG(DEBUG, "fail to generate weak read timestamp", KR(ret), K(max_stale_time));
need_skip = true;

View File

@ -19,6 +19,8 @@
#include "lib/utility/ob_macro_utils.h"
#include "share/scn.h"
#include "share/ob_ls_id.h"
#include "storage/tx/ob_trans_define.h"
namespace oceanbase
{
namespace clog
@ -30,6 +32,8 @@ class ObISubmitLogCb;
namespace storage
{
class ObLS;
class ObLSWRSHandler
{
public:

View File

@ -412,7 +412,6 @@ int ObAccessService::get_source_ls_tx_table_guard_(
} else {
ObStoreCtx &ctx = ctx_guard.get_store_ctx();
ctx.mvcc_acc_ctx_.set_src_tx_table_guard(src_tx_table_guard);
ctx.mvcc_acc_ctx_.set_transfer_scn(user_data.transfer_scn_);
LOG_DEBUG("succ get src tx table guard", K(ret), K(src_ls->get_ls_id()), K(src_tx_table_guard), K(user_data));
}
}

View File

@ -110,7 +110,7 @@ int ObTxCtxTableRecoverHelper::recover_one_tx_ctx_(transaction::ObLSTxCtxMgr* ls
ctx_info.tx_id_,
ctx_info.ls_id_,
ctx_info.cluster_id_, /* cluster_id */
GET_MIN_CLUSTER_VERSION(),
ctx_info.cluster_version_,
0, /*session_id*/
scheduler,
INT64_MAX,

View File

@ -840,7 +840,6 @@ int ObTxTable::check_row_locked(ObReadTxDataArg &read_tx_data_arg,
{
CheckRowLockedFunctor fn(read_tx_id, read_tx_data_arg.tx_id_, sql_sequence, lock_state);
int ret = check_with_tx_data(read_tx_data_arg, fn);
// TODO(handora.qc): remove it
LOG_DEBUG("finish check row locked", K(read_tx_data_arg), K(read_tx_id), K(sql_sequence), K(lock_state));
return ret;
}
@ -851,7 +850,6 @@ int ObTxTable::check_sql_sequence_can_read(ObReadTxDataArg &read_tx_data_arg,
{
CheckSqlSequenceCanReadFunctor fn(sql_sequence, can_read);
int ret = check_with_tx_data(read_tx_data_arg, fn);
// TODO(handora.qc): remove it
LOG_DEBUG("finish check sql sequence can read", K(read_tx_data_arg), K(sql_sequence), K(can_read));
return ret;
}
@ -863,7 +861,6 @@ int ObTxTable::get_tx_state_with_scn(ObReadTxDataArg &read_tx_data_arg,
{
GetTxStateWithSCNFunctor fn(scn, state, trans_version);
int ret = check_with_tx_data(read_tx_data_arg, fn);
// TODO(handora.qc): remove it
LOG_DEBUG("finish get tx state with scn", K(read_tx_data_arg), K(scn), K(state), K(trans_version));
return ret;
}
@ -894,15 +891,17 @@ int ObTxTable::lock_for_read(ObReadTxDataArg &read_tx_data_arg,
const transaction::ObLockForReadArg &lock_for_read_arg,
bool &can_read,
SCN &trans_version,
bool &is_determined_state,
ObCleanoutOp &cleanout_op,
ObReCheckOp &recheck_op)
{
LockForReadFunctor fn(
lock_for_read_arg, can_read, trans_version, is_determined_state, ls_id_, cleanout_op, recheck_op);
LockForReadFunctor fn(lock_for_read_arg,
can_read,
trans_version,
ls_id_,
cleanout_op,
recheck_op);
int ret = check_with_tx_data(read_tx_data_arg, fn);
// TODO(handora.qc): remove it
LOG_DEBUG("finish lock for read", K(lock_for_read_arg), K(can_read), K(trans_version), K(is_determined_state));
LOG_DEBUG("finish lock for read", K(lock_for_read_arg), K(can_read), K(trans_version));
return ret;
}
@ -983,7 +982,7 @@ int ObTxTable::cleanout_tx_node(ObReadTxDataArg &read_tx_data_arg,
const bool need_row_latch)
{
ObCleanoutTxNodeOperation op(value, tnode, need_row_latch);
CleanoutTxStateFunctor fn(op);
CleanoutTxStateFunctor fn(tnode.seq_no_, op);
int ret = check_with_tx_data(read_tx_data_arg, fn);
if (OB_TRANS_CTX_NOT_EXIST == ret) {
if (tnode.is_committed() || tnode.is_aborted()) {
@ -991,6 +990,12 @@ int ObTxTable::cleanout_tx_node(ObReadTxDataArg &read_tx_data_arg,
ret = OB_SUCCESS;
}
}
if (OB_SUCC(ret)) {
if (op.need_cleanout()) {
op(fn.get_tx_data_check_data());
}
}
return ret;
}

View File

@ -127,6 +127,14 @@ public:
// =============== Interface for sstable to get txn information =====================
/**
* @brief run a user-supplied check against tx data; the caller implements the
*        check as a functor derived from ObITxDataCheckFunctor
*
* @param[in] read_tx_data_arg describes the tx data to be checked (it carries the tx id of
*                             the transaction to be checked)
* @param[in] fn the functor implemented by user
*
* NOTE(review): earlier documentation also listed a read_epoch used "to make sure the version of
* tx data is what the callers want to be"; it is not a parameter of this signature and is
* presumably carried inside read_tx_data_arg - confirm against ObReadTxDataArg.
*/
int check_with_tx_data(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn);
/**
* @brief check whether the row key is locked by tx id
@ -182,18 +190,17 @@ public:
/**
* @brief the txn READ_TRANS_ID use SNAPSHOT_VERSION to read the data, and check whether the data is locked, readable or unreadable by txn DATA_TRANS_ID. READ_LATEST is used to check whether read the data belong to the same txn
*
* @param[in] read_tx_data_arg
* @param[in] lock_for_read_arg
* @param[in] read_epoch
* @param[out] can_read
* @param[out] trans_version
* @param[out] is_determined_state
* @param[in] op
* @param[in] cleanout_op
* @param[in] recheck_op
*/
int lock_for_read(ObReadTxDataArg &read_tx_data_arg,
const transaction::ObLockForReadArg &lock_for_read_arg,
bool &can_read,
share::SCN &trans_version,
bool &is_determined_state,
ObCleanoutOp &cleanout_op,
ObReCheckOp &recheck_op);
@ -274,6 +281,7 @@ public: // getter & setter
int get_tx_table_guard(ObTxTableGuard &guard);
int64_t get_epoch() const { return ATOMIC_LOAD(&epoch_); }
TxTableState get_state() const { return ATOMIC_LOAD(&state_); }
share::ObLSID get_ls_id() const { return ls_id_; }
static int64_t get_filter_col_idx();
@ -300,14 +308,6 @@ private:
int offline_tx_ctx_table_();
int offline_tx_data_table_();
/**
* @brief do some checking with tx data user has to implement the check functor derived from ObITxDataCheckFunctor
*
* @param[in] tx_id tx_id, the tx id of the transaction to be checked
* @param[in] fn the functor implemented by user
* @param[in] read_epoch to make sure the version of tx data is what the callers want to be
*/
int check_with_tx_data(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn);
int check_tx_data_in_mini_cache_(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn);
int check_tx_data_in_kv_cache_(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn);
int check_tx_data_in_tables_(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn);

Some files were not shown because too many files have changed in this diff Show More