use mds checkpoint scn to indicate tablet initial state

This commit is contained in:
hiddenbomb
2023-08-08 11:12:30 +00:00
committed by ob-robot
parent 2e28ad4ffa
commit 9fd9e82342
7 changed files with 59 additions and 99 deletions

View File

@ -1489,7 +1489,12 @@ int ObLSTabletService::build_new_tablet_from_mds_table(
if (OB_FAIL(ret)) {
} else if (CLICK_FAIL(old_tablet->read_mds_table(arena_allocator, mds_data, true))) {
LOG_WARN("failed to read mds table", K(ret));
if (OB_EMPTY_RESULT == ret) {
ret = OB_SUCCESS;
LOG_INFO("read nothing from mds table, no need to build new tablet", K(ret), K(key));
} else {
LOG_WARN("failed to read mds table", K(ret), K(key));
}
} else if (CLICK_FAIL(tmp_tablet->init(allocator, *old_tablet, flush_scn, mds_data, old_tablet->mds_data_))) {
LOG_WARN("failed to init tablet", K(ret), KPC(old_tablet), K(flush_scn));
} else if (CLICK_FAIL(ObTabletPersister::persist_and_transform_tablet(*tmp_tablet, new_tablet_handle))) {
@ -1505,7 +1510,7 @@ int ObLSTabletService::build_new_tablet_from_mds_table(
} else {
CLICK();
time_guard.click("CASwap");
LOG_INFO("succeeded to build new tablet", K(ret), K(disk_addr), K(new_tablet_handle), K(mds_data));
LOG_INFO("succeeded to build new tablet", K(ret), K(key), K(disk_addr), K(new_tablet_handle), K(mds_data));
}
}
}

View File

@ -37,7 +37,8 @@ inline int ObITabletMdsInterface::get_tablet_status(const share::SCN &snapshot,
}, snapshot, 0, timeout))) {
MDS_LOG_GET(WARN, "tablet_status does not exist on neither mds_table nor tablet", K(lbt()));
} else if (!data.is_valid()) {
MDS_LOG_GET(WARN, "get invalid ObTabletCreateDeleteMdsUserData", K(lbt()));
ret = OB_ERR_UNEXPECTED;
MDS_LOG_GET(WARN, "invalid user data", K(lbt()));
}
return ret;
#undef PRINT_WRAPPER
@ -57,7 +58,8 @@ inline int ObITabletMdsInterface::get_latest_tablet_status(ObTabletCreateDeleteM
}, is_committed, 0))) {
MDS_LOG_GET(WARN, "fail to get_latest_tablet_status");
} else if (!data.is_valid()) {
MDS_LOG_GET(WARN, "get invalid ObTabletCreateDeleteMdsUserData", K(lbt()));
ret = OB_ERR_UNEXPECTED;
MDS_LOG_GET(WARN, "invalid user data", K(lbt()));
}
return ret;
#undef PRINT_WRAPPER

View File

@ -4577,17 +4577,24 @@ int ObTablet::check_and_set_initial_state()
int ret = OB_SUCCESS;
const ObLSID &ls_id = tablet_meta_.ls_id_;
const ObTabletID &tablet_id = tablet_meta_.tablet_id_;
bool initial_state = true;
if (is_empty_shell()) {
initial_state = false;
} else if (OB_FAIL(check_initial_state(initial_state))) {
LOG_WARN("failed to check initial state", K(ret));
}
if (IS_NOT_INIT) {
ret = OB_NOT_INIT;
LOG_WARN("not inited", K(ret), K_(is_inited));
} else {
// for normal tablet(except ls inner tablet), if mds_checkpoint_scn equals initial SCN(value is 1),
// it means all kinds of mds data(including tablet status) has never been dumped to disk,
// then we think that this tablet is in initial state
bool initial_state = true;
if (is_ls_inner_tablet()) {
initial_state = false;
} else {
initial_state = (tablet_meta_.mds_checkpoint_scn_ == ObTabletMeta::INIT_CLOG_CHECKPOINT_SCN);
}
if (OB_FAIL(ret)) {
} else if (!initial_state) {
if (OB_FAIL(set_initial_state(false/*initial_state*/))) {
if (initial_state) {
// do nothing
} else if (OB_FAIL(set_initial_state(false/*initial_state*/))) {
LOG_WARN("failed to set initial state", K(ret));
} else {
LOG_DEBUG("set initial state to false", K(ret), K(ls_id), K(tablet_id));
@ -4813,7 +4820,15 @@ int ObTablet::build_transfer_tablet_param(
if (OB_FAIL(mig_tablet_param.transfer_info_.init(tablet_meta_.ls_id_, user_data.transfer_scn_, transfer_seq))) {
LOG_WARN("failed to init transfer info", K(ret), K(tablet_meta_), K(user_data));
} else if (OB_FAIL(read_mds_table(mig_tablet_param.allocator_, mds_table_data, false))) {
LOG_WARN("failed to read mds table", K(ret));
if (OB_EMPTY_RESULT == ret) {
// do nothing
ret = OB_SUCCESS;
} else {
LOG_WARN("failed to read mds table", K(ret));
}
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(get_finish_medium_scn(finish_medium_scn))) {
LOG_WARN("failed to get finish medium scn", K(ret));
} else if (OB_FAIL(new_mds_data.init(mig_tablet_param.allocator_, mds_table_data, mds_data_, finish_medium_scn))) {
@ -5081,6 +5096,8 @@ int ObTablet::read_mds_table(common::ObIAllocator &allocator, ObTabletMdsData &m
{
TIMEGUARD_INIT(STORAGE, 10_ms, 5_s);
int ret = OB_SUCCESS;
const share::ObLSID &ls_id = tablet_meta_.ls_id_;
const common::ObTabletID &tablet_id = tablet_meta_.tablet_id_;
mds_data.reset();
mds::MdsTableHandle mds_table_handle;
@ -5091,10 +5108,8 @@ int ObTablet::read_mds_table(common::ObIAllocator &allocator, ObTabletMdsData &m
LOG_WARN("failed to init mds data", K(ret));
} else if (CLICK_FAIL(inner_get_mds_table(mds_table_handle))) {
if (OB_ENTRY_NOT_EXIST == ret) {
ret = OB_SUCCESS;
LOG_INFO("mds table does not exist, may be released", K(ret),
"ls_id", tablet_meta_.ls_id_,
"tablet_id", tablet_meta_.tablet_id_);
ret = OB_EMPTY_RESULT;
LOG_INFO("mds table does not exist, may be released", K(ret), K(ls_id), K(tablet_id));
} else {
LOG_WARN("failed to get mds table", K(ret));
}
@ -5102,6 +5117,9 @@ int ObTablet::read_mds_table(common::ObIAllocator &allocator, ObTabletMdsData &m
ObTabletDumpMdsNodeOperator op(mds_data, allocator);
if (CLICK_FAIL(mds_table_handle.for_each_unit_from_small_key_to_big_from_old_node_to_new_to_dump(op, for_flush))) {
LOG_WARN("failed to traverse mds table", K(ret));
} else if (!op.dumped()) {
ret = OB_EMPTY_RESULT;
LOG_INFO("read nothing from mds table", K(ret), K(ls_id), K(tablet_id));
}
}
@ -5480,76 +5498,6 @@ int ObTablet::set_initial_state(const bool initial_state)
return ret;
}
// Decide whether this tablet is still in its initial state by inspecting the tablet status
// recorded in mds_data_.tablet_status_ (both the uncommitted and the committed kv).
//
// @param [out] initial_state  set to true on entry; switched to false as soon as either kv
//                             carries non-empty user data whose deserialized tablet_status_
//                             is not ObTabletStatus::MAX.
// @return OB_SUCCESS on success;
//         OB_ERR_UNEXPECTED if either address is neither a memory nor a disk object,
//         or if a loaded kv pointer is null;
//         otherwise the error returned by load_mds_dump_kv() or deserialize().
int ObTablet::check_initial_state(bool &initial_state)
{
  int ret = OB_SUCCESS;
  initial_state = true;
  const ObTabletComplexAddr<mds::MdsDumpKV> &uncommitted_tablet_status_addr = mds_data_.tablet_status_.uncommitted_kv_;
  const ObTabletComplexAddr<mds::MdsDumpKV> &committed_tablet_status_addr = mds_data_.tablet_status_.committed_kv_;
  ObTabletCreateDeleteMdsUserData uncommitted_data;
  ObTabletCreateDeleteMdsUserData committed_data;
  const mds::MdsDumpKV *uncommitted_kv = nullptr;
  const mds::MdsDumpKV *committed_kv = nullptr;
  ObArenaAllocator arena_allocator("mds_reader");

  // both addresses must be loadable, i.e. either memory or disk objects
  if (OB_UNLIKELY(!(uncommitted_tablet_status_addr.is_memory_object() || uncommitted_tablet_status_addr.is_disk_object())
      || !(committed_tablet_status_addr.is_memory_object() || committed_tablet_status_addr.is_disk_object()))) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("tablet status addr is not mem or disk type", K(ret), K(uncommitted_tablet_status_addr), K(committed_tablet_status_addr));
  }

  // TODO(@bowen.gbw): optimize, check initial state without IO
  // TODO(@chenqingxiang.cqx): support using none addr to check initial state.
  // step 1: check the uncommitted tablet status
  if (OB_FAIL(ret)) {
  } else if (OB_FAIL(ObTabletMdsData::load_mds_dump_kv(arena_allocator, uncommitted_tablet_status_addr, uncommitted_kv))) {
    LOG_WARN("failed to load mds dump kv", K(ret), K(uncommitted_tablet_status_addr));
  } else if (OB_ISNULL(uncommitted_kv)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected error, mds dump kv is null", K(ret));
  } else {
    const common::ObString &user_data = uncommitted_kv->v_.user_data_;
    int64_t pos = 0;
    if (user_data.empty()) {
      // uncommitted tablet status is empty
    } else if (OB_FAIL(uncommitted_data.deserialize(user_data.ptr(), user_data.length(), pos))) {
      // NOTE(review): the "user_hash:%x" key looks like a leftover printf-style format;
      // confirm the intended key name before changing it.
      LOG_WARN("failed to deserialize", K(ret), K(user_data),
               "user_data_length", user_data.length(),
               "user_hash:%x", user_data.hash(),
               "crc_check_number", uncommitted_kv->v_.crc_check_number_);
    } else if (ObTabletStatus::MAX != uncommitted_data.tablet_status_) {
      initial_state = false;
    }
  }

  // step 2: check the committed tablet status, only if step 1 left initial_state untouched
  if (OB_FAIL(ret)) {
  } else if (!initial_state) {
    // no need to check committed tablet status
  } else if (OB_FAIL(ObTabletMdsData::load_mds_dump_kv(arena_allocator, committed_tablet_status_addr, committed_kv))) {
    LOG_WARN("failed to load mds dump kv", K(ret), K(committed_tablet_status_addr));
  } else if (OB_ISNULL(committed_kv)) {
    ret = OB_ERR_UNEXPECTED;
    LOG_WARN("unexpected error, mds dump kv is null", K(ret));
  } else {
    const common::ObString &user_data = committed_kv->v_.user_data_;
    int64_t pos = 0;
    if (user_data.empty()) {
      // committed tablet status is empty
    } else if (OB_FAIL(committed_data.deserialize(user_data.ptr(), user_data.length(), pos))) {
      LOG_WARN("failed to deserialize", K(ret), K(user_data),
               "user_data_length", user_data.length(),
               "user_hash:%x", user_data.hash(),
               "crc_check_number", committed_kv->v_.crc_check_number_);
    } else if (ObTabletStatus::MAX != committed_data.tablet_status_) {
      initial_state = false;
    }
  }

  // release both kvs on every path, including the error paths above
  ObTabletMdsData::free_mds_dump_kv(arena_allocator, uncommitted_kv);
  ObTabletMdsData::free_mds_dump_kv(arena_allocator, committed_kv);
  return ret;
}
int ObTablet::load_medium_info_list(
common::ObArenaAllocator &allocator,
const ObTabletComplexAddr<oceanbase::storage::ObTabletDumpedMediumInfo> &complex_addr,
@ -5586,7 +5534,15 @@ int ObTablet::get_fused_medium_info_list(
ret = OB_NOT_INIT;
LOG_WARN("not inited", K(ret), K_(is_inited));
} else if (OB_FAIL(read_mds_table(allocator, mds_table_data, false))) {
LOG_WARN("failed to read mds table", K(ret));
if (OB_EMPTY_RESULT == ret) {
// do nothing
ret = OB_SUCCESS;
} else {
LOG_WARN("failed to read mds table", K(ret));
}
}
if (OB_FAIL(ret)) {
} else if (OB_FAIL(get_finish_medium_scn(finish_medium_scn))) {
LOG_WARN("failed to get finish medium scn", K(ret));
} else if (OB_FAIL(new_mds_data.init(allocator, mds_table_data, mds_data_, finish_medium_scn))) {

View File

@ -424,13 +424,6 @@ public:
int64_t &data_size,
int64_t &required_size,
const bool need_checksums = true);
int set_tx_data(
const ObTabletTxMultiSourceDataUnit &tx_data,
const share::SCN &memtable_log_scn,
const bool for_replay,
const memtable::MemtableRefOp ref_op = memtable::MemtableRefOp::NONE,
const bool is_callback = false);
int check_and_set_initial_state();
int set_memtable_clog_checkpoint_scn(const ObMigrationTabletParam *tablet_meta);
int read_mds_table(common::ObIAllocator &allocator, ObTabletMdsData &mds_data, const bool for_flush);
@ -641,7 +634,6 @@ private:
const ObTaletExtraMediumInfo &extra_info,
compaction::ObMediumCompactionInfoList &medium_info_list);
int set_initial_state(const bool initial_state);
int check_initial_state(bool &initial_state);
int load_deserialize_v1(
common::ObArenaAllocator &allocator,

View File

@ -27,7 +27,7 @@ namespace storage
READ_ALL_COMMITED:
Read committed row after transaction committed, except empty shell.
Return tablet in NORMAL, TRANSFER_IN, TRANSFER_OUT, DELETED, TRANSFER_OUT_DELETED status.
Return tablet which has finished at least one MDS transaction(not including tablet in NORMAL not committed, TRANSFER_IN not committed status).
Does not return CREATING and DELETING, which have been abandoned since 4.2.
In addition, you should NOT pass read timeout under this mode.

View File

@ -30,7 +30,8 @@ namespace storage
{
ObTabletDumpMdsNodeOperator::ObTabletDumpMdsNodeOperator(ObTabletMdsData &mds_data, common::ObIAllocator &allocator)
: mds_data_(mds_data),
allocator_(allocator)
allocator_(allocator),
dumped_(false)
{
}
@ -197,6 +198,8 @@ int ObTabletDumpMdsNodeOperator::operator()(const mds::MdsDumpKV &kv)
} else if (OB_UNLIKELY(!dumped)) {
ret = OB_ERR_UNEXPECTED;
LOG_ERROR("unexpected kv type, not dumped", K(ret), K(kv), K(dumped));
} else {
dumped_ = true;
}
return ret;

View File

@ -36,12 +36,14 @@ public:
ObTabletDumpMdsNodeOperator(ObTabletMdsData &mds_data, common::ObIAllocator &allocator);
public:
int operator()(const mds::MdsDumpKV &kv);
// Returns dumped_: false after construction, set to true by operator() once a kv has been dumped without error.
bool dumped() const { return dumped_; }
private:
template <typename K, typename T>
int dump(const mds::MdsDumpKV &kv, bool &dumped);
private:
ObTabletMdsData &mds_data_;
common::ObIAllocator &allocator_;
bool dumped_;
};
class ObTabletMediumInfoNodeOperator