From b29a7dd782c00a04b82ddf4ac73716751e9c8d2e Mon Sep 17 00:00:00 2001 From: Tsunaou <895254752@qq.com> Date: Tue, 18 Jun 2024 02:58:53 +0000 Subject: [PATCH] [FEAT MERGE] Set different table mode and compaction strategies to optimize buffer table performance. Co-authored-by: ZenoWang --- mittest/simple_server/CMakeLists.txt | 1 + .../test_keep_alive_min_start_scn.cpp | 18 +- .../simple_server/test_ob_queuing_table.cpp | 257 ++++++++++ .../simple_server/test_replay_from_middle.cpp | 65 +-- .../test_transfer_with_commit_action.cpp | 2 +- .../test_transfer_with_smaller_tx_data.cpp | 13 +- src/rootserver/ob_ddl_service.cpp | 20 +- .../parallel_ddl/ob_create_table_helper.cpp | 7 + .../compaction/ob_compaction_time_guard.cpp | 1 + .../compaction/ob_compaction_time_guard.h | 1 + src/share/schema/ob_schema_printer.cpp | 4 +- src/share/schema/ob_table_schema.cpp | 21 + src/share/schema/ob_table_schema.h | 90 +++- src/share/schema/ob_table_sql_service.cpp | 9 + src/share/stat/ob_opt_stat_service.cpp | 4 +- src/sql/resolver/ddl/ob_ddl_resolver.cpp | 30 +- src/storage/CMakeLists.txt | 1 + .../blocksstable/ob_macro_block_writer.cpp | 16 +- .../blocksstable/ob_micro_block_header.h | 2 +- .../ob_shared_macro_block_manager.cpp | 5 +- src/storage/blocksstable/ob_sstable.cpp | 79 ++- src/storage/blocksstable/ob_sstable.h | 7 +- src/storage/blocksstable/ob_sstable_meta.cpp | 13 +- src/storage/blocksstable/ob_sstable_meta.h | 2 +- .../compaction/ob_basic_tablet_merge_ctx.cpp | 35 ++ .../compaction/ob_basic_tablet_merge_ctx.h | 1 + .../compaction/ob_medium_compaction_func.cpp | 50 +- .../compaction/ob_medium_compaction_func.h | 10 +- .../compaction/ob_partition_merge_policy.cpp | 167 +++++-- .../compaction/ob_partition_merge_policy.h | 26 +- .../compaction/ob_tablet_merge_checker.cpp | 4 +- .../compaction/ob_tablet_merge_ctx.cpp | 106 +++-- src/storage/compaction/ob_tablet_merge_ctx.h | 2 +- .../compaction/ob_tablet_merge_info.cpp | 2 + .../compaction/ob_tablet_merge_task.cpp | 2 +- 
.../compaction/ob_tenant_tablet_scheduler.cpp | 450 +++++++++++++----- .../compaction/ob_tenant_tablet_scheduler.h | 55 ++- src/storage/ls/ob_ls.cpp | 103 +--- src/storage/ls/ob_ls.h | 8 +- src/storage/ls/ob_ls_transfer_status.cpp | 21 +- src/storage/ob_gc_upper_trans_helper.cpp | 121 +++++ src/storage/ob_gc_upper_trans_helper.h | 43 ++ src/storage/ob_storage_struct.cpp | 51 +- src/storage/ob_storage_struct.h | 21 +- src/storage/ob_tenant_tablet_stat_mgr.cpp | 262 +++++++++- src/storage/ob_tenant_tablet_stat_mgr.h | 63 ++- src/storage/tablet/ob_tablet.cpp | 86 ---- src/storage/tablet/ob_tablet.h | 2 - src/storage/tablet/ob_tablet_table_store.cpp | 50 +- src/storage/tablet/ob_tablet_table_store.h | 4 +- src/storage/tx/ob_tx_loop_worker.cpp | 2 + src/storage/tx_table/ob_tx_data_table.cpp | 116 ++--- src/storage/tx_table/ob_tx_data_table.h | 47 +- src/storage/tx_table/ob_tx_table.cpp | 83 +++- src/storage/tx_table/ob_tx_table.h | 58 ++- .../storage/test_tenant_tablet_stat_mgr.cpp | 64 ++- 56 files changed, 2108 insertions(+), 675 deletions(-) create mode 100644 mittest/simple_server/test_ob_queuing_table.cpp create mode 100644 src/storage/ob_gc_upper_trans_helper.cpp create mode 100644 src/storage/ob_gc_upper_trans_helper.h diff --git a/mittest/simple_server/CMakeLists.txt b/mittest/simple_server/CMakeLists.txt index df975a8c68..396af4afcb 100644 --- a/mittest/simple_server/CMakeLists.txt +++ b/mittest/simple_server/CMakeLists.txt @@ -71,6 +71,7 @@ ob_unittest_observer(test_balance_operator test_tenant_balance_operator.cpp) ob_unittest_observer(test_transfer_partition_task test_transfer_partition_task.cpp) ob_unittest_observer(test_mds_table_checkpoint test_mds_table_checkpoint.cpp) ob_unittest_observer(test_ob_black_list_service test_ob_black_list_service.cpp) +ob_unittest_observer(test_ob_queuing_table test_ob_queuing_table.cpp) ob_unittest_observer(test_ob_table_lock_service test_ob_table_lock_service.cpp) ob_unittest_observer(test_ob_obj_lock_garbage_collector 
test_ob_obj_lock_garbage_collector.cpp) ob_unittest_observer(test_observer_expand_shrink test_observer_expand_shrink.cpp) diff --git a/mittest/simple_server/test_keep_alive_min_start_scn.cpp b/mittest/simple_server/test_keep_alive_min_start_scn.cpp index a8d95186d4..a24b9c089c 100644 --- a/mittest/simple_server/test_keep_alive_min_start_scn.cpp +++ b/mittest/simple_server/test_keep_alive_min_start_scn.cpp @@ -26,7 +26,7 @@ namespace oceanbase namespace storage { -int64_t ObTxDataTable::UPDATE_CALC_UPPER_INFO_INTERVAL = 0; +int64_t ObTxTable::UPDATE_MIN_START_SCN_INTERVAL = 0; } namespace unittest @@ -90,26 +90,26 @@ void ObTestKeepAliveMinStartSCN::loop_check_start_scn(SCN &prev_min_start_scn, S MTL_SWITCH(RunCtx.tenant_id_) { ObLS *ls = get_ls(RunCtx.tenant_id_, ObLSID(1001)); - ObTxDataTable *tx_data_table = ls->get_tx_table()->get_tx_data_table(); + ObTxTable *tx_table = ls->get_tx_table(); // 每100毫秒循环一次,对应tx loop worker的单次循环interval,循环200次,对应20秒 // 因为tx loop worker会15秒遍历一次上下文,略大于遍历间隔 int retry_times = 200; while (--retry_times >= 0) { // 每次循环都更新tx data table中的min_start_scn - tx_data_table->update_calc_upper_info_(SCN::max_scn()); + tx_table->update_min_start_scn_info(SCN::max_scn()); // 判断min_start_scn的大小关系,若出错,打印到stdout - if (prev_min_start_scn > tx_data_table->calc_upper_info_.min_start_scn_in_ctx_) { + if (prev_min_start_scn > tx_table->ctx_min_start_scn_info_.min_start_scn_in_ctx_) { fprintf(stdout, "Incorrect min_start_scn in tx data table, prev_min_start_scn = %s, current_min_start_scn = %s\n", to_cstring(prev_min_start_scn), - to_cstring(tx_data_table->calc_upper_info_.min_start_scn_in_ctx_)); + to_cstring(tx_table->ctx_min_start_scn_info_.min_start_scn_in_ctx_)); } - ASSERT_LE(prev_min_start_scn, tx_data_table->calc_upper_info_.min_start_scn_in_ctx_); - prev_min_start_scn = tx_data_table->calc_upper_info_.min_start_scn_in_ctx_; - ASSERT_LE(prev_keep_alive_scn, tx_data_table->calc_upper_info_.keep_alive_scn_); - prev_keep_alive_scn = 
tx_data_table->calc_upper_info_.keep_alive_scn_; + ASSERT_LE(prev_min_start_scn, tx_table->ctx_min_start_scn_info_.min_start_scn_in_ctx_); + prev_min_start_scn = tx_table->ctx_min_start_scn_info_.min_start_scn_in_ctx_; + ASSERT_LE(prev_keep_alive_scn, tx_table->ctx_min_start_scn_info_.keep_alive_scn_); + prev_keep_alive_scn = tx_table->ctx_min_start_scn_info_.keep_alive_scn_; ::usleep(ObTxLoopWorker::LOOP_INTERVAL); } diff --git a/mittest/simple_server/test_ob_queuing_table.cpp b/mittest/simple_server/test_ob_queuing_table.cpp new file mode 100644 index 0000000000..b5e9a438d1 --- /dev/null +++ b/mittest/simple_server/test_ob_queuing_table.cpp @@ -0,0 +1,257 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#define USING_LOG_PREFIX STORAGE +#define protected public +#define private public + +#include "env/ob_simple_cluster_test_base.h" +#include "storage/ob_tenant_tablet_stat_mgr.h" + +namespace oceanbase +{ +namespace unittest +{ +class ObQueuingTableTest : public ObSimpleClusterTestBase +{ +public: + struct ReportStat { + ReportStat() = default; + ReportStat( + const char* tname, + const ObTableModeFlag mode, + const int64_t ls_id, + const uint64_t tablet_id, + const uint32_t query_cnt, + const uint64_t scan_physical_row_cnt) + : tname_(tname), + mode_(mode) + { + stat_.ls_id_ = ls_id; + stat_.tablet_id_ = tablet_id; + stat_.query_cnt_ = query_cnt; + stat_.scan_physical_row_cnt_ = scan_physical_row_cnt; + } + TO_STRING_KV(K_(tname), K_(mode), K_(stat)); +public: + const char* tname_; + ObTableModeFlag mode_; + ObTabletStat stat_; + }; +public: + ObQueuingTableTest() : ObSimpleClusterTestBase("test_ob_queuing_table") {} + void create_table_and_fetch_infomations( + int &ret, + const char *tname, + const ObTableModeFlag mode, + int64_t &ls_id, + uint64_t &tablet_id); + void wait_refresh(); + void check_report_stats(ObIArray &report_stats); + void mock_one_table( + ObIArray &report_stats, + const char* tname, + uint32_t &base_query_cnt, + uint64_t &base_scan_physical_row_cnt, + const ObTableModeFlag mode); + void alter_table_mode(const char*tname, const ObTableModeFlag new_mode); +}; + +void ObQueuingTableTest::create_table_and_fetch_infomations( + int &ret, + const char *tname, + const ObTableModeFlag mode, + int64_t &ls_id, + uint64_t &tablet_id) +{ + ret = OB_SUCCESS; + // 1. 
Create table + { + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy(); + LOG_INFO("start create table", K(tname)); + ObSqlString sql; + int64_t affected_rows = 0; + ASSERT_EQ(OB_SUCCESS, sql.assign_fmt("create table %s (k int primary key, v int) table_mode='%s'", tname, table_mode_flag_to_str(mode))); + ASSERT_EQ(OB_SUCCESS, sql_proxy.write(sql.ptr(), affected_rows)); + LOG_INFO("finish create table", K(tname)); + } + + static bool need_init = true; + if (need_init) { + need_init = false; + ASSERT_EQ(OB_SUCCESS, get_curr_simple_server().init_sql_proxy2("sys", "oceanbase")); + } + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy2(); + { + LOG_INFO("start query table_id", K(tname)); + ObSqlString sql; + ASSERT_EQ(OB_SUCCESS, sql.assign_fmt("select tablet_id from __all_virtual_table where table_name='%s'", tname)); + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + ASSERT_EQ(OB_SUCCESS, sql_proxy.read(res, sql.ptr())); + sqlclient::ObMySQLResult *result = res.get_result(); + ASSERT_NE(nullptr, result); + ASSERT_EQ(OB_SUCCESS, result->next()); + ASSERT_EQ(OB_SUCCESS, result->get_uint("tablet_id", tablet_id)); + ASSERT_NE(0, tablet_id); + } + LOG_INFO("finish query table_id", K(tname), K(tablet_id)); + } + { + LOG_INFO("start query ls_id", K(tname)); + ObSqlString sql; + ASSERT_EQ(OB_SUCCESS, sql.assign_fmt("select ls_id from __all_virtual_tablet_to_ls where tablet_id=%ld", tablet_id)); + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + ASSERT_EQ(OB_SUCCESS, sql_proxy.read(res, sql.ptr())); + sqlclient::ObMySQLResult *result = res.get_result(); + ASSERT_NE(nullptr, result); + ASSERT_EQ(OB_SUCCESS, result->next()); + ASSERT_EQ(OB_SUCCESS, result->get_int("ls_id", ls_id)); + } + LOG_INFO("finish query ls_id", K(tname), K(ls_id)); + } + LOG_INFO("Success to create table", K(tname), "mode", table_mode_flag_to_str(mode), K(ls_id), K(tablet_id)); +} + + +void ObQueuingTableTest::wait_refresh() +{ + ObTenantTabletStatMgr *stat_mgr = 
MTL(ObTenantTabletStatMgr *); + ASSERT_NE(nullptr, stat_mgr); + + const int64_t current_time = ObTimeUtility::current_time(); + const int64_t last_update_time = stat_mgr->get_last_update_time(); + const int64_t TIMEOUT_INTERVAL = 5 * ObTenantTabletStatMgr::CHECK_INTERVAL; + LOG_INFO("Wait until ObTenantTabletStatMgr refresh", K(current_time), K(last_update_time)); + + while (last_update_time == stat_mgr->get_last_update_time()) { + LOG_INFO("sleep for a while"); + usleep(ObTenantTabletStatMgr::CHECK_INTERVAL / 2); + if ((ObTimeUtility::current_time() - current_time) > TIMEOUT_INTERVAL) { + ASSERT_TRUE(false) << "Waiting stat mgr update timeout"; + } + } + ASSERT_GT(stat_mgr->get_last_update_time(), last_update_time); + LOG_INFO("Finsih waiting ObTenantTabletStatMgr refresh"); +} + +void ObQueuingTableTest::check_report_stats(ObIArray &report_stats) +{ + ObTenantTabletStatMgr *stat_mgr = MTL(ObTenantTabletStatMgr *); + ASSERT_NE(nullptr, stat_mgr); + FOREACH_CNT(it, report_stats) { + ObTabletStat res; + ObLSID ls_id(it->stat_.ls_id_); + ObTabletID tablet_id(it->stat_.tablet_id_); + ObTabletStat stat; + ObTableModeFlag mode = ObTableModeFlag::TABLE_MODE_MAX; + ASSERT_EQ(OB_SUCCESS, stat_mgr->get_latest_tablet_stat(ls_id, tablet_id, res, stat, mode)); + ASSERT_EQ(mode, it->mode_); + ObTableQueuingModeCfg cfg; + ASSERT_EQ(OB_SUCCESS, stat_mgr->get_queuing_cfg(ls_id, tablet_id, cfg)); + ASSERT_EQ(cfg.mode_, it->mode_); + ASSERT_EQ(stat.query_cnt_, it->stat_.query_cnt_); + ASSERT_EQ(stat.scan_physical_row_cnt_, it->stat_.scan_physical_row_cnt_); + } +} + +void ObQueuingTableTest::mock_one_table( + ObIArray &report_stats, + const char* tname, + uint32_t &base_query_cnt, + uint64_t &base_scan_physical_row_cnt, + const ObTableModeFlag mode) +{ + int ret = OB_SUCCESS; + bool succ_report = false; + int64_t cur_ls_id = OB_INVALID_ID; + uint64_t cur_tablet_id = OB_INVALID_ID; + ObTenantTabletStatMgr *stat_mgr = MTL(ObTenantTabletStatMgr *); + + ASSERT_NE(nullptr, stat_mgr); + 
create_table_and_fetch_infomations(ret, tname, mode, cur_ls_id, cur_tablet_id); + ASSERT_EQ(OB_SUCCESS, ret); + ReportStat rep_stat(tname, mode, cur_ls_id, cur_tablet_id, base_query_cnt++, base_scan_physical_row_cnt++); + ASSERT_EQ(OB_SUCCESS, report_stats.push_back(rep_stat)); + ASSERT_EQ(OB_SUCCESS, stat_mgr->report_stat(rep_stat.stat_, succ_report)); + ASSERT_TRUE(succ_report); +} + +void ObQueuingTableTest::alter_table_mode(const char*tname, const ObTableModeFlag new_mode) +{ + int ret = OB_SUCCESS; + common::ObMySQLProxy &sql_proxy = get_curr_simple_server().get_sql_proxy(); + LOG_INFO("start alter table mode", K(tname), "new_mode", table_mode_flag_to_str(new_mode)); + ObSqlString sql; + int64_t affected_rows = 0; + ASSERT_EQ(OB_SUCCESS, sql.assign_fmt("alter table %s set table_mode='%s'", tname, table_mode_flag_to_str(new_mode))); + ASSERT_EQ(OB_SUCCESS, sql_proxy.write(sql.ptr(), affected_rows)); + LOG_INFO("finish alter table mode", K(tname), "new_mode", table_mode_flag_to_str(new_mode)); +} + +TEST_F(ObQueuingTableTest, refresh_queuing_mode) +{ + LOG_INFO("ObQueuingTableTest::refresh_queuing_mode"); + int ret = OB_SUCCESS; + share::ObTenantSwitchGuard tenant_guard; + ret = tenant_guard.switch_to(OB_SYS_TENANT_ID); + ASSERT_EQ(OB_SUCCESS, ret); + ObTenantTabletStatMgr *stat_mgr = MTL(ObTenantTabletStatMgr *); + ASSERT_NE(nullptr, stat_mgr); + + uint32_t base_query_cnt = 100000; + uint64_t base_scan_physical_row_cnt = 10000; + ObSEArray report_stats; + + mock_one_table(report_stats, "qt0", base_query_cnt, base_scan_physical_row_cnt, ObTableModeFlag::TABLE_MODE_NORMAL); + mock_one_table(report_stats, "qt1", base_query_cnt, base_scan_physical_row_cnt, ObTableModeFlag::TABLE_MODE_QUEUING); + mock_one_table(report_stats, "qt2", base_query_cnt, base_scan_physical_row_cnt, ObTableModeFlag::TABLE_MODE_QUEUING_MODERATE); + mock_one_table(report_stats, "qt3", base_query_cnt, base_scan_physical_row_cnt, ObTableModeFlag::TABLE_MODE_QUEUING_SUPER); + 
mock_one_table(report_stats, "qt4", base_query_cnt, base_scan_physical_row_cnt, ObTableModeFlag::TABLE_MODE_QUEUING_EXTREME); + wait_refresh(); + check_report_stats(report_stats); + + int64_t report_cnt = report_stats.count(); + ASSERT_GT(report_cnt, 0); + ObTableModeFlag base_mode = ObTableModeFlag::TABLE_MODE_MAX; + ObTableModeFlag new_mode = ObTableModeFlag::TABLE_MODE_MAX; + int64_t idx = 0; + bool succ_report = false; + for (int64_t round = 0; round < 3; round++) { + LOG_INFO("Checking alter table mode and report again", K(round)); + base_mode = report_stats.at(0).mode_; + for (idx = 0; idx < report_cnt; idx++) { + new_mode = idx == report_cnt - 1 ? base_mode : report_stats.at(idx+1).mode_; + alter_table_mode(report_stats.at(idx).tname_, new_mode); + ObTabletStat new_stat = report_stats.at(idx).stat_; + new_stat.query_cnt_ += idx; + new_stat.scan_physical_row_cnt_ += idx; + ASSERT_EQ(OB_SUCCESS, stat_mgr->report_stat(new_stat, succ_report)); + ASSERT_TRUE(succ_report); + report_stats.at(idx).stat_ += new_stat; + report_stats.at(idx).mode_ = new_mode; + } + wait_refresh(); + check_report_stats(report_stats); + } +} + +} // end unittest +} // end oceanbase + +int main(int argc, char **argv) +{ + oceanbase::unittest::init_log_and_gtest(argc, argv); + OB_LOGGER.set_log_level("INFO"); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/mittest/simple_server/test_replay_from_middle.cpp b/mittest/simple_server/test_replay_from_middle.cpp index 779a04f376..49488ff8fc 100644 --- a/mittest/simple_server/test_replay_from_middle.cpp +++ b/mittest/simple_server/test_replay_from_middle.cpp @@ -446,6 +446,7 @@ void ObReplayRestartTest::restart_test() { int ret = OB_SUCCESS; ObLS *ls = nullptr; + ObTxTable *tx_table = nullptr; ObTxDataTable *tx_data_table = nullptr; share::SCN max_decided_scn = share::SCN::min_scn(); share::SCN upper_trans_version = share::SCN::min_scn(); @@ -457,11 +458,12 @@ void 
ObReplayRestartTest::restart_test() ASSERT_EQ(OB_SUCCESS, ls->get_max_decided_scn(max_decided_scn)); ASSERT_EQ(false, max_decided_scn.is_min()); - tx_data_table = ls->get_tx_table()->get_tx_data_table(); + tx_table = ls->get_tx_table(); + tx_data_table = tx_table->get_tx_data_table(); { // 场景一: keep alive日志没有被回放,min_start_scn为初始值状态,跳过计算upper_trans_version - ASSERT_EQ(SCN::min_scn(), tx_data_table->calc_upper_info_.min_start_scn_in_ctx_); + ASSERT_EQ(SCN::min_scn(), tx_table->ctx_min_start_scn_info_.min_start_scn_in_ctx_); upper_trans_version.set_min(); FLOG_INFO("get upper trans version, situation 1:", K(SSTABLE_END_SCN)); ASSERT_EQ(OB_SUCCESS, @@ -473,8 +475,8 @@ void ObReplayRestartTest::restart_test() REPLAY_BARRIER = SCN::plus(KEEP_ALIVE_SCN, 1); int64_t retry_times = 100; while (--retry_times > 0) { - tx_data_table->update_calc_upper_info_(SCN::max_scn() /*max_decided_scn*/); - if ( !tx_data_table->calc_upper_info_.min_start_scn_in_ctx_.is_min()) { + tx_table->update_min_start_scn_info(SCN::max_scn() /*max_decided_scn*/); + if ( !tx_table->ctx_min_start_scn_info_.min_start_scn_in_ctx_.is_min()) { break; } else { ::sleep(1); @@ -502,8 +504,8 @@ void ObReplayRestartTest::restart_test() MinStartScnStatus status; while (--retry_times > 0) { ls->get_min_start_scn(min_start_scn, keep_alive_scn, status); - tx_data_table->update_calc_upper_info_(SCN::max_scn() /*max_decided_scn*/); - if (tx_data_table->calc_upper_info_.min_start_scn_in_ctx_ > SSTABLE_END_SCN) { + tx_table->update_min_start_scn_info(SCN::max_scn() /*max_decided_scn*/); + if (tx_table->ctx_min_start_scn_info_.min_start_scn_in_ctx_ > SSTABLE_END_SCN) { break; } else { ::sleep(1); @@ -514,21 +516,21 @@ void ObReplayRestartTest::restart_test() status); } } - ASSERT_GT(tx_data_table->calc_upper_info_.min_start_scn_in_ctx_, SSTABLE_END_SCN); + ASSERT_GT(tx_table->ctx_min_start_scn_info_.min_start_scn_in_ctx_, SSTABLE_END_SCN); retry_times = 60; while (--retry_times > 0) { ASSERT_EQ(OB_SUCCESS, 
ls->get_max_decided_scn(max_decided_scn)); - if (max_decided_scn > tx_data_table->calc_upper_info_.keep_alive_scn_) { + if (max_decided_scn > tx_table->ctx_min_start_scn_info_.keep_alive_scn_) { break; } else { ::sleep(1); fprintf(stdout, "waiting max decided scn, max_decided_scn = %lu keep_alive_scn = %lu\n", max_decided_scn.get_val_for_inner_table_field(), - tx_data_table->calc_upper_info_.keep_alive_scn_.get_val_for_inner_table_field()); + tx_table->ctx_min_start_scn_info_.keep_alive_scn_.get_val_for_inner_table_field()); } } - ASSERT_GT(max_decided_scn, tx_data_table->calc_upper_info_.keep_alive_scn_); + ASSERT_GT(max_decided_scn, tx_table->ctx_min_start_scn_info_.keep_alive_scn_); upper_trans_version.set_min(); FLOG_INFO("get upper trans version, situation 3:", K(SSTABLE_END_SCN)); @@ -537,7 +539,7 @@ void ObReplayRestartTest::restart_test() ::sleep(10); - STORAGE_LOG(INFO, "finish restart test", K(upper_trans_version), K(SSTABLE_END_SCN), K(tx_data_table->calc_upper_info_)); + STORAGE_LOG(INFO, "finish restart test", K(upper_trans_version), K(SSTABLE_END_SCN), K(tx_table->ctx_min_start_scn_info_)); ASSERT_LT(upper_trans_version, SCN::max_scn()); } } @@ -640,47 +642,6 @@ int main(int argc, char **argv) namespace oceanbase { namespace storage { -void ObTxDataTable::update_calc_upper_info_(const SCN &max_decided_scn) -{ - int64_t cur_ts = common::ObTimeUtility::fast_current_time(); - SpinWLockGuard lock_guard(calc_upper_info_.lock_); - // recheck update condition and do update calc_upper_info - - /**********************************************************/ - //if (cur_ts - calc_upper_info_.update_ts_ > 30_s && max_decided_scn> calc_upper_info_.keep_alive_scn_) { - /**********************************************************/ - - SCN min_start_scn = SCN::min_scn(); - SCN keep_alive_scn = SCN::min_scn(); - MinStartScnStatus status; - ls_->get_min_start_scn(min_start_scn, keep_alive_scn, status); - if (MinStartScnStatus::UNKOWN == status) { - // do nothing - } else 
{ - int ret = OB_SUCCESS; - CalcUpperInfo tmp_calc_upper_info; - tmp_calc_upper_info.keep_alive_scn_ = keep_alive_scn; - tmp_calc_upper_info.update_ts_ = cur_ts; - if (MinStartScnStatus::NO_CTX == status) { - // use the previous keep_alive_scn as min_start_scn - tmp_calc_upper_info.min_start_scn_in_ctx_ = calc_upper_info_.keep_alive_scn_; - } else if (MinStartScnStatus::HAS_CTX == status) { - tmp_calc_upper_info.min_start_scn_in_ctx_ = min_start_scn; - } else { - ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(ERROR, "invalid min start scn status", K(min_start_scn), K(keep_alive_scn), K(status)); - } - - if (OB_FAIL(ret)) { - } else if (tmp_calc_upper_info.min_start_scn_in_ctx_ < calc_upper_info_.min_start_scn_in_ctx_) { - ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(ERROR, "invalid min start scn", K(tmp_calc_upper_info), K(calc_upper_info_)); - } else { - calc_upper_info_ = tmp_calc_upper_info; - } - } -} - int ObLSService::online_ls() { // do nothing diff --git a/mittest/simple_server/test_transfer_with_commit_action.cpp b/mittest/simple_server/test_transfer_with_commit_action.cpp index d9e601c729..e713575667 100644 --- a/mittest/simple_server/test_transfer_with_commit_action.cpp +++ b/mittest/simple_server/test_transfer_with_commit_action.cpp @@ -29,7 +29,7 @@ namespace oceanbase namespace storage { -int64_t ObTxDataTable::UPDATE_CALC_UPPER_INFO_INTERVAL = 0; +int64_t ObTxTable::UPDATE_MIN_START_SCN_INTERVAL = 0; int ObTransferHandler::wait_src_ls_advance_weak_read_ts_( const share::ObTransferTaskInfo &task_info, diff --git a/mittest/simple_server/test_transfer_with_smaller_tx_data.cpp b/mittest/simple_server/test_transfer_with_smaller_tx_data.cpp index 46eb1bb456..b5b079ca10 100644 --- a/mittest/simple_server/test_transfer_with_smaller_tx_data.cpp +++ b/mittest/simple_server/test_transfer_with_smaller_tx_data.cpp @@ -28,7 +28,7 @@ namespace oceanbase namespace storage { -int64_t ObTxDataTable::UPDATE_CALC_UPPER_INFO_INTERVAL = 0; +int64_t 
ObTxTable::UPDATE_MIN_START_SCN_INTERVAL = 0; int ObTransferHandler::wait_src_ls_advance_weak_read_ts_( const share::ObTransferTaskInfo &task_info, @@ -325,11 +325,11 @@ TEST_F(ObTransferWithSmallerStartSCN, smaller_start_scn) // Step4: let the tx data table update upper info ObLS *ls = get_ls(tenant_id, loc1); - storage::ObTxDataTable *tx_data_table = ls->get_tx_table()->get_tx_data_table(); + ObTxTable *tx_table = ls->get_tx_table(); fprintf(stdout, "start update upper info the first time\n"); TRANS_LOG(INFO, "start update upper info the first time"); - tx_data_table->update_calc_upper_info_(SCN::max_scn()); - uint64_t first_min_start_scn = tx_data_table->calc_upper_info_.min_start_scn_in_ctx_.val_; + tx_table->update_min_start_scn_info(SCN::max_scn()); + uint64_t first_min_start_scn = tx_table->ctx_min_start_scn_info_.min_start_scn_in_ctx_.val_; fprintf(stdout, "end update upper info the first time, %lu\n", first_min_start_scn); TRANS_LOG(INFO, "end update upper info the first time"); @@ -371,6 +371,7 @@ TEST_F(ObTransferWithSmallerStartSCN, smaller_start_scn) bool unused; fprintf(stdout, "start get min start in tx data table first time\n"); TRANS_LOG(INFO, "start get min start in tx data table first time"); + ObTxDataTable *tx_data_table = tx_table->get_tx_data_table(); tx_data_table->check_min_start_in_tx_data_(SCN::invalid_scn(), min_start_scn_in_tx_data, unused); uint64_t first_min_start_scn_in_tx_data = min_start_scn_in_tx_data.val_; fprintf(stdout, "end get min start in tx data table first time, %lu, %lu\n", min_start_scn_in_tx_data.val_, tx_id.get_id()); @@ -402,8 +403,8 @@ TEST_F(ObTransferWithSmallerStartSCN, smaller_start_scn) fprintf(stdout, "start update upper info the second time\n"); TRANS_LOG(INFO, "start update upper info the second time"); - tx_data_table->update_calc_upper_info_(SCN::max_scn()); - uint64_t second_min_start_scn = tx_data_table->calc_upper_info_.min_start_scn_in_ctx_.val_; + tx_table->update_min_start_scn_info(SCN::max_scn()); + 
uint64_t second_min_start_scn = tx_table->ctx_min_start_scn_info_.min_start_scn_in_ctx_.val_; fprintf(stdout, "end update upper info the second time %lu\n", second_min_start_scn); TRANS_LOG(INFO, "end update upper info the second time"); diff --git a/src/rootserver/ob_ddl_service.cpp b/src/rootserver/ob_ddl_service.cpp index 9e00838346..38b4c4ac49 100755 --- a/src/rootserver/ob_ddl_service.cpp +++ b/src/rootserver/ob_ddl_service.cpp @@ -1390,6 +1390,7 @@ int ObDDLService::generate_schema( const ObIArray &constraints = arg.constraint_list_; const uint64_t tenant_id = schema.get_tenant_id(); uint64_t new_table_id = schema.get_table_id(); + uint64_t compat_version = 0; ObSchemaService *schema_service = NULL; const ObDatabaseSchema *database_schema = NULL; const ObTenantSchema *tenant_schema = NULL; @@ -1398,6 +1399,12 @@ int ObDDLService::generate_schema( ObSchemaGetterGuard guard; if (OB_FAIL(check_inner_stat())) { LOG_WARN("variable is not init"); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { + LOG_WARN("fail to get data version", K(ret), K(tenant_id)); + } else if (not_compat_for_queuing_mode(compat_version) && arg.schema_.is_new_queuing_table_mode()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN(QUEUING_MODE_NOT_COMPAT_WARN_STR, K(ret), K(tenant_id), K(compat_version), K(arg)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, QUEUING_MODE_NOT_COMPAT_USER_ERROR_STR); } else if (OB_FAIL(schema_service_->get_tenant_schema_guard(tenant_id, guard))) { LOG_WARN("get schema guard failed", K(ret)); } else { @@ -3340,8 +3347,17 @@ int ObDDLService::set_raw_table_options( break; } case ObAlterTableArg::TABLE_MODE: { - new_table_schema.set_table_mode(alter_table_schema.get_table_mode()); - need_update_index_table = true; + uint64_t compat_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { + LOG_WARN("get min data_version failed", K(ret), K(tenant_id)); + } else if (not_compat_for_queuing_mode(compat_version) && 
alter_table_schema.is_new_queuing_table_mode()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN(QUEUING_MODE_NOT_COMPAT_WARN_STR, K(ret), K(compat_version), K(alter_table_schema.get_table_mode())); + LOG_USER_ERROR(OB_NOT_SUPPORTED, QUEUING_MODE_NOT_COMPAT_USER_ERROR_STR); + } else { + new_table_schema.set_table_mode(alter_table_schema.get_table_mode()); + need_update_index_table = true; + } break; } case ObAlterTableArg::INCREMENT_MODE : { diff --git a/src/rootserver/parallel_ddl/ob_create_table_helper.cpp b/src/rootserver/parallel_ddl/ob_create_table_helper.cpp index 5eb18adcd8..f76bd1d13d 100644 --- a/src/rootserver/parallel_ddl/ob_create_table_helper.cpp +++ b/src/rootserver/parallel_ddl/ob_create_table_helper.cpp @@ -985,9 +985,16 @@ int ObCreateTableHelper::generate_table_schema_() // to make try_format_partition_schema() passed const uint64_t mock_table_id = OB_MIN_USER_OBJECT_ID + 1; + uint64_t compat_version = 0; bool is_oracle_mode = false; if (OB_FAIL(check_inner_stat_())) { LOG_WARN("fail to check inner stat", KR(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id_, compat_version))) { + LOG_WARN("fail to get data version", K(ret), K_(tenant_id)); + } else if (not_compat_for_queuing_mode(compat_version) && arg_.schema_.is_new_queuing_table_mode()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN(QUEUING_MODE_NOT_COMPAT_WARN_STR, K(ret), K_(tenant_id), K(compat_version), K(arg_)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, QUEUING_MODE_NOT_COMPAT_USER_ERROR_STR); } else if (OB_UNLIKELY(OB_INVALID_ID != arg_.schema_.get_table_id())) { ret = OB_NOT_SUPPORTED; LOG_WARN("create table with table_id in 4.x is not supported", diff --git a/src/share/compaction/ob_compaction_time_guard.cpp b/src/share/compaction/ob_compaction_time_guard.cpp index 348ccb3102..f7144dd529 100644 --- a/src/share/compaction/ob_compaction_time_guard.cpp +++ b/src/share/compaction/ob_compaction_time_guard.cpp @@ -215,6 +215,7 @@ const char *ObStorageCompactionTimeGuard::CompactionEventStr[] = { 
"GET_PARALLEL_RANGE", "EXECUTE", "CREATE_SSTABLE", + "UPDATE_UPPER_TRANS", "UPDATE_TABLET", "RELEASE_MEMTABLE", "SCHEDULE_OTHER_COMPACTION", diff --git a/src/share/compaction/ob_compaction_time_guard.h b/src/share/compaction/ob_compaction_time_guard.h index c5253404a7..8f7fd8b76b 100644 --- a/src/share/compaction/ob_compaction_time_guard.h +++ b/src/share/compaction/ob_compaction_time_guard.h @@ -157,6 +157,7 @@ public: GET_PARALLEL_RANGE, EXECUTE, CREATE_SSTABLE, + UPDATE_UPPER_TRANS, UPDATE_TABLET, RELEASE_MEMTABLE, SCHEDULE_OTHER_COMPACTION, diff --git a/src/share/schema/ob_schema_printer.cpp b/src/share/schema/ob_schema_printer.cpp index 9891351bb2..2388730f26 100644 --- a/src/share/schema/ob_schema_printer.cpp +++ b/src/share/schema/ob_schema_printer.cpp @@ -1832,7 +1832,7 @@ int ObSchemaPrinter::print_table_definition_table_options(const ObTableSchema &t if (OB_SUCC(ret) && !strict_compat_ && !is_index_tbl && !is_no_table_options(sql_mode)) { if (!agent_mode) { if (table_schema.is_queuing_table()) { - table_mode_str = "QUEUING"; + table_mode_str = table_mode_flag_to_str(table_schema.get_table_mode_flag()); } } else { // true == agent_mode table_mode_str = ObBackUpTableModeOp::get_table_mode_str(table_schema.get_table_mode_struct()); @@ -2340,7 +2340,7 @@ int ObSchemaPrinter::print_table_definition_table_options( if (OB_SUCC(ret) && !strict_compat_ && !is_index_tbl) { if (!is_agent_mode) { if (table_schema.is_queuing_table()) { - table_mode_str = "QUEUING"; + table_mode_str = table_mode_flag_to_str(table_schema.get_table_mode_flag()); } } else { // true == agent_mode table_mode_str = ObBackUpTableModeOp::get_table_mode_str(table_schema.get_table_mode_struct()); diff --git a/src/share/schema/ob_table_schema.cpp b/src/share/schema/ob_table_schema.cpp index 34340acb1f..a8ad395cf1 100644 --- a/src/share/schema/ob_table_schema.cpp +++ b/src/share/schema/ob_table_schema.cpp @@ -47,6 +47,27 @@ using namespace oceanbase::common; using namespace 
oceanbase::common::hash; using namespace blocksstable; +const static char * ObTableModeFlagStr[] = { + "NORMAL", + "QUEUING", + "PRIMARY_AUX_VP", + "MODERATE", + "SUPER", + "EXTREME", +}; + +const char *table_mode_flag_to_str(const ObTableModeFlag &table_mode) +{ + STATIC_ASSERT(static_cast(TABLE_MODE_MAX) == ARRAYSIZEOF(ObTableModeFlagStr), "table mode flag str len is mismatch"); + const char *str = ""; + if (is_valid_table_mode_flag(table_mode)) { + str = ObTableModeFlagStr[table_mode]; + } else { + str = "invalid_table_mode_flag_type"; + } + return str; +} + ObColumnIdKey ObGetColumnKey::operator()(const ObColumnSchemaV2 *column_schema) const { return ObColumnIdKey(column_schema->get_column_id()); diff --git a/src/share/schema/ob_table_schema.h b/src/share/schema/ob_table_schema.h index 16c6919d8c..2da89ea32c 100644 --- a/src/share/schema/ob_table_schema.h +++ b/src/share/schema/ob_table_schema.h @@ -166,12 +166,38 @@ enum ObTableModeFlag TABLE_MODE_NORMAL = 0, TABLE_MODE_QUEUING = 1, TABLE_MODE_PRIMARY_AUX_VP = 2, - TABLE_MODE_QUEUING_ENHANCED = 3, // Placeholder: ENHANCED/SUPERIOR/PREMIUM will be introduced in 4.2.3 and not supported by resolver now. 
- TABLE_MODE_QUEUING_SUPERIOR = 4, - TABLE_MODE_QUEUING_PREMIUM = 5, + TABLE_MODE_QUEUING_MODERATE = 3, + TABLE_MODE_QUEUING_SUPER = 4, + TABLE_MODE_QUEUING_EXTREME = 5, TABLE_MODE_MAX, }; +inline bool is_valid_table_mode_flag(const ObTableModeFlag &table_mode) +{ + return TABLE_MODE_NORMAL <= table_mode && table_mode < TABLE_MODE_MAX; +} +inline bool is_new_queuing_mode(const ObTableModeFlag &table_mode) +{ + return TABLE_MODE_QUEUING_MODERATE <= table_mode && table_mode <= TABLE_MODE_QUEUING_EXTREME; +} +inline bool is_queuing_table_mode(const ObTableModeFlag &table_mode) +{ + return TABLE_MODE_QUEUING == table_mode || is_new_queuing_mode(table_mode); +} +inline bool not_compat_for_queuing_mode_42x(const uint64_t min_data_version) +{ + return (min_data_version < MOCK_DATA_VERSION_4_2_1_5) + || (DATA_VERSION_4_2_2_0 <= min_data_version && min_data_version < MOCK_DATA_VERSION_4_2_3_0); +} +inline bool not_compat_for_queuing_mode(const uint64_t min_data_version) +{ + return not_compat_for_queuing_mode_42x(min_data_version) + || (DATA_VERSION_4_3_0_0 <= min_data_version && min_data_version < DATA_VERSION_4_3_2_0); +} +const char *table_mode_flag_to_str(const ObTableModeFlag &table_mode); +#define QUEUING_MODE_NOT_COMPAT_WARN_STR "moderate/super/extreme table mode is not supported in data_version < 4.2.1.5 or 4.2.2 <= data_version < 4.2.3 or 4.3.0 <= data_version < 4.3.2" +#define QUEUING_MODE_NOT_COMPAT_USER_ERROR_STR "moderate/super/extreme table mode in data_version < 4.2.1.5 or 4.2.2 <= data_version < 4.2.3 or 4.3.0 <= data_version < 4.3.2" + enum ObTablePKMode { TPKM_OLD_NO_PK= 0, @@ -424,18 +450,42 @@ struct ObBackUpTableModeOp "QUEUING|NEW_NO_PK_MODE": TABLE_MODE_QUEUING && TPKM_NEW_NO_PK "QUEUING|HEAP_ORGANIZED_TABLE":TABLE_MODE_QUEUING && TOM_HEAP_ORGANIZED "QUEUING|INDEX_ORGANIZED_TABLE":TABLE_MODE_QUEUING && TOM_INDEX_ORGANIZED + "MODERATE":TABLE_MODE_QUEUING_MODERATE + "MODERATE|NEW_NO_PK_MODE": TABLE_MODE_QUEUING_MODERATE && TPKM_NEW_NO_PK + 
"MODERATE|HEAP_ORGANIZED_TABLE":TABLE_MODE_QUEUING_MODERATE && TOM_HEAP_ORGANIZED + "MODERATE|INDEX_ORGANIZED_TABLE":TABLE_MODE_QUEUING_MODERATE && TOM_INDEX_ORGANIZED + "SUPER":TABLE_MODE_QUEUING_SUPER + "SUPER|NEW_NO_PK_MODE": TABLE_MODE_QUEUING_SUPER && TPKM_NEW_NO_PK + "SUPER|HEAP_ORGANIZED_TABLE":TABLE_MODE_QUEUING_SUPER && TOM_HEAP_ORGANIZED + "SUPER|INDEX_ORGANIZED_TABLE":TABLE_MODE_QUEUING_SUPER && TOM_INDEX_ORGANIZED + "EXTREME":TABLE_MODE_QUEUING_EXTREME + "EXTREME|NEW_NO_PK_MODE": TABLE_MODE_QUEUING_EXTREME && TPKM_NEW_NO_PK + "EXTREME|HEAP_ORGANIZED_TABLE":TABLE_MODE_QUEUING_EXTREME && TOM_HEAP_ORGANIZED + "EXTREME|INDEX_ORGANIZED_TABLE":TABLE_MODE_QUEUING_EXTREME && TOM_INDEX_ORGANIZED */ + #define SET_QUEUING_TABLE_MODE_WITH_OTHER_MODE(mode, queuing_mode_str) \ + if (TPKM_NEW_NO_PK == mode.pk_mode_) { \ + ret_str = queuing_mode_str"|NEW_NO_PK_MODE"; \ + } else if (TOM_HEAP_ORGANIZED == mode.organization_mode_) { \ + ret_str = queuing_mode_str"|HEAP_ORGANIZED_TABLE"; \ + } else if (TOM_INDEX_ORGANIZED == mode.organization_mode_) { \ + ret_str = queuing_mode_str"|INDEX_ORGANIZED_TABLE"; \ + } else { \ + ret_str = queuing_mode_str; \ + } + static common::ObString get_table_mode_str(const ObTableMode mode) { common::ObString ret_str = ""; - if (TABLE_MODE_QUEUING == mode.mode_flag_) { - if (TPKM_NEW_NO_PK == mode.pk_mode_) { - ret_str = "QUEUING|NEW_NO_PK_MODE"; - } else if (TOM_HEAP_ORGANIZED == mode.organization_mode_) { - ret_str = "QUEUING|HEAP_ORGANIZED_TABLE"; - } else if (TOM_INDEX_ORGANIZED == mode.organization_mode_) { - ret_str = "QUEUING|INDEX_ORGANIZED_TABLE"; - } else { - ret_str = "QUEUING"; + const ObTableModeFlag flag = static_cast(mode.mode_flag_); + if (is_queuing_table_mode(flag)) { + if (TABLE_MODE_QUEUING == mode.mode_flag_) { + SET_QUEUING_TABLE_MODE_WITH_OTHER_MODE(mode, "QUEUING"); + } else if (TABLE_MODE_QUEUING_MODERATE == mode.mode_flag_) { + SET_QUEUING_TABLE_MODE_WITH_OTHER_MODE(mode, "MODERATE"); + } else if 
(TABLE_MODE_QUEUING_SUPER == mode.mode_flag_) { + SET_QUEUING_TABLE_MODE_WITH_OTHER_MODE(mode, "SUPER"); + } else if (TABLE_MODE_QUEUING_EXTREME == mode.mode_flag_) { + SET_QUEUING_TABLE_MODE_WITH_OTHER_MODE(mode, "EXTREME"); } } else if (TPKM_NEW_NO_PK == mode.pk_mode_) { ret_str = "NEW_NO_PK_MODE"; @@ -447,7 +497,7 @@ struct ObBackUpTableModeOp return ret_str; } - static int get_table_mode(const common::ObString str, ObTableMode &ret_mode) { + static int get_table_mode(const common::ObString str, ObTableMode &ret_mode, uint64_t tenant_data_version) { int ret = common::OB_SUCCESS; ret_mode.reset(); char * flag = nullptr; @@ -464,6 +514,12 @@ struct ObBackUpTableModeOp // do nothing } else if (0 == flag_str.case_compare("queuing")) { ret_mode.mode_flag_ = TABLE_MODE_QUEUING; + } else if (0 == flag_str.case_compare("moderate")) { + ret_mode.mode_flag_ = TABLE_MODE_QUEUING_MODERATE; + } else if (0 == flag_str.case_compare("super")) { + ret_mode.mode_flag_ = TABLE_MODE_QUEUING_SUPER; + } else if (0 == flag_str.case_compare("extreme")) { + ret_mode.mode_flag_ = TABLE_MODE_QUEUING_EXTREME; } else if (0 == flag_str.case_compare("new_no_pk_mode")) { ret_mode.pk_mode_ = TPKM_NEW_NO_PK; } else if (0 == flag_str.case_compare("heap_organized_table")) { @@ -474,6 +530,11 @@ struct ObBackUpTableModeOp } else { ret = common::OB_ERR_PARSER_SYNTAX; } + if (OB_SUCC(ret) && not_compat_for_queuing_mode(tenant_data_version) && is_new_queuing_mode(static_cast(ret_mode.mode_flag_))) { + ret = OB_NOT_SUPPORTED; + SHARE_SCHEMA_LOG(WARN, QUEUING_MODE_NOT_COMPAT_WARN_STR, K(ret), K(flag_str), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, QUEUING_MODE_NOT_COMPAT_USER_ERROR_STR); + } flag = strtok_r(NULL, delim, &save_ptr); } return ret; @@ -705,7 +766,7 @@ public: inline void set_table_state_flag(const ObTableStateFlag flag) { table_mode_.state_flag_ = flag; } inline bool is_queuing_table() const - { return TABLE_MODE_QUEUING == (enum ObTableModeFlag)table_mode_.mode_flag_; } + 
{ return is_queuing_table_mode(static_cast(table_mode_.mode_flag_)); } inline bool is_iot_table() const { return TOM_INDEX_ORGANIZED == (enum ObTableOrganizationMode)table_mode_.organization_mode_; } inline bool is_heap_table() const @@ -992,6 +1053,7 @@ public: inline bool has_rowid() const { return is_user_table() || is_tmp_table(); } inline bool gen_normal_tablet() const { return has_rowid() && !is_extended_rowid_mode(); } + inline bool is_new_queuing_table_mode() const { return is_new_queuing_mode(static_cast(table_mode_.mode_flag_)); } DECLARE_VIRTUAL_TO_STRING; protected: diff --git a/src/share/schema/ob_table_sql_service.cpp b/src/share/schema/ob_table_sql_service.cpp index 2ab5478005..63fe3bde63 100644 --- a/src/share/schema/ob_table_sql_service.cpp +++ b/src/share/schema/ob_table_sql_service.cpp @@ -3006,6 +3006,9 @@ int ObTableSqlService::gen_table_dml( LOG_WARN("ttl definition and kv attributes is not supported in version less than 4.2.1", "ttl_definition", table.get_ttl_definition().empty(), "kv_attributes", table.get_kv_attributes().empty()); + } else if (not_compat_for_queuing_mode(data_version) && table.is_new_queuing_table_mode()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN(QUEUING_MODE_NOT_COMPAT_WARN_STR, K(ret), K(table)); } else {} if (OB_SUCC(ret)) { const ObPartitionOption &part_option = table.get_part_option(); @@ -3215,6 +3218,9 @@ int ObTableSqlService::gen_table_options_dml( } else if (data_version < DATA_VERSION_4_1_0_0 && OB_UNLIKELY(table.view_column_filled())) { ret = OB_NOT_SUPPORTED; LOG_WARN("option is not support before 4.1", K(ret), K(table)); + } else if (not_compat_for_queuing_mode(data_version) && table.is_new_queuing_table_mode()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN(QUEUING_MODE_NOT_COMPAT_WARN_STR, K(ret), K(table)); } else if (OB_FAIL(check_column_store_valid(table, data_version))) { LOG_WARN("fail to check column store valid", KR(ret), K(table)); } else if (OB_FAIL(dml.add_pk_column("tenant_id", 
ObSchemaUtils::get_extract_tenant_id( @@ -3330,6 +3336,9 @@ int ObTableSqlService::update_table_attribute(ObISQLClient &sql_client, && OB_UNLIKELY((OB_INVALID_VERSION != new_table_schema.get_truncate_version()))) { ret = OB_NOT_SUPPORTED; LOG_WARN("truncate version is not support before 4.1", K(ret), K(new_table_schema)); + } else if (not_compat_for_queuing_mode(data_version) && new_table_schema.is_new_queuing_table_mode()) { + ret = OB_NOT_SUPPORTED; + LOG_WARN(QUEUING_MODE_NOT_COMPAT_WARN_STR, K(ret), K(new_table_schema)); } else if (OB_FAIL(check_column_store_valid(new_table_schema, data_version))) { LOG_WARN("fail to check column store valid", KR(ret), K(tenant_id), K(new_table_schema)); } else if (OB_FAIL(dml.add_pk_column("tenant_id", ObSchemaUtils::get_extract_tenant_id( diff --git a/src/share/stat/ob_opt_stat_service.cpp b/src/share/stat/ob_opt_stat_service.cpp index 783b6d66c9..15e1c7fc34 100644 --- a/src/share/stat/ob_opt_stat_service.cpp +++ b/src/share/stat/ob_opt_stat_service.cpp @@ -306,6 +306,8 @@ int ObOptStatService::get_table_rowcnt(const uint64_t tenant_id, } else { ObSEArray reload_tablet_ids; ObSEArray reload_ls_ids; + storage::ObTabletStat unused_tablet_stat; + share::schema::ObTableModeFlag unused_mode; for (int64_t i = 0; OB_SUCC(ret) && i < all_tablet_ids.count(); ++i) { ObOptTableStat::Key key(tenant_id, table_id, all_tablet_ids.at(i).id()); ObOptTableStatHandle handle; @@ -331,7 +333,7 @@ int ObOptStatService::get_table_rowcnt(const uint64_t tenant_id, storage::ObTabletStat tablet_stat; //try check the latest tablet stat from stroage if (stat_mgr != NULL) { - if (OB_FAIL(stat_mgr->get_latest_tablet_stat(all_ls_ids.at(i), all_tablet_ids.at(i), tablet_stat))) { + if (OB_FAIL(stat_mgr->get_latest_tablet_stat(all_ls_ids.at(i), all_tablet_ids.at(i), tablet_stat, unused_tablet_stat, unused_mode))) { if (OB_HASH_NOT_EXIST != ret) { LOG_WARN("failed to get latest tablet stat", K(ret), K(all_ls_ids.at(i)), K(all_tablet_ids.at(i))); } else { diff 
--git a/src/sql/resolver/ddl/ob_ddl_resolver.cpp b/src/sql/resolver/ddl/ob_ddl_resolver.cpp index d10ff59e67..e386bb8e5e 100644 --- a/src/sql/resolver/ddl/ob_ddl_resolver.cpp +++ b/src/sql/resolver/ddl/ob_ddl_resolver.cpp @@ -1671,7 +1671,10 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool break; } case T_TABLE_MODE: { - if (OB_ISNULL(option_node->children_[0])) { + uint64_t tenant_data_version = 0; + if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, tenant_data_version))) { + LOG_WARN("get tenant data version failed", K(ret)); + } else if (OB_ISNULL(option_node->children_[0])) { ret = OB_ERR_UNEXPECTED; SQL_RESV_LOG(WARN, "option_node child is null", K(option_node->children_[0]), K(ret)); } else { @@ -1681,23 +1684,42 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool if (OB_FAIL(ObResolverUtils::check_sync_ddl_user(session_info_, is_sync_ddl_user))) { LOG_WARN("Failed to check sync_ddl_user", K(ret)); } else if (is_sync_ddl_user) { // in backup mode - if (OB_FAIL(ObBackUpTableModeOp::get_table_mode(table_mode_str, table_mode_))) { + if (OB_FAIL(ObBackUpTableModeOp::get_table_mode(table_mode_str, table_mode_, tenant_data_version))) { LOG_WARN("Failed to get table mode from string", K(ret), K(table_mode_str)); } } else if (0 == table_mode_str.case_compare("normal")) { table_mode_.mode_flag_ = TABLE_MODE_NORMAL; } else if (0 == table_mode_str.case_compare("queuing")) { table_mode_.mode_flag_ = TABLE_MODE_QUEUING; + } else if (0 == table_mode_str.case_compare("moderate")) { + table_mode_.mode_flag_ = TABLE_MODE_QUEUING_MODERATE; + } else if (0 == table_mode_str.case_compare("super")) { + table_mode_.mode_flag_ = TABLE_MODE_QUEUING_SUPER; + } else if (0 == table_mode_str.case_compare("extreme")) { + table_mode_.mode_flag_ = TABLE_MODE_QUEUING_EXTREME; } else if (0 == table_mode_str.case_compare("heap_organized_table")) { table_mode_.organization_mode_ = TOM_HEAP_ORGANIZED; table_mode_.pk_mode_ = 
TPKM_TABLET_SEQ_PK; } else if (0 == table_mode_str.case_compare("index_organized_table")) { table_mode_.organization_mode_ = TOM_INDEX_ORGANIZED; } else { - ret = OB_ERR_PARSER_SYNTAX; + ret = OB_NOT_SUPPORTED; + int tmp_ret = OB_SUCCESS; + ObSqlString err_msg; + if (OB_TMP_FAIL(err_msg.append_fmt("Table mode %s is", table_mode_str.ptr()))) { + LOG_USER_ERROR(OB_NOT_SUPPORTED, "this table mode is"); + LOG_WARN("failed to append err msg", K(tmp_ret), K(err_msg), K(table_mode_str)); + } else { + LOG_USER_ERROR(OB_NOT_SUPPORTED, err_msg.ptr()); + } } if (OB_FAIL(ret)) { SQL_RESV_LOG(WARN, "failed to resolve table mode str!", K(ret)); + } else if (not_compat_for_queuing_mode(tenant_data_version) + && is_new_queuing_mode(static_cast(table_mode_.mode_flag_))) { + ret = OB_NOT_SUPPORTED; + LOG_WARN(QUEUING_MODE_NOT_COMPAT_WARN_STR, K(ret), K(table_mode_str), K(tenant_data_version)); + LOG_USER_ERROR(OB_NOT_SUPPORTED, QUEUING_MODE_NOT_COMPAT_USER_ERROR_STR); } } if (OB_SUCCESS == ret && stmt::T_ALTER_TABLE ==stmt_->get_stmt_type()) { @@ -1708,7 +1730,7 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool if (OB_FAIL(get_table_schema_for_check(tmp_table_schema))) { LOG_WARN("get table schema failed", K(ret)); } else if ((tmp_table_schema.is_primary_aux_vp_table() || tmp_table_schema.is_aux_vp_table()) - && table_mode_.mode_flag_ == TABLE_MODE_QUEUING) { + && is_queuing_table_mode(static_cast(table_mode_.mode_flag_))) { ret = OB_NOT_SUPPORTED; LOG_USER_ERROR(OB_NOT_SUPPORTED, "set vertical partition table as queuing table mode"); SQL_RESV_LOG(WARN, "Vertical partition table cannot set queuing table mode", K(ret)); diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index d1b639bd8f..12be094e77 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -586,6 +586,7 @@ ob_set_subtarget(ob_storage common ob_direct_load_table_guard.cpp ob_tenant_tablet_stat_mgr.cpp ob_protected_memtable_mgr_handle.cpp + 
ob_gc_upper_trans_helper.cpp ) ob_set_subtarget(ob_storage common_mixed diff --git a/src/storage/blocksstable/ob_macro_block_writer.cpp b/src/storage/blocksstable/ob_macro_block_writer.cpp index 0cc175c521..038860eadc 100644 --- a/src/storage/blocksstable/ob_macro_block_writer.cpp +++ b/src/storage/blocksstable/ob_macro_block_writer.cpp @@ -868,7 +868,8 @@ int ObMacroBlockWriter::check_order(const ObDatumRow &row) LOG_ERROR("Unexpected current row trans version in major merge", K(ret), K(row), "snapshot_version", data_store_desc_->get_snapshot_version()); } else if (!row.mvcc_row_flag_.is_uncommitted_row()) { // update max commit version - if (data_store_desc_->is_major_merge_type() && data_store_desc_->get_major_working_cluster_version() < DATA_VERSION_4_3_0_0) { + const int64_t cluster_version = data_store_desc_->get_major_working_cluster_version(); + if (data_store_desc_->is_major_merge_type() && not_compat_for_queuing_mode_42x(cluster_version) && cluster_version < DATA_VERSION_4_3_0_0) { micro_writer_->update_max_merged_trans_version(-cur_row_version); } if (!row.mvcc_row_flag_.is_shadow_row()) { @@ -920,8 +921,7 @@ int ObMacroBlockWriter::check_order(const ObDatumRow &row) } } else { // another schema rowkey if (nullptr != merge_info_ - && !is_major_merge_type(merge_info_->merge_type_) - && !is_meta_major_merge(merge_info_->merge_type_) + && !is_major_or_meta_merge_type(merge_info_->merge_type_) && !is_macro_or_micro_block_reused_ && !last_key_with_L_flag_) { ret = OB_ERR_UNEXPECTED; @@ -950,8 +950,14 @@ int ObMacroBlockWriter::update_micro_commit_info(const ObDatumRow &row) } else { const int64_t trans_version_col_idx = data_store_desc_->get_schema_rowkey_col_cnt(); const int64_t cur_row_version = row.storage_datums_[trans_version_col_idx].get_int(); - if (!data_store_desc_->is_major_merge_type() || data_store_desc_->get_major_working_cluster_version() >= DATA_VERSION_4_3_0_0) { + // data_store_desc_->get_major_working_cluster_version() only set in major 
merge. it is 0 for mini/minor + const int64_t cluster_version = data_store_desc_->get_major_working_cluster_version(); + if (!data_store_desc_->is_major_merge_type() || cluster_version >= DATA_VERSION_4_3_0_0) { + // see ObMicroBlockWriter::build_block, column_checksums_ and min_merged_trans_version_ share the same memory space. + // Only major merge set column_checksums_, so we can set min_merged_trans_version_ regardless of data version. micro_writer_->update_merged_trans_version(-cur_row_version); + } else if (!not_compat_for_queuing_mode_42x(cluster_version)) { + micro_writer_->update_max_merged_trans_version(-cur_row_version); } } return ret; @@ -1916,4 +1922,4 @@ int ObMacroBlockWriter::agg_micro_block(const ObMicroIndexInfo µ_index_info } }//end namespace blocksstable -}//end namespace oceanbase +}//end namespace oceanbase \ No newline at end of file diff --git a/src/storage/blocksstable/ob_micro_block_header.h b/src/storage/blocksstable/ob_micro_block_header.h index 3c94f9520f..d52ff9a109 100644 --- a/src/storage/blocksstable/ob_micro_block_header.h +++ b/src/storage/blocksstable/ob_micro_block_header.h @@ -105,7 +105,7 @@ public: } TO_STRING_KV(K_(magic), K_(version), K_(header_size), K_(header_checksum), K_(column_count), K_(rowkey_column_count), K_(has_column_checksum), K_(row_count), K_(row_store_type), - K_(opt), K_(var_column_count), K_(compressor_type), K_(row_offset), K_(original_length), K_(max_merged_trans_version), + K_(opt), K_(var_column_count), K_(compressor_type), K_(row_offset), K_(original_length), K_(max_merged_trans_version), K_(min_merged_trans_version), K_(data_length), K_(data_zlength), K_(data_checksum), KP_(column_checksums), K_(single_version_rows), K_(contain_uncommitted_rows), K_(is_last_row_last_flag), K(is_valid())); public: diff --git a/src/storage/blocksstable/ob_shared_macro_block_manager.cpp b/src/storage/blocksstable/ob_shared_macro_block_manager.cpp index cb22a132e4..71e49eb76f 100644 --- 
a/src/storage/blocksstable/ob_shared_macro_block_manager.cpp +++ b/src/storage/blocksstable/ob_shared_macro_block_manager.cpp @@ -620,6 +620,8 @@ int ObSharedMacroBlockMgr::update_tablet( *new_sstable))) { LOG_WARN("fail to rebuild sstable and update tablet", K(ret)); } else if (OB_FAIL(new_sstables.push_back(new_sstable))) { + new_sstable->~ObSSTable(); + allocator.free(new_sstable); LOG_WARN("fail to push table handle to array", K(ret), KPC(sstable)); } } @@ -651,6 +653,7 @@ int ObSharedMacroBlockMgr::update_tablet( ObITable *table = new_sstables[i]; if (OB_LIKELY(nullptr != table)) { table->~ObITable(); + allocator.free(table); } } } @@ -712,7 +715,7 @@ int ObSharedMacroBlockMgr::rebuild_sstable( LOG_WARN("fail to close sstable index builder", K(ret)); } else if (OB_FAIL(create_new_sstable(allocator, res, old_sstable, block_info, new_sstable))) { LOG_WARN("fail to create new sstable", K(ret), K(tablet.get_tablet_meta()), K(old_sstable)); - } else if (OB_FAIL(new_sstable.set_upper_trans_version(old_sstable.get_upper_trans_version(), false/*force_update*/))) { + } else if (OB_FAIL(new_sstable.set_upper_trans_version(allocator, old_sstable.get_upper_trans_version()))) { LOG_WARN("fail to update upper trans version", K(ret), K(old_sstable.get_upper_trans_version())); } else if (OB_FAIL(new_sstable.get_meta(new_meta_handle))) { LOG_WARN("get meta handle fail", K(ret), K(new_sstable)); diff --git a/src/storage/blocksstable/ob_sstable.cpp b/src/storage/blocksstable/ob_sstable.cpp index b7240f5a6f..32814c2cda 100644 --- a/src/storage/blocksstable/ob_sstable.cpp +++ b/src/storage/blocksstable/ob_sstable.cpp @@ -126,6 +126,15 @@ int ObSSTableMetaCache::init( return ret; } +void ObSSTableMetaCache::set_upper_trans_version(const int64_t upper_trans_version) +{ + if (INT64_MAX == max_merged_trans_version_) { + upper_trans_version_ = upper_trans_version; + } else { + upper_trans_version_ = std::max(upper_trans_version, max_merged_trans_version_); + } +} + 
OB_DEF_SERIALIZE_SIMPLE(ObSSTableMetaCache) { int ret = OB_SUCCESS; @@ -957,23 +966,22 @@ int ObSSTable::check_row_locked( } int ObSSTable::set_upper_trans_version( - const int64_t upper_trans_version, - const bool force_update) + common::ObArenaAllocator &allocator, + const int64_t upper_trans_version) { int ret = OB_SUCCESS; - - const int64_t old_val = ATOMIC_LOAD(&meta_cache_.upper_trans_version_); - // only set once - if (INT64_MAX == old_val && INT64_MAX != upper_trans_version) { - int64_t new_val = upper_trans_version; - if (OB_LIKELY(!force_update)) { - new_val = std::max(new_val, meta_cache_.max_merged_trans_version_); - } - ATOMIC_CAS(&meta_cache_.upper_trans_version_, old_val, new_val); + const int64_t old_val = meta_cache_.upper_trans_version_; + // make sure meta_ is loaded, otherwise make meta and shell inconsistency. + if (!is_loaded() && OB_FAIL(bypass_load_meta(allocator))) { + LOG_WARN("failed to load sstable meta", K(ret), K(key_)); + } + if (OB_SUCC(ret) && is_loaded()) { + (void) meta_->basic_meta_.set_upper_trans_version(upper_trans_version); + (void) meta_cache_.set_upper_trans_version(upper_trans_version); } - LOG_INFO("succeed to set upper trans version", K(force_update), K(key_), - K(upper_trans_version), K(meta_cache_.upper_trans_version_)); + LOG_INFO("finish set upper trans version", K(ret), K(key_), K_(meta), + K(old_val), K(upper_trans_version), K_(meta_cache)); return ret; } @@ -1952,6 +1960,51 @@ int ObSSTable::get_meta( return ret; } + +int ObSSTable::bypass_load_meta(common::ObArenaAllocator &allocator) +{ + int ret = OB_SUCCESS; + if (is_loaded()) { + if (OB_UNLIKELY(!meta_->is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid sstable meta pointer for in-memory sstable", K(ret), KPC(this)); + } + } else if (OB_UNLIKELY(!addr_.is_valid() || !addr_.is_block())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected invalid addr for get sstable from cache", K(ret), KPC(this)); + } else { + common::ObSafeArenaAllocator 
safe_allocator(allocator); + ObSSTableMetaHandle meta_handle; + ObStorageMetaCache &meta_cache = OB_STORE_CACHE.get_storage_meta_cache(); + const ObStorageMetaValue *value = nullptr; + ObSSTable *sstable_ptr = nullptr; + ObStorageMetaKey meta_key(MTL_ID(), addr_); + ObStorageMetaValue::MetaType meta_type = is_co_sstable() + ? ObStorageMetaValue::MetaType::CO_SSTABLE + : ObStorageMetaValue::MetaType::SSTABLE; + if (OB_FAIL(meta_cache.bypass_get_meta(meta_type, meta_key, safe_allocator, meta_handle.handle_))) { + LOG_WARN("fail to bypass cache get meta", K(ret), K(meta_type), K(meta_key)); + } else if (OB_FAIL(meta_handle.handle_.get_value(value))) { + LOG_WARN("fail to get value from meta handle", K(ret), KPC(this)); + } else if (OB_ISNULL(value)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null sstable cache value", K(ret), K(value), KPC(this)); + } else if (OB_FAIL(value->get_sstable(sstable_ptr))) { + LOG_WARN("fail to get sstable from meta cache value", K(ret), KPC(value), KPC(this)); + } else if (OB_ISNULL(sstable_ptr) + || OB_UNLIKELY(!sstable_ptr->is_valid()) + || OB_ISNULL(sstable_ptr->meta_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null sstable pointer", K(ret), KPC(sstable_ptr)); + } else { + // move the owner of new created sstable meta in memory to this + meta_ = sstable_ptr->meta_; + sstable_ptr->meta_ = nullptr; + } + } + return ret; +} + int ObSSTable::init_sstable_meta( const ObTabletCreateSSTableParam ¶m, common::ObArenaAllocator *allocator) diff --git a/src/storage/blocksstable/ob_sstable.h b/src/storage/blocksstable/ob_sstable.h index ee316b7696..5c6cf826a1 100644 --- a/src/storage/blocksstable/ob_sstable.h +++ b/src/storage/blocksstable/ob_sstable.h @@ -80,6 +80,7 @@ public: ~ObSSTableMetaCache() = default; void reset(); int init(const blocksstable::ObSSTableMeta *meta, const bool has_multi_version_row = false); + void set_upper_trans_version(const int64_t upper_trans_version); bool is_valid() const { return version_ >= 
SSTABLE_META_CACHE_VERSION; } int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; int deserialize(const char *buf, const int64_t data_len, int64_t &pos); @@ -202,7 +203,9 @@ public: storage::ObTableAccessContext &context, share::SCN &max_trans_version, ObRowsInfo &rows_info); - int set_upper_trans_version(const int64_t upper_trans_version, const bool force_update); + int set_upper_trans_version( + common::ObArenaAllocator &allocator, + const int64_t upper_trans_version); virtual int64_t get_upper_trans_version() const override { return meta_cache_.upper_trans_version_; @@ -273,6 +276,8 @@ public: OB_INLINE bool is_valid() const { return valid_for_reading_; } OB_INLINE bool is_loaded() const { return nullptr != meta_; } int get_meta(ObSSTableMetaHandle &meta_handle, common::ObSafeArenaAllocator *allocator = nullptr) const; + // load sstable meta bypass. Lifetime is guaranteed by allocator, which should cover this sstable + int bypass_load_meta(common::ObArenaAllocator &allocator); int set_status_for_read(const ObSSTableStatus status); // TODO: get_index_tree_root and get_last_rowkey now required sstable to be loaded diff --git a/src/storage/blocksstable/ob_sstable_meta.cpp b/src/storage/blocksstable/ob_sstable_meta.cpp index f5d197fa6c..8c1805e092 100644 --- a/src/storage/blocksstable/ob_sstable_meta.cpp +++ b/src/storage/blocksstable/ob_sstable_meta.cpp @@ -362,16 +362,13 @@ DEFINE_GET_SERIALIZE_SIZE(ObSSTableBasicMeta) return len; } -int ObSSTableBasicMeta::set_upper_trans_version(const int64_t upper_trans_version) +void ObSSTableBasicMeta::set_upper_trans_version(const int64_t upper_trans_version) { - int ret = OB_SUCCESS; - const int64_t old_val = ATOMIC_LOAD(&upper_trans_version_); - // only set once - if (INT64_MAX == old_val && INT64_MAX != upper_trans_version) { - const int64_t new_val = std::max(upper_trans_version, max_merged_trans_version_); - ATOMIC_CAS(&upper_trans_version_, old_val, new_val); + if (INT64_MAX == 
max_merged_trans_version_) { + upper_trans_version_ = upper_trans_version; + } else { + upper_trans_version_ = std::max(upper_trans_version, max_merged_trans_version_); } - return ret; } //================================== ObTxDesc & ObTxContext ================================== diff --git a/src/storage/blocksstable/ob_sstable_meta.h b/src/storage/blocksstable/ob_sstable_meta.h index f6bbe91368..88886a320a 100644 --- a/src/storage/blocksstable/ob_sstable_meta.h +++ b/src/storage/blocksstable/ob_sstable_meta.h @@ -115,7 +115,7 @@ public: OB_INLINE common::ObRowStoreType get_latest_row_store_type() const { return latest_row_store_type_; } int decode_for_compat(const char *buf, const int64_t data_len, int64_t &pos); - int set_upper_trans_version(const int64_t upper_trans_version); + void set_upper_trans_version(const int64_t upper_trans_version); int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; int deserialize(const char *buf, const int64_t data_len, int64_t& pos); int64_t get_serialize_size() const; diff --git a/src/storage/compaction/ob_basic_tablet_merge_ctx.cpp b/src/storage/compaction/ob_basic_tablet_merge_ctx.cpp index 08e08d8cf8..c1ee3d9034 100644 --- a/src/storage/compaction/ob_basic_tablet_merge_ctx.cpp +++ b/src/storage/compaction/ob_basic_tablet_merge_ctx.cpp @@ -21,6 +21,7 @@ #include "storage/ob_tenant_tablet_stat_mgr.h" #include "storage/blocksstable/ob_data_store_desc.h" #include "storage/ob_storage_schema_util.h" +#include "storage/ob_gc_upper_trans_helper.h" #include "ob_medium_list_checker.h" #include "share/schema/ob_tenant_schema_service.h" @@ -903,6 +904,40 @@ int ObBasicTabletMergeCtx::update_tablet( return ret; } +int ObBasicTabletMergeCtx::try_set_upper_trans_version(blocksstable::ObSSTable &sstable) +{ + int ret = OB_SUCCESS; + const ObMergeType merge_type = get_inner_table_merge_type(); + const int64_t rebuild_seq = get_ls_rebuild_seq(); + // update upper_trans_version for param.sstable_, and then update table store + if 
(is_mini_merge(merge_type) || is_minor_merge(merge_type)) { + // upper_trans_version calculated from ls is invalid when ls is rebuilding, use rebuild_seq to prevent concurrency bug. + int tmp_ret = OB_SUCCESS; + ObLS *ls = get_ls(); + int64_t new_upper_trans_version = INT64_MAX; + int64_t new_rebuild_seq = 0; + bool ls_is_migration = false; + + if (INT64_MAX != sstable.get_upper_trans_version()) { + // all row committed, has set as max_merged_trans_version + } else if (OB_TMP_FAIL(ls->check_ls_migration_status(ls_is_migration, new_rebuild_seq))) { + LOG_WARN("failed to check ls migration status", K(tmp_ret), K(ls_is_migration), K(new_rebuild_seq)); + } else if (ls_is_migration) { + } else if (rebuild_seq != new_rebuild_seq) { + ret = OB_EAGAIN; + LOG_WARN("rebuild seq not same, need retry merge", K(ret), "ls_meta", ls->get_ls_meta(), K(new_rebuild_seq), K(rebuild_seq)); + } else if (OB_TMP_FAIL(ObGCUpperTransHelper::try_get_sstable_upper_trans_version(*ls, sstable, new_upper_trans_version))) { + LOG_WARN("failed to get new upper_trans_version for sstable", K(tmp_ret), K(sstable)); + } else if (INT64_MAX != new_upper_trans_version + && OB_TMP_FAIL(sstable.set_upper_trans_version(mem_ctx_.get_allocator(), new_upper_trans_version))) { + LOG_WARN("failed to set upper trans version", K(tmp_ret), K(sstable)); + } else { + time_guard_click(ObStorageCompactionTimeGuard::UPDATE_UPPER_TRANS); + } + } + return ret; +} + int ObBasicTabletMergeCtx::update_tablet_after_merge() { int ret = OB_SUCCESS; diff --git a/src/storage/compaction/ob_basic_tablet_merge_ctx.h b/src/storage/compaction/ob_basic_tablet_merge_ctx.h index 956cabf1b8..deaec28867 100644 --- a/src/storage/compaction/ob_basic_tablet_merge_ctx.h +++ b/src/storage/compaction/ob_basic_tablet_merge_ctx.h @@ -164,6 +164,7 @@ public: virtual int update_tablet( const blocksstable::ObSSTable &sstable, ObTabletHandle &new_tablet_handle); + int try_set_upper_trans_version(blocksstable::ObSSTable &sstable); int 
update_tablet_after_merge(); ObITable::TableType get_merged_table_type( const ObStorageColumnGroupSchema *cg_schema, diff --git a/src/storage/compaction/ob_medium_compaction_func.cpp b/src/storage/compaction/ob_medium_compaction_func.cpp index 3328cd0ffe..bf4901b41b 100644 --- a/src/storage/compaction/ob_medium_compaction_func.cpp +++ b/src/storage/compaction/ob_medium_compaction_func.cpp @@ -269,7 +269,9 @@ int ObMediumCompactionScheduleFunc::get_status_from_inner_table( // cal this func with PLAF LEADER ROLE && last_medium_scn_ = 0 int ObMediumCompactionScheduleFunc::schedule_next_medium_for_leader( - const int64_t major_snapshot) + const int64_t major_snapshot, + const bool is_tombstone, + bool &medium_clog_submitted) { int ret = OB_SUCCESS; ObRole role = INVALID_ROLE; @@ -291,7 +293,7 @@ int ObMediumCompactionScheduleFunc::schedule_next_medium_for_leader( } } #endif - ret = schedule_next_medium_primary_cluster(major_snapshot); + ret = schedule_next_medium_primary_cluster(major_snapshot, is_tombstone, medium_clog_submitted); } else { LOG_TRACE("not leader", K(ret), K(role), K(ls_.get_ls_id())); } @@ -333,7 +335,9 @@ int ObMediumCompactionScheduleFunc::get_adaptive_reason( } int ObMediumCompactionScheduleFunc::schedule_next_medium_primary_cluster( - const int64_t schedule_major_snapshot) + const int64_t schedule_major_snapshot, + const bool is_tombstone, + bool &medium_clog_submitted) { int ret = OB_SUCCESS; ObTabletCompactionScnInfo ret_info; @@ -360,6 +364,9 @@ int ObMediumCompactionScheduleFunc::schedule_next_medium_primary_cluster( LOG_WARN("failed to get adaptive reason", KR(ret), K(schedule_major_snapshot)); } else if (ObAdaptiveMergePolicy::is_valid_merge_reason(merge_reason_)) { schedule_medium_flag = true; + } else if (is_tombstone && ObAdaptiveMergePolicy::NONE == merge_reason_) { + merge_reason_ = ObAdaptiveMergePolicy::TOMBSTONE_SCENE; + schedule_medium_flag = true; } LOG_TRACE("schedule next medium in primary cluster", K(ret), KPC(this), 
K(schedule_medium_flag), K(schedule_major_snapshot), K(merge_reason_), K(last_major_snapshot_version), KPC_(medium_info_list), K(max_sync_medium_scn)); @@ -395,7 +402,7 @@ int ObMediumCompactionScheduleFunc::schedule_next_medium_primary_cluster( schedule_flag = true; } if (OB_SUCC(ret) && schedule_flag) { - ret = decide_medium_snapshot(); + ret = decide_medium_snapshot(medium_clog_submitted); } return ret; @@ -449,7 +456,9 @@ int ObMediumCompactionScheduleFunc::check_frequency( const int64_t medium_snapshot) { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; ObTablet *tablet = tablet_handle_.get_obj(); + const ObTabletID &tablet_id = tablet->get_tablet_meta().tablet_id_; const int64_t current_time = ObTimeUtility::current_time_ns(); if (max_reserved_snapshot < current_time) { const int64_t time_interval = (current_time - max_reserved_snapshot) / 2; @@ -458,9 +467,25 @@ int ObMediumCompactionScheduleFunc::check_frequency( ret = OB_ERR_UNEXPECTED; LOG_WARN("major sstable should not be empty", K(ret), K(last_major_snapshot_version)); } else if (last_major_snapshot_version + time_interval > medium_snapshot) { - ret = OB_NO_NEED_MERGE; - LOG_TRACE("schedule medium frequently", K(ret), K(last_major_snapshot_version), K(medium_snapshot), - K(time_interval)); + // TODO(chengkong): for better performance, here should take meta major merge in the future. 
+ ObTableQueuingModeCfg queuing_cfg; + if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->get_queuing_cfg(ls_.get_ls_id(), tablet_id, queuing_cfg))) { + LOG_WARN_RET(tmp_ret, "failed to get table queuing mode, treat it as normal table", "ls_id", ls_.get_ls_id(), K(tablet_id)); + ret = OB_NO_NEED_MERGE; + LOG_TRACE("schedule medium frequently", K(ret), K(last_major_snapshot_version), K(medium_snapshot), K(time_interval)); + } else if (queuing_cfg.is_queuing_mode()) { + const int64_t cooling_down_interval = ObAdaptiveMergePolicy::MEDIUM_COOLING_TIME_THRESHOLD_NS * queuing_cfg.queuing_factor_; + const bool max_reserved_cooling_down = last_major_snapshot_version + time_interval * queuing_cfg.queuing_factor_ > medium_snapshot; + const bool medium_is_cooling_down = last_major_snapshot_version + cooling_down_interval > ObTimeUtility::current_time_ns(); + if (max_reserved_cooling_down && medium_is_cooling_down) { + ret = OB_NO_NEED_MERGE; + LOG_DEBUG("schedule queuing medium frequently", K(ret), KPC(tablet), K(medium_snapshot), K(time_interval), K(queuing_cfg), + K(cooling_down_interval), K(max_reserved_cooling_down), K(medium_is_cooling_down)); + } + } else { + ret = OB_NO_NEED_MERGE; + LOG_TRACE("schedule medium frequently", K(ret), K(last_major_snapshot_version), K(medium_snapshot), K(time_interval)); + } } } return ret; @@ -528,13 +553,14 @@ int ObMediumCompactionScheduleFunc::choose_new_medium_snapshot( return ret; } -int ObMediumCompactionScheduleFunc::decide_medium_snapshot() +int ObMediumCompactionScheduleFunc::decide_medium_snapshot(bool &medium_clog_submitted) { int ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS; int64_t max_sync_medium_scn = 0; uint64_t compat_version = 0; ObTablet *tablet = nullptr; + medium_clog_submitted = false; if (OB_UNLIKELY(!tablet_handle_.is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid tablet_handle", K(ret), K(tablet_handle_)); @@ -590,8 +616,11 @@ int ObMediumCompactionScheduleFunc::decide_medium_snapshot() } } else if 
(OB_FAIL(submit_medium_clog(medium_info))) { LOG_WARN("failed to submit medium clog and update inner table", K(ret), KPC(this)); - } else if (OB_NOT_NULL(schedule_stat_)) { - ++schedule_stat_->submit_clog_cnt_; + } else { + medium_clog_submitted = true; + if (OB_NOT_NULL(schedule_stat_)) { + ++schedule_stat_->submit_clog_cnt_; + } } // delete tablet_id in ObLSReservedSnapshotMgr even if submit clog or update inner table failed if (OB_TMP_FAIL(ls_.del_dependent_medium_tablet(tablet_id))) { @@ -1482,6 +1511,7 @@ int ObMediumCompactionScheduleFunc::check_need_merge_and_schedule( bool need_force_freeze = false; const ObLSID &ls_id = ls.get_ls_id(); const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + create_dag_flag = false; if (OB_UNLIKELY(0 == schedule_scn)) { ret = OB_INVALID_ARGUMENT; diff --git a/src/storage/compaction/ob_medium_compaction_func.h b/src/storage/compaction/ob_medium_compaction_func.h index b457691ffc..2b210bd830 100644 --- a/src/storage/compaction/ob_medium_compaction_func.h +++ b/src/storage/compaction/ob_medium_compaction_func.h @@ -89,11 +89,13 @@ public: ObCompactionTimeGuard &time_guard); int schedule_next_medium_for_leader( - const int64_t major_snapshot); + const int64_t major_snapshot, + const bool is_tombstone, + bool &medium_clog_submitted); int64_t to_string(char* buf, const int64_t buf_len) const; protected: - int decide_medium_snapshot(); + int decide_medium_snapshot(bool &medium_clog_submitted); static int get_status_from_inner_table( const ObLSID &ls_id, const ObTabletID &tablet_id, @@ -163,7 +165,9 @@ protected: bool &tablet_need_freeze_flag, bool &create_dag_flag); int schedule_next_medium_primary_cluster( - const int64_t major_snapshot); + const int64_t major_snapshot, + const bool is_tombstone, + bool &medium_clog_submitted); int choose_new_medium_snapshot( const int64_t max_reserved_snapshot, diff --git a/src/storage/compaction/ob_partition_merge_policy.cpp b/src/storage/compaction/ob_partition_merge_policy.cpp 
index 2d95701aa0..533d34611c 100644 --- a/src/storage/compaction/ob_partition_merge_policy.cpp +++ b/src/storage/compaction/ob_partition_merge_policy.cpp @@ -18,6 +18,7 @@ #include "share/ob_debug_sync.h" #include "share/ob_force_print_log.h" #include "share/rc/ob_tenant_base.h" +#include "share/schema/ob_table_schema.h" #include "storage/memtable/ob_memtable.h" #include "storage/tablet/ob_tablet.h" #include "storage/tablet/ob_tablet_table_store.h" @@ -1375,6 +1376,53 @@ bool ObAdaptiveMergePolicy::is_valid_merge_reason(const AdaptiveMergeReason &rea reason < AdaptiveMergeReason::INVALID_REASON; } +bool ObAdaptiveMergePolicy::is_valid_compaction_policy(const AdaptiveCompactionPolicy &policy) +{ + return policy >= AdaptiveCompactionPolicy::NORMAL && + policy < AdaptiveCompactionPolicy::INVALID_POLICY; +} + +#ifdef ERRSIM + #define SHOULD_SCHEDULE_MERGE(tracepoint) \ + int ret = OB_E(EventTable::tracepoint) OB_SUCCESS; \ + if (OB_FAIL(ret)) { \ + bret = true; \ + LOG_INFO("ERRSIM should merge:" #tracepoint); \ + } +#endif + +bool ObAdaptiveMergePolicy::is_schedule_medium(const share::schema::ObTableModeFlag &mode) +{ + bool bret = take_advanced_policy(mode) || take_normal_policy(mode); +#ifdef ERRSIM + SHOULD_SCHEDULE_MERGE(EN_COMPACTION_SCHEDULE_MEDIUM_MERGE_AFTER_MINI); +#endif + return bret; +} +bool ObAdaptiveMergePolicy::is_schedule_meta(const share::schema::ObTableModeFlag &mode) +{ + bool bret = take_advanced_policy(mode) || take_extrem_policy(mode); +#ifdef ERRSIM + SHOULD_SCHEDULE_MERGE(EN_COMPACTION_SCHEDULE_META_MERGE); +#endif + return bret; +} + +bool ObAdaptiveMergePolicy::take_normal_policy(const share::schema::ObTableModeFlag &mode) +{ + return share::schema::ObTableModeFlag::TABLE_MODE_NORMAL == mode + || share::schema::ObTableModeFlag::TABLE_MODE_QUEUING == mode; +} +bool ObAdaptiveMergePolicy::take_advanced_policy(const share::schema::ObTableModeFlag &mode) +{ + return share::schema::ObTableModeFlag::TABLE_MODE_QUEUING_MODERATE == mode + || 
share::schema::ObTableModeFlag::TABLE_MODE_QUEUING_SUPER == mode; +} +bool ObAdaptiveMergePolicy::take_extrem_policy(const share::schema::ObTableModeFlag &mode) +{ + return share::schema::ObTableModeFlag::TABLE_MODE_QUEUING_EXTREME == mode; +} + int ObAdaptiveMergePolicy::get_meta_merge_tables( const ObGetMergeTablesParam ¶m, ObLS &ls, @@ -1433,6 +1481,18 @@ int ObAdaptiveMergePolicy::find_adaptive_merge_tables( if (nullptr == base_table) { base_table = static_cast(table_store->get_major_sstables().get_boundary_table(true/*last*/)); } + const ObSSTableArray &minor_tables = table_store->get_minor_sstables(); + for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.count(); ++i) { + const int64_t cur_upper_trans_version = minor_tables[i]->get_upper_trans_version(); + if (cur_upper_trans_version <= base_table->get_snapshot_version()) { + continue; + } else if (cur_upper_trans_version > tablet.get_snapshot_version()) { + ret = OB_NO_NEED_MERGE; + LOG_WARN("first minor upper trans version is bigger than tablet snapshot version, no need to merge", + K(ret), K(cur_upper_trans_version), "tablet_snapshot_version", tablet.get_snapshot_version()); + } + break; + } } else { base_table = static_cast(table_store->get_major_sstables().get_boundary_table(true/*last*/)); } @@ -1461,11 +1521,7 @@ int ObAdaptiveMergePolicy::find_adaptive_merge_tables( } else if (result.handle_.get_count() <= 1 && table->get_upper_trans_version() <= base_table->get_snapshot_version()) { continue; // skip minor sstable which has been merged } else if (!table->is_trans_state_deterministic()) { - if (is_meta_major_merge(merge_type)) { - break; - } else { - found_undeterm_table = true; - } + found_undeterm_table = true; } else if (!found_undeterm_table) { ++tx_determ_table_cnt; inc_row_cnt += static_cast(table)->get_row_count(); @@ -1476,9 +1532,10 @@ int ObAdaptiveMergePolicy::find_adaptive_merge_tables( } } // end for + const int64_t base_inc_row_cnt = base_table->get_row_count(); bool 
scanty_tx_determ_table = tx_determ_table_cnt < 2; bool scanty_inc_row_cnt = inc_row_cnt < TRANS_STATE_DETERM_ROW_CNT_THRESHOLD - || inc_row_cnt < INC_ROW_COUNT_PERCENTAGE_THRESHOLD * base_table->get_row_count(); + || inc_row_cnt < INC_ROW_COUNT_PERCENTAGE_THRESHOLD * base_inc_row_cnt; #ifdef ERRSIM #define META_POLICY_ERRSIM(tracepoint) \ @@ -1499,11 +1556,26 @@ int ObAdaptiveMergePolicy::find_adaptive_merge_tables( if (OB_FAIL(ret)) { } else if (scanty_tx_determ_table || scanty_inc_row_cnt) { - ret = OB_NO_NEED_MERGE; - if (REACH_TENANT_TIME_INTERVAL(30_s)) { - LOG_INFO("no enough table or no enough rows for meta merge", K(ret), - K(scanty_tx_determ_table), K(scanty_inc_row_cnt), K(result), K(PRINT_TS_WRAPPER(table_store_wrapper))); + int tmp_ret = OB_SUCCESS; + ObTableQueuingModeCfg cfg; + if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->get_queuing_cfg(tablet.get_ls_id(), tablet.get_tablet_id(), cfg))) { + LOG_WARN_RET(tmp_ret, "failed to get table queuing mode, treat it as normal table"); } + if (cfg.is_queuing_mode() && scanty_inc_row_cnt && !scanty_tx_determ_table) { + } else { + ret = OB_NO_NEED_MERGE; + if (REACH_TENANT_TIME_INTERVAL(30_s) || cfg.is_queuing_mode()) { + LOG_INFO("no enough table or no enough rows for meta merge", K(ret), + K(scanty_tx_determ_table), K(scanty_inc_row_cnt), K(inc_row_cnt), K(base_inc_row_cnt), K(result), K(PRINT_TS_WRAPPER(table_store_wrapper))); + } + } + } + if (OB_FAIL(ret)) { + } else if (is_meta_major_merge(merge_type)) { + result.version_range_.snapshot_version_ = tablet.get_snapshot_version(); + } + + if (OB_FAIL(ret)) { } else if (result.version_range_.snapshot_version_ < tablet.get_multi_version_start() || result.version_range_.snapshot_version_ <= base_table->get_snapshot_version()) { ret = OB_NO_NEED_MERGE; @@ -1547,7 +1619,7 @@ int ObAdaptiveMergePolicy::add_meta_merge_result( result.version_range_.multi_version_start_ = table->get_snapshot_version(); result.version_range_.snapshot_version_ = 
table->get_snapshot_version(); } else if (update_snapshot_flag) { - int64_t max_snapshot = MAX(result.version_range_.snapshot_version_, table->get_max_merged_trans_version()); + int64_t max_snapshot = MAX(table->get_max_merged_trans_version(), result.version_range_.snapshot_version_); result.version_range_.multi_version_start_ = max_snapshot; result.version_range_.snapshot_version_ = max_snapshot; result.scn_range_.end_scn_ = table->get_end_scn(); @@ -1586,26 +1658,61 @@ int ObAdaptiveMergePolicy::get_adaptive_merge_reason( ret = OB_SUCCESS; } } else { - if (OB_TMP_FAIL(check_tombstone_situation(tablet_analyzer, tablet, reason))) { + if (OB_TMP_FAIL(check_tombstone_situation(tablet_analyzer, reason))) { LOG_WARN("failed to check tombstone scene", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer)); } - if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_load_data_situation(tablet_analyzer, tablet, reason))) { + if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_load_data_situation(tablet_analyzer, reason))) { LOG_WARN("failed to check load data scene", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer)); } if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_inc_sstable_row_cnt_percentage(tablet, reason))) { LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer)); } - if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_ineffecient_read(tablet_analyzer, tablet, reason))) { + if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_ineffecient_read(tablet_analyzer, reason))) { LOG_WARN("failed to check ineffecient read", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer)); } } +#ifdef ERRSIM + if (OB_SUCC(ret) && AdaptiveMergeReason::NONE == reason) { + ret = OB_E(EventTable::EN_COMPACTION_SCHEDULE_MEDIUM_MERGE_AFTER_MINI) OB_SUCCESS; + if (OB_FAIL(ret)) { + ret = OB_SUCCESS; + reason = AdaptiveMergeReason::TOMBSTONE_SCENE; + 
LOG_INFO("EN_COMPACTION_SCHEDULE_MEDIUM_MERGE_AFTER_MINI, set adaptive reason", K(reason)); + } + } +#endif + if (REACH_TENANT_TIME_INTERVAL(10 * 1000 * 1000 /*10s*/)) { LOG_INFO("Check tablet adaptive merge reason", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_analyzer), K(crazy_medium_flag)); } return ret; } +int ObAdaptiveMergePolicy::check_tombstone_reason( + const storage::ObTablet &tablet, + AdaptiveMergeReason &reason) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + reason = AdaptiveMergeReason::NONE; + ObTabletStatAnalyzer tablet_analyzer; + if (tablet_id.is_special_merge_tablet()) { + // do nothing + } else if (OB_FAIL(MTL(ObTenantTabletStatMgr *)->get_tablet_analyzer(ls_id, tablet_id, tablet_analyzer))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get tablet analyzer stat", K(ret), K(ls_id), K(tablet_id)); + } else { + ret = OB_SUCCESS; + } + } else if (OB_TMP_FAIL(check_tombstone_situation(tablet_analyzer, reason))) { + LOG_WARN("failed to check tombstone scene", K(tmp_ret), K(ls_id), K(tablet_id), K(tablet_analyzer)); + } + return ret; +} + int ObAdaptiveMergePolicy::check_inc_sstable_row_cnt_percentage( const ObTablet &tablet, AdaptiveMergeReason &reason) @@ -1647,64 +1754,52 @@ int ObAdaptiveMergePolicy::check_inc_sstable_row_cnt_percentage( int ObAdaptiveMergePolicy::check_load_data_situation( const storage::ObTabletStatAnalyzer &analyzer, - const ObTablet &tablet, AdaptiveMergeReason &reason) { int ret = OB_SUCCESS; - const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; - const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; reason = AdaptiveMergeReason::NONE; - if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid() - || ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != analyzer.tablet_stat_.tablet_id_)) { + if 
(OB_UNLIKELY(!analyzer.tablet_stat_.is_valid())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("get invalid arguments", K(ret), K(tablet), K(analyzer)); + LOG_WARN("get invalid arguments", K(ret), K(analyzer)); } else if (analyzer.is_hot_tablet() && analyzer.is_insert_mostly()) { reason = AdaptiveMergeReason::LOAD_DATA_SCENE; } - LOG_DEBUG("check_load_data_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(analyzer)); + LOG_DEBUG("check_load_data_situation", K(ret), K(reason), K(analyzer)); return ret; } int ObAdaptiveMergePolicy::check_tombstone_situation( const storage::ObTabletStatAnalyzer &analyzer, - const ObTablet &tablet, AdaptiveMergeReason &reason) { int ret = OB_SUCCESS; - const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; - const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; reason = AdaptiveMergeReason::NONE; - if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid() - || ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != analyzer.tablet_stat_.tablet_id_)) { + if (OB_UNLIKELY(!analyzer.tablet_stat_.is_valid())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("get invalid arguments", K(ret), K(analyzer), K(tablet)); - } else if (analyzer.tablet_stat_.merge_cnt_ > 1 && analyzer.is_update_or_delete_mostly()) { + LOG_WARN("get invalid arguments", K(ret), K(analyzer)); + } else if ((analyzer.tablet_stat_.merge_cnt_ > 1 && analyzer.is_update_or_delete_mostly()) || analyzer.has_accumnulated_delete()) { reason = AdaptiveMergeReason::TOMBSTONE_SCENE; } - LOG_DEBUG("check_tombstone_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(analyzer)); + LOG_DEBUG("check_tombstone_situation", K(ret), K(reason), K(analyzer)); return ret; } int ObAdaptiveMergePolicy::check_ineffecient_read( const storage::ObTabletStatAnalyzer &analyzer, - const ObTablet &tablet, AdaptiveMergeReason &reason) { int ret = OB_SUCCESS; - const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; - const ObTabletID &tablet_id = 
tablet.get_tablet_meta().tablet_id_; reason = AdaptiveMergeReason::NONE; - if (OB_UNLIKELY(!tablet.is_valid() || !analyzer.tablet_stat_.is_valid() - || ls_id.id() != analyzer.tablet_stat_.ls_id_ || tablet_id.id() != analyzer.tablet_stat_.tablet_id_)) { + if (OB_UNLIKELY(!analyzer.tablet_stat_.is_valid())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("get invalid arguments", K(ret), K(tablet), K(analyzer)); + LOG_WARN("get invalid arguments", K(ret), K(analyzer)); } else if (analyzer.is_hot_tablet() && analyzer.has_slow_query()) { reason = AdaptiveMergeReason::INEFFICIENT_QUERY; } - LOG_DEBUG("check_ineffecient_read", K(ret), K(ls_id), K(tablet_id), K(reason), K(analyzer)); + LOG_DEBUG("check_ineffecient_read", K(ret), K(reason), K(analyzer)); return ret; } diff --git a/src/storage/compaction/ob_partition_merge_policy.h b/src/storage/compaction/ob_partition_merge_policy.h index f65d69be05..ed493343df 100644 --- a/src/storage/compaction/ob_partition_merge_policy.h +++ b/src/storage/compaction/ob_partition_merge_policy.h @@ -242,8 +242,21 @@ public: INVALID_REASON }; + enum AdaptiveCompactionPolicy : uint8 { + NORMAL = 0, // only medium + ADVANCED = 1, // medium with meta + EXTREME = 2, // only meta + INVALID_POLICY + }; + static const char *merge_reason_to_str(const int64_t merge_reason); static bool is_valid_merge_reason(const AdaptiveMergeReason &reason); + static bool is_valid_compaction_policy(const AdaptiveCompactionPolicy &policy); + static bool is_schedule_medium(const share::schema::ObTableModeFlag &mode); + static bool is_schedule_meta(const share::schema::ObTableModeFlag &mode); + static bool take_normal_policy(const share::schema::ObTableModeFlag &mode); + static bool take_advanced_policy(const share::schema::ObTableModeFlag &mode); + static bool take_extrem_policy(const share::schema::ObTableModeFlag &mode); static int get_meta_merge_tables( const storage::ObGetMergeTablesParam ¶m, @@ -254,28 +267,28 @@ public: static int get_adaptive_merge_reason( const 
storage::ObTablet &tablet, AdaptiveMergeReason &reason); - + static int check_tombstone_reason( + const storage::ObTablet &tablet, + AdaptiveMergeReason &reason); private: static int find_adaptive_merge_tables( const ObMergeType &merge_type, const storage::ObTablet &tablet, storage::ObGetMergeTablesResult &result); - static int add_meta_merge_result(storage::ObITable *table, + static int add_meta_merge_result( + storage::ObITable *table, const storage::ObStorageMetaHandle &table_meta_handle, storage::ObGetMergeTablesResult &result, const bool update_snapshot_flag); private: static int check_load_data_situation( const storage::ObTabletStatAnalyzer &analyzer, - const storage::ObTablet &tablet, AdaptiveMergeReason &merge_reason); static int check_tombstone_situation( const storage::ObTabletStatAnalyzer &analyzer, - const storage::ObTablet &tablet, AdaptiveMergeReason &merge_reason); static int check_ineffecient_read( const storage::ObTabletStatAnalyzer &analyzer, - const storage::ObTablet &tablet, AdaptiveMergeReason &merge_reason); static int check_inc_sstable_row_cnt_percentage( const storage::ObTablet &tablet, @@ -283,12 +296,13 @@ private: public: static constexpr int64_t INC_ROW_COUNT_THRESHOLD = 100L * 1000L; // 10w - static constexpr int64_t TOMBSTONE_ROW_COUNT_THRESHOLD = 200L * 1000L; // 20w + static constexpr int64_t TOMBSTONE_ROW_COUNT_THRESHOLD = 250L * 1000L; // 25w, the same as ObFastFreezeChecker::TOMBSTONE_DEFAULT_ROW_COUNT static constexpr int64_t BASE_ROW_COUNT_THRESHOLD = 10L * 1000L; // 1w static constexpr int64_t LOAD_DATA_SCENE_THRESHOLD = 70; static constexpr int64_t TOMBSTONE_SCENE_THRESHOLD = 50; static constexpr float INC_ROW_COUNT_PERCENTAGE_THRESHOLD = 0.5; static constexpr int64_t TRANS_STATE_DETERM_ROW_CNT_THRESHOLD = 10000L; // 10k + static constexpr int64_t MEDIUM_COOLING_TIME_THRESHOLD_NS = 600_s * 1000; // 1000: set precision from us to ns }; class ObCOMajorMergePolicy diff --git a/src/storage/compaction/ob_tablet_merge_checker.cpp 
b/src/storage/compaction/ob_tablet_merge_checker.cpp index 7ae7de375a..87316e4da5 100644 --- a/src/storage/compaction/ob_tablet_merge_checker.cpp +++ b/src/storage/compaction/ob_tablet_merge_checker.cpp @@ -37,7 +37,7 @@ int ObTabletMergeChecker::check_need_merge(const ObMergeType merge_type, const O LOG_WARN("merge type is invalid", K(ret), "merge_type", merge_type_to_str(merge_type)); } else if (!is_minor_merge(merge_type) && !is_mini_merge(merge_type) - && !is_major_merge(merge_type) + && !is_major_or_meta_merge_type(merge_type) && !is_medium_merge(merge_type)) { need_merge = true; } else { @@ -46,7 +46,7 @@ int ObTabletMergeChecker::check_need_merge(const ObMergeType merge_type, const O bool is_empty_shell = tablet.is_empty_shell(); if (is_minor_merge(merge_type) || is_mini_merge(merge_type)) { need_merge = !is_empty_shell; - } else if (is_major_merge(merge_type) || is_medium_merge(merge_type)) { + } else if (is_major_or_meta_merge_type(merge_type) || is_medium_merge(merge_type)) { need_merge = tablet.is_data_complete(); } diff --git a/src/storage/compaction/ob_tablet_merge_ctx.cpp b/src/storage/compaction/ob_tablet_merge_ctx.cpp index 7f0d00d88f..809a9a8435 100644 --- a/src/storage/compaction/ob_tablet_merge_ctx.cpp +++ b/src/storage/compaction/ob_tablet_merge_ctx.cpp @@ -157,18 +157,18 @@ void ObTabletMiniMergeCtx::try_schedule_compaction_after_mini(ObTabletHandle &ta { int tmp_ret = OB_SUCCESS; bool is_restore = false; - bool create_meta_dag = false; + bool create_dag = false; // when restoring, some log stream may be not ready, // thus the inner sql in ObTenantFreezeInfoMgr::try_update_info may timeout if (!MTL(ObTenantTabletScheduler *)->is_restore()) { if (get_tablet_id().is_ls_inner_tablet() || 0 == get_merge_info().get_sstable_merge_info().macro_block_count_) { // do nothing - } else if (OB_TMP_FAIL(try_schedule_meta_merge(tablet_handle, create_meta_dag))) { + } else if (OB_TMP_FAIL(try_schedule_adaptive_merge(tablet_handle, create_dag))) { 
LOG_WARN_RET(tmp_ret, "failed to schedule meta merge", K(get_dag_param())); } - if (create_meta_dag || 0 == get_merge_info().get_sstable_merge_info().macro_block_count_) { + if (create_dag || 0 == get_merge_info().get_sstable_merge_info().macro_block_count_) { // no need to schedule minor merge } else if (OB_TMP_FAIL(ObTenantTabletScheduler::schedule_tablet_minor_merge( static_param_.ls_handle_, tablet_handle))) { @@ -182,62 +182,69 @@ void ObTabletMiniMergeCtx::try_schedule_compaction_after_mini(ObTabletHandle &ta time_guard_click(ObStorageCompactionTimeGuard::SCHEDULE_OTHER_COMPACTION); } -int ObTabletMiniMergeCtx::try_schedule_meta_merge( +int ObTabletMiniMergeCtx::try_schedule_adaptive_merge( ObTabletHandle &tablet_handle, - bool &create_meta_dag) + bool &create_dag) { int ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS; - create_meta_dag = false; - - bool is_tombstone_scene = info_collector_.tnode_stat_.delete_row_count_ >= compaction::ObAdaptiveMergePolicy::TOMBSTONE_ROW_COUNT_THRESHOLD; - bool medium_is_cooling_down = tablet_handle.get_obj()->get_last_major_snapshot_version() / 1000 + 600_s > ObTimeUtility::fast_current_time(); -#ifdef ERRSIM - #define SCHEDULE_META_ERRSIM(tracepoint) \ - do { \ - if (OB_SUCC(ret)) { \ - ret = OB_E((EventTable::tracepoint)) OB_SUCCESS; \ - if (OB_FAIL(ret)) { \ - ret = OB_SUCCESS; \ - STORAGE_LOG(INFO, "ERRSIM " #tracepoint); \ - is_tombstone_scene = true; \ - medium_is_cooling_down = true; \ - } \ - } \ - } while(0); - SCHEDULE_META_ERRSIM(EN_COMPACTION_SCHEDULE_META_MERGE); - #undef SCHEDULE_META_ERRSIM -#endif - - // try to schedule meta major merge - if (is_tombstone_scene && medium_is_cooling_down) { - if (OB_TMP_FAIL(ObTenantTabletScheduler::schedule_tablet_meta_merge(static_param_.ls_handle_, tablet_handle, create_meta_dag))) { - LOG_WARN_RET(tmp_ret, "failed to schedule meta merge for tablet", "param", get_dag_param()); + const ObLSID &ls_id = get_ls_id(); + const ObTabletID &tablet_id = get_tablet_id(); + ObTablet 
*tablet = tablet_handle.get_obj(); + bool medium_is_cooling_down = tablet->get_last_major_snapshot_version() + ObAdaptiveMergePolicy::MEDIUM_COOLING_TIME_THRESHOLD_NS > ObTimeUtility::current_time_ns(); + create_dag = false; + ObTableQueuingModeCfg queuing_cfg; + (void) try_report_tablet_stat_after_mini(); // try report after mini every time for updating table mode for tablet. + if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->get_queuing_cfg(ls_id, tablet_id, queuing_cfg))) { + LOG_WARN_RET(tmp_ret, "failed to get table queuing mode, treat it as normal table", K(ls_id), K(tablet_id)); + } + const int64_t adaptive_threshold = ObAdaptiveMergePolicy::TOMBSTONE_ROW_COUNT_THRESHOLD * queuing_cfg.queuing_factor_; + const ObTransNodeDMLStat &tnode_stat = info_collector_.tnode_stat_; + bool is_tombstone_scene = (tnode_stat.update_row_count_ + tnode_stat.delete_row_count_) >= adaptive_threshold + || tnode_stat.delete_row_count_ > queuing_cfg.total_delete_row_cnt_; + if (!is_tombstone_scene && queuing_cfg.is_queuing_mode()) { + ObAdaptiveMergePolicy::AdaptiveMergeReason adaptive_merge_reason = ObAdaptiveMergePolicy::NONE; + if (OB_TMP_FAIL(ObAdaptiveMergePolicy::get_adaptive_merge_reason(*tablet, adaptive_merge_reason))) { + LOG_WARN("failed to get adaptive reason", K(tmp_ret)); + } else if (ObAdaptiveMergePolicy::NONE != adaptive_merge_reason) { + is_tombstone_scene = true; } } +#ifdef ERRSIM + // ATTENTION !!!: 2 tracepoint can only hit one at once + #define SCHEDULE_META_MEDIUM_ERRSIM(tracepoint, cooling_down) \ + do { \ + if (OB_SUCC(ret)) { \ + ret = OB_E((EventTable::tracepoint)) OB_SUCCESS; \ + if (OB_FAIL(ret)) { \ + ret = OB_SUCCESS; \ + STORAGE_LOG(INFO, "ERRSIM " #tracepoint); \ + is_tombstone_scene = true; \ + medium_is_cooling_down = cooling_down; \ + } \ + } \ + } while(0); + SCHEDULE_META_MEDIUM_ERRSIM(EN_COMPACTION_SCHEDULE_MEDIUM_MERGE_AFTER_MINI, false /*cooling_down*/); + SCHEDULE_META_MEDIUM_ERRSIM(EN_COMPACTION_SCHEDULE_META_MERGE, true /*cooling_down*/); 
+ #undef SCHEDULE_META_MEDIUM_ERRSIM + STORAGE_LOG(INFO, "try_schedule_adaptive_merge hit errsim", K(ret), K(is_tombstone_scene), K(medium_is_cooling_down)); +#endif - // deal with tablet stat - if (create_meta_dag) { - MTL(ObTenantTabletStatMgr *)->clear_tablet_stat(get_ls_id(), get_tablet_id()); - FLOG_INFO("clear tablet stat", "ls_id", get_ls_id(), "tablet_id", get_tablet_id(), "tnode_stat", info_collector_.tnode_stat_); - } else { - (void) try_report_tablet_stat_after_mini(); - } - - // try schedule medium merge - if (!medium_is_cooling_down) { - bool non_used_schedule_dag_flag = false; - ObTabletSchedulePair non_used_schedule_pair; - if (OB_TMP_FAIL(ObMediumCompactionScheduleFunc::schedule_tablet_medium_merge( - *get_ls(), - *tablet_handle.get_obj(), - non_used_schedule_pair, - non_used_schedule_dag_flag))) { - if (OB_SIZE_OVERFLOW != tmp_ret && OB_EAGAIN != tmp_ret) { - LOG_WARN_RET(tmp_ret, "failed to schedule tablet medium merge", K(tmp_ret), "param", get_dag_param()); + if (is_tombstone_scene) { + if (ObAdaptiveMergePolicy::is_schedule_medium(queuing_cfg.mode_) && !medium_is_cooling_down) { + if (OB_TMP_FAIL(ObTenantTabletScheduler::schedule_tablet_medium_merge(static_param_.ls_handle_, tablet_id, create_dag))) { + LOG_WARN_RET(tmp_ret, "failed to schedule medium merge for tablet after mini", "param",get_dag_param(), K(tablet_id)); + } + } else if (ObAdaptiveMergePolicy::is_schedule_meta(queuing_cfg.mode_)) { + // TODO(chengkong): if ls offine or deleted, affect meta compaction? 
+ if (OB_TMP_FAIL(ObTenantTabletScheduler::schedule_tablet_meta_merge(static_param_.ls_handle_, tablet_handle, create_dag))) { + LOG_WARN_RET(tmp_ret, "failed to schedule meta merge for tablet", "param", get_dag_param(), K(tablet_id)); } } } + + LOG_INFO("[Buffer-Opt] Try to schedule tablet medium/meta after mini", K(tmp_ret), K(ls_id), K(tablet_id), K(is_tombstone_scene), + "mode", table_mode_flag_to_str(queuing_cfg.mode_), K(medium_is_cooling_down), K(tnode_stat), K(create_dag)); return ret; } @@ -255,7 +262,6 @@ int ObTabletMiniMergeCtx::try_report_tablet_stat_after_mini() // insufficient data, skip to report } else { ObTabletStat report_stat; - bool report_succ = false; report_stat.ls_id_ = get_ls_id().id(); report_stat.tablet_id_ = get_tablet_id().id(); report_stat.merge_cnt_ = 1; diff --git a/src/storage/compaction/ob_tablet_merge_ctx.h b/src/storage/compaction/ob_tablet_merge_ctx.h index 257c7bb772..e7e19a2dd2 100644 --- a/src/storage/compaction/ob_tablet_merge_ctx.h +++ b/src/storage/compaction/ob_tablet_merge_ctx.h @@ -73,7 +73,7 @@ private: const blocksstable::ObSSTable &sstable, ObTabletHandle &new_tablet_handle) override; void try_schedule_compaction_after_mini(storage::ObTabletHandle &tablet_handle); - int try_schedule_meta_merge(ObTabletHandle &tablet_handle, bool &create_meta_dag); + int try_schedule_adaptive_merge(ObTabletHandle &tablet_handle, bool &create_meta_dag); int try_report_tablet_stat_after_mini(); }; diff --git a/src/storage/compaction/ob_tablet_merge_info.cpp b/src/storage/compaction/ob_tablet_merge_info.cpp index b32a5d83fc..639724e228 100644 --- a/src/storage/compaction/ob_tablet_merge_info.cpp +++ b/src/storage/compaction/ob_tablet_merge_info.cpp @@ -264,6 +264,8 @@ int ObTabletMergeInfo::create_sstable( STORAGE_LOG(WARN, "Failed to get sstable", K(ret)); } else if (OB_FAIL(sstable->deep_copy(ctx.mem_ctx_.get_safe_arena(), new_sstable, true/*transfer macro ref*/))) { STORAGE_LOG(WARN, "Failed to deep copy sstable", K(ret)); + } else 
if (OB_FAIL(ctx.try_set_upper_trans_version(*sstable))) { + LOG_WARN("failed to set upper trans version", K(ret), K(param)); } else if (OB_FAIL(merge_table_handle.set_sstable(new_sstable, &ctx.mem_ctx_.get_safe_arena()))) { STORAGE_LOG(WARN, "Failed to set sstable", K(ret)); } diff --git a/src/storage/compaction/ob_tablet_merge_task.cpp b/src/storage/compaction/ob_tablet_merge_task.cpp index 6a4e8eb789..eab1519df7 100644 --- a/src/storage/compaction/ob_tablet_merge_task.cpp +++ b/src/storage/compaction/ob_tablet_merge_task.cpp @@ -186,7 +186,7 @@ int64_t ObMergeParameter::to_string(char* buf, const int64_t buf_len) const if (OB_ISNULL(buf) || buf_len <= 0) { } else { J_OBJ_START(); - J_KV(K_(static_param), K_(merge_range), K_(merge_rowid_range), K_(merge_scn)); + J_KV(K_(static_param), K_(merge_version_range), K_(merge_range), K_(merge_rowid_range), K_(merge_scn)); J_OBJ_END(); } return pos; diff --git a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp index 1b82bc08e9..50c613fc8b 100644 --- a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp +++ b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp @@ -43,6 +43,7 @@ #include "storage/compaction/ob_sstable_merge_info_mgr.h" #include "storage/column_store/ob_co_merge_dag.h" #include "storage/compaction/ob_tenant_medium_checker.h" +#include "storage/ob_gc_upper_trans_helper.h" namespace oceanbase { @@ -89,16 +90,18 @@ int ObFastFreezeChecker::check_need_fast_freeze( bool &need_fast_freeze) { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; need_fast_freeze = false; ObTableHandleV2 table_handle; ObITabletMemtable *memtable = nullptr; const share::ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; const common::ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; - - if (!enable_fast_freeze_) { - } else if (tablet_id.is_inner_tablet() || tablet_id.is_ls_inner_tablet()) { - // inner tablet do nothing - } else if 
(OB_FAIL(tablet.get_active_memtable(table_handle))) { + ObTableQueuingModeCfg queuing_cfg; + if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->get_queuing_cfg(ls_id, tablet_id, queuing_cfg))) { + LOG_WARN_RET(tmp_ret, "[FastFreeze] failed to get table queuing mode, treat it as normal table", K(ls_id), K(tablet_id)); + } + const int64_t memtable_alive_threshold = queuing_cfg.get_memtable_alive_threshold(FAST_FREEZE_INTERVAL_US); + if (OB_FAIL(tablet.get_active_memtable(table_handle))) { if (OB_ENTRY_NOT_EXIST == ret) { ret = OB_SUCCESS; } else { @@ -113,9 +116,9 @@ int ObFastFreezeChecker::check_need_fast_freeze( // do nothing } else if (!memtable->is_data_memtable()) { // do nothing - } else if (ObTimeUtility::current_time() < memtable->get_timestamp() + FAST_FREEZE_INTERVAL_US) { + } else if (ObTimeUtility::current_time() < memtable->get_timestamp() + memtable_alive_threshold) { if (REACH_TENANT_TIME_INTERVAL(PRINT_LOG_INVERVAL)) { - LOG_INFO("[FastFreeze] no need to check fast freeze now", K(tablet)); + LOG_INFO("[FastFreeze] memtable is just created, no need to check", K(memtable_alive_threshold), K(ls_id), K(tablet_id), KPC(memtable)); } } else { memtable::ObMemtable *mt = static_cast(memtable); @@ -123,7 +126,8 @@ int ObFastFreezeChecker::check_need_fast_freeze( if (need_fast_freeze) { FLOG_INFO("[FastFreeze] tablet detects hotspot row, need fast freeze", K(ls_id), K(tablet_id)); } else { - check_tombstone_need_fast_freeze(tablet, *mt, need_fast_freeze); + // Only queuing table need tombstone fast freeze in 4.2.x, but 4.3.0 has this before, so open it + check_tombstone_need_fast_freeze(tablet, queuing_cfg, *mt, need_fast_freeze); if (need_fast_freeze) { FLOG_INFO("[FastFreeze] tablet detects tombstone, need fast freeze", K(ls_id), K(tablet_id)); } @@ -144,19 +148,22 @@ void ObFastFreezeChecker::check_hotspot_need_fast_freeze( void ObFastFreezeChecker::check_tombstone_need_fast_freeze( const ObTablet &tablet, + const ObTableQueuingModeCfg &queuing_cfg, 
memtable::ObMemtable &memtable, bool &need_fast_freeze) { need_fast_freeze = false; - const share::ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; - const common::ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; - if (memtable.is_active_memtable()) { + const share::ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; + const common::ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; const ObMtStat &mt_stat = memtable.get_mt_stat(); // dirty read - int64_t adaptive_threshold = TOMBSTONE_DEFAULT_ROW_COUNT; - try_update_tablet_threshold(ObTabletStatKey(ls_id, tablet_id), mt_stat, memtable.get_timestamp(), adaptive_threshold); - - need_fast_freeze = (mt_stat.update_row_count_ + mt_stat.delete_row_count_) >= adaptive_threshold; + int64_t adaptive_threshold = queuing_cfg.get_tombstone_row_threshold(TOMBSTONE_DEFAULT_ROW_COUNT); + if (!queuing_cfg.is_queuing_mode()) { + // dynamically change adaptive_threshold by merge cnt in recent 10 mins + try_update_tablet_threshold(ObTabletStatKey(ls_id, tablet_id), mt_stat, memtable.get_timestamp(), queuing_cfg, adaptive_threshold); + } + need_fast_freeze = (mt_stat.update_row_count_ + mt_stat.delete_row_count_) >= adaptive_threshold + || mt_stat.delete_row_count_ > queuing_cfg.total_delete_row_cnt_; if (!need_fast_freeze) { need_fast_freeze = @@ -169,6 +176,14 @@ void ObFastFreezeChecker::check_tombstone_need_fast_freeze( if (need_fast_freeze) { LOG_INFO("[FastFreeze] trigger by empty mvcc row tombstone", K(memtable), K(mt_stat), K(memtable.get_physical_row_cnt())); + } else { + ObAdaptiveMergePolicy::AdaptiveMergeReason adaptive_merge_reason = ObAdaptiveMergePolicy::NONE; + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(ObAdaptiveMergePolicy::check_tombstone_reason(tablet, adaptive_merge_reason))) { + LOG_WARN_RET(tmp_ret, "failed to check tombstone by historical stats"); + } else if (ObAdaptiveMergePolicy::NONE != adaptive_merge_reason) { + need_fast_freeze = true; + } } } } @@ -178,10 +193,12 @@ void 
ObFastFreezeChecker::try_update_tablet_threshold( const ObTabletStatKey &key, const ObMtStat &mt_stat, const int64_t memtable_create_timestamp, + const ObTableQueuingModeCfg &queuing_cfg, int64_t &adaptive_threshold) { int tmp_ret = OB_SUCCESS; - adaptive_threshold = TOMBSTONE_DEFAULT_ROW_COUNT; + const int64_t base_adaptive_threshold = queuing_cfg.get_tombstone_row_threshold(TOMBSTONE_DEFAULT_ROW_COUNT); + adaptive_threshold = base_adaptive_threshold; int64_t old_threshold = adaptive_threshold; if (OB_TMP_FAIL(store_map_.get_refactored(key, adaptive_threshold))) { @@ -194,12 +211,14 @@ void ObFastFreezeChecker::try_update_tablet_threshold( } ObTabletStat tablet_stat; - if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->get_latest_tablet_stat(key.ls_id_, key.tablet_id_, tablet_stat))) { + ObTabletStat total_stat; + ObTableModeFlag mode = ObTableModeFlag::TABLE_MODE_NORMAL; + if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->get_latest_tablet_stat(key.ls_id_, key.tablet_id_, tablet_stat, total_stat, mode))) { if (OB_HASH_NOT_EXIST != tmp_ret) { LOG_WARN_RET(tmp_ret, "[FastFreeze] failed to get tablet stat", K(key)); } // not hot tablet, reset threshold - adaptive_threshold = TOMBSTONE_DEFAULT_ROW_COUNT; + adaptive_threshold = base_adaptive_threshold; } else if (tablet_stat.merge_cnt_ >= 2) { // too many mini compaction occurs during the past 10 mins, inc threshold to dec mini merge count adaptive_threshold = MIN(adaptive_threshold + TOMBSTONE_STEP_ROW_COUNT, TOMBSTONE_MAX_ROW_COUNT); @@ -208,12 +227,12 @@ void ObFastFreezeChecker::try_update_tablet_threshold( if (inc_row_cnt >= adaptive_threshold) { // do nothing } else if (inc_row_cnt >= TOMBSTONE_DEFAULT_ROW_COUNT && ObTimeUtility::fast_current_time() - memtable_create_timestamp >= FAST_FREEZE_INTERVAL_US * 4) { - adaptive_threshold = TOMBSTONE_DEFAULT_ROW_COUNT; + adaptive_threshold = base_adaptive_threshold; } } if (old_threshold != adaptive_threshold) { - if (TOMBSTONE_DEFAULT_ROW_COUNT == adaptive_threshold) { + if 
(base_adaptive_threshold == adaptive_threshold) { (void) store_map_.erase_refactored(key); } else { (void) store_map_.set_refactored(key, adaptive_threshold); @@ -444,7 +463,8 @@ int ObTenantTabletScheduler::update_upper_trans_version_and_gc_sstable() } else if (OB_ISNULL(ls = ls_handle.get_ls())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ls is null", K(ret), K(ls)); - } else if (OB_TMP_FAIL(ls->try_update_upper_trans_version_and_gc_sstable(gc_sst_tablet_iter_))) { + } else if (ls->is_stopped()) { + } else if (OB_TMP_FAIL(try_update_upper_trans_version_and_gc_sstable(*ls, gc_sst_tablet_iter_))) { gc_sst_tablet_iter_.skip_cur_ls(); LOG_WARN("failed to update upper trans version", K(tmp_ret), "ls_id", ls->get_ls_id()); } @@ -452,6 +472,71 @@ int ObTenantTabletScheduler::update_upper_trans_version_and_gc_sstable() return ret; } +int ObTenantTabletScheduler::try_update_upper_trans_version_and_gc_sstable( + ObLS &ls, + ObCompactionScheduleIterator &iter) +{ + int ret = OB_SUCCESS; + const ObLSID &ls_id = ls.get_ls_id(); + ObTabletHandle tablet_handle; + ObTablet *tablet = nullptr; + common::ObTabletID tablet_id; + bool ls_is_migration = false; + int64_t rebuild_seq = 0; + while (OB_SUCC(ret)) { + if (OB_FAIL(iter.get_next_tablet(tablet_handle))) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + break; + } else { + LOG_WARN("failed to get tablet", K(ret), K(tablet_handle)); + } + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid tablet handle", K(ret), K(tablet_handle)); + } else if (FALSE_IT(tablet = tablet_handle.get_obj())) { + } else if (FALSE_IT(tablet_id = tablet->get_tablet_meta().tablet_id_)) { + } else if (tablet_id.is_special_merge_tablet()) { + } else if (!tablet->get_tablet_meta().ha_status_.is_none()) { + } else if (OB_FAIL(ls.check_ls_migration_status(ls_is_migration, rebuild_seq))) { + LOG_WARN("failed to check ls migration status", K(ret), K(ls_id)); + } else if (ls_is_migration) { + } else { + int64_t 
multi_version_start = 0; + int tmp_ret = OB_SUCCESS; + bool need_update = false; // need update table store + /* + * 1. upper_trans_version calculated from ls is invalid when ls is rebuilding, use rebuild_seq to prevent concurrency bug. + * 2. new_upper_trans array comes from old table store, use end_scn of last minor to check if table store is updated by other thread. + */ + ObSEArray new_upper_trans; + new_upper_trans.set_attr(ObMemAttr(MTL_ID(), "NewUpTxnVer")); + UpdateUpperTransParam upper_trans_param; + upper_trans_param.new_upper_trans_ = &new_upper_trans; + if (OB_TMP_FAIL(ObGCUpperTransHelper::check_need_gc_or_update_upper_trans_version(ls, *tablet, multi_version_start, upper_trans_param, need_update))) { + LOG_WARN("faild to check need gc or update", K(tmp_ret), K(ls_id), K(tablet_id)); + } else if (need_update) { + ObArenaAllocator tmp_arena("RmOldTblTmp", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); + ObStorageSchema *storage_schema = nullptr; + if (OB_TMP_FAIL(tablet->load_storage_schema(tmp_arena, storage_schema))) { + LOG_WARN("failed to load storage schema", K(tmp_ret), K(tablet)); + } else { + ObUpdateTableStoreParam param(tablet->get_snapshot_version(), multi_version_start, storage_schema, rebuild_seq, upper_trans_param); + ObTabletHandle new_tablet_handle; // no use here + if (OB_TMP_FAIL(ls.update_tablet_table_store(tablet_id, param, new_tablet_handle))) { + LOG_WARN("failed to update table store", K(tmp_ret), K(param), K(ls_id), K(tablet_id)); + } else { + FLOG_INFO("success to remove old table in table store", K(tmp_ret), K(ls_id), + K(tablet_id), K(multi_version_start), KPC(tablet)); + } + } + ObTabletObjLoadHelper::free(tmp_arena, storage_schema); + } + } + } // end while + return ret; +} + int ObTenantTabletScheduler::schedule_all_tablets_minor() { int ret = OB_SUCCESS; @@ -1036,21 +1121,19 @@ int ObTenantTabletScheduler::schedule_tablet_meta_merge( LOG_WARN("multi version data is discarded, should not compaction now", K(ret), K(ls_id), 
K(tablet_id), K(result.merge_version_)); } else if (!tablet->is_row_store()) { - ret = OB_NO_NEED_MERGE; - FLOG_INFO("column store table is not supported for meta major merge", K(ret), K(tablet->is_row_store())); - // ObCOMergeDagParam dag_param; - // dag_param.ls_id_ = ls_id; - // dag_param.tablet_id_ = tablet->get_tablet_meta().tablet_id_; - // dag_param.merge_type_ = META_MAJOR_MERGE; - // dag_param.merge_version_ = result.merge_version_; - // dag_param.compat_mode_ = tablet->get_tablet_meta().compat_mode_; - // dag_param.transfer_seq_ = tablet->get_tablet_meta().transfer_info_.transfer_seq_; - // if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_tablet_co_merge_dag_net(dag_param))) { - // if (OB_EAGAIN != ret && OB_SIZE_OVERFLOW != ret) { - // LOG_WARN("failed to schedule tablet merge dag", K(ret)); - // } - // } - // FLOG_INFO("chaser debug schedule co merge dag", K(ret), K(dag_param), K(tablet->is_row_store())); + ObCOMergeDagParam dag_param; + dag_param.ls_id_ = ls_id; + dag_param.tablet_id_ = tablet->get_tablet_meta().tablet_id_; + dag_param.merge_type_ = META_MAJOR_MERGE; + dag_param.merge_version_ = result.merge_version_; + dag_param.compat_mode_ = tablet->get_tablet_meta().compat_mode_; + dag_param.transfer_seq_ = tablet->get_tablet_meta().transfer_info_.transfer_seq_; + if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_tablet_co_merge_dag_net(dag_param))) { + if (OB_EAGAIN != ret && OB_SIZE_OVERFLOW != ret) { + LOG_WARN("failed to schedule tablet merge dag", K(ret), K(dag_param)); + } + } + FLOG_INFO("chaser debug schedule co merge dag", K(ret), K(dag_param), K(tablet->is_row_store())); } else { ObTabletMergeDagParam dag_param(META_MAJOR_MERGE, ls_id, tablet_id, tablet->get_tablet_meta().transfer_info_.transfer_seq_); @@ -1064,7 +1147,79 @@ int ObTenantTabletScheduler::schedule_tablet_meta_merge( } if (OB_SUCC(ret)) { + MTL(ObTenantTabletStatMgr *)->clear_tablet_stat(ls_id, tablet_id); has_created_dag = true; + LOG_INFO("success to schedule meta 
merge", K(ret), K(tablet_id), "is_row_store", tablet->is_row_store()); + } + } + } + return ret; +} + +int ObTenantTabletScheduler::schedule_tablet_medium_merge( + ObLSHandle &ls_handle, + const ObTabletID &tablet_id, + bool &succ_create_dag) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + if (OB_UNLIKELY(!ls_handle.is_valid() || !tablet_id.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(ls_handle), K(tablet_id)); + } else { + bool need_merge = false; + ObLS &ls = *ls_handle.get_ls(); + if (OB_FAIL(ObTabletMergeChecker::check_ls_state_in_major(ls, need_merge))) { + LOG_WARN("failed to check ls state", K(ret), K(ls)); + } else if (!need_merge) { + // no need to merge, do nothing + ret = OB_STATE_NOT_MATCH; + } else { + ObTenantTabletScheduler *scheduler = MTL(ObTenantTabletScheduler *); + int64_t merge_version = scheduler->get_frozen_version(); + if (ObTenantTabletScheduler::INIT_COMPACTION_SCN == merge_version) { + merge_version = 0; + } else { + // TODO(chengkong): if merge_version > scheduler->merge_version_, whether to update progress? + } + bool unused_all_ls_weak_read_ts_ready = false; + ObTenantTabletMediumParam medium_param(merge_version, true /*is_tombstone_*/); + if (OB_FAIL(scheduler->prepare_ls_medium_merge(ls, medium_param, unused_all_ls_weak_read_ts_ready))) { + LOG_WARN("failed to prepare ls medium merge", K(ret), K(medium_param), K(unused_all_ls_weak_read_ts_ready)); + } else { + ObCompactionScheduleTimeGuard tablet_time_guard; + ObTabletSchedulePair schedule_pair; + bool unused_tablet_merge_finish = false; + bool medium_clog_submitted = false; + // ATTENTION!!! 
load weak ts before get tablet + const share::SCN &weak_read_ts = ls.get_ls_wrs_handler()->get_ls_weak_read_ts(); + const ObLSID &ls_id = ls.get_ls_id(); + ObTabletHandle tablet_handle; // catch up latest ls status + if (OB_FAIL(ls.get_tablet(tablet_id, tablet_handle))) { + LOG_WARN("failed to get tablet", K(ret), K(ls_id), K(tablet_handle)); + } else if (FALSE_IT(tablet_time_guard.click(ObCompactionScheduleTimeGuard::GET_TABLET))) { + } else if (OB_FAIL(scheduler->try_schedule_tablet_medium( + ls, + ls_id, + tablet_handle, + weak_read_ts, + medium_param, + false /*scheduler_called*/, + unused_tablet_merge_finish, + medium_clog_submitted, + succ_create_dag, + schedule_pair, + tablet_time_guard))) { + LOG_WARN("failed to try schedule tablet medium", K(ret), K(ls_handle), K(ls_id), K(tablet_handle), + K(weak_read_ts), K(medium_param), K(unused_tablet_merge_finish)); + } else if (schedule_pair.need_force_freeze()) { + if (OB_TMP_FAIL(MTL(ObTenantFreezer *)->tablet_freeze(tablet_id, true/*force_freeze*/, true/*is_sync*/))) { + LOG_WARN("failed to force freeze tablet", K(tmp_ret), K(ls_id), K(tablet_id)); + } + } + if (medium_clog_submitted && OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->clear_tablet_stat(ls_id, tablet_id))) { + LOG_WARN("failed to clear tablet stats", K(tmp_ret), K(ls_id), K(tablet_id)); + } } } } @@ -1362,8 +1517,9 @@ int ObTenantTabletScheduler::schedule_tablet_minor( need_fast_freeze_flag = false; const ObLSID &ls_id = ls_handle.get_ls()->get_ls_id(); - const ObTabletID &tablet_id = tablet_handle.get_obj()->get_tablet_meta().tablet_id_; - if (OB_FAIL(ObTabletMergeChecker::check_need_merge(ObMergeType::MINOR_MERGE, *tablet_handle.get_obj()))) { + const ObTablet &tablet = *tablet_handle.get_obj(); + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + if (OB_FAIL(ObTabletMergeChecker::check_need_merge(ObMergeType::MINOR_MERGE, tablet))) { if (OB_NO_NEED_MERGE != ret) { LOG_WARN("failed to check need merge", K(ret), K(ls_id), 
K(tablet_id)); } @@ -1382,8 +1538,8 @@ int ObTenantTabletScheduler::schedule_tablet_minor( } } - if (!fast_freeze_checker_.need_check()) { - } else if (OB_TMP_FAIL(fast_freeze_checker_.check_need_fast_freeze(*tablet_handle.get_obj(), need_fast_freeze_flag))) { + if (!fast_freeze_checker_.need_check() || tablet_id.is_inner_tablet() || tablet_id.is_ls_inner_tablet()) { + } else if (OB_TMP_FAIL(fast_freeze_checker_.check_need_fast_freeze(tablet, need_fast_freeze_flag))) { LOG_WARN("failed to check need fast freeze", K(tmp_ret), K(tablet_handle)); } } @@ -1437,6 +1593,7 @@ int ObTenantTabletScheduler::schedule_next_medium_for_leader( const ObLSID &ls_id = ls.get_ls_id(); const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; bool tablet_could_schedule_merge = false; + bool unused_medium_clog_submitted = false; ObMediumCompactionScheduleFunc func(ls, tablet_handle, weak_read_ts, *medium_info_list, &schedule_stats_); const int64_t last_major_snapshot_version = tablet.get_last_major_snapshot_version(); if (last_major_snapshot_version > 0 && last_major_snapshot_version >= major_merge_version) { @@ -1448,7 +1605,7 @@ int ObTenantTabletScheduler::schedule_next_medium_for_leader( if ((!tablet_merge_finish || get_enable_adaptive_compaction()) // schedule major or adaptive compaction && tablet_could_schedule_merge) { if (OB_FAIL(func.schedule_next_medium_for_leader( - tablet_merge_finish ? 0 : major_merge_version))) { // schedule another round + tablet_merge_finish ? 
0 : major_merge_version, false /*is_tombstone*/, unused_medium_clog_submitted))) { // schedule another round LOG_WARN("failed to schedule next medium", K(ret), K(ls_id), K(tablet_id)); if (OB_FAIL(MTL(compaction::ObDiagnoseTabletMgr *)->add_diagnose_tablet(ls_id, tablet_id, share::ObDiagnoseTabletType::TYPE_MEDIUM_MERGE))) { @@ -1524,62 +1681,130 @@ bool ObTenantTabletScheduler::get_enable_adaptive_compaction() return enable_adaptive_compaction; } +int ObTenantTabletScheduler::try_schedule_tablet_medium( + ObLS &ls, + const share::ObLSID &ls_id, + ObTabletHandle &tablet_handle, + const share::SCN &weak_read_ts, + ObTenantTabletMediumParam ¶m, + const bool scheduler_called, + bool &tablet_merge_finish, + bool &medium_clog_submitted, + bool &succ_create_dag, + ObTabletSchedulePair &schedule_pair, + ObCompactionTimeGuard &time_guard) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + ObTablet *tablet = nullptr; + ObTabletID tablet_id; + bool tablet_could_schedule_medium = false; + + if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid tablet handle", K(ret), K(ls_id), K(tablet_handle)); + } else if (FALSE_IT(tablet = tablet_handle.get_obj())) { + } else if (FALSE_IT(tablet_id = tablet->get_tablet_meta().tablet_id_)) { + } else if (tablet_id.is_ls_inner_tablet()) { + // do nothing + } else if (OB_FAIL(ObTabletMergeChecker::check_need_merge(ObMergeType::MEDIUM_MERGE, *tablet))) { + if (OB_NO_NEED_MERGE != ret) { + LOG_WARN("failed to check need merge", K(ret)); + } else { + ret = OB_SUCCESS; + } + } else if (param.is_leader_ && param.could_major_merge_ + && OB_TMP_FAIL(tablet_start_schedule_medium(tablet_id, tablet_could_schedule_medium))) { + LOG_WARN("failed to set start schedule medium", K(ret), K(tmp_ret), K(ls_id), K(tablet_id)); + } else if (FALSE_IT(report_blocking_medium(param.is_leader_, tablet_could_schedule_medium, param.could_major_merge_, ls_id))) { + } else if (OB_TMP_FAIL(schedule_tablet_medium( + ls, + 
tablet_handle, + weak_read_ts, + param, + tablet_could_schedule_medium, + scheduler_called, + tablet_merge_finish, + medium_clog_submitted, + succ_create_dag, + schedule_pair, + time_guard))) { + LOG_WARN("failed to schedule tablet medium", KR(tmp_ret), K(ls_id), K(tablet_id)); + } + if (tablet_could_schedule_medium + && OB_TMP_FAIL(clear_prohibit_medium_flag(tablet_id, ObProhibitScheduleMediumMap::ProhibitFlag::MEDIUM))) { + // clear flags set by tablet_start_schedule_medium + LOG_WARN("failed to clear prohibit schedule medium flag", K(tmp_ret), K(ret), K(ls_id), K(tablet_id)); + } + return ret; +} + +int ObTenantTabletScheduler::prepare_ls_medium_merge( + ObLS &ls, + ObTenantTabletMediumParam ¶m, + bool &all_ls_weak_read_ts_ready) +{ + int ret = OB_SUCCESS; + const ObLSID &ls_id = ls.get_ls_id(); + if (could_major_merge_start()) { + param.could_major_merge_ = true; + } else if (REACH_TENANT_TIME_INTERVAL(PRINT_LOG_INVERVAL)) { + LOG_INFO("major merge should not schedule", K(ret), K(param)); + } + // check weak_read_ts + if (param.merge_version_ >= 0) { + // the check here does not affect scheduling // diagnose info will be added in check_need_medium_merge + if (check_weak_read_ts_ready(param.merge_version_, ls)) { // weak read ts ready + if (OB_FAIL(ObMediumCompactionScheduleFunc::is_election_leader(ls_id, param.is_leader_))) { + if (OB_LS_NOT_EXIST != ret) { + LOG_WARN("failed to get palf handle role", K(ret), K(ls_id)); + } + } + } else { + all_ls_weak_read_ts_ready = false; + } + } + param.enable_adaptive_compaction_ = get_enable_adaptive_compaction(); + return ret; +} + int ObTenantTabletScheduler::schedule_ls_medium_merge( const int64_t merge_version, ObLSHandle &ls_handle, bool &all_ls_weak_read_ts_ready) { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; bool need_merge = false; ObLS &ls = *ls_handle.get_ls(); const ObLSID &ls_id = ls.get_ls_id(); - bool tablet_could_schedule_medium = false; + ObTenantTabletMediumParam medium_param(merge_version); if 
(OB_FAIL(ObTabletMergeChecker::check_ls_state_in_major(ls, need_merge))) { LOG_WARN("failed to check ls can schedule medium", K(ret), K(ls)); } else if (!need_merge) { // no need to merge, do nothing // TODO(@jingshui): add diagnose info ret = OB_STATE_NOT_MATCH; LOG_WARN("could not to merge now", K(ret), K(need_merge), K(ls_id)); + } else if (OB_FAIL(prepare_ls_medium_merge(ls, medium_param, all_ls_weak_read_ts_ready))) { + LOG_WARN("failed to prepare ls medium merge", K(ret), K(ls), K(medium_param), K(all_ls_weak_read_ts_ready)); } else { ObCompactionScheduleTimeGuard ls_time_guard; ObCompactionScheduleTimeGuard tablet_time_guard; ObTabletID tablet_id; ObTabletHandle tablet_handle; - ObTablet *tablet = nullptr; - int tmp_ret = OB_SUCCESS; - bool is_leader = false; - bool could_major_merge = false; - const int64_t major_frozen_scn = get_frozen_version(); ObSEArray need_freeze_tablets; need_freeze_tablets.set_attr(ObMemAttr(MTL_ID(), "MediumBatch")); - if (could_major_merge_start()) { - could_major_merge = true; - } else if (REACH_TENANT_TIME_INTERVAL(PRINT_LOG_INVERVAL)) { - LOG_INFO("major merge should not schedule", K(ret), K(merge_version)); - } - // check weak_read_ts - if (merge_version >= 0) { - // the check here does not affect scheduling // diagnose info will be added in check_need_medium_merge - if (check_weak_read_ts_ready(merge_version, ls)) { // weak read ts ready - if (OB_FAIL(ObMediumCompactionScheduleFunc::is_election_leader(ls_id, is_leader))) { - if (OB_LS_NOT_EXIST != ret) { - LOG_WARN("failed to get palf handle role", K(ret), K(ls_id)); - } - } - } else { - all_ls_weak_read_ts_ready = false; - } - } - - bool enable_adaptive_compaction = get_enable_adaptive_compaction(); + ObSEArray batched_clear_stat_tablets; + batched_clear_stat_tablets.set_attr(ObMemAttr(MTL_ID(), "BatchClearTblts")); ObTabletSchedulePair schedule_pair; while (OB_SUCC(ret)) { // loop all tablet in ls tablet_time_guard.reuse(); bool tablet_merge_finish = false; + bool 
medium_clog_submitted = false; + bool succ_create_dag = false; // ATTENTION!!! load weak ts before get tablet const share::SCN &weak_read_ts = ls.get_ls_wrs_handler()->get_ls_weak_read_ts(); - tablet_could_schedule_medium = false; schedule_pair.reset(); if (OB_FAIL(medium_ls_tablet_iter_.get_next_tablet(tablet_handle))) { @@ -1590,33 +1815,25 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge( LOG_WARN("failed to get tablet", K(ret), K(ls_id), K(tablet_handle)); } } else if (FALSE_IT(tablet_time_guard.click(ObCompactionScheduleTimeGuard::GET_TABLET))) { - } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid tablet handle", K(ret), K(ls_id), K(tablet_handle)); - } else if (FALSE_IT(tablet = tablet_handle.get_obj())) { - } else if (OB_FAIL(ObTabletMergeChecker::check_need_merge(ObMergeType::MEDIUM_MERGE, *tablet))) { - if (OB_NO_NEED_MERGE != ret) { - LOG_WARN("failed to check need merge", K(ret)); - } else { - ret = OB_SUCCESS; + } else if (OB_FAIL(try_schedule_tablet_medium( + ls, + ls_id, + tablet_handle, + weak_read_ts, + medium_param, + true /*scheduler_called*/, + tablet_merge_finish, + medium_clog_submitted, + succ_create_dag, + schedule_pair, + tablet_time_guard))) { + LOG_WARN("failed to try schedule tablet medium", K(ret), K(ls_handle), K(ls_id), + K(tablet_handle), K(weak_read_ts), K(medium_param), K(tablet_merge_finish)); + } else if (medium_clog_submitted) { + tablet_id = tablet_handle.get_obj()->get_tablet_meta().tablet_id_; + if (OB_TMP_FAIL(batched_clear_stat_tablets.push_back(tablet_id))) { + LOG_WARN("failed to add tablet to clear stat", K(tmp_ret), K(ls_id), K(tablet_id)); } - } else if (FALSE_IT(tablet_id = tablet->get_tablet_meta().tablet_id_)) { - } else if (tablet_id.is_ls_inner_tablet()) { - // do nothing - } else if (is_leader && could_major_merge - && OB_TMP_FAIL(tablet_start_schedule_medium(tablet_id, tablet_could_schedule_medium))) { - LOG_WARN("failed to set start schedule medium", 
K(ret), K(tmp_ret), K(ls_id), K(tablet_id)); - } else if (FALSE_IT(report_blocking_medium(is_leader, tablet_could_schedule_medium, could_major_merge, ls_id))) { - } else if (OB_TMP_FAIL(schedule_tablet_medium( - ls, tablet_handle, major_frozen_scn, weak_read_ts, - could_major_merge, tablet_could_schedule_medium, merge_version, enable_adaptive_compaction, - is_leader, tablet_merge_finish, schedule_pair, tablet_time_guard))) { - LOG_WARN("failed to schedule tablet medium", KR(tmp_ret), K(ls_id), K(tablet_id)); - } - if (tablet_could_schedule_medium - && OB_TMP_FAIL(clear_prohibit_medium_flag(tablet_id, ObProhibitScheduleMediumMap::ProhibitFlag::MEDIUM))) { - // clear flags set by tablet_start_schedule_medium - LOG_WARN("failed to clear prohibit schedule medium flag", K(tmp_ret), K(ret), K(ls_id), K(tablet_id)); } medium_ls_tablet_iter_.update_merge_finish(tablet_merge_finish); if (schedule_pair.need_force_freeze()) { @@ -1627,6 +1844,10 @@ int ObTenantTabletScheduler::schedule_ls_medium_merge( ls_time_guard.add_time_guard(tablet_time_guard); } // end of while + // most tablets will clear failed since the capacity of ObTenantTabletStatMgr is limited + if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->batch_clear_tablet_stat(ls_id, batched_clear_stat_tablets))) { + LOG_WARN("failed to batch clear tablet stats", K(tmp_ret), K(ls_id)); + } if (OB_FAIL(ret) || need_freeze_tablets.empty()) { } else if (OB_TMP_FAIL(schedule_batch_freeze_dag(ls_id, need_freeze_tablets))) { @@ -1670,14 +1891,13 @@ int ObTenantTabletScheduler::update_tablet_report_status( int ObTenantTabletScheduler::schedule_tablet_medium( ObLS &ls, ObTabletHandle &tablet_handle, - const int64_t major_frozen_scn, const share::SCN &weak_read_ts, - const bool could_major_merge, + ObTenantTabletMediumParam ¶m, const bool tablet_could_schedule_medium, - const int64_t merge_version, - const bool enable_adaptive_compaction, - bool &is_leader, + const bool scheduler_called, bool &tablet_merge_finish, + bool 
&medium_clog_submitted, + bool &succ_create_dag, ObTabletSchedulePair &schedule_pair, ObCompactionTimeGuard &time_guard) { @@ -1688,7 +1908,7 @@ int ObTenantTabletScheduler::schedule_tablet_medium( const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; bool need_diagnose = false; bool tablet_could_schedule_merge = false; - bool create_dag_flag = false; + succ_create_dag = false; schedule_pair.reset(); if (tablet_could_schedule_medium @@ -1700,7 +1920,7 @@ int ObTenantTabletScheduler::schedule_tablet_medium( const compaction::ObMediumCompactionInfoList *medium_list = nullptr; const int64_t last_major_snapshot_version = tablet.get_last_major_snapshot_version(); - if (last_major_snapshot_version > 0 && last_major_snapshot_version >= merge_version) { // merge_version can be zero here. + if (last_major_snapshot_version > 0 && last_major_snapshot_version >= param.merge_version_) { // merge_version can be zero here. tablet_merge_finish = true; schedule_stats_.finish_cnt_++; } @@ -1712,9 +1932,8 @@ int ObTenantTabletScheduler::schedule_tablet_medium( } else if (FALSE_IT(time_guard.click(ObCompactionScheduleTimeGuard::UPDATE_TABLET_REPORT_STATUS))){ } LOG_TRACE("schedule tablet medium", K(ret), K(ls_id), K(tablet_id), - K(tablet_merge_finish), K(last_major_snapshot_version), K(merge_version), K(is_leader), - K(could_major_merge), K(enable_adaptive_compaction), K(tablet_could_schedule_merge)); - if (OB_FAIL(ret) || !is_leader || 0 >= last_major_snapshot_version) { + K(tablet_merge_finish), K(last_major_snapshot_version), K(param), K(tablet_could_schedule_merge)); + if (OB_FAIL(ret) || !param.is_leader_ || 0 >= last_major_snapshot_version) { // follower or no major: do nothing need_diagnose = true; } else if (OB_FAIL(tablet.read_medium_info_list(tmp_allocator, medium_list))) { @@ -1726,14 +1945,14 @@ int ObTenantTabletScheduler::schedule_tablet_medium( tablet_id, ls_id, medium_list->get_wait_check_medium_scn()))) { LOG_WARN("failed to add tablet", K(tmp_ret), 
K(ls_id), K(tablet_id)); } - } else if (could_major_merge - && (!tablet_merge_finish || enable_adaptive_compaction) + } else if (param.could_major_merge_ + && (!tablet_merge_finish || param.enable_adaptive_compaction_) && tablet_could_schedule_merge) { // schedule another round ObMediumCompactionScheduleFunc func(ls, tablet_handle, weak_read_ts, *medium_list, &schedule_stats_); - if (OB_TMP_FAIL(func.schedule_next_medium_for_leader(tablet_merge_finish ? 0 : merge_version))) { + if (OB_TMP_FAIL(func.schedule_next_medium_for_leader(tablet_merge_finish ? 0 : param.merge_version_, param.is_tombstone_, medium_clog_submitted))) { if (OB_NOT_MASTER == tmp_ret) { - is_leader = false; + param.is_leader_ = false; } else { LOG_WARN("failed to schedule next medium", K(tmp_ret), K(ls_id), K(tablet_id)); } @@ -1744,23 +1963,23 @@ int ObTenantTabletScheduler::schedule_tablet_medium( } if (OB_FAIL(ret)) { - } else if (could_major_merge) { + } else if (param.could_major_merge_) { if (OB_TMP_FAIL(ObMediumCompactionScheduleFunc::schedule_tablet_medium_merge( - ls, tablet, schedule_pair, create_dag_flag, - major_frozen_scn, true /*scheduler_called*/))) { + ls, tablet, schedule_pair, succ_create_dag, + param.merge_version_, scheduler_called))) { if (OB_EAGAIN != ret) { LOG_WARN("failed to schedule medium", K(tmp_ret), K(ls_id), K(tablet_id)); } need_diagnose = true; - } else if (create_dag_flag) { + } else if (succ_create_dag) { ++schedule_stats_.schedule_dag_cnt_; } else if (FALSE_IT(time_guard.click(ObCompactionScheduleTimeGuard::SCHEDULE_TABLET_MEDIUM))){ } - } else if (major_frozen_scn > merged_version_ // could_major_merge = false + } else if (param.merge_version_ > merged_version_ // could_major_merge = false && OB_TMP_FAIL(ADD_SUSPECT_INFO( MEDIUM_MERGE, share::ObDiagnoseTabletType::TYPE_MEDIUM_MERGE, ls_id, tablet_id, ObSuspectInfoType::SUSPECT_SUSPEND_MERGE, - major_frozen_scn, + param.merge_version_, static_cast(tablet.is_row_store())))) { LOG_WARN("failed to add suspect 
info", K(tmp_ret)); } @@ -1992,6 +2211,7 @@ int ObTenantTabletScheduler::try_schedule_tablet_medium_merge( *ls_handle.get_ls(), tablet_handle, weak_read_ts, *medium_info_list, nullptr /*schedule_stat*/, is_rebuild_column_group ? ObAdaptiveMergePolicy::REBUILD_COLUMN_GROUP : ObAdaptiveMergePolicy::USER_REQUEST); + bool unused_medium_clog_submitted = false; const int64_t merge_version = get_frozen_version(); const int64_t last_major_snapshot_version = tablet_handle.get_obj()->get_last_major_snapshot_version(); @@ -2003,7 +2223,7 @@ int ObTenantTabletScheduler::try_schedule_tablet_medium_merge( ret = OB_MAJOR_FREEZE_NOT_FINISHED; LOG_WARN("tablet need check finish, can't schedule another medium", K(ret), K(ls_id), K(tablet_id), "wait_check_medium_scn", medium_info_list->get_wait_check_medium_scn()); - } else if (OB_TMP_FAIL(func.schedule_next_medium_for_leader(0/*major_snapshot*/))) { + } else if (OB_TMP_FAIL(func.schedule_next_medium_for_leader(0/*major_snapshot*/, false/*is_tombstone*/, unused_medium_clog_submitted))) { if (OB_EAGAIN != tmp_ret) { LOG_WARN("failed to schedule medium", K(tmp_ret), K(ls_id), K(tablet_id)); } diff --git a/src/storage/compaction/ob_tenant_tablet_scheduler.h b/src/storage/compaction/ob_tenant_tablet_scheduler.h index 1c5fab0725..21aa6c800a 100644 --- a/src/storage/compaction/ob_tenant_tablet_scheduler.h +++ b/src/storage/compaction/ob_tenant_tablet_scheduler.h @@ -65,12 +65,14 @@ private: bool &need_fast_freeze); void check_tombstone_need_fast_freeze( const storage::ObTablet &tablet, + const ObTableQueuingModeCfg &queuing_cfg, memtable::ObMemtable &memtable, bool &need_fast_freeze); void try_update_tablet_threshold( const storage::ObTabletStatKey &key, const storage::ObMtStat &mt_stat, const int64_t memtable_create_timestamp, + const ObTableQueuingModeCfg &queuing_cfg, int64_t &adaptive_threshold); private: static const int64_t FAST_FREEZE_INTERVAL_US = 300 * 1000 * 1000L; //300s @@ -123,6 +125,28 @@ private: common::hash::ObHashMap 
tablet_id_map_; // tablet is used for transfer of medium compaction }; +struct ObTenantTabletMediumParam +{ +public: + explicit ObTenantTabletMediumParam(const int64_t &merge_version, bool is_tombstone = false) + : merge_version_(merge_version), + is_tombstone_(is_tombstone), + is_leader_(false), + could_major_merge_(false), + enable_adaptive_compaction_(false) + {} + ~ObTenantTabletMediumParam() = default; + TO_STRING_KV(K_(merge_version), K_(is_tombstone), K_(is_leader), K_(could_major_merge), K_(enable_adaptive_compaction)); +public: + const int64_t merge_version_; + bool is_tombstone_; // tombstone scene after mini + bool is_leader_; + bool could_major_merge_; + bool enable_adaptive_compaction_; +private: + DISALLOW_COPY_AND_ASSIGN(ObTenantTabletMediumParam); +}; + class ObTenantTabletScheduler { public: @@ -179,6 +203,7 @@ public: int64_t get_bf_queue_size() const { return bf_queue_.task_count(); } int schedule_merge(const int64_t broadcast_version); int update_upper_trans_version_and_gc_sstable(); + int try_update_upper_trans_version_and_gc_sstable(ObLS &ls, ObCompactionScheduleIterator &iter); int check_ls_compaction_finish(const share::ObLSID &ls_id); int schedule_all_tablets_minor(); @@ -208,6 +233,10 @@ public: ObLSHandle &ls_handle, ObTabletHandle &tablet_handle, bool &has_created_dag); + static int schedule_tablet_medium_merge( + ObLSHandle &ls_handle, + const ObTabletID &tablet_id, + bool &succ_create_dag); template static int schedule_merge_execute_dag( const compaction::ObTabletMergeDagParam ¶m, @@ -229,7 +258,22 @@ public: ObTabletHandle &tablet_handle); int get_min_dependent_schema_version(int64_t &min_schema_version); - + int prepare_ls_medium_merge( + ObLS &ls, + ObTenantTabletMediumParam ¶m, + bool &all_ls_weak_read_ts_ready); + int try_schedule_tablet_medium( + ObLS &ls, + const share::ObLSID &ls_id, + ObTabletHandle &tablet_handle, + const share::SCN &weak_read_ts, + ObTenantTabletMediumParam ¶m, + const bool scheduler_called, + bool 
&tablet_merge_finish, + bool &medium_clog_submitted, + bool &succ_create_dag, + ObTabletSchedulePair &schedule_pair, + ObCompactionTimeGuard &time_guard); int try_schedule_tablet_medium_merge( const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, @@ -255,14 +299,13 @@ private: OB_INLINE int schedule_tablet_medium( ObLS &ls, ObTabletHandle &tablet_handle, - const int64_t major_frozen_scn, const share::SCN &weak_read_ts, - const bool could_major_merge, + ObTenantTabletMediumParam ¶m, const bool tablet_could_schedule_medium, - const int64_t merge_version, - const bool enable_adaptive_compaction, - bool &is_leader, + const bool scheduler_called, bool &tablet_merge_finish, + bool &medium_clog_submitted, + bool &succ_create_dag, ObTabletSchedulePair &schedule_pair, ObCompactionTimeGuard &time_guard); int after_schedule_tenant_medium( diff --git a/src/storage/ls/ob_ls.cpp b/src/storage/ls/ob_ls.cpp index 0e37d01b4f..32f75f5e58 100755 --- a/src/storage/ls/ob_ls.cpp +++ b/src/storage/ls/ob_ls.cpp @@ -1671,6 +1671,32 @@ int ObLS::build_new_tablet_from_mds_table( return ret; } +int ObLS::check_ls_migration_status( + bool &ls_is_migration, + int64_t &rebuild_seq) +{ + int ret = OB_SUCCESS; + RDLockGuard guard(meta_rwlock_); + ls_is_migration = false; + rebuild_seq = 0; + ObMigrationStatus migration_status; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ls is not inited", K(ret)); + } else if (OB_UNLIKELY(is_stopped())) { + ret = OB_NOT_RUNNING; + LOG_WARN("ls stopped", K(ret), K_(ls_meta)); + } else if (OB_FAIL(ls_meta_.get_migration_status(migration_status))) { + LOG_WARN("failed to get migration status", K(ret), KPC(this)); + } else if (ObMigrationStatus::OB_MIGRATION_STATUS_NONE != migration_status) { + //no need update upper trans version + ls_is_migration = true; + } else { + rebuild_seq = get_rebuild_seq(); + } + return ret; +} + int ObLS::finish_slog_replay() { int ret = OB_SUCCESS; @@ -2254,83 +2280,6 @@ int ObLS::disable_replay_without_lock() return 
ret; } -int ObLS::try_update_upper_trans_version_and_gc_sstable( - compaction::ObCompactionScheduleIterator &iter) -{ - int ret = OB_SUCCESS; - int tmp_ret = OB_SUCCESS; - RDLockGuard guard(meta_rwlock_); - bool update_upper_trans_version = true; - const share::ObLSID &ls_id = get_ls_id(); - ObMigrationStatus migration_status; - - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ls is not inited", K(ret)); - } else if (OB_UNLIKELY(is_stopped())) { - ret = OB_NOT_RUNNING; - LOG_WARN("ls stopped", K(ret), K_(ls_meta)); - } else if (OB_FAIL(ls_meta_.get_migration_status(migration_status))) { - LOG_WARN("failed to get migration status", K(ret), KPC(this)); - } else if (ObMigrationStatus::OB_MIGRATION_STATUS_NONE != migration_status) { - update_upper_trans_version = false; - } - - ObTabletHandle tablet_handle; - ObTablet *tablet = nullptr; - common::ObTabletID tablet_id; - while (OB_SUCC(ret)) { - if (OB_FAIL(iter.get_next_tablet(tablet_handle))) { - if (OB_ITER_END == ret) { - ret = OB_SUCCESS; - break; - } else { - LOG_WARN("failed to get tablet", K(ret), K(tablet_handle)); - } - } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid tablet handle", K(ret), K(tablet_handle)); - } else if (FALSE_IT(tablet = tablet_handle.get_obj())) { - } else if (FALSE_IT(tablet_id = tablet->get_tablet_meta().tablet_id_)) { - } else { - // 1. try to update upper trans version - bool is_updated = false; - if (!update_upper_trans_version || !tablet->get_tablet_meta().ha_status_.is_data_status_complete()) { - // no need to update upper trans version - } else if (OB_TMP_FAIL(tablet_handle.get_obj()->update_upper_trans_version(*this, is_updated))) { - LOG_WARN("failed to update upper trans version", K(tmp_ret), K(ls_id), K(tablet_id), KPC(tablet)); - } - - // 2. 
try to gc sstable - ObStorageSnapshotInfo snapshot_info; - bool need_remove = false; - if (tablet_id.is_special_merge_tablet()) { - // no need to gc sstable for special tablet - } else if (OB_TMP_FAIL(tablet->get_kept_snapshot_info(get_min_reserved_snapshot(), snapshot_info))) { - LOG_WARN("failed to get multi version start", K(tmp_ret), K(tablet_id)); - } else if (OB_TMP_FAIL(tablet->check_need_remove_old_table(snapshot_info.snapshot_, need_remove))) { - LOG_WARN("failed to check need remove old store", K(tmp_ret), K(snapshot_info), K(tablet_id)); - } else if (need_remove) { - ObArenaAllocator tmp_arena("RmOldTblTmp", OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID()); - ObStorageSchema *storage_schema = nullptr; - if (OB_TMP_FAIL(tablet->load_storage_schema(tmp_arena, storage_schema))) { - LOG_WARN("failed to load storage schema", K(tmp_ret), K(tablet)); - } else { - ObUpdateTableStoreParam param(tablet->get_snapshot_version(), snapshot_info.snapshot_, storage_schema, get_rebuild_seq()); - ObTabletHandle new_tablet_handle; // no use here - if (OB_TMP_FAIL(update_tablet_table_store_without_lock_(tablet_id, param, new_tablet_handle))) { - LOG_WARN("failed to update table store", K(tmp_ret), K(param), K(ls_id), K(tablet_id)); - } else { - FLOG_INFO("success to remove old table in table store", K(tmp_ret), K(ls_id), - K(tablet_id), K(snapshot_info), KPC(tablet)); - } - } - ObTabletObjLoadHelper::free(tmp_arena, storage_schema); - } - } - } // end while - return ret; -} int ObLS::update_ls_meta(const bool update_restore_status, const ObLSMeta &src_ls_meta) diff --git a/src/storage/ls/ob_ls.h b/src/storage/ls/ob_ls.h index 617d907920..7444684900 100644 --- a/src/storage/ls/ob_ls.h +++ b/src/storage/ls/ob_ls.h @@ -900,6 +900,8 @@ public: DELEGATE_WITH_RET(tx_table_, get_tx_table_guard, int); DELEGATE_WITH_RET(tx_table_, get_upper_trans_version_before_given_scn, int); DELEGATE_WITH_RET(tx_table_, generate_virtual_tx_data_row, int); + DELEGATE_WITH_RET(tx_table_, 
get_uncommitted_tx_min_start_scn, int); + DELEGATE_WITH_RET(tx_table_, update_min_start_scn_info, void); DELEGATE_WITH_RET(tx_table_, dump_single_tx_data_2_text, int); // ObCheckpointExecutor interface: @@ -930,14 +932,14 @@ public: int build_ha_tablet_new_table_store( const ObTabletID &tablet_id, const ObBatchUpdateTableStoreParam &param); - int try_update_upper_trans_version_and_gc_sstable( - compaction::ObCompactionScheduleIterator &iter); int build_new_tablet_from_mds_table( const int64_t ls_rebuild_seq, const common::ObTabletID &tablet_id, const int64_t mds_construct_sequence, const share::SCN &flush_scn); - int try_update_uppder_trans_version(); + int check_ls_migration_status( + bool &ls_is_migration, + int64_t &rebuild_seq); int diagnose(DiagnoseInfo &info) const; DELEGATE_WITH_RET(reserved_snapshot_mgr_, replay_reserved_snapshot_log, int); diff --git a/src/storage/ls/ob_ls_transfer_status.cpp b/src/storage/ls/ob_ls_transfer_status.cpp index 8de06ef55a..95df82e601 100644 --- a/src/storage/ls/ob_ls_transfer_status.cpp +++ b/src/storage/ls/ob_ls_transfer_status.cpp @@ -234,19 +234,18 @@ int ObLSTransferStatus::enable_upper_trans_calculation_(const share::SCN op_scn) { int ret = OB_SUCCESS; ObTxTableGuard guard; - ObTxDataTable *tx_data_table = nullptr; + ObTxTable *tx_table = nullptr; if (OB_FAIL(ls_->get_tx_table_guard(guard))) { TRANS_LOG(WARN, "failed to get tx table", K(ret)); } else if (OB_UNLIKELY(!guard.is_valid())) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "tx table guard is invalid", K(ret), K(guard)); - } else if (OB_ISNULL(tx_data_table = - guard.get_tx_table()->get_tx_data_table())) { + } else if (OB_ISNULL(tx_table = guard.get_tx_table())) { ret = OB_ERR_UNEXPECTED; - TRANS_LOG(WARN, "tx data table in tx table is nullptr.", K(ret)); + TRANS_LOG(WARN, "tx table is nullptr.", K(ret)); } else { - tx_data_table->enable_upper_trans_calculation(op_scn); + tx_table->enable_upper_trans_calculation(op_scn); TRANS_LOG(INFO, "enable upper trans calculation", KPC(ls_), K(guard), KPC(this)); } @@ -257,23 +256,23 
@@ int ObLSTransferStatus::disable_upper_trans_calculation_() { int ret = OB_SUCCESS; ObTxTableGuard guard; - ObTxDataTable *tx_data_table = nullptr; + ObTxTable *tx_table = nullptr; if (OB_FAIL(ls_->get_tx_table_guard(guard))) { TRANS_LOG(WARN, "failed to get tx table", K(ret)); } else if (OB_UNLIKELY(!guard.is_valid())) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "tx table guard is invalid", K(ret), K(guard)); - } else if (OB_ISNULL(tx_data_table = - guard.get_tx_table()->get_tx_data_table())) { + } else if (OB_ISNULL(tx_table = guard.get_tx_table())) { ret = OB_ERR_UNEXPECTED; - TRANS_LOG(WARN, "tx data table in tx table is nullptr.", K(ret)); + TRANS_LOG(WARN, "tx table is nullptr.", K(ret)); } else { - tx_data_table->disable_upper_trans_calculation(); + (void)tx_table->disable_upper_trans_calculation(); TRANS_LOG(INFO, "disable upper trans calculation", KPC(ls_), K(guard), KPC(this)); } return ret; } -} -} + +} // namespace storage +} // namespace oceanbase diff --git a/src/storage/ob_gc_upper_trans_helper.cpp b/src/storage/ob_gc_upper_trans_helper.cpp new file mode 100644 index 0000000000..c2fe6178f0 --- /dev/null +++ b/src/storage/ob_gc_upper_trans_helper.cpp @@ -0,0 +1,121 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include "storage/ob_gc_upper_trans_helper.h" +#define USING_LOG_PREFIX STORAGE + +namespace oceanbase +{ +namespace storage +{ +/* Asks the LS for the upper trans version of an sstable whose own value is still INT64_MAX. new_upper_trans_version stays INT64_MAX when the sstable already has a decided value or when no value can be computed yet. */ +int ObGCUpperTransHelper::try_get_sstable_upper_trans_version( + ObLS &ls, + const blocksstable::ObSSTable &sstable, + int64_t &new_upper_trans_version) +{ + int ret = OB_SUCCESS; + const ObLSID &ls_id = ls.get_ls_id(); + const ObTabletID &tablet_id = sstable.get_key().get_tablet_id(); + new_upper_trans_version = INT64_MAX; + if (INT64_MAX == sstable.get_upper_trans_version()) { + int64_t max_trans_version = INT64_MAX; + SCN tmp_scn = SCN::max_scn(); + if (OB_FAIL(ls.get_upper_trans_version_before_given_scn(sstable.get_end_scn(), tmp_scn))) { + LOG_WARN("failed to get upper trans version before given log ts", K(ret), K(sstable)); + } else if (FALSE_IT(max_trans_version = tmp_scn.get_val_for_tx())) { + } else if (0 == max_trans_version) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("max trans version should not be 0", K(ret), K(sstable)); + } else if (INT64_MAX != max_trans_version) { + new_upper_trans_version = max_trans_version; + FLOG_INFO("success to get new upper trans version", K(ret), K(ls_id), K(tablet_id), K(max_trans_version), K(sstable)); + } else { + LOG_TRACE("can not get upper trans version", K(ret), K(ls_id), K(tablet_id)); + } + } + return ret; +} +/* Decides whether the tablet's table store needs an update: need_update is set when a mini/minor sstable obtained a decided upper trans version, or otherwise when need_remove_old_table() reports it. upper_trans_param.new_upper_trans_ must be a non-null, EMPTY array; it receives one entry per mini/minor sstable and upper_trans_param is reset when no upper trans version changed. */ +int ObGCUpperTransHelper::check_need_gc_or_update_upper_trans_version( + ObLS &ls, + const ObTablet &tablet, + int64_t &multi_version_start, + UpdateUpperTransParam &upper_trans_param, + bool &need_update) +{ + int ret = OB_SUCCESS; + const ObLSID &ls_id = ls.get_ls_meta().ls_id_; + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + bool is_paused = false; /* TODO(DanLing) get is_paused */ + need_update = false; + ObTabletMemberWrapper<ObTabletTableStore> table_store_wrapper; + ObStorageSnapshotInfo snapshot_info; + ObIArray<int64_t> *new_upper_trans = upper_trans_param.new_upper_trans_; + if (OB_UNLIKELY(new_upper_trans == nullptr || !new_upper_trans->empty())) { + ret = OB_INVALID_ARGUMENT; + 
LOG_WARN("new_upper_trans is nullptr or not empty", K(ret), K(ls_id), K(tablet_id), K(new_upper_trans)); + } else if (is_paused) { + ret = OB_EAGAIN; + LOG_INFO("paused, cannot update trans version now", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_FAIL(tablet.fetch_table_store(table_store_wrapper))) { + LOG_WARN("fail to fetch table store", K(ret)); + } else if (tablet.get_tablet_meta().ha_status_.is_data_status_complete()) { + ObITable *table = nullptr; + ObSSTable *sstable = nullptr; + int64_t new_upper_trans_version = INT64_MAX; + ObTableStoreIterator iter(false/*is_reverse*/, true/*need_load_sstable*/); + if (OB_FAIL(table_store_wrapper.get_member()->get_mini_minor_sstables(iter))) { + LOG_WARN("fail to get mini minor sstable", K(ret), K(table_store_wrapper)); + } + while (OB_SUCC(ret) && OB_SUCC(iter.get_next(table))) { + if (OB_ISNULL(table) || OB_UNLIKELY(!table->is_sstable())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, table is nullptr", K(ret), KPC(table)); + } else if (FALSE_IT(sstable = reinterpret_cast<ObSSTable *>(table))) { + } else if (FALSE_IT(upper_trans_param.last_minor_end_scn_ = sstable->get_end_scn())) { + } else if (FALSE_IT(new_upper_trans_version = sstable->get_upper_trans_version())) { + } else if (INT64_MAX != new_upper_trans_version) { + if (OB_FAIL(new_upper_trans->push_back(new_upper_trans_version))) { + LOG_WARN("failed to push back new upper_trans_version", K(ret), K(new_upper_trans_version), KPC(sstable)); + } + } else if (OB_FAIL(try_get_sstable_upper_trans_version(ls, *sstable, new_upper_trans_version))) { + LOG_WARN("failed to update upper trans version", K(ret), KPC(sstable)); + } else { + need_update = need_update || (INT64_MAX != new_upper_trans_version); + if (OB_FAIL(new_upper_trans->push_back(new_upper_trans_version))) { + LOG_WARN("failed to push back new upper_trans_version", K(ret), K(new_upper_trans_version), KPC(sstable)); + } + } + } + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + } + + if (OB_FAIL(ret)) { + } 
else if (OB_FAIL(tablet.get_kept_snapshot_info(ls.get_min_reserved_snapshot(), snapshot_info))) { + LOG_WARN("failed to get multi version start", K(ret), K(tablet_id)); + } else if (FALSE_IT(multi_version_start = snapshot_info.snapshot_)) { + } else if (need_update) { + /* need to update table store so skip checking gc status */ + } else if (OB_FAIL(table_store_wrapper.get_member()->need_remove_old_table(multi_version_start, need_update))) { + LOG_WARN("failed to check need rebuild table store", K(ret), K(multi_version_start)); + } else { + upper_trans_param.reset(); /* no need to update upper trans version */ + } + + return ret; +} + +} // namespace storage +} // namespace oceanbase \ No newline at end of file diff --git a/src/storage/ob_gc_upper_trans_helper.h b/src/storage/ob_gc_upper_trans_helper.h new file mode 100644 index 0000000000..224de75041 --- /dev/null +++ b/src/storage/ob_gc_upper_trans_helper.h @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + + #ifndef OCEANBASE_STORAGE_OB_GC_UPPER_TRANS_HELPER + #define OCEANBASE_STORAGE_OB_GC_UPPER_TRANS_HELPER + +#include "storage/ls/ob_ls.h" +#include "storage/tablet/ob_tablet.h" + +namespace oceanbase +{ +namespace storage +{ + +class ObGCUpperTransHelper +{ +public: +static int try_get_sstable_upper_trans_version( + ObLS &ls, + const blocksstable::ObSSTable &sstable, + int64_t &new_upper_trans_version); + +static int check_need_gc_or_update_upper_trans_version( + ObLS &ls, + const ObTablet &tablet, + int64_t &multi_version_start, + UpdateUpperTransParam &upper_trans_param, + bool &need_update); +}; + +} // namespace storage +} // namespace oceanbase + + #endif \ No newline at end of file diff --git a/src/storage/ob_storage_struct.cpp b/src/storage/ob_storage_struct.cpp index 0b50a1bf80..8aa4d8a860 100644 --- a/src/storage/ob_storage_struct.cpp +++ b/src/storage/ob_storage_struct.cpp @@ -339,6 +339,23 @@ bool ObDDLTableStoreParam::is_valid() const && data_format_version_ >= 0; } +UpdateUpperTransParam::UpdateUpperTransParam() + : new_upper_trans_(nullptr), + last_minor_end_scn_() +{ + last_minor_end_scn_.set_min(); +} + +UpdateUpperTransParam::~UpdateUpperTransParam() +{ + reset(); +} + +void UpdateUpperTransParam::reset() +{ + new_upper_trans_ = nullptr; + last_minor_end_scn_.set_min(); +} ObUpdateTableStoreParam::ObUpdateTableStoreParam( const int64_t snapshot_version, @@ -358,7 +375,33 @@ ObUpdateTableStoreParam::ObUpdateTableStoreParam( allow_duplicate_sstable_(false), need_check_transfer_seq_(false), transfer_seq_(-1), - merge_type_(MERGE_TYPE_MAX) + merge_type_(MERGE_TYPE_MAX), + upper_trans_param_() +{ + clog_checkpoint_scn_.set_min(); +} + +ObUpdateTableStoreParam::ObUpdateTableStoreParam( + const int64_t snapshot_version, + const int64_t multi_version_start, + const ObStorageSchema *storage_schema, + const int64_t rebuild_seq, + const UpdateUpperTransParam upper_trans_param) + : sstable_(nullptr), + snapshot_version_(snapshot_version), + 
clog_checkpoint_scn_(), + multi_version_start_(multi_version_start), + need_report_(false), + storage_schema_(storage_schema), + rebuild_seq_(rebuild_seq), + update_with_major_flag_(false), + need_check_sstable_(false), + ddl_info_(), + allow_duplicate_sstable_(false), + need_check_transfer_seq_(false), + transfer_seq_(-1), + merge_type_(MERGE_TYPE_MAX), + upper_trans_param_(upper_trans_param) { clog_checkpoint_scn_.set_min(); } @@ -389,7 +432,8 @@ ObUpdateTableStoreParam::ObUpdateTableStoreParam( allow_duplicate_sstable_(allow_duplicate_sstable), need_check_transfer_seq_(need_check_transfer_seq), transfer_seq_(transfer_seq), - merge_type_(merge_type) + merge_type_(merge_type), + upper_trans_param_() { clog_checkpoint_scn_ = clog_checkpoint_scn; } @@ -416,7 +460,8 @@ ObUpdateTableStoreParam::ObUpdateTableStoreParam( allow_duplicate_sstable_(false), need_check_transfer_seq_(false), transfer_seq_(-1), - merge_type_(merge_type) + merge_type_(merge_type), + upper_trans_param_() { clog_checkpoint_scn_.set_min(); } diff --git a/src/storage/ob_storage_struct.h b/src/storage/ob_storage_struct.h index f18cff8ab6..a1623d2eeb 100644 --- a/src/storage/ob_storage_struct.h +++ b/src/storage/ob_storage_struct.h @@ -343,6 +343,18 @@ public: int64_t data_format_version_; }; +struct UpdateUpperTransParam final /* carries newly computed upper trans versions into a table store update */ +{ +public: + UpdateUpperTransParam(); + ~UpdateUpperTransParam(); + void reset(); + TO_STRING_KV(K_(new_upper_trans), K_(last_minor_end_scn)); +public: + ObIArray<int64_t> *new_upper_trans_; /* not owned: reset() and the destructor only null the pointer */ + share::SCN last_minor_end_scn_; /* set to min by the constructor and by reset() */ +}; + struct ObUpdateTableStoreParam { ObUpdateTableStoreParam() = default; @@ -351,6 +363,12 @@ struct ObUpdateTableStoreParam const int64_t multi_version_start, const ObStorageSchema *storage_schema, const int64_t rebuild_seq); + ObUpdateTableStoreParam( + const int64_t snapshot_version, + const int64_t multi_version_start, + const ObStorageSchema *storage_schema, + const int64_t rebuild_seq, + const UpdateUpperTransParam upper_trans_param); 
ObUpdateTableStoreParam( const blocksstable::ObSSTable *sstable, const int64_t snapshot_version, @@ -380,7 +398,7 @@ struct ObUpdateTableStoreParam K_(need_report), KPC_(storage_schema), K_(rebuild_seq), K_(update_with_major_flag), K_(need_check_sstable), K_(ddl_info), K_(allow_duplicate_sstable), "merge_type", merge_type_to_str(merge_type_), - K_(need_check_transfer_seq), K_(transfer_seq)); + K_(need_check_transfer_seq), K_(transfer_seq), K_(upper_trans_param)); const blocksstable::ObSSTable *sstable_; int64_t snapshot_version_; @@ -396,6 +414,7 @@ struct ObUpdateTableStoreParam bool need_check_transfer_seq_; int64_t transfer_seq_; compaction::ObMergeType merge_type_; // set merge_type only when update tablet in compaction + UpdateUpperTransParam upper_trans_param_; // set upper_trans_param_ only when update upper_trans_version }; struct ObBatchUpdateTableStoreParam final diff --git a/src/storage/ob_tenant_tablet_stat_mgr.cpp b/src/storage/ob_tenant_tablet_stat_mgr.cpp index e10bee5deb..ebd408aaa1 100644 --- a/src/storage/ob_tenant_tablet_stat_mgr.cpp +++ b/src/storage/ob_tenant_tablet_stat_mgr.cpp @@ -15,9 +15,12 @@ #include "lib/oblog/ob_log_module.h" #include "share/ob_force_print_log.h" #include "share/ob_thread_mgr.h" +#include "share/schema/ob_multi_version_schema_service.h" +#include "share/schema/ob_tenant_schema_service.h" #include "storage/ob_tenant_tablet_stat_mgr.h" #include "storage/access/ob_global_iterator_pool.h" #include "observer/ob_server_struct.h" +#include "src/storage/tablet/ob_tablet.h" #include "observer/ob_server.h" #include @@ -188,6 +191,34 @@ ObTabletStat& ObTabletStat::archive(int64_t factor) return *this; } +/************************************* ObTableQueuingModeCfg *************************************/ +const ObTableQueuingModeCfg& ObTableQueuingModeCfg::get_basic_config(const QueuingMode mode) +{ + static const ObTableQueuingModeCfg QUEUING_MODE_CFG[static_cast(QueuingMode::TABLE_MODE_MAX) + 1] = { + ObTableQueuingModeCfg(), // 
MODE_NORMAL 30 * 10000, 1.0 TABLE_MODE_NORMAL + ObTableQueuingModeCfg(TABLE_MODE_QUEUING, 20 * 10000, 0.9), // TABLE_MODE_QUEUING + ObTableQueuingModeCfg(), // MODE_NORMAL 30 * 10000, 1.0 TABLE_MODE_NORMAL + ObTableQueuingModeCfg(TABLE_MODE_QUEUING_MODERATE, 10 * 10000, 0.8), // TABLE_MODE_QUEUING_MODERATE + ObTableQueuingModeCfg(TABLE_MODE_QUEUING_SUPER, 5 * 10000, 0.6), // TABLE_MODE_QUEUING_SUPER + ObTableQueuingModeCfg(TABLE_MODE_QUEUING_EXTREME, 1000, 0.5), // TABLE_MODE_QUEUING_EXTREME + ObTableQueuingModeCfg(), // MODE_NORMAL 30 * 10000, 1.0 TABLE_MODE_NORMAL + }; + // NOTE: if ObTableModeFlag is updated, QUEUING_MODE_CFG must be updated too, otherwise the static assert below fails at compile time + STATIC_ASSERT((static_cast(QueuingMode::TABLE_MODE_MAX) + 1) == ARRAYSIZEOF(QUEUING_MODE_CFG), "table mode cnt mismatch"); + return QUEUING_MODE_CFG[mode]; /* the table is indexed directly by the numeric value of the mode flag */ +} + +int64_t ObTableQueuingModeCfg::get_memtable_alive_threshold(const int64_t fast_freeze_interval) const +{ + // avoid master bmsql performance degradation 5% for more frequently fast freeze, see bug/55012541 + const int64_t base_fast_freeze_interval = is_queuing_mode() ? 
(fast_freeze_interval * 0.4) : fast_freeze_interval; + return base_fast_freeze_interval * queuing_factor_; +} + +int64_t ObTableQueuingModeCfg::get_tombstone_row_threshold(const int64_t tombstone_default_cnt) const +{ + return tombstone_default_cnt * queuing_factor_; +} /************************************* ObTabletStatAnalyzer *************************************/ ObTabletStatAnalyzer::ObTabletStatAnalyzer() @@ -247,6 +278,15 @@ bool ObTabletStatAnalyzer::has_slow_query() const return bret; } +bool ObTabletStatAnalyzer::has_accumnulated_delete() const +{ + bool bret = false; + if (is_queuing_table_mode(mode_)) { + const ObTableQueuingModeCfg &queuing_cfg = ObTableQueuingModeCfg::get_basic_config(mode_); + bret = total_tablet_stat_.delete_row_cnt_ >= queuing_cfg.total_delete_row_cnt_; + } + return bret; +} /************************************* ObTenantSysStat *************************************/ ObTenantSysStat::ObTenantSysStat() @@ -294,6 +334,7 @@ int ObTenantSysStat::refresh(const uint64_t tenant_id) /************************************* ObTabletStream *************************************/ ObTabletStream::ObTabletStream() : key_(), + total_stat_(), curr_buckets_(CURR_BUCKET_STEP), latest_buckets_(LATEST_BUCKET_STEP), past_buckets_(PAST_BUCKET_STEP) @@ -302,11 +343,18 @@ ObTabletStream::ObTabletStream() ObTabletStream::~ObTabletStream() { + reset(); } void ObTabletStream::reset() { key_.reset(); + clear_stat(); +} + +void ObTabletStream::clear_stat() +{ + total_stat_.reset(); curr_buckets_.reset(); latest_buckets_.reset(); past_buckets_.reset(); @@ -321,6 +369,7 @@ void ObTabletStream::add_stat(const ObTabletStat &stat) if (key_.ls_id_.id() == stat.ls_id_ && key_.tablet_id_.id() == stat.tablet_id_) { curr_buckets_.add(stat); + total_stat_ += stat; } } @@ -715,7 +764,9 @@ int ObTenantTabletStatMgr::report_stat( int ObTenantTabletStatMgr::get_latest_tablet_stat( const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, - ObTabletStat &tablet_stat) + 
ObTabletStat &tablet_stat, + ObTabletStat &total_tablet_stat, + share::schema::ObTableModeFlag &mode) { int ret = OB_SUCCESS; tablet_stat.reset(); @@ -738,6 +789,8 @@ int ObTenantTabletStatMgr::get_latest_tablet_stat( } } else { stream_node->stream_.get_latest_stat(tablet_stat); + total_tablet_stat = stream_node->stream_.get_total_stats(); + mode = stream_node->mode_; } } return ret; @@ -758,14 +811,13 @@ int ObTenantTabletStatMgr::clear_tablet_stat( LOG_WARN("get invalid arguments", K(ret), K(ls_id), K(tablet_id)); } else { ObBucketHashWLockGuard lock_guard(bucket_lock_, key.hash()); - if (OB_FAIL(stream_map_.erase_refactored(key))) { - if (OB_HASH_NOT_EXIST == ret) { - ret = OB_SUCCESS; - } else { - LOG_WARN("failed to erase tablet stat", K(ret), K(key)); - } + if (OB_FAIL(inner_clear_tablet_stat(key))) { + LOG_WARN("failed to clear tablet stat", K(ret), K(key)); } } + if (OB_SUCC(ret)) { + FLOG_INFO("clear tablet stat", K(ret), K(ls_id), K(tablet_id)); + } return ret; } @@ -780,7 +832,11 @@ int ObTenantTabletStatMgr::get_all_tablet_stats( if (OB_NOT_NULL(cur_node = bucket_it->second)) { cur_stat.reset(); cur_node->stream_.get_latest_stat(cur_stat); - if (!cur_stat.is_valid()) { + if (is_queuing_table_mode(cur_node->mode_)) { + if (OB_FAIL(tablet_stats.push_back(cur_stat))) { + LOG_WARN("failed to add tablet stat", K(ret), K(cur_stat)); + } + } else if (!cur_stat.is_valid()) { } else if (0 == cur_stat.query_cnt_ && 0 == cur_stat.merge_cnt_) { // no tablet stat has been collected in the past 16 minutes. 
} else if (OB_FAIL(tablet_stats.push_back(cur_stat))) { @@ -826,13 +882,66 @@ int ObTenantTabletStatMgr::get_tablet_analyzer( { int ret = OB_SUCCESS; - if (OB_FAIL(get_latest_tablet_stat(ls_id, tablet_id, analyzer.tablet_stat_))) { + if (OB_FAIL(get_latest_tablet_stat(ls_id, tablet_id, analyzer.tablet_stat_, analyzer.total_tablet_stat_, analyzer.mode_))) { if (OB_HASH_NOT_EXIST != ret) { LOG_WARN("failed to get latest tablet stat", K(ret), K(ls_id), K(tablet_id)); } } else { + const ObTableQueuingModeCfg &queuing_cfg = ObTableQueuingModeCfg::get_basic_config(analyzer.mode_); analyzer.is_small_tenant_ = sys_stat_.is_small_tenant(); - analyzer.boost_factor_ = analyzer.is_small_tenant_ ? 2 : 1; + analyzer.boost_factor_ = (analyzer.is_small_tenant_ ? 2 : 1) * queuing_cfg.queuing_factor_; + } + return ret; +} + +int ObTenantTabletStatMgr::inner_clear_tablet_stat(const ObTabletStatKey &key) +{ + int ret = OB_SUCCESS; + ObTabletStreamNode *stream_node = nullptr; + if (OB_FAIL(stream_map_.get_refactored(key, stream_node))) { + if (OB_HASH_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get stream node", K(ret)); + } + } else { + // clear statistics but remain node, otherwise table mode of queuing table will be reset + stream_node->clear_stat(); + } + return ret; +} + +int ObTenantTabletStatMgr::batch_clear_tablet_stat( + const share::ObLSID &ls_id, + const ObIArray &tablet_ids) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + int64_t tablet_cnt = tablet_ids.count(); + int64_t clear_cnt = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTenantTabletStatMgr not inited", K(ret)); + } else if (OB_UNLIKELY(tablet_ids.empty())) { + LOG_TRACE("tablet_ids empty, no need to clear"); + } else { + ObTabletStatKey key; + key.ls_id_ = ls_id; + ObBucketWLockAllGuard lock_guard(bucket_lock_); + for (int64_t idx = 0; idx < tablet_cnt; idx++) { + key.tablet_id_ = tablet_ids.at(idx); + if (OB_UNLIKELY(!key.is_valid())) { + tmp_ret = 
OB_INVALID_ARGUMENT; + LOG_WARN("get invalid tablet id", K(tmp_ret), K(key)); + } else if (OB_TMP_FAIL(inner_clear_tablet_stat(key))) { + LOG_WARN("failed to clear tablet stat", K(tmp_ret), K(key)); + } else { + clear_cnt++; + } + } + } + if (OB_SUCC(ret)) { + FLOG_INFO("batch clear tablet stat in ls", K(ret), K(ls_id), K(tablet_cnt), K(clear_cnt)); } return ret; } @@ -950,6 +1059,137 @@ void ObTenantTabletStatMgr::refresh_sys_stat() load_shedder_.refresh_sys_load(); } +void ObTenantTabletStatMgr::refresh_queuing_mode() +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + uint64_t compat_version = 0; + int64_t cost_time = common::ObTimeUtility::current_time(); + int64_t stream_cnt = 0; + int64_t update_schema_cnt = 0; + int64_t tenant_schema_version = OB_INVALID_VERSION; + const int64_t tenant_id = MTL_ID(); + ObMultiVersionSchemaService *schema_service = MTL(ObTenantSchemaService *)->get_schema_service(); + ObSchemaGetterGuard schema_guard; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTenantTabletStatMgr is not inited", K(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { + LOG_WARN("failed to get data version", K(ret)); + } else if (not_compat_for_queuing_mode(compat_version)) { + if (REACH_TENANT_TIME_INTERVAL(30 * 1000L * 1000L/*30s*/)) { + LOG_INFO("compat_version not support buffer table mode, no need to refresh queuing mode", K(compat_version)); + } + } else if (OB_ISNULL(schema_service)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get get schema service", K(ret)); + } else if (OB_FAIL(schema_service->get_tenant_schema_guard(tenant_id, schema_guard))) { + LOG_WARN("failed to get schema guard", K(ret), K(tenant_id)); + } else if (OB_FAIL(schema_guard.get_schema_version(tenant_id, tenant_schema_version))) { + LOG_WARN("failed to get tenant schema version", K(ret), K(tenant_id)); + } else { + ObBucketWLockAllGuard lock_guard(bucket_lock_); + stream_cnt = stream_map_.size(); + if (stream_cnt > 0) { + ObSEArray 
tablet_ids; + ObSEArray table_ids; + tablet_ids.reserve(stream_cnt); + table_ids.reserve(stream_cnt); + common::hash::ObHashMap table_mode_map; + TabletStreamMap::iterator iter = stream_map_.begin(); + for ( ; iter != stream_map_.end() && OB_SUCC(ret); ++iter) { + if (OB_FAIL(tablet_ids.push_back(iter->first.tablet_id_))) { + LOG_WARN("failed to push back tablet id", K(ret)); + } + } + + // TODO(chengkong): basic implementation, can be optimized + if (FAILEDx(schema_service->get_tablet_to_table_history(tenant_id, tablet_ids, tenant_schema_version, table_ids))) { + LOG_WARN("failed to get table ids according to tablet ids", K(ret), K(tenant_id), K(tenant_schema_version)); + } else if (OB_UNLIKELY(tablet_ids.count() != stream_cnt || table_ids.count() != stream_cnt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected tablet ids or table ids", K(ret), K(tablet_ids), K(table_ids)); + } else if (OB_FAIL(table_mode_map.create(DEFAULT_BUCKET_NUM, ObMemAttr(tenant_id, "TabStatModeMap")))) { + LOG_WARN("failed to init table_mode_map", K(ret)); + } else { + iter = stream_map_.begin(); + ObTabletStreamNode *stream_node = nullptr; + const ObSimpleTableSchemaV2 *table_schema = nullptr; + ObTableModeFlag tmp_mode_flag = TABLE_MODE_MAX; + for (int64_t idx = 0; idx < stream_cnt && iter != stream_map_.end() && OB_SUCC(ret); ++idx, ++iter) { + const ObTabletStatKey &key = iter->first; + stream_node = iter->second; + int64_t table_id = table_ids.at(idx); + if (OB_UNLIKELY(OB_INVALID_ID == table_id)) { + // TODO(chengkong): the table id may be invalid in some cases such as offline ddl or drop table; such tablets are skipped. 
+ LOG_WARN("failed to fetch table id from inner table, may be recycled or never exists, skip it", "tablet_id", tablet_ids.at(idx)); + } else if (OB_UNLIKELY(key.tablet_id_ != tablet_ids.at(idx))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("key mismatch with tablet id", K(ret), K(key), K(tablet_ids.at(idx))); + } else if (OB_FAIL(table_mode_map.get_refactored(table_id, tmp_mode_flag))) { + if (OB_HASH_NOT_EXIST == ret) { + if (OB_FAIL(schema_guard.get_simple_table_schema(tenant_id, table_id, table_schema))) { + LOG_WARN("failed to get table schema", K(ret), K(tenant_id), K(table_id)); + } else if (OB_ISNULL(table_schema)) { + LOG_WARN("get nullptr table schema, skip this tablet", K(tenant_id), K(table_id)); + } else if (FALSE_IT(tmp_mode_flag = table_schema->get_table_mode_flag())) { + } else if (FALSE_IT(stream_node->mode_ = tmp_mode_flag)) { + } else if (FALSE_IT(update_schema_cnt++)) { + } else if (OB_TMP_FAIL(table_mode_map.set_refactored(table_id, tmp_mode_flag))) { + LOG_WARN("failed to set table mode, try set next round", K(tmp_ret), K(table_id), K(tmp_mode_flag)); + } + } else { + LOG_WARN("failed to get table mode from map", K(ret), K(table_id)); + } + } else { + stream_node->mode_ = tmp_mode_flag; + update_schema_cnt++; + } + /* re-acquire the schema guard every MAX_SCHEMA_GUARD_REFRESH_CNT tablets to avoid holding schema memory too long */ + if (OB_SUCC(ret) && (idx+1) % MAX_SCHEMA_GUARD_REFRESH_CNT == 0) { + schema_guard.reset(); + if (OB_FAIL(schema_service->get_tenant_schema_guard(tenant_id, schema_guard))) { + LOG_WARN("fail to get schema guard", K(ret), K(tenant_id)); + } + } + } + } + } + } + cost_time = common::ObTimeUtility::current_time() - cost_time; + LOG_INFO("refresh queuing mode", K(ret), K(tenant_id), K(stream_cnt), K(update_schema_cnt), K(cost_time)); +} +/* Copies the basic queuing config for the table mode recorded on the tablet's stream node into queuing_cfg. When the tablet has no stream node, queuing_cfg is left untouched and OB_SUCCESS is returned. */ +int ObTenantTabletStatMgr::get_queuing_cfg( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + ObTableQueuingModeCfg& queuing_cfg) +{ + int ret = OB_SUCCESS; + const ObTabletStatKey key(ls_id, tablet_id); + if (IS_NOT_INIT) { + ret = 
OB_NOT_INIT; + LOG_WARN("ObTenantTabletStatMgr not inited", K(ret)); + } else if (OB_UNLIKELY(!key.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid arguments", K(ret), K(ls_id), K(tablet_id)); + } else { + ObTabletStreamNode *stream_node = nullptr; + ObBucketHashRLockGuard lock_guard(bucket_lock_, key.hash()); + if (OB_FAIL(stream_map_.get_refactored(key, stream_node))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get tablet stream node", K(ret), K(key)); + } else { + ret = OB_SUCCESS; + } + } else { + queuing_cfg = ObTableQueuingModeCfg::get_basic_config(stream_node->mode_); + LOG_DEBUG("chengkong debug: success get queuing cfg", K(ret), K(ls_id), K(tablet_id), K(queuing_cfg)); + } + } + return ret; +} + void ObTenantTabletStatMgr::TabletStatUpdater::runTimerTask() { mgr_.process_stats(); @@ -965,6 +1205,8 @@ void ObTenantTabletStatMgr::TabletStatUpdater::runTimerTask() LOG_WARN_RET(OB_ERR_UNEXPECTED, "tablet streams not refresh too long", K(interval_step)); } mgr_.refresh_all(interval_step); + mgr_.refresh_queuing_mode(); + last_update_time_ = ObTimeUtility::current_time(); FLOG_INFO("TenantTabletStatMgr refresh all tablet stream", K(MTL_ID()), K(interval_step), KPC(global_iter_pool)); } } diff --git a/src/storage/ob_tenant_tablet_stat_mgr.h b/src/storage/ob_tenant_tablet_stat_mgr.h index 06cf732671..1f97871ace 100644 --- a/src/storage/ob_tenant_tablet_stat_mgr.h +++ b/src/storage/ob_tenant_tablet_stat_mgr.h @@ -17,6 +17,7 @@ #include "share/ob_ls_id.h" #include "lib/hash/ob_hashmap.h" #include "share/rc/ob_tenant_base.h" +#include "share/schema/ob_table_schema.h" #include "lib/allocator/page_arena.h" #include "lib/allocator/ob_fifo_allocator.h" #include "lib/lock/ob_bucket_lock.h" @@ -105,6 +106,32 @@ public: uint64_t delete_row_cnt_; }; +class ObTableQueuingModeCfg +{ +public: + using QueuingMode = share::schema::ObTableModeFlag; + ObTableQueuingModeCfg() + :mode_(QueuingMode::TABLE_MODE_NORMAL), + total_delete_row_cnt_(30 * 10000), + 
queuing_factor_(1.0) + {} + ~ObTableQueuingModeCfg() = default; + static const ObTableQueuingModeCfg& get_basic_config(const QueuingMode mode); + int64_t get_memtable_alive_threshold(const int64_t fast_freeze_interval) const; + int64_t get_tombstone_row_threshold(const int64_t tombstone_default_cnt) const; + bool is_queuing_mode() const { return is_queuing_table_mode(mode_); } + TO_STRING_KV(K_(mode), K_(total_delete_row_cnt), K_(queuing_factor)); +private: + explicit ObTableQueuingModeCfg(QueuingMode mode, int64_t delete_cnt, double factor) + :mode_(mode), + total_delete_row_cnt_(delete_cnt), + queuing_factor_(factor) + {} +public: + QueuingMode mode_; + int64_t total_delete_row_cnt_; + double queuing_factor_; +}; struct ObTabletStatAnalyzer { @@ -115,7 +142,8 @@ public: bool is_insert_mostly() const; bool is_update_or_delete_mostly() const; bool has_slow_query() const; - TO_STRING_KV(K_(tablet_stat), K_(is_small_tenant), K_(boost_factor)); + bool has_accumnulated_delete() const; + TO_STRING_KV(K_(tablet_stat), K_(total_tablet_stat), K_(is_small_tenant), K_(boost_factor)); public: static constexpr int64_t ACCESS_FREQUENCY = 5; static constexpr int64_t BASE_FACTOR = 10; @@ -126,8 +154,10 @@ public: static constexpr int64_t QUERY_BASIC_ITER_TABLE_CNT = 5; static constexpr int64_t MERGE_BASIC_ROW_CNT = 10000; public: - ObTabletStat tablet_stat_; - int64_t boost_factor_; + ObTabletStat tablet_stat_; // tablet statistics recently + ObTabletStat total_tablet_stat_; // tablet statistics since last compaction + share::schema::ObTableModeFlag mode_; + double boost_factor_; bool is_small_tenant_; }; @@ -234,6 +264,7 @@ public: ObTabletStream(); virtual ~ObTabletStream(); void reset(); + void clear_stat(); void add_stat(const ObTabletStat &stat); void refresh(); @@ -242,6 +273,7 @@ public: const ObTabletStatBucket &bucket, common::ObIArray &tablet_stats) const; int get_all_tablet_stat(common::ObIArray &tablet_stats) const; + const ObTabletStat& get_total_stats() const { 
return total_stat_; } OB_INLINE ObTabletStatKey& get_tablet_stat_key() { return key_; } OB_INLINE void get_latest_stat(ObTabletStat &tablet_stat) const { curr_buckets_.get_tablet_stat(tablet_stat); } TO_STRING_KV(K_(key), K_(curr_buckets), K_(latest_buckets), K_(past_buckets)); @@ -255,6 +287,7 @@ private: static constexpr uint32_t PAST_BUCKET_STEP = 32; // 64min for each unit, total 256min ObTabletStatKey key_; + ObTabletStat total_stat_; ObTabletStatBucket curr_buckets_; ObTabletStatBucket latest_buckets_; ObTabletStatBucket past_buckets_; @@ -264,15 +297,18 @@ private: class ObTabletStreamNode : public ObDLinkBase { public: + using QueuingMode = share::schema::ObTableModeFlag; explicit ObTabletStreamNode(const int64_t flag = 0) - : stream_(), flag_(flag) {} + : stream_(), flag_(flag), mode_(QueuingMode::TABLE_MODE_NORMAL) {} ~ObTabletStreamNode() { reset(); } void reset() { stream_.reset(); } + void clear_stat() { stream_.clear_stat(); } TO_STRING_KV(K_(stream), K_(flag)); public: ObTabletStream stream_; const int64_t flag_; + QueuingMode mode_; }; @@ -361,7 +397,9 @@ public: int get_latest_tablet_stat( const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, - ObTabletStat &tablet_stat); + ObTabletStat &tablet_stat, + ObTabletStat &total_tablet_stat, + share::schema::ObTableModeFlag &mode); int get_history_tablet_stats( const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, @@ -375,8 +413,17 @@ public: int clear_tablet_stat( const share::ObLSID &ls_id, const common::ObTabletID &tablet_id); + int batch_clear_tablet_stat( + const share::ObLSID &ls_id, + const ObIArray &tablet_ids); void process_stats(); void refresh_all(const int64_t step); + void refresh_queuing_mode(); + int get_queuing_cfg( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + ObTableQueuingModeCfg& queuing_cfg); + int64_t get_last_update_time() { return report_stat_task_.last_update_time_; } bool is_high_tenant_cpu_load() const { return 
get_load_shedding_factor() >= ObTenantSysLoadShedder::DEFAULT_LOAD_SHEDDING_FACTOR; } int64_t get_load_shedding_factor() const { return load_shedder_.get_load_shedding_factor(); } void refresh_sys_stat(); @@ -384,9 +431,11 @@ private: class TabletStatUpdater : public common::ObTimerTask { public: - TabletStatUpdater(ObTenantTabletStatMgr &mgr) : mgr_(mgr) {} + TabletStatUpdater(ObTenantTabletStatMgr &mgr) : last_update_time_(0), mgr_(mgr) {} virtual ~TabletStatUpdater() {} virtual void runTimerTask(); + public: + int64_t last_update_time_; // for mittest private: ObTenantTabletStatMgr &mgr_; }; @@ -394,6 +443,7 @@ private: private: int update_tablet_stream(const ObTabletStat &report_stat); int fetch_node(ObTabletStreamNode *&node); + int inner_clear_tablet_stat(const ObTabletStatKey &key); // hold lock! private: typedef common::hash::ObHashMap &memtables, co return inner_get_memtables(memtables, need_active); } -int ObTablet::check_need_remove_old_table( - const int64_t multi_version_start, - bool &need_remove) const -{ - int ret = OB_SUCCESS; - ObTabletMemberWrapper table_store_wrapper; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("not inited", K(ret), K_(is_inited)); - } else if (OB_FAIL(fetch_table_store(table_store_wrapper))) { - LOG_WARN("fail to fetch table store", K(ret)); - } else if (OB_FAIL(table_store_wrapper.get_member()->need_remove_old_table( - multi_version_start, need_remove))) { - LOG_WARN("failed to check need rebuild table store", K(ret), K(multi_version_start)); - } - - return ret; -} - -int ObTablet::update_upper_trans_version(ObLS &ls, bool &is_updated) -{ - int ret = OB_SUCCESS; - const share::ObLSID &ls_id = tablet_meta_.ls_id_; - const common::ObTabletID &tablet_id = tablet_meta_.tablet_id_; - is_updated = false; - bool is_paused = false; - ObTabletMemberWrapper table_store_wrapper; - ObTableStoreIterator iter(false/*is_reverse*/, false/*need_load_sstable*/); - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("not inited", K(ret), 
K_(is_inited)); - } else if (FALSE_IT(is_paused = false)) { // TODO(DanLing) get is_paused - } else if (is_paused) { - LOG_INFO("paused, cannot update trans version now", K(ret), K(ls_id), K(tablet_id)); - } else if (OB_FAIL(fetch_table_store(table_store_wrapper))) { - LOG_WARN("fail to fetch table store", K(ret)); - } else if (OB_FAIL(table_store_wrapper.get_member()->get_mini_minor_sstables(iter))) { - LOG_WARN("fail to get mini minor sstable", K(ret), K(table_store_wrapper)); - } else { - ObITable *table = nullptr; - while (OB_SUCC(ret) && OB_SUCC(iter.get_next(table))) { - if (OB_ISNULL(table) || OB_UNLIKELY(!table->is_sstable())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected error, table is nullptr", K(ret), KPC(table)); - } else { - ObSSTable *sstable = reinterpret_cast(table); - if (INT64_MAX != sstable->get_upper_trans_version()) { - } else if (0 == sstable->get_data_macro_block_count() && INT64_MAX == sstable->get_max_merged_trans_version()) { - if (OB_FAIL(sstable->set_upper_trans_version(0, true/*force_update*/))) { - LOG_WARN("failed to force set upper trans version", K(ret), KPC(sstable)); - } else { - FLOG_INFO("sstable has no data but max merged version is INT64_MAX, force set upper trans version", K(ret), KPC(sstable)); - } - } else { - int64_t max_trans_version = INT64_MAX; - SCN tmp_scn = SCN::max_scn(); - if (OB_FAIL(ls.get_upper_trans_version_before_given_scn(sstable->get_end_scn(), tmp_scn))) { - LOG_WARN("failed to get upper trans version before given log ts", K(ret), KPC(sstable)); - } else if (FALSE_IT(max_trans_version = tmp_scn.get_val_for_tx())) { - } else if (0 == max_trans_version) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("max trans version should not be 0", KPC(sstable)); - } else if (INT64_MAX != max_trans_version) { - if (OB_UNLIKELY(0 == max_trans_version)) { - FLOG_INFO("get max_trans_version = 0, maybe all the trans have been rollbacked", K(ret), K(ls_id), K(tablet_id), - K(max_trans_version), KPC(sstable)); - } - if 
(OB_FAIL(sstable->set_upper_trans_version(max_trans_version, false/*force_update*/))) { - LOG_WARN("failed to set_upper_trans_version", K(ret), KPC(sstable)); - } else { - is_updated = true; - FLOG_INFO("success to update sstable's upper trans version", K(ret), K(ls_id), K(tablet_id), - K(max_trans_version), KPC(sstable)); - } - } - } - } - } - if (OB_ITER_END == ret) { - ret = OB_SUCCESS; - } - } - - return ret; -} - int ObTablet::update_row( ObRelativeTable &relative_table, storage::ObStoreCtx &store_ctx, diff --git a/src/storage/tablet/ob_tablet.h b/src/storage/tablet/ob_tablet.h index 97826c21fc..e2d8b4adeb 100644 --- a/src/storage/tablet/ob_tablet.h +++ b/src/storage/tablet/ob_tablet.h @@ -344,8 +344,6 @@ public: int get_tablet_size(const bool ignore_shared_block, int64_t &meta_size, int64_t &data_size); int get_memtables(common::ObIArray &memtables, const bool need_active = false) const; int get_ddl_kvs(common::ObIArray &ddl_kvs) const; - int check_need_remove_old_table(const int64_t multi_version_start, bool &need_remove) const; - int update_upper_trans_version(ObLS &ls, bool &is_updated); // memtable operation // ATTENTION!!! 
diff --git a/src/storage/tablet/ob_tablet_table_store.cpp b/src/storage/tablet/ob_tablet_table_store.cpp index 840102fa7d..a73372657c 100644 --- a/src/storage/tablet/ob_tablet_table_store.cpp +++ b/src/storage/tablet/ob_tablet_table_store.cpp @@ -1233,7 +1233,7 @@ int ObTabletTableStore::build_new_table_store( if (OB_FAIL(ret)) { } else if (OB_FAIL(build_major_tables(allocator, param, old_store, inc_base_snapshot_version))) { LOG_WARN("failed to build major_tables", K(ret)); - } else if (OB_FAIL(build_minor_tables(allocator, param.sstable_, old_store, param.need_check_sstable_, inc_base_snapshot_version, ha_status))) { + } else if (OB_FAIL(build_minor_tables(allocator, param.sstable_, old_store, param.need_check_sstable_, inc_base_snapshot_version, ha_status, param.upper_trans_param_))) { if (OB_UNLIKELY(OB_NO_NEED_MERGE != ret)) { LOG_WARN("failed to build minor_tables", K(ret)); } @@ -1366,7 +1366,8 @@ int ObTabletTableStore::build_minor_tables( const ObTabletTableStore &old_store, const bool need_check_sstable, const int64_t inc_base_snapshot_version, - const ObTabletHAStatus &ha_status) + const ObTabletHAStatus &ha_status, + const UpdateUpperTransParam &upper_trans_param) { int ret = OB_SUCCESS; ObITable *new_table = const_cast(static_cast(new_sstable)); //table can be null @@ -1436,20 +1437,59 @@ int ObTabletTableStore::build_minor_tables( LOG_WARN("failed to sort minor tables", K(ret)); } else { int64_t inc_pos = -1; + const int64_t minor_cnt = minor_tables.count(); + const ObIArray *new_upper_trans = upper_trans_param.new_upper_trans_; + const bool has_valid_update = new_upper_trans != nullptr + && minor_cnt == new_upper_trans->count() + && minor_cnt > 0 + && minor_tables.at(minor_cnt-1)->get_end_scn() == upper_trans_param.last_minor_end_scn_; + int64_t current_upper_trans_version = INT64_MAX; if (!ha_status.is_none()) { inc_pos = 0; //in ha status do not recycle minor sstable LOG_INFO("tablet in ha status, no need recycle minor sstable", K(ha_status)); } 
else { + /* + * if the upper trans version of the ith sstable can't be calculated, the sstables with bigger end_scn can't be calculated either. + * new_upper_trans means the latest value of upper_trans_version for minor_tables. + * + * upper trans versions in old minors: + * --------- ascending by end_scn --------------> + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | + * | val1 | val2 | val3 | MAX | MAX | MAX | MAX | + * new_upper_trans: + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | + * | val1 | val2 | val3 | new1 | new2 | MAX | MAX | + */ for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.count(); ++i) { - if (minor_tables.at(i)->get_upper_trans_version() > inc_base_snapshot_version) { + current_upper_trans_version = has_valid_update ? new_upper_trans->at(i) : minor_tables.at(i)->get_upper_trans_version(); + if (current_upper_trans_version > inc_base_snapshot_version) { inc_pos = i; break; } } } - if (OB_FAIL(ret)) { - } else if (inc_pos >= 0 && OB_FAIL(init_minor_sstables_with_check(allocator, minor_tables, inc_pos))) { + if (OB_FAIL(ret) || inc_pos < 0) { + } else if (OB_FAIL(init_minor_sstables_with_check(allocator, minor_tables, inc_pos))) { LOG_WARN("failed to init minor_tables", K(ret)); + } else if (ha_status.is_none() && has_valid_update && minor_tables_.count() > 0) { + // update upper_trans_version of new table store with latest value + for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables_.count(); ++i) { + ObSSTable *sstable = minor_tables_[i]; + if (OB_ISNULL(sstable)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected null sstable pointer", K(ret), K(i)); + } else if (INT64_MAX != sstable->get_upper_trans_version()) { + } else if (i+inc_pos >= new_upper_trans->count()) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("index of new_upper_trans overflow", K(ret), K(i), K(inc_pos), KPC(new_upper_trans)); + } else if (FALSE_IT(current_upper_trans_version = new_upper_trans->at(i+inc_pos))) { + } else if (INT64_MAX == current_upper_trans_version) { + break; + } else if 
(OB_FAIL(sstable->set_upper_trans_version(allocator, current_upper_trans_version))) { + LOG_WARN("failed to set new upper_trans_version", K(ret), K(i), KPC(sstable)); + } + } + LOG_INFO("Finish update upper_trans_version", K(ret), K(upper_trans_param), K_(minor_tables)); } } } diff --git a/src/storage/tablet/ob_tablet_table_store.h b/src/storage/tablet/ob_tablet_table_store.h index 0c15068631..e926f31eb9 100644 --- a/src/storage/tablet/ob_tablet_table_store.h +++ b/src/storage/tablet/ob_tablet_table_store.h @@ -24,6 +24,7 @@ namespace storage { class ObIMemtableMgr; struct ObUpdateTableStoreParam; +struct UpdateUpperTransParam; struct ObBatchUpdateTableStoreParam; class ObTablet; class ObTableStoreIterator; @@ -248,7 +249,8 @@ private: const ObTabletTableStore &old_store, const bool need_check_sstable, const int64_t inc_base_snapshot_version, - const ObTabletHAStatus &ha_status); + const ObTabletHAStatus &ha_status, + const UpdateUpperTransParam &upper_trans_param); int build_meta_major_table( common::ObArenaAllocator &allocator, const blocksstable::ObSSTable *new_sstable, diff --git a/src/storage/tx/ob_tx_loop_worker.cpp b/src/storage/tx/ob_tx_loop_worker.cpp index cdf00e5ce9..026b59b15f 100644 --- a/src/storage/tx/ob_tx_loop_worker.cpp +++ b/src/storage/tx/ob_tx_loop_worker.cpp @@ -191,6 +191,8 @@ int ObTxLoopWorker::scan_all_ls_(bool can_tx_gc, if (OB_TMP_FAIL(cur_ls_ptr->get_log_handler()->get_max_decided_scn(max_decided_scn))) { TRANS_LOG(WARN, "get max decided scn failed", KR(tmp_ret), K(min_start_scn)); max_decided_scn.set_invalid(); + } else { + (void)cur_ls_ptr->update_min_start_scn_info(max_decided_scn); } min_start_scn = max_decided_scn; do_tx_gc_(cur_ls_ptr, min_start_scn, status); diff --git a/src/storage/tx_table/ob_tx_data_table.cpp b/src/storage/tx_table/ob_tx_data_table.cpp index 199e4f3428..297c084311 100644 --- a/src/storage/tx_table/ob_tx_data_table.cpp +++ b/src/storage/tx_table/ob_tx_data_table.cpp @@ -39,8 +39,6 @@ using namespace 
oceanbase::share; namespace storage { -int64_t ObTxDataTable::UPDATE_CALC_UPPER_INFO_INTERVAL = 15 * 1000 * 1000; // 15 seconds - int ObTxDataTable::init(ObLS *ls, ObTxCtxTable *tx_ctx_table) { int ret = OB_SUCCESS; @@ -181,7 +179,6 @@ void ObTxDataTable::reset() ls_tablet_svr_ = nullptr; memtable_mgr_ = nullptr; tx_ctx_table_ = nullptr; - calc_upper_info_.reset(); calc_upper_trans_version_cache_.reset(); memtables_cache_.reuse(); calc_upper_trans_is_disabled_ = false; @@ -209,8 +206,9 @@ int ObTxDataTable::offline() STORAGE_LOG(WARN, "clean memtables cache failed", KR(ret), KPC(this)); } else { is_started_ = false; - disable_upper_trans_calculation(); + calc_upper_trans_version_cache_.reset(); } + return ret; } @@ -233,9 +231,13 @@ int ObTxDataTable::online() LOG_WARN("failed to create memtable", K(ret)); } else { // load tx data table succeed - is_started_ = true; - calc_upper_trans_is_disabled_ = false; + { + TCWLockGuard lock_guard(calc_upper_trans_version_cache_.lock_); + calc_upper_trans_version_cache_.reset(); + } latest_transfer_scn_.reset(); + ATOMIC_STORE(&calc_upper_trans_is_disabled_, false); + is_started_ = true; } return ret; @@ -932,8 +934,10 @@ bool ObTxDataTable::skip_this_sstable_end_scn_(const SCN &sstable_end_scn) { int ret = OB_SUCCESS; bool need_skip = false; - SCN min_start_scn_in_tx_data_memtable = SCN::max_scn(); SCN max_decided_scn = SCN::min_scn(); + SCN min_start_scn_in_ctx = SCN::min_scn(); + SCN effective_scn = SCN::min_scn(); + SCN min_start_scn_in_tx_data_memtable = SCN::max_scn(); // make sure the max decided log ts is greater than sstable_end_scn if (OB_FAIL(ls_->get_max_decided_scn(max_decided_scn))) { @@ -943,7 +947,8 @@ bool ObTxDataTable::skip_this_sstable_end_scn_(const SCN &sstable_end_scn) // check if the min_start_scn_in_ctx is larger than sstable_end_scn if (need_skip) { - } else if (OB_FAIL(check_min_start_in_ctx_(sstable_end_scn, max_decided_scn, need_skip))) { + } else if (OB_FAIL(check_min_start_in_ctx_( + 
sstable_end_scn, max_decided_scn, min_start_scn_in_ctx, effective_scn, need_skip))) { need_skip = true; STORAGE_LOG(WARN, "check min start in ctx failed", KR(ret), KP(this), K(sstable_end_scn)); } @@ -960,7 +965,8 @@ bool ObTxDataTable::skip_this_sstable_end_scn_(const SCN &sstable_end_scn) K(need_skip), K(sstable_end_scn), K(max_decided_scn), - K(calc_upper_info_), + K(min_start_scn_in_ctx), + K(effective_scn), K(min_start_scn_in_tx_data_memtable)); } else { STORAGE_LOG(TRACE, @@ -968,7 +974,8 @@ bool ObTxDataTable::skip_this_sstable_end_scn_(const SCN &sstable_end_scn) K(need_skip), K(sstable_end_scn), K(max_decided_scn), - K(calc_upper_info_), + K(min_start_scn_in_ctx), + K(effective_scn), K(min_start_scn_in_tx_data_memtable)); } @@ -977,74 +984,35 @@ bool ObTxDataTable::skip_this_sstable_end_scn_(const SCN &sstable_end_scn) int ObTxDataTable::check_min_start_in_ctx_(const SCN &sstable_end_scn, const SCN &max_decided_scn, + SCN &min_start_scn, + SCN &effective_scn, bool &need_skip) { int ret = OB_SUCCESS; - bool need_update_info = false; - int64_t cur_ts = common::ObTimeUtility::fast_current_time(); + min_start_scn.set_min(); + effective_scn.set_min(); - { - SpinRLockGuard lock_guard(calc_upper_info_.lock_); - if (calc_upper_info_.min_start_scn_in_ctx_ <= sstable_end_scn || - (latest_transfer_scn_.is_valid() && - calc_upper_info_.keep_alive_scn_ < latest_transfer_scn_) || - calc_upper_info_.keep_alive_scn_ >= max_decided_scn) { - need_skip = true; - } - - if (cur_ts - calc_upper_info_.update_ts_ > ObTxDataTable::UPDATE_CALC_UPPER_INFO_INTERVAL && - max_decided_scn > calc_upper_info_.keep_alive_scn_) { - need_update_info = true; - } + if (OB_FAIL(ls_->get_uncommitted_tx_min_start_scn(min_start_scn, effective_scn))) { + need_skip = true; + STORAGE_LOG(DEBUG, "get uncommited tx min_start_scn failed", KR(ret), K(sstable_end_scn), K(max_decided_scn)); + } else if (min_start_scn <= sstable_end_scn || max_decided_scn <= effective_scn || + 
(latest_transfer_scn_.is_valid() && effective_scn < latest_transfer_scn_)) { + need_skip = true; + STORAGE_LOG(DEBUG, + "skip calculate upper_trans_version", + K(sstable_end_scn), + K(max_decided_scn), + K(min_start_scn), + K(effective_scn), + K(latest_transfer_scn_), + K(need_skip)); + } else { + // there is no ctx whose start_scn less than sstable_end_scn } - if (need_update_info) { - update_calc_upper_info_(max_decided_scn); - } return ret; } -void ObTxDataTable::update_calc_upper_info_(const SCN &max_decided_scn) -{ - int64_t cur_ts = common::ObTimeUtility::fast_current_time(); - SpinWLockGuard lock_guard(calc_upper_info_.lock_); - - // recheck update condition and do update calc_upper_info - if (cur_ts - calc_upper_info_.update_ts_ > ObTxDataTable::UPDATE_CALC_UPPER_INFO_INTERVAL && - max_decided_scn > calc_upper_info_.keep_alive_scn_) { - SCN min_start_scn = SCN::min_scn(); - SCN keep_alive_scn = SCN::min_scn(); - MinStartScnStatus status; - ls_->get_min_start_scn(min_start_scn, keep_alive_scn, status); - - if (MinStartScnStatus::UNKOWN == status) { - // do nothing - } else { - int ret = OB_SUCCESS; - CalcUpperInfo tmp_calc_upper_info; - tmp_calc_upper_info.keep_alive_scn_ = keep_alive_scn; - tmp_calc_upper_info.update_ts_ = cur_ts; - if (MinStartScnStatus::NO_CTX == status) { - // use the previous keep_alive_scn as min_start_scn - tmp_calc_upper_info.min_start_scn_in_ctx_ = calc_upper_info_.keep_alive_scn_; - } else if (MinStartScnStatus::HAS_CTX == status) { - tmp_calc_upper_info.min_start_scn_in_ctx_ = min_start_scn; - } else { - ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(ERROR, "invalid min start scn status", K(min_start_scn), K(keep_alive_scn), K(status)); - } - - if (OB_FAIL(ret)) { - } else if (tmp_calc_upper_info.min_start_scn_in_ctx_ < calc_upper_info_.min_start_scn_in_ctx_) { - ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(WARN, "invalid min start scn", K(tmp_calc_upper_info), K(calc_upper_info_)); - } else { - calc_upper_info_ = tmp_calc_upper_info; - } - } - 
} -} - int ObTxDataTable::check_min_start_in_tx_data_(const SCN &sstable_end_scn, SCN &min_start_scn_in_tx_data_memtable, bool &need_skip) @@ -1074,9 +1042,9 @@ int ObTxDataTable::check_min_start_in_tx_data_(const SCN &sstable_end_scn, } else if (FALSE_IT(min_start_scn_in_tx_data_memtable = std::min(min_start_scn_in_tx_data_memtable, tx_data_memtable->get_min_start_scn()))) { } else if (sstable_end_scn >= min_start_scn_in_tx_data_memtable) { - // there is a min_start_scn in tx_data_memtable less than sstable_end_scn, skip this - // calculation + // there is a min_start_scn in tx_data_memtable less than sstable_end_scn, skip this calculation need_skip = true; + STORAGE_LOG(DEBUG, "skip calculate upper_trans_version", K(ret), K(sstable_end_scn), KPC(tx_data_memtable)); break; } } @@ -1271,10 +1239,6 @@ void ObTxDataTable::disable_upper_trans_calculation() TCWLockGuard lock_guard(calc_upper_trans_version_cache_.lock_); calc_upper_trans_version_cache_.reset(); } - { - SpinWLockGuard lock_guard(calc_upper_info_.lock_); - calc_upper_info_.reset(); - } } void ObTxDataTable::enable_upper_trans_calculation(const share::SCN latest_transfer_scn) @@ -1288,8 +1252,6 @@ void ObTxDataTable::enable_upper_trans_calculation(const share::SCN latest_trans } else { latest_transfer_scn_ = latest_transfer_scn; } - SpinWLockGuard lock_guard(calc_upper_info_.lock_); - calc_upper_info_.reset(); ATOMIC_STORE(&calc_upper_trans_is_disabled_, false); } diff --git a/src/storage/tx_table/ob_tx_data_table.h b/src/storage/tx_table/ob_tx_data_table.h index 985dbecade..aa49f6e29e 100644 --- a/src/storage/tx_table/ob_tx_data_table.h +++ b/src/storage/tx_table/ob_tx_data_table.h @@ -80,34 +80,20 @@ public: TO_STRING_KV(K(memtable_head_), K(memtable_tail_), K(memtable_handles_)); }; - struct CalcUpperInfo - { - CalcUpperInfo() {reset();} - void reset() - { - min_start_scn_in_ctx_.set_min(); - keep_alive_scn_.set_min(); - update_ts_ = 0; - } + using SliceAllocator = ObSliceAlloc; - CalcUpperInfo 
&operator= (const CalcUpperInfo &rhs) - { - min_start_scn_in_ctx_ = rhs.min_start_scn_in_ctx_; - keep_alive_scn_ = rhs.keep_alive_scn_; - update_ts_ = rhs.update_ts_; - return *this; - } + static const int64_t TX_DATA_MAX_CONCURRENCY = 32; + // A tx data is 128 bytes, 128 * 262144 = 32MB + static const int64_t SSTABLE_CACHE_MAX_RETAIN_CNT = 262144; + // The max tps is 150w which means the cache can be inserted 15w tx data during 100ms. So once + // cache cleaning task will delete at least 11w tx data. + static const int64_t DEFAULT_CACHE_RETAINED_TIME = 100_ms; // 100ms - share::SCN min_start_scn_in_ctx_; - share::SCN keep_alive_scn_; - int64_t update_ts_; - common::SpinRWLock lock_; - - TO_STRING_KV(K(min_start_scn_in_ctx_), K(keep_alive_scn_), K(update_ts_)); - }; - - static int64_t UPDATE_CALC_UPPER_INFO_INTERVAL; + // The tx data memtable will trigger a freeze if its memory use is more than 2% + static constexpr double TX_DATA_FREEZE_TRIGGER_PERCENTAGE = 2; + // TODO : @gengli.wzy The active & frozen tx data memtable can not use memory more than 10% + static constexpr double TX_DATA_MEM_LIMIT_PERCENTAGE = 10; enum COLUMN_ID_LIST { @@ -133,7 +119,6 @@ public: // ObTxDataTable memtable_mgr_(nullptr), tx_ctx_table_(nullptr), read_schema_(), - calc_upper_info_(), calc_upper_trans_version_cache_(), memtables_cache_() {} ~ObTxDataTable() {} @@ -233,7 +218,6 @@ public: // ObTxDataTable K_(is_started), K_(ls_id), K_(tablet_id), - K_(calc_upper_info), K_(memtables_cache), KP_(ls), KP_(ls_tablet_svr), @@ -315,15 +299,17 @@ private: const share::SCN &sstable_end_scn, share::SCN &tmp_upper_trans_version); bool skip_this_sstable_end_scn_(const share::SCN &sstable_end_scn); - int check_min_start_in_ctx_(const share::SCN &sstable_end_scn, const share::SCN &max_decided_scn, bool &need_skip); + int check_min_start_in_ctx_(const share::SCN &sstable_end_scn, + const share::SCN &max_decided_scn, + share::SCN &min_start_scn, + share::SCN &effective_scn, + bool &need_skip); int 
check_min_start_in_tx_data_(const share::SCN &sstable_end_scn, share::SCN &min_start_ts_in_tx_data_memtable, bool &need_skip); void print_alloc_size_for_test_(); // free the whole undo status list allocated by slice allocator void free_undo_status_list_(ObUndoStatusNode *node_ptr); - void clean_sstable_cache_task_(int64_t cache_keeped_time); - void update_calc_upper_info_(const share::SCN &max_decided_log_ts); private: static const int64_t LS_TX_DATA_SCHEMA_VERSION = 0; static const int64_t LS_TX_DATA_SCHEMA_ROWKEY_CNT = 2; @@ -344,7 +330,6 @@ private: ObTxDataMemtableMgr *memtable_mgr_; ObTxCtxTable *tx_ctx_table_; TxDataReadSchema read_schema_; - CalcUpperInfo calc_upper_info_; CalcUpperTransSCNCache calc_upper_trans_version_cache_; MemtableHandlesCache memtables_cache_; }; // tx_table diff --git a/src/storage/tx_table/ob_tx_table.cpp b/src/storage/tx_table/ob_tx_table.cpp index c5bb57ff09..1eb52de0c6 100644 --- a/src/storage/tx_table/ob_tx_table.cpp +++ b/src/storage/tx_table/ob_tx_table.cpp @@ -22,9 +22,10 @@ #include "storage/tx/ob_trans_define.h" #include "storage/tx/ob_trans_part_ctx.h" #include "storage/tx/ob_trans_service.h" +#include "storage/tx/ob_tx_data_functor.h" +#include "storage/tx/ob_keep_alive_ls_handler.h" #include "storage/tx_storage/ob_ls_map.h" #include "storage/tx_storage/ob_ls_service.h" -#include "storage/tx/ob_tx_data_functor.h" #include "storage/tx_table/ob_tx_data_cache.h" #include "storage/tx_table/ob_tx_table_define.h" #include "storage/tx_table/ob_tx_table_iterator.h" @@ -35,7 +36,13 @@ namespace oceanbase { using namespace share; using namespace palf; +using namespace transaction; + namespace storage { + + +int64_t ObTxTable::UPDATE_MIN_START_SCN_INTERVAL = 5 * 1000 * 1000; // 5 seconds + int ObTxTable::init(ObLS *ls) { int ret = OB_SUCCESS; @@ -149,6 +156,7 @@ int ObTxTable::offline() LOG_WARN("offline tx data table failed", K(ret)); } else { recycle_scn_cache_.reset(); + (void)disable_upper_trans_calculation(); 
ATOMIC_STORE(&state_, TxTableState::OFFLINE); LOG_INFO("tx table offline succeed", K(ls_id_), KPC(this)); } @@ -172,6 +180,7 @@ int ObTxTable::online() LOG_WARN("failed to load tx ctx table", K(ret)); } else { recycle_scn_cache_.reset(); + (void)reset_ctx_min_start_scn_info_(); ATOMIC_STORE(&state_, ObTxTable::ONLINE); LOG_INFO("tx table online succeed", K(ls_id_), KPC(this)); } @@ -593,6 +602,7 @@ void ObTxTable::destroy() ls_id_.reset(); ls_ = nullptr; epoch_ = 0; + ctx_min_start_scn_info_.reset(); is_inited_ = false; } @@ -970,11 +980,82 @@ int ObTxTable::get_recycle_scn(SCN &real_recycle_scn) return ret; } +void ObTxTable::reset_ctx_min_start_scn_info_() +{ + SpinWLockGuard lock_guard(ctx_min_start_scn_info_.lock_); + ctx_min_start_scn_info_.reset(); +} + +int ObTxTable::get_uncommitted_tx_min_start_scn(share::SCN &min_start_scn, share::SCN &effective_scn) +{ + int ret = OB_SUCCESS; + SpinRLockGuard lock_guard(ctx_min_start_scn_info_.lock_); + min_start_scn = ctx_min_start_scn_info_.min_start_scn_in_ctx_; + effective_scn = ctx_min_start_scn_info_.keep_alive_scn_; + if (effective_scn.is_min()) { + ret = OB_EAGAIN; + } + return ret; +} + +void ObTxTable::update_min_start_scn_info(const SCN &max_decided_scn) +{ + int64_t cur_ts = ObClockGenerator::getClock(); + SpinWLockGuard lock_guard(ctx_min_start_scn_info_.lock_); + + // recheck update condition and do update calc_upper_info + if (cur_ts - ctx_min_start_scn_info_.update_ts_ > ObTxTable::UPDATE_MIN_START_SCN_INTERVAL && + max_decided_scn > ctx_min_start_scn_info_.keep_alive_scn_) { + SCN min_start_scn = SCN::min_scn(); + SCN keep_alive_scn = SCN::min_scn(); + MinStartScnStatus status; + (void)ls_->get_min_start_scn(min_start_scn, keep_alive_scn, status); + + if (MinStartScnStatus::UNKOWN == status) { + // do nothing + } else { + int ret = OB_SUCCESS; + CtxMinStartScnInfo tmp_min_start_scn_info; + tmp_min_start_scn_info.keep_alive_scn_ = keep_alive_scn; + tmp_min_start_scn_info.update_ts_ = cur_ts; + if 
(MinStartScnStatus::NO_CTX == status) {
+        // NO_CTX: use the previous keep_alive_scn as min_start_scn
+        tmp_min_start_scn_info.min_start_scn_in_ctx_ = ctx_min_start_scn_info_.keep_alive_scn_;
+      } else if (MinStartScnStatus::HAS_CTX == status) {
+        tmp_min_start_scn_info.min_start_scn_in_ctx_ = min_start_scn; // HAS_CTX: use min_start_scn directly
+      } else {
+        ret = OB_ERR_UNEXPECTED; // any other status value is rejected as an error
+        STORAGE_LOG(ERROR, "invalid min start scn status", K(min_start_scn), K(keep_alive_scn), K(status));
+      }
+
+      if (OB_FAIL(ret)) { // an earlier error is kept as-is and the cached info is left untouched
+      } else if (tmp_min_start_scn_info.min_start_scn_in_ctx_ < ctx_min_start_scn_info_.min_start_scn_in_ctx_) { // new value smaller than the stored one is an error
+        ret = OB_ERR_UNEXPECTED;
+        STORAGE_LOG(WARN, "invalid min start scn", K(tmp_min_start_scn_info), K(ctx_min_start_scn_info_));
+      } else {
+        ctx_min_start_scn_info_ = tmp_min_start_scn_info; // operator= copies the value fields only, not lock_
+      }
+    }
+  }
+}
+
 int ObTxTable::get_upper_trans_version_before_given_scn(const SCN sstable_end_scn, SCN &upper_trans_version)
 {
   return tx_data_table_.get_upper_trans_version_before_given_scn(sstable_end_scn, upper_trans_version);
 }
 
+void ObTxTable::disable_upper_trans_calculation() // forwards to tx_data_table_, then calls reset_ctx_min_start_scn_info_()
+{
+  (void)tx_data_table_.disable_upper_trans_calculation(); // result explicitly discarded
+  reset_ctx_min_start_scn_info_();
+}
+
+void ObTxTable::enable_upper_trans_calculation(const share::SCN latest_transfer_scn) // reverse order of disable: reset first, then forward
+{
+  reset_ctx_min_start_scn_info_();
+  (void)tx_data_table_.enable_upper_trans_calculation(latest_transfer_scn); // result explicitly discarded
+}
+
 int ObTxTable::get_start_tx_scn(SCN &start_tx_scn)
 {
   int ret = OB_SUCCESS;
diff --git a/src/storage/tx_table/ob_tx_table.h b/src/storage/tx_table/ob_tx_table.h
index 12ec5b154d..6376f7b5be 100644
--- a/src/storage/tx_table/ob_tx_table.h
+++ b/src/storage/tx_table/ob_tx_table.h
@@ -58,7 +58,35 @@ class ObTxTable
     TO_STRING_KV(K(val_), K(update_ts_));
   };
 
+  struct CtxMinStartScnInfo // holds min_start_scn_in_ctx_, keep_alive_scn_ and update_ts_, plus a SpinRWLock
+  {
+    CtxMinStartScnInfo() { reset(); }
+
+    void reset() // both SCNs go back to min and update_ts_ to 0; lock_ is not touched
+    {
+      min_start_scn_in_ctx_.set_min();
+      keep_alive_scn_.set_min();
+      update_ts_ = 0;
+    }
+
+    CtxMinStartScnInfo &operator= (const CtxMinStartScnInfo &rhs) // copies the three value fields; lock_ is not assigned
+    {
+      min_start_scn_in_ctx_ = rhs.min_start_scn_in_ctx_;
+      keep_alive_scn_ = rhs.keep_alive_scn_;
+      update_ts_ = rhs.update_ts_;
+      return *this;
+    }
+
+    share::SCN min_start_scn_in_ctx_;
+    share::SCN keep_alive_scn_;
+    int64_t update_ts_;
+    common::SpinRWLock lock_; // NOTE(review): presumably guards the fields above; its users are not visible in this hunk, confirm in ob_tx_table.cpp
+
+    TO_STRING_KV(K(min_start_scn_in_ctx_), K(keep_alive_scn_), K(update_ts_)); // lock_ is not printed
+  };
+
 public:
+  static int64_t UPDATE_MIN_START_SCN_INTERVAL; // non-const static member; its definition is not part of this hunk
   static const int64_t INVALID_READ_EPOCH = -1;
   static const int64_t CHECK_AND_ONLINE_PRINT_INVERVAL_US = 5 * 1000 * 1000; // 5 seconds
   static const int64_t DEFAULT_TX_RESULT_RETENTION_S = 300L;
@@ -82,8 +110,8 @@ public:
         mini_cache_hit_cnt_(0),
         kv_cache_hit_cnt_(0),
         read_tx_data_table_cnt_(0),
-        recycle_scn_cache_()
-  {}
+        recycle_scn_cache_(),
+        ctx_min_start_scn_info_() {} // CtxMinStartScnInfo's default ctor calls reset()
 
   ObTxTable(ObTxDataTable &tx_data_table)
       : is_inited_(false),
@@ -95,8 +123,8 @@ public:
         mini_cache_hit_cnt_(0),
         kv_cache_hit_cnt_(0),
         read_tx_data_table_cnt_(0),
-        recycle_scn_cache_()
-  {}
+        recycle_scn_cache_(),
+        ctx_min_start_scn_info_() {} // CtxMinStartScnInfo's default ctor calls reset()
   ~ObTxTable() {}
 
   int init(ObLS *ls);
@@ -260,11 +288,27 @@ public:
    */
   int get_start_tx_scn(share::SCN &start_tx_scn);
 
+  /**
+   * @brief get min_start_scn of uncommitted tx recorded on TxTable
+   *
+   * @param[out] min_start_scn the minimum start_scn of all uncommitted tx
+   * @param[out] effective_scn min_start_scn is usable only if max_decided_scn is larger than effective_scn
+   */
+  int get_uncommitted_tx_min_start_scn(share::SCN &min_start_scn, share::SCN &effective_scn);
+
+  /**
+   * @brief used for updating the member ctx_min_start_scn_info_
+   */
+  void update_min_start_scn_info(const share::SCN &max_decided_scn);
+
   int generate_virtual_tx_data_row(const transaction::ObTransID tx_id, observer::VirtualTxDataRow &row_data);
   int dump_single_tx_data_2_text(const int64_t tx_id_int, const char *fname);
 
   const char* get_state_string(const int64_t state) const;
 
+  void disable_upper_trans_calculation(); // also calls reset_ctx_min_start_scn_info_()
+  void enable_upper_trans_calculation(const share::SCN latest_transfer_scn); // latest_transfer_scn is passed on to tx_data_table_
+
   TO_STRING_KV(KP(this),
                K_(is_inited),
                K_(epoch),
@@ -273,7 +317,8 @@ public:
                K_(tx_data_table),
                K_(mini_cache_hit_cnt),
                K_(kv_cache_hit_cnt),
-               K_(read_tx_data_table_cnt));
+               K_(read_tx_data_table_cnt),
+               K_(ctx_min_start_scn_info));
 
 public: // getter & setter
   ObTxDataTable *get_tx_data_table() { return &tx_data_table_; }
@@ -307,6 +352,7 @@ private:
   int load_tx_ctx_table_();
   int offline_tx_ctx_table_();
   int offline_tx_data_table_();
+  void reset_ctx_min_start_scn_info_(); // called by disable_/enable_upper_trans_calculation()
 
   int check_tx_data_in_mini_cache_(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn);
   int check_tx_data_in_kv_cache_(ObReadTxDataArg &read_tx_data_arg, ObITxDataCheckFunctor &fn);
@@ -316,6 +362,7 @@ private:
                                    const int64_t read_epoch,
                                    const bool need_log_error,
                                    int &ret);
+
 private:
   static const int64_t LS_TX_CTX_SCHEMA_VERSION = 0;
   static const int64_t LS_TX_CTX_SCHEMA_ROWKEY_CNT = 1;
@@ -333,6 +380,7 @@ private:
   int64_t kv_cache_hit_cnt_;
   int64_t read_tx_data_table_cnt_;
   RecycleSCNCache recycle_scn_cache_;
+  CtxMinStartScnInfo ctx_min_start_scn_info_; // included in TO_STRING_KV via K_(ctx_min_start_scn_info)
 };
 } // namespace storage
 } // namespace oceanbase
diff --git a/unittest/storage/test_tenant_tablet_stat_mgr.cpp b/unittest/storage/test_tenant_tablet_stat_mgr.cpp
index 0b7163d752..86c82f36e2 100644
--- a/unittest/storage/test_tenant_tablet_stat_mgr.cpp
+++ b/unittest/storage/test_tenant_tablet_stat_mgr.cpp
@@ -99,6 +99,7 @@ void TestTenantTabletStatMgr::batch_report_stat(int64_t report_num)
   ASSERT_TRUE(NULL != stat_mgr_);
   ASSERT_EQ(true, stat_mgr_->is_inited_);
 
+  std::thread *threads = new std::thread[report_num]; // one slot per report; released by delete [] at the end of this function
   for (int64_t i = 0; i < report_num; ++i) {
     ObTabletStat curr_stat;
     curr_stat.ls_id_ = 1;
@@ -106,11 +107,14 @@ void TestTenantTabletStatMgr::batch_report_stat(int64_t report_num)
     curr_stat.query_cnt_ = 100 * (i + 1);
     curr_stat.scan_physical_row_cnt_ = 10000 + i;
 
-    std::thread sub_report_thread(report, stat_mgr_, curr_stat);
-    if (sub_report_thread.joinable()) {
-      sub_report_thread.join();
+    threads[i] = std::thread(report, stat_mgr_, curr_stat); // started without joining; the old code joined each thread right after starting it
+  }
+  for (int64_t i = 0; i < report_num; ++i) { // second pass: join every thread that was started above
+    if (threads[i].joinable()) {
+      threads[i].join();
     }
   }
+  delete []threads; // runs after the join loop
 }
 
 namespace unittest
@@ -382,9 +386,34 @@ TEST_F(TestTenantTabletStatMgr, basic_tablet_stat_mgr)
   ObTabletStat res;
   share::ObLSID ls_id(1);
   common::ObTabletID tablet_id(200123);
-  ret = stat_mgr_->get_latest_tablet_stat(ls_id, tablet_id, res);
+  storage::ObTabletStat unused_tablet_stat; // extra argument of the widened get_latest_tablet_stat() call; never asserted on
+  share::schema::ObTableModeFlag unused_mode; // likewise only passed through, for both calls below
+  ret = stat_mgr_->get_latest_tablet_stat(ls_id, tablet_id, res, unused_tablet_stat, unused_mode);
   ASSERT_EQ(OB_SUCCESS, ret);
   ASSERT_EQ(100, res.query_cnt_);
+
+  ASSERT_EQ(1, stat_mgr_->stream_map_.size());
+  ASSERT_EQ(OB_SUCCESS, stat_mgr_->clear_tablet_stat(ls_id, tablet_id)); // asserts below: node is still in the map with a valid key, but total_stat_ is invalid
+  const ObTabletStatKey key(ls_id, tablet_id);
+  ObTabletStreamNode *stream_node = nullptr;
+  ASSERT_TRUE(key.is_valid());
+  ASSERT_EQ(OB_SUCCESS, stat_mgr_->stream_map_.get_refactored(key, stream_node));
+  ASSERT_TRUE(stream_node->stream_.key_.is_valid());
+  ASSERT_FALSE(stream_node->stream_.total_stat_.is_valid());
+
+  tablet_stat.delete_row_cnt_ = 12345;
+  for (int64_t i = 0; i < 1000; i++) { // report the same stat 1000 times; the totals asserted below are all written as <value> * 1000
+    ret = stat_mgr_->report_stat(tablet_stat, report_succ);
+    ASSERT_EQ(OB_SUCCESS, ret);
+  }
+  stat_mgr_->process_stats();
+  storage::ObTabletStat total_tablet_stat;
+  ret = stat_mgr_->get_latest_tablet_stat(ls_id, tablet_id, res, total_tablet_stat, unused_mode);
+  ASSERT_EQ(OB_SUCCESS, ret);
+  ASSERT_EQ(100 * 1000, total_tablet_stat.query_cnt_);
+  ASSERT_EQ(100000 * 1000, total_tablet_stat.scan_logical_row_cnt_);
+  ASSERT_EQ(1000000 * 1000, total_tablet_stat.scan_physical_row_cnt_);
+  ASSERT_EQ(12345 * 1000, total_tablet_stat.delete_row_cnt_);
 }
 
 TEST_F(TestTenantTabletStatMgr, multi_report_tablet_stat)
@@ -403,7 +432,32 @@ TEST_F(TestTenantTabletStatMgr, multi_report_tablet_stat)
   for ( ; iter != stat_mgr_->stream_map_.end(); ++iter) {
     ++report_cnt;
   }
-  ASSERT_TRUE(report_cnt > 5);
+  ASSERT_TRUE(report_cnt == 10); // tightened from "> 5": exactly 10 streams are expected now
+}
+
+TEST_F(TestTenantTabletStatMgr, bacth_clear_tablet_stat) // NOTE(review): "bacth" looks like a typo for "batch"
+{
+  EXPECT_EQ(OB_SYS_TENANT_ID, MTL_ID());
+  ObTenantTabletStatMgr *stat_mgr = MTL(ObTenantTabletStatMgr *);
+  ASSERT_TRUE(NULL != stat_mgr);
+  ASSERT_TRUE(stat_mgr->is_inited_);
+
+  int64_t report_num = 100;
+  batch_report_stat(report_num);
+  stat_mgr_->process_stats();
+
+  ObLSID ls_id(1);
+  ObSEArray tablet_ids; // NOTE(review): ObSEArray has no template arguments here; possibly lost in extraction, confirm against the real file
+  for (int64_t i = 0; i < report_num; i++) {
+    ASSERT_EQ(OB_SUCCESS, tablet_ids.push_back(ObTabletID(300001 + i))); // NOTE(review): assumes batch_report_stat() uses tablet ids starting at 300001; confirm
+  }
+  ASSERT_EQ(100, stat_mgr_->stream_map_.size());
+  ASSERT_EQ(OB_SUCCESS, stat_mgr->batch_clear_tablet_stat(ls_id, tablet_ids));
+  ObTenantTabletStatMgr::TabletStreamMap::iterator iter = stat_mgr_->stream_map_.begin();
+  for ( ; iter != stat_mgr_->stream_map_.end(); ++iter) { // every stream must still have a valid key but an invalid total_stat_
+    ASSERT_TRUE(iter->second->stream_.key_.is_valid());
+    ASSERT_FALSE(iter->second->stream_.total_stat_.is_valid());
+  }
 }
 
 } // end unittest