diff --git a/deps/oblib/src/common/ob_range.cpp b/deps/oblib/src/common/ob_range.cpp index a56070f13e..83cf2165c3 100644 --- a/deps/oblib/src/common/ob_range.cpp +++ b/deps/oblib/src/common/ob_range.cpp @@ -283,6 +283,5 @@ int64_t ObNewVersionRange::hash() const return hash_value; } - } } diff --git a/deps/oblib/src/common/ob_range.h b/deps/oblib/src/common/ob_range.h index a9654f0f8f..b79fc991ec 100644 --- a/deps/oblib/src/common/ob_range.h +++ b/deps/oblib/src/common/ob_range.h @@ -323,7 +323,6 @@ public: TO_STRING_KV(K_(base_version), K_(snapshot_version)); }; - class ObNewRange { diff --git a/deps/oblib/src/common/rowkey/ob_rowkey.cpp b/deps/oblib/src/common/rowkey/ob_rowkey.cpp index 7d0caaa7ab..24926ab335 100644 --- a/deps/oblib/src/common/rowkey/ob_rowkey.cpp +++ b/deps/oblib/src/common/rowkey/ob_rowkey.cpp @@ -53,6 +53,15 @@ int ObRowkey::to_store_rowkey(ObStoreRowkey &store_rowkey) const return ret; } +void ObRowkey::destroy(ObIAllocator &allocator) +{ + if (OB_NOT_NULL(obj_ptr_)) { + allocator.free(obj_ptr_); + obj_ptr_ = nullptr; + } + obj_cnt_ = 0; +} + int ObRowkey::equal(const ObRowkey &rhs, bool &is_equal) const { int ret = OB_SUCCESS; diff --git a/deps/oblib/src/common/rowkey/ob_rowkey.h b/deps/oblib/src/common/rowkey/ob_rowkey.h index d686bfd3c6..23e3e97929 100644 --- a/deps/oblib/src/common/rowkey/ob_rowkey.h +++ b/deps/oblib/src/common/rowkey/ob_rowkey.h @@ -38,6 +38,7 @@ public: public: int to_store_rowkey(ObStoreRowkey &store_rowkey) const; void reset() {obj_ptr_ = NULL; obj_cnt_ = 0; } + void destroy(ObIAllocator &allocator); inline int64_t get_obj_cnt() const { return obj_cnt_; } inline const ObObj *get_obj_ptr() const { return obj_ptr_; } inline ObObj *get_obj_ptr() { return obj_ptr_; } diff --git a/deps/oblib/src/common/rowkey/ob_store_rowkey.cpp b/deps/oblib/src/common/rowkey/ob_store_rowkey.cpp index fe41db1ab8..88312bf847 100644 --- a/deps/oblib/src/common/rowkey/ob_store_rowkey.cpp +++ b/deps/oblib/src/common/rowkey/ob_store_rowkey.cpp 
@@ -36,6 +36,13 @@ ObRowkey ObStoreRowkey::to_rowkey() const return key_; } +void ObStoreRowkey::destroy(ObIAllocator &allocator) +{ + key_.destroy(allocator); + hash_ = 0; + group_idx_ = 0; +} + uint64_t ObStoreRowkey::murmurhash(const uint64_t hash) const { uint64_t hash_ret = hash; diff --git a/deps/oblib/src/common/rowkey/ob_store_rowkey.h b/deps/oblib/src/common/rowkey/ob_store_rowkey.h index ff101e591f..32bd47c5a0 100644 --- a/deps/oblib/src/common/rowkey/ob_store_rowkey.h +++ b/deps/oblib/src/common/rowkey/ob_store_rowkey.h @@ -33,6 +33,7 @@ public: ObStoreRowkey() : key_(), hash_(0), group_idx_(0) {} ~ObStoreRowkey() {}; inline void reset() {key_.reset(); hash_ = 0; group_idx_ = 0; } + void destroy(ObIAllocator &allocator); //TODO column order is fake now, need to enable by someone in some day //FIXME-yangsuli diff --git a/deps/oblib/src/lib/mysqlclient/ob_mysql_result.h b/deps/oblib/src/lib/mysqlclient/ob_mysql_result.h index 133546ad56..0ec00cf828 100644 --- a/deps/oblib/src/lib/mysqlclient/ob_mysql_result.h +++ b/deps/oblib/src/lib/mysqlclient/ob_mysql_result.h @@ -180,6 +180,46 @@ }\ } +#define EXTRACT_UINT_FIELD_MYSQL_WITH_DEFAULT_VALUE(result, column_name, field, type, skip_null_error, skip_column_error, default_value) \ + if (OB_SUCC(ret)) \ + { \ + uint64_t int_value = 0; \ + if (OB_SUCCESS == (ret = (result).get_uint(column_name, int_value))) \ + { \ + field = static_cast(int_value); \ + } \ + else if (OB_ERR_NULL_VALUE == ret) \ + { \ + if (skip_null_error) \ + { \ + SQL_LOG(TRACE, "null value, ignore", K(column_name)); \ + field = static_cast(default_value); \ + ret = OB_SUCCESS; \ + } \ + else \ + { \ + SQL_LOG(WARN, "null value", K(column_name), K(ret)); \ + } \ + } \ + else if (OB_ERR_COLUMN_NOT_FOUND == ret) \ + { \ + if (skip_column_error) \ + { \ + SQL_LOG(INFO, "column not found, ignore", K(column_name)); \ + field = static_cast(default_value); \ + ret = OB_SUCCESS; \ + } \ + else \ + { \ + SQL_LOG(WARN, "column not found", K(column_name), 
K(ret)); \ + } \ + } \ + else \ + { \ + SQL_LOG(WARN, "fail to get column in row. ", K(column_name), K(ret)); \ + }\ + } + #define EXTRACT_INT_FIELD_MYSQL_SKIP_RET(result, column_name, field, type) \ if (OB_SUCC(ret)) \ { \ diff --git a/deps/oblib/src/lib/ob_define.h b/deps/oblib/src/lib/ob_define.h index 517455b8e7..9dc221595b 100644 --- a/deps/oblib/src/lib/ob_define.h +++ b/deps/oblib/src/lib/ob_define.h @@ -1333,6 +1333,7 @@ const int64_t OB_PARALLEL_MERGE_INFO_LENGTH = 512; const int64_t OB_COMPACTION_EVENT_STR_LENGTH = 256; const int64_t OB_PART_TABLE_INFO_LENGTH = 512; const int64_t OB_MACRO_ID_INFO_LENGTH = 256; +const int64_t OB_COMPACTION_INFO_LENGTH = 128; // for erasure code const int64_t OB_MAX_EC_STRIPE_COUNT = 32; @@ -1671,10 +1672,6 @@ const int64_t OB_DUMP_UNUSUAL_TABLET_TYPE = 2; const int64_t OB_MAX_SYS_VAR_NON_STRING_VAL_LENGTH = 128; const int64_t OB_MAX_SYS_VAR_VAL_LENGTH = 4096;//original 128 is too small -//mini minor merge related parameters -const int64_t OB_MIN_MINOR_SSTABLE_ROW_COUNT = 2000000; // L0 -> L1 row count threashold -const int64_t OB_DEFAULT_COMPACTION_AMPLIFICATION_FACTOR= 25; // / mini_sstable_total > minor_sstable_total * OB_DEFAULT_COMPACTION_AMPLIFICATION_FACTOR / 100 - // bitset defines const int64_t OB_DEFAULT_BITSET_SIZE = OB_MAX_TABLE_NUM_PER_STMT; const int64_t OB_DEFAULT_BITSET_SIZE_FOR_BASE_COLUMN = 64; diff --git a/deps/oblib/src/lib/utility/ob_macro_utils.h b/deps/oblib/src/lib/utility/ob_macro_utils.h index b648fa1f68..37433c9f62 100644 --- a/deps/oblib/src/lib/utility/ob_macro_utils.h +++ b/deps/oblib/src/lib/utility/ob_macro_utils.h @@ -659,6 +659,19 @@ for (__typeof__((c).at(0)) *it = ((extra_condition) && (c).count() > 0 ? 
&(c).at bret; \ }) +#define REACH_TENANT_TIME_INTERVAL(i) \ + ({ \ + bool bret = false; \ + RLOCAL_STATIC(int64_t, last_time) = ::oceanbase::common::ObTimeUtility::fast_current_time(); \ + int64_t cur_time = ::oceanbase::common::ObTimeUtility::fast_current_time(); \ + int64_t old_time = last_time; \ + if (OB_UNLIKELY((i + last_time) < cur_time) \ + && old_time == ATOMIC_CAS(&last_time, old_time, cur_time)) \ + { \ + bret = true; \ + } \ + bret; \ + }) // reach count per secound #define REACH_COUNT_PER_SEC(i) \ diff --git a/deps/oblib/src/lib/utility/ob_tracepoint.h b/deps/oblib/src/lib/utility/ob_tracepoint.h index 89548128fd..90836853b9 100644 --- a/deps/oblib/src/lib/utility/ob_tracepoint.h +++ b/deps/oblib/src/lib/utility/ob_tracepoint.h @@ -590,7 +590,16 @@ class EventTable // Compaction Related 700-750 EN_COMPACTION_DIAGNOSE_TABLE_STORE_UNSAFE_FAILED = 700, EN_COMPACTION_DIAGNOSE_CANNOT_MAJOR = 701, - EN_SESSION_LEAK_COUNT_THRESHOLD = 710, + EN_COMPACTION_MERGE_TASK = 702, + EN_MEDIUM_COMPACTION_SUBMIT_CLOG_FAILED = 703, + EN_MEDIUM_COMPACTION_UPDATE_CUR_SNAPSHOT_FAILED = 704, + EN_MEDIUM_REPLICA_CHECKSUM_ERROR = 705, + EN_MEDIUM_CREATE_DAG = 706, + EN_MEDIUM_VERIFY_GROUP_SKIP_SET_VERIFY = 707, + EN_MEDIUM_VERIFY_GROUP_SKIP_COLUMN_CHECKSUM = 708, + + // please add new trace point after 750 + EN_SESSION_LEAK_COUNT_THRESHOLD = 751, EN_END_PARTICIPANT = 800, //LS Migration Related 900 - 1000 diff --git a/src/logservice/ob_log_base_type.h b/src/logservice/ob_log_base_type.h index 9cbe49ded3..6c7f94dce6 100644 --- a/src/logservice/ob_log_base_type.h +++ b/src/logservice/ob_log_base_type.h @@ -63,9 +63,13 @@ enum ObLogBaseType DAS_ID_LOG_BASE_TYPE = 15, //for recovery_ls_service RESTORE_SERVICE_LOG_BASE_TYPE = 16, + + RESERVED_SNAPSHOT_LOG_BASE_TYPE = 17, + + MEDIUM_COMPACTION_LOG_BASE_TYPE = 18, + // pay attention!!! 
// add log type in log_base_type_to_string - // max value MAX_LOG_BASE_TYPE, }; @@ -110,6 +114,10 @@ int log_base_type_to_string(const ObLogBaseType log_type, strncpy(str ,"DAS_ID", str_len); } else if (log_type == RESTORE_SERVICE_LOG_BASE_TYPE) { strncpy(str ,"RESTORE_SERVICE", str_len); + } else if (log_type == RESERVED_SNAPSHOT_LOG_BASE_TYPE) { + strncpy(str ,"RESERVED_SNAPSHOT", str_len); + } else if (log_type == MEDIUM_COMPACTION_LOG_BASE_TYPE) { + strncpy(str ,"MEDIUM_COMPACTION", str_len); } else { ret = OB_INVALID_ARGUMENT; } diff --git a/src/observer/CMakeLists.txt b/src/observer/CMakeLists.txt index 57dc378dbe..aa0f5d007d 100644 --- a/src/observer/CMakeLists.txt +++ b/src/observer/CMakeLists.txt @@ -147,6 +147,7 @@ ob_set_subtarget(ob_server virtual_table virtual_table/ob_all_virtual_compaction_diagnose_info.cpp virtual_table/ob_all_virtual_server_compaction_event_history.cpp virtual_table/ob_all_virtual_compaction_suggestion.cpp + virtual_table/ob_all_virtual_tablet_compaction_info.cpp virtual_table/ob_all_virtual_dag.cpp virtual_table/ob_all_virtual_dag_warning_history.cpp virtual_table/ob_all_virtual_dblink_info.cpp diff --git a/src/observer/ob_server.cpp b/src/observer/ob_server.cpp index 44f2d5533c..82665e770e 100644 --- a/src/observer/ob_server.cpp +++ b/src/observer/ob_server.cpp @@ -72,7 +72,6 @@ #include "storage/ob_i_store.h" #include "storage/ob_long_ops_monitor.h" #include "storage/compaction/ob_sstable_merge_info_mgr.h" -#include "storage/ob_table_store_stat_mgr.h" #include "storage/tablelock/ob_table_lock_service.h" #include "storage/tx/ob_ts_mgr.h" #include "storage/ob_file_system_router.h" @@ -323,8 +322,6 @@ int ObServer::init(const ObServerOptions &opts, const ObPLogWriterCfg &log_cfg) LOG_ERROR("set_use_rpc_table failed", KR(ret)); } else if (OB_FAIL(ObSysTaskStatMgr::get_instance().set_self_addr(self_addr_))) { LOG_ERROR("set sys task status self addr failed", KR(ret)); - } else if (OB_FAIL(ObTableStoreStatMgr::get_instance().init())) 
{ - LOG_ERROR("init table store stat mgr failed", KR(ret)); } else if (OB_FAIL(LONG_OPS_MONITOR_INSTANCE.init())) { LOG_ERROR("init long ops monitor instance failed", KR(ret)); } else if (OB_FAIL(ObCompatModeGetter::instance().init(&sql_proxy_))) { diff --git a/src/observer/ob_service.cpp b/src/observer/ob_service.cpp index 32f6611af8..3344f9a74f 100644 --- a/src/observer/ob_service.cpp +++ b/src/observer/ob_service.cpp @@ -2096,7 +2096,9 @@ int ObService::inner_fill_tablet_info_( addr, snapshot_version, data_size, - required_size))) { + required_size, + 0/*report_scn*/, + ObTabletReplica::SCN_STATUS_IDLE))) { LOG_WARN("fail to init a tablet replica", KR(ret), K(tenant_id), K(tablet_id), K(tablet_replica)); } else if (!need_checksum) { diff --git a/src/observer/omt/ob_multi_tenant.cpp b/src/observer/omt/ob_multi_tenant.cpp index c167db6a86..bf7382ab14 100644 --- a/src/observer/omt/ob_multi_tenant.cpp +++ b/src/observer/omt/ob_multi_tenant.cpp @@ -66,6 +66,7 @@ #include "storage/tx/ob_id_service.h" #include "storage/compaction/ob_tenant_compaction_progress.h" #include "storage/compaction/ob_server_compaction_event_history.h" +#include "storage/ob_tenant_tablet_stat_mgr.h" #include "storage/memtable/ob_lock_wait_mgr.h" #include "storage/slog_ckpt/ob_server_checkpoint_slog_handler.h" #include "storage/tablelock/ob_table_lock_service.h" @@ -286,6 +287,7 @@ int ObMultiTenant::init(ObAddr myaddr, MTL_BIND2(mtl_new_default, ObTxLoopWorker::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); // ObTxLoopWorker MTL_BIND2(mtl_new_default, compaction::ObTenantCompactionProgressMgr::mtl_init, nullptr, nullptr, nullptr, mtl_destroy_default); MTL_BIND2(mtl_new_default, compaction::ObServerCompactionEventHistory::mtl_init, nullptr, nullptr, nullptr, mtl_destroy_default); + MTL_BIND2(mtl_new_default, storage::ObTenantTabletStatMgr::mtl_init, nullptr, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, 
storage::ObTenantSSTableMergeInfoMgr::mtl_init, nullptr, nullptr, nullptr, mtl_destroy_default); MTL_BIND2(mtl_new_default, memtable::ObLockWaitMgr::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); MTL_BIND2(mtl_new_default, logservice::ObGarbageCollector::mtl_init, mtl_start_default, mtl_stop_default, mtl_wait_default, mtl_destroy_default); diff --git a/src/observer/report/ob_tablet_table_updater.cpp b/src/observer/report/ob_tablet_table_updater.cpp index b11b43dc85..5854be25c4 100644 --- a/src/observer/report/ob_tablet_table_updater.cpp +++ b/src/observer/report/ob_tablet_table_updater.cpp @@ -20,6 +20,7 @@ #include "share/ob_tablet_replica_checksum_operator.h" // for ObTabletReplicaChecksumItem #include "lib/mysqlclient/ob_mysql_transaction.h" // ObMySQLTransaction #include "lib/mysqlclient/ob_mysql_proxy.h" +#include "share/ob_tablet_meta_table_compaction_operator.h" namespace oceanbase { @@ -341,7 +342,9 @@ int ObTabletTableUpdater::generate_tasks_( GCONF.self_addr_, 1/*snapshot_version*/, 1/*data_size*/, - 1/*required_size*/))) { + 1/*required_size*/, + 0/*report_scn*/, + ObTabletReplica::SCN_STATUS_IDLE))) { LOG_WARN("fail to init ObTabletReplica", KR(ret), KPC(task), "server", GCONF.self_addr_); } else if (OB_FAIL(remove_tablet_tasks.reserve(count))) { diff --git a/src/observer/virtual_table/ob_all_virtual_compaction_diagnose_info.cpp b/src/observer/virtual_table/ob_all_virtual_compaction_diagnose_info.cpp index 1c1d3ec6a6..a8455ec865 100644 --- a/src/observer/virtual_table/ob_all_virtual_compaction_diagnose_info.cpp +++ b/src/observer/virtual_table/ob_all_virtual_compaction_diagnose_info.cpp @@ -11,6 +11,7 @@ */ #include "ob_all_virtual_compaction_diagnose_info.h" +#include "storage/compaction/ob_compaction_util.h" namespace oceanbase { diff --git a/src/observer/virtual_table/ob_all_virtual_compaction_suggestion.cpp b/src/observer/virtual_table/ob_all_virtual_compaction_suggestion.cpp index fe23c4c831..2b990932c5 
100644 --- a/src/observer/virtual_table/ob_all_virtual_compaction_suggestion.cpp +++ b/src/observer/virtual_table/ob_all_virtual_compaction_suggestion.cpp @@ -11,7 +11,7 @@ */ #include "ob_all_virtual_compaction_suggestion.h" - +#include "storage/compaction/ob_compaction_util.h" namespace oceanbase { using namespace storage; diff --git a/src/observer/virtual_table/ob_all_virtual_memstore_info.cpp b/src/observer/virtual_table/ob_all_virtual_memstore_info.cpp index 364fa1833b..3af1c1e3b6 100644 --- a/src/observer/virtual_table/ob_all_virtual_memstore_info.cpp +++ b/src/observer/virtual_table/ob_all_virtual_memstore_info.cpp @@ -50,6 +50,7 @@ void ObAllVirtualMemstoreInfo::reset() tables_handle_.reset(); memtable_array_pos_ = 0; memset(freeze_time_dist_, 0, OB_MAX_CHAR_LENGTH); + memset(compaction_info_buf_, 0, sizeof(compaction_info_buf_)); ObVirtualTableScannerIterator::reset(); } @@ -338,6 +339,42 @@ int ObAllVirtualMemstoreInfo::process_curr_tenant(ObNewRow *&row) cur_row_.cells_[i].set_varchar(freeze_time_dist_); cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); break; + case OB_APP_MIN_COLUMN_ID + 24: { + // compaction info list: one "medium<N>_<snapshot>," entry per unit; medium_idx numbers the entries, i stays the output column index + cur_row_.cells_[i].set_varchar("-"); + if (mt->is_data_memtable()) { + if (mt->has_multi_source_data_unit(MultiSourceDataUnitType::MEDIUM_COMPACTION_INFO)) { + int64_t pos = 0; + compaction::ObMediumCompactionInfo medium_info; + ObMultiSourceData::ObIMultiSourceDataUnitList dst_list; + if (OB_SUCC(mt->get_multi_source_data_unit_list(&medium_info, dst_list, allocator_))) { + int medium_idx = 0; + DLIST_FOREACH_X(info, dst_list, OB_SUCC(ret)) { + common::databuff_printf( + compaction_info_buf_, + sizeof(compaction_info_buf_), + pos, + "medium%d_%ld,", + medium_idx++, + static_cast(info)->medium_snapshot_); + } + if (OB_SUCC(ret)) { + cur_row_.cells_[i].set_varchar(compaction_info_buf_); + } + } + + + DLIST_FOREACH_REMOVESAFE_NORET(info, dst_list) { + dst_list.remove(info); + 
info->~ObIMultiSourceDataUnit(); + allocator_->free(info); + } + COMMON_LOG(DEBUG, "medium_list", K(dst_list), K(cur_row_.cells_[i])); + } + } + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + } default: ret = OB_ERR_UNEXPECTED; SERVER_LOG(WARN, "invalid col_id", K(ret), K(col_id)); diff --git a/src/observer/virtual_table/ob_all_virtual_memstore_info.h b/src/observer/virtual_table/ob_all_virtual_memstore_info.h index a304d51245..2ad32b3be4 100644 --- a/src/observer/virtual_table/ob_all_virtual_memstore_info.h +++ b/src/observer/virtual_table/ob_all_virtual_memstore_info.h @@ -60,6 +60,7 @@ private: common::ObSEArray tables_handle_; int64_t memtable_array_pos_; char freeze_time_dist_[OB_MAX_CHAR_LENGTH]; + char compaction_info_buf_[common::OB_COMPACTION_INFO_LENGTH]; private: DISALLOW_COPY_AND_ASSIGN(ObAllVirtualMemstoreInfo); }; diff --git a/src/observer/virtual_table/ob_all_virtual_server_compaction_event_history.cpp b/src/observer/virtual_table/ob_all_virtual_server_compaction_event_history.cpp index 721491d052..475cd236f2 100644 --- a/src/observer/virtual_table/ob_all_virtual_server_compaction_event_history.cpp +++ b/src/observer/virtual_table/ob_all_virtual_server_compaction_event_history.cpp @@ -11,6 +11,7 @@ */ #include "ob_all_virtual_server_compaction_event_history.h" +#include "storage/compaction/ob_compaction_util.h" namespace oceanbase { diff --git a/src/observer/virtual_table/ob_all_virtual_tablet_compaction_info.cpp b/src/observer/virtual_table/ob_all_virtual_tablet_compaction_info.cpp new file mode 100644 index 0000000000..c392346076 --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_tablet_compaction_info.cpp @@ -0,0 +1,217 @@ +//Copyright (c) 2022 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. 
+// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. +#include "observer/virtual_table/ob_all_virtual_tablet_compaction_info.h" +#include "observer/ob_server.h" +#include "storage/tx_storage/ob_ls_service.h" +#include "storage/meta_mem/ob_tenant_meta_mem_mgr.h" +#include "storage/compaction/ob_medium_compaction_mgr.h" + +using namespace oceanbase; +using namespace common; +using namespace memtable; +using namespace storage; +using namespace observer; + +ObAllVirtualTabletCompactionInfo::ObAllVirtualTabletCompactionInfo() + : ObVirtualTableScannerIterator(), + ObMultiTenantOperator(), + addr_(), + tablet_iter_(nullptr), + tablet_handle_(), + ls_id_(share::ObLSID::INVALID_LS_ID), + iter_buf_(nullptr), + medium_info_buf_() +{ +} + +ObAllVirtualTabletCompactionInfo::~ObAllVirtualTabletCompactionInfo() +{ + reset(); +} + +void ObAllVirtualTabletCompactionInfo::reset() +{ + omt::ObMultiTenantOperator::reset(); + addr_.reset(); + ls_id_ = share::ObLSID::INVALID_LS_ID; + + if (OB_NOT_NULL(tablet_iter_)) { + tablet_iter_->~ObTenantTabletIterator(); + tablet_iter_ = nullptr; + } + if (OB_NOT_NULL(iter_buf_)) { + allocator_->free(iter_buf_); + iter_buf_ = nullptr; + } + tablet_handle_.reset(); + ObVirtualTableScannerIterator::reset(); +} + +int ObAllVirtualTabletCompactionInfo::init( + common::ObIAllocator *allocator, + common::ObAddr &addr) +{ + int ret = OB_SUCCESS; + if (start_to_read_) { + ret = OB_INIT_TWICE; + SERVER_LOG(WARN, "cannot init twice", K(ret)); + } else if (OB_ISNULL(allocator)) { + ret = OB_INVALID_ARGUMENT; + SERVER_LOG(WARN, "invalid argument", K(ret), KP(allocator)); + } else if (OB_ISNULL(iter_buf_ = allocator->alloc(sizeof(ObTenantTabletIterator)))) { + ret = 
OB_ALLOCATE_MEMORY_FAILED; + SERVER_LOG(WARN, "fail to alloc tablet iter buf", K(ret)); + } else { + allocator_ = allocator; + addr_ = addr; + start_to_read_ = true; + } + return ret; +} + +int ObAllVirtualTabletCompactionInfo::inner_get_next_row(ObNewRow *&row) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(execute(row))) { + SERVER_LOG(WARN, "fail to execute", K(ret)); + } + return ret; +} + +// Releases per-tenant state: the tablet iterator and the tablet handle it produced. NOTE(review): assumes the handle must not outlive the tenant switch - confirm. +void ObAllVirtualTabletCompactionInfo::release_last_tenant() +{ + if (OB_NOT_NULL(tablet_iter_)) { + tablet_iter_->~ObTenantTabletIterator(); + tablet_iter_ = nullptr; + } + tablet_handle_.reset(); +} + +bool ObAllVirtualTabletCompactionInfo::is_need_process(uint64_t tenant_id) +{ + if (!is_virtual_tenant_id(tenant_id) && + (is_sys_tenant(effective_tenant_id_) || tenant_id == effective_tenant_id_)){ + return true; + } + return false; +} + +int ObAllVirtualTabletCompactionInfo::get_next_tablet() +{ + int ret = OB_SUCCESS; + + if (nullptr == tablet_iter_) { + ObTenantMetaMemMgr *t3m = MTL(ObTenantMetaMemMgr*); + tablet_iter_ = new (iter_buf_) ObTenantTabletIterator(*t3m); + if (OB_ISNULL(tablet_iter_)) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "fail to new tablet_iter_", K(ret)); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(tablet_iter_->get_next_tablet(tablet_handle_))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + SERVER_LOG(WARN, "fail to get tablet iter", K(ret)); + } + } else if (OB_UNLIKELY(!tablet_handle_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "unexpected invalid tablet", K(ret), K(tablet_handle_)); + } else { + ls_id_ = tablet_handle_.get_obj()->get_tablet_meta().ls_id_.id(); + } + + return ret; +} + +int ObAllVirtualTabletCompactionInfo::process_curr_tenant(common::ObNewRow *&row) +{ + // each get_next_row will switch to required tenant, and released guard later + int ret = OB_SUCCESS; + ObTablet *tablet = nullptr; + ObITable *table = nullptr; + if (OB_UNLIKELY(!start_to_read_)) { + ret = OB_NOT_INIT; + SERVER_LOG(WARN, "not inited", K(start_to_read_), K(ret)); + } 
else if (NULL == cur_row_.cells_) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(ERROR, "cur row cell is NULL", K(ret)); + } else if (OB_FAIL(get_next_tablet())) { + if (OB_ITER_END != ret) { + SERVER_LOG(WARN, "get_next_tablet failed", K(ret)); + } + } else if (OB_ISNULL(tablet = tablet_handle_.get_obj())) { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "tablet is null", K(ret), K(tablet_handle_)); + } else { + const compaction::ObMediumCompactionInfoList &medium_info_list = tablet->get_medium_compaction_info_list(); + const int64_t col_count = output_column_ids_.count(); + int64_t max_sync_medium_scn = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < col_count; ++i) { + uint64_t col_id = output_column_ids_.at(i); + switch (col_id) { + case SVR_IP: + if (addr_.ip_to_string(ip_buf_, sizeof(ip_buf_))) { + cur_row_.cells_[i].set_varchar(ip_buf_); + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + } else { + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "fail to execute ip_to_string", K(ret)); + } + break; + case SVR_PORT: + cur_row_.cells_[i].set_int(addr_.get_port()); + break; + case TENANT_ID: + cur_row_.cells_[i].set_int(MTL_ID()); + break; + case LS_ID: + cur_row_.cells_[i].set_int(ls_id_); + break; + case TABLET_ID: + cur_row_.cells_[i].set_int(tablet->get_tablet_meta().tablet_id_.id()); + break; + case FINISH_SCN: + table = tablet->get_table_store().get_major_sstables().get_boundary_table(true/*last*/); + cur_row_.cells_[i].set_int(nullptr == table ? 
0 : table->get_snapshot_version()); + break; + case WAIT_CHECK_SCN: + cur_row_.cells_[i].set_int(medium_info_list.get_wait_check_medium_scn()); + break; + case MAX_RECEIVED_SCN: + if (OB_SUCCESS == tablet->get_max_sync_medium_scn(max_sync_medium_scn)) { + cur_row_.cells_[i].set_int(max_sync_medium_scn); + } else { + cur_row_.cells_[i].set_int(-1); + } + break; + case SERIALIZE_SCN_LIST: + if (medium_info_list.size() > 0) { + int64_t pos = 0; + medium_info_list.gene_info(medium_info_buf_, OB_MAX_VARCHAR_LENGTH, pos); + cur_row_.cells_[i].set_varchar(medium_info_buf_); + SERVER_LOG(DEBUG, "get medium info mgr", K(medium_info_list), K(medium_info_buf_)); + } else { + cur_row_.cells_[i].set_varchar(""); + } + cur_row_.cells_[i].set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + break; + default: + ret = OB_ERR_UNEXPECTED; + SERVER_LOG(WARN, "invalid col_id", K(ret), K(col_id)); + break; + } + } + } + if (OB_SUCC(ret)) { + row = &cur_row_; + } + return ret; +} diff --git a/src/observer/virtual_table/ob_all_virtual_tablet_compaction_info.h b/src/observer/virtual_table/ob_all_virtual_tablet_compaction_info.h new file mode 100644 index 0000000000..c1291f6cb6 --- /dev/null +++ b/src/observer/virtual_table/ob_all_virtual_tablet_compaction_info.h @@ -0,0 +1,72 @@ +//Copyright (c) 2022 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
+ +#ifndef SRC_OBSERVER_VIRTUAL_TABLE_OB_ALL_VIRTUAL_TABLET_MEDIUM_COMPACTION_INFO_H_ +#define SRC_OBSERVER_VIRTUAL_TABLE_OB_ALL_VIRTUAL_TABLET_MEDIUM_COMPACTION_INFO_H_ + +#include "common/row/ob_row.h" +#include "lib/guard/ob_shared_guard.h" +#include "observer/omt/ob_multi_tenant.h" +#include "share/ob_scanner.h" +#include "share/ob_virtual_table_scanner_iterator.h" +#include "share/rc/ob_tenant_base.h" +#include "observer/omt/ob_multi_tenant_operator.h" +#include "storage/meta_mem/ob_tablet_handle.h" + +namespace oceanbase +{ +namespace storage +{ +class ObTenantTabletIterator; +} +namespace observer +{ +class ObAllVirtualTabletCompactionInfo : public common::ObVirtualTableScannerIterator, + public omt::ObMultiTenantOperator +{ + enum COLUMN_ID_LIST + { + SVR_IP = common::OB_APP_MIN_COLUMN_ID, + SVR_PORT, + TENANT_ID, + LS_ID, + TABLET_ID, + FINISH_SCN, + WAIT_CHECK_SCN, + MAX_RECEIVED_SCN, + SERIALIZE_SCN_LIST, + }; +public: + ObAllVirtualTabletCompactionInfo(); + virtual ~ObAllVirtualTabletCompactionInfo(); + int init(common::ObIAllocator *allocator, common::ObAddr &addr); +public: + virtual int inner_get_next_row(common::ObNewRow *&row); + virtual void reset(); +private: + int get_next_tablet(); + virtual bool is_need_process(uint64_t tenant_id) override; + virtual int process_curr_tenant(common::ObNewRow *&row) override; + virtual void release_last_tenant() override; +private: + common::ObAddr addr_; + storage::ObTenantTabletIterator *tablet_iter_; /* placement-constructed inside iter_buf_; destroyed with an explicit destructor call */ + ObTabletHandle tablet_handle_; /* tablet filled in by the last get_next_tablet() */ + int64_t ls_id_; /* ls id read from the tablet meta of tablet_handle_ */ + char ip_buf_[common::OB_IP_STR_BUFF]; /* backing text for the SVR_IP column */ + void *iter_buf_; /* raw storage of sizeof(ObTenantTabletIterator), allocated in init() and freed in reset() */ + char medium_info_buf_[common::OB_MAX_VARCHAR_LENGTH]; /* backing text for the SERIALIZE_SCN_LIST column */ +private: + DISALLOW_COPY_AND_ASSIGN(ObAllVirtualTabletCompactionInfo); +}; + +} +} +#endif /* SRC_OBSERVER_VIRTUAL_TABLE_OB_ALL_VIRTUAL_TABLET_MEDIUM_COMPACTION_INFO_H_ */ diff --git a/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp b/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp index 5e7ba3409e..e7a591c4e6 100644 --- 
a/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp +++ b/src/observer/virtual_table/ob_virtual_table_iterator_factory.cpp @@ -145,6 +145,7 @@ #include "observer/virtual_table/ob_all_virtual_server_compaction_event_history.h" #include "observer/virtual_table/ob_all_virtual_tablet_compaction_progress.h" #include "observer/virtual_table/ob_all_virtual_tablet_compaction_history.h" +#include "observer/virtual_table/ob_all_virtual_tablet_compaction_info.h" #include "observer/virtual_table/ob_all_virtual_tablet_ddl_kv_info.h" #include "observer/virtual_table/ob_all_virtual_tablet_pointer_status.h" #include "observer/virtual_table/ob_all_virtual_storage_meta_memory_status.h" @@ -2137,6 +2138,17 @@ int ObVTIterCreator::create_vt_iter(ObVTableScanParam ¶ms, } break; } + case OB_ALL_VIRTUAL_TABLET_COMPACTION_INFO_TID: { + ObAllVirtualTabletCompactionInfo *info_mgr = NULL; + if (OB_SUCC(NEW_VIRTUAL_TABLE(ObAllVirtualTabletCompactionInfo, info_mgr))) { + if (OB_FAIL(info_mgr->init(&allocator, addr_))) { + SERVER_LOG(WARN, "fail to init ObAllVirtualTabletCompactionInfo", K(ret)); + } else { + vt_iter = static_cast(info_mgr); + } + } + break; + } case OB_ALL_VIRTUAL_TABLET_ENCRYPT_INFO_TID: { ObAllVirtualTabletEncryptInfo *partition_encrypt_info = NULL; if (OB_SUCC(NEW_VIRTUAL_TABLE(ObAllVirtualTabletEncryptInfo, partition_encrypt_info))) { diff --git a/src/rootserver/freeze/ob_checksum_validator.cpp b/src/rootserver/freeze/ob_checksum_validator.cpp index a58de124f7..73eec9bfdc 100644 --- a/src/rootserver/freeze/ob_checksum_validator.cpp +++ b/src/rootserver/freeze/ob_checksum_validator.cpp @@ -20,6 +20,7 @@ #include "lib/mysqlclient/ob_isql_client.h" #include "share/backup/ob_backup_manager.h" #include "share/ob_tablet_replica_checksum_operator.h" +#include "share/ob_tablet_meta_table_compaction_operator.h" namespace oceanbase { @@ -29,103 +30,115 @@ using namespace oceanbase::common; using namespace oceanbase::share; using namespace oceanbase::share::schema; 
-int ObChecksumValidatorBase::init( +int ObMergeErrorCallback::init( const uint64_t tenant_id, - ObMySQLProxy *sql_proxy) + ObZoneMergeManager &zone_merge_mgr) { int ret = OB_SUCCESS; if (IS_INIT) { ret = OB_INIT_TWICE; - LOG_WARN("init twice", KR(ret), K(tenant_id)); - } else if (OB_ISNULL(sql_proxy) || (OB_INVALID_TENANT_ID == tenant_id)) { + LOG_WARN("init twice", KR(ret)); + } else if (tenant_id == OB_INVALID_TENANT_ID) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(tenant_id)); } else { - sql_proxy_ = sql_proxy; + zone_merge_mgr_ = &zone_merge_mgr; tenant_id_ = tenant_id; is_inited_ = true; } return ret; } -int ObChecksumValidatorBase::check(const ObSimpleFrozenStatus &frozen_status) +int ObMergeErrorCallback::handle_merge_error( + const int64_t error_type, + const int64_t expected_epoch) { int ret = OB_SUCCESS; - if (!frozen_status.is_valid()) { + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", KR(ret), K_(tenant_id)); + } else { + if (OB_FAIL(zone_merge_mgr_->set_merge_error(error_type, expected_epoch))) { + LOG_WARN("fail to set merge error", KR(ret), K_(tenant_id), K(error_type), K(expected_epoch)); + } + } + return ret; +} + +/////////////////////////////////////////////////////////////////////////////// + +int ObChecksumValidatorBase::init( + const uint64_t tenant_id, + ObMySQLProxy &sql_proxy, + ObZoneMergeManager &zone_merge_mgr) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", KR(ret), K(tenant_id)); + } else if (OB_INVALID_TENANT_ID == tenant_id) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), K_(tenant_id), K(frozen_status)); + LOG_WARN("invalid argument", KR(ret), K(tenant_id)); + } else if (OB_FAIL(merge_err_cb_.init(tenant_id, zone_merge_mgr))) { + LOG_WARN("fail to init merge error callback", KR(ret), K(tenant_id)); + } else { + sql_proxy_ = &sql_proxy; + zone_merge_mgr_ = &zone_merge_mgr; + tenant_id_ = tenant_id; + is_inited_ = true; + } + 
return ret; +} + +bool ObChecksumValidatorBase::is_primary_cluster() const +{ + bool is_primary_cluster = true; + if (PRIMARY_CLUSTER != ObClusterInfoGetter::get_cluster_role_v2()) { + is_primary_cluster = false; + } + return is_primary_cluster; +} + +bool ObChecksumValidatorBase::is_standby_cluster() const +{ + bool is_standby_cluster = true; + if (STANDBY_CLUSTER != ObClusterInfoGetter::get_cluster_role_v2()) { + is_standby_cluster = false; + } + return is_standby_cluster; +} + +/////////////////////////////////////////////////////////////////////////////// + +bool ObCrossClusterTableteChecksumValidator::need_validate() const +{ + bool need_validate = false; + if (is_inited_ && is_standby_cluster()) { + need_validate = true; + } + return need_validate; +} + +int ObCrossClusterTableteChecksumValidator::validate_checksum(const SCN &frozen_scn) +{ + int ret = OB_SUCCESS; + if (!frozen_scn.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K_(tenant_id), K(frozen_scn)); } else if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("not init", KR(ret), K_(tenant_id)); - } else if (need_check_) { - if (OB_FAIL(do_check(frozen_status))) { - LOG_WARN("fail to do check", KR(ret), K_(tenant_id), K(frozen_status)); - } + } else if (!is_standby_cluster()) { + ret = OB_INNER_STAT_ERROR; + LOG_WARN("can only check cross cluster checksum in standby cluster", KR(ret)); + } else if (OB_FAIL(check_cross_cluster_checksum(frozen_scn))) { + LOG_WARN("fail to check cross cluster checksum", KR(ret), K_(tenant_id), K(frozen_scn)); } return ret; } -/////////////////////////////////////////////////////////////////////////////// - -int ObTabletChecksumValidator::do_check(const ObSimpleFrozenStatus &frozen_status) -{ - int ret = OB_SUCCESS; - int check_ret = OB_SUCCESS; - - int64_t check_cnt= 0; - HEAP_VAR(ObTabletReplicaChecksumIterator, tablet_replica_checksum_iter) { - if (OB_FAIL(tablet_replica_checksum_iter.init(tenant_id_, sql_proxy_))) { - LOG_WARN("fail to 
init tablet_replica_checksum iter", KR(ret), K_(tenant_id)); - } else { - tablet_replica_checksum_iter.set_compaction_scn(frozen_status.frozen_scn_); - - ObTabletReplicaChecksumItem prev_item; - ObTabletReplicaChecksumItem curr_item; - while (OB_SUCC(ret)) { - curr_item.reset(); - if (OB_FAIL(tablet_replica_checksum_iter.next(curr_item))) { - if (OB_ITER_END != ret) { - LOG_WARN("fail to iter next tablet replica checksum item", KR(ret), K_(tenant_id)); - } - } else { - if (prev_item.is_key_valid()) { - if (curr_item.is_same_tablet(prev_item)) { // same tablet - ++check_cnt; - if (OB_FAIL(curr_item.verify_checksum(prev_item))) { - if (OB_CHECKSUM_ERROR == ret) { - LOG_ERROR("ERROR! ERROR! ERROR! checksum error in tablet replica checksum", KR(ret), - K(curr_item), K(prev_item)); - check_ret = ret; - ret = OB_SUCCESS; // continue checking next checksum - } else { - LOG_WARN("unexpected error in tablet replica checksum", KR(ret), K(curr_item), K(prev_item)); - } - } - } else { // next tablet - prev_item = curr_item; - } - } else { - prev_item = curr_item; - } - } - } - } - } - - if (OB_ITER_END == ret) { - ret = OB_SUCCESS; - } - if (OB_CHECKSUM_ERROR == check_ret) { - ret = OB_CHECKSUM_ERROR; - } - LOG_INFO("finish verifying tablet checksum", KR(ret), KR(check_ret), K_(tenant_id), - K(frozen_status), K(check_cnt)); - return ret; -} - -/////////////////////////////////////////////////////////////////////////////// - -int ObCrossClusterTableteChecksumValidator::do_check(const ObSimpleFrozenStatus &frozen_status) +int ObCrossClusterTableteChecksumValidator::check_cross_cluster_checksum( + const SCN &frozen_scn) { int ret = OB_SUCCESS; int check_ret = OB_SUCCESS; @@ -138,8 +151,8 @@ int ObCrossClusterTableteChecksumValidator::do_check(const ObSimpleFrozenStatus } else if (OB_FAIL(tablet_checksum_iter.init(tenant_id_, sql_proxy_))) { LOG_WARN("fail to init tablet checksum iterator", KR(ret), K_(tenant_id)); } else { - 
tablet_checksum_iter.set_compaction_scn(frozen_status.frozen_scn_); - tablet_replica_checksum_iter.set_compaction_scn(frozen_status.frozen_scn_); + tablet_checksum_iter.set_compaction_scn(frozen_scn); + tablet_replica_checksum_iter.set_compaction_scn(frozen_scn); int cmp_ret = 0; ObTabletChecksumItem tablet_checksum_item; @@ -190,7 +203,7 @@ int ObCrossClusterTableteChecksumValidator::do_check(const ObSimpleFrozenStatus ret = OB_CHECKSUM_ERROR; } LOG_INFO("finish verifying cross-cluster checksum", KR(ret), KR(check_ret), K_(tenant_id), - K(frozen_status), K(check_cnt)); + K(frozen_scn), K(check_cnt)); return ret; } @@ -278,76 +291,418 @@ bool ObCrossClusterTableteChecksumValidator::is_first_tablet_in_sys_ls(const ObT /////////////////////////////////////////////////////////////////////////////// -int ObIndexChecksumValidator::do_check(const ObSimpleFrozenStatus &frozen_status) +bool ObIndexChecksumValidator::need_validate() const +{ + bool need_validate = false; + if (is_inited_ && is_primary_cluster()) { + need_validate = true; + } + return need_validate; +} + +int ObIndexChecksumValidator::validate_checksum( + const SCN &frozen_scn, + const hash::ObHashMap &tablet_compaction_map, + int64_t &table_count, + hash::ObHashMap &table_compaction_map) +{ + int ret = OB_SUCCESS; + if ((!frozen_scn.is_valid()) || (tablet_compaction_map.empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K_(tenant_id), K(frozen_scn)); + } else if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not init", KR(ret), K_(tenant_id)); + } else if (!is_primary_cluster()) { + ret = OB_INNER_STAT_ERROR; + LOG_WARN("can only check index column checksum in primary cluster", KR(ret)); + } else if (OB_FAIL(check_all_table_verification_finished(frozen_scn, tablet_compaction_map, table_count, + table_compaction_map))) { + LOG_WARN("fail to check all table verification finished", KR(ret), K_(tenant_id), K(frozen_scn)); + } + return ret; +} + +int 
ObIndexChecksumValidator::check_all_table_verification_finished( + const SCN &frozen_scn, + const hash::ObHashMap &tablet_compaction_map, + int64_t &table_count, + hash::ObHashMap &table_compaction_map) { int ret = OB_SUCCESS; int check_ret = OB_SUCCESS; - int64_t check_cnt = 0; + table_count = 0; ObSchemaGetterGuard schema_guard; - ObArray table_schemas; + SMART_VARS_2((ObArray, table_schemas), + (ObArray, table_ids)) { + if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_full_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("fail to get tenant schema guard", KR(ret), K_(tenant_id)); + } else if (OB_FAIL(schema_guard.get_table_schemas_in_tenant(tenant_id_, table_schemas))) { + LOG_WARN("fail to get tenant table schemas", KR(ret), K_(tenant_id)); + } else { + table_count = table_schemas.count(); + for (int64_t i = 0; (i < table_schemas.count()) && OB_SUCC(ret); ++i) { + const ObSimpleTableSchemaV2 *simple_schema = table_schemas.at(i); + if (OB_ISNULL(simple_schema)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected error, simple schema is null", KR(ret), K_(tenant_id)); + } else { + const uint64_t table_id = simple_schema->get_table_id(); + const ObTableSchema *table_schema = nullptr; + ObTableCompactionInfo cur_compaction_info; - if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_full_schema_guard(tenant_id_, schema_guard))) { - LOG_WARN("fail to get tenant schema guard", KR(ret), K_(tenant_id)); - } else if (OB_FAIL(schema_guard.get_table_schemas_in_tenant(tenant_id_, table_schemas))) { - LOG_WARN("fail to get tenant table schemas", KR(ret), K_(tenant_id)); + if (OB_FAIL(table_ids.push_back(table_id))) { + LOG_WARN("fail to push back", KR(ret), K(table_id)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, table_id, table_schema))) { + LOG_WARN("fail to get table schema", KR(ret), K_(tenant_id), K(table_id)); + } else if (OB_ISNULL(table_schema)) { + } else if 
(OB_FAIL(check_table_compaction_finished(*table_schema, frozen_scn, tablet_compaction_map, + table_compaction_map, cur_compaction_info))) { + LOG_WARN("fail to check table compaction finished", KR(ret), K(frozen_scn), KPC(table_schema)); + } else if (cur_compaction_info.is_verified()) { // already finished verification, skip it! + } else if (is_index_table(*simple_schema)) { // for index table, may need to check column checksum + const uint64_t data_table_id = simple_schema->get_data_table_id(); + const ObTableSchema *data_table_schema = nullptr; + if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, data_table_id, data_table_schema))) { + LOG_WARN("fail to get table schema", KR(ret), K_(tenant_id), K(data_table_id)); + } else if (OB_ISNULL(data_table_schema)) { + ret = OB_TABLE_NOT_EXIST; + LOG_WARN("fail to get data table schema", KR(ret), K_(tenant_id), K(table_id), K(data_table_id)); + } else { + ObTableCompactionInfo data_compaction_info; + + if (OB_FAIL(check_table_compaction_finished(*data_table_schema, frozen_scn, tablet_compaction_map, + table_compaction_map, data_compaction_info))) { + LOG_WARN("fail to check table compaction finished", KR(ret), K(frozen_scn), KPC(data_table_schema)); + } else if (data_compaction_info.is_verified()) { + ret = OB_ERR_UNEXPECTED; + // NOTICE: if a data table has a virtual index table and a valid index table, it may lead to this error. + LOG_WARN("not allow that data table is verified, while index table is not verified", KR(ret), K(table_id), + K(data_table_id), K(cur_compaction_info), K(data_compaction_info)); + } else if (table_schema->has_tablet()) { + data_compaction_info.is_valid_data_table_ = true; + if (!cur_compaction_info.finish_compaction() || !data_compaction_info.finish_compaction()) { + // data table or index table does not finish compaction. 
+ data_compaction_info.all_index_verified_ = false; + } else if (cur_compaction_info.is_compacted() && data_compaction_info.is_compacted()) { + #ifdef ERRSIM + ret = E(EventTable::EN_MEDIUM_VERIFY_GROUP_SKIP_SET_VERIFY) OB_SUCCESS; + if (OB_FAIL(ret)) { + if (!is_inner_table(table_id)) { + ret = OB_EAGAIN; + STORAGE_LOG(INFO, "ERRSIM EN_MEDIUM_VERIFY_GROUP_SKIP_SET_VERIFY failed", K(ret)); + } else { + ret = OB_SUCCESS; + } + } + #endif + // both tables' all tablets finished compaction, we should validate column checksum. + if (OB_FAIL(ObTabletReplicaChecksumOperator::check_column_checksum(tenant_id_, + *data_table_schema, *table_schema, frozen_scn, *sql_proxy_))) { + if (OB_CHECKSUM_ERROR == ret) { + LOG_ERROR("ERROR! ERROR! ERROR! checksum error in index checksum", KR(ret), K(*data_table_schema), + K_(tenant_id), K(frozen_scn), K(*table_schema)); + } else { + LOG_WARN("fail to check index column checksum", KR(ret), K_(tenant_id), K(*data_table_schema), + K(*table_schema)); + } + // after index checksum verification, we should execute tablet_replica checksum verification on the + // index table, then mark it as VERIFIED + } else if (OB_FAIL(handle_table_compaction_finished(table_schema, frozen_scn, table_compaction_map))) { + LOG_WARN("fail to handle table compaction finished", KR(ret), K(table_id), K(frozen_scn)); + } + } else if (cur_compaction_info.can_skip_verifying() || data_compaction_info.can_skip_verifying()) { + // if one of them can skip verifying, that means we don't need to execute index checksum verification. + // Mark index table as VERIFIED directly. 
+ if (OB_FAIL(handle_table_compaction_finished(table_schema, frozen_scn, table_compaction_map))) { + LOG_WARN("fail to handle index table compaction finished", KR(ret), K(table_id), K(frozen_scn)); + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(table_compaction_map.set_refactored(data_table_id, data_compaction_info, true/*overwrite*/))) { + LOG_WARN("fail to set refactored", KR(ret), K(data_table_id), K(data_compaction_info)); + } + } + } else { // virtual index table has no tablet, not need to index checksum verification. + if (cur_compaction_info.finish_compaction() && data_compaction_info.finish_compaction()) { + if (OB_FAIL(handle_table_compaction_finished(table_schema, frozen_scn, table_compaction_map))) { + LOG_WARN("fail to handle index table compaction finished", KR(ret), K(table_id), K(frozen_scn)); + } else if (OB_FAIL(handle_table_compaction_finished(data_table_schema, frozen_scn, table_compaction_map))) { + LOG_WARN("fail to handle data table compaction finished", KR(ret), K(data_table_id), K(frozen_scn)); + } + } + } + } + } else { + if (table_schema->get_index_tid_count() < 1) { // handle data table, meanwhile not have relative index table + if (cur_compaction_info.finish_compaction()) { + if OB_FAIL(handle_table_compaction_finished(table_schema, frozen_scn, table_compaction_map)) { + LOG_WARN("fail to handle table compaction finished", KR(ret), K(table_id), K(frozen_scn)); + } + } + } + } + + if (OB_CHECKSUM_ERROR == ret) { + check_ret = ret; + } + ret = OB_SUCCESS; // ignore ret, and continue check next table_schema + } + } // end for loop + + // for valid data table, if its all index table finished verification, we can handle it here + // and make it as verified + // + // and we also need to do that, compare 'table_compaction_map' with 'table_ids', remove those + // whose table_id not exists in 'table_ids' from 'table_compaction_map' + if (OB_SUCC(ret) && (OB_SUCCESS == check_ret)) { + ObArray removed_table_ids; // record the table_id which will be 
removed + hash::ObHashMap::iterator iter = table_compaction_map.begin(); + for (;OB_SUCC(ret) && (iter != table_compaction_map.end()); ++iter) { + const uint64_t cur_table_id = iter->first; + if (exist_in_table_array(cur_table_id, table_ids)) { + const ObTableCompactionInfo &compaction_info = iter->second; + if (compaction_info.is_valid_data_table_) { + if (OB_FAIL(update_data_table_verified(iter->first, compaction_info, frozen_scn, table_compaction_map))) { + if (OB_CHECKSUM_ERROR == ret) { + check_ret = OB_CHECKSUM_ERROR; + } + LOG_WARN("fail to update data table to verified status", KR(ret), "data_table_id", iter->first, + K(frozen_scn), K(compaction_info)); + } + } + } else if (OB_FAIL(removed_table_ids.push_back(cur_table_id))) { + LOG_WARN("fail to push back", KR(ret), K(cur_table_id)); + } + } /*end for iter*/ + for (int64_t i = 0; (OB_SUCC(ret) && (i < removed_table_ids.count())); ++i) { + const uint64_t table_id = removed_table_ids.at(i); + if (OB_FAIL(table_compaction_map.erase_refactored(table_id))) { + LOG_WARN("fail to erase refactored", KR(ret), K(i), K(table_id)); + } + } + } + } + } + + if (check_ret == OB_CHECKSUM_ERROR) { + ret = check_ret; + } + + return ret; +} + +int ObIndexChecksumValidator::update_data_table_verified( + const int64_t data_table_id, + const ObTableCompactionInfo &data_table_compaction, + const SCN &frozen_scn, + hash::ObHashMap &table_compaction_map) +{ + int ret = OB_SUCCESS; + if (!data_table_compaction.is_valid_data_table_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(data_table_compaction)); } else { - for (int64_t i = 0; (i < table_schemas.count()) && OB_SUCC(ret); ++i) { - const ObSimpleTableSchemaV2 *simple_schema = table_schemas.at(i); - - if (OB_ISNULL(simple_schema)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected error, simple schema is null", KR(ret), K_(tenant_id)); - } else if (simple_schema->is_index_table() - && simple_schema->can_read_index() - // virtual index table has no 
tablet - && simple_schema->has_tablet()) { + if (data_table_compaction.all_index_verified_) { + if (data_table_compaction.finish_compaction()) { + ObSchemaGetterGuard schema_guard; const ObTableSchema *data_table_schema = nullptr; - const ObTableSchema *index_table_schema = nullptr; - const uint64_t index_table_id = simple_schema->get_table_id(); - const uint64_t data_table_id = simple_schema->get_data_table_id(); - if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, index_table_id, index_table_schema))) { - LOG_WARN("fail to get table schema", KR(ret), K(tenant_id_), K(index_table_id)); - } else if (OB_ISNULL(index_table_schema)) { - // index table is deleted, do nothing + if (OB_FAIL(ObMultiVersionSchemaService::get_instance().get_tenant_full_schema_guard(tenant_id_, schema_guard))) { + LOG_WARN("fail to get tenant schema guard", KR(ret), K_(tenant_id)); } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id_, data_table_id, data_table_schema))) { - LOG_WARN("fail to get table schema", KR(ret), K(tenant_id_), K(data_table_id)); + LOG_WARN("fail to get table schema", KR(ret), K_(tenant_id), K(data_table_id)); } else if (OB_ISNULL(data_table_schema)) { ret = OB_TABLE_NOT_EXIST; LOG_WARN("fail to get data table schema", KR(ret), K_(tenant_id), K(data_table_id)); - } else { - ++check_cnt; + } else if (OB_FAIL(handle_table_compaction_finished(data_table_schema, frozen_scn, table_compaction_map))) { + LOG_WARN("fail to handle table compaction finished", KR(ret), K(data_table_id), K(frozen_scn)); } - - if (FAILEDx(ObTabletReplicaChecksumOperator::check_column_checksum(tenant_id_, *data_table_schema, - *index_table_schema, frozen_status.frozen_scn_, *sql_proxy_))) { - if (OB_CHECKSUM_ERROR == ret) { - LOG_ERROR("ERROR! ERROR! ERROR! 
checksum error in index checksum", KR(ret), K_(tenant_id), - K(frozen_status), K(*data_table_schema), K(*index_table_schema)); - check_ret = OB_CHECKSUM_ERROR; - ret = OB_SUCCESS; // continue checking next checksum - } else if (OB_EAGAIN != ret) { - LOG_WARN("fail to check index column checksum", KR(ret), K_(tenant_id), K(*data_table_schema), - K(*index_table_schema)); - } else { - if (REACH_TIME_INTERVAL(10 * 1000 * 1000)) { - LOG_WARN("fail to check index column checksum", KR(ret), K_(tenant_id), K(*data_table_schema), - K(*index_table_schema)); - } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("data table must finish compaction when its all index table finished verifying", KR(ret), K(data_table_id), + K(frozen_scn), K(data_table_compaction)); + } + } else { + ObTableCompactionInfo new_compaction_info = data_table_compaction; + //why mark it as true: cuz we can re-scan all table, to check this data table's 'all_index_verified_' again. + new_compaction_info.all_index_verified_ = true; + if (OB_FAIL(table_compaction_map.set_refactored(data_table_id, new_compaction_info, true/*overwrite*/))) { + LOG_WARN("fail to set refactored", KR(ret), K(data_table_id), K(new_compaction_info)); + } + } + } + return ret; +} + +// If one table finished compaction (and finished index checksum verification if needed), we can handle it, +// and then mark it as VERIFIED. 
+int ObIndexChecksumValidator::handle_table_compaction_finished( + const ObTableSchema *table_schema, + const SCN &frozen_scn, + hash::ObHashMap &table_compaction_map) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(table_schema)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret)); + } else { + const uint64_t table_id = table_schema->get_table_id(); + ObTableCompactionInfo cur_compaction_info; + if (OB_FAIL(table_compaction_map.get_refactored(table_id, cur_compaction_info))) { + LOG_WARN("fail to get refactored", KR(ret), K(table_id)); + } else if (cur_compaction_info.is_verified()) { // skip if finished verification + } else if (!cur_compaction_info.finish_compaction()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("table must finish compaction when arriving here", KR(ret), K(table_id), K(cur_compaction_info)); + } else { + if (table_schema->has_tablet()) { + SMART_VAR(ObArray, pairs) { + if (OB_FAIL(ObTabletReplicaChecksumOperator::get_tablet_ls_pairs(tenant_id_, *table_schema, *sql_proxy_, pairs))) { + LOG_WARN("fail to get tablet_ls pairs", KR(ret), K_(tenant_id), K(table_id)); + } else if (pairs.count() < 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get tablet_ls pairs of current table schema", KR(ret), K_(tenant_id), K(table_id)); + } else if (OB_FAIL(ObTabletMetaTableCompactionOperator::batch_update_report_scn( + tenant_id_, frozen_scn.get_val_for_tx(), + pairs, ObTabletReplica::ScnStatus::SCN_STATUS_ERROR))) { + LOG_WARN("fail to batch update report_scn", KR(ret), K_(tenant_id), K(pairs)); } } - ret = OB_SUCCESS; // ignore ret, and continue check next table_schema } - } // end for loop + + if (OB_SUCC(ret)) { + cur_compaction_info.set_verified(); + if (OB_FAIL(table_compaction_map.set_refactored(table_id, cur_compaction_info, true/*overwrite*/))) { + LOG_WARN("fail to set refactored", KR(ret), K(table_id), K(cur_compaction_info)); + } + } + } + } + return ret; +} + +// check all tablets of this table finished compaction or not. 
+// +// we need to notice that, when one table finished compaction, we need to execute tablet_replica +// checksum verification if this table has tablet. +int ObIndexChecksumValidator::check_table_compaction_finished( + const ObTableSchema &table_schema, + const SCN &frozen_scn, + const hash::ObHashMap &tablet_compaction_map, + hash::ObHashMap &table_compaction_map, + ObTableCompactionInfo &latest_compaction_info) +{ + int ret = OB_SUCCESS; + uint64_t table_id = UINT64_MAX; + latest_compaction_info.reset(); + if (!table_schema.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(table_schema)); + } else if (FALSE_IT(table_id = table_schema.get_table_id())) { + } else if (OB_FAIL(table_compaction_map.get_refactored(table_id, latest_compaction_info))) { + if (OB_HASH_NOT_EXIST == ret) { // first initial + ret = OB_SUCCESS; + latest_compaction_info.table_id_ = table_id; + if (OB_FAIL(table_compaction_map.set_refactored(table_id, latest_compaction_info))) { + LOG_WARN("fail to set refactored", KR(ret), K(table_id), K(latest_compaction_info)); + } + } else { + LOG_WARN("fail to get val from hashmap", KR(ret), K(table_id)); + } } - if (OB_CHECKSUM_ERROR == check_ret) { - ret = OB_CHECKSUM_ERROR; + if (OB_SUCC(ret) && !latest_compaction_info.is_compacted()) { + SMART_VAR(ObArray, tablet_ids) { + SMART_VAR(ObArray, pairs) { + if (table_schema.has_tablet()) { + if (OB_FAIL(table_schema.get_tablet_ids(tablet_ids))) { + LOG_WARN("fail to get tablet_ids from table schema", KR(ret), K(table_schema)); + } else if (OB_FAIL(ObTabletReplicaChecksumOperator::get_tablet_ls_pairs(tenant_id_, table_id, + *sql_proxy_, tablet_ids, pairs))) { + LOG_WARN("fail to get tablet_ls pairs", KR(ret), K_(tenant_id), K(table_id)); + } else if (pairs.count() < 1) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get tablet_ls pairs of current table schema", KR(ret), K_(tenant_id), K(table_id)); + } else { + // iterate all tablets to check 'compacted/finished status' or 
not. + const int64_t tablet_cnt = tablet_ids.count(); + int64_t idx = 0; + bool exist_skip_verifying_tablet = false; + for (; OB_SUCC(ret) && (idx < tablet_cnt); ++idx) { + ObTabletCompactionStatus tablet_status = ObTabletCompactionStatus::INITIAL; + if (OB_FAIL(tablet_compaction_map.get_refactored(pairs.at(idx), tablet_status))) { + if (OB_HASH_NOT_EXIST == ret) { // if tablet not finish compaction, it won't be added into this map + ret = OB_SUCCESS; + break; + } else { + LOG_WARN("fail to get tablet compaction status from map", KR(ret), K(idx), "pair", pairs.at(idx)); + } + } else if ((tablet_status != ObTabletCompactionStatus::COMPACTED) + && (tablet_status != ObTabletCompactionStatus::CAN_SKIP_VERIFYING)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected tablet status", KR(ret), K(tablet_status), K(frozen_scn), K(table_id)); + } else if (tablet_status == ObTabletCompactionStatus::CAN_SKIP_VERIFYING) { + exist_skip_verifying_tablet = true; + } + } /*end outer for loop*/ + if (OB_SUCC(ret) && (idx == tablet_cnt)) { + latest_compaction_info.tablet_cnt_ = tablet_ids.count(); + if (exist_skip_verifying_tablet) { + latest_compaction_info.set_can_skip_verifying(); + } else { + latest_compaction_info.set_compacted(); + } + } + // if current table 'has tablet' & 'finished compaction' & 'not skip verifying', verify tablet replica checksum + if (OB_SUCC(ret) && latest_compaction_info.is_compacted()) { + if (OB_FAIL(ObTabletReplicaChecksumOperator::check_tablet_replica_checksum(tenant_id_, pairs, frozen_scn, + *sql_proxy_))) { + if (OB_CHECKSUM_ERROR == ret) { + LOG_ERROR("ERROR! ERROR! ERROR! 
checksum error in major tablet_replica_checksum", KR(ret), + K_(tenant_id), K(frozen_scn), "pair_cnt", pairs.count()); + } else { + LOG_WARN("fail to check major tablet_replica checksum", KR(ret), K_(tenant_id), K(frozen_scn), K(table_schema)); + } + } + } + // final, set this table as COMPACTED/CAN_SKIP_VERIFYING + if (FAILEDx(table_compaction_map.set_refactored(table_id, latest_compaction_info, true/*overwrite*/))) { + LOG_WARN("fail to set refactored", KR(ret), K(table_id), K(latest_compaction_info)); + } + } + } else { // like VIEW, it does not have tablet, treat it as compaction finished and can skip verifying + latest_compaction_info.tablet_cnt_ = 0; + latest_compaction_info.set_can_skip_verifying(); + if (OB_FAIL(table_compaction_map.set_refactored(table_id, latest_compaction_info, true/*overwrite*/))) { + LOG_WARN("fail to set refactored", KR(ret), K(table_id), K(latest_compaction_info)); + } + } + } + } } - LOG_INFO("finish verifying index checksum", KR(ret), KR(check_ret), K_(tenant_id), - K(frozen_status), K(check_cnt)); return ret; } +bool ObIndexChecksumValidator::is_index_table( + const ObSimpleTableSchemaV2 &simple_schema) +{ + return (simple_schema.is_index_table() + && simple_schema.can_read_index()); +} + +bool ObIndexChecksumValidator::exist_in_table_array( + const uint64_t table_id, + const ObIArray &table_ids) +{ + bool exist = false; + for (int64_t i = 0; (i < table_ids.count()) && !exist; ++i) { + if (table_id == table_ids.at(i)) { + exist = true; + } + } + return exist; +} + } // end namespace rootserver } // end namespace oceanbase diff --git a/src/rootserver/freeze/ob_checksum_validator.h b/src/rootserver/freeze/ob_checksum_validator.h index 82f9acd6a7..3273003c81 100644 --- a/src/rootserver/freeze/ob_checksum_validator.h +++ b/src/rootserver/freeze/ob_checksum_validator.h @@ -16,6 +16,7 @@ #include "share/ob_tablet_checksum_iterator.h" #include "share/ob_tablet_replica_checksum_iterator.h" #include "share/ob_freeze_info_proxy.h" +#include 
"share/ob_zone_merge_info.h" namespace oceanbase { @@ -25,42 +26,50 @@ class ObZoneMergeManager; class ObFreezeInfoManager; class ObServerManager; +class ObMergeErrorCallback +{ +public: + ObMergeErrorCallback() + : is_inited_(false), tenant_id_(OB_INVALID_TENANT_ID), + zone_merge_mgr_(nullptr) + {} + virtual ~ObMergeErrorCallback() {} + + int init(const uint64_t tenant_id, ObZoneMergeManager &zone_merge_mgr); + + int handle_merge_error(const int64_t error_type, const int64_t expected_epoch); + +private: + bool is_inited_; + uint64_t tenant_id_; + ObZoneMergeManager *zone_merge_mgr_; + DISALLOW_COPY_AND_ASSIGN(ObMergeErrorCallback); +}; + class ObChecksumValidatorBase { public: ObChecksumValidatorBase() - : is_inited_(false), need_check_(true), tenant_id_(OB_INVALID_TENANT_ID), - sql_proxy_(NULL) + : is_inited_(false), tenant_id_(OB_INVALID_TENANT_ID), + sql_proxy_(NULL), zone_merge_mgr_(NULL), merge_err_cb_() {} virtual ~ObChecksumValidatorBase() {} virtual int init(const uint64_t tenant_id, - common::ObMySQLProxy *sql_proxy); + common::ObMySQLProxy &sql_proxy, + ObZoneMergeManager &zone_merge_mgr); + virtual bool need_validate() const { return false; } - int check(const share::ObSimpleFrozenStatus &frozen_status); - - void set_need_check(bool need_check) { need_check_ = need_check; } + bool is_primary_cluster() const; + bool is_standby_cluster() const; static const int64_t MIN_CHECK_INTERVAL = 10 * 1000 * 1000LL; -protected: - virtual int do_check(const share::ObSimpleFrozenStatus &frozen_status) = 0; - protected: bool is_inited_; - bool need_check_; uint64_t tenant_id_; common::ObMySQLProxy *sql_proxy_; -}; - -// Mainly to verify checksum between each tablet replicas in primary/standby cluster -class ObTabletChecksumValidator : public ObChecksumValidatorBase -{ -public: - ObTabletChecksumValidator() {} - virtual ~ObTabletChecksumValidator() {} - -protected: - virtual int do_check(const share::ObSimpleFrozenStatus &frozen_status) override; + ObZoneMergeManager 
*zone_merge_mgr_; + ObMergeErrorCallback merge_err_cb_; }; // Mainly to verify checksum of cross-cluster's tablet which sync from primary cluster @@ -70,11 +79,15 @@ public: ObCrossClusterTableteChecksumValidator() {} virtual ~ObCrossClusterTableteChecksumValidator() {} +public: + int validate_checksum(const share::SCN &frozen_scn); + virtual bool need_validate() const override; + // sync data from __all_tablet_replica_checksum to __all_tablet_checksum int write_tablet_checksum_item(); -protected: - virtual int do_check(const share::ObSimpleFrozenStatus &frozen_status) override; +private: + int check_cross_cluster_checksum(const share::SCN &frozen_scn); private: bool is_first_tablet_in_sys_ls(const share::ObTabletReplicaChecksumItem &item) const; @@ -90,11 +103,39 @@ public: ObIndexChecksumValidator() {} virtual ~ObIndexChecksumValidator() {} -protected: - virtual int do_check(const share::ObSimpleFrozenStatus &frozen_status) override; +public: + int validate_checksum(const share::SCN &frozen_scn, + const hash::ObHashMap &tablet_compaction_map, + int64_t &table_count, + hash::ObHashMap &table_compaction_map); + virtual bool need_validate() const override; + +private: + // valid '' pair should finish index column checksum verification, other tables just skip verification. + int check_all_table_verification_finished(const share::SCN &frozen_scn, + const hash::ObHashMap &tablet_compaction_map, + int64_t &table_count, + hash::ObHashMap &table_compaction_map); + int check_table_compaction_finished(const share::schema::ObTableSchema &table_schema, + const share::SCN &frozen_scn, + const hash::ObHashMap &tablet_compaction_map, + hash::ObHashMap &table_compaction_map, + share::ObTableCompactionInfo &latest_compaction_info); + // handle data table which has tablet and index table(s). its all index tables may finish virification or not + // If all finished, update tablet status. 
+ int update_data_table_verified(const int64_t table_id, + const share::ObTableCompactionInfo &data_table_compaction, + const share::SCN &frozen_scn, + hash::ObHashMap &table_compaction_map); + // handle the table, update its all tablets' status if needed. And update its compaction_info in @table_compaction_map + int handle_table_compaction_finished(const share::schema::ObTableSchema *table_schema, + const share::SCN &frozen_scn, + hash::ObHashMap &table_compaction_map); + bool is_index_table(const share::schema::ObSimpleTableSchemaV2 &simple_schema); + bool exist_in_table_array(const uint64_t table_id, const common::ObIArray &table_ids); }; } // end namespace rootserver } // end namespace oceanbase -#endif // OCEANBASE_ROOTSERVER_FREEZE_OB_CHECKSUM_VALIDATOR_H_ \ No newline at end of file +#endif // OCEANBASE_ROOTSERVER_FREEZE_OB_CHECKSUM_VALIDATOR_H_ diff --git a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp index bddb20455e..5e5e2954ab 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_checker.cpp +++ b/src/rootserver/freeze/ob_major_merge_progress_checker.cpp @@ -20,7 +20,6 @@ #include "share/ob_global_stat_proxy.h" #include "share/ob_all_server_tracer.h" #include "share/ls/ob_ls_table_operator.h" -#include "share/tablet/ob_tablet_table_iterator.h" #include "share/ob_freeze_info_proxy.h" #include "share/scn.h" @@ -35,7 +34,8 @@ using namespace oceanbase::share::schema; ObMajorMergeProgressChecker::ObMajorMergeProgressChecker() : is_inited_(false), tenant_id_(OB_INVALID_ID), sql_proxy_(nullptr), schema_service_(nullptr), zone_merge_mgr_(nullptr), lst_operator_(nullptr), - server_trace_(nullptr) + server_trace_(nullptr), tablet_compaction_map_(), table_count_(0), table_compaction_map_(), + index_validator_() {} int ObMajorMergeProgressChecker::init( @@ -47,9 +47,18 @@ int ObMajorMergeProgressChecker::init( ObIServerTrace &server_trace) { int ret = OB_SUCCESS; + const int64_t 
DEFAULT_TABLET_CNT = 8; + const int64_t DEFAULT_TABLE_CNT = 128; if (IS_INIT) { ret = OB_INIT_TWICE; LOG_WARN("init twice", KR(ret)); + } else if (OB_FAIL(tablet_compaction_map_.create(DEFAULT_TABLET_CNT, "MFTatCompactMap", + "MFTatCompactMap", tenant_id))) { + LOG_WARN("fail to create tablet compaction status map", KR(ret), K(tenant_id), K(DEFAULT_TABLET_CNT)); + } else if (OB_FAIL(table_compaction_map_.create(DEFAULT_TABLE_CNT, "MFTbCompMap", "MFTbCompMap", tenant_id))) { + LOG_WARN("fail to create table compaction status map", KR(ret), K(tenant_id), K(DEFAULT_TABLE_CNT)); + } else if (OB_FAIL(index_validator_.init(tenant_id, sql_proxy, zone_merge_mgr))) { + LOG_WARN("fail to init index validator", KR(ret), K(tenant_id)); } else { tenant_id_ = tenant_id; sql_proxy_ = &sql_proxy; @@ -62,6 +71,57 @@ int ObMajorMergeProgressChecker::init( return ret; } +int ObMajorMergeProgressChecker::prepare_handle() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(tablet_compaction_map_.reuse())) { + LOG_WARN("fail to reuse tablet_compaction_map", KR(ret)); + } else if (OB_FAIL(table_compaction_map_.reuse())) { + LOG_WARN("fail to reuse table_compaction_map", KR(ret)); + } else { + table_count_ = 0; + } + return ret; +} + +int ObMajorMergeProgressChecker::check_table_status(bool &exist_uncompacted, bool &exist_unverified) +{ + int ret = OB_SUCCESS; + SMART_VARS_2((ObArray, uncompacted_tables), + (ObArray, unverified_tables)) { + hash::ObHashMap::iterator iter = table_compaction_map_.begin(); + int64_t ele_count = 0; + for (;OB_SUCC(ret) && (iter != table_compaction_map_.end()); ++iter) { + const ObTableCompactionInfo &compaction_info = iter->second; + if (!compaction_info.is_verified()) { + if (compaction_info.finish_compaction()) { + if (OB_FAIL(unverified_tables.push_back(compaction_info))) { + LOG_WARN("fail to push back", KR(ret), K(compaction_info)); + } + } else if (OB_FAIL(uncompacted_tables.push_back(compaction_info))) { + LOG_WARN("fail to push back", KR(ret), 
K(compaction_info)); + } + } else if (compaction_info.is_verified()) { + ++ele_count; + } + } + + if (OB_SUCC(ret)) { + exist_uncompacted = uncompacted_tables.count() > 0; + exist_unverified = unverified_tables.count() > 0; + if (exist_uncompacted || exist_unverified) { + LOG_INFO("exists compaction/varification unfinished table", "uncompacted cnt", uncompacted_tables.count(), + "unverified cnt", unverified_tables.count(), K(uncompacted_tables), K(unverified_tables)); + } else if (ele_count != table_count_) { + ret = OB_INNER_STAT_ERROR; + LOG_WARN("table_compaction_map element count should not be less than table cunt", KR(ret), K(ele_count), + K_(table_count)); + } + } + } + return ret; +} + int ObMajorMergeProgressChecker::check_merge_progress( const volatile bool &stop, const SCN &global_broadcast_scn, @@ -120,18 +180,21 @@ int ObMajorMergeProgressChecker::check_merge_progress( } else if (OB_FAIL(schema_guard.generate_tablet_table_map(tenant_id_, tablet_map))) { LOG_WARN("fail to generate tablet table map", K_(tenant_id), KR(ret)); } else { - ObTabletInfo tablet; - while (!stop && OB_SUCC(ret) && OB_SUCC(iter.next(tablet))) { - if (OB_FAIL(check_tablet(tablet, tablet_map, all_progress, - global_broadcast_scn, schema_guard))) { - LOG_WARN("fail to check tablet merge progress", KR(ret), K_(tenant_id), - K(stop), K(tablet)); + ObTabletInfo tablet_info; + while (!stop && OB_SUCC(ret) && OB_SUCC(iter.next(tablet_info))) { + if (!tablet_info.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("iterate invalid tablet info", KR(ret), K(tablet_info)); + } else if (OB_FAIL(check_tablet(tablet_info, tablet_map, all_progress, global_broadcast_scn, + schema_guard))) { + LOG_WARN("fail to check tablet", KR(ret), K_(tenant_id), K(stop), K(tablet_info)); } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; } + if (stop && OB_SUCC(ret)) { ret = OB_CANCELED; LOG_WARN("already stop to check merge progress", KR(ret), K_(tenant_id)); @@ -152,7 +215,7 @@ int 
ObMajorMergeProgressChecker::check_merge_progress( } int ObMajorMergeProgressChecker::check_tablet( - const ObTabletInfo &tablet, + const ObTabletInfo &tablet_info, const common::hash::ObHashMap &tablet_map, ObAllZoneMergeProgress &all_progress, const SCN &global_broadcast_scn, @@ -160,7 +223,7 @@ int ObMajorMergeProgressChecker::check_tablet( { int ret = OB_SUCCESS; - const ObTabletID tablet_id = tablet.get_tablet_id(); + const ObTabletID tablet_id(tablet_info.get_tablet_id()); const share::schema::ObSimpleTableSchemaV2 *table_schema = nullptr; bool need_check = true; uint64_t table_id = OB_INVALID_ID; @@ -194,79 +257,98 @@ int ObMajorMergeProgressChecker::check_tablet( if (OB_SUCC(ret) && need_check) { ObLSInfo ls_info; int64_t cluster_id = GCONF.cluster_id; - const ObLSID &ls_id = tablet.get_ls_id(); + const ObLSID &ls_id = tablet_info.get_ls_id(); if (OB_FAIL(lst_operator_->get(cluster_id, tenant_id_, ls_id, share::ObLSTable::DEFAULT_MODE, ls_info))) { LOG_WARN("fail to get ls info", KR(ret), K_(tenant_id), K(ls_id)); - } else if (OB_FAIL(check_majority_integrated(schema_guard, tablet, ls_info))) { - LOG_WARN("fail to check majority integrated", KR(ret)); - } else if (OB_FAIL(check_tablet_data_version(all_progress, global_broadcast_scn, tablet, ls_info))) { - LOG_WARN("fail to check data version", KR(ret)); + } else if (OB_FAIL(check_majority_integrated(schema_guard, tablet_info, ls_info))) { + LOG_WARN("fail to check majority integrated", KR(ret), K(tablet_info), K(ls_info)); + } else if (OB_FAIL(check_tablet_compaction_scn(all_progress, global_broadcast_scn, tablet_info, ls_info))) { + LOG_WARN("fail to check data version", KR(ret), K(tablet_info), K(ls_info)); } } return ret; } -int ObMajorMergeProgressChecker::check_tablet_data_version( +int ObMajorMergeProgressChecker::check_tablet_compaction_scn( ObAllZoneMergeProgress &all_progress, const SCN &global_broadcast_scn, - const ObTabletInfo &tablet, + const ObTabletInfo &tablet_info, const share::ObLSInfo 
&ls_info) { int ret = OB_SUCCESS; - const ObLSReplica *ls_r = nullptr; - FOREACH_CNT_X(r, tablet.get_replicas(), OB_SUCCESS == ret) { - if (OB_ISNULL(r)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid replica", KR(ret), K_(tenant_id), K(tablet)); - } else if (OB_FAIL(ls_info.find(r->get_server(), ls_r))) { - if (OB_ENTRY_NOT_EXIST == ret) { - // Ignore tablet replicas that are not in ls_info. E.g., after ls replica migration, - // source ls meta has been deleted, but source tablet meta has not been deleted yet. - ret = OB_SUCCESS; // ignore ret - LOG_INFO("ignore this tablet replica, sicne it is not in ls_info", K_(tenant_id), - KPC(r), K(ls_info)); - } else { - LOG_WARN("fail to find ls replica", KR(ret), "addr", r->get_server()); - } - } else if (OB_UNLIKELY(nullptr == ls_r)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid ls replica", KR(ret), KPC(r)); - } else { - ObAllZoneMergeProgress::iterator p = - std::lower_bound(all_progress.begin(), all_progress.end(), ls_r->get_zone()); - if ((p != all_progress.end()) && (p->zone_ == ls_r->get_zone())) { - if ((REPLICA_TYPE_LOGONLY == ls_r->get_replica_type()) - || (REPLICA_TYPE_ENCRYPTION_LOGONLY == ls_r->get_replica_type())) { - // logonly replica no need check + if (OB_FAIL(ret)) { + } else { + bool is_tablet_compacted = true; + bool tablet_need_verify = true; + const ObLSReplica *ls_r = nullptr; + FOREACH_CNT_X(r, tablet_info.get_replicas(), OB_SUCCESS == ret) { + if (OB_FAIL(ls_info.find(r->get_server(), ls_r))) { + if (OB_ENTRY_NOT_EXIST == ret) { + // Ignore tablet replicas that are not in ls_info. E.g., after ls replica migration, + // source ls meta has been deleted, but source tablet meta has not been deleted yet. 
+ ret = OB_SUCCESS; // ignore ret + LOG_INFO("ignore this tablet replica, sicne it is not in ls_info", K_(tenant_id), + KPC(r), K(ls_info)); } else { - SCN rep_snapshot_scn; - if (OB_FAIL(rep_snapshot_scn.convert_for_tx(r->get_snapshot_version()))) { - LOG_WARN("fail to convert val to SCN", KR(ret), "snapshot_version", r->get_snapshot_version()); + LOG_WARN("fail to find ls replica", KR(ret), "addr", r->get_server()); + } + } else if (OB_UNLIKELY(nullptr == ls_r)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid ls replica", KR(ret), KPC(r)); + } else if (r->get_status() == ObTabletReplica::ScnStatus::SCN_STATUS_ERROR) { + ret = OB_CHECKSUM_ERROR; + LOG_ERROR("ERROR! ERROR! ERROR! find error status tablet replica", KR(ret), K(tablet_info)); + } else { + ObAllZoneMergeProgress::iterator p = + std::lower_bound(all_progress.begin(), all_progress.end(), ls_r->get_zone()); + if ((p != all_progress.end()) && (p->zone_ == ls_r->get_zone())) { + SCN replica_snapshot_scn; + if (OB_FAIL(replica_snapshot_scn.convert_for_tx(r->get_snapshot_version()))) { + LOG_WARN("fail to convert val to SCN", KR(ret), "snapshot_version", r->get_snapshot_version()); + } else if ((REPLICA_TYPE_LOGONLY == ls_r->get_replica_type()) + || (REPLICA_TYPE_ENCRYPTION_LOGONLY == ls_r->get_replica_type())) { + // logonly replica no need check } else { if ((p->smallest_snapshot_scn_ <= SCN::min_scn()) - || (p->smallest_snapshot_scn_ > rep_snapshot_scn)) { - p->smallest_snapshot_scn_ = rep_snapshot_scn; + || (p->smallest_snapshot_scn_ > replica_snapshot_scn)) { + p->smallest_snapshot_scn_ = replica_snapshot_scn; } - - if (rep_snapshot_scn < global_broadcast_scn) { + if (replica_snapshot_scn >= global_broadcast_scn) { + if (replica_snapshot_scn > global_broadcast_scn) { + tablet_need_verify = false; // this tablet doesn't need to execute checksum verification + } + ++(p->merged_tablet_cnt_); + p->merged_data_size_ += r->get_data_size(); + } else { // only log the first replica not merged if (0 == 
p->unmerged_tablet_cnt_) { - LOG_INFO("replica not merged to target version", K_(tenant_id), + LOG_INFO("replica not merged to target version or status not match", K_(tenant_id), "current_version", r->get_snapshot_version(), K(global_broadcast_scn), - "replica", *r); + "current_status", r->get_status(), "compaction_replica", *r); } ++(p->unmerged_tablet_cnt_); p->unmerged_data_size_ += r->get_data_size(); - } else { - ++(p->merged_tablet_cnt_); - p->merged_data_size_ += r->get_data_size(); + is_tablet_compacted = false; } } } } + } // end foreach + + if (OB_SUCC(ret) && is_tablet_compacted) { + ObTabletLSPair pair(tablet_info.get_tablet_id(), tablet_info.get_ls_id()); + if (tablet_need_verify) { + if (OB_FAIL(tablet_compaction_map_.set_refactored(pair, ObTabletCompactionStatus::COMPACTED, true))) { + LOG_WARN("fail to set refactored", KR(ret), K(tablet_info)); + } + } else { + if (OB_FAIL(tablet_compaction_map_.set_refactored(pair, ObTabletCompactionStatus::CAN_SKIP_VERIFYING, true))) { + LOG_WARN("fail to set refactored", KR(ret), K(tablet_info)); + } + } } } return ret; @@ -274,7 +356,7 @@ int ObMajorMergeProgressChecker::check_tablet_data_version( int ObMajorMergeProgressChecker::check_majority_integrated( share::schema::ObSchemaGetterGuard &schema_guard, - const ObTabletInfo &tablet, + const ObTabletInfo &tablet_info, const share::ObLSInfo &ls_info) { int ret = OB_SUCCESS; @@ -292,13 +374,13 @@ int ObMajorMergeProgressChecker::check_majority_integrated( } else if (OB_FAIL(get_member_list(ls_info, member_list))) { // member_list of ls leader replica LOG_WARN("fail to get member_list", KR(ret), K_(tenant_id), K(ls_info)); } else { - const int64_t tablet_replica_cnt = tablet.replica_count(); + const int64_t tablet_replica_cnt = tablet_info.replica_count(); int64_t paxos_cnt = 0; const ObLSReplica *ls_r = nullptr; - FOREACH_CNT_X(r, tablet.get_replicas(), OB_SUCC(ret)) { + FOREACH_CNT_X(r, tablet_info.get_replicas(), OB_SUCC(ret)) { if (OB_ISNULL(r)) { ret = 
OB_ERR_UNEXPECTED; - LOG_WARN("invalid replica", KR(ret), K_(tenant_id), K(tablet)); + LOG_WARN("invalid replica", KR(ret), K_(tenant_id), K(tablet_info)); } else if (OB_FAIL(is_replica_in_ls_member_list(*r, member_list, is_in_member_list))) { LOG_WARN("fail to check if replica is in ls member_list", KR(ret), K_(tenant_id), KPC(r), K(member_list)); @@ -308,7 +390,7 @@ int ObMajorMergeProgressChecker::check_majority_integrated( LOG_INFO("ignore this tablet replica, sicne it is not in ls member_list", K_(tenant_id), KPC(r), K(member_list)); } else if (OB_FAIL(ls_info.find(r->get_server(), ls_r))) { - LOG_WARN("fail to find", "addr", r->get_server(), KR(ret)); + LOG_WARN("fail to find", KR(ret), "addr", r->get_server()); } else if (OB_UNLIKELY(nullptr == ls_r)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid ls replica", KR(ret), KPC(r)); @@ -330,7 +412,7 @@ int ObMajorMergeProgressChecker::check_majority_integrated( K(tablet_replica_cnt), K(majority), K(paxos_cnt), K(ls_info.get_replicas())); } else { LOG_ERROR("not integrated", K(full_cnt), K(paxos_cnt), K(majority), K(all_replica_num), - K(full_replica_num), K(paxos_replica_num), K(tablet), K(ls_info.get_replicas())); + K(full_replica_num), K(paxos_replica_num), K(tablet_info), K(ls_info.get_replicas())); } } } @@ -375,6 +457,21 @@ int ObMajorMergeProgressChecker::get_associated_replica_num( return ret; } +int ObMajorMergeProgressChecker::check_verification(const share::SCN &global_broadcast_scn) +{ + int ret = OB_SUCCESS; + if (!tablet_compaction_map_.empty()) { + if (index_validator_.need_validate() && OB_FAIL(index_validator_.validate_checksum(global_broadcast_scn, + tablet_compaction_map_, table_count_, table_compaction_map_))) { + LOG_WARN("fail to validate checksum of index validator", KR(ret), K(global_broadcast_scn)); + } + // TODO @donglou.zl add cross-cluster validator + } else { + LOG_INFO("none tablet finished compaction, no need to check verification", K(global_broadcast_scn)); + } + return ret; +} + int 
ObMajorMergeProgressChecker::get_member_list( const share::ObLSInfo &ls_info, share::ObLSReplica::MemberList &member_list) const diff --git a/src/rootserver/freeze/ob_major_merge_progress_checker.h b/src/rootserver/freeze/ob_major_merge_progress_checker.h index e0dc2e7752..72651d6c9e 100644 --- a/src/rootserver/freeze/ob_major_merge_progress_checker.h +++ b/src/rootserver/freeze/ob_major_merge_progress_checker.h @@ -16,6 +16,7 @@ #include "share/ob_zone_merge_info.h" #include "share/tablet/ob_tablet_info.h" #include "rootserver/ob_root_utils.h" +#include "rootserver/freeze/ob_checksum_validator.h" #include "common/ob_tablet_id.h" namespace oceanbase @@ -26,6 +27,7 @@ class ObTabletTableOperator; class ObLSInfo; class ObLSTableOperator; class ObIServerTrace; +struct ObTabletInfo; class ObLSReplica; namespace schema { @@ -40,6 +42,7 @@ class ObMySQLProxy; namespace rootserver { class ObZoneMergeManager; + class ObMajorMergeProgressChecker { public: @@ -53,22 +56,30 @@ public: share::ObLSTableOperator &lst_operator, share::ObIServerTrace &server_trace); + int prepare_handle(); // For each round major_freeze, need invoke this once. 
+ int check_merge_progress(const volatile bool &stop, const share::SCN &global_broadcast_scn, share::ObAllZoneMergeProgress &all_progress); + int check_verification(const share::SCN &global_broadcast_scn); + + // @exist_uncompacted means not all table finished compaction + // @exist_unverified means not all table finished verification + int check_table_status(bool &exist_uncompacted, bool &exist_unverified); + private: - int check_tablet(const share::ObTabletInfo &tablet, + int check_tablet(const share::ObTabletInfo &tablet_info, const common::hash::ObHashMap &tablet_map, share::ObAllZoneMergeProgress &all_progress, const share::SCN &global_broadcast_scn, share::schema::ObSchemaGetterGuard &schema_guard); - int check_tablet_data_version(share::ObAllZoneMergeProgress &all_progress, - const share::SCN &global_broadcast_scn, - const share::ObTabletInfo &tablet, - const share::ObLSInfo &ls_info); + int check_tablet_compaction_scn(share::ObAllZoneMergeProgress &all_progress, + const share::SCN &global_broadcast_scn, + const share::ObTabletInfo &tablet, + const share::ObLSInfo &ls_info); int check_majority_integrated(share::schema::ObSchemaGetterGuard &schema_guard, - const share::ObTabletInfo &tablet, + const share::ObTabletInfo &tablet_info, const share::ObLSInfo &ls_info); int get_associated_replica_num(share::schema::ObSchemaGetterGuard &schema_guard, @@ -91,6 +102,12 @@ private: ObZoneMergeManager *zone_merge_mgr_; share::ObLSTableOperator *lst_operator_; share::ObIServerTrace *server_trace_; + // record each tablet compaction status: INITIAL/COMPACTED/FINISHED + hash::ObHashMap tablet_compaction_map_; + int64_t table_count_; + // record each table compaction/verify status + hash::ObHashMap table_compaction_map_; // + ObIndexChecksumValidator index_validator_; DISALLOW_COPY_AND_ASSIGN(ObMajorMergeProgressChecker); }; diff --git a/src/rootserver/freeze/ob_major_merge_scheduler.cpp b/src/rootserver/freeze/ob_major_merge_scheduler.cpp index 52fb6acb5a..6dbb0ac5df 
100644 --- a/src/rootserver/freeze/ob_major_merge_scheduler.cpp +++ b/src/rootserver/freeze/ob_major_merge_scheduler.cpp @@ -31,6 +31,7 @@ #include "share/ob_global_stat_proxy.h" #include "share/ob_service_epoch_proxy.h" #include "share/ob_column_checksum_error_operator.h" +#include "share/ob_tablet_meta_table_compaction_operator.h" #include "share/ob_server_table_operator.h" namespace oceanbase @@ -40,119 +41,6 @@ namespace rootserver using namespace oceanbase::common; using namespace oceanbase::share; -int ObMergeErrorCallback::init( - const uint64_t tenant_id, - ObZoneMergeManager &zone_merge_mgr) -{ - int ret = OB_SUCCESS; - if (IS_INIT) { - ret = OB_INIT_TWICE; - LOG_WARN("init twice", KR(ret)); - } else if (tenant_id == OB_INVALID_TENANT_ID) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), K(tenant_id)); - } else { - zone_merge_mgr_ = &zone_merge_mgr; - tenant_id_ = tenant_id; - is_inited_ = true; - } - return ret; -} - -int ObMergeErrorCallback::handle_merge_error( - const int64_t error_type, - const int64_t expected_epoch) -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("not init", KR(ret), K_(tenant_id)); - } else { - if (OB_FAIL(zone_merge_mgr_->set_merge_error(error_type, expected_epoch))) { - LOG_WARN("fail to set merge error", KR(ret), K_(tenant_id), K(error_type), K(expected_epoch)); - } - } - return ret; -} - -/////////////////////////////////////////////////////////////////////////////// - -int ObFullChecksumValidator::init( - const uint64_t tenant_id, - ObMySQLProxy &sql_proxy, - ObZoneMergeManager &zone_merge_mgr) -{ - int ret = OB_SUCCESS; - if (IS_INIT) { - ret = OB_INIT_TWICE; - LOG_WARN("init twice", KR(ret), K(tenant_id)); - } else if (OB_FAIL(tablet_validator_.init(tenant_id, &sql_proxy))) { - LOG_WARN("fail to init tablet checksum validator", KR(ret), K(tenant_id)); - } else if (OB_FAIL(cross_cluster_validator_.init(tenant_id, &sql_proxy))) { - LOG_WARN("fail to init cross cluster 
checksum validator", KR(ret), K(tenant_id)); - } else if (OB_FAIL(index_validator_.init(tenant_id, &sql_proxy))) { - LOG_WARN("fail to init index checksum validator", KR(ret), K(tenant_id)); - } else if (OB_FAIL(merge_err_cb_.init(tenant_id, zone_merge_mgr))) { - LOG_WARN("fail to init merge error callback", KR(ret), K(tenant_id)); - } else { - tenant_id_ = tenant_id; - is_inited_ = true; - } - return ret; -} - -int ObFullChecksumValidator::execute_check( - const ObSimpleFrozenStatus &frozen_status, - const int64_t expected_epoch) -{ - int ret = OB_SUCCESS; - - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("not init", KR(ret), K_(tenant_id)); - } else { - // Set check condition for each validator here. - if (PRIMARY_CLUSTER == ObClusterInfoGetter::get_cluster_role_v2()) { - // no need to check cross-cluster checksum in primary cluster - cross_cluster_validator_.set_need_check(false); - } - - if (OB_FAIL(tablet_validator_.check(frozen_status))) { - LOG_WARN("fail to do check of tablet validator", KR(ret)); - } else if (/*OB_FAIL(cross_cluster_validator_.check(frozen_status))*/ false) { - LOG_WARN("fail to do check of cross_cluster validator", KR(ret)); - } else if (OB_FAIL(index_validator_.check(frozen_status))) { - LOG_WARN("fail to do check of index validator", KR(ret)); - } - last_check_time_ = ObTimeUtility::current_time(); - - if (OB_CHECKSUM_ERROR == ret) { - int tmp_ret = OB_SUCCESS; - if (OB_TMP_FAIL(merge_err_cb_.handle_merge_error(ObZoneMergeInfo::CHECKSUM_ERROR, expected_epoch))) { - LOG_WARN("fail to handle merge error", K(tmp_ret), K_(tenant_id), K(expected_epoch)); - ret = (OB_SUCC(ret) ? 
tmp_ret : ret); - } - } - } - return ret; -} - -int ObFullChecksumValidator::sync_tablet_checksum() -{ - int ret = OB_SUCCESS; - if (PRIMARY_CLUSTER == ObClusterInfoGetter::get_cluster_role_v2()) { - if (OB_FAIL(cross_cluster_validator_.write_tablet_checksum_item())) { - LOG_WARN("fail to sync tablet", KR(ret), K_(tenant_id)); - } else { - // no need to check cross-cluster checksum if sync checksum failed. - cross_cluster_validator_.set_need_check(false); - } - } else { - LOG_TRACE("no need to sync tablet checksum in current cluster", K_(tenant_id)); - } - return ret; -} - /////////////////////////////////////////////////////////////////////////////// int ObMajorMergeIdling::init(const uint64_t tenant_id) @@ -180,9 +68,9 @@ int64_t ObMajorMergeIdling::get_idle_interval_us() /////////////////////////////////////////////////////////////////////////////// ObMajorMergeScheduler::ObMajorMergeScheduler() - : ObFreezeReentrantThread(), is_inited_(false), fail_count_(0), first_check_merge_us_(0), - idling_(stop_), zone_merge_mgr_(nullptr), freeze_info_mgr_(nullptr), config_(nullptr), - merge_strategy_(), progress_checker_(), checksum_validator_() + : ObFreezeReentrantThread(), is_inited_(false), fail_count_(0), idling_(stop_), + zone_merge_mgr_(nullptr), freeze_info_mgr_(nullptr), config_(nullptr), + merge_strategy_(), progress_checker_(), cross_cluster_validator_() { } @@ -207,8 +95,8 @@ int ObMajorMergeScheduler::init( } else if (OB_FAIL(progress_checker_.init(tenant_id, sql_proxy, schema_service, zone_merge_mgr, *GCTX.lst_operator_, server_trace))) { LOG_WARN("fail to init progress_checker", KR(ret)); - } else if (OB_FAIL(checksum_validator_.init(tenant_id, sql_proxy, zone_merge_mgr))) { - LOG_WARN("fail to init checksum checker", KR(ret), K(tenant_id)); + } else if (OB_FAIL(cross_cluster_validator_.init(tenant_id, sql_proxy, zone_merge_mgr))) { + LOG_WARN("fail to init cross cluster validator", KR(ret), K(tenant_id)); } else if (OB_FAIL(idling_.init(tenant_id))) { 
LOG_WARN("fail to init idling", KR(ret), K(tenant_id)); } else { @@ -349,7 +237,9 @@ int ObMajorMergeScheduler::do_work() } if (OB_SUCC(ret) && need_merge) { - if (OB_FAIL(do_one_round_major_merge(curr_round_epoch))) { + if (OB_FAIL(do_before_major_merge(curr_round_epoch))) { + LOG_WARN("fail to do before major merge", KR(ret), K(curr_round_epoch)); + } else if (OB_FAIL(do_one_round_major_merge(curr_round_epoch))) { LOG_WARN("fail to do major merge", KR(ret), K(curr_round_epoch)); } } @@ -362,6 +252,23 @@ int ObMajorMergeScheduler::do_work() return ret; } +int ObMajorMergeScheduler::do_before_major_merge(const int64_t expected_epoch) +{ + int ret = OB_SUCCESS; + share::SCN global_broadcast_scn; + global_broadcast_scn.set_min(); + + if (OB_FAIL(progress_checker_.prepare_handle())) { + LOG_WARN("fail to do prepare handle of progress checker", KR(ret)); + } else if (OB_FAIL(zone_merge_mgr_->get_global_broadcast_scn(global_broadcast_scn))) { + LOG_WARN("fail to get_global_broadcast_scn", KR(ret)); + } else if (OB_FAIL(ObColumnChecksumErrorOperator::delete_column_checksum_err_info( + *sql_proxy_, tenant_id_, global_broadcast_scn))) { + LOG_WARN("fail to delete column checksum error info", KR(ret), K(global_broadcast_scn)); + } + return ret; +} + int ObMajorMergeScheduler::do_one_round_major_merge(const int64_t expected_epoch) { int ret = OB_SUCCESS; @@ -529,11 +436,12 @@ int ObMajorMergeScheduler::start_zones_merge(const ObZoneArray &to_merge, const int ObMajorMergeScheduler::update_merge_status(const int64_t expected_epoch) { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + ObAllZoneMergeProgress all_progress; SCN global_broadcast_scn; - bool all_merged = true; ObSimpleFrozenStatus frozen_status; - + DEBUG_SYNC(RS_VALIDATE_CHECKSUM); if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("not inited", KR(ret)); @@ -550,7 +458,43 @@ int ObMajorMergeScheduler::update_merge_status(const int64_t expected_epoch) "global_broadcast_scn", 
global_broadcast_scn.get_val_for_inner_table_field(), "service_addr", GCONF.self_addr_); } + } else if (OB_FAIL(progress_checker_.check_verification(global_broadcast_scn))) { + LOG_WARN("fail to check verification", KR(ret), K_(tenant_id), K(global_broadcast_scn)); + int64_t time_interval = 10L * 60 * 1000 * 1000; // record every 10 minutes + if (TC_REACH_TIME_INTERVAL(time_interval)) { + ROOTSERVICE_EVENT_ADD("daily_merge", "verification", K_(tenant_id), + "check verification fail", ret, + "global_broadcast_scn", global_broadcast_scn, + "service_addr", GCONF.self_addr_); + } + } + + if (OB_CHECKSUM_ERROR == ret) { + if (OB_TMP_FAIL(zone_merge_mgr_->set_merge_error(ObZoneMergeInfo::ObMergeErrorType::CHECKSUM_ERROR, + expected_epoch))) { + LOG_WARN("fail to set merge error", KR(ret), KR(tmp_ret), K_(tenant_id), K(expected_epoch)); + } + } else if (OB_SUCC(ret)) { + if (OB_FAIL(handle_all_zone_merge(all_progress, global_broadcast_scn, expected_epoch))) { + LOG_WARN("fail to handle all zone merge", KR(ret), K(global_broadcast_scn), K(expected_epoch)); + } + } + + return ret; +} + +int ObMajorMergeScheduler::handle_all_zone_merge( + const ObAllZoneMergeProgress &all_progress, + const share::SCN &global_broadcast_scn, + const int64_t expected_epoch) +{ + int ret = OB_SUCCESS; + bool all_merged = true; + if (global_broadcast_scn.get_val_for_tx() < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(global_broadcast_scn)); } else { + // 1. 
check all zone finished compaction HEAP_VAR(ObZoneMergeInfo, info) { FOREACH_X(progress, all_progress, OB_SUCC(ret)) { const ObZone &zone = progress->zone_; @@ -583,10 +527,11 @@ int ObMajorMergeScheduler::update_merge_status(const int64_t expected_epoch) LOG_INFO("broadcast_scn of this zone is larger than global_broadcast_scn, need to " "recheck merge progress again", K_(tenant_id), K(zone), K(global_broadcast_scn)); } else { - merged = (0 == progress->unmerged_tablet_cnt_); + merged = progress->is_merge_finished(); if (!merged) { all_merged = false; - LOG_INFO("zone merge not finish", "zone", progress->zone_, "unmerged_cnt", progress->unmerged_tablet_cnt_); + LOG_INFO("zone merge not finish", "zone", progress->zone_, "merged_cnt", progress->merged_tablet_cnt_, + "unmerged_cnt", progress->unmerged_tablet_cnt_); } SCN cur_all_merged_scn = SCN::min_scn(); @@ -598,8 +543,10 @@ int ObMajorMergeScheduler::update_merge_status(const int64_t expected_epoch) } else { cur_all_merged_scn = progress->smallest_snapshot_scn_; } - LOG_INFO("check updating merge status", KR(ret), K_(tenant_id), K(zone), K(merged), K(cur_all_merged_scn), - K(cur_merged_scn), "smallest_snapshot_scn", progress->smallest_snapshot_scn_, K(info)); + + LOG_INFO("check updating merge status", K_(tenant_id), K(zone), K(merged), K(cur_all_merged_scn), + K(cur_merged_scn), K(info),"smallest_snapshot_scn", progress->smallest_snapshot_scn_, + "merged_cnt", progress->merged_tablet_cnt_, "unmerged_cnt", progress->unmerged_tablet_cnt_); if (OB_SUCC(ret) && merged) { // cur_all_merged_scn >= cur_merged_scn @@ -612,7 +559,8 @@ int ObMajorMergeScheduler::update_merge_status(const int64_t expected_epoch) // 2. Equal: In backup-restore situation, tablets may have higher snapshot_version(eg. version=10). // If major_freeze with version=4, all_merged_scn will be updated to 10; if major_freeze with version=5, // all_merged_scn will still be 10. 
- if ((cur_all_merged_scn < cur_merged_scn) || (cur_all_merged_scn < ori_all_merged_scn) + if ((cur_all_merged_scn < cur_merged_scn) + || (cur_all_merged_scn < ori_all_merged_scn) || (cur_merged_scn < info.last_merged_scn())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("unexpected merged scn", KR(ret), K(merged), K(cur_merged_scn), K(cur_all_merged_scn), @@ -625,10 +573,8 @@ int ObMajorMergeScheduler::update_merge_status(const int64_t expected_epoch) LOG_WARN("fail to finish zone merge", KR(ret), K(zone), K(expected_epoch), K(cur_merged_scn), K(cur_all_merged_scn), K(info)); } else { - ROOTSERVICE_EVENT_ADD("daily_merge", "zone_merge_finish", K_(tenant_id), - "last_merged_scn", cur_merged_scn, - "all_merged_scn", cur_all_merged_scn, - K(zone)); + ROOTSERVICE_EVENT_ADD("daily_merge", "zone_merge_finish", K_(tenant_id), "last_merged_scn", cur_merged_scn, + "all_merged_scn", cur_all_merged_scn, K(zone)); } } } @@ -636,34 +582,35 @@ int ObMajorMergeScheduler::update_merge_status(const int64_t expected_epoch) } } + // 2. 
check all table finished compaction and verification if (OB_SUCC(ret) && all_merged) { - // MERGE_STATUS: MERGING -> VERIFYING CHECKSUM - if (OB_FAIL(update_global_merge_info_after_merge(expected_epoch))) { - LOG_WARN("fail to update global merge info after merge", KR(ret), K_(tenant_id), K(expected_epoch)); - } else if (OB_FAIL(ObColumnChecksumErrorOperator::delete_column_checksum_err_info(*sql_proxy_, - tenant_id_, global_broadcast_scn))) { - LOG_WARN("fail to delete column checksum error info", KR(ret), K_(tenant_id), K(global_broadcast_scn)); - } else if (OB_FAIL(checksum_validator_.execute_check(frozen_status, expected_epoch))) { - LOG_WARN("fail to execute checking checksum", KR(ret), K_(tenant_id), K(expected_epoch)); - // TODO NOT support cross-cluster checksum verify - } else if (/*OB_FAIL(checksum_validator_.sync_tablet_checksum())*/ false) { - LOG_WARN("fail to sync tablet checksum item", KR(ret), K_(tenant_id)); + bool exist_uncompacted = false; + bool exist_unverified = false; + if (OB_FAIL(progress_checker_.check_table_status(exist_uncompacted, exist_unverified))) { + LOG_WARN("fail to check table status", KR(ret), K_(tenant_id)); + } else if (exist_uncompacted) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("should not exist uncompacted table after all zone merged", KR(ret), K(exist_unverified)); + } else if (exist_unverified) { + all_merged = false; + LOG_INFO("although finished compaction, but not finish verification", K(all_merged), K(exist_uncompacted)); } + } - // MERGE_STATUS: VERIFYING CHECKSUM -> IDLE - if (FAILEDx(try_update_global_merged_scn(expected_epoch))) { + // 3. 
execute final operations after all merged + if (OB_SUCC(ret) && all_merged) { + // MERGE_STATUS: change to IDLE + if (OB_FAIL(try_update_global_merged_scn(expected_epoch))) { LOG_WARN("fail to update global_merged_scn", KR(ret), K_(tenant_id), K(expected_epoch)); } } } - return ret; } int ObMajorMergeScheduler::update_global_merge_info_after_merge(const int64_t expected_epoch) { int ret = OB_SUCCESS; - // need update global merge_status if (OB_FAIL(zone_merge_mgr_->update_global_merge_info_after_merge(expected_epoch))) { LOG_WARN("fail to update global merge info after merge", KR(ret), K_(tenant_id), K(expected_epoch)); } @@ -674,6 +621,7 @@ int ObMajorMergeScheduler::try_update_global_merged_scn(const int64_t expected_e { int ret = OB_SUCCESS; HEAP_VARS_2((ObZoneMergeInfoArray, infos), (ObGlobalMergeInfo, global_info)) { + uint64_t global_broadcast_scn_val = UINT64_MAX; if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("not inited", KR(ret)); @@ -681,6 +629,9 @@ int ObMajorMergeScheduler::try_update_global_merged_scn(const int64_t expected_e LOG_WARN("fail to get zone info", KR(ret)); } else if (global_info.is_merge_error()) { LOG_WARN("should not update global merged scn, cuz is_merge_error is true", K(global_info)); + } else if (FALSE_IT(global_broadcast_scn_val = global_info.global_broadcast_scn_.get_scn_val())) { + } else if (OB_FAIL(update_all_tablets_report_scn(global_broadcast_scn_val))) { + LOG_WARN("fail to update all tablets report_scn", KR(ret), K(expected_epoch), K(global_broadcast_scn_val)); } else { if (global_info.last_merged_scn() != global_info.global_broadcast_scn()) { bool merged = true; @@ -694,7 +645,7 @@ int ObMajorMergeScheduler::try_update_global_merged_scn(const int64_t expected_e LOG_WARN("try update global last_merged_scn failed", KR(ret), K(expected_epoch)); } else { ROOTSERVICE_EVENT_ADD("daily_merge", "global_merged", K_(tenant_id), - "global_broadcast_scn", global_info.global_broadcast_scn_); + "global_broadcast_scn", 
global_broadcast_scn_val); } } } @@ -703,6 +654,19 @@ int ObMajorMergeScheduler::try_update_global_merged_scn(const int64_t expected_e return ret; } +int ObMajorMergeScheduler::sync_tablet_checksum() +{ + int ret = OB_SUCCESS; + if (PRIMARY_CLUSTER == ObClusterInfoGetter::get_cluster_role_v2()) { + if (OB_FAIL(cross_cluster_validator_.write_tablet_checksum_item())) { + LOG_WARN("fail to sync tablet", KR(ret), K_(tenant_id)); + } + } else { + LOG_INFO("no need to sync tablet checksum in non-primary cluster", K_(tenant_id)); + } + return ret; +} + int ObMajorMergeScheduler::set_zone_merging(const ObZone &zone, const int64_t expected_epoch) { int ret = OB_SUCCESS; @@ -778,6 +742,19 @@ int ObMajorMergeScheduler::do_update_freeze_service_epoch( return ret; } +int ObMajorMergeScheduler::update_all_tablets_report_scn( + const uint64_t global_braodcast_scn_val) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObTabletMetaTableCompactionOperator::batch_update_report_scn( + tenant_id_, + global_braodcast_scn_val, + ObTabletReplica::ScnStatus::SCN_STATUS_ERROR))) { + LOG_WARN("fail to batch update report_scn", KR(ret), K_(tenant_id), K(global_braodcast_scn_val)); + } + return ret; +} + void ObMajorMergeScheduler::check_merge_interval_time(const bool is_merging) { int ret = OB_SUCCESS; diff --git a/src/rootserver/freeze/ob_major_merge_scheduler.h b/src/rootserver/freeze/ob_major_merge_scheduler.h index d767d03f8d..4c758b7fd6 100644 --- a/src/rootserver/freeze/ob_major_merge_scheduler.h +++ b/src/rootserver/freeze/ob_major_merge_scheduler.h @@ -44,56 +44,6 @@ class ObZoneMergeManager; class ObFreezeInfoManager; class ObTenantMajorMergeStrategy; -class ObMergeErrorCallback -{ -public: - ObMergeErrorCallback() - : is_inited_(false), tenant_id_(OB_INVALID_TENANT_ID), - zone_merge_mgr_(nullptr) - {} - virtual ~ObMergeErrorCallback() {} - - int init(const uint64_t tenant_id, ObZoneMergeManager &zone_merge_mgr); - - int handle_merge_error(const int64_t error_type, const int64_t 
expected_epoch); - -private: - bool is_inited_; - uint64_t tenant_id_; - ObZoneMergeManager *zone_merge_mgr_; - DISALLOW_COPY_AND_ASSIGN(ObMergeErrorCallback); -}; - -class ObFullChecksumValidator -{ -public: - ObFullChecksumValidator() - : is_inited_(false), tenant_id_(OB_INVALID_TENANT_ID), last_check_time_(0), - tablet_validator_(), cross_cluster_validator_(), index_validator_(), - merge_err_cb_() - {} - virtual ~ObFullChecksumValidator() {} - - int init(const uint64_t tenant_id, - common::ObMySQLProxy &sql_proxy, - ObZoneMergeManager &zone_merge_mgr); - - int execute_check(const share::ObSimpleFrozenStatus &frozen_status, - const int64_t expected_epoch); - - // sync tablet checksum data from __all_tablet_replica_checksum to __all_tablet_checksum - int sync_tablet_checksum(); - -private: - bool is_inited_; - uint64_t tenant_id_; - int64_t last_check_time_; - ObTabletChecksumValidator tablet_validator_; - ObCrossClusterTableteChecksumValidator cross_cluster_validator_; - ObIndexChecksumValidator index_validator_; - ObMergeErrorCallback merge_err_cb_; -}; - class ObMajorMergeIdling : public ObThreadIdling { public: @@ -138,6 +88,7 @@ protected: private: int do_work(); + int do_before_major_merge(const int64_t expected_epoch); int do_one_round_major_merge(const int64_t expected_epoch); int generate_next_global_broadcast_scn(const int64_t expected_epoch); @@ -147,11 +98,19 @@ private: int set_zone_merging(const ObZone &zone, const int64_t expected_epoch); int update_merge_status(const int64_t expected_epoch); + int handle_all_zone_merge(const share::ObAllZoneMergeProgress &all_progress, + const share::SCN &global_broadcast_scn, + const int64_t expected_epoch); int try_update_global_merged_scn(const int64_t expected_epoch); int update_global_merge_info_after_merge(const int64_t expected_epoch); int do_update_freeze_service_epoch(const int64_t latest_epoch); + // sync tablet checksum data from __all_tablet_replica_checksum to __all_tablet_checksum + int 
sync_tablet_checksum(); + + int update_all_tablets_report_scn(const uint64_t global_broadcast_scn_val); + void check_merge_interval_time(const bool is_merging); private: @@ -171,7 +130,7 @@ private: ObTenantAllZoneMergeStrategy merge_strategy_; common::ObMySQLProxy *sql_proxy_; ObMajorMergeProgressChecker progress_checker_; - ObFullChecksumValidator checksum_validator_; + ObCrossClusterTableteChecksumValidator cross_cluster_validator_; DISALLOW_COPY_AND_ASSIGN(ObMajorMergeScheduler); }; diff --git a/src/share/CMakeLists.txt b/src/share/CMakeLists.txt index cbce9b3f7c..20f0907642 100644 --- a/src/share/CMakeLists.txt +++ b/src/share/CMakeLists.txt @@ -154,6 +154,7 @@ ob_set_subtarget(ob_share common ob_tablet_checksum_operator.cpp ob_tablet_replica_checksum_iterator.cpp ob_tablet_replica_checksum_operator.cpp + ob_tablet_meta_table_compaction_operator.cpp ob_task_define.cpp ob_tenant_info_proxy.cpp ob_tenant_role.cpp diff --git a/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp b/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp index 5bdf89e866..87fe00f997 100644 --- a/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp +++ b/src/share/inner_table/ob_inner_table_schema.11001_11050.cpp @@ -8110,6 +8110,21 @@ int ObInnerTableSchema::all_virtual_memstore_info_schema(ObTableSchema &table_sc false, //is_nullable false); //is_autoincrement } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("compaction_info_list", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_COMPACTION_INFO_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } if (OB_SUCC(ret)) { table_schema.get_part_option().set_part_num(1); table_schema.set_part_level(PARTITION_LEVEL_ONE); diff --git a/src/share/inner_table/ob_inner_table_schema.12201_12250.cpp 
b/src/share/inner_table/ob_inner_table_schema.12201_12250.cpp index d95614f8fb..895534f5a2 100644 --- a/src/share/inner_table/ob_inner_table_schema.12201_12250.cpp +++ b/src/share/inner_table/ob_inner_table_schema.12201_12250.cpp @@ -9760,6 +9760,44 @@ int ObInnerTableSchema::all_virtual_tablet_meta_table_schema(ObTableSchema &tabl required_size_default, required_size_default); //default_value } + + if (OB_SUCC(ret)) { + ObObj report_scn_default; + report_scn_default.set_uint64(0); + ADD_COLUMN_SCHEMA_T("report_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObUInt64Type, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(uint64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + report_scn_default, + report_scn_default); //default_value + } + + if (OB_SUCC(ret)) { + ObObj status_default; + status_default.set_int(0); + ADD_COLUMN_SCHEMA_T("status", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + status_default, + status_default); //default_value + } table_schema.set_index_using_type(USING_BTREE); table_schema.set_row_store_type(ENCODING_ROW_STORE); table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); diff --git a/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp b/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp index d88c718f7e..ad054fdf57 100644 --- a/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp +++ b/src/share/inner_table/ob_inner_table_schema.12301_12350.cpp @@ -6369,6 +6369,195 @@ int ObInnerTableSchema::all_virtual_kvcache_handle_leak_info_schema(ObTableSchem return ret; } +int 
ObInnerTableSchema::all_virtual_tablet_compaction_info_schema(ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + uint64_t column_id = OB_APP_MIN_COLUMN_ID - 1; + + //generated fields: + table_schema.set_tenant_id(OB_SYS_TENANT_ID); + table_schema.set_tablegroup_id(OB_INVALID_ID); + table_schema.set_database_id(OB_SYS_DATABASE_ID); + table_schema.set_table_id(OB_ALL_VIRTUAL_TABLET_COMPACTION_INFO_TID); + table_schema.set_rowkey_split_pos(0); + table_schema.set_is_use_bloomfilter(false); + table_schema.set_progressive_merge_num(0); + table_schema.set_rowkey_column_num(0); + table_schema.set_load_type(TABLE_LOAD_TYPE_IN_DISK); + table_schema.set_table_type(VIRTUAL_TABLE); + table_schema.set_index_type(INDEX_TYPE_IS_NOT); + table_schema.set_def_type(TABLE_DEF_TYPE_INTERNAL); + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_table_name(OB_ALL_VIRTUAL_TABLET_COMPACTION_INFO_TNAME))) { + LOG_ERROR("fail to set table_name", K(ret)); + } + } + + if (OB_SUCC(ret)) { + if (OB_FAIL(table_schema.set_compress_func_name(OB_DEFAULT_COMPRESS_FUNC_NAME))) { + LOG_ERROR("fail to set compress_func_name", K(ret)); + } + } + table_schema.set_part_level(PARTITION_LEVEL_ZERO); + table_schema.set_charset_type(ObCharset::get_default_charset()); + table_schema.set_collation_type(ObCharset::get_default_collation(ObCharset::get_default_charset())); + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_ip", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 1, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + MAX_IP_ADDR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("svr_port", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 2, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + 
-1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tenant_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("ls_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("tablet_id", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("finished_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("wait_check_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("max_received_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, 
//index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("serialize_scn_list", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_INVALID, //column_collation_type + OB_MAX_VARCHAR_LENGTH, //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } + if (OB_SUCC(ret)) { + table_schema.get_part_option().set_part_num(1); + table_schema.set_part_level(PARTITION_LEVEL_ONE); + table_schema.get_part_option().set_part_func_type(PARTITION_FUNC_TYPE_LIST_COLUMNS); + if (OB_FAIL(table_schema.get_part_option().set_part_expr("svr_ip, svr_port"))) { + LOG_WARN("set_part_expr failed", K(ret)); + } else if (OB_FAIL(table_schema.mock_list_partition_array())) { + LOG_WARN("mock list partition array failed", K(ret)); + } + } + table_schema.set_index_using_type(USING_HASH); + table_schema.set_row_store_type(ENCODING_ROW_STORE); + table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); + table_schema.set_progressive_merge_round(1); + table_schema.set_storage_format_version(3); + table_schema.set_tablet_id(0); + + table_schema.set_max_used_column_id(column_id); + return ret; +} + int ObInnerTableSchema::all_virtual_ls_replica_task_plan_schema(ObTableSchema &table_schema) { int ret = OB_SUCCESS; diff --git a/src/share/inner_table/ob_inner_table_schema.15001_15050.cpp b/src/share/inner_table/ob_inner_table_schema.15001_15050.cpp index 1d72e0d4d6..c677fc5067 100644 --- a/src/share/inner_table/ob_inner_table_schema.15001_15050.cpp +++ b/src/share/inner_table/ob_inner_table_schema.15001_15050.cpp @@ -10404,6 +10404,21 @@ int ObInnerTableSchema::all_virtual_memstore_info_ora_schema(ObTableSchema &tabl false, //is_nullable 
false); //is_autoincrement } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("COMPACTION_INFO_LIST", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObVarcharType, //column_type + CS_TYPE_UTF8MB4_BIN, //column_collation_type + OB_COMPACTION_INFO_LENGTH, //column_length + 2, //column_precision + -1, //column_scale + false, //is_nullable + false); //is_autoincrement + } if (OB_SUCC(ret)) { table_schema.get_part_option().set_part_num(1); table_schema.set_part_level(PARTITION_LEVEL_ONE); diff --git a/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp b/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp index 3ae7b890a6..386aabac75 100644 --- a/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp +++ b/src/share/inner_table/ob_inner_table_schema.15201_15250.cpp @@ -4741,6 +4741,36 @@ int ObInnerTableSchema::all_virtual_tablet_meta_table_ora_schema(ObTableSchema & false, //is_nullable false); //is_autoincrement } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("REPORT_SCN", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } + + if (OB_SUCC(ret)) { + ADD_COLUMN_SCHEMA("STATUS", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObNumberType, //column_type + CS_TYPE_INVALID, //column_collation_type + 38, //column_length + 38, //column_precision + 0, //column_scale + false, //is_nullable + false); //is_autoincrement + } table_schema.set_index_using_type(USING_BTREE); table_schema.set_row_store_type(ENCODING_ROW_STORE); table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); diff --git a/src/share/inner_table/ob_inner_table_schema.301_350.cpp b/src/share/inner_table/ob_inner_table_schema.301_350.cpp index 
cdf86673a6..9acecd4ecf 100644 --- a/src/share/inner_table/ob_inner_table_schema.301_350.cpp +++ b/src/share/inner_table/ob_inner_table_schema.301_350.cpp @@ -7956,6 +7956,44 @@ int ObInnerTableSchema::all_tablet_meta_table_schema(ObTableSchema &table_schema required_size_default, required_size_default); //default_value } + + if (OB_SUCC(ret)) { + ObObj report_scn_default; + report_scn_default.set_uint64(0); + ADD_COLUMN_SCHEMA_T("report_scn", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObUInt64Type, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(uint64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + report_scn_default, + report_scn_default); //default_value + } + + if (OB_SUCC(ret)) { + ObObj status_default; + status_default.set_int(0); + ADD_COLUMN_SCHEMA_T("status", //column_name + ++column_id, //column_id + 0, //rowkey_id + 0, //index_id + 0, //part_key_pos + ObIntType, //column_type + CS_TYPE_INVALID, //column_collation_type + sizeof(int64_t), //column_length + -1, //column_precision + -1, //column_scale + false, //is_nullable + false, //is_autoincrement + status_default, + status_default); //default_value + } table_schema.set_index_using_type(USING_BTREE); table_schema.set_row_store_type(ENCODING_ROW_STORE); table_schema.set_store_format(OB_STORE_FORMAT_DYNAMIC_MYSQL); diff --git a/src/share/inner_table/ob_inner_table_schema.h b/src/share/inner_table/ob_inner_table_schema.h index 7fe288a588..e093734c90 100644 --- a/src/share/inner_table/ob_inner_table_schema.h +++ b/src/share/inner_table/ob_inner_table_schema.h @@ -829,6 +829,7 @@ public: static int all_virtual_query_response_time_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_column_checksum_error_info_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_kvcache_handle_leak_info_schema(share::schema::ObTableSchema 
&table_schema); + static int all_virtual_tablet_compaction_info_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_ls_replica_task_plan_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_schema_memory_schema(share::schema::ObTableSchema &table_schema); static int all_virtual_schema_slot_schema(share::schema::ObTableSchema &table_schema); @@ -2731,6 +2732,7 @@ const schema_create_func virtual_table_schema_creators [] = { ObInnerTableSchema::all_virtual_query_response_time_schema, ObInnerTableSchema::all_virtual_column_checksum_error_info_schema, ObInnerTableSchema::all_virtual_kvcache_handle_leak_info_schema, + ObInnerTableSchema::all_virtual_tablet_compaction_info_schema, ObInnerTableSchema::all_virtual_ls_replica_task_plan_schema, ObInnerTableSchema::all_virtual_schema_memory_schema, ObInnerTableSchema::all_virtual_schema_slot_schema, @@ -4008,6 +4010,7 @@ const uint64_t tenant_space_tables [] = { OB_ALL_VIRTUAL_BACKUP_DELETE_POLICY_TID, OB_ALL_VIRTUAL_PRIVILEGE_TID, OB_ALL_VIRTUAL_QUERY_RESPONSE_TIME_TID, + OB_ALL_VIRTUAL_TABLET_COMPACTION_INFO_TID, OB_ALL_VIRTUAL_LS_REPLICA_TASK_PLAN_TID, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_TID, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_ALL_VIRTUAL_SQL_AUDIT_I1_TID, @@ -5794,6 +5797,7 @@ const char* const tenant_space_table_names [] = { OB_ALL_VIRTUAL_BACKUP_DELETE_POLICY_TNAME, OB_ALL_VIRTUAL_PRIVILEGE_TNAME, OB_ALL_VIRTUAL_QUERY_RESPONSE_TIME_TNAME, + OB_ALL_VIRTUAL_TABLET_COMPACTION_INFO_TNAME, OB_ALL_VIRTUAL_LS_REPLICA_TASK_PLAN_TNAME, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_TNAME, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_ALL_VIRTUAL_SQL_AUDIT_I1_TNAME, @@ -7121,6 +7125,7 @@ const uint64_t tenant_distributed_vtables [] = { OB_ALL_VIRTUAL_ASH_TID, OB_ALL_VIRTUAL_DML_STATS_TID, OB_ALL_VIRTUAL_QUERY_RESPONSE_TIME_TID, + OB_ALL_VIRTUAL_TABLET_COMPACTION_INFO_TID, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_TID, OB_ALL_VIRTUAL_SQL_AUDIT_ORA_ALL_VIRTUAL_SQL_AUDIT_I1_TID, OB_ALL_VIRTUAL_PLAN_STAT_ORA_TID, @@ -9155,11 +9160,11 @@ static 
inline int get_sys_table_lob_aux_schema(const uint64_t tid, const int64_t OB_CORE_TABLE_COUNT = 4; const int64_t OB_SYS_TABLE_COUNT = 212; -const int64_t OB_VIRTUAL_TABLE_COUNT = 551; +const int64_t OB_VIRTUAL_TABLE_COUNT = 552; const int64_t OB_SYS_VIEW_COUNT = 601; -const int64_t OB_SYS_TENANT_TABLE_COUNT = 1369; +const int64_t OB_SYS_TENANT_TABLE_COUNT = 1370; const int64_t OB_CORE_SCHEMA_VERSION = 1; -const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 1372; +const int64_t OB_BOOTSTRAP_SCHEMA_VERSION = 1373; } // end namespace share } // end namespace oceanbase diff --git a/src/share/inner_table/ob_inner_table_schema_constants.h b/src/share/inner_table/ob_inner_table_schema_constants.h index 01219febbf..bc45942e8d 100644 --- a/src/share/inner_table/ob_inner_table_schema_constants.h +++ b/src/share/inner_table/ob_inner_table_schema_constants.h @@ -577,6 +577,7 @@ const uint64_t OB_ALL_VIRTUAL_LOG_RESTORE_SOURCE_TID = 12324; // "__all_virtual_ const uint64_t OB_ALL_VIRTUAL_QUERY_RESPONSE_TIME_TID = 12325; // "__all_virtual_query_response_time" const uint64_t OB_ALL_VIRTUAL_COLUMN_CHECKSUM_ERROR_INFO_TID = 12330; // "__all_virtual_column_checksum_error_info" const uint64_t OB_ALL_VIRTUAL_KVCACHE_HANDLE_LEAK_INFO_TID = 12331; // "__all_virtual_kvcache_handle_leak_info" +const uint64_t OB_ALL_VIRTUAL_TABLET_COMPACTION_INFO_TID = 12334; // "__all_virtual_tablet_compaction_info" const uint64_t OB_ALL_VIRTUAL_LS_REPLICA_TASK_PLAN_TID = 12335; // "__all_virtual_ls_replica_task_plan" const uint64_t OB_ALL_VIRTUAL_SCHEMA_MEMORY_TID = 12336; // "__all_virtual_schema_memory" const uint64_t OB_ALL_VIRTUAL_SCHEMA_SLOT_TID = 12337; // "__all_virtual_schema_slot" @@ -2463,6 +2464,7 @@ const char *const OB_ALL_VIRTUAL_LOG_RESTORE_SOURCE_TNAME = "__all_virtual_log_r const char *const OB_ALL_VIRTUAL_QUERY_RESPONSE_TIME_TNAME = "__all_virtual_query_response_time"; const char *const OB_ALL_VIRTUAL_COLUMN_CHECKSUM_ERROR_INFO_TNAME = "__all_virtual_column_checksum_error_info"; const char 
*const OB_ALL_VIRTUAL_KVCACHE_HANDLE_LEAK_INFO_TNAME = "__all_virtual_kvcache_handle_leak_info"; +const char *const OB_ALL_VIRTUAL_TABLET_COMPACTION_INFO_TNAME = "__all_virtual_tablet_compaction_info"; const char *const OB_ALL_VIRTUAL_LS_REPLICA_TASK_PLAN_TNAME = "__all_virtual_ls_replica_task_plan"; const char *const OB_ALL_VIRTUAL_SCHEMA_MEMORY_TNAME = "__all_virtual_schema_memory"; const char *const OB_ALL_VIRTUAL_SCHEMA_SLOT_TNAME = "__all_virtual_schema_slot"; diff --git a/src/share/inner_table/ob_inner_table_schema_def.py b/src/share/inner_table/ob_inner_table_schema_def.py index ff3263ea98..1ec8af84f7 100644 --- a/src/share/inner_table/ob_inner_table_schema_def.py +++ b/src/share/inner_table/ob_inner_table_schema_def.py @@ -3418,6 +3418,8 @@ def_table_schema( # load balancing releated ('data_size', 'int'), ('required_size', 'int', 'false', '0'), + ('report_scn', 'uint', 'false', '0'), + ('status', 'int', 'false', '0'), ], ) @@ -5098,8 +5100,7 @@ def_table_schema( ) # 417 : __all_switchover_checkpoint -# 418 : __all_tablet_compaction_scn - +# 418 : EMPTY # 419 : __all_column_group # 420 : __all_column_group_history # 421 : __all_column_group_mapping @@ -6121,7 +6122,8 @@ def_table_schema( ('delete_row_count', 'int'), ('freeze_ts', 'int'), ('freeze_state', 'varchar:OB_MAX_CHAR_LENGTH'), - ('freeze_time_dist', 'varchar:OB_MAX_CHAR_LENGTH') + ('freeze_time_dist', 'varchar:OB_MAX_CHAR_LENGTH'), + ('compaction_info_list', 'varchar:OB_COMPACTION_INFO_LENGTH'), ], partition_columns = ['svr_ip', 'svr_port'], vtable_route_policy = 'distributed', @@ -7659,6 +7661,10 @@ def_table_schema( vtable_route_policy = 'distributed', ) + + + + ################################################################ ################################################################ # INFORMATION SCHEMA @@ -10679,9 +10685,31 @@ def_table_schema( ) # 12332: __all_virtual_switchover_checkpoint +# 12333: EMPTY -# 12333: __all_virtual_tablet_compaction_scn -# 12334: 
__all_virtual_tablet_compaction_info +def_table_schema( + owner = 'lixia.yq', + table_name = '__all_virtual_tablet_compaction_info', + table_id = '12334', + table_type = 'VIRTUAL_TABLE', + in_tenant_space = True, + gm_columns = [], + rowkey_columns = [], + + normal_columns = [ + ('svr_ip', 'varchar:MAX_IP_ADDR_LENGTH'), + ('svr_port', 'int'), + ('tenant_id', 'int'), + ('ls_id', 'int'), + ('tablet_id', 'int'), + ('finished_scn', 'int'), + ('wait_check_scn', 'int'), + ('max_received_scn', 'int'), + ('serialize_scn_list', 'varchar:OB_MAX_VARCHAR_LENGTH') + ], + partition_columns = ['svr_ip', 'svr_port'], + vtable_route_policy = 'distributed', +) def_table_schema( owner = 'jingyu.cr', diff --git a/src/share/ob_debug_sync_point.h b/src/share/ob_debug_sync_point.h index 52fc50de4b..2277681bcb 100644 --- a/src/share/ob_debug_sync_point.h +++ b/src/share/ob_debug_sync_point.h @@ -56,7 +56,7 @@ class ObString; ACT(BEFORE_AUTO_COORDINATE,) \ ACT(DELAY_PARTITION_SERVICE_FREEZE_LOG_TASK,) \ ACT(MINOR_MERGE_TIMER_TASK,) \ - ACT(MINOR_MERGE_TASK_PROCESS,) \ + ACT(MERGE_TASK_PROCESS,) \ ACT(MINOR_MERGE_SCHEDULE,) \ ACT(DELAY_INDEX_WRITE,) \ ACT(BEFORE_MINOR_FREEZE_GET_BASE_STORAGE_INFO,) \ @@ -400,6 +400,8 @@ class ObString; ACT(WHILE_LEADER_RESTORE_GROUP_TABLET,)\ ACT(AFTER_DATA_TABLETS_MIGRATION,)\ ACT(MERGE_PARTITION_FINISH_TASK,)\ + ACT(RS_VALIDATE_CHECKSUM,)\ + ACT(HA_REPORT_META_TABLE,)\ ACT(AFTER_CHANGE_MIGRATION_STATUS_HOLD,)\ ACT(AFTER_CREATE_META_TENANT_SYS_LOGSTREAM,)\ ACT(AFTER_CREATE_USER_TENANT_SYS_LOGSTREAM,)\ diff --git a/src/share/ob_tablet_meta_table_compaction_operator.cpp b/src/share/ob_tablet_meta_table_compaction_operator.cpp new file mode 100644 index 0000000000..359e454521 --- /dev/null +++ b/src/share/ob_tablet_meta_table_compaction_operator.cpp @@ -0,0 +1,522 @@ +/* +* Copyright (c) 2021 Ant Group CO., Ltd. +* OceanBase is licensed under Mulan PubL v1. +* You can use this software according to the terms and conditions of the Mulan PubL v1. 
+* You may obtain a copy of Mulan PubL v1 at: http://license.coscl.org.cn/MulanPubL-1.0 +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* See the Mulan PubL v1 for more details. +*/ +#define USING_LOG_PREFIX SHARE +#include "ob_tablet_meta_table_compaction_operator.h" +#include "lib/mysqlclient/ob_mysql_result.h" +#include "lib/oblog/ob_log.h" +#include "lib/string/ob_sql_string.h" +#include "share/inner_table/ob_inner_table_schema.h" +#include "share/ob_dml_sql_splicer.h" +#include "share/tablet/ob_tablet_filter.h" +#include "observer/ob_server_struct.h" + +namespace oceanbase +{ +namespace share +{ +using namespace oceanbase::common; +using namespace oceanbase::common::sqlclient; + +// update status of all rows +int ObTabletMetaTableCompactionOperator::set_info_status( + const ObTabletCompactionScnInfo &input_info, + ObTabletCompactionScnInfo &ret_info) +{ + int ret = OB_SUCCESS; + ObMySQLTransaction trans; + ObSqlString sql; + ObDMLSqlSplicer dml; + int64_t affected_rows = 0; + if (OB_UNLIKELY(!input_info.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(input_info)); + } else { + const uint64_t meta_tenant_id = gen_meta_tenant_id(input_info.tenant_id_); + if (OB_FAIL(trans.start(GCTX.sql_proxy_, meta_tenant_id))) {// start trans + LOG_WARN("fail to start transaction", KR(ret), K(input_info), K(meta_tenant_id)); + } else if (OB_FAIL(do_select(trans, true/*select_with_update*/, input_info, ret_info))) { + LOG_WARN("failed to do select", K(ret), K(input_info)); + } else if (OB_FAIL(dml.add_pk_column("tenant_id", input_info.tenant_id_)) + || OB_FAIL(dml.add_pk_column("ls_id", input_info.ls_id_)) + || OB_FAIL(dml.add_pk_column("tablet_id", input_info.tablet_id_)) + || OB_FAIL(dml.add_column("status", (int64_t)input_info.status_))) { + LOG_WARN("add column failed", 
KR(ret), K(input_info)); + } else if (OB_FAIL(dml.splice_update_sql(OB_ALL_TABLET_META_TABLE_TNAME, sql))) { + LOG_WARN("fail to splice batch insert update sql", KR(ret), K(sql)); + } else if (OB_FAIL(trans.write(meta_tenant_id, sql.ptr(), affected_rows))) { + LOG_WARN("fail to execute sql", K(input_info), K(meta_tenant_id), K(sql)); + } else if (OB_UNLIKELY(0 == affected_rows)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("affected rows is invalid", K(ret), K(input_info), K(affected_rows)); + } else{ + FLOG_INFO("success to set info status", K(ret), K(input_info), K(ret_info)); + } + handle_trans_stat(trans, ret); + } + return ret; +} + +int ObTabletMetaTableCompactionOperator::get_status( + const ObTabletCompactionScnInfo &input_info, + ObTabletCompactionScnInfo &ret_info) +{ + int ret = OB_SUCCESS; + ret_info.reset(); + ObISQLClient *sql_client = GCTX.sql_proxy_; + if (OB_UNLIKELY(!input_info.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(input_info)); + } else if (OB_ISNULL(sql_client)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("sql client is null", K(ret), KP(sql_client)); + } else if (OB_FAIL(do_select(*sql_client, false/*select_for_update*/, input_info, ret_info))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_WARN("failed to select from tablet compaction scn tablet", KR(ret), K(input_info)); + } + } + return ret; +} + +int ObTabletMetaTableCompactionOperator::diagnose_compaction_scn( + const int64_t tenant_id, + int64_t &error_tablet_cnt) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(GCTX.sql_proxy_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("sql proxy is unexpected null", K(ret)); + } + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + ObZone zone; + ObMySQLResult *result = nullptr; + ObSqlString sql; + const uint64_t meta_tenant_id = gen_meta_tenant_id(tenant_id); + if (OB_FAIL(sql.append_fmt( + "SELECT count(1) as c FROM %s WHERE tenant_id = '%ld' AND status = '%ld'", + OB_ALL_TABLET_META_TABLE_TNAME, + tenant_id, + (int64_t 
)ObTabletReplica::SCN_STATUS_ERROR))) { + LOG_WARN("failed to append fmt", K(ret), K(tenant_id)); + } else if (OB_FAIL(GCTX.sql_proxy_->read(res, meta_tenant_id, sql.ptr()))) { + LOG_WARN("fail to do read", KR(ret), K(meta_tenant_id), K(sql.ptr())); + } else if (OB_ISNULL(result = res.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get result", KR(ret), K(meta_tenant_id), K(sql.ptr())); + } else if (OB_FAIL(result->get_int("c", error_tablet_cnt))) { + LOG_WARN("failed to get int", KR(ret)); + } + } + return ret; +} + +void ObTabletMetaTableCompactionOperator::handle_trans_stat( + ObMySQLTransaction &trans, + int &ret) +{ + if (trans.is_started()) { + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(trans.end(OB_SUCC(ret)))) { + LOG_WARN("trans end failed", "is_commit", OB_SUCC(ret), K(tmp_ret)); + ret = OB_SUCC(ret) ? tmp_ret : ret; + } + } +} + +int ObTabletMetaTableCompactionOperator::do_select( + ObISQLClient &sql_client, + const bool select_with_update, + const ObTabletCompactionScnInfo &input_info, + ObTabletCompactionScnInfo &ret_info) +{ + int ret = OB_SUCCESS; + ObSqlString sql; + const uint64_t meta_tenant_id = gen_meta_tenant_id(input_info.tenant_id_); + ret_info = input_info; // assign tenant_id / ls_id / tablet_id + + if (OB_FAIL(sql.append_fmt( + "SELECT max(report_scn) as report_scn, max(status) as status" + " FROM %s WHERE tenant_id = '%lu' AND ls_id = '%ld' AND tablet_id = '%ld'%s", + OB_ALL_TABLET_META_TABLE_TNAME, + input_info.tenant_id_, + input_info.ls_id_, + input_info.tablet_id_, + select_with_update ? 
" FOR UPDATE" : ""))) { + LOG_WARN("failed to append fmt", K(ret), K(input_info)); + } else { + ret = execute_select_sql(sql_client, meta_tenant_id, sql, ret_info); + } + return ret; +} + +int ObTabletMetaTableCompactionOperator::execute_select_sql( + ObISQLClient &sql_client, + const int64_t meta_tenant_id, + const ObSqlString &sql, + ObTabletCompactionScnInfo &ret_info) +{ + int ret = OB_SUCCESS; + SMART_VAR(ObMySQLProxy::MySQLResult, res) { + ObMySQLResult *result = nullptr; + if (OB_FAIL(sql_client.read(res, meta_tenant_id, sql.ptr()))) { + LOG_WARN("fail to do read", KR(ret), K(meta_tenant_id), K(sql)); + } else if (OB_ISNULL(result = res.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get result", KR(ret), K(meta_tenant_id), K(sql)); + } else if (OB_FAIL(construct_compaction_related_info(*result, ret_info))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_WARN("fail to get medium snapshot info", KR(ret), KP(result), K(sql)); + } + } else { + LOG_TRACE("success to get medium snapshot info", K(ret_info)); + } + } + return ret; +} + +int ObTabletMetaTableCompactionOperator::batch_update_unequal_report_scn_tablet( + const uint64_t tenant_id, + const share::ObLSID &ls_id, + const int64_t major_frozen_scn, + const common::ObIArray &input_tablet_id_array) +{ + int ret = OB_SUCCESS; + const uint64_t meta_tenant_id = gen_meta_tenant_id(tenant_id); + int64_t start_idx = 0; + int64_t end_idx = min(MAX_BATCH_COUNT, input_tablet_id_array.count()); + common::ObSEArray unequal_tablet_id_array; + if (OB_ISNULL(GCTX.sql_proxy_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("sql proxy is unexpected null", K(ret)); + } + while (OB_SUCC(ret) && (start_idx < end_idx)) { + ObSqlString sql; + if (OB_FAIL(sql.append_fmt( + "select distinct(tablet_id) from %s where tenant_id = '%lu' AND ls_id = '%ld'" + " AND tablet_id IN (", + OB_ALL_TABLET_META_TABLE_TNAME, + tenant_id, + ls_id.id()))) { + LOG_WARN("failed to assign sql", K(ret), K(tenant_id), K(start_idx)); + } else if 
(OB_FAIL(append_tablet_id_array(tenant_id, input_tablet_id_array, start_idx, end_idx, sql))) { + LOG_WARN("fail to append tablet id array", KR(ret), K(tenant_id), + K(input_tablet_id_array.count()), K(start_idx), K(end_idx)); + } else if (OB_FAIL(sql.append_fmt(") AND compaction_scn = '%lu' AND report_scn < '%lu'", + major_frozen_scn, major_frozen_scn))) { + LOG_WARN("failed to assign sql", K(ret), K(tenant_id), K(start_idx)); + } else { + SMART_VAR(ObISQLClient::ReadResult, result) { + if (OB_FAIL(GCTX.sql_proxy_->read(result, meta_tenant_id, sql.ptr()))) { + LOG_WARN("fail to execute sql", KR(ret), K(tenant_id), K(meta_tenant_id), "sql", sql.ptr()); + } else if (OB_ISNULL(result.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get mysql result", KR(ret), "sql", sql.ptr()); + } else if (OB_FAIL(construct_unequal_tablet_id_array(*result.get_result(), unequal_tablet_id_array))) { + LOG_WARN("fail to construct tablet id array", KR(ret), "sql", sql.ptr()); + } + } + if (OB_FAIL(ret)) { + } else if (unequal_tablet_id_array.empty()) { + // do nothing + } else if (OB_FAIL(inner_batch_update_unequal_report_scn_tablet( + tenant_id, + ls_id, + major_frozen_scn, + unequal_tablet_id_array))) { + LOG_WARN("fail to update unequal tablet id array", KR(ret), "sql", sql.ptr()); + } else { + unequal_tablet_id_array.reuse(); + } + } + if (OB_SUCC(ret)) { + start_idx = end_idx; + end_idx = min(start_idx + MAX_BATCH_COUNT, input_tablet_id_array.count()); + } + } + return ret; +} + +int ObTabletMetaTableCompactionOperator::construct_unequal_tablet_id_array( + sqlclient::ObMySQLResult &result, + common::ObIArray &unequal_tablet_id_array) +{ + int ret = OB_SUCCESS; + int64_t tablet_id = 0; + while (OB_SUCC(ret)) { + if (OB_FAIL(result.next())) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("fail to get next result", KR(ret)); + } + break; + } else if (OB_FAIL(result.get_int("tablet_id", tablet_id))) { + LOG_WARN("fail to get uint", KR(ret)); + } 
else if (OB_FAIL(unequal_tablet_id_array.push_back(ObTabletID(tablet_id)))) { + LOG_WARN("failed to push back tablet id", K(ret), K(tablet_id)); + } + } + if (OB_SUCC(ret) && unequal_tablet_id_array.count() > 0) { + LOG_TRACE("success to get uneuqal tablet_id array", K(ret), K(unequal_tablet_id_array)); + } + return ret; +} + +int ObTabletMetaTableCompactionOperator::append_tablet_id_array( + const uint64_t tenant_id, + const common::ObIArray &input_tablet_id_array, + const int64_t start_idx, + const int64_t end_idx, + ObSqlString &sql) +{ + int ret = OB_SUCCESS; + for (int64_t idx = start_idx; OB_SUCC(ret) && (idx < end_idx); ++idx) { + const ObTabletID &tablet_id = input_tablet_id_array.at(idx); + if (OB_UNLIKELY(!tablet_id.is_valid_with_tenant(tenant_id))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid tablet_id with tenant", KR(ret), K(tenant_id), K(tablet_id)); + } else if (OB_FAIL(sql.append_fmt( + "%s %ld", + start_idx == idx ? "" : ",", + tablet_id.id()))) { + LOG_WARN("fail to assign sql", KR(ret), K(idx), K(tablet_id)); + } + } + return ret; +} + +int ObTabletMetaTableCompactionOperator::inner_batch_update_unequal_report_scn_tablet( + const uint64_t tenant_id, + const share::ObLSID &ls_id, + const int64_t major_frozen_scn, + const common::ObIArray &unequal_tablet_id_array) +{ + int ret = OB_SUCCESS; + int64_t affected_rows = 0; + const uint64_t meta_tenant_id = gen_meta_tenant_id(tenant_id); + ObSqlString sql; + if (OB_FAIL(sql.append_fmt("UPDATE %s SET report_scn='%lu' WHERE tenant_id='%lu' AND ls_id='%ld' AND tablet_id IN (", + OB_ALL_TABLET_META_TABLE_TNAME, + major_frozen_scn, + tenant_id, + ls_id.id()))) { + LOG_WARN("failed to append fmt", K(ret), K(tenant_id), K(ls_id)); + } else if (OB_FAIL(append_tablet_id_array(tenant_id, unequal_tablet_id_array, 0, unequal_tablet_id_array.count(), sql))) { + LOG_WARN("fail to append tablet id array", KR(ret), K(tenant_id), K(unequal_tablet_id_array)); + } else if (OB_FAIL(sql.append_fmt(") AND 
compaction_scn = '%lu' AND report_scn <'%lu'", + major_frozen_scn, major_frozen_scn))) { + LOG_WARN("failed to assign sql", K(ret), K(tenant_id), K(ls_id)); + } else if (OB_FAIL(GCTX.sql_proxy_->write(meta_tenant_id, sql.ptr(), affected_rows))) { + LOG_WARN("fail to execute sql", KR(ret), K(tenant_id), K(meta_tenant_id), K(sql)); + } else if (affected_rows > 0) { + LOG_INFO("success to update unequal report_scn", K(ret), K(tenant_id), K(ls_id), K(unequal_tablet_id_array.count())); + } + return ret; +} + +int ObTabletMetaTableCompactionOperator::construct_compaction_related_info( + sqlclient::ObMySQLResult &result, + ObTabletCompactionScnInfo &info) +{ + int ret = OB_SUCCESS; + uint64_t report_scn_in_table = 0; + int64_t status = 0; + if (OB_FAIL(result.get_uint("report_scn", report_scn_in_table))) { + if (OB_ERR_NULL_VALUE == ret) { + ret = OB_ENTRY_NOT_EXIST; + } else { + LOG_WARN("failed to get int", KR(ret), K(info)); + } + } else if (OB_FAIL(result.get_int("status", status))) { + LOG_WARN("failed to get int", KR(ret), K(status)); + } else if (OB_UNLIKELY(!ObTabletReplica::is_status_valid((ObTabletReplica::ScnStatus)status))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("status is invalid", KR(ret), K(status)); + } else { + info.report_scn_ = (int64_t)report_scn_in_table; + info.status_ = ObTabletReplica::ScnStatus(status); + } + return ret; +} + +int ObTabletMetaTableCompactionOperator::batch_update_report_scn( + const uint64_t tenant_id, + const uint64_t global_braodcast_scn_val, + const ObTabletReplica::ScnStatus &except_status) +{ + int ret = OB_SUCCESS; + uint64_t compat_version = 0; + if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { + LOG_WARN("fail to get data version", KR(ret), K(tenant_id)); + } else if (compat_version < DATA_VERSION_4_1_0_0) { + // do nothing + } else { + ObMySQLTransaction trans; + 
int64_t affected_rows = 0; + const uint64_t meta_tenant_id = gen_meta_tenant_id(tenant_id); + if (OB_FAIL(trans.start(GCTX.sql_proxy_, meta_tenant_id))) { + LOG_WARN("fail to start transaction", KR(ret), K(tenant_id), K(meta_tenant_id)); + } else { + ObSqlString sql; + // TODO tenant may have a great many tablets, so we should use batch splitting strategy to update + if (OB_FAIL(sql.assign_fmt("UPDATE %s SET report_scn = '%lu' WHERE tenant_id = '%ld' " + "AND compaction_scn >= '%lu' AND status != '%ld'", + OB_ALL_TABLET_META_TABLE_TNAME, + global_braodcast_scn_val, + tenant_id, + global_braodcast_scn_val, + (int64_t )except_status))) { + LOG_WARN("fail to assign sql", KR(ret), K(tenant_id), K(global_braodcast_scn_val), K(except_status)); + } else if (OB_FAIL(trans.write(meta_tenant_id, sql.ptr(), affected_rows))) { + LOG_WARN("fail to execute sql", KR(ret), K(tenant_id), K(meta_tenant_id), K(sql)); + } + } + handle_trans_stat(trans, ret); + LOG_INFO("finish to batch update report scn", KR(ret), K(tenant_id), K(affected_rows)); + } + return ret; +} + +int ObTabletMetaTableCompactionOperator::batch_update_report_scn( + const uint64_t tenant_id, + const uint64_t global_braodcast_scn_val, + const ObIArray &tablet_pairs, + const ObTabletReplica::ScnStatus &except_status) +{ + int ret = OB_SUCCESS; + int64_t affected_rows = 0; + uint64_t compat_version = 0; + ObDMLSqlSplicer dml; + const int64_t all_pair_cnt = tablet_pairs.count(); + if (OB_UNLIKELY((all_pair_cnt < 1) + || !is_valid_tenant_id(tenant_id))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(all_pair_cnt)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, compat_version))) { + LOG_WARN("fail to get data version", KR(ret), K(tenant_id)); + } else if (compat_version < DATA_VERSION_4_1_0_0) { + } else { + const uint64_t meta_tenant_id = gen_meta_tenant_id(tenant_id); + ObMySQLTransaction trans; + if (OB_FAIL(trans.start(GCTX.sql_proxy_, meta_tenant_id))) { + 
LOG_WARN("fail to start transaction", KR(ret), K(tenant_id), K(meta_tenant_id)); + } + for (int64_t i = 0; OB_SUCC(ret) && (i < all_pair_cnt); i += MAX_BATCH_COUNT) { + const int64_t cur_end_idx = MIN(i + MAX_BATCH_COUNT, all_pair_cnt); + ObSqlString sql; + if (OB_FAIL(sql.append_fmt( + "UPDATE %s SET report_scn = '%lu' WHERE tenant_id = %ld AND (tablet_id,ls_id) IN (", + OB_ALL_TABLET_META_TABLE_TNAME, + global_braodcast_scn_val, + tenant_id))) { + LOG_WARN("fail to assign sql", KR(ret), K(tenant_id), K(global_braodcast_scn_val)); + } else { + // handle each batch tablet_ls_pairs + for (int64_t idx = i; OB_SUCC(ret) && (idx < cur_end_idx); ++idx) { + const ObTabletID &tablet_id = tablet_pairs.at(idx).get_tablet_id(); + const ObLSID &ls_id = tablet_pairs.at(idx).get_ls_id(); + if (OB_UNLIKELY(!tablet_id.is_valid_with_tenant(tenant_id) + || !ls_id.is_valid_with_tenant(tenant_id))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid tablet_id with tenant", KR(ret), K(tenant_id), K(tablet_id), K(ls_id)); + } else if (OB_FAIL(sql.append_fmt( + "%s (%ld,%ld)", + i == idx ? 
"" : ",", + tablet_id.id(), + ls_id.id()))) { + LOG_WARN("fail to assign sql", KR(ret), K(tablet_id)); + } + } // end for + if (FAILEDx(sql.append_fmt(") AND compaction_scn >= '%lu' AND status != %ld", + global_braodcast_scn_val, + (int64_t)(except_status)))) { + LOG_WARN("fail to assign sql", KR(ret), K(tenant_id), K(meta_tenant_id), K(except_status), + K(global_braodcast_scn_val)); + } else if (OB_FAIL(trans.write(meta_tenant_id, sql.ptr(), affected_rows))) { + LOG_WARN("fail to execute sql", KR(ret), K(tenant_id), K(meta_tenant_id), K(sql)); + } else { + LOG_TRACE("success to update report_scn", KR(ret), K(tenant_id), K(meta_tenant_id), K(tablet_pairs), K(sql)); + } + } + } + handle_trans_stat(trans, ret); + } + + return ret; +} + +int ObTabletMetaTableCompactionOperator::get_unique_status( + const uint64_t tenant_id, + ObIArray &pairs, + ObIArray &status_arr) +{ + int ret = OB_SUCCESS; + + const int64_t pair_cnt = pairs.count(); + if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) || pair_cnt < 1)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(pair_cnt)); + } else { + ObSqlString sql; + SMART_VAR(ObISQLClient::ReadResult, res) { + ObMySQLResult *result = nullptr; + if (OB_FAIL(sql.assign_fmt("SELECT distinct status FROM %s WHERE tenant_id = '%lu' AND (ls_id, tablet_id) " + "IN (", OB_ALL_TABLET_META_TABLE_TNAME, tenant_id))) { + LOG_WARN("fail to assign sql", KR(ret), K(tenant_id)); + } else { + for (int64_t i = 0; (i < pair_cnt) && OB_SUCC(ret); ++i) { + const ObTabletLSPair &pair = pairs.at(i); + if (OB_UNLIKELY(!pair.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(i), K(pair)); + } else if (OB_FAIL(sql.append_fmt("(%ld, %ld)%s", pair.get_ls_id().id(), + pair.get_tablet_id().id(), ((i == pair_cnt - 1) ? 
")" : ", ")))) { + LOG_WARN("fail to assign sql", KR(ret), K(i), K(tenant_id), K(pair)); + } + } + } + + const uint64_t meta_tenant_id = gen_meta_tenant_id(tenant_id); + if (FAILEDx(GCTX.sql_proxy_->read(res, meta_tenant_id, sql.ptr()))) { + LOG_WARN("fail to execute sql", KR(ret), K(tenant_id), K(meta_tenant_id), K(sql)); + } else if (OB_ISNULL(result = res.get_result())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get mysql result", KR(ret), K(tenant_id), K(sql)); + } else { + while (OB_SUCC(ret)) { + int64_t tmp_status = 0; + if (OB_FAIL(result->next())) { + if (OB_ITER_END != ret) { + LOG_WARN("fail to get next result", KR(ret), K(tenant_id), K(sql)); + } + } else if (OB_FAIL(result->get_int("status", tmp_status))) { + LOG_WARN("failed to get int", KR(ret), K(tmp_status)); + } else if (OB_UNLIKELY(!ObTabletReplica::is_status_valid((ObTabletReplica::ScnStatus)tmp_status))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("status is invalid", KR(ret), K(tenant_id), K(tmp_status)); + } else if (OB_FAIL(status_arr.push_back(ObTabletReplica::ScnStatus(tmp_status)))) { + LOG_WARN("fail to push back status", KR(ret), K(tenant_id), K(tmp_status)); + } + } // end while loop + + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + } + } + } + } + return ret; +} + +} // end namespace share +} // end namespace oceanbase diff --git a/src/share/ob_tablet_meta_table_compaction_operator.h b/src/share/ob_tablet_meta_table_compaction_operator.h new file mode 100644 index 0000000000..2e86650700 --- /dev/null +++ b/src/share/ob_tablet_meta_table_compaction_operator.h @@ -0,0 +1,161 @@ +/* +* Copyright (c) 2021 Ant Group CO., Ltd. +* OceanBase is licensed under Mulan PubL v1. +* You can use this software according to the terms and conditions of the Mulan PubL v1. 
+* You may obtain a copy of Mulan PubL v1 at: http://license.coscl.org.cn/MulanPubL-1.0 +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +* See the Mulan PubL v1 for more details. +*/ +#ifndef OCEANBASE_SHARE_OB_TABLET_MEDIUM_SNAPSHOT_TABLE_OPERATOR_ +#define OCEANBASE_SHARE_OB_TABLET_MEDIUM_SNAPSHOT_TABLE_OPERATOR_ + +#include "lib/container/ob_iarray.h" +#include "lib/mysqlclient/ob_isql_client.h" +#include "common/ob_zone.h" +#include "lib/mysqlclient/ob_mysql_transaction.h" +#include "share/ob_ls_id.h" +#include "share/tablet/ob_tablet_info.h" +#include "observer/ob_server_struct.h" + +namespace oceanbase +{ +namespace common +{ +class ObMySQLTransaction; +} +namespace share +{ +class ObTabletReplicaFilter; + +// part compaction related member from __all_tablet_meta_table +struct ObTabletCompactionScnInfo +{ +public: + ObTabletCompactionScnInfo() + : tenant_id_(OB_INVALID_TENANT_ID), + ls_id_(0), + tablet_id_(0), + compaction_scn_(0), + report_scn_(0), + status_(ObTabletReplica::SCN_STATUS_MAX) + {} + ObTabletCompactionScnInfo( + const int64_t tenant_id, + const ObLSID &ls_id, + const ObTabletID &tablet_id, + const ObTabletReplica::ScnStatus status) + : tenant_id_(tenant_id), + ls_id_(ls_id.id()), + tablet_id_(tablet_id.id()), + compaction_scn_(0), + report_scn_(0), + status_(status) + {} + bool is_valid() const + { + return is_valid_tenant_id(tenant_id_) && ls_id_ > 0 && tablet_id_ > 0 && report_scn_ >= 0; + } + // only check when last compaction type is major + bool could_schedule_next_round(const int64_t major_frozen_scn) + { + return ObTabletReplica::SCN_STATUS_IDLE == status_ && major_frozen_scn <= report_scn_; + } + void reset() + { + tenant_id_ = OB_INVALID_TENANT_ID; + ls_id_ = 0; + tablet_id_ = 0; + compaction_scn_ = 0; + report_scn_ = 0; + status_ = ObTabletReplica::SCN_STATUS_MAX; + } + 
TO_STRING_KV(K_(tenant_id), K_(ls_id), K_(tablet_id), K_(compaction_scn), K_(report_scn), K_(status)); +public: + uint64_t tenant_id_; + int64_t ls_id_; + int64_t tablet_id_; + int64_t compaction_scn_; + int64_t report_scn_; + ObTabletReplica::ScnStatus status_; +}; + +// CRUD operation to __all_tablet_meta_table +class ObTabletMetaTableCompactionOperator +{ +public: + static int set_info_status( + const ObTabletCompactionScnInfo &input_info, + ObTabletCompactionScnInfo &ret_info); + static int get_status( + const ObTabletCompactionScnInfo &input_info, + ObTabletCompactionScnInfo &ret_info); + static int diagnose_compaction_scn( + const int64_t tenant_id, + int64_t &error_tablet_cnt); + // update report_scn of all tablets which belong to @tablet_pairs + static int batch_update_report_scn( + const uint64_t tenant_id, + const uint64_t global_braodcast_scn_val, + const common::ObIArray &tablet_pairs, + const ObTabletReplica::ScnStatus &except_status); + // after major_freeze, update all tablets' report_scn to global_braodcast_scn_val + static int batch_update_report_scn( + const uint64_t tenant_id, + const uint64_t global_braodcast_scn_val, + const ObTabletReplica::ScnStatus &except_status); + static int get_unique_status( + const uint64_t tenant_id, + common::ObIArray &pairs, + common::ObIArray &status_arr); + static int batch_update_unequal_report_scn_tablet( + const uint64_t tenant_id, + const share::ObLSID &ls_id, + const int64_t major_frozen_scn, + const common::ObIArray &input_tablet_id_array); +private: + // is_update_finish_scn = TRUE: update finish_scn + // is_update_finish_scn = FALSE: delete rows + static int inner_batch_update_with_trans( + common::ObMySQLTransaction &trans, + const uint64_t tenant_id, + const bool is_update_finish_scn, + const common::ObIArray &replicas); + static int do_select( + ObISQLClient &sql_client, + const bool select_with_update, + const ObTabletCompactionScnInfo &input_info, + ObTabletCompactionScnInfo &ret_info); + static int 
execute_select_sql( + ObISQLClient &sql_client, + const int64_t meta_tenant_id, + const ObSqlString &sql, + ObTabletCompactionScnInfo &ret_info); + static void handle_trans_stat(common::ObMySQLTransaction &trans, int &ret); + // construct compaction_scn_info based on part of the fileds defined in the schema + static int construct_compaction_related_info( + sqlclient::ObMySQLResult &result, + ObTabletCompactionScnInfo &info); + static int inner_batch_update_unequal_report_scn_tablet( + const uint64_t tenant_id, + const share::ObLSID &ls_id, + const int64_t major_frozen_scn, + const common::ObIArray &unequal_tablet_id_array); + static int append_tablet_id_array( + const uint64_t tenant_id, + const common::ObIArray &input_tablet_id_array, + const int64_t start_idx, + const int64_t end_idx, + ObSqlString &sql); + static int construct_unequal_tablet_id_array( + sqlclient::ObMySQLResult &result, + common::ObIArray &unequal_tablet_id_array); +private: + const static int64_t MAX_BATCH_COUNT = 150; +}; + +} // end namespace share +} // end namespace oceanbase + +#endif // OCEANBASE_SHARE_OB_TABLET_MEDIUM_SNAPSHOT_TABLE_OPERATOR_ diff --git a/src/share/ob_tablet_replica_checksum_operator.cpp b/src/share/ob_tablet_replica_checksum_operator.cpp index 8aced95390..19506de6ca 100644 --- a/src/share/ob_tablet_replica_checksum_operator.cpp +++ b/src/share/ob_tablet_replica_checksum_operator.cpp @@ -20,8 +20,10 @@ #include "lib/mysqlclient/ob_mysql_proxy.h" #include "share/inner_table/ob_inner_table_schema_constants.h" #include "share/schema/ob_column_schema.h" +#include "observer/ob_server_struct.h" #include "share/tablet/ob_tablet_info.h" #include "share/config/ob_server_config.h" +#include "share/ob_tablet_meta_table_compaction_operator.h" namespace oceanbase { @@ -523,9 +525,39 @@ int ObTabletReplicaChecksumOperator::batch_get( return ret; } +int ObTabletReplicaChecksumOperator::get_specified_tablet_checksum( + const uint64_t tenant_id, + const int64_t ls_id, + const int64_t 
tablet_id, + const int64_t snapshot_version, + common::ObIArray &items) +{ + int ret = OB_SUCCESS; + ObSqlString sql; + if (OB_UNLIKELY((OB_INVALID_TENANT_ID == tenant_id) + || (ls_id <= 0) + || (tablet_id <= 0) + || (snapshot_version < 0))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(ls_id), K(tablet_id), K(snapshot_version)); + } else if (OB_FAIL(sql.append_fmt("SELECT * FROM %s WHERE tenant_id = '%lu' and tablet_id = '%ld' " + "and ls_id = '%ld' and compaction_scn = '%ld'", + OB_ALL_TABLET_REPLICA_CHECKSUM_TNAME, + tenant_id, + tablet_id, + ls_id, + snapshot_version))) { + LOG_WARN("fail to assign sql", KR(ret), K(tenant_id), K(ls_id), K(snapshot_version)); + } else if (OB_FAIL(batch_get(tenant_id, sql, *GCTX.sql_proxy_, items))) { + LOG_WARN("fail to batch get tablet replica checksum items", KR(ret), K(tenant_id), K(sql)); + } + return ret; +} + int ObTabletReplicaChecksumOperator::batch_get( const uint64_t tenant_id, const ObIArray &pairs, + const SCN &compaction_scn, ObISQLClient &sql_proxy, ObIArray &items) { @@ -547,8 +579,8 @@ int ObTabletReplicaChecksumOperator::batch_get( ObSqlString sql; while (OB_SUCC(ret) && (start_idx < end_idx)) { sql.reuse(); - if (OB_FAIL(construct_batch_get_sql_str_(tenant_id, pairs, start_idx, end_idx, sql))) { - LOG_WARN("fail to construct batch get sql", KR(ret), K(tenant_id), K(pairs), + if (OB_FAIL(construct_batch_get_sql_str_(tenant_id, compaction_scn, pairs, start_idx, end_idx, sql))) { + LOG_WARN("fail to construct batch get sql", KR(ret), K(tenant_id), K(compaction_scn), K(pairs), K(start_idx), K(end_idx)); } else if (OB_FAIL(inner_batch_get_by_sql_(tenant_id, sql, sql_proxy, items))) { LOG_WARN("fail to inner batch get by sql", KR(ret), K(tenant_id), K(sql)); @@ -632,7 +664,7 @@ int ObTabletReplicaChecksumOperator::construct_batch_get_sql_str_( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(batch_cnt), K(compaction_scn)); } else if (OB_FAIL(sql.append_fmt("SELECT * FROM 
%s WHERE tenant_id = '%lu' and tablet_id > '%lu' " - "and compaction_scn = %lu", OB_ALL_TABLET_REPLICA_CHECKSUM_TNAME, tenant_id, + "and compaction_scn >= %lu", OB_ALL_TABLET_REPLICA_CHECKSUM_TNAME, tenant_id, start_pair.get_tablet_id().id(), compaction_scn.get_val_for_inner_table_field()))) { LOG_WARN("fail to assign sql", KR(ret), K(tenant_id), K(start_pair), K(compaction_scn)); } else if (OB_FAIL(sql.append_fmt(" ORDER BY tenant_id, tablet_id, svr_ip, svr_port limit %ld", @@ -644,6 +676,7 @@ int ObTabletReplicaChecksumOperator::construct_batch_get_sql_str_( int ObTabletReplicaChecksumOperator::construct_batch_get_sql_str_( const uint64_t tenant_id, + const SCN &compaction_scn, const ObIArray &pairs, const int64_t start_idx, const int64_t end_idx, @@ -655,9 +688,9 @@ int ObTabletReplicaChecksumOperator::construct_batch_get_sql_str_( pairs_cnt < 1) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(start_idx), K(end_idx), K(pairs_cnt)); - } else if (OB_FAIL(sql.append_fmt("SELECT * FROM %s WHERE tenant_id = '%lu' and (tablet_id, ls_id)" - " IN ((", OB_ALL_TABLET_REPLICA_CHECKSUM_TNAME, tenant_id))) { - LOG_WARN("fail to assign sql", KR(ret), K(tenant_id)); + } else if (OB_FAIL(sql.append_fmt("SELECT * FROM %s WHERE tenant_id = '%lu' AND compaction_scn >= %ld" + " AND (tablet_id, ls_id) IN ((", OB_ALL_TABLET_REPLICA_CHECKSUM_TNAME, tenant_id, compaction_scn.get_val_for_inner_table_field()))) { + LOG_WARN("fail to assign sql", KR(ret), K(tenant_id), K(compaction_scn)); } else { for (int64_t idx = start_idx; OB_SUCC(ret) && (idx < end_idx); ++idx) { const ObTabletLSPair &pair = pairs.at(idx); @@ -675,7 +708,7 @@ int ObTabletReplicaChecksumOperator::construct_batch_get_sql_str_( } if (FAILEDx(sql.append_fmt(") ORDER BY tenant_id, tablet_id, ls_id, svr_ip, svr_port"))) { - LOG_WARN("fail to assign sql string", KR(ret), K(tenant_id), K(pairs_cnt)); + LOG_WARN("fail to assign sql string", KR(ret), K(tenant_id), K(compaction_scn), K(pairs_cnt)); } return 
ret; } @@ -883,6 +916,73 @@ int ObTabletReplicaChecksumOperator::inner_batch_insert_or_update_by_sql_( return ret; } +int ObTabletReplicaChecksumOperator::check_tablet_replica_checksum( + const uint64_t tenant_id, + const ObIArray &pairs, + const SCN &compaction_scn, + ObMySQLProxy &sql_proxy) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_valid_tenant_id(tenant_id) + || pairs.empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid arguments", KR(ret), K(tenant_id), K(pairs), K(compaction_scn)); + } else { + SMART_VAR(ObArray, items) { + if (OB_FAIL(batch_get(tenant_id, pairs, compaction_scn, sql_proxy, items))) { + LOG_WARN("fail to batch get tablet replica checksum items", KR(ret), K(tenant_id), K(compaction_scn)); + } else if (items.count() == 0) { + ret = OB_ITER_END; + } else if (OB_FAIL(innner_verify_tablet_replica_checksum(items))) { + LOG_WARN("fail to execute tablet replica checksum verification", KR(ret), K(items)); + } + } + } + return ret; +} + +int ObTabletReplicaChecksumOperator::innner_verify_tablet_replica_checksum( + const ObIArray &ckm_items) +{ + int ret = OB_SUCCESS; + int check_ret = OB_SUCCESS; + if (OB_UNLIKELY(ckm_items.empty())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(ckm_items)); + } else { + const int64_t item_cnt = ckm_items.count(); + ObTabletReplicaChecksumItem prev_item; + ObTabletReplicaChecksumItem curr_item; + for (int64_t i = 0; OB_SUCC(ret) && (i < item_cnt); ++i) { + curr_item.reset(); + if (OB_FAIL(curr_item.assign(ckm_items.at(i)))) { + LOG_WARN("fail to assign tablet replica checksum item", KR(ret), K(i), "item", ckm_items.at(i)); + } else if (prev_item.is_key_valid()) { + if (curr_item.is_same_tablet(prev_item)) { // same tablet + if (OB_FAIL(curr_item.verify_checksum(prev_item))) { + if (OB_CHECKSUM_ERROR == ret) { + LOG_ERROR("ERROR! ERROR! ERROR! 
checksum error in tablet replica checksum", KR(ret), + K(curr_item), K(prev_item)); + check_ret = ret; + ret = OB_SUCCESS; // continue checking next checksum + } else { + LOG_WARN("unexpected error in tablet replica checksum", KR(ret), K(curr_item), K(prev_item)); + } + } + } else if (OB_FAIL(prev_item.assign(curr_item))) { // next tablet + LOG_WARN("fail to assign tablet replica checksum item", KR(ret), K(i), K(curr_item)); + } + } else if (OB_FAIL(prev_item.assign(curr_item))) { + LOG_WARN("fail to assign tablet replica checksum item", KR(ret), K(i), K(curr_item)); + } + } + } + if (OB_CHECKSUM_ERROR == check_ret) { + ret = OB_CHECKSUM_ERROR; + } + return ret; +} + int ObTabletReplicaChecksumOperator::check_column_checksum( const uint64_t tenant_id, const ObTableSchema &data_table_schema, @@ -921,6 +1021,7 @@ int ObTabletReplicaChecksumOperator::check_global_index_column_checksum( ObMySQLProxy &sql_proxy) { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; const int64_t default_column_cnt = ObTabletReplicaReportColumnMeta::DEFAULT_COLUMN_CNT; int64_t check_cnt = 0; bool need_verify = false; @@ -930,13 +1031,13 @@ int ObTabletReplicaChecksumOperator::check_global_index_column_checksum( hash::ObHashMap data_column_ckm_sum_map; hash::ObHashMap index_column_ckm_sum_map; - ObArray data_schema_tablet_ids; - ObArray index_schema_tablet_ids; - SMART_VAR(ObArray, data_table_ckm_items) { - SMART_VAR(ObArray, index_table_ckm_items) { - if (OB_FAIL(data_column_ckm_sum_map.create(default_column_cnt, ObModIds::OB_SSTABLE_CREATE_INDEX))) { + SMART_VARS_2((ObArray, data_table_ckm_items), + (ObArray, index_table_ckm_items)) { + SMART_VARS_2((ObArray, data_table_tablets), + (ObArray, index_table_tablets)) { + if (OB_FAIL(data_column_ckm_sum_map.create(default_column_cnt, ObModIds::OB_CHECKSUM_CHECKER))) { LOG_WARN("fail to create data table column ckm_sum map", KR(ret), K(default_column_cnt)); - } else if (OB_FAIL(index_column_ckm_sum_map.create(default_column_cnt, 
ObModIds::OB_SSTABLE_CREATE_INDEX))) { + } else if (OB_FAIL(index_column_ckm_sum_map.create(default_column_cnt, ObModIds::OB_CHECKSUM_CHECKER))) { LOG_WARN("fail to create index table column ckm_sum map", KR(ret), K(default_column_cnt)); } else { index_table_id = index_table_schema.get_table_id(); @@ -945,13 +1046,14 @@ int ObTabletReplicaChecksumOperator::check_global_index_column_checksum( ObColumnChecksumErrorInfo ckm_error_info(tenant_id, compaction_scn, true, data_table_id, index_table_id, unused_tablet_id, unused_tablet_id); - if (OB_FAIL(get_tablet_replica_checksum_items_(tenant_id, sql_proxy, index_table_schema, - index_schema_tablet_ids, index_table_ckm_items))) { - LOG_WARN("fail to get index table tablet replica ckm_items", KR(ret), K(tenant_id), K(index_table_schema)); - } else if (OB_FAIL(need_verify_checksum_(compaction_scn, need_verify, index_schema_tablet_ids, + if (OB_FAIL(get_tablet_replica_checksum_items_(tenant_id, sql_proxy, index_table_schema, compaction_scn, + index_table_tablets, index_table_ckm_items))) { + LOG_WARN("fail to get index table tablet replica ckm_items", KR(ret), K(tenant_id), K(compaction_scn), + K(index_table_id)); + } else if (OB_FAIL(need_verify_checksum_(tenant_id, compaction_scn, need_verify, index_table_tablets, index_table_ckm_items))) { if (OB_EAGAIN != ret) { - LOG_WARN("fail to check need verify checksum", KR(ret), K(index_table_id), K(data_table_id), + LOG_WARN("fail to check need verify checksum", KR(ret), K(tenant_id), K(index_table_id), K(data_table_id), K(compaction_scn)); } } else if (!need_verify) { @@ -964,13 +1066,14 @@ int ObTabletReplicaChecksumOperator::check_global_index_column_checksum( } else if (REACH_TIME_INTERVAL(10 * 1000 * 1000)) { LOG_WARN("fail to get index table tablet checksum items", KR(ret), K(index_table_schema)); } - } else if (OB_FAIL(get_tablet_replica_checksum_items_(tenant_id, sql_proxy, data_table_schema, - data_schema_tablet_ids, data_table_ckm_items))) { - LOG_WARN("fail to get data 
table tablet replica ckm_items", KR(ret), K(tenant_id), K(data_table_schema)); - } else if (OB_FAIL(need_verify_checksum_(compaction_scn, need_verify, data_schema_tablet_ids, + } else if (OB_FAIL(get_tablet_replica_checksum_items_(tenant_id, sql_proxy, data_table_schema, compaction_scn, + data_table_tablets, data_table_ckm_items))) { + LOG_WARN("fail to get data table tablet replica ckm_items", KR(ret), K(tenant_id), K(compaction_scn), + K(data_table_id)); + } else if (OB_FAIL(need_verify_checksum_(tenant_id, compaction_scn, need_verify, data_table_tablets, data_table_ckm_items))) { if (OB_EAGAIN != ret) { - LOG_WARN("fail to check need verify checksum", KR(ret), K(index_table_id), K(data_table_id), + LOG_WARN("fail to check need verify checksum", KR(ret), K(tenant_id), K(index_table_id), K(data_table_id), K(compaction_scn)); } } else if (!need_verify) { @@ -987,7 +1090,6 @@ int ObTabletReplicaChecksumOperator::check_global_index_column_checksum( index_column_ckm_sum_map, check_cnt, ckm_error_info))) { if (OB_CHECKSUM_ERROR == ret) { LOG_ERROR("data table and global index table column checksum are not equal", KR(ret), K(ckm_error_info)); - int tmp_ret = OB_SUCCESS; if (OB_TMP_FAIL(ObColumnChecksumErrorOperator::insert_column_checksum_err_info(sql_proxy, tenant_id, ckm_error_info))) { LOG_WARN("fail to insert global index column checksum error info", KR(tmp_ret), K(ckm_error_info)); @@ -998,14 +1100,16 @@ int ObTabletReplicaChecksumOperator::check_global_index_column_checksum( } // end smart_var } // end smart_var + ret = ((OB_SUCCESS == ret) ? 
tmp_ret : ret); + if (data_column_ckm_sum_map.created()) { data_column_ckm_sum_map.destroy(); } if (index_column_ckm_sum_map.created()) { index_column_ckm_sum_map.destroy(); } - LOG_INFO("finish verify global index table columns checksum", KR(ret), K(tenant_id), K(data_table_id), - K(index_table_id), K(check_cnt)); + LOG_INFO("finish verify global index table columns checksum", KR(ret), KR(tmp_ret), K(tenant_id), K(compaction_scn), + K(data_table_id), K(index_table_id), K(check_cnt), K(need_verify)); return ret; } @@ -1018,68 +1122,71 @@ int ObTabletReplicaChecksumOperator::check_local_index_column_checksum( ObMySQLProxy &sql_proxy) { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; const uint64_t index_table_id = index_table_schema.get_table_id(); const uint64_t data_table_id = data_table_schema.get_table_id(); const int64_t default_column_cnt = ObTabletReplicaReportColumnMeta::DEFAULT_COLUMN_CNT; int64_t check_cnt = 0; bool need_verify = false; - ObArray data_schema_tablet_ids; - ObArray index_schema_tablet_ids; - SMART_VAR(ObArray, data_table_ckm_items) { - SMART_VAR(ObArray, index_table_ckm_items) { - if (OB_FAIL(get_tablet_replica_checksum_items_(tenant_id, sql_proxy, index_table_schema, - index_schema_tablet_ids, index_table_ckm_items))) { - LOG_WARN("fail to get index table tablet replica ckm_items", KR(ret), K(tenant_id), K(index_table_schema)); - } else if (OB_FAIL(need_verify_checksum_(compaction_scn, need_verify, index_schema_tablet_ids, + SMART_VARS_2((ObArray, data_table_ckm_items), + (ObArray, index_table_ckm_items)) { + SMART_VARS_2((ObArray, data_table_tablets), + (ObArray, index_table_tablets)) { + if (OB_FAIL(get_tablet_replica_checksum_items_(tenant_id, sql_proxy, index_table_schema, compaction_scn, + index_table_tablets, index_table_ckm_items))) { + LOG_WARN("fail to get index table tablet replica ckm_items", KR(ret), K(tenant_id), K(compaction_scn), + K(index_table_id)); + } else if (OB_FAIL(need_verify_checksum_(tenant_id, compaction_scn, 
need_verify, index_table_tablets, index_table_ckm_items))) { if (OB_EAGAIN != ret) { - LOG_WARN("fail to check need verify checksum", KR(ret), K(index_table_id), K(data_table_id), + LOG_WARN("fail to check need verify checksum", KR(ret), K(tenant_id), K(index_table_id), K(data_table_id), K(compaction_scn)); } } else if (!need_verify) { LOG_INFO("do not need verify checksum", K(index_table_id), K(data_table_id), K(compaction_scn)); - } else if (OB_FAIL(get_tablet_replica_checksum_items_(tenant_id, sql_proxy, data_table_schema, - data_schema_tablet_ids, data_table_ckm_items))) { - LOG_WARN("fail to get data table tablet replica ckm_items", KR(ret), K(tenant_id), K(data_table_schema)); - } else if (OB_FAIL(need_verify_checksum_(compaction_scn, need_verify, data_schema_tablet_ids, + } else if (OB_FAIL(get_tablet_replica_checksum_items_(tenant_id, sql_proxy, data_table_schema, compaction_scn, + data_table_tablets, data_table_ckm_items))) { + LOG_WARN("fail to get data table tablet replica ckm_items", KR(ret), K(tenant_id), K(compaction_scn), + K(data_table_id)); + } else if (OB_FAIL(need_verify_checksum_(tenant_id, compaction_scn, need_verify, data_table_tablets, data_table_ckm_items))) { if (OB_EAGAIN != ret) { - LOG_WARN("fail to check need verify checksum", KR(ret), K(index_table_id), K(data_table_id), + LOG_WARN("fail to check need verify checksum", KR(ret), K(tenant_id), K(index_table_id), K(data_table_id), K(compaction_scn)); } - } else if (data_schema_tablet_ids.count() != index_schema_tablet_ids.count()) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("tablet count of local index table is not same with data table", KR(ret), "data_table_tablet_cnt", - data_schema_tablet_ids.count(), "index_table_tablet_cnt", index_schema_tablet_ids.count()); } else if (!need_verify) { LOG_INFO("do not need verify checksum", K(index_table_id), K(data_table_id), K(compaction_scn)); + } else if (data_table_tablets.count() != index_table_tablets.count()) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("tablet count of local index table is not same with data table", KR(ret), "data_table_tablet_cnt", + data_table_tablets.count(), "index_table_tablet_cnt", index_table_tablets.count()); } else { // map element: hash::ObHashMap data_column_ckm_map; hash::ObHashMap index_column_ckm_map; - if (OB_FAIL(data_column_ckm_map.create(default_column_cnt, ObModIds::OB_SSTABLE_CREATE_INDEX))) { + if (OB_FAIL(data_column_ckm_map.create(default_column_cnt, ObModIds::OB_CHECKSUM_CHECKER))) { LOG_WARN("fail to create data table column ckm map", KR(ret), K(default_column_cnt)); - } else if (OB_FAIL(index_column_ckm_map.create(default_column_cnt, ObModIds::OB_SSTABLE_CREATE_INDEX))) { + } else if (OB_FAIL(index_column_ckm_map.create(default_column_cnt, ObModIds::OB_CHECKSUM_CHECKER))) { LOG_WARN("fail to create index table column ckm map", KR(ret), K(default_column_cnt)); } // One tablet of local index table is mapping to one tablet of data table - const int64_t tablet_cnt = data_schema_tablet_ids.count(); + const int64_t tablet_cnt = data_table_tablets.count(); for (int64_t i = 0; (i < tablet_cnt) && OB_SUCC(ret); ++i) { if (OB_FAIL(data_column_ckm_map.clear())) { LOG_WARN("fail to clear hash map", KR(ret), K(default_column_cnt)); } else if (OB_FAIL(index_column_ckm_map.clear())) { LOG_WARN("fail to clear hash map", KR(ret), K(default_column_cnt)); } else { - const ObTabletID &data_tablet_id = data_schema_tablet_ids.at(i); - const ObTabletID &index_tablet_id = index_schema_tablet_ids.at(i); + const ObTabletLSPair &data_tablet_pair = data_table_tablets.at(i); + const ObTabletLSPair &index_tablet_pair = index_table_tablets.at(i); int64_t data_tablet_idx = OB_INVALID_INDEX; int64_t index_tablet_idx = OB_INVALID_INDEX; - if (OB_FAIL(find_checksum_item_by_id_(data_tablet_id, data_table_ckm_items, compaction_scn, data_tablet_idx))) { - LOG_WARN("fail to find checksum item by tablet_id", KR(ret), K(data_tablet_id), K(compaction_scn)); - } else if 
(OB_FAIL(find_checksum_item_by_id_(index_tablet_id, index_table_ckm_items, compaction_scn, index_tablet_idx))) { - LOG_WARN("fail to find checksum item by tablet_id", KR(ret), K(index_tablet_id), K(compaction_scn)); + if (OB_FAIL(find_checksum_item_(data_tablet_pair, data_table_ckm_items, compaction_scn, data_tablet_idx))) { + LOG_WARN("fail to find checksum item by tablet_id", KR(ret), K(data_tablet_pair), K(compaction_scn)); + } else if (OB_FAIL(find_checksum_item_(index_tablet_pair, index_table_ckm_items, compaction_scn, index_tablet_idx))) { + LOG_WARN("fail to find checksum item by tablet_id", KR(ret), K(index_tablet_pair), K(compaction_scn)); } else { // compare column checksum of index schema tablet and data schema tablet check_cnt = 0; @@ -1087,7 +1194,7 @@ int ObTabletReplicaChecksumOperator::check_local_index_column_checksum( const ObTabletReplicaChecksumItem &index_ckm_item = index_table_ckm_items.at(index_tablet_idx); ObColumnChecksumErrorInfo ckm_error_info(tenant_id, compaction_scn, false, data_table_id, index_table_id, - data_tablet_id, index_tablet_id); + data_tablet_pair.get_tablet_id(), index_tablet_pair.get_tablet_id()); if (OB_FAIL(get_column_checksum_map_(data_table_schema, compaction_scn, data_column_ckm_map, data_ckm_item))) { @@ -1099,7 +1206,6 @@ int ObTabletReplicaChecksumOperator::check_local_index_column_checksum( index_column_ckm_map, check_cnt, ckm_error_info))) { if (OB_CHECKSUM_ERROR == ret) { LOG_ERROR("data table and local index table column checksum are not equal", KR(ret), K(ckm_error_info)); - int tmp_ret = OB_SUCCESS; if (OB_TMP_FAIL(ObColumnChecksumErrorOperator::insert_column_checksum_err_info(sql_proxy, tenant_id, ckm_error_info))) { LOG_WARN("fail to insert local index column checksum error info", KR(tmp_ret), K(ckm_error_info)); @@ -1110,6 +1216,8 @@ int ObTabletReplicaChecksumOperator::check_local_index_column_checksum( } } // end loop + ret = ((OB_SUCCESS == ret) ? 
tmp_ret : ret); + if (data_column_ckm_map.created()) { data_column_ckm_map.destroy(); } @@ -1117,12 +1225,11 @@ int ObTabletReplicaChecksumOperator::check_local_index_column_checksum( index_column_ckm_map.destroy(); } } + LOG_INFO("finish verify local index table columns checksum", KR(ret), KR(tmp_ret), K(tenant_id), K(compaction_scn), + K(data_table_id), K(index_table_id), K(check_cnt), K(data_table_tablets.count())); } } - LOG_INFO("finish verify local index table columns checksum", KR(ret), K(tenant_id), K(data_table_id), - K(index_table_id), K(check_cnt), K(data_schema_tablet_ids.count())); - return ret; } @@ -1230,8 +1337,8 @@ int ObTabletReplicaChecksumOperator::get_column_checksum_map_( return ret; } -int ObTabletReplicaChecksumOperator::find_checksum_item_by_id_( - const ObTabletID &tablet_id, +int ObTabletReplicaChecksumOperator::find_checksum_item_( + const ObTabletLSPair &pair, ObIArray &items, const SCN &compaction_scn, int64_t &idx) @@ -1239,12 +1346,13 @@ int ObTabletReplicaChecksumOperator::find_checksum_item_by_id_( int ret = OB_SUCCESS; idx = OB_INVALID_INDEX; const int64_t item_cnt = items.count(); - if (!tablet_id.is_valid() || (item_cnt < 1)) { + if (!pair.is_valid() || (item_cnt < 1)) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), K(tablet_id), K(item_cnt)); + LOG_WARN("invalid argument", KR(ret), K(pair), K(item_cnt)); } else { for (int64_t i = 0; i < item_cnt; ++i) { - if ((items.at(i).tablet_id_ == tablet_id) + if ((items.at(i).tablet_id_ == pair.get_tablet_id()) + && (items.at(i).ls_id_ == pair.get_ls_id()) && (items.at(i).compaction_scn_ == compaction_scn)) { idx = i; break; @@ -1258,70 +1366,109 @@ int ObTabletReplicaChecksumOperator::find_checksum_item_by_id_( return ret; } -int ObTabletReplicaChecksumOperator::get_tablet_replica_checksum_items_( +int ObTabletReplicaChecksumOperator::get_tablet_ls_pairs( const uint64_t tenant_id, - ObMySQLProxy &sql_proxy, const ObTableSchema &table_schema, - ObIArray &tablet_ids, - 
ObIArray &items) + ObMySQLProxy &sql_proxy, + ObIArray &pairs) { int ret = OB_SUCCESS; - if (!is_valid_tenant_id(tenant_id)) { + if ((!is_valid_tenant_id(tenant_id)) || (!table_schema.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", KR(ret), K(tenant_id)); - } else if (OB_FAIL(get_table_all_tablet_id_(table_schema, tablet_ids))) { - LOG_WARN("fail to get table all tablet id", KR(ret), K(table_schema)); - } else if (tablet_ids.count() > 0) { - const uint64_t table_id = table_schema.get_table_id(); - ObArray pairs; - ObArray ls_ids; - - // sys_table's tablet->ls relation won't be written into __all_tablet_to_ls - if (is_sys_tenant(tenant_id) || is_sys_table(table_id)) { - for (int64_t i = 0; (i < tablet_ids.count()) && OB_SUCC(ret); ++i) { - ObLSID tmp_ls_id(ObLSID::SYS_LS_ID); - if (OB_FAIL(ls_ids.push_back(tmp_ls_id))) { - LOG_WARN("fail to push back ls_id", KR(ret), K(tenant_id), K(table_id)); + } else { + SMART_VAR(ObArray, tablet_ids) { + if (OB_FAIL(get_table_all_tablet_ids_(table_schema, tablet_ids))) { + LOG_WARN("fail to get table all tablet ids", KR(ret), K(table_schema)); + } else if (tablet_ids.count() > 0) { + const uint64_t table_id = table_schema.get_table_id(); + if (OB_FAIL(get_tablet_ls_pairs(tenant_id, table_id, sql_proxy, tablet_ids, pairs))) { + LOG_WARN("fail to get tablet_ls_pairs", KR(ret), K(tenant_id), K(table_id)); } } - } else if (OB_FAIL(ObTabletToLSTableOperator::batch_get_ls(sql_proxy, tenant_id, tablet_ids, ls_ids))) { - LOG_WARN("fail to batch get ls", KR(ret), K(tenant_id), K(tablet_ids)); - } - - if (OB_SUCC(ret) && (ls_ids.count() != tablet_ids.count())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("count mismatch", KR(ret), K(ls_ids.count()), K(tablet_ids.count())); - } - - if (OB_SUCC(ret)) { - SMART_VAR(ObArray, pairs) { - const int64_t ls_id_cnt = ls_ids.count(); - for (int64_t i = 0; (i < ls_id_cnt) && OB_SUCC(ret); ++i) { - ObTabletLSPair cur_pair; - const ObTabletID &cur_tablet_id = tablet_ids.at(i); - const 
ObLSID &cur_ls_id = ls_ids.at(i); - if (OB_FAIL(cur_pair.init(cur_tablet_id, cur_ls_id))) { - LOG_WARN("fail to init tablet_ls_pair", KR(ret), K(i), K(cur_tablet_id), K(cur_ls_id)); - } else if (OB_FAIL(pairs.push_back(cur_pair))) { - LOG_WARN("fail to push back pair", KR(ret), K(cur_pair)); - } - } - - if (OB_FAIL(ret)){ - } else if (OB_UNLIKELY(pairs.count() != ls_id_cnt)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("some unexpected err about tablet_ls_pair count", KR(ret), K(ls_id_cnt), K(pairs.count())); - } else if (OB_FAIL(ObTabletReplicaChecksumOperator::batch_get(tenant_id, pairs, sql_proxy, items))) { - LOG_WARN("fail to batch get tablet checksum item", KR(ret), K(tenant_id), - "pairs_count", pairs.count()); - } - } } } return ret; } -int ObTabletReplicaChecksumOperator::get_table_all_tablet_id_( +int ObTabletReplicaChecksumOperator::get_tablet_ls_pairs( + const uint64_t tenant_id, + const uint64_t table_id, + ObMySQLProxy &sql_proxy, + const ObIArray &tablet_ids, + ObIArray &pairs) +{ + int ret = OB_SUCCESS; + if (!is_valid_tenant_id(tenant_id) || (tablet_ids.count() < 1)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id), K(tablet_ids.count())); + } else { + SMART_VAR(ObArray, ls_ids) { + // sys_table's tablet->ls relation won't be written into __all_tablet_to_ls + if (is_sys_tenant(tenant_id) || is_sys_table(table_id)) { + for (int64_t i = 0; (i < tablet_ids.count()) && OB_SUCC(ret); ++i) { + ObLSID tmp_ls_id(ObLSID::SYS_LS_ID); + if (OB_FAIL(ls_ids.push_back(tmp_ls_id))) { + LOG_WARN("fail to push back ls_id", KR(ret), K(tenant_id), K(table_id)); + } + } + } else if (OB_FAIL(ObTabletToLSTableOperator::batch_get_ls(sql_proxy, tenant_id, tablet_ids, ls_ids))) { + LOG_WARN("fail to batch get ls", KR(ret), K(tenant_id), K(tablet_ids)); + } + + const int64_t ls_id_cnt = ls_ids.count(); + if (OB_SUCC(ret) && (ls_id_cnt != tablet_ids.count())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("count mismatch", KR(ret), K(ls_id_cnt), 
K(tablet_ids.count())); + } + + for (int64_t i = 0; (i < ls_id_cnt) && OB_SUCC(ret); ++i) { + ObTabletLSPair cur_pair; + const ObTabletID &cur_tablet_id = tablet_ids.at(i); + const ObLSID &cur_ls_id = ls_ids.at(i); + if (OB_FAIL(cur_pair.init(cur_tablet_id, cur_ls_id))) { + LOG_WARN("fail to init tablet_ls_pair", KR(ret), K(i), K(cur_tablet_id), K(cur_ls_id)); + } else if (OB_FAIL(pairs.push_back(cur_pair))) { + LOG_WARN("fail to push back pair", KR(ret), K(cur_pair)); + } + } + + if (OB_FAIL(ret)){ + } else if (OB_UNLIKELY(pairs.count() != ls_id_cnt)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("some unexpected err about tablet_ls_pair count", KR(ret), K(ls_id_cnt), K(pairs.count())); + } + } + } + return ret; +} + +int ObTabletReplicaChecksumOperator::get_tablet_replica_checksum_items_( + const uint64_t tenant_id, + ObMySQLProxy &sql_proxy, + const ObTableSchema &table_schema, + const SCN &compaction_scn, + ObIArray &tablet_pairs, + ObIArray &items) +{ + int ret = OB_SUCCESS; + if ((!is_valid_tenant_id(tenant_id)) || (!table_schema.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret), K(tenant_id)); + } else { + const uint64_t table_id = table_schema.get_table_id(); + if (OB_FAIL(get_tablet_ls_pairs(tenant_id, table_schema, sql_proxy, tablet_pairs))) { + LOG_WARN("fail to get tablet_ls_pairs", KR(ret), K(tenant_id), K(table_id)); + } else if (OB_FAIL(ObTabletReplicaChecksumOperator::batch_get(tenant_id, tablet_pairs, compaction_scn, + sql_proxy, items))) { + LOG_WARN("fail to batch get tablet checksum item", KR(ret), K(tenant_id), K(compaction_scn), + "pairs_count", tablet_pairs.count()); + } + } + return ret; +} + +int ObTabletReplicaChecksumOperator::get_table_all_tablet_ids_( const ObTableSchema &table_schema, ObIArray &schema_tablet_ids) { @@ -1330,19 +1477,37 @@ int ObTabletReplicaChecksumOperator::get_table_all_tablet_id_( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid arguments", KR(ret), K(table_schema)); } else { - const uint64_t 
table_id = table_schema.get_table_id(); - // TODO donglou, sys table can use table_schema.get_tablet_ids ? - if (is_sys_table(table_id)) { - const ObTabletID &tablet_id = table_schema.get_tablet_id(); - if (OB_UNLIKELY(!tablet_id.is_valid())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected err, invalid tablet_id", KR(ret), K(table_id), K(tablet_id)); - } else if (OB_FAIL(schema_tablet_ids.push_back(tablet_id))) { - LOG_WARN("fail to push back tablet_id", KR(ret), K(tablet_id)); - } - } else if (table_schema.has_tablet()) { + if (table_schema.has_tablet()) { if (OB_FAIL(table_schema.get_tablet_ids(schema_tablet_ids))) { - LOG_WARN("fail to get tablet_ids from table schema", KR(ret)); + LOG_WARN("fail to get tablet_ids from table schema", KR(ret), K(table_schema)); + } + } + } + return ret; +} + +int ObTabletReplicaChecksumOperator::check_table_all_tablets_ckm_status_( + const uint64_t tenant_id, + ObIArray &tablet_pairs, + bool &exist_error_status) +{ + int ret = OB_SUCCESS; + exist_error_status = false; + if (tablet_pairs.empty()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", KR(ret)); + } else { + ObArray status_arr; + if (OB_FAIL(ObTabletMetaTableCompactionOperator::get_unique_status(tenant_id, tablet_pairs, status_arr))) { + LOG_WARN("fail to get unique scn status", KR(ret), K(tenant_id), "pair_cnt", tablet_pairs.count()); + } else if (status_arr.empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("fail to get unique scn status", KR(ret), K(tenant_id), K(tablet_pairs)); + } else { + for (int64_t i = 0; (i < status_arr.count()) && (!exist_error_status); ++i) { + if (status_arr.at(i) == ObTabletReplica::ScnStatus::SCN_STATUS_ERROR) { + exist_error_status = true; + } } } } @@ -1350,18 +1515,19 @@ int ObTabletReplicaChecksumOperator::get_table_all_tablet_id_( } int ObTabletReplicaChecksumOperator::need_verify_checksum_( + const uint64_t tenant_id, const SCN &compaction_scn, bool &need_verify, - ObIArray &schema_tablet_ids, + ObIArray &tablet_pairs, 
ObIArray &items) { int ret = OB_SUCCESS; need_verify = false; const int64_t item_cnt = items.count(); - const int64_t schema_tablet_cnt = schema_tablet_ids.count(); - if (item_cnt <= 0) { + const int64_t tablet_cnt = tablet_pairs.count(); + if (OB_UNLIKELY(items.empty() || tablet_pairs.empty())) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", KR(ret), K(item_cnt)); + LOG_WARN("invalid argument", KR(ret), K(item_cnt), K(tablet_cnt)); } else { SCN min_compaction_scn = SCN::max_scn(); SCN max_compaction_scn = SCN::min_scn(); @@ -1375,41 +1541,23 @@ int ObTabletReplicaChecksumOperator::need_verify_checksum_( } } - if ((min_compaction_scn == compaction_scn) - && (max_compaction_scn == compaction_scn)) { - need_verify = true; - } else if ((min_compaction_scn < compaction_scn) - && (max_compaction_scn == compaction_scn)) { - hash::ObHashMap reported_tablet_ids; - if (OB_FAIL(reported_tablet_ids.create(schema_tablet_cnt, ObModIds::OB_SSTABLE_CREATE_INDEX))) { - LOG_WARN("fail to create reported tablet ids map", KR(ret), K(schema_tablet_cnt)); - } - for (int64_t i = 0; (i < item_cnt) && OB_SUCC(ret); ++i) { - if (items.at(i).compaction_scn_ == compaction_scn) { - if (OB_FAIL(reported_tablet_ids.set_refactored(items.at(i).tablet_id_.id(), true, true/*overwrite*/))) { - LOG_WARN("fail to set to hashmap", KR(ret), K(items.at(i))); - } + if (OB_SUCC(ret)) { + if ((min_compaction_scn == compaction_scn) + && (max_compaction_scn == compaction_scn)) { + // if one tablet status of this table is ERROR, it means we should not verify + bool exist_error_status = false; + if (OB_FAIL(check_table_all_tablets_ckm_status_(tenant_id, tablet_pairs, exist_error_status))) { + LOG_WARN("fail to check table all tablets checksum status", KR(ret), K(tenant_id)); + } else { + need_verify = exist_error_status ? 
false : true; } + } else if (max_compaction_scn > compaction_scn) { + need_verify = false; + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected tablet compaction scn", KR(ret), K(min_compaction_scn), K(max_compaction_scn), + K(compaction_scn)); } - - // when each tablet has at lease one replica which finished compaction - // with @compaction_scn, we will validate checksum - for (int64_t i = 0; (i < schema_tablet_cnt) && OB_SUCC(ret); ++i) { - bool value = false; - const ObTabletID &cur_tablet_id = schema_tablet_ids.at(i); - if (OB_FAIL(reported_tablet_ids.get_refactored(cur_tablet_id.id(), value))) { - if (OB_HASH_NOT_EXIST == ret) { - ret = OB_EAGAIN; - LOG_WARN("tablet has not reported", KR(ret), K(cur_tablet_id), K(schema_tablet_cnt)); - } - } - } - - if (OB_SUCC(ret)) { - need_verify = true; - } - } else { - LOG_INFO("snapshot version not match, no need nerify", K(min_compaction_scn), K(max_compaction_scn)); } } return ret; diff --git a/src/share/ob_tablet_replica_checksum_operator.h b/src/share/ob_tablet_replica_checksum_operator.h index 7167bb8609..133bf9968e 100644 --- a/src/share/ob_tablet_replica_checksum_operator.h +++ b/src/share/ob_tablet_replica_checksum_operator.h @@ -104,6 +104,9 @@ public: class ObTabletReplicaChecksumOperator { public: + // To get a batch of checksum_items + // We will get items whose compaction_scn >= @compaction_scn + // // This function is specifically designed for ObTabletReplicaChecksumIterator. // This function would remove the last several checksum items in some cases. // Please do not call this function in any other place, except ObTabletReplicaChecksumIterator. 
@@ -113,9 +116,12 @@ public: const SCN &compaction_scn, common::ObISQLClient &sql_proxy, common::ObIArray &items); + // get a batch of checksum_items, the count = @pairs.count() + // @compaction_scn means items' compaction_scn >= compaction_scn static int batch_get( const uint64_t tenant_id, const common::ObIArray &pairs, + const SCN &compaction_scn, common::ObISQLClient &sql_proxy, common::ObIArray &items); static int batch_get( @@ -138,6 +144,32 @@ public: const int64_t limit, int64_t &affected_rows); + static int get_specified_tablet_checksum( + const uint64_t tenant_id, + const int64_t ls_id, + const int64_t tablet_id, + const int64_t snapshot_version, + common::ObIArray &items); + + static int get_tablet_ls_pairs( + const uint64_t tenant_id, + const schema::ObTableSchema &table_schema, + common::ObMySQLProxy &sql_proxy, + common::ObIArray &tablet_ls_pairs); + + static int get_tablet_ls_pairs( + const uint64_t tenant_id, + const uint64_t table_id, + common::ObMySQLProxy &sql_proxy, + const common::ObIArray &tablet_ids, + common::ObIArray &tablet_ls_pairs); + + static int check_tablet_replica_checksum( + const uint64_t tenant_id, + const common::ObIArray &pairs, + const SCN &compaction_scn, + common::ObMySQLProxy &sql_proxy); + static int check_column_checksum( const uint64_t tenant_id, const schema::ObTableSchema &data_table_schema, @@ -196,6 +228,7 @@ private: static int construct_batch_get_sql_str_( const uint64_t tenant_id, + const SCN &compaction_scn, const common::ObIArray &pairs, const int64_t start_idx, const int64_t end_idx, @@ -213,6 +246,9 @@ private: common::sqlclient::ObMySQLResult &res, ObTabletReplicaChecksumItem &item); + static int innner_verify_tablet_replica_checksum( + const common::ObIArray &ckm_items); + static int check_global_index_column_checksum( const uint64_t tenant_id, const schema::ObTableSchema &data_table_schema, @@ -247,23 +283,30 @@ private: const uint64_t tenant_id, common::ObMySQLProxy &mysql_proxy, const schema::ObTableSchema 
&table_schema, - common::ObIArray &tablet_ids, + const SCN &compaction_scn, + common::ObIArray &tablet_pairs, common::ObIArray &items); - static int find_checksum_item_by_id_( - const common::ObTabletID &tablet_id, + static int get_table_all_tablet_ids_( + const schema::ObTableSchema &table_schema, + common::ObIArray &schema_tablet_ids); + + static int find_checksum_item_( + const ObTabletLSPair &pair, common::ObIArray &items, const SCN &compaction_scn, int64_t &idx); - static int get_table_all_tablet_id_( - const schema::ObTableSchema &table_schema, - common::ObIArray &schema_tablet_ids); + static int check_table_all_tablets_ckm_status_( + const uint64_t tenant_id, + common::ObIArray &tablet_pairs, + bool &exist_error_status); static int need_verify_checksum_( + const uint64_t tenant_id, const SCN &compaction_scn, bool &need_verify, - common::ObIArray &schema_tablet_ids, + common::ObIArray &tablet_pairs, common::ObIArray &items); static int compare_column_checksum_( @@ -284,4 +327,4 @@ private: } // share } // oceanbase -#endif // OCEANBASE_SHARE_OB_TABLET_REPLICA_CHECKSUM_OPERATOR_H_ \ No newline at end of file +#endif // OCEANBASE_SHARE_OB_TABLET_REPLICA_CHECKSUM_OPERATOR_H_ diff --git a/src/share/ob_thread_define.h b/src/share/ob_thread_define.h index 471d4ba2dd..634694acfc 100644 --- a/src/share/ob_thread_define.h +++ b/src/share/ob_thread_define.h @@ -50,10 +50,10 @@ TG_DEF(KVCacheWash, KVCacheWash, "", TG_STATIC, TIMER) TG_DEF(KVCacheRep, KVCacheRep, "", TG_STATIC, TIMER) TG_DEF(ObHeartbeat, ObHeartbeat, "", TG_STATIC, TIMER) TG_DEF(PlanCacheEvict, PlanCacheEvict, "", TG_DYNAMIC, TIMER) +TG_DEF(TabletStatRpt, TabletStatRpt, "", TG_STATIC, TIMER) TG_DEF(MergeLoop, MergeLoop, "", TG_STATIC, TIMER) TG_DEF(SSTableGC, SSTableGC, "", TG_STATIC, TIMER) -TG_DEF(MinorScan, MinorScan, "", TG_STATIC, TIMER) -TG_DEF(MajorScan, MajorScan, "", TG_STATIC, TIMER) +TG_DEF(MediumLoop, MediumLoop, "", TG_STATIC, TIMER) TG_DEF(WriteCkpt, WriteCkpt, "", TG_STATIC, TIMER) 
TG_DEF(EXTLogWash, EXTLogWash, "", TG_STATIC, TIMER) TG_DEF(LineCache, LineCache, "", TG_STATIC, TIMER) diff --git a/src/share/ob_zone_merge_info.cpp b/src/share/ob_zone_merge_info.cpp index 321c173097..12bd7e03d4 100644 --- a/src/share/ob_zone_merge_info.cpp +++ b/src/share/ob_zone_merge_info.cpp @@ -359,5 +359,17 @@ int64_t ObMergeProgress::get_merged_data_percentage() const return first_param_percnetage(merged_data_size_, unmerged_data_size_); } +/////////////////////////////////////////////////////////////////////////////// + +ObTableCompactionInfo &ObTableCompactionInfo::operator=(const ObTableCompactionInfo &other) +{ + table_id_ = other.table_id_; + tablet_cnt_ = other.tablet_cnt_; + status_ = other.status_; + is_valid_data_table_ = other.is_valid_data_table_; + all_index_verified_ = other.all_index_verified_; + return *this; +} + } // end namespace share } // end namespace oceanbase diff --git a/src/share/ob_zone_merge_info.h b/src/share/ob_zone_merge_info.h index e32cbe6126..5e17e5141a 100644 --- a/src/share/ob_zone_merge_info.h +++ b/src/share/ob_zone_merge_info.h @@ -181,10 +181,68 @@ public: {} ~ObMergeProgress() {} + bool is_merge_finished() const { return (0 == unmerged_tablet_cnt_); } + TO_STRING_KV(K_(tenant_id), K_(zone), K_(unmerged_tablet_cnt), K_(unmerged_data_size), K_(smallest_snapshot_scn)); }; +enum ObTabletCompactionStatus +{ + INITIAL = 0, + COMPACTED, // tablet finished compaction + CAN_SKIP_VERIFYING, // tablet finished compaction and not need to verify + STATUS_MAX +}; + +struct ObTableCompactionInfo { +public: + enum Status + { + INITIAL = 0, + COMPACTED, + CAN_SKIP_VERIFYING, // already compacted and can skip verification + VERIFIED, + TB_STATUS_MAX + }; + + ObTableCompactionInfo() + : table_id_(OB_INVALID_ID), tablet_cnt_(0), + status_(Status::INITIAL), + is_valid_data_table_(false), all_index_verified_(true) {} + ~ObTableCompactionInfo() { reset(); } + + void reset() + { + table_id_ = OB_INVALID_ID; + tablet_cnt_ = 0; + status_ = 
Status::INITIAL; + is_valid_data_table_ = false; + all_index_verified_ = true; + } + + ObTableCompactionInfo &operator=(const ObTableCompactionInfo &other); + + bool is_uncompacted() const { return Status::INITIAL == status_; } + void set_compacted() { status_ = Status::COMPACTED; } + bool is_compacted() const { return Status::COMPACTED == status_; } + void set_can_skip_verifying() { status_ = Status::CAN_SKIP_VERIFYING; } + bool can_skip_verifying() const { return Status::CAN_SKIP_VERIFYING == status_; } + void set_verified() { status_ = Status::VERIFIED; } + bool is_verified() const { return Status::VERIFIED == status_; } + bool finish_compaction() const { return (is_compacted() || can_skip_verifying()); } + + TO_STRING_KV(K_(table_id), K_(tablet_cnt), K_(status), K_(is_valid_data_table), K_(all_index_verified)); + + uint64_t table_id_; + int64_t tablet_cnt_; + Status status_; + // only for data table which has index table, these two members are meaningful. + // PS: above data table and index table must have tablet. 
+ bool is_valid_data_table_; + bool all_index_verified_; // the data table's all index tables finished verification +}; + typedef common::ObArray ObAllZoneMergeProgress; typedef common::ObSEArray ObZoneArray; typedef common::ObSEArray ObZoneMergeInfoArray; diff --git a/src/share/rc/ob_tenant_base.h b/src/share/rc/ob_tenant_base.h index 172f1ccec4..01d751e78b 100644 --- a/src/share/rc/ob_tenant_base.h +++ b/src/share/rc/ob_tenant_base.h @@ -59,6 +59,7 @@ namespace storage { class ObStorageHAHandlerService; class ObLSRestoreService; class ObTenantSSTableMergeInfoMgr; + class ObTenantTabletStatMgr; namespace checkpoint { class ObCheckPointService; class ObTabletGCService; @@ -164,6 +165,7 @@ namespace detector storage::ObTenantCheckpointSlogHandler*, \ compaction::ObTenantCompactionProgressMgr*, \ compaction::ObServerCompactionEventHistory*, \ + storage::ObTenantTabletStatMgr*, \ memtable::ObLockWaitMgr*, \ logservice::ObGarbageCollector*, \ transaction::tablelock::ObTableLockService*, \ diff --git a/src/share/scheduler/ob_dag_scheduler.cpp b/src/share/scheduler/ob_dag_scheduler.cpp index b2a5c7a470..62f40592de 100644 --- a/src/share/scheduler/ob_dag_scheduler.cpp +++ b/src/share/scheduler/ob_dag_scheduler.cpp @@ -27,6 +27,7 @@ #include "ob_dag_warning_history_mgr.h" #include "storage/compaction/ob_tenant_compaction_progress.h" #include "storage/compaction/ob_tablet_merge_ctx.h" +#include "storage/compaction/ob_tablet_merge_task.h" #include "storage/compaction/ob_compaction_diagnose.h" #include #include @@ -352,7 +353,7 @@ const ObDagPrio::ObDagPrioEnum ObIDag::MergeDagPrio[] = { ObDagPrio::DAG_PRIO_COMPACTION_LOW, }; const ObDagType::ObDagTypeEnum ObIDag::MergeDagType[] = { - ObDagType::DAG_TYPE_MINOR_MERGE, + ObDagType::DAG_TYPE_MERGE_EXECUTE, ObDagType::DAG_TYPE_MAJOR_MERGE, ObDagType::DAG_TYPE_MINI_MERGE, }; @@ -2049,6 +2050,55 @@ int ObTenantDagScheduler::get_all_compaction_dag_info( return ret; } +int ObTenantDagScheduler::get_minor_exe_dag_info( + const 
compaction::ObTabletMergeDagParam ¶m, + ObIArray &merge_range_array) +{ + int ret = OB_SUCCESS; + compaction::ObTabletMergeExecuteDag dag; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + COMMON_LOG(WARN, "ObDagScheduler is not inited", K(ret)); + } else if (OB_FAIL(dag.init_by_param(¶m))) { + STORAGE_LOG(WARN, "failed to init dag", K(ret), K(param)); + } else { + ObThreadCondGuard guard(scheduler_sync_); + ObIDag *head = dag_list_[READY_DAG_LIST].get_head(ObDagPrio::DAG_PRIO_COMPACTION_MID); + ObIDag *cur = head->get_next(); + while (head != cur && OB_SUCC(ret)) { + if (cur->get_type() == ObDagType::DAG_TYPE_MERGE_EXECUTE) { + compaction::ObTabletMergeExecuteDag *other_dag = static_cast(cur); + if (other_dag->belong_to_same_tablet(&dag)) { + if (OB_FAIL(merge_range_array.push_back(other_dag->get_merge_range()))) { + LOG_WARN("failed to push merge range into array", K(ret), K(other_dag->get_merge_range())); + } + } + } + cur = cur->get_next(); + } // end of while + + // get meta major + ObIDag *stored_dag = nullptr; + dag.merge_type_ = META_MAJOR_MERGE; + compaction::ObTabletMergeExecuteDag *other_dag = nullptr; + if (OB_FAIL(ret)) { + } else if (OB_FAIL(dag_map_.get_refactored(&dag, stored_dag))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get from dag map", K(ret)); + } else { + ret = OB_SUCCESS; + } + } else if (OB_ISNULL(other_dag = static_cast(stored_dag))) { + ret = OB_ERR_SYS; + LOG_WARN("dag is null", K(ret)); + } else if (OB_FAIL(merge_range_array.push_back(other_dag->get_merge_range()))) { + LOG_WARN("failed to push merge range into array", K(ret), K(other_dag->get_merge_range())); + } + } + return ret; +} + int ObTenantDagScheduler::check_ls_compaction_dag_exist(const ObLSID &ls_id, bool &exist) { int ret = OB_SUCCESS; @@ -2070,6 +2120,39 @@ int ObTenantDagScheduler::check_ls_compaction_dag_exist(const ObLSID &ls_id, boo return ret; } +// get oldest minor execute dag +int ObTenantDagScheduler::diagnose_minor_exe_dag( + const 
compaction::ObMergeDagHash *merge_dag_info, + compaction::ObDiagnoseTabletCompProgress &progress) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + COMMON_LOG(WARN, "ObDagScheduler is not inited", K(ret)); + } else if (OB_ISNULL(merge_dag_info)) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "invalid arugment", K(ret), KP(merge_dag_info)); + } else { + ObThreadCondGuard guard(scheduler_sync_); + ObIDag *head = dag_list_[READY_DAG_LIST].get_head(ObDagPrio::DAG_PRIO_COMPACTION_MID); + ObIDag *cur = head->get_next(); + while (head != cur && OB_SUCC(ret)) { + if (cur->get_type() == ObDagType::DAG_TYPE_MERGE_EXECUTE) { + compaction::ObTabletMergeExecuteDag *exe_dag = static_cast(cur); + if (exe_dag->belong_to_same_tablet(merge_dag_info)) { + if (OB_FAIL(exe_dag->diagnose_compaction_info(progress))) { + LOG_WARN("failed to diagnose compaction dag", K(ret), K(exe_dag)); + } else { + break; + } + } + } + cur = cur->get_next(); + } // end of while + } + return ret; +} + // get max estimated_finish_time to update server_progress int ObTenantDagScheduler::get_max_major_finish_time(const int64_t version, int64_t &estimated_finish_time) { diff --git a/src/share/scheduler/ob_dag_scheduler.h b/src/share/scheduler/ob_dag_scheduler.h index 633df07f52..a4e329ebb2 100644 --- a/src/share/scheduler/ob_dag_scheduler.h +++ b/src/share/scheduler/ob_dag_scheduler.h @@ -23,6 +23,7 @@ #include "lib/profile/ob_trace_id.h" #include "share/rc/ob_tenant_base.h" #include "share/scheduler/ob_dag_scheduler_config.h" +#include "share/ob_table_range.h" namespace oceanbase { @@ -30,6 +31,8 @@ namespace compaction { struct ObTabletCompactionProgress; struct ObDiagnoseTabletCompProgress; +class ObMergeDagHash; +struct ObTabletMergeDagParam; } namespace share { @@ -498,7 +501,7 @@ struct ObDagInfo public: ObDagInfo(); ~ObDagInfo() {} - TO_STRING_KV(K_(dag_type), K_(dag_net_type), K_(dag_key), K_(dag_net_key), K_(dag_id), + TO_STRING_KV(K_(tenant_id), K_(dag_type), 
K_(dag_net_type), K_(dag_key), K_(dag_net_key), K_(dag_id), "dag_status", ObIDag::get_dag_status_str(dag_status_), K_(running_task_cnt), K_(add_time), K_(start_time), K_(indegree), K_(comment)); ObDagInfo & operator = (const ObDagInfo &other); @@ -795,6 +798,12 @@ public: int get_all_compaction_dag_info( ObIAllocator &allocator, ObIArray &progress_array); + int get_minor_exe_dag_info( + const compaction::ObTabletMergeDagParam ¶m, + ObIArray &merge_range_array); + int diagnose_minor_exe_dag( + const compaction::ObMergeDagHash *merge_dag_info, + compaction::ObDiagnoseTabletCompProgress &progress); int get_max_major_finish_time(const int64_t version, int64_t &estimated_finish_time); int diagnose_dag(const ObIDag *dag, compaction::ObDiagnoseTabletCompProgress &input_progress); int check_ls_compaction_dag_exist(const ObLSID &ls_id, bool &exist); @@ -1085,20 +1094,6 @@ inline void dag_yield() } } -#define REACH_TENANT_TIME_INTERVAL(i) \ - ({ \ - bool bret = false; \ - RLOCAL_STATIC(int64_t, last_time) = ::oceanbase::common::ObTimeUtility::fast_current_time(); \ - int64_t cur_time = ::oceanbase::common::ObTimeUtility::fast_current_time(); \ - int64_t old_time = last_time; \ - if (OB_UNLIKELY((i + last_time) < cur_time) \ - && old_time == ATOMIC_CAS(&last_time, old_time, cur_time)) \ - { \ - bret = true; \ - } \ - bret; \ - }) - } // namespace share } // namespace oceanbase diff --git a/src/share/scheduler/ob_dag_scheduler_config.h b/src/share/scheduler/ob_dag_scheduler_config.h index bbb90fbf78..409ffab897 100644 --- a/src/share/scheduler/ob_dag_scheduler_config.h +++ b/src/share/scheduler/ob_dag_scheduler_config.h @@ -11,7 +11,7 @@ */ #ifdef DAG_SCHEDULER_DAG_NET_TYPE_DEF -// DAG_SCHEDULER_DAG_TYPE_DEF(DAG_NET_TYPE_ENUM, DAG_NET_TYPE_STR) +// DAG_SCHEDULER_DAG_NET_TYPE_DEF(DAG_NET_TYPE_ENUM, DAG_NET_TYPE_STR) DAG_SCHEDULER_DAG_NET_TYPE_DEF(DAG_NET_TYPE_MIGARTION, "DAG_NET_MIGRATION") DAG_SCHEDULER_DAG_NET_TYPE_DEF(DAG_NET_TYPE_PREPARE_MIGARTION, 
"DAG_NET_PREPARE_MIGRATION") DAG_SCHEDULER_DAG_NET_TYPE_DEF(DAG_NET_TYPE_COMPLETE_MIGARTION, "DAG_NET_COMPLETE_MIGRATION") @@ -38,7 +38,7 @@ DAG_SCHEDULER_DAG_PRIO_DEF(DAG_PRIO_MAX, 0, "INVALID") #ifdef DAG_SCHEDULER_DAG_TYPE_DEF // DAG_SCHEDULER_DAG_TYPE_DEF(DAG_TYPE_ENUM, DAG_DEFAULT_PRIO, SYS_TASK_TYPE, DAG_TYPE_STR, DAG_MODULE_STR) DAG_SCHEDULER_DAG_TYPE_DEF(DAG_TYPE_MINI_MERGE, ObDagPrio::DAG_PRIO_COMPACTION_HIGH, ObSysTaskType::SSTABLE_MINI_MERGE_TASK, "MINI_MERGE", "COMPACTION") -DAG_SCHEDULER_DAG_TYPE_DEF(DAG_TYPE_MINOR_MERGE, ObDagPrio::DAG_PRIO_COMPACTION_MID, ObSysTaskType::SSTABLE_MINOR_MERGE_TASK, "MINOR_MERGE", "COMPACTION") +DAG_SCHEDULER_DAG_TYPE_DEF(DAG_TYPE_MERGE_EXECUTE, ObDagPrio::DAG_PRIO_COMPACTION_MID, ObSysTaskType::SSTABLE_MINOR_MERGE_TASK, "MINOR_EXECUTE", "COMPACTION") DAG_SCHEDULER_DAG_TYPE_DEF(DAG_TYPE_MAJOR_MERGE, ObDagPrio::DAG_PRIO_COMPACTION_LOW, ObSysTaskType::SSTABLE_MAJOR_MERGE_TASK, "MAJOR_MERGE", "COMPACTION") DAG_SCHEDULER_DAG_TYPE_DEF(DAG_TYPE_TX_TABLE_MERGE, ObDagPrio::DAG_PRIO_COMPACTION_HIGH, ObSysTaskType::SPECIAL_TABLE_MERGE_TASK, "TX_TABLE_MERGE", "COMPACTION") DAG_SCHEDULER_DAG_TYPE_DEF(DAG_TYPE_WRITE_CKPT, ObDagPrio::DAG_PRIO_COMPACTION_LOW, ObSysTaskType::WRITE_CKPT_TASK, "WRITE_CKPT", "COMPACTION") diff --git a/src/share/schema/ob_table_schema.h b/src/share/schema/ob_table_schema.h index 597ab60c85..8ef09cc0ef 100644 --- a/src/share/schema/ob_table_schema.h +++ b/src/share/schema/ob_table_schema.h @@ -995,7 +995,7 @@ public: inline int64_t get_rowkey_split_pos() const { return rowkey_split_pos_; } inline int64_t get_block_size() const { return block_size_;} virtual inline bool is_use_bloomfilter() const override { return is_use_bloomfilter_; } - inline int64_t get_progressive_merge_num() const override { return progressive_merge_num_; } + virtual inline int64_t get_progressive_merge_num() const override { return progressive_merge_num_; } virtual inline int64_t get_progressive_merge_round() const override { return 
progressive_merge_round_; } inline uint64_t get_autoinc_column_id() const { return autoinc_column_id_; } inline uint64_t get_auto_increment() const { return auto_increment_; } diff --git a/src/share/tablet/ob_tablet_info.cpp b/src/share/tablet/ob_tablet_info.cpp index c733e70a66..5f3051e486 100644 --- a/src/share/tablet/ob_tablet_info.cpp +++ b/src/share/tablet/ob_tablet_info.cpp @@ -28,7 +28,9 @@ ObTabletReplica::ObTabletReplica() server_(), snapshot_version_(0), data_size_(0), - required_size_(0) + required_size_(0), + report_scn_(0), + status_(SCN_STATUS_MAX) { } @@ -46,6 +48,8 @@ void ObTabletReplica::reset() snapshot_version_ = 0; data_size_ = 0; required_size_ = 0; + report_scn_ = 0; + status_ = SCN_STATUS_MAX; } int ObTabletReplica::assign(const ObTabletReplica &other) @@ -59,6 +63,8 @@ int ObTabletReplica::assign(const ObTabletReplica &other) snapshot_version_ = other.snapshot_version_; data_size_ = other.data_size_; required_size_ = other.required_size_; + report_scn_ = other.report_scn_; + status_ = other.status_; } return ret; } @@ -70,7 +76,9 @@ int ObTabletReplica::init( const common::ObAddr &server, const int64_t snapshot_version, const int64_t data_size, - const int64_t required_size) + const int64_t required_size, + const int64_t report_scn, + const ScnStatus status) { int ret = OB_SUCCESS; if (OB_UNLIKELY( @@ -79,10 +87,12 @@ int ObTabletReplica::init( || !server.is_valid() || snapshot_version < 0 || data_size < 0 - || required_size < 0)) { + || required_size < 0 + || report_scn < 0 + || !is_status_valid(status))) { ret = OB_INVALID_ARGUMENT; - LOG_WARN("init with invalid arguments", KR(ret), - K(tenant_id), K(tablet_id), K(ls_id), K(server), K(snapshot_version), K(data_size), K(required_size)); + LOG_WARN("init with invalid arguments", KR(ret), K(tenant_id), K(tablet_id), K(ls_id), + K(server), K(snapshot_version), K(data_size), K(required_size), K(report_scn), K(status)); } else { tenant_id_ = tenant_id; tablet_id_ = tablet_id; @@ -91,6 +101,8 @@ 
int ObTabletReplica::init( snapshot_version_ = snapshot_version; data_size_ = data_size; required_size_ = required_size; + report_scn_ = report_scn; + status_ = status; } return ret; } diff --git a/src/share/tablet/ob_tablet_info.h b/src/share/tablet/ob_tablet_info.h index a6312f1326..ac13eccb8d 100644 --- a/src/share/tablet/ob_tablet_info.h +++ b/src/share/tablet/ob_tablet_info.h @@ -26,6 +26,13 @@ class ObTabletReplicaFilter; class ObTabletReplica { public: + enum ScnStatus + { + SCN_STATUS_IDLE = 0, + SCN_STATUS_ERROR, + SCN_STATUS_MAX + }; + ObTabletReplica(); virtual ~ObTabletReplica(); void reset(); @@ -36,7 +43,9 @@ public: && server_.is_valid() && snapshot_version_ >= 0 && data_size_ >= 0 - && required_size_ >= 0; + && required_size_ >= 0 + && report_scn_ >= 0 + && is_status_valid(status_); } inline bool primary_keys_are_valid() const { @@ -52,6 +61,8 @@ public: inline int64_t get_snapshot_version() const { return snapshot_version_; } inline int64_t get_data_size() const { return data_size_; } inline int64_t get_required_size() const { return required_size_; } + inline int64_t get_report_scn() const { return report_scn_; } + inline ScnStatus get_status() const { return status_; } int init( const uint64_t tenant_id, const common::ObTabletID &tablet_id, @@ -59,8 +70,14 @@ public: const common::ObAddr &server, const int64_t snapshot_version, const int64_t data_size, - const int64_t required_size); + const int64_t required_size, + const int64_t report_scn, + const ScnStatus status); bool is_equal_for_report(const ObTabletReplica &other) const; + static bool is_status_valid(const ScnStatus status) + { + return status >= SCN_STATUS_IDLE && status < SCN_STATUS_MAX; + } TO_STRING_KV( K_(tenant_id), K_(tablet_id), @@ -68,7 +85,9 @@ public: K_(server), K_(snapshot_version), K_(data_size), - K_(required_size)); + K_(required_size), + K_(report_scn), + K_(status)); private: uint64_t tenant_id_; common::ObTabletID tablet_id_; @@ -77,6 +96,9 @@ private: int64_t 
snapshot_version_; int64_t data_size_; // load balancing releated int64_t required_size_; // load balancing releated + // below: tablet level member for compaction + int64_t report_scn_; + ScnStatus status_; }; class ObTabletInfo diff --git a/src/share/tablet/ob_tablet_table_operator.cpp b/src/share/tablet/ob_tablet_table_operator.cpp index 79d80767b2..9262fe6adf 100644 --- a/src/share/tablet/ob_tablet_table_operator.cpp +++ b/src/share/tablet/ob_tablet_table_operator.cpp @@ -22,6 +22,7 @@ #include "lib/mysqlclient/ob_mysql_proxy.h" // ObMySqlProxy #include "share/ob_ls_id.h" // ObLSID #include "observer/omt/ob_tenant_timezone_mgr.h" // for OTTZ_MGR.get_tenant_tz +#include "observer/ob_server_struct.h" // GCTX namespace oceanbase { @@ -90,6 +91,7 @@ int ObTabletTableOperator::get( return ret; } +// will fill empty tablet_info when tablet not exist int ObTabletTableOperator::get( const uint64_t tenant_id, const common::ObTabletID &tablet_id, @@ -118,6 +120,33 @@ int ObTabletTableOperator::get( return ret; } +// this func used for tablet leader to check merge finish, will not fill empty tablet info when tablet not exist +int ObTabletTableOperator::get_tablet_info( + common::ObISQLClient *sql_proxy, + const uint64_t tenant_id, + const common::ObTabletID &tablet_id, + const ObLSID &ls_id, + ObTabletInfo &tablet_info) +{ + int ret = OB_SUCCESS; + ObSEArray tablet_ls_pairs; + ObSEArray tablet_infos; + if (OB_ISNULL(sql_proxy)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(sql_proxy)); + } else if (OB_FAIL(tablet_ls_pairs.push_back(ObTabletLSPair(tablet_id, ls_id)))) { + LOG_WARN("fail to push back tablet ls pair", KR(ret), K(tablet_id), K(ls_id)); + } else if (OB_FAIL(inner_batch_get_by_sql_(*sql_proxy, tenant_id, tablet_ls_pairs, 0/*start_idx*/, 1/*end_idx*/, tablet_infos))) { + LOG_WARN("fail to get tablet info", KR(ret), K(tenant_id), K(tablet_ls_pairs)); + } else if (1 != tablet_infos.count()) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("tablet_infos count should be one", KR(ret), "count", tablet_infos.count()); + } else if (OB_FAIL(tablet_info.assign(tablet_infos.at(0)))) { + LOG_WARN("fail to assign tablet info", KR(ret), K(tablet_infos)); + } + return ret; +} + int ObTabletTableOperator::batch_get( const uint64_t tenant_id, const ObIArray &tablet_ls_pairs, @@ -165,6 +194,7 @@ int ObTabletTableOperator::batch_get( int64_t end_idx = min(MAX_BATCH_COUNT, pairs_cnt); while (OB_SUCC(ret) && (start_idx < end_idx)) { if (OB_FAIL(inner_batch_get_by_sql_( + *sql_proxy_, tenant_id, tablet_ls_pairs, start_idx, @@ -212,6 +242,7 @@ int ObTabletTableOperator::batch_get( } int ObTabletTableOperator::inner_batch_get_by_sql_( + ObISQLClient &sql_client, const uint64_t tenant_id, const ObIArray &tablet_ls_pairs, const int64_t start_idx, @@ -219,10 +250,7 @@ int ObTabletTableOperator::inner_batch_get_by_sql_( ObIArray &tablet_infos) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!inited_) || OB_ISNULL(sql_proxy_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", KR(ret)); - } else if (OB_UNLIKELY(tablet_ls_pairs.empty() + if (OB_UNLIKELY(tablet_ls_pairs.empty() || OB_INVALID_TENANT_ID == tenant_id || start_idx < 0 || start_idx >= end_idx @@ -256,7 +284,7 @@ int ObTabletTableOperator::inner_batch_get_by_sql_( } if (FAILEDx(sql.append_fmt(") ORDER BY tenant_id, tablet_id, ls_id, svr_ip, svr_port"))) { LOG_WARN("assign sql string failed", KR(ret)); - } else if (OB_FAIL(sql_proxy_->read(result, sql_tenant_id, sql.ptr()))) { + } else if (OB_FAIL(sql_client.read(result, sql_tenant_id, sql.ptr()))) { LOG_WARN("execute sql failed", KR(ret), K(tenant_id), K(sql_tenant_id), "sql", sql.ptr()); } else if (OB_ISNULL(result.get_result())) { @@ -275,49 +303,44 @@ int ObTabletTableOperator::construct_tablet_infos_( ObIArray &tablet_infos) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", KR(ret)); - } else { - ObTabletInfo tablet_info; - ObTabletReplica replica; - while 
(OB_SUCC(ret)) { - if (OB_FAIL(res.next())) { - if (OB_ITER_END == ret) { - ret = OB_SUCCESS; - } else { - LOG_WARN("get next result failed", KR(ret)); - } - break; + ObTabletInfo tablet_info; + ObTabletReplica replica; + while (OB_SUCC(ret)) { + if (OB_FAIL(res.next())) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; } else { - replica.reset(); - if (OB_FAIL(construct_tablet_replica_(res, replica))) { - LOG_WARN("fail to construct tablet replica", KR(ret)); - } else if (OB_UNLIKELY(!replica.is_valid())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("construct invalid replica", KR(ret), K(replica)); - } else if (tablet_info.is_self_replica(replica)) { - if (OB_FAIL(tablet_info.add_replica(replica))) { - LOG_WARN("fail to add replica", KR(ret), K(replica)); - } - } else { - if (tablet_info.is_valid()) { - if (OB_FAIL(tablet_infos.push_back(tablet_info))) { - LOG_WARN("fail to push back", KR(ret), K(tablet_info)); - } - } - tablet_info.reset(); - if (FAILEDx(tablet_info.init_by_replica(replica))) { - LOG_WARN("fail to init tablet_info by replica", KR(ret), K(replica)); + LOG_WARN("get next result failed", KR(ret)); + } + break; + } else { + replica.reset(); + if (OB_FAIL(construct_tablet_replica_(res, replica))) { + LOG_WARN("fail to construct tablet replica", KR(ret)); + } else if (OB_UNLIKELY(!replica.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("construct invalid replica", KR(ret), K(replica)); + } else if (tablet_info.is_self_replica(replica)) { + if (OB_FAIL(tablet_info.add_replica(replica))) { + LOG_WARN("fail to add replica", KR(ret), K(replica)); + } + } else { + if (tablet_info.is_valid()) { + if (OB_FAIL(tablet_infos.push_back(tablet_info))) { + LOG_WARN("fail to push back", KR(ret), K(tablet_info)); } } + tablet_info.reset(); + if (FAILEDx(tablet_info.init_by_replica(replica))) { + LOG_WARN("fail to init tablet_info by replica", KR(ret), K(replica)); + } } - } // end while - if (OB_SUCC(ret) && tablet_info.is_valid()) { - // last tablet info - if 
(OB_FAIL(tablet_infos.push_back(tablet_info))) { - LOG_WARN("fail to push back", KR(ret), K(tablet_info)); - } + } + } // end while + if (OB_SUCC(ret) && tablet_info.is_valid()) { + // last tablet info + if (OB_FAIL(tablet_infos.push_back(tablet_info))) { + LOG_WARN("fail to push back", KR(ret), K(tablet_info)); } } return ret; @@ -328,48 +351,58 @@ int ObTabletTableOperator::construct_tablet_replica_( ObTabletReplica &replica) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!inited_)) { - ret = OB_NOT_INIT; - LOG_WARN("not init", KR(ret)); - } else { - int64_t tenant_id = OB_INVALID_TENANT_ID; - int64_t tablet_id = ObTabletID::INVALID_TABLET_ID; - common::ObAddr server; - ObString ip; - int64_t port = OB_INVALID_INDEX; - int64_t ls_id = OB_INVALID_ID; - uint64_t uint_compaction_scn = 0; - int64_t compaction_scn = 0; - int64_t data_size = 0; - int64_t required_size = 0; + int64_t tenant_id = OB_INVALID_TENANT_ID; + int64_t tablet_id = ObTabletID::INVALID_TABLET_ID; + common::ObAddr server; + ObString ip; + int64_t port = OB_INVALID_INDEX; + int64_t ls_id = OB_INVALID_ID; + uint64_t uint_compaction_scn = 0; + int64_t compaction_scn = 0; + int64_t data_size = 0; + int64_t required_size = 0; + uint64_t uint_report_scn = 0; + int64_t status_in_table = 0; + ObTabletReplica::ScnStatus status = ObTabletReplica::SCN_STATUS_IDLE; + bool skip_null_error = false; + bool skip_column_error = true; - (void)GET_COL_IGNORE_NULL(res.get_int, "tenant_id", tenant_id); - (void)GET_COL_IGNORE_NULL(res.get_int, "tablet_id", tablet_id); - (void)GET_COL_IGNORE_NULL(res.get_int, "ls_id", ls_id); - (void)GET_COL_IGNORE_NULL(res.get_varchar, "svr_ip", ip); - (void)GET_COL_IGNORE_NULL(res.get_int, "svr_port", port); - (void)GET_COL_IGNORE_NULL(res.get_uint, "compaction_scn", uint_compaction_scn); - (void)GET_COL_IGNORE_NULL(res.get_int, "data_size", data_size); - (void)GET_COL_IGNORE_NULL(res.get_int, "required_size", required_size); + (void) GET_COL_IGNORE_NULL(res.get_int, "tenant_id", 
tenant_id); + (void) GET_COL_IGNORE_NULL(res.get_int, "tablet_id", tablet_id); + (void) GET_COL_IGNORE_NULL(res.get_int, "ls_id", ls_id); + (void) GET_COL_IGNORE_NULL(res.get_varchar, "svr_ip", ip); + (void) GET_COL_IGNORE_NULL(res.get_int, "svr_port", port); + (void) GET_COL_IGNORE_NULL(res.get_uint, "compaction_scn", uint_compaction_scn); + (void) GET_COL_IGNORE_NULL(res.get_int, "data_size", data_size); + (void) GET_COL_IGNORE_NULL(res.get_int, "required_size", required_size); - compaction_scn = static_cast(uint_compaction_scn); - if (OB_FAIL(ret)) { - } else if (OB_UNLIKELY(!server.set_ip_addr(ip, static_cast(port)))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid server address", KR(ret), K(ip), K(port)); - } else if (OB_FAIL(replica.init( - tenant_id, - ObTabletID(tablet_id), - share::ObLSID(ls_id), - server, - compaction_scn, - data_size, - required_size))) { - LOG_WARN("fail to init replica", KR(ret), - K(tenant_id), K(tablet_id), K(server), K(ls_id), K(data_size), K(required_size)); - } - LOG_TRACE("construct tablet replica", KR(ret), K(replica)); + EXTRACT_UINT_FIELD_MYSQL_WITH_DEFAULT_VALUE(res, "report_scn", uint_report_scn, uint64_t, skip_null_error, skip_column_error, 0); + EXTRACT_INT_FIELD_MYSQL_WITH_DEFAULT_VALUE(res, "status", status_in_table, int64_t, skip_null_error, skip_column_error, ObTabletReplica::SCN_STATUS_IDLE); + + status = (ObTabletReplica::ScnStatus)status_in_table; + compaction_scn = static_cast(uint_compaction_scn); + if (OB_FAIL(ret)) { + } else if (OB_UNLIKELY(!server.set_ip_addr(ip, static_cast(port)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid server address", KR(ret), K(ip), K(port)); + } else if (OB_UNLIKELY(!ObTabletReplica::is_status_valid(status))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid status", K(ret), K(status_in_table)); + } else if (OB_FAIL( + replica.init( + tenant_id, + ObTabletID(tablet_id), + share::ObLSID(ls_id), + server, + compaction_scn, + data_size, + required_size, + 
(int64_t)uint_report_scn, + status))) { + LOG_WARN("fail to init replica", KR(ret), + K(tenant_id), K(tablet_id), K(server), K(ls_id), K(data_size), K(required_size)); } + LOG_TRACE("construct tablet replica", KR(ret), K(replica)); return ret; } diff --git a/src/share/tablet/ob_tablet_table_operator.h b/src/share/tablet/ob_tablet_table_operator.h index a7cc3e0933..7aaa265ba5 100644 --- a/src/share/tablet/ob_tablet_table_operator.h +++ b/src/share/tablet/ob_tablet_table_operator.h @@ -135,8 +135,16 @@ public: const ObAddr &server, const int64_t limit, int64_t &affected_rows); +public: + static int get_tablet_info( + common::ObISQLClient *sql_proxy, + const uint64_t tenant_id, + const common::ObTabletID &tablet_id, + const ObLSID &ls_id, + ObTabletInfo &tablet_info); private: - int inner_batch_get_by_sql_( + static int inner_batch_get_by_sql_( + ObISQLClient &sql_client, const uint64_t tenant_id, const ObIArray &tablet_ls_pairs, const int64_t start_idx, @@ -148,10 +156,10 @@ private: const int64_t start_idx, const int64_t end_idx, common::ObISQLClient &sql_client); - int construct_tablet_infos_( + static int construct_tablet_infos_( common::sqlclient::ObMySQLResult &res, ObIArray &tablet_infos); - int construct_tablet_replica_( + static int construct_tablet_replica_( common::sqlclient::ObMySQLResult &res, ObTabletReplica &replica); int fill_dml_splicer_( diff --git a/src/storage/CMakeLists.txt b/src/storage/CMakeLists.txt index 9cad63597a..3bdd2a4a0c 100644 --- a/src/storage/CMakeLists.txt +++ b/src/storage/CMakeLists.txt @@ -318,6 +318,8 @@ ob_set_subtarget(ob_storage ls ls/ob_ls_tablet_service.cpp ls/ob_ls_tx_service.cpp ls/ob_ls_saved_info.cpp + ls/ob_ls_reserved_snapshot_mgr.cpp + ls/ob_ls_storage_clog_handler.cpp ) ob_set_subtarget(ob_storage access @@ -394,6 +396,7 @@ ob_set_subtarget(ob_storage common ob_storage_rpc.ipp ob_storage_schema.cpp ob_storage_schema_recorder.cpp + ob_storage_clog_recorder.cpp ob_storage_struct.cpp ob_storage_table_guard.cpp 
ob_storage_util.cpp @@ -401,6 +404,7 @@ ob_set_subtarget(ob_storage common ob_sync_tablet_seq_clog.cpp ob_table_store_stat_mgr.cpp ob_value_row_iterator.cpp + ob_tenant_tablet_stat_mgr.cpp ) ob_set_subtarget(ob_storage common_mixed @@ -433,12 +437,14 @@ ob_set_subtarget(ob_storage compaction compaction/ob_tenant_freeze_info_mgr.cpp compaction/ob_tenant_tablet_scheduler.cpp compaction/ob_schedule_dag_func.cpp + compaction/ob_medium_compaction_func.cpp compaction/ob_medium_compaction_mgr.cpp compaction/ob_compaction_diagnose.cpp compaction/ob_compaction_suggestion.cpp compaction/ob_sstable_merge_info_mgr.cpp compaction/ob_tenant_compaction_progress.cpp compaction/ob_server_compaction_event_history.cpp + compaction/ob_compaction_util.cpp compaction/ob_partition_rows_merger.cpp ) diff --git a/src/storage/access/ob_multiple_merge.cpp b/src/storage/access/ob_multiple_merge.cpp index ea2939c0b4..60390f34e8 100644 --- a/src/storage/access/ob_multiple_merge.cpp +++ b/src/storage/access/ob_multiple_merge.cpp @@ -25,6 +25,7 @@ #include "sql/engine/ob_operator.h" #include "storage/tx_storage/ob_ls_service.h" #include "storage/tx_storage/ob_ls_handle.h" +#include "storage/ob_tenant_tablet_stat_mgr.h" namespace oceanbase { @@ -101,7 +102,7 @@ int ObMultipleMerge::init( || OB_UNLIKELY(!context.is_valid()) || OB_UNLIKELY(!get_table_param.is_valid())) { ret = OB_INVALID_ARGUMENT; - STORAGE_LOG(WARN, "Invalid argument, ", K(ret), K(param), K(context), K(get_table_param)); + STORAGE_LOG(WARN, "Invalid argument", K(ret), K(param), K(context), K(get_table_param)); } else if (OB_FAIL(cur_row_.init(*context.stmt_allocator_, param.get_max_out_col_cnt()))) { STORAGE_LOG(WARN, "Failed to init datum row", K(ret)); } else if (OB_FAIL(unprojected_row_.init(*context.stmt_allocator_, param.get_out_col_cnt()))) { @@ -360,7 +361,7 @@ int ObMultipleMerge::get_next_row(ObDatumRow *&row) } if (OB_ITER_END == ret) { - update_and_report_scan_stat(); + update_and_report_tablet_stat(); scan_state_ = 
ScanState::NONE; } if (OB_SUCC(ret)) { @@ -512,7 +513,7 @@ int ObMultipleMerge::get_next_normal_rows(int64_t &count, int64_t capacity) } } if (OB_ITER_END == ret) { - update_and_report_scan_stat(); + update_and_report_tablet_stat(); scan_state_ = ScanState::NONE; } LOG_TRACE("[Vectorized] get next rows", K(ret), K(count), K(capacity), KPC(block_row_store_)); @@ -630,12 +631,33 @@ int ObMultipleMerge::get_next_aggregate_row(ObDatumRow *&row) } } if (OB_ITER_END == ret) { - update_and_report_scan_stat(); + update_and_report_tablet_stat(); scan_state_ = ScanState::NONE; } return ret; } +void ObMultipleMerge::report_tablet_stat() +{ + if (0 == access_ctx_->table_store_stat_.physical_read_cnt_ && + 0 == access_ctx_->table_store_stat_.micro_access_cnt_) { + // empty query, ignore it + } else { + int tmp_ret = OB_SUCCESS; + storage::ObTabletStat tablet_stat; + tablet_stat.ls_id_ = access_ctx_->table_store_stat_.ls_id_.id(); + tablet_stat.tablet_id_ = access_ctx_->table_store_stat_.tablet_id_.id(); + tablet_stat.query_cnt_ = 1; + tablet_stat.scan_logical_row_cnt_ = access_ctx_->table_store_stat_.logical_read_cnt_; + tablet_stat.scan_physical_row_cnt_ = access_ctx_->table_store_stat_.physical_read_cnt_; + tablet_stat.scan_micro_block_cnt_ = access_ctx_->table_store_stat_.micro_access_cnt_; + tablet_stat.pushdown_micro_block_cnt_ = access_ctx_->table_store_stat_.pushdown_micro_access_cnt_; + if (OB_TMP_FAIL(MTL(storage::ObTenantTabletStatMgr *)->report_stat(tablet_stat))) { + STORAGE_LOG(WARN, "failed to report tablet stat", K(tmp_ret), K(tablet_stat)); + } + } +} + int ObMultipleMerge::process_fuse_row(const bool not_using_static_engine, ObDatumRow &in_row, ObDatumRow *&out_row) diff --git a/src/storage/access/ob_multiple_merge.h b/src/storage/access/ob_multiple_merge.h index b3a9a5b096..05654f8216 100644 --- a/src/storage/access/ob_multiple_merge.h +++ b/src/storage/access/ob_multiple_merge.h @@ -99,7 +99,6 @@ private: OB_INLINE int check_need_refresh_table(bool 
&need_refresh); int save_curr_rowkey(); int reset_tables(); - OB_INLINE int report_table_store_stat(); int check_filtered(const blocksstable::ObDatumRow &row, bool &filtered); int alloc_row_store(ObTableAccessContext &context, const ObTableAccessParam ¶m); int alloc_iter_pool(common::ObIAllocator &allocator); @@ -107,11 +106,13 @@ private: blocksstable::ObDatumRow &in_row, blocksstable::ObDatumRow *&out_row); int fill_group_idx_if_need(blocksstable::ObDatumRow &row); - OB_INLINE int update_and_report_scan_stat(); int init_lob_reader(const ObTableIterParam &iter_param, ObTableAccessContext &access_ctx); int read_lob_columns(blocksstable::ObDatumRow &row); bool need_read_lob_columns(const blocksstable::ObDatumRow &row); + void report_tablet_stat(); + OB_INLINE int update_and_report_tablet_stat(); + protected: common::ObArenaAllocator padding_allocator_; MergeIterators iters_; @@ -166,23 +167,9 @@ OB_INLINE int ObMultipleMerge::check_need_refresh_table(bool &need_refresh) return ret; } -OB_INLINE int ObMultipleMerge::report_table_store_stat() +OB_INLINE int ObMultipleMerge::update_and_report_tablet_stat() { int ret = OB_SUCCESS; - if (lib::is_diagnose_info_enabled()) - { - collect_merge_stat(access_ctx_->table_store_stat_); - //report access cnt and output cnt to the main table, ignore ret - if(OB_FAIL(ObTableStoreStatMgr::get_instance().report_stat(access_ctx_->table_store_stat_))) { - STORAGE_LOG(WARN, "report tablestat to main table fail,", K(ret)); - } - } - access_ctx_->table_store_stat_.reuse(); - return ret; -} - -OB_INLINE int ObMultipleMerge::update_and_report_scan_stat() -{ EVENT_ADD(ObStatEventIds::STORAGE_READ_ROW_COUNT, scan_cnt_); access_ctx_->table_store_stat_.access_row_cnt_ += row_stat_.filt_del_count_; if (NULL != access_ctx_->table_scan_stat_) { @@ -198,7 +185,11 @@ OB_INLINE int ObMultipleMerge::update_and_report_scan_stat() access_ctx_->table_scan_stat_->row_cache_hit_cnt_ += access_ctx_->table_store_stat_.row_cache_hit_cnt_; 
access_ctx_->table_scan_stat_->row_cache_miss_cnt_ += access_ctx_->table_store_stat_.row_cache_miss_cnt_; } - return report_table_store_stat(); + if (lib::is_diagnose_info_enabled()) { + collect_merge_stat(access_ctx_->table_store_stat_); + } + report_tablet_stat(); + return ret; } } diff --git a/src/storage/access/ob_store_row_iterator.cpp b/src/storage/access/ob_store_row_iterator.cpp index b8a913113c..14e591147d 100644 --- a/src/storage/access/ob_store_row_iterator.cpp +++ b/src/storage/access/ob_store_row_iterator.cpp @@ -38,7 +38,7 @@ void ObStoreRowIterator::reset() int ObStoreRowIterator::init( const ObTableIterParam &iter_param, - ObTableAccessContext &access_ctx, + storage::ObTableAccessContext &access_ctx, ObITable *table, const void *query_range) { @@ -61,17 +61,6 @@ int ObStoreRowIterator::get_next_row(const ObDatumRow *&store_row) return ret; } -int ObStoreRowIterator::report_stat(ObTableStoreStat &stat) -{ - int ret = OB_SUCCESS; - if (OB_FAIL(ObTableStoreStatMgr::get_instance().report_stat(stat))) { - STORAGE_LOG(WARN, "report stat fail", K(ret), K(stat)); - } else { - stat.reuse(); - } - return ret; -} - TableTypedIters::TableTypedIters(const std::type_info& info, common::ObIAllocator &alloc) : type_info_(&info), diff --git a/src/storage/access/ob_store_row_iterator.h b/src/storage/access/ob_store_row_iterator.h index aa18c2e986..7db05a4419 100644 --- a/src/storage/access/ob_store_row_iterator.h +++ b/src/storage/access/ob_store_row_iterator.h @@ -99,7 +99,7 @@ public: UNUSED(read_handle); return OB_NOT_IMPLEMENT; } - int report_stat(ObTableStoreStat &stat); + VIRTUAL_TO_STRING_KV(K_(type), K_(is_sstable_iter), KP_(block_row_store)); protected: diff --git a/src/storage/blocksstable/ob_bloom_filter_data_writer.cpp b/src/storage/blocksstable/ob_bloom_filter_data_writer.cpp index bd35f812df..e9d15f98b4 100644 --- a/src/storage/blocksstable/ob_bloom_filter_data_writer.cpp +++ b/src/storage/blocksstable/ob_bloom_filter_data_writer.cpp @@ -171,7 +171,7 @@ 
int ObBloomFilterMacroBlockWriter::init(const ObDataStoreDesc &desc) ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "Invalid argument to init ObBloomFilterMacroBlockWriter", K(desc), K(ret)); - } else if (OB_UNLIKELY(is_major_merge(desc.merge_type_))) { + } else if (OB_UNLIKELY(is_major_merge_type(desc.merge_type_))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Major freeze would not build bloomfilter macro data", K(ret)); } else if (OB_FAIL(data_buffer_.ensure_space(desc.macro_block_size_))) { diff --git a/src/storage/blocksstable/ob_index_block_builder.cpp b/src/storage/blocksstable/ob_index_block_builder.cpp index 24a0a2d1c9..6a2fca2118 100644 --- a/src/storage/blocksstable/ob_index_block_builder.cpp +++ b/src/storage/blocksstable/ob_index_block_builder.cpp @@ -274,8 +274,8 @@ int ObSSTableMergeRes::fill_column_checksum_for_empty_major( } int ObSSTableMergeRes::fill_column_default_checksum_from_schema( - const ObMergeSchema *schema, - common::ObIArray &column_default_checksum) + const ObStorageSchema *schema, + ObIArray &column_default_checksum) { int ret = OB_SUCCESS; common::ObArray meta_array; @@ -295,7 +295,7 @@ int ObSSTableMergeRes::fill_column_default_checksum_from_schema( } int ObSSTableMergeRes::fill_column_checksum( - const ObMergeSchema *schema, + const ObStorageSchema *schema, ObIArray &column_checksums) const { int ret = OB_SUCCESS; diff --git a/src/storage/blocksstable/ob_index_block_builder.h b/src/storage/blocksstable/ob_index_block_builder.h index a74f3c483a..6afbe8e5f5 100644 --- a/src/storage/blocksstable/ob_index_block_builder.h +++ b/src/storage/blocksstable/ob_index_block_builder.h @@ -174,14 +174,14 @@ public: void reset(); int assign(const ObSSTableMergeRes &src); int fill_column_checksum( - const ObMergeSchema *schema, + const ObStorageSchema *schema, common::ObIArray &column_checksums) const; int fill_column_checksum( const common::ObIArray &column_default_checksum, common::ObIArray &column_checksums) const; int 
prepare_column_checksum_array(const int64_t data_column_cnt); static int fill_column_default_checksum_from_schema( - const ObMergeSchema *schema, + const ObStorageSchema *schema, common::ObIArray &column_default_checksum); static int fill_column_checksum_for_empty_major( const int64_t column_count, diff --git a/src/storage/blocksstable/ob_index_block_tree_cursor.cpp b/src/storage/blocksstable/ob_index_block_tree_cursor.cpp index f21ab9d920..f5512848da 100644 --- a/src/storage/blocksstable/ob_index_block_tree_cursor.cpp +++ b/src/storage/blocksstable/ob_index_block_tree_cursor.cpp @@ -367,6 +367,7 @@ int ObIndexBlockTreeCursor::drill_down( while (OB_SUCC(ret) && 0 == cmp_ret) { if (OB_FAIL(get_current_endkey(tmp_endkey, compare_schema_rowkey))) { LOG_WARN("Fail to get current endkey", K(ret)); + } else if (FALSE_IT(tmp_endkey.datum_cnt_ = rowkey.datum_cnt_)) { } else if (OB_FAIL(tmp_endkey.compare( rowkey, read_info_->get_datum_utils(), diff --git a/src/storage/blocksstable/ob_macro_block.cpp b/src/storage/blocksstable/ob_macro_block.cpp index 2d8ad88fe3..41e28e0ede 100644 --- a/src/storage/blocksstable/ob_macro_block.cpp +++ b/src/storage/blocksstable/ob_macro_block.cpp @@ -126,7 +126,7 @@ int ObDataStoreDesc::cal_row_store_type(const share::schema::ObMergeSchema &merg { int ret = OB_SUCCESS; - if (!storage::is_major_merge(merge_type) && !storage::is_buf_minor_merge(merge_type)) { // not major or buf minor + if (!storage::is_major_merge_type(merge_type) && !storage::is_meta_major_merge(merge_type)) { // not major or meta merge row_store_type_ = FLAT_ROW_STORE; } else { row_store_type_ = merge_schema.get_row_store_type(); @@ -144,7 +144,7 @@ int ObDataStoreDesc::cal_row_store_type(const share::schema::ObMergeSchema &merg int ObDataStoreDesc::set_major_working_cluster_version() { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_major_merge() || snapshot_version_ <= 0)) { + if (OB_UNLIKELY((!is_major_merge() && !is_meta_major_merge()) || snapshot_version_ <= 0)) { ret = 
OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected data store to get major working cluster version", K(ret), K_(merge_type), K_(snapshot_version)); @@ -187,7 +187,8 @@ int ObDataStoreDesc::init( } else { reset(); const int64_t pct_free = merge_schema.get_pctfree(); - const bool is_major = storage::is_major_merge(merge_type); + const bool is_major = (storage::is_major_merge_type(merge_type) || storage::is_meta_major_merge(merge_type)); + micro_block_size_ = merge_schema.get_block_size(); macro_block_size_ = OB_SERVER_BLOCK_MGR.get_macro_block_size(); if (pct_free >= 0 && pct_free <= 50) { macro_store_size_ = macro_block_size_ * (100 - pct_free) / 100; @@ -249,7 +250,7 @@ int ObDataStoreDesc::init( encoder_opt_.set_store_type(row_store_type_); } - if (OB_SUCC(ret) && is_major) { + if (OB_SUCC(ret) && storage::is_major_merge(merge_type)) { // exactly MAJOR MERGE if (cluster_version > 0) { major_working_cluster_version_ = cluster_version; } else { diff --git a/src/storage/blocksstable/ob_macro_block.h b/src/storage/blocksstable/ob_macro_block.h index c453f1e230..bb78e1075c 100644 --- a/src/storage/blocksstable/ob_macro_block.h +++ b/src/storage/blocksstable/ob_macro_block.h @@ -21,6 +21,7 @@ #include "ob_sstable_meta.h" #include "share/ob_encryption_util.h" #include "storage/blocksstable/ob_macro_block_meta.h" +#include "storage/compaction/ob_compaction_util.h" #include "share/scn.h" namespace oceanbase { @@ -92,11 +93,12 @@ struct ObDataStoreDesc void reset(); int assign(const ObDataStoreDesc &desc); bool encoding_enabled() const { return ObStoreFormat::is_row_store_type_with_encoding(row_store_type_); } - OB_INLINE bool is_major_merge() const { return storage::is_major_merge(merge_type_); } + OB_INLINE bool is_major_merge() const { return storage::is_major_merge_type(merge_type_); } + OB_INLINE bool is_meta_major_merge() const { return storage::is_meta_major_merge(merge_type_); } OB_INLINE bool is_use_pct_free() const { return macro_block_size_ != macro_store_size_; } 
int64_t get_logical_version() const { - return is_major_merge() ? snapshot_version_ : end_scn_.get_val_for_tx(); + return (is_major_merge() || is_meta_major_merge()) ? snapshot_version_ : end_scn_.get_val_for_tx(); } TO_STRING_KV( K_(ls_id), @@ -125,7 +127,7 @@ struct ObDataStoreDesc private: int cal_row_store_type( - const share::schema::ObMergeSchema &table_schema, + const share::schema::ObMergeSchema &schema, const storage::ObMergeType merge_type); int set_major_working_cluster_version(); int get_emergency_row_store_type(); diff --git a/src/storage/blocksstable/ob_macro_block_writer.cpp b/src/storage/blocksstable/ob_macro_block_writer.cpp index c8f2f6ee08..6406ce0186 100644 --- a/src/storage/blocksstable/ob_macro_block_writer.cpp +++ b/src/storage/blocksstable/ob_macro_block_writer.cpp @@ -827,7 +827,8 @@ int ObMacroBlockWriter::check_order(const ObDatumRow &row) } } else { // another schema rowkey if (nullptr != data_store_desc_->merge_info_ - && MAJOR_MERGE != data_store_desc_->merge_info_->merge_type_ + && !is_major_merge_type(data_store_desc_->merge_info_->merge_type_) + && !is_meta_major_merge(data_store_desc_->merge_info_->merge_type_) && !is_macro_or_micro_block_reused_ && !last_key_with_L_flag_) { ret = OB_ERR_UNEXPECTED; diff --git a/src/storage/blocksstable/ob_row_cache.cpp b/src/storage/blocksstable/ob_row_cache.cpp index f8066bd663..de6d932c45 100644 --- a/src/storage/blocksstable/ob_row_cache.cpp +++ b/src/storage/blocksstable/ob_row_cache.cpp @@ -137,7 +137,7 @@ int ObRowCacheKey::deep_copy(char *buf, const int64_t buf_len, ObIKVCacheKey *&k bool ObRowCacheKey::is_valid() const { return OB_LIKELY(0 != tenant_id_ && tablet_id_.is_valid() && rowkey_size_ > 0 - && data_version_ > -1 && (ObITable::is_minor_sstable(table_type_) || ObITable::is_major_sstable(table_type_)) + && data_version_ > -1 && (ObITable::is_minor_sstable(table_type_) || ObITable::is_major_sstable(table_type_) || ObITable::is_meta_major_sstable(table_type_)) && rowkey_.is_valid()); } 
diff --git a/src/storage/blocksstable/ob_shared_macro_block_manager.cpp b/src/storage/blocksstable/ob_shared_macro_block_manager.cpp index 9195495f2e..4c139ba715 100644 --- a/src/storage/blocksstable/ob_shared_macro_block_manager.cpp +++ b/src/storage/blocksstable/ob_shared_macro_block_manager.cpp @@ -706,7 +706,7 @@ int ObSharedMacroBlockMgr::parse_merge_type(const ObSSTable &sstable, ObMergeTyp if (sstable.is_major_sstable()) { merge_type = ObMergeType::MAJOR_MERGE; } else if (sstable.is_minor_sstable()) { - merge_type = ObMergeType::MINI_MINOR_MERGE; + merge_type = ObMergeType::MINOR_MERGE; } else { ret = OB_ERR_UNEXPECTED; LOG_WARN("sstable type is unexpected", K(ret), K(sstable)); @@ -769,4 +769,4 @@ void ObSharedMacroBlockMgr::ObBlockDefragmentationTask::runTimerTask() } } // namespace blocksstable -} // namespace oceanbase \ No newline at end of file +} // namespace oceanbase diff --git a/src/storage/blocksstable/ob_shared_macro_block_manager.h b/src/storage/blocksstable/ob_shared_macro_block_manager.h index 67f77d2e7a..c03a31cb5a 100644 --- a/src/storage/blocksstable/ob_shared_macro_block_manager.h +++ b/src/storage/blocksstable/ob_shared_macro_block_manager.h @@ -17,6 +17,7 @@ #include "lib/hash/ob_linear_hash_map.h" #include "storage/blocksstable/ob_block_manager.h" #include "lib/task/ob_timer.h" +#include "storage/compaction/ob_compaction_util.h" namespace oceanbase { @@ -186,4 +187,4 @@ private: } // namespace blocksstable } // namespace oceanbase -#endif \ No newline at end of file +#endif diff --git a/src/storage/blocksstable/ob_sstable.cpp b/src/storage/blocksstable/ob_sstable.cpp index 2054fd182d..6ee018ccc7 100644 --- a/src/storage/blocksstable/ob_sstable.cpp +++ b/src/storage/blocksstable/ob_sstable.cpp @@ -29,6 +29,7 @@ #include "storage/tablet/ob_tablet_create_sstable_param.h" #include "storage/meta_mem/ob_tenant_meta_mem_mgr.h" #include "storage/compaction/ob_tenant_tablet_scheduler.h" +#include "storage/ob_tenant_tablet_stat_mgr.h" #include 
"storage/blocksstable/ob_shared_macro_block_manager.h" namespace oceanbase @@ -411,9 +412,11 @@ int ObSSTable::exist( } if (OB_NOT_NULL(iter)) { - if (lib::is_diagnose_info_enabled()) { - iter->report_stat(access_context.table_store_stat_); - } + ObTabletStat &stat = ctx.tablet_stat_; + stat.ls_id_ = ctx.ls_id_.id(); + stat.tablet_id_ = ctx.tablet_id_.id(); + stat.query_cnt_ = access_context.table_store_stat_.exist_row_.empty_read_cnt_ > 0; + iter->~ObStoreRowIterator(); access_context.stmt_allocator_->free(iter); } @@ -496,9 +499,6 @@ int ObSSTable::exist(ObRowsInfo &rows_info, bool &is_exist, bool &all_rows_found } if (OB_NOT_NULL(iter)) { - if (lib::is_diagnose_info_enabled()) { - iter->report_stat(rows_info.exist_helper_.table_access_context_.table_store_stat_); - } iter->~ObStoreRowIterator(); rows_info.exist_helper_.table_access_context_.allocator_->free(iter); rows_info.reuse_scan_mem_allocator(); @@ -621,10 +621,8 @@ int ObSSTable::scan_secondary_meta( LOG_WARN("Unexpected null pointer of secondary meta iterator", K(ret)); } else if (OB_FAIL(iter->open( query_range, meta_type, *this, index_read_info, allocator, is_reverse_scan, sample_step))) { - if (OB_UNLIKELY(OB_BEYOND_THE_RANGE != ret)) { - LOG_WARN("Fail to open secondary meta iterator with range", - K(ret), K(query_range), K(meta_type), K_(meta), K(is_reverse_scan), K(sample_step)); - } + LOG_WARN("Fail to open secondary meta iterator with range", + K(ret), K(query_range), K(meta_type), K_(meta), K(is_reverse_scan), K(sample_step)); } else { meta_iter = iter; } diff --git a/src/storage/blocksstable/ob_sstable.h b/src/storage/blocksstable/ob_sstable.h index f03a2d5ccd..e1d83b9f76 100644 --- a/src/storage/blocksstable/ob_sstable.h +++ b/src/storage/blocksstable/ob_sstable.h @@ -162,7 +162,7 @@ public: public: int dump2text( const char *dir_name, - const share::schema::ObMergeSchema &schema, + const ObStorageSchema &schema, const char *fname) { // TODO: print sstable diff --git 
a/src/storage/blocksstable/ob_sstable_meta.h b/src/storage/blocksstable/ob_sstable_meta.h index 620965fdea..676a8fb8c7 100644 --- a/src/storage/blocksstable/ob_sstable_meta.h +++ b/src/storage/blocksstable/ob_sstable_meta.h @@ -100,7 +100,7 @@ public: int64_t progressive_merge_round_; int64_t progressive_merge_step_; int64_t upper_trans_version_; - // major/buf minor: snapshot version; others: max commit version + // major/meta major: snapshot version; others: max commit version int64_t max_merged_trans_version_; // recycle_version only avaliable for minor sstable, recored recycled multi version start int64_t recycle_version_; diff --git a/src/storage/blocksstable/ob_sstable_sec_meta_iterator.cpp b/src/storage/blocksstable/ob_sstable_sec_meta_iterator.cpp index d62a3b6f38..4f1a475a0e 100644 --- a/src/storage/blocksstable/ob_sstable_sec_meta_iterator.cpp +++ b/src/storage/blocksstable/ob_sstable_sec_meta_iterator.cpp @@ -142,6 +142,7 @@ int ObSSTableSecMetaIterator::open( if (OB_FAIL(ret) || is_prefetch_end_) { } else if (OB_UNLIKELY(start_key_beyond_range)) { + is_inited_ = true; set_iter_end(); is_inited_ = true; } diff --git a/src/storage/compaction/ob_compaction_diagnose.cpp b/src/storage/compaction/ob_compaction_diagnose.cpp index c47dbb956a..6e1958614a 100644 --- a/src/storage/compaction/ob_compaction_diagnose.cpp +++ b/src/storage/compaction/ob_compaction_diagnose.cpp @@ -23,6 +23,8 @@ #include "storage/tx_storage/ob_ls_service.h" #include "storage/ls/ob_ls.h" #include "observer/omt/ob_tenant_config_mgr.h" +#include "share/ob_tablet_meta_table_compaction_operator.h" +#include "storage/compaction/ob_compaction_util.h" namespace oceanbase { @@ -291,6 +293,12 @@ int ObCompactionDiagnoseMgr::diagnose_dag( if (OB_FAIL(dag.init_by_param(&param))) { STORAGE_LOG(WARN, "failed to init dag", K(ret), K(param)); + } else if (is_minor_merge(merge_type)) { + if (OB_FAIL(MTL(ObTenantDagScheduler *)->diagnose_minor_exe_dag(&dag, progress))) { + if (OB_HASH_NOT_EXIST != ret) { +
STORAGE_LOG(WARN, "failed to diagnose minor execute dag", K(ret), K(ls_id), K(tablet_id), K(progress)); + } + } } else if (OB_FAIL(MTL(ObTenantDagScheduler *)->diagnose_dag(&dag, progress))) { if (OB_HASH_NOT_EXIST != ret) { STORAGE_LOG(WARN, "failed to diagnose dag", K(ret), K(ls_id), K(tablet_id), K(progress)); @@ -374,12 +382,12 @@ int ObCompactionDiagnoseMgr::diagnose_tenant_tablet() const int64_t merged_version = scheduler->get_merged_version(); if (merged_version == ObTenantTabletScheduler::INIT_COMPACTION_SCN) { // do nothing - } else if (OB_FAIL(MTL(ObTenantFreezeInfoMgr *)->get_freeze_info_behind_snapshot_version(merged_version, freeze_info))) { - LOG_WARN("failed to get freeze info behind snapshot version", K(ret), K(merged_version)); + } else if (OB_TMP_FAIL(MTL(ObTenantFreezeInfoMgr *)->get_freeze_info_behind_snapshot_version(merged_version, freeze_info))) { + LOG_WARN("failed to get freeze info behind snapshot version", K(tmp_ret), K(merged_version)); if (can_add_diagnose_info() && OB_TMP_FAIL(SET_DIAGNOSE_INFO( info_array_[idx_++], - MAJOR_MERGE, + MEDIUM_MERGE, MTL_ID(), ObLSID(INT64_MAX), ObTabletID(INT64_MAX), @@ -392,6 +400,7 @@ int ObCompactionDiagnoseMgr::diagnose_tenant_tablet() compaction_scn = freeze_info.freeze_version; } } + (void)diagnose_medium_scn_table(compaction_scn); while (OB_SUCC(ret)) { // loop all log_stream bool need_merge = false; @@ -413,7 +422,7 @@ int ObCompactionDiagnoseMgr::diagnose_tenant_tablet() && can_add_diagnose_info()) { SET_DIAGNOSE_INFO( info_array_[idx_++], - MAJOR_MERGE, + MEDIUM_MERGE, MTL_ID(), ls_id, ObTabletID(INT64_MAX), @@ -459,13 +468,16 @@ int ObCompactionDiagnoseMgr::diagnose_tenant_tablet() compaction_scn, ls_id, *tablet_handle.get_obj()))) { - LOG_WARN("failed to get diagnoe major merge", K(tmp_ret)); + LOG_WARN("failed to get diagnose major merge", K(tmp_ret)); } if (OB_TMP_FAIL(diagnose_tablet_mini_merge(ls_id, *tablet_handle.get_obj()))) { - LOG_WARN("failed to get diagnoe mini merge", K(tmp_ret)); 
+ LOG_WARN("failed to get diagnose mini merge", K(tmp_ret)); } if (OB_TMP_FAIL(diagnose_tablet_minor_merge(ls_id, *tablet_handle.get_obj()))) { - LOG_WARN("failed to get diagnoe minor merge", K(tmp_ret)); + LOG_WARN("failed to get diagnose minor merge", K(tmp_ret)); + } + if (OB_TMP_FAIL(diagnose_tablet_medium_merge(ls_id, *tablet_handle.get_obj()))) { + LOG_WARN("failed to get diagnose medium merge", K(tmp_ret)); } } } // end of while @@ -534,7 +546,7 @@ int ObCompactionDiagnoseMgr::diagnose_tablet_minor_merge(const ObLSID &ls_id, Ob { int ret = OB_SUCCESS; if (tablet.get_table_store().get_minor_sstables().count() >= DIAGNOSE_TABLE_CNT_IN_STORAGE) { - ObTabletMinorMergeDag dag; + ObTabletMergeExecuteDag dag; if (OB_FAIL(diagnose_tablet_merge( dag, MINOR_MERGE, @@ -546,6 +558,53 @@ int ObCompactionDiagnoseMgr::diagnose_tablet_minor_merge(const ObLSID &ls_id, Ob return ret; } +int ObCompactionDiagnoseMgr::diagnose_tablet_medium_merge( + const ObLSID &ls_id, + ObTablet &tablet) +{ + int ret = OB_SUCCESS; + const storage::ObMergeType merge_type = MEDIUM_MERGE; + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + const int64_t max_serialized_medium_scn = tablet.get_tablet_meta().max_serialized_medium_scn_; + const ObMediumCompactionInfoList &medium_list = tablet.get_medium_compaction_info_list(); + ObITable *table = tablet.get_table_store().get_major_sstables().get_boundary_table(true/*last*/); + const int64_t wait_check_medium_scn = medium_list.get_wait_check_medium_scn(); + int64_t max_sync_medium_scn = 0; + + if (OB_FAIL(tablet.get_max_sync_medium_scn(max_sync_medium_scn))){ + LOG_WARN("failed to get max sync medium scn", K(ret), K(ls_id), K(tablet_id)); + } else if (max_sync_medium_scn > max_serialized_medium_scn) { + // wait memtable dump + if (ObTimeUtility::fast_current_time() > max_sync_medium_scn + WAIT_MEDIUM_SCHEDULE_INTERVAL * 2 + && can_add_diagnose_info() + && OB_FAIL(SET_DIAGNOSE_INFO( + info_array_[idx_++], + merge_type, + MTL_ID(), + 
ls_id, + tablet_id, + ObCompactionDiagnoseInfo::DIA_STATUS_NOT_SCHEDULE, + ObTimeUtility::fast_current_time(), + "max_receive_medium_scn", max_sync_medium_scn, + "max_serialized_medium_scn", max_serialized_medium_scn))) { + LOG_WARN("failed to add diagnose info", K(ret), K(ls_id), K(tablet_id)); + } + } else if (0 == wait_check_medium_scn) { + // do nothing + } else if (OB_NOT_NULL(table) && table->get_snapshot_version() < wait_check_medium_scn) { + ObTabletMajorMergeDag dag; + if (OB_FAIL(diagnose_tablet_merge( + dag, + merge_type, + ls_id, + tablet_id, + wait_check_medium_scn))) { + LOG_WARN("diagnose failed", K(ret), K(ls_id), K(tablet_id), KPC(table)); + } + } + return ret; +} + int ObCompactionDiagnoseMgr::diagnose_tablet_major_merge( const int64_t compaction_scn, const ObLSID &ls_id, @@ -553,24 +612,46 @@ int ObCompactionDiagnoseMgr::diagnose_tablet_major_merge( { int ret = OB_SUCCESS; const ObTabletTableStore &table_store = tablet.get_table_store(); + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + const ObMergeType merge_type = MEDIUM_MERGE; + int64_t max_sync_medium_scn = 0; ObSSTable *latest_major_sstable = static_cast( table_store.get_major_sstables().get_boundary_table(true/*last*/)); if (OB_UNLIKELY(compaction_scn <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(compaction_scn)); + } else if (OB_FAIL(tablet.get_max_sync_medium_scn(max_sync_medium_scn))) { + LOG_WARN("failed to get max sync medium snapshot", K(ret), K(tablet_id)); } else { int tmp_ret = OB_SUCCESS; if (nullptr == latest_major_sstable || latest_major_sstable->get_snapshot_version() < compaction_scn) { - ObTabletMajorMergeDag dag; - if (OB_TMP_FAIL(diagnose_tablet_merge( - dag, - MAJOR_MERGE, - ls_id, - tablet.get_tablet_meta().tablet_id_, - compaction_scn))) { - LOG_WARN("diagnose failed", K(tmp_ret), K(ls_id), K(tablet), KPC(latest_major_sstable)); + if (max_sync_medium_scn < compaction_scn) { + if (can_add_diagnose_info() + && 
ObTimeUtility::fast_current_time() > compaction_scn + WAIT_MEDIUM_SCHEDULE_INTERVAL + && OB_FAIL(SET_DIAGNOSE_INFO( + info_array_[idx_++], + merge_type, + MTL_ID(), + ls_id, + tablet_id, + ObCompactionDiagnoseInfo::DIA_STATUS_NOT_SCHEDULE, + ObTimeUtility::fast_current_time(), + "max_receive_medium_snapshot", max_sync_medium_scn))) { + LOG_WARN("failed to add diagnose info", K(ret), K(ls_id), K(tablet_id)); + } + } else { + ObTabletMajorMergeDag dag; + if (OB_TMP_FAIL(diagnose_tablet_merge( + dag, + merge_type, + ls_id, + tablet.get_tablet_meta().tablet_id_, + compaction_scn))) { + LOG_WARN("diagnose failed", K(tmp_ret), K(ls_id), K(tablet), KPC(latest_major_sstable)); + } } + } } return ret; @@ -623,7 +704,8 @@ int ObCompactionDiagnoseMgr::get_suspect_and_warning_info( ObDagWarningInfo *warning_info = nullptr; bool add_schedule_info = false; - if (OB_FAIL(ObScheduleSuspectInfoMgr::get_instance().get_suspect_info(ObScheduleSuspectInfo::gen_hash(MTL_ID(), dag.hash()), info))) { + if (OB_FAIL(ObScheduleSuspectInfoMgr::get_instance().get_suspect_info( + ObScheduleSuspectInfo::gen_hash(MTL_ID(), dag.hash()), info))) { if (OB_HASH_NOT_EXIST != ret) { LOG_WARN("failed to get suspect info", K(ret), K(ls_id), K(tablet_id)); } else { // no schedule suspect info @@ -665,11 +747,12 @@ int ObCompactionDiagnoseMgr::diagnose_no_dag( int ret = OB_SUCCESS; ObScheduleSuspectInfo info; bool add_schedule_info = false; + if (OB_FAIL(get_suspect_and_warning_info(dag, merge_type, ls_id, tablet_id, info))) { LOG_WARN("failed to get suspect and warning info", K(ret), K(ls_id), K(tablet_id)); } else if (!info.is_valid()) { // do nothing - } else if (MAJOR_MERGE == merge_type) { + } else if (MEDIUM_MERGE == merge_type) { if (OB_UNLIKELY(compaction_scn <= 0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("merge version or freeze ts is invalid", K(ret), K(compaction_scn)); @@ -722,6 +805,28 @@ int ObCompactionDiagnoseMgr::diagnose_no_dag( return ret; } +int 
ObCompactionDiagnoseMgr::diagnose_medium_scn_table(const int64_t compaction_scn) +{ + int ret = OB_SUCCESS; + int64_t error_tablet_cnt = 0; + if (OB_FAIL(ObTabletMetaTableCompactionOperator::diagnose_compaction_scn(MTL_ID(), error_tablet_cnt))) { + LOG_WARN("failed to diagnose compaction scn", K(ret)); + } else if (0 != error_tablet_cnt + && can_add_diagnose_info() + && OB_FAIL(SET_DIAGNOSE_INFO( + info_array_[idx_++], + MEDIUM_MERGE, + MTL_ID(), + ObLSID(INT64_MAX), + ObTabletID(INT64_MAX), + ObCompactionDiagnoseInfo::DIA_STATUS_RUNNING, + ObTimeUtility::fast_current_time(), + "error_tablet_cnt", error_tablet_cnt))) { + LOG_WARN("failed to add diagnose info", K(ret)); + } + return ret; +} + /* * ObTabletCompactionProgressIterator implement * */ diff --git a/src/storage/compaction/ob_compaction_diagnose.h b/src/storage/compaction/ob_compaction_diagnose.h index bb21de993c..6de1453033 100644 --- a/src/storage/compaction/ob_compaction_diagnose.h +++ b/src/storage/compaction/ob_compaction_diagnose.h @@ -47,7 +47,7 @@ struct ObScheduleSuspectInfo : public common::ObDLinkBase char suspect_info_[common::OB_DIAGNOSE_INFO_LENGTH]; }; -class ObScheduleSuspectInfoMgr{ +class ObScheduleSuspectInfoMgr { public: ObScheduleSuspectInfoMgr(); ~ObScheduleSuspectInfoMgr() { destroy(); } @@ -125,6 +125,9 @@ public: private: int diagnose_tablet_mini_merge(const ObLSID &ls_id, ObTablet &tablet); int diagnose_tablet_minor_merge(const ObLSID &ls_id, ObTablet &tablet); + int diagnose_tablet_medium_merge( + const ObLSID &ls_id, + ObTablet &tablet); int diagnose_tablet_major_merge( const int64_t compaction_scn, const ObLSID &ls_id, @@ -147,6 +150,7 @@ private: const ObLSID ls_id, const ObTabletID tablet_id, ObScheduleSuspectInfo &info); + int diagnose_medium_scn_table(const int64_t compaction_scn); OB_INLINE bool can_add_diagnose_info() { return idx_ < max_cnt_; } int get_suspect_info( const ObMergeType merge_type, @@ -155,7 +159,7 @@ private: ObScheduleSuspectInfo &ret_info); private: - 
static const int64_t SCHEDULE_MINI_INTERVAL = 1000L * 1000L * 30L; // 30 seconds + static const int64_t WAIT_MEDIUM_SCHEDULE_INTERVAL = 1000L * 1000L * 120L; // 120 seconds static const int64_t SUSPECT_INFO_WARNING_THRESHOLD = 1000L * 1000L * 60L * 5; // 5 mins bool is_inited_; ObCompactionDiagnoseInfo *info_array_; diff --git a/src/storage/compaction/ob_compaction_suggestion.cpp b/src/storage/compaction/ob_compaction_suggestion.cpp index 36b6d30057..f75884be96 100644 --- a/src/storage/compaction/ob_compaction_suggestion.cpp +++ b/src/storage/compaction/ob_compaction_suggestion.cpp @@ -65,7 +65,7 @@ int ObCompactionSuggestionMgr::analyze_merge_info( } } } - if (MAJOR_MERGE == merge_info.merge_type_ + if (is_major_merge_type(merge_info.merge_type_) && merge_info.macro_block_count_ >= MACRO_CNT_PARAM) { if (1 == merge_info.concurrent_cnt_) { ADD_COMPACTION_INFO_PARAM(buf, buf_len, diff --git a/src/storage/compaction/ob_compaction_suggestion.h b/src/storage/compaction/ob_compaction_suggestion.h index 1aabbc697b..8525af82c9 100644 --- a/src/storage/compaction/ob_compaction_suggestion.h +++ b/src/storage/compaction/ob_compaction_suggestion.h @@ -13,7 +13,10 @@ #ifndef SRC_STORAGE_COMPACTION_OB_COMPACTION_SUGGESTION_H_ #define SRC_STORAGE_COMPACTION_OB_COMPACTION_SUGGESTION_H_ -#include "storage/ob_i_store.h" +#include "storage/compaction/ob_compaction_util.h" +#include "lib/allocator/page_arena.h" +#include "lib/utility/ob_print_utils.h" +#include "lib/container/ob_iarray.h" namespace oceanbase { diff --git a/src/storage/compaction/ob_compaction_util.cpp b/src/storage/compaction/ob_compaction_util.cpp new file mode 100644 index 0000000000..d60056cd44 --- /dev/null +++ b/src/storage/compaction/ob_compaction_util.cpp @@ -0,0 +1,40 @@ +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. 
+// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. +#include "storage/compaction/ob_compaction_util.h" +#include "share/ob_define.h" +namespace oceanbase +{ +namespace storage +{ + +const char * ObMergeTypeStr[] = { + "MINOR_MERGE", + "HISTORY_MINOR_MERGE", + "META_MAJOR_MERGE", + "MINI_MERGE", + "MAJOR_MERGE", + "MEDIUM_MERGE", + "DDL_KV_MERGE", + "BACKFILL_TX_MERGE" +}; + +const char *merge_type_to_str(const ObMergeType &merge_type) +{ + STATIC_ASSERT(static_cast(MERGE_TYPE_MAX) == ARRAYSIZEOF(ObMergeTypeStr), "merge type str len is mismatch"); + const char *str = ""; + if (merge_type >= MERGE_TYPE_MAX || merge_type <= INVALID_MERGE_TYPE) { + str = "invalid_merge_type"; + } else { + str = ObMergeTypeStr[merge_type]; + } + return str; +} + +} // namespace storage +} // namespace oceanbase diff --git a/src/storage/compaction/ob_compaction_util.h b/src/storage/compaction/ob_compaction_util.h new file mode 100644 index 0000000000..dc603aba1a --- /dev/null +++ b/src/storage/compaction/ob_compaction_util.h @@ -0,0 +1,81 @@ +//Copyright (c) 2022 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
+ +#ifndef OB_STORAGE_COMPACTION_UTIL_H_ +#define OB_STORAGE_COMPACTION_UTIL_H_ + +namespace oceanbase +{ +namespace storage // temp solution +{ +enum ObMergeType +{ + INVALID_MERGE_TYPE = -1, + MINOR_MERGE = 0, // minor merge, compaction several mini sstable into one larger mini sstable + HISTORY_MINOR_MERGE = 1, + META_MAJOR_MERGE = 2, + MINI_MERGE = 3, // mini merge, only flush memtable + MAJOR_MERGE = 4, + MEDIUM_MERGE = 5, + DDL_KV_MERGE = 6, + BACKFILL_TX_MERGE = 7, + MERGE_TYPE_MAX, +}; + +const char *merge_type_to_str(const ObMergeType &merge_type); + +inline bool is_major_merge(const ObMergeType &merge_type) +{ + return MAJOR_MERGE == merge_type; +} +inline bool is_medium_merge(const ObMergeType &merge_type) +{ + return MEDIUM_MERGE == merge_type; +} +inline bool is_major_merge_type(const ObMergeType &merge_type) +{ + return is_medium_merge(merge_type) || is_major_merge(merge_type); +} +inline bool is_mini_merge(const ObMergeType &merge_type) +{ + return MINI_MERGE == merge_type; +} +inline bool is_minor_merge(const ObMergeType &merge_type) +{ + return MINOR_MERGE == merge_type; +} +inline bool is_multi_version_merge(const ObMergeType &merge_type) +{ + return MINOR_MERGE == merge_type + || MINI_MERGE == merge_type + || HISTORY_MINOR_MERGE == merge_type; +} +inline bool is_history_minor_merge(const ObMergeType &merge_type) +{ + return HISTORY_MINOR_MERGE == merge_type; +} +inline bool is_minor_merge_type(const ObMergeType &merge_type) +{ + return is_minor_merge(merge_type) || is_history_minor_merge(merge_type); +} +inline bool is_meta_major_merge(const ObMergeType &merge_type) +{ + return META_MAJOR_MERGE == merge_type; +} + +inline bool is_backfill_tx_merge(const ObMergeType &merge_type) +{ + return BACKFILL_TX_MERGE == merge_type; +} + +} // namespace storage +} // namespace oceanbase + +#endif // OB_STORAGE_COMPACTION_UTIL_H_ diff --git a/src/storage/compaction/ob_medium_compaction_func.cpp b/src/storage/compaction/ob_medium_compaction_func.cpp new file 
mode 100644 index 0000000000..9d4c8b7778 --- /dev/null +++ b/src/storage/compaction/ob_medium_compaction_func.cpp @@ -0,0 +1,804 @@ +//Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. +#define USING_LOG_PREFIX STORAGE_COMPACTION +#include "storage/compaction/ob_medium_compaction_func.h" +#include "storage/compaction/ob_medium_compaction_mgr.h" +#include "storage/compaction/ob_tablet_merge_ctx.h" +#include "storage/compaction/ob_partition_merge_policy.h" +#include "share/tablet/ob_tablet_info.h" +#include "share/tablet/ob_tablet_table_operator.h" +#include "share/ob_tablet_replica_checksum_operator.h" +#include "share/ob_ls_id.h" +#include "share/schema/ob_tenant_schema_service.h" +#include "logservice/ob_log_service.h" +#include "storage/meta_mem/ob_tenant_meta_mem_mgr.h" +#include "storage/tx_storage/ob_tenant_freezer.h" +#include "storage/compaction/ob_tenant_tablet_scheduler.h" +#include "lib/oblog/ob_log_module.h" +#include "lib/utility/ob_tracepoint.h" +#include "storage/ob_partition_range_spliter.h" +#include "storage/compaction/ob_compaction_diagnose.h" + +namespace oceanbase +{ +using namespace storage; +using namespace share; +using namespace common; + +namespace compaction +{ + +ObMediumCompactionScheduleFunc::ChooseMediumScn ObMediumCompactionScheduleFunc::choose_medium_scn[MEDIUM_FUNC_CNT] + = { ObMediumCompactionScheduleFunc::choose_medium_snapshot, + ObMediumCompactionScheduleFunc::choose_major_snapshot, + }; + +ObMediumCompactionScheduleFunc::PrepareTableIter 
ObMediumCompactionScheduleFunc::prepare_table_iter[MEDIUM_FUNC_CNT] + = { ObMediumCompactionScheduleFunc::prepare_iter_for_medium, + ObMediumCompactionScheduleFunc::prepare_iter_for_major, + }; + +int ObMediumCompactionScheduleFunc::choose_medium_snapshot( + ObLS &ls, + ObTablet &tablet, + const int64_t schedule_medium_snapshot, + const ObAdaptiveMergePolicy::AdaptiveMergeReason &merge_reason, + ObMediumCompactionInfo &medium_info, + ObGetMergeTablesResult &result) +{ + UNUSED(schedule_medium_snapshot); + int ret = OB_SUCCESS; + ObGetMergeTablesParam param; + param.merge_type_ = META_MAJOR_MERGE; + if (OB_FAIL(ObAdaptiveMergePolicy::get_meta_merge_tables( + param, + ls, + tablet, + result))) { + if (OB_NO_NEED_MERGE != ret) { + LOG_WARN("failed to get meta merge tables", K(ret), K(param)); + } + } else { + medium_info.compaction_type_ = ObMediumCompactionInfo::MEDIUM_COMPACTION; + medium_info.medium_merge_reason_ = merge_reason; + medium_info.medium_snapshot_ = result.version_range_.snapshot_version_; + medium_info.medium_scn_ = result.scn_range_.end_scn_; + LOG_TRACE("choose_medium_snapshot", K(ret), "ls_id", ls.get_ls_id(), + "tablet_id", tablet.get_tablet_meta().tablet_id_, K(result), K(medium_info)); + } + return ret; +} + +int ObMediumCompactionScheduleFunc::choose_major_snapshot( + ObLS &ls, + ObTablet &tablet, + const int64_t schedule_medium_snapshot, + const ObAdaptiveMergePolicy::AdaptiveMergeReason &merge_reason, + ObMediumCompactionInfo &medium_info, + ObGetMergeTablesResult &result) +{ + UNUSED(merge_reason); + int ret = OB_SUCCESS; + ObTenantFreezeInfoMgr::FreezeInfo freeze_info; + if (OB_FAIL(MTL_CALL_FREEZE_INFO_MGR(get_freeze_info_by_snapshot_version, schedule_medium_snapshot, freeze_info))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_WARN("failed to get freeze info", K(ret), K(schedule_medium_snapshot), "ls_id", ls.get_ls_id(), + "tablet_id", tablet.get_tablet_meta().tablet_id_); + } else { + ret = OB_NO_NEED_MERGE; + } + } else if 
(OB_FAIL(medium_info.medium_scn_.convert_for_tx(schedule_medium_snapshot))) { + LOG_WARN("failed to convert into scn", K(ret), K(schedule_medium_snapshot)); + } else { + medium_info.compaction_type_ = ObMediumCompactionInfo::MAJOR_COMPACTION; + medium_info.medium_merge_reason_ = ObAdaptiveMergePolicy::AdaptiveMergeReason::NONE; + medium_info.medium_snapshot_ = schedule_medium_snapshot; + result.schema_version_ = freeze_info.schema_version; + LOG_TRACE("choose_major_snapshot", K(ret), "ls_id", ls.get_ls_id(), + "tablet_id", tablet.get_tablet_meta().tablet_id_, K(medium_info), K(freeze_info)); + } + return ret; +} + +int ObMediumCompactionScheduleFunc::get_status_from_inner_table( + ObTabletCompactionScnInfo &ret_info) +{ + int ret = OB_SUCCESS; + ret_info.reset(); + + ObTabletCompactionScnInfo snapshot_info( + MTL_ID(), + ls_.get_ls_id(), + tablet_.get_tablet_meta().tablet_id_, + ObTabletReplica::SCN_STATUS_IDLE); + if (OB_FAIL(ObTabletMetaTableCompactionOperator::get_status(snapshot_info, ret_info))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; // first schedule medium snapshot + ret_info.status_ = ObTabletReplica::SCN_STATUS_IDLE; + } else { + LOG_WARN("failed to get cur medium snapshot", K(ret), K(ret_info)); + } + } + return ret; +} + +// call this func with PALF LEADER ROLE && wait_check_medium_scn_ = 0 +int ObMediumCompactionScheduleFunc::schedule_next_medium_for_leader( + const int64_t major_snapshot) +{ + int ret = OB_SUCCESS; + ObRole role = INVALID_ROLE; + if (OB_FAIL(ls_.get_ls_role(role))) { + LOG_WARN("failed to get ls role", K(ret), KPC(this)); + } else if (LEADER == role) { + // only log_handler_leader can schedule + ret = schedule_next_medium_primary_cluster(major_snapshot); + } + return ret; +} + +int ObMediumCompactionScheduleFunc::schedule_next_medium_primary_cluster( + const int64_t schedule_major_snapshot) +{ + int ret = OB_SUCCESS; + ObAdaptiveMergePolicy::AdaptiveMergeReason adaptive_merge_reason =
ObAdaptiveMergePolicy::AdaptiveMergeReason::NONE; + ObTabletCompactionScnInfo ret_info; + // check last medium type, select inner table for last major + bool schedule_medium_flag = false; + int64_t max_sync_medium_scn = 0; + ObITable *last_major = tablet_.get_table_store().get_major_sstables().get_boundary_table(true/*last*/); + const ObMediumCompactionInfoList &medium_list = tablet_.get_medium_compaction_info_list(); + if (OB_ISNULL(last_major)) { + // no major, do nothing + } else if (!medium_list.could_schedule_next_round()) { // check serialized list + // do nothing + } else if (OB_FAIL(tablet_.get_max_sync_medium_scn(max_sync_medium_scn))) { // check info in memory + LOG_WARN("failed to get max sync medium scn", K(ret), K(max_sync_medium_scn)); + } else if (0 != schedule_major_snapshot && schedule_major_snapshot > max_sync_medium_scn) { + schedule_medium_flag = true; + } else if (nullptr != last_major && last_major->get_snapshot_version() < max_sync_medium_scn) { + // do nothing + } else if (OB_FAIL(ObAdaptiveMergePolicy::get_adaptive_merge_reason(tablet_, adaptive_merge_reason))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get meta merge priority", K(ret), KPC(this)); + } else { + ret = OB_SUCCESS; + } + } else if (adaptive_merge_reason > ObAdaptiveMergePolicy::AdaptiveMergeReason::NONE) { + schedule_medium_flag = true; + } + LOG_DEBUG("schedule next medium in primary cluster", K(ret), KPC(this), K(schedule_medium_flag), + K(schedule_major_snapshot), K(adaptive_merge_reason), KPC(last_major), K(medium_list), K(max_sync_medium_scn)); + + if (OB_FAIL(ret) || !schedule_medium_flag) { + } else if (ObMediumCompactionInfo::MAJOR_COMPACTION == medium_list.get_last_compaction_type()) { + // for normal medium, checksum error happened, wait_check_medium_scn_ will never = 0 + // for major, need select inner_table to check RS status + if (OB_FAIL(get_status_from_inner_table(ret_info))) { + LOG_WARN("failed to get status from inner tablet", K(ret), 
KPC(this)); + } else if (ret_info.could_schedule_next_round(last_major->get_snapshot_version())) { + ret = decide_medium_snapshot(schedule_major_snapshot); + } + } else { + ret = decide_medium_snapshot(schedule_major_snapshot, adaptive_merge_reason); + } + + return ret; +} + +int ObMediumCompactionScheduleFunc::decide_medium_snapshot( + const int64_t schedule_medium_snapshot, + const ObAdaptiveMergePolicy::AdaptiveMergeReason merge_reason) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + const ObTabletID &tablet_id = tablet_.get_tablet_meta().tablet_id_; + int64_t max_sync_medium_scn = 0; + LOG_DEBUG("decide_medium_snapshot", K(ret), KPC(this), K(schedule_medium_snapshot)); + if (OB_FAIL(tablet_.get_max_sync_medium_scn(max_sync_medium_scn))) { + LOG_WARN("failed to get max sync medium scn", K(ret), KPC(this)); + } else if (OB_FAIL(ls_.add_dependent_medium_tablet(tablet_id))) { // add dependent_id in ObLSReservedSnapshotMgr + LOG_WARN("failed to add dependent tablet", K(ret), KPC(this)); + } else { + const bool is_major = (0 != schedule_medium_snapshot); + int64_t multi_version_start = 0; + ObGetMergeTablesResult result; + ObMediumCompactionInfo medium_info; + if (OB_FAIL(choose_medium_scn[is_major](ls_, tablet_, schedule_medium_snapshot, merge_reason, medium_info, result))) { + if (OB_NO_NEED_MERGE != ret) { + LOG_WARN("failed to choose medium snapshot", K(ret), KPC(this)); + } + } else if (is_major) { + // do nothing + } else if (medium_info.medium_snapshot_ <= max_sync_medium_scn) { + ret = OB_NO_NEED_MERGE; + } else if (OB_FAIL(ObTablet::get_kept_multi_version_start(ls_, tablet_, multi_version_start))) { + LOG_WARN("failed to get multi_version_start", K(ret), KPC(this)); + } else if (medium_info.medium_snapshot_ < multi_version_start) { + // chosen medium snapshot is far too old + LOG_INFO("chosen medium snapshot is invalid for multi_version_start", K(ret), KPC(this), + K(medium_info), K(multi_version_start)); + const share::SCN &weak_read_ts = 
ls_.get_ls_wrs_handler()->get_ls_weak_read_ts(); + if (medium_info.medium_snapshot_ == tablet_.get_snapshot_version() // no uncommitted sstable + && weak_read_ts.get_val_for_tx() <= multi_version_start + && weak_read_ts.get_val_for_tx() + DEFAULT_SCHEDULE_MEDIUM_INTERVAL < ObTimeUtility::current_time_ns()) { + medium_info.medium_snapshot_ = weak_read_ts.get_val_for_tx(); + LOG_INFO("use weak_read_ts to schedule medium", K(ret), KPC(this), + K(medium_info), K(multi_version_start), K(weak_read_ts)); + } else { + ret = OB_NO_NEED_MERGE; + } + } + if (OB_SUCC(ret) && !is_major) { + const int64_t current_time = ObTimeUtility::current_time_ns(); + if (multi_version_start < current_time) { + const int64_t time_interval = (current_time - multi_version_start) / 2; + ObSSTable *table = static_cast(tablet_.get_table_store().get_major_sstables().get_boundary_table(true/*last*/)); + if (OB_ISNULL(table)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table is unexpected null", K(ret), KP(table)); + } else if (table->get_snapshot_version() + time_interval > medium_info.medium_snapshot_) { + ret = OB_NO_NEED_MERGE; + LOG_DEBUG("schedule medium frequently", K(ret), KPC(table), K(medium_info), K(time_interval)); + } + } + } + if (FAILEDx(prepare_medium_info(result, medium_info))) { + if (OB_TABLE_IS_DELETED == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to prepare medium info", K(ret), K(result), K(tablet_.get_storage_schema())); + } + } else if (OB_FAIL(submit_medium_clog(medium_info))) { + LOG_WARN("failed to submit medium clog and update inner table", K(ret), KPC(this)); + } else if (OB_TMP_FAIL(MTL(ObTenantFreezer *)->tablet_freeze(tablet_id, false/*force_freeze*/))) { + // need to freeze memtable with MediumCompactionInfo + LOG_WARN("failed to freeze tablet", K(tmp_ret), KPC(this)); + } + // delete tablet_id in ObLSReservedSnapshotMgr even if submit clog or update inner table failed + if (OB_TMP_FAIL(ls_.del_dependent_medium_tablet(tablet_id))) { + LOG_ERROR("failed to 
delete dependent medium tablet", K(tmp_ret), KPC(this)); + ob_abort(); + } + ret = OB_NO_NEED_MERGE == ret ? OB_SUCCESS : ret; + if (OB_FAIL(ret)) { + // add schedule suspect info + ADD_SUSPECT_INFO(MEDIUM_MERGE, ls_.get_ls_id(), tablet_id, + "schedule medium failed", + "compaction_scn", medium_info.medium_snapshot_, + "schema_version", medium_info.storage_schema_.schema_version_, + "error_no", ret); + } + } + return ret; +} + +int ObMediumCompactionScheduleFunc::init_parallel_range( + const ObGetMergeTablesResult &result, + ObMediumCompactionInfo &medium_info) +{ + int ret = OB_SUCCESS; + + int64_t expected_task_count = 0; + const int64_t tablet_size = medium_info.storage_schema_.get_tablet_size(); + const ObSSTable *first_sstable = + static_cast(tablet_.get_table_store().get_major_sstables().get_boundary_table(true/*last*/)); + if (OB_ISNULL(first_sstable)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("sstable is unexpected null", K(ret), K(tablet_)); + } else { + const int64_t macro_block_cnt = first_sstable->get_meta().get_macro_info().get_data_block_ids().count(); + int64_t inc_row_cnt = 0; + for (int64_t i = 0; i < result.handle_.get_count(); ++i) { + inc_row_cnt += static_cast(result.handle_.get_table(i))->get_meta().get_row_count(); + } + if ((0 == macro_block_cnt && inc_row_cnt > SCHEDULE_RANGE_ROW_COUNT_THRESHOLD) + || (first_sstable->get_meta().get_row_count() >= SCHEDULE_RANGE_ROW_COUNT_THRESHOLD + && inc_row_cnt >= first_sstable->get_meta().get_row_count() * SCHEDULE_RANGE_INC_ROW_COUNT_PERCENRAGE_THRESHOLD)) { + if (OB_FAIL(ObParallelMergeCtx::get_concurrent_cnt(tablet_size, macro_block_cnt, expected_task_count))) { + STORAGE_LOG(WARN, "failed to get concurrent cnt", K(ret), K(tablet_size), K(expected_task_count), + KPC(first_sstable)); + } + } + } + + if (OB_FAIL(ret)) { + } else if (expected_task_count <= 1) { + medium_info.clear_parallel_range(); + } else { + ObTableStoreIterator table_iter; + ObArrayArray range_array; + ObPartitionMultiRangeSpliter 
range_spliter; + ObSEArray input_range_array; + ObStoreRange range; + range.set_start_key(ObStoreRowkey::MIN_STORE_ROWKEY); + range.set_end_key(ObStoreRowkey::MAX_STORE_ROWKEY); + const bool is_major = medium_info.is_major_compaction(); + if (OB_FAIL(prepare_table_iter[is_major](tablet_, result, medium_info, table_iter))) { + LOG_WARN("failed to get table iter", K(ret), K(range_array)); + } else if (OB_FAIL(input_range_array.push_back(range))) { + LOG_WARN("failed to push back range", K(ret), K(range)); + } else if (OB_FAIL(range_spliter.get_split_multi_ranges( + input_range_array, + expected_task_count, + tablet_.get_index_read_info(), + table_iter, + allocator_, + range_array))) { + LOG_WARN("failed to get split multi range", K(ret), K(range_array)); + } else if (OB_FAIL(medium_info.gene_parallel_info(allocator_, range_array))) { + LOG_WARN("failed to get parallel ranges", K(ret), K(range_array)); + } + } + return ret; +} + +int ObMediumCompactionScheduleFunc::prepare_iter_for_major( + ObTablet &tablet, + const ObGetMergeTablesResult &result, + ObMediumCompactionInfo &medium_info, + ObTableStoreIterator &table_iter) +{ + UNUSED(result); + int ret = OB_SUCCESS; + table_iter.reset(); + ObSSTable *base_table = nullptr; + const ObTabletTableStore &table_store = tablet.get_table_store(); + + if (OB_UNLIKELY(!medium_info.is_valid() || !table_store.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid argument", K(ret), K(medium_info), K(table_store)); + } else if (OB_ISNULL(base_table = static_cast(table_store.get_major_sstables().get_boundary_table(true/*last*/)))) { + ret = OB_ENTRY_NOT_EXIST; + LOG_WARN("major sstable not exist", K(ret), K(table_store)); + } else if (base_table->get_snapshot_version() >= medium_info.medium_snapshot_) { + ret = OB_NO_NEED_MERGE; + } else if (OB_FAIL(table_iter.add_table(base_table))) { + LOG_WARN("failed to add table into iterator", K(ret), KP(base_table)); + } else { + const ObSSTableArray &minor_tables = 
table_store.get_minor_sstables(); + int64_t start_idx = 0; + bool start_add_table_flag = false; + for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.count_; ++i) { + if (OB_ISNULL(minor_tables[i])) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("table must not null", K(ret), K(i), K(minor_tables)); + } else if (minor_tables[i]->get_upper_trans_version() >= base_table->get_snapshot_version()) { + start_idx = i; + break; + } + } + if (FAILEDx(table_iter.add_tables(minor_tables.array_ + start_idx, minor_tables.count_ - start_idx))) { + LOG_WARN("failed to add tables", K(ret), K(start_idx)); + } + } + return ret; +} +int ObMediumCompactionScheduleFunc::prepare_iter_for_medium( + ObTablet &tablet, + const ObGetMergeTablesResult &result, + ObMediumCompactionInfo &medium_info, + ObTableStoreIterator &table_iter) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(result.handle_.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("handle is invalid", K(ret), K(result)); + } + for (int i = 0; OB_SUCC(ret) && i < result.handle_.get_count(); ++i) { + if (OB_FAIL(table_iter.add_table(result.handle_.get_table(i)))) { + LOG_WARN("failed to add table into table_iter", K(ret), K(i)); + } + } + return ret; +} + +int ObMediumCompactionScheduleFunc::prepare_medium_info( + const ObGetMergeTablesResult &result, + ObMediumCompactionInfo &medium_info) +{ + int ret = OB_SUCCESS; + ObTableStoreIterator table_iter; + medium_info.cluster_id_ = GCONF.cluster_id; + if (medium_info.is_major_compaction()) { + // get table schema + if (OB_UNLIKELY(result.schema_version_ <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("schema version is invalid", K(ret), K(result), KPC(this), K(medium_info)); + } else if (OB_FAIL(get_table_schema_to_merge(result.schema_version_, medium_info))) { + if (OB_TABLE_IS_DELETED != ret) { + LOG_WARN("failed to get table schema", K(ret), KPC(this), K(medium_info)); + } + } + } else if (OB_FAIL(medium_info.save_storage_schema(allocator_, tablet_.get_storage_schema()))) { + 
LOG_WARN("failed to save storage schema", K(ret), K(tablet_.get_storage_schema())); + } + + if (FAILEDx(init_parallel_range(result, medium_info))) { + LOG_WARN("failed to init parallel range", K(ret), K(medium_info)); + } else { + LOG_INFO("success to init parallel range", K(ret), K(medium_info)); + } + return ret; +} + +int ObMediumCompactionScheduleFunc::get_table_id( + ObMultiVersionSchemaService &schema_service, + const ObTabletID &tablet_id, + const int64_t schema_version, + uint64_t &table_id) +{ + int ret = OB_SUCCESS; + table_id = OB_INVALID_ID; + + ObSEArray tablet_ids; + ObSEArray table_ids; + if (OB_FAIL(tablet_ids.push_back(tablet_id))) { + LOG_WARN("failed to add tablet id", K(ret)); + } else if (OB_FAIL(schema_service.get_tablet_to_table_history(MTL_ID(), tablet_ids, schema_version, table_ids))) { + LOG_WARN("failed to get table id according to tablet id", K(ret), K(schema_version)); + } else if (OB_UNLIKELY(table_ids.empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected empty table id", K(ret), K(table_ids)); + } else if (table_ids.at(0) == OB_INVALID_ID){ + ret = OB_TABLE_IS_DELETED; + LOG_WARN("table is deleted", K(ret), K(tablet_id), K(schema_version)); + } else { + table_id = table_ids.at(0); + } + return ret; +} + +int ObMediumCompactionScheduleFunc::get_table_schema_to_merge( + const int64_t schema_version, + ObMediumCompactionInfo &medium_info) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = MTL_ID(); + const ObTabletID &tablet_id = tablet_.get_tablet_meta().tablet_id_; + uint64_t table_id = OB_INVALID_ID; + ObMultiVersionSchemaService *schema_service = nullptr; + schema::ObSchemaGetterGuard schema_guard; + const ObTableSchema *table_schema = nullptr; + int64_t save_schema_version = schema_version; + if (OB_ISNULL(schema_service = MTL(ObTenantSchemaService *)->get_schema_service())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to get schema service from MTL", K(ret)); + } else if (OB_FAIL(get_table_id(*schema_service, 
tablet_id, schema_version, table_id))) { + if (OB_TABLE_IS_DELETED != ret) { + LOG_WARN("failed to get table id", K(ret), K(tablet_id)); + } + } else if (OB_FAIL(schema_service->retry_get_schema_guard(tenant_id, + schema_version, + table_id, + schema_guard, + save_schema_version))) { + if (OB_TABLE_IS_DELETED != ret) { + LOG_WARN("Fail to get schema", K(ret), K(tenant_id), K(schema_version), K(table_id)); + } else { + LOG_WARN("table is deleted", K(ret), K(table_id)); + } + } else if (save_schema_version < schema_version) { + ret = OB_SCHEMA_ERROR; + LOG_WARN("can not use older schema version", K(ret), K(schema_version), K(save_schema_version), K(table_id)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_id, table_schema))) { + LOG_WARN("Fail to get table schema", K(ret), K(table_id)); + } else if (NULL == table_schema) { + if (OB_FAIL(schema_service->get_tenant_full_schema_guard(tenant_id, schema_guard))) { + LOG_WARN("Fail to get schema", K(ret), K(tenant_id)); + } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_id, table_schema))) { + LOG_WARN("Fail to get table schema", K(ret), K(table_id)); + } else if (NULL == table_schema) { + ret = OB_TABLE_IS_DELETED; + LOG_WARN("table is deleted", K(ret), K(table_id)); + } + } + if (OB_SUCC(ret)) { + int64_t max_schema_version = 0; + if (OB_FAIL(tablet_.get_max_sync_storage_schema_version(max_schema_version))) { + LOG_WARN("failed to get max sync storage schema version", K(ret), KPC(this)); + } else if (max_schema_version < table_schema->get_schema_version()) { + // need sync schema clog + if (OB_FAIL(tablet_.try_update_storage_schema( + table_id, + table_schema->get_schema_version(), + allocator_, + DEFAULT_SYNC_SCHEMA_CLOG_TIMEOUT))) { + LOG_WARN("failed to sync schema clog", K(ret), KPC(this), KPC(table_schema)); + } + } + if (FAILEDx(medium_info.storage_schema_.init( + allocator_, + *table_schema, + tablet_.get_tablet_meta().compat_mode_))) { + LOG_WARN("failed to init storage 
schema", K(ret), K(schema_version)); + } else { + FLOG_INFO("get schema to merge", K(table_id), K(schema_version), K(save_schema_version), + K(*reinterpret_cast(table_schema))); + } + } + return ret; +} + +int ObMediumCompactionScheduleFunc::submit_medium_clog( + ObMediumCompactionInfo &medium_info) +{ + int ret = OB_SUCCESS; + +#ifdef ERRSIM + ret = E(EventTable::EN_MEDIUM_COMPACTION_SUBMIT_CLOG_FAILED) ret; + if (OB_FAIL(ret)) { + LOG_INFO("set update medium clog failed with errsim", KPC(this)); + return ret; + } +#endif + if (OB_FAIL(tablet_.submit_medium_compaction_clog(medium_info, allocator_))) { + LOG_WARN("failed to submit medium compaction clog", K(ret), K(medium_info)); + } else { + LOG_INFO("success to submit medium compaction clog", K(ret), KPC(this), K(medium_info)); + } + return ret; +} + +int ObMediumCompactionScheduleFunc::check_medium_meta_table( + const int64_t check_medium_snapshot, + const ObLSID &ls_id, + const ObTabletID &tablet_id, + bool &merge_finish) +{ + int ret = OB_SUCCESS; + merge_finish = false; + ObTabletInfo tablet_info; + + if (OB_UNLIKELY(check_medium_snapshot <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(check_medium_snapshot), K(ls_id), K(tablet_id)); + } else if (OB_FAIL(init_tablet_filters())) { + LOG_WARN("failed to init tablet filters", K(ret)); + } else if (OB_FAIL(ObTabletTableOperator::get_tablet_info(GCTX.sql_proxy_, MTL_ID(), tablet_id, ls_id, tablet_info))) { + LOG_WARN("failed to get tablet info", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_UNLIKELY(!tablet_info.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tabled_id is invalid", K(ret), K(ls_id), K(tablet_id)); + } else { + const ObArray &replica_array = tablet_info.get_replicas(); + int64_t unfinish_cnt = 0; + bool pass = true; + for (int i = 0; OB_SUCC(ret) && i < replica_array.count(); ++i) { + const ObTabletReplica &replica = replica_array.at(i); + if (OB_UNLIKELY(!replica.is_valid())) { + ret = OB_ERR_UNEXPECTED; + 
LOG_WARN("replica info is invalid", K(ret), K(ls_id), K(tablet_id), K(replica)); + } else if (OB_FAIL(filters_.check(replica, pass))) { + LOG_WARN("filter replica failed", K(ret), K(replica), K(filters_)); + } else if (!pass) { + // do nothing + } else if (replica.get_snapshot_version() >= check_medium_snapshot) { + // replica may have check_medium_snapshot = 2, but have received medium info of 3, + // when this replica is elected as leader, this will happened + } else { + unfinish_cnt++; + } + } // end of for + FLOG_INFO("check_medium_compaction_finish", K(ret), K(ls_id), K(tablet_id), K(check_medium_snapshot), + K(unfinish_cnt), "total_cnt", replica_array.count()); + + if (0 == unfinish_cnt) { // merge finish + merge_finish = true; + } + } + return ret; +} + +int ObMediumCompactionScheduleFunc::init_tablet_filters() +{ + int ret = OB_SUCCESS; + if (!filters_inited_) { + if (OB_FAIL(filters_.set_filter_not_exist_server(ObAllServerTracer::get_instance()))) { + LOG_WARN("fail to set not exist server filter", KR(ret)); + } else if (OB_FAIL(filters_.set_filter_permanent_offline(ObAllServerTracer::get_instance()))) { + LOG_WARN("fail to set filter", KR(ret)); + } else { + filters_inited_ = true; + } + } + return ret; +} + +int ObMediumCompactionScheduleFunc::check_medium_checksum_table( + const int64_t check_medium_snapshot, + const ObLSID &ls_id, + const ObTabletID &tablet_id) +{ + int ret = OB_SUCCESS; + ObSEArray checksum_items; + if (OB_FAIL(ObTabletReplicaChecksumOperator::get_specified_tablet_checksum( + MTL_ID(), ls_id.id(), tablet_id.id(), check_medium_snapshot, checksum_items))) { + LOG_WARN("failed to get tablet checksum", K(ret), K(ls_id), K(tablet_id), K(check_medium_snapshot)); + } else { + for (int i = 1; OB_SUCC(ret) && i < checksum_items.count(); ++i) { + if (OB_FAIL(checksum_items.at(0).verify_checksum(checksum_items.at(i)))) { + LOG_ERROR("checksum verify failed", K(ret), K(checksum_items.at(0)), K(i), K(checksum_items.at(i))); + } + } +#ifdef ERRSIM 
+ ret = E(EventTable::EN_MEDIUM_REPLICA_CHECKSUM_ERROR) OB_SUCCESS; + if (OB_FAIL(ret)) { + STORAGE_LOG(INFO, "ERRSIM EN_MEDIUM_REPLICA_CHECKSUM_ERROR", K(ret), K(ls_id), K(tablet_id)); + } +#endif + if (OB_CHECKSUM_ERROR == ret) { + int tmp_ret = OB_SUCCESS; + ObTabletCompactionScnInfo medium_snapshot_info( + MTL_ID(), + ls_id, + tablet_id, + ObTabletReplica::SCN_STATUS_ERROR); + ObTabletCompactionScnInfo unused_ret_info; + // TODO(@lixia.yq) delete status when data_checksum_error is a inner_table + if (OB_TMP_FAIL(ObTabletMetaTableCompactionOperator::set_info_status( + medium_snapshot_info, unused_ret_info))) { + LOG_WARN("failed to set info status", K(tmp_ret), K(medium_snapshot_info)); + } + } + } + return ret; +} + +// for Leader, clean wait_check_medium_scn +int ObMediumCompactionScheduleFunc::check_medium_finish() +{ + int ret = OB_SUCCESS; + const ObLSID &ls_id = ls_.get_ls_id(); + const ObTabletID &tablet_id = tablet_.get_tablet_meta().tablet_id_; + const int64_t wait_check_medium_scn = tablet_.get_medium_compaction_info_list().get_wait_check_medium_scn(); + bool merge_finish = false; + + if (0 == wait_check_medium_scn) { + // do nothing + } else if (OB_FAIL(check_medium_meta_table(wait_check_medium_scn, ls_id, tablet_id, merge_finish))) { + LOG_WARN("failed to check inner table", K(ret), KPC(this)); + } else if (!merge_finish) { + // do nothing + } else if (OB_FAIL(check_medium_checksum_table(wait_check_medium_scn, ls_id, tablet_id))) { // check checksum + LOG_WARN("failed to check checksum", K(ret), K(wait_check_medium_scn), KPC(this)); + } else { + const ObMediumCompactionInfo::ObCompactionType compaction_type = tablet_.get_medium_compaction_info_list().get_last_compaction_type(); + FLOG_INFO("check medium compaction info", K(ret), K(ls_id), K(tablet_id), K(compaction_type)); + + // clear wait_check_medium_scn on Tablet + ObTabletHandle unused_handle; + if (OB_FAIL(ls_.update_medium_compaction_info(tablet_id, unused_handle))) { + LOG_WARN("failed to 
update medium compaction info", K(ret), K(ls_id), K(tablet_id)); + } + } + + return ret; +} + +// DO NOT visit inner table in this func +int ObMediumCompactionScheduleFunc::schedule_tablet_medium_merge( + ObLS &ls, + ObTablet &tablet, + const int64_t input_major_snapshot) +{ + int ret = OB_SUCCESS; +#ifdef ERRSIM + ret = E(EventTable::EN_MEDIUM_CREATE_DAG) ret; + if (OB_FAIL(ret)) { + LOG_INFO("set create medium dag failed with errsim", K(ret)); + return ret; + } +#endif + const ObMediumCompactionInfoList &medium_list = tablet.get_medium_compaction_info_list(); + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + const ObLSID &ls_id = ls.get_ls_id(); + int64_t major_frozen_snapshot = 0 == input_major_snapshot ? MTL(ObTenantTabletScheduler *)->get_frozen_version() : input_major_snapshot; + + const int64_t schedule_scn = tablet.get_medium_compaction_info_list().get_schedule_scn(major_frozen_snapshot); + bool need_merge = false; + LOG_DEBUG("schedule_tablet_medium_merge", K(schedule_scn), K(ls_id), K(tablet_id)); + if (schedule_scn > 0) { + if (OB_FAIL(check_need_merge_and_schedule(ls_id, tablet, schedule_scn, need_merge))) { + LOG_WARN("failed to check medium merge", K(ret), K(ls_id), K(tablet_id), K(schedule_scn)); + } + } + + return ret; +} + +int ObMediumCompactionScheduleFunc::get_palf_role(const ObLSID &ls_id, ObRole &role) +{ + int ret = OB_SUCCESS; + role = INVALID_ROLE; + int64_t unused_proposal_id = 0; + palf::PalfHandleGuard palf_handle_guard; + if (OB_FAIL(MTL(logservice::ObLogService*)->open_palf(ls_id, palf_handle_guard))) { + if (OB_LS_NOT_EXIST != ret) { + LOG_WARN("failed to open palf", K(ret), K(ls_id)); + } + } else if (OB_FAIL(palf_handle_guard.get_role(role, unused_proposal_id))) { + LOG_WARN("failed to get palf handle role", K(ret), K(ls_id)); + } + return ret; +} + +int ObMediumCompactionScheduleFunc::freeze_memtable_to_get_medium_info() +{ + int ret = OB_SUCCESS; + ObSEArray memtables; + if 
(OB_FAIL(tablet_.get_table_store().get_memtables(memtables, true/*need_active*/))) { + LOG_WARN("failed to get memtables", K(ret), KPC(this)); + } else if (memtables.empty()) { + // do nothing + } else { + memtable::ObMemtable *memtable = nullptr; + bool receive_medium_info = false; + for (int i = 0; OB_SUCC(ret) && i < memtables.count(); ++i) { + if (OB_ISNULL(memtable = static_cast(memtables.at(i)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("memtable is null", K(ret), K(i), KPC(memtables.at(i)), K(memtable)); + } else if (memtable->has_multi_source_data_unit(memtable::MultiSourceDataUnitType::MEDIUM_COMPACTION_INFO)) { + receive_medium_info = true; + break; + } + } // end of for + if (OB_SUCC(ret) && receive_medium_info) { + if (OB_FAIL(MTL(ObTenantFreezer *)->tablet_freeze(tablet_.get_tablet_meta().tablet_id_, false/*force_freeze*/))) { + if (OB_TABLE_NOT_EXIST != ret) { + LOG_WARN("failed to freeze tablet", K(ret), KPC(this)); + } + } + } + } + return ret; +} + +int ObMediumCompactionScheduleFunc::check_need_merge_and_schedule( + const ObLSID &ls_id, + ObTablet &tablet, + const int64_t schedule_scn, + bool &need_merge) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + need_merge = false; + bool can_merge = false; + + if (OB_FAIL(ObPartitionMergePolicy::check_need_medium_merge( + tablet, + schedule_scn, + need_merge, + can_merge))) { // check merge finish + LOG_WARN("failed to check medium merge", K(ret), K(ls_id), "tablet_id", tablet.get_tablet_meta().tablet_id_); + } else if (need_merge && can_merge) { + const ObMediumCompactionInfo *medium_info = nullptr; + if (OB_FAIL(tablet.get_medium_compaction_info_list().get_specified_scn_info(schedule_scn, medium_info))) { + LOG_WARN("failed to get specified scn info", K(ret), K(schedule_scn)); + } else if (OB_TMP_FAIL(ObTenantTabletScheduler::schedule_merge_dag( + ls_id, + tablet.get_tablet_meta().tablet_id_, + MEDIUM_MERGE, + schedule_scn, + medium_info->is_major_compaction()))) { + if (OB_SIZE_OVERFLOW != 
tmp_ret && OB_EAGAIN != tmp_ret) { + ret = tmp_ret; + LOG_WARN("failed to schedule medium merge dag", K(ret), K(ls_id), + "tablet_id", tablet.get_tablet_meta().tablet_id_); + } + } else { + LOG_DEBUG("success to schedule medium merge dag", K(ret), K(schedule_scn)); + } + } + return ret; +} + +} //namespace compaction +} // namespace oceanbase diff --git a/src/storage/compaction/ob_medium_compaction_func.h b/src/storage/compaction/ob_medium_compaction_func.h new file mode 100644 index 0000000000..0ca38edae0 --- /dev/null +++ b/src/storage/compaction/ob_medium_compaction_func.h @@ -0,0 +1,139 @@ +//Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. 
+#ifndef OB_STORAGE_COMPACTION_MEDIUM_COMPACTION_FUNC_H_ +#define OB_STORAGE_COMPACTION_MEDIUM_COMPACTION_FUNC_H_ + +#include "storage/ls/ob_ls.h" +#include "storage/compaction/ob_partition_merge_policy.h" +#include "share/tablet/ob_tablet_filter.h" +#include "share/ob_tablet_meta_table_compaction_operator.h" + +namespace oceanbase +{ +namespace compaction +{ + +class ObMediumCompactionScheduleFunc +{ +public: + ObMediumCompactionScheduleFunc(ObLS &ls, ObTablet &tablet) + : allocator_("MediumSchedule"), + ls_(ls), + tablet_(tablet), + filters_inited_(false), + filters_() + {} + ~ObMediumCompactionScheduleFunc() {} + + static int schedule_tablet_medium_merge( + ObLS &ls, + ObTablet &tablet, + const int64_t major_frozen_scn = 0); + static int get_palf_role(const share::ObLSID &ls_id, ObRole &role); + + int schedule_next_medium_for_leader(const int64_t major_snapshot); + + int decide_medium_snapshot( + const int64_t schedule_medium_snapshot, + const ObAdaptiveMergePolicy::AdaptiveMergeReason merge_reason = ObAdaptiveMergePolicy::AdaptiveMergeReason::NONE); + + int check_medium_finish(); + + int freeze_memtable_to_get_medium_info(); + + TO_STRING_KV("ls_id", ls_.get_ls_id(), "tablet_id", tablet_.get_tablet_meta().tablet_id_); +protected: + int get_status_from_inner_table(share::ObTabletCompactionScnInfo &ret_info); + int prepare_medium_info(const ObGetMergeTablesResult &result, ObMediumCompactionInfo &medium_info); + int init_parallel_range( + const ObGetMergeTablesResult &result, + ObMediumCompactionInfo &medium_info); + static int prepare_iter_for_major( + ObTablet &tablet, + const ObGetMergeTablesResult &result, + ObMediumCompactionInfo &medium_info, + ObTableStoreIterator &table_iter); + static int prepare_iter_for_medium( + ObTablet &tablet, + const ObGetMergeTablesResult &result, + ObMediumCompactionInfo &medium_info, + ObTableStoreIterator &table_iter); + int submit_medium_clog(ObMediumCompactionInfo &medium_info); + int check_medium_meta_table( + const int64_t 
medium_snapshot, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + bool &merge_finish); + int init_tablet_filters(); + static int check_medium_checksum_table( + const int64_t medium_snapshot, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id); + static int choose_medium_snapshot( + ObLS &ls, + ObTablet &tablet, + const int64_t schedule_medium_snapshot, + const ObAdaptiveMergePolicy::AdaptiveMergeReason &merge_reason, + ObMediumCompactionInfo &medium_info, + ObGetMergeTablesResult &result); + static int choose_major_snapshot( + ObLS &ls, + ObTablet &tablet, + const int64_t schedule_medium_snapshot, + const ObAdaptiveMergePolicy::AdaptiveMergeReason &merge_reason, + ObMediumCompactionInfo &medium_info, + ObGetMergeTablesResult &result); + static int check_need_merge_and_schedule( + const share::ObLSID &ls_id, + ObTablet &tablet, + const int64_t schedule_scn, + bool &need_merge); + int schedule_next_medium_primary_cluster(const int64_t major_snapshot); + + int get_table_schema_to_merge(const int64_t schema_version, ObMediumCompactionInfo &medium_info); + + static int get_table_id( + ObMultiVersionSchemaService &schema_service, + const ObTabletID &tablet_id, + const int64_t schema_version, + uint64_t &table_id); + static const int64_t DEFAULT_SYNC_SCHEMA_CLOG_TIMEOUT = 1000L * 1000L; // 1s + static const int64_t DEFAULT_SCHEDULE_MEDIUM_INTERVAL = 60L * 1000L * 1000L; // 60s + static const int64_t SCHEDULE_RANGE_INC_ROW_COUNT_PERCENRAGE_THRESHOLD = 10L; + static const int64_t SCHEDULE_RANGE_ROW_COUNT_THRESHOLD = 1000 *1000L; // 100w + static const int64_t MEDIUM_FUNC_CNT = 2; + typedef int (*ChooseMediumScn)( + ObLS &ls, + ObTablet &tablet, + const int64_t schedule_medium_snapshot, + const ObAdaptiveMergePolicy::AdaptiveMergeReason &merge_reason, + ObMediumCompactionInfo &medium_info, + ObGetMergeTablesResult &result); + static ChooseMediumScn choose_medium_scn[MEDIUM_FUNC_CNT]; + + typedef int (*PrepareTableIter)( + ObTablet &tablet, + const 
ObGetMergeTablesResult &result, + ObMediumCompactionInfo &medium_info, + ObTableStoreIterator &table_iter); + static PrepareTableIter prepare_table_iter[MEDIUM_FUNC_CNT]; + +private: + ObArenaAllocator allocator_; + ObLS &ls_; + ObTablet &tablet_; + bool filters_inited_; + share::ObTabletReplicaFilterHolder filters_; +}; + +} //namespace compaction +} // namespace oceanbase + +#endif // OB_STORAGE_COMPACTION_MEDIUM_COMPACTION_FUNC_H_ diff --git a/src/storage/compaction/ob_medium_compaction_mgr.cpp b/src/storage/compaction/ob_medium_compaction_mgr.cpp index 96a223e91a..e82b7c6c14 100644 --- a/src/storage/compaction/ob_medium_compaction_mgr.cpp +++ b/src/storage/compaction/ob_medium_compaction_mgr.cpp @@ -12,23 +12,213 @@ #define USING_LOG_PREFIX STORAGE #include "storage/compaction/ob_medium_compaction_mgr.h" +#include "storage/tablet/ob_tablet_meta.h" +#include "storage/tablet/ob_tablet.h" +#include "logservice/ob_log_base_header.h" namespace oceanbase { +using namespace storage; + namespace compaction { + +/* + * ObParallelMergeInfo + * */ + +void ObParallelMergeInfo::destroy() +{ + if (list_size_ > 0 && nullptr != parallel_end_key_list_ && nullptr != allocator_) { + for (int i = 0; i < list_size_; ++i) { + parallel_end_key_list_[i].destroy(*allocator_); + } + list_size_ = 0; + allocator_->free(parallel_end_key_list_); + parallel_end_key_list_ = nullptr; + allocator_ = nullptr; + } + parallel_info_ = 0; +} + +int ObParallelMergeInfo::serialize(char *buf, const int64_t buf_len, int64_t &pos) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(nullptr == buf || buf_len <= 0 || pos < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), K(buf), K(buf_len), K(pos)); + } else if (0 == list_size_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("no need to serialize parallel_merge_info", K(ret), K(list_size_)); + } else { + LST_DO_CODE(OB_UNIS_ENCODE, + parallel_info_); + for (int i = 0; OB_SUCC(ret) && i < list_size_; ++i) { + if 
(OB_FAIL(parallel_end_key_list_[i].serialize(buf, buf_len, pos))) { + LOG_WARN("failed to encode concurrent cnt", K(ret), K(i), K(list_size_), K(parallel_end_key_list_[i])); + } + } + } + return ret; +} + +int ObParallelMergeInfo::deserialize( + common::ObIAllocator &allocator, + const char *buf, + const int64_t data_len, + int64_t &pos) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(nullptr == buf || data_len <= 0 || pos < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), K(buf), K(data_len), K(pos)); + } else { + LST_DO_CODE(OB_UNIS_DECODE, parallel_info_); + if (OB_FAIL(ret)) { + } else if (0 == list_size_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("list size is invalid", K(ret), K(list_size_)); + } else { + allocator_ = &allocator; + void *alloc_buf = nullptr; + if (OB_ISNULL(alloc_buf = allocator.alloc(sizeof(ObStoreRowkey) * list_size_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc store rowkey array", K(ret), K(list_size_)); + } else { + parallel_end_key_list_ = new(alloc_buf) ObStoreRowkey[list_size_]; + } + for (int i = 0; OB_SUCC(ret) && i < list_size_; ++i) { + if (OB_FAIL(parallel_end_key_list_[i].deserialize(allocator, buf, data_len, pos))) { + LOG_WARN("failed to encode concurrent cnt", K(ret), K(i), K(list_size_), K(data_len), K(pos)); + } + } + if (OB_FAIL(ret)) { + destroy(); // free parallel_end_key_list_ in destroy + } + } + } + return ret; +} + +int64_t ObParallelMergeInfo::get_serialize_size() const +{ + int64_t len = 0; + if (list_size_ > 0) { + len += serialization::encoded_length_vi32(parallel_info_); + for (int i = 0; i < list_size_; ++i) { + len += parallel_end_key_list_[i].get_serialize_size(); + } + } + return len; +} + +int ObParallelMergeInfo::generate_from_range_array( + ObIAllocator &allocator, + ObArrayArray ¶l_range) +{ + int ret = OB_SUCCESS; + void *buf = nullptr; + if (OB_UNLIKELY(0 != list_size_ || nullptr != parallel_end_key_list_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("parallel merge 
info is not empty", K(ret), KPC(this)); + } else { + int64_t sum_range_cnt = 0; + for (int64_t i = 0; i < paral_range.count(); ++i) { + sum_range_cnt += paral_range.at(i).count(); + } + if (sum_range_cnt <= VALID_CONCURRENT_CNT || sum_range_cnt > UINT8_MAX) { + // do nothing + } else if (FALSE_IT(list_size_ = sum_range_cnt - 1)) { + } else if (OB_ISNULL(buf = allocator.alloc(sizeof(ObStoreRowkey) * list_size_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate", K(ret), K(paral_range)); + } else { + allocator_ = &allocator; + parallel_end_key_list_ = new(buf) ObStoreRowkey[list_size_]; + int64_t cnt = 0; + for (int64_t i = 0; OB_SUCC(ret) && i < paral_range.count() && cnt < list_size_; ++i) { + const ObIArray &range_array = paral_range.at(i); + for (int64_t j = 0; OB_SUCC(ret) && j < range_array.count() && cnt < list_size_; ++j) { + if (OB_FAIL(range_array.at(j).get_end_key().deep_copy(parallel_end_key_list_[cnt++], allocator))) { + LOG_WARN("failed to deep copy end key", K(ret), K(i), K(range_array), K(j), K(cnt)); + } + } + } // end of loop array + } + } + LOG_DEBUG("parallel range info", K(ret), KPC(this), K(paral_range), K(paral_range.count()), K(paral_range.at(0))); + + if (OB_FAIL(ret)) { + destroy(); + } else if (get_serialize_size() > MAX_PARALLEL_RANGE_SERIALIZE_LEN) { + ret = OB_SIZE_OVERFLOW; + LOG_DEBUG("parallel range info is too large to sync", K(ret), KPC(this)); + destroy(); + } + return ret; +} + +int ObParallelMergeInfo::init( + common::ObIAllocator &allocator, + const ObParallelMergeInfo &other) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!other.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("other parallel info is invalid", K(ret), K(other)); + } else { + list_size_ = other.list_size_; + allocator_ = &allocator; + if (list_size_ > 0) { + void *buf = nullptr; + if (OB_ISNULL(buf = allocator.alloc(sizeof(ObStoreRowkey) * list_size_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate", K(ret), 
K(other)); + } else { + parallel_end_key_list_ = new (buf) ObStoreRowkey[list_size_]; + for (int i = 0; OB_SUCC(ret) && i < list_size_; ++i) { + if (OB_FAIL(other.parallel_end_key_list_[i].deep_copy(parallel_end_key_list_[i], allocator))) { + LOG_WARN("failed to deep copy end key", K(ret), K(i), K(other.parallel_end_key_list_[i])); + } + } + if (OB_FAIL(ret)) { + destroy(); + } + } // else + } + } + return ret; +} + +int64_t ObParallelMergeInfo::to_string(char* buf, const int64_t buf_len) const +{ + int64_t pos = 0; + if (OB_ISNULL(buf) || buf_len <= 0) { + } else { + J_OBJ_START(); + J_KV(K_(list_size)); + J_COMMA(); + for (int i = 0; i < list_size_; ++i) { + J_KV(K(i), "key", parallel_end_key_list_[i]); + J_COMMA(); + } + J_OBJ_END(); + } + return pos; +} + /* * ObMediumCompactionInfo * */ const char *ObMediumCompactionInfo::ObCompactionTypeStr[] = { - "MAJOR_COMPACTION", "MEDIUM_COMPACTION", + "MAJOR_COMPACTION", }; const char *ObMediumCompactionInfo::get_compaction_type_str(enum ObCompactionType type) { const char *str = ""; - if (type >= COMPACTION_TYPE_MAX || type < MAJOR_COMPACTION) { + if (type >= COMPACTION_TYPE_MAX || type < MEDIUM_COMPACTION) { str = "invalid_type"; } else { str = ObCompactionTypeStr[type]; @@ -37,14 +227,17 @@ const char *ObMediumCompactionInfo::get_compaction_type_str(enum ObCompactionTyp } ObMediumCompactionInfo::ObMediumCompactionInfo() - : medium_compat_version_(MEIDUM_COMPAT_VERSION), + : ObIMultiSourceDataUnit(), + medium_compat_version_(MEIDUM_COMPAT_VERSION), compaction_type_(COMPACTION_TYPE_MAX), - is_schema_changed_(false), + contain_parallel_range_(false), + medium_merge_reason_(ObAdaptiveMergePolicy::NONE), reserved_(0), cluster_id_(0), medium_snapshot_(0), - medium_log_ts_(0), - storage_schema_() + medium_scn_(), + storage_schema_(), + parallel_merge_info_() { STATIC_ASSERT(static_cast(COMPACTION_TYPE_MAX) == ARRAYSIZEOF(ObCompactionTypeStr), "compaction type str len is mismatch"); } @@ -64,12 +257,13 @@ int 
ObMediumCompactionInfo::init( LOG_WARN("invalid argument", K(ret), K(medium_info)); } else if (OB_FAIL(storage_schema_.init(allocator, medium_info.storage_schema_))) { LOG_WARN("failed to init storage schema", K(ret), K(medium_info)); + } else if (OB_FAIL(parallel_merge_info_.init(allocator, medium_info.parallel_merge_info_))) { + LOG_WARN("failed to init parallel merge info", K(ret), K(medium_info)); } else { info_ = medium_info.info_; cluster_id_ = medium_info.cluster_id_; medium_snapshot_ = medium_info.medium_snapshot_; - medium_log_ts_ = medium_info.medium_log_ts_; - is_schema_changed_ = medium_info.is_schema_changed_; + medium_scn_ = medium_info.medium_scn_; } return ret; } @@ -78,8 +272,9 @@ bool ObMediumCompactionInfo::is_valid() const { return COMPACTION_TYPE_MAX != compaction_type_ && medium_snapshot_ > 0 - && medium_log_ts_ > 0 - && storage_schema_.is_valid(); + && medium_scn_.get_val_for_tx() > 0 + && storage_schema_.is_valid() + && parallel_merge_info_.is_valid(); } void ObMediumCompactionInfo::reset() @@ -89,9 +284,24 @@ void ObMediumCompactionInfo::reset() compaction_type_ = COMPACTION_TYPE_MAX; cluster_id_ = 0; medium_snapshot_ = 0; - medium_log_ts_ = 0; - is_schema_changed_ = false; + medium_scn_.set_min(); storage_schema_.reset(); + parallel_merge_info_.destroy(); +} + +int ObMediumCompactionInfo::deep_copy(const ObIMultiSourceDataUnit *src, ObIAllocator *allocator) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(nullptr == src || nullptr == allocator)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(src), KP(allocator)); + } else if (OB_UNLIKELY(memtable::MultiSourceDataUnitType::MEDIUM_COMPACTION_INFO != src->type())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), "type", src->type(), KP(allocator)); + } else { + ret = init(*allocator, *static_cast(src)); + } + return ret; } int ObMediumCompactionInfo::save_storage_schema( @@ -108,6 +318,25 @@ int ObMediumCompactionInfo::save_storage_schema( return 
ret; } +int ObMediumCompactionInfo::gene_parallel_info( + ObIAllocator &allocator, + ObArrayArray ¶l_range) +{ + int ret = OB_SUCCESS; + contain_parallel_range_ = false; + if (OB_FAIL(parallel_merge_info_.generate_from_range_array(allocator, paral_range))) { + if (OB_UNLIKELY(OB_SIZE_OVERFLOW != ret)) { + LOG_WARN("failed to generate parallel merge info", K(ret), K(paral_range)); + } else { + ret = OB_SUCCESS; + } + } else if (parallel_merge_info_.list_size_ > 0) { + contain_parallel_range_ = true; + LOG_INFO("success to gene parallel info", K(ret), K(contain_parallel_range_), K(parallel_merge_info_)); + } + return ret; +} + int ObMediumCompactionInfo::serialize(char *buf, const int64_t buf_len, int64_t &pos) const { int ret = OB_SUCCESS; @@ -120,8 +349,13 @@ int ObMediumCompactionInfo::serialize(char *buf, const int64_t buf_len, int64_t info_, cluster_id_, medium_snapshot_, - medium_log_ts_, + medium_scn_, storage_schema_); + if (contain_parallel_range_) { + LST_DO_CODE( + OB_UNIS_ENCODE, + parallel_merge_info_); + } LOG_DEBUG("ObMediumCompactionInfo::serialize", K(ret), K(buf), K(buf_len), K(pos)); } return ret; @@ -138,15 +372,20 @@ int ObMediumCompactionInfo::deserialize( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret), K(buf), K(data_len), K(pos)); } else { - LST_DO_CODE(OB_UNIS_DECODE, - info_, - cluster_id_, - medium_snapshot_, - medium_log_ts_); + LST_DO_CODE(OB_UNIS_DECODE, + info_, + cluster_id_, + medium_snapshot_, + medium_scn_); if (OB_FAIL(ret)) { } else if (OB_FAIL(storage_schema_.deserialize(allocator, buf, data_len, pos))) { LOG_WARN("failed to deserialize storage schema", K(ret)); + } else if (contain_parallel_range_) { + if (OB_FAIL(parallel_merge_info_.deserialize(allocator, buf, data_len, pos))) { + LOG_WARN("failed to deserialize parallel merge info", K(ret), K(buf), K(data_len), K(pos)); + } } else { + clear_parallel_range(); LOG_DEBUG("ObMediumCompactionInfo::deserialize", K(ret), K(buf), K(data_len), K(pos)); } } @@ -156,19 
+395,322 @@ int ObMediumCompactionInfo::deserialize( int64_t ObMediumCompactionInfo::get_serialize_size() const { int64_t len = 0; - LST_DO_CODE(OB_UNIS_ADD_LEN, - info_, - cluster_id_, - medium_snapshot_, - medium_log_ts_, - storage_schema_); + LST_DO_CODE( + OB_UNIS_ADD_LEN, + info_, + cluster_id_, + medium_snapshot_, + medium_scn_, + storage_schema_); + if (contain_parallel_range_) { + LST_DO_CODE(OB_UNIS_ADD_LEN, parallel_merge_info_); + } return len; } void ObMediumCompactionInfo::gene_info( char* buf, const int64_t buf_len, int64_t &pos) const { - J_KV("compaction_type", ObMediumCompactionInfo::get_compaction_type_str((ObCompactionType)compaction_type_), K(medium_snapshot_)); + J_KV("compaction_type", ObMediumCompactionInfo::get_compaction_type_str((ObCompactionType)compaction_type_), + K(medium_snapshot_), K_(parallel_merge_info)); +} + +/* + * ObTabletMediumCompactionInfoRecorder + * */ + +ObTabletMediumCompactionInfoRecorder::ObTabletMediumCompactionInfoRecorder() + : ObIStorageClogRecorder(), + is_inited_(false), + ignore_medium_(false), + ls_id_(), + tablet_id_(), + tablet_handle_ptr_(nullptr), + medium_info_(nullptr), + allocator_(nullptr) +{ +#if defined(__x86_64__) + STATIC_ASSERT(sizeof(ObTabletMediumCompactionInfoRecorder) <= 96, "size of medium recorder is oversize"); +#endif +} + +ObTabletMediumCompactionInfoRecorder::~ObTabletMediumCompactionInfoRecorder() +{ + destroy(); +} + +void ObTabletMediumCompactionInfoRecorder::destroy() +{ + is_inited_ = false; + ignore_medium_ = false; + ObIStorageClogRecorder::destroy(); + free_allocated_info(); + ls_id_.reset(); + tablet_id_.reset(); +} + +void ObTabletMediumCompactionInfoRecorder::reset() +{ + if (is_inited_) { + ObIStorageClogRecorder::reset(); + } +} + +int ObTabletMediumCompactionInfoRecorder::init( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const int64_t max_saved_version, + logservice::ObLogHandler *log_handler) +{ + int ret = OB_SUCCESS; + + if (IS_INIT) { + ret = 
OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_ISNULL(log_handler)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(log_handler)); + } else if (OB_FAIL(ObIStorageClogRecorder::init(max_saved_version, log_handler))) { + LOG_WARN("failed to init ObIStorageClogRecorder", K(ret), K(log_handler)); + } else { + ignore_medium_ = tablet_id.is_special_merge_tablet(); + ls_id_ = ls_id; + tablet_id_ = tablet_id; + is_inited_ = true; + LOG_INFO("success to init", K(ret), K_(ls_id), K_(tablet_id), K(max_saved_version)); + } + return ret; +} +// this func is protected by lock in reserved_snapshot_map +int ObTabletMediumCompactionInfoRecorder::submit_medium_compaction_info( + ObMediumCompactionInfo &medium_info, + ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + int64_t table_id = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (ignore_medium_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support to submit medium compaction clog", K(ret), K_(tablet_id)); + } else if (OB_UNLIKELY(!medium_info.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(medium_info)); + } else if (FALSE_IT(medium_info_ = &medium_info)) { + } else if (OB_FAIL(try_update_for_leader(medium_info.medium_snapshot_, &allocator))) { + LOG_WARN("failed to update for leader", K(ret), K(medium_info)); + } + medium_info_ = nullptr; + if (OB_ALLOCATE_MEMORY_FAILED == ret || OB_BLOCK_FROZEN == ret) { + ret = OB_EAGAIN; + } + return ret; +} + +void ObTabletMediumCompactionInfoRecorder::free_allocated_info() +{ + if (OB_NOT_NULL(allocator_)) { + if (OB_NOT_NULL(logcb_ptr_)) { + tablet_handle_ptr_->reset(); + tablet_handle_ptr_->~ObTabletHandle(); + allocator_->free(logcb_ptr_); + logcb_ptr_ = nullptr; + tablet_handle_ptr_ = nullptr; + } + allocator_ = nullptr; + } +} + +int ObTabletMediumCompactionInfoRecorder::replay_medium_compaction_log( + const share::SCN &scn, + const char *buf, + const int64_t 
size, + int64_t &pos) +{ + int ret = OB_SUCCESS; + int64_t update_version = OB_INVALID_VERSION; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (ignore_medium_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not support to replay medium compaction clog", K(ret), K_(tablet_id)); + } else if (OB_FAIL(serialization::decode_i64(buf, size, pos, &update_version))) { + LOG_WARN("fail to deserialize table_version", K(ret), K_(tablet_id)); + } else if (OB_FAIL(ObIStorageClogRecorder::replay_clog(update_version, scn, buf, size, pos))) { + LOG_WARN("failed to replay clog", K(ret), K(scn), K_(tablet_id), K(update_version)); + } + return ret; +} + +int ObTabletMediumCompactionInfoRecorder::inner_replay_clog( + const int64_t update_version, + const share::SCN &scn, + const char *buf, + const int64_t size, + int64_t &pos) +{ + UNUSED(update_version); + int ret = OB_SUCCESS; + ObArenaAllocator tmp_allocator; + ObMediumCompactionInfo replay_medium_info; + ObTabletHandle tmp_tablet_handle; + if (OB_FAIL(replay_get_tablet_handle(ls_id_, tablet_id_, scn, tmp_tablet_handle))) { + LOG_WARN("failed to get tablet handle", K(ret), K_(tablet_id), K(scn)); + } else if (OB_FAIL(replay_medium_info.deserialize(tmp_allocator, buf, size, pos))) { + LOG_WARN("failed to deserialize medium compaction info", K(ret)); + } else if (!replay_medium_info.from_cur_cluster() + && replay_medium_info.is_medium_compaction()) { + // throw medium compaction clog from other cluster + } else if (FALSE_IT(replay_medium_info.set_sync_finish(true))) { + } else if (OB_FAIL(tmp_tablet_handle.get_obj()->save_multi_source_data_unit(&replay_medium_info, + scn, true/*for replay*/, memtable::MemtableRefOp::NONE))) { + LOG_WARN("failed to save medium info", K(ret), K_(tablet_id), K(replay_medium_info)); + } else { + tmp_tablet_handle.reset(); + FLOG_INFO("success to save medium info", K(ret), K_(tablet_id), K(replay_medium_info), K(max_saved_version_)); + } + return ret; +} + +int 
ObTabletMediumCompactionInfoRecorder::sync_clog_succ_for_leader(const int64_t update_version) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(medium_info_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("medium info is invalid", K(ret), K_(clog_scn), KP_(medium_info)); + } else if (OB_UNLIKELY(medium_info_->medium_snapshot_ != update_version)) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("medium snapshot not match", K(ret), KPC(medium_info_), K(update_version)); + } else if (OB_FAIL(dec_ref_on_memtable(true/*sync_finish*/))) { + LOG_WARN("failed to dec ref on memtable", K(ret), K_(tablet_id), KPC(medium_info_)); + } else { + FLOG_INFO("success to save medium info", K(ret), K_(tablet_id), KPC(medium_info_), + K(max_saved_version_), K_(clog_scn)); + } + return ret; +} + +void ObTabletMediumCompactionInfoRecorder::sync_clog_failed_for_leader() +{ + dec_ref_on_memtable(false/*sync_finish*/); +} + +int ObTabletMediumCompactionInfoRecorder::dec_ref_on_memtable(const bool sync_finish) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(nullptr == medium_info_ + || nullptr == tablet_handle_ptr_ + || !tablet_handle_ptr_->is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("medium info or tablet handle is unexpected null", K(ret), K_(ls_id), K_(tablet_id), + KP_(medium_info), K_(tablet_handle_ptr)); + } else { + medium_info_->set_sync_finish(sync_finish); + if (OB_FAIL(tablet_handle_ptr_->get_obj()->save_multi_source_data_unit(medium_info_, clog_scn_, + false/*for_replay*/, memtable::MemtableRefOp::DEC_REF, true/*is_callback*/))) { + LOG_WARN("failed to save medium info", K(ret), K_(tablet_id), K(medium_info_)); + } + } + return ret; +} + +// log_header + tablet_id + medium_snapshot + medium_compaction_info +int ObTabletMediumCompactionInfoRecorder::prepare_struct_in_lock( + int64_t &update_version, + ObIAllocator *allocator, + char *&clog_buf, + int64_t &clog_len) +{ + UNUSED(update_version); + int ret = OB_SUCCESS; + clog_buf = nullptr; + clog_len = 0; + const logservice::ObLogBaseHeader 
log_header( + logservice::ObLogBaseType::MEDIUM_COMPACTION_LOG_BASE_TYPE, + logservice::ObReplayBarrierType::PRE_BARRIER); + + int64_t pos = 0; + char *buf = nullptr; + char *alloc_clog_buf = nullptr; + int64_t alloc_buf_offset = 0; + const int64_t buf_len = log_header.get_serialize_size() + + tablet_id_.get_serialize_size() + + serialization::encoded_length_i64(medium_info_->medium_snapshot_) + + medium_info_->get_serialize_size(); + const int64_t alloc_buf_size = buf_len + sizeof(ObTabletHandle) + sizeof(ObStorageCLogCb); + + if (OB_UNLIKELY(nullptr == medium_info_ || nullptr == allocator)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("medium_info or allocator is unexpected null", K(ret), KP_(medium_info), KP(allocator)); + } else if (buf_len >= common::OB_MAX_LOG_ALLOWED_SIZE) { // need be separated into several clogs + ret = OB_ERR_DATA_TOO_LONG; + LOG_WARN("medium info log too long", K(buf_len), LITERAL_K(common::OB_MAX_LOG_ALLOWED_SIZE)); + } else if (FALSE_IT(allocator_ = allocator)) { + } else if (OB_ISNULL(buf = static_cast(allocator_->alloc(alloc_buf_size)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret), KPC(medium_info_)); + } else { + logcb_ptr_ = new(buf) ObStorageCLogCb(*this); + alloc_buf_offset += sizeof(ObStorageCLogCb); + tablet_handle_ptr_ = new (buf + alloc_buf_offset) ObTabletHandle(); + alloc_buf_offset += sizeof(ObTabletHandle); + alloc_clog_buf = static_cast(buf) + alloc_buf_offset; + } + + if (FAILEDx(get_tablet_handle(ls_id_, tablet_id_, *tablet_handle_ptr_))) { + LOG_WARN("failed to get tablet handle", K(ret), K_(ls_id), K_(tablet_id)); + } else if (OB_FAIL(log_header.serialize(alloc_clog_buf, buf_len, pos))) { + LOG_WARN("failed to serialize log header", K(ret)); + } else if (OB_FAIL(tablet_id_.serialize(alloc_clog_buf, buf_len, pos))) { + LOG_WARN("fail to serialize tablet_id", K(ret), K_(tablet_id)); + } else if (OB_FAIL(serialization::encode_i64(alloc_clog_buf, buf_len, pos, 
medium_info_->medium_snapshot_))) { + LOG_WARN("fail to serialize schema version", K(ret), K_(tablet_id)); + } else if (OB_FAIL(medium_info_->serialize(alloc_clog_buf, buf_len, pos))) { + LOG_WARN("failed to serialize medium compaction info", K(ret), K(buf_len), K_(tablet_id), KPC(medium_info_)); + } + if (OB_SUCC(ret)) { + clog_buf = alloc_clog_buf; + clog_len = pos; + } else if (nullptr != buf && nullptr != allocator_) { + free_allocated_info(); + } + return ret; +} + +int ObTabletMediumCompactionInfoRecorder::submit_log( + const int64_t update_version, + const char *clog_buf, + const int64_t clog_len) +{ + UNUSED(update_version); + int ret = OB_SUCCESS; + if (OB_UNLIKELY(nullptr == medium_info_ + || nullptr == tablet_handle_ptr_ + || !tablet_handle_ptr_->is_valid() + || nullptr == clog_buf + || clog_len <= 0)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("log handler or medium info is null", K(ret), KP(medium_info_), + KP(clog_buf), K(clog_len), K(tablet_handle_ptr_)); + } else if (FALSE_IT(medium_info_->set_sync_finish(false))) { + } else if (OB_FAIL(tablet_handle_ptr_->get_obj()->save_multi_source_data_unit( + medium_info_, share::SCN::max_scn(), + false/*for_replay*/, memtable::MemtableRefOp::INC_REF))) { + LOG_WARN("failed to save medium info", K_(tablet_id), KPC(medium_info_)); + } else if (OB_FAIL(write_clog(clog_buf, clog_len))) { + LOG_WARN("fail to submit log", K(ret), K_(tablet_id), K(medium_info_)); + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(dec_ref_on_memtable(false))) { + LOG_ERROR("failed to dec ref on memtable", K(tmp_ret), K_(ls_id), K_(tablet_id)); + } + } else { + LOG_INFO("success to submit medium log", K(ret), K_(tablet_id), K(medium_info_), K_(clog_scn), + "max_saved_version", get_max_saved_version()); + } + + return ret; } /* @@ -177,11 +719,13 @@ void ObMediumCompactionInfo::gene_info( const int64_t ObMediumCompactionInfoList::MAX_SERIALIZE_SIZE; -ObMediumCompactionInfoList::ObMediumCompactionInfoList(ObMediumListType medium_list_type) 
+ObMediumCompactionInfoList::ObMediumCompactionInfoList() : is_inited_(false), - medium_list_type_(medium_list_type), - cur_medium_snapshot_(0), - allocator_(nullptr) + allocator_(nullptr), + compat_(MEDIUM_LIST_VERSION), + last_compaction_type_(0), + reserved_(0), + wait_check_medium_scn_(0) { } @@ -197,45 +741,98 @@ int ObMediumCompactionInfoList::init(common::ObIAllocator &allocator) ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret)); } else { + compat_ = MEDIUM_LIST_VERSION; allocator_ = &allocator; is_inited_ = true; } return ret; } +// MINI: dump_list is from memtable +// finish_medium_scn = last_major_scn +// init_by_ha = true: need force set wait_check = finish_scn +// if wait_check=0 after restore, report_scn don't will be updated by leader int ObMediumCompactionInfoList::init(common::ObIAllocator &allocator, - const ObMediumCompactionInfoList *input_list) + const ObMediumCompactionInfoList *old_list, + const ObMediumCompactionInfoList *dump_list, + const int64_t finish_medium_scn/*= 0*/, + const bool update_in_major_type_merge/*= false*/) { int ret = OB_SUCCESS; if (IS_INIT) { ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret)); } else if (FALSE_IT(allocator_ = &allocator)) { - } else if (nullptr == input_list) { - // do nothing - } else if (OB_FAIL(append_list_with_deep_copy(*input_list))) { - LOG_WARN("failed to deep copy list", K(ret), K(input_list)); + } else if (nullptr != old_list && OB_FAIL(append_list_with_deep_copy(finish_medium_scn, update_in_major_type_merge, *old_list))) { + LOG_WARN("failed to deep copy list", K(ret), K(old_list)); + } else if (nullptr != dump_list && OB_FAIL(append_list_with_deep_copy(finish_medium_scn, update_in_major_type_merge, *dump_list))) { + LOG_WARN("failed to deep copy list", K(ret), K(dump_list)); } else { - cur_medium_snapshot_ = input_list->get_cur_medium_snapshot(); + // if update_in_major_type_merge = true, will update wait_check_medium_scn in delete_medium_compaction_info + if (!update_in_major_type_merge && 
nullptr != old_list) { + last_compaction_type_ = old_list->last_compaction_type_; + wait_check_medium_scn_ = old_list->get_wait_check_medium_scn(); + } } if (OB_SUCC(ret)) { + compat_ = MEDIUM_LIST_VERSION; is_inited_ = true; + if (medium_info_list_.get_size() > 0 || wait_check_medium_scn_ > 0) { + LOG_INFO("success to init list", K(ret), KPC(this), KPC(old_list)); + } } else if (OB_UNLIKELY(!is_inited_)) { reset(); } return ret; } +int ObMediumCompactionInfoList::init_after_check_finish( + ObIAllocator &allocator, + const ObMediumCompactionInfoList &old_list) // list from old_tablet +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_UNLIKELY(!old_list.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(old_list)); + } else if (FALSE_IT(allocator_ = &allocator)) { + } else if (OB_FAIL(append_list_with_deep_copy(wait_check_medium_scn_, false, old_list))) { + LOG_WARN("failed to deep copy list", K(ret), K(wait_check_medium_scn_)); + } else { + last_compaction_type_ = old_list.last_compaction_type_; + wait_check_medium_scn_ = 0; // update after check finished, should reset wait_check_medium_scn + compat_ = MEDIUM_LIST_VERSION; + is_inited_ = true; + LOG_INFO("success to init list", K(ret), KPC(this), K(old_list)); + } + if (OB_UNLIKELY(!is_inited_)) { + reset(); + } + return ret; +} + + +void ObMediumCompactionInfoList::reset_list() +{ + DLIST_REMOVE_ALL_NORET(info, medium_info_list_) { + static_cast(info)->~ObMediumCompactionInfo(); + allocator_->free(info); + } + medium_info_list_.reset(); +} + void ObMediumCompactionInfoList::reset() { if (OB_NOT_NULL(allocator_)) { - DLIST_REMOVE_ALL_NORET(info, medium_info_list_) { - info->~ObMediumCompactionInfo(); - allocator_->free(info); - } + reset_list(); medium_info_list_.reset(); } is_inited_ = false; + info_ = 0; + wait_check_medium_scn_ = 0; allocator_ = nullptr; } @@ -251,7 +848,7 @@ int 
ObMediumCompactionInfoList::add_medium_compaction_info(const ObMediumCompact return ret; } -int ObMediumCompactionInfoList::get_specified_snapshot_info( +int ObMediumCompactionInfoList::get_specified_scn_info( const int64_t snapshot, const ObMediumCompactionInfo *&ret_info) const { @@ -264,13 +861,15 @@ int ObMediumCompactionInfoList::get_specified_snapshot_info( } else if (OB_UNLIKELY(snapshot <= 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(snapshot)); - } else { + } else if (snapshot <= get_max_medium_snapshot()) { + const ObMediumCompactionInfo *cur_info = nullptr; DLIST_FOREACH_X(info, medium_info_list_, OB_SUCC(ret)) { + cur_info = static_cast(info); if (OB_UNLIKELY(!info->is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("invalid medium info", K(ret), KPC(info)); - } else if (snapshot == info->medium_snapshot_) { - ret_info = info; + } else if (snapshot == cur_info->medium_snapshot_) { + ret_info = cur_info; break; } } @@ -281,16 +880,18 @@ int ObMediumCompactionInfoList::get_specified_snapshot_info( return ret; } -int ObMediumCompactionInfoList::save_medium_compaction_info(const ObMediumCompactionInfoList &input_list) +int64_t ObMediumCompactionInfoList::get_schedule_scn(const int64_t major_compaction_scn) const { - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("list is not init", K(ret)); - } else if (OB_FAIL(append_list_with_deep_copy(input_list))) { - LOG_WARN("failed to deep copy list", K(ret), K(input_list)); + int64_t ret_scn = 0; + if (size() > 0) { + const ObMediumCompactionInfo *first_medium_info = get_first_medium_info(); + if (first_medium_info->is_medium_compaction() + || (first_medium_info->is_major_compaction() && major_compaction_scn >= first_medium_info->medium_snapshot_)) { + // for standby cluster, receive several medium info, only schedule what scheduler have received + ret_scn = first_medium_info->medium_snapshot_; + } } - return ret; + return ret_scn; } int 
ObMediumCompactionInfoList::inner_deep_copy_node( @@ -300,10 +901,8 @@ int ObMediumCompactionInfoList::inner_deep_copy_node( ObMediumCompactionInfo *new_info = nullptr; void *alloc_buf = nullptr; - if (OB_UNLIKELY(medium_info_list_.get_size() > 0 - && medium_info_list_.get_last()->medium_snapshot_ >= input_info.medium_snapshot_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("input medium info is invalid for list", K(ret), K(medium_info_list_), KPC(new_info)); + if (get_max_medium_snapshot() >= input_info.medium_snapshot_) { + // do nothing } else if (OB_ISNULL(alloc_buf = allocator_->alloc(sizeof(ObMediumCompactionInfo)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("failed to alloc memory", K(ret)); @@ -313,8 +912,11 @@ int ObMediumCompactionInfoList::inner_deep_copy_node( } else if (OB_UNLIKELY(!medium_info_list_.add_last(new_info))) { ret = OB_ERR_SYS; LOG_WARN("failed to add into medium info list", K(ret), KPC(new_info)); + } else if (OB_UNLIKELY(!inner_is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("medium info list is invalid", K(ret), KPC(this)); } else { - LOG_DEBUG("success to deep copy apeend medium info", K(ret), KPC(new_info)); + LOG_INFO("success to deep copy append medium info", K(ret), KPC(new_info)); } if (OB_FAIL(ret) && nullptr != new_info) { @@ -328,17 +930,23 @@ int ObMediumCompactionInfoList::inner_deep_copy_node( int ObMediumCompactionInfoList::serialize(char *buf, const int64_t buf_len, int64_t &pos) const { int ret = OB_SUCCESS; + int64_t new_pos = pos; if (OB_UNLIKELY(nullptr == buf || buf_len <= 0 || pos < 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret), K(buf), K(buf_len), K(pos)); - } else if (OB_FAIL(serialization::encode_vi64(buf, buf_len, new_pos, cur_medium_snapshot_))) { - STORAGE_LOG(WARN, "failed to serialize cur medium snapshot", K(ret), K(buf_len), K(pos)); + } else if (OB_UNLIKELY(!is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("medium info list is invalid", K(ret), KPC(this)); + } else if 
(OB_FAIL(serialization::encode_vi64(buf, buf_len, new_pos, info_))) { + STORAGE_LOG(WARN, "failed to serialize info", K(ret), K(buf_len), K(pos)); + } else if (OB_FAIL(serialization::encode_vi64(buf, buf_len, new_pos, wait_check_medium_scn_))) { + STORAGE_LOG(WARN, "failed to serialize wait_check_medium_scn", K(ret), K(buf_len), K(pos)); } else if (OB_FAIL(serialization::encode_vi64(buf, buf_len, new_pos, medium_info_list_.get_size()))) { LOG_WARN("failed to serialize medium status", K(ret), K(buf_len)); } else { DLIST_FOREACH_X(info, medium_info_list_, OB_SUCC(ret)) { - if (OB_FAIL(info->serialize(buf, buf_len, new_pos))) { + if (OB_FAIL(static_cast(info)->serialize(buf, buf_len, new_pos))) { LOG_WARN("failed to serialize medium compaction info", K(ret), K(buf), K(buf_len), K(new_pos), KPC(info)); } else { LOG_DEBUG("success to serialize medium info", K(ret), KPC(info)); @@ -359,6 +967,7 @@ int ObMediumCompactionInfoList::deserialize( { int ret = OB_SUCCESS; int64_t new_pos = pos; + int64_t deserialize_info = 0; int64_t list_count = 0; if (IS_INIT) { ret = OB_INIT_TWICE; @@ -366,10 +975,20 @@ int ObMediumCompactionInfoList::deserialize( } else if (OB_UNLIKELY(nullptr == buf || data_len <= 0 || pos < 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret), K(buf), K(data_len), K(pos)); - } else if (OB_FAIL(serialization::decode_vi64(buf, data_len, new_pos, &cur_medium_snapshot_))) { + } else if (OB_FAIL(serialization::decode_vi64(buf, data_len, new_pos, &deserialize_info))) { LOG_WARN("failed to deserialize cur medium snapshot", K(ret), K(data_len)); + } else if (0 == deserialize_info) { + if (OB_FAIL(serialization::decode_vi64(buf, data_len, new_pos, &list_count))) { + LOG_WARN("failed to deserialize list count", K(ret), K(data_len)); + } else if (OB_UNLIKELY(0 != list_count)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("list count should be zero in old version medium list", K(ret), K(list_count)); + } + } else if (FALSE_IT(info_ = deserialize_info)) { + } 
else if (OB_FAIL(serialization::decode_vi64(buf, data_len, new_pos, &wait_check_medium_scn_))) { + LOG_WARN("failed to deserialize wait_check_medium_scn", K(ret), K(data_len)); } else if (OB_FAIL(serialization::decode_vi64(buf, data_len, new_pos, &list_count))) { - LOG_WARN("failed to serialize medium status", K(ret), K(data_len)); + LOG_WARN("failed to deserialize list count", K(ret), K(data_len)); } else if (OB_UNLIKELY(list_count < 0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected list count", K(ret), K(list_count)); @@ -397,8 +1016,13 @@ int ObMediumCompactionInfoList::deserialize( } } // end of for } - if (OB_SUCC(ret)) { + if (OB_FAIL(ret)) { + } else if (OB_UNLIKELY(!inner_is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("medium info list is invalid", K(ret), KPC(this)); + } else { allocator_ = &allocator; + compat_ = MEDIUM_LIST_VERSION; is_inited_ = true; pos = new_pos; } @@ -408,10 +1032,11 @@ int ObMediumCompactionInfoList::deserialize( int64_t ObMediumCompactionInfoList::get_serialize_size() const { int64_t len = 0; - len += serialization::encoded_length_vi64(cur_medium_snapshot_); + len += serialization::encoded_length_vi64(info_); + len += serialization::encoded_length_vi64(wait_check_medium_scn_); len += serialization::encoded_length_vi64(medium_info_list_.get_size()); DLIST_FOREACH_NORET(info, medium_info_list_){ - len += info->get_serialize_size(); + len += static_cast(info)->get_serialize_size(); } return len; } @@ -423,16 +1048,16 @@ void ObMediumCompactionInfoList::gene_info( // do nothing } else { J_OBJ_START(); - J_KV("size", size()); + J_KV("size", size(), K_(info), K_(wait_check_medium_scn)); J_COMMA(); BUF_PRINTF("info_list"); J_COLON(); J_OBJ_START(); if (size() > 0) { int i = 0; - DLIST_FOREACH_NORET(info, medium_info_list_){ + DLIST_FOREACH_NORET(info, medium_info_list_) { BUF_PRINTF("[%d]:", i++); - info->gene_info(buf, buf_len, pos); + static_cast(info)->gene_info(buf, buf_len, pos); if (i != size()) { BUF_PRINTF(";"); } diff 
--git a/src/storage/compaction/ob_medium_compaction_mgr.h b/src/storage/compaction/ob_medium_compaction_mgr.h index 5874a4fadc..3bc51afecf 100644 --- a/src/storage/compaction/ob_medium_compaction_mgr.h +++ b/src/storage/compaction/ob_medium_compaction_mgr.h @@ -14,28 +14,73 @@ #define OB_STORAGE_COMPACTION_MEDIUM_COMPACTION_MGR_H_ #include "storage/ob_storage_schema.h" +#include "storage/ob_storage_clog_recorder.h" +#include "lib/container/ob_array_array.h" +#include "storage/meta_mem/ob_tablet_handle.h" +#include "storage/compaction/ob_partition_merge_policy.h" namespace oceanbase { +namespace storage +{ +class ObTablet; +} namespace compaction { -/* - * TODO (@lixia.yq) add parallel merge info struct ObParallelMergeInfo { - int64_t concurrent_cnt; - ObDatumRowkey parallel_end_key[]; // concurrent_cnt - 1 -}; -*/ +public: + ObParallelMergeInfo() + : parallel_info_(0), + parallel_end_key_list_(nullptr), + allocator_(nullptr) + {} + ~ObParallelMergeInfo() { destroy(); } // attention!!! use destroy to free memory + int init(common::ObIAllocator &allocator, const ObParallelMergeInfo &other); + void destroy(); + bool is_valid() const + { + return list_size_ == 0 || nullptr != parallel_end_key_list_; + } -struct ObMediumCompactionInfo : public common::ObDLinkBase + // serialize & deserialize + int serialize(char *buf, const int64_t buf_len, int64_t &pos) const; + int deserialize( + common::ObIAllocator &allocator, + const char *buf, + const int64_t data_len, + int64_t &pos); + int64_t get_serialize_size() const; + + int generate_from_range_array( + ObIAllocator &allocator, + common::ObArrayArray ¶l_range); + + int64_t to_string(char* buf, const int64_t buf_len) const; + static const int64_t MAX_PARALLEL_RANGE_SERIALIZE_LEN = 1 * 1024 * 1024; + static const int64_t VALID_CONCURRENT_CNT = 1; + + union { + uint32_t parallel_info_; + struct { + uint32_t compat_ : 4; + uint32_t list_size_ : 8; + uint32_t reserved_ : 20; + }; + }; + ObStoreRowkey *parallel_end_key_list_; // 
concurrent_cnt - 1 + + ObIAllocator *allocator_; +}; + +struct ObMediumCompactionInfo : public memtable::ObIMultiSourceDataUnit { public: enum ObCompactionType { - MAJOR_COMPACTION = 0, - MEDIUM_COMPACTION = 1, + MEDIUM_COMPACTION = 0, + MAJOR_COMPACTION = 1, COMPACTION_TYPE_MAX, }; const static char *ObCompactionTypeStr[]; @@ -45,9 +90,33 @@ public: ~ObMediumCompactionInfo(); int init(ObIAllocator &allocator, const ObMediumCompactionInfo &medium_info); - bool is_valid() const; - void reset(); int save_storage_schema(ObIAllocator &allocator, const storage::ObStorageSchema &storage_schema); + int gene_parallel_info( + ObIAllocator &allocator, + common::ObArrayArray ¶l_range); + static inline bool is_medium_compaction(const ObCompactionType type) { return MEDIUM_COMPACTION == type; } + static inline bool is_major_compaction(const ObCompactionType type) { return MAJOR_COMPACTION == type; } + inline bool is_major_compaction() const { return is_major_compaction((ObCompactionType)compaction_type_); } + inline bool is_medium_compaction() const { return is_medium_compaction((ObCompactionType)compaction_type_); } + inline void clear_parallel_range() + { + parallel_merge_info_.list_size_ = 0; + parallel_merge_info_.parallel_end_key_list_ = nullptr; + contain_parallel_range_ = false; + } + + // ObIMultiSourceDataUnit section + virtual int deep_copy(const ObIMultiSourceDataUnit *src, ObIAllocator *allocator) override; + virtual void reset() override; + virtual bool is_valid() const override; + virtual inline int64_t get_data_size() const override { return sizeof(ObMediumCompactionInfo); } + virtual inline memtable::MultiSourceDataUnitType type() const override + { + return memtable::MultiSourceDataUnitType::MEDIUM_COMPACTION_INFO; + } + virtual int64_t get_version() const override { return medium_snapshot_; } + virtual bool is_save_last() const override { return false; } + bool from_cur_cluster() const { return cluster_id_ == GCONF.cluster_id; } // serialize & deserialize int 
serialize(char *buf, const int64_t buf_len, int64_t &pos) const; @@ -59,16 +128,17 @@ public: int64_t get_serialize_size() const; void gene_info(char* buf, const int64_t buf_len, int64_t &pos) const; - TO_STRING_KV(K_(medium_compat_version), + TO_STRING_KV(K_(cluster_id), K_(medium_compat_version), "compaction_type", ObMediumCompactionInfo::get_compaction_type_str((ObCompactionType)compaction_type_), - K_(cluster_id), K_(medium_snapshot), K_(medium_log_ts), K_(is_schema_changed), - K_(storage_schema)); + "medium_merge_reason", ObAdaptiveMergePolicy::merge_reason_to_str(medium_merge_reason_), K_(cluster_id), + K_(medium_snapshot), K_(medium_scn), K_(storage_schema), + K_(contain_parallel_range), K_(parallel_merge_info)); public: static const int64_t MEIDUM_COMPAT_VERSION = 1; private: static const int32_t SCS_ONE_BIT = 1; - static const int32_t SCS_RESERVED_BITS = 57; + static const int32_t SCS_RESERVED_BITS = 49; public: union { @@ -76,65 +146,131 @@ public: struct { uint64_t medium_compat_version_ : 4; uint64_t compaction_type_ : 2; - uint64_t is_schema_changed_ : SCS_ONE_BIT; // TODO for progressive merge + uint64_t contain_parallel_range_ : SCS_ONE_BIT; + uint64_t medium_merge_reason_ : 8; uint64_t reserved_ : SCS_RESERVED_BITS; }; }; + uint64_t cluster_id_; // for backup database to throw MEDIUM_COMPACTION clog int64_t medium_snapshot_; - int64_t medium_log_ts_; // for follower minor merge + share::SCN medium_scn_; // for follower minor merge storage::ObStorageSchema storage_schema_; - //TODO(@lixia.yq) ObParallelMergeInfo parallel_merge_info; + ObParallelMergeInfo parallel_merge_info_; +}; + +class ObTabletMediumCompactionInfoRecorder : public storage::ObIStorageClogRecorder +{ +public: + ObTabletMediumCompactionInfoRecorder(); + ~ObTabletMediumCompactionInfoRecorder(); + int init( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const int64_t max_saved_version, + logservice::ObLogHandler *log_handler); + virtual void destroy() override; + void 
reset(); + bool is_inited() const { return is_inited_; } + // for leader + int submit_medium_compaction_info(ObMediumCompactionInfo &medium_info, ObIAllocator &allocator); + // follower + int replay_medium_compaction_log(const share::SCN &scn, const char *buf, const int64_t size, int64_t &pos); + +private: + virtual int inner_replay_clog( + const int64_t update_version, + const share::SCN &scn, + const char *buf, + const int64_t size, + int64_t &pos) override; virtual int sync_clog_succ_for_leader(const int64_t update_version) override; + virtual void sync_clog_failed_for_leader() override; + + virtual int prepare_struct_in_lock( + int64_t &update_version, + ObIAllocator *allocator, + char *&clog_buf, + int64_t &clog_len) override; + virtual int submit_log( + const int64_t update_version, + const char *clog_buf, + const int64_t clog_len) override; + virtual void free_struct_in_lock() override + { + free_allocated_info(); + } + void free_allocated_info(); + OB_INLINE int dec_ref_on_memtable(const bool sync_finish); +private: + bool is_inited_; + bool ignore_medium_; + share::ObLSID ls_id_; + ObTabletID tablet_id_; + storage::ObTabletHandle *tablet_handle_ptr_; + ObMediumCompactionInfo *medium_info_; + common::ObIAllocator *allocator_; }; class ObMediumCompactionInfoList { public: - enum ObMediumListType - { - MEDIUM_LIST_IN_MEMORY = 0, - MEDIUM_LIST_IN_STORAGE = 1, - MEDIUM_LIST_MAX, - }; - -public: - ObMediumCompactionInfoList(ObMediumListType medium_list_type); + ObMediumCompactionInfoList(); ~ObMediumCompactionInfoList(); - typedef common::ObDList MediumInfoList; + typedef memtable::ObIMultiSourceDataUnit BasicNode; + typedef common::ObDList MediumInfoList; int init(common::ObIAllocator &allocator); int init(common::ObIAllocator &allocator, - const ObMediumCompactionInfoList *old_list); + const ObMediumCompactionInfoList *old_list, + const ObMediumCompactionInfoList *dump_list = nullptr, + const int64_t finish_medium_scn = 0, + const bool 
update_in_major_type_merge = false); + + int init_after_check_finish( + ObIAllocator &allocator, + const ObMediumCompactionInfoList &old_list); void reset(); OB_INLINE bool is_empty() const { return 0 == medium_info_list_.get_size(); } OB_INLINE int64_t size() const { return medium_info_list_.get_size(); } - bool is_valid() const + OB_INLINE bool is_valid() const { - return cur_medium_snapshot_ >= 0 && size() >= 0 - && (MEDIUM_LIST_IN_MEMORY == medium_list_type_ - || (MEDIUM_LIST_IN_STORAGE == medium_list_type_ && size() <= MAX_SERIALIZE_SIZE)); + return is_inited_ && inner_is_valid(); } int add_medium_compaction_info(const ObMediumCompactionInfo &input_info); - int save_medium_compaction_info(const ObMediumCompactionInfoList &input_list); - const MediumInfoList &get_list() const { return medium_info_list_; } - int64_t get_cur_medium_snapshot() const { return cur_medium_snapshot_; } + OB_INLINE const MediumInfoList &get_list() const { return medium_info_list_; } + OB_INLINE int64_t get_wait_check_medium_scn() const { return wait_check_medium_scn_; } + OB_INLINE bool need_check_finish() const { return 0 != wait_check_medium_scn_; } + // check status on serialized medium list + OB_INLINE bool could_schedule_next_round() const + { + return 0 == wait_check_medium_scn_ && medium_info_list_.is_empty(); + } + OB_INLINE ObMediumCompactionInfo::ObCompactionType get_last_compaction_type() const + { + return (ObMediumCompactionInfo::ObCompactionType)last_compaction_type_; + } + int64_t get_schedule_scn(const int64_t major_compaction_scn) const; - int get_specified_snapshot_info( + int get_specified_scn_info( const int64_t snapshot, const ObMediumCompactionInfo *&compaction_info) const; OB_INLINE int64_t get_max_medium_snapshot() const { - return 0 == size() ? 0 : medium_info_list_.get_last()->medium_snapshot_; + return is_empty() ? 
0 : static_cast(medium_info_list_.get_last())->medium_snapshot_; } - static bool need_select_inner_table_to_decide(const int64_t total_medium_info_size) + OB_INLINE int64_t get_min_medium_snapshot() const { - return total_medium_info_size > MAX_SERIALIZE_SIZE; + return is_empty() ? -1 : static_cast(medium_info_list_.get_first())->medium_snapshot_; + } + const ObMediumCompactionInfo *get_first_medium_info() const + { + return is_empty() ? nullptr : static_cast(medium_info_list_.get_first()); } // serialize & deserialize @@ -147,32 +283,61 @@ public: int64_t get_serialize_size() const; void gene_info(char* buf, const int64_t buf_len, int64_t &pos) const; - TO_STRING_KV(K_(is_inited), K_(medium_list_type), K_(cur_medium_snapshot), + + TO_STRING_KV(K_(is_inited), K_(info), K_(last_compaction_type), K_(wait_check_medium_scn), "list_size", size(), K_(medium_info_list)); private: - OB_INLINE int append_list_with_deep_copy(const ObMediumCompactionInfoList &input_list) + void reset_list(); + OB_INLINE bool inner_is_valid() const + { + return last_compaction_type_ < ObMediumCompactionInfo::COMPACTION_TYPE_MAX + && wait_check_medium_scn_ >= 0 && size() >= 0; + } + OB_INLINE int append_list_with_deep_copy( + const int64_t finish_scn, + const bool update_in_major_type_merge, + const ObMediumCompactionInfoList &input_list) { int ret = OB_SUCCESS; DLIST_FOREACH_X(input_info, input_list.medium_info_list_, OB_SUCC(ret)) { - ret = inner_deep_copy_node(*input_info); + const ObMediumCompactionInfo *medium_info = static_cast(input_info); + if (update_in_major_type_merge + && medium_info->medium_snapshot_ == finish_scn) { + last_compaction_type_ = medium_info->compaction_type_; + wait_check_medium_scn_ = finish_scn; + } + if (medium_info->medium_snapshot_ > finish_scn) { + ret = inner_deep_copy_node(*medium_info); + } } return ret; } int inner_deep_copy_node(const ObMediumCompactionInfo &medium_info); private: + static const int64_t MEDIUM_LIST_VERSION = 1; static const int64_t 
MAX_SERIALIZE_SIZE = 2; + static const int32_t MEDIUM_LIST_INFO_RESERVED_BITS = 52; private: bool is_inited_; - ObMediumListType medium_list_type_; // no need to serialize - int64_t cur_medium_snapshot_; common::ObIAllocator *allocator_; + + // need serialize + union { + int64_t info_; + struct { + int64_t compat_ : 8; + int64_t last_compaction_type_ : 4; // check inner_table when last_compaction is major + int64_t reserved_ : MEDIUM_LIST_INFO_RESERVED_BITS; + }; + }; + int64_t wait_check_medium_scn_; + MediumInfoList medium_info_list_; }; - } // namespace compaction } // namespace oceanbase diff --git a/src/storage/compaction/ob_partition_merge_fuser.cpp b/src/storage/compaction/ob_partition_merge_fuser.cpp index 21e6b1951d..ea1dab2a6d 100644 --- a/src/storage/compaction/ob_partition_merge_fuser.cpp +++ b/src/storage/compaction/ob_partition_merge_fuser.cpp @@ -75,7 +75,8 @@ int ObIPartitionMergeFuser::init(const ObMergeParameter &merge_param) STORAGE_LOG(WARN, "Failed to base init", K(ret), K(*this)); } else { is_inited_ = true; - STORAGE_LOG(DEBUG, "Succ to init partition fuser", K(ret), K(*this)); + STORAGE_LOG(DEBUG, "Succ to init partition fuser", K(ret), K(*this), + "merge_range", merge_param.merge_range_); } return ret; } @@ -98,9 +99,6 @@ int ObIPartitionMergeFuser::check_merge_param(const ObMergeParameter &merge_para if (OB_UNLIKELY(!merge_param.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "Invalid argument to init ObIPartitionMergeFuser", K(merge_param), K(ret)); - } else if (OB_UNLIKELY(!merge_param.is_schema_valid())) { - ret = OB_INVALID_ARGUMENT; - STORAGE_LOG(WARN, "schema is invalid", K(ret), K(merge_param)); } else if (OB_FAIL(inner_check_merge_param(merge_param))) { STORAGE_LOG(WARN, "Unexcepted merge param to init merge fuser", K(merge_param), K(ret)); } @@ -151,21 +149,15 @@ ObMajorPartitionMergeFuser::~ObMajorPartitionMergeFuser() void ObMajorPartitionMergeFuser::reset() { default_row_.reset(); - table_schema_ = NULL; 
generated_cols_.reset(); ObIPartitionMergeFuser::reset(); } -bool ObMajorPartitionMergeFuser::is_valid() const -{ - return ObIPartitionMergeFuser::is_valid() && OB_NOT_NULL(table_schema_); -} - int ObMajorPartitionMergeFuser::inner_check_merge_param(const ObMergeParameter &merge_param) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!merge_param.is_major_merge())) { + if (OB_UNLIKELY(!is_major_merge_type(merge_param.merge_type_))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected merge param with major fuser", K(merge_param), K(ret)); } else { @@ -173,7 +165,7 @@ int ObMajorPartitionMergeFuser::inner_check_merge_param(const ObMergeParameter & if (NULL == first_table) { ret = OB_ERR_SYS; LOG_ERROR("first table must not null", K(ret), K(merge_param)); - } else if (!first_table->is_major_sstable() && !first_table->is_buf_minor_sstable()) { + } else if (!first_table->is_major_sstable() && !first_table->is_meta_major_sstable()) { ret = OB_ERR_SYS; LOG_ERROR("invalid first table type", K(ret), K(*first_table)); } @@ -191,33 +183,36 @@ int ObMajorPartitionMergeFuser::inner_init(const ObMergeParameter &merge_param) STORAGE_LOG(WARN, "ObIPartitionMergeFuser init twice", K(ret)); } else if (OB_FAIL(default_row_.init(allocator_, multi_version_column_ids_.count()))) { STORAGE_LOG(WARN, "Failed to init datum row", K(ret)); - } else if (OB_FAIL(merge_param.table_schema_->get_orig_default_row(multi_version_column_ids_, default_row_))) { + } else if (OB_FAIL(merge_param.merge_schema_->get_orig_default_row(multi_version_column_ids_, default_row_))) { STORAGE_LOG(WARN, "Failed to get default row from table schema", K(ret)); } else if (OB_FAIL(ObLobManager::fill_lob_header(allocator_, multi_version_column_ids_, default_row_))) { STORAGE_LOG(WARN, "fail to fill lob header for default row", K(ret)); } else { default_row_.row_flag_.set_flag(ObDmlFlag::DF_UPDATE); - table_schema_ = merge_param.table_schema_; column_cnt_ = multi_version_column_ids_.count(); } if 
(FAILEDx(generated_cols_.init(column_cnt_))) { LOG_WARN("Fail to init generated_cols", K(ret), K(column_cnt_)); } - if (OB_SUCC(ret) && !merge_param.table_schema_->is_materialized_view()) { + if (OB_SUCC(ret) && !merge_param.merge_schema_->is_materialized_view()) { const ObColumnSchemaV2 *column_schema = NULL; for (int64_t i = 0; OB_SUCC(ret) && i < multi_version_column_ids_.count(); ++i) { if (OB_HIDDEN_TRANS_VERSION_COLUMN_ID == multi_version_column_ids_.at(i).col_id_ || OB_HIDDEN_SQL_SEQUENCE_COLUMN_ID == multi_version_column_ids_.at(i).col_id_) { // continue; - } else if (OB_ISNULL(column_schema = merge_param.table_schema_->get_column_schema( + } else { + const ObStorageSchema *storage_schema = static_cast(merge_param.merge_schema_); + const ObStorageColumnSchema *column_schema = NULL; + if (OB_ISNULL(column_schema = storage_schema->get_column_schema( multi_version_column_ids_.at(i).col_id_))) { - ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(WARN, "The column schema is NULL", K(ret), K(i), K(multi_version_column_ids_.at(i))); - } else if (column_schema->is_generated_column() - && !merge_param.table_schema_->is_storage_index_table()) { - // the generated columns in index are always filled before insert - if (OB_FAIL(generated_cols_.push_back(i))) { - LOG_WARN("Fail to push_back generated_cols", K(ret)); + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "The column schema is NULL", K(ret), K(i), K(multi_version_column_ids_.at(i))); + } else if (column_schema->is_generated_column() + && !merge_param.merge_schema_->is_storage_index_table()) { + // the generated columns in index are always filled before insert + if (OB_FAIL(generated_cols_.push_back(i))) { + LOG_WARN("Fail to push_back generated_cols", K(ret)); + } } } } @@ -247,7 +242,8 @@ int ObMajorPartitionMergeFuser::fuse_row(MERGE_ITER_ARRAY ¯o_row_iters) result_row_, nop_pos_, final_result))) { STORAGE_LOG(WARN, "Failed to fuse row", K(ret)); } else { - STORAGE_LOG(DEBUG, "success to fuse row", K(ret), 
K(result_row_)); + STORAGE_LOG(DEBUG, "success to fuse row", K(ret), K(i), + "curr_row", *macro_row_iters.at(i)->get_curr_row(), K(result_row_)); } } } @@ -340,16 +336,16 @@ int ObMajorPartitionMergeFuser::fuse_old_row(ObPartitionMergeIter *row_iter, blo } /* - *ObBufPartitionMergeFuser + *ObMetaPartitionMergeFuser */ -int ObBufPartitionMergeFuser::inner_check_merge_param(const ObMergeParameter &merge_param) +int ObMetaPartitionMergeFuser::inner_check_merge_param(const ObMergeParameter &merge_param) { int ret = OB_SUCCESS; - if (BUF_MINOR_MERGE != merge_param.merge_type_ + if (META_MAJOR_MERGE != merge_param.merge_type_ || merge_param.version_range_.multi_version_start_ != merge_param.version_range_.snapshot_version_) { ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(ERROR, "Unexpected merge param with buf fuser", K(ret), K(merge_param)); + STORAGE_LOG(ERROR, "Unexpected merge param with meta fuser", K(ret), K(merge_param)); } return ret; @@ -378,7 +374,7 @@ int ObMinorPartitionMergeFuser::inner_check_merge_param(const ObMergeParameter & { int ret = OB_SUCCESS; - if (OB_UNLIKELY(merge_param.is_major_merge())) { + if (OB_UNLIKELY(is_major_merge_type(merge_param.merge_type_))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected merge param with major fuser", K(merge_param), K(ret)); } else { diff --git a/src/storage/compaction/ob_partition_merge_fuser.h b/src/storage/compaction/ob_partition_merge_fuser.h index 8c9dd95d70..842f56fcd2 100644 --- a/src/storage/compaction/ob_partition_merge_fuser.h +++ b/src/storage/compaction/ob_partition_merge_fuser.h @@ -88,16 +88,13 @@ public: ObMajorPartitionMergeFuser() : ObIPartitionMergeFuser(), default_row_(), - table_schema_(NULL), generated_cols_(allocator_) {} virtual ~ObMajorPartitionMergeFuser(); virtual void reset() override; - virtual bool is_valid() const override; virtual int fuse_row(MERGE_ITER_ARRAY ¯o_row_iters) override; virtual const char *get_fuser_name() const override { return "ObMajorPartitionMergeFuser"; } - 
INHERIT_TO_STRING_KV("ObIPartitionMergeFuser", ObIPartitionMergeFuser, - K_(default_row), KP_(table_schema)); + INHERIT_TO_STRING_KV("ObIPartitionMergeFuser", ObIPartitionMergeFuser, K_(default_row)); protected: virtual int inner_check_merge_param(const ObMergeParameter &merge_param); virtual int inner_init(const ObMergeParameter &merge_param) override; @@ -107,24 +104,23 @@ protected: const int64_t rowkey_column_cnt) override; protected: blocksstable::ObDatumRow default_row_; - const share::schema::ObTableSchema *table_schema_; ObFixedArray generated_cols_; private: DISALLOW_COPY_AND_ASSIGN(ObMajorPartitionMergeFuser); }; -class ObBufPartitionMergeFuser : public ObMajorPartitionMergeFuser +class ObMetaPartitionMergeFuser : public ObMajorPartitionMergeFuser { public: - ObBufPartitionMergeFuser() {} - virtual ~ObBufPartitionMergeFuser() {} - virtual const char *get_fuser_name() const override { return "ObBufPartitionMergeFuser"; } + ObMetaPartitionMergeFuser() {} + virtual ~ObMetaPartitionMergeFuser() {} + virtual const char *get_fuser_name() const override { return "ObMetaPartitionMergeFuser"; } INHERIT_TO_STRING_KV("ObMajorPartitionMergeFuser", ObMajorPartitionMergeFuser, - "cur_fuser", "ObBufPartitionMergeFuser"); + "cur_fuser", "ObMetaPartitionMergeFuser"); protected: virtual int inner_check_merge_param(const ObMergeParameter &merge_param) override; private: - DISALLOW_COPY_AND_ASSIGN(ObBufPartitionMergeFuser); + DISALLOW_COPY_AND_ASSIGN(ObMetaPartitionMergeFuser); }; diff --git a/src/storage/compaction/ob_partition_merge_iter.cpp b/src/storage/compaction/ob_partition_merge_iter.cpp index 2495c94c31..2bba2eb001 100644 --- a/src/storage/compaction/ob_partition_merge_iter.cpp +++ b/src/storage/compaction/ob_partition_merge_iter.cpp @@ -105,7 +105,7 @@ int ObPartitionMergeIter::init_query_base_params(const ObMergeParameter &merge_p true))) { LOG_WARN("Fail to init read_info", K(ret)); } else if (OB_FAIL(access_param_.init_merge_param(tablet_id_.id(), tablet_id_, - 
read_info_, merge_param.is_multi_version_minor_merge()))) { + read_info_, is_multi_version_merge(merge_param.merge_type_)))) { LOG_WARN("Failed to init table access param", K(ret), KPC(this)); } else if (OB_FAIL(snapshot_version.convert_for_tx(merge_param.version_range_.snapshot_version_))) { LOG_WARN("Failed to convert", K(ret), K_(merge_param.version_range_.snapshot_version)); @@ -122,7 +122,7 @@ int ObPartitionMergeIter::init_query_base_params(const ObMergeParameter &merge_p false /*full row scan flag, obsoleted*/, false,/*index back*/ false); /*query_stat*/ - query_flag.multi_version_minor_merge_ = merge_param.is_multi_version_minor_merge(); + query_flag.multi_version_minor_merge_ = is_multi_version_merge(merge_param.merge_type_); if (OB_FAIL(access_context_.init(query_flag, store_ctx_, allocator_, stmt_allocator_, merge_param.version_range_))) { LOG_WARN("Failed to init table access context", K(ret), K(query_flag)); @@ -210,6 +210,37 @@ bool ObPartitionMergeIter::is_tx_table_valid() const return bret; } +int ObPartitionMergeIter::check_merge_range_cross(ObDatumRange &data_range, bool &range_cross) +{ + int ret = OB_SUCCESS; + range_cross = false; + if (merge_range_.is_whole_range()) { + // parallel minor merge should consider open the border macro blocks + } else { + int cmp_ret = 0; + // safe to modify range of curr_macro_block with overwriting ptr only + if (OB_FAIL(merge_range_.get_start_key().compare(data_range.get_start_key(), + read_info_.get_datum_utils(), + cmp_ret))) { + STORAGE_LOG(WARN, "Failed to compare start key", K(ret), K_(merge_range), K(data_range)); + } else if (cmp_ret > 0) { + data_range.start_key_ = merge_range_.get_start_key(); + range_cross = true; + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(merge_range_.get_end_key().compare(data_range.get_end_key(), + read_info_.get_datum_utils(), + cmp_ret))) { + STORAGE_LOG(WARN, "Failed to compare end key", K(ret), K_(merge_range), K(data_range)); + } else if (cmp_ret <= 0) { + 
data_range.end_key_ = merge_range_.get_end_key(); + range_cross = true; + } + } + LOG_DEBUG("check macro block range cross", K(ret), K(data_range), K(merge_range_), K(range_cross)); + return ret; +} + /* * ObPartitionRowMergeIter used for major merge */ @@ -230,7 +261,7 @@ bool ObPartitionRowMergeIter::inner_check(const ObMergeParameter &merge_param) if (!table_->is_sstable()) { bret = false; LOG_WARN("Unexpected table type for major merge", KPC(table_)); - } else if (merge_param.is_multi_version_minor_merge()) { + } else if (is_multi_version_merge(merge_param.merge_type_)) { bret = false; LOG_WARN("Unexpected merge type for major merge", K(merge_param)); } @@ -315,7 +346,7 @@ void ObPartitionMacroMergeIter::reset() bool ObPartitionMacroMergeIter::inner_check(const ObMergeParameter &merge_param) { bool bret = true; - if (OB_UNLIKELY(!merge_param.is_major_merge() && !merge_param.is_buf_minor_merge())) { + if (OB_UNLIKELY(!is_major_merge_type(merge_param.merge_type_) && !is_meta_major_merge(merge_param.merge_type_))) { bret = false; LOG_WARN("Unexpected merge type for major macro merge iter", K(bret), K(merge_param)); } else if (merge_param.merge_level_ != MACRO_BLOCK_MERGE_LEVEL) { @@ -327,7 +358,7 @@ bool ObPartitionMacroMergeIter::inner_check(const ObMergeParameter &merge_param) } else if (OB_UNLIKELY(!is_base_iter())) { bret = false; LOG_WARN("Unexpected iter idx for major macro merge iter", K(bret), K(merge_param)); - } else if (OB_UNLIKELY(!table_->is_major_sstable() && !table_->is_buf_minor_sstable())) { + } else if (OB_UNLIKELY(!table_->is_major_sstable() && !table_->is_meta_major_sstable())) { bret = false; LOG_WARN("Unexpected base table type for major macro merge iter", K(bret), KPC(table_)); } @@ -343,6 +374,18 @@ int ObPartitionMacroMergeIter::next_range() } else if (FALSE_IT(reset_macro_block_desc())) { } else if (OB_SUCC(macro_block_iter_->get_next_macro_block(curr_block_desc_))) { macro_block_opened_ = false; + bool need_open = false; + if 
(OB_FAIL(check_merge_range_cross(curr_block_desc_.range_, need_open))) { + LOG_WARN("failed to check range cross", K(ret), K(curr_block_desc_.range_)); + } else if (need_open) { + if (OB_FAIL(open_curr_range(false/*for rewrite*/))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to open curr range", K(ret), K(curr_block_desc_)); + } + } else { + LOG_TRACE("open macro for cross range", K(ret), K(curr_block_desc_), KPC(table_), KPC(this)); + } + } } else if (OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("Failed to get next macro block", K(ret)); } else { @@ -406,7 +449,6 @@ int ObPartitionMacroMergeIter::inner_init(const ObMergeParameter &merge_param) int ObPartitionMacroMergeIter::next() { int ret = OB_SUCCESS; - if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("ObPartitionMacroMergeIter is not inited", K(ret), K(*this)); @@ -502,7 +544,7 @@ bool ObPartitionMicroMergeIter::inner_check(const ObMergeParameter &merge_param) { bool bret = true; - if (OB_UNLIKELY(!merge_param.is_major_merge())) { + if (OB_UNLIKELY(!is_major_merge_type(merge_param.merge_type_) && !is_meta_major_merge(merge_param.merge_type_))) { bret = false; LOG_WARN("Unexpected merge type for major micro merge iter", K(bret), K(merge_param)); } else if (OB_UNLIKELY(merge_param.merge_level_ != MICRO_BLOCK_MERGE_LEVEL)) { @@ -514,7 +556,7 @@ bool ObPartitionMicroMergeIter::inner_check(const ObMergeParameter &merge_param) } else if (OB_UNLIKELY(!is_base_iter())) { bret = false; LOG_WARN("Unexpected iter idx for major micro merge iter", K(bret), K(merge_param)); - } else if (OB_UNLIKELY(!table_->is_major_sstable() && !table_->is_buf_minor_sstable())) { + } else if (OB_UNLIKELY(!table_->is_major_sstable() && !table_->is_meta_major_sstable())) { bret = false; LOG_WARN("Unexpected base table type for major macro merge iter", K(bret), KPC(table_)); } @@ -599,7 +641,6 @@ int ObPartitionMicroMergeIter::next_range() } else { iter_end_ = true; } - return ret; } @@ -620,7 +661,7 @@ int 
ObPartitionMicroMergeIter::open_curr_range(const bool for_rewrite, const boo micro_block_opened_ = false; need_reuse_micro_block_ = false; ret = ObPartitionMacroMergeIter::open_curr_range(for_rewrite); - LOG_DEBUG("open curr range for macro block", K(*this)); + LOG_DEBUG("open curr range for macro block", K(*this), K(curr_block_desc_)); } } else if (macro_block_opened_) { if (OB_FAIL(open_curr_micro_block())) { @@ -644,7 +685,7 @@ int ObPartitionMicroMergeIter::open_curr_range(const bool for_rewrite, const boo macro_block_opened_ = true; micro_row_scanner_->reuse(); ret = next(); - LOG_DEBUG("init micro block iter for macro block", K(*this)); + LOG_DEBUG("init micro block iter for macro block", K(*this), K(macro_block_iter_->get_micro_endkeys())); } } @@ -655,7 +696,6 @@ int ObPartitionMicroMergeIter::open_curr_range(const bool for_rewrite, const boo int ObPartitionMicroMergeIter::open_curr_micro_block() { int ret = OB_SUCCESS; - ObMicroBlockData decompressed_data; const ObMicroIndexInfo *micro_index_info = curr_micro_block_->micro_index_info_; ObMicroBlockDesMeta micro_des_meta; @@ -702,6 +742,7 @@ int ObPartitionMicroMergeIter::next() { int ret = OB_SUCCESS; bool row_itered = false; + bool range_cross = false; if (IS_NOT_INIT) { ret = OB_NOT_INIT; @@ -734,8 +775,18 @@ int ObPartitionMicroMergeIter::next() if (OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("Failed to get next range", K(ret), K(*this)); } - } else { - LOG_DEBUG("Merge iter next with range", K(*this)); + } else if (!macro_block_opened_ + && OB_FAIL(check_merge_range_cross(curr_block_desc_.range_, range_cross))) { + LOG_WARN("failed to check range cross", K(ret), K(curr_block_desc_.range_)); + } else if (range_cross) { + need_reuse_micro_block_ = false; + if (OB_FAIL(open_curr_range(false/*for rewrite*/))) { + if (OB_UNLIKELY(OB_ITER_END != ret)) { + LOG_WARN("failed to open curr range", K(ret), K(curr_block_desc_)); + } + } else { + LOG_TRACE("open macro for cross range", K(ret), K(curr_block_desc_), 
KPC(table_), KPC(curr_row_)); + } } } @@ -804,7 +855,7 @@ bool ObPartitionMinorRowMergeIter::inner_check(const ObMergeParameter &merge_par { bool bret = true; - if (!merge_param.is_multi_version_minor_merge() && !storage::is_backfill_tx_merge(merge_param.merge_type_)) { + if (!is_multi_version_merge(merge_param.merge_type_) && !storage::is_backfill_tx_merge(merge_param.merge_type_)) { bret = false; LOG_WARN("Unexpected merge type for minor row merge iter", K(bret), K(merge_param)); } else if (merge_param.merge_level_ != MACRO_BLOCK_MERGE_LEVEL) { @@ -853,7 +904,7 @@ int ObPartitionMinorRowMergeIter::inner_init(const ObMergeParameter &merge_param if (OB_FAIL(common_minor_inner_init(merge_param))) { LOG_WARN("Failed to do commont minor inner init", K(ret), K(merge_param)); } else if (table_->is_data_memtable()) { - if (OB_UNLIKELY(!merge_param.is_mini_merge())) { + if (OB_UNLIKELY(!is_mini_merge(merge_param.merge_type_))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected memtable for mini minor merge", K(ret), K(merge_param), KPC(table_)); } @@ -1289,28 +1340,8 @@ int ObPartitionMinorMacroMergeIter::check_need_open_curr_macro_block(bool &need) // need to open to recycle left rows of the last rowkey in recycled macro block // 2. 
last_macro_reused and current can be recycled: need to open to recycle micro blocks need = true; - } else if (merge_range_.is_whole_range()) { - // parallel minor merge should consider open the border macro blocks - } else { - int cmp_ret = 0; - // safe to modify range of curr_macro_block with overwriting ptr only - if (OB_FAIL(merge_range_.get_start_key().compare(curr_block_desc_.range_.get_start_key(), - read_info_.get_datum_utils(), - cmp_ret))) { - STORAGE_LOG(WARN, "Failed to compare start key", K(ret), K_(merge_range), K(curr_block_desc_.range_)); - } else if (cmp_ret > 0) { - curr_block_desc_.range_.start_key_ = merge_range_.get_start_key(); - need = true; - } - if (OB_FAIL(ret)) { - } else if (OB_FAIL(merge_range_.get_end_key().compare(curr_block_desc_.range_.get_end_key(), - read_info_.get_datum_utils(), - cmp_ret))) { - STORAGE_LOG(WARN, "Failed to compare end key", K(ret), K_(merge_range), K(curr_block_desc_.range_)); - } else if (cmp_ret <= 0) { - curr_block_desc_.range_.end_key_ = merge_range_.get_end_key(); - need = true; - } + } else if (OB_FAIL(check_merge_range_cross(curr_block_desc_.range_, need))) { + LOG_WARN("failed to check range cross", K(ret), K(curr_block_desc_.range_)); } LOG_DEBUG("check macro block need open", K(curr_block_desc_.range_), K(merge_range_), K(need)); return ret; @@ -1505,7 +1536,9 @@ int ObPartitionMinorMacroMergeIter::inner_next(const bool open_macro) if (OB_FAIL(open_curr_macro_block())) { LOG_WARN("Failed to open current macro block", K(ret), K(open_macro)); } else if (OB_FAIL(inner_next(open_macro))) { - LOG_WARN("Failed to inner next row", K(ret)); + if (OB_ITER_END != ret) { + LOG_WARN("Failed to inner next row", K(ret),KPC(this)); + } } else { LOG_DEBUG("open macro block on demand", K(open_macro), K(need_check), KPC(this)); } diff --git a/src/storage/compaction/ob_partition_merge_iter.h b/src/storage/compaction/ob_partition_merge_iter.h index bcc453494e..53ba85adfd 100644 --- 
a/src/storage/compaction/ob_partition_merge_iter.h +++ b/src/storage/compaction/ob_partition_merge_iter.h @@ -91,6 +91,7 @@ public: } return bret; } + int check_merge_range_cross(ObDatumRange &data_range, bool &range_cross); OB_INLINE const ObTableReadInfo &get_read_info() const{ return read_info_; } VIRTUAL_TO_STRING_KV(K_(tablet_id), K_(iter_end), K_(schema_rowkey_column_cnt), K_(schema_version), K_(merge_range), diff --git a/src/storage/compaction/ob_partition_merge_policy.cpp b/src/storage/compaction/ob_partition_merge_policy.cpp index d643e4f27d..5abbc23ea3 100644 --- a/src/storage/compaction/ob_partition_merge_policy.cpp +++ b/src/storage/compaction/ob_partition_merge_policy.cpp @@ -16,10 +16,13 @@ #include "share/ob_debug_sync.h" #include "share/ob_force_print_log.h" #include "share/rc/ob_tenant_base.h" +#include "storage/memtable/ob_memtable.h" #include "storage/tablet/ob_tablet.h" #include "storage/tablet/ob_tablet_table_store.h" +#include "storage/tablet/ob_table_store_util.h" #include "storage/ob_storage_schema.h" #include "storage/ob_storage_struct.h" +#include "storage/ob_tenant_tablet_stat_mgr.h" #include "storage/compaction/ob_compaction_diagnose.h" #include "storage/compaction/ob_tenant_compaction_progress.h" #include "observer/omt/ob_tenant_config_mgr.h" @@ -41,19 +44,16 @@ namespace compaction // keep order with ObMergeType ObPartitionMergePolicy::GetMergeTables ObPartitionMergePolicy::get_merge_tables[MERGE_TYPE_MAX] - = { ObPartitionMergePolicy::get_mini_minor_merge_tables, - ObPartitionMergePolicy::get_buf_minor_merge_tables, + = { ObPartitionMergePolicy::get_minor_merge_tables, ObPartitionMergePolicy::get_hist_minor_merge_tables, + ObAdaptiveMergePolicy::get_meta_merge_tables, ObPartitionMergePolicy::get_mini_merge_tables, - ObPartitionMergePolicy::get_major_merge_tables, - ObPartitionMergePolicy::get_mini_minor_merge_tables + ObPartitionMergePolicy::get_medium_merge_tables, + ObPartitionMergePolicy::get_medium_merge_tables, }; 
-ObPartitionMergePolicy::CheckNeedMerge ObPartitionMergePolicy::check_need_minor_merge[MERGE_TYPE_MAX] - = { ObPartitionMergePolicy::check_need_mini_minor_merge, - ObPartitionMergePolicy::check_need_buf_minor_merge, - ObPartitionMergePolicy::check_need_hist_minor_merge, - ObPartitionMergePolicy::check_need_mini_merge}; +const int64_t ObPartitionMergePolicy::OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG; +const int64_t ObPartitionMergePolicy::OB_MINOR_PARALLEL_SSTABLE_CNT_TRIGGER; int ObPartitionMergePolicy::get_neighbour_freeze_info( const int64_t snapshot_version, @@ -61,32 +61,93 @@ int ObPartitionMergePolicy::get_neighbour_freeze_info( ObTenantFreezeInfoMgr::NeighbourFreezeInfo &freeze_info) { int ret = OB_SUCCESS; - ObTenantFreezeInfoMgr *freeze_info_mgr = nullptr; - if (OB_ISNULL(freeze_info_mgr = MTL(ObTenantFreezeInfoMgr *))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected null freeze_info_mgr", K(ret)); - } else if (OB_SUCC(freeze_info_mgr->get_neighbour_major_freeze(snapshot_version, freeze_info))) { - } else if (OB_ENTRY_NOT_EXIST == ret) { - LOG_WARN("Failed to get freeze info, use snapshot_gc_ts instead", K(ret), K(snapshot_version)); - ret = OB_SUCCESS; - freeze_info.reset(); - freeze_info.next.freeze_version = INT64_MAX; - if (OB_NOT_NULL(last_major)) { - freeze_info.prev.freeze_version = last_major->get_snapshot_version(); + if (OB_FAIL(MTL(ObTenantFreezeInfoMgr *)->get_neighbour_major_freeze(snapshot_version, freeze_info))) { + if (OB_ENTRY_NOT_EXIST == ret) { + LOG_WARN("Failed to get freeze info, use snapshot_gc_ts instead", K(ret), K(snapshot_version)); + ret = OB_SUCCESS; + freeze_info.reset(); + freeze_info.next.freeze_version = INT64_MAX; + if (OB_NOT_NULL(last_major)) { + freeze_info.prev.freeze_version = last_major->get_snapshot_version(); + } + } else { + LOG_WARN("Failed to get neighbour major freeze info", K(ret), K(snapshot_version)); } + } + return ret; +} + +int ObPartitionMergePolicy::get_medium_merge_tables( + const ObGetMergeTablesParam 
¶m, + ObLS &ls, + const ObTablet &tablet, + ObGetMergeTablesResult &result) +{ + int ret = OB_SUCCESS; + ObSSTable *base_table = nullptr; + const ObTabletTableStore &table_store = tablet.get_table_store(); + result.reset(); + result.merge_version_ = param.merge_version_; + result.suggest_merge_type_ = param.merge_type_; + DEBUG_SYNC(BEFORE_GET_MAJOR_MGERGE_TABLES); + + if (OB_UNLIKELY(!table_store.is_valid() || !param.is_valid() || !is_major_merge_type(param.merge_type_))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid argument", K(ret), K(table_store), K(param)); + } else if (OB_ISNULL(base_table = static_cast(table_store.get_major_sstables().get_boundary_table(true/*last*/)))) { + ret = OB_ENTRY_NOT_EXIST; + LOG_ERROR("major sstable not exist", K(ret), K(table_store)); + } else if (OB_FAIL(base_table->get_frozen_schema_version(result.base_schema_version_))) { + LOG_WARN("failed to get frozen schema version", K(ret)); + } else if (OB_FAIL(result.handle_.add_table(base_table))) { + LOG_WARN("failed to add base_table to result", K(ret)); + } else if (base_table->get_snapshot_version() >= param.merge_version_) { + ret = OB_NO_NEED_MERGE; + LOG_INFO("medium merge already finished", K(ret), KPC(base_table), K(result)); } else { - LOG_WARN("Failed to get neighbour major freeze info", K(ret), K(snapshot_version)); + const ObSSTableArray &minor_tables = table_store.get_minor_sstables(); + bool start_add_table_flag = false; + for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.count_; ++i) { + if (OB_ISNULL(minor_tables[i])) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("table must not null", K(ret), K(i), K(minor_tables)); + // TODO: add right boundary for major + } else if (!start_add_table_flag && minor_tables[i]->get_upper_trans_version() >= base_table->get_snapshot_version()) { + start_add_table_flag = true; + } + if (OB_SUCC(ret) && start_add_table_flag) { + if (OB_FAIL(result.handle_.add_table(minor_tables[i]))) { + LOG_WARN("failed to add table", K(ret)); + } + 
} + } + if (OB_SUCC(ret) && OB_FAIL(result.handle_.check_continues(nullptr))) { + LOG_WARN("failed to check continues for major merge", K(ret)); + } + } + + if (OB_SUCC(ret)) { + result.version_range_.base_version_ = base_table->get_upper_trans_version(); + result.version_range_.multi_version_start_ = tablet.get_multi_version_start(); + result.version_range_.snapshot_version_ = param.merge_version_; + if (OB_FAIL(get_multi_version_start(param.merge_type_, ls, tablet, result.version_range_))) { + LOG_WARN("failed to get multi version_start", K(ret)); + } else { + result.read_base_version_ = base_table->get_snapshot_version(); + result.create_snapshot_version_ = base_table->get_meta().get_basic_meta().create_snapshot_version_; + } } return ret; } int ObPartitionMergePolicy::get_mini_merge_tables( const ObGetMergeTablesParam ¶m, - const int64_t multi_version_start, + ObLS &ls, const ObTablet &tablet, ObGetMergeTablesResult &result) { int ret = OB_SUCCESS; + ObTenantFreezeInfoMgr::NeighbourFreezeInfo freeze_info; int64_t merge_inc_base_version = tablet.get_snapshot_version(); const ObMergeType merge_type = param.merge_type_; @@ -106,19 +167,19 @@ int ObPartitionMergePolicy::get_mini_merge_tables( LOG_ERROR("Too many sstables, delay mini merge until sstable count falls below MAX_SSTABLE_CNT", K(ret), K(table_store), K(tablet)); // add compaction diagnose info - diagnose_table_count_unsafe(MINI_MERGE, tablet); + ObPartitionMergePolicy::diagnose_table_count_unsafe(MINI_MERGE, tablet); } else if (OB_FAIL(tablet.get_memtable_mgr()->get_all_memtables(memtable_handles))) { LOG_WARN("failed to get all memtables from memtable mgr", K(ret)); } else if (OB_FAIL(get_neighbour_freeze_info(merge_inc_base_version, table_store.get_major_sstables().get_boundary_table(true), freeze_info))) { LOG_WARN("failed to get next major freeze", K(ret), K(merge_inc_base_version), K(table_store)); - } else if (OB_FAIL(find_mini_merge_tables(param, freeze_info, tablet, memtable_handles, result))) { 
+ } else if (OB_FAIL(find_mini_merge_tables(param, freeze_info, ls, tablet, memtable_handles, result))) { if (OB_NO_NEED_MERGE != ret) { LOG_WARN("failed to find mini merge tables", K(ret), K(freeze_info)); } } else if (!result.update_tablet_directly_ - && OB_FAIL(deal_with_minor_result(merge_type, multi_version_start, tablet, result))) { + && OB_FAIL(deal_with_minor_result(merge_type, ls, tablet, result))) { LOG_WARN("failed to deal with minor merge result", K(ret)); } return ret; @@ -127,6 +188,7 @@ int ObPartitionMergePolicy::get_mini_merge_tables( int ObPartitionMergePolicy::find_mini_merge_tables( const ObGetMergeTablesParam ¶m, const ObTenantFreezeInfoMgr::NeighbourFreezeInfo &freeze_info, + ObLS &ls, const storage::ObTablet &tablet, ObIArray &memtable_handles, ObGetMergeTablesResult &result) @@ -149,7 +211,7 @@ int ObPartitionMergePolicy::find_mini_merge_tables( ret = OB_ERR_SYS; LOG_ERROR("memtable must not null", K(ret), K(tablet)); } else if (OB_UNLIKELY(memtable->is_active_memtable())) { - LOG_INFO("skip active memtable", K(i), KPC(memtable), K(memtable_handles)); + LOG_DEBUG("skip active memtable", K(i), KPC(memtable), K(memtable_handles)); break; } else if (!memtable->can_be_minor_merged()) { FLOG_INFO("memtable cannot mini merge now", K(ret), K(i), KPC(memtable), K(max_snapshot_version), K(memtable_handles), K(param)); @@ -190,7 +252,8 @@ int ObPartitionMergePolicy::find_mini_merge_tables( } } } // end for - if (OB_SUCC(ret)) { + if (OB_FAIL(ret)) { + } else { result.suggest_merge_type_ = param.merge_type_; result.version_range_.multi_version_start_ = tablet.get_multi_version_start(); if (result.handle_.empty()) { @@ -198,6 +261,7 @@ int ObPartitionMergePolicy::find_mini_merge_tables( } else if (result.scn_range_.end_scn_ <= clog_checkpoint_scn) { if (need_update_snapshot_version) { result.update_tablet_directly_ = true; + result.version_range_.multi_version_start_ = 0; // set multi_version_start to pass tablet::init check LOG_INFO("meet empty force 
freeze memtable, could update tablet directly", K(ret), K(result)); } else { ret = OB_NO_NEED_MERGE; @@ -217,7 +281,7 @@ int ObPartitionMergePolicy::find_mini_merge_tables( int ObPartitionMergePolicy::deal_with_minor_result( const ObMergeType &merge_type, - const int64_t expect_multi_version_start, + ObLS &ls, const ObTablet &tablet, ObGetMergeTablesResult &result) { @@ -228,34 +292,11 @@ int ObPartitionMergePolicy::deal_with_minor_result( } else if (OB_UNLIKELY(!result.scn_range_.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_ERROR("Invalid argument to check result", K(ret), K(result)); - } else if (OB_FAIL(result.handle_.check_continues(merge_type == BUF_MINOR_MERGE ? nullptr : &result.scn_range_))) { + } else if (OB_FAIL(result.handle_.check_continues(&result.scn_range_))) { LOG_WARN("failed to check continues", K(ret), K(result)); - } else if (BUF_MINOR_MERGE == merge_type) { + } else if (OB_FAIL(get_multi_version_start(merge_type, ls, tablet, result.version_range_))) { + LOG_WARN("failed to get kept multi_version_start", K(ret), K(merge_type), K(tablet)); } else { - // update multi_version_start - if (expect_multi_version_start < result.version_range_.multi_version_start_) { - LOG_WARN("cannot reserve multi_version_start", "multi_version_start", result.version_range_.multi_version_start_, - K(expect_multi_version_start)); - } else if (expect_multi_version_start < result.version_range_.snapshot_version_) { - result.version_range_.multi_version_start_ = expect_multi_version_start; - LOG_TRACE("succ reserve multi_version_start", "multi_version_start", result.version_range_.multi_version_start_, - K(expect_multi_version_start)); - } else { - result.version_range_.multi_version_start_ = result.version_range_.snapshot_version_; - LOG_TRACE("no need keep multi version", "multi_version_start", result.version_range_.multi_version_start_, - K(expect_multi_version_start)); - } - result.version_range_.base_version_ = 0; - - if (OB_SUCC(ret) && MINI_MERGE != merge_type) { - 
const ObTabletTableStore &table_store = tablet.get_table_store(); - if (OB_FAIL(table_store.get_recycle_version(result.version_range_.multi_version_start_, result.version_range_.base_version_))) { - LOG_WARN("Fail to get table store recycle version", K(ret), K(result.version_range_), K(table_store)); - } - } - } - - if (OB_SUCC(ret)) { result.schema_version_ = tablet.get_storage_schema().schema_version_; if (MINI_MERGE == merge_type) { ObITable *table = NULL; @@ -273,13 +314,22 @@ int ObPartitionMergePolicy::deal_with_minor_result( LOG_WARN("failed to get frozen schema version", K(ret), K(result)); } } + if (OB_SUCC(ret)) { + result.version_range_.base_version_ = 0; + if (OB_SUCC(ret) && !is_mini_merge(merge_type)) { + const ObTabletTableStore &table_store = tablet.get_table_store(); + if (OB_FAIL(table_store.get_recycle_version(result.version_range_.multi_version_start_, result.version_range_.base_version_))) { + LOG_WARN("Fail to get table store recycle version", K(ret), K(result.version_range_), K(table_store)); + } + } + } } return ret; } -int ObPartitionMergePolicy::get_mini_minor_merge_tables( +int ObPartitionMergePolicy::get_minor_merge_tables( const ObGetMergeTablesParam ¶m, - const int64_t multi_version_start, + ObLS &ls, const ObTablet &tablet, ObGetMergeTablesResult &result) { @@ -291,10 +341,10 @@ int ObPartitionMergePolicy::get_mini_minor_merge_tables( DEBUG_SYNC(BEFORE_GET_MINOR_MGERGE_TABLES); // no need to distinguish data tablet and tx tablet, all minor tables included - if (MINI_MINOR_MERGE != merge_type && MINOR_MERGE != merge_type) { + if (OB_UNLIKELY(!is_minor_merge(merge_type))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("get invalid arguments", K(ret), K(merge_type)); - } else if (tablet.is_ls_tx_data_tablet()) { + } else if (tablet.is_ls_inner_tablet()) { min_snapshot_version = 0; max_snapshot_version = INT64_MAX; } else if (OB_FAIL(get_boundary_snapshot_version(tablet, min_snapshot_version, max_snapshot_version))) { @@ -302,22 +352,25 @@ int 
ObPartitionMergePolicy::get_mini_minor_merge_tables( } if (OB_FAIL(ret)) { - } else if (OB_FAIL(find_mini_minor_merge_tables(param, - min_snapshot_version, - max_snapshot_version, - multi_version_start, - tablet, - result))) { - LOG_WARN("failed to get minor merge tables for mini minor merge", K(ret), K(max_snapshot_version), - K(multi_version_start)); + } else if (OB_FAIL(find_minor_merge_tables(param, + min_snapshot_version, + max_snapshot_version, + ls, + tablet, + result))) { + if (OB_NO_NEED_MERGE != ret) { + LOG_WARN("failed to get minor merge tables", K(ret), K(max_snapshot_version)); + } } + return ret; } int ObPartitionMergePolicy::get_boundary_snapshot_version( const ObTablet &tablet, int64_t &min_snapshot, - int64_t &max_snapshot) + int64_t &max_snapshot, + const bool check_table_cnt) { int ret = OB_SUCCESS; int64_t merge_inc_base_version = tablet.get_snapshot_version(); @@ -325,14 +378,17 @@ int ObPartitionMergePolicy::get_boundary_snapshot_version( const ObTabletTableStore &table_store = tablet.get_table_store(); ObITable *last_major_table = table_store.get_major_sstables().get_boundary_table(true); - if (OB_UNLIKELY(!table_store.is_valid())) { + if (OB_UNLIKELY(tablet.is_ls_inner_tablet())) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported for special tablet", K(ret), K(tablet)); + } else if (OB_UNLIKELY(!table_store.is_valid())) { ret = OB_ERR_SYS; LOG_WARN("table store not valid", K(ret), K(table_store)); } else if (OB_FAIL(get_neighbour_freeze_info(merge_inc_base_version, last_major_table, freeze_info))) { LOG_WARN("failed to get freeze info", K(ret), K(merge_inc_base_version), K(table_store)); - } else if (table_store.get_table_count() >= OB_UNSAFE_TABLE_CNT) { + } else if (check_table_cnt && table_store.get_table_count() >= OB_UNSAFE_TABLE_CNT) { max_snapshot = INT64_MAX; if (table_store.get_table_count() >= OB_EMERGENCY_TABLE_CNT) { min_snapshot = 0; @@ -340,17 +396,31 @@ int ObPartitionMergePolicy::get_boundary_snapshot_version( 
min_snapshot = last_major_table->get_snapshot_version(); } } else { - min_snapshot = freeze_info.prev.freeze_version; + if (OB_NOT_NULL(last_major_table)) { + min_snapshot = max(last_major_table->get_snapshot_version(), freeze_info.prev.freeze_version); + } else { + min_snapshot = freeze_info.prev.freeze_version; + } max_snapshot = freeze_info.next.freeze_version; + + int64_t max_medium_scn = 0; + if (OB_FAIL(tablet.get_max_medium_snapshot(max_medium_scn))) { + LOG_WARN("failed to get medium from memtables", K(ret)); + } else { + min_snapshot = max(min_snapshot, max_medium_scn); + } + LOG_DEBUG("get boundary snapshot", K(ret), "tablet_id", tablet.get_tablet_meta().tablet_id_, K(table_store), K(min_snapshot), K(max_snapshot), + K(tablet.get_medium_compaction_info_list()), K(max_medium_scn), KPC(last_major_table), + K(freeze_info)); } return ret; } -int ObPartitionMergePolicy::find_mini_minor_merge_tables( +int ObPartitionMergePolicy::find_minor_merge_tables( const ObGetMergeTablesParam ¶m, const int64_t min_snapshot_version, const int64_t max_snapshot_version, - const int64_t expect_multi_version_start, + ObLS &ls, const ObTablet &tablet, ObGetMergeTablesResult &result) { @@ -358,7 +428,7 @@ int ObPartitionMergePolicy::find_mini_minor_merge_tables( result.reset_handle_and_range(); const ObTabletTableStore &table_store = tablet.get_table_store(); ObTablesHandleArray minor_tables; - + int64_t minor_compact_trigger = DEFAULT_MINOR_COMPACT_TRIGGER; if (OB_UNLIKELY(!table_store.is_valid())) { ret = OB_ERR_SYS; @@ -368,60 +438,130 @@ int ObPartitionMergePolicy::find_mini_minor_merge_tables( } else { ObSSTable *table = nullptr; bool found_greater = false; + { + omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID())); + if (tenant_config.is_valid()) { + minor_compact_trigger = tenant_config->minor_compact_trigger; + } + } + + ObSEArray minor_merge_candidates; for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.get_count(); ++i) { - if (OB_ISNULL(table = 
static_cast(minor_tables.get_table(i)))) { - ret = OB_ERR_SYS; - LOG_ERROR("table must not null", K(ret), K(i), K(table_store)); + if (OB_ISNULL(table = static_cast(minor_tables.get_table(i)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table must not null", K(ret), K(i), K(table_store)); } else if (!found_greater && table->get_upper_trans_version() <= min_snapshot_version) { continue; } else { found_greater = true; - if (result.handle_.get_count() > 0) { - if (result.scn_range_.end_scn_ < table->get_start_scn()) { - LOG_INFO("log ts not continues, reset previous minor merge tables", - K(i), "last_end_scn", result.scn_range_.end_scn_, KPC(table)); - result.reset_handle_and_range(); - } else if (HISTORY_MINI_MINOR_MERGE == param.merge_type_ && table->get_upper_trans_version() > max_snapshot_version) { - break; - } else if (table_store.get_table_count() < OB_UNSAFE_TABLE_CNT && - table->get_max_merged_trans_version() > max_snapshot_version) { - LOG_INFO("max_snapshot_version reached, stop find more tables", K(param), K(max_snapshot_version), KPC(table)); - break; - } + if (0 == minor_merge_candidates.count()) { + } else if (is_history_minor_merge(param.merge_type_) && table->get_upper_trans_version() > max_snapshot_version) { + break; + } else if (table_store.get_table_count() < OB_UNSAFE_TABLE_CNT && + table->get_max_merged_trans_version() > max_snapshot_version) { + LOG_INFO("max_snapshot_version reached, stop find more tables", K(param), K(max_snapshot_version), KPC(table)); + break; } - if (OB_FAIL(result.handle_.add_table(table))) { - LOG_WARN("Failed to add table", KPC(table), K(ret)); - } else { - if (1 == result.handle_.get_count()) { - result.scn_range_.start_scn_ = table->get_start_scn(); - } - result.scn_range_.end_scn_ = table->get_end_scn(); + if (OB_FAIL(minor_merge_candidates.push_back(table))) { + LOG_WARN("failed to add table", K(ret)); } } - } // end of for + } + + int64_t left_border = 0; + int64_t right_border = minor_merge_candidates.count(); + if 
(OB_FAIL(ret)) { + } else if (MINOR_MERGE != param.merge_type_) { + } else if (OB_FAIL(refine_minor_merge_tables(tablet, minor_merge_candidates, left_border, right_border))) { + LOG_WARN("failed to adjust mini minor merge tables", K(ret)); + } + + for (int64_t i = left_border; OB_SUCC(ret) && i < right_border; ++i) { + ObSSTable *table = minor_merge_candidates.at(i); + if (result.handle_.get_count() > 0 && result.scn_range_.end_scn_ < table->get_start_scn()) { + LOG_INFO("log ts not continues, reset previous minor merge tables", + "last_end_log_ts", result.scn_range_.end_scn_, KPC(table)); + result.reset_handle_and_range(); + } + if (OB_FAIL(result.handle_.add_table(table))) { + LOG_WARN("Failed to add table", K(ret), KPC(table)); + } else { + if (1 == result.handle_.get_count()) { + result.scn_range_.start_scn_ = table->get_start_scn(); + } + result.scn_range_.end_scn_ = table->get_end_scn(); + } + } } if (OB_SUCC(ret)) { result.suggest_merge_type_ = param.merge_type_; - if (OB_FAIL(refine_mini_minor_merge_result(result))) { - LOG_WARN("failed to refine_minor_merge_result", K(ret)); + result.version_range_.snapshot_version_ = tablet.get_snapshot_version(); + if (OB_FAIL(refine_minor_merge_result(minor_compact_trigger, result))) { + if (OB_NO_NEED_MERGE != ret) { + LOG_WARN("failed to refine_minor_merge_result", K(ret)); + } } else { - result.version_range_.multi_version_start_ = tablet.get_multi_version_start(); - result.version_range_.snapshot_version_ = tablet.get_snapshot_version(); - if (OB_FAIL(deal_with_minor_result(param.merge_type_, expect_multi_version_start, tablet, result))) { + if (OB_FAIL(deal_with_minor_result(param.merge_type_, ls, tablet, result))) { LOG_WARN("Failed to deal with minor merge result", K(ret), K(param), K(result)); } else { FLOG_INFO("succeed to get minor merge tables", K(min_snapshot_version), K(max_snapshot_version), - K(expect_multi_version_start), K(result), K(tablet)); + K(result), K(tablet)); } } + } else if (OB_NO_NEED_MERGE 
== ret && table_store.get_minor_sstables().count() >= DIAGNOSE_TABLE_CNT_IN_STORAGE) { + ADD_SUSPECT_INFO(MINOR_MERGE, + tablet.get_tablet_meta().ls_id_, + tablet.get_tablet_meta().tablet_id_, + "can't schedule minor merge.", + K(min_snapshot_version), K(max_snapshot_version), + "mini_sstable_cnt", table_store.get_minor_sstables().count()); + } + return ret; +} + +int ObPartitionMergePolicy::refine_minor_merge_tables( + const ObTablet &tablet, + const ObIArray &merge_tables, + int64_t &left_border, + int64_t &right_border) +{ + int ret = OB_SUCCESS; + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + ObITable *meta_table = tablet.get_table_store().get_extend_sstable(ObTabletTableStore::META_MAJOR); + int64_t covered_by_meta_table_cnt = 0; + left_border = 0; + right_border = merge_tables.count(); + + if (tablet_id.is_special_merge_tablet()) { + } else if (merge_tables.count() < 2 || nullptr == meta_table) { + // do nothing + } else { + // no need meta merge, but exist meta_sstable + for (int64_t i = 0; OB_SUCC(ret) && i < merge_tables.count(); ++i) { + if (OB_ISNULL(merge_tables.at(i))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected null table", K(ret), K(i), K(merge_tables)); + } else if (merge_tables.at(i)->get_upper_trans_version() > meta_table->get_snapshot_version()) { + break; + } else { + ++covered_by_meta_table_cnt; + } + } + } + + if (OB_FAIL(ret)) { + } else if (covered_by_meta_table_cnt * 2 >= merge_tables.count()) { + right_border = covered_by_meta_table_cnt; + } else { + left_border = covered_by_meta_table_cnt; } return ret; } int ObPartitionMergePolicy::get_hist_minor_merge_tables( const ObGetMergeTablesParam ¶m, - const int64_t multi_version_start, + ObLS &ls, const ObTablet &tablet, ObGetMergeTablesResult &result) { @@ -430,347 +570,17 @@ int ObPartitionMergePolicy::get_hist_minor_merge_tables( int64_t max_snapshot_version = 0; result.reset(); - if (HISTORY_MINI_MINOR_MERGE != merge_type) { + if 
(OB_UNLIKELY(!is_history_minor_merge(merge_type))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid args", K(ret), K(merge_type)); } else if (OB_FAIL(deal_hist_minor_merge(tablet, max_snapshot_version))) { if (OB_NO_NEED_MERGE != ret) { LOG_WARN("failed to deal hist minor merge", K(ret)); } - } else if (OB_FAIL(find_mini_minor_merge_tables(param, 0, max_snapshot_version, multi_version_start, tablet, result))) { - LOG_WARN("failed to get minor tables for hist minor merge", K(ret)); - } - return ret; -} - -int ObPartitionMergePolicy::get_buf_minor_merge_tables( - const ObGetMergeTablesParam ¶m, - const int64_t multi_version_start, - const ObTablet &tablet, - ObGetMergeTablesResult &result) -{ - int ret = OB_SUCCESS; - UNUSEDx(param, multi_version_start, tablet, result); - return ret; -} - -int ObPartitionMergePolicy::find_buf_minor_merge_tables( - const storage::ObTablet &tablet, - ObGetMergeTablesResult *result) -{ - int ret = OB_SUCCESS; - UNUSEDx(tablet, result); - return ret; -} - -int ObPartitionMergePolicy::find_buf_minor_base_table(ObITable *last_major_table, ObITable *&buf_minor_base_table) -{ - int ret = OB_SUCCESS; - UNUSEDx(last_major_table, buf_minor_base_table); - return ret; -} - -int ObPartitionMergePolicy::add_buf_minor_merge_result(ObITable *table, ObGetMergeTablesResult &result) -{ - int ret = OB_SUCCESS; - UNUSEDx(table, result); - return ret; -} - -int ObPartitionMergePolicy::get_major_merge_tables( - const ObGetMergeTablesParam ¶m, - const int64_t multi_version_start, - const ObTablet &tablet, - ObGetMergeTablesResult &result) -{ - int ret = OB_SUCCESS; - ObSSTable *base_table = nullptr; - const ObTabletTableStore &table_store = tablet.get_table_store(); - ObTenantFreezeInfoMgr::FreezeInfo freeze_info; - result.reset(); - result.merge_version_ = param.merge_version_; - result.suggest_merge_type_ = MAJOR_MERGE; - DEBUG_SYNC(BEFORE_GET_MAJOR_MGERGE_TABLES); - - if (OB_UNLIKELY(!table_store.is_valid() || !param.is_major_valid())) { - ret = 
OB_INVALID_ARGUMENT; - LOG_WARN("get invalid argument", K(ret), K(table_store), K(param)); - } else if (OB_ISNULL(base_table = static_cast(table_store.get_major_sstables().get_boundary_table(true/*last*/)))) { - ret = OB_ENTRY_NOT_EXIST; - LOG_ERROR("major sstable not exist", K(ret), K(table_store)); - } else if (base_table->get_snapshot_version() >= result.merge_version_) { - ret = OB_NO_NEED_MERGE; - LOG_WARN("major merge already finished", K(ret), KPC(base_table), K(result)); - } else if (OB_FAIL(base_table->get_frozen_schema_version(result.base_schema_version_))) { - LOG_WARN("failed to get frozen schema version", K(ret)); - } else if (OB_FAIL(MTL_CALL_FREEZE_INFO_MGR(get_freeze_info_behind_snapshot_version, - base_table->get_snapshot_version(), freeze_info))) { - LOG_WARN("failed to get freeze info", K(ret), K(base_table->get_snapshot_version())); - } else if (OB_FAIL(result.handle_.add_table(base_table))) { - LOG_WARN("failed to add base_table to result", K(ret)); - } else if (base_table->get_snapshot_version() >= freeze_info.freeze_version) { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("unexpected sstable with snapshot_version bigger than next freeze_scn", - K(ret), K(freeze_info), KPC(base_table), K(tablet)); - } else { - const ObSSTableArray &minor_tables = table_store.get_minor_sstables(); - bool start_add_table_flag = false; - for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.count_; ++i) { - if (OB_ISNULL(minor_tables[i])) { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("table must not null", K(ret), K(i), K(minor_tables)); - // TODO: add right boundary for major - } else if (!start_add_table_flag && minor_tables[i]->get_upper_trans_version() > base_table->get_snapshot_version()) { - start_add_table_flag = true; - } - if (OB_SUCC(ret) && start_add_table_flag) { - if (OB_FAIL(result.handle_.add_table(minor_tables[i]))) { - LOG_WARN("failed to add table", K(ret)); - } else { - result.scn_range_.end_scn_ = minor_tables[i]->get_key().get_end_scn(); - } - } - } - - 
if (OB_SUCC(ret)) { - if (result.handle_.get_count() < 2) { // fix issue 42746719 - if (OB_UNLIKELY(NULL == result.handle_.get_table(0) || !result.handle_.get_table(0)->is_major_sstable())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("the only table must be major sstable", K(ret), K(result), K(table_store)); - } - } else if (OB_FAIL(result.handle_.check_continues(nullptr))) { - LOG_WARN("failed to check continues for major merge", K(ret), K(result)); - } - } - } - if (OB_SUCC(ret) && OB_NOT_NULL(base_table)) { - const int64_t major_snapshot = MAX(base_table->get_snapshot_version(), freeze_info.freeze_version); - result.read_base_version_ = base_table->get_snapshot_version(); - result.version_range_.snapshot_version_ = major_snapshot; - result.create_snapshot_version_ = base_table->get_meta().get_basic_meta().create_snapshot_version_; - result.schema_version_ = freeze_info.schema_version; - result.version_range_.multi_version_start_ = tablet.get_multi_version_start(); - if (multi_version_start < result.version_range_.multi_version_start_) { - LOG_WARN("cannot reserve multi_version_start", "old multi_version_start", result.version_range_.multi_version_start_, - K(multi_version_start)); - } else if (multi_version_start < result.version_range_.snapshot_version_) { - result.version_range_.multi_version_start_ = multi_version_start; - LOG_TRACE("succ reserve multi_version_start", K(result.version_range_)); - } else { - result.version_range_.multi_version_start_ = result.version_range_.snapshot_version_; - LOG_TRACE("no need keep multi version", K(result.version_range_)); - } - } - return ret; -} - -// may need rewrite for checkpoint_mgr -int ObPartitionMergePolicy::check_need_mini_merge( - const storage::ObTablet &tablet, - bool &need_merge) -{ - int ret = OB_SUCCESS; - need_merge = false; - bool can_merge = false; - const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; - const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; - const ObTabletTableStore 
&table_store = tablet.get_table_store(); - ObSEArray memtables; - if (OB_UNLIKELY(!tablet.is_valid())) { - ret = OB_ERR_SYS; - LOG_ERROR("tablet is unexpectedly invalid", K(ret), K(tablet)); - } else if (OB_FAIL(tablet.get_memtables(memtables, false/*need_active*/))) { - LOG_WARN("failed to get all memtables from table store", K(ret)); - } else if (!memtables.empty()) { - ObSSTable *latest_sstable = static_cast(get_latest_sstable(table_store)); - ObIMemtable *first_frozen_memtable = static_cast(memtables.at(0)); - ObIMemtable *last_frozen_memtable = static_cast(memtables.at(memtables.count() - 1)); - if (OB_NOT_NULL(first_frozen_memtable)) { - need_merge = true; - if (first_frozen_memtable->can_be_minor_merged()) { - can_merge = true; - if (OB_NOT_NULL(latest_sstable) - && (latest_sstable->get_end_scn() >= last_frozen_memtable->get_end_scn() - && tablet.get_snapshot_version() >= last_frozen_memtable->get_snapshot_version())) { - need_merge = false; - LOG_ERROR("unexpected sstable", K(ret), KPC(latest_sstable), KPC(last_frozen_memtable)); - } - } else if (REACH_TENANT_TIME_INTERVAL(30 * 1000 * 1000)) { - LOG_INFO("memtable can not minor merge", - "memtable end_scn", first_frozen_memtable->get_end_scn(), - "memtable timestamp", first_frozen_memtable->get_timestamp()); - - const ObStorageSchema &storage_schema = tablet.get_storage_schema(); - ADD_SUSPECT_INFO(MINI_MERGE, - ls_id, tablet_id, - "memtable can not minor merge", - "memtable end_scn", - first_frozen_memtable->get_end_scn(), - "memtable timestamp", - first_frozen_memtable->get_timestamp()); - } - if (need_merge && !check_table_count_safe(table_store)) { // check table_store count - can_merge = false; - LOG_ERROR("table count is not safe for mini merge", K(tablet_id)); - // add compaction diagnose info - diagnose_table_count_unsafe(MINI_MERGE, tablet); - } -#ifdef ERRSIM - // TODO@hanhui: fix this errsim later - ret = E(EventTable::EN_COMPACTION_DIAGNOSE_TABLE_STORE_UNSAFE_FAILED) ret; - if (OB_FAIL(ret)) { - 
ret = OB_SUCCESS; - need_merge = false; - diagnose_table_count_unsafe(MINI_MERGE, tablet); // ignore failure - LOG_INFO("check table count with errsim", K(tablet_id)); - } -#endif - if (need_merge && !can_merge) { - need_merge = false; - if (REACH_TENANT_TIME_INTERVAL(10 * 1000 * 1000)) { - LOG_INFO("check_need_mini_merge which cannot merge", K(tablet_id), K(need_merge), K(can_merge), - K(latest_sstable), K(first_frozen_memtable)); - } - } - } - } - - if (OB_SUCC(ret) && need_merge) { - LOG_DEBUG("check mini merge", K(ls_id), "tablet_id", tablet_id.id(), K(need_merge), - K(can_merge), K(table_store)); - } - return ret; -} - -int ObPartitionMergePolicy::check_need_mini_minor_merge( - const ObTablet &tablet, - bool &need_merge) -{ - int ret = OB_SUCCESS; - int64_t min_snapshot_version = 0; - int64_t max_snapshot_version = 0; - int64_t minor_sstable_count = 0; - int64_t need_merge_mini_count = 0; - need_merge = false; - int64_t mini_minor_threshold = DEFAULT_MINOR_COMPACT_TRIGGER; - const ObTabletTableStore &table_store = tablet.get_table_store(); - const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; - int64_t delay_merge_schedule_interval = 0; - ObTablesHandleArray minor_tables; - { - omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID())); - if (tenant_config.is_valid()) { - mini_minor_threshold = tenant_config->minor_compact_trigger; - delay_merge_schedule_interval = tenant_config->_minor_compaction_interval; - } - } // end of ObTenantConfigGuard - if (table_store.get_minor_sstables().count_ <= mini_minor_threshold) { - // total number of mini sstable is less than threshold + 1 - } else if (tablet.is_ls_tx_data_tablet()) { - min_snapshot_version = 0; - max_snapshot_version = INT64_MAX; - } else if (OB_FAIL(get_boundary_snapshot_version(tablet, min_snapshot_version, max_snapshot_version))) { - LOG_WARN("failed to calculate boundary version", K(ret)); - } - - if (OB_FAIL(ret)) { - } else if 
(OB_FAIL(table_store.get_mini_minor_sstables(minor_tables))) { - LOG_WARN("failed to get mini minor sstables", K(ret), K(table_store)); - } else { - int64_t minor_check_snapshot_version = 0; - bool found_greater = false; - - for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.get_count(); ++i) { - ObSSTable *table = static_cast(minor_tables.get_table(i)); - if (OB_UNLIKELY(table->is_buf_minor_sstable())) { - ret = OB_ERR_SYS; - STORAGE_LOG(WARN, "Unexpected buf minor sstable", K(ret), K(table_store)); - } else if (!found_greater && table->get_upper_trans_version() <= min_snapshot_version) { - continue; - } else if (table->get_max_merged_trans_version() > max_snapshot_version) { - break; - } - found_greater = true; - minor_sstable_count++; - if (table->is_mini_sstable()) { - if (mini_minor_threshold == need_merge_mini_count++) { - minor_check_snapshot_version = table->get_max_merged_trans_version(); - } - } else if (need_merge_mini_count > 0 || minor_sstable_count - need_merge_mini_count > 1) { - // chaos order with mini and minor sstable OR more than one minor sstable - // need compaction except data replica - need_merge_mini_count = mini_minor_threshold + 1; - break; - } - } // end of for - - if (OB_SUCC(ret)) { - // GCONF.minor_compact_trigger means the maximum number of the current L0 sstable, - // the compaction will be scheduled when it be exceeded - // If minor_compact_trigger = 0, it means that all L0 sstables should be merged into L1 as soon as possible - if (minor_sstable_count <= 1) { - // only one minor sstable exist, no need to do mini minor merge - } else if (table_store.get_table_count() >= MAX_SSTABLE_CNT_IN_STORAGE - RESERVED_STORE_CNT_IN_STORAGE) { - need_merge = true; - LOG_INFO("table store has too many sstables, need to compaction", K(table_store)); - } else if (need_merge_mini_count <= mini_minor_threshold) { - // no need merge - } else { - if (delay_merge_schedule_interval > 0 && minor_check_snapshot_version > 0) { - // delays the compaction 
scheduling - int64_t current_time = ObTimeUtility::current_time(); - if (minor_check_snapshot_version + delay_merge_schedule_interval < current_time) { - // need merge - need_merge = true; - } - } else { - need_merge = true; - } - } - } - } - if (OB_SUCC(ret) && need_merge) { - LOG_DEBUG("check mini minor merge", "ls_id", tablet.get_tablet_meta().ls_id_, - K(tablet_id), K(need_merge), K(table_store)); - } - if (OB_SUCC(ret) && !need_merge && table_store.get_minor_sstables().count() >= DIAGNOSE_TABLE_CNT_IN_STORAGE) { - ADD_SUSPECT_INFO(MINOR_MERGE, - tablet.get_tablet_meta().ls_id_, tablet_id, - "can't schedule minor merge", - K(min_snapshot_version), K(max_snapshot_version), K(need_merge_mini_count), - K(minor_sstable_count), "mini_sstable_cnt", table_store.get_minor_sstables().count()); - } - return ret; -} - -int ObPartitionMergePolicy::check_need_buf_minor_merge( - const ObTablet &tablet, - bool &need_merge) -{ - int ret = OB_NO_NEED_MERGE; - need_merge = false; - UNUSED(tablet); - return ret; -} - -int ObPartitionMergePolicy::check_need_hist_minor_merge( - const storage::ObTablet &tablet, - bool &need_merge) -{ - int ret = OB_SUCCESS; - const int64_t hist_threashold = cal_hist_minor_merge_threshold(); - int64_t max_snapshot_version = 0; - need_merge = false; - if (OB_FAIL(deal_hist_minor_merge(tablet, max_snapshot_version))) { + } else if (OB_FAIL(find_minor_merge_tables(param, 0/*min_snapshot*/, + max_snapshot_version, ls, tablet, result))) { if (OB_NO_NEED_MERGE != ret) { - LOG_WARN("failed to deal hist minor merge", K(ret)); - } - } else { - need_merge = true; - } - if (OB_SUCC(ret) && need_merge) { - if (REACH_TENANT_TIME_INTERVAL(30 * 1000 * 1000)) { - FLOG_INFO("Table store need to do hist minor merge to reduce sstables", K(need_merge), K(hist_threashold)); + LOG_WARN("failed to get minor tables for hist minor merge", K(ret)); } } return ret; @@ -784,7 +594,6 @@ int ObPartitionMergePolicy::deal_hist_minor_merge( const ObTabletTableStore &table_store = 
tablet.get_table_store(); const int64_t hist_threshold = cal_hist_minor_merge_threshold(); ObITable *first_major_table = nullptr; - ObTenantFreezeInfoMgr *freeze_info_mgr = nullptr; max_snapshot_version = 0; if (!table_store.is_valid()) { @@ -792,12 +601,9 @@ int ObPartitionMergePolicy::deal_hist_minor_merge( LOG_ERROR("get unexpected invalid table store", K(ret), K(table_store)); } else if (table_store.get_minor_sstables().count_ < hist_threshold) { ret = OB_NO_NEED_MERGE; - } else if (OB_ISNULL(freeze_info_mgr = MTL(ObTenantFreezeInfoMgr *))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to get freeze info mgr from MTL", K(ret)); } else if (OB_ISNULL(first_major_table = table_store.get_major_sstables().get_boundary_table(false))) { // index table during building, need compat with continuous multi version - if (0 == (max_snapshot_version = freeze_info_mgr->get_latest_frozen_version())) { + if (0 == (max_snapshot_version = MTL(ObTenantFreezeInfoMgr*)->get_latest_frozen_version())) { // no freeze info found, wait normal mini minor to free sstable ret = OB_NO_NEED_MERGE; LOG_WARN("No freeze range to do hist minor merge for buiding index", K(ret), K(table_store)); @@ -805,14 +611,18 @@ int ObPartitionMergePolicy::deal_hist_minor_merge( } else { ObTenantFreezeInfoMgr::NeighbourFreezeInfo freeze_info; ObSEArray freeze_infos; - if (OB_FAIL(freeze_info_mgr->get_freeze_info_behind_major_snapshot( + if (OB_FAIL(MTL(ObTenantFreezeInfoMgr *)->get_freeze_info_behind_major_snapshot( first_major_table->get_snapshot_version(), freeze_infos))) { - LOG_WARN("Failed to get freeze infos behind major version", K(ret), KPC(first_major_table)); + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_NO_NEED_MERGE; + } else { + LOG_WARN("Failed to get freeze infos behind major version", K(ret), KPC(first_major_table)); + } } else if (freeze_infos.count() <= 1) { // only one major freeze found, wait normal mini minor to reduce table count ret = OB_NO_NEED_MERGE; - LOG_WARN("No enough freeze range 
to do hist minor merge", K(ret), K(freeze_infos)); + LOG_DEBUG("No enough freeze range to do hist minor merge", K(ret), K(freeze_infos)); } else { int64_t table_cnt = 0; int64_t min_minor_version = 0; @@ -846,86 +656,6 @@ int ObPartitionMergePolicy::deal_hist_minor_merge( return ret; } -int ObPartitionMergePolicy::check_need_major_merge( - const storage::ObTablet &tablet, - int64_t &merge_version, - bool &need_merge, - bool &can_merge, - bool &need_force_freeze) -{ - int ret = OB_SUCCESS; - need_merge = false; - can_merge = false; - need_force_freeze = false; - ObTenantFreezeInfoMgr::FreezeInfo freeze_info; - int64_t last_sstable_snapshot = tablet.get_snapshot_version(); - const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; - const ObTabletTableStore &table_store = tablet.get_table_store(); - int64_t major_sstable_version = 0; - bool is_tablet_data_status_complete = true; //ha status - - if (OB_UNLIKELY(!tablet.is_valid())) { - ret = OB_ERR_SYS; - LOG_ERROR("tablet is unexpectedly invalid", K(ret), K(tablet)); - } else { - ObSSTable *latest_major_sstable = static_cast(table_store.get_major_sstables().get_boundary_table(true/*last*/)); - if (OB_NOT_NULL(latest_major_sstable)) { - major_sstable_version = latest_major_sstable->get_snapshot_version(); - if (major_sstable_version < merge_version) { - need_merge = true; - } - } - if (need_merge) { - if (!tablet.get_tablet_meta().ha_status_.is_data_status_complete()) { - can_merge = false; - is_tablet_data_status_complete = false; - LOG_INFO("tablet data status incomplete, can not merge", K(ret), K(tablet_id)); - } else if (OB_FAIL(MTL_CALL_FREEZE_INFO_MGR(get_freeze_info_behind_snapshot_version, major_sstable_version, freeze_info))) { - if (OB_ENTRY_NOT_EXIST != ret) { - LOG_WARN("failed to get freeze info", K(ret), K(merge_version), K(major_sstable_version)); - } else { - can_merge = false; - ret = OB_SUCCESS; - LOG_INFO("can't get freeze info after snapshot", K(ret), K(merge_version), 
K(major_sstable_version)); - } - } else { - can_merge = last_sstable_snapshot >= freeze_info.freeze_version; - if (!can_merge) { - LOG_TRACE("tablet need merge, but cannot merge now", K(tablet_id), K(merge_version), K(last_sstable_snapshot), K(freeze_info)); - } - } - - if (OB_SUCC(ret) && !can_merge && is_tablet_data_status_complete) { - ObTabletMemtableMgr *memtable_mgr = nullptr; - memtable::ObMemtable *last_frozen_memtable = nullptr; - if (OB_ISNULL(memtable_mgr = static_cast(tablet.get_memtable_mgr()))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("memtable mgr is unexpected null", K(ret), K(tablet)); - } else if (OB_ISNULL(last_frozen_memtable = memtable_mgr->get_last_frozen_memtable())) { - // no frozen memtable, need force freeze - need_force_freeze = true; - } else { - need_force_freeze = last_frozen_memtable->get_snapshot_version() < freeze_info.freeze_version; - if (!need_force_freeze) { - FLOG_INFO("tablet no need force freeze", K(ret), K(tablet_id), K(merge_version), K(freeze_info), KPC(last_frozen_memtable)); - } - } - } - } - if (need_merge && !can_merge && REACH_TENANT_TIME_INTERVAL(60L * 1000L * 1000L)) { - LOG_INFO("check_need_major_merge", K(ret), "ls_id", tablet.get_tablet_meta().ls_id_, K(tablet_id), - K(need_merge), K(can_merge), K(need_force_freeze), K(merge_version), K(freeze_info), - K(is_tablet_data_status_complete)); - const ObStorageSchema &storage_schema = tablet.get_storage_schema(); - ADD_SUSPECT_INFO(MAJOR_MERGE, tablet.get_tablet_meta().ls_id_, tablet_id, - "need major merge but can't merge now", - K(merge_version), K(freeze_info), K(last_sstable_snapshot), K(need_force_freeze), - K(is_tablet_data_status_complete)); - } - } - return ret; -} - int ObPartitionMergePolicy::diagnose_minor_dag( ObMergeType merge_type, const ObLSID ls_id, @@ -934,7 +664,7 @@ int ObPartitionMergePolicy::diagnose_minor_dag( const int64_t buf_len) { int ret = OB_SUCCESS; - ObTabletMinorMergeDag dag; + ObTabletMergeExecuteDag dag; ObDiagnoseTabletCompProgress 
progress; if (OB_FAIL(ObCompactionDiagnoseMgr::diagnose_dag( merge_type, @@ -956,7 +686,7 @@ int ObPartitionMergePolicy::diagnose_minor_dag( } int ObPartitionMergePolicy::diagnose_table_count_unsafe( - const ObMergeType &merge_type, + const ObMergeType merge_type, const storage::ObTablet &tablet) { int ret = OB_SUCCESS; @@ -999,10 +729,10 @@ int ObPartitionMergePolicy::diagnose_table_count_unsafe( } // check have minor merge DAG - if (OB_TMP_FAIL(diagnose_minor_dag(MINI_MINOR_MERGE, ls_id, tablet_id, tmp_str, buf_len))) { + if (OB_TMP_FAIL(diagnose_minor_dag(MINOR_MERGE, ls_id, tablet_id, tmp_str, buf_len))) { LOG_WARN("failed to diagnose minor dag", K(tmp_ret), K(ls_id), K(tablet_id), K(tmp_str)); } - if (OB_TMP_FAIL(diagnose_minor_dag(HISTORY_MINI_MINOR_MERGE, ls_id, tablet_id, tmp_str, buf_len))) { + if (OB_TMP_FAIL(diagnose_minor_dag(HISTORY_MINOR_MERGE, ls_id, tablet_id, tmp_str, buf_len))) { LOG_WARN("failed to diagnose history minor dag", K(tmp_ret), K(ls_id), K(tablet_id), K(tmp_str)); } @@ -1012,11 +742,6 @@ int ObPartitionMergePolicy::diagnose_table_count_unsafe( return ret; } -bool ObPartitionMergePolicy::check_table_count_safe(const ObTabletTableStore &table_store) -{ - return table_store.get_table_count() < OB_EMERGENCY_TABLE_CNT; -} - int ObPartitionMergePolicy::refine_mini_merge_result( const ObTablet &tablet, ObGetMergeTablesResult &result) @@ -1049,116 +774,123 @@ int ObPartitionMergePolicy::refine_mini_merge_result( return ret; } -// Used to adjust whether to do L0 Minor merge or L1 Minor merge -int ObPartitionMergePolicy::refine_mini_minor_merge_result(ObGetMergeTablesResult &result) +int ObPartitionMergePolicy::refine_minor_merge_result( + const int64_t minor_compact_trigger, + ObGetMergeTablesResult &result) { int ret = OB_SUCCESS; ObMergeType &merge_type = result.suggest_merge_type_; - - if (result.handle_.empty()) { - } else if (MINI_MINOR_MERGE != merge_type - && HISTORY_MINI_MINOR_MERGE != merge_type - && MINOR_MERGE != merge_type) { + if 
(result.handle_.get_count() <= minor_compact_trigger) { + ret = OB_NO_NEED_MERGE; + LOG_DEBUG("minor refine, no need to do minor merge", K(result)); + result.handle_.reset(); + } else if (OB_UNLIKELY(!is_minor_merge_type(merge_type))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected merge type to refine merge tables", K(result), K(ret)); + } else if (0 == minor_compact_trigger || result.handle_.get_count() >= OB_UNSAFE_TABLE_CNT) { + // no refine } else { ObSEArray mini_tables; ObITable *table = NULL; ObSSTable *sstable = NULL; - int64_t mini_sstable_size = 1; - int64_t minor_sstable_size = 1; - int64_t minor_sstable_count = 0; + int64_t large_sstable_cnt = 0; + int64_t large_sstable_row_cnt = 0; + int64_t mini_sstable_row_cnt = 0; for (int64_t i = 0; OB_SUCC(ret) && i < result.handle_.get_count(); ++i) { if (OB_ISNULL(table = result.handle_.get_table(i)) || !table->is_minor_sstable()) { ret = OB_ERR_SYS; LOG_ERROR("get unexpected table", KP(table), K(ret)); - } else if (FALSE_IT(sstable = reinterpret_cast(table))) { - } else if (table->is_mini_sstable()) { // L0 table - mini_sstable_size += sstable->get_meta().get_basic_meta().row_count_; + } else if (FALSE_IT(sstable = reinterpret_cast(table))) { + } else { + if (sstable->get_meta().get_basic_meta().row_count_ > OB_LARGE_MINOR_SSTABLE_ROW_COUNT) { // large sstable + ++large_sstable_cnt; + large_sstable_row_cnt += sstable->get_meta().get_basic_meta().row_count_; + if (mini_tables.count() > minor_compact_trigger) { + break; + } else { + mini_tables.reset(); + continue; + } + } else { + mini_sstable_row_cnt += sstable->get_meta().get_basic_meta().row_count_; + } if (OB_FAIL(mini_tables.push_back(table))) { LOG_WARN("Failed to push mini minor table into array", K(ret)); } - } else if (table->is_multi_version_minor_sstable()) { // not include buf minor sstable, L1 table - if (mini_tables.count() > 0) { - mini_tables.reset(); - LOG_INFO("minor refine, minor merge sstable refine to minor merge due to chaos table order", 
- K(result)); - break; - } else { - minor_sstable_size += sstable->get_meta().get_basic_meta().row_count_; - ++minor_sstable_count; - } + } + } // end of for + + int64_t size_amplification_factor = OB_DEFAULT_COMPACTION_AMPLIFICATION_FACTOR; + { + omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID())); + if (tenant_config.is_valid()) { + size_amplification_factor = tenant_config->_minor_compaction_amplification_factor; } } - if (OB_SUCC(ret)) { - int64_t minor_compact_trigger = DEFAULT_MINOR_COMPACT_TRIGGER; - int64_t size_amplification_factor = OB_DEFAULT_COMPACTION_AMPLIFICATION_FACTOR; - { - omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID())); - if (tenant_config.is_valid()) { - minor_compact_trigger = tenant_config->minor_compact_trigger; - if (tenant_config->_minor_compaction_amplification_factor != 0) { - size_amplification_factor = tenant_config->_minor_compaction_amplification_factor; + if (OB_FAIL(ret)) { + } else if (large_sstable_cnt > 1 + || mini_tables.count() <= minor_compact_trigger + || mini_sstable_row_cnt > (large_sstable_row_cnt * size_amplification_factor / 100)) { + // no refine, use current result to compaction + } else if (mini_tables.count() != result.handle_.get_count()) { + // reset the merge result, mini sstable merge into a new mini sstable + result.reset_handle_and_range(); + for (int64_t i = 0; OB_SUCC(ret) && i < mini_tables.count(); i++) { + ObITable *table = mini_tables.at(i); + if (OB_UNLIKELY(0 != i && table->get_start_scn() != result.scn_range_.end_scn_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexepcted table array", K(ret), K(i), KPC(table), K(mini_tables)); + } else if (OB_FAIL(result.handle_.add_table(table))) { + LOG_WARN("Failed to add table to minor merge result", KPC(table), K(ret)); + } else { + if (1 == result.handle_.get_count()) { + result.scn_range_.start_scn_ = table->get_start_scn(); } + result.scn_range_.end_scn_ = table->get_end_scn(); } - } // end of ObTenantConfigGuard - if (1 == 
result.handle_.get_count()) { - LOG_INFO("minor refine, only one sstable, no need to do mini minor merge", K(result)); - result.handle_.reset(); - } else if (HISTORY_MINI_MINOR_MERGE == merge_type) { - // use minor merge to do history mini minor merge and skip other checks - } else if (0 == minor_compact_trigger || mini_tables.count() <= 1 || minor_sstable_count > 1) { - merge_type = MINOR_MERGE; - } else if (minor_sstable_count == 0 && mini_sstable_size > OB_MIN_MINOR_SSTABLE_ROW_COUNT) { - merge_type = MINOR_MERGE; - LOG_INFO("minor refine, mini minor merge sstable refine to minor merge", - K(minor_sstable_size), K(mini_sstable_size), K(OB_MIN_MINOR_SSTABLE_ROW_COUNT), K(result)); - } else if (minor_sstable_count == 1 - && mini_sstable_size > (minor_sstable_size * size_amplification_factor / 100)) { - merge_type = MINOR_MERGE; - LOG_INFO("minor refine, mini minor merge sstable refine to minor merge", K(minor_sstable_size), - K(mini_sstable_size), K(size_amplification_factor), K(result)); - } else { - // reset the merge result, mini sstable merge into a new mini sstable - result.reset_handle_and_range(); - for (int64_t i = 0; OB_SUCC(ret) && i < mini_tables.count(); i++) { - ObITable *table = mini_tables.at(i); - if (OB_FAIL(result.handle_.add_table(table))) { - LOG_WARN("Failed to add table to minor merge result", KPC(table), K(ret)); - } else { - if (1 == result.handle_.get_count()) { - result.scn_range_.start_scn_ = table->get_start_scn(); - } - result.scn_range_.end_scn_ = table->get_end_scn(); - } - } - if (OB_SUCC(ret)) { - LOG_INFO("minor refine, mini minor merge sstable refine info", K(minor_sstable_size), - K(mini_sstable_size), K(result)); - } + } + if (OB_SUCC(ret)) { + LOG_INFO("minor refine, mini minor merge sstable refine info", K(result)); } } } return ret; } -ObITable *ObPartitionMergePolicy::get_latest_sstable(const ObTabletTableStore &table_store) +// call this func means have serialized medium compaction clog = medium_snapshot +int 
ObPartitionMergePolicy::check_need_medium_merge( + storage::ObTablet &tablet, + const int64_t medium_snapshot, + bool &need_merge, + bool &can_merge) { - ObITable *major_table = table_store.get_major_sstables().get_boundary_table(true/*last*/); - ObITable *minor_table = table_store.get_minor_sstables().get_boundary_table(true/*last*/); - ObITable *latest_sstable = nullptr; - if (OB_NOT_NULL(major_table) && OB_NOT_NULL(minor_table)) { - if (major_table->get_snapshot_version() < minor_table->get_max_merged_trans_version()) { - latest_sstable = static_cast(minor_table); - } else { - latest_sstable = static_cast(major_table); - } - } else if (OB_NOT_NULL(major_table)) { - latest_sstable = static_cast(major_table); + int ret = OB_SUCCESS; + need_merge = false; + can_merge = false; + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + ObTabletTableStore &table_store = tablet.get_table_store(); + ObITable *last_major = table_store.get_major_sstables().get_boundary_table(true/*last*/); + const bool is_tablet_data_status_complete = tablet.get_tablet_meta().ha_status_.is_data_status_complete(); + if (nullptr == last_major) { + // no major, no medium } else { - latest_sstable = static_cast(minor_table); + need_merge = last_major->get_snapshot_version() < medium_snapshot; + if (need_merge + && is_tablet_data_status_complete + && tablet.get_tablet_meta().max_serialized_medium_scn_ >= medium_snapshot) { + can_merge = true; + } } - return latest_sstable; + + if (need_merge && !can_merge && REACH_TENANT_TIME_INTERVAL(60L * 1000L * 1000L)) { + LOG_INFO("check_need_medium_merge", K(ret), "ls_id", tablet.get_tablet_meta().ls_id_, K(tablet_id), + K(need_merge), K(can_merge), K(medium_snapshot), K(is_tablet_data_status_complete)); + ADD_SUSPECT_INFO(MAJOR_MERGE, tablet.get_tablet_meta().ls_id_, tablet_id, + "need major merge but can't merge now", + K(medium_snapshot), K(is_tablet_data_status_complete), + "max_serialized_medium_scn", 
tablet.get_tablet_meta().max_serialized_medium_scn_); + } + return ret; } int64_t ObPartitionMergePolicy::cal_hist_minor_merge_threshold() @@ -1171,5 +903,547 @@ int64_t ObPartitionMergePolicy::cal_hist_minor_merge_threshold() return MIN((1 + compact_trigger) * OB_HIST_MINOR_FACTOR, MAX_TABLE_CNT_IN_STORAGE / 2); } +int ObPartitionMergePolicy::get_multi_version_start( + const ObMergeType merge_type, + ObLS &ls, + const ObTablet &tablet, + ObVersionRange &result_version_range) +{ + int ret = OB_SUCCESS; + int64_t expect_multi_version_start = 0; + if (tablet.is_ls_inner_tablet()) { + result_version_range.multi_version_start_ = INT64_MAX; + } else if (OB_FAIL(ObTablet::get_kept_multi_version_start(ls, tablet, expect_multi_version_start))) { + if (is_mini_merge(merge_type) || OB_TENANT_NOT_EXIST == ret) { + expect_multi_version_start = tablet.get_multi_version_start(); + FLOG_INFO("failed to get multi_version_start, use multi_version_start on tablet", K(ret), + K(merge_type), K(expect_multi_version_start)); + ret = OB_SUCCESS; // clear errno + } else { + LOG_WARN("failed to get kept multi_version_start", K(ret), + "tablet_id", tablet.get_tablet_meta().tablet_id_); + } + } + if (OB_SUCC(ret) && !tablet.is_ls_inner_tablet()) { + // update multi_version_start + if (expect_multi_version_start < result_version_range.multi_version_start_) { + LOG_WARN("cannot reserve multi_version_start", "multi_version_start", result_version_range.multi_version_start_, + K(expect_multi_version_start)); + } else if (expect_multi_version_start < result_version_range.snapshot_version_) { + result_version_range.multi_version_start_ = expect_multi_version_start; + LOG_DEBUG("succ reserve multi_version_start", "multi_version_start", result_version_range.multi_version_start_, + K(expect_multi_version_start)); + } else { + result_version_range.multi_version_start_ = result_version_range.snapshot_version_; + LOG_DEBUG("no need keep multi version", "multi_version_start", 
result_version_range.multi_version_start_, + K(expect_multi_version_start)); + } + } + return ret; +} + + +int add_table_with_check(ObGetMergeTablesResult &result, ObITable *table) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(table)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(table)); + } else if (OB_UNLIKELY(!result.handle_.empty() + && table->get_start_scn() > result.scn_range_.end_scn_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("log ts range is not continues", K(ret), K(result), KPC(table)); + } else if (OB_FAIL(result.handle_.add_table(table))) { + LOG_WARN("failed to add table", K(ret), KPC(table)); + } else { + if (1 == result.handle_.get_count()) { + result.scn_range_.start_scn_ = table->get_start_scn(); + } + result.scn_range_.end_scn_ = table->get_end_scn(); + } + return ret; +} + +int ObPartitionMergePolicy::push_result_with_merge( + const int64_t minor_trigger, + ObGetMergeTablesResult &input_result, + ObIArray ¶llel_result) +{ + int ret = OB_SUCCESS; + if (input_result.handle_.get_count() > minor_trigger) { + if (OB_FAIL(parallel_result.push_back(input_result))) { + LOG_WARN("failed to push back table result", K(ret), K(input_result)); + } + } else if (!parallel_result.empty()){ + ObGetMergeTablesResult &last_result = parallel_result.at(parallel_result.count() - 1); + if (last_result.scn_range_.end_scn_ == input_result.scn_range_.start_scn_) { + for (int i = 0; OB_SUCC(ret) && i < input_result.handle_.get_count(); ++i) { + if (OB_FAIL(add_table_with_check(last_result, input_result.handle_.get_table(i)))) { + LOG_WARN("failed to add table into result", K(ret), K(input_result), K(i)); + } + } + } + } + return ret; +} + +int ObPartitionMergePolicy::generate_parallel_minor_interval( + const ObGetMergeTablesResult &input_result, + ObMinorExecuteRangeMgr &minor_range_mgr, + ObIArray ¶llel_result) +{ + int ret = OB_SUCCESS; + int64_t minor_compact_trigger = DEFAULT_MINOR_COMPACT_TRIGGER; + { + omt::ObTenantConfigGuard 
tenant_config(TENANT_CONF(MTL_ID())); + if (tenant_config.is_valid()) { + minor_compact_trigger = tenant_config->minor_compact_trigger; + } + } + const bool check_in_range = minor_range_mgr.exe_range_array_.count() > 0; + minor_compact_trigger = check_in_range ? minor_compact_trigger << minor_range_mgr.exe_range_array_.count() : minor_compact_trigger; + + if (!storage::is_minor_merge(input_result.suggest_merge_type_)) { + } else if (input_result.handle_.get_count() <= minor_compact_trigger) { + // do nothing + } else if (!check_in_range + && input_result.handle_.get_count() < OB_MINOR_PARALLEL_SSTABLE_CNT_TRIGGER) { + if (OB_FAIL(parallel_result.push_back(input_result))) { + LOG_WARN("failed to push back result", K(ret), K(input_result)); + } + } else { + minor_compact_trigger = MAX(minor_compact_trigger, OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG / 2); + const int64_t table_cnt = input_result.handle_.get_count(); + const ObIArray &table_array = input_result.handle_.get_tables(); + ObITable *table = nullptr; + ObGetMergeTablesResult tmp_result; + if (OB_FAIL(tmp_result.copy_basic_info(input_result))) { + LOG_WARN("failed to copy basic info", K(ret), K(input_result)); + } + int64_t idx = 0; + bool split_minor = input_result.handle_.get_count() >= OB_MINOR_PARALLEL_SSTABLE_CNT_TRIGGER; + while (OB_SUCC(ret) && idx < table_cnt) { + tmp_result.handle_.reset(); + tmp_result.scn_range_.reset(); + while (OB_SUCC(ret) && idx < table_cnt) { + if (OB_ISNULL(table = table_array.at(idx++))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table is unexpected null", K(ret), K(idx), K(table_array)); + } else if (check_in_range && minor_range_mgr.in_execute_range(table)) { + if (tmp_result.handle_.get_count() > 0) { + break; + } + } else if (OB_FAIL(add_table_with_check(tmp_result, table))) { + LOG_WARN("failed to add table into result", K(ret), K(tmp_result), KPC(table)); + } else if (split_minor && tmp_result.handle_.get_count() >= OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG) { + break; + } + } // 
end of while + if (OB_FAIL(ret) || tmp_result.handle_.empty()) { + } else if (OB_FAIL(push_result_with_merge(minor_compact_trigger, tmp_result, parallel_result))) { + LOG_WARN("failed to merge result", K(ret), K(parallel_result)); + } else { + LOG_DEBUG("success to push result", K(ret), K(tmp_result), K(parallel_result)); + } + } // end of while + } + return ret; +} + + +/*************************************** ObMinorExecuteRangeMgr ***************************************/ +bool compareScnRange(share::ObScnRange &a, share::ObScnRange &b) +{ + return a.end_scn_ < b.end_scn_; +} + +int ObMinorExecuteRangeMgr::get_merge_ranges( + const ObLSID &ls_id, + const ObTabletID &tablet_id) +{ + int ret = OB_SUCCESS; + + ObTabletMergeDagParam param; + param.merge_type_ = MINOR_MERGE; + param.merge_version_ = ObVersion::MIN_VERSION; + param.ls_id_ = ls_id; + param.tablet_id_ = tablet_id; + param.for_diagnose_ = true; + + if (OB_FAIL(MTL(ObTenantDagScheduler*)->get_minor_exe_dag_info(param, exe_range_array_))) { + LOG_WARN("failed to get minor exe dag info", K(ret)); + } else if (OB_FAIL(sort_ranges())) { + LOG_WARN("failed to sort ranges", K(ret), K(param)); + } + return ret; +} + +int ObMinorExecuteRangeMgr::sort_ranges() +{ + int ret = OB_SUCCESS; + std::sort(exe_range_array_.begin(), exe_range_array_.end(), compareScnRange); + for (int i = 1; OB_SUCC(ret) && i < exe_range_array_.count(); ++i) { + if (OB_UNLIKELY(!exe_range_array_.at(i).is_valid() + || (exe_range_array_.at(i - 1).start_scn_.get_val_for_tx() > 0 // except meta major merge range + && exe_range_array_.at(i).start_scn_.get_val_for_tx() > 0 + && exe_range_array_.at(i).start_scn_ < exe_range_array_.at(i - 1).end_scn_))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected minor ranges", K(ret), K(i), K(exe_range_array_)); + } + } + return ret; +} + +bool ObMinorExecuteRangeMgr::in_execute_range(const ObITable *table) const +{ + bool bret = false; + if (exe_range_array_.count() > 0 && OB_NOT_NULL(table)) { + for (int 
i = 0; i < exe_range_array_.count(); ++i) { + if (table->get_end_scn() <= exe_range_array_.at(i).end_scn_ + && table->get_end_scn() > exe_range_array_.at(i).start_scn_) { + bret = true; + LOG_DEBUG("in execute range", KPC(table), K(i), K(exe_range_array_.at(i))); + break; + } + } + } + return bret; +} + + +/*************************************** ObAdaptiveMergePolicy ***************************************/ +const char * ObAdaptiveMergeReasonStr[] = { + "NONE", + "LOAD_DATA_SCENE", + "TOMBSTONE_SCENE", + "INEFFICIENT_QUERY", + "FREQUENT_WRITE" +}; + +const char* ObAdaptiveMergePolicy::merge_reason_to_str(const int64_t merge_reason) +{ + STATIC_ASSERT(static_cast(INVALID_REASON) == ARRAYSIZEOF(ObAdaptiveMergeReasonStr), + "adaptive merge reason str len is mismatch"); + const char *str = ""; + if (merge_reason >= INVALID_REASON || merge_reason < NONE) { + str = "invalid_merge_reason"; + } else { + str = ObAdaptiveMergeReasonStr[merge_reason]; + } + return str; +} + +bool ObAdaptiveMergePolicy::is_valid_merge_reason(const AdaptiveMergeReason &reason) +{ + return reason > AdaptiveMergeReason::NONE && + reason < AdaptiveMergeReason::INVALID_REASON; +} + +int ObAdaptiveMergePolicy::get_meta_merge_tables( + const ObGetMergeTablesParam ¶m, + ObLS &ls, + const ObTablet &tablet, + ObGetMergeTablesResult &result) +{ + int ret = OB_SUCCESS; + const ObMergeType merge_type = param.merge_type_; + const ObTabletTableStore &table_store = tablet.get_table_store(); + const ObStorageSchema &storage_schema = tablet.get_storage_schema(); + result.reset(); + + if (OB_UNLIKELY(!table_store.is_valid())) { + ret = OB_ERR_SYS; + LOG_WARN("table store not valid", K(ret), K(table_store)); + } else if (OB_UNLIKELY(META_MAJOR_MERGE != merge_type)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), K(merge_type)); + } else if (OB_FAIL(find_meta_major_tables(tablet, result))) { + if (OB_NO_NEED_MERGE != ret) { + LOG_WARN("Failed to find minor merge tables", K(ret)); + } + } else if 
(OB_FAIL(result.handle_.check_continues(nullptr))) { + LOG_WARN("failed to check continues", K(ret), K(result)); + } else if (FALSE_IT(result.schema_version_ = storage_schema.schema_version_)) { + } else if (FALSE_IT(result.suggest_merge_type_ = META_MAJOR_MERGE)) { + } else if (FALSE_IT(result.version_range_.snapshot_version_ = + MIN(tablet.get_snapshot_version(), result.version_range_.snapshot_version_))) { + // choose version should less than tablet::snapshot + } else if (OB_FAIL(ObPartitionMergePolicy::get_multi_version_start( + param.merge_type_, ls, tablet, result.version_range_))) { + LOG_WARN("failed to get multi version_start", K(ret)); + } else if (OB_FAIL(result.handle_.get_table(0)->get_frozen_schema_version(result.base_schema_version_))) { + LOG_WARN("failed to get frozen schema version", K(ret), K(result)); + } else { + FLOG_INFO("succeed to get meta major merge tables", K(result), K(table_store)); + } + return ret; +} + +int ObAdaptiveMergePolicy::find_meta_major_tables( + const storage::ObTablet &tablet, + ObGetMergeTablesResult &result) +{ + int ret = OB_SUCCESS; + int64_t min_snapshot = 0; + int64_t max_snapshot = 0; + int64_t base_row_cnt = 0; + int64_t inc_row_cnt = 0; + int64_t tx_determ_table_cnt = 0; + const ObTabletTableStore &table_store = tablet.get_table_store(); + ObITable *last_major = table_store.get_major_sstables().get_boundary_table(true); + ObITable *last_minor = table_store.get_minor_sstables().get_boundary_table(true); + ObITable *base_table = table_store.get_extend_sstable(ObTabletTableStore::META_MAJOR); + const ObSSTableArray &minor_tables = table_store.get_minor_sstables(); + + if (!table_store.is_valid()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ObTabletTableStore is not valid", K(ret), K(table_store)); + } else if (nullptr == last_minor || nullptr == last_major) { + ret = OB_NO_NEED_MERGE; + LOG_WARN("no minor/major sstable to do meta major merge", K(ret), KPC(last_minor), KPC(last_major)); + } else if 
(OB_FAIL(ObPartitionMergePolicy::get_boundary_snapshot_version( + tablet, min_snapshot, max_snapshot, false/*check_table_cnt*/))) { + if (OB_NO_NEED_MERGE != ret) { + LOG_WARN("Failed to find meta merge base table", K(ret), KPC(last_major), KPC(last_major), KPC(base_table)); + } + } else if (FALSE_IT(base_table = nullptr == base_table ? last_major : base_table)) { + } else if (OB_ISNULL(base_table)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("base table is unexpected null", K(ret), KP(base_table)); + } else if (base_table->get_snapshot_version() < min_snapshot || max_snapshot != INT64_MAX) { + // max_snapshot == INT64_MAX means there's no next freeze_info + ret = OB_NO_NEED_MERGE; + LOG_DEBUG("no need meta merge when the tablet is doing major merge", K(ret), K(min_snapshot), K(max_snapshot), KPC(base_table)); + } else if (OB_FAIL(add_meta_merge_result(base_table, result, true/*update_snapshot*/))) { + LOG_WARN("failed to add base table to meta merge result", K(ret), KPC(base_table), K(result)); + } else { + ++tx_determ_table_cnt; // inc for base_table + bool found_undeterm_table = false; + base_row_cnt = static_cast(base_table)->get_meta().get_row_count(); + ObITable *table = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < minor_tables.count(); ++i) { + if (OB_ISNULL(table = minor_tables[i]) || !table->is_multi_version_minor_sstable()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("get unexpected table", K(ret), K(i), K(table_store)); + } else if (table->get_upper_trans_version() <= base_table->get_snapshot_version()) { + // skip minor sstable which has been merged + continue; + } else if (!found_undeterm_table && table->is_trans_state_deterministic()) { + ++tx_determ_table_cnt; + inc_row_cnt += static_cast(table)->get_meta().get_row_count(); + } else { + found_undeterm_table = true; + } + + if (FAILEDx(add_meta_merge_result(table, result, !found_undeterm_table))) { + LOG_WARN("failed to add minor table to meta merge result", K(ret)); + } + } // end of for + if 
(OB_FAIL(ret)) { + } else if (tx_determ_table_cnt < 2) { + ret = OB_NO_NEED_MERGE; + LOG_INFO("no enough table for meta merge", K(ret), K(result), K(table_store)); + } else if (inc_row_cnt < TRANS_STATE_DETERM_ROW_CNT_THRESHOLD + || inc_row_cnt < INC_ROW_COUNT_PERCENTAGE_THRESHOLD * base_row_cnt) { + ret = OB_NO_NEED_MERGE; + LOG_INFO("found sstable could merge is not enough", K(ret), K(inc_row_cnt), K(base_row_cnt)); + } else if (result.version_range_.snapshot_version_ < tablet.get_multi_version_start()) { + ret = OB_NO_NEED_MERGE; + LOG_INFO("chosen snapshot is abandoned", K(ret), K(result), K(tablet.get_multi_version_start())); + } + } + return ret; +} + +int ObAdaptiveMergePolicy::find_base_table_and_inc_version( + ObITable *last_major_table, + ObITable *last_minor_table, + ObITable *&meta_base_table, + int64_t &merge_inc_version) +{ + int ret = OB_SUCCESS; + // find meta base table + if (OB_NOT_NULL(last_major_table)) { + if (OB_ISNULL(meta_base_table)) { + meta_base_table = last_major_table; + } else if (OB_UNLIKELY(meta_base_table->get_snapshot_version() <= last_major_table->get_snapshot_version())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("meta major table covered by major", K(ret), KPC(meta_base_table), KPC(last_major_table)); + } + } + + // find meta merge inc version + if (OB_FAIL(ret)) { + } else if (OB_NOT_NULL(last_major_table) && OB_NOT_NULL(last_minor_table)) { + merge_inc_version = MAX(last_major_table->get_snapshot_version(), last_minor_table->get_max_merged_trans_version()); + } else if (OB_NOT_NULL(last_major_table)) { + merge_inc_version = last_major_table->get_snapshot_version(); + } else if (OB_NOT_NULL(last_minor_table)){ + merge_inc_version = last_minor_table->get_max_merged_trans_version(); + } + + if (OB_SUCC(ret) && (NULL == meta_base_table || merge_inc_version <= 0)) { + ret = OB_NO_NEED_MERGE; + LOG_WARN("cannot meta merge with null base table or inc version", K(ret), K(meta_base_table), K(merge_inc_version)); + } + return ret; +} + 
+int ObAdaptiveMergePolicy::add_meta_merge_result( + ObITable *table, + ObGetMergeTablesResult &result, + const bool update_snapshot_flag) +{ + int ret = OB_SUCCESS; + + if (OB_ISNULL(table)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid argument", K(ret), KPC(table)); + } else if (OB_FAIL(result.handle_.add_table(table))) { + LOG_WARN("failed to add table", K(ret), KPC(table)); + } else if (table->is_meta_major_sstable() || table->is_major_sstable()) { + result.version_range_.base_version_ = 0; + result.version_range_.multi_version_start_ = table->get_snapshot_version(); + result.version_range_.snapshot_version_ = table->get_snapshot_version(); + result.create_snapshot_version_ = static_cast(table)->get_meta().get_basic_meta().create_snapshot_version_; + } else if (update_snapshot_flag) { + int64_t max_snapshot = MAX(result.version_range_.snapshot_version_, table->get_max_merged_trans_version()); + result.version_range_.multi_version_start_ = max_snapshot; + result.version_range_.snapshot_version_ = max_snapshot; + result.scn_range_.end_scn_ = table->get_end_scn(); + } + return ret; +} + +int ObAdaptiveMergePolicy::get_adaptive_merge_reason( + const ObTablet &tablet, + AdaptiveMergeReason &reason) +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + ObTabletStat tablet_stat; + reason = AdaptiveMergeReason::NONE; + + if (OB_FAIL(MTL(ObTenantTabletStatMgr *)->get_latest_tablet_stat(ls_id, tablet_id, tablet_stat))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get latest tablet stat", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_TMP_FAIL(check_inc_sstable_row_cnt_percentage(tablet, reason))) { + LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id)); + } + } else { + if (OB_TMP_FAIL(check_tombstone_situation(tablet_stat, tablet, reason))) { + LOG_WARN("failed to check tombstone 
scene", K(tmp_ret), K(ls_id), K(tablet_id)); + } + if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_load_data_situation(tablet_stat, tablet, reason))) { + LOG_WARN("failed to check load data scene", K(tmp_ret), K(ls_id), K(tablet_id)); + } + if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_inc_sstable_row_cnt_percentage(tablet, reason))) { + LOG_WARN("failed to check sstable data situation", K(tmp_ret), K(ls_id), K(tablet_id)); + } + if (AdaptiveMergeReason::NONE == reason && OB_TMP_FAIL(check_ineffecient_read(tablet_stat, tablet, reason))) { + LOG_WARN("failed to check ineffecient read", K(tmp_ret), K(ls_id), K(tablet_id)); + } + LOG_INFO("DanLing Check tablet adaptive merge reason", K(reason), K(tablet_stat)); // TODO tmp log, remove later + } + return ret; +} + +int ObAdaptiveMergePolicy::check_inc_sstable_row_cnt_percentage( + const ObTablet &tablet, + AdaptiveMergeReason &reason) +{ + int ret = OB_SUCCESS; + const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + ObSSTable *last_major = static_cast(tablet.get_table_store().get_major_sstables().get_boundary_table(true)); + int64_t base_row_count = nullptr != last_major ? 
last_major->get_meta().get_basic_meta().row_count_ : 0; + int64_t inc_row_count = 0; + const ObSSTableArray &minor_sstables = tablet.get_table_store().get_minor_sstables(); + ObSSTable *sstable = nullptr; + for (int i = 0; OB_SUCC(ret) && i < minor_sstables.count(); ++i) { + if (OB_ISNULL(sstable = static_cast(minor_sstables.get_table(i)))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("sstable is null", K(ret), K(i)); + } else { + inc_row_count += sstable->get_meta().get_basic_meta().row_count_; + } + } + if ((inc_row_count > INC_ROW_COUNT_THRESHOLD) || + (base_row_count > BASE_ROW_COUNT_THRESHOLD && + (inc_row_count * 100 / base_row_count) > LOAD_DATA_SCENE_THRESHOLD)) { + reason = AdaptiveMergeReason::FREQUENT_WRITE; + } + LOG_DEBUG("check_sstable_data_situation", K(ret), K(ls_id), K(tablet_id), K(reason), + K(base_row_count), K(inc_row_count)); + return ret; +} + +int ObAdaptiveMergePolicy::check_load_data_situation( + const ObTabletStat &tablet_stat, + const ObTablet &tablet, + AdaptiveMergeReason &reason) +{ + int ret = OB_SUCCESS; + const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + reason = AdaptiveMergeReason::NONE; + if (!tablet.is_valid() || !tablet_stat.is_valid() + || ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat)); + } else if (tablet_stat.is_hot_tablet() && tablet_stat.is_insert_mostly()) { + reason = AdaptiveMergeReason::LOAD_DATA_SCENE; + } + LOG_DEBUG("check_load_data_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat)); + return ret; +} + +int ObAdaptiveMergePolicy::check_tombstone_situation( + const ObTabletStat &tablet_stat, + const ObTablet &tablet, + AdaptiveMergeReason &reason) +{ + int ret = OB_SUCCESS; + const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; + const ObTabletID &tablet_id = 
tablet.get_tablet_meta().tablet_id_; + reason = AdaptiveMergeReason::NONE; + + if (!tablet.is_valid() || !tablet_stat.is_valid() + || ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat)); + } else if (tablet_stat.is_hot_tablet() && tablet_stat.is_update_mostly()) { + reason = AdaptiveMergeReason::TOMBSTONE_SCENE; + } + LOG_DEBUG("check_tombstone_situation", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat)); + return ret; +} + +int ObAdaptiveMergePolicy::check_ineffecient_read( + const ObTabletStat &tablet_stat, + const ObTablet &tablet, + AdaptiveMergeReason &reason) +{ + int ret = OB_SUCCESS; + const ObLSID &ls_id = tablet.get_tablet_meta().ls_id_; + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + reason = AdaptiveMergeReason::NONE; + + if (!tablet.is_valid() || !tablet_stat.is_valid() || + ls_id.id() != tablet_stat.ls_id_ || tablet_id.id() != tablet_stat.tablet_id_) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid arguments", K(ret), K(tablet), K(tablet_stat)); + } else if (!tablet_stat.is_hot_tablet()) { + } else if (tablet_stat.is_inefficient_scan() || tablet_stat.is_inefficient_insert() + || tablet_stat.is_inefficient_pushdown()) { + reason = AdaptiveMergeReason::INEFFICIENT_QUERY; + } + LOG_DEBUG("check_ineffecient_read", K(ret), K(ls_id), K(tablet_id), K(reason), K(tablet_stat)); + return ret; +} + + } /* namespace compaction */ } /* namespace oceanbase */ diff --git a/src/storage/compaction/ob_partition_merge_policy.h b/src/storage/compaction/ob_partition_merge_policy.h index 2a8ee0d86e..814c700b02 100644 --- a/src/storage/compaction/ob_partition_merge_policy.h +++ b/src/storage/compaction/ob_partition_merge_policy.h @@ -13,9 +13,9 @@ #ifndef OB_PARTITION_MERGE_POLICY_H_ #define OB_PARTITION_MERGE_POLICY_H_ -#include "storage/memtable/ob_memtable.h" -#include "ob_tenant_freeze_info_mgr.h" - 
+#include "storage/compaction/ob_tenant_freeze_info_mgr.h" +#include "storage/compaction/ob_compaction_util.h" +#include "share/ob_table_range.h" namespace oceanbase { namespace storage @@ -28,113 +28,110 @@ class ObTabletTableStore; class ObGetMergeTablesResult; class ObTablesHandleArray; class ObStorageSchema; -enum ObMergeType; +struct ObTabletStat; +struct ObTableHandleV2; +class ObLS; +class ObTableStoreIterator; } using namespace storage; +namespace blocksstable +{ +class ObSSTable; +} + namespace compaction { +struct ObMinorExecuteRangeMgr; + class ObPartitionMergePolicy { public: static int get_mini_merge_tables( const storage::ObGetMergeTablesParam ¶m, - const int64_t multi_version_start, + ObLS &ls, const storage::ObTablet &tablet, storage::ObGetMergeTablesResult &result); - static int get_mini_minor_merge_tables( + static int get_minor_merge_tables( const storage::ObGetMergeTablesParam ¶m, - const int64_t multi_version_start, + ObLS &ls, const storage::ObTablet &tablet, storage::ObGetMergeTablesResult &result); static int get_hist_minor_merge_tables( const storage::ObGetMergeTablesParam ¶m, - const int64_t multi_version_start, + ObLS &ls, const storage::ObTablet &tablet, storage::ObGetMergeTablesResult &result); - static int get_buf_minor_merge_tables( + static int get_medium_merge_tables( const storage::ObGetMergeTablesParam ¶m, - const int64_t multi_version_start, + ObLS &ls, const storage::ObTablet &tablet, storage::ObGetMergeTablesResult &result); - - static int get_major_merge_tables( - const storage::ObGetMergeTablesParam ¶m, - const int64_t multi_version_start, - const storage::ObTablet &tablet, - storage::ObGetMergeTablesResult &result); - - static int check_need_mini_merge( - const storage::ObTablet &tablet, - bool &need_merge); - - static int check_need_mini_minor_merge( - const storage::ObTablet &tablet, - bool &need_merge); - - static int check_need_hist_minor_merge( - const storage::ObTablet &tablet, - bool &need_merge); - - static int 
check_need_buf_minor_merge( - const storage::ObTablet &tablet, - bool &need_merge); - - static int check_need_major_merge( - const storage::ObTablet &tablet, - int64_t &merge_version, + static int check_need_medium_merge( + storage::ObTablet &tablet, + const int64_t medium_snapshot, bool &need_merge, - bool &can_merge, - bool &need_frorce_freeze); - - static int diagnose_table_count_unsafe( - const storage::ObMergeType &merge_type, - const storage::ObTablet &tablet); -private: - static int find_mini_merge_tables( - const storage::ObGetMergeTablesParam ¶m, - const storage::ObTenantFreezeInfoMgr::NeighbourFreezeInfo &freeze_info, - const storage::ObTablet &tablet, - ObIArray &memtable_handles, - storage::ObGetMergeTablesResult &result); - - static int find_mini_minor_merge_tables( - const ObGetMergeTablesParam ¶m, - const int64_t min_snapshot_version, - const int64_t max_snapshot_version, - const int64_t expect_multi_version_start, - const ObTablet &tablet, - storage::ObGetMergeTablesResult &result); - - static int find_buf_minor_merge_tables( - const storage::ObTablet &tablet, - storage::ObGetMergeTablesResult *result = nullptr); - static int find_buf_minor_base_table( - storage::ObITable *last_major_table, - storage::ObITable *&buf_minor_base_table); - - static int add_buf_minor_merge_result(storage::ObITable *table, storage::ObGetMergeTablesResult &result); - - static int refine_mini_merge_result( - const storage::ObTablet &tablet, - storage::ObGetMergeTablesResult &result); - static int refine_mini_minor_merge_result(storage::ObGetMergeTablesResult &result); - - static int deal_with_minor_result( - const storage::ObMergeType &merge_type, - const int64_t expect_multi_version_start, - const storage::ObTablet &tablet, - storage::ObGetMergeTablesResult &result); + bool &can_merge); + static int generate_parallel_minor_interval( + const ObGetMergeTablesResult &input_result, + ObMinorExecuteRangeMgr &minor_range_mgr, + ObIArray ¶llel_result); static int 
get_boundary_snapshot_version( const ObTablet &tablet, int64_t &min_snapshot, - int64_t &max_snapshot); + int64_t &max_snapshot, + const bool check_table_cnt = true); - static storage::ObITable *get_latest_sstable(const storage::ObTabletTableStore &table_store); + static int diagnose_table_count_unsafe( + const storage::ObMergeType merge_type, + const storage::ObTablet &tablet); + + static int get_multi_version_start( + const ObMergeType merge_type, + ObLS &ls, + const ObTablet &tablet, + ObVersionRange &result_version_range); + +private: + static int find_mini_merge_tables( + const storage::ObGetMergeTablesParam ¶m, + const storage::ObTenantFreezeInfoMgr::NeighbourFreezeInfo &freeze_info, + ObLS &ls, + const storage::ObTablet &tablet, + ObIArray &memtable_handles, + storage::ObGetMergeTablesResult &result); + + static int find_minor_merge_tables( + const ObGetMergeTablesParam ¶m, + const int64_t min_snapshot_version, + const int64_t max_snapshot_version, + ObLS &ls, + const ObTablet &tablet, + storage::ObGetMergeTablesResult &result); + + static int refine_minor_merge_tables( + const ObTablet &tablet, + const common::ObIArray &merge_tables, + int64_t &left_border, + int64_t &right_border); + +private: + static int refine_mini_merge_result( + const storage::ObTablet &tablet, + storage::ObGetMergeTablesResult &result); + static int refine_minor_merge_result( + const int64_t minor_compact_trigger, + storage::ObGetMergeTablesResult &result); + + static int deal_with_minor_result( + const storage::ObMergeType &merge_type, + ObLS &ls, + const storage::ObTablet &tablet, + storage::ObGetMergeTablesResult &result); static int get_neighbour_freeze_info( const int64_t snapshot_version, @@ -147,7 +144,10 @@ private: const ObTablet &tablet, int64_t &max_snapshot_version); - static bool check_table_count_safe(const storage::ObTabletTableStore &table_store); + static int push_result_with_merge( + const int64_t minor_trigger, + ObGetMergeTablesResult &input_result, + ObIArray 
¶llel_result); // diagnose part static int diagnose_minor_dag( storage::ObMergeType merge_type, @@ -160,17 +160,104 @@ public: static const int64_t OB_UNSAFE_TABLE_CNT = 32; static const int64_t OB_EMERGENCY_TABLE_CNT = 56; static const int64_t DEFAULT_MINOR_COMPACT_TRIGGER = 2; + static const int64_t OB_DEFAULT_COMPACTION_AMPLIFICATION_FACTOR = 25; + static const int64_t OB_MINOR_PARALLEL_SSTABLE_CNT_TRIGGER = 20; + static const int64_t OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG = 10; + static const int64_t OB_MINOR_PARALLEL_INFO_ARRAY_SIZE = MAX_SSTABLE_CNT_IN_STORAGE / OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG; + static const int64_t OB_LARGE_MINOR_SSTABLE_ROW_COUNT = 2000000; typedef int (*GetMergeTables)(const storage::ObGetMergeTablesParam&, - const int64_t, + ObLS &ls, const storage::ObTablet &, storage::ObGetMergeTablesResult&); static GetMergeTables get_merge_tables[storage::ObMergeType::MERGE_TYPE_MAX]; - - typedef int (*CheckNeedMerge)(const storage::ObTablet&, bool&); - static CheckNeedMerge check_need_minor_merge[storage::ObMergeType::MERGE_TYPE_MAX]; }; +struct ObMinorExecuteRangeMgr +{ + ObMinorExecuteRangeMgr() + : exe_range_array_() + {} + ~ObMinorExecuteRangeMgr() + { + reset(); + } + void reset() + { + exe_range_array_.reset(); + } + + int get_merge_ranges( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id); + bool in_execute_range(const storage::ObITable *table) const; + int sort_ranges(); + + ObSEArray exe_range_array_; +}; + + +class ObAdaptiveMergePolicy +{ +public: + enum AdaptiveMergeReason : uint8_t { + NONE = 0, + LOAD_DATA_SCENE = 1, + TOMBSTONE_SCENE = 2, + INEFFICIENT_QUERY = 3, + FREQUENT_WRITE = 4, + INVALID_REASON + }; + + static const char *merge_reason_to_str(const int64_t merge_reason); + static bool is_valid_merge_reason(const AdaptiveMergeReason &reason); + + static int get_meta_merge_tables( + const storage::ObGetMergeTablesParam ¶m, + ObLS &ls, + const storage::ObTablet &tablet, + storage::ObGetMergeTablesResult &result); + + 
static int get_adaptive_merge_reason( + const storage::ObTablet &tablet, + AdaptiveMergeReason &reason); + +private: + static int find_meta_major_tables(const storage::ObTablet &tablet, + storage::ObGetMergeTablesResult &result); + static int find_base_table_and_inc_version(storage::ObITable *last_major_table, + storage::ObITable *last_minor_table, + storage::ObITable *&meta_base_table, + int64_t &merge_inc_version); + static int add_meta_merge_result(storage::ObITable *table, + storage::ObGetMergeTablesResult &result, + const bool update_snapshot_flag); +private: + static int check_load_data_situation(const storage::ObTabletStat &tablet_stat, + const storage::ObTablet &tablet, + AdaptiveMergeReason &merge_reason); + static int check_tombstone_situation(const storage::ObTabletStat &tablet_stat, + const storage::ObTablet &tablet, + AdaptiveMergeReason &merge_reason); + static int check_ineffecient_read(const storage::ObTabletStat &tablet_stat, + const storage::ObTablet &tablet, + AdaptiveMergeReason &merge_reason); + static int check_inc_sstable_row_cnt_percentage( + const ObTablet &tablet, + AdaptiveMergeReason &merge_reason); + +private: + static constexpr int64_t SCHEDULE_META_MERGE_INTERVAL = 120L * 1000L * 1000L; //120s + static constexpr int64_t INC_ROW_COUNT_THRESHOLD = 100L * 1000L; // 10w + static constexpr int64_t TOMBSTONE_ROW_COUNT_THRESHOLD = 30L * 1000L; // 3w + static constexpr int64_t BASE_ROW_COUNT_THRESHOLD = 10L * 1000L; // 5w + static constexpr int64_t LOAD_DATA_SCENE_THRESHOLD = 70; + static constexpr int64_t TOMBSTONE_SCENE_THRESHOLD = 50; + static constexpr float INC_ROW_COUNT_PERCENTAGE_THRESHOLD = 0.5; + static constexpr int64_t TRANS_STATE_DETERM_ROW_CNT_THRESHOLD = 1000L; // 1k +}; + + } /* namespace compaction */ } /* namespace oceanbase */ #endif // OB_PARTITION_MERGE_POLICY_H_ diff --git a/src/storage/compaction/ob_partition_merge_progress.cpp b/src/storage/compaction/ob_partition_merge_progress.cpp index 3d97cede81..6d5d97613f 100644 
--- a/src/storage/compaction/ob_partition_merge_progress.cpp +++ b/src/storage/compaction/ob_partition_merge_progress.cpp @@ -139,7 +139,7 @@ int ObPartitionMergeProgress::estimate(ObTabletMergeCtx *ctx) if (OB_UNLIKELY(0 == tables.count() || NULL == tables.at(0))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("get unexpected tables", K(ret), K(tables)); - } else if (ctx->param_.is_mini_merge()) { // only mini merge use estimate row interface + } else if (is_mini_merge(ctx->param_.merge_type_)) { // only mini merge use estimate row interface ObQueryFlag query_flag(ObQueryFlag::Forward, true, /*is daily merge scan*/ true, /*is read multiple macro block*/ @@ -197,7 +197,7 @@ int ObPartitionMergeProgress::estimate(ObTabletMergeCtx *ctx) avg_row_length_ = estimate_occupy_size_ * 1.0 / estimate_row_cnt_; } update_estimated_finish_time_(); - if (ctx->param_.is_major_merge()) { + if (ctx->param_.is_tenant_major_merge_) { if (OB_FAIL(MTL(ObTenantCompactionProgressMgr*)->update_progress( merge_dag_->get_ctx().param_.merge_version_, estimate_occupy_size_ - old_major_data_size, // estimate_occupy_size_delta diff --git a/src/storage/compaction/ob_partition_merge_progress.h b/src/storage/compaction/ob_partition_merge_progress.h index 9379447d85..995226661c 100644 --- a/src/storage/compaction/ob_partition_merge_progress.h +++ b/src/storage/compaction/ob_partition_merge_progress.h @@ -49,7 +49,7 @@ public: DECLARE_TO_STRING; public: static const int32_t UPDATE_INTERVAL = 2 * 1000 * 1000; // 2 second - static const int32_t NORMAL_UPDATE_PARAM = 120; + static const int32_t NORMAL_UPDATE_PARAM = 300; protected: int estimate(ObTabletMergeCtx *ctx); void update_estimated_finish_time_(); diff --git a/src/storage/compaction/ob_partition_merger.cpp b/src/storage/compaction/ob_partition_merger.cpp index 573d0a7f44..8f97d738c2 100644 --- a/src/storage/compaction/ob_partition_merger.cpp +++ b/src/storage/compaction/ob_partition_merger.cpp @@ -81,10 +81,10 @@ void ObPartitionMerger::reset() int 
ObPartitionMerger::init_data_store_desc(ObTabletMergeCtx &ctx) { int ret = OB_SUCCESS; - if (OB_FAIL(data_store_desc_.init(*ctx.get_merge_schema(), + if (OB_FAIL(data_store_desc_.init(*ctx.get_schema(), ctx.param_.ls_id_, ctx.param_.tablet_id_, - ctx.param_.merge_type_, + ctx.param_.get_merge_type(), ctx.sstable_version_range_.snapshot_version_))) { STORAGE_LOG(WARN, "Failed to init data store desc", K(ret), K(ctx)); } else { @@ -120,7 +120,7 @@ int ObPartitionMerger::open_macro_writer(ObMergeParameter &merge_param) } else if (OB_ISNULL(table = merge_ctx_->tables_handle_.get_tables().at(0))) { ret = OB_ERR_SYS; STORAGE_LOG(WARN, "sstable is null", K(ret)); - } else if (!table->is_sstable() && merge_ctx_->param_.is_major_merge()) { + } else if (!table->is_sstable() && is_major_merge_type(merge_ctx_->param_.merge_type_)) { ret = OB_ERR_SYS; STORAGE_LOG(WARN, "Unexpected first table for major merge", K(ret), KPC(merge_ctx_)); } else if (OB_FAIL(macro_start_seq.set_parallel_degree(task_idx_))) { @@ -229,6 +229,8 @@ int ObPartitionMerger::process(const ObMicroBlock µ_block) STORAGE_LOG(WARN, "invalid argument to append micro block", K(ret), K(micro_block)); } else if (OB_FAIL(macro_writer_->append_micro_block(micro_block, macro_desc))) { STORAGE_LOG(WARN, "Failed to append micro block to macro block writer", K(ret), K(micro_block)); + } else { + LOG_DEBUG("append micro block", K(ret), K(micro_block)); } return ret; @@ -267,6 +269,8 @@ int ObPartitionMerger::process(const ObDatumRow &row) STORAGE_LOG(WARN, "Failed to check row columns", K(ret), K(row)); } else if (OB_FAIL(inner_process(row))) { STORAGE_LOG(WARN, "Failed to inner append row", K(ret)); + } else { + LOG_DEBUG("append row", K(ret), K(row)); } return ret; } @@ -296,7 +300,7 @@ int ObPartitionMerger::merge_macro_block_iter(MERGE_ITER_ARRAY &minimum_iters, i } else if (OB_FAIL(try_rewrite_macro_block(*macro_desc, rewrite))) { STORAGE_LOG(WARN, "Failed to try_rewrite_macro_block", K(ret)); } else if (rewrite) { 
- if(OB_FAIL(rewrite_macro_block(minimum_iters))) { + if (OB_FAIL(rewrite_macro_block(minimum_iters))) { STORAGE_LOG(WARN, "Failed to open_curr_range", K(ret)); } } else if (OB_FAIL(process(*macro_desc))) { @@ -475,8 +479,8 @@ int ObPartitionMajorMerger::init_partition_fuser(const ObMergeParameter &merge_p int ret = OB_SUCCESS; partition_fuser_ = nullptr; - if (merge_param.is_buf_minor_merge()) { - partition_fuser_ = alloc_merge_helper(); + if (is_meta_major_merge(merge_param.merge_type_)) { + partition_fuser_ = alloc_merge_helper(); } else { partition_fuser_ = alloc_merge_helper(); } @@ -514,7 +518,7 @@ int ObPartitionMajorMerger::merge_partition(ObTabletMergeCtx &ctx, const int64_t bool has_incremental_data = false; if (merge_helper.is_iter_end()) { ret = OB_ITER_END; - } else if (merge_param.is_major_merge() + } else if (is_major_merge_type(merge_param.merge_type_) && OB_FAIL(get_macro_block_count_to_rewrite(merge_param.merge_range_, need_rewrite_block_cnt_))) { STORAGE_LOG(WARN, "Failed to compute the count of macro block to rewrite", K(ret)); } else if (OB_FAIL(merge_helper.has_incremental_data(has_incremental_data))) { @@ -611,7 +615,7 @@ int ObPartitionMajorMerger::get_macro_block_count_to_rewrite(const ObDatumRange ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected null first sstable", K(ret), K(merge_ctx_->tables_handle_)); } else if (merge_ctx_->progressive_merge_step_ < progressive_merge_num) { - ObSSTableSecMetaIterator *sec_meta_iter; + ObSSTableSecMetaIterator *sec_meta_iter = nullptr; ObDataMacroBlockMeta macro_meta; if (OB_FAIL(first_sstable->scan_secondary_meta( allocator_, @@ -707,6 +711,9 @@ int ObPartitionMajorMerger::rewrite_macro_block(MERGE_ITER_ARRAY &minimum_iters) { int ret = OB_SUCCESS; ObPartitionMergeIter *iter = nullptr; + blocksstable::MacroBlockId curr_macro_id; + const ObMacroBlockDesc *curr_macro = nullptr; + const ObMacroBlockDesc *tmp_macro = nullptr; if (minimum_iters.count() != 1) { ret = OB_INNER_STAT_ERROR; 
STORAGE_LOG(WARN, "Unexpected minimum iters to rewrite macro block", K(ret), K(minimum_iters)); @@ -716,12 +723,26 @@ int ObPartitionMajorMerger::rewrite_macro_block(MERGE_ITER_ARRAY &minimum_iters) } else if (FALSE_IT(iter = minimum_iters.at(0))) { } else if (OB_FAIL(iter->open_curr_range(true /* rewrite */))) { STORAGE_LOG(WARN, "Failed to open the curr macro block", K(ret)); + } else if (OB_FAIL(iter->get_curr_macro_block(curr_macro))) { + STORAGE_LOG(WARN, "failed to get curr macro block", K(ret), KPC(curr_macro)); + } else if (OB_ISNULL(curr_macro)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "curr macro is null", K(ret), KPC(curr_macro)); } else { STORAGE_LOG(DEBUG, "Rewrite macro block", KPC(iter)); + curr_macro_id = curr_macro->macro_block_id_; // TODO maybe we need use macro_block_ctx to decide wheather the result row came from the same macro block while (OB_SUCC(ret) && !iter->is_iter_end() && iter->is_macro_block_opened()) { if (OB_FAIL(merge_same_rowkey_iters(minimum_iters))) { STORAGE_LOG(WARN, "failed to merge_same_rowkey_iters", K(ret), K(minimum_iters)); + } else if (OB_FAIL(iter->get_curr_macro_block(tmp_macro))) { + STORAGE_LOG(WARN, "failed to get curr macro block", K(ret), KPC(tmp_macro)); + } else if (OB_ISNULL(tmp_macro)) { + ret = OB_ERR_UNEXPECTED; + STORAGE_LOG(WARN, "tmp macro is null", K(ret), KPC(tmp_macro)); + } else if (tmp_macro->macro_block_id_ != curr_macro_id) { + LOG_DEBUG("break for different macro", K(ret), KPC(tmp_macro), KPC(curr_macro)); + break; } } } @@ -746,9 +767,22 @@ int ObPartitionMajorMerger::reuse_base_sstable(ObPartitionMajorMergeHelper &merg while (OB_SUCC(ret)) { if (base_iter->is_iter_end()) { ret = OB_ITER_END; - } else if (base_iter->is_macro_block_opened()) { - ret = OB_ERR_UNEXPECTED; - STORAGE_LOG(WARN, "unexpected macro block opened", K(ret), KPC(base_iter)); + } else if (base_iter->is_macro_block_opened()) { // opend for cross range + // flush all row in curr macro block + while (OB_SUCC(ret) && 
base_iter->is_macro_block_opened()) { + if (OB_ISNULL(base_iter->get_curr_row())) { + STORAGE_LOG(WARN, "curr row is unexpected null", K(ret), KPC(base_iter)); + } else if (OB_FAIL(process(*base_iter->get_curr_row()))) { + STORAGE_LOG(WARN, "Failed to process row", K(ret), K(*partition_fuser_->get_result_row())); + if (GCONF._enable_compaction_diagnose) { + ObPartitionMergeDumper::print_error_info(ret, minimum_iters, *merge_ctx_); + } + } else if (OB_FAIL(base_iter->next())) { + if (OB_ITER_END != ret) { + STORAGE_LOG(WARN, "Failed to get next", K(ret), KPC(base_iter)); + } + } + } // end of while } else if (OB_FAIL(base_iter->get_curr_macro_block(macro_desc))) { STORAGE_LOG(WARN, "Failed to get current macro block", K(ret), KPC(base_iter)); } else if (OB_ISNULL(macro_desc) || OB_UNLIKELY(!macro_desc->is_valid())) { @@ -1631,7 +1665,7 @@ void ObPartitionMergeDumper::print_error_info(const int err_no, ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "name too long", K(ret), K(file_name)); } else if (table->is_sstable()) { - if (OB_FAIL(static_cast(table)->dump2text(dump_table_dir, *ctx.schema_ctx_.table_schema_, + if (OB_FAIL(static_cast(table)->dump2text(dump_table_dir, *ctx.schema_ctx_.storage_schema_, file_name))) { if (OB_SERVER_OUTOF_DISK_SPACE != ret) { STORAGE_LOG(WARN, "failed to dump sstable", K(ret), K(file_name)); diff --git a/src/storage/compaction/ob_partition_parallel_merge_ctx.cpp b/src/storage/compaction/ob_partition_parallel_merge_ctx.cpp index 23ce1285fe..b6b0f7c580 100644 --- a/src/storage/compaction/ob_partition_parallel_merge_ctx.cpp +++ b/src/storage/compaction/ob_partition_parallel_merge_ctx.cpp @@ -18,6 +18,7 @@ #include "ob_tablet_merge_ctx.h" #include "share/scheduler/ob_dag_scheduler.h" #include "storage/blocksstable/ob_sstable.h" +#include "storage/compaction/ob_medium_compaction_mgr.h" namespace oceanbase { using namespace common; @@ -71,27 +72,27 @@ int ObParallelMergeCtx::init(compaction::ObTabletMergeCtx &merge_ctx) if (IS_INIT) { ret = 
OB_INIT_TWICE; STORAGE_LOG(WARN, "ObParallelMergeCtx init twice", K(ret)); - } else if (OB_UNLIKELY(!merge_ctx.is_schema_valid() || merge_ctx.tables_handle_.empty())) { + } else if (OB_UNLIKELY(nullptr == merge_ctx.get_schema() || merge_ctx.tables_handle_.empty())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "Invalid argument to init parallel merge", K(ret), K(merge_ctx)); } else { - int64_t tablet_size = merge_ctx.get_merge_schema()->get_tablet_size(); + int64_t tablet_size = merge_ctx.get_schema()->get_tablet_size(); bool enable_parallel_minor_merge = false; { omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID())); if (tenant_config.is_valid()) { enable_parallel_minor_merge = tenant_config->_enable_parallel_minor_merge; } - } // end of ObTenantConfigGuard - if (enable_parallel_minor_merge && tablet_size > 0 && merge_ctx.param_.is_mini_merge()) { + } + if (enable_parallel_minor_merge && tablet_size > 0 && is_mini_merge(merge_ctx.param_.merge_type_)) { if (OB_FAIL(init_parallel_mini_merge(merge_ctx))) { STORAGE_LOG(WARN, "Failed to init parallel setting for mini merge", K(ret)); } - } else if (enable_parallel_minor_merge && tablet_size > 0 && merge_ctx.param_.is_minor_merge()) { + } else if (enable_parallel_minor_merge && tablet_size > 0 && is_minor_merge(merge_ctx.param_.merge_type_)) { if (OB_FAIL(init_parallel_mini_minor_merge(merge_ctx))) { STORAGE_LOG(WARN, "Failed to init parallel setting for mini minor merge", K(ret)); } - } else if (tablet_size > 0 && merge_ctx.param_.is_major_merge()) { + } else if (tablet_size > 0 && is_major_merge_type(merge_ctx.param_.merge_type_)) { if (OB_FAIL(init_parallel_major_merge(merge_ctx))) { STORAGE_LOG(WARN, "Failed to init parallel major merge", K(ret)); } @@ -108,6 +109,52 @@ int ObParallelMergeCtx::init(compaction::ObTabletMergeCtx &merge_ctx) return ret; } +int ObParallelMergeCtx::init(const compaction::ObMediumCompactionInfo &medium_info) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + 
STORAGE_LOG(WARN, "ObParallelMergeCtx init twice", K(ret)); + } else if (OB_UNLIKELY(!medium_info.is_valid())) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "Invalid argument to init parallel merge", K(ret), K(medium_info)); + } else { + const compaction::ObParallelMergeInfo ¶l_info = medium_info.parallel_merge_info_; + range_array_.reset(); + + ObDatumRange schema_rowkey_range; + ObDatumRange multi_version_range; + schema_rowkey_range.start_key_.set_min_rowkey(); + schema_rowkey_range.end_key_.set_min_rowkey(); + schema_rowkey_range.set_left_open(); + schema_rowkey_range.set_right_closed(); + for (int i = 0; OB_SUCC(ret) && i < paral_info.list_size_ + 1; ++i) { + if (i > 0 && OB_FAIL(schema_rowkey_range.end_key_.deep_copy(schema_rowkey_range.start_key_, allocator_))) { // end_key -> start_key + STORAGE_LOG(WARN, "failed to deep copy start key", K(ret), K(i), K(medium_info)); + } else if (i < paral_info.list_size_) { + if (OB_FAIL(schema_rowkey_range.end_key_.from_rowkey(paral_info.parallel_end_key_list_[i].get_rowkey(), allocator_))) { + STORAGE_LOG(WARN, "failed to deep copy end key", K(ret), K(i), K(medium_info)); + } + } else { // i == paral_info.list_size_ + schema_rowkey_range.end_key_.set_max_rowkey(); + } + multi_version_range.reset(); + if (FAILEDx(schema_rowkey_range.to_multi_version_range(allocator_, multi_version_range))) { + STORAGE_LOG(WARN, "failed to convert multi_version range", K(ret), K(schema_rowkey_range)); + } else if (OB_FAIL(range_array_.push_back(multi_version_range))) { + STORAGE_LOG(WARN, "Failed to push back merge range to array", K(ret), K(multi_version_range)); + } + } + if (OB_SUCC(ret)) { + concurrent_cnt_ = paral_info.list_size_ + 1; + parallel_type_ = PARALLEL_MAJOR; + is_inited_ = true; + STORAGE_LOG(INFO, "success to init parallel merge ctx", KPC(this)); + } + } + return ret; +} + int ObParallelMergeCtx::get_merge_range(const int64_t parallel_idx, ObDatumRange &merge_range) { @@ -124,7 +171,7 @@ int 
ObParallelMergeCtx::get_merge_range(const int64_t parallel_idx, ObDatumRange switch (parallel_type_) { case PARALLEL_MAJOR: case PARALLEL_MINI: - case PARALLEL_MINI_MINOR: + case PARALLEL_MINOR: case SERIALIZE_MERGE: merge_range = range_array_.at(parallel_idx); break; @@ -158,7 +205,7 @@ int ObParallelMergeCtx::init_parallel_major_merge(compaction::ObTabletMergeCtx & { int ret = OB_SUCCESS; const ObITable *first_table = nullptr; - if (OB_UNLIKELY(MAJOR_MERGE != merge_ctx.param_.merge_type_)) { + if (OB_UNLIKELY(!is_major_merge_type(merge_ctx.param_.merge_type_))) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "Invalid argument to init parallel major merge", K(ret), K(merge_ctx)); } else if (OB_UNLIKELY(nullptr == (first_table = merge_ctx.tables_handle_.get_table(0)) @@ -166,7 +213,7 @@ int ObParallelMergeCtx::init_parallel_major_merge(compaction::ObTabletMergeCtx & ret = OB_ERR_SYS; STORAGE_LOG(WARN, "Unexpected first table", K(ret), K(merge_ctx.tables_handle_)); } else { - const int64_t tablet_size = merge_ctx.schema_ctx_.merge_schema_->get_tablet_size(); + const int64_t tablet_size = merge_ctx.get_schema()->get_tablet_size(); const ObSSTable *first_sstable = static_cast(first_table); const int64_t macro_block_cnt = first_sstable->get_meta().get_macro_info().get_data_block_ids().count(); if (OB_FAIL(get_concurrent_cnt(tablet_size, macro_block_cnt, concurrent_cnt_))) { @@ -243,7 +290,7 @@ int ObParallelMergeCtx::init_parallel_mini_merge(compaction::ObTabletMergeCtx &m ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "Invalid argument to init parallel mini merge", K(ret), K(merge_ctx)); } else { - const int64_t tablet_size = merge_ctx.get_merge_schema()->get_tablet_size(); + const int64_t tablet_size = merge_ctx.get_schema()->get_tablet_size(); memtable::ObIMemtable *memtable = nullptr; if (OB_FAIL(merge_ctx.tables_handle_.get_first_memtable(memtable))) { STORAGE_LOG(WARN, "failed to get first memtable", K(ret), @@ -299,12 +346,12 @@ int 
ObParallelMergeCtx::init_parallel_mini_minor_merge(compaction::ObTabletMerge { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!merge_ctx.param_.is_minor_merge())) { + if (OB_UNLIKELY(!is_minor_merge(merge_ctx.param_.merge_type_))) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "Invalid argument to init parallel mini minor merge", K(ret), K(merge_ctx)); } else { const ObTableReadInfo &index_read_info = merge_ctx.tablet_handle_.get_obj()->get_index_read_info(); - const int64_t tablet_size = merge_ctx.get_merge_schema()->get_tablet_size(); + const int64_t tablet_size = merge_ctx.get_schema()->get_tablet_size(); ObRangeSplitInfo range_info; ObSEArray tables; ObSEArray store_ranges; @@ -316,7 +363,7 @@ int ObParallelMergeCtx::init_parallel_mini_minor_merge(compaction::ObTabletMerge } else if (tables.count() != merge_ctx.tables_handle_.get_count()) { if (OB_FAIL(init_serial_merge())) { STORAGE_LOG(WARN, "Failed to init serialize merge", K(ret)); - } else if (merge_ctx.param_.merge_type_ == MINI_MINOR_MERGE) { + } else if (is_minor_merge(merge_ctx.param_.merge_type_)) { STORAGE_LOG(WARN, "Unexpected tables handle for mini minor merge", K(ret), K(merge_ctx.tables_handle_)); } @@ -341,7 +388,7 @@ int ObParallelMergeCtx::init_parallel_mini_minor_merge(compaction::ObTabletMerge } } else { concurrent_cnt_ = store_ranges.count(); - parallel_type_ = PARALLEL_MINI_MINOR; + parallel_type_ = PARALLEL_MINOR; for (int64_t i = 0; OB_SUCC(ret) && i < store_ranges.count(); i++) { ObDatumRange datum_range; if (OB_FAIL(datum_range.from_range(store_ranges.at(i), allocator_))) { diff --git a/src/storage/compaction/ob_partition_parallel_merge_ctx.h b/src/storage/compaction/ob_partition_parallel_merge_ctx.h index 5b10abd390..7e356ec507 100644 --- a/src/storage/compaction/ob_partition_parallel_merge_ctx.h +++ b/src/storage/compaction/ob_partition_parallel_merge_ctx.h @@ -25,6 +25,7 @@ namespace oceanbase namespace compaction { struct ObTabletMergeCtx; +struct ObMediumCompactionInfo; } namespace 
blocksstable { @@ -41,7 +42,7 @@ public: enum ParallelMergeType { PARALLEL_MAJOR = 0, PARALLEL_MINI = 1, - PARALLEL_MINI_MINOR = 2, + PARALLEL_MINOR = 2, SERIALIZE_MERGE = 3, INVALID_PARALLEL_TYPE }; @@ -50,11 +51,16 @@ public: void reset(); bool is_valid() const; int init(compaction::ObTabletMergeCtx &merge_ctx); + int init(const compaction::ObMediumCompactionInfo &medium_info); OB_INLINE int64_t get_concurrent_cnt() const { return concurrent_cnt_; } int get_merge_range(const int64_t parallel_idx, blocksstable::ObDatumRange &merge_range); + static int get_concurrent_cnt( + const int64_t tablet_size, + const int64_t macro_block_cnt, + int64_t &concurrent_cnt); TO_STRING_KV(K_(parallel_type), K_(range_array), K_(concurrent_cnt), K_(is_inited)); private: - static const int64_t MIN_PARALLEL_MINI_MINOR_MERGE_THREASHOLD = 2; + static const int64_t MIN_PARALLEL_MINOR_MERGE_THREASHOLD = 2; static const int64_t MIN_PARALLEL_MERGE_BLOCKS = 32; static const int64_t PARALLEL_MERGE_TARGET_TASK_CNT = 20; //TODO @hanhui parallel in ai @@ -67,10 +73,6 @@ private: const int64_t sstable_count, int64_t ¶llel_degree); - int get_concurrent_cnt( - const int64_t tablet_size, - const int64_t macro_block_cnt, - int64_t &concurrent_cnt); int get_major_parallel_ranges( const blocksstable::ObSSTable *first_major_sstable, const int64_t tablet_size, diff --git a/src/storage/compaction/ob_partition_rows_merger.cpp b/src/storage/compaction/ob_partition_rows_merger.cpp index f2920b1f4c..cf3037a769 100644 --- a/src/storage/compaction/ob_partition_rows_merger.cpp +++ b/src/storage/compaction/ob_partition_rows_merger.cpp @@ -786,7 +786,7 @@ int ObPartitionMergeHelper::find_rowkey_minimum_iters(MERGE_ITER_ARRAY &minimum_ STORAGE_LOG(WARN, "Fail to push merge_iter to minimum_iters", K(ret), K(minimum_iters)); } else if (OB_FAIL(consume_iter_idxs_.push_back(iter_idx))) { STORAGE_LOG(WARN, "Fail to push consume iter idx to consume_iters", K(ret), K(consume_iter_idxs_)); - }else if 
(OB_FAIL(rows_merger_->pop())) { + } else if (OB_FAIL(rows_merger_->pop())) { STORAGE_LOG(WARN, "loser tree pop error", K(ret), K(has_same_rowkey), KPC(rows_merger_)); } } @@ -832,7 +832,7 @@ int ObPartitionMergeHelper::build_rows_merger() STORAGE_LOG(WARN, "failed to push item", K(ret), K(i), KPC(rows_merger_)); } } - }// end for + } // end for if (OB_FAIL(ret)) { } else if (OB_FAIL(rows_merger_->rebuild())) { @@ -864,7 +864,7 @@ int ObPartitionMergeHelper::rebuild_rows_merger() } else if (OB_ISNULL(iter = merge_iters_.at(iter_idx))) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "unexpected null iter", K(ret), K(iter_idx), K(merge_iters_)); - } else if (iter->is_iter_end()) {//skip iter end + } else if (iter->is_iter_end()) { //skip iter end } else { ObPartitionMergeLoserTreeItem item; item.iter_ = iter; @@ -960,7 +960,7 @@ ObPartitionMergeIter *ObPartitionMinorMergeHelper::alloc_merge_iter(const ObMerg ObPartitionMergeIter *merge_iter = nullptr; if (storage::is_backfill_tx_merge(merge_param.merge_type_)) { merge_iter = alloc_helper (allocator_); - } else if (!is_small_sstable && !merge_param.is_mini_merge() && !merge_param.is_full_merge_ && merge_param.sstable_logic_seq_ < ObMacroDataSeq::MAX_SSTABLE_SEQ) { + } else if (!is_small_sstable && !is_mini_merge(merge_param.merge_type_) && !merge_param.is_full_merge_ && merge_param.sstable_logic_seq_ < ObMacroDataSeq::MAX_SSTABLE_SEQ) { merge_iter = alloc_helper(allocator_); } else { merge_iter = alloc_helper(allocator_); diff --git a/src/storage/compaction/ob_schedule_dag_func.cpp b/src/storage/compaction/ob_schedule_dag_func.cpp index 4d87c2c794..ca4101b1b4 100644 --- a/src/storage/compaction/ob_schedule_dag_func.cpp +++ b/src/storage/compaction/ob_schedule_dag_func.cpp @@ -52,12 +52,12 @@ int ObScheduleDagFunc::schedule_tablet_merge_dag( const bool is_emergency) { int ret = OB_SUCCESS; - if (MAJOR_MERGE == param.merge_type_) { + if (is_major_merge_type(param.merge_type_)) { CREATE_DAG(ObTabletMajorMergeDag); } else if 
(MINI_MERGE == param.merge_type_) { CREATE_DAG(ObTabletMiniMergeDag); } else { - CREATE_DAG(ObTabletMinorMergeDag); + ret = OB_NOT_SUPPORTED; } return ret; } diff --git a/src/storage/compaction/ob_sstable_merge_info_mgr.cpp b/src/storage/compaction/ob_sstable_merge_info_mgr.cpp index 4b2c7b0475..9b9265b68e 100644 --- a/src/storage/compaction/ob_sstable_merge_info_mgr.cpp +++ b/src/storage/compaction/ob_sstable_merge_info_mgr.cpp @@ -217,7 +217,7 @@ int ObTenantSSTableMergeInfoMgr::add_sstable_merge_info(ObSSTableMergeInfo &merg } else if (OB_UNLIKELY(!merge_info.is_valid())) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "merge info is invalid", K(ret), K(merge_info)); - } else if (merge_info.is_major_merge()) { + } else if (merge_info.is_major_merge_type()) { if (OB_FAIL(major_merge_infos_.add(merge_info))) { STORAGE_LOG(WARN, "Fail to add into major merge info manager", K(ret), K(merge_info)); } diff --git a/src/storage/compaction/ob_tablet_merge_ctx.cpp b/src/storage/compaction/ob_tablet_merge_ctx.cpp index dbfa190bed..3693c6d0c8 100644 --- a/src/storage/compaction/ob_tablet_merge_ctx.cpp +++ b/src/storage/compaction/ob_tablet_merge_ctx.cpp @@ -17,13 +17,13 @@ #include "storage/ob_storage_schema.h" #include "storage/tablet/ob_tablet_create_delete_helper.h" #include "storage/tablet/ob_tablet_create_sstable_param.h" -#include "storage/compaction/ob_partition_merge_policy.h" -#include "storage/compaction/ob_partition_merge_policy.h" #include "storage/compaction/ob_compaction_diagnose.h" #include "storage/compaction/ob_sstable_merge_info_mgr.h" #include "storage/compaction/ob_tenant_tablet_scheduler.h" #include "observer/omt/ob_multi_tenant.h" #include "share/scheduler/ob_dag_warning_history_mgr.h" +#include "storage/compaction/ob_medium_compaction_mgr.h" +#include "storage/compaction/ob_medium_compaction_func.h" namespace oceanbase { @@ -96,7 +96,7 @@ int ObTabletMergeInfo::init(const ObTabletMergeCtx &ctx, bool need_check) sstable_merge_info_.tablet_id_ = 
ctx.param_.tablet_id_; sstable_merge_info_.compaction_scn_ = ctx.get_compaction_scn(); sstable_merge_info_.merge_start_time_ = ObTimeUtility::fast_current_time(); - sstable_merge_info_.merge_type_ = ctx.param_.merge_type_; + sstable_merge_info_.merge_type_ = ctx.is_tenant_major_merge_ ? MAJOR_MERGE : ctx.param_.merge_type_; sstable_merge_info_.progressive_merge_round_ = ctx.progressive_merge_round_; sstable_merge_info_.progressive_merge_num_ = ctx.progressive_merge_num_; sstable_merge_info_.concurrent_cnt_ = ctx.get_concurrent_cnt(); @@ -188,7 +188,7 @@ int ObTabletMergeInfo::build_create_sstable_param(const ObTabletMergeCtx &ctx, ObITable::TableKey table_key; table_key.table_type_ = ctx.get_merged_table_type(); table_key.tablet_id_ = ctx.param_.tablet_id_; - if (ctx.param_.is_major_merge()) { + if (is_major_merge_type(ctx.param_.merge_type_) || is_meta_major_merge(ctx.param_.merge_type_)) { table_key.version_range_.snapshot_version_ = ctx.sstable_version_range_.snapshot_version_; } else { table_key.scn_range_ = ctx.scn_range_; @@ -197,15 +197,15 @@ int ObTabletMergeInfo::build_create_sstable_param(const ObTabletMergeCtx &ctx, param.sstable_logic_seq_ = ctx.sstable_logic_seq_; param.filled_tx_scn_ = ctx.merge_scn_; - if (is_multi_version_minor_merge(ctx.param_.merge_type_)) { + param.table_mode_ = ctx.get_schema()->get_table_mode_struct(); + param.index_type_ = ctx.get_schema()->get_index_type(); + param.rowkey_column_cnt_ = ctx.get_schema()->get_rowkey_column_num() + + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); + if (is_minor_merge_type(ctx.param_.merge_type_)) { param.recycle_version_ = ctx.sstable_version_range_.base_version_; } else { param.recycle_version_ = 0; } - param.table_mode_ = ctx.schema_ctx_.merge_schema_->get_table_mode_struct(); - param.index_type_ = ctx.schema_ctx_.merge_schema_->get_index_type(); - param.rowkey_column_cnt_ = ctx.schema_ctx_.merge_schema_->get_rowkey_column_num() - + 
ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt(); param.schema_version_ = ctx.schema_ctx_.schema_version_; param.create_snapshot_version_ = ctx.create_snapshot_version_; param.progressive_merge_round_ = ctx.progressive_merge_round_; @@ -244,8 +244,8 @@ int ObTabletMergeInfo::build_create_sstable_param(const ObTabletMergeCtx &ctx, param.other_block_ids_ = res.other_block_ids_; param.ddl_scn_.set_min(); MEMCPY(param.encrypt_key_, res.encrypt_key_, share::OB_MAX_TABLESPACE_ENCRYPT_KEY_LENGTH); - if (ctx.param_.is_major_merge()) { - if (FAILEDx(res.fill_column_checksum(ctx.schema_ctx_.table_schema_, param.column_checksums_))) { + if (is_major_merge_type(ctx.param_.merge_type_)) { + if (FAILEDx(res.fill_column_checksum(ctx.get_schema(), param.column_checksums_))) { LOG_WARN("fail to fill column checksum", K(ret), K(res)); } } @@ -263,7 +263,7 @@ int ObTabletMergeInfo::record_start_tx_scn_for_tx_data(const ObTabletMergeCtx &c // set INT64_MAX for invalid check param.filled_tx_scn_.set_max(); - if (ctx.param_.is_mini_merge()) { + if (is_mini_merge(ctx.param_.merge_type_)) { // when this merge is MINI_MERGE, use the start_scn of the oldest tx data memtable as start_tx_scn ObTxDataMemtable *tx_data_memtable = nullptr; if (ctx.tables_handle_.empty()) { @@ -275,7 +275,8 @@ int ObTabletMergeInfo::record_start_tx_scn_for_tx_data(const ObTabletMergeCtx &c } else { param.filled_tx_scn_ = tx_data_memtable->get_start_scn(); } - } else if (ctx.param_.is_minor_merge()) { + } else if (is_minor_merge(ctx.param_.merge_type_)) { + // when this merge is MINOR_MERGE, use max_filtered_end_scn in filter if filtered some tx data ObTransStatusFilter *compaction_filter_ = (ObTransStatusFilter*)ctx.compaction_filter_; ObSSTable *oldest_tx_data_sstable = static_cast(ctx.tables_handle_.get_table(0)); if (OB_ISNULL(oldest_tx_data_sstable)) { @@ -316,8 +317,8 @@ int ObTabletMergeInfo::create_sstable(ObTabletMergeCtx &ctx) LOG_WARN("invalid merge ctx", K(ret), K(ctx)); } else { int64_t 
column_count; - if (OB_FAIL(ctx.schema_ctx_.merge_schema_->get_store_column_count( - column_count, is_multi_version_minor_merge(ctx.param_.merge_type_)))) { + if (OB_FAIL(ctx.get_schema()->get_store_column_count( + column_count, is_multi_version_merge(ctx.param_.merge_type_)))) { LOG_WARN("fail to get store column count", K(ret), K(ctx)); } else { SMART_VARS_2((ObSSTableMergeRes, res), (ObTabletCreateSSTableParam, param)) { @@ -390,7 +391,6 @@ int ObTabletMergeInfo::new_block_write_ctx(blocksstable::ObMacroBlocksWriteCtx * constexpr float ObCompactionTimeGuard::COMPACTION_SHOW_PERCENT_THRESHOLD; const char *ObCompactionTimeGuard::ObTabletCompactionEventStr[] = { "WAIT_TO_SCHEDULE", - "GET_MULTI_VERSION_START", "COMPACTION_POLICY", "GET_SCHEMA", "CALC_PROGRESSIVE_PARAM", @@ -428,12 +428,12 @@ int64_t ObCompactionTimeGuard::to_string(char *buf, const int64_t buf_len) const { int64_t pos = 0; int64_t total_cost = 0; - for (int64_t idx = GET_MULTI_VERSION_START; idx < idx_; ++idx) { + for (int64_t idx = COMPACTION_POLICY; idx < idx_; ++idx) { total_cost += click_poinsts_[idx]; } if (total_cost > 0) { float ratio = 0; - for (int64_t idx = GET_MULTI_VERSION_START; idx < idx_; ++idx) { + for (int64_t idx = COMPACTION_POLICY; idx < idx_; ++idx) { const uint32_t time_interval = click_poinsts_[idx]; ratio = (float)(time_interval)/ total_cost; if (ratio >= COMPACTION_SHOW_PERCENT_THRESHOLD || time_interval >= COMPACTION_SHOW_TIME_THRESHOLD) { @@ -489,11 +489,8 @@ ObSchemaMergeCtx::ObSchemaMergeCtx(ObIAllocator &allocator) : allocator_(allocator), base_schema_version_(0), schema_version_(0), - table_schema_(nullptr), - schema_guard_(share::schema::ObSchemaMgrItem::MOD_SSTABLE_MERGE_CTX), allocated_storage_schema_(false), - storage_schema_(nullptr), - merge_schema_(nullptr) + storage_schema_(nullptr) { } @@ -510,6 +507,7 @@ ObTabletMergeCtx::ObTabletMergeCtx( merged_table_handle_(), schema_ctx_(allocator), is_full_merge_(false), + is_tenant_major_merge_(false), 
merge_level_(MICRO_BLOCK_MERGE_LEVEL), merge_info_(), parallel_merge_ctx_(), @@ -525,7 +523,8 @@ ObTabletMergeCtx::ObTabletMergeCtx( merge_progress_(nullptr), compaction_filter_(nullptr), time_guard_(), - rebuild_seq_(-1) + rebuild_seq_(-1), + merge_list_() { merge_scn_.set_max(); } @@ -542,6 +541,11 @@ void ObTabletMergeCtx::destroy() allocator_.free(merge_progress_); merge_progress_ = nullptr; } + if (OB_NOT_NULL(compaction_filter_)) { + compaction_filter_->~ObICompactionFilter(); + allocator_.free(compaction_filter_); + compaction_filter_ = nullptr; + } tables_handle_.reset(); tablet_handle_.reset(); } @@ -566,15 +570,6 @@ int ObTabletMergeCtx::init_merge_progress(bool is_major) return ret; } -bool ObTabletMergeCtx::is_schema_valid() const -{ - bool valid_ret = true; - if (!param_.is_multi_version_minor_merge() && !storage::is_backfill_tx_merge(param_.merge_type_)) { - valid_ret = nullptr != schema_ctx_.table_schema_; - } - return valid_ret && NULL != schema_ctx_.merge_schema_; -} - bool ObTabletMergeCtx::is_valid() const { return param_.is_valid() @@ -582,7 +577,8 @@ bool ObTabletMergeCtx::is_valid() const && create_snapshot_version_ >= 0 && schema_ctx_.schema_version_ >= 0 && schema_ctx_.base_schema_version_ >= 0 - && is_schema_valid() + && NULL != schema_ctx_.storage_schema_ + && schema_ctx_.storage_schema_->is_valid() && sstable_logic_seq_ >= 0 && progressive_merge_num_ >= 0 && parallel_merge_ctx_.is_valid() @@ -609,16 +605,15 @@ ObITable::TableType ObTabletMergeCtx::get_merged_table_type() const { ObITable::TableType table_type = ObITable::MAX_TABLE_TYPE; - if (param_.is_major_merge()) { // MAJOR_MERGE + if (is_major_merge_type(param_.merge_type_)) { // MAJOR_MERGE table_type = ObITable::TableType::MAJOR_SSTABLE; - } else if (MINI_MERGE == param_.merge_type_ - || MINI_MINOR_MERGE == param_.merge_type_) { + } else if (MINI_MERGE == param_.merge_type_) { table_type = ObITable::TableType::MINI_SSTABLE; - } else if (BUF_MINOR_MERGE == param_.merge_type_) { - 
table_type = ObITable::TableType::BUF_MINOR_SSTABLE; + } else if (META_MAJOR_MERGE == param_.merge_type_) { + table_type = ObITable::TableType::META_MAJOR_SSTABLE; } else if (DDL_KV_MERGE == param_.merge_type_) { table_type = ObITable::TableType::KV_DUMP_SSTABLE; - } else { // MINOR_MERGE || HISTORY_MINI_MINOR_MERGE + } else { // MINOR_MERGE || HISTORY_MINOR_MERGE table_type = ObITable::TableType::MINOR_SSTABLE; } return table_type; @@ -627,7 +622,7 @@ ObITable::TableType ObTabletMergeCtx::get_merged_table_type() const int ObTabletMergeCtx::init_parallel_merge() { int ret = OB_SUCCESS; - if (OB_FAIL(parallel_merge_ctx_.init(*this))) { + if (!parallel_merge_ctx_.is_valid() && OB_FAIL(parallel_merge_ctx_.init(*this))) { STORAGE_LOG(WARN, "Failed to init parallel merge context", K(ret)); } return ret; @@ -645,34 +640,19 @@ int ObTabletMergeCtx::get_merge_range(int64_t parallel_idx, ObDatumRange &merge_ return ret; } -int ObTabletMergeCtx::inner_init_for_major() +int ObTabletMergeCtx::inner_init_for_medium() { int ret = OB_SUCCESS; - int64_t multi_version_start = 0; - int64_t min_reserved_snapshot = 0; + const ObMediumCompactionInfo *medium_info = nullptr; ObGetMergeTablesParam get_merge_table_param; ObGetMergeTablesResult get_merge_table_result; get_merge_table_param.merge_type_ = param_.merge_type_; get_merge_table_param.merge_version_ = param_.merge_version_; - if (OB_FAIL(tablet_handle_.get_obj()->get_kept_multi_version_start(multi_version_start, min_reserved_snapshot))) { - if (OB_TENANT_NOT_EXIST == ret) { - multi_version_start = tablet_handle_.get_obj()->get_multi_version_start(); - ret = OB_SUCCESS; - FLOG_INFO("Tenant has been deleted!", K(ret), KPC(tablet_handle_.get_obj())); - } else { - LOG_WARN("failed to get kept multi_version_start", K(ret), KPC(tablet_handle_.get_obj())); - } - } - - FLOG_INFO("get multi version start", K(multi_version_start), K(min_reserved_snapshot), K_(tablet_handle)); - if (OB_FAIL(ret)) { - } else if 
(FALSE_IT(time_guard_.click(ObCompactionTimeGuard::GET_MULTI_VERSION_START))) { - } else if (OB_FAIL(ObPartitionMergePolicy::get_merge_tables[param_.merge_type_]( + if (OB_FAIL(ObPartitionMergePolicy::get_merge_tables[param_.merge_type_]( get_merge_table_param, - multi_version_start, + *ls_handle_.get_ls(), *tablet_handle_.get_obj(), get_merge_table_result))) { - // TODO(@DanLin) optimize this interface if (OB_NO_NEED_MERGE != ret) { LOG_WARN("failed to get merge tables", K(ret), KPC(this), K(get_merge_table_result)); } @@ -684,18 +664,56 @@ int ObTabletMergeCtx::inner_init_for_major() ret = OB_EAGAIN; LOG_INFO("tx table is not ready. waiting for max_decided_log_ts ...", KR(ret), "merge_scn", get_merge_table_result.scn_range_.end_scn_); + } else if (OB_FAIL(init_get_medium_compaction_info(param_.merge_version_, medium_info))) { // have checked medium info inside + LOG_WARN("failed to get medium compaction info", K(ret), KPC(this)); + } else if (FALSE_IT(get_merge_table_result.schema_version_ = medium_info->storage_schema_.schema_version_)) { + } else if (FALSE_IT(is_tenant_major_merge_ = medium_info->is_major_compaction())) { } else if (OB_FAIL(get_basic_info_from_result(get_merge_table_result))) { LOG_WARN("failed to set basic info to ctx", K(ret), K(get_merge_table_result), KPC(this)); - } else if (OB_FAIL(get_table_schema_to_merge())) { - LOG_WARN("failed to get table schema", K(ret), KPC(this)); - } else if (FALSE_IT(time_guard_.click(ObCompactionTimeGuard::GET_TABLE_SCHEMA))) { } else if (OB_FAIL(cal_major_merge_param(get_merge_table_result))) { - LOG_WARN("fail to cal minor merge param", K(ret), KPC(this)); - } else if (FALSE_IT(time_guard_.click(ObCompactionTimeGuard::CALC_PROGRESSIVE_PARAM))) { + LOG_WARN("fail to cal major merge param", K(ret), KPC(this)); } return ret; } +int ObTabletMergeCtx::init_get_medium_compaction_info( + const int64_t medium_snapshot, + const ObMediumCompactionInfo *&medium_info) +{ + int ret = OB_SUCCESS; + medium_info = nullptr; 
+ const ObMediumCompactionInfoList &medium_list = tablet_handle_.get_obj()->get_medium_compaction_info_list(); + + if (OB_UNLIKELY(!medium_list.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("medium compaction mgr is invalid", K(ret), KPC(this), K(medium_list)); + } else if (OB_FAIL(medium_list.get_specified_scn_info(medium_snapshot, medium_info))) { + LOG_WARN("failed to get medium info from mgr", K(ret), K(medium_snapshot), K(medium_list)); + } else if (OB_UNLIKELY(nullptr == medium_info || !medium_info->is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("medium compaction info is invalid", K(ret), KPC(this), K(medium_list), KPC(medium_info)); + } else if (medium_info->contain_parallel_range_ + && OB_FAIL(parallel_merge_ctx_.init(*medium_info))) { + LOG_WARN("failed to init parallel merge ctx", K(ret), KPC(medium_info)); + } else { + void *buf = nullptr; + if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObStorageSchema)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc storage schema", K(ret)); + } else { + ObStorageSchema *storage_schema = nullptr; + storage_schema = new(buf) ObStorageSchema(); + schema_ctx_.storage_schema_ = storage_schema; + schema_ctx_.allocated_storage_schema_ = true; + if (OB_FAIL(storage_schema->init(allocator_, medium_info->storage_schema_))) { + LOG_WARN("failed to init storage schema from current medium info", K(ret), KPC(medium_info)); + } + } + } + + return ret; +} + int ObTabletMergeCtx::inner_init_for_minor(bool &skip_rest_operation) { int ret = OB_SUCCESS; @@ -707,19 +725,10 @@ int ObTabletMergeCtx::inner_init_for_minor(bool &skip_rest_operation) get_merge_table_param.merge_type_ = param_.merge_type_; get_merge_table_param.merge_version_ = param_.merge_version_; ObTablet *tablet = tablet_handle_.get_obj(); - if (OB_FAIL(tablet->get_kept_multi_version_start(multi_version_start, min_reserved_snapshot))) { - LOG_WARN("failed to get kept multi_version_start", K(ret)); - if (is_mini_merge(param_.merge_type_) || 
OB_TENANT_NOT_EXIST == ret) { - multi_version_start = tablet->get_multi_version_start(); - FLOG_INFO("failed to get multi_version_start, use multi_version_start on tablet", K(ret), K(param_), K(multi_version_start)); - ret = OB_SUCCESS; // clear errno to make mini merge success - } - } - if (OB_FAIL(ret)) { - } else if (FALSE_IT(time_guard_.click(ObCompactionTimeGuard::GET_MULTI_VERSION_START))) { - } else if (OB_FAIL(ObPartitionMergePolicy::get_merge_tables[param_.merge_type_]( + + if (OB_FAIL(ObPartitionMergePolicy::get_merge_tables[param_.merge_type_]( get_merge_table_param, - multi_version_start, + *ls_handle_.get_ls(), *tablet, get_merge_table_result))) { // TODO(@DanLin) optimize this interface @@ -743,13 +752,28 @@ int ObTabletMergeCtx::inner_init_for_minor(bool &skip_rest_operation) get_merge_table_result.scn_range_.end_scn_)) { ret = OB_EAGAIN; LOG_INFO("tx table is not ready. waiting for max_decided_log_ts ...", - KR(ret), "merge_scn", get_merge_table_result.scn_range_.end_scn_); - } else if (OB_FAIL(get_storage_schema_to_merge(get_merge_table_result.handle_, true/*get_schema_on_memtable*/))) { - LOG_ERROR("Fail to get storage schema", K(ret), KPC(this)); + KR(ret), "merge_scn", get_merge_table_result.scn_range_.end_scn_); + } else if (OB_FAIL(get_schema_and_gene_from_result(get_merge_table_result))) { + LOG_WARN("Fail to get storage schema", K(ret), K(get_merge_table_result), KPC(this)); + } + return ret; +} + +int ObTabletMergeCtx::get_schema_and_gene_from_result(const ObGetMergeTablesResult &get_merge_table_result) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(get_storage_schema_to_merge(get_merge_table_result.handle_, true/*get_schema_on_memtable*/))) { + LOG_WARN("failed to get storage schema to merge", K(ret), KPC(this)); } else if (OB_FAIL(get_basic_info_from_result(get_merge_table_result))) { LOG_WARN("failed to set basic info to ctx", K(ret), K(get_merge_table_result), KPC(this)); } else if (OB_FAIL(cal_minor_merge_param())) { - LOG_WARN("fail to cal 
minor merge param", K(ret), KPC(this)); + LOG_WARN("failed to cal minor merge param", K(ret), KPC(this)); + } else if (!is_minor_merge_type(get_merge_table_result.suggest_merge_type_)) { + } else if (OB_UNLIKELY(scn_range_.is_empty())) { + ret = OB_ERR_UNEXPECTED; + LOG_ERROR("Unexcepted empty scn range in minor merge", K(ret), K(scn_range_)); + } else { + merge_scn_ = scn_range_.end_scn_; } return ret; } @@ -794,7 +818,8 @@ int ObTabletMergeCtx::update_tablet_or_release_memtable(const ObGetMergeTablesRe int ObTabletMergeCtx::update_tablet_directly(const ObGetMergeTablesResult &get_merge_table_result) { int ret = OB_SUCCESS; - const int64_t rebuild_seq = ls_handle_.get_ls()->get_rebuild_seq(); + int tmp_ret = OB_SUCCESS; + scn_range_ = get_merge_table_result.scn_range_; ObTableHandleV2 empty_table_handle; @@ -803,8 +828,8 @@ int ObTabletMergeCtx::update_tablet_directly(const ObGetMergeTablesResult &get_m get_merge_table_result.version_range_.snapshot_version_, get_merge_table_result.version_range_.multi_version_start_, schema_ctx_.storage_schema_, - rebuild_seq, - param_.is_major_merge(), + rebuild_seq_, + is_major_merge_type(param_.merge_type_), SCN::min_scn()/*clog_checkpoint_scn*/); ObTabletHandle new_tablet_handle; if (OB_FAIL(ls_handle_.get_ls()->update_tablet_table_store( @@ -820,20 +845,11 @@ int ObTabletMergeCtx::update_tablet_directly(const ObGetMergeTablesResult &get_m sizeof(merge_info_.get_sstable_merge_info().participant_table_str_)); (void)merge_dag_->get_ctx().collect_running_info(); - int64_t schedule_verion = MTL(ObTenantTabletScheduler*)->get_frozen_version(); - bool unused_tablet_merge_finish = false; - ObTenantTabletScheduler::ObScheduleStatistics unused_schedule_stats; - int tmp_ret = OB_SUCCESS; - if (!get_merge_table_result.schedule_major_) { - } else if (OB_TMP_FAIL(ObTenantTabletScheduler::schedule_tablet_major_merge( - schedule_verion, - *ls_handle_.get_ls(), - *new_tablet_handle.get_obj(), - unused_tablet_merge_finish, - 
unused_schedule_stats, - false /*enable_force_freeze*/))) { - if (OB_SIZE_OVERFLOW != tmp_ret) { - LOG_WARN("failed to schedule tablet major merge", K(tmp_ret), K_(param)); + if (OB_TMP_FAIL(ObMediumCompactionScheduleFunc::schedule_tablet_medium_merge( + *ls_handle_.get_ls(), *new_tablet_handle.get_obj()))) { + if (OB_SIZE_OVERFLOW != tmp_ret && OB_EAGAIN != tmp_ret) { + LOG_WARN("failed to schedule tablet adaptive merge", K(tmp_ret), + "ls_id", param_.ls_id_, "tablet_id", param_.tablet_id_); } } } @@ -845,7 +861,7 @@ int ObTabletMergeCtx::get_basic_info_from_result( { int ret = OB_SUCCESS; - if (rebuild_seq_ < 0) { + if (OB_UNLIKELY(rebuild_seq_ < 0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("rebuild seq do not set, get tables failed", K(ret), K(rebuild_seq_)); } else if (OB_FAIL(tables_handle_.assign(get_merge_table_result.handle_))) { @@ -861,12 +877,12 @@ int ObTabletMergeCtx::get_basic_info_from_result( "suggest_merge_type", get_merge_table_result.suggest_merge_type_); param_.merge_type_ = get_merge_table_result.suggest_merge_type_; } - if (param_.is_major_merge()) { + if (is_major_merge_type(param_.merge_type_)) { param_.report_ = GCTX.ob_service_; } const ObITable *table = nullptr; - if (param_.is_major_merge() || param_.is_mini_merge()) { + if (is_major_merge_type(param_.merge_type_) || is_mini_merge(param_.merge_type_)) { sstable_logic_seq_ = 0; } else if (OB_ISNULL(table = tables_handle_.get_table(tables_handle_.get_count() - 1)) || !table->is_sstable()) { ret = OB_ERR_UNEXPECTED; @@ -900,71 +916,39 @@ int ObTabletMergeCtx::cal_minor_merge_param() return ret; } -int ObTabletMergeCtx::cal_major_merge_param( - const ObGetMergeTablesResult &get_merge_table_result) +int ObTabletMergeCtx::cal_major_merge_param(const ObGetMergeTablesResult &get_merge_table_result) { int ret = OB_SUCCESS; + ObSSTable *base_table = nullptr; + bool is_schema_changed = false; + read_base_version_ = get_merge_table_result.read_base_version_; param_.merge_version_ = 
get_merge_table_result.merge_version_; - ObMultiVersionSchemaService *schema_service = nullptr; - ObSchemaGetterGuard base_schema_guard; - const ObTableSchema *main_table_schema = nullptr; - const ObTableSchema *base_table_schema = nullptr; - bool is_schema_changed = false; - - if (OB_ISNULL(main_table_schema = schema_ctx_.table_schema_)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected null table schema", K(ret)); - } else if (OB_ISNULL(schema_service = MTL(ObTenantSchemaService *)->get_schema_service())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to get schema service from MTL", K(ret)); - } else if (OB_FAIL(schema_service->get_tenant_schema_guard(MTL_ID(), - base_schema_guard, - schema_ctx_.base_schema_version_, - OB_INVALID_VERSION))) { - LOG_WARN("failed to get schema guard", K(ret)); - } else if (OB_FAIL(base_schema_guard.check_formal_guard())) { - LOG_WARN("failed to check formal guard", K(ret)); - } else if (OB_FAIL(base_schema_guard.get_table_schema(MTL_ID(), - schema_ctx_.table_schema_->get_table_id(), base_table_schema))) { - LOG_WARN("failed to get base table schema", K(ret), K(schema_ctx_.base_schema_version_), K(schema_ctx_.table_schema_->get_table_id())); - } else if (OB_ISNULL(base_table_schema)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to get base table schema", K(ret), K(schema_ctx_.base_schema_version_), K(schema_ctx_.table_schema_->get_table_id())); - } else if (FALSE_IT(is_schema_changed = (base_table_schema->get_column_count() != main_table_schema->get_column_count() - || 0 != strcmp(base_table_schema->get_compress_func_name(), main_table_schema->get_compress_func_name()) - || base_table_schema->get_row_store_type() != main_table_schema->get_row_store_type()))) { - } else if (OB_FAIL(cal_progressive_merge_param(is_schema_changed))) { - LOG_WARN("failed to calculate progressive merge param", K(ret)); - } - - return ret; -} - -int ObTabletMergeCtx::cal_progressive_merge_param(const bool is_schema_changed) -{ - int ret = 
OB_SUCCESS; - ObSSTable *last_major = nullptr; - if (tables_handle_.empty() - || NULL == (last_major = static_cast(tables_handle_.get_table(0))) - || !last_major->is_major_sstable()) { + || NULL == (base_table = static_cast(tables_handle_.get_table(0))) + || (!base_table->is_major_sstable() && !base_table->is_meta_major_sstable())) { ret = OB_ENTRY_NOT_EXIST; - LOG_WARN("no major sstable exist", K(ret), K(tables_handle_)); + LOG_WARN("base table must be major or meta major", K(ret), K(tables_handle_)); } else { - if (param_.is_buf_minor_merge() || 1 == schema_ctx_.table_schema_->get_progressive_merge_num()) { + if (1 == get_schema()->get_progressive_merge_num()) { is_full_merge_ = true; } else { is_full_merge_ = false; } + const ObSSTableBasicMeta &base_meta = base_table->get_meta().get_basic_meta(); + if (base_table->get_meta().get_column_count() != get_schema()->get_column_count() + || base_meta.compressor_type_ != get_schema()->get_compressor_type() + || base_meta.row_store_type_ != get_schema()->row_store_type_) { + is_schema_changed = true; + } - const int64_t meta_progressive_merge_round = last_major->get_meta().get_basic_meta().progressive_merge_round_; - const int64_t schema_progressive_merge_round = schema_ctx_.table_schema_->get_progressive_merge_round(); - if (0 == schema_ctx_.table_schema_->get_progressive_merge_num()) { + const int64_t meta_progressive_merge_round = base_meta.progressive_merge_round_; + const int64_t schema_progressive_merge_round = get_schema()->get_progressive_merge_round(); + if (0 == get_schema()->get_progressive_merge_num()) { progressive_merge_num_ = (1 == schema_progressive_merge_round) ? 
0 : OB_AUTO_PROGRESSIVE_MERGE_NUM; } else { - progressive_merge_num_ = schema_ctx_.table_schema_->get_progressive_merge_num(); + progressive_merge_num_ = get_schema()->get_progressive_merge_num(); } if (is_full_merge_) { @@ -975,7 +959,7 @@ int ObTabletMergeCtx::cal_progressive_merge_param(const bool is_schema_changed) progressive_merge_step_ = 0; } else if (meta_progressive_merge_round == schema_progressive_merge_round) { progressive_merge_round_ = meta_progressive_merge_round; - progressive_merge_step_ = last_major->get_meta().get_basic_meta().progressive_merge_step_; + progressive_merge_step_ = base_meta.progressive_merge_step_; } FLOG_INFO("Calc progressive param", K(is_schema_changed), K(progressive_merge_num_), K(progressive_merge_round_), K(meta_progressive_merge_round), K(progressive_merge_step_), @@ -993,19 +977,63 @@ int ObTabletMergeCtx::cal_progressive_merge_param(const bool is_schema_changed) int ObTabletMergeCtx::init_merge_info() { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_schema_valid())) { - ret = OB_ERR_SYS; - LOG_WARN("schema of merge ctx is not valid", K(ret), KPC(this)); - } else if (OB_FAIL(init_parallel_merge())) { + if (OB_FAIL(init_parallel_merge())) { LOG_WARN("failed to init parallel merge in sstable merge ctx", K(ret)); } else if (OB_FAIL(merge_info_.init(*this))) { LOG_WARN("failed to init merge context", K(ret)); } else { + if (OB_NOT_NULL(compaction_filter_) && compaction_filter_->is_full_merge_) { + is_full_merge_ = true; + } time_guard_.click(ObCompactionTimeGuard::GET_PARALLEL_RANGE); } return ret; } +int ObTabletMergeCtx::get_medium_compaction_info_to_store() +{ + int ret = OB_SUCCESS; + if (is_mini_merge(param_.merge_type_)) { + if (OB_FAIL(merge_list_.init(allocator_))) { + LOG_WARN("failed to init merge list", K(ret)); + } + ObITable *table = nullptr; + memtable::ObMemtable * memtable = nullptr; + compaction::ObMediumCompactionInfo medium_info; + memtable::ObMultiSourceData::ObIMultiSourceDataUnitList dst_list; + for (int i = 
0; OB_SUCC(ret) && i < tables_handle_.get_count(); ++i) { + dst_list.reset(); + if (OB_UNLIKELY(nullptr == (table = tables_handle_.get_table(i)) || !table->is_frozen_memtable())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table in tables_handle is invalid", K(ret), KPC(table)); + } else if (OB_ISNULL(memtable = dynamic_cast(table))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table pointer does not point to a ObMemtable object", KPC(table)); + } else if (OB_FAIL(memtable->get_multi_source_data_unit_list(&medium_info, dst_list, &allocator_))) { + LOG_WARN("failed to get medium info from memtable", K(ret), KPC(table)); + } else if (dst_list.is_empty()) { + // do nothing + } else { + ObMediumCompactionInfo *input_info = nullptr; + DLIST_FOREACH_X(info, dst_list, OB_SUCC(ret)) { + if (OB_UNLIKELY(memtable::MultiSourceDataUnitType::MEDIUM_COMPACTION_INFO != info->type())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("return info is invalid", K(ret), KPC(info)); + } else if (FALSE_IT(input_info = static_cast(info))) { + } else if (OB_FAIL(merge_list_.add_medium_compaction_info(*input_info))) { + LOG_WARN("failed to add medium compaction info", K(ret), KPC(input_info)); + } + } + } + } // end of for + if (OB_SUCC(ret)) { + LOG_INFO("success get medium info list", "ls_id", param_.ls_id_, + "tablet_id", param_.tablet_id_, K(merge_list_)); + } + } + return ret; +} + int ObTabletMergeCtx::get_storage_schema_to_merge( const ObTablesHandleArray &merge_tables_handle, const bool get_schema_on_memtable) @@ -1064,7 +1092,6 @@ int ObTabletMergeCtx::get_storage_schema_to_merge( if (OB_SUCC(ret)) { OB_ASSERT(nullptr != schema_ctx_.storage_schema_); - schema_ctx_.merge_schema_ = schema_ctx_.storage_schema_; schema_ctx_.schema_version_ = schema_ctx_.storage_schema_->get_schema_version(); FLOG_INFO("get storage schema to merge", "ls_id", param_.ls_id_, "tablet_id", param_.tablet_id_, K_(schema_ctx), K(get_storage_schema_flag), @@ -1073,88 +1100,63 @@ int 
ObTabletMergeCtx::get_storage_schema_to_merge( return ret; } -int ObTabletMergeCtx::get_table_id( - const ObTabletID &tablet_id, - const int64_t schema_version, - uint64_t &table_id) +int ObTabletMergeCtx::prepare_index_tree() { int ret = OB_SUCCESS; - table_id = OB_INVALID_ID; - - ObSEArray tablet_ids; - ObSEArray table_ids; - ObMultiVersionSchemaService *schema_service = nullptr; - if (OB_FAIL(tablet_ids.push_back(tablet_id))) { - LOG_WARN("failed to add tablet id", K(ret)); - } else if (OB_ISNULL(schema_service = MTL(ObTenantSchemaService *)->get_schema_service())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("failed to get schema service from MTL", K(ret), K(schema_service)); - } else if (OB_FAIL(schema_service->get_tablet_to_table_history(MTL_ID(), tablet_ids, schema_version, table_ids))) { - LOG_WARN("failed to get table id according to tablet id", K(ret), K(schema_version)); - } else if (OB_UNLIKELY(table_ids.empty())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("get unexpected empty table id", K(ret), K(table_ids)); - } else if (table_ids.at(0) == OB_INVALID_ID){ - ret = OB_TABLE_IS_DELETED; - LOG_WARN("table is deleted", K(ret), K(tablet_id), K(schema_version)); + ObDataStoreDesc desc; + if (OB_UNLIKELY(!is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid merge ctx", K(ret), KPC(this)); + } else if (OB_FAIL(desc.init(*get_schema(), + param_.ls_id_, + param_.tablet_id_, + param_.merge_type_, + sstable_version_range_.snapshot_version_))) { + LOG_WARN("failed to init index store desc", K(ret), KPC(this)); } else { - table_id = table_ids.at(0); + // TODO(zhuixin.gsy) modify index_desc.init to avoid reset col_desc_array_ + desc.row_column_count_ = desc.rowkey_column_count_ + 1; + desc.col_desc_array_.reset(); + desc.need_prebuild_bloomfilter_ = false; + if (OB_FAIL(desc.col_desc_array_.init(desc.row_column_count_))) { + LOG_WARN("failed to reserve column desc array", K(ret)); + } else if (OB_FAIL(get_schema()->get_rowkey_column_ids(desc.col_desc_array_))) { 
+ LOG_WARN("failed to get rowkey column ids", K(ret)); + } else if (OB_FAIL(ObMultiVersionRowkeyHelpper::add_extra_rowkey_cols(desc.col_desc_array_))) { + LOG_WARN("failed to get extra rowkey column ids", K(ret)); + } else { + ObObjMeta meta; + meta.set_varchar(); + meta.set_collation_type(CS_TYPE_BINARY); + share::schema::ObColDesc col; + col.col_id_ = static_cast(desc.row_column_count_ + OB_APP_MIN_COLUMN_ID); + col.col_type_ = meta; + col.col_order_ = DESC; + + if (OB_FAIL(desc.col_desc_array_.push_back(col))) { + LOG_WARN("failed to push back last col for index", K(ret), K(col)); + } + } + } + if (OB_SUCC(ret)) { + if (OB_FAIL(merge_info_.prepare_index_builder(desc))) { + LOG_WARN("failed to prepare index builder", K(ret), K(desc)); + } } return ret; } -int ObTabletMergeCtx::get_table_schema_to_merge() +int ObTabletMergeCtx::prepare_merge_progress() { int ret = OB_SUCCESS; - const uint64_t tenant_id = MTL_ID(); - const ObTabletID tablet_id = param_.tablet_id_; - uint64_t table_id = OB_INVALID_ID; - ObMultiVersionSchemaService *schema_service = nullptr; - const int64_t schema_version = schema_ctx_.schema_version_; - ObSchemaGetterGuard &schema_guard = schema_ctx_.schema_guard_; - const ObTableSchema *&table_schema = schema_ctx_.table_schema_; - int64_t save_schema_version = schema_version; - if (OB_FAIL(get_table_id(tablet_id, schema_version, table_id))) { - LOG_WARN("failed to get table id", K(ret), K(tablet_id)); - } else if (OB_UNLIKELY(!tablet_handle_.is_valid() - || tablet_id != tablet_handle_.get_obj()->get_tablet_meta().tablet_id_ - || OB_INVALID_ID == table_id)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("invalid tablet or table_id", K(ret), K(tablet_id), K(table_id)); - } else if (OB_ISNULL(schema_service = MTL(ObTenantSchemaService *)->get_schema_service())) { - LOG_WARN("failed to get schema service from MTL", K(ret)); - } else if (OB_FAIL(schema_service->retry_get_schema_guard(tenant_id, - schema_version, - table_id, - schema_guard, - 
save_schema_version))) { - if (OB_TABLE_IS_DELETED != ret) { - LOG_WARN("Fail to get schema", K(ret), K(tenant_id), K(schema_version), K(table_id)); + if (OB_NOT_NULL(merge_progress_)) { + const ObTableReadInfo &read_info = tablet_handle_.get_obj()->get_full_read_info(); + if (OB_FAIL(merge_progress_->init(this, read_info))) { + merge_progress_->reset(); + LOG_WARN("failed to init merge progress", K(ret)); } else { - LOG_WARN("table is deleted", K(ret), K(table_id)); + LOG_INFO("succeed to init merge progress", K(ret), KPC(merge_progress_)); } - } else if (save_schema_version < schema_version) { - ret = OB_SCHEMA_ERROR; - LOG_WARN("can not use older schema version", K(ret), K(schema_version), K(save_schema_version), K(table_id)); - } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_id, table_schema))) { - LOG_WARN("Fail to get table schema", K(ret), K(table_id)); - } else if (NULL == table_schema) { - if (OB_FAIL(schema_service->get_tenant_full_schema_guard(tenant_id, schema_guard))) { - LOG_WARN("Fail to get schema", K(ret), K(tenant_id)); - } else if (OB_FAIL(schema_guard.get_table_schema(tenant_id, table_id, table_schema))) { - LOG_WARN("Fail to get table schema", K(ret), K(table_id)); - } else if (NULL == table_schema) { - ret = OB_TABLE_IS_DELETED; - LOG_WARN("table is deleted", K(ret), K(table_id)); - } - } - if (OB_SUCC(ret)) { - schema_ctx_.merge_schema_ = table_schema; - schema_ctx_.schema_version_ = save_schema_version; - schema_ctx_.storage_schema_ = &tablet_handle_.get_obj()->get_storage_schema(); - - FLOG_INFO("get schema to merge", K(table_id), K(schema_version), K(save_schema_version), - K(*reinterpret_cast(table_schema))); } return ret; } @@ -1162,7 +1164,7 @@ int ObTabletMergeCtx::get_table_schema_to_merge() int ObTabletMergeCtx::generate_participant_table_info(char *buf, const int64_t buf_len) const { int ret = OB_SUCCESS; - if (param_.is_major_merge()) { + if (is_major_merge_type(param_.merge_type_)) { 
ADD_COMPACTION_INFO_PARAM(buf, buf_len, "table_cnt", tables_handle_.get_count(), "[MAJOR]scn", tables_handle_.get_table(0)->get_snapshot_version()); @@ -1211,6 +1213,33 @@ int ObTabletMergeCtx::generate_macro_id_list(char *buf, const int64_t buf_len) c return ret; } +int serialize_medium_list( + const char *list_name, + const compaction::ObMediumCompactionInfoList &medium_list, + ObSSTableMergeInfo &sstable_merge_info, + int64_t &pos) +{ + int ret = OB_SUCCESS; + if (medium_list.get_list().get_size() > 0) { + common::databuff_printf(sstable_merge_info.comment_, sizeof(sstable_merge_info.comment_), pos, + "%s:{cnt=%d;", list_name, medium_list.get_list().get_size()); + DLIST_FOREACH_NORET(info, medium_list.get_list()) { + const compaction::ObMediumCompactionInfo *medium_info = static_cast(info); + common::databuff_printf(sstable_merge_info.comment_, sizeof(sstable_merge_info.comment_), pos, + "%ld,", medium_info->medium_snapshot_); + if (medium_info->medium_merge_reason_ > ObAdaptiveMergePolicy::AdaptiveMergeReason::NONE) { + common::databuff_printf(sstable_merge_info.comment_, sizeof(sstable_merge_info.comment_), pos, + "merge_reason=%s,", ObAdaptiveMergePolicy::merge_reason_to_str(medium_info->medium_merge_reason_)); + } + } + if (pos > 0) { + pos --; + } + common::databuff_printf(sstable_merge_info.comment_, sizeof(sstable_merge_info.comment_), pos, "}|"); + } + return ret; +} + void ObTabletMergeCtx::collect_running_info() { int tmp_ret = OB_SUCCESS; @@ -1218,6 +1247,11 @@ void ObTabletMergeCtx::collect_running_info() ObSSTableMergeInfo &sstable_merge_info = merge_info_.get_sstable_merge_info(); sstable_merge_info.dag_id_ = merge_dag_->get_dag_id(); + // collect medium info dump msg + int64_t pos = 0; + serialize_medium_list("new_medium_list", merge_list_, sstable_merge_info, pos); + serialize_medium_list("serialize_medium_list", tablet_handle_.get_obj()->get_medium_compaction_info_list(), sstable_merge_info, pos); + 
ADD_COMPACTION_INFO_PARAM(sstable_merge_info.comment_, sizeof(sstable_merge_info.comment_), "time_guard", time_guard_); diff --git a/src/storage/compaction/ob_tablet_merge_ctx.h b/src/storage/compaction/ob_tablet_merge_ctx.h index 143706e3b5..96990fa744 100644 --- a/src/storage/compaction/ob_tablet_merge_ctx.h +++ b/src/storage/compaction/ob_tablet_merge_ctx.h @@ -15,10 +15,12 @@ #include "share/scheduler/ob_dag_scheduler.h" #include "lib/utility/ob_print_utils.h" +#include "lib/container/ob_se_array.h" #include "ob_partition_parallel_merge_ctx.h" #include "storage/compaction/ob_partition_merger.h" #include "storage/compaction/ob_partition_merge_progress.h" #include "storage/compaction/ob_tablet_merge_task.h" +#include "storage/compaction/ob_partition_merge_policy.h" #include "storage/tx_storage/ob_ls_map.h" #include "storage/tx_storage/ob_ls_handle.h" #include "share/scn.h" @@ -58,7 +60,7 @@ public: blocksstable::ObSSTableIndexBuilder *get_index_builder() const { return index_builder_; } void destroy(); int get_data_macro_block_count(int64_t ¯o_block_count); - TO_STRING_KV(K_(is_inited), K_(sstable_merge_info)); + TO_STRING_KV(K_(is_inited), K_(sstable_merge_info), KP_(index_builder)); private: static int build_create_sstable_param(const ObTabletMergeCtx &ctx, @@ -95,19 +97,15 @@ struct ObSchemaMergeCtx storage_schema_ = nullptr; } } + int deep_copy(const ObSchemaMergeCtx &input_ctx); common::ObIAllocator &allocator_; int64_t base_schema_version_; int64_t schema_version_; - const share::schema::ObTableSchema *table_schema_; // schema for major merge, get from schema_service - share::schema::ObSchemaGetterGuard schema_guard_; bool allocated_storage_schema_; - const ObStorageSchema *storage_schema_; // schema for mini merge, persist on meta - // in mini merge, merge_schema = storage_schema - // in major merge, merge_schema = table_schema - const ObMergeSchema *merge_schema_; + const ObStorageSchema *storage_schema_; // schema for all merge - 
TO_STRING_KV(K_(base_schema_version), K_(schema_version), KPC_(merge_schema)); + TO_STRING_KV(K_(base_schema_version), K_(schema_version), KPC_(storage_schema)); }; class ObCompactionTimeGuard : public common::occam::ObOccamTimeGuard @@ -115,7 +113,6 @@ class ObCompactionTimeGuard : public common::occam::ObOccamTimeGuard public: enum ObTabletCompactionEvent{ DAG_WAIT_TO_SCHEDULE = 0, - GET_MULTI_VERSION_START, COMPACTION_POLICY, GET_TABLE_SCHEMA, CALC_PROGRESSIVE_PARAM, @@ -160,7 +157,6 @@ struct ObTabletMergeCtx virtual ~ObTabletMergeCtx(); void destroy(); virtual bool is_valid() const; - bool is_schema_valid() const; bool need_full_checksum() const { return is_full_merge_; } bool need_rewrite_macro_block(const blocksstable::ObMacroBlockDesc ¯o_desc) const; int64_t get_storage_format_work_version() const { return param_.merge_version_; } @@ -168,21 +164,21 @@ struct ObTabletMergeCtx int init_merge_progress(bool is_major); int get_merge_range(int64_t parallel_idx, blocksstable::ObDatumRange &merge_range); - int inner_init_for_major(); int inner_init_for_minor(bool &skip_rest_operation); + int inner_init_for_medium(); + int init_get_medium_compaction_info(const int64_t medium_snapshot, const ObMediumCompactionInfo *&medium_info); + int get_schema_and_gene_from_result(const ObGetMergeTablesResult &get_merge_table_result); + int get_storage_schema_and_gene_from_result(const ObGetMergeTablesResult &get_merge_table_result); int get_storage_schema_to_merge(const ObTablesHandleArray &merge_tables_handle, const bool get_schema_on_memtable = true); - int get_table_schema_to_merge(); - - static int get_table_id( - const ObTabletID &tablet_id, - const int64_t schema_version, - uint64_t &table_id); +public: + int get_medium_compaction_info_to_store(); int get_basic_info_from_result(const ObGetMergeTablesResult &get_merge_table_result); int cal_minor_merge_param(); int cal_major_merge_param(const ObGetMergeTablesResult &get_merge_table_result); int init_merge_info(); - int 
cal_progressive_merge_param(const bool is_schema_changed); + int prepare_index_tree(); + int prepare_merge_progress(); int generate_participant_table_info(char *buf, const int64_t buf_len) const; int generate_macro_id_list(char *buf, const int64_t buf_len) const; void collect_running_info(); @@ -192,13 +188,15 @@ struct ObTabletMergeCtx OB_INLINE int64_t get_concurrent_cnt() const { return parallel_merge_ctx_.get_concurrent_cnt(); } ObITable::TableType get_merged_table_type() const; ObTabletMergeInfo& get_merge_info() { return merge_info_; } - const ObMergeSchema *get_merge_schema() { return schema_ctx_.merge_schema_; } + const ObStorageSchema *get_schema() const { return schema_ctx_.storage_schema_; } int64_t get_compaction_scn() const { return - is_multi_version_minor_merge(param_.merge_type_) ? + is_multi_version_merge(param_.merge_type_) ? scn_range_.end_scn_.get_val_for_tx() : sstable_version_range_.snapshot_version_; } + typedef common::ObSEArray MinorParallelResultArray; + // 1. init in dag ObTabletMergeDagParam ¶m_; common::ObIAllocator &allocator_; @@ -217,6 +215,7 @@ struct ObTabletMergeCtx // 4. 
filled in ObTabletMergePrepareTask::cal_minior_merge_param bool is_full_merge_; // full merge or increment merge + bool is_tenant_major_merge_; storage::ObMergeLevel merge_level_; ObTabletMergeInfo merge_info_; @@ -243,6 +242,7 @@ struct ObTabletMergeCtx compaction::ObICompactionFilter *compaction_filter_; ObCompactionTimeGuard time_guard_; int64_t rebuild_seq_; + ObMediumCompactionInfoList merge_list_; TO_STRING_KV(K_(param), K_(sstable_version_range), K_(create_snapshot_version), K_(is_full_merge), K_(merge_level), @@ -255,7 +255,7 @@ struct ObTabletMergeCtx K_(scn_range), K_(merge_scn), K_(read_base_version), K_(ls_handle), K_(tablet_handle), KPC_(merge_progress), - KPC_(compaction_filter), K_(time_guard), K_(rebuild_seq)); + KPC_(compaction_filter), K_(time_guard), K_(rebuild_seq), K_(merge_list)); private: DISALLOW_COPY_AND_ASSIGN(ObTabletMergeCtx); }; diff --git a/src/storage/compaction/ob_tablet_merge_task.cpp b/src/storage/compaction/ob_tablet_merge_task.cpp index 5d80c0c867..a4af2facc0 100644 --- a/src/storage/compaction/ob_tablet_merge_task.cpp +++ b/src/storage/compaction/ob_tablet_merge_task.cpp @@ -29,6 +29,15 @@ #include "ob_schedule_dag_func.h" #include "ob_tenant_tablet_scheduler.h" #include "share/ob_get_compat_mode.h" +#include "ob_sstable_merge_info_mgr.h" +#include "storage/compaction/ob_compaction_diagnose.h" +#include "storage/ob_tenant_tablet_stat_mgr.h" +#include "storage/access/ob_table_estimator.h" +#include "storage/access/ob_index_sstable_estimator.h" +#include "ob_medium_compaction_func.h" +#include "storage/compaction/ob_tenant_tablet_scheduler.h" +#include "share/ob_get_compat_mode.h" +#include "share/ob_tablet_meta_table_compaction_operator.h" namespace oceanbase { @@ -43,7 +52,7 @@ namespace compaction bool is_merge_dag(ObDagType::ObDagTypeEnum dag_type) { return dag_type == ObDagType::DAG_TYPE_MAJOR_MERGE - || dag_type == ObDagType::DAG_TYPE_MINOR_MERGE + || dag_type == ObDagType::DAG_TYPE_MERGE_EXECUTE || dag_type == 
ObDagType::DAG_TYPE_MINI_MERGE || dag_type == ObDagType::DAG_TYPE_TX_TABLE_MERGE; } @@ -58,7 +67,6 @@ ObMergeParameter::ObMergeParameter() tables_handle_(nullptr), merge_type_(INVALID_MERGE_TYPE), merge_level_(MACRO_BLOCK_MERGE_LEVEL), - table_schema_(nullptr), merge_schema_(nullptr), merge_range_(), sstable_logic_seq_(0), @@ -76,25 +84,10 @@ bool ObMergeParameter::is_valid() const && tables_handle_ != nullptr && sstable_logic_seq_ >= 0 && !tables_handle_->empty() - && is_schema_valid() && merge_type_ > INVALID_MERGE_TYPE && merge_type_ < MERGE_TYPE_MAX; } -bool ObMergeParameter::is_schema_valid() const -{ - bool bret = true; - if (OB_ISNULL(merge_schema_)) { - bret = false; - STORAGE_LOG(WARN, "schema is invalid, merge schema is null"); - } else if (storage::is_multi_version_minor_merge(merge_type_) || storage::is_backfill_tx_merge(merge_type_)) { - bret = merge_schema_->is_valid(); - } else { - bret = NULL != table_schema_ && table_schema_->is_valid(); - } - return bret; -} - void ObMergeParameter::reset() { ls_id_.reset(); @@ -103,7 +96,6 @@ void ObMergeParameter::reset() tables_handle_ = nullptr; merge_type_ = INVALID_MERGE_TYPE; merge_level_ = MACRO_BLOCK_MERGE_LEVEL; - table_schema_ = nullptr; merge_schema_ = nullptr; sstable_logic_seq_ = 0; merge_range_.reset(); @@ -128,18 +120,17 @@ int ObMergeParameter::init(compaction::ObTabletMergeCtx &merge_ctx, const int64_ tables_handle_ = &merge_ctx.tables_handle_; merge_type_ = merge_ctx.param_.merge_type_; merge_level_ = merge_ctx.merge_level_; - table_schema_ = merge_ctx.schema_ctx_.table_schema_; - merge_schema_ = merge_ctx.get_merge_schema(); + merge_schema_ = merge_ctx.get_schema(); version_range_ = merge_ctx.sstable_version_range_; sstable_logic_seq_ = merge_ctx.sstable_logic_seq_; - if (is_major_merge()) { + if (is_major_merge_type(merge_type_)) { // major merge should only read data between two major freeze points // but there will be some minor sstables which across major freeze points 
version_range_.base_version_ = MAX(merge_ctx.read_base_version_, version_range_.base_version_); - } else if (is_buf_minor_merge()) { - // buf minor merge does not keep multi-version + } else if (is_meta_major_merge(merge_type_)) { + // meta major merge does not keep multi-version version_range_.multi_version_start_ = version_range_.snapshot_version_; - } else if (is_multi_version_minor_merge()) { + } else if (is_multi_version_merge(merge_type_)) { // minor compaction always need to read all the data from input table // rewrite version to whole version range version_range_.snapshot_version_ = MERGE_READ_SNAPSHOT_VERSION; @@ -157,12 +148,27 @@ int ObMergeParameter::init(compaction::ObTabletMergeCtx &merge_ctx, const int64_ */ ObTabletMergeDagParam::ObTabletMergeDagParam() - : merge_type_(INVALID_MERGE_TYPE), + : for_diagnose_(false), + is_tenant_major_merge_(false), + merge_type_(INVALID_MERGE_TYPE), merge_version_(0), ls_id_(), tablet_id_(), - report_(nullptr), - for_diagnose_(false) + report_(nullptr) +{ +} + +ObTabletMergeDagParam::ObTabletMergeDagParam( + const storage::ObMergeType merge_type, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id) + : for_diagnose_(false), + is_tenant_major_merge_(false), + merge_type_(merge_type), + merge_version_(0), + ls_id_(ls_id), + tablet_id_(tablet_id), + report_(nullptr) { } @@ -171,7 +177,7 @@ bool ObTabletMergeDagParam::is_valid() const return ls_id_.is_valid() && tablet_id_.is_valid() && (merge_type_ > INVALID_MERGE_TYPE && merge_type_ < MERGE_TYPE_MAX) - && (!is_major_merge() || merge_version_ >= 0); + && (!is_major_merge_type(merge_type_) || merge_version_ >= 0); } ObBasicTabletMergeDag::ObBasicTabletMergeDag( @@ -200,7 +206,7 @@ ObBasicTabletMergeDag::~ObBasicTabletMergeDag() } // create ObTabletMergeCtx when Dag start running -int ObBasicTabletMergeDag::get_tablet_and_compat_mode() +int ObBasicTabletMergeDag::alloc_merge_ctx() { int ret = OB_SUCCESS; void *buf = nullptr; @@ -214,6 +220,11 @@ int 
ObBasicTabletMergeDag::get_tablet_and_compat_mode() ctx_ = new(buf) ObTabletMergeCtx(param_, allocator_); ctx_->merge_dag_ = this; } + return ret; +} +int ObBasicTabletMergeDag::get_tablet_and_compat_mode() +{ + int ret = OB_SUCCESS; // can't get tablet_handle now! because this func is called in create dag, // the last compaction dag is not finished yet, tablet is in old version ObTabletHandle tmp_tablet_handle; @@ -232,7 +243,7 @@ int ObBasicTabletMergeDag::get_tablet_and_compat_mode() int tmp_ret = OB_SUCCESS; if (OB_SUCC(ret) && typeid(*this) != typeid(ObTxTableMergeDag) - && OB_UNLIKELY(OB_SUCCESS != (tmp_ret = ctx_->init_merge_progress(param_.merge_type_ == MAJOR_MERGE)))) { + && OB_UNLIKELY(OB_SUCCESS != (tmp_ret = ctx_->init_merge_progress(param_.is_tenant_major_merge_)))) { LOG_WARN("failed to init merge progress", K(tmp_ret), K_(param)); } @@ -273,9 +284,13 @@ int ObBasicTabletMergeDag::inner_init(const ObTabletMergeDagParam ¶m) merge_type_ = param.merge_type_; ls_id_ = param.ls_id_; tablet_id_ = param.tablet_id_; - if (!param.for_diagnose_ && OB_FAIL(get_tablet_and_compat_mode())) { + if (param.for_diagnose_) { + } else if (OB_FAIL(alloc_merge_ctx())) { + LOG_WARN("failed to alloc merge ctx", K(ret)); + } else if (OB_FAIL(get_tablet_and_compat_mode())) { LOG_WARN("failed to get tablet and compat mode", K(ret)); - } else { + } + if (OB_SUCC(ret)) { is_inited_ = true; } } @@ -303,19 +318,26 @@ bool ObBasicTabletMergeDag::operator == (const ObIDag &other) const int64_t ObMergeDagHash::inner_hash() const { int64_t hash_value = 0; - ObMergeType merge_type = merge_type_; - if (merge_type_ == MINOR_MERGE || merge_type_ == MINI_MINOR_MERGE) { - merge_type = MINI_MINOR_MERGE; - } - hash_value = common::murmurhash(&merge_type, sizeof(merge_type), hash_value); + // make two merge type same + hash_value = common::murmurhash(&merge_type_, sizeof(merge_type_), hash_value); hash_value += ls_id_.hash(); hash_value += tablet_id_.hash(); return hash_value; } +bool 
ObMergeDagHash::belong_to_same_tablet(const ObMergeDagHash *other) const +{ + bool bret = false; + if (nullptr != other) { + bret = ls_id_ == other->ls_id_ + && tablet_id_ == other->tablet_id_; + } + return bret; +} + int64_t ObBasicTabletMergeDag::hash() const { - return ObMergeDagHash::inner_hash(); + return inner_hash(); } int ObBasicTabletMergeDag::fill_comment(char *buf, const int64_t buf_len) const @@ -349,15 +371,16 @@ ObTabletMergeDag::ObTabletMergeDag(const ObDagType::ObDagTypeEnum type) { } +template int ObTabletMergeDag::create_first_task() { int ret = OB_SUCCESS; - ObTabletMergePrepareTask *prepare_task = NULL; - if (OB_FAIL(alloc_task(prepare_task))) { + T *task = nullptr; + if (OB_FAIL(alloc_task(task))) { STORAGE_LOG(WARN, "fail to alloc task", K(ret)); - } else if (OB_FAIL(prepare_task->init())) { - STORAGE_LOG(WARN, "failed to init prepare_task", K(ret)); - } else if (OB_FAIL(add_task(*prepare_task))) { + } else if (OB_FAIL(task->init())) { + STORAGE_LOG(WARN, "failed to init task", K(ret)); + } else if (OB_FAIL(add_task(*task))) { STORAGE_LOG(WARN, "fail to add task", K(ret), K_(ls_id), K_(tablet_id), K_(ctx)); } return ret; @@ -457,7 +480,7 @@ int ObTabletMajorMergeDag::init_by_param(const ObIDagInitParam *param) ret = OB_INVALID_ARGUMENT; LOG_WARN("input param is null", K(ret), K(param)); } else if (FALSE_IT(merge_param = static_cast(param))) { - } else if (OB_UNLIKELY(!merge_param->is_major_merge())) { + } else if (OB_UNLIKELY(!is_major_merge_type(merge_param->merge_type_))) { ret = OB_ERR_SYS; LOG_ERROR("param is invalid or is major merge param not match", K(ret), K(param)); } else if (OB_FAIL(ObBasicTabletMergeDag::inner_init(*merge_param))) { @@ -487,7 +510,7 @@ int ObTabletMiniMergeDag::init_by_param(const share::ObIDagInitParam *param) ret = OB_INVALID_ARGUMENT; LOG_WARN("input param is null", K(ret), K(param)); } else if (FALSE_IT(merge_param = static_cast(param))) { - } else if (OB_UNLIKELY(!merge_param->is_mini_merge())) { + } else if 
(OB_UNLIKELY(!is_mini_merge(merge_param->merge_type_))) { ret = OB_ERR_SYS; LOG_ERROR("is mini merge param not match", K(ret), K(param)); } else if (OB_FAIL(ObBasicTabletMergeDag::inner_init(*merge_param))) { @@ -497,19 +520,20 @@ int ObTabletMiniMergeDag::init_by_param(const share::ObIDagInitParam *param) } /* - * ----------------------------------------------ObTabletMinorMergeDag-------------------------------------------------- + * ----------------------------------------------ObTabletMergeExecuteDag-------------------------------------------------- */ -ObTabletMinorMergeDag::ObTabletMinorMergeDag() - : ObTabletMergeDag(ObDagType::DAG_TYPE_MINOR_MERGE) +ObTabletMergeExecuteDag::ObTabletMergeExecuteDag() + : ObTabletMergeDag(ObDagType::DAG_TYPE_MERGE_EXECUTE), + merge_scn_range_() { } -ObTabletMinorMergeDag::~ObTabletMinorMergeDag() +ObTabletMergeExecuteDag::~ObTabletMergeExecuteDag() { } -int ObTabletMinorMergeDag::init_by_param(const share::ObIDagInitParam *param) +int ObTabletMergeExecuteDag::init_by_param(const share::ObIDagInitParam *param) { int ret = OB_SUCCESS; const ObTabletMergeDagParam *merge_param = nullptr; @@ -517,7 +541,8 @@ int ObTabletMinorMergeDag::init_by_param(const share::ObIDagInitParam *param) ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid argument to init sstable minor merge dag", K(ret), K(param)); } else if (FALSE_IT(merge_param = static_cast(param))) { - } else if (OB_UNLIKELY(!merge_param->is_multi_version_minor_merge() && !merge_param->is_buf_minor_merge())) { + } else if (OB_UNLIKELY(!is_multi_version_merge(merge_param->merge_type_) + && !is_meta_major_merge(merge_param->merge_type_))) { ret = OB_ERR_SYS; LOG_ERROR("Unexpected merge type to init minor merge dag", K(ret), KPC(merge_param)); } else if (OB_FAIL(ObTabletMergeDag::inner_init(*merge_param))) { @@ -527,7 +552,171 @@ int ObTabletMinorMergeDag::init_by_param(const share::ObIDagInitParam *param) return ret; } -bool ObTabletMinorMergeDag::operator == (const ObIDag &other) const 
+int ObTabletMergeExecuteDag::direct_init_ctx( + const ObTabletMergeDagParam ¶m, + const lib::Worker::CompatMode compat_mode, + const ObGetMergeTablesResult &result, + ObLSHandle &ls_handle, + ObTabletHandle &tablet_handle) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY((!is_minor_merge_type(result.suggest_merge_type_) + && !is_meta_major_merge(result.suggest_merge_type_)) || !result.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("result is invalid", K(ret), K(result)); + } else { + param_ = param; + merge_type_ = param.merge_type_; + ls_id_ = param.ls_id_; + tablet_id_ = param.tablet_id_; + compat_mode_ = compat_mode; + merge_scn_range_ = result.scn_range_; + if (OB_FAIL(alloc_merge_ctx())) { + LOG_WARN("failed to alloc merge ctx", K(ret)); + } else if (FALSE_IT(ctx_->tablet_handle_ = tablet_handle)) { // assign tablet_handle + } else if (FALSE_IT(ctx_->ls_handle_ = ls_handle)) { // assign ls_handle + } else if (FALSE_IT(ctx_->rebuild_seq_ = ls_handle.get_ls()->get_rebuild_seq())) { + } else if (OB_FAIL(create_first_task(result))) { + LOG_WARN("failed to create first task", K(ret), K(result)); + } else { + is_inited_ = true; + } + } + + return ret; +} +template +int ObTabletMergeExecuteDag::create_first_task( + const ObGetMergeTablesResult &result) +{ + int ret = OB_SUCCESS; + T *task = nullptr; + if (OB_FAIL(alloc_task(task))) { + STORAGE_LOG(WARN, "fail to alloc task", K(ret)); + } else if (OB_FAIL(task->init(result, *ctx_))) { + STORAGE_LOG(WARN, "failed to init prepare_task", K(ret)); + } else if (OB_FAIL(add_task(*task))) { + STORAGE_LOG(WARN, "fail to add task", K(ret), K_(ls_id), K_(tablet_id), K_(ctx)); + } + return ret; +} + +int ObTabletMergeExecuteDag::create_first_task(const ObGetMergeTablesResult &result) +{ + return create_first_task(result); +} + +ObTabletMergeExecutePrepareTask::ObTabletMergeExecutePrepareTask() + : ObITask(ObITask::TASK_TYPE_SSTABLE_MERGE_PREPARE), + is_inited_(false), + ctx_(nullptr), + result_() +{} + 
+ObTabletMergeExecutePrepareTask::~ObTabletMergeExecutePrepareTask() +{} + +int ObTabletMergeExecutePrepareTask::init( + const ObGetMergeTablesResult &result, + ObTabletMergeCtx &ctx) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_FAIL(result_.assign(result))) { + LOG_WARN("failed to assgin result", K(ret), K(result)); + } else { + ctx_ = &ctx; + is_inited_ = true; + } + return ret; +} + +int ObTabletMergeExecutePrepareTask::process() +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("task is not init", K(ret)); + } else if (OB_ISNULL(ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx is unexpected null", K(ret), K(ctx_)); + } else if (OB_FAIL(ctx_->get_schema_and_gene_from_result(result_))) { + LOG_WARN("failed to get schema and generage from result", K(ret), K_(result)); + } else if (OB_FAIL(ctx_->init_merge_info())) { + LOG_WARN("fail to init merge info", K(ret), K_(result), KPC(ctx_)); + } else if (OB_FAIL(ctx_->prepare_index_tree())) { + LOG_WARN("fail to prepare sstable index tree", K(ret), KPC(ctx_)); + } else if (OB_FAIL(ObBasicTabletMergeDag::generate_merge_task( + *static_cast(get_dag()), *ctx_, this))) { + LOG_WARN("Failed to generate_merge_sstable_task", K(ret)); + } else { + int tmp_ret = OB_SUCCESS; + if (ctx_->param_.tablet_id_.is_special_merge_tablet()) { + // init compaction filter for minor merge in TxDataTable + if (OB_FAIL(prepare_compaction_filter())) { + LOG_WARN("failed to prepare compaction filter", K(ret), K(ctx_->param_)); + } + } else if (OB_TMP_FAIL(ctx_->init_merge_progress(ctx_->param_.is_tenant_major_merge_))) { + LOG_WARN("failed to init merge progress", K(tmp_ret), K_(result)); + } else if (OB_TMP_FAIL(ctx_->prepare_merge_progress())) { + LOG_WARN("failed to init merge progress", K(tmp_ret)); + } + FLOG_INFO("succeed to init merge ctx", K(ret), KPC(ctx_), K(result_)); + } + return ret; +} + +int 
ObTxTableMergeExecutePrepareTask::prepare_compaction_filter() +{ + int ret = OB_SUCCESS; + void *buf = nullptr; + if (OB_ISNULL(ctx_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx is unexpected null", K(ret), K(ctx_)); + } else if (OB_UNLIKELY(!ctx_->param_.tablet_id_.is_ls_tx_data_tablet())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("only tx data tablet can execute minor merge", K(ret), K(ctx_->param_)); + } else if (OB_ISNULL(buf = ctx_->allocator_.alloc(sizeof(ObTransStatusFilter)))) { + } else { + ObTransStatusFilter *compaction_filter = new(buf) ObTransStatusFilter(); + ObTxTableGuard guard; + share::SCN recycle_scn = share::SCN::min_scn(); + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(ctx_->ls_handle_.get_ls()->get_tx_table_guard(guard))) { + LOG_WARN("failed to get tx table", K(tmp_ret), K_(ctx_->param)); + } else if (OB_UNLIKELY(!guard.is_valid())) { + tmp_ret = OB_ERR_UNEXPECTED; + LOG_WARN("tx table guard is invalid", K(tmp_ret), K_(ctx_->param), K(guard)); + } else if (OB_TMP_FAIL(guard.get_tx_table()->get_recycle_scn(recycle_scn))) { + LOG_WARN("failed to get recycle ts", K(tmp_ret), K_(ctx_->param)); + } else if (OB_TMP_FAIL(compaction_filter->init(recycle_scn, ObTxTable::get_filter_col_idx()))) { + LOG_WARN("failed to get init compaction filter", K(tmp_ret), K_(ctx_->param), K(recycle_scn)); + } else { + ctx_->compaction_filter_ = compaction_filter; + FLOG_INFO("success to init compaction filter", K(tmp_ret), K(recycle_scn)); + } + + if (OB_SUCC(ret)) { + ctx_->progressive_merge_num_ = 0; + ctx_->is_full_merge_ = true; + ctx_->merge_level_ = MACRO_BLOCK_MERGE_LEVEL; + ctx_->read_base_version_ = 0; + } else if (OB_NOT_NULL(buf)) { + ctx_->allocator_.free(buf); + buf = nullptr; + } + } + return ret; +} + +int ObTxTableMinorExecuteDag::create_first_task(const ObGetMergeTablesResult &result) +{ + return ObTabletMergeExecuteDag::create_first_task(result); +} + +bool ObTabletMergeExecuteDag::operator == (const ObIDag &other) const { bool is_same = true; if 
(this == &other) { @@ -535,11 +724,11 @@ bool ObTabletMinorMergeDag::operator == (const ObIDag &other) const } else if (get_type() != other.get_type()) { is_same = false; } else { - const ObTabletMergeDag &other_merge_dag = static_cast(other); - if (!is_mini_minor_merge(merge_type_) - || !is_mini_minor_merge(other_merge_dag.merge_type_) - || ls_id_ != other_merge_dag.ls_id_ - || tablet_id_ != other_merge_dag.tablet_id_) { + const ObTabletMergeExecuteDag &other_merge_dag = static_cast(other); + if (!belong_to_same_tablet(&other_merge_dag) + || merge_type_ != other_merge_dag.merge_type_ // different merge type + || (is_minor_merge(merge_type_) // different merge range for minor + && merge_scn_range_ != other_merge_dag.merge_scn_range_)) { is_same = false; } } @@ -597,11 +786,13 @@ int ObTabletMergePrepareTask::process() DEBUG_SYNC(MERGE_PARTITION_TASK); - if (!is_inited_) { + if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("not inited", K(ret)); } else if (OB_ISNULL(ctx = &merge_dag_->get_ctx())) { - } else if (OB_UNLIKELY(ctx->param_.is_major_merge() + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ctx is unexpected null", K(ret), KP(ctx), KPC(merge_dag_)); + } else if (OB_UNLIKELY(is_major_merge_type(ctx->param_.merge_type_) && !MTL(ObTenantTabletScheduler *)->could_major_merge_start())) { ret = OB_CANCELED; LOG_INFO("Merge has been paused", K(ret), K(ctx)); @@ -609,46 +800,32 @@ int ObTabletMergePrepareTask::process() ret = OB_CANCELED; LOG_INFO("ls offline, skip merge", K(ret), K(ctx)); } else if (FALSE_IT(ctx->time_guard_.click(ObCompactionTimeGuard::DAG_WAIT_TO_SCHEDULE))) { - } else if (OB_FAIL(ctx->ls_handle_.get_ls()->get_tablet(ctx->param_.tablet_id_, - ctx->tablet_handle_, - ObTabletCommon::NO_CHECK_GET_TABLET_TIMEOUT_US))) { - LOG_WARN("failed to get tablet", K(ret), "ls_id", ctx->param_.ls_id_, - "tablet_id", ctx->param_.tablet_id_); + } else if (OB_FAIL(check_before_init())) { + if (OB_CANCELED != ret) { + LOG_WARN("failed to check before init", K(ret), 
K(ctx->param_)); + } + } else if (OB_FAIL(ctx->ls_handle_.get_ls()->get_tablet( + ctx->param_.tablet_id_, + ctx->tablet_handle_, + storage::ObTabletCommon::NO_CHECK_GET_TABLET_TIMEOUT_US))) { + LOG_WARN("failed to get tablet", K(ret), K(ctx->param_)); + } else if (FALSE_IT(ctx->rebuild_seq_ = ctx->ls_handle_.get_ls()->get_rebuild_seq())) { } else if (OB_FAIL(build_merge_ctx(skip_rest_operation))) { if (OB_NO_NEED_MERGE != ret) { LOG_WARN("failed to build merge ctx", K(ret), K(ctx->param_)); } - } else if (!skip_rest_operation && ctx->param_.is_multi_version_minor_merge()) { - if (ctx->scn_range_.is_empty()) { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("Unexcepted empty log ts range in minor merge", K(ret), K(ctx->scn_range_)); - } else { - ctx->merge_scn_ = ctx->scn_range_.end_scn_; - } } if (OB_FAIL(ret) || skip_rest_operation) { + } else if (OB_FAIL(ObBasicTabletMergeDag::generate_merge_task( + *merge_dag_, *ctx, this))) { + LOG_WARN("Failed to generate_merge_sstable_task", K(ret)); } else { - if (OB_NOT_NULL(ctx->compaction_filter_)) { - ctx->is_full_merge_ = (ctx->is_full_merge_ || ctx->compaction_filter_->is_full_merge_); - } - if (OB_FAIL(generate_merge_task())) { - LOG_WARN("Failed to generate_merge_sstable_task", K(ret)); - } else { - int tmp_ret = OB_SUCCESS; - if (!ctx->tablet_handle_.is_valid()) { - STORAGE_LOG(WARN, "Unexcepted invalid tablet handle", K(ret), KPC(ctx)); - } else if (OB_NOT_NULL(ctx->merge_progress_)) { - const ObTableReadInfo &read_info = ctx->tablet_handle_.get_obj()->get_full_read_info(); - if (OB_SUCCESS != (tmp_ret = ctx->merge_progress_->init(ctx, read_info))) { - ctx->merge_progress_->reset(); - LOG_WARN("failed to init merge progress", K(tmp_ret)); - } else { - LOG_DEBUG("succeed to init merge progress", K(tmp_ret), KPC(ctx->merge_progress_)); - } - } - LOG_DEBUG("succeed to init merge ctx", "task", *this); + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(ctx->prepare_merge_progress())) { + LOG_WARN("failed to init merge progress", 
K(tmp_ret)); } + FLOG_INFO("succeed to init merge ctx", "task", *this); } if (OB_FAIL(ret)) { FLOG_WARN("sstable merge finish", K(ret), K(ctx), "task", *(static_cast(this))); @@ -657,100 +834,47 @@ int ObTabletMergePrepareTask::process() return ret; } -int ObTabletMergePrepareTask::prepare_index_tree() +int ObBasicTabletMergeDag::generate_merge_task( + ObBasicTabletMergeDag &merge_dag, + ObTabletMergeCtx &ctx, + ObITask *prepare_task) { int ret = OB_SUCCESS; - ObDataStoreDesc desc; - ObTabletMergeCtx &ctx = merge_dag_->get_ctx(); - if (OB_UNLIKELY(!ctx.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid merge ctx", K(ret), K(ctx)); - } else if (OB_FAIL(desc.init(*ctx.get_merge_schema(), - ctx.param_.ls_id_, - ctx.param_.tablet_id_, - ctx.param_.merge_type_, - ctx.sstable_version_range_.snapshot_version_))) { - LOG_WARN("failed to init index store desc", K(ret), K(ctx)); - } else { - // TODO(zhuixin.gsy) modify index_desc.init to avoid reset col_desc_array_ - const ObMergeSchema *merge_schema = ctx.get_merge_schema(); - desc.row_column_count_ = desc.rowkey_column_count_ + 1; - desc.col_desc_array_.reset(); - desc.need_prebuild_bloomfilter_ = false; - if (OB_FAIL(desc.col_desc_array_.init(desc.row_column_count_))) { - LOG_WARN("failed to reserve column desc array", K(ret)); - } else if (OB_FAIL(merge_schema->get_rowkey_column_ids(desc.col_desc_array_))) { - LOG_WARN("failed to get rowkey column ids", K(ret)); - } else if (OB_FAIL(ObMultiVersionRowkeyHelpper::add_extra_rowkey_cols(desc.col_desc_array_))) { - LOG_WARN("failed to get extra rowkey column ids", K(ret)); - } else { - ObObjMeta meta; - meta.set_varchar(); - meta.set_collation_type(CS_TYPE_BINARY); - share::schema::ObColDesc col; - col.col_id_ = static_cast(desc.row_column_count_ + OB_APP_MIN_COLUMN_ID); - col.col_type_ = meta; - col.col_order_ = DESC; - - if (OB_FAIL(desc.col_desc_array_.push_back(col))) { - LOG_WARN("failed to push back last col for index", K(ret), K(col)); - } - } - } - if 
(OB_SUCC(ret)) { - if (OB_FAIL(ctx.merge_info_.prepare_index_builder(desc))) { - LOG_WARN("failed to prepare index builder", K(ret), K(desc)); - } - } - return ret; -} - -int ObTabletMergePrepareTask::generate_merge_task() -{ - int ret = OB_SUCCESS; - ObTabletMergeCtx &ctx = merge_dag_->get_ctx(); ObTabletMergeTask *merge_task = NULL; ObTabletMergeFinishTask *finish_task = NULL; // add macro merge task - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("not inited", K(ret)); - } else if (OB_FAIL(prepare_index_tree())) { - LOG_WARN("fail to prepare sstable index tree", K(ret), K(ctx)); - } else if (OB_FAIL(merge_dag_->alloc_task(merge_task))) { + if (OB_FAIL(merge_dag.alloc_task(merge_task))) { LOG_WARN("fail to alloc task", K(ret)); } else if (OB_ISNULL(merge_task)) { ret = OB_ERR_UNEXPECTED; LOG_ERROR("Unexpecte null macro merge task", K(ret), K(ctx)); } else if (OB_FAIL(merge_task->init(0/*task_idx*/, ctx))) { LOG_WARN("fail to init macro merge task", K(ret), K(ctx)); - } else if (OB_FAIL(add_child(*merge_task))) { + } else if (OB_NOT_NULL(prepare_task) && OB_FAIL(prepare_task->add_child(*merge_task))) { LOG_WARN("fail to add child", K(ret), K(ctx)); - } else if (OB_FAIL(merge_dag_->add_task(*merge_task))) { + } else if (OB_FAIL(merge_dag.add_task(*merge_task))) { LOG_WARN("fail to add task", K(ret), K(ctx)); } // add finish task if (OB_FAIL(ret)) { - } else if (OB_FAIL(merge_dag_->alloc_task(finish_task))) { + } else if (OB_FAIL(merge_dag.alloc_task(finish_task))) { LOG_WARN("fail to alloc task", K(ret), K(ctx)); } else if (OB_FAIL(finish_task->init())) { LOG_WARN("fail to init main table finish task", K(ret), K(ctx)); - } else if (OB_NOT_NULL(merge_task) && OB_FAIL(merge_task->add_child(*finish_task))) { + } else if (OB_FAIL(merge_task->add_child(*finish_task))) { LOG_WARN("fail to add child", K(ret), K(ctx)); - } else if (OB_ISNULL(merge_task) && OB_FAIL(add_child(*finish_task))) { - LOG_WARN("fail to add child", K(ret), K(ctx)); - } else if 
(OB_FAIL(merge_dag_->add_task(*finish_task))) { + } else if (OB_FAIL(merge_dag.add_task(*finish_task))) { LOG_WARN("fail to add task", K(ret), K(ctx)); } if (OB_FAIL(ret)) { if (OB_NOT_NULL(merge_task)) { - merge_dag_->remove_task(*merge_task); + merge_dag.remove_task(*merge_task); merge_task = nullptr; } if (OB_NOT_NULL(finish_task)) { - merge_dag_->remove_task(*finish_task); + merge_dag.remove_task(*finish_task); finish_task = nullptr; } } @@ -765,41 +889,57 @@ int ObTabletMergePrepareTask::build_merge_ctx(bool &skip_rest_operation) const common::ObTabletID &tablet_id = ctx.param_.tablet_id_; // only ctx.param_ is inited, fill other fields here - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("The tablet has not been initialized", K(ret), K(tablet_id)); - } else if (!ctx.param_.is_valid()) { + if (OB_UNLIKELY(!ctx.param_.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(ctx)); - } else if (ctx.param_.tablet_id_ != tablet_id) { - ret = OB_ERR_SYS; - LOG_WARN("tablet id is not match", K(ret), K(tablet_id), K(ctx.param_)); - } else if (FALSE_IT(ctx.rebuild_seq_ = ctx.ls_handle_.get_ls()->get_rebuild_seq())) { - } else if (ctx.param_.is_major_merge()) { - if (!ctx.tablet_handle_.get_obj()->get_tablet_meta().ha_status_.is_data_status_complete()) { - ret = OB_STATE_NOT_MATCH; - LOG_WARN("ha status is not allowed major", K(ret), K(tablet_id)); - } else if (OB_FAIL(ctx.inner_init_for_major())) { - if (OB_NO_NEED_MERGE != ret) { - LOG_WARN("fail to inner init ctx", K(ret), K(tablet_id), K(ctx)); - } - } - } else if (OB_FAIL(ctx.inner_init_for_minor(skip_rest_operation))) { - if (OB_NO_NEED_MERGE != ret) { - LOG_WARN("fail to inner init ctx", K(ret), K(tablet_id), K(ctx)); - } + } else if (OB_FAIL(inner_init_ctx(ctx, skip_rest_operation))) { + LOG_WARN("fail to inner init ctx", K(ret), K(tablet_id), K(ctx)); } if (OB_FAIL(ret) || skip_rest_operation) { } else if (OB_FAIL(ctx.init_merge_info())) { LOG_WARN("fail to init merge info", 
K(ret), K(tablet_id), K(ctx)); - } else { + } else if (OB_FAIL(ctx.prepare_index_tree())) { + LOG_WARN("fail to prepare sstable index tree", K(ret), K(ctx)); + } + if (OB_SUCC(ret)) { FLOG_INFO("succeed to build merge ctx", K(tablet_id), K(ctx), K(skip_rest_operation)); } return ret; } +int ObTabletMajorPrepareTask::check_before_init() +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!MTL(ObTenantTabletScheduler *)->could_major_merge_start())) { + ret = OB_CANCELED; + LOG_INFO("Merge has been paused", K(ret), KPC(merge_dag_)); + } + return ret; +} + +int ObTabletMajorPrepareTask::inner_init_ctx(ObTabletMergeCtx &ctx, bool &skip_merge_task_flag) +{ + int ret = OB_SUCCESS; + skip_merge_task_flag = false; + if (OB_FAIL(ctx.inner_init_for_medium())) { + LOG_WARN("failed to inner init for major", K(ret)); + } + return ret; +} + +int ObTabletMiniPrepareTask::inner_init_ctx(ObTabletMergeCtx &ctx, bool &skip_merge_task_flag) +{ + int ret = OB_SUCCESS; + skip_merge_task_flag = false; + if (OB_FAIL(ctx.inner_init_for_minor(skip_merge_task_flag))) { + LOG_WARN("failed to inner init for mini", K(ret)); + } + return ret; +} + + /* * ----------------------------------------------ObTabletMergeFinishTask-------------------------------------------------- */ @@ -845,7 +985,7 @@ int ObTabletMergeFinishTask::create_sstable_after_merge(ObSSTable *&sstable) int ret = OB_SUCCESS; ObTabletMergeCtx &ctx = merge_dag_->get_ctx(); if (ctx.merged_table_handle_.is_valid()) { - if (OB_UNLIKELY(!ctx.param_.is_major_merge())) { + if (OB_UNLIKELY(!is_major_merge_type(ctx.param_.merge_type_))) { ret = OB_ERR_SYS; LOG_ERROR("Unxpected valid merged table handle with other merge", K(ret), K(ctx)); } else if (OB_FAIL(ctx.merged_table_handle_.get_sstable(sstable))) { @@ -874,6 +1014,7 @@ int ObTabletMergeFinishTask::process() LOG_WARN("not inited yet", K(ret)); } else { ObTabletMergeCtx &ctx = merge_dag_->get_ctx(); + ObLSID &ls_id = ctx.param_.ls_id_; ObTabletID &tablet_id = ctx.param_.tablet_id_; 
ctx.time_guard_.click(ObCompactionTimeGuard::EXECUTE); @@ -883,21 +1024,20 @@ int ObTabletMergeFinishTask::process() } else if (OB_FAIL(add_sstable_for_merge(ctx))) { LOG_WARN("failed to add sstable for merge", K(ret)); } - if (OB_SUCC(ret) && ctx.param_.is_major_merge() && NULL != ctx.param_.report_) { + if (OB_SUCC(ret) && is_major_merge_type(ctx.param_.merge_type_) && NULL != ctx.param_.report_) { int tmp_ret = OB_SUCCESS; if (OB_TMP_FAIL(ctx.param_.report_->submit_tablet_update_task(MTL_ID(), ctx.param_.ls_id_, tablet_id))) { LOG_WARN("failed to submit tablet update task to report", K(tmp_ret), K(MTL_ID()), K(ctx.param_.ls_id_), K(tablet_id)); } else if (OB_TMP_FAIL(ctx.ls_handle_.get_ls()->get_tablet_svr()->update_tablet_report_status(tablet_id))) { - LOG_WARN("failed to update tablet report status", K(tmp_ret), K(MTL_ID()), K(tablet_id)); + LOG_WARN("failed to update tablet report status", K(tmp_ret), K(tablet_id)); } } if (OB_SUCC(ret) && OB_NOT_NULL(ctx.merge_progress_)) { - int tmp_ret = OB_SUCCESS; - // update merge info if (OB_TMP_FAIL(ctx.merge_progress_->update_merge_info(ctx.merge_info_.get_sstable_merge_info()))) { STORAGE_LOG(WARN, "fail to update update merge info", K(tmp_ret)); } + if (OB_TMP_FAIL(compaction::ObCompactionSuggestionMgr::get_instance().analyze_merge_info( ctx.merge_info_, *ctx.merge_progress_))) { @@ -939,7 +1079,7 @@ int ObTabletMergeFinishTask::get_merged_sstable(ObTabletMergeCtx &ctx, ObSSTable LOG_INFO("create new merged sstable", K(ctx.param_.tablet_id_), "snapshot_version", ctx.sstable_version_range_.snapshot_version_, K(ctx.param_.merge_type_), K(ctx.create_snapshot_version_), - "table_mode_flag", ctx.get_merge_schema()->get_table_mode_flag()); + "table_mode_flag", ctx.get_schema()->get_table_mode_flag()); if (OB_FAIL(ctx.merge_info_.create_sstable(ctx))) { LOG_WARN("fail to create sstable", K(ret), K(ctx)); @@ -954,62 +1094,70 @@ int ObTabletMergeFinishTask::add_sstable_for_merge(ObTabletMergeCtx &ctx) { int ret = 
OB_SUCCESS; const ObStorageSchema *update_storage_schema = ctx.schema_ctx_.storage_schema_; - ObStorageSchema tmp_storage_schema; + ObTablet *old_tablet = ctx.tablet_handle_.get_obj(); + const ObMergeType merge_type = ctx.param_.merge_type_; + if (OB_UNLIKELY(!ctx.is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("unexpected error of merge ctx", K(ctx)); - } else if (ctx.param_.is_major_merge() - && ctx.get_merge_schema()->get_schema_version() > ctx.schema_ctx_.storage_schema_->get_schema_version()) { - if (OB_FAIL(tmp_storage_schema.init( - ctx.allocator_, - *static_cast(ctx.get_merge_schema()), - ctx.schema_ctx_.storage_schema_->get_compat_mode()))) { - LOG_WARN("failed to init storage schema", K(ret), KPC(ctx.get_merge_schema())); - } else { - update_storage_schema = &tmp_storage_schema; + } else if (is_major_merge_type(merge_type) + && update_storage_schema->schema_version_ > old_tablet->get_storage_schema().schema_version_) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("schema in major can't have larger schema version than tablet", K(ret), + KPC(update_storage_schema), K(old_tablet->get_storage_schema())); + } else if (is_mini_merge(merge_type) && !ctx.param_.tablet_id_.is_special_merge_tablet()) { + // if only one medium compaction info need store, just use ObUpdateTableStoreParam + // OR need to read from inner table to decide what need to keep after release memtable + if (OB_FAIL(ctx.get_medium_compaction_info_to_store())) { + LOG_WARN("failed to get medium compaction info", K(ret), K(ctx)); } } if (OB_SUCC(ret)) { - SCN clog_checkpoint_scn = ctx.param_.is_mini_merge() ? ctx.merged_table_handle_.get_table()->get_end_scn() : SCN::min_scn(); + SCN clog_checkpoint_scn = is_mini_merge(merge_type) ? 
ctx.merged_table_handle_.get_table()->get_end_scn() : SCN::min_scn(); + // means finish current major/medium compaction ObUpdateTableStoreParam param(ctx.merged_table_handle_, ctx.sstable_version_range_.snapshot_version_, ctx.sstable_version_range_.multi_version_start_, - update_storage_schema, + ctx.schema_ctx_.storage_schema_, ctx.rebuild_seq_, - ctx.param_.is_major_merge(), + is_major_merge_type(merge_type)/*need_report*/, clog_checkpoint_scn, - ctx.param_.is_mini_minor_merge()); + is_minor_merge(ctx.param_.merge_type_)/*need_check_sstable*/, + false/*allow_duplicate_sstable*/, + &ctx.merge_list_); ObTablet *old_tablet = ctx.tablet_handle_.get_obj(); ObTabletHandle new_tablet_handle; if (ctx.param_.tablet_id_.is_special_merge_tablet()) { param.multi_version_start_ = 1; } // for mini merge, read all msd from frozen memtable - if (ctx.param_.is_mini_merge() && OB_FAIL(read_msd_from_memtable(ctx, param))) { + if (is_mini_merge(merge_type) && OB_FAIL(read_msd_from_memtable(ctx, param))) { LOG_WARN("failed to read msd from memtable", K(ret), K(ctx)); } else if (OB_FAIL(ctx.ls_handle_.get_ls()->update_tablet_table_store( ctx.param_.tablet_id_, param, new_tablet_handle))) { LOG_WARN("failed to update tablet table store", K(ret), K(param)); } else if (FALSE_IT(ctx.time_guard_.click(ObCompactionTimeGuard::UPDATE_TABLET))) { - } else if (ctx.param_.is_mini_merge()) { + } else if (is_mini_merge(merge_type)) { if (OB_FAIL(new_tablet_handle.get_obj()->release_memtables(ctx.scn_range_.end_scn_))) { LOG_WARN("failed to release memtable", K(ret), "end_scn", ctx.scn_range_.end_scn_); } else { ctx.time_guard_.click(ObCompactionTimeGuard::RELEASE_MEMTABLE); } } + + // get info from inner table and save medium info // try schedule minor or major merge after mini - if (OB_SUCC(ret) && ctx.param_.is_mini_merge() && new_tablet_handle.is_valid()) { + if (OB_SUCC(ret) && is_mini_merge(merge_type) && new_tablet_handle.is_valid()) { int tmp_ret = OB_SUCCESS; if 
(!ctx.param_.tablet_id_.is_special_merge_tablet()) { if (OB_TMP_FAIL(try_schedule_compaction_after_mini(ctx, new_tablet_handle))) { LOG_WARN("failed to schedule compaction after mini", K(tmp_ret), "ls_id", ctx.param_.ls_id_, "tablet_id", ctx.param_.tablet_id_); } - } else if (OB_TMP_FAIL(ObTenantTabletScheduler::schedule_tx_table_merge( - ctx.param_.ls_id_, - *new_tablet_handle.get_obj()))) { + } else if (OB_TMP_FAIL(ObTenantTabletScheduler::schedule_tablet_minor_merge( + ctx.ls_handle_, + new_tablet_handle))) { if (OB_SIZE_OVERFLOW != tmp_ret) { LOG_WARN("failed to schedule special tablet minor merge", K(tmp_ret), "ls_id", ctx.param_.ls_id_, "tablet_id", ctx.param_.tablet_id_); @@ -1021,6 +1169,47 @@ int ObTabletMergeFinishTask::add_sstable_for_merge(ObTabletMergeCtx &ctx) return ret; } +int ObTabletMergeFinishTask::try_report_tablet_stat_after_mini(ObTabletMergeCtx &ctx) +{ + int ret = OB_SUCCESS; + const ObTabletID &tablet_id = ctx.param_.tablet_id_; + ObQueryFlag query_flag(ObQueryFlag::Forward, + true, /*is daily merge scan*/ + true, /*is read multiple macro block*/ + true, /*sys task scan, read one macro block in single io*/ + false, /*full row scan flag, obsoleted*/ + false, /*index back*/ + false); /*query_stat*/ + ObTableEstimateBaseInput base_input(query_flag, + tablet_id.id(), + ctx.tables_handle_.get_tables(), + ctx.tablet_handle_); + ObDatumRange whole_range; + whole_range.set_whole_range(); + ObSEArray ranges; + ObPartitionEst part_estimate; + ObSEArray records; + + if (OB_FAIL(ranges.push_back(whole_range))) { + LOG_WARN("failed to add ranges", K(ret), K(ranges), K(whole_range)); + } else if (OB_FAIL(ObTableEstimator::estimate_row_count_for_scan( + base_input, ranges, part_estimate, records))) { + LOG_WARN("failed to estimate row counts", K(ret), K(part_estimate), K(records)); + } else if (0 == part_estimate.logical_row_count_ && 0 == part_estimate.physical_row_count_) { + } else { + ObTabletStat report_stat; + report_stat.ls_id_ = 
ctx.param_.ls_id_.id(), + report_stat.tablet_id_ = ctx.param_.tablet_id_.id(); + report_stat.merge_cnt_ = 1; + report_stat.merge_logical_row_cnt_ = part_estimate.logical_row_count_; + report_stat.merge_physical_row_cnt_ = part_estimate.physical_row_count_; + if (OB_FAIL(MTL(ObTenantTabletStatMgr *)->report_stat(report_stat))) { + STORAGE_LOG(WARN, "failed to report tablet stat", K(ret)); + } + } + return ret; +} + int ObTabletMergeFinishTask::read_msd_from_memtable(ObTabletMergeCtx &ctx, ObUpdateTableStoreParam &param) { int ret = OB_SUCCESS; @@ -1089,36 +1278,25 @@ int ObTabletMergeFinishTask::try_schedule_compaction_after_mini( ObTabletHandle &tablet_handle) { int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; const ObTabletID &tablet_id = ctx.param_.tablet_id_; ObLSID ls_id = ctx.param_.ls_id_; - // schedule minor merge - if (OB_FAIL(ObTenantTabletScheduler::schedule_tablet_minor_merge(ls_id, *tablet_handle.get_obj()))) { - if (OB_SIZE_OVERFLOW != ret) { - LOG_WARN("failed to schedule minor merge", K(ret), K(ls_id), K(tablet_id)); - } - } - // schedule major merge - int64_t schedule_version = MTL(ObTenantTabletScheduler*)->get_frozen_version(); - if (ctx.schedule_major_ && MTL(ObTenantTabletScheduler*)->could_major_merge_start()) { - bool unused_tablet_merge_finish = false; - ObTenantTabletScheduler::ObScheduleStatistics unused_schedule_stats; - // fix issue 44407360: disable tablet force freeze in this call. 
- if (OB_FAIL(ObTenantTabletScheduler::schedule_tablet_major_merge( - schedule_version, - *ctx.ls_handle_.get_ls(), - *tablet_handle.get_obj(), - unused_tablet_merge_finish, - unused_schedule_stats, - false /*enable_force_freeze*/))) { - if (OB_SIZE_OVERFLOW != ret) { - LOG_WARN("failed to schedule tablet major merge", K(ret), K(schedule_version), K(ls_id), K(tablet_id)); - } - } - } + // report tablet stat + if (0 == ctx.get_merge_info().get_sstable_merge_info().macro_block_count_) { + // empty mini compaction, no need to reprot stat + } else if (OB_TMP_FAIL(try_report_tablet_stat_after_mini(ctx))) { + LOG_WARN("failed to report table stat after mini compaction", K(tmp_ret), K(ls_id), K(tablet_id)); + } + if (OB_TMP_FAIL(ObMediumCompactionScheduleFunc::schedule_tablet_medium_merge( + *ctx.ls_handle_.get_ls(), + *tablet_handle.get_obj()))) { + if (OB_SIZE_OVERFLOW != tmp_ret && OB_EAGAIN != tmp_ret) { + LOG_WARN("failed to schedule tablet medium merge", K(tmp_ret)); + } + } return ret; } - /* * ----------------------------------------------ObTabletMergeTask-------------------------------------------------- */ @@ -1152,7 +1330,7 @@ int ObTabletMergeTask::init(const int64_t idx, ObTabletMergeCtx &ctx) LOG_WARN("argument is invalid", K(ret), K(idx), K(ctx)); } else { void *buf = nullptr; - if (ctx.param_.is_major_merge() || ctx.param_.is_buf_minor_merge()) { + if (is_major_merge_type(ctx.param_.merge_type_) || is_meta_major_merge(ctx.param_.merge_type_)) { if (OB_ISNULL(buf = allocator_.alloc(sizeof(ObPartitionMajorMerger)))) { ret = OB_ALLOCATE_MEMORY_FAILED; STORAGE_LOG(WARN, "failed to alloc memory for major merger", K(ret)); @@ -1209,13 +1387,23 @@ int ObTabletMergeTask::process() ObTenantStatEstGuard stat_est_guard(MTL_ID()); ObTaskController::get().switch_task(share::ObTaskType::DATA_MAINTAIN); +#ifdef ERRSIM + ret = E(EventTable::EN_COMPACTION_MERGE_TASK) OB_SUCCESS; + if (OB_FAIL(ret)) { + STORAGE_LOG(INFO, "ERRSIM EN_COMPACTION_MERGE_TASK"); + return ret; + } 
+#endif + + DEBUG_SYNC(MERGE_TASK_PROCESS); + if (IS_NOT_INIT) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "ObTabletMergeTask is not inited", K(ret)); } else if (OB_ISNULL(ctx_)) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "Unexpected null merge ctx", K(ret)); - } else if (OB_UNLIKELY(ctx_->param_.is_major_merge() + } else if (OB_UNLIKELY(is_major_merge_type(ctx_->param_.merge_type_) && !MTL(ObTenantTabletScheduler *)->could_major_merge_start())) { ret = OB_CANCELED; LOG_INFO("Merge has been paused", K(ret)); diff --git a/src/storage/compaction/ob_tablet_merge_task.h b/src/storage/compaction/ob_tablet_merge_task.h index a818a56b72..f27bf81cca 100644 --- a/src/storage/compaction/ob_tablet_merge_task.h +++ b/src/storage/compaction/ob_tablet_merge_task.h @@ -14,25 +14,19 @@ #define STORAGE_COMPACTION_OB_TABLET_MERGE_TASK_H_ #include "share/scheduler/ob_dag_scheduler.h" -#include "storage/ob_i_store.h" #include "storage/ob_i_table.h" #include "observer/report/ob_i_meta_report.h" #include "storage/blocksstable/ob_datum_range.h" #include "storage/tx_storage/ob_ls_handle.h" +#include "storage/compaction/ob_i_compaction_filter.h" +#include "storage/compaction/ob_compaction_util.h" +#include "storage/ob_storage_struct.h" namespace oceanbase { -namespace share -{ -namespace schema -{ -class ObMergeSchema; -} -} namespace storage { class ObITable; -struct ObGetMergeTablesResult; class ObTablet; class ObTabletHandle; struct ObUpdateTableStoreParam; @@ -62,7 +56,6 @@ struct ObMergeParameter { ObMergeParameter(); ~ObMergeParameter() { reset(); } bool is_valid() const; - bool is_schema_valid() const; void reset(); int init(ObTabletMergeCtx &merge_ctx, const int64_t idx); @@ -73,8 +66,7 @@ struct ObMergeParameter { storage::ObTablesHandleArray *tables_handle_; ObMergeType merge_type_; ObMergeLevel merge_level_; - const share::schema::ObTableSchema *table_schema_; //table's schema need merge - const share::schema::ObMergeSchema *merge_schema_; + const ObStorageSchema *merge_schema_; 
blocksstable::ObDatumRange merge_range_; int16_t sstable_logic_seq_; ObVersionRange version_range_; @@ -82,13 +74,8 @@ struct ObMergeParameter { const ObTableReadInfo *full_read_info_; // full read info of old tablet bool is_full_merge_; // full merge or increment merge, duplicated with merge_level - OB_INLINE bool is_major_merge() const { return storage::is_major_merge(merge_type_); } - OB_INLINE bool is_buf_minor_merge() const { return storage::is_buf_minor_merge(merge_type_);} - OB_INLINE bool is_multi_version_minor_merge() const { return storage::is_multi_version_minor_merge(merge_type_); } - OB_INLINE bool is_mini_merge() const { return storage::is_mini_merge(merge_type_); } - OB_INLINE bool need_checksum() const { return storage::is_major_merge(merge_type_); } - TO_STRING_KV(KPC_(tables_handle), K_(merge_type), K_(merge_level), KP_(table_schema), - KP_(merge_schema), K_(merge_range), K_(version_range), K_(scn_range), K_(is_full_merge)); + TO_STRING_KV(KPC_(tables_handle), K_(merge_type), K_(merge_level), KP_(merge_schema), + K_(merge_range), K_(version_range), K_(scn_range), K_(is_full_merge)); private: DISALLOW_COPY_AND_ASSIGN(ObMergeParameter); }; @@ -96,24 +83,25 @@ private: struct ObTabletMergeDagParam : public share::ObIDagInitParam { ObTabletMergeDagParam(); + ObTabletMergeDagParam( + const storage::ObMergeType merge_type, + const share::ObLSID &ls_id, + const ObTabletID &tablet_id); virtual bool is_valid() const override; + storage::ObMergeType get_merge_type() const + { + return is_tenant_major_merge_ ? 
MAJOR_MERGE : merge_type_; + } - OB_INLINE bool is_major_merge() const { return storage::is_major_merge(merge_type_);} - OB_INLINE bool is_history_mini_minor_merge() const { return storage::is_history_mini_minor_merge(merge_type_);} - OB_INLINE bool is_mini_merge() const { return storage::is_mini_merge(merge_type_);} - OB_INLINE bool is_multi_version_minor_merge() const { return storage::is_multi_version_minor_merge(merge_type_); } - OB_INLINE bool is_buf_minor_merge() const { return storage::is_buf_minor_merge(merge_type_); } - OB_INLINE bool is_memtable_merge() const { return MINI_MERGE == merge_type_; } - OB_INLINE bool is_mini_minor_merge() const { return storage::is_mini_minor_merge(merge_type_); } - OB_INLINE bool is_minor_merge() const { return MINI_MINOR_MERGE == merge_type_ || MINOR_MERGE == merge_type_; } - TO_STRING_KV("merge_type",merge_type_to_str(merge_type_), K_(merge_version), K_(ls_id), K_(tablet_id), KP(report_), K_(for_diagnose)); + TO_STRING_KV("merge_type",merge_type_to_str(merge_type_), K_(merge_version), K_(ls_id), K_(tablet_id), KP(report_), K_(for_diagnose), K_(is_tenant_major_merge)); + bool for_diagnose_; + bool is_tenant_major_merge_; storage::ObMergeType merge_type_; int64_t merge_version_; share::ObLSID ls_id_; ObTabletID tablet_id_; observer::ObIMetaReport *report_; - bool for_diagnose_; }; class ObTabletMergePrepareTask: public share::ObITask @@ -124,10 +112,10 @@ public: int init(); protected: virtual int process() override; - int generate_merge_task(); private: - int prepare_index_tree(); - int build_merge_ctx(bool &skip_rest_operation); + int build_merge_ctx(bool &skip_merge_task_flag); + virtual int check_before_init() { return OB_SUCCESS; } + virtual int inner_init_ctx(ObTabletMergeCtx &ctx, bool &skip_merge_task_flag) = 0; protected: bool is_inited_; @@ -136,6 +124,30 @@ private: DISALLOW_COPY_AND_ASSIGN(ObTabletMergePrepareTask); }; +class ObTabletMajorPrepareTask: public ObTabletMergePrepareTask +{ +public: + 
ObTabletMajorPrepareTask() {} + virtual ~ObTabletMajorPrepareTask() {} +private: + virtual int check_before_init() override; + virtual int inner_init_ctx(ObTabletMergeCtx &ctx, bool &skip_merge_task_flag) override; + int create_sstable_directly(); +private: + DISALLOW_COPY_AND_ASSIGN(ObTabletMajorPrepareTask); +}; + +class ObTabletMiniPrepareTask: public ObTabletMergePrepareTask +{ +public: + ObTabletMiniPrepareTask() {} + virtual ~ObTabletMiniPrepareTask() {} +private: + virtual int inner_init_ctx(ObTabletMergeCtx &ctx, bool &skip_merge_task_flag) override; +private: + DISALLOW_COPY_AND_ASSIGN(ObTabletMiniPrepareTask); +}; + class ObTabletMergeFinishTask: public share::ObITask { public: @@ -152,6 +164,7 @@ private: int get_merged_sstable(ObTabletMergeCtx &ctx, blocksstable::ObSSTable *&sstable); int add_sstable_for_merge(ObTabletMergeCtx &ctx); int try_schedule_compaction_after_mini(ObTabletMergeCtx &ctx, storage::ObTabletHandle &tablet_handle); + int try_report_tablet_stat_after_mini(ObTabletMergeCtx &ctx); int read_msd_from_memtable(ObTabletMergeCtx &ctx, storage::ObUpdateTableStoreParam &param); int traverse_all_memtables(ObTabletMergeCtx &ctx, memtable::ObIMultiSourceDataUnit *msd, const memtable::MultiSourceDataUnitType &type); private: @@ -168,8 +181,11 @@ public: ls_id_(), tablet_id_() {} + virtual ~ObMergeDagHash() {} + + virtual int64_t inner_hash() const; + bool belong_to_same_tablet(const ObMergeDagHash *other) const; - int64_t inner_hash() const; TO_STRING_KV(K_(merge_type), K_(ls_id), K_(tablet_id)); ObMergeType merge_type_; @@ -201,7 +217,13 @@ public: virtual int64_t to_string(char* buf, const int64_t buf_len) const override; virtual lib::Worker::CompatMode get_compat_mode() const override { return compat_mode_; } + static int generate_merge_task( + ObBasicTabletMergeDag &merge_dag, + ObTabletMergeCtx &ctx, + share::ObITask *prepare_task = nullptr); + protected: + int alloc_merge_ctx(); int inner_init(const ObTabletMergeDagParam &param); bool is_inited_; @@ 
-217,7 +239,8 @@ class ObTabletMergeDag : public ObBasicTabletMergeDag public: ObTabletMergeDag(const share::ObDagType::ObDagTypeEnum type); virtual ~ObTabletMergeDag() {} - virtual int create_first_task() override; + template + int create_first_task(); virtual int gene_compaction_info(compaction::ObTabletCompactionProgress &progress) override; virtual int diagnose_compaction_info(compaction::ObDiagnoseTabletCompProgress &progress) override; @@ -228,6 +251,10 @@ class ObTabletMajorMergeDag: public ObTabletMergeDag public: ObTabletMajorMergeDag(); virtual ~ObTabletMajorMergeDag(); + virtual int create_first_task() override + { + return ObTabletMergeDag::create_first_task(); + } virtual int init_by_param(const share::ObIDagInitParam *param) override; private: DISALLOW_COPY_AND_ASSIGN(ObTabletMajorMergeDag); @@ -238,20 +265,75 @@ class ObTabletMiniMergeDag: public ObTabletMergeDag public: ObTabletMiniMergeDag(); virtual ~ObTabletMiniMergeDag(); + virtual int create_first_task() override + { + return ObTabletMergeDag::create_first_task(); + } virtual int init_by_param(const share::ObIDagInitParam *param) override; private: DISALLOW_COPY_AND_ASSIGN(ObTabletMiniMergeDag); }; -class ObTabletMinorMergeDag: public ObTabletMergeDag +class ObTabletMergeExecutePrepareTask: public share::ObITask { public: - ObTabletMinorMergeDag(); - virtual ~ObTabletMinorMergeDag(); - virtual int init_by_param(const share::ObIDagInitParam *param) override; + ObTabletMergeExecutePrepareTask(); + virtual ~ObTabletMergeExecutePrepareTask(); + int init(const ObGetMergeTablesResult &result, ObTabletMergeCtx &ctx); + virtual int process() override; +protected: + virtual int prepare_compaction_filter() { return OB_SUCCESS; } + + bool is_inited_; + ObTabletMergeCtx *ctx_; + ObGetMergeTablesResult result_; +}; + +// for minor merge +class ObTxTableMergeExecutePrepareTask : public ObTabletMergeExecutePrepareTask +{ +protected: + virtual int prepare_compaction_filter() override; +}; + +class 
ObTabletMergeExecuteDag: public ObTabletMergeDag +{ +public: + ObTabletMergeExecuteDag(); + virtual ~ObTabletMergeExecuteDag(); + virtual int init_by_param(const share::ObIDagInitParam *param) override; // for diagnose + int direct_init_ctx( + const ObTabletMergeDagParam &param, + const lib::Worker::CompatMode compat_mode, + const ObGetMergeTablesResult &result, + storage::ObLSHandle &ls_handle, + ObTabletHandle &tablet_handle); + template + int create_first_task(const ObGetMergeTablesResult &result); virtual bool operator == (const ObIDag &other) const override; + const share::ObScnRange& get_merge_range() const { return merge_scn_range_; } + + INHERIT_TO_STRING_KV("ObBasicTabletMergeDag", ObBasicTabletMergeDag, K_(merge_scn_range)); private: - DISALLOW_COPY_AND_ASSIGN(ObTabletMinorMergeDag); + int prepare_compaction(const ObGetMergeTablesResult &result); + virtual int prepare_compaction_filter() { return OB_SUCCESS; } + virtual int create_first_task(const ObGetMergeTablesResult &result); + DISALLOW_COPY_AND_ASSIGN(ObTabletMergeExecuteDag); + + share::ObScnRange merge_scn_range_; +}; + +class ObTxTableMinorExecuteDag: public ObTabletMergeExecuteDag +{ +public: + ObTxTableMinorExecuteDag() + : compaction_filter_() + {} + virtual ~ObTxTableMinorExecuteDag() = default; +private: + virtual int create_first_task(const ObGetMergeTablesResult &result) override; + DISALLOW_COPY_AND_ASSIGN(ObTxTableMinorExecuteDag); + ObTransStatusFilter compaction_filter_; }; class ObTabletMergeTask: public share::ObITask diff --git a/src/storage/compaction/ob_tenant_freeze_info_mgr.cpp b/src/storage/compaction/ob_tenant_freeze_info_mgr.cpp index 6232264158..c7ae746e33 100644 --- a/src/storage/compaction/ob_tenant_freeze_info_mgr.cpp +++ b/src/storage/compaction/ob_tenant_freeze_info_mgr.cpp @@ -34,6 +34,8 @@ #include "share/ob_zone_merge_table_operator.h" #include "storage/compaction/ob_server_compaction_event_history.h" #include "storage/compaction/ob_tenant_tablet_scheduler.h" +#include 
"storage/tx_storage/ob_ls_map.h" +#include "storage/tx_storage/ob_ls_service.h" namespace oceanbase { @@ -49,6 +51,7 @@ namespace storage ObTenantFreezeInfoMgr::ObTenantFreezeInfoMgr() : reload_task_(*this), + update_reserved_snapshot_task_(*this), info_list_(), snapshots_(), lock_(), @@ -109,7 +112,9 @@ int ObTenantFreezeInfoMgr::start() ret = OB_NOT_INIT; STORAGE_LOG(WARN, "not init", K(ret)); } else if (OB_FAIL(TG_SCHEDULE(tg_id_, reload_task_, RELOAD_INTERVAL, true))) { - STORAGE_LOG(ERROR, "fail to schedule task", K(ret)); + STORAGE_LOG(ERROR, "fail to schedule reload task", K(ret)); + } else if (OB_FAIL(TG_SCHEDULE(tg_id_, update_reserved_snapshot_task_, UPDATE_LS_RESERVED_SNAPSHOT_INTERVAL, true))) { + STORAGE_LOG(ERROR, "fail to schedule update reserved snapshot task", K(ret)); } return ret; @@ -199,7 +204,7 @@ int ObTenantFreezeInfoMgr::get_freeze_info_behind_major_snapshot( int64_t ret_pos = find_pos_in_list_(major_snapshot_version, info_list); if (ret_pos < 0 || ret_pos >= info_list.count()) { ret = OB_ENTRY_NOT_EXIST; - STORAGE_LOG(WARN, "Freeze info of specified major version not found", K(ret), K(major_snapshot_version)); + STORAGE_LOG(DEBUG, "Freeze info of specified major version not found", K(ret), K(major_snapshot_version)); } else { for (int64_t i = ret_pos; OB_SUCC(ret) && i < info_list.count(); i++) { if (OB_FAIL(freeze_infos.push_back(info_list.at(i)))) { @@ -288,7 +293,7 @@ int ObTenantFreezeInfoMgr::get_freeze_info_behind_snapshot_version_( STORAGE_LOG(WARN, "not init", K(ret)); } else if (info_list.empty()) { ret = OB_ENTRY_NOT_EXIST; - LOG_WARN("no freeze info in curr info_list", K(ret), K(cur_idx_), K(info_list_[0]), K(info_list_[1])); + LOG_INFO("no freeze info in curr info_list", K(ret), K(cur_idx_), K(info_list_[0]), K(info_list_[1])); } else { bool found = false; for (int64_t i = 0; OB_SUCC(ret) && !found && i < info_list.count(); ++i) { @@ -319,15 +324,16 @@ int ObTenantFreezeInfoMgr::inner_get_neighbour_major_freeze( info.reset(); 
ObIArray &info_list = info_list_[cur_idx_]; - + bool found = false; if (OB_UNLIKELY(!inited_)) { ret = OB_NOT_INIT; STORAGE_LOG(WARN, "not init", K(ret)); } else if (info_list.empty()) { ret = OB_ENTRY_NOT_EXIST; LOG_WARN("no freeze info in curr info_list", K(ret), K(cur_idx_), K(info_list_[0]), K(info_list_[1])); + } else if (snapshot_version >= info_list.at(info_list.count() - 1).freeze_version) { + // use found = false setting } else { - bool found = false; for (int64_t i = 0; i < info_list.count() && OB_SUCC(ret) && !found; ++i) { FreezeInfo &next_info = info_list.at(i); if (snapshot_version < next_info.freeze_version) { @@ -342,13 +348,11 @@ int ObTenantFreezeInfoMgr::inner_get_neighbour_major_freeze( } } } - - if (OB_SUCC(ret) && !found) { - info.next.freeze_version = INT64_MAX; - info.prev = info_list.at(info_list.count() - 1); - } } - + if (OB_SUCC(ret) && !found) { + info.next.freeze_version = INT64_MAX; + info.prev = info_list.at(info_list.count() - 1); + } return ret; } @@ -425,6 +429,7 @@ int ObTenantFreezeInfoMgr::get_min_reserved_snapshot( FreezeInfo freeze_info; int64_t duration = 0; bool unused = false; + snapshot_version = 0; RLockGuard lock_guard(lock_); ObIArray &snapshots = snapshots_[cur_idx_]; @@ -445,7 +450,7 @@ int ObTenantFreezeInfoMgr::get_min_reserved_snapshot( ret = OB_SUCCESS; } } - snapshot_version = std::max(0L, snapshot_gc_ts_ - duration * 1000L * 1000L *1000L); + snapshot_version = std::max(0L, snapshot_gc_ts_ - duration * 1000L * 1000L * 1000L); snapshot_version = std::min(snapshot_version, freeze_info.freeze_version); for (int64_t i = 0; i < snapshots.count() && OB_SUCC(ret); ++i) { bool related = false; @@ -456,6 +461,8 @@ int ObTenantFreezeInfoMgr::get_min_reserved_snapshot( snapshot_version = std::min(snapshot_version, snapshot.snapshot_scn_.get_val_for_tx()); } } + LOG_DEBUG("get_min_reserved_snapshot", K(ret), K(duration), K(snapshot_version), K(freeze_info), + K(snapshot_gc_ts_)); } return ret; } @@ -624,8 +631,6 @@ int 
ObTenantFreezeInfoMgr::update_next_info_list(const ObIArray &inf if (OB_SUCC(ret)) { if (OB_FAIL(next_info_list.push_back(next))) { STORAGE_LOG(WARN, "failed to push back freeze info", K(ret)); - } else { - STORAGE_LOG(INFO, "update info", "freeze info", next); } } } @@ -643,8 +648,6 @@ int ObTenantFreezeInfoMgr::update_next_snapshots(const ObIArray for (int64_t i = 0; OB_SUCC(ret) && i < snapshots.count(); ++i) { if (OB_FAIL(next_snapshots.push_back(snapshots.at(i)))) { STORAGE_LOG(WARN, "fail to push back snapshot", K(ret)); - } else { - STORAGE_LOG(INFO, "update info", "snapshot", snapshots.at(i)); } } return ret; @@ -808,6 +811,7 @@ int ObTenantFreezeInfoMgr::ReloadTask::refresh_merge_info() LOG_WARN("fail to load zone merge info", KR(ret), K(zone_merge_info)); } else { ObTenantTabletScheduler *scheduler = MTL(ObTenantTabletScheduler *); + scheduler->set_inner_table_merged_scn(global_merge_info.last_merged_scn_.get_scn().get_val_for_tx()); // set merged version if (global_merge_info.suspend_merging_.get_value()) { // suspend_merge scheduler->stop_major_merge(); LOG_INFO("schedule zone to stop major merge", K(tenant_id), K(zone_merge_info), K(global_merge_info)); @@ -846,7 +850,7 @@ int ObTenantFreezeInfoMgr::ReloadTask::try_update_info() // snapshot_gc_ts should be obtained before freeze_info and snapshots ObSEArray freeze_info; ObSEArray snapshots; - bool changed = false; + bool gc_snapshot_ts_changed = false; observer::ObService *ob_service = GCTX.ob_service_; int64_t min_major_snapshot = INT64_MAX; @@ -865,11 +869,11 @@ int ObTenantFreezeInfoMgr::ReloadTask::try_update_info() freeze_info, snapshots, min_major_snapshot, - changed))) { + gc_snapshot_ts_changed))) { STORAGE_LOG(WARN, "update info failed", K(ret), K(snapshot_gc_ts), K(freeze_info), K(snapshots)); } else { - if (changed || ob_service->is_heartbeat_expired()) { + if (gc_snapshot_ts_changed || ob_service->is_heartbeat_expired()) { last_change_ts_ = ObTimeUtility::current_time(); } else { const 
int64_t last_not_change_interval_us = ObTimeUtility::current_time() - last_change_ts_; @@ -900,12 +904,65 @@ void ObTenantFreezeInfoMgr::ReloadTask::runTimerTask() if (OB_TMP_FAIL(refresh_merge_info())) { LOG_WARN("fail to refresh merge info", KR(tmp_ret)); } - - tmp_ret = OB_SUCCESS; if (OB_TMP_FAIL(try_update_info())) { LOG_WARN("fail to try update info", KR(tmp_ret)); } } +void ObTenantFreezeInfoMgr::UpdateLSResvSnapshotTask::runTimerTask() +{ + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(mgr_.try_update_reserved_snapshot())) { + LOG_WARN("fail to try reserved snapshot", KR(tmp_ret)); + } +} + +int ObTenantFreezeInfoMgr::try_update_reserved_snapshot() +{ + int ret = OB_SUCCESS; + int64_t duration = 0; + int64_t reserved_snapshot = 0; + int64_t cost_ts = ObTimeUtility::fast_current_time(); + { + RLockGuard lock_guard(lock_); + + if (OB_UNLIKELY(!inited_)) { + ret = OB_NOT_INIT; + STORAGE_LOG(WARN, "ObTenantFreezeInfoMgr not init", K(ret)); + } else if (OB_FAIL(get_multi_version_duration(duration))) { + STORAGE_LOG(WARN, "fail to get multi version duration", K(ret)); + } else { + reserved_snapshot = std::max(0L, snapshot_gc_ts_ - duration * 1000L * 1000L *1000L); + LOG_INFO("success to update min reserved snapshot", K(reserved_snapshot), K(duration), K(snapshot_gc_ts_)); + } + } // end of lock + + // loop all ls, try update reserved snapshot + ObSharedGuard ls_iter_guard; + ObLS *ls = nullptr; + if (OB_FAIL(ret) || reserved_snapshot <= 0) { + } else if (OB_FAIL(MTL(ObLSService *)->get_ls_iter(ls_iter_guard, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get ls iterator", K(ret)); + } else { + int tmp_ret = OB_SUCCESS; + while (OB_SUCC(ret)) { + if (OB_FAIL(ls_iter_guard.get_ptr()->get_next(ls))) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + break; + } + } else if (OB_ISNULL(ls)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is null", K(ret), KP(ls)); + } else if (OB_TMP_FAIL(ls->try_sync_reserved_snapshot(reserved_snapshot, true/*update_flag*/))) { + 
LOG_WARN("failed to update min reserved snapshot", K(tmp_ret), KPC(ls), K(reserved_snapshot)); + } + } // end of while + } + cost_ts = ObTimeUtility::fast_current_time() - cost_ts; + STORAGE_LOG(INFO, "update reserved snapshot finished", K(cost_ts), K(reserved_snapshot)); + return ret; +} + } // storage } // oceanbase diff --git a/src/storage/compaction/ob_tenant_freeze_info_mgr.h b/src/storage/compaction/ob_tenant_freeze_info_mgr.h index 41a84c0ccf..a7849c8154 100644 --- a/src/storage/compaction/ob_tenant_freeze_info_mgr.h +++ b/src/storage/compaction/ob_tenant_freeze_info_mgr.h @@ -89,6 +89,7 @@ public: int get_freeze_info_behind_major_snapshot(const int64_t major_snapshot, common::ObIArray &freeze_infos); int get_freeze_info_by_snapshot_version(const int64_t snapshot_version, FreezeInfo &freeze_info); + // get first freeze info larger than snapshot int get_freeze_info_behind_snapshot_version(const int64_t snapshot_version, FreezeInfo &freeze_info); int get_neighbour_major_freeze(const int64_t snapshot_version, NeighbourFreezeInfo &info); @@ -132,6 +133,7 @@ private: typedef common::RWLock::WLockGuard WLockGuard; static const int64_t RELOAD_INTERVAL = 1L * 1000L * 1000L; + static const int64_t UPDATE_LS_RESERVED_SNAPSHOT_INTERVAL = 10L * 1000L * 1000L; static const int64_t MAX_GC_SNAPSHOT_TS_REFRESH_TS = 10L * 60L * 1000L * 1000L; static const int64_t FLUSH_GC_SNAPSHOT_TS_REFRESH_TS = common::MODIFY_GC_SNAPSHOT_INTERVAL + 10L * 1000L * 1000L; @@ -153,7 +155,7 @@ private: int get_freeze_info_behind_snapshot_version_( const int64_t snapshot_version, FreezeInfo &freeze_info); - + int try_update_reserved_snapshot(); class ReloadTask : public common::ObTimerTask { public: @@ -176,8 +178,18 @@ private: int64_t last_change_ts_; }; + class UpdateLSResvSnapshotTask : public common::ObTimerTask + { + public: + UpdateLSResvSnapshotTask(ObTenantFreezeInfoMgr &mgr) : mgr_(mgr) {} + virtual void runTimerTask(); + private: + ObTenantFreezeInfoMgr &mgr_; + }; + private: 
ReloadTask reload_task_; + UpdateLSResvSnapshotTask update_reserved_snapshot_task_; common::ObSEArray info_list_[2]; common::ObSEArray snapshots_[2]; // snapshots_ maintains multi_version_start for index and others common::RWLock lock_; @@ -196,7 +208,9 @@ private: ret = common::OB_ERR_UNEXPECTED; \ STORAGE_LOG(ERROR, "failed to get tenant freeze info mgr from mtl", K(ret)); \ } else if (OB_FAIL(mgr->func(args))) { \ - STORAGE_LOG(WARN, "failed to execute func", K(ret)); \ + if (OB_ENTRY_NOT_EXIST != ret) { \ + STORAGE_LOG(WARN, "failed to execute func", K(ret)); \ + } \ } \ ret; \ }) diff --git a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp index daea129c8b..c7ae9c6e89 100644 --- a/src/storage/compaction/ob_tenant_tablet_scheduler.cpp +++ b/src/storage/compaction/ob_tenant_tablet_scheduler.cpp @@ -31,6 +31,10 @@ #include "ob_tenant_freeze_info_mgr.h" #include "ob_tenant_compaction_progress.h" #include "ob_server_compaction_event_history.h" +#include "storage/compaction/ob_tenant_freeze_info_mgr.h" +#include "storage/compaction/ob_medium_compaction_func.h" +#include "storage/compaction/ob_tenant_compaction_progress.h" +#include "storage/compaction/ob_server_compaction_event_history.h" #include "share/scn.h" namespace oceanbase @@ -109,18 +113,28 @@ int ObFastFreezeChecker::check_hotspot_need_fast_freeze( return ret; } - void ObTenantTabletScheduler::MergeLoopTask::runTimerTask() { int ret = OB_SUCCESS; int64_t cost_ts = ObTimeUtility::fast_current_time(); - if (OB_FAIL(MTL(ObTenantTabletScheduler *)->merge_all())) { + if (OB_FAIL(MTL(ObTenantTabletScheduler *)->schedule_all_tablets_minor())) { LOG_WARN("Fail to merge all partition", K(ret)); } cost_ts = ObTimeUtility::fast_current_time() - cost_ts; LOG_INFO("MergeLoopTask", K(cost_ts)); } +void ObTenantTabletScheduler::MediumLoopTask::runTimerTask() +{ + int ret = OB_SUCCESS; + int64_t cost_ts = ObTimeUtility::fast_current_time(); + if 
(OB_FAIL(MTL(ObTenantTabletScheduler *)->schedule_all_tablets_medium())) { + LOG_WARN("Fail to merge all partition", K(ret)); + } + cost_ts = ObTimeUtility::fast_current_time() - cost_ts; + LOG_INFO("MediumLoopTask", K(cost_ts)); +} + void ObTenantTabletScheduler::SSTableGCTask::runTimerTask() { int ret = OB_SUCCESS; @@ -139,14 +153,17 @@ ObTenantTabletScheduler::ObTenantTabletScheduler() major_merge_status_(true), is_stop_(true), merge_loop_tg_id_(0), + medium_loop_tg_id_(0), sstable_gc_tg_id_(0), schedule_interval_(0), bf_queue_(), frozen_version_lock_(), frozen_version_(INIT_COMPACTION_SCN), merged_version_(INIT_COMPACTION_SCN), + inner_table_merged_scn_(INIT_COMPACTION_SCN), schedule_stats_(), merge_loop_task_(), + medium_loop_task_(), sstable_gc_task_(), fast_freeze_checker_() { @@ -163,12 +180,15 @@ void ObTenantTabletScheduler::destroy() stop(); wait(); TG_DESTROY(merge_loop_tg_id_); + TG_DESTROY(medium_loop_tg_id_); TG_DESTROY(sstable_gc_tg_id_); bf_queue_.destroy(); frozen_version_ = 0; merged_version_ = 0; + inner_table_merged_scn_ = 0; schedule_stats_.reset(); merge_loop_tg_id_ = 0; + medium_loop_tg_id_ = 0; sstable_gc_tg_id_ = 0; schedule_interval_ = 0; is_inited_ = false; @@ -221,6 +241,12 @@ int ObTenantTabletScheduler::start() LOG_WARN("failed to start minor merge scan thread", K(ret)); } else if (OB_FAIL(TG_SCHEDULE(merge_loop_tg_id_, merge_loop_task_, schedule_interval_, repeat))) { LOG_WARN("Fail to schedule minor merge scan task", K(ret)); + } else if (OB_FAIL(TG_CREATE_TENANT(lib::TGDefIDs::MediumLoop, medium_loop_tg_id_))) { + LOG_WARN("failed to create medium loop thread", K(ret)); + } else if (OB_FAIL(TG_START(medium_loop_tg_id_))) { + LOG_WARN("failed to start medium merge scan thread", K(ret)); + } else if (OB_FAIL(TG_SCHEDULE(medium_loop_tg_id_, medium_loop_task_, schedule_interval_, repeat))) { + LOG_WARN("Fail to schedule medium merge scan task", K(ret)); } else if (OB_FAIL(TG_CREATE_TENANT(lib::TGDefIDs::SSTableGC, sstable_gc_tg_id_))) 
{ LOG_WARN("failed to create merge loop thread", K(ret)); } else if (OB_FAIL(TG_START(sstable_gc_tg_id_))) { @@ -264,6 +290,7 @@ void ObTenantTabletScheduler::stop() { is_stop_ = true; TG_STOP(merge_loop_tg_id_); + TG_STOP(medium_loop_tg_id_); TG_STOP(sstable_gc_tg_id_); stop_major_merge(); } @@ -271,6 +298,7 @@ void ObTenantTabletScheduler::stop() void ObTenantTabletScheduler::wait() { TG_WAIT(merge_loop_tg_id_); + TG_WAIT(medium_loop_tg_id_); TG_WAIT(sstable_gc_tg_id_); } @@ -305,10 +333,9 @@ int ObTenantTabletScheduler::try_remove_old_table(ObLS &ls) } else if (tablet_id.is_special_merge_tablet()) { } else { int64_t multi_version_start = 0; - int64_t min_reserved_snapshot = 0; bool need_remove = false; int tmp_ret = OB_SUCCESS; - if (OB_TMP_FAIL(tablet->get_kept_multi_version_start(multi_version_start, min_reserved_snapshot))) { + if (OB_TMP_FAIL(ObTablet::get_kept_multi_version_start(ls, *tablet, multi_version_start))) { LOG_WARN("failed to get multi version start", K(tmp_ret), K(tablet_id)); } else if (OB_TMP_FAIL(tablet->check_need_remove_old_table(multi_version_start, need_remove))) { LOG_WARN("failed to check need remove old store", K(tmp_ret), K(multi_version_start), K(tablet_id)); @@ -321,7 +348,7 @@ int ObTenantTabletScheduler::try_remove_old_table(ObLS &ls) LOG_WARN("failed to update table store", K(tmp_ret), K(param), K(tenant_id), K(ls_id), K(tablet_id)); } else { FLOG_INFO("success to remove old table in table store", K(tmp_ret), K(tenant_id), K(ls_id), - K(tablet_id), K(multi_version_start), K(min_reserved_snapshot), KPC(tablet)); + K(tablet_id), K(multi_version_start), KPC(tablet)); } } } @@ -359,6 +386,46 @@ int ObTenantTabletScheduler::update_upper_trans_version_and_gc_sstable() return ret; } +int ObTenantTabletScheduler::schedule_all_tablets_minor() +{ + int ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + ObSharedGuard ls_iter_guard; + ObLS *ls = nullptr; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("The ObTenantTabletScheduler has not 
been inited", K(ret)); + } else if (OB_FAIL(MTL(ObLSService *)->get_ls_iter(ls_iter_guard, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get ls iterator", K(ret)); + } + + while (OB_SUCC(ret)) { + if (OB_FAIL(ls_iter_guard.get_ptr()->get_next(ls))) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + break; + } else { + LOG_WARN("failed to get ls", K(ret), KP(ls_iter_guard.get_ptr())); + } + } else if (OB_ISNULL(ls)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is null", K(ret), K(ls)); + } else { + const ObLSID &ls_id = ls->get_ls_id(); + bool need_merge = false; + if (OB_FAIL(check_ls_state(*ls, need_merge))) { + LOG_WARN("failed to check ls state", K(ret), K(ls_id)); + } else if (!need_merge) { + // no need to merge, do nothing + } else if (OB_TMP_FAIL(schedule_ls_minor_merge(*ls))) { + LOG_WARN("failed to schedule ls minor merge", K(tmp_ret), K(ls_id)); + } + } + } + return ret; +} + int ObTenantTabletScheduler::check_ls_compaction_finish(const share::ObLSID &ls_id) { int ret = OB_SUCCESS; @@ -422,18 +489,6 @@ int ObTenantTabletScheduler::schedule_load_bloomfilter(const blocksstable::Macro return ret; } -int ObTenantTabletScheduler::merge_all() -{ - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObTenantTabletScheduler has not been inited", K(ret)); - } else if (OB_FAIL(schedule_all_tablets())) { - LOG_WARN("failed to schedule all tablet major merge", K(ret)); - } - return ret; -} - int ObTenantTabletScheduler::schedule_merge(const int64_t broadcast_version) { int ret = OB_SUCCESS; @@ -491,28 +546,6 @@ bool ObTenantTabletScheduler::check_weak_read_ts_ready( return is_ready_for_compaction; } -int ObTenantTabletScheduler::check_and_freeze_for_major( - const common::ObTabletID &tablet_id, - const int64_t &merge_version, - ObLS &ls) -{ - int ret = OB_SUCCESS; - const share::ObLSID &ls_id = ls.get_ls_id(); - - // TODO: @dengzhi.ldz opt force freeze when no inc data in active memtable - if (OB_UNLIKELY(merge_version > 
MTL(ObTenantTabletScheduler *)->get_frozen_version())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("get invalid arguments", K(ret), K(merge_version)); - } else if (OB_FAIL(MTL(ObTenantFreezer *)->tablet_freeze(tablet_id, true/*force_freeze*/))) { - LOG_WARN("failed to force freeze tablet", K(ret), K(ls_id), K(tablet_id)); - } else { - LOG_DEBUG("succeed to freeze tablet before merge", K(ret), K(ls_id), K(tablet_id), - K(merge_version)); - } - - return ret; -} - void ObTenantTabletScheduler::stop_major_merge() { if (major_merge_status_) { @@ -568,7 +601,8 @@ int ObTenantTabletScheduler::schedule_merge_dag( const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, const ObMergeType merge_type, - const int64_t &merge_snapshot_version) + const int64_t &merge_snapshot_version, + const bool is_tenant_major_merge) { int ret = OB_SUCCESS; ObTabletMergeDagParam param; @@ -576,6 +610,7 @@ int ObTenantTabletScheduler::schedule_merge_dag( param.tablet_id_ = tablet_id; param.merge_type_ = merge_type; param.merge_version_ = merge_snapshot_version; + param.is_tenant_major_merge_ = is_tenant_major_merge; if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_tablet_merge_dag(param))) { if (OB_EAGAIN != ret && OB_SIZE_OVERFLOW != ret) { LOG_WARN("failed to schedule tablet merge dag", K(ret)); @@ -584,120 +619,222 @@ int ObTenantTabletScheduler::schedule_merge_dag( return ret; } -int ObTenantTabletScheduler::schedule_tx_table_merge( - const ObLSID &ls_id, - ObTablet &tablet) +int ObTenantTabletScheduler::schedule_tablet_meta_major_merge( + ObLSHandle &ls_handle, + ObTabletHandle &tablet_handle) { int ret = OB_SUCCESS; - int tmp_ret = OB_SUCCESS; - ObTabletTableStore &table_store = tablet.get_table_store(); - bool need_merge = false; - if (OB_FAIL(ObPartitionMergePolicy::check_need_mini_minor_merge(tablet, need_merge))) { - LOG_WARN("failed to check need merge", K(ret), K(table_store)); - } else if (need_merge) { - ObTabletMergeDagParam param; - param.ls_id_ = ls_id; - 
param.tablet_id_ = tablet.get_tablet_meta().tablet_id_; - param.merge_type_ = MINI_MINOR_MERGE; - param.merge_version_ = ObVersionRange::MIN_VERSION; - if (OB_TMP_FAIL(compaction::ObScheduleDagFunc::schedule_tx_table_merge_dag(param))) { - if (OB_SIZE_OVERFLOW == tmp_ret) { - ret = OB_SIZE_OVERFLOW; - } else if (OB_EAGAIN != tmp_ret) { - LOG_WARN("failed to schedule tx tablet merge dag", K(tmp_ret)); - } - } - } - return ret; -} + const ObLSID &ls_id = ls_handle.get_ls()->get_ls_id(); + const ObTabletID &tablet_id = tablet_handle.get_obj()->get_tablet_meta().tablet_id_; + LOG_INFO("start try to schedule tablet meta major merge", K(ls_id), K(tablet_id), K(tablet_handle)); // tmp log, remove later -int ObTenantTabletScheduler::schedule_tablet_minor_merge(const ObLSID ls_id, ObTablet &tablet) -{ - int ret = OB_SUCCESS; - int tmp_ret = OB_SUCCESS; - const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; - for (int i = 0; OB_SUCC(ret) && i < NO_MAJOR_MERGE_TYPE_CNT; ++i) { - bool need_merge = false; - if (OB_FAIL(ObPartitionMergePolicy::check_need_minor_merge[MERGE_TYPES[i]](tablet, need_merge))) { + ObGetMergeTablesParam param; + ObGetMergeTablesResult result; + + const ObTabletTableStore &table_store = tablet_handle.get_obj()->get_table_store(); + ObITable *last_major = table_store.get_major_sstables().get_boundary_table(true/*last*/); + ObAdaptiveMergePolicy::AdaptiveMergeReason adaptive_merge_reason = ObAdaptiveMergePolicy::AdaptiveMergeReason::NONE; + int64_t max_sync_medium_scn = 0; + + if (OB_FAIL(tablet_handle.get_obj()->get_max_sync_medium_scn(max_sync_medium_scn))) { + LOG_WARN("failed to get max sync medium snapshot", K(ret), K(ls_id), K(tablet_id)); + } else if (tablet_handle.get_obj()->get_medium_compaction_info_list().size() > 0 + || nullptr == last_major + || max_sync_medium_scn > last_major->get_snapshot_version()) { + // do nothing + } else if (OB_FAIL(ObAdaptiveMergePolicy::get_adaptive_merge_reason(*tablet_handle.get_obj(), 
adaptive_merge_reason))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get meta merge priority", K(ret), K(tablet_id)); + } else { + ret = OB_SUCCESS; + } + } else if (ObAdaptiveMergePolicy::is_valid_merge_reason(adaptive_merge_reason)) { + LOG_INFO("start schedule meta merge", K(*tablet_handle.get_obj())); // tmp log, remove later + param.merge_type_ = META_MAJOR_MERGE; + if (OB_FAIL(ObAdaptiveMergePolicy::get_meta_merge_tables( + param, + *ls_handle.get_ls(), + *tablet_handle.get_obj(), + result))) { if (OB_NO_NEED_MERGE == ret) { ret = OB_SUCCESS; - LOG_DEBUG("tablet no need merge", K(ret), "merge_type", MERGE_TYPES[i], K(tablet_id), K(tablet)); + LOG_DEBUG("tablet no need meta merge", K(ret), K(param), K(tablet_id)); } else { - LOG_WARN("failed to check need merge", K(ret), "merge_type", MERGE_TYPES[i], K(tablet)); - } - } else if (need_merge && OB_TMP_FAIL(schedule_merge_dag(ls_id, tablet_id, MERGE_TYPES[i], ObVersionRange::MIN_VERSION))) { - if (OB_SIZE_OVERFLOW == tmp_ret) { - ret = OB_SIZE_OVERFLOW; - } else if (OB_EAGAIN != tmp_ret) { - ret = tmp_ret; - LOG_WARN("failed to schedule tablet merge dag", K(tmp_ret)); - } - } else if (need_merge) { - LOG_DEBUG("success to schedule tablet minor merge", K(tmp_ret), K(ls_id), K(tablet_id), "merge_type", MERGE_TYPES[i]); - } - } // end of for - return ret; -} - -int ObTenantTabletScheduler::schedule_tablet_major_merge( - int64_t &merge_version, - ObLS &ls, - ObTablet &tablet, - bool &tablet_merge_finish, - ObScheduleStatistics &schedule_stats, - const bool enable_force_freeze) -{ - int ret = OB_SUCCESS; - int tmp_ret = OB_SUCCESS; - tablet_merge_finish = false; - const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; - ObLSID ls_id = ls.get_ls_id(); - bool need_merge = false; - bool can_merge = false; - bool need_force_freeze = false; - if (!check_weak_read_ts_ready(merge_version, ls)) { - // ls weak read ts is not ready - } else if (OB_FAIL(ObPartitionMergePolicy::check_need_major_merge( - 
tablet, - merge_version, - need_merge, - can_merge, - need_force_freeze))) { - LOG_WARN("failed to check need major merge", K(ret), K(ls_id), K(tablet_id)); - } else if (!need_merge) { // no need merge - tablet_merge_finish = true; - schedule_stats.finish_cnt_++; - LOG_DEBUG("tablet no need to merge now", K(ret), K(ls_id), K(tablet_id), K(merge_version)); - } else if (can_merge) { - if (OB_TMP_FAIL(schedule_merge_dag(ls_id, tablet_id, MAJOR_MERGE, merge_version))) { - if (OB_SIZE_OVERFLOW == tmp_ret) { - ret = OB_SIZE_OVERFLOW; - } else if (OB_EAGAIN != tmp_ret) { - LOG_WARN("failed to schedule tablet merge dag", K(tmp_ret)); + LOG_WARN("failed to get meta merge tables", K(ret), K(param), K(tablet_id)); } } else { - schedule_stats.schedule_cnt_++; - LOG_DEBUG("success to schedule tablet major merge", K(tmp_ret), K(ls_id), K(tablet_id), - K(need_force_freeze)); - } - } else if (need_force_freeze && enable_force_freeze) { - schedule_stats.force_freeze_cnt_++; - if (OB_TMP_FAIL(check_and_freeze_for_major(tablet_id, merge_version, ls))) { - LOG_WARN("failed to check and freeze for major", K(tmp_ret), K(ls_id), K(tablet_id)); - } - } - - if (OB_SUCC(ret) && tablet_merge_finish && tablet.get_tablet_meta().report_status_.need_report()) { - if (OB_TMP_FAIL(GCTX.ob_service_->submit_tablet_update_task(MTL_ID(), ls_id, tablet_id))) { - LOG_WARN("failed to submit tablet update task to report", K(tmp_ret), K(MTL_ID()), K(tablet_id)); - } else if (OB_TMP_FAIL(ls.get_tablet_svr()->update_tablet_report_status(tablet_id))) { - LOG_WARN("failed to update tablet report status", K(tmp_ret), K(MTL_ID()), K(tablet_id)); + ObTabletMergeDagParam dag_param(META_MAJOR_MERGE, ls_id, tablet_id); + if (OB_FAIL(schedule_merge_execute_dag(dag_param, ls_handle, tablet_handle, result))) { + if (OB_SIZE_OVERFLOW != ret && OB_EAGAIN != ret) { + LOG_WARN("failed to schedule tablet meta merge dag", K(ret)); + } + } } } return ret; } -int ObTenantTabletScheduler::schedule_ls_merge( +template +int 
ObTenantTabletScheduler::schedule_tablet_minor_merge( + ObLSHandle &ls_handle, + ObTabletHandle &tablet_handle) +{ + int ret = OB_SUCCESS; + const ObLSID &ls_id = ls_handle.get_ls()->get_ls_id(); + const ObTabletID &tablet_id = tablet_handle.get_obj()->get_tablet_meta().tablet_id_; + const int64_t schedule_type_cnt = tablet_id.is_special_merge_tablet() ? TX_TABLE_NO_MAJOR_MERGE_TYPE_CNT : NO_MAJOR_MERGE_TYPE_CNT; + ObGetMergeTablesParam param; + ObGetMergeTablesResult result; + for (int i = 0; OB_SUCC(ret) && i < schedule_type_cnt; ++i) { + param.merge_type_ = MERGE_TYPES[i]; + if (OB_FAIL(ObPartitionMergePolicy::get_merge_tables[MERGE_TYPES[i]]( + param, + *ls_handle.get_ls(), + *tablet_handle.get_obj(), + result))) { + if (OB_NO_NEED_MERGE == ret) { + ret = OB_SUCCESS; + LOG_DEBUG("tablet no need merge", K(ret), "merge_type", MERGE_TYPES[i], K(tablet_id), K(tablet_handle)); + } else { + LOG_WARN("failed to check need merge", K(ret), "merge_type", MERGE_TYPES[i], K(tablet_handle)); + } + } else { + ObMinorExecuteRangeMgr minor_range_mgr; + MinorParallelResultArray parallel_results; + if (OB_FAIL(minor_range_mgr.get_merge_ranges(ls_id, tablet_id))) { + LOG_WARN("failed to get merge range", K(ret), K(ls_id), K(tablet_id)); + } else if (OB_FAIL(ObPartitionMergePolicy::generate_parallel_minor_interval(result, minor_range_mgr, parallel_results))) { + LOG_WARN("failed to generate parallel minor dag", K(ret), K(result)); + } else { + ObTabletMergeDagParam dag_param(MERGE_TYPES[i], ls_id, tablet_id); + for (int k = 0; OB_SUCC(ret) && k < parallel_results.count(); ++k) { + if (OB_FAIL(schedule_merge_execute_dag(dag_param, ls_handle, tablet_handle, parallel_results.at(k)))) { + LOG_WARN("failed to schedule minor execute dag", K(ret), K(k), K(parallel_results.at(k))); + } else { + LOG_INFO("success to schedule tablet minor merge", K(ret), K(ls_id), K(tablet_id), + "table_cnt", parallel_results.at(k).handle_.get_count(), + "merge_scn_range", parallel_results.at(k).scn_range_, 
"merge_type", MERGE_TYPES[i]); + } + } // end of for + } + } + } + return ret; +} + +template +int ObTenantTabletScheduler::schedule_merge_execute_dag( + const ObTabletMergeDagParam ¶m, + ObLSHandle &ls_handle, + ObTabletHandle &tablet_handle, + const ObGetMergeTablesResult &result) +{ + int ret = OB_SUCCESS; + T *merge_exe_dag = nullptr; + const bool emergency = tablet_handle.get_obj()->get_tablet_meta().tablet_id_.is_ls_inner_tablet(); + + if (result.handle_.get_count() > 1 + && !ObTenantTabletScheduler::check_tx_table_ready( + *ls_handle.get_ls(), + result.scn_range_.end_scn_)) { + ret = OB_EAGAIN; + LOG_INFO("tx table is not ready. waiting for max_decided_log_ts ...", KR(ret), + "merge_scn", result.scn_range_.end_scn_); + } else if (OB_FAIL(MTL(share::ObTenantDagScheduler *)->alloc_dag(merge_exe_dag))) { + LOG_WARN("failed to alloc dag", K(ret)); + } else if (OB_FAIL(merge_exe_dag->direct_init_ctx( + param, + tablet_handle.get_obj()->get_tablet_meta().compat_mode_, + result, + ls_handle, + tablet_handle))) { + LOG_WARN("failed to init dag", K(ret), K(result)); + } else if (OB_FAIL(MTL(share::ObTenantDagScheduler *)->add_dag(merge_exe_dag, emergency))) { + LOG_WARN("failed to add dag", K(ret), KPC(merge_exe_dag)); + } else { + LOG_INFO("success to scheudle tablet minor execute dag", K(ret), KP(merge_exe_dag), K(emergency)); + } + if (OB_FAIL(ret) && nullptr != merge_exe_dag) { + MTL(share::ObTenantDagScheduler *)->free_dag(*merge_exe_dag); + } + return ret; +} + +int ObTenantTabletScheduler::schedule_ls_minor_merge( + ObLS &ls) +{ + int ret = OB_SUCCESS; + ObLSTabletIterator tablet_iter(ObTabletCommon::DIRECT_GET_COMMITTED_TABLET_TIMEOUT_US); + bool need_merge = false; + bool need_fast_freeze = false; + const ObLSID &ls_id = ls.get_ls_id(); + ObLSHandle ls_handle; + if (OB_FAIL(check_ls_state(ls, need_merge))) { + LOG_WARN("failed to check ls state", K(ret), K(ls)); + } else if (!need_merge) { + // no need to merge, do nothing + } else if 
(OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get ls", K(ret), K(ls_id)); + } else if (OB_FAIL(ls.build_tablet_iter(tablet_iter))) { + LOG_WARN("failed to build ls tablet iter", K(ret), K(ls)); + } else { + ObTabletID tablet_id; + ObTabletHandle tablet_handle; + ObTablet *tablet = nullptr; + int tmp_ret = OB_SUCCESS; + bool schedule_minor_flag = true; + while (OB_SUCC(ret) && schedule_minor_flag) { // loop all tablet in ls + bool tablet_merge_finish = false; + if (OB_FAIL(tablet_iter.get_next_tablet(tablet_handle))) { + if (OB_ITER_END == ret) { + ret = OB_SUCCESS; + break; + } else { + LOG_WARN("failed to get tablet", K(ret), K(ls_id), K(tablet_handle)); + } + } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid tablet handle", K(ret), K(ls_id), K(tablet_handle)); + } else if (FALSE_IT(tablet = tablet_handle.get_obj())) { + } else if (FALSE_IT(tablet_id = tablet->get_tablet_meta().tablet_id_)) { + } else if (tablet_id.is_special_merge_tablet()) { + // schedule minor merge for special tablet + if (tablet_id.is_mini_and_minor_merge_tablet() + && OB_TMP_FAIL(schedule_tablet_minor_merge( + ls_handle, + tablet_handle))) { + if (OB_SIZE_OVERFLOW == tmp_ret) { + schedule_minor_flag = false; + } else if (OB_EAGAIN != tmp_ret) { + LOG_WARN("failed to schedule tablet merge", K(tmp_ret), K(ls_id), K(tablet_id)); + } + } + } else { // data tablet + if (OB_TMP_FAIL(schedule_tablet_minor_merge(ls_handle, tablet_handle))) { + if (OB_SIZE_OVERFLOW == tmp_ret) { + schedule_minor_flag = false; + } else if (OB_EAGAIN != tmp_ret) { + LOG_WARN("failed to schedule tablet merge", K(tmp_ret), K(ls_id), K(tablet_id)); + } + } + if (OB_SUCC(ret)) { + need_fast_freeze = false; + if (!fast_freeze_checker_.need_check()) { + } else if (OB_TMP_FAIL(fast_freeze_checker_.check_need_fast_freeze(*tablet_handle.get_obj(), need_fast_freeze))) { + LOG_WARN("failed to check need fast freeze", 
K(tmp_ret), K(tablet_handle)); + } else if (need_fast_freeze) { + if (OB_TMP_FAIL(MTL(ObTenantFreezer *)->tablet_freeze(tablet_id, false/*force_freeze*/))) { + LOG_WARN("failt to freeze tablet", K(tmp_ret), K(tablet_id)); + } + } + } + } + } // end of while + } // else + return ret; +} + +int ObTenantTabletScheduler::schedule_ls_medium_merge( int64_t &merge_version, ObLS &ls, bool &ls_merge_finish, @@ -706,26 +843,48 @@ int ObTenantTabletScheduler::schedule_ls_merge( int ret = OB_SUCCESS; ObLSTabletIterator tablet_iter(ObTabletCommon::DIRECT_GET_COMMITTED_TABLET_TIMEOUT_US); bool need_merge = false; + const ObLSID &ls_id = ls.get_ls_id(); + ObLSHandle ls_handle; + ObLSRestoreStatus restore_status; if (OB_FAIL(check_ls_state(ls, need_merge))) { LOG_WARN("failed to check ls state", K(ret), K(ls)); } else if (!need_merge) { // no need to merge, do nothing + } else if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get ls", K(ret), K(ls_id)); } else if (OB_FAIL(ls.build_tablet_iter(tablet_iter))) { LOG_WARN("failed to build ls tablet iter", K(ret), K(ls)); + } else if (OB_FAIL(ls.get_ls_meta().get_restore_status(restore_status))) { + LOG_WARN("failed to get restore status", K(ret), K(ls)); + } else if (OB_UNLIKELY(!restore_status.is_restore_none())) { + if (REACH_TENANT_TIME_INTERVAL(PRINT_LOG_INVERVAL)) { + LOG_INFO("ls is in restore status, should not loop tablet to schedule", K(ret), K(ls)); + } } else { - const ObLSID &ls_id = ls.get_ls_id(); ObTabletID tablet_id; ObTabletHandle tablet_handle; + ObTablet *tablet = nullptr; int tmp_ret = OB_SUCCESS; - bool schedule_minor_flag = true; - bool schedule_major_flag = merge_version > ObVersionRange::MIN_VERSION; - bool need_fast_freeze = false; + bool is_leader = false; + bool could_major_merge = false; + const int64_t major_frozen_scn = get_frozen_version(); + if (MTL(ObTenantTabletScheduler *)->could_major_merge_start()) { + could_major_merge = true; + } else if 
(REACH_TENANT_TIME_INTERVAL(PRINT_LOG_INVERVAL)) { + LOG_INFO("major merge should not schedule", K(ret), K(merge_version)); + } // check weak_read_ts - bool weak_read_ts_ready = false; - if (schedule_major_flag) { - if (check_weak_read_ts_ready(merge_version, ls)) { - weak_read_ts_ready = true; + if (merge_version >= 0) { + if (check_weak_read_ts_ready(merge_version, ls)) { // weak read ts ready + ObRole role = INVALID_ROLE; + if (OB_FAIL(ObMediumCompactionScheduleFunc::get_palf_role(ls_id, role))) { + if (OB_LS_NOT_EXIST != ret) { + LOG_WARN("failed to get palf handle role", K(ret), K(ls_id)); + } + } else if (is_leader_by_election(role)) { + is_leader = true; + } } else { all_ls_weak_read_ts_ready = false; } @@ -743,84 +902,109 @@ int ObTenantTabletScheduler::schedule_ls_merge( } else if (OB_UNLIKELY(!tablet_handle.is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("invalid tablet handle", K(ret), K(ls_id), K(tablet_handle)); - } else if (FALSE_IT(tablet_id = tablet_handle.get_obj()->get_tablet_meta().tablet_id_)) { - } else if (tablet_id.is_special_merge_tablet()) { - // schedule minor merge for special tablet - if (tablet_id.is_mini_and_minor_merge_tablet() - && OB_TMP_FAIL(schedule_tx_table_merge( - ls_id, - *tablet_handle.get_obj()))) { - if (OB_SIZE_OVERFLOW == tmp_ret) { - ret = OB_SIZE_OVERFLOW; - } else { - LOG_WARN("failed to schedule tablet merge", K(tmp_ret), K(ls_id), K(tablet_handle)); - } - } - } else { // data tablet - if (schedule_minor_flag - && OB_TMP_FAIL(schedule_tablet_minor_merge(ls_id, *tablet_handle.get_obj()))) { - if (OB_SIZE_OVERFLOW == tmp_ret) { - schedule_minor_flag = false; - } else { - LOG_WARN("failed to schedule tablet merge", K(tmp_ret), K(ls_id), K(tablet_handle)); - } - } - if (schedule_major_flag && weak_read_ts_ready - && could_major_merge_start() && OB_TMP_FAIL(schedule_tablet_major_merge( - merge_version, - ls, - *tablet_handle.get_obj(), - tablet_merge_finish, - schedule_stats_))) { - ls_merge_finish = false; - if 
(OB_SIZE_OVERFLOW == tmp_ret) { - schedule_major_flag = false; - } else { - LOG_WARN("failed to schedule tablet merge", K(tmp_ret), K(ls_id), K(tablet_handle)); - } - } else { - ls_merge_finish &= tablet_merge_finish; - } + } else if (FALSE_IT(tablet = tablet_handle.get_obj())) { + } else if (FALSE_IT(tablet_id = tablet->get_tablet_meta().tablet_id_)) { + } else if (tablet_id.is_special_merge_tablet()) { // data tablet + // do nothing + } else { + ObMediumCompactionScheduleFunc func(ls, *tablet); + ObITable *latest_major = tablet->get_table_store().get_major_sstables().get_boundary_table(true/*last*/); + if (OB_NOT_NULL(latest_major) && latest_major->get_snapshot_version() >= merge_version) { + tablet_merge_finish = true; + schedule_stats_.finish_cnt_++; - if (OB_SUCC(ret)) { - need_fast_freeze = false; - if (!fast_freeze_checker_.need_check()) { - } else if (OB_TMP_FAIL(fast_freeze_checker_.check_need_fast_freeze(*tablet_handle.get_obj(), need_fast_freeze))) { - LOG_WARN("failed to check need fast freeze", K(tmp_ret), K(tablet_handle)); - } else if (need_fast_freeze) { - if (OB_TMP_FAIL(MTL(ObTenantFreezer *)->tablet_freeze(tablet_id, false/*force_freeze*/))) { - LOG_WARN("failt to freeze tablet", K(tmp_ret), K(tablet_id)); + if (tablet->get_tablet_meta().report_status_.need_report()) { + if (OB_TMP_FAIL(GCTX.ob_service_->submit_tablet_update_task(MTL_ID(), ls_id, tablet_id))) { + LOG_WARN("failed to submit tablet update task to report", K(tmp_ret), K(MTL_ID()), K(tablet_id)); + } else if (OB_TMP_FAIL(ls.get_tablet_svr()->update_tablet_report_status(tablet_id))) { + LOG_WARN("failed to update tablet report status", K(tmp_ret), K(MTL_ID()), K(tablet_id)); } } } + LOG_DEBUG("schedule tablet medium", K(ret), K(ls_id), K(tablet_id), K(tablet_merge_finish), + KPC(latest_major), K(merge_version)); + if (!is_leader || OB_ISNULL(latest_major)) { + // follower or no major: do nothing + } else if (tablet->get_medium_compaction_info_list().need_check_finish()) { // need 
check finished + if (OB_TMP_FAIL(func.check_medium_finish())) { + LOG_WARN("failed to check medium finish", K(tmp_ret), K(ls_id), K(tablet_id)); + } else if (ObTimeUtility::fast_current_time() < + tablet->get_medium_compaction_info_list().get_wait_check_medium_scn() + WAIT_MEDIUM_CHECK_THRESHOLD) { + // need wait 10 mins before schedule meta major + } else if (OB_TMP_FAIL(schedule_tablet_meta_major_merge(ls_handle, tablet_handle))) { + if (OB_SIZE_OVERFLOW != tmp_ret && OB_EAGAIN != tmp_ret) { + LOG_WARN("failed to schedule tablet merge", K(tmp_ret), K(ls_id), K(tablet_id)); + } + } + } else if (could_major_merge && OB_TMP_FAIL(func.schedule_next_medium_for_leader( + tablet_merge_finish ? 0 : merge_version))) { // schedule another round + LOG_WARN("failed to schedule next medium", K(tmp_ret), K(ls_id), K(tablet_id)); + } else { + schedule_stats_.schedule_cnt_++; + } + if (OB_TMP_FAIL(ObMediumCompactionScheduleFunc::schedule_tablet_medium_merge( + ls, + *tablet, + major_frozen_scn))) { + if (OB_EAGAIN != ret) { + LOG_WARN("failed to schedule medium", K(tmp_ret), K(ls_id), K(tablet_id)); + } + } + + // get info from memtable to check have received new medium info + if (OB_TMP_FAIL(func.freeze_memtable_to_get_medium_info())) { + if (OB_TABLE_NOT_EXIST != tmp_ret) { + LOG_WARN("failed to freeze memtable", K(tmp_ret), K(ls_id), K(tablet_id)); + } + } + + ls_merge_finish &= tablet_merge_finish; } } // end of while } // else return ret; } -int ObTenantTabletScheduler::schedule_all_tablets() +int ObTenantTabletScheduler::schedule_all_tablets_medium() { int ret = OB_SUCCESS; int tmp_ret = OB_SUCCESS; ObSharedGuard ls_iter_guard; - ObLS *ls = nullptr; - if (OB_FAIL(MTL(ObLSService *)->get_ls_iter(ls_iter_guard, ObLSGetMod::STORAGE_MOD))) { + uint64_t compat_version = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTenantTabletScheduler has not been inited", K(ret)); + } else if (OB_FAIL(GET_MIN_DATA_VERSION(MTL_ID(), compat_version))) { + LOG_WARN("fail to get 
data version", K(ret)); + } else if (compat_version < DATA_VERSION_4_1_0_0) { + // do nothing, should not loop tablets + } else if (OB_FAIL(MTL(ObLSService *)->get_ls_iter(ls_iter_guard, ObLSGetMod::STORAGE_MOD))) { LOG_WARN("failed to get ls iterator", K(ret)); } else { bool tenant_merge_finish = true; bool all_ls_weak_read_ts_ready = true; + bool check_report_scn_flag = false; int64_t merge_version = get_frozen_version(); + ObLS *ls = nullptr; LOG_INFO("start schedule all tablet merge", K(merge_version)); - if (merge_version > merged_version_) { + if (INIT_COMPACTION_SCN == merge_version) { + merge_version = 0; + } else if (merge_version > merged_version_) { if (OB_TMP_FAIL(MTL(ObTenantCompactionProgressMgr *)->update_progress(merge_version, share::ObIDag::DAG_STATUS_NODE_RUNNING))) { LOG_WARN("failed to update progress", K(tmp_ret), K(merge_version)); } } + if (REACH_TENANT_TIME_INTERVAL(CHECK_REPORT_SCN_INTERVAL)) { + check_report_scn_flag = true; + } +#ifdef ERRSIM + check_report_scn_flag = true; +#endif + while (OB_SUCC(ret)) { bool ls_merge_finish = true; if (OB_FAIL(ls_iter_guard.get_ptr()->get_next(ls))) { @@ -833,24 +1017,25 @@ int ObTenantTabletScheduler::schedule_all_tablets() } else if (OB_ISNULL(ls)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("ls is null", K(ret), K(ls)); - } else if (OB_TMP_FAIL(schedule_ls_merge(merge_version, *ls, ls_merge_finish, all_ls_weak_read_ts_ready))) { + } else if (OB_TMP_FAIL(schedule_ls_medium_merge(merge_version, *ls, ls_merge_finish, all_ls_weak_read_ts_ready))) { tenant_merge_finish = false; if (OB_SIZE_OVERFLOW == tmp_ret) { break; - } else { + } else if (OB_LS_NOT_EXIST != ret) { LOG_WARN("failed to schedule ls merge", K(tmp_ret), KPC(ls)); } } else { tenant_merge_finish &= ls_merge_finish; - // TODO(DanLing) Optimize: check whether the slave_read_ts of tablet that failed to merge is bigger than frozen ts + + // loop tablet_meta table to update smaller report_scn because of migration + if (check_report_scn_flag) { + (void) 
update_report_scn_as_ls_leader(*ls); + } } } // end while if (!tenant_merge_finish) { // wait major compaction if (all_ls_weak_read_ts_ready) { // check schedule Timer Task - if (OB_FAIL(reload_tenant_config())) { - LOG_WARN("failed to restart schedule timer task", K(ret)); - } if (schedule_stats_.add_weak_read_ts_event_flag_) { schedule_stats_.add_weak_read_ts_event_flag_ = false; ADD_COMPACTION_EVENT( @@ -900,6 +1085,8 @@ int ObTenantTabletScheduler::schedule_all_tablets() current_time, "cost_time", current_time - schedule_stats_.start_timestamp_); + + reload_tenant_config(); // tenant merge finish, use tenant default config to loop } LOG_INFO("finish schedule all tablet merge", K(merge_version), K(schedule_stats_), K(tenant_merge_finish), @@ -912,13 +1099,13 @@ int ObTenantTabletScheduler::restart_schedule_timer_task(const int64_t schedule_ { int ret = OB_SUCCESS; bool is_exist = false; - if (OB_FAIL(TG_TASK_EXIST(merge_loop_tg_id_, merge_loop_task_, is_exist))) { + if (OB_FAIL(TG_TASK_EXIST(medium_loop_tg_id_, medium_loop_task_, is_exist))) { LOG_ERROR("failed to check merge schedule task exist", K(ret)); } else if (is_exist) { - TG_CANCEL(merge_loop_tg_id_, merge_loop_task_); + TG_CANCEL(medium_loop_tg_id_, medium_loop_task_); } if (OB_FAIL(ret)) { - } else if (OB_FAIL(TG_SCHEDULE(merge_loop_tg_id_, merge_loop_task_, schedule_interval, true/*repeat*/))) { + } else if (OB_FAIL(TG_SCHEDULE(medium_loop_tg_id_, medium_loop_task_, schedule_interval, true/*repeat*/))) { LOG_WARN("Fail to schedule minor merge scan task", K(ret)); } else { schedule_interval_ = schedule_interval; @@ -944,5 +1131,26 @@ int ObTenantTabletScheduler::get_min_dependent_schema_version(int64_t &min_schem return ret; } +int ObTenantTabletScheduler::update_report_scn_as_ls_leader(ObLS &ls) +{ + int ret = OB_SUCCESS; + ObRole role = INVALID_ROLE; + const int64_t major_merged_scn = get_inner_table_merged_scn(); + if (OB_FAIL(ls.get_ls_role(role))) { + LOG_WARN("failed to get ls role", K(ret), 
K(ls)); + } else if (LEADER == role) { + const ObLSID &ls_id = ls.get_ls_id(); + ObSEArray tablet_id_array; + if (OB_FAIL(ls.get_tablet_svr()->get_all_tablet_ids(true/*except_ls_inner_tablet*/, tablet_id_array))) { + LOG_WARN("failed to get tablet id", K(ret), K(ls_id)); + } else if (major_merged_scn > INIT_COMPACTION_SCN + && OB_FAIL(ObTabletMetaTableCompactionOperator::batch_update_unequal_report_scn_tablet( + MTL_ID(), ls_id, major_merged_scn, tablet_id_array))) { + LOG_WARN("failed to get unequal report scn", K(ret), K(ls_id), K(major_merged_scn)); + } + } + return ret; +} + } // namespace storage } // namespace oceanbase diff --git a/src/storage/compaction/ob_tenant_tablet_scheduler.h b/src/storage/compaction/ob_tenant_tablet_scheduler.h index 2b37708468..bb95767d7a 100644 --- a/src/storage/compaction/ob_tenant_tablet_scheduler.h +++ b/src/storage/compaction/ob_tenant_tablet_scheduler.h @@ -17,6 +17,8 @@ #include "lib/queue/ob_dedup_queue.h" #include "share/ob_ls_id.h" #include "storage/ob_i_store.h" +#include "storage/compaction/ob_tablet_merge_task.h" +#include "storage/compaction/ob_partition_merge_policy.h" namespace oceanbase { @@ -108,12 +110,18 @@ public: int64_t get_frozen_version() const; int64_t get_merged_version() const { return merged_version_; } + int64_t get_inner_table_merged_scn() const { return ATOMIC_LOAD(&inner_table_merged_scn_); } + void set_inner_table_merged_scn(const int64_t merged_scn) + { + return ATOMIC_STORE(&inner_table_merged_scn_, merged_scn); + } int64_t get_bf_queue_size() const { return bf_queue_.task_count(); } - int merge_all(); int schedule_merge(const int64_t broadcast_version); int update_upper_trans_version_and_gc_sstable(); int check_ls_compaction_finish(const share::ObLSID &ls_id); + int schedule_all_tablets_minor(); + // Schedule an async task to build bloomfilter for the given macro block. // The bloomfilter build task will be ignored if a same build task exists in the queue. 
int schedule_build_bloomfilter( @@ -123,41 +131,44 @@ public: int schedule_load_bloomfilter(const blocksstable::MacroBlockId ¯o_id); static bool check_tx_table_ready(ObLS &ls, const share::SCN &check_scn); static int check_ls_state(ObLS &ls, bool &need_merge); - static int schedule_tablet_minor_merge(const share::ObLSID ls_id, ObTablet &tablet); - static int schedule_tablet_major_merge( - int64_t &merge_version, - ObLS &ls, - ObTablet &tablet, - bool &tablet_merge_finish, - ObScheduleStatistics &schedule_stats, - const bool enable_force_freeze = true); - static int schedule_tx_table_merge( - const share::ObLSID &ls_id, - ObTablet &tablet); + template + static int schedule_tablet_minor_merge( + ObLSHandle &ls_handle, + ObTabletHandle &tablet_handle); + static int schedule_tablet_meta_major_merge( + ObLSHandle &ls_handle, + ObTabletHandle &tablet_handle); + template + static int schedule_merge_execute_dag( + const compaction::ObTabletMergeDagParam ¶m, + ObLSHandle &ls_handle, + ObTabletHandle &tablet_handle, + const ObGetMergeTablesResult &result); static bool check_weak_read_ts_ready( const int64_t &merge_version, ObLS &ls); - - int get_min_dependent_schema_version(int64_t &min_schema_version); - -private: - int schedule_all_tablets(); - int schedule_ls_merge( - int64_t &merge_version, - ObLS &ls, - bool &ls_merge_finish, - bool &all_ls_weak_read_ts_ready); static int schedule_merge_dag( const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, const ObMergeType merge_type, - const int64_t &merge_snapshot_version); - int try_remove_old_table(ObLS &ls); - static int check_and_freeze_for_major( - const common::ObTabletID &tablet_id, - const int64_t &merge_version, + const int64_t &merge_snapshot_version, + const bool is_tenant_major_merge = false); + + int get_min_dependent_schema_version(int64_t &min_schema_version); + +private: + int schedule_all_tablets_medium(); + int schedule_ls_medium_merge( + int64_t &merge_version, + ObLS &ls, + bool &ls_merge_finish, + 
bool &all_ls_weak_read_ts_ready); + int schedule_ls_minor_merge( ObLS &ls); + int try_remove_old_table(ObLS &ls); int restart_schedule_timer_task(const int64_t interval); + int update_report_scn_as_ls_leader( + ObLS &ls); private: class MergeLoopTask: public common::ObTimerTask @@ -174,30 +185,43 @@ private: virtual ~SSTableGCTask() = default; virtual void runTimerTask() override; }; + class MediumLoopTask : public common::ObTimerTask + { + public: + MediumLoopTask() = default; + virtual ~MediumLoopTask() = default; + virtual void runTimerTask() override; + }; public: static const int64_t INIT_COMPACTION_SCN = 1; + typedef common::ObSEArray MinorParallelResultArray; private: static const int64_t BLOOM_FILTER_LOAD_BUILD_THREAD_CNT = 1; + static const int64_t NO_MAJOR_MERGE_TYPE_CNT = 2; + static const int64_t TX_TABLE_NO_MAJOR_MERGE_TYPE_CNT = 1; static const int64_t BF_TASK_QUEUE_SIZE = 10L * 1000; static const int64_t BF_TASK_MAP_SIZE = 10L * 1000; static const int64_t BF_TASK_TOTAL_LIMIT = 512L * 1024L * 1024L; static const int64_t BF_TASK_HOLD_LIMIT = 256L * 1024L * 1024L; static const int64_t BF_TASK_PAGE_SIZE = common::OB_MALLOC_MIDDLE_BLOCK_SIZE; //64K - static const int64_t NO_MAJOR_MERGE_TYPE_CNT = 3; static constexpr ObMergeType MERGE_TYPES[] = { - MINI_MINOR_MERGE, BUF_MINOR_MERGE, HISTORY_MINI_MINOR_MERGE}; + MINOR_MERGE, HISTORY_MINOR_MERGE}; static const int64_t SSTABLE_GC_INTERVAL = 30 * 1000 * 1000L; // 30s static const int64_t DEFAULT_HASH_MAP_BUCKET_CNT = 1009; static const int64_t DEFAULT_COMPACTION_SCHEDULE_INTERVAL = 30 * 1000 * 1000L; // 30s static const int64_t CHECK_WEAK_READ_TS_SCHEDULE_INTERVAL = 10 * 1000 * 1000L; // 10s + static const int64_t CHECK_REPORT_SCN_INTERVAL = 2 * 60 * 1000 * 1000L; // 2m, temp solution, change to 10m later static const int64_t ADD_LOOP_EVENT_INTERVAL = 120 * 1000 * 1000L; // 120s + static const int64_t WAIT_MEDIUM_CHECK_THRESHOLD = 10 * 60 * 1000 * 1000L; // 10m + static const int64_t PRINT_LOG_INVERVAL = 2 * 
60 * 1000 * 1000L; // 2m private: bool is_inited_; bool major_merge_status_; bool is_stop_; int merge_loop_tg_id_; // thread + int medium_loop_tg_id_; // thread int sstable_gc_tg_id_; // thread int64_t schedule_interval_; @@ -205,8 +229,10 @@ private: mutable obsys::ObRWLock frozen_version_lock_; int64_t frozen_version_; int64_t merged_version_; // the merged major version of the local server, may be not accurate after reboot + int64_t inner_table_merged_scn_; ObScheduleStatistics schedule_stats_; MergeLoopTask merge_loop_task_; + MediumLoopTask medium_loop_task_; SSTableGCTask sstable_gc_task_; ObFastFreezeChecker fast_freeze_checker_; }; diff --git a/src/storage/compaction/ob_tx_table_merge_task.cpp b/src/storage/compaction/ob_tx_table_merge_task.cpp index f6b38a2946..664aca9eb5 100644 --- a/src/storage/compaction/ob_tx_table_merge_task.cpp +++ b/src/storage/compaction/ob_tx_table_merge_task.cpp @@ -28,8 +28,7 @@ namespace compaction */ ObTxTableMergeDag::ObTxTableMergeDag() - : ObBasicTabletMergeDag(ObDagType::DAG_TYPE_TX_TABLE_MERGE), - compaction_filter_() + : ObBasicTabletMergeDag(ObDagType::DAG_TYPE_TX_TABLE_MERGE) { } @@ -59,29 +58,11 @@ int ObTxTableMergeDag::init_by_param(const ObIDagInitParam *param) LOG_WARN("input param is null", K(ret), K(param)); } else if (FALSE_IT(merge_param = static_cast(param))) { } else if (OB_UNLIKELY(!merge_param->tablet_id_.is_special_merge_tablet() - || !merge_param->is_multi_version_minor_merge())) { + || !is_mini_merge(merge_param->merge_type_))) { ret = OB_INVALID_ARGUMENT; LOG_WARN("param is not valid", K(ret), KPC(merge_param)); } else if (OB_FAIL(ObBasicTabletMergeDag::inner_init(*merge_param))) { LOG_WARN("failed to init ObTabletMergeDag", K(ret)); - } else if (merge_param->tablet_id_.is_ls_tx_data_tablet() && merge_param->is_minor_merge()) { - // init compaction filter for minor merge in TxDataTable - ObTxTableGuard guard; - SCN recycle_scn = SCN::min_scn(); - int tmp_ret = OB_SUCCESS; - if 
(OB_TMP_FAIL(ctx_->ls_handle_.get_ls()->get_tx_table_guard(guard))) { - LOG_WARN("failed to get tx table", K(tmp_ret), KPC(merge_param)); - } else if (OB_UNLIKELY(!guard.is_valid())) { - tmp_ret = OB_ERR_UNEXPECTED; - LOG_WARN("tx table guard is invalid", K(tmp_ret), KPC(merge_param), K(guard)); - } else if (OB_TMP_FAIL(guard.get_tx_table()->get_recycle_scn(recycle_scn))) { - LOG_WARN("failed to get recycle ts", K(tmp_ret), KPC(merge_param)); - } else if (OB_TMP_FAIL(compaction_filter_.init(recycle_scn, ObTxTable::get_filter_col_idx()))) { - LOG_WARN("failed to get init compaction filter", K(tmp_ret), KPC(merge_param), K(recycle_scn)); - } else { - ctx_->compaction_filter_ = &compaction_filter_; - FLOG_INFO("success to init compaction filter", K(tmp_ret), K(recycle_scn)); - } } return ret; } @@ -123,54 +104,10 @@ int ObTxTableMergePrepareTask::init() return ret; } -int ObTxTableMergePrepareTask::process() +int ObTxTableMergePrepareTask::inner_init_ctx(ObTabletMergeCtx &ctx, bool &skip_merge_task_flag) { int ret = OB_SUCCESS; - ObTenantStatEstGuard stat_est_guard(MTL_ID()); - ObTabletMergeCtx *ctx = NULL; - ObTaskController::get().switch_task(share::ObTaskType::DATA_MAINTAIN); - - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("not inited", K(ret)); - } else if (FALSE_IT(ctx = &merge_dag_->get_ctx())) { - } else if (OB_FAIL(ctx->ls_handle_.get_ls()->get_tablet_svr()->get_tablet( - ctx->param_.tablet_id_, - ctx->tablet_handle_))) { - LOG_WARN("failed to get tablet", K(ret), "ls_id", ctx->param_.ls_id_, - "tablet_id", ctx->param_.tablet_id_); - } else if (OB_FAIL(build_merge_ctx())) { - LOG_WARN("failed to build merge ctx", K(ret), K(ctx->param_)); - } else if (ctx->scn_range_.is_empty()) { - ret = OB_ERR_UNEXPECTED; - LOG_ERROR("Unexcepted empty log ts range in minor merge", K(ret), K(ctx->scn_range_)); - } else { - ctx->merge_scn_ = ctx->scn_range_.end_scn_; - } - - if (OB_FAIL(ret)) { - } else if (OB_FAIL(ObTabletMergePrepareTask::generate_merge_task())) { - 
LOG_WARN("Failed to generate_merge_sstable_task", K(ret)); - } else { - // it doesn't matter if merge_progress failed to init - // TODO FIX merge progress -// int tmp_ret = OB_SUCCESS; -// if (OB_SUCCESS != (tmp_ret = ctx->merge_progress_.init(ctx))) { -// ctx->merge_progress_.reset(); -// LOG_WARN("Failed to init merge progress", K(tmp_ret)); -// } else { -// LOG_INFO("succeed to init merge progress", K(tmp_ret), K(ctx->merge_progress_)); -// } - FLOG_INFO("succeed to generate merge task", "task", *this); - } - - return ret; -} - -int ObTxTableMergePrepareTask::build_merge_ctx() -{ - int ret = OB_SUCCESS; - ObTabletMergeCtx &ctx = merge_dag_->get_ctx(); + skip_merge_task_flag = false; const common::ObTabletID &tablet_id = ctx.param_.tablet_id_; ObTablet *tablet = ctx.tablet_handle_.get_obj(); ObGetMergeTablesParam get_merge_table_param; @@ -178,16 +115,9 @@ int ObTxTableMergePrepareTask::build_merge_ctx() get_merge_table_param.merge_type_ = ctx.param_.merge_type_; // only ctx.param_ is inited, fill other fields here - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("The tablet has not been initialized", K(ret), K(tablet_id)); - } else if (OB_UNLIKELY(!ctx.param_.is_valid() || nullptr == tablet)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid argument", K(ret), K(ctx)); - } else if (FALSE_IT(ctx.rebuild_seq_ = ctx.ls_handle_.get_ls()->get_ls_meta().get_rebuild_seq())) { - } else if (OB_FAIL(ObPartitionMergePolicy::get_merge_tables[ctx.param_.merge_type_]( + if (OB_FAIL(ObPartitionMergePolicy::get_merge_tables[ctx.param_.merge_type_]( get_merge_table_param, - INT64_MAX/*multi_version_start*/, + *ctx.ls_handle_.get_ls(), *tablet, get_merge_table_result))) { // TODO(@DanLin) optimize this interface @@ -204,24 +134,13 @@ int ObTxTableMergePrepareTask::build_merge_ctx() LOG_WARN("failed to set basic info to ctx", K(ret), K(get_merge_table_result), K(ctx)); } else if (OB_FAIL(ctx.get_storage_schema_to_merge(get_merge_table_result.handle_, 
false/*get_schema_on_memtable*/))) { LOG_WARN("failed to get storage schema", K(ret), K(get_merge_table_result), K(ctx)); - } else if (OB_UNLIKELY(!ctx.is_schema_valid())) { - ret = OB_ERR_SYS; - LOG_WARN("schema of merge ctx is not valid", K(ret), K(ctx)); - } else if (OB_FAIL(ctx.init_parallel_merge())) { - LOG_WARN("Failed to init parallel merge in sstable merge ctx", K(ret)); } else { ctx.progressive_merge_num_ = 0; ctx.is_full_merge_ = true; ctx.merge_level_ = MACRO_BLOCK_MERGE_LEVEL; ctx.read_base_version_ = 0; - if (OB_FAIL(ctx.merge_info_.init(ctx))) { - LOG_WARN("failed to init merge context", K(ret)); - } } - if (OB_SUCC(ret)) { - FLOG_INFO("succeed to build merge ctx", K(tablet_id), K(ctx)); - } return ret; } diff --git a/src/storage/compaction/ob_tx_table_merge_task.h b/src/storage/compaction/ob_tx_table_merge_task.h index 62935b9e7c..a02a2689a0 100644 --- a/src/storage/compaction/ob_tx_table_merge_task.h +++ b/src/storage/compaction/ob_tx_table_merge_task.h @@ -14,7 +14,6 @@ #define STORAGE_COMPACTION_OB_TX_TABLE_MERGE_TASK_H_ #include "share/scheduler/ob_dag_scheduler.h" #include "storage/compaction/ob_tablet_merge_task.h" -#include "storage/compaction/ob_i_compaction_filter.h" namespace oceanbase { @@ -29,16 +28,15 @@ namespace compaction class ObTxTableMergeDag; class ObTabletMergeCtx; +// for mini merge class ObTxTableMergePrepareTask: public ObTabletMergePrepareTask { public: ObTxTableMergePrepareTask(); virtual ~ObTxTableMergePrepareTask(); int init(); - virtual int process() override; - private: - int build_merge_ctx(); + virtual int inner_init_ctx(ObTabletMergeCtx &ctx, bool &skip_merge_task_flag) override; private: DISALLOW_COPY_AND_ASSIGN(ObTxTableMergePrepareTask); }; @@ -51,7 +49,6 @@ public: virtual int create_first_task() override; virtual int init_by_param(const share::ObIDagInitParam *param) override; private: - ObTransStatusFilter compaction_filter_; DISALLOW_COPY_AND_ASSIGN(ObTxTableMergeDag); }; diff --git 
a/src/storage/high_availability/ob_ls_restore.cpp b/src/storage/high_availability/ob_ls_restore.cpp index 2c4c79b744..3e89d76339 100644 --- a/src/storage/high_availability/ob_ls_restore.cpp +++ b/src/storage/high_availability/ob_ls_restore.cpp @@ -2064,7 +2064,10 @@ int ObTabletGroupMetaRestoreTask::create_or_update_tablet_( LOG_WARN("failed to set restore status", K(ret), K(restore_status)); } else if (OB_FAIL(param.ha_status_.set_data_status(data_status))) { LOG_WARN("failed to set data status", K(ret), K(data_status)); - } else if (OB_FAIL(ObMigrationTabletParam::construct_placeholder_storage_schema(param.allocator_, param.storage_schema_))) { + } else if (OB_FAIL(ObMigrationTabletParam::construct_placeholder_storage_schema_and_medium( + param.allocator_, + param.storage_schema_, + param.medium_info_list_))) { LOG_WARN("failed to construct placeholder storage schema"); } else if (!param.is_valid()) { ret = OB_INVALID_ARGUMENT; diff --git a/src/storage/high_availability/ob_physical_copy_task.cpp b/src/storage/high_availability/ob_physical_copy_task.cpp index 4711de5956..aea84c23c9 100644 --- a/src/storage/high_availability/ob_physical_copy_task.cpp +++ b/src/storage/high_availability/ob_physical_copy_task.cpp @@ -740,13 +740,13 @@ int ObPhysicalCopyFinishTask::prepare_data_store_desc_( cluster_version))) { LOG_WARN("failed to init index store desc", K(ret), K(tablet_id), K(merge_type), KPC(sstable_param)); } else { - const ObMergeSchema &merge_schema = tablet->get_storage_schema(); + const ObStorageSchema &storage_schema = tablet->get_storage_schema(); desc.row_column_count_ = desc.rowkey_column_count_ + 1; desc.col_desc_array_.reset(); desc.need_prebuild_bloomfilter_ = false; if (OB_FAIL(desc.col_desc_array_.init(desc.row_column_count_))) { LOG_WARN("failed to reserve column desc array", K(ret)); - } else if (OB_FAIL(merge_schema.get_rowkey_column_ids(desc.col_desc_array_))) { + } else if (OB_FAIL(storage_schema.get_rowkey_column_ids(desc.col_desc_array_))) { 
LOG_WARN("failed to get rowkey column ids", K(ret)); } else if (OB_FAIL(ObMultiVersionRowkeyHelpper::add_extra_rowkey_cols(desc.col_desc_array_))) { LOG_WARN("failed to get extra rowkey column ids", K(ret)); @@ -831,7 +831,7 @@ int ObPhysicalCopyFinishTask::get_merge_type_( } else if (sstable_param->table_key_.is_major_sstable()) { merge_type = ObMergeType::MAJOR_MERGE; } else if (sstable_param->table_key_.is_minor_sstable()) { - merge_type = ObMergeType::MINI_MINOR_MERGE; + merge_type = ObMergeType::MINOR_MERGE; } else if (sstable_param->table_key_.is_ddl_sstable()) { merge_type = ObMergeType::DDL_KV_MERGE; } else { @@ -1410,7 +1410,8 @@ int ObTabletCopyFinishTask::inner_update_tablet_table_store_with_major_( true/*need_report*/, SCN::min_scn()/*clog_checkpoint_scn*/, true/*need_check_sstable*/, - true/*allow_duplicate_sstable*/); + true/*allow_duplicate_sstable*/, + &src_tablet_meta_->medium_info_list_); if (tablet->get_storage_schema().get_version() < src_tablet_meta_->storage_schema_.get_version()) { SERVER_EVENT_ADD("storage_ha", "schema_change_need_merge_tablet_meta", "tenant_id", MTL_ID(), diff --git a/src/storage/high_availability/ob_storage_ha_reader.cpp b/src/storage/high_availability/ob_storage_ha_reader.cpp index c9f0ed1ede..66188ca597 100644 --- a/src/storage/high_availability/ob_storage_ha_reader.cpp +++ b/src/storage/high_availability/ob_storage_ha_reader.cpp @@ -1036,7 +1036,10 @@ int ObCopyTabletInfoObProducer::build_deleted_tablet_info_( LOG_WARN("failed to set restore status", K(ret), K(restore_status)); } else if (OB_FAIL(tablet_info.param_.ha_status_.set_data_status(data_status))) { LOG_WARN("failed to set data status", K(ret), K(data_status)); - } else if (OB_FAIL(ObMigrationTabletParam::construct_placeholder_storage_schema(tablet_info.param_.allocator_, tablet_info.param_.storage_schema_))) { + } else if (OB_FAIL(ObMigrationTabletParam::construct_placeholder_storage_schema_and_medium( + tablet_info.param_.allocator_, + 
tablet_info.param_.storage_schema_, + tablet_info.param_.medium_info_list_))) { LOG_WARN("failed to construct placeholder storage schema"); } else if (!tablet_info.param_.is_valid()) { ret = OB_INVALID_ARGUMENT; @@ -1897,8 +1900,10 @@ int ObCopySSTableInfoObProducer::fake_deleted_tablet_meta_( LOG_WARN("failed to set restore status", K(ret), K(restore_status)); } else if (OB_FAIL(tablet_meta.ha_status_.set_data_status(data_status))) { LOG_WARN("failed to set data status", K(ret), K(data_status)); - } else if (OB_FAIL(ObMigrationTabletParam::construct_placeholder_storage_schema(tablet_meta.allocator_, - tablet_meta.storage_schema_))) { + } else if (OB_FAIL(ObMigrationTabletParam::construct_placeholder_storage_schema_and_medium( + tablet_meta.allocator_, + tablet_meta.storage_schema_, + tablet_meta.medium_info_list_))) { LOG_WARN("failed to construct placeholder storage schema"); } else if (!tablet_meta.is_valid()) { ret = OB_INVALID_ARGUMENT; diff --git a/src/storage/high_availability/ob_storage_ha_utils.cpp b/src/storage/high_availability/ob_storage_ha_utils.cpp index 11a59de478..58cfa435ae 100644 --- a/src/storage/high_availability/ob_storage_ha_utils.cpp +++ b/src/storage/high_availability/ob_storage_ha_utils.cpp @@ -89,7 +89,7 @@ int ObStorageHAUtils::check_tablet_replica_checksum_(const uint64_t tenant_id, c LOG_WARN("failed to init pair", K(ret), K(tablet_id), K(ls_id)); } else if (OB_FAIL(pairs.push_back(pair))) { LOG_WARN("failed to push back", K(ret), K(pair)); - } else if (OB_FAIL(ObTabletReplicaChecksumOperator::batch_get(tenant_id, pairs, sql_client, items))) { + } else if (OB_FAIL(ObTabletReplicaChecksumOperator::batch_get(tenant_id, pairs, compaction_scn, sql_client, items))) { LOG_WARN("failed to batch get replica checksum item", K(ret)); } else { ObArray filter_items; diff --git a/src/storage/high_availability/ob_tablet_backfill_tx.cpp b/src/storage/high_availability/ob_tablet_backfill_tx.cpp index d681cab749..a4a0d010a1 100644 --- 
a/src/storage/high_availability/ob_tablet_backfill_tx.cpp +++ b/src/storage/high_availability/ob_tablet_backfill_tx.cpp @@ -619,7 +619,7 @@ int ObTabletTableBackfillTXTask::process() LOG_WARN("tablet table backfill tx task do not init", K(ret)); } else if (OB_FAIL(prepare_merge_ctx_())) { LOG_WARN("failed to prepare merge ctx", K(ret), KPC(this)); - } else if (OB_FAIL(prepare_index_tree_())) { + } else if (OB_FAIL(tablet_merge_ctx_.prepare_index_tree())) { LOG_WARN("failed to prepare index tree", K(ret), KPC(this)); } else if (OB_FAIL(do_backfill_tx_())) { LOG_WARN("failed to do backfill tx", K(ret), KPC(this)); @@ -664,6 +664,7 @@ int ObTabletTableBackfillTXTask::prepare_merge_ctx_() tablet_merge_ctx_.scn_range_ = table_handle_.get_table()->get_key().scn_range_; tablet_merge_ctx_.merge_scn_ = backfill_tx_ctx_->log_sync_scn_; tablet_merge_ctx_.create_snapshot_version_ = 0; + tablet_merge_ctx_.schedule_major_ = false; if (OB_FAIL(tablet_merge_ctx_.tables_handle_.add_table(table_handle_))) { LOG_WARN("failed to add table into tables handle", K(ret), K(table_handle_)); @@ -673,8 +674,6 @@ int ObTabletTableBackfillTXTask::prepare_merge_ctx_() } else { //get_basic_info_from_result result tablet_merge_ctx_.schema_ctx_.base_schema_version_ = tablet_merge_ctx_.schema_ctx_.schema_version_; - tablet_merge_ctx_.create_snapshot_version_ = 0; - tablet_merge_ctx_.schedule_major_ = false; } if (OB_FAIL(ret)) { @@ -747,7 +746,7 @@ int ObTabletTableBackfillTXTask::update_merge_sstable_() tablet_merge_ctx_.sstable_version_range_.multi_version_start_, tablet_merge_ctx_.schema_ctx_.storage_schema_, rebuild_seq, - tablet_merge_ctx_.param_.is_major_merge()); + is_major_merge_type(tablet_merge_ctx_.param_.merge_type_)); ObTabletHandle new_tablet_handle; if (OB_FAIL(ls->update_tablet_table_store( tablet_id_, param, new_tablet_handle))) { @@ -757,56 +756,6 @@ int ObTabletTableBackfillTXTask::update_merge_sstable_() return ret; } -int ObTabletTableBackfillTXTask::prepare_index_tree_() -{ - 
int ret = OB_SUCCESS; - ObDataStoreDesc desc; - if (!is_inited_) { - ret = OB_NOT_INIT; - LOG_WARN("tablet table backfill tx task do not init", K(ret)); - } else if (OB_UNLIKELY(!tablet_merge_ctx_.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid merge ctx", K(ret), K(tablet_merge_ctx_)); - } else if (OB_FAIL(desc.init(*tablet_merge_ctx_.get_merge_schema(), - tablet_merge_ctx_.param_.ls_id_, - tablet_merge_ctx_.param_.tablet_id_, - tablet_merge_ctx_.param_.merge_type_, - tablet_merge_ctx_.sstable_version_range_.snapshot_version_))) { - LOG_WARN("failed to init index store desc", K(ret), K(tablet_merge_ctx_)); - } else { - // TODO(zhuixin.gsy) modify index_desc.init to avoid reset col_desc_array_ - const ObMergeSchema *merge_schema = tablet_merge_ctx_.get_merge_schema(); - desc.row_column_count_ = desc.rowkey_column_count_ + 1; - desc.col_desc_array_.reset(); - desc.need_prebuild_bloomfilter_ = false; - if (OB_FAIL(desc.col_desc_array_.init(desc.row_column_count_))) { - LOG_WARN("failed to reserve column desc array", K(ret)); - } else if (OB_FAIL(merge_schema->get_rowkey_column_ids(desc.col_desc_array_))) { - LOG_WARN("failed to get rowkey column ids", K(ret)); - } else if (OB_FAIL(ObMultiVersionRowkeyHelpper::add_extra_rowkey_cols(desc.col_desc_array_))) { - LOG_WARN("failed to get extra rowkey column ids", K(ret)); - } else { - ObObjMeta meta; - meta.set_varchar(); - meta.set_collation_type(CS_TYPE_BINARY); - share::schema::ObColDesc col; - col.col_id_ = static_cast(desc.row_column_count_ + OB_APP_MIN_COLUMN_ID); - col.col_type_ = meta; - col.col_order_ = DESC; - - if (OB_FAIL(desc.col_desc_array_.push_back(col))) { - LOG_WARN("failed to push back last col for index", K(ret), K(col)); - } - } - } - if (OB_SUCC(ret)) { - if (OB_FAIL(tablet_merge_ctx_.merge_info_.prepare_index_builder(desc))) { - LOG_WARN("failed to prepare index builder", K(ret), K(desc)); - } - } - return ret; -} - /******************ObFinishTabletBackfillTXTask*********************/ 
ObFinishTabletBackfillTXTask::ObFinishTabletBackfillTXTask() : ObITask(TASK_TYPE_MIGRATE_PREPARE), diff --git a/src/storage/high_availability/ob_tablet_backfill_tx.h b/src/storage/high_availability/ob_tablet_backfill_tx.h index 0f1589dda0..da9ba2ba6e 100644 --- a/src/storage/high_availability/ob_tablet_backfill_tx.h +++ b/src/storage/high_availability/ob_tablet_backfill_tx.h @@ -139,7 +139,6 @@ private: int do_backfill_tx_(); int prepare_partition_merge_(); int update_merge_sstable_(); - int prepare_index_tree_(); private: bool is_inited_; diff --git a/src/storage/ls/ob_ls.cpp b/src/storage/ls/ob_ls.cpp index 789bacdc6b..bb019770f7 100644 --- a/src/storage/ls/ob_ls.cpp +++ b/src/storage/ls/ob_ls.cpp @@ -153,6 +153,12 @@ int ObLS::init(const share::ObLSID &ls_id, LOG_WARN("failed to init ls rebuild cb impl", K(ret)); } else if (OB_FAIL(tablet_gc_handler_.init(this))) { LOG_WARN("init tablet gc handler", K(ret)); + } else if (OB_FAIL(reserved_snapshot_mgr_.init(this, &log_handler_))) { + LOG_WARN("failed to init reserved snapshot mgr", K(ret), K(ls_id)); + } else if (OB_FAIL(reserved_snapshot_clog_handler_.init(this))) { + LOG_WARN("failed to init reserved snapshot clog handler", K(ret), K(ls_id)); + } else if (OB_FAIL(medium_compaction_clog_handler_.init(this))) { + LOG_WARN("failed to init medium compaction clog handler", K(ret), K(ls_id)); } else { REGISTER_TO_LOGSERVICE(logservice::TRANS_SERVICE_LOG_BASE_TYPE, &ls_tx_svr_); REGISTER_TO_LOGSERVICE(logservice::STORAGE_SCHEMA_LOG_BASE_TYPE, &ls_tablet_svr_); @@ -160,6 +166,8 @@ int ObLS::init(const share::ObLSID &ls_id, REGISTER_TO_LOGSERVICE(logservice::DDL_LOG_BASE_TYPE, &ls_ddl_log_handler_); REGISTER_TO_LOGSERVICE(logservice::KEEP_ALIVE_LOG_BASE_TYPE, &keep_alive_ls_handler_); REGISTER_TO_LOGSERVICE(logservice::GC_LS_LOG_BASE_TYPE, &gc_handler_); + REGISTER_TO_LOGSERVICE(logservice::RESERVED_SNAPSHOT_LOG_BASE_TYPE, &reserved_snapshot_clog_handler_); + 
REGISTER_TO_LOGSERVICE(logservice::MEDIUM_COMPACTION_LOG_BASE_TYPE, &medium_compaction_clog_handler_); if (ls_id == IDS_LS) { REGISTER_TO_LOGSERVICE(logservice::TIMESTAMP_LOG_BASE_TYPE, MTL(transaction::ObTimestampService *)); @@ -586,7 +594,8 @@ void ObLS::destroy() UNREGISTER_FROM_LOGSERVICE(logservice::DDL_LOG_BASE_TYPE, &ls_ddl_log_handler_); UNREGISTER_FROM_LOGSERVICE(logservice::KEEP_ALIVE_LOG_BASE_TYPE, &keep_alive_ls_handler_); UNREGISTER_FROM_LOGSERVICE(logservice::GC_LS_LOG_BASE_TYPE, &gc_handler_); - + UNREGISTER_FROM_LOGSERVICE(logservice::RESERVED_SNAPSHOT_LOG_BASE_TYPE, &reserved_snapshot_clog_handler_); + UNREGISTER_FROM_LOGSERVICE(logservice::MEDIUM_COMPACTION_LOG_BASE_TYPE, &medium_compaction_clog_handler_); if (ls_meta_.ls_id_ == IDS_LS) { MTL(transaction::ObTransIDService *)->reset_ls(); MTL(transaction::ObTimestampService *)->reset_ls(); @@ -646,6 +655,9 @@ void ObLS::destroy() ls_migration_handler_.destroy(); ls_remove_member_handler_.destroy(); tablet_gc_handler_.reset(); + reserved_snapshot_mgr_.destroy(); + reserved_snapshot_clog_handler_.reset(); + medium_compaction_clog_handler_.reset(); rs_reporter_ = nullptr; is_inited_ = false; tenant_id_ = OB_INVALID_TENANT_ID; @@ -890,6 +902,52 @@ int ObLS::save_base_schema_version() return ret; } +int ObLS::get_ls_role(ObRole &role) +{ + int ret = OB_SUCCESS; + role = INVALID_ROLE; + int64_t proposal_id = 0; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ls is not inited", K(ret)); + } else { + int64_t read_lock = LSLOCKLOG; + int64_t write_lock = 0; + ObLSLockGuard lock_myself(lock_, read_lock, write_lock); + if (OB_FAIL(log_handler_.get_role(role, proposal_id))) { + LOG_WARN("get ls role failed", K(ret), KPC(this)); + } + } + return ret; +} + +int ObLS::try_sync_reserved_snapshot( + const int64_t new_reserved_snapshot, + const bool update_flag) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ls is not inited", K(ret)); + } else { + ObRole role = INVALID_ROLE; 
+ int64_t proposal_id = 0; + int64_t read_lock = LSLOCKLS | LSLOCKLOG; + int64_t write_lock = 0; + ObLSLockGuard lock_myself(lock_, read_lock, write_lock); + if (is_stopped_) { + // do nothing + } else if (OB_FAIL(log_handler_.get_role(role, proposal_id))) { + LOG_WARN("get ls role failed", K(ret), KPC(this)); + } else if (LEADER != role) { + // do nothing + } else { + ret = reserved_snapshot_mgr_.try_sync_reserved_snapshot(new_reserved_snapshot, update_flag); + } + } + return ret; +} + int ObLS::get_ls_info(ObLSVTInfo &ls_info) { int ret = OB_SUCCESS; diff --git a/src/storage/ls/ob_ls.h b/src/storage/ls/ob_ls.h index 5f76e35aa7..9e616570da 100644 --- a/src/storage/ls/ob_ls.h +++ b/src/storage/ls/ob_ls.h @@ -32,6 +32,8 @@ #include "storage/ls/ob_ls_sync_tablet_seq_handler.h" #include "storage/ls/ob_ls_ddl_log_handler.h" #include "storage/tx/wrs/ob_ls_wrs_handler.h" +#include "storage/ls/ob_ls_reserved_snapshot_mgr.h" +#include "storage/ls/ob_ls_storage_clog_handler.h" #include "storage/checkpoint/ob_checkpoint_executor.h" #include "storage/checkpoint/ob_data_checkpoint.h" #include "storage/tx_table/ob_tx_table.h" @@ -194,6 +196,7 @@ public: // get ls info int get_ls_info(ObLSVTInfo &ls_info); + int get_ls_role(ObRole &role); // report the ls replica info to RS. 
int report_replica_info(); @@ -270,6 +273,7 @@ public: ObTabletHandle &handle) const; int flush_if_need(const bool need_flush); + int try_sync_reserved_snapshot(const int64_t new_reserved_snapshot, const bool update_flag); bool is_stopped() const { return is_stopped_; } TO_STRING_KV(K_(ls_meta), K_(log_handler), K_(restore_handler), K_(is_inited), K_(tablet_gc_handler)); @@ -385,6 +389,8 @@ public: // int build_tablet_iter(ObLSTabletIterator &iter); // int build_tablet_iter(ObLSTabletIDIterator &iter); DELEGATE_WITH_RET(ls_tablet_svr_, build_tablet_iter, int); + // update medium compaction info for tablet + DELEGATE_WITH_RET(ls_tablet_svr_, update_medium_compaction_info, int); // trim rebuild tablet // @param [in] tablet_id ObTabletID, is_rollback bool // @param [out] null @@ -632,6 +638,10 @@ public: int try_update_uppder_trans_version(); int diagnose(DiagnoseInfo &info) const; + DELEGATE_WITH_RET(reserved_snapshot_mgr_, replay_reserved_snapshot_log, int); + DELEGATE_WITH_RET(reserved_snapshot_mgr_, get_min_reserved_snapshot, int64_t); + DELEGATE_WITH_RET(reserved_snapshot_mgr_, add_dependent_medium_tablet, int); + DELEGATE_WITH_RET(reserved_snapshot_mgr_, del_dependent_medium_tablet, int); private: // StorageBaseUtil // table manager: create, remove and guard get. @@ -685,6 +695,10 @@ private: ObLSRebuildCbImpl ls_rebuild_cb_impl_; // for tablet gc checkpoint::ObTabletGCHandler tablet_gc_handler_; + // record reserved snapshot + ObLSReservedSnapshotMgr reserved_snapshot_mgr_; + ObLSResvSnapClogHandler reserved_snapshot_clog_handler_; + ObMediumCompactionClogHandler medium_compaction_clog_handler_; private: bool is_inited_; uint64_t tenant_id_; diff --git a/src/storage/ls/ob_ls_reserved_snapshot_mgr.cpp b/src/storage/ls/ob_ls_reserved_snapshot_mgr.cpp new file mode 100644 index 0000000000..6d246946b6 --- /dev/null +++ b/src/storage/ls/ob_ls_reserved_snapshot_mgr.cpp @@ -0,0 +1,312 @@ +//Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. 
+// You can use this software according to the terms and conditions of the Mulan PubL v2.
+// You may obtain a copy of Mulan PubL v2 at:
+// http://license.coscl.org.cn/MulanPubL-2.0
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PubL v2 for more details.
+#define USING_LOG_PREFIX STORAGE
+#include "storage/ls/ob_ls_reserved_snapshot_mgr.h"
+#include "storage/tx_storage/ob_ls_handle.h"
+#include "storage/tx_storage/ob_ls_service.h"
+#include "logservice/ob_log_base_type.h"
+#include "logservice/ob_log_base_header.h"
+
+namespace oceanbase
+{
+using namespace logservice;
+namespace storage
+{
+// Default state: not inited, both snapshot versions 0, no LS bound; clog_cb_ is constructed from *this.
+ObLSReservedSnapshotMgr::ObLSReservedSnapshotMgr()
+  : ObIStorageClogRecorder(),
+    is_inited_(false),
+    allocator_("ResvSnapMgr"),
+    min_reserved_snapshot_(0),
+    next_reserved_snapshot_(0),
+    snapshot_lock_(),
+    ls_(nullptr),
+    ls_handle_(),
+    dependent_tablet_set_(),
+    clog_cb_(*this),
+    last_print_log_ts_(ObTimeUtility::fast_current_time()),
+    clog_buf_()
+{
+}
+
+ObLSReservedSnapshotMgr::~ObLSReservedSnapshotMgr()
+{
+  destroy();
+}
+// Binds the manager to ls/log_handler: inits the base recorder with max_saved_version 0 and creates the tablet set.
+int ObLSReservedSnapshotMgr::init(ObLS *ls, ObLogHandler *log_handler)
+{
+  int ret = OB_SUCCESS;
+  if (IS_INIT) {
+    ret = OB_INIT_TWICE;
+    LOG_WARN("ObLSReservedSnapshotMgr is inited", K(ret), KP(ls));
+  } else if (OB_UNLIKELY(nullptr == ls || nullptr == log_handler)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), K(ls), K(log_handler));
+  } else if (OB_FAIL(ObIStorageClogRecorder::init(0/*max_saved_version*/, log_handler))) {
+    LOG_WARN("failed to init", K(ret), KP(ls), K(log_handler));
+  } else if (OB_FAIL(dependent_tablet_set_.create(HASH_BUCKET))) {
+    LOG_WARN("failed to create hash set", K(ret), K(ls));
+  } else {
+    ls_ = ls;
+    is_inited_ = true;
+    LOG_INFO("success to init snapshot mgr", K(ret), KP(ls), "ls_id", ls_->get_ls_id(), KP(this));
+  }
+  return ret;
+}
+// Resets every member and destroys the tablet set if it was created; also invoked by the destructor.
+void ObLSReservedSnapshotMgr::destroy()
+{
+  is_inited_ = false;
+  ObIStorageClogRecorder::destroy();
+  clog_cb_.reset();
+  min_reserved_snapshot_ = 0;
+  next_reserved_snapshot_ = 0;
+  ls_ = nullptr;
+  ls_handle_.reset();
+  last_print_log_ts_ = 0;
+  if (dependent_tablet_set_.created()) {
+    dependent_tablet_set_.destroy();
+  }
+}
+// Registers tablet_id in the dependent set under the write lock; returns OB_ENTRY_EXIST if it is already there.
+int ObLSReservedSnapshotMgr::add_dependent_medium_tablet(const ObTabletID tablet_id)
+{
+  int ret = OB_SUCCESS;
+  int hash_ret = OB_SUCCESS;
+  if (IS_NOT_INIT) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("ObLSReservedSnapshotMgr is not inited", K(ret), K(tablet_id));
+  } else {
+    common::TCWLockGuard lock_guard(snapshot_lock_);
+    if (OB_HASH_EXIST == (hash_ret = dependent_tablet_set_.exist_refactored(tablet_id.id()))) {
+      ret = OB_ENTRY_EXIST; // tablet is already registered
+    } else if (OB_UNLIKELY(OB_HASH_NOT_EXIST != hash_ret)) {
+      ret = OB_ERR_UNEXPECTED;
+      LOG_WARN("failed to check exist in tablet set", K(ret), K(hash_ret),
+          "ls_id", ls_->get_ls_id(), K(tablet_id));
+    } else if (OB_FAIL(dependent_tablet_set_.set_refactored(tablet_id.id()))) {
+      LOG_WARN("failed to set tablet_id", K(ret), "ls_id", ls_->get_ls_id(), K(tablet_id));
+    }
+  }
+  return ret;
+}
+// Removes tablet_id; once the set is empty, a deferred next_reserved_snapshot_ becomes the minimum and is synced.
+int ObLSReservedSnapshotMgr::del_dependent_medium_tablet(const ObTabletID tablet_id)
+{
+  int ret = OB_SUCCESS;
+  int64_t new_snapshot_version = 0;
+  if (IS_NOT_INIT) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("ObLSReservedSnapshotMgr is not inited", K(ret), K(tablet_id));
+  } else {
+    common::TCWLockGuard lock_guard(snapshot_lock_);
+    if (OB_FAIL(dependent_tablet_set_.erase_refactored(tablet_id.id()))) {
+      LOG_WARN("failed to erase tablet id", K(ret), "ls_id", ls_->get_ls_id(),
+          K(tablet_id), K(dependent_tablet_set_.size()), KP(this));
+    } else if (0 == dependent_tablet_set_.size() && next_reserved_snapshot_ > 0) {
+      min_reserved_snapshot_ = next_reserved_snapshot_; // NOTE(review): not compared with the current minimum - confirm it cannot move backwards
+      new_snapshot_version = next_reserved_snapshot_;
+      next_reserved_snapshot_ = 0;
+    }
+  } // snapshot_lock_ is released here; the sync below runs without it
+
+  if (OB_SUCC(ret) && new_snapshot_version > 0) {
+    int tmp_ret = OB_SUCCESS; // a failed sync is only logged, it does not change ret
+    if (OB_TMP_FAIL(ls_->try_sync_reserved_snapshot(new_snapshot_version, false/*update_flag*/))) {
+      LOG_WARN("failed to send update reserved snapshot log", K(tmp_ret), K(new_snapshot_version));
+    }
+  }
+  return ret;
+}
+// Reads min_reserved_snapshot_ under the read lock.
+int64_t ObLSReservedSnapshotMgr::get_min_reserved_snapshot()
+{
+  common::TCRLockGuard lock_guard(snapshot_lock_);
+  return min_reserved_snapshot_;
+}
+// Out-of-class definition of the static member constant.
+const int64_t ObLSReservedSnapshotMgr::CLOG_BUF_LEN;
+int ObLSReservedSnapshotMgr::submit_log(
+    const int64_t reserved_snapshot,
+    const char *clog_buf,
+    const int64_t clog_len)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(nullptr == clog_buf || clog_len <= 0)) {
+    ret = OB_ERR_UNEXPECTED;
+    LOG_WARN("clog_buf or clog_len is invalid", K(ret), KP(clog_buf), K(clog_len));
+  } else if (OB_FAIL(write_clog(clog_buf, clog_len))) {
+    LOG_WARN("fail to submit log", K(ret), "ls_id", ls_->get_ls_id());
+  } else {
+    LOG_DEBUG("submit reserved snapshot log success", "ls_id", ls_->get_ls_id(), K(reserved_snapshot));
+  }
+  // reserved_snapshot is used only by the debug log above; the payload is whatever clog_buf holds
+  return ret;
+}
+// Leader path: advances the minimum and sends a clog when no tablet depends on it, otherwise defers the version.
+int ObLSReservedSnapshotMgr::update_min_reserved_snapshot_for_leader(const int64_t new_snapshot_version)
+{
+  int ret = OB_SUCCESS;
+  bool send_log_flag = false;
+  if (IS_NOT_INIT) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("ObLSReservedSnapshotMgr is not inited", K(ret), KP(ls_));
+  } else {
+    common::TCWLockGuard lock_guard(snapshot_lock_);
+    if (0 == dependent_tablet_set_.size()) {
+      if (new_snapshot_version < min_reserved_snapshot_) {
+        ret = OB_INVALID_ARGUMENT;
+        LOG_WARN("failed to update min reserved snapshot", K(ret), K(new_snapshot_version),
+            K(min_reserved_snapshot_));
+      } else if (new_snapshot_version > min_reserved_snapshot_) {
+        // no dependent tablet: take the new minimum now, drop any deferred version, and send a clog
+        min_reserved_snapshot_ = new_snapshot_version;
+        next_reserved_snapshot_ = 0;
+        send_log_flag = true;
+      }
+    } else if (new_snapshot_version > next_reserved_snapshot_) {
+      // tablets still registered: only remember the largest requested version for del_dependent_medium_tablet
+      next_reserved_snapshot_ = new_snapshot_version;
+    }
+  } // snapshot_lock_ is released here; the clog is submitted without it
+
+  if (OB_SUCC(ret) && send_log_flag) {
+    if (OB_FAIL(try_update_for_leader(new_snapshot_version, nullptr/*allocator*/))) {
+      LOG_WARN("failed to send update reserved snapshot log", K(ret), K(new_snapshot_version));
+    } else if (need_print_log()) {
+      LOG_INFO("submit reserved snapshot log success", "ls_id", ls_->get_ls_id(),
+          K(new_snapshot_version));
+    }
+  }
+  return ret;
+}
+// update_flag=true: go through update_min_reserved_snapshot_for_leader(); false: only submit the clog for the version.
+int ObLSReservedSnapshotMgr::try_sync_reserved_snapshot(
+    const int64_t new_reserved_snapshot,
+    const bool update_flag)
+{
+  int ret = OB_SUCCESS;
+  if (IS_NOT_INIT) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("ObLSReservedSnapshotMgr not inited", K(ret), KP(ls_));
+  } else if (OB_UNLIKELY(new_reserved_snapshot < 0)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), K(new_reserved_snapshot));
+  } else if (update_flag) {
+    if (OB_FAIL(update_min_reserved_snapshot_for_leader(new_reserved_snapshot))) {
+      LOG_WARN("failed to update min_reserved_snapshot", K(ret), K(new_reserved_snapshot));
+    }
+  } else if (OB_FAIL(try_update_for_leader(new_reserved_snapshot, nullptr/*allocator*/))) {
+    LOG_WARN("failed to send update reserved snapshot log", K(ret), K(new_reserved_snapshot));
+  } else if (need_print_log()) {
+    LOG_INFO("submit reserved snapshot log success", "ls_id", ls_->get_ls_id(),
+        K(new_reserved_snapshot));
+  }
+  return ret;
+}
+// Replay path: decodes the int64 snapshot at buf[pos] (pos is advanced) and forwards it to the base replay_clog().
+int ObLSReservedSnapshotMgr::replay_reserved_snapshot_log(
+    const share::SCN &scn, const char *buf, const int64_t size, int64_t &pos)
+{
+  int ret = OB_SUCCESS;
+  int64_t reserved_snapshot = OB_INVALID_VERSION;
+  if (IS_NOT_INIT) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("ObLSReservedSnapshotMgr not inited", K(ret), KP(ls_));
+  } else if (OB_FAIL(serialization::decode_i64(buf, size, pos, &reserved_snapshot))) {
+    LOG_WARN("fail to deserialize reserved_snapshot", K(ret), "ls_id", ls_->get_ls_id());
+  } else if (OB_FAIL(ObIStorageClogRecorder::replay_clog(reserved_snapshot, scn, buf, size, pos))) {
+    LOG_WARN("failed to update reserved snapshot by log", K(ret), "ls_id", ls_->get_ls_id(),
+        K(min_reserved_snapshot_), K(reserved_snapshot));
+  }
+  return ret;
+}
+
+// Replay hook: update_version is already decoded, so scn/buf/size/pos are unused here.
+int ObLSReservedSnapshotMgr::inner_replay_clog(
+    const int64_t update_version,
+    const share::SCN &scn,
+    const char *buf,
+    const int64_t size,
+    int64_t &pos)
+{
+  UNUSEDx(scn, buf, size, pos);
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(inner_update_reserved_snapshot(update_version))) {
+    LOG_WARN("failed to update reserved_snapshot", K(ret), K(update_version));
+  }
+  return ret;
+}
+// Leader hook: applies update_version through the same path as replay.
+int ObLSReservedSnapshotMgr::sync_clog_succ_for_leader(const int64_t update_version)
+{
+  int ret = OB_SUCCESS;
+  if (OB_FAIL(inner_update_reserved_snapshot(update_version))) {
+    LOG_WARN("failed to update reserved_snapshot", K(ret), K(update_version));
+  }
+  return ret;
+}
+// Raises min_reserved_snapshot_ to reserved_snapshot under the write lock; smaller or equal values are ignored.
+int ObLSReservedSnapshotMgr::inner_update_reserved_snapshot(const int64_t reserved_snapshot)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(reserved_snapshot < 0)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), K(reserved_snapshot));
+  } else {
+    common::TCWLockGuard lock_guard(snapshot_lock_);
+    if (reserved_snapshot > min_reserved_snapshot_) {
+      min_reserved_snapshot_ = reserved_snapshot;
+      LOG_INFO("success to update reserved snapshot", K(ret), "ls_id", ls_->get_ls_id(),
+          K(min_reserved_snapshot_));
+    }
+  }
+  return ret;
+}
+// Builds the log payload in clog_buf_ (base header + update_version) and hands the buffer and callback to the caller.
+int ObLSReservedSnapshotMgr::prepare_struct_in_lock(
+    int64_t &update_version,
+    ObIAllocator *allocator,
+    char *&clog_buf,
+    int64_t &clog_len)
+{
+  UNUSED(allocator);
+  clog_buf = nullptr;
+  clog_len = 0;
+  int ret = OB_SUCCESS;
+  int64_t pos = 0;
+  // header: RESERVED_SNAPSHOT log type with the PRE_BARRIER replay barrier
+  const ObLogBaseHeader log_header(
+      ObLogBaseType::RESERVED_SNAPSHOT_LOG_BASE_TYPE,
+      ObReplayBarrierType::PRE_BARRIER/*need_replay_pre_barrier*/);
+  // take an LS handle into ls_handle_ (reset again in free_struct_in_lock), then serialize into clog_buf_
+  if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_->get_ls_id(), ls_handle_, ObLSGetMod::STORAGE_MOD))) {
+    LOG_WARN("failed to get log stream", K(ret), "ls_id", ls_->get_ls_id());
+  } else if (OB_FAIL(log_header.serialize(clog_buf_, CLOG_BUF_LEN, pos))) {
+    LOG_WARN("failed to serialize log header", K(ret));
+  } else if (OB_FAIL(serialization::encode_i64(clog_buf_, CLOG_BUF_LEN, pos, update_version))) {
+    LOG_WARN("generate reserved snapshot log", K(ret), "ls_id", ls_->get_ls_id(), K(pos), K(CLOG_BUF_LEN));
+  } else {
+    logcb_ptr_ = &clog_cb_;
+    clog_buf = clog_buf_;
+    clog_len = pos;
+  }
+  return ret;
+}
+// Releases what prepare_struct_in_lock() acquired: the LS handle and the callback state.
+void ObLSReservedSnapshotMgr::free_struct_in_lock()
+{
+  ls_handle_.reset();
+  clog_cb_.reset();
+}
+
+} // namespace storage
+} // namespace oceanbase
diff --git a/src/storage/ls/ob_ls_reserved_snapshot_mgr.h b/src/storage/ls/ob_ls_reserved_snapshot_mgr.h
new file mode 100644
index 0000000000..a524512213
--- /dev/null
+++ b/src/storage/ls/ob_ls_reserved_snapshot_mgr.h
@@ -0,0 +1,111 @@
+//Copyright (c) 2021 OceanBase
+// OceanBase is licensed under Mulan PubL v2.
+// You can use this software according to the terms and conditions of the Mulan PubL v2.
+// You may obtain a copy of Mulan PubL v2 at:
+// http://license.coscl.org.cn/MulanPubL-2.0
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PubL v2 for more details.
+
+#ifndef OB_STORAGE_STORAGE_LS_RESERVED_SNAPSHOT_MGR_H_
+#define OB_STORAGE_STORAGE_LS_RESERVED_SNAPSHOT_MGR_H_
+
+#include "logservice/ob_append_callback.h"
+#include "lib/utility/ob_print_utils.h"
+#include "lib/hash/ob_hashset.h"
+#include "share/ob_ls_id.h"
+#include "common/ob_tablet_id.h"
+#include "lib/lock/ob_tc_rwlock.h"
+#include "storage/ob_storage_clog_recorder.h"
+#include "logservice/ob_log_base_header.h"
+#include "storage/tx_storage/ob_ls_handle.h"
+
+namespace oceanbase
+{
+namespace logservice
+{
+class ObLogHandler;
+} // namespace logservice
+
+namespace storage
+{
+class ObLS;
+// Per-LS holder of the minimum reserved snapshot; a clog recorder that also tracks tablets depending on it.
+class ObLSReservedSnapshotMgr : public ObIStorageClogRecorder
+{
+public:
+  ObLSReservedSnapshotMgr();
+  ~ObLSReservedSnapshotMgr();
+
+  int init(storage::ObLS *ls, logservice::ObLogHandler *log_handler);
+  virtual void destroy() override;
+
+  // for leader: update_flag selects between updating the minimum first and only submitting the log
+  int try_sync_reserved_snapshot(const int64_t new_reserved_snapshot, const bool update_flag);
+  // for follower: replays one reserved snapshot log entry starting at buf[pos]
+  int replay_reserved_snapshot_log(const share::SCN &scn, const char *buf, const int64_t size, int64_t &pos);
+  // both take the write lock on snapshot_lock_ internally
+  int add_dependent_medium_tablet(const ObTabletID tablet_id);
+  int del_dependent_medium_tablet(const ObTabletID tablet_id);
+
+  int64_t get_min_reserved_snapshot();
+
+private:
+  int update_min_reserved_snapshot_for_leader(const int64_t new_reserved_snapshot);
+  int inner_update_reserved_snapshot(const int64_t reserved_snapshot);
+  // ObIStorageClogRecorder hooks
+  virtual int inner_replay_clog(
+      const int64_t update_version,
+      const share::SCN &scn,
+      const char *buf,
+      const int64_t size,
+      int64_t &pos) override;
+  virtual int sync_clog_succ_for_leader(const int64_t update_version) override;
+  virtual void sync_clog_failed_for_leader() override
+  {
+    // nothing to roll back on a failed sync
+  }
+  virtual int submit_log(
+      const int64_t update_version,
+      const char *clog_buf,
+      const int64_t clog_len) override;
+  virtual int prepare_struct_in_lock(
+      int64_t &update_version,
+      ObIAllocator *allocator,
+      char *&clog_buf,
+      int64_t &clog_len) override;
+  virtual void free_struct_in_lock() override;
+
+private:
+  static const int64_t CLOG_BUF_LEN = sizeof(logservice::ObLogBaseHeader) + sizeof(int64_t);
+  static const int64_t HASH_BUCKET = 64;
+  static const int64_t PRINT_LOG_INTERVAL = 20 * 1000 * 1000L;
+  OB_INLINE bool need_print_log() // true at most once per PRINT_LOG_INTERVAL; refreshes last_print_log_ts_ when it fires
+  {
+    bool bret = false;
+    if (last_print_log_ts_ + PRINT_LOG_INTERVAL <= ObTimeUtility::fast_current_time()) {
+      last_print_log_ts_ = ObTimeUtility::fast_current_time();
+      bret = true;
+    }
+    return bret;
+  }
+
+  bool is_inited_;
+  common::ObArenaAllocator allocator_;
+  int64_t min_reserved_snapshot_;
+  int64_t next_reserved_snapshot_; // version deferred while dependent tablets exist; 0 when none is pending
+  mutable common::TCRWLock snapshot_lock_;
+  storage::ObLS *ls_;
+  ObLSHandle ls_handle_;
+  common::hash::ObHashSet dependent_tablet_set_; // holds tablet_id.id(); NOTE(review): template arguments look stripped in this extract - confirm against the repository
+  ObStorageCLogCb clog_cb_;
+  int64_t last_print_log_ts_;
+  // fixed buffer for one serialized log: base header + int64 version
+  char clog_buf_[CLOG_BUF_LEN];
+};
+
+} // namespace storage
+} // namespace oceanbase
+
+#endif
diff --git a/src/storage/ls/ob_ls_storage_clog_handler.cpp b/src/storage/ls/ob_ls_storage_clog_handler.cpp
new file mode 100644
index 0000000000..0e77abfff8
--- /dev/null
+++ b/src/storage/ls/ob_ls_storage_clog_handler.cpp
@@ -0,0 +1,128 @@
+//Copyright (c) 2021 OceanBase
+// OceanBase is licensed under Mulan PubL v2.
+// You can use this software according to the terms and conditions of the Mulan PubL v2.
+// You may obtain a copy of Mulan PubL v2 at:
+// http://license.coscl.org.cn/MulanPubL-2.0
+// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+// See the Mulan PubL v2 for more details.
+#define USING_LOG_PREFIX STORAGE
+#include "storage/ls/ob_ls_storage_clog_handler.h"
+#include "storage/tx_storage/ob_ls_handle.h"
+#include "storage/tx_storage/ob_ls_service.h"
+#include "logservice/ob_log_base_type.h"
+#include "logservice/ob_log_base_header.h"
+
+namespace oceanbase
+{
+using namespace logservice;
+namespace storage
+{
+// Binds the handler to its log stream; OB_INIT_TWICE on a second call, OB_INVALID_ARGUMENT for a null ls.
+int ObLSStorageClogHandler::init(ObLS *ls)
+{
+  int ret = OB_SUCCESS;
+  if (IS_INIT) {
+    ret = OB_INIT_TWICE;
+    LOG_WARN("ObLSStorageClogHandler is inited", K(ret), KP(ls));
+  } else if (OB_ISNULL(ls)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), KP(ls));
+  } else {
+    ls_ = ls;
+    is_inited_ = true;
+  }
+  return ret;
+}
+// Drops the LS binding and marks the handler as not inited.
+void ObLSStorageClogHandler::reset()
+{
+  is_inited_ = false;
+  ls_ = nullptr;
+}
+
+// Replay entry point: validates the arguments, deserializes the ObLogBaseHeader and passes the rest to inner_replay().
+int ObLSStorageClogHandler::replay(
+    const void *buffer,
+    const int64_t nbytes,
+    const palf::LSN &lsn,
+    const share::SCN &scn)
+{
+  int ret = OB_SUCCESS;
+  const char *buf = nullptr;
+  ObLogBaseHeader base_header;
+  int64_t pos = 0;
+  if (IS_NOT_INIT) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("not inited", K(ret), K_(is_inited));
+  } else if (OB_UNLIKELY(nullptr == buffer
+      || nbytes <= 0
+      || !lsn.is_valid()
+      || !scn.is_valid())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid arguments", K(ret), K(buffer), K(nbytes), K(lsn), K(scn));
+  } else if (FALSE_IT(buf = static_cast<const char *>(buffer))) {
+  } else if (OB_FAIL(base_header.deserialize(buf, nbytes, pos))) {
+    LOG_WARN("log base header deserialize error", K(ret));
+  } else if (OB_FAIL(inner_replay(base_header, scn, buf, nbytes, pos))) { // pos now points just past the base header
+    LOG_WARN("failed to replay storage clog", K(ret));
+  }
+  return ret;
+}
+// Accepts only RESERVED_SNAPSHOT logs and forwards the payload to the LS reserved snapshot replay.
+int ObLSResvSnapClogHandler::inner_replay(
+    const ObLogBaseHeader &base_header,
+    const share::SCN &scn,
+    const char *buffer,
+    const int64_t buffer_size,
+    int64_t &pos)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(pos < 0 || buffer_size <= 0 || pos > buffer_size)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), K(pos), K(buffer_size));
+  } else if (ObLogBaseType::RESERVED_SNAPSHOT_LOG_BASE_TYPE != base_header.get_log_type()) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("log header is not valid", K(ret), K(base_header));
+  } else if (OB_FAIL(ls_->replay_reserved_snapshot_log(scn, buffer, buffer_size, pos))) {
+    LOG_WARN("failed to replay update reserved snapshot", K(ret));
+  }
+  return ret;
+}
+// Accepts only MEDIUM_COMPACTION logs: decodes the tablet id, then lets that tablet replay the remaining payload.
+int ObMediumCompactionClogHandler::inner_replay(
+    const ObLogBaseHeader &base_header,
+    const share::SCN &scn,
+    const char *buffer,
+    const int64_t buffer_size,
+    int64_t &pos)
+{
+  int ret = OB_SUCCESS;
+  ObTabletID tablet_id;
+  ObTabletHandle handle;
+  int64_t new_pos = pos;
+  // decoding advances the local new_pos only; the caller's pos is left unchanged by this handler
+  if (OB_UNLIKELY(pos < 0 || buffer_size <= 0 || pos > buffer_size)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), K(pos), K(buffer_size));
+  } else if (ObLogBaseType::MEDIUM_COMPACTION_LOG_BASE_TYPE != base_header.get_log_type()) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("log header is not valid", K(ret), K(base_header));
+  } else if (OB_FAIL(tablet_id.deserialize(buffer, buffer_size, new_pos))) {
+    LOG_WARN("fail to deserialize tablet id", K(ret), K(buffer_size), K(pos), K(tablet_id));
+  } else if (OB_FAIL(ls_->replay_get_tablet(tablet_id, scn, handle))) {
+    if (OB_TABLET_NOT_EXIST == ret) {
+      LOG_INFO("tablet not exist", K(ret), K(tablet_id));
+      ret = OB_SUCCESS; // a missing tablet is not an error for replay: the entry is skipped
+    } else {
+      LOG_WARN("failed to get tablet", K(ret), K(tablet_id));
+    }
+  } else if (OB_FAIL(handle.get_obj()->replay_medium_compaction_clog(scn, buffer, buffer_size, new_pos))) {
+    LOG_WARN("failed to replay medium compaction clog", K(ret), K(tablet_id), K(buffer_size), K(new_pos));
+  }
+  return ret;
+}
+
+} // namespace storage
+} // namespace oceanbase
diff --git a/src/storage/ls/ob_ls_storage_clog_handler.h b/src/storage/ls/ob_ls_storage_clog_handler.h
new file mode 100644
index 0000000000..4544e918ce
--- /dev/null
+++ b/src/storage/ls/ob_ls_storage_clog_handler.h
@@ -0,0 +1,109 @@
+//Copyright (c)
2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. + +#ifndef OB_STORAGE_STORAGE_LS_STORAGE_CLOG_HANDLER_H_ +#define OB_STORAGE_STORAGE_LS_STORAGE_CLOG_HANDLER_H_ + +#include "logservice/ob_log_base_header.h" +#include "share/scn.h" +namespace oceanbase +{ +namespace storage +{ +class ObLS; + +class ObLSStorageClogHandler : public logservice::ObIReplaySubHandler, + public logservice::ObIRoleChangeSubHandler, + public logservice::ObICheckpointSubHandler +{ +public: + ObLSStorageClogHandler() : is_inited_(false), ls_(nullptr) {} + virtual ~ObLSStorageClogHandler() { reset(); } + +public: + int init(ObLS *ls); + void reset(); + + // for replay + int replay( + const void *buffer, + const int64_t nbytes, + const palf::LSN &lsn, + const share::SCN &scn) override final; + + // for role change + void switch_to_follower_forcedly() override final + { + } + int switch_to_leader() override final + { + return OB_SUCCESS; + } + int switch_to_follower_gracefully() override final + { + return OB_SUCCESS; + } + int resume_leader() override final + { + return OB_SUCCESS; + } + + // for checkpoint + int flush(share::SCN &rec_scn) override final + { + UNUSED(rec_scn); + return OB_SUCCESS; + } + share::SCN get_rec_scn() override final + { + return share::SCN::max_scn(); + } + +private: + virtual int inner_replay( + const logservice::ObLogBaseHeader &base_header, + const share::SCN &scn, + const char *buffer, + const int64_t buffer_size, + int64_t &pos) = 0; + + bool is_inited_; + +protected: + ObLS *ls_; +}; + +class ObLSResvSnapClogHandler : 
public ObLSStorageClogHandler +{ +protected: + virtual int inner_replay( + const logservice::ObLogBaseHeader &base_header, + const share::SCN &scn, + const char *buffer, + const int64_t buffer_size, + int64_t &pos) override final; +}; + +class ObMediumCompactionClogHandler : public ObLSStorageClogHandler +{ +protected: + virtual int inner_replay( + const logservice::ObLogBaseHeader &base_header, + const share::SCN &scn, + const char *buffer, + const int64_t buffer_size, + int64_t &pos) override final; +}; + + +} // namespace storage +} // namespace oceanbase + +#endif diff --git a/src/storage/ls/ob_ls_tablet_service.cpp b/src/storage/ls/ob_ls_tablet_service.cpp index 001fc65774..54438fea1d 100644 --- a/src/storage/ls/ob_ls_tablet_service.cpp +++ b/src/storage/ls/ob_ls_tablet_service.cpp @@ -648,6 +648,94 @@ int ObLSTabletService::get_tablet_addr(const ObTabletMapKey &key, ObMetaDiskAddr return ret; } +int ObLSTabletService::replay_update_storage_schema( + const share::SCN &scn, + const char *buf, + const int64_t buf_size, + const int64_t pos) +{ + int ret = OB_SUCCESS; + ObTabletID tablet_id; + ObTabletHandle handle; + int64_t new_pos = pos; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (OB_UNLIKELY(buf_size <= pos || pos < 0 || buf_size <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(buf_size), K(pos)); + } else if (OB_FAIL(tablet_id.deserialize(buf, buf_size, new_pos))) { + LOG_WARN("fail to deserialize tablet id", K(ret), K(buf_size), K(pos), K(tablet_id)); + } else if (OB_FAIL(direct_get_tablet(tablet_id, handle))) { + if (OB_TABLET_NOT_EXIST == ret) { + LOG_INFO("tablet not exist", K(ret), K(tablet_id)); + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get tablet", K(ret), K(tablet_id)); + } + } else if (OB_FAIL(handle.get_obj()->replay_update_storage_schema(scn, buf, buf_size, new_pos))) { + LOG_WARN("update tablet storage schema fail", K(ret), K(tablet_id), K(buf_size), 
K(new_pos)); + } + return ret; +} + +int ObLSTabletService::replay_medium_compaction_clog( + const share::SCN &scn, + const char *buf, + const int64_t buf_size, + const int64_t pos) +{ + int ret = OB_SUCCESS; + ObTabletID tablet_id; + ObTabletHandle handle; + int64_t new_pos = pos; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (OB_UNLIKELY(buf_size <= pos || pos < 0 || buf_size <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(buf_size), K(pos)); + } else if (OB_FAIL(tablet_id.deserialize(buf, buf_size, new_pos))) { + LOG_WARN("fail to deserialize tablet id", K(ret), K(buf_size), K(pos), K(tablet_id)); + } else if (OB_FAIL(direct_get_tablet(tablet_id, handle))) { + if (OB_TABLET_NOT_EXIST == ret) { + LOG_INFO("tablet not exist", K(ret), K(tablet_id)); + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get tablet", K(ret), K(tablet_id)); + } + } else if (OB_FAIL(handle.get_obj()->replay_medium_compaction_clog(scn, buf, buf_size, new_pos))) { + LOG_WARN("update tablet storage schema fail", K(ret), K(tablet_id), K(buf_size), K(new_pos)); + } + return ret; +} + +int ObLSTabletService::replay_update_reserved_snapshot( + const share::SCN &scn, + const char *buf, + const int64_t buf_size, + const int64_t pos) +{ + int ret = OB_SUCCESS; + int64_t new_pos = pos; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (OB_UNLIKELY(buf_size <= pos || pos < 0 || buf_size <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(buf_size), K(pos)); + } else if (OB_ISNULL(ls_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("ls is null", K(ret), KPC(ls_)); + } else if (OB_FAIL(ls_->replay_reserved_snapshot_log(scn, buf, buf_size, new_pos))) { + LOG_WARN("replay reserved snapshot log fail", K(ret), KPC(ls_), K(buf_size), K(new_pos)); + } + return ret; +} + /** int ObLSTabletService::report_update_tablet( const ObTabletHandle *old_tablet_handle, @@ -1163,6 
+1251,58 @@ int ObLSTabletService::update_tablet_table_store(
   return ret;
 }
 
+int ObLSTabletService::update_medium_compaction_info(
+    const common::ObTabletID &tablet_id,
+    ObTabletHandle &handle)
+{
+  int ret = OB_SUCCESS;
+  ObTabletHandle old_tablet_handle;
+  ObTimeGuard time_guard("UpdateMediumInfo", 3000000/*3 seconds*/);
+  ObBucketHashWLockGuard lock_guard(bucket_lock_, tablet_id.hash());
+  time_guard.click("Lock");
+  // under the bucket write lock: build a new tablet from the old one with refreshed medium info and swap it in
+  if (IS_NOT_INIT) {
+    ret = OB_NOT_INIT;
+    LOG_WARN("not inited", K(ret), K_(is_inited));
+  } else if (OB_UNLIKELY(!tablet_id.is_valid())) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid args", K(ret), K(tablet_id));
+  } else if (OB_FAIL(direct_get_tablet(tablet_id, old_tablet_handle))) {
+    LOG_WARN("failed to check and get tablet", K(ret), K(tablet_id));
+  } else {
+    ObTabletHandle new_tablet_handle;
+    ObTablet *new_tablet = nullptr;
+    ObTablet *old_tablet = old_tablet_handle.get_obj();
+    const share::ObLSID &ls_id = ls_->get_ls_id();
+    const ObTabletMapKey key(ls_id, tablet_id);
+    ObMetaDiskAddr disk_addr;
+    ObTenantMetaMemMgr *t3m = MTL(ObTenantMetaMemMgr*);
+    // acquire -> init from old tablet -> write slog -> compare-and-swap; handle is set only if every step succeeds
+    if (OB_FAIL(ObTabletCreateDeleteHelper::acquire_tablet(key, new_tablet_handle))) {
+      if (OB_ENTRY_NOT_EXIST == ret) {
+        ret = OB_TABLET_NOT_EXIST;
+      } else {
+        LOG_WARN("failed to acquire tablet", K(ret), K(key));
+      }
+    } else if (FALSE_IT(new_tablet = new_tablet_handle.get_obj())) {
+    } else if (OB_FAIL(new_tablet->init_with_update_medium_info(*old_tablet))) {
+      LOG_WARN("failed to init tablet", K(ret), KPC(old_tablet));
+    } else if (FALSE_IT(time_guard.click("InitNew"))) {
+    } else if (OB_FAIL(ObTabletSlogHelper::write_create_tablet_slog(new_tablet_handle, disk_addr))) {
+      LOG_WARN("fail to write update tablet slog", K(ret), K(new_tablet_handle), K(disk_addr));
+    } else if (FALSE_IT(time_guard.click("WrSlog"))) {
+    } else if (OB_FAIL(t3m->compare_and_swap_tablet(key,
+        disk_addr, old_tablet_handle, new_tablet_handle))) {
+      LOG_WARN("failed to compare and swap tablet", K(ret), K(key), K(disk_addr), K(old_tablet_handle));
+    } else if (FALSE_IT(time_guard.click("CASwap"))) {
+    } else {
+      handle = new_tablet_handle;
+    }
+  }
+
+  return ret;
+}
+
 int ObLSTabletService::choose_msd(
     const ObUpdateTableStoreParam &param,
     const ObTablet &old_tablet,
@@ -1463,7 +1603,10 @@ int ObLSTabletService::replay_create_tablet(
   } else if (OB_FAIL(new_tablet->deserialize(allocator, buf, buf_len, pos))) {
     LOG_WARN("fail to deserialize tablet", K(ret), K(buf), K(buf_len), K(pos));
   } else if (OB_FAIL(new_tablet->init_shared_params(ls_id, tablet_id,
-      new_tablet->get_tablet_meta().max_sync_storage_schema_version_, freezer))) {
+      new_tablet->get_tablet_meta().max_sync_storage_schema_version_,
+      new_tablet->get_tablet_meta().max_serialized_medium_scn_,
+      new_tablet->get_tablet_meta().compat_mode_,
+      freezer))) {
     LOG_WARN("failed to init shared params", K(ret), K(ls_id), K(tablet_id));
   } else if (OB_FAIL(refresh_tablet_addr(ls_id, tablet_id, disk_addr, new_tablet_handle))) {
     LOG_WARN("failed to refresh tablet addr", K(ret), K(ls_id), K(tablet_id), K(disk_addr));
@@ -5705,6 +5848,8 @@ int ObLSTabletService::GetAllTabletIDOperator::operator()(const common::ObTablet
   if (OB_UNLIKELY(!tablet_id.is_valid())) {
     ret = OB_INVALID_ARGUMENT;
     LOG_WARN("invalid arguments", K(ret), K(tablet_id));
+  } else if (except_ls_inner_tablet_ && tablet_id.is_ls_inner_tablet()) {
+    // LS inner tablets are filtered out on request: nothing is pushed
   } else if (OB_FAIL(tablet_ids_.push_back(tablet_id))) {
     LOG_WARN("failed to push back tablet id", K(ret), K(tablet_id));
   }
@@ -5783,6 +5928,17 @@ void ObLSTabletService::AllowToReadMgr::check_read_info_same(
   is_same = read_info == current_read_info;
 }
 
+int ObLSTabletService::get_all_tablet_ids(
+    const bool except_ls_inner_tablet,
+    common::ObIArray<common::ObTabletID> &tablet_id_array)
+{
+  int ret = OB_SUCCESS;
+  GetAllTabletIDOperator op(tablet_id_array, except_ls_inner_tablet); // collects ids, skipping LS inner tablets if asked
+  if (OB_FAIL(tablet_id_set_.foreach(op))) {
+    LOG_WARN("failed to traverse tablet id set", K(ret));
+  }
+  return ret;
+}
 } // namespace
storage } // namespace oceanbase diff --git a/src/storage/ls/ob_ls_tablet_service.h b/src/storage/ls/ob_ls_tablet_service.h index 73b203d69a..f36dae6630 100644 --- a/src/storage/ls/ob_ls_tablet_service.h +++ b/src/storage/ls/ob_ls_tablet_service.h @@ -240,11 +240,31 @@ public: int get_bf_optimal_prefix(int64_t &prefix); int64_t get_tablet_count() const; + // clog replay + int replay_update_storage_schema( + const share::SCN &scn, + const char *buf, + const int64_t buf_size, + const int64_t pos); + int replay_medium_compaction_clog( + const share::SCN &scn, + const char *buf, + const int64_t buf_size, + const int64_t pos); + int replay_update_reserved_snapshot( + const share::SCN &scn, + const char *buf, + const int64_t buf_size, + const int64_t pos); + // update tablet int update_tablet_table_store( const common::ObTabletID &tablet_id, const ObUpdateTableStoreParam ¶m, ObTabletHandle &handle); + int update_medium_compaction_info( + const common::ObTabletID &tablet_id, + ObTabletHandle &handle); int update_tablet_table_store( // only for small sstables defragmentation const ObTabletHandle &old_tablet_handle, const ObIArray &table_handles); @@ -378,6 +398,7 @@ public: const ObBatchUpdateTableStoreParam ¶m); void enable_to_read(); void disable_to_read(); + int get_all_tablet_ids(const bool except_ls_inner_tablet, common::ObIArray &tablet_id_array); protected: virtual int prepare_dml_running_ctx( @@ -440,11 +461,13 @@ private: class GetAllTabletIDOperator final { public: - explicit GetAllTabletIDOperator(common::ObIArray &tablet_ids) - : tablet_ids_(tablet_ids) {} + explicit GetAllTabletIDOperator(common::ObIArray &tablet_ids, + const bool except_ls_inner_tablet = false) + : except_ls_inner_tablet_(except_ls_inner_tablet), tablet_ids_(tablet_ids) {} ~GetAllTabletIDOperator() = default; int operator()(const common::ObTabletID &tablet_id); private: + bool except_ls_inner_tablet_; common::ObIArray &tablet_ids_; }; class DestroyMemtableAndMemberOperator final diff --git 
a/src/storage/memtable/ob_memtable.cpp b/src/storage/memtable/ob_memtable.cpp index f5926149e6..271f47c65c 100644 --- a/src/storage/memtable/ob_memtable.cpp +++ b/src/storage/memtable/ob_memtable.cpp @@ -2131,7 +2131,8 @@ bool ObMemtable::is_partition_memtable_empty(const uint64_t table_id) const int ObMemtable::get_multi_source_data_unit( ObIMultiSourceDataUnit *const multi_source_data_unit, - ObIAllocator *allocator) + ObIAllocator *allocator, + const bool get_lastest) { int ret = OB_SUCCESS; TCRLockGuard guard(multi_source_data_lock_); @@ -2142,7 +2143,7 @@ int ObMemtable::get_multi_source_data_unit( } else if (OB_UNLIKELY(!multi_source_data_.is_valid())) { ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "multi source data is invalid", K(ret)); - } else if (OB_FAIL(multi_source_data_.get_multi_source_data_unit(multi_source_data_unit, allocator))) { + } else if (OB_FAIL(multi_source_data_.get_multi_source_data_unit(multi_source_data_unit, allocator, get_lastest))) { if (ret != OB_ENTRY_NOT_EXIST) { TRANS_LOG(WARN, "fail to get multi source data unit", K(ret)); } else { diff --git a/src/storage/memtable/ob_memtable.h b/src/storage/memtable/ob_memtable.h index 9726d0b508..76472d93c1 100644 --- a/src/storage/memtable/ob_memtable.h +++ b/src/storage/memtable/ob_memtable.h @@ -25,6 +25,7 @@ #include "storage/memtable/ob_row_compactor.h" #include "storage/memtable/ob_multi_source_data.h" #include "storage/checkpoint/ob_freeze_checkpoint.h" +#include "storage/compaction/ob_medium_compaction_mgr.h" namespace oceanbase { @@ -437,7 +438,15 @@ public: int resolve_right_boundary_for_migration(); /* multi source data operations */ - virtual int get_multi_source_data_unit(ObIMultiSourceDataUnit *multi_source_data_unit, ObIAllocator *allocator); + virtual int get_multi_source_data_unit( + ObIMultiSourceDataUnit *multi_source_data_unit, + ObIAllocator *allocator, + const bool get_lastest = true); + template + int get_multi_source_data_unit_list( + const T * const useless_unit, + 
ObMultiSourceData::ObIMultiSourceDataUnitList &dst_list, + ObIAllocator *allocator); bool has_multi_source_data_unit(const MultiSourceDataUnitType type) const; template @@ -616,6 +625,28 @@ int ObMemtable::save_multi_source_data_unit(const T *const multi_source_data_uni return ret; } +template +int ObMemtable::get_multi_source_data_unit_list( + const T * const useless_unit, + ObMultiSourceData::ObIMultiSourceDataUnitList &dst_list, + ObIAllocator *allocator) +{ + int ret = OB_SUCCESS; + TCRLockGuard guard(multi_source_data_lock_); + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + TRANS_LOG(WARN, "not inited", K(ret)); + } else if (OB_UNLIKELY(!multi_source_data_.is_valid())) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "multi source data is invalid", K(ret)); + } else if (OB_FAIL(multi_source_data_.get_multi_source_data_unit_list(useless_unit, dst_list, allocator))) { + TRANS_LOG(WARN, "fail to get multi source data unit", K(ret)); + } + + return ret; +} + typedef ObMemtable ObMemStore; /* diff --git a/src/storage/memtable/ob_multi_source_data.cpp b/src/storage/memtable/ob_multi_source_data.cpp index a31c48dc47..e28c820850 100644 --- a/src/storage/memtable/ob_multi_source_data.cpp +++ b/src/storage/memtable/ob_multi_source_data.cpp @@ -72,7 +72,10 @@ bool ObMultiSourceData::has_multi_source_data_unit(const MultiSourceDataUnitType return bret; } -int ObMultiSourceData::get_multi_source_data_unit(ObIMultiSourceDataUnit *const dst, ObIAllocator *allocator) +int ObMultiSourceData::get_multi_source_data_unit( + ObIMultiSourceDataUnit *const dst, + ObIAllocator *allocator, + bool get_lastest) { int ret = OB_SUCCESS; const int pos = static_cast(dst->type()); @@ -88,10 +91,19 @@ int ObMultiSourceData::get_multi_source_data_unit(ObIMultiSourceDataUnit *const ret = OB_INVALID_ARGUMENT; TRANS_LOG(WARN, "wrong unit type", K(ret), K(list_pos)); } - DLIST_FOREACH_BACKWARD_X(item, unit_list_array_[list_pos], OB_SUCC(ret)) { - if (item->is_sync_finish()) { - src = item; - break; + 
if (get_lastest) { + DLIST_FOREACH_BACKWARD_X(item, unit_list_array_[list_pos], OB_SUCC(ret)) { + if (item->is_sync_finish()) { + src = item; + break; + } + } + } else { + DLIST_FOREACH_X(item, unit_list_array_[list_pos], OB_SUCC(ret)) { + if (item->is_sync_finish()) { + src = item; + break; + } } } if (nullptr == src) { @@ -164,6 +176,7 @@ int ObMultiSourceData::inner_mark_unit_sync_finish( KPC(last_item)); } else { last_item->set_sync_finish(true); + if (save_last_flag) { (void)inner_release_rest_unit_data(list_pos, unit_version); } diff --git a/src/storage/memtable/ob_multi_source_data.h b/src/storage/memtable/ob_multi_source_data.h index 0365cf03bf..333bc0de4d 100644 --- a/src/storage/memtable/ob_multi_source_data.h +++ b/src/storage/memtable/ob_multi_source_data.h @@ -36,6 +36,7 @@ enum class MultiSourceDataUnitType TABLET_SEQ = 2, // unit list type STORAGE_SCHEMA = 3, + MEDIUM_COMPACTION_INFO = 4, MAX_TYPE }; @@ -112,7 +113,7 @@ private: class ObMultiSourceData { public: - typedef common::ObDList MultiSourceDataUnitList; + typedef common::ObDList ObIMultiSourceDataUnitList; ObMultiSourceData(common::ObIAllocator &allocator); ~ObMultiSourceData(); @@ -123,7 +124,15 @@ public: bool has_multi_source_data_unit(const MultiSourceDataUnitType type) const; - int get_multi_source_data_unit(ObIMultiSourceDataUnit *const dst, ObIAllocator *allocator); + int get_multi_source_data_unit( + ObIMultiSourceDataUnit *const dst, + ObIAllocator *allocator, + bool get_lastest = true); + template + int get_multi_source_data_unit_list( + const T * const useless_unit, + ObIMultiSourceDataUnitList &dst_list, + ObIAllocator *allocator); template int save_multi_source_data_unit(const T *const src, bool is_callback); int update_unsync_cnt_for_multi_data(const MultiSourceDataUnitType multi_source_type, const bool is_inc); @@ -138,7 +147,7 @@ private: const int64_t list_pos, const int64_t unit_version); template - int deep_copy_data_unit(const T *const src, T *&dst); + int 
deep_copy_data_unit(const T *const src, T *&dst, ObIAllocator &allocator); template int save_multi_source_data_unit_in_list(const T *const src, bool is_callback); int free_unit_list(const int64_t list_pos); @@ -151,16 +160,16 @@ private: } common::ObIAllocator &allocator_; ObIMultiSourceDataUnit *units_[MAX_PTR_COUNT]; - MultiSourceDataUnitList unit_list_array_[MAX_LIST_COUNT]; + ObIMultiSourceDataUnitList unit_list_array_[MAX_LIST_COUNT]; }; template -int ObMultiSourceData::deep_copy_data_unit(const T *const src, T *&dst) +int ObMultiSourceData::deep_copy_data_unit(const T *const src, T *&dst, ObIAllocator &allocator) { int ret = OB_SUCCESS; dst = nullptr; void *buf = nullptr; - if (OB_ISNULL(buf = allocator_.alloc(src->get_data_size()))) { + if (OB_ISNULL(buf = allocator.alloc(src->get_data_size()))) { ret = common::OB_ALLOCATE_MEMORY_FAILED; TRANS_LOG(WARN, "fail to alloc memory", K(ret)); } else if (FALSE_IT(dst = new (buf) T())) { @@ -170,7 +179,7 @@ int ObMultiSourceData::deep_copy_data_unit(const T *const src, T *&dst) if (OB_FAIL(ret)) { if (nullptr != buf) { dst->~ObIMultiSourceDataUnit(); - allocator_.free(buf); + allocator.free(buf); dst = nullptr; } } @@ -189,7 +198,7 @@ int ObMultiSourceData::save_multi_source_data_unit_in_list(const T *const src, b ret = OB_ERR_UNEXPECTED; TRANS_LOG(WARN, "unexpected order", K(ret), K(list_pos), K(units_[list_pos]), KPC(dst)); } else { - if (OB_FAIL(deep_copy_data_unit(src, dst))) { + if (OB_FAIL(deep_copy_data_unit(src, dst, allocator_))) { TRANS_LOG(WARN, "failed to deep copy unit", K(ret), K(list_pos), KPC(src)); } else if (!unit_list_array_[list_pos].add_last(dst)) { ret = common::OB_ERR_UNEXPECTED; @@ -231,7 +240,7 @@ int ObMultiSourceData::save_multi_source_data_unit(const T *const src, bool is_c } else if (pos < MAX_PTR_COUNT) { if (!is_callback) { // overwrite data - if (OB_FAIL(deep_copy_data_unit(src, dst))) { + if (OB_FAIL(deep_copy_data_unit(src, dst, allocator_))) { TRANS_LOG(WARN, "fail to deep copy data 
unit", K(ret), KP(dst), KP(src), K(pos)); } else { ObIMultiSourceDataUnit *old_value = units_[pos]; @@ -255,6 +264,53 @@ int ObMultiSourceData::save_multi_source_data_unit(const T *const src, bool is_c return ret; } +template +int ObMultiSourceData::get_multi_source_data_unit_list( + const T * const useless_unit, + ObIMultiSourceDataUnitList &dst_list, + ObIAllocator *allocator) +{ + int ret = OB_SUCCESS; + int64_t type = 0; + int64_t list_pos = -1; + const int type_count = static_cast(MultiSourceDataUnitType::MAX_TYPE); + if (OB_UNLIKELY(nullptr == useless_unit + || FALSE_IT(type = (int64_t)useless_unit->type()) + || type < 0 || type >= type_count + || nullptr == allocator)) { + ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "invalid argument", K(ret), KPC(useless_unit), KP(allocator)); + } else if (!ObIMultiSourceDataUnit::is_unit_list(static_cast(type))) { + ret = OB_NOT_SUPPORTED; + TRANS_LOG(WARN, "not supported for cur data unit", K(ret), K(type)); + } else if (FALSE_IT(list_pos = get_unit_list_array_idx(type))) { + } else if (OB_UNLIKELY(list_pos < 0 || list_pos >= MAX_LIST_COUNT)) { + ret = OB_INVALID_ARGUMENT; + TRANS_LOG(WARN, "wrong unit type", K(ret), K(list_pos), K(type)); + } else { + T *dst = nullptr; + DLIST_FOREACH_X(item, unit_list_array_[list_pos], OB_SUCC(ret)) { + if (OB_UNLIKELY(!item->is_valid())) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "data unit is invalid", K(ret), KPC(item)); + } else if (item->is_sync_finish()) { + if (OB_FAIL(deep_copy_data_unit(static_cast(item), dst, *allocator))) { + TRANS_LOG(WARN, "failed to deep copy unit", K(ret), KPC(item)); + } else if (!dst_list.add_last(dst)) { + ret = OB_ERR_UNEXPECTED; + TRANS_LOG(WARN, "failed to add data unit into list", K(ret), KPC(dst), K(dst_list)); + } + if (OB_FAIL(ret) && nullptr != dst) { + dst->~ObIMultiSourceDataUnit(); + allocator->free(dst); + } + } + } + } + return ret; +} + + } // namespace memtable } // namespace oceanbase diff --git a/src/storage/ob_i_memtable_mgr.cpp 
b/src/storage/ob_i_memtable_mgr.cpp index e50d1475a8..716e6e92ff 100644 --- a/src/storage/ob_i_memtable_mgr.cpp +++ b/src/storage/ob_i_memtable_mgr.cpp @@ -224,14 +224,16 @@ int ObIMemtableMgr::init( const ObTabletID &tablet_id, const share::ObLSID &ls_id, const int64_t max_saved_schema_version, + const int64_t max_saved_medium_scn, + const lib::Worker::CompatMode compat_mode, logservice::ObLogHandler *log_handler, ObFreezer *freezer, ObTenantMetaMemMgr *t3m) { int ret = OB_SUCCESS; if (!tablet_id.is_special_merge_tablet() - && OB_FAIL(init_storage_schema_recorder(tablet_id, ls_id, max_saved_schema_version, log_handler))) { - TRANS_LOG(WARN, "failed to init schema recorder", K(ret), K(max_saved_schema_version), KP(log_handler)); + && OB_FAIL(init_storage_recorder(tablet_id, ls_id, max_saved_schema_version, max_saved_medium_scn, compat_mode, log_handler))) { + TRANS_LOG(WARN, "failed to init schema recorder", K(ret), K(max_saved_schema_version), K(max_saved_medium_scn), K(compat_mode), KP(log_handler)); } else { ret = init(tablet_id, ls_id, freezer, t3m); } diff --git a/src/storage/ob_i_memtable_mgr.h b/src/storage/ob_i_memtable_mgr.h index ea35fe48fa..f233998bb1 100644 --- a/src/storage/ob_i_memtable_mgr.h +++ b/src/storage/ob_i_memtable_mgr.h @@ -17,7 +17,6 @@ #include "lib/lock/ob_qsync_lock.h" #include "storage/ob_i_table.h" #include "storage/memtable/ob_multi_source_data.h" -#include "storage/ob_storage_schema_recorder.h" namespace oceanbase { @@ -213,6 +212,8 @@ public: const ObTabletID &tablet_id, const share::ObLSID &ls_id, const int64_t max_saved_schema_version, + const int64_t max_saved_medium_scn, + const lib::Worker::CompatMode compat_mode, logservice::ObLogHandler *log_handler, ObFreezer *freezer, ObTenantMetaMemMgr *t3m); @@ -273,19 +274,23 @@ public: destroy(); ATOMIC_STORE(&ref_cnt_, 0); } - virtual int init_storage_schema_recorder( + virtual int init_storage_recorder( const ObTabletID &tablet_id, const share::ObLSID &ls_id, const int64_t 
max_saved_schema_version, + const int64_t max_saved_medium_scn, + const lib::Worker::CompatMode compat_mode, logservice::ObLogHandler *log_handler) { // do nothing UNUSED(tablet_id); UNUSED(ls_id); - UNUSED(max_saved_schema_version), + UNUSED(max_saved_schema_version); + UNUSED(max_saved_medium_scn); + UNUSED(compat_mode); UNUSED(log_handler); return OB_NOT_SUPPORTED; } - virtual int reset_storage_schema_recorder() + virtual int reset_storage_recorder() { // do nothing return OB_NOT_SUPPORTED; } diff --git a/src/storage/ob_i_store.cpp b/src/storage/ob_i_store.cpp index 0213864afe..1255116b8d 100644 --- a/src/storage/ob_i_store.cpp +++ b/src/storage/ob_i_store.cpp @@ -74,6 +74,7 @@ void ObStoreCtx::reset() table_version_ = INT64_MAX; timeout_ = -1; mvcc_acc_ctx_.reset(); + tablet_stat_.reset(); replay_log_scn_.set_max(); } @@ -317,28 +318,5 @@ int ObLockRowChecker::check_lock_row_valid( return ret; } -const char * ObMergeTypeStr[] = { - "MINI_MINOR_MERGE", - "BUF_MINOR_MERGE", - "HISTORY_MINI_MINOR_MERGE", - "MINI_MERGE", - "MAJOR_MERGE", - "MINOR_MERGE", - "DDL_KV_MERGE", - "BACKFILL_TX_MERGE" -}; - -const char *merge_type_to_str(const ObMergeType &merge_type) -{ - STATIC_ASSERT(static_cast(MERGE_TYPE_MAX) == ARRAYSIZEOF(ObMergeTypeStr), "merge type str len is mismatch"); - const char *str = ""; - if (merge_type >= MERGE_TYPE_MAX || merge_type < MINI_MINOR_MERGE) { - str = "invalid_merge_type"; - } else { - str = ObMergeTypeStr[merge_type]; - } - return str; -} - } } diff --git a/src/storage/ob_i_store.h b/src/storage/ob_i_store.h index 49f28d14ec..2bebc818d5 100644 --- a/src/storage/ob_i_store.h +++ b/src/storage/ob_i_store.h @@ -31,6 +31,7 @@ MULTI_VERSION_EXTRA_ROWKEY_DEF(MAX_EXTRA_ROWKEY, 0, NULL, NULL) #include "storage/blocksstable/ob_datum_rowkey.h" #include "storage/ob_table_store_stat_mgr.h" #include "storage/memtable/mvcc/ob_mvcc_acc_ctx.h" +#include "storage/ob_tenant_tablet_stat_mgr.h" namespace oceanbase { @@ -80,22 +81,6 @@ public: } }; -enum 
ObMergeType -{ - INVALID_MERGE_TYPE = -1, - MINI_MINOR_MERGE = 0, // mini minor merge, compaction several mini sstable into one larger mini sstable - BUF_MINOR_MERGE = 1, - HISTORY_MINI_MINOR_MERGE = 2, - MINI_MERGE = 3, // mini merge, only flush memtable - MAJOR_MERGE = 4, - MINOR_MERGE = 5, - DDL_KV_MERGE = 6, - BACKFILL_TX_MERGE = 7, - MERGE_TYPE_MAX, -}; - -const char *merge_type_to_str(const ObMergeType &merge_type); - enum ObMergeLevel { MACRO_BLOCK_MERGE_LEVEL = 0, @@ -434,6 +419,7 @@ struct ObStoreCtx KP_(table_iter), K_(table_version), K_(mvcc_acc_ctx), + K_(tablet_stat), K_(replay_log_scn)); share::ObLSID ls_id_; storage::ObLS *ls_; // for performance opt @@ -442,6 +428,7 @@ struct ObStoreCtx int64_t table_version_; // used to update memtable's max_schema_version int64_t timeout_; memtable::ObMvccAccessCtx mvcc_acc_ctx_; // all txn relative context + storage::ObTabletStat tablet_stat_; // used for collecting query statistics share::SCN replay_log_scn_; // used in replay pass log_ts }; @@ -490,38 +477,6 @@ OB_INLINE bool ObStoreRow::is_valid() const return bool_ret; } - -OB_INLINE bool is_major_merge(const ObMergeType &merge_type) -{ - return MAJOR_MERGE == merge_type; -} -OB_INLINE bool is_mini_merge(const ObMergeType &merge_type) -{ - return MINI_MERGE == merge_type; -} -OB_INLINE bool is_mini_minor_merge(const ObMergeType &merge_type) -{ - return MINOR_MERGE == merge_type || MINI_MINOR_MERGE == merge_type || HISTORY_MINI_MINOR_MERGE == merge_type; -} -OB_INLINE bool is_multi_version_minor_merge(const ObMergeType &merge_type) -{ - return MINOR_MERGE == merge_type || MINI_MERGE == merge_type || MINI_MINOR_MERGE == merge_type - || HISTORY_MINI_MINOR_MERGE == merge_type || BACKFILL_TX_MERGE == merge_type; -} -OB_INLINE bool is_history_mini_minor_merge(const ObMergeType &merge_type) -{ - return HISTORY_MINI_MINOR_MERGE == merge_type; -} -OB_INLINE bool is_buf_minor_merge(const ObMergeType &merge_type) -{ - return BUF_MINOR_MERGE == merge_type; -} - 
-OB_INLINE bool is_backfill_tx_merge(const ObMergeType &merge_type) -{ - return BACKFILL_TX_MERGE == merge_type; -} - } // storage } // oceanbase #endif // OCEANBASE_STORAGE_I_OB_STORE_H_ diff --git a/src/storage/ob_i_table.cpp b/src/storage/ob_i_table.cpp index 76cda204d1..983573cbe8 100644 --- a/src/storage/ob_i_table.cpp +++ b/src/storage/ob_i_table.cpp @@ -61,7 +61,7 @@ const char* ObITable::table_type_name_[] = "MAJOR", "MINOR", "MINI", - "BUF_MINOR", + "META_MAJOR", "DDL_DUMP", "REMOTE_LOGICAL_MINOR", }; @@ -675,7 +675,7 @@ int ObTablesHandleArray::check_continues(const share::ObScnRange *scn_range) con if (!tables_.empty()) { // 1:check major sstable - // there can only be one major or buf minor + // there can only be one major or meta merge const ObITable *last_table = nullptr; const ObITable *table = nullptr; SCN base_end_scn = SCN::min_scn(); @@ -683,8 +683,8 @@ int ObTablesHandleArray::check_continues(const share::ObScnRange *scn_range) con if (OB_ISNULL(table = tables_.at(i))) { ret = OB_ERR_SYS; LOG_WARN("table is NULL", KPC(table)); - } else if (table->is_major_sstable() || table->is_buf_minor_sstable()) { - base_end_scn = table->is_buf_minor_sstable() ? table->get_end_scn() : SCN::min_scn(); + } else if (table->is_major_sstable() || table->is_meta_major_sstable()) { + base_end_scn = table->is_meta_major_sstable() ? 
table->get_end_scn() : SCN::min_scn(); i++; } // 2:check minor sstable @@ -693,9 +693,9 @@ int ObTablesHandleArray::check_continues(const share::ObScnRange *scn_range) con if (OB_ISNULL(table)) { ret = OB_ERR_SYS; LOG_WARN("table is NULL", KPC(table)); - } else if (table->is_major_sstable() || table->is_buf_minor_sstable()) { + } else if (table->is_major_sstable() || table->is_meta_major_sstable()) { ret = OB_ERR_SYS; - LOG_WARN("major sstable or buf minor should be first", K(ret), K(i), K(table)); + LOG_WARN("major sstable or meta merge should be first", K(ret), K(i), K(table)); } else if (OB_ISNULL(last_table)) { // first table if (OB_NOT_NULL(scn_range) && table->get_start_scn() > scn_range->start_scn_) { diff --git a/src/storage/ob_i_table.h b/src/storage/ob_i_table.h index 414b316c29..e034356f05 100644 --- a/src/storage/ob_i_table.h +++ b/src/storage/ob_i_table.h @@ -68,6 +68,7 @@ struct ObTableAccessParam; struct ObTableAccessContext; struct ObRowsInfo; class ObStoreRowIterator; +struct ObStoreCtx; class ObITable { @@ -86,7 +87,7 @@ public: MAJOR_SSTABLE = 10, MINOR_SSTABLE = 11, MINI_SSTABLE = 12, - BUF_MINOR_SSTABLE = 13, + META_MAJOR_SSTABLE = 13, KV_DUMP_SSTABLE = 14, REMOTE_LOGICAL_MINOR_SSTABLE = 15, // < add new sstable before here, See is_sstable() @@ -117,7 +118,7 @@ public: OB_INLINE bool is_minor_sstable() const { return ObITable::is_minor_sstable(table_type_); } OB_INLINE bool is_mini_sstable() const { return ObITable::is_mini_sstable(table_type_); } OB_INLINE bool is_major_sstable() const { return ObITable::is_major_sstable(table_type_); } - OB_INLINE bool is_buf_minor_sstable() const { return ObITable::is_buf_minor_sstable(table_type_); } + OB_INLINE bool is_meta_major_sstable() const { return ObITable::is_meta_major_sstable(table_type_); } OB_INLINE bool is_multi_version_table() const { return ObITable::is_multi_version_table(table_type_); } OB_INLINE bool is_ddl_sstable() const { return ObITable::is_ddl_sstable(table_type_); } OB_INLINE bool 
is_table_with_scn_range() const { return ObITable::is_table_with_scn_range(table_type_); } @@ -128,7 +129,7 @@ public: OB_INLINE share::SCN get_end_scn() const { return scn_range_.end_scn_; } OB_INLINE int64_t get_snapshot_version() const { - OB_ASSERT(is_major_sstable()); + OB_ASSERT(is_major_sstable() || is_meta_major_sstable()); return version_range_.snapshot_version_; } OB_INLINE TableKey& operator=(const TableKey &key) @@ -226,7 +227,7 @@ public: virtual bool is_lock_memtable() const { return is_lock_memtable(key_.table_type_); } virtual bool is_frozen_memtable() const { return false; } virtual bool is_active_memtable() const { return false; } - virtual bool is_buf_minor_sstable() const { return is_buf_minor_sstable(key_.table_type_); } + virtual bool is_meta_major_sstable() const { return is_meta_major_sstable(key_.table_type_); } OB_INLINE bool is_table_with_scn_range() const { return is_table_with_scn_range(key_.table_type_); } virtual OB_INLINE int64_t get_timestamp() const { return 0; } virtual bool is_ddl_sstable() const { return is_ddl_sstable(key_.table_type_); } @@ -245,7 +246,6 @@ public: { return ObITable::TableType::MINOR_SSTABLE == table_type || ObITable::TableType::MINI_SSTABLE == table_type - || ObITable::TableType::BUF_MINOR_SSTABLE == table_type || ObITable::TableType::REMOTE_LOGICAL_MINOR_SSTABLE == table_type; } static bool is_multi_version_minor_sstable(const TableType table_type) @@ -308,9 +308,9 @@ public: return ObITable::TableType::LOCK_MEMTABLE == table_type; } - static bool is_buf_minor_sstable(const TableType table_type) + static bool is_meta_major_sstable(const TableType table_type) { - return ObITable::TableType::BUF_MINOR_SSTABLE == table_type; + return ObITable::TableType::META_MAJOR_SSTABLE == table_type; } static bool is_ddl_sstable(const TableType table_type) { @@ -318,7 +318,7 @@ public: } static bool is_table_with_scn_range(const TableType table_type) { - return is_multi_version_table(table_type) || 
is_buf_minor_sstable(table_type); + return is_multi_version_table(table_type) || is_meta_major_sstable(table_type); } OB_INLINE static const char* get_table_type_name(const TableType &table_type) { diff --git a/src/storage/ob_partition_range_spliter.cpp b/src/storage/ob_partition_range_spliter.cpp index 1aaf6e07e6..c7082c4a1f 100644 --- a/src/storage/ob_partition_range_spliter.cpp +++ b/src/storage/ob_partition_range_spliter.cpp @@ -697,6 +697,7 @@ int ObPartitionRangeSpliter::get_range_split_info(ObIArray &tables, } else { // build range paras range_info.store_range_ = &store_range; + range_info.tables_ = &tables; bool is_sstable = false; int64_t size = 0; int64_t macro_block_cnt = 0; @@ -724,7 +725,6 @@ int ObPartitionRangeSpliter::get_range_split_info(ObIArray &tables, *range_info.store_range_, index_read_info, table, size, macro_block_cnt))) { STORAGE_LOG(WARN, "Failed to get single range info", K(ret), K(i), KPC(table)); } else { - range_info.tables_ = &tables; range_info.total_size_ += size; range_info.index_read_info_ = &index_read_info; range_info.max_macro_block_count_ = MAX(macro_block_cnt, range_info.max_macro_block_count_); @@ -1575,13 +1575,13 @@ int ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::get_next_row( int ObPartitionIncrementalRangeSpliter::ObIncrementalIterator::prepare_table_access_param() { int ret = OB_SUCCESS; - const ObMergeSchema *merge_schema = merge_ctx_.schema_ctx_.merge_schema_; - if (OB_FAIL(merge_schema->get_rowkey_column_ids(rowkey_col_ids_))) { + const ObStorageSchema *storage_schema = merge_ctx_.get_schema(); + if (OB_FAIL(storage_schema->get_rowkey_column_ids(rowkey_col_ids_))) { STORAGE_LOG(WARN, "Failed to get rowkey column ids", KR(ret)); } else if (OB_FAIL(ObMultiVersionRowkeyHelpper::add_extra_rowkey_cols(rowkey_col_ids_))) { STORAGE_LOG(WARN, "failed to add extra rowkey cols", KR(ret)); - } else if (OB_FAIL(tbl_read_info_.init(allocator_, merge_schema->get_column_count(), - 
merge_schema->get_rowkey_column_num(), + } else if (OB_FAIL(tbl_read_info_.init(allocator_, storage_schema->get_column_count(), + storage_schema->get_rowkey_column_num(), lib::is_oracle_mode(), rowkey_col_ids_, true))) { STORAGE_LOG(WARN, "Failed to init columns info", KR(ret)); } else if (OB_FAIL(tbl_xs_param_.init_merge_param( @@ -1709,7 +1709,7 @@ int ObPartitionIncrementalRangeSpliter::init(compaction::ObTabletMergeCtx &merge merge_ctx_ = &merge_ctx; allocator_ = &allocator; major_sstable_ = static_cast(merge_ctx.tables_handle_.get_table(0)); - tablet_size_ = merge_ctx.schema_ctx_.merge_schema_->get_tablet_size(); + tablet_size_ = merge_ctx.get_schema()->get_tablet_size(); if (OB_UNLIKELY(tablet_size_ < 0)) { ret = OB_INVALID_ARGUMENT; STORAGE_LOG(WARN, "invalid argument tablet size", KR(ret), K_(tablet_size)); @@ -1812,8 +1812,7 @@ int ObPartitionIncrementalRangeSpliter::check_is_incremental(bool &is_incrementa int cmp_ret = 0; ObDatumRowkey row_rowkey; ObDatumRowkey end_rowkey; - const int64_t rowkey_column_num = - merge_ctx_->schema_ctx_.merge_schema_->get_rowkey_column_num(); + const int64_t rowkey_column_num = merge_ctx_->get_schema()->get_rowkey_column_num(); const ObStorageDatumUtils &datum_utils = merge_ctx_->tablet_handle_.get_obj()->get_index_read_info().get_datum_utils(); if (OB_FAIL(row_rowkey.assign(row->storage_datums_, rowkey_column_num))) { @@ -1911,8 +1910,7 @@ int ObPartitionIncrementalRangeSpliter::get_ranges_by_inc_data(ObDatumRangeArray num_rows_per_range = default_noisy_row_num_skipped_; } - const int64_t rowkey_column_num = - merge_ctx_->schema_ctx_.merge_schema_->get_rowkey_column_num(); + const int64_t rowkey_column_num = merge_ctx_->get_schema()->get_rowkey_column_num(); int64_t count = 0; const ObDatumRow *row = nullptr; ObDatumRowkey rowkey; diff --git a/src/storage/ob_sstable_struct.cpp b/src/storage/ob_sstable_struct.cpp index 4c8a0c9e88..0c4caed44f 100644 --- a/src/storage/ob_sstable_struct.cpp +++ 
b/src/storage/ob_sstable_struct.cpp @@ -220,11 +220,3 @@ void ObSSTableMergeInfo::dump_info(const char *msg) FLOG_INFO("dump merge info", K(msg), K(output_row_per_s), K(new_macro_KB_per_s), K(*this)); } -ObMergeChecksumInfo::ObMergeChecksumInfo() - : column_checksums_(NULL), - increment_column_checksums_(NULL), - concurrent_cnt_(0), - column_count_(0) -{ -} - diff --git a/src/storage/ob_sstable_struct.h b/src/storage/ob_sstable_struct.h index b0c28f133b..f35797b317 100644 --- a/src/storage/ob_sstable_struct.h +++ b/src/storage/ob_sstable_struct.h @@ -16,6 +16,7 @@ #include "blocksstable/ob_block_sstable_struct.h" #include "ob_i_table.h" #include "compaction/ob_i_compaction_filter.h" +#include "compaction/ob_compaction_util.h" namespace oceanbase { @@ -113,7 +114,7 @@ public: ~ObSSTableMergeInfo() = default; bool is_valid() const; int add(const ObSSTableMergeInfo &other); - OB_INLINE bool is_major_merge() const { return storage::is_major_merge(merge_type_); } + OB_INLINE bool is_major_merge_type() const { return storage::is_major_merge_type(merge_type_); } void dump_info(const char *msg); void reset(); TO_STRING_KV(K_(tenant_id), K_(ls_id), K_(tablet_id), K_(compaction_scn), @@ -157,17 +158,6 @@ public: char comment_[common::OB_COMPACTION_EVENT_STR_LENGTH]; }; -struct ObMergeChecksumInfo final -{ -public: - ObMergeChecksumInfo(); - ~ObMergeChecksumInfo() = default; - int64_t *column_checksums_; - int64_t **increment_column_checksums_; - int64_t concurrent_cnt_; - int64_t column_count_; -}; - } // end namespace storage } // end namespace oceanbase diff --git a/src/storage/ob_storage_clog_recorder.cpp b/src/storage/ob_storage_clog_recorder.cpp new file mode 100644 index 0000000000..7ec87f6015 --- /dev/null +++ b/src/storage/ob_storage_clog_recorder.cpp @@ -0,0 +1,316 @@ +//Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. 
+// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. +#define USING_LOG_PREFIX STORAGE +#include "ob_storage_clog_recorder.h" + +#include "lib/utility/ob_tracepoint.h" +#include "logservice/ob_log_base_header.h" +#include "logservice/ob_log_base_type.h" +#include "logservice/ob_log_handler.h" +#include "storage/tx_storage/ob_ls_handle.h" +#include "storage/tx_storage/ob_ls_service.h" + +namespace oceanbase +{ + +using namespace common; +using namespace clog; + +namespace storage +{ + +int ObIStorageClogRecorder::ObStorageCLogCb::on_success() +{ + int ret = OB_SUCCESS; + int64_t update_version = ATOMIC_LOAD(&update_version_); + bool finish_flag = false; + + LOG_DEBUG("clog succ callback", KPC(this)); + + if (OB_UNLIKELY(OB_INVALID_VERSION == update_version)) { + LOG_ERROR("table version is invalid", K(update_version)); + } else { + // clear table_version whether success or failed, make sure next time can update + ATOMIC_SET(&update_version_, OB_INVALID_VERSION); + WEAK_BARRIER(); + recorder_.clog_update_succ(update_version, finish_flag); + if (!finish_flag) { + LOG_ERROR("update failed", K(update_version_), K(finish_flag)); + recorder_.clog_update_fail(); + } + } + return ret; +} + +int ObIStorageClogRecorder::ObStorageCLogCb::on_failure() +{ + int ret = OB_SUCCESS; + LOG_INFO("clog failure callback", KPC(this)); + ATOMIC_SET(&update_version_, OB_INVALID_VERSION); + WEAK_BARRIER(); + recorder_.clog_update_fail(); + return ret; +} + +ObIStorageClogRecorder::ObIStorageClogRecorder() + : lock_(false), + logcb_finish_flag_(true), + logcb_ptr_(nullptr), + log_handler_(nullptr), + max_saved_version_(OB_INVALID_VERSION), + clog_scn_() +{ +} + 
+ObIStorageClogRecorder::~ObIStorageClogRecorder() // destructor only delegates to destroy()
+{
+  destroy();
+}
+
+void ObIStorageClogRecorder::destroy() // clears version/lock/flag/handler/scn; logcb_ptr_ is not touched here
+{
+  max_saved_version_ = OB_INVALID_VERSION;
+  lock_ = false;
+  logcb_finish_flag_ = true;
+  log_handler_ = NULL;
+  clog_scn_.set_min();
+}
+
+void ObIStorageClogRecorder::reset() // zeroes max_saved_version_ while holding lock_
+{
+  wait_to_lock(OB_INVALID_VERSION); // lock
+  max_saved_version_ = 0;
+  ATOMIC_STORE(&lock_, false); // unlock
+}
+
+int ObIStorageClogRecorder::init( // returns OB_INVALID_ARGUMENT for a negative version or a null log handler
+    const int64_t max_saved_version,
+    logservice::ObLogHandler *log_handler)
+{
+  int ret = OB_SUCCESS;
+  if (OB_UNLIKELY(max_saved_version < 0 || NULL == log_handler)) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid argument", K(ret), K(max_saved_version), KP(log_handler));
+  } else {
+    max_saved_version_ = max_saved_version;
+    log_handler_ = log_handler;
+  }
+  return ret;
+}
+
+OB_INLINE void ObIStorageClogRecorder::wait_to_lock(const int64_t update_version) // blocks until lock_ is flipped false -> true by this caller
+{
+  while (true) {
+    int64_t last_time = ObTimeUtility::fast_current_time();
+    while (true == ATOMIC_LOAD(&lock_)) { // poll with a 100us sleep while someone else holds the lock
+      usleep(100);
+      if (ObTimeUtility::fast_current_time() - last_time > 100 * 1000) { // throttle: log once per 100 * 1000 clock units (presumably us, i.e. 100ms - confirm)
+        last_time = ObTimeUtility::fast_current_time();
+        LOG_DEBUG("waiting to lock", K(update_version), K(max_saved_version_), KPC(this));
+      }
+      WEAK_BARRIER();
+    }
+
+    if (ATOMIC_BCAS(&lock_, false, true)) { // success to lock
+      break;
+    }
+  } // end of while
+}
+
+OB_INLINE void ObIStorageClogRecorder::wait_for_logcb(const int64_t update_version) // blocks until logcb_finish_flag_ is observed true
+{
+  int64_t last_time = ObTimeUtility::fast_current_time();
+  while (false == ATOMIC_LOAD(&logcb_finish_flag_)) {
+    if (ObTimeUtility::fast_current_time() - last_time > 100 * 1000) { // throttle: elapsed-time check, same interval as wait_to_lock
+      last_time = ObTimeUtility::fast_current_time();
+      LOG_DEBUG("waiting for clog callback", K(update_version), K(max_saved_version_), KPC(this));
+    }
+    usleep(100);
+    WEAK_BARRIER();
+  }
+}
+
+int ObIStorageClogRecorder::try_update_with_lock(
+    const int64_t update_version,
+    const char *clog_buf,
+    const int64_t clog_len,
+    const int64_t expire_ts)
+{ + int ret = OB_SUCCESS; + while ((OB_SUCC(ret) || OB_BLOCK_FROZEN == ret) + && update_version > ATOMIC_LOAD(&max_saved_version_)) { + logcb_ptr_->set_update_version(update_version); + if (OB_FAIL(submit_log(update_version, clog_buf, clog_len))) { + if (OB_BLOCK_FROZEN != ret) { + LOG_WARN("fail to submit log", K(ret), K(update_version), K(max_saved_version_)); + } else if (ObTimeUtility::fast_current_time() >= expire_ts) { + ret = OB_EAGAIN; + LOG_WARN("failed to sync clog", K(ret), K(update_version), + K(max_saved_version_), K(expire_ts)); + } + } else { + wait_for_logcb(update_version); // wait clog callback + } + WEAK_BARRIER(); + } // end of while + + return ret; +} + +int ObIStorageClogRecorder::try_update_for_leader( + const int64_t update_version, + ObIAllocator *allocator, + const int64_t timeout_ts) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(update_version < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("input version is invalid", K(ret), KPC(this), K(update_version)); + } else if (update_version > ATOMIC_LOAD(&max_saved_version_)) { + + wait_to_lock(update_version); // lock + const int64_t expire_ts = ObTimeUtility::fast_current_time() + timeout_ts; + int64_t cur_update_version = update_version; + char *clog_buf = nullptr; + int64_t clog_len = 0; + if (cur_update_version > ATOMIC_LOAD(&max_saved_version_)) { + // may change cur_update_version in prepare_struct_in_lock + if (OB_FAIL(prepare_struct_in_lock(cur_update_version, allocator, clog_buf, clog_len))) { + LOG_WARN("failed to get struct", K(ret), K(update_version)); + } else if (OB_UNLIKELY(cur_update_version < update_version)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("update version is smaller", K(ret), K(cur_update_version), K(update_version)); + } else if (OB_FAIL(try_update_with_lock(cur_update_version, clog_buf, clog_len, expire_ts))) { + LOG_WARN("retry failed", K(ret), KPC(this), K(cur_update_version)); + } else { // sync clog success + LOG_DEBUG("sync clog success", KPC(this), 
K(cur_update_version), K(max_saved_version_)); + } + } + + // clear state no matter success or failed + ATOMIC_STORE(&logcb_finish_flag_, true); + free_struct_in_lock(); + WEAK_BARRIER(); + ATOMIC_STORE(&lock_, false); // unlock + } + if (OB_ALLOCATE_MEMORY_FAILED == ret) { + ret = OB_EAGAIN; + } + + return ret; +} + +int ObIStorageClogRecorder::replay_clog( + const int64_t update_version, + const share::SCN &scn, + const char *buf, + const int64_t size, + int64_t &pos) +{ + int ret = OB_SUCCESS; + if (update_version <= ATOMIC_LOAD(&max_saved_version_)) { + LOG_INFO("skip clog with smaller version", K(update_version), K(max_saved_version_)); + } else if (OB_FAIL(inner_replay_clog(update_version, scn, buf, size, pos))) { + LOG_WARN("fail to replay clog", K(ret), KPC(this)); + } else { + ATOMIC_STORE(&max_saved_version_, update_version); + LOG_DEBUG("success to replay clog", K(ret), KPC(this), K(max_saved_version_)); + } + + return ret; +} + +void ObIStorageClogRecorder::clog_update_fail() +{ + sync_clog_failed_for_leader(); + WEAK_BARRIER(); + ATOMIC_STORE(&logcb_finish_flag_, true); +} + +void ObIStorageClogRecorder::clog_update_succ( + const int64_t update_version, + bool &finish_flag) +{ + int ret = OB_SUCCESS; + finish_flag = false; + if (OB_UNLIKELY(update_version <= ATOMIC_LOAD(&max_saved_version_))) { + ret = OB_INVALID_ARGUMENT; + LOG_ERROR("clog with smaller version", K(ret), K(update_version), K(max_saved_version_)); + } else if (OB_UNLIKELY(clog_scn_.get_val_for_tx() <= 0)) { + // clog_scn_ may be invalid because of concurrency in rare situation + ret = OB_ERR_UNEXPECTED; + LOG_WARN("clog ts is invalid", K(ret), K_(clog_scn)); + } else { + if (OB_FAIL(sync_clog_succ_for_leader(update_version))) { + LOG_WARN("failed to save for leader", K(ret), KPC(this)); + } else { + finish_flag = true; + ATOMIC_STORE(&max_saved_version_, update_version); + LOG_DEBUG("update success", K(ret), KPC(this)); + } + } + ATOMIC_STORE(&logcb_finish_flag_, true); +} + +int 
ObIStorageClogRecorder::write_clog( + const char *buf, + const int64_t buf_len) +{ + int ret = OB_SUCCESS; + const bool need_nonblock = false; + palf::LSN lsn; + clog_scn_.set_min(); + if (OB_UNLIKELY(nullptr == buf || buf_len < 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(buf), K(buf_len)); + } else if (OB_ISNULL(log_handler_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("palf handle is null", K(ret), KP(log_handler_)); + } else if (FALSE_IT(ATOMIC_STORE(&logcb_finish_flag_, false))) { + } else if (OB_FAIL(log_handler_->append(buf, buf_len, share::SCN::min_scn(), need_nonblock, logcb_ptr_, lsn, clog_scn_))) { + LOG_WARN("fail to submit log", K(ret), KPC(this)); + } + return ret; +} + +int ObIStorageClogRecorder::get_tablet_handle( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + ObTabletHandle &tablet_handle) +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(ls_id)); + } else if (OB_FAIL(ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle))) { + LOG_WARN("failed to get tablet", K(ret), K(ls_id), K(tablet_id)); + } + return ret; +} + +int ObIStorageClogRecorder::replay_get_tablet_handle( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const share::SCN &scn, + ObTabletHandle &tablet_handle) +{ + int ret = OB_SUCCESS; + ObLSHandle ls_handle; + if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id, ls_handle, ObLSGetMod::STORAGE_MOD))) { + LOG_WARN("failed to get log stream", K(ret), K(ls_id)); + } else if (OB_FAIL(ls_handle.get_ls()->replay_get_tablet(tablet_id, scn, tablet_handle))) { + LOG_WARN("failed to get tablet", K(ret), K(ls_id), K(tablet_id), K(scn)); + } + return ret; +} + +} // storage +} // oceanbase diff --git a/src/storage/ob_storage_clog_recorder.h b/src/storage/ob_storage_clog_recorder.h new file mode 100644 index 0000000000..2db0b84500 --- /dev/null +++ 
b/src/storage/ob_storage_clog_recorder.h @@ -0,0 +1,144 @@ +//Copyright (c) 2021 OceanBase +// OceanBase is licensed under Mulan PubL v2. +// You can use this software according to the terms and conditions of the Mulan PubL v2. +// You may obtain a copy of Mulan PubL v2 at: +// http://license.coscl.org.cn/MulanPubL-2.0 +// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, +// EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, +// MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. +// See the Mulan PubL v2 for more details. +#ifndef OCEANBASE_STORAGE_STORAGE_CLOG_RECORDER_ +#define OCEANBASE_STORAGE_STORAGE_CLOG_RECORDER_ + +#include +#include "logservice/ob_append_callback.h" +#include "storage/meta_mem/ob_tablet_handle.h" +namespace oceanbase +{ +namespace logservice +{ +class ObLogHandler; +} // namespace palf + +namespace storage +{ +class ObIStorageClogRecorder +{ +protected: + class ObStorageCLogCb : public logservice::AppendCb + { + public: + ObStorageCLogCb(ObIStorageClogRecorder &recorder) + : recorder_(recorder), + update_version_(common::OB_INVALID_VERSION) + {} + virtual ~ObStorageCLogCb() + { + reset(); + } + int init(); + void reset() + { + update_version_ = common::OB_INVALID_VERSION; + } + + virtual int on_success() override; + virtual int on_failure() override; + virtual void reset_handle() {} + + void set_update_version(const int64_t update_version) + { + ATOMIC_SET(&update_version_, update_version); + } + private: + ObIStorageClogRecorder &recorder_; + int64_t update_version_; + + DISABLE_COPY_ASSIGN(ObStorageCLogCb); + }; +public: + ObIStorageClogRecorder(); + virtual ~ObIStorageClogRecorder(); + + int init(const int64_t max_saved_version, logservice::ObLogHandler *log_handler); + virtual void destroy(); + void reset(); + + // leader + int try_update_for_leader( + const int64_t update_version, + ObIAllocator *allocator, + const int64_t timeout_ts = 1000000); + int64_t get_max_saved_version() const 
{ return ATOMIC_LOAD(&max_saved_version_); } + + ObIStorageClogRecorder(const ObIStorageClogRecorder&) = delete; + ObIStorageClogRecorder& operator=(const ObIStorageClogRecorder&) = delete; + + TO_STRING_KV(K(max_saved_version_), K(clog_scn_), KP(log_handler_)); +protected: + // follower, check update version + int replay_clog( + const int64_t update_version, + const share::SCN &scn, + const char *buf, + const int64_t size, + int64_t &pos); + + // clog callback + void clog_update_fail(); + void clog_update_succ(const int64_t update_version, bool &finish_flag); + + virtual int inner_replay_clog( + const int64_t update_version, + const share::SCN &scn, + const char *buf, + const int64_t size, + int64_t &pos) = 0; + virtual int sync_clog_succ_for_leader(const int64_t update_version) = 0; + virtual void sync_clog_failed_for_leader() = 0; + // call prepare struct only once + virtual int prepare_struct_in_lock( + int64_t &update_version, + ObIAllocator *allocator, + char *&clog_buf, + int64_t &clog_len) = 0; + virtual int submit_log( + const int64_t update_version, + const char *clog_buf, + const int64_t clog_len) = 0; + virtual void free_struct_in_lock() = 0; + + int try_update_with_lock( + const int64_t update_version, + const char *clog_buf, + const int64_t clog_len, + const int64_t expire_ts); + // lock + OB_INLINE void wait_to_lock(const int64_t table_version); + OB_INLINE void wait_for_logcb(const int64_t table_version); + + int write_clog(const char *buf, const int64_t buf_len); + share::SCN get_log_scn() const { return clog_scn_; } + + int get_tablet_handle( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + ObTabletHandle &tablet_handle); + int replay_get_tablet_handle( + const share::ObLSID &ls_id, + const ObTabletID &tablet_id, + const share::SCN &scn, + ObTabletHandle &tablet_handle); + +protected: + bool lock_; + bool logcb_finish_flag_; + ObStorageCLogCb *logcb_ptr_; + logservice::ObLogHandler *log_handler_; + int64_t max_saved_version_; + 
share::SCN clog_scn_; +}; + +} // storage +} // oceanbase +#endif /* OCEANBASE_STORAGE_STORAGE_SCHEMA_RECORDER_ */ diff --git a/src/storage/ob_storage_schema.cpp b/src/storage/ob_storage_schema.cpp index d0baccbafe..2953c08b0d 100644 --- a/src/storage/ob_storage_schema.cpp +++ b/src/storage/ob_storage_schema.cpp @@ -163,16 +163,17 @@ ObStorageSchema::~ObStorageSchema() int ObStorageSchema::init( common::ObIAllocator &allocator, const ObTableSchema &input_schema, - const lib::Worker::CompatMode compat_mode) + const lib::Worker::CompatMode compat_mode, + const bool skip_column_info/* = false*/) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; STORAGE_LOG(WARN, "init twice", K(ret), K_(is_inited)); - } else if (OB_UNLIKELY(!input_schema.is_valid())) { + } else if (OB_UNLIKELY(!input_schema.is_valid() || true == skip_column_info)) { ret = OB_INVALID_ARGUMENT; - STORAGE_LOG(WARN, "invalid args", K(ret), K(input_schema)); + STORAGE_LOG(WARN, "invalid args", K(ret), K(input_schema), K(skip_column_info)); } else { allocator_ = &allocator; rowkey_array_.set_allocator(&allocator); @@ -186,8 +187,9 @@ int ObStorageSchema::init( if (OB_FAIL(ret)) { } else if (OB_FAIL(generate_str(input_schema))) { STORAGE_LOG(WARN, "failed to generate string", K(ret), K(input_schema)); - } else if (OB_FAIL(generate_column_array(input_schema))) { + } else if (!skip_column_info && OB_FAIL(generate_column_array(input_schema))) { STORAGE_LOG(WARN, "failed to generate column array", K(ret), K(input_schema)); + } else if (FALSE_IT(column_info_simplified_ = skip_column_info)) { } else if (OB_UNLIKELY(!is_valid())) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, "storage schema is invalid", K(ret)); @@ -202,16 +204,19 @@ int ObStorageSchema::init( return ret; } -int ObStorageSchema::init(common::ObIAllocator &allocator, const ObStorageSchema &old_schema) +int ObStorageSchema::init( + common::ObIAllocator &allocator, + const ObStorageSchema &old_schema, + const bool 
skip_column_info/* = false*/) { int ret = OB_SUCCESS; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; STORAGE_LOG(WARN, "init twice", K(ret), K_(is_inited)); - } else if (OB_UNLIKELY(!old_schema.is_valid())) { + } else if (OB_UNLIKELY(!old_schema.is_valid() || true == skip_column_info)) { ret = OB_INVALID_ARGUMENT; - STORAGE_LOG(WARN, "invalid args", K(ret), K(old_schema)); + STORAGE_LOG(WARN, "invalid args", K(ret), K(old_schema), K(skip_column_info)); } else { allocator_ = &allocator; rowkey_array_.set_allocator(&allocator); @@ -221,6 +226,7 @@ int ObStorageSchema::init(common::ObIAllocator &allocator, const ObStorageSchema copy_from(old_schema); compat_mode_ = old_schema.compat_mode_; compressor_type_ = old_schema.compressor_type_; + column_info_simplified_ = (skip_column_info || old_schema.column_info_simplified_); } if (OB_FAIL(ret)) { @@ -228,12 +234,16 @@ int ObStorageSchema::init(common::ObIAllocator &allocator, const ObStorageSchema STORAGE_LOG(WARN, "failed to deep copy encryption", K(ret), K(old_schema)); } else if (OB_FAIL(deep_copy_str(old_schema.encrypt_key_, encrypt_key_))) { STORAGE_LOG(WARN, "failed to deep copy encryption key", K(ret), K(old_schema)); + } else if (column_info_simplified_) { + // do nothing } else if (OB_FAIL(rowkey_array_.reserve(old_schema.rowkey_array_.count()))) { - STORAGE_LOG(WARN, "failed to reserve for row key array", K(ret), K(old_schema)); + STORAGE_LOG(WARN, "failed to reserve for rowkey array", K(ret), K(old_schema)); } else if (OB_FAIL(rowkey_array_.assign(old_schema.rowkey_array_))) { STORAGE_LOG(WARN, "failed to copy row key array", K(ret), K(old_schema)); - } else if (OB_FAIL(deep_copy_column_array(allocator, old_schema))) { + } else if (OB_FAIL(deep_copy_column_array(allocator, old_schema, old_schema.column_array_.count()))) { STORAGE_LOG(WARN, "failed to deep copy column array", K(ret), K(old_schema)); + } + if (OB_FAIL(ret)) { } else if (OB_UNLIKELY(!is_valid())) { ret = OB_ERR_UNEXPECTED; STORAGE_LOG(WARN, 
"storage schema is invalid", K(ret)); @@ -250,13 +260,17 @@ int ObStorageSchema::init(common::ObIAllocator &allocator, const ObStorageSchema int ObStorageSchema::deep_copy_column_array( common::ObIAllocator &allocator, - const ObStorageSchema &src_schema) + const ObStorageSchema &src_schema, + const int64_t copy_array_cnt) { int ret = OB_SUCCESS; - if (OB_FAIL(column_array_.reserve(src_schema.column_array_.count()))) { + if (OB_UNLIKELY(copy_array_cnt <= 0 || copy_array_cnt > src_schema.column_array_.count())) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "invalid argument", K(ret), K(copy_array_cnt), K(src_schema.column_array_)); + } else if (OB_FAIL(column_array_.reserve(src_schema.column_array_.count()))) { STORAGE_LOG(WARN, "failed to reserve for column array", K(ret), K(src_schema)); } - for (int i = 0; OB_SUCC(ret) && i < src_schema.column_array_.count(); ++i) { + for (int i = 0; OB_SUCC(ret) && i < copy_array_cnt; ++i) { ObStorageColumnSchema col_schema; const ObStorageColumnSchema &src_col_schema = src_schema.column_array_.at(i); col_schema.info_ = src_col_schema.info_; @@ -265,7 +279,7 @@ int ObStorageSchema::deep_copy_column_array( if (OB_FAIL(col_schema.deep_copy_default_val(allocator, src_col_schema.orig_default_value_))) { STORAGE_LOG(WARN, "failed to deep copy col schema", K(ret), K(i), K(src_col_schema)); } else if (OB_FAIL(column_array_.push_back(col_schema))) { - STORAGE_LOG(WARN, "failed to push back col schema", K(ret), K(i), K(column_array_.count()), + STORAGE_LOG(WARN, "failed to push back col schema", K(ret), K(i), K(copy_array_cnt), K(src_schema.column_array_.count()), K(col_schema)); col_schema.destroy(allocator); } @@ -335,9 +349,9 @@ bool ObStorageSchema::is_valid() const valid_ret = false; STORAGE_LOG(WARN, "invalid", K_(is_inited), KP_(allocator), K_(schema_version), K_(column_cnt), K_(tablet_size), K_(pctfree), K_(table_type), K_(table_mode), K_(index_type)); - } else if (column_cnt_ != column_array_.count()) { + } else if 
(!column_info_simplified_ && column_cnt_ != column_array_.count()) { valid_ret = false; - STORAGE_LOG(WARN, "invalid column count", K_(column_cnt), K_(column_array)); + STORAGE_LOG(WARN, "invalid column count", K(valid_ret), K_(column_info_simplified), K_(column_cnt), K_(column_array)); } else if (is_view_table()) { // no need checking other options for view } @@ -375,7 +389,7 @@ int ObStorageSchema::serialize(char *buf, const int64_t buf_len, int64_t &pos) c compressor_type_, encryption_, encrypt_key_); - if (OB_FAIL(ret)) { + if (OB_FAIL(ret) || column_info_simplified_) { } else if (OB_FAIL(serialize_column_array(buf, buf_len, pos, rowkey_array_))){ STORAGE_LOG(WARN, "failed to serialize rowkey columns", K_(rowkey_array)); } else if (OB_FAIL(serialize_column_array(buf, buf_len, pos, column_array_))){ @@ -439,11 +453,14 @@ int ObStorageSchema::deserialize( STORAGE_LOG(WARN, "failed to deep copy string", K(ret), K(tmp_encryption)); } else if (OB_FAIL(deep_copy_str(tmp_encrypt_key, encrypt_key_))) { STORAGE_LOG(WARN, "failed to deep copy string", K(ret), K(tmp_encrypt_key)); + } else if (column_info_simplified_) { + // do noting } else if (OB_FAIL(deserialize_rowkey_column_array(buf, data_len, pos))){ STORAGE_LOG(WARN, "failed to deserialize rowkey columns", K(ret), K_(rowkey_array)); } else if (OB_FAIL(deserialize_column_array(allocator, buf, data_len, pos))){ STORAGE_LOG(WARN, "failed to deserialize columns", K(ret), K_(column_array)); - } else { + } + if (OB_SUCC(ret)) { is_inited_ = true; } } else { @@ -552,9 +569,10 @@ int64_t ObStorageSchema::get_serialize_size() const encryption_, encrypt_key_); //get columms size - len += get_column_array_serialize_length(rowkey_array_); - len += get_column_array_serialize_length(column_array_); - + if (!column_info_simplified_) { + len += get_column_array_serialize_length(rowkey_array_); + len += get_column_array_serialize_length(column_array_); + } return len; } @@ -861,6 +879,48 @@ int 
ObStorageSchema::init_column_meta_array( return ret; } +int ObStorageSchema::get_orig_default_row( + const common::ObIArray &column_ids, + blocksstable::ObDatumRow &default_row) const +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!default_row.is_valid() || default_row.count_ != column_ids.count() + || column_ids.count() > column_cnt_ + ObMultiVersionRowkeyHelpper::get_extra_rowkey_col_cnt())) { + ret = OB_INVALID_ARGUMENT; + STORAGE_LOG(WARN, "Invalid argument", K(ret), K(column_cnt_), K(default_row), K(column_ids.count())); + } + + for (int64_t i = 0; OB_SUCC(ret) && i < column_ids.count(); ++i) { + if (column_ids.at(i).col_id_ == OB_HIDDEN_TRANS_VERSION_COLUMN_ID || + column_ids.at(i).col_id_ == OB_HIDDEN_SQL_SEQUENCE_COLUMN_ID) { + default_row.storage_datums_[i].set_int(0); + } else { + const ObStorageColumnSchema *col_schema = nullptr; + if (OB_ISNULL(col_schema = get_column_schema(column_ids.at(i).col_id_))) { + ret = OB_ERR_SYS; + STORAGE_LOG(WARN, "column id not found", K(ret), K(column_ids.at(i))); + } else if (OB_FAIL(default_row.storage_datums_[i].from_obj_enhance(col_schema->get_orig_default_value()))) { + STORAGE_LOG(WARN, "Failed to transefer obj to datum", K(ret)); + } + } + } + return ret; +} + +const ObStorageColumnSchema *ObStorageSchema::get_column_schema(const int64_t column_idx) const +{ + const ObStorageColumnSchema *found_col = nullptr; + for (int64_t j = 0; j < column_cnt_; ++j) { + const ObStorageColumnSchema &column = column_array_[j]; + if (common::OB_APP_MIN_COLUMN_ID + j == column_idx) { + found_col = &column; + break; + } + } + return found_col; +} + + int ObStorageSchema::get_multi_version_column_descs(common::ObIArray &column_descs) const { int ret = OB_SUCCESS; @@ -880,7 +940,6 @@ int ObStorageSchema::get_multi_version_column_descs(common::ObIArray void ObStorageSchema::copy_from(const share::schema::ObMergeSchema &input_schema) { is_use_bloomfilter_ = input_schema.is_use_bloomfilter(); - //TODO @lixia init oracle mode here 
table_type_ = input_schema.get_table_type(); table_mode_ = input_schema.get_table_mode_struct(); index_type_ = input_schema.get_index_type(); diff --git a/src/storage/ob_storage_schema.h b/src/storage/ob_storage_schema.h index 9896a7387d..e35dca92da 100644 --- a/src/storage/ob_storage_schema.h +++ b/src/storage/ob_storage_schema.h @@ -102,13 +102,16 @@ public: int init( common::ObIAllocator &allocator, const share::schema::ObTableSchema &input_schema, - const lib::Worker::CompatMode compat_mode); + const lib::Worker::CompatMode compat_mode, + const bool skip_column_info = false); int init( common::ObIAllocator &allocator, - const ObStorageSchema &old_schema); + const ObStorageSchema &old_schema, + const bool skip_column_info = false); int deep_copy_column_array( common::ObIAllocator &allocator, - const ObStorageSchema &src_schema); + const ObStorageSchema &src_schema, + const int64_t copy_array_cnt); // ObIMultiSourceDataUnit section virtual int deep_copy(const ObIMultiSourceDataUnit *src, ObIAllocator *allocator) override; @@ -147,7 +150,7 @@ public: { return share::schema::ObTableSchema::is_index_table(table_type_) || is_materialized_view(); } - virtual inline bool is_materialized_view() const { return share::schema::ObTableSchema::is_materialized_view(table_type_); } + inline bool is_materialized_view() const { return share::schema::ObTableSchema::is_materialized_view(table_type_); } virtual inline bool is_global_index_table() const override { return share::schema::ObSimpleTableSchemaV2::is_global_index_table(index_type_); } virtual inline int64_t get_block_size() const override { return block_size_; } @@ -178,9 +181,12 @@ public: virtual int init_column_meta_array( common::ObIArray &meta_array) const override; + int get_orig_default_row(const common::ObIArray &column_ids, + blocksstable::ObDatumRow &default_row) const; + const ObStorageColumnSchema *get_column_schema(const int64_t column_id) const; INHERIT_TO_STRING_KV("ObIMultiSourceDataUnit", 
ObIMultiSourceDataUnit, KP(this), K_(version), - K_(is_use_bloomfilter), K_(compat_mode), K_(table_type), K_(index_type), + K_(is_use_bloomfilter), K_(column_info_simplified), K_(compat_mode), K_(table_type), K_(index_type), K_(index_status), K_(row_store_type), K_(schema_version), K_(column_cnt), K_(tablet_size), K_(pctfree), K_(block_size), K_(progressive_merge_round), K_(master_key_id), K_(compressor_type), K_(encryption), K_(encrypt_key), @@ -214,7 +220,7 @@ public: static const int32_t SS_ONE_BIT = 1; static const int32_t SS_HALF_BYTE = 4; static const int32_t SS_ONE_BYTE = 8; - static const int32_t SS_RESERVED_BITS = 19; + static const int32_t SS_RESERVED_BITS = 18; // STORAGE_SCHEMA_VERSION is for serde compatibility. // Currently we do not use "standard" serde function macro, @@ -234,6 +240,7 @@ public: uint32_t version_ :SS_ONE_BYTE; uint32_t compat_mode_ :SS_HALF_BYTE; uint32_t is_use_bloomfilter_ :SS_ONE_BIT; + uint32_t column_info_simplified_ :SS_ONE_BIT; uint32_t reserved_ :SS_RESERVED_BITS; }; }; diff --git a/src/storage/ob_storage_schema_recorder.cpp b/src/storage/ob_storage_schema_recorder.cpp index 314e197602..dffc3fa75c 100644 --- a/src/storage/ob_storage_schema_recorder.cpp +++ b/src/storage/ob_storage_schema_recorder.cpp @@ -21,9 +21,6 @@ #include "share/schema/ob_table_schema.h" #include "share/schema/ob_tenant_schema_service.h" #include "storage/tablet/ob_tablet.h" -#include "storage/tx_storage/ob_ls_service.h" -#include "storage/meta_mem/ob_tablet_handle.h" -#include "storage/tx_storage/ob_ls_handle.h"//ObLSHandle #include "storage/meta_mem/ob_tenant_meta_mem_mgr.h" #include "share/scn.h" @@ -38,116 +35,75 @@ using namespace share::schema; namespace storage { -void ObStorageSchemaRecorder::ObStorageSchemaLogCb::set_table_version(const int64_t table_version) -{ - ATOMIC_SET(&table_version_, table_version); -} - -int ObStorageSchemaRecorder::ObStorageSchemaLogCb::on_success() -{ - int ret = OB_SUCCESS; - int64_t table_version = table_version_; 
- bool finish_flag = false; - - if (OB_UNLIKELY(OB_INVALID_VERSION == table_version)) { - LOG_ERROR("table version is invalid", K(table_version)); - } else { - // clear table_version whether success or failed, make sure next time can update - ATOMIC_SET(&table_version_, OB_INVALID_VERSION); - recorder_.update_table_schema_succ(table_version, finish_flag); - if (!finish_flag) { - LOG_WARN("update table schema failed", K(table_version), K(finish_flag)); - recorder_.update_table_schema_fail(); - } - } - - return ret; -} - -int ObStorageSchemaRecorder::ObStorageSchemaLogCb::on_failure() -{ - int ret = OB_SUCCESS; - LOG_INFO("schema log failure callback", K(table_version_)); - ATOMIC_SET(&table_version_, OB_INVALID_VERSION); - recorder_.update_table_schema_fail(); - return ret; -} - - -void ObStorageSchemaRecorder::ObStorageSchemaLogCb::clear() -{ - ATOMIC_SET(&table_version_, OB_INVALID_VERSION); -} - ObStorageSchemaRecorder::ObStorageSchemaRecorder() - : is_inited_(false), - lock_(false), - logcb_finish_flag_(true), - logcb_ptr_(nullptr), - max_saved_table_version_(OB_INVALID_VERSION), + : ObIStorageClogRecorder(), + is_inited_(false), + ignore_storage_schema_(false), + compat_mode_(lib::Worker::CompatMode::INVALID), clog_buf_(nullptr), - clog_len_(0), - clog_scn_(), + tablet_handle_ptr_(nullptr), schema_guard_(nullptr), storage_schema_(nullptr), allocator_(nullptr), - log_handler_(nullptr), ls_id_(), tablet_id_(), - tablet_handle_() + table_id_(0) { - STATIC_ASSERT(sizeof(ObStorageSchemaRecorder) <= 136, "size of schema recorder is oversize"); +#if defined(__x86_64__) + STATIC_ASSERT(sizeof(ObStorageSchemaRecorder) <= 120, "size of schema recorder is oversize"); +#endif } ObStorageSchemaRecorder::~ObStorageSchemaRecorder() { - reset(); -} - -void ObStorageSchemaRecorder::reset() -{ - if (is_inited_) { - wait_to_lock(OB_INVALID_VERSION); // lock - max_saved_table_version_ = 0; - ATOMIC_STORE(&lock_, false); // unlock - } + destroy(); } void 
ObStorageSchemaRecorder::destroy() { is_inited_ = false; - max_saved_table_version_ = OB_INVALID_VERSION; - lock_ = false; - logcb_finish_flag_ = true; + ignore_storage_schema_ = false; + compat_mode_ = lib::Worker::CompatMode::INVALID; + ObIStorageClogRecorder::destroy(); free_allocated_info(); log_handler_ = NULL; ls_id_.reset(); tablet_id_.reset(); - tablet_handle_.reset(); - clog_scn_.reset(); - clog_len_ = 0; + table_id_ = 0; +} + +void ObStorageSchemaRecorder::reset() +{ + if (is_inited_) { + ObIStorageClogRecorder::reset(); + } } int ObStorageSchemaRecorder::init( const share::ObLSID &ls_id, const ObTabletID &tablet_id, const int64_t saved_schema_version, + const lib::Worker::CompatMode compat_mode, logservice::ObLogHandler *log_handler) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(saved_schema_version < 0 || nullptr == log_handler)) { + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret)); + } else if (OB_UNLIKELY(saved_schema_version < 0 || nullptr == log_handler)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(saved_schema_version), KP(log_handler)); + } else if (OB_FAIL(ObIStorageClogRecorder::init(saved_schema_version, log_handler))) { + LOG_WARN("failed to init ObIStorageClogRecorder", K(ret), K(saved_schema_version), K(log_handler)); } else { - max_saved_table_version_ = saved_schema_version; + ignore_storage_schema_ = tablet_id.is_special_merge_tablet(); ls_id_ = ls_id; tablet_id_ = tablet_id; - log_handler_ = log_handler; - WEAK_BARRIER(); + compat_mode_ = compat_mode; is_inited_ = true; } if (OB_FAIL(ret)) { - reset(); + destroy(); } return ret; } @@ -160,94 +116,45 @@ int ObStorageSchemaRecorder::replay_schema_log( int64_t &pos) { int ret = OB_SUCCESS; - + int64_t update_version = OB_INVALID_VERSION; if (IS_NOT_INIT) { ret = OB_NOT_INIT; - LOG_WARN("schema recorder not inited", K(ret)); - } else { - int64_t table_version = OB_INVALID_VERSION; - ObArenaAllocator tmp_allocator; - ObStorageSchema 
replay_storage_schema; - if (tablet_id_.is_special_merge_tablet()) { - // do nothing - } else if (OB_FAIL(serialization::decode_i64(buf, size, pos, &table_version))) { - // table_version - LOG_WARN("fail to deserialize table_version", K(ret), K_(tablet_id)); - } else if (table_version <= ATOMIC_LOAD(&max_saved_table_version_)) { - LOG_INFO("skip schema log with smaller table version", K_(tablet_id), K(table_version), - K(max_saved_table_version_)); - } else if (OB_FAIL(replay_get_tablet_handle(scn, tablet_handle_))) { - LOG_WARN("failed to get tablet handle", K(ret), K_(tablet_id), K(scn)); - } else if (OB_FAIL(replay_storage_schema.deserialize(tmp_allocator, buf, size, pos))) { - LOG_WARN("fail to deserialize storage schema", K(ret), K_(tablet_id)); - } else if (FALSE_IT(replay_storage_schema.set_sync_finish(true))) { - } else if (OB_FAIL(tablet_handle_.get_obj()->save_multi_source_data_unit(&replay_storage_schema, - scn, - true/*for_replay*/, - memtable::MemtableRefOp::NONE))) { - LOG_WARN("failed to save storage schema on memtable", K(ret), K_(tablet_id), K(replay_storage_schema)); - } else { - ATOMIC_SET(&max_saved_table_version_, table_version); - LOG_INFO("success to replay schema log", K(ret), K_(tablet_id), K(max_saved_table_version_)); - replay_storage_schema.reset(); - } - tablet_handle_.reset(); + LOG_WARN("schema recorder not inited", K(ret), K_(tablet_id)); + } else if (ignore_storage_schema_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported to update storage schema", K(ret), K_(tablet_id)); + } else if (OB_FAIL(serialization::decode_i64(buf, size, pos, &update_version))) { + LOG_WARN("fail to deserialize table_version", K(ret), K_(tablet_id)); + } else if (OB_FAIL(ObIStorageClogRecorder::replay_clog(update_version, scn, buf, size, pos))) { + LOG_WARN("failed to replay clog", K(ret), K(scn), K_(tablet_id), K(update_version)); } - return ret; } -OB_INLINE void ObStorageSchemaRecorder::wait_to_lock(const int64_t table_version) -{ - while (true) { - 
while (true == ATOMIC_LOAD(&lock_)) { - ob_usleep(100); - if (REACH_TIME_INTERVAL(100 * 1000)) { - LOG_DEBUG("waiting to update schema", K_(tablet_id), K(table_version), K(max_saved_table_version_)); - } - WEAK_BARRIER(); - } - - if (ATOMIC_BCAS(&lock_, false, true)) { - break; - } - } // end of while - -} -OB_INLINE void ObStorageSchemaRecorder::wait_for_logcb(const int64_t table_version) -{ - while (false == ATOMIC_LOAD(&logcb_finish_flag_)) { - if (REACH_TIME_INTERVAL(100 * 1000)) { - LOG_DEBUG("waiting for clog callback", K_(tablet_id), K(table_version), K(max_saved_table_version_)); - } - ob_usleep(100); - WEAK_BARRIER(); - } -} - -int ObStorageSchemaRecorder::try_update_with_lock( - const int64_t table_id, - const int64_t table_version, - const int64_t expire_ts) +// replay after get update_version +int ObStorageSchemaRecorder::inner_replay_clog( + const int64_t update_version, + const SCN &scn, + const char *buf, + const int64_t size, + int64_t &pos) { int ret = OB_SUCCESS; - int64_t retry_times = 0; - while ((OB_SUCC(ret) || OB_BLOCK_FROZEN == ret) - && table_version > ATOMIC_LOAD(&max_saved_table_version_)) { - if (OB_FAIL(submit_schema_log(table_id))) { - if (OB_BLOCK_FROZEN != ret) { - LOG_WARN("fail to save table schema", K(ret), K_(tablet_id), K(table_version), K(max_saved_table_version_)); - } else if (ObTimeUtility::fast_current_time() >= expire_ts) { - ret = OB_EAGAIN; - LOG_WARN("failed to sync table schema", K_(tablet_id), K(table_version), - K(max_saved_table_version_), K(expire_ts)); - } - } else { - wait_for_logcb(table_version); // wait clog callback - } - WEAK_BARRIER(); - } // end of while + ObArenaAllocator tmp_allocator; + ObStorageSchema replay_storage_schema; + ObTabletHandle tmp_tablet_handle; + if (OB_FAIL(replay_get_tablet_handle(ls_id_, tablet_id_, scn, tmp_tablet_handle))) { + LOG_WARN("failed to get tablet handle", K(ret), K_(tablet_id), K(scn)); + } else if (OB_FAIL(replay_storage_schema.deserialize(tmp_allocator, buf, size, 
pos))) { + LOG_WARN("fail to deserialize table schema", K(ret), K_(tablet_id)); + } else if (FALSE_IT(replay_storage_schema.set_sync_finish(true))) { + } else if (OB_FAIL(tmp_tablet_handle.get_obj()->save_multi_source_data_unit(&replay_storage_schema, scn, + true/*for_replay*/, memtable::MemtableRefOp::NONE))) { + LOG_WARN("failed to save storage schema", K(ret), K_(tablet_id), K(replay_storage_schema)); + } + replay_storage_schema.reset(); + tmp_tablet_handle.reset(); return ret; } @@ -255,125 +162,65 @@ int ObStorageSchemaRecorder::try_update_storage_schema( const int64_t table_id, const int64_t table_version, ObIAllocator &allocator, - const int64_t timeout) + const int64_t timeout_ts) { int ret = OB_SUCCESS; if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("schema recorder not inited", K(ret)); - } else if (table_version < 0) { - LOG_WARN("input schema version is invalid", K(ret), K(table_id), K(table_version)); - } else if (tablet_id_.is_special_merge_tablet()) { - // do nothing - } else if (table_version > ATOMIC_LOAD(&max_saved_table_version_)) { - - wait_to_lock(table_version); // lock - allocator_ = &allocator; - if (table_version > ATOMIC_LOAD(&max_saved_table_version_)) { - LOG_INFO("save table schema", K_(ls_id), K_(tablet_id), K(table_version), K(max_saved_table_version_)); - int64_t sync_table_version = table_version; - if (OB_FAIL(get_tablet_handle(tablet_handle_))) { - LOG_WARN("failed to get tablet handle", K(ret), K_(ls_id), K_(tablet_id)); - } else if (OB_FAIL(prepare_schema(table_id, sync_table_version))) { - LOG_WARN("fail to save table schema", K(ret), K_(ls_id), K_(tablet_id), K(sync_table_version)); - } else if (OB_FAIL(try_update_with_lock(table_id, sync_table_version, timeout))) { - if (OB_EAGAIN != ret) { - LOG_WARN("try update failed", K(ret), K_(ls_id), K_(tablet_id), K(table_version)); - } - } else { // sync schema clog success - FLOG_INFO("finish save table schema", K_(ls_id), K_(tablet_id), K(sync_table_version), - "schema_version", 
storage_schema_->get_schema_version(), K_(clog_scn), K(timeout)); - } - } - - // clear state no matter success or failed - - ATOMIC_STORE(&logcb_finish_flag_, true); - free_allocated_info(); - tablet_handle_.reset(); - WEAK_BARRIER(); - ATOMIC_STORE(&lock_, false); // unlock + } else if (OB_UNLIKELY(table_version < 0 || table_id <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("input schema version is invalid", K(ret), K_(tablet_id), K(table_version)); + } else if (ignore_storage_schema_) { + ret = OB_NOT_SUPPORTED; + LOG_WARN("not supported to update storage schema", K(ret), K_(tablet_id)); + } else if (FALSE_IT(table_id_ = table_id)) { // clear in free_allocated_info + } else if (OB_FAIL(try_update_for_leader(table_version, &allocator, timeout_ts))) { + LOG_WARN("failed to update for leader", K(ret), K(table_version)); } - if (OB_ALLOCATE_MEMORY_FAILED == ret) { + + if (OB_ALLOCATE_MEMORY_FAILED == ret || OB_BLOCK_FROZEN == ret) { ret = OB_EAGAIN; } - return ret; } -int ObStorageSchemaRecorder::get_tablet_handle(ObTabletHandle &tablet_handle) +void ObStorageSchemaRecorder::sync_clog_failed_for_leader() { - int ret = OB_SUCCESS; - ObLSHandle ls_handle; - if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { - LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); - } else if (OB_FAIL(ls_handle.get_ls()->get_tablet(tablet_id_, tablet_handle))) { - LOG_WARN("failed to get tablet", K(ret), K_(ls_id), K_(tablet_id)); - } - return ret; + dec_ref_on_memtable(false/*sync_finish*/); } -int ObStorageSchemaRecorder::replay_get_tablet_handle(const SCN &scn, ObTabletHandle &tablet_handle) +int ObStorageSchemaRecorder::sync_clog_succ_for_leader(const int64_t update_version) { int ret = OB_SUCCESS; - ObLSHandle ls_handle; - if (OB_FAIL(MTL(ObLSService *)->get_ls(ls_id_, ls_handle, ObLSGetMod::STORAGE_MOD))) { - LOG_WARN("failed to get log stream", K(ret), K(ls_id_)); - } else if (OB_FAIL(ls_handle.get_ls()->replay_get_tablet(tablet_id_, scn, 
tablet_handle))) { - LOG_WARN("failed to get tablet", K(ret), K_(ls_id), K_(tablet_id), K(scn)); - } - return ret; -} - -void ObStorageSchemaRecorder::update_table_schema_fail() -{ - dec_ref_on_memtable(false); - ATOMIC_STORE(&logcb_finish_flag_, true); -} - -void ObStorageSchemaRecorder::update_table_schema_succ( - const int64_t table_version, - bool &finish_flag) -{ - int ret = OB_SUCCESS; - finish_flag = false; - if (table_version <= ATOMIC_LOAD(&max_saved_table_version_)) { - ret = OB_INVALID_ARGUMENT; - LOG_ERROR("schema log with smaller table version", K(ret), K_(tablet_id), - K(table_version), K(max_saved_table_version_)); - } else if (OB_UNLIKELY(!clog_scn_.is_valid() || clog_scn_.is_min() || nullptr == storage_schema_)) { + if (OB_ISNULL(storage_schema_)) { ret = OB_ERR_UNEXPECTED; - // clog_scn_ may be invalid because of concurrency in rare situation - LOG_WARN("clog ts or storage schema is invalid", K(ret), K_(ls_id), K_(tablet_id), - K_(clog_scn), KP_(storage_schema)); - } else if (storage_schema_->get_schema_version() != table_version) { + LOG_WARN("storage schema is invalid", K(ret), K_(clog_scn), KP_(storage_schema)); + } else if (OB_UNLIKELY(storage_schema_->get_schema_version() != update_version)) { ret = OB_ERR_UNEXPECTED; - LOG_ERROR("schema version not match", K(storage_schema_), K(table_version)); + LOG_ERROR("schema version not match", K(storage_schema_), K(update_version)); + } else if (OB_FAIL(dec_ref_on_memtable(true/*sync_finish*/))) { + LOG_WARN("failed to save storage schema", K_(tablet_id), K(storage_schema_)); + } else { + LOG_INFO("success to update storage schema", K(ret), K_(ls_id), K_(tablet_id), K(storage_schema_), + K(update_version), K_(clog_scn)); } - if (OB_SUCC(ret)) { - finish_flag = true; - if (OB_FAIL(dec_ref_on_memtable(true))) { - LOG_WARN("failed to save storage schema", K_(tablet_id), K(storage_schema_)); - } else { - FLOG_INFO("update table schema success", K(ret), K_(ls_id), K_(tablet_id), K(table_version), - 
"schema_version", table_version); - ATOMIC_SET(&max_saved_table_version_, table_version); - } - } - ATOMIC_STORE(&logcb_finish_flag_, true); + return ret; } int ObStorageSchemaRecorder::dec_ref_on_memtable(const bool sync_finish) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(nullptr == storage_schema_ || !tablet_handle_.is_valid())) { + if (OB_UNLIKELY(nullptr == storage_schema_ + || nullptr == tablet_handle_ptr_ + || !tablet_handle_ptr_->is_valid())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("storage schema or tablet handle is unexpected null", K(ret), K_(ls_id), K_(tablet_id), - KP_(storage_schema), K_(tablet_handle)); + KP_(storage_schema), K_(tablet_handle_ptr)); } else { storage_schema_->set_sync_finish(sync_finish); - if (OB_FAIL(tablet_handle_.get_obj()->save_multi_source_data_unit(storage_schema_, clog_scn_, + if (OB_FAIL(tablet_handle_ptr_->get_obj()->save_multi_source_data_unit(storage_schema_, clog_scn_, false/*for_replay*/, memtable::MemtableRefOp::DEC_REF, true/*is_callback*/))) { LOG_WARN("failed to save storage schema", K(ret), K_(tablet_id), K(storage_schema_)); } @@ -381,24 +228,38 @@ int ObStorageSchemaRecorder::dec_ref_on_memtable(const bool sync_finish) return ret; } -int ObStorageSchemaRecorder::prepare_schema( - const int64_t table_id, - int64_t &table_version) +int ObStorageSchemaRecorder::prepare_struct_in_lock( + int64_t &update_version, + ObIAllocator *allocator, + char *&clog_buf, + int64_t &clog_len) { int ret = OB_SUCCESS; - const int64_t alloc_size = sizeof(ObSchemaGetterGuard) + sizeof(ObStorageSchema); - void *buf = nullptr; - if (OB_ISNULL(allocator_)) { + const int64_t alloc_size = sizeof(ObStorageCLogCb) + sizeof(ObTabletHandle) + + sizeof(ObSchemaGetterGuard) + sizeof(ObStorageSchema); + int64_t alloc_buf_offset = 0; + char *buf = nullptr; + if (OB_ISNULL(allocator)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("allocator is unexpected null", K(ret), KP(allocator_)); - } else if (OB_ISNULL(buf = allocator_->alloc(alloc_size))) { + 
LOG_WARN("allocator is null", K(ret), K(allocator)); + } else if (FALSE_IT(allocator_ = allocator)) { + } else if (OB_ISNULL(buf = static_cast(allocator_->alloc(alloc_size)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to allocate schema guard", K(ret), K_(tablet_id)); - } else if (FALSE_IT(schema_guard_ = new (buf) ObSchemaGetterGuard(share::schema::ObSchemaMgrItem::MOD_SCHEMA_RECORDER))) { - } else if (FALSE_IT(storage_schema_ = new (static_cast(buf) + sizeof(ObSchemaGetterGuard)) ObStorageSchema())) { - } else if (OB_FAIL(get_expected_schema_guard(table_id, table_version))) { - LOG_WARN("fail to get expected schema", K(ret), K_(tablet_id), K(table_version)); - } else if (OB_FAIL(generate_clog())) { + } else { + logcb_ptr_ = new(buf) ObStorageCLogCb(*this); + alloc_buf_offset += sizeof(ObStorageCLogCb); + tablet_handle_ptr_ = new (buf + alloc_buf_offset) ObTabletHandle(); + alloc_buf_offset += sizeof(ObTabletHandle); + schema_guard_ = new (buf + alloc_buf_offset) ObSchemaGetterGuard(share::schema::ObSchemaMgrItem::MOD_SCHEMA_RECORDER); + alloc_buf_offset += sizeof(ObSchemaGetterGuard); + storage_schema_ = new (buf + alloc_buf_offset) ObStorageSchema(); + } + if (FAILEDx(get_tablet_handle(ls_id_, tablet_id_, *tablet_handle_ptr_))) { + LOG_WARN("failed to get tablet handle", K(ret), K_(ls_id), K_(tablet_id)); + } else if (OB_FAIL(get_schema(update_version))) { + LOG_WARN("fail to get expected schema", K(ret), K_(tablet_id), K(update_version)); + } else if (OB_FAIL(generate_clog(clog_buf, clog_len))) { LOG_WARN("failed to generate clog", K(ret), K_(tablet_id)); } return ret; @@ -406,45 +267,43 @@ int ObStorageSchemaRecorder::prepare_schema( void ObStorageSchemaRecorder::free_allocated_info() { - if (nullptr != allocator_) { - if (nullptr != schema_guard_) { + if (OB_NOT_NULL(allocator_)) { + if (OB_NOT_NULL(logcb_ptr_)) { + tablet_handle_ptr_->reset(); + tablet_handle_ptr_->~ObTabletHandle(); schema_guard_->~ObSchemaGetterGuard(); 
storage_schema_->~ObStorageSchema(); - allocator_->free(schema_guard_); + allocator_->free(logcb_ptr_); + logcb_ptr_ = nullptr; + tablet_handle_ptr_ = nullptr; schema_guard_ = nullptr; storage_schema_ = nullptr; } - - if (nullptr != clog_buf_) { + if (OB_NOT_NULL(clog_buf_)) { allocator_->free(clog_buf_); clog_buf_ = nullptr; - clog_len_ = 0; } - - if (nullptr != logcb_ptr_) { - allocator_->free(logcb_ptr_); - logcb_ptr_ = nullptr; - } - allocator_ = nullptr; } + table_id_ = 0; } -int ObStorageSchemaRecorder::get_expected_schema_guard( - const int64_t table_id, - int64_t &table_version) +int ObStorageSchemaRecorder::get_schema( + int64_t &table_version) { int ret = OB_SUCCESS; const ObTableSchema *t_schema = NULL; - if (table_version < 0) { + if (OB_UNLIKELY(table_version < 0)) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K_(tablet_id), K(table_version)); - } else if (OB_ISNULL(schema_guard_)) { + } else if (OB_UNLIKELY(nullptr == schema_guard_ || nullptr == storage_schema_ || nullptr == allocator_)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("schema guard is null", K(ret), K_(tablet_id), K(table_version)); - } else if (OB_FAIL(MTL(ObTenantSchemaService*)->get_schema_service()->get_tenant_schema_guard(MTL_ID(), *schema_guard_)) - || OB_FAIL(schema_guard_->get_table_schema(MTL_ID(), table_id, t_schema)) + LOG_WARN("schema guard/schema/allocator is null", K(ret), K_(tablet_id), KP_(schema_guard), + KP_(storage_schema), KP_(allocator)); + } else if (OB_FAIL(MTL(ObTenantSchemaService*)->get_schema_service()->get_tenant_schema_guard(MTL_ID(), *schema_guard_))) { + LOG_WARN("failed to get tenant schema guard", K(ret), K(table_id_)); + } else if (OB_FAIL(schema_guard_->get_table_schema(MTL_ID(), table_id_, t_schema)) || NULL == t_schema || table_version > t_schema->get_schema_version()) { // The version is checked here, so there is no need to check whether it is full @@ -455,7 +314,7 @@ int ObStorageSchemaRecorder::get_expected_schema_guard( } } else { 
table_version = t_schema->get_schema_version(); - if (OB_FAIL(storage_schema_->init(*allocator_, *t_schema, lib::get_compat_mode()))) { + if (OB_FAIL(storage_schema_->init(*allocator_, *t_schema, compat_mode_))) { LOG_WARN("failed to init storage schema", K(ret), K(t_schema)); } } @@ -471,58 +330,48 @@ int64_t ObStorageSchemaRecorder::calc_schema_log_size() const return size; } -int ObStorageSchemaRecorder::submit_schema_log(const int64_t table_id) +int ObStorageSchemaRecorder::submit_log( + const int64_t update_version, + const char *clog_buf, + const int64_t clog_len) { int ret = OB_SUCCESS; - const bool need_nonblock = false; - palf::LSN lsn; - clog_scn_.reset(); - - if (OB_UNLIKELY(nullptr == log_handler_ || nullptr == storage_schema_ - || !tablet_handle_.is_valid() - || nullptr == clog_buf_ - || clog_len_ <= 0 - || nullptr == allocator_)) { + if (OB_UNLIKELY(nullptr == storage_schema_ + || nullptr == tablet_handle_ptr_ + || !tablet_handle_ptr_->is_valid() + || nullptr == clog_buf + || nullptr == allocator_ + || clog_len <= 0)) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("log handler or storage_schema is null", K(ret), KP(log_handler_), KP(storage_schema_), - KP(clog_buf_), K(clog_len_), K(tablet_handle_), KP_(allocator)); - } else if (OB_ISNULL(logcb_ptr_)) { - void *buf = nullptr; - if (OB_ISNULL(buf = allocator_->alloc(sizeof(ObStorageSchemaLogCb)))) { - ret = OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("allocate memory failed", K(ret)); - } else { - logcb_ptr_ = new(buf) ObStorageSchemaLogCb(*this); + LOG_WARN("log handler or storage_schema is null", K(ret), KP(storage_schema_), + KP(clog_buf), K(clog_len), K(tablet_handle_ptr_)); + } else if (FALSE_IT(storage_schema_->set_sync_finish(false))) { + } else if (OB_FAIL(tablet_handle_ptr_->get_obj()->save_multi_source_data_unit(storage_schema_, + SCN::max_scn(), false/*for_replay*/, memtable::MemtableRefOp::INC_REF))) { + if (OB_BLOCK_FROZEN != ret) { + LOG_WARN("failed to inc ref for storage schema", K(ret), 
K_(tablet_id), K(storage_schema_)); } - } - if (OB_SUCC(ret)) { - logcb_ptr_->set_table_version(storage_schema_->get_schema_version()); - ATOMIC_STORE(&logcb_finish_flag_, false); - storage_schema_->set_sync_finish(false); - if (OB_FAIL(tablet_handle_.get_obj()->save_multi_source_data_unit(storage_schema_, - SCN::max_scn(), false/*for_replay*/, memtable::MemtableRefOp::INC_REF))) { - if (OB_BLOCK_FROZEN != ret) { - LOG_WARN("failed to inc ref for storage schema", K(ret), K_(tablet_id), K(storage_schema_)); - } - } else if (OB_FAIL(log_handler_->append(clog_buf_, clog_len_, SCN::min_scn(), need_nonblock, logcb_ptr_, lsn, clog_scn_))) { - LOG_WARN("fail to submit log", K(ret), K_(tablet_id)); - int tmp_ret = OB_SUCCESS; - if (OB_TMP_FAIL(dec_ref_on_memtable(false))) { - LOG_ERROR("failed to dec ref on memtable", K(tmp_ret), K_(ls_id), K_(tablet_id)); - } - } else { - LOG_INFO("submit schema log succeed", K(ret), K_(ls_id), K_(tablet_id), K_(clog_scn), K_(clog_len), - "schema_version", storage_schema_->get_schema_version()); + } else if (OB_FAIL(write_clog(clog_buf, clog_len))) { + LOG_WARN("fail to submit log", K(ret), K_(tablet_id)); + int tmp_ret = OB_SUCCESS; + if (OB_TMP_FAIL(dec_ref_on_memtable(false))) { + LOG_ERROR("failed to dec ref on memtable", K(tmp_ret), K_(ls_id), K_(tablet_id)); } + } else { + LOG_INFO("submit schema log succeed", K(ret), K_(ls_id), K_(tablet_id), K(clog_scn_), + "schema_version", storage_schema_->get_schema_version()); } return ret; } -int ObStorageSchemaRecorder::generate_clog() +int ObStorageSchemaRecorder::generate_clog( + char *&clog_buf, + int64_t &clog_len) { int ret = OB_SUCCESS; - + clog_buf = nullptr; + clog_len = 0; // tablet_id, schema_version, storage_schema char *buf = NULL; int64_t buf_len = 0; @@ -534,7 +383,7 @@ int ObStorageSchemaRecorder::generate_clog() // log_header + tablet_id + schema_version + storage_schema if (OB_UNLIKELY(nullptr == storage_schema_ || nullptr == allocator_)) { ret = OB_ERR_UNEXPECTED; - 
LOG_WARN("storage_schema is null", K(ret), KP(storage_schema_)); + LOG_WARN("storage_schema is null", K(ret), KP(storage_schema_), KP_(allocator)); } else if (OB_UNLIKELY(!storage_schema_->is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("data storage schema is invalid", K(ret), K_(tablet_id), K(storage_schema_)); @@ -556,8 +405,9 @@ int ObStorageSchemaRecorder::generate_clog() } if (OB_SUCC(ret)) { - clog_buf_ = buf; - clog_len_ = pos; + clog_buf_ = buf; // record to free later + clog_buf = buf; + clog_len = pos; } else if (nullptr != buf && nullptr != allocator_) { allocator_->free(buf); buf = nullptr; diff --git a/src/storage/ob_storage_schema_recorder.h b/src/storage/ob_storage_schema_recorder.h index 0910ed4426..51426c173b 100644 --- a/src/storage/ob_storage_schema_recorder.h +++ b/src/storage/ob_storage_schema_recorder.h @@ -13,24 +13,15 @@ #ifndef OCEANBASE_STORAGE_STORAGE_SCHEMA_RECORDER_ #define OCEANBASE_STORAGE_STORAGE_SCHEMA_RECORDER_ -#include - #include "lib/ob_define.h" -#include "lib/utility/ob_macro_utils.h" -#include "logservice/ob_append_callback.h" +#include "storage/ob_storage_clog_recorder.h" #include "storage/ob_storage_schema.h" -#include "storage/meta_mem/ob_tablet_handle.h" #include "share/schema/ob_multi_version_schema_service.h" #include "share/scn.h" namespace oceanbase { -namespace logservice -{ -class ObLogHandler; -} // namespace palf - namespace share { namespace schema @@ -46,7 +37,7 @@ class ObTablet; class ObIMemtableMgr; class ObTabletHandle; -class ObStorageSchemaRecorder +class ObStorageSchemaRecorder : public ObIStorageClogRecorder { public: @@ -57,9 +48,10 @@ public: const share::ObLSID &ls_id, const ObTabletID &tablet_id, const int64_t saved_schema_version, + const lib::Worker::CompatMode compat_mode, logservice::ObLogHandler *log_handler); - void reset(); void destroy(); + void reset(); bool is_inited() const { return is_inited_; } bool is_valid() const { @@ -67,7 +59,7 @@ public: && ls_id_.is_valid() && 
tablet_id_.is_valid() && nullptr != log_handler_ - && max_saved_table_version_ >= 0; + && max_saved_version_ >= 0; } // follower @@ -81,74 +73,55 @@ public: ObStorageSchemaRecorder(const ObStorageSchemaRecorder&) = delete; ObStorageSchemaRecorder& operator=(const ObStorageSchemaRecorder&) = delete; - int64_t get_max_sync_version() const { return ATOMIC_LOAD(&max_saved_table_version_); } TO_STRING_KV(K_(is_inited), K_(ls_id), K_(tablet_id)); private: - class ObStorageSchemaLogCb : public logservice::AppendCb + virtual int inner_replay_clog( + const int64_t update_version, + const share::SCN &scn, + const char *buf, + const int64_t size, + int64_t &pos) override; + virtual int sync_clog_succ_for_leader(const int64_t update_version) override; + virtual void sync_clog_failed_for_leader() override; + + int get_schema(int64_t &table_version); + + virtual int prepare_struct_in_lock( + int64_t &update_version, + ObIAllocator *allocator, + char *&clog_buf, + int64_t &clog_len) override; + virtual int submit_log( + const int64_t update_version, + const char *clog_buf, + const int64_t clog_len) override; + virtual void free_struct_in_lock() override { - public: - virtual int on_success() override; - virtual int on_failure() override; - - void set_table_version(const int64_t table_version); - - ObStorageSchemaLogCb(ObStorageSchemaRecorder &recorder) - : recorder_(recorder), - table_version_(common::OB_INVALID_VERSION) - {} - virtual ~ObStorageSchemaLogCb() { clear(); } - void clear(); - - ObStorageSchemaLogCb(const ObStorageSchemaLogCb&) = delete; - ObStorageSchemaLogCb& operator=(const ObStorageSchemaLogCb&) = delete; - private: - ObStorageSchemaRecorder &recorder_; - int64_t table_version_; - }; - -private: - int prepare_schema(const int64_t table_id, int64_t &table_version); - int get_expected_schema_guard(const int64_t table_id, int64_t &table_version); - int submit_schema_log(const int64_t table_id); - int generate_clog(); + free_allocated_info(); + } + int generate_clog( 
+ char *&clog_buf, + int64_t &clog_len); int64_t calc_schema_log_size() const; - int gen_log_and_submit( - char *buf, - const int64_t buf_len, - int64_t &pos); void free_allocated_info(); int try_update_with_lock(const int64_t table_id, const int64_t table_version, const int64_t expire_ts); - int get_tablet_handle(ObTabletHandle &tablet_handle); - int replay_get_tablet_handle(const share::SCN &scn, ObTabletHandle &tablet_handle); - // clog callback - void update_table_schema_fail(); - void update_table_schema_succ(const int64_t table_version, bool &finish_flag); OB_INLINE int dec_ref_on_memtable(const bool sync_finish); - // lock - OB_INLINE void wait_to_lock(const int64_t table_version); - OB_INLINE void wait_for_logcb(const int64_t table_version); - - static const int64_t MAX_RETRY_TIMES = 10; - bool is_inited_; - bool lock_; - bool logcb_finish_flag_; - ObStorageSchemaLogCb *logcb_ptr_; - int64_t max_saved_table_version_; + bool ignore_storage_schema_; + lib::Worker::CompatMode compat_mode_; char *clog_buf_; - int64_t clog_len_; - share::SCN clog_scn_; + ObTabletHandle *tablet_handle_ptr_; share::schema::ObSchemaGetterGuard *schema_guard_; ObStorageSchema *storage_schema_; ObIAllocator *allocator_; - logservice::ObLogHandler *log_handler_; share::ObLSID ls_id_; ObTabletID tablet_id_; - ObTabletHandle tablet_handle_; + int64_t table_id_; + }; } // storage diff --git a/src/storage/ob_storage_struct.cpp b/src/storage/ob_storage_struct.cpp index 436b630be7..b32ffa0334 100644 --- a/src/storage/ob_storage_struct.cpp +++ b/src/storage/ob_storage_struct.cpp @@ -119,14 +119,14 @@ int64_t ObPartitionBarrierLogState::get_serialize_size() const ObGetMergeTablesParam::ObGetMergeTablesParam() : merge_type_(INVALID_MERGE_TYPE), - merge_version_() + merge_version_(0) { } bool ObGetMergeTablesParam::is_valid() const { return (merge_type_ > INVALID_MERGE_TYPE && merge_type_ < MERGE_TYPE_MAX) - && (!is_major_merge() || merge_version_ > 0); + && 
(!storage::is_major_merge_type(merge_type_) || merge_version_ > 0); } ObGetMergeTablesResult::ObGetMergeTablesResult() @@ -136,12 +136,10 @@ ObGetMergeTablesResult::ObGetMergeTablesResult() base_schema_version_(INVALID_INT_VALUE), schema_version_(INVALID_INT_VALUE), create_snapshot_version_(INVALID_INT_VALUE), - checksum_method_(INVALID_INT_VALUE), suggest_merge_type_(INVALID_MERGE_TYPE), update_tablet_directly_(false), schedule_major_(false), scn_range_(), - dump_memtable_timestamp_(0), read_base_version_(0) { } @@ -154,7 +152,6 @@ bool ObGetMergeTablesResult::is_valid() const && base_schema_version_ >= 0 && schema_version_ >= 0 && create_snapshot_version_ >= 0 - && dump_memtable_timestamp_ >= 0 && (suggest_merge_type_ > INVALID_MERGE_TYPE && suggest_merge_type_ < MERGE_TYPE_MAX); } @@ -175,31 +172,39 @@ void ObGetMergeTablesResult::reset() create_snapshot_version_ = 0; suggest_merge_type_ = INVALID_MERGE_TYPE; schedule_major_ = false; - checksum_method_ = INVALID_INT_VALUE; scn_range_.reset(); - dump_memtable_timestamp_ = 0; read_base_version_ = 0; } -int ObGetMergeTablesResult::deep_copy(const ObGetMergeTablesResult &src) +int ObGetMergeTablesResult::copy_basic_info(const ObGetMergeTablesResult &src) { int ret = OB_SUCCESS; - if (!src.is_valid()) { + if (OB_UNLIKELY(!src.is_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid argument", K(ret), K(src)); - } else if (OB_FAIL(handle_.assign(src.handle_))) { - LOG_WARN("failed to copy handle", K(ret)); } else { version_range_ = src.version_range_; merge_version_ = src.merge_version_; base_schema_version_ = src.base_schema_version_; schema_version_ = src.schema_version_; create_snapshot_version_ = src.create_snapshot_version_; - checksum_method_ = src.checksum_method_; suggest_merge_type_ = src.suggest_merge_type_; schedule_major_ = src.schedule_major_; scn_range_ = src.scn_range_; - dump_memtable_timestamp_ = src.dump_memtable_timestamp_; + } + return ret; +} + +int ObGetMergeTablesResult::assign(const 
ObGetMergeTablesResult &src) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!src.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(src)); + } else if (OB_FAIL(handle_.assign(src.handle_))) { + LOG_WARN("failed to assign table handle", K(ret), K(src)); + } else if (OB_FAIL(copy_basic_info(src))) { + LOG_WARN("failed to copy basic info", K(ret), K(src)); } return ret; } @@ -227,7 +232,8 @@ ObUpdateTableStoreParam::ObUpdateTableStoreParam( allow_duplicate_sstable_(false), tx_data_(), binding_info_(), - auto_inc_seq_() + auto_inc_seq_(), + medium_info_list_(nullptr) { clog_checkpoint_scn_.set_min(); } @@ -241,7 +247,8 @@ ObUpdateTableStoreParam::ObUpdateTableStoreParam( const bool need_report, const SCN clog_checkpoint_scn, const bool need_check_sstable, - const bool allow_duplicate_sstable) + const bool allow_duplicate_sstable, + const compaction::ObMediumCompactionInfoList *medium_info_list) : table_handle_(table_handle), snapshot_version_(snapshot_version), clog_checkpoint_scn_(), @@ -260,7 +267,8 @@ ObUpdateTableStoreParam::ObUpdateTableStoreParam( allow_duplicate_sstable_(allow_duplicate_sstable), tx_data_(), binding_info_(), - auto_inc_seq_() + auto_inc_seq_(), + medium_info_list_(medium_info_list) { clog_checkpoint_scn_ = clog_checkpoint_scn; } @@ -291,7 +299,8 @@ ObUpdateTableStoreParam::ObUpdateTableStoreParam( allow_duplicate_sstable_(false), tx_data_(), binding_info_(), - auto_inc_seq_() + auto_inc_seq_(), + medium_info_list_(nullptr) { clog_checkpoint_scn_.set_min(); } diff --git a/src/storage/ob_storage_struct.h b/src/storage/ob_storage_struct.h index fbec67dd77..f0e0d513cb 100644 --- a/src/storage/ob_storage_struct.h +++ b/src/storage/ob_storage_struct.h @@ -23,12 +23,18 @@ #include "storage/ob_i_table.h" #include "storage/ob_storage_schema.h" #include "storage/tablet/ob_tablet_table_store_flag.h" +#include "storage/compaction/ob_compaction_util.h" #include "share/scn.h" #include 
"storage/tablet/ob_tablet_multi_source_data.h" #include "storage/tablet/ob_tablet_binding_helper.h" namespace oceanbase { +namespace compaction +{ +struct ObMediumCompactionInfoList; +} + namespace transaction { class ObLSTxCtxMgr; @@ -246,13 +252,11 @@ struct ObGetMergeTablesParam { ObMergeType merge_type_; int64_t merge_version_; - ObGetMergeTablesParam(); bool is_valid() const; - OB_INLINE bool is_major_merge() const { return MAJOR_MERGE == merge_type_; } OB_INLINE bool is_major_valid() const { - return is_major_merge() && merge_version_ > 0; + return storage::is_major_merge_type(merge_type_) && merge_version_ > 0; } TO_STRING_KV(K_(merge_type), K_(merge_version)); }; @@ -265,12 +269,10 @@ struct ObGetMergeTablesResult int64_t base_schema_version_; int64_t schema_version_; int64_t create_snapshot_version_; - int64_t checksum_method_; ObMergeType suggest_merge_type_; bool update_tablet_directly_; bool schedule_major_; share::ObScnRange scn_range_; - int64_t dump_memtable_timestamp_; int64_t read_base_version_; static const int64_t INVALID_INT_VALUE = -1; @@ -279,10 +281,11 @@ struct ObGetMergeTablesResult bool is_valid() const; void reset_handle_and_range(); void reset(); - int deep_copy(const ObGetMergeTablesResult &src); - TO_STRING_KV(K_(version_range), K_(merge_version), K_(base_schema_version), K_(schema_version), - K_(create_snapshot_version), K_(checksum_method), K_(suggest_merge_type), K_(handle), - K_(update_tablet_directly), K_(schedule_major), K_(scn_range), K_(dump_memtable_timestamp), K_(read_base_version)); + int assign(const ObGetMergeTablesResult &src); + int copy_basic_info(const ObGetMergeTablesResult &src); + TO_STRING_KV(K_(version_range), K_(scn_range), K_(merge_version), K_(base_schema_version), K_(schema_version), + K_(create_snapshot_version), K_(suggest_merge_type), K_(handle), + K_(update_tablet_directly), K_(schedule_major), K_(read_base_version)); }; OB_INLINE bool is_valid_migrate_status(const ObMigrateStatus &status) @@ -306,7 +309,8 
@@ struct ObUpdateTableStoreParam const bool need_report = false, const share::SCN clog_checkpoint_scn = share::SCN::min_scn(), const bool need_check_sstable = false, - const bool allow_duplicate_sstable = false); + const bool allow_duplicate_sstable = false, + const compaction::ObMediumCompactionInfoList *medium_info_list = nullptr); ObUpdateTableStoreParam( // for ddl merge task only const ObTableHandleV2 &table_handle, @@ -321,7 +325,8 @@ struct ObUpdateTableStoreParam TO_STRING_KV(K_(table_handle), K_(snapshot_version), K_(clog_checkpoint_scn), K_(multi_version_start), K_(keep_old_ddl_sstable), K_(need_report), KPC_(storage_schema), K_(rebuild_seq), K_(update_with_major_flag), K_(need_check_sstable), K_(ddl_checkpoint_scn), K_(ddl_start_scn), K_(ddl_snapshot_version), - K_(ddl_execution_id), K_(ddl_cluster_version), K_(allow_duplicate_sstable), K_(tx_data), K_(binding_info), K_(auto_inc_seq)); + K_(ddl_execution_id), K_(ddl_cluster_version), K_(allow_duplicate_sstable), K_(tx_data), K_(binding_info), K_(auto_inc_seq), + KPC_(medium_info_list)); ObTableHandleV2 table_handle_; int64_t snapshot_version_; @@ -344,6 +349,8 @@ struct ObUpdateTableStoreParam ObTabletTxMultiSourceDataUnit tx_data_; ObTabletBindingInfo binding_info_; share::ObTabletAutoincSeq auto_inc_seq_; + + const compaction::ObMediumCompactionInfoList *medium_info_list_; }; struct ObBatchUpdateTableStoreParam final @@ -482,7 +489,7 @@ struct ObMigrateRemoteTableInfo remote_max_end_log_ts_ = 0; remote_max_snapshot_version_ = 0; need_reuse_local_minor_ = true; - buffer_minor_end_log_ts_ = 0; + meta_merge_end_log_ts_ = 0; } bool has_major() const { return remote_min_major_version_ != INT64_MAX; } int64_t remote_min_major_version_; @@ -491,7 +498,7 @@ struct ObMigrateRemoteTableInfo int64_t remote_max_end_log_ts_; int64_t remote_max_snapshot_version_; bool need_reuse_local_minor_; - bool buffer_minor_end_log_ts_; + bool meta_merge_end_log_ts_; TO_STRING_KV( K_(remote_min_major_version), 
K_(remote_min_start_log_ts), @@ -499,7 +506,7 @@ struct ObMigrateRemoteTableInfo K_(remote_max_end_log_ts), K_(remote_max_snapshot_version), K_(need_reuse_local_minor), - K_(buffer_minor_end_log_ts)); + K_(meta_merge_end_log_ts)); }; class ObRebuildListener diff --git a/src/storage/ob_table_store_stat_mgr.cpp b/src/storage/ob_table_store_stat_mgr.cpp index 682d26ce0b..49365d7dd1 100644 --- a/src/storage/ob_table_store_stat_mgr.cpp +++ b/src/storage/ob_table_store_stat_mgr.cpp @@ -368,21 +368,9 @@ int ObTableStoreStatMgr::report_stat(const ObTableStoreStat &stat) int ObTableStoreStatMgr::get_table_store_stat(const int64_t idx, ObTableStoreStat &stat) { - int ret = OB_SUCCESS; - if (IS_NOT_INIT) { - ret = OB_NOT_INIT; - LOG_WARN("ObTableStoreStatMgr hasn't been initiated", K(ret)); - } else if (idx < 0 || idx > limit_cnt_) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid stat", K(ret), K(idx)); - } else { - SpinRLockGuard guard(lock_); - if (idx >= cur_cnt_) { - ret = OB_ITER_END; - } else { - stat = stat_array_[idx]; - } - } + UNUSED(idx); + UNUSED(stat); + int ret = OB_ITER_END; return ret; } diff --git a/src/storage/ob_table_store_stat_mgr.h b/src/storage/ob_table_store_stat_mgr.h index 42f15dbf1f..fba035269c 100644 --- a/src/storage/ob_table_store_stat_mgr.h +++ b/src/storage/ob_table_store_stat_mgr.h @@ -204,6 +204,7 @@ private: bool is_opened_; }; +// TODO(@DanLing) remove ObTableStoreStatMgr class ObTableStoreStatMgr { public: diff --git a/src/storage/ob_tenant_tablet_stat_mgr.cpp b/src/storage/ob_tenant_tablet_stat_mgr.cpp new file mode 100644 index 0000000000..db1c37f019 --- /dev/null +++ b/src/storage/ob_tenant_tablet_stat_mgr.cpp @@ -0,0 +1,708 @@ +/* + * (C) Copyright 2022 Alipay Inc. All Rights Reserved. 
+ * Authors: + * Danling + */ +#define USING_LOG_PREFIX STORAGE + +#include "lib/oblog/ob_log_module.h" +#include "share/ob_force_print_log.h" +#include "share/ob_thread_mgr.h" +#include "storage/ob_tenant_tablet_stat_mgr.h" + +using namespace oceanbase; +using namespace oceanbase::common; +using namespace oceanbase::storage; + + +/************************************* ObTabletStatKey *************************************/ +ObTabletStatKey::ObTabletStatKey( + const int64_t ls_id, + const uint64_t tablet_id) + : ls_id_(ls_id), + tablet_id_(tablet_id) +{ +} + +ObTabletStatKey::ObTabletStatKey( + const share::ObLSID ls_id, + const ObTabletID tablet_id) + : ls_id_(ls_id), + tablet_id_(tablet_id) +{ +} + +ObTabletStatKey::~ObTabletStatKey() +{ +} + +void ObTabletStatKey::reset() +{ + ls_id_.reset(); + tablet_id_.reset(); +} + +uint64_t ObTabletStatKey::hash() const +{ + uint64_t hash_val = 0; + hash_val += ls_id_.hash(); + hash_val += tablet_id_.hash(); + return hash_val; +} + +bool ObTabletStatKey::is_valid() const +{ + return ls_id_.is_valid() && tablet_id_.is_valid(); +} + +bool ObTabletStatKey::operator==(const ObTabletStatKey &other) const +{ + bool bret = true; + if (this == &other) { + } else if (ls_id_ != other.ls_id_ || tablet_id_ != other.tablet_id_) { + bret = false; + } + return bret; +} + +bool ObTabletStatKey::operator!=(const ObTabletStatKey &other) const +{ + return !(*this == other); +} + + +/************************************* ObTabletStat *************************************/ +bool ObTabletStat::is_valid() const +{ + return ls_id_ > 0 && tablet_id_ > 0; +} + +bool ObTabletStat::is_empty_query() const +{ + bool bret = false; + if (0 == scan_physical_row_cnt_ && 0 == scan_micro_block_cnt_) { + bret = true; + } + return bret; +} + +ObTabletStat& ObTabletStat::operator=(const ObTabletStat &other) +{ + if (this != &other) { + MEMCPY(this, &other, sizeof(ObTabletStat)); + } + return *this; +} + +ObTabletStat& ObTabletStat::operator+=(const ObTabletStat 
&other) +{ + if (other.is_valid()) { + ls_id_ = other.ls_id_; + tablet_id_ = other.tablet_id_; + query_cnt_ += other.query_cnt_; + merge_cnt_ += other.merge_cnt_; + scan_logical_row_cnt_ += other.scan_logical_row_cnt_; + scan_physical_row_cnt_ += other.scan_physical_row_cnt_; + scan_micro_block_cnt_ += other.scan_micro_block_cnt_; + pushdown_micro_block_cnt_ += other.pushdown_micro_block_cnt_; + exist_row_total_table_cnt_ += other.exist_row_total_table_cnt_; + exist_row_read_table_cnt_ += other.exist_row_read_table_cnt_; + merge_physical_row_cnt_ += other.merge_physical_row_cnt_; + merge_logical_row_cnt_ += other.merge_logical_row_cnt_; + } + return *this; +} + +ObTabletStat& ObTabletStat::archive(int64_t factor) +{ + if (factor > 0) { + query_cnt_ /= factor; + merge_cnt_ /= factor; + scan_logical_row_cnt_ /= factor; + scan_physical_row_cnt_ /= factor; + scan_micro_block_cnt_ /= factor; + pushdown_micro_block_cnt_ /= factor; + exist_row_total_table_cnt_ /= factor; + exist_row_read_table_cnt_ /= factor; + merge_physical_row_cnt_ /= factor; + merge_logical_row_cnt_ /= factor; + } + return *this; +} + +bool ObTabletStat::is_hot_tablet() const +{ + return query_cnt_ + merge_cnt_ >= ACCESS_FREQUENCY; +} + +bool ObTabletStat::is_insert_mostly() const +{ + bool bret = false; + if (merge_physical_row_cnt_ < BASIC_ROW_CNT_THRESHOLD) { + } else { + bret = merge_logical_row_cnt_ >= (merge_physical_row_cnt_ / BASE_FACTOR * INSERT_PIVOT_FACTOR); + } + return bret; +} + +bool ObTabletStat::is_update_mostly() const +{ + bool bret = false; + if (0 == merge_physical_row_cnt_ || merge_physical_row_cnt_ < BASIC_ROW_CNT_THRESHOLD) { + } else { + bret = merge_logical_row_cnt_ >= (merge_physical_row_cnt_ / BASE_FACTOR * UPDATE_PIVOT_FACTOR); + } + return bret; +} + +bool ObTabletStat::is_inefficient_scan() const +{ + bool bret = false; + if (0 == scan_logical_row_cnt_ || scan_logical_row_cnt_ < BASIC_ROW_CNT_THRESHOLD) { + } else { + bret = scan_physical_row_cnt_ / scan_logical_row_cnt_ 
>= SCAN_READ_FACTOR; + } + return bret; +} + +bool ObTabletStat::is_inefficient_insert() const +{ + bool bret = false; + if (0 == exist_row_total_table_cnt_ || exist_row_total_table_cnt_ < BASIC_TABLE_CNT_THRESHOLD) { + } else { + bret = exist_row_read_table_cnt_ * BASE_FACTOR / exist_row_total_table_cnt_ >= EXIST_READ_FACTOR; + } + return bret; +} + +bool ObTabletStat::is_inefficient_pushdown() const +{ + bool bret = false; + if (0 == scan_micro_block_cnt_ || scan_micro_block_cnt_ < BASIC_MICRO_BLOCK_CNT_THRESHOLD) { + } else { + bret = pushdown_micro_block_cnt_ < scan_micro_block_cnt_ / SCAN_READ_FACTOR; + } + return bret; +} + + +/************************************* ObTabletStream *************************************/ +ObTabletStream::ObTabletStream() + : key_(), + curr_buckets_(CURR_BUCKET_STEP), + latest_buckets_(LATEST_BUCKET_STEP), + past_buckets_(PAST_BUCKET_STEP) +{ +} + +ObTabletStream::~ObTabletStream() +{ +} + +void ObTabletStream::reset() +{ + key_.reset(); + curr_buckets_.reset(); + latest_buckets_.reset(); + past_buckets_.reset(); +} + +void ObTabletStream::add_stat(const ObTabletStat &stat) +{ + if (!key_.is_valid()) { + key_.ls_id_ = stat.ls_id_; + key_.tablet_id_ = stat.tablet_id_; + } + + if (key_.ls_id_.id() == stat.ls_id_ && key_.tablet_id_.id() == stat.tablet_id_) { + curr_buckets_.add(stat); + } +} + +void ObTabletStream::refresh() +{ + ObTabletStat tablet_stat; + bool has_retired_stat = false; + + curr_buckets_.refresh(tablet_stat, has_retired_stat); + latest_buckets_.refresh(tablet_stat, has_retired_stat); + past_buckets_.refresh(tablet_stat, has_retired_stat); +} + +template +int ObTabletStream::get_bucket_tablet_stat( + const ObTabletStatBucket &bucket, + common::ObIArray &tablet_stats) const +{ + int ret = OB_SUCCESS; + int64_t idx = bucket.head_idx_; + + for (int64_t i = 0; OB_SUCC(ret) && i < bucket.count(); ++i) { + int64_t curr_idx = bucket.get_idx(idx); + if (OB_FAIL(tablet_stats.push_back(bucket.units_[curr_idx]))) { + 
LOG_WARN("failed to add tablet stat", K(ret), K(idx)); + } + ++idx; + } + return ret; +} + +int ObTabletStream::get_all_tablet_stat(common::ObIArray &tablet_stats) const +{ + int ret = OB_SUCCESS; + if (OB_FAIL(get_bucket_tablet_stat(curr_buckets_, tablet_stats))) { + LOG_WARN("failed to get bucket tablet stat in past bucket", K(ret)); + } else if (OB_FAIL(get_bucket_tablet_stat(latest_buckets_, tablet_stats))) { + LOG_WARN("failed to get bucket tablet stat in latest bucket", K(ret)); + } else if (OB_FAIL(get_bucket_tablet_stat(past_buckets_, tablet_stats))) { + LOG_WARN("failed to get bucket tablet stat in curr bucket", K(ret)); + } + return ret; +} + + +/************************************* ObTabletStreamPool *************************************/ +ObTabletStreamPool::ObTabletStreamPool() + : dynamic_allocator_(MTL_ID()), + free_list_allocator_("FreeTbltStream"), + free_list_(), + max_free_list_num_(0), + max_dynamic_node_num_(0), + allocated_dynamic_num_(0), + is_inited_(false) +{ +} + +ObTabletStreamPool::~ObTabletStreamPool() +{ + destroy(); +} + +void ObTabletStreamPool::destroy() +{ + is_inited_ = false; + ObTabletStreamNode *node = nullptr; + + while (OB_SUCCESS == free_list_.pop(node)) { + if (OB_NOT_NULL(node)) { + node->~ObTabletStreamNode(); + node = nullptr; + } + } + dynamic_allocator_.reset(); + free_list_.destroy(); + free_list_allocator_.reset(); +} + +int ObTabletStreamPool::init( + const int64_t max_free_list_num, + const int64_t max_dynamic_node_num) +{ + int ret = OB_SUCCESS; + const char *LABEL = "IncTbltStream"; + ObTabletStreamNode *buf = nullptr; + + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("ObTabletStreamPool has been inited", K(ret)); + } else if (max_free_list_num <= 0 || max_dynamic_node_num < 0) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid argument", K(ret), K(max_free_list_num), K(max_dynamic_node_num)); + } else if (OB_FAIL(dynamic_allocator_.init(ObMallocAllocator::get_instance(), OB_MALLOC_NORMAL_BLOCK_SIZE))) { 
+ LOG_WARN("failed to init fifo allocator", K(ret)); + } else if (OB_FAIL(free_list_.init(max_free_list_num, &free_list_allocator_))) { + LOG_WARN("failed to init free list", K(ret), K(max_free_list_num)); + } else if (OB_ISNULL(buf = static_cast(free_list_allocator_.alloc(sizeof(ObTabletStreamNode) * max_free_list_num)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory for stream node in free list", K(ret), K(max_free_list_num)); + } else { + dynamic_allocator_.set_label(LABEL); + ObTabletStreamNode *node = nullptr; + for (int64_t i = 0; OB_SUCC(ret) && i < max_free_list_num; ++i) { + node = new (buf + i) ObTabletStreamNode(FIXED_ALLOC); + if (OB_FAIL(free_list_.push(node))) { + LOG_WARN("failed to push node to free list", K(ret)); + } + } + + if (OB_FAIL(ret)) { + destroy(); + } else { + max_free_list_num_ = max_free_list_num; + max_dynamic_node_num_ = max_dynamic_node_num; + is_inited_ = true; + } + } + return ret; +} + +int ObTabletStreamPool::alloc(ObTabletStreamNode *&free_node) +{ + int ret = OB_SUCCESS; + void *buf = nullptr; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTabletStreamPool not inited", K(ret)); + } else if (OB_NOT_NULL(free_node)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid argument", K(ret), K(free_node)); + } else if (OB_FAIL(free_list_.pop(free_node))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_WARN("failed to pop free node from free list", K(ret)); + } else { + ret = OB_SUCCESS; + } + } + + if (OB_FAIL(ret)) { + } else if (NULL == free_node) { + if (allocated_dynamic_num_ >= max_dynamic_node_num_) { + ret = OB_SIZE_OVERFLOW; + LOG_WARN("the number of allocated dynamic node has reached MAX", K(ret), K(max_dynamic_node_num_), K(allocated_dynamic_num_)); + } else if (OB_ISNULL(buf = dynamic_allocator_.alloc(sizeof(ObTabletStreamNode)))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to allocate memory for free node", K(ret)); + } else { + free_node = new (buf) 
ObTabletStreamNode(DYNAMIC_ALLOC); + ++allocated_dynamic_num_; + } + } + return ret; +} + +void ObTabletStreamPool::free(ObTabletStreamNode *node) +{ + if (OB_NOT_NULL(node)) { + int tmp_ret = OB_SUCCESS; + if (IS_NOT_INIT) { + tmp_ret = OB_NOT_INIT; + LOG_ERROR("[MEMORY LEAK] ObTabletStreamPool is not inited, cannot free this node!!!", K(tmp_ret), KPC(node)); + } else if (DYNAMIC_ALLOC == node->flag_) { + node->~ObTabletStreamNode(); + dynamic_allocator_.free(node); + --allocated_dynamic_num_; + } else { + node->reset(); + OB_ASSERT(OB_SUCCESS == free_list_.push(node)); + } + } +} + + +/************************************* ObTenantTabletStatMgr *************************************/ +ObTenantTabletStatMgr::ObTenantTabletStatMgr() + : report_stat_task_(*this), + stream_pool_(), + stream_map_(), + lru_list_(), + bucket_lock_(), + report_queue_(), + report_cursor_(0), + pending_cursor_(0), + report_tg_id_(0), + is_inited_(false) +{ +} + +ObTenantTabletStatMgr::~ObTenantTabletStatMgr() +{ + destroy(); +} + +int ObTenantTabletStatMgr::init() +{ + int ret = OB_SUCCESS; + const bool repeat = true; + + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("ObTenantTabletStatMgr init twice", K(ret)); + } else if (OB_FAIL(stream_pool_.init(DEFAULT_MAX_FREE_STREAM_CNT, DEFAULT_UP_LIMIT_STREAM_CNT))) { + LOG_WARN("failed to init tablet stream pool", K(ret)); + } else if (OB_FAIL(stream_map_.create(DEFAULT_BUCKET_NUM, "TabletStats"))) { + LOG_WARN("failed to create TabletStats", K(ret)); + } else if (OB_FAIL(bucket_lock_.init(DEFAULT_BUCKET_NUM))) { + LOG_WARN("failed to init bucket lock", K(ret)); + } else if (OB_FAIL(TG_CREATE_TENANT(lib::TGDefIDs::TabletStatRpt, report_tg_id_))) { + LOG_WARN("failed to create TabletStatRpt thread", K(ret)); + } else if (OB_FAIL(TG_START(report_tg_id_))) { + LOG_WARN("failed to start stat TabletStatRpt thread", K(ret)); + } else if (OB_FAIL(TG_SCHEDULE(report_tg_id_, report_stat_task_, TABLET_STAT_PROCESS_INTERVAL, repeat))) { + LOG_WARN("failed 
to schedule tablet stat update task", K(ret)); + } else { + is_inited_ = true; + } + return ret; +} + +int ObTenantTabletStatMgr::mtl_init(ObTenantTabletStatMgr* &tablet_stat_mgr) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(tablet_stat_mgr->init())) { + LOG_WARN("failed to init tablet stat mgr", K(ret), K(MTL_ID())); + } else { + LOG_INFO("success to init ObTenantTabletStatMgr", K(MTL_ID())); + } + return ret; +} + +void ObTenantTabletStatMgr::wait() +{ + TG_WAIT(report_tg_id_); +} + +void ObTenantTabletStatMgr::stop() +{ + TG_STOP(report_tg_id_); +} + +void ObTenantTabletStatMgr::destroy() +{ + stop(); + wait(); + TG_DESTROY(report_tg_id_); + { + ObBucketWLockAllGuard lock_guard(bucket_lock_); + stream_map_.destroy(); + stream_pool_.destroy(); + lru_list_.reset(); + report_cursor_ = 0; + pending_cursor_ = 0; + report_tg_id_ = 0; + is_inited_ = false; + } + bucket_lock_.destroy(); + FLOG_INFO("ObTenantTabletStatMgr destroyed!"); +} + +/* Enqueues stat into report_queue_ by advancing pending_cursor_ with a CAS. Returns OB_NOT_INIT / OB_INVALID_ARGUMENT for an uninitialised manager or an invalid stat. When the queue is full, or the CAS loses MAX_REPORT_RETRY_CNT times, the stat is dropped with an info log and ret stays OB_SUCCESS. */ +int ObTenantTabletStatMgr::report_stat(const ObTabletStat &stat) +{ + int ret = OB_SUCCESS; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTenantTabletStatMgr not inited", K(ret)); + } else if (!stat.is_valid()) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid arguments", K(ret), K(stat)); + } else { + int64_t retry_cnt = 0; + while (retry_cnt < MAX_REPORT_RETRY_CNT) { + uint64_t pending_cur = ATOMIC_LOAD(&pending_cursor_); + uint64_t report_cur = ATOMIC_LOAD(&report_cursor_); + if (pending_cur - report_cur + 1 == DEFAULT_MAX_PENDING_CNT) { // full queue + LOG_INFO("report_queue is full, wait to process", K(report_cur), K(pending_cur), K(stat)); + break; + } else if (pending_cur != ATOMIC_CAS(&pending_cursor_, pending_cur, pending_cur + 1)) { + ++retry_cnt; + } else { + report_queue_[pending_cur % DEFAULT_MAX_PENDING_CNT] = stat; // allow dirty write + break; + } + } + if (retry_cnt == MAX_REPORT_RETRY_CNT) { + // pending cursor has been moved in other thread, ignore this tablet_stat + LOG_INFO("pending cursor has been
moved in other thread, ignore current stat", K(stat)); + } + } + return ret; +} + +int ObTenantTabletStatMgr::get_latest_tablet_stat( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + ObTabletStat &tablet_stat) +{ + int ret = OB_SUCCESS; + tablet_stat.reset(); + tablet_stat.ls_id_ = ls_id.id(); + tablet_stat.tablet_id_ = tablet_id.id(); + const ObTabletStatKey key(ls_id, tablet_id); + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTenantTabletStatMgr not inited", K(ret)); + } else if (OB_UNLIKELY(!key.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid arguments", K(ret), K(ls_id), K(tablet_id)); + } else { + ObTabletStreamNode *stream_node = nullptr; + ObBucketHashRLockGuard lock_guard(bucket_lock_, key.hash()); + if (OB_FAIL(stream_map_.get_refactored(key, stream_node))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get history stat", K(ret), K(key)); + } + } else { + stream_node->stream_.get_latest_stat(tablet_stat); + } + } + return ret; +} + +int ObTenantTabletStatMgr::get_history_tablet_stats( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + common::ObIArray &tablet_stats) +{ + int ret = OB_SUCCESS; + const ObTabletStatKey key(ls_id, tablet_id); + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObTenantTabletStatMgr not inited", K(ret)); + } else if (OB_UNLIKELY(!key.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid arguments", K(ret), K(ls_id), K(tablet_id)); + } else { + ObTabletStreamNode *stream_node = nullptr; + ObBucketHashRLockGuard lock_guard(bucket_lock_, key.hash()); + if (OB_FAIL(stream_map_.get_refactored(key, stream_node))) { + if (OB_HASH_NOT_EXIST != ret) { + LOG_WARN("failed to get history stat", K(ret), K(key)); + } + } else if (OB_FAIL(stream_node->stream_.get_all_tablet_stat(tablet_stats))) { + LOG_WARN("failed to get all tablet stat", K(ret), K(key)); + } + } + return ret; +} + +int ObTenantTabletStatMgr::update_tablet_stream(const 
ObTabletStat &report_stat) +{ + int ret = OB_SUCCESS; + ObTabletStreamNode *stream_node = nullptr; + ObTabletStatKey key(report_stat.ls_id_, report_stat.tablet_id_); + /* Look up the stream node for this stat's key under the bucket read lock. */ + { + ObBucketHashRLockGuard lock_guard(bucket_lock_, key.hash()); + ret = stream_map_.get_refactored(key, stream_node); + } + + if (OB_SUCC(ret)) { + } else if (OB_HASH_NOT_EXIST == ret) { + /* No stream for this key yet: take a node from fetch_node() and publish it in stream_map_ under the bucket write lock. */ + ret = OB_SUCCESS; + if (OB_FAIL(fetch_node(stream_node))) { + LOG_WARN("failed to fetch node from stream pool", K(ret), K(report_stat)); + } else { + ObBucketHashWLockGuard lock_guard(bucket_lock_, key.hash()); + if (OB_FAIL(stream_map_.set_refactored(key, stream_node))) { + LOG_WARN("failed to update stat map", K(ret), K(report_stat)); + } + } + } else { + LOG_WARN("failed to get stream node from stream map", K(ret), K(key)); + } + + /* Move the node to the front of lru_list_ and accumulate the reported stat into its stream. */ + if (OB_SUCC(ret)) { + if (OB_ISNULL(stream_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("stream node is unexpected null", K(ret), K(report_stat)); + } else if (!lru_list_.move_to_first(stream_node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to add node to lru list", K(ret), K(stream_node)); + } else { + ObBucketHashWLockGuard lock_guard(bucket_lock_, key.hash()); + stream_node->stream_.add_stat(report_stat); + } + } + + /* NOTE(review): on any failure the node is handed back to stream_pool_ even when it is still registered in stream_map_ and/or lru_list_; nothing in this function erases it from the map or unlinks it from the list, so both may keep pointing at a released node - confirm and fix. */ + if (OB_FAIL(ret) && OB_NOT_NULL(stream_node)) { + stream_pool_.free(stream_node); + stream_node = nullptr; + } + return ret; +} + +/* Obtains a node from stream_pool_. When the pool reports OB_SIZE_OVERFLOW the last node of lru_list_ is evicted instead: its key is erased from stream_map_, the node is taken off the list and its stream is reset. A node obtained either way is then added to the front of lru_list_. */ +int ObTenantTabletStatMgr::fetch_node(ObTabletStreamNode *&node) +{ + int ret = OB_SUCCESS; + node = nullptr; + if (OB_FAIL(stream_pool_.alloc(node))) { + if (OB_SIZE_OVERFLOW == ret) { + if (lru_list_.is_empty()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("lru list is unexpected null", K(ret)); + } else { + ret = OB_SUCCESS; + ObTabletStatKey old_key = lru_list_.get_last()->stream_.get_tablet_stat_key(); + ObBucketHashWLockGuard lock_guard(bucket_lock_, old_key.hash()); + if (OB_FAIL(stream_map_.erase_refactored(old_key))) { + LOG_WARN("failed to erase tablet stat stream", K(ret), K(old_key)); + } else { + node =
lru_list_.remove_last(); + node->stream_.reset(); + } + } + } else { + LOG_WARN("failed to get free node from stream pool", K(ret)); + } + } + + if (OB_FAIL(ret)) { + } else if (OB_ISNULL(node)) { + } else if (!lru_list_.add_first(node)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("failed to add node to lru list", K(ret), KPC(node)); + } + return ret; +} + +void ObTenantTabletStatMgr::dump_tablet_stat_status() +{ + if (REACH_TENANT_TIME_INTERVAL(DUMP_TABLET_STAT_INTERVAL)) { + uint64_t start_idx = report_cursor_; // it's OK to dirty read + uint64_t end_idx = pending_cursor_; + int64_t map_size = stream_map_.size(); + int64_t stream_node_cnt = stream_pool_.get_allocated_num(); + + LOG_INFO("dump_tablet_stat_status", + "queue_cnt", end_idx - start_idx, K(start_idx), K(end_idx), + "map_size", map_size, + "stream_node_cnt", stream_node_cnt); + } +} + +void ObTenantTabletStatMgr::process_stats() +{ + int tmp_ret = OB_SUCCESS; + uint64_t start_idx = ATOMIC_LOAD(&report_cursor_); + const uint64_t end_idx = ATOMIC_LOAD(&pending_cursor_); + + if (start_idx == end_idx) { // empty queue + } else { + for (uint64_t i = start_idx; i < end_idx; ++i) { + const ObTabletStat &cur_stat = report_queue_[i % DEFAULT_MAX_PENDING_CNT]; + if (!cur_stat.is_valid()) { + // allow dirty read + } else if (OB_TMP_FAIL(update_tablet_stream(cur_stat))) { + LOG_WARN("failed to update tablet stat", K(tmp_ret), K(cur_stat)); + } + } + ATOMIC_STORE(&report_cursor_, end_idx); + } +} + +void ObTenantTabletStatMgr::refresh_all(const int64_t step) +{ + TabletStreamMap::iterator iter = stream_map_.begin(); + for ( ; iter != stream_map_.end(); ++iter) { + for (int64_t i = 0; i < step; ++i) { + iter->second->stream_.refresh(); + } + } +} + +void ObTenantTabletStatMgr::TabletStatUpdater::runTimerTask() +{ + mgr_.dump_tablet_stat_status(); + mgr_.process_stats(); + + int64_t interval_step = 0; + if (CHECK_SCHEDULE_TIME_INTERVAL(CHECK_INTERVAL, interval_step)) { + if (OB_UNLIKELY(interval_step > 1)) { + 
LOG_WARN("tablet streams not refresh too long", K(interval_step)); + } + mgr_.refresh_all(interval_step); + FLOG_INFO("TenantTabletStatMgr refresh all tablet stream", K(MTL_ID()), K(interval_step)); + } +} diff --git a/src/storage/ob_tenant_tablet_stat_mgr.h b/src/storage/ob_tenant_tablet_stat_mgr.h new file mode 100644 index 0000000000..12833837c1 --- /dev/null +++ b/src/storage/ob_tenant_tablet_stat_mgr.h @@ -0,0 +1,336 @@ +/* + * (C) Copyright 2022 Alipay Inc. All Rights Reserved. + * Authors: + * Danling + */ + +#ifndef OCEANBASE_STORAGE_TENANT_TABLET_STAT_MGR_H_ +#define OCEANBASE_STORAGE_TENANT_TABLET_STAT_MGR_H_ + +#include "common/ob_tablet_id.h" +#include "share/ob_ls_id.h" +#include "lib/hash/ob_hashmap.h" +#include "share/rc/ob_tenant_base.h" +#include "lib/allocator/page_arena.h" +#include "lib/allocator/ob_fifo_allocator.h" +#include "lib/lock/ob_bucket_lock.h" +#include "lib/queue/ob_fixed_queue.h" +#include "lib/list/ob_dlist.h" + +namespace oceanbase +{ + +namespace storage +{ + +struct ObTabletStatKey +{ +public: + ObTabletStatKey() : ls_id_(), tablet_id_() {} + ObTabletStatKey(const int64_t ls_id, const uint64_t tablet_id); + ObTabletStatKey(const share::ObLSID ls_id, const common::ObTabletID tablet_id); + ~ObTabletStatKey(); + void reset(); + uint64_t hash() const; + bool is_valid() const; + bool operator == (const ObTabletStatKey &other) const; + bool operator != (const ObTabletStatKey &other) const; + TO_STRING_KV(K_(ls_id), K_(tablet_id)); + + share::ObLSID ls_id_; + common::ObTabletID tablet_id_; +}; + + +struct ObTabletStat +{ +public: + ObTabletStat() { reset(); } + ~ObTabletStat() = default; + void reset() { MEMSET(this, 0, sizeof(ObTabletStat)); } + bool is_valid() const; + bool is_empty_query() const; + ObTabletStat& operator=(const ObTabletStat &other); + ObTabletStat& operator+=(const ObTabletStat &other); + ObTabletStat& archive(int64_t factor); + bool is_hot_tablet() const; + bool is_insert_mostly() const; + bool is_update_mostly() 
const; + bool is_inefficient_scan() const; + bool is_inefficient_insert() const; + bool is_inefficient_pushdown() const; + TO_STRING_KV(K_(ls_id), K_(tablet_id), K_(query_cnt), K_(merge_cnt), K_(scan_logical_row_cnt), + K_(scan_physical_row_cnt), K_(scan_micro_block_cnt), K_(pushdown_micro_block_cnt), + K_(exist_row_total_table_cnt), K_(exist_row_read_table_cnt), K_(merge_physical_row_cnt), + K_(merge_logical_row_cnt)); + +public: + /* Thresholds and ratio factors consumed by the is_*() predicates of ObTabletStat. */ + static constexpr int64_t ACCESS_FREQUENCY = 5; + static constexpr int64_t BASE_FACTOR = 10; + static constexpr int64_t INSERT_PIVOT_FACTOR = 5; + static constexpr int64_t UPDATE_PIVOT_FACTOR = 4; + static constexpr int64_t SCAN_READ_FACTOR = 2; + static constexpr int64_t EXIST_READ_FACTOR = 7; + static constexpr int64_t BASIC_TABLE_CNT_THRESHOLD = 5; + static constexpr int64_t BASIC_MICRO_BLOCK_CNT_THRESHOLD = 16; + static constexpr int64_t BASIC_ROW_CNT_THRESHOLD = 10000; // TODO(@Danling) make it a configuration item +public: + int64_t ls_id_; + uint64_t tablet_id_; + uint32_t query_cnt_; + uint32_t merge_cnt_; + uint64_t scan_logical_row_cnt_; + uint64_t scan_physical_row_cnt_; + uint64_t scan_micro_block_cnt_; + uint64_t pushdown_micro_block_cnt_; + uint64_t exist_row_total_table_cnt_; + uint64_t exist_row_read_table_cnt_; + uint64_t merge_physical_row_cnt_; + uint64_t merge_logical_row_cnt_; +}; + + +/* Fixed array of SIZE ObTabletStat units; head_idx_ and curr_idx_ are mapped onto the array modulo SIZE by get_idx(). */ +template +class ObTabletStatBucket +{ +public: + ObTabletStatBucket(const uint64_t step) + : head_idx_(0), curr_idx_(SIZE - 1), refresh_cnt_(0), step_(step) {} + ~ObTabletStatBucket() {} + void reset(); + OB_INLINE int64_t count() const { return curr_idx_ - head_idx_ + 1; } + void add(const ObTabletStat &tablet_stat); + bool retire_and_switch(ObTabletStat &old_stat); + void refresh(ObTabletStat &stat, bool &has_retired_stat); + void get_tablet_stat(ObTabletStat &tablet_stat) const; + uint32_t get_idx(const uint32_t &idx) const { return idx % SIZE; } + TO_STRING_KV(K_(units), K_(head_idx), K_(curr_idx), K_(refresh_cnt), K_(step)); +
+public: + ObTabletStat units_[SIZE]; + uint32_t head_idx_; + uint32_t curr_idx_; + uint32_t refresh_cnt_; + uint32_t step_; +private: + DISALLOW_COPY_AND_ASSIGN(ObTabletStatBucket); +}; + +template +void ObTabletStatBucket::reset() +{ + for (int64_t i = 0; i < SIZE; ++i) { + units_[i].reset(); + } + head_idx_ = 0; + curr_idx_ = SIZE - 1; + refresh_cnt_ = 0; +} + +template +void ObTabletStatBucket::add(const ObTabletStat &stat) +{ + units_[get_idx(curr_idx_)] += stat; +} + +template +bool ObTabletStatBucket::retire_and_switch(ObTabletStat &old_stat) +{ + bool need_retire = (0 == refresh_cnt_ % step_); + + if (need_retire) { // retire head unit and switch cur unit + old_stat = units_[get_idx(head_idx_)]; + units_[get_idx(head_idx_)].reset(); + ++head_idx_; + ++curr_idx_; + } + return need_retire; +} + +template +void ObTabletStatBucket::refresh(ObTabletStat &stat, bool &has_retired_stat) +{ + ++refresh_cnt_; + + if (has_retired_stat) { + add(stat); + has_retired_stat = false; + } + has_retired_stat = retire_and_switch(stat); +} + +template +void ObTabletStatBucket::get_tablet_stat(ObTabletStat &tablet_stat) const +{ + for (int64_t i = 0; i < SIZE; ++i) { + tablet_stat += units_[i]; + } +} + + +class ObTabletStream +{ +public: + ObTabletStream(); + virtual ~ObTabletStream(); + void reset(); + void add_stat(const ObTabletStat &stat); + void refresh(); + + template + int get_bucket_tablet_stat( + const ObTabletStatBucket &bucket, + common::ObIArray &tablet_stats) const; + int get_all_tablet_stat(common::ObIArray &tablet_stats) const; + OB_INLINE ObTabletStatKey& get_tablet_stat_key() { return key_; } + OB_INLINE void get_latest_stat(ObTabletStat &tablet_stat) const { curr_buckets_.get_tablet_stat(tablet_stat); } + TO_STRING_KV(K_(key), K_(curr_buckets), K_(latest_buckets), K_(past_buckets)); + +private: + static constexpr uint32_t CURR_BUCKET_CNT = 8; + static constexpr uint32_t LATEST_BUCKET_CNT = 4; + static constexpr uint32_t PAST_BUCKET_CNT = 4; + static constexpr 
uint32_t CURR_BUCKET_STEP = 1; // 2min for each unit, total 16min + static constexpr uint32_t LATEST_BUCKET_STEP = 16; // 32min for each unit, total 128min + static constexpr uint32_t PAST_BUCKET_STEP = 32; // 64min for each unit, total 256min + + ObTabletStatKey key_; + ObTabletStatBucket curr_buckets_; + ObTabletStatBucket latest_buckets_; + ObTabletStatBucket past_buckets_; +}; + + +class ObTabletStreamNode : public ObDLinkBase +{ +public: + explicit ObTabletStreamNode(const int64_t flag = 0) + : stream_(), flag_(flag) {} + ~ObTabletStreamNode() { reset(); } + void reset() { stream_.reset(); } + TO_STRING_KV(K_(stream), K_(flag)); + +public: + ObTabletStream stream_; + const int64_t flag_; +}; + + +class ObTabletStreamPool +{ +public: + typedef common::ObFixedQueue FreeList; + enum NodeAllocType: int64_t { + FIXED_ALLOC = 0, + DYNAMIC_ALLOC + }; + + ObTabletStreamPool(); + ~ObTabletStreamPool(); + void destroy(); + int init(const int64_t max_free_list_num, + const int64_t up_limit_node_num); + int alloc(ObTabletStreamNode *&node); + void free(ObTabletStreamNode *node); + OB_INLINE int64_t get_free_num() const { return free_list_.get_total(); } + OB_INLINE int64_t get_allocated_num() const { return (max_free_list_num_ - get_free_num()) + allocated_dynamic_num_; } + TO_STRING_KV(K_(max_free_list_num), K_(max_dynamic_node_num), K_(allocated_dynamic_num)); + +private: + common::ObFIFOAllocator dynamic_allocator_; + common::ObArenaAllocator free_list_allocator_; + FreeList free_list_; + int64_t max_free_list_num_; + int64_t max_dynamic_node_num_; + int64_t allocated_dynamic_num_; + bool is_inited_; +}; + + +class ObTenantTabletStatMgr +{ +public: + static int mtl_init(ObTenantTabletStatMgr* &tablet_stat_mgr); + ObTenantTabletStatMgr(); + virtual ~ObTenantTabletStatMgr(); + int init(); + bool is_inited() const { return is_inited_; } + // int start(); + void wait(); + void stop(); + void destroy(); + + int report_stat(const ObTabletStat &stat); + int 
get_latest_tablet_stat( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + ObTabletStat &tablet_stat); + int get_history_tablet_stats( + const share::ObLSID &ls_id, + const common::ObTabletID &tablet_id, + common::ObIArray &tablet_stats); + void process_stats(); + void refresh_all(const int64_t step); + void dump_tablet_stat_status(); +private: + class TabletStatUpdater : public common::ObTimerTask + { + public: + TabletStatUpdater(ObTenantTabletStatMgr &mgr) : mgr_(mgr) {} + virtual ~TabletStatUpdater() {} + virtual void runTimerTask(); + private: + ObTenantTabletStatMgr &mgr_; + }; + +private: + int update_tablet_stream(const ObTabletStat &report_stat); + int fetch_node(ObTabletStreamNode *&node); +private: + typedef common::hash::ObHashMap TabletStreamMap; + + static constexpr int64_t TABLET_STAT_PROCESS_INTERVAL = 5 * 1000L * 1000L; //5s + static constexpr int64_t CHECK_INTERVAL = 120L * 1000L * 1000L; //120s + static constexpr int64_t CHECK_RUNNING_TIME_INTERVAL = 120L * 1000L * 1000L; //120s + static constexpr int64_t DUMP_TABLET_STAT_INTERVAL = 60 * 1000LL * 1000LL; //60s + static constexpr int32_t DEFAULT_MAX_FREE_STREAM_CNT = 10000; + static constexpr int32_t DEFAULT_UP_LIMIT_STREAM_CNT = 20000; + static constexpr int32_t DEFAULT_BUCKET_NUM = 1000; + static constexpr int32_t DEFAULT_MAX_PENDING_CNT = 20000; + static constexpr int32_t MAX_REPORT_RETRY_CNT = 5; + + TabletStatUpdater report_stat_task_; + ObTabletStreamPool stream_pool_; + TabletStreamMap stream_map_; + common::ObDList lru_list_; + common::ObBucketLock bucket_lock_; + ObTabletStat report_queue_[DEFAULT_MAX_PENDING_CNT]; + uint64_t report_cursor_; + uint64_t pending_cursor_; + int report_tg_id_; + bool is_inited_; +}; + + +#define CHECK_SCHEDULE_TIME_INTERVAL(interval, step) \ + ({ \ + bool bret = false; \ + RLOCAL_STATIC(int64_t, last_time) = ::oceanbase::common::ObTimeUtility::fast_current_time(); \ + int64_t cur_time = 
::oceanbase::common::ObTimeUtility::fast_current_time(); \ + int64_t old_time = last_time; \ + step = 0; \ + step = (cur_time - old_time) / interval; \ + if (0 == step) { \ + } else if (old_time == ATOMIC_CAS(&last_time, old_time, cur_time)) \ + { \ + bret = true; \ + } \ + bret; \ + }) + +} /* namespace storage */ +} /* namespace oceanbase */ + +#endif /* OCEANBASE_STORAGE_TENANT_TABLET_STAT_MGR_H_ */ diff --git a/src/storage/tablet/ob_table_store_util.cpp b/src/storage/tablet/ob_table_store_util.cpp index f9ba448905..420c9cf008 100644 --- a/src/storage/tablet/ob_table_store_util.cpp +++ b/src/storage/tablet/ob_table_store_util.cpp @@ -448,9 +448,9 @@ int ObExtendTableArray::deserialize( LOG_WARN("unexpected error, sstable is nullptr", K(ret), K(table_handle)); } else if (OB_FAIL(sstable->deserialize(allocator, buf, data_len, pos))) { LOG_WARN("failed to deserialize sstable", K(ret)); - } else if (table_handle.get_table()->is_buf_minor_sstable()) { - if (OB_FAIL(assign(ObTabletTableStore::BUF_MINOR, table_handle.get_table()))) { - LOG_WARN("failed to add buf minor table", K(ret)); + } else if (table_handle.get_table()->is_meta_major_sstable()) { + if (OB_FAIL(assign(ObTabletTableStore::META_MAJOR, table_handle.get_table()))) { + LOG_WARN("failed to add meta major table", K(ret)); } } } @@ -830,6 +830,18 @@ int ObTableStoreIterator::add_tables(ObITable **start, const int64_t count) return ret; } +int ObTableStoreIterator::add_table(ObITable *input_table) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(input_table)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), KP(input_table)); + } else if (OB_FAIL(array_.push_back(input_table))) { + LOG_WARN("failed to add table to iterator", K(ret), KP(input_table)); + } + return ret; +} + int ObTableStoreIterator::get_next(ObITable *&table) { int ret = OB_SUCCESS; diff --git a/src/storage/tablet/ob_table_store_util.h b/src/storage/tablet/ob_table_store_util.h index 4eb38e6300..b9300dc51a 100644 --- 
a/src/storage/tablet/ob_table_store_util.h +++ b/src/storage/tablet/ob_table_store_util.h @@ -9,7 +9,6 @@ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. * See the Mulan PubL v2 for more details. */ - #ifndef OCEANBASE_STORAGE_OB_TABLE_STORE_UTIL_H_ #define OCEANBASE_STORAGE_OB_TABLE_STORE_UTIL_H_ @@ -147,6 +146,7 @@ public: int copy(const ObTableStoreIterator &other); int add_tables(ObMemtableArray &array, const int64_t start_pos = 0); int add_tables(ObITable **start, const int64_t count = 1); + int add_table(ObITable *input_table); int get_next(ObITable *&table); ObITable *get_boundary_table(const bool is_last); diff --git a/src/storage/tablet/ob_tablet.cpp b/src/storage/tablet/ob_tablet.cpp index 40d0aa8cf3..2f2985d284 100644 --- a/src/storage/tablet/ob_tablet.cpp +++ b/src/storage/tablet/ob_tablet.cpp @@ -55,6 +55,7 @@ #include "storage/tx/ob_trans_part_ctx.h" #include "storage/tx/ob_trans_service.h" #include "storage/tx_storage/ob_ls_service.h" +#include "storage/ob_tenant_tablet_stat_mgr.h" namespace oceanbase { @@ -63,6 +64,7 @@ using namespace share; using namespace share::schema; using namespace blocksstable; using namespace logservice; +using namespace compaction; using namespace palf; namespace storage @@ -76,7 +78,7 @@ ObTablet::ObTablet() tablet_meta_(), table_store_(), storage_schema_(), - medium_info_list_(compaction::ObMediumCompactionInfoList::MEDIUM_LIST_IN_STORAGE), + medium_info_list_(), memtable_mgr_(nullptr), log_handler_(nullptr), table_store_lock_(common::ObLatchIds::TABLET_TABLE_STORE_LOCK), @@ -86,7 +88,7 @@ ObTablet::ObTablet() is_inited_(false) { #if defined(__x86_64__) - static_assert(sizeof(ObTablet) <= 2432, "The size of ObTablet will affect the meta memory manager, and the necessity of adding new fields needs to be considered."); + static_assert(sizeof(ObTablet) <= 2496, "The size of ObTablet will affect the meta memory manager, and the necessity of adding new fields needs to be considered."); #endif } @@ -128,6 +130,7 @@ int 
ObTablet::init( { int ret = OB_SUCCESS; allocator_ = &(MTL(ObTenantMetaMemMgr*)->get_tenant_allocator()); + const int64_t default_max_sync_medium_scn = 0; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; @@ -148,11 +151,11 @@ int ObTablet::init( || OB_ISNULL(log_handler_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet pointer handle is invalid", K(ret), K_(pointer_hdl), K_(memtable_mgr), K_(log_handler)); - } else if (OB_FAIL(init_shared_params(ls_id, tablet_id, table_schema.get_schema_version(), freezer))) { + } else if (OB_FAIL(init_shared_params(ls_id, tablet_id, table_schema.get_schema_version(), default_max_sync_medium_scn, compat_mode, freezer))) { LOG_WARN("failed to init shared params", K(ret), K(ls_id), K(tablet_id), KP(freezer)); } else if (OB_FAIL(tablet_meta_.init(*allocator_, ls_id, tablet_id, data_tablet_id, lob_meta_tablet_id, lob_piece_tablet_id, - create_scn, snapshot_version, compat_mode, store_flag, table_schema.get_schema_version()))) { + create_scn, snapshot_version, compat_mode, store_flag, table_schema.get_schema_version(), default_max_sync_medium_scn))) { LOG_WARN("failed to init tablet meta", K(ret), K(ls_id), K(tablet_id), K(data_tablet_id), K(lob_meta_tablet_id), K(lob_piece_tablet_id), K(create_scn), K(snapshot_version), K(compat_mode), K(store_flag)); @@ -193,7 +196,10 @@ int ObTablet::init( { int ret = OB_SUCCESS; int64_t max_sync_schema_version = 0; + int64_t max_serialized_medium_scn = nullptr != param.medium_info_list_ ? 
param.medium_info_list_->get_max_medium_snapshot() : 0; int64_t input_max_sync_schema_version = 0; + ObITable *last_major = nullptr; + const bool update_in_major_type_merge = param.need_report_ && param.table_handle_.get_table()->is_major_sstable(); allocator_ = &(MTL(ObTenantMetaMemMgr*)->get_tenant_allocator()); if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; @@ -215,6 +221,7 @@ int ObTablet::init( } else if (OB_FAIL(tablet_meta_.init(*allocator_, old_tablet.tablet_meta_, param.snapshot_version_, param.multi_version_start_, tx_data, ddl_data, autoinc_seq, input_max_sync_schema_version, + MAX(max_serialized_medium_scn, old_tablet.tablet_meta_.max_serialized_medium_scn_), param.clog_checkpoint_scn_, param.ddl_checkpoint_scn_, param.ddl_start_scn_, param.ddl_snapshot_version_, param.ddl_execution_id_, param.ddl_cluster_version_))) { LOG_WARN("failed to init tablet meta", K(ret), K(old_tablet), K(param), @@ -229,7 +236,14 @@ int ObTablet::init( LOG_WARN("failed to update clog checkpoint ts", K(ret), K(param), K(table_store_)); } else if (OB_FAIL(try_update_table_store_flag(param))) { LOG_WARN("failed to update table store flag", K(ret), K(param), K(table_store_)); - } else if (OB_FAIL(medium_info_list_.init(*allocator_, &(old_tablet.get_medium_compaction_info_list())))) { + } else if (FALSE_IT(last_major = table_store_.get_major_sstables().get_boundary_table(true/*last*/))) { + } else if (OB_FAIL(medium_info_list_.init( + *allocator_, + &(old_tablet.get_medium_compaction_info_list()), + param.medium_info_list_, + // delete all medium before latest finish major snapshot + nullptr != last_major ? 
last_major->get_snapshot_version() : 0, + update_in_major_type_merge))) { LOG_WARN("failed to init medium info list", K(ret)); } else if (OB_FAIL(build_read_info(*allocator_))) { LOG_WARN("failed to build read info", K(ret)); @@ -245,10 +259,11 @@ int ObTablet::init( set_next_tablet_guard(old_tablet.next_tablet_guard_); } is_inited_ = true; - LOG_INFO("succeeded to init tablet", K(ret), K(param), K(old_tablet), K(tx_data), K(ddl_data), K(autoinc_seq), KPC(this)); + LOG_INFO("succeeded to init tablet", K(ret), K(param), K(old_tablet), K(tx_data), K(ddl_data), + K(autoinc_seq), K(medium_info_list_), KPC(this)); } - if (OB_SUCC(ret) && param.need_report_ && param.table_handle_.get_table()->is_major_sstable()) { + if (OB_SUCC(ret) && update_in_major_type_merge) { const ObSSTable *major_table = static_cast(param.table_handle_.get_table()); int tmp_ret = OB_SUCCESS; if (OB_ISNULL(major_table)) { // init tablet with no major table, skip to init report info @@ -288,7 +303,11 @@ int ObTablet::init( || OB_ISNULL(log_handler_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet pointer handle is invalid", K(ret), K_(pointer_hdl), K_(memtable_mgr), K_(log_handler)); - } else if (!is_update && OB_FAIL(init_shared_params(ls_id, tablet_id, param.max_sync_storage_schema_version_, freezer))) { + } else if (!is_update && OB_FAIL(init_shared_params(ls_id, tablet_id, + param.max_sync_storage_schema_version_, + param.max_serialized_medium_scn_, + param.compat_mode_, + freezer))) { LOG_WARN("failed to init shared params", K(ret), K(ls_id), K(tablet_id), KP(freezer)); } else if (OB_FAIL(tablet_meta_.init(*allocator_, param))) { LOG_WARN("failed to init tablet meta", K(ret), K(param)); @@ -296,7 +315,7 @@ int ObTablet::init( LOG_WARN("failed to init table store", K(ret)); } else if (OB_FAIL(storage_schema_.init(*allocator_, param.storage_schema_))) { LOG_WARN("failed to init storage schema", K(ret), K(param)); - } else if (OB_FAIL(medium_info_list_.init(*allocator_, nullptr))) { + } else if 
(OB_FAIL(medium_info_list_.init(*allocator_, &param.medium_info_list_))) { LOG_WARN("failed to init medium info list", K(ret)); } else if (OB_FAIL(build_read_info(*allocator_))) { LOG_WARN("failed to build read info", K(ret), K(param)); @@ -340,7 +359,9 @@ int ObTablet::init( ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet pointer handle is invalid", K(ret), K_(pointer_hdl), K_(memtable_mgr), K_(log_handler)); } else if (OB_FAIL(tablet_meta_.init(*allocator_, old_tablet.tablet_meta_, old_tablet.get_snapshot_version(), - old_tablet.get_multi_version_start(), tx_data, ddl_data, autoinc_seq, old_tablet.tablet_meta_.max_sync_storage_schema_version_))) { + old_tablet.get_multi_version_start(), tx_data, ddl_data, autoinc_seq, + old_tablet.tablet_meta_.max_sync_storage_schema_version_, + old_tablet.tablet_meta_.max_serialized_medium_scn_))) { LOG_WARN("fail to init tablet_meta", K(ret), K(old_tablet.tablet_meta_), K(tx_data), K(ddl_data), K(autoinc_seq)); } else if (OB_FAIL(table_store_.batch_replace_sstables(*allocator_, this, table_handles, old_tablet.table_store_))) { LOG_WARN("fail to init table store", K(ret), K(old_tablet), K(table_handles)); @@ -377,9 +398,8 @@ int ObTablet::init( { int ret = OB_SUCCESS; allocator_ = &(MTL(ObTenantMetaMemMgr*)->get_tenant_allocator()); - int64_t max_sync_schema_version = 0; const ObStorageSchema *storage_schema = nullptr; - + ObITable *last_major = nullptr; if (OB_UNLIKELY(is_inited_)) { ret = OB_INIT_TWICE; LOG_WARN("init twice", K(ret), K(is_inited_)); @@ -391,8 +411,6 @@ int ObTablet::init( || OB_ISNULL(log_handler_)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("tablet pointer handle is invalid", K(ret), K_(pointer_hdl), K_(memtable_mgr), K_(log_handler)); - } else if (OB_FAIL(old_tablet.get_max_sync_storage_schema_version(max_sync_schema_version))) { - LOG_WARN("failed to get max sync storage schema version", K(ret)); } else if (FALSE_IT(storage_schema = OB_ISNULL(param.tablet_meta_) ?
&old_tablet.storage_schema_ : &param.tablet_meta_->storage_schema_)) { } else if (OB_FAIL(tablet_meta_.init(*allocator_, old_tablet.tablet_meta_, tx_data, ddl_data, autoinc_seq, param.tablet_meta_ // this interface for migration to batch update table store @@ -406,10 +424,16 @@ int ObTablet::init( LOG_WARN("failed to choose and save storage schema", K(ret), K(old_tablet), K(param)); } else if (OB_FAIL(try_update_start_scn())) { LOG_WARN("failed to update start scn", K(ret), K(param), K(table_store_)); + } else if (FALSE_IT(last_major = table_store_.get_major_sstables().get_boundary_table(true/*last*/))) { + } else if (OB_FAIL(medium_info_list_.init( + *allocator_, + &(old_tablet.get_medium_compaction_info_list()), + OB_ISNULL(param.tablet_meta_) ? nullptr : &param.tablet_meta_->medium_info_list_, + // delete all medium before latest finish major snapshot + nullptr != last_major ? last_major->get_snapshot_version() : 0))) { + LOG_WARN("failed to init medium info list", K(ret), K(old_tablet)); //This interface should not try_update_ddl_checkpoint_ts //Bug : 45542552 - } else if (OB_FAIL(medium_info_list_.init(*allocator_, &(old_tablet.get_medium_compaction_info_list())))) { - LOG_WARN("failed to init medium info list", K(ret)); } else if (OB_FAIL(build_read_info(*allocator_))) { LOG_WARN("failed to build read info", K(ret)); } else if (OB_FAIL(pre_transform_sstable_root_block(*full_read_info_.get_index_read_info()))) { @@ -427,6 +451,7 @@ int ObTablet::init( } if (OB_SUCC(ret)) { + DEBUG_SYNC(HA_REPORT_META_TABLE); const ObSSTable *last_major = static_cast(table_store_.get_major_sstables().get_boundary_table(true/*last*/)); int tmp_ret = OB_SUCCESS; if (OB_ISNULL(last_major)) { // init tablet with no major table, skip to init report info @@ -436,6 +461,51 @@ int ObTablet::init( } } + if (OB_UNLIKELY(!is_inited_)) { + reset(); + } + + return ret; +} + +int ObTablet::init_with_update_medium_info(const ObTablet &old_tablet) +{ + int ret = OB_SUCCESS; + const ObTabletMeta
&old_tablet_meta = old_tablet.tablet_meta_; + const ObTabletTableStore &old_table_store = old_tablet.table_store_; + allocator_ = &(MTL(ObTenantMetaMemMgr*)->get_tenant_allocator()); + + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret), K(is_inited_)); + } else if (OB_UNLIKELY(!old_tablet.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), K(old_tablet)); + } else if (OB_UNLIKELY(!pointer_hdl_.is_valid()) + || OB_ISNULL(memtable_mgr_) + || OB_ISNULL(log_handler_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("tablet pointer handle is invalid", K(ret), K_(pointer_hdl), K_(pointer_hdl), K_(memtable_mgr), K_(log_handler)); + } else if (OB_FAIL(tablet_meta_.init(*allocator_, old_tablet_meta))) { + LOG_WARN("failed to init tablet meta", K(ret), K(old_tablet_meta)); + } else if (OB_FAIL(table_store_.assign(*allocator_, old_table_store, this))) { + LOG_WARN("failed to copy table store", K(ret), K(old_table_store)); + } else if (OB_FAIL(storage_schema_.init(*allocator_, old_tablet.storage_schema_))) { + LOG_WARN("failed to init storage schema", K(ret), K(old_tablet)); + } else if (OB_FAIL(try_update_start_scn())) { + LOG_WARN("failed to update start scn", K(ret), K(table_store_)); + } else if (OB_FAIL(medium_info_list_.init_after_check_finish(*allocator_, old_tablet.get_medium_compaction_info_list()))) { + LOG_WARN("failed to init medium info mgr", K(ret)); + } else if (OB_FAIL(build_read_info(*allocator_))) { + LOG_WARN("failed to build read info", K(ret)); + } else { + if (old_tablet.get_tablet_meta().has_next_tablet_) { + set_next_tablet_guard(old_tablet.next_tablet_guard_); + } + LOG_INFO("succeeded to init tablet", K(ret), K(medium_info_list_), K(old_tablet)); + is_inited_ = true; + } + if (OB_UNLIKELY(!is_inited_)) { reset(); } @@ -768,6 +838,26 @@ int ObTablet::deserialize_id( return ret; } +int ObTablet::get_max_sync_medium_scn(int64_t &max_medium_snapshot) const +{ + int ret = OB_SUCCESS; + 
max_medium_snapshot = 0; + ObIMemtableMgr *memtable_mgr = nullptr; + ObTabletMemtableMgr *data_memtable_mgr = nullptr; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (tablet_meta_.tablet_id_.is_special_merge_tablet()) { + // do nothing + } else if (OB_FAIL(get_memtable_mgr(memtable_mgr))) { + LOG_WARN("failed to get memtable mgr", K(ret)); + } else if (FALSE_IT(data_memtable_mgr = static_cast(memtable_mgr))) { + } else { + max_medium_snapshot = data_memtable_mgr->get_medium_info_recorder().get_max_saved_version(); + } + return ret; +} + int ObTablet::get_max_sync_storage_schema_version(int64_t &max_schema_version) const { int ret = OB_SUCCESS; @@ -780,7 +870,7 @@ int ObTablet::get_max_sync_storage_schema_version(int64_t &max_schema_version) c LOG_WARN("failed to get memtable mgr", K(ret)); } else if (FALSE_IT(data_memtable_mgr = static_cast(memtable_mgr))) { } else { - max_schema_version = data_memtable_mgr->get_storage_schema_recorder().get_max_sync_version(); + max_schema_version = data_memtable_mgr->get_storage_schema_recorder().get_max_saved_version(); } return ret; } @@ -1373,6 +1463,7 @@ int ObTablet::do_rowkey_exists( } else { bool found = false; ObITable *table = nullptr; + int64_t check_table_cnt = 0; while (OB_SUCC(ret) && !found) { if (OB_FAIL(table_iter.get_next(table))) { if (OB_ITER_END != ret) { @@ -1384,12 +1475,21 @@ int ObTablet::do_rowkey_exists( } else if (OB_FAIL(table->exist(store_ctx, table_id, full_read_info_, rowkey, exists, found))) { LOG_WARN("Fail to check if exist in store", K(ret), KPC(table)); } else { + ++check_table_cnt; LOG_DEBUG("rowkey_exists check", KPC(table), K(rowkey), K(exists), K(found), K(table_iter)); } } if (OB_ITER_END == ret) { ret = OB_SUCCESS; + int tmp_ret = OB_SUCCESS; + if (0 == store_ctx.tablet_stat_.query_cnt_) { + // ROWKEY IN_ROW_CACHE / NOT EXIST + } else if (FALSE_IT(store_ctx.tablet_stat_.exist_row_read_table_cnt_ = check_table_cnt)) { + } else if 
(FALSE_IT(store_ctx.tablet_stat_.exist_row_total_table_cnt_ = table_iter.count())) { + } else if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->report_stat(store_ctx.tablet_stat_))) { + LOG_WARN("failed to report tablet stat", K(tmp_ret), K(stat)); + } } if (OB_SUCCESS == ret && false == found) { @@ -1408,6 +1508,7 @@ int ObTablet::do_rowkeys_exist(ObTableStoreIterator &tables_iter, ObRowsInfo &ro LOG_WARN("invalid argument-tables_iter", K(ret), K(tables_iter.count())); } bool all_rows_found = false; + int64_t check_table_cnt = 0; while (OB_SUCC(ret) && !exists && !all_rows_found) { ObITable *table = nullptr; if (OB_FAIL(tables_iter.get_next(table))) { @@ -1422,10 +1523,26 @@ int ObTablet::do_rowkeys_exist(ObTableStoreIterator &tables_iter, ObRowsInfo &ro } else if (OB_FAIL(table->exist(rows_info, exists, all_rows_found))) { LOG_WARN("fail to check the existence of rows", K(ret), K(rows_info), K(exists)); } else { + ++check_table_cnt; LOG_DEBUG("rowkey exists check", K(rows_info), K(exists)); } } + if (OB_SUCC(ret)) { + ObTabletStat tablet_stat; + const ObTableAccessContext &access_ctx = rows_info.exist_helper_.table_access_context_; + tablet_stat.ls_id_ = access_ctx.ls_id_.id(); + tablet_stat.tablet_id_ = access_ctx.tablet_id_.id(); + tablet_stat.query_cnt_ = 1; + tablet_stat.exist_row_read_table_cnt_ = check_table_cnt; + tablet_stat.exist_row_total_table_cnt_ = tables_iter.count(); + int tmp_ret = OB_SUCCESS; + if (0 == access_ctx.table_store_stat_.exist_row_.empty_read_cnt_) { + // ROWKEY IN_ROW_CACHE / NOT EXIST + } else if (OB_TMP_FAIL(MTL(ObTenantTabletStatMgr *)->report_stat(tablet_stat))) { + LOG_WARN("failed to report tablet stat", K(tmp_ret), K(tablet_stat)); + } + } return ret; } @@ -1630,6 +1747,60 @@ int ObTablet::replay_update_storage_schema( return ret; } +int ObTablet::submit_medium_compaction_clog( + ObMediumCompactionInfo &medium_info, + ObIAllocator &allocator) +{ + int ret = OB_SUCCESS; + ObIMemtableMgr *memtable_mgr = nullptr; + ObTabletMemtableMgr 
*data_memtable_mgr = nullptr; + + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret)); + } else if (tablet_meta_.tablet_id_.is_special_merge_tablet()) { + // do nothing + } else if (OB_FAIL(get_memtable_mgr(memtable_mgr))) { + LOG_WARN("failed to get memtable mgr", K(ret)); + } else if (FALSE_IT(data_memtable_mgr = static_cast(memtable_mgr))) { + } else if (OB_FAIL(data_memtable_mgr->get_medium_info_recorder().submit_medium_compaction_info( + medium_info, allocator))) { + LOG_WARN("medium compaction recorder submit fail", K(ret), K(medium_info)); + } else { + LOG_DEBUG("success to submit medium compaction clog", K(medium_info)); + } + return ret; +} + +int ObTablet::replay_medium_compaction_clog( + const share::SCN &scn, + const char *buf, + const int64_t buf_size, + int64_t &pos) +{ + int ret = OB_SUCCESS; + int64_t new_pos = pos; + ObIMemtableMgr *memtable_mgr = nullptr; + ObTabletMemtableMgr *data_memtable_mgr = nullptr; + + if (IS_NOT_INIT) { + LOG_WARN("not inited", K(ret)); + } else if (OB_UNLIKELY(buf_size <= pos || pos < 0 || buf_size <= 0)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid argument", K(ret), K(buf_size), K(pos)); + } else if (tablet_meta_.tablet_id_.is_special_merge_tablet()) { + // do nothing + } else if (OB_FAIL(get_memtable_mgr(memtable_mgr))) { + LOG_WARN("failed to get memtable mgr", K(ret)); + } else if (FALSE_IT(data_memtable_mgr = static_cast(memtable_mgr))) { + } else if (OB_FAIL(data_memtable_mgr->get_medium_info_recorder().replay_medium_compaction_log(scn, buf, buf_size, new_pos))) { + LOG_WARN("medium compaction recorder replay fail", K(ret), KPC(this), K(buf_size), K(new_pos)); + } else { + pos = new_pos; + } + return ret; +} + int ObTablet::get_schema_version_from_storage_schema(int64_t &schema_version) { int ret = OB_SUCCESS; @@ -1772,8 +1943,8 @@ int ObTablet::reset_storage_related_member() // do nothing } else if (OB_FAIL(get_memtable_mgr(memtable_mgr))) { LOG_WARN("failed to get memtable mgr", 
K(ret)); - } else if (OB_FAIL(memtable_mgr->reset_storage_schema_recorder())) { - LOG_WARN("failed to destroy storage schema recorder", K(ret), KPC(memtable_mgr)); + } else if (OB_FAIL(memtable_mgr->reset_storage_recorder())) { + LOG_WARN("failed to destroy storage recorder", K(ret), KPC(memtable_mgr)); } return ret; } @@ -1850,6 +2021,8 @@ int ObTablet::init_shared_params( const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, const int64_t max_saved_schema_version, // for init storage_schema_recorder on MemtableMgr + const int64_t max_saved_medium_scn, // for init medium_recorder on MemtableMgr + const lib::Worker::CompatMode compat_mode, ObFreezer *freezer) { int ret = OB_SUCCESS; @@ -1864,7 +2037,15 @@ int ObTablet::init_shared_params( if (OB_FAIL(get_memtable_mgr(memtable_mgr))) { LOG_WARN("failed to get memtable mgr", K(ret)); - } else if (OB_FAIL(memtable_mgr->init(tablet_id, ls_id, max_saved_schema_version, log_handler_, freezer, t3m))) { + } else if (OB_FAIL(memtable_mgr->init( + tablet_id, + ls_id, + max_saved_schema_version, + max_saved_medium_scn, + compat_mode, + log_handler_, + freezer, + t3m))) { LOG_WARN("failed to init memtable mgr", K(ret), K(tablet_id), K(ls_id), KP(freezer)); } else if (OB_FAIL(cond.init(ObWaitEventIds::TABLET_LOCK_WAIT))) { LOG_WARN("failed to init thread cond", K(ret)); @@ -1962,13 +2143,17 @@ int ObTablet::build_migration_tablet_param(ObMigrationTabletParam &mig_tablet_pa mig_tablet_param.ddl_checkpoint_scn_ = tablet_meta_.ddl_checkpoint_scn_; mig_tablet_param.ddl_start_scn_ = tablet_meta_.ddl_start_scn_; mig_tablet_param.ddl_snapshot_version_ = tablet_meta_.ddl_snapshot_version_; + // max_version on tablet meta is the latest serialized version mig_tablet_param.max_sync_storage_schema_version_ = tablet_meta_.max_sync_storage_schema_version_; + mig_tablet_param.max_serialized_medium_scn_ = tablet_meta_.max_serialized_medium_scn_; mig_tablet_param.ddl_execution_id_ = tablet_meta_.ddl_execution_id_; 
mig_tablet_param.ddl_cluster_version_ = tablet_meta_.ddl_cluster_version_; mig_tablet_param.report_status_.reset(); if (OB_FAIL(mig_tablet_param.storage_schema_.init(mig_tablet_param.allocator_, storage_schema_))) { LOG_WARN("failed to copy storage schema", K(ret), K_(tablet_meta)); + } else if (OB_FAIL(mig_tablet_param.medium_info_list_.init(mig_tablet_param.allocator_, &medium_info_list_))) { + LOG_WARN("failed to copy medium info list", K(ret), K(medium_info_list_)); } else if (OB_FAIL(mig_tablet_param.ddl_data_.assign(tablet_meta_.ddl_data_))) { LOG_WARN("failed to assign ddl data", K(ret), K_(tablet_meta)); } else if (OB_FAIL(mig_tablet_param.autoinc_seq_.assign(tablet_meta_.autoinc_seq_))) { @@ -2093,31 +2278,180 @@ int ObTablet::fetch_tablet_autoinc_seq_cache( return ret; } -int ObTablet::get_kept_multi_version_start( - int64_t &multi_version_start, - int64_t &min_reserved_snapshot) +static int get_msd_from_table( + ObITable *table, + ObIAllocator *allocator, + const bool get_latest, + memtable::ObIMultiSourceDataUnit &msd) { int ret = OB_SUCCESS; - multi_version_start = 0; - min_reserved_snapshot = 0; - int64_t min_merged_snapshot = 0; - ObTenantFreezeInfoMgr *freeze_info_mgr = MTL(ObTenantFreezeInfoMgr*); - const ObTabletID &tablet_id = tablet_meta_.tablet_id_; + memtable::ObMemtable * memtable = nullptr; + if (OB_ISNULL(table)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("table is null", K(ret), KPC(table)); + } else if (OB_ISNULL(memtable = static_cast(table))) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("table pointer does not point to a ObMemtable object", K(ret), KPC(table)); + } else if (OB_FAIL(memtable->get_multi_source_data_unit( + &msd, + allocator, + get_latest))) { + if (OB_ENTRY_NOT_EXIST != ret) { + LOG_WARN("failed to get multi_source_data from memtable", K(ret), KPC(table)); + } + } + return ret; +} + +int ObTablet::get_msd_from_memtables( + memtable::ObIMultiSourceDataUnit &msd, + ObIAllocator *allocator, + const bool get_latest) const +{ + int 
ret = OB_SUCCESS; + bool exist_on_memtable = false; + ObSEArray memtables; + if (is_ls_inner_tablet()) { + // do nothing + } else if (OB_FAIL(get_table_store().get_memtables(memtables, true/*need_active*/))) { + LOG_WARN("failed to get memtables", K(ret), KPC(this)); + } else if (memtables.empty()) { + // do nothing + } else { + if (get_latest) { + for (int64_t i = memtables.count() - 1; OB_SUCC(ret) && i >= 0; --i) { + if (OB_FAIL(get_msd_from_table( + memtables.at(i), + allocator, + get_latest, + msd))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get msd from memtable", K(ret), K(i), KPC(memtables.at(i))); + } + } else { + exist_on_memtable = true; + break; + } + } // end of for + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < memtables.count(); ++i) { + if (OB_FAIL(get_msd_from_table( + memtables.at(i), + allocator, + get_latest, + msd))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get msd from memtable", K(ret), K(i), KPC(memtables.at(i))); + } + } else { + exist_on_memtable = true; + break; + } + } // end of for + } + } + if (OB_SUCC(ret) && !exist_on_memtable) { + ret = OB_ENTRY_NOT_EXIST; + } + return ret; +} + +int ObTablet::get_min_medium_snapshot(int64_t &min_medium_snapshot) const +{ + int ret = OB_SUCCESS; + min_medium_snapshot = INT64_MAX; + const ObMediumCompactionInfoList &medium_list = get_medium_compaction_info_list(); if (OB_UNLIKELY(!is_inited_)) { ret = OB_NOT_INIT; LOG_WARN("not inited", K(ret), K_(is_inited)); - } else if (0 != table_store_.get_major_sstables().count()) { - min_merged_snapshot = table_store_.get_major_sstables().get_boundary_table(true/*last*/)->get_snapshot_version(); + } else if (is_ls_inner_tablet()) { + // do nothing + } else if (medium_list.size() > 0) { // oldest medium info in Tablet + min_medium_snapshot = medium_list.get_min_medium_snapshot(); + } else { + ObArenaAllocator temp_allocator; + ObMediumCompactionInfo 
medium_info; + if (OB_FAIL(get_msd_from_memtables(medium_info, &temp_allocator, false/*get_latest*/))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("failed to get medium from memtable", K(ret), KPC(this), K(medium_info)); + } + } else { + min_medium_snapshot = medium_info.medium_snapshot_; + } + } + return ret; +} + +int ObTablet::get_max_medium_snapshot(int64_t &max_medium_snapshot) const +{ + int ret = OB_SUCCESS; + max_medium_snapshot = 0; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("not inited", K(ret), K_(is_inited)); + } else if (is_ls_inner_tablet()) { + // do nothing + } else { + ObArenaAllocator temp_allocator; + ObMediumCompactionInfo medium_info; + if (OB_FAIL(get_msd_from_memtables(medium_info, &temp_allocator, true/*get_latest*/))) { + if (OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + const ObMediumCompactionInfoList &medium_list = get_medium_compaction_info_list(); + if (medium_list.size() > 0) { + max_medium_snapshot = medium_list.get_max_medium_snapshot(); + } + } else { + LOG_WARN("failed to get medium from memtable", K(ret), KPC(this), K(medium_info)); + } + } else { + max_medium_snapshot = medium_info.medium_snapshot_; + } + } + return ret; +} + +// MIN { ls min_reserved_snapshot, freeze_info, all_acquired_snapshot} +int ObTablet::get_kept_multi_version_start( + ObLS &ls, + const ObTablet &tablet, + int64_t &multi_version_start) +{ + int ret = OB_SUCCESS; + multi_version_start = 0; + int64_t max_merged_snapshot = 0; + int64_t min_reserved_snapshot = 0; + int64_t min_medium_snapshot = INT64_MAX; + const ObTabletID &tablet_id = tablet.get_tablet_meta().tablet_id_; + const ObTabletTableStore &table_store = tablet.get_table_store(); + if (0 != table_store.get_major_sstables().count()) { + max_merged_snapshot = table_store.get_major_sstables().get_boundary_table(true/*last*/)->get_snapshot_version(); } if (OB_FAIL(ret)) { - } else if (OB_FAIL(freeze_info_mgr->get_min_reserved_snapshot( - 
tablet_id, min_merged_snapshot, min_reserved_snapshot))) { - LOG_WARN("failed to get multi version from freeze info mgr", K(ret), K(tablet_id)); - } else { - multi_version_start = MIN(MAX(min_reserved_snapshot, get_multi_version_start()), get_snapshot_version()); + } else if (FALSE_IT(multi_version_start = tablet.get_multi_version_start())) { + } else if (OB_FAIL(MTL(ObTenantFreezeInfoMgr*)->get_min_reserved_snapshot( + tablet_id, max_merged_snapshot, min_reserved_snapshot))) { + LOG_WARN("failed to get multi version from freeze info mgr", K(ret), K(table_id)); + } else if (!tablet.is_ls_inner_tablet() + && OB_FAIL(tablet.get_min_medium_snapshot(min_medium_snapshot))) { + LOG_WARN("failed to get min medium snapshot", K(ret), K(tablet)); } - + if (OB_SUCC(ret)) { + min_reserved_snapshot = common::min( + ls.get_min_reserved_snapshot(), + common::min(min_reserved_snapshot, min_medium_snapshot)); + multi_version_start = MIN(MAX(min_reserved_snapshot, multi_version_start), tablet.get_snapshot_version()); + } + LOG_DEBUG("get multi version start", "ls_id", tablet.get_tablet_meta().ls_id_, K(tablet_id), + K(multi_version_start), K(min_reserved_snapshot), K(tablet.get_tablet_meta()), K(min_medium_snapshot), + K(ls.get_min_reserved_snapshot()), K(max_merged_snapshot)); return ret; } @@ -2676,7 +3010,7 @@ int ObTablet::inner_get_tx_data(ObTabletTxMultiSourceDataUnit &tx_data, bool &ex const common::ObTabletID &tablet_id = tablet_meta_.tablet_id_; exist_on_memtable = false; - if (OB_FAIL(get_msd_from_memtable(tx_data))) { + if (OB_FAIL(get_msd_from_memtables(tx_data))) { if (OB_ENTRY_NOT_EXIST == ret) { exist_on_memtable = false; ret = OB_SUCCESS; @@ -2792,7 +3126,7 @@ int ObTablet::get_ddl_data(ObTabletBindingInfo &info) const if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("not inited", K(ret), K_(is_inited)); - } else if (OB_FAIL(get_msd_from_memtable(info))) { + } else if (OB_FAIL(get_msd_from_memtables(info))) { if (OB_ENTRY_NOT_EXIST == ret) { exist_on_memtable = false; 
ret = OB_SUCCESS; @@ -2812,50 +3146,6 @@ int ObTablet::get_ddl_data(ObTabletBindingInfo &info) const return ret; } -int ObTablet::get_msd_from_memtable(memtable::ObIMultiSourceDataUnit &msd) const -{ - int ret = OB_SUCCESS; - const share::ObLSID &ls_id = tablet_meta_.ls_id_; - const common::ObTabletID &tablet_id = tablet_meta_.tablet_id_; - bool exist_on_memtable = false; - - if (is_ls_inner_tablet()) { - // won't do anything for ls inner tablet - } else { - ObSEArray memtable_array; - if (OB_FAIL(get_memtables(memtable_array, true/*need_active*/))) { - LOG_WARN("failed to get memtables", K(ret)); - } else { - for (int64_t i = memtable_array.count() - 1; OB_SUCC(ret) && !exist_on_memtable && i >= 0; --i) { - ObMemtable *memtable = static_cast(memtable_array[i]); - if (OB_ISNULL(memtable)) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("unexpected error, memtable is null", K(ret), K(i), KP(memtable)); - } else if (OB_FAIL(memtable->get_multi_source_data_unit(&msd, nullptr/*allocator*/))) { - if (OB_ENTRY_NOT_EXIST == ret) { - LOG_DEBUG("multi source data does not exist on memtable", K(ret), K(ls_id), K(tablet_id), K(tablet_meta_)); - ret = OB_SUCCESS; - exist_on_memtable = false; - } else { - LOG_WARN("failed to get multi source data", K(ret), K(ls_id), K(tablet_id)); - } - } else { - exist_on_memtable = true; - LOG_DEBUG("succeeded to get msd from memtable", K(ret), K(ls_id), K(tablet_id), KPC(memtable)); - } - } - } - } - - if (OB_FAIL(ret)) { - } else if (!exist_on_memtable) { - ret = OB_ENTRY_NOT_EXIST; - LOG_DEBUG("msd does not exist on memtable", K(ret), K(ls_id), K(tablet_id)); - } - - return ret; -} - int ObTablet::set_tx_data_in_tablet_pointer(const ObTabletTxMultiSourceDataUnit &tx_data) { int ret = OB_SUCCESS; diff --git a/src/storage/tablet/ob_tablet.h b/src/storage/tablet/ob_tablet.h index 837394121e..00c37d5b97 100644 --- a/src/storage/tablet/ob_tablet.h +++ b/src/storage/tablet/ob_tablet.h @@ -97,6 +97,7 @@ public: ObTablet(const ObTablet&) = delete; 
ObTablet &operator=(const ObTablet&) = delete; ~ObTablet(); + public: void reset(); bool is_ls_inner_tablet() const; @@ -113,6 +114,8 @@ public: int64_t get_ref() const { return ATOMIC_LOAD(&ref_cnt_); } int64_t get_wash_score() const { return ATOMIC_LOAD(&wash_score_); } int get_rec_log_scn(share::SCN &rec_scn); + int get_max_sync_medium_scn(int64_t &max_medium_scn) const; + int get_max_sync_storage_schema_version(int64_t &max_schema_version) const; public: // first time create tablet int init( @@ -147,6 +150,8 @@ public: const ObTabletTxMultiSourceDataUnit &tx_data, const ObTabletBindingInfo &ddl_data, const share::ObTabletAutoincSeq &autoinc_seq); + // update medium compaction info mgr and build new tablet + int init_with_update_medium_info(const ObTablet &old_tablet); // batch replace sstables without data modification int init( const ObIArray &table_handles, @@ -203,6 +208,11 @@ public: ObRelativeTable &relative_table, ObStoreCtx &store_ctx, const blocksstable::ObDatumRowkey &rowkey); + int try_update_storage_schema( + const int64_t table_id, + const int64_t schema_version, + ObIAllocator &allocator, + const int64_t timeout_ts); // table operation int get_read_tables( @@ -327,6 +337,16 @@ public: const int64_t buf_size, int64_t &pos); int get_schema_version_from_storage_schema(int64_t &schema_version); + + int submit_medium_compaction_clog( + compaction::ObMediumCompactionInfo &medium_info, + ObIAllocator &allocator); + int replay_medium_compaction_clog( + const share::SCN &scn, + const char *buf, + const int64_t buf_size, + int64_t &pos); + int fetch_tablet_autoinc_seq_cache( const uint64_t cache_size, share::ObTabletAutoincInterval &result); @@ -335,11 +355,10 @@ public: int update_tablet_autoinc_seq( const uint64_t autoinc_seq, const share::SCN &replay_scn); - - int get_kept_multi_version_start( - int64_t &multi_version_start, - int64_t &min_reserved_snapshot); - + static int get_kept_multi_version_start( + ObLS &ls, + const ObTablet &tablet, + int64_t 
&multi_version_start); int check_schema_version_elapsed( const int64_t schema_version, const bool need_wait_trans_end, @@ -358,14 +377,21 @@ public: const bool for_replay, const memtable::MemtableRefOp ref_op = memtable::MemtableRefOp::NONE, const bool is_callback = false); + int get_max_medium_snapshot(int64_t &max_medium_snapshot) const; + int get_msd_from_memtables( + memtable::ObIMultiSourceDataUnit &msd, + ObIAllocator *allocator = nullptr, + const bool get_latest = true) const; + int update_msd_cache_on_pointer(); int get_redefined_schema_version_in_tablet_pointer(int64_t &schema_version) const; int set_redefined_schema_version_in_tablet_pointer(const int64_t schema_version); int set_memtable_clog_checkpoint_scn( const ObMigrationTabletParam *tablet_meta); - - TO_STRING_KV(KP(this), K_(wash_score), K_(ref_cnt), K_(tablet_meta), K_(table_store), K_(storage_schema)); + TO_STRING_KV(KP(this), K_(wash_score), K_(ref_cnt), K_(tablet_meta), K_(table_store), K_(storage_schema), + K_(medium_info_list)); private: + int get_min_medium_snapshot(int64_t &min_medium_snapshot) const; int64_t get_self_size() const; int get_memtable_mgr(ObIMemtableMgr *&memtable_mgr) const; @@ -377,17 +403,14 @@ private: const share::ObLSID &ls_id, const common::ObTabletID &tablet_id, const int64_t max_saved_schema_version, + const int64_t max_saved_medium_scn, + const lib::Worker::CompatMode compat_mode, ObFreezer *freezer); int build_read_info(common::ObIAllocator &allocator); int create_memtable(const int64_t schema_version, const bool for_replay=false); int try_update_start_scn(); int try_update_ddl_checkpoint_scn(); int try_update_table_store_flag(const ObUpdateTableStoreParam &param); - int try_update_storage_schema( - const int64_t table_id, - const int64_t schema_version, - ObIAllocator &allocator, - const int64_t timeout_ts); int get_max_schema_version(int64_t &schema_version); int inner_get_all_sstables(common::ObIArray &sstables) const; int pre_transform_sstable_root_block(const
ObTableReadInfo &index_read_info); @@ -458,9 +481,7 @@ private: const bool for_replay, const memtable::MemtableRefOp ref_op = memtable::MemtableRefOp::NONE, const bool is_callback = false); - int get_msd_from_memtable(memtable::ObIMultiSourceDataUnit &msd) const; int set_tx_data_in_tablet_pointer(const ObTabletTxMultiSourceDataUnit &tx_data); - int get_max_sync_storage_schema_version(int64_t &max_schema_version) const; int check_max_sync_schema_version() const; int check_sstable_column_checksum() const; diff --git a/src/storage/tablet/ob_tablet_binding_helper.h b/src/storage/tablet/ob_tablet_binding_helper.h index e8f0081647..41ea62c219 100644 --- a/src/storage/tablet/ob_tablet_binding_helper.h +++ b/src/storage/tablet/ob_tablet_binding_helper.h @@ -57,7 +57,6 @@ public: ObTabletBindingInfo(); virtual ~ObTabletBindingInfo() {} - int set_allocator(ObIAllocator &allocator); int assign(const ObTabletBindingInfo &arg); virtual int deep_copy(const memtable::ObIMultiSourceDataUnit *src, ObIAllocator *allocator = nullptr) override; diff --git a/src/storage/tablet/ob_tablet_memtable_mgr.cpp b/src/storage/tablet/ob_tablet_memtable_mgr.cpp index 2f879ebd17..41c75fbf7b 100644 --- a/src/storage/tablet/ob_tablet_memtable_mgr.cpp +++ b/src/storage/tablet/ob_tablet_memtable_mgr.cpp @@ -36,10 +36,11 @@ ObTabletMemtableMgr::ObTabletMemtableMgr() : ObIMemtableMgr(LockType::OB_SPIN_RWLOCK, &lock_def_), ls_(NULL), lock_def_(common::ObLatchIds::TABLET_MEMTABLE_LOCK), - schema_recorder_() + schema_recorder_(), + medium_info_recorder_() { #if defined(__x86_64__) - static_assert(sizeof(ObTabletMemtableMgr) <= 370, "The size of ObTabletMemtableMgr will affect the meta memory manager, and the necessity of adding new fields needs to be considered."); + static_assert(sizeof(ObTabletMemtableMgr) <= 448, "The size of ObTabletMemtableMgr will affect the meta memory manager, and the necessity of adding new fields needs to be considered."); #endif } @@ -68,7 +69,8 @@ void 
ObTabletMemtableMgr::destroy() tablet_id_ = 0; ls_ = NULL; freezer_ = nullptr; - schema_recorder_.reset(); + schema_recorder_.destroy(); + medium_info_recorder_.destroy(); is_inited_ = false; } @@ -114,27 +116,33 @@ int ObTabletMemtableMgr::init(const common::ObTabletID &tablet_id, return ret; } -int ObTabletMemtableMgr::init_storage_schema_recorder( +int ObTabletMemtableMgr::init_storage_recorder( const ObTabletID &tablet_id, const share::ObLSID &ls_id, const int64_t max_saved_schema_version, + const int64_t max_saved_medium_scn, + const lib::Worker::CompatMode compat_mode, logservice::ObLogHandler *log_handler) { int ret = OB_SUCCESS; - if (OB_FAIL(schema_recorder_.init(ls_id, tablet_id, max_saved_schema_version, log_handler))) { + if (OB_FAIL(schema_recorder_.init(ls_id, tablet_id, max_saved_schema_version, compat_mode, log_handler))) { TRANS_LOG(WARN, "failed to init schema recorder", K(ret), K(max_saved_schema_version), KP(log_handler)); + } else if (OB_FAIL(medium_info_recorder_.init(ls_id, tablet_id, max_saved_medium_scn, log_handler))) { + TRANS_LOG(WARN, "failed to init medium info recorder", K(ret), K(max_saved_medium_scn), KP(log_handler)); } return ret; } -int ObTabletMemtableMgr::reset_storage_schema_recorder() +int ObTabletMemtableMgr::reset_storage_recorder() { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!schema_recorder_.is_inited())) { + if (OB_UNLIKELY(!schema_recorder_.is_inited() || !medium_info_recorder_.is_inited())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("schema recorder is not init", K(ret)); + LOG_WARN("schema recorder or medium recorder is not init", K(ret), K_(schema_recorder), + K_(medium_info_recorder)); } else { schema_recorder_.reset(); + medium_info_recorder_.reset(); } return ret; } diff --git a/src/storage/tablet/ob_tablet_memtable_mgr.h b/src/storage/tablet/ob_tablet_memtable_mgr.h index 135b855ada..ff1b3cbc4f 100644 --- a/src/storage/tablet/ob_tablet_memtable_mgr.h +++ b/src/storage/tablet/ob_tablet_memtable_mgr.h @@ -17,6 +17,8 @@ 
#include "storage/memtable/ob_memtable.h" #include "storage/ob_i_memtable_mgr.h" #include "storage/ob_storage_struct.h" +#include "storage/ob_storage_schema_recorder.h" +#include "storage/compaction/ob_medium_compaction_mgr.h" namespace oceanbase { @@ -93,12 +95,18 @@ public: { return schema_recorder_; } - virtual int init_storage_schema_recorder( + compaction::ObTabletMediumCompactionInfoRecorder &get_medium_info_recorder() + { + return medium_info_recorder_; + } + virtual int init_storage_recorder( const ObTabletID &tablet_id, const share::ObLSID &ls_id, const int64_t max_saved_schema_version, + const int64_t max_saved_medium_scn, + const lib::Worker::CompatMode compat_mode, logservice::ObLogHandler *log_handler) override; - virtual int reset_storage_schema_recorder() override; + virtual int reset_storage_recorder() override; DECLARE_VIRTUAL_TO_STRING; protected: @@ -133,9 +141,10 @@ private: static const int64_t PRINT_READABLE_INFO_DURATION_US = 1000 * 1000 * 60 * 10L; //10min private: - ObLS *ls_; //8B + ObLS *ls_; // 8B common::SpinRWLock lock_def_; //8B - ObStorageSchemaRecorder schema_recorder_;// 136B + ObStorageSchemaRecorder schema_recorder_; // 120B + compaction::ObTabletMediumCompactionInfoRecorder medium_info_recorder_; // 96B }; } } diff --git a/src/storage/tablet/ob_tablet_meta.cpp b/src/storage/tablet/ob_tablet_meta.cpp index 4380f37b7e..fd225643fe 100644 --- a/src/storage/tablet/ob_tablet_meta.cpp +++ b/src/storage/tablet/ob_tablet_meta.cpp @@ -56,6 +56,7 @@ ObTabletMeta::ObTabletMeta() ddl_start_scn_(SCN::min_scn()), ddl_snapshot_version_(OB_INVALID_TIMESTAMP), max_sync_storage_schema_version_(0), + max_serialized_medium_scn_(0), ddl_execution_id_(-1), ddl_cluster_version_(0), is_inited_(false) @@ -78,7 +79,8 @@ int ObTabletMeta::init( const int64_t snapshot_version, const lib::Worker::CompatMode compat_mode, const ObTabletTableStoreFlag &table_store_flag, - const int64_t max_sync_storage_schema_version) + const int64_t 
max_sync_storage_schema_version, + const int64_t max_serialized_medium_scn) { int ret = OB_SUCCESS; @@ -113,6 +115,7 @@ int ObTabletMeta::init( ddl_start_scn_.set_min(); ddl_snapshot_version_ = 0; max_sync_storage_schema_version_ = max_sync_storage_schema_version; + max_serialized_medium_scn_ = max_serialized_medium_scn; ddl_execution_id_ = -1; ddl_cluster_version_ = 0; @@ -148,6 +151,7 @@ int ObTabletMeta::init( const ObTabletBindingInfo &ddl_data, const ObTabletAutoincSeq &autoinc_seq, const int64_t max_sync_storage_schema_version, + const int64_t max_serialized_medium_scn, const SCN clog_checkpoint_scn, const SCN ddl_checkpoint_scn, const SCN ddl_start_scn, @@ -187,6 +191,7 @@ int ObTabletMeta::init( tx_data_ = tx_data; table_store_flag_ = old_tablet_meta.table_store_flag_; max_sync_storage_schema_version_ = max_sync_storage_schema_version; + max_serialized_medium_scn_ = max_serialized_medium_scn; ddl_checkpoint_scn_ = SCN::max(old_tablet_meta.ddl_checkpoint_scn_, ddl_checkpoint_scn); ddl_snapshot_version_ = MAX(old_tablet_meta.ddl_snapshot_version_, ddl_snapshot_version); ddl_execution_id_ = MAX(old_tablet_meta.ddl_execution_id_, ddl_execution_id); @@ -237,6 +242,7 @@ int ObTabletMeta::init( ddl_start_scn_ = param.ddl_start_scn_; ddl_snapshot_version_ = param.ddl_snapshot_version_; max_sync_storage_schema_version_ = param.max_sync_storage_schema_version_; + max_serialized_medium_scn_ = param.max_serialized_medium_scn_; ddl_execution_id_ = param.ddl_execution_id_; ddl_cluster_version_ = param.ddl_cluster_version_; is_inited_ = true; @@ -248,6 +254,54 @@ int ObTabletMeta::init( return ret; } +int ObTabletMeta::init( + common::ObIAllocator &allocator, + const ObTabletMeta &old_tablet_meta) +{ + int ret = OB_SUCCESS; + + if (OB_UNLIKELY(is_inited_)) { + ret = OB_INIT_TWICE; + LOG_WARN("init twice", K(ret), K_(is_inited)); + } else if (OB_UNLIKELY(!old_tablet_meta.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid args", K(ret), K(old_tablet_meta)); + } 
else if (OB_FAIL(ddl_data_.assign(old_tablet_meta.ddl_data_))) { + LOG_WARN("failed to assign ddl data", K(ret), K_(old_tablet_meta.ddl_data)); + } else if (OB_FAIL(autoinc_seq_.assign(old_tablet_meta.autoinc_seq_))) { + LOG_WARN("failed to assign autoinc seq", K(ret)); + } else { + version_ = TABLET_META_VERSION; + ls_id_ = old_tablet_meta.ls_id_; + tablet_id_ = old_tablet_meta.tablet_id_; + data_tablet_id_ = old_tablet_meta.data_tablet_id_; + ref_tablet_id_ = old_tablet_meta.ref_tablet_id_; + create_scn_ = old_tablet_meta.create_scn_; + start_scn_ = old_tablet_meta.start_scn_; + clog_checkpoint_scn_ = old_tablet_meta.clog_checkpoint_scn_; + ddl_checkpoint_scn_ = old_tablet_meta.ddl_checkpoint_scn_; + compat_mode_ = old_tablet_meta.compat_mode_; + report_status_ = old_tablet_meta.report_status_; + ha_status_ = old_tablet_meta.ha_status_; + snapshot_version_ = old_tablet_meta.snapshot_version_; + multi_version_start_ = old_tablet_meta.multi_version_start_; + tx_data_ = old_tablet_meta.tx_data_; + table_store_flag_ = old_tablet_meta.table_store_flag_; + ddl_start_scn_ = old_tablet_meta.ddl_start_scn_; + ddl_snapshot_version_ = old_tablet_meta.ddl_snapshot_version_; + max_sync_storage_schema_version_ = old_tablet_meta.max_sync_storage_schema_version_; + max_serialized_medium_scn_ = old_tablet_meta.max_serialized_medium_scn_; + ddl_execution_id_ = old_tablet_meta.ddl_execution_id_; + ddl_cluster_version_ = old_tablet_meta.ddl_cluster_version_; + is_inited_ = true; + } + + if (OB_UNLIKELY(!is_inited_)) { + reset(); + } + return ret; +} + int ObTabletMeta::init( common::ObIAllocator &allocator, const ObTabletMeta &old_tablet_meta, @@ -318,6 +372,8 @@ int ObTabletMeta::init( ddl_start_scn_ = old_tablet_meta.ddl_start_scn_; ddl_snapshot_version_ = old_tablet_meta.ddl_snapshot_version_; max_sync_storage_schema_version_ = max_sync_storage_schema_version; + max_serialized_medium_scn_ = MAX(old_tablet_meta.max_serialized_medium_scn_, + OB_ISNULL(tablet_meta) ? 
0 : tablet_meta->max_serialized_medium_scn_); ddl_execution_id_ = old_tablet_meta.ddl_execution_id_; ddl_cluster_version_ = old_tablet_meta.ddl_cluster_version_; @@ -356,6 +412,7 @@ void ObTabletMeta::reset() ddl_start_scn_.set_min(); ddl_snapshot_version_ = OB_INVALID_TIMESTAMP; max_sync_storage_schema_version_ = 0; + max_serialized_medium_scn_ = 0; ddl_execution_id_ = -1; ddl_cluster_version_ = 0; is_inited_ = false; @@ -372,6 +429,7 @@ bool ObTabletMeta::is_valid() const && multi_version_start_ <= snapshot_version_ && compat_mode_ != lib::Worker::CompatMode::INVALID && max_sync_storage_schema_version_ >= 0 + && max_serialized_medium_scn_ >= 0 && ha_status_.is_valid() && (ha_status_.is_restore_status_pending() || (!ha_status_.is_restore_status_pending() @@ -450,6 +508,8 @@ int ObTabletMeta::serialize(char *buf, const int64_t len, int64_t &pos) LOG_WARN("failed to serialize ddl snapshot version", K(ret), K(len), K(new_pos), K_(ddl_snapshot_version)); } else if (new_pos - pos < length_ && OB_FAIL(serialization::encode_i64(buf, len, new_pos, max_sync_storage_schema_version_))) { LOG_WARN("failed to serialize max_sync_storage_schema_version", K(ret), K(len), K(new_pos), K_(max_sync_storage_schema_version)); + } else if (new_pos - pos < length_ && OB_FAIL(serialization::encode_i64(buf, len, new_pos, max_serialized_medium_scn_))) { + LOG_WARN("failed to serialize max_serialized_medium_scn", K(ret), K(len), K(new_pos), K_(max_serialized_medium_scn)); } else if (new_pos - pos < length_ && OB_FAIL(serialization::encode_i64(buf, len, new_pos, ddl_execution_id_))) { LOG_WARN("failed to serialize ddl execution id", K(ret), K(len), K(new_pos), K_(ddl_execution_id)); } else if (new_pos - pos < length_ && OB_FAIL(serialization::encode_i64(buf, len, new_pos, ddl_cluster_version_))) { @@ -532,7 +592,9 @@ int ObTabletMeta::deserialize( } else if (new_pos - pos < length_ && OB_FAIL(serialization::decode_i64(buf, len, new_pos, &ddl_snapshot_version_))) { LOG_WARN("failed to 
deserialize ddl snapshot version", K(ret), K(len), K(new_pos)); } else if (new_pos - pos < length_ && OB_FAIL(serialization::decode_i64(buf, len, new_pos, &max_sync_storage_schema_version_))) { - LOG_WARN("failed to deserialize max_sync_storage_schema_version_", K(ret), K(len), K(new_pos)); + LOG_WARN("failed to deserialize max_sync_storage_schema_version", K(ret), K(len), K(new_pos)); + } else if (new_pos - pos < length_ && OB_FAIL(serialization::decode_i64(buf, len, new_pos, &max_serialized_medium_scn_))) { + LOG_WARN("failed to deserialize max_serialized_medium_scn", K(ret), K(len), K(new_pos)); } else if (new_pos - pos < length_ && OB_FAIL(serialization::decode_i64(buf, len, new_pos, &ddl_execution_id_))) { LOG_WARN("failed to deserialize ddl execution id", K(ret), K(len), K(new_pos)); } else if (new_pos - pos < length_ && OB_FAIL(serialization::decode_i64(buf, len, new_pos, &ddl_cluster_version_))) { @@ -579,6 +641,7 @@ int64_t ObTabletMeta::get_serialize_size() const size += ddl_start_scn_.get_fixed_serialize_size(); size += serialization::encoded_length_i64(ddl_snapshot_version_); size += serialization::encoded_length_i64(max_sync_storage_schema_version_); + size += serialization::encoded_length_i64(max_serialized_medium_scn_); size += serialization::encoded_length_i64(ddl_execution_id_); size += serialization::encoded_length_i64(ddl_cluster_version_); return size; @@ -674,6 +737,7 @@ int ObTabletMeta::update(const ObMigrationTabletParam ¶m) ddl_start_scn_ = param.ddl_start_scn_; ddl_snapshot_version_ = param.ddl_snapshot_version_; max_sync_storage_schema_version_ = param.max_sync_storage_schema_version_; + max_serialized_medium_scn_ = param.max_serialized_medium_scn_; ddl_execution_id_ = param.ddl_execution_id_; ddl_cluster_version_ = param.ddl_cluster_version_; } @@ -740,11 +804,12 @@ ObMigrationTabletParam::ObMigrationTabletParam() tx_data_(), ddl_data_(), storage_schema_(), - 
medium_info_list_(compaction::ObMediumCompactionInfoList::MEDIUM_LIST_IN_STORAGE), + medium_info_list_(), table_store_flag_(), ddl_start_scn_(SCN::min_scn()), ddl_snapshot_version_(OB_INVALID_TIMESTAMP), max_sync_storage_schema_version_(0), + max_serialized_medium_scn_(0), ddl_execution_id_(-1), ddl_cluster_version_(0) { @@ -759,14 +824,15 @@ bool ObMigrationTabletParam::is_valid() const && multi_version_start_ >= 0 && multi_version_start_ <= snapshot_version_ && compat_mode_ != lib::Worker::CompatMode::INVALID - && medium_info_list_.is_valid() - && ha_status_.is_valid() && max_sync_storage_schema_version_ >= 0 + && max_serialized_medium_scn_ >= 0 + && ha_status_.is_valid() && (ha_status_.is_restore_status_pending() || (start_scn_ >= SCN::base_scn() && clog_checkpoint_scn_ >= SCN::base_scn() && start_scn_ <= clog_checkpoint_scn_ - && storage_schema_.is_valid())); + && storage_schema_.is_valid() + && medium_info_list_.is_valid())); } int ObMigrationTabletParam::serialize(char *buf, const int64_t len, int64_t &pos) const @@ -826,6 +892,8 @@ int ObMigrationTabletParam::serialize(char *buf, const int64_t len, int64_t &pos LOG_WARN("failed to serialize ddl snapshot version", K(ret), K(len), K(new_pos), K_(ddl_snapshot_version)); } else if (OB_FAIL(serialization::encode_i64(buf, len, new_pos, max_sync_storage_schema_version_))) { LOG_WARN("failed to serialize max_sync_storage_schema_version", K(ret), K(len), K(new_pos), K_(max_sync_storage_schema_version)); + } else if (OB_FAIL(serialization::encode_i64(buf, len, new_pos, max_serialized_medium_scn_))) { + LOG_WARN("failed to serialize max_serialized_medium_scn", K(ret), K(len), K(new_pos), K_(max_serialized_medium_scn)); } else if (OB_FAIL(serialization::encode_i64(buf, len, new_pos, ddl_execution_id_))) { LOG_WARN("failed to serialize ddl execution id", K(ret), K(len), K(new_pos), K_(ddl_execution_id)); } else if (OB_FAIL(serialization::encode_i64(buf, len, new_pos, ddl_cluster_version_))) { @@ -893,6 +961,8 @@ int 
ObMigrationTabletParam::deserialize(const char *buf, const int64_t len, int6 LOG_WARN("failed to deserialize ddl snapshot version", K(ret), K(len), K(new_pos)); } else if (OB_FAIL(serialization::decode_i64(buf, len, new_pos, &max_sync_storage_schema_version_))) { LOG_WARN("failed to deserialize max sync storage schema version", K(ret), K(len), K(new_pos)); + } else if (OB_FAIL(serialization::decode_i64(buf, len, new_pos, &max_serialized_medium_scn_))) { + LOG_WARN("failed to deserialize max sync medium snapshot", K(ret), K(len), K(new_pos)); } else if (OB_FAIL(serialization::decode_i64(buf, len, new_pos, &ddl_execution_id_))) { LOG_WARN("failed to deserialize ddl execution id", K(ret), K(len), K(new_pos)); } else if (OB_FAIL(serialization::decode_i64(buf, len, new_pos, &ddl_cluster_version_))) { @@ -930,6 +1000,7 @@ int64_t ObMigrationTabletParam::get_serialize_size() const size += ddl_start_scn_.get_fixed_serialize_size(); size += serialization::encoded_length_i64(ddl_snapshot_version_); size += serialization::encoded_length_i64(max_sync_storage_schema_version_); + size += serialization::encoded_length_i64(max_serialized_medium_scn_); size += serialization::encoded_length_i64(ddl_execution_id_); size += serialization::encoded_length_i64(ddl_cluster_version_); return size; @@ -958,6 +1029,7 @@ void ObMigrationTabletParam::reset() ddl_start_scn_.set_min(); ddl_snapshot_version_ = OB_INVALID_TIMESTAMP; max_sync_storage_schema_version_ = 0; + max_serialized_medium_scn_ = 0; ddl_execution_id_ = -1; ddl_cluster_version_ = 0; } @@ -989,6 +1061,7 @@ int ObMigrationTabletParam::assign(const ObMigrationTabletParam ¶m) ddl_start_scn_ = param.ddl_start_scn_; ddl_snapshot_version_ = param.ddl_snapshot_version_; max_sync_storage_schema_version_ = param.max_sync_storage_schema_version_; + max_serialized_medium_scn_ = param.max_serialized_medium_scn_; ddl_execution_id_ = param.ddl_execution_id_; ddl_cluster_version_ = param.ddl_cluster_version_; @@ -1005,9 +1078,10 @@ int 
ObMigrationTabletParam::assign(const ObMigrationTabletParam ¶m) return ret; } -int ObMigrationTabletParam::construct_placeholder_storage_schema( +int ObMigrationTabletParam::construct_placeholder_storage_schema_and_medium( ObIAllocator &allocator, - ObStorageSchema &storage_schema) + ObStorageSchema &storage_schema, + compaction::ObMediumCompactionInfoList &medium_info_list) { int ret = OB_SUCCESS; storage_schema.reset(); @@ -1029,6 +1103,7 @@ int ObMigrationTabletParam::construct_placeholder_storage_schema( storage_schema.pctfree_ = OB_DEFAULT_PCTFREE; storage_schema.block_size_ = OB_DEFAULT_MACRO_BLOCK_SIZE; storage_schema.progressive_merge_round_ = 0; + storage_schema.progressive_merge_num_ = 0; storage_schema.master_key_id_ = OB_INVALID_ID; storage_schema.compat_mode_ = static_cast(lib::Worker::get_compatibility_mode()); @@ -1055,9 +1130,13 @@ int ObMigrationTabletParam::construct_placeholder_storage_schema( storage_schema.is_inited_ = true; } - if (OB_SUCC(ret) && OB_UNLIKELY(!storage_schema.is_valid())) { + if (FAILEDx(medium_info_list.init(allocator))) { + LOG_WARN("failed to init medium info list", K(ret)); + } + + if (OB_SUCC(ret) && OB_UNLIKELY(!storage_schema.is_valid() || !medium_info_list.is_valid())) { ret = OB_ERR_UNEXPECTED; - LOG_WARN("placeholder storage schema is not valid", K(ret), K(storage_schema)); + LOG_WARN("placeholder storage schema or medium info list is not valid", K(ret), K(storage_schema), K(medium_info_list)); } return ret; } diff --git a/src/storage/tablet/ob_tablet_meta.h b/src/storage/tablet/ob_tablet_meta.h index 342893e61d..c189c8bf67 100644 --- a/src/storage/tablet/ob_tablet_meta.h +++ b/src/storage/tablet/ob_tablet_meta.h @@ -64,7 +64,8 @@ public: const int64_t snapshot_version, const lib::Worker::CompatMode compat_mode, const ObTabletTableStoreFlag &table_store_flag, - const int64_t max_sync_storage_schema_version); + const int64_t max_sync_storage_schema_version, + const int64_t max_serialized_medium_scn); int init( 
common::ObIAllocator &allocator, const ObTabletMeta &old_tablet_meta, @@ -74,6 +75,7 @@ public: const ObTabletBindingInfo &ddl_data, const share::ObTabletAutoincSeq &autoinc_seq, const int64_t max_sync_storage_schema_version, + const int64_t max_serialized_medium_scn, const share::SCN clog_checkpoint_scn = share::SCN::min_scn(), const share::SCN ddl_checkpoint_scn = share::SCN::min_scn(), const share::SCN ddl_start_scn = share::SCN::min_scn(), @@ -90,6 +92,9 @@ public: const ObTabletBindingInfo &ddl_data, const share::ObTabletAutoincSeq &autoinc_seq, const ObMigrationTabletParam *tablet_meta); + int init( + common::ObIAllocator &allocator, + const ObTabletMeta &old_tablet_meta); void reset(); bool is_valid() const; @@ -138,6 +143,7 @@ public: K_(ddl_start_scn), K_(ddl_snapshot_version), K_(max_sync_storage_schema_version), + K_(max_serialized_medium_scn), K_(ddl_execution_id), K_(ddl_cluster_version)); @@ -165,7 +171,11 @@ public: ObTabletTableStoreFlag table_store_flag_; share::SCN ddl_start_scn_; int64_t ddl_snapshot_version_; + // max_sync_storage_schema_version_ = MIN(serialized_schema_version, sync_schema_version) + // serialized_schema_version > sync_schema_version when major update storage schema + // sync_schema_version > serialized_schema_version when replay schema clog but not mini merge yet int64_t max_sync_storage_schema_version_; + int64_t max_serialized_medium_scn_; // update when serialized medium info int64_t ddl_execution_id_; int64_t ddl_cluster_version_; //ATTENTION : Add a new variable need consider ObMigrationTabletParam @@ -199,9 +209,10 @@ public: int assign(const ObMigrationTabletParam ¶m); // used for restore PENDING tablet, the placeholder tablet doesn't have storage schema to use - static int construct_placeholder_storage_schema( + static int construct_placeholder_storage_schema_and_medium( ObIAllocator &allocator, - ObStorageSchema &storage_schema); + ObStorageSchema &storage_schema, + compaction::ObMediumCompactionInfoList 
&medium_info_list); TO_STRING_KV(K_(ls_id), K_(tablet_id), @@ -224,7 +235,8 @@ public: K_(storage_schema), K_(medium_info_list), K_(table_store_flag), - K_(max_sync_storage_schema_version)); + K_(max_sync_storage_schema_version), + K_(max_serialized_medium_scn)); public: common::ObArenaAllocator allocator_; // for storage schema @@ -251,6 +263,7 @@ public: int64_t ddl_snapshot_version_; // max_sync_version may less than storage_schema.schema_version_ when major update schema int64_t max_sync_storage_schema_version_; + int64_t max_serialized_medium_scn_; int64_t ddl_execution_id_; int64_t ddl_cluster_version_; }; diff --git a/src/storage/tablet/ob_tablet_table_store.cpp b/src/storage/tablet/ob_tablet_table_store.cpp index bdd3dbee4c..a9b15b7c89 100644 --- a/src/storage/tablet/ob_tablet_table_store.cpp +++ b/src/storage/tablet/ob_tablet_table_store.cpp @@ -29,6 +29,7 @@ using namespace oceanbase::storage; using namespace oceanbase::memtable; using namespace oceanbase::share::schema; using compaction::ObPartitionMergePolicy; +using compaction::ObAdaptiveMergePolicy; /* ObTabletTableStore Section */ @@ -294,7 +295,7 @@ int ObTabletTableStore::inner_replace_sstables( LOG_WARN("failed to init minor tables", K(ret)); } else if (!ddl_tables.empty() && OB_FAIL(ddl_sstables_.init_and_copy(allocator, ddl_tables))) { LOG_WARN("failed to init ddl tables", K(ret)); - } else if (nullptr != old_extend[BUF_MINOR] && OB_FAIL(extend_tables_.assign(BUF_MINOR, old_extend[BUF_MINOR]))) { + } else if (nullptr != old_extend[META_MAJOR] && OB_FAIL(extend_tables_.assign(META_MAJOR, old_extend[META_MAJOR]))) { LOG_WARN("failed to build buf minor table", K(ret), K(old_extend)); } else if (OB_FAIL(pull_memtables())) { LOG_WARN("failed to pull memtable from memtable_mgr", K(ret)); @@ -390,12 +391,12 @@ int ObTabletTableStore::get_memtables(ObIArray &memtables, { int ret = OB_SUCCESS; for (int64_t i = 0; OB_SUCC(ret) && i < memtables_.count(); ++i) { - if (OB_ISNULL(memtables_[i])) { + if 
(OB_ISNULL(memtables_.get_table(i))) { ret = OB_ERR_UNEXPECTED; LOG_WARN("memtable must not null", K(ret), K(memtables_)); - } else if (!need_active && memtables_[i]->is_active_memtable()) { + } else if (!need_active && memtables_.get_table(i)->is_active_memtable()) { continue; - } else if (OB_FAIL(memtables.push_back(memtables_[i]))) { + } else if (OB_FAIL(memtables.push_back(memtables_.get_table(i)))) { LOG_WARN("failed to add memtables", K(ret), K(*this)); } } @@ -566,11 +567,11 @@ int ObTabletTableStore::calculate_read_tables( int ret = OB_SUCCESS; ObITable *base_table = nullptr; - if (OB_NOT_NULL(extend_tables_[BUF_MINOR]) - && extend_tables_[BUF_MINOR]->get_max_merged_trans_version() <= snapshot_version) { - base_table = extend_tables_[BUF_MINOR]; - if (OB_FAIL(iterator.add_tables(extend_tables_.array_ + BUF_MINOR))) { - LOG_WARN("failed to add buf minor table to iterator", K(ret)); + if (OB_NOT_NULL(extend_tables_[META_MAJOR]) + && extend_tables_[META_MAJOR]->get_snapshot_version() <= snapshot_version) { + base_table = extend_tables_[META_MAJOR]; + if (OB_FAIL(iterator.add_tables(extend_tables_.array_ + META_MAJOR))) { + LOG_WARN("failed to add meta sstable to iterator", K(ret)); } } else if (!major_tables_.empty()) { for (int64_t i = major_tables_.count_ - 1; OB_SUCC(ret) && i >= 0; --i) { @@ -705,6 +706,39 @@ int ObTabletTableStore::pull_memtables() return ret; } +int ObTabletTableStore::assign( + ObIAllocator &allocator, + const ObTabletTableStore &other, ObTablet *new_tablet) +{ + int ret = OB_SUCCESS; + if (IS_INIT) { + ret = OB_INIT_TWICE; + LOG_WARN("ObTabletTableStore has been inited, cannot assign", K(ret), K(*this)); + } else if (OB_UNLIKELY(!other.is_valid() || NULL == (tablet_ptr_ = new_tablet))) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("get invalid arguments", K(ret), K(other), K(new_tablet)); + } else if (other.major_tables_.count_ > 0 && OB_FAIL(major_tables_.copy(allocator, other.major_tables_))) { + LOG_WARN("failed to copy major tables", 
K(ret)); + } else if (other.minor_tables_.count_ > 0 && OB_FAIL(minor_tables_.copy(allocator, other.minor_tables_))) { + LOG_WARN("failed to copy minor tables", K(ret)); + } else if (other.ddl_sstables_.count_ > 0 && OB_FAIL(ddl_sstables_.copy(allocator, other.ddl_sstables_))) { + LOG_WARN("failed to copy ddl tables", K(ret)); + } else if (OB_FAIL(extend_tables_.copy(allocator, other.extend_tables_, true/*allow_empty_table*/))) { + LOG_WARN("failed to copy extend tables", K(ret)); + } else if (OB_FAIL(memtables_.init(&allocator, other.memtables_))) { + LOG_WARN("failed to copy memtables", K(ret)); + } else { + is_ready_for_read_ = other.is_ready_for_read_; + is_inited_ = true; + int tmp_ret = OB_SUCCESS; + if (OB_SUCCESS != (tmp_ret = init_read_cache())) { + LOG_WARN("failed to init read cache iterator", K(tmp_ret)); + } + FLOG_INFO("success to assign table store", K(ret), K(PRINT_TS(*this))); + } + return ret; +} + int ObTabletTableStore::build_new_table_store( ObIAllocator &allocator, const ObUpdateTableStoreParam ¶m, @@ -722,11 +756,11 @@ int ObTabletTableStore::build_new_table_store( } if (OB_SUCC(ret) && OB_NOT_NULL(new_table)) { - if (new_table->is_buf_minor_sstable()) { - ObITable *buf_minor_table = old_store.extend_tables_[BUF_MINOR]; - if (OB_NOT_NULL(buf_minor_table) && new_table->get_end_scn() <= buf_minor_table->get_end_scn()) { + if (new_table->is_meta_major_sstable()) { + ObITable *meta_major_table = old_store.extend_tables_[META_MAJOR]; + if (OB_NOT_NULL(meta_major_table) && new_table->get_end_scn() <= meta_major_table->get_end_scn()) { ret= OB_MINOR_SSTABLE_RANGE_CROSS; - LOG_WARN("new buf minor table is covered by old one", K(ret), KPC(new_table), KPC(buf_minor_table)); + LOG_WARN("new meta major table is covered by old one", K(ret), KPC(new_table), KPC(meta_major_table)); } } else if (new_table->is_ddl_sstable() || new_table->is_major_sstable() || new_table->is_minor_sstable()) { // ddl will deal in build_ddl_table later; all major/minor sstables 
always need rebuild @@ -746,8 +780,8 @@ int ObTabletTableStore::build_new_table_store( } } else if (OB_FAIL(build_ddl_sstables(allocator, param, old_store))) { LOG_WARN("Failed to add ddl minor sstable", K(ret)); - } else if (OB_FAIL(build_buf_minor_table(param.table_handle_, old_store))) { - LOG_WARN("Failed to add buf minor sstable", K(ret)); + } else if (OB_FAIL(build_meta_major_table(param.table_handle_, old_store))) { + LOG_WARN("Failed to add meta sstable", K(ret)); } else if (OB_FAIL(pull_memtables())) { LOG_WARN("failed to pull memtable from memtable_mgr", K(ret)); } else if (OB_FAIL(check_ready_for_read())) { @@ -949,32 +983,32 @@ int ObTabletTableStore::build_minor_tables( return ret; } -int ObTabletTableStore::build_buf_minor_table( +int ObTabletTableStore::build_meta_major_table( const ObTableHandleV2 &new_handle, const ObTabletTableStore &old_store) { int ret = OB_SUCCESS; ObITable *new_table = const_cast(new_handle.get_table()); - ObITable *old_buf_minor = old_store.extend_tables_[BUF_MINOR]; + ObITable *old_meta_major = old_store.extend_tables_[META_MAJOR]; ObITable *last_major = nullptr; - extend_tables_.reset_table(BUF_MINOR); + extend_tables_.reset_table(META_MAJOR); - if (OB_NOT_NULL(new_table) - && TABLE_MODE_QUEUING != static_cast(new_table)->get_meta().get_basic_meta().table_mode_.mode_flag_) { - } else if (OB_ISNULL(last_major = major_tables_.get_boundary_table(true))) { - LOG_WARN("no major sstable exists", K(major_tables_)); - } else if (OB_NOT_NULL(new_table) && new_table->is_buf_minor_sstable()) { // if new_table is buf minor, it must be newer than old_buf_minor + if (OB_ISNULL(last_major = major_tables_.get_boundary_table(true))) { + LOG_INFO("no major sstable exists, skip to try to build meta sstable", K(*this)); + } else if (OB_NOT_NULL(new_table) && new_table->is_meta_major_sstable()) { // new meta sstable must be newer than old meta if (new_table->get_max_merged_trans_version() <= last_major->get_snapshot_version()) { ret= 
OB_MINOR_SSTABLE_RANGE_CROSS; - LOG_WARN("the new buf minor sstable is covered by major", K(ret), KPC(new_table), KPC(last_major)); - } else if (OB_FAIL(extend_tables_.assign(BUF_MINOR, new_table))) { - LOG_WARN("failed to add new buf minor sstable", K(ret)); + LOG_WARN("the new meta merge sstable is covered by major", K(ret), KPC(new_table), KPC(last_major)); + } else if (OB_FAIL(extend_tables_.assign(META_MAJOR, new_table))) { + LOG_WARN("failed to add new meta merge sstable", K(ret)); } - } else if (OB_NOT_NULL(old_buf_minor)) { - if (old_buf_minor->get_max_merged_trans_version() <= last_major->get_snapshot_version()) { // new table is not buf minor - FLOG_INFO("buf minor table is covered by major sstable", KPC(last_major), KPC(old_buf_minor)); - } else if (OB_FAIL(extend_tables_.assign(BUF_MINOR, old_buf_minor))) { - LOG_WARN("failed to add new buf minor sstable", K(ret)); + } else if (OB_NOT_NULL(new_table) && new_table->is_major_sstable()) { + // new table is major sstable, retire old meta sstable. 
+ } else if (OB_NOT_NULL(old_meta_major)) { + if (old_meta_major->get_snapshot_version() <= last_major->get_snapshot_version()) { // new table is not meta sstable + FLOG_INFO("meta sstable is covered by major sstable", KPC(last_major), KPC(old_meta_major)); + } else if (OB_FAIL(extend_tables_.assign(META_MAJOR, old_meta_major))) { + LOG_WARN("failed to add new meta sstable", K(ret)); } } return ret; @@ -1279,7 +1313,7 @@ int ObTabletTableStore::build_ha_new_table_store_( const ObTabletTableStore &old_store) { int ret = OB_SUCCESS; - const ObExtendTableArray &buffer_minor = old_store.extend_tables_; + const ObExtendTableArray &meta_major_table = old_store.extend_tables_; int64_t inc_base_snapshot_version = 0; ObTableHandleV2 tmp_handle; @@ -1295,7 +1329,7 @@ int ObTabletTableStore::build_ha_new_table_store_( LOG_WARN("failed to build ha minor tables", K(ret), K(param), K(old_store)); } else if (OB_FAIL(build_ha_ddl_tables_(allocator, param, old_store))) { LOG_WARN("failed to build ha ddl tables", K(ret), K(param), K(old_store)); - } else if (!buffer_minor.empty() && OB_FAIL(build_buf_minor_table(tmp_handle, old_store))) { + } else if (!meta_major_table.empty() && OB_FAIL(build_meta_major_table(tmp_handle, old_store))) { LOG_WARN("failed to build buf minor table", K(ret), K(old_store)); } else if (OB_FAIL(pull_memtables())) { LOG_WARN("failed to pull memtable from memtable_mgr", K(ret)); @@ -1820,11 +1854,11 @@ int64_t ObPrintTableStore::to_string(char *buf, const int64_t buf_len) const print_arr(major_tables_, "MAJOR", buf, buf_len, pos, is_print); print_arr(minor_tables_, "MINOR", buf, buf_len, pos, is_print); print_mem(memtables_, "MEM", buf, buf_len, pos, is_print); - if (nullptr != extend_tables_[ObTabletTableStore::BUF_MINOR]) { + if (nullptr != extend_tables_[ObTabletTableStore::META_MAJOR]) { if (is_print) { J_NEWLINE(); } - table_to_string(extend_tables_[ObTabletTableStore::BUF_MINOR], "BUF", buf, buf_len, pos); + 
table_to_string(extend_tables_[ObTabletTableStore::META_MAJOR], "BUF", buf, buf_len, pos); } } else { J_EMPTY_OBJ(); diff --git a/src/storage/tablet/ob_tablet_table_store.h b/src/storage/tablet/ob_tablet_table_store.h index 6a40a103de..dc53024c13 100644 --- a/src/storage/tablet/ob_tablet_table_store.h +++ b/src/storage/tablet/ob_tablet_table_store.h @@ -40,7 +40,7 @@ public: friend class ObPrintTableStore; typedef common::ObSEArray ObTableHandleArray; enum ExtendTable: int64_t { - BUF_MINOR = 0, + META_MAJOR = 0, EXTEND_CNT }; @@ -102,6 +102,7 @@ public: int get_ddl_sstable_handles(ObTablesHandleArray &ddl_sstable_handles) const; int get_mini_minor_sstables(ObTablesHandleArray &minor_sstables) const; + int assign(common::ObIAllocator &allocator, const ObTabletTableStore &other, ObTablet *new_tablet); int get_recycle_version(const int64_t multi_version_start, int64_t &recycle_version) const; int64_t to_string(char *buf, const int64_t buf_len) const; @@ -150,7 +151,7 @@ private: const ObUpdateTableStoreParam ¶m, const ObTabletTableStore &old_store, const int64_t inc_base_snapshot_version); - int build_buf_minor_table( + int build_meta_major_table( const ObTableHandleV2 &new_handle, const ObTabletTableStore &old_store); int build_ddl_sstables( diff --git a/src/storage/tx_table/ob_tx_ctx_memtable.cpp b/src/storage/tx_table/ob_tx_ctx_memtable.cpp index 45029e87b2..c6e11a19ab 100644 --- a/src/storage/tx_table/ob_tx_ctx_memtable.cpp +++ b/src/storage/tx_table/ob_tx_ctx_memtable.cpp @@ -289,7 +289,7 @@ int ObTxCtxMemtable::flush(SCN recycle_scn, bool need_freeze) param.ls_id_ = ls_id_; param.tablet_id_ = LS_TX_CTX_TABLET; param.merge_type_ = MINI_MERGE; - param.merge_version_ = ObVersion::MIN_VERSION; + param.merge_version_ = ObVersionRange::MIN_VERSION; if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_tx_table_merge_dag(param))) { if (OB_EAGAIN != ret && OB_SIZE_OVERFLOW != ret) { TRANS_LOG(WARN, "failed to schedule tablet merge dag", K(ret)); diff --git 
a/src/storage/tx_table/ob_tx_data_memtable.cpp b/src/storage/tx_table/ob_tx_data_memtable.cpp index 0bad18f445..1aa65c408c 100644 --- a/src/storage/tx_table/ob_tx_data_memtable.cpp +++ b/src/storage/tx_table/ob_tx_data_memtable.cpp @@ -452,7 +452,7 @@ int ObTxDataMemtable::flush() param.ls_id_ = freezer_->get_ls_id(); param.tablet_id_ = key_.tablet_id_; param.merge_type_ = MINI_MERGE; - param.merge_version_ = ObVersion::MIN_VERSION; + param.merge_version_ = ObVersionRange::MIN_VERSION; if (OB_FAIL(compaction::ObScheduleDagFunc::schedule_tx_table_merge_dag(param))) { if (OB_EAGAIN != ret && OB_SIZE_OVERFLOW != ret) { STORAGE_LOG(WARN, "failed to schedule tablet merge dag", K(ret)); diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result index 471e85075a..5927169452 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/inner_table_overall.result @@ -549,6 +549,7 @@ select 0xffffffffff & table_id, table_name, table_type, database_id, part_num fr 12325 __all_virtual_query_response_time 2 201001 1 12330 __all_virtual_column_checksum_error_info 2 201001 1 12331 __all_virtual_kvcache_handle_leak_info 2 201001 1 +12334 __all_virtual_tablet_compaction_info 2 201001 1 12335 __all_virtual_ls_replica_task_plan 2 201001 1 12336 __all_virtual_schema_memory 2 201001 1 12337 __all_virtual_schema_slot 2 201001 1 diff --git a/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.cpp b/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.cpp index 4ae14cef8b..fc97f3f68c 100644 --- a/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.cpp +++ b/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.cpp @@ -483,6 +483,20 @@ int ObAdminParserLogEntry::parse_gais_log_() return ret; } +int ObAdminParserLogEntry::parse_reserved_snapshot_log_() +{ + //not 
supported so far, just reserved + int ret = OB_NOT_SUPPORTED; + return ret; +} + +int ObAdminParserLogEntry::parse_medium_log_() +{ + //not supported so far, just reserved + int ret = OB_NOT_SUPPORTED; + return ret; +} + int ObAdminParserLogEntry::parse_different_entry_type_(const logservice::ObLogBaseHeader &header) { int ret = OB_SUCCESS; @@ -549,6 +563,15 @@ int ObAdminParserLogEntry::parse_different_entry_type_(const logservice::ObLogBa ret = parse_gais_log_(); break; } + case oceanbase::logservice::ObLogBaseType::RESERVED_SNAPSHOT_LOG_BASE_TYPE: { + ret = parse_reserved_snapshot_log_(); + break; + } + case oceanbase::logservice::ObLogBaseType::MEDIUM_COMPACTION_LOG_BASE_TYPE: { + ret = parse_medium_log_(); + break; + } + default: { fprintf(stdout, " Unknown Base Log Type : %d\n", header.get_log_type()); LOG_WARN("don't support this log type", K(header.get_log_type())); diff --git a/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.h b/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.h index c6acdf8771..fb331187ae 100644 --- a/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.h +++ b/tools/ob_admin/log_tool/parser/ob_admin_parser_log_entry.h @@ -60,6 +60,8 @@ private: int parse_recovery_ls_service_log_(); int parse_standby_timestamp_log_(); int parse_gais_log_(); + int parse_reserved_snapshot_log_(); + int parse_medium_log_(); //log type belong to trans_service int parse_trans_redo_log_(transaction::ObTxLogBlock &tx_log_block, diff --git a/unittest/share/scheduler/test_dag_net_in_dag_scheduler.cpp b/unittest/share/scheduler/test_dag_net_in_dag_scheduler.cpp index 08a72bd956..31be0dfdff 100644 --- a/unittest/share/scheduler/test_dag_net_in_dag_scheduler.cpp +++ b/unittest/share/scheduler/test_dag_net_in_dag_scheduler.cpp @@ -117,7 +117,7 @@ class ObBasicDag : public ObIDag { public: ObBasicDag() : - ObIDag(ObDagType::DAG_TYPE_MINOR_MERGE), + ObIDag(ObDagType::DAG_TYPE_MAJOR_MERGE), id_(ObTimeUtility::current_time() + random()) {} void 
init(int64_t id) { id_ = id; } @@ -457,11 +457,8 @@ class ObOperator public: ObOperator() : num_(0) {} ~ObOperator() {} - void inc() { ++num_; } - void dec() - { - --num_; - } + void inc() { ATOMIC_INC(&num_); } + void dec() { ATOMIC_DEC(&num_); } private: int64_t num_; }; diff --git a/unittest/share/scheduler/test_dag_scheduler.cpp b/unittest/share/scheduler/test_dag_scheduler.cpp index 94b73d81db..d64f527b04 100644 --- a/unittest/share/scheduler/test_dag_scheduler.cpp +++ b/unittest/share/scheduler/test_dag_scheduler.cpp @@ -377,7 +377,7 @@ class TestDag : public ObIDag { public: TestDag() : - ObIDag(ObDagType::DAG_TYPE_MINOR_MERGE), id_(0), expect_(-1), expect_ret_(0), running_(false), tester_(NULL) { } + ObIDag(ObDagType::DAG_TYPE_MERGE_EXECUTE), id_(0), expect_(-1), expect_ret_(0), running_(false), tester_(NULL) { } explicit TestDag(const ObDagType::ObDagTypeEnum type) : ObIDag(type), id_(0), expect_(-1), expect_ret_(0), running_(false), tester_(NULL) { } virtual ~TestDag() @@ -469,7 +469,7 @@ private: class TestHPDag : public TestDag { public: - TestHPDag() : TestDag(ObDagType::DAG_TYPE_MINOR_MERGE) {} + TestHPDag() : TestDag(ObDagType::DAG_TYPE_MERGE_EXECUTE) {} private: DISALLOW_COPY_AND_ASSIGN(TestHPDag); }; @@ -485,7 +485,7 @@ private: class TestCompMidDag : public TestDag { public: - TestCompMidDag() : TestDag(ObDagType::DAG_TYPE_MINOR_MERGE) {} + TestCompMidDag() : TestDag(ObDagType::DAG_TYPE_MERGE_EXECUTE) {} private: DISALLOW_COPY_AND_ASSIGN(TestCompMidDag); }; @@ -1317,7 +1317,7 @@ TEST_F(TestDagScheduler, test_get_dag_count) int64_t counter = 1; EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_MAJOR_MERGE)); - EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_MINOR_MERGE)); + EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_MERGE_EXECUTE)); EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_DDL)); EXPECT_EQ(-1, scheduler->get_dag_count(ObDagType::DAG_TYPE_MAX)); @@ -1328,7 +1328,7 @@ TEST_F(TestDagScheduler, 
test_get_dag_count) EXPECT_EQ(OB_SUCCESS, dag->add_task(*mul_task)); EXPECT_EQ(OB_SUCCESS, scheduler->add_dag(dag)); sleep(1); - EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_MINOR_MERGE)); + EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_MERGE_EXECUTE)); EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_MAJOR_MERGE)); EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_DDL)); EXPECT_EQ(-1, scheduler->get_dag_count(ObDagType::DAG_TYPE_MAX)); @@ -1339,7 +1339,7 @@ TEST_F(TestDagScheduler, test_get_dag_count) EXPECT_EQ(OB_SUCCESS, mul_task->init(&counter)); EXPECT_EQ(OB_SUCCESS, dag->add_task(*mul_task)); EXPECT_EQ(OB_SUCCESS, scheduler->add_dag(dag)); - EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_MINOR_MERGE)); + EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_MERGE_EXECUTE)); EXPECT_EQ(1, scheduler->get_dag_count(ObDagType::DAG_TYPE_MAJOR_MERGE)); EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_DDL)); EXPECT_EQ(-1, scheduler->get_dag_count(ObDagType::DAG_TYPE_MAX)); @@ -1349,7 +1349,7 @@ TEST_F(TestDagScheduler, test_get_dag_count) EXPECT_EQ(OB_SUCCESS, mul_task2->init(&counter)); EXPECT_EQ(OB_SUCCESS, dag2->add_task(*mul_task2)); EXPECT_EQ(OB_SUCCESS, scheduler->add_dag(dag2)); - EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_MINOR_MERGE)); + EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_MERGE_EXECUTE)); EXPECT_EQ(2, scheduler->get_dag_count(ObDagType::DAG_TYPE_MAJOR_MERGE)); EXPECT_EQ(0, scheduler->get_dag_count(ObDagType::DAG_TYPE_DDL)); EXPECT_EQ(-1, scheduler->get_dag_count(ObDagType::DAG_TYPE_MAX)); diff --git a/unittest/storage/CMakeLists.txt b/unittest/storage/CMakeLists.txt index 9332b687ae..9e3a6f24ee 100644 --- a/unittest/storage/CMakeLists.txt +++ b/unittest/storage/CMakeLists.txt @@ -31,7 +31,8 @@ storage_unittest(test_parallel_external_sort) storage_unittest(test_i_store) storage_unittest(test_sstable_merge_info_mgr) 
#storage_unittest(test_row_sample_iterator) -storage_unittest(test_table_store_stat_mgr) +#storage_unittest(test_table_store_stat_mgr) +storage_unittest(test_tenant_tablet_stat_mgr) #storage_unittest(test_dag_size) storage_unittest(test_handle_cache) #storage_unittest(test_log_replay_engine replayengine/test_log_replay_engine.cpp) @@ -69,8 +70,10 @@ storage_unittest(test_backup_extern_info_mgr backup/test_backup_extern_info_mgr. storage_unittest(test_simple_rows_merger) storage_unittest(test_partition_incremental_range_spliter) storage_unittest(test_partition_major_sstable_range_spliter) +storage_unittest(test_parallel_minor_dag) storage_dml_unittest(test_major_rows_merger) #storage_dml_unittest(test_table_scan_pure_index_table) storage_unittest(test_sstable_log_ts_range_cut test_sstable_log_ts_range_cut.cpp) +storage_unittest(test_medium_compaction_mgr test_medium_compaction_mgr.cpp) diff --git a/unittest/storage/blocksstable/ob_multi_version_sstable_test.h b/unittest/storage/blocksstable/ob_multi_version_sstable_test.h index 982369bd4d..0a6f22bba3 100644 --- a/unittest/storage/blocksstable/ob_multi_version_sstable_test.h +++ b/unittest/storage/blocksstable/ob_multi_version_sstable_test.h @@ -164,7 +164,7 @@ public: const int64_t micro_cnt, const int64_t max_merged_trans_version = INT64_MAX - 2, const bool contain_uncommitted = false); - void prepare_data_end(ObTableHandleV2 &handle); + void prepare_data_end(ObTableHandleV2 &handle, const ObITable::TableType &table_type = ObITable::MINI_SSTABLE); void append_micro_block(ObMockIterator &data_iter); protected: @@ -294,13 +294,13 @@ ObITable::TableType ObMultiVersionSSTableTest::get_merged_table_type() const ObITable::TableType table_type = ObITable::MAX_TABLE_TYPE; if (MAJOR_MERGE == merge_type_) { table_type = ObITable::TableType::MAJOR_SSTABLE; - } else if (MINI_MERGE == merge_type_ || MINI_MINOR_MERGE == merge_type_) { + } else if (MINI_MERGE == merge_type_) { table_type = ObITable::TableType::MINI_SSTABLE; - } 
else if (BUF_MINOR_MERGE == merge_type_) { - table_type = ObITable::TableType::BUF_MINOR_SSTABLE; + } else if (META_MAJOR_MERGE == merge_type_) { + table_type = ObITable::TableType::META_MAJOR_SSTABLE; } else if (DDL_KV_MERGE == merge_type_) { table_type = ObITable::TableType::KV_DUMP_SSTABLE; - } else { // MINOR_MERGE || HISTORY_MINI_MINOR_MERGE + } else { // MINOR_MERGE table_type = ObITable::TableType::MINOR_SSTABLE; } return table_type; @@ -509,7 +509,9 @@ void ObMultiVersionSSTableTest::append_micro_block(ObMockIterator &data_iter) } } -void ObMultiVersionSSTableTest::prepare_data_end(ObTableHandleV2 &handle) +void ObMultiVersionSSTableTest::prepare_data_end( + ObTableHandleV2 &handle, + const ObITable::TableType &table_type) { ASSERT_EQ(OB_SUCCESS, macro_writer_.close()); ObSSTableMergeRes res; @@ -518,6 +520,7 @@ void ObMultiVersionSSTableTest::prepare_data_end(ObTableHandleV2 &handle) ASSERT_EQ(OB_SUCCESS, root_index_builder_->close(column_cnt, res)); ObTabletCreateSSTableParam param; + table_key_.table_type_ = table_type; param.table_key_ = table_key_; param.schema_version_ = SCHEMA_VERSION; param.create_snapshot_version_ = 0; @@ -548,7 +551,7 @@ void ObMultiVersionSSTableTest::prepare_data_end(ObTableHandleV2 &handle) param.nested_size_ = res.nested_size_; param.nested_offset_ = res.nested_offset_; param.ddl_scn_.set_min(); - if (merge_type_ == MAJOR_MERGE) { + if (table_type == ObITable::MAJOR_SSTABLE) { ASSERT_EQ(OB_SUCCESS, ObSSTableMergeRes::fill_column_checksum_for_empty_major(param.column_cnt_, param.column_checksums_)); } diff --git a/unittest/storage/blocksstable/test_row_reader.cpp b/unittest/storage/blocksstable/test_row_reader.cpp index 84cc6755e3..b19ad670b5 100644 --- a/unittest/storage/blocksstable/test_row_reader.cpp +++ b/unittest/storage/blocksstable/test_row_reader.cpp @@ -774,7 +774,7 @@ TEST_F(TestNewRowReader, test_macro_block) ObDataStoreDesc desc; int64_t data_version = 1; - ret = desc.init(table_schema_, data_version, 1, 
MINI_MINOR_MERGE, true); + ret = desc.init(table_schema_, data_version, 1, MINOR_MERGE, true); ASSERT_EQ(OB_SUCCESS, ret); ret = writer.open(desc, start_seq); ASSERT_EQ(OB_SUCCESS, ret); diff --git a/unittest/storage/test_compaction_policy.cpp b/unittest/storage/test_compaction_policy.cpp index d0ab260491..1396ffa201 100644 --- a/unittest/storage/test_compaction_policy.cpp +++ b/unittest/storage/test_compaction_policy.cpp @@ -106,6 +106,15 @@ public: common::ObIArray &freeze_infos, common::ObIArray &snapshots); + void prepare_schema(share::schema::ObTableSchema &table_schema); + int prepare_medium_list( + const char *snapshot_list, + ObTabletHandle &tablet_handle); + int construct_array( + const char *snapshot_list, + ObIArray &array); + int check_result_tables_handle(const char *end_log_ts_list, const ObGetMergeTablesResult &result); + public: TestCompactionPolicy(); ~TestCompactionPolicy() = default; @@ -129,6 +138,9 @@ public: ObSEArray major_tables_; ObSEArray minor_tables_; ObSEArray memtables_; + ObMediumCompactionInfo medium_info_; + ObSEArray array_; + ObArenaAllocator allocator_; }; TestCompactionPolicy::TestCompactionPolicy() @@ -151,6 +163,15 @@ void TestCompactionPolicy::SetUp() t3m->destroy(); ret = t3m->init(); ASSERT_EQ(OB_SUCCESS, ret); + + share::schema::ObTableSchema table_schema; + prepare_schema(table_schema); + + medium_info_.compaction_type_ = ObMediumCompactionInfo::MEDIUM_COMPACTION; + medium_info_.medium_snapshot_ = 100; + medium_info_.medium_scn_.convert_for_tx(100); + + medium_info_.storage_schema_.init(allocator_, table_schema, lib::Worker::CompatMode::MYSQL); } void TestCompactionPolicy::TearDown() @@ -368,6 +389,64 @@ int TestCompactionPolicy::mock_tablet( return ret; } +int TestCompactionPolicy::construct_array( + const char *snapshot_list, + ObIArray &array) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(snapshot_list)) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "invalid argument", K(ret), K(snapshot_list)); + } else { + 
array.reset(); + std::string copy(snapshot_list); + char *org = const_cast(copy.c_str()); + static const char *delim = " "; + char *s = std::strtok(org, delim); + if (NULL != s) { + array.push_back(atoi(s)); + while (NULL != (s= strtok(NULL, delim))) { + array.push_back(atoi(s)); + } + } + } + return ret; +} + +int TestCompactionPolicy::prepare_medium_list( + const char *snapshot_list, + ObTabletHandle &tablet_handle) +{ + int ret = OB_SUCCESS; + ObTablet &tablet = *tablet_handle.get_obj(); + construct_array(snapshot_list, array_); + tablet.medium_info_list_.reset_list(); + for (int i = 0; OB_SUCC(ret) && i < array_.count(); ++i) { + medium_info_.medium_snapshot_ = array_.at(i); + ret = tablet.medium_info_list_.add_medium_compaction_info(medium_info_); + } + return ret; +} + +int TestCompactionPolicy::check_result_tables_handle( + const char *end_log_ts_list, + const ObGetMergeTablesResult &result) +{ + int ret = OB_SUCCESS; + construct_array(end_log_ts_list, array_); + if (array_.count() != result.handle_.get_count()) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "table count is not equal", K(ret), K(array_), K(result.handle_)); + } + for (int i = 0; OB_SUCC(ret) && i < array_.count(); ++i) { + if (array_.at(i) != result.handle_.get_table(i)->get_end_scn().get_val_for_tx()) { + ret = OB_ERR_UNEXPECTED; + COMMON_LOG(WARN, "table is not equal", K(ret), K(i), K(array_.at(i)), KPC(result.handle_.get_table(i))); + } + } + return ret; +} + int TestCompactionPolicy::mock_table_store( ObTabletHandle &tablet_handle, common::ObIArray &major_table_handles, @@ -547,6 +626,72 @@ int TestCompactionPolicy::prepare_freeze_info( return ret; } +class FakeLS : public storage::ObLS +{ +public: + FakeLS() { + ls_meta_.tenant_id_ = 1001; + ls_meta_.ls_id_ = ObLSID(100); + } + int64_t get_min_reserved_snapshot() { return 10; } +}; + + +static const int64_t TENANT_ID = 1; +static const int64_t TABLE_ID = 7777; +static const int64_t TEST_ROWKEY_COLUMN_CNT = 3; +static const int64_t 
TEST_COLUMN_CNT = 6; + +void TestCompactionPolicy::prepare_schema(share::schema::ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + int64_t micro_block_size = 16 * 1024; + const uint64_t tenant_id = TENANT_ID; + const uint64_t table_id = TABLE_ID; + share::schema::ObColumnSchemaV2 column; + + //generate data table schema + table_schema.reset(); + ret = table_schema.set_table_name("test_merge_multi_version"); + ASSERT_EQ(OB_SUCCESS, ret); + table_schema.set_tenant_id(tenant_id); + table_schema.set_tablegroup_id(1); + table_schema.set_database_id(1); + table_schema.set_table_id(table_id); + table_schema.set_rowkey_column_num(TEST_ROWKEY_COLUMN_CNT); + table_schema.set_max_used_column_id(TEST_COLUMN_CNT); + table_schema.set_block_size(micro_block_size); + table_schema.set_compress_func_name("none"); + table_schema.set_row_store_type(FLAT_ROW_STORE); + //init column + char name[OB_MAX_FILE_NAME_LENGTH]; + memset(name, 0, sizeof(name)); + const int64_t column_ids[] = {16,17,20,21,22,23,24,29}; + for(int64_t i = 0; i < TEST_COLUMN_CNT; ++i){ + ObObjType obj_type = ObIntType; + const int64_t column_id = column_ids[i]; + + if (i == 1) { + obj_type = ObVarcharType; + } + column.reset(); + column.set_table_id(table_id); + column.set_column_id(column_id); + sprintf(name, "test%020ld", i); + ASSERT_EQ(OB_SUCCESS, column.set_column_name(name)); + column.set_data_type(obj_type); + column.set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI); + column.set_data_length(10); + if (i < TEST_ROWKEY_COLUMN_CNT) { + column.set_rowkey_position(i + 1); + } else { + column.set_rowkey_position(0); + } + COMMON_LOG(INFO, "add column", K(i), K(column)); + ASSERT_EQ(OB_SUCCESS, table_schema.add_column(column)); + } + COMMON_LOG(INFO, "dump stable schema", LITERAL_K(TEST_ROWKEY_COLUMN_CNT), K(table_schema)); +} TEST_F(TestCompactionPolicy, basic_create_sstable) { @@ -720,12 +865,13 @@ TEST_F(TestCompactionPolicy, check_mini_merge_basic) ret = prepare_tablet(key_data, 150, 150); 
ASSERT_EQ(OB_SUCCESS, ret); + FakeLS ls; ObGetMergeTablesParam param; param.merge_type_ = ObMergeType::MINI_MERGE; ObGetMergeTablesResult result; tablet_handle_.get_obj()->tablet_meta_.clog_checkpoint_scn_.convert_for_tx(300); tablet_handle_.get_obj()->tablet_meta_.snapshot_version_ = 300; - ret = ObPartitionMergePolicy::get_mini_merge_tables(param, 0, *tablet_handle_.get_obj(), result); + ret = ObPartitionMergePolicy::get_mini_merge_tables(param, ls, *tablet_handle_.get_obj(), result); ASSERT_EQ(OB_NO_NEED_MERGE, ret); ASSERT_EQ(result.update_tablet_directly_, false); } @@ -756,9 +902,10 @@ TEST_F(TestCompactionPolicy, check_minor_merge_basic) ASSERT_EQ(OB_SUCCESS, ret); ObGetMergeTablesParam param; - param.merge_type_ = ObMergeType::MINI_MINOR_MERGE; + param.merge_type_ = ObMergeType::MINOR_MERGE; ObGetMergeTablesResult result; - ret = ObPartitionMergePolicy::get_mini_minor_merge_tables(param, 0, *tablet_handle_.get_obj(), result); + FakeLS ls; + ret = ObPartitionMergePolicy::get_minor_merge_tables(param, ls, *tablet_handle_.get_obj(), result); ASSERT_EQ(OB_SUCCESS, ret); ASSERT_EQ(5, result.handle_.get_count()); } @@ -792,9 +939,10 @@ TEST_F(TestCompactionPolicy, check_no_need_minor_merge) ASSERT_EQ(OB_SUCCESS, ret); ObGetMergeTablesParam param; - param.merge_type_ = ObMergeType::MINI_MINOR_MERGE; + param.merge_type_ = ObMergeType::MINOR_MERGE; ObGetMergeTablesResult result; - ret = ObPartitionMergePolicy::get_mini_minor_merge_tables(param, 0, *tablet_handle_.get_obj(), result); + FakeLS ls; + ret = ObPartitionMergePolicy::get_minor_merge_tables(param, ls, *tablet_handle_.get_obj(), result); ASSERT_EQ(OB_NO_NEED_MERGE, ret); } @@ -828,7 +976,8 @@ TEST_F(TestCompactionPolicy, check_major_merge_basic) param.merge_type_ = ObMergeType::MAJOR_MERGE; param.merge_version_ = 340; ObGetMergeTablesResult result; - ret = ObPartitionMergePolicy::get_major_merge_tables(param, 0, *tablet_handle_.get_obj(), result); + FakeLS ls; + ret = 
ObPartitionMergePolicy::get_medium_merge_tables(param, ls, *tablet_handle_.get_obj(), result); ASSERT_EQ(OB_SUCCESS, ret); ASSERT_EQ(6, result.handle_.get_count()); } @@ -863,10 +1012,58 @@ TEST_F(TestCompactionPolicy, check_no_need_major_merge) param.merge_type_ = ObMergeType::MAJOR_MERGE; param.merge_version_ = 340; ObGetMergeTablesResult result; - ret = ObPartitionMergePolicy::get_major_merge_tables(param, 0, *tablet_handle_.get_obj(), result); + FakeLS ls; + ret = ObPartitionMergePolicy::get_medium_merge_tables(param, ls, *tablet_handle_.get_obj(), result); ASSERT_EQ(OB_NO_NEED_MERGE, ret); } +TEST_F(TestCompactionPolicy, test_minor_with_medium) +{ + int ret = OB_SUCCESS; + ObTenantFreezeInfoMgr *mgr = MTL(ObTenantFreezeInfoMgr *); + ASSERT_TRUE(nullptr != mgr); + + common::ObArray freeze_info; + common::ObArray snapshots; + share::SCN scn; + ASSERT_EQ(OB_SUCCESS, freeze_info.push_back(ObTenantFreezeInfoMgr::FreezeInfo(1, 1, 0))); + ASSERT_EQ(OB_SUCCESS, freeze_info.push_back(ObTenantFreezeInfoMgr::FreezeInfo(140, 1, 0))); + + ret = TestCompactionPolicy::prepare_freeze_info(500, freeze_info, snapshots); + ASSERT_EQ(OB_SUCCESS, ret); + + const char *key_data = + "table_type start_scn end_scn max_ver upper_ver\n" + "10 0 1 1 1 \n" + "11 150 200 200 200 \n" + "11 200 250 250 250 \n" + "11 250 300 300 300 \n" + "11 300 340 340 340 \n"; + + ret = prepare_tablet(key_data, 340, 340); + ASSERT_EQ(OB_SUCCESS, ret); + + ObGetMergeTablesParam param; + param.merge_type_ = ObMergeType::MINOR_MERGE; + param.merge_version_ = 0; + ObGetMergeTablesResult result; + FakeLS ls; + + prepare_medium_list("240", tablet_handle_); + ret = ObPartitionMergePolicy::get_minor_merge_tables(param, ls, *tablet_handle_.get_obj(), result); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_EQ(OB_SUCCESS, check_result_tables_handle("250, 300, 340", result)); + + prepare_medium_list("150", tablet_handle_); + ret = ObPartitionMergePolicy::get_minor_merge_tables(param, ls, *tablet_handle_.get_obj(), result); + 
ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_EQ(OB_SUCCESS, check_result_tables_handle("200, 250, 300, 340", result)); + + prepare_medium_list("300", tablet_handle_); + ret = ObPartitionMergePolicy::get_minor_merge_tables(param, ls, *tablet_handle_.get_obj(), result); + ASSERT_EQ(OB_NO_NEED_MERGE, ret); + +} } //unittest } //oceanbase diff --git a/unittest/storage/test_dag_warning_history.cpp b/unittest/storage/test_dag_warning_history.cpp index 9a373b9dd2..4ff9e4e335 100644 --- a/unittest/storage/test_dag_warning_history.cpp +++ b/unittest/storage/test_dag_warning_history.cpp @@ -50,7 +50,7 @@ TEST_F(TestDagWarningHistory, simple_add) info->dag_ret_ = -4016; info->dag_status_ = ObDagWarningInfo::ODS_WARNING; - info->dag_type_ = share::ObDagType::DAG_TYPE_MINOR_MERGE; + info->dag_type_ = share::ObDagType::DAG_TYPE_MERGE_EXECUTE; strcpy(info->warning_info_, "table_id=1101710651081571, partition_id=66, mini merge error"); ObDagWarningInfo *ret_info = NULL; @@ -79,7 +79,7 @@ TEST_F(TestDagWarningHistory, simple_del_with_no_lock) ASSERT_EQ(OB_SUCCESS, manager.alloc_and_add_with_no_lock(key, info)); info->dag_ret_ = -4016; info->dag_status_ = ObDagWarningInfo::ODS_WARNING; - info->dag_type_ = share::ObDagType::DAG_TYPE_MINOR_MERGE; + info->dag_type_ = share::ObDagType::DAG_TYPE_MERGE_EXECUTE; strcpy(info->warning_info_, "table_id=1101710651081571, partition_id=66, mini merge error"); ASSERT_EQ(OB_HASH_NOT_EXIST, manager.del_with_no_lock(key + 1)); @@ -102,7 +102,7 @@ TEST_F(TestDagWarningHistory, simple_loop_get) basic_info.tenant_id_ = tenant_id; basic_info.dag_ret_ = -4016; basic_info.dag_status_ = ObDagWarningInfo::ODS_WARNING; - basic_info.dag_type_ = share::ObDagType::DAG_TYPE_MINOR_MERGE; + basic_info.dag_type_ = share::ObDagType::DAG_TYPE_MERGE_EXECUTE; strcpy(basic_info.warning_info_, "table_id=1101710651081571, partition_id=66, mini merge error"); const int64_t max_cnt = 20; @@ -114,7 +114,7 @@ TEST_F(TestDagWarningHistory, simple_loop_get) info->tenant_id_ = 
tenant_id; info->dag_ret_ = -4016 + i; info->dag_status_ = ObDagWarningInfo::ODS_WARNING; - info->dag_type_ = share::ObDagType::DAG_TYPE_MINOR_MERGE; + info->dag_type_ = share::ObDagType::DAG_TYPE_MERGE_EXECUTE; strcpy(info->warning_info_, "table_id=1101710651081571, partition_id=66, mini merge error"); } ObDagWarningInfoIterator iterator; @@ -166,7 +166,7 @@ TEST_F(TestDagWarningHistory, test_rebuild) basic_info.tenant_id_ = tenant_id; basic_info.dag_ret_ = -4016; basic_info.dag_status_ = ObDagWarningInfo::ODS_WARNING; - basic_info.dag_type_ = share::ObDagType::DAG_TYPE_MINOR_MERGE; + basic_info.dag_type_ = share::ObDagType::DAG_TYPE_MERGE_EXECUTE; strcpy(basic_info.warning_info_, "table_id=1101710651081571, partition_id=66, mini merge error"); ObDagWarningInfo *info = NULL; @@ -177,7 +177,7 @@ TEST_F(TestDagWarningHistory, test_rebuild) info->tenant_id_ = tenant_id; info->dag_ret_ = -4016 + i; info->dag_status_ = ObDagWarningInfo::ODS_WARNING; - info->dag_type_ = share::ObDagType::DAG_TYPE_MINOR_MERGE; + info->dag_type_ = share::ObDagType::DAG_TYPE_MERGE_EXECUTE; strcpy(info->warning_info_, "table_id=1101710651081571, partition_id=66, mini merge error"); STORAGE_LOG(DEBUG, "print info", K(ret), K(i), K(key), KPC(info)); } diff --git a/unittest/storage/test_major_rows_merger.cpp b/unittest/storage/test_major_rows_merger.cpp index c2a2c88034..b473d92ebf 100644 --- a/unittest/storage/test_major_rows_merger.cpp +++ b/unittest/storage/test_major_rows_merger.cpp @@ -140,8 +140,6 @@ void ObMajorRowsMergerTest::prepare_merge_context(const ObMergeType &merge_type, merge_context.schema_ctx_.base_schema_version_ = table_schema_.get_schema_version(); merge_context.schema_ctx_.schema_version_ = table_schema_.get_schema_version(); merge_context.schema_ctx_.storage_schema_ = &table_merge_schema_; - merge_context.schema_ctx_.merge_schema_ = &table_merge_schema_; - merge_context.schema_ctx_.table_schema_ = &table_schema_; merge_context.is_full_merge_ = is_full_merge; 
merge_context.merge_level_ = MACRO_BLOCK_MERGE_LEVEL; @@ -225,7 +223,7 @@ TEST_F(ObMajorRowsMergerTest, tset_compare_func) trans_version_range.multi_version_start_ = 1; trans_version_range.base_version_ = 1; - prepare_merge_context(MINI_MINOR_MERGE, false, trans_version_range, merge_context); + prepare_merge_context(MINOR_MERGE, false, trans_version_range, merge_context); ObMergeParameter merge_param; OK(merge_param.init(merge_context, 0)); ObPartitionMergeIter *iter_0 = nullptr; @@ -335,7 +333,7 @@ TEST_F(ObMajorRowsMergerTest, single) trans_version_range.multi_version_start_ = 1; trans_version_range.base_version_ = 1; - prepare_merge_context(MINI_MINOR_MERGE, false, trans_version_range, merge_context); + prepare_merge_context(MINOR_MERGE, false, trans_version_range, merge_context); ObMergeParameter merge_param; OK(merge_param.init(merge_context, 0)); ObPartitionMergeIter *iter_0 = nullptr; @@ -411,7 +409,7 @@ TEST_F(ObMajorRowsMergerTest, two_iters) prepare_table_schema(micro_data, schema_rowkey_cnt, scn_range, snapshot_version); reset_writer(snapshot_version); prepare_one_macro(micro_data, 1); - prepare_data_end(handle1); + prepare_data_end(handle1, storage::ObITable::MAJOR_SSTABLE); merge_context.tables_handle_.add_table(handle1); STORAGE_LOG(INFO, "finish prepare sstable1"); @@ -502,4 +500,4 @@ int main(int argc, char **argv) oceanbase::common::ObLogger::get_logger().set_log_level("INFO"); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); -} \ No newline at end of file +} diff --git a/unittest/storage/test_medium_compaction_mgr.cpp b/unittest/storage/test_medium_compaction_mgr.cpp new file mode 100644 index 0000000000..dcb7d51de1 --- /dev/null +++ b/unittest/storage/test_medium_compaction_mgr.cpp @@ -0,0 +1,234 @@ +// Copyright 2019-2021 Alibaba Inc. All Rights Reserved. 
+// Author: +// lixia.yq@antgroup.com +// This file defines test_medium_compaction_mgr.cpp +// + +#include +#define protected public +#define private public +#include +#include "storage/compaction/ob_medium_compaction_mgr.h" +#include "share/schema/ob_column_schema.h" + +namespace oceanbase +{ +using namespace common; +using namespace compaction; +using namespace storage; + +namespace unittest +{ + +class TestMediumCompactionMgr : public ::testing::Test +{ +public: + virtual void SetUp() + { + share::schema::ObTableSchema table_schema; + prepare_schema(table_schema); + + medium_info_.compaction_type_ = ObMediumCompactionInfo::MEDIUM_COMPACTION; + medium_info_.medium_snapshot_ = 100; + medium_info_.medium_scn_.convert_for_tx(100); + medium_info_.cluster_id_ = INIT_CLUSTER_ID; + + medium_info_.storage_schema_.init(allocator_, table_schema, lib::Worker::CompatMode::MYSQL); + GCONF.cluster_id = 1; + } + virtual void TearDown() + { + medium_info_.reset(); + allocator_.reset(); + } + int construct_list( + const char *snapshot_list, + ObMediumCompactionInfoList &list, + const int64_t cluster_id = INIT_CLUSTER_ID); + int construct_array( + const char *snapshot_list, + ObIArray &array); + void prepare_schema(share::schema::ObTableSchema &table_schema); + + static const int64_t INIT_CLUSTER_ID = 1; + static const int64_t OTHER_CLUSTER_ID = 2; +private: + ObMediumCompactionInfo medium_info_; + ObSEArray array_; + ObArenaAllocator allocator_; +}; + +int TestMediumCompactionMgr::construct_array( + const char *snapshot_list, + ObIArray &array) +{ + int ret = OB_SUCCESS; + if (OB_ISNULL(snapshot_list)) { + ret = OB_INVALID_ARGUMENT; + COMMON_LOG(WARN, "invalid argument", K(ret), K(snapshot_list)); + } else { + array.reset(); + std::string copy(snapshot_list); + char *org = const_cast(copy.c_str()); + static const char *delim = " "; + char *s = std::strtok(org, delim); + if (NULL != s) { + array.push_back(atoi(s)); + while (NULL != (s= strtok(NULL, delim))) { + 
array.push_back(atoi(s)); + } + } + } + return ret; +} + +int TestMediumCompactionMgr::construct_list( + const char *snapshot_list, + ObMediumCompactionInfoList &list, + const int64_t cluster_id) +{ + int ret = OB_SUCCESS; + if (!list.is_inited_ && OB_FAIL(list.init(allocator_))) { + COMMON_LOG(WARN, "failed to init list", K(ret)); + } + construct_array(snapshot_list, array_); + for (int i = 0; OB_SUCC(ret) && i < array_.count(); ++i) { + medium_info_.cluster_id_ = cluster_id; + medium_info_.medium_snapshot_ = array_.at(i); + ret = list.add_medium_compaction_info(medium_info_); + } + return ret; +} + +static const int64_t TENANT_ID = 1; +static const int64_t TABLE_ID = 7777; +static const int64_t TEST_ROWKEY_COLUMN_CNT = 3; +static const int64_t TEST_COLUMN_CNT = 6; + +void TestMediumCompactionMgr::prepare_schema(share::schema::ObTableSchema &table_schema) +{ + int ret = OB_SUCCESS; + int64_t micro_block_size = 16 * 1024; + const uint64_t tenant_id = TENANT_ID; + const uint64_t table_id = TABLE_ID; + share::schema::ObColumnSchemaV2 column; + + //generate data table schema + table_schema.reset(); + ret = table_schema.set_table_name("test_merge_multi_version"); + ASSERT_EQ(OB_SUCCESS, ret); + table_schema.set_tenant_id(tenant_id); + table_schema.set_tablegroup_id(1); + table_schema.set_database_id(1); + table_schema.set_table_id(table_id); + table_schema.set_rowkey_column_num(TEST_ROWKEY_COLUMN_CNT); + table_schema.set_max_used_column_id(TEST_COLUMN_CNT); + table_schema.set_block_size(micro_block_size); + table_schema.set_compress_func_name("none"); + table_schema.set_row_store_type(FLAT_ROW_STORE); + //init column + char name[OB_MAX_FILE_NAME_LENGTH]; + memset(name, 0, sizeof(name)); + const int64_t column_ids[] = {16,17,20,21,22,23,24,29}; + for(int64_t i = 0; i < TEST_COLUMN_CNT; ++i){ + ObObjType obj_type = ObIntType; + const int64_t column_id = column_ids[i]; + + if (i == 1) { + obj_type = ObVarcharType; + } + column.reset(); + column.set_table_id(table_id); + 
column.set_column_id(column_id); + sprintf(name, "test%020ld", i); + ASSERT_EQ(OB_SUCCESS, column.set_column_name(name)); + column.set_data_type(obj_type); + column.set_collation_type(CS_TYPE_UTF8MB4_GENERAL_CI); + column.set_data_length(10); + if (i < TEST_ROWKEY_COLUMN_CNT) { + column.set_rowkey_position(i + 1); + } else { + column.set_rowkey_position(0); + } + COMMON_LOG(INFO, "add column", K(i), K(column)); + ASSERT_EQ(OB_SUCCESS, table_schema.add_column(column)); + } + COMMON_LOG(INFO, "dump stable schema", LITERAL_K(TEST_ROWKEY_COLUMN_CNT), K(table_schema)); +} + +TEST_F(TestMediumCompactionMgr, test_basic_init) +{ + ObMediumCompactionInfoList list_1; + ASSERT_EQ(OB_SUCCESS, construct_list("300, 400, 500", list_1)); + + ObMediumCompactionInfoList list_2; + ASSERT_EQ(OB_SUCCESS, construct_list("100, 200", list_2)); + + ObMediumCompactionInfoList out_list; + ASSERT_EQ(OB_SUCCESS, out_list.init(allocator_, &list_2, &list_1, 0)); + + ASSERT_EQ(5, out_list.size()); + ASSERT_EQ(100, out_list.get_min_medium_snapshot()); + ASSERT_EQ(500, out_list.get_max_medium_snapshot()); + + out_list.reset(); + + ASSERT_EQ(OB_SUCCESS, out_list.init(allocator_, &list_2, &list_1, 400)); + ASSERT_EQ(1, out_list.size()); + ASSERT_EQ(500, out_list.get_min_medium_snapshot()); + ASSERT_EQ(500, out_list.get_max_medium_snapshot()); +} + +TEST_F(TestMediumCompactionMgr, test_push_list_error) +{ + ObMediumCompactionInfoList test_list; + ASSERT_EQ(OB_SUCCESS, construct_list("300, 500, 400", test_list)); + ASSERT_EQ(test_list.get_max_medium_snapshot(), 500); + ASSERT_EQ(test_list.get_list().get_size(), 2); + + medium_info_.medium_snapshot_ = 900; + ASSERT_EQ(OB_SUCCESS, test_list.add_medium_compaction_info(medium_info_)); + ASSERT_EQ(test_list.get_list().get_size(), 3); + + medium_info_.medium_snapshot_ = 700; + ASSERT_EQ(OB_SUCCESS, test_list.add_medium_compaction_info(medium_info_)); + ASSERT_EQ(test_list.get_list().get_size(), 3); + ASSERT_EQ(test_list.get_max_medium_snapshot(), 900); + + 
medium_info_.medium_snapshot_ = 1000; + ASSERT_EQ(OB_SUCCESS, test_list.add_medium_compaction_info(medium_info_)); + ASSERT_EQ(test_list.get_list().get_size(), 4); + + medium_info_.medium_snapshot_ = 1200; + ASSERT_EQ(OB_SUCCESS, test_list.add_medium_compaction_info(medium_info_)); + ASSERT_EQ(test_list.get_list().get_size(), 5); + + medium_info_.medium_snapshot_ = 100; + ASSERT_EQ(OB_SUCCESS, test_list.add_medium_compaction_info(medium_info_)); + ASSERT_EQ(test_list.get_list().get_size(), 5); + + medium_info_.medium_snapshot_ = 100; + ASSERT_EQ(test_list.size(), 5); // 300 500 900 1000 1200 + + ObMediumCompactionInfoList test_list_2; + ASSERT_EQ(OB_SUCCESS, test_list_2.init(allocator_, &test_list, nullptr, 900)); + ASSERT_EQ(1000, test_list_2.get_min_medium_snapshot()); + ASSERT_EQ(test_list_2.size(), 2); + const ObMediumCompactionInfo *ret_info = nullptr; + ASSERT_EQ(OB_ENTRY_NOT_EXIST, test_list_2.get_specified_scn_info(900, ret_info)); + + test_list_2.reset(); + ASSERT_EQ(OB_SUCCESS, test_list_2.init(allocator_, &test_list, nullptr, 900)); +} + +}//end namespace unittest +}//end namespace oceanbase + +int main(int argc, char **argv) +{ + system("rm -f test_medium_compaction_mgr.log*"); + OB_LOGGER.set_file_name("test_medium_compaction_mgr.log"); + OB_LOGGER.set_log_level("DEBUG"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/storage/test_parallel_minor_dag.cpp b/unittest/storage/test_parallel_minor_dag.cpp new file mode 100644 index 0000000000..dbac45b47f --- /dev/null +++ b/unittest/storage/test_parallel_minor_dag.cpp @@ -0,0 +1,291 @@ +// Copyright (c) 2019-2021 Alibaba Inc. All Rights Reserved. 
+// Author: +// lixia.yq@antfin.com +// + +#include + +#define private public +#define protected public + +#include "storage/compaction/ob_partition_merge_policy.h" +#include "storage/ob_storage_struct.h" +#include "storage/blocksstable/ob_sstable.h" +#include "share/rc/ob_tenant_base.h" +#include "storage/meta_mem/ob_tenant_meta_mem_mgr.h" + +namespace oceanbase +{ +using namespace common; +using namespace storage; +using namespace blocksstable; +using namespace compaction; +using namespace omt; +using namespace share; + +namespace unittest +{ + +class TestParallelMinorDag : public ::testing::Test +{ +public: + TestParallelMinorDag() : allocator_(ObModIds::TEST), tenant_base_(500) {} + virtual ~TestParallelMinorDag() {} + int prepare_merge_result(const int64_t sstable_cnt, ObGetMergeTablesResult &result); + + void SetUp() + { + ObTenantMetaMemMgr *t3m = OB_NEW(ObTenantMetaMemMgr, ObModIds::TEST, 500); + tenant_base_.set(t3m); + + ObTenantEnv::set_tenant(&tenant_base_); + ASSERT_EQ(OB_SUCCESS, tenant_base_.init()); + + ASSERT_EQ(OB_SUCCESS, t3m->init()); + } + + share::SCN get_start_log_ts(const int64_t idx); + share::SCN get_end_log_ts(const int64_t idx); + void check_result(const int64_t sstable_cnt, const int64_t result_cnt); + + static const int64_t TENANT_ID = 1; + static const int64_t TABLE_ID = 7777; + static const int64_t TEST_ROWKEY_COLUMN_CNT = 3; + static const int64_t TEST_COLUMN_CNT = 6; + static const int64_t MAX_SSTABLE_CNT = 60; + + common::ObArenaAllocator allocator_; + ObTenantBase tenant_base_; + ObSSTable fake_sstables_[MAX_SSTABLE_CNT]; +}; + +int TestParallelMinorDag::prepare_merge_result( + const int64_t sstable_cnt, + ObGetMergeTablesResult &result) +{ + int ret = OB_SUCCESS; + result.reset(); + + result.version_range_.base_version_ = 50; + result.version_range_.snapshot_version_ = 100; + result.version_range_.multi_version_start_ = 100; + result.merge_version_ = 0; + result.base_schema_version_ = 0; + result.schema_version_ = 0; + 
result.create_snapshot_version_ = 0; + result.suggest_merge_type_ = MINOR_MERGE; + + int64_t log_ts = 1; + for (int i = 0; OB_SUCC(ret) && i < sstable_cnt; ++i) { + fake_sstables_[i].key_.scn_range_.start_scn_.convert_for_tx(log_ts++); + fake_sstables_[i].key_.scn_range_.end_scn_.convert_for_tx(log_ts); + if (OB_FAIL(result.handle_.add_table(&fake_sstables_[i]))) { + COMMON_LOG(WARN, "failed to push table", K(ret), K(i), K(fake_sstables_[i])); + } + } + result.scn_range_.start_scn_ = fake_sstables_[0].key_.scn_range_.start_scn_; + result.scn_range_.end_scn_ = fake_sstables_[sstable_cnt - 1].key_.scn_range_.end_scn_; + return ret; +} + +share::SCN TestParallelMinorDag::get_start_log_ts(const int64_t idx) +{ + return fake_sstables_[idx].key_.scn_range_.start_scn_; +} + +share::SCN TestParallelMinorDag::get_end_log_ts(const int64_t idx) +{ + return fake_sstables_[idx].key_.scn_range_.end_scn_; +} + +void check_result_valid(const ObGetMergeTablesResult &result) +{ + ASSERT_EQ(result.handle_.get_table(0)->get_start_scn(), result.scn_range_.start_scn_); + ASSERT_EQ(result.handle_.get_table(result.handle_.get_count() - 1)->get_end_scn(), result.scn_range_.end_scn_); +} + +void TestParallelMinorDag::check_result(const int64_t sstable_cnt, const int64_t result_cnt) +{ + ObGetMergeTablesResult result; + ObArray result_array; + ObMinorExecuteRangeMgr minor_range_mgr; + + ASSERT_EQ(OB_SUCCESS, prepare_merge_result(sstable_cnt, result)); + ASSERT_EQ(OB_SUCCESS, ObPartitionMergePolicy::generate_parallel_minor_interval(result, minor_range_mgr, result_array)); + + COMMON_LOG(INFO, "generate_parallel_minor_interval", K(sstable_cnt), K(result_array)); + ASSERT_EQ(result_array.count(), result_cnt); + int idx = 0; + int rest_cnt = sstable_cnt; + const int64_t minor_trigger = ObPartitionMergePolicy::OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG / 2; + if (sstable_cnt < ObPartitionMergePolicy::OB_MINOR_PARALLEL_SSTABLE_CNT_TRIGGER) { + ASSERT_EQ(result_array.count(), 1); + 
ASSERT_EQ(result_array.at(0).handle_.get_count(), sstable_cnt); + } else { + for (int i = 0; i < result_array.count(); ++i) { + check_result_valid(result_array.at(i)); + + ASSERT_EQ(result_array.at(i).scn_range_.start_scn_, get_start_log_ts(idx)); + if (rest_cnt > ObPartitionMergePolicy::OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG + minor_trigger + && sstable_cnt >= minor_trigger) { + ASSERT_EQ(result_array.at(i).handle_.get_count(), ObPartitionMergePolicy::OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG); + idx += ObPartitionMergePolicy::OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG; + rest_cnt -= ObPartitionMergePolicy::OB_MINOR_PARALLEL_SSTABLE_CNT_IN_DAG; + } else { + ASSERT_EQ(result_array.at(i).handle_.get_count(), rest_cnt); + idx = sstable_cnt; + } + ASSERT_EQ(result_array.at(i).scn_range_.end_scn_, get_end_log_ts(idx - 1)); + } + } +} + +TEST_F(TestParallelMinorDag, test_parallel_interval) +{ + check_result(20, 2); + check_result(19, 1); + check_result(36, 4); + check_result(35, 3); + check_result(32, 3); + check_result(12, 1); + check_result(18, 1); + check_result(22, 2); + check_result(3, 1); + check_result(9, 1); + check_result(40, 4); +} + +#define CHECK_IN_RANGE(start_log_ts, end_log_ts, flag) \ + fake_sstables_[0].key_.scn_range_.start_scn_.convert_for_tx(start_log_ts); \ + fake_sstables_[0].key_.scn_range_.end_scn_.convert_for_tx(end_log_ts); \ + ASSERT_EQ(flag, range_mgr.in_execute_range(&fake_sstables_[0])); + +ObScnRange construct_scn_range(const int64_t start_scn, const int64_t end_scn) +{ + ObScnRange ret_range; + ret_range.start_scn_.convert_for_tx(start_scn); + ret_range.end_scn_.convert_for_tx(end_scn); + return ret_range; +} + +TEST_F(TestParallelMinorDag, test_range_mgr) +{ + ObMinorExecuteRangeMgr range_mgr; + + range_mgr.exe_range_array_.push_back(construct_scn_range(60, 80)); + range_mgr.exe_range_array_.push_back(construct_scn_range(50, 70)); + ASSERT_EQ(OB_ERR_UNEXPECTED, range_mgr.sort_ranges()); + + range_mgr.reset(); + 
range_mgr.exe_range_array_.push_back(construct_scn_range(60, 80)); + range_mgr.exe_range_array_.push_back(construct_scn_range(10, 20)); + range_mgr.exe_range_array_.push_back(construct_scn_range(30, 50)); + ASSERT_EQ(OB_SUCCESS, range_mgr.sort_ranges()); + COMMON_LOG(INFO, "success to sort ranges", K(range_mgr.exe_range_array_)); + + CHECK_IN_RANGE(18, 19, true); + CHECK_IN_RANGE(60, 70, true); + CHECK_IN_RANGE(22, 30, false); + CHECK_IN_RANGE(30, 50, true); + + range_mgr.reset(); + range_mgr.exe_range_array_.push_back(construct_scn_range(10, 20)); + range_mgr.exe_range_array_.push_back(construct_scn_range(40, 80)); + range_mgr.exe_range_array_.push_back(construct_scn_range(20, 40)); + ASSERT_EQ(OB_SUCCESS, range_mgr.sort_ranges()); + COMMON_LOG(INFO, "success to sort ranges", K(range_mgr.exe_range_array_)); + + CHECK_IN_RANGE(18, 19, true); + CHECK_IN_RANGE(60, 70, true); + CHECK_IN_RANGE(22, 30, true); + CHECK_IN_RANGE(30, 50, true); + CHECK_IN_RANGE(80, 85, false); + CHECK_IN_RANGE(30, 65, true); + + range_mgr.reset(); + range_mgr.exe_range_array_.push_back(construct_scn_range(0, 200)); + range_mgr.exe_range_array_.push_back(construct_scn_range(10, 20)); + range_mgr.exe_range_array_.push_back(construct_scn_range(40, 80)); + range_mgr.exe_range_array_.push_back(construct_scn_range(20, 40)); + ASSERT_EQ(OB_SUCCESS, range_mgr.sort_ranges()); + + CHECK_IN_RANGE(100, 165, true); +} + +TEST_F(TestParallelMinorDag, test_parallel_with_range_mgr) +{ + int64_t sstable_cnt = 40; + ObGetMergeTablesResult result; + ObArray result_array; + ObMinorExecuteRangeMgr minor_range_mgr; + + minor_range_mgr.exe_range_array_.push_back(construct_scn_range(11, 21)); + minor_range_mgr.exe_range_array_.push_back(construct_scn_range(31, 41)); + + ASSERT_EQ(OB_SUCCESS, prepare_merge_result(sstable_cnt, result)); + ASSERT_EQ(OB_SUCCESS, ObPartitionMergePolicy::generate_parallel_minor_interval(result, minor_range_mgr, result_array)); + ASSERT_EQ(result_array.count(), 2); + + 
 ASSERT_EQ(result_array.at(0).scn_range_.start_scn_.get_val_for_tx(), 1); + ASSERT_EQ(result_array.at(0).scn_range_.end_scn_.get_val_for_tx(), 11); + + ASSERT_EQ(result_array.at(1).scn_range_.start_scn_.get_val_for_tx(), 21); + ASSERT_EQ(result_array.at(1).scn_range_.end_scn_.get_val_for_tx(), 31); + + + result_array.reset(); + minor_range_mgr.reset(); + minor_range_mgr.exe_range_array_.push_back(construct_scn_range(15, 19)); + minor_range_mgr.exe_range_array_.push_back(construct_scn_range(37, 39)); + + ASSERT_EQ(OB_SUCCESS, ObPartitionMergePolicy::generate_parallel_minor_interval(result, minor_range_mgr, result_array)); + COMMON_LOG(INFO, "generate_parallel_minor_interval", K(result_array)); + ASSERT_EQ(result_array.count(), 2); + + ASSERT_EQ(result_array.at(0).scn_range_.start_scn_.get_val_for_tx(), 1); + ASSERT_EQ(result_array.at(0).scn_range_.end_scn_.get_val_for_tx(), 15); + + ASSERT_EQ(result_array.at(1).scn_range_.start_scn_.get_val_for_tx(), 19); + ASSERT_EQ(result_array.at(1).scn_range_.end_scn_.get_val_for_tx(), 37); + + // two running ranges, candidates need > 8 + result_array.reset(); + minor_range_mgr.reset(); + minor_range_mgr.exe_range_array_.push_back(construct_scn_range(1, 17)); + minor_range_mgr.exe_range_array_.push_back(construct_scn_range(18, 34)); + ASSERT_EQ(OB_SUCCESS, ObPartitionMergePolicy::generate_parallel_minor_interval(result, minor_range_mgr, result_array)); + COMMON_LOG(INFO, "generate_parallel_minor_interval", K(result_array)); + ASSERT_EQ(result_array.count(), 0); + + result_array.reset(); + minor_range_mgr.reset(); + minor_range_mgr.exe_range_array_.push_back(construct_scn_range(1, 17)); + minor_range_mgr.exe_range_array_.push_back(construct_scn_range(17, 31)); + ASSERT_EQ(OB_SUCCESS, ObPartitionMergePolicy::generate_parallel_minor_interval(result, minor_range_mgr, result_array)); + COMMON_LOG(INFO, "generate_parallel_minor_interval", K(result_array)); + ASSERT_EQ(result_array.count(), 1); + 
 ASSERT_EQ(result_array.at(0).scn_range_.start_scn_.get_val_for_tx(), 31); + ASSERT_EQ(result_array.at(0).scn_range_.end_scn_.get_val_for_tx(), 41); + + // one running range, candidates need > 4 + result_array.reset(); + minor_range_mgr.reset(); + minor_range_mgr.exe_range_array_.push_back(construct_scn_range(1, 34)); + ASSERT_EQ(OB_SUCCESS, ObPartitionMergePolicy::generate_parallel_minor_interval(result, minor_range_mgr, result_array)); + COMMON_LOG(INFO, "generate_parallel_minor_interval", K(result_array)); + ASSERT_EQ(result_array.count(), 1); + ASSERT_EQ(result_array.at(0).scn_range_.start_scn_.get_val_for_tx(), 34); + ASSERT_EQ(result_array.at(0).scn_range_.end_scn_.get_val_for_tx(), 41); +} + +} // namespace unittest +} // namespace oceanbase + +int main(int argc, char **argv) +{ + system("rm -rf test_parallel_minor_dag.log*"); + OB_LOGGER.set_file_name("test_parallel_minor_dag.log"); + OB_LOGGER.set_log_level("INFO"); + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/unittest/storage/test_partition_incremental_range_spliter.cpp b/unittest/storage/test_partition_incremental_range_spliter.cpp index 700c339662..dd557ed1a4 100644 --- a/unittest/storage/test_partition_incremental_range_spliter.cpp +++ b/unittest/storage/test_partition_incremental_range_spliter.cpp @@ -624,7 +624,7 @@ public: private: void set_tablet_size(int64_t tablet_size) { - table_schema_.tablet_size_ = tablet_size; + storage_schema_.tablet_size_ = tablet_size; range_spliter_.tablet_size_ = tablet_size; } void set_major_sstable_meta(int64_t macro_block_count, int64_t occupy_size, int64_t row_count) @@ -659,7 +659,7 @@ private: share::ObTenantBase tenant_base_; ObArenaAllocator allocator_; compaction::ObTabletMergeDagParam param_; - share::schema::ObTableSchema table_schema_; + ObStorageSchema storage_schema_; ObTablet tablet_; ObMockSSTableV2 major_sstable_; ObSSTable minor_sstable_; // 增量 @@ -687,13 +687,14 @@ void TestPartitionIncrementalRangeSliter::SetUp() 
ASSERT_EQ(OB_SUCCESS, tenant_base_.init()); // table schema - table_schema_.tablet_size_ = 1024; - table_schema_.rowkey_column_num_ = ObMockDatumRowkey::COLUMN_NUM; - ObRowkeyColumn rowkey_col; - rowkey_col.column_id_ = 1; - rowkey_col.type_.set_int(); + storage_schema_.tablet_size_ = 1024; + storage_schema_.rowkey_array_.set_allocator(&allocator_); + storage_schema_.rowkey_array_.reserve(1); + ObStorageRowkeyColumnSchema rowkey_col; + rowkey_col.column_idx_ = 1 + common::OB_APP_MIN_COLUMN_ID; + rowkey_col.meta_type_.set_int(); rowkey_col.order_ = ASC; - ASSERT_EQ(OB_SUCCESS, table_schema_.rowkey_info_.add_column(rowkey_col)); + ASSERT_EQ(OB_SUCCESS, storage_schema_.rowkey_array_.push_back(rowkey_col)); // major sstable major_sstable_.set_table_type(ObITable::MAJOR_SSTABLE); @@ -708,7 +709,7 @@ void TestPartitionIncrementalRangeSliter::SetUp() minor_sstable_.key_.tablet_id_ = 1; // merge ctx - merge_ctx_.schema_ctx_.merge_schema_ = &table_schema_; + merge_ctx_.schema_ctx_.storage_schema_ = &storage_schema_; ASSERT_EQ(OB_SUCCESS, merge_ctx_.tables_handle_.add_table(&major_sstable_)); ASSERT_EQ(OB_SUCCESS, merge_ctx_.tables_handle_.add_table(&minor_sstable_)); merge_ctx_.tablet_handle_.obj_ = &tablet_; @@ -1162,7 +1163,7 @@ TEST_F(TestPartitionIncrementalRangeSliter, test_not_empty_major_sstable_split_r int main(int argc, char **argv) { system("rm -f test_partition_incremental_range_spliter.log*"); - oceanbase::ObLogger::get_logger().set_file_name("test_partition_incremental_range_spliter.log", true); + oceanbase::ObLogger::get_logger().set_file_name("test_partition_incremental_range_spliter.log"); oceanbase::ObLogger::get_logger().set_log_level("DEBUG"); ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/unittest/storage/test_tenant_tablet_stat_mgr.cpp b/unittest/storage/test_tenant_tablet_stat_mgr.cpp new file mode 100644 index 0000000000..c1b48653fe --- /dev/null +++ b/unittest/storage/test_tenant_tablet_stat_mgr.cpp @@ -0,0 +1,415 @@ +/* 
+ * test_tenant_tablet_stat_mgr.cpp + * Author: fengjingkun.fjk@antgroup.com + */ + +#include +#include +#include + +#define USING_LOG_PREFIX STORAGE +#define protected public +#define private public + +#include "mtlenv/mock_tenant_module_env.h" +#include "storage/ob_tenant_tablet_stat_mgr.h" + +namespace oceanbase +{ +using namespace share; +using namespace common; +using namespace storage; +using namespace compaction; + +class TestTenantTabletStatMgr : public ::testing::Test +{ +public: + TestTenantTabletStatMgr(); + virtual ~TestTenantTabletStatMgr() = default; + virtual void SetUp() override; + virtual void TearDown() override; + static void SetUpTestCase(); + static void TearDownTestCase(); + static void report(ObTenantTabletStatMgr *mgr, const ObTabletStat &stat); + void batch_report_stat(int64_t report_num); + +private: + const uint64_t tenant_id_; + ObTenantBase tenant_base_; + ObTenantTabletStatMgr *stat_mgr_; +}; + +TestTenantTabletStatMgr::TestTenantTabletStatMgr() + : tenant_id_(1), + tenant_base_(tenant_id_), + stat_mgr_(nullptr) +{ +} + +void TestTenantTabletStatMgr::SetUpTestCase() +{ + EXPECT_EQ(OB_SUCCESS, MockTenantModuleEnv::get_instance().init()); +} + +void TestTenantTabletStatMgr::TearDownTestCase() +{ + MockTenantModuleEnv::get_instance().destroy(); +} + +void TestTenantTabletStatMgr::SetUp() +{ + int ret = OB_SUCCESS; + + stat_mgr_ = OB_NEW(ObTenantTabletStatMgr, ObModIds::TEST); + ret = stat_mgr_->init(); + ASSERT_EQ(OB_SUCCESS, ret); + + tenant_base_.set(stat_mgr_); + ObTenantEnv::set_tenant(&tenant_base_); + ASSERT_EQ(OB_SUCCESS, tenant_base_.init()); + ASSERT_EQ(tenant_id_, MTL_ID()); + ASSERT_EQ(stat_mgr_, MTL(ObTenantTabletStatMgr *)); +} + +void TestTenantTabletStatMgr::TearDown() +{ + stat_mgr_->destroy(); + ObTenantEnv::set_tenant(nullptr); +} + +void TestTenantTabletStatMgr::report(ObTenantTabletStatMgr *mgr, const ObTabletStat &stat) +{ + ASSERT_TRUE(NULL != mgr); + ASSERT_TRUE(stat.is_valid()); + ASSERT_EQ(OB_SUCCESS, 
mgr->report_stat(stat)); +} + +void TestTenantTabletStatMgr::batch_report_stat(int64_t report_num) +{ + ASSERT_TRUE(NULL != stat_mgr_); + ASSERT_EQ(true, stat_mgr_->is_inited_); + + for (int64_t i = 0; i < report_num; ++i) { + ObTabletStat curr_stat; + curr_stat.ls_id_ = 1; + curr_stat.tablet_id_ = 10001 + i; + curr_stat.query_cnt_ = 100 * (i + 1); + + std::thread sub_report_thread(report, stat_mgr_, curr_stat); + if (sub_report_thread.joinable()) { + sub_report_thread.join(); + } + } +} + +namespace unittest +{ +TEST_F(TestTenantTabletStatMgr, basic) +{ + EXPECT_EQ(OB_SYS_TENANT_ID, MTL_ID()); + ObTenantTabletStatMgr *stat_mgr = MTL(ObTenantTabletStatMgr *); + ASSERT_TRUE(NULL != stat_mgr); +} + +TEST_F(TestTenantTabletStatMgr, basic_tablet_stat_bucket) +{ + ObTabletStat tablet_stat; + tablet_stat.ls_id_ = 1; + tablet_stat.tablet_id_ = 1; + tablet_stat.query_cnt_ = 100; + tablet_stat.scan_logical_row_cnt_ = 100; + tablet_stat.scan_physical_row_cnt_ = 100; + tablet_stat.merge_physical_row_cnt_ = 100; + tablet_stat.merge_logical_row_cnt_ = 100; + + { + int64_t step = 1; + ObTabletStatBucket<8> bucket(step); + ObTabletStat retired_stat; + bool has_retired = false; + for (int64_t i = 0; i < 8; ++i) { + ASSERT_EQ(i, bucket.head_idx_); + ASSERT_EQ(7 + i, bucket.curr_idx_); + bucket.add(tablet_stat); + ASSERT_EQ(100, bucket.units_[bucket.get_idx(7 + i)].query_cnt_); + bucket.refresh(retired_stat, has_retired); + ASSERT_EQ(true, has_retired); + } + bucket.refresh(retired_stat, has_retired); + ASSERT_EQ(100, retired_stat.scan_logical_row_cnt_); + } + + { + int64_t step = 16; + ObTabletStatBucket<4> bucket(step); + ObTabletStat retired_stat; + bool has_retired = false; + for (int64_t i = 0; i < 64; ++i) { + ASSERT_EQ(i / step, bucket.head_idx_); + ASSERT_EQ(3 + i / step, bucket.curr_idx_); + bucket.add(tablet_stat); + bucket.refresh(retired_stat, has_retired); + if (has_retired) { + ASSERT_EQ(0, bucket.units_[bucket.get_idx(bucket.curr_idx_)].query_cnt_); + } + } + + 
ASSERT_TRUE(has_retired); + ASSERT_EQ(1600, retired_stat.scan_logical_row_cnt_); + } + + { + int64_t step = 32; + ObTabletStatBucket<4> bucket(step); + ObTabletStat retired_stat; + bool has_retired = false; + for (int64_t i = 0; i < 128; ++i) { + ASSERT_EQ(i / step, bucket.head_idx_); + ASSERT_EQ(3 + i / step, bucket.curr_idx_); + bucket.add(tablet_stat); + bucket.refresh(retired_stat, has_retired); + if (has_retired) { + ASSERT_EQ(0, bucket.units_[bucket.get_idx(bucket.curr_idx_)].query_cnt_); + } + } + ASSERT_TRUE(has_retired); + ASSERT_EQ(3200, retired_stat.scan_logical_row_cnt_); + } +} + +TEST_F(TestTenantTabletStatMgr, basic_tablet_stream) +{ + ObTabletStat tablet_stat; + tablet_stat.ls_id_ = 1; + tablet_stat.tablet_id_ = 1; + tablet_stat.query_cnt_ = 100; + tablet_stat.scan_logical_row_cnt_ = 100; + tablet_stat.scan_physical_row_cnt_ = 100; + tablet_stat.merge_physical_row_cnt_ = 100; + tablet_stat.merge_logical_row_cnt_ = 100; + + ObTabletStream stream; + auto &curr_buckets = stream.curr_buckets_; + auto &latest_buckets = stream.latest_buckets_; + auto &past_buckets = stream.past_buckets_; + + for (int64_t i = 0; i < ObTabletStream::CURR_BUCKET_CNT * ObTabletStream::CURR_BUCKET_STEP; ++i) { + stream.add_stat(tablet_stat); + stream.refresh(); + } + ASSERT_EQ(8, latest_buckets.refresh_cnt_); + // retired from curr_buckets + ASSERT_EQ(100, latest_buckets.units_[latest_buckets.get_idx(latest_buckets.curr_idx_)].query_cnt_); + + for (int64_t i = 0; i < ObTabletStream::LATEST_BUCKET_CNT * ObTabletStream::LATEST_BUCKET_STEP; ++i) { + stream.add_stat(tablet_stat); + stream.refresh(); + } + ASSERT_EQ(72, latest_buckets.refresh_cnt_); + ASSERT_EQ(800, latest_buckets.units_[latest_buckets.get_idx(latest_buckets.curr_idx_)].query_cnt_); + ASSERT_EQ(72, past_buckets.refresh_cnt_); + ASSERT_EQ(0, past_buckets.units_[past_buckets.get_idx(past_buckets.curr_idx_)].query_cnt_); + + for (int64_t i = 0; i < ObTabletStream::CURR_BUCKET_CNT * ObTabletStream::CURR_BUCKET_STEP; 
++i) { + stream.add_stat(tablet_stat); + stream.refresh(); + } + ASSERT_EQ(80, latest_buckets.refresh_cnt_); + ASSERT_EQ(80, past_buckets.refresh_cnt_); + ASSERT_EQ(1600, past_buckets.units_[past_buckets.get_idx(past_buckets.curr_idx_)].query_cnt_); +} + +TEST_F(TestTenantTabletStatMgr, get_all_tablet_stat) +{ + int ret = OB_SUCCESS; + + ObTabletStat tablet_stat; + tablet_stat.ls_id_ = 1; + tablet_stat.tablet_id_ = 1; + tablet_stat.query_cnt_ = 100; + tablet_stat.scan_logical_row_cnt_ = 100; + tablet_stat.scan_physical_row_cnt_ = 100; + tablet_stat.merge_physical_row_cnt_ = 100; + tablet_stat.merge_logical_row_cnt_ = 100; + + ObTabletStream stream; + auto &curr_buckets = stream.curr_buckets_; + auto &latest_buckets = stream.latest_buckets_; + auto &past_buckets = stream.past_buckets_; + ObArray tablet_stats; + + int64_t curr_bucket_size = curr_buckets.count(); + for (int64_t i = 0; i < curr_bucket_size; ++i) { + curr_buckets.units_[i] += tablet_stat; + } + ret = stream.get_bucket_tablet_stat(curr_buckets, tablet_stats); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(curr_bucket_size == tablet_stats.count()); + + int64_t latest_bucket_size = latest_buckets.count(); + for (int64_t i = 0; i < latest_bucket_size; ++i) { + latest_buckets.units_[i] += tablet_stat; + } + ret = stream.get_bucket_tablet_stat(latest_buckets, tablet_stats); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE((curr_bucket_size + latest_bucket_size) == tablet_stats.count()); + + int64_t past_bucket_size = past_buckets.count(); + for (int64_t i = 0; i < past_bucket_size; ++i) { + past_buckets.units_[i] += tablet_stat; + } + ret = stream.get_bucket_tablet_stat(past_buckets, tablet_stats); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE((curr_bucket_size + latest_bucket_size + past_bucket_size) == tablet_stats.count()); +} + +TEST_F(TestTenantTabletStatMgr, basic_stream_pool) +{ + int ret = OB_SUCCESS; + const int64_t max_free_list_num = 500; + const int64_t up_limit_node_num = 1000; + ObTabletStreamPool pool; + 
+ ret = pool.init(max_free_list_num, up_limit_node_num); + ASSERT_EQ(OB_SUCCESS, ret); + + int64_t free_num = pool.get_free_num(); + ASSERT_EQ(max_free_list_num, free_num); + + ObTabletStreamNode *fixed_node = nullptr; + ret = pool.alloc(fixed_node); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(NULL != fixed_node); + ASSERT_EQ(storage::ObTabletStreamPool::NodeAllocType::FIXED_ALLOC, fixed_node->flag_); + ASSERT_EQ(max_free_list_num - 1, pool.get_free_num()); + pool.free(fixed_node); + fixed_node = nullptr; + ASSERT_EQ(max_free_list_num, pool.get_free_num()); + + common::ObSEArray free_nodes; + for (int64_t i = 0; i < max_free_list_num; ++i) { + ObTabletStreamNode *free_node = nullptr; + ret = pool.alloc(free_node); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(NULL != free_node); + ASSERT_EQ(storage::ObTabletStreamPool::NodeAllocType::FIXED_ALLOC, free_node->flag_); + ret = free_nodes.push_back(free_node); + ASSERT_EQ(OB_SUCCESS, ret); + } + ASSERT_EQ(0, pool.get_free_num()); + + ObTabletStreamNode *dynamic_node = nullptr; + ret = pool.alloc(dynamic_node); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(NULL != dynamic_node); + ASSERT_EQ(storage::ObTabletStreamPool::NodeAllocType::DYNAMIC_ALLOC, dynamic_node->flag_); + pool.free(dynamic_node); + dynamic_node = nullptr; + ASSERT_EQ(0, pool.get_free_num()); + + for (int64_t i = 0; i < free_nodes.count(); ++i) { + ObTabletStreamNode *node = free_nodes.at(i); + ASSERT_TRUE(NULL != node); + pool.free(node); + ASSERT_EQ(i + 1, pool.get_free_num()); + } +} + +TEST_F(TestTenantTabletStatMgr, check_fetch_node) +{ + int ret = OB_SUCCESS; + const int64_t max_free_list_num = 500; + const int64_t up_limit_node_num = 500; + ObTabletStreamPool pool; + common::ObDList lru_list_; + + ret = pool.init(max_free_list_num, up_limit_node_num); + ASSERT_EQ(OB_SUCCESS, ret); + + int64_t free_num = pool.get_free_num(); + ASSERT_EQ(max_free_list_num, free_num); + + ObTabletStreamNode *first_node = nullptr; + for (int64_t i = 0; i < 
max_free_list_num; ++i) { + ObTabletStreamNode *curr_node = nullptr; + ret = pool.free_list_.pop(curr_node); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_TRUE(NULL == curr_node->prev_); + ASSERT_TRUE(NULL == curr_node->next_); + + ASSERT_EQ(true, lru_list_.add_first(curr_node)); + first_node = (NULL == first_node) ? curr_node : first_node; + } + + ObTabletStreamNode *last_node = nullptr; + for (int64_t i = 0; i < max_free_list_num; ++i) { + last_node = lru_list_.get_last(); + ASSERT_EQ(true, lru_list_.move_to_first(last_node)); + ret = pool.free_list_.push(last_node); + ASSERT_EQ(OB_SUCCESS, ret); + } + + last_node = lru_list_.get_last(); + ASSERT_TRUE(last_node == first_node); +} + +TEST_F(TestTenantTabletStatMgr, basic_tablet_stat_mgr) +{ + int ret = OB_SUCCESS; + + EXPECT_EQ(OB_SYS_TENANT_ID, MTL_ID()); + ObTenantTabletStatMgr *stat_mgr = MTL(ObTenantTabletStatMgr *); + ASSERT_TRUE(NULL != stat_mgr); + + ObTabletStat tablet_stat; + tablet_stat.ls_id_ = 1; + tablet_stat.tablet_id_ = 123; + tablet_stat.query_cnt_ = 100; + tablet_stat.scan_logical_row_cnt_ = 100; + tablet_stat.scan_physical_row_cnt_ = 100; + tablet_stat.merge_physical_row_cnt_ = 100; + tablet_stat.merge_logical_row_cnt_ = 100; + + ret = stat_mgr_->report_stat(tablet_stat); + ASSERT_EQ(OB_SUCCESS, ret); + stat_mgr_->process_stats(); + + ObTabletStat res; + share::ObLSID ls_id(1); + common::ObTabletID tablet_id(123); + ret = stat_mgr_->get_latest_tablet_stat(ls_id, tablet_id, res); + ASSERT_EQ(OB_SUCCESS, ret); + ASSERT_EQ(100, res.query_cnt_); +} + +TEST_F(TestTenantTabletStatMgr, multi_report_tablet_stat) +{ + EXPECT_EQ(OB_SYS_TENANT_ID, MTL_ID()); + ObTenantTabletStatMgr *stat_mgr = MTL(ObTenantTabletStatMgr *); + ASSERT_TRUE(NULL != stat_mgr); + ASSERT_TRUE(stat_mgr->is_inited_); + + int64_t report_num = 10; + batch_report_stat(report_num); + stat_mgr_->process_stats(); + + int64_t report_cnt = 0; + ObTenantTabletStatMgr::TabletStreamMap::iterator iter = stat_mgr_->stream_map_.begin(); + for ( ; iter 
!= stat_mgr_->stream_map_.end(); ++iter) { + ++report_cnt; + } + ASSERT_TRUE(report_cnt > 5); +} + +} // end unittest +} // end oceanbase + +int main(int argc, char **argv) +{ + ::testing::InitGoogleTest(&argc, argv); + system("rm -f test_tenant_tablet_stat_mgr.log*"); + OB_LOGGER.set_file_name("test_tenant_tablet_stat_mgr.log", true); + OB_LOGGER.set_log_level("INFO"); + OB_LOGGER.set_max_file_size(256*1024*1024); + return RUN_ALL_TESTS(); +} \ No newline at end of file