enhance clog replaying checking logic to prevent replaying clog after tablet delete/finish transfer out tx is committed
This commit is contained in:
@ -1895,6 +1895,10 @@ DEF_BOOL(_enable_choose_migration_source_policy, OB_TENANT_PARAMETER, "True",
|
||||
DEF_BOOL(_global_enable_rich_vector_format, OB_CLUSTER_PARAMETER, "True",
|
||||
"Control whether use rich vector format in vectorization engine",
|
||||
ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
|
||||
// Tenant-level, dynamically effective emergency switch, default "FALSE".
// When enabled, the replay path is allowed to skip a redo log that targets a
// tablet whose delete transaction has already committed, instead of failing.
// NOTE: the identifier keeps its historical spelling ("detete") because other
// code reads the parameter by this exact name.
DEF_BOOL(_allow_skip_replay_redo_after_detete_tablet, OB_TENANT_PARAMETER, "FALSE",
         // Adjacent literals are concatenated by the compiler, so the first one
         // must end with a space to keep the two sentences separated.
         "allow skip replay invalid redo log after tablet delete transaction is committed. "
         "The default value is FALSE. Value: TRUE means we allow skip replaying this invalid redo log, False means we do not allow such behavior.",
         ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE));
|
||||
|
||||
//check os params
|
||||
DEF_BOOL(strict_check_os_params, OB_CLUSTER_PARAMETER, "False",
|
||||
|
||||
@ -24,6 +24,7 @@
|
||||
#include "observer/net/ob_ingress_bw_alloc_service.h"
|
||||
#include "observer/ob_srv_network_frame.h"
|
||||
#include "observer/report/ob_i_meta_report.h"
|
||||
#include "observer/omt/ob_tenant_config_mgr.h"
|
||||
#include "rootserver/freeze/ob_major_freeze_service.h"
|
||||
#include "rootserver/tenant_snapshot/ob_tenant_snapshot_scheduler.h"
|
||||
#include "rootserver/restore/ob_clone_scheduler.h"
|
||||
@ -46,6 +47,7 @@
|
||||
#include "share/leak_checker/obj_leak_checker.h"
|
||||
#include "share/ob_ls_id.h"
|
||||
#include "share/ob_global_autoinc_service.h"
|
||||
#include "share/ob_force_print_log.h"
|
||||
#include "sql/das/ob_das_id_service.h"
|
||||
#include "storage/compaction/ob_tenant_tablet_scheduler.h"
|
||||
#include "storage/ls/ob_ls.h"
|
||||
@ -1780,6 +1782,8 @@ int ObLS::replay_get_tablet(
|
||||
&& ObTabletStatus::TRANSFER_OUT_DELETED != tablet_status) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("tablet is empty shell but user data is unexpected", K(ret), KPC(tablet));
|
||||
} else if (OB_FAIL(check_tablet_status_and_scn(data, scn))) {
|
||||
LOG_ERROR("fail to check tablet status and scn", K(ret), K(ls_id), K(tablet_id), K(data), K(scn));
|
||||
} else {
|
||||
ret = OB_OBSOLETE_CLOG_NEED_SKIP;
|
||||
LOG_INFO("tablet is already deleted, need skip", KR(ret), K(ls_id), K(tablet_id), K(scn));
|
||||
@ -1800,6 +1804,8 @@ int ObLS::replay_get_tablet(
|
||||
LOG_INFO("latest transaction has not committed yet, should retry", KR(ret), K(ls_id), K(tablet_id),
|
||||
K(scn), "clog_checkpoint_scn", tablet->get_clog_checkpoint_scn(), K(data));
|
||||
}
|
||||
} else if (OB_FAIL(check_tablet_status_and_scn(data, scn))) {
|
||||
LOG_ERROR("fail to check tablet status and scn", K(ret), K(ls_id), K(tablet_id), K(data), K(scn));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1810,6 +1816,30 @@ int ObLS::replay_get_tablet(
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObLS::check_tablet_status_and_scn(
|
||||
const ObTabletCreateDeleteMdsUserData &data,
|
||||
const share::SCN &scn)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
||||
if (OB_UNLIKELY((ObTabletStatus::DELETED == data.tablet_status_ || ObTabletStatus::TRANSFER_OUT_DELETED == data.tablet_status_)
|
||||
&& scn >= data.delete_commit_scn_)) {
|
||||
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
|
||||
if (OB_UNLIKELY(!tenant_config.is_valid())) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("tenant config is invalid", K(ret));
|
||||
} else if (tenant_config->_allow_skip_replay_redo_after_detete_tablet) {
|
||||
FLOG_WARN("scn is bigger than tablet delete commit scn, allow to skip replaying this clog for emergency",
|
||||
K(ret), K(data), K(scn));
|
||||
} else {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("scn is bigger than tablet delete commit scn", K(ret), K(data), K(scn));
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObLS::logstream_freeze(const int64_t trace_id, const bool is_sync, const int64_t abs_timeout_ts)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
|
||||
@ -85,7 +85,7 @@ class ObCompactionScheduleIterator;
|
||||
}
|
||||
namespace storage
|
||||
{
|
||||
const static int64_t LS_INNER_TABLET_FROZEN_TIMESTAMP = 1;
|
||||
class ObTabletCreateDeleteMdsUserData;
|
||||
|
||||
struct ObLSVTInfo
|
||||
{
|
||||
@ -928,7 +928,10 @@ public:
|
||||
DELEGATE_WITH_RET(reserved_snapshot_mgr_, add_dependent_medium_tablet, int);
|
||||
DELEGATE_WITH_RET(reserved_snapshot_mgr_, del_dependent_medium_tablet, int);
|
||||
int set_ls_migration_gc(bool &allow_gc);
|
||||
|
||||
private:
|
||||
// Checks a replayed log's scn against the tablet's create/delete mds user data.
// Returns OB_SUCCESS unless the tablet status is DELETED or TRANSFER_OUT_DELETED
// and `scn` >= data.delete_commit_scn_. In that case it returns
// OB_ERR_UNEXPECTED, except when the tenant parameter
// _allow_skip_replay_redo_after_detete_tablet is enabled (then it only logs a
// warning and returns OB_SUCCESS). An invalid tenant config guard also yields
// OB_ERR_UNEXPECTED.
static int check_tablet_status_and_scn(
|
||||
const ObTabletCreateDeleteMdsUserData &data,
|
||||
const share::SCN &scn);
|
||||
private:
|
||||
// StorageBaseUtil
|
||||
// table manager: create, remove and guard get.
|
||||
|
||||
@ -5508,7 +5508,8 @@ int ObTablet::get_ddl_info(int64_t &schema_version, int64_t &schema_refreshed_ts
|
||||
return ret;
|
||||
}
|
||||
|
||||
int ObTablet::get_rec_log_scn(SCN &rec_scn) {
|
||||
int ObTablet::get_rec_log_scn(SCN &rec_scn)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
rec_scn = SCN::max_scn();
|
||||
ObTableHandleV2 handle;
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
*/
|
||||
|
||||
#include "share/throttle/ob_throttle_unit.h"
|
||||
#include "observer/omt/ob_tenant_config_mgr.h"
|
||||
#include "storage/ls/ob_ls.h"
|
||||
#include "storage/ls/ob_ls_tx_service.h"
|
||||
#include "storage/memtable/ob_memtable.h"
|
||||
@ -765,7 +766,9 @@ int ObTxReplayExecutor::replay_row_(storage::ObStoreCtx &store_ctx,
|
||||
memtable::ObMemtableMutatorIterator *mmi_ptr)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
common::ObTimeGuard timeguard("replay_row_in_memtable", 10 * 1000);
|
||||
const share::ObLSID &ls_id = tablet->get_ls_id();
|
||||
const common::ObTabletID &tablet_id = tablet->get_tablet_id();
|
||||
common::ObTimeGuard timeguard("replay_row_in_memtable", 10_ms);
|
||||
ObIMemtable *mem_ptr = nullptr;
|
||||
ObMemtable *data_mem_ptr = nullptr;
|
||||
ObStorageTableGuard w_guard(tablet, store_ctx, true, true, log_ts_ns_);
|
||||
@ -774,12 +777,25 @@ int ObTxReplayExecutor::replay_row_(storage::ObStoreCtx &store_ctx,
|
||||
TRANS_LOG(WARN, "[Replay Tx] invaild arguments", K(ret), KP(mmi_ptr));
|
||||
} else if (FALSE_IT(timeguard.click("start"))) {
|
||||
} else if (OB_FAIL(prepare_memtable_replay_(w_guard, mem_ptr))) {
|
||||
if (OB_NO_NEED_UPDATE != ret) {
|
||||
TRANS_LOG(WARN, "[Replay Tx] prepare for replay failed", K(ret), KP(mem_ptr), KP(mmi_ptr));
|
||||
if (OB_NO_NEED_UPDATE == ret) {
|
||||
TRANS_LOG(DEBUG, "[Replay Tx] Not need replay row for tablet",
|
||||
K(ret), K(ls_id), K(tablet_id), K(log_ts_ns_),
|
||||
K(tx_part_log_no_), K(mmi_ptr->get_row_head()));
|
||||
} else if (OB_TABLET_NOT_EXIST == ret) {
|
||||
omt::ObTenantConfigGuard tenant_config(TENANT_CONF(MTL_ID()));
|
||||
if (OB_UNLIKELY(!tenant_config.is_valid())) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
TRANS_LOG(WARN, "tenant config is invalid", K(ret));
|
||||
} else if (tenant_config->_allow_skip_replay_redo_after_detete_tablet) {
|
||||
ret = OB_NO_NEED_UPDATE;
|
||||
TRANS_LOG(WARN, "[Replay Tx] tablet does not exist while preparing memtable for replay, allow to skip this clog replaying for emergency",
|
||||
K(ret), K(ls_id), K(tablet_id), K_(log_ts_ns));
|
||||
} else {
|
||||
TRANS_LOG(ERROR, "[Replay Tx] tablet does not exist while preparing memtable for replay",
|
||||
K(ret), K(ls_id), K(tablet_id), K_(log_ts_ns));
|
||||
}
|
||||
} else {
|
||||
TRANS_LOG(DEBUG, "[Replay Tx] Not need replay row for tablet", K(log_ts_ns_),
|
||||
K(tx_part_log_no_), K(mmi_ptr->get_row_head()),
|
||||
K(tablet->get_tablet_meta().tablet_id_));
|
||||
TRANS_LOG(WARN, "[Replay Tx] prepare for replay failed", K(ret), K(ls_id), K(tablet_id), KP(mem_ptr), KP(mmi_ptr));
|
||||
}
|
||||
// NOTE: static_cast performs no runtime type check; the OB_ISNULL test below only rejects a null mem_ptr.
|
||||
} else if (OB_ISNULL(data_mem_ptr = static_cast<ObMemtable *>(mem_ptr))) {
|
||||
|
||||
@ -254,6 +254,7 @@ writing_throttling_maximum_duration
|
||||
writing_throttling_trigger_percentage
|
||||
zone
|
||||
_advance_checkpoint_timeout
|
||||
_allow_skip_replay_redo_after_detete_tablet
|
||||
_audit_mode
|
||||
_auto_broadcast_tablet_location_rate_limit
|
||||
_auto_drop_recovering_auxiliary_tenant
|
||||
|
||||
Reference in New Issue
Block a user