[SCN] fix farm failure

This commit is contained in:
obdev
2022-11-28 02:58:33 +00:00
committed by ob-robot
parent 87a9357186
commit 51de5b5911
594 changed files with 9722 additions and 7770 deletions

View File

@ -517,7 +517,7 @@ int ObService::get_min_sstable_schema_version(
return ret;
}
int ObService::calc_column_checksum_request(const obrpc::ObCalcColumnChecksumRequestArg &arg)
int ObService::calc_column_checksum_request(const obrpc::ObCalcColumnChecksumRequestArg &arg, obrpc::ObCalcColumnChecksumRequestRes &res)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(!inited_)) {
@ -529,32 +529,63 @@ int ObService::calc_column_checksum_request(const obrpc::ObCalcColumnChecksumReq
} else {
// schedule unique checking task
const uint64_t tenant_id = arg.tenant_id_;
int saved_ret = OB_SUCCESS;
MTL_SWITCH(tenant_id) {
ObGlobalUniqueIndexCallback *callback = NULL;
ObUniqueCheckingDag *dag = NULL;
ObTenantDagScheduler* dag_scheduler = nullptr;
if (OB_ISNULL(dag_scheduler = MTL(ObTenantDagScheduler *))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("error unexpected, dag scheduler must not be nullptr", KR(ret));
} else if (OB_FAIL(dag_scheduler->alloc_dag(dag))) {
STORAGE_LOG(WARN, "fail to alloc dag", KR(ret));
} else if (OB_FAIL(dag->init(arg.tenant_id_, arg.ls_id_, arg.tablet_id_, arg.calc_table_id_ == arg.target_table_id_, arg.target_table_id_, arg.schema_version_, arg.task_id_, arg.execution_id_, arg.snapshot_version_))) {
STORAGE_LOG(WARN, "fail to init ObUniqueCheckingDag", KR(ret));
} else if (OB_FAIL(dag->alloc_global_index_task_callback(arg.tablet_id_, arg.target_table_id_, arg.source_table_id_, arg.schema_version_, arg.task_id_, callback))) {
STORAGE_LOG(WARN, "fail to alloc global index task callback", KR(ret));
} else if (OB_FAIL(dag->alloc_unique_checking_prepare_task(callback))) {
STORAGE_LOG(WARN, "fail to alloc unique checking prepare task", KR(ret));
} else if (OB_FAIL(dag_scheduler->add_dag(dag))) {
if (OB_EAGAIN != ret && OB_SIZE_OVERFLOW != ret) {
STORAGE_LOG(WARN, "fail to add dag to queue", KR(ret));
} else {
ret = OB_EAGAIN;
} else if (OB_FAIL(res.ret_codes_.reserve(arg.calc_items_.count()))) {
LOG_WARN("reserve return code array failed", K(ret), K(arg.calc_items_.count()));
} else {
for (int64_t i = 0; OB_SUCC(ret) && i < arg.calc_items_.count(); ++i) {
const ObCalcColumnChecksumRequestArg::SingleItem &calc_item = arg.calc_items_.at(i);
ObUniqueCheckingDag *dag = NULL;
int tmp_ret = OB_SUCCESS;
saved_ret = OB_SUCCESS;
if (OB_TMP_FAIL(dag_scheduler->alloc_dag(dag))) {
STORAGE_LOG(WARN, "fail to alloc dag", KR(tmp_ret));
} else if (OB_TMP_FAIL(dag->init(arg.tenant_id_,
calc_item.ls_id_,
calc_item.tablet_id_,
calc_item.calc_table_id_ == arg.target_table_id_,
arg.target_table_id_,
arg.schema_version_,
arg.task_id_,
arg.execution_id_,
arg.snapshot_version_))) {
STORAGE_LOG(WARN, "fail to init ObUniqueCheckingDag", KR(tmp_ret));
} else if (OB_TMP_FAIL(dag->alloc_global_index_task_callback(calc_item.tablet_id_,
arg.target_table_id_,
arg.source_table_id_,
arg.schema_version_,
arg.task_id_,
callback))) {
STORAGE_LOG(WARN, "fail to alloc global index task callback", KR(tmp_ret));
} else if (OB_TMP_FAIL(dag->alloc_unique_checking_prepare_task(callback))) {
STORAGE_LOG(WARN, "fail to alloc unique checking prepare task", KR(tmp_ret));
} else if (OB_TMP_FAIL(dag_scheduler->add_dag(dag))) {
saved_ret = tmp_ret;
if (OB_EAGAIN == tmp_ret) {
tmp_ret = OB_SUCCESS;
} else if (OB_SIZE_OVERFLOW == tmp_ret) {
tmp_ret = OB_EAGAIN;
} else {
STORAGE_LOG(WARN, "fail to add dag to queue", KR(tmp_ret));
}
}
if (OB_SUCCESS != saved_ret && NULL != dag) {
dag_scheduler->free_dag(*dag);
dag = NULL;
}
if (OB_SUCC(ret)) {
if (OB_FAIL(res.ret_codes_.push_back(tmp_ret))) {
LOG_WARN("push back return code failed", K(ret), K(tmp_ret));
}
}
}
}
if (OB_FAIL(ret) && NULL != dag) {
dag_scheduler->free_dag(*dag);
dag = NULL;
}
}
LOG_INFO("receive column checksum request", K(arg));
}
@ -1010,22 +1041,37 @@ int ObService::check_modify_time_elapsed(
SCN tmp_scn;
transaction::ObTransService *txs = MTL(transaction::ObTransService *);
ObLSService *ls_service = MTL(ObLSService *);
if (OB_FAIL(ls_service->get_ls(ObLSID(arg.ls_id_), ls_handle, ObLSGetMod::OBSERVER_MOD))) {
LOG_WARN("get ls failed", K(ret), K(arg.ls_id_));
} else if (OB_FAIL(ls_handle.get_ls()->check_modify_time_elapsed(arg.tablet_id_,
arg.sstable_exist_ts_,
result.pending_tx_id_))) {
if (OB_EAGAIN != ret) {
LOG_WARN("check schema version elapsed failed", K(ret), K(arg));
if (OB_FAIL(result.results_.reserve(arg.tablets_.count()))) {
LOG_WARN("reserve result array failed", K(ret), K(arg.tablets_.count()));
}
for (int64_t i = 0; OB_SUCC(ret) && i < arg.tablets_.count(); ++i) {
ObTabletHandle tablet_handle;
ObLSHandle ls_handle;
const ObLSID &ls_id = arg.tablets_.at(i).ls_id_;
const ObTabletID &tablet_id = arg.tablets_.at(i).tablet_id_;
SCN snapshot_version;
ObCheckTransElapsedResult single_result;
int tmp_ret = OB_SUCCESS;
if (OB_TMP_FAIL(ls_service->get_ls(ls_id, ls_handle, ObLSGetMod::OBSERVER_MOD))) {
LOG_WARN("get ls failed", K(tmp_ret), K(ls_id));
} else if (OB_TMP_FAIL(ls_handle.get_ls()->check_modify_time_elapsed(tablet_id,
arg.sstable_exist_ts_,
single_result.pending_tx_id_))) {
if (OB_EAGAIN != tmp_ret) {
LOG_WARN("check schema version elapsed failed", K(tmp_ret), K(arg));
}
} else if (OB_TMP_FAIL(txs->get_max_commit_version(snapshot_version))) {
LOG_WARN("fail to get max commit version", K(tmp_ret));
} else {
single_result.snapshot_ = snapshot_version.get_val_for_tx();
}
if (OB_SUCC(ret)) {
single_result.ret_code_ = tmp_ret;
if (OB_FAIL(result.results_.push_back(single_result))) {
LOG_WARN("push back single result failed", K(ret), K(i), K(single_result));
}
}
} else if (OB_FAIL(txs->get_max_commit_version(tmp_scn))) {
LOG_WARN("fail to get max commit version", K(ret));
} else if (OB_UNLIKELY(!tmp_scn.is_valid())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error, scn is invalid", K(ret), K(tmp_scn));
} else {
result.snapshot_ = tmp_scn.get_val_for_tx();
LOG_INFO("succeed to wait transaction end", K(arg));
}
}
}
@ -1046,21 +1092,36 @@ int ObService::check_schema_version_elapsed(
LOG_WARN("invalid argument", K(ret), K(arg));
} else {
MTL_SWITCH(arg.tenant_id_) {
ObTabletHandle tablet_handle;
ObLSHandle ls_handle;
ObLSService *ls_service = nullptr;
if (OB_ISNULL(ls_service = MTL(ObLSService *))) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("error unexpected, get ls service failed", K(ret));
} else if (OB_FAIL(ls_service->get_ls(ObLSID(arg.ls_id_), ls_handle, ObLSGetMod::OBSERVER_MOD))) {
LOG_WARN("get ls failed", K(ret), K(arg.ls_id_));
} else if (OB_FAIL(ls_handle.get_ls()->get_tablet(arg.data_tablet_id_, tablet_handle))) {
LOG_WARN("fail to get tablet", K(ret), K(arg));
} else if (OB_FAIL(tablet_handle.get_obj()->check_schema_version_elapsed(arg.schema_version_,
arg.need_wait_trans_end_,
result.snapshot_,
result.pending_tx_id_))) {
LOG_WARN("check schema version elapsed failed", K(ret), K(arg));
} else if (OB_FAIL(result.results_.reserve(arg.tablets_.count()))) {
LOG_WARN("reserve result array failed", K(ret), K(arg.tablets_.count()));
}
for (int64_t i = 0; OB_SUCC(ret) && i < arg.tablets_.count(); ++i) {
ObTabletHandle tablet_handle;
ObLSHandle ls_handle;
const ObLSID &ls_id = arg.tablets_.at(i).ls_id_;
const ObTabletID &tablet_id = arg.tablets_.at(i).tablet_id_;
ObCheckTransElapsedResult single_result;
int tmp_ret = OB_SUCCESS;
if (OB_TMP_FAIL(ls_service->get_ls(ls_id, ls_handle, ObLSGetMod::OBSERVER_MOD))) {
LOG_WARN("get ls failed", K(tmp_ret), K(i), K(ls_id));
} else if (OB_TMP_FAIL(ls_handle.get_ls()->get_tablet(tablet_id, tablet_handle))) {
LOG_WARN("fail to get tablet", K(tmp_ret), K(i), K(ls_id), K(tablet_id));
} else if (OB_TMP_FAIL(tablet_handle.get_obj()->check_schema_version_elapsed(arg.schema_version_,
arg.need_wait_trans_end_,
single_result.snapshot_,
single_result.pending_tx_id_))) {
LOG_WARN("check schema version elapsed failed", K(tmp_ret), K(arg), K(ls_id), K(tablet_id));
}
if (OB_SUCC(ret)) {
single_result.ret_code_ = tmp_ret;
if (OB_FAIL(result.results_.push_back(single_result))) {
LOG_WARN("push back single result failed", K(ret), K(i), K(single_result));
}
}
}
}
}
@ -1950,8 +2011,8 @@ int ObService::inner_fill_tablet_info_(
int64_t data_size = 0;
int64_t required_size = 0;
ObArray<int64_t> column_checksums;
if (OB_FAIL(tablet_handle.get_obj()->get_tablet_report_info(column_checksums, data_size,
required_size, need_checksum))) {
if (OB_FAIL(tablet_handle.get_obj()->get_tablet_report_info(snapshot_version, column_checksums,
data_size, required_size, need_checksum))) {
LOG_WARN("fail to get tablet report info from tablet", KR(ret), K(tenant_id), K(tablet_id));
} else if (OB_FAIL(tablet_replica.init(
tenant_id,