Wait until old ddl session exit before deciding the need of retry
This commit is contained in:
@ -1685,21 +1685,15 @@ int ObCheckTabletDataComplementOp::check_and_wait_old_complement_task(
|
|||||||
{
|
{
|
||||||
int ret = OB_SUCCESS;
|
int ret = OB_SUCCESS;
|
||||||
need_exec_new_inner_sql = true; // default need execute new inner sql
|
need_exec_new_inner_sql = true; // default need execute new inner sql
|
||||||
bool is_old_task_session_exist = false;
|
bool is_old_task_session_exist = true;
|
||||||
bool is_all_sstable_build_finished = false;
|
bool is_dst_checksums_all_report = false;
|
||||||
bool need_wait = false;
|
|
||||||
|
|
||||||
if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || OB_INVALID_ID == table_id)) {
|
if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || OB_INVALID_ID == table_id)) {
|
||||||
ret = OB_INVALID_ARGUMENT;
|
ret = OB_INVALID_ARGUMENT;
|
||||||
LOG_WARN("fail to check and wait complement task", K(ret), K(tenant_id), K(table_id));
|
LOG_WARN("fail to check and wait complement task", K(ret), K(tenant_id), K(table_id));
|
||||||
} else {
|
} else {
|
||||||
LOG_INFO("start to check and wait complement task", K(tenant_id), K(table_id), K(inner_sql_exec_addr), K(trace_id));
|
LOG_INFO("start to check and wait complement task", K(tenant_id), K(table_id), K(inner_sql_exec_addr), K(trace_id));
|
||||||
do {
|
while (OB_SUCC(ret) && is_old_task_session_exist) {
|
||||||
if (OB_FAIL(check_all_tablet_sstable_status(tenant_id, table_id, scn, execution_id, ddl_task_id, is_all_sstable_build_finished))) {
|
|
||||||
LOG_WARN("fail to check task tablet sstable status", K(ret), K(tenant_id), K(table_id), K(scn), K(execution_id), K(ddl_task_id));
|
|
||||||
} else if (is_all_sstable_build_finished) {
|
|
||||||
LOG_INFO("all tablet sstable has build finished");
|
|
||||||
} else {
|
|
||||||
if (OB_FAIL(check_task_inner_sql_session_status(inner_sql_exec_addr, trace_id, tenant_id, execution_id, scn, is_old_task_session_exist))) {
|
if (OB_FAIL(check_task_inner_sql_session_status(inner_sql_exec_addr, trace_id, tenant_id, execution_id, scn, is_old_task_session_exist))) {
|
||||||
LOG_WARN("fail check task inner sql session status", K(ret), K(trace_id), K(inner_sql_exec_addr));
|
LOG_WARN("fail check task inner sql session status", K(ret), K(trace_id), K(inner_sql_exec_addr));
|
||||||
} else if (!is_old_task_session_exist) {
|
} else if (!is_old_task_session_exist) {
|
||||||
@ -1708,20 +1702,31 @@ int ObCheckTabletDataComplementOp::check_and_wait_old_complement_task(
|
|||||||
usleep(10 * 1000); // sleep 10ms
|
usleep(10 * 1000); // sleep 10ms
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (OB_EAGAIN == ret) { // retry
|
|
||||||
ret = OB_SUCCESS;
|
// After old session exits, the rule of retry is specified as follows
|
||||||
}
|
//
|
||||||
need_wait = !is_all_sstable_build_finished && is_old_task_session_exist;
|
// A. for dst table merge checksums of this execution,
|
||||||
} while (OB_SUCC(ret) && need_wait); // TODO: time out
|
// - if complete, goto B (need_exec_new_inner_sql = false)
|
||||||
///// end
|
// - else if all tablets has been merged, this means some checksum report failed, retry
|
||||||
/* Only in table all sstables not finished case, we will do retry */
|
// - else old session must fail/crash, retry
|
||||||
if (is_all_sstable_build_finished) {
|
//
|
||||||
|
// B. do checksum validation against src table scan checksums of this execution,
|
||||||
|
// - if src checksums are complete, this is exactly a validation
|
||||||
|
// - else old session must fail/crash "unexpectedly" (because complete dst checksum in A
|
||||||
|
// guarantees at least one preivous execution has successfully finished table scan),
|
||||||
|
// the validation may returns error due to lack of src checksum records
|
||||||
|
|
||||||
|
ObArray<ObTabletID> dest_tablet_ids;
|
||||||
|
if (OB_FAIL(ret)) {
|
||||||
|
} else if (OB_FAIL(check_tablet_checksum_update_status(tenant_id, table_id, ddl_task_id, execution_id, dest_tablet_ids, is_dst_checksums_all_report))) {
|
||||||
|
LOG_WARN("fail to check tablet checksum update status.", K(ret), K(tenant_id), K(dest_tablet_ids), K(execution_id));
|
||||||
|
} else if (is_dst_checksums_all_report) {
|
||||||
need_exec_new_inner_sql = false;
|
need_exec_new_inner_sql = false;
|
||||||
LOG_INFO("no need to execute inner sql to do complement.", K(need_exec_new_inner_sql));
|
LOG_INFO("no need execute because all tablet sstable has build finished", K(need_exec_new_inner_sql));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
LOG_INFO("end to check and wait complement task", K(ret),
|
LOG_INFO("end to check and wait complement task", K(ret),
|
||||||
K(table_id), K(is_old_task_session_exist), K(is_all_sstable_build_finished), K(need_exec_new_inner_sql));
|
K(table_id), K(is_old_task_session_exist), K(is_dst_checksums_all_report), K(need_exec_new_inner_sql));
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user