Wait until the old DDL session exits before deciding whether a retry is needed
This commit is contained in:
		| @ -1685,21 +1685,15 @@ int ObCheckTabletDataComplementOp::check_and_wait_old_complement_task( | |||||||
| { | { | ||||||
|   int ret = OB_SUCCESS; |   int ret = OB_SUCCESS; | ||||||
|   need_exec_new_inner_sql = true; // default need execute new inner sql |   need_exec_new_inner_sql = true; // default need execute new inner sql | ||||||
|   bool is_old_task_session_exist = false; |   bool is_old_task_session_exist = true; | ||||||
|   bool is_all_sstable_build_finished = false; |   bool is_dst_checksums_all_report = false; | ||||||
|   bool need_wait = false; |  | ||||||
|  |  | ||||||
|   if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || OB_INVALID_ID == table_id)) { |   if (OB_UNLIKELY(OB_INVALID_ID == tenant_id || OB_INVALID_ID == table_id)) { | ||||||
|     ret = OB_INVALID_ARGUMENT; |     ret = OB_INVALID_ARGUMENT; | ||||||
|     LOG_WARN("fail to check and wait complement task", K(ret), K(tenant_id), K(table_id)); |     LOG_WARN("fail to check and wait complement task", K(ret), K(tenant_id), K(table_id)); | ||||||
|   } else { |   } else { | ||||||
|     LOG_INFO("start to check and wait complement task", K(tenant_id), K(table_id), K(inner_sql_exec_addr), K(trace_id)); |     LOG_INFO("start to check and wait complement task", K(tenant_id), K(table_id), K(inner_sql_exec_addr), K(trace_id)); | ||||||
|     do { |     while (OB_SUCC(ret) && is_old_task_session_exist) { | ||||||
|       if (OB_FAIL(check_all_tablet_sstable_status(tenant_id, table_id, scn, execution_id, ddl_task_id, is_all_sstable_build_finished))) { |  | ||||||
|         LOG_WARN("fail to check task tablet sstable status", K(ret), K(tenant_id), K(table_id), K(scn), K(execution_id), K(ddl_task_id)); |  | ||||||
|       } else if (is_all_sstable_build_finished) { |  | ||||||
|         LOG_INFO("all tablet sstable has build finished"); |  | ||||||
|       } else { |  | ||||||
|       if (OB_FAIL(check_task_inner_sql_session_status(inner_sql_exec_addr, trace_id, tenant_id, execution_id, scn, is_old_task_session_exist))) { |       if (OB_FAIL(check_task_inner_sql_session_status(inner_sql_exec_addr, trace_id, tenant_id, execution_id, scn, is_old_task_session_exist))) { | ||||||
|         LOG_WARN("fail check task inner sql session status", K(ret), K(trace_id), K(inner_sql_exec_addr)); |         LOG_WARN("fail check task inner sql session status", K(ret), K(trace_id), K(inner_sql_exec_addr)); | ||||||
|       } else if (!is_old_task_session_exist) { |       } else if (!is_old_task_session_exist) { | ||||||
| @ -1708,20 +1702,31 @@ int ObCheckTabletDataComplementOp::check_and_wait_old_complement_task( | |||||||
|         usleep(10 * 1000); // sleep 10ms |         usleep(10 * 1000); // sleep 10ms | ||||||
|       } |       } | ||||||
|     } |     } | ||||||
|       if (OB_EAGAIN == ret) { // retry |  | ||||||
|         ret = OB_SUCCESS; |     // After old session exits, the rule of retry is specified as follows | ||||||
|       } |     // | ||||||
|       need_wait = !is_all_sstable_build_finished && is_old_task_session_exist; |     // A. for dst table merge checksums of this execution, | ||||||
|     } while (OB_SUCC(ret) && need_wait); // TODO: time out |     // - if complete, goto B (need_exec_new_inner_sql = false) | ||||||
|     ///// end |     // - else if all tablets have been merged, this means some checksum report failed, retry | ||||||
|     /* Only in table all sstables not finished case, we will do retry */ |     // - else old session must fail/crash, retry | ||||||
|     if (is_all_sstable_build_finished) { |     // | ||||||
|  |     // B. do checksum validation against src table scan checksums of this execution, | ||||||
|  |     // - if src checksums are complete, this is exactly a validation | ||||||
|  |     // - else old session must fail/crash "unexpectedly" (because complete dst checksum in A | ||||||
|  |     //   guarantees at least one previous execution has successfully finished table scan), | ||||||
|  |     //   the validation may return an error due to lack of src checksum records | ||||||
|  |  | ||||||
|  |     ObArray<ObTabletID> dest_tablet_ids; | ||||||
|  |     if (OB_FAIL(ret)) { | ||||||
|  |     } else if (OB_FAIL(check_tablet_checksum_update_status(tenant_id, table_id, ddl_task_id, execution_id, dest_tablet_ids, is_dst_checksums_all_report))) { | ||||||
|  |       LOG_WARN("fail to check tablet checksum update status.", K(ret), K(tenant_id), K(dest_tablet_ids), K(execution_id)); | ||||||
|  |     } else if (is_dst_checksums_all_report) { | ||||||
|       need_exec_new_inner_sql = false; |       need_exec_new_inner_sql = false; | ||||||
|       LOG_INFO("no need to execute inner sql to do complement.", K(need_exec_new_inner_sql)); |       LOG_INFO("no need execute because all tablet sstable has build finished", K(need_exec_new_inner_sql)); | ||||||
|     } |     } | ||||||
|   } |   } | ||||||
|   LOG_INFO("end to check and wait complement task", K(ret), |   LOG_INFO("end to check and wait complement task", K(ret), | ||||||
|     K(table_id), K(is_old_task_session_exist), K(is_all_sstable_build_finished), K(need_exec_new_inner_sql)); |     K(table_id), K(is_old_task_session_exist), K(is_dst_checksums_all_report), K(need_exec_new_inner_sql)); | ||||||
|  |  | ||||||
|   return ret; |   return ret; | ||||||
| } | } | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user
	 Hongqin-Li
					Hongqin-Li