[Alter Job] Cancel the alter job after a task has failed 3 times (#2447)
This avoids waiting for the timeout when the alter job is invalid.
This commit is contained in:
@ -373,6 +373,12 @@ public class RollupJobV2 extends AlterJobV2 {
|
||||
|
||||
if (!rollupBatchTask.isFinished()) {
|
||||
LOG.info("rollup tasks not finished. job: {}", jobId);
|
||||
List<AgentTask> tasks = rollupBatchTask.getUnfinishedTasks(2000);
|
||||
for (AgentTask task : tasks) {
|
||||
if (task.getFailedTimes() >= 3) {
|
||||
throw new AlterCancelException("rollup task failed after try three times: " + task.getErrorMsg());
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -406,6 +406,12 @@ public class SchemaChangeJobV2 extends AlterJobV2 {
|
||||
|
||||
if (!schemaChangeBatchTask.isFinished()) {
|
||||
LOG.info("schema change tasks not finished. job: {}", jobId);
|
||||
List<AgentTask> tasks = schemaChangeBatchTask.getUnfinishedTasks(2000);
|
||||
for (AgentTask task : tasks) {
|
||||
if (task.getFailedTimes() >= 3) {
|
||||
throw new AlterCancelException("schema change task failed after try three times: " + task.getErrorMsg());
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -130,6 +130,9 @@ public class MasterImpl {
|
||||
} else {
|
||||
if (taskStatus.getStatus_code() != TStatusCode.OK) {
|
||||
task.failed();
|
||||
String errMsg = "task type: " + taskType + ", status_code: " + taskStatus.getStatus_code().toString() +
|
||||
", backendId: " + backend + ", signature: " + signature;
|
||||
task.setErrorMsg(errMsg);
|
||||
// We start to let FE perceive the task's error msg
|
||||
if (taskType != TTaskType.MAKE_SNAPSHOT && taskType != TTaskType.UPLOAD
|
||||
&& taskType != TTaskType.DOWNLOAD && taskType != TTaskType.MOVE
|
||||
|
||||
@ -34,6 +34,7 @@ public abstract class AgentTask {
|
||||
protected TResourceInfo resourceInfo;
|
||||
|
||||
protected int failedTimes;
|
||||
protected String errorMsg;
|
||||
// Some processes may use this member to check whether the task is finished;
|
||||
// others do not.
|
||||
// So whether the task is finished depends on the caller's logic, not on the value of this member.
|
||||
@ -105,6 +106,14 @@ public abstract class AgentTask {
|
||||
return this.failedTimes;
|
||||
}
|
||||
|
||||
/**
 * Stores an error message on this task.
 *
 * In the code shown, MasterImpl calls this when a task reports a status code
 * other than OK, passing a summary built from the task type, status code,
 * backend and signature.
 *
 * @param errorMsg the message to keep on this task; replaces any previous value
 */
public void setErrorMsg(String errorMsg) {
|
||||
this.errorMsg = errorMsg;
|
||||
}
|
||||
|
||||
/**
 * Returns the error message most recently stored with setErrorMsg.
 *
 * The rollup and schema change jobs shown append this value to the
 * AlterCancelException message once a task's failed count reaches 3.
 *
 * NOTE(review): the field has no visible initializer, so this is presumably
 * null until setErrorMsg has been called -- confirm against the constructor.
 *
 * @return the stored error message
 */
public String getErrorMsg() {
|
||||
return errorMsg;
|
||||
}
|
||||
|
||||
/**
 * Sets the isFinished member of this task.
 *
 * NOTE(review): a comment elsewhere in this class says only some processes
 * consult this member, so whether a task is really finished depends on the
 * caller's logic -- verify before relying on this flag.
 *
 * @param isFinished the new value of the flag
 */
public void setFinished(boolean isFinished) {
|
||||
this.isFinished = isFinished;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user