From ad8e2f4749762b4f27038f3fc819aa3953cd307b Mon Sep 17 00:00:00 2001 From: Henry2SS <45096548+Henry2SS@users.noreply.github.com> Date: Thu, 1 Sep 2022 18:05:37 +0800 Subject: [PATCH] [fix](rpc) fix that coordinator rpc timeout too large may make show load blocked for long time (#12152) Co-authored-by: wuhangze --- .../src/main/java/org/apache/doris/common/Config.java | 8 ++++++++ .../src/main/java/org/apache/doris/qe/Coordinator.java | 8 ++++---- .../window_functions/test_window_function.groovy | 3 +++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java index 5be6ce78f8..4dcc64f398 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/Config.java @@ -1753,4 +1753,12 @@ public class Config extends ConfigBase { */ @ConfField(mutable = true) public static boolean enable_new_es_dsl = true; + + /** + * The timeout of executing async remote fragment. + * In the normal case, the async remote fragment will be executed in a short time. If the system is under high + * load, try setting this timeout longer. + */ + @ConfField(mutable = true) + public static long remote_fragment_exec_timeout_ms = 5000; // 5 sec } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index 1d4d6f2b99..21af37ccd2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -689,13 +689,14 @@ public class Coordinator { } } - private void waitRpc(List>> futures, long timeoutMs, + private void waitRpc(List>> futures, long leftTimeMs, String operation) throws RpcException, UserException { - if (timeoutMs <= 0) { + if (leftTimeMs <= 0) { throw new UserException("timeout before waiting for " + operation + " RPC. 
Elapse(sec): " + ( (System.currentTimeMillis() - timeoutDeadline) / 1000 + queryOptions.query_timeout)); } + long timeoutMs = Math.min(leftTimeMs, Config.remote_fragment_exec_timeout_ms); for (Pair> pair : futures) { TStatusCode code; String errMsg = null; @@ -720,8 +721,7 @@ public class Coordinator { code = TStatusCode.INTERNAL_ERROR; } catch (TimeoutException e) { exception = e; - errMsg = "timeout when waiting for " + operation + " RPC. Elapse(sec): " - + ((System.currentTimeMillis() - timeoutDeadline) / 1000 + queryOptions.query_timeout); + errMsg = "timeout when waiting for " + operation + " RPC. Wait(sec): " + timeoutMs / 1000; code = TStatusCode.TIMEOUT; } diff --git a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy index 175e8a3bfb..ed0ae196d4 100644 --- a/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/window_functions/test_window_function.groovy @@ -491,6 +491,9 @@ suite("test_window_function") { line = line + cur + ")" } } + + sql """ admin set frontend config("remote_fragment_exec_timeout_ms"="60000"); """ + qt_window_hang2"""select A.${k1}, A.wj - B.dyk + 1 as num from (select ${k1}, wj from ${line} as W1) as A join (select ${k1}, min(wj) as dyk from ${line} as W2 group by ${k1}) as B