From e3611f6a1df1e7a39c367e85d4a2729788b43371 Mon Sep 17 00:00:00 2001 From: HHoflittlefish777 <77738092+HHoflittlefish777@users.noreply.github.com> Date: Fri, 8 Mar 2024 12:42:16 +0800 Subject: [PATCH] [improve](routine-load) increase routine load max_batch_size max limit (#31846) --- .../Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md | 2 +- .../Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md | 2 +- .../java/org/apache/doris/analysis/AlterRoutineLoadStmt.java | 2 +- .../org/apache/doris/analysis/CreateRoutineLoadStmt.java | 5 +++-- .../org/apache/doris/analysis/AlterRoutineLoadStmtTest.java | 2 +- 5 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md index 33803649dd..a42459a022 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md @@ -161,7 +161,7 @@ FROM data_source [data_source_properties] 1. The maximum execution time of each subtask, in seconds. Must be greater than or equal to 1. The default is 10. 2. The maximum number of lines read by each subtask. Must be greater than or equal to 200000. The default is 200000. - 3. The maximum number of bytes read by each subtask. The unit is bytes and the range is 100MB to 1GB. The default is 100MB. + 3. The maximum number of bytes read by each subtask. The unit is bytes and the range is 100MB to 10GB. The default is 100MB. These three parameters are used to control the execution time and processing volume of a subtask. When either one reaches the threshold, the task ends. 
diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md index b93c16c837..646afa135e 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/CREATE-ROUTINE-LOAD.md @@ -158,7 +158,7 @@ FROM data_source [data_source_properties] 1. 每个子任务最大执行时间,单位是秒。必须大于等于 1。默认为10。 2. 每个子任务最多读取的行数。必须大于等于200000。默认是200000。 - 3. 每个子任务最多读取的字节数。单位是字节,范围是 100MB 到 1GB。默认是 100MB。 + 3. 每个子任务最多读取的字节数。单位是字节,范围是 100MB 到 10GB。默认是 100MB。 这三个参数,用于控制一个子任务的执行时间和处理量。当任意一个达到阈值,则任务结束。 diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterRoutineLoadStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterRoutineLoadStmt.java index eb339360d4..2df891fbb3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterRoutineLoadStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AlterRoutineLoadStmt.java @@ -199,7 +199,7 @@ public class AlterRoutineLoadStmt extends DdlStmt { long maxBatchSizeBytes = Util.getLongPropertyOrDefault( jobProperties.get(CreateRoutineLoadStmt.MAX_BATCH_SIZE_PROPERTY), -1, CreateRoutineLoadStmt.MAX_BATCH_SIZE_PRED, - CreateRoutineLoadStmt.MAX_BATCH_SIZE_PROPERTY + " should between 100MB and 1GB"); + CreateRoutineLoadStmt.MAX_BATCH_SIZE_PROPERTY + " should between 100MB and 10GB"); analyzedJobProperties.put(CreateRoutineLoadStmt.MAX_BATCH_SIZE_PROPERTY, String.valueOf(maxBatchSizeBytes)); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateRoutineLoadStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateRoutineLoadStmt.java index ff0a9053c5..f859d7d8f0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateRoutineLoadStmt.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateRoutineLoadStmt.java @@ -196,7 +196,8 @@ public class CreateRoutineLoadStmt extends DdlStmt { public static final Predicate MAX_FILTER_RATIO_PRED = (v) -> v >= 0 && v <= 1; public static final Predicate MAX_BATCH_INTERVAL_PRED = (v) -> v >= 1; public static final Predicate MAX_BATCH_ROWS_PRED = (v) -> v >= 200000; - public static final Predicate MAX_BATCH_SIZE_PRED = (v) -> v >= 100 * 1024 * 1024 && v <= 1024 * 1024 * 1024; + public static final Predicate MAX_BATCH_SIZE_PRED = (v) -> v >= 100 * 1024 * 1024 + && v <= (long) (1024 * 1024 * 1024) * 10; public static final Predicate EXEC_MEM_LIMIT_PRED = (v) -> v >= 0L; public static final Predicate SEND_BATCH_PARALLELISM_PRED = (v) -> v > 0L; @@ -482,7 +483,7 @@ public class CreateRoutineLoadStmt extends DdlStmt { maxBatchSizeBytes = Util.getLongPropertyOrDefault(jobProperties.get(MAX_BATCH_SIZE_PROPERTY), RoutineLoadJob.DEFAULT_MAX_BATCH_SIZE, MAX_BATCH_SIZE_PRED, - MAX_BATCH_SIZE_PROPERTY + " should between 100MB and 1GB"); + MAX_BATCH_SIZE_PROPERTY + " should between 100MB and 10GB"); strictMode = Util.getBooleanPropertyOrDefault(jobProperties.get(LoadStmt.STRICT_MODE), RoutineLoadJob.DEFAULT_STRICT_MODE, diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/AlterRoutineLoadStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/AlterRoutineLoadStmtTest.java index 9a4b7d19e0..9b14629a9a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/AlterRoutineLoadStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/AlterRoutineLoadStmtTest.java @@ -220,7 +220,7 @@ public class AlterRoutineLoadStmtTest { stmt.analyze(analyzer); Assert.fail(); } catch (AnalysisException e) { - Assert.assertTrue(e.getMessage().contains("max_batch_size should between 100MB and 1GB")); + Assert.assertTrue(e.getMessage().contains("max_batch_size should between 100MB and 10GB")); } catch (UserException e) { Assert.fail(); }