From 346b89e6833f23cb87b38f7e5a989db30143df59 Mon Sep 17 00:00:00 2001 From: hui lai <1353307710@qq.com> Date: Thu, 21 Nov 2024 23:05:11 +0800 Subject: [PATCH] [improve](routine load) adjust default values to make routine load more convenient to use (#42491) (#44377) pick (#42491) A routine load job is divided into many tasks, each of which is a transaction. Currently, the default maximum time a task spends consuming (max_batch_interval) is 10 seconds; this change raises it to 60 seconds and aligns the BE-side fallback defaults with the FE constants. The benefits of increasing this value are: 1. Larger batch consumption can lead to better performance. 2. Reducing the number of transactions can alleviate the pressure of compaction and the conflicts of concurrent transaction submissions. related doc: https://github.com/apache/doris-website/pull/1236/files --- be/src/runtime/stream_load/stream_load_context.h | 7 ++++--- .../org/apache/doris/load/routineload/RoutineLoadJob.java | 2 +- .../apache/doris/load/routineload/RoutineLoadJobTest.java | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/be/src/runtime/stream_load/stream_load_context.h b/be/src/runtime/stream_load/stream_load_context.h index 2ccf8ce501..1fb902a97b 100644 --- a/be/src/runtime/stream_load/stream_load_context.h +++ b/be/src/runtime/stream_load/stream_load_context.h @@ -162,9 +162,10 @@ public: // the following members control the max progress of a consuming // process. if any of them reach, the consuming will finish. 
- int64_t max_interval_s = 5; - int64_t max_batch_rows = 100000; - int64_t max_batch_size = 100 * 1024 * 1024; // 100MB + // same as values set in fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java + int64_t max_interval_s = 60; + int64_t max_batch_rows = 20000000; + int64_t max_batch_size = 1024 * 1024 * 1024; // 1GB // for parse json-data std::string data_format = ""; diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java index 46682e08bd..57f01f3d15 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java @@ -108,7 +108,7 @@ public abstract class RoutineLoadJob extends AbstractTxnStateChangeCallback impl public static final long DEFAULT_MAX_ERROR_NUM = 0; public static final double DEFAULT_MAX_FILTER_RATIO = 1.0; - public static final long DEFAULT_MAX_INTERVAL_SECOND = 10; + public static final long DEFAULT_MAX_INTERVAL_SECOND = 60; public static final long DEFAULT_MAX_BATCH_ROWS = 20000000; public static final long DEFAULT_MAX_BATCH_SIZE = 1024 * 1024 * 1024; // 1GB public static final long DEFAULT_EXEC_MEM_LIMIT = 2 * 1024 * 1024 * 1024L; diff --git a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java index 6f3dd2eaaa..2d9efd895c 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java @@ -362,7 +362,7 @@ public class RoutineLoadJobTest { + "\"desired_concurrent_number\" = \"0\",\n" + "\"max_error_number\" = \"10\",\n" + "\"max_filter_ratio\" = \"1.0\",\n" - + "\"max_batch_interval\" = \"10\",\n" + + "\"max_batch_interval\" = \"60\",\n" + "\"max_batch_rows\" 
= \"10\",\n" + "\"max_batch_size\" = \"1073741824\",\n" + "\"format\" = \"csv\",\n"