diff --git a/docs/en/docs/query-acceleration/statistics.md b/docs/en/docs/query-acceleration/statistics.md index 976161b1fc..fdc1661faf 100644 --- a/docs/en/docs/query-acceleration/statistics.md +++ b/docs/en/docs/query-acceleration/statistics.md @@ -86,7 +86,7 @@ This feature has been officially supported since 2.0.3 and is enabled by default The collection jobs for statistics themselves consume a certain amount of system resources. To minimize the overhead, for tables with a large amount of data (default 5 GiB, adjustable with the FE parameter `huge_table_lower_bound_size_in_bytes`), Doris automatically uses sampling to collect statistics. Automatic sampling defaults to sampling 4,194,304 (2^22) rows to reduce the system's burden and complete the collection job as quickly as possible. If you want to sample more rows to obtain a more accurate data distribution, you can increase the sampling row count by adjusting the `huge_table_default_sample_rows` parameter. In addition, for tables with data larger than `huge_table_lower_bound_size_in_bytes` * 5, Doris ensures that the collection time interval is not less than 12 hours (which can be controlled by adjusting the `huge_table_auto_analyze_interval_in_millis` parameter). -If you are concerned about automatic collection jobs interfering with your business, you can specify a time frame for the automatic collection jobs to run during low business loads by setting the `full_auto_analyze_start_time` and `full_auto_analyze_end_time` parameters according to your needs. You can also completely disable this feature by setting the `enable_full_auto_analyze` parameter to `false`. +If you are concerned about automatic collection jobs interfering with your business, you can specify a time frame for the automatic collection jobs to run during low business loads by setting the `auto_analyze_start_time` and `auto_analyze_end_time` parameters according to your needs. 
You can also completely disable this feature by setting the `enable_auto_analyze` parameter to `false`. External catalogs do not participate in automatic collection by default. Because external catalogs often contain massive historical data, if they participate in automatic collection, it may occupy too many resources. You can turn on and off the automatic collection of external catalogs by setting the catalog's properties. @@ -292,9 +292,9 @@ mysql> KILL ANALYZE 52357; | Session Variable | Description | Default Value | | ----------------------------- | -------------------------------------------- | ------------- | -| full_auto_analyze_start_time | Start time for automatic statistics collection | 00:00:00 | -| full_auto_analyze_end_time | End time for automatic statistics collection | 23:59:59 | -| enable_full_auto_analyze | Enable automatic collection functionality | true | +| auto_analyze_start_time | Start time for automatic statistics collection | 00:00:00 | +| auto_analyze_end_time | End time for automatic statistics collection | 23:59:59 | +| enable_auto_analyze | Enable automatic collection functionality | true | | huge_table_default_sample_rows | Sampling rows for large tables | 4194304 | | huge_table_lower_bound_size_in_bytes | Tables with size greater than this value will be automatically sampled during collection of statistics | 5368709120 | | huge_table_auto_analyze_interval_in_millis | Controls the minimum time interval for automatic ANALYZE on large tables. Tables with sizes greater than `huge_table_lower_bound_size_in_bytes * 5` will be ANALYZEed only once within this time interval. 
| 43200000 | diff --git a/docs/zh-CN/docs/query-acceleration/statistics.md b/docs/zh-CN/docs/query-acceleration/statistics.md index e93deb9d22..def289dad1 100644 --- a/docs/zh-CN/docs/query-acceleration/statistics.md +++ b/docs/zh-CN/docs/query-acceleration/statistics.md @@ -89,7 +89,7 @@ ANALYZE TABLE lineitem WITH SAMPLE ROWS 100000; 统计信息的收集作业本身需要占用一定的系统资源,为了尽可能降低开销,对于数据量较大(默认为5GiB,可通过设置FE参数`huge_table_lower_bound_size_in_bytes`来调节此行为)的表,Doris会自动采取采样的方式去收集,自动采样默认采样4194304(2^22)行,以尽可能降低对系统造成的负担并尽快完成收集作业。如果希望采样更多的行以获得更准确的数据分布信息,可通过调整参数`huge_table_default_sample_rows`增大采样行数。另外对于数据量大于`huge_table_lower_bound_size_in_bytes` * 5 的表,Doris保证其收集时间间隔不小于12小时(该时间可通过调整参数`huge_table_auto_analyze_interval_in_millis`控制)。 -如果担心自动收集作业对业务造成干扰,可结合自身需求通过设置参数`full_auto_analyze_start_time`和参数`full_auto_analyze_end_time`指定自动收集作业在业务负载较低的时间段执行。也可以通过设置参数`enable_full_auto_analyze` 为`false`来彻底关闭本功能。 +如果担心自动收集作业对业务造成干扰,可结合自身需求通过设置参数`auto_analyze_start_time`和参数`auto_analyze_end_time`指定自动收集作业在业务负载较低的时间段执行。也可以通过设置参数`enable_auto_analyze` 为`false`来彻底关闭本功能。 External catalog 默认不参与自动收集。因为 external catalog 往往包含海量历史数据,如果参与自动收集,可能占用过多资源。可以通过设置 catalog 的 property 来打开和关闭 external catalog 的自动收集。 @@ -295,9 +295,9 @@ mysql> KILL ANALYZE 52357; |会话变量|说明|默认值| |---|---|---| -|full_auto_analyze_start_time|自动统计信息收集开始时间|00:00:00| -|full_auto_analyze_end_time|自动统计信息收集结束时间|23:59:59| -|enable_full_auto_analyze|开启自动收集功能|true| +|auto_analyze_start_time|自动统计信息收集开始时间|00:00:00| +|auto_analyze_end_time|自动统计信息收集结束时间|23:59:59| +|enable_auto_analyze|开启自动收集功能|true| |huge_table_default_sample_rows|对大表的采样行数|4194304| |huge_table_lower_bound_size_in_bytes|大小超过该值的的表,在自动收集时将会自动通过采样收集统计信息|5368709120| |huge_table_auto_analyze_interval_in_millis|控制对大表的自动ANALYZE的最小时间间隔,在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes * 5的表仅ANALYZE一次|43200000| diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 5c8fb7d566..5b0c18cbeb 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -422,9 +422,9 @@ public class SessionVariable implements Serializable, Writable { public static final String INVERTED_INDEX_CONJUNCTION_OPT_THRESHOLD = "inverted_index_conjunction_opt_threshold"; - public static final String FULL_AUTO_ANALYZE_START_TIME = "full_auto_analyze_start_time"; + public static final String AUTO_ANALYZE_START_TIME = "auto_analyze_start_time"; - public static final String FULL_AUTO_ANALYZE_END_TIME = "full_auto_analyze_end_time"; + public static final String AUTO_ANALYZE_END_TIME = "auto_analyze_end_time"; public static final String SQL_DIALECT = "sql_dialect"; @@ -432,7 +432,7 @@ public class SessionVariable implements Serializable, Writable { public static final String TEST_QUERY_CACHE_HIT = "test_query_cache_hit"; - public static final String ENABLE_FULL_AUTO_ANALYZE = "enable_full_auto_analyze"; + public static final String ENABLE_AUTO_ANALYZE = "enable_auto_analyze"; public static final String FASTER_FLOAT_CONVERT = "faster_float_convert"; @@ -1277,18 +1277,6 @@ public class SessionVariable implements Serializable, Writable { + " use a skiplist to optimize the intersection."}) public int invertedIndexConjunctionOptThreshold = 1000; - @VariableMgr.VarAttr(name = FULL_AUTO_ANALYZE_START_TIME, needForward = true, checker = "checkAnalyzeTimeFormat", - description = {"该参数定义自动ANALYZE例程的开始时间", - "This parameter defines the start time for the automatic ANALYZE routine."}, - flag = VariableMgr.GLOBAL) - public String fullAutoAnalyzeStartTime = "00:00:00"; - - @VariableMgr.VarAttr(name = FULL_AUTO_ANALYZE_END_TIME, needForward = true, checker = "checkAnalyzeTimeFormat", - description = {"该参数定义自动ANALYZE例程的结束时间", - "This parameter defines the end time for the automatic ANALYZE routine."}, - flag = VariableMgr.GLOBAL) - public String fullAutoAnalyzeEndTime = "23:59:59"; - @VariableMgr.VarAttr(name = 
SQL_DIALECT, needForward = true, checker = "checkSqlDialect", description = {"解析sql使用的方言", "The dialect used to parse sql."}) public String sqlDialect = "doris"; @@ -1303,10 +1291,22 @@ public class SessionVariable implements Serializable, Writable { options = {"none", "sql_cache", "partition_cache"}) public String testQueryCacheHit = "none"; - @VariableMgr.VarAttr(name = ENABLE_FULL_AUTO_ANALYZE, + @VariableMgr.VarAttr(name = ENABLE_AUTO_ANALYZE, description = {"该参数控制是否开启自动收集", "Set false to disable auto analyze"}, flag = VariableMgr.GLOBAL) - public boolean enableFullAutoAnalyze = true; + public boolean enableAutoAnalyze = true; + + @VariableMgr.VarAttr(name = AUTO_ANALYZE_START_TIME, needForward = true, checker = "checkAnalyzeTimeFormat", + description = {"该参数定义自动ANALYZE例程的开始时间", + "This parameter defines the start time for the automatic ANALYZE routine."}, + flag = VariableMgr.GLOBAL) + public String autoAnalyzeStartTime = "00:00:00"; + + @VariableMgr.VarAttr(name = AUTO_ANALYZE_END_TIME, needForward = true, checker = "checkAnalyzeTimeFormat", + description = {"该参数定义自动ANALYZE例程的结束时间", + "This parameter defines the end time for the automatic ANALYZE routine."}, + flag = VariableMgr.GLOBAL) + public String autoAnalyzeEndTime = "23:59:59"; @VariableMgr.VarAttr(name = FASTER_FLOAT_CONVERT, description = {"是否启用更快的浮点数转换算法,注意会影响输出格式", "Set true to enable faster float pointer number convert"}) diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java index 86429f09e2..59fe04339f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/util/StatisticsUtil.java @@ -832,14 +832,14 @@ public class StatisticsUtil { private static Pair findConfigFromGlobalSessionVar() { try { String startTime = - findConfigFromGlobalSessionVar(SessionVariable.FULL_AUTO_ANALYZE_START_TIME) 
- .fullAutoAnalyzeStartTime; + findConfigFromGlobalSessionVar(SessionVariable.AUTO_ANALYZE_START_TIME) + .autoAnalyzeStartTime; // For compatibility if (StringUtils.isEmpty(startTime)) { startTime = StatisticConstants.FULL_AUTO_ANALYZE_START_TIME; } - String endTime = findConfigFromGlobalSessionVar(SessionVariable.FULL_AUTO_ANALYZE_END_TIME) - .fullAutoAnalyzeEndTime; + String endTime = findConfigFromGlobalSessionVar(SessionVariable.AUTO_ANALYZE_END_TIME) + .autoAnalyzeEndTime; - if (StringUtils.isEmpty(startTime)) { + if (StringUtils.isEmpty(endTime)) { endTime = StatisticConstants.FULL_AUTO_ANALYZE_END_TIME; } @@ -859,7 +859,7 @@ public class StatisticsUtil { public static boolean enableAutoAnalyze() { try { - return findConfigFromGlobalSessionVar(SessionVariable.ENABLE_FULL_AUTO_ANALYZE).enableFullAutoAnalyze; + return findConfigFromGlobalSessionVar(SessionVariable.ENABLE_AUTO_ANALYZE).enableAutoAnalyze; } catch (Exception e) { LOG.warn("Fail to get value of enable auto analyze, return false by default", e); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java index c0c790c9c2..107a5f5382 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/statistics/util/StatisticsUtilTest.java @@ -81,8 +81,8 @@ public class StatisticsUtilTest { @Mock protected SessionVariable findConfigFromGlobalSessionVar(String varName) throws Exception { SessionVariable sessionVariable = new SessionVariable(); - sessionVariable.fullAutoAnalyzeStartTime = "00:00:00"; - sessionVariable.fullAutoAnalyzeEndTime = "02:00:00"; + sessionVariable.autoAnalyzeStartTime = "00:00:00"; + sessionVariable.autoAnalyzeEndTime = "02:00:00"; return sessionVariable; } }; @@ -100,8 +100,8 @@ public class StatisticsUtilTest { @Mock protected SessionVariable findConfigFromGlobalSessionVar(String varName) throws Exception { SessionVariable 
sessionVariable = new SessionVariable(); - sessionVariable.fullAutoAnalyzeStartTime = "00:00:00"; - sessionVariable.fullAutoAnalyzeEndTime = "23:00:00"; + sessionVariable.autoAnalyzeStartTime = "00:00:00"; + sessionVariable.autoAnalyzeEndTime = "23:00:00"; return sessionVariable; } };