[feat](stats) Support analyze with sample automatically (#23978)
1. Analyze with sampling automatically when the table size is greater than huge_table_lower_bound_size_in_bytes (5 GB by default). Users can disable this feature via the FE option enable_auto_sample. 2. Support grammar like `ANALYZE TABLE test WITH FULL` to force a full analyze regardless of the table size. 3. Fix bugs where table stats were not updated properly when stats were dropped or when only a few columns were analyzed.
This commit is contained in:
@ -17,6 +17,8 @@
|
||||
|
||||
package org.apache.doris.common;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
public class Config extends ConfigBase {
|
||||
|
||||
@ConfField(description = {"用户自定义配置文件的路径,用于存放 fe_custom.conf。该文件中的配置会覆盖 fe.conf 中的配置",
|
||||
@ -1516,8 +1518,12 @@ public class Config extends ConfigBase {
|
||||
/*
|
||||
* the time interval at which the system automatically checks statistics
|
||||
*/
|
||||
@ConfField(mutable = true, masterOnly = true)
|
||||
public static int auto_check_statistics_in_minutes = 1;
|
||||
/**
 * Interval between the automatic statistics health checks that may trigger automatic collection.
 * Defaults to 10; the unit is presumably minutes, going by the field name.
 * Declared mutable and master-only through {@code @ConfField}.
 */
@ConfField(mutable = true, masterOnly = true, description = {
|
||||
"该参数控制自动收集作业检查库表统计信息健康度并触发自动收集的时间间隔",
|
||||
"This parameter controls the time interval for automatic collection jobs to check the health of table "
|
||||
+ "statistics and trigger automatic collection"
|
||||
})
|
||||
public static int auto_check_statistics_in_minutes = 10;
|
||||
|
||||
/**
|
||||
* If this configuration is enabled, you should also specify the trace_export_url.
|
||||
@ -2172,4 +2178,26 @@ public class Config extends ConfigBase {
|
||||
+ "The larger the value, the more uniform the distribution of the hash algorithm, "
|
||||
+ "but it will increase the memory overhead."})
|
||||
public static int virtual_node_number = 2048;
|
||||
|
||||
/**
 * Minimum interval, in milliseconds, between automatic ANALYZE runs on a large table.
 * Defaults to 12 hours. Per the description, "large" means bigger than
 * {@code huge_table_lower_bound_size_in_bytes}.
 */
@ConfField(description = {"控制对大表的自动ANALYZE的最小时间间隔,"
|
||||
+ "在该时间间隔内大小超过huge_table_lower_bound_size_in_bytes的表仅ANALYZE一次",
|
||||
"This controls the minimum time interval for automatic ANALYZE on large tables. Within this interval, "
|
||||
+ "tables larger than huge_table_lower_bound_size_in_bytes are analyzed only once."})
|
||||
public static long huge_table_auto_analyze_interval_in_millis = TimeUnit.HOURS.toMillis(12);
|
||||
|
||||
/**
 * Size threshold, in bytes, above which a table is treated as "huge".
 * Defaults to 5 * 1024^3 bytes (5 GiB). Per the description, tables above this size
 * are analyzed by sampling when {@code enable_auto_sample} is on.
 */
@ConfField(description = {"定义大表的大小下界,在开启enable_auto_sample的情况下,"
|
||||
+ "大小超过该值的表将会自动通过采样收集统计信息", "This defines the lower size bound for large tables. "
|
||||
+ "When enable_auto_sample is enabled, tables larger than this value will automatically collect "
|
||||
+ "statistics through sampling"})
|
||||
public static long huge_table_lower_bound_size_in_bytes = 5L * 1024 * 1024 * 1024;
|
||||
|
||||
/**
 * Number of rows sampled from a large table when automatic sampling is enabled.
 * Defaults to 200,000 rows.
 */
@ConfField(description = {"定义开启大表自动sample后,对大表的采样行数",
|
||||
"This defines the number of sample rows for large tables when automatic sampling for "
|
||||
+ "large tables is enabled"})
|
||||
public static int huge_table_default_sample_rows = 200_000;
|
||||
|
||||
/**
 * Switch for automatic sampling of large tables; off by default.
 * Per the description, when on, tables larger than {@code huge_table_lower_bound_size_in_bytes}
 * have their statistics collected through sampling.
 */
@ConfField(description = {"是否开启大表自动sample,开启后对于大小超过huge_table_lower_bound_size_in_bytes的表会自动通过采样收集"
|
||||
+ "统计信息", "Whether to enable automatic sampling for large tables, which, when enabled, automatically "
|
||||
+ "collects statistics through sampling for tables larger than 'huge_table_lower_bound_size_in_bytes'"})
|
||||
public static boolean enable_auto_sample = false;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user