[fix](stats) Fix data size calculation of auto sample (#24672)

1. Fix the data size calculation of auto sample; before this PR, the data size included all the replicas. 2. Move some auto-analyze-related options to global session variables. 3. Add some logs.
2023-09-22 19:12:39 +09:00
parent b34a4779e5
commit c943a05065
13 changed files with 61 additions and 40 deletions
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -2078,15 +2078,6 @@ public class Config extends ConfigBase {
        "Sample size for hive row count estimation."})
    public static int hive_stats_partition_sample_size = 3000;

-    @ConfField
-    public static boolean enable_full_auto_analyze = true;
-
-    @ConfField
-    public static String full_auto_analyze_start_time = "00:00:00";
-
-    @ConfField
-    public static String full_auto_analyze_end_time = "02:00:00";
-
    @ConfField
    public static int statistics_sql_parallel_exec_instance_num = 1;

@@ -2183,10 +2174,10 @@ public class Config extends ConfigBase {
            + "statistics through sampling"})
    public static long huge_table_lower_bound_size_in_bytes = 5L * 1024 * 1024 * 1024;

-    @ConfField(description = {"定义开启开启大表自动sample后，对大表的采样行数",
-            "This defines the number of sample rows for large tables when automatic sampling for"
+    @ConfField(description = {"定义开启开启大表自动sample后，对大表的采样比例",
+            "This defines the number of sample percent for large tables when automatic sampling for"
                    + "large tables is enabled"})
-    public static int huge_table_default_sample_rows = 20_0000;
+    public static int huge_table_default_sample_rows = 4194304;

    @ConfField(description = {"是否开启大表自动sample，开启后对于大小超过huge_table_lower_bound_size_in_bytes会自动通过采样收集"
            + "统计信息", "Whether to enable automatic sampling for large tables, which, when enabled, automatically"