From 718297d3c1277dcece42d817d644529643dec07d Mon Sep 17 00:00:00 2001 From: ElvinWei Date: Fri, 28 Apr 2023 15:50:05 +0800 Subject: [PATCH] [test](statistics) add p0 test of sampling statistics (#19176) 1. Add a p0 regression test for sampled statistics collection 2. Extend the unique keys of table analysis_jobs so that its rows can be deleted by catalog, database, table, column and index conditions 3. Make the incremental statistics p0 test more stable by deleting only the analysis_jobs rows that belong to the test's own columns 4. Accept a sample percent in the inclusive range [1, 100] and avoid dividing by a zero row count when computing the histogram sample rate --- .../apache/doris/analysis/AnalyzeStmt.java | 2 +- .../catalog/InternalSchemaInitializer.java | 5 +- .../doris/statistics/HistogramTask.java | 3 +- .../data/statistics/sampled_stats_test.out | 159 ++++++++++++ .../suites/statistics/analyze_test.groovy | 3 +- .../statistics/incremental_stats_test.groovy | 28 +- .../statistics/sampled_stats_test.groovy | 245 ++++++++++++++++++ 7 files changed, 429 insertions(+), 16 deletions(-) create mode 100644 regression-test/data/statistics/sampled_stats_test.out create mode 100644 regression-test/suites/statistics/sampled_stats_test.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java index 468f6b25c1..e9b6c89ed3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyzeStmt.java @@ -219,7 +219,7 @@ public class AnalyzeStmt extends DdlStmt { if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)) { checkNumericProperty(PROPERTY_SAMPLE_PERCENT, properties.get(PROPERTY_SAMPLE_PERCENT), - 0, 100, false, "should be > 0 and < 100"); + 1, 100, true, "should be >= 1 and <= 100"); } if (properties.containsKey(PROPERTY_SAMPLE_ROWS)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java index 3f421d5a86..14544e57b1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java @@ -190,8 +190,9 @@ public class InternalSchemaInitializer extends Thread { columnDefs.add(new ColumnDef("state", TypeDef.createVarchar(32))); columnDefs.add(new ColumnDef("schedule_type", TypeDef.createVarchar(32))); String engineName = "olap"; - KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS, - Lists.newArrayList("job_id", "task_id")); + ArrayList uniqueKeys = Lists.newArrayList("job_id", "task_id", + "catalog_name", "db_name", "tbl_name", "col_name", "index_id"); + KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS, uniqueKeys); DistributionDesc distributionDesc = new HashDistributionDesc( StatisticConstants.STATISTIC_TABLE_BUCKET_COUNT, diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java index 58f9ad01a7..e8186fff25 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/HistogramTask.java @@ -86,7 +86,8 @@ public class HistogramTask extends BaseAnalysisTask { if (info.samplePercent > 0) { return String.valueOf(info.samplePercent / 100.0); } else { - double sampRate = (double) info.sampleRows / tbl.getRowCount(); + long rowCount = tbl.getRowCount() > 0 ? tbl.getRowCount() : 1; + double sampRate = (double) info.sampleRows / rowCount; return sampRate >= 1 ? "1.0" : String.format("%.4f", sampRate); } } diff --git a/regression-test/data/statistics/sampled_stats_test.out b/regression-test/data/statistics/sampled_stats_test.out new file mode 100644 index 0000000000..dd1ac90b44 --- /dev/null +++ b/regression-test/data/statistics/sampled_stats_test.out @@ -0,0 +1,159 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +3 2 0 20 32 6 +4 3 0 20 35 8 +10 4 0 20 35 20 +3 1 0 35 35 6 +4 2 0 Beijing Shenzhen 29 +10 4 0 Beijing Shenzhen 78 +3 2 0 Guangzhou Shanghai 25 +3 1 0 Shenzhen Shenzhen 24 +3 1 0 11 11 24 +4 4 0 2 100 32 +10 7 0 2 200 80 +3 2 0 30 200 24 +4 1 0 2017-10-01 2017-10-01 64 +10 3 0 2017-10-01 2017-10-03 160 +3 1 0 2017-10-02 2017-10-02 48 +3 1 0 2017-10-03 2017-10-03 48 +4 4 0 2017-10-01 06:00:00 2017-10-01 17:05:45 64 +10 7 0 2017-10-01 06:00:00 2017-10-03 10:20:22 160 +3 2 0 2017-10-02 11:20:00 2017-10-02 12:59:12 48 +3 1 0 2017-10-03 10:20:22 2017-10-03 10:20:22 48 +4 4 0 2 22 16 +10 7 0 2 22 40 +3 2 0 5 11 12 +3 1 0 6 6 12 +4 4 0 2 22 16 +10 7 0 2 22 40 +3 2 0 5 11 12 +3 1 0 6 6 12 +3 1 0 0 0 3 +3 2 0 0 1 3 +4 2 0 0 1 4 +10 2 0 0 1 10 +4 4 0 10000 10006 64 +10 10 0 10000 10009 160 +3 3 0 10003 10005 48 +3 3 0 10007 10009 48 + +-- !sql -- +0.0 {"num_buckets":4,"buckets":[{"lower":"20","upper":"20","ndv":1,"count":4,"pre_sum":0},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":4},{"lower":"32","upper":"32","ndv":1,"count":1,"pre_sum":5},{"lower":"35","upper":"35","ndv":1,"count":4,"pre_sum":6}]} +0.0 {"num_buckets":4,"buckets":[{"lower":"Beijing","upper":"Beijing","ndv":1,"count":3,"pre_sum":0},{"lower":"Guangzhou","upper":"Guangzhou","ndv":1,"count":1,"pre_sum":3},{"lower":"Shanghai","upper":"Shanghai","ndv":1,"count":2,"pre_sum":4},{"lower":"Shenzhen","upper":"Shenzhen","ndv":1,"count":4,"pre_sum":6}]} +0.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"11","upper":"11","ndv":1,"count":3,"pre_sum":1},{"lower":"15","upper":"15","ndv":1,"count":1,"pre_sum":4},{"lower":"20","upper":"20","ndv":1,"count":1,"pre_sum":5},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":6},{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":7},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":8}]} +0.0 
{"num_buckets":3,"buckets":[{"lower":"2017-10-01","upper":"2017-10-01","ndv":1,"count":4,"pre_sum":0},{"lower":"2017-10-02","upper":"2017-10-02","ndv":1,"count":3,"pre_sum":4},{"lower":"2017-10-03","upper":"2017-10-03","ndv":1,"count":3,"pre_sum":7}]} +0.0 {"num_buckets":7,"buckets":[{"lower":"2017-10-01 06:00:00","upper":"2017-10-01 06:00:00","ndv":1,"count":1,"pre_sum":0},{"lower":"2017-10-01 07:00:00","upper":"2017-10-01 07:00:00","ndv":1,"count":1,"pre_sum":1},{"lower":"2017-10-01 10:00:15","upper":"2017-10-01 10:00:15","ndv":1,"count":1,"pre_sum":2},{"lower":"2017-10-01 17:05:45","upper":"2017-10-01 17:05:45","ndv":1,"count":1,"pre_sum":3},{"lower":"2017-10-02 11:20:00","upper":"2017-10-02 11:20:00","ndv":1,"count":1,"pre_sum":4},{"lower":"2017-10-02 12:59:12","upper":"2017-10-02 12:59:12","ndv":1,"count":2,"pre_sum":5},{"lower":"2017-10-03 10:20:22","upper":"2017-10-03 10:20:22","ndv":1,"count":3,"pre_sum":7}]} +0.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]} +0.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]} +0.0 {"num_buckets":2,"buckets":[{"lower":"0","upper":"0","ndv":1,"count":7,"pre_sum":0},{"lower":"1","upper":"1","ndv":1,"count":3,"pre_sum":7}]} +0.0 
{"num_buckets":10,"buckets":[{"lower":"10000","upper":"10000","ndv":1,"count":1,"pre_sum":0},{"lower":"10001","upper":"10001","ndv":1,"count":1,"pre_sum":1},{"lower":"10002","upper":"10002","ndv":1,"count":1,"pre_sum":2},{"lower":"10003","upper":"10003","ndv":1,"count":1,"pre_sum":3},{"lower":"10004","upper":"10004","ndv":1,"count":1,"pre_sum":4},{"lower":"10005","upper":"10005","ndv":1,"count":1,"pre_sum":5},{"lower":"10006","upper":"10006","ndv":1,"count":1,"pre_sum":6},{"lower":"10007","upper":"10007","ndv":1,"count":1,"pre_sum":7},{"lower":"10008","upper":"10008","ndv":1,"count":1,"pre_sum":8},{"lower":"10009","upper":"10009","ndv":1,"count":1,"pre_sum":9}]} + +-- !sql -- +3 2 0 20 32 6 +4 3 0 20 35 8 +10 4 0 20 35 20 +3 1 0 35 35 6 +4 2 0 Beijing Shenzhen 29 +10 4 0 Beijing Shenzhen 78 +3 2 0 Guangzhou Shanghai 25 +3 1 0 Shenzhen Shenzhen 24 +3 1 0 11 11 24 +4 4 0 2 100 32 +10 7 0 2 200 80 +3 2 0 30 200 24 +4 1 0 2017-10-01 2017-10-01 64 +10 3 0 2017-10-01 2017-10-03 160 +3 1 0 2017-10-02 2017-10-02 48 +3 1 0 2017-10-03 2017-10-03 48 +4 4 0 2017-10-01 06:00:00 2017-10-01 17:05:45 64 +10 7 0 2017-10-01 06:00:00 2017-10-03 10:20:22 160 +3 2 0 2017-10-02 11:20:00 2017-10-02 12:59:12 48 +3 1 0 2017-10-03 10:20:22 2017-10-03 10:20:22 48 +4 4 0 2 22 16 +10 7 0 2 22 40 +3 2 0 5 11 12 +3 1 0 6 6 12 +4 4 0 2 22 16 +10 7 0 2 22 40 +3 2 0 5 11 12 +3 1 0 6 6 12 +3 1 0 0 0 3 +3 2 0 0 1 3 +4 2 0 0 1 4 +10 2 0 0 1 10 +4 4 0 10000 10006 64 +10 10 0 10000 10009 160 +3 3 0 10003 10005 48 +3 3 0 10007 10009 48 + +-- !sql -- +1.0 {"num_buckets":4,"buckets":[{"lower":"20","upper":"20","ndv":1,"count":4,"pre_sum":0},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":4},{"lower":"32","upper":"32","ndv":1,"count":1,"pre_sum":5},{"lower":"35","upper":"35","ndv":1,"count":4,"pre_sum":6}]} +1.0 
{"num_buckets":4,"buckets":[{"lower":"Beijing","upper":"Beijing","ndv":1,"count":3,"pre_sum":0},{"lower":"Guangzhou","upper":"Guangzhou","ndv":1,"count":1,"pre_sum":3},{"lower":"Shanghai","upper":"Shanghai","ndv":1,"count":2,"pre_sum":4},{"lower":"Shenzhen","upper":"Shenzhen","ndv":1,"count":4,"pre_sum":6}]} +1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"11","upper":"11","ndv":1,"count":3,"pre_sum":1},{"lower":"15","upper":"15","ndv":1,"count":1,"pre_sum":4},{"lower":"20","upper":"20","ndv":1,"count":1,"pre_sum":5},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":6},{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":7},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":8}]} +1.0 {"num_buckets":3,"buckets":[{"lower":"2017-10-01","upper":"2017-10-01","ndv":1,"count":4,"pre_sum":0},{"lower":"2017-10-02","upper":"2017-10-02","ndv":1,"count":3,"pre_sum":4},{"lower":"2017-10-03","upper":"2017-10-03","ndv":1,"count":3,"pre_sum":7}]} +1.0 {"num_buckets":7,"buckets":[{"lower":"2017-10-01 06:00:00","upper":"2017-10-01 06:00:00","ndv":1,"count":1,"pre_sum":0},{"lower":"2017-10-01 07:00:00","upper":"2017-10-01 07:00:00","ndv":1,"count":1,"pre_sum":1},{"lower":"2017-10-01 10:00:15","upper":"2017-10-01 10:00:15","ndv":1,"count":1,"pre_sum":2},{"lower":"2017-10-01 17:05:45","upper":"2017-10-01 17:05:45","ndv":1,"count":1,"pre_sum":3},{"lower":"2017-10-02 11:20:00","upper":"2017-10-02 11:20:00","ndv":1,"count":1,"pre_sum":4},{"lower":"2017-10-02 12:59:12","upper":"2017-10-02 12:59:12","ndv":1,"count":2,"pre_sum":5},{"lower":"2017-10-03 10:20:22","upper":"2017-10-03 10:20:22","ndv":1,"count":3,"pre_sum":7}]} +1.0 
{"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]} +1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]} +1.0 {"num_buckets":2,"buckets":[{"lower":"0","upper":"0","ndv":1,"count":7,"pre_sum":0},{"lower":"1","upper":"1","ndv":1,"count":3,"pre_sum":7}]} +1.0 {"num_buckets":10,"buckets":[{"lower":"10000","upper":"10000","ndv":1,"count":1,"pre_sum":0},{"lower":"10001","upper":"10001","ndv":1,"count":1,"pre_sum":1},{"lower":"10002","upper":"10002","ndv":1,"count":1,"pre_sum":2},{"lower":"10003","upper":"10003","ndv":1,"count":1,"pre_sum":3},{"lower":"10004","upper":"10004","ndv":1,"count":1,"pre_sum":4},{"lower":"10005","upper":"10005","ndv":1,"count":1,"pre_sum":5},{"lower":"10006","upper":"10006","ndv":1,"count":1,"pre_sum":6},{"lower":"10007","upper":"10007","ndv":1,"count":1,"pre_sum":7},{"lower":"10008","upper":"10008","ndv":1,"count":1,"pre_sum":8},{"lower":"10009","upper":"10009","ndv":1,"count":1,"pre_sum":9}]} + +-- !sql -- +3 2 0 20 32 6 +4 3 0 20 35 8 +10 4 0 20 35 20 +3 1 0 35 35 6 +4 2 0 Beijing Shenzhen 29 +10 4 0 Beijing Shenzhen 78 +3 2 0 Guangzhou Shanghai 25 +3 1 0 Shenzhen Shenzhen 24 +3 1 0 11 11 24 +4 4 0 2 100 32 +10 7 0 2 200 80 +3 2 0 30 200 24 +4 1 0 2017-10-01 2017-10-01 64 +10 3 0 2017-10-01 2017-10-03 
160 +3 1 0 2017-10-02 2017-10-02 48 +3 1 0 2017-10-03 2017-10-03 48 +4 4 0 2017-10-01 06:00:00 2017-10-01 17:05:45 64 +10 7 0 2017-10-01 06:00:00 2017-10-03 10:20:22 160 +3 2 0 2017-10-02 11:20:00 2017-10-02 12:59:12 48 +3 1 0 2017-10-03 10:20:22 2017-10-03 10:20:22 48 +4 4 0 2 22 16 +10 7 0 2 22 40 +3 2 0 5 11 12 +3 1 0 6 6 12 +4 4 0 2 22 16 +10 7 0 2 22 40 +3 2 0 5 11 12 +3 1 0 6 6 12 +3 1 0 0 0 3 +3 2 0 0 1 3 +4 2 0 0 1 4 +10 2 0 0 1 10 +4 4 0 10000 10006 64 +10 10 0 10000 10009 160 +3 3 0 10003 10005 48 +3 3 0 10007 10009 48 + +-- !sql -- +1.0 {"num_buckets":4,"buckets":[{"lower":"20","upper":"20","ndv":1,"count":4,"pre_sum":0},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":4},{"lower":"32","upper":"32","ndv":1,"count":1,"pre_sum":5},{"lower":"35","upper":"35","ndv":1,"count":4,"pre_sum":6}]} +1.0 {"num_buckets":4,"buckets":[{"lower":"Beijing","upper":"Beijing","ndv":1,"count":3,"pre_sum":0},{"lower":"Guangzhou","upper":"Guangzhou","ndv":1,"count":1,"pre_sum":3},{"lower":"Shanghai","upper":"Shanghai","ndv":1,"count":2,"pre_sum":4},{"lower":"Shenzhen","upper":"Shenzhen","ndv":1,"count":4,"pre_sum":6}]} +1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"11","upper":"11","ndv":1,"count":3,"pre_sum":1},{"lower":"15","upper":"15","ndv":1,"count":1,"pre_sum":4},{"lower":"20","upper":"20","ndv":1,"count":1,"pre_sum":5},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":6},{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":7},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":8}]} +1.0 {"num_buckets":3,"buckets":[{"lower":"2017-10-01","upper":"2017-10-01","ndv":1,"count":4,"pre_sum":0},{"lower":"2017-10-02","upper":"2017-10-02","ndv":1,"count":3,"pre_sum":4},{"lower":"2017-10-03","upper":"2017-10-03","ndv":1,"count":3,"pre_sum":7}]} +1.0 {"num_buckets":7,"buckets":[{"lower":"2017-10-01 06:00:00","upper":"2017-10-01 06:00:00","ndv":1,"count":1,"pre_sum":0},{"lower":"2017-10-01 
07:00:00","upper":"2017-10-01 07:00:00","ndv":1,"count":1,"pre_sum":1},{"lower":"2017-10-01 10:00:15","upper":"2017-10-01 10:00:15","ndv":1,"count":1,"pre_sum":2},{"lower":"2017-10-01 17:05:45","upper":"2017-10-01 17:05:45","ndv":1,"count":1,"pre_sum":3},{"lower":"2017-10-02 11:20:00","upper":"2017-10-02 11:20:00","ndv":1,"count":1,"pre_sum":4},{"lower":"2017-10-02 12:59:12","upper":"2017-10-02 12:59:12","ndv":1,"count":2,"pre_sum":5},{"lower":"2017-10-03 10:20:22","upper":"2017-10-03 10:20:22","ndv":1,"count":3,"pre_sum":7}]} +1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]} +1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]} +1.0 {"num_buckets":2,"buckets":[{"lower":"0","upper":"0","ndv":1,"count":7,"pre_sum":0},{"lower":"1","upper":"1","ndv":1,"count":3,"pre_sum":7}]} +1.0 
{"num_buckets":10,"buckets":[{"lower":"10000","upper":"10000","ndv":1,"count":1,"pre_sum":0},{"lower":"10001","upper":"10001","ndv":1,"count":1,"pre_sum":1},{"lower":"10002","upper":"10002","ndv":1,"count":1,"pre_sum":2},{"lower":"10003","upper":"10003","ndv":1,"count":1,"pre_sum":3},{"lower":"10004","upper":"10004","ndv":1,"count":1,"pre_sum":4},{"lower":"10005","upper":"10005","ndv":1,"count":1,"pre_sum":5},{"lower":"10006","upper":"10006","ndv":1,"count":1,"pre_sum":6},{"lower":"10007","upper":"10007","ndv":1,"count":1,"pre_sum":7},{"lower":"10008","upper":"10008","ndv":1,"count":1,"pre_sum":8},{"lower":"10009","upper":"10009","ndv":1,"count":1,"pre_sum":9}]} + +-- !sql -- +0.5 +0.5 +0.5 +0.5 +0.5 +0.5 +0.5 +0.5 +0.5 + diff --git a/regression-test/suites/statistics/analyze_test.groovy b/regression-test/suites/statistics/analyze_test.groovy index c793a1c8cd..6a287b48c8 100644 --- a/regression-test/suites/statistics/analyze_test.groovy +++ b/regression-test/suites/statistics/analyze_test.groovy @@ -273,7 +273,8 @@ suite("analyze_test") { );""" sql """ - DELETE FROM __internal_schema.analysis_jobs WHERE job_id > 0 + DELETE FROM __internal_schema.analysis_jobs + WHERE tbl_name = 'analyze_test_tbl_2'; """ test { diff --git a/regression-test/suites/statistics/incremental_stats_test.groovy b/regression-test/suites/statistics/incremental_stats_test.groovy index 18cd683f8b..308bb048e3 100644 --- a/regression-test/suites/statistics/incremental_stats_test.groovy +++ b/regression-test/suites/statistics/incremental_stats_test.groovy @@ -124,10 +124,10 @@ suite("test_incremental_stats") { WHERE col_id IN ${columnNameValues}; """ - // TODO delete by database name and table name sql """ - DELETE FROM __internal_schema.analysis_jobs WHERE job_id IS NOT NULL; - """ + DELETE FROM __internal_schema.analysis_jobs + WHERE col_name IN ${columnNameValues}; + """ // 1. 
Firstly do a full collection of statistics sql """ @@ -270,18 +270,24 @@ suite("test_incremental_stats") { ); """ - sql """ - DELETE FROM __internal_schema.column_statistics - WHERE col_id IN ( + // sql """ + // DELETE FROM __internal_schema.column_statistics + // WHERE col_id IN ( + // 't_1682176142000_user_id', 't_1682176142000_date', 't_1682176142000_city', + // 't_1682176142000_age', 't_1682176142000_sex', 't_1682176142000_last_visit_date', + // 't_1682176142000_cost', 't_1682176142000_max_dwell_time', 't_1682176142000_min_dwell_time', + // 't_1682176142000_new_column' + // ); + // """ + + sql """ + DELETE FROM __internal_schema.analysis_jobs + WHERE col_name IN ( 't_1682176142000_user_id', 't_1682176142000_date', 't_1682176142000_city', 't_1682176142000_age', 't_1682176142000_sex', 't_1682176142000_last_visit_date', 't_1682176142000_cost', 't_1682176142000_max_dwell_time', 't_1682176142000_min_dwell_time', 't_1682176142000_new_column' - ); - """ - - sql """ - DELETE FROM __internal_schema.analysis_jobs WHERE job_id IS NOT NULL; + ); """ sql """ diff --git a/regression-test/suites/statistics/sampled_stats_test.groovy b/regression-test/suites/statistics/sampled_stats_test.groovy new file mode 100644 index 0000000000..1bb96d8f30 --- /dev/null +++ b/regression-test/suites/statistics/sampled_stats_test.groovy @@ -0,0 +1,245 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_sampled_stats") { + def dbName = "test_sampled_stats" + def tblName = "${dbName}.example_tbl" + + def colStatisticsTblName = "__internal_schema.column_statistics" + def colHistogramTblName = "__internal_schema.histogram_statistics" + + def columnNames = """ + ( + `t_1682570060000_user_id`, `t_1682570060000_date`, + `t_1682570060000_city`, `t_1682570060000_age`, `t_1682570060000_sex`, + `t_1682570060000_last_visit_date`, `t_1682570060000_cost`, + `t_1682570060000_max_dwell_time`, `t_1682570060000_min_dwell_time` + ) + """ + + def columnNameValues = """ + ( + 't_1682570060000_user_id', 't_1682570060000_date', 't_1682570060000_city', + 't_1682570060000_age', 't_1682570060000_sex', 't_1682570060000_last_visit_date', + 't_1682570060000_cost', 't_1682570060000_max_dwell_time', 't_1682570060000_min_dwell_time' + ) + """ + + def query_col_statistics_with_order_sql = """ + SELECT + count, + ndv, + null_count, + min, + max, + data_size_in_bytes + FROM + ${colStatisticsTblName} + WHERE + col_id IN ${columnNameValues} + ORDER BY + col_id, + min, + max, + count, + ndv, + null_count, + data_size_in_bytes; + """ + + def query_col_histogram_with_order_sql = """ + SELECT + sample_rate, + buckets + FROM + ${colHistogramTblName} + WHERE + col_id IN ${columnNameValues} + ORDER BY + col_id, + sample_rate, + buckets; + """ + + sql """ + DROP DATABASE IF EXISTS ${dbName}; + """ + + sql """ + CREATE DATABASE IF NOT EXISTS ${dbName}; + """ + + sql """ + DROP TABLE IF EXISTS ${tblName}; + """ + + sql """ + CREATE TABLE IF NOT EXISTS ${tblName} ( + 
`t_1682570060000_user_id` LARGEINT NOT NULL, + `t_1682570060000_date` DATE NOT NULL, + `t_1682570060000_city` VARCHAR(20), + `t_1682570060000_age` SMALLINT, + `t_1682570060000_sex` TINYINT, + `t_1682570060000_last_visit_date` DATETIME REPLACE, + `t_1682570060000_cost` BIGINT SUM, + `t_1682570060000_max_dwell_time` INT MAX, + `t_1682570060000_min_dwell_time` INT MIN + ) ENGINE=OLAP + AGGREGATE KEY(`t_1682570060000_user_id`, `t_1682570060000_date`, + `t_1682570060000_city`, `t_1682570060000_age`, `t_1682570060000_sex`) + PARTITION BY LIST(`t_1682570060000_date`) + ( + PARTITION `p_201701` VALUES IN ("2017-10-01"), + PARTITION `p_201702` VALUES IN ("2017-10-02"), + PARTITION `p_201703` VALUES IN ("2017-10-03") + ) + DISTRIBUTED BY HASH(`t_1682570060000_user_id`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ + INSERT INTO ${tblName} ${columnNames} + VALUES (10000, "2017-10-01", "Beijing", 20, 0, "2017-10-01 07:00:00", 15, 2, 2), + (10001, "2017-10-01", "Beijing", 20, 0, "2017-10-01 06:00:00", 20, 10, 10), + (10002, "2017-10-01", "Beijing", 30, 1, "2017-10-01 17:05:45", 2, 22, 22), + (10003, "2017-10-02", "Shanghai", 20, 1, "2017-10-02 12:59:12", 200, 5, 5), + (10004, "2017-10-02", "Shanghai", 20, 1, "2017-10-02 12:59:12", 200, 5, 5), + (10005, "2017-10-02", "Guangzhou", 32, 0, "2017-10-02 11:20:00", 30, 11, 11), + (10006, "2017-10-01", "Shenzhen", 35, 0, "2017-10-01 10:00:15", 100, 3, 3), + (10007, "2017-10-03", "Shenzhen", 35, 0, "2017-10-03 10:20:22", 11, 6, 6), + (10008, "2017-10-03", "Shenzhen", 35, 0, "2017-10-03 10:20:22", 11, 6, 6), + (10009, "2017-10-03", "Shenzhen", 35, 0, "2017-10-03 10:20:22", 11, 6, 6); + """ + + sql """ + DELETE FROM __internal_schema.column_statistics + WHERE col_id IN ( + 't_1682570060000_user_id', 't_1682570060000_date', 't_1682570060000_city', + 't_1682570060000_age', 't_1682570060000_sex', 't_1682570060000_last_visit_date', + 't_1682570060000_cost', 't_1682570060000_max_dwell_time', 
't_1682570060000_min_dwell_time' + ); + """ + + sql """ + DELETE FROM __internal_schema.histogram_statistics + WHERE col_id IN ( + 't_1682570060000_user_id', 't_1682570060000_date', 't_1682570060000_city', + 't_1682570060000_age', 't_1682570060000_sex', 't_1682570060000_last_visit_date', + 't_1682570060000_cost', 't_1682570060000_max_dwell_time', 't_1682570060000_min_dwell_time' + ); + """ + + sql """ + ANALYZE TABLE ${tblName} WITH sync; + """ + + sql """ + ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync; + """ + + qt_sql query_col_statistics_with_order_sql + + qt_sql query_col_histogram_with_order_sql + + sql """ + ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE ROWS 100; + """ + + sql """ + ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE ROWS 100; + """ + + qt_sql query_col_statistics_with_order_sql + + qt_sql query_col_histogram_with_order_sql + + sql """ + ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE PERCENT 100; + """ + + sql """ + ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE PERCENT 100; + """ + + qt_sql query_col_statistics_with_order_sql + + qt_sql query_col_histogram_with_order_sql + + sql """ + ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE ROWS 3; + """ + + sql """ + ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE ROWS 1; + """ + + // TODO Optimize the calculation method of the sample rate of the number of sampling rows + // qt_sql """ + // SELECT + // sample_rate + // FROM + // ${colHistogramTblName} + // WHERE + // col_id IN ${columnNameValues} + // ORDER BY + // col_id, + // sample_rate + // """ + + sql """ + ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE PERCENT 50; + """ + + sql """ + ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE PERCENT 50; + """ + + qt_sql """ + SELECT + sample_rate + FROM + ${colHistogramTblName} + WHERE + col_id IN ${columnNameValues} + ORDER BY + sample_rate + """ + + sql """ + DELETE FROM __internal_schema.column_statistics + WHERE col_id IN ( + 
't_1682570060000_user_id', 't_1682570060000_date', 't_1682570060000_city', + 't_1682570060000_age', 't_1682570060000_sex', 't_1682570060000_last_visit_date', + 't_1682570060000_cost', 't_1682570060000_max_dwell_time', 't_1682570060000_min_dwell_time' + ); + """ + + sql """ + DELETE FROM __internal_schema.histogram_statistics + WHERE col_id IN ( + 't_1682570060000_user_id', 't_1682570060000_date', 't_1682570060000_city', + 't_1682570060000_age', 't_1682570060000_sex', 't_1682570060000_last_visit_date', + 't_1682570060000_cost', 't_1682570060000_max_dwell_time', 't_1682570060000_min_dwell_time' + ); + """ + + sql """ + DROP DATABASE IF EXISTS ${dbName}; + """ +}