[test](statistics) add p0 test of sampling statistics (#19176)
1. Add a p0 regression test for sampled statistics collection. 2. Extend the unique keys of the analysis_jobs table so that test rows can be deleted by the relevant conditions (catalog, database, table, column, index). 3. Fix the instability of the incremental statistics p0 test.
This commit is contained in:
@ -219,7 +219,7 @@ public class AnalyzeStmt extends DdlStmt {
|
||||
|
||||
if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)) {
|
||||
checkNumericProperty(PROPERTY_SAMPLE_PERCENT, properties.get(PROPERTY_SAMPLE_PERCENT),
|
||||
0, 100, false, "should be > 0 and < 100");
|
||||
1, 100, true, "should be >= 1 and <= 100");
|
||||
}
|
||||
|
||||
if (properties.containsKey(PROPERTY_SAMPLE_ROWS)) {
|
||||
|
||||
@ -190,8 +190,9 @@ public class InternalSchemaInitializer extends Thread {
|
||||
columnDefs.add(new ColumnDef("state", TypeDef.createVarchar(32)));
|
||||
columnDefs.add(new ColumnDef("schedule_type", TypeDef.createVarchar(32)));
|
||||
String engineName = "olap";
|
||||
KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS,
|
||||
Lists.newArrayList("job_id", "task_id"));
|
||||
ArrayList<String> uniqueKeys = Lists.newArrayList("job_id", "task_id",
|
||||
"catalog_name", "db_name", "tbl_name", "col_name", "index_id");
|
||||
KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS, uniqueKeys);
|
||||
|
||||
DistributionDesc distributionDesc = new HashDistributionDesc(
|
||||
StatisticConstants.STATISTIC_TABLE_BUCKET_COUNT,
|
||||
|
||||
@ -86,7 +86,8 @@ public class HistogramTask extends BaseAnalysisTask {
|
||||
if (info.samplePercent > 0) {
|
||||
return String.valueOf(info.samplePercent / 100.0);
|
||||
} else {
|
||||
double sampRate = (double) info.sampleRows / tbl.getRowCount();
|
||||
long rowCount = tbl.getRowCount() > 0 ? tbl.getRowCount() : 1;
|
||||
double sampRate = (double) info.sampleRows / rowCount;
|
||||
return sampRate >= 1 ? "1.0" : String.format("%.4f", sampRate);
|
||||
}
|
||||
}
|
||||
|
||||
159
regression-test/data/statistics/sampled_stats_test.out
Normal file
159
regression-test/data/statistics/sampled_stats_test.out
Normal file
@ -0,0 +1,159 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !sql --
|
||||
3 2 0 20 32 6
|
||||
4 3 0 20 35 8
|
||||
10 4 0 20 35 20
|
||||
3 1 0 35 35 6
|
||||
4 2 0 Beijing Shenzhen 29
|
||||
10 4 0 Beijing Shenzhen 78
|
||||
3 2 0 Guangzhou Shanghai 25
|
||||
3 1 0 Shenzhen Shenzhen 24
|
||||
3 1 0 11 11 24
|
||||
4 4 0 2 100 32
|
||||
10 7 0 2 200 80
|
||||
3 2 0 30 200 24
|
||||
4 1 0 2017-10-01 2017-10-01 64
|
||||
10 3 0 2017-10-01 2017-10-03 160
|
||||
3 1 0 2017-10-02 2017-10-02 48
|
||||
3 1 0 2017-10-03 2017-10-03 48
|
||||
4 4 0 2017-10-01 06:00:00 2017-10-01 17:05:45 64
|
||||
10 7 0 2017-10-01 06:00:00 2017-10-03 10:20:22 160
|
||||
3 2 0 2017-10-02 11:20:00 2017-10-02 12:59:12 48
|
||||
3 1 0 2017-10-03 10:20:22 2017-10-03 10:20:22 48
|
||||
4 4 0 2 22 16
|
||||
10 7 0 2 22 40
|
||||
3 2 0 5 11 12
|
||||
3 1 0 6 6 12
|
||||
4 4 0 2 22 16
|
||||
10 7 0 2 22 40
|
||||
3 2 0 5 11 12
|
||||
3 1 0 6 6 12
|
||||
3 1 0 0 0 3
|
||||
3 2 0 0 1 3
|
||||
4 2 0 0 1 4
|
||||
10 2 0 0 1 10
|
||||
4 4 0 10000 10006 64
|
||||
10 10 0 10000 10009 160
|
||||
3 3 0 10003 10005 48
|
||||
3 3 0 10007 10009 48
|
||||
|
||||
-- !sql --
|
||||
0.0 {"num_buckets":4,"buckets":[{"lower":"20","upper":"20","ndv":1,"count":4,"pre_sum":0},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":4},{"lower":"32","upper":"32","ndv":1,"count":1,"pre_sum":5},{"lower":"35","upper":"35","ndv":1,"count":4,"pre_sum":6}]}
|
||||
0.0 {"num_buckets":4,"buckets":[{"lower":"Beijing","upper":"Beijing","ndv":1,"count":3,"pre_sum":0},{"lower":"Guangzhou","upper":"Guangzhou","ndv":1,"count":1,"pre_sum":3},{"lower":"Shanghai","upper":"Shanghai","ndv":1,"count":2,"pre_sum":4},{"lower":"Shenzhen","upper":"Shenzhen","ndv":1,"count":4,"pre_sum":6}]}
|
||||
0.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"11","upper":"11","ndv":1,"count":3,"pre_sum":1},{"lower":"15","upper":"15","ndv":1,"count":1,"pre_sum":4},{"lower":"20","upper":"20","ndv":1,"count":1,"pre_sum":5},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":6},{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":7},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":8}]}
|
||||
0.0 {"num_buckets":3,"buckets":[{"lower":"2017-10-01","upper":"2017-10-01","ndv":1,"count":4,"pre_sum":0},{"lower":"2017-10-02","upper":"2017-10-02","ndv":1,"count":3,"pre_sum":4},{"lower":"2017-10-03","upper":"2017-10-03","ndv":1,"count":3,"pre_sum":7}]}
|
||||
0.0 {"num_buckets":7,"buckets":[{"lower":"2017-10-01 06:00:00","upper":"2017-10-01 06:00:00","ndv":1,"count":1,"pre_sum":0},{"lower":"2017-10-01 07:00:00","upper":"2017-10-01 07:00:00","ndv":1,"count":1,"pre_sum":1},{"lower":"2017-10-01 10:00:15","upper":"2017-10-01 10:00:15","ndv":1,"count":1,"pre_sum":2},{"lower":"2017-10-01 17:05:45","upper":"2017-10-01 17:05:45","ndv":1,"count":1,"pre_sum":3},{"lower":"2017-10-02 11:20:00","upper":"2017-10-02 11:20:00","ndv":1,"count":1,"pre_sum":4},{"lower":"2017-10-02 12:59:12","upper":"2017-10-02 12:59:12","ndv":1,"count":2,"pre_sum":5},{"lower":"2017-10-03 10:20:22","upper":"2017-10-03 10:20:22","ndv":1,"count":3,"pre_sum":7}]}
|
||||
0.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
|
||||
0.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
|
||||
0.0 {"num_buckets":2,"buckets":[{"lower":"0","upper":"0","ndv":1,"count":7,"pre_sum":0},{"lower":"1","upper":"1","ndv":1,"count":3,"pre_sum":7}]}
|
||||
0.0 {"num_buckets":10,"buckets":[{"lower":"10000","upper":"10000","ndv":1,"count":1,"pre_sum":0},{"lower":"10001","upper":"10001","ndv":1,"count":1,"pre_sum":1},{"lower":"10002","upper":"10002","ndv":1,"count":1,"pre_sum":2},{"lower":"10003","upper":"10003","ndv":1,"count":1,"pre_sum":3},{"lower":"10004","upper":"10004","ndv":1,"count":1,"pre_sum":4},{"lower":"10005","upper":"10005","ndv":1,"count":1,"pre_sum":5},{"lower":"10006","upper":"10006","ndv":1,"count":1,"pre_sum":6},{"lower":"10007","upper":"10007","ndv":1,"count":1,"pre_sum":7},{"lower":"10008","upper":"10008","ndv":1,"count":1,"pre_sum":8},{"lower":"10009","upper":"10009","ndv":1,"count":1,"pre_sum":9}]}
|
||||
|
||||
-- !sql --
|
||||
3 2 0 20 32 6
|
||||
4 3 0 20 35 8
|
||||
10 4 0 20 35 20
|
||||
3 1 0 35 35 6
|
||||
4 2 0 Beijing Shenzhen 29
|
||||
10 4 0 Beijing Shenzhen 78
|
||||
3 2 0 Guangzhou Shanghai 25
|
||||
3 1 0 Shenzhen Shenzhen 24
|
||||
3 1 0 11 11 24
|
||||
4 4 0 2 100 32
|
||||
10 7 0 2 200 80
|
||||
3 2 0 30 200 24
|
||||
4 1 0 2017-10-01 2017-10-01 64
|
||||
10 3 0 2017-10-01 2017-10-03 160
|
||||
3 1 0 2017-10-02 2017-10-02 48
|
||||
3 1 0 2017-10-03 2017-10-03 48
|
||||
4 4 0 2017-10-01 06:00:00 2017-10-01 17:05:45 64
|
||||
10 7 0 2017-10-01 06:00:00 2017-10-03 10:20:22 160
|
||||
3 2 0 2017-10-02 11:20:00 2017-10-02 12:59:12 48
|
||||
3 1 0 2017-10-03 10:20:22 2017-10-03 10:20:22 48
|
||||
4 4 0 2 22 16
|
||||
10 7 0 2 22 40
|
||||
3 2 0 5 11 12
|
||||
3 1 0 6 6 12
|
||||
4 4 0 2 22 16
|
||||
10 7 0 2 22 40
|
||||
3 2 0 5 11 12
|
||||
3 1 0 6 6 12
|
||||
3 1 0 0 0 3
|
||||
3 2 0 0 1 3
|
||||
4 2 0 0 1 4
|
||||
10 2 0 0 1 10
|
||||
4 4 0 10000 10006 64
|
||||
10 10 0 10000 10009 160
|
||||
3 3 0 10003 10005 48
|
||||
3 3 0 10007 10009 48
|
||||
|
||||
-- !sql --
|
||||
1.0 {"num_buckets":4,"buckets":[{"lower":"20","upper":"20","ndv":1,"count":4,"pre_sum":0},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":4},{"lower":"32","upper":"32","ndv":1,"count":1,"pre_sum":5},{"lower":"35","upper":"35","ndv":1,"count":4,"pre_sum":6}]}
|
||||
1.0 {"num_buckets":4,"buckets":[{"lower":"Beijing","upper":"Beijing","ndv":1,"count":3,"pre_sum":0},{"lower":"Guangzhou","upper":"Guangzhou","ndv":1,"count":1,"pre_sum":3},{"lower":"Shanghai","upper":"Shanghai","ndv":1,"count":2,"pre_sum":4},{"lower":"Shenzhen","upper":"Shenzhen","ndv":1,"count":4,"pre_sum":6}]}
|
||||
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"11","upper":"11","ndv":1,"count":3,"pre_sum":1},{"lower":"15","upper":"15","ndv":1,"count":1,"pre_sum":4},{"lower":"20","upper":"20","ndv":1,"count":1,"pre_sum":5},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":6},{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":7},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":8}]}
|
||||
1.0 {"num_buckets":3,"buckets":[{"lower":"2017-10-01","upper":"2017-10-01","ndv":1,"count":4,"pre_sum":0},{"lower":"2017-10-02","upper":"2017-10-02","ndv":1,"count":3,"pre_sum":4},{"lower":"2017-10-03","upper":"2017-10-03","ndv":1,"count":3,"pre_sum":7}]}
|
||||
1.0 {"num_buckets":7,"buckets":[{"lower":"2017-10-01 06:00:00","upper":"2017-10-01 06:00:00","ndv":1,"count":1,"pre_sum":0},{"lower":"2017-10-01 07:00:00","upper":"2017-10-01 07:00:00","ndv":1,"count":1,"pre_sum":1},{"lower":"2017-10-01 10:00:15","upper":"2017-10-01 10:00:15","ndv":1,"count":1,"pre_sum":2},{"lower":"2017-10-01 17:05:45","upper":"2017-10-01 17:05:45","ndv":1,"count":1,"pre_sum":3},{"lower":"2017-10-02 11:20:00","upper":"2017-10-02 11:20:00","ndv":1,"count":1,"pre_sum":4},{"lower":"2017-10-02 12:59:12","upper":"2017-10-02 12:59:12","ndv":1,"count":2,"pre_sum":5},{"lower":"2017-10-03 10:20:22","upper":"2017-10-03 10:20:22","ndv":1,"count":3,"pre_sum":7}]}
|
||||
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
|
||||
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
|
||||
1.0 {"num_buckets":2,"buckets":[{"lower":"0","upper":"0","ndv":1,"count":7,"pre_sum":0},{"lower":"1","upper":"1","ndv":1,"count":3,"pre_sum":7}]}
|
||||
1.0 {"num_buckets":10,"buckets":[{"lower":"10000","upper":"10000","ndv":1,"count":1,"pre_sum":0},{"lower":"10001","upper":"10001","ndv":1,"count":1,"pre_sum":1},{"lower":"10002","upper":"10002","ndv":1,"count":1,"pre_sum":2},{"lower":"10003","upper":"10003","ndv":1,"count":1,"pre_sum":3},{"lower":"10004","upper":"10004","ndv":1,"count":1,"pre_sum":4},{"lower":"10005","upper":"10005","ndv":1,"count":1,"pre_sum":5},{"lower":"10006","upper":"10006","ndv":1,"count":1,"pre_sum":6},{"lower":"10007","upper":"10007","ndv":1,"count":1,"pre_sum":7},{"lower":"10008","upper":"10008","ndv":1,"count":1,"pre_sum":8},{"lower":"10009","upper":"10009","ndv":1,"count":1,"pre_sum":9}]}
|
||||
|
||||
-- !sql --
|
||||
3 2 0 20 32 6
|
||||
4 3 0 20 35 8
|
||||
10 4 0 20 35 20
|
||||
3 1 0 35 35 6
|
||||
4 2 0 Beijing Shenzhen 29
|
||||
10 4 0 Beijing Shenzhen 78
|
||||
3 2 0 Guangzhou Shanghai 25
|
||||
3 1 0 Shenzhen Shenzhen 24
|
||||
3 1 0 11 11 24
|
||||
4 4 0 2 100 32
|
||||
10 7 0 2 200 80
|
||||
3 2 0 30 200 24
|
||||
4 1 0 2017-10-01 2017-10-01 64
|
||||
10 3 0 2017-10-01 2017-10-03 160
|
||||
3 1 0 2017-10-02 2017-10-02 48
|
||||
3 1 0 2017-10-03 2017-10-03 48
|
||||
4 4 0 2017-10-01 06:00:00 2017-10-01 17:05:45 64
|
||||
10 7 0 2017-10-01 06:00:00 2017-10-03 10:20:22 160
|
||||
3 2 0 2017-10-02 11:20:00 2017-10-02 12:59:12 48
|
||||
3 1 0 2017-10-03 10:20:22 2017-10-03 10:20:22 48
|
||||
4 4 0 2 22 16
|
||||
10 7 0 2 22 40
|
||||
3 2 0 5 11 12
|
||||
3 1 0 6 6 12
|
||||
4 4 0 2 22 16
|
||||
10 7 0 2 22 40
|
||||
3 2 0 5 11 12
|
||||
3 1 0 6 6 12
|
||||
3 1 0 0 0 3
|
||||
3 2 0 0 1 3
|
||||
4 2 0 0 1 4
|
||||
10 2 0 0 1 10
|
||||
4 4 0 10000 10006 64
|
||||
10 10 0 10000 10009 160
|
||||
3 3 0 10003 10005 48
|
||||
3 3 0 10007 10009 48
|
||||
|
||||
-- !sql --
|
||||
1.0 {"num_buckets":4,"buckets":[{"lower":"20","upper":"20","ndv":1,"count":4,"pre_sum":0},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":4},{"lower":"32","upper":"32","ndv":1,"count":1,"pre_sum":5},{"lower":"35","upper":"35","ndv":1,"count":4,"pre_sum":6}]}
|
||||
1.0 {"num_buckets":4,"buckets":[{"lower":"Beijing","upper":"Beijing","ndv":1,"count":3,"pre_sum":0},{"lower":"Guangzhou","upper":"Guangzhou","ndv":1,"count":1,"pre_sum":3},{"lower":"Shanghai","upper":"Shanghai","ndv":1,"count":2,"pre_sum":4},{"lower":"Shenzhen","upper":"Shenzhen","ndv":1,"count":4,"pre_sum":6}]}
|
||||
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"11","upper":"11","ndv":1,"count":3,"pre_sum":1},{"lower":"15","upper":"15","ndv":1,"count":1,"pre_sum":4},{"lower":"20","upper":"20","ndv":1,"count":1,"pre_sum":5},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":6},{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":7},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":8}]}
|
||||
1.0 {"num_buckets":3,"buckets":[{"lower":"2017-10-01","upper":"2017-10-01","ndv":1,"count":4,"pre_sum":0},{"lower":"2017-10-02","upper":"2017-10-02","ndv":1,"count":3,"pre_sum":4},{"lower":"2017-10-03","upper":"2017-10-03","ndv":1,"count":3,"pre_sum":7}]}
|
||||
1.0 {"num_buckets":7,"buckets":[{"lower":"2017-10-01 06:00:00","upper":"2017-10-01 06:00:00","ndv":1,"count":1,"pre_sum":0},{"lower":"2017-10-01 07:00:00","upper":"2017-10-01 07:00:00","ndv":1,"count":1,"pre_sum":1},{"lower":"2017-10-01 10:00:15","upper":"2017-10-01 10:00:15","ndv":1,"count":1,"pre_sum":2},{"lower":"2017-10-01 17:05:45","upper":"2017-10-01 17:05:45","ndv":1,"count":1,"pre_sum":3},{"lower":"2017-10-02 11:20:00","upper":"2017-10-02 11:20:00","ndv":1,"count":1,"pre_sum":4},{"lower":"2017-10-02 12:59:12","upper":"2017-10-02 12:59:12","ndv":1,"count":2,"pre_sum":5},{"lower":"2017-10-03 10:20:22","upper":"2017-10-03 10:20:22","ndv":1,"count":3,"pre_sum":7}]}
|
||||
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
|
||||
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
|
||||
1.0 {"num_buckets":2,"buckets":[{"lower":"0","upper":"0","ndv":1,"count":7,"pre_sum":0},{"lower":"1","upper":"1","ndv":1,"count":3,"pre_sum":7}]}
|
||||
1.0 {"num_buckets":10,"buckets":[{"lower":"10000","upper":"10000","ndv":1,"count":1,"pre_sum":0},{"lower":"10001","upper":"10001","ndv":1,"count":1,"pre_sum":1},{"lower":"10002","upper":"10002","ndv":1,"count":1,"pre_sum":2},{"lower":"10003","upper":"10003","ndv":1,"count":1,"pre_sum":3},{"lower":"10004","upper":"10004","ndv":1,"count":1,"pre_sum":4},{"lower":"10005","upper":"10005","ndv":1,"count":1,"pre_sum":5},{"lower":"10006","upper":"10006","ndv":1,"count":1,"pre_sum":6},{"lower":"10007","upper":"10007","ndv":1,"count":1,"pre_sum":7},{"lower":"10008","upper":"10008","ndv":1,"count":1,"pre_sum":8},{"lower":"10009","upper":"10009","ndv":1,"count":1,"pre_sum":9}]}
|
||||
|
||||
-- !sql --
|
||||
0.5
|
||||
0.5
|
||||
0.5
|
||||
0.5
|
||||
0.5
|
||||
0.5
|
||||
0.5
|
||||
0.5
|
||||
0.5
|
||||
|
||||
@ -273,7 +273,8 @@ suite("analyze_test") {
|
||||
);"""
|
||||
|
||||
sql """
|
||||
DELETE FROM __internal_schema.analysis_jobs WHERE job_id > 0
|
||||
DELETE FROM __internal_schema.analysis_jobs
|
||||
WHERE tbl_name = 'analyze_test_tbl_2';
|
||||
"""
|
||||
|
||||
test {
|
||||
|
||||
@ -124,10 +124,10 @@ suite("test_incremental_stats") {
|
||||
WHERE col_id IN ${columnNameValues};
|
||||
"""
|
||||
|
||||
// TODO delete by database name and table name
|
||||
sql """
|
||||
DELETE FROM __internal_schema.analysis_jobs WHERE job_id IS NOT NULL;
|
||||
"""
|
||||
DELETE FROM __internal_schema.analysis_jobs
|
||||
WHERE col_name IN ${columnNameValues};
|
||||
"""
|
||||
|
||||
// 1. Firstly do a full collection of statistics
|
||||
sql """
|
||||
@ -270,18 +270,24 @@ suite("test_incremental_stats") {
|
||||
);
|
||||
"""
|
||||
|
||||
sql """
|
||||
DELETE FROM __internal_schema.column_statistics
|
||||
WHERE col_id IN (
|
||||
// sql """
|
||||
// DELETE FROM __internal_schema.column_statistics
|
||||
// WHERE col_id IN (
|
||||
// 't_1682176142000_user_id', 't_1682176142000_date', 't_1682176142000_city',
|
||||
// 't_1682176142000_age', 't_1682176142000_sex', 't_1682176142000_last_visit_date',
|
||||
// 't_1682176142000_cost', 't_1682176142000_max_dwell_time', 't_1682176142000_min_dwell_time',
|
||||
// 't_1682176142000_new_column'
|
||||
// );
|
||||
// """
|
||||
|
||||
sql """
|
||||
DELETE FROM __internal_schema.analysis_jobs
|
||||
WHERE col_name IN (
|
||||
't_1682176142000_user_id', 't_1682176142000_date', 't_1682176142000_city',
|
||||
't_1682176142000_age', 't_1682176142000_sex', 't_1682176142000_last_visit_date',
|
||||
't_1682176142000_cost', 't_1682176142000_max_dwell_time', 't_1682176142000_min_dwell_time',
|
||||
't_1682176142000_new_column'
|
||||
);
|
||||
"""
|
||||
|
||||
sql """
|
||||
DELETE FROM __internal_schema.analysis_jobs WHERE job_id IS NOT NULL;
|
||||
);
|
||||
"""
|
||||
|
||||
sql """
|
||||
|
||||
245
regression-test/suites/statistics/sampled_stats_test.groovy
Normal file
245
regression-test/suites/statistics/sampled_stats_test.groovy
Normal file
@ -0,0 +1,245 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
suite("test_sampled_stats") {
|
||||
def dbName = "test_sampled_stats"
|
||||
def tblName = "${dbName}.example_tbl"
|
||||
|
||||
def colStatisticsTblName = "__internal_schema.column_statistics"
|
||||
def colHistogramTblName = "__internal_schema.histogram_statistics"
|
||||
|
||||
def columnNames = """
|
||||
(
|
||||
`t_1682570060000_user_id`, `t_1682570060000_date`,
|
||||
`t_1682570060000_city`, `t_1682570060000_age`, `t_1682570060000_sex`,
|
||||
`t_1682570060000_last_visit_date`, `t_1682570060000_cost`,
|
||||
`t_1682570060000_max_dwell_time`, `t_1682570060000_min_dwell_time`
|
||||
)
|
||||
"""
|
||||
|
||||
def columnNameValues = """
|
||||
(
|
||||
't_1682570060000_user_id', 't_1682570060000_date', 't_1682570060000_city',
|
||||
't_1682570060000_age', 't_1682570060000_sex', 't_1682570060000_last_visit_date',
|
||||
't_1682570060000_cost', 't_1682570060000_max_dwell_time', 't_1682570060000_min_dwell_time'
|
||||
)
|
||||
"""
|
||||
|
||||
def query_col_statistics_with_order_sql = """
|
||||
SELECT
|
||||
count,
|
||||
ndv,
|
||||
null_count,
|
||||
min,
|
||||
max,
|
||||
data_size_in_bytes
|
||||
FROM
|
||||
${colStatisticsTblName}
|
||||
WHERE
|
||||
col_id IN ${columnNameValues}
|
||||
ORDER BY
|
||||
col_id,
|
||||
min,
|
||||
max,
|
||||
count,
|
||||
ndv,
|
||||
null_count,
|
||||
data_size_in_bytes;
|
||||
"""
|
||||
|
||||
def query_col_histogram_with_order_sql = """
|
||||
SELECT
|
||||
sample_rate,
|
||||
buckets
|
||||
FROM
|
||||
${colHistogramTblName}
|
||||
WHERE
|
||||
col_id IN ${columnNameValues}
|
||||
ORDER BY
|
||||
col_id,
|
||||
sample_rate,
|
||||
buckets;
|
||||
"""
|
||||
|
||||
sql """
|
||||
DROP DATABASE IF EXISTS ${dbName};
|
||||
"""
|
||||
|
||||
sql """
|
||||
CREATE DATABASE IF NOT EXISTS ${dbName};
|
||||
"""
|
||||
|
||||
sql """
|
||||
DROP TABLE IF EXISTS ${tblName};
|
||||
"""
|
||||
|
||||
sql """
|
||||
CREATE TABLE IF NOT EXISTS ${tblName} (
|
||||
`t_1682570060000_user_id` LARGEINT NOT NULL,
|
||||
`t_1682570060000_date` DATE NOT NULL,
|
||||
`t_1682570060000_city` VARCHAR(20),
|
||||
`t_1682570060000_age` SMALLINT,
|
||||
`t_1682570060000_sex` TINYINT,
|
||||
`t_1682570060000_last_visit_date` DATETIME REPLACE,
|
||||
`t_1682570060000_cost` BIGINT SUM,
|
||||
`t_1682570060000_max_dwell_time` INT MAX,
|
||||
`t_1682570060000_min_dwell_time` INT MIN
|
||||
) ENGINE=OLAP
|
||||
AGGREGATE KEY(`t_1682570060000_user_id`, `t_1682570060000_date`,
|
||||
`t_1682570060000_city`, `t_1682570060000_age`, `t_1682570060000_sex`)
|
||||
PARTITION BY LIST(`t_1682570060000_date`)
|
||||
(
|
||||
PARTITION `p_201701` VALUES IN ("2017-10-01"),
|
||||
PARTITION `p_201702` VALUES IN ("2017-10-02"),
|
||||
PARTITION `p_201703` VALUES IN ("2017-10-03")
|
||||
)
|
||||
DISTRIBUTED BY HASH(`t_1682570060000_user_id`) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
"""
|
||||
|
||||
sql """
|
||||
INSERT INTO ${tblName} ${columnNames}
|
||||
VALUES (10000, "2017-10-01", "Beijing", 20, 0, "2017-10-01 07:00:00", 15, 2, 2),
|
||||
(10001, "2017-10-01", "Beijing", 20, 0, "2017-10-01 06:00:00", 20, 10, 10),
|
||||
(10002, "2017-10-01", "Beijing", 30, 1, "2017-10-01 17:05:45", 2, 22, 22),
|
||||
(10003, "2017-10-02", "Shanghai", 20, 1, "2017-10-02 12:59:12", 200, 5, 5),
|
||||
(10004, "2017-10-02", "Shanghai", 20, 1, "2017-10-02 12:59:12", 200, 5, 5),
|
||||
(10005, "2017-10-02", "Guangzhou", 32, 0, "2017-10-02 11:20:00", 30, 11, 11),
|
||||
(10006, "2017-10-01", "Shenzhen", 35, 0, "2017-10-01 10:00:15", 100, 3, 3),
|
||||
(10007, "2017-10-03", "Shenzhen", 35, 0, "2017-10-03 10:20:22", 11, 6, 6),
|
||||
(10008, "2017-10-03", "Shenzhen", 35, 0, "2017-10-03 10:20:22", 11, 6, 6),
|
||||
(10009, "2017-10-03", "Shenzhen", 35, 0, "2017-10-03 10:20:22", 11, 6, 6);
|
||||
"""
|
||||
|
||||
sql """
|
||||
DELETE FROM __internal_schema.column_statistics
|
||||
WHERE col_id IN (
|
||||
't_1682570060000_user_id', 't_1682570060000_date', 't_1682570060000_city',
|
||||
't_1682570060000_age', 't_1682570060000_sex', 't_1682570060000_last_visit_date',
|
||||
't_1682570060000_cost', 't_1682570060000_max_dwell_time', 't_1682570060000_min_dwell_time'
|
||||
);
|
||||
"""
|
||||
|
||||
sql """
|
||||
DELETE FROM __internal_schema.histogram_statistics
|
||||
WHERE col_id IN (
|
||||
't_1682570060000_user_id', 't_1682570060000_date', 't_1682570060000_city',
|
||||
't_1682570060000_age', 't_1682570060000_sex', 't_1682570060000_last_visit_date',
|
||||
't_1682570060000_cost', 't_1682570060000_max_dwell_time', 't_1682570060000_min_dwell_time'
|
||||
);
|
||||
"""
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE ${tblName} WITH sync;
|
||||
"""
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync;
|
||||
"""
|
||||
|
||||
qt_sql query_col_statistics_with_order_sql
|
||||
|
||||
qt_sql query_col_histogram_with_order_sql
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE ROWS 100;
|
||||
"""
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE ROWS 100;
|
||||
"""
|
||||
|
||||
qt_sql query_col_statistics_with_order_sql
|
||||
|
||||
qt_sql query_col_histogram_with_order_sql
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE PERCENT 100;
|
||||
"""
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE PERCENT 100;
|
||||
"""
|
||||
|
||||
qt_sql query_col_statistics_with_order_sql
|
||||
|
||||
qt_sql query_col_histogram_with_order_sql
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE ROWS 3;
|
||||
"""
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE ROWS 1;
|
||||
"""
|
||||
|
||||
// TODO: Improve how the sample rate is calculated when sampling by row count (SAMPLE ROWS), then re-enable the check below
|
||||
// qt_sql """
|
||||
// SELECT
|
||||
// sample_rate
|
||||
// FROM
|
||||
// ${colHistogramTblName}
|
||||
// WHERE
|
||||
// col_id IN ${columnNameValues}
|
||||
// ORDER BY
|
||||
// col_id,
|
||||
// sample_rate
|
||||
// """
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE PERCENT 50;
|
||||
"""
|
||||
|
||||
sql """
|
||||
ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE PERCENT 50;
|
||||
"""
|
||||
|
||||
qt_sql """
|
||||
SELECT
|
||||
sample_rate
|
||||
FROM
|
||||
${colHistogramTblName}
|
||||
WHERE
|
||||
col_id IN ${columnNameValues}
|
||||
ORDER BY
|
||||
sample_rate
|
||||
"""
|
||||
|
||||
sql """
|
||||
DELETE FROM __internal_schema.column_statistics
|
||||
WHERE col_id IN (
|
||||
't_1682570060000_user_id', 't_1682570060000_date', 't_1682570060000_city',
|
||||
't_1682570060000_age', 't_1682570060000_sex', 't_1682570060000_last_visit_date',
|
||||
't_1682570060000_cost', 't_1682570060000_max_dwell_time', 't_1682570060000_min_dwell_time'
|
||||
);
|
||||
"""
|
||||
|
||||
sql """
|
||||
DELETE FROM __internal_schema.histogram_statistics
|
||||
WHERE col_id IN (
|
||||
't_1682570060000_user_id', 't_1682570060000_date', 't_1682570060000_city',
|
||||
't_1682570060000_age', 't_1682570060000_sex', 't_1682570060000_last_visit_date',
|
||||
't_1682570060000_cost', 't_1682570060000_max_dwell_time', 't_1682570060000_min_dwell_time'
|
||||
);
|
||||
"""
|
||||
|
||||
sql """
|
||||
DROP DATABASE IF EXISTS ${dbName};
|
||||
"""
|
||||
}
|
||||
Reference in New Issue
Block a user