[test](statistics) add p0 test of sampling statistics (#19176)

1. Add a p0 regression test for sampled statistics collection.
2. Extend the unique keys of the analysis_jobs table (catalog, database, table, column and index) so that jobs can be deleted by filtering on those columns.
3. Fix the instability of the incremental statistics p0 test.
This commit is contained in:
ElvinWei
2023-04-28 15:50:05 +08:00
committed by GitHub
parent f0852f2ac9
commit 718297d3c1
7 changed files with 429 additions and 16 deletions

View File

@ -219,7 +219,7 @@ public class AnalyzeStmt extends DdlStmt {
if (properties.containsKey(PROPERTY_SAMPLE_PERCENT)) {
checkNumericProperty(PROPERTY_SAMPLE_PERCENT, properties.get(PROPERTY_SAMPLE_PERCENT),
0, 100, false, "should be > 0 and < 100");
1, 100, true, "should be >= 1 and <= 100");
}
if (properties.containsKey(PROPERTY_SAMPLE_ROWS)) {

View File

@ -190,8 +190,9 @@ public class InternalSchemaInitializer extends Thread {
columnDefs.add(new ColumnDef("state", TypeDef.createVarchar(32)));
columnDefs.add(new ColumnDef("schedule_type", TypeDef.createVarchar(32)));
String engineName = "olap";
KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS,
Lists.newArrayList("job_id", "task_id"));
ArrayList<String> uniqueKeys = Lists.newArrayList("job_id", "task_id",
"catalog_name", "db_name", "tbl_name", "col_name", "index_id");
KeysDesc keysDesc = new KeysDesc(KeysType.UNIQUE_KEYS, uniqueKeys);
DistributionDesc distributionDesc = new HashDistributionDesc(
StatisticConstants.STATISTIC_TABLE_BUCKET_COUNT,

View File

@ -86,7 +86,8 @@ public class HistogramTask extends BaseAnalysisTask {
if (info.samplePercent > 0) {
return String.valueOf(info.samplePercent / 100.0);
} else {
double sampRate = (double) info.sampleRows / tbl.getRowCount();
long rowCount = tbl.getRowCount() > 0 ? tbl.getRowCount() : 1;
double sampRate = (double) info.sampleRows / rowCount;
return sampRate >= 1 ? "1.0" : String.format("%.4f", sampRate);
}
}

View File

@ -0,0 +1,159 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
3 2 0 20 32 6
4 3 0 20 35 8
10 4 0 20 35 20
3 1 0 35 35 6
4 2 0 Beijing Shenzhen 29
10 4 0 Beijing Shenzhen 78
3 2 0 Guangzhou Shanghai 25
3 1 0 Shenzhen Shenzhen 24
3 1 0 11 11 24
4 4 0 2 100 32
10 7 0 2 200 80
3 2 0 30 200 24
4 1 0 2017-10-01 2017-10-01 64
10 3 0 2017-10-01 2017-10-03 160
3 1 0 2017-10-02 2017-10-02 48
3 1 0 2017-10-03 2017-10-03 48
4 4 0 2017-10-01 06:00:00 2017-10-01 17:05:45 64
10 7 0 2017-10-01 06:00:00 2017-10-03 10:20:22 160
3 2 0 2017-10-02 11:20:00 2017-10-02 12:59:12 48
3 1 0 2017-10-03 10:20:22 2017-10-03 10:20:22 48
4 4 0 2 22 16
10 7 0 2 22 40
3 2 0 5 11 12
3 1 0 6 6 12
4 4 0 2 22 16
10 7 0 2 22 40
3 2 0 5 11 12
3 1 0 6 6 12
3 1 0 0 0 3
3 2 0 0 1 3
4 2 0 0 1 4
10 2 0 0 1 10
4 4 0 10000 10006 64
10 10 0 10000 10009 160
3 3 0 10003 10005 48
3 3 0 10007 10009 48
-- !sql --
0.0 {"num_buckets":4,"buckets":[{"lower":"20","upper":"20","ndv":1,"count":4,"pre_sum":0},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":4},{"lower":"32","upper":"32","ndv":1,"count":1,"pre_sum":5},{"lower":"35","upper":"35","ndv":1,"count":4,"pre_sum":6}]}
0.0 {"num_buckets":4,"buckets":[{"lower":"Beijing","upper":"Beijing","ndv":1,"count":3,"pre_sum":0},{"lower":"Guangzhou","upper":"Guangzhou","ndv":1,"count":1,"pre_sum":3},{"lower":"Shanghai","upper":"Shanghai","ndv":1,"count":2,"pre_sum":4},{"lower":"Shenzhen","upper":"Shenzhen","ndv":1,"count":4,"pre_sum":6}]}
0.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"11","upper":"11","ndv":1,"count":3,"pre_sum":1},{"lower":"15","upper":"15","ndv":1,"count":1,"pre_sum":4},{"lower":"20","upper":"20","ndv":1,"count":1,"pre_sum":5},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":6},{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":7},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":8}]}
0.0 {"num_buckets":3,"buckets":[{"lower":"2017-10-01","upper":"2017-10-01","ndv":1,"count":4,"pre_sum":0},{"lower":"2017-10-02","upper":"2017-10-02","ndv":1,"count":3,"pre_sum":4},{"lower":"2017-10-03","upper":"2017-10-03","ndv":1,"count":3,"pre_sum":7}]}
0.0 {"num_buckets":7,"buckets":[{"lower":"2017-10-01 06:00:00","upper":"2017-10-01 06:00:00","ndv":1,"count":1,"pre_sum":0},{"lower":"2017-10-01 07:00:00","upper":"2017-10-01 07:00:00","ndv":1,"count":1,"pre_sum":1},{"lower":"2017-10-01 10:00:15","upper":"2017-10-01 10:00:15","ndv":1,"count":1,"pre_sum":2},{"lower":"2017-10-01 17:05:45","upper":"2017-10-01 17:05:45","ndv":1,"count":1,"pre_sum":3},{"lower":"2017-10-02 11:20:00","upper":"2017-10-02 11:20:00","ndv":1,"count":1,"pre_sum":4},{"lower":"2017-10-02 12:59:12","upper":"2017-10-02 12:59:12","ndv":1,"count":2,"pre_sum":5},{"lower":"2017-10-03 10:20:22","upper":"2017-10-03 10:20:22","ndv":1,"count":3,"pre_sum":7}]}
0.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
0.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
0.0 {"num_buckets":2,"buckets":[{"lower":"0","upper":"0","ndv":1,"count":7,"pre_sum":0},{"lower":"1","upper":"1","ndv":1,"count":3,"pre_sum":7}]}
0.0 {"num_buckets":10,"buckets":[{"lower":"10000","upper":"10000","ndv":1,"count":1,"pre_sum":0},{"lower":"10001","upper":"10001","ndv":1,"count":1,"pre_sum":1},{"lower":"10002","upper":"10002","ndv":1,"count":1,"pre_sum":2},{"lower":"10003","upper":"10003","ndv":1,"count":1,"pre_sum":3},{"lower":"10004","upper":"10004","ndv":1,"count":1,"pre_sum":4},{"lower":"10005","upper":"10005","ndv":1,"count":1,"pre_sum":5},{"lower":"10006","upper":"10006","ndv":1,"count":1,"pre_sum":6},{"lower":"10007","upper":"10007","ndv":1,"count":1,"pre_sum":7},{"lower":"10008","upper":"10008","ndv":1,"count":1,"pre_sum":8},{"lower":"10009","upper":"10009","ndv":1,"count":1,"pre_sum":9}]}
-- !sql --
3 2 0 20 32 6
4 3 0 20 35 8
10 4 0 20 35 20
3 1 0 35 35 6
4 2 0 Beijing Shenzhen 29
10 4 0 Beijing Shenzhen 78
3 2 0 Guangzhou Shanghai 25
3 1 0 Shenzhen Shenzhen 24
3 1 0 11 11 24
4 4 0 2 100 32
10 7 0 2 200 80
3 2 0 30 200 24
4 1 0 2017-10-01 2017-10-01 64
10 3 0 2017-10-01 2017-10-03 160
3 1 0 2017-10-02 2017-10-02 48
3 1 0 2017-10-03 2017-10-03 48
4 4 0 2017-10-01 06:00:00 2017-10-01 17:05:45 64
10 7 0 2017-10-01 06:00:00 2017-10-03 10:20:22 160
3 2 0 2017-10-02 11:20:00 2017-10-02 12:59:12 48
3 1 0 2017-10-03 10:20:22 2017-10-03 10:20:22 48
4 4 0 2 22 16
10 7 0 2 22 40
3 2 0 5 11 12
3 1 0 6 6 12
4 4 0 2 22 16
10 7 0 2 22 40
3 2 0 5 11 12
3 1 0 6 6 12
3 1 0 0 0 3
3 2 0 0 1 3
4 2 0 0 1 4
10 2 0 0 1 10
4 4 0 10000 10006 64
10 10 0 10000 10009 160
3 3 0 10003 10005 48
3 3 0 10007 10009 48
-- !sql --
1.0 {"num_buckets":4,"buckets":[{"lower":"20","upper":"20","ndv":1,"count":4,"pre_sum":0},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":4},{"lower":"32","upper":"32","ndv":1,"count":1,"pre_sum":5},{"lower":"35","upper":"35","ndv":1,"count":4,"pre_sum":6}]}
1.0 {"num_buckets":4,"buckets":[{"lower":"Beijing","upper":"Beijing","ndv":1,"count":3,"pre_sum":0},{"lower":"Guangzhou","upper":"Guangzhou","ndv":1,"count":1,"pre_sum":3},{"lower":"Shanghai","upper":"Shanghai","ndv":1,"count":2,"pre_sum":4},{"lower":"Shenzhen","upper":"Shenzhen","ndv":1,"count":4,"pre_sum":6}]}
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"11","upper":"11","ndv":1,"count":3,"pre_sum":1},{"lower":"15","upper":"15","ndv":1,"count":1,"pre_sum":4},{"lower":"20","upper":"20","ndv":1,"count":1,"pre_sum":5},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":6},{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":7},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":8}]}
1.0 {"num_buckets":3,"buckets":[{"lower":"2017-10-01","upper":"2017-10-01","ndv":1,"count":4,"pre_sum":0},{"lower":"2017-10-02","upper":"2017-10-02","ndv":1,"count":3,"pre_sum":4},{"lower":"2017-10-03","upper":"2017-10-03","ndv":1,"count":3,"pre_sum":7}]}
1.0 {"num_buckets":7,"buckets":[{"lower":"2017-10-01 06:00:00","upper":"2017-10-01 06:00:00","ndv":1,"count":1,"pre_sum":0},{"lower":"2017-10-01 07:00:00","upper":"2017-10-01 07:00:00","ndv":1,"count":1,"pre_sum":1},{"lower":"2017-10-01 10:00:15","upper":"2017-10-01 10:00:15","ndv":1,"count":1,"pre_sum":2},{"lower":"2017-10-01 17:05:45","upper":"2017-10-01 17:05:45","ndv":1,"count":1,"pre_sum":3},{"lower":"2017-10-02 11:20:00","upper":"2017-10-02 11:20:00","ndv":1,"count":1,"pre_sum":4},{"lower":"2017-10-02 12:59:12","upper":"2017-10-02 12:59:12","ndv":1,"count":2,"pre_sum":5},{"lower":"2017-10-03 10:20:22","upper":"2017-10-03 10:20:22","ndv":1,"count":3,"pre_sum":7}]}
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
1.0 {"num_buckets":2,"buckets":[{"lower":"0","upper":"0","ndv":1,"count":7,"pre_sum":0},{"lower":"1","upper":"1","ndv":1,"count":3,"pre_sum":7}]}
1.0 {"num_buckets":10,"buckets":[{"lower":"10000","upper":"10000","ndv":1,"count":1,"pre_sum":0},{"lower":"10001","upper":"10001","ndv":1,"count":1,"pre_sum":1},{"lower":"10002","upper":"10002","ndv":1,"count":1,"pre_sum":2},{"lower":"10003","upper":"10003","ndv":1,"count":1,"pre_sum":3},{"lower":"10004","upper":"10004","ndv":1,"count":1,"pre_sum":4},{"lower":"10005","upper":"10005","ndv":1,"count":1,"pre_sum":5},{"lower":"10006","upper":"10006","ndv":1,"count":1,"pre_sum":6},{"lower":"10007","upper":"10007","ndv":1,"count":1,"pre_sum":7},{"lower":"10008","upper":"10008","ndv":1,"count":1,"pre_sum":8},{"lower":"10009","upper":"10009","ndv":1,"count":1,"pre_sum":9}]}
-- !sql --
3 2 0 20 32 6
4 3 0 20 35 8
10 4 0 20 35 20
3 1 0 35 35 6
4 2 0 Beijing Shenzhen 29
10 4 0 Beijing Shenzhen 78
3 2 0 Guangzhou Shanghai 25
3 1 0 Shenzhen Shenzhen 24
3 1 0 11 11 24
4 4 0 2 100 32
10 7 0 2 200 80
3 2 0 30 200 24
4 1 0 2017-10-01 2017-10-01 64
10 3 0 2017-10-01 2017-10-03 160
3 1 0 2017-10-02 2017-10-02 48
3 1 0 2017-10-03 2017-10-03 48
4 4 0 2017-10-01 06:00:00 2017-10-01 17:05:45 64
10 7 0 2017-10-01 06:00:00 2017-10-03 10:20:22 160
3 2 0 2017-10-02 11:20:00 2017-10-02 12:59:12 48
3 1 0 2017-10-03 10:20:22 2017-10-03 10:20:22 48
4 4 0 2 22 16
10 7 0 2 22 40
3 2 0 5 11 12
3 1 0 6 6 12
4 4 0 2 22 16
10 7 0 2 22 40
3 2 0 5 11 12
3 1 0 6 6 12
3 1 0 0 0 3
3 2 0 0 1 3
4 2 0 0 1 4
10 2 0 0 1 10
4 4 0 10000 10006 64
10 10 0 10000 10009 160
3 3 0 10003 10005 48
3 3 0 10007 10009 48
-- !sql --
1.0 {"num_buckets":4,"buckets":[{"lower":"20","upper":"20","ndv":1,"count":4,"pre_sum":0},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":4},{"lower":"32","upper":"32","ndv":1,"count":1,"pre_sum":5},{"lower":"35","upper":"35","ndv":1,"count":4,"pre_sum":6}]}
1.0 {"num_buckets":4,"buckets":[{"lower":"Beijing","upper":"Beijing","ndv":1,"count":3,"pre_sum":0},{"lower":"Guangzhou","upper":"Guangzhou","ndv":1,"count":1,"pre_sum":3},{"lower":"Shanghai","upper":"Shanghai","ndv":1,"count":2,"pre_sum":4},{"lower":"Shenzhen","upper":"Shenzhen","ndv":1,"count":4,"pre_sum":6}]}
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"11","upper":"11","ndv":1,"count":3,"pre_sum":1},{"lower":"15","upper":"15","ndv":1,"count":1,"pre_sum":4},{"lower":"20","upper":"20","ndv":1,"count":1,"pre_sum":5},{"lower":"30","upper":"30","ndv":1,"count":1,"pre_sum":6},{"lower":"100","upper":"100","ndv":1,"count":1,"pre_sum":7},{"lower":"200","upper":"200","ndv":1,"count":2,"pre_sum":8}]}
1.0 {"num_buckets":3,"buckets":[{"lower":"2017-10-01","upper":"2017-10-01","ndv":1,"count":4,"pre_sum":0},{"lower":"2017-10-02","upper":"2017-10-02","ndv":1,"count":3,"pre_sum":4},{"lower":"2017-10-03","upper":"2017-10-03","ndv":1,"count":3,"pre_sum":7}]}
1.0 {"num_buckets":7,"buckets":[{"lower":"2017-10-01 06:00:00","upper":"2017-10-01 06:00:00","ndv":1,"count":1,"pre_sum":0},{"lower":"2017-10-01 07:00:00","upper":"2017-10-01 07:00:00","ndv":1,"count":1,"pre_sum":1},{"lower":"2017-10-01 10:00:15","upper":"2017-10-01 10:00:15","ndv":1,"count":1,"pre_sum":2},{"lower":"2017-10-01 17:05:45","upper":"2017-10-01 17:05:45","ndv":1,"count":1,"pre_sum":3},{"lower":"2017-10-02 11:20:00","upper":"2017-10-02 11:20:00","ndv":1,"count":1,"pre_sum":4},{"lower":"2017-10-02 12:59:12","upper":"2017-10-02 12:59:12","ndv":1,"count":2,"pre_sum":5},{"lower":"2017-10-03 10:20:22","upper":"2017-10-03 10:20:22","ndv":1,"count":3,"pre_sum":7}]}
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
1.0 {"num_buckets":7,"buckets":[{"lower":"2","upper":"2","ndv":1,"count":1,"pre_sum":0},{"lower":"3","upper":"3","ndv":1,"count":1,"pre_sum":1},{"lower":"5","upper":"5","ndv":1,"count":2,"pre_sum":2},{"lower":"6","upper":"6","ndv":1,"count":3,"pre_sum":4},{"lower":"10","upper":"10","ndv":1,"count":1,"pre_sum":7},{"lower":"11","upper":"11","ndv":1,"count":1,"pre_sum":8},{"lower":"22","upper":"22","ndv":1,"count":1,"pre_sum":9}]}
1.0 {"num_buckets":2,"buckets":[{"lower":"0","upper":"0","ndv":1,"count":7,"pre_sum":0},{"lower":"1","upper":"1","ndv":1,"count":3,"pre_sum":7}]}
1.0 {"num_buckets":10,"buckets":[{"lower":"10000","upper":"10000","ndv":1,"count":1,"pre_sum":0},{"lower":"10001","upper":"10001","ndv":1,"count":1,"pre_sum":1},{"lower":"10002","upper":"10002","ndv":1,"count":1,"pre_sum":2},{"lower":"10003","upper":"10003","ndv":1,"count":1,"pre_sum":3},{"lower":"10004","upper":"10004","ndv":1,"count":1,"pre_sum":4},{"lower":"10005","upper":"10005","ndv":1,"count":1,"pre_sum":5},{"lower":"10006","upper":"10006","ndv":1,"count":1,"pre_sum":6},{"lower":"10007","upper":"10007","ndv":1,"count":1,"pre_sum":7},{"lower":"10008","upper":"10008","ndv":1,"count":1,"pre_sum":8},{"lower":"10009","upper":"10009","ndv":1,"count":1,"pre_sum":9}]}
-- !sql --
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5
0.5

View File

@ -273,7 +273,8 @@ suite("analyze_test") {
);"""
sql """
DELETE FROM __internal_schema.analysis_jobs WHERE job_id > 0
DELETE FROM __internal_schema.analysis_jobs
WHERE tbl_name = 'analyze_test_tbl_2';
"""
test {

View File

@ -124,10 +124,10 @@ suite("test_incremental_stats") {
WHERE col_id IN ${columnNameValues};
"""
// TODO delete by database name and table name
sql """
DELETE FROM __internal_schema.analysis_jobs WHERE job_id IS NOT NULL;
"""
DELETE FROM __internal_schema.analysis_jobs
WHERE col_name IN ${columnNameValues};
"""
// 1. Firstly do a full collection of statistics
sql """
@ -270,18 +270,24 @@ suite("test_incremental_stats") {
);
"""
sql """
DELETE FROM __internal_schema.column_statistics
WHERE col_id IN (
// sql """
// DELETE FROM __internal_schema.column_statistics
// WHERE col_id IN (
// 't_1682176142000_user_id', 't_1682176142000_date', 't_1682176142000_city',
// 't_1682176142000_age', 't_1682176142000_sex', 't_1682176142000_last_visit_date',
// 't_1682176142000_cost', 't_1682176142000_max_dwell_time', 't_1682176142000_min_dwell_time',
// 't_1682176142000_new_column'
// );
// """
sql """
DELETE FROM __internal_schema.analysis_jobs
WHERE col_name IN (
't_1682176142000_user_id', 't_1682176142000_date', 't_1682176142000_city',
't_1682176142000_age', 't_1682176142000_sex', 't_1682176142000_last_visit_date',
't_1682176142000_cost', 't_1682176142000_max_dwell_time', 't_1682176142000_min_dwell_time',
't_1682176142000_new_column'
);
"""
sql """
DELETE FROM __internal_schema.analysis_jobs WHERE job_id IS NOT NULL;
);
"""
sql """

View File

@ -0,0 +1,245 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Regression suite for sampled statistics collection.
//
// Flow: create a small list-partitioned aggregate table with 10 rows, then run
// ANALYZE (column statistics and histograms) without sampling, with row-count
// sampling and with percentage sampling. After each round the collected rows are
// read back from the internal statistics tables through `qt_sql`, whose results
// are checked against the recorded expected output, so the order and the tag of
// every `qt_sql` call below must stay stable.
suite("test_sampled_stats") {
    def dbName = "test_sampled_stats"
    def tblName = "${dbName}.example_tbl"

    // Internal tables that ANALYZE writes its results into.
    def colStatisticsTblName = "__internal_schema.column_statistics"
    def colHistogramTblName = "__internal_schema.histogram_statistics"

    // Column identifiers (back-quoted) for the INSERT column list. The columns carry
    // a unique `t_1682570060000_` prefix so that filtering the shared statistics
    // tables by col_id only touches rows produced by this suite.
    def columnNames = """
        (
            `t_1682570060000_user_id`, `t_1682570060000_date`,
            `t_1682570060000_city`, `t_1682570060000_age`, `t_1682570060000_sex`,
            `t_1682570060000_last_visit_date`, `t_1682570060000_cost`,
            `t_1682570060000_max_dwell_time`, `t_1682570060000_min_dwell_time`
        )
    """

    // The same columns as string literals, used in `col_id IN (...)` predicates.
    def columnNameValues = """
        (
            't_1682570060000_user_id', 't_1682570060000_date', 't_1682570060000_city',
            't_1682570060000_age', 't_1682570060000_sex', 't_1682570060000_last_visit_date',
            't_1682570060000_cost', 't_1682570060000_max_dwell_time', 't_1682570060000_min_dwell_time'
        )
    """

    // Reads back the column statistics of this suite's columns. Every selected
    // column also appears in ORDER BY so the row order is deterministic.
    def query_col_statistics_with_order_sql = """
        SELECT
            count,
            ndv,
            null_count,
            min,
            max,
            data_size_in_bytes
        FROM
            ${colStatisticsTblName}
        WHERE
            col_id IN ${columnNameValues}
        ORDER BY
            col_id,
            min,
            max,
            count,
            ndv,
            null_count,
            data_size_in_bytes;
    """

    // Reads back the histograms of this suite's columns in a deterministic order.
    def query_col_histogram_with_order_sql = """
        SELECT
            sample_rate,
            buckets
        FROM
            ${colHistogramTblName}
        WHERE
            col_id IN ${columnNameValues}
        ORDER BY
            col_id,
            sample_rate,
            buckets;
    """

    // Deletes the statistics and histogram rows belonging to this suite's columns.
    // Used both before the first ANALYZE (to drop leftovers of earlier runs) and at
    // the end of the suite, reusing `columnNameValues` instead of repeating the list.
    def deleteCollectedStats = {
        sql """
            DELETE FROM ${colStatisticsTblName}
            WHERE col_id IN ${columnNameValues};
        """

        sql """
            DELETE FROM ${colHistogramTblName}
            WHERE col_id IN ${columnNameValues};
        """
    }

    // ---- Setup: fresh database and table -------------------------------------
    sql """
        DROP DATABASE IF EXISTS ${dbName};
    """

    sql """
        CREATE DATABASE IF NOT EXISTS ${dbName};
    """

    sql """
        DROP TABLE IF EXISTS ${tblName};
    """

    sql """
        CREATE TABLE IF NOT EXISTS ${tblName} (
            `t_1682570060000_user_id` LARGEINT NOT NULL,
            `t_1682570060000_date` DATE NOT NULL,
            `t_1682570060000_city` VARCHAR(20),
            `t_1682570060000_age` SMALLINT,
            `t_1682570060000_sex` TINYINT,
            `t_1682570060000_last_visit_date` DATETIME REPLACE,
            `t_1682570060000_cost` BIGINT SUM,
            `t_1682570060000_max_dwell_time` INT MAX,
            `t_1682570060000_min_dwell_time` INT MIN
        ) ENGINE=OLAP
        AGGREGATE KEY(`t_1682570060000_user_id`, `t_1682570060000_date`,
            `t_1682570060000_city`, `t_1682570060000_age`, `t_1682570060000_sex`)
        PARTITION BY LIST(`t_1682570060000_date`)
        (
            PARTITION `p_201701` VALUES IN ("2017-10-01"),
            PARTITION `p_201702` VALUES IN ("2017-10-02"),
            PARTITION `p_201703` VALUES IN ("2017-10-03")
        )
        DISTRIBUTED BY HASH(`t_1682570060000_user_id`) BUCKETS 1
        PROPERTIES (
            "replication_num" = "1"
        );
    """

    // 10 rows with distinct user ids, spread over the three list partitions.
    sql """
        INSERT INTO ${tblName} ${columnNames}
        VALUES (10000, "2017-10-01", "Beijing", 20, 0, "2017-10-01 07:00:00", 15, 2, 2),
            (10001, "2017-10-01", "Beijing", 20, 0, "2017-10-01 06:00:00", 20, 10, 10),
            (10002, "2017-10-01", "Beijing", 30, 1, "2017-10-01 17:05:45", 2, 22, 22),
            (10003, "2017-10-02", "Shanghai", 20, 1, "2017-10-02 12:59:12", 200, 5, 5),
            (10004, "2017-10-02", "Shanghai", 20, 1, "2017-10-02 12:59:12", 200, 5, 5),
            (10005, "2017-10-02", "Guangzhou", 32, 0, "2017-10-02 11:20:00", 30, 11, 11),
            (10006, "2017-10-01", "Shenzhen", 35, 0, "2017-10-01 10:00:15", 100, 3, 3),
            (10007, "2017-10-03", "Shenzhen", 35, 0, "2017-10-03 10:20:22", 11, 6, 6),
            (10008, "2017-10-03", "Shenzhen", 35, 0, "2017-10-03 10:20:22", 11, 6, 6),
            (10009, "2017-10-03", "Shenzhen", 35, 0, "2017-10-03 10:20:22", 11, 6, 6);
    """

    // Start from a clean slate in the shared internal statistics tables.
    deleteCollectedStats()

    // ---- Round 1: full (unsampled) collection --------------------------------
    sql """
        ANALYZE TABLE ${tblName} WITH sync;
    """

    sql """
        ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync;
    """

    qt_sql query_col_statistics_with_order_sql
    qt_sql query_col_histogram_with_order_sql

    // ---- Round 2: sample by rows, asking for more rows (100) than exist (10) --
    sql """
        ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE ROWS 100;
    """

    sql """
        ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE ROWS 100;
    """

    qt_sql query_col_statistics_with_order_sql
    qt_sql query_col_histogram_with_order_sql

    // ---- Round 3: sample by percent, upper bound of the allowed range ---------
    sql """
        ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE PERCENT 100;
    """

    sql """
        ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE PERCENT 100;
    """

    qt_sql query_col_statistics_with_order_sql
    qt_sql query_col_histogram_with_order_sql

    // ---- Round 4: sample by rows, fewer rows than the table holds -------------
    // Only executed, not verified: see the TODO below.
    sql """
        ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE ROWS 3;
    """

    sql """
        ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE ROWS 1;
    """

    // TODO Optimize the calculation method of the sample rate of the number of sampling rows
    // qt_sql """
    //     SELECT
    //         sample_rate
    //     FROM
    //         ${colHistogramTblName}
    //     WHERE
    //         col_id IN ${columnNameValues}
    //     ORDER BY
    //         col_id,
    //         sample_rate
    // """

    // ---- Round 5: sample by percent, partial sample ---------------------------
    sql """
        ANALYZE TABLE ${tblName} WITH sync WITH SAMPLE PERCENT 50;
    """

    sql """
        ANALYZE TABLE ${tblName} UPDATE HISTOGRAM WITH sync WITH SAMPLE PERCENT 50;
    """

    // Only the recorded sample rate is checked here, not the bucket contents.
    qt_sql """
        SELECT
            sample_rate
        FROM
            ${colHistogramTblName}
        WHERE
            col_id IN ${columnNameValues}
        ORDER BY
            sample_rate
    """

    // ---- Teardown: remove this suite's statistics and its database ------------
    deleteCollectedStats()

    sql """
        DROP DATABASE IF EXISTS ${dbName};
    """
}