[Improvement](statistics) optimise histogram keyword (#16369)

This commit is contained in:
ElvinWei
2023-02-03 23:02:41 +08:00
committed by GitHub
parent 125b60b4b9
commit f443ebfd9a
6 changed files with 41 additions and 39 deletions

View File

@ -47,14 +47,14 @@ Alias function: `hist(expr[, DOUBLE sample_rate, INT max_bucket_num])`
### example
```
MySQL [test]> SELECT `histogram`(c_float) FROM histogram_test;
MySQL [test]> SELECT histogram(c_float) FROM histogram_test;
+-------------------------------------------------------------------------------------------------------------------------------------+
| histogram(`c_float`) |
+-------------------------------------------------------------------------------------------------------------------------------------+
| {"sample_rate":0.2,"max_bucket_num":128,"bucket_num":3,"buckets":[{"lower":"0.1","upper":"0.1","count":1,"pre_sum":0,"ndv":1},...]} |
+-------------------------------------------------------------------------------------------------------------------------------------+
MySQL [test]> SELECT `histogram`(c_string, 0.5, 2) FROM histogram_test;
MySQL [test]> SELECT histogram(c_string, 0.5, 2) FROM histogram_test;
+-------------------------------------------------------------------------------------------------------------------------------------+
| histogram(`c_string`) |
+-------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -49,14 +49,14 @@ histogram(直方图)函数用于描述数据分布情况,它使用“等
### example
```
MySQL [test]> SELECT `histogram`(c_float) FROM histogram_test;
MySQL [test]> SELECT histogram(c_float) FROM histogram_test;
+-------------------------------------------------------------------------------------------------------------------------------------+
| histogram(`c_float`) |
+-------------------------------------------------------------------------------------------------------------------------------------+
| {"sample_rate":0.2,"max_bucket_num":128,"bucket_num":3,"buckets":[{"lower":"0.1","upper":"0.1","count":1,"pre_sum":0,"ndv":1},...]} |
+-------------------------------------------------------------------------------------------------------------------------------------+
MySQL [test]> SELECT `histogram`(c_string, 0.5, 2) FROM histogram_test;
MySQL [test]> SELECT histogram(c_string, 0.5, 2) FROM histogram_test;
+-------------------------------------------------------------------------------------------------------------------------------------+
| histogram(`c_string`) |
+-------------------------------------------------------------------------------------------------------------------------------------+

View File

@ -7077,6 +7077,8 @@ keyword ::=
{: RESULT = id; :}
| KW_IGNORE:id
{: RESULT = id; :}
| KW_HISTOGRAM:id
{: RESULT = id; :}
;
// Identifiers that contain keywords

View File

@ -45,7 +45,7 @@ public class HistogramTask extends BaseAnalysisTask {
+ " ${idxId} AS idx_id, "
+ " '${colId}' AS col_id, "
+ " ${sampleRate} AS sample_rate, "
+ " `HISTOGRAM`(`${colName}`, 1, ${maxBucketNum}) AS buckets, "
+ " HISTOGRAM(`${colName}`, 1, ${maxBucketNum}) AS buckets, "
+ " NOW() AS create_time "
+ "FROM "
+ " `${dbName}`.`${tblName}` TABLESAMPLE (${percentValue} PERCENT)";

View File

@ -107,23 +107,23 @@ suite("test_aggregate_histogram") {
// Nereids doesn't support decimalV3 function
// qt_select """
// SELECT
// `histogram`(c_bool, 1.0, 1),
// `histogram`(c_tinyint, 1.0, 1),
// `histogram`(c_smallint, 1.0, 1),
// `histogram`(c_bigint, 1.0, 1),
// `histogram`(c_largeint, 1.0, 1),
// `histogram`(c_float, 1.0, 1),
// `histogram`(c_double, 1.0, 1),
// `histogram`(c_decimal, 1.0, 1),
// `histogram`(c_decimalv3, 1.0, 1),
// `histogram`(c_char, 1.0, 1),
// `histogram`(c_varchar, 1.0, 1),
// `histogram`(c_string, 1.0, 1),
// `histogram`(c_date, 1.0, 1),
// `histogram`(c_datev2, 1.0, 1),
// `histogram`(c_date_time, 1.0, 1),
// `histogram`(c_date_timev2, 1.0, 1),
// `histogram`(c_string_not_null, 1.0, 1)
// histogram(c_bool, 1.0, 1),
// histogram(c_tinyint, 1.0, 1),
// histogram(c_smallint, 1.0, 1),
// histogram(c_bigint, 1.0, 1),
// histogram(c_largeint, 1.0, 1),
// histogram(c_float, 1.0, 1),
// histogram(c_double, 1.0, 1),
// histogram(c_decimal, 1.0, 1),
// histogram(c_decimalv3, 1.0, 1),
// histogram(c_char, 1.0, 1),
// histogram(c_varchar, 1.0, 1),
// histogram(c_string, 1.0, 1),
// histogram(c_date, 1.0, 1),
// histogram(c_datev2, 1.0, 1),
// histogram(c_date_time, 1.0, 1),
// histogram(c_date_timev2, 1.0, 1),
// histogram(c_string_not_null, 1.0, 1)
// FROM
// ${tableName}
// """

View File

@ -103,23 +103,23 @@ suite("test_aggregate_histogram") {
// Test without GROUP BY
qt_select """
SELECT
`histogram`(c_bool, 1.0, 1),
`histogram`(c_tinyint, 1.0, 1),
`histogram`(c_smallint, 1.0, 1),
`histogram`(c_bigint, 1.0, 1),
`histogram`(c_largeint, 1.0, 1),
`histogram`(c_float, 1.0, 1),
`histogram`(c_double, 1.0, 1),
`histogram`(c_decimal, 1.0, 1),
`histogram`(c_decimalv3, 1.0, 1),
`histogram`(c_char, 1.0, 1),
`histogram`(c_varchar, 1.0, 1),
`histogram`(c_string, 1.0, 1),
`histogram`(c_date, 1.0, 1),
`histogram`(c_datev2, 1.0, 1),
`histogram`(c_date_time, 1.0, 1),
`histogram`(c_date_timev2, 1.0, 1),
`histogram`(c_string_not_null, 1.0, 1)
histogram(c_bool, 1.0, 1),
histogram(c_tinyint, 1.0, 1),
histogram(c_smallint, 1.0, 1),
histogram(c_bigint, 1.0, 1),
histogram(c_largeint, 1.0, 1),
histogram(c_float, 1.0, 1),
histogram(c_double, 1.0, 1),
histogram(c_decimal, 1.0, 1),
histogram(c_decimalv3, 1.0, 1),
histogram(c_char, 1.0, 1),
histogram(c_varchar, 1.0, 1),
histogram(c_string, 1.0, 1),
histogram(c_date, 1.0, 1),
histogram(c_datev2, 1.0, 1),
histogram(c_date_time, 1.0, 1),
histogram(c_date_timev2, 1.0, 1),
histogram(c_string_not_null, 1.0, 1)
FROM
${tableName}
"""