From 2fd2b714c1fe64db68ecad4efd2661ddf122f51a Mon Sep 17 00:00:00 2001 From: HangyuanLiu <460660596@qq.com> Date: Thu, 11 Jul 2019 16:45:45 +0800 Subject: [PATCH] Add aggregate function doc (#1434) --- .../sql-functions/aggregate-functions/avg.md | 30 ++++++++++++++++ .../aggregate-functions/count.md | 34 +++++++++++++++++++ .../aggregate-functions/count_distinct.md | 20 +++++++++++ .../aggregate-functions/group_concat.md | 21 ++++++++++++ .../aggregate-functions/hll_union_agg.md | 25 ++++++++++++++ .../sql-functions/aggregate-functions/max.md | 19 +++++++++++ .../sql-functions/aggregate-functions/min.md | 19 +++++++++++ .../sql-functions/aggregate-functions/ndv.md | 21 ++++++++++++ .../aggregate-functions/percentile_approx.md | 21 ++++++++++++ .../aggregate-functions/stddev.md | 26 ++++++++++++++ .../aggregate-functions/stddev_samp.md | 19 +++++++++++ .../sql-functions/aggregate-functions/sum.md | 19 +++++++++++ .../aggregate-functions/var_samp.md | 19 +++++++++++ .../aggregate-functions/variance.md | 26 ++++++++++++++ 14 files changed, 319 insertions(+) create mode 100644 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/avg.md create mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md create mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md create mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/group_concat.md create mode 100644 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md create mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md create mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md create mode 100644 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md create mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md create mode 
100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md create mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md create mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md create mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md create mode 100755 docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/avg.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/avg.md new file mode 100644 index 0000000000..d73f2a9a8c --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/avg.md @@ -0,0 +1,30 @@ +# AVG + +## Syntax + +`AVG([DISTINCT] expr)` + +## Description + +用于返回选中字段的平均值 + +可选的DISTINCT参数可以用来返回去重后的平均值 + +## Examples + +``` +mysql> SELECT datetime, AVG(cost_time) FROM log_statis group by datetime; ++---------------------+--------------------+ +| datetime | avg(`cost_time`) | ++---------------------+--------------------+ +| 2019-07-03 21:01:20 | 25.827794561933533 | ++---------------------+--------------------+ + +mysql> SELECT datetime, AVG(distinct cost_time) FROM log_statis group by datetime; ++---------------------+---------------------------+ +| datetime | avg(DISTINCT `cost_time`) | ++---------------------+---------------------------+ +| 2019-07-04 02:23:24 | 20.666666666666668 | ++---------------------+---------------------------+ + +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md new file mode 100755 index 0000000000..507ac1c649 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count.md @@ -0,0 +1,34 @@ +# COUNT + +## Syntax + +`COUNT([DISTINCT] expr)` + +## Description + 
+用于返回满足要求的行的数目 + +## Examples + +``` +MySQL > select count(*) from log_statis group by datetime; ++----------+ +| count(*) | ++----------+ +| 28515903 | ++----------+ + +MySQL > select count(datetime) from log_statis group by datetime; ++-------------------+ +| count(`datetime`) | ++-------------------+ +| 28521682 | ++-------------------+ + +MySQL > select count(distinct datetime) from log_statis group by datetime; ++-------------------------------+ +| count(DISTINCT `datetime`) | ++-------------------------------+ +| 71045 | ++-------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md new file mode 100755 index 0000000000..ecaee4400e --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/count_distinct.md @@ -0,0 +1,20 @@ +# COUNT_DISTINCT + +## Syntax + +`COUNT_DISTINCT(expr)` + +## Description + +用于返回expr去重后的非NULL值的数目 + +## Examples + +``` +MySQL > select count_distinct(query_id) from log_statis group by datetime; ++----------------------------+ +| count_distinct(`query_id`) | ++----------------------------+ +| 577 | ++----------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/group_concat.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/group_concat.md new file mode 100755 index 0000000000..c326dbb10a --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/group_concat.md @@ -0,0 +1,21 @@ +# GROUP_CONCAT + +## Syntax + +`GROUP_CONCAT(expr)` + +## Description + +用于返回选中字段字符串连接起来的新字符串 + +使用逗号连接 + +## Examples +``` +MySQL> select group_concat(`query_id`) from log_statis group by datetime; ++---------------------------------------------------------------------------------------------------------+ +| group_concat(`query_id`) | 
++---------------------------------------------------------------------------------------------------------+ +| 445f5875f8854dfa:b9358d5cd86488a2, d5055534e6c04fa0:9b3f645913c4973c, a55801d0764d47fe:a98f3710649dc558 | ++---------------------------------------------------------------------------------------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md new file mode 100644 index 0000000000..69253c8c72 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/hll_union_agg.md @@ -0,0 +1,25 @@ +# HLL_UNION_AGG + +## Syntax + +`HLL_UNION_AGG(hll)` + +## Description + +HLL是基于HyperLogLog算法的工程实现,用于保存HyperLogLog计算过程的中间结果 + +它只能作为表的value列类型、通过聚合来不断的减少数据量,以此来实现加快查询的目的 + +基于它得到的是一个估算结果,误差大概在1%左右,hll列是通过其它列或者导入数据里面的数据生成的 + +导入的时候通过hll_hash函数来指定数据中哪一列用于生成hll列,它常用于替代count distinct,通过结合rollup在业务上用于快速计算uv等 + +## Examples +``` +MySQL > select HLL_UNION_AGG(uv_set) from test_uv; ++-------------------------+ +| HLL_UNION_AGG(`uv_set`) | ++-------------------------+ +| 17721 | ++-------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md new file mode 100755 index 0000000000..d4ca250647 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/max.md @@ -0,0 +1,19 @@ +# MAX + +## Syntax + +`MAX(expr)` + +## Description + +返回expr表达式的最大值 + +## Examples +``` +MySQL > select max(scan_rows) from log_statis group by datetime; ++------------------+ +| max(`scan_rows`) | ++------------------+ +| 4671587 | ++------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md new file mode 100755 index 
0000000000..e6c8f18516 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/min.md @@ -0,0 +1,19 @@ +# MIN + +## Syntax + +`MIN(expr)` + +## Description + +返回expr表达式的最小值 + +## Examples +``` +MySQL > select min(scan_rows) from log_statis group by datetime; ++------------------+ +| min(`scan_rows`) | ++------------------+ +| 0 | ++------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md new file mode 100644 index 0000000000..ff3a087534 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/ndv.md @@ -0,0 +1,21 @@ +# NDV + +## Syntax + +`NDV(expr)` + +## Description + +返回类似于 COUNT(DISTINCT col) 结果的近似值聚合函数。 + +它比 COUNT 和 DISTINCT 组合的速度更快,并使用固定大小的内存,因此对于高基数的列可以使用更少的内存。 + +## Examples +``` +MySQL > select ndv(query_id) from log_statis group by datetime; ++-----------------+ +| ndv(`query_id`) | ++-----------------+ +| 17721 | ++-----------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md new file mode 100755 index 0000000000..329f6b104a --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/percentile_approx.md @@ -0,0 +1,21 @@ +# PERCENTILE_APPROX + +## Syntax + +`PERCENTILE_APPROX(expr, DOUBLE p)` + +## Description + +返回第p个百分位点的近似值,p的值介于0到1之间 + +该函数使用固定大小的内存,因此对于高基数的列可以使用更少的内存,可用于计算tp99等统计值 + +## Examples +``` +MySQL > select `table`, percentile_approx(cost_time,0.99) from log_statis group by `table`; ++----------+--------------------------------------+ +| table | percentile_approx(`cost_time`, 0.99) | ++----------+--------------------------------------+ +| test | 54.22 | ++----------+--------------------------------------+ +``` \ No newline at end of file diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md new file mode 100755 index 0000000000..393ffdf141 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev.md @@ -0,0 +1,26 @@ +# STDDEV,STDDEV_POP + +## Syntax + +`STDDEV(expr)` + +## Description + +返回expr表达式的标准差 + +## Examples +``` +MySQL > select stddev(scan_rows) from log_statis group by datetime; ++---------------------+ +| stddev(`scan_rows`) | ++---------------------+ +| 2.3736656687790934 | ++---------------------+ + +MySQL > select stddev_pop(scan_rows) from log_statis group by datetime; ++-------------------------+ +| stddev_pop(`scan_rows`) | ++-------------------------+ +| 2.3722760595994914 | ++-------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md new file mode 100755 index 0000000000..5243a99b90 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/stddev_samp.md @@ -0,0 +1,19 @@ +# STDDEV_SAMP + +## Syntax + +`STDDEV_SAMP(expr)` + +## Description + +返回expr表达式的样本标准差 + +## Examples +``` +MySQL > select stddev_samp(scan_rows) from log_statis group by datetime; ++--------------------------+ +| stddev_samp(`scan_rows`) | ++--------------------------+ +| 2.372044195280762 | ++--------------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md new file mode 100755 index 0000000000..ad4fe7c993 --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/sum.md @@ -0,0 +1,19 @@ +# SUM + +## Syntax + +`SUM(expr)` + +## Description + +用于返回选中字段所有值的和 + +## Examples +``` +MySQL > select sum(scan_rows) from 
log_statis group by datetime; ++------------------+ +| sum(`scan_rows`) | ++------------------+ +| 8217360135 | ++------------------+ +``` diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md new file mode 100755 index 0000000000..5da00ad73d --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/var_samp.md @@ -0,0 +1,19 @@ +# VAR_SAMP,VARIANCE_SAMP + +## Syntax + +`VAR_SAMP(expr)` + +## Description + +返回expr表达式的样本方差 + +## Examples +``` +MySQL > select var_samp(scan_rows) from log_statis group by datetime; ++-----------------------+ +| var_samp(`scan_rows`) | ++-----------------------+ +| 5.6227132145741789 | ++-----------------------+ +``` \ No newline at end of file diff --git a/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md new file mode 100755 index 0000000000..27e948f3ae --- /dev/null +++ b/docs/documentation/cn/sql-reference/sql-functions/aggregate-functions/variance.md @@ -0,0 +1,26 @@ +# VARIANCE,VAR_POP,VARIANCE_POP + +## Syntax + +`VARIANCE(expr)` + +## Description + +返回expr表达式的方差 + +## Examples +``` +MySQL > select variance(scan_rows) from log_statis group by datetime; ++-----------------------+ +| variance(`scan_rows`) | ++-----------------------+ +| 5.6183332881176211 | ++-----------------------+ + +MySQL > select var_pop(scan_rows) from log_statis group by datetime; ++----------------------+ +| var_pop(`scan_rows`) | ++----------------------+ +| 5.6230744719006163 | ++----------------------+ +``` \ No newline at end of file