From 6af6997f1d2ff12a66555be5af3a694b23177396 Mon Sep 17 00:00:00 2001 From: Pxl Date: Thu, 29 Feb 2024 11:01:34 +0800 Subject: [PATCH] [Improvement](materialized-view) add approx_count_distinct/ndv to FunctionAlias rule (#31535) add approx_count_distinct/ndv to FunctionAlias rule --- .../apache/doris/rewrite/FunctionAlias.java | 7 ++- .../test_approx_count_distinct.out | 14 +++++ .../data/mv_p0/test_ndv/test_ndv.out | 14 +++++ .../mv_p0/ut/testNDVToHll/testNDVToHll.out | 4 ++ .../test_approx_count_distinct.groovy | 57 +++++++++++++++++++ .../suites/mv_p0/test_ndv/test_ndv.groovy | 57 +++++++++++++++++++ .../mv_p0/ut/testNDVToHll/testNDVToHll.groovy | 6 ++ 7 files changed, 156 insertions(+), 3 deletions(-) create mode 100644 regression-test/data/mv_p0/test_approx_count_distinct/test_approx_count_distinct.out create mode 100644 regression-test/data/mv_p0/test_ndv/test_ndv.out create mode 100644 regression-test/suites/mv_p0/test_approx_count_distinct/test_approx_count_distinct.groovy create mode 100644 regression-test/suites/mv_p0/test_ndv/test_ndv.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FunctionAlias.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FunctionAlias.java index b32cc9919b..29cf2988cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FunctionAlias.java +++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FunctionAlias.java @@ -27,8 +27,8 @@ import com.google.common.collect.ImmutableMap; import java.util.Map; /** - * Change functio name to function class name on nereids - * alias list: catalog/BuiltinScalarFunctions.java + * Change function name to function class name on nereids alias list: + * catalog/BuiltinScalarFunctions.java */ public final class FunctionAlias implements ExprRewriteRule { public static ExprRewriteRule INSTANCE = new FunctionAlias(); @@ -41,7 +41,8 @@ public final class FunctionAlias implements ExprRewriteRule { .put("inet_aton", "ipv4_string_to_num_or_null").put("inet6_ntoa", 
"ipv6_num_to_string") .put("inet6_aton", "ipv6_string_to_num_or_null").put("lcase", "lower").put("add_months", "months_add") .put("current_timestamp", "now").put("localtime", "now").put("localtimestamp", "now").put("ifnull", "nvl") - .put("rand", "random").put("sha", "sha1").put("substr", "substring").put("ucase", "upper").build(); + .put("rand", "random").put("sha", "sha1").put("substr", "substring").put("ucase", "upper") + .put("approx_count_distinct", "ndv").build(); @Override public Expr apply(Expr expr, Analyzer analyzer, ExprRewriter.ClauseType clauseType) throws AnalysisException { diff --git a/regression-test/data/mv_p0/test_approx_count_distinct/test_approx_count_distinct.out b/regression-test/data/mv_p0/test_approx_count_distinct/test_approx_count_distinct.out new file mode 100644 index 0000000000..7afb3700d9 --- /dev/null +++ b/regression-test/data/mv_p0/test_approx_count_distinct/test_approx_count_distinct.out @@ -0,0 +1,14 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !select_star -- +2020-01-01 1 a 1 +2020-01-01 1 a 2 +2020-01-02 2 b 2 + +-- !select_mv -- +1 2 +2 1 + +-- !select_mv -- +1 2 +2 1 + diff --git a/regression-test/data/mv_p0/test_ndv/test_ndv.out b/regression-test/data/mv_p0/test_ndv/test_ndv.out new file mode 100644 index 0000000000..7afb3700d9 --- /dev/null +++ b/regression-test/data/mv_p0/test_ndv/test_ndv.out @@ -0,0 +1,14 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select_star -- +2020-01-01 1 a 1 +2020-01-01 1 a 2 +2020-01-02 2 b 2 + +-- !select_mv -- +1 2 +2 1 + +-- !select_mv -- +1 2 +2 1 + diff --git a/regression-test/data/mv_p0/ut/testNDVToHll/testNDVToHll.out b/regression-test/data/mv_p0/ut/testNDVToHll/testNDVToHll.out index 61924a11ab..7afb3700d9 100644 --- a/regression-test/data/mv_p0/ut/testNDVToHll/testNDVToHll.out +++ b/regression-test/data/mv_p0/ut/testNDVToHll/testNDVToHll.out @@ -8,3 +8,7 @@ 1 2 2 1 +-- !select_mv -- +1 2 +2 1 + diff --git a/regression-test/suites/mv_p0/test_approx_count_distinct/test_approx_count_distinct.groovy b/regression-test/suites/mv_p0/test_approx_count_distinct/test_approx_count_distinct.groovy new file mode 100644 index 0000000000..900447d3e5 --- /dev/null +++ b/regression-test/suites/mv_p0/test_approx_count_distinct/test_approx_count_distinct.groovy @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite ("test_approx_count_distinct") { + sql """set enable_nereids_planner=true;""" + sql """set enable_fallback_to_original_planner=false;""" + sql """ DROP TABLE IF EXISTS user_tags; """ + + sql """ create table user_tags ( + time_col date, + user_id int, + user_name varchar(20), + tag_id int) + partition by range (time_col) (partition p1 values less than MAXVALUE) distributed by hash(time_col) buckets 3 properties('replication_num' = '1'); + """ + + sql """insert into user_tags values("2020-01-01",1,"a",1);""" + sql """insert into user_tags values("2020-01-02",2,"b",2);""" + + createMV("create materialized view user_tags_mv as select user_id, approx_count_distinct(tag_id) from user_tags group by user_id;") + + sql """insert into user_tags values("2020-01-01",1,"a",2);""" + + explain { + sql("select * from user_tags order by time_col;") + contains "(user_tags)" + } + qt_select_star "select * from user_tags order by time_col,tag_id;" + + explain { + sql("select user_id, ndv(tag_id) a from user_tags group by user_id order by user_id;") + contains "(user_tags_mv)" + } + qt_select_mv "select user_id, ndv(tag_id) a from user_tags group by user_id order by user_id;" + + explain { + sql("select user_id, approx_count_distinct(tag_id) a from user_tags group by user_id order by user_id;") + contains "(user_tags_mv)" + } + qt_select_mv "select user_id, approx_count_distinct(tag_id) a from user_tags group by user_id order by user_id;" +} diff --git a/regression-test/suites/mv_p0/test_ndv/test_ndv.groovy b/regression-test/suites/mv_p0/test_ndv/test_ndv.groovy new file mode 100644 index 0000000000..b4be75f53e --- /dev/null +++ b/regression-test/suites/mv_p0/test_ndv/test_ndv.groovy @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite ("test_ndv") { + sql """set enable_nereids_planner=true;""" + sql """set enable_fallback_to_original_planner=false;""" + sql """ DROP TABLE IF EXISTS user_tags; """ + + sql """ create table user_tags ( + time_col date, + user_id int, + user_name varchar(20), + tag_id int) + partition by range (time_col) (partition p1 values less than MAXVALUE) distributed by hash(time_col) buckets 3 properties('replication_num' = '1'); + """ + + sql """insert into user_tags values("2020-01-01",1,"a",1);""" + sql """insert into user_tags values("2020-01-02",2,"b",2);""" + + createMV("create materialized view user_tags_mv as select user_id, ndv(tag_id) from user_tags group by user_id;") + + sql """insert into user_tags values("2020-01-01",1,"a",2);""" + + explain { + sql("select * from user_tags order by time_col;") + contains "(user_tags)" + } + qt_select_star "select * from user_tags order by time_col,tag_id;" + + explain { + sql("select user_id, ndv(tag_id) a from user_tags group by user_id order by user_id;") + contains "(user_tags_mv)" + } + qt_select_mv "select user_id, ndv(tag_id) a from user_tags group by user_id order by user_id;" + + explain { + sql("select user_id, approx_count_distinct(tag_id) a from user_tags group by user_id order by user_id;") + contains "(user_tags_mv)" + } + qt_select_mv "select user_id, 
approx_count_distinct(tag_id) a from user_tags group by user_id order by user_id;" +} diff --git a/regression-test/suites/mv_p0/ut/testNDVToHll/testNDVToHll.groovy b/regression-test/suites/mv_p0/ut/testNDVToHll/testNDVToHll.groovy index cdd2978f07..ec83e476ec 100644 --- a/regression-test/suites/mv_p0/ut/testNDVToHll/testNDVToHll.groovy +++ b/regression-test/suites/mv_p0/ut/testNDVToHll/testNDVToHll.groovy @@ -48,4 +48,10 @@ suite ("testNDVToHll") { contains "(user_tags_mv)" } qt_select_mv "select user_id, ndv(tag_id) a from user_tags group by user_id order by user_id;" + + explain { + sql("select user_id, approx_count_distinct(tag_id) a from user_tags group by user_id order by user_id;") + contains "(user_tags_mv)" + } + qt_select_mv "select user_id, approx_count_distinct(tag_id) a from user_tags group by user_id order by user_id;" }