[improvement](mtmv) Support hll function roll up when query rewrite by materialized view (#32431)

Support HLL roll up. The supported HLL functions are as follows:

+-----------------------------------------------------------------------------------------------------------------------------------------------------+
|                      in query                    |                          in materialized view                           |        rolled up       |
+ ------------------------------------------------ + ----------------------------------------------------------------------- + ---------------------- +
| HLL_UNION_AGG(hll column)                        | hll_union(column) or hll_raw_agg(column) as column1                     | HLL_UNION_AGG(column1) |
| HLL_RAW_AGG(hll column) or HLL_UNION(hll column) |                                                                         | HLL_UNION(column)      |
| approx_count_distinct(not hll column)            | hll_union(HLL_HASH(column)) or hll_raw_agg(HLL_HASH(column)) as column1 | HLL_UNION_AGG(column1) |
| HLL_UNION_AGG(HLL_HASH(column))                  |                                                                         | HLL_UNION_AGG(column)  |
| hll_cardinality(hll_union(HLL_HASH(column)))     | hll_union(HLL_HASH(column)) or hll_raw_agg(HLL_HASH(column)) as column1 |                        |
| hll_cardinality(hll_raw_agg(HLL_HASH(column)))   | hll_union(HLL_HASH(column)) or hll_raw_agg(HLL_HASH(column)) as column1 |                        |
| HLL_RAW_AGG(HLL_HASH(column))                    | hll_union(HLL_HASH(column)) or hll_raw_agg(HLL_HASH(column)) as column1 | HLL_RAW_AGG(column1)   |
+-----------------------------------------------------------------------------------------------------------------------------------------------------+
This commit is contained in:
seawinde
2024-03-26 17:10:34 +08:00
committed by yiguolei
parent 0655d49a21
commit 1b6b92a19d
8 changed files with 215 additions and 10 deletions

View File

@ -34,6 +34,11 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.BitmapUnion;
import org.apache.doris.nereids.trees.expressions.functions.agg.BitmapUnionCount;
import org.apache.doris.nereids.trees.expressions.functions.agg.CouldRollUp;
import org.apache.doris.nereids.trees.expressions.functions.agg.Count;
import org.apache.doris.nereids.trees.expressions.functions.agg.HllUnion;
import org.apache.doris.nereids.trees.expressions.functions.agg.HllUnionAgg;
import org.apache.doris.nereids.trees.expressions.functions.agg.Ndv;
import org.apache.doris.nereids.trees.expressions.functions.scalar.HllCardinality;
import org.apache.doris.nereids.trees.expressions.functions.scalar.HllHash;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ToBitmap;
import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter;
import org.apache.doris.nereids.trees.plans.Plan;
@ -41,6 +46,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate;
import org.apache.doris.nereids.trees.plans.logical.LogicalProject;
import org.apache.doris.nereids.trees.plans.visitor.ExpressionLineageReplacer;
import org.apache.doris.nereids.types.BigIntType;
import org.apache.doris.nereids.types.VarcharType;
import org.apache.doris.nereids.util.ExpressionUtils;
import com.google.common.collect.ArrayListMultimap;
@ -70,20 +76,20 @@ public abstract class AbstractMaterializedViewAggregateRule extends AbstractMate
new AggregateExpressionRewriter();
static {
// support count distinct roll up
// with bitmap_union and to_bitmap
// support roll up when count distinct is in query
// the column type is not bitMap
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new Count(true, Any.INSTANCE),
new BitmapUnion(new ToBitmap(Any.INSTANCE)));
// with bitmap_union, to_bitmap and cast
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new Count(true, Any.INSTANCE),
new BitmapUnion(new ToBitmap(new Cast(Any.INSTANCE, BigIntType.INSTANCE))));
// support bitmap_union_count roll up
// field is already bitmap with only bitmap_union
// support roll up when bitmap_union_count is in query
// the column type is bitMap
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(
new BitmapUnionCount(Any.INSTANCE),
new BitmapUnion(Any.INSTANCE));
// with bitmap_union and to_bitmap
// the column type is not bitMap
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(
new BitmapUnionCount(new ToBitmap(Any.INSTANCE)),
new BitmapUnion(new ToBitmap(Any.INSTANCE)));
@ -91,6 +97,63 @@ public abstract class AbstractMaterializedViewAggregateRule extends AbstractMate
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(
new BitmapUnionCount(new ToBitmap(new Cast(Any.INSTANCE, BigIntType.INSTANCE))),
new BitmapUnion(new ToBitmap(new Cast(Any.INSTANCE, BigIntType.INSTANCE))));
// support roll up when the column type is not hll
// query is approx_count_distinct
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new Ndv(Any.INSTANCE),
new HllUnion(new HllHash(Any.INSTANCE)));
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new Ndv(Any.INSTANCE),
new HllUnion(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT))));
// query is HLL_UNION_AGG
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new HllUnionAgg(new HllHash(Any.INSTANCE)),
new HllUnion(new HllHash(Any.INSTANCE)));
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new HllUnionAgg(new HllHash(Any.INSTANCE)),
new HllUnion(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT))));
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(
new HllUnionAgg(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT))),
new HllUnion(new HllHash(Any.INSTANCE)));
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(
new HllUnionAgg(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT))),
new HllUnion(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT))));
// query is HLL_CARDINALITY
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new HllCardinality(new HllUnion(new HllHash(Any.INSTANCE))),
new HllUnion(new HllHash(Any.INSTANCE)));
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new HllCardinality(new HllUnion(new HllHash(Any.INSTANCE))),
new HllUnion(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT))));
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(
new HllCardinality(new HllUnion(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT)))),
new HllUnion(new HllHash(Any.INSTANCE)));
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(
new HllCardinality(new HllUnion(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT)))),
new HllUnion(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT))));
// query is HLL_RAW_AGG or HLL_UNION
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new HllUnion(new HllHash(Any.INSTANCE)),
new HllUnion(new HllHash(Any.INSTANCE)));
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new HllUnion(new HllHash(Any.INSTANCE)),
new HllUnion(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT))));
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(
new HllUnion(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT))),
new HllUnion(new HllHash(Any.INSTANCE)));
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(
new HllUnion(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT))),
new HllUnion(new HllHash(new Cast(Any.INSTANCE, VarcharType.SYSTEM_DEFAULT))));
// support roll up when the column type is hll
// query is HLL_UNION_AGG
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new HllUnionAgg(Any.INSTANCE),
new HllUnion(Any.INSTANCE));
// query is HLL_CARDINALITY
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new HllCardinality(new HllUnion(Any.INSTANCE)),
new HllUnion(Any.INSTANCE));
// query is HLL_RAW_AGG or HLL_UNION
AGGREGATE_ROLL_UP_EQUIVALENT_FUNCTION_MAP.put(new HllUnion(Any.INSTANCE),
new HllUnion(Any.INSTANCE));
}
@Override

View File

@ -21,7 +21,9 @@ import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.Function;
/**
* Could roll up trait, if a function could roll up in aggregate, it will implement the interface
* Could roll up trait: a function that appears in a query can be rolled up if it can be represented
* by an aggregate function in the view.
* Obtain the rolled-up function via the constructRollUp method.
*/
public interface CouldRollUp {

View File

@ -21,6 +21,7 @@ import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.Function;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.DataType;
@ -35,7 +36,7 @@ import java.util.List;
* AggregateFunction 'hll_union'. This class is generated by GenerateFunction.
*/
public class HllUnion extends AggregateFunction
implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable, HllFunction {
implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable, HllFunction, CouldRollUp {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(HllType.INSTANCE).args(HllType.INSTANCE)
@ -78,4 +79,9 @@ public class HllUnion extends AggregateFunction
public List<FunctionSignature> getSignatures() {
return SIGNATURES;
}
/**
 * Builds the rolled-up form of this function: a new {@code HllUnion} whose
 * single argument is {@code param}.
 *
 * @param param expression the rolled-up function is applied to
 *              (presumably the HLL column exposed by the materialized view; confirm against caller)
 * @param varParams additional arguments; not used by this implementation
 * @return a new {@code HllUnion} over {@code param}
 */
@Override
public Function constructRollUp(Expression param, Expression... varParams) {
return new HllUnion(param);
}
}

View File

@ -21,6 +21,7 @@ import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.Function;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.BigIntType;
@ -36,7 +37,7 @@ import java.util.List;
* AggregateFunction 'hll_union_agg'. This class is generated by GenerateFunction.
*/
public class HllUnionAgg extends AggregateFunction
implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable, HllFunction {
implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable, HllFunction, CouldRollUp {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(BigIntType.INSTANCE).args(HllType.INSTANCE)
@ -79,4 +80,9 @@ public class HllUnionAgg extends AggregateFunction
public List<FunctionSignature> getSignatures() {
return SIGNATURES;
}
/**
 * Builds the rolled-up form of this function: a new {@code HllUnionAgg} whose
 * single argument is {@code param}.
 *
 * @param param expression the rolled-up function is applied to
 *              (presumably the HLL column exposed by the materialized view; confirm against caller)
 * @param varParams additional arguments; not used by this implementation
 * @return a new {@code HllUnionAgg} over {@code param}
 */
@Override
public Function constructRollUp(Expression param, Expression... varParams) {
return new HllUnionAgg(param);
}
}

View File

@ -23,6 +23,7 @@ import org.apache.doris.nereids.exceptions.AnalysisException;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.Function;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.BigIntType;
@ -37,7 +38,7 @@ import java.util.List;
* AggregateFunction 'ndv'. This class is generated by GenerateFunction.
*/
public class Ndv extends AggregateFunction
implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable {
implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable, CouldRollUp {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(BigIntType.INSTANCE).args(AnyDataType.INSTANCE_WITHOUT_INDEX)
@ -79,4 +80,9 @@ public class Ndv extends AggregateFunction
public List<FunctionSignature> getSignatures() {
return SIGNATURES;
}
/**
 * Builds the rolled-up form of this function. The result is a new
 * {@code HllUnionAgg} over {@code param}, not another {@code Ndv}.
 *
 * NOTE(review): presumably {@code param} is an HLL value already aggregated in the
 * materialized view (e.g. hll_union(hll_hash(col))), which is why an HLL aggregate
 * is used here; confirm against the rewrite rule that calls this method.
 *
 * @param param expression the rolled-up function is applied to
 * @param varParams additional arguments; not used by this implementation
 * @return a new {@code HllUnionAgg} over {@code param}
 */
@Override
public Function constructRollUp(Expression param, Expression... varParams) {
return new HllUnionAgg(param);
}
}

View File

@ -21,7 +21,10 @@ import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.Function;
import org.apache.doris.nereids.trees.expressions.functions.agg.CouldRollUp;
import org.apache.doris.nereids.trees.expressions.functions.agg.HllFunction;
import org.apache.doris.nereids.trees.expressions.functions.agg.HllUnionAgg;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.BigIntType;
@ -36,7 +39,7 @@ import java.util.List;
* ScalarFunction 'hll_cardinality'. This class is generated by GenerateFunction.
*/
public class HllCardinality extends ScalarFunction
implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable, HllFunction {
implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable, HllFunction, CouldRollUp {
public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(BigIntType.INSTANCE).args(HllType.INSTANCE)
@ -67,4 +70,9 @@ public class HllCardinality extends ScalarFunction
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
return visitor.visitHllCardinality(this, context);
}
/**
 * Builds the rolled-up form of this function. Although this class is a scalar
 * function, the result is the aggregate {@code HllUnionAgg} applied to {@code param}.
 *
 * NOTE(review): presumably this replaces the whole
 * hll_cardinality(hll_union(...)) expression in the query, with {@code param} being
 * the HLL column from the materialized view; confirm against the calling rewrite rule.
 *
 * @param param expression the rolled-up function is applied to
 * @param varParams additional arguments; not used by this implementation
 * @return a new {@code HllUnionAgg} over {@code param}
 */
@Override
public Function constructRollUp(Expression param, Expression... varParams) {
return new HllUnionAgg(param);
}
}

View File

@ -217,6 +217,34 @@
2 3 2023-12-12 57.40 60.40
2 4 2023-12-10 46.00 50.00
-- !query25_5_before --
2 3 2023-12-08 20.00 10.50 9.50 1 1 1 1 1 \N \N
2 3 2023-12-12 57.40 56.20 1.20 1 1 1 1 1 \N \N
2 4 2023-12-10 46.00 33.50 12.50 1 1 1 1 1 \N \N
3 3 2023-12-11 43.20 43.20 43.20 1 1 1 1 1 \N \N
4 3 2023-12-09 11.50 11.50 11.50 1 1 1 1 1 \N \N
-- !query25_5_after --
2 3 2023-12-08 20.00 10.50 9.50 1 1 1 1 1 \N \N
2 3 2023-12-12 57.40 56.20 1.20 1 1 1 1 1 \N \N
2 4 2023-12-10 46.00 33.50 12.50 1 1 1 1 1 \N \N
3 3 2023-12-11 43.20 43.20 43.20 1 1 1 1 1 \N \N
4 3 2023-12-09 11.50 11.50 11.50 1 1 1 1 1 \N \N
-- !query25_6_before --
2 3 2023-12-08 20.00 10.50 9.50 1 1 1 1 1 \N \N
2 3 2023-12-12 57.40 56.20 1.20 0 0 0 0 0 \N \N
2 4 2023-12-10 46.00 33.50 12.50 1 1 1 1 1 \N \N
3 3 2023-12-11 43.20 43.20 43.20 0 0 0 0 0 \N \N
4 3 2023-12-09 11.50 11.50 11.50 0 0 0 0 0 \N \N
-- !query25_6_after --
2 3 2023-12-08 20.00 10.50 9.50 1 1 1 1 1 \N \N
2 3 2023-12-12 57.40 56.20 1.20 0 0 0 0 0 \N \N
2 4 2023-12-10 46.00 33.50 12.50 1 1 1 1 1 \N \N
3 3 2023-12-11 43.20 43.20 43.20 0 0 0 0 0 \N \N
4 3 2023-12-09 11.50 11.50 11.50 0 0 0 0 0 \N \N
-- !query1_1_before --
1 yy 0 0 11.50 11.50 11.50 1

View File

@ -916,6 +916,92 @@ suite("aggregate_with_roll_up") {
order_qt_query25_4_after "${query25_4}"
sql """ DROP MATERIALIZED VIEW IF EXISTS mv25_4"""
// hll roll up with column
def mv25_5 =
"""
select l_shipdate, o_orderdate, l_partkey, l_suppkey,
sum(o_totalprice) as sum_total,
max(o_totalprice) as max_total,
min(o_totalprice) as min_total,
bitmap_union(to_bitmap(l_partkey)),
hll_union(hll_hash(l_partkey))
from lineitem
left join orders on l_orderkey = o_orderkey and l_shipdate = o_orderdate
group by
l_shipdate,
o_orderdate,
l_partkey,
l_suppkey;
"""
def query25_5 =
"""
select l_partkey, l_suppkey, o_orderdate,
sum(o_totalprice),
max(o_totalprice),
min(o_totalprice),
count(distinct l_partkey),
approx_count_distinct(l_partkey),
hll_union_agg(hll_hash(l_partkey)),
hll_cardinality(hll_union(hll_hash(l_partkey))),
hll_cardinality(hll_raw_agg(hll_hash(l_partkey))),
hll_raw_agg(hll_hash(l_partkey)),
hll_union(hll_hash(l_partkey))
from lineitem
left join orders on l_orderkey = o_orderkey and l_shipdate = o_orderdate
group by
o_orderdate,
l_partkey,
l_suppkey;
"""
order_qt_query25_5_before "${query25_5}"
check_mv_rewrite_success(db, mv25_5, query25_5, "mv25_5")
order_qt_query25_5_after "${query25_5}"
sql """ DROP MATERIALIZED VIEW IF EXISTS mv25_5"""
// hll roll up with complex expression
def mv25_6 =
"""
select l_shipdate, o_orderdate, l_partkey, l_suppkey,
sum(o_totalprice) as sum_total,
max(o_totalprice) as max_total,
min(o_totalprice) as min_total,
bitmap_union(to_bitmap(case when o_shippriority > 0 and o_orderkey IN (1, 3) then o_custkey else null end)),
hll_union(hll_hash(case when o_shippriority > 0 and o_orderkey IN (1, 3) then o_custkey else null end))
from lineitem
left join orders on l_orderkey = o_orderkey and l_shipdate = o_orderdate
group by
l_shipdate,
o_orderdate,
l_partkey,
l_suppkey;
"""
def query25_6 =
"""
select l_partkey, l_suppkey, o_orderdate,
sum(o_totalprice),
max(o_totalprice),
min(o_totalprice),
count(distinct case when o_shippriority > 0 and o_orderkey IN (1, 3) then o_custkey else null end) as count_1,
approx_count_distinct(case when o_shippriority > 0 and o_orderkey IN (1, 3) then o_custkey else null end) as count_2,
hll_union_agg(hll_hash(case when o_shippriority > 0 and o_orderkey IN (1, 3) then o_custkey else null end)) as count_3,
hll_cardinality(hll_union(hll_hash(case when o_shippriority > 0 and o_orderkey IN (1, 3) then o_custkey else null end))) as count_4,
hll_cardinality(hll_raw_agg(hll_hash(case when o_shippriority > 0 and o_orderkey IN (1, 3) then o_custkey else null end))) as count_5,
hll_raw_agg(hll_hash(case when o_shippriority > 0 and o_orderkey IN (1, 3) then o_custkey else null end)) as count_6,
hll_union(hll_hash(case when o_shippriority > 0 and o_orderkey IN (1, 3) then o_custkey else null end)) as count_7
from lineitem
left join orders on l_orderkey = o_orderkey and l_shipdate = o_orderdate
group by
o_orderdate,
l_partkey,
l_suppkey;
"""
order_qt_query25_6_before "${query25_6}"
check_mv_rewrite_success(db, mv25_6, query25_6, "mv25_6")
order_qt_query25_6_after "${query25_6}"
sql """ DROP MATERIALIZED VIEW IF EXISTS mv25_6"""
// single table
// filter + use roll up dimension