[improvement](mtmv) Optimize get available mv logic to avoid unnecessary rewrite (#39734) (#40136)

## Proposed changes

commitId: 55ed1277
pr: https://github.com/apache/doris/pull/39734
This commit is contained in:
seawinde
2024-08-30 08:02:27 +08:00
committed by GitHub
parent e072ce7f68
commit 45e35e8a99
4 changed files with 201 additions and 1 deletions

View File

@ -68,7 +68,8 @@ public class InitMaterializationContextHook implements PlannerHook {
* @param cascadesContext current cascadesContext in the planner
*/
protected void doInitMaterializationContext(CascadesContext cascadesContext) {
TableCollectorContext collectorContext = new TableCollectorContext(Sets.newHashSet(), true);
// Only collect the table or mv which query use directly, to avoid useless mv partition in rewrite
TableCollectorContext collectorContext = new TableCollectorContext(Sets.newHashSet(), false);
try {
Plan rewritePlan = cascadesContext.getRewritePlan();
// Keep use one connection context when in query, if new connect context,

View File

@ -243,6 +243,22 @@ public class PlanVisitorTest extends TestWithFeService {
.map(TableIf::getName)
.collect(Collectors.toSet()),
expectedMvsWithNoExpand);
TableCollectorContext allTableTypeWithExpand =
new TableCollector.TableCollectorContext(
Sets.newHashSet(TableType.values()), true);
physicalPlan.accept(TableCollector.INSTANCE, allTableTypeWithExpand);
// when collect in plan with expand, should collect table which is expended
Set<String> expectedTablesWithExpand = new HashSet<>();
expectedTablesWithExpand.add("mv1");
expectedTablesWithExpand.add("table1");
expectedTablesWithExpand.add("table2");
expectedTablesWithExpand.add("table3");
Assertions.assertEquals(
allTableTypeWithExpand.getCollectedTables().stream()
.map(TableIf::getName)
.collect(Collectors.toSet()),
expectedTablesWithExpand);
});
dropMvByNereids("drop materialized view mv1");
}

View File

@ -1366,6 +1366,21 @@ class Suite implements GroovyInterceptable {
return result.values().toList()
}
def create_async_mv = { db, mv_name, mv_sql ->
sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}"""
sql"""
CREATE MATERIALIZED VIEW ${mv_name}
BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL
DISTRIBUTED BY RANDOM BUCKETS 2
PROPERTIES ('replication_num' = '1')
AS ${mv_sql}
"""
def job_name = getJobName(db, mv_name);
waitingMTMVTaskFinished(job_name)
sql "analyze table ${mv_name} with sync;"
}
def check_mv_rewrite_success = { db, mv_sql, query_sql, mv_name ->
sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}"""

View File

@ -0,0 +1,168 @@
package mv.direct_query
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("direct_query_mv") {
String db = context.config.getDbNameByFile(context.file)
sql "use ${db}"
sql "set runtime_filter_mode=OFF"
sql """
drop table if exists orders
"""
sql """
CREATE TABLE IF NOT EXISTS orders (
o_orderkey INTEGER NOT NULL,
o_custkey INTEGER NOT NULL,
o_orderstatus CHAR(1) NOT NULL,
o_totalprice DECIMALV3(15,2) NOT NULL,
o_orderdate DATE NOT NULL,
o_orderpriority CHAR(15) NOT NULL,
o_clerk CHAR(15) NOT NULL,
o_shippriority INTEGER NOT NULL,
o_comment VARCHAR(79) NOT NULL
)
DUPLICATE KEY(o_orderkey, o_custkey)
PARTITION BY RANGE(o_orderdate) (PARTITION `day_2` VALUES LESS THAN ('2023-12-30'))
DISTRIBUTED BY HASH(o_orderkey) BUCKETS 3
PROPERTIES (
"replication_num" = "1"
)
"""
sql """
drop table if exists lineitem
"""
sql"""
CREATE TABLE IF NOT EXISTS lineitem (
l_orderkey INTEGER NOT NULL,
l_partkey INTEGER NOT NULL,
l_suppkey INTEGER NOT NULL,
l_linenumber INTEGER NOT NULL,
l_quantity DECIMALV3(15,2) NOT NULL,
l_extendedprice DECIMALV3(15,2) NOT NULL,
l_discount DECIMALV3(15,2) NOT NULL,
l_tax DECIMALV3(15,2) NOT NULL,
l_returnflag CHAR(1) NOT NULL,
l_linestatus CHAR(1) NOT NULL,
l_shipdate DATE NOT NULL,
l_commitdate DATE NOT NULL,
l_receiptdate DATE NOT NULL,
l_shipinstruct CHAR(25) NOT NULL,
l_shipmode CHAR(10) NOT NULL,
l_comment VARCHAR(44) NOT NULL
)
DUPLICATE KEY(l_orderkey, l_partkey, l_suppkey, l_linenumber)
PARTITION BY RANGE(l_shipdate) (PARTITION `day_1` VALUES LESS THAN ('2023-12-30'))
DISTRIBUTED BY HASH(l_orderkey) BUCKETS 3
PROPERTIES (
"replication_num" = "1"
)
"""
sql """
drop table if exists partsupp
"""
sql """
CREATE TABLE IF NOT EXISTS partsupp (
ps_partkey INTEGER NOT NULL,
ps_suppkey INTEGER NOT NULL,
ps_availqty INTEGER NOT NULL,
ps_supplycost DECIMALV3(15,2) NOT NULL,
ps_comment VARCHAR(199) NOT NULL
)
DUPLICATE KEY(ps_partkey, ps_suppkey)
DISTRIBUTED BY HASH(ps_partkey) BUCKETS 3
PROPERTIES (
"replication_num" = "1"
)
"""
sql """ insert into lineitem values
(1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-08', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'),
(2, 4, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-09', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'),
(3, 2, 4, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-10', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'),
(4, 3, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-11', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'),
(5, 2, 3, 6, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-12-12', '2023-12-12', '2023-12-13', 'c', 'd', 'xxxxxxxxx');
"""
sql """
insert into orders values
(1, 1, 'o', 9.5, '2023-12-08', 'a', 'b', 1, 'yy'),
(1, 1, 'o', 9.5, '2023-12-08', 'a', 'b', 1, 'yy'),
(1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'),
(2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'),
(2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'),
(3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'),
(3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'),
(3, 1, 'o', 33.5, '2023-12-10', 'a', 'b', 1, 'yy'),
(3, 1, 'o', 33.5, '2023-12-10', 'a', 'b', 1, 'yy'),
(4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'),
(4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'),
(5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'),
(5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'),
(5, 2, 'o', 1.2, '2023-12-12', 'c','d',2, 'mi'),
(5, 2, 'o', 1.2, '2023-12-12', 'c','d',2, 'mi');
"""
sql """
insert into partsupp values
(2, 3, 9, 10.01, 'supply1'),
(2, 3, 10, 11.01, 'supply2');
"""
sql """analyze table lineitem with sync;"""
sql """analyze table orders with sync;"""
sql """analyze table partsupp with sync;"""
create_async_mv(db, "mv1_0",
"""
select lineitem.L_LINENUMBER, orders.O_CUSTKEY
from lineitem
inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY
""")
create_async_mv(db, "mv2_0",
"""
select L_LINENUMBER, count(O_CUSTKEY)
from mv1_0
group by L_LINENUMBER;
""")
// mv2 use mv1, though query not use mv1 directly, mv2 should part in rewrite and shoule be chosen
check_mv_rewrite_success(db,
"""
select L_LINENUMBER, count(O_CUSTKEY)
from mv1_0
group by L_LINENUMBER;
""",
"""
select L_LINENUMBER, count(O_CUSTKEY)
from lineitem
inner join orders on lineitem.L_ORDERKEY = orders.O_ORDERKEY
group by L_LINENUMBER;
""",
"mv2_0"
)
sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_0"""
sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_0"""
}