[fix](nereids) Fix the bugs of data distribution calculation on OlapScan (#15699)

when need to scan more than one olap table partition and it is not a colocate table or its colocate group is unstable, we need to make it as any distribution even if its distribution type is Hash
This commit is contained in:
AKIRA
2023-01-09 15:25:54 +08:00
committed by GitHub
parent e2492cf7fc
commit 7543d677fa
3 changed files with 69 additions and 3 deletions

View File

@ -17,9 +17,13 @@
package org.apache.doris.nereids.rules.implementation;
import org.apache.doris.catalog.ColocateTableIndex;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DistributionInfo;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.nereids.properties.DistributionSpec;
import org.apache.doris.nereids.properties.DistributionSpecAny;
import org.apache.doris.nereids.properties.DistributionSpecHash;
@ -59,10 +63,17 @@ public class LogicalOlapScanToPhysicalOlapScan extends OneImplementationRuleFact
}
private DistributionSpec convertDistribution(LogicalOlapScan olapScan) {
DistributionInfo distributionInfo = olapScan.getTable().getDefaultDistributionInfo();
if (distributionInfo instanceof HashDistributionInfo) {
OlapTable olapTable = olapScan.getTable();
DistributionInfo distributionInfo = olapTable.getDefaultDistributionInfo();
ColocateTableIndex colocateTableIndex = Env.getCurrentColocateIndex();
if ((colocateTableIndex.isColocateTable(olapTable.getId())
&& !colocateTableIndex.isGroupUnstable(colocateTableIndex.getGroup(olapTable.getId())))
|| olapTable.getPartitionInfo().getType() == PartitionType.UNPARTITIONED
|| olapTable.getPartitions().size() == 1) {
if (!(distributionInfo instanceof HashDistributionInfo)) {
return DistributionSpecAny.INSTANCE;
}
HashDistributionInfo hashDistributionInfo = (HashDistributionInfo) distributionInfo;
List<Slot> output = olapScan.getOutput();
List<ExprId> hashColumns = Lists.newArrayList();
List<Column> schemaColumns = olapScan.getTable().getFullSchema();

View File

@ -0,0 +1,4 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
1 12

View File

@ -0,0 +1,51 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_scan_part") {
sql """DROP TABLE IF EXISTS test_part;"""
sql """CREATE TABLE test_part (
id INT,
i1 INT NOT NULL,
i2 VARCHAR(32) NOT NULL
) DUPLICATE KEY(`id`)
PARTITION BY LIST(i2)
(
PARTITION p1 VALUES IN('1'),
PARTITION p2 VALUES IN('5'),
PARTITION p3 VALUES IN('9')
)
DISTRIBUTED BY HASH(`id`)
BUCKETS 3
PROPERTIES("replication_allocation" = "tag.location.default:1");
"""
sql """INSERT INTO test_part VALUES (1, 1, '1');"""
sql """INSERT INTO test_part VALUES (1, 1, '1');"""
sql """INSERT INTO test_part VALUES (1, 5, '5');"""
sql """INSERT INTO test_part VALUES (1, 5, '5');"""
sql """INSERT INTO test_part VALUES (2, 5, '5');"""
sql """INSERT INTO test_part VALUES (3, 5, '1');"""
sql """INSERT INTO test_part VALUES (4, 5, '5');"""
sql """INSERT INTO test_part VALUES (7, 5, '9');"""
sql "SET enable_nereids_planner=true"
sql "SET enable_fallback_to_original_planner=false"
sql "SET parallel_fragment_exec_instance_num=8"
qt_sql """
SELECT id, SUM(i1) FROM test_part WHERE i2 IN ('1','5')
GROUP BY id HAVING id = 1;
"""
}