diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java index f61c4be669..b273b798a3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionValue.java @@ -24,6 +24,7 @@ public class PartitionValue { public static final PartitionValue MAX_VALUE = new PartitionValue(); private String value; + private boolean isHiveDefaultPartition; private PartitionValue() { @@ -33,7 +34,15 @@ public class PartitionValue { this.value = value; } + public PartitionValue(String value, boolean isHiveDefaultPartition) { + this.value = value; + this.isHiveDefaultPartition = isHiveDefaultPartition; + } + public LiteralExpr getValue(Type type) throws AnalysisException { + if (isHiveDefaultPartition) { + return new StringLiteral(value); + } if (isMax()) { return LiteralExpr.createInfinity(type, true); } else { @@ -52,4 +61,8 @@ public class PartitionValue { return value; } } + + public boolean isHiveDefaultPartition() { + return isHiveDefaultPartition; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java index f11c5fe29d..b41c16e6ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java @@ -26,8 +26,10 @@ import org.apache.doris.catalog.FsBroker; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.ClientPool; import org.apache.doris.common.Config; +import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; +import org.apache.doris.datasource.hive.HiveMetaStoreCache; import org.apache.doris.service.FrontendOptions; import org.apache.doris.thrift.TBrokerCheckPathExistRequest; import org.apache.doris.thrift.TBrokerCheckPathExistResponse; @@ -151,7 +153,8 @@ public class BrokerUtil { if (index == -1) { continue; } - columns[index] = pair[1]; + columns[index] = HiveMetaStoreCache.HIVE_DEFAULT_PARTITION.equals(pair[1]) + ? FeConstants.null_string : pair[1]; size++; if (size >= columnsFromPath.size()) { break; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index a1ab3cc92f..31b8d2f3b4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -80,6 +80,7 @@ import java.util.stream.Stream; public class HiveMetaStoreCache { private static final Logger LOG = LogManager.getLogger(HiveMetaStoreCache.class); private static final int MIN_BATCH_FETCH_PARTITION_NUM = 50; + public static final String HIVE_DEFAULT_PARTITION = "__HIVE_DEFAULT_PARTITION__"; private HMSExternalCatalog catalog; @@ -207,7 +208,7 @@ public class HiveMetaStoreCache { for (String part : parts) { String[] kv = part.split("="); Preconditions.checkState(kv.length == 2, partitionName); - values.add(new PartitionValue(kv[1])); + values.add(new PartitionValue(kv[1], HIVE_DEFAULT_PARTITION.equals(kv[1]))); } try { PartitionKey key = PartitionKey.createListPartitionKeyWithTypes(values, types); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java index fcae3c4ecd..5b8ec54b3a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ListPartitionPrunerV2.java @@ -79,6 +79,19 @@ public class ListPartitionPrunerV2 extends PartitionPrunerV2Base { this.rangeToId = rangeToId; } + // For hive partition table. + public ListPartitionPrunerV2(Map idToPartitionItem, + List partitionColumns, + Map columnNameToRange, + Map> uidToPartitionRange, + Map, UniqueId> rangeToId, + RangeMap singleColumnRangeMap, + boolean isHive) { + super(idToPartitionItem, partitionColumns, columnNameToRange, singleColumnRangeMap, isHive); + this.uidToPartitionRange = uidToPartitionRange; + this.rangeToId = rangeToId; + } + public static Map> genUidToPartitionRange( Map idToPartitionItem, Map> idToUniqueIdsMap) { Map> uidToPartitionRange = Maps.newHashMap(); @@ -147,6 +160,11 @@ public class ListPartitionPrunerV2 extends PartitionPrunerV2Base { Optional> rangeSetOpt = columnRange.getRangeSet(); if (columnRange.hasConjunctiveIsNull() || !rangeSetOpt.isPresent()) { + // For Hive external table, partition column could be null. + // In which case, the data will be put to a default partition __HIVE_DEFAULT_PARTITION__ + if (isHive) { + return FinalFilters.noFilters(); + } return FinalFilters.constantFalseFilters(); } else { RangeSet rangeSet = rangeSetOpt.get(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java index e760a85d84..376e2a4c7f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PartitionPrunerV2Base.java @@ -43,6 +43,8 @@ public abstract class PartitionPrunerV2Base implements PartitionPruner { protected final Map columnNameToRange; // used for single column partition protected RangeMap singleColumnRangeMap = null; + // Flag to indicate if this pruner is for hive partition or not. + protected boolean isHive = false; // currently only used for list partition private Map.Entry defaultPartition; @@ -83,6 +85,18 @@ public abstract class PartitionPrunerV2Base implements PartitionPruner { .orElse(null); } + public PartitionPrunerV2Base(Map idToPartitionItem, + List partitionColumns, + Map columnNameToRange, + RangeMap singleColumnRangeMap, + boolean isHive) { + this.idToPartitionItem = idToPartitionItem; + this.partitionColumns = partitionColumns; + this.columnNameToRange = columnNameToRange; + this.singleColumnRangeMap = singleColumnRangeMap; + this.isHive = isHive; + } + @Override public Collection prune() throws AnalysisException { Map columnToFilters = Maps.newHashMap(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java b/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java index 4aa3ee41a5..8acba72f15 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/RangePartitionPrunerV2.java @@ -86,6 +86,11 @@ public class RangePartitionPrunerV2 extends PartitionPrunerV2Base { Optional> rangeSetOpt = columnRange.getRangeSet(); if (columnRange.hasConjunctiveIsNull()) { if (!rangeSetOpt.isPresent()) { + // For Hive external table, partition column could be null. + // In which case, the data will be put to a default partition __HIVE_DEFAULT_PARTITION__ + if (isHive) { + return FinalFilters.noFilters(); + } // Only has conjunctive `is null` predicate. return FinalFilters.create(Sets.newHashSet(getMinInfinityRange(column))); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java index e69f9788d0..f538bf3c3e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveScanProvider.java @@ -161,7 +161,8 @@ public class HiveScanProvider extends HMSTableScanProvider { hmsTable.getPartitionColumns(), columnNameToRange, hivePartitionValues.getUidToPartitionRange(), hivePartitionValues.getRangeToId(), - hivePartitionValues.getSingleColumnRangeMap()); + hivePartitionValues.getSingleColumnRangeMap(), + true); Collection filteredPartitionIds = pruner.prune(); this.readPartitionNum = filteredPartitionIds.size(); LOG.debug("hive partition fetch and prune for table {}.{} cost: {} ms", diff --git a/regression-test/data/external_table_emr_p2/hive/test_hive_default_partition.out b/regression-test/data/external_table_emr_p2/hive/test_hive_default_partition.out new file mode 100644 index 0000000000..3737fbc7f6 --- /dev/null +++ b/regression-test/data/external_table_emr_p2/hive/test_hive_default_partition.out @@ -0,0 +1,135 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !one_partition1 -- +1 1 +2 1 +3 2 +4 2 +5 \N +6 \N + +-- !one_partition2 -- +5 \N +6 \N + +-- !one_partition3 -- +1 +2 +3 +4 + +-- !one_partition4 -- +1 +1 +2 +2 + +-- !one_partition5 -- +4 2 +5 \N +6 \N + +-- !two_partition1 -- +1 \N one +2 \N one +3 2 \N +4 2 \N +5 3 three +6 3 three +7 \N \N +8 \N \N + +-- !two_partition2 -- +1 \N one +2 \N one +7 \N \N +8 \N \N + +-- !two_partition3 -- +3 2 \N +4 2 \N +5 3 three +6 3 three + +-- !two_partition4 -- +3 2 \N +4 2 \N +7 \N \N +8 \N \N + +-- !two_partition5 -- +1 \N one +2 \N one +5 3 three +6 3 three + +-- !two_partition6 -- +5 3 three +6 3 three + +-- !two_partition7 -- +1 \N one +2 \N one + +-- !two_partition8 -- +3 2 \N +4 2 \N + +-- !two_partition9 -- +7 \N \N +8 \N \N + +-- !two_partition10 -- +1 \N one +2 \N one +3 2 \N +4 2 \N +5 3 three +6 3 three + +-- !two_partition11 -- +1 \N one +2 \N one +5 3 three +6 3 three +7 \N \N +8 \N \N + +-- !two_partition12 -- +3 2 \N +4 2 \N +5 3 three +6 3 three +7 \N \N +8 \N \N + +-- !two_partition13 -- +1 \N one +2 \N one +3 2 \N +4 2 \N +7 \N \N +8 \N \N + +-- !two_partition14 -- +1 \N one +2 \N one +3 2 \N +4 2 \N +5 3 three +6 3 three + +-- !two_partition15 -- +6 3 three +7 \N \N +8 \N \N + +-- !two_partition16 -- +3 2 \N +4 2 \N +5 3 three +6 3 three + +-- !two_partition17 -- +1 \N one +2 \N one + diff --git a/regression-test/suites/external_table_emr_p2/hive/test_hive_default_partition.groovy b/regression-test/suites/external_table_emr_p2/hive/test_hive_default_partition.groovy new file mode 100644 index 0000000000..2deddb9d2d --- /dev/null +++ b/regression-test/suites/external_table_emr_p2/hive/test_hive_default_partition.groovy @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hive_default_partition", "p2") { + def one_partition1 = """select * from one_partition order by id;""" + def one_partition2 = """select id, part1 from one_partition where part1 is null order by id;""" + def one_partition3 = """select id from one_partition where part1 is not null order by id;""" + def one_partition4 = """select part1 from one_partition where part1>0 order by id;""" + def one_partition5 = """select id, part1 from one_partition where part1 is null or id>3 order by id;""" + + def two_partition1 = """select * from two_partition order by id;""" + def two_partition2 = """select id, part1, part2 from two_partition where part1 is null order by id;""" + def two_partition3 = """select id, part1, part2 from two_partition where part1 is not null order by id;""" + def two_partition4 = """select id, part1, part2 from two_partition where part2 is null order by id;""" + def two_partition5 = """select id, part1, part2 from two_partition where part2 is not null order by id;""" + def two_partition6 = """select id, part1, part2 from two_partition where part1 is not null and part2 is not null order by id;""" + def two_partition7 = """select id, part1, part2 from two_partition where part1 is null and part2 is not null order by id;""" + def two_partition8 = """select id, part1, part2 from two_partition where part1 is not null and part2 is null order by id;""" + def two_partition9 = """select id, part1, part2 from two_partition where part1 is null and part2 is null order by id;""" + def two_partition10 = """select id, part1, part2 from two_partition where part1 is not null or part2 is not null order by id;""" + def two_partition11 = """select id, part1, part2 from two_partition where part1 is null or part2 is not null order by id;""" + def two_partition12 = """select id, part1, part2 from two_partition where part1 is not null or part2 is null order by id;""" + def two_partition13 = """select id, part1, part2 from two_partition where part1 is null or part2 is null order by id;""" + def two_partition14 = """select id, part1, part2 from two_partition where part1 is not null or part2 is not null order by id;""" + def two_partition15 = """select id, part1, part2 from two_partition where id > 5 order by id;""" + def two_partition16 = """select id, part1, part2 from two_partition where part1>0 order by id;""" + def two_partition17 = """select id, part1, part2 from two_partition where part2 = 'one' order by id;""" + + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") + String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") + String catalog_name = "hive_default_partition" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + sql """use multi_catalog;""" + qt_one_partition1 one_partition1 + qt_one_partition2 one_partition2 + qt_one_partition3 one_partition3 + qt_one_partition4 one_partition4 + qt_one_partition5 one_partition5 + + qt_two_partition1 two_partition1 + qt_two_partition2 two_partition2 + qt_two_partition3 two_partition3 + qt_two_partition4 two_partition4 + qt_two_partition5 two_partition5 + qt_two_partition6 two_partition6 + qt_two_partition7 two_partition7 + qt_two_partition8 two_partition8 + qt_two_partition9 two_partition9 + qt_two_partition10 two_partition10 + qt_two_partition11 two_partition11 + qt_two_partition12 two_partition12 + qt_two_partition13 two_partition13 + qt_two_partition14 two_partition14 + qt_two_partition15 two_partition15 + qt_two_partition16 two_partition16 + qt_two_partition17 two_partition17 + + } +} +