Cherry-picked from #44669
Co-authored-by: daidai <changyuwei@selectdb.com>

parent 6ddc45b3fd
commit b4e2e2cf78
HMSExternalTable.java

@@ -17,6 +17,7 @@
 package org.apache.doris.datasource.hive;

+import org.apache.doris.analysis.TableSnapshot;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.ListPartitionItem;
@@ -31,6 +32,7 @@ import org.apache.doris.common.Config;
 import org.apache.doris.common.DdlException;
 import org.apache.doris.datasource.ExternalTable;
 import org.apache.doris.datasource.SchemaCacheValue;
+import org.apache.doris.datasource.TablePartitionValues;
 import org.apache.doris.datasource.hudi.HudiUtils;
 import org.apache.doris.datasource.iceberg.IcebergUtils;
 import org.apache.doris.datasource.mvcc.MvccSnapshot;
@@ -41,6 +43,7 @@ import org.apache.doris.mtmv.MTMVRelatedTableIf;
 import org.apache.doris.mtmv.MTMVSnapshotIf;
 import org.apache.doris.mtmv.MTMVTimestampSnapshot;
 import org.apache.doris.nereids.exceptions.NotSupportedException;
+import org.apache.doris.nereids.trees.plans.logical.LogicalFileScan.SelectedPartitions;
 import org.apache.doris.qe.GlobalVariable;
 import org.apache.doris.statistics.AnalysisInfo;
 import org.apache.doris.statistics.BaseAnalysisTask;
@@ -302,7 +305,28 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableIf

     @Override
     public boolean supportInternalPartitionPruned() {
-        return getDlaType() == DLAType.HIVE;
+        return getDlaType() == DLAType.HIVE || getDlaType() == DLAType.HUDI;
     }

+    public SelectedPartitions initHudiSelectedPartitions(Optional<TableSnapshot> tableSnapshot) {
+        if (getDlaType() != DLAType.HUDI) {
+            return SelectedPartitions.NOT_PRUNED;
+        }
+
+        if (getPartitionColumns().isEmpty()) {
+            return SelectedPartitions.NOT_PRUNED;
+        }
+        TablePartitionValues tablePartitionValues = HudiUtils.getPartitionValues(tableSnapshot, this);
+
+        Map<Long, PartitionItem> idToPartitionItem = tablePartitionValues.getIdToPartitionItem();
+        Map<Long, String> idToNameMap = tablePartitionValues.getPartitionIdToNameMap();
+
+        Map<String, PartitionItem> nameToPartitionItems = Maps.newHashMapWithExpectedSize(idToPartitionItem.size());
+        for (Entry<Long, PartitionItem> entry : idToPartitionItem.entrySet()) {
+            nameToPartitionItems.put(idToNameMap.get(entry.getKey()), entry.getValue());
+        }
+
+        return new SelectedPartitions(nameToPartitionItems.size(), nameToPartitionItems, false);
+    }
+
     @Override
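A minimal, self-contained sketch of the re-keying step above, using plain JDK maps and made-up data standing in for Doris's TablePartitionValues and PartitionItem types (the patch itself uses Guava's newHashMapWithExpectedSize):

    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class PartitionRekeySketch {
        public static void main(String[] args) {
            // Stand-ins for getIdToPartitionItem() and getPartitionIdToNameMap().
            Map<Long, List<String>> idToPartitionItem = new HashMap<>();
            idToPartitionItem.put(1L, List.of("2024"));
            idToPartitionItem.put(2L, List.of("2025"));
            Map<Long, String> idToNameMap = Map.of(1L, "part1=2024", 2L, "part1=2025");

            // Same loop shape as initHudiSelectedPartitions: look up each id's
            // partition name and re-key the item under that name.
            Map<String, List<String>> nameToPartitionItems = new HashMap<>(idToPartitionItem.size());
            for (Map.Entry<Long, List<String>> entry : idToPartitionItem.entrySet()) {
                nameToPartitionItems.put(idToNameMap.get(entry.getKey()), entry.getValue());
            }
            System.out.println(nameToPartitionItems); // two entries, keyed by partition name
        }
    }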
HudiUtils.java

@@ -17,24 +17,35 @@

 package org.apache.doris.datasource.hudi;

+import org.apache.doris.analysis.TableSnapshot;
 import org.apache.doris.catalog.ArrayType;
+import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.MapType;
 import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.catalog.StructField;
 import org.apache.doris.catalog.StructType;
 import org.apache.doris.catalog.Type;
+import org.apache.doris.datasource.TablePartitionValues;
+import org.apache.doris.datasource.hive.HMSExternalTable;
+import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
+import org.apache.doris.datasource.hudi.source.HudiCachedPartitionProcessor;

 import org.apache.avro.LogicalType;
 import org.apache.avro.LogicalTypes;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
+import org.apache.hudi.common.table.HoodieTableMetaClient;
 import org.apache.hudi.common.table.timeline.HoodieActiveTimeline;
+import org.apache.hudi.common.table.timeline.HoodieInstant;
 import org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator;
+import org.apache.hudi.common.table.timeline.HoodieTimeline;
+import org.apache.hudi.common.util.Option;

 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Optional;
 import java.util.stream.Collectors;

 public class HudiUtils {
@@ -231,4 +242,43 @@ public class HudiUtils {
         }
         return Type.UNSUPPORTED;
     }
+
+    public static TablePartitionValues getPartitionValues(Optional<TableSnapshot> tableSnapshot,
+            HMSExternalTable hmsTable) {
+        TablePartitionValues partitionValues = new TablePartitionValues();
+        if (hmsTable.getPartitionColumns().isEmpty()) {
+            // Not a partitioned table.
+            return partitionValues;
+        }
+
+        HoodieTableMetaClient hudiClient = HiveMetaStoreClientHelper.getHudiClient(hmsTable);
+        HudiCachedPartitionProcessor processor = (HudiCachedPartitionProcessor) Env.getCurrentEnv()
+                .getExtMetaCacheMgr().getHudiPartitionProcess(hmsTable.getCatalog());
+        boolean useHiveSyncPartition = hmsTable.useHiveSyncPartition();
+
+        if (tableSnapshot.isPresent()) {
+            if (tableSnapshot.get().getType() == TableSnapshot.VersionType.VERSION) {
+                // Hudi does not support `FOR VERSION AS OF`; use `FOR TIME AS OF` instead.
+                return partitionValues;
+            }
+            String queryInstant = tableSnapshot.get().getTime().replaceAll("[-: ]", "");
+
+            partitionValues =
+                    HiveMetaStoreClientHelper.ugiDoAs(
+                            HiveMetaStoreClientHelper.getConfiguration(hmsTable),
+                            () -> processor.getSnapshotPartitionValues(
+                                    hmsTable, hudiClient, queryInstant, useHiveSyncPartition));
+        } else {
+            HoodieTimeline timeline = hudiClient.getCommitsAndCompactionTimeline().filterCompletedInstants();
+            Option<HoodieInstant> snapshotInstant = timeline.lastInstant();
+            if (!snapshotInstant.isPresent()) {
+                return partitionValues;
+            }
+            partitionValues =
+                    HiveMetaStoreClientHelper.ugiDoAs(
+                            HiveMetaStoreClientHelper.getConfiguration(hmsTable),
+                            () -> processor.getPartitionValues(hmsTable, hudiClient, useHiveSyncPartition));
+        }
+        return partitionValues;
+    }
 }
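One detail worth noting in getPartitionValues is the instant normalization: Doris time travel accepts a timestamp such as FOR TIME AS OF '2024-12-02 17:12:26', while Hudi's timeline keys completed instants by compact timestamp strings, so dashes, colons, and spaces are stripped before the lookup. A runnable sketch of just that step (the timestamp value is made up):

    public class InstantNormalizeSketch {
        public static void main(String[] args) {
            String queryTime = "2024-12-02 17:12:26";
            String queryInstant = queryTime.replaceAll("[-: ]", "");
            System.out.println(queryInstant); // prints 20241202171226
        }
    }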
HudiScanNode.java

@@ -21,6 +21,7 @@ import org.apache.doris.analysis.TableScanParams;
 import org.apache.doris.analysis.TableSnapshot;
 import org.apache.doris.analysis.TupleDescriptor;
 import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.ListPartitionItem;
 import org.apache.doris.catalog.PartitionItem;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.common.AnalysisException;
@@ -30,12 +31,10 @@ import org.apache.doris.common.util.LocationPath;
 import org.apache.doris.datasource.ExternalTable;
 import org.apache.doris.datasource.FileSplit;
 import org.apache.doris.datasource.TableFormatType;
-import org.apache.doris.datasource.TablePartitionValues;
 import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper;
 import org.apache.doris.datasource.hive.HivePartition;
 import org.apache.doris.datasource.hive.source.HiveScanNode;
 import org.apache.doris.datasource.hudi.HudiUtils;
-import org.apache.doris.planner.ListPartitionPrunerV2;
 import org.apache.doris.planner.PlanNodeId;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.SessionVariable;
@@ -70,7 +69,6 @@ import org.apache.logging.log4j.Logger;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.Locale;
@@ -286,50 +284,29 @@ public class HudiScanNode extends HiveScanNode {
         return !sessionVariable.isForceJniScanner() && isCowTable;
     }

-    private List<HivePartition> getPrunedPartitions(
-            HoodieTableMetaClient metaClient, Option<String> snapshotTimestamp) throws AnalysisException {
+    private List<HivePartition> getPrunedPartitions(HoodieTableMetaClient metaClient) {
         List<Type> partitionColumnTypes = hmsTable.getPartitionColumnTypes();
         if (!partitionColumnTypes.isEmpty()) {
-            HudiCachedPartitionProcessor processor = (HudiCachedPartitionProcessor) Env.getCurrentEnv()
-                    .getExtMetaCacheMgr().getHudiPartitionProcess(hmsTable.getCatalog());
-            TablePartitionValues partitionValues;
-            if (snapshotTimestamp.isPresent()) {
-                partitionValues = processor.getSnapshotPartitionValues(
-                        hmsTable, metaClient, snapshotTimestamp.get(), useHiveSyncPartition);
-            } else {
-                partitionValues = processor.getPartitionValues(hmsTable, metaClient, useHiveSyncPartition);
-            }
-            if (partitionValues != null) {
-                // 2. prune partitions by expr
-                partitionValues.readLock().lock();
-                try {
-                    Map<Long, PartitionItem> idToPartitionItem = partitionValues.getIdToPartitionItem();
-                    this.totalPartitionNum = idToPartitionItem.size();
-                    ListPartitionPrunerV2 pruner = new ListPartitionPrunerV2(idToPartitionItem,
-                            hmsTable.getPartitionColumns(), columnNameToRange,
-                            partitionValues.getUidToPartitionRange(),
-                            partitionValues.getRangeToId(),
-                            partitionValues.getSingleColumnRangeMap(),
-                            true);
-                    Collection<Long> filteredPartitionIds = pruner.prune();
-                    this.selectedPartitionNum = filteredPartitionIds.size();
-                    // 3. get partitions from cache
-                    String dbName = hmsTable.getDbName();
-                    String tblName = hmsTable.getName();
-                    String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat();
-                    String basePath = metaClient.getBasePathV2().toString();
-                    Map<Long, String> partitionIdToNameMap = partitionValues.getPartitionIdToNameMap();
-                    Map<Long, List<String>> partitionValuesMap = partitionValues.getPartitionValuesMap();
-                    return filteredPartitionIds.stream().map(id -> {
-                        String path = basePath + "/" + partitionIdToNameMap.get(id);
-                        return new HivePartition(
-                                dbName, tblName, false, inputFormat, path, partitionValuesMap.get(id),
-                                Maps.newHashMap());
-                    }).collect(Collectors.toList());
-                } finally {
-                    partitionValues.readLock().unlock();
-                }
-            }
+            this.totalPartitionNum = selectedPartitions.totalPartitionNum;
+            Map<String, PartitionItem> prunedPartitions = selectedPartitions.selectedPartitions;
+            this.selectedPartitionNum = prunedPartitions.size();
+
+            String dbName = hmsTable.getDbName();
+            String tblName = hmsTable.getName();
+            String inputFormat = hmsTable.getRemoteTable().getSd().getInputFormat();
+            String basePath = metaClient.getBasePathV2().toString();
+
+            List<HivePartition> hivePartitions = Lists.newArrayList();
+            prunedPartitions.forEach(
+                    (key, value) -> {
+                        String path = basePath + "/" + key;
+                        hivePartitions.add(new HivePartition(
+                                dbName, tblName, false, inputFormat, path,
+                                ((ListPartitionItem) value).getItems().get(0).getPartitionValuesAsStringList(),
+                                Maps.newHashMap()));
+                    }
+            );
+            return hivePartitions;
         }
         // unpartitioned table, create a dummy partition to save location and
         // inputformat,
@@ -420,7 +397,7 @@ public class HudiScanNode extends HiveScanNode {
         if (!partitionInit) {
             prunedPartitions = HiveMetaStoreClientHelper.ugiDoAs(
                     HiveMetaStoreClientHelper.getConfiguration(hmsTable),
-                    () -> getPrunedPartitions(hudiClient, snapshotTimestamp));
+                    () -> getPrunedPartitions(hudiClient));
             partitionInit = true;
         }
         List<Split> splits = Collections.synchronizedList(new ArrayList<>());
@@ -482,7 +459,7 @@ public class HudiScanNode extends HiveScanNode {
             // Non partition table will get one dummy partition
             prunedPartitions = HiveMetaStoreClientHelper.ugiDoAs(
                     HiveMetaStoreClientHelper.getConfiguration(hmsTable),
-                    () -> getPrunedPartitions(hudiClient, snapshotTimestamp));
+                    () -> getPrunedPartitions(hudiClient));
             partitionInit = true;
         }
         int numPartitions = ConnectContext.get().getSessionVariable().getNumPartitionsInBatchMode();
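After this rewrite the scan node no longer prunes anything itself; the planner hands it the surviving partitions, and getPrunedPartitions only maps each partition name to its storage path under the table's base path. A self-contained sketch of that mapping, with a hypothetical base path and made-up partition names:

    import java.util.ArrayList;
    import java.util.List;
    import java.util.Map;

    public class PartitionPathSketch {
        public static void main(String[] args) {
            String basePath = "hdfs://ns1/warehouse/two_partition_tb"; // hypothetical location
            Map<String, List<String>> prunedPartitions = Map.of(
                    "part1=US/part2=1", List.of("US", "1"),
                    "part1=EU/part2=2", List.of("EU", "2"));

            // Mirrors the forEach in the new method body: path = basePath + "/" + name.
            List<String> partitionPaths = new ArrayList<>();
            prunedPartitions.forEach((name, values) -> partitionPaths.add(basePath + "/" + name));
            partitionPaths.forEach(System.out::println);
        }
    }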
PhysicalPlanTranslator.java

@@ -680,6 +680,8 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor<PlanFragment, PlanTranslatorContext>
         if (fileScan.getTableSnapshot().isPresent()) {
             ((FileQueryScanNode) scanNode).setQueryTableSnapshot(fileScan.getTableSnapshot().get());
         }
+        HudiScanNode hudiScanNode = (HudiScanNode) scanNode;
+        hudiScanNode.setSelectedPartitions(fileScan.getSelectedPartitions());
         return getPlanFragmentForPhysicalFileScan(fileScan, context, scanNode, table, tupleDescriptor);
     }
LogicalHudiScan.java

@@ -84,7 +84,7 @@ public class LogicalHudiScan extends LogicalFileScan {
     public LogicalHudiScan(RelationId id, ExternalTable table, List<String> qualifier,
             Optional<TableSample> tableSample, Optional<TableSnapshot> tableSnapshot) {
         this(id, table, qualifier, Optional.empty(), Optional.empty(),
-                SelectedPartitions.NOT_PRUNED, tableSample, tableSnapshot,
+                ((HMSExternalTable) table).initHudiSelectedPartitions(tableSnapshot), tableSample, tableSnapshot,
                 Optional.empty(), Optional.empty());
     }
test_hudi_partition_prune.out (new file)

@@ -0,0 +1,357 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !one_partition_1_1 --
1 Alice 2024
2 Bob 2024
3 Charlie 2024

-- !one_partition_2_1 --
4 David 2025
5 Eva 2025

-- !one_partition_3_all --
1 Alice 2024
2 Bob 2024
3 Charlie 2024
4 David 2025
5 Eva 2025

-- !one_partition_4_all --
5 Eva 2025

-- !one_partition_5_1 --
3 Charlie 2024

-- !two_partition_1_1 --
1 Alice US 1
2 Bob US 1
3 Charlie US 1

-- !two_partition_2_1 --
8 Hannah EU 2
9 Ivy EU 2
10 Jack EU 2

-- !two_partition_3_2 --
1 Alice US 1
2 Bob US 1
3 Charlie US 1
4 David US 2
5 Eva US 2

-- !two_partition_4_all --
1 Alice US 1
2 Bob US 1
3 Charlie US 1
4 David US 2
5 Eva US 2
6 Frank EU 1
7 Grace EU 1
8 Hannah EU 2
9 Ivy EU 2
10 Jack EU 2

-- !two_partition_5_1 --

-- !two_partition_6_1 --
8 Hannah EU 2
9 Ivy EU 2
10 Jack EU 2

-- !three_partition_1_1 --
1 Alice US 2024 Q1
2 Bob US 2024 Q1
3 Charlie US 2024 Q1

-- !three_partition_2_1 --
10 Jack EU 2025 Q2
11 Leo EU 2025 Q2

-- !three_partition_3_3 --
13 Nina AS 2025 Q1
14 Oscar AS 2025 Q2
15 Paul AS 2025 Q3

-- !three_partition_4_2 --
1 Alice US 2024 Q1
2 Bob US 2024 Q1
3 Charlie US 2024 Q1
6 Frank US 2025 Q1

-- !three_partition_5_all --
1 Alice US 2024 Q1
2 Bob US 2024 Q1
3 Charlie US 2024 Q1
4 David US 2024 Q2
5 Eva US 2024 Q2
6 Frank US 2025 Q1
7 Grace US 2025 Q2
8 Hannah EU 2024 Q1
9 Ivy EU 2024 Q1
10 Jack EU 2025 Q2
11 Leo EU 2025 Q2
12 Mia EU 2025 Q3
13 Nina AS 2025 Q1
14 Oscar AS 2025 Q2
15 Paul AS 2025 Q3

-- !three_partition_6_1 --
8 Hannah EU 2024 Q1
9 Ivy EU 2024 Q1

-- !three_partition_7_7 --
6 Frank US 2025 Q1
7 Grace US 2025 Q2
10 Jack EU 2025 Q2
11 Leo EU 2025 Q2
12 Mia EU 2025 Q3
13 Nina AS 2025 Q1
14 Oscar AS 2025 Q2
15 Paul AS 2025 Q3

-- !three_partition_8_2 --
7 Grace US 2025 Q2

-- !one_partition_6_0 --

-- !two_partition_7_0 --

-- !two_partition_8_0 --

-- !three_partition_9_0 --

-- !three_partition_10_0 --

-- !three_partition_11_0 --

-- !time_travel_two_partition_1_3 --
1 Alice US 1
2 Bob US 1
3 Charlie US 1
4 David US 2
5 Eva US 2
6 Frank EU 1

-- !time_travel_two_partition_2_2 --
1 Alice US 1
2 Bob US 1
3 Charlie US 1
4 David US 2
5 Eva US 2

-- !time_travel_two_partition_3_1 --
4 David US 2
5 Eva US 2

-- !time_travel_two_partition_4_0 --

-- !time_travel_two_partition_5_0 --

-- !time_travel_two_partition_6_1 --
1 Alice US 1

-- !one_partition_boolean --
1 Alice true
2 Bob true

-- !one_partition_tinyint --
1 Alice 1
2 Bob 1

-- !one_partition_smallint --
1 Alice 10
2 Bob 10

-- !one_partition_int --
1 Alice 100
2 Bob 100

-- !one_partition_bigint --
1 Alice 1234567890
2 Bob 1234567890

-- !one_partition_string --
1 Alice RegionA
2 Bob RegionA

-- !one_partition_date --
1 Alice 2023-12-01
2 Bob 2023-12-01

-- !one_partition_1_1 --
1 Alice 2024
2 Bob 2024
3 Charlie 2024

-- !one_partition_2_1 --
4 David 2025
5 Eva 2025

-- !one_partition_3_all --
1 Alice 2024
2 Bob 2024
3 Charlie 2024
4 David 2025
5 Eva 2025

-- !one_partition_4_all --
5 Eva 2025

-- !one_partition_5_1 --
3 Charlie 2024

-- !two_partition_1_1 --
1 Alice US 1
2 Bob US 1
3 Charlie US 1

-- !two_partition_2_1 --
8 Hannah EU 2
9 Ivy EU 2
10 Jack EU 2

-- !two_partition_3_2 --
1 Alice US 1
2 Bob US 1
3 Charlie US 1
4 David US 2
5 Eva US 2

-- !two_partition_4_all --
1 Alice US 1
2 Bob US 1
3 Charlie US 1
4 David US 2
5 Eva US 2
6 Frank EU 1
7 Grace EU 1
8 Hannah EU 2
9 Ivy EU 2
10 Jack EU 2

-- !two_partition_5_1 --

-- !two_partition_6_1 --
8 Hannah EU 2
9 Ivy EU 2
10 Jack EU 2

-- !three_partition_1_1 --
1 Alice US 2024 Q1
2 Bob US 2024 Q1
3 Charlie US 2024 Q1

-- !three_partition_2_1 --
10 Jack EU 2025 Q2
11 Leo EU 2025 Q2

-- !three_partition_3_3 --
13 Nina AS 2025 Q1
14 Oscar AS 2025 Q2
15 Paul AS 2025 Q3

-- !three_partition_4_2 --
1 Alice US 2024 Q1
2 Bob US 2024 Q1
3 Charlie US 2024 Q1
6 Frank US 2025 Q1

-- !three_partition_5_all --
1 Alice US 2024 Q1
2 Bob US 2024 Q1
3 Charlie US 2024 Q1
4 David US 2024 Q2
5 Eva US 2024 Q2
6 Frank US 2025 Q1
7 Grace US 2025 Q2
8 Hannah EU 2024 Q1
9 Ivy EU 2024 Q1
10 Jack EU 2025 Q2
11 Leo EU 2025 Q2
12 Mia EU 2025 Q3
13 Nina AS 2025 Q1
14 Oscar AS 2025 Q2
15 Paul AS 2025 Q3

-- !three_partition_6_1 --
8 Hannah EU 2024 Q1
9 Ivy EU 2024 Q1

-- !three_partition_7_7 --
6 Frank US 2025 Q1
7 Grace US 2025 Q2
10 Jack EU 2025 Q2
11 Leo EU 2025 Q2
12 Mia EU 2025 Q3
13 Nina AS 2025 Q1
14 Oscar AS 2025 Q2
15 Paul AS 2025 Q3

-- !three_partition_8_2 --
7 Grace US 2025 Q2

-- !one_partition_6_0 --

-- !two_partition_7_0 --

-- !two_partition_8_0 --

-- !three_partition_9_0 --

-- !three_partition_10_0 --

-- !three_partition_11_0 --

-- !time_travel_two_partition_1_3 --
1 Alice US 1
2 Bob US 1
3 Charlie US 1
4 David US 2
5 Eva US 2
6 Frank EU 1

-- !time_travel_two_partition_2_2 --
1 Alice US 1
2 Bob US 1
3 Charlie US 1
4 David US 2
5 Eva US 2

-- !time_travel_two_partition_3_1 --
4 David US 2
5 Eva US 2

-- !time_travel_two_partition_4_0 --

-- !time_travel_two_partition_5_0 --

-- !time_travel_two_partition_6_1 --
1 Alice US 1

-- !one_partition_boolean --
1 Alice true
2 Bob true

-- !one_partition_tinyint --
1 Alice 1
2 Bob 1

-- !one_partition_smallint --
1 Alice 10
2 Bob 10

-- !one_partition_int --
1 Alice 100
2 Bob 100

-- !one_partition_bigint --
1 Alice 1234567890
2 Bob 1234567890

-- !one_partition_string --
1 Alice RegionA
2 Bob RegionA

-- !one_partition_date --
1 Alice 2023-12-01
2 Bob 2023-12-01
test_hudi_partition_prune.groovy (new file)

@@ -0,0 +1,333 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

suite("test_hudi_partition_prune", "p2,external,hudi,external_remote,external_remote_hudi") {
    String enabled = context.config.otherConfigs.get("enableExternalHudiTest")
    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
        logger.info("disable hudi test")
        return
    }

    String catalog_name = "test_hudi_partition_prune"
    String props = context.config.otherConfigs.get("hudiEmrCatalog")
    sql """drop catalog if exists ${catalog_name};"""

    for (String use_hive_sync_partition : ['true', 'false']) {

        sql """
            create catalog if not exists ${catalog_name} properties (
                ${props}
                ,"use_hive_sync_partition"="${use_hive_sync_partition}"
            );
        """

        sql """ switch ${catalog_name};"""
        sql """ use regression_hudi;"""
        sql """ set enable_fallback_to_original_planner=false """

        def one_partition_1_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2024 ORDER BY id;"""
        def one_partition_2_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2025 ORDER BY id;"""
        def one_partition_3_all = """SELECT id,name,part1 FROM one_partition_tb ORDER BY id;"""
        def one_partition_4_all = """SELECT id,name,part1 FROM one_partition_tb WHERE id = 5 ORDER BY id;"""
        def one_partition_5_1 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2024 AND id >= 3 ORDER BY id;"""

        def two_partition_1_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 1 ORDER BY id;"""
        def two_partition_2_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;"""
        def two_partition_3_2 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' ORDER BY id;"""
        def two_partition_4_all = """SELECT id,name,part1,part2 FROM two_partition_tb ORDER BY id;"""
        def two_partition_5_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 2 AND id > 5 ORDER BY id;"""
        def two_partition_6_1 = """SELECT id,name,part1,part2 FROM two_partition_tb WHERE part1 = 'EU' AND part2 = 2 ORDER BY id;"""

        def three_partition_1_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;"""
        def three_partition_2_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2025 AND part3 = 'Q2' ORDER BY id;"""
        def three_partition_3_3 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 ORDER BY id;"""
        def three_partition_4_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q1' ORDER BY id;"""
        def three_partition_5_all = """SELECT id,name,part1,part2,part3 FROM three_partition_tb ORDER BY id;"""
        def three_partition_6_1 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q1' ORDER BY id;"""
        def three_partition_7_7 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part2 = 2025 ORDER BY id;"""
        def three_partition_8_2 = """SELECT id,name,part1,part2,part3 FROM three_partition_tb WHERE part1 = 'US' AND part3 = 'Q2' AND id BETWEEN 6 AND 10 ORDER BY id;"""

        def one_partition_boolean = """SELECT id,name,part1 FROM boolean_partition_tb WHERE part1 = true ORDER BY id;"""
        def one_partition_tinyint = """SELECT id,name,part1 FROM tinyint_partition_tb WHERE part1 = 1 ORDER BY id;"""
        def one_partition_smallint = """SELECT id,name,part1 FROM smallint_partition_tb WHERE part1 = 10 ORDER BY id;"""
        def one_partition_int = """SELECT id,name,part1 FROM int_partition_tb WHERE part1 = 100 ORDER BY id;"""
        def one_partition_bigint = """SELECT id,name,part1 FROM bigint_partition_tb WHERE part1 = 1234567890 ORDER BY id;"""
        def one_partition_string = """SELECT id,name,part1 FROM string_partition_tb WHERE part1 = 'RegionA' ORDER BY id;"""
        def one_partition_date = """SELECT id,name,part1 FROM date_partition_tb WHERE part1 = '2023-12-01' ORDER BY id;"""
        def one_partition_timestamp = """SELECT id,name,part1 FROM timestamp_partition_tb WHERE part1 = '2023-12-01 08:00:00' ORDER BY id;"""

        qt_one_partition_1_1 one_partition_1_1
        explain {
            sql("${one_partition_1_1}")
            contains "partition=1/2"
        }

        qt_one_partition_2_1 one_partition_2_1
        explain {
            sql("${one_partition_2_1}")
            contains "partition=1/2"
        }

        qt_one_partition_3_all one_partition_3_all
        explain {
            sql("${one_partition_3_all}")
            contains "partition=2/2"
        }

        qt_one_partition_4_all one_partition_4_all
        explain {
            sql("${one_partition_4_all}")
            contains "partition=2/2"
        }

        qt_one_partition_5_1 one_partition_5_1
        explain {
            sql("${one_partition_5_1}")
            contains "partition=1/2"
        }

        qt_two_partition_1_1 two_partition_1_1
        explain {
            sql("${two_partition_1_1}")
            contains "partition=1/4"
        }

        qt_two_partition_2_1 two_partition_2_1
        explain {
            sql("${two_partition_2_1}")
            contains "partition=1/4"
        }

        qt_two_partition_3_2 two_partition_3_2
        explain {
            sql("${two_partition_3_2}")
            contains "partition=2/4"
        }

        qt_two_partition_4_all two_partition_4_all
        explain {
            sql("${two_partition_4_all}")
            contains "partition=4/4"
        }

        qt_two_partition_5_1 two_partition_5_1
        explain {
            sql("${two_partition_5_1}")
            contains "partition=1/4"
        }

        qt_two_partition_6_1 two_partition_6_1
        explain {
            sql("${two_partition_6_1}")
            contains "partition=1/4"
        }

        qt_three_partition_1_1 three_partition_1_1
        explain {
            sql("${three_partition_1_1}")
            contains "partition=1/10"
        }

        qt_three_partition_2_1 three_partition_2_1
        explain {
            sql("${three_partition_2_1}")
            contains "partition=1/10"
        }

        qt_three_partition_3_3 three_partition_3_3
        explain {
            sql("${three_partition_3_3}")
            contains "partition=3/10"
        }

        qt_three_partition_4_2 three_partition_4_2
        explain {
            sql("${three_partition_4_2}")
            contains "partition=2/10"
        }

        qt_three_partition_5_all three_partition_5_all
        explain {
            sql("${three_partition_5_all}")
            contains "partition=10/10"
        }

        qt_three_partition_6_1 three_partition_6_1
        explain {
            sql("${three_partition_6_1}")
            contains "partition=1/10"
        }

        qt_three_partition_7_7 three_partition_7_7
        explain {
            sql("${three_partition_7_7}")
            contains "partition=7/10"
        }

        qt_three_partition_8_2 three_partition_8_2
        explain {
            sql("${three_partition_8_2}")
            contains "partition=2/10"
        }

        // 0 partitions
        def one_partition_6_0 = """SELECT id,name,part1 FROM one_partition_tb WHERE part1 = 2023 ORDER BY id;"""
        qt_one_partition_6_0 one_partition_6_0
        explain {
            sql("${one_partition_6_0}")
            contains "partition=0/2"
        }

        def two_partition_7_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'CN' AND part2 = 1 ORDER BY id;"""
        qt_two_partition_7_0 two_partition_7_0
        explain {
            sql("${two_partition_7_0}")
            contains "partition=0/4"
        }

        def two_partition_8_0 = """SELECT id,name,part1 FROM two_partition_tb WHERE part1 = 'US' AND part2 = 3 ORDER BY id;"""
        qt_two_partition_8_0 two_partition_8_0
        explain {
            sql("${two_partition_8_0}")
            contains "partition=0/4"
        }

        def three_partition_9_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'US' AND part2 = 2023 AND part3 = 'Q1' ORDER BY id;"""
        qt_three_partition_9_0 three_partition_9_0
        explain {
            sql("${three_partition_9_0}")
            contains "partition=0/10"
        }

        def three_partition_10_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'EU' AND part2 = 2024 AND part3 = 'Q4' ORDER BY id;"""
        qt_three_partition_10_0 three_partition_10_0
        explain {
            sql("${three_partition_10_0}")
            contains "partition=0/10"
        }

        def three_partition_11_0 = """SELECT id,name,part1 FROM three_partition_tb WHERE part1 = 'AS' AND part2 = 2025 AND part3 = 'Q4' ORDER BY id;"""
        qt_three_partition_11_0 three_partition_11_0
        explain {
            sql("${three_partition_11_0}")
            contains "partition=0/10"
        }

        // time travel
        def time_travel_two_partition_1_3 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241202171226401' order by id;"
        def time_travel_two_partition_2_2 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241202171226401' where part1='US' order by id;"
        def time_travel_two_partition_3_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241202171226401' where part2=2 order by id;"
        def time_travel_two_partition_4_0 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241202171226401' where part2=10 order by id;"

        qt_time_travel_two_partition_1_3 time_travel_two_partition_1_3
        explain {
            sql("${time_travel_two_partition_1_3}")
            contains "partition=3/3"
        }

        qt_time_travel_two_partition_2_2 time_travel_two_partition_2_2
        explain {
            sql("${time_travel_two_partition_2_2}")
            contains "partition=2/3"
        }

        qt_time_travel_two_partition_3_1 time_travel_two_partition_3_1
        explain {
            sql("${time_travel_two_partition_3_1}")
            contains "partition=1/3"
        }

        qt_time_travel_two_partition_4_0 time_travel_two_partition_4_0
        explain {
            sql("${time_travel_two_partition_4_0}")
            contains "partition=0/3"
        }

        def time_travel_two_partition_5_0 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20231126012025218' order by id;"
        qt_time_travel_two_partition_5_0 time_travel_two_partition_5_0
        explain {
            sql("${time_travel_two_partition_5_0}")
            contains "partition=0/0"
        }

        def time_travel_two_partition_6_1 = "select id,name,part1,part2 from two_partition_tb FOR TIME AS OF '20241202171214902' order by id;"
        qt_time_travel_two_partition_6_1 time_travel_two_partition_6_1
        explain {
            sql("${time_travel_two_partition_6_1}")
            contains "partition=1/1"
        }

        // all types as partition
        qt_one_partition_boolean one_partition_boolean
        explain {
            sql("${one_partition_boolean}")
            contains "partition=1/2"
        }
        qt_one_partition_tinyint one_partition_tinyint
        explain {
            sql("${one_partition_tinyint}")
            contains "partition=1/2"
        }
        qt_one_partition_smallint one_partition_smallint
        explain {
            sql("${one_partition_smallint}")
            contains "partition=1/2"
        }
        qt_one_partition_int one_partition_int
        explain {
            sql("${one_partition_int}")
            contains "partition=1/2"
        }
        qt_one_partition_bigint one_partition_bigint
        explain {
            sql("${one_partition_bigint}")
            contains "partition=1/2"
        }
        qt_one_partition_string one_partition_string
        explain {
            sql("${one_partition_string}")
            contains "partition=1/2"
        }
        qt_one_partition_date one_partition_date
        explain {
            sql("${one_partition_date}")
            contains "partition=1/2"
        }
        // qt_one_partition_timestamp one_partition_timestamp
        // explain {
        //     sql("${one_partition_timestamp}")
        //     contains "partition=1/2"
        // }

        sql """drop catalog if exists ${catalog_name};"""

    }
}