From 34b7f381b1ca06247c6bc555a291762362c00aea Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Fri, 4 Aug 2023 14:14:53 +0800 Subject: [PATCH] [fix](multi catalog)Filter .hive-staging dir under hive file path. #22574 A Hive file path may contain temporary directories like these: drwxrwxrwx - root supergroup 0 2023-03-22 21:03 /usr/hive/warehouse/datalake_performance.db/clickbench_parquet_hits/.hive-staging_hive_2023-03-22_21-03-12_047_8461238469577574033-1 drwxrwxrwx - root supergroup 0 2023-05-18 15:03 /usr/hive/warehouse/datalake_performance.db/clickbench_parquet_hits/.hive-staging_hive_2023-05-18_15-03-52_780_3065787006787646235-1 This causes an error when the BE tries to read these files, so they need to be filtered out during FE planning. --- .../datasource/hive/HiveMetaStoreCache.java | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index e1fa35d07e..d3e4750c3b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -968,7 +968,7 @@ public class HiveMetaStoreCache { private AcidInfo acidInfo; public void addFile(RemoteFile file) { - if (isFileVisible(file.getName())) { + if (isFileVisible(file.getPath())) { HiveFileStatus status = new HiveFileStatus(); status.setBlockLocations(file.getBlockLocations()); status.setPath(file.getPath()); @@ -980,7 +980,7 @@ public class HiveMetaStoreCache { } public void addSplit(FileSplit split) { - if (isFileVisible(split.getPath().getName())) { + if (isFileVisible(split.getPath())) { splits.add(split); } } @@ -998,10 +998,19 @@ public class HiveMetaStoreCache { this.acidInfo = acidInfo; } - private boolean isFileVisible(String filename) { - return StringUtils.isNotEmpty(filename) 
- && !filename.startsWith(".") - && !filename.startsWith("_"); + private boolean isFileVisible(Path path) { + if (path == null || StringUtils.isEmpty(path.toString())) { + return false; + } + if (path.getName().startsWith(".") || path.getName().startsWith("_")) { + return false; + } + for (String name : path.toString().split("/")) { + if (name.startsWith(".hive-staging")) { + return false; + } + } + return true; } }