branch-2.1: [fix](hive) ignore all hidden dir and files #52286 (#52324)

Cherry-picked from #52286

Co-authored-by: Mingyu Chen (Rayner) <morningman@163.com>
This commit is contained in:
github-actions[bot]
2025-06-28 14:23:27 +08:00
committed by GitHub
parent e923acef1b
commit 8480691871
2 changed files with 9 additions and 16 deletions

View File

@ -1048,34 +1048,24 @@ public class HiveMetaStoreCache {
return false;
}
String pathStr = path.toUri().toString();
if (containsHiddenPath(pathStr) || path.getName().startsWith("_")) {
if (containsHiddenPath(pathStr)) {
return false;
}
for (String name : pathStr.split("/")) {
if (isGeneratedPath(name)) {
return false;
}
}
return true;
}
private static boolean containsHiddenPath(String path) {
if (path.startsWith(".")) {
// Hive ignores files starting with _ and .
if (path.startsWith(".") || path.startsWith("_")) {
return true;
}
for (int i = 0; i < path.length() - 1; i++) {
if (path.charAt(i) == '/' && path.charAt(i + 1) == '.') {
if (path.charAt(i) == '/' && (path.charAt(i + 1) == '.' || path.charAt(i + 1) == '_')) {
return true;
}
}
return false;
}
/**
 * Reports whether a single path component names an engine-generated or hidden entry.
 *
 * <p>Matches the exact names {@code "_temporary"} and {@code "_impala_insert_staging"},
 * plus any name that starts with {@code '.'}. The name comparison is exact, so a
 * longer name such as {@code "_temporary_xx"} is not matched by this method.
 *
 * @param name one component of a path (the text between two {@code '/'} separators);
 *             must not be {@code null}
 * @return {@code true} if the component should be skipped
 */
private static boolean isGeneratedPath(String name) {
    return "_temporary".equals(name) // generated by spark
            // Spelling fixed from "_imapala_insert_staging", which never matched Impala's directory.
            || "_impala_insert_staging".equals(name) // generated by impala
            || name.startsWith("."); // generated by hive or hidden file
}
}
@Data

View File

@ -31,17 +31,20 @@ public class PathVisibleTest {
Assert.assertFalse(FileCacheValue.isFileVisible(new Path("/visible/.hidden/path")));
Assert.assertFalse(FileCacheValue.isFileVisible(new Path("hdfs://visible/path/.file")));
Assert.assertFalse(FileCacheValue.isFileVisible(new Path("/visible/path/_temporary_xx")));
Assert.assertFalse(FileCacheValue.isFileVisible(new Path("/visible/path/_imapala_insert_staging")));
Assert.assertFalse(FileCacheValue.isFileVisible(new Path("/visible/path/_impala_insert_staging")));
Assert.assertFalse(FileCacheValue.isFileVisible(new Path("/visible//.hidden/path")));
Assert.assertFalse(FileCacheValue.isFileVisible(new Path("s3://visible/.hidden/path")));
Assert.assertFalse(FileCacheValue.isFileVisible(new Path("///visible/path/.file")));
Assert.assertFalse(FileCacheValue.isFileVisible(new Path("/visible/path///_temporary_xx")));
Assert.assertFalse(FileCacheValue.isFileVisible(new Path("hdfs://visible//path/_imapala_insert_staging")));
Assert.assertFalse(FileCacheValue.isFileVisible(new Path("hdfs://visible//path/_impala_insert_staging")));
Assert.assertFalse(FileCacheValue.isFileVisible(
new Path("hdfs://hacluster/user/hive/warehouse/db1.db/tbl1/_spark_metadata/")));
Assert.assertTrue(FileCacheValue.isFileVisible(new Path("s3://visible/path")));
Assert.assertTrue(FileCacheValue.isFileVisible(new Path("path")));
Assert.assertTrue(FileCacheValue.isFileVisible(new Path("hdfs://visible/path./1.txt")));
Assert.assertTrue(FileCacheValue.isFileVisible(new Path("/1.txt")));
Assert.assertTrue(FileCacheValue.isFileVisible(new Path("hdfs://vis_ible_/pa.th./1_.txt__")));
}
}