[opt](hive) skip hidden file and dir (#32412)

When querying a Hive table, we should skip all hidden directories and files, such as:
```
/visible/.hidden/path
/visible/.hidden.txt
```
This commit is contained in:
Mingyu Chen
2024-03-19 14:26:10 +08:00
committed by yiguolei
parent a5f3611b88
commit 73de61ed84
4 changed files with 68 additions and 5 deletions

View File

@ -50,6 +50,7 @@ import org.apache.doris.planner.ColumnBound;
import org.apache.doris.planner.ListPartitionPrunerV2;
import org.apache.doris.planner.PartitionPrunerV2Base.UniqueId;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.cache.CacheBuilder;
@ -63,7 +64,6 @@ import com.google.common.collect.RangeMap;
import com.google.common.collect.Streams;
import com.google.common.collect.TreeRangeMap;
import lombok.Data;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
@ -1035,14 +1035,16 @@ public class HiveMetaStoreCache {
this.acidInfo = acidInfo;
}
private boolean isFileVisible(Path path) {
if (path == null || StringUtils.isEmpty(path.toString())) {
@VisibleForTesting
public static boolean isFileVisible(Path path) {
if (path == null) {
return false;
}
if (path.getName().startsWith(".") || path.getName().startsWith("_")) {
String pathStr = path.toUri().toString();
if (containsHiddenPath(pathStr) || path.getName().startsWith("_")) {
return false;
}
for (String name : path.toString().split("/")) {
for (String name : pathStr.split("/")) {
if (isGeneratedPath(name)) {
return false;
}
@ -1050,6 +1052,18 @@ public class HiveMetaStoreCache {
return true;
}
/**
 * Tells whether the given path string has any hidden component.
 *
 * <p>A component counts as hidden when its name begins with a dot. That is the
 * case when the whole string starts with {@code "."} or when a {@code '/'} is
 * immediately followed by a {@code '.'} anywhere in the string.
 *
 * @param path the path string to inspect; must not be null
 * @return {@code true} if the string starts with a dot or contains {@code "/."},
 *         {@code false} otherwise
 */
private static boolean containsHiddenPath(String path) {
    // Leading component is hidden (e.g. ".hidden/file").
    boolean hiddenAtStart = path.startsWith(".");
    // Some later component is hidden (e.g. "/visible/.hidden/file").
    boolean hiddenInside = path.contains("/.");
    return hiddenAtStart || hiddenInside;
}
private static boolean isGeneratedPath(String name) {
return "_temporary".equals(name) // generated by spark
|| "_imapala_insert_staging".equals(name) // generated by impala