[Fix](multi catalog)Fix hive partition contains special character bug (#22541)

A Hive partition path may contain special characters, so the path needs to be encoded before a URI object is created from it.
This commit is contained in:
Jibing-Li
2023-08-03 23:53:25 +08:00
committed by GitHub
parent 3447a70b25
commit 479e62de0f

View File

@ -39,8 +39,12 @@ import software.amazon.awssdk.regions.Region;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.S3Configuration;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.Map;
@ -85,15 +89,21 @@ public class S3Util {
return normalizedHdfsPath(location, props);
}
return location;
} catch (URISyntaxException e) {
} catch (URISyntaxException | UnsupportedEncodingException e) {
throw new RuntimeException(e.getMessage(), e);
}
}
private static String normalizedHdfsPath(String location, Map<String, String> props) throws URISyntaxException {
private static String normalizedHdfsPath(String location, Map<String, String> props)
throws URISyntaxException, UnsupportedEncodingException {
// Hive partition may contain special characters such as ' ', '<', '>' and so on.
// Need to encode these characters before creating URI.
// But doesn't encode '/' and ':' so that we can get the correct uri host.
location = URLEncoder.encode(location, StandardCharsets.UTF_8.name()).replace("%2F", "/").replace("%3A", ":");
URI normalizedUri = new URI(location);
// compatible with 'hdfs:///' or 'hdfs:/'
if (StringUtils.isEmpty(normalizedUri.getHost())) {
location = URLDecoder.decode(location, StandardCharsets.UTF_8.name());
String normalizedPrefix = HdfsResource.HDFS_PREFIX + "//";
String brokenPrefix = HdfsResource.HDFS_PREFIX + "/";
if (location.startsWith(brokenPrefix) && !location.startsWith(normalizedPrefix)) {
@ -116,7 +126,7 @@ public class S3Util {
}
}
}
return location;
return URLDecoder.decode(location, StandardCharsets.UTF_8.name());
}
/**