[fix](tvf) Support fs.defaultFS with postfix '/' (#33202)

For an HDFS TVF query such as:
```
select count(*) from hdfs(
"uri" = "hdfs://HDFS8000871/path/to/1.parquet",
"fs.defaultFS" = "hdfs://HDFS8000871/",
"format" = "parquet"
);
```

Previously, if `fs.defaultFS` ended with `/`, the query would fail with an error like:
```
reason: RemoteException: File does not exist: /user/doris/path/to/1.parquet
```
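As you can see, the path is wrong: it carries the incorrect prefix `/user/doris`. After the namenode prefix was stripped, the remaining path no longer started with `/`, so the Hadoop client treated it as relative and prepended a default directory.
Users had to set `fs.defaultFS` to `hdfs://HDFS8000871` (without the trailing `/`) to avoid this error.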

This PR fixes the issue by making sure the path that remains after stripping the namenode prefix always starts with `/`.
This commit is contained in:
Mingyu Chen
2024-04-03 16:26:13 +08:00
committed by morningman
parent 466972926e
commit 586df24b9d
2 changed files with 8 additions and 1 deletions

View File

@ -45,6 +45,11 @@ Path convert_path(const Path& path, const std::string& namenode) {
Path real_path(path);
if (path.string().find(namenode) != std::string::npos) {
std::string real_path_str = path.string().substr(namenode.size());
if (!real_path_str.starts_with("/")) {
// The real path must starts with "/"
// Or the hadoop client will add a prefix like "/user/hadoop".
real_path_str = "/" + real_path_str;
}
real_path = real_path_str;
}
return real_path;

View File

@ -41,6 +41,7 @@ suite("test_hive_read_parquet_complex_type", "external,hive,external_docker") {
def hdfsUserName = "doris"
def format = "parquet"
def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
def defaultFS_with_postfix = "hdfs://${externalEnvIp}:${hdfs_port}/"
def outfile_path = "/user/doris/tmp_data"
def uri = "${defaultFS}" + "${outfile_path}/exp_"
@ -99,7 +100,6 @@ suite("test_hive_read_parquet_complex_type", "external,hive,external_docker") {
INTO OUTFILE "${uri}"
FORMAT AS ${format}
PROPERTIES (
"fs.defaultFS"="${defaultFS}",
"hadoop.username" = "${hdfsUserName}"
);
"""
@ -147,6 +147,7 @@ suite("test_hive_read_parquet_complex_type", "external,hive,external_docker") {
qt_select_tvf1 """ select * from HDFS(
"uri" = "${outfile_url}0.parquet",
"fs.defaultFS" = "${defaultFS_with_postfix}",
"hadoop.username" = "${hdfsUserName}",
"format" = "${format}");
"""
@ -185,6 +186,7 @@ suite("test_hive_read_parquet_complex_type", "external,hive,external_docker") {
qt_select_tvf2 """ select * from HDFS(
"uri" = "${outfile_url}0.parquet",
"fs.defaultFS" = "${defaultFS}",
"hadoop.username" = "${hdfsUserName}",
"format" = "${format}");
"""