branch-2.1: [fix](multi-catalog) Fix bug: "Can not create a Path from an empty string" (#49382) (#49641)

### What problem does this PR solve?
Problem Summary:
In HiveMetaStoreCache, the function FileInputFormat.setInputPaths is
used to set input paths. However, this function splits paths using
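commas, which is not the expected behavior. As a result, when a partition
value contains a comma, the partition location is parsed as several
paths. For example, a partition value of `,` yields an empty path string,
which fails with "Can not create a Path from an empty string".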
```java
  public static void setInputPaths(JobConf conf, String commaSeparatedPaths) {
    setInputPaths(conf, StringUtils.stringToPath(
                        getPathStrings(commaSeparatedPaths)));
  }
```
To prevent FileInputFormat.setInputPaths from splitting paths by commas,
we use another overloaded version of the method. Instead of passing a
comma-separated string, we explicitly pass a Path object, ensuring that
partition values containing commas are handled correctly.
```java
  public static void setInputPaths(JobConf conf, Path... inputPaths) {
    Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]);
    StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
    for(int i = 1; i < inputPaths.length;i++) {
      str.append(StringUtils.COMMA_STR);
      path = new Path(conf.getWorkingDirectory(), inputPaths[i]);
      str.append(StringUtils.escapeString(path.toString()));
    }
    conf.set(org.apache.hadoop.mapreduce.lib.input.
      FileInputFormat.INPUT_DIR, str.toString());
  }
```

### Release note

None
This commit is contained in:
Socrates
2025-03-29 09:13:43 +08:00
committed by GitHub
parent 05ffc62594
commit 94986fc574
7 changed files with 69 additions and 3 deletions

View File

@ -0,0 +1,53 @@
-- Setup script for the partition_tables database: five Parquet tables that
-- share the same three data columns and differ only in the partition column.
CREATE DATABASE IF NOT EXISTS partition_tables;
USE partition_tables;

-- Partitioned by a DECIMAL(10, 2) column.
CREATE TABLE decimal_partition_table (
    id    INT,
    name  STRING,
    value FLOAT
)
    PARTITIONED BY (partition_col DECIMAL(10, 2))
    STORED AS PARQUET
    LOCATION '/user/doris/preinstalled_data/partition_tables/decimal_partition_table';

-- Partitioned by an INT column.
CREATE TABLE int_partition_table (
    id    INT,
    name  STRING,
    value FLOAT
)
    PARTITIONED BY (partition_col INT)
    STORED AS PARQUET
    LOCATION '/user/doris/preinstalled_data/partition_tables/int_partition_table';

-- Partitioned by a STRING column.
CREATE TABLE string_partition_table (
    id    INT,
    name  STRING,
    value FLOAT
)
    PARTITIONED BY (partition_col STRING)
    STORED AS PARQUET
    LOCATION '/user/doris/preinstalled_data/partition_tables/string_partition_table';

-- Partitioned by a DATE column.
CREATE TABLE date_partition_table (
    id    INT,
    name  STRING,
    value FLOAT
)
    PARTITIONED BY (partition_col DATE)
    STORED AS PARQUET
    LOCATION '/user/doris/preinstalled_data/partition_tables/date_partition_table';

-- Partitioned by a STRING column, for partition values that contain commas.
CREATE TABLE string_partition_table_with_comma (
    id    INT,
    name  STRING,
    value FLOAT
)
    PARTITIONED BY (partition_col STRING)
    STORED AS PARQUET
    LOCATION '/user/doris/preinstalled_data/partition_tables/string_partition_table_with_comma';

-- Add the partitions found under each table location to the metastore.
MSCK REPAIR TABLE decimal_partition_table;
MSCK REPAIR TABLE int_partition_table;
MSCK REPAIR TABLE string_partition_table;
MSCK REPAIR TABLE date_partition_table;
MSCK REPAIR TABLE string_partition_table_with_comma;

View File

@ -404,7 +404,8 @@ public class HiveMetaStoreCache {
} catch (Exception e) {
LOG.warn("unknown scheme in path: " + finalLocation, e);
}
FileInputFormat.setInputPaths(jobConf, finalLocation.get());
// NOTICE: setInputPaths has two overloads; pass a Path, not a String, because the String overload splits its argument on commas.
FileInputFormat.setInputPaths(jobConf, finalLocation.getPath());
try {
FileCacheValue result = getFileCache(finalLocation.get(), key.inputFormat, jobConf,
key.getPartitionValues(), key.bindBrokerName);

View File

@ -120,6 +120,11 @@ nation=us/city=washington
-- !q21 --
-- !string_partition_table_with_comma --
1 a 1.1 ,
2 b 2.2 a, b
3 c 3.3 a, b, c
-- !q01 --
33 1.11xyz
34 1.11XYZ
@ -241,3 +246,8 @@ nation=us/city=washington
-- !q21 --
-- !string_partition_table_with_comma --
1 a 1.1 ,
2 b 2.2 a, b
3 c 3.3 a, b, c

View File

@ -91,6 +91,10 @@ suite("test_hive_partitions", "p0,external,hive,external_docker,external_docker_
q01()
qt_string_partition_table_with_comma """
select * from partition_tables.string_partition_table_with_comma order by id;
"""
sql """set num_partitions_in_batch_mode=1"""
explain {
sql ("select * from partition_table")
@ -99,8 +103,6 @@ suite("test_hive_partitions", "p0,external,hive,external_docker,external_docker_
contains "(approximate)inputSplitNum=60"
}
sql """unset variable num_partitions_in_batch_mode"""
// sql """drop catalog if exists ${catalog_name}"""
} finally {
}
}