branch-2.1: [fix](multi-catalog) Fix bug: "Can not create a Path from an empty string" (#49382) (#49641)
### What problem does this PR solve?
Problem Summary:
In HiveMetaStoreCache, FileInputFormat.setInputPaths is used to set the
input paths. However, the String overload of this function splits its
argument on commas, which is not the expected behavior here. As a result,
when partition values contain commas, the partition location is parsed
incorrectly, leading to errors such as "Can not create a Path from an
empty string".
```java
public static void setInputPaths(JobConf conf, String commaSeparatedPaths) {
setInputPaths(conf, StringUtils.stringToPath(
getPathStrings(commaSeparatedPaths)));
}
```
To prevent FileInputFormat.setInputPaths from splitting paths by commas,
we use another overloaded version of the method. Instead of passing a
comma-separated string, we explicitly pass a Path object, ensuring that
partition values containing commas are handled correctly.
```java
public static void setInputPaths(JobConf conf, Path... inputPaths) {
Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]);
StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString()));
for(int i = 1; i < inputPaths.length;i++) {
str.append(StringUtils.COMMA_STR);
path = new Path(conf.getWorkingDirectory(), inputPaths[i]);
str.append(StringUtils.escapeString(path.toString()));
}
conf.set(org.apache.hadoop.mapreduce.lib.input.
FileInputFormat.INPUT_DIR, str.toString());
}
```
### Release note
None
This commit is contained in:
@ -0,0 +1,53 @@
|
||||
create database if not exists partition_tables;
|
||||
use partition_tables;
|
||||
|
||||
CREATE TABLE decimal_partition_table (
|
||||
id INT,
|
||||
name STRING,
|
||||
value FLOAT
|
||||
)
|
||||
PARTITIONED BY (partition_col DECIMAL(10, 2))
|
||||
STORED AS PARQUET
|
||||
LOCATION '/user/doris/preinstalled_data/partition_tables/decimal_partition_table';
|
||||
|
||||
CREATE TABLE int_partition_table (
|
||||
id INT,
|
||||
name STRING,
|
||||
value FLOAT
|
||||
)
|
||||
PARTITIONED BY (partition_col INT)
|
||||
STORED AS PARQUET
|
||||
LOCATION '/user/doris/preinstalled_data/partition_tables/int_partition_table';
|
||||
|
||||
CREATE TABLE string_partition_table (
|
||||
id INT,
|
||||
name STRING,
|
||||
value FLOAT
|
||||
)
|
||||
PARTITIONED BY (partition_col STRING)
|
||||
STORED AS PARQUET
|
||||
LOCATION '/user/doris/preinstalled_data/partition_tables/string_partition_table';
|
||||
|
||||
CREATE TABLE date_partition_table (
|
||||
id INT,
|
||||
name STRING,
|
||||
value FLOAT
|
||||
)
|
||||
PARTITIONED BY (partition_col DATE)
|
||||
STORED AS PARQUET
|
||||
LOCATION '/user/doris/preinstalled_data/partition_tables/date_partition_table';
|
||||
|
||||
CREATE TABLE string_partition_table_with_comma (
|
||||
id INT,
|
||||
name STRING,
|
||||
value FLOAT
|
||||
)
|
||||
PARTITIONED BY (partition_col STRING)
|
||||
STORED AS PARQUET
|
||||
LOCATION '/user/doris/preinstalled_data/partition_tables/string_partition_table_with_comma';
|
||||
|
||||
msck repair table decimal_partition_table;
|
||||
msck repair table int_partition_table;
|
||||
msck repair table string_partition_table;
|
||||
msck repair table date_partition_table;
|
||||
msck repair table string_partition_table_with_comma;
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -404,7 +404,8 @@ public class HiveMetaStoreCache {
|
||||
} catch (Exception e) {
|
||||
LOG.warn("unknown scheme in path: " + finalLocation, e);
|
||||
}
|
||||
FileInputFormat.setInputPaths(jobConf, finalLocation.get());
|
||||
// NOTICE: the setInputPaths has 2 overloads, the 2nd arg should be Path not String
|
||||
FileInputFormat.setInputPaths(jobConf, finalLocation.getPath());
|
||||
try {
|
||||
FileCacheValue result = getFileCache(finalLocation.get(), key.inputFormat, jobConf,
|
||||
key.getPartitionValues(), key.bindBrokerName);
|
||||
|
||||
@ -120,6 +120,11 @@ nation=us/city=washington
|
||||
|
||||
-- !q21 --
|
||||
|
||||
-- !string_partition_table_with_comma --
|
||||
1 a 1.1 ,
|
||||
2 b 2.2 a, b
|
||||
3 c 3.3 a, b, c
|
||||
|
||||
-- !q01 --
|
||||
33 1.11xyz
|
||||
34 1.11XYZ
|
||||
@ -241,3 +246,8 @@ nation=us/city=washington
|
||||
|
||||
-- !q21 --
|
||||
|
||||
-- !string_partition_table_with_comma --
|
||||
1 a 1.1 ,
|
||||
2 b 2.2 a, b
|
||||
3 c 3.3 a, b, c
|
||||
|
||||
|
||||
@ -91,6 +91,10 @@ suite("test_hive_partitions", "p0,external,hive,external_docker,external_docker_
|
||||
|
||||
q01()
|
||||
|
||||
qt_string_partition_table_with_comma """
|
||||
select * from partition_tables.string_partition_table_with_comma order by id;
|
||||
"""
|
||||
|
||||
sql """set num_partitions_in_batch_mode=1"""
|
||||
explain {
|
||||
sql ("select * from partition_table")
|
||||
@ -99,8 +103,6 @@ suite("test_hive_partitions", "p0,external,hive,external_docker,external_docker_
|
||||
contains "(approximate)inputSplitNum=60"
|
||||
}
|
||||
sql """unset variable num_partitions_in_batch_mode"""
|
||||
|
||||
// sql """drop catalog if exists ${catalog_name}"""
|
||||
} finally {
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user