From 94986fc5746b0751daaf71d79af50dcd6004e39f Mon Sep 17 00:00:00 2001 From: Socrates Date: Sat, 29 Mar 2025 09:13:43 +0800 Subject: [PATCH] branch-2.1: [fix](multi-catalog) Fix bug: "Can not create a Path from an empty string" (#49382) (#49641) ### What problem does this PR solve? Problem Summary: In HiveMetaStoreCache, the function FileInputFormat.setInputPaths is used to set input paths. However, this function splits paths using commas, which is not the expected behavior. As a result, when partition values contain commas, it leads to incorrect path parsing and potential errors. ```java public static void setInputPaths(JobConf conf, String commaSeparatedPaths) { setInputPaths(conf, StringUtils.stringToPath( getPathStrings(commaSeparatedPaths))); } ``` To prevent FileInputFormat.setInputPaths from splitting paths by commas, we use another overloaded version of the method. Instead of passing a comma-separated string, we explicitly pass a Path object, ensuring that partition values containing commas are handled correctly. ```java public static void setInputPaths(JobConf conf, Path... inputPaths) { Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]); StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString())); for(int i = 1; i < inputPaths.length;i++) { str.append(StringUtils.COMMA_STR); path = new Path(conf.getWorkingDirectory(), inputPaths[i]); str.append(StringUtils.escapeString(path.toString())); } conf.set(org.apache.hadoop.mapreduce.lib.input. 
FileInputFormat.INPUT_DIR, str.toString()); } ``` ### Release note None --- .../create_preinstalled_scripts/run74.hql | 53 ++++++++++++++++++ .../partition_col=,/000000_0 | Bin 0 -> 408 bytes .../partition_col=a, b, c/000000_0 | Bin 0 -> 408 bytes .../partition_col=a, b/000000_0 | Bin 0 -> 408 bytes .../datasource/hive/HiveMetaStoreCache.java | 3 +- .../hive/test_hive_partitions.out | 10 ++++ .../hive/test_hive_partitions.groovy | 6 +- 7 files changed, 69 insertions(+), 3 deletions(-) create mode 100644 docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a, b, c/000000_0 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=a, b/000000_0 diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql new file mode 100644 index 0000000000..31e98f370d --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run74.hql @@ -0,0 +1,53 @@ +create database if not exists partition_tables; +use partition_tables; + +CREATE TABLE decimal_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col DECIMAL(10, 2)) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/decimal_partition_table'; + +CREATE TABLE int_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col INT) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/int_partition_table'; + +CREATE TABLE 
string_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col STRING) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/string_partition_table'; + +CREATE TABLE date_partition_table ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col DATE) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/date_partition_table'; + +CREATE TABLE string_partition_table_with_comma ( + id INT, + name STRING, + value FLOAT +) +PARTITIONED BY (partition_col STRING) +STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/partition_tables/string_partition_table_with_comma'; + +msck repair table decimal_partition_table; +msck repair table int_partition_table; +msck repair table string_partition_table; +msck repair table date_partition_table; +msck repair table string_partition_table_with_comma; diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0 b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/partition_tables/string_partition_table_with_comma/partition_col=,/000000_0 new file mode 100644 index 0000000000000000000000000000000000000000..a93ce013162c2057aa7974cd8afce3a6c638a218 GIT binary patch literal 408 zcmWG=3^EjD5ET&>(Gg`5We{Z(<&cqJVPs%nfRJJg3=9lR3=9m+j37Z#22n9lF(jpo zi4u&7Xo?as4Lp0M$6kU3L?WpLaYdOVeLT1`GRsoqi<2`_a}!0`7(|&=nIxDpQy4^f zRGB1L@)C1XRT&sWIaHY>Sj!S~N>drcm?UK+ZPYkqM42Q_IT+X^LCQrL#F)gw#KP1# zY#?rjkYEed;$%d5pi038f))FS`KZO&NFe;6wu(_klmn^`Vi#B;$kbJgHV_{`NU*VD zKN!WB7&JT!5{n8;Q%iJnixkRIi;6Sz^ArsAEc6T&G?Gd)b5ayc5|a{5QW8_mladTl oOj1*llGDsBjf@jh4ULU0(~K(Gg`5We{Z(<&cqJVP;@pfRJJg3=9lR3=9m+j37Z#22n9lF(jpo z$r6moNQxL47#NZ<4Ky|mc938Jkw|JmTu~-T9}n(~%(B$@;^d6f+(c0}22mzeCJCm@ z6b4ZqRVE3Ryu{p8RR#u84pk-z*0RK$(o_a9CP^7d8#N9YQ6@=K4hA+!kaAH5F($Dv zu`o3b8;IK>B-lc=I2lnMs8XC<#H2)%l*Cl? 
sq$GnBlhl-?(Gg`5We{Z(<&cqJVParlfRJJg3=9k)E;A#D6lD+<6BR>J%9td< zn1rN=k%56B3Ddx{XLuYWSU@C_S`b&1Nz%uIJ0r6!HNH4GBQ-Zsl#M}@NtH>0DKmvZ zlt-0Gf+a68H&vB^L6k$4NrJU3F{d<@L5xXKM$$%&Lq?QI(v*XNO%kMBltGM1EKDp+ zjl%}wb_fZ!P%TbIln1I5Y#>;%kC=~IoQ(v+4{ECzWkflk>L7N36@pA%#b^WZ0fYn_ zEB1p?jEO