From 489171e4c180ef603ad95eea8b4bc49f042e177b Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Tue, 18 Jul 2023 11:20:38 +0800 Subject: [PATCH] [Fix](multi catalog)Fix hive partition value contains special character such as / bug (#21876) Hive escapes some special characters in partition values as %XX; for example, / is escaped to %2F. Doris did not handle this case, which caused Doris to fail to list the files under partitions whose values contain special characters. This PR fixes the bug. --- .../datasource/hive/HiveMetaStoreCache.java | 4 +- .../hive/test_hive_special_char_partition.out | 51 +++++++++++++++++++ .../test_hive_special_char_partition.groovy | 51 +++++++++++++++++++ 3 files changed, 105 insertions(+), 1 deletion(-) create mode 100644 regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out create mode 100644 regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index 51653afc68..97a38b9864 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -74,6 +74,7 @@ import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.utils.FileUtils; import org.apache.hadoop.hive.ql.io.AcidUtils; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.InputFormat; @@ -339,7 +340,8 @@ public class HiveMetaStoreCache { for (int i = 0; i < partitionColumns.size(); i++) { sb.append(partitionColumns.get(i).getName()); sb.append("="); - 
sb.append(key.getValues().get(i)); + // Partition value may contain special character, like / and so on. Need to encode. + sb.append(FileUtils.escapePathName(key.getValues().get(i))); sb.append("/"); } sb.delete(sb.length() - 1, sb.length()); diff --git a/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out b/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out new file mode 100644 index 0000000000..0bd26b1276 --- /dev/null +++ b/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out @@ -0,0 +1,51 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !1 -- +name# 2023#01#01 +name1 2023/01/01 +name10 2023<01><01> +name11 2023\\01\\01 +name12 2023.01.01 +name2 2023 01 01 +name3 2023:01:01 +name4 2023?01?01 +name5 2023=01=01 +name6 2023%01%01 +name8 2023"01"01 +name9 2023'01'01 + +-- !2 -- +name2 + +-- !3 -- +name1 + +-- !4 -- +name4 2023?01?01 + +-- !5 -- +name12 2023.01.01 + +-- !6 -- +name10 2023<01><01> + +-- !7 -- +name3 2023:01:01 + +-- !8 -- +name5 2023=01=01 + +-- !9 -- +name8 2023"01"01 + +-- !10 -- +name9 2023'01'01 + +-- !11 -- +name11 2023\\01\\01 + +-- !12 -- +name6 2023%01%01 + +-- !13 -- +name# 2023#01#01 + diff --git a/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy b/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy new file mode 100644 index 0000000000..cb862469f6 --- /dev/null +++ b/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hive_special_char_partition", "p2") { + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") + String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") + String catalog_name = "test_hive_special_char_partition" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hadoop.username' = 'hadoop', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + sql """use multi_catalog;""" + qt_1 "select * from special_character_1_partition order by name" + qt_2 "select name from special_character_1_partition where part='2023 01 01'" + qt_3 "select name from special_character_1_partition where part='2023/01/01'" + qt_4 "select * from special_character_1_partition where part='2023?01?01'" + qt_5 "select * from special_character_1_partition where part='2023.01.01'" + qt_6 "select * from special_character_1_partition where part='2023<01><01>'" + qt_7 "select * from special_character_1_partition where part='2023:01:01'" + qt_8 "select * from 
special_character_1_partition where part='2023=01=01'" + qt_9 "select * from special_character_1_partition where part='2023\"01\"01'" + qt_10 "select * from special_character_1_partition where part='2023\\'01\\'01'" + qt_11 "select * from special_character_1_partition where part='2023\\\\01\\\\01'" + qt_12 "select * from special_character_1_partition where part='2023%01%01'" + qt_13 "select * from special_character_1_partition where part='2023#01#01'" + } +} +