From b21b906306d539805cbba5751bc423cadc34fa68 Mon Sep 17 00:00:00 2001 From: Tiewei Fang <43782773+BePPPower@users.noreply.github.com> Date: Wed, 31 Jul 2024 22:46:37 +0800 Subject: [PATCH] [Fix](outfile) FE check the hdfs URI of outfile (#38602) bp: #38203 1. Previously, if the root path of the HDFS URI started with two slashes, the outfile would be successfully exported without errors, but the exported path would not be the expected path. Now, the FE removes any repeated '/' that users specify in the URI. 2. Move the test case for outfile HDFS from p2 to p0. --- .../apache/doris/analysis/OutFileClause.java | 8 ++ .../outfile/hdfs/test_outfile_hdfs.out} | 64 ++++++++++ .../outfile/hdfs/test_outfile_hdfs.groovy | 97 ++++++++++++++ .../export_p2/test_export_with_hdfs.groovy | 118 ------------------ 4 files changed, 169 insertions(+), 118 deletions(-) rename regression-test/data/{export_p2/test_export_with_hdfs.out => export_p0/outfile/hdfs/test_outfile_hdfs.out} (56%) create mode 100644 regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy delete mode 100644 regression-test/suites/export_p2/test_export_with_hdfs.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java index 03849abb97..4954427d69 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/OutFileClause.java @@ -55,6 +55,8 @@ import org.apache.hadoop.fs.Path; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.net.URI; +import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -535,6 +537,12 @@ public class OutFileClause { filePath = filePath.replace(HDFS_FILE_PREFIX, HDFS_FILE_PREFIX + dfsNameServices); } } + // delete repeated '/' + try { + filePath = new URI(filePath).normalize().toString(); 
} catch (URISyntaxException e) { + throw new AnalysisException("Can not normalize the URI, error: " + e.getMessage()); + } if (Strings.isNullOrEmpty(filePath)) { throw new AnalysisException("Must specify file in OUTFILE clause"); } diff --git a/regression-test/data/export_p2/test_export_with_hdfs.out b/regression-test/data/export_p0/outfile/hdfs/test_outfile_hdfs.out similarity index 56% rename from regression-test/data/export_p2/test_export_with_hdfs.out rename to regression-test/data/export_p0/outfile/hdfs/test_outfile_hdfs.out index 457bcfced3..b0159c5296 100644 --- a/regression-test/data/export_p2/test_export_with_hdfs.out +++ b/regression-test/data/export_p0/outfile/hdfs/test_outfile_hdfs.out @@ -47,6 +47,70 @@ 8 ftw-8 26 9 ftw-9 27 +-- !select -- + +-- !select -- + +-- !select -- +1 ftw-1 19 +10 \N \N +2 ftw-2 20 +3 ftw-3 21 +4 ftw-4 22 +5 ftw-5 23 +6 ftw-6 24 +7 ftw-7 25 +8 ftw-8 26 +9 ftw-9 27 + +-- !select -- +1 ftw-1 19 +10 \N \N +2 ftw-2 20 +3 ftw-3 21 +4 ftw-4 22 +5 ftw-5 23 +6 ftw-6 24 +7 ftw-7 25 +8 ftw-8 26 +9 ftw-9 27 + +-- !select -- +1 ftw-1 19 +10 \N \N +2 ftw-2 20 +3 ftw-3 21 +4 ftw-4 22 +5 ftw-5 23 +6 ftw-6 24 +7 ftw-7 25 +8 ftw-8 26 +9 ftw-9 27 + +-- !select -- +1 ftw-1 19 +10 \N \N +2 ftw-2 20 +3 ftw-3 21 +4 ftw-4 22 +5 ftw-5 23 +6 ftw-6 24 +7 ftw-7 25 +8 ftw-8 26 +9 ftw-9 27 + +-- !select -- +1 ftw-1 19 +10 \N \N +2 ftw-2 20 +3 ftw-3 21 +4 ftw-4 22 +5 ftw-5 23 +6 ftw-6 24 +7 ftw-7 25 +8 ftw-8 26 +9 ftw-9 27 + -- !select -- 1 ftw-1 19 10 \N \N diff --git a/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy b/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy new file mode 100644 index 0000000000..fccf5800e6 --- /dev/null +++ b/regression-test/suites/export_p0/outfile/hdfs/test_outfile_hdfs.groovy @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_outfile_with_hdfs", "external,hive,external_docker") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + def table_export_name = "test_outfile_with_hdfs" + // create table and insert + sql """ DROP TABLE IF EXISTS ${table_export_name} """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_export_name} ( + `id` int(11) NULL, + `name` string NULL, + `age` int(11) NULL + ) + PARTITION BY RANGE(id) + ( + PARTITION less_than_20 VALUES LESS THAN ("20"), + PARTITION between_20_70 VALUES [("20"),("70")), + PARTITION more_than_70 VALUES LESS THAN ("151") + ) + DISTRIBUTED BY HASH(id) BUCKETS 3 + PROPERTIES("replication_num" = "1"); + """ + StringBuilder sb = new StringBuilder() + int i = 1 + for (; i < 10; i ++) { + sb.append(""" + (${i}, 'ftw-${i}', ${i + 18}), + """) + } + sb.append(""" + (${i}, NULL, NULL) + """) + sql """ INSERT INTO ${table_export_name} VALUES + ${sb.toString()} + """ + qt_select_export """ SELECT * FROM ${table_export_name} t ORDER BY id; """ + + String hdfs_port = context.config.otherConfigs.get("hive2HdfsPort") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + // It's okay to use random `hdfsUser`, but can not be empty. 
+ def hdfsUserName = "doris" + def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}" + + + // test outfile + def test_outfile = {format, uri -> + def res = sql """ + SELECT * FROM ${table_export_name} t ORDER BY id + INTO OUTFILE "${defaultFS}${uri}" + FORMAT AS ${format} + PROPERTIES ( + "fs.defaultFS"="${defaultFS}", + "hadoop.username" = "${hdfsUserName}" + ); + """ + + def outfile_url = res[0][3] + // check data correctness + order_qt_select """ select * from hdfs( + "uri" = "${outfile_url}.${format}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}"); + """ + } + + test_outfile('csv', '/tmp/ftw/export/exp_'); + test_outfile('parquet', '/tmp/ftw/export/exp_'); + test_outfile('orc', '/tmp/ftw/export/exp_'); + test_outfile('csv_with_names', '/tmp/ftw/export/exp_'); + test_outfile('csv_with_names_and_types', '/tmp/ftw/export/exp_'); + + // test uri with multi '/' + test_outfile('parquet', '//tmp/ftw/export/exp_'); + test_outfile('parquet', '//tmp//ftw/export/exp_'); + test_outfile('parquet', '//tmp/ftw/export//exp_'); + test_outfile('parquet', '//tmp/ftw//export//exp_'); + test_outfile('parquet', '//tmp/ftw//export/exp_'); + test_outfile('parquet', '///tmp/ftw/export/exp_'); + test_outfile('parquet', '////tmp/ftw/export/exp_'); + } +} diff --git a/regression-test/suites/export_p2/test_export_with_hdfs.groovy b/regression-test/suites/export_p2/test_export_with_hdfs.groovy deleted file mode 100644 index e523fdf5a4..0000000000 --- a/regression-test/suites/export_p2/test_export_with_hdfs.groovy +++ /dev/null @@ -1,118 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -suite("test_export_with_hdfs", "p2") { - // open nereids - sql """ set enable_nereids_planner=true """ - sql """ set enable_fallback_to_original_planner=false """ - - - String nameNodeHost = context.config.otherConfigs.get("extHiveHmsHost") - String hdfsPort = context.config.otherConfigs.get("extHdfsPort") - String fs = "hdfs://${nameNodeHost}:${hdfsPort}" - String user_name = context.config.otherConfigs.get("extHiveHmsUser") - - - def table_export_name = "test_export_with_hdfs" - // create table and insert - sql """ DROP TABLE IF EXISTS ${table_export_name} """ - sql """ - CREATE TABLE IF NOT EXISTS ${table_export_name} ( - `id` int(11) NULL, - `name` string NULL, - `age` int(11) NULL - ) - PARTITION BY RANGE(id) - ( - PARTITION less_than_20 VALUES LESS THAN ("20"), - PARTITION between_20_70 VALUES [("20"),("70")), - PARTITION more_than_70 VALUES LESS THAN ("151") - ) - DISTRIBUTED BY HASH(id) BUCKETS 3 - PROPERTIES("replication_num" = "1"); - """ - StringBuilder sb = new StringBuilder() - int i = 1 - for (; i < 10; i ++) { - sb.append(""" - (${i}, 'ftw-${i}', ${i + 18}), - """) - } - sb.append(""" - (${i}, NULL, NULL) - """) - sql """ INSERT INTO ${table_export_name} VALUES - ${sb.toString()} - """ - qt_select_export """ SELECT * FROM ${table_export_name} t ORDER BY id; """ - - - def waiting_export = { export_label -> - while (true) { - def res = sql """ show export where label = "${export_label}" """ - logger.info("export state: " + res[0][2]) - if (res[0][2] == "FINISHED") { - def json = parseJson(res[0][11]) - assert json instanceof List - 
assertEquals("1", json.fileNumber[0][0]) - log.info("outfile_path: ${json.url[0][0]}") - return json.url[0][0]; - } else if (res[0][2] == "CANCELLED") { - throw new IllegalStateException("""export failed: ${res[0][10]}""") - } else { - sleep(5000) - } - } - } - - def outFilePath = """/user/export_test/export/exp_""" - - // 1. csv test - def test_export = {format, file_suffix, isDelete -> - def uuid = UUID.randomUUID().toString() - // exec export - sql """ - EXPORT TABLE ${table_export_name} TO "${fs}${outFilePath}" - PROPERTIES( - "label" = "${uuid}", - "format" = "${format}", - "column_separator"=",", - "delete_existing_files"="${isDelete}" - ) - with HDFS ( - "fs.defaultFS"="${fs}", - "hadoop.username" = "${user_name}" - ); - """ - - def outfile_url = waiting_export.call(uuid) - - // check data correctness - order_qt_select """ select * from hdfs( - "uri" = "${outfile_url}0.${file_suffix}", - "hadoop.username" = "${user_name}", - "column_separator" = ",", - "format" = "${format}"); - """ - } - - test_export('csv', 'csv', true); - test_export('parquet', 'parquet', true); - test_export('orc', 'orc', true); - test_export('csv_with_names', 'csv', true); - test_export('csv_with_names_and_types', 'csv', true); -}