From 48e33bfb2a082e69a9c2cfba793b2b8c9405f55a Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Sat, 16 Nov 2024 16:14:50 +0800
Subject: [PATCH] branch-2.1: [fix](hive)Fixed the issue of reading hive table
 with empty lzo files #43979 (#44063)

Cherry-picked from #43979

Co-authored-by: wuwenchi
---
 be/src/exec/lzo_decompressor.cpp                   |   1 +
 .../user_empty_lzo/part-m-00000.lzo                | Bin 0 -> 42 bytes
 .../tvf/test_user_empty_lzo.groovy                 |  41 ++++++++++++++++++
 3 files changed, 42 insertions(+)
 create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/user_empty_lzo/part-m-00000.lzo
 create mode 100644 regression-test/suites/external_table_p0/tvf/test_user_empty_lzo.groovy

diff --git a/be/src/exec/lzo_decompressor.cpp b/be/src/exec/lzo_decompressor.cpp
index b075509202..b240e2995a 100644
--- a/be/src/exec/lzo_decompressor.cpp
+++ b/be/src/exec/lzo_decompressor.cpp
@@ -103,6 +103,7 @@ Status LzopDecompressor::decompress(uint8_t* input, size_t input_len, size_t* in
     ptr = get_uint32(ptr, &uncompressed_size);
     left_input_len -= sizeof(uint32_t);
     if (uncompressed_size == 0) {
+        *input_bytes_read += sizeof(uint32_t);
         *stream_end = true;
         return Status::OK();
     }
diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/user_empty_lzo/part-m-00000.lzo b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/user_empty_lzo/part-m-00000.lzo
new file mode 100644
index 0000000000000000000000000000000000000000..48aa4f9c93f1567a120cefdb926abfac6d092845
GIT binary patch
literal 42
ocmeD5iSlRQ<&xqO5Ku_qbYNs<0E5OQ>DrSPfSJ0|%r`*{0GCPzwg3PC

literal 0
HcmV?d00001

diff --git a/regression-test/suites/external_table_p0/tvf/test_user_empty_lzo.groovy b/regression-test/suites/external_table_p0/tvf/test_user_empty_lzo.groovy
new file mode 100644
index 0000000000..4da30630ee
--- /dev/null
+++ b/regression-test/suites/external_table_p0/tvf/test_user_empty_lzo.groovy
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_user_empty_lzo","external,hive,tvf,external_docker") {
+    String hdfs_port = context.config.otherConfigs.get("hive2HdfsPort")
+    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+
+    def hdfsUserName = "doris"
+    def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+    def uri = ""
+
+    String enabled = context.config.otherConfigs.get("enableHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        try {
+            // it's an empty data file
+            uri = "${defaultFS}" + "/user/doris/preinstalled_data/user_empty_lzo/part-m-00000.lzo"
+            test {
+                sql """ select * from HDFS(
+                            "uri" = "${uri}",
+                            "hadoop.username" = "${hdfsUserName}",
+                            "format" = "csv"); """
+                exception """The first line is empty, can not parse column numbers"""
+            }
+        } finally {
+        }
+    }
+}
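
For reviewers who are not familiar with the LZOP block layout, the sketch below is a minimal, self-contained model of the bookkeeping the one-line BE change fixes. It is an illustration only, not the Doris LzopDecompressor API: get_uint32_be and consume_block_header are hypothetical stand-ins for the get_uint32 helper and the decompress() contract in be/src/exec/lzo_decompressor.cpp. The point is that when the block header announces an uncompressed size of 0 (the end-of-stream marker, which is essentially all an empty .lzo data file carries after its file header), the four bytes just read must still be reported as consumed.

#include <cassert>
#include <cstddef>
#include <cstdint>

// Read a big-endian uint32, the byte order LZOP uses for block sizes.
static const uint8_t* get_uint32_be(const uint8_t* p, uint32_t* out) {
    *out = (uint32_t(p[0]) << 24) | (uint32_t(p[1]) << 16) |
           (uint32_t(p[2]) << 8) | uint32_t(p[3]);
    return p + 4;
}

// Hypothetical stand-in for the decompressor's per-call contract:
// consume input, report how much was consumed, and flag end of stream.
static void consume_block_header(const uint8_t* input, size_t input_len,
                                 size_t* input_bytes_read, bool* stream_end) {
    *input_bytes_read = 0;
    *stream_end = false;
    if (input_len < sizeof(uint32_t)) {
        return; // not enough data yet; the caller will refill and retry
    }
    uint32_t uncompressed_size = 0;
    get_uint32_be(input, &uncompressed_size);
    if (uncompressed_size == 0) {
        // The fix: count the 4-byte size field even though the block is empty.
        // Without this, *input_bytes_read stays 0 and the caller cannot tell
        // that the end-of-stream marker was actually consumed.
        *input_bytes_read += sizeof(uint32_t);
        *stream_end = true;
        return;
    }
    // ... a real decompressor would go on to read the compressed size,
    // optional checksums, and the block payload here ...
}

int main() {
    const uint8_t eos_marker[4] = {0, 0, 0, 0}; // empty file: only the marker
    size_t read = 0;
    bool end = false;
    consume_block_header(eos_marker, sizeof(eos_marker), &read, &end);
    assert(end && read == sizeof(uint32_t)); // progress is reported
    return 0;
}

Compiled standalone (for example with g++ -std=c++17), the assert only holds with the *input_bytes_read += sizeof(uint32_t) line present; dropping it leaves the reported progress at zero, which is the bookkeeping gap the regression test above exercises against the empty part-m-00000.lzo file.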