From cb749ce51d3b8d0d941d2e532dc4e5dff4b0cd02 Mon Sep 17 00:00:00 2001 From: xinghuayu007 <1450306854@qq.com> Date: Sat, 28 Nov 2020 09:54:18 +0800 Subject: [PATCH] [Improvement] Add parquet file name to the error message (#4954) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a user tries to load parquet files into Doris from a path like `hdfs://hadoop/user/data/date=20201024/*`, but the path actually contains some non-parquet files, the error `Couldn't deserialize thrift: No more data to read.\\nDeserializing page header failed.` is thrown. If the error message includes the file name, we can quickly locate the offending file. Therefore, this patch adds the file name to the error message. --- be/src/exec/parquet_scanner.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/be/src/exec/parquet_scanner.cpp b/be/src/exec/parquet_scanner.cpp index 2db36f3f4b..adad61e9b0 100644 --- a/be/src/exec/parquet_scanner.cpp +++ b/be/src/exec/parquet_scanner.cpp @@ -152,11 +152,17 @@ Status ParquetScanner::open_next_reader() { } Status status = _cur_file_reader->init_parquet_reader(_src_slot_descs, _state->timezone()); - + if (status.is_end_of_file()) { continue; } else { - return status; + if (!status.ok()) { + std::stringstream ss; + ss << " file: " << range.path << " error:" << status.get_error_msg(); + return Status::InternalError(ss.str()); + } else { + return status; + } } } }