From 9019a71770ea158b035159bb523a4501aee87b55 Mon Sep 17 00:00:00 2001 From: Xiangyu Wang Date: Fri, 3 Nov 2023 13:42:29 +0800 Subject: [PATCH] [Fix](multi-catalog) Filter _temporary dir which is generated by spark. (#26194) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes there are temporary files generated by Spark jobs; we need to filter these files out. ``` errCode = 2, detailMessage = (xxx.xxx.xxx.xxx.xxx) [CANCELLED][INTERNAL_ERROR]failed to init reader for file hdfs://xxx/user/hive/warehouse/xxx.db/xxx/_temporary/0/_temporary/attempt_202311010952379045223848154629697_0013_m_000442_3355/part-00442-3f97ac18-0583-4d2c-8995-8d21ceda56ec-c000.snappy.orc, err: [INTERNAL_ERROR]Init OrcReader failed. reason = Failed to parse the postscript from hdfs://xxx/user/hive/warehouse/xxx.db/xxx/_temporary/0/_temporary/attempt_202311010952379045223848154629697_0013_m_000442_3355/part-00442-3f97ac18-0583-4d2c-8995-8d21ceda56ec-c000.snappy.orc 0. @ 0x000000000bdbd89f doris::Status doris::Status::Error<6, true, std::__cxx11::basic_string, std::allocator >&>(std::basic_string_view >, std::__cxx11::basic_string, std::allocator >&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:173 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 1. @ 0x000000000eccecfe doris::vectorized::OrcReader::_create_file_reader() /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:187 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 2. 
@ 0x000000000ecceff9 doris::vectorized::OrcReader::init_reader(std::vector, std::allocator >, std::allocator, std::allocator > > > const*, std::unordered_map, std::allocator >, std::variant, doris::ColumnValueRange<(doris::PrimitiveType)4>, doris::ColumnValueRange<(doris::PrimitiveType)5>, doris::ColumnValueRange<(doris::PrimitiveType)6>, doris::ColumnValueRange<(doris::PrimitiveType)7>, doris::ColumnValueRange<(doris::PrimitiveType)15>, doris::ColumnValueRange<(doris::PrimitiveType)10>, doris::ColumnValueRange<(doris::PrimitiveType)23>, doris::ColumnValueRange<(doris::PrimitiveType)11>, doris::ColumnValueRange<(doris::PrimitiveType)25>, doris::ColumnValueRange<(doris::PrimitiveType)12>, doris::ColumnValueRange<(doris::PrimitiveType)26>, doris::ColumnValueRange<(doris::PrimitiveType)20>, doris::ColumnValueRange<(doris::PrimitiveType)2>, doris::ColumnValueRange<(doris::PrimitiveType)19>, doris::ColumnValueRange<(doris::PrimitiveType)28>, doris::ColumnValueRange<(doris::PrimitiveType)29>, doris::ColumnValueRange<(doris::PrimitiveType)30> >, std::hash, std::allocator > >, std::equal_to, std::allocator > >, std::allocator, std::allocator > const, std::variant, doris::ColumnValueRange<(doris::PrimitiveType)4>, doris::ColumnValueRange<(doris::PrimitiveType)5>, doris::ColumnValueRange<(doris::PrimitiveType)6>, doris::ColumnValueRange<(doris::PrimitiveType)7>, doris::ColumnValueRange<(doris::PrimitiveType)15>, doris::ColumnValueRange<(doris::PrimitiveType)10>, doris::ColumnValueRange<(doris::PrimitiveType)23>, doris::ColumnValueRange<(doris::PrimitiveType)11>, doris::ColumnValueRange<(doris::PrimitiveType)25>, doris::ColumnValueRange<(doris::PrimitiveType)12>, doris::ColumnValueRange<(doris::PrimitiveType)26>, doris::ColumnValueRange<(doris::PrimitiveType)20>, doris::ColumnValueRange<(doris::PrimitiveType)2>, doris::ColumnValueRange<(doris::PrimitiveType)19>, doris::ColumnValueRange<(doris::PrimitiveType)28>, doris::ColumnValueRange<(doris::PrimitiveType)29>, 
doris::ColumnValueRange<(doris::PrimitiveType)30> > > > >*, std::vector, std::allocator > > const&, bool, doris::TupleDescriptor const*, doris::RowDescriptor const*, std::vector, std::allocator > > const*, std::unordered_map, std::allocator > >, std::hash, std::equal_to, std::allocator, std::allocator > > > > > const*) /root/doris-master/be/src/common/status.h:448 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 3. @ 0x000000000ff43da0 doris::vectorized::VFileScanner::_get_next_reader() /root/doris-master/be/src/vec/exec/scan/vfile_scanner.cpp:800 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 4. @ 0x000000000ff4256f doris::vectorized::VFileScanner::_get_block_impl(doris::RuntimeState*, doris::vectorized::Block*, bool*) /root/doris-master/be/src/common/status.h:448 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 5. @ 0x000000000ffdb1aa doris::vectorized::VScanner::get_block(doris::RuntimeState*, doris::vectorized::Block*, bool*) /root/doris-master/be/src/vec/exec/scan/vscanner.cpp:0 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 6. @ 0x000000000ff3b379 doris::vectorized::ScannerScheduler::_scanner_scan(doris::vectorized::ScannerScheduler*, doris::vectorized::ScannerContext*, std::shared_ptr) /root/doris-master/be/src/common/status.h:346 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 7. 
@ 0x000000000ff3ca11 std::_Function_handler::_M_invoke(std::_Any_data const&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/shared_ptr_base.h:701 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 8. @ 0x000000000c986608 doris::ThreadPool::dispatch_thread() /root/doris-master/be/src/util/threadpool.cpp:0 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 9. @ 0x000000000c97c2ac doris::Thread::supervise_thread(void*) /var/local/ldb-toolchain/bin/../usr/include/pthread.h:562 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 10. @ 0x0000000000007ea5 start_thread in /usr/lib64/libpthread-2.17.so 11. @ 0x00000000000feb0d __clone in /usr/lib64/libc-2.17.so 0. @ 0x000000000c647366 doris::Status doris::Status::Error<6, true, std::__cxx11::basic_string, std::allocator > const&, std::__cxx11::basic_string, std::allocator > >(std::basic_string_view >, std::__cxx11::basic_string, std::allocator > const&, std::__cxx11::basic_string, std::allocator >&&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/unique_ptr.h:173 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 1. 
@ 0x000000000ff42dc3 doris::vectorized::VFileScanner::_get_next_reader() /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.h:187 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 2. @ 0x000000000ff4256f doris::vectorized::VFileScanner::_get_block_impl(doris::RuntimeState*, doris::vectorized::Block*, bool*) /root/doris-master/be/src/common/status.h:448 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 3. @ 0x000000000ffdb1aa doris::vectorized::VScanner::get_block(doris::RuntimeState*, doris::vectorized::Block*, bool*) /root/doris-master/be/src/vec/exec/scan/vscanner.cpp:0 in /hdpData10/yarn/localcache/usercache/hive/appcache/application_1696938614818_1832823/filecache/13/doris-be-360-master.tar.gz/apache-doris-be-360-master-bin-x86_64/lib/doris_be 4. 
@ 0x000000000ff3b379 doris::vectorized::ScannerScheduler::_scanner_scan(doris::vectorized::ScannerScheduler*, doris::vectorized::ScannerContext*, std::shared_ptr) /root/doris-master/be/src/common/status.h:346 in /hdpData10/yarn/localcache/usercache/hive/appcache/a… ``` Co-authored-by: wangxiangyu --- .../org/apache/doris/datasource/hive/HiveMetaStoreCache.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index 480edd60f2..bac891eb92 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -1058,9 +1058,14 @@ public class HiveMetaStoreCache { return false; } for (String name : path.toString().split("/")) { + // generated by hive if (name.startsWith(".hive-staging")) { return false; } + // generated by spark + if ("_temporary".equals(name)) { + return false; + } } return true; }