From 3ea9d3f2e1d159285d0e1d09dfe232f3c6c90573 Mon Sep 17 00:00:00 2001 From: camby <104178625@qq.com> Date: Tue, 15 Nov 2022 17:48:17 +0800 Subject: [PATCH] [enhancement](array) support read list(Array) type from orc file (#14132) Before this PR, trying to load an ORC file containing native list (array) type data would crash the BE. Complex types in an ORC file consist of multiple underlying columns, so we need to select columns by column name; otherwise we cannot read all the columns we need. Arrow release-7.0.0 only supports creating a stripe reader by column index, so we patch it to also support creating a stripe reader by column names. Co-authored-by: cambyzju --- be/src/exec/arrow/arrow_reader.cpp | 3 + be/src/exec/arrow/arrow_reader.h | 1 + be/src/exec/arrow/orc_reader.cpp | 2 +- .../broker_load/simple_array_list_type.orc | Bin 0 -> 1692 bytes .../load_p0/broker_load/test_array_load.out | 80 ++++++++++++++++++ .../broker_load/test_array_load.groovy | 37 +++++++- 6 files changed, 121 insertions(+), 2 deletions(-) create mode 100644 regression-test/data/load_p0/broker_load/simple_array_list_type.orc diff --git a/be/src/exec/arrow/arrow_reader.cpp b/be/src/exec/arrow/arrow_reader.cpp index 3221fbfe67..79e206649f 100644 --- a/be/src/exec/arrow/arrow_reader.cpp +++ b/be/src/exec/arrow/arrow_reader.cpp @@ -71,11 +71,13 @@ void ArrowReaderWrap::close() { Status ArrowReaderWrap::column_indices() { _include_column_ids.clear(); + _include_cols.clear(); for (auto& slot_desc : _file_slot_descs) { // Get the Column Reader for the boolean column auto iter = _map_column.find(slot_desc->col_name()); if (iter != _map_column.end()) { _include_column_ids.emplace_back(iter->second); + _include_cols.push_back(slot_desc->col_name()); } else { _missing_cols.push_back(slot_desc->col_name()); } @@ -136,6 +138,7 @@ Status ArrowReaderWrap::next_batch(std::shared_ptr* batch, b while (!_closed && _queue.empty()) { if (_batch_eof) { _include_column_ids.clear(); + _include_cols.clear(); *eof = true; return
Status::OK(); } diff --git a/be/src/exec/arrow/arrow_reader.h b/be/src/exec/arrow/arrow_reader.h index 561f67fe94..c83abcf9e5 100644 --- a/be/src/exec/arrow/arrow_reader.h +++ b/be/src/exec/arrow/arrow_reader.h @@ -121,6 +121,7 @@ protected: int _current_group; // current group(stripe) std::map _map_column; // column-name <---> column-index std::vector _include_column_ids; // columns that need to get from file + std::vector _include_cols; // columns that need to get from file std::shared_ptr _statistics; std::atomic _closed = false; diff --git a/be/src/exec/arrow/orc_reader.cpp b/be/src/exec/arrow/orc_reader.cpp index 8f46a9bf21..e2fc8d3bcd 100644 --- a/be/src/exec/arrow/orc_reader.cpp +++ b/be/src/exec/arrow/orc_reader.cpp @@ -152,7 +152,7 @@ Status ORCReaderWrap::_next_stripe_reader(bool* eof) { // which may cause OOM issues by loading the whole stripe into memory. // Note this will only read rows for the current stripe, not the entire file. arrow::Result> maybe_rb_reader = - _reader->NextStripeReader(_state->batch_size(), _include_column_ids); + _reader->NextStripeReader(_state->batch_size(), _include_cols); if (!maybe_rb_reader.ok()) { LOG(WARNING) << "Get RecordBatch Failed. " << maybe_rb_reader.status(); return Status::InternalError(maybe_rb_reader.status().ToString()); diff --git a/regression-test/data/load_p0/broker_load/simple_array_list_type.orc b/regression-test/data/load_p0/broker_load/simple_array_list_type.orc new file mode 100644 index 0000000000000000000000000000000000000000..f6f1cb90ff97d8b7901cd5640ec609a96e01aa91 GIT binary patch literal 1692 zcmeYdau#G@;9?VE;b0A5&}LxZ66RuNU|7&!3{&96$8WLNl(EF75VHG<@)OX&t#n?6`}I)uQ`7L z3x4%_igJDS|8KHNNm;1a{rQefy*>j2mng(xd>oQW99)cvLX4>rMwm`^Wnkb^fT)z@ z;8GIi;z-IYD#=JmtQ2A`&o9bJk;uZ-WXHhpIOus&T0(+A(1V1~Nsk^GZP{XS#gsu& zQDvKp7Xvef2TVX7ew>udz^o|97p8C5*~Z$weNISlLjWTWhGGkl;%7;z42+73e0s`q zdTz{$V$8P2#hPxH8Q3tChcPfbzVryJ+?7vHITNJZFWJ^v*g4o+*;q4Anc3Ev+1Z;} zIr%ge-8LZ0o`Q7?^3^HZfpn)i+a_y@ITzc4tm9>1_|3?~%)-jez~IIb%BaB3!0_+? 
z0dWR~Z(w0Ib`DN1Rt5$pX0XH`IR=K)k5hmA@1NM%II*!)ftdj=`Dd0M1H30M)U2m?d$71| zkAWe1(u5=@Ck9>y1}8p^MO_-(7#VW=&So7p5NOH0IZ5|sQI2|B=7KNmRk`yQp1+{} zLGz^Q^nLav)^bivm3`B+YMaT9_fAAt9+~L#i7!7z z|9pii`%k|9CmQoVxSacBd+%g(&B5-CTjpJSu_irRXH9xSTw+4pVdL+oY}1}ORxaL? z7@z*v)>i4V@?N8DUzXIW-;a>o{(tYcnI*>#)Vlf0Z@K?*zIk5x;!m8NKbO>m?Mu(k z_c_CVO8l;V$L4JA-rbMyuUy-`fA6c?tN(oemDl(4^Y2Y{HFK}Oez_;Ej^P6%gKytO zufql+4%64pEWdtp>Y9u*E^e&yEZ&oLP5slP|83*?$y{2C3|Ky{{h?adQvYCb{X_L1 zjGL+!2XQsF6!U#&v;Qr@pkF1yB<}peU4y?n&hpU0<*EY5 zkIRRDt(&x6#s8KD<2((Q$8#*N{_8jw9d_l-*0*{eJ_lS||2|*-_~-rIKm5KgF6;5> zc36HoW+UrG&+Psa=k~~^uCz4fPM@B2`_+|<$6L7Kf9K8-Zh_qRIs@2ZimN_R(Hh7|tV^_iq&s)btSf)R6+Wlr*kmFhn5yv~M;d*y> zedf^H)2OZgP|~Jh%RUoT-6#J0zSsAkX?VA%F`DsirQwnGYkw4VU&%VooZgW(v(YM7{~K3(a+v9rF+ zGs2S#3T-QQNb%0MsNFOFPTvvUQ-5ntGQZl{v(h4r-Kh2lSGVB1oM*eG9sF}A?y0Y> zdvCg#|J&57-E66DD&|Y>_r_d0xNX+$*e$(Rq86^dwMv*hcDsm2yr<}Cg`UF9>ZdER z5|*Fd^~w2qz{L6g)|CCJDhv9-&hd^(fKj5MK|qO#fk)#vqXApf7iMPvAZJMcm77pN literal 0 HcmV?d00001 diff --git a/regression-test/data/load_p0/broker_load/test_array_load.out b/regression-test/data/load_p0/broker_load/test_array_load.out index ff4d69cbff..1b61847609 100644 --- a/regression-test/data/load_p0/broker_load/test_array_load.out +++ b/regression-test/data/load_p0/broker_load/test_array_load.out @@ -83,3 +83,83 @@ 5 \N \N \N \N \N \N \N \N \N \N 100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 
'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] 
['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] 
['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 
'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] 
[1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 
'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] + diff --git a/regression-test/suites/load_p0/broker_load/test_array_load.groovy b/regression-test/suites/load_p0/broker_load/test_array_load.groovy index b462d3c00e..7f5d109952 100644 --- a/regression-test/suites/load_p0/broker_load/test_array_load.groovy +++ b/regression-test/suites/load_p0/broker_load/test_array_load.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-suite("test_array_load", "p0") { +suite("test_array_load", "load_p0") { // define a sql table def testTable = "tbl_test_array_load" def testTable01 = "tbl_test_array_load01" @@ -293,6 +293,8 @@ suite("test_array_load", "p0") { def hdfs_json_file_path = uploadToHdfs "broker_load/simple_object_array.json" def hdfs_csv_file_path = uploadToHdfs "broker_load/simple_array.csv" def hdfs_orc_file_path = uploadToHdfs "broker_load/simple_array.orc" + // orc file with native array(list) type + def hdfs_orc_file_path2 = uploadToHdfs "broker_load/simple_array_list_type.orc" def hdfs_parquet_file_path = uploadToHdfs "broker_load/simple_array.parquet" // case5: import array data by hdfs and enable vectorized engine @@ -422,5 +424,38 @@ suite("test_array_load", "p0") { } finally { try_sql("DROP TABLE IF EXISTS ${testTable}") } + + // case13: import array data by hdfs in orc format(with array type) and enable vectorized + try { + sql "DROP TABLE IF EXISTS ${testTable}" + + create_test_table.call(testTable, true) + + def test_load_label = UUID.randomUUID().toString().replaceAll("-", "") + load_from_hdfs1.call(testTable, test_load_label, hdfs_orc_file_path2, "orc", + brokerName, hdfsUser, hdfsPasswd) + + check_load_result.call(test_load_label, testTable) + + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable}") + } + + // case14: import array data by hdfs in orc format(with array type) and disable vectorized + try { + sql "DROP TABLE IF EXISTS ${testTable}" + + create_test_table.call(testTable, false) + + def test_load_label = UUID.randomUUID().toString().replaceAll("-", "") + load_from_hdfs1.call(testTable, test_load_label, hdfs_orc_file_path2, "orc", + brokerName, hdfsUser, hdfsPasswd) + + check_load_result.call(test_load_label, testTable) + + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable}") + } + } }