diff --git a/be/src/exec/arrow/arrow_reader.cpp b/be/src/exec/arrow/arrow_reader.cpp index 3221fbfe67..79e206649f 100644 --- a/be/src/exec/arrow/arrow_reader.cpp +++ b/be/src/exec/arrow/arrow_reader.cpp @@ -71,11 +71,13 @@ void ArrowReaderWrap::close() { Status ArrowReaderWrap::column_indices() { _include_column_ids.clear(); + _include_cols.clear(); for (auto& slot_desc : _file_slot_descs) { // Get the Column Reader for the boolean column auto iter = _map_column.find(slot_desc->col_name()); if (iter != _map_column.end()) { _include_column_ids.emplace_back(iter->second); + _include_cols.push_back(slot_desc->col_name()); } else { _missing_cols.push_back(slot_desc->col_name()); } @@ -136,6 +138,7 @@ Status ArrowReaderWrap::next_batch(std::shared_ptr* batch, b while (!_closed && _queue.empty()) { if (_batch_eof) { _include_column_ids.clear(); + _include_cols.clear(); *eof = true; return Status::OK(); } diff --git a/be/src/exec/arrow/arrow_reader.h b/be/src/exec/arrow/arrow_reader.h index 561f67fe94..c83abcf9e5 100644 --- a/be/src/exec/arrow/arrow_reader.h +++ b/be/src/exec/arrow/arrow_reader.h @@ -121,6 +121,7 @@ protected: int _current_group; // current group(stripe) std::map _map_column; // column-name <---> column-index std::vector _include_column_ids; // columns that need to get from file + std::vector _include_cols; // columns that need to get from file std::shared_ptr _statistics; std::atomic _closed = false; diff --git a/be/src/exec/arrow/orc_reader.cpp b/be/src/exec/arrow/orc_reader.cpp index 8f46a9bf21..e2fc8d3bcd 100644 --- a/be/src/exec/arrow/orc_reader.cpp +++ b/be/src/exec/arrow/orc_reader.cpp @@ -152,7 +152,7 @@ Status ORCReaderWrap::_next_stripe_reader(bool* eof) { // which may cause OOM issues by loading the whole stripe into memory. // Note this will only read rows for the current stripe, not the entire file. arrow::Result> maybe_rb_reader = - _reader->NextStripeReader(_state->batch_size(), _include_column_ids); + _reader->NextStripeReader(_state->batch_size(), _include_cols); if (!maybe_rb_reader.ok()) { LOG(WARNING) << "Get RecordBatch Failed. " << maybe_rb_reader.status(); return Status::InternalError(maybe_rb_reader.status().ToString()); diff --git a/regression-test/data/load_p0/broker_load/simple_array_list_type.orc b/regression-test/data/load_p0/broker_load/simple_array_list_type.orc new file mode 100644 index 0000000000..f6f1cb90ff Binary files /dev/null and b/regression-test/data/load_p0/broker_load/simple_array_list_type.orc differ diff --git a/regression-test/data/load_p0/broker_load/test_array_load.out b/regression-test/data/load_p0/broker_load/test_array_load.out index ff4d69cbff..1b61847609 100644 --- a/regression-test/data/load_p0/broker_load/test_array_load.out +++ b/regression-test/data/load_p0/broker_load/test_array_load.out @@ -83,3 +83,83 @@ 5 \N \N \N \N \N \N \N \N \N \N 100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 00:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1, 1.2, 1.3] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4, 5.5, 6.67] + +-- !select -- +1 [1, 2, 3, 4, 5] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +2 [6, 7, 8, 9, 10] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +3 [] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['happy', 'birthday'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +4 [NULL] [32767, 32768, 32769] [NULL, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +5 [NULL, NULL] [32767, 32768, NULL] [65534, NULL, 65536] ['a', 'b', 'c', 'd', 'e'] ['hello', 'world'] [1991-01-01] [1991-01-01 08:00:00] [0.33, 0.67] [3.1415926, 0.878787878] [1.000000, 1.200000, 1.300000] +100 [1, 2, 3] [32767, 32768, 32769] [65534, 65535, 65536] ['a', 'b', 'c'] ['hello', 'world'] [2022-07-13] [2022-07-13 12:30:00] [0.33, 0.67] [3.1415926, 0.878787878] [4.000000, 5.500000, 6.670000] + diff --git a/regression-test/suites/load_p0/broker_load/test_array_load.groovy b/regression-test/suites/load_p0/broker_load/test_array_load.groovy index b462d3c00e..7f5d109952 100644 --- a/regression-test/suites/load_p0/broker_load/test_array_load.groovy +++ b/regression-test/suites/load_p0/broker_load/test_array_load.groovy @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -suite("test_array_load", "p0") { +suite("test_array_load", "load_p0") { // define a sql table def testTable = "tbl_test_array_load" def testTable01 = "tbl_test_array_load01" @@ -293,6 +293,8 @@ suite("test_array_load", "p0") { def hdfs_json_file_path = uploadToHdfs "broker_load/simple_object_array.json" def hdfs_csv_file_path = uploadToHdfs "broker_load/simple_array.csv" def hdfs_orc_file_path = uploadToHdfs "broker_load/simple_array.orc" + // orc file with native array(list) type + def hdfs_orc_file_path2 = uploadToHdfs "broker_load/simple_array_list_type.orc" def hdfs_parquet_file_path = uploadToHdfs "broker_load/simple_array.parquet" // case5: import array data by hdfs and enable vectorized engine @@ -422,5 +424,38 @@ suite("test_array_load", "p0") { } finally { try_sql("DROP TABLE IF EXISTS ${testTable}") } + + // case13: import array data by hdfs in orc format(with array type) and enable vectorized + try { + sql "DROP TABLE IF EXISTS ${testTable}" + + create_test_table.call(testTable, true) + + def test_load_label = UUID.randomUUID().toString().replaceAll("-", "") + load_from_hdfs1.call(testTable, test_load_label, hdfs_orc_file_path2, "orc", + brokerName, hdfsUser, hdfsPasswd) + + check_load_result.call(test_load_label, testTable) + + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable}") + } + + // case14: import array data by hdfs in orc format(with array type) and disable vectorized + try { + sql "DROP TABLE IF EXISTS ${testTable}" + + create_test_table.call(testTable, false) + + def test_load_label = UUID.randomUUID().toString().replaceAll("-", "") + load_from_hdfs1.call(testTable, test_load_label, hdfs_orc_file_path2, "orc", + brokerName, hdfsUser, hdfsPasswd) + + check_load_result.call(test_load_label, testTable) + + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable}") + } + } }