// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include #include #include #include #include #include #include #include "common/status.h" #include "exec/arrow/parquet_reader.h" #include "exec/base_scanner.h" #include "gen_cpp/Types_types.h" #include "runtime/mem_pool.h" #include "util/runtime_profile.h" namespace doris::vectorized { // VArrow scanner convert the data read from orc|parquet to doris's columns. class VArrowScanner : public BaseScanner { public: VArrowScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params, const std::vector& ranges, const std::vector& broker_addresses, const std::vector& pre_filter_texprs, ScannerCounter* counter); virtual ~VArrowScanner(); // Open this scanner, will initialize information need to virtual Status open() override; virtual Status get_next(doris::Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) override { return Status::NotSupported("Not Implemented get next"); } virtual Status get_next(Block* block, bool* eof) override; // Update file predicate filter profile void update_profile(std::shared_ptr& statistics); virtual void close() override; protected: virtual ArrowReaderWrap* _new_arrow_reader(FileReader* file_reader, int64_t batch_size, int32_t num_of_columns_from_file, int64_t range_start_offset, int64_t range_size) = 0; private: // Read next buffer from reader Status _open_next_reader(); Status _next_arrow_batch(); Status _init_arrow_batch_if_necessary(); Status _init_src_block() override; Status _append_batch_to_src_block(Block* block); Status _cast_src_block(Block* block); private: // Reader ArrowReaderWrap* _cur_file_reader; bool _cur_file_eof; // is read over? std::shared_ptr _batch; size_t _arrow_batch_cur_idx; RuntimeProfile::Counter* _filtered_row_groups_counter; RuntimeProfile::Counter* _filtered_rows_counter; RuntimeProfile::Counter* _filtered_bytes_counter; RuntimeProfile::Counter* _total_rows_counter; RuntimeProfile::Counter* _total_groups_counter; }; } // namespace doris::vectorized