Files
doris/be/src/vec/exec/varrow_scanner.h
Adonis Ling e412dd12e8 [chore](build) Use include-what-you-use to optimize includes (PART II) (#18761)
Currently, there are some useless includes in the codebase. We can use a tool named include-what-you-use to optimize these includes. By using a strict include-what-you-use policy, we can get lots of benefits from it.
2023-04-19 23:11:48 +08:00

110 lines
3.6 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <vector>
#include "common/status.h"
#include "exec/base_scanner.h"
#include "io/file_factory.h"
#include "io/fs/file_reader_writer_fwd.h"
#include "util/runtime_profile.h"
// Forward declarations. Within this header these types are only named through
// pointers, references, or as template arguments in declarations, so their
// full definitions are not pulled in here.
namespace arrow {
class RecordBatch;
} // namespace arrow

namespace doris::io {
class FileSystem;
} // namespace doris::io

namespace doris::vectorized {
class Block;
} // namespace doris::vectorized

namespace doris {
struct Statistics;

class ArrowReaderWrap;
class RuntimeState;
class SlotDescriptor;

// Thrift-style request/descriptor types referenced by the scanner constructor.
class TBrokerRangeDesc;
class TBrokerScanRangeParams;
class TExpr;
class TNetworkAddress;
} // namespace doris
namespace doris::vectorized {
// VArrow scanner convert the data read from orc|parquet to doris's columns.
class VArrowScanner : public BaseScanner {
public:
VArrowScanner(RuntimeState* state, RuntimeProfile* profile,
const TBrokerScanRangeParams& params, const std::vector<TBrokerRangeDesc>& ranges,
const std::vector<TNetworkAddress>& broker_addresses,
const std::vector<TExpr>& pre_filter_texprs, ScannerCounter* counter);
virtual ~VArrowScanner();
// Open this scanner, will initialize information need to
virtual Status open() override;
virtual Status get_next(Block* block, bool* eof) override;
// Update file predicate filter profile
void update_profile(std::shared_ptr<Statistics>& statistics);
virtual void close() override;
protected:
virtual ArrowReaderWrap* _new_arrow_reader(const std::vector<SlotDescriptor*>& file_slot_descs,
io::FileReaderSPtr file_reader,
int32_t num_of_columns_from_file,
int64_t range_start_offset, int64_t range_size) = 0;
private:
// Read next buffer from reader
Status _open_next_reader();
Status _next_arrow_batch();
Status _init_arrow_batch_if_necessary();
Status _init_src_block() override;
Status _append_batch_to_src_block(Block* block);
Status _cast_src_block(Block* block);
void _init_system_properties(const TBrokerRangeDesc& range);
void _init_file_description(const TBrokerRangeDesc& range);
private:
// Reader
ArrowReaderWrap* _cur_file_reader;
bool _cur_file_eof; // is read over?
std::shared_ptr<arrow::RecordBatch> _batch;
size_t _arrow_batch_cur_idx;
FileSystemProperties _system_properties;
FileDescription _file_description;
std::shared_ptr<io::FileSystem> _file_system;
RuntimeProfile::Counter* _filtered_row_groups_counter;
RuntimeProfile::Counter* _filtered_rows_counter;
RuntimeProfile::Counter* _filtered_bytes_counter;
RuntimeProfile::Counter* _total_rows_counter;
RuntimeProfile::Counter* _total_groups_counter;
};
} // namespace doris::vectorized