Currently, there are some useless includes in the codebase. We can use a tool named include-what-you-use to optimize these includes. By using a strict include-what-you-use policy, we can get lots of benefits from it.
110 lines
3.6 KiB
C++
110 lines
3.6 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#pragma once
|
|
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
|
|
#include <memory>
|
|
#include <vector>
|
|
|
|
#include "common/status.h"
|
|
#include "exec/base_scanner.h"
|
|
#include "io/file_factory.h"
|
|
#include "io/fs/file_reader_writer_fwd.h"
|
|
#include "util/runtime_profile.h"
|
|
|
|
// Forward declaration only: this header refers to arrow::RecordBatch solely
// through std::shared_ptr, so the full Arrow headers are not needed here.
namespace arrow {
class RecordBatch;
} // namespace arrow
|
|
// Forward declarations of the Doris types that this header only names in
// pointer, reference or template-argument position.
namespace doris {
class ArrowReaderWrap;
class RuntimeState;
class SlotDescriptor;
class TBrokerRangeDesc;
class TBrokerScanRangeParams;
class TExpr;
class TNetworkAddress;

namespace io {
class FileSystem;
} // namespace io
namespace vectorized {
class Block;
} // namespace vectorized
struct Statistics;
} // namespace doris
|
|
|
|
namespace doris::vectorized {
|
|
|
|
// VArrow scanner convert the data read from orc|parquet to doris's columns.
|
|
class VArrowScanner : public BaseScanner {
|
|
public:
|
|
VArrowScanner(RuntimeState* state, RuntimeProfile* profile,
|
|
const TBrokerScanRangeParams& params, const std::vector<TBrokerRangeDesc>& ranges,
|
|
const std::vector<TNetworkAddress>& broker_addresses,
|
|
const std::vector<TExpr>& pre_filter_texprs, ScannerCounter* counter);
|
|
|
|
virtual ~VArrowScanner();
|
|
|
|
// Open this scanner, will initialize information need to
|
|
virtual Status open() override;
|
|
|
|
virtual Status get_next(Block* block, bool* eof) override;
|
|
|
|
// Update file predicate filter profile
|
|
void update_profile(std::shared_ptr<Statistics>& statistics);
|
|
|
|
virtual void close() override;
|
|
|
|
protected:
|
|
virtual ArrowReaderWrap* _new_arrow_reader(const std::vector<SlotDescriptor*>& file_slot_descs,
|
|
io::FileReaderSPtr file_reader,
|
|
int32_t num_of_columns_from_file,
|
|
int64_t range_start_offset, int64_t range_size) = 0;
|
|
|
|
private:
|
|
// Read next buffer from reader
|
|
Status _open_next_reader();
|
|
Status _next_arrow_batch();
|
|
Status _init_arrow_batch_if_necessary();
|
|
Status _init_src_block() override;
|
|
Status _append_batch_to_src_block(Block* block);
|
|
Status _cast_src_block(Block* block);
|
|
void _init_system_properties(const TBrokerRangeDesc& range);
|
|
void _init_file_description(const TBrokerRangeDesc& range);
|
|
|
|
private:
|
|
// Reader
|
|
ArrowReaderWrap* _cur_file_reader;
|
|
bool _cur_file_eof; // is read over?
|
|
std::shared_ptr<arrow::RecordBatch> _batch;
|
|
size_t _arrow_batch_cur_idx;
|
|
FileSystemProperties _system_properties;
|
|
FileDescription _file_description;
|
|
std::shared_ptr<io::FileSystem> _file_system;
|
|
|
|
RuntimeProfile::Counter* _filtered_row_groups_counter;
|
|
RuntimeProfile::Counter* _filtered_rows_counter;
|
|
RuntimeProfile::Counter* _filtered_bytes_counter;
|
|
RuntimeProfile::Counter* _total_rows_counter;
|
|
RuntimeProfile::Counter* _total_groups_counter;
|
|
};
|
|
|
|
} // namespace doris::vectorized
|