Files
doris/be/src/exec/orc_scanner.h
HappenLee 8fa677b59c [Refactor][Bug-Fix][Load Vec] Refactor code of basescanner and vjson/vparquet/vbroker scanner (#9666)
* [Refactor][Bug-Fix][Load Vec] Refactor code of basescanner and vjson/vparquet/vbroker scanner
1. fix bug of vjson scanner not support `range_from_file_path`
2. fix bug of vjson/vbroker scanner core dump when src/dest slot nullable is different
3. fix bug of vparquet filter_block reference of column is not 1
4. refactor code to simplify all the code

It only changed vectorized load, not original row based load.

Co-authored-by: lihaopeng <lihaopeng@baidu.com>
2022-05-20 11:43:03 +08:00

71 lines
2.4 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <orc/OrcFile.hh>
#include "exec/base_scanner.h"
namespace doris {
// Broker scanner convert the data read from broker to doris's tuple.
class ORCScanner : public BaseScanner {
public:
ORCScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params,
const std::vector<TBrokerRangeDesc>& ranges,
const std::vector<TNetworkAddress>& broker_addresses,
const std::vector<TExpr>& pre_filter_texprs, ScannerCounter* counter);
~ORCScanner() override;
// Open this scanner, will initialize information need to
Status open() override;
// Get next tuple
Status get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool* fill_tuple) override;
// Close this scanner
void close() override;
private:
// Read next buffer from reader
Status open_next_reader();
private:
// Reader
bool _cur_file_eof;
// orc file reader object
orc::ReaderOptions _options;
orc::RowReaderOptions _row_reader_options;
std::shared_ptr<orc::ColumnVectorBatch> _batch;
std::unique_ptr<orc::Reader> _reader;
std::unique_ptr<orc::RowReader> _row_reader;
// The batch after reading from orc data is arranged in the original order,
// so we need to record the index in the original order to correspond the column names to the order
std::vector<int> _position_in_orc_original;
int _num_of_columns_from_file;
int _total_groups; // groups in a orc file
int _current_group;
int64_t _rows_of_group; // rows in a group.
int64_t _current_line_of_group;
};
} // namespace doris