Files
doris/be/src/vec/exec/scan/vfile_scanner.h
Jibing-Li fbdebe2424 [feature-wip](new-scan)Add load counter for VFileScanner (#12812)
The new scanner (VFileScanner) need a counter to record two values in load job.
1. The number of rows unselected by pre-filter, and
2. The number of rows filtered by unmatched schema or other error. This pr is to implement the counter.
2022-09-21 20:59:13 +08:00

127 lines
4.1 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "exec/text_converter.h"
#include "exprs/bloomfilter_predicate.h"
#include "exprs/function_filter.h"
#include "io/file_factory.h"
#include "runtime/tuple.h"
#include "vec/exec/format/generic_reader.h"
#include "vec/exec/scan/vscanner.h"
namespace doris::vectorized {
class NewFileScanNode;
// The counter will be passed to each scanner.
// Note that this struct is not thread safe.
// So if we support concurrent scan in the future, we need to modify this struct.
struct ScannerCounter {
ScannerCounter() : num_rows_filtered(0), num_rows_unselected(0) {}
int64_t num_rows_filtered; // unqualified rows (unmatched the dest schema, or no partition)
int64_t num_rows_unselected; // rows filtered by predicates
};
class VFileScanner : public VScanner {
public:
VFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t limit,
const TFileScanRange& scan_range, MemTracker* tracker, RuntimeProfile* profile);
Status open(RuntimeState* state) override;
public:
Status prepare(VExprContext** vconjunct_ctx_ptr);
protected:
Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) override;
void _init_profiles(RuntimeProfile* profile);
Status _fill_columns_from_path();
Status _get_next_reader();
// TODO: cast input block columns type to string.
Status _cast_src_block(Block* block) { return Status::OK(); };
protected:
std::unique_ptr<TextConverter> _text_converter;
const TFileScanRangeParams& _params;
const std::vector<TFileRangeDesc>& _ranges;
int _next_range;
GenericReader* _cur_reader;
bool _cur_reader_eof;
// File source slot descriptors
std::vector<SlotDescriptor*> _file_slot_descs;
// File slot id to index map.
std::map<SlotId, int> _file_slot_index_map;
// Partition source slot descriptors
std::vector<SlotDescriptor*> _partition_slot_descs;
// Partition slot id to index map
std::map<SlotId, int> _partition_slot_index_map;
// Mem pool used to allocate _src_tuple and _src_tuple_row
std::unique_ptr<MemPool> _mem_pool;
// Dest tuple descriptor and dest expr context
const TupleDescriptor* _dest_tuple_desc;
// Profile
RuntimeProfile* _profile;
RuntimeProfile::Counter* _rows_read_counter;
RuntimeProfile::Counter* _read_timer;
ScannerCounter _counter;
bool _scanner_eof = false;
int _rows = 0;
int _num_of_columns_from_file;
std::vector<vectorized::VExprContext*> _dest_vexpr_ctx;
// the map values of dest slot id to src slot desc
// if there is not key of dest slot id in dest_sid_to_src_sid_without_trans, it will be set to nullptr
std::vector<SlotDescriptor*> _src_slot_descs_order_by_dest;
bool _src_block_mem_reuse = false;
bool _strict_mode;
Block* _src_block_ptr;
Block _src_block;
// dest slot desc index to src slot desc index
std::unordered_map<int, int> _dest_slot_to_src_slot_index;
std::unordered_map<std::string, size_t> _src_block_name_to_idx;
private:
Status _init_expr_ctxes();
Status _init_src_block(Block* block);
Status _cast_to_input_block(Block* block);
Status _pre_filter_src_block();
Status _convert_to_output_block(Block* block);
void _reset_counter() {
_counter.num_rows_unselected = 0;
_counter.num_rows_filtered = 0;
}
};
} // namespace doris::vectorized