Files
doris/be/src/exec/base_scanner.h
yiguolei 4e4fb33995 [refactor](conjuncts) simplify conjuncts in exec node (#19254)
Co-authored-by: yiguolei <yiguolei@gmail.com>
Currently, exec node save exprcontext**, but the object is in object pool, the code is very unclear. we could just use exprcontext*.
2023-05-04 18:04:32 +08:00

153 lines
5.0 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <gen_cpp/Exprs_types.h>
#include <stdint.h>
#include <memory>
#include <unordered_map>
#include <vector>
#include "common/global_types.h"
#include "common/status.h"
#include "runtime/descriptors.h"
#include "util/runtime_profile.h"
#include "util/slice.h"
#include "vec/columns/column.h"
#include "vec/common/schema_util.h"
#include "vec/core/block.h"
#include "vec/exprs/vexpr.h"
namespace doris {
class RuntimeState;
class TBrokerRangeDesc;
class TBrokerScanRangeParams;
class TNetworkAddress;
namespace vectorized {
class VExprContext;
using MutableColumnPtr = IColumn::MutablePtr;
} // namespace vectorized
// The counter will be passed to each scanner.
// Note that this struct is not thread safe.
// So if we support concurrent scan in the future, we need to modify this struct.
struct ScannerCounter {
ScannerCounter() : num_rows_filtered(0), num_rows_unselected(0) {}
int64_t num_rows_filtered; // unqualified rows (unmatched the dest schema, or no partition)
int64_t num_rows_unselected; // rows filtered by predicates
};
class BaseScanner {
public:
BaseScanner(RuntimeState* state, RuntimeProfile* profile, const TBrokerScanRangeParams& params,
const std::vector<TBrokerRangeDesc>& ranges,
const std::vector<TNetworkAddress>& broker_addresses,
const std::vector<TExpr>& pre_filter_texprs, ScannerCounter* counter);
virtual ~BaseScanner() { vectorized::VExpr::close(_dest_vexpr_ctx, _state); }
virtual Status init_expr_ctxes();
// Open this scanner, will initialize information need to
virtual Status open();
// Get next block
virtual Status get_next(vectorized::Block* block, bool* eof) {
return Status::NotSupported("Not Implemented get block");
}
// Close this scanner
virtual void close() = 0;
bool is_dynamic_schema() const { return _is_dynamic_schema; }
protected:
Status _fill_dest_block(vectorized::Block* dest_block, bool* eof);
virtual Status _init_src_block();
bool is_null(const Slice& slice);
bool is_array(const Slice& slice);
bool check_array_format(std::vector<Slice>& split_values);
RuntimeState* _state;
const TBrokerScanRangeParams& _params;
//const TBrokerScanRangeParams& _params;
const std::vector<TBrokerRangeDesc>& _ranges;
const std::vector<TNetworkAddress>& _broker_addresses;
int _next_range;
// used for process stat
ScannerCounter* _counter;
// Used for constructing tuple
// slots for value read from broker file
std::vector<SlotDescriptor*> _src_slot_descs;
std::unique_ptr<RowDescriptor> _row_desc;
// Dest tuple descriptor and dest expr context
const TupleDescriptor* _dest_tuple_desc;
// the map values of dest slot id to src slot desc
// if there is not key of dest slot id in dest_sid_to_src_sid_without_trans, it will be set to nullptr
std::vector<SlotDescriptor*> _src_slot_descs_order_by_dest;
// dest slot desc index to src slot desc index
std::unordered_map<int, int> _dest_slot_to_src_slot_index;
// to filter src tuple directly
// the `_pre_filter_texprs` is the origin thrift exprs passed from scan node.
const std::vector<TExpr> _pre_filter_texprs;
bool _strict_mode;
int32_t _line_counter;
// Profile
RuntimeProfile* _profile;
RuntimeProfile::Counter* _rows_read_counter;
RuntimeProfile::Counter* _read_timer;
RuntimeProfile::Counter* _materialize_timer;
// Used to record whether a row of data is successfully read.
bool _success = false;
bool _scanner_eof = false;
// for vectorized load
std::vector<vectorized::VExprContext*> _dest_vexpr_ctx;
vectorized::VExprContext* _vpre_filter_ctx_ptr = nullptr;
vectorized::Block _src_block;
bool _src_block_mem_reuse = false;
int _num_of_columns_from_file;
// slot_ids for parquet predicate push down are in tuple desc
TupleId _tupleId = -1;
bool _is_dynamic_schema = false;
// for tracing dynamic schema
std::unique_ptr<vectorized::schema_util::FullBaseSchemaView> _full_base_schema_view;
private:
Status _filter_src_block();
void _fill_columns_from_path();
Status _materialize_dest_block(vectorized::Block* output_block);
};
} /* namespace doris */