Files
doris/be/src/exec/exec_node.h
trueeyu 839ec45197 Remove llvm relative code from be/src/exec (#2955)
Remove unused LLVM related codes of directory:be/src/exec (#2910)

There is a lot of LLVM-related code in the code base, but this code is not actually used.
Newer versions of GCC are not compatible with LLVM 3.4.2, the version currently used by Doris.
This PR deletes all LLVM-related code in the directory be/src/exec.
2020-02-20 20:43:26 +08:00

415 lines
17 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef DORIS_BE_SRC_QUERY_EXEC_EXEC_NODE_H
#define DORIS_BE_SRC_QUERY_EXEC_EXEC_NODE_H
#include <sstream>
#include <vector>
#include <mutex>
#include "common/status.h"
#include "gen_cpp/PlanNodes_types.h"
#include "runtime/descriptors.h"
#include "runtime/mem_pool.h"
#include "util/runtime_profile.h"
#include "util/blocking_queue.hpp"
#include "runtime/bufferpool/buffer_pool.h"
#include "runtime/query_statistics.h"
#include "service/backend_options.h"
#include "util/uid_util.h" // for print_id
namespace doris {
// Forward declarations of types named in this header. The declarations below
// use them through pointers, references, a friend declaration or a smart-pointer
// member. (Counters is not referenced in the visible part of this header.)
class Expr;
class ExprContext;
class ObjectPool;
class Counters;
class RowBatch;
class RuntimeState;
class TPlan;
class TupleRow;
class DataSink;
class MemTracker;
// NOTE(review): these using-declarations live at namespace scope in a header, so
// they become visible in namespace doris for every file that includes it.
// <map> and the boost headers providing lock_guard/mutex are not included
// directly here -- presumably they arrive transitively; confirm before touching.
using std::string;
using std::stringstream;
using std::vector;
using std::map;
using boost::lock_guard;
using boost::mutex;
// Superclass of all executor nodes.
// All subclasses need to make sure to check RuntimeState::is_cancelled()
// periodically in order to ensure timely termination after the cancellation
// flag gets set.
class ExecNode {
public:
// Init conjuncts.
ExecNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
virtual ~ExecNode();
/// Initializes this object from the thrift tnode desc. The subclass should
/// do any initialization that can fail in Init() rather than the ctor.
/// If overridden in subclass, must first call superclass's Init().
virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
// Sets up internal structures, etc., without doing any actual work.
// Must be called prior to open(). Will only be called once in this
// node's lifetime.
// All code generation (adding functions to the LlvmCodeGen object) must happen
// in prepare(). Retrieving the jit compiled function pointer must happen in
// open().
// If overridden in subclass, must first call superclass's prepare().
virtual Status prepare(RuntimeState* state);
// Performs any preparatory work prior to calling get_next().
// Can be called repeatedly (after calls to close()).
// Caller must not be holding any io buffers. This will cause deadlock.
virtual Status open(RuntimeState* state);
// Retrieves rows and returns them via row_batch. Sets eos to true
// if subsequent calls will not retrieve any more rows.
// Data referenced by any tuples returned in row_batch must not be overwritten
// by the callee until close() is called. The memory holding that data
// can be returned via row_batch's tuple_data_pool (in which case it may be deleted
// by the caller) or held on to by the callee. The row_batch, including its
// tuple_data_pool, will be destroyed by the caller at some point prior to the final
// close() call.
// In other words, if the memory holding the tuple data will be referenced
// by the callee in subsequent get_next() calls, it must *not* be attached to the
// row_batch's tuple_data_pool.
// Caller must not be holding any io buffers. This will cause deadlock.
// TODO: AggregationNode and HashJoinNode cannot be "re-opened" yet.
virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) = 0;
// Resets the stream of row batches to be retrieved by subsequent GetNext() calls.
// Clears all internal state, returning this node to the state it was in after calling
// Prepare() and before calling Open(). This function must not clear memory
// still owned by this node that is backing rows returned in GetNext().
// Prepare() and Open() must have already been called before calling Reset().
// GetNext() may have optionally been called (not necessarily until eos).
// Close() must not have been called.
// Reset() is not idempotent. Calling it multiple times in a row without a preceding
// call to Open() is invalid.
// If overridden in a subclass, must call superclass's Reset() at the end. The default
// implementation calls Reset() on children.
// Note that this function may be called many times (proportional to the input data),
// so should be fast.
virtual Status reset(RuntimeState* state);
// This should be called before close() and after get_next(), it is responsible for
// collecting statistics sent with row batch, it can't be called when prepare() returns
// error.
virtual Status collect_query_statistics(QueryStatistics* statistics);
// close() will get called for every exec node, regardless of what else is called and
// the status of these calls (i.e. prepare() may never have been called, or
// prepare()/open()/get_next() returned with an error).
// close() releases all resources that were allocated in open()/get_next(), even if the
// latter ended with an error. close() can be called if the node has been prepared or
// the node is closed.
// After calling close(), the caller calls open() again prior to subsequent calls to
// get_next(). The default implementation updates runtime profile counters and calls
// close() on the children. To ensure that close() is called on the entire plan tree,
// each implementation should start out by calling the default implementation.
virtual Status close(RuntimeState* state);
// Creates exec node tree from list of nodes contained in plan via depth-first
// traversal. All nodes are placed in pool.
// Returns error if 'plan' is corrupted, otherwise success.
static Status create_tree(RuntimeState* state, ObjectPool* pool, const TPlan& plan,
const DescriptorTbl& descs, ExecNode** root);
// Set debug action for node with given id in 'tree'
static void set_debug_options(int node_id, TExecNodePhase::type phase,
TDebugAction::type action, ExecNode* tree);
// Collect all nodes of given 'node_type' that are part of this subtree, and return in
// 'nodes'.
void collect_nodes(TPlanNodeType::type node_type, std::vector<ExecNode*>* nodes);
// Collect all scan node types.
void collect_scan_nodes(std::vector<ExecNode*>* nodes);
// When the agg node is the scan node direct parent,
// we directly return agg object from scan node to agg node,
// and don't serialize the agg object.
// This improve is cautious, we ensure the correctness firstly.
void try_do_aggregate_serde_improve();
typedef bool (*EvalConjunctsFn)(ExprContext* const* ctxs, int num_ctxs, TupleRow* row);
// Evaluate exprs over row. Returns true if all exprs return true.
// TODO: This doesn't use the vector<Expr*> signature because I haven't figured
// out how to deal with declaring a templated std:vector type in IR
static bool eval_conjuncts(ExprContext* const* ctxs, int num_ctxs, TupleRow* row);
// Returns a string representation in DFS order of the plan rooted at this.
std::string debug_string() const;
virtual void push_down_predicate(RuntimeState* state, std::list<ExprContext*>* expr_ctxs);
// recursive helper method for generating a string for Debug_string().
// implementations should call debug_string(int, std::stringstream) on their children.
// Input parameters:
// indentation_level: Current level in plan tree.
// Output parameters:
// out: Stream to accumulate debug string.
virtual void debug_string(int indentation_level, std::stringstream* out) const;
const std::vector<ExprContext*>& conjunct_ctxs() const {
return _conjunct_ctxs;
}
int id() const {
return _id;
}
TPlanNodeType::type type() const {
return _type;
}
const RowDescriptor& row_desc() const {
return _row_descriptor;
}
int64_t rows_returned() const {
return _num_rows_returned;
}
int64_t limit() const {
return _limit;
}
bool reached_limit() {
return _limit != -1 && _num_rows_returned >= _limit;
}
const std::vector<TupleId>& get_tuple_ids() const {
return _tuple_ids;
}
RuntimeProfile* runtime_profile() {
return _runtime_profile.get();
}
RuntimeProfile::Counter* memory_used_counter() const {
return _memory_used_counter;
}
MemTracker* mem_tracker() const {
return _mem_tracker.get();
}
MemTracker* expr_mem_tracker() const {
return _expr_mem_tracker.get();
}
MemPool* expr_mem_pool() {
return _expr_mem_pool.get();
}
// Extract node id from p->name().
static int get_node_id_from_profile(RuntimeProfile* p);
// Names of counters shared by all exec nodes
static const std::string ROW_THROUGHPUT_COUNTER;
protected:
friend class DataSink;
/// Initialize 'buffer_pool_client_' and claim the initial reservation for this
/// ExecNode. Only needs to be called by ExecNodes that will use the client.
/// The client is automatically cleaned up in Close(). Should not be called if
/// the client is already open.
/// The ExecNode must return the initial reservation to
/// QueryState::initial_reservations(), which is done automatically in Close() as long
/// as the initial reservation is not released before Close().
Status claim_buffer_reservation(RuntimeState* state);
/// Release any unused reservation in excess of the node's initial reservation. Returns
/// an error if releasing the reservation requires flushing pages to disk, and that
/// fails.
Status release_unused_reservation();
/// Enable the increase reservation denial probability on 'buffer_pool_client_' based on
/// the 'debug_action_' set on this node. Returns an error if 'debug_action_param_' is
/// invalid.
//Status enable_deny_reservation_debug_action();
/// Extends blocking queue for row batches. Row batches have a property that
/// they must be processed in the order they were produced, even in cancellation
/// paths. Preceding row batches can contain ptrs to memory in subsequent row batches
/// and we need to make sure those ptrs stay valid.
/// Row batches that are added after Shutdown() are queued in another queue, which can
/// be cleaned up during Close().
/// All functions are thread safe.
class RowBatchQueue : public BlockingQueue<RowBatch*> {
public:
/// max_batches is the maximum number of row batches that can be queued.
/// When the queue is full, producers will block.
RowBatchQueue(int max_batches);
~RowBatchQueue();
/// Adds a batch to the queue. This is blocking if the queue is full.
void AddBatch(RowBatch* batch);
/// Adds a batch to the queue. If the queue is full, this blocks until space becomes
/// available or 'timeout_micros' has elapsed.
/// Returns true if the element was added to the queue, false if it wasn't. If this
/// method returns false, the queue didn't take ownership of the batch and it must be
/// managed externally.
bool AddBatchWithTimeout(RowBatch* batch, int64_t timeout_micros);
/// Gets a row batch from the queue. Returns NULL if there are no more.
/// This function blocks.
/// Returns NULL after Shutdown().
RowBatch* GetBatch();
/// Deletes all row batches in cleanup_queue_. Not valid to call AddBatch()
/// after this is called.
/// Returns the number of io buffers that were released (for debug tracking)
int Cleanup();
private:
/// Lock protecting cleanup_queue_
// SpinLock lock_;
// TODO(dhc): need to modify spinlock
std::mutex lock_;
/// Queue of orphaned row batches
std::list<RowBatch*> cleanup_queue_;
};
int _id; // unique w/in single plan tree
TPlanNodeType::type _type;
ObjectPool* _pool;
std::vector<Expr*> _conjuncts;
std::vector<ExprContext*> _conjunct_ctxs;
std::vector<TupleId> _tuple_ids;
std::vector<ExecNode*> _children;
RowDescriptor _row_descriptor;
/// Resource information sent from the frontend.
const TBackendResourceProfile _resource_profile;
// debug-only: if _debug_action is not INVALID, node will perform action in
// _debug_phase
TExecNodePhase::type _debug_phase;
TDebugAction::type _debug_action;
int64_t _limit; // -1: no limit
int64_t _num_rows_returned;
boost::scoped_ptr<RuntimeProfile> _runtime_profile;
/// Account for peak memory used by this node
boost::scoped_ptr<MemTracker> _mem_tracker;
/// MemTracker used by 'expr_mem_pool_'.
boost::scoped_ptr<MemTracker> _expr_mem_tracker;
/// MemPool for allocating data structures used by expression evaluators in this node.
/// Created in Prepare().
boost::scoped_ptr<MemPool> _expr_mem_pool;
RuntimeProfile::Counter* _rows_returned_counter;
RuntimeProfile::Counter* _rows_returned_rate;
// Account for peak memory used by this node
RuntimeProfile::Counter* _memory_used_counter;
// Execution options that are determined at runtime. This is added to the
// runtime profile at close(). Examples for options logged here would be
// "Codegen Enabled"
boost::mutex _exec_options_lock;
std::string _runtime_exec_options;
/// Buffer pool client for this node. Initialized with the node's minimum reservation
/// in ClaimBufferReservation(). After initialization, the client must hold onto at
/// least the minimum reservation so that it can be returned to the initial
/// reservations pool in Close().
BufferPool::ClientHandle _buffer_pool_client;
ExecNode* child(int i) {
return _children[i];
}
bool is_closed() const {
return _is_closed;
}
// TODO(zc)
/// Pointer to the containing SubplanNode or NULL if not inside a subplan.
/// Set by SubplanNode::Init(). Not owned.
// SubplanNode* containing_subplan_;
/// Returns true if this node is inside the right-hand side plan tree of a SubplanNode.
/// Valid to call in or after Prepare().
bool is_in_subplan() const { return false; }
// Create a single exec node derived from thrift node; place exec node in 'pool'.
static Status create_node(RuntimeState* state, ObjectPool* pool, const TPlanNode& tnode,
const DescriptorTbl& descs, ExecNode** node);
static Status create_tree_helper(RuntimeState* state, ObjectPool* pool, const std::vector<TPlanNode>& tnodes,
const DescriptorTbl& descs, ExecNode* parent, int* node_idx, ExecNode** root);
virtual bool is_scan_node() const {
return false;
}
void init_runtime_profile(const std::string& name);
// Executes _debug_action if phase matches _debug_phase.
// 'phase' must not be INVALID.
Status exec_debug_action(TExecNodePhase::type phase);
// Appends option to '_runtime_exec_options'
void add_runtime_exec_option(const std::string& option);
/// Frees any local allocations made by evals_to_free_ and returns the result of
/// state->CheckQueryState(). Nodes should call this periodically, e.g. once per input
/// row batch. This should not be called outside the main execution thread.
//
/// Nodes may override this to add extra periodic cleanup, e.g. freeing other local
/// allocations. ExecNodes overriding this function should return
/// ExecNode::QueryMaintenance().
virtual Status QueryMaintenance(RuntimeState* state, const std::string& msg) WARN_UNUSED_RESULT;
private:
bool _is_closed;
};
// Builds a "Memory exceed limit" message and returns
// Status::MemoryLimitExceeded(<message>) from the *enclosing function*.
//   tracker: pointer-like expression providing consumption() and limit().
//   state:   pointer-like expression providing fragment_instance_id().
//   msg:     anything that can be streamed into a std::stringstream.
// 'tracker' and 'state' are parenthesized so that arbitrary expressions (e.g. '&obj')
// expand correctly. 'msg' is deliberately left bare so that call sites passing a
// chained stream expression (a << b) keep compiling.
// The stream is spelled std::stringstream and the local carries a trailing
// underscore so the expansion depends neither on a using-declaration nor on
// identifiers that may appear inside 'msg'.
#define LIMIT_EXCEEDED(tracker, state, msg) \
    do { \
        std::stringstream limit_exceeded_msg_; \
        limit_exceeded_msg_ << "Memory exceed limit. " << msg << " "; \
        limit_exceeded_msg_ << "Backend: " << BackendOptions::get_localhost() << ", "; \
        limit_exceeded_msg_ << "fragment: " << print_id((state)->fragment_instance_id()) << " "; \
        limit_exceeded_msg_ << "Used: " << (tracker)->consumption() \
                            << ", Limit: " << (tracker)->limit() << ". "; \
        limit_exceeded_msg_ << "You can change the limit by session variable exec_mem_limit."; \
        return Status::MemoryLimitExceeded(limit_exceeded_msg_.str()); \
    } while (false)

// Asks state->instance_mem_tracker() for a tracker whose limit is exceeded; if one is
// returned (non-null), returns a MemoryLimitExceeded status from the enclosing function
// via LIMIT_EXCEEDED. Otherwise execution falls through to the next statement.
// Note that 'state' is evaluated more than once.
#define RETURN_IF_LIMIT_EXCEEDED(state, msg) \
    do { \
        MemTracker* exceeded_tracker_ = \
                (state)->instance_mem_tracker()->find_limit_exceeded_tracker(); \
        if (exceeded_tracker_ != nullptr) { \
            LIMIT_EXCEEDED(exceeded_tracker_, state, msg); \
        } \
    } while (false)
}
#endif