Files
doris/be/src/exprs/agg_fn.h
hongbin e61d296486 [Refactor] Replace '#ifndef' with '#pragma once' (#9456)
* Replace '#ifndef' with '#pragma once'
2022-05-10 09:25:59 +08:00

188 lines
7.7 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/apache/impala/blob/branch-2.10.0/be/src/exprs/agg-fn.h
// and modified by Doris
#pragma once
#include "exprs/expr.h"
#include "runtime/descriptors.h"
#include "udf/udf.h"
namespace doris {
// Make the UDF FunctionContext type usable unqualified inside namespace doris.
using doris_udf::FunctionContext;

// Forward declarations (alphabetical); these types are only named here, their
// definitions live in other headers.
class MemPool;
class ObjectPool;
class RPCFn;
class RuntimeState;
class TExprNode;
class Tuple;
class TupleRow;
/// --- AggFn overview
///
/// An aggregate function generates an output over a set of tuple rows.
/// An example would be AVG() which computes the average of all input rows.
/// The built-in aggregate functions such as min, max, sum, avg, ndv, etc. are
/// in this category.
///
/// --- Implementation
///
/// AggFn contains the aggregation operations, pointers to the UDAF interface functions
/// implementing various states of aggregation and the descriptors for the intermediate
/// and output values. Please see udf/udf.h for details of the UDAF interfaces.
///
/// AggFnEvaluator is the interface for evaluating aggregate functions against input
/// tuple rows. It invokes the following functions at different phases of the aggregation:
///
/// _init_fn : An initialization function that initializes the aggregate value.
///
/// _update_fn : An update function that processes the arguments for each row in the
/// query result set and accumulates an intermediate result. For example,
/// this function might increment a counter, append to a string buffer or
/// add the input to a cumulative sum.
///
/// _merge_fn : A merge function that combines multiple intermediate results into a
/// single value.
///
/// _serialize_fn: A serialization function that flattens any intermediate values
/// containing pointers, and frees any memory allocated during the init,
/// update and merge phases.
///
/// _finalize_fn : A finalize function that either passes through the combined result
/// unchanged, or does one final transformation. Also frees the resources
/// allocated during init, update and merge phases.
///
/// _get_value_fn: Used by AnalyticEval node to obtain the current intermediate value.
///
/// _remove_fn : Used by AnalyticEval node to undo the update to the intermediate value
/// by an input row as it falls out of a sliding window.
///
class AggFn : public Expr {
public:
    /// Override the base class' implementation.
    virtual bool is_agg_fn() const { return true; }

    /// Enum for some built-in aggregation ops.
    enum AggregationOp {
        COUNT,
        MIN,
        MAX,
        SUM,
        AVG,
        NDV,
        SUM_DISTINCT,
        COUNT_DISTINCT,
        HLL_UNION_AGG,
        OTHER,
    };

    /// Creates and initializes an aggregate function from 'texpr' and returns it in
    /// 'agg_fn'. The returned AggFn lives in the ObjectPool of 'state'. 'row_desc' is
    /// the row descriptor of the input tuple row; 'intermediate_slot_desc' is the slot
    /// descriptor of the intermediate value; 'output_slot_desc' is the slot descriptor
    /// of the output value. On failure, returns error status and sets 'agg_fn' to nullptr.
    static Status create(const TExpr& texpr, const RowDescriptor& row_desc,
                         const SlotDescriptor& intermediate_slot_desc,
                         const SlotDescriptor& output_slot_desc, RuntimeState* state,
                         AggFn** agg_fn) WARN_UNUSED_RESULT;

    /// True if this is a merging aggregation.
    bool is_merge() const { return is_merge_; }
    /// The aggregation operation of this function.
    AggregationOp agg_op() const { return agg_op_; }
    /// True for COUNT with no child expressions.
    bool is_count_star() const { return agg_op_ == COUNT && _children.empty(); }
    bool is_count_distinct() const { return agg_op_ == COUNT_DISTINCT; }
    bool is_sum_distinct() const { return agg_op_ == SUM_DISTINCT; }
    /// True if the function's binary type is TFunctionBinaryType::BUILTIN.
    bool is_builtin() const { return _fn.binary_type == TFunctionBinaryType::BUILTIN; }
    const std::string& fn_name() const { return _fn.name.function_name; }

    /// Type of the intermediate slot.
    const TypeDescriptor& intermediate_type() const { return intermediate_slot_desc_.type(); }
    const SlotDescriptor& intermediate_slot_desc() const { return intermediate_slot_desc_; }
    // Output type is the same as Expr::type().
    const SlotDescriptor& output_slot_desc() const { return output_slot_desc_; }

    /// Accessors for the per-phase function pointers. A pointer is nullptr unless it
    /// has been set (the members default to nullptr).
    void* remove_fn() const { return _remove_fn; }
    /// Returns the merge function for a merging aggregation, the update function
    /// otherwise.
    void* merge_or_update_fn() const { return is_merge_ ? _merge_fn : _update_fn; }
    void* serialize_fn() const { return _serialize_fn; }
    void* get_value_fn() const { return _get_value_fn; }
    void* finalize_fn() const { return _finalize_fn; }

    /// True if a remove function is set.
    bool supports_remove() const { return _remove_fn != nullptr; }
    /// True if a serialize function is set.
    bool supports_serialize() const { return _serialize_fn != nullptr; }

    FunctionContext::TypeDesc get_intermediate_type_desc() const;
    FunctionContext::TypeDesc get_output_type_desc() const;
    /// The types of the arguments to the aggregate function.
    const std::vector<FunctionContext::TypeDesc>& arg_type_descs() const { return arg_type_descs_; }

    /// Releases all cache entries to libCache for all nodes in the expr tree.
    virtual void close();
    static void close(const std::vector<AggFn*>& exprs);

    /// Always returns nullptr; 'pool' is not used.
    Expr* clone(ObjectPool* pool) const { return nullptr; }

    virtual std::string debug_string() const;
    static std::string debug_string(const std::vector<AggFn*>& exprs);

    /// Returns the stored vararg start index.
    /// Returned as plain 'int': a top-level 'const' on a by-value return has no effect
    /// and only triggers -Wignored-qualifiers.
    int get_vararg_start_idx() const { return _vararg_start_idx; }

private:
    friend class Expr;
    friend class NewAggFnEvaluator;

    /// True if this is a merging aggregation.
    const bool is_merge_;

    /// Slot into which Update()/Merge()/Serialize() write their result. Not owned.
    const SlotDescriptor& intermediate_slot_desc_;

    /// Slot into which Finalize() results are written. Not owned. Identical to
    /// intermediate_slot_desc_ if this agg fn has the same intermediate and result type.
    const SlotDescriptor& output_slot_desc_;

    /// The types of the arguments to the aggregate function.
    const std::vector<FunctionContext::TypeDesc> arg_type_descs_;

    /// The aggregation operation.
    AggregationOp agg_op_;

    /// Function pointers for the different phases of the aggregate function.
    void* _init_fn = nullptr;
    void* _update_fn = nullptr;
    void* _remove_fn = nullptr;
    void* _merge_fn = nullptr;
    void* _serialize_fn = nullptr;
    void* _get_value_fn = nullptr;
    void* _finalize_fn = nullptr;

    /// NOTE(review): presumably the index of the first variadic argument; it has no
    /// in-class initializer, so it must be set by the constructor -- confirm in the
    /// implementation file.
    int _vararg_start_idx;

    /// NOTE(review): one RPCFn per aggregation phase; presumably the RPC-backed
    /// counterparts of the function pointers above -- confirm in the implementation
    /// file. RPCFn is only forward-declared in this header.
    std::unique_ptr<RPCFn> _rpc_init;
    std::unique_ptr<RPCFn> _rpc_update;
    std::unique_ptr<RPCFn> _rpc_remove;
    std::unique_ptr<RPCFn> _rpc_merge;
    std::unique_ptr<RPCFn> _rpc_serialize;
    std::unique_ptr<RPCFn> _rpc_get_value;
    std::unique_ptr<RPCFn> _rpc_finalize;

    /// Private constructor; AggFn is a friend of nothing outside this header except
    /// Expr and NewAggFnEvaluator, and create() is the public factory.
    AggFn(const TExprNode& node, const SlotDescriptor& intermediate_slot_desc,
          const SlotDescriptor& output_slot_desc);

    /// Initializes the AggFn and its input expressions. May load the UDAF from LibCache
    /// if necessary.
    virtual Status init(const RowDescriptor& desc, RuntimeState* state) WARN_UNUSED_RESULT;
};
} // namespace doris