642 lines
27 KiB
C++
642 lines
27 KiB
C++
// Licensed to the Apache Software Foundation (ASF) under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing,
|
|
// software distributed under the License is distributed on an
|
|
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
// KIND, either express or implied. See the License for the
|
|
// specific language governing permissions and limitations
|
|
// under the License.
|
|
|
|
#ifndef DORIS_BE_SRC_QUERY_CODEGEN_LLVM_CODEGEN_H
|
|
#define DORIS_BE_SRC_QUERY_CODEGEN_LLVM_CODEGEN_H
|
|
|
|
#include <map>
|
|
#include <string>
|
|
#include <vector>
|
|
#include <boost/scoped_ptr.hpp>
|
|
#include <boost/thread/mutex.hpp>
|
|
|
|
#include <llvm/IR/DerivedTypes.h>
|
|
#include <llvm/IR/Intrinsics.h>
|
|
#include <llvm/IR/IRBuilder.h>
|
|
#include <llvm/IR/LLVMContext.h>
|
|
#include <llvm/IR/Module.h>
|
|
#include <llvm/Analysis/Verifier.h>
|
|
#include <llvm/Support/raw_ostream.h>
|
|
#include <llvm/Support/MemoryBuffer.h>
|
|
|
|
#include "common/status.h"
|
|
#include "runtime/primitive_type.h"
|
|
#include "exprs/expr.h"
|
|
#include "util/runtime_profile.h"
|
|
#include "doris_ir/doris_ir_functions.h"
|
|
|
|
// Forward declare all llvm classes to avoid namespace pollution.
|
|
namespace llvm {
|
|
class AllocaInst;
|
|
class BasicBlock;
|
|
class ConstantFolder;
|
|
class ExecutionEngine;
|
|
class Function;
|
|
// class FunctionPassManager;
|
|
class LLVMContext;
|
|
class Module;
|
|
class NoFolder;
|
|
// class PassManager;
|
|
class PointerType;
|
|
class StructType;
|
|
class TargetData;
|
|
class Type;
|
|
class Value;
|
|
|
|
template<bool B, typename T, typename I>
|
|
class IRBuilder;
|
|
|
|
template<bool preserveName>
|
|
class IRBuilderDefaultInserter;
|
|
}
|
|
|
|
namespace doris {
|
|
|
|
class SubExprElimination;
|
|
|
|
// LLVM code generator. This is the top level object to generate jitted code.
|
|
//
|
|
// LLVM provides a c++ IR builder interface so IR does not need to be written
|
|
// manually. The interface is very low level so each line of IR that needs to
|
|
// be output maps 1:1 with calls to the interface.
|
|
// The llvm documentation is not fantastic and a lot of this was figured out
|
|
// by experimenting. Thankfully, their API is pretty well designed so it's
|
|
// possible to get by without great documentation. The llvm tutorial is very
|
|
// helpful, http://llvm.org/docs/tutorial/LangImpl1.html. In this tutorial, they
|
|
// go over how to JIT an AST for a toy language they create.
|
|
// It is also helpful to use their online app that lets you compile c/c++ to IR.
|
|
// http://llvm.org/demo/index.cgi.
|
|
//
|
|
// This class provides two interfaces, one for testing and one for the query
|
|
// engine. The interface for the query engine will load the cross-compiled
|
|
// IR module (output during the build) and extract all of functions that will
|
|
// be called directly. The test interface can be used to load any precompiled
|
|
// module or none at all (but this class will not validate the module).
|
|
//
|
|
// This class is mostly not threadsafe. During the Prepare() phase of the fragment
|
|
// execution, nodes should codegen functions.
|
|
// Afterward, optimize_module() should be called at which point all codegened functions
|
|
// are optimized.
|
|
// Subsequently, nodes can get at the jit compiled function pointer (typically during the
|
|
// Open() call). Getting the jit compiled function (jit_function()) is the only thread
|
|
// safe function.
|
|
//
|
|
// Currently, each query will create and initialize one of these
|
|
// objects. This requires loading and parsing the cross compiled modules.
|
|
// TODO: we should be able to do this once per process and let llvm compile
|
|
// functions from across modules.
|
|
//
|
|
// LLVM has a nontrivial memory management scheme and objects will take
|
|
// ownership of others. The document is pretty good about being explicit with this
|
|
// but it is not very intuitive.
|
|
// TODO: look into diagnostic output and debuggability
|
|
// TODO: confirm that the multi-threaded usage is correct
|
|
class LlvmCodeGen {
|
|
public:
|
|
// This function must be called once per process before any llvm API calls are
|
|
// made. LLVM needs to allocate data structures for multi-threading support and
|
|
// to enable dynamic linking of jitted code.
|
|
// if 'load_backend', load the backend static object for llvm. This is needed
|
|
// when libbackend.so is loaded from java. llvm will be default only look in
|
|
// the current object and not be able to find the backend symbols
|
|
// TODO: this can probably be removed after Doris refactor where the java
|
|
// side is not loading the be explicitly anymore.
|
|
static void initialize_llvm(bool load_backend = false);
|
|
|
|
// Loads and parses the precompiled doris IR module
|
|
// codegen will contain the created object on success.
|
|
static Status load_doris_ir(
|
|
ObjectPool*, const std::string& id, boost::scoped_ptr<LlvmCodeGen>* codegen);
|
|
|
|
// Removes all jit compiled dynamically linked functions from the process.
|
|
~LlvmCodeGen();
|
|
|
|
RuntimeProfile* runtime_profile() {
|
|
return &_profile;
|
|
}
|
|
RuntimeProfile::Counter* codegen_timer() {
|
|
return _codegen_timer;
|
|
}
|
|
|
|
// Turns on/off optimization passes
|
|
void enable_optimizations(bool enable);
|
|
|
|
// For debugging. Returns the IR that was generated. If full_module, the
|
|
// entire module is dumped, including what was loaded from precompiled IR.
|
|
// If false, only output IR for functions which were generated.
|
|
std::string get_ir(bool full_module) const;
|
|
|
|
// Typedef builder in case we want to change the template arguments later
|
|
typedef llvm::IRBuilder<> LlvmBuilder;
|
|
|
|
// Utility struct that wraps a variable name and llvm type.
|
|
struct NamedVariable {
|
|
std::string name;
|
|
llvm::Type* type;
|
|
|
|
NamedVariable(const std::string& name = "", llvm::Type* type = NULL) {
|
|
this->name = name;
|
|
this->type = type;
|
|
}
|
|
};
|
|
|
|
// Abstraction over function prototypes. Contains helpers to build prototypes and
|
|
// generate IR for the types.
|
|
class FnPrototype {
|
|
public:
|
|
// Create a function prototype object, specifying the name of the function and
|
|
// the return type.
|
|
FnPrototype(LlvmCodeGen*, const std::string& name, llvm::Type* ret_type);
|
|
|
|
// Returns name of function
|
|
const std::string& name() const {
|
|
return _name;
|
|
}
|
|
|
|
// Add argument
|
|
void add_argument(const NamedVariable& var) {
|
|
_args.push_back(var);
|
|
}
|
|
|
|
void add_argument(const std::string& name, llvm::Type* type) {
|
|
_args.push_back(NamedVariable(name, type));
|
|
}
|
|
|
|
// Generate LLVM function prototype.
|
|
// If a non-null builder is passed, this function will also create the entry block
|
|
// and set the builder's insert point to there.
|
|
// If params is non-null, this function will also return the arguments
|
|
// values (params[0] is the first arg, etc).
|
|
// In that case, params should be preallocated to be number of arguments
|
|
llvm::Function* generate_prototype(LlvmBuilder* builder = NULL,
|
|
llvm::Value** params = NULL);
|
|
|
|
private:
|
|
friend class LlvmCodeGen;
|
|
|
|
LlvmCodeGen* _codegen;
|
|
std::string _name;
|
|
llvm::Type* _ret_type;
|
|
std::vector<NamedVariable> _args;
|
|
};
|
|
|
|
/// Codegens IR to load array[idx] and returns the loaded value. 'array' should be a
|
|
/// C-style array (e.g. i32*) or an IR array (e.g. [10 x i32]). This function does not
|
|
/// do bounds checking.
|
|
llvm::Value* codegen_array_at(
|
|
LlvmBuilder*, llvm::Value* array, int idx, const char* name);
|
|
|
|
/// Return a pointer type to 'type'
|
|
llvm::PointerType* get_ptr_type(llvm::Type* type);
|
|
|
|
// Returns llvm type for the primitive type
|
|
llvm::Type* get_type(const PrimitiveType& type);
|
|
|
|
// Returns llvm type for the primitive type
|
|
llvm::Type* get_type(const TypeDescriptor& type);
|
|
|
|
// Return a pointer type to 'type' (e.g. int16_t*)
|
|
llvm::PointerType* get_ptr_type(const TypeDescriptor& type);
|
|
llvm::PointerType* get_ptr_type(const PrimitiveType& type);
|
|
|
|
// Returns the type with 'name'. This is used to pull types from clang
|
|
// compiled IR. The types we generate at runtime are unnamed.
|
|
// The name is generated by the clang compiler in this form:
|
|
// <class/struct>.<namespace>::<class name>. For example:
|
|
// "class.doris::AggregationNode"
|
|
llvm::Type* get_type(const std::string& name);
|
|
|
|
/// Returns the pointer type of the type returned by GetType(name)
|
|
llvm::PointerType* get_ptr_type(const std::string& name);
|
|
|
|
/// Alloca's an instance of the appropriate pointer type and sets it to point at 'v'
|
|
llvm::Value* get_ptr_to(LlvmBuilder* builder, llvm::Value* v, const char* name);
|
|
|
|
/// Alloca's an instance of the appropriate pointer type and sets it to point at 'v'
|
|
llvm::Value* get_ptr_to(LlvmBuilder* builder, llvm::Value* v) {
|
|
return get_ptr_to(builder, v, "");
|
|
}
|
|
|
|
// Returns reference to llvm context object. Each LlvmCodeGen has its own
|
|
// context to allow multiple threads to be calling into llvm at the same time.
|
|
llvm::LLVMContext& context() {
|
|
return *_context.get();
|
|
}
|
|
|
|
// Returns execution engine interface
|
|
llvm::ExecutionEngine* execution_engine() {
|
|
return _execution_engine.get();
|
|
}
|
|
|
|
// Returns the underlying llvm module
|
|
llvm::Module* module() {
|
|
return _module;
|
|
}
|
|
|
|
// Register a expr function with unique id. It can be subsequently retrieved via
|
|
// get_registered_expr_fn with that id.
|
|
void register_expr_fn(int64_t id, llvm::Function* function) {
|
|
DCHECK(_registered_exprs_map.find(id) == _registered_exprs_map.end());
|
|
_registered_exprs_map[id] = function;
|
|
_registered_exprs.insert(function);
|
|
}
|
|
|
|
// Returns a registered expr function for id or NULL if it does not exist.
|
|
llvm::Function* get_registered_expr_fn(int64_t id) {
|
|
std::map<int64_t, llvm::Function*>::iterator it = _registered_exprs_map.find(id);
|
|
|
|
if (it == _registered_exprs_map.end()) {
|
|
return NULL;
|
|
}
|
|
|
|
return it->second;
|
|
}
|
|
|
|
/// Optimize and compile the module. This should be called after all functions to JIT
|
|
/// have been added to the module via AddFunctionToJit(). If optimizations_enabled_ is
|
|
/// false, the module will not be optimized before compilation.
|
|
Status finalize_module();
|
|
|
|
// Optimize the entire module. LLVM is more built for running its optimization
|
|
// passes over the entire module (all the functions) rather than individual
|
|
// functions.
|
|
void optimize_module();
|
|
|
|
// Replaces all instructions that call 'target_name' with a call instruction
|
|
// to the new_fn. Returns the modified function.
|
|
// - target_name is the unmangled function name that should be replaced.
|
|
// The name is assumed to be unmangled so all call sites that contain the
|
|
// replace_name substring will be replaced. target_name is case-sensitive
|
|
// TODO: be more strict than substring? work out the mangling rules?
|
|
// - If update_in_place is true, the caller function will be modified in place.
|
|
// Otherwise, the caller function will be cloned and the original function
|
|
// is unmodified. If update_in_place is false and the function is already
|
|
// been dynamically linked, the existing function will be unlinked. Note that
|
|
// this is very unthread-safe, if there are threads in the function to be unlinked,
|
|
// bad things will happen.
|
|
// - 'num_replaced' returns the number of call sites updated
|
|
//
|
|
// Most of our use cases will likely not be in place. We will have one 'template'
|
|
// version of the function loaded for each type of Node (e.g. AggregationNode).
|
|
// Each instance of the node will clone the function, replacing the inner loop
|
|
// body with the codegened version. The codegened bodies differ from instance
|
|
// to instance since they are specific to the node's tuple desc.
|
|
llvm::Function* replace_call_sites(llvm::Function* caller, bool update_in_place,
|
|
llvm::Function* new_fn, const std::string& target_name, int* num_replaced);
|
|
|
|
/// Returns a copy of fn. The copy is added to the module.
|
|
llvm::Function* clone_function(llvm::Function* fn);
|
|
|
|
// Verify and optimize function. This should be called at the end for each
|
|
// codegen'd function. If the function does not verify, it will return NULL,
|
|
// otherwise, it will optimize, mark the function for inlining and return the
|
|
// function object.
|
|
llvm::Function* finalize_function(llvm::Function* function);
|
|
|
|
// Inline all function calls for 'fn'. 'fn' is modified in place. Returns
|
|
// the number of functions inlined. This is *not* called recursively
|
|
// (i.e. second level function calls are not inlined). This can be called
|
|
// again to inline those until this returns 0.
|
|
int inline_call_sites(llvm::Function* fn, bool skip_registered_fns);
|
|
|
|
// Optimizes the function in place. This uses a combination of llvm optimization
|
|
// passes as well as some custom heuristics. This should be called for all
|
|
// functions which call Exprs. The exprs will be inlined as much as possible,
|
|
// and will do basic sub expression elimination.
|
|
// This should be called before optimize_module for functions that want to remove
|
|
// redundant exprs. This should be called at the highest level possible to
|
|
// maximize the number of redundant exprs that can be found.
|
|
// TODO: we need to spend more time to output better IR. Asking llvm to
|
|
// remove redundant codeblocks on its own is too difficult for it.
|
|
// TODO: this should implement the llvm FunctionPass interface and integrated
|
|
// with the llvm optimization passes.
|
|
llvm::Function* optimize_function_with_exprs(llvm::Function* fn);
|
|
|
|
/// Adds the function to be automatically jit compiled after the module is optimized.
|
|
/// That is, after FinalizeModule(), this will do *result_fn_ptr = JitFunction(fn);
|
|
//
|
|
/// This is useful since it is not valid to call JitFunction() before every part of the
|
|
/// query has finished adding their IR and it's convenient to not have to rewalk the
|
|
/// objects. This provides the same behavior as walking each of those objects and calling
|
|
/// JitFunction().
|
|
//
|
|
/// In addition, any functions not registered with AddFunctionToJit() are marked as
|
|
/// internal in FinalizeModule() and may be removed as part of optimization.
|
|
//
|
|
/// This will also wrap functions returning DecimalVals in an ABI-compliant wrapper (see
|
|
/// the comment in the .cc file for details). This is so we don't accidentally try to
|
|
/// call non-compliant code from native code.
|
|
void add_function_to_jit(llvm::Function* fn, void** fn_ptr);
|
|
|
|
// Jit compile the function. This will run optimization passes and verify
|
|
// the function. The result is a function pointer that is dynamically linked
|
|
// into the process.
|
|
// Returns NULL if the function is invalid.
|
|
// scratch_size will be set to the buffer size required to call the function
|
|
// scratch_size is the total size from all LlvmCodeGen::get_scratch_buffer
|
|
// calls (with some additional bytes for alignment)
|
|
// This function is thread safe.
|
|
void* jit_function(llvm::Function* function, int* scratch_size = NULL);
|
|
|
|
// Verfies the function if the verfier is enabled. Returns false if function
|
|
// is invalid.
|
|
bool verify_function(llvm::Function* function);
|
|
|
|
// This will generate a printf call instruction to output 'message' at the
|
|
// builder's insert point. Only for debugging.
|
|
void codegen_debug_trace(LlvmBuilder* builder, const char* message);
|
|
|
|
/// Returns the string representation of a llvm::Value* or llvm::Type*
|
|
template <typename T>
|
|
static std::string print(T* value_or_type) {
|
|
std::string str;
|
|
llvm::raw_string_ostream stream(str);
|
|
value_or_type->print(stream);
|
|
return str;
|
|
}
|
|
|
|
// Returns the libc function, adding it to the module if it has not already been.
|
|
llvm::Function* get_lib_c_function(FnPrototype* prototype);
|
|
|
|
// Returns the cross compiled function. IRFunction::Type is an enum which is
|
|
// defined in 'doris-ir/doris-ir-functions.h'
|
|
llvm::Function* get_function(IRFunction::Type);
|
|
|
|
// Returns the hash function with signature:
|
|
// int32_t Hash(int8_t* data, int len, int32_t seed);
|
|
// If num_bytes is non-zero, the returned function will be codegen'd to only
|
|
// work for that number of bytes. It is invalid to call that function with a
|
|
// different 'len'.
|
|
llvm::Function* get_hash_function(int num_bytes = -1);
|
|
|
|
// Allocate stack storage for local variables. This is similar to traditional c, where
|
|
// all the variables must be declared at the top of the function. This helper can be
|
|
// called from anywhere and will add a stack allocation for 'var' at the beginning of
|
|
// the function. This would be used, for example, if a function needed a temporary
|
|
// struct allocated. The allocated variable is scoped to the function.
|
|
// This is not related to get_scratch_buffer which is used for structs that are returned
|
|
// to the caller.
|
|
llvm::AllocaInst* create_entry_block_alloca(llvm::Function* f, const NamedVariable& var);
|
|
llvm::AllocaInst* create_entry_block_alloca(
|
|
const LlvmBuilder& builder, llvm::Type* type, const char* name);
|
|
|
|
// Utility to create two blocks in 'fn' for if/else codegen. if_block and else_block
|
|
// are return parameters. insert_before is optional and if set, the two blocks
|
|
// will be inserted before that block otherwise, it will be inserted at the end
|
|
// of 'fn'. Being able to place blocks is useful for debugging so the IR has a
|
|
// better looking control flow.
|
|
void create_if_else_blocks(llvm::Function* fn, const std::string& if_name,
|
|
const std::string& else_name,
|
|
llvm::BasicBlock** if_block, llvm::BasicBlock** else_block,
|
|
llvm::BasicBlock* insert_before = NULL);
|
|
|
|
// Returns offset into scratch buffer: offset points to area of size 'byte_size'
|
|
// Called by expr generation to request scratch buffer. This is used for struct
|
|
// types (i.e. StringValue) where data cannot be returned by registers.
|
|
// For example, to jit the expr "strlen(str_col)", we need a temporary StringValue
|
|
// struct from the inner SlotRef expr node. The SlotRef node would call
|
|
// get_scratch_buffer(sizeof(StringValue)) and output the intermediate struct at
|
|
// scratch_buffer (passed in as argument to compute function) + offset.
|
|
int get_scratch_buffer(int byte_size);
|
|
|
|
// Create a llvm pointer value from 'ptr'. This is used to pass pointers between
|
|
// c-code and code-generated IR. The resulting value will be of 'type'.
|
|
llvm::Value* cast_ptr_to_llvm_ptr(llvm::Type* type, void* ptr);
|
|
|
|
// Returns the constant 'val' of 'type'
|
|
llvm::Value* get_int_constant(PrimitiveType type, int64_t val);
|
|
|
|
// Returns true/false constants (bool type)
|
|
llvm::Value* true_value() {
|
|
return _true_value;
|
|
}
|
|
llvm::Value* false_value() {
|
|
return _false_value;
|
|
}
|
|
llvm::Value* null_ptr_value() {
|
|
return llvm::ConstantPointerNull::get(ptr_type());
|
|
}
|
|
|
|
// Simple wrappers to reduce code verbosity
|
|
llvm::Type* boolean_type() {
|
|
return get_type(TYPE_BOOLEAN);
|
|
}
|
|
llvm::Type* tinyint_type() {
|
|
return get_type(TYPE_TINYINT);
|
|
}
|
|
llvm::Type* smallint_type() {
|
|
return get_type(TYPE_SMALLINT);
|
|
}
|
|
llvm::Type* int_type() {
|
|
return get_type(TYPE_INT);
|
|
}
|
|
llvm::Type* bigint_type() {
|
|
return get_type(TYPE_BIGINT);
|
|
}
|
|
llvm::Type* largeint_type() {
|
|
return get_type(TYPE_LARGEINT);
|
|
}
|
|
llvm::Type* float_type() {
|
|
return get_type(TYPE_FLOAT);
|
|
}
|
|
llvm::Type* double_type() {
|
|
return get_type(TYPE_DOUBLE);
|
|
}
|
|
llvm::Type* string_val_type() const {
|
|
return _string_val_type;
|
|
}
|
|
llvm::Type* datetime_val_type() const {
|
|
return _datetime_val_type;
|
|
}
|
|
llvm::Type* decimal_val_type() const {
|
|
return _decimal_val_type;
|
|
}
|
|
llvm::PointerType* ptr_type() {
|
|
return _ptr_type;
|
|
}
|
|
llvm::Type* void_type() {
|
|
return _void_type;
|
|
}
|
|
|
|
llvm::Type* i128_type() {
|
|
return llvm::Type::getIntNTy(context(), 128);
|
|
}
|
|
|
|
// Fills 'functions' with all the functions that are defined in the module.
|
|
// Note: this does not include functions that are just declared
|
|
void get_functions(std::vector<llvm::Function*>* functions);
|
|
|
|
// Generates function to return min/max(v1, v2)
|
|
llvm::Function* codegen_min_max(const TypeDescriptor& type, bool min);
|
|
|
|
// Codegen to call llvm memcpy intrinsic at the current builder location
|
|
// dst & src must be pointer types. size is the number of bytes to copy.
|
|
void codegen_memcpy(LlvmBuilder*, llvm::Value* dst, llvm::Value* src, int size);
|
|
|
|
// Codegen for do *dst = src. For native types, this is just a store, for structs
|
|
// we need to assign the fields one by one
|
|
void codegen_assign(LlvmBuilder*, llvm::Value* dst, llvm::Value* src, PrimitiveType);
|
|
|
|
llvm::Instruction::CastOps get_cast_op(
|
|
const TypeDescriptor& from_type, const TypeDescriptor& to_type);
|
|
|
|
private:
|
|
friend class LlvmCodeGenTest;
|
|
friend class SubExprElimination;
|
|
|
|
// Top level codegen object. 'module_name' is only used for debugging when
|
|
// outputting the IR. module's loaded from disk will be named as the file
|
|
// path.
|
|
LlvmCodeGen(ObjectPool* pool, const std::string& module_name);
|
|
|
|
// Initializes the jitter and execution engine.
|
|
Status init();
|
|
|
|
// Load a pre-compiled IR module from 'file'. This creates a top level
|
|
// codegen object. This is used by tests to load custom modules.
|
|
// codegen will contain the created object on success.
|
|
static Status load_from_file(ObjectPool*, const std::string& file,
|
|
boost::scoped_ptr<LlvmCodeGen>* codegen);
|
|
|
|
/// Load a pre-compiled IR module from module_ir. This creates a top level codegen
|
|
/// object. codegen will contain the created object on success.
|
|
static Status load_from_memory(ObjectPool* pool, llvm::MemoryBuffer* module_ir,
|
|
const std::string& module_name, const std::string& id,
|
|
boost::scoped_ptr<LlvmCodeGen>* codegen);
|
|
|
|
/// Loads an LLVM module. 'module_ir' should be a reference to a memory buffer containing
|
|
/// LLVM bitcode. module_name is the name of the module to use when reporting errors.
|
|
/// The caller is responsible for cleaning up module.
|
|
static Status load_module_from_memory(LlvmCodeGen* codegen, llvm::MemoryBuffer* module_ir,
|
|
const std::string& module_name, llvm::Module** module);
|
|
|
|
// Load the intrinsics doris needs. This is a one time initialization.
|
|
// Values are stored in '_llvm_intrinsics'
|
|
Status load_intrinsics();
|
|
|
|
// Clears generated hash fns. This is only used for testing.
|
|
void clear_hash_fns();
|
|
|
|
// Name of the JIT module. Useful for debugging.
|
|
std::string _name;
|
|
|
|
// Codegen counters
|
|
RuntimeProfile _profile;
|
|
RuntimeProfile::Counter* _load_module_timer;
|
|
RuntimeProfile::Counter* _prepare_module_timer;
|
|
RuntimeProfile::Counter* _module_file_size;
|
|
RuntimeProfile::Counter* _codegen_timer;
|
|
RuntimeProfile::Counter* _optimization_timer;
|
|
RuntimeProfile::Counter* _compile_timer;
|
|
|
|
// whether or not optimizations are enabled
|
|
bool _optimizations_enabled;
|
|
|
|
// If true, the module is corrupt and we cannot codegen this query.
|
|
// TODO: we could consider just removing the offending function and attempting to
|
|
// codegen the rest of the query. This requires more testing though to make sure
|
|
// that the error is recoverable.
|
|
bool _is_corrupt;
|
|
|
|
// If true, the module has been compiled. It is not valid to add additional
|
|
// functions after this point.
|
|
bool _is_compiled;
|
|
|
|
// Error string that llvm will write to
|
|
std::string _error_string;
|
|
|
|
// Top level llvm object. Objects from different contexts do not share anything.
|
|
// We can have multiple instances of the LlvmCodeGen object in different threads
|
|
boost::scoped_ptr<llvm::LLVMContext> _context;
|
|
|
|
// Top level codegen object. Contains everything to jit one 'unit' of code.
|
|
// Owned by the _execution_engine.
|
|
llvm::Module* _module;
|
|
|
|
// Execution/Jitting engine.
|
|
boost::scoped_ptr<llvm::ExecutionEngine> _execution_engine;
|
|
|
|
// current offset into scratch buffer
|
|
int _scratch_buffer_offset;
|
|
|
|
// Keeps track of all the functions that have been jit compiled and linked into
|
|
// the process. Special care needs to be taken if we need to modify these functions.
|
|
// bool is unused.
|
|
std::map<llvm::Function*, bool> _jitted_functions;
|
|
|
|
// Lock protecting _jitted_functions
|
|
boost::mutex _jitted_functions_lock;
|
|
|
|
// Keeps track of the external functions that have been included in this module
|
|
// e.g libc functions or non-jitted doris functions.
|
|
// TODO: this should probably be FnPrototype->Functions mapping
|
|
std::map<std::string, llvm::Function*> _external_functions;
|
|
|
|
// Functions parsed from pre-compiled module. Indexed by DorisIR::Function enum
|
|
std::vector<llvm::Function*> _loaded_functions;
|
|
|
|
// Stores functions codegen'd by doris. This does not contain cross compiled
|
|
// functions, only function that were generated at runtime. Does not overlap
|
|
// with _loaded_functions.
|
|
std::vector<llvm::Function*> _codegend_functions;
|
|
|
|
// A mapping of unique id to registered expr functions
|
|
std::map<int64_t, llvm::Function*> _registered_exprs_map;
|
|
|
|
// A set of all the functions in '_registered_exprs_map' for quick lookup.
|
|
std::set<llvm::Function*> _registered_exprs;
|
|
|
|
// A cache of loaded llvm intrinsics
|
|
std::map<llvm::Intrinsic::ID, llvm::Function*> _llvm_intrinsics;
|
|
|
|
// This is a cache of generated hash functions by byte size. It is common
|
|
// for the caller to know the number of bytes to hash (e.g. tuple width) and
|
|
// we can codegen a loop unrolled hash function.
|
|
std::map<int, llvm::Function*> _hash_fns;
|
|
|
|
/// The locations of modules that have been linked. Used to avoid linking the same module
|
|
/// twice, which causes symbol collision errors.
|
|
std::set<std::string> _linked_modules;
|
|
|
|
/// The vector of functions to automatically JIT compile after FinalizeModule().
|
|
std::vector<std::pair<llvm::Function*, void**> > _fns_to_jit_compile;
|
|
|
|
// Debug utility that will insert a printf-like function into the generated
|
|
// IR. Useful for debugging the IR. This is lazily created.
|
|
llvm::Function* _debug_trace_fn;
|
|
|
|
// Debug strings that will be outputted by jitted code. This is a copy of all
|
|
// strings passed to codegen_debug_trace.
|
|
std::vector<std::string> _debug_strings;
|
|
|
|
// llvm representation of a few common types. Owned by context.
|
|
llvm::PointerType* _ptr_type; // int8_t*
|
|
llvm::Type* _void_type; // void
|
|
llvm::Type* _string_val_type; // StringVal
|
|
llvm::Type* _decimal_val_type; // StringVal
|
|
llvm::Type* _datetime_val_type; // DateTimeValue
|
|
|
|
// llvm constants to help with code gen verbosity
|
|
llvm::Value* _true_value;
|
|
llvm::Value* _false_value;
|
|
};
|
|
|
|
}
|
|
|
|
#endif
|
|
|