// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef DORIS_BE_SRC_QUERY_CODEGEN_LLVM_CODEGEN_H
#define DORIS_BE_SRC_QUERY_CODEGEN_LLVM_CODEGEN_H

// NOTE(review): the targets of the following bare #include directives, and every
// template argument list in this header (e.g. on scoped_ptr, vector, map, set and the
// 'template' introducers), appear to have been lost when this text was extracted.
// They are left exactly as found; restore them from the upstream file before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "common/status.h"
#include "runtime/primitive_type.h"
#include "exprs/expr.h"
#include "util/runtime_profile.h"
#include "doris_ir/doris_ir_functions.h"

// Forward declare all llvm classes to avoid namespace pollution.
namespace llvm {
class AllocaInst;
class BasicBlock;
class ConstantFolder;
class ExecutionEngine;
class Function;
// class FunctionPassManager;
class LLVMContext;
class Module;
class NoFolder;
// class PassManager;
class PointerType;
class StructType;
class TargetData;
class Type;
class Value;

// NOTE(review): the template parameter lists of these two forward declarations are
// missing (see the note at the top of the file).
template class IRBuilder;

template class IRBuilderDefaultInserter;
}

namespace doris {

class SubExprElimination;

// LLVM code generator. This is the top level object to generate jitted code.
//
// LLVM provides a c++ IR builder interface so IR does not need to be written
// manually. The interface is very low level so each line of IR that needs to
// be output maps 1:1 with calls to the interface.
// The llvm documentation is not fantastic and a lot of this was figured out
// by experimenting. Thankfully, their API is pretty well designed so it's
// possible to get by without great documentation. The llvm tutorial is very
// helpful, http://llvm.org/docs/tutorial/LangImpl1.html. In this tutorial, they
// go over how to JIT an AST for a toy language they create.
// It is also helpful to use their online app that lets you compile c/c++ to IR.
// http://llvm.org/demo/index.cgi.
//
// This class provides two interfaces, one for testing and one for the query
// engine. The interface for the query engine will load the cross-compiled
// IR module (output during the build) and extract all of the functions that will
// be called directly. The test interface can be used to load any precompiled
// module or none at all (but this class will not validate the module).
//
// This class is mostly not threadsafe. During the Prepare() phase of the fragment
// execution, nodes should codegen functions.
// Afterward, optimize_module() should be called at which point all codegened functions
// are optimized.
// Subsequently, nodes can get at the jit compiled function pointer (typically during the
// Open() call). Getting the jit compiled function (jit_function()) is the only thread
// safe function.
//
// Currently, each query will create and initialize one of these
// objects. This requires loading and parsing the cross compiled modules.
// TODO: we should be able to do this once per process and let llvm compile
// functions from across modules.
//
// LLVM has a nontrivial memory management scheme and objects will take
// ownership of others. The documentation is pretty good about being explicit with this
// but it is not very intuitive.
// TODO: look into diagnostic output and debuggability
// TODO: confirm that the multi-threaded usage is correct
class LlvmCodeGen {
public:
    // This function must be called once per process before any llvm API calls are
    // made. LLVM needs to allocate data structures for multi-threading support and
    // to enable dynamic linking of jitted code.
    // If 'load_backend' is true, load the backend static object for llvm. This is needed
    // when libbackend.so is loaded from java. llvm will by default only look in
    // the current object and not be able to find the backend symbols.
    // TODO: this can probably be removed after Doris refactor where the java
    // side is not loading the be explicitly anymore.
    static void initialize_llvm(bool load_backend = false);

    // Loads and parses the precompiled doris IR module.
    // 'codegen' will contain the created object on success.
    static Status load_doris_ir(
        ObjectPool*, const std::string& id, boost::scoped_ptr* codegen);

    // Removes all jit compiled dynamically linked functions from the process.
    ~LlvmCodeGen();

    // Accessors for the codegen profile and its codegen timer counter.
    RuntimeProfile* runtime_profile() { return &_profile; }
    RuntimeProfile::Counter* codegen_timer() { return _codegen_timer; }

    // Turns on/off optimization passes.
    void enable_optimizations(bool enable);

    // For debugging. Returns the IR that was generated. If 'full_module' is true, the
    // entire module is dumped, including what was loaded from precompiled IR.
    // If false, only output IR for functions which were generated.
    std::string get_ir(bool full_module) const;

    // Typedef builder in case we want to change the template arguments later.
    typedef llvm::IRBuilder<> LlvmBuilder;

    // Utility struct that wraps a variable name and llvm type.
    struct NamedVariable {
        std::string name;
        llvm::Type* type;

        // Both arguments are optional: 'name' defaults to the empty string and
        // 'type' to NULL.
        NamedVariable(const std::string& name = "", llvm::Type* type = NULL) {
            // 'this->' is required because the parameters shadow the members.
            this->name = name;
            this->type = type;
        }
    };

    // Abstraction over function prototypes. Contains helpers to build prototypes and
    // generate IR for the types.
    class FnPrototype {
    public:
        // Create a function prototype object, specifying the name of the function and
        // the return type.
        FnPrototype(LlvmCodeGen*, const std::string& name, llvm::Type* ret_type);

        // Returns name of function.
        const std::string& name() const { return _name; }

        // Add argument. Arguments are appended in call order.
        void add_argument(const NamedVariable& var) { _args.push_back(var); }
        void add_argument(const std::string& name, llvm::Type* type) {
            _args.push_back(NamedVariable(name, type));
        }

        // Generate LLVM function prototype.
        // If a non-null builder is passed, this function will also create the entry block
        // and set the builder's insert point to there.
        // If params is non-null, this function will also return the arguments
        // values (params[0] is the first arg, etc).
        // In that case, params should be preallocated to hold one entry per argument.
        llvm::Function* generate_prototype(LlvmBuilder* builder = NULL, llvm::Value** params = NULL);

    private:
        friend class LlvmCodeGen;

        LlvmCodeGen* _codegen;
        std::string _name;
        llvm::Type* _ret_type;
        // Arguments added via add_argument(), in insertion order.
        std::vector _args;
    };

    /// Codegens IR to load array[idx] and returns the loaded value. 'array' should be a
    /// C-style array (e.g. i32*) or an IR array (e.g. [10 x i32]). This function does not
    /// do bounds checking.
    llvm::Value* codegen_array_at(
        LlvmBuilder*, llvm::Value* array, int idx, const char* name);

    /// Return a pointer type to 'type'.
    llvm::PointerType* get_ptr_type(llvm::Type* type);

    // Returns llvm type for the primitive type.
    llvm::Type* get_type(const PrimitiveType& type);

    // Returns llvm type for the type descriptor.
    llvm::Type* get_type(const TypeDescriptor& type);

    // Return a pointer type to 'type' (e.g. int16_t*).
    llvm::PointerType* get_ptr_type(const TypeDescriptor& type);
    llvm::PointerType* get_ptr_type(const PrimitiveType& type);

    // Returns the type with 'name'. This is used to pull types from clang
    // compiled IR. The types we generate at runtime are unnamed.
    // The name is generated by the clang compiler in this form:
    // <class/struct>.<namespace>::<class name>. For example:
    // "class.doris::AggregationNode"
    llvm::Type* get_type(const std::string& name);

    /// Returns the pointer type of the type returned by get_type(name).
    llvm::PointerType* get_ptr_type(const std::string& name);

    /// Alloca's an instance of the appropriate pointer type and sets it to point at 'v'.
    llvm::Value* get_ptr_to(LlvmBuilder* builder, llvm::Value* v, const char* name);

    /// Alloca's an instance of the appropriate pointer type and sets it to point at 'v'.
    /// Convenience overload that forwards an empty name.
    llvm::Value* get_ptr_to(LlvmBuilder* builder, llvm::Value* v) {
        return get_ptr_to(builder, v, "");
    }

    // Returns reference to llvm context object. Each LlvmCodeGen has its own
    // context to allow multiple threads to be calling into llvm at the same time.
    llvm::LLVMContext& context() { return *_context.get(); }

    // Returns execution engine interface.
    llvm::ExecutionEngine* execution_engine() { return _execution_engine.get(); }

    // Returns the underlying llvm module.
    llvm::Module* module() { return _module; }

    // Register an expr function with unique id. It can be subsequently retrieved via
    // get_registered_expr_fn with that id.
    void register_expr_fn(int64_t id, llvm::Function* function) {
        // Registering the same id twice is a programming error (checked via DCHECK).
        DCHECK(_registered_exprs_map.find(id) == _registered_exprs_map.end());
        _registered_exprs_map[id] = function;
        _registered_exprs.insert(function);
    }

    // Returns a registered expr function for id or NULL if it does not exist.
    llvm::Function* get_registered_expr_fn(int64_t id) {
        std::map::iterator it = _registered_exprs_map.find(id);

        if (it == _registered_exprs_map.end()) {
            return NULL;
        }

        return it->second;
    }

    /// Optimize and compile the module. This should be called after all functions to JIT
    /// have been added to the module via add_function_to_jit(). If _optimizations_enabled
    /// is false, the module will not be optimized before compilation.
    Status finalize_module();

    // Optimize the entire module. LLVM is more built for running its optimization
    // passes over the entire module (all the functions) rather than individual
    // functions.
    void optimize_module();

    // Replaces all instructions that call 'target_name' with a call instruction
    // to the new_fn. Returns the modified function.
    // - target_name is the unmangled function name that should be replaced.
    //   The name is assumed to be unmangled so all call sites that contain the
    //   target_name substring will be replaced. target_name is case-sensitive.
    //   TODO: be more strict than substring? work out the mangling rules?
    // - If update_in_place is true, the caller function will be modified in place.
    //   Otherwise, the caller function will be cloned and the original function
    //   is unmodified. If update_in_place is false and the function has already
    //   been dynamically linked, the existing function will be unlinked. Note that
    //   this is very unthread-safe, if there are threads in the function to be unlinked,
    //   bad things will happen.
    // - 'num_replaced' returns the number of call sites updated.
    //
    // Most of our use cases will likely not be in place. We will have one 'template'
    // version of the function loaded for each type of Node (e.g. AggregationNode).
    // Each instance of the node will clone the function, replacing the inner loop
    // body with the codegened version. The codegened bodies differ from instance
    // to instance since they are specific to the node's tuple desc.
    llvm::Function* replace_call_sites(llvm::Function* caller, bool update_in_place,
                                       llvm::Function* new_fn, const std::string& target_name, int* num_replaced);

    /// Returns a copy of fn. The copy is added to the module.
    llvm::Function* clone_function(llvm::Function* fn);

    // Verify and optimize function. This should be called at the end for each
    // codegen'd function. If the function does not verify, it will return NULL,
    // otherwise, it will optimize, mark the function for inlining and return the
    // function object.
    llvm::Function* finalize_function(llvm::Function* function);

    // Inline all function calls for 'fn'. 'fn' is modified in place. Returns
    // the number of functions inlined. This is *not* called recursively
    // (i.e. second level function calls are not inlined). This can be called
    // again to inline those until this returns 0.
    int inline_call_sites(llvm::Function* fn, bool skip_registered_fns);

    // Optimizes the function in place. This uses a combination of llvm optimization
    // passes as well as some custom heuristics. This should be called for all
    // functions which call Exprs. The exprs will be inlined as much as possible,
    // and will do basic sub expression elimination.
    // This should be called before optimize_module for functions that want to remove
    // redundant exprs. This should be called at the highest level possible to
    // maximize the number of redundant exprs that can be found.
    // TODO: we need to spend more time to output better IR. Asking llvm to
    // remove redundant codeblocks on its own is too difficult for it.
    // TODO: this should implement the llvm FunctionPass interface and be integrated
    // with the llvm optimization passes.
    llvm::Function* optimize_function_with_exprs(llvm::Function* fn);

    /// Adds the function to be automatically jit compiled after the module is optimized.
    /// That is, after finalize_module(), this will do *fn_ptr = jit_function(fn);
    ///
    /// This is useful since it is not valid to call jit_function() before every part of the
    /// query has finished adding their IR and it's convenient to not have to rewalk the
    /// objects. This provides the same behavior as walking each of those objects and calling
    /// jit_function().
    ///
    /// In addition, any functions not registered with add_function_to_jit() are marked as
    /// internal in finalize_module() and may be removed as part of optimization.
    ///
    /// This will also wrap functions returning DecimalVals in an ABI-compliant wrapper (see
    /// the comment in the .cc file for details). This is so we don't accidentally try to
    /// call non-compliant code from native code.
    void add_function_to_jit(llvm::Function* fn, void** fn_ptr);

    // Jit compile the function. This will run optimization passes and verify
    // the function. The result is a function pointer that is dynamically linked
    // into the process.
    // Returns NULL if the function is invalid.
    // scratch_size will be set to the buffer size required to call the function.
    // scratch_size is the total size from all LlvmCodeGen::get_scratch_buffer
    // calls (with some additional bytes for alignment).
    // This function is thread safe.
    void* jit_function(llvm::Function* function, int* scratch_size = NULL);

    // Verifies the function if the verifier is enabled. Returns false if function
    // is invalid.
    bool verify_function(llvm::Function* function);

    // This will generate a printf call instruction to output 'message' at the
    // builder's insert point. Only for debugging.
    void codegen_debug_trace(LlvmBuilder* builder, const char* message);

    /// Returns the string representation of a llvm::Value* or llvm::Type*.
    /// The object is printed into a raw_string_ostream backed by the returned string.
    template static std::string print(T* value_or_type) {
        std::string str;
        llvm::raw_string_ostream stream(str);
        value_or_type->print(stream);
        return str;
    }

    // Returns the libc function, adding it to the module if it has not already been.
    llvm::Function* get_lib_c_function(FnPrototype* prototype);

    // Returns the cross compiled function. IRFunction::Type is an enum which is
    // defined in 'doris_ir/doris_ir_functions.h'.
    llvm::Function* get_function(IRFunction::Type);

    // Returns the hash function with signature:
    //   int32_t Hash(int8_t* data, int len, int32_t seed);
    // If num_bytes is non-zero, the returned function will be codegen'd to only
    // work for that number of bytes. It is invalid to call that function with a
    // different 'len'.
    // NOTE(review): the default argument is -1, so "non-zero" above presumably means
    // "positive" -- confirm against the implementation.
    llvm::Function* get_hash_function(int num_bytes = -1);

    // Allocate stack storage for local variables. This is similar to traditional c, where
    // all the variables must be declared at the top of the function. This helper can be
    // called from anywhere and will add a stack allocation for 'var' at the beginning of
    // the function. This would be used, for example, if a function needed a temporary
    // struct allocated. The allocated variable is scoped to the function.
    // This is not related to get_scratch_buffer which is used for structs that are returned
    // to the caller.
    llvm::AllocaInst* create_entry_block_alloca(llvm::Function* f, const NamedVariable& var);
    llvm::AllocaInst* create_entry_block_alloca(
        const LlvmBuilder& builder, llvm::Type* type, const char* name);

    // Utility to create two blocks in 'fn' for if/else codegen. if_block and else_block
    // are return parameters. insert_before is optional and if set, the two blocks
    // will be inserted before that block, otherwise they will be inserted at the end
    // of 'fn'. Being able to place blocks is useful for debugging so the IR has a
    // better looking control flow.
    void create_if_else_blocks(llvm::Function* fn, const std::string& if_name,
                               const std::string& else_name,
                               llvm::BasicBlock** if_block, llvm::BasicBlock** else_block,
                               llvm::BasicBlock* insert_before = NULL);

    // Returns offset into scratch buffer: offset points to area of size 'byte_size'.
    // Called by expr generation to request scratch buffer. This is used for struct
    // types (i.e. StringValue) where data cannot be returned by registers.
    // For example, to jit the expr "strlen(str_col)", we need a temporary StringValue
    // struct from the inner SlotRef expr node. The SlotRef node would call
    // get_scratch_buffer(sizeof(StringValue)) and output the intermediate struct at
    // scratch_buffer (passed in as argument to compute function) + offset.
    int get_scratch_buffer(int byte_size);

    // Create a llvm pointer value from 'ptr'. This is used to pass pointers between
    // c-code and code-generated IR. The resulting value will be of 'type'.
    llvm::Value* cast_ptr_to_llvm_ptr(llvm::Type* type, void* ptr);

    // Returns the constant 'val' of 'type'.
    llvm::Value* get_int_constant(PrimitiveType type, int64_t val);

    // Returns true/false constants (bool type).
    llvm::Value* true_value() { return _true_value; }
    llvm::Value* false_value() { return _false_value; }
    // Returns a null pointer constant of ptr_type().
    llvm::Value* null_ptr_value() { return llvm::ConstantPointerNull::get(ptr_type()); }

    // Simple wrappers to reduce code verbosity.
    llvm::Type* boolean_type() { return get_type(TYPE_BOOLEAN); }
    llvm::Type* tinyint_type() { return get_type(TYPE_TINYINT); }
    llvm::Type* smallint_type() { return get_type(TYPE_SMALLINT); }
    llvm::Type* int_type() { return get_type(TYPE_INT); }
    llvm::Type* bigint_type() { return get_type(TYPE_BIGINT); }
    llvm::Type* largeint_type() { return get_type(TYPE_LARGEINT); }
    llvm::Type* float_type() { return get_type(TYPE_FLOAT); }
    llvm::Type* double_type() { return get_type(TYPE_DOUBLE); }
    llvm::Type* string_val_type() const { return _string_val_type; }
    llvm::Type* datetime_val_type() const { return _datetime_val_type; }
    llvm::Type* decimal_val_type() const { return _decimal_val_type; }
    llvm::PointerType* ptr_type() { return _ptr_type; }
    llvm::Type* void_type() { return _void_type; }
    // 128-bit integer type, created in this object's context.
    llvm::Type* i128_type() { return llvm::Type::getIntNTy(context(), 128); }

    // Fills 'functions' with all the functions that are defined in the module.
    // Note: this does not include functions that are just declared.
    void get_functions(std::vector* functions);

    // Generates function to return min/max(v1, v2).
    llvm::Function* codegen_min_max(const TypeDescriptor& type, bool min);

    // Codegen to call llvm memcpy intrinsic at the current builder location.
    // dst & src must be pointer types. size is the number of bytes to copy.
    void codegen_memcpy(LlvmBuilder*, llvm::Value* dst, llvm::Value* src, int size);

    // Codegen for '*dst = src'. For native types, this is just a store, for structs
    // we need to assign the fields one by one.
    void codegen_assign(LlvmBuilder*, llvm::Value* dst, llvm::Value* src, PrimitiveType);

    // Returns the llvm cast opcode to use when converting 'from_type' to 'to_type'.
    llvm::Instruction::CastOps get_cast_op(
        const TypeDescriptor& from_type, const TypeDescriptor& to_type);

private:
    friend class LlvmCodeGenTest;
    friend class SubExprElimination;

    // Top level codegen object. 'module_name' is only used for debugging when
    // outputting the IR. Modules loaded from disk will be named as the file
    // path.
    LlvmCodeGen(ObjectPool* pool, const std::string& module_name);

    // Initializes the jitter and execution engine.
    Status init();

    // Load a pre-compiled IR module from 'file'. This creates a top level
    // codegen object. This is used by tests to load custom modules.
    // 'codegen' will contain the created object on success.
    static Status load_from_file(ObjectPool*, const std::string& file,
                                 boost::scoped_ptr* codegen);

    /// Load a pre-compiled IR module from module_ir. This creates a top level codegen
    /// object. 'codegen' will contain the created object on success.
    static Status load_from_memory(ObjectPool* pool, llvm::MemoryBuffer* module_ir,
                                   const std::string& module_name, const std::string& id,
                                   boost::scoped_ptr* codegen);

    /// Loads an LLVM module. 'module_ir' should be a reference to a memory buffer containing
    /// LLVM bitcode. module_name is the name of the module to use when reporting errors.
    /// The caller is responsible for cleaning up module.
    static Status load_module_from_memory(LlvmCodeGen* codegen, llvm::MemoryBuffer* module_ir,
                                          const std::string& module_name, llvm::Module** module);

    // Load the intrinsics doris needs. This is a one time initialization.
    // Values are stored in '_llvm_intrinsics'.
    Status load_intrinsics();

    // Clears generated hash fns. This is only used for testing.
    void clear_hash_fns();

    // Name of the JIT module. Useful for debugging.
    std::string _name;

    // Codegen counters
    RuntimeProfile _profile;
    RuntimeProfile::Counter* _load_module_timer;
    RuntimeProfile::Counter* _prepare_module_timer;
    RuntimeProfile::Counter* _module_file_size;
    RuntimeProfile::Counter* _codegen_timer;
    RuntimeProfile::Counter* _optimization_timer;
    RuntimeProfile::Counter* _compile_timer;

    // Whether or not optimizations are enabled.
    bool _optimizations_enabled;

    // If true, the module is corrupt and we cannot codegen this query.
    // TODO: we could consider just removing the offending function and attempting to
    // codegen the rest of the query. This requires more testing though to make sure
    // that the error is recoverable.
    bool _is_corrupt;

    // If true, the module has been compiled. It is not valid to add additional
    // functions after this point.
    bool _is_compiled;

    // Error string that llvm will write to.
    std::string _error_string;

    // Top level llvm object. Objects from different contexts do not share anything.
    // We can have multiple instances of the LlvmCodeGen object in different threads.
    boost::scoped_ptr _context;

    // Top level codegen object. Contains everything to jit one 'unit' of code.
    // Owned by the _execution_engine.
    llvm::Module* _module;

    // Execution/Jitting engine.
    boost::scoped_ptr _execution_engine;

    // Current offset into scratch buffer.
    int _scratch_buffer_offset;

    // Keeps track of all the functions that have been jit compiled and linked into
    // the process. Special care needs to be taken if we need to modify these functions.
    // bool is unused.
    std::map _jitted_functions;

    // Lock protecting _jitted_functions.
    boost::mutex _jitted_functions_lock;

    // Keeps track of the external functions that have been included in this module
    // e.g. libc functions or non-jitted doris functions.
    // TODO: this should probably be FnPrototype->Functions mapping
    std::map _external_functions;

    // Functions parsed from pre-compiled module. Indexed by DorisIR::Function enum.
    // NOTE(review): get_function() takes IRFunction::Type; presumably that is the enum
    // meant here -- confirm.
    std::vector _loaded_functions;

    // Stores functions codegen'd by doris. This does not contain cross compiled
    // functions, only functions that were generated at runtime. Does not overlap
    // with _loaded_functions.
    std::vector _codegend_functions;

    // A mapping of unique id to registered expr functions.
    std::map _registered_exprs_map;

    // A set of all the functions in '_registered_exprs_map' for quick lookup.
    std::set _registered_exprs;

    // A cache of loaded llvm intrinsics.
    std::map _llvm_intrinsics;

    // This is a cache of generated hash functions by byte size. It is common
    // for the caller to know the number of bytes to hash (e.g. tuple width) and
    // we can codegen a loop unrolled hash function.
    std::map _hash_fns;

    /// The locations of modules that have been linked. Used to avoid linking the same module
    /// twice, which causes symbol collision errors.
    std::set _linked_modules;

    /// The vector of functions to automatically JIT compile after finalize_module().
    std::vector > _fns_to_jit_compile;

    // Debug utility that will insert a printf-like function into the generated
    // IR. Useful for debugging the IR. This is lazily created.
    llvm::Function* _debug_trace_fn;

    // Debug strings that will be outputted by jitted code. This is a copy of all
    // strings passed to codegen_debug_trace.
    std::vector _debug_strings;

    // llvm representation of a few common types. Owned by context.
    llvm::PointerType* _ptr_type; // int8_t*
    llvm::Type* _void_type; // void
    llvm::Type* _string_val_type; // StringVal
    llvm::Type* _decimal_val_type; // DecimalVal (original comment said StringVal; presumably a copy-paste slip)
    llvm::Type* _datetime_val_type; // DateTimeValue

    // llvm constants to help with code gen verbosity.
    llvm::Value* _true_value;
    llvm::Value* _false_value;
};

}

#endif