// doris/be/src/codegen/llvm_codegen.cpp

// Modifications copyright (C) 2017, Baidu.com, Inc.
// Copyright 2017 The Apache Software Foundation

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include "codegen/llvm_codegen.h"

#include <fstream>
#include <mutex>
#include <iostream>
#include <sstream>
#include <boost/thread/mutex.hpp>

#include <llvm/IR/DataLayout.h>
#include <llvm/Analysis/Passes.h>
#include <llvm/Analysis/InstructionSimplify.h>
#include <llvm/ExecutionEngine/ExecutionEngine.h>
#include <llvm/ExecutionEngine/JIT.h>
#include <llvm/Bitcode/ReaderWriter.h>
#include <llvm/PassManager.h>
#include <llvm/Support/DynamicLibrary.h>
#include <llvm/IRReader/IRReader.h>
#include <llvm/Support/MemoryBuffer.h>
#include <llvm/Support/NoFolder.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/system_error.h>
#include <llvm/Support/InstIterator.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/PassManagerBuilder.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Transforms/Utils/Cloning.h>

#include "common/logging.h"
#include "codegen/subexpr_elimination.h"
#include "codegen/palo_ir_data.h"
#include "palo_ir/palo_ir_names.h"
#include "util/cpu_info.h"
#include "util/path_builder.h"

using llvm::Value;
using llvm::Function;
using llvm::Module;
using llvm::PassManager;
using llvm::PassManagerBuilder;
using llvm::DataLayout;
using llvm::FunctionPassManager;

namespace palo {

// Serializes the one-time LLVM setup done by initialize_llvm().
static std::mutex s_llvm_initialization_lock;
// Becomes true once initialize_llvm() has completed the LLVM setup calls.
static bool s_llvm_initialized = false;

// Performs the process-wide LLVM setup. Repeated calls are no-ops: the
// s_llvm_initialized flag is checked and set under
// s_llvm_initialization_lock.
//
// load_backend: when true, additionally loads "service/libfesupport.so"
// (resolved through PathBuilder) so LLVM can find its symbols in test
// environments.
void LlvmCodeGen::initialize_llvm(bool load_backend) {
    std::lock_guard<std::mutex> guard(s_llvm_initialization_lock);
    if (s_llvm_initialized) {
        return;
    }

    // Allocates a global llvm struct and enables multithreading. There is no
    // good point at which to clean it up, but it is only created once.
    const bool result = llvm::llvm_start_multithreaded();
    DCHECK(result);

    // May *only* be called once per process; sets up dynamic linking of
    // jitted code.
    llvm::InitializeNativeTarget();
    s_llvm_initialized = true;

    if (!load_backend) {
        return;
    }

    // For the test env, libfesupport.so has to be loaded to provide symbols
    // for LLVM.
    std::string lib_path;
    PathBuilder::get_full_build_path("service/libfesupport.so", &lib_path);
    // LoadLibraryPermanently() reports failure by returning true.
    bool failed = llvm::sys::DynamicLibrary::LoadLibraryPermanently(lib_path.c_str());
    DCHECK_EQ(failed, 0);
}

// Constructs a code generator called 'name'. 'pool' is handed to the
// "CodeGen" runtime profile. The module and execution engine start out
// unset (NULL); only the LLVM context is created here. Requires
// initialize_llvm() to have been called beforehand (checked via DCHECK).
LlvmCodeGen::LlvmCodeGen(ObjectPool* pool, const std::string& name)
        : _name(name),
          _profile(pool, "CodeGen"),
          _optimizations_enabled(false),
          _is_corrupt(false),
          _is_compiled(false),
          _context(new llvm::LLVMContext()),
          _module(NULL),
          _execution_engine(NULL),
          _scratch_buffer_offset(0),
          _debug_trace_fn(NULL) {
    DCHECK(s_llvm_initialized) << "Must call LlvmCodeGen::initialize_llvm first.";

    // Register the timers/counters reported under the "CodeGen" profile.
    _load_module_timer = ADD_TIMER(&_profile, "LoadTime");
    _prepare_module_timer = ADD_TIMER(&_profile, "PrepareTime");
    _module_file_size = ADD_COUNTER(&_profile, "ModuleFileSize", TUnit::BYTES);
    _codegen_timer = ADD_TIMER(&_profile, "CodegenTime");
    _optimization_timer = ADD_TIMER(&_profile, "OptimizationTime");
    _compile_timer = ADD_TIMER(&_profile, "CompileTime");

    // One slot per IRFunction id; load_palo_ir() fills these in.
    _loaded_functions.resize(IRFunction::FN_END);
}

// Creates a new LlvmCodeGen in '*codegen' and initializes it from the LLVM
// bitcode file at path 'file'.
//
// pool:    forwarded to the LlvmCodeGen constructor.
// file:    path of the bitcode module to load.
// codegen: out parameter; always reset to a fresh LlvmCodeGen, even when an
//          error is returned.
//
// Returns an error Status if the file cannot be read, if the bitcode cannot
// be parsed, or if init() fails.
Status LlvmCodeGen::load_from_file(
        ObjectPool* pool,
        const std::string& file,
        boost::scoped_ptr<LlvmCodeGen>* codegen) {
    codegen->reset(new LlvmCodeGen(pool, ""));
    SCOPED_TIMER((*codegen)->_profile.total_time_counter());
    SCOPED_TIMER((*codegen)->_load_module_timer);

    llvm::OwningPtr<llvm::MemoryBuffer> file_buffer;
    llvm::error_code err = llvm::MemoryBuffer::getFile(file, file_buffer);
    if (err.value() != 0) {
        std::stringstream ss;
        ss << "Could not load module " << file << ": " << err.message();
        return Status(ss.str());
    }

    COUNTER_UPDATE((*codegen)->_module_file_size, file_buffer->getBufferSize());

    // Parse the bitcode the same way load_module_from_memory() does. Without
    // this call 'loaded_module' stays NULL and the function can never
    // succeed.
    std::string error;
    llvm::Module* loaded_module = llvm::ParseBitcodeFile(
            file_buffer.get(), (*codegen)->context(), &error);
    if (loaded_module == NULL) {
        std::stringstream ss;
        ss << "Could not parse module " << file << ": " << error;
        return Status(ss.str());
    }

    (*codegen)->_module = loaded_module;

    return (*codegen)->init();
}

// Creates a new LlvmCodeGen (with name 'id') in '*codegen', parses the
// bitcode held in 'module_ir' into its module and then runs init().
// 'module_name' is only used in error messages produced while parsing.
// Returns the first error encountered, otherwise the result of init().
Status LlvmCodeGen::load_from_memory(
        ObjectPool* pool, llvm::MemoryBuffer* module_ir,
        const std::string& module_name, const std::string& id,
        boost::scoped_ptr<LlvmCodeGen>* codegen) {
    codegen->reset(new LlvmCodeGen(pool, id));
    LlvmCodeGen* gen = codegen->get();
    SCOPED_TIMER(gen->_profile.total_time_counter());

    llvm::Module* parsed_module = NULL;
    RETURN_IF_ERROR(load_module_from_memory(gen, module_ir, module_name, &parsed_module));
    gen->_module = parsed_module;

    return gen->init();
}

// Parses the bitcode in 'module_ir' using the LLVM context of 'codegen' and
// stores the result in '*module' ('*module' is set to NULL when parsing
// fails). The time spent is charged to codegen's prepare-module timer.
// 'module_name' only appears in the error message.
Status LlvmCodeGen::load_module_from_memory(
        LlvmCodeGen* codegen, llvm::MemoryBuffer* module_ir,
        const std::string& module_name, llvm::Module** module) {
    SCOPED_TIMER(codegen->_prepare_module_timer);

    std::string parse_error;
    llvm::Module* parsed = llvm::ParseBitcodeFile(module_ir, codegen->context(), &parse_error);
    *module = parsed;
    if (parsed != NULL) {
        return Status::OK;
    }

    std::stringstream msg;
    msg << "Could not parse module " << module_name << ": " << parse_error;
    return Status(msg.str());
}

// Creates a LlvmCodeGen (named 'id') in '*codegen_ret' from the embedded,
// cross-compiled Palo IR module, then resolves the struct types and the
// precompiled functions that the rest of codegen looks up by IRFunction id.
//
// Returns an error if the module cannot be loaded, if the StringValue layout
// in the IR does not match sizeof(StringValue), if a precompiled function
// name matches more than once, or if any expected function is missing.
Status LlvmCodeGen::load_palo_ir(
        ObjectPool* pool,
        const std::string& id,
        boost::scoped_ptr<LlvmCodeGen>* codegen_ret) {
    // Pick the IR flavour that matches the host. LLVM IR containing sse
    // instructions cannot be used on a machine without sse support: loading
    // the module fails whether or not those instructions would be executed.
    const bool has_sse42 = CpuInfo::is_supported(CpuInfo::SSE4_2);
    llvm::StringRef module_ir;
    std::string module_name;
    if (!has_sse42) {
        module_ir = llvm::StringRef(reinterpret_cast<const char*>(palo_no_sse_llvm_ir),
                                    palo_no_sse_llvm_ir_len);
        module_name = "Palo IR with no SSE support";
    } else {
        module_ir = llvm::StringRef(reinterpret_cast<const char*>(palo_sse_llvm_ir),
                                    palo_sse_llvm_ir_len);
        module_name = "Palo IR with SSE support";
    }

    boost::scoped_ptr<llvm::MemoryBuffer> module_ir_buf(
        llvm::MemoryBuffer::getMemBuffer(module_ir, "", false));
    RETURN_IF_ERROR(
        load_from_memory(pool, module_ir_buf.get(), module_name, id, codegen_ret));
    LlvmCodeGen* codegen = codegen_ret->get();

    // Everything below parses the module for cross compiled functions/types.
    SCOPED_TIMER(codegen->_profile.total_time_counter());
    SCOPED_TIMER(codegen->_load_module_timer);

    // Look up the IR struct types for StringValue, DecimalValue and
    // DateTimeValue by their llvm class names.
    codegen->_string_val_type = codegen->get_type(StringValue::s_llvm_class_name);
    codegen->_decimal_val_type = codegen->get_type(DecimalValue::_s_llvm_class_name);
    codegen->_datetime_val_type = codegen->get_type(DateTimeValue::_s_llvm_class_name);

    // The IR's view of StringValue must have the same size as the native one.
    const llvm::DataLayout* data_layout = codegen->execution_engine()->getDataLayout();
    llvm::StructType* string_struct =
        static_cast<llvm::StructType*>(codegen->_string_val_type);
    const llvm::StructLayout* layout = data_layout->getStructLayout(string_struct);
    if (layout->getSizeInBytes() != sizeof(StringValue)) {
        DCHECK_EQ(layout->getSizeInBytes(), sizeof(StringValue));
        return Status("Could not create llvm struct type for StringVal");
    }

    // Map every function in the module onto the IRFunction slots.
    std::vector<llvm::Function*> module_fns;
    codegen->get_functions(&module_fns);
    int parsed_functions = 0;

    for (size_t fn_idx = 0; fn_idx < module_fns.size(); ++fn_idx) {
        llvm::Function* candidate = module_fns[fn_idx];
        std::string fn_name = candidate->getName();

        for (int j = IRFunction::FN_START; j < IRFunction::FN_END; ++j) {
            // Substring match, because the compiled function names are
            // mangled.
            // TODO: reconsider this.  Substring match is probably not strict
            // enough but undoing the mangling is no fun either.
            if (fn_name.find(FN_MAPPINGS[j].fn_name) == std::string::npos) {
                continue;
            }

            auto& slot = codegen->_loaded_functions[FN_MAPPINGS[j].fn];
            if (slot != NULL) {
                return Status("Duplicate definition found for function: " + fn_name);
            }

            slot = candidate;
            ++parsed_functions;
        }
    }

    if (parsed_functions == IRFunction::FN_END) {
        return Status::OK;
    }

    // At least one slot is still empty: report every missing function name.
    std::stringstream missing;
    missing << "Unable to find these precompiled functions: ";
    bool need_separator = false;
    for (int i = IRFunction::FN_START; i != IRFunction::FN_END; ++i) {
        if (codegen->_loaded_functions[i] != NULL) {
            continue;
        }
        if (need_separator) {
            missing << ", ";
        }
        missing << FN_MAPPINGS[i].fn_name;
        need_separator = true;
    }

    return Status(missing.str());
}

// Finishes construction: creates an empty module if none was loaded, builds
// the execution engine for the module, caches a few commonly used types and
// constants, and loads the intrinsics.
//
// Returns an error Status if the execution engine cannot be created or if
// load_intrinsics() fails.
Status LlvmCodeGen::init() {
    if (_module == NULL) {
        _module = new llvm::Module(_name, context());
    }

    llvm::CodeGenOpt::Level opt_level = llvm::CodeGenOpt::Aggressive;
#ifndef NDEBUG
    // For debug builds, don't generate JIT compiled optimized assembly.
    // This takes a non-negligible amount of time (~.5 ms per function) and
    // blows up the fe tests (which take ~10-20 ms each).
    opt_level = llvm::CodeGenOpt::None;
#endif
    llvm::EngineBuilder builder = llvm::EngineBuilder(_module).setOptLevel(opt_level);
    // TODO Uncomment the below line as soon as we upgrade to LLVM 3.5 to enable SSE, if
    // available. In LLVM 3.3 this is done automatically and cannot be enabled because
    // for some reason SSE4 intrinsics selection will not work.
    // builder.setMCPU(llvm::sys::getHostCPUName());
    builder.setErrorStr(&_error_string);
    _execution_engine.reset(builder.create());
    if (_execution_engine.get() == NULL) {
        // _execution_engine only takes ownership of the module if it is
        // created, so the module has to be released here. Clear the member
        // too, so nothing can reach the freed module through this object.
        delete _module;
        _module = NULL;
        std::stringstream ss;
        ss << "Could not create ExecutionEngine: " << _error_string;
        return Status(ss.str());
    }

    // Cache frequently used types and the i1 true/false constants.
    _void_type = llvm::Type::getVoidTy(context());
    _ptr_type = llvm::PointerType::get(get_type(TYPE_TINYINT), 0);
    _true_value = llvm::ConstantInt::get(context(), llvm::APInt(1, true, true));
    _false_value = llvm::ConstantInt::get(context(), llvm::APInt(1, false, true));

    RETURN_IF_ERROR(load_intrinsics());

    return Status::OK;
}

// Releases the machine code of every function recorded in _jitted_functions.
LlvmCodeGen::~LlvmCodeGen() {
    for (auto jitted = _jitted_functions.begin(); jitted != _jitted_functions.end();
            ++jitted) {
        _execution_engine->freeMachineCodeForFunction(jitted->first);
    }
}

void LlvmCodeGen::enable_optimizations(bool enable) {
    _optimizations_enabled = enable;
}

// Returns the textual LLVM IR of this codegen object.
//
// full_module: when true the whole module is printed; otherwise only the
//              functions recorded in _codegend_functions are printed, in the
//              order they were added.
std::string LlvmCodeGen::get_ir(bool full_module) const {
    std::string str;
    llvm::raw_string_ostream stream(str);
    if (full_module) {
        _module->print(stream, NULL);
    } else {
        for (size_t i = 0; i < _codegend_functions.size(); ++i) {
            _codegend_functions[i]->print(stream, NULL);
        }
    }
    // raw_string_ostream may buffer its output. Flush explicitly so that
    // 'str' is complete before it is returned, instead of depending on the
    // stream's destructor running against the returned object.
    stream.flush();
    return str;
}

// Returns the pointer-to-'type' type in address space 0.
llvm::PointerType* LlvmCodeGen::get_ptr_type(llvm::Type* type) {
    const unsigned address_space = 0;
    return llvm::PointerType::get(type, address_space);
}

// Maps a PrimitiveType onto the llvm type used to represent it. String-like,
// decimal and date/datetime types map onto the struct types cached in
// _string_val_type, _decimal_val_type and _datetime_val_type. Unknown types
// trip a DCHECK and yield NULL.
llvm::Type* LlvmCodeGen::get_type(const PrimitiveType& type) {
    switch (type) {
    // NULL and BOOLEAN are both represented as a 1-bit integer.
    case TYPE_NULL:
    case TYPE_BOOLEAN:
        return llvm::Type::getInt1Ty(context());

    // Fixed-width integers.
    case TYPE_TINYINT:
        return llvm::Type::getInt8Ty(context());
    case TYPE_SMALLINT:
        return llvm::Type::getInt16Ty(context());
    case TYPE_INT:
        return llvm::Type::getInt32Ty(context());
    case TYPE_BIGINT:
        return llvm::Type::getInt64Ty(context());
    case TYPE_LARGEINT:
        return llvm::Type::getIntNTy(context(), 128);

    // Floating point.
    case TYPE_FLOAT:
        return llvm::Type::getFloatTy(context());
    case TYPE_DOUBLE:
        return llvm::Type::getDoubleTy(context());

    // Struct-backed types.
    case TYPE_CHAR:
    case TYPE_VARCHAR:
    case TYPE_HLL:
        return _string_val_type;
    case TYPE_DECIMAL:
        return _decimal_val_type;
    case TYPE_DATE:
    case TYPE_DATETIME:
        return _datetime_val_type;

    default:
        DCHECK(false) << "Invalid type.";
        return NULL;
    }
}

// Convenience overload: resolves the llvm type from the descriptor's
// primitive type.
llvm::Type* LlvmCodeGen::get_type(const TypeDescriptor& type) {
    llvm::Type* resolved = get_type(type.type);
    return resolved;
}

// Returns a pointer (address space 0) to the llvm type that represents the
// descriptor's primitive type.
llvm::PointerType* LlvmCodeGen::get_ptr_type(const TypeDescriptor& type) {
    llvm::Type* pointee = get_type(type.type);
    return llvm::PointerType::get(pointee, 0);
}

// Returns a pointer (address space 0) to the llvm type that represents
// 'type'.
llvm::PointerType* LlvmCodeGen::get_ptr_type(const PrimitiveType& type) {
    llvm::Type* pointee = get_type(type);
    return llvm::PointerType::get(pointee, 0);
}


// Looks up a type by 'name' in this object's module and returns whatever
// Module::getTypeByName() yields for it.
llvm::Type* LlvmCodeGen::get_type(const std::string& name) {
    llvm::Type* named_type = _module->getTypeByName(name);
    return named_type;
}

// Returns a pointer (address space 0) to the module type called 'name'.
// The named type is expected to exist (DCHECK).
llvm::PointerType* LlvmCodeGen::get_ptr_type(const std::string& name) {
    llvm::Type* pointee = get_type(name);
    DCHECK(pointee != NULL) << name;
    return llvm::PointerType::get(pointee, 0);
}

// Embeds the native pointer 'ptr' into IR as a constant of llvm type 'type'.
// Llvm doesn't let you create a PointerValue from a c-side ptr directly, so
// the address is first materialized as a 64-bit integer constant and then
// converted with an inttoptr constant expression.
llvm::Value* LlvmCodeGen::cast_ptr_to_llvm_ptr(llvm::Type* type, void* ptr) {
    llvm::Type* int64_type = llvm::Type::getInt64Ty(context());
    const int64_t address = reinterpret_cast<int64_t>(ptr);
    llvm::Constant* address_const = llvm::ConstantInt::get(int64_type, address);
    return llvm::ConstantExpr::getIntToPtr(address_const, type);
}

// Returns an llvm integer constant holding 'val', with the bit width that
// corresponds to 'type' (TYPE_NULL and TYPE_TINYINT: 8, TYPE_SMALLINT: 16,
// TYPE_INT: 32, TYPE_BIGINT: 64, TYPE_LARGEINT: 128). Any other type trips a
// DCHECK and yields NULL.
llvm::Value* LlvmCodeGen::get_int_constant(PrimitiveType type, int64_t val) {
    unsigned num_bits = 0;
    switch (type) {
    case TYPE_NULL:
    case TYPE_TINYINT:
        num_bits = 8;
        break;
    case TYPE_SMALLINT:
        num_bits = 16;
        break;
    case TYPE_INT:
        num_bits = 32;
        break;
    case TYPE_BIGINT:
        num_bits = 64;
        break;
    case TYPE_LARGEINT:
        num_bits = 128;
        break;
    default:
        DCHECK(false);
        return NULL;
    }

    // 'val' is signed, so build the APInt as signed. For widths up to 64 bits
    // this yields the same bit pattern as before; for the 128-bit case it
    // sign-extends negative values instead of zero-extending them into a
    // large positive number.
    return llvm::ConstantInt::get(context(), llvm::APInt(num_bits, val, true));
}

// Emits an alloca for 'var' (its type and name) at the very beginning of the
// entry block of 'f' and returns the new instruction.
llvm::AllocaInst* LlvmCodeGen::create_entry_block_alloca(
        llvm::Function* f,
        const NamedVariable& var) {
    llvm::BasicBlock& entry_block = f->getEntryBlock();
    llvm::IRBuilder<> entry_builder(&entry_block, entry_block.begin());
    return entry_builder.CreateAlloca(var.type, 0, var.name.c_str());
}

// Convenience overload: emits the alloca in the entry block of the function
// that 'builder' is currently inserting into.
llvm::AllocaInst* LlvmCodeGen::create_entry_block_alloca(
        const LlvmBuilder& builder, llvm::Type* type, const char* name) {
    llvm::Function* enclosing_fn = builder.GetInsertBlock()->getParent();
    return create_entry_block_alloca(enclosing_fn, NamedVariable(name, type));
}

// Creates two basic blocks in 'fn', named 'if_name' and 'else_name', and
// returns them through 'if_block' and 'else_block'. The if block is created
// first; both are created with 'insert_before' as their insertion position.
void LlvmCodeGen::create_if_else_blocks(
        llvm::Function* fn, const std::string& if_name,
        const std::string& else_name, llvm::BasicBlock** if_block, llvm::BasicBlock** else_block,
        llvm::BasicBlock* insert_before) {
    llvm::BasicBlock* then_bb =
        llvm::BasicBlock::Create(context(), if_name, fn, insert_before);
    *if_block = then_bb;

    llvm::BasicBlock* else_bb =
        llvm::BasicBlock::Create(context(), else_name, fn, insert_before);
    *else_block = else_bb;
}

// Returns the function registered in _external_functions under the
// prototype's name. On first use the prototype is generated and cached, so
// later calls with the same name return the same llvm::Function.
llvm::Function* LlvmCodeGen::get_lib_c_function(FnPrototype* prototype) {
    auto cached = _external_functions.find(prototype->name());
    if (cached != _external_functions.end()) {
        return cached->second;
    }

    llvm::Function* generated = prototype->generate_prototype();
    _external_functions[prototype->name()] = generated;
    return generated;
}

// Returns the precompiled function stored for 'function' in
// _loaded_functions. The slot is expected to be populated (DCHECK).
llvm::Function* LlvmCodeGen::get_function(IRFunction::Type function) {
    llvm::Function* loaded = _loaded_functions[function];
    DCHECK(_loaded_functions[function] != NULL);
    return loaded;
}

// Checks that 'fn' is well formed. Returns true if it is; otherwise logs the
// problem, marks this codegen object as corrupt and returns false. Once the
// object is marked corrupt every later call returns false immediately.
//
// There is an llvm bug (#10957) that causes the first step of the verifier to
// always abort the process if it runs into an issue and ignores
// ReturnStatusAction.  This would cause Palo to go down if one query has a
// problem.  To work around this, that step is copied here and does not abort
// on error.
// TODO: doesn't seem there is much traction in getting this fixed but we'll see
bool LlvmCodeGen::verify_function(llvm::Function* fn) {
    if (_is_corrupt) {
        return false;
    }

    // There must be no remaining calls to Expr::GetConstant(); these should
    // all have been inlined via Expr::InlineConstants().
    for (llvm::inst_iterator iter = inst_begin(fn); iter != inst_end(fn); ++iter) {
        llvm::CallInst* call_instr = llvm::dyn_cast<llvm::CallInst>(&*iter);
        if (call_instr == NULL) {
            // Not a call instruction.
            continue;
        }

        llvm::Function* called_fn = call_instr->getCalledFunction();
        if (called_fn == NULL) {
            continue;
        }

        // Look for a call to Expr::GetConstant().
        const bool calls_get_constant = called_fn->getName().find(
                Expr::_s_get_constant_symbol_prefix) != std::string::npos;
        if (calls_get_constant) {
            LOG(ERROR) << "Found call to Expr::GetConstant(): " << print(call_instr);
            _is_corrupt = true;
            break;
        }
    }

    // Copy of the verifier's first step (adapted from the pre-verifier
    // function pass, see the llvm bug described above): every basic block has
    // to be non-empty and end with a terminator.
    for (llvm::Function::iterator block = fn->begin(), end = fn->end();
            block != end; ++block) {
        const bool ends_with_terminator = !block->empty() && block->back().isTerminator();
        if (!ends_with_terminator) {
            LOG(ERROR) << "Basic block must end with terminator: \n" << print(&(*block));
            _is_corrupt = true;
            break;
        }
    }

    // Only hand the function to llvm's own verifier when the checks above
    // found nothing.
    if (!_is_corrupt) {
        _is_corrupt = llvm::verifyFunction(*fn, llvm::PrintMessageAction);
    }

    if (!_is_corrupt) {
        return true;
    }

    std::string fn_name = fn->getName(); // llvm has some fancy operator overloading
    LOG(ERROR) << "Function corrupt: " << fn_name;
    return false;
}

// Describes a function called 'name' that returns 'ret_type' and will be
// generated into the module of 'gen'. Prototypes must not be created once
// the codegen object has been compiled (DCHECK).
LlvmCodeGen::FnPrototype::FnPrototype(
    LlvmCodeGen* gen, const std::string& name, llvm::Type* ret_type)
        : _codegen(gen),
          _name(name),
          _ret_type(ret_type) {
    DCHECK(!_codegen->_is_compiled) << "Not valid to add additional functions";
}

// Creates the llvm::Function described by this prototype (external linkage,
// non-variadic) in the codegen object's module and records it in the
// codegen object's list of generated functions.
//
// builder: if non-NULL, an "entry" basic block is added to the new function
//          and the builder's insert point is set to it.
// params:  if non-NULL, receives the function's arguments in declaration
//          order; it must have room for one entry per argument.
//
// Returns the newly created function.
llvm::Function* LlvmCodeGen::FnPrototype::generate_prototype(
        LlvmBuilder* builder, llvm::Value** params) {
    // Collect the argument types to build the function type.
    std::vector<llvm::Type*> arg_types;
    for (const auto& arg : _args) {
        arg_types.push_back(arg.type);
    }
    llvm::FunctionType* fn_type = llvm::FunctionType::get(_ret_type, arg_types, false);

    llvm::Function* fn = llvm::Function::Create(
        fn_type, llvm::Function::ExternalLinkage, _name, _codegen->_module);
    DCHECK(fn != NULL);

    // Name the arguments and, if requested, hand them back to the caller.
    int idx = 0;
    llvm::Function::arg_iterator iter = fn->arg_begin();
    while (iter != fn->arg_end()) {
        iter->setName(_args[idx].name);
        if (params != NULL) {
            params[idx] = iter;
        }
        ++iter;
        ++idx;
    }

    if (builder != NULL) {
        llvm::BasicBlock* entry_block =
            llvm::BasicBlock::Create(_codegen->context(), "entry", fn);
        builder->SetInsertPoint(entry_block);
    }

    _codegen->_codegend_functions.push_back(fn);
    return fn;
}

// Redirects calls inside 'caller' so that every direct call to a function
// whose name contains 'replacee_name' calls 'new_fn' instead.
//
// caller:          function to rewrite; must belong to this object's module.
// update_in_place: if true 'caller' itself is modified (and, if it was
//                  already jitted, its machine code is freed first);
//                  otherwise a clone is added to the module and modified.
// new_fn:          replacement callee.
// replacee_name:   substring matched against the current callee's name.
// replaced:        out parameter; set to the number of call sites changed.
//
// Returns the function that was modified ('caller' or its clone).
llvm::Function* LlvmCodeGen::replace_call_sites(
        llvm::Function* caller, bool update_in_place,
        llvm::Function* new_fn, const std::string& replacee_name, int* replaced) {
    DCHECK(caller->getParent() == _module);

    if (!update_in_place) {
        // Clone the function and add it to the module
        llvm::ValueToValueMapTy dummy_vmap;
        llvm::Function* new_caller = llvm::CloneFunction(caller, dummy_vmap, false);
        new_caller->copyAttributesFrom(caller);
        _module->getFunctionList().push_back(new_caller);
        caller = new_caller;
    } else if (_jitted_functions.find(caller) != _jitted_functions.end()) {
        // This function is already dynamically linked, unlink it.
        _execution_engine->freeMachineCodeForFunction(caller);
        _jitted_functions.erase(caller);
    }

    *replaced = 0;
    for (llvm::Function::iterator block = caller->begin(); block != caller->end(); ++block) {
        for (llvm::BasicBlock::iterator instr = block->begin();
                instr != block->end(); ++instr) {
            // Only call instructions are of interest.
            llvm::CallInst* call_instr = llvm::dyn_cast<llvm::CallInst>(&*instr);
            if (call_instr == NULL) {
                continue;
            }

            // getCalledFunction() can yield NULL (verify_function() guards
            // against this as well); such call sites have no callee name to
            // match and must be skipped rather than dereferenced.
            llvm::Function* old_fn = call_instr->getCalledFunction();
            if (old_fn == NULL) {
                continue;
            }

            // Substring match on the callee name.
            if (old_fn->getName().find(replacee_name) != std::string::npos) {
                call_instr->setCalledFunction(new_fn);
                ++*replaced;
            }
        }
    }

    return caller;
}

// Returns a copy of 'fn' (including its attributes) that has been appended
// to this object's module.
Function* LlvmCodeGen::clone_function(Function* fn) {
    llvm::ValueToValueMapTy value_map;
    // CloneFunction() automatically gives the new function a unique name
    Function* copy = llvm::CloneFunction(fn, value_map, false);
    copy->copyAttributesFrom(fn);

    _module->getFunctionList().push_back(copy);
    return copy;
}

// Tries to inline every call instruction found in 'fn' and returns how many
// were inlined. When 'skip_registered_fns' is true, calls whose callee is in
// _registered_exprs are left alone.
//
// TODO: revisit this.  Inlining all call sites might not be the right call.  We
// probably need to make this more complicated and somewhat cost based or write
// our own optimization passes.
int LlvmCodeGen::inline_call_sites(llvm::Function* fn, bool skip_registered_fns) {
    // First pass: collect the call sites. Inlining happens afterwards so the
    // instruction lists are not modified while they are being walked.
    std::vector<llvm::CallInst*> call_sites;
    for (llvm::Function::iterator block = fn->begin(); block != fn->end(); ++block) {
        for (llvm::BasicBlock::iterator instr_iter = block->begin();
                instr_iter != block->end(); ++instr_iter) {
            llvm::Instruction* instr = &*instr_iter;
            if (!llvm::CallInst::classof(instr)) {
                continue;
            }

            llvm::CallInst* call_instr = reinterpret_cast<llvm::CallInst*>(instr);
            llvm::Function* called_fn = call_instr->getCalledFunction();

            const bool is_registered = skip_registered_fns
                    && _registered_exprs.find(called_fn) != _registered_exprs.end();
            if (is_registered) {
                continue;
            }

            call_sites.push_back(call_instr);
        }
    }

    // Second pass: inline.  InlineFunction can still fail (function is
    // recursive, etc) but that always leaves the original function in a
    // consistent state.
    int num_inlined = 0;
    for (size_t i = 0; i < call_sites.size(); ++i) {
        llvm::InlineFunctionInfo info;
        if (llvm::InlineFunction(call_sites[i], info)) {
            ++num_inlined;
        }
    }

    return num_inlined;
}

// Repeatedly inlines the call sites of 'fn' until a round inlines nothing,
// then finalizes the function. Returns the result of finalize_function().
llvm::Function* LlvmCodeGen::optimize_function_with_exprs(llvm::Function* fn) {
    // This assumes that all redundant exprs have been registered.
    while (inline_call_sites(fn, false) > 0) {
        // Keep going: the round that just ran may have exposed new call sites.
    }

    // TODO(zc): fix
    // SubExprElimination subexpr_elim(this);
    // subexpr_elim.run(fn);
    return finalize_function(fn);
}

// Verifies 'function'. Returns it unchanged when verification succeeds and
// NULL otherwise.
llvm::Function* LlvmCodeGen::finalize_function(llvm::Function* function) {
    const bool is_valid = verify_function(function);
    return is_valid ? function : NULL;
}

// Marks the module as compiled, optionally optimizes it, and JIT compiles
// every function registered through add_function_to_jit(), storing each
// resulting pointer in the location that was registered with it. Must be
// called at most once (DCHECK). Returns an error if the module has been
// marked corrupt.
Status LlvmCodeGen::finalize_module() {
    DCHECK(!_is_compiled);
    _is_compiled = true;

    // TODO(zc)
#if 0
    if (FLAGS_unopt_module_dir.size() != 0) {
        string path = FLAGS_unopt_module_dir + "/" + id_ + "_unopt.ll";
        fstream f(path.c_str(), fstream::out | fstream::trunc);
        if (f.fail()) {
            LOG(ERROR) << "Could not save IR to: " << path;
        } else {
            f << GetIR(true);
            f.close();
        }
    }
#endif

    if (_is_corrupt) {
        return Status("Module is corrupt.");
    }
    SCOPED_TIMER(_profile.total_time_counter());

    // Don't waste time optimizing the module if there are no functions to
    // JIT. This can happen if the codegen object is created but no functions
    // are successfully codegen'd.
    // TODO(zc): also honor !FLAGS_disable_optimization_passes
    const bool should_optimize = _optimizations_enabled && !_fns_to_jit_compile.empty();
    if (should_optimize) {
        optimize_module();
    }

    SCOPED_TIMER(_compile_timer);
    // JIT compile all codegen'd functions
    for (auto& pending : _fns_to_jit_compile) {
        *pending.second = jit_function(pending.first);
    }
#if 0
    if (FLAGS_opt_module_dir.size() != 0) {
        string path = FLAGS_opt_module_dir + "/" + id_ + "_opt.ll";
        fstream f(path.c_str(), fstream::out | fstream::trunc);
        if (f.fail()) {
            LOG(ERROR) << "Could not save IR to: " << path;
        } else {
            f << GetIR(true);
            f.close();
        }
    }
#endif

    return Status::OK;
}

// Runs the optimization pipeline over the whole module, in three stages:
//   1. internalize everything except the functions registered for JIT and
//      run global dead code elimination,
//   2. run the function-level passes over every function that has a body,
//   3. run the module-level passes.
// The time spent is charged to _optimization_timer.
void LlvmCodeGen::optimize_module() {
    SCOPED_TIMER(_optimization_timer);

    // This pass manager will construct optimizations passes that are "typical" for
    // c/c++ programs.  We're relying on llvm to pick the best passes for us.
    // TODO: we can likely muck with this to get better compile speeds or write
    // our own passes.  Our subexpression elimination optimization can be rolled into
    // a pass.
    PassManagerBuilder pass_builder;
    // 2 maps to -O2
    // TODO: should we switch to 3? (3 may not produce different IR than 2 while taking
    // longer, but we should check)
    pass_builder.OptLevel = 2;
    // Don't optimize for code size (this corresponds to -O2/-O3)
    pass_builder.SizeLevel = 0;
    pass_builder.Inliner = llvm::createFunctionInliningPass() ;

    // Specifying the data layout is necessary for some optimizations (e.g. removing many
    // of the loads/stores produced by structs).
    const std::string& data_layout_str = _module->getDataLayout();
    DCHECK(!data_layout_str.empty());

    // Before running any other optimization passes, run the internalize pass, giving it
    // the names of all functions registered by add_function_to_jit(), followed by the
    // global dead code elimination pass. This causes all functions not registered to be
    // JIT'd to be marked as internal, and any internal functions that are not used are
    // deleted by DCE pass. This greatly decreases compile time by removing unused code.
    std::vector<const char*> exported_fn_names;
    for (int i = 0; i < _fns_to_jit_compile.size(); ++i) {
        // NOTE(review): the raw pointers point into the functions' own name
        // storage; this presumably relies on that storage being
        // NUL-terminated and staying alive while the pass runs - confirm.
        exported_fn_names.push_back(_fns_to_jit_compile[i].first->getName().data());
    }
    boost::scoped_ptr<PassManager> module_pass_manager(new PassManager());
    module_pass_manager->add(new DataLayout(data_layout_str));
    module_pass_manager->add(llvm::createInternalizePass(exported_fn_names));
    module_pass_manager->add(llvm::createGlobalDCEPass());
    module_pass_manager->run(*_module);

    // Create and run function pass manager
    boost::scoped_ptr<FunctionPassManager> fn_pass_manager(new FunctionPassManager(_module));
    fn_pass_manager->add(new DataLayout(data_layout_str));
    pass_builder.populateFunctionPassManager(*fn_pass_manager);
    fn_pass_manager->doInitialization();
    for (Module::iterator it = _module->begin(), end = _module->end(); it != end; ++it) {
        // Declarations have no body to optimize.
        if (!it->isDeclaration()) fn_pass_manager->run(*it);
    }
    fn_pass_manager->doFinalization();

    // Create and run module pass manager
    // (a fresh manager replaces the one used for internalize + DCE above)
    module_pass_manager.reset(new PassManager());
    module_pass_manager->add(new DataLayout(data_layout_str));
    pass_builder.populateModulePassManager(*module_pass_manager);
    module_pass_manager->run(*_module);

    // Disabled: optional per-function instruction count logging.
    // if (FLAGS_print_llvm_ir_instruction_count) {
    //     for (int i = 0; i < _fns_to_jit_compile.size(); ++i) {
    //         InstructionCounter counter;
    //         counter.visit(*_fns_to_jit_compile[i].first);
    //         VLOG(1) << _fns_to_jit_compile[i].first->getName().str();
    //         VLOG(1) << counter.PrintCounters();
    //     }
    // }
}

// Registers 'fn' for JIT compilation. finalize_module() compiles it and
// writes the resulting function pointer to '*fn_ptr'.
// The ABI-wrapper generation for DecimalVal-returning functions below is
// currently compiled out.
void LlvmCodeGen::add_function_to_jit(llvm::Function* fn, void** fn_ptr) {
#if 0
    llvm::Type* decimal_val_type = get_type(CodegenAnyVal::LLVM_DECIMALVAL_NAME);
    if (fn->getReturnType() == decimal_val_type) {
        // Per the x86 calling convention ABI, DecimalVals should be returned via an extra
        // first DecimalVal* argument. We generate non-compliant functions that return the
        // DecimalVal directly, which we can call from generated code, but not from compiled
        // native code.  To avoid accidentally calling a non-compliant function from native
        // code, call 'function' from an ABI-compliant wrapper.
        stringstream name;
        name << fn->getName().str() << "ABIWrapper";
        LlvmCodeGen::FnPrototype prototype(this, name.str(), void_type_);
        // Add return argument
        prototype.AddArgument(NamedVariable("result", decimal_val_type->getPointerTo()));
        // Add regular arguments
        for (Function::arg_iterator arg = fn->arg_begin(); arg != fn->arg_end(); ++arg) {
            prototype.AddArgument(NamedVariable(arg->getName(), arg->getType()));
        }
        LlvmBuilder builder(context());
        Value* args[fn->arg_size() + 1];
        Function* fn_wrapper = prototype.GeneratePrototype(&builder, &args[0]);
        fn_wrapper->addFnAttr(llvm::Attribute::AlwaysInline);
        // Mark first argument as sret (not sure if this is necessary but it can't hurt)
        fn_wrapper->addAttribute(1, Attribute::StructRet);
        // Call 'fn' and store the result in the result argument
        Value* result =
            builder.CreateCall(fn, ArrayRef<Value*>(&args[1], fn->arg_size()), "result");
        builder.CreateStore(result, args[0]);
        builder.CreateRetVoid();
        fn = FinalizeFunction(fn_wrapper);
        DCHECK(fn != NULL);
    }
#endif
    // Remember the (function, destination) pair until finalize_module().
    _fns_to_jit_compile.emplace_back(fn, fn_ptr);
}


// JIT compiles 'function' and returns a pointer to the generated code, or
// NULL if the module has been marked corrupt or the execution engine
// returns no pointer.
//
// scratch_size: if non-NULL, receives the current scratch buffer offset;
//               if NULL, that offset is expected to be 0 (DCHECK).
//
// Functions that compile successfully are recorded in _jitted_functions.
void* LlvmCodeGen::jit_function(llvm::Function* function, int* scratch_size) {
    if (_is_corrupt) {
        return NULL;
    }

    if (scratch_size != NULL) {
        *scratch_size = _scratch_buffer_offset;
    } else {
        DCHECK_EQ(_scratch_buffer_offset, 0);
    }

    // TODO: log a warning if the jitted function is too big (larger than I cache)
    void* code = _execution_engine->getPointerToFunction(function);

    // The bookkeeping map is updated under _jitted_functions_lock.
    boost::lock_guard<boost::mutex> l(_jitted_functions_lock);
    if (code != NULL) {
        _jitted_functions[function] = true;
    }

    return code;
}

// Placeholder for reserving 'byte_size' bytes of scratch space.
// TODO: this is not yet implemented/tested -- debug builds fail the DCHECK below.
// As written it returns the current scratch offset plus 'byte_size' and does NOT
// update '_scratch_buffer_offset'.
// TODO: alignment?
int LlvmCodeGen::get_scratch_buffer(int byte_size) {
    DCHECK(false);
    return _scratch_buffer_offset + byte_size;
}

// Thin C-linkage helper that prints 'str' to stdout with an "LLVM Trace: " prefix.
// Having a plain function with a single pointer argument makes it easy to call
// from generated IR.
extern "C" void debug_trace(const char* str) {
    fprintf(stdout, "LLVM Trace: %s\n", str);
}

// Emits, at the builder's current insert point, a call to debug_trace() that prints
// 'str' when the generated code runs.  Intended for debugging only: every use logs
// an error as a reminder to remove the trace before checking in.
//   builder: IR builder positioned where the trace call should be inserted.
//   str:     message to print; it is copied, so the caller's buffer may go away.
void LlvmCodeGen::codegen_debug_trace(LlvmBuilder* builder, const char* str) {
    LOG(ERROR) << "Remove IR codegen debug traces before checking in.";

    // Lazily link in debug function to the module
    if (_debug_trace_fn == NULL) {
        std::vector<llvm::Type*> param_types(1, _ptr_type);
        llvm::FunctionType* trace_fn_type =
            llvm::FunctionType::get(_void_type, param_types, false);
        _debug_trace_fn = llvm::Function::Create(
                trace_fn_type, llvm::GlobalValue::ExternalLinkage, "debug_trace", _module);

        DCHECK(_debug_trace_fn != NULL);
        // debug_trace shouldn't already exist (llvm mangles function names if there
        // are duplicates)
        DCHECK(_debug_trace_fn->getName() ==  "debug_trace");

        _debug_trace_fn->setCallingConv(llvm::CallingConv::C);

        // Add a mapping to the execution engine so it can link the debug_trace function
        _execution_engine->addGlobalMapping(_debug_trace_fn,
                                            reinterpret_cast<void*>(&debug_trace));
    }

    // Make a copy of str into memory owned by this object.  There is no guarantee that
    // str is still around when the debug printf is executed.
    // NOTE(review): the address embedded in the IR below stays valid only as long as
    // _debug_strings never relocates the character storage of its elements -- confirm
    // against the container's declaration.
    _debug_strings.push_back(str);
    const char* owned_str = _debug_strings.back().c_str();

    // Call the function by turning the owned copy into a constant ptr value
    llvm::Value* str_ptr = cast_ptr_to_llvm_ptr(_ptr_type, const_cast<char*>(owned_str));
    std::vector<llvm::Value*> call_args(1, str_ptr);
    builder->CreateCall(_debug_trace_fn, call_args);
}

// Appends to 'functions' every function in the module that has a body.
// Functions with no basic blocks (fn->empty()) are skipped.
void LlvmCodeGen::get_functions(std::vector<llvm::Function*>* functions) {
    for (llvm::Module::iterator it = _module->begin(); it != _module->end(); ++it) {
        llvm::Function* candidate = &*it;

        if (candidate->empty()) {
            continue;
        }

        functions->push_back(candidate);
    }
}

// Generates an IR function named "Min" or "Max" (selected by 'min') that takes two
// arguments of 'type' and returns one of them.  Returns NULL if the generated
// function fails verification.
//
// For booleans the result is computed directly: v1 && v2 for min, v1 || v2 for max.
// For TYPE_NULL the comparison is the constant false, so v2 is always returned.
// Integer types use signed comparisons; floating point types use the unordered
// comparisons (FCmpULT / FCmpUGT).  Any other type hits DCHECK(false).
//
// TODO: cache this function (e.g. all min(int, int) are identical).
// we probably want some more global IR function cache, or, implement this
// in c and precompile it with clang.
//
// Example IR for min over ints:
// define i32 @Min(i32 %v1, i32 %v2) {
// entry:
//   %0 = icmp slt i32 %v1, %v2
//   br i1 %0, label %ret_v1, label %ret_v2
//
// ret_v1:                                           ; preds = %entry
//   ret i32 %v1
//
// ret_v2:                                           ; preds = %entry
//   ret i32 %v2
// }
llvm::Function* LlvmCodeGen::codegen_min_max(const TypeDescriptor& type, bool min) {
    const char* fn_name = min ? "Min" : "Max";
    LlvmCodeGen::FnPrototype prototype(this, fn_name, get_type(type));
    prototype.add_argument(LlvmCodeGen::NamedVariable("v1", get_type(type)));
    prototype.add_argument(LlvmCodeGen::NamedVariable("v2", get_type(type)));

    llvm::Value* params[2];
    LlvmBuilder builder(context());
    llvm::Function* fn = prototype.generate_prototype(&builder, &params[0]);
    llvm::Value* const v1 = params[0];
    llvm::Value* const v2 = params[1];

    // For booleans this holds the final result; for all other types it is the
    // condition "return v1".
    llvm::Value* compare = NULL;

    switch (type.type) {
    case TYPE_NULL:
        compare = false_value();
        break;

    case TYPE_BOOLEAN:
        // For min, return x && y.  For max, return x || y.
        compare = min ? builder.CreateAnd(v1, v2) : builder.CreateOr(v1, v2);
        break;

    case TYPE_TINYINT:
    case TYPE_SMALLINT:
    case TYPE_INT:
    case TYPE_BIGINT:
        compare = min ? builder.CreateICmpSLT(v1, v2) : builder.CreateICmpSGT(v1, v2);
        break;

    case TYPE_FLOAT:
    case TYPE_DOUBLE:
        compare = min ? builder.CreateFCmpULT(v1, v2) : builder.CreateFCmpUGT(v1, v2);
        break;

    default:
        DCHECK(false);
    }

    if (type.type != TYPE_BOOLEAN) {
        // Branch on the comparison and return the matching argument.
        llvm::BasicBlock* ret_v1 = NULL;
        llvm::BasicBlock* ret_v2 = NULL;
        create_if_else_blocks(fn, "ret_v1", "ret_v2", &ret_v1, &ret_v2);

        builder.CreateCondBr(compare, ret_v1, ret_v2);
        builder.SetInsertPoint(ret_v1);
        builder.CreateRet(v1);
        builder.SetInsertPoint(ret_v2);
        builder.CreateRet(v2);
    } else {
        builder.CreateRet(compare);
    }

    return verify_function(fn) ? fn : NULL;
}

// Looks up the LLVM intrinsics used by codegen and stores their declarations in
// '_llvm_intrinsics', keyed by intrinsic id.  Intrinsics are loaded one by one.
// Some are overloaded (e.g. memcpy) and the types must be specified.
// Returns an error status naming the first intrinsic that could not be found.
// TODO: is there a better way to do this?
Status LlvmCodeGen::load_intrinsics() {
    // Load memcpy.  It is overloaded on (dst ptr, src ptr, size) types.
    {
        llvm::Type* memcpy_types[] = { ptr_type(), ptr_type(), get_type(TYPE_INT) };
        llvm::Function* memcpy_decl = llvm::Intrinsic::getDeclaration(
                module(), llvm::Intrinsic::memcpy, memcpy_types);

        if (memcpy_decl == NULL) {
            return Status("Could not find memcpy intrinsic.");
        }

        _llvm_intrinsics[llvm::Intrinsic::memcpy] = memcpy_decl;
    }

    // TODO: where is the best place to put this?
    // Intrinsics that need no type arguments, with the text used in error messages.
    struct IntrinsicEntry {
        llvm::Intrinsic::ID id;
        const char* error;
    };
    const IntrinsicEntry entries[] = {
        { llvm::Intrinsic::x86_sse42_crc32_32_8, "sse4.2 crc32_u8" },
        { llvm::Intrinsic::x86_sse42_crc32_32_16, "sse4.2 crc32_u16" },
        { llvm::Intrinsic::x86_sse42_crc32_32_32, "sse4.2 crc32_u32" },
        { llvm::Intrinsic::x86_sse42_crc32_64_64, "sse4.2 crc32_u64" },
    };
    const IntrinsicEntry* const entries_end =
        entries + sizeof(entries) / sizeof(entries[0]);

    for (const IntrinsicEntry* entry = entries; entry != entries_end; ++entry) {
        llvm::Function* decl = llvm::Intrinsic::getDeclaration(module(), entry->id);

        if (decl == NULL) {
            return Status(std::string("Could not find ") + entry->error + " intrinsic");
        }

        _llvm_intrinsics[entry->id] = decl;
    }

    return Status::OK;
}

// Emits a call to the memcpy intrinsic that copies 'size' bytes from 'src' to 'dst'.
// Both 'dst' and 'src' must be pointer-typed values (debug-checked); they are
// bitcast to the generic byte pointer type before the call.
void LlvmCodeGen::codegen_memcpy(LlvmBuilder* builder, llvm::Value* dst, llvm::Value* src, int size) {
    DCHECK(llvm::PointerType::classof(dst->getType()));
    DCHECK(llvm::PointerType::classof(src->getType()));

    // Cast src/dst to int8_t*.  If they already are, this will get optimized away
    llvm::Value* dst_bytes = builder->CreateBitCast(dst, ptr_type());
    llvm::Value* src_bytes = builder->CreateBitCast(src, ptr_type());

    // Get intrinsic function.
    llvm::Function* memcpy_fn = _llvm_intrinsics[llvm::Intrinsic::memcpy];
    DCHECK(memcpy_fn != NULL);

    llvm::Value* byte_count = get_int_constant(TYPE_INT, size);
    // The fourth argument is the alignment.  For non-zero values, the caller
    // must guarantee that the src and dst values are aligned to that byte boundary.
    // TODO: We should try to take advantage of this since our tuples are well aligned.
    llvm::Value* alignment = get_int_constant(TYPE_INT, 0);
    llvm::Value* is_volatile = false_value();

    llvm::Value* call_args[] = { dst_bytes, src_bytes, byte_count, alignment, is_volatile };
    builder->CreateCall(memcpy_fn, call_args);
}

// Emits IR that loads and returns the element at constant index 'idx' of 'array'.
// The element address is computed with a single-index GEP on 'array'; 'name' is
// the name given to the loaded value.  'array' must be pointer- or array-typed
// (debug-checked).
Value* LlvmCodeGen::codegen_array_at(
        LlvmBuilder* builder, Value* array, int idx, const char* name) {
    DCHECK(array->getType()->isPointerTy() || array->getType()->isArrayTy())
        << print(array->getType());
    return builder->CreateLoad(builder->CreateConstGEP1_32(array, idx), name);
}

// Emits IR that assigns 'src' to the location 'dst' for a value of primitive 'type'.
//   - CHAR / VARCHAR / HLL: memcpy of sizeof(StringValue) bytes from 'src' to 'dst'
//     (so both must be pointers for these types).
//   - DATETIME: not yet implemented; fails a DCHECK and emits nothing.
//   - everything else: a plain store of 'src' into 'dst'.
void LlvmCodeGen::codegen_assign(LlvmBuilder* builder,
                                llvm::Value* dst, llvm::Value* src, PrimitiveType type) {
    if (type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_HLL) {
        codegen_memcpy(builder, dst, src, sizeof(StringValue));
    } else if (type == TYPE_DATETIME) {
        DCHECK(false) << "Timestamp NYI"; // TODO
    } else {
        builder->CreateStore(src, dst);
    }
}

// Drops every entry from '_hash_fns', the cache that get_hash_function() consults
// and fills.  Only the cache entries are removed here.
void LlvmCodeGen::clear_hash_fns() {
    _hash_fns.clear();
}

// Returns an IR function with signature (i8* data, i32 len, i32 seed) -> i32 that
// hashes 'num_bytes' bytes.
//   - Without SSE4.2 support the generic FNV hash function is returned.
//   - With SSE4.2 and num_bytes == -1 (variable length) the generic loop based crc
//     hash function is returned.
//   - Otherwise a function specialized for exactly 'num_bytes' bytes is generated,
//     cached in '_hash_fns' under 'num_bytes' and returned.  The generated function
//     does not read its 'len' argument.  Returns NULL if finalizing the generated
//     function fails.
//
// Codegen to compute hash for a particular byte size.  Loops are unrolled in this
// process.  For the case where num_bytes == 11, we'd do this by calling
//   1. crc64 (for first 8 bytes)
//   2. crc16 (for bytes 9, 10)
//   3. crc8 (for byte 11)
// and finally swapping the upper and lower 16 bits of the result.
// The resulting IR looks like:
// define i32 @CrcHash11(i8* %data, i32 %len, i32 %seed) {
// entry:
//   %0 = zext i32 %seed to i64
//   %1 = bitcast i8* %data to i64*
//   %2 = getelementptr i64* %1, i32 0
//   %3 = load i64* %2
//   %4 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %0, i64 %3)
//   %5 = trunc i64 %4 to i32
//   %6 = getelementptr i8* %data, i32 8
//   %7 = bitcast i8* %6 to i16*
//   %8 = load i16* %7
//   %9 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %5, i16 %8)
//   %10 = getelementptr i8* %6, i32 2
//   %11 = load i8* %10
//   %12 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %9, i8 %11)
//   %13 = shl i32 %12, 16
//   %14 = lshr i32 %12, 16
//   %15 = or i32 %13, %14
//   ret i32 %15
// }
llvm::Function* LlvmCodeGen::get_hash_function(int num_bytes) {
    if (!CpuInfo::is_supported(CpuInfo::SSE4_2)) {
        // Don't bother with optimizations without crc hash instruction
        return get_function(IRFunction::HASH_FNV);
    }

    if (num_bytes == -1) {
        // -1 indicates variable length, just return the generic loop based
        // hash fn.
        return get_function(IRFunction::HASH_CRC);
    }

    std::map<int, llvm::Function*>::iterator cached_fn = _hash_fns.find(num_bytes);
    if (cached_fn != _hash_fns.end()) {
        return cached_fn->second;
    }

    // Remember the requested size.  'num_bytes' is counted down to 0 while the crc
    // instructions are emitted below, so it must not be used as the cache key once
    // code generation is done.
    const int cache_key = num_bytes;

    // Generate a function to hash these bytes
    std::stringstream ss;
    ss << "CrcHash" << num_bytes;
    FnPrototype prototype(this, ss.str(), get_type(TYPE_INT));
    prototype.add_argument(LlvmCodeGen::NamedVariable("data", ptr_type()));
    prototype.add_argument(LlvmCodeGen::NamedVariable("len", get_type(TYPE_INT)));
    prototype.add_argument(LlvmCodeGen::NamedVariable("seed", get_type(TYPE_INT)));

    llvm::Value* args[3];
    LlvmBuilder builder(context());
    llvm::Function* fn = prototype.generate_prototype(&builder, &args[0]);
    llvm::Value* data = args[0];
    // The running hash value starts out as the seed.
    llvm::Value* result = args[2];

    llvm::Function* crc8_fn = _llvm_intrinsics[llvm::Intrinsic::x86_sse42_crc32_32_8];
    llvm::Function* crc16_fn = _llvm_intrinsics[llvm::Intrinsic::x86_sse42_crc32_32_16];
    llvm::Function* crc32_fn = _llvm_intrinsics[llvm::Intrinsic::x86_sse42_crc32_32_32];
    llvm::Function* crc64_fn = _llvm_intrinsics[llvm::Intrinsic::x86_sse42_crc32_64_64];

    // Generate the crc instructions starting with the highest number of bytes
    if (num_bytes >= 8) {
        // The 64-bit crc intrinsic works on i64 values, so widen the running hash
        // for the duration of the 8-byte chunks and narrow it again afterwards.
        llvm::Value* result_64 = builder.CreateZExt(result, get_type(TYPE_BIGINT));
        llvm::Value* ptr = builder.CreateBitCast(data, get_ptr_type(TYPE_BIGINT));
        int i = 0;

        while (num_bytes >= 8) {
            llvm::Value* index[] = { get_int_constant(TYPE_INT, i++) };
            llvm::Value* d = builder.CreateLoad(builder.CreateGEP(ptr, index));
            result_64 = builder.CreateCall2(crc64_fn, result_64, d);
            num_bytes -= 8;
        }

        result = builder.CreateTrunc(result_64, get_type(TYPE_INT));
        llvm::Value* index[] = { get_int_constant(TYPE_INT, i * 8) };
        // Update data to past the 8-byte chunks
        data = builder.CreateGEP(data, index);
    }

    if (num_bytes >= 4) {
        DCHECK_LT(num_bytes, 8);
        llvm::Value* ptr = builder.CreateBitCast(data, get_ptr_type(TYPE_INT));
        llvm::Value* d = builder.CreateLoad(ptr);
        result = builder.CreateCall2(crc32_fn, result, d);
        llvm::Value* index[] = { get_int_constant(TYPE_INT, 4) };
        data = builder.CreateGEP(data, index);
        num_bytes -= 4;
    }

    if (num_bytes >= 2) {
        DCHECK_LT(num_bytes, 4);
        llvm::Value* ptr = builder.CreateBitCast(data, get_ptr_type(TYPE_SMALLINT));
        llvm::Value* d = builder.CreateLoad(ptr);
        result = builder.CreateCall2(crc16_fn, result, d);
        llvm::Value* index[] = { get_int_constant(TYPE_INT, 2) };
        data = builder.CreateGEP(data, index);
        num_bytes -= 2;
    }

    if (num_bytes > 0) {
        DCHECK_EQ(num_bytes, 1);
        llvm::Value* d = builder.CreateLoad(data);
        result = builder.CreateCall2(crc8_fn, result, d);
        --num_bytes;
    }
    DCHECK_EQ(num_bytes, 0);

    // Swap the upper and lower 16 bits of the hash value.
    Value* shift_16 = get_int_constant(TYPE_INT, 16);
    Value* upper_bits = builder.CreateShl(result, shift_16);
    Value* lower_bits = builder.CreateLShr(result, shift_16);
    result = builder.CreateOr(upper_bits, lower_bits);
    builder.CreateRet(result);

    fn = finalize_function(fn);
    if (fn != NULL) {
        _hash_fns[cache_key] = fn;
    }
    return fn;
}

// Spills the value 'v' to a stack slot and returns a pointer to it.  The slot is
// created with create_entry_block_alloca() using v's type and 'name'; the store of
// 'v' is emitted at the builder's current insert point.
llvm::Value* LlvmCodeGen::get_ptr_to(LlvmBuilder* builder, llvm::Value* v, const char* name) {
    llvm::Value* const slot = create_entry_block_alloca(*builder, v->getType(), name);
    builder->CreateStore(v, slot);
    return slot;
}

// Returns the LLVM cast opcode used to convert a value of 'from_type' to 'to_type',
// or llvm::Instruction::CastOpsEnd if the pair is not supported.
//
// Summary of the mapping:
//   - boolean -> wider integer:   ZExt   (booleans are treated as unsigned)
//   - boolean -> float/double:    UIToFP (unsigned, consistent with ZExt above; a
//                                 signed conversion of a 1-bit 'true' would yield -1.0)
//   - integer -> wider integer:   SExt
//   - integer -> narrower/same:   Trunc
//   - integer -> float/double:    SIToFP
//   - float/double -> integer:    FPToSI
//   - float -> float/double:      FPExt
//   - double -> float/double:     FPTrunc
//
// NOTE(review): same-type pairs (e.g. INT -> INT, FLOAT -> FLOAT, DOUBLE -> DOUBLE)
// return Trunc / FPExt / FPTrunc, which LLVM only accepts between types of
// different widths -- presumably callers never request a cast between identical
// types; confirm against callers.
// NOTE(review): numeric -> BOOLEAN is mapped to Trunc / FPToSI, i.e. a bit-level
// narrowing rather than a "!= 0" test -- confirm this is what callers expect.
llvm::Instruction::CastOps LlvmCodeGen::get_cast_op(
        const TypeDescriptor& from_type, const TypeDescriptor& to_type) {
    switch (from_type.type) {
    case TYPE_BOOLEAN: {
        switch (to_type.type) {
        case TYPE_BOOLEAN:
            return llvm::Instruction::Trunc;
        case TYPE_TINYINT:
        case TYPE_SMALLINT:
        case TYPE_INT:
        case TYPE_BIGINT:
        case TYPE_LARGEINT:
            return llvm::Instruction::ZExt;
        case TYPE_FLOAT:
        case TYPE_DOUBLE:
            // Unsigned conversion so that 'true' becomes 1.0 regardless of how
            // many bits represent a boolean.
            return llvm::Instruction::UIToFP;
        default:
            return llvm::Instruction::CastOpsEnd;
        }
    }
    case TYPE_TINYINT: {
        switch (to_type.type) {
        case TYPE_BOOLEAN:
        case TYPE_TINYINT:
            return llvm::Instruction::Trunc;
        case TYPE_SMALLINT:
        case TYPE_INT:
        case TYPE_BIGINT:
        case TYPE_LARGEINT:
            return llvm::Instruction::SExt;
        case TYPE_FLOAT:
        case TYPE_DOUBLE:
            return llvm::Instruction::SIToFP;
        default:
            return llvm::Instruction::CastOpsEnd;
        }
    }
    case TYPE_SMALLINT: {
        switch (to_type.type) {
        case TYPE_BOOLEAN:
        case TYPE_TINYINT:
        case TYPE_SMALLINT:
            return llvm::Instruction::Trunc;
        case TYPE_INT:
        case TYPE_BIGINT:
        case TYPE_LARGEINT:
            return llvm::Instruction::SExt;
        case TYPE_FLOAT:
        case TYPE_DOUBLE:
            return llvm::Instruction::SIToFP;
        default:
            return llvm::Instruction::CastOpsEnd;
        }
    }
    case TYPE_INT: {
        switch (to_type.type) {
        case TYPE_BOOLEAN:
        case TYPE_TINYINT:
        case TYPE_SMALLINT:
        case TYPE_INT:
            return llvm::Instruction::Trunc;
        case TYPE_BIGINT:
        case TYPE_LARGEINT:
            return llvm::Instruction::SExt;
        case TYPE_FLOAT:
        case TYPE_DOUBLE:
            return llvm::Instruction::SIToFP;
        default:
            return llvm::Instruction::CastOpsEnd;
        }
    }
    case TYPE_BIGINT: {
        switch (to_type.type) {
        case TYPE_BOOLEAN:
        case TYPE_TINYINT:
        case TYPE_SMALLINT:
        case TYPE_INT:
        case TYPE_BIGINT:
            return llvm::Instruction::Trunc;
        case TYPE_LARGEINT:
            return llvm::Instruction::SExt;
        case TYPE_FLOAT:
        case TYPE_DOUBLE:
            return llvm::Instruction::SIToFP;
        default:
            return llvm::Instruction::CastOpsEnd;
        }
    }
    case TYPE_LARGEINT: {
        switch (to_type.type) {
        case TYPE_BOOLEAN:
        case TYPE_TINYINT:
        case TYPE_SMALLINT:
        case TYPE_INT:
        case TYPE_BIGINT:
        case TYPE_LARGEINT:
            return llvm::Instruction::Trunc;
        case TYPE_FLOAT:
        case TYPE_DOUBLE:
            return llvm::Instruction::SIToFP;
        default:
            return llvm::Instruction::CastOpsEnd;
        }
    }
    case TYPE_FLOAT: {
        switch (to_type.type) {
        case TYPE_BOOLEAN:
        case TYPE_TINYINT:
        case TYPE_SMALLINT:
        case TYPE_INT:
        case TYPE_BIGINT:
        case TYPE_LARGEINT:
            return llvm::Instruction::FPToSI;
        case TYPE_FLOAT:
        case TYPE_DOUBLE:
            return llvm::Instruction::FPExt;
        default:
            return llvm::Instruction::CastOpsEnd;
        }
    }
    case TYPE_DOUBLE: {
        switch (to_type.type) {
        case TYPE_BOOLEAN:
        case TYPE_TINYINT:
        case TYPE_SMALLINT:
        case TYPE_INT:
        case TYPE_BIGINT:
        case TYPE_LARGEINT:
            return llvm::Instruction::FPToSI;
        case TYPE_FLOAT:
        case TYPE_DOUBLE:
            return llvm::Instruction::FPTrunc;
        default:
            return llvm::Instruction::CastOpsEnd;
        }
    }
    default:
        return llvm::Instruction::CastOpsEnd;
    }
    // Not reached: every path above returns.  Kept so compilers that cannot prove
    // this do not warn about a missing return.
    return llvm::Instruction::CastOpsEnd;
}

}