From 0f98f975c71337697e38834c2ee9ee3d165632ed Mon Sep 17 00:00:00 2001
From: trueeyu <lxhhust350@qq.com>
Date: Wed, 26 Feb 2020 10:57:57 +0800
Subject: [PATCH] Remove unused LLVM related codes of directory:be/src/codegen
 (#2910) (#2987)

Remove unused LLVM-related code (step 5): be/src/codegen (#2910)

There is a lot of LLVM-related code in the code base, but it is not actually used.
Newer versions of GCC are not compatible with LLVM 3.4.2, the version currently used by Doris.
This PR deletes all LLVM-related code in the directory be/src/codegen.
---
 be/CMakeLists.txt                      |    2 -
 be/src/codegen/CMakeLists.txt          |  102 --
 be/src/codegen/codegen_anyval.cpp      |  708 -------------
 be/src/codegen/codegen_anyval.h        |  279 -----
 be/src/codegen/codegen_anyval_ir.cpp   |   61 --
 be/src/codegen/doris_ir.cpp            |   41 -
 be/src/codegen/doris_ir_data.h         |   30 -
 be/src/codegen/gen_ir_descriptions.py  |  203 ----
 be/src/codegen/llvm_codegen.cpp        | 1355 ------------------------
 be/src/codegen/llvm_codegen.h          |  641 -----------
 be/src/codegen/llvm_codegen_test.cpp   |  455 --------
 be/src/codegen/subexpr_elimination.cpp |  228 ----
 be/src/codegen/subexpr_elimination.h   |   44 -
 be/src/exprs/scalar_fn_call.cpp        |    1 -
 be/src/runtime/datetime_value.cpp      |    2 -
 be/src/runtime/datetime_value.h        |    2 -
 be/src/runtime/decimal_value.cpp       |    2 -
 be/src/runtime/decimal_value.h         |    3 -
 be/src/runtime/string_value.cpp        |    2 -
 be/src/runtime/string_value.h          |    3 -
 be/src/runtime/types.cpp               |   31 -
 be/src/runtime/types.h                 |   12 -
 be/src/service/doris_main.cpp          |    3 -
 23 files changed, 4210 deletions(-)
 delete mode 100644 be/src/codegen/CMakeLists.txt
 delete mode 100644 be/src/codegen/codegen_anyval.cpp
 delete mode 100644 be/src/codegen/codegen_anyval.h
 delete mode 100644 be/src/codegen/codegen_anyval_ir.cpp
 delete mode 100644 be/src/codegen/doris_ir.cpp
 delete mode 100644 be/src/codegen/doris_ir_data.h
 delete mode 100755 be/src/codegen/gen_ir_descriptions.py
 delete mode 100644 be/src/codegen/llvm_codegen.cpp
 delete mode 100644 be/src/codegen/llvm_codegen.h
 delete mode 100644 be/src/codegen/llvm_codegen_test.cpp
 delete mode 100644 be/src/codegen/subexpr_elimination.cpp
 delete mode 100644 be/src/codegen/subexpr_elimination.h

diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 2fa3f80539..a22ab01317 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -498,7 +498,6 @@ set(WL_END_GROUP "-Wl,--end-group")
 set(DORIS_LINK_LIBS
     ${WL_START_GROUP}
     Agent
-    CodeGen
     Common
     Env
     Exec
@@ -622,7 +621,6 @@ if (${MAKE_TEST} STREQUAL "ON")
 endif ()
 
 add_subdirectory(${SRC_DIR}/agent)
-add_subdirectory(${SRC_DIR}/codegen)
 add_subdirectory(${SRC_DIR}/common)
 add_subdirectory(${SRC_DIR}/env)
 add_subdirectory(${SRC_DIR}/exec)
diff --git a/be/src/codegen/CMakeLists.txt b/be/src/codegen/CMakeLists.txt
deleted file mode 100644
index 881ab8fb0a..0000000000
--- a/be/src/codegen/CMakeLists.txt
+++ /dev/null
@@ -1,102 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# where to put generated libraries
-set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/codegen")
-
-# where to put generated binaries
-set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/codegen")
-
-# Generated C files for IR
-set(IR_SSE_C_FILE ${GENSRC_DIR}/doris_ir/doris_sse_ir.cpp)
-set(IR_NO_SSE_C_FILE ${GENSRC_DIR}/doris_ir/doris_no_sse_ir.cpp)
-
-add_library(CodeGen STATIC
-    codegen_anyval.cpp
-    llvm_codegen.cpp
-    subexpr_elimination.cpp
-    ${IR_SSE_C_FILE}
-    ${IR_NO_SSE_C_FILE}
-)
-
-add_dependencies(CodeGen gen_ir_descriptions compile_to_ir_sse compile_to_ir_no_sse)
-
-# output cross compile to ir metadata
-set(IR_DESC_GEN_OUTPUT
-    ${GENSRC_DIR}/doris_ir/doris_ir_names.h
-    ${GENSRC_DIR}/doris_ir/doris_ir_functions.h
-)
-
-add_custom_command(
-    OUTPUT ${IR_DESC_GEN_OUTPUT}
-    COMMAND $ENV{PYTHON} ${CMAKE_CURRENT_SOURCE_DIR}/gen_ir_descriptions.py
-    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/gen_ir_descriptions.py
-    COMMENT "Generating ir cross compile metadata."
-    VERBATIM
-)
-add_custom_target(gen_ir_descriptions ALL DEPENDS ${IR_DESC_GEN_OUTPUT})
-
-set(IR_INPUT_FILES ${CMAKE_CURRENT_SOURCE_DIR}/doris_ir.cpp)
-set(IR_SSE_TMP_OUTPUT_FILE "${GENSRC_DIR}/doris_ir/doris_sse_tmp.bc")
-set(IR_NO_SSE_TMP_OUTPUT_FILE "${GENSRC_DIR}/doris_ir/doris_no_sse_tmp.bc")
-set(IR_SSE_OUTPUT_FILE "${GENSRC_DIR}/doris_ir/doris_sse.bc")
-set(IR_NO_SSE_OUTPUT_FILE "${GENSRC_DIR}/doris_ir/doris_no_sse.bc")
-set(IR_SSE_TMP_C_FILE ${IR_SSE_C_FILE}.tmp)
-set(IR_NO_SSE_TMP_C_FILE ${IR_NO_SSE_C_FILE}.tmp)
-
-# Run the clang compiler to generate IR.  Then run their opt tool to remove
-# unnamed instr.  This makes the IR verifiable and more readable.
-# We need to compile to IR twice, once with sse enabled and one without.  At runtime
-# impala will pick the correct file to load.
-add_custom_command(
-  OUTPUT ${IR_SSE_OUTPUT_FILE}
-  COMMAND ${LLVM_CLANG_EXECUTABLE} ${CLANG_IR_CXX_FLAGS} "-msse4.2" ${CLANG_INCLUDE_FLAGS} ${IR_INPUT_FILES} -o ${IR_SSE_TMP_OUTPUT_FILE}
-  COMMAND ${LLVM_OPT_EXECUTABLE} --instnamer < ${IR_SSE_TMP_OUTPUT_FILE} > ${IR_SSE_OUTPUT_FILE}
-  COMMAND rm ${IR_SSE_TMP_OUTPUT_FILE}
-  DEPENDS Exprs Udf ${IR_INPUT_FILES}
-)
-
-# Compile without sse enabled.
-add_custom_command(
-  OUTPUT ${IR_NO_SSE_OUTPUT_FILE}
-  COMMAND ${LLVM_CLANG_EXECUTABLE} ${CLANG_IR_CXX_FLAGS} ${CLANG_INCLUDE_FLAGS} ${IR_INPUT_FILES} -o ${IR_NO_SSE_TMP_OUTPUT_FILE}
-  COMMAND ${LLVM_OPT_EXECUTABLE} --instnamer < ${IR_NO_SSE_TMP_OUTPUT_FILE} > ${IR_NO_SSE_OUTPUT_FILE}
-  COMMAND rm ${IR_NO_SSE_TMP_OUTPUT_FILE}
-  DEPENDS Exprs Udf ${IR_INPUT_FILES}
-)
-
-add_custom_target(compile_to_ir_sse DEPENDS ${IR_SSE_OUTPUT_FILE})
-add_custom_target(compile_to_ir_no_sse DEPENDS ${IR_NO_SSE_OUTPUT_FILE})
-
-# Convert LLVM bytecode to C array.
-add_custom_command(
-  OUTPUT ${IR_SSE_C_FILE}
-  COMMAND $ENV{DORIS_HOME}/gensrc/script/file2array.sh -n -v doris_sse_llvm_ir ${IR_SSE_OUTPUT_FILE} > ${IR_SSE_TMP_C_FILE}
-  COMMAND mv ${IR_SSE_TMP_C_FILE} ${IR_SSE_C_FILE}
-  DEPENDS $ENV{DORIS_HOME}/gensrc/script/file2array.sh
-  DEPENDS ${IR_SSE_OUTPUT_FILE}
-)
-
-# Convert LLVM bytecode to C array.
-add_custom_command(
-  OUTPUT ${IR_NO_SSE_C_FILE}
-  COMMAND $ENV{DORIS_HOME}/gensrc/script/file2array.sh -n -v doris_no_sse_llvm_ir ${IR_NO_SSE_OUTPUT_FILE} > ${IR_NO_SSE_TMP_C_FILE}
-  COMMAND mv ${IR_NO_SSE_TMP_C_FILE} ${IR_NO_SSE_C_FILE}
-  DEPENDS $ENV{DORIS_HOME}/gensrc/script/file2array.sh
-  DEPENDS ${IR_NO_SSE_OUTPUT_FILE}
-)
-
diff --git a/be/src/codegen/codegen_anyval.cpp b/be/src/codegen/codegen_anyval.cpp
deleted file mode 100644
index 3c226ec42d..0000000000
--- a/be/src/codegen/codegen_anyval.cpp
+++ /dev/null
@@ -1,708 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "codegen/codegen_anyval.h"
-
-#include "runtime/multi_precision.h"
-
-using llvm::Function;
-using llvm::Type;
-using llvm::Value;
-using llvm::ConstantInt;
-using llvm::Constant;
-
-namespace doris {
-
-const char* CodegenAnyVal::_s_llvm_booleanval_name   = "struct.doris_udf::BooleanVal";
-const char* CodegenAnyVal::_s_llvm_tinyintval_name   = "struct.doris_udf::TinyIntVal";
-const char* CodegenAnyVal::_s_llvm_smallintval_name  = "struct.doris_udf::SmallIntVal";
-const char* CodegenAnyVal::_s_llvm_intval_name       = "struct.doris_udf::IntVal";
-const char* CodegenAnyVal::_s_llvm_bigintval_name    = "struct.doris_udf::BigIntVal";
-const char* CodegenAnyVal::_s_llvm_largeintval_name  = "struct.doris_udf::LargeIntVal";
-const char* CodegenAnyVal::_s_llvm_floatval_name     = "struct.doris_udf::FloatVal";
-const char* CodegenAnyVal::_s_llvm_doubleval_name    = "struct.doris_udf::DoubleVal";
-const char* CodegenAnyVal::_s_llvm_stringval_name    = "struct.doris_udf::StringVal";
-const char* CodegenAnyVal::_s_llvm_datetimeval_name = "struct.doris_udf::DateTimeVal";
-const char* CodegenAnyVal::_s_llvm_decimalval_name   = "struct.doris_udf::DecimalVal";
-
-Type* CodegenAnyVal::get_lowered_type(LlvmCodeGen* cg, const TypeDescriptor& type) {
-    switch (type.type) {
-    case TYPE_BOOLEAN: // i16
-        return cg->smallint_type();
-    case TYPE_TINYINT: // i16
-        return cg->smallint_type();
-    case TYPE_SMALLINT: // i32
-        return cg->int_type();
-    case TYPE_INT: // i64
-        return cg->bigint_type();
-    case TYPE_BIGINT: // { i8, i64 }
-        return llvm::StructType::get(cg->tinyint_type(), cg->bigint_type(), NULL);
-    case TYPE_LARGEINT: // %"struct.doris_udf::LargeIntVal" (isn't lowered)
-        // = { {i8}, [15 x i8], i128 }
-        return cg->get_type(_s_llvm_largeintval_name);
-    case TYPE_FLOAT: // i64
-        return cg->bigint_type();
-    case TYPE_DOUBLE: // { i8, double }
-        return llvm::StructType::get(cg->tinyint_type(), cg->double_type(), NULL);
-    case TYPE_VARCHAR: // { i64, i8* }
-    case TYPE_CHAR:
-    case TYPE_HLL:
-        return llvm::StructType::get(cg->bigint_type(), cg->ptr_type(), NULL);
-    case TYPE_DATE:
-    case TYPE_DATETIME: // %"struct.doris_udf::DateTimeVal" (isn't lowered) 
-        // = { {i8}, i64, i32 }
-        return cg->get_type(_s_llvm_datetimeval_name);
-    case TYPE_DECIMAL: // %"struct.doris_udf::DecimalVal" (isn't lowered)
-        // = { {i8}, i8, i8, i8, [9 x i32] }
-        return cg->get_type(_s_llvm_decimalval_name);
-    default:
-        DCHECK(false) << "Unsupported type: " << type;
-        return NULL;
-    }
-}
-
-Type* CodegenAnyVal::get_lowered_ptr_type(LlvmCodeGen* cg, const TypeDescriptor& type) {
-    return get_lowered_type(cg, type)->getPointerTo();
-}
-
-Type* CodegenAnyVal::get_unlowered_type(LlvmCodeGen* cg, const TypeDescriptor& type) {
-    Type* result = NULL;
-    switch (type.type) {
-    case TYPE_BOOLEAN:
-        result = cg->get_type(_s_llvm_booleanval_name);
-        break;
-    case TYPE_TINYINT:
-        result = cg->get_type(_s_llvm_tinyintval_name);
-        break;
-    case TYPE_SMALLINT:
-        result = cg->get_type(_s_llvm_smallintval_name);
-        break;
-    case TYPE_INT:
-        result = cg->get_type(_s_llvm_intval_name);
-        break;
-    case TYPE_BIGINT:
-        result = cg->get_type(_s_llvm_bigintval_name);
-        break;
-    case TYPE_LARGEINT:
-        result = cg->get_type(_s_llvm_largeintval_name);
-        break;
-    case TYPE_FLOAT:
-        result = cg->get_type(_s_llvm_floatval_name);
-        break;
-    case TYPE_DOUBLE:
-        result = cg->get_type(_s_llvm_doubleval_name);
-        break;
-    case TYPE_CHAR:
-    case TYPE_VARCHAR:
-    case TYPE_HLL:
-        result = cg->get_type(_s_llvm_stringval_name);
-        break;
-    case TYPE_DATE:
-    case TYPE_DATETIME:
-        result = cg->get_type(_s_llvm_datetimeval_name);
-        break;
-    case TYPE_DECIMAL:
-        result = cg->get_type(_s_llvm_decimalval_name);
-        break;
-    default:
-        DCHECK(false) << "Unsupported type: " << type;
-        return NULL;
-    }
-    DCHECK(result != NULL) << type;
-    return result;
-}
-
-Type* CodegenAnyVal::get_unlowered_ptr_type(LlvmCodeGen* cg, const TypeDescriptor& type) {
-    return get_unlowered_type(cg, type)->getPointerTo();
-}
-
-Value* CodegenAnyVal::create_call(
-        LlvmCodeGen* cg, LlvmCodeGen::LlvmBuilder* builder, llvm::Function* fn,
-        llvm::ArrayRef<Value*> args, const char* name, Value* result_ptr) {
-    if (fn->getReturnType()->isVoidTy()) {
-        // Void return type indicates that this function returns a DecimalVal via the first
-        // argument (which should be a DecimalVal*).
-        llvm::Function::arg_iterator ret_arg = fn->arg_begin();
-        DCHECK(ret_arg->getType()->isPointerTy());
-        Type* ret_type = ret_arg->getType()->getPointerElementType();
-
-        // We need to pass a DecimalVal pointer to 'fn' that will be populated with the result
-        // value. Use 'result_ptr' if specified, otherwise alloca one.
-        Value* ret_ptr = (result_ptr == NULL) ?
-            cg->create_entry_block_alloca(*builder, ret_type, name) : result_ptr;
-        std::vector<Value*> new_args = args.vec();
-        new_args.insert(new_args.begin(), ret_ptr);
-        builder->CreateCall(fn, new_args);
-
-        // If 'result_ptr' was specified, we're done. Otherwise load and return the result.
-        if (result_ptr != NULL) {
-            return NULL;
-        }
-        return builder->CreateLoad(ret_ptr, name);
-    } else {
-        // Function returns *Val normally (note that it could still be returning a DecimalVal,
-        // since we generate non-complaint functions)
-        Value* ret = builder->CreateCall(fn, args, name);
-        if (result_ptr == NULL) {
-            return ret;
-        }
-        builder->CreateStore(ret, result_ptr);
-        return NULL;
-    }
-}
-
-CodegenAnyVal CodegenAnyVal::create_call_wrapped(
-        LlvmCodeGen* cg, LlvmCodeGen::LlvmBuilder* builder, const TypeDescriptor& type,
-        llvm::Function* fn, llvm::ArrayRef<Value*> args, 
-        const char* name, Value* result_ptr) {
-    Value* v = create_call(cg, builder, fn, args, name, result_ptr);
-    return CodegenAnyVal(cg, builder, type, v, name);
-}
-
-CodegenAnyVal::CodegenAnyVal(
-        LlvmCodeGen* codegen, LlvmCodeGen::LlvmBuilder* builder,
-        const TypeDescriptor& type, Value* value, const char* name) : 
-            _type(type),
-            _value(value),
-            _name(name),
-            _codegen(codegen),
-            _builder(builder) {
-    Type* value_type = get_lowered_type(codegen, type);
-    if (_value == NULL) {
-        // No Value* was specified, so allocate one on the stack and load it.
-        Value* ptr = _codegen->create_entry_block_alloca(*builder, value_type, "");
-        _value = _builder->CreateLoad(ptr, _name);
-    }
-    DCHECK_EQ(_value->getType(), value_type);
-}
-
-Value* CodegenAnyVal::get_is_null(const char* name) {
-    switch (_type.type) {
-    case TYPE_BIGINT:
-    case TYPE_DOUBLE: {
-        // Lowered type is of form { i8, * }. Get the i8 value.
-        Value* is_null_i8 = _builder->CreateExtractValue(_value, 0);
-        DCHECK(is_null_i8->getType() == _codegen->tinyint_type());
-        return _builder->CreateTrunc(is_null_i8, _codegen->boolean_type(), name);
-    }
-    case TYPE_DATE:
-    case TYPE_DATETIME:
-    case TYPE_LARGEINT:
-    case TYPE_DECIMAL: {
-        // Lowered type is of the form { {i8}, ... }
-        uint32_t idxs[] = {0, 0};
-        Value* is_null_i8 = _builder->CreateExtractValue(_value, idxs);
-        DCHECK(is_null_i8->getType() == _codegen->tinyint_type());
-        return _builder->CreateTrunc(is_null_i8, _codegen->boolean_type(), name);
-    }
-    case TYPE_VARCHAR:
-    case TYPE_HLL:
-    case TYPE_CHAR: {
-        // Lowered type is of form { i64, *}. Get the first byte of the i64 value.
-        Value* v = _builder->CreateExtractValue(_value, 0);
-        DCHECK(v->getType() == _codegen->bigint_type());
-        return _builder->CreateTrunc(v, _codegen->boolean_type(), name);
-    }
-    case TYPE_BOOLEAN:
-    case TYPE_TINYINT:
-    case TYPE_SMALLINT:
-    case TYPE_INT:
-    case TYPE_FLOAT:
-        // Lowered type is an integer. Get the first byte.
-        return _builder->CreateTrunc(_value, _codegen->boolean_type(), name);
-    default:
-        DCHECK(false);
-        return NULL;
-    }
-}
-
-void CodegenAnyVal::set_is_null(Value* is_null) {
-    switch (_type.type) {
-    case TYPE_BIGINT:
-    case TYPE_DOUBLE: {
-        // Lowered type is of form { i8, * }. Set the i8 value to 'is_null'.
-        Value* is_null_ext =
-            _builder->CreateZExt(is_null, _codegen->tinyint_type(), "is_null_ext");
-        _value = _builder->CreateInsertValue(_value, is_null_ext, 0, _name);
-        break;
-    }
-    case TYPE_DATE:
-    case TYPE_DATETIME:
-    case TYPE_LARGEINT:
-    case TYPE_DECIMAL: {
-        // Lowered type is of form { {i8}, [15 x i8], {i128} }. Set the i8 value to
-        // 'is_null'.
-        Value* is_null_ext =
-            _builder->CreateZExt(is_null, _codegen->tinyint_type(), "is_null_ext");
-        // Index into the {i8} struct as well as the outer struct.
-        uint32_t idxs[] = {0, 0};
-        _value = _builder->CreateInsertValue(_value, is_null_ext, idxs, _name);
-        break;
-    }
-    case TYPE_VARCHAR:
-    case TYPE_HLL:
-    case TYPE_CHAR: {
-        // Lowered type is of the form { i64, * }. Set the first byte of the i64 value to
-        // 'is_null'
-        Value* v = _builder->CreateExtractValue(_value, 0);
-        v = _builder->CreateAnd(v, -0x100LL, "masked");
-        Value* is_null_ext = _builder->CreateZExt(is_null, v->getType(), "is_null_ext");
-        v = _builder->CreateOr(v, is_null_ext);
-        _value = _builder->CreateInsertValue(_value, v, 0, _name);
-        break;
-    }
-    case TYPE_BOOLEAN:
-    case TYPE_TINYINT:
-    case TYPE_SMALLINT:
-    case TYPE_INT:
-    case TYPE_FLOAT: {
-        // Lowered type is an integer. Set the first byte to 'is_null'.
-        _value = _builder->CreateAnd(_value, -0x100LL, "masked");
-        Value* is_null_ext = _builder->CreateZExt(is_null, _value->getType(), "is_null_ext");
-        _value = _builder->CreateOr(_value, is_null_ext, _name);
-        break;
-    }
-    default:
-        DCHECK(false) << "NYI: " << _type.debug_string();
-    }
-}
-
-Value* CodegenAnyVal::get_val(const char* name) {
-    DCHECK(_type.type != TYPE_VARCHAR) << "Use get_ptr and get_len for Varchar";
-    DCHECK(_type.type != TYPE_HLL) << "Use get_ptr and get_len for Hll";
-    DCHECK(_type.type != TYPE_CHAR) << "Use get_ptr and get_len for Char";
-    switch (_type.type) {
-    case TYPE_BOOLEAN:
-    case TYPE_TINYINT:
-    case TYPE_SMALLINT:
-    case TYPE_INT: {
-        // Lowered type is an integer. Get the high bytes.
-        int num_bits = _type.get_byte_size() * 8;
-        Value* val = get_high_bits(num_bits, _value, name);
-        if (_type.type == TYPE_BOOLEAN) {
-            // Return booleans as i1 (vs. i8)
-            val = _builder->CreateTrunc(val, _builder->getInt1Ty(), name);
-        }
-        return val;
-    }
-    case TYPE_FLOAT: {
-        // Same as above, but we must cast the value to a float.
-        Value* val = get_high_bits(32, _value);
-        return _builder->CreateBitCast(val, _codegen->float_type());
-    }
-    case TYPE_BIGINT:
-    case TYPE_DOUBLE:
-        // Lowered type is of form { i8, * }. Get the second value.
-        return _builder->CreateExtractValue(_value, 1, name);
-    case TYPE_LARGEINT:
-        // Lowered type is of form { i8, [], * }. Get the third value.
-        return _builder->CreateExtractValue(_value, 2, name);
-    case TYPE_DATE:
-    case TYPE_DATETIME:
-        /// TYPE_DATETIME/DateTimeVal: { {i8}, i64, i32 } Not Lowered
-        return _builder->CreateExtractValue(_value, 1, name);
-    default:
-        DCHECK(false) << "Unsupported type: " << _type;
-        return NULL;
-    }
-}
-
-void CodegenAnyVal::set_val(Value* val) {
-    DCHECK(_type.type != TYPE_VARCHAR) << "Use set_ptr and set_len for StringVals";
-    DCHECK(_type.type != TYPE_HLL) << "Use set_ptr and set_len for StringVals";
-    DCHECK(_type.type != TYPE_CHAR) << "Use set_ptr and set_len for StringVals";
-    switch (_type.type) {
-    case TYPE_BOOLEAN:
-    case TYPE_TINYINT:
-    case TYPE_SMALLINT:
-    case TYPE_INT: {
-        // Lowered type is an integer. Set the high bytes to 'val'.
-        int num_bits = _type.get_byte_size() * 8;
-        _value = set_high_bits(num_bits, val, _value, _name);
-        break;
-    }
-    case TYPE_FLOAT:
-        // Same as above, but we must cast 'val' to an integer type.
-        val = _builder->CreateBitCast(val, _codegen->int_type());
-        _value = set_high_bits(32, val, _value, _name);
-        break;
-    case TYPE_BIGINT:
-    case TYPE_DOUBLE:
-        // Lowered type is of form { i8, * }. Set the second value to 'val'.
-        _value = _builder->CreateInsertValue(_value, val, 1, _name);
-        break;
-    case TYPE_LARGEINT:
-        // Lowered type is of form { i8, [], * }. Set the third value to 'val'.
-        _value = _builder->CreateInsertValue(_value, val, 2, _name);
-        break;
-    case TYPE_DATE:
-    case TYPE_DATETIME:
-        /// TYPE_DATETIME/DateTimeVal: { {i8}, i64, i32 } Not Lowered
-        _value = _builder->CreateInsertValue(_value, val, 1, _name);
-        break;
-    default:
-        DCHECK(false) << "Unsupported type: " << _type;
-    }
-}
-
-void CodegenAnyVal::set_val(bool val) {
-    DCHECK_EQ(_type.type, TYPE_BOOLEAN);
-    set_val(_builder->getInt1(val));
-}
-
-void CodegenAnyVal::set_val(int8_t val) {
-    DCHECK_EQ(_type.type, TYPE_TINYINT);
-    set_val(_builder->getInt8(val));
-}
-
-void CodegenAnyVal::set_val(int16_t val) {
-    DCHECK_EQ(_type.type, TYPE_SMALLINT);
-    set_val(_builder->getInt16(val));
-}
-
-void CodegenAnyVal::set_val(int32_t val) {
-    DCHECK(_type.type == TYPE_INT);
-    set_val(_builder->getInt32(val));
-}
-
-void CodegenAnyVal::set_val(int64_t val) {
-    DCHECK(_type.type == TYPE_BIGINT);
-    set_val(_builder->getInt64(val));
-}
-
-void CodegenAnyVal::set_val(__int128 val) {
-    DCHECK_EQ(_type.type, TYPE_LARGEINT);
-    // TODO: is there a better way to do this?
-    // Set high bits
-    Value* ir_val = llvm::ConstantInt::get(_codegen->i128_type(), high_bits(val));
-    ir_val = _builder->CreateShl(ir_val, 64, "tmp");
-    // Set low bits
-    ir_val = _builder->CreateOr(ir_val, low_bits(val), "tmp");
-    set_val(ir_val);
-}
-
-void CodegenAnyVal::set_val(float val) {
-    DCHECK_EQ(_type.type, TYPE_FLOAT);
-    set_val(llvm::ConstantFP::get(_builder->getFloatTy(), val));
-}
-
-void CodegenAnyVal::set_val(double val) {
-    DCHECK_EQ(_type.type, TYPE_DOUBLE);
-    set_val(llvm::ConstantFP::get(_builder->getDoubleTy(), val));
-}
-
-Value* CodegenAnyVal::get_ptr() {
-    // Set the second pointer value to 'ptr'.
-    DCHECK(_type.is_string_type());
-    return _builder->CreateExtractValue(_value, 1, _name);
-}
-
-Value* CodegenAnyVal::get_len() {
-    // Get the high bytes of the first value.
-    DCHECK(_type.is_string_type());
-    Value* v = _builder->CreateExtractValue(_value, 0);
-    return get_high_bits(32, v);
-}
-
-void CodegenAnyVal::set_ptr(Value* ptr) {
-    // Set the second pointer value to 'ptr'.
-    DCHECK(_type.is_string_type());
-    _value = _builder->CreateInsertValue(_value, ptr, 1, _name);
-}
-
-void CodegenAnyVal::set_len(Value* len) {
-    // Set the high bytes of the first value to 'len'.
-    DCHECK(_type.is_string_type());
-    Value* v = _builder->CreateExtractValue(_value, 0);
-    v = set_high_bits(32, len, v);
-    _value = _builder->CreateInsertValue(_value, v, 0, _name);
-}
-
-Value* CodegenAnyVal::get_unlowered_ptr() {
-    Value* value_ptr = _codegen->create_entry_block_alloca(*_builder, _value->getType(), "");
-    _builder->CreateStore(_value, value_ptr);
-    return _builder->CreateBitCast(value_ptr, get_unlowered_ptr_type(_codegen, _type));
-}
-
-void CodegenAnyVal::set_from_raw_ptr(Value* raw_ptr) {
-    Value* val_ptr =
-        _builder->CreateBitCast(raw_ptr, _codegen->get_ptr_type(_type), "val_ptr");
-    Value* val = _builder->CreateLoad(val_ptr);
-    set_from_raw_value(val);
-}
-
-void CodegenAnyVal::set_from_raw_value(Value* raw_val) {
-    DCHECK_EQ(raw_val->getType(), _codegen->get_type(_type))
-        << std::endl << LlvmCodeGen::print(raw_val)
-        << std::endl << _type << " => " << LlvmCodeGen::print(_codegen->get_type(_type));
-    switch (_type.type) {
-    case TYPE_VARCHAR:
-    case TYPE_CHAR: 
-    case TYPE_HLL: {
-        // Convert StringValue to StringVal
-        set_ptr(_builder->CreateExtractValue(raw_val, 0, "ptr"));
-        set_len(_builder->CreateExtractValue(raw_val, 1, "len"));
-        break;
-    }
-    case TYPE_DATE:
-    case TYPE_DATETIME: {
-        Function* fn = _codegen->get_function(IRFunction::TO_DATETIME_VAL);
-        Value* val_ptr = _builder->CreateAlloca(get_lowered_type(_codegen, _type), 0, "val_ptr");
-        _builder->CreateCall2(fn, _codegen->get_ptr_to(_builder, raw_val), val_ptr);
-        _value = _builder->CreateLoad(val_ptr);
-        break;
-    }
-    case TYPE_DECIMAL: {
-        Function* fn = _codegen->get_function(IRFunction::TO_DECIMAL_VAL);
-        Value* val_ptr = _builder->CreateAlloca(get_lowered_type(_codegen, _type), 0, "val_ptr");
-        _builder->CreateCall2(fn, _codegen->get_ptr_to(_builder, raw_val), val_ptr);
-        _value = _builder->CreateLoad(val_ptr);
-        break;
-    }
-    case TYPE_BOOLEAN:
-    case TYPE_TINYINT:
-    case TYPE_SMALLINT:
-    case TYPE_INT:
-    case TYPE_BIGINT:
-    case TYPE_LARGEINT:
-    case TYPE_FLOAT:
-    case TYPE_DOUBLE:
-        // raw_val is a native type
-        set_val(raw_val);
-        break;
-    default:
-        DCHECK(false) << "NYI: " << _type;
-        break;
-    }
-}
-
-Value* CodegenAnyVal::to_native_value() {
-    Type* raw_type = _codegen->get_type(_type);
-    Value* raw_val = llvm::Constant::getNullValue(raw_type);
-    switch (_type.type) {
-    case TYPE_CHAR:
-    case TYPE_VARCHAR: 
-    case TYPE_HLL: {
-        // Convert StringVal to StringValue
-        raw_val = _builder->CreateInsertValue(raw_val, get_ptr(), 0);
-        raw_val = _builder->CreateInsertValue(raw_val, get_len(), 1);
-        break;
-    }
-    case TYPE_BOOLEAN:
-    case TYPE_TINYINT:
-    case TYPE_SMALLINT:
-    case TYPE_INT:
-    case TYPE_BIGINT:
-    case TYPE_LARGEINT:
-    case TYPE_FLOAT:
-    case TYPE_DOUBLE:
-        // raw_val is a native type
-        raw_val = get_val();
-        break;
-    case TYPE_DATE:
-    case TYPE_DATETIME: {
-        Function* func = _codegen->get_function(IRFunction::FROM_DATETIME_VAL);
-        Value* raw_val_ptr = _codegen->create_entry_block_alloca(
-            *_builder, _codegen->get_type(TYPE_DECIMAL), "raw_val_ptr");
-        _builder->CreateCall2(func, raw_val_ptr, _value);
-        raw_val = _builder->CreateLoad(raw_val_ptr, "result");
-        break;
-    }
-    case TYPE_DECIMAL: {
-        Function* func = _codegen->get_function(IRFunction::FROM_DECIMAL_VAL);
-        Value* raw_val_ptr = _codegen->create_entry_block_alloca(
-            *_builder, _codegen->get_type(TYPE_DECIMAL), "raw_val_ptr");
-        _builder->CreateCall2(func, raw_val_ptr, _value);
-        raw_val = _builder->CreateLoad(raw_val_ptr, "result");
-        break;
-    }
-    default:
-        DCHECK(false) << "NYI: " << _type;
-        break;
-    }
-    return raw_val;
-}
-
-Value* CodegenAnyVal::to_native_ptr(Value* native_ptr) {
-    Value* v = to_native_value();
-    if (native_ptr == NULL) {
-        native_ptr = _codegen->create_entry_block_alloca(*_builder, v->getType(), "");
-    }
-    _builder->CreateStore(v, native_ptr);
-    return native_ptr;
-}
-
-Value* CodegenAnyVal::eq(CodegenAnyVal* other) {
-    DCHECK_EQ(_type, other->_type);
-    switch (_type.type) {
-    case TYPE_BOOLEAN:
-    case TYPE_TINYINT:
-    case TYPE_SMALLINT:
-    case TYPE_INT:
-    case TYPE_BIGINT:
-    case TYPE_LARGEINT:
-        return _builder->CreateICmpEQ(get_val(), other->get_val(), "eq");
-    case TYPE_FLOAT:
-    case TYPE_DOUBLE:
-        return _builder->CreateFCmpUEQ(get_val(), other->get_val(), "eq");
-    case TYPE_CHAR:
-    case TYPE_VARCHAR:
-    case TYPE_HLL: {
-        llvm::Function* eq_fn = _codegen->get_function(IRFunction::CODEGEN_ANYVAL_STRING_VAL_EQ);
-        return _builder->CreateCall2(
-            eq_fn, get_unlowered_ptr(), other->get_unlowered_ptr(), "eq");
-    }
-    case TYPE_DATE:
-    case TYPE_DATETIME: {
-        llvm::Function* eq_fn = _codegen->get_function(
-            IRFunction::CODEGEN_ANYVAL_DATETIME_VAL_EQ);
-        return _builder->CreateCall2(
-            eq_fn, get_unlowered_ptr(), other->get_unlowered_ptr(), "eq");
-    }
-    case TYPE_DECIMAL: {
-        llvm::Function* eq_fn = _codegen->get_function(IRFunction::CODEGEN_ANYVAL_DECIMAL_VAL_EQ);
-        return _builder->CreateCall2(
-            eq_fn, get_unlowered_ptr(), other->get_unlowered_ptr(), "eq");
-    }
-    default:
-        DCHECK(false) << "NYI: " << _type;
-        return NULL;
-    }
-}
-
-Value* CodegenAnyVal::eq_to_native_ptr(Value* native_ptr) {
-    Value* val = NULL;
-    if (!_type.is_string_type()) {
-        val = _builder->CreateLoad(native_ptr);
-    }
-    switch (_type.type) {
-    case TYPE_NULL:
-        return _codegen->false_value();
-    case TYPE_BOOLEAN:
-    case TYPE_TINYINT:
-    case TYPE_SMALLINT:
-    case TYPE_INT:
-    case TYPE_BIGINT:
-    case TYPE_LARGEINT:
-        return _builder->CreateICmpEQ(get_val(), val, "cmp_raw");
-    case TYPE_FLOAT:
-    case TYPE_DOUBLE:
-        return _builder->CreateFCmpUEQ(get_val(), val, "cmp_raw");
-    case TYPE_CHAR:
-    case TYPE_VARCHAR: 
-    case TYPE_HLL: {
-        llvm::Function* eq_fn = _codegen->get_function(
-            IRFunction::CODEGEN_ANYVAL_STRING_VALUE_EQ);
-        return _builder->CreateCall2(eq_fn, get_unlowered_ptr(), native_ptr, "cmp_raw");
-    }
-    case TYPE_DATE:
-    case TYPE_DATETIME: {
-        llvm::Function* eq_fn = _codegen->get_function(
-            IRFunction::CODEGEN_ANYVAL_DATETIME_VALUE_EQ);
-        return _builder->CreateCall2(eq_fn, get_unlowered_ptr(), native_ptr, "cmp_raw");
-    }
-    case TYPE_DECIMAL: {
-        llvm::Function* eq_fn = _codegen->get_function(
-            IRFunction::CODEGEN_ANYVAL_DECIMAL_VALUE_EQ);
-        return _builder->CreateCall2(eq_fn, get_unlowered_ptr(), native_ptr, "cmp_raw");
-    }
-
-    default:
-        DCHECK(false) << "NYI: " << _type;
-        return NULL;
-    }
-}
-
-Value* CodegenAnyVal::compare(CodegenAnyVal* other, const char* name) {
-    DCHECK_EQ(_type, other->_type);
-    Value* v1 = to_native_ptr();
-    Value* void_v1 = _builder->CreateBitCast(v1, _codegen->ptr_type());
-    Value* v2 = other->to_native_ptr();
-    Value* void_v2 = _builder->CreateBitCast(v2, _codegen->ptr_type());
-    Value* type_ptr = _codegen->get_ptr_to(_builder, _type.to_ir(_codegen), "type");
-    llvm::Function* compare_fn = _codegen->get_function(IRFunction::RAW_VALUE_COMPARE);
-    Value* args[] = { void_v1, void_v2, type_ptr };
-    return _builder->CreateCall(compare_fn, args, name);
-}
-
-Value* CodegenAnyVal::get_high_bits(int num_bits, Value* v, const char* name) {
-    DCHECK_EQ(v->getType()->getIntegerBitWidth(), num_bits * 2);
-    Value* shifted = _builder->CreateAShr(v, num_bits);
-    return _builder->CreateTrunc(
-        shifted, llvm::IntegerType::get(_codegen->context(), num_bits));
-}
-
-// Example output: (num_bits = 8)
-// %1 = zext i1 %src to i16
-// %2 = shl i16 %1, 8
-// %3 = and i16 %dst1 255 ; clear the top half of dst
-// %dst2 = or i16 %3, %2  ; set the top of half of dst to src
-Value* CodegenAnyVal::set_high_bits(
-        int num_bits, Value* src, Value* dst, const char* name) {
-    DCHECK_LE(src->getType()->getIntegerBitWidth(), num_bits);
-    DCHECK_EQ(dst->getType()->getIntegerBitWidth(), num_bits * 2);
-    Value* extended_src = _builder->CreateZExt(
-        src, llvm::IntegerType::get(_codegen->context(), num_bits * 2));
-    Value* shifted_src = _builder->CreateShl(extended_src, num_bits);
-    Value* masked_dst = _builder->CreateAnd(dst, (1LL << num_bits) - 1);
-    return _builder->CreateOr(masked_dst, shifted_src, name);
-}
-
-Value* CodegenAnyVal::get_null_val(LlvmCodeGen* codegen, const TypeDescriptor& type) {
-    Type* val_type = get_lowered_type(codegen, type);
-    return get_null_val(codegen, val_type);
-}
-
-Value* CodegenAnyVal::get_null_val(LlvmCodeGen* codegen, Type* val_type) {
-    if (val_type->isStructTy()) {
-        llvm::StructType* struct_type = llvm::cast<llvm::StructType>(val_type);
-        std::vector<Constant*> elements;
-        if (struct_type->getElementType(0)->isStructTy()) {
-            // Return the struct { {1}, 0, 0 } (the 'is_null' byte, i.e. the first value's first
-            // byte, is set to 1, the other bytes don't matter)
-            llvm::StructType* anyval_struct_type = llvm::cast<llvm::StructType>(
-                struct_type->getElementType(0));
-            Type* is_null_type = anyval_struct_type->getElementType(0);
-            llvm::Constant* null_anyval = llvm::ConstantStruct::get(
-                anyval_struct_type, llvm::ConstantInt::get(is_null_type, 1));
-            elements.push_back(null_anyval);
-        } else {
-            Type* type1 = struct_type->getElementType(0);
-            elements.push_back(llvm::ConstantInt::get(type1, 1));
-        }
-        for (int i = 1; i < struct_type->getNumElements(); ++i) {
-            Type* ele_type = struct_type->getElementType(i);
-            elements.push_back(llvm::Constant::getNullValue(ele_type));
-        }
-        return llvm::ConstantStruct::get(struct_type, elements);
-    }
-    // Return the int 1 ('is_null' byte is 1, other bytes don't matter)
-    DCHECK(val_type->isIntegerTy());
-    return llvm::ConstantInt::get(val_type, 1);
-}
-
-CodegenAnyVal CodegenAnyVal::get_non_null_val(
-        LlvmCodeGen* codegen, LlvmCodeGen::LlvmBuilder* builder,
-        const TypeDescriptor& type, const char* name) {
-    Type* val_type = get_lowered_type(codegen, type);
-    // All zeros => 'is_null' = false
-    Value* value = llvm::Constant::getNullValue(val_type);
-    return CodegenAnyVal(codegen, builder, type, value, name);
-}
-
-}
-
diff --git a/be/src/codegen/codegen_anyval.h b/be/src/codegen/codegen_anyval.h
deleted file mode 100644
index 5dd5d78b6d..0000000000
--- a/be/src/codegen/codegen_anyval.h
+++ /dev/null
@@ -1,279 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef IMPALA_CODEGEN_CODEGEN_ANYVAL_H
-#define IMPALA_CODEGEN_CODEGEN_ANYVAL_H
-
-#include "codegen/llvm_codegen.h"
-
-namespace llvm {
-class Type;
-class Value;
-}
-
-namespace doris {
-
-/// Class for handling AnyVal subclasses during codegen. Codegen functions should use this
-/// wrapper instead of creating or manipulating *Val values directly in most cases. This is
-/// because the struct types must be lowered to integer types in many cases in order to
-/// conform to the standard calling convention (e.g., { i8, i32 } => i64). This class wraps
-/// the lowered types for each *Val struct.
-//
-/// This class conceptually represents a single *Val that is mutated, but operates by
-/// generating IR instructions involving _value (each of which generates a new Value*,
-/// since IR uses SSA), and then setting _value to the most recent Value* generated. The
-/// generated instructions perform the integer manipulation equivalent to setting the
-/// fields of the original struct type.
-//
-/// Lowered types:
-/// TYPE_BOOLEAN/BooleanVal: i16
-/// TYPE_TINYINT/TinyIntVal: i16
-/// TYPE_SMALLINT/SmallIntVal: i32
-/// TYPE_INT/INTVal: i64
-/// TYPE_BIGINT/BigIntVal: { i8, i64 }
-/// TYPE_LARGEINT/LargeIntVal: { {i8}, [15 x i8], i128 } Not Lowered
-/// TYPE_FLOAT/FloatVal: i64
-/// TYPE_DOUBLE/DoubleVal: { i8, double }
-/// TYPE_STRING/StringVal: { i64, i8* }
-/// TYPE_DATETIME/DateTimeVal: { {i8}, i64, i32 } Not Lowered
-/// TYPE_DECIMAL/DecimalVal: { {i8}, i8, i8, i8, [9 x i32] } Not Lowered
-//
-/// TODO:
-/// - unit tests
-class CodegenAnyVal {
-public:
-    static const char* _s_llvm_booleanval_name;
-    static const char* _s_llvm_tinyintval_name;
-    static const char* _s_llvm_smallintval_name;
-    static const char* _s_llvm_intval_name;
-    static const char* _s_llvm_bigintval_name;
-    static const char* _s_llvm_largeintval_name;
-    static const char* _s_llvm_floatval_name;
-    static const char* _s_llvm_doubleval_name;
-    static const char* _s_llvm_stringval_name;
-    static const char* _s_llvm_datetimeval_name;
-    static const char* _s_llvm_decimalval_name;
-
-    /// Creates a call to 'fn', which should return a (lowered) *Val, and returns the result.
-    /// This abstracts over the x64 calling convention, in particular for functions returning
-    /// a DecimalVal, which pass the return value as an output argument.
-    //
-    /// If 'result_ptr' is non-NULL, it should be a pointer to the lowered return type of
-    /// 'fn' (e.g. if 'fn' returns a BooleanVal, 'result_ptr' should have type i16*). The
-    /// result of calling 'fn' will be stored in 'result_ptr' and this function will return
-    /// NULL. If 'result_ptr' is NULL, this function will return the result (note that the
-    /// result will not be a pointer in this case).
-    //
-    /// 'name' optionally specifies the name of the returned value.
-    static llvm::Value* create_call(
-            LlvmCodeGen* cg, LlvmCodeGen::LlvmBuilder* builder,
-            llvm::Function* fn, llvm::ArrayRef<llvm::Value*> args, 
-            const char* name,
-            llvm::Value* result_ptr);
-
-    static llvm::Value* create_call(
-            LlvmCodeGen* cg, LlvmCodeGen::LlvmBuilder* builder,
-            llvm::Function* fn, llvm::ArrayRef<llvm::Value*> args, 
-            const char* name) {
-        return create_call(cg, builder, fn, args, name, NULL);
-    }
-
-    /// Same as above but wraps the result in a CodegenAnyVal.
-    static CodegenAnyVal create_call_wrapped(LlvmCodeGen* cg,
-            LlvmCodeGen::LlvmBuilder* builder, const TypeDescriptor& type, llvm::Function* fn,
-            llvm::ArrayRef<llvm::Value*> args, const char* name,
-            llvm::Value* result_ptr);
-
-    /// Same as above but wraps the result in a CodegenAnyVal.
-    static CodegenAnyVal create_call_wrapped(LlvmCodeGen* cg,
-            LlvmCodeGen::LlvmBuilder* builder, const TypeDescriptor& type, llvm::Function* fn,
-            llvm::ArrayRef<llvm::Value*> args, const char* name) {
-        return create_call_wrapped(cg, builder, type, fn, args, name, NULL);
-    }
-
-    /// Returns the lowered AnyVal type associated with 'type'.
-    /// E.g.: TYPE_BOOLEAN (which corresponds to a BooleanVal) => i16
-    static llvm::Type* get_lowered_type(LlvmCodeGen* cg, const TypeDescriptor& type);
-
-    /// Returns the lowered AnyVal pointer type associated with 'type'.
-    /// E.g.: TYPE_BOOLEAN => i16*
-    static llvm::Type* get_lowered_ptr_type(LlvmCodeGen* cg, const TypeDescriptor& type);
-
-    /// Returns the unlowered AnyVal type associated with 'type'.
-    /// E.g.: TYPE_BOOLEAN => %"struct.impala_udf::BooleanVal"
-    static llvm::Type* get_unlowered_type(LlvmCodeGen* cg, const TypeDescriptor& type);
-
-    /// Returns the unlowered AnyVal pointer type associated with 'type'.
-    /// E.g.: TYPE_BOOLEAN => %"struct.impala_udf::BooleanVal"*
-    static llvm::Type* get_unlowered_ptr_type(LlvmCodeGen* cg, const TypeDescriptor& type);
-
-    /// Return the constant type-lowered value corresponding to a null *Val.
-    /// E.g.: passing TYPE_DOUBLE (corresponding to the lowered DoubleVal { i8, double })
-    /// returns the constant struct { 1, 0.0 }
-    static llvm::Value* get_null_val(LlvmCodeGen* codegen, const TypeDescriptor& type);
-
-    /// Return the constant type-lowered value corresponding to a null *Val.
-    /// 'val_type' must be a lowered type (i.e. one of the types returned by GetType)
-    static llvm::Value* get_null_val(LlvmCodeGen* codegen, llvm::Type* val_type);
-
-    /// Return the constant type-lowered value corresponding to a non-null *Val.
-    /// E.g.: TYPE_DOUBLE (lowered DoubleVal: { i8, double }) => { 0, 0 }
-    /// This returns a CodegenAnyVal, rather than the unwrapped Value*, because the actual
-    /// value still needs to be set.
-    static CodegenAnyVal get_non_null_val(
-        LlvmCodeGen* codegen, LlvmCodeGen::LlvmBuilder* builder, 
-        const TypeDescriptor& type, const char* name);
-
-    static CodegenAnyVal get_non_null_val(
-            LlvmCodeGen* codegen, LlvmCodeGen::LlvmBuilder* builder, 
-            const TypeDescriptor& type) {
-        return get_non_null_val(codegen, builder, type, "");
-    }
-
-    /// Creates a wrapper around a lowered *Val value.
-    //
-    /// Instructions for manipulating the value are generated using 'builder'. The insert
-    /// point of 'builder' is not modified by this class, and it is safe to call
-    /// 'builder'.SetInsertPoint() after passing 'builder' to this class.
-    //
-    /// 'type' identified the type of wrapped value (e.g., TYPE_INT corresponds to IntVal,
-    /// which is lowered to i64).
-    //
-    /// If 'value' is NULL, a new value of the lowered type is alloca'd. Otherwise 'value'
-    /// must be of the correct lowered type.
-    //
-    /// If 'name' is specified, it will be used when generated instructions that set value.
-    CodegenAnyVal(LlvmCodeGen* codegen, LlvmCodeGen::LlvmBuilder* builder,
-                  const TypeDescriptor& type, llvm::Value* value = NULL, const char* name = "");
-
-    ~CodegenAnyVal() { }
-
-    /// Returns the current type-lowered value.
-    llvm::Value* value() { return _value; }
-
-    /// Gets the 'is_null' field of the *Val.
-    llvm::Value* get_is_null(const char* name = "is_null");
-
-    /// Get the 'val' field of the *Val. Do not call if this represents a StringVal or
-    /// TimestampVal. If this represents a DecimalVal, returns 'val4', 'val8', or 'val16'
-    /// depending on the precision of 'type_'.  The returned value will have variable name
-    /// 'name'.
-    llvm::Value* get_val(const char* name = "val");
-
-    /// Sets the 'is_null' field of the *Val.
-    void set_is_null(llvm::Value* is_null);
-
-    /// Sets the 'val' field of the *Val. Do not call if this represents a StringVal or
-    /// TimestampVal.
-    void set_val(llvm::Value* val);
-
-    /// Sets the 'val' field of the *Val. The *Val must correspond to the argument type.
-    void set_val(bool val);
-    void set_val(int8_t val);
-    void set_val(int16_t val);
-    void set_val(int32_t val);
-    void set_val(int64_t val);
-    void set_val(__int128 val);
-    void set_val(float val);
-    void set_val(double val);
-
-    /// Getters for StringVals.
-    llvm::Value* get_ptr();
-    llvm::Value *get_len();
-
-    /// Setters for StringVals.
-    void set_ptr(llvm::Value* ptr);
-    void set_len(llvm::Value* len);
-
-    /// Allocas and stores this value in an unlowered pointer, and returns the pointer. This
-    /// *Val should be non-null.
-    llvm::Value* get_unlowered_ptr();
-
-    /// Set this *Val's value based on 'raw_val'. 'raw_val' should be a native type,
-    /// StringValue, or DateTimeValue.
-    void set_from_raw_value(llvm::Value* raw_val);
-
-    /// Set this *Val's value based on void* 'raw_ptr'. 'raw_ptr' should be a pointer to a
-    /// native type, StringValue, or TimestampValue (i.e. the value returned by an
-    /// interpreted compute fn).
-    void set_from_raw_ptr(llvm::Value* raw_ptr);
-
-    /// Converts this *Val's value to a native type, StringValue, TimestampValue, etc.
-    /// This should only be used if this *Val is not null.
-    llvm::Value* to_native_value();
-
-    /// Sets 'native_ptr' to this *Val's value. If non-NULL, 'native_ptr' should be a
-    /// pointer to a native type, StringValue, TimestampValue, etc. If NULL, a pointer is
-    /// alloca'd. In either case the pointer is returned. This should only be used if this
-    /// *Val is not null.
-    llvm::Value* to_native_ptr(llvm::Value* native_ptr = NULL);
-
-    /// Returns the i1 result of this == other. this and other must be non-null.
-    llvm::Value* eq(CodegenAnyVal* other);
-
-    /// Compares this *Val to the value of 'native_ptr'. 'native_ptr' should be a pointer to
-    /// a native type, StringValue, or TimestampValue. This *Val should match 'native_ptr's
-    /// type (e.g. if this is an IntVal, 'native_ptr' should have type i32*). Returns the i1
-    /// result of the equality comparison.
-    llvm::Value* eq_to_native_ptr(llvm::Value* native_ptr);
-
-    /// Returns the i32 result of comparing this value to 'other' (similar to
-    /// RawValue::Compare()). This and 'other' must be non-null. Return value is < 0 if
-    /// this < 'other', 0 if this == 'other', > 0 if this > 'other'.
-    llvm::Value* compare(CodegenAnyVal* other, const char* name);
-    llvm::Value* compare(CodegenAnyVal* other) { 
-        return compare(other, "result");
-    }
-
-    /// Ctor for created an uninitialized CodegenAnYVal that can be assigned to later.
-    CodegenAnyVal() : 
-        _type(INVALID_TYPE), _value(NULL), _name(NULL), _codegen(NULL), _builder(NULL) { 
-    }
-
-private:
-    TypeDescriptor _type;
-    llvm::Value* _value;
-    const char* _name;
-
-    LlvmCodeGen* _codegen;
-    LlvmCodeGen::LlvmBuilder* _builder;
-
-    /// Helper function for getting the top (most significant) half of 'v'.
-    /// 'v' should have width = 'num_bits' * 2 and be an integer type.
-    llvm::Value* get_high_bits(int num_bits, llvm::Value* v, const char* name);
-
-    llvm::Value* get_high_bits(int num_bits, llvm::Value* v) {
-        return get_high_bits(num_bits, v, "");
-    }
-
-
-    /// Helper function for setting the top (most significant) half of a 'dst' to 'src'.
-    /// 'src' must have width <= 'num_bits' and 'dst' must have width = 'num_bits' * 2.
-    /// Both 'dst' and 'src' should be integer types.
-    llvm::Value* set_high_bits(
-        int num_bits, llvm::Value* src, llvm::Value* dst, const char* name);
-
-    llvm::Value* set_high_bits(
-            int num_bits, llvm::Value* src, llvm::Value* dst) {
-        return set_high_bits(num_bits, src, dst, "");
-    }
-};
-
-}
-
-#endif
-
diff --git a/be/src/codegen/codegen_anyval_ir.cpp b/be/src/codegen/codegen_anyval_ir.cpp
deleted file mode 100644
index fc14fd193f..0000000000
--- a/be/src/codegen/codegen_anyval_ir.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifdef IR_COMPILE
-
-#include "runtime/string_value.hpp"
-#include "runtime/datetime_value.h"
-#include "runtime/decimal_value.h"
-#include "udf/udf.h"
-
-namespace doris {
-// Note: we explicitly pass by reference because passing by value has special ABI rules
-
-// Used by CodegenAnyVal::Eq()
-
-bool string_val_eq(const StringVal& x, const StringVal& y) {
-    return x == y;
-}
-
-bool datetime_val_eq(const DateTimeVal& x, const DateTimeVal& y) {
-    return x == y;
-}
-
-bool decimal_val_eq(const DecimalVal& x, const DecimalVal& y) {
-    return x == y;
-}
-
-// Used by CodegenAnyVal::EqToNativePtr()
-
-bool string_value_eq(const StringVal& x, const StringValue& y) {
-    StringValue sv = StringValue::from_string_val(x);
-    return sv.eq(y);
-}
-
-bool datetime_value_eq(const DateTimeVal& x, const DateTimeValue& y) {
-    DateTimeValue tv = DateTimeValue::from_datetime_val(x);
-    return tv == y;
-}
-
-bool decimal_value_eq(const DecimalVal& x, const DecimalValue& y) {
-    DecimalValue tv = DecimalValue::from_decimal_val(x);
-    return tv == y;
-}
-}
-#else
-#error "This file should only be used for cross compiling to IR."
-#endif
diff --git a/be/src/codegen/doris_ir.cpp b/be/src/codegen/doris_ir.cpp
deleted file mode 100644
index 4c9a369a5b..0000000000
--- a/be/src/codegen/doris_ir.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifdef IR_COMPILE
-struct __float128;
-#include "codegen/codegen_anyval_ir.cpp"
-#include "exec/aggregation_node_ir.cpp"
-#include "exec/hash_join_node_ir.cpp"
-#include "exprs/cast_functions.cpp"
-#include "exprs/conditional_functions_ir.cpp"
-#include "exprs/decimal_operators.cpp"
-#include "exprs/expr_ir.cpp"
-#include "exprs/is_null_predicate.cpp"
-#include "exprs/like_predicate.cpp"
-#include "exprs/math_functions.cpp"
-#include "exprs/operators.cpp"
-#include "exprs/string_functions.cpp"
-#include "exprs/timestamp_functions.cpp"
-#include "exprs/utility_functions.cpp"
-#include "runtime/raw_value_ir.cpp"
-#include "runtime/string_value_ir.cpp"
-#include "udf/udf_ir.cpp"
-#include "util/hash_util_ir.cpp"
-#else
-#error "This file should only be used for cross compiling to IR."
-#endif
-
diff --git a/be/src/codegen/doris_ir_data.h b/be/src/codegen/doris_ir_data.h
deleted file mode 100644
index 2d6292f3fc..0000000000
--- a/be/src/codegen/doris_ir_data.h
+++ /dev/null
@@ -1,30 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef DORIS_BE_SRC_QUERY_CODEGEN_DORIS_IR_DATA_H
-#define DORIS_BE_SRC_QUERY_CODEGEN_DORIS_IR_DATA_H
-
-/// Header with declarations of Impala IR data. Definitions of the arrays are generated
-/// separately.
-
-extern const unsigned char doris_sse_llvm_ir[];
-extern const size_t doris_sse_llvm_ir_len;
-
-extern const unsigned char doris_no_sse_llvm_ir[];
-extern const size_t doris_no_sse_llvm_ir_len;
-
-#endif
diff --git a/be/src/codegen/gen_ir_descriptions.py b/be/src/codegen/gen_ir_descriptions.py
deleted file mode 100755
index 5b4d997709..0000000000
--- a/be/src/codegen/gen_ir_descriptions.py
+++ /dev/null
@@ -1,203 +0,0 @@
-#!/usr/bin/env python
-# encoding: utf-8
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-"""
-This script will generate two headers that describe all of the clang cross compiled
-functions.
-The script outputs (run: 'doris/common/function-registry/gen_functions.py')
-  - be/src/generated-sources/doris-ir/doris-ir-functions.h
-    This file contains enums for all of the cross compiled functions
-  - be/src/generated-sources/doris-ir/doris-ir-function-names.h
-    This file contains a mapping of <string, enum>
-
-Mapping of enum to compiled function name.  The compiled function name only has to
-be a substring of the actual, mangled compiler generated name.
-TODO: should we work out the mangling rules?
-"""
-
-import string
-import os
-
-ir_functions = [
-
-    ["AGG_NODE_PROCESS_ROW_BATCH_WITH_GROUPING", "process_row_batch_with_grouping"],
-    ["AGG_NODE_PROCESS_ROW_BATCH_NO_GROUPING", "process_row_batch_no_grouping"],
-#    ["EXPR_GET_VALUE", "IrExprGetValue"],
-#    ["HASH_CRC", "IrCrcHash"],
-#    ["HASH_FVN", "IrFvnHash"],
-    ["HASH_JOIN_PROCESS_BUILD_BATCH", "12HashJoinNode19process_build_batch"],
-    ["HASH_JOIN_PROCESS_PROBE_BATCH", "12HashJoinNode19process_probe_batch"],
-    ["EXPR_GET_BOOLEAN_VAL", "4Expr15get_boolean_val"],
-    ["EXPR_GET_TINYINT_VAL", "4Expr16get_tiny_int_val"],
-    ["EXPR_GET_SMALLINT_VAL", "4Expr17get_small_int_val"],
-    ["EXPR_GET_INT_VAL", "4Expr11get_int_val"],
-    ["EXPR_GET_BIGINT_VAL", "4Expr15get_big_int_val"],
-    ["EXPR_GET_LARGEINT_VAL", "4Expr17get_large_int_val"],
-    ["EXPR_GET_FLOAT_VAL", "4Expr13get_float_val"],
-    ["EXPR_GET_DOUBLE_VAL", "4Expr14get_double_val"],
-    ["EXPR_GET_STRING_VAL", "4Expr14get_string_val"],
-    ["EXPR_GET_DATETIME_VAL", "4Expr16get_datetime_val"],
-    ["EXPR_GET_DECIMAL_VAL", "4Expr15get_decimal_val"],
-    ["HASH_CRC", "ir_crc_hash"],
-    ["HASH_FNV", "ir_fnv_hash"],
-    ["FROM_DECIMAL_VAL", "16from_decimal_val"],
-    ["TO_DECIMAL_VAL", "14to_decimal_val"],
-    ["FROM_DATETIME_VAL", "17from_datetime_val"],
-    ["TO_DATETIME_VAL", "15to_datetime_val"],
-    ["IR_STRING_COMPARE", "ir_string_compare"],
-#    ["STRING_VALUE_EQ", "StringValueEQ"],
-#    ["STRING_VALUE_NE", "StringValueNE"],
-#    ["STRING_VALUE_GE", "StringValueGE"],
-#    ["STRING_VALUE_GT", "StringValueGT"],
-#    ["STRING_VALUE_LT", "StringValueLT"],
-#    ["STRING_VALUE_LE", "StringValueLE"],
-#    ["STRING_TO_BOOL", "IrStringToBool"],
-#    ["STRING_TO_INT8", "IrStringToInt8"],
-#    ["STRING_TO_INT16", "IrStringToInt16"],
-#    ["STRING_TO_INT32", "IrStringToInt32"],
-#    ["STRING_TO_INT64", "IrStringToInt64"],
-#    ["STRING_TO_FLOAT", "IrStringToFloat"],
-#    ["STRING_TO_DOUBLE", "IrStringToDouble"],
-#    ["STRING_IS_NULL", "IrIsNullString"],
-    ["HLL_UPDATE_BOOLEAN", "hll_updateIN8doris_udf10BooleanVal"],
-    ["HLL_UPDATE_TINYINT", "hll_updateIN8doris_udf10TinyIntVal"],
-    ["HLL_UPDATE_SMALLINT", "hll_updateIN8doris_udf11SmallIntVal"],
-    ["HLL_UPDATE_INT", "hll_updateIN8doris_udf6IntVal"],
-    ["HLL_UPDATE_BIGINT", "hll_updateIN8doris_udf9BigIntVal"],
-    ["HLL_UPDATE_FLOAT", "hll_updateIN8doris_udf8FloatVal"],
-    ["HLL_UPDATE_DOUBLE", "hll_updateIN8doris_udf9DoubleVal"],
-    ["HLL_UPDATE_STRING", "hll_updateIN8doris_udf9StringVal"],
-    ["HLL_UPDATE_TIMESTAMP", "hll_updateIN8doris_udf11DateTimeVal"],
-    ["HLL_UPDATE_DECIMAL", "hll_updateIN8doris_udf10DecimalVal"],
-    ["HLL_MERGE", "hll_merge"],
-    ["CODEGEN_ANYVAL_DATETIME_VAL_EQ", "datetime_val_eq"],
-    ["CODEGEN_ANYVAL_STRING_VAL_EQ", "string_val_eq"],
-    ["CODEGEN_ANYVAL_DECIMAL_VAL_EQ", "decimal_val_eq"],
-    ["CODEGEN_ANYVAL_DATETIME_VALUE_EQ", "datetime_value_eq"],
-    ["CODEGEN_ANYVAL_STRING_VALUE_EQ", "string_value_eq"],
-    ["CODEGEN_ANYVAL_DECIMAL_VALUE_EQ", "decimal_value_eq"],
-    ["RAW_VALUE_COMPARE", "8RawValue7compare"],
-]
-
-enums_preamble = '\
-// Licensed to the Apache Software Foundation (ASF) under one \n\
-// or more contributor license agreements.  See the NOTICE file \n\
-// distributed with this work for additional information \n\
-// regarding copyright ownership.  The ASF licenses this file \n\
-// to you under the Apache License, Version 2.0 (the \n\
-// "License"); you may not use this file except in compliance \n\
-// with the License.  You may obtain a copy of the License at \n\
-// \n\
-//   http://www.apache.org/licenses/LICENSE-2.0 \n\
-// \n\
-// Unless required by applicable law or agreed to in writing, \n\
-// software distributed under the License is distributed on an \n\
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY \n\
-// KIND, either express or implied.  See the License for the \n\
-// specific language governing permissions and limitations \n\
-// under the License. \n\
-\n\
-// This is a generated file, DO NOT EDIT IT.\n\
-// To add new functions, see be/src/codegen/gen_ir_descriptions.py.\n\
-\n\
-#ifndef DORIS_IR_FUNCTIONS_H\n\
-#define DORIS_IR_FUNCTIONS_H\n\
-\n\
-namespace doris {\n\
-\n\
-class IRFunction {\n\
- public:\n\
-  enum Type {\n'
-
-enums_epilogue = '\
-  };\n\
-};\n\
-\n\
-}\n\
-\n\
-#endif\n'
-
-names_preamble = '\
-// Licensed to the Apache Software Foundation (ASF) under one \n\
-// or more contributor license agreements.  See the NOTICE file \n\
-// distributed with this work for additional information \n\
-// regarding copyright ownership.  The ASF licenses this file \n\
-// to you under the Apache License, Version 2.0 (the \n\
-// "License"); you may not use this file except in compliance \n\
-// with the License.  You may obtain a copy of the License at \n\
-// \n\
-//   http://www.apache.org/licenses/LICENSE-2.0 \n\
-// \n\
-// Unless required by applicable law or agreed to in writing, \n\
-// software distributed under the License is distributed on an \n\
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY \n\
-// KIND, either express or implied.  See the License for the \n\
-// specific language governing permissions and limitations \n\
-// under the License. \n\
-\n\
-// This is a generated file, DO NOT EDIT IT.\n\
-// To add new functions, see be/src/codegen/gen_ir_descriptions.py.\n\
-\n\
-#ifndef DORIS_IR_FUNCTION_NAMES_H\n\
-#define DORIS_IR_FUNCTION_NAMES_H\n\
-\n\
-#include "doris_ir/doris_ir_functions.h"\n\
-\n\
-namespace doris {\n\
-\n\
-static struct {\n\
-  std::string fn_name; \n\
-  IRFunction::Type fn; \n\
-} FN_MAPPINGS[] = {\n'
-
-names_epilogue = '\
-};\n\
-\n\
-}\n\
-\n\
-#endif\n'
-
-BE_PATH = os.environ['DORIS_HOME'] + "/gensrc/build/doris_ir/"
-if not os.path.exists(BE_PATH):
-    os.makedirs(BE_PATH)
-
-if __name__ == "__main__":
-    print("Generating IR description files")
-    enums_file = open(BE_PATH + 'doris_ir_functions.h', 'w')
-    enums_file.write(enums_preamble)
-
-    names_file = open(BE_PATH + 'doris_ir_names.h', 'w')
-    names_file.write(names_preamble)
-
-    idx = 0
-    enums_file.write("    FN_START = " + str(idx) + ",\n")
-    for fn in ir_functions:
-        enum = fn[0]
-        fn_name = fn[1]
-        enums_file.write("    " + enum + " = " + str(idx) + ",\n")
-        names_file.write("  { \"" + fn_name + "\", IRFunction::" + enum + " },\n")
-        idx = idx + 1
-    enums_file.write("    FN_END = " + str(idx) + "\n")
-
-    enums_file.write(enums_epilogue)
-    enums_file.close()
-
-    names_file.write(names_epilogue)
-    names_file.close()
diff --git a/be/src/codegen/llvm_codegen.cpp b/be/src/codegen/llvm_codegen.cpp
deleted file mode 100644
index 05b02d33d8..0000000000
--- a/be/src/codegen/llvm_codegen.cpp
+++ /dev/null
@@ -1,1355 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "codegen/llvm_codegen.h"
-
-#include <fstream>
-#include <mutex>
-#include <iostream>
-#include <sstream>
-#include <boost/thread/mutex.hpp>
-
-#include <llvm/IR/DataLayout.h>
-#include <llvm/Analysis/Passes.h>
-#include <llvm/Analysis/InstructionSimplify.h>
-#include <llvm/ExecutionEngine/ExecutionEngine.h>
-#include <llvm/ExecutionEngine/JIT.h>
-#include <llvm/Bitcode/ReaderWriter.h>
-#include <llvm/PassManager.h>
-#include <llvm/Support/DynamicLibrary.h>
-#include <llvm/IRReader/IRReader.h>
-#include <llvm/Support/MemoryBuffer.h>
-#include <llvm/Support/NoFolder.h>
-#include <llvm/Support/TargetSelect.h>
-#include <llvm/Support/raw_ostream.h>
-#include <llvm/Support/system_error.h>
-#include <llvm/Support/InstIterator.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/IPO/PassManagerBuilder.h>
-#include <llvm/Transforms/Scalar.h>
-#include <llvm/Transforms/Utils/Cloning.h>
-
-#include "common/logging.h"
-#include "codegen/subexpr_elimination.h"
-#include "codegen/doris_ir_data.h"
-#include "doris_ir/doris_ir_names.h"
-#include "util/cpu_info.h"
-#include "util/path_builder.h"
-
-using llvm::Value;
-using llvm::Function;
-using llvm::Module;
-using llvm::PassManager;
-using llvm::PassManagerBuilder;
-using llvm::DataLayout;
-using llvm::FunctionPassManager;
-
-namespace doris {
-
-static std::mutex s_llvm_initialization_lock;
-static bool s_llvm_initialized = false;
-
-void LlvmCodeGen::initialize_llvm(bool load_backend) {
-    std::unique_lock<std::mutex> initialization_lock(s_llvm_initialization_lock);
-    if (s_llvm_initialized) {
-        return;
-    }
-
-    // This allocates a global llvm struct and enables multithreading.
-    // There is no real good time to clean this up but we only make it once.
-    bool result = llvm::llvm_start_multithreaded();
-    DCHECK(result);
-    // This can *only* be called once per process and is used to setup
-    // dynamically linking jitted code.
-    llvm::InitializeNativeTarget();
-    s_llvm_initialized = true;
-
-    if (load_backend) {
-        std::string path;
-        // For test env, we have to load libfesupport.so to provide sym for LLVM.
-        PathBuilder::get_full_build_path("service/libfesupport.so", &path);
-        bool failed = llvm::sys::DynamicLibrary::LoadLibraryPermanently(path.c_str());
-        DCHECK_EQ(failed, 0);
-    }
-}
-
-LlvmCodeGen::LlvmCodeGen(ObjectPool* pool, const std::string& name) :
-        _name(name),
-        _profile(pool, "CodeGen"),
-        _optimizations_enabled(false),
-        _is_corrupt(false),
-        _is_compiled(false),
-        _context(new llvm::LLVMContext()),
-        _module(NULL),
-        _execution_engine(NULL),
-        _scratch_buffer_offset(0),
-        _debug_trace_fn(NULL) {
-    DCHECK(s_llvm_initialized) << "Must call LlvmCodeGen::initialize_llvm first.";
-
-    _load_module_timer = ADD_TIMER(&_profile, "LoadTime");
-    _prepare_module_timer = ADD_TIMER(&_profile, "PrepareTime");
-    _module_file_size = ADD_COUNTER(&_profile, "ModuleFileSize", TUnit::BYTES);
-    _codegen_timer = ADD_TIMER(&_profile, "CodegenTime");
-    _optimization_timer = ADD_TIMER(&_profile, "OptimizationTime");
-    _compile_timer = ADD_TIMER(&_profile, "CompileTime");
-
-    _loaded_functions.resize(IRFunction::FN_END);
-}
-
-Status LlvmCodeGen::load_from_file(
-        ObjectPool* pool,
-        const std::string& file,
-        boost::scoped_ptr<LlvmCodeGen>* codegen) {
-    codegen->reset(new LlvmCodeGen(pool, ""));
-    SCOPED_TIMER((*codegen)->_profile.total_time_counter());
-    SCOPED_TIMER((*codegen)->_load_module_timer);
-    llvm::OwningPtr<llvm::MemoryBuffer> file_buffer;
-    llvm::error_code err = llvm::MemoryBuffer::getFile(file, file_buffer);
-
-    if (err.value() != 0) {
-        std::stringstream ss;
-        ss << "Could not load module " << file << ": " << err.message();
-        return Status::InternalError(ss.str());
-    }
-
-    COUNTER_UPDATE((*codegen)->_module_file_size, file_buffer->getBufferSize());
-    std::string error;
-    llvm::Module* loaded_module = NULL;
-    // llvm::ParseBitcodeFile(file_buffer.get(),
-    //                        (*codegen)->context(), &error);
-
-    if (loaded_module == NULL) {
-        std::stringstream ss;
-        ss << "Could not parse module " << file << ": " << error;
-        return Status::InternalError(ss.str());
-    }
-
-    (*codegen)->_module = loaded_module;
-
-    return (*codegen)->init();
-}
-
-Status LlvmCodeGen::load_from_memory(
-        ObjectPool* pool, llvm::MemoryBuffer* module_ir,
-        const std::string& module_name, const std::string& id, 
-        boost::scoped_ptr<LlvmCodeGen>* codegen) {
-    codegen->reset(new LlvmCodeGen(pool, id));
-    SCOPED_TIMER((*codegen)->_profile.total_time_counter());
-
-    llvm::Module* loaded_module = NULL;
-    RETURN_IF_ERROR(load_module_from_memory(
-            codegen->get(), module_ir, module_name, &loaded_module));
-    (*codegen)->_module = loaded_module;
-
-    return (*codegen)->init();
-}
-
-Status LlvmCodeGen::load_module_from_memory(
-        LlvmCodeGen* codegen, llvm::MemoryBuffer* module_ir,
-        const std::string& module_name, llvm::Module** module) {
-    SCOPED_TIMER(codegen->_prepare_module_timer);
-    std::string error;
-    *module = llvm::ParseBitcodeFile(module_ir, codegen->context(), &error);
-    if (*module == NULL) {
-        std::stringstream ss;
-        ss << "Could not parse module " << module_name << ": " << error;
-        return Status::InternalError(ss.str());
-    }
-    return Status::OK();
-}
-
-Status LlvmCodeGen::load_doris_ir(
-        ObjectPool* pool, 
-        const std::string& id,
-        boost::scoped_ptr<LlvmCodeGen>* codegen_ret) {
-    // Select the appropriate IR version.  We cannot use LLVM IR with sse instructions on
-    // a machine without sse support (loading the module will fail regardless of whether
-    // those instructions are run or not).
-    llvm::StringRef module_ir;
-    std::string module_name;
-    if (CpuInfo::is_supported(CpuInfo::SSE4_2)) {
-        module_ir = llvm::StringRef(reinterpret_cast<const char*>(doris_sse_llvm_ir),
-                              doris_sse_llvm_ir_len);
-        module_name = "Doris IR with SSE support";
-    } else {
-        module_ir = llvm::StringRef(reinterpret_cast<const char*>(doris_no_sse_llvm_ir),
-                              doris_no_sse_llvm_ir_len);
-        module_name = "Doris IR with no SSE support";
-    }
-    boost::scoped_ptr<llvm::MemoryBuffer> module_ir_buf(
-        llvm::MemoryBuffer::getMemBuffer(module_ir, "", false));
-    RETURN_IF_ERROR(load_from_memory(pool, module_ir_buf.get(), module_name, id,
-                                   codegen_ret));
-    LlvmCodeGen* codegen = codegen_ret->get();
-
-    // Parse module for cross compiled functions and types
-    SCOPED_TIMER(codegen->_profile.total_time_counter());
-    SCOPED_TIMER(codegen->_load_module_timer);
-
-    // Get type for StringValue
-    codegen->_string_val_type = codegen->get_type(StringValue::s_llvm_class_name);
-    codegen->_decimal_val_type = codegen->get_type(DecimalValue::_s_llvm_class_name);
-    // Get type for DateTimeValue
-    codegen->_datetime_val_type = codegen->get_type(DateTimeValue::_s_llvm_class_name);
-
-    // Verify size is correct
-    const llvm::DataLayout* data_layout = codegen->execution_engine()->getDataLayout();
-    const llvm::StructLayout* layout =
-        data_layout->getStructLayout(static_cast<llvm::StructType*>(codegen->_string_val_type));
-
-    if (layout->getSizeInBytes() != sizeof(StringValue)) {
-        DCHECK_EQ(layout->getSizeInBytes(), sizeof(StringValue));
-        return Status::InternalError("Could not create llvm struct type for StringVal");
-    }
-
-    // Parse functions from module
-    std::vector<llvm::Function*> functions;
-    codegen->get_functions(&functions);
-    int parsed_functions = 0;
-
-    for (int i = 0; i < functions.size(); ++i) {
-        std::string fn_name = functions[i]->getName();
-
-        for (int j = IRFunction::FN_START; j < IRFunction::FN_END; ++j) {
-            // Substring match to match precompiled functions.  The compiled function names
-            // will be mangled.
-            // TODO: reconsider this.  Substring match is probably not strict enough but
-            // undoing the mangling is no fun either.
-            if (fn_name.find(FN_MAPPINGS[j].fn_name) != std::string::npos) {
-                if (codegen->_loaded_functions[FN_MAPPINGS[j].fn] != NULL) {
-                    return Status::InternalError("Duplicate definition found for function: " + fn_name);
-                }
-
-                codegen->_loaded_functions[FN_MAPPINGS[j].fn] = functions[i];
-                ++parsed_functions;
-            }
-        }
-    }
-
-    if (parsed_functions != IRFunction::FN_END) {
-        std::stringstream ss;
-        ss << "Unable to find these precompiled functions: ";
-        bool first = true;
-
-        for (int i = IRFunction::FN_START; i != IRFunction::FN_END; ++i) {
-            if (codegen->_loaded_functions[i] == NULL) {
-                if (!first) {
-                    ss << ", ";
-                }
-
-                ss << FN_MAPPINGS[i].fn_name;
-                first = false;
-            }
-        }
-
-        return Status::InternalError(ss.str());
-    }
-
-    return Status::OK();
-}
-
-Status LlvmCodeGen::init() {
-    if (_module == NULL) {
-        _module = new llvm::Module(_name, context());
-    }
-
-    llvm::CodeGenOpt::Level opt_level = llvm::CodeGenOpt::Aggressive;
-#ifndef NDEBUG
-    // For debug builds, don't generate JIT compiled optimized assembly.
-    // This takes a non-neglible amount of time (~.5 ms per function) and
-    // blows up the fe tests (which take ~10-20 ms each).
-    opt_level = llvm::CodeGenOpt::None;
-#endif
-    llvm::EngineBuilder builder = llvm::EngineBuilder(_module).setOptLevel(opt_level);
-    // TODO Uncomment the below line as soon as we upgrade to LLVM 3.5 to enable SSE, if
-    // available. In LLVM 3.3 this is done automatically and cannot be enabled because
-    // for some reason SSE4 intrinsics selection will not work.
-    // builder.setMCPU(llvm::sys::getHostCPUName());
-    builder.setErrorStr(&_error_string);
-    _execution_engine.reset(builder.create());
-    if (_execution_engine == NULL) {
-        // _execution_engine will take ownership of the module if it is created
-        delete _module;
-        std::stringstream ss;
-        ss << "Could not create ExecutionEngine: " << _error_string;
-        return Status::InternalError(ss.str());
-    }
-    _void_type = llvm::Type::getVoidTy(context());
-    _ptr_type = llvm::PointerType::get(get_type(TYPE_TINYINT), 0);
-    _true_value = llvm::ConstantInt::get(context(), llvm::APInt(1, true, true));
-    _false_value = llvm::ConstantInt::get(context(), llvm::APInt(1, false, true));
-
-    RETURN_IF_ERROR(load_intrinsics());
-
-    return Status::OK();
-}
-
-LlvmCodeGen::~LlvmCodeGen() {
-    for (auto& it : _jitted_functions) {
-        _execution_engine->freeMachineCodeForFunction(it.first);
-    }
-}
-
-void LlvmCodeGen::enable_optimizations(bool enable) {
-    _optimizations_enabled = enable;
-}
-
-std::string LlvmCodeGen::get_ir(bool full_module) const {
-    std::string str;
-    llvm::raw_string_ostream stream(str);
-    if (full_module) {
-        _module->print(stream, NULL);
-    } else {
-        for (int i = 0; i < _codegend_functions.size(); ++i) {
-            _codegend_functions[i]->print(stream, NULL);
-        }
-    }
-    return str;
-}
-
-llvm::PointerType* LlvmCodeGen::get_ptr_type(llvm::Type* type) {
-    return llvm::PointerType::get(type, 0);
-}
-
-llvm::Type* LlvmCodeGen::get_type(const PrimitiveType& type) {
-    switch (type) {
-    case TYPE_NULL:
-        return llvm::Type::getInt1Ty(context());
-    case TYPE_BOOLEAN:
-        return llvm::Type::getInt1Ty(context());
-    case TYPE_TINYINT:
-        return llvm::Type::getInt8Ty(context());
-    case TYPE_SMALLINT:
-        return llvm::Type::getInt16Ty(context());
-    case TYPE_INT:
-        return llvm::Type::getInt32Ty(context());
-    case TYPE_BIGINT:
-        return llvm::Type::getInt64Ty(context());
-    case TYPE_LARGEINT:
-         return llvm::Type::getIntNTy(context(), 128);
-    case TYPE_FLOAT:
-        return llvm::Type::getFloatTy(context());
-    case TYPE_DOUBLE:
-        return llvm::Type::getDoubleTy(context());
-    case TYPE_CHAR:
-    case TYPE_VARCHAR:
-    case TYPE_HLL:
-        return _string_val_type;
-    case TYPE_DECIMAL:
-        return _decimal_val_type;
-    case TYPE_DATE:
-    case TYPE_DATETIME:
-        return _datetime_val_type;
-    default:
-        DCHECK(false) << "Invalid type.";
-        return NULL;
-    }
-}
-
-llvm::Type* LlvmCodeGen::get_type(const TypeDescriptor& type) {
-    return get_type(type.type);
-}
-
-llvm::PointerType* LlvmCodeGen::get_ptr_type(const TypeDescriptor& type) {
-    return llvm::PointerType::get(get_type(type.type), 0);
-}
-
-llvm::PointerType* LlvmCodeGen::get_ptr_type(const PrimitiveType& type) {
-    return llvm::PointerType::get(get_type(type), 0);
-}
-
-
-llvm::Type* LlvmCodeGen::get_type(const std::string& name) {
-    return _module->getTypeByName(name);
-}
-
-llvm::PointerType* LlvmCodeGen::get_ptr_type(const std::string& name) {
-    llvm::Type* type = get_type(name);
-    DCHECK(type != NULL) << name;
-    return llvm::PointerType::get(type, 0);
-}
-
-// Llvm doesn't let you create a PointerValue from a c-side ptr.  Instead
-// cast it to an int and then to 'type'.
-llvm::Value* LlvmCodeGen::cast_ptr_to_llvm_ptr(llvm::Type* type, void* ptr) {
-    llvm::Constant* const_int = llvm::ConstantInt::get(
-            llvm::Type::getInt64Ty(context()), (int64_t)ptr);
-    return llvm::ConstantExpr::getIntToPtr(const_int, type);
-}
-
-llvm::Value* LlvmCodeGen::get_int_constant(PrimitiveType type, int64_t val) {
-    switch (type) {
-    case TYPE_NULL:
-        return llvm::ConstantInt::get(context(), llvm::APInt(8, val));
-    case TYPE_TINYINT:
-        return llvm::ConstantInt::get(context(), llvm::APInt(8, val));
-    case TYPE_SMALLINT:
-        return llvm::ConstantInt::get(context(), llvm::APInt(16, val));
-    case TYPE_INT:
-        return llvm::ConstantInt::get(context(), llvm::APInt(32, val));
-    case TYPE_BIGINT:
-        return llvm::ConstantInt::get(context(), llvm::APInt(64, val));
-    case TYPE_LARGEINT:
-        return llvm::ConstantInt::get(context(), llvm::APInt(128, val));
-    default:
-        DCHECK(false);
-        return NULL;
-    }
-}
-
-llvm::AllocaInst* LlvmCodeGen::create_entry_block_alloca(
-        llvm::Function* f,
-        const NamedVariable& var) {
-    llvm::IRBuilder<> tmp(&f->getEntryBlock(), f->getEntryBlock().begin());
-    return tmp.CreateAlloca(var.type, 0, var.name.c_str());
-}
-
-llvm::AllocaInst* LlvmCodeGen::create_entry_block_alloca(
-        const LlvmBuilder& builder, llvm::Type* type, const char* name) {
-    return create_entry_block_alloca(
-        builder.GetInsertBlock()->getParent(), NamedVariable(name, type));
-}
-
-void LlvmCodeGen::create_if_else_blocks(
-        llvm::Function* fn, const std::string& if_name,
-        const std::string& else_name, llvm::BasicBlock** if_block, llvm::BasicBlock** else_block,
-        llvm::BasicBlock* insert_before) {
-    *if_block = llvm::BasicBlock::Create(context(), if_name, fn, insert_before);
-    *else_block = llvm::BasicBlock::Create(context(), else_name, fn, insert_before);
-}
-
-llvm::Function* LlvmCodeGen::get_lib_c_function(FnPrototype* prototype) {
-    if (_external_functions.find(prototype->name()) != _external_functions.end()) {
-        return _external_functions[prototype->name()];
-    }
-
-    llvm::Function* func = prototype->generate_prototype();
-    _external_functions[prototype->name()] = func;
-    return func;
-}
-
-llvm::Function* LlvmCodeGen::get_function(IRFunction::Type function) {
-    DCHECK(_loaded_functions[function] != NULL);
-    return _loaded_functions[function];
-}
-
-// There is an llvm bug (#10957) that causes the first step of the verifier to always
-// abort the process if it runs into an issue and ignores ReturnStatusAction.  This
-// would cause Doris to go down if one query has a problem.
-// To work around this, we will copy that step here and not abort on error.
-// TODO: doesn't seem there is much traction in getting this fixed but we'll see
-bool LlvmCodeGen::verify_function(llvm::Function* fn) {
-    if (_is_corrupt) {
-        return false;
-    }
-
-    // Check that there are no calls to Expr::GetConstant(). These should all have been
-    // inlined via Expr::InlineConstants().
-    for (llvm::inst_iterator iter = inst_begin(fn); iter != inst_end(fn); ++iter) {
-        llvm::Instruction* instr = &*iter;
-        if (!llvm::isa<llvm::CallInst>(instr)) {
-            continue;
-        }
-        llvm::CallInst* call_instr = reinterpret_cast<llvm::CallInst*>(instr);
-        llvm::Function* called_fn = call_instr->getCalledFunction();
-        // look for call to Expr::GetConstant()
-        if (called_fn != NULL && called_fn->getName().find(
-                    Expr::_s_get_constant_symbol_prefix) != std::string::npos) {
-            LOG(ERROR) << "Found call to Expr::GetConstant(): " << print(call_instr);
-            _is_corrupt = true;
-            break;
-        }
-    }
-
-    // There is an llvm bug (#10957) that causes the first step of the verifier to always
-    // abort the process if it runs into an issue and ignores ReturnStatusAction.  This
-    // would cause impalad to go down if one query has a problem.  To work around this, we
-    // will copy that step here and not abort on error. Adapted from the pre-verifier
-    // function pass.
-    // TODO: doesn't seem there is much traction in getting this fixed but we'll see
-    for (llvm::Function::iterator i = fn->begin(), e = fn->end(); i != e; ++i) {
-        if (i->empty() || !i->back().isTerminator()) {
-            LOG(ERROR) << "Basic block must end with terminator: \n" << print(&(*i));
-            _is_corrupt = true;
-            break;
-        }
-    }
-
-    if (!_is_corrupt) {
-        _is_corrupt = llvm::verifyFunction(*fn, llvm::PrintMessageAction);
-    }
-
-    if (_is_corrupt) {
-        std::string fn_name = fn->getName(); // llvm has some fancy operator overloading
-        LOG(ERROR) << "Function corrupt: " << fn_name;
-        return false;
-    }
-    return true;
-}
-
-LlvmCodeGen::FnPrototype::FnPrototype(
-    LlvmCodeGen* gen, const std::string& name, llvm::Type* ret_type) :
-    _codegen(gen), _name(name), _ret_type(ret_type) {
-    DCHECK(!_codegen->_is_compiled) << "Not valid to add additional functions";
-}
-
-llvm::Function* LlvmCodeGen::FnPrototype::generate_prototype(
-        LlvmBuilder* builder, llvm::Value** params) {
-    std::vector<llvm::Type*> arguments;
-    for (int i = 0; i < _args.size(); ++i) {
-        arguments.push_back(_args[i].type);
-    }
-    llvm::FunctionType* prototype = llvm::FunctionType::get(_ret_type, arguments, false);
-    llvm::Function* fn = llvm::Function::Create(
-        prototype, llvm::Function::ExternalLinkage, _name, _codegen->_module);
-    DCHECK(fn != NULL);
-
-    // Name the arguments
-    int idx = 0;
-    for (llvm::Function::arg_iterator iter = fn->arg_begin();
-            iter != fn->arg_end(); ++iter, ++idx) {
-        iter->setName(_args[idx].name);
-        if (params != NULL) {
-            params[idx] = iter;
-        }
-    }
-
-    if (builder != NULL) {
-        llvm::BasicBlock* entry_block = llvm::BasicBlock::Create(_codegen->context(), "entry", fn);
-        builder->SetInsertPoint(entry_block);
-    }
-
-    _codegen->_codegend_functions.push_back(fn);
-    return fn;
-}
-
-llvm::Function* LlvmCodeGen::replace_call_sites(
-        llvm::Function* caller, bool update_in_place,
-        llvm::Function* new_fn, const std::string& replacee_name, int* replaced) {
-    DCHECK(caller->getParent() == _module);
-
-    if (!update_in_place) {
-        // Clone the function and add it to the module
-        llvm::ValueToValueMapTy dummy_vmap;
-        llvm::Function* new_caller = llvm::CloneFunction(caller, dummy_vmap, false);
-        new_caller->copyAttributesFrom(caller);
-        _module->getFunctionList().push_back(new_caller);
-        caller = new_caller;
-    } else if (_jitted_functions.find(caller) != _jitted_functions.end()) {
-        // This function is already dynamically linked, unlink it.
-        _execution_engine->freeMachineCodeForFunction(caller);
-        _jitted_functions.erase(caller);
-    }
-
-    *replaced = 0;
-    // loop over all blocks
-    llvm::Function::iterator block_iter = caller->begin();
-
-    while (block_iter != caller->end()) {
-        llvm::BasicBlock* block = block_iter++;
-        // loop over instructions in the block
-        llvm::BasicBlock::iterator instr_iter = block->begin();
-
-        while (instr_iter != block->end()) {
-            llvm::Instruction* instr = instr_iter++;
-
-            // look for call instructions
-            if (llvm::CallInst::classof(instr)) {
-                llvm::CallInst* call_instr = reinterpret_cast<llvm::CallInst*>(instr);
-                llvm::Function* old_fn = call_instr->getCalledFunction();
-
-                // look for call instruction that matches the name
-                if (old_fn->getName().find(replacee_name) != std::string::npos) {
-                    // Replace the called function
-                    call_instr->setCalledFunction(new_fn);
-                    ++*replaced;
-                }
-            }
-        }
-    }
-
-    return caller;
-}
-
-Function* LlvmCodeGen::clone_function(Function* fn) {
-    llvm::ValueToValueMapTy dummy_vmap;
-    // CloneFunction() automatically gives the new function a unique name
-    Function* fn_clone = llvm::CloneFunction(fn, dummy_vmap, false);
-    fn_clone->copyAttributesFrom(fn);
-    _module->getFunctionList().push_back(fn_clone);
-    return fn_clone;
-}
-
-// TODO: revisit this.  Inlining all call sites might not be the right call.  We
-// probably need to make this more complicated and somewhat cost based or write
-// our own optimization passes.
-int LlvmCodeGen::inline_call_sites(llvm::Function* fn, bool skip_registered_fns) {
-    int functions_inlined = 0;
-    // Collect all call sites
-    std::vector<llvm::CallInst*> call_sites;
-
-    // loop over all blocks
-    llvm::Function::iterator block_iter = fn->begin();
-
-    while (block_iter != fn->end()) {
-        llvm::BasicBlock* block = block_iter++;
-        // loop over instructions in the block
-        llvm::BasicBlock::iterator instr_iter = block->begin();
-
-        while (instr_iter != block->end()) {
-            llvm::Instruction* instr = instr_iter++;
-
-            // look for call instructions
-            if (llvm::CallInst::classof(instr)) {
-                llvm::CallInst* call_instr = reinterpret_cast<llvm::CallInst*>(instr);
-                llvm::Function* called_fn = call_instr->getCalledFunction();
-
-                if (skip_registered_fns) {
-                    if (_registered_exprs.find(called_fn) != _registered_exprs.end()) {
-                        continue;
-                    }
-                }
-
-                call_sites.push_back(call_instr);
-            }
-        }
-    }
-
-    // Inline all call sites.  InlineFunction can still fail (function is recursive, etc)
-    // but that always leaves the original function in a consistent state
-    for (int i = 0; i < call_sites.size(); ++i) {
-        llvm::InlineFunctionInfo info;
-
-        if (llvm::InlineFunction(call_sites[i], info)) {
-            ++functions_inlined;
-        }
-    }
-
-    return functions_inlined;
-}
-
-llvm::Function* LlvmCodeGen::optimize_function_with_exprs(llvm::Function* fn) {
-    int num_inlined = 0;
-    do {
-        // This assumes that all redundant exprs have been registered.
-        num_inlined = inline_call_sites(fn, false);
-    } while (num_inlined > 0);
-    // TODO(zc): fix
-    // SubExprElimination subexpr_elim(this);
-    // subexpr_elim.run(fn);
-    return finalize_function(fn);
-}
-
-llvm::Function* LlvmCodeGen::finalize_function(llvm::Function* function) {
-    if (!verify_function(function)) {
-        return NULL;
-    }
-
-    return function;
-}
-
-Status LlvmCodeGen::finalize_module() {
-    DCHECK(!_is_compiled);
-    _is_compiled = true;
-
-    // TODO(zc)
-#if 0
-    if (FLAGS_unopt_module_dir.size() != 0) {
-        string path = FLAGS_unopt_module_dir + "/" + id_ + "_unopt.ll";
-        fstream f(path.c_str(), fstream::out | fstream::trunc);
-        if (f.fail()) {
-            LOG(ERROR) << "Could not save IR to: " << path;
-        } else {
-            f << GetIR(true);
-            f.close();
-        }
-    }
-#endif
-
-    if (_is_corrupt) {
-        return Status::InternalError("Module is corrupt.");
-    }
-    SCOPED_TIMER(_profile.total_time_counter());
-
-    // Don't waste time optimizing module if there are no functions to JIT. This can happen
-    // if the codegen object is created but no functions are successfully codegen'd.
-    if (_optimizations_enabled // TODO(zc): && !FLAGS_disable_optimization_passes 
-            && !_fns_to_jit_compile.empty()) {
-        optimize_module();
-    }
-
-    SCOPED_TIMER(_compile_timer);
-    // JIT compile all codegen'd functions
-    for (int i = 0; i < _fns_to_jit_compile.size(); ++i) {
-        *_fns_to_jit_compile[i].second = jit_function(_fns_to_jit_compile[i].first);
-    }
-#if 0
-    if (FLAGS_opt_module_dir.size() != 0) {
-        string path = FLAGS_opt_module_dir + "/" + id_ + "_opt.ll";
-        fstream f(path.c_str(), fstream::out | fstream::trunc);
-        if (f.fail()) {
-            LOG(ERROR) << "Could not save IR to: " << path;
-        } else {
-            f << GetIR(true);
-            f.close();
-        }
-    }
-#endif
-
-    return Status::OK();
-}
-
-void LlvmCodeGen::optimize_module() {
-    SCOPED_TIMER(_optimization_timer);
-
-    // This pass manager will construct optimizations passes that are "typical" for
-    // c/c++ programs.  We're relying on llvm to pick the best passes for us.
-    // TODO: we can likely muck with this to get better compile speeds or write
-    // our own passes.  Our subexpression elimination optimization can be rolled into
-    // a pass.
-    PassManagerBuilder pass_builder;
-    // 2 maps to -O2
-    // TODO: should we switch to 3? (3 may not produce different IR than 2 while taking
-    // longer, but we should check)
-    pass_builder.OptLevel = 2;
-    // Don't optimize for code size (this corresponds to -O2/-O3)
-    pass_builder.SizeLevel = 0;
-    pass_builder.Inliner = llvm::createFunctionInliningPass() ;
-
-    // Specifying the data layout is necessary for some optimizations (e.g. removing many
-    // of the loads/stores produced by structs).
-    const std::string& data_layout_str = _module->getDataLayout();
-    DCHECK(!data_layout_str.empty());
-
-    // Before running any other optimization passes, run the internalize pass, giving it
-    // the names of all functions registered by AddFunctionToJit(), followed by the
-    // global dead code elimination pass. This causes all functions not registered to be
-    // JIT'd to be marked as internal, and any internal functions that are not used are
-    // deleted by DCE pass. This greatly decreases compile time by removing unused code.
-    std::vector<const char*> exported_fn_names;
-    for (int i = 0; i < _fns_to_jit_compile.size(); ++i) {
-        exported_fn_names.push_back(_fns_to_jit_compile[i].first->getName().data());
-    }
-    boost::scoped_ptr<PassManager> module_pass_manager(new PassManager());
-    module_pass_manager->add(new DataLayout(data_layout_str));
-    module_pass_manager->add(llvm::createInternalizePass(exported_fn_names));
-    module_pass_manager->add(llvm::createGlobalDCEPass());
-    module_pass_manager->run(*_module);
-
-    // Create and run function pass manager
-    boost::scoped_ptr<FunctionPassManager> fn_pass_manager(new FunctionPassManager(_module));
-    fn_pass_manager->add(new DataLayout(data_layout_str));
-    pass_builder.populateFunctionPassManager(*fn_pass_manager);
-    fn_pass_manager->doInitialization();
-    for (Module::iterator it = _module->begin(), end = _module->end(); it != end; ++it) {
-        if (!it->isDeclaration()) fn_pass_manager->run(*it);
-    }
-    fn_pass_manager->doFinalization();
-
-    // Create and run module pass manager
-    module_pass_manager.reset(new PassManager());
-    module_pass_manager->add(new DataLayout(data_layout_str));
-    pass_builder.populateModulePassManager(*module_pass_manager);
-    module_pass_manager->run(*_module);
-
-    // if (FLAGS_print_llvm_ir_instruction_count) {
-    //     for (int i = 0; i < _fns_to_jit_compile.size(); ++i) {
-    //         InstructionCounter counter;
-    //         counter.visit(*_fns_to_jit_compile[i].first);
-    //         VLOG(1) << _fns_to_jit_compile[i].first->getName().str();
-    //         VLOG(1) << counter.PrintCounters();
-    //     }
-    // }
-}
-
-void LlvmCodeGen::add_function_to_jit(llvm::Function* fn, void** fn_ptr) {
-#if 0
-    llvm::Type* decimal_val_type = get_type(CodegenAnyVal::LLVM_DECIMALVAL_NAME);
-    if (fn->getReturnType() == decimal_val_type) {
-        // Per the x86 calling convention ABI, DecimalVals should be returned via an extra
-        // first DecimalVal* argument. We generate non-compliant functions that return the
-        // DecimalVal directly, which we can call from generated code, but not from compiled
-        // native code.  To avoid accidentally calling a non-compliant function from native
-        // code, call 'function' from an ABI-compliant wrapper.
-        stringstream name;
-        name << fn->getName().str() << "ABIWrapper";
-        LlvmCodeGen::FnPrototype prototype(this, name.str(), void_type_);
-        // Add return argument
-        prototype.AddArgument(NamedVariable("result", decimal_val_type->getPointerTo()));
-        // Add regular arguments
-        for (Function::arg_iterator arg = fn->arg_begin(); arg != fn->arg_end(); ++arg) {
-            prototype.AddArgument(NamedVariable(arg->getName(), arg->getType()));
-        }
-        LlvmBuilder builder(context());
-        Value* args[fn->arg_size() + 1];
-        Function* fn_wrapper = prototype.GeneratePrototype(&builder, &args[0]);
-        fn_wrapper->addFnAttr(llvm::Attribute::AlwaysInline);
-        // Mark first argument as sret (not sure if this is necessary but it can't hurt)
-        fn_wrapper->addAttribute(1, Attribute::StructRet);
-        // Call 'fn' and store the result in the result argument
-        Value* result =
-            builder.CreateCall(fn, ArrayRef<Value*>(&args[1], fn->arg_size()), "result");
-        builder.CreateStore(result, args[0]);
-        builder.CreateRetVoid();
-        fn = FinalizeFunction(fn_wrapper);
-        DCHECK(fn != NULL);
-    }
-#endif
-    _fns_to_jit_compile.push_back(std::make_pair(fn, fn_ptr));
-}
-
-
-void* LlvmCodeGen::jit_function(llvm::Function* function, int* scratch_size) {
-    if (_is_corrupt) {
-        return NULL;
-    }
-
-    if (scratch_size == NULL) {
-        DCHECK_EQ(_scratch_buffer_offset, 0);
-    } else {
-        *scratch_size = _scratch_buffer_offset;
-    }
-
-    // TODO: log a warning if the jitted function is too big (larger than I cache)
-    void* jitted_function = _execution_engine->getPointerToFunction(function);
-    boost::lock_guard<boost::mutex> l(_jitted_functions_lock);
-
-    if (jitted_function != NULL) {
-        _jitted_functions[function] = true;
-    }
-
-    return jitted_function;
-}
-
-int LlvmCodeGen::get_scratch_buffer(int byte_size) {
-    // TODO: this is not yet implemented/tested
-    DCHECK(false);
-    int result = _scratch_buffer_offset;
-    // TODO: alignment?
-    result += byte_size;
-    return result;
-}
-
-// Wrapper around printf to make it easier to call from IR
-extern "C" void debug_trace(const char* str) {
-    printf("LLVM Trace: %s\n", str);
-}
-
-void LlvmCodeGen::codegen_debug_trace(LlvmBuilder* builder, const char* str) {
-    LOG(ERROR) << "Remove IR codegen debug traces before checking in.";
-
-    // Lazily link in debug function to the module
-    if (_debug_trace_fn == NULL) {
-        std::vector<llvm::Type*> args;
-        args.push_back(_ptr_type);
-        llvm::FunctionType* fn_type = llvm::FunctionType::get(_void_type, args, false);
-        _debug_trace_fn = llvm::Function::Create(fn_type, llvm::GlobalValue::ExternalLinkage,
-                                           "debug_trace", _module);
-
-        DCHECK(_debug_trace_fn != NULL);
-        // debug_trace shouldn't already exist (llvm mangles function names if there
-        // are duplicates)
-        DCHECK(_debug_trace_fn->getName() ==  "debug_trace");
-
-        _debug_trace_fn->setCallingConv(llvm::CallingConv::C);
-
-        // Add a mapping to the execution engine so it can link the debug_trace function
-        _execution_engine->addGlobalMapping(_debug_trace_fn,
-                                            reinterpret_cast<void*>(&debug_trace));
-    }
-
-    // Make a copy of str into memory owned by this object.  This is no guarantee that str is
-    // still around when the debug printf is executed.
-    _debug_strings.push_back(str);
-    str = _debug_strings[_debug_strings.size() - 1].c_str();
-
-    // Call the function by turning 'str' into a constant ptr value
-    llvm::Value* str_ptr = cast_ptr_to_llvm_ptr(_ptr_type, const_cast<char*>(str));
-    std::vector<llvm::Value*> calling_args;
-    calling_args.push_back(str_ptr);
-    builder->CreateCall(_debug_trace_fn, calling_args);
-}
-
-void LlvmCodeGen::get_functions(std::vector<llvm::Function*>* functions) {
-    llvm::Module::iterator fn_iter = _module->begin();
-
-    while (fn_iter != _module->end()) {
-        llvm::Function* fn = fn_iter++;
-
-        if (!fn->empty()) {
-            functions->push_back(fn);
-        }
-    }
-}
-
-// TODO: cache this function (e.g. all min(int, int) are identical).
-// we probably want some more global IR function cache, or, implement this
-// in c and precompile it with clang.
-// define i32 @Min(i32 %v1, i32 %v2) {
-// entry:
-//   %0 = icmp slt i32 %v1, %v2
-//   br i1 %0, label %ret_v1, label %ret_v2
-//
-// ret_v1:                                           ; preds = %entry
-//   ret i32 %v1
-//
-// ret_v2:                                           ; preds = %entry
-//   ret i32 %v2
-// }
-llvm::Function* LlvmCodeGen::codegen_min_max(const TypeDescriptor& type, bool min) {
-    LlvmCodeGen::FnPrototype prototype(this, min ? "Min" : "Max", get_type(type));
-    prototype.add_argument(LlvmCodeGen::NamedVariable("v1", get_type(type)));
-    prototype.add_argument(LlvmCodeGen::NamedVariable("v2", get_type(type)));
-
-    llvm::Value* params[2];
-    LlvmBuilder builder(context());
-    llvm::Function* fn = prototype.generate_prototype(&builder, &params[0]);
-
-    llvm::Value* compare = NULL;
-
-    switch (type.type) {
-    case TYPE_NULL:
-        compare = false_value();
-        break;
-
-    case TYPE_BOOLEAN:
-        if (min) {
-            // For min, return x && y
-            compare = builder.CreateAnd(params[0], params[1]);
-        } else {
-            // For max, return x || y
-            compare = builder.CreateOr(params[0], params[1]);
-        }
-
-        break;
-
-    case TYPE_TINYINT:
-    case TYPE_SMALLINT:
-    case TYPE_INT:
-    case TYPE_BIGINT:
-        if (min) {
-            compare = builder.CreateICmpSLT(params[0], params[1]);
-        } else {
-            compare = builder.CreateICmpSGT(params[0], params[1]);
-        }
-
-        break;
-
-    case TYPE_FLOAT:
-    case TYPE_DOUBLE:
-        if (min) {
-            compare = builder.CreateFCmpULT(params[0], params[1]);
-        } else {
-            compare = builder.CreateFCmpUGT(params[0], params[1]);
-        }
-
-        break;
-
-    default:
-        DCHECK(false);
-    }
-
-    if (type.type == TYPE_BOOLEAN) {
-        builder.CreateRet(compare);
-    } else {
-        llvm::BasicBlock* ret_v1 = NULL;
-        llvm::BasicBlock* ret_v2 = NULL;
-        create_if_else_blocks(fn, "ret_v1", "ret_v2", &ret_v1, &ret_v2);
-
-        builder.CreateCondBr(compare, ret_v1, ret_v2);
-        builder.SetInsertPoint(ret_v1);
-        builder.CreateRet(params[0]);
-        builder.SetInsertPoint(ret_v2);
-        builder.CreateRet(params[1]);
-    }
-
-    if (!verify_function(fn)) {
-        return NULL;
-    }
-
-    return fn;
-}
-
-// Intrinsics are loaded one by one.  Some are overloaded (e.g. memcpy) and the types must
-// be specified.
-// TODO: is there a better way to do this?
-Status LlvmCodeGen::load_intrinsics() {
-    // Load memcpy
-    {
-        llvm::Type* types[] = { ptr_type(), ptr_type(), get_type(TYPE_INT) };
-        llvm::Function* fn = llvm::Intrinsic::getDeclaration(
-                module(), llvm::Intrinsic::memcpy, types);
-
-        if (fn == NULL) {
-            return Status::InternalError("Could not find memcpy intrinsic.");
-        }
-
-        _llvm_intrinsics[llvm::Intrinsic::memcpy] = fn;
-    }
-
-    // TODO: where is the best place to put this?
-    struct {
-        llvm::Intrinsic::ID id;
-        const char* error;
-    } non_overloaded_intrinsics[] = {
-        { llvm::Intrinsic::x86_sse42_crc32_32_8, "sse4.2 crc32_u8" },
-        { llvm::Intrinsic::x86_sse42_crc32_32_16, "sse4.2 crc32_u16" },
-        { llvm::Intrinsic::x86_sse42_crc32_32_32, "sse4.2 crc32_u32" },
-        { llvm::Intrinsic::x86_sse42_crc32_64_64, "sse4.2 crc32_u64" },
-    };
-    const int num_intrinsics =
-        sizeof(non_overloaded_intrinsics) / sizeof(non_overloaded_intrinsics[0]);
-
-    for (int i = 0; i < num_intrinsics; ++i) {
-        llvm::Intrinsic::ID id = non_overloaded_intrinsics[i].id;
-        llvm::Function* fn = llvm::Intrinsic::getDeclaration(module(), id);
-
-        if (fn == NULL) {
-            std::stringstream ss;
-            ss << "Could not find " << non_overloaded_intrinsics[i].error << " intrinsic";
-            return Status::InternalError(ss.str());
-        }
-
-        _llvm_intrinsics[id] = fn;
-    }
-
-    return Status::OK();
-}
-
-void LlvmCodeGen::codegen_memcpy(LlvmBuilder* builder, llvm::Value* dst, llvm::Value* src, int size) {
-    // Cast src/dst to int8_t*.  If they already are, this will get optimized away
-    DCHECK(llvm::PointerType::classof(dst->getType()));
-    DCHECK(llvm::PointerType::classof(src->getType()));
-    dst = builder->CreateBitCast(dst, ptr_type());
-    src = builder->CreateBitCast(src, ptr_type());
-
-    // Get intrinsic function.
-    llvm::Function* memcpy_fn = _llvm_intrinsics[llvm::Intrinsic::memcpy];
-    DCHECK(memcpy_fn != NULL);
-
-    // The fourth argument is the alignment.  For non-zero values, the caller
-    // must guarantee that the src and dst values are aligned to that byte boundary.
-    // TODO: We should try to take advantage of this since our tuples are well aligned.
-    llvm::Value* args[] = {
-        dst, src, get_int_constant(TYPE_INT, size),
-        get_int_constant(TYPE_INT, 0),
-        false_value()                       // is_volatile.
-    };
-    builder->CreateCall(memcpy_fn, args);
-}
-
-Value* LlvmCodeGen::codegen_array_at(
-        LlvmBuilder* builder, Value* array, int idx, const char* name) {
-    DCHECK(array->getType()->isPointerTy() || array->getType()->isArrayTy())
-        << print(array->getType());
-    Value* ptr = builder->CreateConstGEP1_32(array, idx);
-    return builder->CreateLoad(ptr, name);
-}
-
-void LlvmCodeGen::codegen_assign(LlvmBuilder* builder,
-                                llvm::Value* dst, llvm::Value* src, PrimitiveType type) {
-    switch (type) {
-    case TYPE_CHAR:
-    case TYPE_VARCHAR: 
-    case TYPE_HLL:  {
-        codegen_memcpy(builder, dst, src, sizeof(StringValue));
-        break;
-    }
-
-    case TYPE_DATETIME:
-        DCHECK(false) << "Timestamp NYI"; // TODO
-        break;
-
-    default:
-        builder->CreateStore(src, dst);
-        break;
-    }
-}
-
-void LlvmCodeGen::clear_hash_fns() {
-    _hash_fns.clear();
-}
-
-// Codegen to compute hash for a particular byte size.  Loops are unrolled in this
-// process.  For the case where num_bytes == 11, we'd do this by calling
-//   1. crc64 (for first 8 bytes)
-//   2. crc16 (for bytes 9, 10)
-//   3. crc8 (for byte 11)
-// The resulting IR looks like:
-// define i32 @CrcHash11(i8* %data, i32 %len, i32 %seed) {
-// entry:
-//   %0 = zext i32 %seed to i64
-//   %1 = bitcast i8* %data to i64*
-//   %2 = getelementptr i64* %1, i32 0
-//   %3 = load i64* %2
-//   %4 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %0, i64 %3)
-//   %5 = trunc i64 %4 to i32
-//   %6 = getelementptr i8* %data, i32 8
-//   %7 = bitcast i8* %6 to i16*
-//   %8 = load i16* %7
-//   %9 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %5, i16 %8)
-//   %10 = getelementptr i8* %6, i32 2
-//   %11 = load i8* %10
-//   %12 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %9, i8 %11)
-//   ret i32 %12
-// }
-llvm::Function* LlvmCodeGen::get_hash_function(int num_bytes) {
-    if (CpuInfo::is_supported(CpuInfo::SSE4_2)) {
-        if (num_bytes == -1) {
-            // -1 indicates variable length, just return the generic loop based
-            // hash fn.
-            return get_function(IRFunction::HASH_CRC);
-            return NULL;
-        }
-
-        std::map<int, llvm::Function*>::iterator cached_fn = _hash_fns.find(num_bytes);
-        if (cached_fn != _hash_fns.end()) {
-            return cached_fn->second;
-        }
-
-        // Generate a function to hash these bytes
-        std::stringstream ss;
-        ss << "CrcHash" << num_bytes;
-        FnPrototype prototype(this, ss.str(), get_type(TYPE_INT));
-        prototype.add_argument(LlvmCodeGen::NamedVariable("data", ptr_type()));
-        prototype.add_argument(LlvmCodeGen::NamedVariable("len", get_type(TYPE_INT)));
-        prototype.add_argument(LlvmCodeGen::NamedVariable("seed", get_type(TYPE_INT)));
-
-        llvm::Value* args[3];
-        LlvmBuilder builder(context());
-        llvm::Function* fn = prototype.generate_prototype(&builder, &args[0]);
-        llvm::Value* data = args[0];
-        llvm::Value* result = args[2];
-
-        llvm::Function* crc8_fn = _llvm_intrinsics[llvm::Intrinsic::x86_sse42_crc32_32_8];
-        llvm::Function* crc16_fn = _llvm_intrinsics[llvm::Intrinsic::x86_sse42_crc32_32_16];
-        llvm::Function* crc32_fn = _llvm_intrinsics[llvm::Intrinsic::x86_sse42_crc32_32_32];
-        llvm::Function* crc64_fn = _llvm_intrinsics[llvm::Intrinsic::x86_sse42_crc32_64_64];
-
-        // Generate the crc instructions starting with the highest number of bytes
-        if (num_bytes >= 8) {
-            llvm::Value* result_64 = builder.CreateZExt(result, get_type(TYPE_BIGINT));
-            llvm::Value* ptr = builder.CreateBitCast(data, get_ptr_type(TYPE_BIGINT));
-            int i = 0;
-
-            while (num_bytes >= 8) {
-                llvm::Value* index[] = { get_int_constant(TYPE_INT, i++) };
-                llvm::Value* d = builder.CreateLoad(builder.CreateGEP(ptr, index));
-                result_64 = builder.CreateCall2(crc64_fn, result_64, d);
-                num_bytes -= 8;
-            }
-
-            result = builder.CreateTrunc(result_64, get_type(TYPE_INT));
-            llvm::Value* index[] = { get_int_constant(TYPE_INT, i * 8) };
-            // Update data to past the 8-byte chunks
-            data = builder.CreateGEP(data, index);
-        }
-
-        if (num_bytes >= 4) {
-            DCHECK_LT(num_bytes, 8);
-            llvm::Value* ptr = builder.CreateBitCast(data, get_ptr_type(TYPE_INT));
-            llvm::Value* d = builder.CreateLoad(ptr);
-            result = builder.CreateCall2(crc32_fn, result, d);
-            llvm::Value* index[] = { get_int_constant(TYPE_INT, 4) };
-            data = builder.CreateGEP(data, index);
-            num_bytes -= 4;
-        }
-
-        if (num_bytes >= 2) {
-            DCHECK_LT(num_bytes, 4);
-            llvm::Value* ptr = builder.CreateBitCast(data, get_ptr_type(TYPE_SMALLINT));
-            llvm::Value* d = builder.CreateLoad(ptr);
-            result = builder.CreateCall2(crc16_fn, result, d);
-            llvm::Value* index[] = { get_int_constant(TYPE_INT, 2) };
-            data = builder.CreateGEP(data, index);
-            num_bytes -= 2;
-        }
-
-        if (num_bytes > 0) {
-            DCHECK_EQ(num_bytes, 1);
-            llvm::Value* d = builder.CreateLoad(data);
-            result = builder.CreateCall2(crc8_fn, result, d);
-            --num_bytes;
-        }
-        DCHECK_EQ(num_bytes, 0);
-        Value* shift_16 = get_int_constant(TYPE_INT, 16);
-        Value* upper_bits = builder.CreateShl(result, shift_16);
-        Value* lower_bits = builder.CreateLShr(result, shift_16);
-        result = builder.CreateOr(upper_bits, lower_bits);
-        builder.CreateRet(result);
-
-        fn = finalize_function(fn);
-        if (fn != NULL) {
-            _hash_fns[num_bytes] = fn;
-        }
-        return fn;
-    } else {
-        // Don't bother with optimizations without crc hash instruction
-        return get_function(IRFunction::HASH_FNV);
-        return NULL;
-    }
-}
-
-llvm::Value* LlvmCodeGen::get_ptr_to(LlvmBuilder* builder, llvm::Value* v, const char* name) {
-    llvm::Value* ptr = create_entry_block_alloca(*builder, v->getType(), name);
-    builder->CreateStore(v, ptr);
-    return ptr;
-}
-
-llvm::Instruction::CastOps LlvmCodeGen::get_cast_op(
-        const TypeDescriptor& from_type, const TypeDescriptor& to_type) {
-    switch (from_type.type) {
-    case TYPE_BOOLEAN: {
-        switch (to_type.type) {
-        case TYPE_BOOLEAN:
-            return llvm::Instruction::Trunc;
-        case TYPE_TINYINT:
-        case TYPE_SMALLINT:
-        case TYPE_INT:
-        case TYPE_BIGINT:
-        case TYPE_LARGEINT:
-            return llvm::Instruction::ZExt;
-        case TYPE_FLOAT:
-        case TYPE_DOUBLE:
-            return llvm::Instruction::SIToFP;
-        default:
-            return llvm::Instruction::CastOpsEnd;
-        }
-    }
-    case TYPE_TINYINT: {
-        switch (to_type.type) {
-        case TYPE_BOOLEAN:
-        case TYPE_TINYINT:
-            return llvm::Instruction::Trunc;
-        case TYPE_SMALLINT:
-        case TYPE_INT:
-        case TYPE_BIGINT:
-        case TYPE_LARGEINT:
-            return llvm::Instruction::SExt;
-        case TYPE_FLOAT:
-        case TYPE_DOUBLE:
-            return llvm::Instruction::SIToFP;
-        default:
-            return llvm::Instruction::CastOpsEnd;
-        }
-    }
-    case TYPE_SMALLINT: {
-        switch (to_type.type) {
-        case TYPE_BOOLEAN:
-        case TYPE_TINYINT:
-        case TYPE_SMALLINT:
-            return llvm::Instruction::Trunc;
-        case TYPE_INT:
-        case TYPE_BIGINT:
-        case TYPE_LARGEINT:
-            return llvm::Instruction::SExt;
-        case TYPE_FLOAT:
-        case TYPE_DOUBLE:
-            return llvm::Instruction::SIToFP;
-        default:
-            return llvm::Instruction::CastOpsEnd;
-        }
-    }
-    case TYPE_INT: {
-        switch (to_type.type) {
-        case TYPE_BOOLEAN:
-        case TYPE_TINYINT:
-        case TYPE_SMALLINT:
-        case TYPE_INT:
-            return llvm::Instruction::Trunc;
-        case TYPE_BIGINT:
-        case TYPE_LARGEINT:
-            return llvm::Instruction::SExt;
-        case TYPE_FLOAT:
-        case TYPE_DOUBLE:
-            return llvm::Instruction::SIToFP;
-        default:
-            return llvm::Instruction::CastOpsEnd;
-        }
-    }
-    case TYPE_BIGINT: {
-        switch (to_type.type) {
-        case TYPE_BOOLEAN:
-        case TYPE_TINYINT:
-        case TYPE_SMALLINT:
-        case TYPE_INT:
-        case TYPE_BIGINT:
-            return llvm::Instruction::Trunc;
-        case TYPE_LARGEINT:
-            return llvm::Instruction::SExt;
-        case TYPE_FLOAT:
-        case TYPE_DOUBLE:
-            return llvm::Instruction::SIToFP;
-        default:
-            return llvm::Instruction::CastOpsEnd;
-        }
-    }
-    case TYPE_LARGEINT: {
-        switch (to_type.type) {
-        case TYPE_BOOLEAN:
-        case TYPE_TINYINT:
-        case TYPE_SMALLINT:
-        case TYPE_INT:
-        case TYPE_BIGINT:
-        case TYPE_LARGEINT:
-            return llvm::Instruction::Trunc;
-        case TYPE_FLOAT:
-        case TYPE_DOUBLE:
-            return llvm::Instruction::SIToFP;
-        default:
-            return llvm::Instruction::CastOpsEnd;
-        }
-    }
-    case TYPE_FLOAT: {
-        switch (to_type.type) {
-        case TYPE_BOOLEAN:
-        case TYPE_TINYINT:
-        case TYPE_SMALLINT:
-        case TYPE_INT:
-        case TYPE_BIGINT:
-        case TYPE_LARGEINT:
-            return llvm::Instruction::FPToSI;
-        case TYPE_FLOAT:
-        case TYPE_DOUBLE:
-            return llvm::Instruction::FPExt;
-        default:
-            return llvm::Instruction::CastOpsEnd;
-        }
-    }
-    case TYPE_DOUBLE: {
-        switch (to_type.type) {
-        case TYPE_BOOLEAN:
-        case TYPE_TINYINT:
-        case TYPE_SMALLINT:
-        case TYPE_INT:
-        case TYPE_BIGINT:
-        case TYPE_LARGEINT:
-            return llvm::Instruction::FPToSI;
-        case TYPE_FLOAT:
-        case TYPE_DOUBLE:
-            return llvm::Instruction::FPTrunc;
-        default:
-            return llvm::Instruction::CastOpsEnd;
-        }
-    }
-    default:
-        return llvm::Instruction::CastOpsEnd;
-    }
-    return llvm::Instruction::CastOpsEnd;
-}
-
-}
diff --git a/be/src/codegen/llvm_codegen.h b/be/src/codegen/llvm_codegen.h
deleted file mode 100644
index ba0b8b6ce7..0000000000
--- a/be/src/codegen/llvm_codegen.h
+++ /dev/null
@@ -1,641 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef DORIS_BE_SRC_QUERY_CODEGEN_LLVM_CODEGEN_H
-#define DORIS_BE_SRC_QUERY_CODEGEN_LLVM_CODEGEN_H
-
-#include <map>
-#include <string>
-#include <vector>
-#include <boost/scoped_ptr.hpp>
-#include <boost/thread/mutex.hpp>
-
-#include <llvm/IR/DerivedTypes.h>
-#include <llvm/IR/Intrinsics.h>
-#include <llvm/IR/IRBuilder.h>
-#include <llvm/IR/LLVMContext.h>
-#include <llvm/IR/Module.h>
-#include <llvm/Analysis/Verifier.h>
-#include <llvm/Support/raw_ostream.h>
-#include <llvm/Support/MemoryBuffer.h>
-
-#include "common/status.h"
-#include "runtime/primitive_type.h"
-#include "exprs/expr.h"
-#include "util/runtime_profile.h"
-#include "doris_ir/doris_ir_functions.h"
-
-// Forward declare all llvm classes to avoid namespace pollution.
-namespace llvm {
-class AllocaInst;
-class BasicBlock;
-class ConstantFolder;
-class ExecutionEngine;
-class Function;
-// class FunctionPassManager;
-class LLVMContext;
-class Module;
-class NoFolder;
-// class PassManager;
-class PointerType;
-class StructType;
-class TargetData;
-class Type;
-class Value;
-
-template<bool B, typename T, typename I>
-class IRBuilder;
-
-template<bool preserveName>
-class IRBuilderDefaultInserter;
-}
-
-namespace doris {
-
-class SubExprElimination;
-
-// LLVM code generator.  This is the top level object to generate jitted code.
-//
-// LLVM provides a c++ IR builder interface so IR does not need to be written
-// manually.  The interface is very low level so each line of IR that needs to
-// be output maps 1:1 with calls to the interface.
-// The llvm documentation is not fantastic and a lot of this was figured out
-// by experimenting.  Thankfully, their API is pretty well designed so it's
-// possible to get by without great documentation.  The llvm tutorial is very
-// helpful, http://llvm.org/docs/tutorial/LangImpl1.html.  In this tutorial, they
-// go over how to JIT an AST for a toy language they create.
-// It is also helpful to use their online app that lets you compile c/c++ to IR.
-// http://llvm.org/demo/index.cgi.
-//
-// This class provides two interfaces, one for testing and one for the query
-// engine.  The interface for the query engine will load the cross-compiled
-// IR module (output during the build) and extract all of functions that will
-// be called directly.  The test interface can be used to load any precompiled
-// module or none at all (but this class will not validate the module).
-//
-// This class is mostly not threadsafe.  During the Prepare() phase of the fragment
-// execution, nodes should codegen functions.
-// Afterward, optimize_module() should be called at which point all codegened functions
-// are optimized.
-// Subsequently, nodes can get at the jit compiled function pointer (typically during the
-// Open() call).  Getting the jit compiled function (jit_function()) is the only thread
-// safe function.
-//
-// Currently, each query will create and initialize one of these
-// objects.  This requires loading and parsing the cross compiled modules.
-// TODO: we should be able to do this once per process and let llvm compile
-// functions from across modules.
-//
-// LLVM has a nontrivial memory management scheme and objects will take
-// ownership of others.  The document is pretty good about being explicit with this
-// but it is not very intuitive.
-// TODO: look into diagnostic output and debuggability
-// TODO: confirm that the multi-threaded usage is correct
-class LlvmCodeGen {
-public:
-    // This function must be called once per process before any llvm API calls are
-    // made.  LLVM needs to allocate data structures for multi-threading support and
-    // to enable dynamic linking of jitted code.
-    // if 'load_backend', load the backend static object for llvm.  This is needed
-    // when libbackend.so is loaded from java.  llvm will be default only look in
-    // the current object and not be able to find the backend symbols
-    // TODO: this can probably be removed after Doris refactor where the java
-    // side is not loading the be explicitly anymore.
-    static void initialize_llvm(bool load_backend = false);
-
-    // Loads and parses the precompiled doris IR module
-    // codegen will contain the created object on success.
-    static Status load_doris_ir(
-        ObjectPool*, const std::string& id, boost::scoped_ptr<LlvmCodeGen>* codegen);
-
-    // Removes all jit compiled dynamically linked functions from the process.
-    ~LlvmCodeGen();
-
-    RuntimeProfile* runtime_profile() {
-        return &_profile;
-    }
-    RuntimeProfile::Counter* codegen_timer() {
-        return _codegen_timer;
-    }
-
-    // Turns on/off optimization passes
-    void enable_optimizations(bool enable);
-
-    // For debugging. Returns the IR that was generated.  If full_module, the
-    // entire module is dumped, including what was loaded from precompiled IR.
-    // If false, only output IR for functions which were generated.
-    std::string get_ir(bool full_module) const;
-
-    // Typedef builder in case we want to change the template arguments later
-    typedef llvm::IRBuilder<> LlvmBuilder;
-
-    // Utility struct that wraps a variable name and llvm type.
-    struct NamedVariable {
-        std::string name;
-        llvm::Type* type;
-
-        NamedVariable(const std::string& name = "", llvm::Type* type = NULL) {
-            this->name = name;
-            this->type = type;
-        }
-    };
-
-    // Abstraction over function prototypes.  Contains helpers to build prototypes and
-    // generate IR for the types.
-    class FnPrototype {
-    public:
-        // Create a function prototype object, specifying the name of the function and
-        // the return type.
-        FnPrototype(LlvmCodeGen*, const std::string& name, llvm::Type* ret_type);
-
-        // Returns name of function
-        const std::string& name() const {
-            return _name;
-        }
-
-        // Add argument
-        void add_argument(const NamedVariable& var) {
-            _args.push_back(var);
-        }
-
-        void add_argument(const std::string& name, llvm::Type* type) {
-            _args.push_back(NamedVariable(name, type));
-        }
-
-        // Generate LLVM function prototype.
-        // If a non-null builder is passed, this function will also create the entry block
-        // and set the builder's insert point to there.
-        // If params is non-null, this function will also return the arguments
-        // values (params[0] is the first arg, etc).
-        // In that case, params should be preallocated to be number of arguments
-        llvm::Function* generate_prototype(LlvmBuilder* builder = NULL,
-                                          llvm::Value** params = NULL);
-
-    private:
-        friend class LlvmCodeGen;
-
-        LlvmCodeGen* _codegen;
-        std::string _name;
-        llvm::Type* _ret_type;
-        std::vector<NamedVariable> _args;
-    };
-
-    /// Codegens IR to load array[idx] and returns the loaded value. 'array' should be a
-    /// C-style array (e.g. i32*) or an IR array (e.g. [10 x i32]). This function does not
-    /// do bounds checking.
-    llvm::Value* codegen_array_at(
-        LlvmBuilder*, llvm::Value* array, int idx, const char* name);
-
-    /// Return a pointer type to 'type'
-    llvm::PointerType* get_ptr_type(llvm::Type* type);
-
-    // Returns llvm type for the primitive type
-    llvm::Type* get_type(const PrimitiveType& type);
-
-    // Returns llvm type for the primitive type
-    llvm::Type* get_type(const TypeDescriptor& type);
-
-    // Return a pointer type to 'type' (e.g. int16_t*)
-    llvm::PointerType* get_ptr_type(const TypeDescriptor& type);
-    llvm::PointerType* get_ptr_type(const PrimitiveType& type);
-
-    // Returns the type with 'name'.  This is used to pull types from clang
-    // compiled IR.  The types we generate at runtime are unnamed.
-    // The name is generated by the clang compiler in this form:
-    // <class/struct>.<namespace>::<class name>.  For example:
-    // "class.doris::AggregationNode"
-    llvm::Type* get_type(const std::string& name);
-
-    /// Returns the pointer type of the type returned by GetType(name)
-    llvm::PointerType* get_ptr_type(const std::string& name);
-
-    /// Alloca's an instance of the appropriate pointer type and sets it to point at 'v'
-    llvm::Value* get_ptr_to(LlvmBuilder* builder, llvm::Value* v, const char* name);
-
-    /// Alloca's an instance of the appropriate pointer type and sets it to point at 'v'
-    llvm::Value* get_ptr_to(LlvmBuilder* builder, llvm::Value* v) {
-        return get_ptr_to(builder, v, "");
-    }
-
-    // Returns reference to llvm context object.  Each LlvmCodeGen has its own
-    // context to allow multiple threads to be calling into llvm at the same time.
-    llvm::LLVMContext& context() {
-        return *_context.get();
-    }
-
-    // Returns execution engine interface
-    llvm::ExecutionEngine* execution_engine() {
-        return _execution_engine.get();
-    }
-
-    // Returns the underlying llvm module
-    llvm::Module* module() {
-        return _module;
-    }
-
-    // Register a expr function with unique id.  It can be subsequently retrieved via
-    // get_registered_expr_fn with that id.
-    void register_expr_fn(int64_t id, llvm::Function* function) {
-        DCHECK(_registered_exprs_map.find(id) == _registered_exprs_map.end());
-        _registered_exprs_map[id] = function;
-        _registered_exprs.insert(function);
-    }
-
-    // Returns a registered expr function for id or NULL if it does not exist.
-    llvm::Function* get_registered_expr_fn(int64_t id) {
-        std::map<int64_t, llvm::Function*>::iterator it = _registered_exprs_map.find(id);
-
-        if (it == _registered_exprs_map.end()) {
-            return NULL;
-        }
-
-        return it->second;
-    }
-
-    /// Optimize and compile the module. This should be called after all functions to JIT
-    /// have been added to the module via AddFunctionToJit(). If optimizations_enabled_ is
-    /// false, the module will not be optimized before compilation.
-    Status finalize_module();
-
-    // Optimize the entire module.  LLVM is more built for running its optimization
-    // passes over the entire module (all the functions) rather than individual
-    // functions.
-    void optimize_module();
-
-    // Replaces all instructions that call 'target_name' with a call instruction
-    // to the new_fn.  Returns the modified function.
-    // - target_name is the unmangled function name that should be replaced.
-    //   The name is assumed to be unmangled so all call sites that contain the
-    //   replace_name substring will be replaced. target_name is case-sensitive
-    //   TODO: be more strict than substring? work out the mangling rules?
-    // - If update_in_place is true, the caller function will be modified in place.
-    //   Otherwise, the caller function will be cloned and the original function
-    //   is unmodified.  If update_in_place is false and the function is already
-    //   been dynamically linked, the existing function will be unlinked. Note that
-    //   this is very unthread-safe, if there are threads in the function to be unlinked,
-    //   bad things will happen.
-    // - 'num_replaced' returns the number of call sites updated
-    //
-    // Most of our use cases will likely not be in place.  We will have one 'template'
-    // version of the function loaded for each type of Node (e.g. AggregationNode).
-    // Each instance of the node will clone the function, replacing the inner loop
-    // body with the codegened version.  The codegened bodies differ from instance
-    // to instance since they are specific to the node's tuple desc.
-    llvm::Function* replace_call_sites(llvm::Function* caller, bool update_in_place,
-                                     llvm::Function* new_fn, const std::string& target_name, int* num_replaced);
-
-    /// Returns a copy of fn. The copy is added to the module.
-    llvm::Function* clone_function(llvm::Function* fn);
-
-    // Verify and optimize function.  This should be called at the end for each
-    // codegen'd function.  If the function does not verify, it will return NULL,
-    // otherwise, it will optimize, mark the function for inlining and return the
-    // function object.
-    llvm::Function* finalize_function(llvm::Function* function);
-
-    // Inline all function calls for 'fn'.  'fn' is modified in place.  Returns
-    // the number of functions inlined.  This is *not* called recursively
-    // (i.e. second level function calls are not inlined).  This can be called
-    // again to inline those until this returns 0.
-    int inline_call_sites(llvm::Function* fn, bool skip_registered_fns);
-
-    // Optimizes the function in place.  This uses a combination of llvm optimization
-    // passes as well as some custom heuristics.  This should be called for all
-    // functions which call Exprs.  The exprs will be inlined as much as possible,
-    // and will do basic sub expression elimination.
-    // This should be called before optimize_module for functions that want to remove
-    // redundant exprs.  This should be called at the highest level possible to
-    // maximize the number of redundant exprs that can be found.
-    // TODO: we need to spend more time to output better IR.  Asking llvm to
-    // remove redundant codeblocks on its own is too difficult for it.
-    // TODO: this should implement the llvm FunctionPass interface and integrated
-    // with the llvm optimization passes.
-    llvm::Function* optimize_function_with_exprs(llvm::Function* fn);
-
-    /// Adds the function to be automatically jit compiled after the module is optimized.
-    /// That is, after FinalizeModule(), this will do *result_fn_ptr = JitFunction(fn);
-    //
-    /// This is useful since it is not valid to call JitFunction() before every part of the
-    /// query has finished adding their IR and it's convenient to not have to rewalk the
-    /// objects. This provides the same behavior as walking each of those objects and calling
-    /// JitFunction().
-    //
-    /// In addition, any functions not registered with AddFunctionToJit() are marked as
-    /// internal in FinalizeModule() and may be removed as part of optimization.
-    //
-    /// This will also wrap functions returning DecimalVals in an ABI-compliant wrapper (see
-    /// the comment in the .cc file for details). This is so we don't accidentally try to
-    /// call non-compliant code from native code.
-    void add_function_to_jit(llvm::Function* fn, void** fn_ptr);
-
-    // Jit compile the function.  This will run optimization passes and verify
-    // the function.  The result is a function pointer that is dynamically linked
-    // into the process.
-    // Returns NULL if the function is invalid.
-    // scratch_size will be set to the buffer size required to call the function
-    // scratch_size is the total size from all LlvmCodeGen::get_scratch_buffer
-    // calls (with some additional bytes for alignment)
-    // This function is thread safe.
-    void* jit_function(llvm::Function* function, int* scratch_size = NULL);
-
-    // Verfies the function if the verfier is enabled.  Returns false if function
-    // is invalid.
-    bool verify_function(llvm::Function* function);
-
-    // This will generate a printf call instruction to output 'message' at the
-    // builder's insert point.  Only for debugging.
-    void codegen_debug_trace(LlvmBuilder* builder, const char* message);
-
-    /// Returns the string representation of a llvm::Value* or llvm::Type*
-    template <typename T> 
-    static std::string print(T* value_or_type) {
-        std::string str;
-        llvm::raw_string_ostream stream(str);
-        value_or_type->print(stream);
-        return str;
-    }
-
-    // Returns the libc function, adding it to the module if it has not already been.
-    llvm::Function* get_lib_c_function(FnPrototype* prototype);
-
-    // Returns the cross compiled function. IRFunction::Type is an enum which is
-    // defined in 'doris-ir/doris-ir-functions.h'
-    llvm::Function* get_function(IRFunction::Type);
-
-    // Returns the hash function with signature:
-    //   int32_t Hash(int8_t* data, int len, int32_t seed);
-    // If num_bytes is non-zero, the returned function will be codegen'd to only
-    // work for that number of bytes.  It is invalid to call that function with a
-    // different 'len'.
-    llvm::Function* get_hash_function(int num_bytes = -1);
-
-    // Allocate stack storage for local variables.  This is similar to traditional c, where
-    // all the variables must be declared at the top of the function.  This helper can be
-    // called from anywhere and will add a stack allocation for 'var' at the beginning of
-    // the function.  This would be used, for example, if a function needed a temporary
-    // struct allocated.  The allocated variable is scoped to the function.
-    // This is not related to get_scratch_buffer which is used for structs that are returned
-    // to the caller.
-    llvm::AllocaInst* create_entry_block_alloca(llvm::Function* f, const NamedVariable& var);
-    llvm::AllocaInst* create_entry_block_alloca(
-        const LlvmBuilder& builder, llvm::Type* type, const char* name);
-
-    // Utility to create two blocks in 'fn' for if/else codegen.  if_block and else_block
-    // are return parameters.  insert_before is optional and if set, the two blocks
-    // will be inserted before that block otherwise, it will be inserted at the end
-    // of 'fn'.  Being able to place blocks is useful for debugging so the IR has a
-    // better looking control flow.
-    void create_if_else_blocks(llvm::Function* fn, const std::string& if_name,
-                            const std::string& else_name,
-                            llvm::BasicBlock** if_block, llvm::BasicBlock** else_block,
-                            llvm::BasicBlock* insert_before = NULL);
-
-    // Returns offset into scratch buffer: offset points to area of size 'byte_size'
-    // Called by expr generation to request scratch buffer.  This is used for struct
-    // types (i.e. StringValue) where data cannot be returned by registers.
-    // For example, to jit the expr "strlen(str_col)", we need a temporary StringValue
-    // struct from the inner SlotRef expr node.  The SlotRef node would call
-    // get_scratch_buffer(sizeof(StringValue)) and output the intermediate struct at
-    // scratch_buffer (passed in as argument to compute function) + offset.
-    int get_scratch_buffer(int byte_size);
-
-    // Create a llvm pointer value from 'ptr'.  This is used to pass pointers between
-    // c-code and code-generated IR.  The resulting value will be of 'type'.
-    llvm::Value* cast_ptr_to_llvm_ptr(llvm::Type* type, void* ptr);
-
-    // Returns the constant 'val' of 'type'
-    llvm::Value* get_int_constant(PrimitiveType type, int64_t val);
-
-    // Returns true/false constants (bool type)
-    llvm::Value* true_value() {
-        return _true_value;
-    }
-    llvm::Value* false_value() {
-        return _false_value;
-    }
-    llvm::Value* null_ptr_value() {
-        return llvm::ConstantPointerNull::get(ptr_type());
-    }
-
-    // Simple wrappers to reduce code verbosity
-    llvm::Type* boolean_type() {
-        return get_type(TYPE_BOOLEAN);
-    }
-    llvm::Type* tinyint_type() {
-        return get_type(TYPE_TINYINT);
-    }
-    llvm::Type* smallint_type() {
-        return get_type(TYPE_SMALLINT);
-    }
-    llvm::Type* int_type() {
-        return get_type(TYPE_INT);
-    }
-    llvm::Type* bigint_type() {
-        return get_type(TYPE_BIGINT);
-    }
-    llvm::Type* largeint_type() {
-        return get_type(TYPE_LARGEINT);
-    }
-    llvm::Type* float_type() {
-        return get_type(TYPE_FLOAT);
-    }
-    llvm::Type* double_type() {
-        return get_type(TYPE_DOUBLE);
-    }
-    llvm::Type* string_val_type() const {
-        return _string_val_type;
-    }
-    llvm::Type* datetime_val_type() const {
-        return _datetime_val_type;
-    }
-    llvm::Type* decimal_val_type() const {
-        return _decimal_val_type;
-    }
-    llvm::PointerType* ptr_type() {
-        return _ptr_type;
-    }
-    llvm::Type* void_type() {
-        return _void_type;
-    }
-
-    llvm::Type* i128_type() { 
-        return llvm::Type::getIntNTy(context(), 128); 
-    }
-
-    // Fills 'functions' with all the functions that are defined in the module.
-    // Note: this does not include functions that are just declared
-    void get_functions(std::vector<llvm::Function*>* functions);
-
-    // Generates function to return min/max(v1, v2)
-    llvm::Function* codegen_min_max(const TypeDescriptor& type, bool min);
-
-    // Codegen to call llvm memcpy intrinsic at the current builder location
-    // dst & src must be pointer types.  size is the number of bytes to copy.
-    void codegen_memcpy(LlvmBuilder*, llvm::Value* dst, llvm::Value* src, int size);
-
-    // Codegen for do *dst = src.  For native types, this is just a store, for structs
-    // we need to assign the fields one by one
-    void codegen_assign(LlvmBuilder*, llvm::Value* dst, llvm::Value* src, PrimitiveType);
-
-    llvm::Instruction::CastOps get_cast_op(
-            const TypeDescriptor& from_type, const TypeDescriptor& to_type);
-
-private:
-    friend class LlvmCodeGenTest;
-    friend class SubExprElimination;
-
-    // Top level codegen object.  'module_name' is only used for debugging when
-    // outputting the IR.  module's loaded from disk will be named as the file
-    // path.
-    LlvmCodeGen(ObjectPool* pool, const std::string& module_name);
-
-    // Initializes the jitter and execution engine.
-    Status init();
-
-    // Load a pre-compiled IR module from 'file'.  This creates a top level
-    // codegen object.  This is used by tests to load custom modules.
-    // codegen will contain the created object on success.
-    static Status load_from_file(ObjectPool*, const std::string& file,
-                               boost::scoped_ptr<LlvmCodeGen>* codegen);
-
-    /// Load a pre-compiled IR module from module_ir.  This creates a top level codegen
-    /// object.  codegen will contain the created object on success.
-    static Status load_from_memory(ObjectPool* pool, llvm::MemoryBuffer* module_ir,
-                                   const std::string& module_name, const std::string& id, 
-                                   boost::scoped_ptr<LlvmCodeGen>* codegen);
-
-    /// Loads an LLVM module. 'module_ir' should be a reference to a memory buffer containing
-    /// LLVM bitcode. module_name is the name of the module to use when reporting errors.
-    /// The caller is responsible for cleaning up module.
-    static Status load_module_from_memory(LlvmCodeGen* codegen, llvm::MemoryBuffer* module_ir,
-                                          const std::string& module_name, llvm::Module** module);
-
-    // Load the intrinsics doris needs.  This is a one time initialization.
-    // Values are stored in '_llvm_intrinsics'
-    Status load_intrinsics();
-
-    // Clears generated hash fns.  This is only used for testing.
-    void clear_hash_fns();
-
-    // Name of the JIT module.  Useful for debugging.
-    std::string _name;
-
-    // Codegen counters
-    RuntimeProfile _profile;
-    RuntimeProfile::Counter* _load_module_timer;
-    RuntimeProfile::Counter* _prepare_module_timer;
-    RuntimeProfile::Counter* _module_file_size;
-    RuntimeProfile::Counter* _codegen_timer;
-    RuntimeProfile::Counter* _optimization_timer;
-    RuntimeProfile::Counter* _compile_timer;
-
-    // whether or not optimizations are enabled
-    bool _optimizations_enabled;
-
-    // If true, the module is corrupt and we cannot codegen this query.
-    // TODO: we could consider just removing the offending function and attempting to
-    // codegen the rest of the query.  This requires more testing though to make sure
-    // that the error is recoverable.
-    bool _is_corrupt;
-
-    // If true, the module has been compiled.  It is not valid to add additional
-    // functions after this point.
-    bool _is_compiled;
-
-    // Error string that llvm will write to
-    std::string _error_string;
-
-    // Top level llvm object.  Objects from different contexts do not share anything.
-    // We can have multiple instances of the LlvmCodeGen object in different threads
-    boost::scoped_ptr<llvm::LLVMContext> _context;
-
-    // Top level codegen object.  Contains everything to jit one 'unit' of code.
-    // Owned by the _execution_engine.
-    llvm::Module* _module;
-
-    // Execution/Jitting engine.
-    boost::scoped_ptr<llvm::ExecutionEngine> _execution_engine;
-
-    // current offset into scratch buffer
-    int _scratch_buffer_offset;
-
-    // Keeps track of all the functions that have been jit compiled and linked into
-    // the process. Special care needs to be taken if we need to modify these functions.
-    // bool is unused.
-    std::map<llvm::Function*, bool> _jitted_functions;
-
-    // Lock protecting _jitted_functions
-    boost::mutex _jitted_functions_lock;
-
-    // Keeps track of the external functions that have been included in this module
-    // e.g libc functions or non-jitted doris functions.
-    // TODO: this should probably be FnPrototype->Functions mapping
-    std::map<std::string, llvm::Function*> _external_functions;
-
-    // Functions parsed from pre-compiled module.  Indexed by DorisIR::Function enum
-    std::vector<llvm::Function*> _loaded_functions;
-
-    // Stores functions codegen'd by doris.  This does not contain cross compiled
-    // functions, only function that were generated at runtime.  Does not overlap
-    // with _loaded_functions.
-    std::vector<llvm::Function*> _codegend_functions;
-
-    // A mapping of unique id to registered expr functions
-    std::map<int64_t, llvm::Function*> _registered_exprs_map;
-
-    // A set of all the functions in '_registered_exprs_map' for quick lookup.
-    std::set<llvm::Function*> _registered_exprs;
-
-    // A cache of loaded llvm intrinsics
-    std::map<llvm::Intrinsic::ID, llvm::Function*> _llvm_intrinsics;
-
-    // This is a cache of generated hash functions by byte size.  It is common
-    // for the caller to know the number of bytes to hash (e.g. tuple width) and
-    // we can codegen a loop unrolled hash function.
-    std::map<int, llvm::Function*> _hash_fns;
-
-    /// The locations of modules that have been linked. Used to avoid linking the same module
-    /// twice, which causes symbol collision errors.
-    std::set<std::string> _linked_modules;
-
-    /// The vector of functions to automatically JIT compile after FinalizeModule().
-    std::vector<std::pair<llvm::Function*, void**> > _fns_to_jit_compile;
-
-    // Debug utility that will insert a printf-like function into the generated
-    // IR.  Useful for debugging the IR.  This is lazily created.
-    llvm::Function* _debug_trace_fn;
-
-    // Debug strings that will be outputted by jitted code.  This is a copy of all
-    // strings passed to codegen_debug_trace.
-    std::vector<std::string> _debug_strings;
-
-    // llvm representation of a few common types.  Owned by context.
-    llvm::PointerType* _ptr_type;     // int8_t*
-    llvm::Type* _void_type;           // void
-    llvm::Type* _string_val_type;     // StringVal
-    llvm::Type* _decimal_val_type;    // StringVal
-    llvm::Type* _datetime_val_type;   // DateTimeValue
-
-    // llvm constants to help with code gen verbosity
-    llvm::Value* _true_value;
-    llvm::Value* _false_value;
-};
-
-}
-
-#endif
-
diff --git a/be/src/codegen/llvm_codegen_test.cpp b/be/src/codegen/llvm_codegen_test.cpp
deleted file mode 100644
index 540f14fb6f..0000000000
--- a/be/src/codegen/llvm_codegen_test.cpp
+++ /dev/null
@@ -1,455 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <string>
-#include <gtest/gtest.h>
-#include <boost/thread/thread.hpp>
-
-#include "codegen/llvm-codegen.h"
-#include "runtime/raw-value.h"
-#include "util/cpu-info.h"
-#include "util/disk-info.h"
-#include "util/hash-util.h"
-#include "util/mem-info.h"
-#include "util/path-builder.h"
-
-using namespace std;
-using namespace boost;
-using namespace llvm;
-
-namespace doris {
-
-class LlvmCodeGenTest : public testing::Test {
-private:
-    static void LifetimeTest() {
-        ObjectPool pool;
-        Status status;
-
-        for (int i = 0; i < 10; ++i) {
-            LlvmCodeGen object1(&pool, "Test");
-            LlvmCodeGen object2(&pool, "Test");
-            LlvmCodeGen object3(&pool, "Test");
-
-            status = object1.Init();
-            ASSERT_TRUE(status.ok());
-
-            status = object2.Init();
-            ASSERT_TRUE(status.ok());
-
-            status = object3.Init();
-            ASSERT_TRUE(status.ok());
-        }
-    }
-
-    // Wrapper to call private test-only methods on LlvmCodeGen object
-    static Status load_from_file(ObjectPool* pool, const string& filename,
-                               scoped_ptr<LlvmCodeGen>* codegen) {
-        return LlvmCodeGen::load_from_file(pool, filename, codegen);
-    }
-
-    static LlvmCodeGen* CreateCodegen(ObjectPool* pool) {
-        LlvmCodeGen* codegen = pool->Add(new LlvmCodeGen(pool, "Test"));
-
-        if (codegen != NULL) {
-            Status status = codegen->Init();
-
-            if (!status.ok()) {
-                return NULL;
-            }
-        }
-
-        return codegen;
-    }
-
-    static void clear_hash_fns(LlvmCodeGen* codegen) {
-        codegen->clear_hash_fns();
-    }
-};
-
-// Simple test to just make and destroy llvmcodegen objects.  LLVM
-// has non-obvious object ownership transfers and this sanity checks that.
-TEST_F(LlvmCodeGenTest, BasicLifetime) {
-    LifetimeTest();
-}
-
-// Same as above but multithreaded
-TEST_F(LlvmCodeGenTest, MultithreadedLifetime) {
-    const int NUM_THREADS = 10;
-    thread_group thread_group;
-
-    for (int i = 0; i < NUM_THREADS; ++i) {
-        thread_group.add_thread(new thread(&LifetimeTest));
-    }
-
-    thread_group.join_all();
-}
-
-// Test loading a non-existent file
-TEST_F(LlvmCodeGenTest, BadIRFile) {
-    ObjectPool pool;
-    string module_file = "NonExistentFile.ir";
-    scoped_ptr<LlvmCodeGen> codegen;
-    Status status = LlvmCodeGenTest::load_from_file(&pool, module_file.c_str(), &codegen);
-    EXPECT_TRUE(!status.ok());
-}
-
-// IR for the generated linner loop
-// define void @JittedInnerLoop() {
-// entry:
-//   call void @DebugTrace(i8* inttoptr (i64 18970856 to i8*))
-//   %0 = load i64* inttoptr (i64 140735197627800 to i64*)
-//   %1 = add i64 %0, <delta>
-//   store i64 %1, i64* inttoptr (i64 140735197627800 to i64*)
-//   ret void
-// }
-// The random int in there is the address of jitted_counter
-Function* CodegenInnerLoop(LlvmCodeGen* codegen, int64_t* jitted_counter, int delta) {
-    LLVMContext& context = codegen->context();
-    LlvmCodeGen::LlvmBuilder builder(context);
-
-    LlvmCodeGen::FnPrototype fn_prototype(codegen, "JittedInnerLoop", codegen->void_type());
-    Function* jitted_loop_call = fn_prototype.generate_prototype();
-    BasicBlock* entry_block = BasicBlock::Create(context, "entry", jitted_loop_call);
-    builder.SetInsertPoint(entry_block);
-    codegen->codegen_debug_trace(&builder, "Jitted");
-
-    // Store &jitted_counter as a constant.
-    Value* const_delta = ConstantInt::get(context, APInt(64, delta));
-    Value* counter_ptr = codegen->cast_ptr_to_llvm_ptr(codegen->get_ptr_type(TYPE_BIGINT),
-                         jitted_counter);
-    Value* loaded_counter = builder.CreateLoad(counter_ptr);
-    Value* incremented_value = builder.CreateAdd(loaded_counter, const_delta);
-    builder.CreateStore(incremented_value, counter_ptr);
-    builder.CreateRetVoid();
-
-    return jitted_loop_call;
-}
-
-// This test loads a precompiled IR file (compiled from testdata/llvm/test-loop.cc).
-// The test contains two functions, an outer loop function and an inner loop function.
-// The outer loop calls the inner loop function.
-// The test will
-//   1. create a LlvmCodegen object from the precompiled file
-//   2. add another function to the module with the same signature as the inner
-//      loop function.
-//   3. Replace the call instruction in the outer loop to a call to the new inner loop
-//      function.
-//   4. Run the loop and make sure the inner loop is called.
-//   5. Updated the jitted loop in place with another jitted inner loop function
-//   6. Run the loop and make sure the updated is called.
-TEST_F(LlvmCodeGenTest, ReplaceFnCall) {
-    ObjectPool pool;
-    const char* loop_call_name = "DefaultImplementation";
-    const char* loop_name = "TestLoop";
-    typedef void (*TestLoopFn)(int);
-
-    string module_file;
-    PathBuilder::GetFullPath("llvm-ir/test-loop.ir", &module_file);
-
-    // Part 1: Load the module and make sure everything is loaded correctly.
-    scoped_ptr<LlvmCodeGen> codegen;
-    Status status = LlvmCodeGenTest::load_from_file(&pool, module_file.c_str(), &codegen);
-    EXPECT_TRUE(codegen.get() != NULL);
-    EXPECT_TRUE(status.ok());
-
-    vector<Function*> functions;
-    codegen->get_functions(&functions);
-    EXPECT_EQ(functions.size(), 2);
-
-    Function* loop_call = functions[0];
-    Function* loop = functions[1];
-
-    EXPECT_TRUE(loop_call->getName().find(loop_call_name) != string::npos);
-    EXPECT_TRUE(loop_call->arg_empty());
-    EXPECT_TRUE(loop->getName().find(loop_name) != string::npos);
-    EXPECT_EQ(loop->arg_size(), 1);
-
-    int scratch_size;
-    void* original_loop = codegen->jit_function(loop, &scratch_size);
-    EXPECT_EQ(scratch_size, 0);
-    EXPECT_TRUE(original_loop != NULL);
-
-    TestLoopFn original_loop_fn = reinterpret_cast<TestLoopFn>(original_loop);
-    original_loop_fn(5);
-
-    // Part 2: Generate a new inner loop function.
-    //
-    // The c++ version of the code is
-    // static int64_t* counter;
-    // void JittedInnerLoop() {
-    //   printf("LLVM Trace: Jitted\n");
-    //   ++*counter;
-    // }
-    //
-    int64_t jitted_counter = 0;
-    Function* jitted_loop_call = CodegenInnerLoop(codegen.get(), &jitted_counter, 1);
-
-    // Part 3: Replace the call instruction to the normal function with a call to the
-    // jitted one
-    int num_replaced;
-    Function* jitted_loop = codegen->replace_call_sites(
-                                loop, false, jitted_loop_call, loop_call_name, &num_replaced);
-    EXPECT_EQ(num_replaced, 1);
-    EXPECT_TRUE(codegen->verify_function(jitted_loop));
-
-    // Part4: Call the new loop and verify results
-    void* new_loop = codegen->jit_function(jitted_loop, &scratch_size);
-    EXPECT_EQ(scratch_size, 0);
-    EXPECT_TRUE(new_loop != NULL);
-
-    TestLoopFn new_loop_fn = reinterpret_cast<TestLoopFn>(new_loop);
-    EXPECT_EQ(jitted_counter, 0);
-    new_loop_fn(5);
-    EXPECT_EQ(jitted_counter, 5);
-    new_loop_fn(5);
-    EXPECT_EQ(jitted_counter, 10);
-
-    // Part5: Generate a new inner loop function and a new loop function in place
-    Function* jitted_loop_call2 = CodegenInnerLoop(codegen.get(), &jitted_counter, -2);
-    Function* jitted_loop2 = codegen->replace_call_sites(loop, true, jitted_loop_call2,
-                             loop_call_name, &num_replaced);
-    EXPECT_EQ(num_replaced, 1);
-    EXPECT_TRUE(codegen->verify_function(jitted_loop2));
-
-    // Part6: Call new loop
-    void* new_loop2 = codegen->jit_function(jitted_loop2, &scratch_size);
-    EXPECT_EQ(scratch_size, 0);
-    EXPECT_TRUE(new_loop2 != NULL);
-
-    TestLoopFn new_loop_fn2 = reinterpret_cast<TestLoopFn>(new_loop2);
-    new_loop_fn2(5);
-    EXPECT_EQ(jitted_counter, 0);
-}
-
-// Test function for c++/ir interop for strings.  Function will do:
-// int StringTest(StringValue* strval) {
-//   strval->ptr[0] = 'A';
-//   int len = strval->len;
-//   strval->len = 1;
-//   return len;
-// }
-// Corresponding IR is:
-// define i32 @StringTest(%StringValue* %str) {
-// entry:
-//   %str_ptr = getelementptr inbounds %StringValue* %str, i32 0, i32 0
-//   %ptr = load i8** %str_ptr
-//   %first_char_ptr = getelementptr i8* %ptr, i32 0
-//   store i8 65, i8* %first_char_ptr
-//   %len_ptr = getelementptr inbounds %StringValue* %str, i32 0, i32 1
-//   %len = load i32* %len_ptr
-//   store i32 1, i32* %len_ptr
-//   ret i32 %len
-// }
-Function* CodegenStringTest(LlvmCodeGen* codegen) {
-    PointerType* string_val_ptr_type = codegen->get_ptr_type(TYPE_VARCHAR);
-    EXPECT_TRUE(string_val_ptr_type != NULL);
-
-    LlvmCodeGen::FnPrototype prototype(codegen, "StringTest", codegen->get_type(TYPE_INT));
-    prototype.add_argument(LlvmCodeGen::NamedVariable("str", string_val_ptr_type));
-    LlvmCodeGen::LlvmBuilder builder(codegen->context());
-
-    Value* str = NULL;
-    Function* interop_fn = prototype.generate_prototype(&builder, &str);
-
-    // strval->ptr[0] = 'A'
-    Value* str_ptr = builder.CreateStructGEP(str, 0, "str_ptr");
-    Value* ptr = builder.CreateLoad(str_ptr, "ptr");
-    Value* first_char_offset[] = { codegen->get_int_constant(TYPE_INT, 0) };
-    Value* first_char_ptr = builder.CreateGEP(ptr, first_char_offset, "first_char_ptr");
-    builder.CreateStore(codegen->get_int_constant(TYPE_TINYINT, 'A'), first_char_ptr);
-
-    // Update and return old len
-    Value* len_ptr = builder.CreateStructGEP(str, 1, "len_ptr");
-    Value* len = builder.CreateLoad(len_ptr, "len");
-    builder.CreateStore(codegen->get_int_constant(TYPE_INT, 1), len_ptr);
-    builder.CreateRet(len);
-
-    return interop_fn;
-}
-
-// This test validates that the llvm StringValue struct matches the c++ stringvalue
-// struct.  Just create a simple StringValue struct and make sure the IR can read it
-// and modify it.
-TEST_F(LlvmCodeGenTest, StringValue) {
-    ObjectPool pool;
-
-    scoped_ptr<LlvmCodeGen> codegen;
-    Status status = LlvmCodeGen::load_doris_ir(&pool, &codegen);
-    EXPECT_TRUE(status.ok());
-    EXPECT_TRUE(codegen.get() != NULL);
-
-    string str("Test");
-
-    StringValue str_val;
-    // Call memset to make sure padding bits are zero.
-    memset(&str_val, 0, sizeof(str_val));
-    str_val.ptr = const_cast<char*>(str.c_str());
-    str_val.len = str.length();
-
-    Function* string_test_fn = CodegenStringTest(codegen.get());
-    EXPECT_TRUE(string_test_fn != NULL);
-    EXPECT_TRUE(codegen->verify_function(string_test_fn));
-
-    // Jit compile function
-    void* jitted_fn = codegen->jit_function(string_test_fn);
-    EXPECT_TRUE(jitted_fn != NULL);
-
-    // Call IR function
-    typedef int (*TestStringInteropFn)(StringValue*);
-    TestStringInteropFn fn = reinterpret_cast<TestStringInteropFn>(jitted_fn);
-    int result = fn(&str_val);
-
-    // Validate
-    EXPECT_EQ(str.length(), result);
-    EXPECT_EQ('A', str_val.ptr[0]);
-    EXPECT_EQ(1, str_val.len);
-    EXPECT_EQ(static_cast<void*>(str_val.ptr), static_cast<const void*>(str.c_str()));
-
-    // Validate padding bytes are unchanged
-    int32_t* bytes = reinterpret_cast<int32_t*>(&str_val);
-    EXPECT_EQ(1, bytes[2]);   // str_val.len
-    EXPECT_EQ(0, bytes[3]);   // padding
-}
-
-// Test calling memcpy intrinsic
-TEST_F(LlvmCodeGenTest, MemcpyTest) {
-    ObjectPool pool;
-
-    scoped_ptr<LlvmCodeGen> codegen;
-    Status status = LlvmCodeGen::load_doris_ir(&pool, &codegen);
-    ASSERT_TRUE(status.ok());
-    ASSERT_TRUE(codegen.get() != NULL);
-
-    LlvmCodeGen::FnPrototype prototype(codegen.get(), "MemcpyTest", codegen->void_type());
-    prototype.add_argument(LlvmCodeGen::NamedVariable("dest", codegen->ptr_type()));
-    prototype.add_argument(LlvmCodeGen::NamedVariable("src", codegen->ptr_type()));
-    prototype.add_argument(LlvmCodeGen::NamedVariable("n", codegen->get_type(TYPE_INT)));
-
-    LlvmCodeGen::LlvmBuilder builder(codegen->context());
-
-    char src[] = "abcd";
-    char dst[] = "aaaa";
-
-    Value* args[3];
-    Function* fn = prototype.generate_prototype(&builder, &args[0]);
-    codegen->codegen_memcpy(&builder, args[0], args[1], sizeof(src));
-    builder.CreateRetVoid();
-
-    fn = codegen->finalize_function(fn);
-    ASSERT_TRUE(fn != NULL);
-
-    void* jitted_fn = codegen->jit_function(fn);
-    ASSERT_TRUE(jitted_fn != NULL);
-
-    typedef void (*TestMemcpyFn)(char*, char*, int64_t);
-    TestMemcpyFn test_fn = reinterpret_cast<TestMemcpyFn>(jitted_fn);
-
-    test_fn(dst, src, 4);
-
-    EXPECT_EQ(memcmp(src, dst, 4), 0);
-}
-
-// Test codegen for hash
-TEST_F(LlvmCodeGenTest, HashTest) {
-    ObjectPool pool;
-
-    // Values to compute hash on
-    const char* data1 = "test string";
-    const char* data2 = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
-
-    scoped_ptr<LlvmCodeGen> codegen;
-    Status status = LlvmCodeGen::load_doris_ir(&pool, &codegen);
-    ASSERT_TRUE(status.ok());
-    ASSERT_TRUE(codegen.get() != NULL);
-
-    bool restore_sse_support = false;
-
-    Value* llvm_data1 = codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(),
-                        const_cast<char*>(data1));
-    Value* llvm_data2 = codegen->cast_ptr_to_llvm_ptr(codegen->ptr_type(),
-                        const_cast<char*>(data2));
-    Value* llvm_len1 = codegen->get_int_constant(TYPE_INT, strlen(data1));
-    Value* llvm_len2 = codegen->get_int_constant(TYPE_INT, strlen(data2));
-
-    // Loop to test both the sse4 on/off paths
-    for (int i = 0; i < 2; ++i) {
-        uint32_t expected_hash = 0;
-        expected_hash = HashUtil::Hash(data1, strlen(data1), expected_hash);
-        expected_hash = HashUtil::Hash(data2, strlen(data2), expected_hash);
-        expected_hash = HashUtil::Hash(data1, strlen(data1), expected_hash);
-
-        // Create a codegen'd function that hashes all the types and returns the results.
-        // The tuple/values to hash are baked into the codegen for simplicity.
-        LlvmCodeGen::FnPrototype prototype(codegen.get(), "HashTest",
-                                           codegen->get_type(TYPE_INT));
-        LlvmCodeGen::LlvmBuilder builder(codegen->context());
-
-        // Test both byte-size specific hash functions and the generic loop hash function
-        Function* fn_fixed = prototype.generate_prototype(&builder, NULL);
-        Function* data1_hash_fn = codegen->get_hash_function(strlen(data1));
-        Function* data2_hash_fn = codegen->get_hash_function(strlen(data2));
-        Function* generic_hash_fn = codegen->get_hash_function();
-
-        ASSERT_TRUE(data1_hash_fn != NULL);
-        ASSERT_TRUE(data2_hash_fn != NULL);
-        ASSERT_TRUE(generic_hash_fn != NULL);
-
-        Value* seed = codegen->get_int_constant(TYPE_INT, 0);
-        seed = builder.CreateCall3(data1_hash_fn, llvm_data1, llvm_len1, seed);
-        seed = builder.CreateCall3(data2_hash_fn, llvm_data2, llvm_len2, seed);
-        seed = builder.CreateCall3(generic_hash_fn, llvm_data1, llvm_len1, seed);
-        builder.CreateRet(seed);
-
-        fn_fixed = codegen->finalize_function(fn_fixed);
-        ASSERT_TRUE(fn_fixed != NULL);
-
-        void* jitted_fn = codegen->jit_function(fn_fixed);
-        ASSERT_TRUE(jitted_fn != NULL);
-
-        typedef uint32_t (*TestHashFn)();
-        TestHashFn test_fn = reinterpret_cast<TestHashFn>(jitted_fn);
-
-        uint32_t result = test_fn();
-
-        // Validate that the hashes are identical
-        EXPECT_EQ(result, expected_hash);
-
-        if (i == 0 && CpuInfo::is_supported(CpuInfo::SSE4_2)) {
-            CpuInfo::EnableFeature(CpuInfo::SSE4_2, false);
-            restore_sse_support = true;
-            LlvmCodeGenTest::clear_hash_fns(codegen.get());
-        } else {
-            // System doesn't have sse, no reason to test non-sse path again.
-            break;
-        }
-    }
-
-    // Restore hardware feature for next test
-    CpuInfo::EnableFeature(CpuInfo::SSE4_2, restore_sse_support);
-}
-
-}
-
-int main(int argc, char** argv) {
-    doris::CpuInfo::Init();
-    doris::DiskInfo::Init();
-    doris::MemInfo::Init();
-    ::testing::InitGoogleTest(&argc, argv);
-    doris::LlvmCodeGen::initialize_llvm();
-    return RUN_ALL_TESTS();
-}
-
diff --git a/be/src/codegen/subexpr_elimination.cpp b/be/src/codegen/subexpr_elimination.cpp
deleted file mode 100644
index c8f4c32427..0000000000
--- a/be/src/codegen/subexpr_elimination.cpp
+++ /dev/null
@@ -1,228 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "codegen/subexpr_elimination.h"
-
-#include <fstream>
-#include <iostream>
-#include <sstream>
-
-#include <boost/thread/mutex.hpp>
-#include <llvm/Analysis/Dominators.h>
-#include <llvm/Analysis/Passes.h>
-#include <llvm/Analysis/InstructionSimplify.h>
-#include <llvm/Support/DynamicLibrary.h>
-#include <llvm/IRReader/IRReader.h>
-#include <llvm/Support/MemoryBuffer.h>
-#include <llvm/Support/InstIterator.h>
-#include <llvm/Support/NoFolder.h>
-#include <llvm/Support/TargetSelect.h>
-#include <llvm/Support/raw_ostream.h>
-#include <llvm/Support/system_error.h>
-#include "llvm/Transforms/IPO.h"
-#include <llvm/Transforms/Scalar.h>
-#include <llvm/Transforms/Utils/SSAUpdater.h>
-
-#include "common/logging.h"
-#include "codegen/subexpr_elimination.h"
-#include "doris_ir/doris_ir_names.h"
-#include "util/cpu_info.h"
-#include "util/path_builder.h"
-
-using llvm::CallInst;
-using llvm::BitCastInst;
-using llvm::Instruction;
-using llvm::LoadInst;
-using llvm::StoreInst;
-using llvm::Function;
-using llvm::Value;
-using llvm::DominatorTree;
-namespace doris {
-
-SubExprElimination::SubExprElimination(LlvmCodeGen* codegen) : _codegen(codegen) {
-}
-
-// Before running the standard llvm optimization passes, first remove redundant calls
-// to slotref expression.  SlotRefs are more heavyweight due to the null handling that
-// is required and after they are inlined, llvm is unable to eliminate the redundant
-// inlined code blocks.
-// For example:
-//   select colA + colA would generate an inner loop with 2 calls to the colA slot ref,
-// rather than doing subexpression elimination.  To handle this, we will:
-//   1. inline all call sites in the original function except calls to SlotRefs
-//   2. for all call sites to SlotRefs except the first to that SlotRef, replace the
-//      results from the secondary calls with the result from the first and remove
-//      the call instruction.
-//   3. Inline calls to the SlotRefs (there should only be one for each slot ref).
-//
-// In the above example, the input function would look something like:
-// int ArithmeticAdd(TupleRow* row, bool* is_null) {
-//   bool lhs_is_null, rhs_is_null;
-//   int lhs_value = SlotRef(row, &lhs_is_null);
-//   if (lhs_is_null) { *is_null = true; return 0; }
-//   int rhs_value = SlotRef(row, &rhs_is_null);
-//   if (rhs_is_null) { *is_null = true; return 0; }
-//   *is_null = false; return lhs_value + rhs_value;
-// }
-// During step 2, we'd substitute the second call to SlotRef with the results from
-// the first call.
-// int ArithmeticAdd(TupleRow* row, bool* is_null) {
-//   bool lhs_is_null, rhs_is_null;
-//   int lhs_value = SlotRef(row, &lhs_is_null);
-//   if (lhs_is_null) { *is_null = true; return 0; }
-//   int rhs_value = lhs_value;
-//   rhs_is_null = lhs_is_null;
-//   if (rhs_is_null) { *is_null = true; return 0; }
-//   *is_null = false; return lhs_value + rhs_value;
-// }
-// And then rely on llvm to finish the removing the redundant code, resulting in:
-// int ArithmeticAdd(TupleRow* row, bool* is_null) {
-//   bool lhs_is_null, rhs_is_null;
-//   int lhs_value = SlotRef(row, &lhs_is_null);
-//   if (lhs_is_null) { *is_null = true; return 0; }
-//   *is_null = false; return lhs_value + lhs_value;
-// }
-// Details on how to do this:
-// http://llvm.org/docs/ProgrammersManual.html#replacing-an-instruction-with-another-value
-
-// Step 2 requires more manipulation to ensure the resulting IR is still valid IR.
-// The call to the expr returns two things, both of which need to be replaced.
-// The value of the function as the return argument and whether or not the result was
-// null as a function output argument.
-//    1. The return value is trivial since with SSA, it is easy to identity all uses of
-//       We simply replace the subsequent call instructions with the value.
-//    2. For the is_null result ptr, we replace the call to the expr with a store
-//       instruction of the cached value.
-//       i.e:
-//           val1 = Call(is_null_ptr);
-//           is_null1 = *is_null_ptr
-//           ...
-//           val2 = Call(is_null_ptr);
-//           is_null2 = *is_null_ptr
-//       Becomes:
-//           val1 = Call(is_null_ptr);
-//           is_null1 = *is_null_ptr
-//           ...
-//           val2 = val1;
-//           *is_null_ptr = is_null1;
-//           is_null2 = *is_null_ptr
-//       We do this because the is_null ptr is not SSA form, making manipulating it
-//       complex. The above approach exactly preserves the Call function, including
-//       all writes to ptrs. We then rely on the llvm load/store removal pass which
-//       will remove the redundant loads (which is tricky since you have to track
-//       other instructions that wrote to the ptr, etc).
-// When doing the eliminations, we need to consider the call graph to make sure
-// the instruction we are replacing with dominates the instruction we are replacing;
-// that is, we need to guarantee the instruction we are replacing with always executes
-// before the replacee instruction in all code paths.
-// TODO: remove all this with expr refactoring. Everything will be SSA form then.
-struct CachedExprResult {
-    // First function call result. Subsequent calls will be replaced with this value
-    CallInst* result;
-    // First is null result. Subsequent calls will be replaced with this value.
-    Instruction* is_null_value;
-};
-
-bool SubExprElimination::run(Function* fn) {
-    // Step 1:
-    int num_inlined = 0;
-    do {
-        // This assumes that all redundant exprs have been registered.
-        num_inlined = _codegen->inline_call_sites(fn, true);
-    } while (num_inlined > 0);
-
-    // Mapping of (expr eval function, its 'row' arg) to cached result.  We want to remove
-    // redundant calls to the same function with the same argument.
-    std::map<std::pair<Function*, Value*>, CachedExprResult> cached_slot_ref_results;
-
-    // Step 2:
-    DominatorTree dom_tree;
-    dom_tree.runOnFunction(*fn);
-
-    llvm::inst_iterator fn_end = llvm::inst_end(fn);
-    llvm::inst_iterator instr_iter = llvm::inst_begin(fn);
-    // Loop over every instruction in the function.
-    while (instr_iter != fn_end) {
-        Instruction* instr = &*instr_iter;
-        ++instr_iter;
-        // Look for call instructions
-        if (!CallInst::classof(instr)) {
-            continue;
-        }
-
-        CallInst* call_instr = reinterpret_cast<CallInst*>(instr);
-        Function* called_fn = call_instr->getCalledFunction();
-        if (_codegen->_registered_exprs.find(called_fn) == 
-                _codegen->_registered_exprs.end()) {
-            continue;
-        }
-
-        // Found a registered expr function.  We generate the IR in a very specific way
-        // when calling the expr.  The call instruction is always followed by loading the
-        // resulting is_null result.  We need to update both.
-        // TODO: we need to update this to do more analysis since we are relying on a very
-        // specific code structure to do this.
-
-        // Arguments are (row, scratch_buffer, is_null);
-        DCHECK_EQ(call_instr->getNumArgOperands(), 3);
-        Value* row_arg = call_instr->getArgOperand(0);
-
-        DCHECK(BitCastInst::classof(row_arg));
-        BitCastInst* row_cast = reinterpret_cast<BitCastInst*>(row_arg);
-        // Get at the underlying row arg.  We need to differentiate between
-        // call Fn(row1) and call Fn(row2). (identical fns but different input).
-        row_arg = row_cast->getOperand(0);
-
-        instr = &*instr_iter;
-        ++instr_iter;
-
-        if (!LoadInst::classof(instr)) {
-            continue;
-        }
-        LoadInst* is_null_value = reinterpret_cast<LoadInst*>(instr);
-        Value* loaded_ptr = is_null_value->getPointerOperand();
-
-        // Subexpr elimination requires the IR to be a very specific form.
-        //   call SlotRef(row, NULL, is_null_ptr)
-        //   load is_null_ptr
-        // Since we generate this IR currently, we can enforce this logic in our exprs
-        // TODO: this should be removed/generalized with expr refactoring
-        DCHECK_EQ(loaded_ptr, call_instr->getArgOperand(2));
-
-        std::pair<Function*, Value*> call_desc = std::make_pair(called_fn, row_arg);
-        if (cached_slot_ref_results.find(call_desc) == cached_slot_ref_results.end()) {
-            CachedExprResult cache_entry;
-            cache_entry.result = call_instr;
-            cache_entry.is_null_value = is_null_value;
-            cached_slot_ref_results[call_desc] = cache_entry;
-        } else {
-            // Reuse the result.
-            CachedExprResult& cache_entry = cached_slot_ref_results[call_desc];
-            if (dom_tree.dominates(cache_entry.result, call_instr)) {
-                new StoreInst(cache_entry.is_null_value, loaded_ptr, call_instr);
-                call_instr->replaceAllUsesWith(cache_entry.result);
-                call_instr->eraseFromParent();
-            }
-        }
-    }
-
-    // Step 3:
-    _codegen->inline_call_sites(fn, false);
-    return true;
-}
-
-}
diff --git a/be/src/codegen/subexpr_elimination.h b/be/src/codegen/subexpr_elimination.h
deleted file mode 100644
index 965aae9351..0000000000
--- a/be/src/codegen/subexpr_elimination.h
+++ /dev/null
@@ -1,44 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef DORIS_BE_SRC_QUERY_CODEGEN_SUBEXPR_ELIMINATION_H
-#define DORIS_BE_SRC_QUERY_CODEGEN_SUBEXPR_ELIMINATION_H
-
-#include "common/status.h"
-#include "codegen/llvm_codegen.h"
-
-namespace doris {
-
-// Optimization pass to remove redundant exprs.
-// TODO: make this into a llvm function pass (i.e. implement FunctionPass interface).
-class SubExprElimination {
-public:
-    SubExprElimination(LlvmCodeGen* codegen);
-    ~SubExprElimination() { }
-
-    // Perform subexpr elimination on function.
-    bool run(llvm::Function* function);
-
-private:
-    // Parent codegen object.
-    LlvmCodeGen* _codegen;
-};
-
-}
-
-#endif
-
diff --git a/be/src/exprs/scalar_fn_call.cpp b/be/src/exprs/scalar_fn_call.cpp
index b6a3dc67b5..7c04158038 100644
--- a/be/src/exprs/scalar_fn_call.cpp
+++ b/be/src/exprs/scalar_fn_call.cpp
@@ -19,7 +19,6 @@
 
 #include <vector>
 
-#include "codegen/codegen_anyval.h"
 #include "exprs/anyval_util.h"
 #include "exprs/expr_context.h"
 #include "runtime/user_function_cache.h"
diff --git a/be/src/runtime/datetime_value.cpp b/be/src/runtime/datetime_value.cpp
index a2459e3edd..6aaeab04ff 100644
--- a/be/src/runtime/datetime_value.cpp
+++ b/be/src/runtime/datetime_value.cpp
@@ -28,8 +28,6 @@
 
 namespace doris {
 
-const char* DateTimeValue::_s_llvm_class_name = "class.doris::DateTimeValue";
-
 const uint64_t log_10_int[] = {
     1, 10, 100, 1000, 10000UL, 100000UL, 1000000UL, 10000000UL,
     100000000UL, 1000000000UL, 10000000000UL, 100000000000UL
diff --git a/be/src/runtime/datetime_value.h b/be/src/runtime/datetime_value.h
index 815bce6262..fc2822775d 100644
--- a/be/src/runtime/datetime_value.h
+++ b/be/src/runtime/datetime_value.h
@@ -454,8 +454,6 @@ public:
 
     void set_type(int type);
 
-    static const char* _s_llvm_class_name;
-
 private:
     // Used to make sure sizeof DateTimeValue
     friend class UnusedClass;
diff --git a/be/src/runtime/decimal_value.cpp b/be/src/runtime/decimal_value.cpp
index 0a7ba77500..ea06c90c04 100755
--- a/be/src/runtime/decimal_value.cpp
+++ b/be/src/runtime/decimal_value.cpp
@@ -23,8 +23,6 @@
 
 namespace doris {
 
-const char* DecimalValue::_s_llvm_class_name = "class.doris::DecimalValue";
-
 // set the 1st param if the second param is smaller.
 template<typename T> inline void set_if_smaller(T* num1_ptr, const T num2) {
     if (*num1_ptr > num2) {
diff --git a/be/src/runtime/decimal_value.h b/be/src/runtime/decimal_value.h
index abc8ca5be7..dc14606c2f 100755
--- a/be/src/runtime/decimal_value.h
+++ b/be/src/runtime/decimal_value.h
@@ -445,9 +445,6 @@ public:
 
     int round(DecimalValue *to, int scale, DecimalRoundMode mode);
 
-    // For C++/IR interop, we need to be able to look up types by name.
-    static const char* _s_llvm_class_name;
-
 private:
 
     friend class MultiDistinctDecimalState;
diff --git a/be/src/runtime/string_value.cpp b/be/src/runtime/string_value.cpp
index 8ac089236f..31508189b7 100644
--- a/be/src/runtime/string_value.cpp
+++ b/be/src/runtime/string_value.cpp
@@ -21,8 +21,6 @@
 
 namespace doris {
 
-const char* StringValue::s_llvm_class_name = "struct.doris::StringValue";
-
 std::string StringValue::debug_string() const {
     return std::string(ptr, len);
 }
diff --git a/be/src/runtime/string_value.h b/be/src/runtime/string_value.h
index 7059067e3b..2f2d33ceee 100644
--- a/be/src/runtime/string_value.h
+++ b/be/src/runtime/string_value.h
@@ -127,9 +127,6 @@ struct StringValue {
     static StringValue from_string_val(const doris_udf::StringVal& sv) {
         return StringValue(reinterpret_cast<char*>(sv.ptr), sv.len);
     }
-
-    // For C++/IR interop, we need to be able to look up types by name.
-    static const char* s_llvm_class_name;
 };
 
 // This function must be called 'hash_value' to be picked up by boost.
diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp
index e21ba42c63..0216c58662 100644
--- a/be/src/runtime/types.cpp
+++ b/be/src/runtime/types.cpp
@@ -21,12 +21,8 @@
 #include <sstream>
 #include <boost/foreach.hpp>
 
-#include "codegen/llvm_codegen.h"
-
 namespace doris {
 
-const char* TypeDescriptor::s_llvm_class_name = "struct.doris::TypeDescriptor";
-
 TypeDescriptor::TypeDescriptor(const std::vector<TTypeNode>& types, int* idx) : 
         len(-1), precision(-1), scale(-1) {
     DCHECK_GE(*idx, 0);
@@ -183,32 +179,5 @@ std::ostream& operator<<(std::ostream& os, const TypeDescriptor& type) {
   return os;
 }
 
-llvm::ConstantStruct* TypeDescriptor::to_ir(LlvmCodeGen* codegen) const {
-    // ColumnType = { i32, i32, i32, i32, <vector>, <vector> }
-    llvm::StructType* column_type_type = llvm::cast<llvm::StructType>(
-        codegen->get_type(s_llvm_class_name));
-
-    DCHECK_EQ(sizeof(type), sizeof(int32_t));
-    llvm::Constant* type_field = llvm::ConstantInt::get(codegen->int_type(), type);
-    DCHECK_EQ(sizeof(len), sizeof(int32_t));
-    llvm::Constant* len_field = llvm::ConstantInt::get(codegen->int_type(), len);
-    DCHECK_EQ(sizeof(precision), sizeof(int32_t));
-    llvm::Constant* precision_field = llvm::ConstantInt::get(codegen->int_type(), precision);
-    DCHECK_EQ(sizeof(scale), sizeof(int32_t));
-    llvm::Constant* scale_field = llvm::ConstantInt::get(codegen->int_type(), scale);
-
-    // Create empty 'children' and 'field_names' vectors
-    DCHECK(children.empty()) << "Nested types NYI";
-    DCHECK(field_names.empty()) << "Nested types NYI";
-    llvm::Constant* children_field = llvm::Constant::getNullValue(
-        column_type_type->getElementType(4));
-    llvm::Constant* field_names_field =
-        llvm::Constant::getNullValue(column_type_type->getElementType(5));
-
-    return llvm::cast<llvm::ConstantStruct>(llvm::ConstantStruct::get(
-            column_type_type, type_field, len_field,
-            precision_field, scale_field, children_field, field_names_field, NULL));
-}
-
 }
 
diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h
index bb2335e121..5915eb0182 100644
--- a/be/src/runtime/types.h
+++ b/be/src/runtime/types.h
@@ -28,14 +28,8 @@
 #include "common/config.h"
 #include "olap/hll.h"
 
-namespace llvm {
-class ConstantStruct;
-}
-
 namespace doris {
 
-class LlvmCodeGen;
-
 // Describes a type. Includes the enum, children types, and any type-specific metadata
 // (e.g. precision and scale for decimals).
 // TODO for 2.3: rename to TypeDescriptor
@@ -299,10 +293,6 @@ struct TypeDescriptor {
         return 16;
     }
 
-    /// Returns the IR version of this ColumnType. Only implemented for scalar types. LLVM
-    /// optimizer can pull out fields of the returned ConstantStruct for constant folding.
-    llvm::ConstantStruct* to_ir(LlvmCodeGen* codegen) const;
-
     std::string debug_string() const;
 
 private:
@@ -317,8 +307,6 @@ private:
     /// Recursive implementation of ToThrift() that populates 'thrift_type' with the
     /// TTypeNodes for this type and its children.
     void to_thrift(TTypeDesc* thrift_type) const;
-
-    static const char* s_llvm_class_name;
 };
 
 std::ostream& operator<<(std::ostream& os, const TypeDescriptor& type);
diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp
index 051bb75b73..0711f703bd 100644
--- a/be/src/service/doris_main.cpp
+++ b/be/src/service/doris_main.cpp
@@ -37,7 +37,6 @@
 #include "common/daemon.h"
 #include "common/config.h"
 #include "common/status.h"
-#include "codegen/llvm_codegen.h"
 #include "runtime/exec_env.h"
 #include "util/file_utils.h"
 #include "util/logging.h"
@@ -156,8 +155,6 @@ int main(int argc, char** argv) {
         exit(-1);
     }
 
-    doris::LlvmCodeGen::initialize_llvm();
-
     // initilize libcurl here to avoid concurrent initialization
     auto curl_ret = curl_global_init(CURL_GLOBAL_ALL);
     if (curl_ret != 0) {