diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt
index 24f28b10c1..1bd3e49f40 100644
--- a/be/CMakeLists.txt
+++ b/be/CMakeLists.txt
@@ -469,6 +469,7 @@ set(DORIS_LINK_LIBS
     DorisGen
     Webserver
     Geo
+    Vec
     Plugin
     ${WL_END_GROUP}
 )
@@ -653,6 +654,7 @@ endif()
 
 add_subdirectory(${SRC_DIR}/util)
 add_subdirectory(${SRC_DIR}/plugin)
+add_subdirectory(${SRC_DIR}/vec)
 
 # Utility CMake function to make specifying tests and benchmarks less verbose
 FUNCTION(ADD_BE_TEST TEST_NAME)
@@ -705,6 +707,11 @@ if (${MAKE_TEST} STREQUAL "ON")
     add_subdirectory(${TEST_DIR}/runtime)
     add_subdirectory(${TEST_DIR}/udf)
     add_subdirectory(${TEST_DIR}/util)
+    add_subdirectory(${TEST_DIR}/vec/core)
+    add_subdirectory(${TEST_DIR}/vec/exprs)
+    add_subdirectory(${TEST_DIR}/vec/function)
+    add_subdirectory(${TEST_DIR}/vec/runtime)
+    add_subdirectory(${TEST_DIR}/vec/aggregate_functions)
     add_subdirectory(${TEST_DIR}/plugin)
     add_subdirectory(${TEST_DIR}/plugin/example)
     add_subdirectory(${TEST_DIR}/tools)
@@ -728,3 +735,8 @@ install(FILES
     ${BASE_DIR}/../conf/odbcinst.ini
     DESTINATION ${OUTPUT_DIR}/conf)
 
+# Debug aid: log every entry of this directory's INCLUDE_DIRECTORIES property at configure time.
+get_property(dirs DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
+foreach(dir ${dirs})
+  message(STATUS "dir='${dir}'")
+  endforeach()
diff --git a/be/src/exec/blocking_join_node.cpp b/be/src/exec/blocking_join_node.cpp
index da977016fb..ba137860ac 100644
--- a/be/src/exec/blocking_join_node.cpp
+++ b/be/src/exec/blocking_join_node.cpp
@@ -30,7 +30,8 @@ namespace doris {
 BlockingJoinNode::BlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op,
                                    ObjectPool* pool, const TPlanNode& tnode,
                                    const DescriptorTbl& descs)
-        : ExecNode(pool, tnode, descs), _node_name(node_name), _join_op(join_op) {}
+        : ExecNode(pool, tnode, descs), _node_name(node_name), _join_op(join_op),
+          _left_side_eos(false) {}
 
 Status BlockingJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
     return ExecNode::init(tnode, state);
diff --git a/be/src/exec/data_sink.cpp b/be/src/exec/data_sink.cpp
index aefa087f45..cf06f09f24 100644
--- a/be/src/exec/data_sink.cpp
+++ b/be/src/exec/data_sink.cpp
@@ -35,7 +35,10 @@
 #include "runtime/result_file_sink.h"
 #include "runtime/result_sink.h"
 #include "runtime/runtime_state.h"
-#include "util/logging.h"
+
+#include "vec/sink/result_sink.h"
+#include "vec/sink/vdata_stream_sender.h"
+#include "vec/sink/vtablet_sink.h"
 
 namespace doris {
 
@@ -57,6 +60,9 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink
                         : false;
         // TODO: figure out good buffer size based on size of output row
         if (is_vec) {
+            tmp_sink = new doris::vectorized::VDataStreamSender(
+                    pool, params.sender_id, row_desc, thrift_sink.stream_sink, params.destinations,
+                    16 * 1024, send_query_statistics_with_every_batch);
         } else {
             tmp_sink = new DataStreamSender(pool, params.sender_id, row_desc,
                                             thrift_sink.stream_sink, params.destinations, 16 * 1024,
@@ -73,6 +79,7 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink
 
         // TODO: figure out good buffer size based on size of output row
         if (is_vec) {
+            tmp_sink = new doris::vectorized::VResultSink(row_desc, output_exprs, thrift_sink.result_sink, 4096);
         } else {
             tmp_sink = new ResultSink(row_desc, output_exprs, thrift_sink.result_sink, 1024);
         }
@@ -149,7 +156,11 @@ Status DataSink::create_data_sink(ObjectPool* pool, const TDataSink& thrift_sink
     case TDataSinkType::OLAP_TABLE_SINK: {
         Status status;
         DCHECK(thrift_sink.__isset.olap_table_sink);
-        sink->reset(new stream_load::OlapTableSink(pool, row_desc, output_exprs, &status));
+        if (is_vec) {
+            sink->reset(new stream_load::VOlapTableSink(pool, row_desc, output_exprs, &status));
+        } else {
+            sink->reset(new stream_load::OlapTableSink(pool, row_desc, output_exprs, &status));
+        }
         RETURN_IF_ERROR(status);
         break;
     }
diff --git a/be/src/exec/data_sink.h b/be/src/exec/data_sink.h
index 30f5580934..fcec10aed8 100644
--- a/be/src/exec/data_sink.h
+++ b/be/src/exec/data_sink.h
@@ -33,11 +33,13 @@ class ObjectPool;
 class RowBatch;
 class RuntimeProfile;
 class RuntimeState;
-class TPlanExecRequest;
-class TPlanExecParams;
 class TPlanFragmentExecParams;
 class RowDescriptor;
 
+namespace vectorized {
+class Block;
+}
+
 // Superclass of all data sinks.
 class DataSink {
 public:
@@ -56,8 +58,11 @@ public:
     // Send a row batch into this sink.
     // eos should be true when the last batch is passed to send()
     virtual Status send(RuntimeState* state, RowBatch* batch) = 0;
-    // virtual Status send(RuntimeState* state, RowBatch* batch, bool eos) = 0;
 
+    // Block-based overload of send(); this default implementation returns NotSupported.
+    virtual Status send(RuntimeState* state, vectorized::Block* block) {
+        return Status::NotSupported("Not support send block");
+    };
     // Releases all resources that were allocated in prepare()/send().
     // Further send() calls are illegal after calling close().
     // It must be okay to call this multiple times. Subsequent calls should
diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp
index 396c0869d6..86cd16a934 100644
--- a/be/src/exec/es/es_scroll_parser.cpp
+++ b/be/src/exec/es/es_scroll_parser.cpp
@@ -31,6 +31,7 @@
 #include "runtime/mem_pool.h"
 #include "runtime/mem_tracker.h"
 #include "util/string_parser.hpp"
+#include "vec/runtime/vdatetime_value.h"
 
 namespace doris {
 
@@ -79,6 +80,8 @@ static const std::string ERROR_MEM_LIMIT_EXCEEDED =
 static const std::string ERROR_COL_DATA_IS_ARRAY =
         "Data source returned an array for the type $0"
         "based on column metadata.";
+static const std::string INVALID_NULL_VALUE =
+        "Invalid null value occurs: Non-null column `$0` contains NULL";
 
 #define RETURN_ERROR_IF_COL_IS_ARRAY(col, type)                              \
     do {                                                                     \
@@ -169,7 +172,7 @@ static Status get_int_value(const rapidjson::Value& col, PrimitiveType type, voi
 template <typename T>
 static Status get_float_value(const rapidjson::Value& col, PrimitiveType type, void* slot,
                               bool pure_doc_value) {
-    DCHECK(sizeof(T) == 4 || sizeof(T) == 8);
+    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
     if (col.IsNumber()) {
         *reinterpret_cast<T*>(slot) = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
         return Status::OK();
@@ -193,6 +196,68 @@ static Status get_float_value(const rapidjson::Value& col, PrimitiveType type, v
     return Status::OK();
 }
 
+// Appends one ES float/double value to `col_ptr`. Accepts a JSON number, the first element
+// of a doc_value array, or a numeric string; any other shape yields an error Status.
+// `nullable` is kept so the signature matches insert_int_value; it is not consulted.
+template <typename T>
+static Status insert_float_value(const rapidjson::Value& col, PrimitiveType type,
+                                 vectorized::IColumn* col_ptr, bool pure_doc_value, bool nullable) {
+    static_assert(sizeof(T) == 4 || sizeof(T) == 8);
+    // A numeric JSON value is valid for NOT NULL columns too, so do not gate on `nullable`.
+    if (col.IsNumber()) {
+        T value = (T)(sizeof(T) == 4 ? col.GetFloat() : col.GetDouble());
+        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
+        return Status::OK();
+    }
+
+    if (pure_doc_value && col.IsArray()) {
+        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
+        T value = (T)(sizeof(T) == 4 ? col[0].GetFloat() : col[0].GetDouble());
+        col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&value)), 0);
+        return Status::OK();
+    }
+
+    RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
+    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
+
+    StringParser::ParseResult result;
+    T v = StringParser::string_to_float<T>(col.GetString(), col.GetStringLength(), &result);
+    RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
+    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&v)), 0);
+    return Status::OK();
+}
+
+// Appends one ES integer value to `col_ptr` as a T. The value may arrive as a JSON number,
+// as the first element of a doc_value array, or as a string that is parsed as an integer.
+template <typename T>
+static Status insert_int_value(const rapidjson::Value& col, PrimitiveType type,
+                               vectorized::IColumn* col_ptr, bool pure_doc_value, bool nullable) {
+    // Locate the JSON node that holds a native number, if there is one.
+    const rapidjson::Value* number = nullptr;
+    if (col.IsNumber()) {
+        number = &col;
+    } else if (pure_doc_value && col.IsArray()) {
+        RETURN_ERROR_IF_COL_IS_NOT_NUMBER(col[0], type);
+        number = &col[0];
+    }
+
+    T parsed;
+    if (number != nullptr) {
+        // Types narrower than 8 bytes go through GetInt(), the rest through GetInt64().
+        parsed = (T)(sizeof(T) < 8 ? number->GetInt() : number->GetInt64());
+    } else {
+        // Remaining inputs must pass the array / not-string checks before being parsed as text.
+        RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
+        RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
+        StringParser::ParseResult result;
+        const std::string& text = col.GetString();
+        parsed = StringParser::string_to_int<T>(text.c_str(), col.GetStringLength(), &result);
+        RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
+    }
+    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&parsed)), 0);
+    return Status::OK();
+}
+
 ScrollParser::ScrollParser(bool doc_value_mode)
         : _scroll_id(""), _size(0), _line_index(0), _doc_value_mode(doc_value_mode) {}
 
@@ -426,22 +491,53 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple,
                 *reinterpret_cast<int8_t*>(slot) = col.GetInt();
                 break;
             }
-            if (pure_doc_value && col.IsArray()) {
+
+            bool is_nested_str = false;
+            if (pure_doc_value && col.IsArray() && col[0].IsBool()) {
                 *reinterpret_cast<int8_t*>(slot) = col[0].GetBool();
                 break;
+            } else if (pure_doc_value && col.IsArray() && col[0].IsString()) {
+                is_nested_str = true;
+            } else if (pure_doc_value && col.IsArray()) {
+                return Status::InternalError(
+                        strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
             }
 
-            RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
-            RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
-
-            const std::string& val = col.GetString();
-            size_t val_size = col.GetStringLength();
+            const rapidjson::Value& str_col = is_nested_str? col[0]: col;
+            const std::string& val = str_col.GetString();
+            size_t val_size = str_col.GetStringLength();
             StringParser::ParseResult result;
             bool b = StringParser::string_to_bool(val.c_str(), val_size, &result);
-            RETURN_ERROR_IF_PARSING_FAILED(result, col, type);
+            RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type);
             *reinterpret_cast<int8_t*>(slot) = b;
             break;
         }
+        case TYPE_DECIMALV2: {
+            DecimalV2Value data;
+
+            if (col.IsDouble()) {
+                data.assign_from_double(col.GetDouble());
+            } else {
+                std::string val;
+                if (pure_doc_value) {
+                    if (!col[0].IsString()) {
+                        val = json_value_to_string(col[0]);
+                    } else {
+                        val = col[0].GetString();
+                    }
+                } else {
+                    RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
+                    if (!col.IsString()) {
+                        val = json_value_to_string(col);
+                    } else {
+                        val = col.GetString();
+                    }
+                }
+                data.parse_from_str(val.data(), val.length());
+            }
+            reinterpret_cast<DecimalV2Value*>(slot)->set_value(data.value());
+            break;
+        }
 
         case TYPE_DATE:
         case TYPE_DATETIME: {
@@ -482,6 +578,235 @@ Status ScrollParser::fill_tuple(const TupleDescriptor* tuple_desc, Tuple* tuple,
     return Status::OK();
 }
 
+// Converts the next buffered ES hit into one value per materialized slot, appended to
+// `columns` (indexed like tuple_desc->slots()). Sets *line_eof to true and returns OK when
+// no hit is left; otherwise sets it to false once every slot of the hit was appended.
+// Errors reported by the per-type conversion helpers are returned to the caller.
+Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc,
+                                  std::vector<vectorized::MutableColumnPtr>& columns,
+                                  MemPool* tuple_pool, bool* line_eof,
+                                  const std::map<std::string, std::string>& docvalue_context) {
+    *line_eof = true;
+
+    if (_size <= 0 || _line_index >= _size) {
+        return Status::OK();
+    }
+
+    const rapidjson::Value& obj = _inner_hits_node[_line_index++];
+    const bool pure_doc_value = obj.HasMember("fields");
+    const rapidjson::Value& line = obj.HasMember(FIELD_SOURCE) ? obj[FIELD_SOURCE] : obj["fields"];
+
+    for (size_t i = 0; i < tuple_desc->slots().size(); ++i) {
+        const SlotDescriptor* slot_desc = tuple_desc->slots()[i];
+        auto col_ptr = columns[i].get();
+
+        if (!slot_desc->is_materialized()) {
+            continue;
+        }
+        if (slot_desc->col_name() == FIELD_ID) {
+            // actually this branch will not be reached, this is guaranteed by Doris FE.
+            if (pure_doc_value) {
+                return Status::RuntimeError("obtain `_id` is not supported in doc_values mode");
+            }
+            // obj[FIELD_ID] must not be NULL
+            std::string _id = obj[FIELD_ID].GetString();
+            size_t len = _id.length();
+
+            col_ptr->insert_data(const_cast<const char*>(_id.data()), len);
+            continue;
+        }
+
+        const char* col_name = pure_doc_value ? docvalue_context.at(slot_desc->col_name()).c_str()
+                                              : slot_desc->col_name().c_str();
+        const bool nullable = slot_desc->is_nullable();
+        rapidjson::Value::ConstMemberIterator itr = line.FindMember(col_name);
+        if (itr == line.MemberEnd() && nullable) {
+            auto nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(col_ptr);
+            nullable_column->insert_data(nullptr, 0);
+            continue;
+        } else if (itr == line.MemberEnd() && !nullable) {
+            std::string details = strings::Substitute(INVALID_NULL_VALUE, col_name);
+            return Status::RuntimeError(details);
+        }
+
+        const rapidjson::Value& col = itr->value;
+
+        PrimitiveType type = slot_desc->type().type;
+
+        // when the column value is null, the subsequent type casting will report an error
+        if (col.IsNull() && nullable) {
+            col_ptr->insert_data(nullptr, 0);
+            continue;
+        } else if (col.IsNull() && !nullable) {
+            std::string details = strings::Substitute(INVALID_NULL_VALUE, col_name);
+            return Status::RuntimeError(details);
+        }
+        switch (type) {
+            case TYPE_CHAR:
+            case TYPE_VARCHAR:
+            case TYPE_STRING: {
+                // Elasticsearch users sometimes post non-string values into the index.
+                // When reading from _source we cannot handle every JSON type, so such values are converted to their original string representation.
+                // This is a workaround rather than a complete solution.
+                std::string val;
+                if (pure_doc_value) {
+                    if (!col[0].IsString()) {
+                        val = json_value_to_string(col[0]);
+                    } else {
+                        val = col[0].GetString();
+                    }
+                } else {
+                    RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
+                    if (!col.IsString()) {
+                        val = json_value_to_string(col);
+                    } else {
+                        val = col.GetString();
+                    }
+                }
+                size_t val_size = val.length();
+                col_ptr->insert_data(
+                        const_cast<const char*>(val.data()), val_size);
+                break;
+            }
+            // Propagate helper failures: a skipped insert would leave the columns misaligned.
+            case TYPE_TINYINT: {
+                RETURN_IF_ERROR(insert_int_value<int8_t>(col, type, col_ptr, pure_doc_value, nullable));
+                break;
+            }
+
+            case TYPE_SMALLINT: {
+                RETURN_IF_ERROR(insert_int_value<int16_t>(col, type, col_ptr, pure_doc_value, nullable));
+                break;
+            }
+
+            case TYPE_INT: {
+                RETURN_IF_ERROR(insert_int_value<int32>(col, type, col_ptr, pure_doc_value, nullable));
+                break;
+            }
+
+            case TYPE_BIGINT: {
+                RETURN_IF_ERROR(insert_int_value<int64_t>(col, type, col_ptr, pure_doc_value, nullable));
+                break;
+            }
+
+            case TYPE_LARGEINT: {
+                RETURN_IF_ERROR(insert_int_value<__int128>(col, type, col_ptr, pure_doc_value, nullable));
+                break;
+            }
+
+            case TYPE_DOUBLE: {
+                RETURN_IF_ERROR(insert_float_value<double>(col, type, col_ptr, pure_doc_value, nullable));
+                break;
+            }
+
+            case TYPE_FLOAT: {
+                RETURN_IF_ERROR(insert_float_value<float>(col, type, col_ptr, pure_doc_value, nullable));
+                break;
+            }
+
+            case TYPE_BOOLEAN: {
+                if (col.IsBool()) {
+                    int8_t val = col.GetBool();
+                    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
+                    break;
+                }
+
+                if (col.IsNumber()) {
+                    int8_t val = col.GetInt();
+                    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
+                    break;
+                }
+
+                bool is_nested_str = false;
+                if (pure_doc_value && col.IsArray() && col[0].IsBool()) {
+                    int8_t val = col[0].GetBool();
+                    col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&val)), 0);
+                    break;
+                } else if (pure_doc_value && col.IsArray() && col[0].IsString()) {
+                    is_nested_str = true;
+                } else if (pure_doc_value && col.IsArray()) {
+                    return Status::InternalError(
+                            strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
+                }
+
+                const rapidjson::Value& str_col = is_nested_str? col[0]: col;
+                // GetString() is only valid on JSON strings, so reject every other remaining shape.
+                RETURN_ERROR_IF_COL_IS_NOT_STRING(str_col, type);
+                const std::string& val = str_col.GetString();
+                size_t val_size = str_col.GetStringLength();
+                StringParser::ParseResult result;
+                bool b = StringParser::string_to_bool(val.c_str(), val_size, &result);
+                RETURN_ERROR_IF_PARSING_FAILED(result, str_col, type);
+                col_ptr->insert_data(const_cast<const char*>(reinterpret_cast<char*>(&b)), 0);
+                break;
+            }
+            case TYPE_DECIMALV2: {
+                DecimalV2Value data;
+
+                if (col.IsDouble()) {
+                    data.assign_from_double(col.GetDouble());
+                } else {
+                    std::string val;
+                    if (pure_doc_value) {
+                        if (!col[0].IsString()) {
+                            val = json_value_to_string(col[0]);
+                        } else {
+                            val = col[0].GetString();
+                        }
+                    } else {
+                        RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
+                        if (!col.IsString()) {
+                            val = json_value_to_string(col);
+                        } else {
+                            val = col.GetString();
+                        }
+                    }
+                    data.parse_from_str(val.data(), val.length());
+                }
+                col_ptr->insert_data(
+                        const_cast<const char*>(reinterpret_cast<char*>(&data)), 0);
+                break;
+            }
+
+            case TYPE_DATE:
+            case TYPE_DATETIME: {
+                // this happens only when `enable_docvalue_scan = false` and the field holds a timestamp-format date from _source
+                if (col.IsNumber()) {
+                    // ES uses millisecond timestamps for date/datetime fields in the index and in docvalues.
+                    // When a number is encountered for a date field, Doris On ES always treats it as milliseconds
+                    // to stay consistent with ES; it is divided by 1000 because from_unixtime works in seconds.
+                    RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col, type));
+                } else if (col.IsArray() && pure_doc_value) {
+                    // this happens only when `enable_docvalue_scan = true`
+                    // ES adds a default format for all fields after ES 6.4; if no format is provided for a `date` field ES imposes
+                    // a standard date-format such as `2020-06-16T00:00:00.000Z`
+                    // At present, we just process this string format date. After some PR were merged into Doris, we would impose `epoch_mills` for
+                    // date field's docvalue
+                    if (col[0].IsString()) {
+                        RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col[0], type));
+                        break;
+                    }
+                    // Otherwise read the millisecond timestamp from the array's first element; the array itself has no integer value.
+                    RETURN_IF_ERROR(fill_date_col_with_timestamp(col_ptr, col[0], type));
+                } else {
+                    // this happens only when `enable_docvalue_scan = false` and the field holds a string-format date from _source
+                    RETURN_ERROR_IF_COL_IS_ARRAY(col, type);
+                    RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type);
+                    RETURN_IF_ERROR(fill_date_col_with_strval(col_ptr, col, type));
+                }
+                break;
+            }
+            default: {
+                DCHECK(false);
+                break;
+            }
+        }
+    }
+
+    *line_eof = false;
+    return Status::OK();
+}
+
 Status ScrollParser::fill_date_slot_with_strval(void* slot, const rapidjson::Value& col,
                                                 PrimitiveType type) {
     DateTimeValue* ts_slot = reinterpret_cast<DateTimeValue*>(slot);
@@ -511,4 +836,44 @@ Status ScrollParser::fill_date_slot_with_timestamp(void* slot, const rapidjson::
     return Status::OK();
 }
 
+// Parses the string held by `col` as a date/datetime and appends it to `col_ptr` in the
+// int64 form produced by binary_cast. `type` selects date vs. datetime handling.
+Status ScrollParser::fill_date_col_with_strval(vectorized::IColumn* col_ptr,
+                                               const rapidjson::Value& col, PrimitiveType type) {
+    const std::string& text = col.GetString();
+    vectorized::VecDateTimeValue parsed;
+    if (!parsed.from_date_str(text.c_str(), col.GetStringLength())) {
+        RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
+    }
+    if (type != TYPE_DATE) {
+        parsed.to_datetime();
+    } else {
+        parsed.cast_to_date();
+    }
+
+    const auto packed = binary_cast<doris::vectorized::VecDateTimeValue, int64_t>(parsed);
+    col_ptr->insert_data(reinterpret_cast<const char*>(&packed), 0);
+    return Status::OK();
+}
+
+// Reads the integer in `col`, divides it by 1000 for from_unixtime(), and appends the
+// result to `col_ptr` in the int64 form produced by binary_cast.
+// NOTE(review): the conversion is pinned to the "+08:00" offset; confirm this is intended.
+Status ScrollParser::fill_date_col_with_timestamp(vectorized::IColumn* col_ptr,
+                                                  const rapidjson::Value& col, PrimitiveType type) {
+    vectorized::VecDateTimeValue parsed;
+    if (!parsed.from_unixtime(col.GetInt64() / 1000, "+08:00")) {
+        RETURN_ERROR_IF_CAST_FORMAT_ERROR(col, type);
+    }
+    if (type != TYPE_DATE) {
+        parsed.set_type(TIME_DATETIME);
+    } else {
+        parsed.cast_to_date();
+    }
+    const auto packed = binary_cast<doris::vectorized::VecDateTimeValue, int64_t>(parsed);
+    col_ptr->insert_data(reinterpret_cast<const char*>(&packed), 0);
+    return Status::OK();
+}
+
+
 } // namespace doris
diff --git a/be/src/exec/es/es_scroll_parser.h b/be/src/exec/es/es_scroll_parser.h
index f2ff855a7c..30eabaff2c 100644
--- a/be/src/exec/es/es_scroll_parser.h
+++ b/be/src/exec/es/es_scroll_parser.h
@@ -22,6 +22,7 @@
 #include "rapidjson/document.h"
 #include "runtime/descriptors.h"
 #include "runtime/tuple.h"
+#include "vec/core/block.h"
 
 namespace doris {
 
@@ -35,6 +36,9 @@ public:
     Status parse(const std::string& scroll_result, bool exactly_once = false);
     Status fill_tuple(const TupleDescriptor* _tuple_desc, Tuple* tuple, MemPool* mem_pool,
                       bool* line_eof, const std::map<std::string, std::string>& docvalue_context);
+    Status fill_columns(const TupleDescriptor* _tuple_desc,
+                        std::vector<vectorized::MutableColumnPtr>& columns, MemPool* mem_pool,
+                        bool* line_eof, const std::map<std::string, std::string>& docvalue_context);
 
     const std::string& get_scroll_id();
     int get_size();
@@ -44,9 +48,13 @@ private:
     // type is used for distinguish date and datetime
     // fill date slot with string format date
     Status fill_date_slot_with_strval(void* slot, const rapidjson::Value& col, PrimitiveType type);
+    Status fill_date_col_with_strval(vectorized::IColumn* col_ptr, const rapidjson::Value& col,
+                                     PrimitiveType type);
     // fill date slot with timestamp
     Status fill_date_slot_with_timestamp(void* slot, const rapidjson::Value& col,
                                          PrimitiveType type);
+    Status fill_date_col_with_timestamp(vectorized::IColumn* col_ptr, const rapidjson::Value& col,
+                                         PrimitiveType type);
 
 private:
     std::string _scroll_id;
diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp
index 012110d80e..7b67486401 100644
--- a/be/src/exec/es_http_scan_node.cpp
+++ b/be/src/exec/es_http_scan_node.cpp
@@ -437,10 +437,17 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length, std::promise<Stat
             properties, _column_names, _predicates, _docvalue_context, &doc_value_mode);
 
     // start scanner to scan
-    std::unique_ptr<EsHttpScanner> scanner(
-            new EsHttpScanner(_runtime_state, runtime_profile(), _tuple_id, properties,
-                              scanner_expr_ctxs, &counter, doc_value_mode));
-    status = scanner_scan(std::move(scanner), scanner_expr_ctxs, &counter);
+    if (!_vectorized) {
+        std::unique_ptr<EsHttpScanner> scanner(
+                new EsHttpScanner(_runtime_state, runtime_profile(), _tuple_id, properties,
+                                  scanner_expr_ctxs, &counter, doc_value_mode));
+        status = scanner_scan(std::move(scanner), scanner_expr_ctxs, &counter);
+    } else {
+        std::unique_ptr<VEsHttpScanner> scanner(
+                new VEsHttpScanner(_runtime_state, runtime_profile(), _tuple_id, properties,
+                                  scanner_expr_ctxs, &counter, doc_value_mode));
+        status = scanner_scan(std::move(scanner));
+    }
     if (!status.ok()) {
         LOG(WARNING) << "Scanner[" << start_idx
                      << "] process failed. status=" << status.get_error_msg();
diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h
index 640ea0c576..b90bf0e466 100644
--- a/be/src/exec/es_http_scan_node.h
+++ b/be/src/exec/es_http_scan_node.h
@@ -29,6 +29,7 @@
 
 #include "common/status.h"
 #include "exec/es_http_scanner.h"
+#include "vec/exec/ves_http_scanner.h"
 #include "exec/scan_node.h"
 #include "gen_cpp/PaloInternalService_types.h"
 
@@ -56,7 +57,6 @@ protected:
     // Write debug string of this into out.
     virtual void debug_string(int indentation_level, std::stringstream* out) const override;
 
-private:
     // Update process status to one failed status,
     // NOTE: Must hold the mutex of this scan node
     bool update_status(const Status& new_status) {
@@ -67,20 +67,8 @@ private:
         return false;
     }
 
-    // Create scanners to do scan job
-    Status start_scanners();
-
-    // Collect all scanners 's status
-    Status collect_scanners_status();
-
     // One scanner worker, This scanner will handle 'length' ranges start from start_idx
-    void scanner_worker(int start_idx, int length, std::promise<Status>& p_status);
-
-    // Scan one range
-    Status scanner_scan(std::unique_ptr<EsHttpScanner> scanner,
-                        const std::vector<ExprContext*>& conjunct_ctxs, EsScanCounter* counter);
-
-    Status build_conjuncts_list();
+    virtual void scanner_worker(int start_idx, int length, std::promise<Status>& p_status);
 
     TupleId _tuple_id;
     RuntimeState* _runtime_state;
@@ -92,20 +80,41 @@ private:
     int _max_buffered_batches;
     RuntimeProfile::Counter* _wait_scanner_timer;
 
-    bool _all_scanners_finished;
     Status _process_status;
 
+    std::map<std::string, std::string> _docvalue_context;
+
+    std::condition_variable _queue_reader_cond;
+    std::condition_variable _queue_writer_cond;
+    bool _vectorized = false;
+
+private:
+    // Create scanners to do scan job
+    Status start_scanners();
+
+    // Collect all scanners 's status
+    Status collect_scanners_status();
+
+    // Scan one range
+    Status scanner_scan(std::unique_ptr<EsHttpScanner> scanner,
+                        const std::vector<ExprContext*>& conjunct_ctxs, EsScanCounter* counter);
+    // Scan one range with a vectorized scanner; this default implementation returns NotSupported.
+    virtual Status scanner_scan(std::unique_ptr<VEsHttpScanner> scanner) {
+            return Status::NotSupported("vectorized scan in EsHttpScanNode is not supported!");
+    };
+
+    Status build_conjuncts_list();
+
+    bool _all_scanners_finished;
+
     std::vector<std::thread> _scanner_threads;
     std::vector<std::promise<Status>> _scanners_status;
     std::map<std::string, std::string> _properties;
-    std::map<std::string, std::string> _docvalue_context;
     std::map<std::string, std::string> _fields_context;
     std::vector<TScanRangeParams> _scan_ranges;
     std::vector<std::string> _column_names;
 
     std::mutex _batch_queue_lock;
-    std::condition_variable _queue_reader_cond;
-    std::condition_variable _queue_writer_cond;
     std::deque<std::shared_ptr<RowBatch>> _batch_queue;
     std::vector<EsPredicate*> _predicates;
 
diff --git a/be/src/exec/es_http_scanner.h b/be/src/exec/es_http_scanner.h
index 2bade5ae3f..dcebfe1649 100644
--- a/be/src/exec/es_http_scanner.h
+++ b/be/src/exec/es_http_scanner.h
@@ -68,7 +68,7 @@ public:
 
     void close();
 
-private:
+protected:
     RuntimeState* _state;
     RuntimeProfile* _profile;
     TupleId _tuple_id;
diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp
index d692a79772..97c3259327 100644
--- a/be/src/exec/exec_node.cpp
+++ b/be/src/exec/exec_node.cpp
@@ -63,6 +63,26 @@
 #include "util/debug_util.h"
 #include "util/runtime_profile.h"
 
+#include "vec/core/block.h"
+#include "vec/exec/join/vhash_join_node.h"
+#include "vec/exec/vaggregation_node.h"
+#include "vec/exec/ves_http_scan_node.h"
+#include "vec/exec/vcross_join_node.h"
+#include "vec/exec/vexchange_node.h"
+#include "vec/exec/vmysql_scan_node.h"
+#include "vec/exec/vodbc_scan_node.h"
+#include "vec/exec/volap_scan_node.h"
+#include "vec/exec/vsort_node.h"
+#include "vec/exec/vunion_node.h"
+#include "vec/exec/vintersect_node.h"
+#include "vec/exec/vexcept_node.h"
+#include "vec/exec/vanalytic_eval_node.h"
+#include "vec/exec/vassert_num_rows_node.h"
+#include "vec/exec/vselect_node.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exec/vempty_set_node.h"
+#include "vec/exec/vschema_scan_node.h"
+#include "vec/exec/vrepeat_node.h"
 namespace doris {
 
 const std::string ExecNode::ROW_THROUGHPUT_COUNTER = "RowsReturnedRate";
@@ -167,6 +187,9 @@ Status ExecNode::init(const TPlanNode& tnode, RuntimeState* state) {
     init_runtime_profile(profile);
 
     if (tnode.__isset.vconjunct) {
+        _vconjunct_ctx_ptr.reset(new doris::vectorized::VExprContext*);
+        RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_tree(_pool, tnode.vconjunct,
+                                                                   _vconjunct_ctx_ptr.get()));
     }
     RETURN_IF_ERROR(Expr::create_expr_trees(_pool, tnode.conjuncts, &_conjunct_ctxs));
 
@@ -189,6 +212,9 @@ Status ExecNode::prepare(RuntimeState* state) {
                                                   _mem_tracker);
     _expr_mem_pool.reset(new MemPool(_expr_mem_tracker.get()));
 
+    if (_vconjunct_ctx_ptr) {
+        RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->prepare(state, row_desc(), expr_mem_tracker()));
+    }
     RETURN_IF_ERROR(Expr::prepare(_conjunct_ctxs, state, row_desc(), expr_mem_tracker()));
 
     // TODO(zc):
@@ -202,6 +228,9 @@ Status ExecNode::prepare(RuntimeState* state) {
 
 Status ExecNode::open(RuntimeState* state) {
     RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN));
+    if (_vconjunct_ctx_ptr) {
+        RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->open(state));
+    }
     return Expr::open(_conjunct_ctxs, state);
 }
 
@@ -240,6 +269,7 @@ Status ExecNode::close(RuntimeState* state) {
         }
     }
 
+    if (_vconjunct_ctx_ptr) (*_vconjunct_ctx_ptr)->close(state);
     Expr::close(_conjunct_ctxs, state);
 
     if (expr_mem_pool() != nullptr) {
@@ -339,12 +369,42 @@ Status ExecNode::create_tree_helper(RuntimeState* state, ObjectPool* pool,
 
 Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanNode& tnode,
                              const DescriptorTbl& descs, ExecNode** node) {
-    if (state->enable_vectorized_exec()) {
-        return Status::InternalError("unsupport enable_vectorized_engine");
-    }
-
     std::stringstream error_msg;
 
+    if (state->enable_vectorized_exec()) {
+        switch (tnode.node_type) {
+        case TPlanNodeType::OLAP_SCAN_NODE:
+        case TPlanNodeType::ASSERT_NUM_ROWS_NODE:
+        case TPlanNodeType::HASH_JOIN_NODE:
+        case TPlanNodeType::AGGREGATION_NODE:
+        case TPlanNodeType::UNION_NODE:
+        case TPlanNodeType::CROSS_JOIN_NODE:
+        case TPlanNodeType::SORT_NODE:
+        case TPlanNodeType::EXCHANGE_NODE:
+        case TPlanNodeType::ODBC_SCAN_NODE:
+        case TPlanNodeType::MYSQL_SCAN_NODE:
+        case TPlanNodeType::INTERSECT_NODE:
+        case TPlanNodeType::EXCEPT_NODE:
+        case TPlanNodeType::ES_HTTP_SCAN_NODE:
+        case TPlanNodeType::EMPTY_SET_NODE:
+        case TPlanNodeType::SCHEMA_SCAN_NODE:
+        case TPlanNodeType::ANALYTIC_EVAL_NODE:
+        case TPlanNodeType::SELECT_NODE:
+        case TPlanNodeType::REPEAT_NODE:
+            break;
+        default: {
+            const auto& i = _TPlanNodeType_VALUES_TO_NAMES.find(tnode.node_type);
+            const char* str = "unknown node type";
+
+            if (i != _TPlanNodeType_VALUES_TO_NAMES.end()) {
+                str = i->second;
+            }
+            error_msg << "V" << str << " not implemented";
+            return Status::InternalError(error_msg.str());
+        }
+        }
+    }
+
     VLOG_CRITICAL << "tnode:\n" << apache::thrift::ThriftDebugString(tnode);
     switch (tnode.node_type) {
     case TPlanNodeType::CSV_SCAN_NODE:
@@ -353,14 +413,20 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
 
     case TPlanNodeType::MYSQL_SCAN_NODE:
 #ifdef DORIS_WITH_MYSQL
-        *node = pool->add(new MysqlScanNode(pool, tnode, descs));
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VMysqlScanNode(pool, tnode, descs));
+        } else
+            *node = pool->add(new MysqlScanNode(pool, tnode, descs));
         return Status::OK();
 #else
         return Status::InternalError(
                 "Don't support MySQL table, you should rebuild Doris with WITH_MYSQL option ON");
 #endif
     case TPlanNodeType::ODBC_SCAN_NODE:
-        *node = pool->add(new OdbcScanNode(pool, tnode, descs));
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VOdbcScanNode(pool, tnode, descs));
+        } else
+            *node = pool->add(new OdbcScanNode(pool, tnode, descs));
         return Status::OK();
 
     case TPlanNodeType::ES_SCAN_NODE:
@@ -368,15 +434,24 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
         return Status::OK();
 
     case TPlanNodeType::ES_HTTP_SCAN_NODE:
-        *node = pool->add(new EsHttpScanNode(pool, tnode, descs));
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VEsHttpScanNode(pool, tnode, descs));
+        } else {
+            *node = pool->add(new EsHttpScanNode(pool, tnode, descs));
+        }
         return Status::OK();
 
     case TPlanNodeType::SCHEMA_SCAN_NODE:
-        *node = pool->add(new SchemaScanNode(pool, tnode, descs));
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VSchemaScanNode(pool, tnode, descs));
+        } else {
+            *node = pool->add(new SchemaScanNode(pool, tnode, descs));
+        }
         return Status::OK();
 
     case TPlanNodeType::OLAP_SCAN_NODE:
         if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VOlapScanNode(pool, tnode, descs));
         } else {
             *node = pool->add(new OlapScanNode(pool, tnode, descs));
         }
@@ -384,6 +459,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
 
     case TPlanNodeType::AGGREGATION_NODE:
         if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::AggregationNode(pool, tnode, descs));
         } else {
             if (config::enable_partitioned_aggregation) {
                 *node = pool->add(new PartitionedAggregationNode(pool, tnode, descs));
@@ -394,11 +470,16 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
         return Status::OK();
 
     case TPlanNodeType::HASH_JOIN_NODE:
-        *node = pool->add(new HashJoinNode(pool, tnode, descs));
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::HashJoinNode(pool, tnode, descs));
+        } else {
+            *node = pool->add(new HashJoinNode(pool, tnode, descs));
+        }
         return Status::OK();
 
     case TPlanNodeType::CROSS_JOIN_NODE:
         if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VCrossJoinNode(pool, tnode, descs));
         } else {
             *node = pool->add(new CrossJoinNode(pool, tnode, descs));
         }
@@ -409,18 +490,27 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
         return Status::OK();
 
     case TPlanNodeType::EMPTY_SET_NODE:
-        *node = pool->add(new EmptySetNode(pool, tnode, descs));
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VEmptySetNode(pool, tnode, descs));
+        } else {
+            *node = pool->add(new EmptySetNode(pool, tnode, descs));
+        }
         return Status::OK();
 
     case TPlanNodeType::EXCHANGE_NODE:
         if (state->enable_vectorized_exec()) {
+            *node = pool->add(new doris::vectorized::VExchangeNode(pool, tnode, descs));
         } else {
             *node = pool->add(new ExchangeNode(pool, tnode, descs));
         }
         return Status::OK();
 
     case TPlanNodeType::SELECT_NODE:
-        *node = pool->add(new SelectNode(pool, tnode, descs));
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new doris::vectorized::VSelectNode(pool, tnode, descs));
+        } else {
+            *node = pool->add(new SelectNode(pool, tnode, descs));
+        }
         return Status::OK();
 
     case TPlanNodeType::OLAP_REWRITE_NODE:
@@ -429,6 +519,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
 
     case TPlanNodeType::SORT_NODE:
         if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VSortNode(pool, tnode, descs));
         } else {
             if (tnode.sort_node.use_top_n) {
                 *node = pool->add(new TopNNode(pool, tnode, descs));
@@ -439,8 +530,12 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
 
         return Status::OK();
     case TPlanNodeType::ANALYTIC_EVAL_NODE:
-        *node = pool->add(new AnalyticEvalNode(pool, tnode, descs));
-        break;
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VAnalyticEvalNode(pool, tnode, descs));
+        } else {
+            *node = pool->add(new AnalyticEvalNode(pool, tnode, descs));
+        }
+        return Status::OK();
 
     case TPlanNodeType::MERGE_NODE:
         *node = pool->add(new MergeNode(pool, tnode, descs));
@@ -448,17 +543,26 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
 
     case TPlanNodeType::UNION_NODE:
         if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VUnionNode(pool, tnode, descs));
         } else {
             *node = pool->add(new UnionNode(pool, tnode, descs));
         }
         return Status::OK();
 
     case TPlanNodeType::INTERSECT_NODE:
-        *node = pool->add(new IntersectNode(pool, tnode, descs));
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VIntersectNode(pool, tnode, descs));
+        } else {
+            *node = pool->add(new IntersectNode(pool, tnode, descs));
+        }
         return Status::OK();
 
     case TPlanNodeType::EXCEPT_NODE:
-        *node = pool->add(new ExceptNode(pool, tnode, descs));
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VExceptNode(pool, tnode, descs));
+        } else {
+            *node = pool->add(new ExceptNode(pool, tnode, descs));
+        }
         return Status::OK();
 
     case TPlanNodeType::BROKER_SCAN_NODE:
@@ -466,11 +570,19 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
         return Status::OK();
 
     case TPlanNodeType::REPEAT_NODE:
-        *node = pool->add(new RepeatNode(pool, tnode, descs));
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VRepeatNode(pool, tnode, descs));
+        } else {
+            *node = pool->add(new RepeatNode(pool, tnode, descs));
+        }
         return Status::OK();
 
     case TPlanNodeType::ASSERT_NUM_ROWS_NODE:
-        *node = pool->add(new AssertNumRowsNode(pool, tnode, descs));
+        if (state->enable_vectorized_exec()) {
+            *node = pool->add(new vectorized::VAssertNumRowsNode(pool, tnode, descs));
+        } else {
+            *node = pool->add(new AssertNumRowsNode(pool, tnode, descs));
+        }
         return Status::OK();
 
     case TPlanNodeType::TABLE_FUNCTION_NODE:
@@ -637,6 +749,22 @@ Status ExecNode::claim_buffer_reservation(RuntimeState* state) {
 Status ExecNode::release_unused_reservation() {
     return _buffer_pool_client.DecreaseReservationTo(_resource_profile.min_reservation);
 }
+
+void ExecNode::release_block_memory(vectorized::Block& block, uint16_t child_idx) {
+    DCHECK(child_idx < _children.size());
+    block.clear_column_data(child(child_idx)->row_desc().num_materialized_slots());
+}
+
+void ExecNode::reached_limit(vectorized::Block* block, bool* eos) {
+    // At or past the limit: keep only the rows still allowed and flag end of stream.
+    if (_limit != -1 && _num_rows_returned + block->rows() >= _limit) {
+        block->set_num_rows(_limit - _num_rows_returned);
+        *eos = true;
+    }
+    _num_rows_returned += block->rows();
+    COUNTER_SET(_rows_returned_counter, _num_rows_returned);
+}
+
 /*
 Status ExecNode::enable_deny_reservation_debug_action() {
   DCHECK_EQ(debug_action_, TDebugAction::SET_DENY_RESERVATION_PROBABILITY);
diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h
index c58f862b72..7cad50018d 100644
--- a/be/src/exec/exec_node.h
+++ b/be/src/exec/exec_node.h
@@ -33,6 +33,8 @@
 #include "util/runtime_profile.h"
 #include "util/uid_util.h" // for print_id
 
+#include "vec/exprs/vexpr_context.h"
+
 namespace doris {
 class Expr;
 class ExprContext;
@@ -219,6 +221,15 @@ protected:
     /// fails.
     Status release_unused_reservation();
 
+    /// Release the memory of a block that was obtained from the child at child_idx:
+    // 1. clear the data of the columns received from the child so the child can reuse the memory
+    // 2. delete and release the columns that were created by function calls or for other reasons
+    void release_block_memory(vectorized::Block& block, uint16_t child_idx = 0);
+
+    /// Only used by the vectorized exec engine: if the limit is reached, truncate the block to
+    // the remaining row count and set *eos; then add the block's rows to the returned-rows total
+    void reached_limit(vectorized::Block* block, bool* eos);
+
     /// Enable the increase reservation denial probability on 'buffer_pool_client_' based on
     /// the 'debug_action_' set on this node. Returns an error if 'debug_action_param_' is
     /// invalid.
@@ -275,6 +286,8 @@ protected:
     std::vector<ExprContext*> _conjunct_ctxs;
     std::vector<TupleId> _tuple_ids;
 
+    std::unique_ptr<doris::vectorized::VExprContext*> _vconjunct_ctx_ptr;
+
     std::vector<ExecNode*> _children;
     RowDescriptor _row_descriptor;
 
diff --git a/be/src/exec/mysql_scan_node.h b/be/src/exec/mysql_scan_node.h
index 6d9d602d6b..ff5fb54f99 100644
--- a/be/src/exec/mysql_scan_node.h
+++ b/be/src/exec/mysql_scan_node.h
@@ -63,7 +63,7 @@ private:
     // The Mysql value is converted into the appropriate target type.
     Status write_text_slot(char* value, int value_length, SlotDescriptor* slot,
                            RuntimeState* state);
-
+protected:
     bool _is_init;
     MysqlScannerParam _my_param;
     // Name of Mysql table
diff --git a/be/src/exec/odbc_scan_node.cpp b/be/src/exec/odbc_scan_node.cpp
index 6bdc620993..ea32cd8f14 100644
--- a/be/src/exec/odbc_scan_node.cpp
+++ b/be/src/exec/odbc_scan_node.cpp
@@ -29,9 +29,11 @@
 
 namespace doris {
 
-OdbcScanNode::OdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+OdbcScanNode::OdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs,
+                           std::string scan_node_type)
         : ScanNode(pool, tnode, descs),
           _is_init(false),
+          _scan_node_type(scan_node_type),
           _table_name(tnode.odbc_scan_node.table_name),
           _connect_string(std::move(tnode.odbc_scan_node.connect_string)),
           _query_string(std::move(tnode.odbc_scan_node.query_string)),
@@ -42,7 +44,7 @@ OdbcScanNode::OdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, const Descr
 OdbcScanNode::~OdbcScanNode() {}
 
 Status OdbcScanNode::prepare(RuntimeState* state) {
-    VLOG_CRITICAL << "OdbcScanNode::Prepare";
+    VLOG_CRITICAL << _scan_node_type << "::Prepare";
 
     if (_is_init) {
         return Status::OK();
@@ -91,7 +93,7 @@ Status OdbcScanNode::prepare(RuntimeState* state) {
 
 Status OdbcScanNode::open(RuntimeState* state) {
     RETURN_IF_ERROR(ExecNode::open(state));
-    VLOG_CRITICAL << "OdbcScanNode::Open";
+    VLOG_CRITICAL << _scan_node_type << "::Open";
 
     if (nullptr == state) {
         return Status::InternalError("input pointer is null.");
@@ -125,7 +127,7 @@ Status OdbcScanNode::write_text_slot(char* value, int value_length, SlotDescript
 }
 
 Status OdbcScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
-    VLOG_CRITICAL << "OdbcScanNode::GetNext";
+    VLOG_CRITICAL << _scan_node_type << "::GetNext";
 
     if (nullptr == state || nullptr == row_batch || nullptr == eos) {
         return Status::InternalError("input is nullptr pointer");
@@ -240,7 +242,7 @@ Status OdbcScanNode::close(RuntimeState* state) {
 
 void OdbcScanNode::debug_string(int indentation_level, std::stringstream* out) const {
     *out << string(indentation_level * 2, ' ');
-    *out << "OdbcScanNode(tupleid=" << _tuple_id << " table=" << _table_name;
+    *out << _scan_node_type << "(tupleid=" << _tuple_id << " table=" << _table_name;
     *out << ")" << std::endl;
 
     for (int i = 0; i < _children.size(); ++i) {
diff --git a/be/src/exec/odbc_scan_node.h b/be/src/exec/odbc_scan_node.h
index 393d9ac29f..1754993628 100644
--- a/be/src/exec/odbc_scan_node.h
+++ b/be/src/exec/odbc_scan_node.h
@@ -35,7 +35,8 @@ class Status;
 
 class OdbcScanNode : public ScanNode {
 public:
-    OdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    OdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs,
+                 std::string scan_node_type = "OdbcScanNode");
     ~OdbcScanNode();
 
     // initialize _odbc_scanner, and create _text_converter.
@@ -53,6 +54,12 @@ public:
 
     // No use
     virtual Status set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges);
+    // Accessors for internal scan state; none of them modifies the node.
+    const TupleDescriptor* get_tuple_desc() const { return _tuple_desc; }
+    TextConverter* get_text_converter() const { return _text_converter.get(); }
+    ODBCConnector* get_odbc_scanner() const { return _odbc_scanner.get(); }
+    const std::string& get_scan_node_type() const { return _scan_node_type; }
+    bool is_init() const { return _is_init; }
 
 protected:
     // Write debug string of this into out.
@@ -65,6 +72,9 @@ private:
                            RuntimeState* state);
 
     bool _is_init;
+
+    std::string _scan_node_type;
+
     // Name of Odbc table
     std::string _table_name;
 
diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp
index ebf872043b..dfc9a293ee 100644
--- a/be/src/exec/olap_scan_node.cpp
+++ b/be/src/exec/olap_scan_node.cpp
@@ -61,7 +61,7 @@ OlapScanNode::OlapScanNode(ObjectPool* pool, const TPlanNode& tnode, const Descr
 
 Status OlapScanNode::init(const TPlanNode& tnode, RuntimeState* state) {
     RETURN_IF_ERROR(ExecNode::init(tnode, state));
-    _direct_conjunct_size = state->enable_vectorized_exec() ? 1 : _conjunct_ctxs.size();
+    _direct_conjunct_size = _conjunct_ctxs.size();
 
     const TQueryOptions& query_options = state->query_options();
     if (query_options.__isset.max_scan_key_num) {
@@ -464,7 +464,6 @@ Status OlapScanNode::start_scan(RuntimeState* state) {
     VLOG_CRITICAL << "Filter idle conjuncts";
     // 5. Filter idle conjunct which already trans to olap filters
     // this must be after build_scan_key, it will free the StringValue memory
-    // TODO: filter idle conjunct in vexpr_contexts
     remove_pushed_conjuncts(state);
 
     VLOG_CRITICAL << "StartScanThread";
@@ -516,6 +515,7 @@ void OlapScanNode::remove_pushed_conjuncts(RuntimeState* state) {
     _conjunct_ctxs = std::move(new_conjunct_ctxs);
     _direct_conjunct_size = new_direct_conjunct_size;
 
+    // TODO: support vbloom_filter_predicate/vbinary_predicate and merge unpushed predicate to _vconjunct_ctx
     for (auto push_down_ctx : _pushed_conjuncts_index) {
         auto iter = _conjunctid_to_runtime_filter_ctxs.find(push_down_ctx);
         if (iter != _conjunctid_to_runtime_filter_ctxs.end()) {
@@ -524,7 +524,13 @@ void OlapScanNode::remove_pushed_conjuncts(RuntimeState* state) {
     }
     // set vconjunct_ctx is empty, if all conjunct
     if (_direct_conjunct_size == 0) {
+        if (_vconjunct_ctx_ptr.get() != nullptr) {
+            (*_vconjunct_ctx_ptr.get())->close(state);
+            _vconjunct_ctx_ptr = nullptr;
+        }
     }
+    // filter idle conjunct in vexpr_contexts
+    _peel_pushed_conjuncts();
 }
 
 void OlapScanNode::eval_const_conjuncts() {
@@ -843,11 +849,6 @@ static bool ignore_cast(SlotDescriptor* slot, Expr* expr) {
 
 bool OlapScanNode::should_push_down_in_predicate(doris::SlotDescriptor* slot,
                                                  doris::InPredicate* pred) {
-    if (pred->is_not_in()) {
-        // can not push down NOT IN predicate to storage engine
-        return false;
-    }
-
     if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) {
         // not a slot ref(column)
         return false;
@@ -1672,4 +1673,46 @@ Status OlapScanNode::add_one_batch(RowBatch* row_batch) {
 }
 
 
+vectorized::VExpr* OlapScanNode::_dfs_peel_conjunct(vectorized::VExpr* expr, int& leaf_index) {
+    // Any node that is not an 'and' expr is treated as a leaf of the conjunct tree.
+    if (!expr->is_and_expr()) {
+        // Leaves are numbered in visit order; a leaf whose index was pushed down is dropped.
+        return _pushed_conjuncts_index.count(leaf_index++) ? nullptr : expr;
+    }
+    // Visit the left child before the right one so leaves are numbered left to right.
+    vectorized::VExpr* kept_left = _dfs_peel_conjunct(expr->children()[0], leaf_index);
+    vectorized::VExpr* kept_right = _dfs_peel_conjunct(expr->children()[1], leaf_index);
+
+    if (kept_left != nullptr && kept_right != nullptr) {
+        expr->set_children({kept_left, kept_right});
+        return expr;
+    }
+    // Collapse onto the surviving child, if any. The dropped Expr* is not closed here.
+    return kept_left != nullptr ? kept_left : kept_right;
+}
+
+// Removes the conjuncts that were already pushed down from the vectorized conjunct expr tree.
+// It relies on how FE's convertConjunctsToAndCompoundPredicate() splices the conjuncts:
+// every splice must use an 'and' expr, and conjuncts must be spliced left to right, in order.
+// Apart from that, the expr tree does not have to take any specific form.
+void OlapScanNode::_peel_pushed_conjuncts() {
+    if (_vconjunct_ctx_ptr == nullptr) return;
+
+    vectorized::VExpr* old_root = (*_vconjunct_ctx_ptr)->root();
+    if (old_root == nullptr) return;
+
+    int leaf_index = 0;
+    vectorized::VExpr* new_root = _dfs_peel_conjunct(old_root, leaf_index);
+    if (new_root != nullptr) {
+        (*_vconjunct_ctx_ptr)->set_root(new_root);
+        _scanner_profile->add_info_string("VconjunctExprTree", new_root->debug_string());
+        return;
+    }
+    // Every conjunct was pushed down, so no vectorized conjunct is left to evaluate.
+    // NOTE(review): unlike remove_pushed_conjuncts(), the context is dropped here without
+    // close() being called first - confirm that this is intended.
+    _vconjunct_ctx_ptr = nullptr;
+    _scanner_profile->add_info_string("VconjunctExprTree", "null");
+}
+
 } // namespace doris
diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h
index 3b7df03452..f9905e256d 100644
--- a/be/src/exec/olap_scan_node.h
+++ b/be/src/exec/olap_scan_node.h
@@ -32,6 +32,8 @@
 #include "runtime/vectorized_row_batch.h"
 #include "util/progress_updater.h"
 #include "util/spinlock.h"
+#include "vec/exec/volap_scanner.h"
+#include "vec/exprs/vexpr.h"
 
 namespace doris {
 class IRuntimeFilter;
@@ -158,6 +160,7 @@ protected:
                             RuntimeProfile* profile);
 
     friend class OlapScanner;
+    friend class doris::vectorized::VOlapScanner;
 
     // Tuple id resolved in prepare() to set _tuple_desc;
     TupleId _tuple_id;
@@ -215,7 +218,7 @@ protected:
 
     std::mutex _scan_batches_lock;
     std::condition_variable _scan_batch_added_cv;
-    int64_t _running_thread = 0;
+    std::atomic_int _running_thread = 0;
     std::condition_variable _scan_thread_exit_cv;
 
     std::list<RowBatch*> _scan_row_batches;
@@ -321,6 +324,9 @@ protected:
     RuntimeProfile::Counter* _scanner_wait_worker_timer = nullptr;
 
     RuntimeProfile::Counter* _olap_wait_batch_queue_timer = nullptr;
+
+    vectorized::VExpr* _dfs_peel_conjunct(vectorized::VExpr* expr, int& leaf_index);
+    void _peel_pushed_conjuncts(); // remove pushed expr from conjunct tree
 };
 
 } // namespace doris
diff --git a/be/src/exec/olap_scanner.cpp b/be/src/exec/olap_scanner.cpp
index af1d658054..2e05c5d12c 100644
--- a/be/src/exec/olap_scanner.cpp
+++ b/be/src/exec/olap_scanner.cpp
@@ -19,9 +19,9 @@
 
 #include <string>
 
-#include "gen_cpp/PaloInternalService_types.h"
 #include "common/utils.h"
 #include "exprs/expr_context.h"
+#include "gen_cpp/PaloInternalService_types.h"
 #include "olap/decimal12.h"
 #include "olap/field.h"
 #include "olap/uint24.h"
@@ -176,8 +176,10 @@ Status OlapScanner::_init_tablet_reader_params(
              _tablet_reader_params.rs_readers[1]->rowset()->start_version() == 2 &&
              !_tablet_reader_params.rs_readers[1]->rowset()->rowset_meta()->is_segments_overlapping());
 
+    _tablet_reader_params.origin_return_columns = &_return_columns;
     if (_aggregation || single_version) {
         _tablet_reader_params.return_columns = _return_columns;
+        _tablet_reader_params.direct_mode = true;
     } else {
         // we need to fetch all key columns to do the right aggregation on storage engine side.
         for (size_t i = 0; i < _tablet->num_key_columns(); ++i) {
@@ -239,7 +241,8 @@ Status OlapScanner::_init_return_columns() {
             }
         }
         if (auto sequence_col_idx = _tablet->tablet_schema().sequence_col_idx();
-            has_replace_col && std::find(_return_columns.begin(), _return_columns.end(), sequence_col_idx) == _return_columns.end()) {
+            has_replace_col && std::find(_return_columns.begin(), _return_columns.end(),
+                                         sequence_col_idx) == _return_columns.end()) {
             _return_columns.push_back(sequence_col_idx);
         }
     }
diff --git a/be/src/exec/olap_scanner.h b/be/src/exec/olap_scanner.h
index f234925647..0c684d9851 100644
--- a/be/src/exec/olap_scanner.h
+++ b/be/src/exec/olap_scanner.h
@@ -58,7 +58,7 @@ public:
 
     Status open();
 
-    Status get_batch(RuntimeState* state, RowBatch* batch, bool* eof);
+    virtual Status get_batch(RuntimeState* state, RowBatch* batch, bool* eof);
 
     Status close(RuntimeState* state);
 
@@ -103,7 +103,7 @@ protected:
     // Update profile that need to be reported in realtime.
     void _update_realtime_counter();
 
-    virtual void set_tablet_reader() { _tablet_reader.reset(new TupleReader); }
+    virtual void set_tablet_reader() { _tablet_reader = std::make_unique<TupleReader>(); }
 
 protected:
     RuntimeState* _runtime_state;
diff --git a/be/src/exec/partitioned_aggregation_node.cc b/be/src/exec/partitioned_aggregation_node.cc
index 2311f82f7b..68f536ffda 100644
--- a/be/src/exec/partitioned_aggregation_node.cc
+++ b/be/src/exec/partitioned_aggregation_node.cc
@@ -1048,7 +1048,7 @@ Tuple* PartitionedAggregationNode::GetOutputTuple(const vector<NewAggFnEvaluator
     }
     if (needs_finalize_) {
         NewAggFnEvaluator::Finalize(agg_fn_evals, tuple, dst,
-                                    grouping_exprs_.size() == 0 && child(0)->rows_returned() == 0);
+                grouping_exprs_.size() == 0 && child(0)->rows_returned() == 0);
     } else {
         NewAggFnEvaluator::Serialize(agg_fn_evals, tuple);
     }
diff --git a/be/src/exec/repeat_node.h b/be/src/exec/repeat_node.h
index 01335d2233..d9dce75278 100644
--- a/be/src/exec/repeat_node.h
+++ b/be/src/exec/repeat_node.h
@@ -40,7 +40,7 @@ public:
 protected:
     virtual void debug_string(int indentation_level, std::stringstream* out) const override;
 
-private:
+protected:
     Status get_repeated_batch(RowBatch* child_row_batch, int repeat_id_idx, RowBatch* row_batch);
 
     // Slot id set used to indicate those slots need to set to null.
diff --git a/be/src/exec/scan_node.h b/be/src/exec/scan_node.h
index 5f8ec4b082..bd0999127f 100644
--- a/be/src/exec/scan_node.h
+++ b/be/src/exec/scan_node.h
@@ -91,7 +91,7 @@ public:
 
 protected:
     RuntimeProfile::Counter* _bytes_read_counter; // # bytes read from the scanner
-    // # rows/tuples read from the scanner (including those discarded by eval_conjucts())
+    // # rows/tuples read from the scanner (including those discarded by eval_conjuncts())
     RuntimeProfile::Counter* _rows_read_counter;
     // Wall based aggregate read throughput [bytes/sec]
     RuntimeProfile::Counter* _total_throughput_counter;
diff --git a/be/src/exec/schema_scan_node.h b/be/src/exec/schema_scan_node.h
index 2052de05ca..f45086c78f 100644
--- a/be/src/exec/schema_scan_node.h
+++ b/be/src/exec/schema_scan_node.h
@@ -62,7 +62,7 @@ private:
     void debug_string(int indentation_level, std::stringstream* out) const override;
     // Copy one row from schema table to input tuple
     void copy_one_row();
-
+protected:
     bool _is_init;
     const std::string _table_name;
     SchemaScannerParam _scanner_param;
diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp
index 7d470bf33c..a1195d5b35 100644
--- a/be/src/exec/tablet_info.cpp
+++ b/be/src/exec/tablet_info.cpp
@@ -407,4 +407,228 @@ uint32_t OlapTablePartitionParam::_compute_dist_hash(Tuple* key) const {
     return hash_val;
 }
 
+VOlapTablePartitionParam::VOlapTablePartitionParam(std::shared_ptr<OlapTableSchemaParam>& schema,
+                                                 const TOlapTablePartitionParam& t_param)
+        : _schema(schema),
+          _t_param(t_param),
+          _slots(_schema->tuple_desc()->slots()), // slot list of the schema's tuple descriptor
+          _mem_tracker(MemTracker::CreateTracker(-1, "OlapTablePartitionParam")) {
+    for (auto slot : _slots) { // one empty column per slot; init() later addresses the block by slot position
+        _partition_block.insert({slot->get_empty_mutable_column(), slot->get_data_type_ptr(), slot->col_name()});
+    }
+}
+
+VOlapTablePartitionParam::~VOlapTablePartitionParam() {
+    _mem_tracker->Release(_mem_usage); // gives back what init() consumed; _mem_usage is still 0 if init() never reached Consume()
+}
+
+Status VOlapTablePartitionParam::init() { // resolves key column positions, then builds every partition and the lookup map
+    std::vector<std::string> slot_column_names;
+    for (auto slot_desc : _schema->tuple_desc()->slots()) {
+        slot_column_names.emplace_back(slot_desc->col_name());
+    }
+
+    auto find_slot_locs = [&slot_column_names](const std::string& slot_name, std::vector<uint16_t>& locs, const std::string& column_type) {
+        auto it = std::find(slot_column_names.begin(), slot_column_names.end(), slot_name);
+        if (it == slot_column_names.end()) {
+            return Status::InternalError(column_type + " column not found, column =" + slot_name);
+        }
+        locs.emplace_back(it - slot_column_names.begin()); // position of the slot == position of its column in the block
+        return Status::OK();
+    };
+
+    if (_t_param.__isset.partition_columns) {
+        for (auto& part_col : _t_param.partition_columns) {
+            RETURN_IF_ERROR(find_slot_locs(part_col, _partition_slot_locs, "partition"));
+        }
+    }
+
+    _partitions_map.reset(new std::map<BlockRow*, VOlapTablePartition*, VOlapTablePartKeyComparator>(
+            VOlapTablePartKeyComparator(_partition_slot_locs)));
+    if (_t_param.__isset.distributed_columns) {
+        for (auto& col : _t_param.distributed_columns) {
+            RETURN_IF_ERROR(find_slot_locs(col, _distributed_slot_locs, "distributed"));
+        }
+    }
+
+    if (_t_param.partitions.empty()) { // a DCHECK vanishes in release builds and partitions[0] below would be out of bounds
+        return Status::InternalError("must have at least 1 partition");
+    }
+    _is_in_partition = _t_param.partitions[0].__isset.in_keys;
+
+    // initial partitions
+    for (const TOlapTablePartition& t_part : _t_param.partitions) {
+        auto part = _obj_pool.add(new VOlapTablePartition(&_partition_block));
+        part->id = t_part.id;
+
+        if (!_is_in_partition) {
+            if (t_part.__isset.start_keys) {
+                RETURN_IF_ERROR(_create_partition_keys(t_part.start_keys, &part->start_key));
+            }
+
+            if (t_part.__isset.end_keys) {
+                RETURN_IF_ERROR(_create_partition_keys(t_part.end_keys, &part->end_key));
+            }
+        } else {
+            for (const auto& keys : t_part.in_keys) {
+                RETURN_IF_ERROR(_create_partition_keys(keys, &part->in_keys.emplace_back(&_partition_block, -1)));
+            }
+        }
+
+        part->num_buckets = t_part.num_buckets;
+        auto num_indexes = _schema->indexes().size();
+        if (t_part.indexes.size() != num_indexes) {
+            std::stringstream ss;
+            ss << "number of partition's index is not equal with schema's"
+               << ", num_part_indexes=" << t_part.indexes.size()
+               << ", num_schema_indexes=" << num_indexes;
+            return Status::InternalError(ss.str());
+        }
+        part->indexes = t_part.indexes;
+        std::sort(part->indexes.begin(), part->indexes.end(),
+                  [](const OlapTableIndexTablets& lhs, const OlapTableIndexTablets& rhs) {
+                      return lhs.index_id < rhs.index_id;
+                  });
+        for (size_t j = 0; j < num_indexes; ++j) { // check index: sorted partition indexes must line up with the schema's
+            if (part->indexes[j].index_id != _schema->indexes()[j]->index_id) {
+                std::stringstream ss;
+                ss << "partition's index is not equal with schema's"
+                   << ", part_index=" << part->indexes[j].index_id
+                   << ", schema_index=" << _schema->indexes()[j]->index_id;
+                return Status::InternalError(ss.str());
+            }
+        }
+        _partitions.emplace_back(part);
+        if (_is_in_partition) {
+            for (auto& in_key : part->in_keys) { // in_keys is complete by now, so these element addresses stay valid
+                _partitions_map->emplace(&in_key, part);
+            }
+        } else {
+            _partitions_map->emplace(&part->end_key, part); // range partitions are keyed by their end key
+        }
+    }
+
+    _mem_usage = _partition_block.allocated_bytes();
+    _mem_tracker->Consume(_mem_usage);
+    return Status::OK();
+}
+
+bool VOlapTablePartitionParam::find_tablet(BlockRow* block_row, const VOlapTablePartition** partition,
+                                           uint32_t* dist_hashes) const {
+    // List partitions need an exact key match; range partitions take the first map key ordered after the row.
+    const auto pos = _is_in_partition ? _partitions_map->find(block_row)
+                                      : _partitions_map->upper_bound(block_row);
+    const bool hit = pos != _partitions_map->end() &&
+                     (_is_in_partition || _part_contains(pos->second, block_row));
+    if (hit) {
+        *partition = pos->second;
+        *dist_hashes = _compute_dist_hash(block_row);
+    }
+    return hit; // the out-parameters are left untouched on a miss
+}
+
+// Appends one literal per partition column to part_key->first and points part_key->second at the appended row.
+Status VOlapTablePartitionParam::_create_partition_keys(const std::vector<TExprNode>& t_exprs, BlockRow* part_key) {
+    for (int i = 0; i < t_exprs.size(); i++) {
+        RETURN_IF_ERROR(_create_partition_key(t_exprs[i], part_key->first, _partition_slot_locs[i]));
+        part_key->second = part_key->first->get_by_position(_partition_slot_locs[i]).column->size() - 1; // was left at the -1 sentinel
+    }
+    return Status::OK();
+}
+
+Status VOlapTablePartitionParam::_create_partition_key(const TExprNode& t_expr, vectorized::Block* block,
+                                                      uint16_t pos) { // appends the literal in t_expr to the column at block position pos
+    auto column = std::move(*block->get_by_position(pos).column).mutate(); // NOTE(review): presumably mutates in place while the block is the sole owner - confirm
+    switch (t_expr.node_type) {
+    case TExprNodeType::DATE_LITERAL: {
+        vectorized::VecDateTimeValue dt;
+        if (!dt.from_date_str(
+                    t_expr.date_literal.value.c_str(), t_expr.date_literal.value.size())) {
+            std::stringstream ss;
+            ss << "invalid date literal in partition column, date=" << t_expr.date_literal;
+            return Status::InternalError(ss.str());
+        }
+        column->insert_data(reinterpret_cast<const char *>(&dt), 0); // NOTE(review): length 0 is passed for every fixed-width value here; presumably ignored - confirm
+        break;
+    }
+    case TExprNodeType::INT_LITERAL: {
+        switch (t_expr.type.types[0].scalar_type.type) { // narrow the literal to the declared width before inserting its raw bytes
+            case TPrimitiveType::TINYINT: {
+                int8_t value = t_expr.int_literal.value;
+                column->insert_data(reinterpret_cast<const char *>(&value), 0);
+                break;
+            }
+            case TPrimitiveType::SMALLINT: {
+                int16_t value = t_expr.int_literal.value;
+                column->insert_data(reinterpret_cast<const char *>(&value), 0);
+                break;
+            }
+            case TPrimitiveType::INT: {
+                int32_t value = t_expr.int_literal.value;
+                column->insert_data(reinterpret_cast<const char *>(&value), 0);
+                break;
+            }
+            default: // every other integer type is inserted as a 64-bit value
+                int64_t value = t_expr.int_literal.value;
+                column->insert_data(reinterpret_cast<const char *>(&value), 0);
+        }
+        break;
+    } case TExprNodeType::LARGE_INT_LITERAL: {
+        StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
+        __int128 value = StringParser::string_to_int<__int128>(t_expr.large_int_literal.value.c_str(),
+                                                             t_expr.large_int_literal.value.size(),
+                                                             &parse_result);
+        if (parse_result != StringParser::PARSE_SUCCESS) {
+            value = MAX_INT128; // an unparsable literal falls back to the largest value instead of failing
+        }
+        column->insert_data(reinterpret_cast<const char *>(&value), 0);
+        break;
+    } case TExprNodeType::STRING_LITERAL: {
+        int len = t_expr.string_literal.value.size();
+        const char* str_val = t_expr.string_literal.value.c_str();
+
+        // CHAR is a fixed-length string and needs to use the length in the slot definition,
+        // VARCHAR is a variable-length string and needs to use the length of the string itself
+        // padding 0 to CHAR field (the padding code below is disabled, so CHAR keys are inserted unpadded)
+//        if (TYPE_CHAR == slot_desc->type().type && len < slot_desc->type().len) {
+//            auto new_ptr = (char*)_mem_pool->allocate(slot_desc->type().len);
+//            memset(new_ptr, 0, slot_desc->type().len);
+//            memcpy(new_ptr, str_val, len);
+//
+//            str_val = new_ptr;
+//            len = slot_desc->type().len;
+//        }
+        column->insert_data(str_val, len);
+        break;
+    } case TExprNodeType::BOOL_LITERAL: {
+        column->insert_data(reinterpret_cast<const char *>(&t_expr.bool_literal.value), 0);
+        break;
+    } default: {
+        std::stringstream ss;
+        ss << "unsupported partition column node type, type=" << t_expr.node_type;
+        return Status::InternalError(ss.str());
+    }
+    }
+    return Status::OK();
+}
+
+uint32_t VOlapTablePartitionParam::_compute_dist_hash(BlockRow* key) const { // chains a CRC32 over the row's distribution columns
+    uint32_t hash_val = 0;
+    for (int i = 0; i < _distributed_slot_locs.size(); ++i) {
+        auto slot_desc = _slots[_distributed_slot_locs[i]];
+        auto column = key->first->get_by_position(_distributed_slot_locs[i]).column;
+
+        auto val = column->get_data_at(key->second); // cell of this column at the key's row index
+        if (val.data != nullptr) {
+            hash_val = RawValue::zlib_crc32(val.data, val.size, slot_desc->type().type, hash_val);
+        } else {
+            // NULL is treated as integer 0 when hashing
+            static const int INT_VALUE = 0;
+            static const TypeDescriptor INT_TYPE(TYPE_INT);
+            hash_val = RawValue::zlib_crc32(&INT_VALUE, INT_TYPE, hash_val);
+        }
+    }
+    return hash_val;
+}
+
 } // namespace doris
diff --git a/be/src/exec/tablet_info.h b/be/src/exec/tablet_info.h
index 7bb2640582..53221baa04 100644
--- a/be/src/exec/tablet_info.h
+++ b/be/src/exec/tablet_info.h
@@ -31,6 +31,8 @@
 #include "runtime/raw_value.h"
 #include "runtime/tuple.h"
 
+#include "vec/core/block.h"
+
 namespace doris {
 
 class MemPool;
@@ -205,6 +207,104 @@ private:
             _partitions_map;
 };
 
+using BlockRow = std::pair<vectorized::Block*, int32_t>;
+
+struct VOlapTablePartition { // one partition of an olap table; its keys are (block, row index) pairs
+    int64_t id = 0;
+    BlockRow start_key; // used for range partitions; row index -1 until a key is assigned
+    BlockRow end_key; // used for range partitions; row index -1 until a key is assigned
+    std::vector<BlockRow> in_keys; // used for list ("in") partitions
+    int64_t num_buckets = 0;
+    std::vector<OlapTableIndexTablets> indexes;
+
+    VOlapTablePartition(vectorized::Block* partition_block): // both range keys start on partition_block with row index -1
+        start_key{partition_block, -1}, end_key{partition_block, -1} {};
+};
+
+class VOlapTablePartKeyComparator { // strict-weak ordering of BlockRow keys over the given column positions
+public:
+    VOlapTablePartKeyComparator(const std::vector<uint16_t>& slot_locs)
+            : _slot_locs(slot_locs) {}
+
+    // return true if lhs < rhs
+    // a row index of -1 marks an unbounded key: it is never less than anything and every other key is less than it
+    bool operator()(const BlockRow* lhs, const BlockRow* rhs) const {
+        if (lhs->second == -1) {
+            return false;
+        } else if (rhs->second == -1) {
+            return true;
+        }
+
+        for (auto slot_loc : _slot_locs) { // columns are compared in order; the first difference decides
+            auto res = lhs->first->get_by_position(slot_loc).column->compare_at(
+                    lhs->second, rhs->second, *rhs->first->get_by_position(slot_loc).column, -1);
+            if (res != 0) {
+                return res < 0;
+            }
+        }
+        // equal, return false
+        return false;
+    }
+
+private:
+    const std::vector<uint16_t>& _slot_locs; // held by reference: the referenced vector must outlive this comparator
+};
+
+// store an olap table's tablet information (block/row based counterpart that works on vectorized::Block rows)
+class VOlapTablePartitionParam {
+public:
+    VOlapTablePartitionParam(std::shared_ptr<OlapTableSchemaParam>& schema,
+                            const TOlapTablePartitionParam& param);
+
+    ~VOlapTablePartitionParam();
+
+    Status init(); // must succeed before find_tablet() is used: it creates _partitions_map
+
+    int64_t db_id() const { return _t_param.db_id; }
+    int64_t table_id() const { return _t_param.table_id; }
+    int64_t version() const { return _t_param.version; }
+
+    // return true if the row is covered by a partition; only then are *partitions and *dist_hash written
+    bool find_tablet(BlockRow* block_row, const VOlapTablePartition** partitions,
+                     uint32_t* dist_hash) const;
+
+    const std::vector<VOlapTablePartition*>& get_partitions() const { return _partitions; }
+
+private:
+    Status _create_partition_keys(const std::vector<TExprNode>& t_exprs, BlockRow* part_key);
+
+    Status _create_partition_key(const TExprNode& t_expr, vectorized::Block* block, uint16_t pos);
+
+    uint32_t _compute_dist_hash(BlockRow* key) const;
+
+    // check if this partition contain this key
+    bool _part_contains(VOlapTablePartition* part, BlockRow* key) const {
+        // start_key.second == -1 means only single partition
+        VOlapTablePartKeyComparator comparator(_partition_slot_locs);
+        return part->start_key.second == -1 || !comparator(key, &part->start_key); // i.e. key >= start_key
+    }
+
+private:
+    // this partition only valid in this schema
+    std::shared_ptr<OlapTableSchemaParam> _schema;
+    TOlapTablePartitionParam _t_param; // private copy of the thrift parameter
+
+    const std::vector<SlotDescriptor*>& _slots; // refers to the slot vector reached through _schema
+    std::vector<uint16_t> _partition_slot_locs; // block positions of the partition columns
+    std::vector<uint16_t> _distributed_slot_locs; // block positions of the distribution columns
+
+    ObjectPool _obj_pool; // owns the VOlapTablePartition objects
+    vectorized::Block _partition_block; // holds the key values of all partitions
+    std::shared_ptr<MemTracker> _mem_tracker;
+    std::vector<VOlapTablePartition*> _partitions;
+    std::unique_ptr<std::map<BlockRow*, VOlapTablePartition*, VOlapTablePartKeyComparator>>
+            _partitions_map;
+
+    bool _is_in_partition = false; // true for list ("in") partitions, false for range partitions
+    uint32_t _mem_usage = 0; // NOTE(review): init() assigns Block::allocated_bytes() to this; confirm 32 bits is wide enough
+};
+
+
 using TabletLocation = TTabletLocation;
 // struct TTabletLocation {
 //     1: required i64 tablet_id
diff --git a/be/src/exec/tablet_sink.cpp b/be/src/exec/tablet_sink.cpp
index 1718f7d9d7..7acde285b8 100644
--- a/be/src/exec/tablet_sink.cpp
+++ b/be/src/exec/tablet_sink.cpp
@@ -251,6 +251,53 @@ Status NodeChannel::add_row(Tuple* input_tuple, int64_t tablet_id) {
     return Status::OK();
 }
 
+Status NodeChannel::add_row(BlockRow& block_row, int64_t tablet_id) { // BlockRow overload: copies one block row into the current batch
+    // If add_row() when _eos_is_produced==true, there must be sth wrong, we can only mark this channel as failed.
+    auto st = none_of({_cancelled, _eos_is_produced});
+    if (!st.ok()) {
+        if (_cancelled) {
+            std::lock_guard<SpinLock> l(_cancel_msg_lock);
+            return Status::InternalError("add row failed. " + _cancel_msg);
+        } else {
+            return st.clone_and_prepend("already stopped, can't add row. cancelled/eos: ");
+        }
+    }
+
+    // We use OlapTableSink mem_tracker which has the same ancestor of _plan node,
+    // so in the ideal case, mem limit is a matter for _plan node.
+    // But there is still some unfinished things, we do mem limit here temporarily.
+    // _cancelled may be set by rpc callback, and it's possible that _cancelled might be set in any of the steps below.
+    // It's fine to do a fake add_row() and return OK, because we will check _cancelled in next add_row() or mark_close().
+    while (!_cancelled && _parent->_mem_tracker->AnyLimitExceeded(MemLimit::HARD) &&
+           _pending_batches_num > 0) { // back off in 10 ms steps while over the hard limit and batches are still pending
+        SCOPED_ATOMIC_TIMER(&_mem_exceeded_block_ns);
+        SleepFor(MonoDelta::FromMilliseconds(10));
+    }
+
+    auto row_no = _cur_batch->add_row();
+    if (row_no == RowBatch::INVALID_ROW_INDEX) { // current batch is full: queue it and start a new one
+        {
+            SCOPED_ATOMIC_TIMER(&_queue_push_lock_ns);
+            std::lock_guard<std::mutex> l(_pending_batches_lock);
+            //To simplify the add_row logic, postpone adding batch into req until the time of sending req
+            _pending_batches.emplace(std::move(_cur_batch), _cur_add_batch_request);
+            _pending_batches_num++;
+        }
+
+        _cur_batch.reset(new RowBatch(*_row_desc, _batch_size, _parent->_mem_tracker.get()));
+        _cur_add_batch_request.clear_tablet_ids();
+
+        row_no = _cur_batch->add_row();
+    }
+    DCHECK_NE(row_no, RowBatch::INVALID_ROW_INDEX);
+
+    _cur_batch->get_row(row_no)->set_tuple(0, // row block_row.second is copied out of the block into the batch's tuple pool
+            block_row.first->deep_copy_tuple(*_tuple_desc, _cur_batch->tuple_data_pool(), block_row.second, 0, true));
+    _cur_batch->commit_last_row();
+    _cur_add_batch_request.add_tablet_ids(tablet_id);
+    return Status::OK();
+}
+
 Status NodeChannel::mark_close() {
     auto st = none_of({_cancelled, _eos_is_produced});
     if (!st.ok()) {
@@ -501,6 +548,29 @@ Status IndexChannel::add_row(Tuple* tuple, int64_t tablet_id) {
     return Status::OK();
 }
 
+Status IndexChannel::add_row(BlockRow& block_row, int64_t tablet_id) { // BlockRow overload: sends the row to every node channel of the tablet
+    auto it = _channels_by_tablet.find(tablet_id);
+    DCHECK(it != _channels_by_tablet.end()) << "unknown tablet, tablet_id=" << tablet_id; // NOTE(review): debug-only check; `it` is dereferenced below regardless
+    std::stringstream ss; // collects the error messages of all failing channels
+    for (auto channel : it->second) {
+        // if this node channel is already failed, this add_row will be skipped
+        auto st = channel->add_row(block_row, tablet_id);
+        if (!st.ok()) {
+            mark_as_failed(channel);
+            ss << st.get_error_msg() << "; ";
+        }
+    }
+
+    if (has_intolerable_failure()) { // single channel failures are tolerated; an error is returned only past this threshold
+        std::stringstream ss2;
+        ss2 << "index channel has intolerable failure. " << BackendOptions::get_localhost()
+            << ", err: " << ss.str();
+        return Status::InternalError(ss2.str());
+    }
+
+    return Status::OK();
+}
+
 bool IndexChannel::has_intolerable_failure() {
     for (const auto& it : _failed_channels) {
         if (it.second.size() >= ((_parent->_num_replicas + 1) / 2)) {
@@ -731,7 +801,7 @@ Status OlapTableSink::send(RuntimeState* state, RowBatch* input_batch) {
         batch = _output_batch.get();
     }
     int num_invalid_rows = 0;
-    if (_need_validate_data) {
+    {
         SCOPED_RAW_TIMER(&_validate_data_ns);
         _filter_bitmap.Reset(batch->num_rows());
         num_invalid_rows = _validate_data(state, batch, &_filter_bitmap);
diff --git a/be/src/exec/tablet_sink.h b/be/src/exec/tablet_sink.h
index b5e86f7f92..6f403b1dd8 100644
--- a/be/src/exec/tablet_sink.h
+++ b/be/src/exec/tablet_sink.h
@@ -160,6 +160,8 @@ public:
 
     Status add_row(Tuple* tuple, int64_t tablet_id);
 
+    Status add_row(BlockRow& block_row, int64_t tablet_id);
+
     // two ways to stop channel:
     // 1. mark_close()->close_wait() PS. close_wait() will block waiting for the last AddBatch rpc response.
     // 2. just cancel()
@@ -276,6 +278,8 @@ public:
 
     Status add_row(Tuple* tuple, int64_t tablet_id);
 
+    Status add_row(BlockRow& block_row, int64_t tablet_id);
+
     void for_each_node_channel(const std::function<void(NodeChannel*)>& func) {
         for (auto& it : _node_channels) {
             func(it.second);
@@ -350,7 +354,7 @@ private:
     // only focus on pending batches and channel status, the internal errors of NodeChannels will be handled by the producer
     void _send_batch_process();
 
-private:
+protected:
     friend class NodeChannel;
     friend class IndexChannel;
 
diff --git a/be/src/exec/text_converter.cpp b/be/src/exec/text_converter.cpp
index 7801f66208..e2d37d536a 100644
--- a/be/src/exec/text_converter.cpp
+++ b/be/src/exec/text_converter.cpp
@@ -59,4 +59,26 @@ void TextConverter::unescape_string(const char* src, char* dest, size_t* len) {
     *len = dest_ptr - dest_start;
 }
 
+void TextConverter::unescape_string_on_spot(const char* src, size_t* len) {
+    // Removes escape characters from the first *len bytes of src in place (the buffer behind src is
+    // written through a const_cast, so it must be writable) and stores the unescaped length in *len.
+    const char* const start = src;
+    char* dest_ptr = const_cast<char*>(src);
+    const char* end = src + *len;
+    bool escape_next_char = false;
+
+    while (src < end) {
+        // An escape char that is not itself escaped is dropped; the character after it is copied verbatim.
+        escape_next_char = (*src == _escape_char) && !escape_next_char;
+        if (escape_next_char) {
+            ++src;
+        } else {
+            *dest_ptr++ = *src++;
+        }
+    }
+
+    // Measure from the start of the buffer: src has advanced to end, so dest_ptr - src is never positive.
+    *len = dest_ptr - start;
+}
+
 } // namespace doris
diff --git a/be/src/exec/text_converter.h b/be/src/exec/text_converter.h
index 4d97b7c62b..36b3254fa7 100644
--- a/be/src/exec/text_converter.h
+++ b/be/src/exec/text_converter.h
@@ -19,7 +19,7 @@
 #define DORIS_BE_SRC_QUERY_EXEC_TEXT_CONVERTER_H
 
 #include "runtime/runtime_state.h"
-
+#include "vec/core/block.h"
 namespace doris {
 
 class MemPool;
@@ -48,11 +48,14 @@ public:
     bool write_slot(const SlotDescriptor* slot_desc, Tuple* tuple, const char* data, int len,
                     bool copy_string, bool need_escape, MemPool* pool);
 
+    bool write_column(const SlotDescriptor* slot_desc, vectorized::MutableColumnPtr* column_ptr,
+                      const char* data, size_t len, bool copy_string, bool need_escape);
+
     // Removes escape characters from len characters of the null-terminated string src,
     // and copies the unescaped string into dest, changing *len to the unescaped length.
     // No null-terminator is added to dest.
     void unescape_string(const char* src, char* dest, size_t* len);
-
+    void unescape_string_on_spot(const char* src, size_t* len);
     // Removes escape characters from 'str', allocating a new string from pool.
     // 'str' is updated with the new ptr and length.
     void unescape_string(StringValue* str, MemPool* pool);
diff --git a/be/src/exec/text_converter.hpp b/be/src/exec/text_converter.hpp
index 1b1a60e64b..44e807ced0 100644
--- a/be/src/exec/text_converter.hpp
+++ b/be/src/exec/text_converter.hpp
@@ -17,6 +17,7 @@
 
 #ifndef DORIS_BE_SRC_QUERY_EXEC_TEXT_CONVERTER_HPP
 #define DORIS_BE_SRC_QUERY_EXEC_TEXT_CONVERTER_HPP
+#include <sql.h>
 
 #include <boost/algorithm/string.hpp>
 
@@ -32,7 +33,7 @@
 #include "util/binary_cast.hpp"
 #include "util/string_parser.hpp"
 #include "util/types.h"
-
+#include "vec/runtime/vdatetime_value.h"
 namespace doris {
 
 // Note: this function has a codegen'd version.  Changing this function requires
@@ -165,6 +166,134 @@ inline bool TextConverter::write_slot(const SlotDescriptor* slot_desc, Tuple* tu
     return true;
 }
 
+inline bool TextConverter::write_column(const SlotDescriptor* slot_desc, // parses text and appends it to *column_ptr per the slot type
+                                        vectorized::MutableColumnPtr* column_ptr, const char* data,
+                                        size_t len, bool copy_string, bool need_escape) { // copy_string is not read in this function
+    vectorized::IColumn* col_ptr = column_ptr->get();
+    // \N means it's NULL
+    if (true == slot_desc->is_nullable()) {
+        auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(column_ptr->get());
+        if ((len == 2 && data[0] == '\\' && data[1] == 'N') || len == SQL_NULL_DATA) {
+            nullable_column->insert_data(nullptr, 0);
+            return true;
+        } else {
+            nullable_column->get_null_map_data().push_back(0); // mark not-null up front; flipped to 1 below if parsing fails
+            col_ptr = &nullable_column->get_nested_column(); // the value itself goes into the nested column
+        }
+    }
+    StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
+
+    // Parse the raw-text data. Translate the text string to internal format.
+    switch (slot_desc->type().type) {
+    case TYPE_HLL:
+    case TYPE_VARCHAR:
+    case TYPE_CHAR: {
+        if (need_escape) {
+            unescape_string_on_spot(data, &len); // rewrites the caller's buffer in place and updates len
+        }
+        reinterpret_cast<vectorized::ColumnString*>(col_ptr)->insert_data(data, len);
+        break;
+    }
+
+    case TYPE_BOOLEAN: {
+        bool num = StringParser::string_to_bool(data, len, &parse_result);
+        reinterpret_cast<vectorized::ColumnVector<vectorized::UInt8>*>(col_ptr)->insert_value(
+                (uint8_t)num);
+        break;
+    }
+    case TYPE_TINYINT: { // numeric cases insert whatever the parser returned, even when parse_result reports a failure
+        int8_t num = StringParser::string_to_int<int8_t>(data, len, &parse_result);
+        reinterpret_cast<vectorized::ColumnVector<vectorized::Int8>*>(col_ptr)->insert_value(num);
+        break;
+    }
+    case TYPE_SMALLINT: {
+        int16_t num = StringParser::string_to_int<int16_t>(data, len, &parse_result);
+        reinterpret_cast<vectorized::ColumnVector<vectorized::Int16>*>(col_ptr)->insert_value(num);
+        break;
+    }
+    case TYPE_INT: {
+        int32_t num = StringParser::string_to_int<int32_t>(data, len, &parse_result);
+        reinterpret_cast<vectorized::ColumnVector<vectorized::Int32>*>(col_ptr)->insert_value(num);
+        break;
+    }
+    case TYPE_BIGINT: {
+        int64_t num = StringParser::string_to_int<int64_t>(data, len, &parse_result);
+        reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(num);
+        break;
+    }
+    case TYPE_LARGEINT: {
+        __int128 num = StringParser::string_to_int<__int128>(data, len, &parse_result);
+        reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(num);
+        break;
+    }
+
+    case TYPE_FLOAT: {
+        float num = StringParser::string_to_float<float>(data, len, &parse_result);
+        reinterpret_cast<vectorized::ColumnVector<vectorized::Float32>*>(col_ptr)->insert_value(
+                num);
+        break;
+    }
+    case TYPE_DOUBLE: {
+        double num = StringParser::string_to_float<double>(data, len, &parse_result);
+        reinterpret_cast<vectorized::ColumnVector<vectorized::Float64>*>(col_ptr)->insert_value(
+                num);
+        break;
+    }
+    case TYPE_DATE: {
+        vectorized::VecDateTimeValue ts_slot;
+        if (!ts_slot.from_date_str(data, len)) {
+            parse_result = StringParser::PARSE_FAILURE;
+            break; // NOTE(review): nothing is inserted here, yet a null-map entry was already pushed for nullable slots - confirm sizes stay in sync
+        }
+        ts_slot.cast_to_date();
+        reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_data(
+                reinterpret_cast<char*>(&ts_slot), 0);
+        break;
+    }
+
+    case TYPE_DATETIME: {
+        vectorized::VecDateTimeValue ts_slot;
+        if (!ts_slot.from_date_str(data, len)) {
+            parse_result = StringParser::PARSE_FAILURE;
+            break; // NOTE(review): same as TYPE_DATE - no value is inserted on this path
+        }
+        ts_slot.to_datetime();
+        reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_data(
+                reinterpret_cast<char*>(&ts_slot), 0);
+        break;
+    }
+
+    case TYPE_DECIMALV2: {
+        DecimalV2Value decimal_slot;
+        if (decimal_slot.parse_from_str(data, len)) { // NOTE(review): a non-zero return is treated as failure - confirm against parse_from_str
+            parse_result = StringParser::PARSE_FAILURE;
+            break; // NOTE(review): same as TYPE_DATE - no value is inserted on this path
+        }
+        PackedInt128 num = binary_cast<DecimalV2Value, PackedInt128>(decimal_slot);
+        reinterpret_cast<vectorized::ColumnVector<doris::PackedInt128>*>(col_ptr)->insert_value(
+                num.value);
+        break;
+    }
+
+    default:
+        DCHECK(false) << "bad slot type: " << slot_desc->type();
+        break;
+    }
+
+    if (parse_result == StringParser::PARSE_FAILURE) {
+        if (true == slot_desc->is_nullable()) { // nullable slot: turn the bad value into NULL and still report success
+            auto* nullable_column =
+                    reinterpret_cast<vectorized::ColumnNullable*>(column_ptr->get());
+            size_t size = nullable_column->get_null_map_data().size();
+            doris::vectorized::NullMap& null_map_data = nullable_column->get_null_map_data();
+            null_map_data[size - 1] = 1; // overwrite the entry pushed at the top of this call
+        } else {
+            return false; // non-nullable slot: the caller has to handle the failed row
+        }
+    }
+    return true;
+}
+
 } // namespace doris
 
-#endif
+#endif
\ No newline at end of file
diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt
index 5ebf1d1d26..5d69aa79e5 100644
--- a/be/src/exprs/CMakeLists.txt
+++ b/be/src/exprs/CMakeLists.txt
@@ -71,7 +71,6 @@ add_library(Exprs
   hll_function.cpp
   grouping_sets_functions.cpp
   topn_function.cpp
-
   table_function/explode_split.cpp
   table_function/explode_bitmap.cpp
   table_function/explode_json_array.cpp
diff --git a/be/src/exprs/aggregate_functions.cpp b/be/src/exprs/aggregate_functions.cpp
index 1f8db84085..93166cf463 100644
--- a/be/src/exprs/aggregate_functions.cpp
+++ b/be/src/exprs/aggregate_functions.cpp
@@ -592,30 +592,14 @@ void AggregateFunctions::sum(FunctionContext* ctx, const DecimalV2Val& src, Deci
     }
 
     if (dst->is_null) {
-        dst->is_null = false;
-        dst->set_to_zero();
+        init_zero_not_null<DecimalV2Val>(ctx, dst);
     }
-
     DecimalV2Value new_src = DecimalV2Value::from_decimal_val(src);
     DecimalV2Value new_dst = DecimalV2Value::from_decimal_val(*dst);
     new_dst = new_dst + new_src;
     new_dst.to_decimal_val(dst);
 }
 
-template <>
-void AggregateFunctions::sum(FunctionContext* ctx, const LargeIntVal& src, LargeIntVal* dst) {
-    if (src.is_null) {
-        return;
-    }
-
-    if (dst->is_null) {
-        dst->is_null = false;
-        dst->val = 0;
-    }
-
-    dst->val += src.val;
-}
-
 template <typename T>
 void AggregateFunctions::min_init(FunctionContext* ctx, T* dst) {
     auto val = AnyValUtil::max_val<T>(ctx);
@@ -1890,8 +1874,8 @@ static double compute_knuth_variance(const KnuthVarianceState& state, bool pop)
 static DecimalV2Value decimalv2_compute_knuth_variance(const DecimalV2KnuthVarianceState& state,
                                                        bool pop) {
     DecimalV2Value new_count = DecimalV2Value();
-    new_count.assign_from_double(state.count);
     if (state.count == 1) return new_count;
+    new_count.assign_from_double(state.count);
     DecimalV2Value new_m2 = DecimalV2Value::from_decimal_val(state.m2);
     if (pop)
         return new_m2 / new_count;
@@ -2395,6 +2379,8 @@ template void AggregateFunctions::sum<IntVal, BigIntVal>(FunctionContext*, const
                                                          BigIntVal* dst);
 template void AggregateFunctions::sum<BigIntVal, BigIntVal>(FunctionContext*, const BigIntVal& src,
                                                             BigIntVal* dst);
+template void AggregateFunctions::sum<LargeIntVal, LargeIntVal>(FunctionContext*, const LargeIntVal& src,
+                                                            LargeIntVal* dst);
 template void AggregateFunctions::sum<FloatVal, DoubleVal>(FunctionContext*, const FloatVal& src,
                                                            DoubleVal* dst);
 template void AggregateFunctions::sum<DoubleVal, DoubleVal>(FunctionContext*, const DoubleVal& src,
diff --git a/be/src/exprs/math_functions.cpp b/be/src/exprs/math_functions.cpp
index 787887d1fd..00cf3bd9ea 100644
--- a/be/src/exprs/math_functions.cpp
+++ b/be/src/exprs/math_functions.cpp
@@ -178,11 +178,11 @@ ONE_ARG_MATH_FN(ln, DoubleVal, DoubleVal, std::log);
 ONE_ARG_MATH_FN(log10, DoubleVal, DoubleVal, std::log10);
 ONE_ARG_MATH_FN(exp, DoubleVal, DoubleVal, std::exp);
 
-FloatVal MathFunctions::sign(FunctionContext* ctx, const DoubleVal& v) {
+TinyIntVal MathFunctions::sign(FunctionContext* ctx, const DoubleVal& v) {
     if (v.is_null) {
-        return FloatVal::null();
+        return TinyIntVal::null();
     }
-    return FloatVal((v.val > 0) ? 1.0f : ((v.val < 0) ? -1.0f : 0.0f));
+    return TinyIntVal((v.val > 0) ? 1 : ((v.val < 0) ? -1 : 0));
 }
 
 DoubleVal MathFunctions::radians(FunctionContext* ctx, const DoubleVal& v) {
diff --git a/be/src/exprs/math_functions.h b/be/src/exprs/math_functions.h
index 5d1258e8ac..9d55ed6758 100644
--- a/be/src/exprs/math_functions.h
+++ b/be/src/exprs/math_functions.h
@@ -50,7 +50,8 @@ public:
     static doris_udf::IntVal abs(doris_udf::FunctionContext*, const doris_udf::SmallIntVal&);
     static doris_udf::SmallIntVal abs(doris_udf::FunctionContext*, const doris_udf::TinyIntVal&);
 
-    static doris_udf::FloatVal sign(doris_udf::FunctionContext* ctx, const doris_udf::DoubleVal& v);
+    static doris_udf::TinyIntVal sign(doris_udf::FunctionContext* ctx,
+                                      const doris_udf::DoubleVal& v);
 
     static doris_udf::DoubleVal sin(doris_udf::FunctionContext*, const doris_udf::DoubleVal&);
     static doris_udf::DoubleVal asin(doris_udf::FunctionContext*, const doris_udf::DoubleVal&);
@@ -182,11 +183,6 @@ public:
 
     static double my_double_round(double value, int64_t dec, bool dec_unsigned, bool truncate);
 
-private:
-    static const int32_t MIN_BASE = 2;
-    static const int32_t MAX_BASE = 36;
-    static const char* _s_alphanumeric_chars;
-
     // Converts src_num in decimal to dest_base,
     // and fills expr_val.string_val with the result.
     static doris_udf::StringVal decimal_to_base(doris_udf::FunctionContext* ctx, int64_t src_num,
@@ -207,6 +203,12 @@ private:
     // Returns false otherwise, indicating some other error condition.
     static bool handle_parse_result(int8_t dest_base, int64_t* num,
                                     StringParser::ParseResult parse_res);
+
+    static const int32_t MIN_BASE = 2;
+    static const int32_t MAX_BASE = 36;
+
+private:
+    static const char* _s_alphanumeric_chars;
 };
 
 } // namespace doris
diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp
index b77cf9d71e..3cf45c4b75 100644
--- a/be/src/exprs/runtime_filter.cpp
+++ b/be/src/exprs/runtime_filter.cpp
@@ -373,6 +373,35 @@ public:
             break;
         }
     }
+    // Inserts a value given as a StringRef, converting it to the form insert(const void*) takes.
+    void insert(const StringRef& value) {
+        switch (_column_return_type) {
+        case TYPE_DATE:
+        case TYPE_DATETIME: {
+            // The referenced bytes are read as a vectorized DateTime and converted to DateTimeValue.
+            vectorized::DateTime packed = *reinterpret_cast<const vectorized::DateTime*>(value.data);
+            vectorized::VecDateTimeValue vec_value = binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(packed);
+            doris::DateTimeValue converted;
+            vec_value.convert_vec_dt_to_dt(&converted);
+            insert(reinterpret_cast<const void*>(&converted));
+            break;
+        }
+        case TYPE_CHAR:
+        case TYPE_VARCHAR:
+        case TYPE_HLL:
+        case TYPE_OBJECT:
+        case TYPE_STRING: {
+            // StringRef -> StringValue built from the same pointer and size.
+            StringValue wrapped(const_cast<char*>(value.data), value.size);
+            insert(reinterpret_cast<const void*>(&wrapped));
+            break;
+        }
+        default:
+            // Every other type: the referenced bytes are handed over unchanged.
+            insert(reinterpret_cast<const void*>(value.data));
+            break;
+        }
+    }
 
     template <class T>
     Status get_push_context(T* container, RuntimeState* state, ExprContext* prob_expr) {
@@ -784,6 +813,11 @@ void IRuntimeFilter::insert(const void* data) {
     }
 }
 
+void IRuntimeFilter::insert(const StringRef& value) {
+    DCHECK(is_producer()); // inserting is only valid on a producer filter
+    _wrapper->insert(value);
+}
+
 Status IRuntimeFilter::publish() {
     DCHECK(is_producer());
     if (_has_local_target) {
diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h
index 7d406f3600..c5e597aa21 100644
--- a/be/src/exprs/runtime_filter.h
+++ b/be/src/exprs/runtime_filter.h
@@ -128,6 +128,7 @@ public:
     // insert data to build filter
     // only used for producer
     void insert(const void* data);
+    void insert(const StringRef& data);
 
     // publish filter
     // push filter to remote node or push down it to scan_node
diff --git a/be/src/exprs/runtime_filter_slots.h b/be/src/exprs/runtime_filter_slots.h
index 576cad04b3..e098adc3d7 100644
--- a/be/src/exprs/runtime_filter_slots.h
+++ b/be/src/exprs/runtime_filter_slots.h
@@ -20,9 +20,9 @@
 #include "exprs/runtime_filter.h"
 #include "runtime/runtime_filter_mgr.h"
 #include "runtime/runtime_state.h"
+#include "vec/exprs/vexpr.h"
 
 namespace doris {
-
 // this class used in a hash join node
 // Provide a unified interface for other classes
 template <typename ExprCtxType>
@@ -129,6 +129,40 @@ public:
             }
         }
     }
+    // Inserts build-side rows into the runtime filters registered for each build expression.
+    // `datas` maps a block to the row numbers taken from it; NULL values are skipped.
+    void insert(std::unordered_map<const vectorized::Block*, std::vector<int>>& datas) {
+        for (int i = 0; i < _build_expr_context.size(); ++i) {
+            auto iter = _runtime_filters.find(i);
+            if (iter == _runtime_filters.end()) continue;
+            int result_column_id = _build_expr_context[i]->get_last_result_column_id();
+            // Bind by reference: a by-value loop variable copies the row-number vector per block.
+            for (const auto& it : datas) {
+                auto& column = it.first->get_by_position(result_column_id).column;
+                if (auto* nullable =
+                            vectorized::check_and_get_column<vectorized::ColumnNullable>(*column)) {
+                    auto& column_nested = nullable->get_nested_column();
+                    auto& column_nullmap = nullable->get_null_map_column();
+                    for (int row_num : it.second) {
+                        if (column_nullmap.get_bool(row_num)) {
+                            continue;
+                        }
+                        const auto& ref_data = column_nested.get_data_at(row_num);
+                        for (auto filter : iter->second) {
+                            filter->insert(ref_data);
+                        }
+                    }
+                } else {
+                    for (int row_num : it.second) {
+                        const auto& ref_data = column->get_data_at(row_num);
+                        for (auto filter : iter->second) {
+                            filter->insert(ref_data);
+                        }
+                    }
+                }
+            }
+        }
+    }
 
     // should call this method after insert
     void ready_for_publish() {
@@ -166,5 +200,5 @@ private:
 };
 
 using RuntimeFilterSlots = RuntimeFilterSlotsBase<ExprContext>;
-
+using VRuntimeFilterSlots = RuntimeFilterSlotsBase<vectorized::VExprContext>;
 } // namespace doris
diff --git a/be/src/exprs/v_string_functions.h b/be/src/exprs/v_string_functions.h
new file mode 100644
index 0000000000..3fd9845535
--- /dev/null
+++ b/be/src/exprs/v_string_functions.h
@@ -0,0 +1,219 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#ifndef BE_V_STRING_FUNCTIONS_H
+#define BE_V_STRING_FUNCTIONS_H
+
+#include <stdint.h>
+#include <unistd.h>
+#include "runtime/string_value.hpp"
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+namespace doris {
+class VStringFunctions {
+public:
+#ifdef __SSE2__
+    /// n equals to 16 chars length
+    static constexpr auto REGISTER_SIZE = sizeof(__m128i);
+#endif
+public:
+    /// Strips trailing ASCII spaces. The result points into str's buffer, except that an
+    /// input consisting only of spaces yields StringVal("").
+    static StringVal rtrim(const StringVal& str) {
+        if (str.is_null || str.len == 0) {
+            return str;
+        }
+        auto begin = 0;
+        auto end = str.len - 1;
+#ifdef __SSE2__
+        const auto pattern = _mm_set1_epi8(' ');
+        while (end - begin + 1 >= REGISTER_SIZE) {
+            const auto* chunk = reinterpret_cast<const __m128i *>(str.ptr + end + 1 - REGISTER_SIZE);
+            // Bit i of mask is set when byte i of the last REGISTER_SIZE bytes is a space.
+            const auto mask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128(chunk), pattern));
+            // With the 16 mask bits moved to the top of the word and inverted, the number of
+            // leading zeros equals the number of consecutive spaces ending the chunk.
+            const int offset = __builtin_clz(~(mask << REGISTER_SIZE));
+            if (offset == 0) {
+                return StringVal(str.ptr + begin, end - begin + 1);
+            }
+            end -= offset;
+        }
+#endif
+        while (end >= begin && str.ptr[end] == ' ') {
+            --end;
+        }
+        if (end < 0) {
+            return StringVal("");
+        }
+        return StringVal(str.ptr + begin, end - begin + 1);
+    }
+
+    /// Strips leading ASCII spaces; the returned StringVal points into str's buffer.
+    static StringVal ltrim(const StringVal& str) {
+        if (str.is_null || str.len == 0) {
+            return str;
+        }
+        auto begin = 0;
+        auto end = str.len - 1;
+#ifdef __SSE2__
+        char blank = ' ';
+        const auto pattern =  _mm_set1_epi8(blank);
+        while (end - begin + 1 >= REGISTER_SIZE) {
+            const auto v_haystack = _mm_loadu_si128(reinterpret_cast<const __m128i *>(str.ptr + begin));
+            const auto v_against_pattern = _mm_cmpeq_epi8(v_haystack, pattern);
+            // Bit i of mask is set when byte i of the chunk is a space.
+            const auto mask = _mm_movemask_epi8(v_against_pattern);
+            if (mask == 0xffff) {
+                // Whole chunk is blank. Handle it before __builtin_ctz, whose result is
+                // undefined for a zero argument.
+                begin += REGISTER_SIZE;
+                continue;
+            }
+            // Index of the first non-space byte (0 when the chunk has no leading space).
+            begin += __builtin_ctz(mask ^ 0xffff);
+            return StringVal(str.ptr + begin, end - begin + 1);
+        }
+#endif
+        while (begin <= end && str.ptr[begin] == ' ') {
+            ++begin;
+        }
+        return StringVal(str.ptr + begin, end - begin + 1);
+    }
+
+    /// Strips both leading and trailing spaces. ltrim and rtrim each return a null or empty
+    /// input unchanged, so no separate guard is required here.
+    static StringVal trim(const StringVal& str) {
+        const StringVal without_leading = ltrim(str);
+        return rtrim(without_leading);
+    }
+
+    /// Returns true when no byte of str has its top bit (0x80) set.
+    static bool is_ascii(StringVal str) {
+        size_t pos = 0;
+        int high_bits = 0;
+    #ifdef __SSE2__
+        // OR all complete 16-byte chunks together; movemask then gathers the top bit of each
+        // lane, so the result is non-zero iff some byte in those chunks is >= 0x80.
+        __m128i folded = _mm_setzero_si128();
+        if (str.len >= REGISTER_SIZE) {
+            while (pos <= str.len - REGISTER_SIZE) {
+                const __m128i chunk = _mm_loadu_si128((const __m128i*)(str.ptr + pos));
+                folded = _mm_or_si128(folded, chunk);
+                pos += REGISTER_SIZE;
+            }
+        }
+        high_bits = _mm_movemask_epi8(folded);
+    #endif
+
+        // Scalar pass over the bytes not yet examined (all of them without SSE2).
+        char tail_bits = 0;
+        while (pos < str.len) {
+            tail_bits |= str.ptr[pos];
+            ++pos;
+        }
+        high_bits |= (tail_bits & 0x80);
+        return !high_bits;
+    }
+
+    /// Reverses str character by character into dst's buffer (assumes dst.len == str.len).
+    static void reverse(const StringVal& str, StringVal dst) {
+        if (str.is_null) {
+            dst.ptr = NULL; // note: dst is a by-value copy, so the caller never sees this
+            return;
+        }
+        if (VStringFunctions::is_ascii(str)) {
+            int64_t begin = 0;
+            int64_t end = str.len;
+            int64_t result_end = dst.len;
+    #if defined(__SSSE3__) // _mm_shuffle_epi8 is an SSSE3 intrinsic; SSE2 alone lacks it
+            const auto shuffle_array = _mm_set_epi64((__m64)0x00'01'02'03'04'05'06'07ull, (__m64)0x08'09'0a'0b'0c'0d'0e'0full);
+            for (; (begin + REGISTER_SIZE) < end; begin += REGISTER_SIZE) {
+                result_end -= REGISTER_SIZE;
+                _mm_storeu_si128((__m128i*)(dst.ptr + result_end),
+                                 _mm_shuffle_epi8(_mm_loadu_si128((__m128i*)(str.ptr + begin)), shuffle_array));
+            }
+    #endif
+            for (; begin < end; ++begin) {
+                dst.ptr[--result_end] = str.ptr[begin];
+            }
+        } else {
+            for (size_t i = 0, char_size = 0; i < str.len; i += char_size) {
+                // Clamp to the bytes left so a truncated sequence cannot overrun either buffer.
+                char_size = std::min<size_t>(get_utf8_byte_length(str.ptr[i]), str.len - i);
+                std::copy(str.ptr + i, str.ptr + i + char_size, dst.ptr + str.len - i - char_size);
+            }
+        }
+    }
+
+    /// Byte length of the UTF-8 sequence introduced by lead byte `byte`, decided purely from
+    /// the byte's value (the legacy 5- and 6-byte forms are included). Anything below 0xC0
+    /// counts as a single byte.
+    static size_t get_utf8_byte_length(unsigned char byte) {
+        // 0xxxxxxx (ASCII) or 10xxxxxx (continuation byte)
+        if (byte < 0xC0) return 1;
+        // 110xxxxx
+        if (byte < 0xE0) return 2;
+        // 1110xxxx
+        if (byte < 0xF0) return 3;
+        // 11110xxx
+        if (byte < 0xF8) return 4;
+        // 111110xx
+        if (byte < 0xFC) return 5;
+        // 0xFC and above
+        return 6;
+    }
+
+    /// Writes the upper-case hex encoding of src_str[0, length) to dst_str: two chars per input
+    /// byte, high nibble first. dst_str must have room for 2 * length chars.
+    static void hex_encode(const unsigned char* src_str, size_t length, char* dst_str) {
+        static constexpr auto hex_table = "0123456789ABCDEF";
+        auto src_str_end = src_str + length;
+
+#if defined(__SSSE3__) // _mm_shuffle_epi8 is an SSSE3 intrinsic; SSE2 alone does not provide it
+        constexpr auto step = sizeof(uint64);
+        if (src_str + step < src_str_end) {
+            const auto hex_map = _mm_loadu_si128(reinterpret_cast<const __m128i *>(hex_table));
+            const auto mask_map = _mm_set1_epi8(0x0F);
+
+            // Each round expands 8 input bytes into 16 hex chars: interleave high and low
+            // nibbles, then use them as indexes into hex_map.
+            do {
+                auto data = _mm_loadu_si64(src_str);
+                auto hex_loc = _mm_and_si128(_mm_unpacklo_epi8(_mm_srli_epi64(data, 4), data), mask_map);
+                _mm_storeu_si128(reinterpret_cast<__m128i *>(dst_str), _mm_shuffle_epi8(hex_map, hex_loc));
+
+                src_str += step;
+                dst_str += step * 2;
+            } while (src_str + step < src_str_end);
+        }
+#endif
+        // Scalar path for the bytes left over (all of them when the SIMD path is compiled out).
+        for (; src_str < src_str_end; ++src_str, dst_str += 2) {
+            dst_str[0] = hex_table[*src_str >> 4];   // high 4 bits
+            dst_str[1] = hex_table[*src_str & 0x0F]; // low 4 bits
+        }
+    }
+};
+}
+
+#endif //BE_V_STRING_FUNCTIONS_H
\ No newline at end of file
diff --git a/be/src/olap/block_column_predicate.cpp b/be/src/olap/block_column_predicate.cpp
index f460be5395..725ebda470 100644
--- a/be/src/olap/block_column_predicate.cpp
+++ b/be/src/olap/block_column_predicate.cpp
@@ -39,6 +39,30 @@ void SingleColumnBlockPredicate::evaluate_or(RowBlockV2 *block, uint16_t selecte
     _predicate->evaluate_or(&column_block, block->selection_vector(), selected_size, flags);
 }
 
+// Runs the wrapped predicate on the column of `block` found at the predicate's column id.
+void SingleColumnBlockPredicate::evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const {
+    auto& target_column = block[_predicate->column_id()];
+    _predicate->evaluate(*target_column, sel, selected_size);
+}
+ 
+// Forwards evaluate_and to the wrapped predicate for the column at the predicate's column id.
+void SingleColumnBlockPredicate::evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const {
+    auto& target_column = block[_predicate->column_id()];
+    _predicate->evaluate_and(*target_column, sel, selected_size, flags);
+}
+ 
+// Forwards evaluate_or to the wrapped predicate for the column at the predicate's column id.
+void SingleColumnBlockPredicate::evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const {
+    auto& target_column = block[_predicate->column_id()];
+    _predicate->evaluate_or(*target_column, sel, selected_size, flags);
+}
+ 
+// Forwards evaluate_vec to the wrapped predicate for the column at the predicate's column id.
+void SingleColumnBlockPredicate::evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const {
+    auto& target_column = block[_predicate->column_id()];
+    _predicate->evaluate_vec(*target_column, size, flags);
+}
+
 void OrBlockColumnPredicate::evaluate(RowBlockV2* block, uint16_t* selected_size) const {
     if (num_of_column_predicate() == 1) {
         _block_column_predicate_vec[0]->evaluate(block, selected_size);
@@ -60,12 +84,39 @@ void OrBlockColumnPredicate::evaluate(RowBlockV2* block, uint16_t* selected_size
     }
 }
 
+// Keeps in `sel` only the entries accepted by at least one child; *selected_size is updated.
+void OrBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const {
+    if (num_of_column_predicate() == 1) {
+        _block_column_predicate_vec[0]->evaluate(block, sel, selected_size);
+        return;
+    }
+    // One flag per selected entry; each child ORs its result into it.
+    bool matched[*selected_size];
+    memset(matched, false, *selected_size);
+    for (int i = 0; i < num_of_column_predicate(); ++i) {
+        _block_column_predicate_vec[i]->evaluate_or(block, sel, *selected_size, matched);
+    }
+    uint16_t kept = 0;
+    for (int i = 0; i < *selected_size; ++i) {
+        if (matched[i]) {
+            sel[kept++] = sel[i];
+        }
+    }
+    *selected_size = kept;
+}
+
 void OrBlockColumnPredicate::evaluate_or(RowBlockV2 *block, uint16_t selected_size, bool* flags) const {
     for (auto block_column_predicate : _block_column_predicate_vec) {
         block_column_predicate->evaluate_or(block, selected_size, flags);
     }
 }
 
+void OrBlockColumnPredicate::evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const {
+    for (auto child_predicate : _block_column_predicate_vec) {
+        child_predicate->evaluate_or(block, sel, selected_size, flags); // every child shares `flags`
+    }
+}
+
 void OrBlockColumnPredicate::evaluate_and(RowBlockV2 *block, uint16_t selected_size, bool* flags) const {
     if (num_of_column_predicate() == 1) {
         _block_column_predicate_vec[0]->evaluate_and(block, selected_size, flags);
@@ -83,18 +134,47 @@ void OrBlockColumnPredicate::evaluate_and(RowBlockV2 *block, uint16_t selected_s
     }
 }
 
+// ANDs into `flags` the disjunction of all child predicates, computed in a scratch array first.
+void OrBlockColumnPredicate::evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const {
+    if (num_of_column_predicate() == 1) {
+        _block_column_predicate_vec[0]->evaluate_and(block, sel, selected_size, flags);
+        return;
+    }
+    bool any_child[selected_size];
+    memset(any_child, false, selected_size);
+    for (int i = 0; i < num_of_column_predicate(); ++i) {
+        _block_column_predicate_vec[i]->evaluate_or(block, sel, selected_size, any_child);
+    }
+    // Fold the OR result into the caller's flags.
+    for (int i = 0; i < selected_size; ++i) {
+        flags[i] &= any_child[i];
+    }
+}
+
 void AndBlockColumnPredicate::evaluate(RowBlockV2* block, uint16_t* selected_size) const {
     for (auto block_column_predicate : _block_column_predicate_vec) {
         block_column_predicate->evaluate(block, selected_size);
     }
 }
 
+void AndBlockColumnPredicate::evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const {
+    for (auto child_predicate : _block_column_predicate_vec) {
+        child_predicate->evaluate(block, sel, selected_size); // runs on the selection left by the previous child
+    }
+}
+
 void AndBlockColumnPredicate::evaluate_and(RowBlockV2 *block, uint16_t selected_size, bool* flags) const {
     for (auto block_column_predicate : _block_column_predicate_vec) {
         block_column_predicate->evaluate_and(block, selected_size, flags);
     }
 }
 
+void AndBlockColumnPredicate::evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const {
+    for (auto child_predicate : _block_column_predicate_vec) {
+        child_predicate->evaluate_and(block, sel, selected_size, flags); // every child shares `flags`
+    }
+}
+
 void AndBlockColumnPredicate::evaluate_or(RowBlockV2 *block, uint16_t selected_size, bool* flags) const {
     if (num_of_column_predicate() == 1) {
         _block_column_predicate_vec[0]->evaluate_or(block, selected_size, flags);
@@ -113,4 +193,38 @@ void AndBlockColumnPredicate::evaluate_or(RowBlockV2 *block, uint16_t selected_s
     }
 }
 
+// ORs into `flags` the conjunction of all child predicates.
+void AndBlockColumnPredicate::evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const {
+    if (num_of_column_predicate() == 1) {
+        _block_column_predicate_vec[0]->evaluate_or(block, sel, selected_size, flags);
+        return;
+    }
+    // Scratch flags start out true and are handed to every child's evaluate_and.
+    bool all_children[selected_size];
+    memset(all_children, true, selected_size);
+    for (auto child_predicate : _block_column_predicate_vec) {
+        child_predicate->evaluate_and(block, sel, selected_size, all_children);
+    }
+    for (uint16_t i = 0; i < selected_size; i++) {
+        flags[i] |= all_children[i];
+    }
+}
+ 
+// todo(wb) Can the 'and' of multiple bitmaps be vectorized?
+void AndBlockColumnPredicate::evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const {
+    if (num_of_column_predicate() == 1) {
+        _block_column_predicate_vec[0]->evaluate_vec(block, size, flags);
+        return;
+    }
+    bool child_flags[size];
+    for (auto child_predicate : _block_column_predicate_vec) {
+        // Reset the scratch array, hand it to the child, then AND it into the result.
+        memset(child_flags, true, size);
+        child_predicate->evaluate_vec(block, size, child_flags);
+        for (uint16_t row = 0; row < size; row++) {
+            flags[row] &= child_flags[row];
+        }
+    }
+}
+
 } // namespace doris
diff --git a/be/src/olap/block_column_predicate.h b/be/src/olap/block_column_predicate.h
index 4c0648699a..ef2a4ca038 100644
--- a/be/src/olap/block_column_predicate.h
+++ b/be/src/olap/block_column_predicate.h
@@ -44,6 +44,12 @@ public:
     virtual void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags) const = 0;
 
     virtual void get_all_column_ids(std::set<ColumnId>& column_id_set) const = 0;
+
+    virtual void evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const {};
+    virtual void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const {};
+    virtual void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const {};
+    // Variant without a selection vector: takes a row count `size` and a `flags` array. No-op by default.
+    virtual void evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const {};
 };
 
 class SingleColumnBlockPredicate : public BlockColumnPredicate {
@@ -57,6 +63,13 @@ public:
     void get_all_column_ids(std::set<ColumnId>& column_id_set) const override {
         column_id_set.insert(_predicate->column_id());
     };
+
+    void evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const override;
+    void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override;
+    void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override;
+ 
+    void evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const override;
+
 private:
     const ColumnPredicate* _predicate;
 };
@@ -98,6 +111,12 @@ public:
     // 2.Do AND SEMANTICS in flags use 1 result to get proper select flags
     void evaluate_and(RowBlockV2* block, uint16_t selected_size, bool* flags) const override;
     void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags) const override;
+
+    void evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const override;
+    void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override;
+    void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override;
+
+    // note(wb) we didn't implement the evaluate_vec method here, because the storage layer only supports AND predicates for now.
 };
 
 class AndBlockColumnPredicate : public MutilColumnBlockPredicate {
@@ -109,6 +128,13 @@ public:
     // 1.AndBlockColumnPredicate need evaluate all child BlockColumnPredicate AND SEMANTICS inside first
     // 2.Evaluate OR SEMANTICS in flags use 1 result to get proper select flags
     void evaluate_or(RowBlockV2* block, uint16_t selected_size, bool* flags) const override;
+
+    void evaluate(vectorized::MutableColumns& block, uint16_t* sel, uint16_t* selected_size) const override;
+    void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override;
+    void evaluate_or(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size, bool* flags) const override;
+
+    void evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const override;
+
 };
 
 } //namespace doris
diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h
index b3dcbbb644..ff3201c692 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -27,6 +27,10 @@
 #include "olap/field.h"
 #include "runtime/string_value.hpp"
 #include "runtime/vectorized_row_batch.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/utils/util.hpp"
 
 namespace doris {
 
@@ -59,12 +63,14 @@ public:
         return Status::OK();
     }
 
+    void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override;
+
 private:
     std::shared_ptr<IBloomFilterFuncBase> _filter;
     SpecificFilter* _specific_filter; // owned by _filter
 };
 
-// blomm filter column predicate do not support in segment v1
+// bloom filter column predicate is not supported in segment v1
 template <PrimitiveType type>
 void BloomFilterColumnPredicate<type>::evaluate(VectorizedRowBatch* batch) const {
     uint16_t n = batch->size();
@@ -99,6 +105,38 @@ void BloomFilterColumnPredicate<type>::evaluate(ColumnBlock* block, uint16_t* se
     *size = new_size;
 }
 
+// Narrows `sel` (first *size entries) to non-NULL rows that pass the bloom filter; updates *size.
+template <PrimitiveType type>
+void BloomFilterColumnPredicate<type>::evaluate(vectorized::IColumn& column, uint16_t* sel,
+                                                uint16_t* size) const {
+    using T = typename PrimitiveTypeTraits<type>::CppType;
+    uint16_t kept = 0;
+    if (column.is_nullable()) {
+        auto* nullable_col = vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
+        auto& null_map_data = nullable_col->get_null_map_column().get_data();
+        auto* pred_col = vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(
+                nullable_col->get_nested_column());
+        auto& pred_col_data = pred_col->get_data();
+        for (uint16_t i = 0; i < *size; i++) {
+            const uint16_t row = sel[i];
+            sel[kept] = row; // stored unconditionally; kept only if `kept` advances below
+            const auto* cell_value = reinterpret_cast<const void*>(&(pred_col_data[row]));
+            kept += (!null_map_data[row]) && _specific_filter->find_olap_engine(cell_value);
+        }
+    } else {
+        auto* pred_col =
+                vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(column);
+        auto& pred_col_data = pred_col->get_data();
+        for (uint16_t i = 0; i < *size; i++) {
+            const uint16_t row = sel[i];
+            sel[kept] = row; // stored unconditionally; kept only if `kept` advances below
+            const auto* cell_value = reinterpret_cast<const void*>(&(pred_col_data[row]));
+            kept += _specific_filter->find_olap_engine(cell_value);
+        }
+    }
+    *size = kept;
+}
+
 class BloomFilterColumnPredicateFactory {
 public:
     static ColumnPredicate* create_column_predicate(
diff --git a/be/src/olap/collect_iterator.cpp b/be/src/olap/collect_iterator.cpp
index 903ed0ac8c..46a7aa4436 100644
--- a/be/src/olap/collect_iterator.cpp
+++ b/be/src/olap/collect_iterator.cpp
@@ -32,7 +32,7 @@ CollectIterator::~CollectIterator() = default;
 void CollectIterator::init(TabletReader* reader) {
     _reader = reader;
     // when aggregate is enabled or key_type is DUP_KEYS, we don't merge
-    // multiple data to aggregate for performance in user fetch
+    // multiple data to aggregate for better performance
     if (_reader->_reader_type == READER_QUERY &&
         (_reader->_aggregation || _reader->_tablet->keys_type() == KeysType::DUP_KEYS)) {
         _merge = false;
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 21b115e821..6b1aa232a1 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -23,6 +23,7 @@
 #include "olap/column_block.h"
 #include "olap/rowset/segment_v2/bitmap_index_reader.h"
 #include "olap/selection_vector.h"
+#include "vec/columns/column.h"
 
 using namespace doris::segment_v2;
 
@@ -54,8 +55,22 @@ public:
                             const std::vector<BitmapIndexIterator*>& iterators, uint32_t num_rows,
                             roaring::Roaring* roaring) const = 0;
 
+    // Evaluate the predicate on an IColumn using a selection vector
+    // (the short-circuit evaluation path). The default implementations do nothing.
+    virtual void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const {};
+    virtual void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size,
+                              bool* flags) const {};
+    virtual void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size,
+                             bool* flags) const {};
+
+    // Used to evaluate a pre-read column during lazy materialization.
+    // Currently only integer/float columns are supported.
+    // This is the vectorized evaluation path; the default implementation does nothing.
+    virtual void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const {};
     uint32_t column_id() const { return _column_id; }
 
+    virtual bool is_in_predicate() { return false; } // base-class default is false
+
 protected:
     uint32_t _column_id;
     bool _opposite;
diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp
index 7dbe7cb096..a154a04499 100644
--- a/be/src/olap/comparison_predicate.cpp
+++ b/be/src/olap/comparison_predicate.cpp
@@ -21,6 +21,9 @@
 #include "olap/schema.h"
 #include "runtime/string_value.hpp"
 #include "runtime/vectorized_row_batch.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/predicate_column.h"
 
 namespace doris {
 
@@ -142,6 +145,75 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(LessEqualPredicate, <=)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
 
+#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP) \
+    /* Short-circuit filter: compacts sel[0, *size) in place so that it keeps only the rows */ \
+    /* whose cell passes `cell OP _value` (a NULL cell fails); _opposite negates the verdict. */ \
+    template <class type> \
+    void CLASS<type>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \
+        uint16_t kept = 0; \
+        if (!column.is_nullable()) { \
+            auto& values = reinterpret_cast<vectorized::PredicateColumnType<type>&>(column).get_data(); \
+            for (uint16_t pos = 0; pos < *size; ++pos) { \
+                const uint16_t row = sel[pos]; \
+                sel[kept] = row; \
+                const bool hit = (reinterpret_cast<const type&>(values[row]) OP _value); \
+                kept += _opposite ? !hit : hit; \
+            } \
+        } else { \
+            auto* nullable = vectorized::check_and_get_column<vectorized::ColumnNullable>(column); \
+            auto& is_null = reinterpret_cast<const vectorized::ColumnVector<uint8_t>&>( \
+                                    *(nullable->get_null_map_column_ptr())).get_data(); \
+            auto* nested = vectorized::check_and_get_column<vectorized::PredicateColumnType<type>>( \
+                    nullable->get_nested_column()); \
+            auto& values = nested->get_data(); \
+            for (uint16_t pos = 0; pos < *size; ++pos) { \
+                const uint16_t row = sel[pos]; \
+                sel[kept] = row; \
+                const bool hit = !is_null[row] && (reinterpret_cast<const type&>(values[row]) OP _value); \
+                kept += _opposite ? !hit : hit; \
+            } \
+        } \
+        *size = kept; \
+    }
+ 
+COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==)
+COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=)
+COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <)
+COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=)
+COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >)
+COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=)
+ 
+#define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP) \
+    /* Vectorized evaluation of rows [0, size): flags[i] = `cell_i OP _value`, forced to */ \
+    /* false for NULL cells; afterwards every flag is negated when _opposite is set. */ \
+    template <class type> \
+    void CLASS<type>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const { \
+        if (!column.is_nullable()) { \
+            auto& values = reinterpret_cast<vectorized::PredicateColumnType<type>&>(column).get_data(); \
+            for (uint16_t row = 0; row < size; ++row) { \
+                flags[row] = values[row] OP _value; \
+            } \
+        } else { \
+            auto* nullable = vectorized::check_and_get_column<vectorized::ColumnNullable>(column); \
+            auto& values = reinterpret_cast<const vectorized::PredicateColumnType<type>&>(nullable->get_nested_column()).get_data(); \
+            auto& is_null = reinterpret_cast<const vectorized::ColumnVector<uint8_t>&>( \
+                                    *(nullable->get_null_map_column_ptr())).get_data(); \
+            for (uint16_t row = 0; row < size; ++row) { \
+                flags[row] = (values[row] OP _value) && (!is_null[row]); \
+            } \
+        } \
+        if (_opposite) { \
+            for (uint16_t row = 0; row < size; ++row) { flags[row] = !flags[row]; } \
+        } \
+    }
+ 
+COMPARISON_PRED_COLUMN_EVALUATE_VEC(EqualPredicate, ==)
+COMPARISON_PRED_COLUMN_EVALUATE_VEC(NotEqualPredicate, !=)
+COMPARISON_PRED_COLUMN_EVALUATE_VEC(LessPredicate, <)
+COMPARISON_PRED_COLUMN_EVALUATE_VEC(LessEqualPredicate, <=)
+COMPARISON_PRED_COLUMN_EVALUATE_VEC(GreaterPredicate, >)
+COMPARISON_PRED_COLUMN_EVALUATE_VEC(GreaterEqualPredicate, >=)
+
 #define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP)                                      \
     template <class type>                                                                        \
     void CLASS<type>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \
@@ -174,6 +246,20 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(LessEqualPredicate, <=)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterPredicate, >)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterEqualPredicate, >=)
 
+// TODO(wb): implement evaluate_or on IColumn; for now it is an empty stub that leaves `flags` untouched.
+#define COMPARISON_PRED_COLUMN_EVALUATE_OR(CLASS, OP)                                  \
+    template <class type>                                                                 \
+    void CLASS<type>::evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const { \
+                                                               \
+    }
+ 
+COMPARISON_PRED_COLUMN_EVALUATE_OR(EqualPredicate, ==)
+COMPARISON_PRED_COLUMN_EVALUATE_OR(NotEqualPredicate, !=)
+COMPARISON_PRED_COLUMN_EVALUATE_OR(LessPredicate, <)
+COMPARISON_PRED_COLUMN_EVALUATE_OR(LessEqualPredicate, <=)
+COMPARISON_PRED_COLUMN_EVALUATE_OR(GreaterPredicate, >)
+COMPARISON_PRED_COLUMN_EVALUATE_OR(GreaterEqualPredicate, >=)
+
 #define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP)                                      \
     template <class type>                                                                         \
     void CLASS<type>::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \
@@ -206,6 +292,21 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(LessEqualPredicate, <=)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterPredicate, >)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterEqualPredicate, >=)
 
+// TODO(wb): implement evaluate_and on IColumn; for now it is an empty stub that leaves `flags` untouched.
+#define COMPARISON_PRED_COLUMN_EVALUATE_AND(CLASS, OP)                                  \
+    template <class type>                                                                 \
+    void CLASS<type>::evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const { \
+                                                               \
+                                                                               \
+    }
+ 
+COMPARISON_PRED_COLUMN_EVALUATE_AND(EqualPredicate, ==)
+COMPARISON_PRED_COLUMN_EVALUATE_AND(NotEqualPredicate, !=)
+COMPARISON_PRED_COLUMN_EVALUATE_AND(LessPredicate, <)
+COMPARISON_PRED_COLUMN_EVALUATE_AND(LessEqualPredicate, <=)
+COMPARISON_PRED_COLUMN_EVALUATE_AND(GreaterPredicate, >)
+COMPARISON_PRED_COLUMN_EVALUATE_AND(GreaterEqualPredicate, >=)
+
 #define BITMAP_COMPARE_EqualPredicate(s, exact_match, seeked_ordinal, iterator, bitmap, roaring) \
     do {                                                                                         \
         if (!s.is_not_found()) {                                                                 \
@@ -440,4 +541,66 @@ COMPARISON_PRED_BITMAP_EVALUATE_DECLARATION(LessEqualPredicate)
 COMPARISON_PRED_BITMAP_EVALUATE_DECLARATION(GreaterPredicate)
 COMPARISON_PRED_BITMAP_EVALUATE_DECLARATION(GreaterEqualPredicate)
 
+#define COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(CLASS)                                                  \
+    template void CLASS<int8_t>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)       \
+            const;                                                                                          \
+    template void CLASS<int16_t>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)      \
+            const;                                                                                          \
+    template void CLASS<int32_t>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)      \
+            const;                                                                                          \
+    template void CLASS<int64_t>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)      \
+            const;                                                                                          \
+    template void CLASS<int128_t>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)     \
+            const;                                                                                          \
+    template void CLASS<float>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const; \
+    template void CLASS<double>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)       \
+            const;                                                                                          \
+    template void CLASS<decimal12_t>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)  \
+            const;                                                                                          \
+    template void CLASS<StringValue>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)  \
+            const;                                                                                          \
+    template void CLASS<uint24_t>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)     \
+            const;                                                                                          \
+    template void CLASS<uint64_t>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size)     \
+            const;                                                                                          \
+    template void CLASS<bool>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const;
+ 
+COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(EqualPredicate)
+COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(NotEqualPredicate)
+COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(LessPredicate)
+COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(LessEqualPredicate)
+COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(GreaterPredicate)
+COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(GreaterEqualPredicate)
+ 
+#define COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(CLASS)                                                  \
+    template void CLASS<int8_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags)       \
+            const;                                                                                          \
+    template void CLASS<int16_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags)      \
+            const;                                                                                          \
+    template void CLASS<int32_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags)      \
+            const;                                                                                          \
+    template void CLASS<int64_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags)      \
+            const;                                                                                          \
+    template void CLASS<int128_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags)     \
+            const;                                                                                          \
+    template void CLASS<float>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const; \
+    template void CLASS<double>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags)       \
+            const;                                                                                          \
+    template void CLASS<decimal12_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags)  \
+            const;                                                                                          \
+    template void CLASS<StringValue>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags)  \
+            const;                                                                                          \
+    template void CLASS<uint24_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags)     \
+            const;                                                                                          \
+    template void CLASS<uint64_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags)     \
+            const;                                                                                          \
+    template void CLASS<bool>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const;
+ 
+COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(EqualPredicate)
+COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(NotEqualPredicate)
+COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(LessPredicate)
+COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(LessEqualPredicate)
+COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterPredicate)
+COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterEqualPredicate)
+
 } //namespace doris
diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h
index d0a40490eb..30fd9fde80 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -40,6 +40,10 @@ class VectorizedRowBatch;
         virtual Status evaluate(const Schema& schema,                                         \
                                 const std::vector<BitmapIndexIterator*>& iterators,           \
                                 uint32_t num_rows, roaring::Roaring* roaring) const override; \
+        void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; \
+        void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override; \
+        void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override; \
+        void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const override; \
                                                                                               \
     private:                                                                                  \
         type _value;                                                                          \
diff --git a/be/src/olap/generic_iterators.cpp b/be/src/olap/generic_iterators.cpp
index adac5df916..8a11108c93 100644
--- a/be/src/olap/generic_iterators.cpp
+++ b/be/src/olap/generic_iterators.cpp
@@ -116,6 +116,11 @@ public:
     MergeIteratorContext(RowwiseIterator* iter, std::shared_ptr<MemTracker> parent)
             : _iter(iter), _block(iter->schema(), 1024, std::move(parent)) {}
 
+    MergeIteratorContext(const MergeIteratorContext&) = delete;
+    MergeIteratorContext(MergeIteratorContext&&) = delete;
+    MergeIteratorContext& operator=(const MergeIteratorContext&) = delete;
+    MergeIteratorContext& operator=(MergeIteratorContext&&) = delete;
+
     ~MergeIteratorContext() {
         delete _iter;
         _iter = nullptr;
@@ -151,6 +156,7 @@ private:
 
 private:
     RowwiseIterator* _iter;
+
     // used to store data load from iterator
     RowBlockV2 _block;
 
@@ -181,10 +187,9 @@ Status MergeIteratorContext::advance() {
 }
 
 Status MergeIteratorContext::_load_next_block() {
-    Status st;
     do {
         _block.clear();
-        st = _iter->next_batch(&_block);
+        Status st = _iter->next_batch(&_block);
         if (!st.ok()) {
             _valid = false;
             if (st.is_end_of_file()) {
@@ -202,27 +207,29 @@ Status MergeIteratorContext::_load_next_block() {
 class MergeIterator : public RowwiseIterator {
 public:
     // MergeIterator takes the ownership of input iterators
-    MergeIterator(std::list<RowwiseIterator*> iters, std::shared_ptr<MemTracker> parent, int sequence_id_idx)
-        : _origin_iters(std::move(iters)), _sequence_id_idx(sequence_id_idx) {
+    MergeIterator(std::vector<RowwiseIterator*> iters, std::shared_ptr<MemTracker> parent, int sequence_id_idx)
+        : _origin_iters(std::move(iters)), _sequence_id_idx(sequence_id_idx), _merge_heap(MergeContextComparator(_sequence_id_idx)) {
         // use for count the mem use of Block use in Merge
         _mem_tracker = MemTracker::CreateTracker(-1, "MergeIterator", parent, false);
     }
 
     ~MergeIterator() override {
-        while (!_merge_heap->empty()) {
-            auto ctx = _merge_heap->top();
-            _merge_heap->pop();
+        while (!_merge_heap.empty()) {
+            auto ctx = _merge_heap.top();
+            _merge_heap.pop();
             delete ctx;
         }
     }
+
     Status init(const StorageReadOptions& opts) override;
+
     Status next_batch(RowBlockV2* block) override;
 
     const Schema& schema() const override { return *_schema; }
 
 private:
     // It will be released after '_merge_heap' has been built.
-    std::list<RowwiseIterator*> _origin_iters;
+    std::vector<RowwiseIterator*> _origin_iters;
 
     int _sequence_id_idx;
 
@@ -256,9 +263,12 @@ private:
 
         int sequence_id_idx;
     };
-    using MergeHeap = std::priority_queue<MergeIteratorContext*, std::vector<MergeIteratorContext*>,
-                                          MergeContextComparator>;
-    std::unique_ptr<MergeHeap> _merge_heap;
+
+    using MergeHeap = std::priority_queue<MergeIteratorContext*, 
+                                        std::vector<MergeIteratorContext*>,
+                                        MergeContextComparator>;
+
+    MergeHeap _merge_heap;
 };
 
 Status MergeIterator::init(const StorageReadOptions& opts) {
@@ -267,24 +277,24 @@ Status MergeIterator::init(const StorageReadOptions& opts) {
     }
     _schema.reset(new Schema((*(_origin_iters.begin()))->schema()));
 
-    _merge_heap = std::make_unique<MergeHeap>(MergeContextComparator(_sequence_id_idx));
     for (auto iter : _origin_iters) {
         std::unique_ptr<MergeIteratorContext> ctx(new MergeIteratorContext(iter, _mem_tracker));
         RETURN_IF_ERROR(ctx->init(opts));
         if (!ctx->valid()) {
             continue;
         }
-        _merge_heap->push(ctx.release());
+        _merge_heap.push(ctx.release());
     }
+
     _origin_iters.clear();
     return Status::OK();
 }
 
 Status MergeIterator::next_batch(RowBlockV2* block) {
     size_t row_idx = 0;
-    for (; row_idx < block->capacity() && !_merge_heap->empty(); ++row_idx) {
-        auto ctx = _merge_heap->top();
-        _merge_heap->pop();
+    for (; row_idx < block->capacity() && !_merge_heap.empty(); ++row_idx) {
+        auto ctx = _merge_heap.top();
+        _merge_heap.pop();
 
         RowBlockRow dst_row = block->row(row_idx);
         // copy current row to block
@@ -292,7 +302,7 @@ Status MergeIterator::next_batch(RowBlockV2* block) {
 
         RETURN_IF_ERROR(ctx->advance());
         if (ctx->valid()) {
-            _merge_heap->push(ctx);
+            _merge_heap.push(ctx);
         } else {
             // Release ctx earlier to reduce resource consumed
             delete ctx;
@@ -313,17 +323,17 @@ public:
     // Iterators' ownership it transfered to this class.
     // This class will delete all iterators when destructs
     // Client should not use iterators any more.
-    UnionIterator(std::list<RowwiseIterator*> iters, std::shared_ptr<MemTracker> parent)
-            : _origin_iters(std::move(iters)) {
+    UnionIterator(std::vector<RowwiseIterator*> &v, std::shared_ptr<MemTracker> parent)
+            : _origin_iters(v.begin(), v.end()) {
         _mem_tracker = MemTracker::CreateTracker(-1, "UnionIterator", parent, false);
     }
 
     ~UnionIterator() override {
-        for (auto iter : _origin_iters) {
-            delete iter;
-        }
+        std::for_each(_origin_iters.begin(), _origin_iters.end(), std::default_delete<RowwiseIterator>());
     }
+
     Status init(const StorageReadOptions& opts) override;
+
     Status next_batch(RowBlockV2* block) override;
 
     const Schema& schema() const override { return *_schema; }
@@ -331,7 +341,7 @@ public:
 private:
     std::unique_ptr<Schema> _schema;
     RowwiseIterator* _cur_iter = nullptr;
-    std::list<RowwiseIterator*> _origin_iters;
+    std::deque<RowwiseIterator*> _origin_iters;
 };
 
 Status UnionIterator::init(const StorageReadOptions& opts) {
@@ -364,18 +374,18 @@ Status UnionIterator::next_batch(RowBlockV2* block) {
     return Status::EndOfFile("End of UnionIterator");
 }
 
-RowwiseIterator* new_merge_iterator(std::list<RowwiseIterator*> inputs, std::shared_ptr<MemTracker> parent, int sequence_id_idx) {
+RowwiseIterator* new_merge_iterator(std::vector<RowwiseIterator*> inputs, std::shared_ptr<MemTracker> parent, int sequence_id_idx) {
     if (inputs.size() == 1) {
         return *(inputs.begin());
     }
     return new MergeIterator(std::move(inputs), parent, sequence_id_idx);
 }
 
-RowwiseIterator* new_union_iterator(std::list<RowwiseIterator*> inputs, std::shared_ptr<MemTracker> parent) {
+RowwiseIterator* new_union_iterator(std::vector<RowwiseIterator*>& inputs, std::shared_ptr<MemTracker> parent) {
     if (inputs.size() == 1) {
         return *(inputs.begin());
     }
-    return new UnionIterator(std::move(inputs), parent);
+    return new UnionIterator(inputs, parent);
 }
 
 RowwiseIterator* new_auto_increment_iterator(const Schema& schema, size_t num_rows) {
diff --git a/be/src/olap/generic_iterators.h b/be/src/olap/generic_iterators.h
index 1d3eccf30a..e8f4528885 100644
--- a/be/src/olap/generic_iterators.h
+++ b/be/src/olap/generic_iterators.h
@@ -25,14 +25,14 @@ namespace doris {
 //
 // Inputs iterators' ownership is taken by created merge iterator. And client
 // should delete returned iterator after usage.
-RowwiseIterator* new_merge_iterator(std::list<RowwiseIterator*> inputs, std::shared_ptr<MemTracker> parent, int sequence_id_idx);
+RowwiseIterator* new_merge_iterator(std::vector<RowwiseIterator*> inputs, std::shared_ptr<MemTracker> parent, int sequence_id_idx);
 
 // Create a union iterator for input iterators. Union iterator will read
 // input iterators one by one.
 //
 // Inputs iterators' ownership is taken by created union iterator. And client
 // should delete returned iterator after usage.
-RowwiseIterator* new_union_iterator(std::list<RowwiseIterator*> inputs, std::shared_ptr<MemTracker> parent);
+RowwiseIterator* new_union_iterator(std::vector<RowwiseIterator*>& inputs, std::shared_ptr<MemTracker> parent);
 
 // Create an auto increment iterator which returns num_rows data in format of schema.
 // This class aims to be used in unit test.
diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp
index c167a17e4c..a17e157c4d 100644
--- a/be/src/olap/in_list_predicate.cpp
+++ b/be/src/olap/in_list_predicate.cpp
@@ -20,6 +20,8 @@
 #include "olap/field.h"
 #include "runtime/string_value.hpp"
 #include "runtime/vectorized_row_batch.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/columns/column_nullable.h"
 
 namespace doris {
 
@@ -115,6 +117,43 @@ IN_LIST_PRED_EVALUATE(NotInListPredicate, ==)
 IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(InListPredicate, !=)
 IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
 
+#define IN_LIST_PRED_COLUMN_EVALUATE(CLASS, OP) \
+    /* Short-circuit filter over sel[0, *size): a row index is kept when the test */ \
+    /* `_values.find(cell) OP _values.end()` holds for its cell (a NULL cell always */ \
+    /* fails it); the verdict is negated when _opposite is set. */ \
+    template <class type> \
+    void CLASS<type>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \
+        uint16_t kept = 0; \
+        if (!column.is_nullable()) { \
+            auto& values = reinterpret_cast<vectorized::PredicateColumnType<type>&>(column).get_data(); \
+            for (uint16_t pos = 0; pos < *size; ++pos) { \
+                const uint16_t row = sel[pos]; \
+                sel[kept] = row; \
+                const type& cell = reinterpret_cast<const type&>(values[row]); \
+                const bool hit = (_values.find(cell) OP _values.end()); \
+                kept += _opposite ? !hit : hit; \
+            } \
+        } else { \
+            auto* nullable = vectorized::check_and_get_column<vectorized::ColumnNullable>(column); \
+            auto& is_null = reinterpret_cast<const vectorized::ColumnVector<uint8_t>&>( \
+                                    *(nullable->get_null_map_column_ptr())).get_data(); \
+            auto* nested = vectorized::check_and_get_column<vectorized::PredicateColumnType<type>>( \
+                    nullable->get_nested_column()); \
+            auto& values = nested->get_data(); \
+            for (uint16_t pos = 0; pos < *size; ++pos) { \
+                const uint16_t row = sel[pos]; \
+                sel[kept] = row; \
+                const type& cell = reinterpret_cast<const type&>(values[row]); \
+                const bool hit = !is_null[row] && (_values.find(cell) OP _values.end()); \
+                kept += _opposite ? !hit : hit; \
+            } \
+        } \
+        *size = kept; \
+    }
+
+IN_LIST_PRED_COLUMN_EVALUATE(InListPredicate, !=)
+IN_LIST_PRED_COLUMN_EVALUATE(NotInListPredicate, ==)
+
 #define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP)                                         \
     template <class type>                                                                        \
     void CLASS<type>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index cf9bf615e5..7cd237b7eb 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -76,6 +76,8 @@ namespace doris {
 
 class VectorizedRowBatch;
 
+// TODO(wb): support evaluate_and / evaluate_or on IColumn (the overrides declared below are empty stubs).
+
 #define IN_LIST_PRED_CLASS_DEFINE(CLASS)                                                          \
     template <class type>                                                                         \
     class CLASS : public ColumnPredicate {                                                        \
@@ -90,7 +92,10 @@ class VectorizedRowBatch;
         virtual Status evaluate(const Schema& schema,                                             \
                                 const std::vector<BitmapIndexIterator*>& iterators,               \
                                 uint32_t num_rows, roaring::Roaring* bitmap) const override;      \
-                                                                                                  \
+        void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; \
+        void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \
+        void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \
+        bool is_in_predicate() override { return true; }                                                                                          \
     private:                                                                                      \
         phmap::flat_hash_set<type> _values;                                                       \
     };
diff --git a/be/src/olap/iterators.h b/be/src/olap/iterators.h
index d514dc0425..4cdfc60578 100644
--- a/be/src/olap/iterators.h
+++ b/be/src/olap/iterators.h
@@ -23,6 +23,7 @@
 #include "olap/olap_common.h"
 #include "olap/column_predicate.h"
 #include "olap/block_column_predicate.h"
+#include "vec/core/block.h"
 
 namespace doris {
 
@@ -80,6 +81,7 @@ public:
     // REQUIRED (null is not allowed)
     OlapReaderStatistics* stats = nullptr;
     bool use_page_cache = false;
+    int block_row_max = 4096;
 };
 
 // Used to read data in RowBlockV2 one by one
@@ -99,7 +101,9 @@ public:
     // into input batch with Status::OK() returned
     // If there is no data to read, will return Status::EndOfFile.
     // If other error happens, other error code will be returned.
-    virtual Status next_batch(RowBlockV2* block) = 0;
+    virtual Status next_batch(RowBlockV2* block) { return Status::NotSupported("to be implemented"); }
+    // Overload taking a vectorized::Block; this default also reports NotSupported.
+    virtual Status next_batch(vectorized::Block* block) { return Status::NotSupported("to be implemented"); }
 
     // return schema for this Iterator
     virtual const Schema& schema() const = 0;
diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp
index cc88d261eb..da3eb295bc 100644
--- a/be/src/olap/null_predicate.cpp
+++ b/be/src/olap/null_predicate.cpp
@@ -20,6 +20,9 @@
 #include "olap/field.h"
 #include "runtime/string_value.hpp"
 #include "runtime/vectorized_row_batch.h"
+#include "vec/columns/column_nullable.h"
+
+using namespace doris::vectorized;
 
 namespace doris {
 
@@ -118,4 +121,44 @@ Status NullPredicate::evaluate(const Schema& schema,
     return Status::OK();
 }
 
+void NullPredicate::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const {
+    const auto* nullable_col = check_and_get_column<ColumnNullable>(column);
+    if (nullable_col == nullptr) {
+        if (_is_null) *size = 0; // not a ColumnNullable, so no row is NULL: IS NULL keeps nothing
+        return;
+    }
+    const auto& null_flags = nullable_col->get_null_map_data();
+    uint16_t kept = 0;
+    for (uint16_t pos = 0; pos < *size; ++pos) {
+        sel[kept] = sel[pos]; // written unconditionally; it only survives if `kept` advances
+        kept += (null_flags[sel[pos]] == _is_null);
+    }
+    *size = kept;
+}
+
+void NullPredicate::evaluate_or(IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const {
+    const auto* nullable_col = check_and_get_column<ColumnNullable>(column);
+    if (nullable_col == nullptr) {
+        // Not a ColumnNullable, so no row is NULL: IS NOT NULL accepts every row.
+        if (!_is_null) memset(flags, true, size);
+        return;
+    }
+    const auto& null_flags = nullable_col->get_null_map_data();
+    for (uint16_t pos = 0; pos < size; ++pos) {
+        if (!flags[pos]) flags[pos] = (null_flags[sel[pos]] == _is_null); // set flags stay set
+    }
+}
+
+// ANDs this predicate into `flags`; flags[i] belongs to the row index stored in sel[i].
+void NullPredicate::evaluate_and(IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const {
+    if (auto* nullable = check_and_get_column<ColumnNullable>(column)) {
+        auto& null_map = nullable->get_null_map_data();
+        for (uint16_t i = 0; i < size; ++i) {
+            if (!flags[i]) continue; // already rejected: AND cannot turn it back on
+            flags[i] &= (null_map[sel[i]] == _is_null);
+        }
+    } else if (_is_null) {
+        memset(flags, false, size); // not a ColumnNullable: no row is NULL, IS NULL rejects all
+    }
+}
 } //namespace doris
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index e867d58fe4..681e60bc33 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -43,6 +43,14 @@ public:
     virtual Status evaluate(const Schema& schema, const vector<BitmapIndexIterator*>& iterators,
                             uint32_t num_rows, roaring::Roaring* roaring) const override;
 
+    void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override;
+
+    void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size,
+                     bool* flags) const override;
+
+    void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size,
+                      bool* flags) const override;
+
 private:
     bool _is_null; //true for null, false for not null
 };
diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h
index c2d4b7f3c3..a9ac731333 100644
--- a/be/src/olap/olap_define.h
+++ b/be/src/olap/olap_define.h
@@ -384,7 +384,8 @@ enum OLAPStatus {
     OLAP_ERR_ROWSET_LOAD_FAILED = -3109,
     OLAP_ERR_ROWSET_READER_INIT = -3110,
     OLAP_ERR_ROWSET_READ_FAILED = -3111,
-    OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION = -3112
+    OLAP_ERR_ROWSET_INVALID_STATE_TRANSITION = -3112,
+    OLAP_ERR_STRING_OVERFLOW_IN_VEC_ENGINE = -3113
 };
 
 enum ColumnFamilyIndex {
diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp
index 763d1e7a24..13e50b80c6 100644
--- a/be/src/olap/reader.cpp
+++ b/be/src/olap/reader.cpp
@@ -41,6 +41,7 @@
 #include "runtime/string_value.hpp"
 #include "util/date_func.h"
 #include "util/mem_util.hpp"
+#include "vec/olap/vcollect_iterator.h"
 
 using std::nothrow;
 using std::set;
@@ -57,8 +58,8 @@ void TabletReader::ReaderParams::check_validation() const {
 std::string TabletReader::ReaderParams::to_string() const {
     std::stringstream ss;
     ss << "tablet=" << tablet->full_name() << " reader_type=" << reader_type
-       << " aggregation=" << aggregation << " version=" << version << " start_key_include=" << start_key_include
-       << " end_key_include=" << end_key_include;
+       << " aggregation=" << aggregation << " version=" << version
+       << " start_key_include=" << start_key_include << " end_key_include=" << end_key_include;
 
     for (const auto& key : start_key) {
         ss << " keys=" << key;
@@ -230,6 +231,7 @@ OLAPStatus TabletReader::_capture_rs_readers(const ReaderParams& read_params,
 OLAPStatus TabletReader::_init_params(const ReaderParams& read_params) {
     read_params.check_validation();
 
+    _direct_mode = read_params.direct_mode;
     _aggregation = read_params.aggregation;
     _need_agg_finalize = read_params.need_agg_finalize;
     _reader_type = read_params.reader_type;
@@ -699,7 +701,7 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition,
             break;
         }
         case OLAP_FIELD_TYPE_VARCHAR:
-        case OLAP_FIELD_TYPE_STRING:{
+        case OLAP_FIELD_TYPE_STRING: {
             phmap::flat_hash_set<StringValue> values;
             for (auto& cond_val : condition.condition_values) {
                 StringValue value;
@@ -806,7 +808,8 @@ void TabletReader::_init_load_bf_columns(const ReaderParams& read_params, Condit
         return;
     }
     FieldType type = _tablet->tablet_schema().column(max_equal_index).type();
-    if ((type != OLAP_FIELD_TYPE_VARCHAR && type != OLAP_FIELD_TYPE_STRING)|| max_equal_index + 1 > _tablet->num_short_key_columns()) {
+    if ((type != OLAP_FIELD_TYPE_VARCHAR && type != OLAP_FIELD_TYPE_STRING) ||
+        max_equal_index + 1 > _tablet->num_short_key_columns()) {
         load_bf_columns->erase(max_equal_index);
     }
 }
diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h
index 0f20895069..82cd7ffcf4 100644
--- a/be/src/olap/reader.h
+++ b/be/src/olap/reader.h
@@ -49,6 +49,11 @@ class RowBlock;
 class CollectIterator;
 class RuntimeState;
 
+namespace vectorized {
+class VCollectIterator;
+class Block;
+} // namespace vectorized
+
 class TabletReader {
     struct KeysParam {
         std::string to_string() const;
@@ -64,6 +69,7 @@ public:
     struct ReaderParams {
         TabletSharedPtr tablet;
         ReaderType reader_type = READER_QUERY;
+        bool direct_mode = false;
         bool aggregation = false;
         bool need_agg_finalize = true;
         // 1. when read column data page:
@@ -88,6 +94,9 @@ public:
         RuntimeProfile* profile = nullptr;
         RuntimeState* runtime_state = nullptr;
 
+        // use only in vec exec engine
+        std::vector<uint32_t>* origin_return_columns = nullptr;
+
         void check_validation() const;
 
         std::string to_string() const;
@@ -105,7 +114,17 @@ public:
     // Return OLAP_SUCCESS and set `*eof` to true when no more rows can be read.
     // Return others when unexpected error happens.
     virtual OLAPStatus next_row_with_aggregation(RowCursor* row_cursor, MemPool* mem_pool,
-                                         ObjectPool* agg_pool, bool* eof) = 0;
+                                                 ObjectPool* agg_pool, bool* eof) = 0;
+
+    // Read the next block with aggregation applied.
+    // Returns OLAP_SUCCESS with `*eof` false when a block was read, or with `*eof` true
+    // when no more rows can be read; returns another status on unexpected errors.
+    // This base implementation always returns OLAP_ERR_READER_INITIALIZE_ERROR.
+    // TODO: reconsider whether mem_pool and agg_pool are still needed here.
+    virtual OLAPStatus next_block_with_aggregation(vectorized::Block* block, MemPool* mem_pool,
+                                                   ObjectPool* agg_pool, bool* eof) {
+        return OLAP_ERR_READER_INITIALIZE_ERROR;
+    }
 
     uint64_t merged_rows() const { return _merged_rows; }
 
@@ -119,6 +138,7 @@ public:
 
 protected:
     friend class CollectIterator;
+    friend class vectorized::VCollectIterator;
     friend class DeleteHandler;
 
     OLAPStatus _init_params(const ReaderParams& read_params);
@@ -188,8 +208,8 @@ protected:
     ReaderType _reader_type = READER_QUERY;
     bool _next_delete_flag = false;
     bool _filter_delete = false;
-    bool _has_sequence_col = false;
     int32_t _sequence_col_idx = -1;
+    bool _direct_mode = false;
 
     CollectIterator _collect_iter;
     std::vector<uint32_t> _key_cids;
diff --git a/be/src/olap/row_block2.cpp b/be/src/olap/row_block2.cpp
index 20ed4ae029..877f6a21a2 100644
--- a/be/src/olap/row_block2.cpp
+++ b/be/src/olap/row_block2.cpp
@@ -23,6 +23,11 @@
 #include "gutil/strings/substitute.h"
 #include "olap/row_cursor.h"
 #include "util/bitmap.h"
+#include "vec/columns/column_complex.h"
+#include "vec/columns/column_vector.h"
+#include "vec/core/block.h"
+#include "vec/core/types.h"
+#include "vec/runtime/vdatetime_value.h"
 
 using strings::Substitute;
 namespace doris {
@@ -90,6 +95,231 @@ Status RowBlockV2::convert_to_row_block(RowCursor* helper, RowBlock* dst) {
     return Status::OK();
 }
 
+// Appends the selected rows of column `cid` to `origin_column`, converting each cell from its
+// storage form to the vectorized form. When `origin_column` is nullable its null map is extended
+// too, and rows marked NULL receive insert_default() in the nested column.
+Status RowBlockV2::_copy_data_to_column(int cid, doris::vectorized::MutableColumnPtr& origin_column) {
+    constexpr auto MAX_SIZE_OF_VEC_STRING = 1024l * 1024;
+    auto* column = origin_column.get();
+    bool nullable_mark_array[_selected_size]; // [i] is true when the i-th selected row is NULL
+    bool column_nullable = origin_column->is_nullable();
+    bool origin_nullable = _schema.column(cid)->is_nullable();
+    if (column_nullable) {
+        auto nullable_column = assert_cast<vectorized::ColumnNullable*>(origin_column.get());
+        auto& null_map = nullable_column->get_null_map_data();
+        column = nullable_column->get_nested_column_ptr().get();
+        if (origin_nullable) {
+            for (uint16_t i = 0; i < _selected_size; ++i) {
+                uint16_t row_idx = _selection_vector[i];
+                null_map.push_back(_column_vector_batches[cid]->is_null_at(row_idx));
+                nullable_mark_array[i] = null_map.back();
+            }
+        } else {
+            null_map.resize_fill(null_map.size() + _selected_size, 0);
+            memset(nullable_mark_array, false, _selected_size * sizeof(bool));
+        }
+    } else {
+        memset(nullable_mark_array, false, _selected_size * sizeof(bool));
+    }
+    // Passes each selected cell's raw pointer to insert_data(); NULL rows get insert_default().
+    auto insert_data_directly = [this, &nullable_mark_array](int cid, auto& column) {
+        for (uint16_t j = 0; j < _selected_size; ++j) {
+            if (!nullable_mark_array[j]) {
+                uint16_t row_idx = _selection_vector[j];
+                column->insert_data(
+                        reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx)), 0);
+            } else {
+                column->insert_default();
+            }
+        }
+    };
+
+    switch (_schema.column(cid)->type()) {
+    case OLAP_FIELD_TYPE_OBJECT: {
+        auto column_bitmap = assert_cast<vectorized::ColumnBitmap*>(column);
+        for (uint16_t j = 0; j < _selected_size; ++j) {
+            column_bitmap->insert_default();
+            if (!nullable_mark_array[j]) {
+                uint16_t row_idx = _selection_vector[j];
+                auto slice = reinterpret_cast<const Slice*>(column_block(cid).cell_ptr(row_idx));
+
+                BitmapValue* pvalue = &column_bitmap->get_element(column_bitmap->size() - 1);
+                // A non-empty slice is deserialized; an empty one is read as a BitmapValue*.
+                if (slice->size != 0) {
+                    BitmapValue value;
+                    value.deserialize(slice->data);
+                    *pvalue = std::move(value);
+                } else {
+                    *pvalue = std::move(*reinterpret_cast<BitmapValue*>(slice->data));
+                }
+            }
+        }
+        break;
+    }
+    case OLAP_FIELD_TYPE_HLL:
+    case OLAP_FIELD_TYPE_MAP:
+    case OLAP_FIELD_TYPE_VARCHAR: {
+        auto column_string = assert_cast<vectorized::ColumnString*>(column);
+
+        for (uint16_t j = 0; j < _selected_size; ++j) {
+            if (!nullable_mark_array[j]) {
+                uint16_t row_idx = _selection_vector[j];
+                auto slice = reinterpret_cast<const Slice*>(column_block(cid).cell_ptr(row_idx));
+                column_string->insert_data(slice->data, slice->size);
+            } else {
+                column_string->insert_default();
+            }
+        }
+        break;
+    }
+    case OLAP_FIELD_TYPE_STRING: {
+        auto column_string = assert_cast<vectorized::ColumnString*>(column);
+
+        for (uint16_t j = 0; j < _selected_size; ++j) {
+            if (!nullable_mark_array[j]) {
+                uint16_t row_idx = _selection_vector[j];
+                auto slice = reinterpret_cast<const Slice*>(column_block(cid).cell_ptr(row_idx));
+                if (LIKELY(slice->size <= MAX_SIZE_OF_VEC_STRING)) {
+                    column_string->insert_data(slice->data, slice->size);
+                } else {
+                    return Status::NotSupported("Not support string len over than 1MB in vec engine.");
+                }
+            } else {
+                column_string->insert_default();
+            }
+        }
+        break;
+    }
+    case OLAP_FIELD_TYPE_CHAR: {
+        auto column_string = assert_cast<vectorized::ColumnString*>(column);
+
+        for (uint16_t j = 0; j < _selected_size; ++j) {
+            if (!nullable_mark_array[j]) {
+                uint16_t row_idx = _selection_vector[j];
+                auto slice = reinterpret_cast<const Slice*>(column_block(cid).cell_ptr(row_idx));
+                column_string->insert_data(slice->data, strnlen(slice->data, slice->size));
+            } else {
+                column_string->insert_default();
+            }
+        }
+        break;
+    }
+    case OLAP_FIELD_TYPE_DATE: {
+        auto column_int = assert_cast<vectorized::ColumnVector<vectorized::Int64>*>(column);
+
+        for (uint16_t j = 0; j < _selected_size; ++j) {
+            if (!nullable_mark_array[j]) {
+                uint16_t row_idx = _selection_vector[j];
+                auto ptr = reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx));
+                uint64_t value = 0; // assembled from 3 bytes, least significant byte first
+                value = *(unsigned char*)(ptr + 2);
+                value <<= 8;
+                value |= *(unsigned char*)(ptr + 1);
+                value <<= 8;
+                value |= *(unsigned char*)(ptr);
+                vectorized::VecDateTimeValue date;
+                date.from_olap_date(value);
+                (column_int)->insert_data(reinterpret_cast<char*>(&date), 0);
+            } else {
+                column_int->insert_default();
+            }
+        }
+        break;
+    }
+    case OLAP_FIELD_TYPE_DATETIME: {
+        auto column_int = assert_cast<vectorized::ColumnVector<vectorized::Int64>*>(column);
+
+        for (uint16_t j = 0; j < _selected_size; ++j) {
+            if (!nullable_mark_array[j]) {
+                uint16_t row_idx = _selection_vector[j];
+                auto ptr = reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx));
+
+                uint64_t value = *reinterpret_cast<const uint64_t*>(ptr);
+                vectorized::VecDateTimeValue data(value);
+                (column_int)->insert_data(reinterpret_cast<char*>(&data), 0);
+            } else {
+                column_int->insert_default();
+            }
+        }
+        break;
+    }
+    case OLAP_FIELD_TYPE_DECIMAL: {
+        auto column_decimal =
+                assert_cast<vectorized::ColumnDecimal<vectorized::Decimal128>*>(column);
+
+        for (uint16_t j = 0; j < _selected_size; ++j) {
+            if (!nullable_mark_array[j]) {
+                uint16_t row_idx = _selection_vector[j];
+                auto ptr = reinterpret_cast<const char*>(column_block(cid).cell_ptr(row_idx));
+
+                int64_t int_value = *(int64_t*)(ptr);
+                int32_t frac_value = *(int32_t*)(ptr + sizeof(int64_t));
+                DecimalV2Value data(int_value, frac_value);
+                column_decimal->insert_data(reinterpret_cast<char*>(&data), 0);
+            } else {
+                column_decimal->insert_default();
+            }
+        }
+        break;
+    }
+    case OLAP_FIELD_TYPE_INT: {
+        auto column_int = assert_cast<vectorized::ColumnVector<vectorized::Int32>*>(column);
+        insert_data_directly(cid, column_int);
+        break;
+    }
+    case OLAP_FIELD_TYPE_BOOL: {
+        auto column_int = assert_cast<vectorized::ColumnVector<vectorized::UInt8>*>(column);
+        insert_data_directly(cid, column_int);
+        break;
+    }
+    case OLAP_FIELD_TYPE_TINYINT: {
+        auto column_int = assert_cast<vectorized::ColumnVector<vectorized::Int8>*>(column);
+        insert_data_directly(cid, column_int);
+        break;
+    }
+    case OLAP_FIELD_TYPE_SMALLINT: {
+        auto column_int = assert_cast<vectorized::ColumnVector<vectorized::Int16>*>(column);
+        insert_data_directly(cid, column_int);
+        break;
+    }
+    case OLAP_FIELD_TYPE_BIGINT: {
+        auto column_int = assert_cast<vectorized::ColumnVector<vectorized::Int64>*>(column);
+        insert_data_directly(cid, column_int);
+        break;
+    }
+    case OLAP_FIELD_TYPE_LARGEINT: {
+        auto column_int = assert_cast<vectorized::ColumnVector<vectorized::Int128>*>(column);
+        insert_data_directly(cid, column_int);
+        break;
+    }
+    case OLAP_FIELD_TYPE_FLOAT: {
+        auto column_float = assert_cast<vectorized::ColumnVector<vectorized::Float32>*>(column);
+        insert_data_directly(cid, column_float);
+        break;
+    }
+    case OLAP_FIELD_TYPE_DOUBLE: {
+        auto column_float = assert_cast<vectorized::ColumnVector<vectorized::Float64>*>(column);
+        insert_data_directly(cid, column_float);
+        break;
+    }
+    default: {
+        DCHECK(false) << "Invalid type in RowBlockV2:" << _schema.column(cid)->type();
+        return Status::NotSupported("Invalid type in RowBlockV2");
+    }
+    }
+    return Status::OK();
+}
+
+Status RowBlockV2::convert_to_vec_block(vectorized::Block* block) {
+    const auto& cids = _schema.column_ids(); // i-th schema column feeds the i-th block column
+    for (size_t i = 0; i < cids.size(); ++i) {
+        auto column = (*std::move(block->get_by_position(i).column)).assume_mutable();
+        RETURN_IF_ERROR(_copy_data_to_column(cids[i], column));
+    }
+    _pool->clear();
+    return Status::OK();
+}
+
 std::string RowBlockRow::debug_string() const {
     std::stringstream ss;
     ss << "[";
diff --git a/be/src/olap/row_block2.h b/be/src/olap/row_block2.h
index ece8d725a0..b98ab954c3 100644
--- a/be/src/olap/row_block2.h
+++ b/be/src/olap/row_block2.h
@@ -73,6 +73,9 @@ public:
     // convert RowBlockV2 to RowBlock
     Status convert_to_row_block(RowCursor* helper, RowBlock* dst);
 
+    // convert RowBlockV2 to vectorized::Block
+    Status convert_to_vec_block(vectorized::Block* block);
+
     // low-level API to access memory for each column block(including data array and nullmap).
     // `cid` must be one of `schema()->column_ids()`.
     ColumnBlock column_block(ColumnId cid) const {
@@ -106,6 +109,8 @@ public:
     std::string debug_string();
 
 private:
+    Status _copy_data_to_column(int cid, vectorized::MutableColumnPtr& mutable_column_ptr);
+
     Schema _schema;
     size_t _capacity;
     // _column_vector_batches[cid] == null if cid is not in `_schema`.
diff --git a/be/src/olap/rowset/alpha_rowset_reader.h b/be/src/olap/rowset/alpha_rowset_reader.h
index 959a90bc88..fd90f37a96 100644
--- a/be/src/olap/rowset/alpha_rowset_reader.h
+++ b/be/src/olap/rowset/alpha_rowset_reader.h
@@ -65,6 +65,10 @@ public:
     // It's ok, because we only get ref here, the block's owner is this reader.
     OLAPStatus next_block(RowBlock** block) override;
 
+    OLAPStatus next_block(vectorized::Block *block) override {
+        return OLAP_ERR_DATA_EOF; // unconditionally reports end of data; `block` is not touched
+    }
+
     bool delete_flag() override;
 
     Version version() override;
diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp
index ada5157e76..4d35f2f31e 100644
--- a/be/src/olap/rowset/beta_rowset_reader.cpp
+++ b/be/src/olap/rowset/beta_rowset_reader.cpp
@@ -27,6 +27,8 @@
 #include "olap/rowset/segment_v2/segment_iterator.h"
 #include "olap/schema.h"
 
+#include "vec/core/block.h"
+
 namespace doris {
 
 BetaRowsetReader::BetaRowsetReader(BetaRowsetSharedPtr rowset,
@@ -107,7 +109,8 @@ OLAPStatus BetaRowsetReader::init(RowsetReaderContext* read_context) {
         }
         seg_iterators.push_back(std::move(iter));
     }
-    std::list<RowwiseIterator*> iterators;
+
+    std::vector<RowwiseIterator*> iterators;
     for (auto& owned_it : seg_iterators) {
         // transfer ownership of segment iterator to `_iterator`
         iterators.push_back(owned_it.release());
@@ -172,4 +175,45 @@ OLAPStatus BetaRowsetReader::next_block(RowBlock** block) {
     return OLAP_SUCCESS;
 }
 
+OLAPStatus BetaRowsetReader::next_block(vectorized::Block* block) {
+    SCOPED_RAW_TIMER(&_stats->block_fetch_ns);
+    // True until one batch has been converted into `block` during this call; only while it
+    // is still true does an end-of-file from the iterator end the whole read.
+    bool no_batch_converted = true;
+
+    do {
+        // Fetch the next row-oriented batch from the underlying iterator.
+        _input_block->clear();
+        const auto fetch_status = _iterator->next_batch(_input_block.get());
+        if (!fetch_status.ok()) {
+            if (!fetch_status.is_end_of_file()) {
+                LOG(WARNING) << "failed to read next block: " << fetch_status.to_string();
+                return OLAP_ERR_ROWSET_READ_FAILED;
+            }
+            if (no_batch_converted) {
+                return OLAP_ERR_DATA_EOF;
+            }
+            // Earlier iterations already converted data; return what was collected.
+            break;
+        }
+        if (_input_block->selected_size() == 0) {
+            // No selected rows in this batch; move on to the loop condition.
+            continue;
+        }
+
+        // Append the selected rows to the column-oriented output block.
+        {
+            SCOPED_RAW_TIMER(&_stats->block_convert_ns);
+            const auto convert_status = _input_block->convert_to_vec_block(block);
+            if (UNLIKELY(!convert_status.ok())) {
+                LOG(WARNING) << "failed to read next block: " << convert_status.to_string();
+                return OLAP_ERR_STRING_OVERFLOW_IN_VEC_ENGINE;
+            }
+        }
+        no_batch_converted = false;
+    } while (block->rows() < _context->runtime_state->batch_size()); // here we should keep block.rows() < batch_size
+
+    return OLAP_SUCCESS;
+}
+
 } // namespace doris
diff --git a/be/src/olap/rowset/beta_rowset_reader.h b/be/src/olap/rowset/beta_rowset_reader.h
index 34e7d9cf3d..6074308e48 100644
--- a/be/src/olap/rowset/beta_rowset_reader.h
+++ b/be/src/olap/rowset/beta_rowset_reader.h
@@ -40,6 +40,7 @@ public:
     // If parent_tracker is not null, the block we get from next_block() will have the parent_tracker.
     // It's ok, because we only get ref here, the block's owner is this reader.
     OLAPStatus next_block(RowBlock** block) override;
+    OLAPStatus next_block(vectorized::Block* block) override;
 
     bool delete_flag() override { return _rowset->delete_flag(); }
 
diff --git a/be/src/olap/rowset/rowset_reader.h b/be/src/olap/rowset/rowset_reader.h
index b9e46d1118..53af387b3b 100644
--- a/be/src/olap/rowset/rowset_reader.h
+++ b/be/src/olap/rowset/rowset_reader.h
@@ -23,9 +23,14 @@
 
 #include "olap/rowset/rowset.h"
 #include "olap/rowset/rowset_reader_context.h"
+#include "vec/core/block.h"
 
 namespace doris {
 
+namespace vectorized {
+class Block;
+}
+
 class RowBlock;
 class RowsetReader;
 using RowsetReaderSharedPtr = std::shared_ptr<RowsetReader>;
@@ -33,6 +38,7 @@ using RowsetReaderSharedPtr = std::shared_ptr<RowsetReader>;
 class RowsetReader {
 public:
     enum RowsetReaderType { ALPHA, BETA };
+
     virtual ~RowsetReader() {}
 
     // reader init
@@ -45,6 +51,8 @@ public:
     //      Others when error happens.
     virtual OLAPStatus next_block(RowBlock** block) = 0;
 
+    virtual OLAPStatus next_block(vectorized::Block* block) = 0;
+
     virtual bool delete_flag() = 0;
 
     virtual Version version() = 0;
diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
index 6fae7eb976..edd64db455 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
@@ -19,9 +19,12 @@
 
 #include "common/logging.h"
 #include "gutil/strings/substitute.h" // for Substitute
-#include "olap/rowset/segment_v2/bitshuffle_page.h"
 #include "runtime/mem_pool.h"
 #include "util/slice.h" // for Slice
+#include "vec/columns/column_vector.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/predicate_column.h"
 
 namespace doris {
 namespace segment_v2 {
@@ -220,6 +223,8 @@ Status BinaryDictPageDecoder::init() {
     return Status::OK();
 }
 
+BinaryDictPageDecoder::~BinaryDictPageDecoder() {}
+
 Status BinaryDictPageDecoder::seek_to_position_in_page(size_t pos) {
     return _data_page_decoder->seek_to_position_in_page(pos);
 }
@@ -230,8 +235,64 @@ bool BinaryDictPageDecoder::is_dict_encoding() const {
 
 void BinaryDictPageDecoder::set_dict_decoder(PageDecoder* dict_decoder) {
     _dict_decoder = (BinaryPlainPageDecoder*)dict_decoder;
+    _bit_shuffle_ptr = reinterpret_cast<BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>*>(_data_page_decoder.get());
 };
 
+// Appends up to *n decoded values to `dst`; on return *n holds the number actually appended.
+// Plain-encoded pages are delegated to the data page decoder; dictionary-encoded pages map
+// each int32 codeword of the data page to its word in the dictionary page.
+Status BinaryDictPageDecoder::next_batch(size_t* n, vectorized::MutableColumnPtr &dst) {
+    if (_encoding_type == PLAIN_ENCODING) {
+        return _data_page_decoder->next_batch(n, dst);
+    }
+    // dictionary encoding
+    DCHECK(_parsed);
+    DCHECK(_dict_decoder != nullptr) << "dict decoder pointer is nullptr";
+    DCHECK(_bit_shuffle_ptr != nullptr) << "bit shuffle decoder pointer is nullptr";
+
+    if (PREDICT_FALSE(*n == 0 || _bit_shuffle_ptr->_cur_index >= _bit_shuffle_ptr->_num_elements)) {
+        *n = 0;
+        return Status::OK();
+    }
+
+    const size_t max_fetch = std::min(*n, static_cast<size_t>(_bit_shuffle_ptr->_num_elements - _bit_shuffle_ptr->_cur_index));
+    *n = max_fetch;
+
+    const int32_t* data_array = reinterpret_cast<const int32_t*>(_bit_shuffle_ptr->_chunk.data);
+    size_t start_index = _bit_shuffle_ptr->_cur_index;
+
+    auto* dst_col_ptr = dst.get();
+    if (dst->is_nullable()) {
+        auto nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get());
+        dst_col_ptr = nullable_column->get_nested_column_ptr().get();
+
+        // every value decoded here is marked not-null in the null map
+        auto& null_map = nullable_column->get_null_map_data();
+        null_map.resize_fill(null_map.size() + max_fetch, 0);
+    }
+
+    if (dst_col_ptr->is_predicate_column()) {
+        // cast columnptr to columnstringvalue just for avoid virtual function call overhead
+        auto* string_value_column_ptr = reinterpret_cast<vectorized::ColumnStringValue*>(dst_col_ptr);
+        for (size_t i = 0; i < max_fetch; i++, start_index++) {
+            // Nothing in this change populates _start_offset_array/_len_array (both default
+            // to nullptr), so resolve the word through the dictionary decoder instead.
+            const Slice word = _dict_decoder->string_at_index(data_array[start_index]);
+            string_value_column_ptr->insert_data(word.data, word.size);
+        }
+    } else {
+        // todo(wb) research whether using batch memcpy to insert columnString can has better performance when data set is big
+        for (size_t i = 0; i < max_fetch; i++, start_index++) {
+            // same dictionary lookup as above, inserted through the generic column interface
+            const Slice word = _dict_decoder->string_at_index(data_array[start_index]);
+            dst_col_ptr->insert_data(word.data, word.size);
+        }
+    }
+    _bit_shuffle_ptr->_cur_index += max_fetch;
+
+    return Status::OK();
+}
+
 Status BinaryDictPageDecoder::next_batch(size_t* n, ColumnBlockView* dst) {
     if (_encoding_type == PLAIN_ENCODING) {
         return _data_page_decoder->next_batch(n, dst);
diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.h b/be/src/olap/rowset/segment_v2/binary_dict_page.h
index 92904662dd..25a70b9a34 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.h
@@ -33,6 +33,7 @@
 #include "olap/types.h"
 #include "runtime/mem_pool.h"
 #include "runtime/mem_tracker.h"
+#include "olap/rowset/segment_v2/bitshuffle_page.h"
 
 namespace doris {
 namespace segment_v2 {
@@ -106,6 +107,8 @@ public:
 
     Status next_batch(size_t* n, ColumnBlockView* dst) override;
 
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override;
+
     size_t count() const override { return _data_page_decoder->count(); }
 
     size_t current_index() const override { return _data_page_decoder->current_index(); }
@@ -114,15 +117,21 @@ public:
 
     void set_dict_decoder(PageDecoder* dict_decoder);
 
+    ~BinaryDictPageDecoder();
+
 private:
     Slice _data;
     PageDecoderOptions _options;
     std::unique_ptr<PageDecoder> _data_page_decoder;
     const BinaryPlainPageDecoder* _dict_decoder = nullptr;
+    BitShufflePageDecoder<OLAP_FIELD_TYPE_INT>* _bit_shuffle_ptr = nullptr;
     bool _parsed;
     EncodingTypePB _encoding_type;
     // use as data buf.
     std::unique_ptr<ColumnVectorBatch> _batch;
+
+    uint32_t* _start_offset_array = nullptr;
+    uint32_t* _len_array = nullptr;
 };
 
 } // namespace segment_v2
diff --git a/be/src/olap/rowset/segment_v2/binary_plain_page.h b/be/src/olap/rowset/segment_v2/binary_plain_page.h
index 0747df2173..3c55d2b0fb 100644
--- a/be/src/olap/rowset/segment_v2/binary_plain_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_plain_page.h
@@ -38,6 +38,8 @@
 #include "runtime/mem_pool.h"
 #include "util/coding.h"
 #include "util/faststring.h"
+#include "vec/columns/column_complex.h"
+#include "vec/columns/column_nullable.h"
 
 namespace doris {
 namespace segment_v2 {
@@ -227,6 +229,61 @@ public:
         return Status::OK();
     }
 
+    // Vectorized read: appends up to *n values from the cursor to `dst`, advances the cursor, sets *n to the count appended.
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override {
+        DCHECK(_parsed);
+        if (PREDICT_FALSE(*n == 0 || _cur_idx >= _num_elems)) {
+            *n = 0;
+            return Status::OK();
+        }
+        const size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elems - _cur_idx));
+
+        auto* dst_col_ptr = dst.get();
+        if (dst->is_nullable()) {
+            auto nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get());
+            dst_col_ptr = nullable_column->get_nested_column_ptr().get();
+            // every value appended by this call gets a 0 ("not null") entry in the null map
+            for (size_t i = 0; i < max_fetch; i++) {
+                nullable_column->get_null_map_data().push_back(0);
+            }
+        }
+
+        if (dst_col_ptr->is_bitmap()) {
+            auto& bitmap_column = reinterpret_cast<vectorized::ColumnBitmap&>(*dst_col_ptr);
+            for (size_t i = 0; i < max_fetch; i++, _cur_idx++) {
+                const uint32_t start_offset = offset(_cur_idx);
+                const uint32_t len = offset(_cur_idx + 1) - start_offset;
+                bitmap_column.insert_default();
+                // A zero-length entry has no serialized payload: keep the element
+                // created by insert_default() rather than reinterpreting raw page
+                // bytes as a live BitmapValue object, which is undefined behavior.
+                if (len != 0) {
+                    BitmapValue value;
+                    value.deserialize(&_data[start_offset]);
+                    bitmap_column.get_element(bitmap_column.size() - 1) = std::move(value);
+                }
+            }
+        } else if (dst_col_ptr->is_predicate_column()) {
+            // todo(wb) padding sv here for better comparison performance
+            for (size_t i = 0; i < max_fetch; i++, _cur_idx++) {
+                const uint32_t start_offset = offset(_cur_idx);
+                const uint32_t len = offset(_cur_idx + 1) - start_offset;
+                StringValue sv(const_cast<char*>(&_data[start_offset]), len);
+                dst_col_ptr->insert_data(reinterpret_cast<char*>(&sv), 0);
+            }
+        } else {
+            for (size_t i = 0; i < max_fetch; i++, _cur_idx++) {
+                // todo(wb) need more test case and then improve here
+                const uint32_t start_offset = offset(_cur_idx);
+                const uint32_t len = offset(_cur_idx + 1) - start_offset;
+                dst_col_ptr->insert_data(&_data[start_offset], len);
+            }
+        }
+
+        *n = max_fetch;
+        return Status::OK();
+    };
+
     size_t count() const override {
         DCHECK(_parsed);
         return _num_elems;
@@ -263,6 +320,8 @@ private:
 
     // Index of the currently seeked element in the page.
     uint32_t _cur_idx;
+    friend class BinaryDictPageDecoder;
+    friend class FileColumnIterator;
 };
 
 } // namespace segment_v2
diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.h b/be/src/olap/rowset/segment_v2/binary_prefix_page.h
index 7e51b82ee5..7d7bcc9a92 100644
--- a/be/src/olap/rowset/segment_v2/binary_prefix_page.h
+++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.h
@@ -113,6 +113,10 @@ public:
 
     Status next_batch(size_t* n, ColumnBlockView* dst) override;
 
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override {
+        return Status::NotSupported("binary prefix page not implement vec op now"); // vectorized read is a stub: neither *n nor dst is touched
+    };
+
     size_t count() const override {
         DCHECK(_parsed);
         return _num_values;
diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
index cd54b6bdae..83eebae27b 100644
--- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h
+++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
@@ -37,6 +37,8 @@
 #include "util/coding.h"
 #include "util/faststring.h"
 #include "util/slice.h"
+#include "vec/runtime/vdatetime_value.h"
+#include "vec/columns/column_nullable.h"
 
 namespace doris {
 namespace segment_v2 {
@@ -348,6 +350,71 @@ public:
         return Status::OK();
     }
 
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override {
+        DCHECK(_parsed);
+        if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) {
+            *n = 0;
+            return Status::OK();
+        }
+
+        const size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elements - _cur_index));
+        // Element indices stay size_t so the cursor is never narrowed to int.
+        size_t begin = _cur_index;
+        const size_t end = begin + max_fetch;
+
+        auto* dst_col_ptr = dst.get();
+        if (dst->is_nullable()) {
+            auto nullable_column = assert_cast<vectorized::ColumnNullable*>(dst.get());
+            dst_col_ptr = nullable_column->get_nested_column_ptr().get();
+
+            // every value appended by this call gets a 0 ("not null") entry in the null map
+            for (size_t j = begin; j < end; j++) {
+                nullable_column->get_null_map_data().push_back(0);
+            }
+        }
+
+        // todo(wb) Try to eliminate type judgment in pagedecoder
+        if (dst_col_ptr->is_column_decimal()) { // decimal non-predicate column
+            for (; begin < end; begin++) {
+                const char* cur_ptr = (const char*)&_chunk.data[begin * SIZE_OF_TYPE];
+                int64_t int_value = *(int64_t*)(cur_ptr);
+                int32_t frac_value = *(int32_t*)(cur_ptr + sizeof(int64_t));
+                DecimalV2Value data(int_value, frac_value);
+                dst_col_ptr->insert_data(reinterpret_cast<char*>(&data), 0);
+            }
+        } else if (dst_col_ptr->is_date_type()) {
+            for (; begin < end; begin++) {
+                const char* cur_ptr = (const char*)&_chunk.data[begin * SIZE_OF_TYPE];
+                uint64_t value = 0; // assembled from three bytes, cur_ptr[0] being the lowest-order one
+                value = *(unsigned char*)(cur_ptr + 2);
+                value <<= 8;
+                value |= *(unsigned char*)(cur_ptr + 1);
+                value <<= 8;
+                value |= *(unsigned char*)(cur_ptr);
+                vectorized::VecDateTimeValue date;
+                date.from_olap_date(value);
+                dst_col_ptr->insert_data(reinterpret_cast<char*>(&date), 0);
+            }
+        } else if (dst_col_ptr->is_datetime_type()) {
+            for (; begin < end; begin++) {
+                const char* cur_ptr = (const char*)&_chunk.data[begin * SIZE_OF_TYPE];
+                uint64_t value = *reinterpret_cast<const uint64_t*>(cur_ptr);
+                vectorized::VecDateTimeValue date(value);
+                dst_col_ptr->insert_data(reinterpret_cast<char*>(&date), 0);
+            }
+        } else {
+            // todo(wb) batch insert here
+            for (; begin < end; begin++) {
+                dst_col_ptr->insert_data((const char*)&_chunk.data[begin * SIZE_OF_TYPE], 0);
+            }
+        }
+
+        *n = max_fetch;
+        _cur_index += max_fetch;
+
+        return Status::OK();
+    };
+
     Status peek_next_batch(size_t* n, ColumnBlockView* dst) override {
         return next_batch<false>(n, dst);
     }
@@ -393,6 +460,7 @@ private:
     int _size_of_element;
     size_t _cur_index;
     Chunk _chunk;
+    friend class BinaryDictPageDecoder;
 };
 
 } // namespace segment_v2
diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp
index 3bbbeab89e..c08b3280a2 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/column_reader.cpp
@@ -30,6 +30,8 @@
 #include "util/block_compression.h"
 #include "util/coding.h"       // for get_varint32
 #include "util/rle_encoding.h" // for RleDecoder
+#include "vec/core/types.h"
+#include "vec/runtime/vdatetime_value.h" //for VecDateTime
 
 namespace doris {
 namespace segment_v2 {
@@ -578,6 +580,57 @@ Status FileColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, bool* has
     return Status::OK();
 }
 
+Status FileColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr &dst, bool* has_null) {
+    size_t curr_size = dst->byte_size(); // byte size before reading, used for the bytes_read statistic below
+    size_t remaining = *n; // rows still to be produced
+    *has_null = false; // set to true once a null run is encountered
+    while (remaining > 0) {
+        if (!_page->has_remaining()) {
+            bool eos = false;
+            RETURN_IF_ERROR(_load_next_page(&eos));
+            if (eos) {
+                break; // no further page: return fewer rows than requested
+            }
+        }
+        // Read from the current page, split by null runs when the page carries null information.
+        // number of rows to be read from this page
+        size_t nrows_in_page = std::min(remaining, _page->remaining());
+        size_t nrows_to_read = nrows_in_page;
+        if (_page->has_null) {
+            while (nrows_to_read > 0) {
+                bool is_null = false;
+                size_t this_run = _page->null_decoder.GetNextRun(&is_null, nrows_to_read);
+                // we use num_rows only for CHECK
+                size_t num_rows = this_run;
+                if (!is_null) {
+                    RETURN_IF_ERROR(_page->data_decoder->next_batch(&num_rows, dst));
+                    DCHECK_EQ(this_run, num_rows);
+                } else {
+                    *has_null = true;
+                    // todo(wb) add a DCHECK here to check whether type is column nullable
+                    for (size_t x = 0; x < this_run; x++) {
+                        dst->insert_data(nullptr, 0); // todo(wb) vectorized here
+                    }
+                }
+                // Advance the page offset and the ordinal by the length of the run just handled.
+                nrows_to_read -= this_run;
+                _page->offset_in_page += this_run;
+                _current_ordinal += this_run;
+            }
+        } else {
+            RETURN_IF_ERROR(_page->data_decoder->next_batch(&nrows_to_read, dst));
+            DCHECK_EQ(nrows_to_read, nrows_in_page);
+            // No null information for this page: the whole span is decoded in one call.
+            _page->offset_in_page += nrows_to_read;
+            _current_ordinal += nrows_to_read;
+        }
+        remaining -= nrows_in_page;
+    }
+    *n -= remaining; // report the number of rows actually produced
+    _opts.stats->bytes_read += (dst->byte_size() - curr_size) + BitmapSize(*n);
+    return Status::OK();
+}
+
 Status FileColumnIterator::_load_next_page(bool* eos) {
     _page_iter.next();
     if (!_page_iter.valid()) {
@@ -715,5 +768,55 @@ Status DefaultValueColumnIterator::next_batch(size_t* n, ColumnBlockView* dst, b
     return Status::OK();
 }
 
+void DefaultValueColumnIterator::insert_default_data(vectorized::MutableColumnPtr &dst, size_t n) {
+    vectorized::Int128 int128; // scratch buffer for the converted DATE/DATETIME/DECIMAL value
+    char* data_ptr = (char*)&int128;
+    size_t data_len = sizeof(int128);
+    // Appends n copies of the default value held in _mem_value to dst, converting some types first.
+    auto type = _type_info->type();
+    if (type == OLAP_FIELD_TYPE_DATE) {
+        assert(_type_size == sizeof(FieldTypeTraits<OLAP_FIELD_TYPE_DATE>::CppType)); //uint24_t
+        std::string str = FieldTypeTraits<OLAP_FIELD_TYPE_DATE>::to_string(_mem_value);
+        // The stored value is rendered to a string and re-parsed as a VecDateTimeValue.
+        vectorized::VecDateTimeValue value;
+        value.from_date_str(str.c_str(), str.length());
+        value.cast_to_date();
+        //TODO: here is int128 = int64 (data_len stays sizeof(int128) while only an Int64 is stored)
+        int128 = binary_cast<vectorized::VecDateTimeValue, vectorized::Int64>(value);
+    } else if (type == OLAP_FIELD_TYPE_DATETIME) {
+        assert(_type_size == sizeof(FieldTypeTraits<OLAP_FIELD_TYPE_DATETIME>::CppType)); //int64_t
+        std::string str = FieldTypeTraits<OLAP_FIELD_TYPE_DATETIME>::to_string(_mem_value);
+        // Same string round trip as the DATE branch, finished with to_datetime().
+        vectorized::VecDateTimeValue value;
+        value.from_date_str(str.c_str(), str.length());
+        value.to_datetime();
+        // NOTE(review): as in the DATE branch an Int64 is widened into the Int128 buffer — confirm insert_data ignores data_len here
+        int128 = binary_cast<vectorized::VecDateTimeValue, vectorized::Int64>(value);
+    } else if (type == OLAP_FIELD_TYPE_DECIMAL) {
+        assert(_type_size == sizeof(FieldTypeTraits<OLAP_FIELD_TYPE_DECIMAL>::CppType)); //decimal12_t
+        decimal12_t* d = (decimal12_t*)_mem_value;
+        int128 = DecimalV2Value(d->integer, d->fraction).value();
+    } else {
+        data_ptr = (char*)_mem_value; // every other type: insert the stored bytes as they are
+        data_len = _type_size;
+    }
+    // Repeat the prepared value n times.
+    for (size_t i = 0; i < n; ++i) {
+        dst->insert_data(data_ptr, data_len);
+    }
+}
+
+Status DefaultValueColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr &dst, bool* has_null) {
+    // Emits *n rows: insert_many_defaults() when the default is null, otherwise copies of the default value.
+    const size_t row_count = *n;
+    *has_null = _is_default_value_null;
+    if (_is_default_value_null) {
+        dst->insert_many_defaults(row_count);
+    } else {
+        insert_default_data(dst, row_count);
+    }
+    return Status::OK();
+}
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h
index 0b3588ce75..98afe442d2 100644
--- a/be/src/olap/rowset/segment_v2/column_reader.h
+++ b/be/src/olap/rowset/segment_v2/column_reader.h
@@ -215,11 +215,20 @@ public:
         return next_batch(n, dst, &has_null);
     }
 
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) {
+        bool discarded_has_null; // out-parameter demanded by the three-argument overload; its value is not used here
+        return next_batch(n, dst, &discarded_has_null);
+    }
+
     // After one seek, we can call this function many times to read data
     // into ColumnBlockView. when read string type data, memory will allocated
     // from MemPool
     virtual Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) = 0;
 
+    virtual Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst, bool* has_null) {
+        return Status::NotSupported("not implement"); // default for iterators that do not override the vectorized read; no argument is touched
+    }
+
     virtual ordinal_t get_current_ordinal() const = 0;
 
     virtual Status get_row_ranges_by_zone_map(CondColumn* cond_column, CondColumn* delete_condition,
@@ -268,6 +277,8 @@ public:
 
     Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) override;
 
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst, bool* has_null) override;
+
     ordinal_t get_current_ordinal() const override { return _current_ordinal; }
 
     // get row ranges by zone map
@@ -401,11 +412,20 @@ public:
         return Status::OK();
     }
 
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) {
+        bool discarded_has_null; // out-parameter demanded by the three-argument overload; its value is not used here
+        return next_batch(n, dst, &discarded_has_null);
+    }
+
     Status next_batch(size_t* n, ColumnBlockView* dst, bool* has_null) override;
 
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst, bool* has_null) override;
+
     ordinal_t get_current_ordinal() const override { return _current_rowid; }
 
 private:
+    void insert_default_data(vectorized::MutableColumnPtr &dst, size_t n);
+
     bool _has_default_value;
     std::string _default_value;
     bool _is_nullable;
diff --git a/be/src/olap/rowset/segment_v2/empty_segment_iterator.cpp b/be/src/olap/rowset/segment_v2/empty_segment_iterator.cpp
index 61c0faba69..f367c2d052 100644
--- a/be/src/olap/rowset/segment_v2/empty_segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/empty_segment_iterator.cpp
@@ -30,5 +30,9 @@ Status EmptySegmentIterator::next_batch(RowBlockV2* block) {
     return Status::EndOfFile("no more data in segment");
 }
 
+Status EmptySegmentIterator::next_batch(vectorized::Block* block) {
+    return Status::EndOfFile("no more data in segment"); // always end-of-file; `block` is left untouched
+}
+
 } // namespace segment_v2
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/empty_segment_iterator.h b/be/src/olap/rowset/segment_v2/empty_segment_iterator.h
index cb6c48c2df..3e1a4f9bf4 100644
--- a/be/src/olap/rowset/segment_v2/empty_segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/empty_segment_iterator.h
@@ -32,10 +32,11 @@ public:
     Status init(const StorageReadOptions& opts) override { return Status::OK(); }
     const Schema& schema() const override { return _schema; }
     Status next_batch(RowBlockV2* row_block) override;
+    Status next_batch(vectorized::Block* block) override;
 
 private:
     Schema _schema;
 };
 
 } // namespace segment_v2
-} // namespace doris
\ No newline at end of file
+} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/frame_of_reference_page.h b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h
index 5d0c0b1e22..df8fe06a1e 100644
--- a/be/src/olap/rowset/segment_v2/frame_of_reference_page.h
+++ b/be/src/olap/rowset/segment_v2/frame_of_reference_page.h
@@ -161,6 +161,10 @@ public:
         return Status::OK();
     }
 
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override {
+        return Status::NotSupported("frame page not implement vec op now"); // vectorized read is a stub: neither *n nor dst is touched
+    };
+
     Status peek_next_batch(size_t* n, ColumnBlockView* dst) override {
         return next_batch<false>(n, dst);
     }
diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h
index feed37f035..ab5b482e29 100644
--- a/be/src/olap/rowset/segment_v2/page_decoder.h
+++ b/be/src/olap/rowset/segment_v2/page_decoder.h
@@ -19,6 +19,7 @@
 
 #include "common/status.h"     // for Status
 #include "olap/column_block.h" // for ColumnBlockView
+#include "vec/columns/column.h"
 
 namespace doris {
 namespace segment_v2 {
@@ -81,6 +82,10 @@ public:
     // allocated in the column_vector_view's mem_pool.
     virtual Status next_batch(size_t* n, ColumnBlockView* dst) = 0;
 
+    virtual Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) {
+        return Status::NotSupported("not implement vec op now"); // default for decoders that do not override the vectorized read
+    }
+
     // Same as `next_batch` except for not moving forward the cursor.
     // When read array's ordinals in `ArrayFileColumnIterator`, we want to read one extra ordinal
     // but do not want to move forward the cursor.
diff --git a/be/src/olap/rowset/segment_v2/plain_page.h b/be/src/olap/rowset/segment_v2/plain_page.h
index 555c9af1a1..09e4280474 100644
--- a/be/src/olap/rowset/segment_v2/plain_page.h
+++ b/be/src/olap/rowset/segment_v2/plain_page.h
@@ -186,6 +186,10 @@ public:
 
     Status next_batch(size_t* n, ColumnBlockView* dst) override { return next_batch<true>(n, dst); }
 
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override {
+        return Status::NotSupported("plain page not implement vec op now"); // vectorized read is a stub: neither *n nor dst is touched
+    };
+
     template <bool forward_index>
     inline Status next_batch(size_t* n, ColumnBlockView* dst) {
         DCHECK(_parsed);
diff --git a/be/src/olap/rowset/segment_v2/rle_page.h b/be/src/olap/rowset/segment_v2/rle_page.h
index ed20aaf94e..52a9613cf4 100644
--- a/be/src/olap/rowset/segment_v2/rle_page.h
+++ b/be/src/olap/rowset/segment_v2/rle_page.h
@@ -230,6 +230,29 @@ public:
         return Status::OK();
     }
 
+    Status next_batch(size_t* n, vectorized::MutableColumnPtr &dst) override {
+        DCHECK(_parsed);
+        // Nothing requested, or the cursor already sits at or past the last element.
+        if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) {
+            *n = 0;
+            return Status::OK();
+        }
+
+        // Clamp the request to the number of elements left in the page.
+        const size_t fetch_count = std::min(*n, static_cast<size_t>(_num_elements - _cur_index));
+        bool ok = false;
+        CppType decoded;
+        for (size_t i = 0; i < fetch_count; ++i) {
+            ok = _rle_decoder.Get(&decoded);
+            DCHECK(ok);
+            dst->insert_data((char*)(&decoded), SIZE_OF_TYPE);
+        }
+
+        *n = fetch_count;
+        _cur_index += fetch_count;
+        return Status::OK();
+    };
+
     size_t count() const override { return _num_elements; }
 
     size_t current_index() const override { return _cur_index; }
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index cb7e0d56ca..86a89acf41 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -30,6 +30,7 @@
 #include "olap/rowset/segment_v2/segment.h"
 #include "olap/short_key_index.h"
 #include "util/doris_metrics.h"
+#include "olap/in_list_predicate.h"
 
 using strings::Substitute;
 
@@ -120,7 +121,7 @@ Status SegmentIterator::init(const StorageReadOptions& opts) {
     return Status::OK();
 }
 
-Status SegmentIterator::_init() {
+Status SegmentIterator::_init(bool is_vec) {
     DorisMetrics::instance()->segment_read_total->increment(1);
     // get file handle from file descriptor of segment
     fs::BlockManager* block_mgr = fs::fs_util::block_manager(_segment->_path_desc.storage_medium);
@@ -133,7 +134,11 @@ Status SegmentIterator::_init() {
         RETURN_IF_ERROR(_get_row_ranges_by_keys());
     }
     RETURN_IF_ERROR(_get_row_ranges_by_column_conditions());
-    _init_lazy_materialization();
+    if (is_vec) {
+        _vec_init_lazy_materialization();
+    } else {
+        _init_lazy_materialization();
+    }
     _range_iter.reset(new BitmapRangeIterator(_row_bitmap));
     return Status::OK();
 }
@@ -581,5 +586,359 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
     return Status::OK();
 }
 
+/* ---------------------- for vectorization implementation  ---------------------- */
+
+// todo(wb) need a UT here
+void SegmentIterator::_vec_init_lazy_materialization() {
+    _is_pred_column.resize(_schema.columns().size(), false);
+    // Splits the schema columns into vectorized-predicate, short-circuit-predicate and non-predicate groups, then picks the columns read first.
+    std::set<ColumnId> pred_column_ids; // including short_cir_pred_col_id_set and vec_pred_col_id_set
+    _is_all_column_basic_type = true;
+    bool is_predicate_column_exists = false;
+    bool is_non_predicate_column_exists = false;
+
+    if (!_col_predicates.empty()) {
+        is_predicate_column_exists = true;
+
+        std::set<ColumnId> short_cir_pred_col_id_set; // using set for distinct cid
+        std::set<ColumnId> vec_pred_col_id_set;
+
+        for (auto predicate : _col_predicates) {
+            auto cid = predicate->column_id();
+            FieldType type = _schema.column(cid)->type();
+            _is_pred_column[cid] = true;
+            pred_column_ids.insert(cid);
+
+            // VARCHAR/CHAR/DECIMAL/DATE columns and IN-list predicates go through short circuit execution instead of the vectorized path
+            if (type == OLAP_FIELD_TYPE_VARCHAR || type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_DECIMAL
+                || type == OLAP_FIELD_TYPE_DATE || predicate->is_in_predicate()) {
+                short_cir_pred_col_id_set.insert(cid);
+                _short_cir_eval_predicate.push_back(predicate);
+                _is_all_column_basic_type = false;
+            } else {
+                vec_pred_col_id_set.insert(predicate->column_id());
+                if (_pre_eval_block_predicate == nullptr) {
+                    _pre_eval_block_predicate = new AndBlockColumnPredicate(); // created lazily on the first vectorizable predicate
+                }
+                reinterpret_cast<MutilColumnBlockPredicate*>(_pre_eval_block_predicate)->add_column_predicate(new SingleColumnBlockPredicate(predicate));
+            }
+        }
+        // Columns referenced by delete conditions join the short-circuit set and the overall predicate set.
+        std::set<ColumnId> del_cond_id_set;
+        _opts.delete_condition_predicates.get()->get_all_column_ids(del_cond_id_set);
+        short_cir_pred_col_id_set.insert(del_cond_id_set.begin(), del_cond_id_set.end());
+        pred_column_ids.insert(del_cond_id_set.begin(), del_cond_id_set.end());
+        // NOTE(review): delete-condition-only columns are in pred_column_ids but not flagged in _is_pred_column, so the loop below also lists them as non-predicate — confirm intended
+        if (_schema.column_ids().size() > pred_column_ids.size()) {
+            for (auto cid : _schema.column_ids()) {
+                if (!_is_pred_column[cid]) {
+                    _non_predicate_columns.push_back(cid);
+                    is_non_predicate_column_exists = true;
+                }
+            }
+        }
+
+        _vec_pred_column_ids.assign(vec_pred_col_id_set.cbegin(), vec_pred_col_id_set.cend());
+        _short_cir_pred_column_ids.assign(short_cir_pred_col_id_set.cbegin(), short_cir_pred_col_id_set.cend());
+    } else {
+        _is_all_column_basic_type = false; // no predicates at all: every returned column is a non-predicate column
+        is_non_predicate_column_exists = true;
+        for (auto cid : _schema.column_ids()) {
+            _non_predicate_columns.push_back(cid);
+        }
+    }
+
+    // note(wb) in following cases we disable lazy materialization
+    // case 1: when all columns are basic types (is_all_column_basic_type = true)
+    //   because we think `seek and read` cost > read page cost, lazy materialize may cause more `seek and read`, so disable it
+    // case 2: all columns are predicate columns
+    // case 3: no column is a predicate column
+    // todo(wb) need further research on more lazy materialization rules, such as getting more info from `statistics` for better decisions
+    if (_is_all_column_basic_type) {
+        std::set<ColumnId> pred_set(_vec_pred_column_ids.begin(), _vec_pred_column_ids.end());
+        std::set<ColumnId> non_pred_set(_non_predicate_columns.begin(), _non_predicate_columns.end());
+
+        // when _is_all_column_basic_type = true, _first_read_column_ids should keep the same order as _schema.column_ids, which is the return column order
+        for (int i = 0; i < _schema.num_column_ids(); i++) {
+            auto cid = _schema.column_ids()[i];
+            if (pred_set.find(cid) != pred_set.end()) {
+                _first_read_column_ids.push_back(cid);
+            } else if (non_pred_set.find(cid) != non_pred_set.end()) {
+                _first_read_column_ids.push_back(cid);
+                _is_pred_column[cid] = true; // in this case, non-predicate column should also be filtered by sel idx, so we regard it as pred columns
+            }
+        }
+
+    } else if (is_predicate_column_exists && !is_non_predicate_column_exists) {
+        _first_read_column_ids.assign(pred_column_ids.cbegin(), pred_column_ids.cend());
+    } else if (!is_predicate_column_exists && is_non_predicate_column_exists) {
+        for (auto cid : _non_predicate_columns) {
+            _first_read_column_ids.push_back(cid);
+        }
+    } else {
+        _lazy_materialization_read = true; // both kinds of column exist: read predicate columns first
+        _first_read_column_ids.assign(pred_column_ids.cbegin(), pred_column_ids.cend());
+    }
+
+    // make _schema_block_id_map: column id -> position of that column among _schema.column_ids()
+    _schema_block_id_map.resize(_schema.columns().size());
+    for (int i = 0; i < _schema.num_column_ids(); i++) {
+        auto cid = _schema.column_ids()[i];
+        _schema_block_id_map[cid] = i;
+    }
+
+}
+
+Status SegmentIterator::_read_columns(const std::vector<ColumnId>& column_ids, vectorized::MutableColumns& column_block, size_t nrows) {
+    // Pull `nrows` values from each listed column's iterator into the slot indexed by its column id.
+    for (const ColumnId col_id : column_ids) {
+        size_t fetched = nrows; // in: rows requested, out: rows actually read
+        RETURN_IF_ERROR(_column_iterators[col_id]->next_batch(&fetched, column_block[col_id]));
+        DCHECK_EQ(nrows, fetched);
+    }
+    return Status::OK();
+}
+
+// Prepares `block` and `current_columns` for one batch: trims or pre-fills the block's column slots, then resets each column's storage.
+void SegmentIterator::_init_current_block(vectorized::Block* block, std::vector<vectorized::MutableColumnPtr>& current_columns) {
+    const bool is_block_mem_reuse = block->mem_reuse();
+    if (is_block_mem_reuse) {
+        const size_t column_to_keep = _schema.num_column_ids();
+        while (block->columns() > column_to_keep) { // unsigned-safe trim: an int index tested against size_t wraps at -1
+            block->erase(block->columns() - 1);
+        }
+        block->clear_column_data();
+    } else { // pre fill output block here
+        for (size_t i = 0; i < _schema.num_column_ids(); i++) {
+            auto cid = _schema.column_ids()[i];
+            auto* column_desc = _schema.columns()[cid];
+            auto data_type = Schema::get_data_type_ptr(column_desc->type());
+            if (column_desc->is_nullable()) {
+                block->insert({nullptr, std::make_shared<vectorized::DataTypeNullable>(std::move(data_type)), column_desc->name()});
+            } else {
+                block->insert({nullptr, std::move(data_type), column_desc->name()});
+            }
+        }
+    }
+
+    for (size_t i = 0; i < _schema.num_column_ids(); i++) {
+        auto cid = _schema.column_ids()[i];
+        if (_is_pred_column[cid]) {  //todo(wb) maybe we can release it after output block
+            current_columns[cid]->clear();
+        } else { // non-predicate column
+            auto &column_desc = _schema.columns()[cid];
+            if (is_block_mem_reuse) {
+                current_columns[cid] = std::move(*block->get_by_position(i).column).mutate();
+            } else {
+                auto data_type = Schema::get_data_type_ptr(column_desc->type());
+                if (column_desc->is_nullable()) {
+                    current_columns[cid] = doris::vectorized::ColumnNullable::create(std::move(data_type->create_column()), doris::vectorized::ColumnUInt8::create());
+                } else {
+                    current_columns[cid] = data_type->create_column();
+                }
+            }
+            if (column_desc->type() == OLAP_FIELD_TYPE_DATE) {
+                current_columns[cid]->set_date_type();
+            } else if (column_desc->type() == OLAP_FIELD_TYPE_DATETIME) {
+                current_columns[cid]->set_datetime_type();
+            }
+        }
+    }
+}
+
+void SegmentIterator::_output_non_pred_columns(vectorized::Block* block, bool is_block_mem_reuse) {
+    for (const auto col_id : _non_predicate_columns) { // move each non-predicate column into its slot in the output block
+        block->replace_by_position(_schema_block_id_map[col_id], std::move(_current_return_columns[col_id]));
+    }
+}
+
+void SegmentIterator::_output_column_by_sel_idx(vectorized::Block* block, std::vector<ColumnId> columnIds,
+        uint16_t* sel_rowid_idx, uint16_t select_size, bool is_block_mem_reuse) {
+    // For each requested column, apply the selector (sel_rowid_idx, select_size) and place the result in the block.
+    for (const ColumnId col_id : columnIds) {
+        const auto block_pos = _schema_block_id_map[col_id];
+        auto& source = _current_return_columns[col_id];
+        if (!is_block_mem_reuse) {
+            block->replace_by_position(block_pos, source->get_ptr()->filter_by_selector(sel_rowid_idx, select_size));
+            continue;
+        }
+        source->filter_by_selector(sel_rowid_idx, select_size, &block->get_by_position(block_pos).column);
+    }
+}
+
+
+Status SegmentIterator::_read_columns_by_index(uint32_t nrows_read_limit, uint32_t& nrows_read, bool set_block_rowid) {
+    do { // one iteration per row range handed out by _range_iter, until the limit is reached or no range is left
+        uint32_t range_from;
+        uint32_t range_to;
+        bool has_next_range =
+            _range_iter->next_range(nrows_read_limit - nrows_read, &range_from, &range_to);
+        if (!has_next_range) {
+            break;
+        }
+        if (_cur_rowid == 0 || _cur_rowid != range_from) { // seek unless this range continues where the last read ended (always seeks while _cur_rowid is 0)
+            _cur_rowid = range_from;
+            RETURN_IF_ERROR(_seek_columns(_first_read_column_ids, _cur_rowid));
+        }
+        size_t rows_to_read = range_to - range_from;
+        RETURN_IF_ERROR(_read_columns(_first_read_column_ids, _current_return_columns, rows_to_read));
+        _cur_rowid += rows_to_read;
+        if (set_block_rowid) {
+            for (uint32_t rid = range_from; rid < range_to; rid++) {
+                _block_rowids[nrows_read++] = rid; // NOTE(review): indexed write — assumes _block_rowids already has at least nrows_read_limit elements; confirm at the caller
+            }
+        } else {
+            nrows_read += rows_to_read;
+        }
+    } while (nrows_read < nrows_read_limit);
+    return Status::OK();
+}
+
+void SegmentIterator::_evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, uint16_t& selected_size) {
+    uint16_t new_size = 0;
+    if (_vec_pred_column_ids.size() == 0) { // no vectorized predicate: select every row, selected_size stays unchanged
+        for (uint32_t i = 0; i < selected_size; ++i) {
+            sel_rowid_idx[new_size++] = i;
+        }
+        return;
+    }
+    // Evaluate the block predicate into one flag per row; every flag starts as "selected".
+    uint16_t original_size = selected_size;
+    bool ret_flags[selected_size]; // NOTE(review): variable-length array — a compiler extension, not standard C++
+    memset(ret_flags, 1, selected_size);
+    _pre_eval_block_predicate->evaluate_vec(_current_return_columns, selected_size, ret_flags);
+    // Compact the indices of the rows whose flag is still set into sel_rowid_idx.
+    for (uint32_t i = 0; i < selected_size; ++i) {
+        if (ret_flags[i]) {
+            sel_rowid_idx[new_size++] = i;
+        }
+    }
+    // Record how many rows were dropped and report the new selection size through selected_size.
+    _opts.stats->rows_vec_cond_filtered += original_size - new_size;
+    selected_size = new_size;
+}
+
+void SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_idx, uint16_t* selected_size_ptr) {
+    if (_short_cir_pred_column_ids.size() == 0) {
+        return; // nothing to evaluate; this also skips the delete-condition evaluation below
+    }
+    // Each predicate is applied in turn to the same selection buffer and size.
+    for (auto column_predicate : _short_cir_eval_predicate) {
+        auto column_id = column_predicate->column_id();
+        auto& short_cir_column = _current_return_columns[column_id];
+        column_predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, selected_size_ptr);
+    }
+
+    // evaluate delete condition
+    _opts.delete_condition_predicates->evaluate(_current_return_columns, vec_sel_rowid_idx, selected_size_ptr);
+}
+
+void SegmentIterator::_read_columns_by_rowids(std::vector<ColumnId>& read_column_ids, std::vector<rowid_t>& rowid_vector,
+        uint16_t* sel_rowid_idx, size_t select_size, vectorized::MutableColumns* mutable_columns) {
+    size_t start_idx = 0; // position in sel_rowid_idx where the current run starts
+    while (start_idx < select_size) {
+        size_t end_idx = start_idx + 1;
+        while (end_idx < select_size && (rowid_vector[sel_rowid_idx[end_idx - 1]] == rowid_vector[sel_rowid_idx[end_idx]] - 1)) { // extend the run while row ids are consecutive
+            end_idx++;
+        }
+        size_t range = end_idx - start_idx; // one seek plus one read per run of consecutive row ids
+        _seek_columns(read_column_ids, rowid_vector[sel_rowid_idx[start_idx]]); // NOTE(review): returned Status is dropped
+        _read_columns(read_column_ids, *mutable_columns, range); // NOTE(review): returned Status is dropped; this function returns void
+        start_idx += range;
+    }
+}
+
+Status SegmentIterator::next_batch(vectorized::Block* block) {
+    // Vectorized read path: fills `block` with at most _opts.block_row_max rows of this segment.
+    // Returns EndOfFile once no row can be read, and forwards any error from initialization or reading.
+    bool is_mem_reuse = block->mem_reuse();
+    SCOPED_RAW_TIMER(&_opts.stats->block_load_ns);
+    if (UNLIKELY(!_inited)) {
+        RETURN_IF_ERROR(_init(true));
+        _inited = true;
+        if (_vec_pred_column_ids.size() > 0 || _short_cir_pred_column_ids.size() > 0) {
+            // resize(), not reserve(): _read_columns_by_index stores row ids via operator[], which needs real elements.
+            _block_rowids.resize(_opts.block_row_max);
+        }
+        _current_return_columns.resize(_schema.columns().size());
+        for (size_t i = 0; i < _schema.num_column_ids(); i++) {
+            auto cid = _schema.column_ids()[i];
+            if (_is_pred_column[cid]) {
+                auto& column_desc = _schema.columns()[cid];
+                _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr(column_desc->type(), column_desc->is_nullable());
+                _current_return_columns[cid]->reserve(_opts.block_row_max);
+            }
+        }
+    }
+
+    _init_current_block(block, _current_return_columns);
+    uint32_t nrows_read = 0;
+    uint32_t nrows_read_limit = _opts.block_row_max;
+    // A failed read must surface to the caller rather than be handled as an ordinary batch.
+    RETURN_IF_ERROR(_read_columns_by_index(nrows_read_limit, nrows_read, _col_predicates.size() > 0));
+
+    _opts.stats->blocks_load += 1;
+    _opts.stats->raw_rows_read += nrows_read;
+
+    if (nrows_read == 0) {
+        for (size_t i = 0; i < _schema.num_column_ids(); i++) {
+            auto cid = _schema.column_ids()[i];
+            // todo(wb) abstract make column where
+            if (!_is_pred_column[cid]) { // non-predicate
+                block->replace_by_position(i, std::move(_current_return_columns[cid]));
+            } else { // predicate
+                if (!is_mem_reuse) {
+                    auto* column_desc = _schema.columns()[cid];
+                    auto data_type = Schema::get_data_type_ptr(column_desc->type());
+                    block->replace_by_position(i, data_type->create_column());
+                }
+            }
+            // not sure whether block is clear before enter segmentIter, so clear it here.
+            if (is_mem_reuse) {
+                block->clear_column_data();
+            }
+        }
+        return Status::EndOfFile("no more data in segment");
+    }
+
+    // when no predicate (including delete condition) is provided, output columns directly
+    if (_vec_pred_column_ids.size() == 0 && _short_cir_pred_column_ids.size() == 0) {
+        _output_non_pred_columns(block, is_mem_reuse);
+    } else { // need predicate evaluation
+        uint16_t selected_size = nrows_read; // NOTE(review): truncates if block_row_max exceeds 65535 -- confirm the limit
+        uint16_t sel_rowid_idx[selected_size];
+        // step 1: evaluate vectorization predicate
+        _evaluate_vectorization_predicate(sel_rowid_idx, selected_size);
+
+        // When predicate columns and non-predicate columns are all basic types, lazy materialization is eliminated,
+        // so output the block directly after vectorization evaluation
+        if (_is_all_column_basic_type) {
+            _output_column_by_sel_idx(block, _first_read_column_ids, sel_rowid_idx, selected_size, is_mem_reuse);
+            return Status::OK();
+        }
+
+        // step 2: evaluate short circuit predicate
+        // todo(wb) research whether need to read short predicate after vectorization evaluation
+        //          to reduce cost of read short circuit columns.
+        //          In SSB test, it make no difference; So need more scenarios to test
+        _evaluate_short_circuit_predicate(sel_rowid_idx, &selected_size);
+
+        // step3: read non_predicate column
+        if (_non_predicate_columns.size() != 0) {
+            _read_columns_by_rowids(_non_predicate_columns, _block_rowids, sel_rowid_idx, selected_size, &_current_return_columns);
+        }
+
+        // step4: output columns
+        // 4.1 output non-predicate column
+        _output_non_pred_columns(block, is_mem_reuse);
+        // 4.2 output short circuit predicate column
+        _output_column_by_sel_idx(block, _short_cir_pred_column_ids, sel_rowid_idx, selected_size, is_mem_reuse);
+        // 4.3 output vectorization predicate column
+        _output_column_by_sel_idx(block, _vec_pred_column_ids, sel_rowid_idx, selected_size, is_mem_reuse);
+    }
+
+    return Status::OK();
+}
+
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.h b/be/src/olap/rowset/segment_v2/segment_iterator.h
index 0da6ae4658..c06077e4db 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.h
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.h
@@ -50,14 +50,17 @@ public:
     SegmentIterator(std::shared_ptr<Segment> segment, const Schema& _schema,
                     std::shared_ptr<MemTracker> parent);
     ~SegmentIterator() override;
+
     Status init(const StorageReadOptions& opts) override;
     Status next_batch(RowBlockV2* row_block) override;
+    Status next_batch(vectorized::Block* block) override;
+
     const Schema& schema() const override { return _schema; }
     bool is_lazy_materialization_read() const override { return _lazy_materialization_read; }
     uint64_t data_id() const { return _segment->id(); }
 
 private:
-    Status _init();
+    Status _init(bool is_vec = false);
 
     Status _init_return_column_iterators();
     Status _init_bitmap_index_iterators();
@@ -75,6 +78,7 @@ private:
     Status _apply_bitmap_index();
 
     void _init_lazy_materialization();
+    void _vec_init_lazy_materialization();
 
     uint32_t segment_id() const { return _segment->id(); }
     uint32_t num_rows() const { return _segment->num_rows(); }
@@ -84,6 +88,17 @@ private:
     Status _read_columns(const std::vector<ColumnId>& column_ids, RowBlockV2* block,
                          size_t row_offset, size_t nrows);
 
+    // for vectorization implementation
+    Status _read_columns(const std::vector<ColumnId>& column_ids, vectorized::MutableColumns& column_block, size_t nrows);
+    Status _read_columns_by_index(uint32_t nrows_read_limit, uint32_t& nrows_read, bool set_block_rowid);
+    void _init_current_block(vectorized::Block* block, std::vector<vectorized::MutableColumnPtr>& non_pred_vector);
+    void _evaluate_vectorization_predicate(uint16_t* sel_rowid_idx, uint16_t& selected_size);
+    void _evaluate_short_circuit_predicate(uint16_t* sel_rowid_idx, uint16_t* selected_size);
+    void _output_non_pred_columns(vectorized::Block* block, bool is_block_mem_reuse);
+    void _output_column_by_sel_idx(vectorized::Block* block, std::vector<ColumnId> columnids, uint16_t* sel_rowid_idx, uint16_t select_size, bool is_block_mem_reuse);
+    void _read_columns_by_rowids(std::vector<ColumnId>& read_column_ids, std::vector<rowid_t>& rowid_vector, 
+        uint16_t* sel_rowid_idx, size_t select_size, vectorized::MutableColumns* mutable_columns);
+
 private:
     class BitmapRangeIterator;
 
@@ -113,6 +128,21 @@ private:
     // could be a local variable of next_batch(), kept here to reuse vector memory
     std::vector<rowid_t> _block_rowids;
 
+    // fields for vectorization execution 
+    bool _is_all_column_basic_type;
+    std::vector<ColumnId> _vec_pred_column_ids; // keep columnId of columns for vectorized predicate evaluation
+    std::vector<ColumnId> _short_cir_pred_column_ids; // keep columnId of columns for short circuit predicate evaluation
+    vector<bool> _is_pred_column; // columns hold by segmentIter
+    vectorized::MutableColumns _current_return_columns;
+    AndBlockColumnPredicate* _pre_eval_block_predicate = nullptr;
+    std::vector<ColumnPredicate*> _short_cir_eval_predicate;
+    // When lazy materialization is enabled, SegmentIterator needs to read data at least twice:
+    // first, it reads the predicate columns through the various indexes;
+    // second, it reads the non-predicate columns.
+    // So we need a field that records the columns to read in the first pass.
+    vector<ColumnId> _first_read_column_ids;
+    vector<int> _schema_block_id_map; // map from schema column id to column idx in Block
+
     // the actual init process is delayed to the first call to next_batch()
     bool _inited;
 
diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp
index ab00e37773..f5a0900cac 100644
--- a/be/src/olap/schema.cpp
+++ b/be/src/olap/schema.cpp
@@ -18,6 +18,11 @@
 #include "olap/schema.h"
 
 #include "olap/row_block2.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_complex.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/core/types.h"
+#include "olap/uint24.h"
 
 namespace doris {
 
@@ -103,4 +108,109 @@ Schema::~Schema() {
     }
 }
 
+vectorized::DataTypePtr Schema::get_data_type_ptr(FieldType type) {
+    // Maps an OLAP storage field type to the vectorized data type used to represent it.
+    // A type without a case reaches DCHECK(false) and then the nullptr return below.
+    switch (type) {
+        // Boolean and integer types.
+        case OLAP_FIELD_TYPE_BOOL:
+            return std::make_shared<vectorized::DataTypeUInt8>();
+        case OLAP_FIELD_TYPE_TINYINT:
+            return std::make_shared<vectorized::DataTypeInt8>();
+        case OLAP_FIELD_TYPE_SMALLINT:
+            return std::make_shared<vectorized::DataTypeInt16>();
+        case OLAP_FIELD_TYPE_INT:
+            return std::make_shared<vectorized::DataTypeInt32>();
+        case OLAP_FIELD_TYPE_BIGINT:
+            return std::make_shared<vectorized::DataTypeInt64>();
+        case OLAP_FIELD_TYPE_LARGEINT:
+            return std::make_shared<vectorized::DataTypeInt128>();
+
+        // Floating-point types.
+        case OLAP_FIELD_TYPE_FLOAT:
+            return std::make_shared<vectorized::DataTypeFloat32>();
+        case OLAP_FIELD_TYPE_DOUBLE:
+            return std::make_shared<vectorized::DataTypeFloat64>();
+
+        // Date and time types.
+        case OLAP_FIELD_TYPE_DATE:
+            return std::make_shared<vectorized::DataTypeDate>();
+        case OLAP_FIELD_TYPE_DATETIME:
+            return std::make_shared<vectorized::DataTypeDateTime>();
+
+        // String-like types, HLL included, all map to DataTypeString.
+        case OLAP_FIELD_TYPE_CHAR:
+        case OLAP_FIELD_TYPE_VARCHAR:
+        case OLAP_FIELD_TYPE_HLL:
+        case OLAP_FIELD_TYPE_STRING:
+            return std::make_shared<vectorized::DataTypeString>();
+
+        // Bitmap: the switch is over FieldType, so the label must be a FieldType enumerator.
+        case OLAP_FIELD_TYPE_OBJECT:
+            return std::make_shared<vectorized::DataTypeBitMap>();
+
+        case OLAP_FIELD_TYPE_DECIMAL: // NOTE(review): 27 and 9 are hard-coded -- presumably precision and scale; confirm
+            return std::make_shared<vectorized::DataTypeDecimal<vectorized::Decimal128>>(27, 9);
+
+        default:
+            DCHECK(false);
+    }
+    // For llvm complain
+    return nullptr;
+}
+
+vectorized::IColumn::MutablePtr Schema::get_predicate_column_nullable_ptr(FieldType type, bool is_null) {
+    // Builds the predicate column for `type`; when is_null is set it is wrapped in a ColumnNullable
+    // together with a newly created ColumnUInt8.
+    auto nested_column = Schema::get_predicate_column_ptr(type);
+    if (!is_null) return nested_column;
+    return doris::vectorized::ColumnNullable::create(std::move(nested_column), doris::vectorized::ColumnUInt8::create());
+}
+
+vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(FieldType type) {
+    // Creates the PredicateColumnType column whose element type corresponds to the given OLAP field type.
+    // HLL and OBJECT have no case here, so they end in the default branch like any other unlisted type.
+    switch (type) {
+        // Boolean and integer types.
+        case OLAP_FIELD_TYPE_BOOL:
+            return doris::vectorized::PredicateColumnType<bool>::create();
+        case OLAP_FIELD_TYPE_TINYINT:
+            return doris::vectorized::PredicateColumnType<doris::vectorized::Int8>::create();
+        case OLAP_FIELD_TYPE_SMALLINT:
+            return doris::vectorized::PredicateColumnType<doris::vectorized::Int16>::create();
+        case OLAP_FIELD_TYPE_INT:
+            return doris::vectorized::PredicateColumnType<doris::vectorized::Int32>::create();
+        case OLAP_FIELD_TYPE_BIGINT:
+            return doris::vectorized::PredicateColumnType<doris::vectorized::Int64>::create();
+        case OLAP_FIELD_TYPE_LARGEINT:
+            return doris::vectorized::PredicateColumnType<doris::vectorized::Int128>::create();
+
+        // Floating-point types.
+        case OLAP_FIELD_TYPE_FLOAT:
+            return doris::vectorized::PredicateColumnType<doris::vectorized::Float32>::create();
+        case OLAP_FIELD_TYPE_DOUBLE:
+            return doris::vectorized::PredicateColumnType<doris::vectorized::Float64>::create();
+
+        // DATE and DATETIME use uint24_t and uint64_t elements respectively.
+        case OLAP_FIELD_TYPE_DATE:
+            return doris::vectorized::PredicateColumnType<uint24_t>::create();
+        case OLAP_FIELD_TYPE_DATETIME:
+            return doris::vectorized::PredicateColumnType<uint64_t>::create();
+
+        // All string types share the StringValue-based column.
+        case OLAP_FIELD_TYPE_CHAR:
+        case OLAP_FIELD_TYPE_VARCHAR:
+        case OLAP_FIELD_TYPE_STRING:
+            return doris::vectorized::PredicateColumnType<StringValue>::create();
+
+        case OLAP_FIELD_TYPE_DECIMAL:
+            return doris::vectorized::PredicateColumnType<decimal12_t>::create();
+
+        default:
+            DCHECK(false);
+    }
+    // For llvm complain
+    return nullptr;
+}
+
 } // namespace doris
diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h
index 39e3c7914c..2596f9780c 100644
--- a/be/src/olap/schema.h
+++ b/be/src/olap/schema.h
@@ -100,6 +100,14 @@ public:
 
     ~Schema();
 
+    static vectorized::DataTypePtr get_data_type_ptr(FieldType type);
+
+    static vectorized::IColumn::MutablePtr get_predicate_column_ptr(FieldType type);
+
+    static vectorized::IColumn::MutablePtr get_predicate_column_nullable_ptr(FieldType type, bool is_null = false);
+
+    const std::vector<Field*>& columns() const { return _cols; }
+
     const Field* column(ColumnId cid) const { return _cols[cid]; }
 
     Field* mutable_column(ColumnId cid) const { return _cols[cid]; }
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 072a4155a1..5b407d5185 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -18,6 +18,8 @@
 #include "olap/tablet_schema.h"
 
 #include "tablet_meta.h"
+#include "vec/core/block.h"
+#include "vec/data_types/data_type.h"
 
 namespace doris {
 
@@ -490,6 +492,16 @@ void TabletSchema::init_field_index_for_test() {
     }
 }
 
+vectorized::Block TabletSchema::create_block(const std::vector<uint32_t>& return_columns) const {
+    vectorized::Block result; // receives one column per entry of return_columns, in that order
+    for (const uint32_t column_idx : return_columns) {
+        const auto& column = _cols[column_idx];
+        auto column_type = vectorized::IDataType::from_olap_engine(column.type(), column.is_nullable());
+        result.insert({column_type->create_column(), column_type, column.name()});
+    }
+    return result;
+}
+
 bool operator==(const TabletColumn& a, const TabletColumn& b) {
     if (a._unique_id != b._unique_id) return false;
     if (a._col_name != b._col_name) return false;
@@ -548,4 +560,5 @@ bool operator!=(const TabletSchema& a, const TabletSchema& b) {
     return !(a == b);
 }
 
+
 } // namespace doris
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index 0cb894fdfb..ffd8c34bc4 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -26,6 +26,9 @@
 #include "olap/types.h"
 
 namespace doris {
+namespace vectorized {
+class Block;
+}
 
 class TabletColumn {
 public:
@@ -141,6 +144,7 @@ public:
     inline void set_delete_sign_idx(int32_t delete_sign_idx) { _delete_sign_idx = delete_sign_idx; }
     inline bool has_sequence_col() const { return _sequence_col_idx != -1; }
     inline int32_t sequence_col_idx() const { return _sequence_col_idx; }
+    vectorized::Block create_block(const std::vector<uint32_t>& return_columns) const;
 
 private:
     // Only for unit test
diff --git a/be/src/olap/tuple_reader.cpp b/be/src/olap/tuple_reader.cpp
index 9eba1d204f..5c15c2b42f 100644
--- a/be/src/olap/tuple_reader.cpp
+++ b/be/src/olap/tuple_reader.cpp
@@ -196,7 +196,7 @@ OLAPStatus TupleReader::_unique_key_next_row(RowCursor* row_cursor, MemPool* mem
             cur_delete_flag = _next_delete_flag;
         }
 
-        // if reader needs to filter delete row and current delete_flag is ture,
+        // if reader needs to filter delete row and current delete_flag is true,
         // then continue
         if (!(cur_delete_flag && _filter_delete)) {
             break;
diff --git a/be/src/runtime/datetime_value.cpp b/be/src/runtime/datetime_value.cpp
index 9ec8ccbb6a..b545f5512e 100644
--- a/be/src/runtime/datetime_value.cpp
+++ b/be/src/runtime/datetime_value.cpp
@@ -1466,6 +1466,8 @@ bool DateTimeValue::from_date_format_str(const char* format, int format_len, con
 }
 
 bool DateTimeValue::date_add_interval(const TimeInterval& interval, TimeUnit unit) {
+    if (!is_valid_date()) return false;
+
     int sign = interval.is_neg ? -1 : 1;
     switch (unit) {
     case MICROSECOND:
diff --git a/be/src/runtime/datetime_value.h b/be/src/runtime/datetime_value.h
index 096a24d096..4f3a310e48 100644
--- a/be/src/runtime/datetime_value.h
+++ b/be/src/runtime/datetime_value.h
@@ -30,7 +30,7 @@
 #include "udf/udf.h"
 #include "util/hash_util.hpp"
 #include "util/timezone_utils.h"
-
+#include "vec/runtime/vdatetime_value.h"
 namespace doris {
 
 enum TimeUnit {
@@ -566,7 +566,9 @@ public:
 private:
     // Used to make sure sizeof DateTimeValue
     friend class UnusedClass;
-
+    friend void doris::vectorized::VecDateTimeValue::convert_vec_dt_to_dt(DateTimeValue* dt); 
+    friend void doris::vectorized::VecDateTimeValue::convert_dt_to_vec_dt(DateTimeValue* dt);
+    
     void from_packed_time(int64_t packed_time) {
         _microsecond = packed_time % (1LL << 24);
         int64_t ymdhms = packed_time >> 24;
diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp
index ab4fa972c2..95d0a79101 100644
--- a/be/src/runtime/descriptors.cpp
+++ b/be/src/runtime/descriptors.cpp
@@ -25,6 +25,10 @@
 #include "gen_cpp/Descriptors_types.h"
 #include "gen_cpp/descriptors.pb.h"
 
+#include "vec/core/columns_with_type_and_name.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/data_types/data_type_nullable.h"
+
 namespace doris {
 using boost::algorithm::join;
 
@@ -80,6 +84,21 @@ void SlotDescriptor::to_protobuf(PSlotDescriptor* pslot) const {
     pslot->set_is_materialized(_is_materialized);
 }
 
+vectorized::MutableColumnPtr SlotDescriptor::get_empty_mutable_column() const {
+    // Column created from the slot's data type; for a nullable slot it is wrapped in a ColumnNullable
+    // paired with a new ColumnUInt8.
+    auto nested = type().get_data_type_ptr()->create_column();
+    if (!is_nullable()) return nested;
+    return doris::vectorized::ColumnNullable::create(std::move(nested), doris::vectorized::ColumnUInt8::create());
+}
+
+vectorized::DataTypePtr SlotDescriptor::get_data_type_ptr() const {
+    // Data type of this slot; a nullable slot reports its base type wrapped in DataTypeNullable.
+    auto base_type = type().get_data_type_ptr();
+    if (!is_nullable()) return base_type;
+    return std::make_shared<vectorized::DataTypeNullable>(base_type);
+}
+
 std::string SlotDescriptor::debug_string() const {
     std::stringstream out;
     out << "Slot(id=" << _id << " type=" << _type << " col=" << _col_pos
@@ -378,7 +397,7 @@ void RowDescriptor::to_thrift(std::vector<TTupleId>* row_tuple_ids) {
 }
 
 void RowDescriptor::to_protobuf(
-        google::protobuf::RepeatedField<google::protobuf::int32>* row_tuple_ids) {
+        google::protobuf::RepeatedField<google::protobuf::int32>* row_tuple_ids) const {
     row_tuple_ids->Clear();
     for (auto desc : _tuple_desc_map) {
         row_tuple_ids->Add(desc->id());
diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h
index d668d424ff..ad432097a3 100644
--- a/be/src/runtime/descriptors.h
+++ b/be/src/runtime/descriptors.h
@@ -103,6 +103,9 @@ public:
 
     std::string debug_string() const;
 
+    doris::vectorized::MutableColumnPtr get_empty_mutable_column() const;
+
+    doris::vectorized::DataTypePtr get_data_type_ptr() const;
 private:
     friend class DescriptorTbl;
     friend class TupleDescriptor;
@@ -378,7 +381,6 @@ public:
     int get_row_size() const;
 
     int num_materialized_slots() const {
-        DCHECK(_num_materialized_slots != 0);
         return _num_materialized_slots;
     }
 
@@ -405,7 +407,7 @@ public:
 
     // Populate row_tuple_ids with our ids.
     void to_thrift(std::vector<TTupleId>* row_tuple_ids);
-    void to_protobuf(google::protobuf::RepeatedField<google::protobuf::int32>* row_tuple_ids);
+    void to_protobuf(google::protobuf::RepeatedField<google::protobuf::int32>* row_tuple_ids) const;
 
     // Return true if the tuple ids of this descriptor are a prefix
     // of the tuple ids of other_desc.
diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h
index 3cc2defa02..0b51e47533 100644
--- a/be/src/runtime/exec_env.h
+++ b/be/src/runtime/exec_env.h
@@ -93,6 +93,7 @@ public:
     const std::string& token() const;
     ExternalScanContextMgr* external_scan_context_mgr() { return _external_scan_context_mgr; }
     DataStreamMgr* stream_mgr() { return _stream_mgr; }
+    doris::vectorized::VDataStreamMgr* vstream_mgr() { return _vstream_mgr; }
     ResultBufferMgr* result_mgr() { return _result_mgr; }
     ResultQueueMgr* result_queue_mgr() { return _result_queue_mgr; }
     ClientCache<BackendServiceClient>* client_cache() { return _backend_client_cache; }
@@ -163,6 +164,7 @@ private:
     // Leave protected so that subclasses can override
     ExternalScanContextMgr* _external_scan_context_mgr = nullptr;
     DataStreamMgr* _stream_mgr = nullptr;
+    doris::vectorized::VDataStreamMgr* _vstream_mgr = nullptr;
     ResultBufferMgr* _result_mgr = nullptr;
     ResultQueueMgr* _result_queue_mgr = nullptr;
     ClientCache<BackendServiceClient>* _backend_client_cache = nullptr;
diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp
index a172bb6437..35630d05a6 100644
--- a/be/src/runtime/exec_env_init.cpp
+++ b/be/src/runtime/exec_env_init.cpp
@@ -63,6 +63,7 @@
 #include "util/parse_util.h"
 #include "util/pretty_printer.h"
 #include "util/priority_thread_pool.hpp"
+#include "vec/runtime/vdata_stream_mgr.h"
 
 namespace doris {
 
@@ -85,6 +86,7 @@ Status ExecEnv::_init(const std::vector<StorePath>& store_paths) {
     _store_paths = store_paths;
     _external_scan_context_mgr = new ExternalScanContextMgr(this);
     _stream_mgr = new DataStreamMgr();
+    _vstream_mgr = new doris::vectorized::VDataStreamMgr();
     _result_mgr = new ResultBufferMgr();
     _result_queue_mgr = new ResultQueueMgr();
     _backend_client_cache = new BackendServiceClientCache(config::max_client_cache_size_per_host);
diff --git a/be/src/runtime/fold_constant_executor.cpp b/be/src/runtime/fold_constant_executor.cpp
index 94d3d9e096..f093c04235 100644
--- a/be/src/runtime/fold_constant_executor.cpp
+++ b/be/src/runtime/fold_constant_executor.cpp
@@ -28,6 +28,9 @@
 #include "common/object_pool.h"
 #include "common/status.h"
 
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vexpr_context.h"
+
 #include "gen_cpp/internal_service.pb.h"
 #include "gen_cpp/PaloInternalService_types.h"
 
@@ -79,7 +82,7 @@ Status FoldConstantExecutor::fold_constant_expr(
                 expr_result.set_success(false);
             } else {
                 expr_result.set_success(true);
-                result = _get_result(src, ctx->root()->type().type);
+                result = _get_result(src, 0, ctx->root()->type().type);
             }
 
             expr_result.set_content(std::move(result));
@@ -96,6 +99,69 @@ Status FoldConstantExecutor::fold_constant_expr(
     return Status::OK();
 }
 
+Status FoldConstantExecutor::fold_constant_vexpr(
+        const TFoldConstantParams& params, PConstantExprResult* response) {
+    // Evaluates every expression of params.expr_map through the vectorized expression classes and
+    // stores each result, rendered as a string together with its thrift type, in `response`.
+    const auto& expr_map = params.expr_map;
+    auto expr_result_map = response->mutable_expr_result_map();
+    TQueryGlobals query_globals = params.query_globals;
+    // init
+    Status status = _init(query_globals);
+    if (UNLIKELY(!status.ok())) {
+        LOG(WARNING) << "Failed to init mem trackers, msg: " << status.get_error_msg();
+        return status;
+    }
+
+    for (const auto& m : expr_map) {
+        PExprResultMap pexpr_result_map;
+        for (const auto& n : m.second) {
+            vectorized::VExprContext* ctx = nullptr;
+            const TExpr& texpr = n.second;
+            // create expr tree from TExpr
+            RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(&_pool, texpr, &ctx));
+            // prepare and open context
+            status = _prepare_and_open(ctx);
+            if (UNLIKELY(!status.ok())) {
+                LOG(WARNING) << "Failed to prepare and open vexpr context, msg: " << status.get_error_msg();
+                return status;
+            }
+            // The expression is executed against a scratch block holding just one ColumnUInt8 column.
+            vectorized::Block tmp_block;
+            tmp_block.insert({vectorized::ColumnUInt8::create(1),
+                    std::make_shared<vectorized::DataTypeUInt8>(), ""});
+            int result_column = -1;
+            // calc vexpr
+            RETURN_IF_ERROR(ctx->execute(&tmp_block, &result_column));
+            DCHECK(result_column != -1);
+            PrimitiveType root_type = ctx->root()->type().type;
+            // convert to thrift type
+            TPrimitiveType::type t_type = doris::to_thrift(root_type);
+
+            // collect result
+            PExprResult expr_result;
+            string result;
+            const auto& column_ptr = tmp_block.get_by_position(result_column).column;
+            if (column_ptr->is_null_at(0)) {
+                expr_result.set_success(false); // a null result is reported as a failure with empty content
+            } else {
+                expr_result.set_success(true);
+                auto string_ref = column_ptr->get_data_at(0);
+                result = _get_result<true>((void*)string_ref.data, string_ref.size, root_type);
+            }
+
+            expr_result.set_content(std::move(result));
+            expr_result.mutable_type()->set_type(t_type);
+            pexpr_result_map.mutable_map()->insert({n.first, expr_result});
+            // close context expr
+            ctx->close(_runtime_state.get());
+        }
+        expr_result_map->insert({m.first, pexpr_result_map});
+    }
+
+    return Status::OK();
+}
+
 Status FoldConstantExecutor::_init(const TQueryGlobals& query_globals) {
     // init runtime state, runtime profile
     TPlanFragmentExecParams params;
@@ -128,12 +194,14 @@ Status FoldConstantExecutor::_init(const TQueryGlobals& query_globals) {
     return Status::OK();
 }
 
-Status FoldConstantExecutor::_prepare_and_open(ExprContext* ctx) {
-    ctx->prepare(_runtime_state.get(), RowDescriptor(), _mem_tracker);
+template <typename Context> // NOTE(review): defined in this .cpp, so callers outside this file would need an explicit instantiation -- confirm none exist
+Status FoldConstantExecutor::_prepare_and_open(Context* ctx) {
+    RETURN_IF_ERROR(ctx->prepare(_runtime_state.get(), RowDescriptor(), _mem_tracker)); // a failed prepare() returns before open() is attempted
     return ctx->open(_runtime_state.get());
 }
 
-string FoldConstantExecutor::_get_result(void* src, PrimitiveType slot_type){
+template <bool is_vec>
+string FoldConstantExecutor::_get_result(void* src, size_t size, PrimitiveType slot_type){
     switch (slot_type) {
     case TYPE_BOOLEAN: {
         bool val = *reinterpret_cast<const bool*>(src);
@@ -172,14 +240,24 @@ string FoldConstantExecutor::_get_result(void* src, PrimitiveType slot_type){
     case TYPE_STRING:
     case TYPE_HLL:
     case TYPE_OBJECT: {
+        if constexpr (is_vec) {
+            return std::string((char*)src, size);
+        }
         return (reinterpret_cast<StringValue*>(src))->to_string();
     }
     case TYPE_DATE:
     case TYPE_DATETIME: {
-        const DateTimeValue date_value = *reinterpret_cast<DateTimeValue*>(src);
-        char str[MAX_DTVALUE_STR_LEN];
-        date_value.to_string(str);
-        return str;
+        if constexpr (is_vec) {
+            auto date_value = reinterpret_cast<vectorized::VecDateTimeValue*>(src);
+            char str[MAX_DTVALUE_STR_LEN];
+            date_value->to_string(str);
+            return str;
+        } else {
+            const DateTimeValue date_value = *reinterpret_cast<DateTimeValue *>(src);
+            char str[MAX_DTVALUE_STR_LEN];
+            date_value.to_string(str);
+            return str;
+        }
     }
     case TYPE_DECIMALV2: {
         return reinterpret_cast<DecimalV2Value*>(src)->to_string();
diff --git a/be/src/runtime/fold_constant_executor.h b/be/src/runtime/fold_constant_executor.h
index 93ee988836..84c52f781d 100644
--- a/be/src/runtime/fold_constant_executor.h
+++ b/be/src/runtime/fold_constant_executor.h
@@ -36,13 +36,18 @@ class FoldConstantExecutor {
 public:
     // fold constant expr
     Status fold_constant_expr(const TFoldConstantParams& params, PConstantExprResult* response);
+
+    // fold constant vexpr
+    Status fold_constant_vexpr(const TFoldConstantParams& params, PConstantExprResult* response);
 private:
     // init runtime_state and mem_tracker
     Status _init(const TQueryGlobals& query_globals);
     // prepare expr
-    Status _prepare_and_open(ExprContext* ctx);
+    template <typename Context>
+    Status _prepare_and_open(Context* ctx);
 
-    std::string _get_result(void* src, PrimitiveType slot_type);
+    template <bool is_vec = false>
+    std::string _get_result(void* src, size_t size, PrimitiveType slot_type);
 
     std::unique_ptr<RuntimeState> _runtime_state;
     std::shared_ptr<MemTracker> _mem_tracker;
diff --git a/be/src/runtime/mysql_result_writer.cpp b/be/src/runtime/mysql_result_writer.cpp
index 1c2589cc81..eaf1bd7763 100644
--- a/be/src/runtime/mysql_result_writer.cpp
+++ b/be/src/runtime/mysql_result_writer.cpp
@@ -29,6 +29,13 @@
 #include "util/mysql_row_buffer.h"
 #include "util/types.h"
 
+#include "vec/core/block.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/assert_cast.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vexpr_context.h"
+
 namespace doris {
 
 MysqlResultWriter::MysqlResultWriter(BufferControlBlock* sinker,
@@ -204,7 +211,6 @@ int MysqlResultWriter::_add_row_value(int index, const TypeDescriptor& type, voi
 }
 
 Status MysqlResultWriter::_add_one_row(TupleRow* row) {
-    SCOPED_TIMER(_convert_tuple_timer);
     _row_buffer->reset();
     int num_columns = _output_expr_ctxs.size();
     int buf_ret = 0;
diff --git a/be/src/runtime/mysql_result_writer.h b/be/src/runtime/mysql_result_writer.h
index 2754e0c868..8b96fec555 100644
--- a/be/src/runtime/mysql_result_writer.h
+++ b/be/src/runtime/mysql_result_writer.h
@@ -21,6 +21,8 @@
 #include "runtime/result_writer.h"
 #include "runtime/runtime_state.h"
 
+#include "vec/data_types/data_type.h"
+
 namespace doris {
 
 class TupleRow;
diff --git a/be/src/runtime/plan_fragment_executor.cpp b/be/src/runtime/plan_fragment_executor.cpp
index a464211fdb..fe64a4faf3 100644
--- a/be/src/runtime/plan_fragment_executor.cpp
+++ b/be/src/runtime/plan_fragment_executor.cpp
@@ -42,6 +42,10 @@
 #include "util/uid_util.h"
 #include "util/logging.h"
 
+#include "vec/core/block.h"
+#include "vec/exec/vexchange_node.h"
+#include "vec/runtime/vdata_stream_mgr.h"
+
 namespace doris {
 
 PlanFragmentExecutor::PlanFragmentExecutor(ExecEnv* exec_env,
@@ -159,6 +163,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request,
         int num_senders = find_with_default(params.per_exch_num_senders, exch_node->id(), 0);
         DCHECK_GT(num_senders, 0);
         if (_runtime_state->enable_vectorized_exec()) {
+            static_cast<doris::vectorized::VExchangeNode*>(exch_node)->set_num_senders(num_senders);
         } else {
             static_cast<ExchangeNode*>(exch_node)->set_num_senders(num_senders);
         }
@@ -214,6 +219,7 @@ Status PlanFragmentExecutor::prepare(const TExecPlanFragmentParams& request,
 
     _row_batch.reset(new RowBatch(_plan->row_desc(), _runtime_state->batch_size(),
                                   _runtime_state->instance_mem_tracker().get()));
+    _block.reset(new doris::vectorized::Block());
     // _row_batch->tuple_data_pool()->set_limits(*_runtime_state->mem_trackers());
     VLOG_NOTICE << "plan_root=\n" << _plan->debug_string();
     _prepared = true;
@@ -244,6 +250,7 @@ Status PlanFragmentExecutor::open() {
     }
     Status status = Status::OK();
     if (_runtime_state->enable_vectorized_exec()) {
+        status = open_vectorized_internal();
     } else {
         status = open_internal();
     }
@@ -259,6 +266,82 @@ Status PlanFragmentExecutor::open() {
     return status;
 }
 
+Status PlanFragmentExecutor::open_vectorized_internal() {
+    // Vectorized counterpart of open_internal(): opens the plan, then feeds every block to the sink.
+    {
+        SCOPED_CPU_TIMER(_fragment_cpu_timer);
+        SCOPED_TIMER(profile()->total_time_counter());
+        RETURN_IF_ERROR(_plan->open(_runtime_state.get()));
+    }
+    if (_sink == nullptr) {
+        return Status::OK();
+    }
+    {
+        SCOPED_CPU_TIMER(_fragment_cpu_timer);
+        RETURN_IF_ERROR(_sink->open(runtime_state()));
+    }
+    doris::vectorized::Block* block = nullptr;
+    while (true) {
+        {
+            SCOPED_CPU_TIMER(_fragment_cpu_timer);
+            RETURN_IF_ERROR(get_vectorized_internal(&block));
+        }
+        if (block == nullptr) { // get_vectorized_internal() hands back nullptr once no block is left
+            break;
+        }
+
+        SCOPED_TIMER(profile()->total_time_counter());
+        SCOPED_CPU_TIMER(_fragment_cpu_timer);
+        // Collect this plan and sub plan statistics, and send to parent plan.
+        if (_collect_query_statistics_with_every_batch) {
+            _collect_query_statistics();
+        }
+        RETURN_IF_ERROR(_sink->send(runtime_state(), block));
+    }
+    {
+        SCOPED_TIMER(profile()->total_time_counter());
+        _collect_query_statistics();
+        Status status;
+        {
+            std::lock_guard<std::mutex> l(_status_lock);
+            status = _status;
+        }
+        status = _sink->close(runtime_state(), status);
+        RETURN_IF_ERROR(status);
+    }
+    // Resetting to nullptr ensures that the d'tor won't double-close the sink.
+    _sink.reset(nullptr);
+    _done = true;
+
+    stop_report_thread();
+    send_report(true);
+
+    return Status::OK();
+}
+Status PlanFragmentExecutor::get_vectorized_internal(::doris::vectorized::Block** block) {
+    if (_done) { // the plan already reported completion: nothing more to hand out
+        *block = nullptr;
+        return Status::OK();
+    }
+    // Pull from the plan root into the executor-owned _block until it holds rows or the plan is done.
+    auto vexec_node = static_cast<doris::ExecNode*>(_plan);
+    while (!_done) {
+        _block->clear_column_data(vexec_node->row_desc().num_materialized_slots());
+        SCOPED_TIMER(profile()->total_time_counter());
+        RETURN_IF_ERROR(vexec_node->get_next(_runtime_state.get(), _block.get(), &_done));
+
+        if (_block->rows() > 0) {
+            COUNTER_UPDATE(_rows_produced_counter, _block->rows());
+            *block = _block.get(); // points at _block, whose column data is cleared again on the next call
+            break;
+        }
+        // Empty result: report no block; the loop retries unless get_next() set _done.
+        *block = nullptr;
+    }
+
+    return Status::OK();
+}
+
 Status PlanFragmentExecutor::open_internal() {
     {
         SCOPED_CPU_TIMER(_fragment_cpu_timer);
@@ -521,8 +604,14 @@ void PlanFragmentExecutor::cancel() {
                   .query_id(_query_id).instance_id(_runtime_state->fragment_instance_id());
     DCHECK(_prepared);
     _runtime_state->set_is_cancelled(true);
-    _runtime_state->exec_env()->stream_mgr()->cancel(_runtime_state->fragment_instance_id());
-    _runtime_state->exec_env()->result_mgr()->cancel(_runtime_state->fragment_instance_id());
+
+    // Must cancel the engine's stream manager to avoid a deadlock in the exchange node; result_mgr is cancelled for both engines.
+    if (_runtime_state->enable_vectorized_exec()) {
+        _runtime_state->exec_env()->vstream_mgr()->cancel(_runtime_state->fragment_instance_id());
+    } else {
+        _runtime_state->exec_env()->stream_mgr()->cancel(_runtime_state->fragment_instance_id());
+    }
+    _runtime_state->exec_env()->result_mgr()->cancel(_runtime_state->fragment_instance_id());
 }
 
 void PlanFragmentExecutor::set_abort() {
diff --git a/be/src/runtime/plan_fragment_executor.h b/be/src/runtime/plan_fragment_executor.h
index 1fc4578240..3cdb6bb249 100644
--- a/be/src/runtime/plan_fragment_executor.h
+++ b/be/src/runtime/plan_fragment_executor.h
@@ -30,11 +30,11 @@
 #include "runtime/runtime_state.h"
 #include "util/hash_util.hpp"
 #include "util/time.h"
+#include "vec/core/block.h"
 
 namespace doris {
 
 class QueryFragmentsCtx;
-class HdfsFsCache;
 class ExecNode;
 class RowDescriptor;
 class RowBatch;
@@ -195,6 +195,7 @@ private:
     // Created in prepare (if required), owned by this object.
     std::unique_ptr<DataSink> _sink;
     std::unique_ptr<RowBatch> _row_batch;
+    std::unique_ptr<doris::vectorized::Block> _block;
 
     // Number of rows returned by this fragment
     RuntimeProfile::Counter* _rows_produced_counter;
@@ -234,9 +235,11 @@ private:
     // have been closed, a final report will have been sent and the report thread will
     // have been stopped. _sink will be set to nullptr after successful execution.
     Status open_internal();
+    Status open_vectorized_internal();
 
     // Executes get_next() logic and returns resulting status.
     Status get_next_internal(RowBatch** batch);
+    Status get_vectorized_internal(::doris::vectorized::Block** block);
 
     // Stops report thread, if one is running. Blocks until report thread terminates.
     // Idempotent.
diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h
index 36113a55cd..224957f0de 100644
--- a/be/src/runtime/primitive_type.h
+++ b/be/src/runtime/primitive_type.h
@@ -27,6 +27,13 @@
 #include "runtime/decimalv2_value.h"
 #include "runtime/large_int_value.h"
 #include "runtime/string_value.h"
+#include "udf/udf.h"
+
+#include "vec/columns/column_decimal.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/columns_number.h"
+#include "vec/core/types.h"
+#include "vec/runtime/vdatetime_value.h"
 
 namespace doris {
 
@@ -60,6 +67,51 @@ enum PrimitiveType {
     TYPE_STRING, /* 23 */
 };
 
+inline PrimitiveType convert_type_to_primitive(FunctionContext::Type type) {
+    switch (type) { // maps each FunctionContext::Type to the PrimitiveType enumerator of the same name
+    case FunctionContext::Type::INVALID_TYPE:
+        return PrimitiveType::INVALID_TYPE;
+    case FunctionContext::Type::TYPE_DOUBLE:
+        return PrimitiveType::TYPE_DOUBLE;
+    case FunctionContext::Type::TYPE_NULL:
+        return PrimitiveType::TYPE_NULL;
+    case FunctionContext::Type::TYPE_CHAR:
+        return PrimitiveType::TYPE_CHAR;
+    case FunctionContext::Type::TYPE_VARCHAR:
+        return PrimitiveType::TYPE_VARCHAR;
+    case FunctionContext::Type::TYPE_STRING:
+        return PrimitiveType::TYPE_STRING;
+    case FunctionContext::Type::TYPE_DATETIME:
+        return PrimitiveType::TYPE_DATETIME;
+    case FunctionContext::Type::TYPE_DECIMALV2:
+        return PrimitiveType::TYPE_DECIMALV2;
+    case FunctionContext::Type::TYPE_BOOLEAN:
+        return PrimitiveType::TYPE_BOOLEAN;
+    case FunctionContext::Type::TYPE_ARRAY:
+        return PrimitiveType::TYPE_ARRAY;
+    case FunctionContext::Type::TYPE_OBJECT:
+        return PrimitiveType::TYPE_OBJECT;
+    case FunctionContext::Type::TYPE_HLL:
+        return PrimitiveType::TYPE_HLL;
+    case FunctionContext::Type::TYPE_TINYINT:
+        return PrimitiveType::TYPE_TINYINT;
+    case FunctionContext::Type::TYPE_SMALLINT:
+        return PrimitiveType::TYPE_SMALLINT;
+    case FunctionContext::Type::TYPE_INT:
+        return PrimitiveType::TYPE_INT;
+    case FunctionContext::Type::TYPE_BIGINT:
+        return PrimitiveType::TYPE_BIGINT;
+    case FunctionContext::Type::TYPE_LARGEINT:
+        return PrimitiveType::TYPE_LARGEINT;
+    case FunctionContext::Type::TYPE_DATE:
+        return PrimitiveType::TYPE_DATE;
+    default:
+        DCHECK(false); // NOTE(review): no case maps to TYPE_FLOAT — confirm whether FunctionContext::Type defines it
+    }
+    // Fallback for any value not listed above.
+    return PrimitiveType::INVALID_TYPE;
+}
+
 inline bool is_enumeration_type(PrimitiveType type) {
     switch (type) {
     case TYPE_FLOAT:
@@ -98,6 +150,10 @@ inline bool is_string_type(PrimitiveType type) {
     return type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_STRING;
 }
 
+inline bool has_variable_type(PrimitiveType type) { // like is_string_type(), but also accepts TYPE_OBJECT
+    return type == TYPE_CHAR || type == TYPE_VARCHAR || type == TYPE_OBJECT || type == TYPE_STRING; // NOTE(review): TYPE_HLL is not listed — confirm intended
+}
+
 // Returns the byte size of 'type'  Returns 0 for variable length types.
 inline int get_byte_size(PrimitiveType type) {
     switch (type) {
@@ -125,9 +181,9 @@ inline int get_byte_size(PrimitiveType type) {
     case TYPE_DOUBLE:
         return 8;
 
-    case TYPE_LARGEINT:
     case TYPE_DATETIME:
     case TYPE_DATE:
+    case TYPE_LARGEINT:
     case TYPE_DECIMALV2:
         return 16;
 
@@ -220,63 +276,78 @@ struct PrimitiveTypeTraits {};
 template <>
 struct PrimitiveTypeTraits<TYPE_BOOLEAN> {
     using CppType = bool;
+    using ColumnType = vectorized::ColumnUInt8; // booleans use the UInt8 column type
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_TINYINT> {
     using CppType = int8_t;
+    using ColumnType = vectorized::ColumnInt8;
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_SMALLINT> {
     using CppType = int16_t;
+    using ColumnType = vectorized::ColumnInt16;
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_INT> {
     using CppType = int32_t;
+    using ColumnType = vectorized::ColumnInt32;
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_BIGINT> {
     using CppType = int64_t;
+    using ColumnType = vectorized::ColumnInt64;
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_FLOAT> {
     using CppType = float;
+    using ColumnType = vectorized::ColumnFloat32;
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_TIME> {
     using CppType = double;
+    using ColumnType = vectorized::ColumnFloat64; // same CppType and column type as TYPE_DOUBLE
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_DOUBLE> {
     using CppType = double;
+    using ColumnType = vectorized::ColumnFloat64;
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_DATE> {
-    using CppType = DateTimeValue;
+    using CppType = doris::DateTimeValue;
+    using ColumnType = vectorized::ColumnVector<vectorized::DateTime>; // same column type as TYPE_DATETIME
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_DATETIME> {
-    using CppType = DateTimeValue;
+    using CppType = doris::DateTimeValue;
+    using ColumnType = vectorized::ColumnVector<vectorized::DateTime>;
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_DECIMALV2> {
     using CppType = DecimalV2Value;
+    using ColumnType = vectorized::ColumnDecimal<vectorized::Decimal128>;
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_LARGEINT> {
     using CppType = __int128_t;
+    using ColumnType = vectorized::ColumnInt128;
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_CHAR> {
     using CppType = StringValue;
+    using ColumnType = vectorized::ColumnString; // CHAR, VARCHAR and STRING all use ColumnString
 };
 template <>
 struct PrimitiveTypeTraits<TYPE_VARCHAR> {
     using CppType = StringValue;
+    using ColumnType = vectorized::ColumnString;
 };
 
 template <>
 struct PrimitiveTypeTraits<TYPE_STRING> {
     using CppType = StringValue;
+    using ColumnType = vectorized::ColumnString;
 };
 
 } // namespace doris
diff --git a/be/src/runtime/raw_value.h b/be/src/runtime/raw_value.h
index e9e270e85b..e16bbf5e13 100644
--- a/be/src/runtime/raw_value.h
+++ b/be/src/runtime/raw_value.h
@@ -87,6 +87,9 @@ public:
     // TODO: fix get_hash_value
     static uint32_t zlib_crc32(const void* value, const TypeDescriptor& type, uint32_t seed);
 
+    // Same as the function above, but takes an explicit length; only used by the vectorized execution engine.
+    static uint32_t zlib_crc32(const void* value, size_t len, const TypeDescriptor& type, uint32_t seed);
+
     // Compares both values.
     // Return value is < 0  if v1 < v2, 0 if v1 == v2, > 0 if v1 > v2.
     static int compare(const void* v1, const void* v2, const TypeDescriptor& type);
@@ -399,6 +402,59 @@ inline uint32_t RawValue::zlib_crc32(const void* v, const TypeDescriptor& type,
     }
 }
 
+// NOTE: this overload is only used to split data. Decimals are hashed the same way as in the old
+// Doris hash function, because hardware crc32 does not give the same result as zlib crc32.
+inline uint32_t RawValue::zlib_crc32(const void* v, size_t len, const TypeDescriptor& type, uint32_t seed) {
+    // Hash_combine with v = 0
+    if (v == nullptr) {
+        uint32_t value = 0x9e3779b9;
+        return seed ^ (value + (seed << 6) + (seed >> 2));
+    }
+    // `len` is only consulted for the string-like types below; all other types ignore it.
+    switch (type.type) {
+    case TYPE_VARCHAR:
+    case TYPE_HLL:
+    case TYPE_STRING:
+    case TYPE_CHAR: {
+        return HashUtil::zlib_crc_hash(v, len, seed);
+    }
+
+    case TYPE_BOOLEAN:
+    case TYPE_TINYINT:
+        return HashUtil::zlib_crc_hash(v, 1, seed);
+    case TYPE_SMALLINT:
+        return HashUtil::zlib_crc_hash(v, 2, seed);
+    case TYPE_INT:
+        return HashUtil::zlib_crc_hash(v, 4, seed);
+    case TYPE_BIGINT:
+        return HashUtil::zlib_crc_hash(v, 8, seed);
+    case TYPE_LARGEINT:
+        return HashUtil::zlib_crc_hash(v, 16, seed);
+    case TYPE_FLOAT:
+        return HashUtil::zlib_crc_hash(v, 4, seed);
+    case TYPE_DOUBLE:
+        return HashUtil::zlib_crc_hash(v, 8, seed);
+    case TYPE_DATE:
+    case TYPE_DATETIME: {
+        const auto* date_val = static_cast<const vectorized::VecDateTimeValue*>(v);
+        char buf[64];
+        int buf_len = date_val->to_buffer(buf); // distinct name so the `len` parameter is not shadowed
+        return HashUtil::zlib_crc_hash(buf, buf_len, seed);
+    }
+
+    case TYPE_DECIMALV2: {
+        const auto* dec_val = static_cast<const DecimalV2Value*>(v);
+        int64_t int_val = dec_val->int_value();
+        int32_t frac_val = dec_val->frac_value();
+        seed = HashUtil::zlib_crc_hash(&int_val, sizeof(int_val), seed); // integer part first, then the fraction
+        return HashUtil::zlib_crc_hash(&frac_val, sizeof(frac_val), seed);
+    }
+    default:
+        DCHECK(false) << "invalid type: " << type;
+        return 0;
+    }
+}
+
 } // namespace doris
 
 #endif
diff --git a/be/src/runtime/result_sink.cpp b/be/src/runtime/result_sink.cpp
index 206b0f0c77..60538caee4 100644
--- a/be/src/runtime/result_sink.cpp
+++ b/be/src/runtime/result_sink.cpp
@@ -29,6 +29,8 @@
 #include "runtime/runtime_state.h"
 #include "util/uid_util.h"
 
+#include "vec/exprs/vexpr.h"
+
 namespace doris {
 
 ResultSink::ResultSink(const RowDescriptor& row_desc, const std::vector<TExpr>& t_output_expr,
diff --git a/be/src/runtime/row_batch.cpp b/be/src/runtime/row_batch.cpp
index 85ba3c778f..3e7914e2f1 100644
--- a/be/src/runtime/row_batch.cpp
+++ b/be/src/runtime/row_batch.cpp
@@ -30,8 +30,8 @@
 #include "runtime/string_value.h"
 #include "runtime/tuple_row.h"
 
-//#include "vec/columns/column_vector.h"
-//#include "vec/core/block.h"
+#include "vec/columns/column_vector.h"
+#include "vec/core/block.h"
 
 using std::vector;
 
@@ -625,6 +625,57 @@ void RowBatch::transfer_resource_ownership(RowBatch* dest) {
     reset();
 }
 
+vectorized::Block RowBatch::convert_to_vec_block() const { // builds a Block with one column per slot of _row_desc
+    std::vector<vectorized::MutableColumnPtr> columns;
+    for (const auto tuple_desc : _row_desc.tuple_descriptors()) {
+        for (const auto slot_desc : tuple_desc->slots()) {
+            columns.emplace_back(slot_desc->get_empty_mutable_column());
+        }
+    }
+    // Flatten the slot descriptors in the same tuple/slot order, remembering each slot's tuple index.
+    std::vector<SlotDescriptor*> slot_descs;
+    std::vector<int> tuple_idx;
+    int column_numbers = 0;
+    for (int i = 0; i < _row_desc.tuple_descriptors().size(); ++i) {
+        auto tuple_desc = _row_desc.tuple_descriptors()[i];
+        for (int j = 0; j < tuple_desc->slots().size(); ++j) {
+            slot_descs.push_back(tuple_desc->slots()[j]);
+            tuple_idx.push_back(i);
+        }
+        column_numbers += tuple_desc->slots().size();
+    }
+    for (int i = 0; i < column_numbers; ++i) { // column by column: every row is visited once per slot
+        auto slot_desc = slot_descs[i];
+        for (int j = 0; j < _num_rows; ++j) {
+            TupleRow* src_row = get_row(j);
+            auto tuple = src_row->get_tuple(tuple_idx[i]);
+            if (slot_desc->is_nullable() && tuple->is_null(slot_desc->null_indicator_offset())) {
+                columns[i]->insert_data(nullptr, 0); // NOTE(review): presumably appends a NULL/default entry — confirm
+            } else if (slot_desc->type().is_string_type()) {
+                auto string_value =
+                        static_cast<const StringValue*>(tuple->get_slot(slot_desc->tuple_offset()));
+                columns[i]->insert_data(string_value->ptr, string_value->len);
+            } else {
+                columns[i]->insert_data(
+                        static_cast<const char*>(tuple->get_slot(slot_desc->tuple_offset())),
+                        slot_desc->slot_size());
+            }
+        }
+    }
+    // Pair every filled column with its slot's data type and column name, again in tuple/slot order.
+    doris::vectorized::ColumnsWithTypeAndName columns_with_type_and_name;
+    auto n_columns = 0;
+    for (const auto tuple_desc : _row_desc.tuple_descriptors()) {
+        for (const auto slot_desc : tuple_desc->slots()) {
+            columns_with_type_and_name.emplace_back(columns[n_columns++]->get_ptr(),
+                                                    slot_desc->get_data_type_ptr(),
+                                                    slot_desc->col_name());
+        }
+    }
+
+    return {columns_with_type_and_name};
+}
+
 size_t RowBatch::get_batch_size(const TRowBatch& batch) {
     size_t result = batch.tuple_data.size();
     result += batch.row_tuples.size() * sizeof(TTupleId);
@@ -688,6 +739,7 @@ void RowBatch::deep_copy_to(RowBatch* dst) {
     }
     dst->commit_rows(_num_rows);
 }
+
 // TODO: consider computing size of batches as they are built up
 size_t RowBatch::total_byte_size() const {
     size_t result = 0;
diff --git a/be/src/runtime/row_batch.h b/be/src/runtime/row_batch.h
index 596fa7034c..4333c73618 100644
--- a/be/src/runtime/row_batch.h
+++ b/be/src/runtime/row_batch.h
@@ -43,6 +43,7 @@ class TupleRow;
 class TupleDescriptor;
 class PRowBatch;
 
+
 // A RowBatch encapsulates a batch of rows, each composed of a number of tuples.
 // The maximum number of rows is fixed at the time of construction, and the caller
 // can add rows up to that capacity.
@@ -315,7 +316,7 @@ public:
     // we firstly update dest resource, and then reset current resource
     void transfer_resource_ownership(RowBatch* dest);
 
-    void copy_row(const TupleRow* src, TupleRow* dest) const {
+    void copy_row(TupleRow* src, TupleRow* dest) {
         memcpy(dest, src, _num_tuples_per_row * sizeof(Tuple*));
     }
 
@@ -362,6 +363,8 @@ public:
     static size_t get_batch_size(const TRowBatch& batch);
     static size_t get_batch_size(const PRowBatch& batch);
 
+    vectorized::Block convert_to_vec_block() const;
+
     int num_rows() const { return _num_rows; }
     int capacity() const { return _capacity; }
 
diff --git a/be/src/runtime/string_value.hpp b/be/src/runtime/string_value.hpp
index dd29bff24c..c44115d257 100644
--- a/be/src/runtime/string_value.hpp
+++ b/be/src/runtime/string_value.hpp
@@ -42,22 +42,19 @@ static inline int string_compare(const char* s1, int64_t n1, const char* s2, int
                                  int64_t len) {
     DCHECK_EQ(len, std::min(n1, n2));
 #ifdef __SSE4_2__
-    if (CpuInfo::is_supported(CpuInfo::SSE4_2)) {
-        while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) {
-            __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
-            __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
-            int chars_match =
-                    _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1,
-                                 sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE);
-            if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) {
-                return (unsigned char)s1[chars_match] - (unsigned char)s2[chars_match];
-            }
-            len -= sse_util::CHARS_PER_128_BIT_REGISTER;
-            s1 += sse_util::CHARS_PER_128_BIT_REGISTER;
-            s2 += sse_util::CHARS_PER_128_BIT_REGISTER;
+    while (len >= sse_util::CHARS_PER_128_BIT_REGISTER) {
+        __m128i xmm0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s1));
+        __m128i xmm1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(s2));
+        int chars_match =
+                _mm_cmpestri(xmm0, sse_util::CHARS_PER_128_BIT_REGISTER, xmm1,
+                             sse_util::CHARS_PER_128_BIT_REGISTER, sse_util::STRCMP_MODE);
+        if (chars_match != sse_util::CHARS_PER_128_BIT_REGISTER) {
+            return (unsigned char)s1[chars_match] - (unsigned char)s2[chars_match];
         }
+        len -= sse_util::CHARS_PER_128_BIT_REGISTER;
+        s1 += sse_util::CHARS_PER_128_BIT_REGISTER;
+        s2 += sse_util::CHARS_PER_128_BIT_REGISTER;
     }
-
 #endif
     unsigned char u1, u2;
     while (len-- > 0) {
diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h
index e40bc5b0a1..c2adca90fb 100644
--- a/be/src/runtime/types.h
+++ b/be/src/runtime/types.h
@@ -28,6 +28,12 @@
 #include "runtime/collection_value.h"
 #include "runtime/primitive_type.h"
 #include "thrift/protocol/TDebugProtocol.h"
+#include "vec/data_types/data_type_bitmap.h"
+#include "vec/data_types/data_type_date.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
 
 namespace doris {
 
@@ -272,6 +278,58 @@ struct TypeDescriptor {
         return -1;
     }
 
+    inline doris::vectorized::DataTypePtr get_data_type_ptr() const {
+        switch (type) { // returns a newly allocated vectorized data type chosen by this descriptor's `type`
+        case TYPE_BOOLEAN:
+            return std::make_shared<vectorized::DataTypeUInt8>();
+
+        case TYPE_TINYINT:
+            return std::make_shared<vectorized::DataTypeInt8>();
+
+        case TYPE_SMALLINT:
+            return std::make_shared<vectorized::DataTypeInt16>();
+
+        case TYPE_INT:
+            return std::make_shared<vectorized::DataTypeInt32>();
+
+        case TYPE_FLOAT:
+            return std::make_shared<vectorized::DataTypeFloat32>();
+
+        case TYPE_BIGINT:
+            return std::make_shared<vectorized::DataTypeInt64>();
+
+        case TYPE_LARGEINT:
+            return std::make_shared<vectorized::DataTypeInt128>();
+        case TYPE_DATE:
+            return std::make_shared<vectorized::DataTypeDate>();
+        case TYPE_DATETIME:
+            return std::make_shared<vectorized::DataTypeDateTime>();
+        case TYPE_TIME:
+        case TYPE_DOUBLE:
+            return std::make_shared<vectorized::DataTypeFloat64>();
+
+        case TYPE_STRING:
+        case TYPE_CHAR:
+        case TYPE_VARCHAR:
+        case TYPE_HLL:
+            return std::make_shared<vectorized::DataTypeString>();
+        case TYPE_OBJECT:
+            return std::make_shared<vectorized::DataTypeBitMap>();
+
+        case TYPE_DECIMALV2:
+            return std::make_shared<vectorized::DataTypeDecimal<vectorized::Decimal128>>(27, 9); // NOTE(review): (27, 9) is hard-coded, presumably precision/scale — confirm
+        // TYPE_NULL has no dedicated vectorized data type; a UInt8 type is used as a stand-in.
+        case TYPE_NULL:
+            return std::make_shared<vectorized::DataTypeUInt8>();
+
+        case INVALID_TYPE:
+        default:
+            DCHECK(false);
+        }
+        // Explicit return so the compiler does not complain about a missing return value.
+        return nullptr;
+    }
+
     static inline int get_decimal_byte_size(int precision) {
         DCHECK_GT(precision, 0);
         if (precision <= MAX_DECIMAL4_PRECISION) {
diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp
index ee6cf7e87a..4106963721 100644
--- a/be/src/service/internal_service.cpp
+++ b/be/src/service/internal_service.cpp
@@ -36,6 +36,7 @@
 #include "util/string_util.h"
 #include "util/thrift_util.h"
 #include "util/uid_util.h"
+#include "vec/runtime/vdata_stream_mgr.h"
 
 namespace doris {
 
@@ -422,7 +423,10 @@ Status PInternalServiceImpl<T>::_fold_constant_expr(const std::string& ser_reque
         uint32_t len = ser_request.size();
         RETURN_IF_ERROR(deserialize_thrift_msg(buf, &len, false, &t_request));
     }
-    return FoldConstantExecutor().fold_constant_expr(t_request, response);
+    if (!t_request.__isset.vec_exec || !t_request.vec_exec)
+        return FoldConstantExecutor().fold_constant_expr(t_request, response);
+
+    return FoldConstantExecutor().fold_constant_vexpr(t_request, response);
 }
 
 template <typename T>
@@ -432,6 +436,7 @@ void PInternalServiceImpl<T>::transmit_block(google::protobuf::RpcController* cn
                                              google::protobuf::Closure* done) {
     VLOG_ROW << "transmit data: fragment_instance_id=" << print_id(request->finst_id())
              << " node=" << request->node_id();
+    _exec_env->vstream_mgr()->transmit_block(request, &done);
     if (done != nullptr) {
         done->Run();
     }
diff --git a/be/src/udf/udf.cpp b/be/src/udf/udf.cpp
index 0360f3945c..269bd8891e 100644
--- a/be/src/udf/udf.cpp
+++ b/be/src/udf/udf.cpp
@@ -131,6 +131,10 @@ void FunctionContextImpl::set_constant_args(const std::vector<doris_udf::AnyVal*
     _constant_args = constant_args;
 }
 
+void FunctionContextImpl::set_constant_cols(const std::vector<doris::ColumnPtrWrapper*>& constant_cols) {
+    _constant_cols = constant_cols; // copies the vector of pointers; the wrappers themselves are not copied
+}
+
 bool FunctionContextImpl::check_allocations_empty() {
     if (_allocations.empty() && _external_bytes_tracked == 0) {
         return true;
@@ -187,6 +191,7 @@ FunctionContext* FunctionContextImpl::clone(MemPool* pool) {
             create_context(_state, pool, _intermediate_type, _return_type, _arg_types,
                            _varargs_buffer_size, _debug);
     new_context->_impl->_constant_args = _constant_args;
+    new_context->_impl->_constant_cols = _constant_cols;
     new_context->_impl->_fragment_local_fn_state = _fragment_local_fn_state;
     return new_context;
 }
diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h
index 3b9ff5a76d..141219afec 100644
--- a/be/src/udf/udf.h
+++ b/be/src/udf/udf.h
@@ -28,6 +28,7 @@
 // object serves as the interface object between the UDF/UDA and the doris process.
 namespace doris {
 class FunctionContextImpl;
+class ColumnPtrWrapper;
 struct StringValue;
 struct BitmapValue;
 struct DecimalV2Value;
@@ -224,12 +225,16 @@ public:
     // FunctionContext* argument) is a constant (e.g. 5, "string", 1 + 1).
     bool is_arg_constant(int arg_idx) const;
 
+    bool is_col_constant(int arg_idx) const;
+
     // Returns a pointer to the value of the arg_idx-th input argument (0 indexed, not
     // including the FunctionContext* argument). Returns nullptr if the argument is not
     // constant. This function can be used to obtain user-specified constants in a UDF's
     // Init() or Close() functions.
     AnyVal* get_constant_arg(int arg_idx) const;
 
+    doris::ColumnPtrWrapper* get_constant_col(int arg_idx) const;
+
     // Create a test FunctionContext object. The caller is responsible for calling delete
     // on it. This context has additional debugging validation enabled.
     static FunctionContext* create_test_context();
diff --git a/be/src/udf/udf_internal.h b/be/src/udf/udf_internal.h
index 2d2c318ed4..085002d8ec 100644
--- a/be/src/udf/udf_internal.h
+++ b/be/src/udf/udf_internal.h
@@ -32,6 +32,7 @@ namespace doris {
 class FreePool;
 class MemPool;
 class RuntimeState;
+class ColumnPtrWrapper;
 
 // This class actually implements the interface of FunctionContext. This is split to
 // hide the details from the external header.
@@ -67,6 +68,8 @@ public:
 
     void set_constant_args(const std::vector<doris_udf::AnyVal*>& constant_args);
 
+    void set_constant_cols(const std::vector<doris::ColumnPtrWrapper*>& cols);
+
     uint8_t* varargs_buffer() { return _varargs_buffer; }
 
     std::vector<doris_udf::AnyVal*>* staging_input_vals() { return &_staging_input_vals; }
@@ -169,6 +172,8 @@ private:
     // value of the argument.
     std::vector<doris_udf::AnyVal*> _constant_args;
 
+    std::vector<doris::ColumnPtrWrapper*> _constant_cols;
+
     // Used by ScalarFnCall to store the arguments when running without codegen. Allows us
     // to pass AnyVal* arguments to the scalar function directly, rather than codegening a
     // call that passes the correct AnyVal subclass pointer type.
diff --git a/be/src/udf/udf_ir.cpp b/be/src/udf/udf_ir.cpp
index 4eefd175a1..a1ceb02a3c 100644
--- a/be/src/udf/udf_ir.cpp
+++ b/be/src/udf/udf_ir.cpp
@@ -25,6 +25,13 @@ bool FunctionContext::is_arg_constant(int i) const {
     return _impl->_constant_args[i] != nullptr;
 }
 
+bool FunctionContext::is_col_constant(int i) const {
+    // An index outside the recorded constant columns is reported as not constant.
+    const bool in_range = i >= 0 && i < _impl->_constant_cols.size();
+    // Within range, the argument counts as constant only when its entry is non-null.
+    return in_range && _impl->_constant_cols[i] != nullptr;
+}
+
 AnyVal* FunctionContext::get_constant_arg(int i) const {
     if (i < 0 || i >= _impl->_constant_args.size()) {
         return nullptr;
@@ -32,6 +39,13 @@ AnyVal* FunctionContext::get_constant_arg(int i) const {
     return _impl->_constant_args[i];
 }
 
+doris::ColumnPtrWrapper* FunctionContext::get_constant_col(int i) const {
+    // An index outside the recorded constant columns yields nullptr.
+    const bool in_range = i >= 0 && i < _impl->_constant_cols.size();
+    // Within range, return the stored entry as-is (it may itself be nullptr).
+    return in_range ? _impl->_constant_cols[i] : nullptr;
+}
+
 int FunctionContext::get_num_args() const {
     return _impl->_arg_types.size();
 }
diff --git a/be/src/util/binary_cast.hpp b/be/src/util/binary_cast.hpp
index d528d71594..764f5b5f19 100644
--- a/be/src/util/binary_cast.hpp
+++ b/be/src/util/binary_cast.hpp
@@ -23,7 +23,7 @@
 #include "runtime/datetime_value.h"
 #include "runtime/decimalv2_value.h"
 #include "util/types.h"
-
+#include "vec/runtime/vdatetime_value.h"
 namespace doris {
 union TypeConverter {
     uint64_t u64;
@@ -56,6 +56,11 @@ union DateTimeInt128Union {
     ~DateTimeInt128Union() {}
 };
 
+union VecDateTimeInt64Union { // used by binary_cast to convert between VecDateTimeValue and __int64_t
+    doris::vectorized::VecDateTimeValue dt;
+    __int64_t i64;
+    ~VecDateTimeInt64Union() {} // user-provided empty destructor, as in DateTimeInt128Union
+};
 // similar to reinterpret_cast but won't break strict-aliasing rules
 template <typename From, typename To>
 To binary_cast(From from) {
@@ -66,11 +71,13 @@ To binary_cast(From from) {
     constexpr bool from_decv2_to_packed128 = match_v<From, DecimalV2Value, To, PackedInt128>;
     constexpr bool from_i128_to_dt = match_v<From, __int128_t, To, DateTimeValue>;
     constexpr bool from_dt_to_i128 = match_v<From, DateTimeValue, To, __int128_t>;
+    constexpr bool from_i64_to_vec_dt = match_v<From, __int64_t, To, doris::vectorized::VecDateTimeValue>;
+    constexpr bool from_vec_dt_to_i64 = match_v<From, doris::vectorized::VecDateTimeValue, To, __int64_t>;
     constexpr bool from_i128_to_decv2 = match_v<From, __int128_t, To, DecimalV2Value>;
     constexpr bool from_decv2_to_i128 = match_v<From, DecimalV2Value, To, __int128_t>;
 
     static_assert(from_u64_to_db || from_i64_to_db || from_db_to_i64 || from_db_to_u64 ||
-                  from_decv2_to_packed128 || from_i128_to_dt || from_dt_to_i128 ||
+                  from_decv2_to_packed128 || from_i128_to_dt || from_dt_to_i128 || from_i64_to_vec_dt || from_vec_dt_to_i64 ||
                   from_i128_to_decv2 || from_decv2_to_i128);
 
     if constexpr (from_u64_to_db) {
@@ -99,6 +106,12 @@ To binary_cast(From from) {
     } else if constexpr (from_dt_to_i128) {
         DateTimeInt128Union conv = {.dt = from};
         return conv.i128;
+    } else if constexpr (from_i64_to_vec_dt) {
+        VecDateTimeInt64Union conv = {.i64 = from};
+        return conv.dt;
+    } else if constexpr (from_vec_dt_to_i64) {
+        VecDateTimeInt64Union conv = {.dt = from};
+        return conv.i64;
     } else if constexpr (from_i128_to_decv2) {
         DecimalInt128Union conv;
         conv.i128 = from;
diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h
index 81fb10bbd7..bdb124b31a 100644
--- a/be/src/util/bitmap_value.h
+++ b/be/src/util/bitmap_value.h
@@ -1691,6 +1691,12 @@ public:
         }
         return count;
     }
+    // Resets this value: the type becomes EMPTY, _bitmap is cleared and _sv is set to 0.
+    void clear() {
+        _type = EMPTY;
+        _bitmap.clear();
+        _sv = 0;
+    }
 
     // Implement an iterator for convenience
     friend class BitmapValueIterator;
diff --git a/be/src/util/brpc_stub_cache.h b/be/src/util/brpc_stub_cache.h
index 53ee3b7a99..21800f3588 100644
--- a/be/src/util/brpc_stub_cache.h
+++ b/be/src/util/brpc_stub_cache.h
@@ -47,7 +47,7 @@ namespace doris {
 class BrpcStubCache {
 public:
     BrpcStubCache();
-    ~BrpcStubCache();
+    virtual ~BrpcStubCache();
 
     inline std::shared_ptr<PBackendService_Stub> get_stub(const butil::EndPoint& endpoint) {
         auto stub_ptr = _stub_map.find(endpoint);
@@ -66,7 +66,7 @@ public:
         return stub;
     }
 
-    inline std::shared_ptr<PBackendService_Stub> get_stub(const TNetworkAddress& taddr) {
+    virtual std::shared_ptr<PBackendService_Stub> get_stub(const TNetworkAddress& taddr) {
         butil::EndPoint endpoint;
         if (str2endpoint(taddr.hostname.c_str(), taddr.port, &endpoint)) {
             LOG(WARNING) << "unknown endpoint, hostname=" << taddr.hostname
diff --git a/be/src/util/runtime_profile.h b/be/src/util/runtime_profile.h
index f2632d5a2c..1e9366fd03 100644
--- a/be/src/util/runtime_profile.h
+++ b/be/src/util/runtime_profile.h
@@ -52,8 +52,9 @@ namespace doris {
     ScopedTimer<ThreadCpuStopWatch> MACRO_CONCAT(SCOPED_TIMER, __COUNTER__)(c)
 #define CANCEL_SAFE_SCOPED_TIMER(c, is_cancelled) \
     ScopedTimer<MonotonicStopWatch> MACRO_CONCAT(SCOPED_TIMER, __COUNTER__)(c, is_cancelled)
-#define SCOPED_RAW_TIMER(c) \
-    ScopedRawTimer<MonotonicStopWatch, int64_t> MACRO_CONCAT(SCOPED_RAW_TIMER, __COUNTER__)(c)
+#define SCOPED_RAW_TIMER(c)                                                                  \
+    doris::ScopedRawTimer<doris::MonotonicStopWatch, int64_t> MACRO_CONCAT(SCOPED_RAW_TIMER, \
+                                                                           __COUNTER__)(c)
 #define SCOPED_ATOMIC_TIMER(c)                                                                 \
     ScopedRawTimer<MonotonicStopWatch, std::atomic<int64_t>> MACRO_CONCAT(SCOPED_ATOMIC_TIMER, \
                                                                           __COUNTER__)(c)
diff --git a/be/src/util/static_asserts.cpp b/be/src/util/static_asserts.cpp
index eb50636e32..43833ad121 100644
--- a/be/src/util/static_asserts.cpp
+++ b/be/src/util/static_asserts.cpp
@@ -17,7 +17,7 @@
 
 #include "runtime/datetime_value.h"
 #include "runtime/string_value.h"
-
+#include "vec/runtime/vdatetime_value.h"
 namespace doris {
 // This class is unused.  It contains static (compile time) asserts.
 // This is useful to validate struct sizes and other similar things
@@ -28,6 +28,7 @@ private:
     static_assert(offsetof(StringValue, len) == 8);
     // Datetime value
     static_assert(sizeof(DateTimeValue) == 16);
+    static_assert(sizeof(doris::vectorized::VecDateTimeValue) == 8);
     // static_assert(offsetof(DateTimeValue, _year) == 8);
 };
 
diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp
index 0354343ed7..cc1110c7a2 100644
--- a/be/src/util/string_parser.hpp
+++ b/be/src/util/string_parser.hpp
@@ -572,6 +572,26 @@ T StringParser::numeric_limits(bool negative) {
     return negative ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
 }
 
+template<>
+inline int StringParser::StringParseTraits<uint8_t>::max_ascii_len() {
+    return 3; // largest uint8_t is 255: three decimal digits
+}
+
+template<>
+inline int StringParser::StringParseTraits<uint16_t>::max_ascii_len() {
+    return 5; // largest uint16_t is 65535: five decimal digits
+}
+
+template<>
+inline int StringParser::StringParseTraits<uint32_t>::max_ascii_len() {
+    return 10; // largest uint32_t is 4294967295: ten decimal digits
+}
+
+template<>
+inline int StringParser::StringParseTraits<uint64_t>::max_ascii_len() {
+    return 20; // largest uint64_t is 18446744073709551615: twenty decimal digits
+}
+
 template<>
 inline int StringParser::StringParseTraits<int8_t>::max_ascii_len() {
     return 3;
diff --git a/be/src/util/url_coding.cpp b/be/src/util/url_coding.cpp
index 862c34ccd7..3c41114fc6 100644
--- a/be/src/util/url_coding.cpp
+++ b/be/src/util/url_coding.cpp
@@ -194,6 +194,10 @@ int64_t base64_decode(const char* data, size_t length, char* decoded_data) {
 
     // run through the whole string, converting as we go
     while ((ch = *current++) != '\0' && length-- > 0) {
+        if (ch >= 256 || ch < 0) {
+            return -1;
+        }
+
         if (ch == base64_pad) {
             if (*current != '=' && (i % 4) == 1) {
                 return -1;
diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt
new file mode 100644
index 0000000000..f73739122d
--- /dev/null
+++ b/be/src/vec/CMakeLists.txt
@@ -0,0 +1,157 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# where to put generated libraries
+set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/vec")
+# where to put generated binaries
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/vec")
+
+# Sources compiled into the Vec static library; each file is listed exactly once.
+set(VEC_FILES
+  aggregate_functions/aggregate_function_avg.cpp
+  aggregate_functions/aggregate_function_count.cpp
+  aggregate_functions/aggregate_function_distinct.cpp
+  aggregate_functions/aggregate_function_sum.cpp
+  aggregate_functions/aggregate_function_min_max.cpp
+  aggregate_functions/aggregate_function_null.cpp
+  aggregate_functions/aggregate_function_uniq.cpp
+  aggregate_functions/aggregate_function_hll_union_agg.cpp
+  aggregate_functions/aggregate_function_bitmap.cpp
+  aggregate_functions/aggregate_function_reader.cpp
+  aggregate_functions/aggregate_function_window.cpp
+  aggregate_functions/aggregate_function_stddev.cpp
+  aggregate_functions/aggregate_function_simple_factory.cpp
+  columns/collator.cpp
+  columns/column.cpp
+  columns/column_const.cpp
+  columns/column_decimal.cpp
+  columns/column_nullable.cpp
+  columns/column_string.cpp
+  columns/column_vector.cpp
+  columns/columns_common.cpp
+  common/demangle.cpp
+  common/exception.cpp
+  common/pod_array.cpp
+  common/string_utils/string_utils.cpp
+  core/block.cpp
+  core/block_info.cpp
+  core/column_with_type_and_name.cpp
+  core/field.cpp
+  core/sort_block.cpp
+  core/materialize_block.cpp
+  data_types/data_type.cpp
+  data_types/data_type_bitmap.cpp
+  data_types/data_type_nothing.cpp
+  data_types/data_type_nullable.cpp
+  data_types/data_type_number_base.cpp
+  data_types/data_type_string.cpp
+  data_types/data_type_decimal.cpp
+  data_types/get_least_supertype.cpp
+  data_types/nested_utils.cpp
+  data_types/data_type_date.cpp
+  data_types/data_type_date_time.cpp
+  exec/vaggregation_node.cpp
+  exec/ves_http_scan_node.cpp
+  exec/ves_http_scanner.cpp
+  exec/volap_scan_node.cpp
+  exec/vsort_node.cpp
+  exec/vsort_exec_exprs.cpp
+  exec/volap_scanner.cpp
+  exec/vexchange_node.cpp
+  exec/vset_operation_node.cpp
+  exec/vunion_node.cpp
+  exec/vintersect_node.cpp
+  exec/vexcept_node.cpp
+  exec/vselect_node.cpp
+  exec/vblocking_join_node.cpp
+  exec/vcross_join_node.cpp
+  exec/vodbc_scan_node.cpp
+  exec/vmysql_scan_node.cpp
+  exec/vschema_scan_node.cpp
+  exec/vempty_set_node.cpp
+  exec/vanalytic_eval_node.cpp
+  exec/vassert_num_rows_node.cpp
+  exec/vrepeat_node.cpp
+  exec/join/vhash_join_node.cpp
+  exprs/vectorized_agg_fn.cpp
+  exprs/vectorized_fn_call.cpp
+  exprs/vexpr.cpp
+  exprs/vexpr_context.cpp
+  exprs/vliteral.cpp
+  exprs/vin_predicate.cpp
+  exprs/vslot_ref.cpp
+  exprs/vcast_expr.cpp
+  exprs/vcase_expr.cpp
+  exprs/vinfo_func.cpp
+  functions/math.cpp
+  functions/function_bitmap.cpp
+  functions/comparison.cpp
+  functions/comparison_less.cpp
+  functions/comparison_equals.cpp
+  functions/comparison_greater.cpp
+  functions/function.cpp
+  functions/function_helpers.cpp
+  functions/function_hash.cpp
+  functions/functions_logical.cpp
+  functions/function_case.cpp
+  functions/function_cast.cpp
+  functions/function_conv.cpp
+  functions/function_string.cpp
+  functions/function_timestamp.cpp
+  functions/function_utility.cpp
+  functions/comparison_equal_for_null.cpp
+  functions/function_json.cpp
+  functions/hll_cardinality.cpp
+  functions/hll_empty.cpp
+  functions/hll_hash.cpp
+  functions/plus.cpp
+  functions/modulo.cpp
+  functions/multiply.cpp
+  functions/minus.cpp
+  functions/int_div.cpp
+  functions/divide.cpp
+  functions/function_bit.cpp
+  functions/is_null.cpp
+  functions/is_not_null.cpp
+  functions/in.cpp
+  functions/like.cpp
+  functions/to_time_function.cpp
+  functions/time_of_function.cpp
+  functions/if.cpp
+  functions/function_ifnull.cpp
+  functions/nullif.cpp
+  functions/random.cpp
+  functions/function_coalesce.cpp
+  functions/function_date_or_datetime_computation.cpp
+  functions/function_date_or_datetime_to_string.cpp
+  functions/function_datetime_string_to_string.cpp
+  functions/function_grouping.cpp
+  olap/vgeneric_iterators.cpp
+  olap/vcollect_iterator.cpp
+  olap/block_reader.cpp
+  sink/mysql_result_writer.cpp
+  sink/result_sink.cpp
+  sink/vdata_stream_sender.cpp
+  sink/vtabet_sink.cpp
+  runtime/vdatetime_value.cpp
+  runtime/vdata_stream_recvr.cpp
+  runtime/vdata_stream_mgr.cpp
+  runtime/vpartition_info.cpp
+  runtime/vsorted_run_merger.cpp)
+
+add_library(Vec STATIC
+    ${VEC_FILES}
+)
diff --git a/be/src/vec/aggregate_functions/aggregate_function.h b/be/src/vec/aggregate_functions/aggregate_function.h
new file mode 100644
index 0000000000..4c2ef36d7b
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function.h
@@ -0,0 +1,242 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IAggregateFunction.h
+// and modified by Doris
+
+#pragma once
+
+#include <cstddef>
+#include <istream>
+#include <memory>
+#include <ostream>
+#include <type_traits>
+#include <vector>
+
+#include "vec/common/exception.h"
+#include "vec/core/block.h"
+#include "vec/core/column_numbers.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+class Arena;
+class IColumn;
+class IDataType;
+
+using DataTypePtr = std::shared_ptr<const IDataType>;
+using DataTypes = std::vector<DataTypePtr>;
+
+using AggregateDataPtr = char*;
+using ConstAggregateDataPtr = const char*;
+
+/** Aggregate functions interface.
+  * Instances of classes with this interface do not contain the data itself for aggregation,
+  *  but contain only metadata (description) of the aggregate function,
+  *  as well as methods for creating, deleting and working with data.
+  * The data resulting from the aggregation (intermediate computing states) is stored in other objects
+  *  (which can be created in some memory pool),
+  *  and IAggregateFunction is the external interface for manipulating them.
+  */
+class IAggregateFunction {
+public:
+    IAggregateFunction(const DataTypes& argument_types_, const Array& parameters_)
+            : argument_types(argument_types_), parameters(parameters_) {}
+
+    /// Get the main function name.
+    virtual String get_name() const = 0;
+
+    /// Get the result type.
+    virtual DataTypePtr get_return_type() const = 0;
+
+    virtual ~IAggregateFunction() {}
+
+    /** Create empty data for aggregation with `placement new` at the specified location.
+      * You will have to destroy it using the `destroy` method.
+      */
+    virtual void create(AggregateDataPtr __restrict place) const = 0;
+
+    /// Delete the data for aggregation.
+    virtual void destroy(AggregateDataPtr __restrict place) const noexcept = 0;
+
+    /// Reset the aggregation state at `place`.
+    virtual void reset(AggregateDataPtr place) const = 0;
+
+    /// Returns true if it is not necessary to delete the data.
+    virtual bool has_trivial_destructor() const = 0;
+
+    /// Get `sizeof` of the structure with data.
+    virtual size_t size_of_data() const = 0;
+
+    /// How the data structure should be aligned. NOTE: Currently not used (structures with aggregation state are put without alignment).
+    virtual size_t align_of_data() const = 0;
+
+    /** Adds a value to the aggregation data that `place` points to.
+     *  `columns` points to the columns containing the arguments of the aggregate function.
+     *  `row_num` is the number of the row which should be added.
+     *  The additional parameter `arena` should be used instead of the standard memory allocator if the addition requires memory allocation.
+     */
+    virtual void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+                     Arena* arena) const = 0;
+
+    /// Merges the state that `place` points to with another state (`rhs`) of the current aggregation function.
+    virtual void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena* arena) const = 0;
+
+    /// Serializes the state (to transmit it over the network, for example).
+    virtual void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const = 0;
+
+    /// Deserializes the state. This function is called only for empty (just created) states.
+    virtual void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena* arena) const = 0;
+
+    /// Returns true if a function requires an Arena to handle its own states (see add(), merge(), deserialize()).
+    virtual bool allocates_memory_in_arena() const { return false; }
+
+    /// Inserts the result into a column.
+    virtual void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const = 0;
+
+    /** Returns true for aggregate functions of type -State.
+      * They are executed as other aggregate functions, but not finalized (return an aggregation state that can be combined with another).
+      */
+    virtual bool is_state() const { return false; }
+
+    /// If this returns false, insert_result_into may be handed a nullable result column,
+    /// so the function can insert a null value itself rather than relying on AggregateFunctionNullBase;
+    /// this is useful when the computed value may be invalid and should be replaced by null.
+    virtual bool insert_to_null_default() const { return true; }
+
+    /** The inner loop that uses the function pointer is better than using the virtual function.
+      * The reason is that in the case of virtual functions GCC 5.1.2 generates code,
+      *  which, at each iteration of the loop, reloads the function address (the offset value in the virtual function table) from memory to the register.
+      * This gives a performance drop on simple queries around 12%.
+      * After the appearance of better compilers, the code can be removed.
+      */
+    using AddFunc = void (*)(const IAggregateFunction*, AggregateDataPtr, const IColumn**, size_t,
+                             Arena*);
+    virtual AddFunc get_address_of_add_function() const = 0;
+
+    /** Contains a loop with calls to the "add" function. You can collect arguments into the array "places"
+      *  and do a single call to "add_batch" for devirtualization and inlining.
+      */
+    virtual void add_batch(size_t batch_size, AggregateDataPtr* places, size_t place_offset,
+                           const IColumn** columns, Arena* arena) const = 0;
+
+    /** The same for a single place.
+      */
+    virtual void add_batch_single_place(size_t batch_size, AggregateDataPtr place,
+                                        const IColumn** columns, Arena* arena) const = 0;
+
+    // Only used by the agg reader.
+    virtual void add_batch_range(size_t batch_begin, size_t batch_end, AggregateDataPtr place,
+                                 const IColumn** columns, Arena* arena, bool has_null = false) = 0;
+
+    // Only used by window functions.
+    virtual void add_range_single_place(int64_t partition_start, int64_t partition_end,
+                                        int64_t frame_start, int64_t frame_end,
+                                        AggregateDataPtr place, const IColumn** columns,
+                                        Arena* arena) const = 0;
+
+    /** This is used for runtime code generation to determine which header files to include in the generated source.
+      * Always implement it as
+      * const char * get_header_file_path() const override { return __FILE__; }
+      */
+    virtual const char* get_header_file_path() const = 0;
+
+    const DataTypes& get_argument_types() const { return argument_types; }
+    const Array& get_parameters() const { return parameters; }
+
+protected:
+    DataTypes argument_types;
+    Array parameters;
+};
+
+/// Implements get_address_of_add_function() and the batch/range loops by calling Derived::add directly.
+template <typename Derived>
+class IAggregateFunctionHelper : public IAggregateFunction {
+private:
+    static void add_free(const IAggregateFunction* that, AggregateDataPtr place,
+                         const IColumn** columns, size_t row_num, Arena* arena) {
+        static_cast<const Derived&>(*that).add(place, columns, row_num, arena);
+    }
+
+public:
+    IAggregateFunctionHelper(const DataTypes& argument_types_, const Array& parameters_)
+            : IAggregateFunction(argument_types_, parameters_) {}
+
+    AddFunc get_address_of_add_function() const override { return &add_free; }
+
+    void add_batch(size_t batch_size, AggregateDataPtr* places, size_t place_offset,
+                   const IColumn** columns, Arena* arena) const override {
+        for (size_t i = 0; i < batch_size; ++i) // row i is added to the state at places[i] + place_offset
+            static_cast<const Derived*>(this)->add(places[i] + place_offset, columns, i, arena);
+    }
+
+    void add_batch_single_place(size_t batch_size, AggregateDataPtr place, const IColumn** columns,
+                                Arena* arena) const override {
+        for (size_t i = 0; i < batch_size; ++i) // every row is added to the same state
+            static_cast<const Derived*>(this)->add(place, columns, i, arena);
+    }
+    // Currently used by the sum/count/avg/min/max window functions; other window functions should override it.
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, AggregateDataPtr place, const IColumn** columns,
+                                Arena* arena) const override {
+        frame_start = std::max<int64_t>(frame_start, partition_start); // clamp the frame to the partition
+        frame_end = std::min<int64_t>(frame_end, partition_end);
+        for (int64_t i = frame_start; i < frame_end; ++i) { // half-open range [frame_start, frame_end)
+            static_cast<const Derived*>(this)->add(place, columns, i, arena);
+        }
+    }
+
+    void add_batch_range(size_t batch_begin, size_t batch_end, AggregateDataPtr place,
+                         const IColumn** columns, Arena* arena, bool has_null) override { // has_null is not used here
+        for (size_t i = batch_begin; i <= batch_end; ++i) // note: batch_end is inclusive
+            static_cast<const Derived*>(this)->add(place, columns, i, arena);
+    }
+};
+
+/// Implements the methods that create, destroy and describe the aggregation data. T is the type of the structure holding that data.
+template <typename T, typename Derived>
+class IAggregateFunctionDataHelper : public IAggregateFunctionHelper<Derived> {
+protected:
+    using Data = T;
+
+    static Data& data(AggregateDataPtr __restrict place) { return *reinterpret_cast<Data*>(place); } // view raw storage as Data
+    static const Data& data(ConstAggregateDataPtr __restrict place) {
+        return *reinterpret_cast<const Data*>(place);
+    }
+
+public:
+    IAggregateFunctionDataHelper(const DataTypes& argument_types_, const Array& parameters_)
+            : IAggregateFunctionHelper<Derived>(argument_types_, parameters_) {}
+
+    void create(AggregateDataPtr __restrict place) const override { new (place) Data; } // placement new into `place`
+
+    void destroy(AggregateDataPtr __restrict place) const noexcept override { data(place).~Data(); } // explicit destructor call
+
+    bool has_trivial_destructor() const override { return std::is_trivially_destructible_v<Data>; }
+
+    size_t size_of_data() const override { return sizeof(Data); }
+
+    /// NOTE: Currently not used (structures with aggregation state are put without alignment).
+    size_t align_of_data() const override { return alignof(Data); }
+
+    void reset(AggregateDataPtr place) const override {} // no-op here; derived classes may override
+};
+
+using AggregateFunctionPtr = std::shared_ptr<IAggregateFunction>;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.cpp b/be/src/vec/aggregate_functions/aggregate_function_avg.cpp
new file mode 100644
index 0000000000..bb7605bedf
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_avg.cpp
@@ -0,0 +1,68 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionAvg.cpp
+// and modified by Doris
+
+#include "vec/aggregate_functions/aggregate_function_avg.h"
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/factory_helpers.h"
+#include "vec/aggregate_functions/helpers.h"
+
+namespace doris::vectorized {
+
+namespace {
+
+template <typename T>
+struct Avg { // Selects the avg implementation for argument type T.
+    using FieldType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>; // type of the running sum
+    using Function = AggregateFunctionAvg<T, AggregateFunctionAvgData<FieldType>>;
+};
+
+template <typename T>
+using AggregateFuncAvg = typename Avg<T>::Function; // single-parameter alias of Avg<T>::Function
+
+AggregateFunctionPtr create_aggregate_function_avg(const std::string& name,
+                                                   const DataTypes& argument_types,
+                                                   const Array& parameters,
+                                                   const bool result_is_nullable) { // result_is_nullable is not used here
+    assert_no_parameters(name, parameters);
+    assert_unary(name, argument_types);
+
+    AggregateFunctionPtr res;
+    DataTypePtr data_type = argument_types[0];
+    if (is_decimal(data_type)) // decimal argument: the data type is also passed to the function's constructor
+        res.reset(
+                create_with_decimal_type<AggregateFuncAvg>(*data_type, *data_type, argument_types));
+    else
+        res.reset(create_with_numeric_type<AggregateFuncAvg>(*data_type, argument_types));
+
+    if (!res) { // res holds no function: only a warning is logged
+        LOG(WARNING) << fmt::format("Illegal type {} of argument for aggregate function {}",
+                                    argument_types[0]->get_name(), name);
+    }
+    return res; // may be null (see the warning above)
+}
+
+} // namespace
+
+void register_aggregate_function_avg(AggregateFunctionSimpleFactory& factory) {
+    factory.register_function("avg", create_aggregate_function_avg); // registers the creator under the name "avg"
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.h b/be/src/vec/aggregate_functions/aggregate_function_avg.h
new file mode 100644
index 0000000000..18584ee91a
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_avg.h
@@ -0,0 +1,128 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionAvg.h
+// and modified by Doris
+
+#pragma once
+
+#include "common/status.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/columns_number.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+template <typename T>
+struct AggregateFunctionAvgData { // State of avg: a running sum and the number of values added.
+    T sum = 0;
+    UInt64 count = 0;
+
+    template <typename ResultT>
+    ResultT result() const {
+        if constexpr (std::is_floating_point_v<ResultT>)
+            if constexpr (std::numeric_limits<ResultT>::is_iec559)
+                return static_cast<ResultT>(sum) / count; /// IEEE 754 result type: division by zero is allowed
+
+        if (!count) // every other result type: an empty state is reported as an error
+            throw Exception("AggregateFunctionAvg with zero values", TStatusCode::VEC_LOGIC_ERROR);
+        return static_cast<ResultT>(sum) / count;
+    }
+
+    void write(BufferWritable& buf) const { // writes sum, then count
+        write_binary(sum, buf);
+        write_binary(count, buf);
+    }
+
+    void read(BufferReadable& buf) { // reads in the same order as write(): sum, then count
+        read_binary(sum, buf);
+        read_binary(count, buf);
+    }
+};
+
+/// Calculates the arithmetic mean of numbers; the result is Decimal128 for decimal T and Float64 otherwise.
+template <typename T, typename Data>
+class AggregateFunctionAvg final
+        : public IAggregateFunctionDataHelper<Data, AggregateFunctionAvg<T, Data>> {
+public:
+    using ResultType = std::conditional_t<IsDecimalNumber<T>, Decimal128, Float64>;
+    using ResultDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<Decimal128>,
+                                              DataTypeNumber<Float64>>;
+    using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
+    using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128>,
+                                            ColumnVector<Float64>>;
+
+    /// ctor for native (non-decimal) types; scale is set to 0
+    AggregateFunctionAvg(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<Data, AggregateFunctionAvg<T, Data>>(argument_types_,
+                                                                                {}),
+              scale(0) {}
+
+    /// ctor for Decimals; scale is taken from data_type
+    AggregateFunctionAvg(const IDataType& data_type, const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<Data, AggregateFunctionAvg<T, Data>>(argument_types_,
+                                                                                {}),
+              scale(get_decimal_scale(data_type)) {}
+
+    String get_name() const override { return "avg"; }
+
+    DataTypePtr get_return_type() const override {
+        if constexpr (IsDecimalNumber<T>) // decimal result: maximum precision, scale of the argument
+            return std::make_shared<ResultDataType>(ResultDataType::max_precision(), scale);
+        else
+            return std::make_shared<ResultDataType>();
+    }
+
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena*) const override {
+        const auto& column = static_cast<const ColVecType&>(*columns[0]); // unchecked cast: columns[0] is treated as ColVecType
+        this->data(place).sum += column.get_data()[row_num];
+        ++this->data(place).count;
+    }
+
+    void reset(AggregateDataPtr place) const override { // back to the empty state: sum 0, count 0
+        this->data(place).sum = 0;
+        this->data(place).count = 0;
+    }
+
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena*) const override {
+        this->data(place).sum += this->data(rhs).sum; // sums and counts are simply added together
+        this->data(place).count += this->data(rhs).count;
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        this->data(place).write(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena*) const override {
+        this->data(place).read(buf);
+    }
+
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        auto& column = static_cast<ColVecResult&>(to); // unchecked cast: `to` is treated as ColVecResult
+        column.get_data().push_back(this->data(place).template result<ResultType>());
+    }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+
+private:
+    UInt32 scale; // 0 for native types, the decimal scale of the argument type otherwise
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp b/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp
new file mode 100644
index 0000000000..d110b09e1e
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.cpp
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/aggregate_functions/aggregate_function_bitmap.h"
+
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+
+namespace doris::vectorized {
+
+/// Instantiates AggregateFunctionTemplate<nullable, ColumnVector<IntN>> matching the
+/// integer type of the first argument (a Nullable wrapper, if present, is unwrapped
+/// first). Returns nullptr when that type is not Int8/Int16/Int32/Int64.
+/// The result is a raw pointer obtained with `new`; the caller takes ownership.
+template <bool nullable, template <bool, typename> class AggregateFunctionTemplate>
+static IAggregateFunction* createWithIntDataType(const DataTypes& argument_type) {
+    auto first_type = argument_type[0].get();
+    if (first_type->is_nullable()) {
+        const auto* wrapper = assert_cast<const DataTypeNullable*>(first_type);
+        first_type = wrapper->get_nested_type().get();
+    }
+
+    WhichDataType which(first_type);
+    switch (which.idx) {
+    case TypeIndex::Int8:
+        return new AggregateFunctionTemplate<nullable, ColumnVector<Int8>>(argument_type);
+    case TypeIndex::Int16:
+        return new AggregateFunctionTemplate<nullable, ColumnVector<Int16>>(argument_type);
+    case TypeIndex::Int32:
+        return new AggregateFunctionTemplate<nullable, ColumnVector<Int32>>(argument_type);
+    case TypeIndex::Int64:
+        return new AggregateFunctionTemplate<nullable, ColumnVector<Int64>>(argument_type);
+    default:
+        return nullptr;
+    }
+}
+
+/// Creator for the bitmap union aggregate. Only `argument_types` is used; `name`,
+/// `parameters` and `result_is_nullable` are not consulted.
+AggregateFunctionPtr create_aggregate_function_bitmap_union(const std::string& name,
+                                                            const DataTypes& argument_types,
+                                                            const Array& parameters,
+                                                            const bool result_is_nullable) {
+    using Function = AggregateFunctionBitmapOp<AggregateFunctionBitmapUnionOp>;
+    return std::make_shared<Function>(argument_types);
+}
+
+/// Creator for the bitmap intersect aggregate. Only `argument_types` is used; `name`,
+/// `parameters` and `result_is_nullable` are not consulted.
+AggregateFunctionPtr create_aggregate_function_bitmap_intersect(const std::string& name,
+                                                                const DataTypes& argument_types,
+                                                                const Array& parameters,
+                                                                const bool result_is_nullable) {
+    using Function = AggregateFunctionBitmapOp<AggregateFunctionBitmapIntersectOp>;
+    return std::make_shared<Function>(argument_types);
+}
+/// Creator for the bitmap union-count aggregate over a ColumnBitmap argument.
+/// `nullable` is forwarded to AggregateFunctionBitmapCount; the runtime arguments other
+/// than `argument_types` are not consulted.
+template <bool nullable>
+AggregateFunctionPtr create_aggregate_function_bitmap_union_count(const std::string& name,
+                                                                  const DataTypes& argument_types,
+                                                                  const Array& parameters,
+                                                                  const bool result_is_nullable) {
+    using Function = AggregateFunctionBitmapCount<nullable, ColumnBitmap>;
+    return std::make_shared<Function>(argument_types);
+}
+
+/// Creator for the integer-input bitmap count aggregate. Dispatches on the integer type
+/// of the first argument via createWithIntDataType; when that helper returns nullptr the
+/// returned shared pointer holds nullptr as well.
+template <bool nullable>
+AggregateFunctionPtr create_aggregate_function_bitmap_union_int(const std::string& name,
+                                                                const DataTypes& argument_types,
+                                                                const Array& parameters,
+                                                                const bool result_is_nullable) {
+    IAggregateFunction* raw_function =
+            createWithIntDataType<nullable, AggregateFunctionBitmapCount>(argument_types);
+    return std::shared_ptr<IAggregateFunction>(raw_function);
+}
+
+/// Registers all bitmap aggregate creators with `factory`. The count-style functions
+/// are registered twice: once with the <false> creator and once with the <true> creator
+/// plus a trailing `true` argument (presumably the nullable-argument slot of the
+/// factory - confirm against AggregateFunctionSimpleFactory::register_function).
+void register_aggregate_function_bitmap(AggregateFunctionSimpleFactory& factory) {
+    static constexpr auto kUnionCount = "bitmap_union_count";
+    static constexpr auto kUnionInt = "bitmap_union_int";
+
+    factory.register_function("bitmap_union", create_aggregate_function_bitmap_union);
+    factory.register_function("bitmap_intersect", create_aggregate_function_bitmap_intersect);
+
+    factory.register_function(kUnionCount, create_aggregate_function_bitmap_union_count<false>);
+    factory.register_function(kUnionCount, create_aggregate_function_bitmap_union_count<true>,
+                              true);
+
+    factory.register_function(kUnionInt, create_aggregate_function_bitmap_union_int<false>);
+    factory.register_function(kUnionInt, create_aggregate_function_bitmap_union_int<true>, true);
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h
new file mode 100644
index 0000000000..a2e43e5392
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <istream>
+#include <ostream>
+
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_complex.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_bitmap.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+/// Combining policy for bitmap union: scalar values are added to the result bitmap and
+/// bitmaps are OR-ed into it.
+struct AggregateFunctionBitmapUnionOp {
+    static constexpr auto name = "bitmap_union";
+    /// The empty bitmap is the identity element of union, so a freshly created state
+    /// can be combined with directly.
+    static constexpr bool seed_with_first_value = false;
+
+    template <typename T>
+    static void add(BitmapValue& res, const T& data) {
+        res.add(data);
+    }
+
+    static void add(BitmapValue& res, const BitmapValue& data) { res |= data; }
+
+    static void merge(BitmapValue& res, const BitmapValue& data) { res |= data; }
+};
+
+/// Combining policy for bitmap intersection: bitmaps are AND-ed into the result.
+struct AggregateFunctionBitmapIntersectOp {
+    static constexpr auto name = "bitmap_intersect";
+    /// The empty bitmap is absorbing for intersection (empty & x == empty), so the state
+    /// must be initialised from the first contribution instead of being AND-ed with it.
+    static constexpr bool seed_with_first_value = true;
+
+    static void add(BitmapValue& res, const BitmapValue& data) { res &= data; }
+
+    static void merge(BitmapValue& res, const BitmapValue& data) { res &= data; }
+};
+
+/// Aggregation state shared by the bitmap aggregates: one BitmapValue combined through Op.
+template <typename Op>
+struct AggregateFunctionBitmapData {
+    BitmapValue value;
+    /// True until `value` has received its first contribution (add, merge or read).
+    /// Only consulted when Op::seed_with_first_value is set.
+    bool is_first = true;
+
+    /// Combines one input value into the state. For seeding ops the first input replaces
+    /// the (empty) initial bitmap; every later input goes through Op::add.
+    template <typename T>
+    void add(const T& data) {
+        if constexpr (Op::seed_with_first_value) {
+            if (is_first) {
+                value = data;
+                is_first = false;
+                return;
+            }
+        }
+        Op::add(value, data);
+    }
+
+    /// Combines another state's bitmap into this one, with the same seeding rule as add().
+    /// NOTE(review): only the bitmap is passed in, so a partial state that never saw a row
+    /// is indistinguishable from a genuinely empty intersection - confirm whether the
+    /// execution engine can merge such states.
+    void merge(const BitmapValue& data) {
+        if constexpr (Op::seed_with_first_value) {
+            if (is_first) {
+                value = data;
+                is_first = false;
+                return;
+            }
+        }
+        Op::merge(value, data);
+    }
+
+    void write(BufferWritable& buf) const { DataTypeBitMap::serialize_as_stream(value, buf); }
+
+    /// Replaces `value` with the bitmap read from `buf`. The state then already holds a
+    /// contribution, so later add()/merge() calls must combine with it, not overwrite it.
+    void read(BufferReadable& buf) {
+        DataTypeBitMap::deserialize_as_stream(value, buf);
+        is_first = false;
+    }
+
+    BitmapValue& get() { return value; }
+};
+
+/// Aggregate function over a ColumnBitmap argument that produces a bitmap. The combining
+/// behaviour and the reported function name both come from Op.
+template <typename Op>
+class AggregateFunctionBitmapOp final
+        : public IAggregateFunctionDataHelper<AggregateFunctionBitmapData<Op>,
+                                              AggregateFunctionBitmapOp<Op>> {
+    using State = AggregateFunctionBitmapData<Op>;
+
+public:
+    using ResultDataType = BitmapValue;
+    using ColVecType = ColumnBitmap;
+    using ColVecResult = ColumnBitmap;
+
+    AggregateFunctionBitmapOp(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<AggregateFunctionBitmapData<Op>,
+                                           AggregateFunctionBitmapOp<Op>>(argument_types_, {}) {}
+
+    String get_name() const override { return Op::name; }
+
+    DataTypePtr get_return_type() const override { return std::make_shared<DataTypeBitMap>(); }
+
+    /// Feeds the bitmap at `row_num` of the first argument column into the state.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena*) const override {
+        const auto& bitmaps = static_cast<const ColVecType&>(*columns[0]).get_data();
+        this->data(place).add(bitmaps[row_num]);
+    }
+
+    /// Merges the bitmap held by `rhs` into `place`.
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena*) const override {
+        // State::get() is non-const, hence the const_cast on the read-only rhs state.
+        auto& other = const_cast<State&>(this->data(rhs));
+        this->data(place).merge(other.get());
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        const auto& state = this->data(place);
+        state.write(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena*) const override {
+        auto& state = this->data(place);
+        state.read(buf);
+    }
+
+    /// Appends the state's bitmap to `to`, which is accessed as a ColumnBitmap.
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        auto& state = const_cast<State&>(this->data(place));
+        static_cast<ColVecResult&>(to).get_data().push_back(state.get());
+    }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+};
+
+/// Aggregate function that unions its inputs into a bitmap (union-op state) and reports
+/// the cardinality of that bitmap as Int64. ColVecType is the concrete column type of the
+/// argument; when `nullable` is true the argument is a ColumnNullable wrapping it and
+/// null rows are skipped.
+template <bool nullable, typename ColVecType>
+class AggregateFunctionBitmapCount final
+        : public IAggregateFunctionDataHelper<
+                  AggregateFunctionBitmapData<AggregateFunctionBitmapUnionOp>,
+                  AggregateFunctionBitmapCount<nullable, ColVecType>> {
+public:
+    using ColVecResult = ColumnVector<Int64>;
+    using AggFunctionData = AggregateFunctionBitmapData<AggregateFunctionBitmapUnionOp>;
+
+    AggregateFunctionBitmapCount(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<
+                      AggregateFunctionBitmapData<AggregateFunctionBitmapUnionOp>,
+                      AggregateFunctionBitmapCount<nullable, ColVecType>>(argument_types_, {}) {}
+
+    String get_name() const override { return "count"; }
+
+    DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt64>(); }
+
+    /// Adds the value at `row_num` of the first argument column to the state.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena*) const override {
+        if constexpr (!nullable) {
+            const auto& values = static_cast<const ColVecType&>(*columns[0]).get_data();
+            this->data(place).add(values[row_num]);
+        } else {
+            auto& wrapper = assert_cast<const ColumnNullable&>(*columns[0]);
+            if (wrapper.is_null_at(row_num)) {
+                return; // null rows contribute nothing
+            }
+            const auto& nested = static_cast<const ColVecType&>(wrapper.get_nested_column());
+            this->data(place).add(nested.get_data()[row_num]);
+        }
+    }
+
+    /// Merges the bitmap held by `rhs` into `place`.
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena*) const override {
+        // AggFunctionData::get() is non-const, hence the const_cast on the rhs state.
+        auto& other = const_cast<AggFunctionData&>(this->data(rhs));
+        this->data(place).merge(other.get());
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        const auto& state = this->data(place);
+        state.write(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena*) const override {
+        auto& state = this->data(place);
+        state.read(buf);
+    }
+
+    /// Appends the cardinality of the state's bitmap to `to` (accessed as ColumnVector<Int64>).
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        auto& bitmap = const_cast<AggFunctionData&>(this->data(place)).get();
+        auto& out = static_cast<ColVecResult&>(to);
+        out.get_data().push_back(bitmap.cardinality());
+    }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+};
+
+/// Creator for the bitmap union aggregate; declared here, defined in the matching
+/// .cpp file.
+AggregateFunctionPtr create_aggregate_function_bitmap_union(const std::string& name,
+                                                            const DataTypes& argument_types,
+                                                            const Array& parameters,
+                                                            const bool result_is_nullable);
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_combinator.h b/be/src/vec/aggregate_functions/aggregate_function_combinator.h
new file mode 100644
index 0000000000..77d465b236
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_combinator.h
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/IAggregateFunctionCombinator.h
+// and modified by Doris
+
+#pragma once
+
+#include <vec/aggregate_functions/aggregate_function.h>
+#include <vec/data_types/data_type.h>
+
+#include <memory>
+
+namespace doris::vectorized {
+
+/** Aggregate function combinator allows to take one aggregate function
+  *  and transform it to another aggregate function.
+  *
+  * In SQL language they are used as suffixes for existing aggregate functions.
+  *
+  * Example: -If combinator takes an aggregate function and transforms it
+  *  to aggregate function with additional argument at end (condition),
+  *  that will pass values to original aggregate function when the condition is true.
+  *
+  * More examples:
+  *
+  * sum(x) - calculate sum of x
+  * sumIf(x, cond) - calculate sum of x for rows where condition is true.
+  * sumArray(arr) - calculate sum of all elements of arrays.
+  *
+  * PS. Please don't confuse it with the so-called "combiner" - a totally unrelated notion from the Hadoop world.
+  * "Combining" - merging the states of aggregate functions - is supported naturally in ClickHouse.
+  */
+
+/// Abstract interface of an aggregate function combinator: given a nested aggregate
+/// function it produces a combined one (see transform_aggregate_function).
+class IAggregateFunctionCombinator {
+public:
+    /// Name of the combinator.
+    virtual String get_name() const = 0;
+
+    /// Whether the combinator is meant for internal use only; false unless overridden.
+    virtual bool is_for_internal_usage_only() const { return false; }
+
+    /** From the arguments for combined function (ex: UInt64, UInt8 for sumIf),
+      *  get the arguments for nested function (ex: UInt64 for sum).
+      * If arguments are not suitable for combined function, throw an exception.
+      * The default implementation returns the arguments unchanged.
+      */
+    virtual DataTypes transform_arguments(const DataTypes& arguments) const { return arguments; }
+
+    /** From the parameters for combined function,
+      *  get the parameters for nested function.
+      * If parameters are not suitable for combined function, throw an exception.
+      * The default implementation returns the parameters unchanged.
+      */
+    virtual Array transform_parameters(const Array& parameters) const { return parameters; }
+
+    /** Create combined aggregate function (ex: sumIf)
+      *  from nested function (ex: sum)
+      *  and arguments for combined aggregate function (ex: UInt64, UInt8 for sumIf).
+      * It's assumed that function transform_arguments was called before this function and 'arguments' are validated.
+      */
+    virtual AggregateFunctionPtr transform_aggregate_function(
+            const AggregateFunctionPtr& nested_function, const DataTypes& arguments,
+            const Array& params, const bool result_is_nullable) const = 0;
+
+    virtual ~IAggregateFunctionCombinator() {}
+};
+
+using AggregateFunctionCombinatorPtr = std::shared_ptr<const IAggregateFunctionCombinator>;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_count.cpp b/be/src/vec/aggregate_functions/aggregate_function_count.cpp
new file mode 100644
index 0000000000..aa5d0061eb
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_count.cpp
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionCount.cpp
+// and modified by Doris
+
+#include "vec/aggregate_functions/aggregate_function_count.h"
+
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/factory_helpers.h"
+
+namespace doris::vectorized {
+
+/// Creator for the plain "count" aggregate. Runs assert_no_parameters and
+/// assert_arity_at_most<1> on the call before constructing AggregateFunctionCount.
+AggregateFunctionPtr create_aggregate_function_count(const std::string& name,
+                                                     const DataTypes& argument_types,
+                                                     const Array& parameters,
+                                                     const bool result_is_nullable) {
+    assert_no_parameters(name, parameters);
+    assert_arity_at_most<1>(name, argument_types);
+
+    AggregateFunctionPtr function = std::make_shared<AggregateFunctionCount>(argument_types);
+    return function;
+}
+
+/// Creator for the "count" variant that counts only non-NULL values. Runs
+/// assert_arity_at_most<1> on the call before constructing the function.
+AggregateFunctionPtr create_aggregate_function_count_not_null_unary(const std::string& name,
+                                                                    const DataTypes& argument_types,
+                                                                    const Array& parameters,
+                                                                    const bool result_is_nullable) {
+    assert_arity_at_most<1>(name, argument_types);
+
+    AggregateFunctionPtr function =
+            std::make_shared<AggregateFunctionCountNotNullUnary>(argument_types);
+    return function;
+}
+
+/// Registers both "count" creators with `factory`: the plain one, and the non-NULL
+/// counting one with a trailing `true` argument.
+void register_aggregate_function_count(AggregateFunctionSimpleFactory& factory) {
+    static constexpr auto kName = "count";
+    factory.register_function(kName, create_aggregate_function_count);
+    factory.register_function(kName, create_aggregate_function_count_not_null_unary, true);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_count.h b/be/src/vec/aggregate_functions/aggregate_function_count.h
new file mode 100644
index 0000000000..fd096dc6be
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_count.h
@@ -0,0 +1,122 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionCount.h
+// and modified by Doris
+
+#pragma once
+
+#include <array>
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+/// Aggregation state of the count functions: a single counter starting at zero.
+struct AggregateFunctionCountData {
+    UInt64 count {0};
+};
+
+/// Counts how many times add() was called; the argument columns are never inspected.
+class AggregateFunctionCount final
+        : public IAggregateFunctionDataHelper<AggregateFunctionCountData, AggregateFunctionCount> {
+public:
+    AggregateFunctionCount(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper(argument_types_, {}) {}
+
+    String get_name() const override { return "count"; }
+
+    DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt64>(); }
+
+    /// Increments the counter by one.
+    void add(AggregateDataPtr __restrict place, const IColumn**, size_t, Arena*) const override {
+        auto& state = data(place);
+        ++state.count;
+    }
+
+    /// Sets the counter back to zero.
+    void reset(AggregateDataPtr place) const override {
+        auto& state = this->data(place);
+        state.count = 0;
+    }
+
+    /// Adds the counter of `rhs` to the counter of `place`.
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena*) const override {
+        const auto& other = data(rhs);
+        data(place).count += other.count;
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        const auto& state = data(place);
+        write_var_uint(state.count, buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena*) const override {
+        auto& state = data(place);
+        read_var_uint(state.count, buf);
+    }
+
+    /// Appends the counter to `to`, which must be a ColumnInt64.
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        auto& result_column = assert_cast<ColumnInt64&>(to);
+        result_column.get_data().push_back(data(place).count);
+    }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+};
+
+/// Counts the rows of a single ColumnNullable argument that are not NULL.
+class AggregateFunctionCountNotNullUnary final
+        : public IAggregateFunctionDataHelper<AggregateFunctionCountData,
+                                              AggregateFunctionCountNotNullUnary> {
+public:
+    AggregateFunctionCountNotNullUnary(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper(argument_types_, {}) {}
+
+    String get_name() const override { return "count"; }
+
+    DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt64>(); }
+
+    /// Increments the counter when the row at `row_num` of the first column is not NULL.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena*) const override {
+        const auto& nullable_column = assert_cast<const ColumnNullable&>(*columns[0]);
+        if (!nullable_column.is_null_at(row_num)) {
+            ++data(place).count;
+        }
+    }
+
+    /// Sets the counter back to zero.
+    void reset(AggregateDataPtr place) const override {
+        auto& state = data(place);
+        state.count = 0;
+    }
+
+    /// Adds the counter of `rhs` to the counter of `place`.
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena*) const override {
+        const auto& other = data(rhs);
+        data(place).count += other.count;
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        const auto& state = data(place);
+        write_var_uint(state.count, buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena*) const override {
+        auto& state = data(place);
+        read_var_uint(state.count, buf);
+    }
+
+    /// Appends the counter to `to`. A nullable destination additionally receives a 0
+    /// entry in its null map and the value goes into its nested ColumnInt64.
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        if (!to.is_nullable()) {
+            assert_cast<ColumnInt64&>(to).get_data().push_back(data(place).count);
+            return;
+        }
+
+        auto& nullable_to = assert_cast<ColumnNullable&>(to);
+        nullable_to.get_null_map_data().push_back(0);
+        auto& nested = assert_cast<ColumnInt64&>(nullable_to.get_nested_column());
+        nested.get_data().push_back(data(place).count);
+    }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.cpp b/be/src/vec/aggregate_functions/aggregate_function_distinct.cpp
new file mode 100644
index 0000000000..af64a3a15d
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.cpp
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionDistinct.cpp
+// and modified by Doris
+
+#include <algorithm>
+#include <boost/algorithm/string.hpp>
+
+#include "vec/aggregate_functions/aggregate_function_combinator.h"
+#include "vec/aggregate_functions/aggregate_function_distinct.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/helpers.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/data_types/data_type_nullable.h"
+
+namespace doris::vectorized {
+
+/// Combinator that wraps a nested aggregate function in AggregateFunctionDistinct,
+/// choosing the distinct-state implementation from the number and type of arguments.
+class AggregateFunctionCombinatorDistinct final : public IAggregateFunctionCombinator {
+public:
+    String get_name() const override { return "Distinct"; }
+
+    /// Returns the arguments unchanged; an empty argument list is reported via LOG(FATAL).
+    DataTypes transform_arguments(const DataTypes& arguments) const override {
+        if (!arguments.empty()) {
+            return arguments;
+        }
+        LOG(FATAL)
+                << "Incorrect number of arguments for aggregate function with Distinct suffix";
+        return arguments;
+    }
+
+    /// Builds the distinct wrapper around `nested_function`:
+    ///  - exactly one argument: a numeric-specialised state if create_with_numeric_type
+    ///    yields one, otherwise a single-generic state parameterised on whether the type is
+    ///    unambiguously represented in a contiguous memory region;
+    ///  - any other argument count: the multiple-generic state.
+    /// Returns nullptr when `nested_function` is null (after the DCHECK).
+    AggregateFunctionPtr transform_aggregate_function(const AggregateFunctionPtr& nested_function,
+                                                      const DataTypes& arguments,
+                                                      const Array& params,
+                                                      const bool result_is_nullable) const override {
+        DCHECK(nested_function != nullptr);
+        if (nested_function == nullptr) {
+            return nullptr;
+        }
+
+        if (arguments.size() != 1) {
+            using MultiDistinct =
+                    AggregateFunctionDistinct<AggregateFunctionDistinctMultipleGenericData>;
+            return std::make_shared<MultiDistinct>(nested_function, arguments);
+        }
+
+        AggregateFunctionPtr numeric(
+                create_with_numeric_type<AggregateFunctionDistinct,
+                                         AggregateFunctionDistinctSingleNumericData>(
+                        *arguments[0], nested_function, arguments));
+        if (numeric) {
+            return numeric;
+        }
+
+        if (arguments[0]->is_value_unambiguously_represented_in_contiguous_memory_region()) {
+            using ContiguousDistinct =
+                    AggregateFunctionDistinct<AggregateFunctionDistinctSingleGenericData<true>>;
+            return std::make_shared<ContiguousDistinct>(nested_function, arguments);
+        }
+        using GenericDistinct =
+                AggregateFunctionDistinct<AggregateFunctionDistinctSingleGenericData<false>>;
+        return std::make_shared<GenericDistinct>(nested_function, arguments);
+    }
+};
+
+const std::string DISTINCT_FUNCTION_PREFIX = "multi_distinct_";
+
+/// Registers with `factory` the creator for aggregate functions whose name is
+/// DISTINCT_FUNCTION_PREFIX followed by the name of a nested aggregate function.
+///
+/// The creator returns a null AggregateFunctionPtr for any name without that prefix.
+/// Otherwise it looks up the nested function with nullable-stripped argument types and
+/// wraps it with AggregateFunctionCombinatorDistinct, passing the original `types`.
+void register_aggregate_function_combinator_distinct(AggregateFunctionSimpleFactory& factory) {
+    // The lambda is stored in the factory, so capture exactly what it needs: the factory.
+    AggregateFunctionCreator creator = [&factory](const std::string& name, const DataTypes& types,
+                                                  const Array& params,
+                                                  const bool result_is_nullable) {
+        // Reject foreign names before touching the arguments: transform_arguments() reports
+        // an empty argument list via LOG(FATAL), which must not fire for a function this
+        // combinator does not handle in the first place.
+        if (!boost::algorithm::starts_with(name, DISTINCT_FUNCTION_PREFIX)) {
+            return AggregateFunctionPtr();
+        }
+
+        // The nested function is resolved with non-nullable argument types.
+        DataTypes nested_types(types.size());
+        std::transform(types.begin(), types.end(), nested_types.begin(),
+                       [](const auto& e) { return remove_nullable(e); });
+
+        auto function_combinator = std::make_shared<AggregateFunctionCombinatorDistinct>();
+        auto nested_arguments = function_combinator->transform_arguments(nested_types);
+
+        auto nested_function_name = name.substr(DISTINCT_FUNCTION_PREFIX.size());
+        auto nested_function = factory.get(nested_function_name, nested_arguments, params);
+        return function_combinator->transform_aggregate_function(nested_function, types, params,
+                                                                 result_is_nullable);
+    };
+    factory.register_distinct_function_combinator(creator, DISTINCT_FUNCTION_PREFIX);
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.h b/be/src/vec/aggregate_functions/aggregate_function_distinct.h
new file mode 100644
index 0000000000..6502b48275
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.h
@@ -0,0 +1,224 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionDistinct.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/key_holder_helpers.h"
+#include "vec/common/aggregation_common.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/field_visitors.h"
+#include "vec/common/hash_table/hash_set.h"
+#include "vec/common/hash_table/hash_table.h"
+#include "vec/common/sip_hash.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+/// Distinct state for a single numeric argument: values are stored directly in
+/// a hash set keyed by T.
+template <typename T>
+struct AggregateFunctionDistinctSingleNumericData {
+    /// When creating, the hash table must be small.
+    using Set = HashSetWithStackMemory<T, DefaultHash<T>, 4>;
+    using Self = AggregateFunctionDistinctSingleNumericData<T>;
+    Set set;
+
+    /// Records the value at `row_num` of the first argument column.
+    void add(const IColumn** columns, size_t /* columns_num */, size_t row_num, Arena*) {
+        const auto& values = assert_cast<const ColumnVector<T>&>(*columns[0]).get_data();
+        set.insert(values[row_num]);
+    }
+
+    /// Folds the set of `rhs` into this one.
+    void merge(const Self& rhs, Arena*) {
+        set.merge(rhs.set);
+    }
+
+    /// Writes the set to `buf`.
+    void serialize(BufferWritable& buf) const {
+        set.write(buf);
+    }
+
+    /// Reads the set from `buf`.
+    void deserialize(BufferReadable& buf, Arena*) {
+        set.read(buf);
+    }
+
+    /// Builds one column of type `argument_types[0]` holding every stored value.
+    MutableColumns get_arguments(const DataTypes& argument_types) const {
+        MutableColumns argument_columns;
+        argument_columns.emplace_back(argument_types[0]->create_column());
+        auto& distinct_values = argument_columns[0];
+        for (const auto& cell : set) {
+            distinct_values->insert(cell.get_value());
+        }
+        return argument_columns;
+    }
+};
+
+/// Common distinct state for arguments that are stored as byte strings
+/// (StringRef keys) rather than as plain numbers.
+struct AggregateFunctionDistinctGenericData {
+    /// When creating, the hash table must be small.
+    using Set = HashSetWithSavedHashWithStackMemory<StringRef, StringRefHash, 4>;
+    using Self = AggregateFunctionDistinctGenericData;
+    Set set;
+
+    /// Emplaces every key of `rhs` into this set through an ArenaKeyHolder bound
+    /// to `arena`.
+    void merge(const Self& rhs, Arena* arena) {
+        Set::LookupResult it;
+        bool inserted;
+        for (const auto& elem : rhs.set)
+            set.emplace(ArenaKeyHolder{elem.get_value(), *arena}, it, inserted);
+    }
+
+    /// Writes the key count followed by each key as a length-prefixed string.
+    void serialize(BufferWritable& buf) const {
+        write_var_uint(set.size(), buf);
+        for (const auto& elem : set)
+            write_string_binary(elem.get_value(), buf);
+    }
+
+    /// Reads back what serialize() wrote.
+    ///
+    /// `ref` is a non-owning view (it is expected to point into `buf`), so the
+    /// key is emplaced through an ArenaKeyHolder exactly as merge() does, which
+    /// keeps the stored key from depending on the lifetime of the input buffer.
+    /// When no arena is supplied the key is inserted as-is, as before.
+    void deserialize(BufferReadable& buf, Arena* arena) {
+        size_t size;
+        read_var_uint(size, buf);
+
+        Set::LookupResult it;
+        bool inserted;
+        StringRef ref;
+        for (size_t i = 0; i < size; ++i) {
+            read_string_binary(ref, buf);
+            if (arena != nullptr) {
+                set.emplace(ArenaKeyHolder{ref, *arena}, it, inserted);
+            } else {
+                set.insert(ref);
+            }
+        }
+    }
+};
+
+/// Distinct state for a single non-numeric argument. `is_plain_column` is
+/// forwarded to the helpers that turn a row into a key and a key back into a row.
+template <bool is_plain_column>
+struct AggregateFunctionDistinctSingleGenericData : public AggregateFunctionDistinctGenericData {
+    /// Emplaces the key for row `row_num` of the first argument column.
+    void add(const IColumn** columns, size_t /* columns_num */, size_t row_num, Arena* arena) {
+        auto holder = get_key_holder<is_plain_column>(*columns[0], row_num, *arena);
+        Set::LookupResult lookup;
+        bool was_inserted;
+        set.emplace(holder, lookup, was_inserted);
+    }
+
+    /// Builds one column of type `argument_types[0]` holding every stored key.
+    MutableColumns get_arguments(const DataTypes& argument_types) const {
+        MutableColumns argument_columns;
+        argument_columns.emplace_back(argument_types[0]->create_column());
+        auto& target = *argument_columns[0];
+        for (const auto& cell : set) {
+            deserialize_and_insert<is_plain_column>(cell.get_value(), target);
+        }
+        return argument_columns;
+    }
+};
+
+/// Distinct state for several arguments: one key is the concatenation of the
+/// serialized values of all argument columns for a row.
+struct AggregateFunctionDistinctMultipleGenericData : public AggregateFunctionDistinctGenericData {
+    /// Serializes row `row_num` of each of the `columns_num` columns into `arena`
+    /// and emplaces the combined bytes as a single key.
+    void add(const IColumn** columns, size_t columns_num, size_t row_num, Arena* arena) {
+        const char* begin = nullptr;
+        StringRef value(begin, 0);
+        for (size_t i = 0; i < columns_num; ++i) {
+            auto cur_ref = columns[i]->serialize_value_into_arena(row_num, *arena, begin);
+            // Re-derive the start of the combined key from the newest piece; this
+            // presumes the pieces are laid out back to back in the arena, with
+            // `begin` tracking the start of the run — TODO confirm against
+            // serialize_value_into_arena.
+            value.data = cur_ref.data - value.size;
+            value.size += cur_ref.size;
+        }
+
+        Set::LookupResult it;
+        bool inserted;
+        auto key_holder = SerializedKeyHolder{value, *arena};
+        set.emplace(key_holder, it, inserted);
+    }
+
+    /// Builds one column per argument type and splits every stored key back into
+    /// its per-column values, in argument order.
+    MutableColumns get_arguments(const DataTypes& argument_types) const {
+        MutableColumns argument_columns(argument_types.size());
+        for (size_t i = 0; i < argument_types.size(); ++i)
+            argument_columns[i] = argument_types[i]->create_column();
+
+        for (const auto& elem : set) {
+            const char* begin = elem.get_value().data;
+            // Each column consumes its own value and returns where the next one starts.
+            for (auto& column : argument_columns)
+                begin = column->deserialize_and_insert_from_arena(begin);
+        }
+
+        return argument_columns;
+    }
+};
+
+/** Adaptor that makes a nested aggregate function see each distinct argument
+  * value only once.
+  *
+  * The state at `place` is a `Data` object (the set of distinct values) followed
+  * immediately, at offset sizeof(Data), by the state of the nested function.
+  * Rows are only collected into the set while aggregating; the nested function
+  * is fed when the result is requested.
+**/
+template <typename Data>
+class AggregateFunctionDistinct
+        : public IAggregateFunctionDataHelper<Data, AggregateFunctionDistinct<Data>> {
+private:
+    /// Byte offset of the nested function's state inside the combined state.
+    static constexpr auto prefix_size = sizeof(Data);
+    AggregateFunctionPtr nested_func;
+    size_t arguments_num;
+
+    /// Returns the address of the nested function's state.
+    AggregateDataPtr get_nested_place(AggregateDataPtr __restrict place) const noexcept {
+        return place + prefix_size;
+    }
+
+    ConstAggregateDataPtr get_nested_place(ConstAggregateDataPtr __restrict place) const noexcept {
+        return place + prefix_size;
+    }
+
+public:
+    /// `nested_func_` must be non-null: its parameters are read here.
+    AggregateFunctionDistinct(AggregateFunctionPtr nested_func_, const DataTypes& arguments)
+            : IAggregateFunctionDataHelper<Data, AggregateFunctionDistinct>(
+                      arguments, nested_func_->get_parameters()),
+              nested_func(nested_func_),
+              arguments_num(arguments.size()) {}
+
+    /// Adds row `row_num` to the distinct set; the nested function is not touched.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena* arena) const override {
+        this->data(place).add(columns, arguments_num, row_num, arena);
+    }
+
+    /// Merges the distinct set of `rhs` into the one at `place`.
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
+               Arena* arena) const override {
+        this->data(place).merge(this->data(rhs), arena);
+    }
+
+    /// Only the distinct set is serialized, not the nested state.
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        this->data(place).serialize(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena* arena) const override {
+        this->data(place).deserialize(buf, arena);
+    }
+
+    /// Materializes the distinct values as columns, feeds them to the nested
+    /// function in one batch, then appends the nested result to `to`.
+    ///
+    /// NOTE(review): the nested state is modified through a const_cast even
+    /// though `targetplace` is const, and the nested function is called with a
+    /// null arena. Calling this more than once for the same state looks like it
+    /// would feed the values again — confirm callers invoke it only once.
+    void insert_result_into(ConstAggregateDataPtr targetplace, IColumn& to) const override {
+        auto place = const_cast<AggregateDataPtr>(targetplace);
+        auto arguments = this->data(place).get_arguments(this->argument_types);
+        ColumnRawPtrs arguments_raw(arguments.size());
+        for (size_t i = 0; i < arguments.size(); ++i) arguments_raw[i] = arguments[i].get();
+
+        assert(!arguments.empty());
+
+        nested_func->add_batch_single_place(arguments[0]->size(), get_nested_place(place),
+                                            arguments_raw.data(), nullptr);
+        nested_func->insert_result_into(get_nested_place(place), to);
+    }
+
+    /// Size of the distinct set plus the size of the nested state.
+    size_t size_of_data() const override { return prefix_size + nested_func->size_of_data(); }
+
+    /// Constructs the distinct set in place, then the nested state behind it.
+    void create(AggregateDataPtr __restrict place) const override {
+        new (place) Data;
+        nested_func->create(get_nested_place(place));
+    }
+
+    /// Destroys both parts of the state created by create().
+    void destroy(AggregateDataPtr __restrict place) const noexcept override {
+        this->data(place).~Data();
+        nested_func->destroy(get_nested_place(place));
+    }
+
+    /// The nested function's name with "Distinct" appended.
+    String get_name() const override { return nested_func->get_name() + "Distinct"; }
+
+    DataTypePtr get_return_type() const override { return nested_func->get_return_type(); }
+
+    bool allocates_memory_in_arena() const override { return true; }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.cpp b/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.cpp
new file mode 100644
index 0000000000..3b2aba0552
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.cpp
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/aggregate_functions/aggregate_function_hll_union_agg.h"
+
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/factory_helpers.h"
+
+namespace doris::vectorized {
+
+/// Factory callback for `hll_union_agg`.
+///
+/// Takes no parameters and exactly one argument: the aggregate reads its first
+/// argument column for every row, so a call without arguments is rejected here
+/// rather than being accepted and failing later.
+/// `result_is_nullable` is not used.
+AggregateFunctionPtr create_aggregate_function_HLL_union_agg(const std::string& name,
+                                                             const DataTypes& argument_types,
+                                                             const Array& parameters,
+                                                             const bool result_is_nullable) {
+    assert_no_parameters(name, parameters);
+    assert_unary(name, argument_types);
+
+    return std::make_shared<AggregateFunctionHLLUnionAgg>(argument_types);
+}
+
+/// Factory callback for `hll_union` (also registered as `hll_raw_agg`).
+///
+/// Takes no parameters and exactly one argument: the aggregate reads its first
+/// argument column for every row, so a call without arguments is rejected here
+/// rather than being accepted and failing later.
+/// `result_is_nullable` is not used.
+AggregateFunctionPtr create_aggregate_function_HLL_union(const std::string& name,
+                                                         const DataTypes& argument_types,
+                                                         const Array& parameters,
+                                                         const bool result_is_nullable) {
+    assert_no_parameters(name, parameters);
+    assert_unary(name, argument_types);
+
+    return std::make_shared<AggregateFunctionHLLUnion>(argument_types);
+}
+
+/// Registers the HLL aggregates: `hll_union_agg` has its own creator, while
+/// `hll_union` and `hll_raw_agg` share one.
+void register_aggregate_function_HLL_union_agg(AggregateFunctionSimpleFactory& factory) {
+    const auto union_creator = create_aggregate_function_HLL_union;
+
+    factory.register_function("hll_union_agg", create_aggregate_function_HLL_union_agg);
+    factory.register_function("hll_union", union_creator);
+    factory.register_function("hll_raw_agg", union_creator);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h b/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h
new file mode 100644
index 0000000000..f71a1f5596
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h
@@ -0,0 +1,133 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <istream>
+#include <ostream>
+#include <type_traits>
+
+#include "exprs/hll_function.h"
+#include "olap/hll.h"
+#include "util/slice.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+/// Aggregation state of the HLL union functions: one HyperLogLog into which
+/// every input is merged.
+struct AggregateFunctionHLLData {
+    HyperLogLog dst_hll {};
+
+    /// Builds a HyperLogLog from the bytes in `src` and merges it into the state.
+    void add(const StringRef& src) { dst_hll.merge(HyperLogLog(Slice(src.data, src.size))); }
+
+    /// Merges the state of `rhs` into this one.
+    void merge(const AggregateFunctionHLLData& rhs) { dst_hll.merge(rhs.dst_hll); }
+
+    /// Writes the serialized HLL (see get()) to `buf`.
+    void write(BufferWritable& buf) const {
+        std::string serialized = get();
+        write_binary(serialized, buf);
+    }
+
+    /// Reads a serialized HLL from `buf` and loads it into the state.
+    void read(BufferReadable& buf) {
+        StringRef ref;
+        read_binary(ref, buf);
+        dst_hll.deserialize(Slice(ref.data, ref.size));
+    }
+
+    Int64 get_cardinality() const { return dst_hll.estimate_cardinality(); }
+
+    /// Returns the serialized form of the HLL.
+    std::string get() const {
+        // Reserve the maximum size, serialize in place, then trim to what was
+        // written. The non-const data() is used because the buffer is written
+        // to; writing through the pointer returned by c_str() is not allowed.
+        std::string result(dst_hll.max_serialized_size(), '0');
+        const auto size = dst_hll.serialize(reinterpret_cast<uint8_t*>(result.data()));
+        result.resize(size);
+
+        return result;
+    }
+};
+
+/// `hll_union_agg`: merges the HLL values of a string column and returns the
+/// estimated cardinality as an Int64.
+class AggregateFunctionHLLUnionAgg
+        : public IAggregateFunctionDataHelper<AggregateFunctionHLLData,
+                                              AggregateFunctionHLLUnionAgg> {
+public:
+    String get_name() const override { return "hll_union_agg"; }
+
+    AggregateFunctionHLLUnionAgg(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper(argument_types_, {}) {}
+
+    /// `data_type` is accepted but not used.
+    AggregateFunctionHLLUnionAgg(const IDataType& data_type, const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper(argument_types_, {}) {}
+
+    DataTypePtr get_return_type() const override {
+        return std::make_shared<DataTypeInt64>();
+    }
+
+    /// Merges the serialized HLL stored at `row_num` of the first column.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena*) const override {
+        const auto& hll_column = static_cast<const ColumnString&>(*columns[0]);
+        const auto serialized_hll = hll_column.get_data_at(row_num);
+        this->data(place).add(serialized_hll);
+    }
+
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena*) const override {
+        const auto& other = this->data(rhs);
+        this->data(place).merge(other);
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        this->data(place).write(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena*) const override {
+        this->data(place).read(buf);
+    }
+
+    /// Appends the estimated cardinality to the Int64 result column.
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        const auto cardinality = this->data(place).get_cardinality();
+        static_cast<ColumnVector<Int64>&>(to).get_data().push_back(cardinality);
+    }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+};
+
+/// `hll_union`: same aggregation as hll_union_agg, but the result is the
+/// serialized HLL as a string instead of the cardinality.
+class AggregateFunctionHLLUnion final : public AggregateFunctionHLLUnionAgg {
+public:
+    String get_name() const override { return "hll_union"; }
+
+    AggregateFunctionHLLUnion(const DataTypes& argument_types_)
+            : AggregateFunctionHLLUnionAgg(argument_types_) {}
+
+    AggregateFunctionHLLUnion(const IDataType& data_type, const DataTypes& argument_types_)
+            : AggregateFunctionHLLUnionAgg(data_type, argument_types_) {}
+
+    DataTypePtr get_return_type() const override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    /// Appends the serialized HLL to the string result column.
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        const std::string serialized = this->data(place).get();
+        static_cast<ColumnString&>(to).insert_data(serialized.data(), serialized.size());
+    }
+};
+
+/// Factory callback that builds the `hll_union` aggregate function; defined in
+/// aggregate_function_hll_union_agg.cpp.
+AggregateFunctionPtr create_aggregate_function_HLL_union(const std::string& name,
+                                                         const DataTypes& argument_types,
+                                                         const Array& parameters,
+                                                         const bool result_is_nullable);
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp b/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp
new file mode 100644
index 0000000000..813674a312
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_min_max.cpp
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionMinMaxAny.cpp
+// and modified by Doris
+
+#include "vec/aggregate_functions/aggregate_function_min_max.h"
+
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/factory_helpers.h"
+#include "vec/aggregate_functions/helpers.h"
+
+namespace doris::vectorized {
+
+/// min, max
+///
+/// Builds a single-value aggregate (`AggregateFunctionTemplate` over `Data`)
+/// for the one argument in `argument_types`, choosing the value holder from the
+/// argument's type index. Requires no parameters and exactly one argument.
+///
+/// Returns a raw pointer allocated with `new`, or nullptr when the argument type
+/// is not handled; the callers in this file wrap the result in an
+/// AggregateFunctionPtr.
+template <template <typename, bool> class AggregateFunctionTemplate, template <typename> class Data>
+static IAggregateFunction* create_aggregate_function_single_value(const String& name,
+                                                                  const DataTypes& argument_types,
+                                                                  const Array& parameters) {
+    assert_no_parameters(name, parameters);
+    assert_unary(name, argument_types);
+
+    const DataTypePtr& argument_type = argument_types[0];
+
+    WhichDataType which(argument_type);
+    // One `if` per numeric type (presumably FOR_NUMERIC_TYPES applies DISPATCH to
+    // each of them — see its definition), using the fixed-size holder.
+#define DISPATCH(TYPE)                                                                 \
+    if (which.idx == TypeIndex::TYPE)                                                  \
+        return new AggregateFunctionTemplate<Data<SingleValueDataFixed<TYPE>>, false>( \
+                argument_type);
+    FOR_NUMERIC_TYPES(DISPATCH)
+#undef DISPATCH
+    if (which.idx == TypeIndex::String) {
+        return new AggregateFunctionTemplate<Data<SingleValueDataString>, false>(argument_type);
+    }
+    // Date and DateTime both use the Int64 holder.
+    if (which.idx == TypeIndex::DateTime || which.idx == TypeIndex::Date) {
+        return new AggregateFunctionTemplate<Data<SingleValueDataFixed<Int64>>, false>(
+                argument_type);
+    }
+    if (which.idx == TypeIndex::Decimal128) {
+        return new AggregateFunctionTemplate<Data<SingleValueDataFixed<DecimalV2Value>>, false>(
+                argument_type);
+    }
+    // Unsupported argument type.
+    return nullptr;
+}
+
+/// Factory callback for `max`. The returned pointer is empty when the argument
+/// type is not supported. `result_is_nullable` is not used.
+AggregateFunctionPtr create_aggregate_function_max(const std::string& name,
+                                                   const DataTypes& argument_types,
+                                                   const Array& parameters,
+                                                   const bool result_is_nullable) {
+    IAggregateFunction* raw_function =
+            create_aggregate_function_single_value<AggregateFunctionsSingleValue,
+                                                   AggregateFunctionMaxData>(name, argument_types,
+                                                                             parameters);
+    return AggregateFunctionPtr(raw_function);
+}
+
+/// Factory callback for `min`. The returned pointer is empty when the argument
+/// type is not supported. `result_is_nullable` is not used.
+AggregateFunctionPtr create_aggregate_function_min(const std::string& name,
+                                                   const DataTypes& argument_types,
+                                                   const Array& parameters,
+                                                   const bool result_is_nullable) {
+    IAggregateFunction* raw_function =
+            create_aggregate_function_single_value<AggregateFunctionsSingleValue,
+                                                   AggregateFunctionMinData>(name, argument_types,
+                                                                             parameters);
+    return AggregateFunctionPtr(raw_function);
+}
+
+/// Registers the `max` and `min` aggregate functions with the factory.
+void register_aggregate_function_minmax(AggregateFunctionSimpleFactory& factory) {
+    factory.register_function("max", create_aggregate_function_max);
+    factory.register_function("min", create_aggregate_function_min);
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max.h b/be/src/vec/aggregate_functions/aggregate_function_min_max.h
new file mode 100644
index 0000000000..17d682339a
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_min_max.h
@@ -0,0 +1,535 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
+// and modified by Doris
+
+#pragma once
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_decimal.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/assert_cast.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+/// For numeric values.
+/// Holds at most one value of type T together with a flag telling whether a
+/// value has been stored yet.
+template <typename T>
+struct SingleValueDataFixed {
+private:
+    using Self = SingleValueDataFixed;
+
+    /// We need to remember if at least one value has been passed. This is necessary for AggregateFunctionIf.
+    bool has_value = false;
+    /// Value-initialized so the member is never read while indeterminate.
+    T value {};
+
+public:
+    bool has() const { return has_value; }
+
+    /// Appends the stored value to `to`, or a default value when nothing is stored.
+    void insert_result_into(IColumn& to) const {
+        if (has())
+            assert_cast<ColumnVector<T>&>(to).get_data().push_back(value);
+        else
+            assert_cast<ColumnVector<T>&>(to).insert_default();
+    }
+
+    /// Forgets the stored value.
+    void reset() { has_value = false; }
+
+    /// Writes the flag, followed by the value only when one is stored.
+    void write(BufferWritable& buf) const {
+        write_binary(has(), buf);
+        if (has()) write_binary(value, buf);
+    }
+
+    /// Reads back what write() produced.
+    void read(BufferReadable& buf) {
+        read_binary(has_value, buf);
+        if (has()) read_binary(value, buf);
+    }
+
+    /// Unconditionally stores the value at `row_num` of `column`.
+    void change(const IColumn& column, size_t row_num, Arena*) {
+        has_value = true;
+        value = assert_cast<const ColumnVector<T>&>(column).get_data()[row_num];
+    }
+
+    /// Assuming to.has()
+    void change(const Self& to, Arena*) {
+        has_value = true;
+        value = to.value;
+    }
+
+    // The change_* methods below return true when the stored value was replaced.
+
+    bool change_first_time(const IColumn& column, size_t row_num, Arena* arena) {
+        if (has()) {
+            return false;
+        }
+        change(column, row_num, arena);
+        return true;
+    }
+
+    bool change_first_time(const Self& to, Arena* arena) {
+        if (has() || !to.has()) {
+            return false;
+        }
+        change(to, arena);
+        return true;
+    }
+
+    bool change_every_time(const IColumn& column, size_t row_num, Arena* arena) {
+        change(column, row_num, arena);
+        return true;
+    }
+
+    bool change_every_time(const Self& to, Arena* arena) {
+        if (!to.has()) {
+            return false;
+        }
+        change(to, arena);
+        return true;
+    }
+
+    bool change_if_less(const IColumn& column, size_t row_num, Arena* arena) {
+        if (!has() || assert_cast<const ColumnVector<T>&>(column).get_data()[row_num] < value) {
+            change(column, row_num, arena);
+            return true;
+        }
+        return false;
+    }
+
+    bool change_if_less(const Self& to, Arena* arena) {
+        if (to.has() && (!has() || to.value < value)) {
+            change(to, arena);
+            return true;
+        }
+        return false;
+    }
+
+    bool change_if_greater(const IColumn& column, size_t row_num, Arena* arena) {
+        if (!has() || assert_cast<const ColumnVector<T>&>(column).get_data()[row_num] > value) {
+            change(column, row_num, arena);
+            return true;
+        }
+        return false;
+    }
+
+    bool change_if_greater(const Self& to, Arena* arena) {
+        if (to.has() && (!has() || to.value > value)) {
+            change(to, arena);
+            return true;
+        }
+        return false;
+    }
+
+    /// Equal only when both sides hold a value; an empty `to` never compares equal.
+    bool is_equal_to(const Self& to) const { return has() && to.has() && to.value == value; }
+
+    bool is_equal_to(const IColumn& column, size_t row_num) const {
+        return has() && assert_cast<const ColumnVector<T>&>(column).get_data()[row_num] == value;
+    }
+};
+
+/// For DecimalV2 values.
+/// Holds at most one decimal, kept as its 128-bit integer representation, and
+/// reads from / writes to ColumnDecimal<Decimal128> columns.
+template <>
+struct SingleValueDataFixed<DecimalV2Value> {
+private:
+    using Self = SingleValueDataFixed;
+
+    /// We need to remember if at least one value has been passed. This is necessary for AggregateFunctionIf.
+    bool has_value = false;
+    /// Value-initialized so the member is never read while indeterminate.
+    int128_t value {};
+
+public:
+    bool has() const { return has_value; }
+
+    /// Appends the stored value to `to`, or a default value when nothing is stored.
+    void insert_result_into(IColumn& to) const {
+        if (has()) {
+            DecimalV2Value decimal(value);
+            assert_cast<ColumnDecimal<Decimal128>&>(to).insert_data(
+                    reinterpret_cast<const char*>(&decimal), 0);
+        } else
+            assert_cast<ColumnDecimal<Decimal128>&>(to).insert_default();
+    }
+
+    /// Forgets the stored value.
+    void reset() { has_value = false; }
+
+    /// Writes the flag, followed by the value only when one is stored.
+    void write(BufferWritable& buf) const {
+        write_binary(has(), buf);
+        if (has()) write_binary(value, buf);
+    }
+
+    /// Reads back what write() produced.
+    void read(BufferReadable& buf) {
+        read_binary(has_value, buf);
+        if (has()) read_binary(value, buf);
+    }
+
+    /// Unconditionally stores the value at `row_num` of `column`.
+    void change(const IColumn& column, size_t row_num, Arena*) {
+        has_value = true;
+        value = assert_cast<const ColumnDecimal<Decimal128>&>(column).get_data()[row_num];
+    }
+
+    /// Assuming to.has()
+    void change(const Self& to, Arena*) {
+        has_value = true;
+        value = to.value;
+    }
+
+    // The change_* methods below return true when the stored value was replaced.
+
+    bool change_first_time(const IColumn& column, size_t row_num, Arena* arena) {
+        if (has()) {
+            return false;
+        }
+        change(column, row_num, arena);
+        return true;
+    }
+
+    bool change_first_time(const Self& to, Arena* arena) {
+        if (has() || !to.has()) {
+            return false;
+        }
+        change(to, arena);
+        return true;
+    }
+
+    bool change_every_time(const IColumn& column, size_t row_num, Arena* arena) {
+        change(column, row_num, arena);
+        return true;
+    }
+
+    bool change_every_time(const Self& to, Arena* arena) {
+        if (!to.has()) {
+            return false;
+        }
+        change(to, arena);
+        return true;
+    }
+
+    bool change_if_less(const IColumn& column, size_t row_num, Arena* arena) {
+        if (!has() ||
+            assert_cast<const ColumnDecimal<Decimal128>&>(column).get_data()[row_num] < value) {
+            change(column, row_num, arena);
+            return true;
+        }
+        return false;
+    }
+
+    bool change_if_less(const Self& to, Arena* arena) {
+        if (to.has() && (!has() || to.value < value)) {
+            change(to, arena);
+            return true;
+        }
+        return false;
+    }
+
+    bool change_if_greater(const IColumn& column, size_t row_num, Arena* arena) {
+        if (!has() ||
+            assert_cast<const ColumnDecimal<Decimal128>&>(column).get_data()[row_num] > value) {
+            change(column, row_num, arena);
+            return true;
+        }
+        return false;
+    }
+
+    bool change_if_greater(const Self& to, Arena* arena) {
+        if (to.has() && (!has() || to.value > value)) {
+            change(to, arena);
+            return true;
+        }
+        return false;
+    }
+
+    /// Equal only when both sides hold a value; an empty `to` never compares equal.
+    bool is_equal_to(const Self& to) const { return has() && to.has() && to.value == value; }
+
+    bool is_equal_to(const IColumn& column, size_t row_num) const {
+        return has() &&
+               assert_cast<const ColumnDecimal<Decimal128>&>(column).get_data()[row_num] == value;
+    }
+};
+
+/** For strings. Short strings are stored in the object itself, and long strings are allocated separately.
+  * NOTE It could also be suitable for arrays of numbers.
+  */
+struct SingleValueDataString {
+private:
+    using Self = SingleValueDataString;
+
+    Int32 size = -1;    /// -1 indicates that there is no value.
+    Int32 capacity = 0; /// power of two or zero
+    char* large_data = nullptr;
+
+public:
+    static constexpr Int32 AUTOMATIC_STORAGE_SIZE = 64;
+    static constexpr Int32 MAX_SMALL_STRING_SIZE =
+            AUTOMATIC_STORAGE_SIZE - sizeof(size) - sizeof(capacity) - sizeof(large_data);
+
+private:
+    char small_data[MAX_SMALL_STRING_SIZE]; /// Including the terminating zero.
+
+public:
+    /// Releases the heap buffer, if any. It is allocated with `new char[]`, so
+    /// it must be released with the array form of delete.
+    ~SingleValueDataString() { delete[] large_data; }
+
+    bool has() const { return size >= 0; }
+
+    const char* get_data() const { return size <= MAX_SMALL_STRING_SIZE ? small_data : large_data; }
+
+    void insert_result_into(IColumn& to) const {
+        if (has())
+            assert_cast<ColumnString&>(to).insert_data(get_data(), size);
+        else
+            assert_cast<ColumnString&>(to).insert_default();
+    }
+
+    /// Returns to the "no value" state and frees the heap buffer, if any.
+    void reset() {
+        if (size != -1) {
+            size = -1;
+            capacity = 0;
+            // Allocated with `new char[]`, hence the array form of delete.
+            delete[] large_data;
+            large_data = nullptr;
+        }
+    }
+    
+    void write(BufferWritable& buf) const {
+        write_binary(size, buf);
+        if (has()) buf.write(get_data(), size);
+    }
+
+    /// Reads a value written by write(): a 32-bit size (negative means "no
+    /// value") followed by that many bytes. Short values go into the inline
+    /// buffer; longer ones into the heap buffer, which grows when too small.
+    void read(BufferReadable& buf) {
+        Int32 rhs_size;
+        read_binary(rhs_size, buf);
+
+        if (rhs_size >= 0) {
+            if (rhs_size <= MAX_SMALL_STRING_SIZE) {
+                /// Don't free large_data here.
+
+                size = rhs_size;
+
+                if (size > 0) buf.read(small_data, size);
+            } else {
+                if (capacity < rhs_size) {
+                    capacity = static_cast<UInt32>(round_up_to_power_of_two_or_zero(rhs_size));
+                    // The old buffer came from `new char[]`, hence delete[].
+                    delete[] large_data;
+                    large_data = new char[capacity];
+                }
+
+                size = rhs_size;
+                buf.read(large_data, size);
+            }
+        } else {
+            /// Don't free large_data here.
+            size = rhs_size;
+        }
+    }
+
+    StringRef get_string_ref() const { return StringRef(get_data(), size); }
+
+    /// Assuming to.has()
+    void change_impl(StringRef value, Arena* arena) {
+        Int32 value_size = value.size;
+
+        if (value_size <= MAX_SMALL_STRING_SIZE) {
+            /// Don't free large_data here.
+            size = value_size;
+
+            if (size > 0) {
+                memcpy(small_data, value.data, size);
+            }
+        } else {
+            if (capacity < value_size) {
+                /// Don't free large_data here.
+                capacity = round_up_to_power_of_two_or_zero(value_size);
+                delete large_data;
+                large_data = new char[capacity];
+            }
+
+            size = value_size;
+            memcpy(large_data, value.data, size);
+        }
+    }
+
+    void change(const IColumn& column, size_t row_num, Arena* arena) {
+        change_impl(assert_cast<const ColumnString&>(column).get_data_at(row_num), arena);
+    }
+
+    void change(const Self& to, Arena* arena) { change_impl(to.get_string_ref(), arena); }
+
+    bool change_if_less(const IColumn& column, size_t row_num, Arena* arena) {
+        if (!has() ||
+            assert_cast<const ColumnString&>(column).get_data_at(row_num) < get_string_ref()) {
+            change(column, row_num, arena);
+            return true;
+        } else
+            return false;
+    }
+
+    bool change_if_greater(const IColumn& column, size_t row_num, Arena* arena) {
+        if (!has() ||
+            assert_cast<const ColumnString&>(column).get_data_at(row_num) > get_string_ref()) {
+            change(column, row_num, arena);
+            return true;
+        } else
+            return false;
+    }
+
+    bool change_first_time(const IColumn& column, size_t row_num, Arena* arena) {
+        if (!has()) {
+            change(column, row_num, arena);
+            return true;
+        } else
+            return false;
+    }
+
+    bool change_first_time(const Self& to, Arena* arena) {
+        if (!has() && to.has()) {
+            change(to, arena);
+            return true;
+        } else
+            return false;
+    }
+
+    bool change_every_time(const IColumn& column, size_t row_num, Arena* arena) {
+        change(column, row_num, arena);
+        return true;
+    }
+
+    bool change_every_time(const Self& to, Arena* arena) {
+        if (to.has()) {
+            change(to, arena);
+            return true;
+        } else
+            return false;
+    }
+
+    bool change_if_less(const Self& to, Arena* arena) {
+        if (to.has() && (!has() || to.get_string_ref() < get_string_ref())) {
+            change(to, arena);
+            return true;
+        } else
+            return false;
+    }
+
+    bool change_if_greater(const Self& to, Arena* arena) {
+        if (to.has() && (!has() || to.get_string_ref() > get_string_ref())) {
+            change(to, arena);
+            return true;
+        } else
+            return false;
+    }
+
+    bool is_equal_to(const Self& to) const {
+        return has() && to.get_string_ref() == get_string_ref();
+    }
+
+    bool is_equal_to(const IColumn& column, size_t row_num) const { return false; }
+};
+
+/// Adapts a single-value state to "max": change_if_better forwards to change_if_greater.
+template <typename Data>
+struct AggregateFunctionMaxData : Data {
+    using Self = AggregateFunctionMaxData;
+    static const char* name() { return "max"; }
+
+    bool change_if_better(const Self& to, Arena* arena) {
+        return this->change_if_greater(to, arena);
+    }
+    bool change_if_better(const IColumn& column, size_t row_num, Arena* arena) {
+        return this->change_if_greater(column, row_num, arena);
+    }
+};
+
+/// Adapts a single-value state to "min": change_if_better forwards to change_if_less.
+template <typename Data>
+struct AggregateFunctionMinData : Data {
+    using Self = AggregateFunctionMinData;
+    static const char* name() { return "min"; }
+
+    bool change_if_better(const Self& to, Arena* arena) { return this->change_if_less(to, arena); }
+    bool change_if_better(const IColumn& column, size_t row_num, Arena* arena) {
+        return this->change_if_less(column, row_num, arena);
+    }
+};
+
+/// Aggregate function whose state holds a single value (e.g. min or max); every
+/// operation is forwarded to the corresponding member of the `Data` state.
+template <typename Data, bool AllocatesMemoryInArena>
+class AggregateFunctionsSingleValue final
+        : public IAggregateFunctionDataHelper<
+                  Data, AggregateFunctionsSingleValue<Data, AllocatesMemoryInArena>> {
+private:
+    using Base = IAggregateFunctionDataHelper<
+            Data, AggregateFunctionsSingleValue<Data, AllocatesMemoryInArena>>;
+    DataTypePtr& type; /// Refers to argument_types[0] of the base class.
+
+public:
+    AggregateFunctionsSingleValue(const DataTypePtr& type_)
+            : Base({type_}, {}), type(this->argument_types[0]) {
+        /// Only "min" and "max" require the argument type to be comparable.
+        const StringRef func_name(Data::name());
+        const bool needs_ordering = func_name == StringRef("min") || func_name == StringRef("max");
+        if (needs_ordering && !type->is_comparable()) {
+            LOG(FATAL) << fmt::format(
+                    "Illegal type {} of argument of aggregate function {} because the values "
+                    "of that data type are not comparable",
+                    type->get_name(), get_name());
+        }
+    }
+
+    String get_name() const override { return Data::name(); }
+
+    DataTypePtr get_return_type() const override { return type; }
+
+    bool allocates_memory_in_arena() const override { return AllocatesMemoryInArena; }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+
+    void reset(AggregateDataPtr place) const override { this->data(place).reset(); }
+
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena* arena) const override {
+        this->data(place).change_if_better(*columns[0], row_num, arena);
+    }
+
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
+               Arena* arena) const override {
+        this->data(place).change_if_better(this->data(rhs), arena);
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        this->data(place).write(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena*) const override {
+        this->data(place).read(buf);
+    }
+
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        this->data(place).insert_result_into(to);
+    }
+};
+/// Creator for the "max" aggregate function; only declared here, defined in another file.
+AggregateFunctionPtr create_aggregate_function_max(const std::string& name,
+                                                   const DataTypes& argument_types,
+                                                   const Array& parameters,
+                                                   const bool result_is_nullable);
+/// Creator for the "min" aggregate function; only declared here, defined in another file.
+AggregateFunctionPtr create_aggregate_function_min(const std::string& name,
+                                                   const DataTypes& argument_types,
+                                                   const Array& parameters,
+                                                   const bool result_is_nullable);
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_nothing.h b/be/src/vec/aggregate_functions/aggregate_function_nothing.h
new file mode 100644
index 0000000000..4c7b1933e6
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_nothing.h
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionNothing.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column.h"
+#include "vec/data_types/data_type_nothing.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+/** Aggregate function that takes arbitrary number of arbitrary arguments and does nothing:
+  * it keeps no state, ignores its input and inserts a default value as the result.
+  * Its declared return type is Nullable(Nothing).
+  */
+class AggregateFunctionNothing final : public IAggregateFunctionHelper<AggregateFunctionNothing> {
+    using Base = IAggregateFunctionHelper<AggregateFunctionNothing>;
+
+public:
+    AggregateFunctionNothing(const DataTypes& arguments, const Array& params)
+            : Base(arguments, params) {}
+
+    String get_name() const override { return "nothing"; }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+
+    DataTypePtr get_return_type() const override {
+        DataTypePtr nested = std::make_shared<DataTypeNothing>();
+        return std::make_shared<DataTypeNullable>(nested);
+    }
+
+    /// The state is empty: there is nothing to construct, destroy or reset.
+    size_t size_of_data() const override { return 0; }
+    size_t align_of_data() const override { return 1; }
+    bool has_trivial_destructor() const override { return true; }
+    void create(AggregateDataPtr) const override {}
+    void destroy(AggregateDataPtr) const noexcept override {}
+    void reset(AggregateDataPtr) const override {}
+
+    /// Input rows and serialized states are ignored.
+    void add(AggregateDataPtr, const IColumn**, size_t, Arena*) const override {}
+    void merge(AggregateDataPtr, ConstAggregateDataPtr, Arena*) const override {}
+    void serialize(ConstAggregateDataPtr, BufferWritable&) const override {}
+    void deserialize(AggregateDataPtr, BufferReadable&, Arena*) const override {}
+
+    /// The result is always the default value of the destination column.
+    void insert_result_into(ConstAggregateDataPtr, IColumn& to) const override {
+        to.insert_default();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_null.cpp b/be/src/vec/aggregate_functions/aggregate_function_null.cpp
new file mode 100644
index 0000000000..e28287f45e
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_null.cpp
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionNull.cpp
+// and modified by Doris
+
+#include "vec/aggregate_functions/aggregate_function_null.h"
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/aggregate_function_combinator.h"
+#include "vec/aggregate_functions/aggregate_function_count.h"
+#include "vec/aggregate_functions/aggregate_function_nothing.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/data_types/data_type_nullable.h"
+
+namespace doris::vectorized {
+
+/// Internal combinator that wraps an aggregate function so it can accept Nullable arguments.
+class AggregateFunctionCombinatorNull final : public IAggregateFunctionCombinator {
+public:
+    String get_name() const override { return "Null"; }
+
+    bool is_for_internal_usage_only() const override { return true; }
+
+    /// Returns the argument types after applying remove_nullable() to each of them.
+    DataTypes transform_arguments(const DataTypes& arguments) const override {
+        DataTypes stripped;
+        stripped.reserve(arguments.size());
+        for (const auto& argument : arguments) stripped.push_back(remove_nullable(argument));
+        return stripped;
+    }
+
+    /// Picks the wrapper for `nested_function`: Nothing when any argument type reports
+    /// only_null(), else the unary or variadic Null adapter. A null nested function yields nullptr.
+    AggregateFunctionPtr transform_aggregate_function(const AggregateFunctionPtr& nested_function,
+                                                      const DataTypes& arguments,
+                                                      const Array& params,
+                                                      const bool result_is_nullable) const override {
+        if (nested_function == nullptr) return nullptr;
+
+        for (const auto& arg_type : arguments) {
+            if (arg_type->only_null()) {
+                return std::make_shared<AggregateFunctionNothing>(arguments, params);
+            }
+        }
+
+        const bool is_unary = arguments.size() == 1;
+        if (is_unary && result_is_nullable) {
+            return std::make_shared<AggregateFunctionNullUnary<true>>(nested_function, arguments,
+                                                                      params);
+        }
+        if (is_unary) {
+            return std::make_shared<AggregateFunctionNullUnary<false>>(nested_function, arguments,
+                                                                       params);
+        }
+        if (result_is_nullable) {
+            return std::make_shared<AggregateFunctionNullVariadic<true>>(nested_function,
+                                                                         arguments, params);
+        }
+        return std::make_shared<AggregateFunctionNullVariadic<false>>(nested_function, arguments,
+                                                                      params);
+    }
+};
+
+/// Registers the creator that wraps the function found for the non-nullable argument types.
+void register_aggregate_function_combinator_null(AggregateFunctionSimpleFactory& factory) {
+    AggregateFunctionCreator wrap = [&](const std::string& name, const DataTypes& types,
+                                        const Array& params, const bool result_is_nullable) {
+        auto combinator = std::make_shared<AggregateFunctionCombinatorNull>();
+        auto nested_types = combinator->transform_arguments(types);
+        auto nested = factory.get(name, nested_types, params);
+        return combinator->transform_aggregate_function(nested, types, params, result_is_nullable);
+    };
+    factory.register_nullable_function_combinator(wrap);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_null.h b/be/src/vec/aggregate_functions/aggregate_function_null.h
new file mode 100644
index 0000000000..9458d7dcbf
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_null.h
@@ -0,0 +1,294 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionNull.h
+// and modified by Doris
+
+#pragma once
+
+#include <array>
+
+#include "common/logging.h"
+#include "common/status.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+/// Adapter (not a standalone aggregate) placed around a nested aggregate function that is
+/// called with at least one Nullable argument. Rows that contain at least one NULL are
+/// skipped and never reach the nested function.
+///
+/// If all rows had NULL, the behaviour is determined by "result_is_nullable" template parameter.
+///  true - return NULL; false - return value from empty aggregation state of nested function.
+/// This base holds the state layout and bookkeeping; Derived supplies add().
+
+template <bool result_is_nullable, typename Derived>
+class AggregateFunctionNullBase : public IAggregateFunctionHelper<Derived> {
+protected:
+    AggregateFunctionPtr nested_function;
+    size_t prefix_size;
+
+    /** Besides the nested function's data, the state holds a flag that records whether
+      *  at least one non-NULL value has been accumulated; while it is unset, the result
+      *  of the function is NULL.
+      *
+      * The flag occupies prefix_size bytes so that the nested state stays properly aligned.
+      */
+
+    /// Address of the nested function's state, located right after the flag prefix.
+    AggregateDataPtr nested_place(AggregateDataPtr __restrict place) const noexcept {
+        return place + prefix_size;
+    }
+    ConstAggregateDataPtr nested_place(ConstAggregateDataPtr __restrict place) const noexcept {
+        return place + prefix_size;
+    }
+
+    /// The flag helpers are no-ops (get_flag reports true) when the result is not nullable.
+    static void init_flag(AggregateDataPtr __restrict place) noexcept {
+        if constexpr (result_is_nullable) place[0] = 0;
+    }
+
+    static void set_flag(AggregateDataPtr __restrict place) noexcept {
+        if constexpr (result_is_nullable) place[0] = 1;
+    }
+
+    static bool get_flag(ConstAggregateDataPtr __restrict place) noexcept {
+        if constexpr (result_is_nullable) return place[0];
+        return true;
+    }
+
+public:
+    /// Keeps a reference to the nested function and sizes the flag prefix: one alignment
+    /// unit of the nested state when the result is nullable, nothing otherwise.
+    AggregateFunctionNullBase(AggregateFunctionPtr nested_function_, const DataTypes& arguments,
+                              const Array& params)
+            : IAggregateFunctionHelper<Derived>(arguments, params),
+              nested_function {nested_function_},
+              prefix_size(0) {
+        if constexpr (result_is_nullable) prefix_size = nested_function->align_of_data();
+    }
+
+    /// The wrapper is named exactly like the function it wraps.
+    String get_name() const override { return nested_function->get_name(); }
+
+    DataTypePtr get_return_type() const override {
+        DataTypePtr nested_type = nested_function->get_return_type();
+        if constexpr (result_is_nullable) return make_nullable(nested_type);
+        return nested_type;
+    }
+
+    void create(AggregateDataPtr __restrict place) const override {
+        init_flag(place);
+        nested_function->create(nested_place(place));
+    }
+
+    void destroy(AggregateDataPtr __restrict place) const noexcept override {
+        nested_function->destroy(nested_place(place));
+    }
+    void reset(AggregateDataPtr place) const override {
+        init_flag(place);
+        nested_function->reset(nested_place(place));
+    }
+
+    bool has_trivial_destructor() const override {
+        return nested_function->has_trivial_destructor();
+    }
+
+    /// State layout: [flag prefix][nested state]; alignment is that of the nested state.
+    size_t size_of_data() const override { return prefix_size + nested_function->size_of_data(); }
+    size_t align_of_data() const override { return nested_function->align_of_data(); }
+
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena* arena) const override {
+        if constexpr (result_is_nullable) {
+            if (get_flag(rhs)) set_flag(place);
+        }
+        nested_function->merge(nested_place(place), nested_place(rhs), arena);
+    }
+
+    /// Writes the flag (nullable results only), then the nested state if the flag is set.
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        bool has_value = get_flag(place);
+        if constexpr (result_is_nullable) write_binary(has_value, buf);
+        if (has_value) nested_function->serialize(nested_place(place), buf);
+    }
+
+    /// Counterpart of serialize(): the nested state is read only when the flag was set.
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena* arena) const override {
+        bool has_value = true;
+        if constexpr (result_is_nullable) read_binary(has_value, buf);
+        if (!has_value) return;
+        set_flag(place);
+        nested_function->deserialize(nested_place(place), buf, arena);
+    }
+
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        if constexpr (!result_is_nullable) {
+            nested_function->insert_result_into(nested_place(place), to);
+        } else {
+            ColumnNullable& nullable_to = assert_cast<ColumnNullable&>(to);
+            if (!get_flag(place)) {
+                /// No value was accumulated: insert the column's default.
+                nullable_to.insert_default();
+            } else if (nested_function->insert_to_null_default()) {
+                nested_function->insert_result_into(nested_place(place),
+                                                    nullable_to.get_nested_column());
+                nullable_to.get_null_map_data().push_back(0);
+            } else {
+                /// The nested function wants to insert into the Nullable column by itself.
+                nested_function->insert_result_into(nested_place(place), to);
+            }
+        }
+    }
+
+    bool allocates_memory_in_arena() const override {
+        return nested_function->allocates_memory_in_arena();
+    }
+
+    bool is_state() const override { return nested_function->is_state(); }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+};
+
+/** Nullable adapter for aggregate functions with exactly one argument.
+  * This case has its own code because it is much more efficient than the variadic one.
+  */
+template <bool result_is_nullable>
+class AggregateFunctionNullUnary final
+        : public AggregateFunctionNullBase<result_is_nullable,
+                                           AggregateFunctionNullUnary<result_is_nullable>> {
+public:
+    AggregateFunctionNullUnary(AggregateFunctionPtr nested_function_, const DataTypes& arguments,
+                               const Array& params)
+            : AggregateFunctionNullBase<result_is_nullable,
+                                        AggregateFunctionNullUnary<result_is_nullable>>(
+                      std::move(nested_function_), arguments, params) {}
+
+    /// Forwards the row to the nested function unless it is NULL; NULL rows are skipped.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena* arena) const override {
+        const ColumnNullable* column = assert_cast<const ColumnNullable*>(columns[0]);
+        if (!column->is_null_at(row_num)) {
+            this->set_flag(place);
+            const IColumn* nested_column = &column->get_nested_column();
+            this->nested_function->add(this->nested_place(place), &nested_column, row_num, arena);
+        }
+    }
+
+    /// Adds rows [0, batch_size). The "has a value" flag is only set when at least one row
+    /// is really handed to the nested function, so an empty batch does not mark the state.
+    void add_batch_single_place(size_t batch_size, AggregateDataPtr place, const IColumn** columns,
+                                Arena* arena) const override {
+        const ColumnNullable* column = assert_cast<const ColumnNullable*>(columns[0]);
+
+        if (column->has_null()) {
+            /// add() skips NULL rows and sets the flag for the rows it accepts.
+            for (size_t i = 0; i < batch_size; ++i) {
+                this->add(place, columns, i, arena);
+            }
+        } else {
+            /// No NULLs: hand the whole batch over, marking the state only if it has rows.
+            if (batch_size > 0) this->set_flag(place);
+            const IColumn* nested_column = &column->get_nested_column();
+            this->nested_function->add_batch_single_place(batch_size, this->nested_place(place),
+                                                          &nested_column, arena);
+        }
+    }
+
+    /// Adds rows [batch_begin, batch_end] (both inclusive). As in add_batch_single_place,
+    /// the flag is not set for an empty range (batch_begin > batch_end).
+    void add_batch_range(size_t batch_begin, size_t batch_end, AggregateDataPtr place,
+                         const IColumn** columns, Arena* arena, bool has_null) override {
+        const ColumnNullable* column = assert_cast<const ColumnNullable*>(columns[0]);
+
+        if (has_null) {
+            for (size_t i = batch_begin; i <= batch_end; ++i) {
+                this->add(place, columns, i, arena);
+            }
+        } else {
+            if (batch_begin <= batch_end) this->set_flag(place);
+            const IColumn* nested_column = &column->get_nested_column();
+            this->nested_function->add_batch_range(
+                    batch_begin, batch_end, this->nested_place(place), &nested_column, arena);
+        }
+    }
+};
+
+/// Nullable adapter for functions with several arguments; rows containing a NULL are skipped.
+template <bool result_is_nullable>
+class AggregateFunctionNullVariadic final
+        : public AggregateFunctionNullBase<result_is_nullable,
+                                           AggregateFunctionNullVariadic<result_is_nullable>> {
+public:
+    AggregateFunctionNullVariadic(AggregateFunctionPtr nested_function_, const DataTypes& arguments,
+                                  const Array& params)
+            : AggregateFunctionNullBase<result_is_nullable,
+                                        AggregateFunctionNullVariadic<result_is_nullable>>(
+                      std::move(nested_function_), arguments, params),
+              number_of_arguments(arguments.size()) {
+        if (number_of_arguments == 1) {
+            LOG(FATAL)
+                    << "Logical error: single argument is passed to AggregateFunctionNullVariadic";
+        }
+        if (number_of_arguments > MAX_ARGS) {
+            LOG(FATAL) << fmt::format(
+                    "Maximum number of arguments for aggregate function with Nullable types is {}",
+                    size_t(MAX_ARGS));
+        }
+
+        for (size_t i = 0; i < number_of_arguments; ++i)
+            is_nullable[i] = arguments[i]->is_nullable();
+    }
+
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena* arena) const override {
+        /// The columns really passed to the nested function. Fixed-size storage replaces the
+        /// non-standard variable-length array; the constructor enforces the MAX_ARGS bound.
+        std::array<const IColumn*, MAX_ARGS> nested_columns;
+
+        for (size_t i = 0; i < number_of_arguments; ++i) {
+            if (is_nullable[i]) {
+                const ColumnNullable& nullable_col =
+                        assert_cast<const ColumnNullable&>(*columns[i]);
+                if (nullable_col.is_null_at(row_num)) {
+                    /// A NULL in any Nullable argument: this row is not processed.
+                    return;
+                }
+                nested_columns[i] = &nullable_col.get_nested_column();
+            } else
+                nested_columns[i] = columns[i];
+        }
+
+        this->set_flag(place);
+        this->nested_function->add(this->nested_place(place), nested_columns.data(), row_num,
+                                   arena);
+    }
+
+    bool allocates_memory_in_arena() const override {
+        return this->nested_function->allocates_memory_in_arena();
+    }
+
+private:
+    enum { MAX_ARGS = 8 };
+    size_t number_of_arguments = 0;
+    std::array<char, MAX_ARGS> is_nullable; /// Plain array: one indirection less than a vector.
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_reader.cpp b/be/src/vec/aggregate_functions/aggregate_function_reader.cpp
new file mode 100644
index 0000000000..3594d514d3
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_reader.cpp
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/aggregate_functions/aggregate_function_reader.h"
+
+namespace doris::vectorized {
+
+// Reader variants that are spread automatically in the nullable case: NULL values do not
+// take part in the aggregation.
+void register_aggregate_function_reader(AggregateFunctionSimpleFactory& factory) {
+    // The suffix keeps these agg-reader specific functions apart from the regular ones.
+    const auto add_reader = [&factory](const std::string& name,
+                                       const AggregateFunctionCreator& creator) {
+        factory.register_function(name + agg_reader_suffix, creator, false);
+    };
+    add_reader("sum", create_aggregate_function_sum_reader);
+    add_reader("max", create_aggregate_function_max);
+    add_reader("min", create_aggregate_function_min);
+    add_reader("replace_if_not_null", create_aggregate_function_replace_if_not_null);
+    add_reader("bitmap_union", create_aggregate_function_bitmap_union);
+    add_reader("hll_union", create_aggregate_function_HLL_union);
+}
+
+// Reader "replace" is registered twice: once with nullable == false and once with true.
+void register_aggregate_function_reader_no_spread(AggregateFunctionSimpleFactory& factory) {
+    const auto add_reader = [&factory](const std::string& name,
+                                       const AggregateFunctionCreator& creator, bool nullable) {
+        factory.register_function(name + agg_reader_suffix, creator, nullable);
+    };
+    add_reader("replace", create_aggregate_function_replace, false);
+    add_reader("replace", create_aggregate_function_replace_nullable, true);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_reader.h b/be/src/vec/aggregate_functions/aggregate_function_reader.h
new file mode 100644
index 0000000000..f44be5ee57
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_reader.h
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/aggregate_functions/aggregate_function_bitmap.h"
+#include "vec/aggregate_functions/aggregate_function_hll_union_agg.h"
+#include "vec/aggregate_functions/aggregate_function_min_max.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/aggregate_function_sum.h"
+#include "vec/aggregate_functions/aggregate_function_window.h"
+
+namespace doris::vectorized {
+
+// Suffix appended to a function name when it is registered for the agg reader.
+static const std::string agg_reader_suffix = "_reader";
+
+// Registers the agg-reader functions (sum, max, min, replace_if_not_null,
+// bitmap_union, hll_union) as non-nullable entries.
+void register_aggregate_function_reader(AggregateFunctionSimpleFactory& factory);
+
+// Registers the agg-reader "replace" function once per nullability flag.
+void register_aggregate_function_reader_no_spread(AggregateFunctionSimpleFactory& factory);
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
new file mode 100644
index 0000000000..ba1b2ba98e
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionFactory.cpp
+// and modified by Doris
+
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+
+#include "vec/aggregate_functions/aggregate_function_reader.h"
+
+namespace doris::vectorized {
+// Registration hooks that are only declared in this file; each one receives
+// the factory it should add its functions to. They are called, in a fixed
+// order, from AggregateFunctionSimpleFactory::instance() below.
+class AggregateFunctionSimpleFactory;
+void register_aggregate_function_sum(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_combinator_null(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_minmax(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_avg(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_count(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_HLL_union_agg(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_uniq(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_combinator_distinct(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_bitmap(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_window_rank(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_window_lead_lag(AggregateFunctionSimpleFactory& factory);
+void register_aggregate_function_stddev_variance(AggregateFunctionSimpleFactory& factory);
+// Returns the shared factory object, populating it the first time it is
+// requested. The order of the registration calls is significant and must be
+// kept; see the notes inside.
+AggregateFunctionSimpleFactory& AggregateFunctionSimpleFactory::instance() {
+    static std::once_flag registered;
+    static AggregateFunctionSimpleFactory factory;
+    std::call_once(registered, []() {
+        // Functions registered only in their non-nullable form. Anything that
+        // should get a nullable variant added automatically has to be
+        // registered before the null combinator below.
+        register_aggregate_function_sum(factory);
+        register_aggregate_function_minmax(factory);
+        register_aggregate_function_avg(factory);
+        register_aggregate_function_count(factory);
+        register_aggregate_function_uniq(factory);
+        register_aggregate_function_bitmap(factory);
+        register_aggregate_function_combinator_distinct(factory);
+        register_aggregate_function_HLL_union_agg(factory);
+        register_aggregate_function_reader(factory); // aggregate functions for the agg reader
+        register_aggregate_function_window_rank(factory);
+        register_aggregate_function_stddev_variance(factory);
+
+        register_aggregate_function_combinator_null(factory);
+
+        // Registered after the null combinator, so nothing is added
+        // automatically for these two.
+        register_aggregate_function_reader_no_spread(factory);
+        register_aggregate_function_window_lead_lag(factory);
+    });
+    return factory;
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.h b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.h
new file mode 100644
index 0000000000..1bac4f1e7f
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.h
@@ -0,0 +1,114 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionFactory.h
+// and modified by Doris
+
+#pragma once
+
+#include <functional>
+#include <iostream>
+#include <memory>
+#include <mutex>
+#include <unordered_map>
+
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/core/field.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+using DataTypePtr = std::shared_ptr<const IDataType>;
+using DataTypes = std::vector<DataTypePtr>;
+using AggregateFunctionCreator = std::function<AggregateFunctionPtr(
+        const std::string&, const DataTypes&, const Array&, const bool)>;
+
+// Registry that maps aggregate function names to the creators that build them.
+// Creators live in two tables: one consulted when no argument type is nullable
+// and one consulted when at least one argument type is nullable. Aliases are
+// resolved to the registered name inside get().
+class AggregateFunctionSimpleFactory {
+public:
+    using Creator = AggregateFunctionCreator;
+
+private:
+    using AggregateFunctions = std::unordered_map<std::string, Creator>;
+
+    // Creators used when every argument type is non-nullable.
+    AggregateFunctions aggregate_functions;
+    // Creators used when at least one argument type is nullable.
+    AggregateFunctions nullable_aggregate_functions;
+    // alias -> registered function name.
+    std::unordered_map<std::string, std::string> function_alias;
+public:
+    // Installs `creator` as the nullable variant of every function that is
+    // currently registered as non-nullable and has no nullable variant yet.
+    // Functions registered afterwards are not affected.
+    void register_nullable_function_combinator(const Creator& creator) {
+        for (const auto& entity : aggregate_functions) {
+            // try_emplace leaves an existing nullable entry untouched.
+            nullable_aggregate_functions.try_emplace(entity.first, creator);
+        }
+    }
+
+    // Registers `creator` under `prefix` + name for every currently registered
+    // non-nullable function, unless that prefixed name already exists.
+    void register_distinct_function_combinator(const Creator& creator, const std::string& prefix) {
+        // Collect first, insert afterwards: inserting while iterating could
+        // rehash the map and invalidate the iteration.
+        std::vector<std::string> need_insert;
+        for (const auto& entity : aggregate_functions) {
+            std::string target_value = prefix + entity.first;
+            if (aggregate_functions.find(target_value) == aggregate_functions.end()) {
+                need_insert.emplace_back(std::move(target_value));
+            }
+        }
+        for (const auto& function_name : need_insert) {
+            aggregate_functions[function_name] = creator;
+        }
+    }
+
+    // Creates the aggregate function registered as `name` (or as the function
+    // `name` is an alias of). The nullable table is used when any argument type
+    // is nullable, the non-nullable table otherwise.
+    // Returns nullptr when the selected table has no entry for the name.
+    AggregateFunctionPtr get(const std::string& name, const DataTypes& argument_types,
+                             const Array& parameters, const bool result_is_nullable = false) {
+        bool nullable = false;
+        for (const auto& type : argument_types) {
+            if (type->is_nullable()) {
+                nullable = true;
+                break;
+            }
+        }
+
+        // Resolve an alias without copying the name.
+        const std::string* resolved_name = &name;
+        const auto alias_it = function_alias.find(name);
+        if (alias_it != function_alias.end()) {
+            resolved_name = &alias_it->second;
+        }
+
+        const AggregateFunctions& table =
+                nullable ? nullable_aggregate_functions : aggregate_functions;
+        const auto creator_it = table.find(*resolved_name);
+        if (creator_it == table.end()) {
+            return nullptr;
+        }
+        return creator_it->second(*resolved_name, argument_types, parameters, result_is_nullable);
+    }
+
+    // Stores `creator` under `name` in the nullable or the non-nullable table,
+    // replacing any creator already stored there.
+    void register_function(const std::string& name, const Creator& creator, bool nullable = false) {
+        if (nullable) {
+            nullable_aggregate_functions[name] = creator;
+        } else {
+            aggregate_functions[name] = creator;
+        }
+    }
+
+    // Makes `alias` resolve to the function registered as `name`.
+    void register_alias(const std::string& name, const std::string& alias) {
+        function_alias[alias] = name;
+    }
+
+public:
+    static AggregateFunctionSimpleFactory& instance();
+};
+}; // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_stddev.cpp b/be/src/vec/aggregate_functions/aggregate_function_stddev.cpp
new file mode 100644
index 0000000000..f1794d6149
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_stddev.cpp
@@ -0,0 +1,101 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/aggregate_functions/aggregate_function_stddev.h"
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/factory_helpers.h"
+#include "vec/aggregate_functions/helpers.h"
+namespace doris::vectorized {
+
+// Builds a stddev/variance aggregate function for the type of the first
+// argument. A nullable argument is unwrapped to its nested type first.
+// Types enumerated by FOR_NUMERIC_TYPES use BaseData<TYPE, is_stddev> as state,
+// decimals use BaseDatadecimal<is_stddev>. For any other type DCHECK(false) is
+// hit and nullptr is returned. `name` and `parameters` are not used.
+template <template <typename> class AggregateFunctionTemplate, template <typename> class NameData,
+          template <typename, typename> class Data, bool is_stddev>
+static IAggregateFunction* create_function_single_value(const String& name,
+                                                        const DataTypes& argument_types,
+                                                        const Array& parameters) {
+    auto element_type = argument_types[0].get();
+    if (element_type->is_nullable()) {
+        const auto* nullable_type = assert_cast<const DataTypeNullable*>(element_type);
+        element_type = nullable_type->get_nested_type().get();
+    }
+    WhichDataType type_id(*element_type);
+
+#define DISPATCH(TYPE)                                                                         \
+    if (type_id.idx == TypeIndex::TYPE)                                                        \
+        return new AggregateFunctionTemplate<NameData<Data<TYPE, BaseData<TYPE, is_stddev>>>>( \
+                argument_types);
+    FOR_NUMERIC_TYPES(DISPATCH)
+#undef DISPATCH
+
+    if (type_id.is_decimal()) {
+        using DecimalState = NameData<Data<Decimal128, BaseDatadecimal<is_stddev>>>;
+        return new AggregateFunctionTemplate<DecimalState>(argument_types);
+    }
+
+    DCHECK(false) << "with unknowed type, failed in  create_aggregate_function_stddev_variance";
+    return nullptr;
+}
+
+// Creator for the sample-variance function: AggregateFunctionStddevSamp over
+// SampData state named by VarianceSampData. `is_stddev` is forwarded to the
+// state type; `result_is_nullable` is not used.
+template <bool is_stddev>
+AggregateFunctionPtr create_aggregate_function_variance_samp(const std::string& name,
+                                                             const DataTypes& argument_types,
+                                                             const Array& parameters,
+                                                             const bool result_is_nullable) {
+    IAggregateFunction* function =
+            create_function_single_value<AggregateFunctionStddevSamp, VarianceSampData, SampData,
+                                         is_stddev>(name, argument_types, parameters);
+    return AggregateFunctionPtr(function);
+}
+
+// Creator for the sample-standard-deviation function:
+// AggregateFunctionStddevSamp over SampData state named by StddevSampData.
+// `is_stddev` is forwarded to the state type; `result_is_nullable` is not used.
+template <bool is_stddev>
+AggregateFunctionPtr create_aggregate_function_stddev_samp(const std::string& name,
+                                                           const DataTypes& argument_types,
+                                                           const Array& parameters,
+                                                           const bool result_is_nullable) {
+    IAggregateFunction* function =
+            create_function_single_value<AggregateFunctionStddevSamp, StddevSampData, SampData,
+                                         is_stddev>(name, argument_types, parameters);
+    return AggregateFunctionPtr(function);
+}
+
+// Creator for the population-variance function: AggregateFunctionStddevSamp
+// over PopData state named by VarianceData. `is_stddev` is forwarded to the
+// state type; `result_is_nullable` is not used.
+template <bool is_stddev>
+AggregateFunctionPtr create_aggregate_function_variance_pop(const std::string& name,
+                                                            const DataTypes& argument_types,
+                                                            const Array& parameters,
+                                                            const bool result_is_nullable) {
+    IAggregateFunction* function =
+            create_function_single_value<AggregateFunctionStddevSamp, VarianceData, PopData,
+                                         is_stddev>(name, argument_types, parameters);
+    return AggregateFunctionPtr(function);
+}
+
+// Creator for the population-standard-deviation function:
+// AggregateFunctionStddevSamp over PopData state named by StddevData.
+// `is_stddev` is forwarded to the state type; `result_is_nullable` is not used.
+template <bool is_stddev>
+AggregateFunctionPtr create_aggregate_function_stddev_pop(const std::string& name,
+                                                          const DataTypes& argument_types,
+                                                          const Array& parameters,
+                                                          const bool result_is_nullable) {
+    IAggregateFunction* function =
+            create_function_single_value<AggregateFunctionStddevSamp, StddevData, PopData,
+                                         is_stddev>(name, argument_types, parameters);
+    return AggregateFunctionPtr(function);
+}
+
+// Registers the variance / standard-deviation functions and their aliases.
+// Variance creators are instantiated with is_stddev == false, standard
+// deviation creators with is_stddev == true.
+void register_aggregate_function_stddev_variance(AggregateFunctionSimpleFactory& factory) {
+    // Sample variance.
+    factory.register_function("variance_samp", create_aggregate_function_variance_samp<false>);
+    factory.register_alias("variance_samp", "var_samp");
+
+    // Population variance.
+    factory.register_function("variance", create_aggregate_function_variance_pop<false>);
+    factory.register_alias("variance", "var_pop");
+    factory.register_alias("variance", "variance_pop");
+
+    // Sample standard deviation.
+    factory.register_function("stddev_samp", create_aggregate_function_stddev_samp<true>);
+
+    // Population standard deviation.
+    factory.register_function("stddev", create_aggregate_function_stddev_pop<true>);
+    factory.register_alias("stddev", "stddev_pop");
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_stddev.h b/be/src/vec/aggregate_functions/aggregate_function_stddev.h
new file mode 100644
index 0000000000..6cdba20032
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_stddev.h
@@ -0,0 +1,285 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/status.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/columns_number.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/io/io_helper.h"
+namespace doris::vectorized {
+
+// Running state for variance / standard deviation over a ColumnVector<T>.
+// It keeps the current mean, the accumulated sum of squared deviations (m2)
+// and the number of values added. When is_stddev is true, results are the
+// square root of the variance; otherwise the variance itself.
+template <typename T, bool is_stddev>
+struct BaseData {
+    BaseData() : mean(0.0), m2(0.0), count(0) {}
+
+    // Serializes mean, m2 and count, in that order.
+    void write(BufferWritable& buf) const {
+        write_binary(mean, buf);
+        write_binary(m2, buf);
+        write_binary(count, buf);
+    }
+
+    // Restores the state in the same order write() emits it.
+    void read(BufferReadable& buf) {
+        read_binary(mean, buf);
+        read_binary(m2, buf);
+        read_binary(count, buf);
+    }
+
+    // Returns the state to "no values added".
+    void reset() {
+        mean = 0.0;
+        m2 = 0.0;
+        count = 0;
+    }
+
+    // Turns a variance into the final result: its square root for the stddev
+    // flavour, the value unchanged for the variance flavour.
+    double get_result(double res) const {
+        if constexpr (is_stddev) {
+            return std::sqrt(res);
+        } else {
+            return res;
+        }
+    }
+
+    // Population result: m2 / count, with a single value short-circuited to 0.
+    // NOTE(review): count == 0 is not special-cased here, so an empty state
+    // evaluates 0.0 / 0 (NaN).
+    double get_pop_result() const {
+        if (count == 1) {
+            return 0.0;
+        }
+        double res = m2 / count;
+        return get_result(res);
+    }
+
+    // Sample result: m2 / (count - 1). The divisor is 0 when count == 1;
+    // SampData does not call this in that case.
+    double get_samp_result() const {
+        double res = m2 / (count - 1);
+        return get_result(res);
+    }
+
+    // The result column type is Nullable(Float64).
+    static const DataTypePtr get_return_type() {
+        return make_nullable(std::make_shared<DataTypeNumber<Float64>>());
+    }
+
+    // Folds another partial state into this one. An empty rhs is a no-op.
+    // The mean becomes the count-weighted mean of both sides, and m2 gains a
+    // correction term proportional to the squared difference of the two means.
+    void merge(const BaseData& rhs) {
+        if (rhs.count == 0) {
+            return;
+        }
+        double delta = mean - rhs.mean;
+        double sum_count = count + rhs.count;
+        mean = rhs.mean + delta * count / sum_count;
+        m2 = rhs.m2 + m2 + (delta * delta) * rhs.count * count / sum_count;
+        count = sum_count;
+    }
+
+    // Adds the value at row_num of columns[0] with a single-pass update of
+    // mean and m2. The static_cast assumes columns[0] is a ColumnVector<T>.
+    void add(const IColumn** columns, size_t row_num) {
+        const auto& sources = static_cast<const ColumnVector<T>&>(*columns[0]);
+        double source_data = sources.get_data()[row_num];
+
+        double delta = source_data - mean;
+        // r is the amount the mean moves by once the new value is included.
+        double r = delta / (1 + count);
+        mean += r;
+        m2 += count * delta * r;
+        count += 1;
+    }
+
+    double mean;
+    double m2;
+    int64_t count;
+};
+
+// Decimal counterpart of BaseData: the same running mean / m2 / count state,
+// with mean and m2 held as DecimalV2Value and all arithmetic done in decimal.
+// When is_stddev is true, results are the square root of the variance.
+template <bool is_stddev>
+struct BaseDatadecimal {
+    BaseDatadecimal() : mean(0), m2(0), count(0) {}
+
+    // Serializes mean, m2 and count, in that order.
+    void write(BufferWritable& buf) const {
+        write_binary(mean, buf);
+        write_binary(m2, buf);
+        write_binary(count, buf);
+    }
+
+    // Restores the state in the same order write() emits it.
+    void read(BufferReadable& buf) {
+        read_binary(mean, buf);
+        read_binary(m2, buf);
+        read_binary(count, buf);
+    }
+
+    // Returns the state to "no values added".
+    void reset() {
+        mean = DecimalV2Value();
+        m2 = DecimalV2Value();
+        count = {};
+    }
+
+    // Turns a variance into the final result: its square root for the stddev
+    // flavour, the value unchanged for the variance flavour.
+    DecimalV2Value get_result(DecimalV2Value res) const {
+        if constexpr (is_stddev) {
+            return DecimalV2Value::sqrt(res);
+        } else {
+            return res;
+        }
+    }
+
+    // Population result: m2 / count. A single value returns a
+    // default-constructed DecimalV2Value (presumably zero - confirm).
+    // NOTE(review): count == 0 is not special-cased, so an empty state
+    // divides by a decimal built from 0.
+    DecimalV2Value get_pop_result() const {
+        DecimalV2Value new_count = DecimalV2Value();
+        if (count == 1) {
+            return new_count;
+        }
+        DecimalV2Value res = m2 / new_count.assign_from_double(count);
+        return get_result(res);
+    }
+
+    // Sample result: m2 / (count - 1). The divisor is built from 0 when
+    // count == 1; SampData does not call this in that case.
+    DecimalV2Value get_samp_result() const {
+        DecimalV2Value new_count = DecimalV2Value();
+        DecimalV2Value res = m2 / new_count.assign_from_double(count - 1);
+        return get_result(res);
+    }
+
+    // The result column type is Nullable(Decimal128) constructed with (27, 9)
+    // (presumably precision 27 and scale 9 - confirm against DataTypeDecimal).
+    static const DataTypePtr get_return_type() {
+        return make_nullable(std::make_shared<DataTypeDecimal<Decimal128>>(27, 9));
+    }
+
+    // Folds another partial state into this one. An empty rhs is a no-op.
+    // Same update as BaseData::merge, with both counts converted to decimal
+    // first so the whole computation stays in DecimalV2Value.
+    void merge(const BaseDatadecimal& rhs) {
+        if (rhs.count == 0) {
+            return;
+        }
+        DecimalV2Value new_count = DecimalV2Value();
+        new_count.assign_from_double(count);
+        DecimalV2Value rhs_count = DecimalV2Value();
+        rhs_count.assign_from_double(rhs.count);
+
+        DecimalV2Value delta = mean - rhs.mean;
+        DecimalV2Value sum_count = new_count + rhs_count;
+        mean = rhs.mean + delta * (new_count / sum_count);
+        m2 = rhs.m2 + m2 + (delta * delta) * (rhs_count * new_count / sum_count);
+        count += rhs.count;
+    }
+
+    // Adds the value at row_num of columns[0] with a single-pass update of
+    // mean and m2. The static_cast assumes columns[0] is a
+    // ColumnDecimal<Decimal128>.
+    void add(const IColumn** columns, size_t row_num) {
+        DecimalV2Value source_data = DecimalV2Value();
+        const auto& sources = static_cast<const ColumnDecimal<Decimal128>&>(*columns[0]);
+        source_data = (DecimalV2Value)sources.get_data()[row_num];
+
+        // Decimal copies of the count before and after this value is included.
+        DecimalV2Value new_count = DecimalV2Value();
+        new_count.assign_from_double(count);
+        DecimalV2Value increase_count = DecimalV2Value();
+        increase_count.assign_from_double(1 + count);
+
+        DecimalV2Value delta = source_data - mean;
+        // r is the amount the mean moves by once the new value is included.
+        DecimalV2Value r = delta / increase_count;
+        mean += r;
+        m2 += new_count * delta * r;
+        count += 1;
+    }
+
+    DecimalV2Value mean;
+    DecimalV2Value m2;
+    int64_t count;
+};
+
+// Result writer for the population flavour (variance / stddev).
+// `Data` supplies the running state (count, get_pop_result()); T selects the
+// nested result column: ColumnDecimal<Decimal128> for decimal T, otherwise
+// ColumnVector<Float64>.
+template <typename T, typename Data>
+struct PopData : Data {
+    using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128>, ColumnVector<Float64>>;
+
+    // Appends one result row to `to`, which must be a ColumnNullable.
+    // An empty state (no values added) yields NULL: the population result is
+    // m2 / count, which is undefined for count == 0 and previously produced a
+    // non-NULL NaN / division-by-zero value.
+    void insert_result_into(IColumn& to) const {
+        ColumnNullable& nullable_column = assert_cast<ColumnNullable&>(to);
+        if (this->count == 0) {
+            nullable_column.insert_default();
+            return;
+        }
+        auto& col = static_cast<ColVecResult&>(nullable_column.get_nested_column());
+        if constexpr (IsDecimalNumber<T>) {
+            col.get_data().push_back(this->get_pop_result().value());
+        } else {
+            col.get_data().push_back(this->get_pop_result());
+        }
+        // Mark the row just appended as non-NULL.
+        nullable_column.get_null_map_data().push_back(0);
+    }
+};
+
+// Result writer for the sample flavour (variance_samp / stddev_samp).
+// `Data` supplies the running state (count, get_samp_result()); T selects the
+// nested result column: ColumnDecimal<Decimal128> for decimal T, otherwise
+// ColumnVector<Float64>.
+template <typename T, typename Data>
+struct SampData : Data {
+    using ColVecResult = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<Decimal128>, ColumnVector<Float64>>;
+
+    // Appends one result row to `to`, which must be a ColumnNullable.
+    // The sample result divides by (count - 1), so it is undefined for fewer
+    // than two values; both count == 0 and count == 1 yield NULL. Previously
+    // only count == 1 did, and an empty state emitted m2 / -1 as a value.
+    void insert_result_into(IColumn& to) const {
+        ColumnNullable& nullable_column = assert_cast<ColumnNullable&>(to);
+        if (this->count < 2) {
+            nullable_column.insert_default();
+            return;
+        }
+        auto& col = static_cast<ColVecResult&>(nullable_column.get_nested_column());
+        if constexpr (IsDecimalNumber<T>) {
+            col.get_data().push_back(this->get_samp_result().value());
+        } else {
+            col.get_data().push_back(this->get_samp_result());
+        }
+        // Mark the row just appended as non-NULL.
+        nullable_column.get_null_map_data().push_back(0);
+    }
+};
+
+// Name tag: adds the function name "stddev" on top of the state type `Data`.
+template <typename Data>
+struct StddevData : Data {
+    static const char* name() { return "stddev"; }
+};
+
+// Name tag: adds the function name "variance" on top of the state type `Data`.
+template <typename Data>
+struct VarianceData : Data {
+    static const char* name() { return "variance"; }
+};
+
+// Name tag: adds the function name "variance_samp" on top of the state type `Data`.
+template <typename Data>
+struct VarianceSampData : Data {
+    static const char* name() { return "variance_samp"; }
+};
+
+// Name tag: adds the function name "stddev_samp" on top of the state type `Data`.
+template <typename Data>
+struct StddevSampData : Data {
+    static const char* name() { return "stddev_samp"; }
+};
+
+// Aggregate function shared by the variance / stddev family. Every operation
+// is forwarded to the `Data` state object stored at `place`; `Data` also
+// provides the function name and the return type.
+template <typename Data>
+class AggregateFunctionStddevSamp final
+        : public IAggregateFunctionDataHelper<Data, AggregateFunctionStddevSamp<Data>> {
+    using HelperBase = IAggregateFunctionDataHelper<Data, AggregateFunctionStddevSamp<Data>>;
+
+public:
+    // The function takes no parameters, so an empty parameter list is passed on.
+    AggregateFunctionStddevSamp(const DataTypes& argument_types_)
+            : HelperBase(argument_types_, {}) {}
+
+    String get_name() const override {
+        return Data::name();
+    }
+
+    bool insert_to_null_default() const override {
+        return false;
+    }
+
+    DataTypePtr get_return_type() const override {
+        return Data::get_return_type();
+    }
+
+    // Feeds row `row_num` of the argument columns into the state.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena*) const override {
+        auto& state = this->data(place);
+        state.add(columns, row_num);
+    }
+
+    void reset(AggregateDataPtr __restrict place) const override {
+        auto& state = this->data(place);
+        state.reset();
+    }
+
+    // Folds the state at `rhs` into the state at `place`.
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs,
+               Arena*) const override {
+        auto& state = this->data(place);
+        state.merge(this->data(rhs));
+    }
+
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        const auto& state = this->data(place);
+        state.write(buf);
+    }
+
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf,
+                     Arena*) const override {
+        auto& state = this->data(place);
+        state.read(buf);
+    }
+
+    // Appends the final result for the state at `place` to `to`.
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        const auto& state = this->data(place);
+        state.insert_result_into(to);
+    }
+
+    const char* get_header_file_path() const override {
+        return __FILE__;
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_sum.cpp b/be/src/vec/aggregate_functions/aggregate_function_sum.cpp
new file mode 100644
index 0000000000..f7127e7b63
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_sum.cpp
@@ -0,0 +1,88 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionSum.cpp
+// and modified by Doris
+
+#include "vec/aggregate_functions/aggregate_function_sum.h"
+
+#include <fmt/format.h>
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/helpers.h"
+
+namespace doris::vectorized {
+
+// Type bundle for the regular "sum" function over input type T.
+// The result type is widened: Decimal128 for decimal inputs,
+// NearestFieldType<T> for everything else.
+template <typename T>
+struct SumSimple {
+    /// @note It uses slow Decimal128 (because we need such a variant). sumWithOverflow is faster for Decimal32/64
+    using ResultType = std::conditional_t<IsDecimalNumber<T>, Decimal128, NearestFieldType<T>>;
+    // Alternative left in by the original author (no decimal special case):
+    // using ResultType = NearestFieldType<T>;
+    using AggregateDataType = AggregateFunctionSumData<ResultType>;
+    using Function = AggregateFunctionSum<T, ResultType, AggregateDataType>;
+};
+
+// Single-parameter alias so the function can be passed as a
+// `template <typename> class` argument.
+template <typename T>
+using AggregateFunctionSumSimple = typename SumSimple<T>::Function;
+
+// Creates a sum aggregate from the `Function` family for argument_types[0].
+// Decimal arguments are dispatched through create_with_decimal_type, all other
+// types through create_with_numeric_type. When neither produces a function, a
+// warning is logged and the empty pointer is returned.
+// `parameters` and `result_is_nullable` are not used, and the argument count
+// is not validated here.
+template <template <typename> class Function>
+AggregateFunctionPtr create_aggregate_function_sum(const std::string& name,
+                                                   const DataTypes& argument_types,
+                                                   const Array& parameters,
+                                                   const bool result_is_nullable) {
+    const DataTypePtr& input_type = argument_types[0];
+
+    AggregateFunctionPtr function;
+    if (is_decimal(input_type)) {
+        function.reset(
+                create_with_decimal_type<Function>(*input_type, *input_type, argument_types));
+    } else {
+        function.reset(create_with_numeric_type<Function>(*input_type, argument_types));
+    }
+
+    if (function) {
+        return function;
+    }
+
+    LOG(WARNING) << fmt::format("Illegal type {} of argument for aggregate function {}",
+                                argument_types[0]->get_name(), name);
+    return function;
+}
+
+// Type bundle for the agg-reader flavour of "sum": unlike SumSimple, the
+// result type is not widened and stays the same as the input type T.
+template <typename T>
+struct SumSimpleReader {
+    using ResultType = T;
+    using AggregateDataType = AggregateFunctionSumData<ResultType>;
+    using Function = AggregateFunctionSum<T, ResultType, AggregateDataType>;
+};
+
+// Single-parameter alias so the function can be passed as a
+// `template <typename> class` argument.
+template <typename T>
+using AggregateFunctionSumSimpleReader = typename SumSimpleReader<T>::Function;
+
+// Creator for the agg-reader "sum": forwards to create_aggregate_function_sum
+// with the non-widening AggregateFunctionSumSimpleReader family.
+AggregateFunctionPtr create_aggregate_function_sum_reader(const std::string& name,
+                                                          const DataTypes& argument_types,
+                                                          const Array& parameters,
+                                                          const bool result_is_nullable) {
+    return create_aggregate_function_sum<AggregateFunctionSumSimpleReader>(
+            name, argument_types, parameters, result_is_nullable);
+}
+
+// Registers the regular "sum" function (widening result type) as a
+// non-nullable entry in the factory.
+void register_aggregate_function_sum(AggregateFunctionSimpleFactory& factory) {
+    factory.register_function("sum", create_aggregate_function_sum<AggregateFunctionSumSimple>);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_sum.h b/be/src/vec/aggregate_functions/aggregate_function_sum.h
new file mode 100644
index 0000000000..402af36354
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_sum.h
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionSum.h
+// and modified by Doris
+
+#pragma once
+
+#include <istream>
+#include <ostream>
+#include <type_traits>
+
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+template <typename T>
+struct AggregateFunctionSumData {
+    T sum {}; // running total, value-initialized
+    // Accumulates one input value into the total.
+    void add(T value) { sum += value; }
+    // Folds another partial state into this one.
+    void merge(const AggregateFunctionSumData& rhs) { sum += rhs.sum; }
+    // Serializes the total with write_binary.
+    void write(BufferWritable& buf) const { write_binary(sum, buf); }
+    // Restores the total with read_binary (counterpart of write()).
+    void read(BufferReadable& buf) { read_binary(sum, buf); }
+    // Returns the current total.
+    T get() const { return sum; }
+};
+
+/// Sum aggregate function: adds values of type T into a Data state and outputs a TResult.
+template <typename T, typename TResult, typename Data>
+class AggregateFunctionSum final
+        : public IAggregateFunctionDataHelper<Data, AggregateFunctionSum<T, TResult, Data>> {
+public:
+    using ResultDataType = std::conditional_t<IsDecimalNumber<T>, DataTypeDecimal<TResult>,
+                                              DataTypeNumber<TResult>>;
+    using ColVecType = std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<T>, ColumnVector<T>>;
+    using ColVecResult =
+            std::conditional_t<IsDecimalNumber<T>, ColumnDecimal<TResult>, ColumnVector<TResult>>;
+
+    String get_name() const override { return "sum"; }
+    // Constructor without a data type: the scale is fixed to 0.
+    AggregateFunctionSum(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<Data, AggregateFunctionSum<T, TResult, Data>>(
+                      argument_types_, {}),
+              scale(0) {}
+    // Constructor with a data type: the scale is read from it via get_decimal_scale().
+    AggregateFunctionSum(const IDataType& data_type, const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<Data, AggregateFunctionSum<T, TResult, Data>>(
+                      argument_types_, {}),
+              scale(get_decimal_scale(data_type)) {}
+    // For decimal T the result type is built from max_precision() and the stored scale.
+    DataTypePtr get_return_type() const override {
+        if constexpr (IsDecimalNumber<T>)
+            return std::make_shared<ResultDataType>(ResultDataType::max_precision(), scale);
+        else
+            return std::make_shared<ResultDataType>();
+    }
+    // Adds the value at row_num of the first argument column to the state at `place`.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena*) const override {
+        const auto& column = static_cast<const ColVecType&>(*columns[0]);
+        this->data(place).add(column.get_data()[row_num]);
+    }
+    // Puts the running sum back to its value-initialized state.
+    void reset(AggregateDataPtr place) const override {
+        this->data(place).sum = {};
+    }
+    // Folds the partial state `rhs` into `place`.
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena*) const override {
+        this->data(place).merge(this->data(rhs));
+    }
+    // Writes the state at `place` to `buf`.
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        this->data(place).write(buf);
+    }
+    // Reads the state at `place` back from `buf`.
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena*) const override {
+        this->data(place).read(buf);
+    }
+    // Appends the current sum to the result column `to` (cast to ColVecResult, unchecked).
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        auto& column = static_cast<ColVecResult&>(to);
+        column.get_data().push_back(this->data(place).get());
+    }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+
+private:
+    UInt32 scale; // scale for the decimal return type; 0 when built by the one-argument ctor
+};
+// Declaration of the creator for the reader flavour of the sum aggregate function.
+AggregateFunctionPtr create_aggregate_function_sum_reader(const std::string& name,
+                                                          const DataTypes& argument_types,
+                                                          const Array& parameters,
+                                                          const bool result_is_nullable);
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp b/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
new file mode 100644
index 0000000000..0258858655
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.cpp
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionUniq.cpp
+// and modified by Doris
+
+#include "vec/aggregate_functions/aggregate_function_uniq.h"
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/factory_helpers.h"
+#include "vec/aggregate_functions/helpers.h"
+#include "vec/data_types/data_type_string.h"
+
+namespace doris::vectorized {
+
+template <template <typename> class Data, typename DataForVariadic>
+AggregateFunctionPtr create_aggregate_function_uniq(const std::string& name,
+                                                    const DataTypes& argument_types,
+                                                    const Array& params,
+                                                    const bool result_is_nullable) {
+    assert_no_parameters(name, params);
+    // Returns null, after logging a warning, for an unsupported arity or argument type.
+    // Only the single-argument form is implemented.
+    if (argument_types.size() != 1) {
+        LOG(WARNING) << "Incorrect number of arguments for aggregate function " << name;
+        return nullptr;
+    }
+
+    const IDataType& argument_type = *argument_types[0];
+    // Try the numeric-type dispatch first; a null result means it did not produce a function.
+    AggregateFunctionPtr res(create_with_numeric_type<AggregateFunctionUniq, Data>(
+            argument_type, argument_types));
+    if (res)
+        return res;
+    // TODO: DateType
+    WhichDataType which(argument_type);
+    if (which.is_decimal())
+        return std::make_shared<AggregateFunctionUniq<Decimal128, Data<Int128>>>(argument_types);
+    if (which.is_string_or_fixed_string())
+        return std::make_shared<AggregateFunctionUniq<String, Data<String>>>(argument_types);
+
+    LOG(WARNING) << "Illegal type " << argument_type.get_name()
+                 << " of argument for aggregate function " << name;
+    return nullptr;
+}
+// Registers the exact-distinct creator under the name "multi_distinct_count".
+void register_aggregate_function_uniq(AggregateFunctionSimpleFactory& factory) {
+    using VariadicData = AggregateFunctionUniqExactData<String>;
+    AggregateFunctionCreator exact_creator =
+            create_aggregate_function_uniq<AggregateFunctionUniqExactData, VariadicData>;
+    factory.register_function("multi_distinct_count", exact_creator);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.h b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
new file mode 100644
index 0000000000..ea3240479f
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.h
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/AggregateFunctionUniq.h
+// and modified by Doris
+
+#pragma once
+
+#include <type_traits>
+
+#include "gutil/hash/city.h"
+
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_decimal.h"
+#include "vec/common/aggregation_common.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/bit_cast.h"
+#include "vec/common/hash_table/hash_set.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/data_types/data_type_number.h"
+
+namespace doris::vectorized {
+
+/// uniqExact: state for the exact distinct count, a hash set of the values inserted.
+
+template <typename T>
+struct AggregateFunctionUniqExactData {
+    using Key = T;
+    /// The set is keyed by the value type itself and hashed with HashCRC32.
+    /// When creating, the hash table must be small.
+    using Set = HashSet<Key, HashCRC32<Key>, HashTableGrower<4>,
+                        HashTableAllocatorWithStackMemory<sizeof(Key) * (1 << 4)>>;
+    /// Keys inserted so far.
+    Set set;
+    /// Name returned by AggregateFunctionUniq::get_name() for this state type.
+    static String get_name() { return "uniqExact"; }
+};
+
+/// For strings, we put the 128-bit SipHash of each value into the hash table, not the string.
+template <>
+struct AggregateFunctionUniqExactData<String> {
+    using Key = UInt128;
+    /// Keys are already hashes, so UInt128TrivialHash is used as the table hash.
+    /// When creating, the hash table must be small.
+    using Set = HashSet<Key, UInt128TrivialHash, HashTableGrower<3>,
+                        HashTableAllocatorWithStackMemory<sizeof(Key) * (1 << 3)>>;
+    /// Hashes inserted so far.
+    Set set;
+    /// Name returned by AggregateFunctionUniq::get_name() for this state type.
+    static String get_name() { return "uniqExact"; }
+};
+
+namespace detail {
+
+/** Inserts one row of a column into the set held by a `uniq` aggregate state.
+  * The element type T selects, at compile time, how the row is read from the column.
+  */
+template <typename T, typename Data>
+struct OneAdder {
+    static void ALWAYS_INLINE add(Data& data, const IColumn& column, size_t row_num) {
+        if constexpr (std::is_same_v<T, String>) {
+            // Strings are represented in the set by their 128-bit SipHash.
+            const StringRef str = column.get_data_at(row_num);
+            SipHash hasher;
+            hasher.update(str.data, str.size);
+            UInt128 digest;
+            hasher.get128(digest.low, digest.high);
+            data.set.insert(digest);
+        } else if constexpr (std::is_same_v<T, Decimal128>) {
+            const auto& decimals = assert_cast<const ColumnDecimal<Decimal128>&>(column);
+            data.set.insert(decimals.get_data()[row_num]);
+        } else {
+            data.set.insert(assert_cast<const ColumnVector<T>&>(column).get_data()[row_num]);
+        }
+    }
+};
+
+} // namespace detail
+
+/// Counts distinct values: rows are inserted into Data::set and the set size is the result.
+template <typename T, typename Data>
+class AggregateFunctionUniq final
+        : public IAggregateFunctionDataHelper<Data, AggregateFunctionUniq<T, Data>> {
+public:
+    AggregateFunctionUniq(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper<Data, AggregateFunctionUniq<T, Data>>(argument_types_,
+                                                                                 {}) {}
+    // The name comes from the state type, not from the registration name.
+    String get_name() const override { return Data::get_name(); }
+    // The result is always Int64.
+    DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt64>(); }
+    // Inserts the row `row_num` of the first argument column through detail::OneAdder.
+    void add(AggregateDataPtr __restrict place, const IColumn** columns, size_t row_num,
+             Arena*) const override {
+        detail::OneAdder<T, Data>::add(this->data(place), *columns[0], row_num);
+    }
+    // Merges the set of `rhs` into the set of `place`.
+    void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena*) const override {
+        this->data(place).set.merge(this->data(rhs).set);
+    }
+    // Writes the whole set to `buf`.
+    void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override {
+        this->data(place).set.write(buf);
+    }
+    // Reads the set back from `buf`.
+    void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, Arena*) const override {
+        this->data(place).set.read(buf);
+    }
+    // Appends the number of entries in the set to the Int64 result column.
+    void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override {
+        assert_cast<ColumnInt64&>(to).get_data().push_back(this->data(place).set.size());
+    }
+
+    const char* get_header_file_path() const override { return __FILE__; }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.cpp b/be/src/vec/aggregate_functions/aggregate_function_window.cpp
new file mode 100644
index 0000000000..c40f2bce53
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_window.cpp
@@ -0,0 +1,176 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/Transforms/WindowTransform.cpp
+// and modified by Doris
+
+#include "vec/aggregate_functions/aggregate_function_window.h"
+
+#include "common/logging.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/aggregate_functions/factory_helpers.h"
+#include "vec/aggregate_functions/helpers.h"
+namespace doris::vectorized {
+
+AggregateFunctionPtr create_aggregate_function_dense_rank(const std::string& name,
+                                                          const DataTypes& argument_types,
+                                                          const Array& parameters,
+                                                          const bool result_is_nullable) {
+    assert_no_parameters(name, parameters);
+    // No type dispatch: every argument list maps to the same WindowFunctionDenseRank class.
+    return AggregateFunctionPtr(std::make_shared<WindowFunctionDenseRank>(argument_types));
+}
+
+AggregateFunctionPtr create_aggregate_function_rank(const std::string& name,
+                                                    const DataTypes& argument_types,
+                                                    const Array& parameters,
+                                                    const bool result_is_nullable) {
+    assert_no_parameters(name, parameters);
+    // No type dispatch: every argument list maps to the same WindowFunctionRank class.
+    return AggregateFunctionPtr(std::make_shared<WindowFunctionRank>(argument_types));
+}
+
+AggregateFunctionPtr create_aggregate_function_row_number(const std::string& name,
+                                                          const DataTypes& argument_types,
+                                                          const Array& parameters,
+                                                          const bool result_is_nullable) {
+    assert_no_parameters(name, parameters);
+    // No type dispatch: every argument list maps to the same WindowFunctionRowNumber class.
+    return AggregateFunctionPtr(std::make_shared<WindowFunctionRowNumber>(argument_types));
+}
+
+template <template <typename> class AggregateFunctionTemplate, template <typename> class Data,
+          bool is_nullable, bool is_copy = false>
+static IAggregateFunction* create_function_single_value(const String& name,
+                                                        const DataTypes& argument_types,
+                                                        const Array& parameters) {
+    using StoreType = std::conditional_t<is_copy, CopiedValue, Value>;
+    // Picks the LeadAndLagData instantiation from the first argument's type; null if none fits.
+    assert_arity_at_most<3>(name, argument_types);
+    if (argument_types.empty()) { // argument_types[0] is dereferenced below
+        LOG(WARNING) << "Incorrect number of arguments for aggregate function " << name;
+        return nullptr;
+    }
+    auto type = argument_types[0].get();
+    if (type->is_nullable())
+        type = assert_cast<const DataTypeNullable*>(type)->get_nested_type().get();
+    WhichDataType which(*type);
+
+#define DISPATCH(TYPE)                        \
+    if (which.idx == TypeIndex::TYPE)         \
+        return new AggregateFunctionTemplate< \
+                Data<LeadAndLagData<TYPE, is_nullable, false, StoreType>>>(argument_types);
+    FOR_NUMERIC_TYPES(DISPATCH)
+#undef DISPATCH
+    // Decimals are instantiated with Int128, date/datetime with Int64, strings with StringRef.
+    if (which.is_decimal())
+        return new AggregateFunctionTemplate<
+                Data<LeadAndLagData<Int128, is_nullable, false, StoreType>>>(argument_types);
+    if (which.is_date_or_datetime())
+        return new AggregateFunctionTemplate<
+                Data<LeadAndLagData<Int64, is_nullable, false, StoreType>>>(argument_types);
+    if (which.is_string_or_fixed_string())
+        return new AggregateFunctionTemplate<
+                Data<LeadAndLagData<StringRef, is_nullable, true, StoreType>>>(argument_types);
+    DCHECK(false) << "with unknowed type, failed in  create_aggregate_function_leadlag";
+    return nullptr;
+}
+
+template <bool is_nullable>
+AggregateFunctionPtr create_aggregate_function_lag(const std::string& name,
+                                                   const DataTypes& argument_types,
+                                                   const Array& parameters,
+                                                   const bool result_is_nullable) {
+    auto* raw = create_function_single_value<WindowFunctionData, WindowFunctionLagData,
+                                             is_nullable>(name, argument_types, parameters);
+    return AggregateFunctionPtr(raw);
+}
+
+template <bool is_nullable>
+AggregateFunctionPtr create_aggregate_function_lead(const std::string& name,
+                                                    const DataTypes& argument_types,
+                                                    const Array& parameters,
+                                                    const bool result_is_nullable) {
+    auto* raw = create_function_single_value<WindowFunctionData, WindowFunctionLeadData,
+                                             is_nullable>(name, argument_types, parameters);
+    return AggregateFunctionPtr(raw);
+}
+
+template <bool is_nullable>
+AggregateFunctionPtr create_aggregate_function_first(const std::string& name,
+                                                     const DataTypes& argument_types,
+                                                     const Array& parameters,
+                                                     const bool result_is_nullable) {
+    auto* raw = create_function_single_value<WindowFunctionData, WindowFunctionFirstData,
+                                             is_nullable>(name, argument_types, parameters);
+    return AggregateFunctionPtr(raw);
+}
+
+template <bool is_nullable>
+AggregateFunctionPtr create_aggregate_function_last(const std::string& name,
+                                                    const DataTypes& argument_types,
+                                                    const Array& parameters,
+                                                    const bool result_is_nullable) {
+    auto* raw = create_function_single_value<WindowFunctionData, WindowFunctionLastData,
+                                             is_nullable>(name, argument_types, parameters);
+    return AggregateFunctionPtr(raw);
+}
+
+AggregateFunctionPtr create_aggregate_function_replace_if_not_null(const std::string& name,
+                                                                   const DataTypes& argument_types,
+                                                                   const Array& parameters,
+                                                                   const bool result_is_nullable) {
+    auto* raw = create_function_single_value<WindowFunctionData, WindowFunctionFirstData,
+                                             false, true>(name, argument_types, parameters);
+    return AggregateFunctionPtr(raw);
+}
+// NOTE(review): same instantiation as create_aggregate_function_replace_if_not_null - confirm.
+AggregateFunctionPtr create_aggregate_function_replace(const std::string& name,
+                                                       const DataTypes& argument_types,
+                                                       const Array& parameters,
+                                                       const bool result_is_nullable) {
+    auto* raw = create_function_single_value<WindowFunctionData, WindowFunctionFirstData,
+                                             false, true>(name, argument_types, parameters);
+    return AggregateFunctionPtr(raw);
+}
+
+AggregateFunctionPtr create_aggregate_function_replace_nullable(const std::string& name,
+                                                                const DataTypes& argument_types,
+                                                                const Array& parameters,
+                                                                const bool result_is_nullable) {
+    auto* raw = create_function_single_value<WindowFunctionData, WindowFunctionFirstData,
+                                             true, true>(name, argument_types, parameters);
+    return AggregateFunctionPtr(raw);
+}
+// Registers the creators of the three ranking window functions under their function names.
+void register_aggregate_function_window_rank(AggregateFunctionSimpleFactory& factory) {
+    factory.register_function("dense_rank", create_aggregate_function_dense_rank);
+    factory.register_function("rank", create_aggregate_function_rank);
+    factory.register_function("row_number", create_aggregate_function_row_number);
+}
+// Registers each name twice; the <true> creators pass an extra `true` (presumably nullable).
+void register_aggregate_function_window_lead_lag(AggregateFunctionSimpleFactory& factory) {
+    factory.register_function("lead", create_aggregate_function_lead<false>);
+    factory.register_function("lead", create_aggregate_function_lead<true>, true);
+    factory.register_function("lag", create_aggregate_function_lag<false>);
+    factory.register_function("lag", create_aggregate_function_lag<true>, true);
+    factory.register_function("first_value", create_aggregate_function_first<false>);
+    factory.register_function("first_value", create_aggregate_function_first<true>, true);
+    factory.register_function("last_value", create_aggregate_function_last<false>);
+    factory.register_function("last_value", create_aggregate_function_last<true>, true);
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.h b/be/src/vec/aggregate_functions/aggregate_function_window.h
new file mode 100644
index 0000000000..071c2ea684
--- /dev/null
+++ b/be/src/vec/aggregate_functions/aggregate_function_window.h
@@ -0,0 +1,434 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Processors/Transforms/WindowTransform.h
+// and modified by Doris
+
+#pragma once
+
+#include <istream>
+#include <ostream>
+#include <type_traits>
+
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+struct RowNumberData {
+    int64_t count = 0; // rows counted so far; starts at the value reset() assigns
+};
+// row_number window function: a counter that every add call advances by one.
+class WindowFunctionRowNumber final
+        : public IAggregateFunctionDataHelper<RowNumberData, WindowFunctionRowNumber> {
+public:
+    WindowFunctionRowNumber(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper(argument_types_, {}) {}
+
+    String get_name() const override { return "row_number"; }
+    // The result is always Int64.
+    DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt64>(); }
+    // Advances the counter by one; the column arguments are ignored.
+    void add(AggregateDataPtr place, const IColumn**, size_t, Arena*) const override {
+        ++data(place).count;
+    }
+    // Range form: also advances the counter by exactly one, whatever the bounds are.
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, AggregateDataPtr place, const IColumn** columns,
+                                Arena* arena) const override {
+        ++data(place).count;
+    }
+    // Restarts the counter at zero.
+    void reset(AggregateDataPtr place) const override { this->data(place).count = 0; }
+    // Appends the current counter value to the Int64 result column.
+    void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override {
+        assert_cast<ColumnInt64&>(to).get_data().push_back(data(place).count);
+    }
+    // State exchange is not implemented: merge/serialize/deserialize have empty bodies.
+    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override {}
+    void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {}
+    void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {}
+    const char* get_header_file_path() const override { return __FILE__; }
+};
+
+struct RankData {
+    int64_t rank = 0;              // defaults mirror WindowFunctionRank::reset()
+    int64_t count = 1;             // amount added to rank when the next peer group starts
+    int64_t peer_group_start = -1; // -1: no peer group seen yet
+};
+// rank window function: when a new peer group starts, rank grows by the previous group's size.
+class WindowFunctionRank final : public IAggregateFunctionDataHelper<RankData, WindowFunctionRank> {
+public:
+    WindowFunctionRank(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper(argument_types_, {}) {}
+
+    String get_name() const override { return "rank"; }
+    // The result is always Int64.
+    DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt64>(); }
+    // Row-wise form: simply increments rank; the column arguments are ignored.
+    void add(AggregateDataPtr place, const IColumn**, size_t, Arena*) const override {
+        ++data(place).rank;
+    }
+    // Range form: [frame_start, frame_end) is used as the current peer group.
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, AggregateDataPtr place, const IColumn** columns,
+                                Arena* arena) const override {
+        int64_t peer_group_count = frame_end - frame_start;
+        if (this->data(place).peer_group_start != frame_start) { // a different group began
+            this->data(place).peer_group_start = frame_start;
+            this->data(place).rank += this->data(place).count; // skip past the previous group
+        }
+        this->data(place).count = peer_group_count;
+    }
+    // With count = 1 and rank = 0, the first group seen after a reset gets rank 1.
+    void reset(AggregateDataPtr place) const override {
+        this->data(place).rank = 0;
+        this->data(place).count = 1;
+        this->data(place).peer_group_start = -1;
+    }
+    // Appends the current rank to the Int64 result column.
+    void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override {
+        assert_cast<ColumnInt64&>(to).get_data().push_back(data(place).rank);
+    }
+    // State exchange is not implemented: merge/serialize/deserialize have empty bodies.
+    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override {}
+    void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {}
+    void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {}
+    const char* get_header_file_path() const override { return __FILE__; }
+};
+
+struct DenseRankData {
+    int64_t rank = 0;              // defaults mirror WindowFunctionDenseRank::reset()
+    int64_t peer_group_start = -1; // -1: no peer group seen yet
+};
+class WindowFunctionDenseRank final
+        : public IAggregateFunctionDataHelper<DenseRankData, WindowFunctionDenseRank> {
+public:
+    WindowFunctionDenseRank(const DataTypes& argument_types_)
+            : IAggregateFunctionDataHelper(argument_types_, {}) {}
+    // dense_rank window function: rank grows by one each time a new peer group starts.
+    String get_name() const override { return "dense_rank"; }
+    // The result is always Int64.
+    DataTypePtr get_return_type() const override { return std::make_shared<DataTypeInt64>(); }
+    // Row-wise form: simply increments rank; the column arguments are ignored.
+    void add(AggregateDataPtr place, const IColumn**, size_t, Arena*) const override {
+        ++data(place).rank;
+    }
+    // Range form: a frame_start different from the stored one marks a new peer group.
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, AggregateDataPtr place, const IColumn** columns,
+                                Arena* arena) const override {
+        if (this->data(place).peer_group_start != frame_start) {
+            this->data(place).peer_group_start = frame_start;
+            this->data(place).rank++;
+        }
+    }
+    // With rank = 0, the first group seen after a reset gets rank 1.
+    void reset(AggregateDataPtr place) const override {
+        this->data(place).rank = 0;
+        this->data(place).peer_group_start = -1;
+    }
+    // Appends the current rank to the Int64 result column.
+    void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override {
+        assert_cast<ColumnInt64&>(to).get_data().push_back(data(place).rank);
+    }
+    // State exchange is not implemented: merge/serialize/deserialize have empty bodies.
+    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override {}
+    void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {}
+    void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {}
+    const char* get_header_file_path() const override { return __FILE__; }
+};
+
+struct Value {
+public:
+    bool is_null() const { return _is_null; }
+    StringRef get_value() const { return _value; }
+    // Value slot with a null flag: set_value stores the StringRef as given, copying no bytes.
+    void set_null(bool is_null) { _is_null = is_null; }
+    void set_value(StringRef value) { _value = value; }
+    void reset() {
+        _is_null = false;
+        _value = {};
+    }
+
+protected:
+    StringRef _value;
+    bool _is_null = false; // initialized so is_null() is defined before set_null()/reset()
+};
+
+struct CopiedValue : public Value {
+public:
+    void set_value(StringRef value) { // hides Value::set_value (not virtual)
+        _copied_value = value.to_string(); // keep a private copy of the bytes
+        _value = StringRef(_copied_value); // the base-class view now refers to that copy
+    }
+    // NOTE(review): _value refers into _copied_value; a default copy would alias the source.
+private:
+    std::string _copied_value;
+};
+
+// Current value and default value shared by the lead/lag/first/last window states.
+template <typename T, bool is_nullable, bool is_string, typename StoreType = Value>
+struct LeadAndLagData {
+public:
+    bool has_init() const { return _is_init; }
+
+    // Clears the current value, the cached default value and both state flags.
+    void reset() {
+        _data_value.reset();
+        _default_value.reset();
+        _is_init = false;
+        _has_value = false;
+    }
+
+    // Appends the current value to `to`; a null value is written via insert_default().
+    void insert_result_into(IColumn& to) const {
+        if constexpr (is_nullable) {
+            if (_data_value.is_null()) {
+                auto& col = assert_cast<ColumnNullable&>(to);
+                col.insert_default();
+            } else {
+                auto& col = assert_cast<ColumnNullable&>(to);
+                if constexpr (is_string) {
+                    StringRef value = _data_value.get_value();
+                    col.insert_data(value.data, value.size);
+                } else {
+                    StringRef value = _data_value.get_value();
+                    col.insert_data(value.data, 0);
+                }
+            }
+        } else {
+            if constexpr (is_string) {
+                auto& col = assert_cast<ColumnString&>(to);
+                StringRef value = _data_value.get_value();
+                col.insert_data(value.data, value.size);
+            } else {
+                StringRef value = _data_value.get_value();
+                to.insert_data(value.data, 0);
+            }
+        }
+    }
+
+    // Captures row `pos` of columns[0] as the current value and marks it as set.
+    // A null row in a nullable argument column is recorded as null instead.
+    void set_value(const IColumn** columns, int64_t pos) {
+        const IColumn* data_column = columns[0];
+        if constexpr (is_nullable) {
+            // The argument may be a plain column: only unwrap when it really is nullable.
+            if (const auto* nullable_column = check_and_get_column<ColumnNullable>(columns[0])) {
+                if (nullable_column->is_null_at(pos)) {
+                    _data_value.set_null(true);
+                    _has_value = true;
+                    return;
+                }
+                data_column = nullable_column->get_nested_column_ptr().get();
+            }
+        }
+        _data_value.set_value(data_column->get_data_at(pos));
+        _data_value.set_null(false);
+        _has_value = true;
+    }
+
+    bool defualt_is_null() { return _default_value.is_null(); }
+
+    void set_is_null() { _data_value.set_null(true); }
+
+    void set_value_from_default() {
+        _data_value.set_value(_default_value.get_value());
+        _data_value.set_null(false); // an earlier null must not mask the default
+    }
+
+    bool has_set_value() { return _has_value; }
+
+    // Caches row 0 of `column` as the default value the first time it is called
+    // after construction or reset().
+    void check_default(const IColumn* column) {
+        if (!has_init()) {
+            const IColumn* data_column = column;
+            if (const auto* nullable_column = check_and_get_column<ColumnNullable>(column)) {
+                // A non-null default inside a nullable column lives in the nested column.
+                data_column = nullable_column->get_nested_column_ptr().get();
+                if (nullable_column->is_null_at(0)) {
+                    _default_value.set_null(true);
+                    data_column = nullptr;
+                }
+            }
+            if (data_column != nullptr) {
+                _default_value.set_value(data_column->get_data_at(0));
+            }
+            _is_init = true;
+        }
+    }
+
+private:
+    StoreType _data_value;
+    StoreType _default_value;
+    bool _has_value = false;
+    bool _is_init = false;
+};
+
+template <typename Data>
+struct WindowFunctionLeadData : Data { // lead(): row frame_end - 1, or the default value
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, const IColumn** columns) {
+        this->check_default(columns[2]); // columns[2] supplies the default value
+        if (frame_end > partition_end) { // frame ends past the partition: output the default
+            if (this->defualt_is_null()) {
+                this->set_is_null();
+            } else {
+                this->set_value_from_default();
+            }
+            return;
+        }
+        this->set_value(columns, frame_end - 1); // signed bounds, matching the sibling structs
+    }
+    void add(int64_t row, const IColumn** columns) {
+        LOG(FATAL) << "WindowFunctionLeadData do not support add";
+    }
+    static const char* name() { return "lead"; }
+};
+
+template <typename Data>
+struct WindowFunctionLagData : Data {
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, const IColumn** columns) {
+        this->check_default(columns[2]);
+        if (frame_end > partition_start) { // frame_end - 1 is at or after partition_start
+            this->set_value(columns, frame_end - 1);
+            return;
+        }
+        if (this->defualt_is_null()) { // otherwise fall back to the default (or NULL)
+            this->set_is_null();
+        } else {
+            this->set_value_from_default();
+        }
+    }
+    void add(int64_t row, const IColumn** columns) {
+        LOG(FATAL) << "WindowFunctionLagData do not support add";
+    }
+    static const char* name() { return "lag"; }
+};
+
+template <typename Data>
+struct WindowFunctionFirstData : Data {
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, const IColumn** columns) {
+        // Only the first call records a value; later calls leave it untouched.
+        if (this->has_set_value()) {
+            return;
+        }
+        const bool non_empty_frame = frame_start < frame_end;
+        if (non_empty_frame && frame_end <= partition_start) { // frame ends before the partition
+            this->set_is_null();
+            return;
+        }
+        const int64_t first_row = std::max<int64_t>(frame_start, partition_start);
+        this->set_value(columns, first_row);
+    }
+    void add(int64_t row, const IColumn** columns) {
+        if (!this->has_set_value()) {
+            this->set_value(columns, row);
+        }
+    }
+    static const char* name() { return "first_value"; }
+};
+
+template <typename Data>
+struct WindowFunctionLastData : Data { // last_value(): value of the last row in the frame
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, const IColumn** columns) {
+        if ((frame_start < frame_end) &&
+            ((frame_end <= partition_start) ||
+             (frame_start >= partition_end))) { // frame lies outside the partition: NULL
+            this->set_is_null();
+            return;
+        }
+        frame_end = std::min<int64_t>(frame_end, partition_end); // clamp to the partition
+        this->set_value(columns, frame_end - 1);
+    }
+    void add(int64_t row, const IColumn** columns) {
+        // last_value must follow the most recent row, so every call overwrites
+        // the stored value. An early return on has_set_value() (as first_value
+        // uses) would pin the result to the first row seen.
+        this->set_value(columns, row);
+    }
+    static const char* name() { return "last_value"; }
+};
+
+template <typename Data>
+class WindowFunctionData final // exposes a window Data state through the aggregate interface
+        : public IAggregateFunctionDataHelper<Data, WindowFunctionData<Data>> {
+public:
+    WindowFunctionData(const DataTypes& argument_types)
+            : IAggregateFunctionDataHelper<Data, WindowFunctionData<Data>>(argument_types, {}),
+              _argument_type(argument_types[0]) {} // result type is the first argument's type
+
+    String get_name() const override { return Data::name(); }
+    DataTypePtr get_return_type() const override { return _argument_type; }
+
+    void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start,
+                                int64_t frame_end, AggregateDataPtr place, const IColumn** columns,
+                                Arena* arena) const override { // arena is not used here
+        this->data(place).add_range_single_place(partition_start, partition_end, frame_start,
+                                                 frame_end, columns);
+    }
+
+    void reset(AggregateDataPtr place) const override { this->data(place).reset(); }
+
+    void insert_result_into(ConstAggregateDataPtr place, IColumn& to) const override {
+        this->data(place).insert_result_into(to);
+    }
+
+    void add(AggregateDataPtr place, const IColumn** columns, size_t row_num,
+             Arena* arena) const override { // forwards to Data::add(row, columns)
+        this->data(place).add(row_num, columns);
+    }
+    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena*) const override {
+        LOG(FATAL) << "WindowFunctionData do not support merge"; // not implemented
+    }
+    void serialize(ConstAggregateDataPtr place, BufferWritable& buf) const override {
+        LOG(FATAL) << "WindowFunctionData do not support serialize"; // not implemented
+    }
+    void deserialize(AggregateDataPtr place, BufferReadable& buf, Arena*) const override {
+        LOG(FATAL) << "WindowFunctionData do not support deserialize"; // not implemented
+    }
+    const char* get_header_file_path() const override { return __FILE__; }
+
+private:
+    DataTypePtr _argument_type; // type of argument 0, returned by get_return_type()
+};
+
+AggregateFunctionPtr create_aggregate_function_replace_if_not_null(const std::string& name,
+                                                                   const DataTypes& argument_types,
+                                                                   const Array& parameters,
+                                                                   const bool result_is_nullable);
+
+AggregateFunctionPtr create_aggregate_function_replace(const std::string& name,
+                                                       const DataTypes& argument_types,
+                                                       const Array& parameters,
+                                                       const bool result_is_nullable);
+
+AggregateFunctionPtr create_aggregate_function_replace_nullable(const std::string& name,
+                                                                const DataTypes& argument_types,
+                                                                const Array& parameters,
+                                                                const bool result_is_nullable);
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/factory_helpers.h b/be/src/vec/aggregate_functions/factory_helpers.h
new file mode 100644
index 0000000000..95b5538441
--- /dev/null
+++ b/be/src/vec/aggregate_functions/factory_helpers.h
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/FactoryHelpers.h
+// and modified by Doris
+
+#pragma once
+
+#include "common/logging.h"
+#include "vec/core/field.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+
+inline void assert_no_parameters(const std::string& name, const Array& parameters) { // CHECK-fails unless empty
+    CHECK(parameters.empty()) << fmt::format("Aggregate function {} cannot have parameters", name);
+}
+
+inline void assert_unary(const std::string& name, const DataTypes& argument_types) {
+    CHECK_EQ(argument_types.size(), 1) // exactly one argument type is required
+            << fmt::format("Aggregate function {} require single argument", name);
+}
+
+inline void assert_binary(const std::string& name, const DataTypes& argument_types) {
+    CHECK_EQ(argument_types.size(), 2) // exactly two argument types are required
+            << fmt::format("Aggregate function {} require two arguments", name);
+}
+
+template <std::size_t maximal_arity>
+inline void assert_arity_at_most(const std::string& name, const DataTypes& argument_types) {
+    if (argument_types.size() <= maximal_arity) {
+        return;
+    }
+    // Too many arguments: pick the message by limit (LOG(FATAL) ends the process).
+    if constexpr (maximal_arity == 0) {
+        LOG(FATAL) << fmt::format("Aggregate function {} cannot have arguments", name);
+    } else if constexpr (maximal_arity == 1) {
+        LOG(FATAL) << fmt::format("Aggregate function {} requires zero or one argument", name);
+    } else {
+        LOG(FATAL) << fmt::format("Aggregate function {} requires at most {} arguments", name,
+                                  maximal_arity);
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/aggregate_functions/helpers.h b/be/src/vec/aggregate_functions/helpers.h
new file mode 100644
index 0000000000..02b49a8cb0
--- /dev/null
+++ b/be/src/vec/aggregate_functions/helpers.h
@@ -0,0 +1,225 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/Helpers.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/data_types/data_type.h"
+
+#define FOR_NUMERIC_TYPES(M) /* invokes M(TYPE) once for each numeric type listed below */ \
+    M(UInt8)                 \
+    M(UInt16)                \
+    M(UInt32)                \
+    M(UInt64)                \
+    M(Int8)                  \
+    M(Int16)                 \
+    M(Int32)                 \
+    M(Int64)                 \
+    M(Int128)                \
+    M(Float32)               \
+    M(Float64)
+
+namespace doris::vectorized {
+
+/** Instantiates AggregateFunctionTemplate<T> for the numeric T matching argument_type (Enum8/Enum16 use Int8/Int16).
+  * Forwards args to the constructor; returns nullptr when no listed type matches. */
+template <template <typename> class AggregateFunctionTemplate, typename... TArgs>
+static IAggregateFunction* create_with_numeric_type(const IDataType& argument_type,
+                                                    TArgs&&... args) {
+    WhichDataType which(argument_type); // which.idx is compared against TypeIndex below
+#define DISPATCH(TYPE)                \
+    if (which.idx == TypeIndex::TYPE) \
+        return new AggregateFunctionTemplate<TYPE>(std::forward<TArgs>(args)...);
+    FOR_NUMERIC_TYPES(DISPATCH) // one branch per numeric type
+#undef DISPATCH
+    if (which.idx == TypeIndex::Enum8) // Enum8 uses the Int8 instantiation
+        return new AggregateFunctionTemplate<Int8>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Enum16) // Enum16 uses the Int16 instantiation
+        return new AggregateFunctionTemplate<Int16>(std::forward<TArgs>(args)...);
+    return nullptr; // no listed type matched
+}
+
+template <template <typename, bool> class AggregateFunctionTemplate, bool bool_param,
+          typename... TArgs>
+static IAggregateFunction* create_with_numeric_type(const IDataType& argument_type,
+                                                    TArgs&&... args) {
+    WhichDataType which(argument_type); // overload passing an extra bool template argument
+#define DISPATCH(TYPE)                \
+    if (which.idx == TypeIndex::TYPE) \
+        return new AggregateFunctionTemplate<TYPE, bool_param>(std::forward<TArgs>(args)...);
+    FOR_NUMERIC_TYPES(DISPATCH) // one branch per numeric type
+#undef DISPATCH
+    if (which.idx == TypeIndex::Enum8) // Enum8 uses the Int8 instantiation
+        return new AggregateFunctionTemplate<Int8, bool_param>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Enum16) // Enum16 uses the Int16 instantiation
+        return new AggregateFunctionTemplate<Int16, bool_param>(std::forward<TArgs>(args)...);
+    return nullptr; // no listed type matched
+}
+
+template <template <typename, typename> class AggregateFunctionTemplate, typename Data,
+          typename... TArgs>
+static IAggregateFunction* create_with_numeric_type(const IDataType& argument_type,
+                                                    TArgs&&... args) {
+    WhichDataType which(argument_type); // overload passing a fixed Data type as 2nd argument
+#define DISPATCH(TYPE)                \
+    if (which.idx == TypeIndex::TYPE) \
+        return new AggregateFunctionTemplate<TYPE, Data>(std::forward<TArgs>(args)...);
+    FOR_NUMERIC_TYPES(DISPATCH) // one branch per numeric type
+#undef DISPATCH
+    if (which.idx == TypeIndex::Enum8) // Enum8 uses the Int8 instantiation
+        return new AggregateFunctionTemplate<Int8, Data>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Enum16) // Enum16 uses the Int16 instantiation
+        return new AggregateFunctionTemplate<Int16, Data>(std::forward<TArgs>(args)...);
+    return nullptr; // no listed type matched
+}
+
+template <template <typename, typename> class AggregateFunctionTemplate,
+          template <typename> class Data, typename... TArgs>
+static IAggregateFunction* create_with_numeric_type(const IDataType& argument_type,
+                                                    TArgs&&... args) {
+    WhichDataType which(argument_type); // overload where Data is itself templated on the type
+#define DISPATCH(TYPE)                \
+    if (which.idx == TypeIndex::TYPE) \
+        return new AggregateFunctionTemplate<TYPE, Data<TYPE>>(std::forward<TArgs>(args)...);
+    FOR_NUMERIC_TYPES(DISPATCH) // one branch per numeric type
+#undef DISPATCH
+    if (which.idx == TypeIndex::Enum8) // Enum8 uses the Int8 instantiation
+        return new AggregateFunctionTemplate<Int8, Data<Int8>>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Enum16) // Enum16 uses the Int16 instantiation
+        return new AggregateFunctionTemplate<Int16, Data<Int16>>(std::forward<TArgs>(args)...);
+    return nullptr; // no listed type matched
+}
+
+template <template <typename> class AggregateFunctionTemplate, template <typename> class Data,
+          typename... TArgs>
+static IAggregateFunction* create_with_numeric_type(const IDataType& argument_type,
+                                                    TArgs&&... args) {
+    WhichDataType which(argument_type); // overload where the function takes only Data<TYPE>
+#define DISPATCH(TYPE)                \
+    if (which.idx == TypeIndex::TYPE) \
+        return new AggregateFunctionTemplate<Data<TYPE>>(std::forward<TArgs>(args)...);
+    FOR_NUMERIC_TYPES(DISPATCH) // one branch per numeric type
+#undef DISPATCH
+    // Enum dispatch is disabled in this overload, so Enum8/Enum16 fall through to nullptr:
+    // if (which.idx == TypeIndex::Enum8) return new AggregateFunctionTemplate<Data<Int8>>(std::forward<TArgs>(args)...); if (which.idx == TypeIndex::Enum16) return new AggregateFunctionTemplate<Data<Int16>>(std::forward<TArgs>(args)...);
+    return nullptr; // no listed type matched
+}
+
+template <template <typename, typename> class AggregateFunctionTemplate,
+          template <typename> class Data, typename... TArgs>
+static IAggregateFunction* create_with_unsigned_integer_type(const IDataType& argument_type,
+                                                             TArgs&&... args) {
+    WhichDataType which(argument_type); // only UInt8/16/32/64 are dispatched here
+    if (which.idx == TypeIndex::UInt8)
+        return new AggregateFunctionTemplate<UInt8, Data<UInt8>>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::UInt16)
+        return new AggregateFunctionTemplate<UInt16, Data<UInt16>>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::UInt32)
+        return new AggregateFunctionTemplate<UInt32, Data<UInt32>>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::UInt64)
+        return new AggregateFunctionTemplate<UInt64, Data<UInt64>>(std::forward<TArgs>(args)...);
+    return nullptr; // argument is not one of the four unsigned integer types
+}
+
+template <template <typename> class AggregateFunctionTemplate, typename... TArgs>
+static IAggregateFunction* create_with_numeric_based_type(const IDataType& argument_type,
+                                                          TArgs&&... args) {
+    IAggregateFunction* f = create_with_numeric_type<AggregateFunctionTemplate>(
+            argument_type, std::forward<TArgs>(args)...);
+    if (f) return f; // a plain numeric (or enum) type matched
+
+    /// Assumes Date is backed by UInt16, DateTime by UInt32 and UUID by UInt128.
+    WhichDataType which(argument_type);
+    if (which.idx == TypeIndex::Date)
+        return new AggregateFunctionTemplate<UInt16>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::DateTime)
+        return new AggregateFunctionTemplate<UInt32>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::UUID)
+        return new AggregateFunctionTemplate<UInt128>(std::forward<TArgs>(args)...);
+    return nullptr; // neither numeric nor Date/DateTime/UUID
+}
+
+template <template <typename> class AggregateFunctionTemplate, typename... TArgs>
+static IAggregateFunction* create_with_decimal_type(const IDataType& argument_type,
+                                                    TArgs&&... args) {
+    WhichDataType which(argument_type); // only Decimal32/64/128 are dispatched here
+    if (which.idx == TypeIndex::Decimal32)
+        return new AggregateFunctionTemplate<Decimal32>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Decimal64)
+        return new AggregateFunctionTemplate<Decimal64>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Decimal128)
+        return new AggregateFunctionTemplate<Decimal128>(std::forward<TArgs>(args)...);
+    return nullptr; // argument is not a decimal type
+}
+
+template <template <typename, typename> class AggregateFunctionTemplate, typename Data,
+          typename... TArgs>
+static IAggregateFunction* create_with_decimal_type(const IDataType& argument_type,
+                                                    TArgs&&... args) {
+    WhichDataType which(argument_type); // overload passing a fixed Data type as 2nd argument
+    if (which.idx == TypeIndex::Decimal32)
+        return new AggregateFunctionTemplate<Decimal32, Data>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Decimal64)
+        return new AggregateFunctionTemplate<Decimal64, Data>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Decimal128)
+        return new AggregateFunctionTemplate<Decimal128, Data>(std::forward<TArgs>(args)...);
+    return nullptr; // argument is not a decimal type
+}
+
+/** Second dispatch step for two-argument templates: FirstType is already fixed,
+  * the second template argument is chosen from second_type. */
+template <typename FirstType, template <typename, typename> class AggregateFunctionTemplate,
+          typename... TArgs>
+static IAggregateFunction* create_with_two_numeric_types_second(const IDataType& second_type,
+                                                                TArgs&&... args) {
+    WhichDataType which(second_type);
+#define DISPATCH(TYPE)                \
+    if (which.idx == TypeIndex::TYPE) \
+        return new AggregateFunctionTemplate<FirstType, TYPE>(std::forward<TArgs>(args)...);
+    FOR_NUMERIC_TYPES(DISPATCH) // one branch per numeric type
+#undef DISPATCH
+    if (which.idx == TypeIndex::Enum8) // Enum8 uses the Int8 instantiation
+        return new AggregateFunctionTemplate<FirstType, Int8>(std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Enum16) // Enum16 uses the Int16 instantiation
+        return new AggregateFunctionTemplate<FirstType, Int16>(std::forward<TArgs>(args)...);
+    return nullptr; // second_type is not a listed type
+}
+
+template <template <typename, typename> class AggregateFunctionTemplate, typename... TArgs>
+static IAggregateFunction* create_with_two_numeric_types(const IDataType& first_type,
+                                                         const IDataType& second_type,
+                                                         TArgs&&... args) {
+    WhichDataType which(first_type); // first step: fix the first template argument
+#define DISPATCH(TYPE)                                                                \
+    if (which.idx == TypeIndex::TYPE)                                                 \
+        return create_with_two_numeric_types_second<TYPE, AggregateFunctionTemplate>( \
+                second_type, std::forward<TArgs>(args)...);
+    FOR_NUMERIC_TYPES(DISPATCH) // one branch per numeric type
+#undef DISPATCH
+    if (which.idx == TypeIndex::Enum8) // Enum8 uses Int8 as the first type
+        return create_with_two_numeric_types_second<Int8, AggregateFunctionTemplate>(
+                second_type, std::forward<TArgs>(args)...);
+    if (which.idx == TypeIndex::Enum16) // Enum16 uses Int16 as the first type
+        return create_with_two_numeric_types_second<Int16, AggregateFunctionTemplate>(
+                second_type, std::forward<TArgs>(args)...);
+    return nullptr; // first_type is not a listed type
+}
+
+} // namespace  doris::vectorized
diff --git a/be/src/vec/aggregate_functions/key_holder_helpers.h b/be/src/vec/aggregate_functions/key_holder_helpers.h
new file mode 100644
index 0000000000..10246aad2e
--- /dev/null
+++ b/be/src/vec/aggregate_functions/key_holder_helpers.h
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/AggregateFunctions/KeyHolderHelpers.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/common/hash_table/hash_table_key_holder.h"
+
+namespace doris::vectorized {
+
+template <bool is_plain_column = false>
+static auto get_key_holder(const IColumn& column, size_t row_num, Arena& arena) {
+    if constexpr (is_plain_column) {
+        return ArenaKeyHolder{column.get_data_at(row_num), arena}; // key is the row's raw data
+    } else {
+        const char* begin = nullptr;
+        StringRef serialized = column.serialize_value_into_arena(row_num, arena, begin);
+        assert(serialized.data != nullptr);
+        return SerializedKeyHolder{serialized, arena}; // key is the row serialized into arena
+    }
+}
+
+template <bool is_plain_column>
+static void deserialize_and_insert(StringRef str, IColumn& data_to) {
+    if constexpr (!is_plain_column)
+        data_to.deserialize_and_insert_from_arena(str.data); // serialized form: decode it
+    else
+        data_to.insert_data(str.data, str.size); // plain form: append the raw bytes
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/collator.cpp b/be/src/vec/columns/collator.cpp
new file mode 100644
index 0000000000..a3fa2790ac
--- /dev/null
+++ b/be/src/vec/columns/collator.cpp
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/Collator.cpp
+// and modified by Doris
+
+#include "vec/columns/collator.h"
+
+#if USE_ICU
+#include <unicode/ucol.h>
+#else
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-private-field"
+#pragma clang diagnostic ignored "-Wmissing-noreturn"
+#endif
+#endif
+
+#include <boost/algorithm/string/case_conv.hpp>
+
+#include "common/logging.h"
+#include "vec/common/exception.h"
+
+Collator::Collator(const std::string& locale_) : locale(boost::algorithm::to_lower_copy(locale_)) {
+#if USE_ICU
+    UErrorCode status = U_ZERO_ERROR;
+
+    collator = ucol_open(locale.c_str(), &status); // open with the lower-cased locale name
+    if (status != U_ZERO_ERROR) { // any status other than U_ZERO_ERROR is treated as failure
+        ucol_close(collator);
+        LOG(FATAL) << "Unsupported collation locale: " << locale;
+    }
+#else
+    LOG(FATAL) << "Collations support is disabled, In Doris"; // built without ICU
+#endif
+}
+
+Collator::~Collator() {
+#if USE_ICU
+    ucol_close(collator); // release the handle opened in the constructor
+#endif
+}
+
+int Collator::compare(const char* str1, size_t length1, const char* str2, size_t length2) const {
+#if USE_ICU
+    UCharIterator iter1, iter2; // UTF-8 iterators over the two inputs
+    uiter_setUTF8(&iter1, str1, length1);
+    uiter_setUTF8(&iter2, str2, length2);
+
+    UErrorCode status = U_ZERO_ERROR;
+    UCollationResult compare_result = ucol_strcollIter(collator, &iter1, &iter2, &status);
+
+    if (status != U_ZERO_ERROR) { // any status other than U_ZERO_ERROR is treated as failure
+        LOG(FATAL) << "ICU collation comparison failed with error code: "
+                   << doris::vectorized::toString<int>(status);
+    }
+
+    /** The values of enum UCollationResult are exactly what we need to return:
+     *     UCOL_EQUAL = 0
+     *     UCOL_GREATER = 1
+     *     UCOL_LESS = -1
+     */
+    return compare_result;
+#else
+    (void)str1; // silence unused-parameter warnings in the non-ICU build
+    (void)length1;
+    (void)str2;
+    (void)length2;
+    return 0; // without ICU every pair compares as equal
+#endif
+}
+
+const std::string& Collator::get_locale() const { // the lower-cased name stored by the constructor
+    return locale;
+}
+
+std::vector<std::string> Collator::get_available_collations() {
+    std::vector<std::string> locales; // stays empty when ICU support is compiled out
+#if USE_ICU
+    const size_t locale_count = ucol_countAvailable();
+    for (size_t idx = 0; idx != locale_count; ++idx) locales.emplace_back(ucol_getAvailable(idx));
+#endif
+    return locales;
+}
diff --git a/be/src/vec/columns/collator.h b/be/src/vec/columns/collator.h
new file mode 100644
index 0000000000..27b9cd54b6
--- /dev/null
+++ b/be/src/vec/columns/collator.h
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/Collator.h
+// and modified by Doris
+
+#pragma once
+
+#include <boost/noncopyable.hpp>
+#include <string>
+#include <vector>
+
+struct UCollator;
+
+class Collator : private boost::noncopyable { // non-copyable holder of a UCollator handle
+public:
+    explicit Collator(const std::string& locale_);
+    ~Collator();
+
+    int compare(const char* str1, size_t length1, const char* str2, size_t length2) const;
+
+    const std::string& get_locale() const;
+
+    static std::vector<std::string> get_available_collations();
+
+private:
+    std::string locale; // locale name as stored by the constructor
+    UCollator* collator; // no in-class initializer; assigned by the constructor in ICU builds
+};
diff --git a/be/src/vec/columns/column.cpp b/be/src/vec/columns/column.cpp
new file mode 100644
index 0000000000..815be82024
--- /dev/null
+++ b/be/src/vec/columns/column.cpp
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/IColumn.cpp
+// and modified by Doris
+
+#include "vec/columns/column.h"
+
+#include <sstream>
+
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/core/field.h"
+
+namespace doris::vectorized {
+
+std::string IColumn::dump_structure() const { // e.g. "Family(size = N, <subcolumn dumps>)"
+    std::stringstream res;
+    res << get_family_name() << "(size = " << size();
+
+    ColumnCallback callback = [&](ColumnPtr& subcolumn) {
+        res << ", " << subcolumn->dump_structure(); // recurse into each subcolumn
+    };
+
+    const_cast<IColumn*>(this)->for_each_subcolumn(callback); // needs a non-const this
+
+    res << ")";
+    return res.str();
+}
+
+void IColumn::insert_from(const IColumn& src, size_t n) { // base version: goes through src[n]
+    insert(src[n]);
+}
+
+bool is_column_nullable(const IColumn& column) { // result of check_column<ColumnNullable>
+    return check_column<ColumnNullable>(column);
+}
+
+bool is_column_const(const IColumn& column) { // result of check_column<ColumnConst>
+    return check_column<ColumnConst>(column);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
new file mode 100644
index 0000000000..71b86ae7e4
--- /dev/null
+++ b/be/src/vec/columns/column.h
@@ -0,0 +1,498 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/IColumn.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/common/cow.h"
+#include "vec/common/exception.h"
+#include "vec/common/pod_array_fwd.h"
+#include "vec/common/string_ref.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/core/field.h"
+#include "vec/core/types.h"
+
+class SipHash;
+
+namespace doris::vectorized {
+
+class Arena;
+class ColumnGathererStream;
+class Field;
+
+/// Declares interface to store columns in memory.
+class IColumn : public COW<IColumn> {
+private:
+    friend class COW<IColumn>;
+
+    /// Creates the same column with the same data.
+    /// This is internal method to use from COW.
+    /// It performs shallow copy with copy-ctor and not useful from outside.
+    /// If you want to copy column for modification, look at 'mutate' method.
+    virtual MutablePtr clone() const = 0;
+
+public:
+    /// Name of a Column. It is used in info messages.
+    virtual std::string get_name() const { return get_family_name(); }
+
+    /// Name of a Column kind, without parameters (example: FixedString, Array).
+    virtual const char* get_family_name() const = 0;
+
+    /** If column isn't constant, returns nullptr (or itself).
+      * If column is constant, transforms constant to full column (if column type allows such transform) and return it.
+      */
+    virtual Ptr convert_to_full_column_if_const() const { return get_ptr(); }
+
+    /// If column isn't ColumnLowCardinality, return itself.
+    /// If column is ColumnLowCardinality, transforms it to a full column.
+    virtual Ptr convert_to_full_column_if_low_cardinality() const { return get_ptr(); }
+
+    /// Creates empty column with the same type.
+    virtual MutablePtr clone_empty() const { return clone_resized(0); }
+
+    /// Creates column with the same type and specified size.
+    /// If size is less than the current size, the data is cut.
+    /// If size is greater, default values are appended.
+    virtual MutablePtr clone_resized(size_t s) const {
+        LOG(FATAL) << "Cannot clone_resized() column " << get_name();
+        return nullptr;
+    }
+
+    /// Returns number of values in column.
+    virtual size_t size() const = 0;
+
+    /// There are no values in columns.
+    bool empty() const { return size() == 0; }
+
+    /// Returns value of n-th element in universal Field representation.
+    /// Is used in rare cases, since creation of Field instance is expensive usually.
+    virtual Field operator[](size_t n) const = 0;
+
+    /// Like the previous one, but avoids extra copying if Field is in a container, for example.
+    virtual void get(size_t n, Field& res) const = 0;
+
+    /// If possible, returns pointer to memory chunk which contains n-th element (if it isn't possible, throws an exception)
+    /// Is used to optimize some computations (in aggregation, for example).
+    virtual StringRef get_data_at(size_t n) const = 0;
+
+    /// Like get_data_at, but has special behavior for columns that contain variable-length strings.
+    /// Returns zero-ending memory chunk (i.e. its size is 1 byte longer).
+    virtual StringRef get_data_at_with_terminating_zero(size_t n) const { return get_data_at(n); }
+
+    /// If column stores integers, it returns n-th element transformed to UInt64 using static_cast.
+    /// If column stores floating point numbers, bits of n-th elements are copied to lower bits of UInt64, the remaining bits are zeros.
+    /// Is used to optimize some computations (in aggregation, for example).
+    virtual UInt64 get64(size_t /*n*/) const {
+        LOG(FATAL) << "Method get64 is not supported for " << get_name();
+        return 0; // only here to satisfy the return type; LOG(FATAL) is expected to abort first
+    }
+
+    /// If column stores native numeric type, it returns n-th element casted to Float64
+    /// Is used in regression methods to cast each features into uniform type
+    virtual Float64 get_float64(size_t /*n*/) const {
+        LOG(FATAL) << "Method get_float64 is not supported for " << get_name();
+        return 0;
+    }
+
+    /** If column is numeric, return value of n-th element, casted to UInt64.
+      * For NULL values of Nullable column it is allowed to return arbitrary value.
+      * Otherwise throw an exception.
+      */
+    virtual UInt64 get_uint(size_t /*n*/) const {
+        LOG(FATAL) << "Method get_uint is not supported for " << get_name();
+        return 0;
+    }
+
+    virtual Int64 get_int(size_t /*n*/) const {
+        LOG(FATAL) << "Method get_int is not supported for " << get_name();
+        return 0;
+    }
+
+    virtual bool is_default_at(size_t n) const { return get64(n) == 0; }
+    virtual bool is_null_at(size_t /*n*/) const { return false; }
+
+    /** If column is numeric, return value of n-th element, casted to bool.
+      * For NULL values of Nullable column returns false.
+      * Otherwise throw an exception.
+      */
+    virtual bool get_bool(size_t /*n*/) const {
+        LOG(FATAL) << "Method get_bool is not supported for " << get_name();
+        return false;
+    }
+
+    /// Removes all elements outside of specified range.
+    /// Is used in LIMIT operation, for example.
+    virtual Ptr cut(size_t start, size_t length) const {
+        MutablePtr res = clone_empty();
+        res->insert_range_from(*this, start, length);
+        return res;
+    }
+
+    /// Appends new value at the end of column (column's size is increased by 1).
+    /// Is used to transform raw strings to Blocks (for example, inside input format parsers)
+    virtual void insert(const Field& x) = 0;
+
+    /// Appends n-th element from other column with the same type.
+    /// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation.
+    virtual void insert_from(const IColumn& src, size_t n);
+
+    /// Appends range of elements from other column with the same type.
+    /// Could be used to concatenate columns.
+    virtual void insert_range_from(const IColumn& src, size_t start, size_t length) = 0;
+
+    /// Appends one element from other column with the same type multiple times.
+    virtual void insert_many_from(const IColumn& src, size_t position, size_t length) {
+        for (size_t i = 0; i < length; ++i) insert_from(src, position);
+    }
+ 
+    /// Appends a batch elements from other column with the same type
+    /// indices_begin + indices_end represent the row indices of column src
+    virtual void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) = 0;
+
+    /// Appends data located in specified memory chunk if it is possible (throws an exception if it cannot be implemented).
+    /// Is used to optimize some computations (in aggregation, for example).
+    /// Parameter length could be ignored if column values have fixed size.
+    /// All data will be inserted as single element
+    virtual void insert_data(const char* pos, size_t length) = 0;
+
+    /// Appends "default value".
+    /// Is used when there are need to increase column size, but inserting value doesn't make sense.
+    /// For example, ColumnNullable(Nested) absolutely ignores values of nested column if it is marked as NULL.
+    virtual void insert_default() = 0;
+
+    /// Appends "default value" multiple times.
+    virtual void insert_many_defaults(size_t length) {
+        for (size_t i = 0; i < length; ++i) insert_default();
+    }
+
+    /** Removes last n elements.
+      * Is used to support exception-safety of several operations.
+      *  For example, sometimes insertion should be reverted if we catch an exception during operation processing.
+      * If column has less than n elements or n == 0 - undefined behavior.
+      */
+    virtual void pop_back(size_t n) = 0;
+
+    /** Serializes n-th element. Serialized element should be placed continuously inside Arena's memory.
+      * Serialized value can be deserialized to reconstruct original object. Is used in aggregation.
+      * The method is similar to get_data_at(), but can work when element's value cannot be mapped to existing continuous memory chunk,
+      *  For example, to obtain unambiguous representation of Array of strings, strings data should be interleaved with their sizes.
+      * Parameter begin should be used with Arena::alloc_continue.
+      */
+    virtual StringRef serialize_value_into_arena(size_t n, Arena& arena,
+                                                 char const*& begin) const = 0;
+
+    /// Deserializes a value that was serialized using IColumn::serialize_value_into_arena method.
+    /// Returns pointer to the position after the read data.
+    virtual const char* deserialize_and_insert_from_arena(const char* pos) = 0;
+
+    /// Update state of hash function with value of n-th element.
+    /// On subsequent calls of this method for sequence of column values of arbitrary types,
+    ///  passed bytes to hash must identify sequence of values unambiguously.
+    virtual void update_hash_with_value(size_t n, SipHash& hash) const = 0;
+
+    /** Removes elements that don't match the filter.
+      * Is used in WHERE and HAVING operations.
+      * If result_size_hint > 0, then makes advance reserve(result_size_hint) for the result column;
+      *  if 0, then don't makes reserve(),
+      *  otherwise (i.e. < 0), makes reserve() using size of source column.
+      */
+    using Filter = PaddedPODArray<UInt8>;
+    virtual Ptr filter(const Filter& filt, ssize_t result_size_hint) const = 0;
+
+    /**
+     *  used by lazy materialization to filter column by selected rowids
+     */
+    virtual Ptr filter_by_selector(const uint16_t* sel, size_t sel_size, Ptr* ptr = nullptr) {
+        LOG(FATAL) << "column not support filter_by_selector";
+        __builtin_unreachable();
+    };
+
+    /// Permutes elements using specified permutation. Is used in sortings.
+    /// limit - if it isn't 0, puts only first limit elements in the result.
+    using Permutation = PaddedPODArray<size_t>;
+    virtual Ptr permute(const Permutation& perm, size_t limit) const = 0;
+
+    /// Creates new column with values column[indexes[:limit]]. If limit is 0, all indexes are used.
+    /// Indexes must be one of the ColumnUInt. For default implementation, see select_index_impl from ColumnsCommon.h
+    //    virtual Ptr index(const IColumn & indexes, size_t limit) const = 0;
+
+    /** Compares (*this)[n] and rhs[m]. Column rhs should have the same type.
+      * Returns negative number, 0, or positive number (*this)[n] is less, equal, greater than rhs[m] respectively.
+      * Is used in sortings.
+      *
+      * If one of element's value is NaN or NULLs, then:
+      * - if nan_direction_hint == -1, NaN and NULLs are considered as least than everything other;
+      * - if nan_direction_hint ==  1, NaN and NULLs are considered as greatest than everything other.
+      * For example, if nan_direction_hint == -1 is used by descending sorting, NaNs will be at the end.
+      *
+      * For non Nullable and non floating point types, nan_direction_hint is ignored.
+      */
+    virtual int compare_at(size_t n, size_t m, const IColumn& rhs,
+                           int nan_direction_hint) const = 0;
+
+    /** Returns a permutation that sorts elements of this column,
+      *  i.e. perm[i]-th element of source column should be i-th element of sorted column.
+      * reverse - reverse ordering (ascending).
+      * limit - if isn't 0, then only first limit elements of the result column could be sorted.
+      * nan_direction_hint - see above.
+      */
+    virtual void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+                                 Permutation& res) const = 0;
+
+    /** Copies each element according offsets parameter.
+      * (i-th element should be copied offsets[i] - offsets[i - 1] times.)
+      * It is necessary in ARRAY JOIN operation.
+      */
+    using Offset = UInt32;
+    using Offsets = PaddedPODArray<Offset>;
+    virtual Ptr replicate(const Offsets& offsets) const = 0;
+
+    /** Split column to smaller columns. Each value goes to column index, selected by corresponding element of 'selector'.
+      * Selector must contain values from 0 to num_columns - 1.
+      * For default implementation, see scatter_impl.
+      */
+    using ColumnIndex = UInt64;
+    using Selector = PaddedPODArray<ColumnIndex>;
+    virtual std::vector<MutablePtr> scatter(ColumnIndex num_columns,
+                                            const Selector& selector) const = 0;
+
+    /// Insert data from several other columns according to source mask (used in vertical merge).
+    /// For now it is a helper to de-virtualize calls to insert*() functions inside gather loop
+    /// (descendants should call gatherer_stream.gather(*this) to implement this function.)
+    /// TODO: interface decoupled from ColumnGathererStream that allows non-generic specializations.
+    //    virtual void gather(ColumnGathererStream & gatherer_stream) = 0;
+
+    /** Computes minimum and maximum element of the column.
+      * In addition to numeric types, the function is completely implemented for Date and DateTime.
+      * For strings and arrays function should return default value.
+      *  (except for constant columns; they should return value of the constant).
+      * If column is empty function should return default value.
+      */
+    virtual void get_extremes(Field& min, Field& max) const = 0;
+
+    /// Reserves memory for specified amount of elements. If reservation isn't possible, does nothing.
+    /// It affects performance only (not correctness).
+    virtual void reserve(size_t /*n*/) {}
+
+    /// Resize memory for specified amount of elements. If reservation isn't possible, does nothing.
+    /// It affects performance only (not correctness).
+    virtual void resize(size_t /*n*/) {}
+
+    /// Size of column data in memory (may be approximate) - for profiling. Zero, if could not be determined.
+    virtual size_t byte_size() const = 0;
+
+    /// Size of memory, allocated for column.
+    /// This is greater or equals to byte_size due to memory reservation in containers.
+    /// Zero, if could not be determined.
+    virtual size_t allocated_bytes() const = 0;
+
+    /// Make memory region readonly with mprotect if it is large enough.
+    /// The operation is slow and performed only for debug builds.
+    virtual void protect() {}
+
+    /// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.
+    /// Shallow: doesn't do recursive calls; don't do call for itself.
+    using ColumnCallback = std::function<void(WrappedPtr&)>;
+    virtual void for_each_subcolumn(ColumnCallback) {}
+
+    /// Columns have equal structure.
+    /// If true - you can use "compare_at", "insert_from", etc. methods.
+    virtual bool structure_equals(const IColumn&) const {
+        LOG(FATAL) << "Method structure_equals is not supported for " << get_name();
+        return false;
+    }
+
+    MutablePtr mutate() const&& {
+        MutablePtr res = shallow_mutate();
+        res->for_each_subcolumn(
+                [](WrappedPtr& subcolumn) { subcolumn = std::move(*subcolumn).mutate(); });
+        return res;
+    }
+
+    /** Some columns can contain another columns inside.
+      * So, we have a tree of columns. But not all combinations are possible.
+      * There are the following rules:
+      *
+      * ColumnConst may be only at top. It cannot be inside any column.
+      * ColumnNullable can contain only simple columns.
+      */
+
+    /// Various properties on behaviour of column type.
+
+    /// True if column contains something nullable inside. It's true for ColumnNullable, can be true or false for ColumnConst, etc.
+    virtual bool is_nullable() const { return false; }
+
+    virtual bool is_bitmap() const { return false; }
+
+    // true iff column has null element
+    virtual bool has_null() const { return false; }
+
+    // true iff column has null element [0,size)
+    virtual bool has_null(size_t size) const { return false; }
+
+    /// It's a special kind of column, that contain single value, but is not a ColumnConst.
+    virtual bool is_dummy() const { return false; }
+
+    /// Clear data of column, just like vector clear
+    virtual void clear() {};
+
+    /** Memory layout properties.
+      *
+      * Each value of a column can be placed in memory contiguously or not.
+      *
+      * Example: simple columns like UInt64 or FixedString store their values contiguously in single memory buffer.
+      *
+      * Example: Tuple store values of each component in separate subcolumn, so the values of Tuples with at least two components are not contiguous.
+      * Another example is Nullable. Each value have null flag, that is stored separately, so the value is not contiguous in memory.
+      *
+      * There are some important cases, when values are not stored contiguously, but for each value, you can get contiguous memory segment,
+      *  that will unambiguously identify the value. In this case, methods get_data_at and insert_data are implemented.
+      * Example: String column: bytes of strings are stored concatenated in one memory buffer
+      *  and offsets to that buffer are stored in another buffer. The same is for Array of fixed-size contiguous elements.
+      *
+      * To avoid confusion between these cases, we don't have isContiguous method.
+      */
+
+    /// Values in column have fixed size (including the case when values span many memory segments).
+    virtual bool values_have_fixed_size() const { return is_fixed_and_contiguous(); }
+
+    /// Values in column are represented as continuous memory segment of fixed size. Implies values_have_fixed_size.
+    virtual bool is_fixed_and_contiguous() const { return false; }
+
+    /// If is_fixed_and_contiguous, returns the underlying data array, otherwise throws an exception.
+    virtual StringRef get_raw_data() const {
+        LOG(FATAL) << fmt::format("Column {} is not a contiguous block of memory", get_name());
+        return StringRef {};
+    }
+
+    /// If values_have_fixed_size, returns size of value, otherwise throw an exception.
+    virtual size_t size_of_value_if_fixed() const {
+        LOG(FATAL) << fmt::format("Values of column {} are not fixed size.", get_name());
+        return 0;
+    }
+
+    /// Column is ColumnVector of numbers or ColumnConst of it. Note that Nullable columns are not numeric.
+    /// Implies is_fixed_and_contiguous.
+    virtual bool is_numeric() const { return false; }
+
+    virtual bool is_column_string() const { return false; }
+
+    virtual bool is_column_decimal() const { return false; }
+
+    virtual bool is_predicate_column() const { return false; }
+
+    /// If the only value column can contain is NULL.
+    /// Does not imply type of object, because it can be ColumnNullable(ColumnNothing) or ColumnConst(ColumnNullable(ColumnNothing))
+    virtual bool only_null() const { return false; }
+
+    /// Can be inside ColumnNullable.
+    virtual bool can_be_inside_nullable() const { return false; }
+
+    virtual bool low_cardinality() const { return false; }
+
+    virtual ~IColumn() = default;
+    IColumn() = default;
+    IColumn(const IColumn&) = default;
+
+    /** Print column name, size, and recursively print all subcolumns.
+      */
+    String dump_structure() const;
+
+    // only used in agg value replace
+    // ColumnString should replace according to 0,1,2... ,size,0,1,2...
+    virtual void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) = 0;
+
+    // only used in ColumnNullable replace_column_data
+    virtual void replace_column_data_default(size_t self_row = 0) = 0;
+
+    virtual bool is_date_type() { return is_date; }
+    virtual bool is_datetime_type() { return is_date_time; }
+
+    virtual void set_date_type() { is_date = true; }
+    virtual void set_datetime_type() { is_date_time = true; }
+
+    // todo(wb): a temporary implementation; needs to be re-abstracted here
+    bool is_date = false;
+    bool is_date_time = false;
+
+protected:
+    /// Template is to devirtualize calls to insert_from method.
+    /// In derived classes (that use final keyword), implement scatter method as call to scatter_impl.
+    template <typename Derived>
+    std::vector<MutablePtr> scatter_impl(ColumnIndex num_columns, const Selector& selector) const;
+};
+
+using ColumnPtr = IColumn::Ptr;
+using MutableColumnPtr = IColumn::MutablePtr;
+using Columns = std::vector<ColumnPtr>;
+using MutableColumns = std::vector<MutableColumnPtr>;
+
+using ColumnRawPtrs = std::vector<const IColumn*>;
+//using MutableColumnRawPtrs = std::vector<IColumn *>;
+
+template <typename... Args>
+struct IsMutableColumns;
+
+template <typename Arg, typename... Args>
+struct IsMutableColumns<Arg, Args...> {
+    static const bool value =
+            std::is_assignable<MutableColumnPtr&&, Arg>::value && IsMutableColumns<Args...>::value;
+};
+
+template <>
+struct IsMutableColumns<> {
+    static const bool value = true;
+};
+
+template <typename Type>
+const Type* check_and_get_column(const IColumn& column) {
+    return typeid_cast<const Type*>(&column);
+}
+
+template <typename Type>
+const Type* check_and_get_column(const IColumn* column) {
+    return typeid_cast<const Type*>(column);
+}
+
+template <typename Type>
+bool check_column(const IColumn& column) {
+    return check_and_get_column<Type>(&column);
+}
+
+template <typename Type>
+bool check_column(const IColumn* column) {
+    return check_and_get_column<Type>(column);
+}
+
+/// True if column's an ColumnConst instance. It's just a syntax sugar for type check.
+bool is_column_const(const IColumn& column);
+
+/// True if column's an ColumnNullable instance. It's just a syntax sugar for type check.
+bool is_column_nullable(const IColumn& column);
+
+} // namespace doris::vectorized
+
+// Wrap `ColumnPtr` because `ColumnPtr` can't be used in forward declaration.
+namespace doris {
+struct ColumnPtrWrapper {
+    vectorized::ColumnPtr column_ptr; // the wrapped column pointer
+
+    ColumnPtrWrapper(vectorized::ColumnPtr col) : column_ptr(std::move(col)) {} // sink: move, don't re-copy
+};
+} // namespace doris
diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h
new file mode 100644
index 0000000000..18794d3077
--- /dev/null
+++ b/be/src/vec/columns/column_complex.h
@@ -0,0 +1,297 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnComplex.h
+// and modified by Doris
+
+#pragma once
+
+#include <vector>
+
+#include "util/bitmap_value.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_impl.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+template <typename T>
+class ColumnComplexType final : public COWHelper<IColumn, ColumnComplexType<T>> {
+private:
+    ColumnComplexType() {}
+    ColumnComplexType(const size_t n) : data(n) {}
+    friend class COWHelper<IColumn, ColumnComplexType<T>>;
+
+public:
+    using Self = ColumnComplexType;
+    using value_type = T;
+    using Container = std::vector<value_type>;
+
+    bool is_numeric() const override { return false; }
+
+    bool is_bitmap() const override { return std::is_same_v<T, BitmapValue>; }
+
+    size_t size() const override { return data.size(); }
+
+    StringRef get_data_at(size_t n) const override {
+        return StringRef(reinterpret_cast<const char*>(&data[n]), sizeof(data[n]));
+    }
+
+    void insert_from(const IColumn& src, size_t n) override {
+        data.push_back(static_cast<const Self&>(src).get_data()[n]);
+    }
+
+    void insert_data(const char* pos, size_t /*length*/) override {
+        data.push_back(*reinterpret_cast<const T*>(pos));
+    }
+
+    void insert_default() override { data.push_back(T()); }
+
+    void clear() override { data.clear(); }
+
+    // TODO: value_type is not a pod type, so we also need to
+    // calculate the memory requested by value_type
+    size_t byte_size() const override { return data.size() * sizeof(data[0]); }
+
+    size_t allocated_bytes() const override { return byte_size(); }
+
+    void protect() override {}
+
+    void insert_value(T value) { data.emplace_back(std::move(value)); }
+
+    [[noreturn]] void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+                                      IColumn::Permutation& res) const override {
+        LOG(FATAL) << "get_permutation not implemented";
+    }
+
+    void reserve(size_t n) override { data.reserve(n); }
+
+    void resize(size_t n) override { data.resize(n); }
+
+    const char* get_family_name() const override { return TypeName<T>::get(); }
+
+    MutableColumnPtr clone_resized(size_t size) const override;
+
+    [[noreturn]] void insert(const Field& x) override {
+        LOG(FATAL) << "insert field not implemented";
+    }
+
+    [[noreturn]] Field operator[](size_t n) const override {
+        LOG(FATAL) << "operator[] not implemented";
+    }
+    [[noreturn]] void get(size_t n, Field& res) const override {
+        LOG(FATAL) << "get field not implemented";
+    }
+
+    [[noreturn]] UInt64 get64(size_t n) const override {
+        LOG(FATAL) << "get64 not implemented"; // unsupported for this column type
+    }
+
+    [[noreturn]] Float64 get_float64(size_t n) const override {
+        LOG(FATAL) << "get_float64 not implemented"; // unsupported for this column type
+    }
+
+    [[noreturn]] UInt64 get_uint(size_t n) const override {
+        LOG(FATAL) << "get_uint not implemented"; // unsupported for this column type
+    }
+
+    [[noreturn]] bool get_bool(size_t n) const override {
+        LOG(FATAL) << "get_bool not implemented"; // unsupported for this column type
+    }
+
+    [[noreturn]] Int64 get_int(size_t n) const override {
+        LOG(FATAL) << "get_int not implemented"; // unsupported for this column type
+    }
+
+    void insert_range_from(const IColumn& src, size_t start, size_t length) override {
+        // Appends src[start, start + length); src must be the same ColumnComplexType<T>.
+        const auto& src_data = static_cast<const Self&>(src).get_data();
+        DCHECK(start <= src_data.size() && length <= src_data.size() - start);
+        auto st = src_data.begin() + start;
+        data.insert(data.end(), st, st + length);
+    }
+
+    void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override {
+        const Self& src_vec = assert_cast<const Self&>(src);
+        data.reserve(size() + (indices_end - indices_begin));
+        for (auto x = indices_begin; x != indices_end; ++x) {
+            data.push_back(src_vec.get_element(*x));
+        }
+    }
+
+    void pop_back(size_t n) override { data.erase(data.end() - n, data.end()); } // drops last n
+    // It's impossible to use ComplexType as a key, so we don't have to implement these.
+    [[noreturn]] StringRef serialize_value_into_arena(size_t n, Arena& arena,
+                                                      char const*& begin) const override {
+        LOG(FATAL) << "serialize_value_into_arena not implemented"; // unsupported stub
+    }
+
+    [[noreturn]] const char* deserialize_and_insert_from_arena(const char* pos) override {
+        LOG(FATAL) << "deserialize_and_insert_from_arena not implemented"; // unsupported stub
+    }
+
+    void update_hash_with_value(size_t n, SipHash& hash) const override {
+        // TODO add hash function. Until then this is a no-op: `hash` is left untouched.
+    }
+
+    [[noreturn]] int compare_at(size_t n, size_t m, const IColumn& rhs,
+                                int nan_direction_hint) const override {
+        LOG(FATAL) << "compare_at not implemented"; // unsupported stub
+    }
+
+    void get_extremes(Field& min, Field& max) const override {
+        LOG(FATAL) << "get_extremes not implemented"; // unsupported stub; min/max are not written
+    }
+
+    bool can_be_inside_nullable() const override { return true; }
+
+    bool is_fixed_and_contiguous() const override { return true; }
+    size_t size_of_value_if_fixed() const override { return sizeof(T); }
+
+    StringRef get_raw_data() const override {
+        return StringRef(reinterpret_cast<const char*>(data.data()), byte_size()); // length in bytes
+    }
+
+    bool structure_equals(const IColumn& rhs) const override {
+        return typeid(rhs) == typeid(ColumnComplexType<T>);
+    }
+
+    ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) const override;
+
+    ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override;
+
+    Container& get_data() { return data; }
+
+    const Container& get_data() const { return data; }
+
+    const T& get_element(size_t n) const { return data[n]; }
+
+    T& get_element(size_t n) { return data[n]; }
+
+    ColumnPtr replicate(const IColumn::Offsets& replicate_offsets) const override;
+
+    [[noreturn]] MutableColumns scatter(IColumn::ColumnIndex num_columns,
+                                        const IColumn::Selector& selector) const override {
+        LOG(FATAL) << "scatter not implemented";
+    }
+
+    void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
+        DCHECK(size() > self_row);
+        data[self_row] = static_cast<const Self&>(rhs).data[row];
+    }
+
+    void replace_column_data_default(size_t self_row = 0) override {
+        DCHECK(size() > self_row);
+        data[self_row] = T();
+    }
+
+private:
+    Container data;
+};
+
+template <typename T>
+MutableColumnPtr ColumnComplexType<T>::clone_resized(size_t size) const {
+    auto res = this->create();
+    if (size > 0) {
+        // Honor the IColumn contract: keep at most `size` values, then pad with T().
+        auto& new_col = static_cast<Self&>(*res);
+        new_col.data.assign(data.begin(), data.begin() + std::min(size, data.size()));
+        new_col.data.resize(size);
+    }
+    return res;
+}
+
+template <typename T>
+ColumnPtr ColumnComplexType<T>::filter(const IColumn::Filter& filt,
+                                       ssize_t result_size_hint) const {
+    // Returns a new column holding the rows whose filter byte is non-zero.
+    const size_t num_rows = data.size();
+    if (num_rows != filt.size()) {
+        LOG(FATAL) << "Size of filter doesn't match size of column.";
+    }
+
+    auto res = this->create();
+    if (num_rows == 0) return res;
+    Container& kept = res->get_data();
+
+    // Hint > 0: reserve exactly that much; hint < 0: reserve the source size; 0: no reserve.
+    if (result_size_hint > 0) {
+        kept.reserve(result_size_hint);
+    } else if (result_size_hint < 0) {
+        kept.reserve(num_rows);
+    }
+
+    const UInt8* flags = filt.data();
+    for (size_t row = 0; row < num_rows; ++row) {
+        if (flags[row]) kept.push_back(data[row]);
+    }
+
+    return res;
+}
+
+template <typename T>
+ColumnPtr ColumnComplexType<T>::permute(const IColumn::Permutation& perm, size_t limit) const {
+    // Builds a column whose i-th value is data[perm[i]]; limit == 0 means "all rows".
+    const size_t num_rows = data.size();
+    const size_t out_rows = (limit == 0) ? num_rows : std::min(num_rows, limit);
+
+    if (perm.size() < out_rows) {
+        LOG(FATAL) << "Size of permutation is less than required.";
+    }
+
+    // Pre-size the result so each slot can be assigned by index.
+    auto res = this->create(out_rows);
+    Container& permuted = res->get_data();
+
+    size_t dst = 0;
+    while (dst < out_rows) {
+        permuted[dst] = data[perm[dst]];
+        ++dst;
+    }
+    return res;
+}
+
+template <typename T>
+ColumnPtr ColumnComplexType<T>::replicate(const IColumn::Offsets& offsets) const {
+    // Row i is emitted (offsets[i] - offsets[i - 1]) times,
+    // i.e. `offsets` holds the running total of output rows after each input row.
+    const size_t num_rows = data.size();
+    if (num_rows != offsets.size()) {
+        LOG(FATAL) << "Size of offsets doesn't match size of column.";
+    }
+
+    auto res = this->create();
+    if (num_rows == 0) return res;
+
+    Container& replicated = res->get_data();
+    // The last running total is the size of the whole result.
+    replicated.reserve(offsets.back());
+
+    IColumn::Offset consumed = 0;
+    for (size_t row = 0; row < num_rows; ++row) {
+        const size_t copies = offsets[row] - consumed;
+        consumed = offsets[row];
+        replicated.insert(replicated.end(), copies, data[row]);
+    }
+
+    return res;
+}
+
+using ColumnBitmap = ColumnComplexType<BitmapValue>;
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_const.cpp b/be/src/vec/columns/column_const.cpp
new file mode 100644
index 0000000000..3bc07a7a6a
--- /dev/null
+++ b/be/src/vec/columns/column_const.cpp
@@ -0,0 +1,107 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnConst.cpp
+// and modified by Doris
+
+#include "vec/columns/column_const.h"
+
+#include "vec/columns/columns_common.h"
+#include "vec/common/pod_array.h"
+#include "vec/common/typeid_cast.h"
+
+namespace doris::vectorized {
+
+ColumnConst::ColumnConst(const ColumnPtr& data_, size_t s_) : data(data_), s(s_) {
+    /// Squash Const of Const: unwrap until `data` is no longer a ColumnConst.
+    while (const ColumnConst* const_data = typeid_cast<const ColumnConst*>(data.get())) {
+        data = const_data->get_data_column_ptr();
+    }
+    /// The nested column must hold exactly one row; report the offending size otherwise.
+    if (data->size() != 1) {
+        LOG(FATAL) << "Incorrect size of nested column in constructor of ColumnConst: "
+                   << data->size() << ", must be 1.";
+    }
+}
+
+ColumnPtr ColumnConst::convert_to_full_column() const {
+    return data->replicate(Offsets(1, s)); // expand the nested column; Offsets(1, s) is presumably one offset equal to s
+}
+
+ColumnPtr ColumnConst::remove_low_cardinality() const { // result keeps the same size s
+    return ColumnConst::create(data->convert_to_full_column_if_low_cardinality(), s);
+}
+
+ColumnPtr ColumnConst::filter(const Filter& filt, ssize_t /*result_size_hint*/) const {
+    if (const size_t filter_size = filt.size(); filter_size != s) {
+        LOG(FATAL) << fmt::format("Size of filter ({}) doesn't match size of column ({})",
+                                  filter_size, s);
+    }
+    // All rows share one value, so the result is fully described by its new size.
+    return ColumnConst::create(data, count_bytes_in_filter(filt));
+}
+
+ColumnPtr ColumnConst::replicate(const Offsets& offsets) const {
+    if (const size_t offsets_size = offsets.size(); offsets_size != s) {
+        LOG(FATAL) << fmt::format("Size of offsets ({}) doesn't match size of column ({})",
+                                  offsets_size, s);
+    }
+    // The last offset becomes the size of the result.
+    // An empty column has no last offset, so it replicates to an empty column.
+    return ColumnConst::create(data, s == 0 ? 0 : static_cast<size_t>(offsets.back()));
+}
+
+/// Permuting identical rows changes nothing but the row count, so `perm` is only
+/// checked for length and the result is a constant column of the effective limit.
+ColumnPtr ColumnConst::permute(const Permutation& perm, size_t limit) const {
+    // limit == 0 means "no limit"; otherwise cap it at the column size.
+    const size_t result_size = (limit == 0) ? s : std::min(s, limit);
+
+    const size_t perm_size = perm.size();
+    if (perm_size < result_size) {
+        LOG(FATAL) << fmt::format("Size of permutation ({}) is less than required ({})",
+                                  perm_size, result_size);
+    }
+
+    return ColumnConst::create(data, result_size);
+}
+
+/// Splits this column into `num_columns` constant columns; output i is a resized clone
+/// with the row count that count_columns_size_in_selector() reports for index i.
+MutableColumns ColumnConst::scatter(ColumnIndex num_columns, const Selector& selector) const {
+    if (const size_t selector_size = selector.size(); selector_size != s) {
+        LOG(FATAL) << fmt::format("Size of selector ({}) doesn't match size of column ({})",
+                                  selector_size, s);
+    }
+
+    const std::vector<size_t> rows_per_column =
+            count_columns_size_in_selector(num_columns, selector);
+
+    MutableColumns parts(num_columns);
+    for (size_t i = 0; i < num_columns; ++i) parts[i] = clone_resized(rows_per_column[i]);
+    return parts;
+}
+
+void ColumnConst::get_permutation(bool /*reverse*/, size_t /*limit*/, int /*nan_direction_hint*/,
+                                  Permutation& res) const {
+    // All rows are equal, so the identity permutation is a valid ordering in any direction.
+    res.resize(s);
+    size_t next_index = 0;
+    for (auto& slot : res) slot = next_index++;
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h
new file mode 100644
index 0000000000..e019c56169
--- /dev/null
+++ b/be/src/vec/columns/column_const.h
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnConst.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/exception.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/core/field.h"
+
+namespace doris::vectorized {
+
+/** ColumnConst contains another column with a single element,
+  *  but looks like a column with an arbitrary number of identical elements.
+  */
+class ColumnConst final : public COWHelper<IColumn, ColumnConst> {
+private:
+    friend class COWHelper<IColumn, ColumnConst>;
+
+    WrappedPtr data; // nested column that stores the single repeated value
+    size_t s;        // number of rows this column reports via size()
+
+    ColumnConst(const ColumnPtr& data, size_t s_);
+    ColumnConst(const ColumnConst& src) = default;
+
+public:
+    ColumnPtr convert_to_full_column() const;
+
+    ColumnPtr convert_to_full_column_if_const() const override { return convert_to_full_column(); }
+
+    ColumnPtr remove_low_cardinality() const;
+
+    std::string get_name() const override { return "Const(" + data->get_name() + ")"; }
+
+    const char* get_family_name() const override { return "Const"; }
+
+    MutableColumnPtr clone_resized(size_t new_size) const override {
+        return ColumnConst::create(data, new_size);
+    }
+
+    size_t size() const override { return s; }
+    // Row accessors ignore the row index: every row is element 0 of the nested column.
+    Field operator[](size_t) const override { return (*data)[0]; }
+
+    void get(size_t, Field& res) const override { data->get(0, res); }
+
+    StringRef get_data_at(size_t) const override { return data->get_data_at(0); }
+
+    StringRef get_data_at_with_terminating_zero(size_t) const override {
+        return data->get_data_at_with_terminating_zero(0);
+    }
+
+    UInt64 get64(size_t) const override { return data->get64(0); }
+
+    UInt64 get_uint(size_t) const override { return data->get_uint(0); }
+
+    Int64 get_int(size_t) const override { return data->get_int(0); }
+
+    bool get_bool(size_t) const override { return data->get_bool(0); }
+
+    Float64 get_float64(size_t) const override { return data->get_float64(0); }
+
+    bool is_null_at(size_t) const override { return data->is_null_at(0); }
+    // The insert_* methods below ignore the inserted values and only grow the row count.
+    void insert_range_from(const IColumn&, size_t /*start*/, size_t length) override {
+        s += length;
+    }
+
+    void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override {
+        s += (indices_end - indices_begin);
+    }
+
+    void insert(const Field&) override { ++s; }
+
+    void insert_data(const char*, size_t) override { ++s; }
+
+    void insert_from(const IColumn&, size_t) override { ++s; }
+
+    void clear() override { s = 0; }
+
+    void insert_default() override { ++s; }
+
+    void pop_back(size_t n) override { s -= n; } // unchecked: n > s wraps the unsigned counter
+
+    StringRef serialize_value_into_arena(size_t, Arena& arena, char const*& begin) const override {
+        return data->serialize_value_into_arena(0, arena, begin);
+    }
+    // Lets the nested column deserialize a value, drops that extra nested row, counts one more row.
+    const char* deserialize_and_insert_from_arena(const char* pos) override {
+        auto res = data->deserialize_and_insert_from_arena(pos);
+        data->pop_back(1);
+        ++s;
+        return res;
+    }
+
+    void update_hash_with_value(size_t, SipHash& hash) const override {
+        data->update_hash_with_value(0, hash);
+    }
+
+    ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override;
+    ColumnPtr replicate(const Offsets& offsets) const override;
+    ColumnPtr permute(const Permutation& perm, size_t limit) const override;
+    // ColumnPtr index(const IColumn & indexes, size_t limit) const override;
+    void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+                         Permutation& res) const override;
+
+    size_t byte_size() const override { return s > 0 ? data->byte_size() + sizeof(s) : 0; }
+
+    size_t allocated_bytes() const override { return data->allocated_bytes() + sizeof(s); }
+    // rhs is cast to ColumnConst; only the two nested single values are compared.
+    int compare_at(size_t, size_t, const IColumn& rhs, int nan_direction_hint) const override {
+        return data->compare_at(0, 0, *assert_cast<const ColumnConst&>(rhs).data,
+                                nan_direction_hint);
+    }
+
+    MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override;
+
+    void get_extremes(Field& min, Field& max) const override { data->get_extremes(min, max); }
+
+    void for_each_subcolumn(ColumnCallback callback) override { callback(data); }
+
+    bool structure_equals(const IColumn& rhs) const override {
+        if (auto rhs_concrete = typeid_cast<const ColumnConst*>(&rhs))
+            return data->structure_equals(*rhs_concrete->data);
+        return false;
+    }
+
+    //    bool is_nullable() const override { return is_column_nullable(*data); }
+    bool only_null() const override { return data->is_null_at(0); }
+    bool is_numeric() const override { return data->is_numeric(); }
+    bool is_fixed_and_contiguous() const override { return data->is_fixed_and_contiguous(); }
+    bool values_have_fixed_size() const override { return data->values_have_fixed_size(); }
+    size_t size_of_value_if_fixed() const override { return data->size_of_value_if_fixed(); }
+    StringRef get_raw_data() const override { return data->get_raw_data(); }
+
+    /// Not part of the common interface.
+
+    IColumn& get_data_column() { return *data; }
+    const IColumn& get_data_column() const { return *data; }
+    const ColumnPtr& get_data_column_ptr() const { return data; }
+
+    Field get_field() const { return get_data_column()[0]; }
+
+    template <typename T>
+    T get_value() const {
+        return get_field().safe_get<NearestFieldType<T>>();
+    }
+    // NOTE(review): self_row is forwarded to the nested column, checked only against size() — confirm callers pass 0.
+    void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
+        DCHECK(size() > self_row);
+        data->replace_column_data(rhs, row, self_row);
+    }
+
+    void replace_column_data_default(size_t self_row = 0) override {
+        DCHECK(size() > self_row);
+        LOG(FATAL) << "should not call the method in column const";
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp
new file mode 100644
index 0000000000..5cc58530bb
--- /dev/null
+++ b/be/src/vec/columns/column_decimal.cpp
@@ -0,0 +1,249 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnDecimal.cpp
+// and modified by Doris
+
+#include "vec/columns/column_decimal.h"
+
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/exception.h"
+#include "vec/common/sip_hash.h"
+#include "vec/common/unaligned.h"
+
+template <typename T>
+bool decimal_less(T x, T y, doris::vectorized::UInt32 x_scale, doris::vectorized::UInt32 y_scale);
+
+namespace doris::vectorized {
+
+/// Three-way comparison of row n of this column with row m of rhs_ (1, -1 or 0).
+template <typename T>
+int ColumnDecimal<T>::compare_at(size_t n, size_t m, const IColumn& rhs_, int) const {
+    const auto& rhs = static_cast<const Self&>(rhs_);
+    const T& lhs_value = data[n];
+    const T& rhs_value = rhs.data[m];
+    if (scale == rhs.scale) return lhs_value > rhs_value ? 1 : (lhs_value < rhs_value ? -1 : 0);
+    // Scales differ: defer to decimal_less, which takes both scales.
+    if (decimal_less<T>(rhs_value, lhs_value, rhs.scale, scale)) return 1;
+    return decimal_less<T>(lhs_value, rhs_value, scale, rhs.scale) ? -1 : 0;
+}
+
+template <typename T>
+StringRef ColumnDecimal<T>::serialize_value_into_arena(size_t n, Arena& arena,
+                                                       char const*& begin) const {
+    auto* dest = arena.alloc_continue(sizeof(T), begin); // arena space for one raw value
+    memcpy(dest, &data[n], sizeof(T));                   // row n is stored as plain bytes
+    return {dest, sizeof(T)};
+}
+
+template <typename T>
+const char* ColumnDecimal<T>::deserialize_and_insert_from_arena(const char* pos) {
+    data.push_back(unaligned_load<T>(pos)); // append the value loaded from pos
+    return pos + sizeof(T);                 // first byte after the consumed value
+}
+
+template <typename T>
+UInt64 ColumnDecimal<T>::get64(size_t n) const {
+    if constexpr (sizeof(T) > sizeof(UInt64)) { // compile-time check: T is wider than 64 bits
+        LOG(FATAL) << "Method get64 is not supported for " << get_family_name();
+    }
+    return static_cast<typename T::NativeType>(data[n]); // native value, converted to UInt64 on return
+}
+
+template <typename T>
+void ColumnDecimal<T>::update_hash_with_value(size_t n, SipHash& hash) const {
+    hash.update(data[n]); // feed row n's stored value into the hash
+}
+
+template <typename T>
+void ColumnDecimal<T>::get_permutation(bool reverse, size_t limit, int,
+                                       IColumn::Permutation& res) const {
+#if 1 /// TODO: perf test
+    // When every row index fits in 32 bits, sort UInt32 indices and widen them afterwards.
+    if (data.size() <= std::numeric_limits<UInt32>::max()) {
+        PaddedPODArray<UInt32> narrow_perm;
+        permutation(reverse, limit, narrow_perm);
+        res.resize(narrow_perm.size());
+        for (size_t pos = 0; pos < narrow_perm.size(); ++pos) res[pos] = narrow_perm[pos];
+        return;
+    }
+#endif
+    // Otherwise sort directly in the caller's index type.
+    permutation(reverse, limit, res);
+}
+
+/// Returns a column (same scale) whose i-th row is row perm[i] of this column;
+/// a non-zero `limit` caps the number of rows produced.
+template <typename T>
+ColumnPtr ColumnDecimal<T>::permute(const IColumn::Permutation& perm, size_t limit) const {
+    const size_t source_rows = data.size();
+    const size_t result_rows = (limit == 0) ? source_rows : std::min(source_rows, limit);
+    if (perm.size() < result_rows) {
+        LOG(FATAL) << "Size of permutation is less than required.";
+    }
+    auto permuted = this->create(result_rows, scale);
+    typename Self::Container& out = permuted->get_data();
+    for (size_t row = 0; row < result_rows; ++row) out[row] = data[perm[row]];
+    return permuted;
+}
+
+/// Returns a copy of this column (same scale) resized to `size` rows: the first
+/// min(size, this->size()) rows are copied, any additional rows are zero-filled.
+template <typename T>
+MutableColumnPtr ColumnDecimal<T>::clone_resized(size_t size) const {
+    auto cloned = this->create(0, scale);
+    if (size == 0) return cloned;
+
+    auto& target = static_cast<Self&>(*cloned).data;
+    target.resize(size);
+
+    const size_t copied_rows = std::min(this->size(), size);
+    memcpy(target.data(), data.data(), copied_rows * sizeof(data[0]));
+
+    // Zero the rows that have no counterpart in the source column.
+    if (const size_t padding_rows = size - copied_rows; padding_rows > 0) {
+        memset(&target[copied_rows], 0, padding_rows * sizeof(T));
+    }
+    return cloned;
+}
+
+template <typename T>
+void ColumnDecimal<T>::insert_data(const char* src, size_t /*length*/) {
+    T tmp; // filled by the memcpy below
+    memcpy(&tmp, src, sizeof(T)); // always reads sizeof(T) bytes; the length argument is ignored
+    data.emplace_back(tmp);
+}
+
+/// Appends rows [start, start + length) of `src`, which is cast to ColumnDecimal<T>.
+template <typename T>
+void ColumnDecimal<T>::insert_range_from(const IColumn& src, size_t start, size_t length) {
+    const auto& source = assert_cast<const ColumnDecimal&>(src).data;
+    const size_t source_rows = source.size();
+    if (start + length > source_rows) {
+        LOG(FATAL) << fmt::format(
+                "Parameters start = {}, length = {} are out of bound in "
+                "ColumnDecimal<T>::insert_range_from method (data.size() = {})",
+                start, length, source_rows);
+    }
+    const size_t first_new_row = data.size();
+    data.resize(first_new_row + length);
+    memcpy(data.data() + first_new_row, &source[start], length * sizeof(data[0]));
+}
+
+template <typename T>
+ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter& filt, ssize_t result_size_hint) const {
+    size_t size = data.size();
+    if (size != filt.size()) {
+        LOG(FATAL) << "Size of filter doesn't match size of column.";
+    }
+    // Result: a new column (same scale) holding the rows whose filter byte is non-zero.
+    auto res = this->create(0, scale);
+    Container& res_data = res->get_data();
+    // A non-zero hint pre-reserves: the hint itself when positive, otherwise the full column size.
+    if (result_size_hint) res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
+
+    const UInt8* filt_pos = filt.data();
+    const UInt8* filt_end = filt_pos + size;
+    const T* data_pos = data.data();
+
+    /** A slightly more optimized version.
+        * Based on the assumption that often pieces of consecutive values
+        *  completely pass or do not pass the filter.
+        * Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
+        */
+    static constexpr size_t SIMD_BYTES = 32;
+    const UInt8* filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES; // end of the last full 32-row chunk
+
+    while (filt_pos < filt_end_sse) {
+        uint32_t mask = bytes32_mask_to_bits32_mask(filt_pos);
+        // Presumably bit i of mask is set when filter byte i is non-zero — TODO confirm against the helper.
+        if (0xFFFFFFFF == mask) {
+            res_data.insert(data_pos, data_pos + SIMD_BYTES); // whole chunk passes: bulk copy
+        } else {
+            while (mask) {
+                const size_t idx = __builtin_ctzll(mask); // position of the lowest set bit
+                res_data.push_back(data_pos[idx]);
+                mask = mask & (mask - 1); // clear the lowest set bit
+            }
+        }
+
+        filt_pos += SIMD_BYTES;
+        data_pos += SIMD_BYTES;
+    }
+    // Tail: the remaining size % SIMD_BYTES rows, checked one at a time.
+    while (filt_pos < filt_end) {
+        if (*filt_pos) res_data.push_back(*data_pos);
+
+        ++filt_pos;
+        ++data_pos;
+    }
+
+    return res;
+}
+
+/// Repeats row i of this column (offsets[i] - previous offset) times, the previous
+/// offset starting at 0, and returns the result as a new column with the same scale.
+template <typename T>
+ColumnPtr ColumnDecimal<T>::replicate(const IColumn::Offsets& offsets) const {
+    const size_t row_count = data.size();
+    if (row_count != offsets.size()) {
+        LOG(FATAL) << "Size of offsets doesn't match size of column.";
+    }
+
+    auto replicated = this->create(0, scale);
+    if (row_count == 0) return replicated;
+
+    typename Self::Container& out = replicated->get_data();
+    // offsets are cumulative, so the last one is the total number of output rows.
+    out.reserve(offsets.back());
+
+    IColumn::Offset emitted = 0;
+    for (size_t row = 0; row < row_count; ++row) {
+        for (size_t left = offsets[row] - emitted; left > 0; --left) out.push_back(data[row]);
+        emitted = offsets[row];
+    }
+    return replicated;
+}
+
+/// Stores the smallest and largest value as decimal fields carrying this column's scale.
+template <typename T>
+void ColumnDecimal<T>::get_extremes(Field& min, Field& max) const {
+    if (data.size() == 0) {
+        // Nothing to scan: both extremes are reported as zero.
+        min = NearestFieldType<T>(0, scale);
+        max = NearestFieldType<T>(0, scale);
+        return;
+    }
+    T smallest = data[0];
+    T largest = data[0];
+    for (const T& value : data) {
+        if (value < smallest) {
+            smallest = value;
+        } else if (value > largest) {
+            largest = value;
+        }
+    }
+    min = NearestFieldType<T>(smallest, scale);
+    max = NearestFieldType<T>(largest, scale);
+}
+
+template class ColumnDecimal<Decimal32>;
+template class ColumnDecimal<Decimal64>;
+template class ColumnDecimal<Decimal128>;
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h
new file mode 100644
index 0000000000..67f4fa9698
--- /dev/null
+++ b/be/src/vec/columns/column_decimal.h
@@ -0,0 +1,220 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnDecimal.h
+// and modified by Doris
+
+#pragma once
+
+#include <cmath>
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_impl.h"
+#include "vec/columns/column_vector_helper.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/common/assert_cast.h"
+#include "vec/core/field.h"
+
+namespace doris::vectorized {
+
+/// PaddedPODArray that additionally carries the decimal scale of its values.
+template <typename T>
+class DecimalPaddedPODArray : public PaddedPODArray<T> {
+public:
+    using Base = PaddedPODArray<T>;
+    using Base::operator[];
+
+    DecimalPaddedPODArray(size_t size, UInt32 scale_) : Base(size), scale(scale_) {}
+
+    DecimalPaddedPODArray(const DecimalPaddedPODArray& other)
+            : Base(other.begin(), other.end()), scale(other.scale) {}
+
+    /// Takes over the buffer of `other`. The scale is copied in the initializer list:
+    /// swapping the still-uninitialized member would read an indeterminate value.
+    DecimalPaddedPODArray(DecimalPaddedPODArray&& other) : scale(other.scale) { this->swap(other); }
+
+    /// Exchanges both the buffer and the scale with `other`.
+    DecimalPaddedPODArray& operator=(DecimalPaddedPODArray&& other) {
+        this->swap(other);
+        std::swap(scale, other.scale);
+        return *this;
+    }
+
+    UInt32 get_scale() const { return scale; }
+
+private:
+    UInt32 scale;
+};
+
+/// A ColumnVector for Decimals: the raw values live in `data` and share one `scale`.
+template <typename T>
+class ColumnDecimal final : public COWHelper<ColumnVectorHelper, ColumnDecimal<T>> {
+    static_assert(IsDecimalNumber<T>);
+
+private:
+    using Self = ColumnDecimal;
+    friend class COWHelper<ColumnVectorHelper, Self>;
+
+public:
+    using Container = DecimalPaddedPODArray<T>;
+
+private:
+    ColumnDecimal(const size_t n, UInt32 scale_) : data(n, scale_), scale(scale_) {}
+
+    ColumnDecimal(const ColumnDecimal& src) : data(src.data), scale(src.scale) {}
+
+public:
+    const char* get_family_name() const override { return TypeName<T>::get(); }
+
+    bool is_numeric() const override { return false; }
+    bool is_column_decimal() const override { return true; }
+    bool can_be_inside_nullable() const override { return true; }
+    bool is_fixed_and_contiguous() const override { return true; }
+    size_t size_of_value_if_fixed() const override { return sizeof(T); }
+
+    size_t size() const override { return data.size(); }
+    size_t byte_size() const override { return data.size() * sizeof(data[0]); }
+    size_t allocated_bytes() const override { return data.allocated_bytes(); }
+    void protect() override { data.protect(); }
+    void reserve(size_t n) override { data.reserve(n); }
+    void resize(size_t n) override { data.resize(n); }
+    /// Appends row n of src; src is static_cast to ColumnDecimal<T>.
+    void insert_from(const IColumn& src, size_t n) override {
+        data.push_back(static_cast<const Self&>(src).get_data()[n]);
+    }
+    /// Appends the rows of src selected by the indices in [indices_begin, indices_end).
+    void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override {
+        const Self& src_vec = assert_cast<const Self&>(src);
+        data.reserve(size() + (indices_end - indices_begin));
+        for (auto x = indices_begin; x != indices_end; ++x) {
+            data.push_back_without_reserve(src_vec.get_element(*x));
+        }
+    }
+
+    void insert_data(const char* pos, size_t /*length*/) override;
+    void insert_default() override { data.push_back(T()); }
+    void insert(const Field& x) override {
+        data.push_back(doris::vectorized::get<NearestFieldType<T>>(x));
+    }
+    void insert_range_from(const IColumn& src, size_t start, size_t length) override;
+    /// Drops the last n rows; n is not checked against size(), so a larger n wraps the new size.
+    void pop_back(size_t n) override { data.resize_assume_reserved(data.size() - n); }
+
+    StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override;
+    const char* deserialize_and_insert_from_arena(const char* pos) override;
+    void update_hash_with_value(size_t n, SipHash& hash) const override;
+    int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override;
+    void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+                         IColumn::Permutation& res) const override;
+
+    MutableColumnPtr clone_resized(size_t size) const override;
+
+    Field operator[](size_t n) const override { return DecimalField(data[n], scale); }
+    /// The whole value buffer as raw bytes; the length is byte_size(), in bytes like get_data_at().
+    StringRef get_raw_data() const override {
+        return StringRef(reinterpret_cast<const char*>(data.data()), byte_size());
+    }
+    StringRef get_data_at(size_t n) const override {
+        return StringRef(reinterpret_cast<const char*>(&data[n]), sizeof(data[n]));
+    }
+    void get(size_t n, Field& res) const override { res = (*this)[n]; }
+    bool get_bool(size_t n) const override { return bool(data[n]); }
+    Int64 get_int(size_t n) const override { return Int64(data[n] * scale); } // NOTE(review): multiplies by `scale` itself — confirm intent
+    UInt64 get64(size_t n) const override;
+    bool is_default_at(size_t n) const override { return data[n] == 0; }
+
+    void clear() override { data.clear(); }
+
+    ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) const override;
+    ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override;
+    //    ColumnPtr index(const IColumn & indexes, size_t limit) const override;
+
+    template <typename Type>
+    ColumnPtr index_impl(const PaddedPODArray<Type>& indexes, size_t limit) const;
+
+    ColumnPtr replicate(const IColumn::Offsets& offsets) const override;
+    void get_extremes(Field& min, Field& max) const override;
+
+    MutableColumns scatter(IColumn::ColumnIndex num_columns,
+                           const IColumn::Selector& selector) const override {
+        return this->template scatter_impl<Self>(num_columns, selector);
+    }
+
+    //    void gather(ColumnGathererStream & gatherer_stream) override;
+    /// Two decimal columns have the same structure when the value type and the scale match.
+    bool structure_equals(const IColumn& rhs) const override {
+        if (auto rhs_concrete = typeid_cast<const ColumnDecimal<T>*>(&rhs))
+            return scale == rhs_concrete->scale;
+        return false;
+    }
+
+    void insert(const T value) { data.push_back(value); }
+    Container& get_data() { return data; }
+    const Container& get_data() const { return data; }
+    const T& get_element(size_t n) const { return data[n]; }
+    T& get_element(size_t n) { return data[n]; }
+    /// Overwrites row self_row with row `row` of rhs (static_cast to ColumnDecimal<T>).
+    void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
+        DCHECK(size() > self_row);
+        data[self_row] = static_cast<const Self&>(rhs).data[row];
+    }
+    /// Resets row self_row to a value-initialized T.
+    void replace_column_data_default(size_t self_row = 0) override {
+        DCHECK(size() > self_row);
+        data[self_row] = T();
+    }
+
+protected:
+    Container data;
+    UInt32 scale;
+    /// Fills res with row indices; the first `limit` (or all, when 0 or >= size) are in value order.
+    template <typename U>
+    void permutation(bool reverse, size_t limit, PaddedPODArray<U>& res) const {
+        size_t s = data.size();
+        res.resize(s);
+        for (U i = 0; i < s; ++i) res[i] = i;
+
+        auto sort_end = res.end();
+        if (limit && limit < s) sort_end = res.begin() + limit;
+        // reverse selects descending order; otherwise ascending.
+        if (reverse)
+            std::partial_sort(res.begin(), sort_end, res.end(),
+                              [this](size_t a, size_t b) { return data[a] > data[b]; });
+        else
+            std::partial_sort(res.begin(), sort_end, res.end(),
+                              [this](size_t a, size_t b) { return data[a] < data[b]; });
+    }
+};
+
+/// Gathers rows by position: row i of the result is row indexes[i] of this column.
+/// A `limit` of 0 uses every index; otherwise at most `limit` indexes are used.
+template <typename T>
+template <typename Type>
+ColumnPtr ColumnDecimal<T>::index_impl(const PaddedPODArray<Type>& indexes, size_t limit) const {
+    const size_t index_count = indexes.size();
+    const size_t result_rows = (limit == 0) ? index_count : std::min(index_count, limit);
+
+    auto gathered = this->create(result_rows, scale);
+    typename Self::Container& out = gathered->get_data();
+    for (size_t row = 0; row < result_rows; ++row) {
+        out[row] = data[indexes[row]];
+    }
+
+    return gathered;
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_dummy.h b/be/src/vec/columns/column_dummy.h
new file mode 100644
index 0000000000..cac3a063cf
--- /dev/null
+++ b/be/src/vec/columns/column_dummy.h
@@ -0,0 +1,145 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/IColumnDummy.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/** Base class for columns-constants that contain a value that is not in the `Field`.
+  * Not a full-fledged column and is used in a special way: it stores only a row count.
+  */
+class IColumnDummy : public IColumn {
+public:
+    IColumnDummy() : s(0) {}
+    IColumnDummy(size_t s_) : s(s_) {}
+
+public:
+    virtual MutableColumnPtr clone_dummy(size_t s_) const = 0;
+
+    MutableColumnPtr clone_resized(size_t s) const override { return clone_dummy(s); }
+    size_t size() const override { return s; }
+    void insert_default() override { ++s; }
+    void pop_back(size_t n) override { s -= n; }
+    size_t byte_size() const override { return 0; }
+    size_t allocated_bytes() const override { return 0; }
+    int compare_at(size_t, size_t, const IColumn&, int) const override { return 0; }
+    // There are no values to read or write: value access is a fatal error.
+    [[noreturn]] Field operator[](size_t) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void get(size_t, Field&) const override {
+        LOG(FATAL) << "Cannot get value from " << get_name();
+    }
+
+    void insert(const Field&) override {
+        LOG(FATAL) << "Cannot insert element into " << get_name();
+    }
+
+    StringRef get_data_at(size_t) const override { return {}; }
+
+    void insert_data(const char*, size_t) override { ++s; }
+    // Serialized form is empty: zero bytes are requested from the arena.
+    StringRef serialize_value_into_arena(size_t /*n*/, Arena& arena,
+                                         char const*& begin) const override {
+        return {arena.alloc_continue(0, begin), 0};
+    }
+    // Consumes no bytes; only the row count grows.
+    const char* deserialize_and_insert_from_arena(const char* pos) override {
+        ++s;
+        return pos;
+    }
+
+    void update_hash_with_value(size_t /*n*/, SipHash& /*hash*/) const override {}
+
+    void insert_from(const IColumn&, size_t) override { ++s; }
+
+    void insert_range_from(const IColumn& /*src*/, size_t /*start*/, size_t length) override {
+        s += length;
+    }
+
+    void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override {
+        s += (indices_end - indices_begin);
+    }
+
+    ColumnPtr filter(const Filter& filt, ssize_t /*result_size_hint*/) const override {
+        return clone_dummy(count_bytes_in_filter(filt));
+    }
+    // Unlike the other columns in this directory, the permutation must match the column size exactly.
+    ColumnPtr permute(const Permutation& perm, size_t limit) const override {
+        if (s != perm.size()) {
+            LOG(FATAL) << "Size of permutation doesn't match size of column.";
+        }
+
+        return clone_dummy(limit ? std::min(s, limit) : s);
+    }
+
+    void get_permutation(bool /*reverse*/, size_t /*limit*/, int /*nan_direction_hint*/,
+                         Permutation& res) const override {
+        res.resize(s);
+        for (size_t i = 0; i < s; ++i) res[i] = i;
+    }
+
+    ColumnPtr replicate(const Offsets& offsets) const override {
+        if (s != offsets.size()) {
+            LOG(FATAL) << "Size of offsets doesn't match size of column.";
+        }
+        // An empty column has no last offset to read, so it replicates to an empty column.
+        return clone_dummy(s == 0 ? 0 : offsets.back());
+    }
+
+    MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override {
+        if (s != selector.size()) {
+            LOG(FATAL) << "Size of selector doesn't match size of column.";
+        }
+        // counts[i] = number of selector entries equal to i, i.e. the row count of output i.
+        std::vector<size_t> counts(num_columns);
+        for (auto idx : selector) ++counts[idx];
+
+        MutableColumns res(num_columns);
+        for (size_t i = 0; i < num_columns; ++i) res[i] = clone_resized(counts[i]);
+
+        return res;
+    }
+
+    void get_extremes(Field&, Field&) const override {}
+
+    void addSize(size_t delta) { s += delta; }
+
+    bool is_dummy() const override { return true; }
+
+    void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
+        LOG(FATAL) << "should not call the method in column dummy";
+    }
+
+    void replace_column_data_default(size_t self_row = 0) override {
+        LOG(FATAL) << "should not call the method in column dummy";
+    }
+
+protected:
+    size_t s; // number of rows; the only state of a dummy column
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_impl.h b/be/src/vec/columns/column_impl.h
new file mode 100644
index 0000000000..4e12f98d51
--- /dev/null
+++ b/be/src/vec/columns/column_impl.h
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/IColumnImpl.h
+// and modified by Doris
+
+/**
+  * This file implements template methods of IColumn that depend on other types
+  * we don't want to include.
+  * Currently, this is only the scatter_impl method that depends on PODArray
+  * implementation.
+  */
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+
+/// Distributes the rows of this column over `num_columns` freshly created empty columns:
+/// row i is appended to the column at index selector[i].
+/// `Derived` is the concrete column type whose insert_from() performs the copy.
+/// Logs a FATAL error when `selector` does not have one entry per row, or when a
+/// non-empty column is scattered into zero columns.
+template <typename Derived>
+std::vector<IColumn::MutablePtr> IColumn::scatter_impl(ColumnIndex num_columns,
+                                                       const Selector& selector) const {
+    size_t num_rows = size();
+
+    if (num_rows != selector.size()) {
+        LOG(FATAL) << fmt::format("Size of selector: {}, doesn't match size of column:{}",
+                                  selector.size(), num_rows);
+    }
+
+    // With no destination columns the reserve estimate below would divide by zero
+    // (the resulting NaN/inf cannot be converted to size_t) and every selector entry
+    // would index an empty vector, so this case is handled up front.
+    if (num_columns == 0) {
+        if (num_rows != 0) {
+            LOG(FATAL) << "Cannot scatter a non-empty column into zero columns";
+        }
+        return {};
+    }
+
+    std::vector<MutablePtr> columns(num_columns);
+    for (auto& column : columns) column = clone_empty();
+
+    {
+        size_t reserve_size =
+                num_rows * 1.1 / num_columns; /// 1.1 is just a guess. Better to use n-sigma rule.
+
+        if (reserve_size > 1)
+            for (auto& column : columns) column->reserve(reserve_size);
+    }
+
+    for (size_t i = 0; i < num_rows; ++i)
+        static_cast<Derived&>(*columns[selector[i]]).insert_from(*this, i);
+
+    return columns;
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_nothing.h b/be/src/vec/columns/column_nothing.h
new file mode 100644
index 0000000000..d3cb88574c
--- /dev/null
+++ b/be/src/vec/columns/column_nothing.h
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnNothing.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_dummy.h"
+
+namespace doris::vectorized {
+
+/// Column built on IColumnDummy; it only sets the inherited row count `s`.
+class ColumnNothing final : public COWHelper<IColumnDummy, ColumnNothing> {
+private:
+    friend class COWHelper<IColumnDummy, ColumnNothing>;
+
+    /// Builds a column that reports `row_count` rows.
+    ColumnNothing(size_t row_count) { s = row_count; }
+
+    ColumnNothing(const ColumnNothing&) = default;
+
+public:
+    const char* get_family_name() const override { return "Nothing"; }
+
+    /// Creates another ColumnNothing with `s_` rows.
+    MutableColumnPtr clone_dummy(size_t s_) const override { return ColumnNothing::create(s_); }
+
+    bool can_be_inside_nullable() const override { return true; }
+
+    /// The structure matches only when `rhs` is exactly a ColumnNothing.
+    bool structure_equals(const IColumn& rhs) const override {
+        const bool same_type = typeid(rhs) == typeid(ColumnNothing);
+        return same_type;
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp
new file mode 100644
index 0000000000..ae3a2fd1b7
--- /dev/null
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -0,0 +1,453 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnNullable.cpp
+// and modified by Doris
+
+#include "vec/columns/column_nullable.h"
+
+#include "vec/columns/column_const.h"
+#include "vec/common/arena.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/nan_utils.h"
+#include "vec/common/sip_hash.h"
+#include "vec/common/typeid_cast.h"
+
+namespace doris::vectorized {
+
+/// Takes ownership of the nested column and the null map. A constant nested column is
+/// materialized first. A nested column that cannot live inside Nullable, or a constant
+/// null map, is reported with LOG(FATAL).
+ColumnNullable::ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnPtr&& null_map_)
+        : nested_column(std::move(nested_column_)), null_map(std::move(null_map_)) {
+    /// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
+    nested_column = get_nested_column().convert_to_full_column_if_const();
+
+    const IColumn& nested = get_nested_column();
+    if (!nested.can_be_inside_nullable()) {
+        LOG(FATAL) << nested.get_name() << " cannot be inside Nullable column";
+    }
+
+    const bool null_map_is_const = is_column_const(*null_map);
+    if (null_map_is_const) {
+        LOG(FATAL) << "ColumnNullable cannot have constant null map";
+    }
+}
+
+/// Feeds row `n` into `hash`: always the null flag, and the nested value only when
+/// the flag is zero (the row is not NULL).
+void ColumnNullable::update_hash_with_value(size_t n, SipHash& hash) const {
+    const auto& null_flags = get_null_map_data();
+    hash.update(null_flags[n]);
+
+    if (null_flags[n] != 0) {
+        return;
+    }
+    get_nested_column().update_hash_with_value(n, hash);
+}
+
+/// Returns a copy with `new_size` rows. Existing rows keep their null flags; rows
+/// added by growing the column are flagged as NULL.
+MutableColumnPtr ColumnNullable::clone_resized(size_t new_size) const {
+    MutableColumnPtr resized_nested = get_nested_column().clone_resized(new_size);
+    auto resized_null_map = ColumnUInt8::create();
+
+    if (new_size != 0) {
+        auto& dst_flags = resized_null_map->get_data();
+        const auto& src_flags = get_null_map_data();
+        dst_flags.resize(new_size);
+
+        // Copy the flags of the rows both columns have in common.
+        const size_t copied = std::min(size(), new_size);
+        memcpy(dst_flags.data(), src_flags.data(), copied * sizeof(src_flags[0]));
+
+        /// If resizing to bigger one, set all new values to NULLs.
+        if (copied < new_size) {
+            memset(&dst_flags[copied], 1, new_size - copied);
+        }
+    }
+
+    return ColumnNullable::create(std::move(resized_nested), std::move(resized_null_map));
+}
+
+/// Returns Null() for a NULL row, otherwise the nested column's value at `n`.
+Field ColumnNullable::operator[](size_t n) const {
+    if (is_null_at(n)) {
+        return Null();
+    }
+    return get_nested_column()[n];
+}
+
+/// Stores row `n` into `res`: Null() for a NULL row, otherwise the nested value.
+void ColumnNullable::get(size_t n, Field& res) const {
+    if (!is_null_at(n)) {
+        get_nested_column().get(n, res);
+        return;
+    }
+    res = Null();
+}
+
+/// Returns an empty StringRef with a null data pointer for a NULL row, otherwise the
+/// nested column's data at `n`.
+StringRef ColumnNullable::get_data_at(size_t n) const {
+    return is_null_at(n) ? StringRef((const char*)nullptr, 0)
+                         : get_nested_column().get_data_at(n);
+}
+
+/// Appends one row. A null `pos` appends a NULL row (default nested value, flag 1);
+/// otherwise the bytes are handed to the nested column and the flag is 0.
+void ColumnNullable::insert_data(const char* pos, size_t length) {
+    const bool is_null = (pos == nullptr);
+    if (is_null) {
+        get_nested_column().insert_default();
+    } else {
+        get_nested_column().insert_data(pos, length);
+    }
+    get_null_map_data().push_back(is_null ? 1 : 0);
+}
+
+/// Serializes row `n` as the null flag followed, for non-NULL rows only, by the nested
+/// column's serialization. Returns a reference spanning both parts.
+StringRef ColumnNullable::serialize_value_into_arena(size_t n, Arena& arena,
+                                                     char const*& begin) const {
+    const auto& null_flags = get_null_map_data();
+    static constexpr auto flag_size = sizeof(null_flags[0]);
+
+    auto flag_pos = arena.alloc_continue(flag_size, begin);
+    memcpy(flag_pos, &null_flags[n], flag_size);
+
+    // A NULL row consists of the flag alone.
+    if (null_flags[n]) {
+        return StringRef(flag_pos, flag_size);
+    }
+
+    auto nested_ref = get_nested_column().serialize_value_into_arena(n, arena, begin);
+
+    /// serialize_value_into_arena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
+    return StringRef(nested_ref.data - flag_size, nested_ref.size + flag_size);
+}
+
+/// Reads one row written by serialize_value_into_arena(): a null-flag byte, then the
+/// nested value when the flag is zero. A NULL row appends a default nested value.
+/// Returns the position just past the consumed bytes.
+const char* ColumnNullable::deserialize_and_insert_from_arena(const char* pos) {
+    UInt8 null_flag = *reinterpret_cast<const UInt8*>(pos);
+    pos += sizeof(null_flag);
+
+    get_null_map_data().push_back(null_flag);
+
+    if (null_flag != 0) {
+        get_nested_column().insert_default();
+        return pos;
+    }
+    return get_nested_column().deserialize_and_insert_from_arena(pos);
+}
+
+/// Appends `length` rows of `src`, starting at `start`. `src` must be a ColumnNullable.
+void ColumnNullable::insert_range_from(const IColumn& src, size_t start, size_t length) {
+    const auto& src_nullable = assert_cast<const ColumnNullable&>(src);
+
+    // Null flags first, then the values, each taken from the matching source sub-column.
+    get_null_map_column().insert_range_from(*src_nullable.null_map, start, length);
+    get_nested_column().insert_range_from(*src_nullable.nested_column, start, length);
+}
+
+/// Appends the rows of `src` selected by the index range [indices_begin, indices_end).
+/// `src` must be a ColumnNullable.
+void ColumnNullable::insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) {
+    const auto& src_nullable = assert_cast<const ColumnNullable&>(src);
+    const IColumn& src_values = src_nullable.get_nested_column();
+    const ColumnUInt8& src_flags = src_nullable.get_null_map_column();
+
+    get_nested_column().insert_indices_from(src_values, indices_begin, indices_end);
+    get_null_map_column().insert_indices_from(src_flags, indices_begin, indices_end);
+}
+
+/// Appends one row from `x`. A null field appends a NULL row (default nested value,
+/// flag 1); any other field is inserted into the nested column with flag 0.
+void ColumnNullable::insert(const Field& x) {
+    const bool is_null = x.is_null();
+    if (is_null) {
+        get_nested_column().insert_default();
+    } else {
+        get_nested_column().insert(x);
+    }
+    get_null_map_data().push_back(is_null ? 1 : 0);
+}
+
+/// Appends row `n` of `src` (value and null flag). `src` must be a ColumnNullable.
+void ColumnNullable::insert_from(const IColumn& src, size_t n) {
+    const auto& src_nullable = assert_cast<const ColumnNullable&>(src);
+    const IColumn& src_values = src_nullable.get_nested_column();
+
+    get_nested_column().insert_from(src_values, n);
+    get_null_map_data().push_back(src_nullable.get_null_map_data()[n]);
+}
+
+/// Appends row `n` of `src` as a non-NULL row. `src` is passed directly to the
+/// nested column, so it is not a ColumnNullable itself.
+void ColumnNullable::insert_from_not_nullable(const IColumn& src, size_t n) {
+    IColumn& nested = get_nested_column();
+    nested.insert_from(src, n);
+
+    NullMap& null_flags = get_null_map_data();
+    null_flags.push_back(0);
+}
+
+/// Appends `length` rows of `src`, starting at `start`, all flagged as non-NULL.
+/// `src` is passed directly to the nested column.
+void ColumnNullable::insert_range_from_not_nullable(const IColumn& src, size_t start,
+                                                    size_t length) {
+    get_nested_column().insert_range_from(src, start, length);
+
+    NullMap& null_flags = get_null_map_data();
+    const size_t grown_size = null_flags.size() + length;
+    null_flags.resize_fill(grown_size, 0);
+}
+
+/// Appends row `position` of `src` `length` times, each time as a non-NULL row.
+void ColumnNullable::insert_many_from_not_nullable(const IColumn& src, size_t position,
+                                                   size_t length) {
+    for (size_t remaining = length; remaining != 0; --remaining) {
+        insert_from_not_nullable(src, position);
+    }
+}
+
+/// Forwards pop_back(n) to both the nested column and the null map.
+void ColumnNullable::pop_back(size_t n) {
+    IColumn& nested = get_nested_column();
+    nested.pop_back(n);
+
+    ColumnUInt8& null_flags = get_null_map_column();
+    null_flags.pop_back(n);
+}
+
+/// Applies `filt` to the nested column and to the null map, and wraps the two results
+/// in a new ColumnNullable.
+ColumnPtr ColumnNullable::filter(const Filter& filt, ssize_t result_size_hint) const {
+    ColumnPtr kept_values = get_nested_column().filter(filt, result_size_hint);
+    ColumnPtr kept_flags = get_null_map_column().filter(filt, result_size_hint);
+
+    return ColumnNullable::create(kept_values, kept_flags);
+}
+
+/// Selects the rows listed in `sel` (`sel_size` entries).
+/// When `ptr` is given it must point to a ColumnNullable: that column's nested column
+/// and null map are passed on as destinations to the sub-columns' filter_by_selector()
+/// calls, and `*ptr` is returned.
+/// Otherwise a new ColumnNullable is built from the filtered nested column and null map.
+ColumnPtr ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr) {
+    if (ptr != nullptr) {
+        // Use assert_cast like the rest of this file instead of an unchecked reinterpret_cast.
+        const auto& dst_nullable = assert_cast<const ColumnNullable&>(**ptr);
+        ColumnPtr nest_col_ptr = dst_nullable.nested_column;
+        ColumnPtr null_map_ptr = dst_nullable.null_map;
+        get_nested_column().filter_by_selector(sel, sel_size, &nest_col_ptr);
+        get_null_map_column().filter_by_selector(sel, sel_size, &null_map_ptr);
+        return *ptr;
+    } else {
+        ColumnPtr filtered_data = get_nested_column().filter_by_selector(sel, sel_size);
+        ColumnPtr filtered_null_map = get_null_map_column().filter_by_selector(sel, sel_size);
+        return ColumnNullable::create(filtered_data, filtered_null_map);
+    }
+}
+
+/// Applies the same permutation (and limit) to the nested column and to the null map,
+/// and wraps the two results in a new ColumnNullable.
+ColumnPtr ColumnNullable::permute(const Permutation& perm, size_t limit) const {
+    ColumnPtr reordered_values = get_nested_column().permute(perm, limit);
+    ColumnPtr reordered_flags = get_null_map_column().permute(perm, limit);
+
+    return ColumnNullable::create(reordered_values, reordered_flags);
+}
+
+/// Compares row `n` of this column with row `m` of `rhs_` (which must be a
+/// ColumnNullable). Two NULLs compare equal; a NULL against a value yields
+/// `null_direction_hint` (NULL on the left) or its negation (NULL on the right).
+/// Two values are compared by the nested column.
+int ColumnNullable::compare_at(size_t n, size_t m, const IColumn& rhs_,
+                               int null_direction_hint) const {
+    /// NULL values share the properties of NaN values.
+    /// Here the last parameter of compare_at is called null_direction_hint
+    /// instead of the usual nan_direction_hint and is used to implement
+    /// the ordering specified by either NULLS FIRST or NULLS LAST in the
+    /// ORDER BY construction.
+    const auto& rhs_nullable = assert_cast<const ColumnNullable&>(rhs_);
+
+    const bool lhs_is_null = is_null_at(n);
+    const bool rhs_is_null = rhs_nullable.is_null_at(m);
+
+    if (lhs_is_null && rhs_is_null) {
+        return 0;
+    }
+    if (lhs_is_null) {
+        return null_direction_hint;
+    }
+    if (rhs_is_null) {
+        return -null_direction_hint;
+    }
+
+    return get_nested_column().compare_at(n, m, rhs_nullable.get_nested_column(),
+                                          null_direction_hint);
+}
+
+/// Builds a sort permutation in `res`. The nested column produces the ordering of all
+/// rows (with limit 0), then the indices of NULL rows are moved to one end of `res`:
+/// to the end when `(null_direction_hint > 0) != reverse`, otherwise to the beginning.
+/// The relative order of non-NULL rows is kept by both passes.
+void ColumnNullable::get_permutation(bool reverse, size_t limit, int null_direction_hint,
+                                     Permutation& res) const {
+    /// Cannot pass limit because of unknown amount of NULLs.
+    get_nested_column().get_permutation(reverse, 0, null_direction_hint, res);
+
+    if ((null_direction_hint > 0) != reverse) {
+        /// Shift all NULL values to the end.
+
+        size_t read_idx = 0;
+        size_t write_idx = 0;
+        size_t end_idx = res.size();
+
+        // A zero limit means "no limit"; otherwise it is clamped to the number of rows.
+        if (!limit)
+            limit = end_idx;
+        else
+            limit = std::min(end_idx, limit);
+
+        // Skip the leading run of non-NULL entries; they are already in place.
+        while (read_idx < limit && !is_null_at(res[read_idx])) {
+            ++read_idx;
+            ++write_idx;
+        }
+
+        ++read_idx;
+
+        /// Invariants:
+        ///  write_idx < read_idx
+        ///  write_idx points to NULL
+        ///  read_idx will be incremented to position of next not-NULL
+        ///  there are range of NULLs between write_idx and read_idx - 1,
+        /// We are moving elements from end to begin of this range,
+        ///  so range will "bubble" towards the end.
+        /// Relative order of NULL elements could be changed,
+        ///  but relative order of non-NULLs is preserved.
+
+        // Stops as soon as `limit` positions have been written or the input is exhausted.
+        while (read_idx < end_idx && write_idx < limit) {
+            if (!is_null_at(res[read_idx])) {
+                std::swap(res[read_idx], res[write_idx]);
+                ++write_idx;
+            }
+            ++read_idx;
+        }
+    } else {
+        /// Shift all NULL values to the beginning.
+
+        // Same scheme as above, walking backwards; signed indices let the loops end below 0.
+        // `limit` is not consulted in this branch.
+        ssize_t read_idx = res.size() - 1;
+        ssize_t write_idx = res.size() - 1;
+
+        // Skip the trailing run of non-NULL entries; they are already in place.
+        while (read_idx >= 0 && !is_null_at(res[read_idx])) {
+            --read_idx;
+            --write_idx;
+        }
+
+        --read_idx;
+
+        while (read_idx >= 0 && write_idx >= 0) {
+            if (!is_null_at(res[read_idx])) {
+                std::swap(res[read_idx], res[write_idx]);
+                --write_idx;
+            }
+            --read_idx;
+        }
+    }
+}
+//
+//void ColumnNullable::gather(ColumnGathererStream & gatherer)
+//{
+//    gatherer.gather(*this);
+//}
+
+/// Forwards reserve(n) to the nested column and to the null map data.
+void ColumnNullable::reserve(size_t n) {
+    IColumn& nested = get_nested_column();
+    NullMap& null_flags = get_null_map_data();
+
+    nested.reserve(n);
+    null_flags.reserve(n);
+}
+
+/// Forwards resize(n) to the nested column and to the null map data.
+void ColumnNullable::resize(size_t n) {
+    IColumn& nested = get_nested_column();
+    NullMap& null_flags = get_null_map_data();
+
+    nested.resize(n);
+    null_flags.resize(n);
+}
+
+/// Sum of the byte sizes of the nested column and the null map column.
+size_t ColumnNullable::byte_size() const {
+    const size_t nested_bytes = get_nested_column().byte_size();
+    const size_t null_map_bytes = get_null_map_column().byte_size();
+    return nested_bytes + null_map_bytes;
+}
+
+/// Sum of the allocated bytes of the nested column and the null map column.
+size_t ColumnNullable::allocated_bytes() const {
+    const size_t nested_bytes = get_nested_column().allocated_bytes();
+    const size_t null_map_bytes = get_null_map_column().allocated_bytes();
+    return nested_bytes + null_map_bytes;
+}
+
+/// Forwards protect() to the nested column and to the null map column.
+void ColumnNullable::protect() {
+    IColumn& nested = get_nested_column();
+    nested.protect();
+
+    ColumnUInt8& null_flags = get_null_map_column();
+    null_flags.protect();
+}
+
+namespace {
+
+/// Variant of get_extremes() for ColumnVector content that is aware of NULLs.
+/// Rows whose null flag is set are skipped. NaN values never replace a real number;
+/// they are reported only when every non-NULL value is NaN.
+/// An empty column sets both outputs to Null(). A column holding only NULLs leaves
+/// `min` and `max` as the caller passed them.
+template <typename T>
+void getExtremesFromNullableContent(const ColumnVector<T>& col, const NullMap& null_map, Field& min,
+                                    Field& max) {
+    const auto& values = col.get_data();
+    const size_t row_count = values.size();
+
+    if (row_count == 0) {
+        min = Null();
+        max = Null();
+        return;
+    }
+
+    bool seen_value = false;  // a non-NULL row has been found
+    bool seen_number = false; // a non-NULL, non-NaN row has been found
+
+    T lowest = 0;
+    T highest = 0;
+
+    for (size_t row = 0; row < row_count; ++row) {
+        if (null_map[row]) {
+            continue;
+        }
+
+        const T value = values[row];
+        const bool value_is_nan = is_nan(value);
+
+        if (!seen_value) {
+            // First non-NULL row seeds both extremes, even when it is NaN.
+            lowest = value;
+            highest = value;
+            seen_value = true;
+            seen_number = !value_is_nan;
+        } else if (value_is_nan) {
+            // Later NaNs never influence the result.
+        } else if (!seen_number) {
+            // First real number replaces the NaN seed.
+            lowest = value;
+            highest = value;
+            seen_number = true;
+        } else if (value < lowest) {
+            lowest = value;
+        } else if (value > highest) {
+            highest = value;
+        }
+    }
+
+    if (!seen_value) {
+        return;
+    }
+    min = lowest;
+    max = highest;
+}
+
+} // namespace
+
+/// Computes the minimum and maximum over the non-NULL rows. Both outputs start as
+/// Null(); they are only recomputed when the nested column is one of the numeric
+/// column types tested below. Any other nested type leaves them as Null().
+void ColumnNullable::get_extremes(Field& min, Field& max) const {
+    min = Null();
+    max = Null();
+
+    const auto& null_flags = get_null_map_data();
+    const IColumn* nested = nested_column.get();
+
+    if (const auto as_i8 = typeid_cast<const ColumnInt8*>(nested)) {
+        getExtremesFromNullableContent<Int8>(*as_i8, null_flags, min, max);
+    } else if (const auto as_i16 = typeid_cast<const ColumnInt16*>(nested)) {
+        getExtremesFromNullableContent<Int16>(*as_i16, null_flags, min, max);
+    } else if (const auto as_i32 = typeid_cast<const ColumnInt32*>(nested)) {
+        getExtremesFromNullableContent<Int32>(*as_i32, null_flags, min, max);
+    } else if (const auto as_i64 = typeid_cast<const ColumnInt64*>(nested)) {
+        getExtremesFromNullableContent<Int64>(*as_i64, null_flags, min, max);
+    } else if (const auto as_u8 = typeid_cast<const ColumnUInt8*>(nested)) {
+        getExtremesFromNullableContent<UInt8>(*as_u8, null_flags, min, max);
+    } else if (const auto as_u16 = typeid_cast<const ColumnUInt16*>(nested)) {
+        getExtremesFromNullableContent<UInt16>(*as_u16, null_flags, min, max);
+    } else if (const auto as_u32 = typeid_cast<const ColumnUInt32*>(nested)) {
+        getExtremesFromNullableContent<UInt32>(*as_u32, null_flags, min, max);
+    } else if (const auto as_u64 = typeid_cast<const ColumnUInt64*>(nested)) {
+        getExtremesFromNullableContent<UInt64>(*as_u64, null_flags, min, max);
+    } else if (const auto as_f32 = typeid_cast<const ColumnFloat32*>(nested)) {
+        getExtremesFromNullableContent<Float32>(*as_f32, null_flags, min, max);
+    } else if (const auto as_f64 = typeid_cast<const ColumnFloat64*>(nested)) {
+        getExtremesFromNullableContent<Float64>(*as_f64, null_flags, min, max);
+    }
+}
+
+/// Replicates the nested column and the null map with the same `offsets`, and wraps
+/// the two results in a new ColumnNullable.
+ColumnPtr ColumnNullable::replicate(const Offsets& offsets) const {
+    ColumnPtr repeated_values = get_nested_column().replicate(offsets);
+    ColumnPtr repeated_flags = get_null_map_column().replicate(offsets);
+
+    return ColumnNullable::create(repeated_values, repeated_flags);
+}
+
+/// ORs `map` into this column's null map, row by row. With `negative` set, each
+/// entry of `map` is XOR-ed with true before being OR-ed in.
+/// Logs a FATAL error when the two maps differ in size.
+template <bool negative>
+void ColumnNullable::apply_null_map_impl(const ColumnUInt8& map) {
+    NullMap& own_flags = get_null_map_data();
+    const NullMap& other_flags = map.get_data();
+
+    const size_t row_count = own_flags.size();
+    if (row_count != other_flags.size()) {
+        LOG(FATAL) << "Inconsistent sizes of ColumnNullable objects";
+    }
+
+    for (size_t row = 0; row < row_count; ++row) {
+        own_flags[row] |= negative ^ other_flags[row];
+    }
+}
+
+/// ORs `map` into this column's null map (non-negated form of apply_null_map_impl).
+void ColumnNullable::apply_null_map(const ColumnUInt8& map) {
+    apply_null_map_impl<false>(map);
+}
+
+/// Same as apply_null_map(), but uses the negated form of apply_null_map_impl.
+void ColumnNullable::apply_negated_null_map(const ColumnUInt8& map) {
+    apply_null_map_impl<true>(map);
+}
+
+/// ORs the null map of `other` into this column's null map.
+void ColumnNullable::apply_null_map(const ColumnNullable& other) {
+    apply_null_map(other.get_null_map_column());
+}
+
+void ColumnNullable::check_consistency() const {
+    if (null_map->size() != get_nested_column().size()) {
+        LOG(FATAL) << "Logical error: Sizes of nested column and null map of Nullable column are "
+                      "not equal";
+    }
+}
+
+/// Wraps `column` into a nullable column.
+///  - An already nullable column is returned unchanged.
+///  - A constant column gets its data column wrapped and keeps its size.
+///  - Any other column is paired with a new null map of the same size, filled with 1
+///    when `is_nullable` is true and with 0 otherwise.
+ColumnPtr make_nullable(const ColumnPtr& column, bool is_nullable) {
+    if (is_column_nullable(*column)) {
+        return column;
+    }
+
+    if (is_column_const(*column)) {
+        const auto& const_column = assert_cast<const ColumnConst&>(*column);
+        return ColumnConst::create(make_nullable(const_column.get_data_column_ptr(), is_nullable),
+                                   column->size());
+    }
+
+    return ColumnNullable::create(column, ColumnUInt8::create(column->size(), is_nullable ? 1 : 0));
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h
new file mode 100644
index 0000000000..8163149fe6
--- /dev/null
+++ b/be/src/vec/columns/column_nullable.h
@@ -0,0 +1,246 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnNullable.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_impl.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/typeid_cast.h"
+
+namespace doris::vectorized {
+
+/// Null flags of a nullable column: the data container of a ColumnUInt8, one entry per row.
+using NullMap = ColumnUInt8::Container;
+/// Pointer to a read-only NullMap.
+using ConstNullMapPtr = const NullMap*;
+
+/// Class that specifies nullable columns. A nullable column represents
+/// a column, which may have any type, provided with the possibility of
+/// storing NULL values. For this purpose, a ColumnNullable object stores
+/// an ordinary column along with a special column, namely a byte map,
+/// whose type is ColumnUInt8. The latter column indicates whether the
+/// value of a given row is a NULL or not. Such a design is preferred
+/// over a bitmap because columns are usually stored on disk as compressed
+/// files. In this regard, using a bitmap instead of a byte map would
+/// greatly complicate the implementation with little to no benefits.
+class ColumnNullable final : public COWHelper<IColumn, ColumnNullable> {
+private:
+    friend class COWHelper<IColumn, ColumnNullable>;
+
+    /// Takes ownership of the nested column and of the null map.
+    ColumnNullable(MutableColumnPtr&& nested_column_, MutableColumnPtr&& null_map_);
+    ColumnNullable(const ColumnNullable&) = default;
+
+public:
+    /** Create immutable column using immutable arguments. These arguments may be shared with other columns.
+      * Use IColumn::mutate in order to make mutable column and mutate shared nested columns.
+      */
+    using Base = COWHelper<IColumn, ColumnNullable>;
+    static Ptr create(const ColumnPtr& nested_column_, const ColumnPtr& null_map_) {
+        return ColumnNullable::create(nested_column_->assume_mutable(),
+                                      null_map_->assume_mutable());
+    }
+
+    /// Create a mutable column; only selected when all arguments are mutable columns.
+    template <typename... Args,
+              typename = typename std::enable_if<IsMutableColumns<Args...>::value>::type>
+    static MutablePtr create(Args&&... args) {
+        return Base::create(std::forward<Args>(args)...);
+    }
+
+    const char* get_family_name() const override { return "Nullable"; }
+    std::string get_name() const override { return "Nullable(" + nested_column->get_name() + ")"; }
+    MutableColumnPtr clone_resized(size_t size) const override;
+    /// The row count is the nested column's row count.
+    size_t size() const override { return nested_column->size(); }
+    /// A row is NULL when its null map entry is non-zero.
+    bool is_null_at(size_t n) const override {
+        return assert_cast<const ColumnUInt8&>(*null_map).get_data()[n] != 0;
+    }
+    Field operator[](size_t n) const override;
+    void get(size_t n, Field& res) const override;
+    /// NULL rows read as false.
+    bool get_bool(size_t n) const override {
+        return is_null_at(n) ? 0 : nested_column->get_bool(n);
+    }
+    /// Reads the nested value directly; the null map is not consulted.
+    UInt64 get64(size_t n) const override { return nested_column->get64(n); }
+    StringRef get_data_at(size_t n) const override;
+
+    /// Will insert null value if pos=nullptr
+    void insert_data(const char* pos, size_t length) override;
+    StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override;
+    const char* deserialize_and_insert_from_arena(const char* pos) override;
+    void insert_range_from(const IColumn& src, size_t start, size_t length) override;
+    void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override;
+    void insert(const Field& x) override;
+    void insert_from(const IColumn& src, size_t n) override;
+
+    /// Variants for a source column that is not a ColumnNullable.
+    void insert_from_not_nullable(const IColumn& src, size_t n);
+    void insert_range_from_not_nullable(const IColumn& src, size_t start, size_t length);
+    void insert_many_from_not_nullable(const IColumn& src, size_t position, size_t length);
+
+    /// The default value of a nullable column is NULL.
+    void insert_default() override {
+        get_nested_column().insert_default();
+        get_null_map_data().push_back(1);
+    }
+
+    void pop_back(size_t n) override;
+    ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override;
+    ColumnPtr filter_by_selector(const uint16_t* sel, size_t sel_size,
+                                 ColumnPtr* ptr = nullptr) override;
+    ColumnPtr permute(const Permutation& perm, size_t limit) const override;
+    //    ColumnPtr index(const IColumn & indexes, size_t limit) const override;
+    int compare_at(size_t n, size_t m, const IColumn& rhs_, int null_direction_hint) const override;
+    void get_permutation(bool reverse, size_t limit, int null_direction_hint,
+                         Permutation& res) const override;
+    void reserve(size_t n) override;
+    void resize(size_t n) override;
+    size_t byte_size() const override;
+    size_t allocated_bytes() const override;
+    void protect() override;
+    ColumnPtr replicate(const Offsets& replicate_offsets) const override;
+    void update_hash_with_value(size_t n, SipHash& hash) const override;
+    void get_extremes(Field& min, Field& max) const override;
+
+    MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override {
+        return scatter_impl<ColumnNullable>(num_columns, selector);
+    }
+
+    //    void gather(ColumnGathererStream & gatherer_stream) override;
+
+    /// Invokes `callback` on the nested column, then on the null map.
+    void for_each_subcolumn(ColumnCallback callback) override {
+        callback(nested_column);
+        callback(null_map);
+    }
+
+    /// True when `rhs` is a ColumnNullable whose nested column has the same structure.
+    bool structure_equals(const IColumn& rhs) const override {
+        if (auto rhs_nullable = typeid_cast<const ColumnNullable*>(&rhs))
+            return nested_column->structure_equals(*rhs_nullable->nested_column);
+        return false;
+    }
+
+    /// The type queries and setters below are forwarded to the nested column.
+    bool is_date_type() override { return get_nested_column().is_date_type(); }
+    bool is_datetime_type() override { return get_nested_column().is_datetime_type(); }
+    void set_date_type() override { get_nested_column().set_date_type(); }
+    void set_datetime_type() override { get_nested_column().set_datetime_type(); }
+
+    bool is_nullable() const override { return true; }
+    bool is_bitmap() const override { return get_nested_column().is_bitmap(); }
+    bool is_column_decimal() const override { return get_nested_column().is_column_decimal(); }
+    bool is_column_string() const override { return get_nested_column().is_column_string(); }
+    bool is_fixed_and_contiguous() const override { return false; }
+    bool values_have_fixed_size() const override { return nested_column->values_have_fixed_size(); }
+    /// Size of one null map entry plus the size of one nested value.
+    size_t size_of_value_if_fixed() const override {
+        return null_map->size_of_value_if_fixed() + nested_column->size_of_value_if_fixed();
+    }
+    /// True when the nested column is a dummy column.
+    bool only_null() const override { return nested_column->is_dummy(); }
+
+    /// Return the column that represents values.
+    IColumn& get_nested_column() { return *nested_column; }
+    const IColumn& get_nested_column() const { return *nested_column; }
+
+    const ColumnPtr& get_nested_column_ptr() const { return nested_column; }
+
+    MutableColumnPtr get_nested_column_ptr() { return nested_column->assume_mutable(); }
+
+    /// Return the column that represents the byte map.
+    const ColumnPtr& get_null_map_column_ptr() const { return null_map; }
+
+    ColumnUInt8& get_null_map_column() { return assert_cast<ColumnUInt8&>(*null_map); }
+    const ColumnUInt8& get_null_map_column() const {
+        return assert_cast<const ColumnUInt8&>(*null_map);
+    }
+
+    /// Clears both the null map and the nested column.
+    void clear() override {
+        null_map->clear();
+        nested_column->clear();
+    }
+
+    NullMap& get_null_map_data() { return get_null_map_column().get_data(); }
+    const NullMap& get_null_map_data() const { return get_null_map_column().get_data(); }
+
+    /// Apply the null byte map of a specified nullable column onto the
+    /// null byte map of the current column by performing an element-wise OR
+    /// between both byte maps. This method is used to determine the null byte
+    /// map of the result column of a function taking one or more nullable
+    /// columns.
+    void apply_null_map(const ColumnNullable& other);
+    void apply_null_map(const ColumnUInt8& map);
+    void apply_negated_null_map(const ColumnUInt8& map);
+
+    /// Check that size of null map equals to size of nested column.
+    void check_consistency() const;
+
+    /// True when any row of the column is NULL.
+    bool has_null() const override { return has_null(get_null_map_data().size()); }
+
+    /// True when any of the first `size` rows is NULL, i.e. has a non-zero null map entry.
+    bool has_null(size_t size) const override {
+        const UInt8* null_pos = get_null_map_data().data();
+        const UInt8* null_pos_end = get_null_map_data().data() + size;
+#ifdef __SSE2__
+        /** A slightly more optimized version.
+        * Based on the assumption that often pieces of consecutive values
+        *  completely pass or do not pass the filter.
+        * Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
+        */
+        static constexpr size_t SIMD_BYTES = 16;
+        const __m128i zero16 = _mm_setzero_si128();
+        const UInt8* null_end_sse = null_pos + size / SIMD_BYTES * SIMD_BYTES;
+
+        while (null_pos < null_end_sse) {
+            // One mask bit per byte that equals zero. A signed "greater than zero" compare
+            // would miss flag bytes >= 0x80, which the scalar loop below and is_null_at()
+            // both treat as NULL, so test for "not all bytes are zero" instead.
+            int zero_mask = _mm_movemask_epi8(_mm_cmpeq_epi8(
+                    _mm_loadu_si128(reinterpret_cast<const __m128i*>(null_pos)), zero16));
+
+            if (0xFFFF != zero_mask) {
+                return true;
+            }
+            null_pos += SIMD_BYTES;
+        }
+#endif
+        // Scalar tail (or the whole range when SSE2 is unavailable).
+        while (null_pos < null_pos_end) {
+            if (*null_pos != 0) {
+                return true;
+            }
+            null_pos++;
+        }
+        return false;
+    }
+
+    /// Overwrites row `self_row` of this column with row `row` of `rhs`, which must be
+    /// a ColumnNullable. The null flag is always copied; the nested value is copied
+    /// only when the source row is not NULL, otherwise the nested row is reset through
+    /// replace_column_data_default().
+    void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
+        // The target row must exist. The former `size() > 1` rejected a one-row column
+        // and did not bound `self_row` at all.
+        DCHECK(size() > self_row);
+        const ColumnNullable& nullable_rhs = assert_cast<const ColumnNullable&>(rhs);
+        null_map->replace_column_data(*nullable_rhs.null_map, row, self_row);
+
+        if (!nullable_rhs.is_null_at(row)) {
+            nested_column->replace_column_data(*nullable_rhs.nested_column, row, self_row);
+        } else {
+            nested_column->replace_column_data_default(self_row);
+        }
+    }
+
+    /// Not supported for nullable columns: always logs a FATAL error.
+    void replace_column_data_default(size_t self_row = 0) override {
+        LOG(FATAL) << "should not call the method in column nullable";
+    }
+
+private:
+    /// Column holding the values.
+    WrappedPtr nested_column;
+    /// ColumnUInt8 holding one flag per row; non-zero means NULL.
+    WrappedPtr null_map;
+
+    /// Shared implementation of apply_null_map() and apply_negated_null_map().
+    template <bool negative>
+    void apply_null_map_impl(const ColumnUInt8& map);
+};
+
+/// Wraps `column` into a nullable column. A column that is already nullable is returned
+/// unchanged; a constant column gets its data column wrapped. The new null map is filled
+/// with 1 (every row NULL) when `is_nullable` is true, otherwise with 0.
+ColumnPtr make_nullable(const ColumnPtr& column, bool is_nullable = false);
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_set.h b/be/src/vec/columns/column_set.h
new file mode 100644
index 0000000000..afd1ae44b0
--- /dev/null
+++ b/be/src/vec/columns/column_set.h
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnSet.h
+// and modified by Doris
+
+#pragma once
+
+#include "exprs/hybrid_set.h"
+
+#include "vec/columns/column_dummy.h"
+
+namespace doris::vectorized {
+
+using ConstSetPtr = std::shared_ptr<HybridSetBase>;
+
+/** A column holding the set of values from the right-hand side of an `IN` expression.
+  * Behaves like a constant column: a single set is shared by all rows rather than one set per row.
+  * Its value is non-standard, so it cannot be obtained through the normal column interface.
+  */
+class ColumnSet final : public COWHelper<IColumnDummy, ColumnSet> {
+public:
+    friend class COWHelper<IColumnDummy, ColumnSet>;
+    /// Stores `s_` in the inherited `s` member and keeps a shared reference to the set `data_`.
+    ColumnSet(size_t s_, const ConstSetPtr& data_) : data(data_) { s = s_; }
+    ColumnSet(const ColumnSet&) = default;
+
+    const char* get_family_name() const override { return "Set"; }
+    /// Creates a ColumnSet with `s_` as its size argument, referring to the same set.
+    MutableColumnPtr clone_dummy(size_t s_) const override { return ColumnSet::create(s_, data); }
+    /// Returns the shared pointer to the set (copied, so the caller co-owns it).
+    ConstSetPtr get_data() const { return data; }
+
+private:
+    ConstSetPtr data;
+};
+
+}
\ No newline at end of file
diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp
new file mode 100644
index 0000000000..f4a32dc4f6
--- /dev/null
+++ b/be/src/vec/columns/column_string.cpp
@@ -0,0 +1,381 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnString.cpp
+// and modified by Doris
+
+#include "vec/columns/column_string.h"
+
+#include "vec/columns/collator.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/memcmp_small.h"
+#include "vec/common/unaligned.h"
+
+namespace doris::vectorized {
+
+/// Returns a copy of this column with exactly `to_size` rows: the leading rows are kept and,
+/// when the column has to grow, the extra rows are empty strings.
+MutableColumnPtr ColumnString::clone_resized(size_t to_size) const {
+    auto res = ColumnString::create();
+    if (to_size == 0) return res;
+
+    const size_t from_size = size();
+    auto& new_offsets = res->offsets;
+    auto& new_chars = res->chars;
+
+    if (to_size <= from_size) {
+        /// Truncation: keep the leading offsets and the bytes they cover.
+        new_offsets.assign(offsets.begin(), offsets.begin() + to_size);
+        new_chars.assign(chars.begin(), chars.begin() + offsets[to_size - 1]);
+        return res;
+    }
+
+    /// Growth: copy everything that exists, then append the missing rows.
+    Offset last_offset = 0;
+    if (from_size > 0) {
+        new_offsets.assign(offsets.begin(), offsets.end());
+        new_chars.assign(chars.begin(), chars.end());
+        last_offset = offsets.back();
+    }
+
+    /// Each appended empty string is a single terminating zero byte.
+    new_chars.resize_fill(new_chars.size() + to_size - from_size);
+    new_offsets.resize(to_size);
+    for (size_t i = from_size; i < to_size; ++i) {
+        new_offsets[i] = ++last_offset;
+    }
+
+    return res;
+}
+
+/// Appends rows [start, start + length) of `src`, which must be a ColumnString, to this column.
+void ColumnString::insert_range_from(const IColumn& src, size_t start, size_t length) {
+    if (length == 0) return;
+    const auto& source = assert_cast<const ColumnString&>(src);
+    if (start + length > source.offsets.size()) {
+        LOG(FATAL) << "Parameter out of bound in IColumnString::insert_range_from method.";
+    }
+
+    /// Byte range occupied by the requested rows in the source buffer.
+    const size_t first_byte = source.offset_at(start);
+    const size_t byte_count = source.offsets[start + length - 1] - first_byte;
+
+    const size_t chars_before = chars.size();
+    chars.resize(chars_before + byte_count);
+    memcpy(&chars[chars_before], &source.chars[first_byte], byte_count);
+
+    if (start == 0 && offsets.empty()) {
+        offsets.assign(source.offsets.begin(), source.offsets.begin() + length);
+        return;
+    }
+
+    /// Rebase the copied offsets onto the current end of this column.
+    const size_t rows_before = offsets.size();
+    const size_t base = offsets.back(); /// -1th index is Ok, see PaddedPODArray
+    offsets.resize(rows_before + length);
+    for (size_t i = 0; i < length; ++i)
+        offsets[rows_before + i] = source.offsets[start + i] - first_byte + base;
+}
+
+/// Appends row `*p` of `src` for every index p in [indices_begin, indices_end), in order.
+void ColumnString::insert_indices_from(const IColumn& src, const int* indices_begin,
+                                       const int* indices_end) {
+    for (const int* p = indices_begin; p != indices_end; ++p) ColumnString::insert_from(src, *p);
+}
+
+/// Builds a new column from this one through filter_arrays_impl, driven by the mask `filt`;
+/// `result_size_hint` is forwarded unchanged. An empty column yields an empty column.
+ColumnPtr ColumnString::filter(const Filter& filt, ssize_t result_size_hint) const {
+    auto filtered = ColumnString::create();
+    if (offsets.empty()) return filtered;
+
+    Chars& out_chars = filtered->chars;
+    Offsets& out_offsets = filtered->offsets;
+    filter_arrays_impl<UInt8>(chars, offsets, out_chars, out_offsets, filt, result_size_hint);
+    return filtered;
+}
+
+/// Returns a column whose i-th row is row `perm[i]` of this column, for the first `limit`
+/// entries of `perm`. Logs FATAL when `perm` holds fewer entries than the effective limit.
+ColumnPtr ColumnString::permute(const Permutation& perm, size_t limit) const {
+    const size_t rows = offsets.size();
+    /// A zero limit selects every row; any other limit is clamped to the column size.
+    limit = (limit == 0) ? rows : std::min(rows, limit);
+
+    if (perm.size() < limit) {
+        LOG(FATAL) << "Size of permutation is less than required.";
+    }
+
+    auto res = ColumnString::create();
+    /// After the normalization above the limit is zero only for an empty column.
+    if (limit == 0) return res;
+
+    Chars& res_chars = res->chars;
+    Offsets& res_offsets = res->offsets;
+
+    /// Size the output up front: every byte for a full permutation, otherwise the sum of the
+    /// selected string sizes.
+    size_t total_bytes = chars.size();
+    if (limit != rows) {
+        total_bytes = 0;
+        for (size_t i = 0; i < limit; ++i) total_bytes += size_at(perm[i]);
+    }
+    res_chars.resize(total_bytes);
+    res_offsets.resize(limit);
+
+    /// Copy the selected strings back to back, recording the end offset of each one.
+    Offset write_pos = 0;
+    for (size_t i = 0; i < limit; ++i) {
+        const size_t row = perm[i];
+        const size_t src_begin = offsets[row - 1]; /// -1th index is Ok, see PaddedPODArray
+        const size_t src_size = offsets[row] - src_begin;
+
+        memcpy_small_allow_read_write_overflow15(&res_chars[write_pos], &chars[src_begin],
+                                                 src_size);
+
+        write_pos += src_size;
+        res_offsets[i] = write_pos;
+    }
+
+    return res;
+}
+
+/// Serializes row `n` into `arena` as a size_t byte count (terminating zero included) followed
+/// by the string bytes, and returns a reference to the serialized bytes.
+StringRef ColumnString::serialize_value_into_arena(size_t n, Arena& arena,
+                                                   char const*& begin) const {
+    const size_t payload_size = size_at(n);
+    const size_t total_size = sizeof(payload_size) + payload_size;
+    char* out = arena.alloc_continue(total_size, begin);
+    memcpy(out, &payload_size, sizeof(payload_size));
+    memcpy(out + sizeof(payload_size), &chars[offset_at(n)], payload_size);
+    StringRef res;
+    res.data = out;
+    res.size = total_size;
+    return res;
+}
+
+/// Appends one row read from `pos`: a size_t byte count followed by that many bytes (the layout
+/// written by serialize_value_into_arena). Returns the address just past the consumed bytes.
+const char* ColumnString::deserialize_and_insert_from_arena(const char* pos) {
+    const size_t payload_size = unaligned_load<size_t>(pos);
+    const char* payload = pos + sizeof(payload_size);
+
+    const size_t chars_before = chars.size();
+    chars.resize(chars_before + payload_size);
+    memcpy(chars.data() + chars_before, payload, payload_size);
+    offsets.push_back(chars_before + payload_size);
+    return payload + payload_size;
+}
+
+/// Returns a column whose i-th row is row `indexes[i]` of this column, for i in [0, limit).
+/// NOTE(review): `limit` is not checked against `indexes.size()` here — confirm callers do it.
+template <typename Type>
+ColumnPtr ColumnString::index_impl(const PaddedPODArray<Type>& indexes, size_t limit) const {
+    auto res = ColumnString::create();
+    if (limit == 0) return res;
+
+    Chars& res_chars = res->chars;
+    Offsets& res_offsets = res->offsets;
+
+    /// First pass: total number of bytes, so the output buffer is sized in one step.
+    size_t total_bytes = 0;
+    for (size_t i = 0; i < limit; ++i) total_bytes += size_at(indexes[i]);
+    res_chars.resize(total_bytes);
+    res_offsets.resize(limit);
+
+    /// Second pass: copy the selected strings back to back, recording each end offset.
+    Offset write_pos = 0;
+    for (size_t i = 0; i < limit; ++i) {
+        const size_t row = indexes[i];
+        const size_t src_begin = offsets[row - 1]; /// -1th index is Ok, see PaddedPODArray
+        const size_t src_size = offsets[row] - src_begin;
+        memcpy_small_allow_read_write_overflow15(&res_chars[write_pos], &chars[src_begin],
+                                                 src_size);
+
+        write_pos += src_size;
+        res_offsets[i] = write_pos;
+    }
+
+    return res;
+}
+
+/// Compares two rows of `parent` by their bytes: "less" if `positive`, "greater" otherwise.
+template <bool positive>
+struct ColumnString::less {
+    const ColumnString& parent;
+    explicit less(const ColumnString& parent_) : parent(parent_) {}
+    bool operator()(size_t lhs, size_t rhs) const {
+        const int cmp = memcmp_small_allow_overflow15(
+                parent.chars.data() + parent.offset_at(lhs), parent.size_at(lhs) - 1,
+                parent.chars.data() + parent.offset_at(rhs), parent.size_at(rhs) - 1);
+        return positive ? (cmp < 0) : (cmp > 0);
+    }
+};
+
+/// Fills `res` with the row indexes of this column in sorted order (descending if `reverse`).
+/// With 0 < limit < size() only the first `limit` positions of `res` are sorted.
+void ColumnString::get_permutation(bool reverse, size_t limit, int /*nan_direction_hint*/,
+                                   Permutation& res) const {
+    const size_t rows = offsets.size();
+    res.resize(rows);
+    for (size_t i = 0; i < rows; ++i) res[i] = i;
+    /// A limit of zero, or one covering the whole column, requests a full sort.
+    const bool full_sort = (limit == 0 || limit >= rows);
+    auto sort_by = [&](auto cmp) {
+        if (full_sort)
+            std::sort(res.begin(), res.end(), cmp);
+        else
+            std::partial_sort(res.begin(), res.begin() + limit, res.end(), cmp);
+    };
+    if (reverse)
+        sort_by(less<false>(*this));
+    else
+        sort_by(less<true>(*this));
+}
+
+/// Returns a column in which row i of this column appears
+/// `replicate_offsets[i] - replicate_offsets[i - 1]` times (offsets are cumulative, from 0).
+ColumnPtr ColumnString::replicate(const Offsets& replicate_offsets) const {
+    const size_t rows = size();
+    if (rows != replicate_offsets.size()) {
+        LOG(FATAL) << "Size of offsets doesn't match size of column.";
+    }
+
+    auto res = ColumnString::create();
+    if (rows == 0) return res;
+
+    Chars& res_chars = res->chars;
+    Offsets& res_offsets = res->offsets;
+    /// Byte reservation is an estimate from the average string size; the row count is exact.
+    res_chars.reserve(chars.size() / rows * replicate_offsets.back());
+    res_offsets.reserve(replicate_offsets.back());
+
+    Offset copies_done = 0; // replicate offset of the previous row
+    Offset src_begin = 0;   // start of the current string in `chars`
+    Offset dst_end = 0;     // end of the last string written to `res_chars`
+
+    for (size_t i = 0; i < rows; ++i) {
+        const size_t copies = replicate_offsets[i] - copies_done;
+        const size_t src_size = offsets[i] - src_begin;
+
+        for (size_t j = 0; j < copies; ++j) {
+            dst_end += src_size;
+            res_offsets.push_back(dst_end);
+            res_chars.resize(res_chars.size() + src_size);
+            memcpy_small_allow_read_write_overflow15(&res_chars[res_chars.size() - src_size],
+                                                     &chars[src_begin], src_size);
+        }
+        copies_done = replicate_offsets[i];
+        src_begin = offsets[i];
+    }
+
+    return res;
+}
+
+void ColumnString::reserve(size_t n) {
+    offsets.reserve(n); // only the offsets array is reserved; `chars` is not touched
+}
+
+void ColumnString::resize(size_t n) {
+    offsets.resize(n); // NOTE(review): `chars` is not adjusted here — confirm callers fix it up
+}
+
+/// Stores the smallest and largest string of the column, as ordered by less<true>, in `min`
+/// and `max`. Both are set to an empty String when the column has no rows.
+void ColumnString::get_extremes(Field& min, Field& max) const {
+    min = String();
+    max = String();
+
+    const size_t rows = size();
+    if (rows == 0) return;
+
+    const less<true> is_less(*this);
+    size_t smallest = 0;
+    size_t largest = 0;
+    /// A row that becomes the new minimum cannot also exceed the current maximum: else-if.
+    for (size_t i = 1; i < rows; ++i) {
+        if (is_less(i, smallest)) {
+            smallest = i;
+        } else if (is_less(largest, i)) {
+            largest = i;
+        }
+    }
+    get(smallest, min);
+    get(largest, max);
+}
+
+/// Compares row `n` of this column with row `m` of `rhs_` (a ColumnString) using `collator`.
+int ColumnString::compare_at_with_collation(size_t n, size_t m, const IColumn& rhs_,
+                                            const Collator& collator) const {
+    const auto& other = assert_cast<const ColumnString&>(rhs_);
+    const auto* lhs_data = reinterpret_cast<const char*>(&chars[offset_at(n)]);
+    const auto* rhs_data = reinterpret_cast<const char*>(&other.chars[other.offset_at(m)]);
+    return collator.compare(lhs_data, size_at(n), rhs_data, other.size_at(m));
+}
+
+/// Row-index comparator that orders the strings of `parent` with `collator`:
+/// "less than" when `positive` is true, "greater than" otherwise.
+template <bool positive>
+struct ColumnString::lessWithCollation {
+    const ColumnString& parent;
+    const Collator& collator;
+
+    lessWithCollation(const ColumnString& parent_, const Collator& collator_)
+            : parent(parent_), collator(collator_) {}
+
+    bool operator()(size_t lhs, size_t rhs) const {
+        const auto* lhs_data = reinterpret_cast<const char*>(&parent.chars[parent.offset_at(lhs)]);
+        const auto* rhs_data = reinterpret_cast<const char*>(&parent.chars[parent.offset_at(rhs)]);
+        const int cmp = collator.compare(lhs_data, parent.size_at(lhs), rhs_data,
+                                         parent.size_at(rhs));
+        return positive ? (cmp < 0) : (cmp > 0);
+    }
+};
+
+/// Fills `res` with the row indexes of this column ordered by `collator` (descending if
+/// `reverse`). With 0 < limit < size() only the first `limit` positions of `res` are sorted.
+void ColumnString::get_permutation_with_collation(const Collator& collator, bool reverse,
+                                                  size_t limit, Permutation& res) const {
+    const size_t rows = offsets.size();
+    res.resize(rows);
+    for (size_t i = 0; i < rows; ++i) res[i] = i;
+
+    /// A limit of zero, or one covering the whole column, requests a full sort.
+    const bool full_sort = (limit == 0 || limit >= rows);
+    auto sort_by = [&](auto cmp) {
+        if (full_sort)
+            std::sort(res.begin(), res.end(), cmp);
+        else
+            std::partial_sort(res.begin(), res.begin() + limit, res.end(), cmp);
+    };
+
+    if (reverse)
+        sort_by(lessWithCollation<false>(*this, collator));
+    else
+        sort_by(lessWithCollation<true>(*this, collator));
+}
+
+void ColumnString::protect() {
+    chars.protect();   /// the array get_chars() returns
+    offsets.protect(); /// the array get_offsets() returns
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h
new file mode 100644
index 0000000000..2234490c76
--- /dev/null
+++ b/be/src/vec/columns/column_string.h
@@ -0,0 +1,288 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnString.h
+// and modified by Doris
+
+#pragma once
+
+#include <cassert>
+#include <cstring>
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_impl.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/memcmp_small.h"
+#include "vec/common/memcpy_small.h"
+#include "vec/common/pod_array.h"
+#include "vec/common/sip_hash.h"
+#include "vec/core/field.h"
+
+class Collator;
+
+namespace doris::vectorized {
+
+/** Column for String values.
+  */
+class ColumnString final : public COWHelper<IColumn, ColumnString> {
+public:
+    using Char = UInt8;
+    using Chars = PaddedPODArray<UInt8>;
+
+private:
+    friend class COWHelper<IColumn, ColumnString>;
+
+    /// Maps i'th position to offset to i+1'th element. Last offset maps to the end of all chars (is the size of all chars).
+    Offsets offsets;
+
+    /// Bytes of strings, placed contiguously.
+    /// For convenience, every string ends with terminating zero byte. Note that strings could contain zero bytes in the middle.
+    Chars chars;
+    /// Start offset of the i-th element; i == 0 reads the -1th offset (Ok, see PaddedPODArray).
+    size_t ALWAYS_INLINE offset_at(ssize_t i) const { return offsets[i - 1]; }
+
+    /// Size of i-th element, including terminating zero.
+    size_t ALWAYS_INLINE size_at(ssize_t i) const { return offsets[i] - offsets[i - 1]; }
+
+    template <bool positive>
+    struct less;
+
+    template <bool positive>
+    struct lessWithCollation;
+
+    ColumnString() = default;
+
+    ColumnString(const ColumnString& src)
+            : offsets(src.offsets.begin(), src.offsets.end()),
+              chars(src.chars.begin(), src.chars.end()) {}
+
+public:
+    const char* get_family_name() const override { return "String"; }
+
+    size_t size() const override { return offsets.size(); }
+    /// Bytes of character data plus bytes of the offsets array (by size, not capacity).
+    size_t byte_size() const override { return chars.size() + offsets.size() * sizeof(offsets[0]); }
+
+    size_t allocated_bytes() const override {
+        return chars.allocated_bytes() + offsets.allocated_bytes();
+    }
+
+    void protect() override;
+
+    MutableColumnPtr clone_resized(size_t to_size) const override;
+    /// Row `n` as a Field, without the terminating zero.
+    Field operator[](size_t n) const override {
+        assert(n < size());
+        return Field(&chars[offset_at(n)], size_at(n) - 1);
+    }
+
+    void get(size_t n, Field& res) const override {
+        assert(n < size());
+        res.assign_string(&chars[offset_at(n)], size_at(n) - 1);
+    }
+
+    StringRef get_data_at(size_t n) const override {
+        assert(n < size());
+        return StringRef(&chars[offset_at(n)], size_at(n) - 1);
+    }
+
+    StringRef get_data_at_with_terminating_zero(size_t n) const override {
+        assert(n < size());
+        return StringRef(&chars[offset_at(n)], size_at(n));
+    }
+
+/// Suppress gcc 7.3.1 warning: '*((void*)&<anonymous> +8)' may be used uninitialized in this function
+#if !__clang__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
+    void insert(const Field& x) override {
+        const String& s = doris::vectorized::get<const String&>(x);
+        const size_t old_size = chars.size();
+        const size_t size_to_append = s.size() + 1;
+        const size_t new_size = old_size + size_to_append;
+
+        chars.resize(new_size);
+        memcpy(chars.data() + old_size, s.c_str(), size_to_append);
+        offsets.push_back(new_size);
+    }
+
+#if !__clang__
+#pragma GCC diagnostic pop
+#endif
+    /// Appends row `n` of `src_`, which must be a ColumnString.
+    void insert_from(const IColumn& src_, size_t n) override {
+        const ColumnString& src = assert_cast<const ColumnString&>(src_);
+        const size_t size_to_append =
+                src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray.
+
+        if (size_to_append == 1) {
+            /// shortcut for empty string
+            chars.push_back(0);
+            offsets.push_back(chars.size());
+        } else {
+            const size_t old_size = chars.size();
+            const size_t offset = src.offsets[n - 1];
+            const size_t new_size = old_size + size_to_append;
+
+            chars.resize(new_size);
+            memcpy_small_allow_read_write_overflow15(chars.data() + old_size, &src.chars[offset],
+                                                     size_to_append);
+            offsets.push_back(new_size);
+        }
+    }
+    /// Appends `length` bytes from `pos` as a new row and writes the terminating zero itself.
+    void insert_data(const char* pos, size_t length) override {
+        const size_t old_size = chars.size();
+        const size_t new_size = old_size + length + 1;
+
+        chars.resize(new_size);
+        if (length) memcpy(chars.data() + old_size, pos, length);
+        chars[old_size + length] = 0;
+        offsets.push_back(new_size);
+    }
+
+    /// Like insert_data, but the inserted data must already be zero-terminated (i.e. length is 1 byte greater than real string size).
+    void insert_data_with_terminating_zero(const char* pos, size_t length) {
+        const size_t old_size = chars.size();
+        const size_t new_size = old_size + length;
+
+        chars.resize(new_size);
+        memcpy(chars.data() + old_size, pos, length);
+        offsets.push_back(new_size);
+    }
+    /// Removes the last `n` rows together with the bytes they occupy.
+    void pop_back(size_t n) override {
+        size_t nested_n = offsets.back() - offset_at(offsets.size() - n);
+        chars.resize(chars.size() - nested_n);
+        offsets.resize_assume_reserved(offsets.size() - n);
+    }
+
+    StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override;
+
+    const char* deserialize_and_insert_from_arena(const char* pos) override;
+    /// Feeds the size of row `n` (terminating zero included) and then its bytes into `hash`.
+    void update_hash_with_value(size_t n, SipHash& hash) const override {
+        size_t string_size = size_at(n);
+        size_t offset = offset_at(n);
+
+        hash.update(reinterpret_cast<const char*>(&string_size), sizeof(string_size));
+        hash.update(reinterpret_cast<const char*>(&chars[offset]), string_size);
+    }
+
+    void insert_range_from(const IColumn& src, size_t start, size_t length) override;
+
+    void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override;
+
+    ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override;
+
+    ColumnPtr permute(const Permutation& perm, size_t limit) const override;
+
+    //    ColumnPtr index(const IColumn & indexes, size_t limit) const override;
+
+    template <typename Type>
+    ColumnPtr index_impl(const PaddedPODArray<Type>& indexes, size_t limit) const;
+    /// Appends an empty string: a single terminating zero byte.
+    void insert_default() override {
+        chars.push_back(0);
+        offsets.push_back(offsets.back() + 1);
+    }
+    /// Compares row `n` with row `m` of `rhs_` (a ColumnString), terminating zeros excluded.
+    int compare_at(size_t n, size_t m, const IColumn& rhs_,
+                   int /*nan_direction_hint*/) const override {
+        const ColumnString& rhs = assert_cast<const ColumnString&>(rhs_);
+        return memcmp_small_allow_overflow15(chars.data() + offset_at(n), size_at(n) - 1,
+                                             rhs.chars.data() + rhs.offset_at(m),
+                                             rhs.size_at(m) - 1);
+    }
+
+    /// Variant of compare_at for string comparison with respect of collation.
+    int compare_at_with_collation(size_t n, size_t m, const IColumn& rhs_,
+                                  const Collator& collator) const;
+
+    void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+                         Permutation& res) const override;
+
+    /// Sorting with respect of collation.
+    void get_permutation_with_collation(const Collator& collator, bool reverse, size_t limit,
+                                        Permutation& res) const;
+
+    ColumnPtr replicate(const Offsets& replicate_offsets) const override;
+
+    MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override {
+        return scatter_impl<ColumnString>(num_columns, selector);
+    }
+
+    //    void gather(ColumnGathererStream & gatherer_stream) override;
+
+    void reserve(size_t n) override;
+
+    void resize(size_t n) override;
+
+    void get_extremes(Field& min, Field& max) const override;
+
+    bool can_be_inside_nullable() const override { return true; }
+
+    bool is_column_string() const override { return true; }
+
+    bool structure_equals(const IColumn& rhs) const override {
+        return typeid(rhs) == typeid(ColumnString);
+    }
+
+    Chars& get_chars() { return chars; }
+    const Chars& get_chars() const { return chars; }
+
+    Offsets& get_offsets() { return offsets; }
+    const Offsets& get_offsets() const { return offsets; }
+
+    void clear() override {
+        chars.clear();
+        offsets.clear();
+    }
+    /// Rewrites row `self_row` from `rhs[row]`; bytes are appended, so rewrite rows in order from 0.
+    void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
+        DCHECK(size() > self_row);
+        const auto& r = assert_cast<const ColumnString&>(rhs);
+        auto data = r.get_data_at(row);
+
+        if (!self_row) {
+            chars.clear();
+            offsets[self_row] = data.size + 1;
+        } else {
+            offsets[self_row] = offsets[self_row - 1] + data.size + 1;
+        }
+
+        chars.insert(data.data, data.data + data.size + 1);
+    }
+
+    // Rewrites row `self_row` as an empty string; rows must be replaced in order 0, 1, 2, ..., then again from 0.
+    void replace_column_data_default(size_t self_row = 0) override {
+        DCHECK(size() > self_row);
+
+        if (!self_row) {
+            chars.clear();
+            offsets[self_row] = 1;
+        } else {
+            offsets[self_row] = offsets[self_row - 1] + 1;
+        }
+
+        chars.emplace_back(0);
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp
new file mode 100644
index 0000000000..75ff1448aa
--- /dev/null
+++ b/be/src/vec/columns/column_vector.cpp
@@ -0,0 +1,381 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnVector.cpp
+// and modified by Doris
+
+#include "vec/columns/column_vector.h"
+
+#include <pdqsort.h>
+#include <vec/common/radix_sort.h>
+
+#include <cmath>
+#include <cstring>
+
+#include "runtime/datetime_value.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/arena.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/bit_cast.h"
+#include "vec/common/exception.h"
+#include "vec/common/nan_utils.h"
+#include "vec/common/sip_hash.h"
+#include "vec/common/unaligned.h"
+
+namespace doris::vectorized {
+
+template <typename T>
+StringRef ColumnVector<T>::serialize_value_into_arena(size_t n, Arena& arena,
+                                                      char const*& begin) const {
+    auto pos = arena.alloc_continue(sizeof(T), begin); // room for exactly one T
+    unaligned_store<T>(pos, data[n]);                  // store the n-th value there
+    return StringRef(pos, sizeof(T));                  // reference to the bytes just written
+}
+
+template <typename T>
+const char* ColumnVector<T>::deserialize_and_insert_from_arena(const char* pos) {
+    data.push_back(unaligned_load<T>(pos)); // append one T read from `pos`
+    return pos + sizeof(T);                 // first byte after the consumed value
+}
+
+template <typename T>
+void ColumnVector<T>::update_hash_with_value(size_t n, SipHash& hash) const {
+    hash.update(data[n]); // feeds the n-th value into `hash`
+}
+
+template <typename T>
+struct ColumnVector<T>::less { // row-index comparator used by get_permutation
+    const Self& parent;     // column whose values are compared
+    int nan_direction_hint; // forwarded to CompareHelper<T>::less
+    less(const Self& parent_, int nan_direction_hint_)
+            : parent(parent_), nan_direction_hint(nan_direction_hint_) {}
+    bool operator()(size_t lhs, size_t rhs) const { // lhs/rhs index into parent.data
+        return CompareHelper<T>::less(parent.data[lhs], parent.data[rhs], nan_direction_hint);
+    }
+};
+
+template <typename T>
+struct ColumnVector<T>::greater { // row-index comparator used by get_permutation when reversed
+    const Self& parent;     // column whose values are compared
+    int nan_direction_hint; // forwarded to CompareHelper<T>::greater
+    greater(const Self& parent_, int nan_direction_hint_)
+            : parent(parent_), nan_direction_hint(nan_direction_hint_) {}
+    bool operator()(size_t lhs, size_t rhs) const { // lhs/rhs index into parent.data
+        return CompareHelper<T>::greater(parent.data[lhs], parent.data[rhs], nan_direction_hint);
+    }
+};
+
+namespace { // file-local helpers for the radix-sort path of get_permutation
+template <typename T>
+struct ValueWithIndex {
+    T value;      // the column value
+    UInt32 index; // its row position in the column
+};
+/// Radix-sort traits whose elements are ValueWithIndex<T>, keyed on the `value` field.
+template <typename T>
+struct RadixSortTraits : RadixSortNumTraits<T> {
+    using Element = ValueWithIndex<T>;
+    static T& extract_key(Element& elem) { return elem.value; }
+};
+} // namespace
+
+template <typename T>
+void ColumnVector<T>::get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+                                      IColumn::Permutation& res) const {
+    size_t s = data.size();
+    res.resize(s);
+
+    if (s == 0) return;
+
+    if (limit >= s) limit = 0; // a limit covering every row means a full sort
+    /// Partial sort: only the first `limit` positions of `res` end up ordered.
+    if (limit) {
+        for (size_t i = 0; i < s; ++i) res[i] = i;
+
+        if (reverse)
+            std::partial_sort(res.begin(), res.begin() + limit, res.end(),
+                              greater(*this, nan_direction_hint));
+        else
+            std::partial_sort(res.begin(), res.begin() + limit, res.end(),
+                              less(*this, nan_direction_hint));
+    } else {
+        /// A case for radix sort
+        if constexpr (std::is_arithmetic_v<T> && !std::is_same_v<T, UInt128>) {
+            /// Thresholds on size. Lower threshold is arbitrary. Upper threshold is chosen by the type for histogram counters.
+            if (s >= 256 && s <= std::numeric_limits<UInt32>::max()) {
+                PaddedPODArray<ValueWithIndex<T>> pairs(s);
+                for (UInt32 i = 0; i < s; ++i) pairs[i] = {data[i], i}; // value + original row
+
+                RadixSort<RadixSortTraits<T>>::execute_lsd(pairs.data(), s);
+
+                /// Radix sort treats all NaNs to be greater than all numbers.
+                /// If the user needs the opposite, we must move them accordingly.
+                size_t nans_to_move = 0;
+                if (std::is_floating_point_v<T> && nan_direction_hint < 0) {
+                    for (ssize_t i = s - 1; i >= 0; --i) { // count the trailing run of NaNs
+                        if (is_nan(pairs[i].value))
+                            ++nans_to_move;
+                        else
+                            break;
+                    }
+                }
+                /// Copy indexes out; the NaNs counted above go first when ascending, last when reversed.
+                if (reverse) {
+                    if (nans_to_move) {
+                        for (size_t i = 0; i < s - nans_to_move; ++i)
+                            res[i] = pairs[s - nans_to_move - 1 - i].index;
+                        for (size_t i = s - nans_to_move; i < s; ++i)
+                            res[i] = pairs[s - 1 - (i - (s - nans_to_move))].index;
+                    } else {
+                        for (size_t i = 0; i < s; ++i) res[s - 1 - i] = pairs[i].index;
+                    }
+                } else {
+                    if (nans_to_move) {
+                        for (size_t i = 0; i < nans_to_move; ++i)
+                            res[i] = pairs[i + s - nans_to_move].index;
+                        for (size_t i = nans_to_move; i < s; ++i)
+                            res[i] = pairs[i - nans_to_move].index;
+                    } else {
+                        for (size_t i = 0; i < s; ++i) res[i] = pairs[i].index;
+                    }
+                }
+
+                return;
+            }
+        }
+
+        /// Default sorting algorithm.
+        for (size_t i = 0; i < s; ++i) res[i] = i;
+
+        if (reverse)
+            pdqsort(res.begin(), res.end(), greater(*this, nan_direction_hint));
+        else
+            pdqsort(res.begin(), res.end(), less(*this, nan_direction_hint));
+    }
+}
+
+/// Reports the family name of this column, which is the string that
+/// TypeName<T>::get() yields for the element type T.
+template <typename T>
+const char* ColumnVector<T>::get_family_name() const { return TypeName<T>::get(); }
+
+/// Builds a new column of `size` elements: the first min(this->size(), size) are copied from
+/// this column, any remaining tail is memset with static_cast<int>(value_type()).
+template <typename T>
+MutableColumnPtr ColumnVector<T>::clone_resized(size_t size) const {
+    auto cloned = this->create();
+    if (size == 0) return cloned;
+
+    auto& target = static_cast<Self&>(*cloned).data;
+    target.resize(size);
+
+    const size_t copied = std::min(this->size(), size);
+    memcpy(target.data(), data.data(), copied * sizeof(data[0]));
+
+    if (copied < size)
+        memset(static_cast<void*>(&target[copied]), static_cast<int>(value_type()),
+               (size - copied) * sizeof(value_type));
+    return cloned;
+}
+
+/// Returns element n as produced by ext::bit_cast<UInt64> (presumably a bit-level
+/// reinterpretation rather than a numeric conversion - confirm against ext::bit_cast).
+template <typename T>
+UInt64 ColumnVector<T>::get64(size_t n) const { return ext::bit_cast<UInt64>(data[n]); }
+
+/// Returns element n converted to Float64 with a plain static_cast
+/// (a numeric conversion of the stored value).
+template <typename T>
+Float64 ColumnVector<T>::get_float64(size_t n) const { return static_cast<Float64>(data[n]); }
+
+/// Appends `length` elements of `src`, beginning at index `start`, to the end of this column.
+/// `src` is dynamic_cast to ColumnVector<T>; a range past the end of `src` is a fatal error.
+template <typename T>
+void ColumnVector<T>::insert_range_from(const IColumn& src, size_t start, size_t length) {
+    const auto& source_data = dynamic_cast<const ColumnVector&>(src).data;
+    if (source_data.size() < start + length) {
+        LOG(FATAL) << fmt::format(
+                "Parameters start = {}, length = {}, are out of bound in "
+                "ColumnVector<T>::insert_range_from method (data.size() = {}).",
+                start, length, source_data.size());
+    }
+    const size_t write_pos = data.size();
+    data.resize(write_pos + length);
+    memcpy(data.data() + write_pos, &source_data[start], length * sizeof(data[0]));
+}
+
+/// Appends src.get_element(i) for every index i stored in [indices_begin, indices_end).
+template <typename T>
+void ColumnVector<T>::insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) {
+    const auto& source = assert_cast<const Self&>(src);
+    data.reserve(size() + (indices_end - indices_begin));
+    for (const int* it = indices_begin; it != indices_end; ++it)
+        data.push_back_without_reserve(source.get_element(*it));
+}
+
+template <typename T>
+ColumnPtr ColumnVector<T>::filter(const IColumn::Filter& filt, ssize_t result_size_hint) const {
+    size_t size = data.size();
+    if (size != filt.size()) {
+        LOG(FATAL) << "Size of filter doesn't match size of column.";
+    }
+    // Result column: receives, in their original order, the rows that pass the filter.
+    auto res = this->create();
+    Container& res_data = res->get_data();
+    // result_size_hint: 0 reserves nothing, > 0 reserves that many rows, < 0 reserves `size`.
+    if (result_size_hint) res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
+
+    const UInt8* filt_pos = filt.data();
+    const UInt8* filt_end = filt_pos + size;
+    const T* data_pos = data.data();
+
+    /** A slightly more optimized version.
+        * Based on the assumption that often pieces of consecutive values
+        *  completely pass or do not pass the filter.
+        * Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
+        */
+    static constexpr size_t SIMD_BYTES = 32;
+    const UInt8* filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
+    // Fast path: SIMD_BYTES filter bytes per iteration, condensed into one 32-bit mask.
+    while (filt_pos < filt_end_sse) {
+        uint32_t mask = bytes32_mask_to_bits32_mask(filt_pos);
+        // All bits set: copy the whole chunk at once; otherwise visit the set bits only.
+        if (0xFFFFFFFF == mask) {
+            res_data.insert(data_pos, data_pos + SIMD_BYTES);
+        } else {
+            while (mask) {
+                const size_t idx = __builtin_ctzll(mask); // position of the lowest set bit
+                res_data.push_back(data_pos[idx]);
+                mask = mask & (mask - 1); // clear that bit
+            }
+        }
+
+        filt_pos += SIMD_BYTES;
+        data_pos += SIMD_BYTES;
+    }
+    // Tail: the remaining (< SIMD_BYTES) rows are tested one by one (any non-zero byte passes).
+    while (filt_pos < filt_end) {
+        if (*filt_pos) res_data.push_back(*data_pos);
+
+        ++filt_pos;
+        ++data_pos;
+    }
+
+    return res;
+}
+
+/// Produces a column whose i-th element is data[perm[i]]. A `limit` of 0 means "all rows";
+/// otherwise at most `limit` rows are produced. A too-short permutation is a fatal error.
+template <typename T>
+ColumnPtr ColumnVector<T>::permute(const IColumn::Permutation& perm, size_t limit) const {
+    const size_t rows = data.size();
+    // Zero stands for "no limit"; anything else is clamped to the column length.
+    limit = (limit == 0) ? rows : std::min(rows, limit);
+
+    if (limit > perm.size()) {
+        LOG(FATAL) << "Size of permutation is less than required.";
+    }
+
+    auto permuted = this->create(limit);
+    auto& out = permuted->get_data();
+    for (size_t pos = 0; pos < limit; ++pos)
+        out[pos] = data[perm[pos]];
+
+    return permuted;
+}
+
+/// Builds a column in which data[i] is repeated (offsets[i] - offsets[i - 1]) times, the offset
+/// before row 0 being 0. `offsets` must hold exactly one entry per row (fatal error otherwise).
+template <typename T>
+ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets& offsets) const {
+    size_t size = data.size();
+    if (size != offsets.size()) {
+        LOG(FATAL) << "Size of offsets doesn't match size of column.";
+    }
+
+    auto res = this->create();
+    if (0 == size) return res;
+
+    typename Self::Container& res_data = res->get_data();
+    res_data.reserve(offsets.back());
+
+    // Per-row repeat counts are kept on the heap: a stack VLA (`counts[size]`) is a compiler
+    // extension and can overflow the stack for large columns. The subtraction stays in its own
+    // loop so it can still be auto-vectorized; row 0 is explicit to avoid reading offsets[-1].
+    IColumn::Offsets counts(size);
+    counts[0] = offsets[0];
+    for (size_t i = 1; i < size; ++i) counts[i] = offsets[i] - offsets[i - 1];
+    for (size_t i = 0; i < size; ++i)
+        res_data.add_num_element_without_reserve(data[i], counts[i]);
+    return res;
+}
+
+/// Stores the smallest and largest non-NaN values of the column in `min` and `max`.
+/// An empty column reports T(0) for both; if no non-NaN value exists, both are whatever
+/// nan_or_zero<T>() returns.
+template <typename T>
+void ColumnVector<T>::get_extremes(Field& min, Field& max) const {
+    if (data.size() == 0) {
+        min = T(0);
+        max = T(0);
+        return;
+    }
+
+    /** NaNs never take part in the comparison.
+        * When every value is NaN the result comes from nan_or_zero<T>(), so it need
+        * not be bit-identical to any NaN stored in the column (many NaN encodings exist).
+        */
+    T lowest = nan_or_zero<T>();
+    T highest = nan_or_zero<T>();
+    bool seeded = false;
+
+    // Single pass; `seeded` tells whether a non-NaN value has been seen yet.
+    for (const T value : data) {
+        if (is_nan(value)) continue;
+
+        if (seeded) {
+            if (value < lowest)
+                lowest = value;
+            else if (value > highest)
+                highest = value;
+        } else {
+            // The first usable value initializes both bounds.
+            lowest = value;
+            highest = value;
+            seeded = true;
+        }
+    }
+
+    // Hand the bounds back converted to NearestFieldType<T>.
+    min = NearestFieldType<T>(lowest);
+    max = NearestFieldType<T>(highest);
+}
+
+/// Explicit instantiations, one per supported element type - avoids code bloat in headers.
+template class ColumnVector<UInt8>;
+template class ColumnVector<UInt16>;
+template class ColumnVector<UInt32>;
+template class ColumnVector<UInt64>;
+template class ColumnVector<UInt128>;
+template class ColumnVector<Int8>;
+template class ColumnVector<Int16>;
+template class ColumnVector<Int32>;
+template class ColumnVector<Int64>;
+template class ColumnVector<Int128>;
+template class ColumnVector<Float32>;
+template class ColumnVector<Float64>;
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h
new file mode 100644
index 0000000000..2c5647e289
--- /dev/null
+++ b/be/src/vec/columns/column_vector.h
@@ -0,0 +1,296 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnVector.h
+// and modified by Doris
+
+#pragma once
+
+#include <cmath>
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_impl.h"
+#include "vec/columns/column_vector_helper.h"
+#include "vec/common/unaligned.h"
+#include "vec/core/field.h"
+
+namespace doris::vectorized {
+
+/** Stuff for comparing numbers.
+  * Integer values are compared as usual.
+  * Floating-point numbers are compared in such a way that NaNs always end up at the end
+  *  (if you don't do this, the sort would not work at all).
+  */
+template <typename T>
+struct CompareHelper {
+    /// Ordinary ordering predicates; the NaN hint plays no role in this generic version.
+    static bool less(T a, T b, int /*nan_direction_hint*/) { return a < b; }
+    static bool greater(T a, T b, int /*nan_direction_hint*/) { return a > b; }
+
+    /** Three-way comparison of two numbers.
+      * Yields 1 when a > b, -1 when a < b and 0 otherwise; nan_direction_hint is ignored
+      * here (the floating-point specializations are the ones that honour it).
+      */
+    static int compare(T a, T b, int /*nan_direction_hint*/) {
+        if (a > b) return 1;
+        return a < b ? -1 : 0;
+    }
+};
+
+/// Comparison helpers for floating-point T that give NaN a definite position:
+/// nan_direction_hint < 0 orders NaN before every number, > 0 orders it after.
+template <typename T>
+struct FloatCompareHelper {
+    static bool less(T a, T b, int nan_direction_hint) {
+        const bool a_nan = std::isnan(a);
+        const bool b_nan = std::isnan(b);
+        if (!a_nan && !b_nan) return a < b;
+
+        // Two NaNs are equivalent; a lone NaN is placed according to the hint.
+        if (a_nan && b_nan) return false;
+        return a_nan ? nan_direction_hint < 0 : nan_direction_hint > 0;
+    }
+
+    static bool greater(T a, T b, int nan_direction_hint) {
+        const bool a_nan = std::isnan(a);
+        const bool b_nan = std::isnan(b);
+        if (!a_nan && !b_nan) return a > b;
+
+        if (a_nan && b_nan) return false;
+        return a_nan ? nan_direction_hint > 0 : nan_direction_hint < 0;
+    }
+
+    /// Returns 0 for two NaNs, the hint (or its negation) when exactly one side is NaN,
+    /// and otherwise the sign (-1, 0 or 1) of a - b.
+    static int compare(T a, T b, int nan_direction_hint) {
+        const bool a_nan = std::isnan(a);
+        const bool b_nan = std::isnan(b);
+        if (UNLIKELY(a_nan || b_nan)) {
+            if (a_nan && b_nan) return 0;
+            return a_nan ? nan_direction_hint : -nan_direction_hint;
+        }
+        const T delta = a - b;
+        return (T(0) < delta) - (delta < T(0));
+    }
+};
+
+template <>
+struct CompareHelper<Float32> : public FloatCompareHelper<Float32> {}; // NaN-aware comparisons
+template <>
+struct CompareHelper<Float64> : public FloatCompareHelper<Float64> {}; // NaN-aware comparisons
+
+/** A column that stores its values of type T contiguously in a PaddedPODArray (see Container).
+ */
+template <typename T>
+class ColumnVector final : public COWHelper<ColumnVectorHelper, ColumnVector<T>> {
+    static_assert(!IsDecimalNumber<T>);
+
+private:
+    using Self = ColumnVector;
+    friend class COWHelper<ColumnVectorHelper, Self>;
+
+    struct less;
+    struct greater;
+
+public:
+    using value_type = T;
+    using Container = PaddedPODArray<value_type>;
+
+private:
+    ColumnVector() {}
+    ColumnVector(const size_t n) : data(n) {}
+    ColumnVector(const size_t n, const value_type x) : data(n, x) {}
+    ColumnVector(const ColumnVector& src) : data(src.data.begin(), src.data.end()) {}
+
+    /// Sugar constructor.
+    ColumnVector(std::initializer_list<T> il) : data {il} {}
+    /// Appends data[sel[i]] for i in [0, sel_size) to *res_ptr; helper of filter_by_selector.
+    void insert_res_column(const uint16_t* sel, size_t sel_size, vectorized::ColumnVector<T>* res_ptr) {
+        for (size_t i = 0; i < sel_size; i++) {
+            T* val_ptr = &data[sel[i]];
+            res_ptr->insert_data(reinterpret_cast<const char*>(val_ptr), 0);
+        }
+    }
+
+public:
+    bool is_numeric() const override { return IsNumber<T>; }
+
+    size_t size() const override { return data.size(); }
+
+    StringRef get_data_at(size_t n) const override {
+        return StringRef(reinterpret_cast<const char*>(&data[n]), sizeof(data[n]));
+    }
+
+    void insert_from(const IColumn& src, size_t n) override {
+        data.push_back(static_cast<const Self&>(src).get_data()[n]);
+    }
+    /// Appends one T loaded (possibly unaligned) from pos; the length argument is ignored.
+    void insert_data(const char* pos, size_t /*length*/) override {
+        data.push_back(unaligned_load<T>(pos));
+    }
+
+    void insert_default() override { data.push_back(T()); }
+    /// Removes the last n elements.
+    void pop_back(size_t n) override { data.resize_assume_reserved(data.size() - n); }
+
+    StringRef serialize_value_into_arena(size_t n, Arena& arena, char const*& begin) const override;
+
+    const char* deserialize_and_insert_from_arena(const char* pos) override;
+
+    void update_hash_with_value(size_t n, SipHash& hash) const override;
+
+    size_t byte_size() const override { return data.size() * sizeof(data[0]); }
+
+    size_t allocated_bytes() const override { return data.allocated_bytes(); }
+
+    void protect() override { data.protect(); }
+
+    void insert_value(const T value) { data.push_back(value); }
+
+    /// This method implemented in header because it could be possibly devirtualized.
+    int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override {
+        return CompareHelper<T>::compare(data[n], static_cast<const Self&>(rhs_).data[m],
+                                         nan_direction_hint);
+    }
+
+    void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+                         IColumn::Permutation& res) const override;
+
+    void reserve(size_t n) override { data.reserve(n); }
+
+    void resize(size_t n) override { data.resize(n); }
+
+    const char* get_family_name() const override;
+
+    MutableColumnPtr clone_resized(size_t size) const override;
+
+    Field operator[](size_t n) const override { return data[n]; }
+
+    void get(size_t n, Field& res) const override { res = (*this)[n]; }
+
+    UInt64 get64(size_t n) const override;
+
+    Float64 get_float64(size_t n) const override;
+
+    void clear() override { data.clear(); }
+
+    UInt64 get_uint(size_t n) const override { return UInt64(data[n]); }
+
+    bool get_bool(size_t n) const override { return bool(data[n]); }
+
+    Int64 get_int(size_t n) const override { return Int64(data[n]); }
+
+    void insert(const Field& x) override {
+        data.push_back(doris::vectorized::get<NearestFieldType<T>>(x));
+    }
+
+    void insert_range_from(const IColumn& src, size_t start, size_t length) override;
+
+    void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override;
+
+    ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) const override;
+
+    // note(wb) this method is only used in storage layer now
+    ColumnPtr filter_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) override {
+        if (ptr == nullptr) {
+            auto res_ptr = vectorized::ColumnVector<T>::create();
+            if (sel_size == 0) {
+                return res_ptr;
+            }
+            insert_res_column(sel, sel_size, res_ptr.get());
+            return res_ptr;
+        } else {
+            auto res_ptr = (*std::move(*ptr)).assume_mutable();
+            if (sel_size == 0) {
+                return res_ptr;
+            }
+            insert_res_column(sel, sel_size, reinterpret_cast<vectorized::ColumnVector<T>*>(res_ptr.get()));
+            return *ptr;
+        }
+    }
+
+    ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override;
+
+    //    ColumnPtr index(const IColumn & indexes, size_t limit) const override;
+
+    template <typename Type>
+    ColumnPtr index_impl(const PaddedPODArray<Type>& indexes, size_t limit) const;
+
+    ColumnPtr replicate(const IColumn::Offsets& offsets) const override;
+
+    void get_extremes(Field& min, Field& max) const override;
+
+    MutableColumns scatter(IColumn::ColumnIndex num_columns,
+                           const IColumn::Selector& selector) const override {
+        return this->template scatter_impl<Self>(num_columns, selector);
+    }
+
+    //    void gather(ColumnGathererStream & gatherer_stream) override;
+
+    bool can_be_inside_nullable() const override { return true; }
+    /// Fixed-width contiguous storage; get_raw_data() spans all byte_size() bytes of it.
+    bool is_fixed_and_contiguous() const override { return true; }
+    size_t size_of_value_if_fixed() const override { return sizeof(T); }
+    StringRef get_raw_data() const override {
+        return StringRef(reinterpret_cast<const char*>(data.data()), byte_size());
+    }
+
+    bool structure_equals(const IColumn& rhs) const override {
+        return typeid(rhs) == typeid(ColumnVector<T>);
+    }
+
+    /** More efficient methods of manipulation - to manipulate with data directly. */
+    Container& get_data() { return data; }
+
+    const Container& get_data() const { return data; }
+
+    const T& get_element(size_t n) const { return data[n]; }
+
+    T& get_element(size_t n) { return data[n]; }
+
+    void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
+        DCHECK(size() > self_row);
+        data[self_row] = static_cast<const Self&>(rhs).data[row];
+    }
+
+    void replace_column_data_default(size_t self_row = 0) override {
+        DCHECK(size() > self_row);
+        data[self_row] = T();
+    }
+
+protected:
+    Container data;
+};
+
+/// Gathers data[indexes[i]] for the first `limit` entries of `indexes`
+/// (limit == 0 means every entry) into a newly created column.
+template <typename T>
+template <typename Type>
+ColumnPtr ColumnVector<T>::index_impl(const PaddedPODArray<Type>& indexes, size_t limit) const {
+    const size_t available = indexes.size();
+    limit = (limit == 0) ? available : std::min(available, limit);
+
+    auto gathered = this->create(limit);
+    auto& out = gathered->get_data();
+    for (size_t pos = 0; pos < limit; ++pos) {
+        out[pos] = data[indexes[pos]];
+    }
+
+    return gathered;
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_vector_helper.h b/be/src/vec/columns/column_vector_helper.h
new file mode 100644
index 0000000000..36b512be3f
--- /dev/null
+++ b/be/src/vec/columns/column_vector_helper.h
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnVectorHelper.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+
+namespace doris::vectorized {
+
+/** Allows to access internal array of ColumnVector or ColumnFixedString without cast to concrete type.
+  * We will inherit ColumnVector and ColumnFixedString from this class instead of IColumn.
+  * Assumes data layout of ColumnVector, ColumnFixedString and PODArray.
+  *
+  * Why is it needed?
+  *
+  * There are some algorithms that specialize on the size of the data type but don't care about the concrete type.
+  * The same specialization may work for UInt64, Int64, Float64, FixedString(8), if it only does byte moving and hashing.
+  * To avoid code bloat and compile time increase, we can use single template instantiation for these cases
+  *  and just static_cast pointer to some single column type (e. g. ColumnUInt64) assuming that all types have identical memory layout.
+  *
+  * But this static_cast (downcast to unrelated type) is illegal according to the C++ standard and UBSan warns about it.
+  * To allow functional tests to work under UBSan we have to separate some base class that will present the memory layout in explicit way,
+  *  and we will do static_cast to this class.
+  */
+class ColumnVectorHelper : public IColumn {
+public:
+    /// raw_data() of the PODArray assumed to sit directly behind this base sub-object.
+    template <size_t ELEMENT_SIZE>
+    const char* get_raw_data_begin() const {
+        using Array = PODArrayBase<ELEMENT_SIZE, 4096, Allocator<false>, 15, 16>;
+        const char* array_at = reinterpret_cast<const char*>(this) + sizeof(*this);
+        return reinterpret_cast<const Array*>(array_at)->raw_data();
+    }
+    /// Forwards ptr to push_back_raw() of that same array (one ELEMENT_SIZE-wide element).
+    template <size_t ELEMENT_SIZE>
+    void insert_raw_data(const char* ptr) {
+        using Array = PODArrayBase<ELEMENT_SIZE, 4096, Allocator<false>, 15, 16>;
+        reinterpret_cast<Array*>(reinterpret_cast<char*>(this) + sizeof(*this))->push_back_raw(ptr);
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/columns_common.cpp b/be/src/vec/columns/columns_common.cpp
new file mode 100644
index 0000000000..3045c5bc0f
--- /dev/null
+++ b/be/src/vec/columns/columns_common.cpp
@@ -0,0 +1,271 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnsCommon.cpp
+// and modified by Doris
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/typeid_cast.h"
+
+namespace doris::vectorized {
+
+size_t count_bytes_in_filter(const IColumn::Filter& filt) {
+    size_t count = 0;
+    // Counts the filter bytes that are > 0 when interpreted as signed 8-bit values.
+    /** NOTE: In theory, `filt` should only contain zeros and ones.
+      * But, just in case, here the condition > 0 (to signed bytes) is used.
+      * It would be better to use != 0, then this does not allow SSE2.
+      */
+
+    const Int8* pos = reinterpret_cast<const Int8*>(filt.data());
+    const Int8* end = pos + filt.size();
+
+#if defined(__SSE2__) && defined(__POPCNT__)
+    const __m128i zero16 = _mm_setzero_si128();
+    const Int8* end64 = pos + filt.size() / 64 * 64;
+    // Main loop: 64 bytes per step - four 16-byte compares merged into one 64-bit mask.
+    for (; pos < end64; pos += 64)
+        count += __builtin_popcountll(
+                static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
+                        _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos)), zero16))) |
+                (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
+                         _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos + 16)), zero16)))
+                 << 16) |
+                (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
+                         _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos + 32)), zero16)))
+                 << 32) |
+                (static_cast<UInt64>(_mm_movemask_epi8(_mm_cmpgt_epi8(
+                         _mm_loadu_si128(reinterpret_cast<const __m128i*>(pos + 48)), zero16)))
+                 << 48));
+
+        /// TODO Add duff device for tail?
+#endif
+    // Scalar loop for whatever the block above left over (everything without SSE2+POPCNT).
+    for (; pos < end; ++pos) count += *pos > 0;
+
+    return count;
+}
+
+/// Histogram of `selector`: entry c holds how many selector entries are equal to c.
+std::vector<size_t> count_columns_size_in_selector(IColumn::ColumnIndex num_columns,
+                                                   const IColumn::Selector& selector) {
+    std::vector<size_t> rows_per_column(num_columns);
+    for (const auto target : selector) rows_per_column[target] += 1;
+    return rows_per_column;
+}
+
+/// True when every one of the `size` bytes at `data` equals `byte` (size 0 counts as true).
+bool memory_is_byte(const void* data, size_t size, uint8_t byte) {
+    const auto* bytes = reinterpret_cast<const uint8_t*>(data);
+    return size == 0 || (bytes[0] == byte && memcmp(bytes, bytes + 1, size - 1) == 0);
+}
+
+bool memory_is_zero(const void* data, size_t size) { // true when all `size` bytes are 0x00
+    return memory_is_byte(data, size, 0x0);
+}
+
+namespace {
+/// Implementation details of filterArraysImpl function, used as template parameter.
+/// Allow to build or not to build offsets array.
+
+/// Offsets builder that records the end offset of every array kept by the filter.
+struct ResultOffsetsBuilder {
+    IColumn::Offsets& res_offsets;
+    /// Running total of the element counts appended so far.
+    IColumn::Offset current_src_offset = 0;
+
+    explicit ResultOffsetsBuilder(IColumn::Offsets* res_offsets_) : res_offsets(*res_offsets_) {}
+
+    /// Reserves room for result_size_hint offsets when it is positive, else for src_size.
+    void reserve(ssize_t result_size_hint, size_t src_size) {
+        res_offsets.reserve(result_size_hint > 0 ? result_size_hint : src_size);
+    }
+
+    /// Appends the end offset of a single array holding array_size elements.
+    void insertOne(size_t array_size) {
+        current_src_offset += array_size;
+        res_offsets.push_back(current_src_offset);
+    }
+
+    /// Appends SIMD_BYTES source offsets at once, shifted down by the distance between
+    /// chunk_offset and current_src_offset, then advances the total by chunk_size.
+    template <size_t SIMD_BYTES>
+    void insertChunk(const IColumn::Offset* src_offsets_pos, bool first,
+                     IColumn::Offset chunk_offset, size_t chunk_size) {
+        const auto first_new = res_offsets.size();
+        res_offsets.resize(first_new + SIMD_BYTES);
+        IColumn::Offset* const copied = &res_offsets[first_new];
+        memcpy(copied, src_offsets_pos, SIMD_BYTES * sizeof(IColumn::Offset));
+
+        // The very first chunk is never shifted, nor is one whose shift is zero.
+        const auto shift = first ? 0 : chunk_offset - current_src_offset;
+        if (shift > 0) {
+            for (size_t i = 0; i < SIMD_BYTES; ++i) copied[i] -= shift;
+        }
+        current_src_offset += chunk_size;
+    }
+};
+
+struct NoResultOffsetsBuilder {
+    explicit NoResultOffsetsBuilder(IColumn::Offsets*) {}
+    void reserve(ssize_t, size_t) {}
+    void insertOne(size_t) {}
+    // Every member is a deliberate no-op with the same signatures as ResultOffsetsBuilder.
+    template <size_t SIMD_BYTES>
+    void insertChunk(const IColumn::Offset*, bool, IColumn::Offset, size_t) {}
+};
+
+template <typename T, typename ResultOffsetsBuilder>
+void filter_arrays_impl_generic(const PaddedPODArray<T>& src_elems,
+                                const IColumn::Offsets& src_offsets, PaddedPODArray<T>& res_elems,
+                                IColumn::Offsets* res_offsets, const IColumn::Filter& filt,
+                                ssize_t result_size_hint) {
+    const size_t size = src_offsets.size();
+    if (size != filt.size()) {
+        LOG(FATAL) << "Size of filter doesn't match size of column.";
+    }
+    // The builder type decides whether result offsets are recorded or silently dropped.
+    ResultOffsetsBuilder result_offsets_builder(res_offsets);
+    // result_size_hint: 0 reserves nothing, < 0 reserves as much as the source, > 0 scales.
+    if (result_size_hint) {
+        result_offsets_builder.reserve(result_size_hint, size);
+        // NOTE(review): the division below assumes size != 0 whenever the hint is positive.
+        if (result_size_hint < 0)
+            res_elems.reserve(src_elems.size());
+        else if (result_size_hint < 1000000000 && src_elems.size() < 1000000000) /// Avoid overflow.
+            res_elems.reserve((result_size_hint * src_elems.size() + size - 1) / size);
+    }
+
+    const UInt8* filt_pos = filt.data();
+    const auto filt_end = filt_pos + size;
+
+    auto offsets_pos = src_offsets.data();
+    const auto offsets_begin = offsets_pos;
+
+    /// copy array ending at *end_offset_ptr
+    const auto copy_array = [&](const IColumn::Offset* offset_ptr) {
+        const auto arr_offset = offset_ptr == offsets_begin ? 0 : offset_ptr[-1];
+        const auto arr_size = *offset_ptr - arr_offset;
+
+        result_offsets_builder.insertOne(arr_size);
+
+        const auto elems_size_old = res_elems.size();
+        res_elems.resize(elems_size_old + arr_size);
+        memcpy(&res_elems[elems_size_old], &src_elems[arr_offset], arr_size * sizeof(T));
+    };
+    // Fast path: SIMD_BYTES filter bytes per iteration, condensed into one 32-bit mask.
+    static constexpr size_t SIMD_BYTES = 32;
+    const auto filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
+
+    while (filt_pos < filt_end_aligned) {
+        auto mask = bytes32_mask_to_bits32_mask(filt_pos);
+
+        if (mask == 0xffffffff) {
+            /// SIMD_BYTES consecutive rows pass the filter
+            const auto first = offsets_pos == offsets_begin;
+
+            const auto chunk_offset = first ? 0 : offsets_pos[-1];
+            const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset;
+
+            result_offsets_builder.template insertChunk<SIMD_BYTES>(offsets_pos, first,
+                                                                    chunk_offset, chunk_size);
+
+            /// copy elements for SIMD_BYTES arrays at once
+            const auto elems_size_old = res_elems.size();
+            res_elems.resize(elems_size_old + chunk_size);
+            memcpy(&res_elems[elems_size_old], &src_elems[chunk_offset], chunk_size * sizeof(T));
+        } else {
+            while (mask) {
+                const size_t bit_pos = __builtin_ctzll(mask); // position of the lowest set bit
+                copy_array(offsets_pos + bit_pos);
+                mask = mask & (mask - 1); // clear that bit
+            }
+        }
+
+        filt_pos += SIMD_BYTES;
+        offsets_pos += SIMD_BYTES;
+    }
+    // Tail: the remaining (< SIMD_BYTES) rows are tested one by one.
+    while (filt_pos < filt_end) {
+        if (*filt_pos) copy_array(offsets_pos);
+
+        ++filt_pos;
+        ++offsets_pos;
+    }
+}
+} // namespace
+
+template <typename T> // variant that also fills res_offsets (uses ResultOffsetsBuilder)
+void filter_arrays_impl(const PaddedPODArray<T>& src_elems, const IColumn::Offsets& src_offsets,
+                        PaddedPODArray<T>& res_elems, IColumn::Offsets& res_offsets,
+                        const IColumn::Filter& filt, ssize_t result_size_hint) {
+    return filter_arrays_impl_generic<T, ResultOffsetsBuilder>(
+            src_elems, src_offsets, res_elems, &res_offsets, filt, result_size_hint);
+}
+
+template <typename T> // variant that produces elements only (uses NoResultOffsetsBuilder)
+void filter_arrays_impl_only_data(const PaddedPODArray<T>& src_elems,
+                                  const IColumn::Offsets& src_offsets, PaddedPODArray<T>& res_elems,
+                                  const IColumn::Filter& filt, ssize_t result_size_hint) {
+    return filter_arrays_impl_generic<T, NoResultOffsetsBuilder>(src_elems, src_offsets, res_elems,
+                                                                 nullptr, filt, result_size_hint);
+}
+
+/// Explicit instantiations per element type - keeps the implementation above out of the header.
+#define INSTANTIATE(TYPE)                                                                        \
+    template void filter_arrays_impl<TYPE>(const PaddedPODArray<TYPE>&, const IColumn::Offsets&, \
+                                           PaddedPODArray<TYPE>&, IColumn::Offsets&,             \
+                                           const IColumn::Filter&, ssize_t);                     \
+    template void filter_arrays_impl_only_data<TYPE>(                                            \
+            const PaddedPODArray<TYPE>&, const IColumn::Offsets&, PaddedPODArray<TYPE>&,         \
+            const IColumn::Filter&, ssize_t);
+
+INSTANTIATE(UInt8)
+INSTANTIATE(UInt16)
+INSTANTIATE(UInt32)
+INSTANTIATE(UInt64)
+INSTANTIATE(Int8)
+INSTANTIATE(Int16)
+INSTANTIATE(Int32)
+INSTANTIATE(Int64)
+INSTANTIATE(Float32)
+INSTANTIATE(Float64)
+
+#undef INSTANTIATE
+
+namespace detail {
+/// Returns the data array of `indexes` when typeid_cast accepts it as ColumnVector<T>, else nullptr.
+template <typename T>
+const PaddedPODArray<T>* get_indexes_data(const IColumn& indexes) {
+    const auto* typed = typeid_cast<const ColumnVector<T>*>(&indexes);
+    return typed ? &typed->get_data() : nullptr;
+}
+
+/// Instantiated for the unsigned integer element types only.
+template const PaddedPODArray<UInt8>* get_indexes_data<UInt8>(const IColumn& indexes);
+template const PaddedPODArray<UInt16>* get_indexes_data<UInt16>(const IColumn& indexes);
+template const PaddedPODArray<UInt32>* get_indexes_data<UInt32>(const IColumn& indexes);
+template const PaddedPODArray<UInt64>* get_indexes_data<UInt64>(const IColumn& indexes);
+} // namespace detail
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/columns_common.h b/be/src/vec/columns/columns_common.h
new file mode 100644
index 0000000000..83c38c4c4f
--- /dev/null
+++ b/be/src/vec/columns/columns_common.h
@@ -0,0 +1,121 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnsCommon.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+
+#ifdef __AVX2__
+#include <immintrin.h>
+#elif __SSE2__
+#include <emmintrin.h>
+#endif
+
+/// Common helper methods for implementation of different columns.
+
+namespace doris::vectorized {
+
+/// Transform 32-byte mask to 32-bit mask: bit i of the result is set when byte i of filt_pos,
+/// read as a signed 8-bit value, is greater than zero. All three code paths use this same rule.
+inline uint32_t bytes32_mask_to_bits32_mask(const uint8_t* filt_pos) {
+#ifdef __AVX2__
+    auto zero32 = _mm256_setzero_si256();
+    uint32_t mask = static_cast<uint32_t>(_mm256_movemask_epi8(_mm256_cmpgt_epi8(
+            _mm256_loadu_si256(reinterpret_cast<const __m256i*>(filt_pos)), zero32)));
+#elif __SSE2__
+    auto zero16 = _mm_setzero_si128();
+    uint32_t mask =
+            (static_cast<uint32_t>(_mm_movemask_epi8(_mm_cmpgt_epi8(
+                    _mm_loadu_si128(reinterpret_cast<const __m128i*>(filt_pos)), zero16)))) |
+            ((static_cast<uint32_t>(_mm_movemask_epi8(_mm_cmpgt_epi8(
+                      _mm_loadu_si128(reinterpret_cast<const __m128i*>(filt_pos + 16)), zero16)))
+              << 16) & 0xffff0000);
+#else
+    uint32_t mask = 0;
+    for (size_t i = 0; i < 32; ++i) {
+        mask |= static_cast<uint32_t>(static_cast<int8_t>(filt_pos[i]) > 0) << i;
+    }
+#endif
+    return mask;
+}
+
+/// Counts how many bytes of `filt` are greater than zero.
+size_t count_bytes_in_filter(const IColumn::Filter& filt);
+
+/// Returns vector with num_columns elements. vector[i] is the number of times value i occurs in selector.
+/// Selector must contain values from 0 to num_columns - 1. NOTE: this is not checked.
+std::vector<size_t> count_columns_size_in_selector(IColumn::ColumnIndex num_columns,
+                                                   const IColumn::Selector& selector);
+
+/// Returns true if the memory contains only zeros.
+bool memory_is_zero(const void* data, size_t size);
+bool memory_is_byte(const void* data, size_t size, uint8_t byte); // presumably: all bytes == byte (confirm)
+
+/// The general implementation of `filter` function for ColumnArray and ColumnString.
+template <typename T>
+void filter_arrays_impl(const PaddedPODArray<T>& src_elems, const IColumn::Offsets& src_offsets,
+                        PaddedPODArray<T>& res_elems, IColumn::Offsets& res_offsets,
+                        const IColumn::Filter& filt, ssize_t result_size_hint);
+
+/// Same as above, but does not fill res_offsets.
+template <typename T>
+void filter_arrays_impl_only_data(const PaddedPODArray<T>& src_elems,
+                                  const IColumn::Offsets& src_offsets, PaddedPODArray<T>& res_elems,
+                                  const IColumn::Filter& filt, ssize_t result_size_hint);
+
+namespace detail {
+template <typename T>
+const PaddedPODArray<T>* get_indexes_data(const IColumn& indexes); // nullptr unless ColumnVector<T>
+} // namespace detail
+
+/// Checks limit <= indexes.size() and calls column.index_impl<Type>(indexes_data, limit), where Type
+/// is the UInt8/16/32/64 element type of `indexes`. limit == 0 means "use indexes.size()".
+template <typename Column>
+ColumnPtr select_index_impl(const Column& column, const IColumn& indexes, size_t limit) {
+    if (limit == 0) limit = indexes.size();
+    if (indexes.size() < limit) {
+        LOG(FATAL) << "Size of indexes is less than required.";
+    }
+
+    if (auto* data_uint8 = detail::get_indexes_data<UInt8>(indexes))
+        return column.template index_impl<UInt8>(*data_uint8, limit);
+    else if (auto* data_uint16 = detail::get_indexes_data<UInt16>(indexes))
+        return column.template index_impl<UInt16>(*data_uint16, limit);
+    else if (auto* data_uint32 = detail::get_indexes_data<UInt32>(indexes))
+        return column.template index_impl<UInt32>(*data_uint32, limit);
+    else if (auto* data_uint64 = detail::get_indexes_data<UInt64>(indexes))
+        return column.template index_impl<UInt64>(*data_uint64, limit);
+    else {
+        LOG(FATAL) << "Indexes column for IColumn::select must be ColumnUInt, got "
+                   << indexes.get_name();
+        return nullptr;
+    }
+}
+
+/// Explicitly instantiates Column::index_impl for each index element type that
+/// select_index_impl dispatches to: UInt8, UInt16, UInt32 and UInt64.
+/// The member is spelled index_impl, matching the calls made in select_index_impl;
+/// parameter names are omitted because explicit instantiations do not need them.
+#define INSTANTIATE_INDEX_IMPL(Column)                                                          \
+    template ColumnPtr Column::index_impl<UInt8>(const PaddedPODArray<UInt8>&, size_t) const;   \
+    template ColumnPtr Column::index_impl<UInt16>(const PaddedPODArray<UInt16>&, size_t) const; \
+    template ColumnPtr Column::index_impl<UInt32>(const PaddedPODArray<UInt32>&, size_t) const; \
+    template ColumnPtr Column::index_impl<UInt64>(const PaddedPODArray<UInt64>&, size_t) const;
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/columns_number.h b/be/src/vec/columns/columns_number.h
new file mode 100644
index 0000000000..da3c011491
--- /dev/null
+++ b/be/src/vec/columns/columns_number.h
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnsNumber.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_vector.h"
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+/** Columns with numbers: aliases of ColumnVector<T> for each numeric element type T. */
+
+using ColumnUInt8 = ColumnVector<UInt8>;
+using ColumnUInt16 = ColumnVector<UInt16>;
+using ColumnUInt32 = ColumnVector<UInt32>;
+using ColumnUInt64 = ColumnVector<UInt64>;
+using ColumnUInt128 = ColumnVector<UInt128>;
+
+using ColumnInt8 = ColumnVector<Int8>;
+using ColumnInt16 = ColumnVector<Int16>;
+using ColumnInt32 = ColumnVector<Int32>;
+using ColumnInt64 = ColumnVector<Int64>;
+using ColumnInt128 = ColumnVector<Int128>;
+
+using ColumnFloat32 = ColumnVector<Float32>;
+using ColumnFloat64 = ColumnVector<Float64>;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h
new file mode 100644
index 0000000000..a23c550de9
--- /dev/null
+++ b/be/src/vec/columns/predicate_column.h
@@ -0,0 +1,447 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_impl.h"
+
+#include "runtime/string_value.h"
+#include "olap/decimal12.h"
+#include "olap/uint24.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_decimal.h"
+#include "vec/columns/column_vector.h"
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+/**
+ * Column used by the storage layer to hold the values of a predicate column.
+ * Values are appended with insert_data() and copied into a regular column by
+ * filter_by_selector(); most other IColumn operations abort via LOG(FATAL).
+ *  T = predicate column type
+ */
+template <typename T>
+class PredicateColumnType final : public COWHelper<IColumn, PredicateColumnType<T>> {
+private:
+    PredicateColumnType() {}
+    PredicateColumnType(const size_t n) : data(n) {}
+    friend class COWHelper<IColumn, PredicateColumnType<T>>;
+    PredicateColumnType(const PredicateColumnType& src) : data(src.data.begin(), src.data.end()) {}
+
+    // Builds an integer from the first three bytes of the value stored at row idx; byte 0 is
+    // the least significant byte. Reached from filter_by_selector when T is uint24_t.
+    uint64_t get_date_at(uint16_t idx) {
+        const T val = data[idx];
+        const auto* bytes = reinterpret_cast<const unsigned char*>(&val);
+        uint64_t value = bytes[2];
+        value = (value << 8) | bytes[1];
+        value = (value << 8) | bytes[0];
+        return value;
+    }
+
+    // Appends, for each selected row, the VecDateTimeValue filled in by from_olap_date().
+    void insert_date_to_res_column(const uint16_t* sel, size_t sel_size, vectorized::ColumnVector<Int64>* res_ptr) {
+        for (size_t i = 0; i < sel_size; i++) {
+            VecDateTimeValue date;
+            date.from_olap_date(get_date_at(sel[i]));
+            res_ptr->insert_data(reinterpret_cast<char*>(&date), 0);
+        }
+    }
+
+    // Appends, for each selected row, a VecDateTimeValue constructed from the stored integer.
+    void insert_datetime_to_res_column(const uint16_t* sel, size_t sel_size, vectorized::ColumnVector<Int64>* res_ptr) {
+        for (size_t i = 0; i < sel_size; i++) {
+            uint64_t value = data[sel[i]];
+            vectorized::VecDateTimeValue date(value);
+            res_ptr->insert_data(reinterpret_cast<char*>(&date), 0);
+        }
+    }
+
+    // Appends the (ptr, len) bytes of each selected StringValue to res_ptr.
+    void insert_string_to_res_column(const uint16_t* sel, size_t sel_size, vectorized::ColumnString* res_ptr) {
+        for (size_t i = 0; i < sel_size; i++) {
+            uint16_t n = sel[i];
+            auto& sv = reinterpret_cast<StringValue&>(data[n]);
+            res_ptr->insert_data(sv.ptr, sv.len);
+        }
+    }
+
+    // Converts each selected decimal12_t (integer, fraction) to a DecimalV2Value and appends it.
+    void insert_decimal_to_res_column(const uint16_t* sel, size_t sel_size, vectorized::ColumnDecimal<Decimal128>* res_ptr) {
+        for (size_t i = 0; i < sel_size; i++) {
+            uint16_t n = sel[i];
+            auto& dv = reinterpret_cast<const decimal12_t&>(data[n]);
+            DecimalV2Value dv_data(dv.integer, dv.fraction);
+            res_ptr->insert_data(reinterpret_cast<char*>(&dv_data), 0);
+        }
+    }
+
+    // Appends each selected value to res_ptr by passing its address to insert_data().
+    template <typename Y>
+    void insert_default_value_res_column(const uint16_t* sel, size_t sel_size, vectorized::ColumnVector<Y>* res_ptr) {
+        for (size_t i = 0; i < sel_size; i++) {
+            T* val_ptr = &data[sel[i]];
+            res_ptr->insert_data((char*)val_ptr, 0);
+        }
+    }
+
+    // Like insert_default_value_res_column, but writes through a plain IColumn*; used for bool.
+    void insert_byte_to_res_column(const uint16_t* sel, size_t sel_size, vectorized::IColumn* res_ptr) {
+        for (size_t i = 0; i < sel_size; i++) {
+            uint16_t n = sel[i];
+            char* ch_val = reinterpret_cast<char*>(&data[n]);
+            res_ptr->insert_data(ch_val, 0);
+        }
+    }
+
+    // Copies the selected rows into a ColumnVector<Y>. With ptr == nullptr a new column is
+    // created and returned; otherwise the rows are appended to *ptr, which is then returned.
+    template <typename Y>
+    ColumnPtr filter_default_type_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) {
+        static_assert(std::is_same_v<T, Y>);
+        // todo(wb) the operation which create a new column maybe should move to other place
+        if (ptr == nullptr) {
+            auto res = vectorized::ColumnVector<Y>::create();
+            if (sel_size != 0) {
+                insert_default_value_res_column(sel, sel_size, res.get());
+            }
+            return res;
+        }
+        if (sel_size != 0) {
+            MutableColumnPtr ptr_res = (*std::move(*ptr)).assume_mutable();
+            insert_default_value_res_column(sel, sel_size, reinterpret_cast<vectorized::ColumnVector<Y>*>(ptr_res.get()));
+        }
+        return *ptr;
+    }
+
+public:
+    using Self = PredicateColumnType;
+    using value_type = T;
+    using Container = PaddedPODArray<value_type>;
+
+    bool is_numeric() const override { return false; }
+
+    bool is_predicate_column() const override { return true; }
+
+    size_t size() const override { return data.size(); }
+
+    // The IColumn operations below are not supported by predicate columns; each logs FATAL.
+    [[noreturn]] StringRef get_data_at(size_t n) const override {
+        LOG(FATAL) << "get_data_at not supported in PredicateColumnType";
+    }
+
+    void insert_from(const IColumn& src, size_t n) override {
+        LOG(FATAL) << "insert_from not supported in PredicateColumnType";
+    }
+
+    void insert_range_from(const IColumn& src, size_t start, size_t length) override {
+        LOG(FATAL) << "insert_range_from not supported in PredicateColumnType";
+    }
+
+    void insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) override {
+        LOG(FATAL) << "insert_indices_from not supported in PredicateColumnType";
+    }
+
+    void pop_back(size_t n) override {
+        LOG(FATAL) << "pop_back not supported in PredicateColumnType";
+    }
+
+    void update_hash_with_value(size_t n, SipHash& hash) const override {
+        LOG(FATAL) << "update_hash_with_value not supported in PredicateColumnType";
+    }
+
+    // Appends StringValue(data_ptr, length); push_back_without_reserve is used as-is.
+    void insert_string_value(char* data_ptr, size_t length) {
+        StringValue sv(data_ptr, length);
+        data.push_back_without_reserve(sv);
+    }
+
+    // Reads an int64 integer part followed by an int32 fraction part from data_ptr.
+    void insert_decimal_value(char* data_ptr, size_t length) {
+        decimal12_t dc12_value;
+        dc12_value.integer = *(int64_t*)(data_ptr);
+        dc12_value.fraction = *(int32_t*)(data_ptr + sizeof(int64_t));
+        data.push_back_without_reserve(dc12_value);
+    }
+
+    // used for int128: copies sizeof(T) bytes with memcpy instead of dereferencing a T*
+    void insert_in_copy_way(char* data_ptr, size_t length) {
+        T val {};
+        memcpy(&val, data_ptr, sizeof(val));
+        data.push_back_without_reserve(val);
+    }
+
+    // Appends the T that data_ptr points to.
+    void insert_default_type(char* data_ptr, size_t length) {
+        T* val = (T*)data_ptr;
+        data.push_back_without_reserve(*val);
+    }
+
+    // Appends one value, choosing the insertion helper that matches T at compile time.
+    void insert_data(const char* data_ptr, size_t length) override {
+        char* ch = const_cast<char*>(data_ptr);
+        if constexpr (std::is_same_v<T, StringValue>) {
+            insert_string_value(ch, length);
+        } else if constexpr (std::is_same_v<T, decimal12_t>) {
+            insert_decimal_value(ch, length);
+        } else if constexpr (std::is_same_v<T, doris::vectorized::Int128>) {
+            insert_in_copy_way(ch, length);
+        } else {
+            insert_default_type(ch, length);
+        }
+    }
+
+    void insert_default() override { data.push_back(T()); }
+
+    void clear() override { data.clear(); }
+
+    size_t byte_size() const override { return data.size() * sizeof(T); }
+
+    size_t allocated_bytes() const override { return byte_size(); }
+
+    void protect() override {}
+
+    void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
+                         IColumn::Permutation& res) const override {
+        LOG(FATAL) << "get_permutation not supported in PredicateColumnType";
+    }
+
+    void reserve(size_t n) override { data.reserve(n); }
+
+    [[noreturn]] const char* get_family_name() const override {
+        LOG(FATAL) << "get_family_name not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] MutableColumnPtr clone_resized(size_t size) const override {
+        LOG(FATAL) << "clone_resized not supported in PredicateColumnType";
+    }
+
+    void insert(const Field& x) override {
+        LOG(FATAL) << "insert not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] Field operator[](size_t n) const override {
+        LOG(FATAL) << "operator[] not supported in PredicateColumnType";
+    }
+
+    void get(size_t n, Field& res) const override {
+        LOG(FATAL) << "get field not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] UInt64 get64(size_t n) const override {
+        LOG(FATAL) << "get field not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] Float64 get_float64(size_t n) const override {
+        LOG(FATAL) << "get field not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] UInt64 get_uint(size_t n) const override {
+        LOG(FATAL) << "get field not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] bool get_bool(size_t n) const override {
+        LOG(FATAL) << "get field not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] Int64 get_int(size_t n) const override {
+        LOG(FATAL) << "get field not supported in PredicateColumnType";
+    }
+
+    // It's impossible to use this column type as a key, so we don't have to implement them.
+    [[noreturn]] StringRef serialize_value_into_arena(size_t n, Arena& arena,
+                                                      char const*& begin) const {
+        LOG(FATAL) << "serialize_value_into_arena not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] const char* deserialize_and_insert_from_arena(const char* pos) {
+        LOG(FATAL) << "deserialize_and_insert_from_arena not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] int compare_at(size_t n, size_t m, const IColumn& rhs,
+                                int nan_direction_hint) const {
+        LOG(FATAL) << "compare_at not supported in PredicateColumnType";
+    }
+
+    void get_extremes(Field& min, Field& max) const {
+        LOG(FATAL) << "get_extremes not supported in PredicateColumnType";
+    }
+
+    bool can_be_inside_nullable() const override { return true; }
+
+    bool is_fixed_and_contiguous() const override { return true; }
+    size_t size_of_value_if_fixed() const override { return sizeof(T); }
+
+    [[noreturn]] StringRef get_raw_data() const override {
+        LOG(FATAL) << "get_raw_data not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] bool structure_equals(const IColumn& rhs) const override {
+        LOG(FATAL) << "structure_equals not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) const override {
+        LOG(FATAL) << "filter not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override {
+        LOG(FATAL) << "permute not supported in PredicateColumnType";
+    }
+
+    Container& get_data() { return data; }
+
+    const Container& get_data() const { return data; }
+
+    [[noreturn]] ColumnPtr replicate(const IColumn::Offsets& replicate_offsets) const override {
+        LOG(FATAL) << "replicate not supported in PredicateColumnType";
+    }
+
+    [[noreturn]] MutableColumns scatter(IColumn::ColumnIndex num_columns,
+                                        const IColumn::Selector& selector) const override {
+        LOG(FATAL) << "scatter not supported in PredicateColumnType";
+    }
+
+    // decimal12_t rows -> ColumnDecimal<Decimal128>; ptr is handled as in filter_default_type_by_selector.
+    ColumnPtr filter_decimal_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) {
+        if (ptr == nullptr) {
+            auto res = vectorized::ColumnDecimal<Decimal128>::create(0, 9); // todo(wb) need a global variable to stand for scale
+            if (sel_size != 0) {
+                insert_decimal_to_res_column(sel, sel_size, res.get());
+            }
+            return res;
+        }
+        if (sel_size != 0) {
+            MutableColumnPtr res_ptr = (*std::move(*ptr)).assume_mutable();
+            insert_decimal_to_res_column(sel, sel_size, reinterpret_cast<vectorized::ColumnDecimal<Decimal128>*>(res_ptr.get()));
+        }
+        return *ptr;
+    }
+
+    // uint24_t date rows -> ColumnVector<Int64>; ptr is handled as in filter_default_type_by_selector.
+    ColumnPtr filter_date_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) {
+        if (ptr == nullptr) {
+            auto res = vectorized::ColumnVector<Int64>::create();
+            if (sel_size != 0) {
+                insert_date_to_res_column(sel, sel_size, res.get());
+            }
+            return res;
+        }
+        if (sel_size != 0) {
+            MutableColumnPtr res_ptr = (*std::move(*ptr)).assume_mutable();
+            insert_date_to_res_column(sel, sel_size, reinterpret_cast<vectorized::ColumnVector<Int64>*>(res_ptr.get()));
+        }
+        return *ptr;
+    }
+
+    // uint64_t datetime rows -> ColumnVector<Int64>; ptr is handled as in filter_default_type_by_selector.
+    ColumnPtr filter_date_time_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) {
+        if (ptr == nullptr) {
+            auto res = vectorized::ColumnVector<Int64>::create();
+            if (sel_size != 0) {
+                insert_datetime_to_res_column(sel, sel_size, res.get());
+            }
+            return res;
+        }
+        if (sel_size != 0) {
+            MutableColumnPtr res_ptr = (*std::move(*ptr)).assume_mutable();
+            insert_datetime_to_res_column(sel, sel_size, reinterpret_cast<vectorized::ColumnVector<Int64>*>(res_ptr.get()));
+        }
+        return *ptr;
+    }
+
+    // StringValue rows -> ColumnString; ptr is handled as in filter_default_type_by_selector.
+    ColumnPtr filter_string_value_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) {
+        if (ptr == nullptr) {
+            auto res = vectorized::ColumnString::create();
+            if (sel_size != 0) {
+                res->reserve(sel_size);
+                insert_string_to_res_column(sel, sel_size, res.get());
+            }
+            return res;
+        }
+        if (sel_size != 0) {
+            MutableColumnPtr ptr_res = (*std::move(*ptr)).assume_mutable();
+            insert_string_to_res_column(sel, sel_size, reinterpret_cast<vectorized::ColumnString*>(ptr_res.get()));
+        }
+        return *ptr;
+    }
+
+    // bool rows -> ColumnVector<UInt8>. The freshly created column is returned on the ptr == nullptr path.
+    ColumnPtr filter_bool_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) {
+        if (ptr == nullptr) {
+            auto res = vectorized::ColumnVector<vectorized::UInt8>::create();
+            if (sel_size != 0) {
+                res->reserve(sel_size);
+                insert_byte_to_res_column(sel, sel_size, res.get());
+            }
+            return res;
+        }
+        if (sel_size != 0) {
+            MutableColumnPtr ptr_res = (*std::move(*ptr)).assume_mutable();
+            insert_byte_to_res_column(sel, sel_size, ptr_res.get());
+        }
+        return *ptr;
+    }
+
+    // Copies the rows listed in sel into a regular column; the helper is selected from T at compile time.
+    //todo(wb) need refactor this method, using return status to check unexpect args instead of LOG(FATAL)
+    ColumnPtr filter_by_selector(const uint16_t* sel, size_t sel_size, ColumnPtr* ptr = nullptr) override {
+        if constexpr (std::is_same_v<T, StringValue>) {
+            return filter_string_value_by_selector(sel, sel_size, ptr);
+        } else if constexpr (std::is_same_v<T, decimal12_t>) {
+            return filter_decimal_by_selector(sel, sel_size, ptr);
+        } else if constexpr (std::is_same_v<T, doris::vectorized::Int8>) {
+            return filter_default_type_by_selector<doris::vectorized::Int8>(sel, sel_size, ptr);
+        } else if constexpr (std::is_same_v<T, doris::vectorized::Int16>) {
+            return filter_default_type_by_selector<doris::vectorized::Int16>(sel, sel_size, ptr);
+        } else if constexpr (std::is_same_v<T, doris::vectorized::Int32>) {
+            return filter_default_type_by_selector<doris::vectorized::Int32>(sel, sel_size, ptr);
+        } else if constexpr (std::is_same_v<T, doris::vectorized::Int64>) {
+            return filter_default_type_by_selector<doris::vectorized::Int64>(sel, sel_size, ptr);
+        } else if constexpr (std::is_same_v<T, doris::vectorized::Float32>) {
+            return filter_default_type_by_selector<doris::vectorized::Float32>(sel, sel_size, ptr);
+        } else if constexpr (std::is_same_v<T, doris::vectorized::Float64>) {
+            return filter_default_type_by_selector<doris::vectorized::Float64>(sel, sel_size, ptr);
+        } else if constexpr (std::is_same_v<T, uint64_t>) {
+            return filter_date_time_by_selector(sel, sel_size, ptr);
+        } else if constexpr (std::is_same_v<T, uint24_t>) {
+            return filter_date_by_selector(sel, sel_size, ptr);
+        } else if constexpr (std::is_same_v<T, doris::vectorized::Int128>) {
+            return filter_default_type_by_selector<doris::vectorized::Int128>(sel, sel_size, ptr);
+        } else if constexpr (std::is_same_v<T, bool>) {
+            return filter_bool_by_selector(sel, sel_size, ptr);
+        } else {
+            LOG(FATAL) << "unexpected type in predicate column";
+        }
+    }
+
+    void replace_column_data(const IColumn&, size_t row, size_t self_row = 0) override {
+        LOG(FATAL) << "should not call replace_column_data in predicate column";
+    }
+
+    void replace_column_data_default(size_t self_row = 0) override {
+        LOG(FATAL) << "should not call replace_column_data_default in predicate column";
+    }
+
+private:
+    Container data;
+};
+using ColumnStringValue = PredicateColumnType<StringValue>;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/common/aggregation_common.h b/be/src/vec/common/aggregation_common.h
new file mode 100644
index 0000000000..d6a4d30df5
--- /dev/null
+++ b/be/src/vec/common/aggregation_common.h
@@ -0,0 +1,269 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/AggregationCommon.h
+// and modified by Doris
+
+#pragma once
+
+#include <array>
+
+#include "vec/columns/column.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/arena.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/hash_table/hash.h"
+#include "vec/common/memcpy_small.h"
+#include "vec/common/sip_hash.h"
+#include "vec/common/string_ref.h"
+#include "vec/common/uint128.h"
+
+template <>
+struct DefaultHash<StringRef> : public StringRefHash {}; // DefaultHash of StringRef is StringRefHash
+
+namespace doris::vectorized {
+
+using Sizes = std::vector<size_t>;
+
+/// When packing the values of nullable columns at a given row, we have to
+/// store whether each of these values is null or not. This is achieved
+/// by encoding this information as a bitmap. Let S be the size in bytes of
+/// a packed values binary blob and T the number of bytes we may place into
+/// this blob, the size that the bitmap shall occupy in the blob is equal to:
+/// ceil(T/8). Thus we must have: S = T + ceil(T/8). Below we indicate for
+/// each value of S, the corresponding value of T, and the bitmap size:
+///
+/// 32,28,4
+/// 16,14,2
+/// 8,7,1
+/// 4,3,1
+/// 2,1,1
+///
+
+namespace {
+// clang-format off
+/// Bytes of a packed key of type T reserved for the null bitmap; 0 when sizeof(T) is not in the table above.
+template <typename T>
+constexpr auto get_bitmap_size() {
+    switch (sizeof(T)) {
+    case 32: return 4;
+    case 16: return 2;
+    case 8: case 4: case 2: return 1;
+    default: return 0;
+    }
+}
+// clang-format on
+} // namespace
+
+template <typename T>
+using KeysNullMap = std::array<UInt8, get_bitmap_size<T>()>;
+
+/// Packs the fixed-size key values of row i into a binary blob of type T. Provided that all the keys
+/// fit into the blob, they are laid out consecutively; bytes that are not written stay zero.
+template <typename T, bool has_low_cardinality = false>
+static inline T ALWAYS_INLINE
+pack_fixed(size_t i, size_t keys_size, const ColumnRawPtrs& key_columns, const Sizes& key_sizes,
+           const ColumnRawPtrs* low_cardinality_positions [[maybe_unused]] = nullptr,
+           const Sizes* low_cardinality_sizes [[maybe_unused]] = nullptr) {
+    union {
+        T key;
+        char bytes[sizeof(key)] = {}; // zero-initialized byte view of key
+    };
+
+    size_t offset = 0; // next write position inside bytes
+
+    for (size_t j = 0; j < keys_size; ++j) {
+        size_t index = i; // row to read from key_columns[j]
+        const IColumn* column = key_columns[j];
+        if constexpr (has_low_cardinality) {
+            if (const IColumn* positions = (*low_cardinality_positions)[j]) {
+                switch ((*low_cardinality_sizes)[j]) { // width of the positions column's element type
+                case sizeof(UInt8):
+                    index = assert_cast<const ColumnUInt8*>(positions)->get_element(i);
+                    break;
+                case sizeof(UInt16):
+                    index = assert_cast<const ColumnUInt16*>(positions)->get_element(i);
+                    break;
+                case sizeof(UInt32):
+                    index = assert_cast<const ColumnUInt32*>(positions)->get_element(i);
+                    break;
+                case sizeof(UInt64):
+                    index = assert_cast<const ColumnUInt64*>(positions)->get_element(i);
+                    break;
+                default:
+                    LOG(FATAL) << "Unexpected size of index type for low cardinality column.";
+                }
+            }
+        }
+
+        switch (key_sizes[j]) { // constant-size memcpy for the common widths
+        case 1:
+            memcpy(bytes + offset,
+                   static_cast<const ColumnVectorHelper*>(column)->get_raw_data_begin<1>() + index,
+                   1);
+            offset += 1;
+            break;
+        case 2:
+            memcpy(bytes + offset,
+                   static_cast<const ColumnVectorHelper*>(column)->get_raw_data_begin<2>() +
+                           index * 2,
+                   2);
+            offset += 2;
+            break;
+        case 4:
+            memcpy(bytes + offset,
+                   static_cast<const ColumnVectorHelper*>(column)->get_raw_data_begin<4>() +
+                           index * 4,
+                   4);
+            offset += 4;
+            break;
+        case 8:
+            memcpy(bytes + offset,
+                   static_cast<const ColumnVectorHelper*>(column)->get_raw_data_begin<8>() +
+                           index * 8,
+                   8);
+            offset += 8;
+            break;
+        default: // any other width: copy key_sizes[j] bytes
+            memcpy(bytes + offset,
+                   static_cast<const ColumnVectorHelper*>(column)->get_raw_data_begin<1>() +
+                           index * key_sizes[j],
+                   key_sizes[j]);
+            offset += key_sizes[j];
+        }
+    }
+
+    return key;
+}
+
+/// Nullable-aware counterpart of the pack_fixed overload above.
+///
+/// Packs row `i` of the first `keys_size` columns of `key_columns` into one value of type T.
+/// Layout of the result, in order:
+///   - the bytes of `bitmap` (only when KeysNullMap<T> has a non-zero size);
+///   - for every key whose bit in `bitmap` is clear, `key_sizes[j]` raw bytes of row `i`.
+/// Keys whose bit is set contribute no value bytes. Bytes that are never written stay zero,
+/// because the backing union is zero-initialized.
+///
+/// Arguments:
+///   i           - row to read from every key column;
+///   keys_size   - number of key columns to pack;
+///   key_columns - the key columns, each accessed through ColumnVectorHelper;
+///   key_sizes   - byte width of one value of the corresponding key column;
+///   bitmap      - null bitmap for the row, one bit per key column.
+///
+/// NOTE: no bounds check is done here; the bitmap plus the packed values have to fit into
+/// sizeof(T).
+template <typename T>
+static inline T ALWAYS_INLINE pack_fixed(size_t i, size_t keys_size,
+                                         const ColumnRawPtrs& key_columns, const Sizes& key_sizes,
+                                         const KeysNullMap<T>& bitmap) {
+    static constexpr auto bitmap_size = std::tuple_size<KeysNullMap<T>>::value;
+    static constexpr bool has_bitmap = bitmap_size > 0;
+
+    union {
+        T key;
+        char bytes[sizeof(key)] = {};
+    };
+
+    // Number of bytes of `bytes` filled so far.
+    size_t written = 0;
+
+    // The null bitmap, when present, occupies the leading bytes of the packed key.
+    if (has_bitmap) {
+        memcpy(bytes, bitmap.data(), bitmap_size * sizeof(UInt8));
+        written = bitmap_size;
+    }
+
+    for (size_t col = 0; col < keys_size; ++col) {
+        // Bit (col % 8) of bitmap byte (col / 8) flags key `col` as NULL.
+        if (has_bitmap && ((bitmap[col / 8] >> (col % 8)) & 1) == 1) {
+            continue;
+        }
+
+        const auto* fixed = static_cast<const ColumnVectorHelper*>(key_columns[col]);
+        const auto width = key_sizes[col];
+        char* const dst = bytes + written;
+
+        // The common widths get their own case so that memcpy sees a literal length.
+        switch (width) {
+        case 1:
+            memcpy(dst, fixed->get_raw_data_begin<1>() + i, 1);
+            break;
+        case 2:
+            memcpy(dst, fixed->get_raw_data_begin<2>() + i * 2, 2);
+            break;
+        case 4:
+            memcpy(dst, fixed->get_raw_data_begin<4>() + i * 4, 4);
+            break;
+        case 8:
+            memcpy(dst, fixed->get_raw_data_begin<8>() + i * 8, 8);
+            break;
+        default:
+            memcpy(dst, fixed->get_raw_data_begin<1>() + i * width, width);
+            break;
+        }
+
+        written += width;
+    }
+
+    return key;
+}
+
+/// Hashes row `i` of the first `keys_size` key columns into a single UInt128 value (SipHash).
+static inline UInt128 ALWAYS_INLINE hash128(size_t i, size_t keys_size,
+                                            const ColumnRawPtrs& key_columns) {
+    SipHash hasher;
+    for (size_t col = 0; col != keys_size; ++col) {
+        // Every column updates the same hash state with its value at row `i`.
+        key_columns[col]->update_hash_with_value(i, hasher);
+    }
+    UInt128 digest;
+    hasher.get128(digest.low, digest.high);
+    return digest;
+}
+
+/// Moves the key bytes and then the repointed StringRef array into `pool`; returns the array copy.
+static inline StringRef* ALWAYS_INLINE place_keys_in_pool(size_t keys_size, StringRefs& keys,
+                                                          Arena& pool) {
+    for (size_t idx = 0; idx != keys_size; ++idx) {
+        auto& ref = keys[idx];
+        char* copy = pool.alloc(ref.size);
+        memcpy_small_allow_read_write_overflow15(copy, ref.data, ref.size);
+        ref.data = copy;
+    }
+    // Unlike the raw key bytes, the StringRef array is allocated with its natural alignment.
+    const size_t refs_bytes = keys_size * sizeof(StringRef);
+    char* refs_copy = pool.aligned_alloc(refs_bytes, alignof(StringRef));
+    memcpy_small_allow_read_write_overflow15(refs_copy, keys.data(), refs_bytes);
+    return reinterpret_cast<StringRef*>(refs_copy);
+}
+
+/** Serializes row `i` of the key columns into `pool` and returns {range start, total size}.
+  */
+static inline StringRef ALWAYS_INLINE serialize_keys_to_pool_contiguous(
+        size_t i, size_t keys_size, const ColumnRawPtrs& key_columns, Arena& pool) {
+    // Handed to every column; presumably set to the start of the serialized range - confirm.
+    const char* range_begin = nullptr;
+    size_t total_bytes = 0;
+    for (size_t col = 0; col != keys_size; ++col) {
+        total_bytes += key_columns[col]->serialize_value_into_arena(i, pool, range_begin).size;
+    }
+    return {range_begin, total_bytes};
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/common/allocator.h b/be/src/vec/common/allocator.h
new file mode 100644
index 0000000000..2a50dabf5c
--- /dev/null
+++ b/be/src/vec/common/allocator.h
@@ -0,0 +1,312 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Allocator.h
+// and modified by Doris
+
+#pragma once
+
+// TODO: Tracker
+// TODO: Readable
+
+#include <fmt/format.h>
+#include <string.h>
+
+#include <exception>
+
+#include "common/status.h"
+
+#ifdef NDEBUG
+#define ALLOCATOR_ASLR 0
+#else
+#define ALLOCATOR_ASLR 1
+#endif
+
+#if !defined(__APPLE__) && !defined(__FreeBSD__)
+#include <malloc.h>
+#endif
+
+#include <sys/mman.h>
+
+#include <algorithm>
+#include <cstdlib>
+
+#include "common/compiler_util.h"
+#ifdef THREAD_SANITIZER
+/// Thread sanitizer does not intercept mremap. The usage of mremap will lead to false positives.
+#define DISABLE_MREMAP 1
+#endif
+#include "vec/common/allocator_fwd.h"
+#include "vec/common/exception.h"
+#include "vec/common/mremap.h"
+
+/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+/**
+  * Many modern allocators (for example, tcmalloc) do not use mremap for
+  * realloc, even for large enough chunks of memory, although doing so would
+  * increase performance and reduce memory consumption during realloc.
+  * To fix this, we do mremap manually if the chunk of memory is large enough.
+  * The threshold (64 MB) is chosen quite large, since changing the address
+  * space is very slow, especially in the case of a large number of threads. We
+  * expect that the set of operations mmap/something to do/mremap can only be
+  * performed about 1000 times per second.
+  *
+  * P.S. This is also required, because tcmalloc can not allocate a chunk of
+  * memory greater than 16 GB.
+  */
+#ifdef NDEBUG
+static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
+#else
+/**
+      * In debug build, use small mmap threshold to reproduce more memory
+      * stomping bugs. Along with ASLR it will hopefully detect more issues than
+      * ASan. The program may fail due to the limit on number of memory mappings.
+      */
+static constexpr size_t MMAP_THRESHOLD = 4096;
+#endif
+
+static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
+static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
+
+/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
+  * Also used in hash tables.
+  * The interface is different from std::allocator
+  * - the presence of the method realloc, which for large chunks of memory uses mremap;
+  * - passing the size into the `free` method;
+  * - by the presence of the `alignment` argument;
+  * - the possibility of zeroing memory (used in hash tables);
+  * - random hint address for mmap
+  * - mmap_threshold for using mmap less or more
+  */
+template <bool clear_memory_, bool mmap_populate>
+class Allocator {
+public:
+    /// Allocate `size` bytes: mmap at or above MMAP_THRESHOLD, malloc/calloc/posix_memalign below.
+    void* alloc(size_t size, size_t alignment = 0) {
+        return alloc_no_track(size, alignment);
+    }
+
+    /// Free memory range. `size` picks munmap vs ::free, so it must match the allocated size.
+    void free(void* buf, size_t size) {
+        free_no_track(buf, size);
+        // CurrentMemoryTracker::free(size);
+    }
+
+    /** Resize memory range (the code handles both growing and shrinking).
+      * Data from old range is moved to the beginning of new range.
+      * Address of memory range could change.
+      */
+    void* realloc(void* buf, size_t old_size, size_t new_size, size_t alignment = 0) {
+        if (old_size == new_size) {
+            /// nothing to do.
+            /// BTW, it's not possible to change alignment while doing realloc.
+        } else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD &&
+                   alignment <= MALLOC_MIN_ALIGNMENT) {
+            /// Resize malloc'd memory region with no special alignment requirement.
+            // CurrentMemoryTracker::realloc(old_size, new_size);
+
+            void* new_buf = ::realloc(buf, new_size);
+            if (nullptr == new_buf)
+                doris::vectorized::throwFromErrno("Allocator: Cannot realloc from " +
+                                                          std::to_string(old_size) + " to " +
+                                                          std::to_string(new_size) + ".",
+                                                  doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
+
+            buf = new_buf;
+            if constexpr (clear_memory)
+                if (new_size > old_size)
+                    memset(reinterpret_cast<char*>(buf) + old_size, 0, new_size - old_size);
+        } else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD) {
+            /// Resize mmap'd memory region.
+            // CurrentMemoryTracker::realloc(old_size, new_size);
+
+            // On Apple and FreeBSD a self-implemented mremap is used (vec/common/mremap.h).
+            buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE,
+                                    mmap_flags, -1, 0);
+            if (MAP_FAILED == buf)
+                doris::vectorized::throwFromErrno("Allocator: Cannot mremap memory chunk from " +
+                                                          std::to_string(old_size) + " to " +
+                                                          std::to_string(new_size) + ".",
+                                                  doris::TStatusCode::VEC_CANNOT_MREMAP);
+
+            /// No need for zero-fill, because mmap guarantees it.
+        } else if (new_size < MMAP_THRESHOLD) {
+            /// Small allocs that require a copy. Assume there's enough memory in system. Call CurrentMemoryTracker once.
+            // CurrentMemoryTracker::realloc(old_size, new_size);
+
+            void* new_buf = alloc_no_track(new_size, alignment);
+            memcpy(new_buf, buf, std::min(old_size, new_size));
+            free_no_track(buf, old_size);
+            buf = new_buf;
+        } else {
+            /// Big allocs that require a copy. MemoryTracker is called inside 'alloc', 'free' methods.
+            void* new_buf = alloc(new_size, alignment);
+            memcpy(new_buf, buf, std::min(old_size, new_size));
+            free(buf, old_size);
+            buf = new_buf;
+        }
+
+        return buf;
+    }
+
+protected:
+    static constexpr size_t get_stack_threshold() { return 0; } // no in-object buffer here
+
+    static constexpr bool clear_memory = clear_memory_;
+
+    // Freshly mmapped pages are copy-on-write references to a global zero page.
+    // On the first write, a page fault occurs, and an actual writable page is
+    // allocated. If we are going to use this memory soon, such as when resizing
+    // hash tables, it makes sense to pre-fault the pages by passing
+    // MAP_POPULATE to mmap(). This takes some time, but should be faster
+    // overall than having a hot loop interrupted by page faults.
+    // It is only supported on Linux, so the flag is only added when OS_LINUX is defined.
+    static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
+#if defined(OS_LINUX)
+                                      | (mmap_populate ? MAP_POPULATE : 0)
+#endif
+            ;
+
+private:
+    void* alloc_no_track(size_t size, size_t alignment) {
+        void* buf;
+
+        if (size >= MMAP_THRESHOLD) {
+            if (alignment > MMAP_MIN_ALIGNMENT)
+                throw doris::vectorized::Exception(
+                        fmt::format(
+                                "Too large alignment {}: more than page size when allocating {}.",
+                                alignment, size),
+                        doris::TStatusCode::VEC_BAD_ARGUMENTS);
+
+            buf = mmap(get_mmap_hint(), size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+            if (MAP_FAILED == buf)
+                doris::vectorized::throwFromErrno(fmt::format("Allocator: Cannot mmap {}.", size),
+                                                  doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
+
+            /// No need for zero-fill, because mmap guarantees it.
+        } else {
+            if (alignment <= MALLOC_MIN_ALIGNMENT) {
+                if constexpr (clear_memory)
+                    buf = ::calloc(size, 1);
+                else
+                    buf = ::malloc(size);
+
+                if (nullptr == buf)
+                    doris::vectorized::throwFromErrno(
+                            fmt::format("Allocator: Cannot malloc {}.", size),
+                            doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY);
+            } else {
+                buf = nullptr;
+                int res = posix_memalign(&buf, alignment, size);
+
+                if (0 != res)
+                    doris::vectorized::throwFromErrno(
+                            fmt::format("Cannot allocate memory (posix_memalign) {}.", size),
+                            doris::TStatusCode::VEC_CANNOT_ALLOCATE_MEMORY, res);
+
+                if constexpr (clear_memory) memset(buf, 0, size);
+            }
+        }
+        return buf;
+    }
+
+    void free_no_track(void* buf, size_t size) {
+        if (size >= MMAP_THRESHOLD) {
+            if (0 != munmap(buf, size))
+                doris::vectorized::throwFromErrno(fmt::format("Allocator: Cannot munmap {}.", size),
+                                                  doris::TStatusCode::VEC_CANNOT_MUNMAP);
+        } else {
+            ::free(buf);
+        }
+    }
+
+#ifndef NDEBUG
+    /// In debug builds this is the place to request mmap() at random addresses (a kind of ASLR),
+    /// to reproduce more memory stomping bugs. The random hint below is commented out, so for
+    /// now nullptr is returned and the kernel picks the address.
+    void* get_mmap_hint() {
+        // return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(thread_local_rng));
+        return nullptr;
+    }
+#else
+    void* get_mmap_hint() { return nullptr; }
+#endif
+};
+
+/** When using AllocatorWithStackMemory, located on the stack,
+  *  GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.
+  * In fact, the combination of conditions inside AllocatorWithStackMemory does not allow this.
+  */
+#if !__clang__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wfree-nonheap-object"
+#endif
+
+/** Allocator with optimization to place small memory ranges in automatic memory: requests of at
+  * most N bytes are served from the in-object `stack_memory` buffer, larger ones go to Base. */
+template <typename Base, size_t N, size_t Alignment>
+class AllocatorWithStackMemory : private Base {
+private:
+    alignas(Alignment) char stack_memory[N];
+
+public:
+    /// Do not use boost::noncopyable to avoid the warning about direct base
+    /// being inaccessible due to ambiguity, when derived classes are also
+    /// noncopyable (-Winaccessible-base).
+    AllocatorWithStackMemory(const AllocatorWithStackMemory&) = delete;
+    AllocatorWithStackMemory& operator=(const AllocatorWithStackMemory&) = delete;
+    AllocatorWithStackMemory() = default;
+    ~AllocatorWithStackMemory() = default;
+
+    void* alloc(size_t size) {
+        if (size <= N) {
+            if constexpr (Base::clear_memory) memset(stack_memory, 0, N);
+            return stack_memory;
+        }
+
+        return Base::alloc(size, Alignment);
+    }
+
+    void free(void* buf, size_t size) {
+        if (size > N) Base::free(buf, size); // ranges of at most N bytes live in stack_memory
+    }
+
+    void* realloc(void* buf, size_t old_size, size_t new_size) {
+        /// Fits in N bytes: keep `buf`. NOTE(review): assumes old_size <= N as well - confirm.
+        if (new_size <= N) return buf;
+
+        /// Already was big enough to not fit in stack_memory.
+        if (old_size > N) return Base::realloc(buf, old_size, new_size, Alignment);
+
+        /// Was in stack memory, but now will not fit there: copy out, the old range needs no free.
+        void* new_buf = Base::alloc(new_size, Alignment);
+        memcpy(new_buf, buf, old_size);
+        return new_buf;
+    }
+
+protected:
+    static constexpr size_t get_stack_threshold() { return N; }
+};
+
+#if !__clang__
+#pragma GCC diagnostic pop
+#endif
diff --git a/be/src/vec/common/allocator_fwd.h b/be/src/vec/common/allocator_fwd.h
new file mode 100644
index 0000000000..a92665d12c
--- /dev/null
+++ b/be/src/vec/common/allocator_fwd.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Allocator_fwd.h
+// and modified by Doris
+
+/**
+  * This file provides forward declarations for Allocator.
+  */
+#pragma once
+
+template <bool clear_memory_, bool mmap_populate = false>
+class Allocator; // defined in vec/common/allocator.h, which repeats no default arguments
+
+template <typename Base, size_t N = 64, size_t Alignment = 1>
+class AllocatorWithStackMemory; // defined in vec/common/allocator.h, which repeats no defaults
diff --git a/be/src/vec/common/arena.h b/be/src/vec/common/arena.h
new file mode 100644
index 0000000000..c46d790a82
--- /dev/null
+++ b/be/src/vec/common/arena.h
@@ -0,0 +1,285 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Arena.h
+// and modified by Doris
+
+#pragma once
+
+#include <common/compiler_util.h>
+#include <string.h>
+
+#include <boost/noncopyable.hpp>
+#include <memory>
+#include <vector>
+#if __has_include(<sanitizer/asan_interface.h>)
+#include <sanitizer/asan_interface.h>
+#endif
+#include "vec/common/allocator.h"
+#include "vec/common/memcpy_small.h"
+
+#include "gutil/dynamic_annotations.h"
+
+
+namespace doris::vectorized {
+
+/** Memory pool to append something. For example, short strings.
+  * Usage scenario:
+  * - put lot of strings inside pool, keep their addresses;
+  * - addresses remain valid during lifetime of pool;
+  * - at destruction of pool, all memory is freed;
+  * - memory is allocated and freed by large chunks;
+  * - freeing parts of data is not possible (but look at ArenaWithFreeLists if you need);
+  */
+class Arena : private boost::noncopyable {
+private:
+    /// Padding allows to use 'memcpy_small_allow_read_write_overflow15' instead of 'memcpy'.
+    static constexpr size_t pad_right = 15;
+
+    /// Contiguous chunk of memory and pointer to free space inside it. Member of single-linked list.
+    struct alignas(16) Chunk : private Allocator<false> /// empty base optimization
+    {
+        char* begin;
+        char* pos;
+        char* end; /// does not include padding.
+
+        Chunk* prev;
+
+        Chunk(size_t size_, Chunk* prev_) {
+
+            begin = reinterpret_cast<char*>(Allocator<false>::alloc(size_));
+            pos = begin;
+            end = begin + size_ - pad_right;
+            prev = prev_;
+
+            ASAN_POISON_MEMORY_REGION(begin, size_);
+        }
+
+        ~Chunk() {
+            /// We must unpoison the memory before returning to the allocator,
+            /// because the allocator might not have asan integration, and the
+            /// memory would stay poisoned forever. If the allocator supports
+            /// asan, it will correctly poison the memory by itself.
+            ASAN_UNPOISON_MEMORY_REGION(begin, size());
+
+            Allocator<false>::free(begin, size());
+
+            if (prev) delete prev; // recursively destroys all older chunks
+        }
+
+        size_t size() const { return end + pad_right - begin; } // whole allocation, padding included
+        size_t remaining() const { return end - pos; }
+    };
+
+    size_t growth_factor;
+    size_t linear_growth_threshold;
+
+    /// Last contiguous chunk of memory.
+    Chunk* head;
+    size_t size_in_bytes;
+
+    static size_t round_up_to_page_size(size_t s) { return (s + 4096 - 1) / 4096 * 4096; }
+
+    /// If the head chunk's size is less than 'linear_growth_threshold', then use exponential growth, otherwise - linear growth
+    ///  (to not allocate too much excessive memory).
+    size_t next_size(size_t min_next_size) const {
+        size_t size_after_grow = 0;
+
+        if (head->size() < linear_growth_threshold) {
+            size_after_grow = std::max(min_next_size, head->size() * growth_factor);
+        } else {
+            // alloc_continue() combined with linear growth results in quadratic
+            // behavior: we append the data by small amounts, and when it
+            // doesn't fit, we create a new chunk and copy all the previous data
+            // into it. The number of times we do this is directly proportional
+            // to the total size of data that is going to be serialized. To make
+            // the copying happen less often, round the next size up to the
+            // linear_growth_threshold.
+            size_after_grow =
+                    ((min_next_size + linear_growth_threshold - 1) / linear_growth_threshold) *
+                    linear_growth_threshold;
+        }
+
+        assert(size_after_grow >= min_next_size);
+        return round_up_to_page_size(size_after_grow);
+    }
+
+    /// Add next contiguous chunk of memory with size not less than specified.
+    void NO_INLINE add_chunk(size_t min_size) {
+        head = new Chunk(next_size(min_size + pad_right), head);
+        size_in_bytes += head->size();
+    }
+
+    friend class ArenaAllocator;
+    template <size_t>
+    friend class AlignedArenaAllocator;
+
+public:
+    Arena(size_t initial_size_ = 4096, size_t growth_factor_ = 2,
+          size_t linear_growth_threshold_ = 128 * 1024 * 1024)
+            : growth_factor(growth_factor_),
+              linear_growth_threshold(linear_growth_threshold_),
+              head(new Chunk(initial_size_, nullptr)),
+              size_in_bytes(head->size()) {}
+
+    ~Arena() { delete head; }
+
+    /// Get piece of memory, without alignment. Adds a new chunk when the current one is too small.
+    char* alloc(size_t size) {
+        if (UNLIKELY(head->pos + size > head->end)) add_chunk(size);
+
+        char* res = head->pos;
+        head->pos += size;
+        ASAN_UNPOISON_MEMORY_REGION(res, size + pad_right);
+        return res;
+    }
+
+    /// Get piece of memory with the given alignment.
+    char* aligned_alloc(size_t size, size_t alignment) {
+        do {
+            void* head_pos = head->pos;
+            size_t space = head->end - head->pos;
+
+            auto res = static_cast<char*>(std::align(alignment, size, head_pos, space));
+            if (res) {
+                head->pos = static_cast<char*>(head_pos);
+                head->pos += size;
+                ASAN_UNPOISON_MEMORY_REGION(res, size + pad_right);
+                return res;
+            }
+
+            add_chunk(size + alignment);
+        } while (true);
+    }
+
+    template <typename T>
+    T* alloc() {
+        return reinterpret_cast<T*>(aligned_alloc(sizeof(T), alignof(T)));
+    }
+
+    /** Rollback just performed allocation.
+      * Must pass size not more than was just allocated.
+      * Return the resulting head pointer, so that the caller can assert that
+      * the allocation it intended to roll back was indeed the last one.
+      */
+    void* rollback(size_t size) {
+        head->pos -= size;
+        ASAN_POISON_MEMORY_REGION(head->pos, size + pad_right);
+        return head->pos;
+    }
+
+    /** Begin or expand a contiguous range of memory.
+      * 'range_start' is the start of range. If nullptr, a new range is
+      * allocated.
+      * If there is no space in the current chunk to expand the range,
+      * the entire range is copied to a new, bigger memory chunk, and the value
+      * of 'range_start' is updated.
+      * If the optional 'start_alignment' is specified, the start of range is
+      * kept aligned to this value.
+      *
+      * NOTE This method is usable only for the last allocation made on this
+      * Arena. For earlier allocations, see 'realloc' method.
+      */
+    char* alloc_continue(size_t additional_bytes, char const*& range_start,
+                         size_t start_alignment = 0) {
+        if (!range_start) {
+            // Start a new memory range.
+            char* result = start_alignment ? aligned_alloc(additional_bytes, start_alignment)
+                                           : alloc(additional_bytes);
+
+            range_start = result;
+            return result;
+        }
+
+        // Extend an existing memory range with 'additional_bytes'.
+
+        // This method only works for extending the last allocation. For lack of
+        // original size, check a weaker condition: that 'begin' is at least in
+        // the current Chunk.
+        assert(range_start >= head->begin && range_start < head->end);
+
+        if (head->pos + additional_bytes <= head->end) {
+            // The new size fits into the last chunk, so just alloc the
+            // additional size. We can alloc without alignment here, because it
+            // only applies to the start of the range, and we don't change it.
+            return alloc(additional_bytes);
+        }
+
+        // New range doesn't fit into this chunk, will copy to a new one.
+        //
+        // Note: among other things, this method is used to provide a hack-ish
+        // implementation of realloc over Arenas in ArenaAllocators. It wastes a
+        // lot of memory -- quadratically so when we reach the linear allocation
+        // threshold. This deficiency is intentionally left as is, and should be
+        // solved not by complicating this method, but by rethinking the
+        // approach to memory management for aggregate function states, so that
+        // we can provide a proper realloc().
+        const size_t existing_bytes = head->pos - range_start;
+        const size_t new_bytes = existing_bytes + additional_bytes;
+        const char* old_range = range_start;
+
+        char* new_range =
+                start_alignment ? aligned_alloc(new_bytes, start_alignment) : alloc(new_bytes);
+
+        memcpy(new_range, old_range, existing_bytes);
+
+        range_start = new_range;
+        return new_range + existing_bytes;
+    }
+
+    /// NOTE Old memory region is wasted. NOTE(review): copies old_size bytes, so presumably new_size >= old_size.
+    char* realloc(const char* old_data, size_t old_size, size_t new_size) {
+        char* res = alloc(new_size);
+        if (old_data) {
+            memcpy(res, old_data, old_size);
+            ASAN_POISON_MEMORY_REGION(old_data, old_size);
+        }
+        return res;
+    }
+
+    char* aligned_realloc(const char* old_data, size_t old_size, size_t new_size, size_t alignment) {
+        char* res = aligned_alloc(new_size, alignment);
+        if (old_data) {
+            memcpy(res, old_data, old_size);
+            ASAN_POISON_MEMORY_REGION(old_data, old_size);
+        }
+        return res;
+    }
+
+    /// Insert string without alignment.
+    const char* insert(const char* data, size_t size) {
+        char* res = alloc(size);
+        memcpy(res, data, size);
+        return res;
+    }
+
+    const char* aligned_insert(const char* data, size_t size, size_t alignment) {
+        char* res = aligned_alloc(size, alignment);
+        memcpy(res, data, size);
+        return res;
+    }
+
+    /// Total size of all chunks in bytes (each chunk's padding included).
+    size_t size() const { return size_in_bytes; }
+
+    size_t remaining_space_in_current_chunk() const { return head->remaining(); }
+};
+
+using ArenaPtr = std::shared_ptr<Arena>;
+using Arenas = std::vector<ArenaPtr>;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/common/arithmetic_overflow.h b/be/src/vec/common/arithmetic_overflow.h
new file mode 100644
index 0000000000..0d0828a61b
--- /dev/null
+++ b/be/src/vec/common/arithmetic_overflow.h
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/arithmeticOverflow.h
+// and modified by Doris
+
+#pragma once
+
+namespace common {
+/// Overflow-checked arithmetic. Every function stores the (wrapped) result in `res` and
+/// returns true iff the mathematically exact result does not fit in T.
+template <typename T>
+inline bool add_overflow(T x, T y, T& res) {
+    return __builtin_add_overflow(x, y, &res);
+}
+
+template <>
+inline bool add_overflow(int x, int y, int& res) {
+    return __builtin_sadd_overflow(x, y, &res);
+}
+
+template <>
+inline bool add_overflow(long x, long y, long& res) {
+    return __builtin_saddl_overflow(x, y, &res);
+}
+
+template <>
+inline bool add_overflow(long long x, long long y, long long& res) {
+    return __builtin_saddll_overflow(x, y, &res);
+}
+
+/// __int128: wraps via unsigned math (no UB); overflow iff x and y share a sign that res lacks.
+template <>
+inline bool add_overflow(__int128 x, __int128 y, __int128& res) {
+    res = static_cast<unsigned __int128>(x) + static_cast<unsigned __int128>(y);
+    return ((x < 0) == (y < 0)) && ((res < 0) != (x < 0));
+}
+
+template <typename T>
+inline bool sub_overflow(T x, T y, T& res) {
+    return __builtin_sub_overflow(x, y, &res);
+}
+
+template <>
+inline bool sub_overflow(int x, int y, int& res) {
+    return __builtin_ssub_overflow(x, y, &res);
+}
+
+template <>
+inline bool sub_overflow(long x, long y, long& res) {
+    return __builtin_ssubl_overflow(x, y, &res);
+}
+
+template <>
+inline bool sub_overflow(long long x, long long y, long long& res) {
+    return __builtin_ssubll_overflow(x, y, &res);
+}
+
+/// __int128: wraps via unsigned math; overflow iff sign(x) != sign(y) and sign(res) != sign(x).
+template <>
+inline bool sub_overflow(__int128 x, __int128 y, __int128& res) {
+    res = static_cast<unsigned __int128>(x) - static_cast<unsigned __int128>(y);
+    return ((x < 0) != (y < 0)) && ((res < 0) != (x < 0));
+}
+
+template <typename T>
+inline bool mul_overflow(T x, T y, T& res) {
+    return __builtin_mul_overflow(x, y, &res);
+}
+
+template <>
+inline bool mul_overflow(int x, int y, int& res) {
+    return __builtin_smul_overflow(x, y, &res);
+}
+
+template <>
+inline bool mul_overflow(long x, long y, long& res) {
+    return __builtin_smull_overflow(x, y, &res);
+}
+
+template <>
+inline bool mul_overflow(long long x, long long y, long long& res) {
+    return __builtin_smulll_overflow(x, y, &res);
+}
+
+/// __int128: product wraps via unsigned math. Overflow is decided on magnitudes against the
+/// signed limit: |x| * |y| may reach 2^127 when the exact product is negative, else 2^127 - 1.
+template <>
+inline bool mul_overflow(__int128 x, __int128 y, __int128& res) {
+    using U = unsigned __int128;
+    res = static_cast<U>(x) * static_cast<U>(y); /// Avoid signed integer overflow.
+    if (!x || !y) return false;
+    const U a = (x < 0) ? -static_cast<U>(x) : static_cast<U>(x); // |x|, safe for the minimum
+    const U b = (y < 0) ? -static_cast<U>(y) : static_cast<U>(y);
+    const U limit = (U(1) << 127) - (((x < 0) == (y < 0)) ? 1 : 0);
+    return a > limit / b;
+}
+} // namespace common
diff --git a/be/src/vec/common/assert_cast.h b/be/src/vec/common/assert_cast.h
new file mode 100644
index 0000000000..0c3d32ec6f
--- /dev/null
+++ b/be/src/vec/common/assert_cast.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/assert_cast.h
+// and modified by Doris
+
+#pragma once
+
+#include <string>
+#include <type_traits>
+#include <typeindex>
+#include <typeinfo>
+
+#include "common/logging.h"
+#include "fmt/format.h"
+#include "vec/common/demangle.h"
+#include "vec/common/exception.h"
+
+/** static_cast with an exact-type sanity check in debug builds.
+  * When NDEBUG is defined this is a plain static_cast<To>(from).
+  * Otherwise the typeid of the argument (of the pointee, when To is a pointer) is compared
+  * with the typeid of the target; on mismatch, or when the typeid evaluation throws,
+  * a fatal error is logged.
+  * The match has to be exact, so a cast to an ancestor type is reported as a bad cast.
+  */
+template <typename To, typename From>
+To assert_cast(From&& from) {
+#ifdef NDEBUG
+    return static_cast<To>(from);
+#else
+    try {
+        bool exact_match = false;
+        if constexpr (std::is_pointer_v<To>) {
+            /// Compare the type of the pointed-to object.
+            exact_match = typeid(*from) == typeid(std::remove_pointer_t<To>);
+        } else {
+            exact_match = typeid(from) == typeid(To);
+        }
+        if (exact_match) return static_cast<To>(from);
+    } catch (const std::exception& e) {
+        LOG(FATAL) << "assert cast err:" << e.what();
+    }
+
+    LOG(FATAL) << fmt::format("Bad cast from type:{} to {}", demangle(typeid(from).name()),
+                              demangle(typeid(To).name()));
+    __builtin_unreachable();
+#endif
+}
diff --git a/be/src/vec/common/bit_cast.h b/be/src/vec/common/bit_cast.h
new file mode 100644
index 0000000000..a31fa7a6e8
--- /dev/null
+++ b/be/src/vec/common/bit_cast.h
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/bit_cast.h
+// and modified by Doris
+
+#pragma once
+
+#include <string.h>
+
+#include <algorithm>
+#include <type_traits>
+
+namespace ext {
+/// Reinterprets the bytes of `from` as a value of type `To`.
+/// The result starts out as `To {}` and then min(sizeof(To), sizeof(From)) bytes of `from`
+/// are copied over it, so the two types are allowed to differ in size: surplus bytes of
+/// `from` are ignored and result bytes that are not covered keep their initial state.
+template <typename To, typename From>
+std::decay_t<To> bit_cast(const From& from) {
+    To result {};
+    const auto copied_bytes = std::min(sizeof(result), sizeof(from));
+    memcpy(static_cast<void*>(&result), &from, copied_bytes);
+    return result;
+}
+
+/// Same as bit_cast, but fails to compile unless `To` and `From` have the same size.
+template <typename To, typename From>
+std::decay_t<To> safe_bit_cast(const From& from) {
+    static_assert(sizeof(To) == sizeof(From), "bit cast on types of different width");
+    return bit_cast<To, From>(from);
+}
+} // namespace ext
diff --git a/be/src/vec/common/bit_helpers.h b/be/src/vec/common/bit_helpers.h
new file mode 100644
index 0000000000..c2c480637a
--- /dev/null
+++ b/be/src/vec/common/bit_helpers.h
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/BitHelpers.h
+// and modified by Doris
+
+#pragma once
+
+#include <cstddef>
+#include <type_traits>
+
+/** Index of the highest set bit of x, i.e. log2(x) rounded down.
+  * Computed from the count of leading zero bits.
+  * The result is unspecified when x is zero.
+  */
+inline unsigned int bit_scan_reverse(unsigned int x) {
+    const unsigned int top_bit_index = sizeof(unsigned int) * 8 - 1;
+    const unsigned int leading_zeros = __builtin_clz(x);
+    return top_bit_index - leading_zeros;
+}
+
+/** Rounds n up to the nearest power of two.
+  * For zero argument, result is zero.
+  * For arguments above the largest power of two representable in size_t, result is zero.
+  * Arguments that already are a power of two are returned unchanged.
+  */
+inline size_t round_up_to_power_of_two_or_zero(size_t n) {
+    --n;
+    /// Smear the highest set bit into every lower position. The shift amounts are derived
+    /// from the width of size_t instead of being hard-coded up to 32, which would be an
+    /// out-of-range shift on platforms where size_t is only 32 bits wide.
+    for (size_t shift = 1; shift < sizeof(size_t) * 8; shift <<= 1) {
+        n |= n >> shift;
+    }
+    /// All bits below the highest one are set now; adding one yields the next power of two
+    /// (or wraps to zero).
+    ++n;
+
+    return n;
+}
+
+/// Number of leading zero bits of x. For x == 0 the full bit width of T is returned.
+/// The builtin is selected by sizeof(T): clz up to `unsigned int`, clzl up to
+/// `unsigned long`, clzll otherwise.
+/// NOTE(review): for T narrower than `unsigned int` the count appears to be taken over the
+/// width of `unsigned int`, and T wider than `unsigned long long` is passed to clzll as is;
+/// confirm that callers only use 32 and 64 bit types.
+template <typename T>
+inline size_t get_leading_zero_bits(T x) {
+    if (x) {
+        if constexpr (sizeof(T) > sizeof(unsigned long int)) {
+            return __builtin_clzll(x);
+        } else if constexpr (sizeof(T) > sizeof(unsigned int)) {
+            return __builtin_clzl(x);
+        } else {
+            return __builtin_clz(x);
+        }
+    }
+
+    return sizeof(T) * 8;
+}
+
+/// Number of trailing zero bits of x. For x == 0 the full bit width of T is returned.
+/// The builtin is selected by sizeof(T): ctz up to `unsigned int`, ctzl up to
+/// `unsigned long`, ctzll otherwise.
+template <typename T>
+inline size_t get_trailing_zero_bits(T x) {
+    if (x) {
+        if constexpr (sizeof(T) > sizeof(unsigned long int)) {
+            return __builtin_ctzll(x);
+        } else if constexpr (sizeof(T) > sizeof(unsigned int)) {
+            return __builtin_ctzl(x);
+        } else {
+            return __builtin_ctz(x);
+        }
+    }
+
+    return sizeof(T) * 8;
+}
+
+/** Builds a mask with the `bits` least significant bits set:
+ * mask_low_bits<UInt8>(3) => 00000111
+ * Zero bits give an empty mask; `bits` at or above the width of T gives all ones.
+ */
+template <typename T>
+inline T mask_low_bits(unsigned char bits) {
+    constexpr size_t total_bits = sizeof(T) * 8;
+
+    if (bits == 0) {
+        return 0;
+    }
+
+    const T all_ones = static_cast<T>(~T {0});
+    if (bits >= total_bits) {
+        return all_ones;
+    }
+
+    /// Drop the surplus high bits by shifting the all-ones pattern to the right.
+    return static_cast<T>(all_ones >> (total_bits - bits));
+}
diff --git a/be/src/vec/common/columns_hashing.h b/be/src/vec/common/columns_hashing.h
new file mode 100644
index 0000000000..26bc0d5c92
--- /dev/null
+++ b/be/src/vec/common/columns_hashing.h
@@ -0,0 +1,242 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ColumnsHashing.h
+// and modified by Doris
+
+#pragma once
+
+#include <memory>
+
+#include "vec/columns/column_string.h"
+#include "vec/common/arena.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/columns_hashing_impl.h"
+#include "vec/common/hash_table/hash_table.h"
+#include "vec/common/hash_table/hash_table_key_holder.h"
+#include "vec/common/unaligned.h"
+
+namespace doris::vectorized {
+
+namespace ColumnsHashing {
+
+/// Hash method for a single numeric key column.
+/// FieldType is the type loaded for each row (UInt8/16/32/64 for any type with the
+/// corresponding bit width).
+template <typename Value, typename Mapped, typename FieldType, bool use_cache = true>
+struct HashMethodOneNumber : public columns_hashing_impl::HashMethodBase<
+                                     HashMethodOneNumber<Value, Mapped, FieldType, use_cache>,
+                                     Value, Mapped, use_cache> {
+    using Self = HashMethodOneNumber<Value, Mapped, FieldType, use_cache>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+
+    /// Raw data of the key column; the key of row i is read at i * sizeof(FieldType).
+    const char* vec;
+
+    /// Uses the raw data of the first key column. The key sizes and the context are ignored.
+    HashMethodOneNumber(const ColumnRawPtrs& key_columns, const Sizes& /*key_sizes*/,
+                        const HashMethodContextPtr&)
+            : vec(key_columns[0]->get_raw_data().data) {}
+
+    /// Uses the raw data of the given column.
+    HashMethodOneNumber(const IColumn* column) : vec(column->get_raw_data().data) {}
+
+    /// (const HashMethodContext::Settings &) -> HashMethodContextPtr
+    /// Creates context. Method is called once and result context is used in all threads.
+    using Base::createContext;
+
+    /// (Data & data, size_t row, Arena & pool) -> EmplaceResult
+    /// Emplaces the key of the given row into `data` (a HashTable or HashMap). If Data is a
+    /// HashMap, the result gives access to the mapped value.
+    /// For Serialized method, key may be placed in pool.
+    using Base::emplace_key;
+
+    /// (Data & data, size_t row, Arena & pool) -> FindResult
+    /// Looks the key of the given row up in `data`. If Data is a HashMap and the key was
+    /// found, the result gives access to the mapped value.
+    using Base::find_key;
+
+    /// (const Data & data, size_t row, Arena & pool) -> size_t
+    /// Get hash value of row.
+    using Base::get_hash;
+
+    /// Is used for default implementation in HashMethodBase.
+    /// Loads the key of the given row from the raw column data (unaligned load).
+    FieldType get_key_holder(size_t row, Arena&) const {
+        const char* key_address = vec + row * sizeof(FieldType);
+        return unaligned_load<FieldType>(key_address);
+    }
+};
+
+/// Hash method for a single string key column.
+/// With place_string_to_arena the key is wrapped into an ArenaKeyHolder together with the
+/// pool, otherwise a plain StringRef into the column data is used.
+template <typename Value, typename Mapped, bool place_string_to_arena = true, bool use_cache = true>
+struct HashMethodString : public columns_hashing_impl::HashMethodBase<
+                                  HashMethodString<Value, Mapped, place_string_to_arena, use_cache>,
+                                  Value, Mapped, use_cache> {
+    using Self = HashMethodString<Value, Mapped, place_string_to_arena, use_cache>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+
+    /// Offsets and character data of the ColumnString used as key column.
+    const IColumn::Offset* offsets;
+    const UInt8* chars;
+
+    /// The first key column has to be a ColumnString. The key sizes and the context are ignored.
+    HashMethodString(const ColumnRawPtrs& key_columns, const Sizes& /*key_sizes*/,
+                     const HashMethodContextPtr&) {
+        const auto& column_string = assert_cast<const ColumnString&>(*key_columns[0]);
+        offsets = column_string.get_offsets().data();
+        chars = column_string.get_chars().data();
+    }
+
+    /// Builds the key for the given row: it starts at offsets[row - 1] and its length is the
+    /// distance to offsets[row] minus one.
+    /// NOTE(review): for row == 0 this reads offsets[-1]; presumably the offsets array is
+    /// padded on the left and the dropped byte is a terminating zero - confirm against
+    /// ColumnString.
+    auto get_key_holder(ssize_t row, [[maybe_unused]] Arena& pool) const {
+        const auto begin = offsets[row - 1];
+        const auto length = offsets[row] - begin - 1;
+        StringRef key(chars + begin, length);
+
+        if constexpr (!place_string_to_arena) {
+            return key;
+        } else {
+            return ArenaKeyHolder {key, pool};
+        }
+    }
+
+protected:
+    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+};
+
+/** Hash method that uses the concatenation of the serialized key values as key.
+  * A serialized value can be deserialized knowing only the position where it starts.
+  * For strings, for example, the serialized length comes first and the bytes follow,
+  * so aggregating by several strings is not ambiguous.
+  * The consecutive keys cache of HashMethodBase is switched off for this method.
+  */
+template <typename Value, typename Mapped>
+struct HashMethodSerialized
+        : public columns_hashing_impl::HashMethodBase<HashMethodSerialized<Value, Mapped>, Value,
+                                                      Mapped, false> {
+    using Self = HashMethodSerialized<Value, Mapped>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
+
+    /// Copy of the key column pointers and their count.
+    ColumnRawPtrs key_columns;
+    size_t keys_size;
+
+    /// The key sizes and the context are ignored.
+    HashMethodSerialized(const ColumnRawPtrs& key_columns_, const Sizes& /*key_sizes*/,
+                         const HashMethodContextPtr&)
+            : key_columns(key_columns_), keys_size(key_columns_.size()) {}
+
+protected:
+    friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, false>;
+
+    /// Serializes all key columns of the given row into the pool and returns a holder that
+    /// carries the serialized key together with the pool.
+    ALWAYS_INLINE SerializedKeyHolder get_key_holder(size_t row, Arena& pool) const {
+        auto serialized_key = serialize_keys_to_pool_contiguous(row, keys_size, key_columns, pool);
+        return SerializedKeyHolder {serialized_key, pool};
+    }
+};
+
+/// Hash method that uses a UInt128 value computed by hash128() over all key columns of a
+/// row as the key.
+template <typename Value, typename Mapped, bool use_cache = true>
+struct HashMethodHashed
+        : public columns_hashing_impl::HashMethodBase<HashMethodHashed<Value, Mapped, use_cache>,
+                                                      Value, Mapped, use_cache> {
+    using Self = HashMethodHashed<Value, Mapped, use_cache>;
+    using Base = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+    using Key = UInt128;
+
+    /// Own copy of the key column pointers.
+    ColumnRawPtrs key_columns;
+
+    /// Takes over the key column pointers. The key sizes and the context are ignored.
+    HashMethodHashed(ColumnRawPtrs key_columns_, const Sizes&, const HashMethodContextPtr&)
+            : key_columns(std::move(key_columns_)) {}
+
+    /// Key of the given row: hash128() of the row over all key columns.
+    ALWAYS_INLINE Key get_key_holder(size_t row, Arena&) const {
+        const size_t columns_count = key_columns.size();
+        return hash128(row, columns_count, key_columns);
+    }
+};
+
+/// Hash method for keys that all have a fixed length and together fit into Key
+/// (for example, 128 bits). The key of a row is produced by pack_fixed(); with
+/// has_nullable_keys_ the null bitmap of the row is passed to pack_fixed() as well.
+template <typename Value, typename Key, typename Mapped, bool has_nullable_keys_ = false,
+          bool use_cache = true>
+struct HashMethodKeysFixed
+        : private columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>,
+          public columns_hashing_impl::HashMethodBase<
+                  HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, use_cache>, Value,
+                  Mapped, use_cache> {
+    using Self = HashMethodKeysFixed<Value, Key, Mapped, has_nullable_keys_, use_cache>;
+    using BaseHashed = columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
+    using Base = columns_hashing_impl::BaseStateKeysFixed<Key, has_nullable_keys_>;
+
+    /// Held by reference: the Sizes object given to the constructor has to outlive this object.
+    const Sizes& key_sizes;
+    size_t keys_size;
+
+    /// Remembers the key sizes and the number of key columns. The context is ignored.
+    HashMethodKeysFixed(const ColumnRawPtrs& key_columns, const Sizes& key_sizes_,
+                        const HashMethodContextPtr&)
+            : Base(key_columns), key_sizes(key_sizes_), keys_size(key_columns.size()) {}
+
+    /// Packs the key values of the given row into a single Key.
+    ALWAYS_INLINE Key get_key_holder(size_t row, Arena&) const {
+        if constexpr (!has_nullable_keys_) {
+            return pack_fixed<Key>(row, keys_size, Base::get_actual_columns(), key_sizes);
+        } else {
+            /// The bitmap marks the key columns that are null in this row.
+            auto null_bitmap = Base::create_bitmap(row);
+            return pack_fixed<Key>(row, keys_size, Base::get_actual_columns(), key_sizes,
+                                   null_bitmap);
+        }
+    }
+};
+
+/// Wraps a single-column hash method so that it can be used with a nullable key column.
+/// The wrapped method works on the nested column; rows that are null are routed to the
+/// dedicated null key slot of the hash table instead.
+template <typename SingleColumnMethod, typename Mapped, bool use_cache>
+struct HashMethodSingleLowNullableColumn : public SingleColumnMethod {
+    using Base = SingleColumnMethod;
+
+    static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
+    using EmplaceResult = columns_hashing_impl::EmplaceResultImpl<Mapped>;
+    using FindResult = columns_hashing_impl::FindResultImpl<Mapped>;
+
+    /// This method has no context.
+    static HashMethodContextPtr createContext(const HashMethodContext::Settings& settings) {
+        return nullptr;
+    }
+
+    /// The original (nullable) key columns, used for the null check.
+    ColumnRawPtrs key_columns;
+
+    /// Returns the nested column of the given ColumnNullable as a one-element column list.
+    static const ColumnRawPtrs get_nested_column(const IColumn* col) {
+        auto* nullable_column = check_and_get_column<ColumnNullable>(*col);
+        DCHECK(nullable_column != nullptr);
+        const auto nested_column = nullable_column->get_nested_column_ptr().get();
+        return {nested_column};
+    }
+
+    /// The wrapped method is constructed on the nested column of the first key column.
+    HashMethodSingleLowNullableColumn(const ColumnRawPtrs& key_columns_nullable,
+                                      const Sizes& key_sizes, const HashMethodContextPtr& context)
+            : Base(get_nested_column(key_columns_nullable[0]), key_sizes, context),
+              key_columns(key_columns_nullable) {}
+
+    /// Emplaces the key of the given row into `data`.
+    /// A null row marks the null key of `data` as present and reports it as inserted only the
+    /// first time. Any other row is emplaced through the wrapped method's key holder; a newly
+    /// inserted mapped value is default-constructed in place.
+    template <typename Data>
+    ALWAYS_INLINE EmplaceResult emplace_key(Data& data, size_t row, Arena& pool) {
+        if (key_columns[0]->is_null_at(row)) {
+            const bool first_null_key = !data.has_null_key_data();
+            data.has_null_key_data() = true;
+
+            if constexpr (has_mapped) {
+                return EmplaceResult(data.get_null_key_data(), data.get_null_key_data(),
+                                     first_null_key);
+            } else {
+                return EmplaceResult(first_null_key);
+            }
+        }
+
+        auto key_holder = Base::get_key_holder(row, pool);
+
+        typename Data::LookupResult lookup;
+        bool is_new_key = false;
+        data.emplace(key_holder, lookup, is_new_key);
+
+        if constexpr (!has_mapped) {
+            return EmplaceResult(is_new_key);
+        } else {
+            auto& mapped = *lookup_result_get_mapped(lookup);
+            if (is_new_key) {
+                new (&mapped) Mapped();
+            }
+            return EmplaceResult(mapped, mapped, is_new_key);
+        }
+    }
+};
+
+} // namespace ColumnsHashing
+} // namespace doris::vectorized
diff --git a/be/src/vec/common/columns_hashing_impl.h b/be/src/vec/common/columns_hashing_impl.h
new file mode 100644
index 0000000000..18d02b9a2e
--- /dev/null
+++ b/be/src/vec/common/columns_hashing_impl.h
@@ -0,0 +1,324 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ColumnsHashingImpl.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/aggregation_common.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/hash_table/hash_table_key_holder.h"
+// #include <Interpreters/AggregationCommon.h>
+
+namespace doris::vectorized {
+
+namespace ColumnsHashing {
+
+/// Generic context for HashMethod. Context is shared between multiple threads, all methods must be thread-safe.
+/// Is used for caching.
+/// The class itself only provides a virtual destructor, so that derived contexts can be
+/// destroyed through a pointer to this base.
+class HashMethodContext {
+public:
+    virtual ~HashMethodContext() = default;
+
+    /// Parameters handed to the createContext() functions of the hash methods.
+    struct Settings {
+        /// NOTE(review): presumably the number of threads that will share the context - confirm.
+        size_t max_threads;
+    };
+};
+
+/// Shared ownership handle for a context; the createContext() functions in this file return nullptr.
+using HashMethodContextPtr = std::shared_ptr<HashMethodContext>;
+
+namespace columns_hashing_impl {
+
+/// Remembers the element of the most recent lookup, so that a run of equal keys can be
+/// answered without touching the hash table.
+///   value - cached element (a key/mapped pair when check(const Key&) is used),
+///   empty - true until something has been cached,
+///   found - whether the cached key was present in the table.
+template <typename Value, bool consecutive_keys_optimization_>
+struct LastElementCache {
+    static constexpr bool consecutive_keys_optimization = consecutive_keys_optimization_;
+    Value value;
+    bool empty = true;
+    bool found = false;
+
+    /// True if the cache is filled and holds exactly this value.
+    bool check(const Value& value_) {
+        if (empty) return false;
+        return value == value_;
+    }
+
+    /// True if the cache is filled and the key part of the cached pair equals `key`.
+    template <typename Key>
+    bool check(const Key& key) {
+        if (empty) return false;
+        return value.first == key;
+    }
+};
+
+/// Cache switched off: no state at all.
+template <typename Data>
+struct LastElementCache<Data, false> {
+    static constexpr bool consecutive_keys_optimization = false;
+};
+
+/// Result of emplacing a key into a hash map.
+/// Holds references to two mapped values (`value` and `cached_value`, which may refer to
+/// the same object) and a flag telling whether the key was newly inserted.
+template <typename Mapped>
+class EmplaceResultImpl {
+    Mapped& value;
+    Mapped& cached_value;
+    bool inserted;
+
+public:
+    EmplaceResultImpl(Mapped& value_, Mapped& cached_value_, bool inserted_)
+            : value(value_), cached_value(cached_value_), inserted(inserted_) {}
+
+    /// True if the key was not present before the emplace.
+    bool is_inserted() const { return inserted; }
+    /// Reference to `value`.
+    auto& get_mapped() const { return value; }
+
+    /// Assigns `mapped` to both referenced values, so that they stay in agreement.
+    void set_mapped(const Mapped& mapped) {
+        cached_value = mapped;
+        value = mapped;
+    }
+};
+
+/// Result of emplacing a key into a hash table without mapped values: only the
+/// "newly inserted" flag is available.
+template <>
+class EmplaceResultImpl<void> {
+    bool inserted;
+
+public:
+    explicit EmplaceResultImpl(bool inserted_) : inserted(inserted_) {}
+    bool is_inserted() const { return inserted; }
+};
+
+/// Result of looking a key up in a hash map: a pointer to the mapped value and a flag
+/// telling whether the key was found.
+template <typename Mapped>
+class FindResultImpl {
+    Mapped* value;
+    bool found;
+
+public:
+    FindResultImpl(Mapped* value_, bool found_) : value(value_), found(found_) {}
+    bool is_found() const { return found; }
+    /// Dereferences the stored pointer without checking it; find_key_impl() passes nullptr
+    /// when the key is missing, so check is_found() first.
+    Mapped& get_mapped() const { return *value; }
+};
+
+/// Result of looking a key up in a hash table without mapped values: only the "found"
+/// flag is available.
+template <>
+class FindResultImpl<void> {
+    bool found;
+
+public:
+    explicit FindResultImpl(bool found_) : found(found_) {}
+    bool is_found() const { return found; }
+};
+
+/// Common base of the hash methods (CRTP: `Derived` is the concrete method).
+/// `Derived` provides get_key_holder(row, pool); this class implements emplace, find, hash
+/// and prefetch on top of it. With `consecutive_keys_optimization` the element of the last
+/// lookup is kept in `cache` and reused when the same key comes again.
+/// `Mapped` is void for hash tables without mapped values.
+template <typename Derived, typename Value, typename Mapped, bool consecutive_keys_optimization>
+class HashMethodBase {
+public:
+    using EmplaceResult = EmplaceResultImpl<Mapped>;
+    using FindResult = FindResultImpl<Mapped>;
+    static constexpr bool has_mapped = !std::is_same<Mapped, void>::value;
+    using Cache = LastElementCache<Value, consecutive_keys_optimization>;
+
+    /// Default: the method has no context.
+    static HashMethodContextPtr createContext(const HashMethodContext::Settings&) {
+        return nullptr;
+    }
+
+    /// Emplaces the key of the given row into `data`.
+    template <typename Data>
+    ALWAYS_INLINE EmplaceResult emplace_key(Data& data, size_t row, Arena& pool) {
+        auto key_holder = static_cast<Derived&>(*this).get_key_holder(row, pool);
+        return emplaceImpl(key_holder, data);
+    }
+
+    /// Looks the key of the given row up in `data`.
+    template <typename Data>
+    ALWAYS_INLINE FindResult find_key(Data& data, size_t row, Arena& pool) {
+        auto key_holder = static_cast<Derived&>(*this).get_key_holder(row, pool);
+        return find_key_impl(key_holder_get_key(key_holder), data);
+    }
+
+    /// Hash of the key of the given row, as computed by `data.hash()`.
+    template <typename Data>
+    ALWAYS_INLINE size_t get_hash(const Data& data, size_t row, Arena& pool) {
+        auto key_holder = static_cast<Derived&>(*this).get_key_holder(row, pool);
+        return data.hash(key_holder_get_key(key_holder));
+    }
+
+    /// Forwards the key holder of the given row to `data.prefetch()`.
+    template <typename Data>
+    ALWAYS_INLINE void prefetch(Data& data, size_t row, Arena& pool) {
+        auto key_holder = static_cast<Derived&>(*this).get_key_holder(row, pool);
+        data.prefetch(key_holder);
+    }
+
+protected:
+    /// Element of the most recent lookup; has no state when the optimization is off.
+    Cache cache;
+
+    /// Gives the cached element a defined initial value when the cache is in use.
+    HashMethodBase() {
+        if constexpr (consecutive_keys_optimization) {
+            if constexpr (has_mapped) {
+                /// Init PairNoInit elements.
+                cache.value.second = Mapped();
+                cache.value.first = {};
+            } else
+                cache.value = Value();
+        }
+    }
+
+    /// Emplaces the key held by `key_holder` into `data`.
+    /// If the key equals the cached one and that key is known to be in the table, the cached
+    /// element is returned (inserted == false) without touching `data`.
+    /// Otherwise the key is emplaced, a new mapped value is default-constructed in place,
+    /// and the cache is refreshed with copies of the key and the mapped value.
+    template <typename Data, typename KeyHolder>
+    ALWAYS_INLINE EmplaceResult emplaceImpl(KeyHolder& key_holder, Data& data) {
+        if constexpr (Cache::consecutive_keys_optimization) {
+            if (cache.found && cache.check(key_holder_get_key(key_holder))) {
+                if constexpr (has_mapped)
+                    return EmplaceResult(cache.value.second, cache.value.second, false);
+                else
+                    return EmplaceResult(false);
+            }
+        }
+
+        typename Data::LookupResult it;
+        bool inserted = false;
+        data.emplace(key_holder, it, inserted);
+
+        /// Without the cache, `cached` simply refers to the mapped value in the table.
+        [[maybe_unused]] Mapped* cached = nullptr;
+        if constexpr (has_mapped) cached = lookup_result_get_mapped(it);
+
+        if (inserted) {
+            if constexpr (has_mapped) {
+                /// The table hands out raw storage for a new key: construct the value in place.
+                new (lookup_result_get_mapped(it)) Mapped();
+            }
+        }
+
+        if constexpr (consecutive_keys_optimization) {
+            cache.found = true;
+            cache.empty = false;
+
+            if constexpr (has_mapped) {
+                /// Copy key and mapped value into the cache and let `cached` refer to the copy.
+                cache.value.first = *lookup_result_get_key(it);
+                cache.value.second = *lookup_result_get_mapped(it);
+                cached = &cache.value.second;
+            } else {
+                cache.value = *lookup_result_get_key(it);
+            }
+        }
+
+        /// The result references both the value in the table and the cached copy (if any),
+        /// so that EmplaceResult::set_mapped() updates both.
+        if constexpr (has_mapped)
+            return EmplaceResult(*lookup_result_get_mapped(it), *cached, inserted);
+        else
+            return EmplaceResult(inserted);
+    }
+
+    /// Looks `key` up in `data`.
+    /// A cache hit is answered from the cache, including a remembered "not found"; on a hit
+    /// the returned mapped pointer refers to the cached copy, not to the value in the table.
+    /// Otherwise the table is searched and the outcome (found or not) is cached.
+    template <typename Data, typename Key>
+    ALWAYS_INLINE FindResult find_key_impl(Key key, Data& data) {
+        if constexpr (Cache::consecutive_keys_optimization) {
+            if (cache.check(key)) {
+                if constexpr (has_mapped)
+                    return FindResult(&cache.value.second, cache.found);
+                else
+                    return FindResult(cache.found);
+            }
+        }
+
+        auto it = data.find(key);
+
+        if constexpr (consecutive_keys_optimization) {
+            cache.found = it != nullptr;
+            cache.empty = false;
+
+            if constexpr (has_mapped) {
+                cache.value.first = key;
+                /// The cached mapped value is only refreshed when the key exists.
+                if (it) {
+                    cache.value.second = *lookup_result_get_mapped(it);
+                }
+            } else {
+                cache.value = key;
+            }
+        }
+
+        if constexpr (has_mapped)
+            return FindResult(it ? lookup_result_get_mapped(it) : nullptr, it != nullptr);
+        else
+            return FindResult(it != nullptr);
+    }
+};
+
+/// Array of mapped values: a PaddedPODArray<T> without any additions.
+template <typename T>
+struct MappedCache : public PaddedPODArray<T> {};
+
+/// No storage at all when there is no mapped type.
+template <>
+struct MappedCache<void> {};
+
+/// Provides what HashMethodKeysFixed needs in order to support nullable keys.
+/// Without nullable keys the class is little more than an empty shell around the
+/// key columns.
+template <typename Key, bool has_nullable_keys>
+class BaseStateKeysFixed;
+
+/// Case where nullable keys are supported.
+template <typename Key>
+class BaseStateKeysFixed<Key, true> {
+protected:
+    /// Splits every key column into the column holding the values and its null map.
+    /// A column that is not nullable is used as is and gets a nullptr null map.
+    BaseStateKeysFixed(const ColumnRawPtrs& key_columns) {
+        const size_t columns_count = key_columns.size();
+        null_maps.reserve(columns_count);
+        actual_columns.reserve(columns_count);
+
+        for (const auto& column : key_columns) {
+            auto* nullable = check_and_get_column<ColumnNullable>(column);
+            if (nullable == nullptr) {
+                actual_columns.push_back(column);
+                null_maps.push_back(nullptr);
+                continue;
+            }
+            actual_columns.push_back(&nullable->get_nested_column());
+            null_maps.push_back(&nullable->get_null_map_column());
+        }
+    }
+
+    /// The columns that actually hold the key values: the nested column for a nullable
+    /// key column, the key column itself otherwise.
+    inline const ColumnRawPtrs& get_actual_columns() const { return actual_columns; }
+
+    /// Builds the null bitmap of a row: bit k (bit k % 8 of byte k / 8) is set when key
+    /// column k is null in that row.
+    KeysNullMap<Key> create_bitmap(size_t row) const {
+        KeysNullMap<Key> bitmap {};
+
+        for (size_t k = 0; k < null_maps.size(); ++k) {
+            /// Columns without a null map can never be null.
+            if (null_maps[k] == nullptr) continue;
+
+            const auto& null_map = assert_cast<const ColumnUInt8&>(*null_maps[k]).get_data();
+            if (null_map[row] == 1) {
+                bitmap[k / 8] |= UInt8(1) << (k % 8);
+            }
+        }
+
+        return bitmap;
+    }
+
+private:
+    ColumnRawPtrs actual_columns;
+    ColumnRawPtrs null_maps;
+};
+
+/// Case where nullable keys are not supported: only keeps a copy of the key column list.
+template <typename Key>
+class BaseStateKeysFixed<Key, false> {
+protected:
+    BaseStateKeysFixed(const ColumnRawPtrs& columns) : actual_columns(columns) {}
+
+    /// The key columns exactly as they were given to the constructor.
+    const ColumnRawPtrs& get_actual_columns() const { return actual_columns; }
+
+    /// Must not be called for non-nullable keys; logs a fatal error.
+    /// @return an empty bitmap, only so that the function has a return statement on every path
+    KeysNullMap<Key> create_bitmap(size_t) const {
+        LOG(FATAL) << "Internal error: calling create_bitmap() for non-nullable keys is forbidden";
+        /// LOG(FATAL) is presumably terminal; the explicit return avoids flowing off the end
+        /// of a non-void function (undefined behaviour) should it ever return.
+        return KeysNullMap<Key> {};
+    }
+
+private:
+    ColumnRawPtrs actual_columns;
+};
+
+} // namespace columns_hashing_impl
+
+} // namespace ColumnsHashing
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/common/cow.h b/be/src/vec/common/cow.h
new file mode 100644
index 0000000000..58ae14d7e0
--- /dev/null
+++ b/be/src/vec/common/cow.h
@@ -0,0 +1,439 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/COW.h
+// and modified by Doris
+
+#pragma once
+
+#include <boost/smart_ptr/intrusive_ptr.hpp>
+#include <boost/smart_ptr/intrusive_ref_counter.hpp>
+#include <initializer_list>
+
+
+/** Copy-on-write shared ptr.
+  * Allows to work with shared immutable objects and sometimes unshare and mutate your own unique copy.
+  *
+  * Usage:
+
+    class Column : public COW<Column>
+    {
+    private:
+        friend class COW<Column>;
+
+        /// Leave all constructors in private section. They will be available through 'create' method.
+        Column();
+
+        /// Provide 'clone' method. It can be virtual if you want polymorphic behaviour.
+        virtual Column * clone() const;
+    public:
+        /// Correctly use const qualifiers in your interface.
+
+        virtual ~Column() {}
+    };
+
+  * It will provide 'create' and 'mutate' methods.
+  * And 'Ptr' and 'MutablePtr' types.
+  * Ptr is refcounted pointer to immutable object.
+  * MutablePtr is refcounted noncopyable pointer to mutable object.
+  * MutablePtr can be assigned to Ptr through move assignment.
+  *
+  * 'create' method creates MutablePtr: you cannot share mutable objects.
+  * To share, move-assign to immutable pointer.
+  * 'mutate' method allows to create mutable noncopyable object from immutable object:
+  *   either by cloning or by using directly, if it is not shared.
+  * These methods are thread-safe.
+  *
+  * Example:
+  *
+    /// Creating and assigning to immutable ptr.
+    Column::Ptr x = Column::create(1);
+    /// Sharing single immutable object in two ptrs.
+    Column::Ptr y = x;
+
+    /// Now x and y are shared.
+
+    /// Change value of x.
+    {
+        /// Creating mutable ptr. It can clone an object under the hood if it was shared.
+        Column::MutablePtr mutate_x = std::move(*x).mutate();
+        /// Using non-const methods of an object.
+        mutate_x->set(2);
+        /// Assigning pointer 'x' to mutated object.
+        x = std::move(mutate_x);
+    }
+
+    /// Now x and y are unshared and have different values.
+
+  * Note. You may have heard that COW is bad practice.
+  * Actually it is, if your values are small or if copying is done implicitly.
+  * This is the case for string implementations.
+  *
+  * In contrast, COW is intended for the cases when you need to share states of large objects,
+  * (when you usually will use std::shared_ptr) but you also want precise control over modification
+  * of this shared state.
+  *
+  * Caveats:
+  * - after a call to 'mutate' method, you can still have a reference to immutable ptr somewhere.
+  * - as 'mutable_ptr' should be unique, its refcount is redundant - probably it would be better
+  *   to use std::unique_ptr for it somehow.
+  */
+template <typename Derived>
+class COW {
+    std::atomic_uint ref_counter; // intrusive reference count, changed only via add_ref()/release_ref()
+
+protected:
+    COW() : ref_counter(0) {}
+
+    COW(COW const&) : ref_counter(0) {} // a copied object starts with its own zero count
+
+    COW& operator=(COW const&) { // assignment never alters the reference count of *this
+        return *this;
+    }
+
+    void add_ref() {
+        ++ref_counter;
+    }
+
+    void release_ref() {
+        if (--ref_counter == 0) { // last reference dropped: destroy the object through the Derived type
+            delete static_cast<const Derived*>(this);
+        }
+    }
+
+    Derived* derived() { return static_cast<Derived*>(this); }
+
+    const Derived* derived() const { return static_cast<const Derived*>(this); }
+
+    template <typename T>
+    class intrusive_ptr { // pointer that maintains the pointee's count via add_ref()/release_ref()
+    public:
+        intrusive_ptr() : t(nullptr) {}
+
+        intrusive_ptr(T* t, bool add_ref=true) : t(t) { // add_ref=false adopts `t` without incrementing
+            if (t && add_ref) ((std::remove_const_t<T>*)t)->add_ref(); // cast strips const from T
+        }
+
+        template <typename U>
+        intrusive_ptr(intrusive_ptr<U> const& rhs) : t(rhs.get()) {
+            if (t) ((std::remove_const_t<T>*)t)->add_ref();
+        }
+
+        intrusive_ptr(intrusive_ptr const& rhs) : t(rhs.get()) {
+            if (t) ((std::remove_const_t<T>*)t)->add_ref();
+        }
+
+        ~intrusive_ptr() {
+            if (t) ((std::remove_const_t<T>*)t)->release_ref();
+        }
+
+        template <typename U>
+        intrusive_ptr& operator=(intrusive_ptr<U> const& rhs) {
+            intrusive_ptr(rhs).swap(*this); // copy-and-swap: the temporary releases the old pointee
+            return *this;
+        }
+
+        intrusive_ptr(intrusive_ptr&& rhs) : t(rhs.t) { // move: takes over rhs's reference
+            rhs.t = nullptr;
+        }
+
+        intrusive_ptr& operator=(intrusive_ptr&& rhs) {
+            intrusive_ptr(static_cast<intrusive_ptr&&>(rhs)).swap(*this);
+            return *this;
+        }
+
+        template<class U> friend class intrusive_ptr; // lets the converting moves below read rhs.t
+
+        template<class U>
+        intrusive_ptr(intrusive_ptr<U>&& rhs) : t(rhs.t) {
+            rhs.t = nullptr;
+        }
+
+        template<class U>
+        intrusive_ptr& operator=(intrusive_ptr<U>&& rhs) {
+            intrusive_ptr(static_cast<intrusive_ptr<U>&&>(rhs)).swap(*this);
+            return *this;
+        }
+
+        intrusive_ptr& operator=(intrusive_ptr const& rhs) {
+            intrusive_ptr(rhs).swap(*this);
+            return *this;
+        }
+
+        intrusive_ptr& operator=(T* rhs) {
+            intrusive_ptr(rhs).swap(*this);
+            return *this;
+        }
+
+        void reset() {
+            intrusive_ptr().swap(*this);
+        }
+
+        void reset(T* rhs) {
+            intrusive_ptr(rhs).swap(*this);
+        }
+
+        void reset(T* rhs, bool add_ref) {
+            intrusive_ptr(rhs, add_ref).swap(*this);
+        }
+
+        T* get() const {
+            return t;
+        }
+
+        T* detach() { // hands the raw pointer back without calling release_ref()
+            T* ret = t;
+            t = nullptr;
+            return ret;
+        }
+
+        void swap(intrusive_ptr& rhs) {
+            T* tmp = t;
+            t = rhs.t;
+            rhs.t = tmp;
+        }
+
+        T& operator*() const& {
+            return *t;
+        }
+
+        T&& operator*() const&& { // rvalue overload: yields an rvalue reference to the pointee
+            return const_cast<std::remove_const_t<T>&&>(*t);
+        }
+
+        T* operator->() const {
+            return t;
+        }
+
+        operator bool() const {
+            return t != nullptr;
+        }
+
+        operator T*() const {
+            return t;
+        }
+
+    private:
+        T* t;
+    };
+
+protected:
+    template <typename T>
+    class mutable_ptr : public intrusive_ptr<T> { // move-only pointer to a non-const object
+    private:
+        using Base = intrusive_ptr<T>;
+
+        template <typename> friend class COW;
+        template <typename, typename> friend class COWHelper;
+
+        explicit mutable_ptr(T* ptr) : Base(ptr) {} // private: only the friends above wrap raw pointers
+    public:
+        /// Copy: not possible.
+        mutable_ptr(const mutable_ptr&) = delete;
+
+        /// Move: ok.
+        mutable_ptr(mutable_ptr&&) = default;
+        mutable_ptr& operator=(mutable_ptr&&) = default;
+
+        /// Initializing from temporary of compatible type.
+        template <typename U>
+        mutable_ptr(mutable_ptr<U>&& other) : Base(std::move(other)) {}
+
+        mutable_ptr() = default;
+
+        mutable_ptr(std::nullptr_t) {}
+    };
+
+public:
+    using MutablePtr = mutable_ptr<Derived>;
+
+    unsigned int use_count() const { // current value of the reference counter
+        return ref_counter.load();
+    }
+
+protected:
+    template <typename T>
+    class immutable_ptr : public intrusive_ptr<const T> { // copyable pointer to a const object
+    private:
+        using Base = intrusive_ptr<const T>;
+
+        template <typename> friend class COW;
+        template <typename, typename> friend class COWHelper;
+
+        explicit immutable_ptr(const T* ptr) : Base(ptr) {} // private: only the friends above wrap raw pointers
+    public:
+        /// Copy from immutable ptr: ok.
+        immutable_ptr(const immutable_ptr&) = default;
+        immutable_ptr& operator=(const immutable_ptr&) = default;
+
+        template <typename U>
+        immutable_ptr(const immutable_ptr<U>& other) : Base(other) {}
+
+        /// Move: ok.
+        immutable_ptr(immutable_ptr&&) = default;
+        immutable_ptr& operator=(immutable_ptr&&) = default;
+
+        /// Initializing from temporary of compatible type.
+        template <typename U>
+        immutable_ptr(immutable_ptr<U>&& other) : Base(std::move(other)) {}
+
+        /// Move from mutable ptr: ok.
+        template <typename U>
+        immutable_ptr(mutable_ptr<U>&& other) : Base(std::move(other)) {}
+
+        /// Copy from mutable ptr: not possible.
+        template <typename U>
+        immutable_ptr(const mutable_ptr<U>&) = delete;
+
+        immutable_ptr() = default;
+
+        immutable_ptr(std::nullptr_t) {}
+    };
+
+public:
+    using Ptr = immutable_ptr<Derived>;
+
+    template <typename... Args>
+    static MutablePtr create(Args&&... args) { // forwards the arguments to a Derived constructor
+        return MutablePtr(new Derived(std::forward<Args>(args)...));
+    }
+
+    template <typename T>
+    static MutablePtr create(std::initializer_list<T>&& arg) { // initializer-list form of create()
+        return create(std::forward<std::initializer_list<T>>(arg));
+    }
+
+public:
+    Ptr get_ptr() const { return Ptr(derived()); } // new owning pointer to this object (count + 1)
+    MutablePtr get_ptr() { return MutablePtr(derived()); }
+
+protected:
+    MutablePtr shallow_mutate() const {
+        if (this->use_count() > 1) // more than one reference: work on a clone
+            return derived()->clone();
+        else // at most one reference: reuse this very object
+            return assume_mutable();
+    }
+
+public:
+    MutablePtr mutate() const&& { return shallow_mutate(); } // rvalue-qualified: call as std::move(*ptr).mutate()
+
+    MutablePtr assume_mutable() const { return const_cast<COW*>(this)->get_ptr(); } // no use_count() check
+
+    Derived& assume_mutable_ref() const { return const_cast<Derived&>(*derived()); } // no use_count() check
+
+protected:
+    /// It works as immutable_ptr if it is const and as mutable_ptr if it is non const.
+    template <typename T>
+    class chameleon_ptr {
+    private:
+        immutable_ptr<T> value;
+
+    public:
+        template <typename... Args>
+        chameleon_ptr(Args&&... args) : value(std::forward<Args>(args)...) {}
+
+        template <typename U>
+        chameleon_ptr(std::initializer_list<U>&& arg)
+                : value(std::forward<std::initializer_list<U>>(arg)) {}
+
+        const T* get() const { return value.get(); }
+        T* get() { return &value->assume_mutable_ref(); } // non-const access casts away the pointee's const
+
+        const T* operator->() const { return get(); }
+        T* operator->() { return get(); }
+
+        const T& operator*() const { return *value; }
+        T& operator*() { return value->assume_mutable_ref(); }
+
+        operator const immutable_ptr<T> &() const { return value; }
+        operator immutable_ptr<T> &() { return value; }
+
+        operator bool() const { return value != nullptr; }
+        bool operator!() const { return value == nullptr; }
+
+        bool operator==(const chameleon_ptr& rhs) const { return value == rhs.value; }
+        bool operator!=(const chameleon_ptr& rhs) const { return value != rhs.value; }
+    };
+
+public:
+    /** Use this type in class members for compositions.
+      *
+      * NOTE:
+      * For classes with WrappedPtr members,
+      * you must reimplement 'mutate' method, so it will call 'mutate' of all subobjects (do deep mutate).
+      * It will guarantee, that mutable object have all subobjects unshared.
+      *
+      * NOTE:
+      * If you override 'mutate' method in inherited classes, don't forget to make it virtual in base class or to make it call a virtual method.
+      * (COW itself doesn't force any methods to be virtual).
+      *
+      * See example in "cow_compositions.cpp".
+      */
+    using WrappedPtr = chameleon_ptr<Derived>;
+};
+
+/** Helper class to support inheritance.
+  * Example:
+  *
+  * class IColumn : public COW<IColumn>
+  * {
+  *     friend class COW<IColumn>;
+  *     virtual MutablePtr clone() const = 0;
+  *     virtual ~IColumn() {}
+  * };
+  *
+  * class ConcreteColumn : public COWHelper<IColumn, ConcreteColumn>
+  * {
+  *     friend class COWHelper<IColumn, ConcreteColumn>;
+  * };
+  *
+  * Here is complete inheritance diagram:
+  *
+  * ConcreteColumn
+  *  COWHelper<IColumn, ConcreteColumn>
+  *   IColumn
+  *    COW<IColumn>
+  *     (holds the std::atomic_uint reference counter)
+  *
+  * See example in "cow_columns.cpp".
+  */
+template <typename Base, typename Derived>
+class COWHelper : public Base {
+public:
+    using Ptr = typename Base::template immutable_ptr<Derived>;
+    using MutablePtr = typename Base::template mutable_ptr<Derived>;
+    /// Construct a new Derived from the forwarded arguments.
+    template <typename... CtorArgs>
+    static MutablePtr create(CtorArgs&&... ctor_args) {
+        return MutablePtr(new Derived(std::forward<CtorArgs>(ctor_args)...));
+    }
+    /// Initializer-list form; delegates to the variadic create().
+    template <typename Elem>
+    static MutablePtr create(std::initializer_list<Elem>&& list) {
+        return create(std::forward<std::initializer_list<Elem>>(list));
+    }
+    /// Copy-construct a Derived from *this, returned through the Base pointer type.
+    typename Base::MutablePtr clone() const override {
+        return typename Base::MutablePtr(new Derived(*static_cast<const Derived*>(this)));
+    }
+
+protected:
+    MutablePtr shallow_mutate() const { // Base::shallow_mutate() re-typed as a pointer to Derived
+        return MutablePtr(static_cast<Derived*>(Base::shallow_mutate().get()));
+    }
+};
diff --git a/be/src/vec/common/demangle.cpp b/be/src/vec/common/demangle.cpp
new file mode 100644
index 0000000000..fa32334541
--- /dev/null
+++ b/be/src/vec/common/demangle.cpp
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/demangle.cpp
+// and modified by Doris
+
+#include "vec/common/demangle.h"
+
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define MEMORY_SANITIZER 1
+#endif
+#elif defined(__MEMORY_SANITIZER__)
+#define MEMORY_SANITIZER 1
+#endif
+
+#if _MSC_VER || MEMORY_SANITIZER
+
+std::string demangle(const char* name, int& status) {
+    status = 0; // this variant does no demangling and always reports success
+    return std::string(name); // the name is passed through unchanged
+}
+
+#else
+
+#include <cxxabi.h>
+#include <stdlib.h>
+
+std::string demangle(const char* name, int& status) {
+    char* const demangled = abi::__cxa_demangle(name, nullptr, nullptr, &status);
+    if (demangled == nullptr) {
+        // Demangling failed: hand the input back unchanged.
+        return name;
+    }
+    std::string result;
+    try {
+        result = demangled;
+    } catch (...) {
+        // Release the buffer returned by __cxa_demangle before propagating.
+        free(demangled);
+        throw;
+    }
+    free(demangled);
+    return result;
+}
+
+#endif
diff --git a/be/src/vec/common/demangle.h b/be/src/vec/common/demangle.h
new file mode 100644
index 0000000000..3edf2e1640
--- /dev/null
+++ b/be/src/vec/common/demangle.h
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/demangle.h
+// and modified by Doris
+
+#pragma once
+
+#include <string>
+
+/** Demangles C++ symbol name.
+  * When demangling fails, returns the original name and sets status to non-zero.
+  * TODO: Write msvc version (now returns the same string)
+  */
+std::string demangle(const char* name, int& status);
+
+inline std::string demangle(const char* name) {
+    int ignored_status = 0; // this overload discards the status of the two-argument form
+    return demangle(name, ignored_status);
+}
diff --git a/be/src/vec/common/exception.cpp b/be/src/vec/common/exception.cpp
new file mode 100644
index 0000000000..128bcec6f4
--- /dev/null
+++ b/be/src/vec/common/exception.cpp
@@ -0,0 +1,202 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Exception.cpp
+// and modified by Doris
+
+#include "vec/common/exception.h"
+
+#include <cxxabi.h>
+#include <string.h>
+
+#include <filesystem>
+#include <iostream>
+#include <string>
+#include <typeinfo>
+
+namespace doris::vectorized {
+
+AbstractException::AbstractException(int code) : _pNested(nullptr), _code(code) {} // empty message, no nested exception
+
+AbstractException::AbstractException(const std::string& msg, int code)
+        : _msg(msg), _pNested(nullptr), _code(code) {} // message only, no nested exception
+
+AbstractException::AbstractException(const std::string& msg, const std::string& arg, int code)
+        : _msg(msg), _pNested(nullptr), _code(code) {
+    // A non-empty `arg` is appended to the message after a ": " separator.
+    if (arg.empty()) return;
+    _msg += ": ";
+    _msg += arg;
+}
+
+AbstractException::AbstractException(const std::string& msg, const AbstractException& nested,
+                                     int code)
+        : _msg(msg), _pNested(nested.clone()), _code(code) {} // stores its own clone of `nested`, deleted in the destructor
+
+AbstractException::AbstractException(const AbstractException& exc)
+        : std::exception(exc),
+          _msg(exc._msg), // the nested exception (if any) is cloned, never shared
+          _pNested(exc._pNested ? exc._pNested->clone() : nullptr), _code(exc._code) {}
+
+AbstractException::~AbstractException() throw() {
+    delete _pNested; // the nested exception is an owned clone (or null); deleting null is a no-op
+}
+
+AbstractException& AbstractException::operator=(const AbstractException& exc) {
+    if (&exc == this) return *this; // self-assignment: nothing to do
+    std::string new_msg = exc._msg; // everything that can throw happens before *this is modified
+    AbstractException* new_nested = exc._pNested ? exc._pNested->clone() : nullptr;
+    delete _pNested; // a throwing string copy after this point used to leave _pNested dangling
+    _pNested = new_nested;
+    _msg.swap(new_msg);
+    _code = exc._code;
+    return *this;
+}
+
+const char* AbstractException::name() const throw() {
+    return "Exception"; // string literal; virtual, so subclasses can return their own label
+}
+
+const char* AbstractException::className() const throw() {
+    return typeid(*this).name(); // implementation-defined (possibly mangled) name of the dynamic type
+}
+
+const char* AbstractException::what() const throw() {
+    return name(); // NOTE: the exception name, not message(); displayText() carries the text
+}
+
+std::string AbstractException::displayText() const {
+    const std::string label = name();
+    if (_msg.empty()) {
+        return label; // no message: the name alone
+    }
+    // Otherwise "<name>: <message>".
+    return label + ": " + _msg;
+}
+
+void AbstractException::extendedMessage(const std::string& arg) {
+    if (arg.empty()) return; // nothing to append
+    // Separate from existing text with ": "; an empty message just becomes `arg`.
+    if (!_msg.empty()) _msg += ": ";
+    _msg += arg;
+}
+
+AbstractException* AbstractException::clone() const {
+    return new AbstractException(*this); // heap copy; the caller is responsible for deleting it
+}
+
+void AbstractException::rethrow() const {
+    throw *this; // throws a copy typed AbstractException; subclasses override to throw their own type
+}
+
+//TODO: use fmt
+std::string errnoToString(int code, int e) {
+    // `code` is not used when building the text.
+    char buf[128];
+    const std::string description(strerror_r(e, buf, sizeof(buf)));
+    return "errno: " + std::to_string(e) + ", strerror: " + description;
+}
+
+void throwFromErrno(const std::string& s, int code, int e) {
+    throw ErrnoException(s + ", " + errnoToString(code, e), code, e); // text: "<s>, errno: <e>, strerror: <description>"
+}
+
+void throwFromErrnoWithPath(const std::string& s, const std::string& path, int code,
+                            int the_errno) {
+    throw ErrnoException(s + ", " + errnoToString(code, the_errno), code, the_errno, path); // `path` is stored on the exception, not added to the text
+}
+
+void tryLogCurrentException(const char* log_name, const std::string& start_of_message) {
+    // Placeholder: `log_name` and the in-flight exception are ignored; only the prefix is printed.
+    std::cout << "[TODO] should use glog here :" << start_of_message << std::endl;
+}
+
+std::string getExtraExceptionInfo(const std::exception& e) {
+    // Stub: no extra information is produced yet; `e` is not inspected.
+    return std::string();
+}
+
+std::string getCurrentExceptionMessage(bool with_stacktrace,
+                                       bool check_embedded_stacktrace /*= false*/,
+                                       bool with_extra_info /*= true*/) {
+    std::stringstream stream;
+    // Call only from inside a catch block: `throw;` re-raises the in-flight exception.
+    try {
+        throw;
+    } catch (const Exception& e) {
+        stream << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace)
+               << (with_extra_info ? getExtraExceptionInfo(e) : "") << " (version "
+               << "VERSION_STRING" << "VERSION_OFFICIAL" << ")";
+    } catch (const AbstractException& e) {
+        try {
+            stream << "Poco::Exception. Code: " << TStatusCode::VEC_EXCEPTION
+                   << ", e.code() = " << e.code() << ", e.displayText() = " << e.displayText()
+                   << (with_extra_info ? getExtraExceptionInfo(e) : "") << " (version "
+                   << "VERSION_STRING" << "VERSION_OFFICIAL" << ")"; // closing ")" was missing
+        } catch (...) {
+        }
+    } catch (const std::exception& e) {
+        try {
+            // Previously an empty stub, so such exceptions produced an empty message.
+            stream << "std::exception. e.what() = " << e.what()
+                   << (with_extra_info ? getExtraExceptionInfo(e) : "");
+        } catch (...) {
+        }
+    } catch (...) {
+        try {
+            stream << "Unknown exception."; // not derived from std::exception: nothing to query
+        } catch (...) {
+        }
+    }
+    return stream.str();
+}
+
+std::string getExceptionMessage(const Exception& e, bool with_stacktrace,
+                                bool check_embedded_stacktrace) {
+    std::stringstream stream;
+
+    try {
+        std::string text = e.displayText();
+        // Offset of a trace already embedded in the text; npos when absent or not checked.
+        const auto trace_pos =
+                check_embedded_stacktrace ? text.find("Stack trace") : std::string::npos;
+        const bool has_embedded_stack_trace = trace_pos != std::string::npos;
+        if (has_embedded_stack_trace && !with_stacktrace) {
+            text.resize(trace_pos); // trace not wanted: cut the text where it starts
+        }
+
+        stream << "Code: " << e.code() << ", e.displayText() = " << text;
+
+        if (with_stacktrace && !has_embedded_stack_trace) {
+            stream << ", Stack trace:\n\n" << e.getStackTrace().value();
+        }
+    } catch (...) {
+        // Best effort: whatever was formatted before the failure is returned.
+    }
+
+    return stream.str();
+}
+
+std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace) {
+    try {
+        std::rethrow_exception(std::move(e)); // `e` must not be null: rethrowing a null exception_ptr is undefined
+    } catch (...) {
+        return getCurrentExceptionMessage(with_stacktrace); // formats the exception that was just rethrown
+    }
+}
+
+} // namespace  doris::vectorized
diff --git a/be/src/vec/common/exception.h b/be/src/vec/common/exception.h
new file mode 100644
index 0000000000..3259134060
--- /dev/null
+++ b/be/src/vec/common/exception.h
@@ -0,0 +1,287 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Exception.h
+// and modified by Doris
+
+#pragma once
+
+#include <boost/exception/all.hpp>
+#include <boost/stacktrace.hpp>
+#include <cerrno>
+#include <memory>
+#include <stdexcept>
+#include <vector>
+
+#include "common/status.h"
+
+using stacktrace = boost::error_info<struct tag_stacktrace, boost::stacktrace::stacktrace>;
+
+namespace doris::vectorized {
+
+class AbstractException : public std::exception
+/// Base class for the exceptions declared in this header
+/// (interface apparently modelled on Poco::Exception -- confirm).
+{
+public:
+    AbstractException(const std::string& msg, int code = 0);
+    /// Creates an exception with the given message.
+
+    AbstractException(const std::string& msg, const std::string& arg, int code = 0);
+    /// Creates an exception; a non-empty arg is appended to msg after ": ".
+
+    AbstractException(const std::string& msg, const AbstractException& nested, int code = 0);
+    /// Creates an exception and stores a clone
+    /// of the nested exception.
+
+    AbstractException(const AbstractException& exc);
+    /// Copy constructor; the nested exception, if any, is cloned.
+
+    ~AbstractException() throw();
+    /// Destroys the exception and deletes the nested exception.
+
+    AbstractException& operator=(const AbstractException& exc);
+    /// Assignment operator.
+
+    virtual const char* name() const throw();
+    /// Returns a static string describing the exception.
+
+    virtual const char* className() const throw();
+    /// Returns the name of the exception class.
+
+    virtual const char* what() const throw();
+    /// Returns a static string describing the exception.
+    ///
+    /// Same as name(), but for compatibility with std::exception.
+
+    const AbstractException* nested() const;
+    /// Returns a pointer to the nested exception, or
+    /// null if no nested exception exists.
+
+    const std::string& message() const;
+    /// Returns the message text.
+
+    int code() const;
+    /// Returns the exception code if defined.
+
+    virtual std::string displayText() const;
+    /// Returns a string consisting of the
+    /// message name and the message text.
+
+    virtual AbstractException* clone() const;
+    /// Creates an exact copy of the exception.
+    ///
+    /// The copy can later be thrown again by
+    /// invoking rethrow() on it.
+
+    virtual void rethrow() const;
+    /// (Re)Throws the exception.
+    ///
+    /// This is useful for temporarily storing a
+    /// copy of an exception (see clone()), then
+    /// throwing it again.
+
+protected:
+    AbstractException(int code = 0);
+    /// Standard constructor.
+
+    void message(const std::string& msg);
+    /// Sets the message for the exception.
+
+    void extendedMessage(const std::string& arg);
+    /// Appends arg to the message, separated by ": " when the message is not empty.
+
+private:
+    std::string _msg; // message text, without the name() prefix
+    AbstractException* _pNested; // owned clone of the nested exception, or null
+    int _code; // exception code; 0 unless given at construction
+};
+
+//
+// inlines
+//
+inline const AbstractException* AbstractException::nested() const {
+    return _pNested; // still owned by *this (deleted in the destructor); may be null
+}
+
+inline const std::string& AbstractException::message() const {
+    return _msg; // reference to the stored text, not a copy
+}
+
+inline void AbstractException::message(const std::string& msg) {
+    _msg = msg; // replaces the current text (extendedMessage() is the appending variant)
+}
+
+inline int AbstractException::code() const {
+    return _code; // value given at construction (the constructors default it to 0)
+}
+
+//
+// Macros for quickly declaring and implementing exception classes.
+// A template cannot be used here because the exception name is a string
+// literal, which is not allowed as a template argument.
+// The API parameter is not used by the expansion; CODE is the default error code.
+//
+#define DORIS_DECLARE_EXCEPTION_CODE(API, CLS, BASE, CODE)                          \
+    class CLS : public BASE {                                                       \
+    public:                                                                         \
+        CLS(int code = CODE);                                                       \
+        CLS(const std::string& msg, int code = CODE);                               \
+        CLS(const std::string& msg, const std::string& arg, int code = CODE);       \
+        CLS(const std::string& msg, const AbstractException& exc, int code = CODE); \
+        CLS(const CLS& exc);                                                        \
+        ~CLS() throw();                                                             \
+        CLS& operator=(const CLS& exc);                                             \
+        const char* name() const throw();                                           \
+        const char* className() const throw();                                      \
+        AbstractException* clone() const;                                           \
+        void rethrow() const;                                                       \
+    };
+
+#define DORIS_DECLARE_EXCEPTION(API, CLS, BASE) DORIS_DECLARE_EXCEPTION_CODE(API, CLS, BASE, 0) // default code 0
+
+#define DORIS_IMPLEMENT_EXCEPTION(CLS, BASE, NAME) /* out-of-line members; name() returns NAME */ \
+    CLS::CLS(int code) : BASE(code) {}                                                           \
+    CLS::CLS(const std::string& msg, int code) : BASE(msg, code) {}                              \
+    CLS::CLS(const std::string& msg, const std::string& arg, int code) : BASE(msg, arg, code) {} \
+    CLS::CLS(const std::string& msg, const AbstractException& exc, int code)                     \
+            : BASE(msg, exc, code) {}                                                            \
+    CLS::CLS(const CLS& exc) : BASE(exc) {}                                                      \
+    CLS::~CLS() throw() {}                                                                       \
+    CLS& CLS::operator=(const CLS& exc) {                                                        \
+        BASE::operator=(exc);                                                                    \
+        return *this;                                                                            \
+    }                                                                                            \
+    const char* CLS::name() const throw() { return NAME; }                                       \
+    const char* CLS::className() const throw() { return typeid(*this).name(); }                  \
+    AbstractException* CLS::clone() const { return new CLS(*this); }                             \
+    void CLS::rethrow() const { throw *this; }
+
+class Exception : public AbstractException { // AbstractException plus a stack trace member
+public:
+    Exception() : trace(boost::stacktrace::stacktrace()) {} /// For deferred initialization.
+    Exception(const std::string& msg, int code)
+            : AbstractException(msg, code), trace(boost::stacktrace::stacktrace()) {}
+    Exception(const std::string& msg, const Exception& nested_exception, int code)
+            : AbstractException(msg, nested_exception, code), trace(nested_exception.trace) {} // reuses the nested trace
+
+    enum CreateFromPocoTag { CreateFromPoco }; // tag selecting the converting constructor below
+    Exception(CreateFromPocoTag, const AbstractException& exc)
+            : AbstractException(exc.displayText(), TStatusCode::VEC_EXCEPTION),
+              trace(boost::stacktrace::stacktrace()) {}
+
+    Exception* clone() const override { return new Exception(*this); }
+    void rethrow() const override { throw *this; }
+    const char* name() const throw() override { return "doris::vectorized::Exception"; }
+    const char* what() const throw() override { return message().data(); } // unlike the base class, returns the message text
+
+    /// Add something to the existing message.
+    void addMessage(const std::string& arg) { extendedMessage(arg); }
+
+    /// Trace stored in this exception; a boost::error_info wrapper, read it with .value().
+    const stacktrace& getStackTrace() const { return trace; }
+
+private:
+    /// Built from boost::stacktrace::stacktrace() at construction, or copied from the nested exception.
+    stacktrace trace;
+
+    const char* className() const throw() override { return "doris::vectorized::Exception"; }
+};
+
+/// Exception that additionally stores `saved_errno` and an optional path. See the throwFromErrno function.
+class ErrnoException : public Exception {
+public:
+    ErrnoException(const std::string& msg, int code, int saved_errno_, // saved_errno_: errno value to remember
+                   const std::optional<std::string>& path_ = {})       // path_: optional, empty by default
+            : Exception(msg, code), saved_errno(saved_errno_), path(path_) {}
+
+    ErrnoException* clone() const override { return new ErrnoException(*this); } // copy allocated with `new`
+    void rethrow() const override { throw *this; } // throws a copy of *this
+
+    int getErrno() const { return saved_errno; }
+    std::optional<std::string> getPath() const { return path; } // by value, without top-level const so the result can be moved from
+
+private:
+    int saved_errno; // errno value handed to the constructor
+    std::optional<std::string> path; // empty unless a path was handed to the constructor
+
+    const char* name() const throw() override { return "doris::vectorized::ErrnoException"; }
+    const char* className() const throw() override { return "doris::vectorized::ErrnoException"; }
+};
+
+using Exceptions = std::vector<std::exception_ptr>;
+
+std::string errnoToString(int code, int the_errno = errno);
+[[noreturn]] void throwFromErrno(const std::string& s, int code, int the_errno = errno);
+[[noreturn]] void throwFromErrnoWithPath(const std::string& s, const std::string& path, int code,
+                                         int the_errno = errno);
+
+/** Try to write an exception to the log (and forget about it).
+  * Can be used in destructors in the catch-all block.
+  */
+void tryLogCurrentException(const char* log_name, const std::string& start_of_message = "");
+
+/** Prints current exception in canonical format.
+  * with_stacktrace - prints stack trace for doris::vectorized::Exception.
+  * check_embedded_stacktrace - if doris::vectorized::Exception has embedded stacktrace then
+  *  only this stack trace will be printed.
+  */
+std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded_stacktrace = false,
+                                       bool with_extra_info = true);
+
+int getCurrentExceptionCode();
+
+/// An execution status of any piece of code: a return code plus an optional error message.
+struct ExecutionStatus {
+    int code = 0; // 0 unless set; NOTE(review): presumably 0 means success - confirm
+    std::string message; // empty unless an error text was supplied
+
+    ExecutionStatus() = default;
+
+    explicit ExecutionStatus(int return_code, const std::string& exception_message = "") // stores both arguments as given
+            : code(return_code), message(exception_message) {}
+
+    static ExecutionStatus fromCurrentException(const std::string& start_of_message = ""); // defined elsewhere; NOTE(review): presumably for use inside a catch block - confirm
+
+    std::string serializeText() const; // defined elsewhere; the text format is not visible here
+
+    void deserializeText(const std::string& data); // defined elsewhere; NOTE(review): presumably the inverse of serializeText - confirm
+
+    bool tryDeserializeText(const std::string& data); // NOTE(review): bool presumably reports whether parsing succeeded - confirm
+};
+
+void tryLogException(std::exception_ptr e, const char* log_name,
+                     const std::string& start_of_message = "");
+
+std::string getExceptionMessage(const Exception& e, bool with_stacktrace,
+                                bool check_embedded_stacktrace = false);
+std::string getExceptionMessage(std::exception_ptr e, bool with_stacktrace);
+
+void rethrowFirstException(const Exceptions& exceptions);
+
+template <typename T> // T must be a pointer type (enforced by enable_if); its pointee is the exception type to look for
+std::enable_if_t<std::is_pointer_v<T>, T> exception_cast(std::exception_ptr e) { // e must be non-null: std::rethrow_exception requires it
+    try {
+        std::rethrow_exception(std::move(e)); // rethrow so the handlers below can test the dynamic type
+    } catch (std::remove_pointer_t<T>& concrete) {
+        return &concrete; // NOTE(review): address of the exception object itself; usable only while some exception_ptr still refers to it
+    } catch (...) {
+        return nullptr; // stored exception is of some other type
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/common/field_visitors.h b/be/src/vec/common/field_visitors.h
new file mode 100644
index 0000000000..1dee4491b5
--- /dev/null
+++ b/be/src/vec/common/field_visitors.h
@@ -0,0 +1,554 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/FieldVisitors.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/common/demangle.h"
+#include "vec/core/accurate_comparison.h"
+#include "vec/core/field.h"
+
+class SipHash;
+
+namespace doris::vectorized {
+
+UInt128 string_to_uuid(const String&);
+
+/** StaticVisitor (and its descendants) - class with overloaded operator() for all types of fields.
+  * You could call visitor for field using function 'apply_visitor'.
+  * Also "binary visitor" is supported - its operator() takes two arguments.
+  */
+template <typename R = void> // R: result type of the derived visitor's operator() overloads (void when omitted)
+struct StaticVisitor {
+    using ResultType = R; // read by apply_visitor to declare its own return type
+};
+
+/// Calls `visitor` with the value held by `field`. F is a template parameter to allow a universal reference, useful for const and non-const values.
+template <typename Visitor, typename F> // Visitor must expose a nested ResultType (see StaticVisitor)
+typename std::decay_t<Visitor>::ResultType apply_visitor(Visitor&& visitor, F&& field) {
+    switch (field.get_type()) { // dispatch on the runtime type tag, then pass the matching typed value
+    case Field::Types::Null:
+        return visitor(field.template get<Null>());
+    case Field::Types::UInt64:
+        return visitor(field.template get<UInt64>());
+    case Field::Types::UInt128:
+        return visitor(field.template get<UInt128>());
+    case Field::Types::Int64:
+        return visitor(field.template get<Int64>());
+    case Field::Types::Float64:
+        return visitor(field.template get<Float64>());
+    case Field::Types::String:
+        return visitor(field.template get<String>());
+    case Field::Types::Array:
+        return visitor(field.template get<Array>());
+    case Field::Types::Tuple:
+        return visitor(field.template get<Tuple>());
+    case Field::Types::Decimal32:
+        return visitor(field.template get<DecimalField<Decimal32>>());
+    case Field::Types::Decimal64:
+        return visitor(field.template get<DecimalField<Decimal64>>());
+    case Field::Types::Decimal128:
+        return visitor(field.template get<DecimalField<Decimal128>>());
+    case Field::Types::AggregateFunctionState:
+        return visitor(field.template get<AggregateFunctionStateData>());
+
+    default: // a type tag with no case above
+        LOG(FATAL) << "Bad type of Field";
+        return typename std::decay_t<Visitor>::ResultType(); // `T()` also compiles when ResultType is void; `return {}` does not
+    }
+}
+
+template <typename Visitor, typename F1, typename F2> // second stage of binary dispatch: field1 is already a typed value
+static typename std::decay_t<Visitor>::ResultType apply_binary_visitor_impl(Visitor&& visitor,
+                                                                            F1&& field1,
+                                                                            F2&& field2) {
+    switch (field2.get_type()) { // get_type(), the same accessor the unary apply_visitor uses
+    case Field::Types::Null:
+        return visitor(field1, field2.template get<Null>());
+    case Field::Types::UInt64:
+        return visitor(field1, field2.template get<UInt64>());
+    case Field::Types::UInt128:
+        return visitor(field1, field2.template get<UInt128>());
+    case Field::Types::Int64:
+        return visitor(field1, field2.template get<Int64>());
+    case Field::Types::Float64:
+        return visitor(field1, field2.template get<Float64>());
+    case Field::Types::String:
+        return visitor(field1, field2.template get<String>());
+    case Field::Types::Array:
+        return visitor(field1, field2.template get<Array>());
+    case Field::Types::Tuple:
+        return visitor(field1, field2.template get<Tuple>());
+    case Field::Types::Decimal32:
+        return visitor(field1, field2.template get<DecimalField<Decimal32>>());
+    case Field::Types::Decimal64:
+        return visitor(field1, field2.template get<DecimalField<Decimal64>>());
+    case Field::Types::Decimal128:
+        return visitor(field1, field2.template get<DecimalField<Decimal128>>());
+    case Field::Types::AggregateFunctionState:
+        return visitor(field1, field2.template get<AggregateFunctionStateData>());
+
+    default: // a type tag with no case above
+        LOG(FATAL) << "Bad type of Field";
+        return typename std::decay_t<Visitor>::ResultType(); // `T()` also compiles when ResultType is void; `return {}` does not
+    }
+}
+
+template <typename Visitor, typename F1, typename F2> // binary form: resolves field1 here, field2 in apply_binary_visitor_impl
+typename std::decay_t<Visitor>::ResultType apply_visitor(Visitor&& visitor, F1&& field1,
+                                                         F2&& field2) {
+    switch (field1.get_type()) { // get_type(), the same accessor the unary apply_visitor uses
+    case Field::Types::Null:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<Null>(), std::forward<F2>(field2));
+    case Field::Types::UInt64:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<UInt64>(), std::forward<F2>(field2));
+    case Field::Types::UInt128:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<UInt128>(), std::forward<F2>(field2));
+    case Field::Types::Int64:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<Int64>(), std::forward<F2>(field2));
+    case Field::Types::Float64:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<Float64>(), std::forward<F2>(field2));
+    case Field::Types::String:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<String>(), std::forward<F2>(field2));
+    case Field::Types::Array:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<Array>(), std::forward<F2>(field2));
+    case Field::Types::Tuple:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<Tuple>(), std::forward<F2>(field2));
+    case Field::Types::Decimal32:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<DecimalField<Decimal32>>(),
+                                         std::forward<F2>(field2));
+    case Field::Types::Decimal64:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<DecimalField<Decimal64>>(),
+                                         std::forward<F2>(field2));
+    case Field::Types::Decimal128:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<DecimalField<Decimal128>>(),
+                                         std::forward<F2>(field2));
+    case Field::Types::AggregateFunctionState:
+        return apply_binary_visitor_impl(std::forward<Visitor>(visitor),
+                                         field1.template get<AggregateFunctionStateData>(),
+                                         std::forward<F2>(field2));
+
+    default: // a type tag with no case above
+        LOG(FATAL) << "Bad type of Field";
+        return typename std::decay_t<Visitor>::ResultType(); // `T()` also compiles when ResultType is void; `return {}` does not
+    }
+}
+
+/** Prints Field as literal in SQL query. One overload per Field value type; only declared in this header. */
+class FieldVisitorToString : public StaticVisitor<String> { // ResultType is String
+public:
+    String operator()(const Null& x) const;
+    String operator()(const UInt64& x) const;
+    String operator()(const UInt128& x) const;
+    String operator()(const Int64& x) const;
+    String operator()(const Float64& x) const;
+    String operator()(const String& x) const;
+    String operator()(const Array& x) const;
+    String operator()(const Tuple& x) const;
+    String operator()(const DecimalField<Decimal32>& x) const;
+    String operator()(const DecimalField<Decimal64>& x) const;
+    String operator()(const DecimalField<Decimal128>& x) const;
+    String operator()(const AggregateFunctionStateData& x) const;
+};
+
+/** Print readable and unique text dump of field type and value. One overload per Field value type; only declared in this header. */
+class FieldVisitorDump : public StaticVisitor<String> { // ResultType is String
+public:
+    String operator()(const Null& x) const;
+    String operator()(const UInt64& x) const;
+    String operator()(const UInt128& x) const;
+    String operator()(const Int64& x) const;
+    String operator()(const Float64& x) const;
+    String operator()(const String& x) const;
+    String operator()(const Array& x) const;
+    String operator()(const Tuple& x) const;
+    String operator()(const DecimalField<Decimal32>& x) const;
+    String operator()(const DecimalField<Decimal64>& x) const;
+    String operator()(const DecimalField<Decimal128>& x) const;
+    String operator()(const AggregateFunctionStateData& x) const;
+};
+
+/** Converts numeric value of any type to specified type. Non-numeric Field types log a FATAL error instead. */
+template <typename T> // T: the target numeric type, also the visitor's ResultType
+class FieldVisitorConvertToNumber : public StaticVisitor<T> {
+public:
+    T operator()(const Null&) const {
+        LOG(FATAL) << "Cannot convert NULL to " << demangle(typeid(T).name());
+        return {}; // value-initialised T, placed after the FATAL log
+    }
+
+    T operator()(const String&) const {
+        LOG(FATAL) << "Cannot convert String to " << demangle(typeid(T).name());
+        return {};
+    }
+
+    T operator()(const Array&) const {
+        LOG(FATAL) << "Cannot convert Array to " << demangle(typeid(T).name());
+        return {};
+    }
+
+    T operator()(const Tuple&) const {
+        LOG(FATAL) << "Cannot convert Tuple to " << demangle(typeid(T).name());
+        return {};
+    }
+
+    T operator()(const UInt64& x) const { return x; } // implicit conversion to T, no range check
+    T operator()(const Int64& x) const { return x; } // implicit conversion to T, no range check
+    T operator()(const Float64& x) const { return x; } // implicit conversion to T, no range check
+
+    T operator()(const UInt128&) const {
+        LOG(FATAL) << "Cannot convert UInt128 to " << demangle(typeid(T).name());
+        return {};
+    }
+
+    template <typename U> // U: the underlying decimal type of the field
+    T operator()(const DecimalField<U>& x) const {
+        if constexpr (std::is_floating_point_v<T>)
+            return static_cast<T>(x.get_value()) / x.get_scale_multiplier(); // cast first so the division happens in T
+        else
+            return x.get_value() / x.get_scale_multiplier(); // divides the raw value as-is, then converts the quotient to T
+    }
+
+    T operator()(const AggregateFunctionStateData&) const {
+        LOG(FATAL) << "Cannot convert AggregateFunctionStateData to " << demangle(typeid(T).name());
+        return {};
+    }
+};
+
+/** Updates SipHash by type and value of Field. Overloads return void and are only declared in this header. */
+class FieldVisitorHash : public StaticVisitor<> { // StaticVisitor<> means ResultType is void
+private:
+    SipHash& hash; // reference member: the SipHash object is not owned and must outlive this visitor
+
+public:
+    FieldVisitorHash(SipHash& hash_); // defined elsewhere; NOTE(review): presumably binds `hash` to hash_ - confirm
+
+    void operator()(const Null& x) const;
+    void operator()(const UInt64& x) const;
+    void operator()(const UInt128& x) const;
+    void operator()(const Int64& x) const;
+    void operator()(const Float64& x) const;
+    void operator()(const String& x) const;
+    void operator()(const Array& x) const;
+    void operator()(const Tuple& x) const;
+    void operator()(const DecimalField<Decimal32>& x) const;
+    void operator()(const DecimalField<Decimal64>& x) const;
+    void operator()(const DecimalField<Decimal128>& x) const;
+    void operator()(const AggregateFunctionStateData& x) const;
+};
+
+template <typename T> // compile-time predicate: true only for the three DecimalField specializations below
+constexpr bool is_decimalField() {
+    return false; // primary template: every other type
+}
+template <>
+constexpr bool is_decimalField<DecimalField<Decimal32>>() {
+    return true;
+}
+template <>
+constexpr bool is_decimalField<DecimalField<Decimal64>>() {
+    return true;
+}
+template <>
+constexpr bool is_decimalField<DecimalField<Decimal128>>() {
+    return true;
+}
+
+/// More precise comparison, used for index.
+/// Differs from Field::operator< and Field::operator== in that it also compares values of different types.
+/// Comparison rules are same as in FunctionsComparison (to be consistent with expression evaluation in query).
+class FieldVisitorAccurateEquals : public StaticVisitor<bool> { // binary visitor: operator()(left, right) -> equal?
+public:
+    bool operator()(const UInt64& l, const Null& r) const { return cant_compare(l, r); }
+    bool operator()(const UInt64& l, const UInt64& r) const { return l == r; }
+    bool operator()(const UInt64& l, const UInt128& r) const { return cant_compare(l, r); }
+    bool operator()(const UInt64& l, const Int64& r) const { return accurate::equalsOp(l, r); }
+    bool operator()(const UInt64& l, const Float64& r) const { return accurate::equalsOp(l, r); }
+    bool operator()(const UInt64& l, const String& r) const { return cant_compare(l, r); }
+    bool operator()(const UInt64& l, const Array& r) const { return cant_compare(l, r); }
+    bool operator()(const UInt64& l, const Tuple& r) const { return cant_compare(l, r); }
+    bool operator()(const UInt64& l, const AggregateFunctionStateData& r) const {
+        return cant_compare(l, r);
+    }
+
+    bool operator()(const Int64& l, const Null& r) const { return cant_compare(l, r); }
+    bool operator()(const Int64& l, const UInt64& r) const { return accurate::equalsOp(l, r); }
+    bool operator()(const Int64& l, const UInt128& r) const { return cant_compare(l, r); }
+    bool operator()(const Int64& l, const Int64& r) const { return l == r; }
+    bool operator()(const Int64& l, const Float64& r) const { return accurate::equalsOp(l, r); }
+    bool operator()(const Int64& l, const String& r) const { return cant_compare(l, r); }
+    bool operator()(const Int64& l, const Array& r) const { return cant_compare(l, r); }
+    bool operator()(const Int64& l, const Tuple& r) const { return cant_compare(l, r); }
+    bool operator()(const Int64& l, const AggregateFunctionStateData& r) const {
+        return cant_compare(l, r);
+    }
+
+    bool operator()(const Float64& l, const Null& r) const { return cant_compare(l, r); }
+    bool operator()(const Float64& l, const UInt64& r) const { return accurate::equalsOp(l, r); }
+    bool operator()(const Float64& l, const UInt128& r) const { return cant_compare(l, r); }
+    bool operator()(const Float64& l, const Int64& r) const { return accurate::equalsOp(l, r); }
+    bool operator()(const Float64& l, const Float64& r) const { return l == r; }
+    bool operator()(const Float64& l, const String& r) const { return cant_compare(l, r); }
+    bool operator()(const Float64& l, const Array& r) const { return cant_compare(l, r); }
+    bool operator()(const Float64& l, const Tuple& r) const { return cant_compare(l, r); }
+    bool operator()(const Float64& l, const AggregateFunctionStateData& r) const {
+        return cant_compare(l, r);
+    }
+
+    template <typename T> // left-hand Null: equal only to another Null, never a fatal error
+    bool operator()(const Null&, const T&) const {
+        return std::is_same_v<T, Null>;
+    }
+
+    template <typename T> // String compares with String, or with UInt128 after string_to_uuid()
+    bool operator()(const String& l, const T& r) const {
+        if constexpr (std::is_same_v<T, String>) return l == r;
+        if constexpr (std::is_same_v<T, UInt128>) return string_to_uuid(l) == r;
+        return cant_compare(l, r);
+    }
+
+    template <typename T> // mirror of the String overload above
+    bool operator()(const UInt128& l, const T& r) const {
+        if constexpr (std::is_same_v<T, UInt128>) return l == r;
+        if constexpr (std::is_same_v<T, String>) return l == string_to_uuid(r);
+        return cant_compare(l, r);
+    }
+
+    template <typename T>
+    bool operator()(const Array& l, const T& r) const {
+        if constexpr (std::is_same_v<T, Array>) return l == r;
+        return cant_compare(l, r);
+    }
+
+    template <typename T>
+    bool operator()(const Tuple& l, const T& r) const {
+        if constexpr (std::is_same_v<T, Tuple>) return l == r;
+        return cant_compare(l, r);
+    }
+
+    template <typename T, typename U> // decimal vs decimal directly; vs Int64/UInt64 via a scale-0 Decimal128
+    bool operator()(const DecimalField<T>& l, const U& r) const {
+        if constexpr (is_decimalField<U>()) return l == r;
+        if constexpr (std::is_same_v<U, Int64> || std::is_same_v<U, UInt64>)
+            return l == DecimalField<Decimal128>(r, 0);
+        return cant_compare(l, r);
+    }
+
+    template <typename T>
+    bool operator()(const UInt64& l, const DecimalField<T>& r) const {
+        return DecimalField<Decimal128>(l, 0) == r;
+    }
+    template <typename T>
+    bool operator()(const Int64& l, const DecimalField<T>& r) const {
+        return DecimalField<Decimal128>(l, 0) == r;
+    }
+    template <typename T>
+    bool operator()(const Float64& l, const DecimalField<T>& r) const {
+        return cant_compare(l, r);
+    }
+
+    template <typename T>
+    bool operator()(const AggregateFunctionStateData& l, const T& r) const {
+        if constexpr (std::is_same_v<T, AggregateFunctionStateData>) return l == r;
+        return cant_compare(l, r);
+    }
+
+private:
+    template <typename T, typename U> // fallback for type pairs that have no defined equality
+    bool cant_compare(const T&, const U&) const {
+        if constexpr (std::is_same_v<U, Null>) return false; // a right-hand Null is simply unequal, not an error
+        LOG(FATAL) << fmt::format("Cannot compare {} with {}", demangle(typeid(T).name()),
+                                  demangle(typeid(U).name()));
+        return false; // every path now returns a value, as FieldVisitorAccurateLess::cant_compare does
+    }
+};
+
+class FieldVisitorAccurateLess : public StaticVisitor<bool> { // binary visitor: operator()(left, right) -> left < right?
+public:
+    bool operator()(const UInt64& l, const Null& r) const { return cant_compare(l, r); }
+    bool operator()(const UInt64& l, const UInt64& r) const { return l < r; }
+    bool operator()(const UInt64& l, const UInt128& r) const { return cant_compare(l, r); }
+    bool operator()(const UInt64& l, const Int64& r) const { return accurate::lessOp(l, r); }
+    bool operator()(const UInt64& l, const Float64& r) const { return accurate::lessOp(l, r); }
+    bool operator()(const UInt64& l, const String& r) const { return cant_compare(l, r); }
+    bool operator()(const UInt64& l, const Array& r) const { return cant_compare(l, r); }
+    bool operator()(const UInt64& l, const Tuple& r) const { return cant_compare(l, r); }
+    bool operator()(const UInt64& l, const AggregateFunctionStateData& r) const {
+        return cant_compare(l, r);
+    }
+
+    bool operator()(const Int64& l, const Null& r) const { return cant_compare(l, r); }
+    bool operator()(const Int64& l, const UInt64& r) const { return accurate::lessOp(l, r); }
+    bool operator()(const Int64& l, const UInt128& r) const { return cant_compare(l, r); }
+    bool operator()(const Int64& l, const Int64& r) const { return l < r; }
+    bool operator()(const Int64& l, const Float64& r) const { return accurate::lessOp(l, r); }
+    bool operator()(const Int64& l, const String& r) const { return cant_compare(l, r); }
+    bool operator()(const Int64& l, const Array& r) const { return cant_compare(l, r); }
+    bool operator()(const Int64& l, const Tuple& r) const { return cant_compare(l, r); }
+    bool operator()(const Int64& l, const AggregateFunctionStateData& r) const {
+        return cant_compare(l, r);
+    }
+
+    bool operator()(const Float64& l, const Null& r) const { return cant_compare(l, r); }
+    bool operator()(const Float64& l, const UInt64& r) const { return accurate::lessOp(l, r); }
+    bool operator()(const Float64& l, const UInt128& r) const { return cant_compare(l, r); }
+    bool operator()(const Float64& l, const Int64& r) const { return accurate::lessOp(l, r); }
+    bool operator()(const Float64& l, const Float64& r) const { return l < r; }
+    bool operator()(const Float64& l, const String& r) const { return cant_compare(l, r); }
+    bool operator()(const Float64& l, const Array& r) const { return cant_compare(l, r); }
+    bool operator()(const Float64& l, const Tuple& r) const { return cant_compare(l, r); }
+    bool operator()(const Float64& l, const AggregateFunctionStateData& r) const {
+        return cant_compare(l, r);
+    }
+
+    template <typename T> // left-hand Null is less than any non-Null value and not less than Null
+    bool operator()(const Null&, const T&) const {
+        return !std::is_same_v<T, Null>;
+    }
+
+    template <typename T> // String compares with String, or with UInt128 after string_to_uuid()
+    bool operator()(const String& l, const T& r) const {
+        if constexpr (std::is_same_v<T, String>) return l < r;
+        if constexpr (std::is_same_v<T, UInt128>) return string_to_uuid(l) < r;
+        return cant_compare(l, r);
+    }
+
+    template <typename T> // mirror of the String overload above
+    bool operator()(const UInt128& l, const T& r) const {
+        if constexpr (std::is_same_v<T, UInt128>) return l < r;
+        if constexpr (std::is_same_v<T, String>) return l < string_to_uuid(r);
+        return cant_compare(l, r);
+    }
+
+    template <typename T>
+    bool operator()(const Array& l, const T& r) const {
+        if constexpr (std::is_same_v<T, Array>) return l < r;
+        return cant_compare(l, r);
+    }
+
+    template <typename T>
+    bool operator()(const Tuple& l, const T& r) const {
+        if constexpr (std::is_same_v<T, Tuple>) return l < r;
+        return cant_compare(l, r);
+    }
+
+    template <typename T, typename U> // decimal vs decimal directly; vs Int64/UInt64 via a scale-0 Decimal128
+    bool operator()(const DecimalField<T>& l, const U& r) const {
+        if constexpr (is_decimalField<U>())
+            return l < r;
+        else if constexpr (std::is_same_v<U, Int64> || std::is_same_v<U, UInt64>)
+            return l < DecimalField<Decimal128>(r, 0);
+        return cant_compare(l, r);
+    }
+
+    template <typename T>
+    bool operator()(const UInt64& l, const DecimalField<T>& r) const {
+        return DecimalField<Decimal128>(l, 0) < r;
+    }
+    template <typename T>
+    bool operator()(const Int64& l, const DecimalField<T>& r) const {
+        return DecimalField<Decimal128>(l, 0) < r;
+    }
+    template <typename T> // always false: no comparison is attempted and nothing is logged
+    bool operator()(const Float64&, const DecimalField<T>&) const {
+        return false;
+    }
+
+    template <typename T> // aggregate states are never ordered, not even against each other
+    bool operator()(const AggregateFunctionStateData& l, const T& r) const {
+        return cant_compare(l, r);
+    }
+
+private:
+    template <typename T, typename U> // fallback for type pairs with no defined ordering; no special case for Null here
+    bool cant_compare(const T&, const U&) const {
+        LOG(FATAL) << fmt::format("Cannot compare {} with {}", demangle(typeid(T).name()),
+                                  demangle(typeid(U).name()));
+        return false; // placed after the FATAL log so the function still returns a value
+    }
+};
+
+/** Implements `+=` operation: adds the value held by `rhs` to the visited value in place.
+ *  Returns false if the result is zero.
+ */
+class FieldVisitorSum : public StaticVisitor<bool> {
+private:
+    const Field& rhs; // reference member: the right-hand Field is not copied and must outlive this visitor
+
+public:
+    explicit FieldVisitorSum(const Field& rhs_) : rhs(rhs_) {}
+
+    bool operator()(UInt64& x) const { // rhs is read as the same type as x
+        x += get<UInt64>(rhs);
+        return x != 0;
+    }
+    bool operator()(Int64& x) const {
+        x += get<Int64>(rhs);
+        return x != 0;
+    }
+    bool operator()(Float64& x) const {
+        x += get<Float64>(rhs);
+        return x != 0;
+    }
+
+    bool operator()(Null&) const { // this and the following non-numeric overloads only log a FATAL error
+        LOG(FATAL) << "Cannot sum Nulls";
+        return false;
+    }
+
+    bool operator()(String&) const {
+        LOG(FATAL) << "Cannot sum Strings";
+        return false;
+    }
+    bool operator()(Array&) const {
+        LOG(FATAL) << "Cannot sum Arrays";
+        return false;
+    }
+    bool operator()(Tuple&) const {
+        LOG(FATAL) << "Cannot sum Tuples";
+        return false;
+    }
+    bool operator()(UInt128&) const {
+        LOG(FATAL) << "Cannot sum UUIDs";
+        return false;
+    }
+    bool operator()(AggregateFunctionStateData&) const {
+        LOG(FATAL) << "Cannot sum AggregateFunctionStates";
+        return false;
+    }
+
+    template <typename T> // decimals: rhs is read as a DecimalField of the same underlying type T
+    bool operator()(DecimalField<T>& x) const {
+        x += get<DecimalField<T>>(rhs);
+        return x.get_value() != 0; // zero test on the raw stored value
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/common/hash_table/fixed_hash_map.h b/be/src/vec/common/hash_table/fixed_hash_map.h
new file mode 100644
index 0000000000..075ada2d3b
--- /dev/null
+++ b/be/src/vec/common/hash_table/fixed_hash_map.h
@@ -0,0 +1,194 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/FixedHashMap.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/common/hash_table/fixed_hash_table.h"
+#include "vec/common/hash_table/hash_map.h"
+
+template <typename Key, typename TMapped, typename TState = HashTableNoState>
+struct FixedHashMapCell { // map cell that stores an occupancy flag and the mapped value, but no key
+    using Mapped = TMapped;
+    using State = TState;
+
+    using value_type = PairNoInit<Key, Mapped>;
+    using mapped_type = TMapped;
+
+    bool full; // occupancy flag: is_zero() reports !full
+    Mapped mapped;
+
+    FixedHashMapCell() {} // does not set `full`; `mapped` is default-initialized
+    FixedHashMapCell(const Key &, const State &) : full(true) {} // the key argument is not stored
+    FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}
+
+    const VoidKey get_key() const { return {}; } // no key is stored, so a VoidKey placeholder is returned
+    Mapped & get_mapped() { return mapped; }
+    const Mapped & get_mapped() const { return mapped; }
+
+    bool is_zero(const State &) const { return !full; }
+    void set_zero() { full = false; } // only clears the flag; `mapped` is left untouched
+
+    /// Companion view of a cell: a copy of the key plus a pointer back to the stored cell.
+    ///  Note that get_value() assembles a fresh value_type (key + mapped) on every call.
+    struct CellExt {
+        CellExt() {} // leaves `ptr` unset
+        CellExt(Key && key_, const FixedHashMapCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapCell *>(ptr_)) {}
+        void update(Key && key_, const FixedHashMapCell * ptr_) { // re-targets this view at another cell
+            key = key_;
+            ptr = const_cast<FixedHashMapCell *>(ptr_); // constness is dropped so one view type serves const and non-const use
+        }
+        Key key;
+        FixedHashMapCell * ptr;
+
+        const Key & get_key() const { return key; }
+        Mapped & get_mapped() { return ptr->mapped; }
+        const Mapped & get_mapped() const { return ptr->mapped; }
+        const value_type get_value() const { return {key, ptr->mapped}; } // returned by value
+    };
+};
+
+
+/// In case when we can encode empty cells with zero mapped values.
+template <typename Key, typename TMapped, typename TState = HashTableNoState>
+struct FixedHashMapImplicitZeroCell { // map cell with no occupancy flag: a falsy `mapped` means empty
+    using Mapped = TMapped;
+    using State = TState;
+
+    using value_type = PairNoInit<Key, Mapped>;
+    using mapped_type = TMapped;
+
+    Mapped mapped;
+
+    FixedHashMapImplicitZeroCell() {}
+    FixedHashMapImplicitZeroCell(const Key &, const State &) {} // the key argument is not stored
+    FixedHashMapImplicitZeroCell(const value_type & value_, const State &) : mapped(value_.second) {}
+
+    const VoidKey get_first() const { return {}; } // NOTE(review): get_first/get_second here, get_key/get_mapped in FixedHashMapCell - confirm intended
+    Mapped & get_second() { return mapped; }
+    const Mapped & get_second() const { return mapped; }
+
+    bool is_zero(const State &) const { return !mapped; } // requires Mapped to be usable with operator!
+    void set_zero() { mapped = {}; }
+
+    /// Companion view of a cell: a copy of the key plus a pointer back to the stored cell.
+    ///  Note that get_value() assembles a fresh value_type (key + mapped) on every call.
+    struct CellExt {
+        CellExt() {} // leaves `ptr` unset
+        CellExt(Key && key_, const FixedHashMapImplicitZeroCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapImplicitZeroCell *>(ptr_)) {}
+        void update(Key && key_, const FixedHashMapImplicitZeroCell * ptr_) { // re-targets this view at another cell
+            key = key_;
+            ptr = const_cast<FixedHashMapImplicitZeroCell *>(ptr_); // constness is dropped so one view type serves const and non-const use
+        }
+        Key key;
+        FixedHashMapImplicitZeroCell * ptr;
+
+        const Key & get_first() const { return key; }
+        Mapped & get_second() { return ptr->mapped; }
+        const Mapped & get_second() const { return ptr->mapped; }
+        const value_type get_value() const { return {key, ptr->mapped}; } // returned by value
+    };
+};
+
+template <typename K, typename V, typename S>
+ALWAYS_INLINE inline auto lookup_result_get_mapped(FixedHashMapImplicitZeroCell<K, V, S>* cell) {
+    return &(*cell).get_second(); // address of the mapped value stored in the cell
+}
+
+template <
+    typename Key,
+    typename Mapped,
+    typename Cell = FixedHashMapCell<Key, Mapped>,
+    typename Size = FixedHashTableStoredSize<Cell>,
+    typename Allocator = HashTableAllocator>
+class FixedHashMap : public FixedHashTable<Key, Cell, Size, Allocator> {
+public:
+    using Base = FixedHashTable<Key, Cell, Size, Allocator>;
+    using Self = FixedHashMap;
+    using LookupResult = typename Base::LookupResult;
+
+    using Base::Base;
+
+    template <typename Func>
+    void ALWAYS_INLINE merge_to_via_emplace(Self & that, Func && func) {
+        for (auto src = this->begin(), last = this->end(); src != last; ++src) {
+            bool is_new;
+            typename Self::LookupResult dst;
+            that.emplace(src->get_key(), dst, is_new, src.get_hash());
+            func(dst->get_mapped(), src->get_mapped(), is_new);
+        }
+    }
+
+    template <typename Func>
+    void ALWAYS_INLINE merge_to_via_find(Self & that, Func && func) {
+        for (auto src = this->begin(), last = this->end(); src != last; ++src) {
+            auto dst = that.find(src->get_key(), src.get_hash());
+            if (dst)
+                func(dst->get_mapped(), src->get_mapped(), true);
+            else
+                func(src->get_mapped(), src->get_mapped(), false);
+        }
+    }
+
+    template <typename Func>
+    void for_each_value(Func && func) {
+        for (auto & entry : *this)
+            func(entry.get_key(), entry.get_mapped());
+    }
+
+    template <typename Func>
+    void for_each_mapped(Func && func) {
+        for (auto & entry : *this)
+            func(entry.get_second());
+    }
+
+    Mapped & ALWAYS_INLINE operator[](const Key & x) {
+        bool is_new;
+        LookupResult slot;
+        this->emplace(x, slot, is_new);
+        /// A freshly inserted cell gets a value-initialized Mapped constructed in place.
+        if (is_new) new (&slot->get_mapped()) Mapped();
+
+        return slot->get_mapped();
+    }
+
+    /// Always reports "no overflow", whatever size is requested.
+    bool add_elem_size_overflow(size_t add_size) const {
+        return false;
+    }
+
+    char* get_null_key_data() { return nullptr; }
+    bool has_null_key_data() const { return false; }
+};
+
+template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
+using FixedImplicitZeroHashMap = FixedHashMap< // fixed map whose empty cells are encoded by a zero mapped value
+    Key,
+    Mapped,
+    FixedHashMapImplicitZeroCell<Key, Mapped>,
+    FixedHashTableStoredSize<FixedHashMapImplicitZeroCell<Key, Mapped>>, // size policy: kept in a counter
+    Allocator>;
+
+template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
+using FixedImplicitZeroHashMapWithCalculatedSize = FixedHashMap< // same cell type, different size policy
+    Key,
+    Mapped,
+    FixedHashMapImplicitZeroCell<Key, Mapped>,
+    FixedHashTableCalculatedSize<FixedHashMapImplicitZeroCell<Key, Mapped>>, // size policy: recounted by scanning the cells
+    Allocator>;
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/fixed_hash_table.h b/be/src/vec/common/hash_table/fixed_hash_table.h
new file mode 100644
index 0000000000..fc03781a49
--- /dev/null
+++ b/be/src/vec/common/hash_table/fixed_hash_table.h
@@ -0,0 +1,383 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/FixedHashTable.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/common/hash_table/hash_table.h"
+
+template <typename Key, typename TState = HashTableNoState>
+struct FixedHashTableCell { // set cell: stores only an occupancy flag, no key and no mapped value
+    using State = TState;
+
+    using value_type = Key;
+    using mapped_type = VoidMapped;
+    bool full; // occupancy flag: is_zero() reports !full
+
+    FixedHashTableCell() {} // does not set `full`
+    FixedHashTableCell(const Key &, const State &) : full(true) {} // the key argument is not stored
+
+    const VoidKey get_key() const { return {}; } // no key is stored, so a VoidKey placeholder is returned
+    VoidMapped get_mapped() const { return {}; }
+
+    bool is_zero(const State &) const { return !full; }
+    void set_zero() { full = false; }
+    static constexpr bool need_zero_value_storage = false;
+
+    /// This Cell is only stored inside an iterator. It's used to accommodate the fact
+    ///  that the iterator based API always provide a reference to a continuous memory
+    ///  containing the Key. As a result, we have to instantiate a real Key field.
+    /// update() accepts a pointer to const so that const iterators, which only hold
+    ///  a `const Cell *`, can refresh the key as well; the pointer itself is ignored
+    ///  because a set cell has no mapped value to point at.
+    struct CellExt {
+        Key key;
+
+        const VoidKey get_key() const { return {}; }
+        VoidMapped get_mapped() const { return {}; }
+        const value_type & get_value() const { return key; }
+        void update(Key && key_, const FixedHashTableCell *) { key = key_; }
+    };
+};
+
+
+/// How to obtain the size of the table.
+
+template <typename Cell>
+struct FixedHashTableStoredSize {
+    size_t m_size {0}; /// Element count; changed only through the mutators below.
+
+    size_t get_size(const Cell *, const typename Cell::State &, size_t) const { return this->m_size; }
+    bool is_empty(const Cell *, const typename Cell::State &, size_t) const { return !m_size; }
+
+    void increase_size() { m_size += 1; }
+    void clear_size() { set_size(0); }
+    void set_size(size_t to) { m_size = to; }
+};
+
+template <typename Cell>
+struct FixedHashTableCalculatedSize { // stateless size policy: every query rescans the cell buffer
+    size_t get_size(const Cell * buf, const typename Cell::State & state, size_t num_cells) const {
+        size_t res = 0;
+        /// A null buffer (e.g. after clear_and_shrink()) has no cells; never form nullptr + num_cells.
+        for (const Cell * end = buf ? buf + num_cells : buf; buf != end; ++buf)
+            res += !buf->is_zero(state);
+        return res;
+    }
+
+    bool is_empty(const Cell * buf, const typename Cell::State & state, size_t num_cells) const { // name matches FixedHashTableStoredSize
+        for (const Cell * end = buf ? buf + num_cells : buf; buf != end; ++buf) // null buffer => empty
+            if (!buf->is_zero(state))
+                return false;
+        return true;
+    }
+
+    void increase_size() {} // nothing is stored, so the mutators are no-ops
+    void clear_size() {}
+    void set_size(size_t) {}
+};
+
+
+/** Used as a lookup table for small keys such as UInt8, UInt16. It's different
+  *  than a HashTable in that keys are not stored in the Cell buf, but inferred
+  *  inside each iterator. There are a bunch of reasons it is faster than using
+  *  HashTable: a) It doesn't have a conflict chain; b) There is no key
+  *  comparison; c) The number of cycles for checking cell empty is halved; d)
+  *  Memory layout is tighter, especially the Clearable variants.
+  *
+  * NOTE: For Set variants this should always be better. For Map variants
+  *  however, as we need to assemble the real cell inside each iterator, there
+  *  might be some cases we fall short.
+  *
+  * TODO: Deprecate the cell API so that end users don't rely on the structure
+  *  of cell. Instead iterator should be used for operations such as cell
+  *  transfer, key updates (e.g. StringRef) and serde. This will allow
+  *  TwoLevelHashSet(Map) to contain different type of sets(maps).
+  */
+template <typename Key, typename Cell, typename Size, typename Allocator>
+class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State, protected Size {
+    static constexpr size_t NUM_CELLS = 1ULL << (sizeof(Key) * 8); // one cell per possible Key bit pattern
+
+protected:
+    friend class const_iterator;
+    friend class iterator;
+
+    using Self = FixedHashTable;
+
+    Cell * buf; /// A piece of memory for all elements.
+
+    void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(NUM_CELLS * sizeof(Cell))); }
+
+    void free() {
+        if (buf) {
+            Allocator::free(buf, get_buffer_size_in_bytes());
+            buf = nullptr;
+        }
+    }
+
+    void destroy_elements() {
+        if (!std::is_trivially_destructible_v<Cell>)
+            for (iterator it = begin(), it_end = end(); it != it_end; ++it)
+                it.ptr->~Cell();
+    }
+
+
+    template <typename Derived, bool is_const>
+    class iterator_base {
+        using Container = std::conditional_t<is_const, const Self, Self>;
+        using cell_type = std::conditional_t<is_const, const Cell, Cell>;
+
+        Container * container;
+        cell_type * ptr;
+
+        friend class FixedHashTable;
+
+    public:
+        iterator_base() {}
+        iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_) {
+            cell.update(ptr - container->buf, ptr); // the key is the cell's index in the buffer
+        }
+
+        bool operator==(const iterator_base & rhs) const { return ptr == rhs.ptr; }
+        bool operator!=(const iterator_base & rhs) const { return ptr != rhs.ptr; }
+
+        Derived & operator++() {
+            ++ptr;
+
+            /// Skip empty cells in the main buffer.
+            auto buf_end = container->buf + container->NUM_CELLS;
+            while (ptr < buf_end && ptr->is_zero(*container))
+                ++ptr;
+
+            return static_cast<Derived &>(*this);
+        }
+
+        auto & operator*() {
+            if (cell.key != ptr - container->buf) // refresh the cached view only when the position changed
+                cell.update(ptr - container->buf, ptr);
+            return cell;
+        }
+        auto * operator-> () {
+            if (cell.key != ptr - container->buf)
+                cell.update(ptr - container->buf, ptr);
+            return &cell;
+        }
+
+        auto get_ptr() const { return ptr; }
+        size_t get_hash() const { return ptr - container->buf; }
+        size_t get_collision_chain_length() const { return 0; }
+        typename cell_type::CellExt cell;
+    };
+
+
+public:
+    using key_type = Key;
+    using mapped_type = typename Cell::mapped_type;
+    using value_type = typename Cell::value_type;
+    using cell_type = Cell;
+
+    using LookupResult = Cell *;
+    using ConstLookupResult = const Cell *;
+
+
+    size_t hash(const Key & x) const { return x; }
+
+    FixedHashTable() { alloc(); }
+
+    FixedHashTable(FixedHashTable && rhs) : buf(nullptr) { *this = std::move(rhs); }
+
+    ~FixedHashTable() {
+        destroy_elements();
+        free();
+    }
+
+    FixedHashTable & operator=(FixedHashTable && rhs) {
+        destroy_elements();
+        free();
+        /// Read rhs's size before the swap: afterwards rhs.buf is null and must not be scanned.
+        this->set_size(rhs.size());
+        std::swap(buf, rhs.buf);
+
+        Allocator::operator=(std::move(rhs));
+        Cell::State::operator=(std::move(rhs));
+
+        return *this;
+    }
+
+    class iterator : public iterator_base<iterator, false> {
+    public:
+        using iterator_base<iterator, false>::iterator_base;
+    };
+
+    class const_iterator : public iterator_base<const_iterator, true> {
+    public:
+        using iterator_base<const_iterator, true>::iterator_base;
+    };
+
+
+    const_iterator begin() const {
+        if (!buf)
+            return end();
+
+        const Cell * ptr = buf;
+        auto buf_end = buf + NUM_CELLS;
+        while (ptr < buf_end && ptr->is_zero(*this))
+            ++ptr;
+
+        return const_iterator(this, ptr);
+    }
+
+    const_iterator cbegin() const { return begin(); }
+
+    iterator begin() {
+        if (!buf)
+            return end();
+
+        Cell * ptr = buf;
+        auto buf_end = buf + NUM_CELLS;
+        while (ptr < buf_end && ptr->is_zero(*this))
+            ++ptr;
+
+        return iterator(this, ptr);
+    }
+
+    const_iterator end() const {
+        /// Avoid UBSan warning about adding zero to nullptr. It is valid in C++20 (and earlier) but not valid in C.
+        return const_iterator(this, buf ? buf + NUM_CELLS : buf);
+    }
+
+    const_iterator cend() const {
+        return end();
+    }
+
+    iterator end() {
+        return iterator(this, buf ? buf + NUM_CELLS : buf);
+    }
+
+
+public:
+    /// The last parameter is unused but exists for compatibility with HashTable interface.
+    void ALWAYS_INLINE emplace(const Key & x, LookupResult & it, bool & inserted, size_t /* hash */ = 0) {
+        it = &buf[x];
+
+        if (!buf[x].is_zero(*this))
+        {
+            inserted = false;
+            return;
+        }
+
+        new (&buf[x]) Cell(x, *this);
+        inserted = true;
+        this->increase_size();
+    }
+
+    std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type & x) {
+        std::pair<LookupResult, bool> res;
+        emplace(Cell::get_key(x), res.first, res.second);
+        if (res.second)
+            insert_set_mapped(res.first->get_mapped(), x);
+
+        return res;
+    }
+
+    LookupResult ALWAYS_INLINE find(const Key & x) { return !buf[x].is_zero(*this) ? &buf[x] : nullptr; }
+
+    ConstLookupResult ALWAYS_INLINE find(const Key & x) const { return const_cast<std::decay_t<decltype(*this)> *>(this)->find(x); }
+
+    LookupResult ALWAYS_INLINE find(const Key &, size_t hash_value) { return !buf[hash_value].is_zero(*this) ? &buf[hash_value] : nullptr; }
+
+    ConstLookupResult ALWAYS_INLINE find(const Key & key, size_t hash_value) const {
+        return const_cast<std::decay_t<decltype(*this)> *>(this)->find(key, hash_value);
+    }
+
+    bool ALWAYS_INLINE has(const Key & x) const { return !buf[x].is_zero(*this); }
+    bool ALWAYS_INLINE has(const Key &, size_t hash_value) const { return !buf[hash_value].is_zero(*this); }
+
+    void write(doris::vectorized::BufferWritable& wb) const {
+        Cell::State::write(wb);
+        doris::vectorized::write_var_uint(size(), wb);
+
+        if (!buf)
+            return;
+
+        for (auto ptr = buf, buf_end = buf + NUM_CELLS; ptr < buf_end; ++ptr) {
+            if (!ptr->is_zero(*this)) {
+                doris::vectorized::write_var_uint(ptr - buf, wb);
+                ptr->write(wb);
+            }
+        }
+    }
+
+    void read(doris::vectorized::BufferReadable& rb) {
+        Cell::State::read(rb);
+        destroy_elements();
+        size_t m_size;
+        doris::vectorized::read_var_uint(m_size, rb);
+        this->set_size(m_size);
+        free();
+        alloc();
+
+        for (size_t i = 0; i < m_size; ++i) {
+            size_t place_value = 0;
+            doris::vectorized::read_var_uint(place_value, rb);
+            Cell x;
+            x.read(rb);
+            new (&buf[place_value]) Cell(x, *this);
+        }
+    }
+
+    size_t size() const { return this->get_size(buf, *this, NUM_CELLS); }
+    bool empty() const { return this->is_empty(buf, *this, NUM_CELLS); }
+
+    void clear() {
+        destroy_elements();
+        this->clear_size();
+
+        memset(static_cast<void *>(buf), 0, NUM_CELLS * sizeof(*buf));
+    }
+
+    /// After executing this function, the table can only be destroyed,
+    ///  and also you can use the methods `size`, `empty`, `begin`, `end`.
+    void clear_and_shrink() {
+        destroy_elements();
+        this->clear_size();
+        free();
+    }
+
+    size_t get_buffer_size_in_bytes() const { return NUM_CELLS * sizeof(Cell); }
+
+    size_t get_buffer_size_in_cells() const { return NUM_CELLS; }
+
+    /// Return offset for result in internal buffer.
+    /// Result can have value up to `get_buffer_size_in_cells() + 1`
+    /// because offset for zero value considered to be 0
+    /// and for other values it will be `offset in buffer + 1`
+    size_t offset_internal(ConstLookupResult ptr) const {
+        if (ptr->is_zero(*this))
+            return 0;
+        return ptr - buf + 1;
+    }
+
+    const Cell * data() const { return buf; }
+    Cell * data() { return buf; }
+
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+    size_t get_collisions() const { return 0; }
+#endif
+};
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/hash.h b/be/src/vec/common/hash_table/hash.h
new file mode 100644
index 0000000000..5fe9590741
--- /dev/null
+++ b/be/src/vec/common/hash_table/hash.h
@@ -0,0 +1,191 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/Hash.h
+// and modified by Doris
+
+#pragma once
+
+#include <type_traits>
+
+#include "vec/common/uint128.h"
+#include "vec/core/types.h"
+
+/** Hash functions that are better than the trivial function std::hash.
+  *
+  * Example: when we do aggregation by the visitor ID, the performance increase is more than 5 times.
+  * This is because of following reasons:
+  * - in Yandex, visitor identifier is an integer that has timestamp with seconds resolution in lower bits;
+  * - in typical implementation of standard library, hash function for integers is trivial and just use lower bits;
+  * - traffic is non-uniformly distributed across a day;
+  * - we are using open-addressing linear probing hash tables that are most critical to hash function quality,
+  *   and trivial hash function gives disastrous results.
+  */
+
+/** Taken from MurmurHash. This is Murmur finalizer.
+  * Faster than int_hash32 when inserting into the hash table UInt64 -> UInt64, where the key is the visitor ID.
+  */
+inline doris::vectorized::UInt64 int_hash64(doris::vectorized::UInt64 x) {
+    /// Three xor-shift steps by 33 bits, with a multiplication
+    /// by a fixed odd constant after each of the first two.
+    x = (x ^ (x >> 33)) * 0xff51afd7ed558ccdULL;
+    x = (x ^ (x >> 33)) * 0xc4ceb9fe1a85ec53ULL;
+    x ^= x >> 33;
+
+    return x;
+}
+
+/** CRC32C is not very high-quality as a hash function,
+  *  according to avalanche and bit independence tests (see SMHasher software), as well as a small number of bits,
+  *  but can behave well when used in hash tables,
+  *  due to high speed (latency 3 + 1 clock cycle, throughput 1 clock cycle).
+  * Works only with SSE 4.2 support.
+  */
+#ifdef __SSE4_2__
+#include <nmmintrin.h>
+#endif
+
+#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
+#include <arm_acle.h>
+#include <arm_neon.h>
+#endif
+
+inline doris::vectorized::UInt64 int_hash_crc32(doris::vectorized::UInt64 x) {
+#ifdef __SSE4_2__
+    return _mm_crc32_u64(-1ULL, x); // SSE4.2 build: intrinsic seeded with all-ones
+#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
+    return __crc32cd(-1U, x); // AArch64 build with the CRC32 feature: all-ones 32-bit seed
+#else
+    /// No CRC32 intrinsic is compiled in: fall back to int_hash64. NOTE The function name is then misleading.
+    return int_hash64(x);
+#endif
+}
+
+template <typename T>
+inline size_t default_hash64(T key) {
+    union { // overlays the key's bytes on a UInt64
+        T in;
+        doris::vectorized::UInt64 out;
+    } u;
+    u.out = 0; // zero first, so bytes not covered by a narrower T have a defined value
+    u.in = key;
+    return int_hash64(u.out); // NOTE(review): if T is wider than `out`, only its leading bytes reach the hash
+}
+
+template <typename T, typename Enable = void>
+struct DefaultHash; // primary template: declared only, specialized below
+
+template <typename T>
+struct DefaultHash<T, std::enable_if_t<std::is_arithmetic_v<T>>> { // selected for every arithmetic T
+    size_t operator()(T key) const { return default_hash64<T>(key); }
+};
+
+template <typename T>
+struct HashCRC32; // primary template: declared only, specialized per type below
+
+template <typename T>
+inline size_t hash_crc32(T key) {
+    union { // overlays the key's bytes on a UInt64
+        T in;
+        doris::vectorized::UInt64 out;
+    } u;
+    u.out = 0; // zero first, so bytes not covered by a narrower T have a defined value
+    u.in = key;
+    return int_hash_crc32(u.out); // NOTE(review): if T is wider than `out`, only its leading bytes reach the hash
+}
+
+#define DEFINE_HASH(T)                                                \
+    template <>                                                       \
+    struct HashCRC32<T> {                                             \
+        size_t operator()(T key) const { return hash_crc32<T>(key); } \
+    };
+
+DEFINE_HASH(doris::vectorized::UInt8)
+DEFINE_HASH(doris::vectorized::UInt16)
+DEFINE_HASH(doris::vectorized::UInt32)
+DEFINE_HASH(doris::vectorized::UInt64)
+DEFINE_HASH(doris::vectorized::UInt128) // NOTE(review): presumably wider than UInt64, so part of the key is ignored - confirm
+DEFINE_HASH(doris::vectorized::Int8)
+DEFINE_HASH(doris::vectorized::Int16)
+DEFINE_HASH(doris::vectorized::Int32)
+DEFINE_HASH(doris::vectorized::Int64)
+DEFINE_HASH(doris::vectorized::Int128) // NOTE(review): same concern as UInt128
+DEFINE_HASH(doris::vectorized::Float32)
+DEFINE_HASH(doris::vectorized::Float64)
+
+#undef DEFINE_HASH
+
+template <>
+struct HashCRC32<doris::vectorized::UInt256> { // hashes all four members a, b, c, d
+    size_t operator()(const doris::vectorized::UInt256& x) const {
+#ifdef __SSE4_2__
+        doris::vectorized::UInt64 crc = -1ULL; // all-ones seed
+        crc = _mm_crc32_u64(crc, x.a); // each step feeds the running value back in
+        crc = _mm_crc32_u64(crc, x.b);
+        crc = _mm_crc32_u64(crc, x.c);
+        crc = _mm_crc32_u64(crc, x.d);
+        return crc;
+#else
+        return Hash128to64({Hash128to64({x.a, x.b}), Hash128to64({x.c, x.d})}); // without SSE4.2: combine pairs, then the two results
+#endif
+    }
+};
+
+/// It is reasonable to use for UInt8, UInt16 with sufficient hash table size.
+struct TrivialHash {
+    template <typename T>
+    auto operator()(T key) const -> size_t {
+        return static_cast<size_t>(key); // the key value itself is the hash
+    }
+};
+
+/** A relatively good non-cryptographic hash function from UInt64 to UInt32.
+  * But worse (both in quality and speed) than just cutting int_hash64.
+  * Taken from here: http://www.concentric.net/~ttwang/tech/inthash.htm
+  *
+  * Slightly changed compared to the function by link: shifts to the right are accidentally replaced by a cyclic shift to the right.
+  * This change did not affect the smhasher test results.
+  *
+  * It is recommended to use different salt for different tasks.
+  * That was the case that in the database values were sorted by hash (for low-quality pseudo-random spread),
+  *  and in another place, in the aggregate function, the same hash was used in the hash table,
+  *  as a result, this aggregate function was monstrously slowed due to collisions.
+  *
+  * NOTE Salting is far from perfect, because it commutes with first steps of calculation.
+  *
+  * NOTE As mentioned, this function is slower than int_hash64.
+  * But occasionally, it is faster, when written in a loop and loop is vectorized.
+  */
+template <doris::vectorized::UInt64 salt>
+inline doris::vectorized::UInt32 int_hash32(doris::vectorized::UInt64 key) {
+    key = key ^ salt;
+
+    key = ~key + (key << 18);
+    key ^= (key >> 31) | (key << 33);
+    key *= 21;
+    key ^= (key >> 11) | (key << 53);
+    key += key << 6;
+    key ^= (key >> 22) | (key << 42);
+
+    return key;
+}
+
+/// Functor form of int_hash32, usable as a container's hash parameter.
+template <typename T, doris::vectorized::UInt64 salt = 0>
+struct IntHash32 {
+    size_t operator()(const T& key) const { return int_hash32<salt>(key); }
+};
diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
new file mode 100644
index 0000000000..0b849c13fd
--- /dev/null
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -0,0 +1,240 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/HashMap.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/common/hash_table/hash.h"
+#include "vec/common/hash_table/hash_table.h"
+#include "vec/common/hash_table/hash_table_allocator.h"
+/** NOTE HashMap could only be used for memmoveable (position independent) types.
+  * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++.
+  * Also, the key type must be such that a value of all zero bytes compares equal to the zero key.
+  */
+
+struct NoInitTag {}; // Tag: selects the PairNoInit constructor that takes only `first`.
+
+/// A pair whose constructors initialize only the members they are given values for.
+template <typename First, typename Second>
+struct PairNoInit {
+    First first;
+    Second second;
+    /// User-provided empty constructor: neither member gets an explicit initializer.
+    PairNoInit() {}
+    /// Forwards `key_arg` into `first`; `second` gets no explicit initializer.
+    template <typename K>
+    PairNoInit(K&& key_arg, NoInitTag) : first(std::forward<K>(key_arg)) {}
+    /// Forwards both arguments into the corresponding members.
+    template <typename K, typename V>
+    PairNoInit(K&& key_arg, V&& mapped_arg)
+            : first(std::forward<K>(key_arg)), second(std::forward<V>(mapped_arg)) {}
+};
+
+/// Hash-map cell: a (key, mapped) PairNoInit plus the cell interface used by the hash table.
+template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
+struct HashMapCell {
+    using State = TState;
+    using Mapped = TMapped;
+    using key_type = Key;
+    using mapped_type = Mapped;
+    using value_type = PairNoInit<Key, Mapped>;
+    value_type value;
+
+    /// Leaves `value` without explicit initialization.
+    HashMapCell() {}
+    /// Stores only the key; the mapped part is not explicitly initialized.
+    HashMapCell(const Key& key_, const State&) : value(key_, NoInitTag()) {}
+    /// Copies both the key and the mapped value from `value_`.
+    HashMapCell(const value_type& value_, const State&) : value(value_) {}
+
+    const Key& get_first() const { return value.first; }
+    Mapped& get_second() { return value.second; }
+    const Mapped& get_second() const { return value.second; }
+    const value_type& get_value() const { return value; }
+    static const Key& get_key(const value_type& value) { return value.first; }
+
+    /// Key comparison; the hash and state arguments are ignored by this cell type.
+    bool key_equals(const Key& key_) const { return get_first() == key_; }
+    bool key_equals(const Key& key_, size_t /*hash_*/) const { return key_equals(key_); }
+    bool key_equals(const Key& key_, size_t /*hash_*/, const State& /*state*/) const {
+        return key_equals(key_);
+    }
+
+    /// No hash is cached: set_hash is a no-op and get_hash recomputes it from the key.
+    void set_hash(size_t /*hash_value*/) {}
+    size_t get_hash(const Hash& hash) const { return hash(get_first()); }
+
+    /// A cell counts as zero when ZeroTraits::check accepts its key.
+    static bool is_zero(const Key& key, const State& /*state*/) { return ZeroTraits::check(key); }
+    bool is_zero(const State& state) const { return is_zero(get_first(), state); }
+    void set_zero() { ZeroTraits::set(value.first); }
+
+    /// True: a zero key can be inserted, so the table has to store it separately.
+    static constexpr bool need_zero_value_storage = true;
+    /// This cell type has no deleted state.
+    bool is_deleted() const { return false; }
+    /// Overwrites only the mapped part with the one carried by `value_`.
+    void set_mapped(const value_type& value_) { value.second = value_.second; }
+};
+
+template <typename Key, typename Mapped, typename Hash, typename State>
+ALWAYS_INLINE inline auto lookup_result_get_key(HashMapCell<Key, Mapped, Hash, State>* cell) {
+    return &cell->get_first(); // address of the key stored in the cell (const Key*)
+}
+
+template <typename Key, typename Mapped, typename Hash, typename State>
+ALWAYS_INLINE inline auto lookup_result_get_mapped(HashMapCell<Key, Mapped, Hash, State>* cell) {
+    return &cell->get_second(); // address of the cell's mapped value (Mapped*)
+}
+
+/// HashMapCell variant that caches the key's hash next to the key/mapped pair.
+template <typename Key, typename TMapped, typename Hash, typename TState = HashTableNoState>
+struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState> {
+    using Base = HashMapCell<Key, TMapped, Hash, TState>;
+    using Base::Base;
+
+    size_t saved_hash; /// Set through set_hash(); the inherited constructors leave it unset.
+    bool key_equals(const Key& key_) const { return this->get_first() == key_; }
+    /// Checks the cached hash first; keys are compared only when the hashes match.
+    bool key_equals(const Key& key_, size_t hash_) const {
+        if (saved_hash != hash_) return false;
+        return key_equals(key_);
+    }
+    bool key_equals(const Key& key_, size_t hash_, const typename Base::State&) const {
+        return key_equals(key_, hash_);
+    }
+    void set_hash(size_t hash_value) { saved_hash = hash_value; }
+    size_t get_hash(const Hash& /*hash_function*/) const { return saved_hash; }
+};
+
+template <typename Key, typename Mapped, typename Hash, typename State>
+ALWAYS_INLINE inline auto lookup_result_get_key(
+        HashMapCellWithSavedHash<Key, Mapped, Hash, State>* cell) {
+    return &cell->get_first(); // address of the key stored in the cell (const Key*)
+}
+
+template <typename Key, typename Mapped, typename Hash, typename State>
+ALWAYS_INLINE inline auto lookup_result_get_mapped(
+        HashMapCellWithSavedHash<Key, Mapped, Hash, State>* cell) {
+    return &cell->get_second(); // address of the cell's mapped value (Mapped*)
+}
+
+/// Hash map built on HashTable: adds merge helpers, per-element visitors and operator[].
+template <typename Key, typename Cell, typename Hash = DefaultHash<Key>,
+          typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator>
+class HashMapTable : public HashTable<Key, Cell, Hash, Grower, Allocator> {
+public:
+    using Self = HashMapTable;
+    using Base = HashTable<Key, Cell, Hash, Grower, Allocator>;
+
+    using key_type = Key;
+    using value_type = typename Cell::value_type;
+    using mapped_type = typename Cell::Mapped;
+    using LookupResult = typename Base::LookupResult;
+
+    using HashTable<Key, Cell, Hash, Grower, Allocator>::HashTable; // inherit constructors
+
+    /// Merge every cell's value of the current map into the destination map via emplace.
+    ///  Func should have signature void(Mapped & dst, Mapped & src, bool emplaced).
+    ///  func is invoked once for each filled cell of the current map. If `that` has no
+    ///  key equal to the cell's key, a new cell is emplaced into `that` and func is
+    ///  invoked with emplaced set to true; this method does not initialize the new
+    ///  cell's mapped value before the call. Otherwise emplaced is set to false.
+    template <typename Func>
+    void ALWAYS_INLINE merge_to_via_emplace(Self& that, Func&& func) {
+        for (auto it = this->begin(), end = this->end(); it != end; ++it) {
+            typename Self::LookupResult res_it;
+            bool inserted;
+            that.emplace(it->get_first(), res_it, inserted, it.get_hash());
+            func(*lookup_result_get_mapped(res_it), it->get_second(), inserted);
+        }
+    }
+
+    /// Merge every cell's value of the current map into the destination map via find.
+    ///  Func should have signature void(Mapped & dst, Mapped & src, bool exist).
+    ///  func is invoked once for each filled cell of the current map. If `that` has no
+    ///  key equal to the cell's key, func is invoked with exist set to false and with the
+    ///  source cell's own mapped value as both dst and src. Otherwise exist is set to true.
+    template <typename Func>
+    void ALWAYS_INLINE merge_to_via_find(Self& that, Func&& func) {
+        for (auto it = this->begin(), end = this->end(); it != end; ++it) {
+            auto res_it = that.find(it->get_first(), it.get_hash());
+            if (!res_it)
+                func(it->get_second(), it->get_second(), false);
+            else
+                func(*lookup_result_get_mapped(res_it), it->get_second(), true);
+        }
+    }
+
+    /// Call func(const Key &, Mapped &) for each hash map element.
+    template <typename Func>
+    void for_each_value(Func&& func) {
+        for (auto& v : *this) func(v.get_first(), v.get_second());
+    }
+
+    /// Call func(Mapped &) for each hash map element.
+    template <typename Func>
+    void for_each_mapped(Func&& func) {
+        for (auto& v : *this) func(v.get_second());
+    }
+
+    /// Sum of get_row_count() over all mapped values (Mapped must provide that method).
+    size_t get_size() {
+        size_t count = 0;
+        for (auto& v : *this) {
+            count += v.get_second().get_row_count();
+        }
+        return count;
+    }
+
+    /// Returns the mapped value for key x; a newly inserted one is value-initialized first.
+    mapped_type& ALWAYS_INLINE operator[](Key x) {
+        typename HashMapTable::LookupResult it;
+        bool inserted;
+        this->emplace(x, it, inserted);
+
+        /** It may seem that initialization is not necessary for POD-types (or __has_trivial_constructor),
+          *  since the hash table memory is initially initialized with zeros.
+          * But, in fact, an empty cell may not be initialized with zeros in the following cases:
+          * - ZeroValueStorage (it only zeros the key);
+          * - after resizing and moving a part of the cells to the new half of the hash table, the old cells also have only the key zeroed.
+          * On performance, there is almost always no difference, due to the fact that it->second is usually assigned immediately
+          *  after calling `operator[]`, and since `operator[]` is inlined, the compiler removes unnecessary initialization.
+          * Sometimes due to initialization, the performance even grows. This occurs in code like `++map[key]`.
+          * When we do the initialization, for new cells, it's enough to make `store 1` right away.
+          * And if we did not initialize, then even though there was zero in the cell,
+          *  the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
+          */
+        if (inserted) new (lookup_result_get_mapped(it)) mapped_type();
+        return *lookup_result_get_mapped(it);
+    }
+
+    /// Null-key accessors: this table always reports that it has no null-key data.
+    char* get_null_key_data() { return nullptr; }
+    bool has_null_key_data() const { return false; }
+};
+/// Hash map with plain HashMapCell cells (the hash is recomputed from the key when needed).
+template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
+          typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator>
+using HashMap = HashMapTable<Key, HashMapCell<Key, Mapped, Hash>, Hash, Grower, Allocator>;
+/// Hash map whose cells (HashMapCellWithSavedHash) also store the key's hash value.
+template <typename Key, typename Mapped, typename Hash = DefaultHash<Key>,
+          typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator>
+using HashMapWithSavedHash =
+        HashMapTable<Key, HashMapCellWithSavedHash<Key, Mapped, Hash>, Hash, Grower, Allocator>;
diff --git a/be/src/vec/common/hash_table/hash_set.h b/be/src/vec/common/hash_table/hash_set.h
new file mode 100644
index 0000000000..23dd606549
--- /dev/null
+++ b/be/src/vec/common/hash_table/hash_set.h
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/HashSet.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/common/hash_table/hash.h"
+#include "vec/common/hash_table/hash_table.h"
+#include "vec/common/hash_table/hash_table_allocator.h"
+
+/** NOTE HashSet could only be used for memmoveable (position independent) types.
+  * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++.
+  * Also, the key type must be such that a value of all zero bytes compares equal to the zero key.
+  */
+
+/// Hash set built on HashTable; adds merge() to fold another set's keys into this one.
+template <typename Key, typename TCell, typename Hash = DefaultHash<Key>,
+          typename Grower = HashTableGrower<>, typename Allocator = HashTableAllocator>
+class HashSetTable : public HashTable<Key, TCell, Hash, Grower, Allocator> {
+public:
+    using Cell = TCell;
+    using Self = HashSetTable;
+    using Base = HashTable<Key, TCell, Hash, Grower, Allocator>;
+    using typename Base::LookupResult;
+    /// Inserts every key of `rhs`; a zero key is carried over through the has-zero flag.
+    void merge(const Self& rhs) {
+        if (rhs.get_has_zero() && !this->get_has_zero()) {
+            this->set_get_has_zero();
+            ++this->m_size;
+        }
+        for (size_t pos = 0; pos < rhs.grower.buf_size(); ++pos) {
+            const auto& cell = rhs.buf[pos];
+            if (!cell.is_zero(*this)) this->insert(Cell::get_key(cell.get_value()));
+        }
+    }
+};
+
+/// Hash-set cell that caches the key's hash alongside the key.
+template <typename Key, typename Hash, typename TState = HashTableNoState>
+struct HashSetCellWithSavedHash : public HashTableCell<Key, Hash, TState> {
+    using Base = HashTableCell<Key, Hash, TState>;
+    size_t saved_hash; /// Set through set_hash(); the constructors leave it unset.
+
+    HashSetCellWithSavedHash() : Base() {}
+    HashSetCellWithSavedHash(const Key& key_, const typename Base::State& state)
+            : Base(key_, state) {}
+    bool key_equals(const Key& key_) const { return this->key == key_; }
+    /// Checks the cached hash first; keys are compared only when the hashes match.
+    bool key_equals(const Key& key_, size_t hash_) const {
+        if (saved_hash != hash_) return false;
+        return key_equals(key_);
+    }
+    bool key_equals(const Key& key_, size_t hash_, const typename Base::State&) const {
+        return key_equals(key_, hash_);
+    }
+    void set_hash(size_t hash_value) { saved_hash = hash_value; }
+    size_t get_hash(const Hash& /*hash_function*/) const { return saved_hash; }
+};
+
+template <typename Key, typename Hash, typename State>
+ALWAYS_INLINE inline auto lookup_result_get_key(HashSetCellWithSavedHash<Key, Hash, State>* cell) {
+    return &cell->key; // address of the key stored in the cell
+}
+
+template <typename Key, typename Hash, typename State>
+ALWAYS_INLINE inline void* lookup_result_get_mapped(HashSetCellWithSavedHash<Key, Hash, State>*) {
+    return nullptr; // this cell type has no mapped value to point at
+}
+/// Hash set with plain HashTableCell cells (the hash is recomputed from the key when needed).
+template <typename Key, typename Hash = DefaultHash<Key>, typename Grower = HashTableGrower<>,
+          typename Allocator = HashTableAllocator>
+using HashSet = HashSetTable<Key, HashTableCell<Key, Hash>, Hash, Grower, Allocator>;
+/// Hash set whose cells (HashSetCellWithSavedHash) also store the key's hash value.
+template <typename Key, typename Hash = DefaultHash<Key>, typename Grower = HashTableGrower<>,
+          typename Allocator = HashTableAllocator>
+using HashSetWithSavedHash =
+        HashSetTable<Key, HashSetCellWithSavedHash<Key, Hash>, Hash, Grower, Allocator>;
+/// HashSet whose allocator argument is the byte size of 2^initial_size_degree cells.
+template <typename Key, typename Hash, size_t initial_size_degree>
+using HashSetWithStackMemory =
+        HashSet<Key, Hash, HashTableGrower<initial_size_degree>,
+                HashTableAllocatorWithStackMemory<(1ULL << initial_size_degree) *
+                                                  sizeof(HashTableCell<Key, Hash>)>>;
+/// Same as HashSetWithStackMemory, but sized for and built from HashSetCellWithSavedHash cells.
+template <typename Key, typename Hash, size_t initial_size_degree>
+using HashSetWithSavedHashWithStackMemory = HashSetWithSavedHash<
+        Key, Hash, HashTableGrower<initial_size_degree>,
+        HashTableAllocatorWithStackMemory<(1ULL << initial_size_degree) *
+                                          sizeof(HashSetCellWithSavedHash<Key, Hash>)>>;
\ No newline at end of file
diff --git a/be/src/vec/common/hash_table/hash_table.h b/be/src/vec/common/hash_table/hash_table.h
new file mode 100644
index 0000000000..53af823ea0
--- /dev/null
+++ b/be/src/vec/common/hash_table/hash_table.h
@@ -0,0 +1,914 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/HashTable.h
+// and modified by Doris
+
+#pragma once
+
+#include <math.h>
+#include <string.h>
+
+#include <boost/noncopyable.hpp>
+#include <utility>
+
+#include "common/status.h"
+#include "util/runtime_profile.h"
+#include "vec/common/exception.h"
+#include "vec/common/hash_table/hash_table_allocator.h"
+#include "vec/common/hash_table/hash_table_key_holder.h"
+#include "vec/core/types.h"
+#include "vec/io/io_helper.h"
+
+#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
+#include <Common/Stopwatch.h>
+
+#include <iomanip>
+#include <iostream>
+#endif
+
+/** NOTE HashTable could only be used for memmoveable (position independent) types.
+  * Example: std::string is not position independent in libstdc++ with C++11 ABI or in libc++.
+  * Also, the key type must be such that a value of all zero bytes compares equal to the zero key.
+  */
+
+/** The state of the hash table that affects the properties of its cells.
+  * Used as a template parameter.
+  * For example, there is an implementation of an instantly clearable hash table - ClearableHashMap.
+  * For it, each cell holds the version number, and in the hash table itself is the current version.
+  *  When clearing, the current version simply increases; All cells with a mismatching version are considered empty.
+  *  Another example: for an approximate calculation of the number of unique visitors, there is a hash table for UniquesHashSet.
+  *  It has the concept of "degree". At each overflow, cells with keys that do not divide by the corresponding power of the two are deleted.
+  */
+struct HashTableNoState {
+    /// Serialization, in binary and text form. There is no state, so nothing is written.
+    void write(doris::vectorized::BufferWritable&) const {}
+
+    /// Deserialization, in binary and text form. There is no state, so nothing is read.
+    void read(doris::vectorized::BufferReadable&) {}
+};
+
+/// Zero-key customization points; these functions can be overloaded for custom types.
+namespace ZeroTraits {
+
+/// Returns true when `candidate` compares equal to 0, i.e. it is the zero key.
+template <typename T>
+bool check(const T candidate) {
+    const bool equals_zero = (candidate == 0);
+    return equals_zero;
+}
+/// Turns `target` into the zero key by assigning 0 to it.
+template <typename T>
+void set(T& target) { target = 0; }
+
+} // namespace ZeroTraits
+
+/**
+  * lookup_result_get_key/Mapped -- functions to get key/"mapped" values from the
+  * LookupResult returned by find() and emplace() methods of HashTable.
+  * Must not be called for a null LookupResult.
+  *
+  * We don't use iterators for lookup result to avoid creating temporary
+  * objects. Instead, LookupResult is a pointer of some kind. There are global
+  * functions lookup_result_get_key/Mapped, overloaded for this pointer type, that
+  * return pointers to key/"mapped" values. They are implemented as global
+  * functions and not as methods, because they have to be overloaded for POD
+  * types, e.g. in StringHashTable where different components have different
+  * Cell format.
+  *
+  * Different hash table implementations support this interface to a varying
+  * degree:
+  *
+  * 1) Hash tables that store neither the key in its original form, nor a
+  *    "mapped" value: FixedHashTable or StringHashTable.
+  *    Neither GetKey nor GetMapped are supported, the only valid operation is
+  *    checking LookupResult for null.
+  *
+  * 2) Hash maps that do not store the key, e.g. FixedHashMap or StringHashMap.
+  *    Only GetMapped is supported.
+  *
+  * 3) Hash tables that store the key and do not have a "mapped" value, e.g. the
+  *    normal HashTable.
+  *    GetKey returns the key, and GetMapped returns a zero void pointer. This
+  *    simplifies generic code that works with mapped values: it can overload
+  *    on the return type of GetMapped(), and doesn't need other parameters. One
+  *    example is insert_set_mapped() function.
+  *
+  * 4) Hash tables that store both the key and the "mapped" value, e.g. HashMap.
+  *    Both GetKey and GetMapped are supported.
+  *
+  * The implementation side goes as follows:
+  * for (1), LookupResult = void *, no getters;
+  * for (2), LookupResult = Mapped *, GetMapped is a default implementation that
+  * takes any pointer-like object;
+  * for (3) and (4), LookupResult = Cell *, and both getters are implemented.
+  * They have to be specialized for each particular Cell class to supersede the
+  * default version that takes a generic pointer-like object.
+  */
+struct VoidKey {}; /// Empty key type (no members).
+/// Empty mapped type: a value of any type can be assigned to it and is ignored;
+/// the assignment returns the object itself.
+struct VoidMapped {
+    template <typename Ignored>
+    VoidMapped& operator=(const Ignored&) { return *this; }
+};
+
+/** The default implementation of GetMapped that is used for the above case (2):
+  * it dereferences the pointer-like object and returns the address of the result.
+  */
+template <typename PointerLike>
+ALWAYS_INLINE inline auto lookup_result_get_mapped(PointerLike&& ptr) {
+    return &*ptr;
+}
+
+/**
+  * Generic wrapper for lookup_result_get_mapped taking a pointer to const: it casts
+  * the constness away and calls the non-const version. Should be safe, given that
+  * these functions only do pointer arithmetics.
+  */
+template <typename T>
+ALWAYS_INLINE inline auto lookup_result_get_mapped(const T* obj) {
+    auto mapped_ptr = lookup_result_get_mapped(const_cast<T*>(obj));
+    const auto const_mapped_ptr = mapped_ptr; // top-level const only; pointee type unchanged
+    return const_mapped_ptr; // the `auto` return type drops that top-level const again
+}
+
+/** Compile-time interface for a cell of the hash table (this one stores the key only).
+  * Different cell types are used to implement different hash tables.
+  * The cell must contain a key.
+  * It can also contain a value and arbitrary additional data
+  *  (example: the stored hash value; version number for ClearableHashMap).
+  */
+template <typename Key, typename Hash, typename TState = HashTableNoState>
+struct HashTableCell {
+    using State = TState;
+
+    using mapped_type = void;
+    using value_type = Key;
+    using key_type = Key;
+
+    Key key;
+
+    /// Leaves `key` without an explicit initializer.
+    HashTableCell() {}
+    /// Create a cell holding a copy of the given key; the state argument is unused.
+    HashTableCell(const Key& key_, const State&) : key(key_) {}
+
+    /// The container's value_type is the key itself.
+    const value_type& get_value() const { return key; }
+
+    /// Extract the key from a value (identity for this cell type).
+    static const Key& get_key(const value_type& value) { return value; }
+
+    /// Key comparison. The hash and state arguments are ignored by this cell type.
+    bool key_equals(const Key& key_) const { return key == key_; }
+    bool key_equals(const Key& key_, size_t /*hash_*/) const { return key_equals(key_); }
+    bool key_equals(const Key& key_, size_t /*hash_*/, const State& /*state*/) const {
+        return key_equals(key_);
+    }
+
+    /// This cell type does not remember the hash value, so there is nothing to store.
+    void set_hash(size_t /*hash_value*/) {}
+
+    /// Cells that store the hash value return the stored one (it must have been
+    /// calculated at least once before). This cell type stores nothing, so the hash
+    /// is computed from the key on every call.
+    size_t get_hash(const Hash& hash) const { return hash(key); }
+
+    /// Whether the key is zero. In the main buffer, cells with a zero key are considered empty.
+    /// If zero keys can be inserted into the table, the cell for the zero key is stored
+    /// separately, not in the main buffer. A zeroed piece of memory must be a zero key.
+    static bool is_zero(const Key& key, const State& /*state*/) { return ZeroTraits::check(key); }
+    bool is_zero(const State& state) const { return is_zero(key, state); }
+
+    /// Set the key value to zero.
+    void set_zero() { ZeroTraits::set(key); }
+
+    /// Does the hash table need to store the zero key separately (that is, can a zero
+    /// key be inserted into the hash table)?
+    static constexpr bool need_zero_value_storage = true;
+    /// This cell type has no deleted state.
+    bool is_deleted() const { return false; }
+    /// There is no mapped value in this cell type (HashMap cells have one): no-op.
+    void set_mapped(const value_type& /*value*/) {}
+
+    /// Serialization, in binary and text form.
+    void write(doris::vectorized::BufferWritable& wb) const {
+        doris::vectorized::write_binary(key, wb);
+    }
+    /// Deserialization, in binary and text form.
+    void read(doris::vectorized::BufferReadable& rb) {
+        doris::vectorized::read_binary(key, rb);
+    }
+};
+
+template <typename Key, typename Hash, typename State>
+ALWAYS_INLINE inline auto lookup_result_get_key(HashTableCell<Key, Hash, State>* cell) {
+    return &cell->key; // address of the key stored in the cell
+}
+
+template <typename Key, typename Hash, typename State>
+ALWAYS_INLINE inline void* lookup_result_get_mapped(HashTableCell<Key, Hash, State>*) {
+    return nullptr; // this cell type has no mapped value to point at
+}
+
+/**
+  * Helpers for HashTable::insert() that set the "mapped" value: the void* overload
+  * (no mapped value) does nothing; the typed overload assigns src.second to *dest.
+  */
+template <typename ValueType>
+void insert_set_mapped(void* /* dest */, const ValueType& /* src */) {}
+
+template <typename MappedType, typename ValueType>
+void insert_set_mapped(MappedType* dest, const ValueType& src) {
+    *dest = src.second;
+}
+
+/** Determines the size of the hash table, and when and how much it should be resized.
+  * The buffer always holds a power-of-two number of cells: 2^size_degree. */
+template <size_t initial_size_degree = 8>
+struct HashTableGrower {
+    /// The state of this structure is enough to get the buffer size of the hash table.
+    doris::vectorized::UInt8 size_degree = initial_size_degree;
+
+    /// The size of the hash table in the cells.
+    size_t buf_size() const { return 1ULL << size_degree; }
+    /// Fill threshold used by overflow(): half of the buffer.
+    size_t max_fill() const { return 1ULL << (size_degree - 1); }
+    /// Bit mask that maps any value into [0, buf_size()).
+    size_t mask() const { return buf_size() - 1; }
+
+    /// From the hash value, get the cell number in the hash table.
+    size_t place(size_t x) const { return x & mask(); }
+
+    /// The next cell in the collision resolution chain (wraps around at the buffer end).
+    size_t next(size_t pos) const { return (pos + 1) & mask(); }
+
+    /// Whether the hash table is sufficiently full: you need to increase the size of
+    /// the hash table, or remove something unnecessary from it.
+    bool overflow(size_t elems) const { return elems > max_fill(); }
+
+    /// Increase the size of the hash table: x4 while size_degree < 23, x2 afterwards.
+    void increase_size() { size_degree += size_degree >= 23 ? 1 : 2; }
+
+    /// Set the buffer size by the number of elements in the hash table. Used when
+    /// deserializing a hash table. Never goes below initial_size_degree.
+    void set(size_t num_elems) {
+        const size_t wanted = num_elems <= 1 ? 0 : static_cast<size_t>(log2(num_elems - 1)) + 2;
+        size_degree = initial_size_degree > wanted ? initial_size_degree : wanted;
+    }
+
+    /// Set size_degree so that buf_size() is the smallest power of two >= buf_size_.
+    /// 0 and 1 map to degree 0 explicitly: the plain formula would evaluate log2(0)
+    /// (-inf) or log2 of a wrapped-around `0 - 1`, and converting that is undefined.
+    /// NOTE(review): max_fill() shifts by size_degree - 1, so it is not usable at degree 0.
+    void set_buf_size(size_t buf_size_) {
+        size_degree = buf_size_ <= 1 ? 0 : static_cast<size_t>(log2(buf_size_ - 1) + 1);
+    }
+};
+
+/** When used as a Grower, it turns a hash table into something like a lookup table.
+  * It remains non-optimal - the cells store the keys. Also, the compiler can not completely
+  *  remove the collision resolution chain code, although it is not needed.
+  * TODO Make a proper lookup table. */
+template <size_t key_bits>
+struct HashTableFixedGrower {
+    size_t buf_size() const { return 1ULL << key_bits; } // fixed at compile time
+    size_t place(size_t hash_value) const { return hash_value; } // identity, no masking
+    /// __builtin_unreachable() could be written here, but the compiler does not optimize
+    /// everything, and it turns out less efficiently.
+    size_t next(size_t cell) const { return ++cell; }
+    /// Never reports overflow, and the size setters below ignore their argument.
+    bool overflow(size_t /*elems*/) const { return false; }
+    void increase_size() { __builtin_unreachable(); }
+    void set(size_t /*num_elems*/) {}
+    void set_buf_size(size_t /*buf_size_*/) {}
+};
+
+/** If you want to store the zero key separately - a place to store it. */
+template <bool need_zero_value_storage, typename Cell>
+struct ZeroValueStorage;
+
+template <typename Cell>
+struct ZeroValueStorage<true, Cell> { // one Cell in raw aligned storage plus a presence flag
+private:
+    bool has_zero = false; /// The flag reported by get_has_zero().
+    std::aligned_storage_t<sizeof(Cell), alignof(Cell)>
+            zero_value_storage; /// Storage of element with zero key.
+
+public:
+    bool get_has_zero() const { return has_zero; }
+    /// Sets the flag and constructs a Cell in the storage; does not check for an existing one.
+    void set_get_has_zero() {
+        has_zero = true;
+        new (zero_value()) Cell();
+    }
+    /// Clears the flag and runs the Cell destructor; does not check that a Cell was constructed.
+    void clear_get_has_zero() {
+        has_zero = false;
+        zero_value()->~Cell();
+    }
+
+    Cell* zero_value() { return reinterpret_cast<Cell*>(&zero_value_storage); }
+    const Cell* zero_value() const { return reinterpret_cast<const Cell*>(&zero_value_storage); }
+};
+/// Variant without storage: the zero key is never present and setting it throws.
+template <typename Cell>
+struct ZeroValueStorage<false, Cell> {
+    bool get_has_zero() const { return false; }
+    void set_get_has_zero() {
+        throw doris::vectorized::Exception("HashTable: logical error",
+                                           doris::TStatusCode::VEC_LOGIC_ERROR);
+    }
+    void clear_get_has_zero() {}
+
+    Cell* zero_value() { return nullptr; }
+    const Cell* zero_value() const { return nullptr; }
+};
+
+template <typename Key, typename Cell, typename Hash, typename Grower, typename Allocator>
+class HashTable : private boost::noncopyable,
+                  protected Hash,
+                  protected Allocator,
+                  protected Cell::State,
+                  protected ZeroValueStorage<Cell::need_zero_value_storage,
+                                             Cell> /// empty base optimization
+{
+protected:
+    friend class const_iterator;
+    friend class iterator;
+    friend class Reader;
+
+    template <typename, typename, typename, typename, typename, typename, size_t>
+    friend class TwoLevelHashTable;
+
+    using HashValue = size_t;
+    using Self = HashTable;
+    using cell_type = Cell;
+
+    size_t m_size = 0; /// Amount of elements
+    Cell* buf;         /// A piece of memory for all elements except the element with zero key.
+    Grower grower;
+    int64_t _resize_timer_ns;
+
+    //factor that will trigger growing the hash table on insert.
+    static constexpr float MAX_BUCKET_OCCUPANCY_FRACTION = 0.5f;
+
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+    mutable size_t collisions = 0;
+#endif
+
+    /// Find a cell with the same key or an empty cell, starting from the specified position and further along the collision resolution chain.
+    size_t ALWAYS_INLINE find_cell(const Key& x, size_t hash_value, size_t place_value) const {
+        while (!buf[place_value].is_zero(*this) &&
+               !buf[place_value].key_equals(x, hash_value, *this)) {
+            place_value = grower.next(place_value);
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+            ++collisions;
+#endif
+        }
+
+        return place_value;
+    }
+
+    std::pair<bool, size_t> ALWAYS_INLINE find_cell_opt(const Key& x, size_t hash_value, size_t place_value) const {
+        bool is_zero = false;
+        do {
+            is_zero = buf[place_value].is_zero(*this);
+            if (is_zero || buf[place_value].key_equals(x, hash_value, *this)) break;
+            place_value = grower.next(place_value);
+        } while (true);
+
+        return {is_zero, place_value};
+    }
+
+    /// Find an empty cell, starting with the specified position and further along the collision resolution chain.
+    size_t ALWAYS_INLINE find_empty_cell(size_t place_value) const {
+        while (!buf[place_value].is_zero(*this)) {
+            place_value = grower.next(place_value);
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+            ++collisions;
+#endif
+        }
+
+        return place_value;
+    }
+
+    /// Allocate a buffer of `new_grower.buf_size()` cells and adopt `new_grower`.
+    /// The previous value of `buf` is overwritten, not released.
+    void alloc(const Grower& new_grower) {
+        const auto bytes = new_grower.buf_size() * sizeof(Cell);
+        buf = reinterpret_cast<Cell*>(Allocator::alloc(bytes));
+        grower = new_grower;
+    }
+
+    /// Release the cell buffer, if there is one, and null the pointer. Cells are not destroyed.
+    void free() {
+        if (!buf) {
+            return;
+        }
+        Allocator::free(buf, get_buffer_size_in_bytes());
+        buf = nullptr;
+    }
+
+    /// Increase the size of the buffer and move the existing cells to their new positions.
+    ///
+    /// The target size is chosen as follows:
+    ///  - `for_num_elems` != 0: `grower.set(for_num_elems)`;
+    ///  - else `for_buf_size` != 0: `grower.set_buf_size(for_buf_size)`;
+    ///  - otherwise: `grower.increase_size()`.
+    /// With an explicit request, nothing happens if the resulting buffer would not be larger
+    /// than the current one. The time spent here is accounted to `_resize_timer_ns`.
+    void resize(size_t for_num_elems = 0, size_t for_buf_size = 0) {
+        SCOPED_RAW_TIMER(&_resize_timer_ns);
+#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
+        Stopwatch watch;
+#endif
+
+        size_t old_size = grower.buf_size();
+
+        /** In case of exception for the object to remain in the correct state,
+          *  changing the variable `grower` (which determines the buffer size of the hash table)
+          *  is postponed for a moment after a real buffer change.
+          * The temporary variable `new_grower` is used to determine the new size.
+          */
+        Grower new_grower = grower;
+        if (for_num_elems) {
+            new_grower.set(for_num_elems);
+            if (new_grower.buf_size() <= old_size) return;
+        } else if (for_buf_size) {
+            new_grower.set_buf_size(for_buf_size);
+            if (new_grower.buf_size() <= old_size) return;
+        } else
+            new_grower.increase_size();
+
+        /// Expand the space.
+        /// get_buffer_size_in_bytes() still reports the old size here because `grower` is only
+        /// replaced on the next line.
+        buf = reinterpret_cast<Cell*>(Allocator::realloc(buf, get_buffer_size_in_bytes(),
+                                                         new_grower.buf_size() * sizeof(Cell)));
+        grower = new_grower;
+
+        /** Now some items may need to be moved to a new location.
+          * The element can stay in place, or move to a new location "on the right",
+          *  or move to the left of the collision resolution chain, because the elements to the left of it have been moved to the new "right" location.
+          */
+        size_t i = 0;
+        for (; i < old_size; ++i)
+            if (!buf[i].is_zero(*this) && !buf[i].is_deleted())
+                reinsert(buf[i], buf[i].get_hash(*this));
+
+        /** There is also a special case:
+          *    if the element was to be at the end of the old buffer,                  [        x]
+          *    but is at the beginning because of the collision resolution chain,      [o       x]
+          *    then after resizing, it will first be out of place again,               [        xo        ]
+          *    and in order to transfer it where necessary,
+          *    after transferring all the elements from the old halves you need to     [         o   x    ]
+          *    process tail from the collision resolution chain immediately after it   [        o    x    ]
+          */
+        /// `i` continues from `old_size`; the loop stops at the first empty (or deleted) cell.
+        for (; !buf[i].is_zero(*this) && !buf[i].is_deleted(); ++i)
+            reinsert(buf[i], buf[i].get_hash(*this));
+
+#ifdef DBMS_HASH_MAP_DEBUG_RESIZES
+        watch.stop();
+        std::cerr << std::fixed << std::setprecision(3) << "Resize from " << old_size << " to "
+                  << grower.buf_size() << " took " << watch.elapsedSeconds() << " sec."
+                  << std::endl;
+#endif
+    }
+
+    /** Paste into the new buffer the value that was in the old buffer.
+      * Used when increasing the buffer size.
+      *
+      * `x` must be a cell inside `buf`; `hash_value` is the hash of its key.
+      * The cell is either left where it is or bitwise-copied to its new slot, in which case
+      * the old slot is marked empty.
+      */
+    void reinsert(Cell& x, size_t hash_value) {
+        size_t place_value = grower.place(hash_value);
+
+        /// If the element is in its place.
+        if (&x == &buf[place_value]) return;
+
+        /// Compute a new location, taking into account the collision resolution chain.
+        place_value = find_cell(Cell::get_key(x.get_value()), hash_value, place_value);
+
+        /// If the item remains in its place in the old collision resolution chain.
+        /// (find_cell stopped on an occupied slot, i.e. on a slot holding this very key.)
+        if (!buf[place_value].is_zero(*this)) return;
+
+        /// Copy to a new location and zero the old one.
+        x.set_hash(hash_value);
+        memcpy(static_cast<void*>(&buf[place_value]), &x, sizeof(x));
+        x.set_zero();
+
+        /// Then the elements that previously were in collision with this can move to the old place.
+    }
+
+    /// Run the destructor of every stored cell. Does nothing when Cell is trivially destructible.
+    /// The buffer itself is not released and `m_size` is not changed.
+    void destroy_elements() {
+        if constexpr (!std::is_trivially_destructible_v<Cell>) {
+            for (iterator cur = begin(), stop = end(); cur != stop; ++cur) {
+                cur.get_ptr()->~Cell();
+            }
+        }
+    }
+
+    /// Common implementation of `iterator` and `const_iterator`.
+    /// `Derived` is the concrete iterator type returned by operator++ (CRTP); `is_const` selects
+    /// const or mutable access to the container and its cells.
+    /// Iteration visits the zero-key cell first (when begin() returned it) and then every
+    /// non-empty cell of the main buffer in slot order.
+    template <typename Derived, bool is_const>
+    class iterator_base {
+        using Container = std::conditional_t<is_const, const Self, Self>;
+        using cell_type = std::conditional_t<is_const, const Cell, Cell>;
+
+        /// Null by default so that a default-constructed iterator has a determinate value and can
+        /// be compared or copied without reading uninitialised memory.
+        Container* container = nullptr;
+        cell_type* ptr = nullptr;
+
+        friend class HashTable;
+
+    public:
+        iterator_base() = default;
+        iterator_base(Container* container_, cell_type* ptr_) : container(container_), ptr(ptr_) {}
+
+        /// Iterators are equal when they point at the same cell; the container is not compared.
+        bool operator==(const iterator_base& rhs) const { return ptr == rhs.ptr; }
+        bool operator!=(const iterator_base& rhs) const { return ptr != rhs.ptr; }
+
+        /// Advance to the next occupied cell, or to one-past-the-end of the main buffer.
+        Derived& operator++() {
+            /// If iterator was pointed to ZeroValueStorage, move it to the beginning of the main buffer.
+            if (UNLIKELY(ptr->is_zero(*container)))
+                ptr = container->buf;
+            else
+                ++ptr;
+
+            /// Skip empty cells in the main buffer.
+            auto buf_end = container->buf + container->grower.buf_size();
+            while (ptr < buf_end && ptr->is_zero(*container)) ++ptr;
+
+            return static_cast<Derived&>(*this);
+        }
+
+        auto& operator*() const { return *ptr; }
+        auto* operator->() const { return ptr; }
+
+        auto get_ptr() const { return ptr; }
+        size_t get_hash() const { return ptr->get_hash(*container); }
+
+        /// Distance, along the probe sequence, between the slot the key hashes to and the slot
+        /// the cell actually occupies.
+        size_t get_collision_chain_length() const {
+            return container->grower.place((ptr - container->buf) -
+                                           container->grower.place(get_hash()));
+        }
+
+        /**
+          * A hack for HashedDictionary.
+          *
+          * The problem: std-like find() returns an iterator, which has to be
+          * compared to end(). On the other hand, HashMap::find() returns
+          * LookupResult, which is compared to nullptr. HashedDictionary has to
+          * support both hash maps with the same code, hence the need for this
+          * hack.
+          *
+          * The proper way would be to remove iterator interface from our
+          * HashMap completely, change all its users to the existing internal
+          * iteration interface, and redefine end() to return LookupResult for
+          * compatibility with std find(). Unfortunately, now is not the time to
+          * do this.
+          */
+        operator Cell*() const { return nullptr; }
+    };
+
+public:
+    /// Key type of the table.
+    using key_type = Key;
+    /// Value type as defined by the cell; this is what insert() accepts.
+    using value_type = typename Cell::value_type;
+
+    // Use lookup_result_get_mapped/Key to work with these values.
+    // A null result from find() means the key is absent.
+    using LookupResult = Cell*;
+    using ConstLookupResult = const Cell*;
+
+    /// Zero the resize timer.
+    void reset_resize_timer() {
+        _resize_timer_ns = 0;
+    }
+    /// Current value of the resize timer (the counter resize() hands to SCOPED_RAW_TIMER).
+    int64_t get_resize_timer_value() const {
+        return _resize_timer_ns;
+    }
+
+    /// Hash of `x` as computed by the Hash base class.
+    size_t hash(const Key& x) const {
+        return Hash::operator()(x);
+    }
+
+    /// Create an empty table whose buffer size is that of a default-constructed Grower.
+    HashTable() {
+        if constexpr (Cell::need_zero_value_storage) {
+            this->zero_value()->set_zero();
+        }
+        alloc(grower);
+    }
+
+    /// Create an empty table with the grower pre-set for `reserve_for_num_elements` elements.
+    HashTable(size_t reserve_for_num_elements) {
+        if constexpr (Cell::need_zero_value_storage) {
+            this->zero_value()->set_zero();
+        }
+        grower.set(reserve_for_num_elements);
+        alloc(grower);
+    }
+
+    /// Move-construct by starting from a table without a buffer and move-assigning `rhs` into it.
+    HashTable(HashTable&& rhs) : buf(nullptr) {
+        *this = std::move(rhs);
+    }
+
+    /// Destroy the stored cells first, then release the buffer they live in.
+    ~HashTable() {
+        this->destroy_elements();
+        this->free();
+    }
+
+    /// Move-assign: the current contents are destroyed and this table takes over the buffer,
+    /// element count and grower of `rhs`, together with the state held in the Hash, Allocator,
+    /// Cell::State and ZeroValueStorage base classes.
+    /// Afterwards `rhs` owns no buffer and reports size() == 0.
+    HashTable& operator=(HashTable&& rhs) {
+        /// Self-move must be a no-op: the code below would otherwise free our own buffer and
+        /// leave `m_size` describing elements that no longer exist.
+        if (this == &rhs) return *this;
+
+        destroy_elements();
+        free();
+        /// Nothing is stored any more, so `rhs` must not inherit a stale element count through
+        /// the swap below.
+        m_size = 0;
+
+        std::swap(buf, rhs.buf);
+        std::swap(m_size, rhs.m_size);
+        std::swap(grower, rhs.grower);
+
+        Hash::operator=(std::move(rhs));
+        Allocator::operator=(std::move(rhs));
+        Cell::State::operator=(std::move(rhs));
+        ZeroValueStorage<Cell::need_zero_value_storage, Cell>::operator=(std::move(rhs));
+
+        return *this;
+    }
+
+    /// Mutable iterator over the cells of the table; all behaviour comes from iterator_base.
+    class iterator : public iterator_base<iterator, false> {
+    public:
+        /// Inherit the constructors of iterator_base.
+        using iterator_base<iterator, false>::iterator_base;
+    };
+
+    /// Read-only iterator over the cells of the table; all behaviour comes from iterator_base.
+    class const_iterator : public iterator_base<const_iterator, true> {
+    public:
+        /// Inherit the constructors of iterator_base.
+        using iterator_base<const_iterator, true>::iterator_base;
+    };
+
+    /// Read-only iterator to the first element: end() when there is no buffer, the zero-key
+    /// cell when one is stored, otherwise the first non-empty slot of the main buffer.
+    const_iterator begin() const {
+        if (buf == nullptr) {
+            return end();
+        }
+        if (this->get_has_zero()) {
+            return iterator_to_zero();
+        }
+
+        const Cell* const stop = buf + grower.buf_size();
+        const Cell* cursor = buf;
+        for (; cursor < stop; ++cursor) {
+            if (!cursor->is_zero(*this)) break;
+        }
+        return const_iterator(this, cursor);
+    }
+
+    /// Same as the const overload of begin().
+    const_iterator cbegin() const {
+        return begin();
+    }
+
+    /// Mutable iterator to the first element: end() when there is no buffer, the zero-key cell
+    /// when one is stored, otherwise the first non-empty slot of the main buffer.
+    iterator begin() {
+        if (buf == nullptr) {
+            return end();
+        }
+        if (this->get_has_zero()) {
+            return iterator_to_zero();
+        }
+
+        Cell* const stop = buf + grower.buf_size();
+        Cell* cursor = buf;
+        for (; cursor < stop; ++cursor) {
+            if (!cursor->is_zero(*this)) break;
+        }
+        return iterator(this, cursor);
+    }
+
+    /// Past-the-end iterators: they point one cell beyond the main buffer.
+    const_iterator end() const {
+        return const_iterator(this, buf + grower.buf_size());
+    }
+    const_iterator cend() const {
+        return end();
+    }
+    iterator end() {
+        return iterator(this, buf + grower.buf_size());
+    }
+
+protected:
+    /// Wrap a raw cell pointer into an iterator bound to this table.
+    const_iterator iterator_to(const Cell* ptr) const {
+        return const_iterator(this, ptr);
+    }
+    iterator iterator_to(Cell* ptr) {
+        return iterator(this, ptr);
+    }
+    /// Iterator pointing at the zero-key storage cell.
+    const_iterator iterator_to_zero() const {
+        return iterator_to(this->zero_value());
+    }
+    iterator iterator_to_zero() {
+        return iterator_to(this->zero_value());
+    }
+
+    /// If the key is zero, insert it into a special place and return true.
+    /// We don't have to persist a zero key, because it's not actually inserted.
+    /// That's why we just take a Key by value, and not a key holder.
+    /// On a true return `it` points at the zero-value cell and `inserted` tells whether the
+    /// zero key was absent before the call; on a false return neither output is touched.
+    bool ALWAYS_INLINE emplace_if_zero(Key x, LookupResult& it, bool& inserted, size_t hash_value) {
+        /// Either the table claims the zero key can not be inserted, or this is an ordinary key.
+        if (!Cell::need_zero_value_storage || !Cell::is_zero(x, *this)) {
+            return false;
+        }
+
+        it = this->zero_value();
+        inserted = !this->get_has_zero();
+        if (inserted) {
+            ++m_size;
+            this->set_get_has_zero();
+            this->zero_value()->set_hash(hash_value);
+        }
+        return true;
+    }
+
+    /// Insert the (non-zero) key into slot `place_value`, which must be the result of
+    /// find_cell() for that key.
+    /// If the slot is occupied, the key is already present: the key holder is told to discard
+    /// its key, `inserted` is false and `it` points at the existing cell.
+    /// Otherwise the key is persisted, a Cell is constructed in the slot, and the table is
+    /// resized if the grower reports overflow, in which case `it` is re-pointed at the cell's
+    /// new location.
+    template <typename KeyHolder>
+    void ALWAYS_INLINE emplace_non_zero_impl(size_t place_value, KeyHolder&& key_holder,
+                                             LookupResult& it, bool& inserted, size_t hash_value) {
+        it = &buf[place_value];
+
+        if (!buf[place_value].is_zero(*this)) {
+            key_holder_discard_key(key_holder);
+            inserted = false;
+            return;
+        }
+
+        /// Persist first: the key stored in the cell must be the persistent one.
+        key_holder_persist_key(key_holder);
+        const auto& key = key_holder_get_key(key_holder);
+
+        new (&buf[place_value]) Cell(key, *this);
+        buf[place_value].set_hash(hash_value);
+        inserted = true;
+        ++m_size;
+
+        if (UNLIKELY(grower.overflow(m_size))) {
+            try {
+                resize();
+            } catch (...) {
+                /** If we have not resized successfully, then there will be problems.
+                  * There remains a key, but uninitialized mapped-value,
+                  *  which, perhaps, can not even be called a destructor.
+                  */
+                /// Roll the insertion back before propagating the exception.
+                --m_size;
+                buf[place_value].set_zero();
+                throw;
+            }
+
+            // The hash table was rehashed, so we have to re-find the key.
+            size_t new_place = find_cell(key, hash_value, grower.place(hash_value));
+            assert(!buf[new_place].is_zero(*this));
+            it = &buf[new_place];
+        }
+    }
+
+    /// Only for non-zero keys. Locate the slot for the key and insert it there unless it is
+    /// already present. `it` receives the cell, `inserted` whether a new key was added.
+    template <typename KeyHolder>
+    void ALWAYS_INLINE emplace_non_zero(KeyHolder&& key_holder, LookupResult& it, bool& inserted,
+                                        size_t hash_value) {
+        const size_t slot = find_cell(key_holder_get_key(key_holder), hash_value,
+                                      grower.place(hash_value));
+        emplace_non_zero_impl(slot, key_holder, it, inserted, hash_value);
+    }
+
+public:
+    /// Insert a value. In the case of any more complex values, it is better to use the `emplace` function.
+    /// Returns {cell, true} when the key was new (the mapped part is then set from `x`),
+    /// or {existing cell, false} when the key was already present.
+    std::pair<LookupResult, bool> ALWAYS_INLINE insert(const value_type& x) {
+        std::pair<LookupResult, bool> res;
+        auto& [cell, is_new] = res;
+
+        const size_t hash_value = hash(Cell::get_key(x));
+        const bool was_zero_key = emplace_if_zero(Cell::get_key(x), cell, is_new, hash_value);
+        if (!was_zero_key) {
+            emplace_non_zero(Cell::get_key(x), cell, is_new, hash_value);
+        }
+
+        if (is_new) {
+            insert_set_mapped(lookup_result_get_mapped(cell), x);
+        }
+        return res;
+    }
+
+    /// Issue a prefetch for the slot the key hashes to (the start of its probe sequence).
+    template <typename KeyHolder>
+    void ALWAYS_INLINE prefetch(KeyHolder& key_holder) {
+        const auto slot = grower.place(hash(key_holder_get_key(key_holder)));
+        __builtin_prefetch(&buf[slot]);
+    }
+
+    /// Reinsert the cell the iterator points at, given the hash of its key.
+    void ALWAYS_INLINE reinsert(iterator& it, size_t hash_value) {
+        Cell& target = *it.get_ptr();
+        reinsert(target, hash_value);
+    }
+
+    /** Insert the key, computing its hash here.
+      * Return values:
+      * 'it' -- a LookupResult pointing to the corresponding key/mapped pair.
+      * 'inserted' -- whether a new key was inserted.
+      *
+      * You have to make `placement new` of value if you inserted a new key,
+      * since when destroying a hash table, it will call the destructor!
+      *
+      * Example usage:
+      *
+      * Map::LookupResult it;
+      * bool inserted;
+      * map.emplace(key, it, inserted);
+      * if (inserted)
+      *     placement-new the mapped value inside the cell `it` points to.
+      */
+    template <typename KeyHolder>
+    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, bool& inserted) {
+        emplace(key_holder, it, inserted, hash(key_holder_get_key(key_holder)));
+    }
+
+    /// Insert the key using a hash supplied by the caller. The zero key goes to the dedicated
+    /// zero-value storage; every other key goes into the main buffer.
+    template <typename KeyHolder>
+    void ALWAYS_INLINE emplace(KeyHolder&& key_holder, LookupResult& it, bool& inserted,
+                               size_t hash_value) {
+        const bool handled_as_zero =
+                emplace_if_zero(key_holder_get_key(key_holder), it, inserted, hash_value);
+        if (handled_as_zero) {
+            return;
+        }
+        emplace_non_zero(key_holder, it, inserted, hash_value);
+    }
+
+    /// Copy the cell from another hash table. It is assumed that the cell is not zero, and also that there was no such key in the table yet.
+    /// The cell is copied bitwise into the first free slot of its probe sequence.
+    void ALWAYS_INLINE insert_unique_non_zero(const Cell* cell, size_t hash_value) {
+        Cell* const dest = &buf[find_empty_cell(grower.place(hash_value))];
+        memcpy(static_cast<void*>(dest), cell, sizeof(*cell));
+        ++m_size;
+
+        if (UNLIKELY(grower.overflow(m_size))) {
+            resize();
+        }
+    }
+
+    /// Look the key up; returns the cell holding it, or nullptr when the key is absent.
+    LookupResult ALWAYS_INLINE find(Key x) {
+        if (Cell::is_zero(x, *this)) {
+            if (this->get_has_zero()) return this->zero_value();
+            return nullptr;
+        }
+
+        const size_t hash_value = hash(x);
+        const auto probe = find_cell_opt(x, hash_value, grower.place(hash_value));
+        /// probe.first is true when the probe ended on an empty slot, i.e. the key is absent.
+        if (probe.first) return nullptr;
+        return &buf[probe.second];
+    }
+
+    /// Const lookup; forwards to the non-const find(), which does not modify the table.
+    ConstLookupResult ALWAYS_INLINE find(Key x) const {
+        auto* mutable_self = const_cast<HashTable*>(this);
+        return mutable_self->find(x);
+    }
+
+    /// Look the key up using a hash supplied by the caller; nullptr when the key is absent.
+    LookupResult ALWAYS_INLINE find(Key x, size_t hash_value) {
+        if (Cell::is_zero(x, *this)) {
+            if (this->get_has_zero()) return this->zero_value();
+            return nullptr;
+        }
+
+        const size_t slot = find_cell(x, hash_value, grower.place(hash_value));
+        if (buf[slot].is_zero(*this)) return nullptr;
+        return &buf[slot];
+    }
+
+    /// Whether the key is stored in the table.
+    bool ALWAYS_INLINE has(Key x) const {
+        if (Cell::is_zero(x, *this)) {
+            return this->get_has_zero();
+        }
+
+        const size_t hash_value = hash(x);
+        const size_t slot = find_cell(x, hash_value, grower.place(hash_value));
+        const bool slot_is_empty = buf[slot].is_zero(*this);
+        return !slot_is_empty;
+    }
+
+    /// Whether the key is stored in the table, using a hash supplied by the caller.
+    bool ALWAYS_INLINE has(Key x, size_t hash_value) const {
+        if (Cell::is_zero(x, *this)) {
+            return this->get_has_zero();
+        }
+
+        const size_t slot = find_cell(x, hash_value, grower.place(hash_value));
+        const bool slot_is_empty = buf[slot].is_zero(*this);
+        return !slot_is_empty;
+    }
+
+    /// Serialize the table: cell state, element count, the zero-key cell (if stored), then
+    /// every non-empty cell of the main buffer in slot order.
+    void write(doris::vectorized::BufferWritable& wb) const {
+        Cell::State::write(wb);
+        doris::vectorized::write_var_uint(m_size, wb);
+
+        if (this->get_has_zero()) {
+            this->zero_value()->write(wb);
+        }
+
+        auto* const stop = buf + grower.buf_size();
+        for (auto* cell = buf; cell < stop; ++cell) {
+            if (cell->is_zero(*this)) continue;
+            cell->write(wb);
+        }
+    }
+
+    /// Replace the contents of the table with data read from `rb`: cell state, element count,
+    /// then that many cells, as produced by write().
+    void read(doris::vectorized::BufferReadable& rb) {
+        Cell::State::read(rb);
+
+        /// Drop the current contents before the buffer is replaced.
+        destroy_elements();
+        this->clear_get_has_zero();
+        m_size = 0;
+
+        size_t new_size = 0;
+        doris::vectorized::read_var_uint(new_size, rb);
+
+        /// Allocate a buffer sized for the announced number of elements.
+        free();
+        Grower new_grower = grower;
+        new_grower.set(new_size);
+        alloc(new_grower);
+
+        /// NOTE(review): only the key of each cell that was read is passed to insert();
+        /// confirm this is intended for tables whose value_type carries more than the key.
+        for (size_t i = 0; i < new_size; ++i) {
+            Cell x;
+            x.read(rb);
+            insert(Cell::get_key(x.get_value()));
+        }
+    }
+
+    /// Number of stored elements (the zero-key element is counted too).
+    size_t size() const {
+        return m_size;
+    }
+
+    /// True when no element is stored.
+    bool empty() const {
+        return m_size == 0;
+    }
+
+    /// The occupancy fraction constant of this table.
+    float get_factor() const {
+        return MAX_BUCKET_OCCUPANCY_FRACTION;
+    }
+
+    /// Returns true when `valid_row` is below get_factor() * size() / 2.
+    /// Pure query: declared const so that it can also be called on a const table.
+    bool should_be_shrink(int64_t valid_row) const {
+        return valid_row < get_factor() * (size() / 2.0);
+    }
+
+    /// Throw away the current buffer and allocate a new one sized for
+    /// `reserve_for_num_elements` elements.
+    /// Cells are not destroyed and `m_size` is not reset here.
+    void init_buf_size(size_t reserve_for_num_elements) {
+        /// free() must run before grower.set(): it sizes the release with the current grower.
+        free();
+        grower.set(reserve_for_num_elements);
+        alloc(grower);
+    }
+
+    /// Remove the zero-key element, if `key` is the zero key and such an element is stored.
+    /// The element count is decremented as well, mirroring the ++m_size that emplace_if_zero()
+    /// performs when the zero key is inserted. Calling this when no zero key is stored is a no-op.
+    void delete_zero_key(Key key) {
+        if (this->get_has_zero() && Cell::is_zero(key, *this)) {
+            --m_size;
+            this->clear_get_has_zero();
+        }
+    }
+    /// Remove all elements but keep the buffer: cells are destroyed, the zero-key flag and the
+    /// element count are reset, and the whole buffer is filled with zero bytes.
+    void clear() {
+        destroy_elements();
+        this->clear_get_has_zero();
+        m_size = 0;
+
+        const auto bytes = grower.buf_size() * sizeof(Cell);
+        memset(static_cast<void*>(buf), 0, bytes);
+    }
+
+    /// Remove all elements and release the buffer.
+    /// After executing this function, the table can only be destroyed,
+    ///  and also you can use the methods `size`, `empty`, `begin`, `end`.
+    void clear_and_shrink() {
+        /// Cells must be destroyed while the buffer still exists.
+        destroy_elements();
+        this->clear_get_has_zero();
+        m_size = 0;
+        free();
+    }
+
+    /// Size of the main buffer in bytes, as implied by the current grower.
+    size_t get_buffer_size_in_bytes() const {
+        return grower.buf_size() * sizeof(Cell);
+    }
+
+    /// Size of the main buffer in cells, as implied by the current grower.
+    size_t get_buffer_size_in_cells() const {
+        return grower.buf_size();
+    }
+
+    /// Whether the grower would report overflow once `add_size` more elements were stored.
+    bool add_elem_size_overflow(size_t add_size) const {
+        const size_t projected_size = add_size + m_size;
+        return grower.overflow(projected_size);
+    }
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+    /// Number of probe steps counted by find_cell() and find_empty_cell() so far.
+    size_t getCollisions() const {
+        return collisions;
+    }
+#endif
+};
diff --git a/be/src/vec/common/hash_table/hash_table_allocator.h b/be/src/vec/common/hash_table/hash_table_allocator.h
new file mode 100644
index 0000000000..77a58ce14b
--- /dev/null
+++ b/be/src/vec/common/hash_table/hash_table_allocator.h
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/HashTableAllocator.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/common/allocator.h"
+
+/**
+  * Default allocator for hash table buffers.
+  *
+  * We are going to use the entire memory we allocated when resizing a hash
+  * table, so it makes sense to pre-fault the pages so that page faults don't
+  * interrupt the resize loop. Set the allocator parameter accordingly.
+  */
+using HashTableAllocator = Allocator<true /* clear_memory */, true /* mmap_populate */>;
+
+/// HashTableAllocator wrapped in AllocatorWithStackMemory, parameterised by `N` (default 64).
+/// NOTE(review): presumably N is the size in bytes of the in-object buffer; confirm against
+/// AllocatorWithStackMemory.
+template <size_t N = 64>
+using HashTableAllocatorWithStackMemory = AllocatorWithStackMemory<HashTableAllocator, N>;
diff --git a/be/src/vec/common/hash_table/hash_table_key_holder.h b/be/src/vec/common/hash_table/hash_table_key_holder.h
new file mode 100644
index 0000000000..eb507d16a5
--- /dev/null
+++ b/be/src/vec/common/hash_table/hash_table_key_holder.h
@@ -0,0 +1,139 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/HashTable/HashTableKeyHolder.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/common/arena.h"
+#include "vec/common/string_ref.h"
+
+/**
+  * In some aggregation scenarios, when adding a key to the hash table, we
+  * start with a temporary key object, and if it turns out to be a new key,
+  * we must make it persistent (e.g. copy to an Arena) and use the resulting
+  * persistent object as hash table key. This happens only for StringRef keys,
+  * because other key types are stored by value, but StringRef is a pointer-like
+  * type: the actual data are stored elsewhere. Even for StringRef, we don't
+  * make a persistent copy of the key in each of the following cases:
+  * 1) the aggregation method doesn't use temporary keys, so they're persistent
+  *    from the start;
+  * 2) the key is already present in the hash table;
+  * 3) that particular key is stored by value, e.g. a short StringRef key in
+  *    StringHashMap.
+  *
+  * In the past, the caller was responsible for making the key persistent after
+  * it was inserted. emplace() returned whether the key is new or not, so the
+  * caller only stored new keys (this is case (2) from the above list). However,
+  * now we are adding a compound hash table for StringRef keys, so case (3)
+  * appears. The decision about persistence now depends on some properties of
+  * the key, and the logic of this decision is tied to the particular hash table
+  * implementation. This means that the hash table user now doesn't have enough
+  * data and logic to make this decision by itself.
+  *
+  * To support these new requirements, we now manage key persistence by passing
+  * a special key holder to emplace(), which has the functions to make the key
+  * persistent or to discard it. emplace() then calls these functions at the
+  * appropriate moments.
+  *
+  * This approach has the following benefits:
+  * - no extra runtime branches in the caller to make the key persistent.
+  * - no additional data is stored in the hash table itself, which is important
+  *   when it's used in aggregate function states.
+  * - no overhead when the key memory management isn't needed: we just pass the
+  *   bare key without any wrapper to emplace(), and the default callbacks do
+  *   nothing.
+  *
+  * This file defines the default key persistence functions, as well as two
+  * different key holders and corresponding functions for storing StringRef
+  * keys to Arena.
+  */
+
+/**
+  * Returns the key. Can return the temporary key initially.
+  * After the call to key_holder_persist_key(), must return the persistent key.
+  *
+  * This default overload treats the argument itself as the key and returns a
+  * reference to it.
+  */
+template <typename Key>
+inline Key& ALWAYS_INLINE key_holder_get_key(Key&& key) {
+    return key;
+}
+
+/**
+  * Make the key persistent. key_holder_get_key() must return the persistent key
+  * after this call.
+  *
+  * This default overload does nothing.
+  */
+template <typename Key>
+inline void ALWAYS_INLINE key_holder_persist_key(Key&&) {}
+
+/**
+  * Discard the key. Calling key_holder_get_key() is ill-defined after this.
+  *
+  * This default overload does nothing.
+  */
+template <typename Key>
+inline void ALWAYS_INLINE key_holder_discard_key(Key&&) {}
+
+namespace doris::vectorized {
+
+/**
+  * ArenaKeyHolder is a key holder for hash tables that serializes a StringRef
+  * key to an Arena.
+  */
+struct ArenaKeyHolder {
+    /// The key; key_holder_persist_key() re-points its data at a copy made in `pool`.
+    StringRef key;
+    /// Arena that receives the copy of the key bytes.
+    Arena& pool;
+};
+
+} // namespace doris::vectorized
+
+/// Key currently held by an ArenaKeyHolder.
+inline StringRef& ALWAYS_INLINE key_holder_get_key(doris::vectorized::ArenaKeyHolder& holder) {
+    return holder.key;
+}
+
+/// Copy the key bytes into the holder's arena and re-point the key at the copy.
+inline void ALWAYS_INLINE key_holder_persist_key(doris::vectorized::ArenaKeyHolder& holder) {
+    auto& key = holder.key;
+    // Hash table shouldn't ask us to persist a zero key
+    assert(key.size > 0);
+    key.data = holder.pool.insert(key.data, key.size);
+}
+
+/// Discarding an ArenaKeyHolder key does nothing.
+inline void ALWAYS_INLINE key_holder_discard_key(doris::vectorized::ArenaKeyHolder&) {}
+
+namespace doris::vectorized {
+
+/**
+  * SerializedKeyHolder is a key holder for a StringRef key that is already
+  * serialized to an Arena. The key must be the last allocation in this Arena,
+  * and is discarded by rolling back the allocation.
+  */
+struct SerializedKeyHolder {
+    /// The key; its bytes already live in `pool`.
+    StringRef key;
+    /// Arena holding the key bytes; key_holder_discard_key() rolls it back by key.size bytes.
+    Arena& pool;
+};
+
+} // namespace doris::vectorized
+
+/// Key currently held by a SerializedKeyHolder.
+inline StringRef& ALWAYS_INLINE key_holder_get_key(doris::vectorized::SerializedKeyHolder& holder) {
+    return holder.key;
+}
+
+/// Persisting a SerializedKeyHolder key does nothing.
+inline void ALWAYS_INLINE key_holder_persist_key(doris::vectorized::SerializedKeyHolder&) {}
+
+/// Give the key bytes back to the arena by rolling back `key.size` bytes, then empty the key.
+/// The assertion checks that the rollback ended exactly at the start of the key.
+inline void ALWAYS_INLINE key_holder_discard_key(doris::vectorized::SerializedKeyHolder& holder) {
+    auto& key = holder.key;
+    [[maybe_unused]] void* new_head = holder.pool.rollback(key.size);
+    assert(new_head == key.data);
+    key.data = nullptr;
+    key.size = 0;
+}
diff --git a/be/src/vec/common/int_exp.h b/be/src/vec/common/int_exp.h
new file mode 100644
index 0000000000..440366ac16
--- /dev/null
+++ b/be/src/vec/common/int_exp.h
@@ -0,0 +1,135 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/IntExp.h
+// and modified by Doris
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+
+/// On overflow, the function returns an unspecified value.
+
+/// Returns 2^x.
+/// Out-of-range exponents are clamped the same way int_exp10() clamps them: a negative `x`
+/// yields 0 and `x` > 63 yields the largest uint64_t. Without the checks the shift would be
+/// undefined behaviour for those inputs.
+inline uint64_t int_exp2(int x) {
+    if (x < 0) return 0;
+    if (x > 63) return std::numeric_limits<uint64_t>::max();
+
+    return 1ULL << x;
+}
+
+/// Returns 10^x. A negative `x` yields 0; `x` > 19 yields the largest uint64_t.
+inline uint64_t int_exp10(int x) {
+    static const uint64_t powers_of_ten[20] = {
+            1ULL,                   // 10^0
+            10ULL,                  // 10^1
+            100ULL,                 // 10^2
+            1000ULL,                // 10^3
+            10000ULL,               // 10^4
+            100000ULL,              // 10^5
+            1000000ULL,             // 10^6
+            10000000ULL,            // 10^7
+            100000000ULL,           // 10^8
+            1000000000ULL,          // 10^9
+            10000000000ULL,         // 10^10
+            100000000000ULL,        // 10^11
+            1000000000000ULL,       // 10^12
+            10000000000000ULL,      // 10^13
+            100000000000000ULL,     // 10^14
+            1000000000000000ULL,    // 10^15
+            10000000000000000ULL,   // 10^16
+            100000000000000000ULL,  // 10^17
+            1000000000000000000ULL, // 10^18
+            10000000000000000000ULL // 10^19
+    };
+
+    if (x < 0) {
+        return 0;
+    }
+    if (x > 19) {
+        return std::numeric_limits<uint64_t>::max();
+    }
+    return powers_of_ten[x];
+}
+
+namespace common {
+
+/// Returns 10^x as an int.
+/// The table covers 0 <= x <= 9. A negative `x` yields 0 and `x` > 9 yields the largest int,
+/// instead of reading outside the table.
+inline int exp10_i32(int x) {
+    if (x < 0) return 0;
+    if (x > 9) return std::numeric_limits<int>::max();
+
+    static const int values[] = {1,      10,      100,      1000,      10000,
+                                 100000, 1000000, 10000000, 100000000, 1000000000};
+    return values[x];
+}
+
+/// Returns 10^x as an int64_t.
+/// The table covers 0 <= x <= 18. A negative `x` yields 0 and `x` > 18 yields the largest
+/// int64_t, instead of reading outside the table.
+inline int64_t exp10_i64(int x) {
+    if (x < 0) return 0;
+    if (x > 18) return std::numeric_limits<int64_t>::max();
+
+    static const int64_t values[] = {1ll,
+                                     10ll,
+                                     100ll,
+                                     1000ll,
+                                     10000ll,
+                                     100000ll,
+                                     1000000ll,
+                                     10000000ll,
+                                     100000000ll,
+                                     1000000000ll,
+                                     10000000000ll,
+                                     100000000000ll,
+                                     1000000000000ll,
+                                     10000000000000ll,
+                                     100000000000000ll,
+                                     1000000000000000ll,
+                                     10000000000000000ll,
+                                     100000000000000000ll,
+                                     1000000000000000000ll};
+    return values[x];
+}
+
+/// Returns 10^x as an __int128.
+/// The table covers 0 <= x <= 38. A negative `x` yields 0 and `x` > 38 yields the largest
+/// __int128, instead of reading outside the table.
+inline __int128 exp10_i128(int x) {
+    if (x < 0) return 0;
+    /// Largest __int128, spelled without std::numeric_limits because its specialisation for
+    /// __int128 is not available in every compilation mode.
+    if (x > 38) return static_cast<__int128>(~static_cast<unsigned __int128>(0) >> 1);
+
+    static const __int128 values[] = {
+            static_cast<__int128>(1ll),
+            static_cast<__int128>(10ll),
+            static_cast<__int128>(100ll),
+            static_cast<__int128>(1000ll),
+            static_cast<__int128>(10000ll),
+            static_cast<__int128>(100000ll),
+            static_cast<__int128>(1000000ll),
+            static_cast<__int128>(10000000ll),
+            static_cast<__int128>(100000000ll),
+            static_cast<__int128>(1000000000ll),
+            static_cast<__int128>(10000000000ll),
+            static_cast<__int128>(100000000000ll),
+            static_cast<__int128>(1000000000000ll),
+            static_cast<__int128>(10000000000000ll),
+            static_cast<__int128>(100000000000000ll),
+            static_cast<__int128>(1000000000000000ll),
+            static_cast<__int128>(10000000000000000ll),
+            static_cast<__int128>(100000000000000000ll),
+            static_cast<__int128>(1000000000000000000ll),
+            static_cast<__int128>(1000000000000000000ll) * 10ll,
+            static_cast<__int128>(1000000000000000000ll) * 100ll,
+            static_cast<__int128>(1000000000000000000ll) * 1000ll,
+            static_cast<__int128>(1000000000000000000ll) * 10000ll,
+            static_cast<__int128>(1000000000000000000ll) * 100000ll,
+            static_cast<__int128>(1000000000000000000ll) * 1000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 10000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 100000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 1000000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 10000000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 100000000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 1000000000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 10000000000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 100000000000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 1000000000000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 10000000000000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll,
+            static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll * 10ll,
+            static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll * 100ll,
+            static_cast<__int128>(1000000000000000000ll) * 100000000000000000ll * 1000ll};
+    return values[x];
+}
+
+} // namespace common
diff --git a/be/src/vec/common/memcmp_small.h b/be/src/vec/common/memcmp_small.h
new file mode 100644
index 0000000000..d21af0568b
--- /dev/null
+++ b/be/src/vec/common/memcmp_small.h
@@ -0,0 +1,228 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/MemcmpSmall.h
+// and modified by Doris
+
+#pragma once
+
+#include <algorithm>
+#include <cstdint>
+
+namespace detail {
+
+/// Three-way comparison: -1, 0 or 1 when `a` is below, equal to or above `b`.
+template <typename T>
+inline int cmp(T a, T b) {
+    if (a < b) return -1;
+    return (a > b) ? 1 : 0;
+}
+
+} // namespace detail
+
+/// We can process uninitialized memory in the functions below.
+/// Results don't depend on the values inside uninitialized memory but Memory Sanitizer cannot see it.
+/// Disable optimized functions if compile with Memory Sanitizer.
+
+#if defined(__SSE2__) && !defined(MEMORY_SANITIZER)
+#include <emmintrin.h>
+
+/** All functions work under the following assumptions:
+  * - it's possible to read up to 15 excess bytes after the end of the 'a' and 'b' regions;
+  * - memory regions are relatively small, so extra loop unrolling is not worth doing.
+  */
+
+/** Three-way comparison of two regions whose sizes may differ.
+  * When one region is a prefix of the other, the shorter one orders first.
+  */
+template <typename Char>
+inline int memcmp_small_allow_overflow15(const Char* a, size_t a_size, const Char* b, size_t b_size) {
+    const size_t common_size = std::min(a_size, b_size);
+
+    for (size_t pos = 0; pos < common_size; pos += 16) {
+        const __m128i lhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a + pos));
+        const __m128i rhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + pos));
+
+        /// Bit i is set when byte i of the two 16-byte chunks differs.
+        const uint16_t diff = static_cast<uint16_t>(~_mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)));
+        if (diff == 0) continue;
+
+        /// A mismatch at or past the common size lies in the over-read tail: sizes decide.
+        const size_t first_diff = pos + __builtin_ctz(diff);
+        if (first_diff >= common_size) break;
+        return detail::cmp(a[first_diff], b[first_diff]);
+    }
+
+    return detail::cmp(a_size, b_size);
+}
+
+/** Three-way comparison of two regions that share the same size.
+  * TODO Check if the compiler can optimize previous function when the caller pass identical sizes.
+  */
+template <typename Char>
+inline int memcmp_small_allow_overflow15(const Char* a, const Char* b, size_t size) {
+    for (size_t pos = 0; pos < size; pos += 16) {
+        const __m128i lhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a + pos));
+        const __m128i rhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + pos));
+
+        /// Bit i is set when byte i of the two 16-byte chunks differs.
+        const uint16_t diff = static_cast<uint16_t>(~_mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)));
+        if (diff == 0) continue;
+
+        const size_t first_diff = pos + __builtin_ctz(diff);
+
+        /// Only bytes before `size` count; a mismatch in the over-read tail means equality.
+        if (first_diff >= size) return 0;
+        return detail::cmp(a[first_diff], b[first_diff]);
+    }
+
+    return 0;
+}
+
+/** Equality test for two regions: true only when the sizes match and all bytes coincide.
+  */
+template <typename Char>
+inline bool memequal_small_allow_overflow15(const Char* a, size_t a_size, const Char* b,
+                                         size_t b_size) {
+    if (a_size != b_size) return false;
+
+    for (size_t pos = 0; pos < a_size; pos += 16) {
+        const __m128i lhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a + pos));
+        const __m128i rhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + pos));
+
+        /// Bit i is set when byte i of the two 16-byte chunks differs.
+        const uint16_t diff = static_cast<uint16_t>(~_mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)));
+        if (diff == 0) continue;
+
+        /// Still equal if the first mismatch sits in the over-read tail past the region.
+        return pos + __builtin_ctz(diff) >= a_size;
+    }
+
+    return true;
+}
+
+/** Three-way comparison for callers that guarantee `size` is a multiple of 16,
+  * so every compared chunk lies fully inside both regions.
+  */
+template <typename Char>
+inline int memcmp_small_multiple_of16(const Char* a, const Char* b, size_t size) {
+    for (size_t pos = 0; pos < size; pos += 16) {
+        const __m128i lhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a + pos));
+        const __m128i rhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + pos));
+
+        /// Bit i is set when byte i of the two 16-byte chunks differs.
+        const uint16_t diff = static_cast<uint16_t>(~_mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)));
+        if (diff == 0) continue;
+
+        const size_t first_diff = pos + __builtin_ctz(diff);
+        return detail::cmp(a[first_diff], b[first_diff]);
+    }
+    return 0;
+}
+
+/** Three-way comparison of two regions that are exactly 16 bytes long.
+  */
+template <typename Char>
+inline int memcmp16(const Char* a, const Char* b) {
+    const __m128i lhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a));
+    const __m128i rhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b));
+
+    /// Bit i is set when byte i of the two regions differs.
+    const uint16_t diff = static_cast<uint16_t>(~_mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)));
+
+    /// All sixteen bytes matched.
+    if (diff == 0) return 0;
+
+    const int first_diff = __builtin_ctz(diff);
+    return detail::cmp(a[first_diff], b[first_diff]);
+}
+
+/** Equality test for two regions that are exactly 16 bytes long. */
+inline bool memequal16(const void* a, const void* b) {
+    const __m128i lhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a));
+    const __m128i rhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b));
+    /// Every one of the 16 per-byte equality bits must be set.
+    return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF;
+}
+
+/** Tells whether every byte of the region is zero. */
+inline bool memory_is_zero_small_allow_overflow15(const void* data, size_t size) {
+    const char* bytes = reinterpret_cast<const char*>(data);
+    const __m128i zero16 = _mm_setzero_si128();
+
+    for (size_t pos = 0; pos < size; pos += 16) {
+        const __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i*>(bytes + pos));
+
+        /// Bit i is set when byte i of the chunk is non-zero.
+        const int zero_bits = _mm_movemask_epi8(_mm_cmpeq_epi8(zero16, chunk));
+        const uint16_t nonzero = static_cast<uint16_t>(~zero_bits);
+        if (nonzero == 0) continue;
+
+        /// A non-zero byte only matters when it lies before the end of the region.
+        return pos + __builtin_ctz(nonzero) >= size;
+    }
+    return true;
+}
+
+#else
+
+#include <cstring>
+
+/// Portable variant: memcmp over the common prefix, then the shorter region orders first.
+template <typename Char>
+inline int memcmp_small_allow_overflow15(const Char* a, size_t a_size, const Char* b, size_t b_size) {
+    const int prefix_order = memcmp(a, b, std::min(a_size, b_size));
+    if (prefix_order != 0) return prefix_order;
+    return detail::cmp(a_size, b_size);
+}
+
+template <typename Char>
+inline int memcmp_small_allow_overflow15(const Char* a, const Char* b, size_t size) {
+    return memcmp(a, b, size); /// Portable variant: plain memcmp over `size` bytes.
+}
+
+template <typename Char>
+inline bool memequal_small_allow_overflow15(const Char* a, size_t a_size, const Char* b,
+                                         size_t b_size) {
+    return a_size == b_size && 0 == memcmp(a, b, a_size); /// Sizes first, then the bytes.
+}
+
+template <typename Char>
+inline int memcmp_small_multiple_of16(const Char* a, const Char* b, size_t size) {
+    return memcmp(a, b, size); /// Portable variant: plain memcmp over `size` bytes.
+}
+
+template <typename Char>
+inline int memcmp16(const Char* a, const Char* b) {
+    return memcmp(a, b, 16); /// Portable variant: memcmp over exactly 16 bytes.
+}
+
+inline bool memequal16(const void* a, const void* b) {
+    return 0 == memcmp(a, b, 16); /// Portable variant: memcmp over exactly 16 bytes.
+}
+
+/// Portable variant: scans the region byte by byte and stops at the first non-zero byte.
+inline bool memory_is_zero_small_allow_overflow15(const void* data, size_t size) {
+    const char* bytes = reinterpret_cast<const char*>(data);
+
+    for (size_t i = 0; i < size; ++i) {
+        if (bytes[i] != 0) return false;
+    }
+    return true;
+}
+
+#endif
diff --git a/be/src/vec/common/memcpy_small.h b/be/src/vec/common/memcpy_small.h
new file mode 100644
index 0000000000..2dfddf5574
--- /dev/null
+++ b/be/src/vec/common/memcpy_small.h
@@ -0,0 +1,83 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/MemcpySmall.h
+// and modified by Doris
+
+#pragma once
+
+#include <string.h>
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+
+/** The memcpy function can perform suboptimally when both of the following conditions are met:
+  * 1. The size of the memory region is relatively small (approximately, under 50 bytes).
+  * 2. The size of the memory region is not known at compile time.
+  *
+  * In that case, memcpy performs suboptimally for the following reasons:
+  * 1. The function is not inlined.
+  * 2. Much time (many instructions) is spent on processing the "tails" of the data.
+  *
+  * In some cases the function can be implemented more efficiently with the help of a few assumptions.
+  * One such assumption is the ability to read and write a number of bytes past the end of the passed memory regions.
+  * Under that assumption, no complicated tail-handling code is needed and the copy can always proceed in big chunks.
+  *
+  * This is typical, for example, when many small pieces of data are gathered into a single contiguous piece of memory
+  * in a loop, because each subsequent copy overwrites the excess data written after the previous copy.
+  *
+  * The assumption that the memory region is small allows us not to unroll the loop.
+  * This is slower when the memory region is actually big.
+  *
+  * Use with caution.
+  */
+
+namespace detail {
+/// Copies `n` bytes in 16-byte chunks; the last chunk may overrun the end by up to 15 bytes.
+inline void memcpy_small_allow_read_write_overflow15_impl(char* __restrict dst,
+                                                    const char* __restrict src, ssize_t n) {
+    /// `n` is signed so that the countdown can drop below zero after the final chunk.
+    for (; n > 0; n -= 16) {
+        const __m128i chunk = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
+        _mm_storeu_si128(reinterpret_cast<__m128i*>(dst), chunk);
+        dst += 16;
+        src += 16;
+    }
+}
+} // namespace detail
+
+/** Assumes that up to 15 excess bytes may be read after the end of the 'src' region
+  *  and that up to 15 bytes after the end of the 'dst' region may be overwritten with garbage.
+  */
+inline void memcpy_small_allow_read_write_overflow15(void* __restrict dst, const void* __restrict src,
+                                                size_t n) {
+    detail::memcpy_small_allow_read_write_overflow15_impl(reinterpret_cast<char*>(dst),
+                                                    reinterpret_cast<const char*>(src), n);
+}
+
+/** NOTE There was also a function, that assumes, that you could read any bytes inside same memory page of src.
+  * This function was unused, and also it requires special handling for Valgrind and ASan.
+  */
+
+#else /// Implementation for other platforms.
+
+inline void memcpy_small_allow_read_write_overflow15(void* __restrict dst, const void* __restrict src,
+                                                size_t n) {
+    memcpy(dst, src, n); /// Portable variant: copies exactly `n` bytes.
+}
+
+#endif
diff --git a/be/src/vec/common/mremap.h b/be/src/vec/common/mremap.h
new file mode 100644
index 0000000000..eeeee20e96
--- /dev/null
+++ b/be/src/vec/common/mremap.h
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/mremap.h
+// and modified by Doris
+
+#pragma once
+
+#include <sys/types.h>
+
+#include <cstddef>
+#if !defined(_MSC_VER)
+#include <sys/mman.h>
+#endif
+
+#ifdef MREMAP_MAYMOVE
+#define HAS_MREMAP 1
+#else
+#define HAS_MREMAP 0
+#endif
+
+/// You can forcely disable mremap by defining DISABLE_MREMAP to 1 before including this file.
+#ifndef DISABLE_MREMAP
+#if HAS_MREMAP
+#define DISABLE_MREMAP 0
+#else
+#define DISABLE_MREMAP 1
+#endif
+#endif
+
+/// Implements mremap with mmap/memcpy/munmap. Only declared in this header.
+void* mremap_fallback(void* old_address, size_t old_size, size_t new_size, int flags, int mmap_prot,
+                      int mmap_flags, int mmap_fd, off_t mmap_offset);
+
+#if !HAS_MREMAP
+#define MREMAP_MAYMOVE 1
+
+inline void* mremap(void* old_address, size_t old_size, size_t new_size, int flags = 0,
+                    int mmap_prot = 0, int mmap_flags = 0, int mmap_fd = -1,
+                    off_t mmap_offset = 0) {
+    return mremap_fallback(old_address, old_size, new_size, flags, mmap_prot, mmap_flags, mmap_fd,
+                           mmap_offset); /// Stand-in compiled only when HAS_MREMAP is 0.
+}
+#endif
+
+inline void* clickhouse_mremap(void* old_address, size_t old_size, size_t new_size, int flags = 0,
+                               [[maybe_unused]] int mmap_prot = 0,
+                               [[maybe_unused]] int mmap_flags = 0,
+                               [[maybe_unused]] int mmap_fd = -1,
+                               [[maybe_unused]] off_t mmap_offset = 0) {
+#if DISABLE_MREMAP /// mremap is unavailable or was disabled by the includer: use the fallback.
+    return mremap_fallback(old_address, old_size, new_size, flags, mmap_prot, mmap_flags, mmap_fd,
+                           mmap_offset);
+#else /// Otherwise call mremap itself.
+
+    return mremap(old_address, old_size, new_size, flags
+#if !defined(MREMAP_FIXED) /// Without MREMAP_FIXED the mmap_* arguments are passed along too.
+                  ,
+                  mmap_prot, mmap_flags, mmap_fd, mmap_offset
+#endif
+    );
+#endif
+}
diff --git a/be/src/vec/common/nan_utils.h b/be/src/vec/common/nan_utils.h
new file mode 100644
index 0000000000..9a4e5989e2
--- /dev/null
+++ b/be/src/vec/common/nan_utils.h
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/NanUtils.h
+// and modified by Doris
+
+#pragma once
+
+#include <cmath>
+#include <limits>
+#include <type_traits>
+
+/// Tells whether `x` is NaN. For non-floating point types the answer is a compile-time
+/// `false`, which keeps the call zero-cost.
+template <typename T>
+inline bool is_nan([[maybe_unused]] T x) {
+    if constexpr (std::is_floating_point_v<T>) {
+        return std::isnan(x);
+    } else {
+        return false;
+    }
+}
+
+/// Tells whether `x` is finite; values of non-floating point types always are.
+template <typename T>
+inline bool is_finite([[maybe_unused]] T x) {
+    if constexpr (std::is_floating_point_v<T>) {
+        return std::isfinite(x);
+    } else {
+        return true;
+    }
+}
+
+template <typename T>
+std::enable_if_t<std::is_floating_point_v<T>, T> nan_or_zero() {
+    return std::numeric_limits<T>::quiet_NaN(); /// Floating point types: quiet NaN.
+}
+
+template <typename T>
+std::enable_if_t<std::numeric_limits<T>::is_integer, T> nan_or_zero() {
+    return 0; /// Integer types: zero.
+}
+
+template <typename T>
+std::enable_if_t<std::is_class_v<T>, T> nan_or_zero() {
+    return T {}; /// Class types: a value-initialized object.
+}
+
+#if 1 /// __int128
+template <typename T>
+std::enable_if_t<std::is_same_v<T, __int128> && !std::numeric_limits<T>::is_integer, __int128>
+nan_or_zero() {
+    return __int128(0); /// __int128 when std::numeric_limits does not report it as an integer.
+}
+#endif
diff --git a/be/src/vec/common/pod_array.cpp b/be/src/vec/common/pod_array.cpp
new file mode 100644
index 0000000000..322aceab60
--- /dev/null
+++ b/be/src/vec/common/pod_array.cpp
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/PODArray.cpp
+// and modified by Doris
+
+#include "vec/common/pod_array.h"
+
+namespace doris::vectorized {
+/// Zero-initialized static buffer; empty PODArrays with left padding point at its end.
+const char empty_pod_array[EmptyPODArraySize] {};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/common/pod_array.h b/be/src/vec/common/pod_array.h
new file mode 100644
index 0000000000..48e529562c
--- /dev/null
+++ b/be/src/vec/common/pod_array.h
@@ -0,0 +1,591 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/PODArray.h
+// and modified by Doris
+
+#pragma once
+
+#include <common/compiler_util.h>
+#include <string.h>
+
+#include <algorithm>
+#include <boost/iterator_adaptors.hpp>
+#include <boost/noncopyable.hpp>
+#include <cassert>
+#include <cstddef>
+#include <memory>
+
+#include "vec/common/allocator.h"
+#include "vec/common/bit_helpers.h"
+#include "vec/common/memcpy_small.h"
+#include "vec/common/strong_typedef.h"
+
+#ifndef NDEBUG
+#include <sys/mman.h>
+#endif
+
+#include "vec/common/pod_array_fwd.h"
+
+namespace doris::vectorized {
+
+/** A dynamic array for POD types.
+  * Designed for a small number of large arrays (rather than a lot of small ones).
+  * To be more precise - for use in ColumnVector.
+  * It differs from std::vector in that it does not initialize the elements.
+  *
+  * Made noncopyable so that there are no accidental copies. You can copy the data using `assign` method.
+  *
+  * Only part of the std::vector interface is supported.
+  *
+  * The default constructor creates an empty object that does not allocate memory.
+  * Then the memory is allocated at least initial_bytes bytes.
+  *
+  * If you insert elements with push_back, without making a `reserve`, then PODArray is about 2.5 times faster than std::vector.
+  *
+  * The template parameter `pad_right` - always allocate at the end of the array as many unused bytes.
+  * Can be used to make optimistic reading, writing, copying with unaligned SIMD instructions.
+  *
+  * The template parameter `pad_left` - always allocate memory before 0th element of the array (rounded up to the whole number of elements)
+  *  and zero initialize -1th element. It allows to use -1th element that will have value 0.
+  * This gives performance benefits when converting an array of offsets to array of sizes.
+  *
+  * Some methods using allocator have TAllocatorParams variadic arguments.
+  * These arguments will be passed to corresponding methods of TAllocator.
+  * Example: pointer to Arena, that is used for allocations.
+  *
+  * Why Allocator is not passed through constructor, as it is done in C++ standard library?
+  * Because sometimes we have many small objects, that share same allocator with same parameters,
+  *  and we must avoid larger object size due to storing the same parameters in each object.
+  * This is required for states of aggregate functions.
+  *
+  * TODO Pass alignment to Allocator.
+  * TODO Allow greater alignment than alignof(T). Example: array of char aligned to page size.
+  */
+static constexpr size_t EmptyPODArraySize = 1024; /// Size in bytes of empty_pod_array.
+extern const char empty_pod_array[EmptyPODArraySize]; /// Defined in pod_array.cpp.
+
+/** Base class that depends only on the size of an element, not on the element type itself.
+  * You can static_cast to this class if you want to insert some data regardless of the actual type T.
+  */
+template <size_t ELEMENT_SIZE, size_t initial_bytes, typename TAllocator, size_t pad_right_,
+          size_t pad_left_>
+class PODArrayBase : private boost::noncopyable,
+                     private TAllocator /// empty base optimization
+{
+protected:
+    /// Round padding up to a whole number of elements to simplify arithmetic.
+    static constexpr size_t pad_right = integerRoundUp(pad_right_, ELEMENT_SIZE);
+    /// pad_left is also rounded up to 16 bytes to maintain alignment of allocated memory.
+    static constexpr size_t pad_left = integerRoundUp(integerRoundUp(pad_left_, ELEMENT_SIZE), 16);
+    /// Empty array will point to this static memory as padding.
+    static constexpr char* null =
+            pad_left ? const_cast<char*>(empty_pod_array) + EmptyPODArraySize : nullptr;
+
+    static_assert(pad_left <= EmptyPODArraySize,
+                  "Left Padding exceeds EmptyPODArraySize. Is the element size too large?");
+
+    char* c_start = null; /// Does not include pad_left.
+    char* c_end = null; /// One past the last used byte.
+    char* c_end_of_storage = null; /// Does not include pad_right.
+
+    /// The amount of memory occupied by the num_elements of the elements.
+    static size_t byte_size(size_t num_elements) { return num_elements * ELEMENT_SIZE; }
+
+    /// Minimum amount of memory to allocate for num_elements, including padding.
+    static size_t minimum_memory_for_elements(size_t num_elements) {
+        return byte_size(num_elements) + pad_right + pad_left;
+    }
+
+    void alloc_for_num_elements(size_t num_elements) {
+        alloc(round_up_to_power_of_two_or_zero(minimum_memory_for_elements(num_elements)));
+    }
+
+    template <typename... TAllocatorParams>
+    void alloc(size_t bytes, TAllocatorParams&&... allocator_params) {
+        c_start = c_end = reinterpret_cast<char*>(TAllocator::alloc(
+                                  bytes, std::forward<TAllocatorParams>(allocator_params)...)) +
+                          pad_left;
+        c_end_of_storage = c_start + bytes - pad_right - pad_left;
+
+        if (pad_left) memset(c_start - ELEMENT_SIZE, 0, ELEMENT_SIZE); /// Zero the -1th element.
+    }
+
+    void dealloc() {
+        if (c_start == null) return; /// Nothing was allocated by this array.
+
+        unprotect();
+
+        TAllocator::free(c_start - pad_left, allocated_bytes());
+    }
+
+    template <typename... TAllocatorParams>
+    void realloc(size_t bytes, TAllocatorParams&&... allocator_params) {
+        if (c_start == null) {
+            alloc(bytes, std::forward<TAllocatorParams>(allocator_params)...);
+            return;
+        }
+
+        unprotect();
+
+        ptrdiff_t end_diff = c_end - c_start; /// Used bytes, re-applied to the new c_start below.
+
+        c_start = reinterpret_cast<char*>(TAllocator::realloc(
+                          c_start - pad_left, allocated_bytes(), bytes,
+                          std::forward<TAllocatorParams>(allocator_params)...)) +
+                  pad_left;
+
+        c_end = c_start + end_diff;
+        c_end_of_storage = c_start + bytes - pad_right - pad_left;
+    }
+
+    bool is_initialized() const {
+        return (c_start != null) && (c_end != null) && (c_end_of_storage != null);
+    }
+
+    bool is_allocated_from_stack() const {
+        constexpr size_t stack_threshold = TAllocator::getStackThreshold();
+        return (stack_threshold > 0) && (allocated_bytes() <= stack_threshold);
+    }
+
+    template <typename... TAllocatorParams>
+    void reserve_for_next_size(TAllocatorParams&&... allocator_params) {
+        if (size() == 0) {
+            // The allocated memory should be multiplication of ELEMENT_SIZE to hold the element, otherwise,
+            // memory issue such as corruption could appear in edge case.
+            realloc(std::max(integerRoundUp(initial_bytes, ELEMENT_SIZE),
+                             minimum_memory_for_elements(1)),
+                    std::forward<TAllocatorParams>(allocator_params)...);
+        } else
+            realloc(allocated_bytes() * 2, std::forward<TAllocatorParams>(allocator_params)...);
+    }
+
+#ifndef NDEBUG
+    /// Make memory region readonly with mprotect if it is large enough.
+    /// The operation is slow and performed only for debug builds.
+    void protect_impl(int prot) {
+        static constexpr size_t PROTECT_PAGE_SIZE = 4096;
+
+        char* left_rounded_up = reinterpret_cast<char*>(
+                (reinterpret_cast<intptr_t>(c_start) - pad_left + PROTECT_PAGE_SIZE - 1) /
+                PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE);
+        char* right_rounded_down =
+                reinterpret_cast<char*>((reinterpret_cast<intptr_t>(c_end_of_storage) + pad_right) /
+                                        PROTECT_PAGE_SIZE * PROTECT_PAGE_SIZE);
+
+        if (right_rounded_down > left_rounded_up) {
+            size_t length = right_rounded_down - left_rounded_up;
+            if (0 != mprotect(left_rounded_up, length, prot)) throw std::exception();
+        }
+    }
+
+    /// Restore memory protection in destructor or realloc for further reuse by allocator.
+    bool mprotected = false;
+#endif
+
+public:
+    bool empty() const { return c_end == c_start; }
+    size_t size() const { return (c_end - c_start) / ELEMENT_SIZE; }
+    size_t capacity() const { return (c_end_of_storage - c_start) / ELEMENT_SIZE; }
+
+    /// This method is safe to use only for information about memory usage.
+    size_t allocated_bytes() const { return c_end_of_storage - c_start + pad_right + pad_left; }
+
+    void clear() { c_end = c_start; } /// Keeps the allocated storage.
+
+    template <typename... TAllocatorParams>
+    void reserve(size_t n, TAllocatorParams&&... allocator_params) {
+        if (n > capacity())
+            realloc(round_up_to_power_of_two_or_zero(minimum_memory_for_elements(n)),
+                    std::forward<TAllocatorParams>(allocator_params)...);
+    }
+
+    template <typename... TAllocatorParams>
+    void resize(size_t n, TAllocatorParams&&... allocator_params) {
+        reserve(n, std::forward<TAllocatorParams>(allocator_params)...);
+        resize_assume_reserved(n);
+    }
+
+    void resize_assume_reserved(const size_t n) { c_end = c_start + byte_size(n); }
+
+    const char* raw_data() const { return c_start; }
+
+    template <typename... TAllocatorParams>
+    void push_back_raw(const char* ptr, TAllocatorParams&&... allocator_params) {
+        if (UNLIKELY(c_end == c_end_of_storage))
+            reserve_for_next_size(std::forward<TAllocatorParams>(allocator_params)...);
+
+        memcpy(c_end, ptr, ELEMENT_SIZE); /// Copies exactly one element from `ptr`.
+        c_end += byte_size(1);
+    }
+
+    void protect() {
+#ifndef NDEBUG
+        protect_impl(PROT_READ);
+        mprotected = true;
+#endif
+    }
+
+    void unprotect() {
+#ifndef NDEBUG
+        if (mprotected) protect_impl(PROT_WRITE); /// NOTE(review): confirm PROT_WRITE alone suffices.
+        mprotected = false;
+#endif
+    }
+
+    ~PODArrayBase() { dealloc(); }
+};
+
+template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_,
+          size_t pad_left_>
+class PODArray : public PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_> {
+protected:
+    using Base = PODArrayBase<sizeof(T), initial_bytes, TAllocator, pad_right_, pad_left_>;
+
+    T* t_start() { return reinterpret_cast<T*>(this->c_start); } /// c_start viewed as T*
+    T* t_end() { return reinterpret_cast<T*>(this->c_end); } /// c_end viewed as T*
+    T* t_end_of_storage() { return reinterpret_cast<T*>(this->c_end_of_storage); }
+
+    const T* t_start() const { return reinterpret_cast<const T*>(this->c_start); }
+    const T* t_end() const { return reinterpret_cast<const T*>(this->c_end); }
+    const T* t_end_of_storage() const { return reinterpret_cast<const T*>(this->c_end_of_storage); }
+
+public:
+    using value_type = T;
+
+    /// Not a plain `typedef`: that would make the constructors and `assign` functions ambiguous.
+    struct iterator : public boost::iterator_adaptor<iterator, T*> {
+        iterator() {}
+        iterator(T* ptr_) : iterator::iterator_adaptor_(ptr_) {} /// Implicit conversion from T*.
+    };
+
+    struct const_iterator : public boost::iterator_adaptor<const_iterator, const T*> {
+        const_iterator() {}
+        const_iterator(const T* ptr_) : const_iterator::iterator_adaptor_(ptr_) {}
+    };
+
+    PODArray() {} /// Empty array; this constructor allocates nothing.
+
+    PODArray(size_t n) { /// Holds `n` elements; this constructor does not initialize them.
+        this->alloc_for_num_elements(n);
+        this->c_end += this->byte_size(n);
+    }
+
+    PODArray(size_t n, const T& x) { /// Allocates for `n` elements, then assign(n, x).
+        this->alloc_for_num_elements(n);
+        assign(n, x);
+    }
+
+    PODArray(const_iterator from_begin, const_iterator from_end) {
+        this->alloc_for_num_elements(from_end - from_begin); /// Room for the whole range.
+        insert(from_begin, from_end);
+    }
+
+    PODArray(std::initializer_list<T> il) : PODArray(std::begin(il), std::end(il)) {}
+
+    PODArray(PODArray&& other) { this->swap(other); } /// Starts empty, then swaps with `other`.
+
+    PODArray& operator=(PODArray&& other) {
+        this->swap(other); /// NOTE(review): implemented as a swap, not a plain overwrite.
+        return *this;
+    }
+
+    T* data() { return t_start(); } /// Pointer to the first element.
+    const T* data() const { return t_start(); }
+
+    /// The index is signed to access -1th element without pointer overflow.
+    T& operator[](ssize_t n) {
+        /// <= size, because taking address of one element past memory range is Ok in C++ (expression like &arr[arr.size()] is perfectly valid).
+        assert((n >= (static_cast<ssize_t>(pad_left_) ? -1 : 0)) &&
+               (n <= static_cast<ssize_t>(this->size())));
+        return t_start()[n];
+    }
+
+    const T& operator[](ssize_t n) const {
+        assert((n >= (static_cast<ssize_t>(pad_left_) ? -1 : 0)) &&
+               (n <= static_cast<ssize_t>(this->size())));
+        return t_start()[n];
+    }
+
+    T& front() { return t_start()[0]; } /// No emptiness check.
+    T& back() { return t_end()[-1]; } /// No emptiness check.
+    const T& front() const { return t_start()[0]; }
+    const T& back() const { return t_end()[-1]; }
+
+    iterator begin() { return t_start(); }
+    iterator end() { return t_end(); }
+    const_iterator begin() const { return t_start(); }
+    const_iterator end() const { return t_end(); }
+    const_iterator cbegin() const { return t_start(); }
+    const_iterator cend() const { return t_end(); }
+
+    /// Sets the size to n elements; when the array grows, the added elements are zeroed byte-wise.
+    void resize_fill(size_t n) {
+        const size_t current = this->size();
+        if (current < n) {
+            this->reserve(n);
+            memset(this->c_end, 0, this->byte_size(n - current));
+        }
+        this->c_end = this->c_start + this->byte_size(n);
+    }
+
+    void resize_fill(size_t n, const T& value) { /// Sets the size to n; added elements are copies of `value`.
+        const size_t current = this->size();
+        if (current < n) {
+            this->reserve(n);
+            std::fill(t_end(), t_end() + (n - current), value);
+        }
+        this->c_end = this->c_start + this->byte_size(n);
+    }
+
+    template <typename U, typename... TAllocatorParams>
+    void push_back(U&& x, TAllocatorParams&&... allocator_params) {
+        /// Grow only when the storage is exactly full; the extra arguments are forwarded to the growth call.
+        if (UNLIKELY(this->c_end_of_storage == this->c_end))
+            this->reserve_for_next_size(std::forward<TAllocatorParams>(allocator_params)...);
+        new (t_end()) T(std::forward<U>(x));
+        this->c_end += this->byte_size(1);
+    }
+
+    /// Appends num copies of x, growing the storage first when the free space is insufficient.
+    template <typename U, typename... TAllocatorParams>
+    void add_num_element(U&& x, uint32_t num, TAllocatorParams&&... allocator_params) {
+        if (num == 0) return;
+        const size_t bytes_to_add = this->byte_size(num);
+        if (UNLIKELY(this->c_end + bytes_to_add > this->c_end_of_storage))
+            this->reserve(this->size() + num);
+        /// reserve() may move the buffer, so the new end must be derived from c_end only after it has run.
+        std::fill(t_end(), t_end() + num, x);
+        this->c_end += bytes_to_add;
+    }
+
+    template <typename U, typename... TAllocatorParams>
+    void add_num_element_without_reserve(U&& x, uint32_t num, TAllocatorParams&&... allocator_params) {
+        std::fill(t_end(), t_end() + num, x); /// No capacity check: room for num elements must already exist.
+        this->c_end += num * sizeof(T);
+    }
+
+    /**
+     * Appends x without any capacity check: the caller must have reserved enough space beforehand.
+     * Compared with push_back() the capacity branch is omitted, which is the point of this variant.
+     */
+    template <typename U, typename... TAllocatorParams>
+    void push_back_without_reserve(U&& x, TAllocatorParams&&... allocator_params) {
+        new (t_end()) T(std::forward<U>(x));
+        this->c_end += this->byte_size(1);
+    }
+
+    /** Constructs a new last element in place from args, growing the storage when it is exactly full.
+      * Allocator parameters cannot be passed here, so this method is unusable when the Allocator requires them.
+      */
+    template <typename... Args>
+    void emplace_back(Args&&... args) {
+        if (UNLIKELY(this->c_end_of_storage == this->c_end))
+            this->reserve_for_next_size();
+        new (t_end()) T(std::forward<Args>(args)...);
+        this->c_end += this->byte_size(1);
+    }
+
+    void pop_back() { this->c_end -= this->byte_size(1); } /// Only moves the end pointer back by one element: no emptiness check, no destructor call.
+
+    /// Ensures capacity for appending [from_begin, from_end). The range must not be a piece of this array: growing can invalidate it.
+    template <typename It1, typename It2, typename... TAllocatorParams>
+    void insert_prepare(It1 from_begin, It2 from_end, TAllocatorParams&&... allocator_params) {
+        const size_t needed = this->size() + (from_end - from_begin);
+        if (needed <= this->capacity()) return;
+        this->reserve(round_up_to_power_of_two_or_zero(needed),
+                      std::forward<TAllocatorParams>(allocator_params)...);
+    }
+
+    /// Appends [from_begin, from_end), growing first if needed. The range must not be a piece of this array: growing can invalidate it.
+    template <typename It1, typename It2, typename... TAllocatorParams>
+    void insert(It1 from_begin, It2 from_end, TAllocatorParams&&... allocator_params) {
+        insert_prepare(from_begin, from_end, std::forward<TAllocatorParams>(allocator_params)...);
+        insert_assume_reserved(from_begin, from_end);
+    }
+
+    /// Appends a range assuming up to 15 excessive bytes after `from_end` are readable and this PODArray is padded (pad_right_ >= 15).
+    template <typename It1, typename It2, typename... TAllocatorParams>
+    void insert_small_allow_read_write_overflow15(It1 from_begin, It2 from_end,
+                                                  TAllocatorParams&&... allocator_params) {
+        static_assert(pad_right_ >= 15);
+        insert_prepare(from_begin, from_end, std::forward<TAllocatorParams>(allocator_params)...);
+        const size_t n_bytes = this->byte_size(from_end - from_begin);
+        const void* src = reinterpret_cast<const void*>(&*from_begin);
+        memcpy_small_allow_read_write_overflow15(this->c_end, src, n_bytes);
+        this->c_end += n_bytes;
+    }
+
+    template <typename It1, typename It2>
+    void insert(iterator it, It1 from_begin, It2 from_end) {
+        /// Inserts [from_begin, from_end) before `it`. Measure the tail first: insert_prepare() may reallocate and invalidate `it`.
+        size_t bytes_to_copy = this->byte_size(from_end - from_begin);
+        size_t bytes_to_move = (end() - it) * sizeof(T);
+        insert_prepare(from_begin, from_end);
+
+        /// Shift the tail to the right. Source and destination overlap when the tail is longer than the inserted range, hence memmove.
+        if (UNLIKELY(bytes_to_move))
+            memmove(this->c_end + bytes_to_copy - bytes_to_move, this->c_end - bytes_to_move,
+                    bytes_to_move);
+        memcpy(this->c_end - bytes_to_move, reinterpret_cast<const void*>(&*from_begin),
+               bytes_to_copy);
+        this->c_end += bytes_to_copy;
+    }
+
+    template <typename It1, typename It2>
+    void insert_assume_reserved(It1 from_begin, It2 from_end) { /// Raw append of the range; no capacity check is made here.
+        const size_t n_bytes = this->byte_size(from_end - from_begin);
+        memcpy(this->c_end, reinterpret_cast<const void*>(&*from_begin), n_bytes);
+        this->c_end = this->c_end + n_bytes;
+    }
+
+    void swap(PODArray& rhs) { /// Exchanges the contents of *this and rhs, covering uninitialized and stack-allocated arrays.
+#ifndef NDEBUG
+        this->unprotect();
+        rhs.unprotect();
+#endif
+
+        /// Swap two PODArray objects, arr1 and arr2, that satisfy the following conditions:
+        /// - The elements of arr1 are stored on stack.
+        /// - The elements of arr2 are stored on heap.
+        auto swap_stack_heap = [this](PODArray& arr1, PODArray& arr2) {
+            size_t stack_size = arr1.size();
+            size_t stack_allocated = arr1.allocated_bytes();
+
+            size_t heap_size = arr2.size();
+            size_t heap_allocated = arr2.allocated_bytes();
+
+            /// Keep track of the stack content we have to copy.
+            char* stack_c_start = arr1.c_start;
+
+            /// arr1 takes ownership of the heap memory of arr2.
+            arr1.c_start = arr2.c_start;
+            arr1.c_end_of_storage = arr1.c_start + heap_allocated - arr1.pad_right;
+            arr1.c_end = arr1.c_start + this->byte_size(heap_size);
+
+            /// Allocate stack space for arr2.
+            arr2.alloc(stack_allocated);
+            /// Copy the stack content.
+            memcpy(arr2.c_start, stack_c_start, this->byte_size(stack_size));
+            arr2.c_end = arr2.c_start + this->byte_size(stack_size);
+        };
+
+        auto do_move = [this](PODArray& src, PODArray& dest) { /// Transfers the contents of src into dest.
+            if (src.is_allocated_from_stack()) {
+                dest.dealloc();
+                dest.alloc(src.allocated_bytes());
+                memcpy(dest.c_start, src.c_start, this->byte_size(src.size()));
+                dest.c_end = dest.c_start + (src.c_end - src.c_start);
+                /// Stack content was copied, not stolen: reset src to the null state.
+                src.c_start = Base::null;
+                src.c_end = Base::null;
+                src.c_end_of_storage = Base::null;
+            } else {
+                std::swap(dest.c_start, src.c_start);
+                std::swap(dest.c_end, src.c_end);
+                std::swap(dest.c_end_of_storage, src.c_end_of_storage);
+            }
+        };
+        /// Cases where at least one side is not initialized: nothing to do, or a one-directional move.
+        if (!this->is_initialized() && !rhs.is_initialized()) {
+            return;
+        } else if (!this->is_initialized() && rhs.is_initialized()) {
+            do_move(rhs, *this);
+            return;
+        } else if (this->is_initialized() && !rhs.is_initialized()) {
+            do_move(*this, rhs);
+            return;
+        }
+        /// Both sides are initialized: dispatch on where each side keeps its elements.
+        if (this->is_allocated_from_stack() && rhs.is_allocated_from_stack()) {
+            size_t min_size = std::min(this->size(), rhs.size());
+            size_t max_size = std::max(this->size(), rhs.size());
+            /// Swap the common prefix element by element, then copy the tail of the longer array into the shorter one.
+            for (size_t i = 0; i < min_size; ++i) std::swap(this->operator[](i), rhs[i]);
+            /// NOTE(review): operator[] asserts n <= size(); indexing the shorter array past its size here may trip that assert in debug builds - confirm.
+            if (this->size() == max_size) {
+                for (size_t i = min_size; i < max_size; ++i) rhs[i] = this->operator[](i);
+            } else {
+                for (size_t i = min_size; i < max_size; ++i) this->operator[](i) = rhs[i];
+            }
+
+            size_t lhs_size = this->size();
+            size_t lhs_allocated = this->allocated_bytes();
+
+            size_t rhs_size = rhs.size();
+            size_t rhs_allocated = rhs.allocated_bytes();
+            /// Each side keeps its own buffer and takes over the other side's storage bound and size.
+            this->c_end_of_storage = this->c_start + rhs_allocated - Base::pad_right;
+            rhs.c_end_of_storage = rhs.c_start + lhs_allocated - Base::pad_right;
+
+            this->c_end = this->c_start + this->byte_size(rhs_size);
+            rhs.c_end = rhs.c_start + this->byte_size(lhs_size);
+        } else if (this->is_allocated_from_stack() && !rhs.is_allocated_from_stack()) {
+            swap_stack_heap(*this, rhs);
+        } else if (!this->is_allocated_from_stack() && rhs.is_allocated_from_stack()) {
+            swap_stack_heap(rhs, *this);
+        } else { /// Neither side is stack-allocated: exchanging the three pointers is enough.
+            std::swap(this->c_start, rhs.c_start);
+            std::swap(this->c_end, rhs.c_end);
+            std::swap(this->c_end_of_storage, rhs.c_end_of_storage);
+        }
+    }
+
+    void assign(size_t n, const T& x) { /// Makes the array exactly n elements long, every one a copy of x.
+        this->resize(n);
+        std::fill(begin(), end(), x); /// Overwrites all n elements, not only newly added ones.
+    }
+
+    template <typename It1, typename It2>
+    void assign(It1 from_begin, It2 from_end) { /// Replaces the contents with a byte copy of [from_begin, from_end).
+        const size_t count = from_end - from_begin;
+        if (this->capacity() < count) {
+            this->reserve(round_up_to_power_of_two_or_zero(count));
+        }
+        const size_t n_bytes = this->byte_size(count);
+        memcpy(this->c_start, reinterpret_cast<const void*>(&*from_begin), n_bytes);
+        this->c_end = this->c_start + n_bytes;
+    }
+
+    void assign(const PODArray& from) { assign(from.begin(), from.end()); } /// Replaces the contents with a copy of `from` via the range overload.
+
+    /// Element-wise equality: the sizes must match and every pair of elements must compare equal.
+    bool operator==(const PODArray& other) const {
+        if (other.size() != this->size()) {
+            return false;
+        }
+
+        const_iterator rhs_it = other.begin();
+        for (const_iterator lhs_it = begin(); lhs_it != end(); ++lhs_it, ++rhs_it) {
+            if (*lhs_it != *rhs_it) {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    bool operator!=(const PODArray& other) const { return !operator==(other); } /// Negation of operator==.
+};
+
+template <typename T, size_t initial_bytes, typename TAllocator, size_t pad_right_, size_t pad_left_>
+void swap(PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_>& lhs,
+          PODArray<T, initial_bytes, TAllocator, pad_right_, pad_left_>& rhs) {
+    lhs.swap(rhs); /// Free swap forwarding to the member; pad_left_ is deduced so left-padded arrays match as well.
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/common/pod_array_fwd.h b/be/src/vec/common/pod_array_fwd.h
new file mode 100644
index 0000000000..ff00b31257
--- /dev/null
+++ b/be/src/vec/common/pod_array_fwd.h
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/PODArray_fwd.h
+// and modified by Doris
+
+/**
+  * This file contains the forward declaration of PODArray and some alias
+  * templates that define various kinds of PODArray.
+  */
+#pragma once
+
+#include "vec/common/allocator_fwd.h"
+
+namespace doris::vectorized {
+
+inline constexpr size_t integerRoundUp(size_t value, size_t dividend) { /// Rounds value up to a multiple of dividend (must be non-zero).
+    return value % dividend == 0 ? value : value + (dividend - value % dividend); /// No `value + dividend - 1`, which can wrap around.
+}
+
+template <typename T, size_t initial_bytes = 4096, typename TAllocator = Allocator<false>,
+          size_t pad_right_ = 0, size_t pad_left_ = 0>
+class PODArray; /// Forward declaration; the default template arguments are specified here.
+
+/** For columns. Uses pad_right_ = 15 and pad_left_ = 16; the padding is enough to read and write xmm-register at the address of the last element. */
+template <typename T, size_t initial_bytes = 4096, typename TAllocator = Allocator<false>>
+using PaddedPODArray = PODArray<T, initial_bytes, TAllocator, 15, 16>;
+
+/** A helper for declaring PODArray that uses inline memory.
+  * The initial size is set to use all the inline bytes, since using less would only add some extra allocation calls.
+  * rounded_bytes is inline_bytes rounded up to a multiple of sizeof(T); it is passed both as initial_bytes and to the allocator.
+  */
+template <typename T, size_t inline_bytes,
+          size_t rounded_bytes = integerRoundUp(inline_bytes, sizeof(T))>
+using PODArrayWithStackMemory =
+        PODArray<T, rounded_bytes,
+                 AllocatorWithStackMemory<Allocator<false>, rounded_bytes, alignof(T)>>;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/common/radix_sort.h b/be/src/vec/common/radix_sort.h
new file mode 100644
index 0000000000..13a8ad1f8b
--- /dev/null
+++ b/be/src/vec/common/radix_sort.h
@@ -0,0 +1,389 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/RadixSort.h
+// and modified by Doris
+
+#pragma once
+
+#include "string.h"
+#if !defined(__APPLE__) && !defined(__FreeBSD__)
+#include "malloc.h"
+#endif
+#include "algorithm"
+#include "cmath"
+#include "cstdint"
+#include "cstdlib"
+#include "type_traits"
+#include "vec/common/bit_cast.h"
+#include "vec/core/types.h"
+
+/** Radix sort, has the following functionality:
+  * Can sort unsigned, signed numbers, and floats.
+  * Can sort an array of fixed length elements that contain something else besides the key.
+  * Customizable radix size.
+  *
+  * LSB, stable.
+  * NOTE For some applications it makes sense to add MSB-radix-sort,
+  *  as well as radix-select, radix-partial-sort, radix-get-permutation algorithms based on it.
+  */
+
+/** Used as a template parameter. See below.
+  */
+struct RadixSortMallocAllocator { /// Stateless allocator that forwards to malloc and free.
+    void* allocate(size_t size) { return malloc(size); }
+
+    void deallocate(void* ptr, size_t /*size*/) { free(ptr); } /// The size argument is ignored.
+};
+
+/** A transformation that maps the bit representation of a key to an unsigned integer number,
+  *  such that the order relation over the keys matches the order relation over the obtained unsigned numbers.
+  * For floats this conversion does the following:
+  *  the sign bit is always flipped; if the sign bit was set, all other bits are flipped as well.
+  * In this case, NaN-s with a clear sign bit are bigger than all normal numbers.
+  */
+template <typename KeyBits>
+struct RadixSortFloatTransform {
+    /// False: execute_lsd writes the transformed key into memory once instead of recalculating it on every access.
+    static constexpr bool transform_is_simple = false;
+
+    static KeyBits forward(KeyBits x) {
+        constexpr size_t top = sizeof(KeyBits) * 8 - 1; /// Index of the sign bit.
+        return x ^ ((-(x >> top)) | (KeyBits(1) << top));
+    }
+
+    static KeyBits backward(KeyBits x) {
+        constexpr size_t top = sizeof(KeyBits) * 8 - 1; /// Index of the sign bit.
+        return x ^ (((x >> top) - 1) | (KeyBits(1) << top));
+    }
+};
+
+template <typename TElement>
+struct RadixSortFloatTraits {
+    /// The type of the element. It can be a structure with a key and some other payload. Or just a key.
+    using Element = TElement;
+    /// The key to sort by.
+    using Key = Element;
+    /// Type for calculating histograms. In the case of a known small number of elements, it can be less than size_t.
+    using CountType = uint32_t;
+
+    /// The type to which the key is transformed to do bit operations. This UInt is the same size as the key.
+    using KeyBits = std::conditional_t<sizeof(Key) == 8, uint64_t, uint32_t>;
+
+    /// With what pieces of the key, in bits, to do one pass - reshuffle of the array.
+    static constexpr size_t PART_SIZE_BITS = 8;
+
+    /// Converting a key into KeyBits is such that the order relation over the key corresponds to the order relation over KeyBits.
+    using Transform = RadixSortFloatTransform<KeyBits>;
+
+    /// An object with the functions allocate and deallocate. It can be used, for example, to allocate memory
+    /// for a temporary array on the stack; to do this, the allocator itself is created on the stack.
+    using Allocator = RadixSortMallocAllocator;
+
+    /// The function to get the key from an array element.
+    static Key& extract_key(Element& elem) { return elem; }
+
+    /// Used when fallback to comparison based sorting is needed.
+    /// TODO: Correct handling of NaNs, NULLs, etc
+    static bool less(Key x, Key y) { return x < y; }
+};
+
+template <typename KeyBits>
+struct RadixSortIdentityTransform { /// No-op transform: both directions return the key bits unchanged.
+    static constexpr bool transform_is_simple = true;
+
+    static KeyBits forward(KeyBits x) { return x; }
+    static KeyBits backward(KeyBits x) { return x; }
+};
+
+template <typename TElement>
+struct RadixSortUIntTraits { /// Traits for unsigned integer keys: the key is its own KeyBits and no transformation is applied.
+    using Element = TElement;
+    using Key = Element;
+    using CountType = uint32_t;
+    using KeyBits = Key;
+
+    static constexpr size_t PART_SIZE_BITS = 8; /// Bits of the key handled per pass.
+
+    using Transform = RadixSortIdentityTransform<KeyBits>;
+    using Allocator = RadixSortMallocAllocator;
+
+    static Key& extract_key(Element& elem) { return elem; } /// The element itself is the key.
+
+    static bool less(Key x, Key y) { return x < y; } /// Comparator for the comparison-based fallback.
+};
+
+template <typename KeyBits>
+struct RadixSortSignedTransform { /// Flips the most significant (sign) bit of the key bits.
+    static constexpr bool transform_is_simple = true;
+
+    static KeyBits forward(KeyBits x) { return x ^ (KeyBits(1) << (sizeof(KeyBits) * 8 - 1)); }
+    static KeyBits backward(KeyBits x) { return forward(x); } /// Flipping one bit is its own inverse.
+};
+
+template <typename TElement>
+struct RadixSortIntTraits { /// Traits for signed integer keys.
+    using Element = TElement;
+    using Key = Element;
+    using CountType = uint32_t;
+    using KeyBits = std::make_unsigned_t<Key>; /// Unsigned counterpart of the key type.
+
+    static constexpr size_t PART_SIZE_BITS = 8; /// Bits of the key handled per pass.
+
+    using Transform = RadixSortSignedTransform<KeyBits>; /// Sign-bit flip applied to the key bits.
+    using Allocator = RadixSortMallocAllocator;
+
+    static Key& extract_key(Element& elem) { return elem; } /// The element itself is the key.
+
+    static bool less(Key x, Key y) { return x < y; } /// Comparator for the comparison-based fallback.
+};
+
+template <typename T>
+using RadixSortNumTraits = std::conditional_t<
+        std::is_integral_v<T>,
+        std::conditional_t<std::is_unsigned_v<T>, RadixSortUIntTraits<T>, RadixSortIntTraits<T>>,
+        RadixSortFloatTraits<T>>; /// Unsigned integral -> UInt traits, other integral -> Int traits, anything else -> Float traits.
+
+template <typename Traits>
+struct RadixSort { /// Radix sort parameterized by a Traits type (Element, Key, KeyBits, CountType, Transform, Allocator, ...).
+private:
+    using Element = typename Traits::Element;
+    using Key = typename Traits::Key;
+    using CountType = typename Traits::CountType;
+    using KeyBits = typename Traits::KeyBits;
+
+    // Use insertion sort if the size of the array is less than or equal to this threshold
+    static constexpr size_t INSERTION_SORT_THRESHOLD = 64;
+
+    static constexpr size_t HISTOGRAM_SIZE = 1 << Traits::PART_SIZE_BITS;
+    static constexpr size_t PART_BITMASK = HISTOGRAM_SIZE - 1;
+    static constexpr size_t KEY_BITS = sizeof(Key) * 8;
+    static constexpr size_t NUM_PASSES =
+            (KEY_BITS + (Traits::PART_SIZE_BITS - 1)) / Traits::PART_SIZE_BITS;
+    /// Returns the N-th PART_SIZE_BITS-wide piece of x; a "simple" transform is applied here on the fly.
+    static ALWAYS_INLINE KeyBits get_part(size_t N, KeyBits x) {
+        if (Traits::Transform::transform_is_simple) x = Traits::Transform::forward(x);
+
+        return (x >> (N * Traits::PART_SIZE_BITS)) & PART_BITMASK;
+    }
+    /// Conversions between Key and KeyBits through ext::bit_cast.
+    static KeyBits key_to_bits(Key x) { return ext::bit_cast<KeyBits>(x); }
+    static Key bits_to_key(KeyBits x) { return ext::bit_cast<Key>(x); }
+    /// Insertion sort of arr[0, size), ordered by Traits::less over the extracted keys.
+    static void insertion_sort_internal(Element* arr, size_t size) {
+        Element* end = arr + size;
+        for (Element* i = arr + 1; i < end; ++i) {
+            if (Traits::less(Traits::extract_key(*i), Traits::extract_key(*(i - 1)))) {
+                Element* j;
+                Element tmp = *i;
+                *i = *(i - 1);
+                for (j = i - 1;
+                     j > arr && Traits::less(Traits::extract_key(tmp), Traits::extract_key(*(j - 1)));
+                     --j)
+                    *j = *(j - 1);
+                *j = tmp;
+            }
+        }
+    }
+
+    /* Main MSD radix sort subroutine
+     * Puts elements to buckets based on PASS-th digit, then recursively calls insertion sort or itself on the buckets
+     */
+    template <size_t PASS>
+    static inline void radix_sort_msd_internal(Element* arr, size_t size, size_t limit) {
+        Element* last_list[HISTOGRAM_SIZE + 1];
+        Element** last = last_list + 1;
+        size_t count[HISTOGRAM_SIZE] = {0};
+        /// NOTE(review): the element itself is passed to get_part (implicit conversion to KeyBits), while execute_lsd uses key_to_bits(extract_key(...)) - confirm for float keys and elements with payload.
+        for (Element* i = arr; i < arr + size; ++i) ++count[get_part(PASS, *i)];
+
+        last_list[0] = last_list[1] = arr;
+        /// NOTE(review): the loop below has no break, so buckets_for_recursion and finish end up with the values of the last i that satisfies the condition - confirm this is intended.
+        size_t buckets_for_recursion = HISTOGRAM_SIZE;
+        Element* finish = arr + size;
+        for (size_t i = 1; i < HISTOGRAM_SIZE; ++i) {
+            last[i] = last[i - 1] + count[i - 1];
+            if (last[i] >= arr + limit) {
+                buckets_for_recursion = i;
+                finish = last[i];
+            }
+        }
+
+        /* At this point, we have the following variables:
+         * count[i] is the size of i-th bucket
+         * last[i] is a pointer to the beginning of i-th bucket, last[-1] == last[0]
+         * buckets_for_recursion is the number of buckets that should be sorted, the last of them only partially
+         * finish is a pointer to the end of the first buckets_for_recursion buckets
+         */
+
+        // Scatter array elements to buckets until the first buckets_for_recursion buckets are full
+        for (size_t i = 0; i < buckets_for_recursion; ++i) {
+            Element* end = last[i - 1] + count[i];
+            if (end == finish) {
+                last[i] = end;
+                break;
+            }
+            while (last[i] != end) {
+                Element swapper = *last[i];
+                KeyBits tag = get_part(PASS, swapper);
+                if (tag != i) {
+                    do {
+                        std::swap(swapper, *last[tag]++);
+                    } while ((tag = get_part(PASS, swapper)) != i);
+                    *last[i] = swapper;
+                }
+                ++last[i];
+            }
+        }
+        /// After the scatter, last[i] points to the end of the i-th bucket (see the loop above).
+        if constexpr (PASS > 0) {
+            // Recursively sort buckets, except the last one
+            for (size_t i = 0; i < buckets_for_recursion - 1; ++i) {
+                Element* start = last[i - 1];
+                size_t subsize = last[i] - last[i - 1];
+                radix_sort_msd_internal_helper<PASS - 1>(start, subsize, subsize);
+            }
+
+            // Sort last necessary bucket with limit
+            Element* start = last[buckets_for_recursion - 2];
+            size_t subsize = last[buckets_for_recursion - 1] - last[buckets_for_recursion - 2];
+            size_t sublimit = limit - (last[buckets_for_recursion - 1] - arr);
+            radix_sort_msd_internal_helper<PASS - 1>(start, subsize, sublimit);
+        }
+    }
+
+    // A helper to choose sorting algorithm based on array length
+    template <size_t PASS>
+    static inline void radix_sort_msd_internal_helper(Element* arr, size_t size, size_t limit) {
+        if (size <= INSERTION_SORT_THRESHOLD)
+            insertion_sort_internal(arr, size);
+        else
+            radix_sort_msd_internal<PASS>(arr, size, limit);
+    }
+
+public:
+    /// Least significant digit radix sort (stable)
+    static void execute_lsd(Element* arr, size_t size) {
+        /// If the array is smaller than 256, then it is better to use another algorithm.
+
+        /// There are loops of NUM_PASSES. It is very important that they are unfolded at compile-time.
+
+        /// For each of the NUM_PASSES bit ranges of the key, consider how many times each value of this bit range met.
+        CountType histograms[HISTOGRAM_SIZE * NUM_PASSES] = {0};
+
+        typename Traits::Allocator allocator;
+
+        /// We will do several passes through the array. On each pass, the data is transferred to another array. Let's allocate this temporary array.
+        Element* swap_buffer =
+                reinterpret_cast<Element*>(allocator.allocate(size * sizeof(Element)));
+        /// NOTE(review): the pointer returned by allocate() is used without a null check.
+        /// Transform the array and calculate the histogram.
+        /// NOTE This is slightly suboptimal. Look at https://github.com/powturbo/TurboHist
+        for (size_t i = 0; i < size; ++i) {
+            if (!Traits::Transform::transform_is_simple)
+                Traits::extract_key(arr[i]) = bits_to_key(
+                        Traits::Transform::forward(key_to_bits(Traits::extract_key(arr[i]))));
+
+            for (size_t pass = 0; pass < NUM_PASSES; ++pass)
+                ++histograms[pass * HISTOGRAM_SIZE +
+                             get_part(pass, key_to_bits(Traits::extract_key(arr[i])))];
+        }
+
+        {
+            /// Replace the histograms with the accumulated sums: the value in position i is the sum of the previous positions minus one.
+            size_t sums[NUM_PASSES] = {0};
+
+            for (size_t i = 0; i < HISTOGRAM_SIZE; ++i) {
+                for (size_t pass = 0; pass < NUM_PASSES; ++pass) {
+                    size_t tmp = histograms[pass * HISTOGRAM_SIZE + i] + sums[pass];
+                    histograms[pass * HISTOGRAM_SIZE + i] = sums[pass] - 1;
+                    sums[pass] = tmp;
+                }
+            }
+        }
+
+        /// Move the elements in the order starting from the least bit piece, and then do a few passes on the number of pieces.
+        for (size_t pass = 0; pass < NUM_PASSES; ++pass) {
+            Element* writer = pass % 2 ? arr : swap_buffer;
+            Element* reader = pass % 2 ? swap_buffer : arr;
+
+            for (size_t i = 0; i < size; ++i) {
+                size_t pos = get_part(pass, key_to_bits(Traits::extract_key(reader[i])));
+
+                /// Place the element on the next free position.
+                auto& dest = writer[++histograms[pass * HISTOGRAM_SIZE + pos]];
+                dest = reader[i];
+
+                /// On the last pass, we do the reverse transformation.
+                if (!Traits::Transform::transform_is_simple && pass == NUM_PASSES - 1)
+                    Traits::extract_key(dest) = bits_to_key(
+                            Traits::Transform::backward(key_to_bits(Traits::extract_key(reader[i]))));
+            }
+        }
+
+        /// If the number of passes is odd, the result array is in a temporary buffer. Copy it to the place of the original array.
+        /// NOTE Sometimes it will be more optimal to provide non-destructive interface, that will not modify original array.
+        if (NUM_PASSES % 2) memcpy(arr, swap_buffer, size * sizeof(Element));
+
+        allocator.deallocate(swap_buffer, size * sizeof(Element));
+    }
+
+    /* Most significant digit radix sort
+     * Usually slower than LSD and is not stable, but allows partial sorting
+     *
+     * Based on https://github.com/voutcn/kxsort, license:
+     * The MIT License
+     * Copyright (c) 2016 Dinghua Li <voutcn@gmail.com>
+     *
+     * Permission is hereby granted, free of charge, to any person obtaining
+     * a copy of this software and associated documentation files (the
+     * "Software"), to deal in the Software without restriction, including
+     * without limitation the rights to use, copy, modify, merge, publish,
+     * distribute, sublicense, and/or sell copies of the Software, and to
+     * permit persons to whom the Software is furnished to do so, subject to
+     * the following conditions:
+     *
+     * The above copyright notice and this permission notice shall be
+     * included in all copies or substantial portions of the Software.
+     *
+     * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+     * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+     * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+     * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+     * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+     * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+     * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+     * SOFTWARE.
+     */
+    static void execute_msd(Element* arr, size_t size, size_t limit) {
+        limit = std::min(limit, size); /// The limit is clamped to the array size.
+        radix_sort_msd_internal_helper<NUM_PASSES - 1>(arr, size, limit); /// Starts from the most significant piece.
+    }
+};
+
+/// Helper functions for numeric types.
+/// Use RadixSort with custom traits for complex types instead.
+
+template <typename T>
+void radix_sort_lsd(T* arr, size_t size) { /// Sorts arr[0, size) in place with the LSD variant, using the traits selected by RadixSortNumTraits<T>.
+    RadixSort<RadixSortNumTraits<T>>::execute_lsd(arr, size);
+}
+
+template <typename T>
+void radix_sort_msd(T* arr, size_t size, size_t limit) { /// MSD variant for numeric T; `limit` is forwarded to execute_msd, which clamps it to size.
+    RadixSort<RadixSortNumTraits<T>>::execute_msd(arr, size, limit);
+}
diff --git a/be/src/vec/common/sip_hash.h b/be/src/vec/common/sip_hash.h
new file mode 100644
index 0000000000..21f1870e83
--- /dev/null
+++ b/be/src/vec/common/sip_hash.h
@@ -0,0 +1,231 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/SipHash.h
+// and modified by Doris
+
+#pragma once
+
+/** SipHash is a fast cryptographic hash function for short strings.
+  * Taken from here: https://www.131002.net/siphash/
+  *
+  * This is SipHash 2-4 variant.
+  *
+  * Two changes are made:
+  * - it can also return a 128-bit result, not only a 64-bit one;
+  * - it works in streaming mode (the hash can be calculated in parts).
+  *
+  * On short strings (URL, search phrases) more than 3 times faster than MD5 from OpenSSL.
+  * (~ 700 MB/sec, 15 million strings per second)
+  */
+
+#include <string>
+#include <type_traits>
+
+#include "common/compiler_util.h"
+#include "vec/common/unaligned.h"
+#include "vec/core/types.h"
+
+#define ROTL(x, b) static_cast<doris::vectorized::UInt64>(((x) << (b)) | ((x) >> (64 - (b))))
+/// ROTL: 64-bit rotate left by b. SIPROUND: one mixing round over the in-scope words v0..v3.
+#define SIPROUND           \
+    do {                   \
+        v0 += v1;          \
+        v1 = ROTL(v1, 13); \
+        v1 ^= v0;          \
+        v0 = ROTL(v0, 32); \
+        v2 += v3;          \
+        v3 = ROTL(v3, 16); \
+        v3 ^= v2;          \
+        v0 += v3;          \
+        v3 = ROTL(v3, 21); \
+        v3 ^= v0;          \
+        v2 += v1;          \
+        v1 = ROTL(v1, 17); \
+        v1 ^= v2;          \
+        v2 = ROTL(v2, 32); \
+    } while (0)
+
+class SipHash {
+private:
+    /// State: the four 64-bit words that SIPROUND mixes.
+    doris::vectorized::UInt64 v0;
+    doris::vectorized::UInt64 v1;
+    doris::vectorized::UInt64 v2;
+    doris::vectorized::UInt64 v3;
+
+    /// How many bytes have been processed.
+    doris::vectorized::UInt64 cnt;
+
+    /// The current 8 bytes of input data, viewable as one word or as individual bytes.
+    union {
+        doris::vectorized::UInt64 current_word;
+        doris::vectorized::UInt8 current_bytes[8];
+    };
+    /// Absorbs the pending word together with the length byte, then runs the closing rounds.
+    ALWAYS_INLINE void finalize() {
+        /// In the last free byte, we write cnt modulo 256 (the assignment truncates to UInt8).
+        current_bytes[7] = cnt;
+
+        v3 ^= current_word;
+        SIPROUND;
+        SIPROUND;
+        v0 ^= current_word;
+        /// Finalization: flip the low byte of v2, then four more rounds (the "4" of SipHash 2-4).
+        v2 ^= 0xff;
+        SIPROUND;
+        SIPROUND;
+        SIPROUND;
+        SIPROUND;
+    }
+
+public:
+    /// Arguments - seed. Both halves default to 0.
+    SipHash(doris::vectorized::UInt64 k0 = 0, doris::vectorized::UInt64 k1 = 0) {
+        /// Initialize the state with some random bytes and seed.
+        v0 = 0x736f6d6570736575ULL ^ k0;
+        v1 = 0x646f72616e646f6dULL ^ k1;
+        v2 = 0x6c7967656e657261ULL ^ k0;
+        v3 = 0x7465646279746573ULL ^ k1;
+
+        cnt = 0;
+        current_word = 0;
+    }
+    /// Absorbs size bytes starting at data; a trailing partial word is kept for the next call.
+    void update(const char* data, doris::vectorized::UInt64 size) {
+        const char* end = data + size;
+
+        /// We'll finish to process the remainder of the previous update, if any.
+        if (cnt & 7) {
+            while (cnt & 7 && data < end) {
+                current_bytes[cnt & 7] = *data;
+                ++data;
+                ++cnt;
+            }
+
+            /// If we still do not have enough bytes to an 8-byte word.
+            if (cnt & 7) return;
+
+            v3 ^= current_word;
+            SIPROUND;
+            SIPROUND;
+            v0 ^= current_word;
+        }
+
+        cnt += end - data;
+
+        while (data + 8 <= end) {
+            current_word = unaligned_load<doris::vectorized::UInt64>(data);
+
+            v3 ^= current_word;
+            SIPROUND;
+            SIPROUND;
+            v0 ^= current_word;
+
+            data += 8;
+        }
+
+        /// Pad the remainder, which is missing up to an 8-byte word.
+        current_word = 0;
+        switch (end - data) {
+        case 7:
+            current_bytes[6] = data[6];
+            [[fallthrough]];
+        case 6:
+            current_bytes[5] = data[5];
+            [[fallthrough]];
+        case 5:
+            current_bytes[4] = data[4];
+            [[fallthrough]];
+        case 4:
+            current_bytes[3] = data[3];
+            [[fallthrough]];
+        case 3:
+            current_bytes[2] = data[2];
+            [[fallthrough]];
+        case 2:
+            current_bytes[1] = data[1];
+            [[fallthrough]];
+        case 1:
+            current_bytes[0] = data[0];
+            [[fallthrough]];
+        case 0:
+            break;
+        }
+    }
+
+    /// NOTE: std::has_unique_object_representations is only available since clang 6. As of Mar 2017 we still use clang 5 sometimes.
+    template <typename T>
+    std::enable_if_t<std::/*has_unique_object_representations_v*/ is_standard_layout_v<T>, void>
+    update(const T& x) {
+        update(reinterpret_cast<const char*>(&x), sizeof(x));
+    }
+
+    void update(const std::string& x) { update(x.data(), x.length()); }
+
+    /// Get the result in some form. This can only be done once: every getter runs finalize(),
+    /// which mutates the state. get128(char*) stores two UInt64 words (16 bytes) through out.
+    void get128(char* out) {
+        finalize();
+        reinterpret_cast<doris::vectorized::UInt64*>(out)[0] = v0 ^ v1;
+        reinterpret_cast<doris::vectorized::UInt64*>(out)[1] = v2 ^ v3;
+    }
+
+    /// template for avoiding 'unsigned long long' vs 'unsigned long' problem on old poco in macos
+    template <typename T>
+    ALWAYS_INLINE void get128(T& lo, T& hi) {
+        static_assert(sizeof(T) == 8);
+        finalize();
+        lo = v0 ^ v1;
+        hi = v2 ^ v3;
+    }
+
+    doris::vectorized::UInt64 get64() {
+        finalize();
+        return v0 ^ v1 ^ v2 ^ v3;
+    }
+};
+
+#undef ROTL
+#undef SIPROUND
+
+#include <cstddef>
+
+inline void sip_hash128(const char* data, const size_t size, char* out) {
+    SipHash hasher; /// Default seed: k0 = k1 = 0.
+    hasher.update(data, size);
+    hasher.get128(out); /// Stores the 128-bit result through out.
+}
+
+inline doris::vectorized::UInt64 sip_hash64(const char* data, const size_t size) {
+    SipHash hasher; /// Default seed: k0 = k1 = 0.
+    hasher.update(data, size);
+    return hasher.get64(); /// 64-bit digest of the size bytes at data.
+}
+
+/// Forwards x to SipHash::update on a default-seeded hasher and returns its 64-bit digest.
+template <typename T>
+std::enable_if_t<std::/*has_unique_object_representations_v*/ is_standard_layout_v<T>,
+                 doris::vectorized::UInt64> sip_hash64(const T& x) {
+    SipHash hasher;
+    hasher.update(x);
+    return hasher.get64();
+}
+
+inline doris::vectorized::UInt64 sip_hash64(const std::string& s) {
+    return sip_hash64(s.data(), s.size()); /// Same as the (pointer, size) overload.
+}
diff --git a/be/src/vec/common/string_buffer.hpp b/be/src/vec/common/string_buffer.hpp
new file mode 100644
index 0000000000..d0de99cdad
--- /dev/null
+++ b/be/src/vec/common/string_buffer.hpp
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <cstring>
+#include <fmt/format.h>
+
+#include "vec/columns/column_string.h"
+
+#include "vec/common/string_ref.h"
+
+namespace doris::vectorized {
+class BufferWritable {
+public:
+    virtual void write(const char* data, int len) = 0;
+    virtual void commit() = 0;
+    virtual ~BufferWritable() = default;
+    /// Renders data with fmt's default "{}" formatting and hands the characters to write().
+    template <typename T>
+    void write_number(T data) {
+        fmt::memory_buffer buffer;
+        fmt::format_to(buffer, "{}", data);
+        write(buffer.data(), buffer.size());
+    }
+};
+
+class VectorBufferWriter final : public BufferWritable {
+public:
+    explicit VectorBufferWriter(ColumnString& vector)
+            : _data(vector.get_chars()), _offsets(vector.get_offsets()) {}
+
+    /// Appends len bytes to the chars array and counts them as pending for the next commit().
+    void write(const char* data, int len) override {
+        _data.insert(data, data + len);
+        _now_offset += len;
+    }
+    /// Terminates the pending bytes with a 0 byte and records the resulting end offset.
+    void commit() override {
+        _data.push_back(0);
+        const auto value_end = _offsets.back() + _now_offset + 1;
+        _offsets.push_back(value_end);
+        _now_offset = 0;
+    }
+    /// Debug check: no written bytes may still be waiting for a commit() at destruction.
+    ~VectorBufferWriter() { DCHECK(_now_offset == 0); }
+
+private:
+    ColumnString::Chars& _data;
+    ColumnString::Offsets& _offsets;
+    size_t _now_offset = 0;
+};
+
+class BufferReadable {
+public:
+    virtual ~BufferReadable() = default; /// Allows destruction through a base pointer.
+    virtual void read(char* data, int len) = 0; /// Reads len bytes into data.
+    virtual StringRef read(int len) = 0; /// Returns the next len bytes as a StringRef.
+};
+class VectorBufferReader final : public BufferReadable {
+public:
+    /// Only ref.data is kept: ref.size is not stored and no read is bounds-checked.
+    explicit VectorBufferReader(StringRef& ref) : _data(ref.data) {}
+    explicit VectorBufferReader(StringRef&& ref) : _data(ref.data) {}
+    /// Returns a view of the next len bytes (no copy) and moves the cursor past them.
+    StringRef read(int len) override {
+        const char* const start = _data;
+        _data = start + len;
+        return StringRef(start, len);
+    }
+    /// Copies the next len bytes into data and moves the cursor past them.
+    void read(char* data, int len) override {
+        memcpy(data, _data, len);
+        _data += len;
+    }
+private:
+    const char* _data;
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h
new file mode 100644
index 0000000000..bd81342986
--- /dev/null
+++ b/be/src/vec/common/string_ref.h
@@ -0,0 +1,327 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/StringRef.h
+// and modified by Doris
+
+#pragma once
+
+#include <functional>
+#include <ostream>
+#include <string>
+#include <vector>
+
+#include "gutil/hash/city.h"
+#include "gutil/hash/hash128to64.h"
+#include "vec/common/unaligned.h"
+#include "vec/core/types.h"
+
+#if defined(__SSE2__)
+#include <emmintrin.h>
+#endif
+
+#if defined(__SSE4_2__)
+#include <nmmintrin.h>
+#include <smmintrin.h>
+#endif
+
+/// A (pointer, size) pair used to look up substrings in hash tables without creating strings.
+struct StringRef {
+    const char* data = nullptr;
+    size_t size = 0;
+
+    StringRef() = default;
+    StringRef(const char* data_, size_t size_) : data(data_), size(size_) {}
+    StringRef(const unsigned char* data_, size_t size_)
+            : StringRef(reinterpret_cast<const char*>(data_), size_) {}
+    StringRef(const std::string& s) : StringRef(s.data(), s.size()) {}
+
+    /// Copies the size bytes at data into a new std::string.
+    std::string to_string() const { return std::string(data, size); }
+    explicit operator std::string() const { return std::string(data, size); }
+};
+
+using StringRefs = std::vector<StringRef>;
+
+#if defined(__SSE2__)
+
+/** Compare strings for equality.
+  * The approach is controversial and does not win in all cases.
+  * For more information, see hash_map_string_2.cpp
+  */
+
+inline bool compareSSE2(const char* p1, const char* p2) {
+    const __m128i lhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p1));
+    const __m128i rhs = _mm_loadu_si128(reinterpret_cast<const __m128i*>(p2));
+    return _mm_movemask_epi8(_mm_cmpeq_epi8(lhs, rhs)) == 0xFFFF; /// all 16 bytes are equal
+}
+
+inline bool compareSSE2x4(const char* p1, const char* p2) {
+    /// View both inputs as four consecutive 16-byte lanes (64 bytes), loaded unaligned.
+    const __m128i* lhs = reinterpret_cast<const __m128i*>(p1);
+    const __m128i* rhs = reinterpret_cast<const __m128i*>(p2);
+
+    /// Per-lane byte equality masks: 0xFF where the inputs agree, 0x00 where they differ.
+    const __m128i eq0 = _mm_cmpeq_epi8(_mm_loadu_si128(lhs), _mm_loadu_si128(rhs));
+    const __m128i eq1 = _mm_cmpeq_epi8(_mm_loadu_si128(lhs + 1), _mm_loadu_si128(rhs + 1));
+    const __m128i eq2 = _mm_cmpeq_epi8(_mm_loadu_si128(lhs + 2), _mm_loadu_si128(rhs + 2));
+    const __m128i eq3 = _mm_cmpeq_epi8(_mm_loadu_si128(lhs + 3), _mm_loadu_si128(rhs + 3));
+
+    /// AND the four masks together: a byte stays 0xFF only if it matched in every lane.
+    const __m128i eq01 = _mm_and_si128(eq0, eq1);
+    const __m128i eq23 = _mm_and_si128(eq2, eq3);
+    const __m128i all_equal = _mm_and_si128(eq01, eq23);
+    return 0xFFFF == _mm_movemask_epi8(all_equal);
+}
+
+inline bool memequalSSE2Wide(const char* p1, const char* p2, size_t size) {
+    while (size >= 64) {
+        if (compareSSE2x4(p1, p2)) {
+            p1 += 64;
+            p2 += 64;
+            size -= 64;
+        } else
+            return false;
+    }
+    /// Fewer than 64 bytes remain: compare the 0..3 whole 16-byte chunks, highest offset first.
+    switch ((size % 64) / 16) {
+    case 3:
+        if (!compareSSE2(p1 + 32, p2 + 32)) return false;
+        [[fallthrough]];
+    case 2:
+        if (!compareSSE2(p1 + 16, p2 + 16)) return false;
+        [[fallthrough]];
+    case 1:
+        if (!compareSSE2(p1, p2)) return false;
+        [[fallthrough]];
+    case 0:
+        break;
+    }
+    /// Skip the chunks just compared; only the last size % 16 bytes are left.
+    p1 += (size % 64) / 16 * 16;
+    p2 += (size % 64) / 16 * 16;
+    /// Tail: odd bytes are compared one by one, the rest with unaligned 2/4/8-byte loads.
+    switch (size % 16) {
+    case 15:
+        if (p1[14] != p2[14]) return false;
+        [[fallthrough]];
+    case 14:
+        if (p1[13] != p2[13]) return false;
+        [[fallthrough]];
+    case 13:
+        if (p1[12] != p2[12]) return false;
+        [[fallthrough]];
+    case 12:
+        if (unaligned_load<uint32_t>(p1 + 8) == unaligned_load<uint32_t>(p2 + 8))
+            goto l8;
+        else
+            return false;
+    case 11:
+        if (p1[10] != p2[10]) return false;
+        [[fallthrough]];
+    case 10:
+        if (p1[9] != p2[9]) return false;
+        [[fallthrough]];
+    case 9:
+        if (p1[8] != p2[8]) return false;
+    l8:
+        [[fallthrough]];
+    case 8:
+        return unaligned_load<uint64_t>(p1) == unaligned_load<uint64_t>(p2);
+    case 7:
+        if (p1[6] != p2[6]) return false;
+        [[fallthrough]];
+    case 6:
+        if (p1[5] != p2[5]) return false;
+        [[fallthrough]];
+    case 5:
+        if (p1[4] != p2[4]) return false;
+        [[fallthrough]];
+    case 4:
+        return unaligned_load<uint32_t>(p1) == unaligned_load<uint32_t>(p2);
+    case 3:
+        if (p1[2] != p2[2]) return false;
+        [[fallthrough]];
+    case 2:
+        return unaligned_load<uint16_t>(p1) == unaligned_load<uint16_t>(p2);
+    case 1:
+        if (p1[0] != p2[0]) return false;
+        [[fallthrough]];
+    case 0:
+        break;
+    }
+    /// Reached only through cases 1 and 0 (every other case returns): the ranges are equal.
+    return true;
+}
+
+#endif
+
+inline bool operator==(StringRef lhs, StringRef rhs) {
+    /// Views of different length are never equal; two empty views are equal without reading data.
+    const size_t length = lhs.size;
+    if (length != rhs.size) return false;
+    if (length == 0) return true;
+#if defined(__SSE2__)
+    return memequalSSE2Wide(lhs.data, rhs.data, length);
+#else
+    return memcmp(lhs.data, rhs.data, length) == 0;
+#endif
+}
+
+inline bool operator!=(StringRef lhs, StringRef rhs) {
+    return !(lhs == rhs); /// Exact negation of operator==(StringRef, StringRef).
+}
+
+inline bool operator<(StringRef lhs, StringRef rhs) {
+    const int order = memcmp(lhs.data, rhs.data, std::min(lhs.size, rhs.size));
+    return order != 0 ? order < 0 : lhs.size < rhs.size; /// equal prefix: shorter is smaller
+}
+
+inline bool operator>(StringRef lhs, StringRef rhs) {
+    const int order = memcmp(lhs.data, rhs.data, std::min(lhs.size, rhs.size));
+    return order != 0 ? order > 0 : lhs.size > rhs.size; /// equal prefix: longer is greater
+}
+
+/** Hash functions.
+  * You can use either CityHash64,
+  *  or a function based on the crc32 instruction,
+  *  which is obviously of lower quality, but on real data sets,
+  *  when used in a hash table, works much faster.
+  * For more information, see hash_map_string_3.cpp
+  */
+
+struct StringRefHash64 { /// Hash functor: CityHash64 over the x.size bytes at x.data.
+    size_t operator()(StringRef x) const { return util_hash::CityHash64(x.data, x.size); }
+};
+
+#if defined(__SSE4_2__)
+
+/// Parts are taken from CityHash.
+
+inline doris::vectorized::UInt64 hash_len16(doris::vectorized::UInt64 u,
+                                            doris::vectorized::UInt64 v) {
+    return Hash128to64(uint128(u, v)); /// Folds the pair (u, v) into 64 bits via Hash128to64.
+}
+
+inline doris::vectorized::UInt64 shift_mix(doris::vectorized::UInt64 val) {
+    return val ^ (val >> 47); /// XORs val with its top 17 bits shifted down to the bottom.
+}
+
+inline doris::vectorized::UInt64 rotate_by_at_least1(doris::vectorized::UInt64 val, int shift) {
+    return (val >> shift) | (val << (64 - shift)); /// Rotate right; shift == 0 would shift by 64.
+}
+
+inline size_t hash_less_than8(const char* data, size_t size) {
+    static constexpr doris::vectorized::UInt64 k2 = 0x9ae16a3b2f90404fULL;
+    static constexpr doris::vectorized::UInt64 k3 = 0xc949d7c7509e6557ULL;
+
+    /// Empty input hashes to the constant k2.
+    if (size == 0) return k2;
+
+    if (size < 4) {
+        /// 1..3 bytes: mix the first, middle and last byte (they may coincide).
+        const uint8_t first = data[0];
+        const uint8_t middle = data[size >> 1];
+        const uint8_t last = data[size - 1];
+        const uint32_t y = static_cast<uint32_t>(first) + (static_cast<uint32_t>(middle) << 8);
+        const uint32_t z = size + (static_cast<uint32_t>(last) << 2);
+        return shift_mix(y * k2 ^ z * k3) * k2;
+    }
+    /// 4 or more bytes: combine the leading and trailing 32-bit words (they may overlap).
+    const doris::vectorized::UInt64 head = unaligned_load<uint32_t>(data);
+    return hash_len16(size + (head << 3), unaligned_load<uint32_t>(data + size - 4));
+}
+
+inline size_t hash_less_than16(const char* data, size_t size) {
+    /// Up to 8 bytes are delegated; 9 or more combine the first and last 8-byte words.
+    if (size <= 8) return hash_less_than8(data, size);
+
+    const doris::vectorized::UInt64 head = unaligned_load<doris::vectorized::UInt64>(data);
+    const doris::vectorized::UInt64 tail =
+            unaligned_load<doris::vectorized::UInt64>(data + size - 8);
+    return hash_len16(head, rotate_by_at_least1(tail + size, size)) ^ tail;
+}
+
+struct CRC32Hash {
+    size_t operator()(StringRef x) const {
+        const char* pos = x.data;
+        size_t size = x.size;
+        /// An empty string hashes to 0; inputs shorter than 8 bytes go to hash_less_than8.
+        if (size == 0) return 0;
+
+        if (size < 8) {
+            return hash_less_than8(x.data, x.size);
+        }
+
+        const char* end = pos + size;
+        size_t res = -1ULL;
+        /// Fold in unaligned 8-byte words; keep going while more than 8 bytes follow the cursor.
+        do {
+            doris::vectorized::UInt64 word = unaligned_load<doris::vectorized::UInt64>(pos);
+            res = _mm_crc32_u64(res, word);
+
+            pos += 8;
+        } while (pos + 8 < end);
+        /// The last word is always the final 8 bytes, so it can overlap bytes already folded in.
+        doris::vectorized::UInt64 word = unaligned_load<doris::vectorized::UInt64>(
+                end - 8); /// I'm not sure if this is normal.
+        res = _mm_crc32_u64(res, word);
+
+        return res;
+    }
+};
+/// With SSE4.2 the default StringRef hash is the CRC32-based one.
+struct StringRefHash : CRC32Hash {};
+
+#else
+
+struct CRC32Hash {
+    size_t operator()(StringRef /* x */) const {
+        throw std::logic_error{"Not implemented CRC32Hash without SSE"};
+    }
+};
+/// Without SSE4.2 the default StringRef hash is the CityHash64-based StringRefHash64.
+struct StringRefHash : StringRefHash64 {};
+
+#endif
+
+namespace std {
+template <>
+struct hash<StringRef> : public StringRefHash {}; /// std::hash reuses the selected StringRefHash.
+} // namespace std
+
+namespace ZeroTraits {
+
+/// A StringRef is the "zero" value when its size is 0; data is not inspected.
+inline bool check(const StringRef& x) { return x.size == 0; }
+/// Turns x into the zero value by clearing size only; data is left untouched.
+inline void set(StringRef& x) { x.size = 0; }
+
+} // namespace ZeroTraits
+
+/// Equality with a NUL-terminated string: rhs must match and be exactly lhs.size bytes long.
+inline bool operator==(StringRef lhs, const char* rhs) {
+    for (size_t pos = 0; pos < lhs.size; ++pos)
+        if (!rhs[pos] || lhs.data[pos] != rhs[pos]) return false;
+    return rhs[lhs.size] == '\0'; /// rhs must end here too; otherwise lhs is only a prefix of it.
+}
+
+inline std::ostream& operator<<(std::ostream& os, const StringRef& str) {
+    /// A view whose data pointer is null writes nothing.
+    if (str.data != nullptr) os.write(str.data, str.size);
+    return os;
+}
diff --git a/be/src/vec/common/string_utils/string_utils.cpp b/be/src/vec/common/string_utils/string_utils.cpp
new file mode 100644
index 0000000000..d1552eb481
--- /dev/null
+++ b/be/src/vec/common/string_utils/string_utils.cpp
@@ -0,0 +1,34 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/StringUtils/StringUtils.cpp
+// and modified by Doris
+
+#include "vec/common/string_utils/string_utils.h"
+
+namespace detail {
+/// True when s begins with the prefix_size bytes at prefix (a zero-length prefix matches).
+bool starts_with(const std::string& s, const char* prefix, size_t prefix_size) {
+    if (s.size() < prefix_size) return false;
+    return memcmp(s.data(), prefix, prefix_size) == 0;
+}
+/// True when s ends with the suffix_size bytes at suffix (a zero-length suffix matches).
+bool ends_with(const std::string& s, const char* suffix, size_t suffix_size) {
+    if (s.size() < suffix_size) return false;
+    return memcmp(s.data() + (s.size() - suffix_size), suffix, suffix_size) == 0;
+}
+} // namespace detail
diff --git a/be/src/vec/common/string_utils/string_utils.h b/be/src/vec/common/string_utils/string_utils.h
new file mode 100644
index 0000000000..f8c64013e3
--- /dev/null
+++ b/be/src/vec/common/string_utils/string_utils.h
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/StringUtils/StringUtils.h
+// and modified by Doris
+
+#pragma once
+
+#include <cstddef>
+#include <cstring>
+#include <string>
+#include <type_traits>
+
+namespace detail { /// (pointer, length) primitives behind the starts_with / ends_with overloads.
+bool starts_with(const std::string& s, const char* prefix, size_t prefix_size);
+bool ends_with(const std::string& s, const char* suffix, size_t suffix_size);
+} // namespace detail
+
+inline bool starts_with(const std::string& s, const std::string& prefix) {
+    return detail::starts_with(s, prefix.c_str(), prefix.length()); /// byte-wise prefix test
+}
+
+inline bool ends_with(const std::string& s, const std::string& suffix) {
+    return detail::ends_with(s, suffix.c_str(), suffix.length()); /// byte-wise suffix test
+}
+
+/// With GCC, strlen is evaluated compile time if we pass it a constant
+/// string that is known at compile time.
+inline bool starts_with(const std::string& s, const char* prefix) {
+    return detail::starts_with(s, prefix, std::strlen(prefix)); /// prefix is NUL-terminated
+}
+
+inline bool ends_with(const std::string& s, const char* suffix) {
+    return detail::ends_with(s, suffix, std::strlen(suffix)); /// suffix is NUL-terminated
+}
+
+/// Returns the English ordinal suffix for n: "st", "nd" or "rd" when n ends in 1, 2 or 3,
+/// and "th" otherwise, including every number whose tens digit is 1 (11th, 112th).
+template <typename T>
+std::string get_ordinal_suffix(T n) {
+    static_assert(std::is_integral_v<T> && std::is_unsigned_v<T>,
+                  "Unsigned integer value required");
+
+    const auto ones = n % 10;
+    const auto tens = (n / 10) % 10;
+
+    /// The "teens" are irregular: a tens digit of 1 always yields "th".
+    if (tens == 1) return "th";
+
+    if (ones == 1) {
+        return "st";
+    } else if (ones == 2) {
+        return "nd";
+    } else if (ones == 3) {
+        return "rd";
+    }
+    return "th";
+}
+
+/// Faster than libc because the locale is ignored. For some functions a lookup table could be better.
+
+inline bool is_ascii(char c) {
+    return (static_cast<unsigned char>(c) & 0x80) == 0; /// the top bit is clear for 0x00..0x7F
+}
+
+inline bool is_alpha_ascii(char c) {
+    return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); /// ASCII letters only
+}
+
+inline bool is_numeric_ascii(char c) {
+    /// A plain two-sided range check. It is faster than the subtract-and-compare form
+    /// return UInt8(UInt8(c) - UInt8('0')) < UInt8(10);
+    /// on Intel CPUs when compiled by gcc 8.
+    return '0' <= c && c <= '9';
+}
+
+inline bool is_hex_digit(char c) {
+    return is_numeric_ascii(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'); /// 0-9a-fA-F
+}
+
+inline bool is_alpha_numeric_ascii(char c) {
+    return is_numeric_ascii(c) || is_alpha_ascii(c); /// ASCII digit or ASCII letter
+}
+
+inline bool is_word_char_ascii(char c) {
+    return c == '_' || is_alpha_numeric_ascii(c); /// underscore, ASCII letter or ASCII digit
+}
+
+inline bool is_valid_identifier_begin(char c) {
+    return c == '_' || is_alpha_ascii(c); /// underscore or ASCII letter; digits are excluded
+}
+
+inline bool is_whitespace_ascii(char c) {
+    return c == ' ' || ('\t' <= c && c <= '\r'); /// '\t' '\n' '\v' '\f' '\r' are codes 9..13
+}
+
+inline bool is_control_ascii(char c) {
+    return static_cast<unsigned char>(c) < 0x20; /// codes 0..31 only; 0x7F is not included
+}
+
+/// Sets bit 0x20. Only meaningful when is_alpha_ascii(c): other characters can change too.
+inline char to_lower_if_alpha_ascii(char c) {
+    return static_cast<char>(c | 0x20);
+}
+
+inline char to_upper_if_alpha_ascii(char c) {
+    return static_cast<char>(c & ~0x20); /// clears bit 0x20, whatever the character is
+}
+
+inline char alternate_case_if_alpha_ascii(char c) {
+    return static_cast<char>(c ^ 0x20); /// flips bit 0x20, whatever the character is
+}
+
+inline bool equals_case_insensitive(char a, char b) {
+    return a == b || (is_alpha_ascii(a) && (a ^ 0x20) == b); /// letters may differ in case only
+}
+
+/// Returns a copy of str without the leading and trailing characters for which predicate is true.
+/// If every character satisfies predicate (or str is empty) the result is an empty string.
+/// predicate is called once per examined character; the front of str is scanned first.
+template <typename F>
+std::string trim(const std::string& str, F&& predicate) {
+    const size_t size = str.size();
+
+    /// Index of the first character to keep.
+    size_t first = 0;
+    while (first < size && predicate(str[first])) {
+        ++first;
+    }
+
+    if (first == size) return {};
+
+    /// One past the last character to keep. With a pure predicate this scan stops at or
+    /// after first, because str[first] is known not to satisfy it.
+    size_t last = size;
+    while (last > 0 && predicate(str[last - 1])) {
+        --last;
+    }
+
+    return str.substr(first, last - first);
+}
diff --git a/be/src/vec/common/strong_typedef.h b/be/src/vec/common/strong_typedef.h
new file mode 100644
index 0000000000..1f48d5b82c
--- /dev/null
+++ b/be/src/vec/common/strong_typedef.h
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/StrongTypedef.h
+// and modified by Doris
+
+#pragma once
+
+#include <boost/operators.hpp>
+#include <type_traits>
+
+/** Wraps a T in a distinct type selected by Tag. Construction from T is explicit,
+  * conversion back to T is implicit. See https://svn.boost.org/trac/boost/ticket/5182
+  */
+template <class T, class Tag>
+struct StrongTypedef : boost::totally_ordered1<StrongTypedef<T, Tag>,
+                                               boost::totally_ordered2<StrongTypedef<T, Tag>, T>> {
+private:
+    using Self = StrongTypedef;
+    T t;
+
+public:
+    template <class Enable = typename std::is_copy_constructible<T>::type>
+    explicit StrongTypedef(const T& t_) : t(t_) {}
+    template <class Enable = typename std::is_move_constructible<T>::type>
+    explicit StrongTypedef(T&& t_) : t(std::move(t_)) {}
+    /// Value-initializes the wrapped T.
+    template <class Enable = typename std::is_default_constructible<T>::type>
+    StrongTypedef() : t() {}
+
+    StrongTypedef(const Self&) = default;
+    StrongTypedef(Self&&) = default;
+
+    Self& operator=(const Self&) = default;
+    Self& operator=(Self&&) = default;
+    /// Assigning a raw T replaces the wrapped value (unlike construction, this is not explicit).
+    template <class Enable = typename std::is_copy_assignable<T>::type>
+    Self& operator=(const T& rhs) {
+        t = rhs;
+        return *this;
+    }
+
+    template <class Enable = typename std::is_move_assignable<T>::type>
+    Self& operator=(T&& rhs) {
+        t = std::move(rhs);
+        return *this;
+    }
+    /// Implicit access to the wrapped value as const T& / T&.
+    operator const T&() const { return t; }
+    operator T&() { return t; }
+    /// Only == and < are written here; the boost::totally_ordered bases are expected to add the rest.
+    bool operator==(const Self& rhs) const { return t == rhs.t; }
+    bool operator<(const Self& rhs) const { return t < rhs.t; }
+    /// Explicit accessors for the wrapped value.
+    T& to_under_type() { return t; }
+    const T& to_under_type() const { return t; }
+};
+
+namespace std {
+template <class T, class Tag>
+struct hash<StrongTypedef<T, Tag>> { /// Hashes the wrapper exactly like the wrapped T.
+    size_t operator()(const StrongTypedef<T, Tag>& x) const {
+        return std::hash<T>()(x.to_under_type());
+    }
+};
+} // namespace std
+/// Declares an empty tag struct D##Tag and the alias D = StrongTypedef<T, D##Tag>.
+#define STRONG_TYPEDEF(T, D) \
+    struct D##Tag {};        \
+    using D = StrongTypedef<T, D##Tag>;
diff --git a/be/src/vec/common/typeid_cast.h b/be/src/vec/common/typeid_cast.h
new file mode 100644
index 0000000000..60ef9743d9
--- /dev/null
+++ b/be/src/vec/common/typeid_cast.h
@@ -0,0 +1,62 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/typeid_cast.h
+// and modified by Doris
+
+#pragma once
+
+#include <string>
+#include <type_traits>
+#include <typeindex>
+#include <typeinfo>
+
+#include "common/status.h"
+#include "vec/common/demangle.h"
+#include "vec/common/exception.h"
+
+/** Casts by comparing typeid: succeeds only when typeid(from) is exactly typeid(To),
+  * so a cast to an ancestor fails where dynamic_cast would succeed.
+  * Reference form: a mismatch throws doris::vectorized::Exception with VEC_BAD_CAST.
+  */
+template <typename To, typename From>
+std::enable_if_t<std::is_reference_v<To>, To> typeid_cast(From& from) {
+    try {
+        if (typeid(from) == typeid(To)) {
+            return static_cast<To>(from);
+        }
+    } catch (const std::exception& e) {
+        throw doris::vectorized::Exception(e.what(), doris::TStatusCode::VEC_BAD_CAST);
+    }
+    /// Type mismatch: the message names both demangled types.
+    throw doris::vectorized::Exception("Bad cast from type " + demangle(typeid(from).name()) +
+                                               " to " + demangle(typeid(To).name()),
+                                       doris::TStatusCode::VEC_BAD_CAST);
+}
+
+template <typename To, typename From>
+To typeid_cast(From* from) {
+    try {
+        if (typeid(*from) == typeid(std::remove_pointer_t<To>)) {
+            return static_cast<To>(from);
+        } else {
+            return nullptr; /// Pointer form: a type mismatch yields nullptr instead of throwing.
+        }
+    } catch (const std::exception& e) { /// e.g. std::bad_typeid for a null polymorphic from
+        throw doris::vectorized::Exception(e.what(), doris::TStatusCode::VEC_BAD_CAST);
+    }
+}
diff --git a/be/src/vec/common/uint128.h b/be/src/vec/common/uint128.h
new file mode 100644
index 0000000000..f5e2f9db77
--- /dev/null
+++ b/be/src/vec/common/uint128.h
@@ -0,0 +1,237 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/Uint128.h
+// and modified by Doris
+
+#pragma once
+
+#include <iomanip>
+#include <sstream>
+#include <tuple>
+
+#include "gutil/hash/city.h"
+#include "gutil/hash/hash128to64.h"
+#include "vec/core/types.h"
+
+#ifdef __SSE4_2__
+#include <nmmintrin.h>
+#endif
+
+namespace doris::vectorized {
+
+/// For aggregation by SipHash, UUID type or concatenation of several fields.
+struct UInt128 {
+/// Suppress gcc7 warnings: 'prev_key.doris::vectorized::UInt128::low' may be used uninitialized in this function
+#if !__clang__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
+    /// This naming assumes little endian.
+    UInt64 low;
+    UInt64 high;
+
+    UInt128() = default;
+    explicit UInt128(const UInt64 low_, const UInt64 high_) : low(low_), high(high_) {}
+    explicit UInt128(const UInt64 rhs) : low(rhs), high() {}
+
+    auto tuple() const { return std::tie(high, low); } // high first: it is the significant half
+    /// Hex digits of `high` followed by `low`, each half zero-padded to 16 digits.
+    String to_hex_string() const {
+        std::ostringstream os;
+        // std::setw only affects the next field, so it has to be repeated before `low`.
+        os << std::hex << std::setfill('0') << std::setw(16) << high << std::setw(16) << low;
+        return String(os.str());
+    }
+    bool inline operator==(const UInt128 rhs) const { return tuple() == rhs.tuple(); }
+    bool inline operator!=(const UInt128 rhs) const { return tuple() != rhs.tuple(); }
+    bool inline operator<(const UInt128 rhs) const { return tuple() < rhs.tuple(); }
+    bool inline operator<=(const UInt128 rhs) const { return tuple() <= rhs.tuple(); }
+    bool inline operator>(const UInt128 rhs) const { return tuple() > rhs.tuple(); }
+    bool inline operator>=(const UInt128 rhs) const { return tuple() >= rhs.tuple(); }
+
+    template <typename T>
+    bool inline operator==(const T rhs) const {
+        return *this == UInt128(rhs);
+    }
+    template <typename T>
+    bool inline operator!=(const T rhs) const {
+        return *this != UInt128(rhs);
+    }
+    template <typename T>
+    bool inline operator>=(const T rhs) const {
+        return *this >= UInt128(rhs);
+    }
+    template <typename T>
+    bool inline operator>(const T rhs) const {
+        return *this > UInt128(rhs);
+    }
+    template <typename T>
+    bool inline operator<=(const T rhs) const {
+        return *this <= UInt128(rhs);
+    }
+    template <typename T>
+    bool inline operator<(const T rhs) const {
+        return *this < UInt128(rhs);
+    }
+
+    template <typename T>
+    explicit operator T() const {
+        return static_cast<T>(low); // only the low half takes part in the conversion
+    }
+
+#if !__clang__
+#pragma GCC diagnostic pop
+#endif
+
+    UInt128& operator=(const UInt64 rhs) {
+        low = rhs;
+        high = 0;
+        return *this;
+    }
+};
+
+template <typename T> // the left operand is converted with an explicit UInt128(lhs) first
+inline bool operator==(T lhs, const UInt128 rhs) {
+    return UInt128(lhs) == rhs;
+}
+template <typename T>
+inline bool operator!=(T lhs, const UInt128 rhs) {
+    return UInt128(lhs) != rhs;
+}
+template <typename T>
+inline bool operator>=(T lhs, const UInt128 rhs) {
+    return UInt128(lhs) >= rhs;
+}
+template <typename T>
+inline bool operator>(T lhs, const UInt128 rhs) {
+    return UInt128(lhs) > rhs;
+}
+template <typename T>
+inline bool operator<=(T lhs, const UInt128 rhs) {
+    return UInt128(lhs) <= rhs;
+}
+template <typename T>
+inline bool operator<(T lhs, const UInt128 rhs) {
+    return UInt128(lhs) < rhs;
+}
+
+template <>
+inline constexpr bool IsNumber<UInt128> = true; // UInt128 is flagged as a number type
+template <>
+struct TypeName<UInt128> { // textual name reported for this type
+    static const char* get() { return "UInt128"; }
+};
+template <>
+struct TypeId<UInt128> { // associates the type with its TypeIndex enumerator
+    static constexpr const TypeIndex value = TypeIndex::UInt128;
+};
+
+struct UInt128Hash { // folds both 64-bit halves into one value through Hash128to64
+    size_t operator()(UInt128 key) const { return Hash128to64({key.low, key.high}); }
+};
+
+#ifdef __SSE4_2__
+
+/// Chains _mm_crc32_u64 over the low and then the high half, starting from an all-ones seed.
+struct UInt128HashCRC32 {
+    size_t operator()(UInt128 key) const {
+        const UInt64 seed = -1ULL;
+        const UInt64 after_low = _mm_crc32_u64(seed, key.low);
+        return _mm_crc32_u64(after_low, key.high);
+    }
+};
+
+#else
+
+/// Without SSE4.2 no CRC32 is computed: this name simply inherits UInt128Hash. Can be confusing.
+struct UInt128HashCRC32 : public UInt128Hash {};
+
+#endif
+
+struct UInt128TrivialHash { // returns the low half verbatim; the high half is ignored
+    size_t operator()(UInt128 key) const { return key.low; }
+};
+
+/** Fixed-size key built from four UInt64 words; used for aggregation, for putting a
+  * large number of constant-length keys in a hash table. */
+struct UInt256 {
+/// Suppress gcc7 warnings: 'prev_key.doris::vectorized::UInt256::a' may be used uninitialized in this function
+#if !__clang__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
+    UInt64 a;
+    UInt64 b;
+    UInt64 c;
+    UInt64 d;
+
+    bool operator==(const UInt256 rhs) const {
+        return std::tie(a, b, c, d) == std::tie(rhs.a, rhs.b, rhs.c, rhs.d);
+    }
+
+    bool operator!=(const UInt256 rhs) const { return !(*this == rhs); }
+
+    /// A plain UInt64 is equal only when it matches `a` and the other three words are zero.
+    bool operator==(const UInt64 rhs) const { return d == 0 && c == 0 && b == 0 && a == rhs; }
+    bool operator!=(const UInt64 rhs) const { return !(*this == rhs); }
+
+#if !__clang__
+#pragma GCC diagnostic pop
+#endif
+
+    /// Stores rhs in `a` and zeroes the remaining words.
+    UInt256& operator=(const UInt64 rhs) {
+        a = rhs;
+        b = c = d = 0;
+        return *this;
+    }
+};
+} // namespace doris::vectorized
+
+/// Overload hash for type casting
+namespace std {
+template <>
+struct hash<doris::vectorized::UInt128> { // same expression as doris::vectorized::UInt128Hash
+    size_t operator()(const doris::vectorized::UInt128& value) const {
+        return Hash128to64({value.low, value.high});
+    }
+};
+
+template <> // NOTE(review): user specializations of <type_traits> templates are formally undefined
+struct is_signed<doris::vectorized::UInt128> {
+    static constexpr bool value = false; // only `value` is provided by these specializations
+};
+
+template <>
+struct is_unsigned<doris::vectorized::UInt128> {
+    static constexpr bool value = true;
+};
+
+template <>
+struct is_integral<doris::vectorized::UInt128> {
+    static constexpr bool value = true;
+};
+
+// Operator +, -, /, *, % aren't implemented so it's not an arithmetic type
+template <>
+struct is_arithmetic<doris::vectorized::UInt128> {
+    static constexpr bool value = false;
+};
+} // namespace std
diff --git a/be/src/vec/common/unaligned.h b/be/src/vec/common/unaligned.h
new file mode 100644
index 0000000000..11b18535ce
--- /dev/null
+++ b/be/src/vec/common/unaligned.h
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/base/base/unaligned.h
+// and modified by Doris
+
+#pragma once
+
+#include <string.h>
+
+#include <type_traits>
+
+template <typename T>
+inline T unaligned_load(const void* address) {
+    T value {}; // value-initialised, then overwritten byte-wise from `address`
+    memcpy(&value, address, sizeof(T));
+    return value;
+}
+
+/// Integral promotion used to cause wrong store sizes here
+/// (e.g., unaligned_store(dest, uint16_t + uint16_t) stores an uint32_t).
+/// For that reason the caller has to name the stored type T explicitly.
+/// Passing the parameter type through std::enable_if keeps T from being deduced.
+template <typename T>
+inline void unaligned_store(void* address, const std::enable_if_t<true, T>& src) {
+    static_assert(std::is_trivially_copyable_v<T>);
+    memcpy(address, &src, sizeof(T));
+}
diff --git a/be/src/vec/core/accurate_comparison.h b/be/src/vec/core/accurate_comparison.h
new file mode 100644
index 0000000000..9789fc72c8
--- /dev/null
+++ b/be/src/vec/core/accurate_comparison.h
@@ -0,0 +1,567 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/AccurateComparison.h
+// and modified by Doris
+
+#pragma once
+
+#include <cmath>
+#include <limits>
+
+#include "runtime/datetime_value.h"
+#include "util/binary_cast.hpp"
+
+#include "vec/common/nan_utils.h"
+#include "vec/common/uint128.h"
+#include "vec/core/types.h"
+#include "vec/runtime/vdatetime_value.h"
+/** Perceptually-correct number comparisons.
+  * Example: Int8(-1) != UInt8(255)
+*/
+
+namespace accurate {
+
+/** Cases:
+    1) Safe conversion (in case of default C++ operators)
+        a) int vs any int
+        b) uint vs any uint
+        c) float vs any float
+    2) int vs uint
+        a) sizeof(int) <= sizeof(uint). Accurate comparison with MAX_INT thresholds
+        b) sizeof(int)  > sizeof(uint). Casting to int
+    3) integral_type vs floating_type
+        a) sizeof(integral_type) <= 4. Comparison via casting arguments to Float64
+        b) sizeof(integral_type) == 8. Accurate comparison. Consider 3 sets of intervals:
+            1) interval between adjacent floats less or equal 1
+            2) interval between adjacent floats greater than 2
+            3) float is outside [MIN_INT64; MAX_INT64]
+*/
+
+// Case 1. Both floats, both integers of equal signedness, or an Int128 paired with an integer
+template <typename A, typename B>
+constexpr bool is_safe_conversion =
+        (std::is_floating_point_v<A> && std::is_floating_point_v<B>) ||
+        (std::is_integral_v<A> && std::is_integral_v<B> &&
+         std::is_signed_v<A> == std::is_signed_v<B>) ||
+        (std::is_same_v<A, doris::vectorized::Int128> &&
+         (std::is_same_v<B, doris::vectorized::Int128> || std::is_integral_v<B>)) ||
+        (std::is_integral_v<A> && std::is_same_v<B, doris::vectorized::Int128>);
+
+template <typename A, typename B>
+using bool_if_safe_conversion = std::enable_if_t<is_safe_conversion<A, B>, bool>;
+
+template <typename A, typename B>
+using bool_if_not_safe_conversion = std::enable_if_t<!is_safe_conversion<A, B>, bool>;
+
+/// Case 2. True when TInt is a signed integral type and TUInt an unsigned integral type.
+template <typename TInt, typename TUInt>
+constexpr bool is_any_int_vs_uint = std::is_integral_v<TInt> && std::is_signed_v<TInt> &&
+                                    std::is_integral_v<TUInt> && std::is_unsigned_v<TUInt>;
+
+// Case 2a. IntXX vs UIntYY with sizeof(IntXX) <= sizeof(UIntYY): needs the accurate compare.
+template <typename TInt, typename TUInt>
+constexpr bool is_le_int_vs_uint =
+        (sizeof(TInt) <= sizeof(TUInt)) && is_any_int_vs_uint<TInt, TUInt>;
+
+template <typename TInt, typename TUInt>
+using bool_if_le_int_vs_uint_t = std::enable_if_t<is_le_int_vs_uint<TInt, TUInt>, bool>;
+
+template <typename TInt, typename TUInt>
+inline bool_if_le_int_vs_uint_t<TInt, TUInt> greaterOpTmpl(TInt a, TUInt b) {
+    constexpr auto signed_max = static_cast<TUInt>(std::numeric_limits<TInt>::max());
+    return a >= 0 && b <= signed_max && static_cast<TUInt>(a) > b; // negative a never wins
+}
+
+template <typename TUInt, typename TInt>
+inline bool_if_le_int_vs_uint_t<TInt, TUInt> greaterOpTmpl(TUInt a, TInt b) {
+    constexpr auto signed_max = static_cast<TUInt>(std::numeric_limits<TInt>::max());
+    return b < 0 || a > signed_max || a > static_cast<TUInt>(b); // negative b always loses
+}
+
+template <typename TInt, typename TUInt>
+inline bool_if_le_int_vs_uint_t<TInt, TUInt> equalsOpTmpl(TInt a, TUInt b) {
+    constexpr auto signed_max = static_cast<TUInt>(std::numeric_limits<TInt>::max());
+    return a >= 0 && b <= signed_max && static_cast<TUInt>(a) == b;
+}
+
+template <typename TUInt, typename TInt>
+inline bool_if_le_int_vs_uint_t<TInt, TUInt> equalsOpTmpl(TUInt a, TInt b) {
+    constexpr auto signed_max = static_cast<TUInt>(std::numeric_limits<TInt>::max());
+    return b >= 0 && a <= signed_max && a == static_cast<TUInt>(b);
+}
+
+// Case 2b. IntXX vs UIntYY with sizeof(IntXX) > sizeof(UIntYY): cast UIntYY to IntXX and compare.
+template <typename TInt, typename TUInt>
+constexpr bool is_gt_int_vs_uint =
+        (sizeof(TInt) > sizeof(TUInt)) && is_any_int_vs_uint<TInt, TUInt>;
+
+template <typename TInt, typename TUInt>
+using bool_if_gt_int_vs_uint = std::enable_if_t<is_gt_int_vs_uint<TInt, TUInt>, bool>;
+
+template <typename TInt, typename TUInt>
+inline bool_if_gt_int_vs_uint<TInt, TUInt> greaterOpTmpl(TInt a, TUInt b) {
+    return a > static_cast<TInt>(b); // a already has the wider signed type
+}
+
+template <typename TInt, typename TUInt>
+inline bool_if_gt_int_vs_uint<TInt, TUInt> greaterOpTmpl(TUInt a, TInt b) {
+    return static_cast<TInt>(a) > b; // b already has the wider signed type
+}
+
+template <typename TInt, typename TUInt>
+inline bool_if_gt_int_vs_uint<TInt, TUInt> equalsOpTmpl(TInt a, TUInt b) {
+    return a == static_cast<TInt>(b);
+}
+
+template <typename TInt, typename TUInt>
+inline bool_if_gt_int_vs_uint<TInt, TUInt> equalsOpTmpl(TUInt a, TInt b) {
+    return static_cast<TInt>(a) == b;
+}
+
+// Case 3a. An integral type of at most 4 bytes against a float: compare both as double.
+template <typename TAInt, typename TAFloat>
+using bool_if_double_can_be_used =
+        std::enable_if_t<std::is_floating_point_v<TAFloat> && std::is_integral_v<TAInt> &&
+                                 (sizeof(TAInt) <= 4),
+                         bool>;
+
+template <typename TAInt, typename TAFloat>
+inline bool_if_double_can_be_used<TAInt, TAFloat> greaterOpTmpl(TAInt a, TAFloat b) {
+    return static_cast<double>(b) < static_cast<double>(a);
+}
+
+template <typename TAInt, typename TAFloat>
+inline bool_if_double_can_be_used<TAInt, TAFloat> greaterOpTmpl(TAFloat a, TAInt b) {
+    return static_cast<double>(b) < static_cast<double>(a);
+}
+
+template <typename TAInt, typename TAFloat>
+inline bool_if_double_can_be_used<TAInt, TAFloat> equalsOpTmpl(TAInt a, TAFloat b) {
+    return static_cast<double>(b) == static_cast<double>(a);
+}
+
+template <typename TAInt, typename TAFloat>
+inline bool_if_double_can_be_used<TAInt, TAFloat> equalsOpTmpl(TAFloat a, TAInt b) {
+    return static_cast<double>(b) == static_cast<double>(a);
+}
+
+/* Final realizations */
+
+/// Mixed-category operands: defer to the greaterOpTmpl overload enabled for this type pair.
+template <typename A, typename B>
+inline bool_if_not_safe_conversion<A, B> greaterOp(A a, B b) { return greaterOpTmpl(a, b); }
+
+/// Safe-conversion operands: the default C++ operator is used directly.
+template <typename A, typename B>
+inline bool_if_safe_conversion<A, B> greaterOp(A a, B b) {
+    return a > b;
+}
+
+// Case 3b. 64-bit integers vs floats comparison.
+// See hint at https://github.com/JuliaLang/julia/issues/257 (but it doesn't work properly for -2**63)
+// Integers whose magnitude is at most this bound are compared after a plain cast to Float64.
+constexpr doris::vectorized::Int64 MAX_INT64_WITH_EXACT_FLOAT64_REPR = 9007199254740992LL; // 2^53
+
+template <>
+inline bool greaterOp<doris::vectorized::Float64, doris::vectorized::Int64>(
+        doris::vectorized::Float64 f, doris::vectorized::Int64 i) {
+    if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR)
+        return f > static_cast<doris::vectorized::Float64>(i); // |i| <= 2^53: compare as floats
+    // Larger |i|: decide f outside the Int64 range first, then compare f truncated to Int64.
+    return (f >= static_cast<doris::vectorized::Float64>(
+                         std::numeric_limits<
+                                 doris::vectorized::Int64>::max())) // rhs is 2**63 (not 2^63 - 1)
+           || (f > static_cast<doris::vectorized::Float64>(
+                           std::numeric_limits<doris::vectorized::Int64>::min()) &&
+               static_cast<doris::vectorized::Int64>(f) > i);
+}
+
+template <>
+inline bool greaterOp<doris::vectorized::Int64, doris::vectorized::Float64>(
+        doris::vectorized::Int64 i, doris::vectorized::Float64 f) {
+    if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR)
+        return f < static_cast<doris::vectorized::Float64>(i); // |i| <= 2^53: compare as floats
+    // Larger |i|: f below the Int64 range loses outright; otherwise compare f truncated to Int64.
+    return (f < static_cast<doris::vectorized::Float64>(
+                        std::numeric_limits<doris::vectorized::Int64>::min())) ||
+           (f < static_cast<doris::vectorized::Float64>(
+                        std::numeric_limits<doris::vectorized::Int64>::max()) &&
+            i > static_cast<doris::vectorized::Int64>(f));
+}
+
+template <>
+inline bool greaterOp<doris::vectorized::Float64, doris::vectorized::UInt64>(
+        doris::vectorized::Float64 f, doris::vectorized::UInt64 u) {
+    if (u <= static_cast<doris::vectorized::UInt64>(MAX_INT64_WITH_EXACT_FLOAT64_REPR))
+        return f > static_cast<doris::vectorized::Float64>(u); // u <= 2^53: compare as floats
+    // Larger u: f at or above the converted UInt64 maximum wins; a non-negative f is truncated.
+    return (f >= static_cast<doris::vectorized::Float64>(
+                         std::numeric_limits<doris::vectorized::UInt64>::max())) ||
+           (f >= 0 && static_cast<doris::vectorized::UInt64>(f) > u);
+}
+
+template <>
+inline bool greaterOp<doris::vectorized::UInt64, doris::vectorized::Float64>(
+        doris::vectorized::UInt64 u, doris::vectorized::Float64 f) {
+    if (u <= static_cast<doris::vectorized::UInt64>(MAX_INT64_WITH_EXACT_FLOAT64_REPR))
+        return static_cast<doris::vectorized::Float64>(u) > f; // u <= 2^53: compare as floats
+    // Larger u: any negative f loses; otherwise f must be in range and is truncated to UInt64.
+    return (f < 0) || (f < static_cast<doris::vectorized::Float64>(
+                                   std::numeric_limits<doris::vectorized::UInt64>::max()) &&
+                       u > static_cast<doris::vectorized::UInt64>(f));
+}
+
+// Case 3b for float32: convert the float to Float64 and reuse the Float64 specializations
+template <>
+inline bool greaterOp<doris::vectorized::Float32, doris::vectorized::Int64>(
+        doris::vectorized::Float32 f32, doris::vectorized::Int64 i) {
+    return greaterOp(static_cast<doris::vectorized::Float64>(f32), i);
+}
+
+template <>
+inline bool greaterOp<doris::vectorized::Int64, doris::vectorized::Float32>(
+        doris::vectorized::Int64 i, doris::vectorized::Float32 f32) {
+    return greaterOp(i, static_cast<doris::vectorized::Float64>(f32));
+}
+
+template <>
+inline bool greaterOp<doris::vectorized::Float32, doris::vectorized::UInt64>(
+        doris::vectorized::Float32 f32, doris::vectorized::UInt64 u) {
+    return greaterOp(static_cast<doris::vectorized::Float64>(f32), u);
+}
+
+template <>
+inline bool greaterOp<doris::vectorized::UInt64, doris::vectorized::Float32>(
+        doris::vectorized::UInt64 u, doris::vectorized::Float32 f32) {
+    return greaterOp(u, static_cast<doris::vectorized::Float64>(f32));
+}
+
+template <> inline bool greaterOp<doris::vectorized::Float64, doris::vectorized::UInt128>(
+        doris::vectorized::Float64 f, doris::vectorized::UInt128 u) {
+    if (u.high == 0) return greaterOp(f, u.low); // high is the upper half: u fits in UInt64
+    using Wide = unsigned __int128; // a Float64 in [2^64, 2^128) is integral: the cast is exact
+    return f >= 0x1p128 || (f >= 0x1p64 && static_cast<Wide>(f) > ((Wide(u.high) << 64) | u.low));
+}
+
+template <> inline bool greaterOp<doris::vectorized::UInt128, doris::vectorized::Float64>(
+        doris::vectorized::UInt128 u, doris::vectorized::Float64 f) {
+    if (u.high == 0) return greaterOp(u.low, f); // high is the upper half: u fits in UInt64
+    using Wide = unsigned __int128; // from here on u >= 2^64, so any f below 2^64 is smaller
+    return f < 0x1p64 || (f < 0x1p128 && ((Wide(u.high) << 64) | u.low) > static_cast<Wide>(f));
+}
+
+template <> inline bool greaterOp<doris::vectorized::Float32, doris::vectorized::UInt128>(
+        doris::vectorized::Float32 f, doris::vectorized::UInt128 u) {
+    return greaterOp(static_cast<doris::vectorized::Float64>(f), u);
+}
+
+template <> inline bool greaterOp<doris::vectorized::UInt128, doris::vectorized::Float32>(
+        doris::vectorized::UInt128 u, doris::vectorized::Float32 f) {
+    return greaterOp(u, static_cast<doris::vectorized::Float64>(f));
+}
+
+/// Mixed-category operands: defer to the equalsOpTmpl overload enabled for this type pair.
+template <typename A, typename B>
+inline bool_if_not_safe_conversion<A, B> equalsOp(A a, B b) { return equalsOpTmpl(a, b); }
+
+/// Safe-conversion operands: cast both to the type with the larger sizeof, then compare.
+template <typename A, typename B>
+inline bool_if_safe_conversion<A, B> equalsOp(A a, B b) {
+    using Wider = std::conditional_t<(sizeof(A) < sizeof(B)), B, A>;
+    return static_cast<Wider>(a) == static_cast<Wider>(b);
+}
+
+template <> // float->int casts of NaN or out-of-range values are undefined: check the range first
+inline bool equalsOp<doris::vectorized::Float64, doris::vectorized::UInt64>(
+        doris::vectorized::Float64 f, doris::vectorized::UInt64 u) {
+    return f == static_cast<doris::vectorized::Float64>(u) && f < 0x1p64 &&
+           static_cast<doris::vectorized::UInt64>(f) == u;
+}
+
+template <>
+inline bool equalsOp<doris::vectorized::UInt64, doris::vectorized::Float64>(
+        doris::vectorized::UInt64 u, doris::vectorized::Float64 f) {
+    return static_cast<doris::vectorized::Float64>(u) == f && f < 0x1p64 &&
+           u == static_cast<doris::vectorized::UInt64>(f);
+}
+
+template <> // signed flavour: the float comparison bounds f from below, 2^63 bounds it from above
+inline bool equalsOp<doris::vectorized::Float64, doris::vectorized::Int64>(
+        doris::vectorized::Float64 f, doris::vectorized::Int64 u) {
+    return f == static_cast<doris::vectorized::Float64>(u) && f < 0x1p63 &&
+           static_cast<doris::vectorized::Int64>(f) == u;
+}
+
+template <>
+inline bool equalsOp<doris::vectorized::Int64, doris::vectorized::Float64>(
+        doris::vectorized::Int64 u, doris::vectorized::Float64 f) {
+    return static_cast<doris::vectorized::Float64>(u) == f && f < 0x1p63 &&
+           u == static_cast<doris::vectorized::Int64>(f);
+}
+
+template <> // Float32 variants keep comparing in Float32, exactly as before
+inline bool equalsOp<doris::vectorized::Float32, doris::vectorized::UInt64>(
+        doris::vectorized::Float32 f, doris::vectorized::UInt64 u) {
+    return f == static_cast<doris::vectorized::Float32>(u) && f < 0x1p64 &&
+           static_cast<doris::vectorized::UInt64>(f) == u;
+}
+
+template <>
+inline bool equalsOp<doris::vectorized::UInt64, doris::vectorized::Float32>(
+        doris::vectorized::UInt64 u, doris::vectorized::Float32 f) {
+    return static_cast<doris::vectorized::Float32>(u) == f && f < 0x1p64 &&
+           u == static_cast<doris::vectorized::UInt64>(f);
+}
+
+template <>
+inline bool equalsOp<doris::vectorized::Float32, doris::vectorized::Int64>(
+        doris::vectorized::Float32 f, doris::vectorized::Int64 u) {
+    return f == static_cast<doris::vectorized::Float32>(u) && f < 0x1p63 &&
+           static_cast<doris::vectorized::Int64>(f) == u;
+}
+
+template <>
+inline bool equalsOp<doris::vectorized::Int64, doris::vectorized::Float32>(
+        doris::vectorized::Int64 u, doris::vectorized::Float32 f) {
+    return static_cast<doris::vectorized::Float32>(u) == f && f < 0x1p63 &&
+           u == static_cast<doris::vectorized::Int64>(f);
+}
+
+// UInt128 vs float equality. `high` is the upper half (UInt128(x) stores x in `low`), so a
+// value fits in 64 bits exactly when high == 0; wider values are compared as unsigned __int128.
+template <> inline bool equalsOp<doris::vectorized::UInt128, doris::vectorized::Float64>(
+        doris::vectorized::UInt128 u, doris::vectorized::Float64 f) {
+    if (u.high == 0) return equalsOp(u.low, f);
+    using Wide = unsigned __int128; // a Float64 in [2^64, 2^128) is integral: the cast is exact
+    return f >= 0x1p64 && f < 0x1p128 && static_cast<Wide>(f) == ((Wide(u.high) << 64) | u.low);
+}
+
+template <> inline bool equalsOp<doris::vectorized::UInt128, doris::vectorized::Float32>(
+        doris::vectorized::UInt128 u, doris::vectorized::Float32 f) {
+    return equalsOp(u, static_cast<doris::vectorized::Float64>(f));
+}
+
+template <> inline bool equalsOp<doris::vectorized::Float64, doris::vectorized::UInt128>(
+        doris::vectorized::Float64 f, doris::vectorized::UInt128 u) {
+    return equalsOp(u, f);
+}
+
+template <> inline bool equalsOp<doris::vectorized::Float32, doris::vectorized::UInt128>(
+        doris::vectorized::Float32 f, doris::vectorized::UInt128 u) {
+    return equalsOp(static_cast<doris::vectorized::Float64>(f), u);
+}
+
+inline bool greaterOp(doris::vectorized::Int128 i, doris::vectorized::Float64 f) {
+    static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64;
+    static constexpr __int128 max_int128 =
+            (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll;
+    // Returns i > f. Values of i within +-2^53 are compared after a plain cast to Float64.
+    if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR)
+        return static_cast<doris::vectorized::Float64>(i) > f;
+    // Otherwise: f below the Int128 range loses; inside the range, f is truncated to Int128.
+    return (f < static_cast<doris::vectorized::Float64>(min_int128)) ||
+           (f < static_cast<doris::vectorized::Float64>(max_int128) &&
+            i > static_cast<doris::vectorized::Int128>(f));
+}
+
+inline bool greaterOp(doris::vectorized::Float64 f, doris::vectorized::Int128 i) {
+    static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64;
+    static constexpr __int128 max_int128 =
+            (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll;
+    // Returns f > i. Values of i within +-2^53 are compared after a plain cast to Float64.
+    if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR)
+        return f > static_cast<doris::vectorized::Float64>(i);
+    // Otherwise: f at or above the converted maximum wins; above the minimum, f is truncated.
+    return (f >= static_cast<doris::vectorized::Float64>(max_int128)) ||
+           (f > static_cast<doris::vectorized::Float64>(min_int128) &&
+            static_cast<doris::vectorized::Int128>(f) > i);
+}
+
+inline bool greaterOp(doris::vectorized::Int128 i, doris::vectorized::Float32 f32) {
+    return greaterOp(i, static_cast<doris::vectorized::Float64>(f32)); // reuse the Float64 path
+}
+inline bool greaterOp(doris::vectorized::Float32 f32, doris::vectorized::Int128 i) {
+    return greaterOp(static_cast<doris::vectorized::Float64>(f32), i); // reuse the Float64 path
+}
+
+inline bool equalsOp(doris::vectorized::Int128 i, doris::vectorized::Float64 f) {
+    return static_cast<doris::vectorized::Float64>(i) == f && f < 0x1p127 &&
+           i == static_cast<doris::vectorized::Int128>(f); // f is in range here: defined cast
+}
+inline bool equalsOp(doris::vectorized::Int128 i, doris::vectorized::Float32 f) {
+    return static_cast<doris::vectorized::Float32>(i) == f && f < 0x1p127 &&
+           i == static_cast<doris::vectorized::Int128>(f); // f is in range here: defined cast
+}
+inline bool equalsOp(doris::vectorized::Float64 f, doris::vectorized::Int128 i) {
+    return equalsOp(i, f);
+}
+inline bool equalsOp(doris::vectorized::Float32 f, doris::vectorized::Int128 i) {
+    return equalsOp(i, f);
+}
+
+/// Mixed-category operands: the negation of the accurate equality test.
+template <typename A, typename B>
+inline bool_if_not_safe_conversion<A, B> notEqualsOp(A a, B b) { return !equalsOp(a, b); }
+
+/// Safe-conversion operands: the default C++ operator is used directly.
+template <typename A, typename B>
+inline bool_if_safe_conversion<A, B> notEqualsOp(A a, B b) {
+    return a != b;
+}
+
+/// Mixed-category operands: a < b is answered as greaterOp with the arguments swapped.
+template <typename A, typename B>
+inline bool_if_not_safe_conversion<A, B> lessOp(A a, B b) { return greaterOp(b, a); }
+
+/// Safe-conversion operands: the default C++ operator is used directly.
+template <typename A, typename B>
+inline bool_if_safe_conversion<A, B> lessOp(A a, B b) {
+    return a < b;
+}
+
+/// Mixed-category operands: a <= b is "not a > b", except that any NaN operand gives false.
+template <typename A, typename B>
+inline bool_if_not_safe_conversion<A, B> lessOrEqualsOp(A a, B b) {
+    return !(is_nan(a) || is_nan(b)) && !greaterOp(a, b);
+}
+
+template <typename A, typename B>
+inline bool_if_safe_conversion<A, B> lessOrEqualsOp(A a, B b) {
+    return a <= b;
+}
+
+/// Mixed-category operands: a >= b is "not b > a", except that any NaN operand gives false.
+template <typename A, typename B>
+inline bool_if_not_safe_conversion<A, B> greaterOrEqualsOp(A a, B b) {
+    return !(is_nan(a) || is_nan(b)) && !greaterOp(b, a);
+}
+
+template <typename A, typename B>
+inline bool_if_safe_conversion<A, B> greaterOrEqualsOp(A a, B b) {
+    return a >= b;
+}
+
+/// Converts value to type To, stores it in result, and returns whether the two still compare equal.
+template <typename From, typename To>
+inline bool convertNumeric(From value, To& result) {
+    /// If the type is actually the same it's not necessary to do any checks.
+    if constexpr (std::is_same_v<From, To>) {
+        result = value;
+        return true;
+    }
+
+    /// Note that NaNs do not compare equal to anything, but they are still in range of any Float type.
+    if (is_nan(value) && std::is_floating_point_v<To>) {
+        result = value;
+        return true;
+    }
+    /// Otherwise cast, then report success only if the cast value compares equal to the input.
+    result = static_cast<To>(value); // NOTE(review): float->int cast of an out-of-range value - confirm
+    return equalsOp(value, result);
+}
+
+} // namespace accurate
+
+namespace doris::vectorized {
+
+template <typename A, typename B>
+struct EqualsOp {
+    /// The operation that gives the same result when the arguments are passed in reverse order.
+    using SymmetricOp = EqualsOp<B, A>;
+    static UInt8 apply(A lhs, B rhs) { return accurate::equalsOp(lhs, rhs); }
+};
+
+/// Date-time specialization: apply takes Int64 arguments and compares them directly.
+/// NOTE(review): unlike the primary template, no SymmetricOp alias is declared here.
+template <>
+struct EqualsOp<VecDateTimeValue, VecDateTimeValue> {
+    static UInt8 apply(const Int64& lhs, const Int64& rhs) { return lhs == rhs; }
+};
+
+template <typename A, typename B>
+struct NotEqualsOp {
+    using SymmetricOp = NotEqualsOp<B, A>;
+    static UInt8 apply(A lhs, B rhs) { return accurate::notEqualsOp(lhs, rhs); }
+};
+
+/// Date-time specialization: apply takes Int64 arguments and compares them directly.
+/// NOTE(review): unlike the primary template, no SymmetricOp alias is declared here.
+template <>
+struct NotEqualsOp<VecDateTimeValue, VecDateTimeValue> {
+    static UInt8 apply(const Int64& lhs, const Int64& rhs) { return lhs != rhs; }
+};
+
+template <typename A, typename B>
+struct GreaterOp; // declared ahead of its definition because LessOp::SymmetricOp names it
+
+template <typename A, typename B>
+struct LessOp {
+    using SymmetricOp = GreaterOp<B, A>; // a < b asks the same question as b > a
+    static UInt8 apply(A lhs, B rhs) { return accurate::lessOp(lhs, rhs); }
+};
+/// Date-time specialization: both Int64 arguments go through binary_cast to VecDateTimeValue.
+template <>
+struct LessOp<VecDateTimeValue, VecDateTimeValue> {
+    static UInt8 apply(Int64 a, Int64 b) {
+        return binary_cast<Int64, VecDateTimeValue>(a) < binary_cast<Int64, VecDateTimeValue>(b);
+    }
+};
+
+template <typename A, typename B>
+struct GreaterOp {
+    using SymmetricOp = LessOp<B, A>; // a > b asks the same question as b < a
+    static UInt8 apply(A lhs, B rhs) { return accurate::greaterOp(lhs, rhs); }
+};
+/// Date-time specialization: both Int64 arguments go through binary_cast to VecDateTimeValue.
+template <>
+struct GreaterOp<VecDateTimeValue, VecDateTimeValue> {
+    static UInt8 apply(Int64 a, Int64 b) {
+        return binary_cast<Int64, VecDateTimeValue>(a) > binary_cast<Int64, VecDateTimeValue>(b);
+    }
+};
+
+template <typename A, typename B>
+struct GreaterOrEqualsOp; // declared ahead because LessOrEqualsOp::SymmetricOp names it
+
+template <typename A, typename B>
+struct LessOrEqualsOp {
+    using SymmetricOp = GreaterOrEqualsOp<B, A>; // a <= b asks the same question as b >= a
+    static UInt8 apply(A lhs, B rhs) { return accurate::lessOrEqualsOp(lhs, rhs); }
+};
+/// Date-time specialization: both Int64 arguments go through binary_cast to VecDateTimeValue.
+template <>
+struct LessOrEqualsOp<VecDateTimeValue, VecDateTimeValue> {
+    static UInt8 apply(Int64 a, Int64 b) {
+        return binary_cast<Int64, VecDateTimeValue>(a) <= binary_cast<Int64, VecDateTimeValue>(b);
+    }
+};
+
+template <typename A, typename B>
+struct GreaterOrEqualsOp {
+    using SymmetricOp = LessOrEqualsOp<B, A>; // a >= b asks the same question as b <= a
+    static UInt8 apply(A lhs, B rhs) { return accurate::greaterOrEqualsOp(lhs, rhs); }
+};
+/// Date-time specialization: both Int64 arguments go through binary_cast to VecDateTimeValue.
+template <>
+struct GreaterOrEqualsOp<VecDateTimeValue, VecDateTimeValue> {
+    static UInt8 apply(Int64 a, Int64 b) {
+        return binary_cast<Int64, VecDateTimeValue>(a) >= binary_cast<Int64, VecDateTimeValue>(b);
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
new file mode 100644
index 0000000000..3664a2a5fa
--- /dev/null
+++ b/be/src/vec/core/block.cpp
@@ -0,0 +1,926 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Block.cpp
+// and modified by Doris
+
+#include "vec/core/block.h"
+
+#include <fmt/format.h>
+#include <snappy.h>
+
+#include <iomanip>
+#include <iterator>
+#include <memory>
+
+#include "common/status.h"
+#include "gen_cpp/data.pb.h"
+#include "runtime/descriptors.h"
+#include "runtime/row_batch.h"
+#include "runtime/tuple.h"
+#include "runtime/tuple_row.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/columns_common.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/exception.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/data_types/data_type_bitmap.h"
+#include "vec/data_types/data_type_date.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+
+namespace doris::vectorized {
+// Builds the vectorized data type that corresponds to the type tag stored in pcolumn.
+inline DataTypePtr create_data_type(const PColumn& pcolumn) {
+    switch (pcolumn.type()) {
+    case PColumn::UINT8: {
+        return std::make_shared<DataTypeUInt8>();
+    }
+    case PColumn::UINT16: {
+        return std::make_shared<DataTypeUInt16>();
+    }
+    case PColumn::UINT32: {
+        return std::make_shared<DataTypeUInt32>();
+    }
+    case PColumn::UINT64: {
+        return std::make_shared<DataTypeUInt64>();
+    }
+    case PColumn::UINT128: {
+        return std::make_shared<DataTypeUInt128>();
+    }
+    case PColumn::INT8: {
+        return std::make_shared<DataTypeInt8>();
+    }
+    case PColumn::INT16: {
+        return std::make_shared<DataTypeInt16>();
+    }
+    case PColumn::INT32: {
+        return std::make_shared<DataTypeInt32>();
+    }
+    case PColumn::INT64: {
+        return std::make_shared<DataTypeInt64>();
+    }
+    case PColumn::INT128: {
+        return std::make_shared<DataTypeInt128>();
+    }
+    case PColumn::FLOAT32: {
+        return std::make_shared<DataTypeFloat32>();
+    }
+    case PColumn::FLOAT64: {
+        return std::make_shared<DataTypeFloat64>();
+    }
+    case PColumn::STRING: {
+        return std::make_shared<DataTypeString>();
+    }
+    case PColumn::DATE: {
+        return std::make_shared<DataTypeDate>();
+    }
+    case PColumn::DATETIME: {
+        return std::make_shared<DataTypeDateTime>();
+    }
+    case PColumn::DECIMAL32: { // decimal types additionally need precision and scale from decimal_param()
+        return std::make_shared<DataTypeDecimal<Decimal32>>(pcolumn.decimal_param().precision(),
+                                                            pcolumn.decimal_param().scale());
+    }
+    case PColumn::DECIMAL64: {
+        return std::make_shared<DataTypeDecimal<Decimal64>>(pcolumn.decimal_param().precision(),
+                                                            pcolumn.decimal_param().scale());
+    }
+    case PColumn::DECIMAL128: {
+        return std::make_shared<DataTypeDecimal<Decimal128>>(pcolumn.decimal_param().precision(),
+                                                             pcolumn.decimal_param().scale());
+    }
+    case PColumn::BITMAP: {
+        return std::make_shared<DataTypeBitMap>();
+    }
+    default: { // a tag without a case above is reported through LOG(FATAL)
+        LOG(FATAL) << fmt::format("Unknown data type: {}, data type name: {}", pcolumn.type(),
+                                  PColumn::DataType_Name(pcolumn.type()));
+        return nullptr; // only reached if LOG(FATAL) returns
+    }
+    }
+}
+// Maps the TypeIndex reported by data_type->get_type_id() to the matching PColumn::DataType tag.
+PColumn::DataType get_pdata_type(DataTypePtr data_type) {
+    switch (data_type->get_type_id()) {
+    case TypeIndex::UInt8:
+        return PColumn::UINT8;
+    case TypeIndex::UInt16:
+        return PColumn::UINT16;
+    case TypeIndex::UInt32:
+        return PColumn::UINT32;
+    case TypeIndex::UInt64:
+        return PColumn::UINT64;
+    case TypeIndex::UInt128:
+        return PColumn::UINT128;
+    case TypeIndex::Int8:
+        return PColumn::INT8;
+    case TypeIndex::Int16:
+        return PColumn::INT16;
+    case TypeIndex::Int32:
+        return PColumn::INT32;
+    case TypeIndex::Int64:
+        return PColumn::INT64;
+    case TypeIndex::Int128:
+        return PColumn::INT128;
+    case TypeIndex::Float32:
+        return PColumn::FLOAT32;
+    case TypeIndex::Float64:
+        return PColumn::FLOAT64;
+    case TypeIndex::Decimal32:
+        return PColumn::DECIMAL32;
+    case TypeIndex::Decimal64:
+        return PColumn::DECIMAL64;
+    case TypeIndex::Decimal128:
+        return PColumn::DECIMAL128;
+    case TypeIndex::String:
+        return PColumn::STRING;
+    case TypeIndex::Date:
+        return PColumn::DATE;
+    case TypeIndex::DateTime:
+        return PColumn::DATETIME;
+    case TypeIndex::BitMap:
+        return PColumn::BITMAP;
+    default: // every TypeIndex without a case above
+        return PColumn::UNKNOWN;
+    }
+}
+
+Block::Block(std::initializer_list<ColumnWithTypeAndName> il) : data {il} {
+    initialize_index_by_name(); // build the name -> position index for the columns just stored
+}
+
+Block::Block(const ColumnsWithTypeAndName& data_) : data {data_} {
+    initialize_index_by_name(); // build the name -> position index for the columns just stored
+}
+
+Block::Block(const PBlock& pblock) {
+    // Rebuild one column per serialized PColumn; a non-empty is_null array marks it nullable.
+    for (const auto& pcolumn : pblock.columns()) {
+        DataTypePtr type = create_data_type(pcolumn);
+        MutableColumnPtr column = type->create_column();
+        if (pcolumn.is_null_size() > 0) {
+            // Wrap the nested column together with a fresh null map, and switch to the
+            // Nullable type so that it is the one performing deserialize() below.
+            column = ColumnNullable::create(std::move(column), ColumnUInt8::create());
+            type = make_nullable(type);
+        }
+        type->deserialize(pcolumn, column.get());
+        data.emplace_back(column->get_ptr(), type, pcolumn.name());
+    }
+    initialize_index_by_name();
+}
+
+void Block::initialize_index_by_name() {
+    // operator[] overwrites, so a duplicated name ends up mapped to its last position.
+    size_t position = 0;
+    for (const auto& elem : data) index_by_name[elem.name] = position++;
+}
+
+void Block::insert(size_t position, const ColumnWithTypeAndName& elem) {
+    // position == data.size() is allowed and appends; anything beyond that is fatal.
+    const size_t column_count = data.size();
+    if (position > column_count) {
+        LOG(FATAL) << fmt::format("Position out of bound in Block::insert(), max position = {}",
+                                  column_count);
+    }
+
+    // Every column currently at or after `position` moves one slot to the right.
+    for (auto& entry : index_by_name) {
+        if (entry.second >= position) ++entry.second;
+    }
+    index_by_name.emplace(elem.name, position); // an already indexed name keeps its entry
+    data.emplace(data.begin() + position, elem);
+}
+
+void Block::insert(size_t position, ColumnWithTypeAndName&& elem) {
+    // position == data.size() is allowed and appends; anything beyond that is fatal.
+    const size_t column_count = data.size();
+    if (position > column_count) {
+        LOG(FATAL) << fmt::format("Position out of bound in Block::insert(), max position = {}",
+                                  column_count);
+    }
+
+    // Every column currently at or after `position` moves one slot to the right.
+    for (auto& entry : index_by_name) {
+        if (entry.second >= position) ++entry.second;
+    }
+    index_by_name.emplace(elem.name, position); // reads elem.name before elem is moved from
+    data.emplace(data.begin() + position, std::move(elem));
+}
+
+void Block::insert(const ColumnWithTypeAndName& elem) {
+    index_by_name.emplace(elem.name, data.size()); // data.size() is the position elem gets below
+    data.emplace_back(elem);
+}
+
+void Block::insert(ColumnWithTypeAndName&& elem) {
+    index_by_name.emplace(elem.name, data.size()); // reads elem.name before elem is moved from
+    data.emplace_back(std::move(elem));
+}
+
+void Block::insert_unique(const ColumnWithTypeAndName& elem) {
+    // elem is ignored when a column with the same name is already indexed.
+    const bool already_present = index_by_name.find(elem.name) != index_by_name.end();
+    if (!already_present) insert(elem);
+}
+
+void Block::insert_unique(ColumnWithTypeAndName&& elem) {
+    // elem is consumed only when no column with the same name is indexed yet.
+    const bool already_present = index_by_name.find(elem.name) != index_by_name.end();
+    if (!already_present) insert(std::move(elem));
+}
+
+void Block::erase(const std::set<size_t>& positions) {
+    // Highest position first, so the positions still to be erased are not shifted.
+    auto next = positions.rbegin();
+    while (next != positions.rend()) erase(*next++);
+}
+
+void Block::erase(size_t position) {
+    // Both checks are fatal: erasing from an empty block, or at a position past the end.
+    const size_t column_count = data.size();
+    if (column_count == 0) {
+        LOG(FATAL) << "Block is empty";
+    }
+    if (position >= column_count) {
+        LOG(FATAL) << fmt::format("Position out of bound in Block::erase(), max position = {}",
+                                  column_count - 1);
+    }
+    erase_impl(position);
+}
+
+void Block::erase_impl(size_t position) {
+    data.erase(data.begin() + position);
+
+    // Drop index entries that point at the erased column and move those behind it one
+    // slot to the left. The loop iterator is advanced before the entry is erased.
+    auto it = index_by_name.begin();
+    while (it != index_by_name.end()) {
+        auto current = it++;
+        if (current->second == position) index_by_name.erase(current);
+        else if (current->second > position) --current->second;
+    }
+}
+
+void Block::erase(const String& name) {
+    // Resolve the name to a position first; an unknown name is fatal.
+    const auto found = index_by_name.find(name);
+    if (found == index_by_name.end()) {
+        LOG(FATAL) << fmt::format("No such name in Block::erase(): '{}'", name);
+    }
+    erase_impl(found->second);
+}
+
+ColumnWithTypeAndName& Block::safe_get_by_position(size_t position) {
+    // Bounds-checked accessor: an empty block or an out-of-range position is fatal.
+    const size_t column_count = data.size();
+    if (column_count == 0) {
+        LOG(FATAL) << "Block is empty";
+    }
+    if (position >= column_count) {
+        LOG(FATAL) << fmt::format(
+                "Position {} is out of bound in Block::safe_get_by_position(), max position = {}, "
+                "there are columns: {}",
+                position, column_count - 1, dump_names());
+    }
+    return data[position];
+}
+
+const ColumnWithTypeAndName& Block::safe_get_by_position(size_t position) const {
+    // Bounds-checked read-only accessor: an empty block or a bad position is fatal.
+    const size_t column_count = data.size();
+    if (column_count == 0) {
+        LOG(FATAL) << "Block is empty";
+    }
+    if (position >= column_count) {
+        LOG(FATAL) << fmt::format(
+                "Position {} is out of bound in Block::safe_get_by_position(), max position = {}, "
+                "there are columns: {}",
+                position, column_count - 1, dump_names());
+    }
+    return data[position];
+}
+
+ColumnWithTypeAndName& Block::get_by_name(const std::string& name) {
+    // The lookup goes through index_by_name; a missing name is fatal.
+    const auto found = index_by_name.find(name);
+    if (found == index_by_name.end()) {
+        LOG(FATAL) << fmt::format("Not found column {} in block. There are only columns: {}", name,
+                                  dump_names());
+    }
+    return data[found->second];
+}
+
+const ColumnWithTypeAndName& Block::get_by_name(const std::string& name) const {
+    // Read-only lookup through index_by_name; a missing name is fatal.
+    const auto found = index_by_name.find(name);
+    if (found == index_by_name.end()) {
+        LOG(FATAL) << fmt::format("Not found column {} in block. There are only columns: {}", name,
+                                  dump_names());
+    }
+    return data[found->second];
+}
+
+bool Block::has(const std::string& name) const { // true iff `name` is a key of index_by_name
+    return index_by_name.end() != index_by_name.find(name);
+}
+
+size_t Block::get_position_by_name(const std::string& name) const {
+    // Returns the position index_by_name records for `name`; a missing name is fatal.
+    const auto found = index_by_name.find(name);
+    if (found == index_by_name.end()) {
+        LOG(FATAL) << fmt::format("Not found column {} in block. There are only columns: {}", name,
+                                  dump_names());
+    }
+    return found->second;
+}
+
+void Block::check_number_of_rows(bool allow_null_columns) const {
+    // Fatal unless every column has the same row count. Entries without a column are
+    // skipped when allow_null_columns is set and are fatal otherwise.
+    ssize_t expected_rows = -1; // -1: no column with data seen yet
+    for (const auto& elem : data) {
+        if (!elem.column) {
+            if (allow_null_columns) continue;
+            LOG(FATAL) << fmt::format(
+                    "Column {} in block is nullptr, in method check_number_of_rows.", elem.name);
+        }
+
+        const ssize_t size = elem.column->size();
+        if (expected_rows == -1) {
+            expected_rows = size;
+        } else if (expected_rows != size) {
+            LOG(FATAL) << fmt::format("Sizes of columns doesn't match: {}:{},{}:{}",
+                                      data.front().name, expected_rows, elem.name, size);
+        }
+    }
+}
+
+size_t Block::rows() const {
+    // Row count = size of the first entry that holds a column; 0 when there is none.
+    auto it = data.begin();
+    while (it != data.end() && !it->column) {
+        ++it;
+    }
+    if (it == data.end()) return 0;
+    return it->column->size();
+}
+
+void Block::set_num_rows(size_t length) {
+    // Cuts every column to its first `length` rows; a block with no more rows is untouched.
+    if (rows() <= length) {
+        return;
+    }
+    for (auto& elem : data) {
+        if (elem.column) elem.column = elem.column->cut(0, length);
+    }
+}
+
+void Block::skip_num_rows(int64_t& length) {
+    // Drops the first `length` rows. A block with no more rows than that is cleared and
+    // `length` is reduced by the rows consumed; otherwise `length` is left unchanged.
+    const auto origin_rows = rows();
+    if (origin_rows <= length) {
+        clear();
+        length -= origin_rows;
+        return;
+    }
+    for (auto& elem : data) {
+        if (elem.column) elem.column = elem.column->cut(length, origin_rows - length);
+    }
+}
+
+size_t Block::bytes() const {
+    // Sum of byte_size() over the columns present; entries without a column are skipped.
+    size_t res = 0;
+    for (const auto& elem : data) {
+        if (elem.column) res += elem.column->byte_size();
+    }
+    return res;
+}
+
+size_t Block::allocated_bytes() const {
+    // Sum of allocated_bytes() over the columns present; entries without a column are skipped.
+    size_t res = 0;
+    for (const auto& elem : data) {
+        if (elem.column) res += elem.column->allocated_bytes();
+    }
+    return res;
+}
+
+std::string Block::dump_names() const {
+    // Column names in positional order, separated by ", ".
+    std::stringstream out;
+    for (size_t i = 0; i < data.size(); ++i) {
+        out << (i == 0 ? "" : ", ") << data[i].name;
+    }
+    return out.str();
+}
+// Renders up to row_limit rows, starting at row `begin`, as an ASCII table with "name(type)" headers.
+std::string Block::dump_data(size_t begin, size_t row_limit) const {
+    if (rows() == 0) {
+        return "empty block.";
+    }
+    std::vector<std::string> headers;
+    std::vector<size_t> headers_size;
+    for (auto it = data.begin(); it != data.end(); ++it) {
+        std::string s = fmt::format("{}({})", it->name, it->type->get_name());
+        headers_size.push_back(s.size() > 15 ? s.size() : 15); // column width: header length, at least 15
+        headers.emplace_back(s);
+    }
+
+    std::stringstream out;
+    // header upper line
+    auto line = [&]() { // writes one horizontal rule of '+' and '-' across all columns
+        for (size_t i = 0; i < columns(); ++i) {
+            out << std::setfill('-') << std::setw(1) << "+" << std::setw(headers_size[i]) << "-";
+        }
+        out << std::setw(1) << "+" << std::endl;
+    };
+    line();
+    // header text
+    for (size_t i = 0; i < columns(); ++i) {
+        out << std::setfill(' ') << std::setw(1) << "|" << std::left << std::setw(headers_size[i])
+            << headers[i];
+    }
+    out << std::setw(1) << "|" << std::endl;
+    // header bottom line
+    line();
+    // content
+    for (size_t row_num = begin; row_num < rows() && row_num < row_limit + begin; ++row_num) {
+        for (size_t i = 0; i < columns(); ++i) {
+            std::string s = ""; // stays empty for an entry that has no column
+            if (data[i].column) {
+                s = data[i].to_string(row_num);
+            }
+            if (s.length() > headers_size[i]) { // too wide for the column: cut it and end with "..."
+                s = s.substr(0, headers_size[i] - 3) + "...";
+            }
+            out << std::setfill(' ') << std::setw(1) << "|" << std::setw(headers_size[i])
+                << std::right << s;
+        }
+        out << std::setw(1) << "|" << std::endl;
+    }
+    // bottom line
+    line();
+    if (row_limit < rows()) { // NOTE(review): the text says "first" even when begin > 0
+        out << rows() << " rows in block, only show first " << row_limit << " rows." << std::endl;
+    }
+    return out.str();
+}
+
+std::string Block::dump_structure() const {
+    // Concatenates each entry's own dump_structure() text in positional order,
+    // separated by ", ".
+    std::stringstream out;
+    bool first = true;
+    for (const auto& elem : data) {
+        if (!first) out << ", ";
+        out << elem.dump_structure();
+        first = false;
+    }
+    return out.str();
+}
+
+Block Block::clone_empty() const {
+    // New block holding clone_empty() of every entry, in positional order.
+    // The result starts out as a default-constructed Block.
+    Block empty_copy;
+    for (const auto& elem : data) empty_copy.insert(elem.clone_empty());
+    return empty_copy;
+}
+
+MutableColumns Block::clone_empty_columns() const {
+    // Per entry: clone_empty() of its column if it has one, else a new column from its type.
+    MutableColumns result(data.size());
+    auto out = result.begin();
+    for (const auto& elem : data)
+        *out++ = elem.column ? elem.column->clone_empty() : elem.type->create_column();
+    return result;
+}
+
+Columns Block::get_columns() const {
+    // Column pointers in positional order; an entry without a column yields a null pointer.
+    Columns result(data.size());
+    auto out = result.begin();
+    for (const auto& elem : data)
+        *out++ = elem.column;
+    return result;
+}
+
+MutableColumns Block::mutate_columns() {
+    // Per entry: mutate() of its column if it has one, else a new column from its type.
+    MutableColumns result(data.size());
+    auto out = result.begin();
+    for (auto& elem : data) {
+        *out++ = elem.column ? (*std::move(elem.column)).mutate() : elem.type->create_column();
+    }
+    return result;
+}
+
+void Block::set_columns(MutableColumns&& columns) {
+    // Entry i takes columns[i]; debug builds reject a `columns` too short to index safely.
+    DCHECK(columns.size() >= data.size());
+    for (size_t i = 0; i < data.size(); ++i) {
+        data[i].column = std::move(columns[i]);
+    }
+}
+
+void Block::set_columns(const Columns& columns) {
+    // Entry i takes columns[i]; debug builds reject a `columns` too short to index safely.
+    DCHECK(columns.size() >= data.size());
+    for (size_t i = 0; i < data.size(); ++i) {
+        data[i].column = columns[i];
+    }
+}
+
+Block Block::clone_with_columns(MutableColumns&& columns) const {
+    if (columns.size() < data.size()) { // columns[i] below would be read out of bounds
+        LOG(FATAL) << fmt::format("Cannot clone block with columns because block has {} columns, "
+                                  "but {} columns given.", data.size(), columns.size());
+    }
+    Block res; // this block's types and names, columns moved in from the argument by position
+    for (size_t i = 0; i < data.size(); ++i)
+        res.insert({std::move(columns[i]), data[i].type, data[i].name});
+    return res;
+}
+
+Block Block::clone_with_columns(const Columns& columns) const {
+    // Builds a block that reuses this block's types and names but takes its columns from
+    // the argument, matched by position. The column counts must be equal (fatal otherwise).
+    const size_t expected = data.size();
+    if (columns.size() != expected) {
+        LOG(FATAL) << fmt::format(
+                "Cannot clone block with columns because block has {} columns, but {} columns "
+                "given.",
+                expected, columns.size());
+    }
+    Block result;
+    size_t i = 0;
+    for (const auto& elem : data) {
+        result.insert({columns[i], elem.type, elem.name});
+        ++i;
+    }
+    return result;
+}
+
+Block Block::clone_without_columns() const {
+    // Keeps only the schema: every entry is copied with its type and name but with a
+    // null column pointer.
+    Block schema_only;
+    for (const auto& elem : data) {
+        schema_only.insert({nullptr, elem.type, elem.name});
+    }
+
+    return schema_only;
+}
+
+Block Block::sort_columns() const {
+    // index_by_name is a hash map and iterates in no particular order, so its entries are
+    // first put into an ordered set: (name, position) pairs compare by name.
+    const std::set<std::pair<String, size_t>> by_name(index_by_name.begin(), index_by_name.end());
+    Block sorted_block;
+    for (const auto& name_pos : by_name)
+        sorted_block.insert(data[name_pos.second]);
+    return sorted_block;
+}
+
+const ColumnsWithTypeAndName& Block::get_columns_with_type_and_name() const {
+    return data; // reference to the internal container, not a copy
+}
+
+Names Block::get_names() const {
+    // Column names in positional order; repeated names are kept.
+    Names names;
+    names.reserve(columns());
+    for (size_t i = 0; i < data.size(); ++i) {
+        names.push_back(data[i].name);
+    }
+
+    return names;
+}
+
+DataTypes Block::get_data_types() const {
+    // Data types of the entries, in positional order.
+    DataTypes types;
+    types.reserve(columns());
+    for (size_t i = 0; i < data.size(); ++i) {
+        types.push_back(data[i].type);
+    }
+
+    return types;
+}
+
+void Block::clear() {
+    info = BlockInfo(); // reset the metadata to a default-constructed BlockInfo
+    data.clear(); // drop all columns ...
+    index_by_name.clear(); // ... and the name -> position entries that referred to them
+}
+
+void Block::clear_column_data(int column_size) noexcept {
+    // Columns beyond the first column_size ones hold function execution results that were
+    // appended to the block; erase them (last one first) unless column_size is -1.
+    if (column_size != -1 && data.size() > column_size) {
+        for (int i = data.size() - 1; i >= column_size; --i)
+            erase(i);
+    }
+    // Empty the remaining columns in place; each is expected to have a single owner.
+    for (auto& d : data) {
+        DCHECK(d.column->use_count() == 1);
+        (*std::move(d.column)).assume_mutable()->clear();
+    }
+}
+
+void Block::swap(Block& other) noexcept { // exchanges info, columns and name index with `other`
+    std::swap(info, other.info);
+    data.swap(other.data);
+    index_by_name.swap(other.index_by_name);
+}
+
+void Block::swap(Block&& other) noexcept {
+    clear(); // drops the current content; also resets info and empties index_by_name
+    data = std::move(other.data);
+    initialize_index_by_name(); // NOTE(review): other.info is not taken over, unlike swap(Block&) - confirm intended
+}
+
+void Block::update_hash(SipHash& hash) const {
+    // Feeds the hash row by row: all columns of row 0, then all columns of row 1, ...
+    const size_t num_rows = rows();
+    for (size_t row = 0; row < num_rows; ++row) {
+        for (const auto& elem : data) elem.column->update_hash_with_value(row, hash);
+    }
+}
+
+void filter_block_internal(Block* block, const IColumn::Filter& filter, uint32_t column_to_keep) {
+    // Applies `filter` to the first column_to_keep columns: nothing selected -> the columns
+    // are cleared; everything selected -> no work; otherwise each column is filtered.
+    const auto selected = count_bytes_in_filter(filter);
+    if (selected == 0) {
+        for (size_t i = 0; i < column_to_keep; ++i) {
+            std::move(*block->get_by_position(i).column).mutate()->clear();
+        }
+    } else if (selected != block->rows()) {
+        for (size_t i = 0; i < column_to_keep; ++i) {
+            auto& column = block->get_by_position(i).column;
+            column = column->filter(filter, 0);
+        }
+    }
+}
+// Filters the first column_to_keep columns of block with the column at filter_column_id (UInt8, Nullable(UInt8) or Const).
+Status Block::filter_block(Block* block, int filter_column_id, int column_to_keep) {
+    ColumnPtr filter_column = block->get_by_position(filter_column_id).column;
+    if (auto* nullable_column = check_and_get_column<ColumnNullable>(*filter_column)) {
+        ColumnPtr nested_column = nullable_column->get_nested_column_ptr();
+
+        MutableColumnPtr mutable_holder = // edited below: the nested column itself if use_count() is 1, else a copy
+                nested_column->use_count() == 1
+                        ? nested_column->assume_mutable()
+                        : nested_column->clone_resized(nested_column->size());
+
+        ColumnUInt8* concrete_column = typeid_cast<ColumnUInt8*>(mutable_holder.get());
+        if (!concrete_column) {
+            return Status::InvalidArgument(
+                    "Illegal type " + filter_column->get_name() +
+                    " of column for filter. Must be UInt8 or Nullable(UInt8).");
+        }
+        auto* __restrict null_map = nullable_column->get_null_map_data().data();
+        IColumn::Filter& filter = concrete_column->get_data();
+        auto* __restrict filter_data = filter.data();
+
+        const size_t size = filter.size();
+        for (size_t i = 0; i < size; ++i) {
+            filter_data[i] &= !null_map[i]; // a NULL filter value never selects its row
+        }
+        filter_block_internal(block, filter, column_to_keep);
+    } else if (auto* const_column = check_and_get_column<ColumnConst>(*filter_column)) {
+        bool ret = const_column->get_bool(0);
+        if (!ret) { // constant false drops every row; constant true leaves the columns as they are
+            for (size_t i = 0; i < column_to_keep; ++i) {
+                std::move(*block->get_by_position(i).column).mutate()->clear();
+            }
+        }
+    } else { // anything else is taken to be a plain UInt8 column (assert_cast)
+        const IColumn::Filter& filter =
+                assert_cast<const doris::vectorized::ColumnVector<UInt8>&>(*filter_column)
+                        .get_data();
+        filter_block_internal(block, filter, column_to_keep);
+    }
+
+    erase_useless_column(block, column_to_keep);
+    return Status::OK();
+}
+
+size_t Block::serialize(PBlock* pblock) const {
+    // Appends one PColumn per column to `pblock` and returns the number of bytes accounted
+    // for: the column name sizes plus whatever each type's serialize() reports.
+    size_t total_bytes = 0;
+    for (const auto& elem : *this) {
+        PColumn* pcolumn = pblock->add_columns();
+        // Name.
+        pcolumn->set_name(elem.name);
+        total_bytes += elem.name.size();
+        // Type tag: a Nullable type is recorded as its nested type.
+        DataTypePtr plain_type = elem.type;
+        if (elem.type->is_nullable()) {
+            plain_type = std::dynamic_pointer_cast<const DataTypeNullable>(elem.type)
+                                 ->get_nested_type();
+        }
+        pcolumn->set_type(get_pdata_type(plain_type));
+
+        // Payload, written by the column's own (possibly Nullable) type.
+        total_bytes += elem.type->serialize(*(elem.column), pcolumn);
+    }
+    return total_bytes;
+}
+
+void Block::serialize(RowBatch* output_batch, const RowDescriptor& row_desc) {
+    // Converts every row of the block into a row of `output_batch`. Columns are consumed
+    // left to right: each tuple descriptor takes as many columns as it has slots.
+    const auto row_count = rows();
+    auto pool = output_batch->tuple_data_pool();
+    const auto& tuple_descs = row_desc.tuple_descriptors();
+    for (int row = 0; row < row_count; ++row) {
+        auto tuple_row = output_batch->get_row(row);
+        int first_column = 0;
+        for (int t = 0; t < tuple_descs.size(); ++t) {
+            const auto& desc = tuple_descs[t];
+            tuple_row->set_tuple(t, deep_copy_tuple(*desc, pool, row, first_column));
+            first_column += desc->slots().size();
+        }
+        output_batch->commit_last_row();
+    }
+}
+// Allocates a Tuple for desc from pool and fills its slots from row `row` of the columns starting at column_offset.
+doris::Tuple* Block::deep_copy_tuple(const doris::TupleDescriptor& desc, MemPool* pool, int row,
+                                     int column_offset, bool padding_char) {
+    auto dst = reinterpret_cast<doris::Tuple*>(pool->allocate(desc.byte_size()));
+
+    for (int i = 0; i < desc.slots().size(); ++i) {
+        auto slot_desc = desc.slots()[i];
+        auto column_ptr = get_by_position(column_offset + i).column; // slot i reads column column_offset + i
+        auto data_ref = column_ptr->get_data_at(row);
+
+        if (data_ref.size == 0) { // NOTE(review): any zero-length value is stored as NULL - confirm for empty strings
+            dst->set_null(slot_desc->null_indicator_offset());
+            continue;
+        } else {
+            dst->set_not_null(slot_desc->null_indicator_offset());
+        }
+
+        if (!slot_desc->type().is_string_type() && !slot_desc->type().is_date_type()) { // raw byte copy of the value
+            memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), data_ref.data, data_ref.size);
+        } else if (slot_desc->type().is_string_type() && slot_desc->type() != TYPE_OBJECT) {
+            memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), (const void*)(&data_ref), // copies data_ref itself;
+                   sizeof(data_ref)); // assumes the string slot has data_ref's layout - TODO confirm
+            // Copy the content of string
+            if (padding_char && slot_desc->type() == TYPE_CHAR) {
+                // serialize the content of string
+                auto string_slot = dst->get_string_slot(slot_desc->tuple_offset());
+                string_slot->ptr = reinterpret_cast<char*>(pool->allocate(slot_desc->type().len));
+                string_slot->len = slot_desc->type().len;
+                memset(string_slot->ptr, 0, slot_desc->type().len); // zero padding up to the declared length
+                memcpy(string_slot->ptr, data_ref.data, data_ref.size); // NOTE(review): assumes data_ref.size <= type().len
+            } else {
+                auto str_ptr = pool->allocate(data_ref.size);
+                memcpy(str_ptr, data_ref.data, data_ref.size);
+                dst->get_string_slot(slot_desc->tuple_offset())->ptr = // only ptr is replaced; len comes from the memcpy above
+                        reinterpret_cast<char*>(str_ptr);
+            }
+        } else if (slot_desc->type() == TYPE_OBJECT) { // the value is read as a BitmapValue and written into pool memory
+            auto bitmap_value = (BitmapValue*)(data_ref.data);
+            auto size = bitmap_value->getSizeInBytes();
+
+            // serialize the content of string
+            auto string_slot = dst->get_string_slot(slot_desc->tuple_offset());
+            string_slot->ptr = reinterpret_cast<char*>(pool->allocate(size));
+            bitmap_value->write(string_slot->ptr);
+            string_slot->len = size;
+        } else { // remaining case: a date type, converted from VecDateTimeValue to DateTimeValue
+            VecDateTimeValue ts =
+                    *reinterpret_cast<const doris::vectorized::VecDateTimeValue*>(data_ref.data);
+            DateTimeValue dt;
+            ts.convert_vec_dt_to_dt(&dt);
+            memcpy((void*)dst->get_slot(slot_desc->tuple_offset()), &dt, sizeof(DateTimeValue));
+        }
+    }
+    return dst;
+}
+
+size_t MutableBlock::rows() const {
+    // Row count = size of the first non-null column; 0 when there is none.
+    auto it = _columns.begin();
+    while (it != _columns.end() && !*it) {
+        ++it;
+    }
+    if (it == _columns.end()) return 0;
+    return (*it)->size();
+}
+
+void MutableBlock::add_row(const Block* block, int row) {
+    // Appends row `row` of `block` column by column; columns are matched by position.
+    const auto& source = block->get_columns_with_type_and_name();
+    size_t i = 0;
+    for (auto& target : _columns) target->insert_from(*source[i++].column.get(), row);
+}
+
+void MutableBlock::add_rows(const Block* block, const int* row_begin, const int* row_end) {
+    // For every column, hands the row index range row_begin/row_end to
+    // insert_indices_from() with the column of `block` at the same position as source.
+    const auto& source = block->get_columns_with_type_and_name();
+    for (size_t i = 0; i < _columns.size(); ++i) {
+        _columns[i]->insert_indices_from(*source[i].column.get(), row_begin, row_end);
+    }
+}
+
+Block MutableBlock::to_block(int start_column) {
+    return to_block(start_column, _columns.size()); // from start_column through the last column
+}
+
+Block MutableBlock::to_block(int start_column, int end_column) {
+    // Moves the columns from start_column up to (not including) end_column out of this
+    // MutableBlock into a Block; every resulting column carries an empty name.
+    ColumnsWithTypeAndName moved;
+    for (size_t i = start_column; i < end_column; ++i)
+        moved.emplace_back(std::move(_columns[i]), _data_types[i], "");
+    return {moved};
+}
+// Renders up to row_limit rows as an ASCII table; the headers are the data type names.
+std::string MutableBlock::dump_data(size_t row_limit) const {
+    if (rows() == 0) {
+        return "empty block.";
+    }
+    std::vector<std::string> headers;
+    std::vector<size_t> headers_size;
+    for (size_t i = 0; i < columns(); ++i) {
+        std::string s = _data_types[i]->get_name();
+        headers_size.push_back(s.size() > 15 ? s.size() : 15); // column width: header length, at least 15
+        headers.emplace_back(s);
+    }
+
+    std::stringstream out;
+    // header upper line
+    auto line = [&]() { // writes one horizontal rule of '+' and '-' across all columns
+        for (size_t i = 0; i < columns(); ++i) {
+            out << std::setfill('-') << std::setw(1) << "+" << std::setw(headers_size[i]) << "-";
+        }
+        out << std::setw(1) << "+" << std::endl;
+    };
+    line();
+    // header text
+    for (size_t i = 0; i < columns(); ++i) {
+        out << std::setfill(' ') << std::setw(1) << "|" << std::left << std::setw(headers_size[i])
+            << headers[i];
+    }
+    out << std::setw(1) << "|" << std::endl;
+    // header bottom line
+    line();
+    // content
+    for (size_t row_num = 0; row_num < rows() && row_num < row_limit; ++row_num) {
+        for (size_t i = 0; i < columns(); ++i) {
+            std::string s = _data_types[i]->to_string(*_columns[i].get(), row_num);
+            if (s.length() > headers_size[i]) { // too wide for the column: cut it and end with "..."
+                s = s.substr(0, headers_size[i] - 3) + "...";
+            }
+            out << std::setfill(' ') << std::setw(1) << "|" << std::setw(headers_size[i])
+                << std::right << s;
+        }
+        out << std::setw(1) << "|" << std::endl;
+    }
+    // bottom line
+    line();
+    if (row_limit < rows()) { // tells the reader that the table was truncated
+        out << rows() << " rows in block, only show first " << row_limit << " rows." << std::endl;
+    }
+    return out.str();
+}
+
+std::unique_ptr<Block> Block::create_same_struct_block(size_t size) const {
+    auto result = std::make_unique<Block>();
+    for (const auto& elem : data) { // same type and name per entry, column rebuilt from the type
+        auto fresh = elem.type->create_column();
+        fresh->resize(size); // every new column is resized to `size`
+        result->insert({std::move(fresh), elem.type, elem.name});
+    }
+    return result;
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h
new file mode 100644
index 0000000000..a39baa9e36
--- /dev/null
+++ b/be/src/vec/core/block.h
@@ -0,0 +1,356 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Block.h
+// and modified by Doris
+
+#pragma once
+
+#include <initializer_list>
+#include <list>
+#include <set>
+#include <vector>
+#include <parallel_hashmap/phmap.h>
+
+#include "vec/columns/column_nullable.h"
+#include "vec/core/block_info.h"
+#include "vec/core/column_with_type_and_name.h"
+#include "vec/core/columns_with_type_and_name.h"
+#include "vec/core/names.h"
+#include "vec/data_types/data_type_nullable.h"
+
+namespace doris {
+class Status;
+class RowBatch;
+class RowDescriptor;
+class Tuple;
+class TupleDescriptor;
+class MemPool;
+
+namespace vectorized {
+
+/** Container for set of columns for bunch of rows in memory.
+  * This is unit of data processing.
+  * Also contains metadata - data types of columns and their names
+  *  (either original names from a table, or generated names during temporary calculations).
+  * Allows to insert, remove columns in arbitrary position, to change order of columns.
+  */
+
+class Block {
+private:
+    using Container = ColumnsWithTypeAndName;
+    using IndexByName = phmap::flat_hash_map<String, size_t>;
+
+    /// The columns (column data, type and name) held by this block.
+    Container data;
+    /// Keyed by column name; presumably stores each column's position in `data` - TODO confirm.
+    IndexByName index_by_name;
+
+public:
+    BlockInfo info;
+
+    Block() = default;
+    Block(std::initializer_list<ColumnWithTypeAndName> il);
+    Block(const ColumnsWithTypeAndName& data_);
+    Block(const PBlock& pblock);
+
+    /// insert the column at the specified position
+    void insert(size_t position, const ColumnWithTypeAndName& elem);
+    void insert(size_t position, ColumnWithTypeAndName&& elem);
+    /// insert the column to the end
+    void insert(const ColumnWithTypeAndName& elem);
+    void insert(ColumnWithTypeAndName&& elem);
+    /// insert the column to the end, if there is no column with that name yet
+    void insert_unique(const ColumnWithTypeAndName& elem);
+    void insert_unique(ColumnWithTypeAndName&& elem);
+    /// remove the column at the specified position
+    void erase(size_t position);
+    /// remove the columns at the specified positions
+    void erase(const std::set<size_t>& positions);
+    /// remove the column with the specified name
+    void erase(const String& name);
+
+    /// Keeps only the columns whose positions are listed in `container`, in the container's
+    /// iteration order; every other column is dropped.
+    /// T was std::set<int>, std::vector<int>, std::list<int>
+    /// NOTE(review): index_by_name is not touched here, so it may be stale afterwards -
+    /// confirm whether any caller performs name lookups after this call.
+    template <class T>
+    void erase_not_in(const T& container) {
+        Container new_data;
+        for (auto pos : container) {
+            new_data.emplace_back(std::move(data[pos]));
+        }
+        std::swap(data, new_data);
+    }
+
+    /// References are invalidated after calling functions above.
+    /// No bounds check is performed; see safe_get_by_position.
+    ColumnWithTypeAndName& get_by_position(size_t position) { return data[position]; }
+    const ColumnWithTypeAndName& get_by_position(size_t position) const { return data[position]; }
+
+    /// Replaces only the column data at `position`; type and name are left as they are.
+    void replace_by_position(size_t position, ColumnPtr&& res) {
+        this->get_by_position(position).column = std::move(res);
+    }
+
+    void replace_by_position(size_t position, const ColumnPtr& res) {
+        this->get_by_position(position).column = res;
+    }
+
+    /// Replaces the column at `position` with the result of
+    /// convert_to_full_column_if_const() on it.
+    void replace_by_position_if_const(size_t position) {
+        auto& element = this->get_by_position(position);
+        element.column = element.column->convert_to_full_column_if_const();
+    }
+
+    ColumnWithTypeAndName& safe_get_by_position(size_t position);
+    const ColumnWithTypeAndName& safe_get_by_position(size_t position) const;
+
+    ColumnWithTypeAndName& get_by_name(const std::string& name);
+    const ColumnWithTypeAndName& get_by_name(const std::string& name) const;
+
+    /// Iteration over the columns in positional order.
+    Container::iterator begin() { return data.begin(); }
+    Container::iterator end() { return data.end(); }
+    Container::const_iterator begin() const { return data.begin(); }
+    Container::const_iterator end() const { return data.end(); }
+    Container::const_iterator cbegin() const { return data.cbegin(); }
+    Container::const_iterator cend() const { return data.cend(); }
+
+    bool has(const std::string& name) const;
+
+    size_t get_position_by_name(const std::string& name) const;
+
+    const ColumnsWithTypeAndName& get_columns_with_type_and_name() const;
+
+    Names get_names() const;
+    DataTypes get_data_types() const;
+
+    /// Returns the data type of the column at `index`; CHECK-fails if `index` is out of range.
+    DataTypePtr get_data_type(size_t index) const {
+        CHECK(index < data.size());
+        return data[index].type;
+    }
+
+    /// Returns number of rows from first column in block, not equal to nullptr. If no columns, returns 0.
+    size_t rows() const;
+
+    // Cut the rows in block, use in LIMIT operation
+    void set_num_rows(size_t length);
+
+    // Skip the rows in block, use in OFFSET, LIMIT operation
+    void skip_num_rows(int64_t& offset);
+
+    size_t columns() const { return data.size(); }
+
+    /// Checks that every column in block is not nullptr and has same number of elements.
+    void check_number_of_rows(bool allow_null_columns = false) const;
+
+    /// Approximate number of bytes in memory - for profiling and limits.
+    size_t bytes() const;
+
+    /// Approximate number of allocated bytes in memory - for profiling and limits.
+    size_t allocated_bytes() const;
+
+    /// A block converts to true when it has at least one column.
+    operator bool() const { return !!columns(); }
+    bool operator!() const { return !this->operator bool(); }
+
+    /** Get a list of column names separated by commas. */
+    std::string dump_names() const;
+
+    /** List of names, types and lengths of columns. Designed for debugging. */
+    std::string dump_structure() const;
+
+    /** Get the same block, but empty. */
+    Block clone_empty() const;
+
+    Columns get_columns() const;
+    void set_columns(const Columns& columns);
+    Block clone_with_columns(const Columns& columns) const;
+    Block clone_without_columns() const;
+
+    /** Get empty columns with the same types as in block. */
+    MutableColumns clone_empty_columns() const;
+
+    /** Get columns from block for mutation. Columns in block will be nullptr. */
+    MutableColumns mutate_columns();
+
+    /** Replace columns in a block */
+    void set_columns(MutableColumns&& columns);
+    Block clone_with_columns(MutableColumns&& columns) const;
+
+    /** Get a block with columns that have been rearranged in the order of their names. */
+    Block sort_columns() const;
+
+    void clear();
+    void swap(Block& other) noexcept;
+    void swap(Block&& other) noexcept;
+
+    // Default column size = -1 means clear all column in block
+    // Else clear column [0, column_size) delete column [column_size, data.size)
+    void clear_column_data(int column_size = -1) noexcept;
+
+    /// True when the block already holds at least one column.
+    /// Const-qualified so it can also be queried through a const Block.
+    bool mem_reuse() const { return !data.empty(); }
+
+    /// True when the block holds no columns at all.
+    bool is_empty_column() const { return data.empty(); }
+
+    /** Updates SipHash of the Block, using update method of columns.
+      * Returns hash for block, that could be used to differentiate blocks
+      *  with same structure, but different data.
+      */
+    void update_hash(SipHash& hash) const;
+
+    /** Get block data in string. */
+    std::string dump_data(size_t begin = 0, size_t row_limit = 100) const;
+
+    static Status filter_block(Block* block, int filter_conlumn_id, int column_to_keep);
+
+    /// Erases trailing columns, last one first, until only the first `column_to_keep`
+    /// columns remain.
+    static inline void erase_useless_column(Block* block, int column_to_keep) {
+        for (int i = block->columns() - 1; i >= column_to_keep; --i) {
+            block->erase(i);
+        }
+    }
+
+    // serialize block to PBlock
+    size_t serialize(PBlock* pblock) const;
+
+    // serialize block to PRowbatch
+    void serialize(RowBatch*, const RowDescriptor&);
+
+    std::unique_ptr<Block> create_same_struct_block(size_t size) const;
+
+    /** Compares (*this) n-th row and rhs m-th row.
+      * Returns negative number, 0, or positive number if (*this) n-th row is less than,
+      * equal to, or greater than rhs m-th row respectively.
+      * Is used in sortings.
+      *
+      * If one of the elements' values is NaN or NULL, then:
+      * - if nan_direction_hint == -1, NaN and NULLs are considered less than everything else;
+      * - if nan_direction_hint ==  1, NaN and NULLs are considered greater than everything else.
+      * For example, if nan_direction_hint == -1 is used by descending sorting, NaNs will be at the end.
+      *
+      * For non Nullable and non floating point types, nan_direction_hint is ignored.
+      *
+      * This overload compares all columns and requires both blocks to have the same
+      * number of columns.
+      */
+    int compare_at(size_t n, size_t m, const Block& rhs, int nan_direction_hint) const {
+        DCHECK_EQ(columns(), rhs.columns());
+        return compare_at(n, m, columns(), rhs, nan_direction_hint);
+    }
+
+    /// Same as above, but only the first `num_columns` columns take part in the comparison.
+    /// The result of the first column that differs is returned; 0 means all compared equal.
+    int compare_at(size_t n, size_t m, size_t num_columns, const Block& rhs,
+                   int nan_direction_hint) const {
+        DCHECK_GE(columns(), num_columns);
+        DCHECK_GE(rhs.columns(), num_columns);
+
+        // Row indices are zero-based, so an index equal to rows() is already out of range.
+        DCHECK_LT(n, rows());
+        DCHECK_LT(m, rhs.rows());
+        for (size_t i = 0; i < num_columns; ++i) {
+            DCHECK(get_by_position(i).type->equals(*rhs.get_by_position(i).type));
+            auto res = get_by_position(i).column->compare_at(n, m, *(rhs.get_by_position(i).column),
+                                                             nan_direction_hint);
+            if (res) {
+                return res;
+            }
+        }
+        return 0;
+    }
+
+    doris::Tuple* deep_copy_tuple(const TupleDescriptor&, MemPool*, int, int, bool padding_char = false);
+
+private:
+    void erase_impl(size_t position);
+    void initialize_index_by_name();
+};
+
+// Convenience aliases for collections of blocks.
+// Blocks: contiguous sequence of blocks; BlocksList: linked list of blocks.
+using Blocks = std::vector<Block>;
+using BlocksList = std::list<Block>;
+// Shared ownership of a Blocks vector, and of a vector of such shared vectors.
+using BlocksPtr = std::shared_ptr<Blocks>;
+using BlocksPtrs = std::shared_ptr<std::vector<BlocksPtr>>;
+
+/// A set of mutable columns together with their data types. Unlike Block it carries no
+/// column names; it is built from a Block (taking over its columns via mutate_columns())
+/// and turned back into one with to_block().
+class MutableBlock {
+private:
+    MutableColumns _columns;
+    DataTypes _data_types;
+
+public:
+    /// Returns an empty MutableBlock when `block` is nullptr, otherwise one built from `block`.
+    static MutableBlock build_mutable_block(Block* block) {
+        return block == nullptr ? MutableBlock() : MutableBlock(block);
+    }
+    MutableBlock() = default;
+    ~MutableBlock() = default;
+
+    MutableBlock(MutableColumns&& columns, DataTypes&& data_types)
+            : _columns(std::move(columns)), _data_types(std::move(data_types)) {}
+    /// Takes the columns out of `block` for mutation and copies its data types.
+    MutableBlock(Block* block)
+            : _columns(block->mutate_columns()), _data_types(block->get_data_types()) {}
+    MutableBlock(Block&& block)
+            : _columns(block.mutate_columns()), _data_types(block.get_data_types()) {}
+
+    size_t rows() const;
+    size_t columns() const { return _columns.size(); }
+
+    /// True when rows() is zero. Const-qualified so it works on a const MutableBlock too.
+    bool empty() const { return rows() == 0; }
+
+    MutableColumns& mutable_columns() { return _columns; }
+
+    void set_muatable_columns(MutableColumns&& columns) { _columns = std::move(columns); }
+
+    DataTypes& data_types() { return _data_types; }
+
+    /** Appends the contents of `block` to this MutableBlock.
+      *
+      * - If this object has neither columns nor data types yet, it adopts the data types of
+      *   `block`; each column becomes the mutated, full (non-const) form of the block's
+      *   column, or a fresh column created from the type when the block's column is null.
+      * - Otherwise all rows of every column of `block` are appended to the matching column
+      *   here. When the types of a pair differ, this side is expected (DCHECK) to be the
+      *   Nullable wrapper of the block's non-nullable type, and the block's column is
+      *   wrapped with make_nullable before being appended.
+      */
+    template <typename T>
+    void merge(T&& block) {
+        if (_columns.size() == 0 && _data_types.size() == 0) {
+            _data_types = block.get_data_types();
+            _columns.resize(block.columns());
+            for (size_t i = 0; i < block.columns(); ++i) {
+                if (block.get_by_position(i).column) {
+                    _columns[i] = (*std::move(block.get_by_position(i)
+                                                      .column->convert_to_full_column_if_const()))
+                                          .mutate();
+                } else {
+                    _columns[i] = _data_types[i]->create_column();
+                }
+            }
+        } else {
+            // Every existing column is paired with the block column at the same position,
+            // so the incoming block must provide at least as many columns.
+            DCHECK_GE(block.columns(), _columns.size());
+            // size_t index: avoids the signed/unsigned comparison against _columns.size().
+            for (size_t i = 0; i < _columns.size(); ++i) {
+                if (!_data_types[i]->equals(*block.get_by_position(i).type)) {
+                    DCHECK(_data_types[i]->is_nullable());
+                    DCHECK(((DataTypeNullable*)_data_types[i].get())
+                                   ->get_nested_type()
+                                   ->equals(*block.get_by_position(i).type));
+                    DCHECK(!block.get_by_position(i).type->is_nullable());
+                    _columns[i]->insert_range_from(*make_nullable(block.get_by_position(i).column)
+                                                            ->convert_to_full_column_if_const(),
+                                                   0, block.rows());
+                } else {
+                    _columns[i]->insert_range_from(
+                            *block.get_by_position(i)
+                                     .column->convert_to_full_column_if_const()
+                                     .get(),
+                            0, block.rows());
+                }
+            }
+        }
+    }
+
+    Block to_block(int start_column = 0);
+
+    Block to_block(int start_column, int end_column);
+
+    void add_row(const Block* block, int row);
+    void add_rows(const Block* block, const int* row_begin, const int* row_end);
+
+    std::string dump_data(size_t row_limit = 100) const;
+
+    /// Drops all columns and all data types.
+    void clear() {
+        _columns.clear();
+        _data_types.clear();
+    }
+
+    // TODO: use add_rows instead of this
+    // add_rows(Block* block,PODArray<Int32>& group,int group_num);
+};
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/core/block_info.cpp b/be/src/vec/core/block_info.cpp
new file mode 100644
index 0000000000..3672a25b7b
--- /dev/null
+++ b/be/src/vec/core/block_info.cpp
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/BlockInfo.cpp
+// and modified by Doris
+
+#include "vec/core/block_info.h"
+
+#include "vec/common/exception.h"
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+// Sets the bit for row `row_idx` in the mask of column `column_idx`, creating the mask
+// for that column if it does not exist yet.
+void BlockMissingValues::set_bit(size_t column_idx, size_t row_idx) {
+    RowsBitMask& mask = rows_mask_by_column_id[column_idx];
+    // Only ever grow the mask: an unconditional resize(row_idx + 1) would shrink it and
+    // silently drop bits already set for higher rows when rows are marked out of order.
+    if (mask.size() <= row_idx) {
+        mask.resize(row_idx + 1);
+    }
+    mask[row_idx] = true;
+}
+
+// Looks up the row mask stored for `column_idx`. When the column has no mask, a reference
+// to a function-local static empty mask is returned instead.
+const BlockMissingValues::RowsBitMask& BlockMissingValues::get_defaults_bitmask(
+        size_t column_idx) const {
+    static RowsBitMask empty_mask;
+    const auto found = rows_mask_by_column_id.find(column_idx);
+    if (found == rows_mask_by_column_id.end()) {
+        return empty_mask;
+    }
+    return found->second;
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/block_info.h b/be/src/vec/core/block_info.h
new file mode 100644
index 0000000000..5912b7b053
--- /dev/null
+++ b/be/src/vec/core/block_info.h
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/BlockInfo.h
+// and modified by Doris
+
+#pragma once
+
+#include <unordered_map>
+
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+
+/** More information about the block.
+  * The members are generated from the APPLY_FOR_BLOCK_INFO_FIELDS list below:
+  * `bool is_overflows = false` and `Int32 bucket_num = -1`.
+  */
+struct BlockInfo {
+    /** is_overflows:
+      * After running GROUP BY ... WITH TOTALS with the max_rows_to_group_by and group_by_overflow_mode = 'any' settings,
+      *  a row is inserted in the separate block with aggregated values that have not passed max_rows_to_group_by.
+      * If it is such a block, then is_overflows is set to true for it.
+      */
+
+    /** bucket_num:
+      * When using the two-level aggregation method, data with different key groups are scattered across different buckets.
+      * In this case, the bucket number is indicated here. It is used to optimize the merge for distributed aggregation.
+      * Otherwise -1.
+      */
+
+// X-macro listing every field as M(type, name, default value, field number).
+// Note that this macro is not #undef'd in this header, so it stays visible to any file
+// that includes it.
+#define APPLY_FOR_BLOCK_INFO_FIELDS(M) \
+    M(bool, is_overflows, false, 1)    \
+    M(Int32, bucket_num, -1, 2)
+
+// Expands one list entry into a member declaration with its default initializer;
+// FIELD_NUM is not used by this expansion.
+#define DECLARE_FIELD_VEC(TYPE, NAME, DEFAULT, FIELD_NUM) TYPE NAME = DEFAULT;
+
+    APPLY_FOR_BLOCK_INFO_FIELDS(DECLARE_FIELD_VEC)
+
+#undef DECLARE_FIELD_VEC
+
+};
+
+/// Block extension to support delayed defaults. AddingDefaultsBlockInputStream uses it to replace missing values with column defaults.
+class BlockMissingValues {
+public:
+    using RowsBitMask = std::vector<bool>; /// a bit per row for a column
+
+    /// Returns the mask stored for `column_idx`, or an empty mask if the column has none.
+    const RowsBitMask& get_defaults_bitmask(size_t column_idx) const;
+    /// Sets the bit for row `row_idx` in the mask of column `column_idx`
+    /// (the mask is created if the column has none yet).
+    void set_bit(size_t column_idx, size_t row_idx);
+    /// True when no column has a mask.
+    bool empty() const { return rows_mask_by_column_id.empty(); }
+    /// Number of columns that have a mask.
+    size_t size() const { return rows_mask_by_column_id.size(); }
+    /// Removes the masks of all columns.
+    void clear() { rows_mask_by_column_id.clear(); }
+
+private:
+    using RowsMaskByColumnId = std::unordered_map<size_t, RowsBitMask>;
+
+    /// If rows_mask_by_column_id[column_id][row_id] is true, the related value in the Block should be replaced with the column default.
+    /// It may contain fewer columns and rows than the related block.
+    RowsMaskByColumnId rows_mask_by_column_id;
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/call_on_type_index.h b/be/src/vec/core/call_on_type_index.h
new file mode 100644
index 0000000000..9b552ac381
--- /dev/null
+++ b/be/src/vec/core/call_on_type_index.h
@@ -0,0 +1,260 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/callOnTypeIndex.h
+// and modified by Doris
+
+#pragma once
+
+#include <utility>
+
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+/// Compile-time pair of types. The dispatch helpers in this file pass a default-constructed
+/// TypePair to the callback, which reads the two types back via LeftType / RightType.
+template <typename T, typename U>
+struct TypePair {
+    using LeftType = T;
+    using RightType = U;
+};
+
+/// Invokes f(TypePair<T, U>()) where U is the basic type selected by `number`, and returns
+/// whatever f returns. The _int, _decimal and _float flags enable the integer, decimal and
+/// floating-point families; a type index that is not listed or belongs to a disabled family
+/// yields false without calling f. (_datetime is accepted but not consulted here.)
+template <typename T, bool _int, bool _float, bool _decimal, bool _datetime, typename F>
+bool call_on_basic_type(TypeIndex number, F&& f) {
+    switch (number) {
+    // Integer family, compiled in only when _int is set.
+    case TypeIndex::UInt8:
+        if constexpr (_int) return f(TypePair<T, UInt8>());
+        break;
+    case TypeIndex::UInt16:
+        if constexpr (_int) return f(TypePair<T, UInt16>());
+        break;
+    case TypeIndex::UInt32:
+        if constexpr (_int) return f(TypePair<T, UInt32>());
+        break;
+    case TypeIndex::UInt64:
+        if constexpr (_int) return f(TypePair<T, UInt64>());
+        break;
+    case TypeIndex::Int8:
+        if constexpr (_int) return f(TypePair<T, Int8>());
+        break;
+    case TypeIndex::Int16:
+        if constexpr (_int) return f(TypePair<T, Int16>());
+        break;
+    case TypeIndex::Int32:
+        if constexpr (_int) return f(TypePair<T, Int32>());
+        break;
+    case TypeIndex::Int64:
+        if constexpr (_int) return f(TypePair<T, Int64>());
+        break;
+    case TypeIndex::Int128:
+        if constexpr (_int) return f(TypePair<T, Int128>());
+        break;
+
+    // Decimal family, compiled in only when _decimal is set.
+    case TypeIndex::Decimal32:
+        if constexpr (_decimal) return f(TypePair<T, Decimal32>());
+        break;
+    case TypeIndex::Decimal64:
+        if constexpr (_decimal) return f(TypePair<T, Decimal64>());
+        break;
+    case TypeIndex::Decimal128:
+        if constexpr (_decimal) return f(TypePair<T, Decimal128>());
+        break;
+
+    // Floating-point family, compiled in only when _float is set.
+    case TypeIndex::Float32:
+        if constexpr (_float) return f(TypePair<T, Float32>());
+        break;
+    case TypeIndex::Float64:
+        if constexpr (_float) return f(TypePair<T, Float64>());
+        break;
+
+    default:
+        break;
+    }
+
+    // Unknown type index, or its family is disabled.
+    return false;
+}
+
+/// Two-level dispatch over a pair of type indices: `type_num1` selects the left type and
+/// the call is then handed to call_on_basic_type, which selects the right type from
+/// `type_num2` and invokes f. The _int, _decimal and _float flags enable the corresponding
+/// families for the left type and are passed on unchanged. Returns false when `type_num1`
+/// is not listed or its family is disabled; otherwise returns call_on_basic_type's result.
+template <bool _int, bool _float, bool _decimal, bool _datetime, typename F>
+inline bool call_on_basic_types(TypeIndex type_num1, TypeIndex type_num2, F&& f) {
+    switch (type_num1) {
+    // Integer family, compiled in only when _int is set.
+    case TypeIndex::UInt8:
+        if constexpr (_int) {
+            return call_on_basic_type<UInt8, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+    case TypeIndex::UInt16:
+        if constexpr (_int) {
+            return call_on_basic_type<UInt16, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+    case TypeIndex::UInt32:
+        if constexpr (_int) {
+            return call_on_basic_type<UInt32, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+    case TypeIndex::UInt64:
+        if constexpr (_int) {
+            return call_on_basic_type<UInt64, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+    case TypeIndex::Int8:
+        if constexpr (_int) {
+            return call_on_basic_type<Int8, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+    case TypeIndex::Int16:
+        if constexpr (_int) {
+            return call_on_basic_type<Int16, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+    case TypeIndex::Int32:
+        if constexpr (_int) {
+            return call_on_basic_type<Int32, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+    case TypeIndex::Int64:
+        if constexpr (_int) {
+            return call_on_basic_type<Int64, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+    case TypeIndex::Int128:
+        if constexpr (_int) {
+            return call_on_basic_type<Int128, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+
+    // Decimal family, compiled in only when _decimal is set.
+    case TypeIndex::Decimal32:
+        if constexpr (_decimal) {
+            return call_on_basic_type<Decimal32, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+    case TypeIndex::Decimal64:
+        if constexpr (_decimal) {
+            return call_on_basic_type<Decimal64, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+    case TypeIndex::Decimal128:
+        if constexpr (_decimal) {
+            return call_on_basic_type<Decimal128, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+
+    // Floating-point family, compiled in only when _float is set.
+    case TypeIndex::Float32:
+        if constexpr (_float) {
+            return call_on_basic_type<Float32, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+    case TypeIndex::Float64:
+        if constexpr (_float) {
+            return call_on_basic_type<Float64, _int, _float, _decimal, _datetime>(
+                    type_num2, std::forward<F>(f));
+        }
+        break;
+
+    default:
+        break;
+    }
+
+    // Unknown left type index, or its family is disabled.
+    return false;
+}
+
+// Forward declarations of the data type classes. Some of them are named as template
+// arguments of TypePair by the dispatch functions below, so their full definitions are
+// not needed in this header.
+class DataTypeDate;
+class DataTypeDateTime;
+class DataTypeString;
+template <typename T>
+class DataTypeEnum;
+template <typename T>
+class DataTypeNumber;
+template <typename T>
+class DataTypeDecimal;
+
+/// Maps `number` to its data type class D and returns f(TypePair<D, T>()).
+/// Covers integers, floats, decimals, Date, DateTime and String; any other type index
+/// yields false without calling f.
+template <typename T, typename F>
+bool call_on_index_and_data_type(TypeIndex number, F&& f) {
+    // Unsigned integers.
+    if (number == TypeIndex::UInt8) return f(TypePair<DataTypeNumber<UInt8>, T>());
+    if (number == TypeIndex::UInt16) return f(TypePair<DataTypeNumber<UInt16>, T>());
+    if (number == TypeIndex::UInt32) return f(TypePair<DataTypeNumber<UInt32>, T>());
+    if (number == TypeIndex::UInt64) return f(TypePair<DataTypeNumber<UInt64>, T>());
+
+    // Signed integers.
+    if (number == TypeIndex::Int8) return f(TypePair<DataTypeNumber<Int8>, T>());
+    if (number == TypeIndex::Int16) return f(TypePair<DataTypeNumber<Int16>, T>());
+    if (number == TypeIndex::Int32) return f(TypePair<DataTypeNumber<Int32>, T>());
+    if (number == TypeIndex::Int64) return f(TypePair<DataTypeNumber<Int64>, T>());
+    if (number == TypeIndex::Int128) return f(TypePair<DataTypeNumber<Int128>, T>());
+
+    // Floating point.
+    if (number == TypeIndex::Float32) return f(TypePair<DataTypeNumber<Float32>, T>());
+    if (number == TypeIndex::Float64) return f(TypePair<DataTypeNumber<Float64>, T>());
+
+    // Decimals.
+    if (number == TypeIndex::Decimal32) return f(TypePair<DataTypeDecimal<Decimal32>, T>());
+    if (number == TypeIndex::Decimal64) return f(TypePair<DataTypeDecimal<Decimal64>, T>());
+    if (number == TypeIndex::Decimal128) return f(TypePair<DataTypeDecimal<Decimal128>, T>());
+
+    // Date/time and string types.
+    if (number == TypeIndex::Date) return f(TypePair<DataTypeDate, T>());
+    if (number == TypeIndex::DateTime) return f(TypePair<DataTypeDateTime, T>());
+    if (number == TypeIndex::String) return f(TypePair<DataTypeString, T>());
+
+    // No matching data type.
+    return false;
+}
+
+/// Maps `number` to its numeric data type class D and returns f(TypePair<D, T>()).
+/// Only integers, floats and decimals are covered; any other type index yields false
+/// without calling f.
+template <typename T, typename F>
+bool call_on_index_and_number_data_type(TypeIndex number, F&& f) {
+    // Unsigned integers.
+    if (number == TypeIndex::UInt8) return f(TypePair<DataTypeNumber<UInt8>, T>());
+    if (number == TypeIndex::UInt16) return f(TypePair<DataTypeNumber<UInt16>, T>());
+    if (number == TypeIndex::UInt32) return f(TypePair<DataTypeNumber<UInt32>, T>());
+    if (number == TypeIndex::UInt64) return f(TypePair<DataTypeNumber<UInt64>, T>());
+
+    // Signed integers.
+    if (number == TypeIndex::Int8) return f(TypePair<DataTypeNumber<Int8>, T>());
+    if (number == TypeIndex::Int16) return f(TypePair<DataTypeNumber<Int16>, T>());
+    if (number == TypeIndex::Int32) return f(TypePair<DataTypeNumber<Int32>, T>());
+    if (number == TypeIndex::Int64) return f(TypePair<DataTypeNumber<Int64>, T>());
+    if (number == TypeIndex::Int128) return f(TypePair<DataTypeNumber<Int128>, T>());
+
+    // Floating point.
+    if (number == TypeIndex::Float32) return f(TypePair<DataTypeNumber<Float32>, T>());
+    if (number == TypeIndex::Float64) return f(TypePair<DataTypeNumber<Float64>, T>());
+
+    // Decimals.
+    if (number == TypeIndex::Decimal32) return f(TypePair<DataTypeDecimal<Decimal32>, T>());
+    if (number == TypeIndex::Decimal64) return f(TypePair<DataTypeDecimal<Decimal64>, T>());
+    if (number == TypeIndex::Decimal128) return f(TypePair<DataTypeDecimal<Decimal128>, T>());
+
+    // Not a numeric type index.
+    return false;
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/column_numbers.h b/be/src/vec/core/column_numbers.h
new file mode 100644
index 0000000000..25c8912f73
--- /dev/null
+++ b/be/src/vec/core/column_numbers.h
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/ColumnNumbers.h
+// and modified by Doris
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace doris::vectorized {
+
+/// A list of size_t values; presumably column positions within a Block - TODO confirm.
+using ColumnNumbers = std::vector<size_t>;
+
+}
diff --git a/be/src/vec/core/column_with_type_and_name.cpp b/be/src/vec/core/column_with_type_and_name.cpp
new file mode 100644
index 0000000000..a0d8a21f3e
--- /dev/null
+++ b/be/src/vec/core/column_with_type_and_name.cpp
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/ColumnWithTypeAndName.cpp
+// and modified by Doris
+
+#include <ostream>
+#include <sstream>
+
+#include "vec/core/columns_with_type_and_name.h"
+
+namespace doris::vectorized {
+
+// Returns a copy with the same name and type. The column of the copy is
+// column->clone_empty() when this object has a column, and nullptr otherwise.
+ColumnWithTypeAndName ColumnWithTypeAndName::clone_empty() const {
+    ColumnWithTypeAndName copy;
+    copy.name = name;
+    copy.type = type;
+
+    if (!column) {
+        // Header-only element: nothing to clone.
+        copy.column = nullptr;
+        return copy;
+    }
+
+    copy.column = column->clone_empty();
+    return copy;
+}
+
+// Two elements are equal when their names match, their types are both null or both set
+// and equal, and their columns are both null or both set with the same column name
+// (column->get_name()). Column contents are not compared.
+bool ColumnWithTypeAndName::operator==(const ColumnWithTypeAndName& other) const {
+    if (!(name == other.name)) {
+        return false;
+    }
+
+    // Types: both present and equal, or both absent.
+    if (type && other.type) {
+        if (!type->equals(*other.type)) {
+            return false;
+        }
+    } else if (type || other.type) {
+        return false;
+    }
+
+    // Columns: both present with the same column name, or both absent.
+    if (column && other.column) {
+        return column->get_name() == other.column->get_name();
+    }
+    return !column && !other.column;
+}
+
+// Writes "<name> <type name> <column structure>" to `out`. A missing type or column is
+// written as "nullptr" in its place.
+void ColumnWithTypeAndName::dump_structure(std::ostream& out) const {
+    out << name;
+
+    // Emit the type's name; previously only a bare space was written when the type was
+    // set, so the type never showed up in the dump.
+    if (type) {
+        out << ' ' << type->get_name();
+    } else {
+        out << " nullptr";
+    }
+
+    if (column) {
+        out << ' ' << column->dump_structure();
+    } else {
+        out << " nullptr";
+    }
+}
+
+// Convenience overload: renders the same text as dump_structure(std::ostream&) and
+// returns it as a string.
+String ColumnWithTypeAndName::dump_structure() const {
+    std::ostringstream buffer;
+    dump_structure(buffer);
+    return buffer.str();
+}
+// Formats the value at row `row_num` through the element's type, after converting the
+// column with convert_to_full_column_if_const(). Both `type` and `column` are
+// dereferenced without a null check.
+std::string ColumnWithTypeAndName::to_string(size_t row_num) const {
+    const auto full_column = column->convert_to_full_column_if_const();
+    return type->to_string(*full_column.get(), row_num);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/column_with_type_and_name.h b/be/src/vec/core/column_with_type_and_name.h
new file mode 100644
index 0000000000..8608f61dd4
--- /dev/null
+++ b/be/src/vec/core/column_with_type_and_name.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/ColumnWithTypeAndName.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+
+// class WriteBuffer;
+
+/** Column data along with its data type and name.
+  * Column data could be nullptr - to represent just 'header' of column.
+  * Name could be either name from a table or some temporary generated name during expression evaluation.
+  */
+
+struct ColumnWithTypeAndName {
+    ColumnPtr column; // may be null when this entry is only a header
+    DataTypePtr type;
+    String name;
+
+    ColumnWithTypeAndName() {} // leaves all three members default-constructed
+    ColumnWithTypeAndName(const ColumnPtr& column_, const DataTypePtr& type_, const String& name_)
+            : column(column_), type(type_), name(name_) {}
+
+    /// Uses type->create_column() to create column
+    ColumnWithTypeAndName(const DataTypePtr& type_, const String& name_)
+            : column(type_->create_column()), type(type_), name(name_) {} // type_ is dereferenced here, so it must be non-null
+
+    ColumnWithTypeAndName clone_empty() const; // same name and type; column replaced by column->clone_empty() when set
+    bool operator==(const ColumnWithTypeAndName& other) const; // compares name, type and column name - not column data
+
+    void dump_structure(std::ostream& out) const; // writes name, type and column structure to `out`
+    String dump_structure() const; // same text, returned as a string
+    std::string to_string(size_t row_num) const; // requires both type and column to be set
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/columns_with_type_and_name.h b/be/src/vec/core/columns_with_type_and_name.h
new file mode 100644
index 0000000000..e77ec4930b
--- /dev/null
+++ b/be/src/vec/core/columns_with_type_and_name.h
@@ -0,0 +1,31 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/ColumnsWithTypeAndName.h
+// and modified by Doris
+
+#pragma once
+
+#include <vector>
+
+#include "vec/core/column_with_type_and_name.h"
+
+namespace doris::vectorized {
+
+using ColumnsWithTypeAndName = std::vector<ColumnWithTypeAndName>; // ordered list of (column, type, name) entries
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/decimal_comparison.h b/be/src/vec/core/decimal_comparison.h
new file mode 100644
index 0000000000..6ce3e9bb56
--- /dev/null
+++ b/be/src/vec/core/decimal_comparison.h
@@ -0,0 +1,303 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/DecimalComparison.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/arithmetic_overflow.h"
+#include "vec/core/accurate_comparison.h"
+#include "vec/core/block.h"
+#include "vec/core/call_on_type_index.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/functions/function_helpers.h" /// TODO: core should not depend on functions
+
+namespace doris::vectorized {
+
+inline bool allow_decimal_comparison(const DataTypePtr& left_type, const DataTypePtr& right_type) {
+    // At least one side must be a decimal; the other side must be either a
+    // decimal too or a non-decimal type that is comparable to decimals.
+    if (is_decimal(left_type)) {
+        return is_decimal(right_type) || is_not_decimal_but_comparable_to_decimal(right_type);
+    }
+    return is_not_decimal_but_comparable_to_decimal(left_type) && is_decimal(right_type);
+}
+
+template <size_t>
+struct ConstructDecInt { // primary template: any size without a specialization below maps to Int32
+    using Type = Int32;
+};
+template <>
+struct ConstructDecInt<8> { // size 8 maps to Int64
+    using Type = Int64;
+};
+template <>
+struct ConstructDecInt<16> { // size 16 maps to Int128
+    using Type = Int128;
+};
+
+template <typename T, typename U>
+struct DecCompareInt { // selects the integer type in which a T and a U are compared
+    using Type = typename ConstructDecInt<
+            (!IsDecimalNumber<U> || sizeof(T) > sizeof(U)) ? sizeof(T) : sizeof(U)>::Type; // sizeof(U) is used only when U is a decimal at least as wide as T
+    using TypeA = Type;
+    using TypeB = Type;
+};
+
+/// Applies Operation to an A and a B after multiplying one side by a scale factor (see Shift); the column paths do nothing when _actual is false.
+template <typename A, typename B, template <typename, typename> typename Operation,
+          bool _check_overflow = true, bool _actual = IsDecimalNumber<A> || IsDecimalNumber<B>>
+class DecimalComparison {
+public:
+    using CompareInt = typename DecCompareInt<A, B>::Type; // both operands are converted to this type before comparing
+    using Op = Operation<CompareInt, CompareInt>;
+    using ColVecA = std::conditional_t<IsDecimalNumber<A>, ColumnDecimal<A>, ColumnVector<A>>;
+    using ColVecB = std::conditional_t<IsDecimalNumber<B>, ColumnDecimal<B>, ColumnVector<B>>;
+    using ArrayA = typename ColVecA::Container;
+    using ArrayB = typename ColVecB::Container;
+
+    DecimalComparison(Block& block, size_t result, const ColumnWithTypeAndName& col_left,
+                      const ColumnWithTypeAndName& col_right) {
+        if (!apply(block, result, col_left, col_right)) { // apply() returns false only when _actual is false
+            LOG(FATAL) << fmt::format("Wrong decimal comparison with {} and {}",
+                                      col_left.type->get_name(), col_right.type->get_name());
+        }
+    }
+
+    static bool apply(Block& block, size_t result [[maybe_unused]],
+                      const ColumnWithTypeAndName& col_left,
+                      const ColumnWithTypeAndName& col_right) {
+        if constexpr (_actual) {
+            ColumnPtr c_res;
+            Shift shift = getScales<A, B>(col_left.type, col_right.type); // multipliers derived from the two data types
+
+            c_res = apply_with_scale(col_left.column, col_right.column, shift);
+            if (c_res) {
+                block.replace_by_position(result, std::move(c_res)); // result column goes to position `result`
+            }
+            return true;
+        }
+        return false;
+    }
+
+    static bool compare(A a, B b, UInt32 scale_a, UInt32 scale_b) {
+        static const UInt32 max_scale = max_decimal_precision<Decimal128>();
+        if (scale_a > max_scale || scale_b > max_scale) {
+            LOG(FATAL) << "Bad scale of decimal field";
+        }
+
+        Shift shift; // only the side with the smaller scale receives a multiplier
+        if (scale_a < scale_b)
+            shift.a = DataTypeDecimal<B>(max_decimal_precision<B>(), scale_b)
+                              .get_scale_multiplier(scale_b - scale_a);
+        if (scale_a > scale_b)
+            shift.b = DataTypeDecimal<A>(max_decimal_precision<A>(), scale_a)
+                              .get_scale_multiplier(scale_a - scale_b);
+
+        return apply_with_scale(a, b, shift);
+    }
+
+private:
+    struct Shift { // per-side multipliers; 1 means that side is left unscaled
+        CompareInt a = 1;
+        CompareInt b = 1;
+
+        bool none() const { return a == 1 && b == 1; }
+        bool left() const { return a != 1; }
+        bool right() const { return b != 1; }
+    };
+
+    template <typename T, typename U>
+    static auto apply_with_scale(T a, U b, const Shift& shift) {
+        if (shift.left()) // the left multiplier is checked first; only one side is ever scaled
+            return apply<true, false>(a, b, shift.a);
+        else if (shift.right())
+            return apply<false, true>(a, b, shift.b);
+        return apply<false, false>(a, b, 1);
+    }
+
+    template <typename T, typename U>
+    static std::enable_if_t<IsDecimalNumber<T> && IsDecimalNumber<U>, Shift> getScales( // overload for decimal vs decimal
+            const DataTypePtr& left_type, const DataTypePtr& right_type) {
+        const DataTypeDecimal<T>* decimal0 = check_decimal<T>(*left_type);
+        const DataTypeDecimal<U>* decimal1 = check_decimal<U>(*right_type);
+
+        Shift shift;
+        if (decimal0 && decimal1) {
+            auto result_type = decimal_result_type(*decimal0, *decimal1, false, false);
+            shift.a = result_type.scale_factor_for(*decimal0, false);
+            shift.b = result_type.scale_factor_for(*decimal1, false);
+        } else if (decimal0)
+            shift.b = decimal0->get_scale_multiplier();
+        else if (decimal1)
+            shift.a = decimal1->get_scale_multiplier();
+
+        return shift;
+    }
+
+    template <typename T, typename U>
+    static std::enable_if_t<IsDecimalNumber<T> && !IsDecimalNumber<U>, Shift> getScales( // overload for decimal vs non-decimal
+            const DataTypePtr& left_type, const DataTypePtr&) {
+        Shift shift;
+        const DataTypeDecimal<T>* decimal0 = check_decimal<T>(*left_type);
+        if (decimal0) shift.b = decimal0->get_scale_multiplier(); // the non-decimal right side gets the multiplier
+        return shift;
+    }
+
+    template <typename T, typename U>
+    static std::enable_if_t<!IsDecimalNumber<T> && IsDecimalNumber<U>, Shift> getScales( // overload for non-decimal vs decimal
+            const DataTypePtr&, const DataTypePtr& right_type) {
+        Shift shift;
+        const DataTypeDecimal<U>* decimal1 = check_decimal<U>(*right_type);
+        if (decimal1) shift.a = decimal1->get_scale_multiplier(); // the non-decimal left side gets the multiplier
+        return shift;
+    }
+
+    template <bool scale_left, bool scale_right>
+    static ColumnPtr apply(const ColumnPtr& c0, const ColumnPtr& c1, CompareInt scale) {
+        auto c_res = ColumnUInt8::create(); // returned empty when _actual is false
+
+        if constexpr (_actual) {
+            bool c0_is_const = is_column_const(*c0);
+            bool c1_is_const = is_column_const(*c1);
+
+            if (c0_is_const && c1_is_const) { // both constant: compare once and return a constant column
+                const ColumnConst* c0_const = check_and_get_column_const<ColVecA>(c0.get());
+                const ColumnConst* c1_const = check_and_get_column_const<ColVecB>(c1.get());
+
+                A a = c0_const->template get_value<A>();
+                B b = c1_const->template get_value<B>();
+                UInt8 res = apply<scale_left, scale_right>(a, b, scale);
+                return DataTypeUInt8().create_column_const(c0->size(), to_field(res));
+            }
+
+            ColumnUInt8::Container& vec_res = c_res->get_data();
+            vec_res.resize(c0->size()); // the result has one entry per row of c0
+
+            if (c0_is_const) {
+                const ColumnConst* c0_const = check_and_get_column_const<ColVecA>(c0.get());
+                A a = c0_const->template get_value<A>();
+                if (const ColVecB* c1_vec = check_and_get_column<ColVecB>(c1.get()))
+                    constant_vector<scale_left, scale_right>(a, c1_vec->get_data(), vec_res, scale);
+                else {
+                    LOG(FATAL) << "Wrong column in Decimal comparison";
+                }
+            } else if (c1_is_const) {
+                const ColumnConst* c1_const = check_and_get_column_const<ColVecB>(c1.get());
+                B b = c1_const->template get_value<B>();
+                if (const ColVecA* c0_vec = check_and_get_column<ColVecA>(c0.get()))
+                    vector_constant<scale_left, scale_right>(c0_vec->get_data(), b, vec_res, scale);
+                else {
+                    LOG(FATAL) << "Wrong column in Decimal comparison";
+                }
+            } else {
+                if (const ColVecA* c0_vec = check_and_get_column<ColVecA>(c0.get())) {
+                    if (const ColVecB* c1_vec = check_and_get_column<ColVecB>(c1.get()))
+                        vector_vector<scale_left, scale_right>(c0_vec->get_data(),
+                                                               c1_vec->get_data(), vec_res, scale);
+                    else {
+                        LOG(FATAL) << "Wrong column in Decimal comparison";
+                    }
+                } else {
+                    LOG(FATAL) << "Wrong column in Decimal comparison";
+                }
+            }
+        }
+
+        return c_res;
+    }
+
+    template <bool scale_left, bool scale_right>
+    static NO_INLINE UInt8 apply(A a, B b, CompareInt scale [[maybe_unused]]) { // scalar kernel used by every path above
+        CompareInt x = a;
+        CompareInt y = b;
+
+        if constexpr (_check_overflow) {
+            bool overflow = false;
+
+            if constexpr (sizeof(A) > sizeof(CompareInt)) overflow |= (A(x) != a); // value changed by the conversion to CompareInt
+            if constexpr (sizeof(B) > sizeof(CompareInt)) overflow |= (B(y) != b);
+            if constexpr (std::is_unsigned_v<A>) overflow |= (x < 0);
+            if constexpr (std::is_unsigned_v<B>) overflow |= (y < 0);
+
+            if constexpr (scale_left) overflow |= common::mul_overflow(x, scale, x);
+            if constexpr (scale_right) overflow |= common::mul_overflow(y, scale, y);
+
+            if (overflow) {
+                LOG(FATAL) << "Can't compare";
+            }
+        } else {
+            if constexpr (scale_left) x *= scale; // unchecked multiply when _check_overflow is false
+            if constexpr (scale_right) y *= scale;
+        }
+
+        return Op::apply(x, y);
+    }
+
+    template <bool scale_left, bool scale_right>
+    static void NO_INLINE vector_vector(const ArrayA& a, const ArrayB& b, PaddedPODArray<UInt8>& c,
+                                        CompareInt scale) {
+        size_t size = a.size(); // iteration count comes from `a`; `b` and `c` are read/written at the same positions without a size check
+        const A* a_pos = a.data();
+        const B* b_pos = b.data();
+        UInt8* c_pos = c.data();
+        const A* a_end = a_pos + size;
+
+        while (a_pos < a_end) {
+            *c_pos = apply<scale_left, scale_right>(*a_pos, *b_pos, scale);
+            ++a_pos;
+            ++b_pos;
+            ++c_pos;
+        }
+    }
+
+    template <bool scale_left, bool scale_right>
+    static void NO_INLINE vector_constant(const ArrayA& a, B b, PaddedPODArray<UInt8>& c,
+                                          CompareInt scale) {
+        size_t size = a.size(); // one result per element of `a`, each compared against the constant `b`
+        const A* a_pos = a.data();
+        UInt8* c_pos = c.data();
+        const A* a_end = a_pos + size;
+
+        while (a_pos < a_end) {
+            *c_pos = apply<scale_left, scale_right>(*a_pos, b, scale);
+            ++a_pos;
+            ++c_pos;
+        }
+    }
+
+    template <bool scale_left, bool scale_right>
+    static void NO_INLINE constant_vector(A a, const ArrayB& b, PaddedPODArray<UInt8>& c,
+                                          CompareInt scale) {
+        size_t size = b.size(); // one result per element of `b`, each compared against the constant `a`
+        const B* b_pos = b.data();
+        UInt8* c_pos = c.data();
+        const B* b_end = b_pos + size;
+
+        while (b_pos < b_end) {
+            *c_pos = apply<scale_left, scale_right>(a, *b_pos, scale);
+            ++b_pos;
+            ++c_pos;
+        }
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp
new file mode 100644
index 0000000000..4bb3e2c91b
--- /dev/null
+++ b/be/src/vec/core/field.cpp
@@ -0,0 +1,193 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Field.cpp
+// and modified by Doris
+
+#include "vec/core/field.h"
+
+#include "vec/core/decimal_comparison.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+void read_binary(Array& x, BufferReadable& buf) { // appends the decoded elements to `x`; existing contents are kept
+    size_t size; // element count, read from the stream right after the tag
+    UInt8 type; // a single Field::Types tag shared by every element
+    doris::vectorized::read_binary(type, buf);
+    doris::vectorized::read_binary(size, buf);
+
+    for (size_t index = 0; index < size; ++index) {
+        switch (type) { // NOTE(review): no default case - a tag not listed below appends nothing for any of the `size` iterations
+        case Field::Types::Null: {
+            x.push_back(doris::vectorized::Field()); // Null elements consume no bytes from the stream
+            break;
+        }
+        case Field::Types::UInt64: {
+            UInt64 value;
+            doris::vectorized::read_var_uint(value, buf);
+            x.push_back(value);
+            break;
+        }
+        case Field::Types::UInt128: {
+            UInt128 value;
+            doris::vectorized::read_binary(value, buf);
+            x.push_back(value);
+            break;
+        }
+        case Field::Types::Int64: {
+            Int64 value;
+            doris::vectorized::read_var_int(value, buf);
+            x.push_back(value);
+            break;
+        }
+        case Field::Types::Float64: {
+            Float64 value;
+            doris::vectorized::read_float_binary(value, buf);
+            x.push_back(value);
+            break;
+        }
+        case Field::Types::String: {
+            std::string value;
+            doris::vectorized::read_string_binary(value, buf);
+            x.push_back(value);
+            break;
+        }
+        case Field::Types::AggregateFunctionState: {
+            AggregateFunctionStateData value;
+            doris::vectorized::read_string_binary(value.name, buf); // name first, then data - same order as write_binary
+            doris::vectorized::read_string_binary(value.data, buf);
+            x.push_back(value);
+            break;
+        }
+        }
+    }
+}
+
+void write_binary(const Array& x, BufferWritable& buf) {
+    // Header first: one tag for the whole array (Null when empty), then the element count.
+    size_t size = x.size();
+    UInt8 type = Field::Types::Null;
+    if (!x.empty()) type = x.front().get_type();
+    doris::vectorized::write_binary(type, buf);
+    doris::vectorized::write_binary(size, buf);
+    // Payload: every element is encoded according to that single tag.
+    for (const auto& element : x) {
+        switch (type) {
+        case Field::Types::Null:
+            break;
+        case Field::Types::UInt64: {
+            doris::vectorized::write_var_uint(get<UInt64>(element), buf);
+            break;
+        }
+        case Field::Types::UInt128: {
+            doris::vectorized::write_binary(get<UInt128>(element), buf);
+            break;
+        }
+        case Field::Types::Int64: {
+            doris::vectorized::write_var_int(get<Int64>(element), buf);
+            break;
+        }
+        case Field::Types::Float64: {
+            doris::vectorized::write_float_binary(get<Float64>(element), buf);
+            break;
+        }
+        case Field::Types::String: {
+            doris::vectorized::write_string_binary(get<std::string>(element), buf);
+            break;
+        }
+        case Field::Types::AggregateFunctionState: {
+            doris::vectorized::write_string_binary(element.get<AggregateFunctionStateData>().name, buf);
+            doris::vectorized::write_string_binary(element.get<AggregateFunctionStateData>().data, buf);
+            break;
+        }
+        }
+    }
+}
+
+template <>
+Decimal32 DecimalField<Decimal32>::get_scale_multiplier() const { // forwards this field's scale to DataTypeDecimal<Decimal32>::get_scale_multiplier
+    return DataTypeDecimal<Decimal32>::get_scale_multiplier(scale);
+}
+
+template <>
+Decimal64 DecimalField<Decimal64>::get_scale_multiplier() const { // same forwarding for Decimal64
+    return DataTypeDecimal<Decimal64>::get_scale_multiplier(scale);
+}
+
+template <>
+Decimal128 DecimalField<Decimal128>::get_scale_multiplier() const { // same forwarding for Decimal128
+    return DataTypeDecimal<Decimal128>::get_scale_multiplier(scale);
+}
+
+template <typename T>
+static bool dec_equal(T x, T y, UInt32 x_scale, UInt32 y_scale) {
+    // Equality of two same-typed decimals; the scales are handed to compare().
+    return DecimalComparison<T, T, EqualsOp>::compare(x, y, x_scale, y_scale);
+}
+
+template <typename T>
+static bool dec_less(T x, T y, UInt32 x_scale, UInt32 y_scale) {
+    // Strict less-than of two same-typed decimals; the scales are handed to compare().
+    return DecimalComparison<T, T, LessOp>::compare(x, y, x_scale, y_scale);
+}
+
+template <typename T>
+static bool dec_less_or_equal(T x, T y, UInt32 x_scale, UInt32 y_scale) {
+    // Less-or-equal of two same-typed decimals; the scales are handed to compare().
+    return DecimalComparison<T, T, LessOrEqualsOp>::compare(x, y, x_scale, y_scale);
+}
+
+template <>
+bool decimal_equal(Decimal32 x, Decimal32 y, UInt32 xs, UInt32 ys) { // Decimal32 specializations: each forwards to the matching dec_* helper
+    return dec_equal(x, y, xs, ys);
+}
+template <>
+bool decimal_less(Decimal32 x, Decimal32 y, UInt32 xs, UInt32 ys) {
+    return dec_less(x, y, xs, ys);
+}
+template <>
+bool decimal_less_or_equal(Decimal32 x, Decimal32 y, UInt32 xs, UInt32 ys) {
+    return dec_less_or_equal(x, y, xs, ys);
+}
+
+template <>
+bool decimal_equal(Decimal64 x, Decimal64 y, UInt32 xs, UInt32 ys) { // Decimal64 specializations
+    return dec_equal(x, y, xs, ys);
+}
+template <>
+bool decimal_less(Decimal64 x, Decimal64 y, UInt32 xs, UInt32 ys) {
+    return dec_less(x, y, xs, ys);
+}
+template <>
+bool decimal_less_or_equal(Decimal64 x, Decimal64 y, UInt32 xs, UInt32 ys) {
+    return dec_less_or_equal(x, y, xs, ys);
+}
+
+template <>
+bool decimal_equal(Decimal128 x, Decimal128 y, UInt32 xs, UInt32 ys) { // Decimal128 specializations
+    return dec_equal(x, y, xs, ys);
+}
+template <>
+bool decimal_less(Decimal128 x, Decimal128 y, UInt32 xs, UInt32 ys) {
+    return dec_less(x, y, xs, ys);
+}
+template <>
+bool decimal_less_or_equal(Decimal128 x, Decimal128 y, UInt32 xs, UInt32 ys) {
+    return dec_less_or_equal(x, y, xs, ys);
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h
new file mode 100644
index 0000000000..cddd7037d7
--- /dev/null
+++ b/be/src/vec/core/field.h
@@ -0,0 +1,909 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Field.h
+// and modified by Doris
+
+#pragma once
+
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <type_traits>
+#include <vector>
+
+#include "vec/common/exception.h"
+#include "vec/common/int_exp.h"
+#include "vec/common/strong_typedef.h"
+#include "vec/common/uint128.h"
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+template <typename T>
+struct NearestFieldTypeImpl;
+
+template <typename T>
+using NearestFieldType = typename NearestFieldTypeImpl<T>::Type;
+
+class Field;
+using FieldVector = std::vector<Field>;
+
+/// Array and Tuple use the same storage type -- FieldVector, but we declare
+/// distinct types for them, so that the caller can choose whether it wants to
+/// construct a Field of Array or a Tuple type. An alternative approach would be
+/// to construct both of these types from FieldVector, and have the caller
+/// specify the desired Field type explicitly.
+#define DEFINE_FIELD_VECTOR(X)          \
+    struct X : public FieldVector {     \
+        using FieldVector::FieldVector; \
+    }
+
+DEFINE_FIELD_VECTOR(Array); // struct Array: a FieldVector that inherits its constructors
+DEFINE_FIELD_VECTOR(Tuple); // struct Tuple: same storage, distinct type
+
+#undef DEFINE_FIELD_VECTOR // the helper macro is not needed past this point
+
+struct AggregateFunctionStateData {
+    String name; /// Name with arguments.
+    String data; /// The only member compared by operator==, once the names match.
+
+    bool operator<(const AggregateFunctionStateData&) const { // ordering is not implemented: logs FATAL
+        LOG(FATAL) << "Operator < is not implemented for AggregateFunctionStateData.";
+    }
+
+    bool operator<=(const AggregateFunctionStateData&) const { // ordering is not implemented: logs FATAL
+        LOG(FATAL) << "Operator <= is not implemented for AggregateFunctionStateData.";
+    }
+
+    bool operator>(const AggregateFunctionStateData&) const { // message used to name "<=" (copy-paste slip)
+        LOG(FATAL) << "Operator > is not implemented for AggregateFunctionStateData.";
+    }
+
+    bool operator>=(const AggregateFunctionStateData&) const { // ordering is not implemented: logs FATAL
+        LOG(FATAL) << "Operator >= is not implemented for AggregateFunctionStateData.";
+    }
+
+    bool operator==(const AggregateFunctionStateData& rhs) const {
+        if (name != rhs.name) { // comparing states with different names is logged as FATAL
+            LOG(FATAL) << fmt::format(
+                    "Comparing aggregate functions with different types: {} and {}", name,
+                    rhs.name);
+        }
+
+        return data == rhs.data;
+    }
+};
+
+template <typename T>
+bool decimal_equal(T x, T y, UInt32 x_scale, UInt32 y_scale); // declared only; called by DecimalField::operator==
+template <typename T>
+bool decimal_less(T x, T y, UInt32 x_scale, UInt32 y_scale); // declared only; called by DecimalField::operator<
+template <typename T>
+bool decimal_less_or_equal(T x, T y, UInt32 x_scale, UInt32 y_scale); // declared only; called by DecimalField::operator<=
+
+template <typename T>
+class DecimalField { // a decimal value of type T stored together with its scale
+public:
+    DecimalField(T value, UInt32 scale_) : dec(value), scale(scale_) {}
+
+    operator T() const { return dec; } // implicit conversion yields the stored value only; the scale is not carried over
+    T get_value() const { return dec; }
+    T get_scale_multiplier() const; // declared here, defined out of line
+    UInt32 get_scale() const { return scale; }
+
+    template <typename U>
+    bool operator<(const DecimalField<U>& r) const {
+        using MaxType = std::conditional_t<(sizeof(T) > sizeof(U)), T, U>; // T only if strictly wider, otherwise U
+        return decimal_less<MaxType>(dec, r.get_value(), scale, r.get_scale());
+    }
+
+    template <typename U>
+    bool operator<=(const DecimalField<U>& r) const {
+        using MaxType = std::conditional_t<(sizeof(T) > sizeof(U)), T, U>;
+        return decimal_less_or_equal<MaxType>(dec, r.get_value(), scale, r.get_scale());
+    }
+
+    template <typename U>
+    bool operator==(const DecimalField<U>& r) const {
+        using MaxType = std::conditional_t<(sizeof(T) > sizeof(U)), T, U>;
+        return decimal_equal<MaxType>(dec, r.get_value(), scale, r.get_scale());
+    }
+
+    template <typename U>
+    bool operator>(const DecimalField<U>& r) const { // expressed through operator< with the operands swapped
+        return r < *this;
+    }
+    template <typename U>
+    bool operator>=(const DecimalField<U>& r) const { // expressed through operator<= with the operands swapped
+        return r <= *this;
+    }
+    template <typename U>
+    bool operator!=(const DecimalField<U>& r) const {
+        return !(*this == r);
+    }
+
+    const DecimalField<T>& operator+=(const DecimalField<T>& r) {
+        if (scale != r.get_scale()) { // no rescaling is attempted: differing scales are logged as FATAL
+            LOG(FATAL) << "Add different decimal fields";
+        }
+        dec += r.get_value();
+        return *this;
+    }
+
+    const DecimalField<T>& operator-=(const DecimalField<T>& r) {
+        if (scale != r.get_scale()) { // no rescaling is attempted: differing scales are logged as FATAL
+            LOG(FATAL) << "Sub different decimal fields";
+        }
+        dec -= r.get_value();
+        return *this;
+    }
+
+private:
+    T dec; // the stored value, as passed to the constructor
+    UInt32 scale;
+};
+
+/** 32 is enough. Round number is used for alignment and for better arithmetic inside std::vector.
+  * NOTE: Actually, sizeof(std::string) is 32 when using libc++, so Field is 40 bytes.
+  */
+#define DBMS_MIN_FIELD_SIZE 32
+
+/** Discriminated union of several types.
+  * Made for replacement of `boost::variant`
+  *  is not generalized,
+  *  but somewhat more efficient, and simpler.
+  *
+  * Used to represent a single value of one of several types in memory.
+  * Warning! Prefer to use chunks of columns instead of single values. See Column.h
+  */
+class Field {
+public:
+    struct Types {
+        /// Type tag.
+        enum Which {
+            Null = 0,
+            UInt64 = 1,
+            Int64 = 2,
+            Float64 = 3,
+            UInt128 = 4,
+            Int128 = 5,
+
+            /// Non-POD types.
+
+            String = 16,
+            Array = 17,
+            Tuple = 18,
+            Decimal32 = 19,
+            Decimal64 = 20,
+            Decimal128 = 21,
+            AggregateFunctionState = 22,
+        };
+
+        static const int MIN_NON_POD = 16;
+
+        static const char* to_string(Which which) {
+            switch (which) {
+            case Null:
+                return "Null";
+            case UInt64:
+                return "UInt64";
+            case UInt128:
+                return "UInt128";
+            case Int64:
+                return "Int64";
+            case Int128:
+                return "Int128";
+            case Float64:
+                return "Float64";
+            case String:
+                return "String";
+            case Array:
+                return "Array";
+            case Tuple:
+                return "Tuple";
+            case Decimal32:
+                return "Decimal32";
+            case Decimal64:
+                return "Decimal64";
+            case Decimal128:
+                return "Decimal128";
+            case AggregateFunctionState:
+                return "AggregateFunctionState";
+            }
+
+            LOG(FATAL) << "Bad type of Field";
+            return nullptr;
+        }
+    };
+
+    /// Returns an identifier for the type or vice versa.
+    template <typename T>
+    struct TypeToEnum;
+    template <Types::Which which>
+    struct EnumToType;
+
+    static bool is_decimal(Types::Which which) {
+        return which >= Types::Decimal32 && which <= Types::Decimal128;
+    }
+
+    Field() : which(Types::Null) {}
+
+    /** Despite the presence of a template constructor, this constructor is still needed,
+      *  since, in its absence, the compiler will still generate the default constructor.
+      */
+    Field(const Field& rhs) { create(rhs); }
+
+    Field(Field&& rhs) { create(std::move(rhs)); }
+
+    template <typename T>
+    Field(T&& rhs, std::enable_if_t<!std::is_same_v<std::decay_t<T>, Field>, void*> = nullptr);
+
+    /// Create a string inplace.
+    Field(const char* data, size_t size) { create(data, size); }
+
+    Field(const unsigned char* data, size_t size) { create(data, size); }
+
+    /// NOTE In case when field already has string type, more direct assign is possible.
+    void assign_string(const char* data, size_t size) {
+        destroy();
+        create(data, size);
+    }
+
+    void assign_string(const unsigned char* data, size_t size) {
+        destroy();
+        create(data, size);
+    }
+
+    Field& operator=(const Field& rhs) {
+        if (this != &rhs) {
+            if (which != rhs.which) {
+                destroy();
+                create(rhs);
+            } else
+                assign(rhs); /// This assigns string or vector without deallocation of existing buffer.
+        }
+        return *this;
+    }
+
+    Field& operator=(Field&& rhs) {
+        if (this == &rhs) return *this; /// Self-move is a no-op.
+        if (which == rhs.which) {
+            assign(std::move(rhs)); /// Same alternative: move-assign over the live value.
+        } else {
+            destroy();
+            create(std::move(rhs));
+        }
+        return *this;
+    }
+
+    template <typename T>
+    std::enable_if_t<!std::is_same_v<std::decay_t<T>, Field>, Field&> operator=(T&& rhs);
+
+    ~Field() { destroy(); } /// destroy() runs the destructor of a non-POD alternative, if any.
+
+    Types::Which get_type() const { return which; } /// Tag of the stored alternative.
+    const char* get_type_name() const { return Types::to_string(which); } /// Its printable name.
+
+    bool is_null() const { return which == Types::Null; } /// True only for the Null tag.
+
+    template <typename T>
+    T& get() { /// Unchecked access: `which` is not consulted, the caller must know the type.
+        using TWithoutRef = std::remove_reference_t<T>;
+        TWithoutRef* MAY_ALIAS ptr = reinterpret_cast<TWithoutRef*>(&storage);
+        return *ptr; /// Reference into `storage` itself, viewed as T.
+    }
+
+    template <typename T>
+    const T& get() const { /// Const form of the unchecked accessor; `which` is not consulted.
+        using TWithoutRef = std::remove_reference_t<T>;
+        const TWithoutRef* MAY_ALIAS ptr = reinterpret_cast<const TWithoutRef*>(&storage);
+        return *ptr; /// Reference into `storage` itself, viewed as const T.
+    }
+
+    template <typename T>
+    bool try_get(T& result) {
+        /// Copies the value out only when the stored tag is exactly the one mapped to T.
+        const bool matches = (which == TypeToEnum<std::decay_t<T>>::value);
+        if (matches) result = get<T>();
+        return matches;
+    }
+
+    template <typename T>
+    bool try_get(T& result) const {
+        /// Copies the value out only when the stored tag is exactly the one mapped to T.
+        const bool matches = (which == TypeToEnum<std::decay_t<T>>::value);
+        if (matches) result = get<T>();
+        return matches;
+    }
+
+    template <typename T>
+    T& safe_get() { /// Checked accessor: CHECK_EQ fails unless the stored tag is the one for T.
+        const Types::Which requested = TypeToEnum<std::decay_t<T>>::value;
+        CHECK_EQ(which, requested) << fmt::format("Bad get: has {}, requested {}", get_type_name(),
+                                                  Types::to_string(requested));
+        return get<T>();
+    }
+
+    template <typename T>
+    const T& safe_get() const { /// Const checked accessor; same CHECK_EQ on the stored tag.
+        const Types::Which requested = TypeToEnum<std::decay_t<T>>::value;
+        CHECK_EQ(which, requested) << fmt::format("Bad get: has {}, requested {}", get_type_name(),
+                                                  Types::to_string(requested));
+        return get<T>();
+    }
+
+    bool operator<(const Field& rhs) const { /// Strict order: by tag first, then by value.
+        if (which < rhs.which) return true; /// Different tags: the enum order of `which` decides.
+        if (which > rhs.which) return false;
+
+        switch (which) { /// From here on both sides hold the same alternative.
+        case Types::Null:
+            return false; /// Two Nulls are equivalent, so neither is less.
+        case Types::UInt64:
+            return get<UInt64>() < rhs.get<UInt64>();
+        case Types::UInt128:
+            return get<UInt128>() < rhs.get<UInt128>();
+        case Types::Int64:
+            return get<Int64>() < rhs.get<Int64>();
+        case Types::Int128:
+            return get<Int128>() < rhs.get<Int128>();
+        case Types::Float64:
+            return get<Float64>() < rhs.get<Float64>();
+        case Types::String:
+            return get<String>() < rhs.get<String>();
+        case Types::Array:
+            return get<Array>() < rhs.get<Array>();
+        case Types::Tuple:
+            return get<Tuple>() < rhs.get<Tuple>();
+        case Types::Decimal32:
+            return get<DecimalField<Decimal32>>() < rhs.get<DecimalField<Decimal32>>();
+        case Types::Decimal64:
+            return get<DecimalField<Decimal64>>() < rhs.get<DecimalField<Decimal64>>();
+        case Types::Decimal128:
+            return get<DecimalField<Decimal128>>() < rhs.get<DecimalField<Decimal128>>();
+        case Types::AggregateFunctionState:
+            return get<AggregateFunctionStateData>() < rhs.get<AggregateFunctionStateData>();
+        }
+
+        LOG(FATAL) << "Bad type of Field"; /// Reached only if `which` matches no case above.
+        return {};
+    }
+
+    bool operator>(const Field& rhs) const { return rhs < *this; } /// Defined via operator<.
+
+    bool operator<=(const Field& rhs) const { /// Non-strict order: by tag first, then by value.
+        if (which < rhs.which) return true; /// Different tags: the enum order of `which` decides.
+        if (which > rhs.which) return false;
+
+        switch (which) { /// From here on both sides hold the same alternative.
+        case Types::Null:
+            return true; /// Two Nulls are equivalent.
+        case Types::UInt64:
+            return get<UInt64>() <= rhs.get<UInt64>();
+        case Types::UInt128:
+            return get<UInt128>() <= rhs.get<UInt128>();
+        case Types::Int64:
+            return get<Int64>() <= rhs.get<Int64>();
+        case Types::Int128:
+            return get<Int128>() <= rhs.get<Int128>();
+        case Types::Float64:
+            return get<Float64>() <= rhs.get<Float64>();
+        case Types::String:
+            return get<String>() <= rhs.get<String>();
+        case Types::Array:
+            return get<Array>() <= rhs.get<Array>();
+        case Types::Tuple:
+            return get<Tuple>() <= rhs.get<Tuple>();
+        case Types::Decimal32:
+            return get<DecimalField<Decimal32>>() <= rhs.get<DecimalField<Decimal32>>();
+        case Types::Decimal64:
+            return get<DecimalField<Decimal64>>() <= rhs.get<DecimalField<Decimal64>>();
+        case Types::Decimal128:
+            return get<DecimalField<Decimal128>>() <= rhs.get<DecimalField<Decimal128>>();
+        case Types::AggregateFunctionState:
+            return get<AggregateFunctionStateData>() <= rhs.get<AggregateFunctionStateData>();
+        }
+        LOG(FATAL) << "Bad type of Field"; /// Reached only if `which` matches no case above.
+        return {};
+    }
+
+    bool operator>=(const Field& rhs) const { return rhs <= *this; } /// Defined via operator<=.
+
+    /// Equal only when both sides hold the same alternative and their values compare equal.
+    bool operator==(const Field& rhs) const {
+        if (which != rhs.which) return false;
+        switch (which) {
+        case Types::Null:
+            return true;
+        case Types::UInt64:
+        case Types::Int64:
+        case Types::Float64:
+            /// All three are compared through the UInt64 view of the storage, i.e. by bit
+            /// pattern. For Float64 this means +0.0 != -0.0 and a NaN equals an identical NaN.
+            return get<UInt64>() == rhs.get<UInt64>();
+        case Types::String:
+            return get<String>() == rhs.get<String>();
+        case Types::Array:
+            return get<Array>() == rhs.get<Array>();
+        case Types::Tuple:
+            return get<Tuple>() == rhs.get<Tuple>();
+        case Types::UInt128:
+            return get<UInt128>() == rhs.get<UInt128>();
+        case Types::Int128:
+            return get<Int128>() == rhs.get<Int128>();
+        case Types::Decimal32:
+            return get<DecimalField<Decimal32>>() == rhs.get<DecimalField<Decimal32>>();
+        case Types::Decimal64:
+            return get<DecimalField<Decimal64>>() == rhs.get<DecimalField<Decimal64>>();
+        case Types::Decimal128:
+            return get<DecimalField<Decimal128>>() == rhs.get<DecimalField<Decimal128>>();
+        case Types::AggregateFunctionState:
+            return get<AggregateFunctionStateData>() == rhs.get<AggregateFunctionStateData>();
+        }
+        CHECK(false) << "Bad type of Field";
+        return false; /// Not expected to run; keeps every path returning, like operator<.
+    }
+
+    bool operator!=(const Field& rhs) const { return !(*this == rhs); } /// Negated operator==.
+
+private:
+    std::aligned_union_t<DBMS_MIN_FIELD_SIZE - sizeof(Types::Which), Null, UInt64, UInt128, Int64,
+                         Int128, Float64, String, Array, Tuple, DecimalField<Decimal32>,
+                         DecimalField<Decimal64>, DecimalField<Decimal128>,
+                         AggregateFunctionStateData>
+            storage; /// Raw buffer, sized and aligned to hold any one of the listed alternatives.
+
+    Types::Which which; /// Tag naming the alternative currently constructed in `storage`.
+
+    /// Placement-constructs a value into `storage` and records its tag. Expects that no live
+    /// non-trivial value occupies `storage` (nothing created yet, or destroy() already ran).
+    template <typename T>
+    void create_concrete(T&& x) {
+        using Decayed = std::decay_t<T>;
+
+        // Narrow types are kept in a wider representation (e.g. char is stored as UInt64), and
+        // get<Stored>() hands that wide value back. Constructing the whole wide type here,
+        // rather than only the nominal one, keeps such reads from seeing uninitialized bytes.
+        using Stored = NearestFieldType<Decayed>;
+        /// The tag is written only after the construction has completed.
+        new (&storage) Stored(std::forward<T>(x));
+        which = TypeToEnum<Decayed>::value;
+    }
+
+    /// Assigns x over the live value in place; the stored alternative must already be decay_t<T>.
+    template <typename T>
+    void assign_concrete(T&& x) {
+        using JustT = std::decay_t<T>;
+        assert(which == TypeToEnum<JustT>::value); /// Precondition, verified via assert only.
+        JustT* MAY_ALIAS ptr = reinterpret_cast<JustT*>(&storage);
+        *ptr = std::forward<T>(x); /// Uses JustT's own assignment operator; no destroy/create.
+    }
+
+    template <typename F,
+              typename Field> /// Field template parameter may be const or non-const Field.
+    static void dispatch(F&& f, Field& field) { /// Calls f with the value typed per field.which.
+        switch (field.which) {
+        case Types::Null:
+            f(field.template get<Null>());
+            return;
+
+// gcc 7.3.0: -Wmaybe-uninitialized is silenced for the cases below (non-clang builds only).
+#if !__clang__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+        case Types::UInt64:
+            f(field.template get<UInt64>());
+            return;
+        case Types::UInt128:
+            f(field.template get<UInt128>());
+            return;
+        case Types::Int64:
+            f(field.template get<Int64>());
+            return;
+        case Types::Int128:
+            f(field.template get<Int128>());
+            return;
+        case Types::Float64:
+            f(field.template get<Float64>());
+            return;
+#if !__clang__
+#pragma GCC diagnostic pop
+#endif
+        case Types::String:
+            f(field.template get<String>());
+            return;
+        case Types::Array:
+            f(field.template get<Array>());
+            return;
+        case Types::Tuple:
+            f(field.template get<Tuple>());
+            return;
+        case Types::Decimal32:
+            f(field.template get<DecimalField<Decimal32>>());
+            return;
+        case Types::Decimal64:
+            f(field.template get<DecimalField<Decimal64>>());
+            return;
+        case Types::Decimal128:
+            f(field.template get<DecimalField<Decimal128>>());
+            return;
+        case Types::AggregateFunctionState:
+            f(field.template get<AggregateFunctionStateData>());
+            return;
+        } /// There is no default: a tag matching no case returns without calling f.
+    }
+
+    void create(const Field& x) { /// Copy-constructs x's current alternative into this field.
+        dispatch([this](auto& value) { create_concrete(value); }, x);
+    }
+
+    void create(Field&& x) { /// Move-constructs x's current alternative into this field.
+        dispatch([this](auto& value) { create_concrete(std::move(value)); }, x);
+    }
+
+    void assign(const Field& x) { /// Copy-assigns x's value; both must hold the same alternative.
+        dispatch([this](auto& value) { assign_concrete(value); }, x);
+    }
+
+    void assign(Field&& x) { /// Move-assigns x's value; both must hold the same alternative.
+        dispatch([this](auto& value) { assign_concrete(std::move(value)); }, x);
+    }
+
+    void create(const char* data, size_t size) { /// Constructs a String from `size` bytes at `data`.
+        new (&storage) String(data, size);
+        which = Types::String;
+    }
+
+    void create(const unsigned char* data, size_t size) { /// Byte-pointer form of the above.
+        create(reinterpret_cast<const char*>(data), size);
+    }
+
+    /// Ends the lifetime of the currently stored value.
+    ///  - Tags ordered before MIN_NON_POD are skipped: no destructor call, tag left as is.
+    ///  - String, Array, Tuple and AggregateFunctionState get their destructor invoked.
+    ///  - Any other tag at or past MIN_NON_POD only has the tag reset.
+    /// In the last two cases the field is tagged Null afterwards.
+    ALWAYS_INLINE void destroy() {
+        if (which < Types::MIN_NON_POD) return;
+
+        if (which == Types::String) {
+            destroy<String>();
+        } else if (which == Types::Array) {
+            destroy<Array>();
+        } else if (which == Types::Tuple) {
+            destroy<Tuple>();
+        } else if (which == Types::AggregateFunctionState) {
+            destroy<AggregateFunctionStateData>();
+        }
+
+        /// Resetting the tag keeps later destroy()/create() calls exception safe: should a
+        /// subsequent create() fail, no destructor is run a second time on the same storage.
+        which = Types::Null;
+    }
+
+
+    template <typename T>
+    void destroy() { /// Runs ~T() on the storage in place; the tag `which` is not modified here.
+        T* MAY_ALIAS ptr = reinterpret_cast<T*>(&storage);
+        ptr->~T();
+    }
+};
+
+#undef DBMS_MIN_FIELD_SIZE
+
+template <>
+struct Field::TypeToEnum<Null> { /// TypeToEnum<T>::value is the Types::Which tag used for T.
+    static const Types::Which value = Types::Null;
+};
+template <>
+struct Field::TypeToEnum<UInt64> {
+    static const Types::Which value = Types::UInt64;
+};
+template <>
+struct Field::TypeToEnum<UInt128> {
+    static const Types::Which value = Types::UInt128;
+};
+template <>
+struct Field::TypeToEnum<Int64> {
+    static const Types::Which value = Types::Int64;
+};
+template <>
+struct Field::TypeToEnum<Int128> {
+    static const Types::Which value = Types::Int128;
+};
+template <>
+struct Field::TypeToEnum<Float64> {
+    static const Types::Which value = Types::Float64;
+};
+template <>
+struct Field::TypeToEnum<String> {
+    static const Types::Which value = Types::String;
+};
+template <>
+struct Field::TypeToEnum<Array> {
+    static const Types::Which value = Types::Array;
+};
+template <>
+struct Field::TypeToEnum<Tuple> {
+    static const Types::Which value = Types::Tuple;
+};
+template <>
+struct Field::TypeToEnum<DecimalField<Decimal32>> { /// Decimals are keyed by DecimalField<...>.
+    static const Types::Which value = Types::Decimal32;
+};
+template <>
+struct Field::TypeToEnum<DecimalField<Decimal64>> {
+    static const Types::Which value = Types::Decimal64;
+};
+template <>
+struct Field::TypeToEnum<DecimalField<Decimal128>> {
+    static const Types::Which value = Types::Decimal128;
+};
+template <>
+struct Field::TypeToEnum<AggregateFunctionStateData> {
+    static const Types::Which value = Types::AggregateFunctionState;
+};
+
+template <>
+struct Field::EnumToType<Field::Types::Null> { /// EnumToType<tag>::Type is the C++ type of a tag.
+    using Type = Null;
+};
+template <>
+struct Field::EnumToType<Field::Types::UInt64> {
+    using Type = UInt64;
+};
+template <>
+struct Field::EnumToType<Field::Types::UInt128> {
+    using Type = UInt128;
+};
+template <>
+struct Field::EnumToType<Field::Types::Int64> {
+    using Type = Int64;
+};
+template <>
+struct Field::EnumToType<Field::Types::Int128> {
+    using Type = Int128;
+};
+template <>
+struct Field::EnumToType<Field::Types::Float64> {
+    using Type = Float64;
+};
+template <>
+struct Field::EnumToType<Field::Types::String> {
+    using Type = String;
+};
+template <>
+struct Field::EnumToType<Field::Types::Array> {
+    using Type = Array;
+};
+template <>
+struct Field::EnumToType<Field::Types::Tuple> {
+    using Type = Tuple;
+};
+template <>
+struct Field::EnumToType<Field::Types::Decimal32> { /// Decimal tags map to DecimalField<...>.
+    using Type = DecimalField<Decimal32>;
+};
+template <>
+struct Field::EnumToType<Field::Types::Decimal64> {
+    using Type = DecimalField<Decimal64>;
+};
+template <>
+struct Field::EnumToType<Field::Types::Decimal128> {
+    using Type = DecimalField<Decimal128>;
+};
+template <>
+struct Field::EnumToType<Field::Types::AggregateFunctionState> {
+    using Type = AggregateFunctionStateData; /// Inverse of TypeToEnum<AggregateFunctionStateData>.
+};
+
+template <typename T>
+T get(const Field& field) { /// Free-function form of Field::get<T>(); the return type is T.
+    return field.template get<T>();
+}
+
+template <typename T>
+T get(Field& field) { /// Same, for a non-const Field.
+    return field.template get<T>();
+}
+
+template <typename T>
+T safe_get(const Field& field) { /// Free-function form of the checked Field::safe_get<T>().
+    return field.template safe_get<T>();
+}
+
+template <typename T>
+T safe_get(Field& field) { /// Same, for a non-const Field.
+    return field.template safe_get<T>();
+}
+
+template <>
+struct TypeName<Array> { /// TypeName<T>::get() yields the printable name of T.
+    static std::string get() { return "Array"; }
+};
+template <>
+struct TypeName<Tuple> {
+    static std::string get() { return "Tuple"; }
+};
+template <>
+struct TypeName<AggregateFunctionStateData> {
+    static std::string get() { return "AggregateFunctionState"; }
+};
+
+/// char may be signed or unsigned, and behave identically to signed char or unsigned char,
+///  but they are always three different types.
+/// signedness of char is different in Linux on x86 and Linux on ARM.
+template <>
+struct NearestFieldTypeImpl<char> { /// Follows the platform's char signedness.
+    using Type = std::conditional_t<std::is_signed_v<char>, Int64, UInt64>;
+};
+template <>
+struct NearestFieldTypeImpl<signed char> {
+    using Type = Int64;
+};
+template <>
+struct NearestFieldTypeImpl<unsigned char> {
+    using Type = UInt64;
+};
+
+template <>
+struct NearestFieldTypeImpl<UInt16> { /// Narrower unsigned integers widen to UInt64.
+    using Type = UInt64;
+};
+template <>
+struct NearestFieldTypeImpl<UInt32> {
+    using Type = UInt64;
+};
+
+template <>
+struct NearestFieldTypeImpl<UInt128> {
+    using Type = UInt128;
+};
+//template <> struct NearestFieldTypeImpl<UUID> { using Type = UInt128; };
+template <>
+struct NearestFieldTypeImpl<Int16> { /// Narrower signed integers widen to Int64.
+    using Type = Int64;
+};
+template <>
+struct NearestFieldTypeImpl<Int32> {
+    using Type = Int64;
+};
+
+/// long and long long are always different types that may behave identically or not.
+/// This is different on Linux and Mac.
+template <>
+struct NearestFieldTypeImpl<long> {
+    using Type = Int64;
+};
+template <>
+struct NearestFieldTypeImpl<long long> {
+    using Type = Int64;
+};
+template <>
+struct NearestFieldTypeImpl<unsigned long> {
+    using Type = UInt64;
+};
+template <>
+struct NearestFieldTypeImpl<unsigned long long> {
+    using Type = UInt64;
+};
+
+template <>
+struct NearestFieldTypeImpl<Int128> {
+    using Type = Int128;
+};
+template <>
+struct NearestFieldTypeImpl<Decimal32> { /// A raw decimal is held wrapped in DecimalField.
+    using Type = DecimalField<Decimal32>;
+};
+template <>
+struct NearestFieldTypeImpl<Decimal64> {
+    using Type = DecimalField<Decimal64>;
+};
+template <>
+struct NearestFieldTypeImpl<Decimal128> {
+    using Type = DecimalField<Decimal128>;
+};
+template <>
+struct NearestFieldTypeImpl<DecimalField<Decimal32>> { /// Already wrapped: maps to itself.
+    using Type = DecimalField<Decimal32>;
+};
+template <>
+struct NearestFieldTypeImpl<DecimalField<Decimal64>> {
+    using Type = DecimalField<Decimal64>;
+};
+template <>
+struct NearestFieldTypeImpl<DecimalField<Decimal128>> {
+    using Type = DecimalField<Decimal128>;
+};
+template <>
+struct NearestFieldTypeImpl<Float32> { /// Float32 widens to Float64.
+    using Type = Float64;
+};
+template <>
+struct NearestFieldTypeImpl<Float64> {
+    using Type = Float64;
+};
+template <>
+struct NearestFieldTypeImpl<const char*> { /// C strings are held as String.
+    using Type = String;
+};
+template <>
+struct NearestFieldTypeImpl<String> {
+    using Type = String;
+};
+template <>
+struct NearestFieldTypeImpl<Array> {
+    using Type = Array;
+};
+template <>
+struct NearestFieldTypeImpl<Tuple> {
+    using Type = Tuple;
+};
+template <>
+struct NearestFieldTypeImpl<bool> { /// bool is held as UInt64.
+    using Type = UInt64;
+};
+template <>
+struct NearestFieldTypeImpl<Null> {
+    using Type = Null;
+};
+
+template <>
+struct NearestFieldTypeImpl<AggregateFunctionStateData> {
+    using Type = AggregateFunctionStateData;
+};
+
+/// Converts x to NearestFieldType<decay_t<T>>; a value already of that type is forwarded as is.
+template <typename T>
+decltype(auto) cast_to_nearest_field_type(T&& x) {
+    using Nearest = NearestFieldType<std::decay_t<T>>;
+    constexpr bool already_nearest = std::is_same_v<std::decay_t<T>, Nearest>;
+    if constexpr (already_nearest) return std::forward<T>(x);
+    else return Nearest(x);
+}
+
+/// Converting constructor. The (rather tricky) widening below is what lets one write
+///     Field f = 1;
+/// instead of
+///     Field f = Int64(1);
+/// Things to note:
+/// 1. float <--> int needs an explicit cast
+/// 2. customized types need an explicit cast
+template <typename T>
+Field::Field(T&& rhs, std::enable_if_t<!std::is_same_v<std::decay_t<T>, Field>, void*>) {
+    auto&& val = cast_to_nearest_field_type(std::forward<T>(rhs)); /// Widen to the stored type.
+    create_concrete(std::forward<decltype(val)>(val)); /// Construct it and set `which`.
+}
+
+template <typename T>
+std::enable_if_t<!std::is_same_v<std::decay_t<T>, Field>, Field&> Field::operator=(T&& rhs) {
+    auto&& val = cast_to_nearest_field_type(std::forward<T>(rhs));
+    using U = decltype(val);
+    if (which == TypeToEnum<std::decay_t<U>>::value) {
+        assign_concrete(std::forward<U>(val)); /// Same alternative: assign over the live value.
+    } else {
+        destroy(); /// Different alternative: drop the old value, then construct the new one.
+        create_concrete(std::forward<U>(val));
+    }
+    return *this;
+}
+
+class ReadBuffer;
+class WriteBuffer;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/materialize_block.cpp b/be/src/vec/core/materialize_block.cpp
new file mode 100644
index 0000000000..18e6e46d8e
--- /dev/null
+++ b/be/src/vec/core/materialize_block.cpp
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataStreams/materializeBlock.cpp
+// and modified by Doris
+
+#include "vec/core/materialize_block.h"
+namespace doris::vectorized {
+
+/// Returns a copy of `block` whose columns are each replaced by the result of
+/// convert_to_full_column_if_const(); when `!block` holds, the input is returned as is.
+Block materialize_block(const Block& block) {
+    if (!block) return block;
+    Block materialized = block;
+    const size_t num_columns = materialized.columns();
+    for (size_t pos = 0; pos != num_columns; ++pos) {
+        auto& entry = materialized.get_by_position(pos);
+        entry.column = entry.column->convert_to_full_column_if_const();
+    }
+    return materialized;
+}
+
+void materialize_block_inplace(Block& block) {
+    /// Visits positions 0 .. columns()-1; columns() is re-read before every step.
+    size_t pos = 0;
+    while (pos < block.columns()) block.replace_by_position_if_const(pos++);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/materialize_block.h b/be/src/vec/core/materialize_block.h
new file mode 100644
index 0000000000..d445d0eb43
--- /dev/null
+++ b/be/src/vec/core/materialize_block.h
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataStreams/materializeBlock.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/core/block.h"
+
+namespace doris::vectorized {
+
+/** Converts columns-constants to full columns ("materializes" them).
+  */
+Block materialize_block(const Block& block);
+void materialize_block_inplace(Block& block);
+
+/// Calls block.replace_by_position_if_const() for each position yielded by [start, end).
+/// The bound is tested with `<`, so Iterator has to support ordering comparison.
+template <typename Iterator>
+void materialize_block_inplace(Block& block, Iterator start, Iterator end) {
+    /// Nothing happens when start is not before end.
+    for (; start < end; ++start) block.replace_by_position_if_const(*start);
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/names.h b/be/src/vec/core/names.h
new file mode 100644
index 0000000000..33d8306845
--- /dev/null
+++ b/be/src/vec/core/names.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Names.h
+// and modified by Doris
+
+#pragma once
+
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+namespace doris::vectorized {
+
+using Names = std::vector<std::string>; /// Sequence of names; order kept, duplicates possible.
+using NameSet = std::unordered_set<std::string>; /// Unique names, no defined order.
+using NameOrderedSet = std::set<std::string>; /// Unique names kept in sorted order.
+using NameToNameMap = std::unordered_map<std::string, std::string>;
+using NameToNameSetMap = std::unordered_map<std::string, NameSet>;
+
+using NameWithAlias = std::pair<std::string, std::string>; /// presumably (name, alias) - confirm
+using NamesWithAliases = std::vector<NameWithAlias>;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/sort_block.cpp b/be/src/vec/core/sort_block.cpp
new file mode 100644
index 0000000000..2192a4cfaa
--- /dev/null
+++ b/be/src/vec/core/sort_block.cpp
@@ -0,0 +1,202 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/sortBlock.cpp
+// and modified by Doris
+
+#include "vec/core/sort_block.h"
+
+#include <pdqsort.h>
+
+#include "vec/columns/column_string.h"
+#include "vec/common/typeid_cast.h"
+
+namespace doris::vectorized {
+
+/// True when `description` has a collator set. A collator combined with a column that is
+/// not a ColumnString is reported through LOG(FATAL). TODO Nullable(String)
+static inline bool needCollation(const IColumn* column, const SortColumnDescription& description) {
+    if (!description.collator) return false;
+
+    const bool is_string_column = typeid_cast<const ColumnString*>(column) != nullptr;
+    if (!is_string_column) LOG(FATAL) << "Collations could be specified only for String columns.";
+    return true;
+}
+
+/// Pairs every entry of `description` with the block column it refers to. The column is
+/// looked up by name when column_name is non-empty, and by column_number otherwise.
+ColumnsWithSortDescriptions get_columns_with_sort_description(const Block& block,
+                                                              const SortDescription& description) {
+    const size_t num_sort_columns = description.size();
+    ColumnsWithSortDescriptions res;
+    res.reserve(num_sort_columns);
+    for (size_t idx = 0; idx < num_sort_columns; ++idx) {
+        const auto& sort_column = description[idx];
+        const IColumn* column =
+                sort_column.column_name.empty()
+                        ? block.safe_get_by_position(sort_column.column_number).column.get()
+                        : block.get_by_name(sort_column.column_name).column.get();
+        res.emplace_back(column, sort_column);
+    }
+    return res;
+}
+
+/// Row-index comparator: walks the sort columns in order and lets the first one whose
+/// direction-adjusted comparison is non-zero decide; rows equal on all columns are not "less".
+struct PartialSortingLess {
+    const ColumnsWithSortDescriptions& columns;
+
+    explicit PartialSortingLess(const ColumnsWithSortDescriptions& columns_) : columns(columns_) {}
+
+    bool operator()(size_t a, size_t b) const {
+        for (const auto& column_with_desc : columns) {
+            const auto& column = column_with_desc.first;
+            const auto& desc = column_with_desc.second;
+            const int res =
+                    desc.direction * column->compare_at(a, b, *column, desc.nulls_direction);
+            if (res != 0) return res < 0;
+        }
+        return false;
+    }
+};
+
+/// Row-index comparator that honours collations: a column whose description has a collator is
+/// compared with compare_at_with_collation(), any other with compare_at(); direction applies.
+struct PartialSortingLessWithCollation {
+    const ColumnsWithSortDescriptions& columns;
+
+    explicit PartialSortingLessWithCollation(const ColumnsWithSortDescriptions& columns_)
+            : columns(columns_) {}
+
+    bool operator()(size_t a, size_t b) const {
+        for (const auto& column_with_desc : columns) {
+            const auto& column = column_with_desc.first;
+            const auto& desc = column_with_desc.second;
+            int res = 0;
+            if (!needCollation(column, desc)) {
+                res = column->compare_at(a, b, *column, desc.nulls_direction);
+            } else {
+                const auto& column_string = typeid_cast<const ColumnString&>(*column);
+                res = column_string.compare_at_with_collation(a, b, *column, *desc.collator);
+            }
+
+            res *= desc.direction;
+            if (res != 0) return res < 0;
+        }
+        return false;
+    }
+};
+
+/// Sorts the rows of `block` by `description`; a non-zero `limit` is passed on to permute().
+void sort_block(Block& block, const SortDescription& description, UInt64 limit) {
+    if (!block) return;
+    /// Rearranges every column of the block according to `perm`.
+    const auto apply_permutation = [&block](const IColumn::Permutation& perm, UInt64 row_limit) {
+        const size_t num_columns = block.columns();
+        for (size_t pos = 0; pos < num_columns; ++pos) {
+            auto& entry = block.get_by_position(pos);
+            entry.column = entry.column->permute(perm, row_limit);
+        }
+    };
+
+    if (description.size() != 1) {
+        /// Zero or several sort columns: order an identity permutation with PartialSortingLess.
+        const size_t num_rows = block.rows();
+        IColumn::Permutation perm(num_rows);
+        for (size_t row = 0; row < num_rows; ++row) perm[row] = row;
+        /// A limit that covers every row is treated as "no limit" (full sort).
+        if (limit >= num_rows) limit = 0;
+
+        const ColumnsWithSortDescriptions columns_with_sort_desc =
+                get_columns_with_sort_description(block, description);
+        const PartialSortingLess less(columns_with_sort_desc);
+        if (limit != 0)
+            std::partial_sort(perm.begin(), perm.begin() + limit, perm.end(), less);
+        else
+            pdqsort(perm.begin(), perm.end(), less);
+        apply_permutation(perm, limit);
+    } else {
+        /// Exactly one sort column: the column computes its own permutation.
+        const auto& sort_column = description[0];
+        const IColumn* column =
+                sort_column.column_name.empty()
+                        ? block.safe_get_by_position(sort_column.column_number).column.get()
+                        : block.get_by_name(sort_column.column_name).column.get();
+        IColumn::Permutation perm;
+        column->get_permutation(sort_column.direction == -1, limit, sort_column.nulls_direction,
+                                perm);
+        apply_permutation(perm, limit);
+    }
+}
+
+/// Fills `out_permutation` with row indices in stable sorted order; untouched when `!block`.
+void stable_get_permutation(const Block& block, const SortDescription& description,
+                            IColumn::Permutation& out_permutation) {
+    if (!block) return;
+
+    const size_t num_rows = block.rows();
+    out_permutation.resize(num_rows);
+    for (size_t row = 0; row != num_rows; ++row) out_permutation[row] = row;
+
+    const ColumnsWithSortDescriptions columns_with_sort_desc =
+            get_columns_with_sort_description(block, description);
+    const PartialSortingLess less(columns_with_sort_desc);
+    std::stable_sort(out_permutation.begin(), out_permutation.end(), less);
+}
+
+/// Returns true when no row of `block` orders strictly before its predecessor under
+/// `description` (also true when `!block`).
+bool is_already_sorted(const Block& block, const SortDescription& description) {
+    if (!block) return true;
+
+    const size_t num_rows = block.rows();
+    const ColumnsWithSortDescriptions columns_with_sort_desc =
+            get_columns_with_sort_description(block, description);
+    const PartialSortingLess less(columns_with_sort_desc);
+
+    /// Cheap rejection for larger blocks: probe a handful of evenly spaced rows before the
+    /// full scan. The probe count and the size threshold are arbitrary constants.
+    static constexpr size_t num_rows_to_try = 10;
+    if (num_rows > num_rows_to_try * 5) {
+        size_t prev_position = 0;
+        for (size_t probe = 1; probe < num_rows_to_try; ++probe) {
+            const size_t curr_position = num_rows * probe / num_rows_to_try;
+            if (less(curr_position, prev_position)) return false;
+            prev_position = curr_position;
+        }
+    }
+
+    /// Full check of every adjacent pair of rows.
+    for (size_t row = 1; row < num_rows; ++row) {
+        if (less(row, row - 1)) return false;
+    }
+    return true;
+}
+
+/// Stable sort: applies the permutation from stable_get_permutation() to every column.
+void stable_sort_block(Block& block, const SortDescription& description) {
+    if (!block) return;
+    IColumn::Permutation perm;
+    stable_get_permutation(block, description, perm);
+    const size_t num_columns = block.columns();
+    for (size_t pos = 0; pos < num_columns; ++pos) {
+        auto& entry = block.safe_get_by_position(pos);
+        entry.column = entry.column->permute(perm, 0);
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/sort_block.h b/be/src/vec/core/sort_block.h
new file mode 100644
index 0000000000..0db281b6f2
--- /dev/null
+++ b/be/src/vec/core/sort_block.h
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/sortBlock.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/core/block.h"
+#include "vec/core/sort_description.h"
+
+namespace doris::vectorized
+{
+
+/// Sort one block by `description`. If limit != 0, then the partial sort of the first `limit` rows is produced.
+void sort_block(Block & block, const SortDescription & description, UInt64 limit = 0);
+
+
+/** Stable sort of one block by `description`: rows that compare equal keep their relative order.
+  * The upstream (ClickHouse) rationale is INSERT into MergeTree-family engines, where the order of
+  *  equal rows decides which rows are kept when collapsing; those engines are not part of this header.
+  * Collations are not supported. Partial sorting is not supported.
+  */
+void stable_sort_block(Block & block, const SortDescription & description);
+
+/** Same as stable_sort_block, but does not reorder the block: it only computes the row permutation
+  *  into `out_permutation`, so that the caller can rearrange the column values itself.
+  */
+void stable_get_permutation(const Block & block, const SortDescription & description, IColumn::Permutation & out_permutation);
+
+
+/** Quickly check whether the block is already sorted. If the block is not sorted - returns false as fast as possible.
+  * An empty block is reported as sorted. Collations are not supported.
+  */
+bool is_already_sorted(const Block & block, const SortDescription & description);
+/// One entry per sort key: the column to compare, paired with its sort rule.
+using ColumnsWithSortDescriptions = std::vector<std::pair<const IColumn *, SortColumnDescription>>;
+/// Builds the ColumnsWithSortDescriptions for `block` according to `description`.
+ColumnsWithSortDescriptions get_columns_with_sort_description(const Block & block, const SortDescription & description);
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/sort_cursor.h b/be/src/vec/core/sort_cursor.h
new file mode 100644
index 0000000000..d9712b4a67
--- /dev/null
+++ b/be/src/vec/core/sort_cursor.h
@@ -0,0 +1,228 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/SortCursor.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_string.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/core/block.h"
+#include "vec/core/column_numbers.h"
+#include "vec/core/sort_description.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/runtime/vdata_stream_recvr.h"
+
+namespace doris::vectorized {
+
+/** Cursor over the rows of one block; lets rows of different blocks (and parts) be compared.
+  * The cursor only moves inside a single block. It is used in a priority queue.
+  * It stores the raw pointers returned by ColumnPtr::get(), so the columns presumably must outlive it.
+  */
+struct SortCursorImpl {
+    ColumnRawPtrs all_columns;    /// Every column of the current block.
+    ColumnRawPtrs sort_columns;   /// Columns selected by `desc`, in sort-key order.
+    SortDescription desc;
+    size_t sort_columns_size = 0; /// Number of sort keys.
+    size_t pos = 0;               /// Current row inside the block.
+    size_t rows = 0;              /// Row count of the current block.
+
+    using NeedCollationFlags = std::vector<UInt8>;
+
+    /** Should we use Collator to sort a column? */
+    NeedCollationFlags need_collation;
+
+    /** Is there at least one column with Collator. */
+    bool has_collation = false;
+
+    SortCursorImpl() = default;
+    virtual ~SortCursorImpl() = default;
+
+    SortCursorImpl(const Block& block, const SortDescription& desc_)
+            : desc(desc_), sort_columns_size(desc.size()), need_collation(desc.size()) {
+        reset(block);
+    }
+
+    /// Header-less variant: sort keys must be given by position, naming one is a fatal error.
+    SortCursorImpl(const Columns& columns, const SortDescription& desc_)
+            : desc(desc_), sort_columns_size(desc.size()), need_collation(desc.size()) {
+        for (const auto& column_desc : desc) {
+            if (!column_desc.column_name.empty()) {
+                LOG(FATAL) << "SortDesctiption should contain column position if SortCursor was "
+                              "used without header.";
+            }
+        }
+        reset(columns, {});
+    }
+
+    bool empty() const { return rows == 0; }
+
+    /// Set the cursor to the beginning of the new block.
+    void reset(const Block& block) { reset(block.get_columns(), block); }
+
+    /// Set the cursor to the beginning of `columns`; `block` is only used to resolve key names.
+    void reset(const Columns& columns, const Block& block) {
+        all_columns.clear();
+        sort_columns.clear();
+
+        for (const auto& column : columns) all_columns.push_back(column.get());
+
+        for (const auto& column_desc : desc) {
+            size_t column_number = !column_desc.column_name.empty()
+                                           ? block.get_position_by_name(column_desc.column_name)
+                                           : column_desc.column_number;
+            sort_columns.push_back(columns[column_number].get());
+        }
+
+        pos = 0;
+        /// Without any column there are no rows; never index into an empty vector.
+        rows = all_columns.empty() ? 0 : all_columns[0]->size();
+    }
+
+    bool isFirst() const { return pos == 0; }
+    bool isLast() const { return pos + 1 >= rows; }
+    void next() { ++pos; }
+
+    /// Hooks for cursors fed by a stream of blocks; the base cursor only ever has one block.
+    virtual bool has_next_block() { return false; }
+    virtual Block* block_ptr() { return nullptr; }
+};
+
+using BlockSupplier = std::function<Status(Block**)>; /// Yields the next block via the out-param.
+/// Sort cursor that pulls its blocks one at a time from a BlockSupplier.
+struct ReceiveQueueSortCursorImpl : public SortCursorImpl {
+    ReceiveQueueSortCursorImpl(const BlockSupplier& block_supplier,
+                               const std::vector<VExprContext*>& ordering_expr,
+                               const std::vector<bool>& is_asc_order,
+                               const std::vector<bool>& nulls_first)
+            : SortCursorImpl(), _ordering_expr(ordering_expr), _block_supplier(block_supplier) {
+        sort_columns_size = ordering_expr.size();
+        desc.resize(ordering_expr.size());
+        for (size_t i = 0; i < desc.size(); ++i) {
+            desc[i].direction = is_asc_order[i] ? 1 : -1;
+            // NULLS FIRST needs nulls_direction opposite to direction, NULLS LAST equal to it.
+            desc[i].nulls_direction = nulls_first[i] ? -desc[i].direction : desc[i].direction;
+        }
+        _is_eof = !has_next_block(); // eagerly fetch the first block
+    }
+
+    /// Pulls the next block and re-points the cursor at it; false on error or end of stream.
+    bool has_next_block() override {
+        auto status = _block_supplier(&_block_ptr);
+        if (status.ok() && _block_ptr != nullptr) {
+            for (size_t i = 0; i < desc.size(); ++i) {
+                // NOTE(review): the result of execute() is ignored - confirm it cannot fail.
+                _ordering_expr[i]->execute(_block_ptr, &desc[i].column_number);
+            }
+            SortCursorImpl::reset(*_block_ptr);
+            return true;
+        }
+        _block_ptr = nullptr;
+        return false;
+    }
+
+    Block* block_ptr() override { return _block_ptr; }
+    size_t columns_num() const { return all_columns.size(); }
+    Block create_empty_blocks() const {
+        DCHECK(_block_ptr != nullptr); // a current block is required as the template
+        MutableColumns columns(columns_num());
+        for (size_t i = 0; i < columns.size(); ++i) columns[i] = all_columns[i]->clone_empty();
+        return _block_ptr->clone_with_columns(std::move(columns));
+    }
+
+    const std::vector<VExprContext*>& _ordering_expr; // reference member: caller's vector must outlive this
+    Block* _block_ptr = nullptr;
+    BlockSupplier _block_supplier{};
+    bool _is_eof = false;
+};
+
+/// Lightweight, copyable handle around a SortCursorImpl (it only stores the pointer).
+struct SortCursor {
+    SortCursorImpl* impl;
+
+    SortCursor(SortCursorImpl* impl_) : impl(impl_) {}
+    SortCursorImpl* operator->() { return impl; }
+    const SortCursorImpl* operator->() const { return impl; }
+
+    /// Three-way comparison of row `lhs_pos` of this cursor with row `rhs_pos` of `rhs`:
+    /// 1 if this row sorts after, -1 if it sorts before, 0 if all sort keys are equal.
+    int8_t greater_at(const SortCursor& rhs, size_t lhs_pos, size_t rhs_pos) const {
+        for (size_t key = 0; key < impl->sort_columns_size; ++key) {
+            const auto& key_desc = impl->desc[key];
+            const int cmp = key_desc.direction *
+                            impl->sort_columns[key]->compare_at(lhs_pos, rhs_pos,
+                                                                *(rhs.impl->sort_columns[key]),
+                                                                key_desc.nulls_direction);
+            if (cmp != 0) return cmp > 0 ? 1 : -1;
+        }
+        return 0;
+    }
+
+    /// True iff both blocks are non-empty and this block's last row sorts strictly before rhs's first row.
+    bool totally_less(const SortCursor& rhs) const {
+        const bool any_empty = impl->rows == 0 || rhs.impl->rows == 0;
+        return !any_empty && greater_at(rhs, impl->rows - 1, 0) == -1;
+    }
+
+    /// True when this cursor is non-empty and its current row sorts after `rhs`'s current row.
+    bool greater(const SortCursor& rhs) const {
+        if (impl->empty()) return false;
+        return greater_at(rhs, impl->pos, rhs.impl->pos) > 0;
+    }
+
+    /// Deliberately inverted, so that a priority queue pops cursors in ascending order.
+    bool operator<(const SortCursor& rhs) const { return greater(rhs); }
+};
+
+/// Copyable handle around a SortCursorImpl that compares blocks by their boundary rows.
+struct SortBlockCursor {
+    SortCursorImpl* impl;
+
+    SortBlockCursor(SortCursorImpl* impl_) : impl(impl_) {}
+    SortCursorImpl* operator->() { return impl; }
+    const SortCursorImpl* operator->() const { return impl; }
+
+    /// Compares row `rows` of this cursor with the LAST row of `rhs`:
+    /// 1 if this row sorts before it, -1 if it sorts after, 0 if all sort keys are equal.
+    int8_t less_at(const SortBlockCursor& rhs, int rows) const {
+        for (size_t key = 0; key < impl->sort_columns_size; ++key) {
+            const auto& key_desc = impl->desc[key];
+            const int cmp = key_desc.direction *
+                            impl->sort_columns[key]->compare_at(rows, rhs->rows - 1,
+                                                                *(rhs.impl->sort_columns[key]),
+                                                                key_desc.nulls_direction);
+            if (cmp < 0) return 1;
+            if (cmp > 0) return -1;
+        }
+        return 0;
+    }
+
+    /// True iff both blocks are non-empty and this block's first row sorts strictly after
+    /// the last row of `rhs`'s block.
+    bool totally_greater(const SortBlockCursor& rhs) const {
+        const bool any_empty = impl->rows == 0 || rhs.impl->rows == 0;
+        return !any_empty && less_at(rhs, 0) == -1;
+    }
+
+    /// True when this block's last row sorts before, or equal to, the last row of `rhs`'s block.
+    bool operator<(const SortBlockCursor& rhs) const { return less_at(rhs, impl->rows - 1) >= 0; }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/core/sort_description.h b/be/src/vec/core/sort_description.h
new file mode 100644
index 0000000000..3a4fbca140
--- /dev/null
+++ b/be/src/vec/core/sort_description.h
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/SortDescription.h
+// and modified by Doris
+
+#pragma once
+
+#include "vector"
+#include "memory"
+#include "cstddef"
+#include "string"
+#include "vec/core/field.h"
+
+class Collator;
+
+namespace doris::vectorized
+{
+
+struct FillColumnDescription
+{
+    /// All missing values in the range [FROM, TO) will be filled.
+    /// The range [FROM, TO) respects the sorting direction.
+    Field fill_from;        /// Fill value >= FILL_FROM
+    Field fill_to;          /// Fill value + STEP < FILL_TO
+    Field fill_step;        /// Default = 1 or -1 according to direction
+};
+
+/// Description of the sorting rule by one column.
+struct SortColumnDescription
+{
+    std::string column_name; /// The name of the column.
+    int column_number = 0;   /// Column number (used if no name is given).
+    int direction = 1;       /// 1 - ascending, -1 - descending.
+    int nulls_direction = 1; /// 1 - NULLs and NaNs are greater, -1 - less.
+                             /// To achieve NULLS LAST, set it equal to direction, to achieve NULLS FIRST, set it opposite.
+    std::shared_ptr<Collator> collator = nullptr; /// Collator for locale-specific comparison of strings
+    bool with_fill = false;
+    FillColumnDescription fill_description = {};
+
+    SortColumnDescription(
+            int column_number_, int direction_, int nulls_direction_,
+            const std::shared_ptr<Collator> & collator_ = nullptr, bool with_fill_ = false,
+            const FillColumnDescription & fill_description_ = {})
+            : column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_)
+            , with_fill(with_fill_), fill_description(fill_description_) {}
+
+    /// Defaults (column 0, ascending, NULLs greater) keep the int fields from being indeterminate.
+    SortColumnDescription() = default;
+
+    /// Compares the column identity and both directions; collator and fill settings are ignored.
+    bool operator == (const SortColumnDescription & other) const
+    {
+        return column_name == other.column_name && column_number == other.column_number
+            && direction == other.direction && nulls_direction == other.nulls_direction;
+    }
+    bool operator != (const SortColumnDescription & other) const
+    {
+        return !(*this == other);
+    }
+};
+
+/// Description of the sorting rule for several columns; earlier entries are compared first.
+using SortDescription = std::vector<SortColumnDescription>;
+
+}
diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h
new file mode 100644
index 0000000000..dddeff86c0
--- /dev/null
+++ b/be/src/vec/core/types.h
@@ -0,0 +1,445 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Core/Types.h
+// and modified by Doris
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include "util/binary_cast.hpp"
+#include "util/bitmap_value.h"
+
+namespace doris::vectorized {
+
+/// Data types for representing elementary values from a database in RAM.
+
+struct Null {}; /// Empty tag type (no members).
+
+enum class TypeIndex { /// Identifies a value type; getTypeName() maps each enumerator to a name.
+    Nothing = 0,
+    UInt8,
+    UInt16,
+    UInt32,
+    UInt64,
+    UInt128,
+    Int8,
+    Int16,
+    Int32,
+    Int64,
+    Int128,
+    Float32,
+    Float64,
+    Date,
+    DateTime,
+    String,
+    FixedString,
+    Enum8,
+    Enum16,
+    Decimal32,
+    Decimal64,
+    Decimal128,
+    UUID,
+    Array,
+    Tuple,
+    Set,
+    Interval,
+    Nullable,
+    Function,
+    AggregateFunction,
+    LowCardinality,
+    BitMap, /// Paired with BitmapValue by TypeName<BitmapValue> and getTypeName().
+};
+
+struct Consted { /// Wraps a single TypeIndex. NOTE(review): its intended use is not visible in this file.
+    TypeIndex tp;
+};
+
+using UInt8 = uint8_t; /// Fixed-width unsigned integer aliases.
+using UInt16 = uint16_t;
+using UInt32 = uint32_t;
+using UInt64 = uint64_t;
+
+using Int8 = int8_t; /// Fixed-width signed integer aliases.
+using Int16 = int16_t;
+using Int32 = int32_t;
+using Int64 = int64_t;
+
+using Float32 = float; /// Floating-point aliases.
+using Float64 = double;
+
+using String = std::string;
+
+/** Compile-time trait: false by default (e.g. for types not used in DB), true only where specialized.
+  */
+template <typename T>
+constexpr bool IsNumber = false;
+
+template <>
+inline constexpr bool IsNumber<UInt8> = true;
+template <>
+inline constexpr bool IsNumber<UInt16> = true;
+template <>
+inline constexpr bool IsNumber<UInt32> = true;
+template <>
+inline constexpr bool IsNumber<UInt64> = true;
+template <>
+inline constexpr bool IsNumber<Int8> = true;
+template <>
+inline constexpr bool IsNumber<Int16> = true;
+template <>
+inline constexpr bool IsNumber<Int32> = true;
+template <>
+inline constexpr bool IsNumber<Int64> = true;
+template <>
+inline constexpr bool IsNumber<Float32> = true;
+template <>
+inline constexpr bool IsNumber<Float64> = true;
+
+template <typename T>
+struct TypeName; /// Maps a type to its display name via TypeName<T>::get(); only specializations are defined.
+
+template <>
+struct TypeName<UInt8> {
+    static const char* get() { return "UInt8"; }
+};
+template <>
+struct TypeName<UInt16> {
+    static const char* get() { return "UInt16"; }
+};
+template <>
+struct TypeName<UInt32> {
+    static const char* get() { return "UInt32"; }
+};
+template <>
+struct TypeName<UInt64> {
+    static const char* get() { return "UInt64"; }
+};
+template <>
+struct TypeName<Int8> {
+    static const char* get() { return "Int8"; }
+};
+template <>
+struct TypeName<Int16> {
+    static const char* get() { return "Int16"; }
+};
+template <>
+struct TypeName<Int32> {
+    static const char* get() { return "Int32"; }
+};
+template <>
+struct TypeName<Int64> {
+    static const char* get() { return "Int64"; }
+};
+template <>
+struct TypeName<Float32> {
+    static const char* get() { return "Float32"; }
+};
+template <>
+struct TypeName<Float64> {
+    static const char* get() { return "Float64"; }
+};
+template <>
+struct TypeName<String> {
+    static const char* get() { return "String"; }
+};
+template <>
+struct TypeName<BitmapValue> {
+    static const char* get() { return "BitMap"; } /// Same spelling as TypeIndex::BitMap.
+};
+
+template <typename T>
+struct TypeId; /// Maps a type to its TypeIndex via TypeId<T>::value; only specializations are defined.
+template <>
+struct TypeId<UInt8> {
+    static constexpr const TypeIndex value = TypeIndex::UInt8;
+};
+template <>
+struct TypeId<UInt16> {
+    static constexpr const TypeIndex value = TypeIndex::UInt16;
+};
+template <>
+struct TypeId<UInt32> {
+    static constexpr const TypeIndex value = TypeIndex::UInt32;
+};
+template <>
+struct TypeId<UInt64> {
+    static constexpr const TypeIndex value = TypeIndex::UInt64;
+};
+template <>
+struct TypeId<Int8> {
+    static constexpr const TypeIndex value = TypeIndex::Int8;
+};
+template <>
+struct TypeId<Int16> {
+    static constexpr const TypeIndex value = TypeIndex::Int16;
+};
+template <>
+struct TypeId<Int32> {
+    static constexpr const TypeIndex value = TypeIndex::Int32;
+};
+template <>
+struct TypeId<Int64> {
+    static constexpr const TypeIndex value = TypeIndex::Int64;
+};
+template <>
+struct TypeId<Float32> {
+    static constexpr const TypeIndex value = TypeIndex::Float32;
+};
+template <>
+struct TypeId<Float64> {
+    static constexpr const TypeIndex value = TypeIndex::Float64;
+};
+
+/// Not a data type in database, defined just for convenience.
+using Strings = std::vector<String>;
+
+using Int128 = __int128; /// Compiler-provided 128-bit signed integer, registered with the traits below.
+template <>
+inline constexpr bool IsNumber<Int128> = true;
+template <>
+struct TypeName<Int128> {
+    static const char* get() { return "Int128"; }
+};
+template <>
+struct TypeId<Int128> {
+    static constexpr const TypeIndex value = TypeIndex::Int128;
+};
+
+using Date = Int64; /// Date and DateTime are both plain Int64 aliases, i.e. the same C++ type.
+using DateTime = Int64;
+
+/// Own FieldType for Decimal.
+/// It is only a "storage" for decimal. To perform operations, you also have to provide a scale (number of digits after point).
+template <typename T>
+struct Decimal {
+    using NativeType = T;
+
+    Decimal() = default; /// Leaves `value` without an initializer of its own.
+    Decimal(Decimal<T>&&) = default;
+    Decimal(const Decimal<T>&) = default;
+
+    Decimal(const T& value_) : value(value_) {} /// Implicit conversion from the native type.
+
+    /// Converts `value_` through DecimalV2Value and reinterprets the result as T via binary_cast.
+    static Decimal double_to_decimal(double value_) {
+        DecimalV2Value decimal_value;
+        decimal_value.assign_from_double(value_);
+        return Decimal(binary_cast<DecimalV2Value, T>(decimal_value));
+    }
+
+    template <typename U>
+    Decimal(const Decimal<U>& x) : value(x) {} /// Copies the raw stored value of another width; no scale is involved.
+
+    constexpr Decimal<T>& operator=(Decimal<T>&&) = default;
+    constexpr Decimal<T>& operator=(const Decimal<T>&) = default;
+
+    operator T() const { return value; } /// Implicit conversion back to the native type.
+
+    /// Compound operators act directly on the raw stored value.
+    const Decimal<T>& operator+=(const T& x) {
+        value += x;
+        return *this;
+    }
+    const Decimal<T>& operator-=(const T& x) {
+        value -= x;
+        return *this;
+    }
+    const Decimal<T>& operator*=(const T& x) {
+        value *= x;
+        return *this;
+    }
+    const Decimal<T>& operator/=(const T& x) {
+        value /= x;
+        return *this;
+    }
+    const Decimal<T>& operator%=(const T& x) {
+        value %= x;
+        return *this;
+    }
+
+    T value; /// The raw stored value.
+};
+
+using Decimal32 = Decimal<Int32>; /// Decimal storage over 32-, 64- and 128-bit integers.
+using Decimal64 = Decimal<Int64>;
+using Decimal128 = Decimal<Int128>;
+
+template <>
+struct TypeName<Decimal32> {
+    static const char* get() { return "Decimal32"; }
+};
+template <>
+struct TypeName<Decimal64> {
+    static const char* get() { return "Decimal64"; }
+};
+template <>
+struct TypeName<Decimal128> {
+    static const char* get() { return "Decimal128"; }
+};
+
+template <>
+struct TypeId<Decimal32> {
+    static constexpr const TypeIndex value = TypeIndex::Decimal32;
+};
+template <>
+struct TypeId<Decimal64> {
+    static constexpr const TypeIndex value = TypeIndex::Decimal64;
+};
+template <>
+struct TypeId<Decimal128> {
+    static constexpr const TypeIndex value = TypeIndex::Decimal128;
+};
+
+template <typename T>
+constexpr bool IsDecimalNumber = false; /// True only for the three Decimal aliases specialized below.
+template <>
+inline constexpr bool IsDecimalNumber<Decimal32> = true;
+template <>
+inline constexpr bool IsDecimalNumber<Decimal64> = true;
+template <>
+inline constexpr bool IsDecimalNumber<Decimal128> = true;
+
+template <typename T>
+struct NativeType { /// Identity by default; for Decimal types it yields the underlying integer type.
+    using Type = T;
+};
+template <>
+struct NativeType<Decimal32> {
+    using Type = Int32;
+};
+template <>
+struct NativeType<Decimal64> {
+    using Type = Int64;
+};
+template <>
+struct NativeType<Decimal128> {
+    using Type = Int128;
+};
+
+inline const char* getTypeName(TypeIndex idx) { /// Returns the name of `idx` as a string literal.
+    switch (idx) { /// Uses TypeName<T>::get() where a specialization exists, a literal otherwise.
+    case TypeIndex::Nothing:
+        return "Nothing";
+    case TypeIndex::UInt8:
+        return TypeName<UInt8>::get();
+    case TypeIndex::UInt16:
+        return TypeName<UInt16>::get();
+    case TypeIndex::UInt32:
+        return TypeName<UInt32>::get();
+    case TypeIndex::UInt64:
+        return TypeName<UInt64>::get();
+    case TypeIndex::UInt128:
+        return "UInt128";
+    case TypeIndex::Int8:
+        return TypeName<Int8>::get();
+    case TypeIndex::Int16:
+        return TypeName<Int16>::get();
+    case TypeIndex::Int32:
+        return TypeName<Int32>::get();
+    case TypeIndex::Int64:
+        return TypeName<Int64>::get();
+    case TypeIndex::Int128:
+        return TypeName<Int128>::get();
+    case TypeIndex::Float32:
+        return TypeName<Float32>::get();
+    case TypeIndex::Float64:
+        return TypeName<Float64>::get();
+    case TypeIndex::Date:
+        return "Date";
+    case TypeIndex::DateTime:
+        return "DateTime";
+    case TypeIndex::String:
+        return TypeName<String>::get();
+    case TypeIndex::FixedString:
+        return "FixedString";
+    case TypeIndex::Enum8:
+        return "Enum8";
+    case TypeIndex::Enum16:
+        return "Enum16";
+    case TypeIndex::Decimal32:
+        return TypeName<Decimal32>::get();
+    case TypeIndex::Decimal64:
+        return TypeName<Decimal64>::get();
+    case TypeIndex::Decimal128:
+        return TypeName<Decimal128>::get();
+    case TypeIndex::UUID:
+        return "UUID";
+    case TypeIndex::Array:
+        return "Array";
+    case TypeIndex::Tuple:
+        return "Tuple";
+    case TypeIndex::Set:
+        return "Set";
+    case TypeIndex::Interval:
+        return "Interval";
+    case TypeIndex::Nullable:
+        return "Nullable";
+    case TypeIndex::Function:
+        return "Function";
+    case TypeIndex::AggregateFunction:
+        return "AggregateFunction";
+    case TypeIndex::LowCardinality:
+        return "LowCardinality";
+    case TypeIndex::BitMap:
+        return TypeName<BitmapValue>::get();
+    }
+
+    __builtin_unreachable(); /// Every enumerator returns above; a value outside the enum reaching here is undefined behavior.
+}
+
+} // namespace doris::vectorized
+
+/// Specialization of `std::hash` for the Decimal<T> types.
+namespace std {
+template <typename T>
+struct hash<doris::vectorized::Decimal<T>> { /// Generic case: hash the raw stored value with hash<T>.
+    size_t operator()(const doris::vectorized::Decimal<T>& x) const { return hash<T>()(x.value); }
+};
+
+template <>
+struct hash<doris::vectorized::Decimal128> { /// 128-bit case: XOR of the hashes of the two 64-bit halves.
+    size_t operator()(const doris::vectorized::Decimal128& x) const {
+        return std::hash<doris::vectorized::Int64>()(x.value >> 64) ^
+               std::hash<doris::vectorized::Int64>()(
+                       x.value & std::numeric_limits<doris::vectorized::UInt64>::max());
+    }
+};
+
+constexpr bool is_integer(doris::vectorized::TypeIndex index) { // true for the UInt8..UInt128 and Int8..Int128 indexes
+    using TypeIndex = doris::vectorized::TypeIndex;
+    switch (index) { // NOTE(review): declared inside namespace std, which the standard reserves - consider moving it out.
+    case TypeIndex::UInt8:
+    case TypeIndex::UInt16:
+    case TypeIndex::UInt32:
+    case TypeIndex::UInt64:
+    case TypeIndex::UInt128:
+    case TypeIndex::Int8:
+    case TypeIndex::Int16:
+    case TypeIndex::Int32:
+    case TypeIndex::Int64:
+    case TypeIndex::Int128: {
+        return true;
+    }
+    default: { // every other index, including floats and decimals
+        return false;
+    }
+    }
+}
+} // namespace std
diff --git a/be/src/vec/data_types/data_type.cpp b/be/src/vec/data_types/data_type.cpp
new file mode 100644
index 0000000000..ef40e10934
--- /dev/null
+++ b/be/src/vec/data_types/data_type.cpp
@@ -0,0 +1,217 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/IDataType.cpp
+// and modified by Doris
+
+#include "vec/data_types/data_type.h"
+
+#include <fmt/format.h>
+
+#include "common/logging.h"
+#include "olap/olap_common.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_const.h"
+#include "vec/data_types/data_type_bitmap.h"
+#include "vec/data_types/data_type_date.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_nothing.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/data_types/data_type_nullable.h"
+
+namespace doris::vectorized {
+
+IDataType::IDataType() = default; // nothing to initialize in the base class
+
+IDataType::~IDataType() = default; // defined out of line, in this translation unit
+
+String IDataType::get_name() const {
+    return do_get_name(); // the name itself is produced by do_get_name()
+}
+
+String IDataType::do_get_name() const {
+    return get_family_name(); // base implementation: the type name is its family name
+}
+
+void IDataType::update_avg_value_size_hint(const IColumn& column, double& avg_value_size_hint) {
+    /// Too few rows give an unreliable average, so the hint is left untouched.
+    const size_t row_count = column.size();
+    if (row_count <= 10) return;
+    const double observed = static_cast<double>(column.byte_size()) / row_count;
+    if (observed > avg_value_size_hint) {
+        /// Grow quickly, but cap at 1024 to avoid overestimation.
+        avg_value_size_hint = std::min(1024., observed);
+    } else if (observed * 2 < avg_value_size_hint) {
+        /// Shrink slowly: weighted average that favours the previous hint 3:1.
+        avg_value_size_hint = (observed + avg_value_size_hint * 3) / 4;
+    }
+}
+
+ColumnPtr IDataType::create_column_const(size_t size, const Field& field) const {
+    auto nested = create_column(); // fresh column of this data type
+    nested->insert(field);         // holds exactly the one given value
+    return ColumnConst::create(std::move(nested), size); // wrapped together with `size`
+}
+
+ColumnPtr IDataType::create_column_const_with_default_value(size_t size) const {
+    return create_column_const(size, get_default()); // constant column built from get_default()
+}
+
+DataTypePtr IDataType::promote_numeric_type() const {
+    LOG(FATAL) << fmt::format("Data type {} can't be promoted.", get_name()); // base implementation: promotion is unsupported
+    return nullptr; // follows the fatal log; satisfies the return type
+}
+
+size_t IDataType::get_size_of_value_in_memory() const {
+    LOG(FATAL) << fmt::format("Value of type {} in memory is not of fixed size.", get_name()); // base implementation: no fixed size
+    return 0; // follows the fatal log; satisfies the return type
+}
+
+void IDataType::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const {
+    LOG(FATAL) << fmt::format("Data type {} to_string not implement.", get_name()); // base implementation; the arguments are unused
+}
+
+std::string IDataType::to_string(const IColumn& column, size_t row_num) const {
+    LOG(FATAL) << fmt::format("Data type {} to_string not implement.", get_name());
+    return {}; // follows the fatal log, like the sibling functions; gives the non-void function a return
+}
+void IDataType::insert_default_into(IColumn& column) const {
+    column.insert_default(); // forwards to the column's own insert_default()
+}
+
+/// Maps a PrimitiveType to its vectorized data type, wrapped in Nullable when `is_nullable` is set.
+DataTypePtr IDataType::from_thrift(const doris::PrimitiveType& type, const bool is_nullable) {
+    DataTypePtr result;
+    switch (type) {
+        case TYPE_BOOLEAN:
+            result = std::make_shared<DataTypeUInt8>();
+            break;
+        case TYPE_TINYINT:
+            result = std::make_shared<DataTypeInt8>();
+            break;
+        case TYPE_SMALLINT:
+            result = std::make_shared<DataTypeInt16>();
+            break;
+        case TYPE_INT:
+            result = std::make_shared<DataTypeInt32>();
+            break;
+        case TYPE_FLOAT:
+            result = std::make_shared<DataTypeFloat32>();
+            break;
+        case TYPE_BIGINT:
+            result = std::make_shared<DataTypeInt64>();
+            break;
+        case TYPE_LARGEINT:
+            result = std::make_shared<DataTypeInt128>();
+            break;
+        case TYPE_DATE:
+            result = std::make_shared<DataTypeDate>();
+            break;
+        case TYPE_DATETIME:
+            result = std::make_shared<DataTypeDateTime>();
+            break;
+        case TYPE_TIME: // TIME maps to the same Float64 type as DOUBLE
+        case TYPE_DOUBLE:
+            result = std::make_shared<DataTypeFloat64>();
+            break;
+        case TYPE_CHAR:
+        case TYPE_VARCHAR:
+        case TYPE_HLL:
+        case TYPE_STRING:
+            result = std::make_shared<DataTypeString>();
+            break;
+        case TYPE_OBJECT:
+            result = std::make_shared<DataTypeBitMap>();
+            break;
+        case TYPE_DECIMALV2:
+            result = std::make_shared<DataTypeDecimal<Decimal128>>(27, 9);
+            break;
+        case TYPE_NULL:
+            result = std::make_shared<DataTypeNothing>();
+            break;
+        case INVALID_TYPE:
+        default:
+            DCHECK(false) << "Invalid primitive type";
+            return nullptr; // never wrap a missing type into Nullable below
+    }
+    if (is_nullable) {
+        result = std::make_shared<DataTypeNullable>(result);
+    }
+
+    return result;
+}
+
+/// Maps an OLAP FieldType to its vectorized data type, wrapped in Nullable when `is_nullable` is set.
+DataTypePtr IDataType::from_olap_engine(const doris::FieldType& type, const bool is_nullable) {
+    DataTypePtr result;
+    switch (type) {
+        case OLAP_FIELD_TYPE_BOOL:
+            result = std::make_shared<DataTypeUInt8>();
+            break;
+        case OLAP_FIELD_TYPE_TINYINT:
+            result = std::make_shared<DataTypeInt8>();
+            break;
+        case OLAP_FIELD_TYPE_SMALLINT:
+            result = std::make_shared<DataTypeInt16>();
+            break;
+        case OLAP_FIELD_TYPE_INT:
+            result = std::make_shared<DataTypeInt32>();
+            break;
+        case OLAP_FIELD_TYPE_FLOAT:
+            result = std::make_shared<DataTypeFloat32>();
+            break;
+        case OLAP_FIELD_TYPE_BIGINT:
+            result = std::make_shared<DataTypeInt64>();
+            break;
+        case OLAP_FIELD_TYPE_LARGEINT:
+            result = std::make_shared<DataTypeInt128>();
+            break;
+        case OLAP_FIELD_TYPE_DATE:
+            result = std::make_shared<DataTypeDate>();
+            break;
+        case OLAP_FIELD_TYPE_DATETIME:
+            result = std::make_shared<DataTypeDateTime>();
+            break;
+        case OLAP_FIELD_TYPE_DOUBLE:
+            result = std::make_shared<DataTypeFloat64>();
+            break;
+        case OLAP_FIELD_TYPE_CHAR:
+        case OLAP_FIELD_TYPE_VARCHAR:
+        case OLAP_FIELD_TYPE_HLL:
+        case OLAP_FIELD_TYPE_STRING:
+            result = std::make_shared<DataTypeString>();
+            break;
+        case OLAP_FIELD_TYPE_OBJECT:
+            result = std::make_shared<DataTypeBitMap>();
+            break;
+        case OLAP_FIELD_TYPE_DECIMAL:
+            result = std::make_shared<DataTypeDecimal<Decimal128>>(27, 9);
+            break;
+
+        default:
+            DCHECK(false) << "Invalid olap engine type";
+            return nullptr; // never wrap a missing type into Nullable below
+    }
+    if (is_nullable) {
+        result = std::make_shared<DataTypeNullable>(result);
+    }
+
+    return result;
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h
new file mode 100644
index 0000000000..70f4074658
--- /dev/null
+++ b/be/src/vec/data_types/data_type.h
@@ -0,0 +1,402 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/IDataType.h
+// and modified by Doris
+
+#pragma once
+
+#include <boost/noncopyable.hpp>
+#include <memory>
+
+#include "runtime/primitive_type.h"
+#include "vec/common/cow.h"
+#include "vec/common/string_buffer.hpp"
+#include "vec/core/types.h"
+
+namespace doris {
+class PBlock;
+class PColumn;
+enum FieldType;
+
+namespace vectorized {
+
+class IDataType;
+
+class IColumn;
+using ColumnPtr = COW<IColumn>::Ptr;
+using MutableColumnPtr = COW<IColumn>::MutablePtr;
+
+class Field;
+
+using DataTypePtr = std::shared_ptr<const IDataType>;
+using DataTypes = std::vector<DataTypePtr>;
+
+/** Properties of data type.
+  * Contains methods for serialization/deserialization.
+  * Implementations of this interface represent a data type (example: UInt8)
+  *  or parametric family of data types (example: Array(...)).
+  *
+  * A DataType is a totally immutable object, so instances can always be shared.
+  */
+class IDataType : private boost::noncopyable {
+public:
+    IDataType();
+    virtual ~IDataType();
+
+    /// Name of data type (examples: UInt64, Array(String)).
+    String get_name() const;
+
+    /// Name of data type family (example: FixedString, Array).
+    virtual const char* get_family_name() const = 0;
+
+    /// Data type id. It's used for runtime type checks.
+    virtual TypeIndex get_type_id() const = 0;
+
+    virtual void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const;
+    virtual std::string to_string(const IColumn& column, size_t row_num) const;
+
+protected:
+    virtual String do_get_name() const;
+
+public:
+    /** Create empty column for corresponding type.
+      */
+    virtual MutableColumnPtr create_column() const = 0;
+
+    /** Create ColumnConst for corresponding type, with specified size and value.
+      */
+    ColumnPtr create_column_const(size_t size, const Field& field) const;
+    ColumnPtr create_column_const_with_default_value(size_t size) const;
+
+    /** Get default value of data type.
+      * It is the "default" default, regardless of the fact that a table could contain a different user-specified default.
+      */
+    virtual Field get_default() const = 0;
+
+    /** The data type can be promoted in order to try to avoid overflows.
+      * Data types which can be promoted are typically Number or Decimal data types.
+      */
+    virtual bool can_be_promoted() const { return false; }
+
+    /** Return the promoted numeric data type of the current data type. Throw an exception if `can_be_promoted() == false`.
+      */
+    virtual DataTypePtr promote_numeric_type() const;
+
+    /** Directly insert default value into a column. The default implementation uses method IColumn::insert_default.
+      * This should be overridden if data type default value differs from column default value (example: Enum data types).
+      */
+    virtual void insert_default_into(IColumn& column) const;
+
+    /// Checks that two instances belong to the same type
+    virtual bool equals(const IDataType& rhs) const = 0;
+
+    /// Various properties on behaviour of data type.
+
+    /** The data type is dependent on parameters and types with different parameters are different.
+      * Examples: FixedString(N), Tuple(T1, T2), Nullable(T).
+      * Otherwise all instances of the same class are the same types.
+      */
+    virtual bool get_is_parametric() const = 0;
+
+    /** The data type is dependent on parameters and at least one of them is another type.
+      * Examples: Tuple(T1, T2), Nullable(T). But FixedString(N) is not.
+      */
+    virtual bool have_subtypes() const = 0;
+
+    /** Can appear in table definition.
+      * Counterexamples: Interval, Nothing.
+      */
+    virtual bool cannot_be_stored_in_tables() const { return false; }
+
+    /** In text formats that render "pretty" tables,
+      *  is it better to align value right in table cell.
+      * Examples: numbers, even nullable.
+      */
+    virtual bool should_align_right_in_pretty_formats() const { return false; }
+
+    /** Whether the value formatted in any text format contains only valid UTF8 sequences.
+      * Examples: numbers, Date, DateTime.
+      * Counterexample: String (because it can contain arbitrary bytes).
+      * For Enum, it depends.
+      */
+    virtual bool text_can_contain_only_valid_utf8() const { return false; }
+
+    /** Is it possible to compare for less/greater, to calculate min/max?
+      * Not necessarily totally comparable. For example, floats are comparable despite the fact that NaNs compare to nothing.
+      * The same for nullable of comparable types: they are comparable (but not totally-comparable).
+      */
+    virtual bool is_comparable() const { return false; }
+
+    /** Does it make sense to use this type with COLLATE modifier in ORDER BY.
+      * Example: String, but not FixedString.
+      */
+    virtual bool can_be_compared_with_collation() const { return false; }
+
+    /** If the type is totally comparable (Ints, Date, DateTime, not nullable, not floats)
+      *  and "simple" enough (not String, FixedString) to be used as version number
+      *  (to select rows with maximum version).
+      */
+    virtual bool can_be_used_as_version() const { return false; }
+
+    /** Values of data type can be summed (possibly with overflow, within the same data type).
+      * Example: numbers, even nullable. Not Date/DateTime. Not Enum.
+      * Enums can be passed to aggregate function 'sum', but the result is Int64, not Enum, so they are not summable.
+      */
+    virtual bool is_summable() const { return false; }
+
+    /** Can be used in operations like bit and, bit shift, bit not, etc.
+      */
+    virtual bool can_be_used_in_bit_operations() const { return false; }
+
+    /** Can be used in boolean context (WHERE, HAVING).
+      * UInt8, maybe nullable.
+      */
+    virtual bool can_be_used_in_boolean_context() const { return false; }
+
+    /** Numbers, Enums, Date, DateTime. Not nullable.
+      */
+    virtual bool is_value_represented_by_number() const { return false; }
+
+    /** Integers, Enums, Date, DateTime. Not nullable.
+      */
+    virtual bool is_value_represented_by_integer() const { return false; }
+
+    /** Unsigned Integers, Date, DateTime. Not nullable.
+      */
+    virtual bool is_value_represented_by_unsigned_integer() const { return false; }
+
+    /** Values are unambiguously identified by contents of contiguous memory region,
+      *  that can be obtained by IColumn::get_data_at method.
+      * Examples: numbers, Date, DateTime, String, FixedString,
+      *  and Arrays of numbers, Date, DateTime, FixedString, Enum, but not String.
+      *  (because Array(String) values became ambiguous if you concatenate Strings).
+      * Counterexamples: Nullable, Tuple.
+      */
+    virtual bool is_value_unambiguously_represented_in_contiguous_memory_region() const {
+        return false;
+    }
+
+    virtual bool is_value_unambiguously_represented_in_fixed_size_contiguous_memory_region() const {
+        return is_value_represented_by_number();
+    }
+
+    /** Example: numbers, Date, DateTime, FixedString, Enum... Nullable and Tuple of such types.
+      * Counterexamples: String, Array.
+      * It's Ok to return false for AggregateFunction despite the fact that some of them have fixed size state.
+      */
+    virtual bool have_maximum_size_of_value() const { return false; }
+
+    /** Size in amount of bytes in memory. Throws an exception if not have_maximum_size_of_value.
+      */
+    virtual size_t get_maximum_size_of_value_in_memory() const {
+        return get_size_of_value_in_memory();
+    }
+
+    /** Throws an exception if value is not of fixed size.
+      */
+    virtual size_t get_size_of_value_in_memory() const;
+
+    /** Integers (not floats), Enum, String, FixedString.
+      */
+    virtual bool is_categorial() const { return false; }
+
+    virtual bool is_nullable() const { return false; }
+
+    /** Can this type represent only the NULL value? (This also implies is_nullable.)
+      */
+    virtual bool only_null() const { return false; }
+
+    /** Whether this data type can be wrapped in a Nullable data type.
+      */
+    virtual bool can_be_inside_nullable() const { return false; }
+
+    virtual bool low_cardinality() const { return false; }
+
+    /// Strings, Numbers, Date, DateTime, Nullable
+    virtual bool can_be_inside_low_cardinality() const { return false; }
+
+    /// Updates avg_value_size_hint for a newly read column. Used to optimize deserialization. Zero is expected for the first column.
+    static void update_avg_value_size_hint(const IColumn& column, double& avg_value_size_hint);
+
+    virtual size_t serialize(const IColumn& column, PColumn* pcolumn) const = 0;
+    virtual void deserialize(const PColumn& pcolumn, IColumn* column) const = 0;
+
+    static DataTypePtr from_thrift(const doris::PrimitiveType& type, const bool is_nullable = true);
+    static DataTypePtr from_olap_engine(const doris::FieldType& type, const bool is_nullable = true);
+
+private:
+    friend class DataTypeFactory;
+};
+
+/// Thin wrapper around a TypeIndex that offers named predicates for type checks.
+struct WhichDataType {
+    TypeIndex idx;
+
+    WhichDataType(TypeIndex type_index = TypeIndex::Nothing) : idx(type_index) {}
+
+    WhichDataType(const IDataType& data_type) : WhichDataType(data_type.get_type_id()) {}
+
+    WhichDataType(const IDataType* data_type) : WhichDataType(data_type->get_type_id()) {}
+
+    WhichDataType(const DataTypePtr& data_type) : WhichDataType(data_type->get_type_id()) {}
+
+    // Unsigned integers; the "native" check leaves out the 128-bit width.
+    bool is_uint8() const { return idx == TypeIndex::UInt8; }
+    bool is_uint16() const { return idx == TypeIndex::UInt16; }
+    bool is_uint32() const { return idx == TypeIndex::UInt32; }
+    bool is_uint64() const { return idx == TypeIndex::UInt64; }
+    bool is_uint128() const { return idx == TypeIndex::UInt128; }
+    bool is_native_uint() const { return is_uint8() || is_uint16() || is_uint32() || is_uint64(); }
+    bool is_uint() const { return is_native_uint() || is_uint128(); }
+
+    // Signed integers; the "native" check leaves out the 128-bit width.
+    bool is_int8() const { return idx == TypeIndex::Int8; }
+    bool is_int16() const { return idx == TypeIndex::Int16; }
+    bool is_int32() const { return idx == TypeIndex::Int32; }
+    bool is_int64() const { return idx == TypeIndex::Int64; }
+    bool is_int128() const { return idx == TypeIndex::Int128; }
+    bool is_native_int() const { return is_int8() || is_int16() || is_int32() || is_int64(); }
+    bool is_int() const { return is_native_int() || is_int128(); }
+
+    // Decimals of every supported width.
+    bool is_decimal32() const { return idx == TypeIndex::Decimal32; }
+    bool is_decimal64() const { return idx == TypeIndex::Decimal64; }
+    bool is_decimal128() const { return idx == TypeIndex::Decimal128; }
+    bool is_decimal() const { return is_decimal32() || is_decimal64() || is_decimal128(); }
+
+    // Floating point.
+    bool is_float32() const { return idx == TypeIndex::Float32; }
+    bool is_float64() const { return idx == TypeIndex::Float64; }
+    bool is_float() const { return is_float32() || is_float64(); }
+
+    bool is_enum8() const { return idx == TypeIndex::Enum8; }
+    bool is_enum16() const { return idx == TypeIndex::Enum16; }
+    bool is_enum() const { return is_enum8() || is_enum16(); }
+
+    bool is_date() const { return idx == TypeIndex::Date; }
+    bool is_date_time() const { return idx == TypeIndex::DateTime; }
+    bool is_date_or_datetime() const { return is_date() || is_date_time(); }
+
+    bool is_string() const { return idx == TypeIndex::String; }
+    bool is_fixed_string() const { return idx == TypeIndex::FixedString; }
+    bool is_string_or_fixed_string() const { return is_string() || is_fixed_string(); }
+
+    bool is_uuid() const { return idx == TypeIndex::UUID; }
+    bool is_array() const { return idx == TypeIndex::Array; }
+    bool is_tuple() const { return idx == TypeIndex::Tuple; }
+    bool is_set() const { return idx == TypeIndex::Set; }
+    bool is_interval() const { return idx == TypeIndex::Interval; }
+
+    bool is_nothing() const { return idx == TypeIndex::Nothing; }
+    bool is_nullable() const { return idx == TypeIndex::Nullable; }
+    bool is_function() const { return idx == TypeIndex::Function; }
+    bool is_aggregate_function() const { return idx == TypeIndex::AggregateFunction; }
+};
+
+/// IDataType helpers: free-function alternatives to IDataType virtual methods, with WhichDataType as the single point of truth.
+
+inline bool is_date(const DataTypePtr& data_type) { // TypeIndex::Date only; DateTime does not match
+    return WhichDataType(data_type).is_date();
+}
+inline bool is_date_or_datetime(const DataTypePtr& data_type) { // Date or DateTime
+    return WhichDataType(data_type).is_date_or_datetime();
+}
+inline bool is_enum(const DataTypePtr& data_type) { // Enum8 or Enum16
+    return WhichDataType(data_type).is_enum();
+}
+inline bool is_decimal(const DataTypePtr& data_type) { // Decimal32, Decimal64 or Decimal128
+    return WhichDataType(data_type).is_decimal();
+}
+inline bool is_tuple(const DataTypePtr& data_type) { // TypeIndex::Tuple
+    return WhichDataType(data_type).is_tuple();
+}
+inline bool is_array(const DataTypePtr& data_type) { // TypeIndex::Array
+    return WhichDataType(data_type).is_array();
+}
+
+inline bool is_nothing(const DataTypePtr& data_type) { // TypeIndex::Nothing
+    return WhichDataType(data_type).is_nothing();
+}
+
+template <typename T> // T: anything a WhichDataType can be constructed from
+inline bool is_uint8(const T& data_type) { // TypeIndex::UInt8 only
+    return WhichDataType(data_type).is_uint8();
+}
+
+template <typename T> // T: anything a WhichDataType can be constructed from
+inline bool is_unsigned_integer(const T& data_type) { // UInt8 through UInt128
+    return WhichDataType(data_type).is_uint();
+}
+
+template <typename T> // T: anything a WhichDataType can be constructed from
+inline bool is_integer(const T& data_type) { // signed or unsigned, 8 to 128 bits
+    const WhichDataType kind(data_type);
+    return kind.is_int() || kind.is_uint();
+}
+
+/// True for Float32 and Float64. T: anything a WhichDataType can be constructed from.
+template <typename T>
+inline bool is_float(const T& data_type) {
+    return WhichDataType(data_type).is_float();
+}
+
+template <typename T> // T: anything a WhichDataType can be constructed from
+inline bool is_native_number(const T& data_type) { // ints up to 64 bits, or floats
+    const WhichDataType kind(data_type);
+    return kind.is_native_int() || kind.is_native_uint() || kind.is_float();
+}
+
+template <typename T> // T: anything a WhichDataType can be constructed from
+inline bool is_number(const T& data_type) { // any integer, float or decimal
+    const WhichDataType kind(data_type);
+    return kind.is_int() || kind.is_uint() || kind.is_float() || kind.is_decimal();
+}
+
+template <typename T> // T: anything a WhichDataType can be constructed from
+inline bool is_columned_as_number(const T& data_type) { // ints, floats, Date/DateTime, UUID
+    const WhichDataType kind(data_type);
+    return kind.is_int() || kind.is_uint() || kind.is_float() || kind.is_date_or_datetime() ||
+           kind.is_uuid();
+}
+
+template <typename T> // T: anything a WhichDataType can be constructed from
+inline bool is_string(const T& data_type) { // TypeIndex::String only; FixedString does not match
+    return WhichDataType(data_type).is_string();
+}
+
+template <typename T> // T: anything a WhichDataType can be constructed from
+inline bool is_fixed_string(const T& data_type) { // TypeIndex::FixedString only
+    return WhichDataType(data_type).is_fixed_string();
+}
+
+template <typename T> // T: anything a WhichDataType can be constructed from
+inline bool is_string_or_fixed_string(const T& data_type) { // String or FixedString
+    return WhichDataType(data_type).is_string_or_fixed_string();
+}
+
+/// True for integer types: the non-decimal types this helper treats as comparable to decimals.
+inline bool is_not_decimal_but_comparable_to_decimal(const DataTypePtr& data_type) {
+    return is_integer(data_type);
+}
+
+inline bool is_compilable_type(const DataTypePtr& data_type) { // number-represented and not a decimal
+    return data_type->is_value_represented_by_number() && !is_decimal(data_type);
+}
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/data_types/data_type_bitmap.cpp b/be/src/vec/data_types/data_type_bitmap.cpp
new file mode 100644
index 0000000000..c6bc9f0765
--- /dev/null
+++ b/be/src/vec/data_types/data_type_bitmap.cpp
@@ -0,0 +1,102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/data_types/data_type_bitmap.h"
+
+#include "vec/columns/column_complex.h"
+#include "vec/common/assert_cast.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+// Serializes a bitmap column into pcolumn's binary field; returns compress_binary()'s result.
+// Layout: [row count][byte size of each bitmap][bitmap payloads], lengths stored as size_t.
+size_t DataTypeBitMap::serialize(const IColumn& column, PColumn* pcolumn) const {
+    auto ptr = column.convert_to_full_column_if_const();
+    auto& data_column = assert_cast<const ColumnBitmap&>(*ptr);
+    const size_t row_count = column.size();
+    // Heap-backed header: a variable-length stack array is non-standard C++ and can overflow
+    // the stack for large columns.
+    std::vector<size_t> header(row_count + 1);
+    header[0] = row_count;
+    // size_t accumulator: a plain `0` initializer would deduce int and could overflow.
+    size_t content_size = 0;
+    for (size_t i = 0; i < row_count; ++i) {
+        auto& bitmap = const_cast<BitmapValue&>(data_column.get_element(i));
+        header[i + 1] = bitmap.getSizeInBytes();
+        content_size += header[i + 1];
+    }
+
+    const size_t header_size = sizeof(size_t) * header.size();
+    pcolumn->mutable_binary()->resize(header_size + content_size);
+    auto* data = pcolumn->mutable_binary()->data();
+    memcpy(data, header.data(), header_size);
+    data += header_size;
+    for (size_t i = 0; i < row_count; ++i) {
+        const_cast<BitmapValue&>(data_column.get_element(i)).write(data);
+        data += header[i + 1];
+    }
+    return compress_binary(pcolumn);
+}
+
+// Rebuilds the column from serialize()'s layout: [row count][bitmap sizes][bitmap payloads].
+void DataTypeBitMap::deserialize(const PColumn& pcolumn, IColumn* column) const {
+    auto& data = assert_cast<ColumnBitmap&>(*column).get_data();
+    std::string uncompressed;
+    read_binary(pcolumn, &uncompressed);
+    // Header fields are read with memcpy: no size_t* cast and no stack array sized from wire data.
+    size_t row_count = 0;
+    memcpy(&row_count, uncompressed.data(), sizeof(size_t));
+    const char* sizes = uncompressed.data() + sizeof(size_t);
+    auto* payload = uncompressed.data() + sizeof(size_t) * (row_count + 1);
+    data.resize(row_count);
+    for (size_t i = 0; i < row_count; ++i) {
+        size_t bitmap_size = 0;
+        memcpy(&bitmap_size, sizes + i * sizeof(size_t), sizeof(size_t));
+        data[i].deserialize(payload);
+        payload += bitmap_size;
+    }
+}
+
+MutableColumnPtr DataTypeBitMap::create_column() const { // new ColumnBitmap from ColumnBitmap::create()
+    return ColumnBitmap::create();
+}
+
+// Serializes cvalue into a temporary byte string and emits it with write_string_binary().
+void DataTypeBitMap::serialize_as_stream(const BitmapValue& cvalue, BufferWritable& buf) {
+    auto& value = const_cast<BitmapValue&>(cvalue);
+    // Size the buffer straight from getSizeInBytes(); routing it through an int would narrow it.
+    std::string memory_buffer(value.getSizeInBytes(), '\0');
+    value.write(memory_buffer.data());
+    write_string_binary(memory_buffer, buf);
+}
+
+void DataTypeBitMap::deserialize_as_stream(BitmapValue& value, BufferReadable& buf) {
+    StringRef ref; // filled by read_string_binary() below
+    read_string_binary(ref, buf);
+    value.deserialize(ref.data); // only the data pointer is passed on; ref's length is not used here
+}
+
+// Writes the bytes produced by BitmapValue::write() for the bitmap at row_num to ostr.
+void DataTypeBitMap::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const {
+    const auto& bitmaps = assert_cast<const ColumnBitmap&>(column);
+    auto& bitmap = const_cast<BitmapValue&>(bitmaps.get_element(row_num));
+    std::string bytes(bitmap.getSizeInBytes(), '0');
+    bitmap.write(bytes.data());
+    ostr.write(bytes.data(), bytes.size());
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_bitmap.h b/be/src/vec/data_types/data_type_bitmap.h
new file mode 100644
index 0000000000..c2166fb8d0
--- /dev/null
+++ b/be/src/vec/data_types/data_type_bitmap.h
@@ -0,0 +1,89 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "util/bitmap_value.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_complex.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+// Data type for bitmap columns (ColumnBitmap holding BitmapValue elements).
+class DataTypeBitMap : public IDataType {
+public:
+    DataTypeBitMap() = default;
+    ~DataTypeBitMap() override = default;
+
+    using ColumnType = ColumnBitmap;
+    using FieldType = BitmapValue;
+
+    std::string do_get_name() const override { return get_family_name(); }
+    const char* get_family_name() const override { return "BitMap"; }
+    TypeIndex get_type_id() const override { return TypeIndex::BitMap; }
+
+    size_t serialize(const IColumn& column, PColumn* pcolumn) const override;
+    void deserialize(const PColumn& pcolumn, IColumn* column) const override;
+    MutableColumnPtr create_column() const override;
+
+    bool get_is_parametric() const override { return false; }
+    bool have_subtypes() const override { return false; }
+    bool should_align_right_in_pretty_formats() const override { return false; }
+    bool text_can_contain_only_valid_utf8() const override { return true; }
+    bool is_comparable() const override { return false; }
+    bool is_value_represented_by_number() const override { return false; }
+    bool is_value_represented_by_integer() const override { return false; }
+    bool is_value_represented_by_unsigned_integer() const override { return false; }
+    // TODO: verify that `true` is the right answer for bitmap values.
+    bool is_value_unambiguously_represented_in_contiguous_memory_region() const override {
+        return true;
+    }
+    bool have_maximum_size_of_value() const override { return false; }
+
+    bool can_be_used_as_version() const override { return false; }
+
+    bool can_be_inside_nullable() const override { return true; }
+
+    bool equals(const IDataType& rhs) const override { return typeid(rhs) == typeid(*this); }
+
+    bool is_categorial() const override { return is_value_represented_by_integer(); }
+
+    bool can_be_inside_low_cardinality() const override { return false; }
+    // Placeholder text for any row; the stream overload is implemented in the .cpp file.
+    std::string to_string(const IColumn& column, size_t row_num) const override { return "BitMap()"; }
+    void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override;
+    // No default Field is provided for bitmaps: calling this logs at FATAL level.
+    [[noreturn]] Field get_default() const override {
+        LOG(FATAL) << "Method get_default() is not implemented for data type " << get_name();
+        __builtin_unreachable();
+    }
+    // Single-value (de)serialization through BufferWritable / BufferReadable.
+    static void serialize_as_stream(const BitmapValue& value, BufferWritable& buf);
+
+    static void deserialize_as_stream(BitmapValue& value, BufferReadable& buf);
+};
+
+template <typename T>
+struct is_complex : std::false_type {};
+// BitmapValue (DataTypeBitMap::FieldType) is the only type marked complex in this header.
+template <>
+struct is_complex<DataTypeBitMap::FieldType> : std::true_type {};
+// Shorthand for is_complex<T>::value.
+template <class T>
+constexpr bool is_complex_v = is_complex<T>::value;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_date.cpp b/be/src/vec/data_types/data_type_date.cpp
new file mode 100644
index 0000000000..8588e01cc0
--- /dev/null
+++ b/be/src/vec/data_types/data_type_date.cpp
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeDate.cpp
+// and modified by Doris
+
+#include "vec/data_types/data_type_date.h"
+
+#include "runtime/datetime_value.h"
+#include "vec/columns/columns_number.h"
+#include "util/binary_cast.hpp"
+#include "vec/runtime/vdatetime_value.h"
+namespace doris::vectorized {
+bool DataTypeDate::equals(const IDataType& rhs) const { // equal iff rhs has the same dynamic type
+    return typeid(rhs) == typeid(*this);
+}
+
+// Renders the date stored at row_num as "YYYY-MM-DD", one character per decimal digit.
+std::string DataTypeDate::to_string(const IColumn& column, size_t row_num) const {
+    const auto full_column = column.convert_to_full_column_if_const();
+    Int64 packed = assert_cast<const ColumnInt64&>(*full_column).get_data()[row_num];
+    auto value = binary_cast<Int64, doris::vectorized::VecDateTimeValue>(packed);
+
+    const uint32_t century = value.year() / 100;
+    const uint32_t year_rest = value.year() % 100;
+    // Two characters per two-digit group, groups separated by '-'.
+    const char text[] = {
+            (char)('0' + (century / 10)), (char)('0' + (century % 10)),
+            (char)('0' + (year_rest / 10)), (char)('0' + (year_rest % 10)), '-',
+            (char)('0' + (value.month() / 10)), (char)('0' + (value.month() % 10)), '-',
+            (char)('0' + (value.day() / 10)), (char)('0' + (value.day() % 10))};
+    return std::string(text, sizeof(text));
+}
+
+// Writes VecDateTimeValue::to_string()'s text for the value at row_num to ostr.
+void DataTypeDate::to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const {
+    const auto full_column = column.convert_to_full_column_if_const();
+    Int64 packed = assert_cast<const ColumnInt64&>(*full_column).get_data()[row_num];
+    auto value = binary_cast<Int64, doris::vectorized::VecDateTimeValue>(packed);
+    char text[64];
+    // to_string() apparently returns the position after the trailing '\0'; skip that byte.
+    const char* end = value.to_string(text);
+    ostr.write(text, end - text - 1);
+}
+
+void DataTypeDate::cast_to_date(Int64& x) { // rewrites x in place
+    auto value = binary_cast<Int64, VecDateTimeValue>(x); // reinterpret the Int64 as a VecDateTimeValue
+    value.cast_to_date();
+    x = binary_cast<VecDateTimeValue, Int64>(value); // store the converted value back into x
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_date.h b/be/src/vec/data_types/data_type_date.h
new file mode 100644
index 0000000000..b5d148b8bd
--- /dev/null
+++ b/be/src/vec/data_types/data_type_date.h
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeDate.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/data_types/data_type_number_base.h"
+
+namespace doris::vectorized {
+
+// Date type; its Int64 column values are binary-cast to VecDateTimeValue for formatting.
+class DataTypeDate final : public DataTypeNumberBase<Int64> {
+public:
+    TypeIndex get_type_id() const override { return TypeIndex::Date; }
+    const char* get_family_name() const override { return "Date"; }
+    bool can_be_used_as_version() const override { return true; }
+    bool can_be_inside_nullable() const override { return true; }
+
+    bool equals(const IDataType& rhs) const override;
+    std::string to_string(const IColumn& column, size_t row_num) const override;
+    void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override;
+    // Converts x in place through VecDateTimeValue::cast_to_date().
+    static void cast_to_date(Int64& x);
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_date_time.cpp b/be/src/vec/data_types/data_type_date_time.cpp
new file mode 100644
index 0000000000..ae6c94d19c
--- /dev/null
+++ b/be/src/vec/data_types/data_type_date_time.cpp
@@ -0,0 +1,95 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeDateTime.cpp
+// and modified by Doris
+
+#include "vec/data_types/data_type_date_time.h"
+
+#include "runtime/datetime_value.h"
+#include "util/binary_cast.hpp"
+#include "vec/columns/columns_number.h"
+#include "vec/runtime/vdatetime_value.h"
+namespace doris::vectorized {
+
+DataTypeDateTime::DataTypeDateTime() = default;
+
+std::string DataTypeDateTime::do_get_name() const { // fixed name, no type parameters appended
+    return "DateTime";
+}
+
+bool DataTypeDateTime::equals(const IDataType& rhs) const { // equal iff rhs has the same dynamic type
+    return typeid(rhs) == typeid(*this);
+}
+
+// Renders the value at row_num as "YYYY-MM-DD hh:mm:ss" (see the inline notes for neg/hour).
+std::string DataTypeDateTime::to_string(const IColumn& column, size_t row_num) const {
+    const auto full_column = column.convert_to_full_column_if_const();
+    Int64 packed = assert_cast<const ColumnInt64&>(*full_column).get_data()[row_num];
+    // TODO: reconsider whether this copy could be replaced by a const reference.
+    auto value = binary_cast<Int64, doris::vectorized::VecDateTimeValue>(packed);
+    std::stringstream ss;
+    // Streams v as exactly two characters: tens digit first, then ones digit.
+    const auto put_two_digits = [&ss](uint32_t v) {
+        ss << (char)('0' + (v / 10)) << (char)('0' + (v % 10));
+    };
+    // Date part: century, year within century, month, day.
+    put_two_digits(value.year() / 100);
+    put_two_digits(value.year() % 100);
+    ss << '-';
+    put_two_digits(value.month());
+    ss << '-';
+    put_two_digits(value.day());
+    // A negative value gets a '-' directly after the day.
+    if (value.neg()) {
+        ss << '-';
+    }
+    ss << ' ';
+    // Time part; the hundreds digit of the hour is emitted only when the hour is 100 or more.
+    uint32_t hour = value.hour();
+    if (hour >= 100) {
+        ss << (char)('0' + (hour / 100));
+        hour %= 100;
+    }
+    put_two_digits(hour);
+    ss << ':';
+    put_two_digits(value.minute());
+    ss << ':';
+    put_two_digits(value.second());
+    return ss.str();
+}
+
+// Writes VecDateTimeValue::to_string()'s text for the value at row_num to ostr.
+void DataTypeDateTime::to_string(const IColumn& column, size_t row_num,
+                                 BufferWritable& ostr) const {
+    const auto full_column = column.convert_to_full_column_if_const();
+    Int64 packed = assert_cast<const ColumnInt64&>(*full_column).get_data()[row_num];
+    // TODO: reconsider whether this copy could be replaced by a const reference.
+    auto value = binary_cast<Int64, doris::vectorized::VecDateTimeValue>(packed);
+
+    char text[64];
+    // to_string() apparently returns the position after the trailing '\0'; skip that byte.
+    const char* end = value.to_string(text);
+    ostr.write(text, end - text - 1);
+}
+
+void DataTypeDateTime::cast_to_date_time(Int64& x) { // rewrites x in place
+    auto value = binary_cast<Int64, doris::vectorized::VecDateTimeValue>(x); // reinterpret the Int64
+    value.to_datetime();
+    x = binary_cast<doris::vectorized::VecDateTimeValue, Int64>(value); // store the result back into x
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_date_time.h b/be/src/vec/data_types/data_type_date_time.h
new file mode 100644
index 0000000000..6b6af04fb6
--- /dev/null
+++ b/be/src/vec/data_types/data_type_date_time.h
@@ -0,0 +1,83 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeDateTime.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/data_types/data_type_date.h"
+#include "vec/data_types/data_type_number_base.h"
+
+class DateLUTImpl;
+
+namespace doris::vectorized {
+
+/** DateTime values are stored in an Int64 column.
+	* Each Int64 holds the bits of a doris::vectorized::VecDateTimeValue (see the binary_cast calls
+	*  in data_type_date_time.cpp), not a unix timestamp as in the ClickHouse original.
+	* In text format it is serialized to and parsed from YYYY-MM-DD hh:mm:ss format.
+	*
+	* NOTE: the paragraphs below were inherited from the ClickHouse header this file was copied
+	*  from. This class declares no time zone parameter, so verify them before relying on them.
+	*
+	* To convert from/to text format, time zone may be specified explicitly or implicit time zone may be used.
+	* Time zone may be specified explicitly as type parameter, example: DateTime('Europe/Moscow').
+	* As it does not affect the internal representation of values,
+	*  all types with different time zones are equivalent and may be used interchangeably.
+	* Time zone only affects parsing and displaying in text formats.
+	*
+	* If time zone is not specified (example: DateTime without parameter), then default time zone is used.
+	* Default time zone is server time zone, if server is doing transformations
+	*  and if client is doing transformations, unless 'use_client_time_zone' setting is passed to client;
+	* Server time zone is the time zone specified in 'timezone' parameter in configuration file,
+	*  or system time zone at the moment of server startup.
+	*/
+class DataTypeDateTime final : public DataTypeNumberBase<Int64> {
+public:
+    DataTypeDateTime();
+    // Type identity.
+    TypeIndex get_type_id() const override { return TypeIndex::DateTime; }
+    const char* get_family_name() const override { return "DateTime"; }
+    std::string do_get_name() const override;
+    bool equals(const IDataType& rhs) const override;
+
+    // Capability flags.
+    bool can_be_inside_nullable() const override { return true; }
+    bool can_be_used_as_version() const override { return true; }
+
+    // Text form of the value at `row_num`: written to `ostr`, or returned as a string.
+    void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override;
+    std::string to_string(const IColumn& column, size_t row_num) const;
+    // Static helper that rewrites `x` in place; defined in data_type_date_time.cpp.
+    static void cast_to_date_time(Int64& x);
+};
+
+// Compile-time type predicates; each defaults to false and is specialized for one data type.
+template <typename T>
+constexpr bool IsDateTimeType = false;
+template <>
+inline constexpr bool IsDateTimeType<DataTypeDateTime> = true;
+template <typename T>
+constexpr bool IsDateType = false;
+template <>
+inline constexpr bool IsDateType<DataTypeDate> = true;
+// True for either of the two types above.
+template <typename T>
+constexpr bool IsTimeType = IsDateType<T> || IsDateTimeType<T>;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_decimal.cpp b/be/src/vec/data_types/data_type_decimal.cpp
new file mode 100644
index 0000000000..04f72d8d29
--- /dev/null
+++ b/be/src/vec/data_types/data_type_decimal.cpp
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypesDecimal.cpp
+// and modified by Doris
+
+#include "vec/data_types/data_type_decimal.h"
+
+#include "gen_cpp/data.pb.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/int_exp.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+template <typename T> // renders the name as "Decimal(<precision>, <scale>)"
+std::string DataTypeDecimal<T>::do_get_name() const {
+    std::ostringstream name;
+    name << "Decimal(" << precision << ", " << scale << ")";
+    return name.str();
+}
+
+template <typename T> // equal when `rhs` is the same decimal width with the same scale
+bool DataTypeDecimal<T>::equals(const IDataType& rhs) const {
+    const auto* other = typeid_cast<const DataTypeDecimal<T>*>(&rhs);
+    // A null result means `rhs` is not a DataTypeDecimal<T>; precision is not compared.
+    return other != nullptr && other->get_scale() == scale;
+}
+
+template <typename T> // text form of the value at `row_num`, produced by write_text()
+std::string DataTypeDecimal<T>::to_string(const IColumn& column, size_t row_num) const {
+    const auto full_column = column.convert_to_full_column_if_const();
+    T decimal = assert_cast<const ColumnType&>(*full_column.get()).get_data()[row_num];
+    std::ostringstream out;
+    write_text(decimal, scale, out);
+    return out.str();
+}
+
+template <typename T> // writes the value at `row_num` to `ostr` via DecimalV2Value::to_string()
+void DataTypeDecimal<T>::to_string(const IColumn& column, size_t row_num,
+                                   BufferWritable& ostr) const {
+    // TODO: Reduce the copy in std::string mem to ostr, like DataTypeNumber
+    const auto full_column = column.convert_to_full_column_if_const();
+    const auto& cells = assert_cast<const ColumnType&>(*full_column.get()).get_data();
+    DecimalV2Value decimal = static_cast<DecimalV2Value>(cells[row_num]);
+    const auto text = decimal.to_string();
+    ostr.write(text.data(), text.size());
+}
+
+template <typename T> // copies the raw values plus precision/scale into `pcolumn`
+size_t DataTypeDecimal<T>::serialize(const IColumn& column, PColumn* pcolumn) const {
+    // Work on the full form of the column (a const column is expanded first).
+    const auto full_column = column.convert_to_full_column_if_const();
+    const auto& values = assert_cast<const ColumnType&>(*full_column.get()).get_data();
+    // Byte image of the values.
+    const size_t byte_count = column.size() * sizeof(FieldType);
+    auto* binary = pcolumn->mutable_binary();
+    binary->resize(byte_count);
+    memcpy(binary->data(), values.data(), byte_count);
+
+    // Precision and scale are stored next to the data.
+    auto* decimal_param = pcolumn->mutable_decimal_param();
+    decimal_param->set_precision(precision);
+    decimal_param->set_scale(scale);
+    return compress_binary(pcolumn);
+}
+
+template <typename T> // fills `column` with the raw values carried by `pcolumn`
+void DataTypeDecimal<T>::deserialize(const PColumn& pcolumn, IColumn* column) const {
+    std::string uncompressed;
+    read_binary(pcolumn, &uncompressed);
+    auto& container = assert_cast<ColumnType*>(column)->get_data();
+    container.resize(uncompressed.size() / sizeof(T));
+    // Copy whole values only; a payload with trailing partial bytes must not overrun the storage.
+    memcpy(container.data(), uncompressed.data(), container.size() * sizeof(T));
+}
+
+template <typename T> // default field: a T(0) value paired with this type's scale
+Field DataTypeDecimal<T>::get_default() const {
+    return DecimalField(T(0), scale);
+}
+
+template <typename T> // promotes to Decimal128 at its maximum precision, keeping the scale
+DataTypePtr DataTypeDecimal<T>::promote_numeric_type() const {
+    constexpr size_t widest_precision = max_decimal_precision<Decimal128>();
+    return std::make_shared<DataTypeDecimal<Decimal128>>(widest_precision, scale);
+}
+
+template <typename T> // forwards (0, scale) to ColumnType::create - presumably an empty column
+MutableColumnPtr DataTypeDecimal<T>::create_column() const {
+    return ColumnType::create(0, scale);
+}
+
+// Builds the narrowest DataTypeDecimal whose maximum precision can hold `precision_value`.
+DataTypePtr create_decimal(UInt64 precision_value, UInt64 scale_value) {
+    const bool precision_ok = precision_value >= min_decimal_precision() &&
+                              precision_value <= max_decimal_precision<Decimal128>();
+    if (!precision_ok) {
+        LOG(FATAL) << "Wrong precision";
+    }
+    if (scale_value > precision_value) {
+        LOG(FATAL) << "Negative scales and scales larger than precision are not supported";
+    }
+    if (precision_value <= max_decimal_precision<Decimal32>())
+        return std::make_shared<DataTypeDecimal<Decimal32>>(precision_value, scale_value);
+    if (precision_value <= max_decimal_precision<Decimal64>())
+        return std::make_shared<DataTypeDecimal<Decimal64>>(precision_value, scale_value);
+    return std::make_shared<DataTypeDecimal<Decimal128>>(precision_value, scale_value);
+}
+
+template <> // per-width scale multipliers, each delegated to a common::exp10_* helper
+Decimal32 DataTypeDecimal<Decimal32>::get_scale_multiplier(UInt32 scale_) {
+    return common::exp10_i32(scale_);
+}
+// 64-bit variant.
+template <>
+Decimal64 DataTypeDecimal<Decimal64>::get_scale_multiplier(UInt32 scale_) {
+    return common::exp10_i64(scale_);
+}
+// 128-bit variant.
+template <>
+Decimal128 DataTypeDecimal<Decimal128>::get_scale_multiplier(UInt32 scale_) {
+    return common::exp10_i128(scale_);
+}
+
+/// Explicit template instantiations: the member definitions above live in this .cpp file.
+template class DataTypeDecimal<Decimal32>;
+template class DataTypeDecimal<Decimal64>;
+template class DataTypeDecimal<Decimal128>;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_decimal.h b/be/src/vec/data_types/data_type_decimal.h
new file mode 100644
index 0000000000..c99e507eed
--- /dev/null
+++ b/be/src/vec/data_types/data_type_decimal.h
@@ -0,0 +1,352 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypesDecimal.h
+// and modified by Doris
+
+#pragma once
+#include <cmath>
+
+#include "vec/columns/column_decimal.h"
+#include "vec/common/arithmetic_overflow.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_number.h"
+
+namespace doris::vectorized {
+
+static constexpr size_t min_decimal_precision() { // lower bound checked by create_decimal()
+    return 1;
+}
+template <typename T> // primary template: 0 for any T without a specialization below
+static constexpr size_t max_decimal_precision() {
+    return 0;
+}
+template <> // Decimal32
+constexpr size_t max_decimal_precision<Decimal32>() {
+    return 9;
+}
+template <> // Decimal64
+constexpr size_t max_decimal_precision<Decimal64>() {
+    return 18;
+}
+template <> // Decimal128
+constexpr size_t max_decimal_precision<Decimal128>() {
+    return 27; // NOTE(review): matches the "precision:27, scale: 9" note in DataTypeDecimal's ctor
+}
+
+DataTypePtr create_decimal(UInt64 precision, UInt64 scale);
+
+// Number of decimal digits in the largest value of the integer type `int_type`
+// (3 for Int8/UInt8 ... 20 for UInt64); 0 when `int_type` is not one of the types listed.
+inline UInt32 least_decimal_precision_for(TypeIndex int_type) {
+    switch (int_type) {
+    case TypeIndex::Int8:
+    case TypeIndex::UInt8:
+        return 3;
+    case TypeIndex::Int16:
+    case TypeIndex::UInt16:
+        return 5;
+    case TypeIndex::Int32:
+    case TypeIndex::UInt32:
+        return 10;
+    case TypeIndex::Int64:
+        return 19;
+    case TypeIndex::UInt64:
+        return 20;
+    // Every other type index falls out of the switch.
+    default:
+        break;
+    }
+    return 0;
+}
+
+/// Implements Decimal(P, S), where P is precision, S is scale.
+/// Maximum precisions for underlying types (see max_decimal_precision<T>()) are:
+/// Int32    9
+/// Int64   18
+/// Int128  27
+/// Operation between two decimals leads to Decimal(P, S), where
+///     P is one of (9, 18, 27); equals to the maximum precision for the biggest underlying type of operands.
+///     S is maximum scale of operands. The allowed values are [0, precision]
+template <typename T>
+class DataTypeDecimal final : public IDataType {
+    static_assert(IsDecimalNumber<T>);
+
+public:
+    using ColumnType = ColumnDecimal<T>;
+    using FieldType = T;
+
+    static constexpr bool is_parametric = true;
+
+    static constexpr size_t max_precision() { return max_decimal_precision<T>(); }
+    /// Aborts via LOG(FATAL) when precision or scale is outside the range allowed for T.
+    DataTypeDecimal(UInt32 precision_ = 27, UInt32 scale_ = 9)
+            : precision(precision_), scale(scale_) {
+        if (UNLIKELY(precision < 1 || precision > max_precision())) {
+            LOG(FATAL) << fmt::format("Precision {} is out of bounds", precision);
+        }
+
+        // `scale` is unsigned, so only the upper bound needs checking.
+        if (UNLIKELY(scale > max_precision())) {
+            LOG(FATAL) << fmt::format("Scale {} is out of bounds", scale);
+        }
+
+        // Now, Doris only support precision:27, scale: 9
+        // Only DCHECKed: the const members keep whatever values were passed in.
+        DCHECK(precision_ == 27);
+        DCHECK(scale_ == 9);
+    }
+
+    const char* get_family_name() const override { return "Decimal"; }
+    std::string do_get_name() const override;
+    TypeIndex get_type_id() const override { return TypeId<T>::value; }
+    // Serialization, default value, promotion and column creation (see data_type_decimal.cpp).
+    size_t serialize(const IColumn& column, PColumn* pcolumn) const override;
+    void deserialize(const PColumn& pcolumn, IColumn* column) const override;
+    Field get_default() const override;
+    bool can_be_promoted() const override { return true; }
+    DataTypePtr promote_numeric_type() const override;
+    MutableColumnPtr create_column() const override;
+    bool equals(const IDataType& rhs) const override;
+    // Fixed properties of decimal types.
+    bool get_is_parametric() const override { return true; }
+    bool have_subtypes() const override { return false; }
+    bool should_align_right_in_pretty_formats() const override { return true; }
+    bool text_can_contain_only_valid_utf8() const override { return true; }
+    bool is_comparable() const override { return true; }
+    bool is_value_represented_by_number() const override { return true; }
+    bool is_value_unambiguously_represented_in_contiguous_memory_region() const override {
+        return true;
+    }
+    bool have_maximum_size_of_value() const override { return true; }
+    size_t get_size_of_value_in_memory() const override { return sizeof(T); }
+
+    bool is_summable() const override { return true; }
+    bool can_be_used_in_boolean_context() const override { return true; }
+    bool can_be_inside_nullable() const override { return true; }
+    std::string to_string(const IColumn& column, size_t row_num) const;
+    void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const;
+
+    /// Decimal specific
+
+    UInt32 get_precision() const { return precision; }
+    UInt32 get_scale() const { return scale; }
+    T get_scale_multiplier() const { return get_scale_multiplier(scale); }
+    /// Part of `x` before the decimal point.
+    T whole_part(T x) const {
+        if (scale == 0) return x;
+        return x / get_scale_multiplier();
+    }
+    /// Part of `x` after the decimal point, taken from the absolute value of `x`.
+    T fractional_part(T x) const {
+        if (scale == 0) return 0;
+        if (x < T(0)) x *= T(-1);
+        return x % get_scale_multiplier();
+    }
+    /// Scale multiplier for (max_precision() - scale) digits, minus one.
+    T max_whole_value() const { return get_scale_multiplier(max_precision() - scale) - T(1); }
+    /// True when `x` lies within [-max_whole_value(), max_whole_value()].
+    bool can_store_whole(T x) const {
+        T max = max_whole_value();
+        if (x > max || x < -max) return false;
+        return true;
+    }
+
+    /// @returns multiplier for U to become T with correct scale
+    template <typename U>
+    T scale_factor_for(const DataTypeDecimal<U>& x, bool) const {
+        if (get_scale() < x.get_scale()) {
+            LOG(FATAL) << "Decimal result's scale is less then argiment's one";
+        }
+
+        UInt32 scale_delta = get_scale() - x.get_scale(); /// scale_delta >= 0
+        return get_scale_multiplier(scale_delta);
+    }
+    /// Plain numbers: factor 1 for multiply/divide, otherwise this type's scale multiplier.
+    template <typename U>
+    T scale_factor_for(const DataTypeNumber<U>&, bool is_multiply_or_divisor) const {
+        if (is_multiply_or_divisor) return 1;
+        return get_scale_multiplier();
+    }
+    /// Defined per width in data_type_decimal.cpp (explicit specializations).
+    static T get_scale_multiplier(UInt32 scale);
+
+private:
+    const UInt32 precision;
+    const UInt32 scale;
+};
+
+// Result type of a binary operation with at least one decimal operand. Every overload returns
+// the (wider) decimal operand's type at its maximum precision with scale 9; the flags are unused.
+template <typename T, typename U>
+std::enable_if_t<(sizeof(T) >= sizeof(U)), const DataTypeDecimal<T>> decimal_result_type(
+        const DataTypeDecimal<T>& /*tx*/, const DataTypeDecimal<U>& /*ty*/, bool /*is_multiply*/,
+        bool /*is_divide*/) {
+    return DataTypeDecimal<T>(DataTypeDecimal<T>::max_precision(), 9);
+}
+template <typename T, typename U>
+std::enable_if_t<(sizeof(T) < sizeof(U)), const DataTypeDecimal<U>> decimal_result_type(
+        const DataTypeDecimal<T>& /*tx*/, const DataTypeDecimal<U>& /*ty*/, bool /*is_multiply*/,
+        bool /*is_divide*/) {
+    return DataTypeDecimal<U>(DataTypeDecimal<U>::max_precision(), 9);
+}
+// Decimal combined with a plain number: the decimal side decides.
+template <typename T, typename U>
+const DataTypeDecimal<T> decimal_result_type(const DataTypeDecimal<T>& /*tx*/,
+                                             const DataTypeNumber<U>&, bool, bool) {
+    return DataTypeDecimal<T>(DataTypeDecimal<T>::max_precision(), 9);
+}
+template <typename T, typename U>
+const DataTypeDecimal<U> decimal_result_type(const DataTypeNumber<T>&,
+                                             const DataTypeDecimal<U>& /*ty*/, bool, bool) {
+    return DataTypeDecimal<U>(DataTypeDecimal<U>::max_precision(), 9);
+}
+
+template <typename T> // typeid_cast of `data_type` to DataTypeDecimal<T>
+inline const DataTypeDecimal<T>* check_decimal(const IDataType& data_type) {
+    return typeid_cast<const DataTypeDecimal<T>*>(&data_type); // callers test the result for null
+}
+
+inline UInt32 get_decimal_scale(const IDataType& data_type,
+                                UInt32 default_value = std::numeric_limits<UInt32>::max()) {
+    if (const auto* dec32 = check_decimal<Decimal32>(data_type)) return dec32->get_scale();
+    if (const auto* dec64 = check_decimal<Decimal64>(data_type)) return dec64->get_scale();
+    if (const auto* dec128 = check_decimal<Decimal128>(data_type)) return dec128->get_scale();
+    return default_value; // `data_type` is none of the three decimal widths
+}
+
+/// Compile-time predicates over data type classes.
+/// IsDataTypeDecimal<X> is true exactly for the three DataTypeDecimal instantiations below.
+template <typename X>
+constexpr bool IsDataTypeDecimal = false;
+template <>
+inline constexpr bool IsDataTypeDecimal<DataTypeDecimal<Decimal32>> = true;
+template <>
+inline constexpr bool IsDataTypeDecimal<DataTypeDecimal<Decimal64>> = true;
+template <>
+inline constexpr bool IsDataTypeDecimal<DataTypeDecimal<Decimal128>> = true;
+/// True for a decimal data type or for anything IsDataTypeNumber accepts.
+template <typename X>
+constexpr bool IsDataTypeDecimalOrNumber =
+        IsDataTypeNumber<X> || IsDataTypeDecimal<X>;
+
+// Rescales `value` from `scale_from` to `scale_to`; a detected overflow ends in LOG(FATAL).
+template <typename FromDataType, typename ToDataType>
+inline std::enable_if_t<IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>,
+                        typename ToDataType::FieldType>
+convert_decimals(const typename FromDataType::FieldType& value, UInt32 scale_from,
+                 UInt32 scale_to) {
+    using FromFieldType = typename FromDataType::FieldType;
+    using ToFieldType = typename ToDataType::FieldType;
+    using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) > sizeof(ToFieldType)),
+                                            FromFieldType, ToFieldType>;
+    using MaxNativeType = typename MaxFieldType::NativeType;
+    using WideDecimal = DataTypeDecimal<MaxFieldType>;
+    using ToLimits = std::numeric_limits<typename ToFieldType::NativeType>;
+    MaxNativeType rescaled;
+    if (scale_to <= scale_from) {
+        // Same or smaller target scale: divide by the multiplier for the difference.
+        rescaled = value / WideDecimal::get_scale_multiplier(scale_from - scale_to);
+    } else {
+        // Larger target scale: multiply in the wider native type, checking for overflow.
+        rescaled = WideDecimal::get_scale_multiplier(scale_to - scale_from);
+        if (common::mul_overflow(static_cast<MaxNativeType>(value), rescaled, rescaled)) {
+            LOG(FATAL) << "Decimal convert overflow";
+        }
+    }
+    if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType)) {
+        // Narrowing: the result must also fit the destination's native type.
+        if (rescaled > ToLimits::max() || rescaled < ToLimits::min()) {
+            LOG(FATAL) << "Decimal convert overflow";
+        }
+    }
+    return rescaled;
+}
+
+template <typename FromDataType, typename ToDataType> // decimal -> plain number conversion
+inline std::enable_if_t<IsDataTypeDecimal<FromDataType> && IsDataTypeNumber<ToDataType>,
+                        typename ToDataType::FieldType>
+convert_from_decimal(const typename FromDataType::FieldType& value, UInt32 scale) {
+    using FromFieldType = typename FromDataType::FieldType;
+    using ToFieldType = typename ToDataType::FieldType;
+    // Floating-point targets: divide the raw value by the scale multiplier.
+    if constexpr (std::is_floating_point_v<ToFieldType>)
+        return static_cast<ToFieldType>(value) / FromDataType::get_scale_multiplier(scale);
+    else { // integer targets: first rescale to scale 0 within the source type
+        FromFieldType converted_value =
+                convert_decimals<FromDataType, FromDataType>(value, scale, 0);
+        // The range check is compiled in only when the target is narrower or unsigned.
+        if constexpr (sizeof(FromFieldType) > sizeof(ToFieldType) ||
+                      !std::numeric_limits<ToFieldType>::is_signed) {
+            if constexpr (std::numeric_limits<ToFieldType>::is_signed) {
+                if (converted_value < std::numeric_limits<ToFieldType>::min() ||
+                    converted_value > std::numeric_limits<ToFieldType>::max()) {
+                    LOG(FATAL) << "Decimal convert overflow";
+                }
+            } else {
+                using CastIntType =
+                        std::conditional_t<std::is_same_v<ToFieldType, UInt64>, Int128, Int64>;
+                // Unsigned target: reject negatives; compare the maximum as a CastIntType.
+                if (converted_value < 0 ||
+                    converted_value >
+                            static_cast<CastIntType>(std::numeric_limits<ToFieldType>::max())) {
+                    LOG(FATAL) << "Decimal convert overflow";
+                }
+            }
+        }
+        return converted_value;
+    }
+}
+
+template <typename FromDataType, typename ToDataType> // plain number -> decimal conversion
+inline std::enable_if_t<IsDataTypeNumber<FromDataType> && IsDataTypeDecimal<ToDataType>,
+                        typename ToDataType::FieldType>
+convert_to_decimal(const typename FromDataType::FieldType& value, UInt32 scale) {
+    using FromFieldType = typename FromDataType::FieldType;
+    using ToNativeType = typename ToDataType::FieldType::NativeType;
+    // Floating-point sources: reject inf/NaN, scale up, then range-check the result.
+    if constexpr (std::is_floating_point_v<FromFieldType>) {
+        if (!std::isfinite(value)) {
+            LOG(FATAL) << "Decimal convert overflow. Cannot convert infinity or NaN to decimal";
+        }
+        // Values at or beyond the limits of the target's native type are rejected below.
+        auto out = value * ToDataType::get_scale_multiplier(scale);
+        if constexpr (std::is_same_v<ToNativeType, Int128>) {
+            static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64;
+            static constexpr __int128 max_int128 =
+                    (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll;
+            if (out <= static_cast<ToNativeType>(min_int128) ||
+                out >= static_cast<ToNativeType>(max_int128)) {
+                LOG(FATAL) << "Decimal convert overflow. Float is out of Decimal range";
+            }
+        } else {
+            if (out <= std::numeric_limits<ToNativeType>::min() ||
+                out >= std::numeric_limits<ToNativeType>::max()) {
+                LOG(FATAL) << "Decimal convert overflow. Float is out of Decimal range";
+            }
+        }
+        return out;
+    } else { // integer sources go through convert_decimals() starting from scale 0
+        if constexpr (std::is_same_v<FromFieldType, UInt64>)
+            if (value > static_cast<UInt64>(std::numeric_limits<Int64>::max())) // above Int64 max
+                return convert_decimals<DataTypeDecimal<Decimal128>, ToDataType>(value, 0, scale);
+        return convert_decimals<DataTypeDecimal<Decimal64>, ToDataType>(value, 0, scale);
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_factory.hpp b/be/src/vec/data_types/data_type_factory.hpp
new file mode 100644
index 0000000000..e06a962c2f
--- /dev/null
+++ b/be/src/vec/data_types/data_type_factory.hpp
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeFactory.h
+// and modified by Doris
+
+#pragma once
+#include <mutex>
+#include <string>
+
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_date.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_nothing.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_string.h"
+
+namespace doris::vectorized {
+
+// Name <-> data type registry; instance() returns the shared one and registers built-ins once.
+class DataTypeFactory {
+using DataTypeMap = std::unordered_map<std::string, DataTypePtr>;
+using InvertedDataTypeMap = std::vector<std::pair<DataTypePtr, std::string>>;
+public:
+    static DataTypeFactory& instance() {
+        static std::once_flag oc;
+        static DataTypeFactory instance;
+        std::call_once(oc, [&]() {
+            instance.regist_data_type("UInt8", std::make_shared<DataTypeUInt8>());
+            instance.regist_data_type("UInt16", std::make_shared<DataTypeUInt16>());
+            instance.regist_data_type("UInt32", std::make_shared<DataTypeUInt32>());
+            instance.regist_data_type("UInt64", std::make_shared<DataTypeUInt64>());
+            instance.regist_data_type("Int8", std::make_shared<DataTypeInt8>());
+            instance.regist_data_type("Int16", std::make_shared<DataTypeInt16>());
+            instance.regist_data_type("Int32", std::make_shared<DataTypeInt32>());
+            instance.regist_data_type("Int64", std::make_shared<DataTypeInt64>());
+            instance.regist_data_type("Int128", std::make_shared<DataTypeInt128>());
+            instance.regist_data_type("Float32", std::make_shared<DataTypeFloat32>());
+            instance.regist_data_type("Float64", std::make_shared<DataTypeFloat64>());
+            instance.regist_data_type("Date", std::make_shared<DataTypeDate>());
+            instance.regist_data_type("DateTime", std::make_shared<DataTypeDateTime>());
+            instance.regist_data_type("String", std::make_shared<DataTypeString>());
+            instance.regist_data_type("Decimal", std::make_shared<DataTypeDecimal<Decimal128>>(27, 9));
+        });
+        return instance;
+    }
+    // Looks up a type by name. A miss returns a null pointer without inserting anything.
+    DataTypePtr get(const std::string& name) const {
+        const auto it = _data_type_map.find(name);
+        return it != _data_type_map.end() ? it->second : DataTypePtr();
+    }
+    // Reverse lookup: name registered for `data_type` (Nullable is unwrapped), or "" if none.
+    const std::string& get(const DataTypePtr& data_type) const {
+        auto type_ptr = data_type->is_nullable() ?
+                        ((DataTypeNullable*)(data_type.get()))->get_nested_type() : data_type;
+        for (const auto& [type, type_name] : _invert_data_type_map) {
+            if (type->equals(*type_ptr)) return type_name;
+        }
+        return _empty_string;
+    }
+private:
+    void regist_data_type(const std::string& name, const DataTypePtr& data_type) {
+        _data_type_map.emplace(name, data_type);
+        _invert_data_type_map.emplace_back(data_type, name);
+    }
+    // TODO: Here is a little trick here, use bimap to replace map and vector
+    DataTypeMap _data_type_map;
+    InvertedDataTypeMap _invert_data_type_map;
+    std::string _empty_string;
+};
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_nothing.cpp b/be/src/vec/data_types/data_type_nothing.cpp
new file mode 100644
index 0000000000..6328c8f3e8
--- /dev/null
+++ b/be/src/vec/data_types/data_type_nothing.cpp
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeNothing.cpp
+// and modified by Doris
+
+#include "vec/data_types/data_type_nothing.h"
+
+#include "gen_cpp/data.pb.h"
+#include "vec/columns/column_nothing.h"
+#include "vec/common/typeid_cast.h"
+
+namespace doris::vectorized {
+
+MutableColumnPtr DataTypeNothing::create_column() const { // column built by ColumnNothing::create(0)
+    return ColumnNothing::create(0);
+}
+// Serialization ignores both arguments and returns 0.
+size_t DataTypeNothing::serialize(const IColumn&, PColumn* pcolumn) const {
+    return 0;
+}
+// Deserialization is an empty function; equality compares the dynamic types only.
+void DataTypeNothing::deserialize(const PColumn& pcolumn, IColumn* column) const {}
+bool DataTypeNothing::equals(const IDataType& rhs) const {
+    return typeid(rhs) == typeid(*this);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_nothing.h b/be/src/vec/data_types/data_type_nothing.h
new file mode 100644
index 0000000000..e84c4440df
--- /dev/null
+++ b/be/src/vec/data_types/data_type_nothing.h
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeNothing.h
+// and modified by Doris
+
+#pragma once
+
+#include <vec/common/exception.h>
+
+#include "vec/core/field.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+
+/** Data type that cannot have any values.
+  * Used to represent NULL of unknown type as Nullable(Nothing),
+  * and possibly for empty array of unknown type as Array(Nothing).
+  */
+class DataTypeNothing final : public IDataType {
+public:
+    static constexpr bool is_parametric = false;
+
+    // Identity.
+    TypeIndex get_type_id() const override { return TypeIndex::Nothing; }
+    const char* get_family_name() const override { return "Nothing"; }
+    bool equals(const IDataType& rhs) const override;
+
+    // Fixed properties of the type.
+    bool get_is_parametric() const override { return false; }
+    bool have_subtypes() const override { return false; }
+    bool cannot_be_stored_in_tables() const override { return true; }
+    bool can_be_inside_nullable() const override { return true; }
+    bool text_can_contain_only_valid_utf8() const override { return true; }
+    bool have_maximum_size_of_value() const override { return true; }
+    size_t get_size_of_value_in_memory() const override { return 0; }
+
+    // Column creation and serialization (defined in data_type_nothing.cpp).
+    MutableColumnPtr create_column() const override;
+    size_t serialize(const IColumn& column, PColumn* pcolumn) const override;
+    void deserialize(const PColumn& pcolumn, IColumn* column) const override;
+    // Neither default-value hook is implemented; both report that through LOG(FATAL).
+    [[noreturn]] Field get_default() const override {
+        LOG(FATAL) << "Method get_default() is not implemented for data type " << get_name();
+    }
+    void insert_default_into(IColumn&) const override {
+        LOG(FATAL) << "Method insert_default_into() is not implemented for data type " << get_name();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_nullable.cpp b/be/src/vec/data_types/data_type_nullable.cpp
new file mode 100644
index 0000000000..5c122d3b1b
--- /dev/null
+++ b/be/src/vec/data_types/data_type_nullable.cpp
@@ -0,0 +1,113 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeNullable.cpp
+// and modified by Doris
+
+#include "vec/data_types/data_type_nullable.h"
+
+#include "common/logging.h"
+#include "gen_cpp/data.pb.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/core/field.h"
+#include "vec/data_types/data_type_nothing.h"
+
+namespace doris::vectorized {
+
+DataTypeNullable::DataTypeNullable(const DataTypePtr& nested_data_type_)
+        : nested_data_type{nested_data_type_} {
+    // A nested type that may not live inside Nullable is reported with a FATAL log entry.
+    if (nested_data_type->can_be_inside_nullable()) return;
+    LOG(FATAL) << fmt::format("Nested type {} cannot be inside Nullable type",
+                              nested_data_type->get_name());
+}
+
+bool DataTypeNullable::only_null() const { // true when the nested type is DataTypeNothing
+    return typeid_cast<const DataTypeNothing*>(nested_data_type.get()) != nullptr;
+}
+
+std::string DataTypeNullable::to_string(const IColumn& column, size_t row_num) const {
+    // Keep the converted column in a named pointer: if the conversion returns a freshly
+    // built column, a reference taken from an unnamed temporary would dangle right away.
+    auto full_column = column.convert_to_full_column_if_const();
+    const auto& nullable = assert_cast<const ColumnNullable&>(*full_column.get());
+    if (nullable.is_null_at(row_num)) {
+        return "\\N"; // NULL is rendered as a backslash followed by N
+    }
+    return nested_data_type->to_string(nullable.get_nested_column(), row_num);
+}
+
+size_t DataTypeNullable::serialize(const IColumn& column, PColumn* pcolumn) const {
+    // The named pointer keeps the converted column alive while `nullable` refers to it.
+    auto full_column = column.convert_to_full_column_if_const();
+    const auto& nullable = assert_cast<const ColumnNullable&>(*full_column.get());
+    // One is_null flag per row goes first; the nested type then serializes its own payload.
+    const size_t row_count = column.size();
+    pcolumn->mutable_is_null()->Reserve(row_count);
+    for (size_t row = 0; row < row_count; ++row) {
+        pcolumn->add_is_null(nullable.is_null_at(row));
+    }
+    return nested_data_type->serialize(nullable.get_nested_column(), pcolumn) +
+           sizeof(bool) * row_count;
+}
+
+void DataTypeNullable::deserialize(const PColumn& pcolumn, IColumn* column) const {
+    auto* nullable = assert_cast<ColumnNullable*>(column);
+
+    // Rebuild the null map from the is_null flags, storing 1 for NULL and 0 otherwise.
+    auto& null_map = nullable->get_null_map_data();
+    const int flag_count = pcolumn.is_null_size();
+    null_map.reserve(flag_count);
+    for (int idx = 0; idx < flag_count; ++idx) {
+        null_map.push_back(pcolumn.is_null(idx) ? 1 : 0);
+    }
+
+    // The nested column is filled from the same PColumn by the nested type.
+    nested_data_type->deserialize(pcolumn, &nullable->get_nested_column());
+}
+
+MutableColumnPtr DataTypeNullable::create_column() const { // nested column + UInt8 null map
+    return ColumnNullable::create(nested_data_type->create_column(), ColumnUInt8::create());
+}
+
+Field DataTypeNullable::get_default() const { // the default value of a Nullable is NULL
+    return Null();
+}
+
+size_t DataTypeNullable::get_size_of_value_in_memory() const { // unsupported: logs a FATAL error
+    LOG(FATAL) << fmt::format("Value of type {} in memory is not of fixed size.", get_name());
+    return 0; // keeps the non-void function well-formed after the FATAL log statement
+}
+
+bool DataTypeNullable::equals(const IDataType& rhs) const {
+    if (!rhs.is_nullable()) return false; // only another Nullable type can be equal
+    return nested_data_type->equals(*static_cast<const DataTypeNullable&>(rhs).nested_data_type);
+}
+
+DataTypePtr make_nullable(const DataTypePtr& type) {
+    // Already-nullable types are returned unchanged, so this never wraps Nullable in Nullable.
+    return type->is_nullable() ? type : std::make_shared<DataTypeNullable>(type);
+}
+
+DataTypePtr remove_nullable(const DataTypePtr& type) {
+    if (!type->is_nullable()) return type; // nothing to strip
+    return static_cast<const DataTypeNullable&>(*type).get_nested_type();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_nullable.h b/be/src/vec/data_types/data_type_nullable.h
new file mode 100644
index 0000000000..e6e86d1d78
--- /dev/null
+++ b/be/src/vec/data_types/data_type_nullable.h
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeNullable.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+
+/// Decorates another data type with the ability to hold NULL in addition to the nested
+/// type's own values; most static properties are forwarded to the nested type.
+class DataTypeNullable final : public IDataType {
+public:
+    explicit DataTypeNullable(const DataTypePtr& nested_data_type_);
+    const DataTypePtr& get_nested_type() const { return nested_data_type; }
+
+    TypeIndex get_type_id() const override { return TypeIndex::Nullable; }
+    const char* get_family_name() const override { return "Nullable"; }
+    std::string do_get_name() const override {
+        return "Nullable(" + nested_data_type->get_name() + ")";
+    }
+
+    // Members defined out of line in data_type_nullable.cpp.
+    bool equals(const IDataType& rhs) const override;
+    MutableColumnPtr create_column() const override;
+    Field get_default() const override;
+    size_t serialize(const IColumn& column, PColumn* pcolumn) const override;
+    void deserialize(const PColumn& pcolumn, IColumn* column) const override;
+    std::string to_string(const IColumn& column, size_t row_num) const;
+    size_t get_size_of_value_in_memory() const override;
+    bool only_null() const override;
+
+    // Properties fixed by Nullable itself.
+    bool is_nullable() const override { return true; }
+    bool get_is_parametric() const override { return true; }
+    bool have_subtypes() const override { return true; }
+    bool can_be_used_as_version() const override { return false; }
+    // Properties taken from the nested type.
+    bool cannot_be_stored_in_tables() const override {
+        return nested_data_type->cannot_be_stored_in_tables();
+    }
+    bool should_align_right_in_pretty_formats() const override {
+        return nested_data_type->should_align_right_in_pretty_formats();
+    }
+    bool text_can_contain_only_valid_utf8() const override {
+        return nested_data_type->text_can_contain_only_valid_utf8();
+    }
+    bool is_comparable() const override { return nested_data_type->is_comparable(); }
+    bool can_be_compared_with_collation() const override {
+        return nested_data_type->can_be_compared_with_collation();
+    }
+    bool is_summable() const override { return nested_data_type->is_summable(); }
+    bool can_be_used_in_boolean_context() const override {
+        return nested_data_type->can_be_used_in_boolean_context();
+    }
+    bool have_maximum_size_of_value() const override {
+        return nested_data_type->have_maximum_size_of_value();
+    }
+    size_t get_maximum_size_of_value_in_memory() const override {
+        return 1 + nested_data_type->get_maximum_size_of_value_in_memory();
+    }
+    bool can_be_inside_low_cardinality() const override {
+        return nested_data_type->can_be_inside_low_cardinality();
+    }
+
+private:
+    DataTypePtr nested_data_type;
+};
+
+DataTypePtr make_nullable(const DataTypePtr& type); // Nullable(type), or type if already nullable
+DataTypePtr remove_nullable(const DataTypePtr& type); // nested type of a Nullable, else type
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_number.h b/be/src/vec/data_types/data_type_number.h
new file mode 100644
index 0000000000..fa3dd55459
--- /dev/null
+++ b/be/src/vec/data_types/data_type_number.h
@@ -0,0 +1,87 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeNumber.h
+// and modified by Doris
+
+#pragma once
+
+#include <type_traits>
+
+#include "vec/core/field.h"
+#include "vec/data_types/data_type_number_base.h"
+
+namespace doris::vectorized {
+
+/// Numeric data type whose column values have the C++ type T.
+template <typename T>
+class DataTypeNumber final : public DataTypeNumberBase<T> {
+public: // overrides are callable on a DataTypeNumber directly, not only via the base interface
+    bool equals(const IDataType& rhs) const override { return typeid(rhs) == typeid(*this); }
+    bool can_be_used_as_version() const override { return true; }
+    bool is_summable() const override { return true; }
+    bool can_be_used_in_bit_operations() const override { return true; }
+    bool can_be_used_in_boolean_context() const override { return true; }
+    bool can_be_inside_nullable() const override { return true; }
+    bool can_be_promoted() const override { return true; }
+    DataTypePtr promote_numeric_type() const override { // number type of NearestFieldType<T>
+        using PromotedType = DataTypeNumber<NearestFieldType<T>>;
+        return std::make_shared<PromotedType>();
+    }
+};
+
+using DataTypeUInt8 = DataTypeNumber<UInt8>; // short names for the DataTypeNumber instantiations
+using DataTypeUInt16 = DataTypeNumber<UInt16>;
+using DataTypeUInt32 = DataTypeNumber<UInt32>;
+using DataTypeUInt64 = DataTypeNumber<UInt64>;
+using DataTypeUInt128 = DataTypeNumber<UInt128>;
+using DataTypeInt8 = DataTypeNumber<Int8>;
+using DataTypeInt16 = DataTypeNumber<Int16>;
+using DataTypeInt32 = DataTypeNumber<Int32>;
+using DataTypeInt64 = DataTypeNumber<Int64>;
+using DataTypeInt128 = DataTypeNumber<Int128>;
+using DataTypeFloat32 = DataTypeNumber<Float32>;
+using DataTypeFloat64 = DataTypeNumber<Float64>;
+
+template <typename DataType>
+constexpr bool IsDataTypeNumber = false; // overridden to true for the number types below
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeUInt8> = true;
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeUInt16> = true;
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeUInt32> = true;
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeUInt64> = true;
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeUInt128> = true;
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeInt8> = true;
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeInt16> = true;
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeInt32> = true;
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeInt64> = true;
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeInt128> = true;
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeFloat32> = true;
+template <>
+inline constexpr bool IsDataTypeNumber<DataTypeFloat64> = true;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_number_base.cpp b/be/src/vec/data_types/data_type_number_base.cpp
new file mode 100644
index 0000000000..01a424813d
--- /dev/null
+++ b/be/src/vec/data_types/data_type_number_base.cpp
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeNumberBase.cpp
+// and modified by Doris
+
+#include "vec/data_types/data_type_number_base.h"
+
+#include <type_traits>
+
+#include "gen_cpp/data.pb.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/nan_utils.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/io/io_helper.h"
+
+namespace doris::vectorized {
+
+template <typename T>
+void DataTypeNumberBase<T>::to_string(const IColumn& column, size_t row_num,
+                                      BufferWritable& ostr) const {
+    // Convert a const column to a full one, then read the requested cell by value.
+    const auto full_column = column.convert_to_full_column_if_const();
+    const T value = assert_cast<const ColumnVector<T>&>(*full_column.get()).get_data()[row_num];
+    if constexpr (std::is_same_v<T, UInt128>) {
+        // UInt128 is formatted by int128_to_string and written as raw characters.
+        const std::string text = int128_to_string(value);
+        ostr.write(text.data(), text.size());
+    } else if constexpr (std::is_integral_v<T> || std::numeric_limits<T>::is_iec559) {
+        ostr.write_number(value);
+    }
+}
+
+template <typename T>
+Field DataTypeNumberBase<T>::get_default() const {
+    return NearestFieldType<FieldType>(); // value-initialized NearestFieldType of T
+}
+
+template <typename T>
+std::string DataTypeNumberBase<T>::to_string(const IColumn& column, size_t row_num) const {
+    // Convert a const column to a full one, then read the requested cell by value.
+    const auto full_column = column.convert_to_full_column_if_const();
+    const T value = assert_cast<const ColumnVector<T>&>(*full_column.get()).get_data()[row_num];
+    if constexpr (std::is_same_v<T, __int128_t> || std::is_same_v<T, UInt128>) {
+        // 128-bit values go through int128_to_string (std::to_string has no such overload).
+        return int128_to_string(value);
+    } else if constexpr (std::is_integral_v<T> || std::numeric_limits<T>::is_iec559) {
+        return std::to_string(value);
+    }
+}
+
+template <typename T>
+size_t DataTypeNumberBase<T>::serialize(const IColumn& column, PColumn* pcolumn) const {
+    // The payload is the raw value array: one FieldType per row, copied with memcpy.
+    const auto payload_bytes = column.size() * sizeof(FieldType);
+    auto* binary = pcolumn->mutable_binary();
+    binary->resize(payload_bytes);
+
+    // `full_column` keeps the converted column alive while its data is being copied.
+    auto full_column = column.convert_to_full_column_if_const();
+    const auto& values = assert_cast<const ColumnVector<T>&>(*full_column.get()).get_data();
+    memcpy(binary->data(), values.data(), payload_bytes);
+
+    return compress_binary(pcolumn);
+}
+
+template <typename T>
+void DataTypeNumberBase<T>::deserialize(const PColumn& pcolumn, IColumn* column) const {
+    std::string uncompressed;
+    read_binary(pcolumn, &uncompressed);
+    // The payload is a raw array of T. Copy whole elements only, so that a payload whose
+    // size is not a multiple of sizeof(T) cannot be written past the resized container.
+    auto& container = assert_cast<ColumnVector<T>*>(column)->get_data();
+    container.resize(uncompressed.size() / sizeof(T));
+    memcpy(container.data(), uncompressed.data(), container.size() * sizeof(T));
+}
+
+template <typename T>
+MutableColumnPtr DataTypeNumberBase<T>::create_column() const { // default-created ColumnVector<T>
+    return ColumnVector<T>::create();
+}
+
+template <typename T>
+bool DataTypeNumberBase<T>::is_value_represented_by_integer() const {
+    return std::is_integral_v<T>; // decided at compile time from T alone
+}
+
+template <typename T>
+bool DataTypeNumberBase<T>::is_value_represented_by_unsigned_integer() const {
+    return std::is_integral_v<T> && std::is_unsigned_v<T>; // integral and unsigned, from T alone
+}
+
+/// Explicit template instantiations - keeps the member definitions above out of the header.
+template class DataTypeNumberBase<UInt8>;
+template class DataTypeNumberBase<UInt16>;
+template class DataTypeNumberBase<UInt32>;
+template class DataTypeNumberBase<UInt64>;
+template class DataTypeNumberBase<UInt128>;
+template class DataTypeNumberBase<Int8>;
+template class DataTypeNumberBase<Int16>;
+template class DataTypeNumberBase<Int32>;
+template class DataTypeNumberBase<Int64>;
+template class DataTypeNumberBase<Int128>;
+template class DataTypeNumberBase<Float32>;
+template class DataTypeNumberBase<Float64>;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_number_base.h b/be/src/vec/data_types/data_type_number_base.h
new file mode 100644
index 0000000000..e19383eb29
--- /dev/null
+++ b/be/src/vec/data_types/data_type_number_base.h
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeNumberBase.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/common/assert_cast.h"
+#include "vec/common/string_ref.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+
+/** Implements part of the IDataType interface, common to all numbers and for Date and DateTime.
+  * T is the type of a single stored value (see FieldType). */
+template <typename T>
+class DataTypeNumberBase : public IDataType {
+    static_assert(IsNumber<T>);
+
+public:
+    static constexpr bool is_parametric = false;
+    using ColumnType = ColumnVector<T>;
+    using FieldType = T;
+
+    const char* get_family_name() const override { return TypeName<T>::get(); }
+    TypeIndex get_type_id() const override { return TypeId<T>::value; }
+    Field get_default() const override;
+    // Defined in data_type_number_base.cpp, which explicitly instantiates the supported T.
+    size_t serialize(const IColumn& column, PColumn* pcolumn) const override;
+    void deserialize(const PColumn& pcolumn, IColumn* column) const override;
+    MutableColumnPtr create_column() const override;
+
+    bool get_is_parametric() const override { return false; }
+    bool have_subtypes() const override { return false; }
+    bool should_align_right_in_pretty_formats() const override { return true; }
+    bool text_can_contain_only_valid_utf8() const override { return true; }
+    bool is_comparable() const override { return true; }
+    bool is_value_represented_by_number() const override { return true; }
+    bool is_value_represented_by_integer() const override;
+    bool is_value_represented_by_unsigned_integer() const override;
+    bool is_value_unambiguously_represented_in_contiguous_memory_region() const override {
+        return true;
+    }
+    bool have_maximum_size_of_value() const override { return true; }
+    size_t get_size_of_value_in_memory() const override { return sizeof(T); }
+    bool is_categorial() const override { return is_value_represented_by_integer(); }
+    bool can_be_inside_low_cardinality() const override { return true; }
+    // Text form of one cell, written into `ostr` or returned as a std::string.
+    void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override;
+    std::string to_string(const IColumn& column, size_t row_num) const;
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_string.cpp b/be/src/vec/data_types/data_type_string.cpp
new file mode 100644
index 0000000000..86b0aac8a9
--- /dev/null
+++ b/be/src/vec/data_types/data_type_string.cpp
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeString.cpp
+// and modified by Doris
+
+#include "vec/data_types/data_type_string.h"
+
+#include "gen_cpp/data.pb.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_string.h"
+#include "vec/common/assert_cast.h"
+#include "vec/core/field.h"
+#include "vec/io/io_helper.h"
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+namespace doris::vectorized {
+
+template <typename Reader>
+static inline void read(IColumn& column, Reader&& reader) {
+    auto& string_column = assert_cast<ColumnString&>(column);
+    auto& chars = string_column.get_chars();
+    auto& row_offsets = string_column.get_offsets();
+    const size_t chars_before = chars.size(); // sizes to restore if reading throws
+    const size_t offsets_before = row_offsets.size();
+    try {
+        reader(chars);
+        chars.push_back(0); // zero byte appended after whatever `reader` put into `chars`
+        row_offsets.push_back(chars.size());
+    } catch (...) {
+        row_offsets.resize_assume_reserved(offsets_before);
+        chars.resize_assume_reserved(chars_before);
+        throw;
+    }
+}
+
+std::string DataTypeString::to_string(const IColumn& column, size_t row_num) const {
+    // The StringRef refers to bytes owned by the column, so the converted column must outlive it.
+    auto full_column = column.convert_to_full_column_if_const();
+    const StringRef s = assert_cast<const ColumnString&>(*full_column.get()).get_data_at(row_num);
+    return s.to_string();
+}
+
+void DataTypeString::to_string(const IColumn& column, size_t row_num,
+                               BufferWritable& ostr) const {
+    // The StringRef refers to bytes owned by the column, so the converted column must outlive it.
+    auto full_column = column.convert_to_full_column_if_const();
+    const StringRef s = assert_cast<const ColumnString&>(*full_column.get()).get_data_at(row_num);
+    ostr.write(s.data, s.size);
+}
+
+Field DataTypeString::get_default() const { // the default value is a default-constructed String
+    return String();
+}
+
+MutableColumnPtr DataTypeString::create_column() const { // default-created ColumnString
+    return ColumnString::create();
+}
+
+bool DataTypeString::equals(const IDataType& rhs) const { // equal only to the same dynamic type
+    return typeid(rhs) == typeid(*this);
+}
+
+size_t DataTypeString::serialize(const IColumn& column, PColumn* pcolumn) const {
+    auto full_column = column.convert_to_full_column_if_const();
+    const auto& strings = assert_cast<const ColumnString&>(*full_column.get());
+    const auto& chars = strings.get_chars();
+    const size_t row_count = column.size();
+
+    // Layout: [uint32 row count][row_count uint32 offsets][character bytes].
+    const size_t offsets_bytes = sizeof(uint32_t) * row_count;
+    pcolumn->mutable_binary()->resize(sizeof(uint32_t) + offsets_bytes + chars.size());
+    auto* cursor = pcolumn->mutable_binary()->data();
+    // Header: the number of rows.
+    *reinterpret_cast<uint32_t*>(cursor) = row_count;
+    cursor += sizeof(uint32_t);
+    // Offsets, copied verbatim (this assumes 4-byte offset entries).
+    memcpy(cursor, strings.get_offsets().data(), offsets_bytes);
+    cursor += offsets_bytes;
+    // Character data, copied verbatim.
+    memcpy(cursor, chars.data(), chars.size());
+
+    return compress_binary(pcolumn);
+}
+
+void DataTypeString::deserialize(const PColumn& pcolumn, IColumn* column) const {
+    auto* strings = assert_cast<ColumnString*>(column);
+    std::string uncompressed;
+    read_binary(pcolumn, &uncompressed);
+
+    // Layout: [uint32 row count][row count uint32 offsets][character bytes].
+    // NOTE(review): the lengths below come straight from the payload and are not validated.
+    auto* cursor = uncompressed.data();
+    const uint32_t row_count = *reinterpret_cast<uint32_t*>(cursor);
+    cursor += sizeof(uint32_t);
+    const size_t offsets_bytes = sizeof(uint32_t) * row_count;
+    strings->get_offsets().resize(row_count);
+    memcpy(strings->get_offsets().data(), cursor, offsets_bytes);
+    cursor += offsets_bytes;
+
+    // Everything after the offsets is character data.
+    const uint32_t chars_bytes = uncompressed.size() - sizeof(uint32_t) * (row_count + 1);
+    strings->get_chars().resize(chars_bytes);
+    memcpy(strings->get_chars().data(), cursor, chars_bytes);
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/data_type_string.h b/be/src/vec/data_types/data_type_string.h
new file mode 100644
index 0000000000..85064738d1
--- /dev/null
+++ b/be/src/vec/data_types/data_type_string.h
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/DataTypeString.h
+// and modified by Doris
+
+#pragma once
+
+#include <ostream>
+
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+
+/// Data type of string columns; its column type is ColumnString and its field type is String.
+class DataTypeString final : public IDataType {
+public:
+    using ColumnType = ColumnString;
+    using FieldType = String;
+    static constexpr bool is_parametric = false;
+    TypeIndex get_type_id() const override { return TypeIndex::String; }
+    const char* get_family_name() const override { return "String"; }
+    bool equals(const IDataType& rhs) const override;
+
+    // Column creation, default value and (de)serialization; defined in data_type_string.cpp.
+    MutableColumnPtr create_column() const override;
+    Field get_default() const override;
+    size_t serialize(const IColumn& column, PColumn* pcolumn) const override;
+    void deserialize(const PColumn& pcolumn, IColumn* column) const override;
+
+    // Text form of one cell, returned as a std::string or written into `ostr`.
+    std::string to_string(const IColumn& column, size_t row_num) const;
+    void to_string(const IColumn& column, size_t row_num, BufferWritable& ostr) const override;
+
+    bool get_is_parametric() const override { return false; }
+    bool have_subtypes() const override { return false; }
+    bool is_comparable() const override { return true; }
+    bool can_be_compared_with_collation() const override { return true; }
+    bool is_value_unambiguously_represented_in_contiguous_memory_region() const override {
+        return true;
+    }
+    bool is_categorial() const override { return true; }
+    bool can_be_inside_nullable() const override { return true; }
+    bool can_be_inside_low_cardinality() const override { return true; }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/get_least_supertype.cpp b/be/src/vec/data_types/get_least_supertype.cpp
new file mode 100644
index 0000000000..a661035af9
--- /dev/null
+++ b/be/src/vec/data_types/get_least_supertype.cpp
@@ -0,0 +1,322 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/getLeastSupertype.cpp
+// and modified by Doris
+
+#include "vec/data_types/get_least_supertype.h"
+
+#include <unordered_set>
+
+#include "vec/common/typeid_cast.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_nothing.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+
+namespace doris::vectorized {
+
+namespace {
+
+String get_exception_message_prefix(const DataTypes& types) {
+    // Builds "There is no supertype for types A, B, C" from the names of the given types.
+    std::stringstream message;
+    message << "There is no supertype for types ";
+
+    const char* separator = "";
+    for (const auto& data_type : types) {
+        // The separator is empty in front of the first name and ", " afterwards.
+        message << separator << data_type->get_name();
+        separator = ", ";
+    }
+
+    return message.str();
+}
+} // namespace
+
+DataTypePtr get_least_supertype(const DataTypes& types) {
+    /// Trivial cases: no types yields Nothing, a single type is its own supertype.
+
+    if (types.empty()) return std::make_shared<DataTypeNothing>();
+
+    if (types.size() == 1) return types[0];
+
+    /// All types are equal: the first type is returned as the common type.
+    {
+        bool all_equal = true;
+        for (size_t i = 1, size = types.size(); i < size; ++i) {
+            if (!types[i]->equals(*types[0])) {
+                all_equal = false;
+                break;
+            }
+        }
+
+        if (all_equal) return types[0];
+    }
+
+    /// Recursive rules
+
+    /// If there are Nothing types, skip them and recurse on the remaining types.
+    {
+        DataTypes non_nothing_types;
+        non_nothing_types.reserve(types.size());
+
+        for (const auto& type : types)
+            if (!typeid_cast<const DataTypeNothing*>(type.get()))
+                non_nothing_types.emplace_back(type);
+
+        if (non_nothing_types.size() < types.size()) return get_least_supertype(non_nothing_types);
+    }
+
+    /// For Nullable: recurse on the nested types and wrap the result in Nullable.
+    {
+        bool have_nullable = false;
+
+        DataTypes nested_types;
+        nested_types.reserve(types.size());
+
+        for (const auto& type : types) {
+            if (const DataTypeNullable* type_nullable =
+                        typeid_cast<const DataTypeNullable*>(type.get())) {
+                have_nullable = true;
+                // Types for which only_null() holds contribute no nested type.
+                if (!type_nullable->only_null())
+                    nested_types.emplace_back(type_nullable->get_nested_type());
+            } else
+                nested_types.emplace_back(type);
+        }
+
+        if (have_nullable) {
+            return std::make_shared<DataTypeNullable>(get_least_supertype(nested_types));
+        }
+    }
+
+    /// Non-recursive rules
+    /// The rules below test which distinct type ids are present.
+    std::unordered_set<TypeIndex> type_ids;
+    for (const auto& type : types) type_ids.insert(type->get_type_id());
+
+    /// For String and FixedString, or for different FixedStrings, the common type is String.
+    /// No other types are compatible with Strings. TODO Enums?
+    {
+        UInt32 have_string = type_ids.count(TypeIndex::String);
+        UInt32 have_fixed_string = type_ids.count(TypeIndex::FixedString);
+
+        if (have_string || have_fixed_string) {
+            bool all_strings = type_ids.size() == (have_string + have_fixed_string);
+            if (!all_strings) {
+                LOG(FATAL)
+                        << get_exception_message_prefix(types)
+                        << " because some of them are String/FixedString and some of them are not";
+            }
+
+            return std::make_shared<DataTypeString>();
+        }
+    }
+
+    /// For Date and DateTime, the common type is DateTime. No other types are compatible.
+    {
+        UInt32 have_date = type_ids.count(TypeIndex::Date);
+        UInt32 have_datetime = type_ids.count(TypeIndex::DateTime);
+
+        if (have_date || have_datetime) {
+            bool all_date_or_datetime = type_ids.size() == (have_date + have_datetime);
+            if (!all_date_or_datetime) {
+                LOG(FATAL) << get_exception_message_prefix(types)
+                           << " because some of them are Date/DateTime and some of them are not";
+            }
+
+            return std::make_shared<DataTypeDateTime>();
+        }
+    }
+
+    /// Decimals: may only be mixed with integers; the result uses the largest scale found.
+    {
+        UInt32 have_decimal32 = type_ids.count(TypeIndex::Decimal32);
+        UInt32 have_decimal64 = type_ids.count(TypeIndex::Decimal64);
+        UInt32 have_decimal128 = type_ids.count(TypeIndex::Decimal128);
+
+        if (have_decimal32 || have_decimal64 || have_decimal128) {
+            UInt32 num_supported = have_decimal32 + have_decimal64 + have_decimal128;
+
+            std::vector<TypeIndex> int_ids = {
+                    TypeIndex::Int8,  TypeIndex::UInt8,  TypeIndex::Int16, TypeIndex::UInt16,
+                    TypeIndex::Int32, TypeIndex::UInt32, TypeIndex::Int64, TypeIndex::UInt64};
+            std::vector<UInt32> num_ints(int_ids.size(), 0);
+            // max_int ends up as the last entry of int_ids that is present in type_ids.
+            TypeIndex max_int = TypeIndex::Nothing;
+            for (size_t i = 0; i < int_ids.size(); ++i) {
+                UInt32 num = type_ids.count(int_ids[i]);
+                num_ints[i] = num;
+                num_supported += num;
+                if (num) max_int = int_ids[i];
+            }
+
+            if (num_supported != type_ids.size()) {
+                LOG(FATAL) << get_exception_message_prefix(types)
+                           << " because some of them have no lossless convertion to Decimal";
+            }
+
+            UInt32 max_scale = 0;
+            for (const auto& type : types) {
+                UInt32 scale = get_decimal_scale(*type, 0);
+                if (scale > max_scale) max_scale = scale;
+            }
+
+            UInt32 min_precision = max_scale + least_decimal_precision_for(max_int);
+
+            /// special cases Int32 -> Dec32, Int64 -> Dec64
+            if (max_scale == 0) {
+                if (max_int == TypeIndex::Int32)
+                    min_precision = DataTypeDecimal<Decimal32>::max_precision();
+                else if (max_int == TypeIndex::Int64)
+                    min_precision = DataTypeDecimal<Decimal64>::max_precision();
+            }
+
+            if (min_precision > DataTypeDecimal<Decimal128>::max_precision()) {
+                LOG(FATAL) << fmt::format("{} because the least supertype is Decimal({},{})",
+                                          get_exception_message_prefix(types), min_precision,
+                                          max_scale);
+            }
+            // Return the narrowest decimal that fits, built with that decimal's max precision.
+            if (have_decimal128 || min_precision > DataTypeDecimal<Decimal64>::max_precision())
+                return std::make_shared<DataTypeDecimal<Decimal128>>(
+                        DataTypeDecimal<Decimal128>::max_precision(), max_scale);
+            if (have_decimal64 || min_precision > DataTypeDecimal<Decimal32>::max_precision())
+                return std::make_shared<DataTypeDecimal<Decimal64>>(
+                        DataTypeDecimal<Decimal64>::max_precision(), max_scale);
+            return std::make_shared<DataTypeDecimal<Decimal32>>(
+                    DataTypeDecimal<Decimal32>::max_precision(), max_scale);
+        }
+    }
+
+    /// For numeric types, the most complicated part.
+    {
+        bool all_numbers = true;
+
+        size_t max_bits_of_signed_integer = 0;
+        size_t max_bits_of_unsigned_integer = 0;
+        size_t max_mantissa_bits_of_floating = 0;
+        // Raises `what` to `value` when `value` is larger.
+        auto maximize = [](size_t& what, size_t value) {
+            if (value > what) what = value;
+        };
+        // Track the widest signed, unsigned and floating (mantissa bits) types seen.
+        for (const auto& type : types) {
+            if (typeid_cast<const DataTypeUInt8*>(type.get()))
+                maximize(max_bits_of_unsigned_integer, 8);
+            else if (typeid_cast<const DataTypeUInt16*>(type.get()))
+                maximize(max_bits_of_unsigned_integer, 16);
+            else if (typeid_cast<const DataTypeUInt32*>(type.get()))
+                maximize(max_bits_of_unsigned_integer, 32);
+            else if (typeid_cast<const DataTypeUInt64*>(type.get()))
+                maximize(max_bits_of_unsigned_integer, 64);
+            else if (typeid_cast<const DataTypeInt8*>(type.get()))
+                maximize(max_bits_of_signed_integer, 8);
+            else if (typeid_cast<const DataTypeInt16*>(type.get()))
+                maximize(max_bits_of_signed_integer, 16);
+            else if (typeid_cast<const DataTypeInt32*>(type.get()))
+                maximize(max_bits_of_signed_integer, 32);
+            else if (typeid_cast<const DataTypeInt64*>(type.get()))
+                maximize(max_bits_of_signed_integer, 64);
+            else if (typeid_cast<const DataTypeFloat32*>(type.get()))
+                maximize(max_mantissa_bits_of_floating, 24);
+            else if (typeid_cast<const DataTypeFloat64*>(type.get()))
+                maximize(max_mantissa_bits_of_floating, 53);
+            else
+                all_numbers = false;
+        }
+
+        if (max_bits_of_signed_integer || max_bits_of_unsigned_integer ||
+            max_mantissa_bits_of_floating) {
+            if (!all_numbers) {
+                LOG(FATAL) << get_exception_message_prefix(types)
+                           << " because some of them are numbers and some of them are not";
+            }
+
+            /// If there are signed and unsigned types of same bit-width, the result must be signed number with at least one more bit.
+            /// Example, common of Int32, UInt32 = Int64.
+
+            size_t min_bit_width_of_integer =
+                    std::max(max_bits_of_signed_integer, max_bits_of_unsigned_integer);
+
+            /// If unsigned is not covered by signed.
+            if (max_bits_of_signed_integer &&
+                max_bits_of_unsigned_integer >= max_bits_of_signed_integer)
+                ++min_bit_width_of_integer;
+
+            /// If the result must be floating.
+            if (max_mantissa_bits_of_floating) {
+                size_t min_mantissa_bits =
+                        std::max(min_bit_width_of_integer, max_mantissa_bits_of_floating);
+                if (min_mantissa_bits <= 24)
+                    return std::make_shared<DataTypeFloat32>();
+                else if (min_mantissa_bits <= 53)
+                    return std::make_shared<DataTypeFloat64>();
+                else {
+                    LOG(FATAL) << get_exception_message_prefix(types)
+                               << " because some of them are integers and some are floating point "
+                                  "but there is no floating point type, that can exactly represent "
+                                  "all required integers";
+                }
+            }
+
+            /// If the result must be signed integer.
+            if (max_bits_of_signed_integer) {
+                if (min_bit_width_of_integer <= 8)
+                    return std::make_shared<DataTypeInt8>();
+                else if (min_bit_width_of_integer <= 16)
+                    return std::make_shared<DataTypeInt16>();
+                else if (min_bit_width_of_integer <= 32)
+                    return std::make_shared<DataTypeInt32>();
+                else if (min_bit_width_of_integer <= 64)
+                    return std::make_shared<DataTypeInt64>();
+                else {
+                    LOG(FATAL) << get_exception_message_prefix(types)
+                               << " because some of them are signed integers and some are unsigned "
+                                  "integers, but there is no signed integer type, that can exactly "
+                                  "represent all required unsigned integer values";
+                }
+            }
+
+            /// All unsigned.
+            {
+                if (min_bit_width_of_integer <= 8)
+                    return std::make_shared<DataTypeUInt8>();
+                else if (min_bit_width_of_integer <= 16)
+                    return std::make_shared<DataTypeUInt16>();
+                else if (min_bit_width_of_integer <= 32)
+                    return std::make_shared<DataTypeUInt32>();
+                else if (min_bit_width_of_integer <= 64)
+                    return std::make_shared<DataTypeUInt64>();
+                else {
+                    LOG(FATAL) << "Logical error: " << get_exception_message_prefix(types)
+                               << "but as all data types are unsigned integers, we must have found "
+                                  "maximum unsigned integer type";
+                }
+            }
+        }
+    }
+
+    /// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases).
+    LOG(FATAL) << get_exception_message_prefix(types);
+    return nullptr;
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/get_least_supertype.h b/be/src/vec/data_types/get_least_supertype.h
new file mode 100644
index 0000000000..64f6e7619a
--- /dev/null
+++ b/be/src/vec/data_types/get_least_supertype.h
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/getLeastSupertype.h
+// and modified by Doris
+
+#pragma once
+
+#include <vec/data_types/data_type.h>
+
+namespace doris::vectorized {
+
+/** Get data type that covers all possible values of passed data types.
+  * If there is no such data type, a fatal error is logged via LOG(FATAL); nothing is thrown.
+  * An empty list yields DataTypeNothing; a single type is returned unchanged.
+  * Examples: least common supertype for UInt8, Int8 - Int16.
+  * Examples: there is no least common supertype for Array(UInt8), Int8.
+  */
+DataTypePtr get_least_supertype(const DataTypes& types);
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/nested_utils.cpp b/be/src/vec/data_types/nested_utils.cpp
new file mode 100644
index 0000000000..8a957828b8
--- /dev/null
+++ b/be/src/vec/data_types/nested_utils.cpp
@@ -0,0 +1,73 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/NestedUtils.cpp
+// and modified by Doris
+
+#include "vec/data_types/nested_utils.h"
+
+#include "vec/columns/column_const.h"
+#include "vec/common/string_utils/string_utils.h"
+#include "vec/common/typeid_cast.h"
+
+namespace doris::vectorized {
+
+namespace Nested {
+
+std::string concatenate_name(const std::string& nested_table_name,
+                             const std::string& nested_field_name) {
+    return std::string(nested_table_name).append(".").append(nested_field_name); // table.field
+}
+
+/** Splits "first.second" into {first, second}. The name is treated as compound if and only if
+  * both parts are simple identifiers; otherwise {name, empty string} is returned.
+  */
+std::pair<std::string, std::string> splitName(const std::string& name) {
+    const char* const begin = name.data();
+    const char* const end = begin + name.size();
+    const char* cursor = begin;
+
+    // Steps the cursor over one identifier; yields false when none starts at the cursor.
+    auto consume_identifier = [&cursor, end]() {
+        if (cursor >= end || !is_valid_identifier_begin(*cursor)) return false;
+        do {
+            ++cursor;
+        } while (cursor < end && is_word_char_ascii(*cursor));
+        return true;
+    };
+
+    // First part: an identifier that is immediately followed by a dot.
+    if (!consume_identifier()) return {name, {}};
+    if (cursor >= end || *cursor != '.') return {name, {}};
+
+    const char* const first_end = cursor;
+    const char* const second_begin = ++cursor;
+
+    // Second part: an identifier that runs exactly to the end of the name.
+    if (!consume_identifier() || cursor != end) return {name, {}};
+
+    return {{begin, first_end}, {second_begin, end}};
+}
+
+std::string extract_table_name(const std::string& nested_name) {
+    // Only the first component of the split is needed; the second one is dropped.
+    return splitName(nested_name).first;
+}
+
+} // namespace Nested
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/nested_utils.h b/be/src/vec/data_types/nested_utils.h
new file mode 100644
index 0000000000..8c2684e7eb
--- /dev/null
+++ b/be/src/vec/data_types/nested_utils.h
@@ -0,0 +1,43 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/NestedUtils.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/core/block.h"
+
+namespace doris::vectorized {
+
+namespace Nested {
+std::string concatenate_name(const std::string& nested_table_name,
+                             const std::string& nested_field_name); // joins the parts with "."
+/// Splits "a.b" into {"a", "b"} when both parts are simple identifiers; otherwise {name, ""}.
+std::pair<std::string, std::string> splitName(const std::string& name);
+
+/// Returns the first part of splitName(nested_name): "a" for "a.b", else the whole name.
+std::string extract_table_name(const std::string& nested_name);
+/// NOTE(review): flatten/validate_array_sizes have no definition in nested_utils.cpp here.
+/// Replace Array(Tuple(...)) columns to a multiple of Array columns in a form of `column_name.element_name`.
+Block flatten(const Block& block);
+
+/// Check that sizes of arrays - elements of nested data structures - are equal.
+void validate_array_sizes(const Block& block);
+} // namespace Nested
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/data_types/number_traits.h b/be/src/vec/data_types/number_traits.h
new file mode 100644
index 0000000000..8405215a05
--- /dev/null
+++ b/be/src/vec/data_types/number_traits.h
@@ -0,0 +1,275 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/NumberTraits.h
+// and modified by Doris
+
+#pragma once
+
+#include <type_traits>
+
+#include "vec/common/uint128.h"
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+/** Allows getting the result type of the functions +, -, *, /, %, intDiv (integer division).
+  * The rules are different from those used in C++.
+  */
+
+namespace NumberTraits {
+
+struct Error {}; // Type reported by Construct when no specialization matches
+
+constexpr size_t max(size_t x, size_t y) {
+    return (y < x) ? x : y; // larger of the two; y when they are equal
+}
+
+constexpr size_t min(size_t x, size_t y) {
+    return (y > x) ? x : y; // smaller of the two; y when they are equal
+}
+
+// Doubles the size with a cap of 8 bytes; only sizes above 8 (largeint) map to 16.
+constexpr size_t next_size(size_t size) {
+    return size <= 8 ? min(size * 2, 8) : 16;
+}
+
+template <bool is_signed, bool is_floating, size_t size>
+struct Construct {
+    using Type = Error; // no specialization for this combination
+};
+// Unsigned integers: mapped to signed integers, one size step wider up to Int128.
+template <>
+struct Construct<false, false, 1> {
+    using Type = Int16;
+};
+template <>
+struct Construct<false, false, 2> {
+    using Type = Int32;
+};
+template <>
+struct Construct<false, false, 4> {
+    using Type = Int64;
+};
+template <>
+struct Construct<false, false, 8> {
+    using Type = Int128;
+};
+template <>
+struct Construct<false, false, 16> {
+    using Type = Int128;
+};
+template <>
+struct Construct<false, true, 1> { // floating, not signed: Float32 up to 4 bytes, Float64 for 8
+    using Type = Float32;
+};
+template <>
+struct Construct<false, true, 2> {
+    using Type = Float32;
+};
+template <>
+struct Construct<false, true, 4> {
+    using Type = Float32;
+};
+template <>
+struct Construct<false, true, 8> {
+    using Type = Float64;
+};
+template <>
+struct Construct<true, false, 1> { // signed integers: the integer type of exactly that size
+    using Type = Int8;
+};
+template <>
+struct Construct<true, false, 2> {
+    using Type = Int16;
+};
+template <>
+struct Construct<true, false, 4> {
+    using Type = Int32;
+};
+template <>
+struct Construct<true, false, 8> {
+    using Type = Int64;
+};
+template <>
+struct Construct<true, false, 16> {
+    using Type = Int128;
+};
+template <>
+struct Construct<true, true, 1> { // floating, signed: Float32 up to 4 bytes, Float64 above
+    using Type = Float32;
+};
+template <>
+struct Construct<true, true, 2> {
+    using Type = Float32;
+};
+template <>
+struct Construct<true, true, 4> {
+    using Type = Float32;
+};
+template <>
+struct Construct<true, true, 8> {
+    using Type = Float64;
+};
+// A 16-byte signed floating request also resolves to Float64.
+template <>
+struct Construct<true, true, 16> {
+    using Type = Float64;
+};
+
+/** The result of addition or multiplication is calculated according to the following rules:
+    * - if one of the arguments is floating-point, the result is floating point, otherwise integer;
+    * - signedness is forwarded to Construct; its unsigned integer specializations yield signed
+    *   types as well, so the result is signed either way;
+    * - the size is next_size() of the widest argument (for example, UInt8 + Int32 = Int64).
+    */
+template <typename A, typename B>
+struct ResultOfAdditionMultiplication {
+    using Type = typename Construct<std::is_signed_v<A> || std::is_signed_v<B>,
+                                    std::is_floating_point_v<A> || std::is_floating_point_v<B>,
+                                    next_size(max(sizeof(A), sizeof(B)))>::Type;
+};
+
+template <typename A, typename B>
+struct ResultOfSubtraction {
+    /// Always signed; floating if either operand is; sized by next_size of the widest operand.
+    static constexpr bool any_float = std::is_floating_point_v<A> || std::is_floating_point_v<B>;
+    using Type = typename Construct<true, any_float, next_size(max(sizeof(A), sizeof(B)))>::Type;
+};
+
+/** When dividing, you always get a floating-point number:
+    * the result is Float64 regardless of the argument types. */
+template <typename A, typename B>
+struct ResultOfFloatingPointDivision {
+    using Type = Float64;
+};
+
+/** For integer division, Construct is asked for an integer of the dividend's size, sizeof(A);
+    * the result counts as signed if either argument is signed. */
+template <typename A, typename B>
+struct ResultOfIntegerDivision {
+    using Type =
+            typename Construct<std::is_signed_v<A> || std::is_signed_v<B>, false, sizeof(A)>::Type;
+};
+
+/** Modulo: floating point if either operand is (not only A); as wide as the wider operand. */
+template <typename A, typename B>
+struct ResultOfModulo {
+    using Type = typename Construct<std::is_signed_v<A> || std::is_signed_v<B>,
+                                    std::is_floating_point_v<A> || std::is_floating_point_v<B>,
+                                    max(sizeof(A), sizeof(B))>::Type;
+};
+
+template <typename A>
+struct ResultOfNegate { // always signed; unsigned operands are resized through next_size()
+    static constexpr size_t result_size = std::is_signed_v<A> ? sizeof(A) : next_size(sizeof(A));
+    using Type = typename Construct<true, std::is_floating_point_v<A>, result_size>::Type;
+};
+
+template <typename A>
+struct ResultOfAbs { // asks Construct for the non-signed entry of A's size and floatness
+    using Type = typename Construct<false, std::is_floating_point_v<A>, sizeof(A)>::Type;
+};
+
+/** For bitwise operations, an integer is obtained whose size is the maximum of the argument
+    * sizes; if either argument is floating point, size 8 is used instead. */
+template <typename A, typename B>
+struct ResultOfBit {
+    using Type = typename Construct<std::is_signed_v<A> || std::is_signed_v<B>, false,
+                                    std::is_floating_point_v<A> || std::is_floating_point_v<B>
+                                            ? 8
+                                            : max(sizeof(A), sizeof(B))>::Type;
+};
+
+template <typename A>
+struct ResultOfBitNot { // integer Construct entry with A's signedness and size
+    using Type = typename Construct<std::is_signed_v<A>, false, sizeof(A)>::Type;
+};
+
+/** Type casting for `if` function (as produced by the Construct specializations in this file):
+  * Int<x>,   Int<y>    ->   Int<max(x,y)>
+  * Float<x>, Float<y>  -> Float<max(x, y)>
+  * UInt<x>,  UInt<y>   ->   signed Int one step wider than max(x,y) (Construct<false, false, N>)
+  * UInt<x>,  Int<y>    ->   Int<max(x*2, y)>
+  * Float<x>, [U]Int<y> -> Float<max(x, y*2)>
+  * Decimal<x>, Decimal<y> -> Decimal<max(x,y)>
+  * UInt128, UInt128    -> UInt128 (the UUID case)
+  * UInt64 ,  Int<x>    -> Int128   (size 16 resolves to Construct<true, false, 16>)
+  * Float<x>, [U]Int64  -> Float64  (size 16 resolves to Construct<true, true, 16>)
+  */
+template <typename A, typename B>
+struct ResultOfIf {
+    static constexpr bool has_float = std::is_floating_point_v<A> || std::is_floating_point_v<B>;
+    static constexpr bool has_integer = std::is_integral_v<A> || std::is_integral_v<B>;
+    static constexpr bool has_signed = std::is_signed_v<A> || std::is_signed_v<B>;
+    static constexpr bool has_unsigned = !std::is_signed_v<A> || !std::is_signed_v<B>;
+    // Largest sizes per category; a category that is absent contributes 0.
+    static constexpr size_t max_size_of_unsigned_integer =
+            max(std::is_signed_v<A> ? 0 : sizeof(A), std::is_signed_v<B> ? 0 : sizeof(B));
+    static constexpr size_t max_size_of_signed_integer =
+            max(std::is_signed_v<A> ? sizeof(A) : 0, std::is_signed_v<B> ? sizeof(B) : 0);
+    static constexpr size_t max_size_of_integer =
+            max(std::is_integral_v<A> ? sizeof(A) : 0, std::is_integral_v<B> ? sizeof(B) : 0);
+    static constexpr size_t max_size_of_float = max(std::is_floating_point_v<A> ? sizeof(A) : 0,
+                                                    std::is_floating_point_v<B> ? sizeof(B) : 0);
+    // Double the size when int >= float in size, or unsigned >= signed in size.
+    using ConstructedType =
+            typename Construct<has_signed, has_float,
+                               ((has_float && has_integer &&
+                                 max_size_of_integer >= max_size_of_float) ||
+                                (has_signed && has_unsigned &&
+                                 max_size_of_unsigned_integer >= max_size_of_signed_integer))
+                                       ? max(sizeof(A), sizeof(B)) * 2
+                                       : max(sizeof(A), sizeof(B))>::Type;
+    // Two UInt128 arguments keep UInt128 (the UUID case).
+    using ConstructedWithUUID =
+            std::conditional_t<std::is_same_v<A, UInt128> && std::is_same_v<B, UInt128>, A,
+                               ConstructedType>;
+    // Decimals: both decimal -> the larger one (B on a tie); exactly one decimal -> Error.
+    using Type = std::conditional_t<
+            !IsDecimalNumber<A> && !IsDecimalNumber<B>, ConstructedWithUUID,
+            std::conditional_t<IsDecimalNumber<A> && IsDecimalNumber<B>,
+                               std::conditional_t<(sizeof(A) > sizeof(B)), A, B>, Error>>;
+};
+
+/** Integer type operands are cast to before `%` and bitwise operations; floats map to size 8. */
+template <typename A>
+struct ToInteger {
+    using Type = typename Construct<std::is_signed_v<A>, false,
+                                    std::is_floating_point_v<A> ? 8 : sizeof(A)>::Type;
+};
+
+// CLICKHOUSE-29. Both operands are integers of the same depth but of different signs.
+// NOTE: Applied to 64-bit integers only (backward compatibility); would work for any width.
+template <typename A, typename B>
+constexpr bool LeastGreatestSpecialCase = std::is_integral_v<A> && std::is_integral_v<B> &&
+                                          sizeof(A) == 8 && sizeof(B) == 8 &&
+                                          std::is_signed_v<A> != std::is_signed_v<B>;
+
+template <typename A, typename B> // special case: Construct<true, false, 8>; else ResultOfIf
+using ResultOfLeast = std::conditional_t<LeastGreatestSpecialCase<A, B>,
+                                         typename Construct<true, false, sizeof(A)>::Type,
+                                         typename ResultOfIf<A, B>::Type>;
+
+template <typename A, typename B> // special case: Construct<false, false, 8>; else ResultOfIf
+using ResultOfGreatest = std::conditional_t<LeastGreatestSpecialCase<A, B>,
+                                            typename Construct<false, false, sizeof(A)>::Type,
+                                            typename ResultOfIf<A, B>::Type>;
+
+} // namespace NumberTraits
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/join/join_op.h b/be/src/vec/exec/join/join_op.h
new file mode 100644
index 0000000000..71fec00d21
--- /dev/null
+++ b/be/src/vec/exec/join/join_op.h
@@ -0,0 +1,143 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "vec/common/arena.h"
+#include "vec/common/columns_hashing.h"
+#include "vec/common/hash_table/hash_map.h"
+#include "vec/core/block.h"
+
+namespace doris::vectorized {
+/// Reference to one row of a block: the block pointer plus the row index inside it.
+struct RowRef {
+    using SizeT = uint32_t; /// Deliberately narrower than size_t to save memory
+
+    const Block* block = nullptr;
+    SizeT row_num = 0;
+    // Used by right joins to mark that the row has been visited.
+    // TODO: only carry this variable for the joins that actually need it
+    bool visited = false;
+
+    RowRef() = default;
+    RowRef(const Block* block_ptr, size_t row_num_count, bool is_visited = false)
+            : block(block_ptr),
+              row_num(static_cast<SizeT>(row_num_count)),
+              visited(is_visited) {}
+};
+
+/// Singly linked list of row references. Used for ALL JOINs (non-unique JOINs).
+/// The list object itself is the first reference; further references are stored in
+/// arena-allocated batches reachable through `next`.
+struct RowRefList : RowRef {
+    /// A chunk of RowRefs, 16 * (MAX_SIZE + 1) bytes in size.
+    struct Batch {
+        static constexpr size_t MAX_SIZE = 7; /// Adequate values are 3, 7, 15, 31.
+
+        SizeT size = 0; /// Smaller than size_t, but keeps the alignment in Arena.
+        Batch* next;
+        RowRef row_refs[MAX_SIZE];
+
+        Batch(Batch* parent) : next(parent) {}
+
+        bool full() const { return size == MAX_SIZE; }
+
+        /// Stores `row_ref` and returns the batch that is now the head of the chain:
+        /// this batch while it still has room, otherwise a new batch linked in front of it.
+        Batch* insert(RowRef&& row_ref, Arena& pool) {
+            if (!full()) {
+                row_refs[size] = std::move(row_ref);
+                ++size;
+                return this;
+            }
+
+            // No room left: allocate a fresh batch that points back to this one and put
+            // the reference into its first slot.
+            Batch* fresh = pool.alloc<Batch>();
+            *fresh = Batch(this);
+            fresh->row_refs[fresh->size++] = std::move(row_ref);
+            return fresh;
+        }
+    };
+
+    /// Walks the list: first the root reference, then the batches starting at the head.
+    class ForwardIterator {
+    public:
+        ForwardIterator(RowRefList* begin)
+                : root(begin), first(true), batch(begin->next), position(0) {}
+
+        RowRef& operator*() {
+            if (!first) return batch->row_refs[position];
+            return *root;
+        }
+        RowRef* operator->() { return &(**this); }
+
+        bool operator==(const ForwardIterator& rhs) const {
+            const bool both_at_root = first && rhs.first;
+            const bool same_slot = batch == rhs.batch && position == rhs.position;
+            return ok() == rhs.ok() && (both_at_root || same_slot);
+        }
+        bool operator!=(const ForwardIterator& rhs) const { return !(*this == rhs); }
+
+        void operator++() {
+            // Leaving the root element only clears the flag; batch/position already
+            // designate the first batched reference (if there is one).
+            if (first) {
+                first = false;
+                return;
+            }
+            if (batch == nullptr) {
+                return;
+            }
+
+            if (++position >= batch->size) {
+                batch = batch->next;
+                position = 0;
+            }
+        }
+
+        /// True while the iterator still designates an element.
+        bool ok() const { return first || batch; }
+
+        static ForwardIterator end() { return ForwardIterator(); }
+
+    private:
+        RowRefList* root;
+        bool first;
+        Batch* batch;
+        size_t position;
+
+        ForwardIterator() : root(nullptr), first(false), batch(nullptr), position(0) {}
+    };
+
+    RowRefList() = default;
+    RowRefList(const Block* block_, size_t row_num_) : RowRef(block_, row_num_) {}
+
+    ForwardIterator begin() { return ForwardIterator(this); }
+    ForwardIterator end() { return ForwardIterator::end(); }
+
+    /// Adds one more reference behind the root element; storage comes from `pool`.
+    void insert(RowRef&& row_ref, Arena& pool) {
+        ++row_count;
+
+        if (next == nullptr) {
+            next = pool.alloc<Batch>();
+            *next = Batch(nullptr);
+        }
+        next = next->insert(std::move(row_ref), pool);
+    }
+
+    /// Number of references in the list, the root element included.
+    uint32_t get_row_count() { return row_count; }
+
+private:
+    Batch* next = nullptr;
+    uint32_t row_count = 1;
+};
+
+// using MapI32 = doris::vectorized::HashMap<UInt32, MappedAll, HashCRC32<UInt32>>;
+// using I32KeyType = doris::vectorized::ColumnsHashing::HashMethodOneNumber<MapI32::value_type, MappedAll, UInt32, false>;
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/join/vacquire_list.hpp b/be/src/vec/exec/join/vacquire_list.hpp
new file mode 100644
index 0000000000..6a3157cc38
--- /dev/null
+++ b/be/src/vec/exec/join/vacquire_list.hpp
@@ -0,0 +1,54 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+namespace doris::vectorized {
+
+/// Append-only store that takes ownership of elements and hands back references to them.
+/// Elements are kept in heap-allocated batches of `batch_size` slots. A batch is never
+/// reallocated once created, so a reference returned by acquire() keeps pointing at the
+/// same slot for the lifetime of the list.
+template <typename Element, int batch_size = 8>
+struct AcquireList {
+    static_assert(batch_size > 0, "AcquireList requires a positive batch_size");
+
+    using Batch = Element[batch_size];
+
+    /// Moves `element` into the next free slot and returns a reference to that slot.
+    Element& acquire(Element&& element) {
+        if (_current_batch == nullptr) {
+            _current_batch.reset(new Element[batch_size]);
+        } else if (current_full()) {
+            // Retire the full batch (it stays alive in _lst) and start a fresh one.
+            _lst.emplace_back(std::move(_current_batch));
+            _current_batch.reset(new Element[batch_size]);
+            _current_offset = 0;
+        }
+
+        Element& slot = _current_batch[_current_offset++];
+        slot = std::move(element);
+        return slot;
+    }
+
+    /// Gives the most recently acquired slot back, so the next acquire() overwrites it.
+    /// The element stored in the slot is not reset here. Does nothing when the current
+    /// batch has no slot in use, which keeps the offset from becoming negative and the
+    /// next acquire() from writing in front of the batch.
+    void remove_last_element() {
+        if (_current_offset > 0) {
+            --_current_offset;
+        }
+    }
+
+private:
+    bool current_full() const { return _current_offset == batch_size; }
+    std::vector<std::unique_ptr<Element[]>> _lst;
+    std::unique_ptr<Element[]> _current_batch;
+    int _current_offset = 0;
+};
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
new file mode 100644
index 0000000000..9563ebf169
--- /dev/null
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -0,0 +1,1062 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/join/vhash_join_node.h"
+
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/mem_tracker.h"
+#include "runtime/runtime_filter_mgr.h"
+#include "util/defer_op.h"
+#include "vec/core/materialize_block.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+
+// Shorthand for the runtime-profile counter type used by the join helpers in this file.
+using ProfileCounter = RuntimeProfile::Counter;
+/// Functor that inserts the join keys of one build-side block into the join hash table.
+///   ignore_null  - rows whose null-map entry is set are not inserted.
+///   build_unique - only the first row of every key is stored; later rows with the same key
+///                  are counted as skipped instead of being appended to the row list.
+template <class HashTableContext, bool ignore_null, bool build_unique>
+struct ProcessHashTableBuild {
+    ProcessHashTableBuild(int rows, Block& acquired_block, ColumnRawPtrs& build_raw_ptrs,
+                          HashJoinNode* join_node, int batch_size)
+            : _rows(rows),
+              _skip_rows(0),
+              _acquired_block(acquired_block),
+              _build_raw_ptrs(build_raw_ptrs),
+              _join_node(join_node),
+              _batch_size(batch_size) {}
+
+    /// Inserts all `_rows` rows of the acquired block. When `has_runtime_filter` is set, the
+    /// index of every row that ends up in the hash table is also recorded in the node's
+    /// `_inserted_rows` entry for this block.
+    Status operator()(HashTableContext& hash_table_ctx, ConstNullMapPtr null_map,
+                      bool has_runtime_filter) {
+        using KeyGetter = typename HashTableContext::State;
+        using Mapped = typename HashTableContext::Mapped;
+        auto& hash_table = hash_table_ctx.hash_table;
+        int64_t bytes_before = hash_table.get_buffer_size_in_bytes();
+
+        // On scope exit, account for the growth of the bucket buffer and publish the
+        // current bucket count.
+        Defer defer {[&]() {
+            int64_t cells_after = hash_table.get_buffer_size_in_cells();
+            int64_t bytes_after = hash_table.get_buffer_size_in_bytes();
+            _join_node->_mem_tracker->Consume(bytes_after - bytes_before);
+            _join_node->_mem_used += bytes_after - bytes_before;
+            COUNTER_SET(_join_node->_build_buckets_counter, cells_after);
+        }};
+
+        KeyGetter key_getter(_build_raw_ptrs, _join_node->_build_key_sz, nullptr);
+
+        SCOPED_TIMER(_join_node->_build_table_insert_timer);
+        hash_table.reset_resize_timer();
+
+        auto& inserted_rows = _join_node->_inserted_rows[&_acquired_block];
+        if (has_runtime_filter) {
+            inserted_rows.reserve(_batch_size);
+        }
+
+        for (size_t row = 0; row < _rows; ++row) {
+            if constexpr (ignore_null) {
+                if ((*null_map)[row]) {
+                    continue;
+                }
+            }
+
+            auto emplace_result = key_getter.emplace_key(hash_table, row, _join_node->_arena);
+            if (row + 1 < _rows) {
+                key_getter.prefetch(hash_table, row + 1, _join_node->_arena);
+            }
+
+            if (!emplace_result.is_inserted()) {
+                if constexpr (build_unique) {
+                    // The key is already present and only one row per key is wanted.
+                    ++_skip_rows;
+                    continue;
+                } else {
+                    /// The first element of the list is stored in the value of the hash table, the rest in the pool.
+                    emplace_result.get_mapped().insert({&_acquired_block, row},
+                                                       _join_node->_arena);
+                }
+            } else {
+                new (&emplace_result.get_mapped()) Mapped({&_acquired_block, row});
+            }
+
+            // Reached only for rows that were actually stored in the hash table.
+            if (has_runtime_filter) {
+                inserted_rows.push_back(row);
+            }
+        }
+
+        if constexpr (build_unique) {
+            // If every row of the build block was skipped, give the block's slot back
+            // to reduce memory pressure.
+            if (_skip_rows == _rows) {
+                _join_node->_acquire_list.remove_last_element();
+            }
+        }
+
+        COUNTER_UPDATE(_join_node->_build_table_expanse_timer,
+                       hash_table.get_resize_timer_value());
+
+        return Status::OK();
+    }
+
+private:
+    const int _rows;
+    int _skip_rows;
+    Block& _acquired_block;
+    ColumnRawPtrs& _build_raw_ptrs;
+    HashJoinNode* _join_node;
+    int _batch_size;
+};
+
+/// Functor that initializes the runtime filter slots of a hash join node, feeds them the
+/// rows recorded in the node's `_inserted_rows`, and publishes them.
+template <class HashTableContext>
+struct ProcessRuntimeFilterBuild {
+    ProcessRuntimeFilterBuild(HashJoinNode* join_node) : _join_node(join_node) {}
+
+    /// Returns OK immediately when the node has no runtime filter descriptors. Otherwise
+    /// returns the error reported by VRuntimeFilterSlots::init(), or OK after publishing.
+    Status operator()(RuntimeState* state, HashTableContext& hash_table_ctx) {
+        if (_join_node->_runtime_filter_descs.empty()) {
+            return Status::OK();
+        }
+        // The slots object is handed to the node's object pool so that it is released
+        // together with the node. A bare `new` without an owner would leak on every call,
+        // including the early return when init() fails.
+        VRuntimeFilterSlots* runtime_filter_slots = _join_node->_pool->add(
+                new VRuntimeFilterSlots(_join_node->_probe_expr_ctxs, _join_node->_build_expr_ctxs,
+                                        _join_node->_runtime_filter_descs));
+
+        RETURN_IF_ERROR(runtime_filter_slots->init(state, hash_table_ctx.hash_table.get_size()));
+
+        if (!runtime_filter_slots->empty() && !_join_node->_inserted_rows.empty()) {
+            {
+                SCOPED_TIMER(_join_node->_push_compute_timer);
+                runtime_filter_slots->insert(_join_node->_inserted_rows);
+            }
+        }
+        {
+            SCOPED_TIMER(_join_node->_push_down_timer);
+            runtime_filter_slots->publish();
+        }
+
+        return Status::OK();
+    }
+
+private:
+    HashJoinNode* _join_node;
+};
+
+template <class HashTableContext, bool ignore_null>
+struct ProcessHashTableProbe {
+    // Binds the probe-side state of `join_node` (left/right data types, probe block, probe
+    // index, probe key columns and the rows-returned counter) to this helper.
+    // `_probe_index` is held by reference, so advancing it in the process methods advances
+    // the node's own probe cursor.
+    ProcessHashTableProbe(HashJoinNode* join_node, int batch_size, int probe_rows)
+            : _join_node(join_node),
+              _left_table_data_types(join_node->_left_table_data_types),
+              _right_table_data_types(join_node->_right_table_data_types),
+              _batch_size(batch_size),
+              _probe_rows(probe_rows),
+              _probe_block(join_node->_probe_block),
+              _probe_index(join_node->_probe_index),
+              _probe_raw_ptrs(join_node->_probe_columns),
+              _rows_returned_counter(join_node->_rows_returned_counter) {}
+
+    // Probe path for joins that have no other join conjunct. Without one, the output block
+    // has the same structure as the mutable block, which allows more optimization and a
+    // simpler probe logic.
+    //
+    // Starting at the current `_probe_index`, probe rows are looked up until all probe rows
+    // are consumed or at least `_batch_size` output rows have been produced. Build-side
+    // values are appended to the columns of `mutable_block` row by row; the probe-side
+    // columns are produced at the end by replicating the probe block's columns with the
+    // cumulative per-row output offsets. The result is swapped into `output_block`.
+    // TODO: opt the visited here to reduce the size of hash table
+    Status do_process(HashTableContext& hash_table_ctx, ConstNullMapPtr null_map,
+                      MutableBlock& mutable_block, Block* output_block) {
+        using KeyGetter = typename HashTableContext::State;
+        using Mapped = typename HashTableContext::Mapped;
+
+        KeyGetter key_getter(_probe_raw_ptrs, _join_node->_probe_key_sz, nullptr);
+
+        // offset_data[i] ends up holding the number of output rows produced by probe rows
+        // 0..i, i.e. the replicate offsets for the probe-side columns.
+        IColumn::Offsets offset_data;
+        auto& mcol = mutable_block.mutable_columns();
+        offset_data.assign(_probe_rows, (uint32_t)0);
+
+        // For right semi/anti joins the build-side columns start at position 0; otherwise
+        // they follow the left (probe) table's columns.
+        int right_col_idx = _join_node->_is_right_semi_anti ? 0 : _left_table_data_types.size();
+        int right_col_len = _right_table_data_types.size();
+        int current_offset = 0;
+
+        for (; _probe_index < _probe_rows;) {
+            // ignore null rows
+            if constexpr (ignore_null) {
+                if ((*null_map)[_probe_index]) {
+                    offset_data[_probe_index++] = current_offset;
+                    continue;
+                }
+            }
+            // A probe row flagged in the null map is treated as "not found" without
+            // touching the hash table.
+            // NOTE(review): null_map is dereferenced here even when ignore_null is false, so
+            // callers presumably always pass a non-null map - confirm.
+            auto find_result =
+                    (*null_map)[_probe_index]
+                            ? decltype(key_getter.find_key(hash_table_ctx.hash_table, _probe_index,
+                                                           _arena)) {nullptr, false}
+                            : key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
+
+            if (_probe_index + 2 < _probe_rows)
+                key_getter.prefetch(hash_table_ctx.hash_table, _probe_index + 2, _arena);
+
+            if (find_result.is_found()) {
+                // left semi join only needs one match and does not need the data of the right table
+                if (_join_node->_join_op == TJoinOp::LEFT_SEMI_JOIN) {
+                    ++current_offset;
+                } else if (_join_node->_join_op == TJoinOp::LEFT_ANTI_JOIN) {
+                    // do nothing
+                } else {
+                    auto& mapped = find_result.get_mapped();
+                    // TODO: Iterators are currently considered to be a heavy operation and have a certain impact on performance.
+                    // We should rethink whether to use this iterator mode in the future. For now only the one-row case is optimized
+                    if (mapped.get_row_count() == 1) {
+                        mapped.visited = true;
+                        // right semi/anti join emits the data in the hash table
+                        // after the probe data reaches eof
+                        if (!_join_node->_is_right_semi_anti) {
+                            ++current_offset;
+                            for (size_t j = 0; j < right_col_len; ++j) {
+                                auto& column = *mapped.block->get_by_position(j).column;
+                                mcol[j + right_col_idx]->insert_from(column, mapped.row_num);
+                            }
+                        }
+                    } else {
+                        for (auto it = mapped.begin(); it.ok(); ++it) {
+                            // right semi/anti join emits the data in the hash table
+                            // after the probe data reaches eof
+                            if (!_join_node->_is_right_semi_anti) {
+                                ++current_offset;
+                                for (size_t j = 0; j < right_col_len; ++j) {
+                                    auto& column = *it->block->get_by_position(j).column;
+                                    // TODO: the insert_from interface causes serious performance problems
+                                    //  when the column is nullable. Try to find a more effective way
+                                    mcol[j + right_col_idx]->insert_from(column, it->row_num);
+                                }
+                            }
+                            it->visited = true;
+                        }
+                    }
+                }
+            } else if (_join_node->_match_all_probe ||
+                       _join_node->_join_op == TJoinOp::LEFT_ANTI_JOIN) {
+                ++current_offset;
+                // only full outer / left outer need to insert the data of the right table
+                if (_join_node->_match_all_probe) {
+                    for (size_t j = 0; j < right_col_len; ++j) {
+                        DCHECK(mcol[j + right_col_idx]->is_nullable());
+                        mcol[j + right_col_idx]->insert_data(nullptr, 0);
+                    }
+                }
+            }
+
+            offset_data[_probe_index++] = current_offset;
+
+            // The batch limit is checked only after a whole probe row has been handled, so
+            // one call may produce more than _batch_size rows.
+            if (current_offset >= _batch_size) {
+                break;
+            }
+        }
+
+        // Probe rows that were not reached in this call contribute no output rows.
+        for (int i = _probe_index; i < _probe_rows; ++i) {
+            offset_data[i] = current_offset;
+        }
+        output_block->swap(mutable_block.to_block());
+
+        // Fill the probe-side columns (positions before right_col_idx) by replicating each
+        // probe row as many times as it produced output rows.
+        for (int i = 0; i < right_col_idx; ++i) {
+            auto& column = _probe_block.get_by_position(i).column;
+            output_block->get_by_position(i).column = column->replicate(offset_data);
+        }
+
+        return Status::OK();
+    }
+
+    // Probe path for joins that carry an additional (non-equality) join conjunct.
+    // Every row pair matched through the hash table still has to be checked by the other
+    // join conjunct, so the structure of the mutable block and of the output block may
+    // differ. The final output is decided by the result of the other join conjunct together
+    // with the `same_to_prev` vector, which marks output rows that stem from the same probe
+    // row as the row before them.
+    //
+    // Returns the error of the other join conjunct if its evaluation fails, OK otherwise.
+    Status do_process_with_other_join_conjunts(HashTableContext& hash_table_ctx,
+                                               ConstNullMapPtr null_map,
+                                               MutableBlock& mutable_block, Block* output_block) {
+        using KeyGetter = typename HashTableContext::State;
+        using Mapped = typename HashTableContext::Mapped;
+
+        KeyGetter key_getter(_probe_raw_ptrs, _join_node->_probe_key_sz, nullptr);
+
+        IColumn::Offsets offset_data;
+        auto& mcol = mutable_block.mutable_columns();
+        offset_data.assign(_probe_rows, (uint32_t)0);
+
+        // One entry per output row: pointer to the `visited` flag of the matched build row,
+        // or nullptr when the output row comes from a probe row without a match. Used to
+        // update the visited state after the other join conjunct has been evaluated.
+        std::vector<bool*> visited_map;
+        visited_map.reserve(1.2 * _batch_size);
+
+        // One entry per output row: true when the row belongs to the same probe row as the
+        // previous output row.
+        std::vector<bool> same_to_prev;
+        same_to_prev.reserve(1.2 * _batch_size);
+
+        int right_col_idx = _left_table_data_types.size();
+        int right_col_len = _right_table_data_types.size();
+        int current_offset = 0;
+
+        for (; _probe_index < _probe_rows;) {
+            // ignore null rows
+            if constexpr (ignore_null) {
+                if ((*null_map)[_probe_index]) {
+                    offset_data[_probe_index++] = current_offset;
+                    continue;
+                }
+            }
+            // A probe row flagged in the null map is treated as "not found" without
+            // touching the hash table.
+            auto find_result =
+                    (*null_map)[_probe_index]
+                            ? decltype(key_getter.find_key(hash_table_ctx.hash_table, _probe_index,
+                                                           _arena)) {nullptr, false}
+                            : key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
+
+            if (find_result.is_found()) {
+                auto& mapped = find_result.get_mapped();
+                auto origin_offset = current_offset;
+
+                for (auto it = mapped.begin(); it.ok(); ++it) {
+                    ++current_offset;
+                    for (size_t j = 0; j < right_col_len; ++j) {
+                        auto& column = *it->block->get_by_position(j).column;
+                        mcol[j + right_col_idx]->insert_from(column, it->row_num);
+                    }
+                    visited_map.emplace_back(&it->visited);
+                }
+                // The first row of the group starts a new probe row, the rest repeat it.
+                same_to_prev.emplace_back(false);
+                for (int i = 0; i < current_offset - origin_offset - 1; ++i) {
+                    same_to_prev.emplace_back(true);
+                }
+            } else if (_join_node->_match_all_probe ||
+                       _join_node->_join_op == TJoinOp::LEFT_ANTI_JOIN) {
+                ++current_offset;
+                same_to_prev.emplace_back(false);
+                visited_map.emplace_back(nullptr);
+                // only full outer / left outer need to insert the data of the right table
+                if (_join_node->_match_all_probe) {
+                    for (size_t j = 0; j < right_col_len; ++j) {
+                        DCHECK(mcol[j + right_col_idx]->is_nullable());
+                        mcol[j + right_col_idx]->insert_data(nullptr, 0);
+                    }
+                } else {
+                    for (size_t j = 0; j < right_col_len; ++j) {
+                        mcol[j + right_col_idx]->insert_default();
+                    }
+                }
+            } else {
+                // other joins: nothing to do
+            }
+
+            offset_data[_probe_index++] = current_offset;
+
+            if (current_offset >= _batch_size) {
+                break;
+            }
+        }
+
+        // Probe rows that were not reached in this call contribute no output rows.
+        for (int i = _probe_index; i < _probe_rows; ++i) {
+            offset_data[i] = current_offset;
+        }
+        output_block->swap(mutable_block.to_block());
+        for (int i = 0; i < right_col_idx; ++i) {
+            auto& column = _probe_block.get_by_position(i).column;
+            output_block->get_by_position(i).column = column->replicate(offset_data);
+        }
+
+        if (_join_node->_vother_join_conjunct_ptr) {
+            int result_column_id = -1;
+            int orig_columns = output_block->columns();
+            // Propagate a failure instead of going on with result_column_id still at -1,
+            // which would be used as a column position below.
+            RETURN_IF_ERROR((*_join_node->_vother_join_conjunct_ptr)
+                                    ->execute(output_block, &result_column_id));
+
+            auto column = output_block->get_by_position(result_column_id).column;
+            if (_join_node->_match_all_probe) {
+                auto new_filter_column = ColumnVector<UInt8>::create();
+                auto& filter_map = new_filter_column->get_data();
+
+                for (int i = 0; i < column->size(); ++i) {
+                    auto join_hit = visited_map[i] != nullptr;
+                    auto other_hit = column->get_bool(i);
+
+                    // The other conjunct rejected this pair: mark the right side as NULL.
+                    if (!other_hit) {
+                        for (size_t j = 0; j < right_col_len; ++j) {
+                            typeid_cast<ColumnNullable*>(
+                                    std::move(*output_block->get_by_position(j + right_col_idx)
+                                                       .column)
+                                            .mutate()
+                                            .get())
+                                    ->get_null_map_data()[i] = true;
+                        }
+                    }
+
+                    if (join_hit) {
+                        *visited_map[i] |= other_hit;
+                        filter_map.push_back(other_hit || !same_to_prev[i] ||
+                                             (!column->get_bool(i - 1) && filter_map.back()));
+                        // Only rows that hit both the join conjunct and the other join conjunct
+                        // need to be output. If there is none for a probe row, exactly one row
+                        // with NULL right table columns is kept for it.
+                        if (same_to_prev[i] && filter_map.back() && !column->get_bool(i - 1))
+                            filter_map[i - 1] = false;
+                    } else {
+                        filter_map.push_back(true);
+                    }
+                }
+                output_block->get_by_position(result_column_id).column =
+                        std::move(new_filter_column);
+            } else if (_join_node->_join_op == TJoinOp::RIGHT_OUTER_JOIN) {
+                for (int i = 0; i < column->size(); ++i) {
+                    DCHECK(visited_map[i]);
+                    *visited_map[i] |= column->get_bool(i);
+                }
+            } else if (_join_node->_join_op == TJoinOp::LEFT_SEMI_JOIN) {
+                auto new_filter_column = ColumnVector<UInt8>::create();
+                auto& filter_map = new_filter_column->get_data();
+
+                if (!column->empty()) filter_map.emplace_back(column->get_bool(0));
+                for (int i = 1; i < column->size(); ++i) {
+                    if (column->get_bool(i) || (same_to_prev[i] && filter_map[i - 1])) {
+                        // Only the last row of a group stays true, so one row is output per probe row
+                        filter_map.push_back(true);
+                        filter_map[i - 1] = !same_to_prev[i] && filter_map[i - 1];
+                    } else {
+                        filter_map.push_back(false);
+                    }
+                }
+
+                output_block->get_by_position(result_column_id).column =
+                        std::move(new_filter_column);
+            } else if (_join_node->_join_op == TJoinOp::LEFT_ANTI_JOIN) {
+                auto new_filter_column = ColumnVector<UInt8>::create();
+                auto& filter_map = new_filter_column->get_data();
+
+                if (!column->empty()) filter_map.emplace_back(column->get_bool(0) && visited_map[0]);
+                for (int i = 1; i < column->size(); ++i) {
+                    if ((visited_map[i] && column->get_bool(i)) || (same_to_prev[i] && filter_map[i - 1])) {
+                        filter_map.push_back(true);
+                        filter_map[i - 1] = !same_to_prev[i] && filter_map[i - 1];
+                    } else {
+                        filter_map.push_back(false);
+                    }
+                }
+
+                // Same as the semi join, but the value of the last row of every group is
+                // flipped to the opposite value
+                for (int i = 1; i < same_to_prev.size(); ++i) {
+                    if (!same_to_prev[i]) filter_map[i - 1] = !filter_map[i - 1];
+                }
+                // When no output row was produced there is no last row to flip; indexing
+                // with size() - 1 would write outside the (empty) filter.
+                if (!same_to_prev.empty()) {
+                    filter_map[same_to_prev.size() - 1] = !filter_map[same_to_prev.size() - 1];
+                }
+
+                output_block->get_by_position(result_column_id).column =
+                        std::move(new_filter_column);
+            } else if (_join_node->_is_right_semi_anti) {
+                for (int i = 0; i < column->size(); ++i) {
+                    DCHECK(visited_map[i]);
+                    *visited_map[i] |= column->get_bool(i);
+                }
+            } else {
+                // inner join: nothing to do
+            }
+
+            // Right semi/anti joins output nothing during the probe; for them only the
+            // visited flags updated above matter.
+            if (_join_node->_is_right_semi_anti) {
+                output_block->clear();
+            } else {
+                Block::filter_block(output_block, result_column_id, orig_columns);
+            }
+        }
+
+        return Status::OK();
+    }
+
+    // Outputs join results straight from the hash table, for full outer / right outer /
+    // right semi / right anti joins.
+    // Continues from the iterator stored in `hash_table_ctx` and stops once the end of the
+    // hash table is reached or at least `_batch_size` rows have been emitted; `*eos` is set
+    // to whether the end was reached.
+    Status process_data_in_hashtable(HashTableContext& hash_table_ctx, MutableBlock& mutable_block,
+                                     Block* output_block, bool* eos) {
+        hash_table_ctx.init_once();
+        auto& columns = mutable_block.mutable_columns();
+        const int build_col_begin =
+                _join_node->_is_right_semi_anti ? 0 : _left_table_data_types.size();
+        const int build_col_count = _right_table_data_types.size();
+        // Right semi join emits the rows that were visited; every other join handled here
+        // emits the rows that were not.
+        const bool emit_visited = _join_node->_join_op == TJoinOp::RIGHT_SEMI_JOIN;
+
+        auto& cursor = hash_table_ctx.iter;
+        int emitted_rows = 0;
+        while (cursor != hash_table_ctx.hash_table.end() && emitted_rows < _batch_size) {
+            auto& row_list = cursor->get_second();
+            for (auto ref = row_list.begin(); ref.ok(); ++ref) {
+                if (ref->visited != emit_visited) {
+                    continue;
+                }
+                ++emitted_rows;
+                for (size_t col = 0; col < build_col_count; ++col) {
+                    auto& source = *ref->block->get_by_position(col).column;
+                    columns[col + build_col_begin]->insert_from(source, ref->row_num);
+                }
+            }
+            ++cursor;
+        }
+
+        // right outer join / full join also need values for the left table's columns
+        if (_join_node->_is_outer_join) {
+            for (int col = 0; col < build_col_begin; ++col) {
+                for (int row = 0; row < emitted_rows; ++row) {
+                    columns[col]->insert_data(nullptr, 0);
+                }
+            }
+        }
+        *eos = (cursor == hash_table_ctx.hash_table.end());
+
+        output_block->swap(mutable_block.to_block());
+        return Status::OK();
+    }
+
+private:
+    HashJoinNode* _join_node;
+    const DataTypes& _left_table_data_types;
+    const DataTypes& _right_table_data_types;
+    const int _batch_size;
+    const size_t _probe_rows;
+    const Block& _probe_block;
+    int& _probe_index;
+    ColumnRawPtrs& _probe_raw_ptrs;
+    Arena _arena;
+
+    ProfileCounter* _rows_returned_counter;
+};
+
+// Builds the node from the plan node: stores the join op, derives the per-join-type flags
+// (which side must be output in full, whether the build side is unique, left/right semi or
+// anti, outer) from it, and copies the runtime filter descriptors.
+// NOTE(review): the flag initializers read `_join_op`, `_match_all_build` and
+// `_match_all_probe`, so they presumably rely on those members being declared earlier in
+// the class - confirm against the header.
+HashJoinNode::HashJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : ExecNode(pool, tnode, descs),
+          _join_op(tnode.hash_join_node.join_op),
+          _hash_table_rows(0),
+          _mem_used(0),
+          _match_all_probe(_join_op == TJoinOp::LEFT_OUTER_JOIN ||
+                           _join_op == TJoinOp::FULL_OUTER_JOIN),
+          _match_one_build(_join_op == TJoinOp::LEFT_SEMI_JOIN),
+          _match_all_build(_join_op == TJoinOp::RIGHT_OUTER_JOIN ||
+                           _join_op == TJoinOp::FULL_OUTER_JOIN),
+          _build_unique(_join_op == TJoinOp::LEFT_ANTI_JOIN || _join_op == TJoinOp::LEFT_SEMI_JOIN),
+          _is_left_semi_anti(_join_op == TJoinOp::LEFT_ANTI_JOIN ||
+                             _join_op == TJoinOp::LEFT_SEMI_JOIN),
+          _is_right_semi_anti(_join_op == TJoinOp::RIGHT_ANTI_JOIN ||
+                              _join_op == TJoinOp::RIGHT_SEMI_JOIN),
+          _is_outer_join(_match_all_build || _match_all_probe) {
+    _runtime_filter_descs = tnode.runtime_filters;
+}
+
+// Defaulted destructor, defined out of line in this translation unit.
+HashJoinNode::~HashJoinNode() = default;
+
+// Parses the thrift description of the join.
+//  - Rejects NULL_AWARE_LEFT_ANTI_JOIN, which this node does not support.
+//  - Creates one probe (left) and one build (right) expression context per equal-join
+//    conjunct and records, per conjunct, whether the comparison is null-safe and whether
+//    NULL keys have to be kept on the build / probe side.
+//  - Creates the optional "other" join conjunct; when present the build table is no longer
+//    deduplicated.
+//  - Registers this node as producer of every runtime filter it owns.
+// Returns the first error encountered, otherwise OK.
+Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::init(tnode, state));
+    DCHECK(tnode.__isset.hash_join_node);
+    const auto& join_desc = tnode.hash_join_node;
+    if (join_desc.join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) {
+        return Status::InternalError("Do not support null aware left anti join");
+    }
+    // The other join conjunct is evaluated on rows made of probe columns followed by
+    // build columns.
+    _row_desc_for_other_join_conjunt = RowDescriptor(child(0)->row_desc(), child(1)->row_desc());
+
+    // Join types for which NULL keys must be stored in the build table.
+    const bool build_side_keeps_null = _join_op == TJoinOp::RIGHT_OUTER_JOIN ||
+                                       _join_op == TJoinOp::FULL_OUTER_JOIN ||
+                                       _join_op == TJoinOp::RIGHT_ANTI_JOIN;
+    // Join types for which NULL keys must still be processed on the probe side.
+    const bool probe_side_keeps_null =
+            _match_all_probe || _build_unique || _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN;
+
+    const std::vector<TEqJoinCondition>& eq_join_conjuncts = join_desc.eq_join_conjuncts;
+    for (const TEqJoinCondition& eq_conjunct : eq_join_conjuncts) {
+        VExprContext* expr_ctx = nullptr;
+        // Left operand is the probe key, right operand is the build key.
+        RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, eq_conjunct.left, &expr_ctx));
+        _probe_expr_ctxs.push_back(expr_ctx);
+        RETURN_IF_ERROR(VExpr::create_expr_tree(_pool, eq_conjunct.right, &expr_ctx));
+        _build_expr_ctxs.push_back(expr_ctx);
+
+        const bool null_safe_eq =
+                eq_conjunct.__isset.opcode && eq_conjunct.opcode == TExprOpcode::EQ_FOR_NULL;
+        _is_null_safe_eq_join.push_back(null_safe_eq);
+
+        // A null-safe comparison always keeps NULL keys on both sides; otherwise NULL keys
+        // are kept only for nullable key expressions of join types that need them.
+        _build_not_ignore_null.emplace_back(
+                null_safe_eq ||
+                (_build_expr_ctxs.back()->root()->is_nullable() && build_side_keeps_null));
+        _probe_not_ignore_null.emplace_back(
+                null_safe_eq ||
+                (_probe_expr_ctxs.back()->root()->is_nullable() && probe_side_keeps_null));
+    }
+    _probe_column_disguise_null.reserve(eq_join_conjuncts.size());
+
+    if (join_desc.__isset.vother_join_conjunct) {
+        _vother_join_conjunct_ptr.reset(new doris::vectorized::VExprContext*);
+        RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_tree(
+                _pool, join_desc.vother_join_conjunct, _vother_join_conjunct_ptr.get()));
+
+        // With a non-equal predicate on LEFT SEMI / LEFT ANTI joins the build table must
+        // keep duplicated rows.
+        _build_unique = false;
+        _have_other_join_conjunct = true;
+    }
+
+    for (const auto& rf_desc : _runtime_filter_descs) {
+        RETURN_IF_ERROR(state->runtime_filter_mgr()->regist_filter(RuntimeFilterRole::PRODUCER,
+                                                                   rf_desc, state->query_options()));
+    }
+
+    return Status::OK();
+}
+
+// Prepares the node for execution: creates the profile counters, prepares the join
+// expressions against the row descriptors of the children, caches the column data types of
+// both children and selects the hash table implementation.
+// Returns the first error reported by the base class or by an expression, otherwise OK.
+Status HashJoinNode::prepare(RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::prepare(state));
+
+    // Build phase
+    auto build_phase_profile = runtime_profile()->create_child("BuildPhase", true, true);
+    runtime_profile()->add_child(build_phase_profile, false, nullptr);
+    _build_timer = ADD_TIMER(build_phase_profile, "BuildTime");
+    _build_table_timer = ADD_TIMER(build_phase_profile, "BuildTableTime");
+    _build_table_insert_timer = ADD_TIMER(build_phase_profile, "BuildTableInsertTime");
+    _build_expr_call_timer = ADD_TIMER(build_phase_profile, "BuildExprCallTime");
+    _build_table_expanse_timer = ADD_TIMER(build_phase_profile, "BuildTableExpanseTime");
+    _build_rows_counter = ADD_COUNTER(build_phase_profile, "BuildRows", TUnit::UNIT);
+
+    // Probe phase
+    // NOTE(review): unlike the build phase profile, this child is not passed to add_child()
+    // afterwards -- confirm whether the asymmetry is intentional.
+    auto probe_phase_profile = runtime_profile()->create_child("ProbePhase", true, true);
+    _probe_timer = ADD_TIMER(probe_phase_profile, "ProbeTime");
+    _probe_next_timer = ADD_TIMER(probe_phase_profile, "ProbeFindNextTime");
+    _probe_expr_call_timer = ADD_TIMER(probe_phase_profile, "ProbeExprCallTime");
+    _probe_rows_counter = ADD_COUNTER(probe_phase_profile, "ProbeRows", TUnit::UNIT);
+
+    // Counters that live directly on the node profile.
+    _push_down_timer = ADD_TIMER(runtime_profile(), "PushDownTime");
+    _push_compute_timer = ADD_TIMER(runtime_profile(), "PushDownComputeTime");
+    _build_buckets_counter = ADD_COUNTER(runtime_profile(), "BuildBuckets", TUnit::UNIT);
+
+    // Build keys are evaluated on rows of child(1), probe keys on rows of child(0).
+    RETURN_IF_ERROR(
+            VExpr::prepare(_build_expr_ctxs, state, child(1)->row_desc(), expr_mem_tracker()));
+    RETURN_IF_ERROR(
+            VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker()));
+
+    // _vother_join_conjuncts are evaluated in the context of the rows produced by this node
+    if (_vother_join_conjunct_ptr) {
+        RETURN_IF_ERROR(
+                (*_vother_join_conjunct_ptr)
+                        ->prepare(state, _row_desc_for_other_join_conjunt, expr_mem_tracker()));
+    }
+    // right table data types
+    _right_table_data_types = VectorizedUtils::get_data_types(child(1)->row_desc());
+    // left table data types
+    _left_table_data_types = VectorizedUtils::get_data_types(child(0)->row_desc());
+
+    // Hash Table Init
+    _hash_table_init();
+    return Status::OK();
+}
+
+// Releases what this node holds: closes every expression context it owns, gives the memory
+// accounted during the build phase back to the memory tracker and closes the base node.
+// Calling it on an already closed node is a no-op that returns OK.
+Status HashJoinNode::close(RuntimeState* state) {
+    if (is_closed()) {
+        return Status::OK();
+    }
+
+    // The key expression contexts are opened in open(); close them here so that each
+    // opened context is also closed.
+    for (auto* build_ctx : _build_expr_ctxs) {
+        if (build_ctx != nullptr) {
+            build_ctx->close(state);
+        }
+    }
+    for (auto* probe_ctx : _probe_expr_ctxs) {
+        if (probe_ctx != nullptr) {
+            probe_ctx->close(state);
+        }
+    }
+
+    if (_vother_join_conjunct_ptr) (*_vother_join_conjunct_ptr)->close(state);
+
+    // _mem_used is the number of bytes consumed on the tracker while building the hash table.
+    _mem_tracker->Release(_mem_used);
+
+    return ExecNode::close(state);
+}
+
+// Row-batch interface of ExecNode. This node only produces Blocks, so the call always
+// fails with NotSupported and none of the arguments is touched.
+Status HashJoinNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
+    return Status::NotSupported("Not Implemented HashJoin Node::get_next scalar");
+}
+
+// Produces the next block of join output.
+//
+// Steps:
+//  1. When the current probe block is empty or fully consumed and the probe child is not at
+//     end of stream, drop the temporary columns added for the previous block, fetch the next
+//     non-empty probe block from child(0) and evaluate the probe key expressions on it.
+//  2. If probe rows remain, probe the hash table with them. Otherwise, once the probe child
+//     is exhausted, either emit the rows that only live in the hash table (right semi/anti,
+//     right outer and full outer joins) or report end of stream.
+//  3. Apply the node conjuncts and the limit to `output_block`.
+//
+// `*eos` is set when the probe side is finished and nothing is left to emit, by
+// process_data_in_hashtable(), or by reached_limit().
+// Returns the first error encountered, otherwise OK.
+Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eos) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    SCOPED_TIMER(_probe_timer);
+
+    size_t probe_rows = _probe_block.rows();
+    if ((probe_rows == 0 || _probe_index == probe_rows) && !_probe_eos) {
+        _probe_index = 0;
+        // clear_column_data of _probe_block
+        {
+            if (!_probe_column_disguise_null.empty()) {
+                // Positions were recorded in insertion order; every erase shifts the later
+                // positions by one, hence the "- i".
+                for (int i = 0; i < _probe_column_disguise_null.size(); ++i) {
+                    auto column_to_erase = _probe_column_disguise_null[i];
+                    _probe_block.erase(column_to_erase - i);
+                }
+                _probe_column_disguise_null.clear();
+            }
+            release_block_memory(_probe_block);
+        }
+
+        // Skip empty blocks until the child returns rows or reaches end of stream.
+        do {
+            SCOPED_TIMER(_probe_next_timer);
+            RETURN_IF_ERROR(child(0)->get_next(state, &_probe_block, &_probe_eos));
+        } while (_probe_block.rows() == 0 && !_probe_eos);
+
+        probe_rows = _probe_block.rows();
+        if (probe_rows != 0) {
+            COUNTER_UPDATE(_probe_rows_counter, probe_rows);
+
+            int probe_expr_ctxs_sz = _probe_expr_ctxs.size();
+            _probe_columns.resize(probe_expr_ctxs_sz);
+            if (_null_map_column == nullptr) {
+                _null_map_column = ColumnUInt8::create();
+            }
+            // Start from an all-zero null map for the new probe block.
+            _null_map_column->get_data().assign(probe_rows, (uint8_t)0);
+
+            Status st = std::visit(
+                    [&](auto&& arg) -> Status {
+                        using HashTableCtxType = std::decay_t<decltype(arg)>;
+                        if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+                            auto& null_map_val = _null_map_column->get_data();
+                            return extract_probe_join_column(_probe_block, null_map_val,
+                                                             _probe_columns, _probe_ignore_null,
+                                                             *_probe_expr_call_timer);
+                        } else {
+                            LOG(FATAL) << "FATAL: uninited hash table";
+                        }
+                        __builtin_unreachable();
+                    },
+                    _hash_table_variants);
+
+            RETURN_IF_ERROR(st);
+        }
+    }
+
+    Status st;
+    output_block->clear();
+
+    if (_probe_index < _probe_block.rows()) {
+        // With an other join conjunct the intermediate block carries the columns of both
+        // children; otherwise it has the output layout of this node.
+        MutableBlock mutable_block(VectorizedUtils::create_empty_columnswithtypename(
+                _have_other_join_conjunct ? _row_desc_for_other_join_conjunt : row_desc()));
+        std::visit(
+                [&](auto&& arg) {
+                    using HashTableCtxType = std::decay_t<decltype(arg)>;
+                    if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+                        // _probe_ignore_null is a runtime value, the probe helper takes it
+                        // as a template argument: dispatch to the matching instantiation.
+                        if (_probe_ignore_null) {
+                            ProcessHashTableProbe<HashTableCtxType, true> process_hashtable_ctx(
+                                    this, state->batch_size(), probe_rows);
+
+                            st = _have_other_join_conjunct
+                                         ? process_hashtable_ctx
+                                                   .do_process_with_other_join_conjunts(
+                                                           arg, &_null_map_column->get_data(),
+                                                           mutable_block, output_block)
+                                         : process_hashtable_ctx.do_process(
+                                                   arg, &_null_map_column->get_data(),
+                                                   mutable_block, output_block);
+                        } else {
+                            ProcessHashTableProbe<HashTableCtxType, false> process_hashtable_ctx(
+                                    this, state->batch_size(), probe_rows);
+
+                            st = _have_other_join_conjunct
+                                         ? process_hashtable_ctx
+                                                   .do_process_with_other_join_conjunts(
+                                                           arg, &_null_map_column->get_data(),
+                                                           mutable_block, output_block)
+                                         : process_hashtable_ctx.do_process(
+                                                   arg, &_null_map_column->get_data(),
+                                                   mutable_block, output_block);
+                        }
+                    } else {
+                        LOG(FATAL) << "FATAL: uninited hash table";
+                    }
+                },
+                _hash_table_variants);
+    } else if (_probe_eos) {
+        // Probe side finished: some join types still have to emit rows from the build side.
+        if (_is_right_semi_anti || (_is_outer_join && _join_op != TJoinOp::LEFT_OUTER_JOIN)) {
+            MutableBlock mutable_block(
+                    VectorizedUtils::create_empty_columnswithtypename(row_desc()));
+            std::visit(
+                    [&](auto&& arg) {
+                        using HashTableCtxType = std::decay_t<decltype(arg)>;
+                        if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+                            ProcessHashTableProbe<HashTableCtxType, false> process_hashtable_ctx(
+                                    this, state->batch_size(), probe_rows);
+                            st = process_hashtable_ctx.process_data_in_hashtable(arg, mutable_block,
+                                                                                 output_block, eos);
+                        } else {
+                            LOG(FATAL) << "FATAL: uninited hash table";
+                        }
+                    },
+                    _hash_table_variants);
+        } else {
+            *eos = true;
+            return Status::OK();
+        }
+    } else {
+        // Nothing to probe and the probe side is not finished: return the empty block.
+        return Status::OK();
+    }
+
+    // Report a probe failure before filtering or limiting a possibly incomplete output block.
+    RETURN_IF_ERROR(st);
+    RETURN_IF_ERROR(
+            VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns()));
+    reached_limit(output_block, eos);
+
+    return Status::OK();
+}
+
+// Opens the node: opens all join expression contexts, builds the hash table from child(1)
+// and only then opens the probe child (child(0)).
+// Returns the first error encountered (including cancellation), otherwise OK.
+Status HashJoinNode::open(RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::open(state));
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN));
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_CANCELLED(state);
+
+    RETURN_IF_ERROR(VExpr::open(_build_expr_ctxs, state));
+    RETURN_IF_ERROR(VExpr::open(_probe_expr_ctxs, state));
+    if (_vother_join_conjunct_ptr) {
+        RETURN_IF_ERROR((*_vother_join_conjunct_ptr)->open(state));
+    }
+
+    // The build side is opened and fully consumed inside _hash_table_build() before the
+    // probe side is opened.
+    RETURN_IF_ERROR(_hash_table_build(state));
+    RETURN_IF_ERROR(child(0)->open(state));
+
+    return Status::OK();
+}
+
+// Opens the build child (child(1)), drains it block by block into the hash table and
+// finally runs ProcessRuntimeFilterBuild on the finished table.
+// The allocated bytes of every fetched block are consumed on _mem_tracker and added to
+// _mem_used. Returns on the first error, on cancellation or when the memory limit is
+// exceeded.
+Status HashJoinNode::_hash_table_build(RuntimeState* state) {
+    RETURN_IF_ERROR(child(1)->open(state));
+    SCOPED_TIMER(_build_timer);
+    Block block;
+
+    bool eos = false;
+    while (!eos) {
+        block.clear();
+        RETURN_IF_CANCELLED(state);
+
+        RETURN_IF_ERROR(child(1)->get_next(state, &block, &eos));
+        _mem_tracker->Consume(block.allocated_bytes());
+        _mem_used += block.allocated_bytes();
+        RETURN_IF_LIMIT_EXCEEDED(state, "Hash join, while getting next from the child 1.");
+
+        RETURN_IF_ERROR(_process_build_block(state, block));
+        RETURN_IF_LIMIT_EXCEEDED(state, "Hash join, while constructing the hash table.");
+    }
+
+    return std::visit(
+            [&](auto&& arg) -> Status {
+                using HashTableCtxType = std::decay_t<decltype(arg)>;
+                if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+                    ProcessRuntimeFilterBuild<HashTableCtxType> runtime_filter_build_process(this);
+                    return runtime_filter_build_process(state, arg);
+                } else {
+                    LOG(FATAL) << "FATAL: uninited hash table";
+                }
+                // Same ending as the other Status-returning visitors in this file: without
+                // it the monostate instantiation flows off the end of a non-void lambda.
+                __builtin_unreachable();
+            },
+            _hash_table_variants);
+}
+
+// TODO:: unify the code of extract probe join column
+// Evaluates every build key expression on `block` and stores a raw pointer to the column
+// used as join key in `raw_ptrs[i]` (one entry per key expression, sized by the caller).
+//   - null-safe equal key: the result column is used as is.
+//   - nullable key with _build_not_ignore_null[i] set: the nullable column itself is used.
+//   - any other nullable key: the nested column is used, the key's null map is passed to
+//     VectorizedUtils::update_null_map() together with `null_map`, and `ignore_null`
+//     becomes true.
+//   - non-nullable key: the result column is used as is.
+// Const result columns are replaced in `block` by their full-column form.
+// Returns the first expression error, otherwise OK.
+Status HashJoinNode::extract_build_join_column(Block& block, NullMap& null_map,
+                                               ColumnRawPtrs& raw_ptrs, bool& ignore_null,
+                                               RuntimeProfile::Counter& expr_call_timer) {
+    for (size_t i = 0; i < _build_expr_ctxs.size(); ++i) {
+        int result_col_id = -1;
+        // execute build column
+        {
+            SCOPED_TIMER(&expr_call_timer);
+            RETURN_IF_ERROR(_build_expr_ctxs[i]->execute(&block, &result_col_id));
+        }
+
+        // TODO: opt the column is const
+        block.get_by_position(result_col_id).column =
+                block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
+
+        if (_is_null_safe_eq_join[i]) {
+            raw_ptrs[i] = block.get_by_position(result_col_id).column.get();
+        } else {
+            auto column = block.get_by_position(result_col_id).column.get();
+            if (auto* nullable = check_and_get_column<ColumnNullable>(*column)) {
+                auto& col_nested = nullable->get_nested_column();
+                auto& col_nullmap = nullable->get_null_map_data();
+
+                // A single key whose NULLs are ignored is enough to raise the flag.
+                ignore_null |= !_build_not_ignore_null[i];
+                if (_build_not_ignore_null[i]) {
+                    raw_ptrs[i] = nullable;
+                } else {
+                    VectorizedUtils::update_null_map(null_map, col_nullmap);
+                    raw_ptrs[i] = &col_nested;
+                }
+            } else {
+                raw_ptrs[i] = column;
+            }
+        }
+    }
+    return Status::OK();
+}
+
+// Probe-side counterpart of extract_build_join_column(): evaluates every probe key
+// expression on `block` and stores a raw pointer to the column used as join key in
+// `raw_ptrs[i]`.
+//   - null-safe equal key: the result column is used as is.
+//   - nullable key: `ignore_null` becomes true when _probe_not_ignore_null[i] is unset;
+//     the nullable column itself is used when _build_not_ignore_null[i] is set, otherwise
+//     the nested column is used and the key's null map is passed to
+//     VectorizedUtils::update_null_map() together with `null_map`.
+//   - non-nullable key: when _build_not_ignore_null[i] is set, a nullable copy of the
+//     column is appended to `block` and used as key; its position is remembered in
+//     _probe_column_disguise_null. Otherwise the result column is used as is.
+// NOTE(review): the representation is chosen from _build_not_ignore_null, presumably so
+// that the probe key has the same layout as the key stored in the hash table -- confirm.
+// Returns the first expression error, otherwise OK.
+Status HashJoinNode::extract_probe_join_column(Block& block, NullMap& null_map,
+                                               ColumnRawPtrs& raw_ptrs, bool& ignore_null,
+                                               RuntimeProfile::Counter& expr_call_timer) {
+    for (size_t i = 0; i < _probe_expr_ctxs.size(); ++i) {
+        int result_col_id = -1;
+        // execute probe column
+        {
+            SCOPED_TIMER(&expr_call_timer);
+            RETURN_IF_ERROR(_probe_expr_ctxs[i]->execute(&block, &result_col_id));
+        }
+
+        // TODO: opt the column is const
+        block.get_by_position(result_col_id).column =
+                block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
+
+        if (_is_null_safe_eq_join[i]) {
+            raw_ptrs[i] = block.get_by_position(result_col_id).column.get();
+        } else {
+            auto column = block.get_by_position(result_col_id).column.get();
+            if (auto* nullable = check_and_get_column<ColumnNullable>(*column)) {
+                auto& col_nested = nullable->get_nested_column();
+                auto& col_nullmap = nullable->get_null_map_data();
+
+                ignore_null |= !_probe_not_ignore_null[i];
+                if (_build_not_ignore_null[i]) {
+                    raw_ptrs[i] = nullable;
+                } else {
+                    VectorizedUtils::update_null_map(null_map, col_nullmap);
+                    raw_ptrs[i] = &col_nested;
+                }
+            } else {
+                if (_build_not_ignore_null[i]) {
+                    // Wrap the non-nullable probe column into a nullable column and append
+                    // it to the block; the recorded position is the index it is inserted at.
+                    auto column_ptr =
+                            make_nullable(block.get_by_position(result_col_id).column, false);
+                    _probe_column_disguise_null.emplace_back(block.columns());
+                    block.insert({column_ptr,
+                                  make_nullable(block.get_by_position(result_col_id).type), ""});
+                    column = column_ptr.get();
+                }
+                raw_ptrs[i] = column;
+            }
+        }
+    }
+    return Status::OK();
+}
+
+// Inserts one block of build-side rows into the hash table.
+//  - Empty blocks are ignored.
+//  - The block is moved into _acquire_list and materialized in place; the key columns are
+//    then extracted from the acquired block.
+//  - The build helper is instantiated for the runtime combination of "some key ignores
+//    NULL" (has_null) and _build_unique.
+// Returns the error of the key extraction or of the hash table build, otherwise OK.
+Status HashJoinNode::_process_build_block(RuntimeState* state, Block& block) {
+    SCOPED_TIMER(_build_table_timer);
+    size_t rows = block.rows();
+    if (rows == 0) {
+        return Status::OK();
+    }
+    COUNTER_UPDATE(_build_rows_counter, rows);
+
+    auto& acquired_block = _acquire_list.acquire(std::move(block));
+    materialize_block_inplace(acquired_block);
+
+    ColumnRawPtrs raw_ptrs(_build_expr_ctxs.size());
+
+    NullMap null_map_val(rows);
+    null_map_val.assign(rows, (uint8_t)0);
+    bool has_null = false;
+
+    // Get the key column that needs to be built
+    Status st = std::visit(
+            [&](auto&& arg) -> Status {
+                using HashTableCtxType = std::decay_t<decltype(arg)>;
+                if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+                    return extract_build_join_column(acquired_block, null_map_val, raw_ptrs,
+                                                     has_null, *_build_expr_call_timer);
+                } else {
+                    LOG(FATAL) << "FATAL: uninited hash table";
+                }
+                __builtin_unreachable();
+            },
+            _hash_table_variants);
+    // Stop here when a key expression failed: the status is reused below for the build
+    // step, and raw_ptrs may be incomplete.
+    RETURN_IF_ERROR(st);
+
+    bool has_runtime_filter = !_runtime_filter_descs.empty();
+
+    std::visit(
+            [&](auto&& arg) {
+                using HashTableCtxType = std::decay_t<decltype(arg)>;
+                if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+#define CALL_BUILD_FUNCTION(HAS_NULL, BUILD_UNIQUE)                                           \
+    ProcessHashTableBuild<HashTableCtxType, HAS_NULL, BUILD_UNIQUE> hash_table_build_process( \
+            rows, acquired_block, raw_ptrs, this, state->batch_size());                       \
+    st = hash_table_build_process(arg, &null_map_val, has_runtime_filter);
+                    // Turn the two runtime flags into template arguments.
+                    if (has_null) {
+                        if (_build_unique) {
+                            CALL_BUILD_FUNCTION(true, true);
+                        } else {
+                            CALL_BUILD_FUNCTION(true, false);
+                        }
+                    } else {
+                        if (_build_unique) {
+                            CALL_BUILD_FUNCTION(false, true);
+                        } else {
+                            CALL_BUILD_FUNCTION(false, false);
+                        }
+                    }
+#undef CALL_BUILD_FUNCTION
+                } else {
+                    LOG(FATAL) << "FATAL: uninited hash table";
+                }
+            },
+            _hash_table_variants);
+
+    return st;
+}
+
+// Selects the hash table implementation stored in _hash_table_variants.
+//  - One key whose NULLs are not kept: a table keyed by the primitive width of the key
+//    type, or the serialized table for any other type.
+//  - Otherwise: a fixed-width packed key (64/128/256 bit, with or without null map) when
+//    every build key type has a maximum value size and everything fits into a UInt256;
+//    the serialized table in all remaining cases.
+// Also fills _build_key_sz / _probe_key_sz with the per-key byte sizes.
+void HashJoinNode::_hash_table_init() {
+    const bool single_key_without_null =
+            _build_expr_ctxs.size() == 1 && !_build_not_ignore_null[0];
+    if (single_key_without_null) {
+        // Single column optimization
+        switch (_build_expr_ctxs[0]->root()->result_type()) {
+        case TYPE_BOOLEAN:
+        case TYPE_TINYINT:
+            _hash_table_variants.emplace<I8HashTableContext>();
+            return;
+        case TYPE_SMALLINT:
+            _hash_table_variants.emplace<I16HashTableContext>();
+            return;
+        case TYPE_INT:
+        case TYPE_FLOAT:
+            _hash_table_variants.emplace<I32HashTableContext>();
+            return;
+        case TYPE_BIGINT:
+        case TYPE_DOUBLE:
+        case TYPE_DATETIME:
+        case TYPE_DATE:
+            _hash_table_variants.emplace<I64HashTableContext>();
+            return;
+        case TYPE_LARGEINT:
+        case TYPE_DECIMALV2:
+            _hash_table_variants.emplace<I128HashTableContext>();
+            return;
+        default:
+            _hash_table_variants.emplace<SerializedHashTableContext>();
+            return;
+        }
+    }
+
+    const size_t build_key_count = _build_expr_ctxs.size();
+    _probe_key_sz.resize(_probe_expr_ctxs.size());
+    _build_key_sz.resize(build_key_count);
+
+    bool all_keys_fixed_width = true;
+    bool any_key_nullable = false;
+    int total_key_bytes = 0;
+
+    for (size_t idx = 0; idx < build_key_count; ++idx) {
+        const auto key_expr = _build_expr_ctxs[idx]->root();
+        const auto& key_type = key_expr->data_type();
+
+        if (key_type->have_maximum_size_of_value()) {
+            const auto key_is_nullable = key_type->is_nullable();
+            any_key_nullable |= key_is_nullable;
+            // One byte is subtracted from the reported size of a nullable key type.
+            _build_key_sz[idx] =
+                    key_type->get_maximum_size_of_value_in_memory() - (key_is_nullable ? 1 : 0);
+            _probe_key_sz[idx] = _build_key_sz[idx];
+            total_key_bytes += _probe_key_sz[idx];
+        } else {
+            // A key without a bounded size rules out the packed representation.
+            all_keys_fixed_width = false;
+            break;
+        }
+    }
+
+    // The null map bytes and all key bytes together must fit into the widest packed key.
+    if (std::tuple_size<KeysNullMap<UInt256>>::value + total_key_bytes > sizeof(UInt256)) {
+        all_keys_fixed_width = false;
+    }
+
+    if (!all_keys_fixed_width) {
+        _hash_table_variants.emplace<SerializedHashTableContext>();
+        return;
+    }
+
+    // TODO: may we should support uint256 in the future
+    if (any_key_nullable) {
+        // The packed key also has to hold the null map of the chosen key width.
+        if (std::tuple_size<KeysNullMap<UInt64>>::value + total_key_bytes <= sizeof(UInt64)) {
+            _hash_table_variants.emplace<I64FixedKeyHashTableContext<true>>();
+        } else if (std::tuple_size<KeysNullMap<UInt128>>::value + total_key_bytes <=
+                   sizeof(UInt128)) {
+            _hash_table_variants.emplace<I128FixedKeyHashTableContext<true>>();
+        } else {
+            _hash_table_variants.emplace<I256FixedKeyHashTableContext<true>>();
+        }
+    } else {
+        if (total_key_bytes <= sizeof(UInt64)) {
+            _hash_table_variants.emplace<I64FixedKeyHashTableContext<false>>();
+        } else if (total_key_bytes <= sizeof(UInt128)) {
+            _hash_table_variants.emplace<I128FixedKeyHashTableContext<false>>();
+        } else {
+            _hash_table_variants.emplace<I256FixedKeyHashTableContext<false>>();
+        }
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
new file mode 100644
index 0000000000..65b2328e3f
--- /dev/null
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -0,0 +1,225 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <variant>
+
+#include "common/object_pool.h"
+#include "exec/exec_node.h"
+#include "exprs/runtime_filter_slots.h"
+#include "vec/common/columns_hashing.h"
+#include "vec/common/hash_table/hash_map.h"
+#include "vec/common/hash_table/hash_table.h"
+#include "vec/exec/join/join_op.h"
+#include "vec/exec/join/vacquire_list.hpp"
+#include "vec/functions/function.h"
+
+namespace doris {
+namespace vectorized {
+
+// Hash table context whose key is a StringRef hashed through
+// ColumnsHashing::HashMethodSerialized. Each entry maps to a RowRefList.
+struct SerializedHashTableContext {
+    using Mapped = RowRefList;
+    using HashTable = HashMap<StringRef, Mapped>;
+    using State = ColumnsHashing::HashMethodSerialized<typename HashTable::value_type, Mapped>;
+    using Iter = typename HashTable::iterator;
+
+    HashTable hash_table;
+    // Cursor into `hash_table`; valid only after init_once().
+    Iter iter;
+    bool inited = false;
+
+    // Positions `iter` at the first entry of `hash_table` on the first call; every later
+    // call leaves the cursor untouched.
+    void init_once() {
+        if (inited) {
+            return;
+        }
+        iter = hash_table.begin();
+        inited = true;
+    }
+};
+
+// Hash table context for a single numeric key of type T, hashed with HashCRC32<T> through
+// ColumnsHashing::HashMethodOneNumber. The aliases following this template instantiate it
+// with UInt8, UInt16, UInt32, UInt64, UInt128 and UInt256.
+template <class T>
+struct PrimaryTypeHashTableContext {
+    using Mapped = RowRefList;
+    using HashTable = HashMap<T, Mapped, HashCRC32<T>>;
+    using State =
+            ColumnsHashing::HashMethodOneNumber<typename HashTable::value_type, Mapped, T, false>;
+    using Iter = typename HashTable::iterator;
+
+    HashTable hash_table;
+    // Cursor into `hash_table`; valid only after init_once().
+    Iter iter;
+    bool inited = false;
+
+    // Positions `iter` at the first entry of `hash_table` on the first call; every later
+    // call leaves the cursor untouched.
+    void init_once() {
+        if (inited) {
+            return;
+        }
+        iter = hash_table.begin();
+        inited = true;
+    }
+};
+
+// Single-key hash table contexts, named after the bit width of the key type.
+// TODO: use FixedHashTable instead of HashTable
+using I8HashTableContext = PrimaryTypeHashTableContext<UInt8>;
+using I16HashTableContext = PrimaryTypeHashTableContext<UInt16>;
+using I32HashTableContext = PrimaryTypeHashTableContext<UInt32>;
+using I64HashTableContext = PrimaryTypeHashTableContext<UInt64>;
+using I128HashTableContext = PrimaryTypeHashTableContext<UInt128>;
+using I256HashTableContext = PrimaryTypeHashTableContext<UInt256>;
+
+// Hash table context whose key of type T is produced by
+// ColumnsHashing::HashMethodKeysFixed and hashed with HashCRC32<T>. `has_null` is forwarded
+// to the hash method.
+template <class T, bool has_null>
+struct FixedKeyHashTableContext {
+    using Mapped = RowRefList;
+    using HashTable = HashMap<T, Mapped, HashCRC32<T>>;
+    using State = ColumnsHashing::HashMethodKeysFixed<typename HashTable::value_type, T, Mapped,
+                                                      has_null, false>;
+    using Iter = typename HashTable::iterator;
+
+    HashTable hash_table;
+    // Cursor into `hash_table`; valid only after init_once().
+    Iter iter;
+    bool inited = false;
+
+    // Positions `iter` at the first entry of `hash_table` on the first call; every later
+    // call leaves the cursor untouched.
+    void init_once() {
+        if (inited) {
+            return;
+        }
+        iter = hash_table.begin();
+        inited = true;
+    }
+};
+
+// Fixed-key hash table contexts, named after the bit width of the packed key.
+template <bool has_null>
+using I64FixedKeyHashTableContext = FixedKeyHashTableContext<UInt64, has_null>;
+
+template <bool has_null>
+using I128FixedKeyHashTableContext = FixedKeyHashTableContext<UInt128, has_null>;
+
+template <bool has_null>
+using I256FixedKeyHashTableContext = FixedKeyHashTableContext<UInt256, has_null>;
+
+// Every hash table context a HashJoinNode can hold. std::monostate is the state before
+// HashJoinNode::_hash_table_init() has emplaced an implementation; the visitors in
+// vhash_join_node.cpp treat it as a fatal "uninited hash table" error.
+using HashTableVariants =
+        std::variant<std::monostate, SerializedHashTableContext, I8HashTableContext,
+                     I16HashTableContext, I32HashTableContext, I64HashTableContext,
+                     I128HashTableContext, I256HashTableContext, I64FixedKeyHashTableContext<true>,
+                     I64FixedKeyHashTableContext<false>, I128FixedKeyHashTableContext<true>,
+                     I128FixedKeyHashTableContext<false>, I256FixedKeyHashTableContext<true>,
+                     I256FixedKeyHashTableContext<false>>;
+
+class VExprContext;
+
+// Vectorized hash join node. The hash table is built from child(1) in open(); rows of
+// child(0) are then probed against it in get_next(Block).
+class HashJoinNode : public ::doris::ExecNode {
+public:
+    HashJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    ~HashJoinNode() override;
+
+    // Creates the join expressions and registers the runtime filters.
+    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
+    // Creates the profile counters, prepares the expressions and picks the hash table type.
+    virtual Status prepare(RuntimeState* state);
+    // Opens the expressions, builds the hash table from child(1), then opens child(0).
+    virtual Status open(RuntimeState* state);
+    // Row-batch interface; always returns NotSupported.
+    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos);
+    // Produces the next block of join output.
+    virtual Status get_next(RuntimeState* state, Block* block, bool* eos);
+    virtual Status close(RuntimeState* state);
+    HashTableVariants& get_hash_table_variants() { return _hash_table_variants; }
+
+private:
+    using VExprContexts = std::vector<VExprContext*>;
+
+    TJoinOp::type _join_op;
+    // probe expr
+    VExprContexts _probe_expr_ctxs;
+    // build expr
+    VExprContexts _build_expr_ctxs;
+    // other expr
+    std::unique_ptr<VExprContext*> _vother_join_conjunct_ptr;
+
+    // mark the join column whether support null eq
+    std::vector<bool> _is_null_safe_eq_join;
+
+    // mark the build hash table whether contain null column
+    std::vector<bool> _build_not_ignore_null;
+    // mark the probe table should dispose null column
+    std::vector<bool> _probe_not_ignore_null;
+
+    // Positions of the temporary nullable columns appended to _probe_block while extracting
+    // the probe keys; they are erased before the next probe block is fetched.
+    std::vector<uint16_t> _probe_column_disguise_null;
+
+    DataTypes _right_table_data_types;
+    DataTypes _left_table_data_types;
+
+    // Profile counters, created in prepare(). They default to nullptr so that they never
+    // hold an indeterminate value before prepare() has run.
+    RuntimeProfile::Counter* _build_timer = nullptr;
+    RuntimeProfile::Counter* _build_table_timer = nullptr;
+    RuntimeProfile::Counter* _build_expr_call_timer = nullptr;
+    RuntimeProfile::Counter* _build_table_insert_timer = nullptr;
+    RuntimeProfile::Counter* _build_table_expanse_timer = nullptr;
+    RuntimeProfile::Counter* _probe_timer = nullptr;
+    RuntimeProfile::Counter* _probe_expr_call_timer = nullptr;
+    RuntimeProfile::Counter* _probe_next_timer = nullptr;
+    RuntimeProfile::Counter* _build_buckets_counter = nullptr;
+    RuntimeProfile::Counter* _push_down_timer = nullptr;
+    RuntimeProfile::Counter* _push_compute_timer = nullptr;
+    RuntimeProfile::Counter* _build_rows_counter = nullptr;
+    RuntimeProfile::Counter* _probe_rows_counter = nullptr;
+
+    int64_t _hash_table_rows;
+    // Bytes consumed on the memory tracker while building; released in close().
+    int64_t _mem_used;
+
+    Arena _arena;
+    HashTableVariants _hash_table_variants;
+    // Takes over the build blocks handed to _process_build_block().
+    AcquireList<Block> _acquire_list;
+
+    // State of the probe block currently being processed by get_next(Block).
+    Block _probe_block;
+    ColumnRawPtrs _probe_columns;
+    ColumnUInt8::MutablePtr _null_map_column;
+    bool _probe_ignore_null = false;
+    int _probe_index = -1;
+    bool _probe_eos = false;
+
+    Sizes _probe_key_sz;
+    Sizes _build_key_sz;
+
+    const bool _match_all_probe; // output all rows coming from the probe input. Full/Left Join
+    const bool _match_one_build; // match at most one build row to each probe row. Left semi Join
+    const bool _match_all_build; // output all rows coming from the build input. Full/Right Join
+    bool _build_unique;          // build a hash table without duplicated rows. Left semi/anti Join
+
+    const bool _is_left_semi_anti;
+    const bool _is_right_semi_anti;
+    const bool _is_outer_join;
+    bool _have_other_join_conjunct = false;
+
+    // Row layout (child(0) columns followed by child(1) columns) on which the other join
+    // conjunct is evaluated.
+    RowDescriptor _row_desc_for_other_join_conjunt;
+
+private:
+    Status _hash_table_build(RuntimeState* state);
+    Status _process_build_block(RuntimeState* state, Block& block);
+
+    Status extract_build_join_column(Block& block, NullMap& null_map, ColumnRawPtrs& raw_ptrs,
+                                     bool& ignore_null, RuntimeProfile::Counter& expr_call_timer);
+
+    Status extract_probe_join_column(Block& block, NullMap& null_map, ColumnRawPtrs& raw_ptrs,
+                                     bool& ignore_null, RuntimeProfile::Counter& expr_call_timer);
+
+    void _hash_table_init();
+
+    // Helper templates that need access to the private state of the node.
+    template <class HashTableContext, bool ignore_null, bool build_unique>
+    friend class ProcessHashTableBuild;
+
+    template <class HashTableContext, bool ignore_null>
+    friend class ProcessHashTableProbe;
+
+    template <class HashTableContext>
+    friend class ProcessRuntimeFilterBuild;
+
+    std::vector<TRuntimeFilterDesc> _runtime_filter_descs;
+    std::unordered_map<const Block*, std::vector<int>> _inserted_rows;
+};
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vaggregation_node.cpp b/be/src/vec/exec/vaggregation_node.cpp
new file mode 100644
index 0000000000..ed8a1ba58f
--- /dev/null
+++ b/be/src/vec/exec/vaggregation_node.cpp
@@ -0,0 +1,1102 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vaggregation_node.h"
+
+#include <memory>
+
+#include "exec/exec_node.h"
+#include "runtime/mem_pool.h"
+#include "runtime/row_batch.h"
+#include "util/defer_op.h"
+#include "vec/core/block.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/exprs/vslot_ref.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+
+/// The minimum reduction factor (input rows divided by output rows) to grow hash tables
+/// in a streaming preaggregation, given that the hash tables are currently the given
+/// size or above. The sizes roughly correspond to hash table sizes where the bucket
+/// arrays will fit in a cache level. Intuitively, we don't want the working set of the
+/// aggregation to expand to the next level of cache unless we're reducing the input
+/// enough to outweigh the increased memory latency we'll incur for each hash table
+/// lookup.
+///
+/// Note that the current reduction achieved is not always a good estimate of the
+/// final reduction. It may be biased either way depending on the ordering of the
+/// input. If the input order is random, we will underestimate the final reduction
+/// factor because the probability of a row having the same key as a previous row
+/// increases as more input is processed.  If the input order is correlated with the
+/// key, skew may bias the estimate. If high cardinality keys appear first, we
+/// may overestimate and if low cardinality keys appear first, we underestimate.
+/// To estimate the eventual reduction achieved, we estimate the final reduction
+/// using the planner's estimated input cardinality and the assumption that input
+/// is in a random order. This means that we assume that the reduction factor will
+/// increase over time.
+struct StreamingHtMinReductionEntry {
+    // Use 'streaming_ht_min_reduction' once the total size of the hash table bucket
+    // directories, in bytes, is greater than or equal to this threshold.
+    int min_ht_mem;
+    // The minimum reduction factor (input rows / hash table rows) required to keep expanding.
+    double streaming_ht_min_reduction;
+};
+
+// Entries must stay in ascending 'min_ht_mem' order: the lookup walks them front to back.
+// TODO: experimentally tune these values and derive them from the machine's cache sizes.
+static constexpr StreamingHtMinReductionEntry STREAMING_HT_MIN_REDUCTION[] = {
+        // Expand up to L2 cache always.
+        {0, 0.0},
+        // Expand into L3 cache if we look like we're getting some reduction.
+        {256 * 1024, 1.1},
+        // Expand into main memory if we're getting a significant reduction.
+        {2 * 1024 * 1024, 2.0},
+};
+
+static constexpr int STREAMING_HT_MIN_REDUCTION_SIZE =
+        sizeof(STREAMING_HT_MIN_REDUCTION) / sizeof(STREAMING_HT_MIN_REDUCTION[0]); // entry count
+
+// Captures the tuple ids and the finalize flag from the thrift plan node. Streaming
+// pre-aggregation is enabled only when the plan both sets and turns on the option.
+AggregationNode::AggregationNode(ObjectPool* pool, const TPlanNode& tnode,
+                                 const DescriptorTbl& descs)
+        : ExecNode(pool, tnode, descs),
+          _intermediate_tuple_id(tnode.agg_node.intermediate_tuple_id),
+          _intermediate_tuple_desc(nullptr), // resolved in prepare()
+          _output_tuple_id(tnode.agg_node.output_tuple_id),
+          _output_tuple_desc(nullptr), // resolved in prepare()
+          _needs_finalize(tnode.agg_node.need_finalize),
+          _is_merge(false), // decided in init()
+          _agg_data(),
+          _build_timer(nullptr),
+          _exec_timer(nullptr),
+          _merge_timer(nullptr) {
+    _is_streaming_preagg = tnode.agg_node.__isset.use_streaming_preaggregation &&
+                           tnode.agg_node.use_streaming_preaggregation;
+    if (_is_streaming_preagg) {
+        // Debug-only sanity checks on what the planner may attach to a streaming pre-agg.
+        DCHECK(_conjunct_ctxs.empty()) << "Preaggs have no conjuncts";
+        DCHECK(!tnode.agg_node.grouping_exprs.empty()) << "Streaming preaggs do grouping";
+        DCHECK(_limit == -1) << "Preaggs have no limits";
+    }
+}
+
+AggregationNode::~AggregationNode() = default; // explicit teardown of agg states lives in close()
+
+// Builds the grouping expression trees and one AggFnEvaluator per aggregate function in the
+// plan node, then records whether any of those functions is a merge aggregation.
+Status AggregationNode::init(const TPlanNode& tnode, RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::init(tnode, state));
+    RETURN_IF_ERROR(
+            VExpr::create_expr_trees(_pool, tnode.agg_node.grouping_exprs, &_probe_expr_ctxs));
+
+    const auto& agg_functions = tnode.agg_node.aggregate_functions;
+    _aggregate_evaluators.reserve(agg_functions.size());
+    for (const auto& agg_fn : agg_functions) {
+        AggFnEvaluator* evaluator = nullptr;
+        RETURN_IF_ERROR(AggFnEvaluator::create(_pool, agg_fn, &evaluator));
+        _aggregate_evaluators.push_back(evaluator);
+    }
+
+    // The node runs in merge mode as soon as one aggregate expression is flagged as merge.
+    _is_merge = std::any_of(agg_functions.cbegin(), agg_functions.cend(),
+                            [](const auto& e) { return e.nodes[0].agg_expr.is_merge_agg; });
+    return Status::OK();
+}
+
+// Picks the hash-table key layout for 'probe_exprs': a native integer key for one fixed-width
+// expression, packed fixed-size keys when several keys fit in a UInt256, otherwise serialized.
+void AggregationNode::_init_hash_method(std::vector<VExprContext*>& probe_exprs) {
+    DCHECK(probe_exprs.size() >= 1);
+    if (probe_exprs.size() == 1) {
+        auto is_nullable = probe_exprs[0]->root()->is_nullable();
+        switch (probe_exprs[0]->root()->result_type()) {
+        case TYPE_TINYINT:
+        case TYPE_BOOLEAN:
+            _agg_data.init(AggregatedDataVariants::Type::int8_key, is_nullable);
+            return;
+        case TYPE_SMALLINT:
+            _agg_data.init(AggregatedDataVariants::Type::int16_key, is_nullable);
+            return;
+        case TYPE_INT:
+        case TYPE_FLOAT:
+            _agg_data.init(AggregatedDataVariants::Type::int32_key, is_nullable);
+            return;
+        case TYPE_BIGINT:
+        case TYPE_DOUBLE:
+        case TYPE_DATE:
+        case TYPE_DATETIME:
+            _agg_data.init(AggregatedDataVariants::Type::int64_key, is_nullable);
+            return;
+        case TYPE_LARGEINT:
+        case TYPE_DECIMALV2:
+            _agg_data.init(AggregatedDataVariants::Type::int128_key, is_nullable);
+            return;
+        default:
+            _agg_data.init(AggregatedDataVariants::Type::serialized);
+        }
+    } else {
+        bool use_fixed_key = true;
+        bool has_null = false;
+        int key_byte_size = 0;
+        _probe_key_sz.resize(probe_exprs.size());
+        for (size_t i = 0; i < probe_exprs.size(); ++i) {
+            const auto vexpr = probe_exprs[i]->root();
+            const auto& data_type = vexpr->data_type();
+            if (!data_type->have_maximum_size_of_value()) {
+                use_fixed_key = false;
+                break;
+            }
+
+            auto is_null = data_type->is_nullable();
+            has_null |= is_null;
+            _probe_key_sz[i] = data_type->get_maximum_size_of_value_in_memory() - (is_null ? 1 : 0);
+            key_byte_size += _probe_key_sz[i];
+        }
+
+        if (std::tuple_size<KeysNullMap<UInt256>>::value + key_byte_size > sizeof(UInt256)) {
+            use_fixed_key = false;
+        }
+
+        if (use_fixed_key) {
+            if (has_null) {
+                if (std::tuple_size<KeysNullMap<UInt64>>::value + key_byte_size <= sizeof(UInt64)) {
+                    _agg_data.init(AggregatedDataVariants::Type::int64_keys, has_null);
+                } else if (std::tuple_size<KeysNullMap<UInt128>>::value + key_byte_size <=
+                           sizeof(UInt128)) {
+                    _agg_data.init(AggregatedDataVariants::Type::int128_keys, has_null);
+                } else {
+                    _agg_data.init(AggregatedDataVariants::Type::int256_keys, has_null);
+                }
+            } else {
+                if (key_byte_size <= sizeof(UInt64)) {
+                    _agg_data.init(AggregatedDataVariants::Type::int64_keys, has_null);
+                } else if (key_byte_size <= sizeof(UInt128)) {
+                    _agg_data.init(AggregatedDataVariants::Type::int128_keys, has_null);
+                } else {
+                    _agg_data.init(AggregatedDataVariants::Type::int256_keys, has_null);
+                }
+            }
+        } else {
+            _agg_data.init(AggregatedDataVariants::Type::serialized);
+        }
+    }
+}
+
+Status AggregationNode::prepare(RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::prepare(state));
+    _build_timer = ADD_TIMER(runtime_profile(), "BuildTime");
+    _exec_timer = ADD_TIMER(runtime_profile(), "ExecTime");
+    _merge_timer = ADD_TIMER(runtime_profile(), "MergeTime");
+    _expr_timer = ADD_TIMER(runtime_profile(), "ExprTime");
+    _get_results_timer = ADD_TIMER(runtime_profile(), "GetResultsTime");
+    // Below: resolve descriptors, lay out the aggregate states, then bind the executor callbacks.
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    _intermediate_tuple_desc = state->desc_tbl().get_tuple_descriptor(_intermediate_tuple_id);
+    _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id);
+    DCHECK_EQ(_intermediate_tuple_desc->slots().size(), _output_tuple_desc->slots().size());
+    RETURN_IF_ERROR(
+            VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc(), expr_mem_tracker()));
+
+    _mem_pool = std::make_unique<MemPool>(mem_tracker().get());
+    // Aggregate slots are read right after the grouping-key positions of both tuples.
+    int j = _probe_expr_ctxs.size();
+    for (int i = 0; i < _aggregate_evaluators.size(); ++i, ++j) {
+        SlotDescriptor* intermediate_slot_desc = _intermediate_tuple_desc->slots()[j];
+        SlotDescriptor* output_slot_desc = _output_tuple_desc->slots()[j];
+        RETURN_IF_ERROR(_aggregate_evaluators[i]->prepare(state, child(0)->row_desc(),
+                                                          _mem_pool.get(), intermediate_slot_desc,
+                                                          output_slot_desc, mem_tracker()));
+    }
+
+    // set profile timer to evaluators
+    for (auto& evaluator : _aggregate_evaluators) {
+        evaluator->set_timer(_exec_timer, _merge_timer, _expr_timer);
+    }
+
+    _offsets_of_aggregate_states.resize(_aggregate_evaluators.size());
+    // Place the states back to back, padding each one up to the next state's alignment.
+    for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) {
+        _offsets_of_aggregate_states[i] = _total_size_of_aggregate_states;
+
+        const auto& agg_function = _aggregate_evaluators[i]->function();
+        // aggregate states are aligned based on the maximum requirement
+        _align_aggregate_states = std::max(_align_aggregate_states, agg_function->align_of_data());
+        _total_size_of_aggregate_states += agg_function->size_of_data();
+
+        // If not the last aggregate_state, we need pad it so that next aggregate_state will be aligned.
+        if (i + 1 < _aggregate_evaluators.size()) {
+            size_t alignment_of_next_state =
+                    _aggregate_evaluators[i + 1]->function()->align_of_data();
+            if ((alignment_of_next_state & (alignment_of_next_state - 1)) != 0) {
+                return Status::RuntimeError(fmt::format("Logical error: align_of_data is not 2^N"));
+            }
+
+            /// Extend total_size to next alignment requirement
+            /// Add padding by rounding up 'total_size_of_aggregate_states' to be a multiplier of alignment_of_next_state.
+            _total_size_of_aggregate_states =
+                    (_total_size_of_aggregate_states + alignment_of_next_state - 1) /
+                    alignment_of_next_state * alignment_of_next_state;
+        }
+    }
+    // No grouping keys: one aggregate state allocated from the pool; otherwise a keyed method.
+    if (_probe_expr_ctxs.empty()) {
+        _agg_data.init(AggregatedDataVariants::Type::without_key);
+
+        _agg_data.without_key = reinterpret_cast<AggregateDataPtr>(
+                _mem_pool->allocate(_total_size_of_aggregate_states));
+
+        _create_agg_status(_agg_data.without_key);
+
+        if (_is_merge) {
+            _executor.execute = std::bind<Status>(&AggregationNode::_merge_without_key, this,
+                                                  std::placeholders::_1);
+        } else {
+            _executor.execute = std::bind<Status>(&AggregationNode::_execute_without_key, this,
+                                                  std::placeholders::_1);
+        }
+        // Finalizing nodes emit final values; the others emit serialized intermediate states.
+        if (_needs_finalize) {
+            _executor.get_result = std::bind<Status>(&AggregationNode::_get_without_key_result,
+                                                     this, std::placeholders::_1,
+                                                     std::placeholders::_2, std::placeholders::_3);
+        } else {
+            _executor.get_result = std::bind<Status>(&AggregationNode::_serialize_without_key, this,
+                                                     std::placeholders::_1, std::placeholders::_2,
+                                                     std::placeholders::_3);
+        }
+
+        _executor.update_memusage =
+                std::bind<void>(&AggregationNode::_update_memusage_without_key, this);
+        _executor.close = std::bind<void>(&AggregationNode::_close_without_key, this);
+    } else {
+        _init_hash_method(_probe_expr_ctxs);
+        if (_is_merge) {
+            _executor.execute = std::bind<Status>(&AggregationNode::_merge_with_serialized_key,
+                                                  this, std::placeholders::_1);
+        } else {
+            _executor.execute = std::bind<Status>(&AggregationNode::_execute_with_serialized_key,
+                                                  this, std::placeholders::_1);
+        }
+        // pre_agg is bound only for streaming pre-aggregation; get_next() calls it only then.
+        if (_is_streaming_preagg) {
+            runtime_profile()->append_exec_option("Streaming Preaggregation");
+            _executor.pre_agg =
+                    std::bind<Status>(&AggregationNode::_pre_agg_with_serialized_key, this,
+                                      std::placeholders::_1, std::placeholders::_2);
+            _max_size_of_stream_pre_agg_buffer = state->batch_size();
+        }
+
+        if (_needs_finalize) {
+            _executor.get_result = std::bind<Status>(
+                    &AggregationNode::_get_with_serialized_key_result, this, std::placeholders::_1,
+                    std::placeholders::_2, std::placeholders::_3);
+        } else {
+            _executor.get_result = std::bind<Status>(
+                    &AggregationNode::_serialize_with_serialized_key_result, this,
+                    std::placeholders::_1, std::placeholders::_2, std::placeholders::_3);
+        }
+        _executor.update_memusage =
+                std::bind<void>(&AggregationNode::_update_memusage_with_serialized_key, this);
+        _executor.close = std::bind<void>(&AggregationNode::_close_with_serialized_key, this);
+    }
+
+    return Status::OK();
+}
+
+// Opens the expressions, evaluators and child; a non-streaming node then drains the child
+// and aggregates every non-empty block before returning.
+Status AggregationNode::open(RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+
+    RETURN_IF_ERROR(VExpr::open(_probe_expr_ctxs, state));
+    for (auto* evaluator : _aggregate_evaluators) {
+        RETURN_IF_ERROR(evaluator->open(state));
+    }
+    RETURN_IF_ERROR(_children[0]->open(state));
+
+    // A streaming pre-aggregation consumes its input lazily in get_next() instead.
+    if (_is_streaming_preagg) {
+        return Status::OK();
+    }
+
+    Block input_block;
+    bool child_eos = false;
+    do {
+        RETURN_IF_CANCELLED(state);
+        release_block_memory(input_block);
+        RETURN_IF_ERROR(_children[0]->get_next(state, &input_block, &child_eos));
+        if (input_block.rows() != 0) {
+            RETURN_IF_ERROR(_executor.execute(&input_block));
+            _executor.update_memusage();
+            RETURN_IF_LIMIT_EXCEEDED(state, "aggregator, while execute open.");
+        }
+    } while (!child_eos);
+    return Status::OK();
+}
+
+Status AggregationNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
+    return Status::NotSupported("Not Implemented Aggregation Node::get_next scalar"); // Block only
+}
+
+Status AggregationNode::get_next(RuntimeState* state, Block* block, bool* eos) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    // Streaming pre-agg pulls child blocks here; otherwise open() already consumed the child.
+    if (_is_streaming_preagg) {
+        bool child_eos = false;
+        // Skip empty child blocks until one has rows or the child reports end of stream.
+        RETURN_IF_CANCELLED(state);
+        do {
+            release_block_memory(_preagg_block);
+            RETURN_IF_ERROR(_children[0]->get_next(state, &_preagg_block, &child_eos));
+        } while (_preagg_block.rows() == 0 && !child_eos);
+        // Rows available: pre-aggregate them; none left: emit what the node has accumulated.
+        if (_preagg_block.rows() != 0) {
+            RETURN_IF_ERROR(_executor.pre_agg(&_preagg_block, block));
+        } else {
+            RETURN_IF_ERROR(_executor.get_result(state, block, eos));
+        }
+        // _num_rows_returned feeds the hash-table expansion decision of the streaming pre-agg.
+        _num_rows_returned += block->rows();
+        if (*eos) COUNTER_SET(_rows_returned_counter, _num_rows_returned); // NOTE(review): pre_agg() never writes *eos
+    } else {
+        RETURN_IF_ERROR(_executor.get_result(state, block, eos));
+        // Apply the HAVING conjuncts; the streaming pre-aggregation path above never filters.
+        RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns()));
+        reached_limit(block, eos);
+    }
+
+    _executor.update_memusage();
+    RETURN_IF_LIMIT_EXCEEDED(state, "aggregator, while execute get_next.");
+    return Status::OK();
+}
+
+Status AggregationNode::close(RuntimeState* state) {
+    if (is_closed()) return Status::OK();
+    Status status = ExecNode::close(state); // returned at the end; cleanup runs even on failure
+    VExpr::close(_probe_expr_ctxs, state);
+    if (_executor.close) _executor.close(); // unset when prepare() never bound an executor
+    delete[] _streaming_pre_agg_buffer;
+    _streaming_pre_agg_buffer = nullptr; // never leave a dangling pointer to the freed buffer
+    return status;
+}
+
+// Constructs every aggregate state in place at its precomputed offset inside 'data'.
+Status AggregationNode::_create_agg_status(AggregateDataPtr data) {
+    for (size_t idx = 0; idx != _aggregate_evaluators.size(); ++idx)
+        _aggregate_evaluators[idx]->create(data + _offsets_of_aggregate_states[idx]);
+    return Status::OK();
+}
+
+// Destroys every aggregate state located at its precomputed offset inside 'data'.
+Status AggregationNode::_destory_agg_status(AggregateDataPtr data) {
+    for (size_t idx = 0; idx != _aggregate_evaluators.size(); ++idx)
+        _aggregate_evaluators[idx]->function()->destroy(data + _offsets_of_aggregate_states[idx]);
+    return Status::OK();
+}
+
+// Finalizes the single key-less aggregate state into 'block': one column per aggregate
+// function, wrapped as nullable where the output schema requires it. Always sets *eos.
+Status AggregationNode::_get_without_key_result(RuntimeState* state, Block* block, bool* eos) {
+    DCHECK(_agg_data.without_key != nullptr);
+    block->clear();
+    *block = VectorizedUtils::create_empty_columnswithtypename(row_desc());
+
+    const int agg_size = _aggregate_evaluators.size();
+    MutableColumns columns(agg_size);
+    std::vector<DataTypePtr> data_types(agg_size);
+    for (int idx = 0; idx < agg_size; ++idx) {
+        auto* evaluator = _aggregate_evaluators[idx];
+        data_types[idx] = evaluator->function()->get_return_type();
+        columns[idx] = data_types[idx]->create_column();
+        evaluator->insert_result_info(_agg_data.without_key + _offsets_of_aggregate_states[idx],
+                                      columns[idx].get());
+    }
+
+    const auto& block_schema = block->get_columns_with_type_and_name();
+    DCHECK_EQ(block_schema.size(), columns.size());
+    for (int idx = 0; idx < block_schema.size(); ++idx) {
+        const auto column_type = block_schema[idx].type;
+        if (column_type->equals(*data_types[idx])) {
+            continue;
+        }
+        // Only one mismatch is expected: the schema wraps the function's type in Nullable.
+        DCHECK(column_type->is_nullable());
+        DCHECK(((DataTypeNullable*)column_type.get())
+                       ->get_nested_type()
+                       ->equals(*data_types[idx]));
+        DCHECK(!data_types[idx]->is_nullable());
+        // Aggregates other than `count` yield NULL over an empty input, so the null flag is
+        // set exactly when the child returned no rows.
+        ColumnPtr wrapped = std::move(columns[idx]);
+        wrapped = make_nullable(wrapped, _children[0]->rows_returned() == 0);
+        columns[idx] = std::move(*wrapped).mutate();
+    }
+
+    block->set_columns(std::move(columns));
+    *eos = true;
+    return Status::OK();
+}
+
+Status AggregationNode::_serialize_without_key(RuntimeState* state, Block* block, bool* eos) {
+    // When the child produced no rows, this non-finalizing (level-one) node reports eos
+    // without touching 'block'; per the original note, the level-two aggregation node is the
+    // one that returns the NULL result for an empty input.
+    if (UNLIKELY(_children[0]->rows_returned() == 0)) {
+        *eos = true;
+        return Status::OK();
+    }
+    block->clear();
+
+    DCHECK(_agg_data.without_key != nullptr);
+    int agg_size = _aggregate_evaluators.size();
+
+    MutableColumns value_columns(agg_size);
+    std::vector<DataTypePtr> data_types(agg_size);
+
+    // Each aggregate state is serialized into its own string column through a buffer writer.
+    std::vector<VectorBufferWriter> value_buffer_writers;
+    auto serialize_string_type = std::make_shared<DataTypeString>();
+    for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
+        data_types[i] = serialize_string_type;
+        value_columns[i] = serialize_string_type->create_column();
+        value_buffer_writers.emplace_back(*reinterpret_cast<ColumnString*>(value_columns[i].get()));
+    }
+
+    for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
+        _aggregate_evaluators[i]->function()->serialize(
+                _agg_data.without_key + _offsets_of_aggregate_states[i], value_buffer_writers[i]);
+        value_buffer_writers[i].commit();
+    }
+    { // rebuild 'block' with one string-typed, unnamed column per aggregate
+        ColumnsWithTypeAndName data_with_schema;
+        for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
+            ColumnWithTypeAndName column_with_schema = {nullptr, data_types[i], ""};
+            data_with_schema.push_back(std::move(column_with_schema));
+        }
+        *block = Block(data_with_schema);
+    }
+    // Hand the serialized columns to the freshly built schema and finish the stream.
+    block->set_columns(std::move(value_columns));
+    *eos = true;
+    return Status::OK();
+}
+
+Status AggregationNode::_execute_without_key(Block* block) {
+    DCHECK(_agg_data.without_key != nullptr);
+    SCOPED_TIMER(_build_timer);
+    for (size_t idx = 0; idx != _aggregate_evaluators.size(); ++idx) { // one state per evaluator
+        auto place = _agg_data.without_key + _offsets_of_aggregate_states[idx];
+        _aggregate_evaluators[idx]->execute_single_add(block, place);
+    }
+    return Status::OK();
+}
+
+Status AggregationNode::_merge_without_key(Block* block) {
+    SCOPED_TIMER(_merge_timer);
+    DCHECK(_agg_data.without_key != nullptr);
+    std::unique_ptr<char[]> deserialize_buffer(new char[_total_size_of_aggregate_states]);
+    int rows = block->rows();
+    for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
+        if (_aggregate_evaluators[i]->is_merge()) {
+            auto column = block->get_by_position(i).column;
+            if (column->is_nullable()) {
+                column = ((ColumnNullable*)column.get())->get_nested_column_ptr();
+            }
+            // Each row of the string column is read as one serialized partial state.
+            for (int j = 0; j < rows; ++j) {
+                VectorBufferReader buffer_reader(((ColumnString*)(column.get()))->get_data_at(j));
+                _create_agg_status(deserialize_buffer.get());
+                // Decode the row into this evaluator's slot of the scratch buffer.
+                _aggregate_evaluators[i]->function()->deserialize(
+                        deserialize_buffer.get() + _offsets_of_aggregate_states[i], buffer_reader,
+                        &_agg_arena_pool);
+                // Fold the decoded state into the node's single key-less state.
+                _aggregate_evaluators[i]->function()->merge(
+                        _agg_data.without_key + _offsets_of_aggregate_states[i],
+                        deserialize_buffer.get() + _offsets_of_aggregate_states[i],
+                        &_agg_arena_pool);
+                // NOTE(review): create/destroy cover every evaluator's state, not only slot i.
+                _destory_agg_status(deserialize_buffer.get());
+            }
+        } else { // non-merge evaluators aggregate the block directly
+            _aggregate_evaluators[i]->execute_single_add(
+                    block, _agg_data.without_key + _offsets_of_aggregate_states[i]);
+        }
+    }
+    return Status::OK();
+}
+
+void AggregationNode::_update_memusage_without_key() {
+    mem_tracker()->Consume(_agg_arena_pool.size() - _mem_usage_record.used_in_arena); // delta only
+    _mem_usage_record.used_in_arena = _agg_arena_pool.size(); // remember what was reported
+}
+
+void AggregationNode::_close_without_key() {
+    _destory_agg_status(_agg_data.without_key); // destroy the key-less aggregate states
+    release_tracker();
+}
+
+bool AggregationNode::_should_expand_preagg_hash_tables() {
+    if (!_should_expand_hash_table) return false;
+    // A rejected expansion is sticky (flag cleared below); otherwise re-check the reduction.
+    return std::visit(
+            [&](auto&& agg_method) -> bool {
+                auto& hash_tbl = agg_method.data;
+                auto [ht_mem, ht_rows] =
+                        std::pair {hash_tbl.get_buffer_size_in_bytes(), hash_tbl.size()};
+
+                // Need some rows in tables to have valid statistics.
+                if (ht_rows == 0) return true;
+
+                // Find the appropriate reduction factor in our table for the current hash table sizes.
+                int cache_level = 0;
+                while (cache_level + 1 < STREAMING_HT_MIN_REDUCTION_SIZE &&
+                       ht_mem >= STREAMING_HT_MIN_REDUCTION[cache_level + 1].min_ht_mem) {
+                    ++cache_level;
+                }
+
+                // Compare the number of rows in the hash table with the number of input rows that
+                // were aggregated into it. Exclude passed through rows from this calculation since
+                // they were not in hash tables.
+                const int64_t input_rows = _children[0]->rows_returned();
+                const int64_t aggregated_input_rows = input_rows - _num_rows_returned;
+                // TODO chenhao
+                //  const int64_t expected_input_rows = estimated_input_cardinality_ - num_rows_returned_;
+                double current_reduction = static_cast<double>(aggregated_input_rows) / ht_rows;
+
+                // TODO: workaround for IMPALA-2490: subplan node rows_returned counter may be
+                // inaccurate; a non-positive count makes current_reduction meaningless, so expand.
+                if (aggregated_input_rows <= 0) return true;
+
+                // Extrapolate the current reduction factor (r) using the formula
+                // R = 1 + (N / n) * (r - 1), where R is the reduction factor over the full input data
+                // set, N is the number of input rows, excluding passed-through rows, and n is the
+                // number of rows inserted or merged into the hash tables. This is a very rough
+                // approximation but is good enough to be useful.
+                // TODO: consider collecting more statistics to better estimate reduction.
+                //  double estimated_reduction = aggregated_input_rows >= expected_input_rows
+                //      ? current_reduction
+                //      : 1 + (expected_input_rows / aggregated_input_rows) * (current_reduction - 1);
+                double min_reduction =
+                        STREAMING_HT_MIN_REDUCTION[cache_level].streaming_ht_min_reduction;
+
+                //  COUNTER_SET(preagg_estimated_reduction_, estimated_reduction);
+                //    COUNTER_SET(preagg_streaming_ht_min_reduction_, min_reduction);
+                //  return estimated_reduction > min_reduction;
+                _should_expand_hash_table = current_reduction > min_reduction;
+                return _should_expand_hash_table;
+            },
+            _agg_data._aggregated_method_variant);
+}
+
+Status AggregationNode::_pre_agg_with_serialized_key(doris::vectorized::Block* in_block,
+                                                     doris::vectorized::Block* out_block) {
+    SCOPED_TIMER(_build_timer);
+    DCHECK(!_probe_expr_ctxs.empty());
+
+    size_t key_size = _probe_expr_ctxs.size();
+    ColumnRawPtrs key_columns(key_size);
+    {
+        SCOPED_TIMER(_expr_timer);
+        for (size_t i = 0; i < key_size; ++i) {
+            int result_column_id = -1;
+            RETURN_IF_ERROR(_probe_expr_ctxs[i]->execute(in_block, &result_column_id));
+            in_block->get_by_position(result_column_id).column =
+                    in_block->get_by_position(result_column_id)
+                            .column->convert_to_full_column_if_const();
+            key_columns[i] = in_block->get_by_position(result_column_id).column.get();
+        }
+    }
+
+    int rows = in_block->rows();
+    PODArray<AggregateDataPtr> places(rows);
+
+    // Stop expanding hash tables if we're not reducing the input sufficiently. As our
+    // hash tables expand out of each level of cache hierarchy, every hash table lookup
+    // will take longer. We also may not be able to expand hash tables because of memory
+    // pressure. In either case we should always use the remaining space in the hash table
+    // to avoid wasting memory.
+    // But for fixed hash map, it never need to expand
+    bool ret_flag = false;
+    std::visit(
+            [&](auto&& agg_method) -> void {
+                if (auto& hash_tbl = agg_method.data; hash_tbl.add_elem_size_overflow(rows)) {
+                    // do not try to do agg, just init and serialize directly return the out_block
+                    if (!_should_expand_preagg_hash_tables()) {
+                        ret_flag = true;
+                        if (_streaming_pre_agg_buffer == nullptr) {
+                            _streaming_pre_agg_buffer =
+                                    new char[((_total_size_of_aggregate_states *
+                                               _max_size_of_stream_pre_agg_buffer) /
+                                                      _align_aggregate_states +
+                                              1) *
+                                             _align_aggregate_states];
+                        }
+
+                        if (UNLIKELY(_max_size_of_stream_pre_agg_buffer < rows)) {
+                            delete[] _streaming_pre_agg_buffer;
+                            _streaming_pre_agg_buffer = new char[((_total_size_of_aggregate_states *
+                                                                   rows) / _align_aggregate_states +
+                                                                  1) *
+                                                                 _align_aggregate_states];
+                        }
+
+                        auto aggregate_data = _streaming_pre_agg_buffer;
+                        for (size_t i = 0; i < rows; ++i) {
+                            _create_agg_status(aggregate_data);
+                            places[i] = aggregate_data;
+                            aggregate_data += _total_size_of_aggregate_states;
+                        }
+                        for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
+                            _aggregate_evaluators[i]->execute_batch_add(
+                                    in_block, _offsets_of_aggregate_states[i], places.data(),
+                                    &_agg_arena_pool);
+                        }
+
+                        // will serialize value data to string column
+                        std::vector<VectorBufferWriter> value_buffer_writers;
+                        bool mem_reuse = out_block->mem_reuse();
+                        auto serialize_string_type = std::make_shared<DataTypeString>();
+                        MutableColumns value_columns;
+                        for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
+                            if (mem_reuse) {
+                                value_columns.emplace_back(
+                                        std::move(*out_block->get_by_position(i + key_size).column)
+                                                .mutate());
+                            } else {
+                                // slot type of value it should always be string type
+                                value_columns.emplace_back(serialize_string_type->create_column());
+                            }
+                            value_buffer_writers.emplace_back(
+                                    *reinterpret_cast<ColumnString*>(value_columns[i].get()));
+                        }
+
+                        aggregate_data = _streaming_pre_agg_buffer;
+                        for (size_t j = 0; j < rows; ++j) {
+                            for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) {
+                                _aggregate_evaluators[i]->function()->serialize(
+                                        aggregate_data + _offsets_of_aggregate_states[i],
+                                        value_buffer_writers[i]);
+                                value_buffer_writers[i].commit();
+                            }
+                            aggregate_data += _total_size_of_aggregate_states;
+                        }
+
+                        if (!mem_reuse) {
+                            ColumnsWithTypeAndName columns_with_schema;
+                            for (int i = 0; i < key_size; ++i) {
+                                columns_with_schema.emplace_back(
+                                        key_columns[i]->clone_resized(rows),
+                                        _probe_expr_ctxs[i]->root()->data_type(), "");
+                            }
+                            for (int i = 0; i < value_columns.size(); ++i) {
+                                columns_with_schema.emplace_back(std::move(value_columns[i]),
+                                                                 serialize_string_type, "");
+                            }
+                            out_block->swap(Block(columns_with_schema));
+                        } else {
+                            for (int i = 0; i < key_size; ++i) {
+                                std::move(*out_block->get_by_position(i).column)
+                                        .mutate()
+                                        ->insert_range_from(*key_columns[i], 0, rows);
+                            }
+                        }
+                    }
+                }
+            },
+            _agg_data._aggregated_method_variant);
+
+    if (!ret_flag) {
+        std::visit(
+                [&](auto &&agg_method) -> void {
+                    using HashMethodType = std::decay_t<decltype(agg_method)>;
+                    using AggState = typename HashMethodType::State;
+                    AggState state(key_columns, _probe_key_sz, nullptr);
+                    /// For all rows.
+                    for (size_t i = 0; i < rows; ++i) {
+                        AggregateDataPtr aggregate_data = nullptr;
+
+                        auto emplace_result = state.emplace_key(agg_method.data, i, _agg_arena_pool);
+
+                        /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
+                        if (emplace_result.is_inserted()) {
+                            /// exception-safety - if you can not allocate memory or create states, then destructors will not be called.
+                            emplace_result.set_mapped(nullptr);
+
+                            aggregate_data = _agg_arena_pool.aligned_alloc(
+                                    _total_size_of_aggregate_states, _align_aggregate_states);
+                            _create_agg_status(aggregate_data);
+
+                            emplace_result.set_mapped(aggregate_data);
+                        } else
+                            aggregate_data = emplace_result.get_mapped();
+
+                        places[i] = aggregate_data;
+                        assert(places[i] != nullptr);
+                    }
+                },
+                _agg_data._aggregated_method_variant);
+
+        for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
+            _aggregate_evaluators[i]->execute_batch_add(in_block, _offsets_of_aggregate_states[i],
+                                                        places.data(), &_agg_arena_pool);
+        }
+    }
+
+    return Status::OK();
+}
+
+// Aggregates one input block into the hash table (GROUP BY keys present).
+//
+// Steps:
+//   1. evaluate every probe (key) expression on `block`,
+//   2. look up / insert each row's key and record the row's aggregate-state pointer,
+//   3. let every aggregate evaluator add the whole batch to those states.
+//
+// Returns the first error reported by a key expression, otherwise Status::OK().
+Status AggregationNode::_execute_with_serialized_key(Block* block) {
+    SCOPED_TIMER(_build_timer);
+    DCHECK(!_probe_expr_ctxs.empty());
+
+    const size_t num_keys = _probe_expr_ctxs.size();
+    ColumnRawPtrs key_columns(num_keys);
+    {
+        SCOPED_TIMER(_expr_timer);
+        for (size_t key_idx = 0; key_idx < num_keys; ++key_idx) {
+            int column_id = -1;
+            RETURN_IF_ERROR(_probe_expr_ctxs[key_idx]->execute(block, &column_id));
+            // Replace a constant key column by its full form before handing it to the
+            // hash state, which addresses the column row by row.
+            auto& key_entry = block->get_by_position(column_id);
+            key_entry.column = key_entry.column->convert_to_full_column_if_const();
+            key_columns[key_idx] = key_entry.column.get();
+        }
+    }
+
+    const int num_rows = block->rows();
+    // places[r] is the aggregate-state block of the group that row r belongs to.
+    PODArray<AggregateDataPtr> places(num_rows);
+
+    std::visit(
+            [&](auto&& agg_method) -> void {
+                using Method = std::decay_t<decltype(agg_method)>;
+                typename Method::State hash_state(key_columns, _probe_key_sz, nullptr);
+
+                for (size_t row = 0; row < num_rows; ++row) {
+                    auto emplaced = hash_state.emplace_key(agg_method.data, row, _agg_arena_pool);
+
+                    if (!emplaced.is_inserted()) {
+                        // Known group: reuse its existing states.
+                        places[row] = emplaced.get_mapped();
+                    } else {
+                        // New group. Store nullptr first so that a failure while allocating
+                        // or creating the states never leaves a dangling pointer in the table.
+                        emplaced.set_mapped(nullptr);
+
+                        AggregateDataPtr fresh_states = _agg_arena_pool.aligned_alloc(
+                                _total_size_of_aggregate_states, _align_aggregate_states);
+                        _create_agg_status(fresh_states);
+
+                        emplaced.set_mapped(fresh_states);
+                        places[row] = fresh_states;
+                    }
+
+                    assert(places[row] != nullptr);
+                }
+            },
+            _agg_data._aggregated_method_variant);
+
+    for (size_t fn = 0; fn < _aggregate_evaluators.size(); ++fn) {
+        _aggregate_evaluators[fn]->execute_batch_add(block, _offsets_of_aggregate_states[fn],
+                                                     places.data(), &_agg_arena_pool);
+    }
+
+    return Status::OK();
+}
+
+// Emits the next batch of aggregation results from the hash table.
+//
+// Iteration continues from the iterator stored inside the aggregation method, so the
+// function is meant to be called repeatedly; `*eos` is set to true once every entry,
+// including the optional NULL-key entry, has been written. At most state->batch_size()
+// rows are appended per call. When block->mem_reuse() is true the block's existing
+// columns are appended to, otherwise `*block` is rebuilt from row_desc().
+Status AggregationNode::_get_with_serialized_key_result(RuntimeState* state, Block* block,
+                                                        bool* eos) {
+    const bool mem_reuse = block->mem_reuse();
+    auto column_withschema = VectorizedUtils::create_columns_with_type_and_name(row_desc());
+    const int key_size = _probe_expr_ctxs.size();
+
+    // The first key_size output columns hold the group keys ...
+    MutableColumns key_columns;
+    for (int pos = 0; pos < key_size; ++pos) {
+        if (mem_reuse) {
+            key_columns.emplace_back(std::move(*block->get_by_position(pos).column).mutate());
+        } else {
+            key_columns.emplace_back(column_withschema[pos].type->create_column());
+        }
+    }
+    // ... and the remaining ones hold the aggregate results.
+    MutableColumns value_columns;
+    for (int pos = key_size; pos < column_withschema.size(); ++pos) {
+        if (mem_reuse) {
+            value_columns.emplace_back(std::move(*block->get_by_position(pos).column).mutate());
+        } else {
+            value_columns.emplace_back(column_withschema[pos].type->create_column());
+        }
+    }
+
+    SCOPED_TIMER(_get_results_timer);
+    std::visit(
+            [&](auto&& agg_method) -> void {
+                auto& table = agg_method.data;
+                auto& cursor = agg_method.iterator;
+                agg_method.init_once();
+
+                // Appends the result of every aggregate function for one group.
+                auto append_values = [&](auto group_states) {
+                    for (size_t fn = 0; fn < _aggregate_evaluators.size(); ++fn) {
+                        _aggregate_evaluators[fn]->insert_result_info(
+                                group_states + _offsets_of_aggregate_states[fn],
+                                value_columns[fn].get());
+                    }
+                };
+
+                for (; cursor != table.end() && key_columns[0]->size() < state->batch_size();
+                     ++cursor) {
+                    agg_method.insert_key_into_columns(cursor->get_first(), key_columns,
+                                                       _probe_key_sz);
+                    append_values(cursor->get_second());
+                }
+
+                if (cursor != table.end()) {
+                    // Batch is full; the remaining entries are emitted by the next call.
+                    return;
+                }
+                if (!table.has_null_key_data()) {
+                    *eos = true;
+                    return;
+                }
+
+                // only one key of group by support wrap null key
+                // here need additional processing logic on the null key / value
+                DCHECK(key_columns.size() == 1);
+                DCHECK(key_columns[0]->is_nullable());
+                if (key_columns[0]->size() < state->batch_size()) {
+                    key_columns[0]->insert_data(nullptr, 0);
+                    append_values(table.get_null_key_data());
+                    *eos = true;
+                }
+            },
+            _agg_data._aggregated_method_variant);
+
+    if (mem_reuse) {
+        return Status::OK();
+    }
+
+    // Fresh block: install the schema first, then hand over the filled columns.
+    *block = column_withschema;
+    MutableColumns columns(block->columns());
+    for (int pos = 0; pos < block->columns(); ++pos) {
+        columns[pos] = std::move(pos < key_size ? key_columns[pos]
+                                                : value_columns[pos - key_size]);
+    }
+    block->set_columns(std::move(columns));
+
+    return Status::OK();
+}
+
+// Emits the next batch of aggregation results in serialized form.
+//
+// Group keys are written back into key columns and every aggregate state is serialized
+// into its own string column (one string column per aggregate function). Iteration
+// continues from the iterator stored inside the aggregation method, so the function is
+// called repeatedly until `*eos` becomes true.
+//
+//   state - supplies batch_size(), the maximum number of rows appended per call.
+//   block - output; its columns are appended to when block->mem_reuse() is true,
+//           otherwise a new Block is assembled and assigned to it.
+//   eos   - set to true once all entries, including the NULL-key entry, were emitted.
+//
+// Always returns Status::OK().
+Status AggregationNode::_serialize_with_serialized_key_result(RuntimeState* state, Block* block,
+                                                              bool* eos) {
+    int key_size = _probe_expr_ctxs.size();
+    int agg_size = _aggregate_evaluators.size();
+    MutableColumns value_columns(agg_size);
+    DataTypes value_data_types(agg_size);
+
+    bool mem_reuse = block->mem_reuse();
+
+    MutableColumns key_columns;
+    for (int i = 0; i < key_size; ++i) {
+        if (mem_reuse) {
+            key_columns.emplace_back(std::move(*block->get_by_position(i).column).mutate());
+        } else {
+            key_columns.emplace_back(_probe_expr_ctxs[i]->root()->data_type()->create_column());
+        }
+    }
+
+    // will serialize data to string column
+    std::vector<VectorBufferWriter> value_buffer_writers;
+    auto serialize_string_type = std::make_shared<DataTypeString>();
+    for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
+        value_data_types[i] = serialize_string_type;
+        if (mem_reuse) {
+            value_columns[i] = std::move(*block->get_by_position(i + key_size).column).mutate();
+        } else {
+            value_columns[i] = serialize_string_type->create_column();
+        }
+        value_buffer_writers.emplace_back(*reinterpret_cast<ColumnString*>(value_columns[i].get()));
+    }
+
+    std::visit(
+            [&](auto&& agg_method) -> void {
+                agg_method.init_once();
+                auto& data = agg_method.data;
+                auto& iter = agg_method.iterator;
+                while (iter != data.end() && key_columns[0]->size() < state->batch_size()) {
+                    const auto& key = iter->get_first();
+                    auto& mapped = iter->get_second();
+                    // insert keys
+                    agg_method.insert_key_into_columns(key, key_columns, _probe_key_sz);
+
+                    // serialize values
+                    for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) {
+                        _aggregate_evaluators[i]->function()->serialize(
+                                mapped + _offsets_of_aggregate_states[i], value_buffer_writers[i]);
+                        value_buffer_writers[i].commit();
+                    }
+                    ++iter;
+                }
+
+                if (iter == data.end()) {
+                    if (agg_method.data.has_null_key_data()) {
+                        // only one key of group by support wrap null key
+                        // here need additional processing logic on the null key / value
+                        DCHECK(key_columns.size() == 1);
+                        DCHECK(key_columns[0]->is_nullable());
+                        // Respect the batch limit, exactly like
+                        // _get_with_serialized_key_result does: when the batch is already
+                        // full the NULL-key row is deferred to the next call (eos stays
+                        // false), instead of producing batch_size + 1 rows.
+                        if (key_columns[0]->size() < state->batch_size()) {
+                            key_columns[0]->insert_data(nullptr, 0);
+                            auto mapped = agg_method.data.get_null_key_data();
+                            for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) {
+                                _aggregate_evaluators[i]->function()->serialize(
+                                        mapped + _offsets_of_aggregate_states[i],
+                                        value_buffer_writers[i]);
+                                value_buffer_writers[i].commit();
+                            }
+                            *eos = true;
+                        }
+                    } else {
+                        *eos = true;
+                    }
+                }
+            },
+            _agg_data._aggregated_method_variant);
+
+    if (!mem_reuse) {
+        // No reusable columns: build a new block from the key columns (typed by their
+        // probe expressions) followed by the serialized string columns.
+        ColumnsWithTypeAndName columns_with_schema;
+        for (int i = 0; i < key_size; ++i) {
+            columns_with_schema.emplace_back(std::move(key_columns[i]),
+                                             _probe_expr_ctxs[i]->root()->data_type(), "");
+        }
+        for (int i = 0; i < agg_size; ++i) {
+            columns_with_schema.emplace_back(std::move(value_columns[i]), value_data_types[i], "");
+        }
+        *block = Block(columns_with_schema);
+    }
+
+    return Status::OK();
+}
+
+// Merges a block of serialized aggregate states into the hash table.
+//
+// The block is expected to hold the group keys (located through the probe expressions)
+// followed by one column per aggregate function at position `i + key_size`. For
+// evaluators with is_merge() == true that column contains serialized states which are
+// deserialized row by row and merged into the group's state; all other evaluators add
+// the batch directly via execute_batch_add.
+//
+// Returns the first error reported by a key expression, otherwise Status::OK().
+Status AggregationNode::_merge_with_serialized_key(Block* block) {
+    SCOPED_TIMER(_merge_timer);
+
+    size_t key_size = _probe_expr_ctxs.size();
+    ColumnRawPtrs key_columns(key_size);
+
+    for (size_t i = 0; i < key_size; ++i) {
+        int result_column_id = -1;
+        RETURN_IF_ERROR(_probe_expr_ctxs[i]->execute(block, &result_column_id));
+        // Materialize constant key columns, as _execute_with_serialized_key does: the
+        // hash state below addresses the key columns row by row.
+        block->get_by_position(result_column_id).column =
+                block->get_by_position(result_column_id)
+                        .column->convert_to_full_column_if_const();
+        key_columns[i] = block->get_by_position(result_column_id).column.get();
+    }
+
+    int rows = block->rows();
+    // places[r] is the aggregate-state block of the group that row r belongs to.
+    PODArray<AggregateDataPtr> places(rows);
+
+    std::visit(
+            [&](auto&& agg_method) -> void {
+                using HashMethodType = std::decay_t<decltype(agg_method)>;
+                using AggState = typename HashMethodType::State;
+                AggState state(key_columns, _probe_key_sz, nullptr);
+                /// For all rows.
+                for (size_t i = 0; i < rows; ++i) {
+                    AggregateDataPtr aggregate_data = nullptr;
+
+                    auto emplace_result = state.emplace_key(agg_method.data, i, _agg_arena_pool);
+
+                    /// If a new key is inserted, initialize the states of the aggregate functions, and possibly something related to the key.
+                    if (emplace_result.is_inserted()) {
+                        /// exception-safety - if you can not allocate memory or create states, then destructors will not be called.
+                        emplace_result.set_mapped(nullptr);
+
+                        aggregate_data = _agg_arena_pool.aligned_alloc(
+                                _total_size_of_aggregate_states, _align_aggregate_states);
+                        _create_agg_status(aggregate_data);
+
+                        emplace_result.set_mapped(aggregate_data);
+                    } else
+                        aggregate_data = emplace_result.get_mapped();
+
+                    places[i] = aggregate_data;
+                    assert(places[i] != nullptr);
+                }
+            },
+            _agg_data._aggregated_method_variant);
+
+    // Scratch buffer that holds one temporary set of aggregate states while a serialized
+    // value is deserialized and merged.
+    std::unique_ptr<char[]> deserialize_buffer(new char[_total_size_of_aggregate_states]);
+
+    for (int i = 0; i < _aggregate_evaluators.size(); ++i) {
+        if (_aggregate_evaluators[i]->is_merge()) {
+            auto column = block->get_by_position(i + key_size).column;
+            // The serialized states are read from the nested column of a nullable column.
+            if (column->is_nullable()) {
+                column = ((ColumnNullable*)column.get())->get_nested_column_ptr();
+            }
+
+            for (int j = 0; j < rows; ++j) {
+                VectorBufferReader buffer_reader(((ColumnString*)(column.get()))->get_data_at(j));
+                // Fresh temporary states for every row; destroyed again after the merge.
+                _create_agg_status(deserialize_buffer.get());
+
+                _aggregate_evaluators[i]->function()->deserialize(
+                        deserialize_buffer.get() + _offsets_of_aggregate_states[i], buffer_reader,
+                        &_agg_arena_pool);
+
+                _aggregate_evaluators[i]->function()->merge(
+                        places.data()[j] + _offsets_of_aggregate_states[i],
+                        deserialize_buffer.get() + _offsets_of_aggregate_states[i],
+                        &_agg_arena_pool);
+
+                _destory_agg_status(deserialize_buffer.get());
+            }
+        } else {
+            _aggregate_evaluators[i]->execute_batch_add(block, _offsets_of_aggregate_states[i],
+                                                        places.data(), &_agg_arena_pool);
+        }
+    }
+    return Status::OK();
+}
+
+// Reports to the memory tracker how much the arena and the hash table buffer have
+// changed since the previous call, then remembers the new sizes in _mem_usage_record.
+void AggregationNode::_update_memusage_with_serialized_key() {
+    std::visit(
+            [&](auto&& agg_method) -> void {
+                const auto arena_bytes = _agg_arena_pool.size();
+                const auto table_bytes = agg_method.data.get_buffer_size_in_bytes();
+
+                // Only the difference to the last recorded value is consumed.
+                mem_tracker()->Consume(arena_bytes - _mem_usage_record.used_in_arena);
+                mem_tracker()->Consume(table_bytes - _mem_usage_record.used_in_state);
+
+                _mem_usage_record.used_in_state = table_bytes;
+                _mem_usage_record.used_in_arena = arena_bytes;
+            },
+            _agg_data._aggregated_method_variant);
+}
+
+// Destroys every aggregate state owned by the hash table and releases the memory that
+// was reported to the tracker.
+void AggregationNode::_close_with_serialized_key() {
+    std::visit(
+            [&](auto&& agg_method) -> void {
+                auto& data = agg_method.data;
+                data.for_each_mapped([&](auto& mapped) {
+                    if (mapped) {
+                        _destory_agg_status(mapped);
+                        // Cleared so the entry is not destroyed a second time.
+                        mapped = nullptr;
+                    }
+                });
+                // The state of the NULL key is kept outside the table's regular cells
+                // (see AggregationDataWithNullKey), so for_each_mapped does not visit it;
+                // destroy it explicitly or its aggregate states are never destructed.
+                if (data.has_null_key_data()) {
+                    auto null_key_states = data.get_null_key_data();
+                    if (null_key_states) {
+                        _destory_agg_status(null_key_states);
+                    }
+                }
+            },
+            _agg_data._aggregated_method_variant);
+    release_tracker();
+}
+
+// Gives back to the memory tracker everything recorded in _mem_usage_record
+// (hash table buffer plus arena).
+void AggregationNode::release_tracker() {
+    const auto tracked_bytes =
+            _mem_usage_record.used_in_state + _mem_usage_record.used_in_arena;
+    mem_tracker()->Release(tracked_bytes);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/vaggregation_node.h b/be/src/vec/exec/vaggregation_node.h
new file mode 100644
index 0000000000..45f1d59038
--- /dev/null
+++ b/be/src/vec/exec/vaggregation_node.h
@@ -0,0 +1,483 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <functional>
+#include <variant>
+
+#include "common/object_pool.h"
+#include "exec/exec_node.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/common/columns_hashing.h"
+#include "vec/common/hash_table/fixed_hash_map.h"
+#include "vec/exprs/vectorized_agg_fn.h"
+
+namespace doris {
+class TPlanNode;
+class DescriptorTbl;
+class MemPool;
+
+namespace vectorized {
+class VExprContext;
+
+/** Aggregation method that uses the concatenation of the serialized key values as hash key.
+  * A serialized value can be deserialized knowing only the position at which it starts.
+  * For a string, for example, the serialized length is stored first, followed by the bytes.
+  * Therefore aggregating by several strings is unambiguous.
+  */
+template <typename TData>
+struct AggregationMethodSerialized {
+    using Data = TData;
+    using Key = typename Data::key_type;
+    using Mapped = typename Data::mapped_type;
+    using Iterator = typename Data::iterator;
+    using State = ColumnsHashing::HashMethodSerialized<typename Data::value_type, Mapped>;
+
+    Data data;
+    // Read position used while results are emitted; valid after init_once().
+    Iterator iterator;
+    bool inited = false;
+
+    AggregationMethodSerialized() = default;
+
+    template <typename Other>
+    explicit AggregationMethodSerialized(const Other& other) : data(other.data) {}
+
+    // Splits a serialized key back into its parts: every key column consumes its own
+    // value from the buffer and returns the position where the next value starts.
+    static void insert_key_into_columns(const StringRef& key, MutableColumns& key_columns,
+                                        const Sizes&) {
+        auto cursor = key.data;
+        for (auto& column : key_columns) {
+            cursor = column->deserialize_and_insert_from_arena(cursor);
+        }
+    }
+
+    // Positions `iterator` at the beginning of `data` on the first call only.
+    void init_once() {
+        if (inited) {
+            return;
+        }
+        inited = true;
+        iterator = data.begin();
+    }
+};
+
+// A bare aggregate-state pointer; presumably used when there is no GROUP BY key and
+// therefore no hash table (see AggregatedDataVariants::without_key).
+using AggregatedDataWithoutKey = AggregateDataPtr;
+// Hash table from a StringRef key to the aggregate-state pointer; this is the table
+// type used with AggregationMethodSerialized in AggregatedMethodVariants.
+using AggregatedDataWithStringKey = HashMapWithSavedHash<StringRef, AggregateDataPtr>;
+
+/// Aggregation method for exactly one numeric key column.
+/// FieldType is UInt8/16/32/64 for any type with corresponding bit width.
+template <typename FieldType, typename TData, bool consecutive_keys_optimization = true>
+struct AggregationMethodOneNumber {
+    using Data = TData;
+    using Key = typename Data::key_type;
+    using Mapped = typename Data::mapped_type;
+    using Iterator = typename Data::iterator;
+
+    /// To use one `Method` in different threads, use different `State`.
+    using State = ColumnsHashing::HashMethodOneNumber<typename Data::value_type, Mapped,
+                                                      FieldType, consecutive_keys_optimization>;
+
+    Data data;
+    // Read position used while results are emitted; valid after init_once().
+    Iterator iterator;
+    bool inited = false;
+
+    AggregationMethodOneNumber() = default;
+
+    template <typename Other>
+    AggregationMethodOneNumber(const Other& other) : data(other.data) {}
+
+    // Appends the key taken from the hash table to the single key column by copying
+    // sizeof(FieldType) raw bytes of the key.
+    static void insert_key_into_columns(const Key& key, MutableColumns& key_columns,
+                                        const Sizes& /*key_sizes*/) {
+        auto* target = static_cast<ColumnVectorHelper*>(key_columns[0].get());
+        target->insert_raw_data<sizeof(FieldType)>(reinterpret_cast<const char*>(&key));
+    }
+
+    // Positions `iterator` at the beginning of `data` on the first call only.
+    void init_once() {
+        if (inited) {
+            return;
+        }
+        inited = true;
+        iterator = data.begin();
+    }
+};
+
+// Extends a hash table `Base` with one extra slot for the NULL key: a flag telling
+// whether a NULL key was seen and the aggregate-state pointer that belongs to it.
+// The slot is stored next to the table, not inside it.
+template <typename Base>
+struct AggregationDataWithNullKey : public Base {
+    using Base::Base;
+
+    // Mutable access: callers set the flag / the state pointer through the references.
+    bool& has_null_key_data() { return has_null_key; }
+    AggregateDataPtr& get_null_key_data() { return null_key_data; }
+
+    // Read-only access.
+    bool has_null_key_data() const { return has_null_key; }
+    const AggregateDataPtr get_null_key_data() const { return null_key_data; }
+
+    // The NULL key counts as one additional entry.
+    size_t size() const {
+        const size_t base_size = Base::size();
+        return has_null_key ? base_size + 1 : base_size;
+    }
+    bool empty() const { return !has_null_key && Base::empty(); }
+
+    // Both clear variants forget the NULL key by resetting the flag only;
+    // null_key_data itself is left untouched.
+    void clear() {
+        Base::clear();
+        has_null_key = false;
+    }
+
+    void clear_and_shrink() {
+        Base::clear_and_shrink();
+        has_null_key = false;
+    }
+
+private:
+    bool has_null_key = false;
+    AggregateDataPtr null_key_data = nullptr;
+};
+
+// Aggregation method for several fixed-size keys that are packed into one value of type
+// Key. When has_nullable_keys_ is true the packed key starts with a null bitmap
+// (one bit per key column) followed by the key values.
+template <typename TData, bool has_nullable_keys_ = false>
+struct AggregationMethodKeysFixed {
+    using Data = TData;
+    using Key = typename Data::key_type;
+    using Mapped = typename Data::mapped_type;
+    using Iterator = typename Data::iterator;
+    static constexpr bool has_nullable_keys = has_nullable_keys_;
+
+    using State = ColumnsHashing::HashMethodKeysFixed<typename Data::value_type, Key, Mapped,
+                                                      has_nullable_keys>;
+
+    Data data;
+    // Read position used while results are emitted; valid after init_once().
+    Iterator iterator;
+    bool inited = false;
+
+    AggregationMethodKeysFixed() {}
+
+    template <typename Other>
+    AggregationMethodKeysFixed(const Other& other) : data(other.data) {}
+
+    // Unpacks `key` into the key columns. `key_sizes[i]` is the byte width of the i-th
+    // key value inside the packed key.
+    static void insert_key_into_columns(const Key& key, MutableColumns& key_columns,
+                                        const Sizes& key_sizes) {
+        static constexpr auto bitmap_size =
+                has_nullable_keys ? std::tuple_size<KeysNullMap<Key>>::value : 0;
+        const auto* key_bytes = reinterpret_cast<const char*>(&key);
+        const size_t num_keys = key_columns.size();
+
+        /// In any hash key value, column values to be read start just after the bitmap, if it exists.
+        size_t read_offset = bitmap_size;
+
+        for (size_t idx = 0; idx < num_keys; ++idx) {
+            bool column_nullable = false;
+            if constexpr (has_nullable_keys) {
+                column_nullable = is_column_nullable(*key_columns[idx]);
+            }
+
+            // Column that receives the value: the column itself, or the nested column
+            // when the key column is nullable.
+            IColumn* value_column = key_columns[idx].get();
+            bool is_null = false;
+
+            if (column_nullable) {
+                auto& nullable_col = assert_cast<ColumnNullable&>(*key_columns[idx]);
+                value_column = &nullable_col.get_nested_column();
+                auto* null_map = assert_cast<ColumnUInt8*>(&nullable_col.get_null_map_column());
+
+                /// The current column is nullable. Check if the value of the
+                /// corresponding key is nullable. Update the null map accordingly.
+                const UInt8 null_bit =
+                        (reinterpret_cast<const UInt8*>(&key)[idx / 8] >> (idx % 8)) & 1;
+                null_map->insert_value(null_bit);
+                is_null = null_bit == 1;
+            }
+
+            if (has_nullable_keys && is_null) {
+                // A NULL key occupies no bytes in the packed key: insert a default value
+                // and leave the read offset where it is.
+                value_column->insert_default();
+                continue;
+            }
+
+            const size_t width = key_sizes[idx];
+            value_column->insert_data(key_bytes + read_offset, width);
+            read_offset += width;
+        }
+    }
+
+    // Positions `iterator` at the beginning of `data` on the first call only.
+    void init_once() {
+        if (inited) {
+            return;
+        }
+        inited = true;
+        iterator = data.begin();
+    }
+};
+
+/// Single nullable key column: wraps a single-column method and hashes through
+/// ColumnsHashing::HashMethodSingleLowNullableColumn.
+template <typename SingleColumnMethod>
+struct AggregationMethodSingleNullableColumn : public SingleColumnMethod {
+    using Base = SingleColumnMethod;
+    using BaseState = typename Base::State;
+
+    using Data = typename Base::Data;
+    using Key = typename Base::Key;
+    using Mapped = typename Base::Mapped;
+
+    using State = ColumnsHashing::HashMethodSingleLowNullableColumn<BaseState, Mapped, true>;
+
+    using Base::data;
+
+    AggregationMethodSingleNullableColumn() = default;
+
+    template <typename Other>
+    explicit AggregationMethodSingleNullableColumn(const Other& other) : Base(other) {}
+
+    // Appends the key to the single key column through the generic insert_data
+    // interface: string keys pass their own buffer, all other keys pass their raw bytes.
+    static void insert_key_into_columns(const Key& key, MutableColumns& key_columns,
+                                        const Sizes& /*key_sizes*/) {
+        auto* target = key_columns[0].get();
+
+        if constexpr (!std::is_same_v<Key, StringRef>) {
+            target->insert_data(reinterpret_cast<const char*>(&key), sizeof(key));
+        } else {
+            target->insert_data(key.data, key.size);
+        }
+    }
+};
+
+// Hash tables from an unsigned integer key of the given width to the aggregate-state
+// pointer. The 8- and 16-bit keys use fixed hash maps, the wider keys use HashMap with
+// the HashCRC32 hash.
+using AggregatedDataWithUInt8Key = FixedImplicitZeroHashMapWithCalculatedSize<UInt8, AggregateDataPtr>;
+using AggregatedDataWithUInt16Key = FixedImplicitZeroHashMap<UInt16, AggregateDataPtr>;
+using AggregatedDataWithUInt32Key = HashMap<UInt32, AggregateDataPtr, HashCRC32<UInt32>>;
+using AggregatedDataWithUInt64Key = HashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
+using AggregatedDataWithUInt128Key = HashMap<UInt128, AggregateDataPtr, HashCRC32<UInt128>>;
+using AggregatedDataWithUInt256Key = HashMap<UInt256, AggregateDataPtr, HashCRC32<UInt256>>;
+
+// The same tables extended by AggregationDataWithNullKey, which adds a separate slot
+// for the NULL key. No nullable alias is declared for the 256-bit table.
+using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey<AggregatedDataWithUInt8Key>;
+using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey<AggregatedDataWithUInt16Key>;
+using AggregatedDataWithNullableUInt32Key = AggregationDataWithNullKey<AggregatedDataWithUInt32Key>;
+using AggregatedDataWithNullableUInt64Key = AggregationDataWithNullKey<AggregatedDataWithUInt64Key>;
+using AggregatedDataWithNullableUInt128Key = AggregationDataWithNullKey<AggregatedDataWithUInt128Key>;
+
+// Closed set of aggregation methods. In order: the serialized-key method, one numeric
+// key (UInt8..UInt128), one nullable numeric key (UInt8..UInt128), and packed
+// fixed-size keys in 64/128/256-bit tables, each without and with nullable keys.
+// The first alternative (serialized) is the one a default-constructed variant holds.
+using AggregatedMethodVariants = std::variant<AggregationMethodSerialized<AggregatedDataWithStringKey>,
+                                    AggregationMethodOneNumber<UInt8, AggregatedDataWithUInt8Key, false>,
+                                    AggregationMethodOneNumber<UInt16, AggregatedDataWithUInt16Key, false>,
+                                    AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt32Key>,
+                                    AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64Key>,
+                                    AggregationMethodOneNumber<UInt128, AggregatedDataWithUInt128Key>,
+                                    AggregationMethodSingleNullableColumn<AggregationMethodOneNumber<UInt8, AggregatedDataWithNullableUInt8Key, false>>,
+                                    AggregationMethodSingleNullableColumn<AggregationMethodOneNumber<UInt16, AggregatedDataWithNullableUInt16Key, false>>,
+                                    AggregationMethodSingleNullableColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt32Key>>,
+                                    AggregationMethodSingleNullableColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64Key>>,
+                                    AggregationMethodSingleNullableColumn<AggregationMethodOneNumber<UInt128, AggregatedDataWithNullableUInt128Key>>,
+                                    AggregationMethodKeysFixed<AggregatedDataWithUInt64Key, false>,
+                                    AggregationMethodKeysFixed<AggregatedDataWithUInt64Key, true>,
+                                    AggregationMethodKeysFixed<AggregatedDataWithUInt128Key, false>,
+                                    AggregationMethodKeysFixed<AggregatedDataWithUInt128Key, true>,
+                                    AggregationMethodKeysFixed<AggregatedDataWithUInt256Key, false>,
+                                    AggregationMethodKeysFixed<AggregatedDataWithUInt256Key, true>>;
+
+/// Bundles the storage an aggregation runs on: a raw `without_key` state pointer plus a
+/// variant over the keyed aggregation methods. init() records the requested Type and, for
+/// keyed types, emplaces the matching alternative into the variant. Copying is disabled.
+struct AggregatedDataVariants {
+    AggregatedDataVariants() = default;
+    AggregatedDataVariants(const AggregatedDataVariants&) = delete;
+    AggregatedDataVariants& operator=(const AggregatedDataVariants&) = delete;
+
+    // Null until assigned by the owner; presumably the state used with Type::without_key.
+    AggregatedDataWithoutKey without_key = nullptr;
+    // Keyed aggregation method; init() selects which alternative is active.
+    AggregatedMethodVariants _aggregated_method_variant;
+
+    // TODO: maybe we should support uint256 in the future
+    enum class Type {
+        EMPTY = 0,
+        without_key,
+        serialized,
+        int8_key,
+        int16_key,
+        int32_key,
+        int64_key,
+        int128_key,
+        int64_keys,
+        int128_keys,
+        int256_keys
+    };
+
+    Type _type = Type::EMPTY;
+
+    /// Records `type` and activates the matching aggregation method in the variant.
+    /// @param type        key layout to aggregate on.
+    /// @param is_nullable picks the nullable flavour of the method; not consulted for
+    ///                    Type::without_key and Type::serialized.
+    /// Type::without_key leaves the variant untouched; any value without its own case
+    /// (including Type::EMPTY) trips the DCHECK.
+    void init(Type type, bool is_nullable = false) {
+        _type = type;
+        switch (type) {
+        case Type::without_key:
+            break;
+        case Type::serialized:
+            _aggregated_method_variant.emplace<AggregationMethodSerialized<AggregatedDataWithStringKey>>();
+            break;
+        case Type::int8_key:
+            _emplace_by_nullability<
+                    AggregationMethodSingleNullableColumn<AggregationMethodOneNumber<UInt8, AggregatedDataWithNullableUInt8Key, false>>,
+                    AggregationMethodOneNumber<UInt8, AggregatedDataWithUInt8Key, false>>(is_nullable);
+            break;
+        case Type::int16_key:
+            _emplace_by_nullability<
+                    AggregationMethodSingleNullableColumn<AggregationMethodOneNumber<UInt16, AggregatedDataWithNullableUInt16Key, false>>,
+                    AggregationMethodOneNumber<UInt16, AggregatedDataWithUInt16Key, false>>(is_nullable);
+            break;
+        case Type::int32_key:
+            _emplace_by_nullability<
+                    AggregationMethodSingleNullableColumn<AggregationMethodOneNumber<UInt32, AggregatedDataWithNullableUInt32Key>>,
+                    AggregationMethodOneNumber<UInt32, AggregatedDataWithUInt32Key>>(is_nullable);
+            break;
+        case Type::int64_key:
+            _emplace_by_nullability<
+                    AggregationMethodSingleNullableColumn<AggregationMethodOneNumber<UInt64, AggregatedDataWithNullableUInt64Key>>,
+                    AggregationMethodOneNumber<UInt64, AggregatedDataWithUInt64Key>>(is_nullable);
+            break;
+        case Type::int128_key:
+            _emplace_by_nullability<
+                    AggregationMethodSingleNullableColumn<AggregationMethodOneNumber<UInt128, AggregatedDataWithNullableUInt128Key>>,
+                    AggregationMethodOneNumber<UInt128, AggregatedDataWithUInt128Key>>(is_nullable);
+            break;
+        case Type::int64_keys:
+            _emplace_by_nullability<AggregationMethodKeysFixed<AggregatedDataWithUInt64Key, true>,
+                                    AggregationMethodKeysFixed<AggregatedDataWithUInt64Key, false>>(is_nullable);
+            break;
+        case Type::int128_keys:
+            _emplace_by_nullability<AggregationMethodKeysFixed<AggregatedDataWithUInt128Key, true>,
+                                    AggregationMethodKeysFixed<AggregatedDataWithUInt128Key, false>>(is_nullable);
+            break;
+        case Type::int256_keys:
+            _emplace_by_nullability<AggregationMethodKeysFixed<AggregatedDataWithUInt256Key, true>,
+                                    AggregationMethodKeysFixed<AggregatedDataWithUInt256Key, false>>(is_nullable);
+            break;
+        default:
+            DCHECK(false) << "Do not have a rigth agg data type";
+        }
+    }
+
+private:
+    // Emplaces NullableMethod when `is_nullable` is set, PlainMethod otherwise.
+    template <typename NullableMethod, typename PlainMethod>
+    void _emplace_by_nullability(bool is_nullable) {
+        if (is_nullable) {
+            _aggregated_method_variant.emplace<NullableMethod>();
+        } else {
+            _aggregated_method_variant.emplace<PlainMethod>();
+        }
+    }
+};
+
+// Shared-ownership handle to an AggregatedDataVariants.
+using AggregatedDataVariantsPtr = std::shared_ptr<AggregatedDataVariants>;
+
+// Spilling to disk is not supported.
+/// Exec node declaration for aggregation over vectorized Blocks. Only the interface is
+/// declared here; the member functions are defined out of line.
+/// Raw pointer and bool members carry in-class initializers so that they never hold
+/// indeterminate values, whatever the constructor chooses to set explicitly.
+class AggregationNode : public ::doris::ExecNode {
+public:
+    using Sizes = std::vector<size_t>;
+
+    AggregationNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    ~AggregationNode();
+    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
+    virtual Status prepare(RuntimeState* state);
+    virtual Status open(RuntimeState* state);
+    // Row-batch flavour of get_next.
+    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos);
+    // Block flavour of get_next.
+    virtual Status get_next(RuntimeState* state, Block* block, bool* eos);
+    virtual Status close(RuntimeState* state);
+
+private:
+    // group by k1,k2
+    std::vector<VExprContext*> _probe_expr_ctxs;
+    std::vector<size_t> _probe_key_sz;
+
+    std::vector<AggFnEvaluator*> _aggregate_evaluators;
+
+    // may be we don't have to know the tuple id
+    TupleId _intermediate_tuple_id;
+    TupleDescriptor* _intermediate_tuple_desc = nullptr;
+
+    TupleId _output_tuple_id;
+    TupleDescriptor* _output_tuple_desc = nullptr;
+
+    bool _needs_finalize = false;
+    bool _is_merge = false;
+    std::unique_ptr<MemPool> _mem_pool;
+
+    size_t _align_aggregate_states = 1;
+    /// The offset to the n-th aggregate function in a row of aggregate functions.
+    Sizes _offsets_of_aggregate_states;
+    /// The total size of the row from the aggregate functions.
+    size_t _total_size_of_aggregate_states = 0;
+
+    AggregatedDataVariants _agg_data;
+
+    Arena _agg_arena_pool;
+
+    // Profile counters; null until assigned by the implementation.
+    RuntimeProfile::Counter* _build_timer = nullptr;
+    RuntimeProfile::Counter* _exec_timer = nullptr;
+    RuntimeProfile::Counter* _merge_timer = nullptr;
+    RuntimeProfile::Counter* _expr_timer = nullptr;
+    RuntimeProfile::Counter* _get_results_timer = nullptr;
+
+    bool _is_streaming_preagg = false;
+    Block _preagg_block = Block();
+    bool _should_expand_hash_table = true;
+    char* _streaming_pre_agg_buffer = nullptr;
+    size_t _max_size_of_stream_pre_agg_buffer = 0;
+
+    /// Expose the minimum reduction factor to continue growing the hash tables.
+    RuntimeProfile::Counter* preagg_streaming_ht_min_reduction_ = nullptr;
+
+private:
+    /// Return true if we should keep expanding hash tables in the preagg. If false,
+    /// the preagg should pass through any rows it can't fit in its tables.
+    bool _should_expand_preagg_hash_tables();
+
+    Status _create_agg_status(AggregateDataPtr data);
+    Status _destory_agg_status(AggregateDataPtr data);
+
+    // Code path for aggregation without a GROUP BY key.
+    Status _get_without_key_result(RuntimeState* state, Block* block, bool* eos);
+    Status _serialize_without_key(RuntimeState* state, Block* block, bool* eos);
+    Status _execute_without_key(Block* block);
+    Status _merge_without_key(Block* block);
+    void _update_memusage_without_key();
+    void _close_without_key();
+
+    // Code path for aggregation with keys.
+    Status _get_with_serialized_key_result(RuntimeState* state, Block* block, bool* eos);
+    Status _serialize_with_serialized_key_result(RuntimeState* state, Block* block, bool* eos);
+    Status _pre_agg_with_serialized_key(Block* in_block, Block* out_block);
+    Status _execute_with_serialized_key(Block* block);
+    Status _merge_with_serialized_key(Block* block);
+    void _update_memusage_with_serialized_key();
+    void _close_with_serialized_key();
+    void _init_hash_method(std::vector<VExprContext*>& probe_exprs);
+
+    void release_tracker();
+
+    // Callable signatures stored in `executor` below.
+    using vectorized_execute = std::function<Status(Block* block)>;
+    using vectorized_pre_agg = std::function<Status(Block* in_block, Block* out_block)>;
+    using vectorized_get_result =
+            std::function<Status(RuntimeState* state, Block* block, bool* eos)>;
+    using vectorized_closer = std::function<void()>;
+    using vectorized_update_memusage = std::function<void()>;
+
+    // Set of callables; presumably bound to either the without-key or the with-key
+    // member functions above (the binding itself is not part of this declaration).
+    struct executor {
+        vectorized_execute execute;
+        vectorized_pre_agg pre_agg;
+        vectorized_get_result get_result;
+        vectorized_closer close;
+        vectorized_update_memusage update_memusage;
+    };
+
+    executor _executor;
+
+    // Two byte counters, both starting at zero.
+    struct MemoryRecord {
+        MemoryRecord() : used_in_arena(0), used_in_state(0) {}
+        int64_t used_in_arena;
+        int64_t used_in_state;
+    };
+
+    MemoryRecord _mem_usage_record;
+};
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vanalytic_eval_node.cpp b/be/src/vec/exec/vanalytic_eval_node.cpp
new file mode 100644
index 0000000000..4d69716216
--- /dev/null
+++ b/be/src/vec/exec/vanalytic_eval_node.cpp
@@ -0,0 +1,658 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vanalytic_eval_node.h"
+
+#include "exprs/agg_fn_evaluator.h"
+#include "exprs/anyval_util.h"
+#include "runtime/descriptors.h"
+#include "runtime/row_batch.h"
+#include "runtime/runtime_state.h"
+#include "udf/udf_internal.h"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+
+// Builds the node from its thrift plan description and chooses, from the window
+// definition, which of the three _get_next_for_* member functions serves get_next().
+VAnalyticEvalNode::VAnalyticEvalNode(ObjectPool* pool, const TPlanNode& tnode,
+                                     const DescriptorTbl& descs)
+        : ExecNode(pool, tnode, descs),
+          _intermediate_tuple_id(tnode.analytic_node.intermediate_tuple_id),
+          _output_tuple_id(tnode.analytic_node.output_tuple_id),
+          _window(tnode.analytic_node.window) {
+    const TAnalyticNode& analytic_node = tnode.analytic_node;
+    if (analytic_node.__isset.buffered_tuple_id) {
+        _buffered_tuple_id = analytic_node.buffered_tuple_id;
+    }
+
+    // Default frame: the whole partition, [unbounded preceding, unbounded following].
+    // It applies when no window is set, or when the window carries no usable bound.
+    _fn_scope = AnalyticFnScope::PARTITION;
+    auto get_next_impl = &VAnalyticEvalNode::_get_next_for_partition;
+
+    const bool has_window = analytic_node.__isset.window;
+    if (has_window && analytic_node.window.type == TAnalyticWindowType::RANGE) {
+        DCHECK(!_window.__isset.window_start) << "RANGE windows must have UNBOUNDED PRECEDING";
+        DCHECK(!_window.__isset.window_end ||
+               _window.window_end.type == TAnalyticWindowBoundaryType::CURRENT_ROW)
+                << "RANGE window end bound must be CURRENT ROW or UNBOUNDED FOLLOWING";
+
+        // With an end bound the frame is [unbounded preceding, current row];
+        // without one the partition default above stays in force.
+        if (_window.__isset.window_end) {
+            _fn_scope = AnalyticFnScope::RANGE;
+            get_next_impl = &VAnalyticEvalNode::_get_next_for_range;
+        }
+    } else if (has_window && (_window.__isset.window_start || _window.__isset.window_end)) {
+        // ROWS window with at least one bound. Offsets are relative to the current row:
+        // PRECEDING is stored negative, CURRENT ROW as 0, anything else positive.
+        if (_window.__isset.window_start) {
+            const TAnalyticWindowBoundary& start_bound = _window.window_start;
+            if (start_bound.__isset.rows_offset_value) {
+                _rows_start_offset = start_bound.rows_offset_value;
+                if (start_bound.type == TAnalyticWindowBoundaryType::PRECEDING) {
+                    _rows_start_offset = -_rows_start_offset;
+                }
+            } else {
+                DCHECK_EQ(start_bound.type, TAnalyticWindowBoundaryType::CURRENT_ROW);
+                _rows_start_offset = 0;
+            }
+        }
+
+        if (_window.__isset.window_end) {
+            const TAnalyticWindowBoundary& end_bound = _window.window_end;
+            if (end_bound.__isset.rows_offset_value) {
+                _rows_end_offset = end_bound.rows_offset_value;
+                if (end_bound.type == TAnalyticWindowBoundaryType::PRECEDING) {
+                    _rows_end_offset = -_rows_end_offset;
+                }
+            } else {
+                DCHECK_EQ(end_bound.type, TAnalyticWindowBoundaryType::CURRENT_ROW);
+                _rows_end_offset = 0;
+            }
+        }
+
+        _fn_scope = AnalyticFnScope::ROWS;
+        get_next_impl = &VAnalyticEvalNode::_get_next_for_rows;
+    }
+
+    _executor.get_next = std::bind<Status>(get_next_impl, this, std::placeholders::_1,
+                                           std::placeholders::_2, std::placeholders::_3);
+    VLOG_ROW << "tnode=" << apache::thrift::ThriftDebugString(tnode)
+             << " AnalyticFnScope: " << _fn_scope;
+}
+
+// Creates, from the thrift plan node, the expression contexts for every analytic function's
+// arguments, one AggFnEvaluator per function, an empty input column per argument, and the
+// partition-by / order-by expression trees. Returns the first error encountered.
+Status VAnalyticEvalNode::init(const TPlanNode& tnode, RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::init(tnode, state));
+    const TAnalyticNode& analytic_node = tnode.analytic_node;
+    const size_t fn_count = analytic_node.analytic_functions.size();
+    _agg_expr_ctxs.resize(fn_count);
+    _agg_intput_columns.resize(fn_count);
+
+    for (size_t fn_idx = 0; fn_idx < fn_count; ++fn_idx) {
+        const TExpr& fn_desc = analytic_node.analytic_functions[fn_idx];
+        // nodes[0] is the function node; its children are the argument expressions.
+        const auto num_args = fn_desc.nodes[0].num_children;
+        auto& arg_ctxs = _agg_expr_ctxs[fn_idx];
+        auto& arg_columns = _agg_intput_columns[fn_idx];
+        arg_columns.resize(num_args);
+
+        // node_idx walks the flattened node list; it is advanced past the previous
+        // position before each argument tree is built.
+        int node_idx = 0;
+        for (int arg = 0; arg < num_args; ++arg) {
+            ++node_idx;
+            VExpr* arg_expr = nullptr;
+            VExprContext* arg_ctx = nullptr;
+            RETURN_IF_ERROR(VExpr::create_tree_from_thrift(_pool, fn_desc.nodes, nullptr,
+                                                           &node_idx, &arg_expr, &arg_ctx));
+            arg_ctxs.emplace_back(arg_ctx);
+        }
+
+        AggFnEvaluator* evaluator = nullptr;
+        RETURN_IF_ERROR(AggFnEvaluator::create(_pool, fn_desc, &evaluator));
+        _agg_functions.emplace_back(evaluator);
+
+        // One empty column per argument, typed after the argument expression's result.
+        for (size_t arg = 0; arg < arg_ctxs.size(); ++arg) {
+            arg_columns[arg] = arg_ctxs[arg]->root()->data_type()->create_column();
+        }
+    }
+
+    RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, analytic_node.partition_exprs,
+                                             &_partition_by_eq_expr_ctxs));
+    RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, analytic_node.order_by_exprs,
+                                             &_order_by_eq_expr_ctxs));
+    _partition_by_column_idxs.resize(_partition_by_eq_expr_ctxs.size());
+    _ordey_by_column_idxs.resize(_order_by_eq_expr_ctxs.size());
+    _agg_functions_size = _agg_functions.size();
+    return Status::OK();
+}
+
+// Prepares the node for execution:
+//  - prepares every aggregate function against the intermediate/output slot descriptors,
+//  - lays out the aggregate states in one aligned buffer and allocates it from the arena,
+//  - binds the insert_result / execute callbacks,
+//  - prepares the argument, partition-by and order-by expressions.
+// Returns the first error reported by any of these steps.
+Status VAnalyticEvalNode::prepare(RuntimeState* state) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(ExecNode::prepare(state));
+    DCHECK(child(0)->row_desc().is_prefix_of(row_desc()));
+    _mem_pool.reset(new MemPool(mem_tracker().get()));
+    _evaluation_timer = ADD_TIMER(runtime_profile(), "EvaluationTime");
+    SCOPED_TIMER(_evaluation_timer);
+
+    _intermediate_tuple_desc = state->desc_tbl().get_tuple_descriptor(_intermediate_tuple_id);
+    _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id);
+    for (size_t i = 0; i < _agg_functions_size; ++i) {
+        SlotDescriptor* intermediate_slot_desc = _intermediate_tuple_desc->slots()[i];
+        SlotDescriptor* output_slot_desc = _output_tuple_desc->slots()[i];
+        RETURN_IF_ERROR(_agg_functions[i]->prepare(state, child(0)->row_desc(), _mem_pool.get(),
+                                                   intermediate_slot_desc, output_slot_desc,
+                                                   mem_tracker()));
+    }
+
+    _offsets_of_aggregate_states.resize(_agg_functions_size);
+    for (size_t i = 0; i < _agg_functions_size; ++i) {
+        _offsets_of_aggregate_states[i] = _total_size_of_aggregate_states;
+        const auto& agg_function = _agg_functions[i]->function();
+        // Aggregate states are aligned based on the maximum requirement.
+        _align_aggregate_states = std::max(_align_aggregate_states, agg_function->align_of_data());
+        _total_size_of_aggregate_states += agg_function->size_of_data();
+        // If this is not the last state, pad it so that the next state starts aligned.
+        if (i + 1 < _agg_functions_size) {
+            size_t alignment_of_next_state = _agg_functions[i + 1]->function()->align_of_data();
+            // The rounding below is only meaningful for power-of-two alignments.
+            if ((alignment_of_next_state & (alignment_of_next_state - 1)) != 0) {
+                return Status::RuntimeError(fmt::format("Logical error: align_of_data is not 2^N"));
+            }
+            /// Round 'total_size_of_aggregate_states' up to a multiple of
+            /// alignment_of_next_state.
+            _total_size_of_aggregate_states =
+                    (_total_size_of_aggregate_states + alignment_of_next_state - 1) /
+                    alignment_of_next_state * alignment_of_next_state;
+        }
+    }
+    _fn_place_ptr =
+            _agg_arena_pool.aligned_alloc(_total_size_of_aggregate_states, _align_aggregate_states);
+    // NOTE(review): the result of _create_agg_status() is not checked here; confirm whether
+    // it can report a failure that should abort prepare().
+    _create_agg_status();
+    _executor.insert_result =
+            std::bind<void>(&VAnalyticEvalNode::_insert_result_info, this, std::placeholders::_1);
+    _executor.execute =
+            std::bind<void>(&VAnalyticEvalNode::_execute_for_win_func, this, std::placeholders::_1,
+                            std::placeholders::_2, std::placeholders::_3, std::placeholders::_4);
+
+    // Propagate a failed prepare of any aggregate argument expression instead of silently
+    // dropping it, in line with the partition-by / order-by expressions below.
+    for (const auto& ctx : _agg_expr_ctxs) {
+        RETURN_IF_ERROR(VExpr::prepare(ctx, state, child(0)->row_desc(), expr_mem_tracker()));
+    }
+    if (!_partition_by_eq_expr_ctxs.empty() || !_order_by_eq_expr_ctxs.empty()) {
+        // The comparison expressions are prepared against a two-tuple row descriptor:
+        // the child's first tuple plus the buffered tuple.
+        std::vector<TTupleId> tuple_ids;
+        tuple_ids.push_back(child(0)->row_desc().tuple_descriptors()[0]->id());
+        tuple_ids.push_back(_buffered_tuple_id);
+        RowDescriptor cmp_row_desc(state->desc_tbl(), tuple_ids, std::vector<bool>(2, false));
+        if (!_partition_by_eq_expr_ctxs.empty()) {
+            RETURN_IF_ERROR(VExpr::prepare(_partition_by_eq_expr_ctxs, state, cmp_row_desc,
+                                           expr_mem_tracker()));
+        }
+        if (!_order_by_eq_expr_ctxs.empty()) {
+            RETURN_IF_ERROR(VExpr::prepare(_order_by_eq_expr_ctxs, state, cmp_row_desc,
+                                           expr_mem_tracker()));
+        }
+    }
+    return Status::OK();
+}
+
+// Opens the base node, the child, and then every expression context owned by this node
+// (partition-by, order-by, and the argument expressions of each analytic function).
+Status VAnalyticEvalNode::open(RuntimeState* state) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(ExecNode::open(state));
+    RETURN_IF_CANCELLED(state);
+    RETURN_IF_ERROR(child(0)->open(state));
+
+    RETURN_IF_ERROR(VExpr::open(_partition_by_eq_expr_ctxs, state));
+    RETURN_IF_ERROR(VExpr::open(_order_by_eq_expr_ctxs, state));
+    for (size_t fn_idx = 0; fn_idx != _agg_functions_size; ++fn_idx) {
+        const auto& arg_ctxs = _agg_expr_ctxs[fn_idx];
+        RETURN_IF_ERROR(VExpr::open(arg_ctxs, state));
+    }
+    return Status::OK();
+}
+
+// Closes the node once: closes the base node, destroys the aggregate states, and returns
+// the base node's close status. A repeated call is a no-op that returns OK.
+Status VAnalyticEvalNode::close(RuntimeState* state) {
+    if (is_closed()) {
+        return Status::OK();
+    }
+    // Keep the base-class result instead of discarding it, but still release the
+    // aggregate states before reporting it.
+    Status close_status = ExecNode::close(state);
+    _destory_agg_status();
+    return close_status;
+}
+
+// RowBatch overload of get_next: not implemented by this node. It always returns
+// NotSupported and does not touch `state`, `row_batch` or `eos`.
+Status VAnalyticEvalNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
+    return Status::NotSupported("Not Implemented VAnalyticEvalNode::get_next.");
+}
+
+// Block overload of get_next. Reports eos once the input is exhausted and every buffered
+// input block has been handed out; otherwise delegates to the window-specific
+// implementation chosen in the constructor, then applies the conjuncts and the limit.
+Status VAnalyticEvalNode::get_next(RuntimeState* state, vectorized::Block* block, bool* eos) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
+    RETURN_IF_CANCELLED(state);
+
+    const bool all_blocks_emitted = _input_eos && _output_block_index == _input_blocks.size();
+    if (all_blocks_emitted) {
+        *eos = true;
+        return Status::OK();
+    }
+
+    RETURN_IF_ERROR(_executor.get_next(state, block, eos));
+    RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns()));
+    reached_limit(block, eos);
+    return Status::OK();
+}
+
+// get_next for the whole-partition frame: the functions are executed once over the entire
+// partition when a new partition starts, and the result is then written for the rows of
+// the current block. Returns as soon as one block has all of its rows filled.
+Status VAnalyticEvalNode::_get_next_for_partition(RuntimeState* state, Block* block, bool* eos) {
+    while (!_input_eos || _output_block_index < _input_blocks.size()) {
+        bool entered_new_partition = false;
+        RETURN_IF_ERROR(_consumed_block_and_init_partition(state, &entered_new_partition, eos));
+        if (*eos) {
+            return Status::OK();
+        }
+
+        const size_t rows_in_block = _input_blocks[_output_block_index].rows();
+        if (entered_new_partition) {
+            // Partition bounds double as the frame bounds.
+            _executor.execute(_partition_by_start, _partition_by_end, _partition_by_start,
+                              _partition_by_end);
+        }
+        _executor.insert_result(rows_in_block);
+
+        const bool block_filled = (_window_end_position == rows_in_block);
+        if (block_filled) {
+            return _output_current_block(block);
+        }
+    }
+    return Status::OK();
+}
+
+// get_next for the RANGE frame: whenever the current row moves past the current order-by
+// range, the range is advanced and the functions are executed over it; results are written
+// until the partition or the current block is exhausted. Returns as soon as one block has
+// all of its rows filled.
+Status VAnalyticEvalNode::_get_next_for_range(RuntimeState* state, Block* block, bool* eos) {
+    while (!_input_eos || _output_block_index < _input_blocks.size()) {
+        bool entered_new_partition = false;
+        RETURN_IF_ERROR(_consumed_block_and_init_partition(state, &entered_new_partition, eos));
+        if (*eos) {
+            return Status::OK();
+        }
+
+        const size_t rows_in_block = _input_blocks[_output_block_index].rows();
+        while (_current_row_position < _partition_by_end.pos &&
+               _window_end_position < rows_in_block) {
+            const bool past_order_by_range = (_current_row_position >= _order_by_end.pos);
+            if (past_order_by_range) {
+                _update_order_by_range();
+                _executor.execute(_order_by_start, _order_by_end, _order_by_start, _order_by_end);
+            }
+            _executor.insert_result(rows_in_block);
+        }
+
+        if (_window_end_position == rows_in_block) {
+            return _output_current_block(block);
+        }
+    }
+    return Status::OK();
+}
+
+// get_next for the ROWS frame: for every row a frame [frame_start, frame_end) is computed,
+// the functions are executed over it and one result is written. Returns as soon as one
+// block has all of its rows filled.
+Status VAnalyticEvalNode::_get_next_for_rows(RuntimeState* state, Block* block, bool* eos) {
+    while (!_input_eos || _output_block_index < _input_blocks.size()) {
+        bool entered_new_partition = false;
+        RETURN_IF_ERROR(_consumed_block_and_init_partition(state, &entered_new_partition, eos));
+        if (*eos) {
+            return Status::OK();
+        }
+
+        const size_t rows_in_block = _input_blocks[_output_block_index].rows();
+        while (_current_row_position < _partition_by_end.pos &&
+               _window_end_position < rows_in_block) {
+            BlockRowPos frame_start, frame_end;
+            // No start bound and an end bound of CURRENT ROW: the state keeps accumulating,
+            // so only the current row is fed in and the state is not reset.
+            const bool accumulate_current_row =
+                    !_window.__isset.window_start &&
+                    _window.window_end.type == TAnalyticWindowBoundaryType::CURRENT_ROW;
+            if (accumulate_current_row) {
+                frame_start.pos = _current_row_position;
+                frame_end.pos = _current_row_position + 1;
+            } else {
+                // Any other frame is recomputed from scratch for each row.
+                _reset_agg_status();
+                if (_window.__isset.window_start) {
+                    frame_start.pos = _current_row_position + _rows_start_offset;
+                } else {
+                    // No start bound: the frame begins at the partition start.
+                    frame_start.pos = _partition_by_start.pos;
+                }
+                frame_end.pos = _current_row_position + _rows_end_offset + 1;
+            }
+            _executor.execute(_partition_by_start, _partition_by_end, frame_start, frame_end);
+            _executor.insert_result(rows_in_block);
+        }
+
+        if (_window_end_position == rows_in_block) {
+            return _output_current_block(block);
+        }
+    }
+    return Status::OK();
+}
+
+// Fetches input blocks until the end of the current partition is known, then switches to
+// the next partition if the current one is finished and initializes the result columns.
+// Sets *eos when the input ended without delivering any row; *next_partition receives the
+// value returned by _init_next_partition().
+Status VAnalyticEvalNode::_consumed_block_and_init_partition(RuntimeState* state,
+                                                             bool* next_partition, bool* eos) {
+    BlockRowPos found_partition_end;
+    for (;;) {
+        // Recompute the partition end; it may move after each newly fetched block.
+        found_partition_end = _get_partition_by_end();
+        if (!whether_need_next_partition(found_partition_end)) {
+            break;
+        }
+        RETURN_IF_ERROR(_fetch_next_block_data(state));
+    }
+
+    const bool input_is_empty = _input_eos && _input_total_rows == 0;
+    if (input_is_empty) {
+        *eos = true;
+        return Status::OK();
+    }
+
+    SCOPED_TIMER(_evaluation_timer);
+    *next_partition = _init_next_partition(found_partition_end);
+    RETURN_IF_ERROR(_init_result_columns());
+    return Status::OK();
+}
+
+// Returns the end position of the partition that contains the current row:
+//  - the cached end while the current row is still before it,
+//  - the end of all buffered blocks when there are no partition-by expressions or no rows,
+//  - otherwise the end found by searching each partition-by column in turn, each search
+//    bounded by the end found for the previous column.
+BlockRowPos VAnalyticEvalNode::_get_partition_by_end() {
+    SCOPED_TIMER(_evaluation_timer);
+
+    const bool partition_not_exhausted = _current_row_position < _partition_by_end.pos;
+    if (partition_not_exhausted) {
+        return _partition_by_end;
+    }
+
+    const bool nothing_to_search = _partition_by_eq_expr_ctxs.empty() || (_input_total_rows == 0);
+    if (nothing_to_search) {
+        return _all_block_end;
+    }
+
+    BlockRowPos partition_end = _all_block_end;
+    const size_t num_partition_exprs = _partition_by_eq_expr_ctxs.size();
+    for (size_t expr_idx = 0; expr_idx < num_partition_exprs; ++expr_idx) {
+        partition_end = _compare_row_to_find_end(_partition_by_column_idxs[expr_idx],
+                                                 _partition_by_end, partition_end);
+    }
+    // Turn (block, row) into an absolute row position over all buffered blocks.
+    partition_end.pos =
+            input_block_first_row_positions[partition_end.block_num] + partition_end.row_num;
+    return partition_end;
+}
+
+// Finds where the run of rows equal to the row at `start` ends in column `idx`, searching
+// the half-open range [start, end) that may span several buffered blocks.
+// The columns live in separate blocks, so the search first locates the block that holds
+// the boundary and then the row inside that block.
+// Returns `start` with block_num/row_num moved to the first row that is NOT equal to the
+// start row, or to `end` when every row in the range is equal. Only block_num and row_num
+// are updated; the caller recomputes `pos`.
+// Assumes equal rows are contiguous in the range — presumably guaranteed by the input
+// being sorted on the compared columns; TODO confirm.
+BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos start,
+                                                        BlockRowPos end) {
+    // Validate the positions before the first access to _input_blocks.
+    DCHECK_LE(start.block_num, end.block_num);
+    DCHECK_LE(start.block_num, _input_blocks.size() - 1);
+
+    int64_t start_init_row_num = start.row_num;
+    ColumnPtr start_column = _input_blocks[start.block_num].get_by_position(idx).column;
+    ColumnPtr start_next_block_column = start_column;
+
+    int64_t start_block_num = start.block_num;
+    int64_t end_block_num = end.block_num;
+    int64_t mid_block_num = end.block_num;
+    // Binary search for the last block whose FIRST row still equals the start row.
+    while (start_block_num < end_block_num) {
+        mid_block_num = (start_block_num + end_block_num + 1) >> 1;
+        start_next_block_column = _input_blocks[mid_block_num].get_by_position(idx).column;
+        if (start_column->compare_at(start_init_row_num, 0, *start_next_block_column, 1) == 0) {
+            start_block_num = mid_block_num;
+        } else {
+            end_block_num = mid_block_num - 1;
+        }
+    }
+
+    // The last probe rejected block `mid_block_num`, so the boundary is in the block before
+    // it. If even the last row of that block equals the start row, the run ends exactly at
+    // the first row of the rejected block.
+    if (end_block_num == mid_block_num - 1) {
+        start_next_block_column = _input_blocks[end_block_num].get_by_position(idx).column;
+        int64_t block_size = _input_blocks[end_block_num].rows();
+        if ((start_column->compare_at(start_init_row_num, block_size - 1, *start_next_block_column,
+                                      1) == 0)) {
+            start.block_num = end_block_num + 1;
+            start.row_num = 0;
+            return start;
+        }
+    }
+
+    // If the boundary block is not the block we started in, continue from its first row
+    // (which is known to equal the start row) using that block's column.
+    if (start_column.get() != start_next_block_column.get()) {
+        start_init_row_num = 0;
+        start.block_num = start_block_num;
+        start_column = _input_blocks[start.block_num].get_by_position(idx).column;
+    }
+
+    // Binary search inside the boundary block for the first differing row in
+    // [start_pos, end_pos). The upper bound is one past the last row of the block (or the
+    // caller's end row in the end block), so a block that matches up to and including its
+    // last row is detected instead of being cut one row short.
+    const int64_t block_rows = _input_blocks[start.block_num].rows();
+    const bool in_end_block = (start.block_num == end.block_num);
+    int64_t start_pos = start_init_row_num;
+    int64_t end_pos = block_rows;
+    if (in_end_block) {
+        end_pos = end.row_num;
+    }
+    while (start_pos < end_pos) {
+        int64_t mid_pos = (start_pos + end_pos) >> 1;
+        if (start_column->compare_at(start_init_row_num, mid_pos, *start_column, 1)) {
+            end_pos = mid_pos;
+        } else {
+            start_pos = mid_pos + 1;
+        }
+    }
+
+    if (!in_end_block && start_pos == block_rows) {
+        // Every remaining row of this block matched: the run ends at the first row of the
+        // next block. Returning (block + 1, 0) keeps row_num a valid row index.
+        start.block_num += 1;
+        start.row_num = 0;
+        return start;
+    }
+    start.row_num = start_pos; // first row that differs (or the caller's end row)
+    return start;
+}
+
+// Decides, from the partition end found so far, whether another input block has to be
+// fetched before the current partition can be processed.
+// Never fetches once the input is exhausted or while the current partition still has
+// unprocessed rows. Otherwise fetches when there are no partition-by expressions, when
+// nothing has been found yet (pos == 0), or when the found end coincides with the end of
+// all buffered data.
+bool VAnalyticEvalNode::whether_need_next_partition(BlockRowPos found_partition_end) {
+    if (_input_eos) {
+        return false;
+    }
+    if (_current_row_position < _partition_by_end.pos) {
+        return false;
+    }
+
+    // From here on the input is known not to be exhausted.
+    const bool no_partition_exprs = _partition_by_eq_expr_ctxs.empty();
+    if (no_partition_exprs || found_partition_end.pos == 0) {
+        return true;
+    }
+    return found_partition_end.pos == _all_block_end.pos;
+}
+
+// Pulls the next non-empty block from the child and buffers it:
+//  - updates the running row count and the end position of all buffered data,
+//  - appends the evaluated argument columns of every analytic function,
+//  - records which block columns hold the partition-by / order-by expression results.
+// Returns OK without buffering anything when the child ends with an empty block.
+Status VAnalyticEvalNode::_fetch_next_block_data(RuntimeState* state) {
+    Block block;
+    RETURN_IF_CANCELLED(state);
+    do {
+        RETURN_IF_ERROR(_children[0]->get_next(state, &block, &_input_eos));
+    } while (!_input_eos && block.rows() == 0);
+
+    if (_input_eos && block.rows() == 0) {
+        return Status::OK();
+    }
+
+    // The new block starts at the row count accumulated so far.
+    input_block_first_row_positions.emplace_back(_input_total_rows);
+    const size_t block_rows = block.rows();
+    _input_total_rows += block_rows;
+    _all_block_end.block_num = _input_blocks.size();
+    _all_block_end.row_num = block_rows;
+    _all_block_end.pos = _input_total_rows;
+
+    // Remember the column positions of the first block, before the expression
+    // evaluations below run on it.
+    if (_origin_cols.empty()) {
+        for (int c = 0; c < block.columns(); ++c) {
+            _origin_cols.emplace_back(c);
+        }
+    }
+
+    // Append this block's argument values to the per-function input columns.
+    for (size_t fn_idx = 0; fn_idx < _agg_functions_size; ++fn_idx) {
+        auto& arg_ctxs = _agg_expr_ctxs[fn_idx];
+        for (size_t arg = 0; arg < arg_ctxs.size(); ++arg) {
+            RETURN_IF_ERROR(_insert_range_column(&block, arg_ctxs[arg],
+                                                 _agg_intput_columns[fn_idx][arg].get(),
+                                                 block_rows));
+        }
+    }
+
+    // Executes each expression on the block and stores the index of its result column.
+    auto record_result_columns = [&block](auto& expr_ctxs, auto& column_idxs) -> Status {
+        for (size_t i = 0; i < expr_ctxs.size(); ++i) {
+            int result_col_id = -1;
+            RETURN_IF_ERROR(expr_ctxs[i]->execute(&block, &result_col_id));
+            DCHECK_GE(result_col_id, 0);
+            column_idxs[i] = result_col_id;
+        }
+        return Status::OK();
+    };
+    RETURN_IF_ERROR(record_result_columns(_partition_by_eq_expr_ctxs, _partition_by_column_idxs));
+    RETURN_IF_ERROR(record_result_columns(_order_by_eq_expr_ctxs, _ordey_by_column_idxs));
+
+    // TODO: a free queue of blocks could be maintained here so that memory is reused
+    // instead of being allocated and released for every block.
+    _input_blocks.emplace_back(std::move(block));
+    return Status::OK();
+}
+
+// Evaluates `expr` on `block` and appends the first `length` rows of the result to
+// `dst_column`. A constant result column is expanded to a full column first.
+Status VAnalyticEvalNode::_insert_range_column(vectorized::Block* block, VExprContext* expr,
+                                               IColumn* dst_column, size_t length) {
+    int result_col_id = -1;
+    RETURN_IF_ERROR(expr->execute(block, &result_col_id));
+    DCHECK_GE(result_col_id, 0);
+
+    const auto& evaluated = block->get_by_position(result_col_id);
+    auto full_column = evaluated.column->convert_to_full_column_if_const();
+    dst_column->insert_range_from(*full_column, 0, length);
+    return Status::OK();
+}
+
+// Once the current row has reached the partition end, switches to the next partition's bounds.
+bool VAnalyticEvalNode::_init_next_partition(BlockRowPos found_partition_end) {
+    const bool reached_end = _current_row_position >= _partition_by_end.pos;
+    const bool end_changed = _partition_by_end.pos == 0 ||
+                             _partition_by_end.pos != found_partition_end.pos;
+    if (!reached_end || !end_changed) return false; // no new partition to start yet
+    _partition_by_start = _partition_by_end; // the new partition begins at the previous end
+    _partition_by_end = found_partition_end;
+    _current_row_position = _partition_by_start.pos;
+    _reset_agg_status(); // aggregate states restart for the new partition
+    return true;
+}
+
+void VAnalyticEvalNode::_insert_result_info(int64_t current_block_rows) {
+    int64_t current_block_row_pos = input_block_first_row_positions[_output_block_index];
+    int64_t get_result_start = _current_row_position - current_block_row_pos; // in-block offset
+    if (_fn_scope == AnalyticFnScope::PARTITION) {
+        int64_t get_result_end = std::min<int64_t>(_current_row_position + current_block_rows,
+                                                   _partition_by_end.pos);
+        _window_end_position =
+                std::min<int64_t>(get_result_end - current_block_row_pos, current_block_rows);
+        _current_row_position += (_window_end_position - get_result_start);
+    } else if (_fn_scope == AnalyticFnScope::RANGE) {
+        _window_end_position =
+                std::min<int64_t>(_order_by_end.pos - current_block_row_pos, current_block_rows);
+        _current_row_position += (_window_end_position - get_result_start);
+    } else {
+        _window_end_position++; // remaining scope (ROWS): advance by a single row
+        _current_row_position++;
+    }
+    // Counters use the operand types: no int64 -> int narrowing, no signed/unsigned compare.
+    for (size_t i = 0; i < _agg_functions_size; ++i) {
+        for (int64_t j = get_result_start; j < _window_end_position; ++j) {
+            _agg_functions[i]->insert_result_info(_fn_place_ptr + _offsets_of_aggregate_states[i],
+                                                  _result_window_columns[i].get());
+        }
+    }
+}
+
+Status VAnalyticEvalNode::_output_current_block(Block* block) {
+    // Hand the buffered input block to the caller; the result columns are appended below.
+    block->swap(std::move(_input_blocks[_output_block_index]));
+    if (block->columns() > _origin_cols.size()) {
+        block->erase_not_in(_origin_cols); // keep only the columns recorded in _origin_cols
+    }
+    for (size_t fn = 0; fn != _result_window_columns.size(); ++fn) {
+        block->insert({std::move(_result_window_columns[fn]), _agg_functions[fn]->data_type(), ""});
+    }
+
+    // Move on to the next buffered block and restart the in-block window cursor.
+    ++_output_block_index;
+    _window_end_position = 0;
+    return Status::OK();
+}
+
+// For every window function: collects its argument columns and calls add_range_single_place()
+// with the partition and frame bounds (lead/lag, rank family, sum/min/max/count/avg, ...).
+void VAnalyticEvalNode::_execute_for_win_func(BlockRowPos partition_start,
+                                              BlockRowPos partition_end, BlockRowPos frame_start,
+                                              BlockRowPos frame_end) {
+    for (size_t fn = 0; fn < _agg_functions_size; ++fn) {
+        std::vector<const IColumn*> arg_columns; // raw pointers to this function's arguments
+        for (auto& arg : _agg_intput_columns[fn]) {
+            arg_columns.push_back(arg.get());
+        }
+        _agg_functions[fn]->function()->add_range_single_place(
+                partition_start.pos, partition_end.pos, frame_start.pos, frame_end.pos,
+                _fn_place_ptr + _offsets_of_aggregate_states[fn], arg_columns.data(), nullptr);
+    }
+}
+
+// Binary-searches for the end of the next peer group and refreshes both range positions.
+void VAnalyticEvalNode::_update_order_by_range() {
+    _order_by_start = _order_by_end;
+    _order_by_end = _partition_by_end; // initial candidate, refined per order-by column below
+    for (size_t key = 0; key < _order_by_eq_expr_ctxs.size(); ++key) {
+        const int64_t column_idx = _ordey_by_column_idxs[key];
+        _order_by_end = _compare_row_to_find_end(column_idx, _order_by_start, _order_by_end);
+    }
+    // Absolute position = first-row position of the owning block + row offset in that block.
+    const int64_t start_base = input_block_first_row_positions[_order_by_start.block_num];
+    const int64_t end_base = input_block_first_row_positions[_order_by_end.block_num];
+    _order_by_start.pos = start_base + _order_by_start.row_num;
+    _order_by_end.pos = end_base + _order_by_end.row_num;
+}
+
+Status VAnalyticEvalNode::_init_result_columns() {
+    // Columns are only (re)created while _window_end_position is zero.
+    if (_window_end_position != 0) return Status::OK();
+    _result_window_columns.resize(_agg_functions_size);
+    for (size_t fn = 0; fn < _agg_functions_size; ++fn) {
+        const auto& return_type = _agg_functions[fn]->data_type();
+        _result_window_columns[fn] = return_type->create_column();
+    }
+    return Status::OK();
+}
+
+Status VAnalyticEvalNode::_reset_agg_status() {
+    for (size_t fn = 0; fn != _agg_functions_size; ++fn) { // reset() each state in place
+        _agg_functions[fn]->reset(_fn_place_ptr + _offsets_of_aggregate_states[fn]);
+    }
+    return Status::OK();
+}
+
+Status VAnalyticEvalNode::_create_agg_status() {
+    for (size_t fn = 0; fn != _agg_functions_size; ++fn) { // create() each state at its offset
+        _agg_functions[fn]->create(_fn_place_ptr + _offsets_of_aggregate_states[fn]);
+    }
+    return Status::OK();
+}
+
+Status VAnalyticEvalNode::_destory_agg_status() {
+    for (size_t fn = 0; fn != _agg_functions_size; ++fn) { // destroy() each state at its offset
+        _agg_functions[fn]->destroy(_fn_place_ptr + _offsets_of_aggregate_states[fn]);
+    }
+    return Status::OK();
+}
+
+std::string VAnalyticEvalNode::debug_string() {
+    // Describes the window as "{type=..., start=..., end=...}"; PARTITION scope has no window.
+    if (_fn_scope == PARTITION) {
+        return "NO WINDOW";
+    }
+    std::stringstream ss;
+    ss << "{type=";
+    if (_fn_scope == RANGE) {
+        ss << "RANGE";
+    } else {
+        ss << "ROWS";
+    }
+    ss << ", start=";
+    if (_window.__isset.window_start) {
+        ss << debug_window_bound_string(_window.window_start);
+    } else {
+        ss << "UNBOUNDED_PRECEDING";
+    }
+    ss << ", end=";
+    if (_window.__isset.window_end) {
+        ss << debug_window_bound_string(_window.window_end);
+    } else {
+        ss << "UNBOUNDED_FOLLOWING";
+    }
+    // Close the brace on both paths; it was previously omitted for an unbounded end.
+    ss << "}";
+    return ss.str();
+}
+
+std::string VAnalyticEvalNode::debug_window_bound_string(TAnalyticWindowBoundary b) {
+    // CURRENT_ROW is printed on its own, without an offset.
+    if (b.type == TAnalyticWindowBoundaryType::CURRENT_ROW) return "CURRENT_ROW";
+
+    // Every other boundary is rendered as "<rows offset> PRECEDING" or "<rows offset> FOLLOWING".
+    std::stringstream text;
+    if (!b.__isset.rows_offset_value) {
+        DCHECK(false) << "Range offsets not yet implemented";
+    } else {
+        text << b.rows_offset_value;
+    }
+    const bool is_preceding = (b.type == TAnalyticWindowBoundaryType::PRECEDING);
+    if (!is_preceding) {
+        DCHECK_EQ(b.type, TAnalyticWindowBoundaryType::FOLLOWING);
+    }
+    text << (is_preceding ? " PRECEDING" : " FOLLOWING");
+    return text.str();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/vanalytic_eval_node.h b/be/src/vec/exec/vanalytic_eval_node.h
new file mode 100644
index 0000000000..3289dd145e
--- /dev/null
+++ b/be/src/vec/exec/vanalytic_eval_node.h
@@ -0,0 +1,146 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/exec_node.h"
+#include "exprs/expr.h"
+#include "runtime/tuple.h"
+#include "thrift/protocol/TDebugProtocol.h"
+#include "vec/common/arena.h"
+#include "vec/core/block.h"
+#include "vec/exprs/vectorized_agg_fn.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vexpr_context.h"
+namespace doris::vectorized {
+
+struct BlockRowPos {
+    BlockRowPos() = default; // every member starts at zero via its initializer
+    int64_t block_num = 0; // index of the input block that holds the row
+    int64_t row_num = 0;   // row offset inside that block
+    int64_t pos = 0;       // absolute position: rows of all preceding blocks + row_num
+};
+
+class AggFnEvaluator;
+class VAnalyticEvalNode : public ExecNode { // exec node evaluating window functions on Blocks
+public:
+    ~VAnalyticEvalNode() {}
+    VAnalyticEvalNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+
+    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
+    virtual Status prepare(RuntimeState* state);
+    virtual Status open(RuntimeState* state);
+    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos);
+    virtual Status get_next(RuntimeState* state, vectorized::Block* block, bool* eos);
+    virtual Status close(RuntimeState* state);
+
+protected:
+    virtual std::string debug_string(); // "NO WINDOW" or a "{type=..., start=..., end=..." text
+
+private:
+    Status _get_next_for_rows(RuntimeState* state, Block* block, bool* eos);
+    Status _get_next_for_range(RuntimeState* state, Block* block, bool* eos);
+    Status _get_next_for_partition(RuntimeState* state, Block* block, bool* eos);
+
+    void _execute_for_win_func(BlockRowPos partition_start, BlockRowPos partition_end,
+                               BlockRowPos frame_start, BlockRowPos frame_end); // add_range_single_place() per function
+
+    Status _reset_agg_status();    // reset() on every function's state
+    Status _init_result_columns(); // creates one result column per function
+    Status _create_agg_status();   // create() on every function's state
+    Status _destory_agg_status();  // destroy() on every function's state
+    Status _insert_range_column(vectorized::Block* block, VExprContext* expr, IColumn* dst_column,
+                                size_t length); // evaluates expr on block, appends `length` rows to dst_column
+
+    void _update_order_by_range(); // moves _order_by_start/_order_by_end to the next peer group
+    bool _init_next_partition(BlockRowPos found_partition_end); // true when a new partition was started
+    void _insert_result_info(int64_t current_block_rows); // appends current results to _result_window_columns
+    Status _output_current_block(Block* block); // swaps out the buffered block and appends result columns
+    BlockRowPos _get_partition_by_end();
+    BlockRowPos _compare_row_to_find_end(int idx, BlockRowPos start, BlockRowPos end);
+    // NOTE(review): the three helpers below presumably fetch child blocks and advance partitions.
+    Status _fetch_next_block_data(RuntimeState* state);
+    Status _consumed_block_and_init_partition(RuntimeState* state, bool* next_partition, bool* eos);
+    bool whether_need_next_partition(BlockRowPos found_partition_end);
+
+    std::string debug_window_bound_string(TAnalyticWindowBoundary b); // "CURRENT_ROW" or "<n> PRECEDING|FOLLOWING"
+    using vectorized_execute =
+            std::function<void(BlockRowPos peer_group_start, BlockRowPos peer_group_end,
+                               BlockRowPos frame_start, BlockRowPos frame_end)>;
+    using vectorized_get_next = std::function<Status(RuntimeState* state, Block* block, bool* eos)>;
+    using vectorized_get_result = std::function<void(int64_t current_block_rows)>;
+    using vectorized_closer = std::function<void()>;
+
+    struct executor { // bundle of callbacks; NOTE(review): presumably bound per window scope
+        vectorized_execute execute;
+        vectorized_get_next get_next;
+        vectorized_get_result insert_result;
+        vectorized_closer close;
+    };
+
+    executor _executor;
+
+private:
+    enum AnalyticFnScope { PARTITION, RANGE, ROWS }; // PARTITION prints as "NO WINDOW" in debug_string()
+    std::vector<Block> _input_blocks; // blocks buffered from the input until they are output
+    std::vector<int64_t> input_block_first_row_positions; // absolute position of each block's first row
+    std::vector<AggFnEvaluator*> _agg_functions; // one evaluator per window function
+    std::vector<std::vector<VExprContext*>> _agg_expr_ctxs; // argument expressions of each function
+    std::vector<VExprContext*> _partition_by_eq_expr_ctxs;
+    std::vector<VExprContext*> _order_by_eq_expr_ctxs;
+    std::vector<std::vector<MutableColumnPtr>> _agg_intput_columns; // evaluated arguments of each function
+    std::vector<MutableColumnPtr> _result_window_columns; // one result column per function
+
+    BlockRowPos _order_by_start; // start of the current peer group
+    BlockRowPos _order_by_end;   // end of the current peer group
+    BlockRowPos _partition_by_start;
+    BlockRowPos _partition_by_end;
+    BlockRowPos _all_block_end; // end of all input received so far
+    std::vector<int64_t> _ordey_by_column_idxs; // block column index of each order-by expression result
+    std::vector<int64_t> _partition_by_column_idxs; // block column index of each partition-by expression result
+
+    bool _input_eos = false;
+    int64_t _input_total_rows = 0; // rows received from the input so far
+    int64_t _output_block_index = 0; // index into _input_blocks of the next block to output
+    int64_t _window_end_position = 0; // in-block end offset of inserted results; 0 after a block is output
+    int64_t _current_row_position = 0; // absolute position of the row being processed
+    int64_t _rows_start_offset = 0;
+    int64_t _rows_end_offset = 0;
+    size_t _agg_functions_size = 0; // number of window functions
+    std::unique_ptr<MemPool> _mem_pool;
+
+    /// The offset of the n-th functions.
+    std::vector<size_t> _offsets_of_aggregate_states;
+    /// The total size of the row from the functions.
+    size_t _total_size_of_aggregate_states = 0;
+    /// The max align size for functions
+    size_t _align_aggregate_states = 1;
+    Arena _agg_arena_pool;
+    AggregateDataPtr _fn_place_ptr; // base address of the states; function i lives at + offsets[i]
+
+    TTupleId _buffered_tuple_id = 0;
+    TupleId _intermediate_tuple_id;
+    TupleId _output_tuple_id;
+    TAnalyticWindow _window; // window definition rendered by debug_string()
+    AnalyticFnScope _fn_scope;
+    TupleDescriptor* _intermediate_tuple_desc;
+    TupleDescriptor* _output_tuple_desc;
+    std::vector<int64_t> _origin_cols; // column indexes of the first input block, kept when outputting
+
+    RuntimeProfile::Counter* _evaluation_timer;
+};
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/exec/vassert_num_rows_node.cpp b/be/src/vec/exec/vassert_num_rows_node.cpp
new file mode 100644
index 0000000000..b499175bed
--- /dev/null
+++ b/be/src/vec/exec/vassert_num_rows_node.cpp
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vassert_num_rows_node.h"
+
+#include "vec/core/block.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "gutil/strings/substitute.h"
+#include "runtime/row_batch.h"
+#include "runtime/runtime_state.h"
+#include "util/runtime_profile.h"
+
+namespace doris::vectorized {
+
+VAssertNumRowsNode::VAssertNumRowsNode(ObjectPool* pool, const TPlanNode& tnode,
+                                     const DescriptorTbl& descs)
+        : ExecNode(pool, tnode, descs),
+          _desired_num_rows(tnode.assert_num_rows_node.desired_num_rows),
+          _subquery_string(tnode.assert_num_rows_node.subquery_string) {
+    const auto& assert_node = tnode.assert_num_rows_node;
+    // A plan that carries no explicit assertion falls back to LE, which keeps it
+    // compatible with the previous code.
+    _assertion = assert_node.__isset.assertion ? assert_node.assertion
+                                               : TAssertion::LE;
+}
+
+Status VAssertNumRowsNode::open(RuntimeState* state) {
+    // The timer covers this node's own open as well as the child's open below.
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(ExecNode::open(state));
+    // ISSUE-3435
+    return child(0)->open(state);
+}
+
+Status VAssertNumRowsNode::get_next(RuntimeState* state, Block* block, bool* eos) {
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(child(0)->get_next(state, block, eos)); // a child failure must not be dropped
+    _num_rows_returned += block->rows();
+    bool assert_res = false; // stays false for an unknown assertion type
+    switch (_assertion) {
+    case TAssertion::EQ:
+        assert_res = _num_rows_returned == _desired_num_rows;
+        break;
+    case TAssertion::NE:
+        assert_res = _num_rows_returned != _desired_num_rows;
+        break;
+    case TAssertion::LT:
+        assert_res = _num_rows_returned < _desired_num_rows;
+        break;
+    case TAssertion::LE:
+        assert_res = _num_rows_returned <= _desired_num_rows;
+        break;
+    case TAssertion::GT:
+        assert_res = _num_rows_returned > _desired_num_rows;
+        break;
+    case TAssertion::GE:
+        assert_res = _num_rows_returned >= _desired_num_rows;
+        break;
+    default:
+        break;
+    }
+
+    if (!assert_res) {
+        auto to_string_lambda = [](TAssertion::type assertion) {
+            std::map<int, const char*>::const_iterator it =
+                    _TAssertion_VALUES_TO_NAMES.find(assertion);
+            // Compare against end() of the SAME map the iterator was obtained from.
+            if (it == _TAssertion_VALUES_TO_NAMES.end()) {
+                return "NULL";
+            } else {
+                return it->second;
+            }
+        };
+        LOG(INFO) << "Expected " << to_string_lambda(_assertion) << " " << _desired_num_rows
+                  << " to be returned by expression " << _subquery_string;
+        return Status::Cancelled(strings::Substitute(
+                "Expected $0 $1 to be returned by expression $2", to_string_lambda(_assertion),
+                _desired_num_rows, _subquery_string));
+    }
+    COUNTER_SET(_rows_returned_counter, _num_rows_returned);
+    return Status::OK();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/vassert_num_rows_node.h b/be/src/vec/exec/vassert_num_rows_node.h
new file mode 100644
index 0000000000..95e11540a6
--- /dev/null
+++ b/be/src/vec/exec/vassert_num_rows_node.h
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exec/exec_node.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+namespace doris::vectorized {
+class Block;
+
+// Node that checks the number of rows returned by its child against an expected count.
+class VAssertNumRowsNode : public ExecNode {
+public:
+    VAssertNumRowsNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+
+    Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override {
+        return Status::NotSupported("Not Implemented VAssertNumRowsNode::get_next.");
+    }
+
+    virtual Status open(RuntimeState* state);
+    virtual Status get_next(RuntimeState* state, Block* block, bool* eos);
+
+private:
+    int64_t _desired_num_rows; // row count the assertion compares against
+    const std::string _subquery_string; // expression text quoted in the failure message
+    TAssertion::type _assertion; // comparison to apply; LE when the plan does not set one
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/vblocking_join_node.cpp b/be/src/vec/exec/vblocking_join_node.cpp
new file mode 100644
index 0000000000..af1adb957e
--- /dev/null
+++ b/be/src/vec/exec/vblocking_join_node.cpp
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vblocking_join_node.h"
+
+#include <sstream>
+
+#include "exprs/expr.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/runtime_state.h"
+#include "util/runtime_profile.h"
+
+namespace doris::vectorized {
+
+VBlockingJoinNode::VBlockingJoinNode(const std::string& node_name, const TJoinOp::type join_op,
+                                   ObjectPool* pool, const TPlanNode& tnode,
+                                   const DescriptorTbl& descs)
+        : ExecNode(pool, tnode, descs), _node_name(node_name), _join_op(join_op), _eos(false),
+          _left_block_pos(0), _left_side_eos(false) {} // debug_string() reads _eos/_left_block_pos
+
+Status VBlockingJoinNode::init(const TPlanNode& tnode, RuntimeState* state) {
+    return ExecNode::init(tnode, state); // no join-specific work here; forwards to the base class
+}
+
+Status VBlockingJoinNode::prepare(RuntimeState* state) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(ExecNode::prepare(state));
+
+    // Memory pool created on top of this node's memory tracker.
+    _build_pool.reset(new MemPool(mem_tracker().get()));
+
+    // Profile timers and row counters for the build side and the left child.
+    _build_timer = ADD_TIMER(runtime_profile(), "BuildTime");
+    _left_child_timer = ADD_TIMER(runtime_profile(), "LeftChildTime");
+    _build_row_counter = ADD_COUNTER(runtime_profile(), "BuildRows", TUnit::UNIT);
+    _left_child_row_counter = ADD_COUNTER(runtime_profile(), "LeftChildRows", TUnit::UNIT);
+
+    // pre-compute the tuple index of build tuples in the output row
+    const auto& build_tuple_descs = child(1)->row_desc().tuple_descriptors();
+    _build_tuple_size = build_tuple_descs.size();
+    _build_tuple_idx.reserve(_build_tuple_size);
+    for (TupleDescriptor* build_tuple_desc : build_tuple_descs) {
+        _build_tuple_idx.push_back(_row_descriptor.get_tuple_idx(build_tuple_desc->id()));
+    }
+
+    return Status::OK();
+}
+
+Status VBlockingJoinNode::close(RuntimeState* state) {
+    if (is_closed()) return Status::OK(); // repeated close() calls are no-ops
+    // Propagate a failure from the base class instead of always reporting success.
+    return ExecNode::close(state);
+}
+
+void VBlockingJoinNode::build_side_thread(RuntimeState* state, std::promise<Status>* status) {
+    // Thread entry point started by open(): runs construct_build_side() and publishes the
+    // resulting Status through `status`, whose future open() waits on.
+    // NOTE(review): no thread token is acquired or released in this vectorized version.
+    status->set_value(construct_build_side(state));
+}
+
+Status VBlockingJoinNode::open(RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::open(state));
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+
+    RETURN_IF_CANCELLED(state);
+
+    _eos = false;
+
+    // Kick-off the construction of the build-side table in a separate
+    // thread, so that the left child can do any initialisation in parallel.
+    // The thread is always started and detached; its result comes back through
+    // the promise below, whose future is waited on before open() continues.
+    std::promise<Status> build_side_status;
+
+    add_runtime_exec_option("Join Build-Side Prepared Asynchronously");
+    std::thread(bind(&VBlockingJoinNode::build_side_thread, this, state, &build_side_status)).detach();
+
+    // Open the left child so that it may perform any initialisation in parallel.
+    // Don't exit even if we see an error, we still need to wait for the build thread
+    // to finish.
+    Status open_status = child(0)->open(state);
+
+    // Blocks until the build thread has published its Status. NOTE(review): a build error
+    // returns here, i.e. before child(1)->close() below and before open_status is inspected.
+    RETURN_IF_ERROR(build_side_status.get_future().get());
+    // We can close the right child to release its resources because its input has been
+    // fully consumed.
+    child(1)->close(state);
+
+    RETURN_IF_ERROR(open_status);
+
+    // Seed left child in preparation for get_next(): loop until a non-empty block or eos.
+    while (true) {
+        release_block_memory(_left_block);
+        RETURN_IF_ERROR(child(0)->get_next(state, &_left_block, &_left_side_eos));
+        COUNTER_UPDATE(_left_child_row_counter, _left_block.rows());
+        _left_block_pos = 0;
+
+        if (_left_block.rows() == 0) {
+            if (_left_side_eos) {
+                init_get_next(-1); // -1 signals that the left child has no rows at all
+                _eos = true;
+                break;
+            }
+
+            continue; // empty block but not eos: fetch the next one
+        } else {
+            init_get_next(_left_block_pos);
+            break;
+        }
+    }
+
+    return Status::OK();
+}
+
+void VBlockingJoinNode::debug_string(int indentation_level, std::stringstream* out) const {
+    std::stringstream& os = *out; // two spaces of indent per level, then name and cursor state
+    os << std::string(indentation_level * 2, ' ') << _node_name << "(eos="
+       << (_eos ? "true" : "false") << " left_block_pos=" << _left_block_pos;
+    add_to_debug_string(indentation_level, out); // hook for subclass-specific details
+    ExecNode::debug_string(indentation_level, out);
+    os << ")";
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/vblocking_join_node.h b/be/src/vec/exec/vblocking_join_node.h
new file mode 100644
index 0000000000..80ddb5acf8
--- /dev/null
+++ b/be/src/vec/exec/vblocking_join_node.h
@@ -0,0 +1,132 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef DORIS_BE_SRC_QUERY_EXEC_VBLOCKING_JOIN_NODE_H
+#define DORIS_BE_SRC_QUERY_EXEC_VBLOCKING_JOIN_NODE_H
+
+#include <future>
+#include <string>
+
+#include "exec/exec_node.h"
+#include "gen_cpp/PlanNodes_types.h"
+
+#include "vec/core/block.h"
+
+namespace doris {
+
+class MemPool;
+class TupleRow;
+
+namespace vectorized {
+// Abstract base class for vectorized join nodes that block while consuming all rows from
+// their right child in open(). Subclasses implement construct_build_side()/init_get_next().
+class VBlockingJoinNode : public doris::ExecNode {
+public:
+    VBlockingJoinNode(const std::string &node_name, const TJoinOp::type join_op, ObjectPool *pool,
+                      const TPlanNode &tnode, const DescriptorTbl &descs);
+
+    virtual ~VBlockingJoinNode() = default;
+
+    // Subclasses should call VBlockingJoinNode::init() and then perform any other init()
+    // work, e.g. creating expr trees.
+    virtual Status init(const TPlanNode &tnode, RuntimeState *state = nullptr);
+
+    // Subclasses should call VBlockingJoinNode::prepare() and then perform any other
+    // prepare() work. prepare() sets up the profile counters and the build tuple indexes.
+    virtual Status prepare(RuntimeState *state);
+
+    // Open prepares the build side structures (subclasses should implement
+    // construct_build_side()) and then prepares for get_next() with the first non-empty
+    // left child block (subclasses should implement init_get_next()).
+    virtual Status open(RuntimeState *state);
+
+    virtual Status get_next(RuntimeState *state, RowBatch *row_batch, bool *eos) {
+        return Status::NotSupported("Not Implemented VBlocking Join Node::get_next scalar");
+    }
+
+    virtual Status close(RuntimeState *state);
+
+private:
+    const std::string _node_name;
+    TJoinOp::type _join_op;
+    bool _eos;                              // if true, nothing left to return in get_next()
+    std::unique_ptr<MemPool> _build_pool; // holds everything referenced from build side
+
+    // Current block fetched from the left child. open() releases its memory with
+    // release_block_memory() before every child get_next() call and resets
+    // _left_block_pos to 0 afterwards.
+    Block _left_block;
+
+    int _left_block_pos; // current scan pos in _left_block
+    bool _left_side_eos; // if true, left child has no more rows to process
+
+    // _build_tuple_idx[i] is the tuple index of child(1)'s tuple[i] in the output row
+    std::vector<int> _build_tuple_idx;
+    int _build_tuple_size;
+
+    // byte size of result tuple row (sum of the tuple ptrs, not the tuple data).
+    // This should be the same size as the left child tuple row.
+    int _result_tuple_row_size;
+
+    RuntimeProfile::Counter *_build_timer;            // time to prepare build side
+    RuntimeProfile::Counter *_left_child_timer;       // time to process left child batch
+    RuntimeProfile::Counter *_build_row_counter;      // num build rows
+    RuntimeProfile::Counter *_left_child_row_counter; // num left child rows
+
+    // Init the build-side state for a new left child row (e.g. hash table iterator or list
+    // iterator) given the first row. Used in open() to prepare for get_next().
+    // -1 for left_side_pos indicates the left child eos.
+    virtual void init_get_next(int left_side_pos) = 0;
+
+    // We parallelize building the build-side with Opening the
+    // left child. If, for example, the left child is another
+    // join node, it can start to build its own build-side at the
+    // same time.
+    virtual Status construct_build_side(RuntimeState *state) = 0;
+
+    // Gives subclasses an opportunity to add debug output to the debug string printed by
+    // debug_string().
+    virtual void add_to_debug_string(int indentation_level, std::stringstream *out) const {}
+
+    // Subclasses should not override, use add_to_debug_string() to add to the result.
+    virtual void debug_string(int indentation_level, std::stringstream *out) const;
+
+//    // Returns a debug string for the left child's 'row'. They have tuple ptrs that are
+//    // uninitialized; the left child only populates the tuple ptrs it is responsible
+//    // for.  This function outputs just the row values and leaves the build
+//    // side values as NULL.
+//    // This is only used for debugging and outputting the left child rows before
+//    // doing the join.
+//    std::string get_left_child_row_string(TupleRow *row);
+//
+//    // Write combined row, consisting of the left child's 'left_row' and right child's
+//    // 'build_row' to 'out_row'.
+//    // This is replaced by codegen.
+//    void create_output_row(TupleRow *out_row, TupleRow *left_row, TupleRow *build_row);
+//
+    friend class VCrossJoinNode; // grants the cross join access to the private members above
+
+private:
+    // Runs construct_build_side() on the separate thread started by open(), and returns its
+    // status in the promise parameter.
+    void build_side_thread(RuntimeState *state, std::promise<Status> *status);
+};
+
+} // namespace vectorized
+} // namespace doris
+
+#endif
diff --git a/be/src/vec/exec/vcross_join_node.cpp b/be/src/vec/exec/vcross_join_node.cpp
new file mode 100644
index 0000000000..6d48527f73
--- /dev/null
+++ b/be/src/vec/exec/vcross_join_node.cpp
@@ -0,0 +1,184 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vcross_join_node.h"
+
+#include <sstream>
+
+#include "exprs/expr.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/row_batch.h"
+#include "runtime/runtime_state.h"
+#include "util/runtime_profile.h"
+
+namespace doris::vectorized {
+
+VCrossJoinNode::VCrossJoinNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : VBlockingJoinNode("VCrossJoinNode", TJoinOp::CROSS_JOIN, pool, tnode, descs) {} // fixed node name and join op
+
+Status VCrossJoinNode::prepare(RuntimeState* state) {
+    DCHECK(_join_op == TJoinOp::CROSS_JOIN);
+    RETURN_IF_ERROR(VBlockingJoinNode::prepare(state));
+    // Column counts of the two inputs: child(0) is the left side, child(1) the build side.
+    _num_existing_columns = child(0)->row_desc().num_materialized_slots();
+    _num_columns_to_add = child(1)->row_desc().num_materialized_slots();
+    return Status::OK();
+}
+
+Status VCrossJoinNode::close(RuntimeState* state) {
+    // Idempotent: a repeated close() must not release the tracked build memory a second time.
+    if (is_closed()) {
+        return Status::OK();
+    }
+    // Give back the bytes that construct_build_side() consumed for the buffered build blocks.
+    _mem_tracker->Release(_total_mem_usage);
+    return VBlockingJoinNode::close(state); // propagate the base-class status instead of dropping it
+}
+
+Status VCrossJoinNode::construct_build_side(RuntimeState* state) {
+    // Do a full scan of child(1) and keep every non-empty build block in _build_blocks.
+    RETURN_IF_ERROR(child(1)->open(state));
+
+    bool eos = false;
+    while (true) {
+        SCOPED_TIMER(_build_timer);
+        RETURN_IF_CANCELLED(state);
+
+        Block block;
+        RETURN_IF_ERROR(child(1)->get_next(state, &block, &eos));
+        auto rows = block.rows();
+        auto mem_usage = block.allocated_bytes();
+        // Both sizes are read before `block` is moved into _build_blocks below.
+        if (rows != 0) {
+            _build_rows += rows;
+            _total_mem_usage += mem_usage; // released again in close()
+            _build_blocks.emplace_back(std::move(block));
+            _mem_tracker->Consume(mem_usage);
+        }
+        // Guard against the buffered build side using too much memory.
+        RETURN_IF_LIMIT_EXCEEDED(state, "Cross join, while getting next from the child 1.");
+
+        if (eos) {
+            break;
+        }
+    }
+
+    COUNTER_UPDATE(_build_row_counter, _build_rows);
+    // If the right table in the join is empty, the node is already at eos.
+    _eos = _build_rows == 0;
+    return Status::OK();
+}
+
+void VCrossJoinNode::init_get_next(int left_batch_row) {
+    _current_build_pos = 0; // restart at the first build block; left_batch_row is not used here
+}
+
+Status VCrossJoinNode::get_next(RuntimeState* state, Block* block, bool* eos) {
+    RETURN_IF_CANCELLED(state);
+    *eos = false;
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+
+    if (_eos) { // build side was empty, or all output has already been produced
+        *eos = true;
+        return Status::OK();
+    }
+    // dst_columns are the mutable output columns of `block` (created on demand for a fresh block).
+    auto dst_columns = get_mutable_columns(block);
+    ScopedTimer<MonotonicStopWatch> timer(_left_child_timer); // paused around child(0)->get_next() below
+    // NOTE(review): block->rows() is read while rows are appended via dst_columns; confirm they alias.
+    while (block->rows() < state->batch_size() && !_eos) {
+        // The current left row has been paired with every build block: advance to the next left row.
+        if (_current_build_pos == _build_blocks.size()) {
+            _current_build_pos = 0;
+            _left_block_pos++;
+            // Left block exhausted: finish if the left child hit eos, else fetch the next non-empty block.
+            if (_left_block_pos == _left_block.rows()) {
+                _left_block_pos = 0;
+
+                if (_left_side_eos) {
+                    *eos = _eos = true;
+                } else {
+                    do {
+                        release_block_memory(_left_block);
+                        timer.stop();
+                        RETURN_IF_ERROR(child(0)->get_next(state, &_left_block, &_left_side_eos));
+                        timer.start();
+                    } while (_left_block.rows() == 0 && !_left_side_eos);
+                    COUNTER_UPDATE(_left_child_row_counter, _left_block.rows());
+                    if (_left_block.rows() == 0) { // the loop above only exits empty-handed at left eos
+                        *eos = _eos = _left_side_eos;
+                    }
+                }
+            }
+        }
+        // Pair the current left row with successive build blocks until the batch is full or they run out.
+        if (!_eos) {
+            do {
+                const auto& now_process_build_block = _build_blocks[_current_build_pos++];
+                process_left_child_block(dst_columns, now_process_build_block);
+            } while (block->rows() < state->batch_size() &&
+                     _current_build_pos < _build_blocks.size());
+        }
+    }
+    dst_columns.clear(); // drop the mutable column handles before `block` is filtered
+    RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns()));
+
+    reached_limit(block, eos); // presumably trims `block` to the node's limit and sets *eos - confirm
+    return Status::OK();
+}
+
+std::string VCrossJoinNode::build_list_debug_string() {
+    // Renders the structure dump of every buffered build block, one block per line.
+    std::stringstream dump;
+    dump << "BuildBlock(";
+    for (const Block& build_block : _build_blocks) {
+        dump << build_block.dump_structure() << "\n";
+    }
+    return dump.str() + ")";
+}
+
+MutableColumns VCrossJoinNode::get_mutable_columns(Block* block) {
+    // A block that reports mem_reuse() keeps its columns; a fresh one gets empty columns first.
+    if (block->mem_reuse()) {
+        return block->mutate_columns();
+    }
+    for (size_t col = 0; col < _num_existing_columns; ++col) { // left-child columns come first
+        const auto& left_col = _left_block.get_by_position(col);
+        block->insert({left_col.type->create_column(), left_col.type, left_col.name});
+    }
+    for (size_t col = 0; col < _num_columns_to_add; ++col) { // followed by the build-side columns
+        const auto& build_col = _build_blocks[0].get_by_position(col);
+        block->insert({build_col.type->create_column(), build_col.type, build_col.name});
+    }
+    return block->mutate_columns();
+}
+
+void VCrossJoinNode::process_left_child_block(MutableColumns& dst_columns,
+                                              const Block& now_process_build_block) {
+    // Appends one output row per build row: left row `_left_block_pos` next to each build row.
+    const size_t max_added_rows = now_process_build_block.rows(); // size_t: row count is not narrowed
+    for (size_t i = 0; i < _num_existing_columns; ++i) {
+        const ColumnWithTypeAndName& src = _left_block.get_by_position(i);
+        dst_columns[i]->insert_many_from(*src.column, _left_block_pos, max_added_rows);
+    }
+    for (size_t i = 0; i < _num_columns_to_add; ++i) {
+        const ColumnWithTypeAndName& src = now_process_build_block.get_by_position(i);
+        dst_columns[_num_existing_columns + i]->insert_range_from(*src.column, 0, max_added_rows);
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/vcross_join_node.h b/be/src/vec/exec/vcross_join_node.h
new file mode 100644
index 0000000000..aeeeb3a2db
--- /dev/null
+++ b/be/src/vec/exec/vcross_join_node.h
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef DORIS_BE_SRC_QUERY_EXEC_VCROSS_JOIN_NODE_H
+#define DORIS_BE_SRC_QUERY_EXEC_VCROSS_JOIN_NODE_H
+
+#include <boost/thread.hpp>
+#include <string>
+#include <unordered_set>
+
+#include "exec/exec_node.h"
+#include "exec/row_batch_list.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/descriptors.h"
+#include "runtime/mem_pool.h"
+
+#include "vec/core/block.h"
+#include "vec/exec/vblocking_join_node.h"
+
+namespace doris::vectorized {
+// Node for cross joins.
+// Iterates over the left child rows and then the right child rows and, for
+// each combination, writes the output row if the conjuncts are satisfied. The
+// build blocks are kept in a list that is fully constructed from the right child in
+// construct_build_side() (called by BlockingJoinNode::open()) while rows are fetched from
+// the left child as necessary in get_next().
+class VCrossJoinNode final : public VBlockingJoinNode {
+public:
+    VCrossJoinNode(ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs);
+
+    Status prepare(RuntimeState *state) override;
+
+    Status get_next(RuntimeState* state, Block* block, bool* eos) override;
+
+    Status close(RuntimeState *state) override;
+
+protected:
+    void init_get_next(int first_left_row) override; // resets the build-block cursor
+
+    Status construct_build_side(RuntimeState *state) override;
+
+private:
+    // List of build blocks, filled in construct_build_side()
+    Blocks _build_blocks;
+    size_t _current_build_pos = 0; // index of the next build block to pair with the current left row
+
+    size_t _num_existing_columns = 0; // materialized slots of child(0), set in prepare()
+    size_t _num_columns_to_add = 0; // materialized slots of child(1), set in prepare()
+
+    uint64_t _build_rows = 0; // total rows buffered in _build_blocks
+    uint64_t _total_mem_usage = 0; // bytes consumed for the build side; released in close()
+
+    // Returns the mutable output columns of `block`.
+    // If the block reports mem_reuse(), its existing columns are used as-is;
+    // else empty columns are created from the types/names of the left block and the first build block.
+    MutableColumns get_mutable_columns(Block* block);
+    
+    // Appends to dst_columns the current left row combined with every row of one build block.
+    //  dst_columns: mutable output columns (left-child columns first, then build-side columns)
+    //  now_process_build_block: right child block now to process
+    void process_left_child_block(MutableColumns& dst_columns, const Block& now_process_build_block);
+
+    // Returns a debug string for _build_blocks. This is used for debugging during the
+    // build list construction and before doing the join.
+    std::string build_list_debug_string();
+};
+
+}
+
+#endif
+
diff --git a/be/src/vec/exec/vempty_set_node.cpp b/be/src/vec/exec/vempty_set_node.cpp
new file mode 100644
index 0000000000..1d33f94517
--- /dev/null
+++ b/be/src/vec/exec/vempty_set_node.cpp
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vempty_set_node.h"
+
+namespace doris {
+namespace vectorized {
+    VEmptySetNode::VEmptySetNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : ExecNode(pool, tnode, descs) {} // everything is forwarded to ExecNode
+
+    Status VEmptySetNode::get_next(RuntimeState* state, Block* block, bool* eos) {
+        *eos = true; // nothing is ever produced; `block` and `state` are left untouched
+        return Status::OK();
+    }
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vempty_set_node.h b/be/src/vec/exec/vempty_set_node.h
new file mode 100644
index 0000000000..501ba17d2a
--- /dev/null
+++ b/be/src/vec/exec/vempty_set_node.h
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/exec_node.h"
+
+namespace doris {
+namespace vectorized {
+    /// Node that returns an empty result set, i.e., the Block-based get_next() only sets *eos.
+    /// Corresponds to EmptySetNode.java in the FE.
+    class VEmptySetNode : public ExecNode {
+    public:
+        VEmptySetNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+        Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override {
+            return Status::NotSupported("Not Implemented get RowBatch in vecorized execution.");
+        } // the row-based interface is rejected by this vectorized node
+        Status get_next(RuntimeState* state, Block* block, bool* eos) override;
+    };
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/ves_http_scan_node.cpp b/be/src/vec/exec/ves_http_scan_node.cpp
new file mode 100644
index 0000000000..a70acadb89
--- /dev/null
+++ b/be/src/vec/exec/ves_http_scan_node.cpp
@@ -0,0 +1,176 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/ves_http_scan_node.h"
+
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/runtime_state.h"
+#include "runtime/string_value.h"
+#include "runtime/tuple.h"
+#include "runtime/tuple_row.h"
+#include "util/runtime_profile.h"
+#include "util/types.h"
+#include "vec/exprs/vexpr_context.h"
+
+namespace doris::vectorized {
+
+VEsHttpScanNode::VEsHttpScanNode(ObjectPool* pool, const TPlanNode& tnode,
+                                 const DescriptorTbl& descs)
+        : EsHttpScanNode(pool, tnode, descs) {
+    _vectorized = true; // flag is declared outside this file; presumably selects the Block-based path - confirm
+}
+
+VEsHttpScanNode::~VEsHttpScanNode() = default; // nothing to release beyond the members' own destructors
+
+Status VEsHttpScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool* eos) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    if (state->is_cancelled()) {
+        std::unique_lock<std::mutex> l(_block_queue_lock);
+        if (update_status(Status::Cancelled("Cancelled"))) {
+            _queue_writer_cond.notify_all(); // wake scanners waiting for room in the queue
+        }
+    }
+
+    if (_scan_finished.load() || _eos) {
+        *eos = true;
+        return Status::OK();
+    }
+    // Take the next block produced by scanner_scan(), waiting while scanners are still running.
+    std::shared_ptr<vectorized::Block> scanner_block;
+    {
+        std::unique_lock<std::mutex> l(_block_queue_lock);
+        while (_process_status.ok() && !_runtime_state->is_cancelled() &&
+               _num_running_scanners > 0 && _block_queue.empty()) {
+            SCOPED_TIMER(_wait_scanner_timer);
+            _queue_reader_cond.wait_for(l, std::chrono::seconds(1)); // re-check at least once per second
+        }
+        if (!_process_status.ok()) {
+            // Some scanner process failed.
+            return _process_status;
+        }
+        if (_runtime_state->is_cancelled()) {
+            if (update_status(Status::Cancelled("Cancelled"))) {
+                _queue_writer_cond.notify_all();
+            }
+            return _process_status;
+        }
+        if (!_block_queue.empty()) {
+            scanner_block = _block_queue.front();
+            _block_queue.pop_front();
+        }
+    }
+
+    // All scanners have finished and every queued block has been consumed.
+    if (scanner_block == nullptr) {
+        _scan_finished.store(true);
+        *eos = true;
+        return Status::OK();
+    }
+
+    // A queue slot was freed: wake one scanner that may be waiting for room.
+    _queue_writer_cond.notify_one();
+
+    reached_limit(scanner_block.get(), eos);
+    *block = *scanner_block; // copy-assign the queued block into the caller's block
+
+    // First time the limit is reached,
+    // e.g. for a query like 'select * from table1 limit 20'.
+    if (*eos) {
+        _scan_finished.store(true);
+        _queue_writer_cond.notify_all();
+        LOG(INFO) << "VEsHttpScanNode ReachedLimit.";
+        *eos = true; // redundant: *eos is already true in this branch
+    } else {
+        *eos = false; // redundant: *eos is already false in this branch
+    }
+
+    return Status::OK();
+}
+
+Status VEsHttpScanNode::scanner_scan(std::unique_ptr<VEsHttpScanner> scanner) {
+    // Drives one scanner: reads columns, builds filtered blocks and queues them for get_next().
+    RETURN_IF_ERROR(scanner->open());
+    bool scanner_eof = false;
+    // Unsigned so the comparisons against column sizes below do not mix signedness.
+    const size_t batch_size = static_cast<size_t>(_runtime_state->batch_size());
+    std::unique_ptr<MemPool> tuple_pool(new MemPool(mem_tracker().get()));
+    const size_t slot_num = _tuple_desc->slots().size();
+    // NOTE(review): columns[0] is dereferenced below, which assumes at least one slot - confirm.
+    while (!scanner_eof) {
+        std::shared_ptr<vectorized::Block> block(new vectorized::Block());
+        std::vector<vectorized::MutableColumnPtr> columns(slot_num);
+        for (size_t i = 0; i < slot_num; i++) {
+            columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column();
+        }
+        while (columns[0]->size() < batch_size && !scanner_eof) {
+            RETURN_IF_CANCELLED(_runtime_state);
+            // Stop early once the node needs no more data (e.g. the limit was reached in get_next()).
+            if (_scan_finished.load()) {
+                return Status::OK();
+            }
+
+            // Get from scanner
+            RETURN_IF_ERROR(
+                    scanner->get_next(columns, tuple_pool.get(), &scanner_eof, _docvalue_context));
+        }
+
+        if (columns[0]->size() > 0) {
+            size_t n_columns = 0;
+            for (const auto slot_desc : _tuple_desc->slots()) {
+                block->insert(ColumnWithTypeAndName(std::move(columns[n_columns++]),
+                                                    slot_desc->get_data_type_ptr(),
+                                                    slot_desc->col_name()));
+            }
+
+            RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block.get(),
+                                                       _tuple_desc->slots().size()));
+            // Back-pressure: wait (re-checking every second) while the queue is full.
+            std::unique_lock<std::mutex> l(_block_queue_lock);
+            while (_process_status.ok() && !_scan_finished.load() &&
+                   !_runtime_state->is_cancelled() &&
+                   _block_queue.size() >= _max_buffered_batches) {
+                _queue_writer_cond.wait_for(l, std::chrono::seconds(1));
+            }
+            // Process already set failed, so we just return OK
+            if (!_process_status.ok()) {
+                return Status::OK();
+            }
+            // Scan already finished, just return
+            if (_scan_finished.load()) {
+                return Status::OK();
+            }
+            // Runtime state is canceled, just return cancel
+            if (_runtime_state->is_cancelled()) {
+                return Status::Cancelled("Cancelled");
+            }
+            _block_queue.push_back(block);
+
+            // Notify the reader in get_next() that a block is available.
+            _queue_reader_cond.notify_one();
+        }
+    }
+
+    return Status::OK();
+}
+
+Status VEsHttpScanNode::close(RuntimeState* state) {
+    EsHttpScanNode::close(state); // NOTE(review): the base-class status is discarded here
+    _block_queue.clear(); // drop blocks that were queued but never consumed
+    return _process_status;
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/exec/ves_http_scan_node.h b/be/src/vec/exec/ves_http_scan_node.h
new file mode 100644
index 0000000000..d1835a1dba
--- /dev/null
+++ b/be/src/vec/exec/ves_http_scan_node.h
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "exec/es_http_scan_node.h"
+#include "exec/scan_node.h"
+#include "runtime/descriptors.h"
+#include "vec/exec/ves_http_scanner.h"
+namespace doris {
+
+class RuntimeState;
+class Status;
+
+namespace vectorized {
+
+class VEsHttpScanNode : public EsHttpScanNode {
+public:
+    VEsHttpScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    ~VEsHttpScanNode();
+    // Block-based read: hands out the blocks queued by scanner_scan().
+    Status get_next(RuntimeState* state, vectorized::Block* block, bool* eos) override;
+    // Closes the base node and drops any blocks that are still queued.
+    Status close(RuntimeState* state) override;
+
+private:
+    virtual Status scanner_scan(std::unique_ptr<VEsHttpScanner> scanner);
+    // Blocks produced by scanner_scan() and consumed by get_next(); guarded by _block_queue_lock.
+    std::deque<std::shared_ptr<vectorized::Block>> _block_queue;
+    std::mutex _block_queue_lock;
+};
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/exec/ves_http_scanner.cpp b/be/src/vec/exec/ves_http_scanner.cpp
new file mode 100644
index 0000000000..6087257d1d
--- /dev/null
+++ b/be/src/vec/exec/ves_http_scanner.cpp
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/ves_http_scanner.h"
+
+namespace doris {
+
+VEsHttpScanner::~VEsHttpScanner() {
+    close(); // make sure the scanner is closed when it is destroyed
+}
+
+Status VEsHttpScanner::get_next(std::vector<vectorized::MutableColumnPtr>& columns,
+                                MemPool* tuple_pool, bool* eof,
+                                const std::map<std::string, std::string>& docvalue_context) {
+    SCOPED_TIMER(_read_timer);
+    if (_line_eof && _batch_eof) { // both the current batch and the reader are exhausted
+        *eof = true;
+        return Status::OK();
+    }
+    // Keep going until fill_columns() reports no line eof or the reader runs out of batches.
+    while (!_batch_eof) {
+        if (_line_eof || _es_scroll_parser == nullptr) { // no parser yet, or its batch is used up
+            RETURN_IF_ERROR(_es_reader->get_next(&_batch_eof, _es_scroll_parser));
+            if (_batch_eof) {
+                *eof = true;
+                return Status::OK();
+            }
+        }
+        // NOTE(review): the counter is bumped before fill_columns(), even if it only reports line eof.
+        COUNTER_UPDATE(_rows_read_counter, 1);
+        SCOPED_TIMER(_materialize_timer);
+        RETURN_IF_ERROR(_es_scroll_parser->fill_columns(_tuple_desc, columns, tuple_pool, &_line_eof,
+                                                      docvalue_context));
+        if (!_line_eof) {
+            break;
+        }
+    }
+
+    return Status::OK();
+}
+
+} // namespace doris
diff --git a/be/src/vec/exec/ves_http_scanner.h b/be/src/vec/exec/ves_http_scanner.h
new file mode 100644
index 0000000000..3692a61b7d
--- /dev/null
+++ b/be/src/vec/exec/ves_http_scanner.h
@@ -0,0 +1,43 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef BE_EXEC_VES_HTTP_SCANNER_H
+#define BE_EXEC_VES_HTTP_SCANNER_H
+
+#include <exec/es_http_scanner.h>
+
+namespace doris {
+
+class VEsHttpScanner : public EsHttpScanner {
+public:
+    VEsHttpScanner(RuntimeState* state, RuntimeProfile* profile, TupleId tuple_id,
+                  const std::map<std::string, std::string>& properties,
+                  const std::vector<ExprContext*>& conjunct_ctxs, EsScanCounter* counter,
+                  bool doc_value_mode): EsHttpScanner(state, profile, tuple_id, properties,
+                            conjunct_ctxs, counter, doc_value_mode) {}; // all arguments are forwarded to EsHttpScanner
+    ~VEsHttpScanner(); // calls close()
+    // Column-based read: fills `columns` and sets *eof once the reader has no more batches.
+    Status get_next(std::vector<vectorized::MutableColumnPtr>& columns,
+                    MemPool* tuple_pool, bool* eof,
+                    const std::map<std::string, std::string>& docvalue_context);
+
+};
+
+} // namespace doris
+
+#endif
diff --git a/be/src/vec/exec/vexcept_node.cpp b/be/src/vec/exec/vexcept_node.cpp
new file mode 100644
index 0000000000..4240743704
--- /dev/null
+++ b/be/src/vec/exec/vexcept_node.cpp
@@ -0,0 +1,112 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vexcept_node.h"
+
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/runtime_state.h"
+#include "util/runtime_profile.h"
+#include "vec/core/block.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/exec/vset_operation_node.h"
+namespace doris {
+namespace vectorized {
+
+VExceptNode::VExceptNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : VSetOperationNode(pool, tnode, descs) {} // everything is forwarded to VSetOperationNode
+
+Status VExceptNode::init(const TPlanNode& tnode, RuntimeState* state) {
+    RETURN_IF_ERROR(VSetOperationNode::init(tnode, state));
+    DCHECK(tnode.__isset.except_node); // the plan node must carry the except_node payload
+    return Status::OK();
+}
+
+Status VExceptNode::prepare(RuntimeState* state) {
+    // Nothing except-specific to prepare: forward to the set-operation base and relay its status.
+    return VSetOperationNode::prepare(state);
+}
+
+Status VExceptNode::open(RuntimeState* state) {
+    RETURN_IF_ERROR(VSetOperationNode::open(state));
+    // Probe the hash table with the rows of every child after the first one, marking matches.
+    for (int i = 1; i < _children.size(); ++i) {
+        if (i > 1) {
+            refresh_hash_table<false>();
+        }
+
+        RETURN_IF_ERROR(child(i)->open(state));
+        bool eos = false;
+        int probe_expr_ctxs_sz = _child_expr_lists[i].size();
+        _probe_columns.resize(probe_expr_ctxs_sz);
+
+        while (!eos) {
+            RETURN_IF_ERROR(process_probe_block(state, i, &eos));
+            if (_probe_rows == 0) continue;
+
+            Status st;
+            std::visit(
+                    [&](auto&& arg) {
+                        using HashTableCtxType = std::decay_t<decltype(arg)>;
+                        if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+                            HashTableProbe<HashTableCtxType, false> process_hashtable_ctx(
+                                    this, state->batch_size(), _probe_rows);
+                            st = process_hashtable_ctx.mark_data_in_hashtable(arg);
+                        } else {
+                            LOG(FATAL) << "FATAL: uninited hash table";
+                        }
+                    },
+                    _hash_table_variants);
+            // Surface the first probe failure instead of letting a later block's status overwrite it.
+            RETURN_IF_ERROR(st);
+        }
+    }
+    return Status::OK();
+}
+
+Status VExceptNode::get_next(RuntimeState* state, Block* output_block, bool* eos) {
+    SCOPED_TIMER(_probe_timer);
+    Status st;
+    create_mutable_cols(output_block);
+    // Pull the next batch of result rows out of the hash table into output_block.
+    std::visit(
+            [&](auto&& arg) {
+                using HashTableCtxType = std::decay_t<decltype(arg)>;
+                if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+                    HashTableProbe<HashTableCtxType, false> process_hashtable_ctx(
+                            this, state->batch_size(), _probe_rows);
+                    st = process_hashtable_ctx.get_data_in_hashtable(arg, _mutable_cols,
+                                                                     output_block, eos);
+                } else {
+                    LOG(FATAL) << "FATAL: uninited hash table";
+                }
+            },
+            _hash_table_variants);
+    // Do not filter or apply the limit to a block whose extraction failed.
+    RETURN_IF_ERROR(st);
+    RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, output_block,
+                                               output_block->columns()));
+    reached_limit(output_block, eos);
+    return Status::OK();
+}
+
+Status VExceptNode::close(RuntimeState* state) {
+    return VSetOperationNode::close(state); // nothing except-specific to release here
+}
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vexcept_node.h b/be/src/vec/exec/vexcept_node.h
new file mode 100644
index 0000000000..e986c28694
--- /dev/null
+++ b/be/src/vec/exec/vexcept_node.h
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/exec/vset_operation_node.h"
+
+namespace doris {
+namespace vectorized {
+
+class VExceptNode : public VSetOperationNode { // vectorized node for the EXCEPT set operation
+public:
+    VExceptNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override;
+    Status prepare(RuntimeState* state) override;
+    Status open(RuntimeState* state) override; // probes children 1..n against the hash table
+    Status get_next(RuntimeState* state, vectorized::Block* output_block, bool* eos) override;
+    Status close(RuntimeState* state) override;
+    // HashTableProbe is constructed with `this` in open()/get_next() and needs access to the node.
+private:
+    template <class HashTableContext, bool is_intersected>
+    friend class HashTableProbe;
+};
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vexchange_node.cpp b/be/src/vec/exec/vexchange_node.cpp
new file mode 100644
index 0000000000..91b107904d
--- /dev/null
+++ b/be/src/vec/exec/vexchange_node.cpp
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vexchange_node.h"
+
+#include "runtime/exec_env.h"
+#include "runtime/runtime_state.h"
+#include "vec/runtime/vdata_stream_mgr.h"
+#include "vec/runtime/vdata_stream_recvr.h"
+
+namespace doris::vectorized {
+VExchangeNode::VExchangeNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : ExecNode(pool, tnode, descs), // plan-specific settings come from tnode.exchange_node
+          _num_senders(0), // real value arrives later through set_num_senders()
+          _is_merging(tnode.exchange_node.__isset.sort_info), // merge mode iff sort_info is set
+          _stream_recvr(nullptr), // created in prepare()
+          _input_row_desc(descs, tnode.exchange_node.input_row_tuples,
+                          std::vector<bool>(tnode.nullable_tuples.begin(),
+                                            tnode.nullable_tuples.begin() +
+                                                    tnode.exchange_node.input_row_tuples.size())),
+          _offset(tnode.exchange_node.__isset.offset ? tnode.exchange_node.offset : 0) {}
+
+// Runs ExecNode::init; a merging exchange also loads its sort expressions and ordering flags.
+Status VExchangeNode::init(const TPlanNode& tnode, RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::init(tnode, state));
+    if (_is_merging) {
+        const auto& sort_info = tnode.exchange_node.sort_info;
+        RETURN_IF_ERROR(_vsort_exec_exprs.init(sort_info, _pool));
+        _is_asc_order = sort_info.is_asc_order;
+        _nulls_first = sort_info.nulls_first;
+    }
+    return Status::OK();
+}
+
+// Creates the statistics and stream receivers; a merging exchange also prepares its sort exprs.
+Status VExchangeNode::prepare(RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::prepare(state));
+    DCHECK_GT(_num_senders, 0);
+    _sub_plan_query_statistics_recvr.reset(new QueryStatisticsRecvr());
+    _stream_recvr = state->exec_env()->vstream_mgr()->create_recvr(
+            state, _input_row_desc, state->fragment_instance_id(), _id, _num_senders,
+            config::exchg_node_buffer_size_bytes, _runtime_profile.get(), _is_merging,
+            _sub_plan_query_statistics_recvr);
+    if (!_is_merging) {
+        return Status::OK();
+    }
+    // This node's own row descriptor is passed for both descriptor arguments.
+    return _vsort_exec_exprs.prepare(state, _row_descriptor, _row_descriptor, expr_mem_tracker());
+}
+// Opens the base node; a merging exchange also opens its sort exprs and builds the merger.
+Status VExchangeNode::open(RuntimeState* state) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(ExecNode::open(state));
+    if (!_is_merging) {
+        return Status::OK();
+    }
+
+    // Merge mode: open the sort exprs, then hand their ordering contexts to the receiver.
+    RETURN_IF_ERROR(_vsort_exec_exprs.open(state));
+    return _stream_recvr->create_merger(_vsort_exec_exprs.lhs_ordering_expr_ctxs(), _is_asc_order,
+                                        _nulls_first, state->batch_size(), _limit, _offset);
+}
+Status VExchangeNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
+    return Status::NotSupported("Not Implemented VExchange Node::get_next scalar");
+} // RowBatch overload is a stub that always reports NotSupported; data comes via the Block one
+
+// Fetches the next block from the receiver and truncates it once the row limit is reached.
+Status VExchangeNode::get_next(RuntimeState* state, Block* block, bool* eos) {
+    SCOPED_TIMER(runtime_profile()->total_time_counter());
+    RETURN_IF_ERROR(_stream_recvr->get_next(block, eos)); // never count rows of a failed read
+    if (block == nullptr) return Status::OK();
+    if (_num_rows_returned + block->rows() < _limit) {
+        _num_rows_returned += block->rows();
+    } else {
+        *eos = true;
+        block->set_num_rows(_limit - _num_rows_returned);
+        _num_rows_returned = _limit; // keep the counter in step with the truncated block
+    }
+    COUNTER_SET(_rows_returned_counter, _num_rows_returned);
+    return Status::OK();
+}
+
+// Closes the stream receiver, if any, and then the base node.
+Status VExchangeNode::close(RuntimeState* state) {
+    // _stream_recvr is null until prepare() has created it.
+    if (_stream_recvr) _stream_recvr->close();
+    return ExecNode::close(state);
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/vexchange_node.h b/be/src/vec/exec/vexchange_node.h
new file mode 100644
index 0000000000..241acc1258
--- /dev/null
+++ b/be/src/vec/exec/vexchange_node.h
@@ -0,0 +1,58 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "exec/exec_node.h"
+#include "vec/exec/vsort_exec_exprs.h"
+
+namespace doris {
+namespace vectorized {
+class VDataStreamRecvr;
+
+// Exec node fed by a VDataStreamRecvr; runs in merge mode when the plan carries sort_info.
+class VExchangeNode : public ExecNode {
+public:
+    VExchangeNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    virtual ~VExchangeNode() {}
+    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override;
+    virtual Status prepare(RuntimeState* state) override;
+    virtual Status open(RuntimeState* state) override;
+    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override;
+    virtual Status get_next(RuntimeState* state, Block* block, bool* eos) override;
+    virtual Status close(RuntimeState* state) override;
+
+    // Status collect_query_statistics(QueryStatistics* statistics) override;
+    void set_num_senders(int num_senders) { _num_senders = num_senders; } // prepare() DCHECKs > 0
+
+private:
+    int _num_senders;
+    bool _is_merging; // true when tnode.exchange_node.sort_info is set
+    std::shared_ptr<VDataStreamRecvr> _stream_recvr; // created in prepare()
+    RowDescriptor _input_row_desc;
+    std::shared_ptr<QueryStatisticsRecvr> _sub_plan_query_statistics_recvr;
+
+    // used only in merge mode
+    size_t _offset;
+    VSortExecExprs _vsort_exec_exprs;
+    std::vector<bool> _is_asc_order;
+    std::vector<bool> _nulls_first;
+};
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vintersect_node.cpp b/be/src/vec/exec/vintersect_node.cpp
new file mode 100644
index 0000000000..65ee121b26
--- /dev/null
+++ b/be/src/vec/exec/vintersect_node.cpp
@@ -0,0 +1,111 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vintersect_node.h"
+
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/runtime_state.h"
+#include "util/runtime_profile.h"
+#include "vec/core/block.h"
+#include "vec/exec/vset_operation_node.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vexpr_context.h"
+namespace doris {
+namespace vectorized {
+
+VIntersectNode::VIntersectNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : VSetOperationNode(pool, tnode, descs) {} // forwards everything to the base class
+
+Status VIntersectNode::init(const TPlanNode& tnode, RuntimeState* state) {
+    RETURN_IF_ERROR(VSetOperationNode::init(tnode, state)); // base set-operation init first
+    DCHECK(tnode.__isset.intersect_node); // the plan node must carry intersect_node
+    return Status::OK();
+}
+
+// Pure delegation: this node adds nothing to the base-class prepare step.
+Status VIntersectNode::prepare(RuntimeState* state) {
+    return VSetOperationNode::prepare(state);
+}
+
+// Opens the node, then runs every child after the first through the hash table as probe input.
+Status VIntersectNode::open(RuntimeState* state) {
+    RETURN_IF_ERROR(VSetOperationNode::open(state));
+    const int child_count = static_cast<int>(_children.size());
+    for (int i = 1; i < child_count; ++i) {
+        if (i > 1) {
+            refresh_hash_table<true>();
+        }
+        _valid_element_in_hash_tbl = 0;
+        RETURN_IF_ERROR(child(i)->open(state));
+        _probe_columns.resize(_child_expr_lists[i].size());
+
+        bool eos = false;
+        while (!eos) {
+            RETURN_IF_ERROR(process_probe_block(state, i, &eos));
+            if (_probe_rows == 0) continue;
+
+            Status st;
+            std::visit(
+                    [&](auto&& arg) {
+                        using HashTableCtxType = std::decay_t<decltype(arg)>;
+                        if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+                            HashTableProbe<HashTableCtxType, true> process_hashtable_ctx(
+                                    this, state->batch_size(), _probe_rows);
+                            st = process_hashtable_ctx.mark_data_in_hashtable(arg);
+                        } else {
+                            LOG(FATAL) << "FATAL: uninited hash table";
+                        }
+                    },
+                    _hash_table_variants);
+            // Fail fast: a later block must not overwrite an earlier probe error.
+            RETURN_IF_ERROR(st);
+        }
+    }
+    return Status::OK();
+}
+
+// Fills output_block from the hash table via HashTableProbe, then applies conjuncts and limit.
+Status VIntersectNode::get_next(RuntimeState* state, Block* output_block, bool* eos) {
+    SCOPED_TIMER(_probe_timer);
+    create_mutable_cols(output_block);
+    Status st;
+    std::visit(
+            [&](auto&& arg) {
+                using HashTableCtxType = std::decay_t<decltype(arg)>;
+                if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+                    HashTableProbe<HashTableCtxType, true> process_hashtable_ctx(
+                            this, state->batch_size(), _probe_rows);
+                    st = process_hashtable_ctx.get_data_in_hashtable(arg, _mutable_cols,
+                                                                     output_block, eos);
+                } else {
+                    LOG(FATAL) << "FATAL: uninited hash table";
+                }
+            },
+            _hash_table_variants);
+    // Report a hash-table failure before post-processing a block that may be incomplete.
+    RETURN_IF_ERROR(st);
+    RETURN_IF_ERROR(
+            VExprContext::filter_block(_vconjunct_ctx_ptr, output_block, output_block->columns()));
+    reached_limit(output_block, eos);
+    return Status::OK();
+}
+
+Status VIntersectNode::close(RuntimeState* state) { // pure delegation to the base class
+    return VSetOperationNode::close(state);
+}
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vintersect_node.h b/be/src/vec/exec/vintersect_node.h
new file mode 100644
index 0000000000..4e9b21e327
--- /dev/null
+++ b/be/src/vec/exec/vintersect_node.h
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/exec_node.h"
+#include "vec/common/columns_hashing.h"
+#include "vec/common/hash_table/hash_table.h"
+#include "vec/core/materialize_block.h"
+#include "vec/exec/join/join_op.h"
+#include "vec/exec/join/vacquire_list.hpp"
+#include "vec/exec/join/vhash_join_node.h"
+#include "vec/exec/vset_operation_node.h"
+#include "vec/functions/function.h"
+#include "vec/utils/util.hpp"
+
+namespace doris {
+namespace vectorized {
+
+class VExprContext;
+// Set-operation node for INTERSECT; the probing logic lives in vintersect_node.cpp.
+class VIntersectNode : public VSetOperationNode {
+public:
+    VIntersectNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override;
+    Status prepare(RuntimeState* state) override;
+    Status open(RuntimeState* state) override;
+    Status get_next(RuntimeState* state, vectorized::Block* output_block, bool* eos) override;
+    Status close(RuntimeState* state) override;
+
+private:
+    template <class HashTableContext, bool is_intersected> friend class HashTableProbe;
+};
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/exec/vmysql_scan_node.cpp b/be/src/vec/exec/vmysql_scan_node.cpp
new file mode 100644
index 0000000000..980fb57eb8
--- /dev/null
+++ b/be/src/vec/exec/vmysql_scan_node.cpp
@@ -0,0 +1,124 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vmysql_scan_node.h"
+
+#include "exec/text_converter.h"
+#include "exec/text_converter.hpp"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/row_batch.h"
+#include "runtime/runtime_state.h"
+#include "runtime/string_value.h"
+#include "runtime/tuple_row.h"
+#include "util/runtime_profile.h"
+#include "util/types.h"
+namespace doris::vectorized {
+
+VMysqlScanNode::VMysqlScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : MysqlScanNode(pool, tnode, descs) {} // all construction is delegated to MysqlScanNode
+
+VMysqlScanNode::~VMysqlScanNode() {} // the class declares no data members of its own
+
+// Fills `block` with rows fetched from _mysql_scanner, converting each text cell to its column.
+Status VMysqlScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool* eos) {
+    VLOG_CRITICAL << "VMysqlScanNode::GetNext";
+    if (state == NULL || block == NULL || eos == NULL)
+        return Status::InternalError("input is NULL pointer");
+    if (!_is_init) return Status::InternalError("used before initialize.");
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
+    RETURN_IF_CANCELLED(state);
+    bool mem_reuse = block->mem_reuse();
+    DCHECK(block->rows() == 0); // the incoming block is expected to be empty
+    std::vector<vectorized::MutableColumnPtr> columns(_slot_num);
+    bool mysql_eos = false;
+    do { // repeat until the block has rows or the scanner is exhausted
+        for (int i = 0; i < _slot_num; ++i) {
+            if (mem_reuse) {
+                columns[i] = std::move(*block->get_by_position(i).column).mutate();
+            } else {
+                columns[i] = _tuple_desc->slots()[i]->get_empty_mutable_column();
+            }
+        }
+        while (true) {
+            RETURN_IF_CANCELLED(state);
+            int batch_size = state->batch_size();
+            if (columns[0]->size() == batch_size) { // fullness is judged by column 0 only
+                break;
+            }
+            // get_next_row() outputs; index j below counts materialized slots only.
+            char** data = NULL;
+            unsigned long* length = NULL;
+            RETURN_IF_ERROR(_mysql_scanner->get_next_row(&data, &length, &mysql_eos));
+            // End of data: flag eos and keep whatever rows were gathered so far.
+            if (mysql_eos) {
+                *eos = true;
+                break;
+            }
+            int j = 0;
+            for (int i = 0; i < _slot_num; ++i) {
+                auto slot_desc = _tuple_desc->slots()[i];
+                if (!slot_desc->is_materialized()) {
+                    continue;
+                }
+                // A null cell pointer means NULL: stored for nullable slots, an error otherwise.
+                if (data[j] == nullptr) {
+                    if (slot_desc->is_nullable()) {
+                        auto* nullable_column =
+                                reinterpret_cast<vectorized::ColumnNullable*>(columns[i].get());
+                        nullable_column->insert_data(nullptr, 0);
+                    } else {
+                        std::stringstream ss;
+                        ss << "nonnull column contains NULL. table=" << _table_name
+                           << ", column=" << slot_desc->col_name();
+                        return Status::InternalError(ss.str());
+                    }
+                } else {
+                    RETURN_IF_ERROR(
+                            write_text_column(data[j], length[j], slot_desc, &columns[i], state));
+                }
+                j++;
+            }
+        }
+        auto n_columns = 0;
+        if (!mem_reuse) {
+            for (const auto slot_desc : _tuple_desc->slots()) {
+                block->insert(ColumnWithTypeAndName(std::move(columns[n_columns++]),
+                                                    slot_desc->get_data_type_ptr(),
+                                                    slot_desc->col_name()));
+            }
+        } else {
+            columns.clear(); // mem_reuse path: only the local column pointers are released
+        }
+        VLOG_ROW << "VMYSQLScanNode output rows: " << block->rows();
+    } while (block->rows() == 0 && !(*eos));
+    // NOTE(review): no VExprContext::filter_block here, unlike VOdbcScanNode::get_next - confirm.
+    reached_limit(block, eos);
+    return Status::OK();
+}
+
+Status VMysqlScanNode::write_text_column(char* value, int value_length, SlotDescriptor* slot,
+                                         vectorized::MutableColumnPtr* column_ptr,
+                                         RuntimeState* state) { // parses one MySQL text cell
+    if (!_text_converter->write_column(slot, column_ptr, value, value_length, true, false)) {
+        std::stringstream ss; // echo only value_length bytes: the cell may contain NUL bytes
+        ss << "Fail to convert mysql value:'" << std::string(value, value_length) << "' to "
+           << slot->type() << " on column:`" << slot->col_name() + "`";
+        return Status::InternalError(ss.str());
+    }
+    return Status::OK();
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/exec/vmysql_scan_node.h b/be/src/vec/exec/vmysql_scan_node.h
new file mode 100644
index 0000000000..c297caec79
--- /dev/null
+++ b/be/src/vec/exec/vmysql_scan_node.h
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "exec/mysql_scan_node.h"
+#include "exec/mysql_scanner.h"
+#include "exec/scan_node.h"
+#include "runtime/descriptors.h"
+namespace doris {
+
+class TextConverter;
+class TupleDescriptor;
+class RuntimeState;
+class Status;
+
+namespace vectorized {
+
+// MysqlScanNode variant whose get_next fills a vectorized Block.
+class VMysqlScanNode : public MysqlScanNode {
+public:
+    VMysqlScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    ~VMysqlScanNode();
+    // Fills `block` by calling get_next_row() on _mysql_scanner and converting each MySQL
+    // text cell into its column; sets *eos once the scanner reports end of data.
+    Status get_next(RuntimeState* state, vectorized::Block* block, bool* eos) override;
+
+private:
+    Status write_text_column(char* value, int value_length, SlotDescriptor* slot,
+                             vectorized::MutableColumnPtr* column_ptr, RuntimeState* state);
+};
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/exec/vodbc_scan_node.cpp b/be/src/vec/exec/vodbc_scan_node.cpp
new file mode 100644
index 0000000000..dc685c57cc
--- /dev/null
+++ b/be/src/vec/exec/vodbc_scan_node.cpp
@@ -0,0 +1,130 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vodbc_scan_node.h"
+
+#include "exec/text_converter.h"
+#include "exec/text_converter.hpp"
+
+namespace doris {
+namespace vectorized {
+
+VOdbcScanNode::VOdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : OdbcScanNode(pool, tnode, descs, "VOdbcScanNode") {} // type-name string, presumably
+VOdbcScanNode::~VOdbcScanNode() {} // the class declares no data members of its own
+
+// Reads rows from the ODBC scanner into `block`, then applies the conjuncts and the row limit.
+Status VOdbcScanNode::get_next(RuntimeState* state, Block* block, bool* eos) {
+    VLOG_CRITICAL << get_scan_node_type() << "::GetNext";
+
+    if (nullptr == state || nullptr == block || nullptr == eos) {
+        return Status::InternalError("input is NULL pointer");
+    }
+
+    if (!is_init()) {
+        return Status::InternalError("used before initialize.");
+    }
+
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
+    RETURN_IF_CANCELLED(state);
+
+    auto odbc_scanner = get_odbc_scanner();
+    auto tuple_desc = get_tuple_desc();
+    auto text_converter = get_text_converter();
+
+    auto column_size = tuple_desc->slots().size();
+    std::vector<MutableColumnPtr> columns(column_size);
+
+    bool mem_reuse = block->mem_reuse();
+    // only empty block should be here
+    DCHECK(block->rows() == 0);
+
+    // Set to true by odbc_scanner->get_next_row() once there are no more rows to read.
+    bool odbc_eos = false;
+
+    do {
+        RETURN_IF_CANCELLED(state);
+
+        for (auto i = 0; i < column_size; i++) {
+            if (mem_reuse) {
+                columns[i] = std::move(*block->get_by_position(i).column).mutate();
+            } else {
+                columns[i] = tuple_desc->slots()[i]->get_empty_mutable_column();
+            }
+        }
+
+        for (int row_index = 0; true; row_index++) {
+            // block is full, break
+            if (state->batch_size() <= columns[0]->size()) {
+                break;
+            }
+
+            RETURN_IF_ERROR(odbc_scanner->get_next_row(&odbc_eos));
+
+            if (odbc_eos) {
+                *eos = true;
+                break;
+            }
+
+            // Convert the fetched row cell by cell; the scanner's column data is indexed by
+            // materialized position, hence the separate materialized_column_index.
+            for (int column_index = 0, materialized_column_index = 0; column_index < column_size;
+                 ++column_index) {
+                auto slot_desc = tuple_desc->slots()[column_index];
+                // skip non-materialized slots: the FE planner filters those columns out
+                if (!slot_desc->is_materialized()) {
+                    continue;
+                }
+                const auto& column_data = odbc_scanner->get_column_data(materialized_column_index);
+
+                char* value_data = static_cast<char*>(column_data.target_value_ptr);
+                int value_len = column_data.strlen_or_ind;
+                // NOTE(review): confirm write_column copes with an ODBC NULL indicator in value_len.
+                if (!text_converter->write_column(slot_desc, &columns[column_index], value_data,
+                                                  value_len, true, false)) {
+                    std::stringstream ss;
+                    ss << "Fail to convert odbc value:'" << value_data << "' to "
+                       << slot_desc->type() << " on column:`" << slot_desc->col_name() + "`";
+                    return Status::InternalError(ss.str());
+                }
+                materialized_column_index++;
+            }
+        }
+
+        // Before the Block is really used, the other pointers to its columns must be dropped,
+        // so the columns are std::move'd into the block or cleared from `columns`.
+        if (!mem_reuse) {
+            int column_index = 0;
+            for (const auto slot_desc : tuple_desc->slots()) {
+                block->insert(ColumnWithTypeAndName(std::move(columns[column_index++]),
+                                                    slot_desc->get_data_type_ptr(),
+                                                    slot_desc->col_name()));
+            }
+        } else {
+            columns.clear();
+        }
+        VLOG_ROW << "VOdbcScanNode output rows: " << block->rows();
+    } while (block->rows() == 0 && !(*eos));
+
+    RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns()));
+    reached_limit(block, eos);
+
+    return Status::OK();
+}
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vodbc_scan_node.h b/be/src/vec/exec/vodbc_scan_node.h
new file mode 100644
index 0000000000..7c644cdbb8
--- /dev/null
+++ b/be/src/vec/exec/vodbc_scan_node.h
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/odbc_scan_node.h"
+
+namespace doris {
+namespace vectorized {
+
+// OdbcScanNode variant whose get_next fills a vectorized Block.
+class VOdbcScanNode : public OdbcScanNode {
+public:
+    VOdbcScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    ~VOdbcScanNode();
+    Status get_next(RuntimeState* state, Block* block, bool* eos) override;
+};
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/volap_scan_node.cpp b/be/src/vec/exec/volap_scan_node.cpp
new file mode 100644
index 0000000000..b365c1dfb0
--- /dev/null
+++ b/be/src/vec/exec/volap_scan_node.cpp
@@ -0,0 +1,559 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/volap_scan_node.h"
+
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/descriptors.h"
+#include "runtime/exec_env.h"
+#include "runtime/runtime_filter_mgr.h"
+#include "util/priority_thread_pool.hpp"
+#include "vec/core/block.h"
+#include "vec/exec/volap_scanner.h"
+#include "vec/exprs/vexpr.h"
+
+namespace doris::vectorized {
+VOlapScanNode::VOlapScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : OlapScanNode(pool, tnode, descs),
+          _max_materialized_blocks(config::doris_scanner_queue_size) { // cap checked in _add_blocks
+    _materialized_blocks.reserve(_max_materialized_blocks); // pre-size both block vectors to the cap
+    _free_blocks.reserve(_max_materialized_blocks);
+}
+
+void VOlapScanNode::transfer_thread(RuntimeState* state) {
+    // Transfer-thread body: fills the free-block pool, keeps scanner tasks scheduled and moves
+    // scanned blocks into _materialized_blocks. Scanners are opened later, in scanner_thread.
+    Status status = Status::OK();
+    if (_vconjunct_ctx_ptr) {
+        for (auto scanner : _volap_scanners) {
+            status = (*_vconjunct_ctx_ptr)->clone(state, scanner->vconjunct_ctx_ptr());
+            if (!status.ok()) {
+                std::lock_guard<SpinLock> guard(_status_mutex);
+                _status = status;
+                break;
+            }
+        }
+    }
+
+    /*********************************
+     * Basic priority-scheduling strategy:
+     * 1. The initial nice value is derived from the number of ranges the query is split into.
+     *    The more ranges, the more likely it is a large query, so the smaller the nice value.
+     * 2. The nice value is adjusted by the cumulative amount of data the query has read.
+     *    The more data read, the more likely it is a large query, so the smaller the nice value.
+     * 3. The nice value determines the priority of the query:
+     *    the larger the nice value, the earlier the query obtains resources.
+     * 4. Periodically raise the priority of tasks left in the queue so large queries never starve.
+     *********************************/
+    _total_assign_num = 0;
+    _nice = 18 + std::max(0, 2 - (int)_volap_scanners.size() / 5);
+    // Pre-allocate the free pool: per scanner, enough batch_size blocks to cover doris_scanner_row_num rows.
+    auto block_per_scanner = (config::doris_scanner_row_num + (state->batch_size() - 1)) / state->batch_size();
+    for (int i = 0; i < _volap_scanners.size() * block_per_scanner; ++i) {
+        auto block = new Block;
+        for (const auto slot_desc : _tuple_desc->slots()) {
+            auto column_ptr = slot_desc->get_empty_mutable_column();
+            column_ptr->reserve(state->batch_size());
+            block->insert(ColumnWithTypeAndName(std::move(column_ptr),
+                                                    slot_desc->get_data_type_ptr(),
+                                                    slot_desc->col_name()));
+        }
+        _free_blocks.emplace_back(block);
+        _buffered_bytes += block->allocated_bytes();
+    }
+    _mem_tracker->Consume(_buffered_bytes);
+
+    // Main loop: schedule scanner tasks, then drain the blocks they produced.
+    while (LIKELY(status.ok())) {
+        int assigned_thread_num = _start_scanner_thread_task(state, block_per_scanner);
+
+        std::vector<Block*> blocks;
+        {
+            // 1 scanner idle task not empty, assign new scanner task
+            std::unique_lock<std::mutex> l(_scan_blocks_lock);
+
+            // scanner_row_num = 16k
+            // 16k * 10 * 12 * 8 = 15M(>2s)  --> nice=10
+            // 16k * 20 * 22 * 8 = 55M(>6s)  --> nice=0
+            while (_nice > 0 && _total_assign_num > (22 - _nice) * (20 - _nice) * 6) {
+                --_nice;
+            }
+
+            // 2 wait when all scanner are running & no result in queue
+            while (UNLIKELY(_running_thread == assigned_thread_num && _scan_blocks.empty() &&
+                            !_scanner_done)) {
+                SCOPED_TIMER(_scanner_wait_batch_timer);
+                _scan_block_added_cv.wait(l);
+            }
+
+            // 3 transfer result block when queue is not empty
+            if (LIKELY(!_scan_blocks.empty())) {
+                blocks.swap(_scan_blocks);
+                // delete scan_block if transfer thread should be stopped
+                // because scan_block wouldn't be useful anymore
+                if (UNLIKELY(_transfer_done)) {
+                    std::for_each(blocks.begin(), blocks.end(), std::default_delete<Block>());
+                    blocks.clear();
+                }
+            } else {
+                if (_scanner_done) {
+                    break;
+                }
+            }
+        }
+
+        if (!blocks.empty()) {
+            _add_blocks(blocks);
+        }
+    }
+    // Publish completion, then wait until every scanner thread has exited.
+    VLOG_CRITICAL << "TransferThread finish.";
+    _transfer_done = true;
+    _block_added_cv.notify_all();
+    {
+        std::unique_lock<std::mutex> l(_scan_blocks_lock);
+        _scan_thread_exit_cv.wait(l, [this] { return _running_thread == 0; });
+    }
+    VLOG_CRITICAL << "Scanner threads have been exited. TransferThread exit.";
+}
+
+void VOlapScanNode::scanner_thread(VOlapScanner* scanner) {
+    // Thread-pool task: runs one scanner for a bounded number of raw rows and queues its blocks.
+    int64_t wait_time = scanner->update_wait_worker_timer();
+    // Do not use ScopedTimer. There is no guarantee that, the counter
+    // (_scan_cpu_timer, the class member) is not destroyed after `_running_thread==0`.
+    ThreadCpuStopWatch cpu_watch;
+    cpu_watch.start();
+    Status status = Status::OK();
+    bool eos = false;
+    RuntimeState* state = scanner->runtime_state();
+    DCHECK(NULL != state);
+    if (!scanner->is_open()) {
+        status = scanner->open();
+        if (!status.ok()) {
+            std::lock_guard<SpinLock> guard(_status_mutex);
+            _status = status;
+            eos = true;
+        }
+        scanner->set_opened();
+    }
+    // Collect expr contexts of runtime filters that are ready but not yet marked as applied.
+    std::vector<ExprContext*> contexts;
+    auto& scanner_filter_apply_marks = *scanner->mutable_runtime_filter_marks();
+    DCHECK(scanner_filter_apply_marks.size() == _runtime_filter_descs.size());
+    for (size_t i = 0; i < scanner_filter_apply_marks.size(); i++) {
+        if (!scanner_filter_apply_marks[i] && !_runtime_filter_ctxs[i].apply_mark) {
+            IRuntimeFilter* runtime_filter = nullptr;
+            state->runtime_filter_mgr()->get_consume_filter(_runtime_filter_descs[i].filter_id,
+                                                            &runtime_filter);
+            DCHECK(runtime_filter != nullptr);
+            bool ready = runtime_filter->is_ready();
+            if (ready) {
+                runtime_filter->get_prepared_context(&contexts, row_desc(), _expr_mem_tracker);
+                _runtime_filter_ctxs[i].apply_mark = true;
+            }
+        }
+    }
+    // Clone the newly prepared filter contexts into this scanner's conjunct list.
+    if (!contexts.empty()) {
+        std::vector<ExprContext*> new_contexts;
+        auto& scanner_conjunct_ctxs = *scanner->conjunct_ctxs();
+        Expr::clone_if_not_exists(contexts, state, &new_contexts);
+        scanner_conjunct_ctxs.insert(scanner_conjunct_ctxs.end(), new_contexts.begin(),
+                                     new_contexts.end());
+        scanner->set_use_pushdown_conjuncts(true);
+    }
+
+    std::vector<Block*> blocks;
+    // Because we use thread pool to scan data from storage. One scanner can't
+    // use this thread too long, this can starve other query's scanner. So, we
+    // need yield this thread when we do enough work. However, OlapStorage read
+    // data in pre-aggregate mode, then we can't use storage returned data to
+    // judge if we need to yield. So we record all raw data read in this round
+    // scan, if this exceed threshold, we yield this thread.
+    int64_t raw_rows_read = scanner->raw_rows_read();
+    int64_t raw_rows_threshold = raw_rows_read + config::doris_scanner_row_num;
+    bool get_free_block = true;
+
+    while (!eos && raw_rows_read < raw_rows_threshold && get_free_block) {
+        if (UNLIKELY(_transfer_done)) {
+            eos = true;
+            status = Status::Cancelled("Cancelled");
+            LOG(INFO) << "Scan thread cancelled, cause query done, maybe reach limit.";
+            break;
+        }
+
+        auto block = _alloc_block(get_free_block);
+        status = scanner->get_block(_runtime_state, block, &eos);
+        VLOG_ROW << "VOlapScanNode input rows: " << block->rows();
+        if (!status.ok()) {
+            LOG(WARNING) << "Scan thread read OlapScanner failed: " << status.to_string();
+            // Add block ptr in blocks, prevent mem leak in read failed
+            blocks.push_back(block);
+            eos = true;
+            break;
+        }
+        // Empty blocks go straight back to the free pool; non-empty ones are kept for transfer.
+        if (UNLIKELY(block->rows() == 0)) {
+            std::lock_guard<std::mutex> l(_free_blocks_lock);
+            _free_blocks.emplace_back(block);
+        } else {
+            blocks.push_back(block);
+        }
+        raw_rows_read = scanner->raw_rows_read();
+    }
+
+    {
+        // if we failed, check status.
+        if (UNLIKELY(!status.ok())) {
+            _transfer_done = true;
+            std::lock_guard<SpinLock> guard(_status_mutex);
+            if (LIKELY(_status.ok())) {
+                _status = status;
+            }
+        }
+
+        bool global_status_ok = false;
+        {
+            std::lock_guard<SpinLock> guard(_status_mutex);
+            global_status_ok = _status.ok();
+        }
+        if (UNLIKELY(!global_status_ok)) {
+            eos = true;
+            std::for_each(blocks.begin(), blocks.end(), std::default_delete<Block>());
+        } else {
+            std::lock_guard<std::mutex> l(_scan_blocks_lock);
+            _scan_blocks.insert(_scan_blocks.end(), blocks.begin(), blocks.end());
+        }
+        // If eos is true, we will process out of this lock block.
+        if (!eos) {
+            std::lock_guard<std::mutex> l(_volap_scanners_lock);
+            _volap_scanners.push_front(scanner);
+        }
+    }
+    if (eos) {
+        // close out of blocks lock. we do this before _progress update
+        // that can assure this object can keep live before we finish.
+        scanner->close(_runtime_state);
+
+        std::lock_guard<std::mutex> l(_scan_blocks_lock);
+        _progress.update(1);
+        if (_progress.done()) {
+            // every scanner has finished: lets the transfer thread leave its loop
+            _scanner_done = true;
+        }
+    }
+    _scan_cpu_timer->update(cpu_watch.elapsed_time());
+    _scanner_wait_worker_timer->update(wait_time);
+
+    std::unique_lock<std::mutex> l(_scan_blocks_lock);
+    _running_thread--;
+
+    // The transfer thread will wait for `_running_thread==0`, to make sure all scanner threads won't access class members.
+    // Do not access class members after this code.
+    _scan_block_added_cv.notify_one();
+    _scan_thread_exit_cv.notify_one();
+}
+
+Status VOlapScanNode::_add_blocks(std::vector<Block*>& block) {
+    // Hands a batch of scanned blocks over to the queue that get_next() drains.
+    {
+        std::unique_lock<std::mutex> queue_guard(_blocks_lock);
+        // Back-pressure: sleep while the queue is at capacity, unless the transfer
+        // has already been flagged as done.
+        _block_consumed_cv.wait(queue_guard, [this] {
+            return _materialized_blocks.size() < _max_materialized_blocks || _transfer_done;
+        });
+        VLOG_CRITICAL << "Push block to materialized_blocks";
+        _materialized_blocks.insert(_materialized_blocks.end(), block.begin(), block.end());
+    }
+    // New blocks are available: wake one waiter on the added-condition.
+    _block_added_cv.notify_one();
+    return Status::OK();
+}
+
+Status VOlapScanNode::start_scan_thread(RuntimeState* state) {
+    // Builds one VOlapScanner per group of key ranges for every scan range, then starts the transfer thread.
+    if (_scan_ranges.empty()) {
+        _transfer_done = true;
+        return Status::OK();
+    }
+    // ranges constructed from scan keys
+    std::vector<std::unique_ptr<OlapScanRange>> cond_ranges;
+    RETURN_IF_ERROR(_scan_keys.get_key_range(&cond_ranges));
+    // if we can't get ranges from conditions, we give it a total range
+    if (cond_ranges.empty()) {
+        cond_ranges.emplace_back(new OlapScanRange());
+    }
+
+    bool need_split = true;
+    // Skip splitting via get_hints when a limit is set or when there are
+    // already more than 64 condition ranges
+    if (limit() != -1 || cond_ranges.size() > 64) {
+        need_split = false;
+    }
+    // Target scanner count per scan range: 64 divided among the scan ranges, at least 1.
+    int scanners_per_tablet = std::max(1, 64 / (int)_scan_ranges.size());
+
+    std::unordered_set<std::string> disk_set;
+    for (auto& scan_range : _scan_ranges) {
+        std::vector<std::unique_ptr<OlapScanRange>>* ranges = &cond_ranges;
+        std::vector<std::unique_ptr<OlapScanRange>> split_ranges;
+        if (need_split) {
+            auto st = OlapScanNode::get_hints(*scan_range, config::doris_scan_range_row_count,
+                                              _scan_keys.begin_include(), _scan_keys.end_include(),
+                                              cond_ranges, &split_ranges, _runtime_profile.get());
+            if (st.ok()) {
+                ranges = &split_ranges;
+            }
+        }
+        // Group consecutive ranges with the same end_include flag, up to ranges_per_scanner per scanner.
+        int ranges_per_scanner = std::max(1, (int)ranges->size() / scanners_per_tablet);
+        int num_ranges = ranges->size();
+        for (int i = 0; i < num_ranges;) {
+            std::vector<OlapScanRange*> scanner_ranges;
+            scanner_ranges.push_back((*ranges)[i].get());
+            ++i;
+            for (int j = 1; i < num_ranges && j < ranges_per_scanner &&
+                            (*ranges)[i]->end_include == (*ranges)[i - 1]->end_include;
+                 ++j, ++i) {
+                scanner_ranges.push_back((*ranges)[i].get());
+            }
+            VOlapScanner* scanner =
+                    new VOlapScanner(state, this, _olap_scan_node.is_preaggregation,
+                                     _need_agg_finalize, *scan_range);
+            // add scanner to pool before doing prepare.
+            // so that scanner can be automatically deconstructed if prepare failed.
+            _scanner_pool.add(scanner);
+            RETURN_IF_ERROR(scanner->prepare(*scan_range, scanner_ranges, _olap_filter,
+                                             _bloom_filters_push_down));
+
+            _volap_scanners.push_back(scanner);
+            disk_set.insert(scanner->scan_disk());
+        }
+    }
+    COUNTER_SET(_num_disks_accessed_counter, static_cast<int64_t>(disk_set.size()));
+    COUNTER_SET(_num_scanners, static_cast<int64_t>(_volap_scanners.size()));
+
+    // init progress
+    std::stringstream ss;
+    ss << "ScanThread complete (node=" << id() << "):";
+    _progress = ProgressUpdater(ss.str(), _volap_scanners.size(), 1);
+    // Start the transfer thread; it schedules the scanners created above.
+    _transfer_thread.reset(new std::thread(&VOlapScanNode::transfer_thread, this, state));
+
+    return Status::OK();
+}
+
+Status VOlapScanNode::close(RuntimeState* state) {
+    // Stops the transfer thread, frees all queued/pooled blocks and closes the remaining scanners.
+    if (is_closed()) {
+        return Status::OK();
+    }
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::CLOSE));
+    // change done status
+    {
+        std::unique_lock<std::mutex> l(_blocks_lock);
+        _transfer_done = true;
+    }
+    // wake every thread that may be blocked on one of the condition variables
+    _block_consumed_cv.notify_all();
+    _block_added_cv.notify_all();
+    _scan_block_added_cv.notify_all();
+
+    // join transfer thread
+    if (_transfer_thread) _transfer_thread->join();
+
+    // clear some block in queue
+    // TODO: The presence of transfer_thread here may cause Block's memory alloc and be released not in a thread,
+    // which may lead to potential performance problems. we should rethink whether to delete the transfer thread
+    std::for_each(_materialized_blocks.begin(), _materialized_blocks.end(), std::default_delete<Block>());
+    std::for_each(_scan_blocks.begin(), _scan_blocks.end(), std::default_delete<Block>());
+    std::for_each(_free_blocks.begin(), _free_blocks.end(), std::default_delete<Block>());
+    _mem_tracker->Release(_buffered_bytes);
+
+    // Scanners still in the list did not reach eos in scanner_thread (e.g. early
+    // termination), so they are closed explicitly here.
+    for (auto scanner : _volap_scanners) {
+        scanner->close(state);
+    }
+
+    for (auto& filter_desc : _runtime_filter_descs) {
+        IRuntimeFilter* runtime_filter = nullptr;
+        state->runtime_filter_mgr()->get_consume_filter(filter_desc.filter_id, &runtime_filter);
+        DCHECK(runtime_filter != nullptr);
+        runtime_filter->consumer_close();
+    }
+
+    VLOG_CRITICAL << "VOlapScanNode::close()";
+    return ScanNode::close(state);
+}
+
+Status VOlapScanNode::get_next(RuntimeState* state, Block* block, bool* eos) {
+    // Returns the next materialized block via `block`; sets *eos when the limit is hit or no block is left.
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    // check if Canceled.
+    if (state->is_cancelled()) {
+        std::unique_lock<std::mutex> l(_blocks_lock);
+        _transfer_done = true;
+        std::lock_guard<SpinLock> guard(_status_mutex);
+        if (LIKELY(_status.ok())) {
+            _status = Status::Cancelled("Cancelled");
+        }
+        return _status;
+    }
+
+    // check if started.
+    if (!_start) {
+        Status status = start_scan(state);
+
+        if (!status.ok()) {
+            LOG(ERROR) << "StartScan Failed cause " << status.get_error_msg();
+            *eos = true;
+            return status;
+        }
+
+        _start = true;
+    }
+
+    // some conjuncts will be disposed in start_scan function, so
+    // we should check _eos after call start_scan
+    if (_eos) {
+        *eos = true;
+        return Status::OK();
+    }
+
+    // wait for block from queue
+    Block* materialized_block = NULL;
+    {
+        std::unique_lock<std::mutex> l(_blocks_lock);
+        SCOPED_TIMER(_olap_wait_batch_queue_timer);
+        while (_materialized_blocks.empty() && !_transfer_done) {
+            if (state->is_cancelled()) {
+                _transfer_done = true;
+            }
+
+            // use wait_for, not wait, in case to capture the state->is_cancelled()
+            _block_added_cv.wait_for(l, std::chrono::seconds(1));
+        }
+
+        if (!_materialized_blocks.empty()) {
+            materialized_block = _materialized_blocks.back();
+            DCHECK(materialized_block != NULL);
+            _materialized_blocks.pop_back();
+        }
+    }
+
+    // return block
+    if (NULL != materialized_block) {
+        // a queue slot was freed: wake a thread waiting in _add_blocks
+        _block_consumed_cv.notify_one();
+        // exchange contents: `block` gets the scanned data, materialized_block gets the caller's old contents
+        block->swap(*materialized_block);
+        VLOG_ROW << "VOlapScanNode output rows: " << block->rows();
+        reached_limit(block, eos);
+
+        // reach scan node limit
+        if (*eos) {
+            {
+                std::unique_lock<std::mutex> l(_blocks_lock);
+                _transfer_done = true;
+            }
+
+            _block_consumed_cv.notify_all();
+            *eos = true;
+            LOG(INFO) << "VOlapScanNode ReachedLimit.";
+        } else {
+            *eos = false;
+        }
+        // Recycle the swapped-out block object into the free pool used by _alloc_block.
+        {
+            // ReThink whether the SpinLock Better
+            std::lock_guard<std::mutex> l(_free_blocks_lock);
+            _free_blocks.emplace_back(materialized_block);
+        }
+        return Status::OK();
+    }
+
+    // no block was dequeued: report end of stream together with the recorded status
+    *eos = true;
+    std::lock_guard<SpinLock> guard(_status_mutex);
+    return _status;
+}
+
+// TODO: we should register the mem cost of new Block in
+// alloc block
+Block* VOlapScanNode::_alloc_block(bool& get_free_block) {
+    // Reuse a pooled block when one exists; otherwise clear the flag and allocate a new one.
+    std::unique_lock<std::mutex> pool_guard(_free_blocks_lock);
+    if (!_free_blocks.empty()) {
+        Block* recycled = _free_blocks.back();
+        _free_blocks.pop_back();
+        return recycled;
+    }
+    pool_guard.unlock();
+    get_free_block = false;
+    return new Block();
+}
+
+int VOlapScanNode::_start_scanner_thread_task(RuntimeState* state, int block_per_scanner) {
+    // Moves idle scanners to the scan thread pool; returns running + newly submitted thread count.
+    std::list<VOlapScanner*> olap_scanners;
+    int assigned_thread_num = _running_thread;
+    {
+        // How many scanner threads may be started for this query in this round
+        size_t thread_slot_num = 0;
+        {
+            std::lock_guard<std::mutex> l(_free_blocks_lock);
+            // Clamp before subtracting: unsigned arithmetic would otherwise wrap to a huge value
+            const size_t reserved = static_cast<size_t>(assigned_thread_num) * block_per_scanner;
+            const size_t spare = _free_blocks.size() > reserved ? _free_blocks.size() - reserved : 0;
+            thread_slot_num = std::max<size_t>(1, spare / block_per_scanner); // always offer one slot
+        }
+        {
+            std::lock_guard<std::mutex> l(_volap_scanners_lock);
+            thread_slot_num = std::min(thread_slot_num, _volap_scanners.size());
+            for (int i = 0; i < thread_slot_num; ++i) {
+                olap_scanners.push_back(_volap_scanners.front());
+                _volap_scanners.pop_front();
+                _running_thread++;
+                assigned_thread_num++;
+            }
+        }
+    }
+
+    // post volap scanners to thread-pool
+    PriorityThreadPool* thread_pool = state->exec_env()->scan_thread_pool();
+    auto iter = olap_scanners.begin();
+    while (iter != olap_scanners.end()) {
+        PriorityThreadPool::Task task;
+        task.work_function = std::bind(&VOlapScanNode::scanner_thread, this, *iter);
+        task.priority = _nice;
+        (*iter)->start_wait_worker_timer();
+        if (thread_pool->offer(task)) {
+            olap_scanners.erase(iter++);
+        } else {
+            LOG(FATAL) << "Failed to assign scanner task to thread pool!";
+        }
+        ++_total_assign_num;
+    }
+    return assigned_thread_num;
+}
+
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/volap_scan_node.h b/be/src/vec/exec/volap_scan_node.h
new file mode 100644
index 0000000000..921399ee6b
--- /dev/null
+++ b/be/src/vec/exec/volap_scan_node.h
@@ -0,0 +1,69 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/olap_scan_node.h"
+#include "exprs/runtime_filter.h"
+
+namespace doris {
+class ObjectPool;
+class TPlanNode;
+class DescriptorTbl;
+class RowBatch;
+namespace vectorized {
+
+class VOlapScanner;
+
+class VOlapScanNode final : public OlapScanNode { // OlapScanNode variant that produces Blocks
+public:
+    VOlapScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    friend class VOlapScanner;
+    // The RowBatch overload always returns NotSupported; only the Block overload is implemented.
+    Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override {
+        return Status::NotSupported("Not Implemented VOlapScanNode Node::get_next scalar");
+    }
+    Status get_next(RuntimeState* state, Block* block, bool* eos) override;
+    Status close(RuntimeState* state) override;
+private:
+    void transfer_thread(RuntimeState* state);
+    void scanner_thread(VOlapScanner* scanner);
+    Status start_scan_thread(RuntimeState* state) override;
+    // Helpers used by the transfer thread and the scanner threads.
+    Status _add_blocks(std::vector<Block*>& block);
+    int _start_scanner_thread_task(RuntimeState* state, int block_per_scanner);
+    Block* _alloc_block(bool& get_free_block);
+    // _scan_blocks is accessed under _scan_blocks_lock, _materialized_blocks under _blocks_lock.
+    std::vector<Block*> _scan_blocks;
+    std::vector<Block*> _materialized_blocks;
+    std::mutex _blocks_lock;
+    std::condition_variable _block_added_cv;
+    std::condition_variable _block_consumed_cv;
+
+    std::mutex _scan_blocks_lock;
+    std::condition_variable _scan_block_added_cv;
+    // Pool of reusable blocks, accessed under _free_blocks_lock.
+    std::vector<Block*> _free_blocks;
+    std::mutex _free_blocks_lock;
+    // Scanners not currently running on the thread pool, accessed under _volap_scanners_lock.
+    std::list<VOlapScanner*> _volap_scanners;
+    std::mutex _volap_scanners_lock;
+    // Queue size at which _add_blocks starts waiting (config::doris_scanner_queue_size).
+    int _max_materialized_blocks;
+};
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/volap_scanner.cpp b/be/src/vec/exec/volap_scanner.cpp
new file mode 100644
index 0000000000..1b4bb02690
--- /dev/null
+++ b/be/src/vec/exec/volap_scanner.cpp
@@ -0,0 +1,206 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/volap_scanner.h"
+
+#include <memory>
+
+#include "vec/columns/column_complex.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/assert_cast.h"
+#include "vec/core/block.h"
+#include "vec/exec/volap_scan_node.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/runtime/vdatetime_value.h"
+
+namespace doris::vectorized {
+
+VOlapScanner::VOlapScanner(RuntimeState* runtime_state, VOlapScanNode* parent, bool aggregation,
+                           bool need_agg_finalize, const TPaloScanRange& scan_range)
+        : OlapScanner(runtime_state, parent, aggregation, need_agg_finalize, scan_range) {
+} // all arguments are forwarded unchanged to the OlapScanner base; nothing else is initialized here
+
+Status VOlapScanner::get_block(RuntimeState* state, vectorized::Block* block, bool* eof) {
+    // Callers must hand in a block that holds no rows yet.
+    DCHECK(block->rows() == 0);
+
+    // Raw-row bound for this call; once reads reach it the retry loop below stops.
+    const int64_t row_budget_end = raw_rows_read() + config::doris_scanner_row_num;
+    if (!block->mem_reuse()) {
+        for (const auto slot : _tuple_desc->slots()) {
+            block->insert(ColumnWithTypeAndName(slot->get_empty_mutable_column(),
+                                                slot->get_data_type_ptr(), slot->col_name()));
+        }
+    }
+
+    bool keep_reading = true; // retry while filtering leaves the block empty and data remains
+    while (keep_reading) {
+        // Pull the next block from the tablet reader; a failure ends the call with an error.
+        const auto read_res = _tablet_reader->next_block_with_aggregation(block, nullptr, nullptr, eof);
+        if (read_res != OLAP_SUCCESS) {
+            std::stringstream msg;
+            msg << "Internal Error: read storage fail. res=" << read_res;
+            msg << ", tablet=" << _tablet->full_name();
+            msg << ", backend=" << BackendOptions::get_localhost();
+            return Status::InternalError(msg.str());
+        }
+        _num_rows_read += block->rows();
+        _update_realtime_counter();
+        RETURN_IF_ERROR(
+                VExprContext::filter_block(_vconjunct_ctx, block, _tuple_desc->slots().size()));
+        keep_reading = block->rows() == 0 && !(*eof) && raw_rows_read() < row_budget_end;
+    }
+    return Status::OK();
+}
+
+void VOlapScanner::_convert_row_to_block(std::vector<vectorized::MutableColumnPtr>* columns) {
+    // Appends the current row of _read_row_cursor to `columns`, one value per query slot.
+    size_t slots_size = _query_slots.size();
+    for (int i = 0; i < slots_size; ++i) {
+        SlotDescriptor* slot_desc = _query_slots[i];
+        auto cid = _return_columns[i];
+        auto* column_ptr = (*columns)[i].get();
+        if (slot_desc->is_nullable()) {
+            auto* nullable_column = reinterpret_cast<ColumnNullable*>((*columns)[i].get());
+            if (_read_row_cursor.is_null(cid)) {
+                nullable_column->insert_data(nullptr, 0);
+                continue;
+            } else {
+                nullable_column->get_null_map_data().push_back(0);
+                column_ptr = &nullable_column->get_nested_column();
+            }
+        }
+        // Raw cell bytes for this column; interpreted below according to the slot type.
+        char* ptr = (char*)_read_row_cursor.cell_ptr(cid);
+        switch (slot_desc->type().type) {
+        case TYPE_BOOLEAN: {
+            assert_cast<ColumnVector<UInt8>*>(column_ptr)->insert_data(ptr, 0);
+            break;
+        }
+        case TYPE_TINYINT: {
+            assert_cast<ColumnVector<Int8>*>(column_ptr)->insert_data(ptr, 0);
+            break;
+        }
+        case TYPE_SMALLINT: {
+            assert_cast<ColumnVector<Int16>*>(column_ptr)->insert_data(ptr, 0);
+            break;
+        }
+        case TYPE_INT: {
+            assert_cast<ColumnVector<Int32>*>(column_ptr)->insert_data(ptr, 0);
+            break;
+        }
+        case TYPE_BIGINT: {
+            assert_cast<ColumnVector<Int64>*>(column_ptr)->insert_data(ptr, 0);
+            break;
+        }
+        case TYPE_LARGEINT: {
+            assert_cast<ColumnVector<Int128>*>(column_ptr)->insert_data(ptr, 0);
+            break;
+        }
+        case TYPE_FLOAT: {
+            assert_cast<ColumnVector<Float32>*>(column_ptr)->insert_data(ptr, 0);
+            break;
+        }
+        case TYPE_DOUBLE: {
+            assert_cast<ColumnVector<Float64>*>(column_ptr)->insert_data(ptr, 0);
+            break;
+        }
+        case TYPE_CHAR: {
+            Slice* slice = reinterpret_cast<Slice*>(ptr);
+            assert_cast<ColumnString*>(column_ptr)
+                    ->insert_data(slice->data, strnlen(slice->data, slice->size)); // stop at first NUL
+            break;
+        }
+        case TYPE_VARCHAR:
+        case TYPE_STRING: {
+            Slice* slice = reinterpret_cast<Slice*>(ptr);
+            assert_cast<ColumnString*>(column_ptr)->insert_data(slice->data, slice->size);
+            break;
+        }
+        case TYPE_OBJECT: {
+            Slice* slice = reinterpret_cast<Slice*>(ptr);
+            // Append a default bitmap first, then overwrite that new element in place.
+            auto* target_column = assert_cast<ColumnBitmap*>(column_ptr);
+
+            target_column->insert_default();
+            BitmapValue* pvalue = nullptr;
+            int pos = target_column->size() - 1;
+            pvalue = &target_column->get_element(pos);
+            // size != 0: slice->data is deserialized; size == 0: slice->data is used as a BitmapValue object.
+            if (slice->size != 0) {
+                BitmapValue value;
+                value.deserialize(slice->data);
+                *pvalue = std::move(value);
+            } else {
+                *pvalue = std::move(*reinterpret_cast<BitmapValue*>(slice->data));
+            }
+            break;
+        }
+        case TYPE_HLL: {
+            Slice* slice = reinterpret_cast<Slice*>(ptr);
+            if (slice->size != 0) {
+                assert_cast<ColumnString*>(column_ptr)->insert_data(slice->data, slice->size);
+                // TODO: in vector exec engine, it is difficult to set hll size = 0
+                // so we have to serialize in the else branch below, which causes two problems
+                //      1. some unnecessary mem malloc and delay mem release
+                //      2. some unnecessary CPU cost in serialize
+            } else {
+                auto* dst_hll = reinterpret_cast<HyperLogLog*>(slice->data);
+                std::string result(dst_hll->max_serialized_size(), '0');
+                int size = dst_hll->serialize((uint8_t*)result.c_str());
+                result.resize(size);
+                assert_cast<ColumnString*>(column_ptr)->insert_data(result.c_str(), size);
+            }
+            break;
+        }
+        case TYPE_DECIMALV2: {
+            int64_t int_value = *(int64_t*)(ptr);
+            int32_t frac_value = *(int32_t*)(ptr + sizeof(int64_t));
+            DecimalV2Value data(int_value, frac_value);
+            assert_cast<ColumnDecimal<Decimal128>*>(column_ptr)
+                    ->insert_data(reinterpret_cast<char*>(&data), 0);
+            break;
+        }
+        case TYPE_DATETIME: {
+            uint64_t value = *reinterpret_cast<uint64_t*>(ptr);
+            VecDateTimeValue data(value);
+            assert_cast<ColumnVector<Int64>*>(column_ptr)
+                    ->insert_data(reinterpret_cast<char*>(&data), 0);
+            break;
+        }
+        case TYPE_DATE: {
+            uint64_t value = 0; // assembled from 3 bytes, ptr[0] being the least significant
+            value = *(unsigned char*)(ptr + 2);
+            value <<= 8;
+            value |= *(unsigned char*)(ptr + 1);
+            value <<= 8;
+            value |= *(unsigned char*)(ptr);
+            VecDateTimeValue date;
+            date.from_olap_date(value);
+            assert_cast<ColumnVector<Int64>*>(column_ptr)
+                    ->insert_data(reinterpret_cast<char*>(&date), 0);
+            break;
+        }
+        default: {
+            break; // NOTE(review): other types append nothing to the column — confirm this is intended
+        }
+        }
+    }
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/volap_scanner.h b/be/src/vec/exec/volap_scanner.h
new file mode 100644
index 0000000000..5efaf9dcd7
--- /dev/null
+++ b/be/src/vec/exec/volap_scanner.h
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/olap_scanner.h"
+
+#include "vec/olap/block_reader.h"
+
+namespace doris {
+class OlapScanNode;
+class RuntimeProfile;
+class Field;
+class RowBatch;
+
+namespace vectorized {
+class VOlapScanNode;
+
+class VOlapScanner : public OlapScanner {
+    // OlapScanner variant that adds a Block-based read path (get_block).
+public:
+    VOlapScanner(RuntimeState* runtime_state, VOlapScanNode* parent, bool aggregation,
+                 bool need_agg_finalize, const TPaloScanRange& scan_range);
+    Status get_block(RuntimeState* state, vectorized::Block* block, bool* eof);
+    // The row-batch interface is not available here: this always returns NotSupported.
+    Status get_batch(RuntimeState* state, RowBatch* row_batch, bool* eos) {
+        return Status::NotSupported("Not Implemented VOlapScanNode Node::get_next scalar");
+    }
+    VExprContext** vconjunct_ctx_ptr() { return &_vconjunct_ctx; }
+
+protected:
+    virtual void set_tablet_reader() { _tablet_reader = std::make_unique<BlockReader>(); }
+
+private:
+    // TODO: Remove this function after we finish reader vec
+    void _convert_row_to_block(std::vector<vectorized::MutableColumnPtr>* columns);
+    VExprContext* _vconjunct_ctx = nullptr;
+};
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vrepeat_node.cpp b/be/src/vec/exec/vrepeat_node.cpp
new file mode 100644
index 0000000000..287aa6e7bb
--- /dev/null
+++ b/be/src/vec/exec/vrepeat_node.cpp
@@ -0,0 +1,244 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vrepeat_node.h"
+#include "exprs/expr.h"
+#include "gutil/strings/join.h"
+#include "runtime/runtime_state.h"
+#include "util/runtime_profile.h"
+
+namespace doris::vectorized {
+VRepeatNode::VRepeatNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : RepeatNode(pool, tnode, descs), _child_block(nullptr), _virtual_tuple_id(tnode.repeat_node.output_tuple_id), _virtual_tuple_desc(nullptr) {} // _child_block and _virtual_tuple_desc are assigned in prepare()
+
+// Prepares the base RepeatNode, then caches the slot descriptors used by
+// get_repeated_block(): all output slots, all child (input) slots and the
+// virtual tuple that carries the grouping columns. Returns InternalError if
+// the virtual tuple is missing or the slot counts are inconsistent.
+Status VRepeatNode::prepare(RuntimeState* state) {
+    VLOG_CRITICAL << "VRepeatNode::prepare";
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(RepeatNode::prepare(state));
+    // get current all output slots
+    for (const auto& tuple_desc : this->row_desc().tuple_descriptors()) {
+        const auto& slots = tuple_desc->slots();
+        _output_slots.insert(_output_slots.end(), slots.begin(), slots.end());
+    }
+
+    // get all input slots
+    for (const auto& child_tuple_desc : child(0)->row_desc().tuple_descriptors()) {
+        const auto& child_slots = child_tuple_desc->slots();
+        _child_slots.insert(_child_slots.end(), child_slots.begin(), child_slots.end());
+    }
+
+    _virtual_tuple_desc = state->desc_tbl().get_tuple_descriptor(_virtual_tuple_id);
+    if (_virtual_tuple_desc == nullptr) {
+        return Status::InternalError("Failed to get virtual tuple descriptor.");
+    }
+
+    // Only build the diagnostic message when the consistency check actually fails.
+    if (_output_slots.size() != (_child_slots.size() + _virtual_tuple_desc->slots().size())) {
+        std::stringstream ss;
+        ss << "The output slots size " << _output_slots.size()
+           << " is not equal to the sum of child_slots_size " << _child_slots.size()
+           << ",virtual_slots_size " << _virtual_tuple_desc->slots().size();
+        return Status::InternalError(ss.str());
+    }
+    _child_block.reset(new Block());
+    return Status::OK();
+}
+
+// Opens the node by delegating to RepeatNode::open() under the total-time timer.
+Status VRepeatNode::open(RuntimeState* state) {
+    VLOG_CRITICAL << "VRepeatNode::open";
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    return RepeatNode::open(state);
+}
+
+// Builds the output block for the grouping set selected by `repeat_id_idx`.
+// Child columns are copied into the leading output columns; a repeat slot that
+// is absent from the current grouping set is filled with NULLs instead. The
+// remaining columns receive the matching values from _grouping_list.
+// `output_block` must be empty on entry (DCHECKed). Always returns OK.
+Status VRepeatNode::get_repeated_block(Block* child_block, int repeat_id_idx, Block* output_block) {
+    VLOG_CRITICAL << "VRepeatNode::get_repeated_block";
+    DCHECK(child_block != nullptr);
+    DCHECK_EQ(output_block->rows(), 0);
+
+    const size_t child_column_size = child_block->columns();
+    const size_t child_rows = child_block->rows();
+    const size_t column_size = _output_slots.size();
+    const bool mem_reuse = output_block->mem_reuse();
+    DCHECK_EQ(child_column_size, _child_slots.size());
+    DCHECK_LT(child_column_size, column_size);
+    std::vector<vectorized::MutableColumnPtr> columns(column_size);
+    for (size_t i = 0; i < column_size; i++) {
+        if (mem_reuse) {
+            columns[i] = std::move(*output_block->get_by_position(i).column).mutate();
+        } else {
+            columns[i] = _output_slots[i]->get_empty_mutable_column();
+        }
+    }
+
+    /* Fill all slots according to child, for example:select tc1,tc2,sum(tc3) from t1 group by grouping sets((tc1),(tc2));
+     * insert into t1 values(1,2,1),(1,3,1),(2,1,1),(3,1,1);
+     * slot_id_set_list=[[0],[1]],repeat_id_idx=0,
+     * child_block 1,2,1 | 1,3,1 | 2,1,1 | 3,1,1
+     * output_block 1,null,1,1 | 1,null,1,1 | 2,null,1,1 | 3,null,1,1
+     */
+    size_t cur_col = 0;
+    for (size_t i = 0; i < child_column_size; i++) {
+        const ColumnWithTypeAndName& src_column = child_block->get_by_position(i);
+        DCHECK_EQ(_child_slots[i]->type().type, _output_slots[cur_col]->type().type);
+        DCHECK_EQ(_child_slots[i]->col_name(), _output_slots[cur_col]->col_name());
+
+        const std::set<SlotId>& repeat_ids = _slot_id_set_list[repeat_id_idx];
+        const bool is_repeat_slot = _all_slot_ids.find(_output_slots[cur_col]->id()) != _all_slot_ids.end();
+        const bool is_set_null_slot = repeat_ids.find(_output_slots[cur_col]->id()) == repeat_ids.end();
+        const auto src_rows = src_column.column->size();
+        if (is_repeat_slot) {
+            DCHECK(_output_slots[cur_col]->is_nullable());
+            auto* nullable_column = reinterpret_cast<ColumnNullable*>(columns[cur_col].get());
+            auto& null_map = nullable_column->get_null_map_data();
+            auto* column_ptr = columns[cur_col].get();
+
+            // set slot null not in repeat_ids
+            if (is_set_null_slot) {
+                nullable_column->resize(src_rows);
+                memset(nullable_column->get_null_map_data().data(), 1, sizeof(UInt8) * src_rows);
+            } else {
+                if (!src_column.type->is_nullable()) {
+                    // Non-nullable source: flag every row as not-null and copy into the nested column.
+                    for (size_t j = 0; j < src_rows; ++j) {
+                        null_map.push_back(0);
+                    }
+                    column_ptr = &nullable_column->get_nested_column();
+                }
+                column_ptr->insert_range_from(*src_column.column, 0, src_rows);
+            }
+        } else {
+            columns[cur_col]->insert_range_from(*src_column.column, 0, src_rows);
+        }
+        cur_col++;
+    }
+
+    // Fill grouping ID to tuple
+    for (size_t slot_idx = 0; slot_idx < _grouping_list.size(); slot_idx++) {
+        DCHECK_LT(slot_idx, _virtual_tuple_desc->slots().size());
+        const SlotDescriptor* virtual_slot_desc = _virtual_tuple_desc->slots()[slot_idx];
+        DCHECK_EQ(virtual_slot_desc->type().type, _output_slots[cur_col]->type().type);
+        DCHECK_EQ(virtual_slot_desc->col_name(), _output_slots[cur_col]->col_name());
+        const int64_t val = _grouping_list[slot_idx][repeat_id_idx];
+        auto* column_ptr = columns[cur_col].get();
+        if (_output_slots[cur_col]->is_nullable()) {
+            auto* nullable_column = reinterpret_cast<ColumnNullable*>(columns[cur_col].get());
+            auto& null_map = nullable_column->get_null_map_data();
+            column_ptr = &nullable_column->get_nested_column();
+            for (size_t i = 0; i < child_rows; ++i) {
+                null_map.push_back(0);
+            }
+        }
+
+        auto* col = assert_cast<ColumnVector<Int64>*>(column_ptr);
+        for (size_t i = 0; i < child_rows; ++i) {
+            col->insert_value(val);
+        }
+        cur_col++;
+    }
+    DCHECK_EQ(cur_col, column_size);
+
+    // Only a block that is not being reused needs the filled columns inserted.
+    if (!mem_reuse && !columns.empty() && !columns[0]->empty()) {
+        size_t n_columns = 0;
+        for (const auto slot_desc : _output_slots) {
+            output_block->insert(
+                ColumnWithTypeAndName(std::move(columns[n_columns++]), slot_desc->get_data_type_ptr(), slot_desc->col_name()));
+        }
+    }
+    return Status::OK();
+}
+
+// Returns the next repeated block. Each non-empty child block is emitted once
+// per entry of _repeat_id_list; *eos is set once the child is exhausted.
+Status VRepeatNode::get_next(RuntimeState* state, Block* block, bool* eos) {
+    VLOG_CRITICAL << "VRepeatNode::get_next";
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    if (state == nullptr || block == nullptr || eos == nullptr) {
+        return Status::InternalError("input is NULL pointer");
+    }
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
+    RETURN_IF_CANCELLED(state);
+    DCHECK(_repeat_id_idx >= 0);
+    for (const std::vector<int64_t>& v : _grouping_list) {
+        DCHECK(_repeat_id_idx <= (int)v.size());
+    }
+    DCHECK(block->rows() == 0);
+
+    // current child block has finished its repeat, get child's next block
+    if (_child_block->rows() == 0) {
+        // A child may return an empty block before it reaches eos (for example
+        // when a filter removed every row). Keep pulling until rows arrive or
+        // the child is exhausted instead of reporting eos prematurely.
+        while (_child_block->rows() == 0 && !_child_eos) {
+            RETURN_IF_CANCELLED(state);
+            RETURN_IF_ERROR(child(0)->get_next(state, _child_block.get(), &_child_eos));
+        }
+        if (_child_block->rows() == 0) {
+            *eos = true;
+            return Status::OK();
+        }
+    }
+
+    RETURN_IF_ERROR(get_repeated_block(_child_block.get(), _repeat_id_idx, block));
+    _repeat_id_idx++;
+    // All repeats of this child block have been emitted: drop it and start over.
+    int size = _repeat_id_list.size();
+    if (_repeat_id_idx >= size) {
+        release_block_memory(*_child_block.get());
+        _repeat_id_idx = 0;
+    }
+
+    _num_rows_returned += block->rows();
+    COUNTER_SET(_rows_returned_counter, _num_rows_returned);
+    VLOG_ROW << "VRepeatNode output rows: " << block->rows();
+    return Status::OK();
+}
+
+Status VRepeatNode::close(RuntimeState* state) {
+    VLOG_CRITICAL << "VRepeatNode::close";
+    if (is_closed()) return Status::OK();
+    // prepare() allocates _child_block as its last step, so it is still null
+    // when prepare() returned early with an error; guard the dereference.
+    if (_child_block != nullptr) release_block_memory(*_child_block);
+    RETURN_IF_ERROR(child(0)->close(state));
+    return ExecNode::close(state);
+}
+
+// Appends a textual dump of this node (repeat ids and grouping values) to *out.
+void VRepeatNode::debug_string(int indentation_level, std::stringstream* out) const {
+    *out << string(indentation_level * 2, ' ') << "VRepeatNode("
+         << "repeat pattern: [" << JoinElements(_repeat_id_list, ",") << "]\n"
+         << "add " << _grouping_list.size() << " columns. \n"
+         << "added column values: ";
+    for (const auto& values : _grouping_list) {
+        *out << "[" << JoinElements(values, ",") << "] ";
+    }
+    *out << "\n";
+    ExecNode::debug_string(indentation_level, out);
+    *out << ")";
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/vrepeat_node.h b/be/src/vec/exec/vrepeat_node.h
new file mode 100644
index 0000000000..cccf7ad74f
--- /dev/null
+++ b/be/src/vec/exec/vrepeat_node.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/repeat_node.h"
+
+namespace doris {
+
+class ObjectPool;
+class TPlanNode;
+class DescriptorTbl;
+class RuntimeState;
+class Status;
+
+namespace vectorized {
+class VRepeatNode : public RepeatNode {
+    // Block-based variant of RepeatNode: get_next() fills a vectorized Block.
+public:
+    VRepeatNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    ~VRepeatNode() override = default;
+    virtual Status prepare(RuntimeState* state) override;
+    virtual Status open(RuntimeState* state) override;
+    virtual Status get_next(RuntimeState* state, Block* block, bool* eos) override;
+    virtual Status close(RuntimeState* state) override;
+
+protected:
+    virtual void debug_string(int indentation_level, std::stringstream* out) const override;
+
+private:
+    Status get_repeated_block(Block* child_block, int repeat_id_idx, Block* output_block);
+    // Block fetched from the child, plus the child and output slot descriptors.
+    std::unique_ptr<Block> _child_block;
+    std::vector<SlotDescriptor*> _child_slots;
+    std::vector<SlotDescriptor*> _output_slots;
+
+    // _virtual_tuple_id id used for GROUPING_ID().
+    TupleId _virtual_tuple_id;
+    const TupleDescriptor* _virtual_tuple_desc;
+};
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vschema_scan_node.cpp b/be/src/vec/exec/vschema_scan_node.cpp
new file mode 100644
index 0000000000..47db6cd233
--- /dev/null
+++ b/be/src/vec/exec/vschema_scan_node.cpp
@@ -0,0 +1,263 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vschema_scan_node.h"
+
+#include "exec/text_converter.h"
+#include "exec/text_converter.hpp"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/row_batch.h"
+#include "runtime/runtime_state.h"
+#include "runtime/string_value.h"
+#include "runtime/tuple_row.h"
+#include "util/runtime_profile.h"
+#include "util/types.h"
+namespace doris::vectorized {
+
+VSchemaScanNode::VSchemaScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : SchemaScanNode(pool, tnode, descs), _src_single_tuple {nullptr}, _dest_single_tuple {nullptr} {} // buffers are allocated in prepare()
+
+// Frees the two raw byte buffers that prepare() allocated with new char[].
+VSchemaScanNode::~VSchemaScanNode() {
+    delete[] reinterpret_cast<char*>(_src_single_tuple);
+    delete[] reinterpret_cast<char*>(_dest_single_tuple);
+    _src_single_tuple = nullptr;
+    _dest_single_tuple = nullptr;
+}
+
+// Runs SchemaScanNode::prepare(), then allocates the source and destination scratch tuples.
+Status VSchemaScanNode::prepare(RuntimeState* state) {
+    RETURN_IF_ERROR(SchemaScanNode::prepare(state));
+    auto* src_buf = new (std::nothrow) char[_src_tuple_desc->byte_size()];
+    _src_single_tuple = reinterpret_cast<doris::Tuple*>(src_buf);
+    if (_src_single_tuple == nullptr) {
+        return Status::InternalError("new src single tuple failed.");
+    }
+    auto* dest_buf = new (std::nothrow) char[_dest_tuple_desc->byte_size()];
+    _dest_single_tuple = reinterpret_cast<doris::Tuple*>(dest_buf);
+    if (_dest_single_tuple == nullptr) {
+        return Status::InternalError("new desc single tuple failed.");
+    }
+    return Status::OK();
+}
+
+// Produces the next block of schema-table rows: rows are read one at a time,
+// projected into the destination tuple and appended to the block's columns
+// until batch_size rows are collected or the scanner is exhausted.
+Status VSchemaScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool* eos) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    VLOG_CRITICAL << "VSchemaScanNode::GetNext";
+    if (state == nullptr || block == nullptr || eos == nullptr)
+        return Status::InternalError("input is NULL pointer");
+    if (!_is_init) return Status::InternalError("used before initialize.");
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
+    RETURN_IF_CANCELLED(state);
+    DCHECK(block->rows() == 0);
+    std::vector<vectorized::MutableColumnPtr> columns;
+    bool schema_eos = false;
+    do {
+        // Refresh both on every pass: an earlier pass may have inserted columns into
+        // the block or cleared `columns`, so the values from a previous pass are stale.
+        const bool mem_reuse = block->mem_reuse();
+        columns.resize(_slot_num);
+        for (int i = 0; i < _slot_num; ++i) {
+            if (mem_reuse) {
+                columns[i] = std::move(*block->get_by_position(i).column).mutate();
+            } else {
+                columns[i] = _dest_tuple_desc->slots()[i]->get_empty_mutable_column();
+            }
+        }
+        while (true) {
+            RETURN_IF_CANCELLED(state);
+            // get all slots from schema table.
+            RETURN_IF_ERROR(_schema_scanner->get_next_row(_src_single_tuple, _tuple_pool.get(), &schema_eos));
+            if (schema_eos) {
+                *eos = true;
+                break;
+            }
+            // tuple project
+            project_tuple();
+            for (int i = 0; i < _slot_num; ++i) {
+                auto slot_desc = _dest_tuple_desc->slots()[i];
+                if (!slot_desc->is_materialized()) {
+                    continue;
+                }
+                if (_dest_single_tuple->is_null(slot_desc->null_indicator_offset())) {
+                    if (slot_desc->is_nullable()) {
+                        auto* nullable_column =
+                                reinterpret_cast<vectorized::ColumnNullable*>(columns[i].get());
+                        nullable_column->insert_data(nullptr, 0);
+                    } else {
+                        std::stringstream ss;
+                        ss << "nonnull column contains NULL. table=" << _table_name
+                           << ", column=" << slot_desc->col_name();
+                        return Status::InternalError(ss.str());
+                    }
+                } else {
+                    RETURN_IF_ERROR(
+                            write_slot_to_vectorized_column(_dest_single_tuple->get_slot(slot_desc->tuple_offset()), slot_desc, &columns[i]));
+                }
+            }
+            if (columns[0]->size() == state->batch_size()) {
+                break;
+            }
+        }
+        if (!columns.empty() && !columns[0]->empty()) {
+            auto n_columns = 0;
+            if (!mem_reuse) {
+                for (const auto slot_desc : _dest_tuple_desc->slots()) {
+                    block->insert(ColumnWithTypeAndName(std::move(columns[n_columns++]),
+                                                        slot_desc->get_data_type_ptr(),
+                                                        slot_desc->col_name()));
+                }
+            } else {
+                columns.clear();
+            }
+            RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, _dest_tuple_desc->slots().size()));
+            VLOG_ROW << "VSchemaScanNode output rows: " << block->rows();
+        }
+    } while (block->rows() == 0 && !(*eos));
+
+    reached_limit(block, eos);
+    return Status::OK();
+}
+
+// Appends the value stored at `slot` to `*column_ptr`, interpreting it according
+// to slot_desc->type(). For a nullable slot the value goes into the nested
+// column and a 0 (not-null) flag is appended to the null map.
+// Returns InternalError for slot types that are not handled here.
+Status VSchemaScanNode::write_slot_to_vectorized_column(void* slot,
+                                                        SlotDescriptor* slot_desc,
+                                                        vectorized::MutableColumnPtr* column_ptr) {
+    // Work on the column object itself. Casting the MutableColumnPtr* (the address
+    // of the smart pointer) to a concrete column type, as was previously done for
+    // non-nullable slots, is undefined behaviour.
+    vectorized::IColumn* col_ptr = column_ptr->get();
+    if (slot_desc->is_nullable()) {
+        auto* nullable_column = reinterpret_cast<vectorized::ColumnNullable*>(column_ptr->get());
+        nullable_column->get_null_map_data().push_back(0);
+        col_ptr = &nullable_column->get_nested_column();
+    }
+    switch (slot_desc->type().type) {
+        case TYPE_HLL:
+        case TYPE_VARCHAR:
+        case TYPE_CHAR:
+        case TYPE_STRING: {
+            StringValue* str_slot = reinterpret_cast<StringValue*>(slot);
+            reinterpret_cast<vectorized::ColumnString*>(col_ptr)->insert_data(str_slot->ptr, str_slot->len);
+            break;
+        }
+
+        case TYPE_BOOLEAN: {
+            uint8_t num = *reinterpret_cast<bool*>(slot);
+            reinterpret_cast<vectorized::ColumnVector<vectorized::UInt8>*>(col_ptr)->insert_value(num);
+            break;
+        }
+
+        case TYPE_TINYINT: {
+            int8_t num = *reinterpret_cast<int8_t*>(slot);
+            reinterpret_cast<vectorized::ColumnVector<vectorized::Int8>*>(col_ptr)->insert_value(num);
+            break;
+        }
+
+        case TYPE_SMALLINT: {
+            int16_t num = *reinterpret_cast<int16_t*>(slot);
+            reinterpret_cast<vectorized::ColumnVector<vectorized::Int16>*>(col_ptr)->insert_value(num);
+            break;
+        }
+
+        case TYPE_INT: {
+            int32_t num = *reinterpret_cast<int32_t*>(slot);
+            reinterpret_cast<vectorized::ColumnVector<vectorized::Int32>*>(col_ptr)->insert_value(num);
+            break;
+        }
+
+        case TYPE_BIGINT: {
+            int64_t num = *reinterpret_cast<int64_t*>(slot);
+            reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_value(num);
+            break;
+        }
+
+        case TYPE_LARGEINT: {
+            __int128 num;
+            memcpy(&num, slot, sizeof(__int128));
+            reinterpret_cast<vectorized::ColumnVector<vectorized::Int128>*>(col_ptr)->insert_value(num);
+            break;
+        }
+
+        case TYPE_FLOAT: {
+            float num = *reinterpret_cast<float*>(slot);
+            reinterpret_cast<vectorized::ColumnVector<vectorized::Float32>*>(col_ptr)->insert_value(num);
+            break;
+        }
+
+        case TYPE_DOUBLE: {
+            double num = *reinterpret_cast<double*>(slot);
+            reinterpret_cast<vectorized::ColumnVector<vectorized::Float64>*>(col_ptr)->insert_value(num);
+            break;
+        }
+
+        // DATE and DATETIME slots are converted and stored the same way.
+        case TYPE_DATE:
+        case TYPE_DATETIME: {
+            VecDateTimeValue value;
+            DateTimeValue* ts_slot = reinterpret_cast<DateTimeValue*>(slot);
+            value.convert_dt_to_vec_dt(ts_slot);
+            reinterpret_cast<vectorized::ColumnVector<vectorized::Int64>*>(col_ptr)->insert_data(
+                reinterpret_cast<char*>(&value), 0);
+            break;
+        }
+
+        case TYPE_DECIMALV2: {
+            __int128 num = (reinterpret_cast<PackedInt128*>(slot))->value;
+            reinterpret_cast<vectorized::ColumnVector<doris::PackedInt128>*>(col_ptr)->insert_value(num);
+            break;
+        }
+
+        default: {
+            DCHECK(false) << "bad slot type: " << slot_desc->type();
+            std::stringstream ss;
+            ss << "Fail to convert schema type:'" << slot_desc->type() << " on column:`"
+            << slot_desc->col_name() + "`";
+            return Status::InternalError(ss.str());
+        }
+    }
+
+    return Status::OK();
+}
+
+// Copies every materialized slot of the source tuple into the destination tuple,
+// using _index_map to pick the matching source slot; NULL slots are flagged instead.
+void VSchemaScanNode::project_tuple() {
+    memset(_dest_single_tuple, 0, _dest_tuple_desc->num_null_bytes());
+    for (int dest_idx = 0; dest_idx < _slot_num; ++dest_idx) {
+        auto* dest_slot_desc = _dest_tuple_desc->slots()[dest_idx];
+        if (!dest_slot_desc->is_materialized()) {
+            continue;
+        }
+        auto* src_slot_desc = _src_tuple_desc->slots()[_index_map[dest_idx]];
+        if (_src_single_tuple->is_null(src_slot_desc->null_indicator_offset())) {
+            _dest_single_tuple->set_null(dest_slot_desc->null_indicator_offset());
+            continue;
+        }
+        void* dest_slot = _dest_single_tuple->get_slot(dest_slot_desc->tuple_offset());
+        void* src_slot = _src_single_tuple->get_slot(src_slot_desc->tuple_offset());
+        memcpy(dest_slot, src_slot, src_slot_desc->type().get_slot_size());
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exec/vschema_scan_node.h b/be/src/vec/exec/vschema_scan_node.h
new file mode 100644
index 0000000000..51b523256c
--- /dev/null
+++ b/be/src/vec/exec/vschema_scan_node.h
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+
+#include "exec/scan_node.h"
+#include "exec/schema_scan_node.h"
+#include "runtime/descriptors.h"
+namespace doris {
+
+class TextConverter;
+class TupleDescriptor;
+class RuntimeState;
+class Status;
+
+namespace vectorized {
+
+class VSchemaScanNode : public SchemaScanNode {
+    // SchemaScanNode variant whose get_next() fills a vectorized Block.
+public:
+    VSchemaScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    ~VSchemaScanNode();
+    Status prepare(RuntimeState* state) override;
+    virtual Status get_next(RuntimeState* state, vectorized::Block* block, bool* eos);
+
+private:
+    Status write_slot_to_vectorized_column(void* slot, SlotDescriptor* slot_desc,
+                                           vectorized::MutableColumnPtr* col_ptr);
+    void project_tuple();
+    doris::Tuple* _src_single_tuple;  // scratch tuple sized from _src_tuple_desc in prepare()
+    doris::Tuple* _dest_single_tuple; // scratch tuple sized from _dest_tuple_desc in prepare()
+};
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/exec/vselect_node.cpp b/be/src/vec/exec/vselect_node.cpp
new file mode 100644
index 0000000000..7482024d63
--- /dev/null
+++ b/be/src/vec/exec/vselect_node.cpp
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vselect_node.h"
+
+namespace doris {
+namespace vectorized {
+
+VSelectNode::VSelectNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : ExecNode(pool, tnode, descs), _child_eos {false} {} // the child has not signalled eos yet
+
+// Thin forwarder: all initialisation work is done by ExecNode::init(), whose
+// status is returned unchanged.
+Status VSelectNode::init(const TPlanNode& tnode, RuntimeState* state) { return ExecNode::init(tnode, state); }
+
+// Thin forwarder: preparation is done by ExecNode::prepare(), whose status is
+// returned unchanged.
+Status VSelectNode::prepare(RuntimeState* state) { return ExecNode::prepare(state); }
+
+// Opens this node and then its single child; the first failing step's status is returned.
+Status VSelectNode::open(RuntimeState* state) {
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN));
+    RETURN_IF_ERROR(ExecNode::open(state));
+    return child(0)->open(state);
+}
+
+Status VSelectNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
+    return Status::NotSupported("Not Implemented VSelectNode::get_next."); // RowBatch overload always fails
+}
+
+// Pulls blocks from the child until one is non-empty or the child reports eos,
+// then applies the conjuncts to that block and checks the row limit.
+Status VSelectNode::get_next(RuntimeState* state, vectorized::Block* block, bool* eos) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
+    RETURN_IF_CANCELLED(state);
+    for (;;) {
+        RETURN_IF_CANCELLED(state);
+        RETURN_IF_ERROR(_children[0]->get_next(state, block, &_child_eos));
+        if (_child_eos) *eos = true;
+        if (_child_eos || block->rows() != 0) break;
+    }
+
+    RETURN_IF_ERROR(VExprContext::filter_block(_vconjunct_ctx_ptr, block, block->columns()));
+    reached_limit(block, eos);
+
+    return Status::OK();
+}
+
+// Closes the node through ExecNode::close() unless it is already closed, in
+// which case OK is returned without doing anything.
+Status VSelectNode::close(RuntimeState* state) {
+    if (is_closed()) return Status::OK();
+    return ExecNode::close(state);
+}
+
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/exec/vselect_node.h b/be/src/vec/exec/vselect_node.h
new file mode 100644
index 0000000000..9cabb82189
--- /dev/null
+++ b/be/src/vec/exec/vselect_node.h
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "exec/exec_node.h"
+
+namespace doris {
+namespace vectorized {
+
+class VSelectNode : public ExecNode {
+    // Exec node with a Block-based get_next(); the RowBatch overload is declared as well.
+public:
+    VSelectNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
+    virtual Status prepare(RuntimeState* state);
+    virtual Status open(RuntimeState* state);
+    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos);
+    virtual Status get_next(RuntimeState* state, vectorized::Block* block, bool* eos);
+    virtual Status close(RuntimeState* state);
+private:
+    // true if last get_next() call on child signalled eos
+    bool _child_eos;
+};
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp
new file mode 100644
index 0000000000..3e6f73dae7
--- /dev/null
+++ b/be/src/vec/exec/vset_operation_node.cpp
@@ -0,0 +1,379 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vset_operation_node.h"
+
+#include "util/defer_op.h"
+#include "vec/exprs/vexpr.h"
+namespace doris {
+namespace vectorized {
+
+// Inserts the rows of one build-side block into the hash table of an intersect/except node.
+template <class HashTableContext>
+struct HashTableBuild {
+    HashTableBuild(int rows, Block& acquired_block, ColumnRawPtrs& build_raw_ptrs,
+                   VSetOperationNode* operation_node)
+            : _rows(rows),
+              _acquired_block(acquired_block),
+              _build_raw_ptrs(build_raw_ptrs),
+              _operation_node(operation_node) {}
+
+    Status operator()(HashTableContext& hash_table_ctx) {
+        using KeyGetter = typename HashTableContext::State;
+        using Mapped = typename HashTableContext::Mapped;
+        int64_t old_bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes();
+        // Deferred (presumably until scope exit): charge the buffer growth to the node's accounting.
+        Defer defer {[&]() {
+            int64_t bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes();
+            _operation_node->_mem_tracker->Consume(bucket_bytes - old_bucket_bytes);
+            _operation_node->_mem_used += bucket_bytes - old_bucket_bytes;
+        }};
+
+        KeyGetter key_getter(_build_raw_ptrs, _operation_node->_build_key_sz, nullptr);
+
+        for (size_t k = 0; k < _rows; ++k) {
+            auto emplace_result =
+                    key_getter.emplace_key(hash_table_ctx.hash_table, k, _operation_node->_arena);
+
+            if (k + 1 < _rows) { // there is a following row: prefetch for it
+                key_getter.prefetch(hash_table_ctx.hash_table, k + 1, _operation_node->_arena);
+            }
+
+            if (emplace_result.is_inserted()) { // first row with this key wins; duplicates are skipped
+                new (&emplace_result.get_mapped()) Mapped({&_acquired_block, k}); // (block, row index)
+                _operation_node->_valid_element_in_hash_tbl++;
+            }
+        }
+        return Status::OK();
+    }
+
+private:
+    const int _rows;                    // number of rows to insert from the block
+    Block& _acquired_block;             // block whose address is stored in the mapped values
+    ColumnRawPtrs& _build_raw_ptrs;     // key columns handed to the key getter
+    VSetOperationNode* _operation_node; // node owning the arena, key sizes and counters
+};
+
+VSetOperationNode::VSetOperationNode(ObjectPool* pool, const TPlanNode& tnode,
+                                     const DescriptorTbl& descs)
+        : ExecNode(pool, tnode, descs),
+          _valid_element_in_hash_tbl{0}, // no key has been inserted yet
+          _mem_used{0},                  // nothing charged to the mem tracker yet
+          _probe_index{-1},              // stays -1 until process_probe_block() resets it to 0
+          _probe_rows{0} {}
+
+Status VSetOperationNode::close(RuntimeState* state) {
+    // A repeated close() is a no-op.
+    if (is_closed()) return Status::OK();
+
+    for (size_t child_idx = 0; child_idx < _child_expr_lists.size(); ++child_idx) {
+        VExpr::close(_child_expr_lists[child_idx], state);
+    }
+    _mem_tracker->Release(_mem_used);
+    return ExecNode::close(state);
+}
+
+Status VSetOperationNode::init(const TPlanNode& tnode, RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::init(tnode, state));
+    DCHECK_EQ(_conjunct_ctxs.size(), 0);
+    // Points at the thrift expr lists inside tnode, so they are read in place instead of copied.
+    const std::vector<std::vector<::doris::TExpr>>* result_texpr_lists = nullptr;
+    // Only INTERSECT_NODE and EXCEPT_NODE plans are accepted; anything else is rejected.
+    if (tnode.node_type == TPlanNodeType::type::INTERSECT_NODE) {
+        result_texpr_lists = &tnode.intersect_node.result_expr_lists;
+    } else if (tnode.node_type == TPlanNodeType::type::EXCEPT_NODE) {
+        result_texpr_lists = &tnode.except_node.result_expr_lists;
+    } else {
+        return Status::NotSupported("Not Implemented, Check The Operation Node.");
+    }
+    // Create one list of expr contexts per thrift list, appended in the same order.
+    for (const auto& texprs : *result_texpr_lists) {
+        std::vector<VExprContext*> ctxs;
+        RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, texprs, &ctxs));
+        _child_expr_lists.push_back(ctxs);
+    }
+
+    return Status::OK();
+}
+
+Status VSetOperationNode::open(RuntimeState* state) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(ExecNode::open(state));
+    // Open the expr contexts of every child.
+    for (size_t child_idx = 0; child_idx < _child_expr_lists.size(); ++child_idx) {
+        RETURN_IF_ERROR(VExpr::open(_child_expr_lists[child_idx], state));
+    }
+    // child(0) is consumed right here: the hash table is fully built when open() returns OK.
+    return hash_table_build(state);
+}
+
+Status VSetOperationNode::prepare(RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::prepare(state));
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    _build_timer = ADD_TIMER(runtime_profile(), "BuildTime");
+    _probe_timer = ADD_TIMER(runtime_profile(), "ProbeTime");
+    // Prepare the i-th expr list against the row descriptor of the i-th child.
+    for (int child_idx = 0; child_idx < _child_expr_lists.size(); ++child_idx) {
+        RETURN_IF_ERROR(VExpr::prepare(_child_expr_lists[child_idx], state,
+                                       child(child_idx)->row_desc(), expr_mem_tracker()));
+    }
+    // The exprs of child 0 define the build side: record nullability and type of each one.
+    for (auto build_ctx : _child_expr_lists[0]) {
+        const auto build_root = build_ctx->root();
+        _build_not_ignore_null.push_back(build_root->is_nullable());
+        _left_table_data_types.push_back(build_root->data_type());
+    }
+    hash_table_init();
+    return Status::OK();
+}
+
+void VSetOperationNode::hash_table_init() {
+    if (_child_expr_lists[0].size() == 1 && (!_build_not_ignore_null[0])) {
+        // Exactly one non-nullable key column: choose the context by the column's primitive type.
+        switch (_child_expr_lists[0][0]->root()->result_type()) {
+        case TYPE_BOOLEAN:
+        case TYPE_TINYINT:
+            _hash_table_variants.emplace<I8HashTableContext>();
+            break;
+        case TYPE_SMALLINT:
+            _hash_table_variants.emplace<I16HashTableContext>();
+            break;
+        case TYPE_INT:
+        case TYPE_FLOAT:
+            _hash_table_variants.emplace<I32HashTableContext>();
+            break;
+        case TYPE_BIGINT:
+        case TYPE_DOUBLE:
+        case TYPE_DATETIME:
+        case TYPE_DATE:
+            _hash_table_variants.emplace<I64HashTableContext>();
+            break;
+        case TYPE_LARGEINT:
+        case TYPE_DECIMALV2:
+            _hash_table_variants.emplace<I128HashTableContext>();
+            break;
+        default: // every other type uses the serialized-key context
+            _hash_table_variants.emplace<SerializedHashTableContext>();
+        }
+        return;
+    }
+    // Otherwise: try to pack all key columns into one fixed-width key (64/128/256 bit).
+    bool use_fixed_key = true;
+    bool has_null = false;
+    int key_byte_size = 0;
+
+    _probe_key_sz.resize(_child_expr_lists[1].size());
+    _build_key_sz.resize(_child_expr_lists[0].size());
+    for (int i = 0; i < _child_expr_lists[0].size(); ++i) {
+        const auto vexpr = _child_expr_lists[0][i]->root();
+        const auto& data_type = vexpr->data_type();
+        // A type without a maximum value size rules out the fixed-width key.
+        if (!data_type->have_maximum_size_of_value()) {
+            use_fixed_key = false;
+            break;
+        }
+        // NOTE(review): the byte subtracted for nullable types is presumably the null flag - confirm.
+        auto is_null = data_type->is_nullable();
+        has_null |= is_null;
+        _build_key_sz[i] = data_type->get_maximum_size_of_value_in_memory() - (is_null ? 1 : 0);
+        _probe_key_sz[i] = _build_key_sz[i];
+        key_byte_size += _probe_key_sz[i];
+    }
+    // Even the widest fixed key (UInt256) must hold its null map plus all key bytes.
+    if (std::tuple_size<KeysNullMap<UInt256>>::value + key_byte_size > sizeof(UInt256)) {
+        use_fixed_key = false;
+    }
+    if (use_fixed_key) {
+        if (has_null) { // nullable keys: the null map size counts against the key width
+            if (std::tuple_size<KeysNullMap<UInt64>>::value + key_byte_size <= sizeof(UInt64)) {
+                _hash_table_variants.emplace<I64FixedKeyHashTableContext<true>>();
+            } else if (std::tuple_size<KeysNullMap<UInt128>>::value + key_byte_size <=
+                       sizeof(UInt128)) {
+                _hash_table_variants.emplace<I128FixedKeyHashTableContext<true>>();
+            } else {
+                _hash_table_variants.emplace<I256FixedKeyHashTableContext<true>>();
+            }
+        } else { // no nullable key: only the key bytes have to fit
+            if (key_byte_size <= sizeof(UInt64)) {
+                _hash_table_variants.emplace<I64FixedKeyHashTableContext<false>>();
+            } else if (key_byte_size <= sizeof(UInt128)) {
+                _hash_table_variants.emplace<I128FixedKeyHashTableContext<false>>();
+            } else {
+                _hash_table_variants.emplace<I256FixedKeyHashTableContext<false>>();
+            }
+        }
+    } else { // variable-length or too wide: use the serialized-key context
+        _hash_table_variants.emplace<SerializedHashTableContext>();
+    }
+}
+
+// Opens child(0) and feeds every block it produces, until eos, into the hash table.
+Status VSetOperationNode::hash_table_build(RuntimeState* state) {
+    RETURN_IF_ERROR(child(0)->open(state));
+    Block build_block;
+    bool build_eos = false;
+    do {
+        build_block.clear();
+        SCOPED_TIMER(_build_timer);
+        RETURN_IF_CANCELLED(state);
+        RETURN_IF_ERROR(child(0)->get_next(state, &build_block, &build_eos));
+
+        const size_t block_bytes = build_block.allocated_bytes();
+        _mem_tracker->Consume(block_bytes);
+        _mem_used += block_bytes;
+
+        RETURN_IF_LIMIT_EXCEEDED(state, "Set Operation Node, while getting next from the child 0.");
+        RETURN_IF_ERROR(process_build_block(build_block));
+        RETURN_IF_LIMIT_EXCEEDED(state, "Set Operation Node, while constructing the hash table.");
+    } while (!build_eos);
+    return Status::OK();
+}
+
+Status VSetOperationNode::process_build_block(Block& block) {
+    // Hands a non-empty block over to _acquire_list and inserts its rows into the hash table.
+    size_t rows = block.rows();
+    if (rows == 0) {
+        return Status::OK();
+    }
+    auto& acquired_block = _acquire_list.acquire(std::move(block));
+    vectorized::materialize_block_inplace(acquired_block);
+    ColumnRawPtrs raw_ptrs(_child_expr_lists[0].size());
+    RETURN_IF_ERROR(extract_build_column(acquired_block, raw_ptrs));
+    // Written by the visitor so that a failing build is returned instead of being dropped.
+    Status build_status = Status::OK();
+    std::visit(
+            [&](auto&& arg) {
+                using HashTableCtxType = std::decay_t<decltype(arg)>;
+                if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+                    HashTableBuild<HashTableCtxType> hash_table_build_process(rows, acquired_block,
+                                                                              raw_ptrs, this);
+                    build_status = hash_table_build_process(arg);
+                } else {
+                    LOG(FATAL) << "FATAL: uninited hash table";
+                }
+            },
+            _hash_table_variants);
+    return build_status;
+}
+
+Status VSetOperationNode::process_probe_block(RuntimeState* state, int child_id, bool* eos) {
+    // Drop the helper columns appended for the previous probe block. Their ids were recorded
+    // in ascending order, so every earlier erase shifts the remaining ones left by one.
+    int erased = 0;
+    for (auto inserted_id : _probe_column_inserted_id) {
+        _probe_block.erase(inserted_id - erased);
+        ++erased;
+    }
+    _probe_column_inserted_id.clear();
+    release_block_memory(_probe_block, child_id);
+    _probe_index = 0;
+    _probe_rows = 0;
+
+    RETURN_IF_CANCELLED(state);
+    RETURN_IF_ERROR(child(child_id)->get_next(state, &_probe_block, eos));
+    _probe_rows = _probe_block.rows();
+    return extract_probe_column(_probe_block, _probe_columns, child_id);
+}
+
+Status VSetOperationNode::extract_build_column(Block& block, ColumnRawPtrs& raw_ptrs) {
+    // Evaluates every build-side expr on `block` and stores the resulting key column pointers.
+    for (size_t i = 0; i < _child_expr_lists[0].size(); ++i) {
+        int result_col_id = -1;
+        RETURN_IF_ERROR(_child_expr_lists[0][i]->execute(&block, &result_col_id));
+        DCHECK_GE(result_col_id, 0); // checked before the id is used as a block position
+        auto& result_column = block.get_by_position(result_col_id).column;
+        result_column = result_column->convert_to_full_column_if_const();
+        auto column = result_column.get();
+
+        if (auto* nullable = check_and_get_column<ColumnNullable>(*column)) {
+            // Keep the nullable wrapper only for keys flagged in _build_not_ignore_null.
+            if (_build_not_ignore_null[i]) {
+                raw_ptrs[i] = nullable;
+            } else {
+                raw_ptrs[i] = &nullable->get_nested_column();
+            }
+        } else {
+            raw_ptrs[i] = column;
+        }
+        _build_col_idx.insert({result_col_id, i}); // block position -> index of the key expr
+    }
+    return Status::OK();
+}
+
+Status VSetOperationNode::extract_probe_column(Block& block, ColumnRawPtrs& raw_ptrs,
+                                               int child_id) {
+    if (_probe_rows == 0) { // nothing was fetched: leave raw_ptrs untouched
+        return Status::OK();
+    }
+    // Evaluate the exprs of child `child_id` and store one key column pointer per expr.
+    for (size_t i = 0; i < _child_expr_lists[child_id].size(); ++i) {
+        int result_col_id = -1;
+        RETURN_IF_ERROR(_child_expr_lists[child_id][i]->execute(&block, &result_col_id));
+        // Constant result columns are expanded to full columns before their pointer is taken.
+        block.get_by_position(result_col_id).column =
+                 block.get_by_position(result_col_id).column->convert_to_full_column_if_const();
+        auto column = block.get_by_position(result_col_id).column.get();
+
+        if (auto* nullable = check_and_get_column<ColumnNullable>(*column)) {
+            auto& col_nested = nullable->get_nested_column();
+            if (_build_not_ignore_null[i]) { // mirror the choice made for the build column
+                raw_ptrs[i] = nullable;
+            } else {
+                raw_ptrs[i] = &col_nested;
+            }
+
+        } else {
+            if (_build_not_ignore_null[i]) { // build key is nullable, probe column is not: wrap it
+                auto column_ptr = make_nullable(block.get_by_position(result_col_id).column, false);
+                _probe_column_inserted_id.emplace_back(block.columns()); // position of the column appended next
+                block.insert(
+                        {column_ptr, make_nullable(block.get_by_position(result_col_id).type), ""});
+                column = column_ptr.get();
+            }
+
+            raw_ptrs[i] = column;
+        }
+    }
+    return Status::OK();
+}
+
+void VSetOperationNode::create_mutable_cols(Block* output_block) {
+    // One output column per build-side type: taken from the block on mem reuse, else created.
+    const size_t column_count = _left_table_data_types.size();
+    _mutable_cols.resize(column_count);
+    if (output_block->mem_reuse()) {
+        for (size_t col = 0; col < column_count; ++col)
+            _mutable_cols[col] = std::move(*output_block->get_by_position(col).column).mutate();
+    } else {
+        for (size_t col = 0; col < column_count; ++col)
+            _mutable_cols[col] = _left_table_data_types[col]->create_column();
+    }
+}
+
+void VSetOperationNode::debug_string(int indentation_level, std::stringstream* out) const {
+    // Indentation, the expr list of every child, then the generic ExecNode description.
+    *out << std::string(indentation_level * 2, ' ') << " _child_expr_lists=[";
+    for (const auto& child_exprs : _child_expr_lists) {
+        *out << VExpr::debug_string(child_exprs) << ", ";
+    }
+    *out << "] \n";
+    ExecNode::debug_string(indentation_level, out);
+    *out << ")" << std::endl;
+}
+
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h
new file mode 100644
index 0000000000..be97d3c7b6
--- /dev/null
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -0,0 +1,245 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "codegen/doris_ir.h"
+#include "exec/exec_node.h"
+#include "runtime/row_batch.h"
+#include "runtime/runtime_state.h"
+#include "vec/core/materialize_block.h"
+#include "vec/exec/join/join_op.h"
+#include "vec/exec/join/vacquire_list.hpp"
+#include "vec/exec/join/vhash_join_node.h"
+#include "vec/functions/function.h"
+#include "vec/utils/util.hpp"
+
+namespace doris {
+
+namespace vectorized {
+
+class VSetOperationNode : public ExecNode { // accepts INTERSECT_NODE and EXCEPT_NODE plans (see init())
+public:
+    VSetOperationNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+
+    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
+    virtual Status prepare(RuntimeState* state);
+    virtual Status open(RuntimeState* state);
+    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
+        return Status::NotSupported("Not Implemented get RowBatch in vecorized execution.");
+    }
+    virtual Status close(RuntimeState* state);
+    virtual void debug_string(int indentation_level, std::stringstream* out) const;
+
+protected:
+    // TODO: building the hash table works the same way as in the join node;
+    // the shared steps should be factored out and offered to both.
+    void hash_table_init();
+    Status hash_table_build(RuntimeState* state);
+    Status process_build_block(Block& block);
+    Status extract_build_column(Block& block, ColumnRawPtrs& raw_ptrs);
+    Status extract_probe_column(Block& block, ColumnRawPtrs& raw_ptrs, int child_id);
+    template <bool keep_matched>
+    void refresh_hash_table();
+    Status process_probe_block(RuntimeState* state, int child_id, bool* eos);
+    void create_mutable_cols(Block* output_block);
+
+protected:
+    HashTableVariants _hash_table_variants;
+    // Per-key byte sizes computed in hash_table_init() for the fixed-width key path.
+    std::vector<size_t> _probe_key_sz;
+    std::vector<size_t> _build_key_sz;
+    std::vector<bool> _build_not_ignore_null; // is_nullable() of each expr of child 0
+
+    Arena _arena;
+    AcquireList<Block> _acquire_list;
+    // Number of elements in the hash table that are currently counted as valid.
+    int64_t _valid_element_in_hash_tbl;
+
+    // The i-th result expr list refers to the i-th child.
+    std::vector<std::vector<VExprContext*>> _child_expr_lists;
+    // Data types of the build-side (child 0) exprs.
+    DataTypes _left_table_data_types;
+    // first: column id, which may point to the original column or to a cast column
+    // second: index into the build column types
+    std::unordered_map<int, int> _build_col_idx;
+    // Bytes consumed from the mem tracker so far; released again in close().
+    int64_t _mem_used;
+    // Ids of the columns appended to the probe block while extracting probe columns.
+    std::vector<uint16_t> _probe_column_inserted_id;
+
+    Block _probe_block;
+    ColumnRawPtrs _probe_columns;
+    std::vector<MutableColumnPtr> _mutable_cols;
+    int _probe_index;
+    size_t _probe_rows;
+    RuntimeProfile::Counter* _build_timer = nullptr; // time to build hash table, set in prepare()
+    RuntimeProfile::Counter* _probe_timer = nullptr; // time to probe, set in prepare()
+
+    template <class HashTableContext>
+    friend struct HashTableBuild;
+    template <class HashTableContext, bool is_intersected>
+    friend struct HashTableProbe;
+};
+
+template <bool keep_matched> // keep_matched: true on the intersect path, false on the except path
+void VSetOperationNode::refresh_hash_table() {
+    std::visit(
+            [&](auto&& arg) {
+                using HashTableCtxType = std::decay_t<decltype(arg)>;
+                if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) {
+                    HashTableCtxType tmp_hash_table; // only filled when the table gets shrunk
+                    bool is_need_shrink =
+                            arg.hash_table.should_be_shrink(_valid_element_in_hash_tbl);
+                    if (is_need_shrink) {
+                        tmp_hash_table.hash_table.init_buf_size(
+                                _valid_element_in_hash_tbl / arg.hash_table.get_factor() + 1);
+                    }
+                    // Walk every entry once, starting from the context's own iterator.
+                    arg.init_once();
+                    auto& iter = arg.iter;
+                    for (; iter != arg.hash_table.end(); ++iter) {
+                        auto& mapped = iter->get_second();
+                        auto it = mapped.begin();
+
+                        if constexpr (keep_matched) { //intersected
+                            if (it->visited) {
+                                it->visited = false; // clear the mark so a later probe can set it again
+                                if (is_need_shrink)
+                                    tmp_hash_table.hash_table.insert(iter->get_value());
+                            } else { // unmatched entry: removed from the table in place
+                                arg.hash_table.delete_zero_key(iter->get_first());
+                                iter->set_zero();
+                            }
+                        } else { //except
+                            if (!it->visited && is_need_shrink) { // only unvisited entries are copied
+                                tmp_hash_table.hash_table.insert(iter->get_value());
+                            }
+                        }
+                    }
+                    // NOTE(review): presumably lets the next init_once() restart arg.iter - confirm.
+                    arg.inited = false;
+                    if (is_need_shrink) {
+                        arg.hash_table = std::move(tmp_hash_table.hash_table);
+                    }
+
+                } else {
+                    LOG(FATAL) << "FATAL: uninited hash table";
+                }
+            },
+            _hash_table_variants);
+}
+
+template <class HashTableContext, bool is_intersected> // is_intersected: true = intersect, false = except
+struct HashTableProbe {
+    HashTableProbe(VSetOperationNode* operation_node, int batch_size, int probe_rows)
+            : _operation_node(operation_node),
+              _left_table_data_types(operation_node->_left_table_data_types),
+              _batch_size(batch_size),
+              _probe_rows(probe_rows),
+              _probe_block(operation_node->_probe_block),
+              _probe_index(operation_node->_probe_index),
+              _num_rows_returned(operation_node->_num_rows_returned),
+              _probe_raw_ptrs(operation_node->_probe_columns),
+              _arena(operation_node->_arena),
+              _rows_returned_counter(operation_node->_rows_returned_counter),
+              _build_col_idx(operation_node->_build_col_idx),
+              _mutable_cols(operation_node->_mutable_cols) {}
+    // Looks up the probe rows from _probe_index up to _probe_rows and marks matching entries.
+    Status mark_data_in_hashtable(HashTableContext& hash_table_ctx) {
+        using KeyGetter = typename HashTableContext::State;
+        using Mapped = typename HashTableContext::Mapped;
+
+        KeyGetter key_getter(_probe_raw_ptrs, _operation_node->_probe_key_sz, nullptr);
+
+        for (; _probe_index < _probe_rows;) {
+            auto find_result = key_getter.find_key(hash_table_ctx.hash_table, _probe_index, _arena);
+            if (find_result.is_found()) { //if found, marked visited
+                auto it = find_result.get_mapped().begin();
+                if (!(it->visited)) { // count each entry only on its first hit
+                    it->visited = true;
+                    if constexpr (is_intersected) //intersected
+                        _operation_node->_valid_element_in_hash_tbl++;
+                    else
+                        _operation_node->_valid_element_in_hash_tbl--; //except
+                }
+            }
+            _probe_index++;
+        }
+        return Status::OK();
+    }
+    // Appends the build row referenced by the first element of `value` to the output columns.
+    void add_result_columns(RowRefList& value, int& block_size) {
+        for (auto idx = _build_col_idx.begin(); idx != _build_col_idx.end(); ++idx) {
+            auto& column = *value.begin()->block->get_by_position(idx->first).column;
+            _mutable_cols[idx->second]->insert_from(column, value.begin()->row_num);
+        }
+        block_size++;
+    }
+    // Emits result rows, resuming at hash_table_ctx.iter; at most _batch_size rows per call.
+    Status get_data_in_hashtable(HashTableContext& hash_table_ctx,
+                                 std::vector<MutableColumnPtr>& mutable_cols, Block* output_block,
+                                 bool* eos) { // note: `mutable_cols` is not read; _mutable_cols is used
+        hash_table_ctx.init_once();
+        int left_col_len = _left_table_data_types.size();
+        auto& iter = hash_table_ctx.iter;
+        auto block_size = 0;
+
+        for (; iter != hash_table_ctx.hash_table.end() && block_size < _batch_size; ++iter) {
+            auto& value = iter->get_second();
+            auto it = value.begin();
+            if constexpr (is_intersected) {
+                if (it->visited) { // intersect: entries marked visited by the probe are the result
+                    add_result_columns(value, block_size);
+                }
+            } else {
+                if (!it->visited) { // except: entries never visited by the probe are the result
+                    add_result_columns(value, block_size);
+                }
+            }
+        }
+
+        *eos = iter == hash_table_ctx.hash_table.end(); // eos once the iterator reached the end
+        if (!output_block->mem_reuse()) { // no reuse: move the filled columns into the block
+            for (int i = 0; i < left_col_len; ++i) {
+                output_block->insert(ColumnWithTypeAndName(std::move(_mutable_cols[i]),
+                                                           _left_table_data_types[i], ""));
+            }
+        } else {
+            _mutable_cols.clear();
+        }
+
+        return Status::OK();
+    }
+
+private:
+    VSetOperationNode* _operation_node;
+    const DataTypes& _left_table_data_types; // the members below alias state owned by the node
+    const int _batch_size;
+    const size_t _probe_rows;
+    const Block& _probe_block;
+    int& _probe_index;
+    int64_t& _num_rows_returned;
+    ColumnRawPtrs& _probe_raw_ptrs;
+    Arena& _arena;
+    RuntimeProfile::Counter* _rows_returned_counter;
+    std::unordered_map<int, int>& _build_col_idx;
+    std::vector<MutableColumnPtr>& _mutable_cols;
+};
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vsort_exec_exprs.cpp b/be/src/vec/exec/vsort_exec_exprs.cpp
new file mode 100644
index 0000000000..a125424a65
--- /dev/null
+++ b/be/src/vec/exec/vsort_exec_exprs.cpp
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vsort_exec_exprs.h"
+
+namespace doris::vectorized {
+
+Status VSortExecExprs::init(const TSortInfo& sort_info, ObjectPool* pool) {
+    const std::vector<TExpr>* slot_exprs = nullptr; // stays null when thrift did not set them
+    if (sort_info.__isset.sort_tuple_slot_exprs) slot_exprs = &sort_info.sort_tuple_slot_exprs;
+    return init(sort_info.ordering_exprs, slot_exprs, pool);
+}
+
+Status VSortExecExprs::init(const std::vector<TExpr>& ordering_exprs,
+                            const std::vector<TExpr>* sort_tuple_slot_exprs, ObjectPool* pool) {
+    // Ordering exprs are always built; slot exprs only when a list was passed in.
+    RETURN_IF_ERROR(VExpr::create_expr_trees(pool, ordering_exprs, &_lhs_ordering_expr_ctxs));
+    _materialize_tuple = (sort_tuple_slot_exprs != nullptr);
+    if (!_materialize_tuple) {
+        return Status::OK();
+    }
+
+    // Materialization requested: build the expr trees for the sort tuple slots.
+    return VExpr::create_expr_trees(pool, *sort_tuple_slot_exprs, &_sort_tuple_slot_expr_ctxs);
+}
+
+Status VSortExecExprs::init(const std::vector<VExprContext*>& lhs_ordering_expr_ctxs,
+                            const std::vector<VExprContext*>& rhs_ordering_expr_ctxs) {
+    _lhs_ordering_expr_ctxs = lhs_ordering_expr_ctxs; // contexts are copied as given, none are created
+    _rhs_ordering_expr_ctxs = rhs_ordering_expr_ctxs; // NOTE: _materialize_tuple is not assigned here
+    return Status::OK();
+}
+
+Status VSortExecExprs::prepare(RuntimeState* state, const RowDescriptor& child_row_desc,
+                               const RowDescriptor& output_row_desc,
+                               const std::shared_ptr<MemTracker>& expr_mem_tracker) {
+    // Slot exprs are prepared against the child's row descriptor, the ordering exprs
+    // against the output row descriptor.
+    if (_materialize_tuple) {
+        RETURN_IF_ERROR(
+                VExpr::prepare(_sort_tuple_slot_expr_ctxs, state, child_row_desc, expr_mem_tracker));
+    }
+    return VExpr::prepare(_lhs_ordering_expr_ctxs, state, output_row_desc, expr_mem_tracker);
+}
+
+Status VSortExecExprs::open(RuntimeState* state) {
+    if (_materialize_tuple) {
+        RETURN_IF_ERROR(VExpr::open(_sort_tuple_slot_expr_ctxs, state));
+    }
+    RETURN_IF_ERROR(VExpr::open(_lhs_ordering_expr_ctxs, state));
+    // The rhs contexts are derived from the lhs ones through clone_if_not_exists().
+    return VExpr::clone_if_not_exists(_lhs_ordering_expr_ctxs, state, &_rhs_ordering_expr_ctxs);
+}
+
+
+void VSortExecExprs::close(RuntimeState* state) {
+    // Slot exprs are closed only when materialization is on; both ordering sides always are.
+    if (_materialize_tuple) VExpr::close(_sort_tuple_slot_expr_ctxs, state);
+    for (auto* ordering_ctxs : {&_lhs_ordering_expr_ctxs, &_rhs_ordering_expr_ctxs}) {
+        VExpr::close(*ordering_ctxs, state);
+    }
+}
+
+} // namespace doris::vectorized
+
diff --git a/be/src/vec/exec/vsort_exec_exprs.h b/be/src/vec/exec/vsort_exec_exprs.h
new file mode 100644
index 0000000000..fd81be2bed
--- /dev/null
+++ b/be/src/vec/exec/vsort_exec_exprs.h
@@ -0,0 +1,98 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/exprs/vexpr.h"
+#include "runtime/runtime_state.h"
+
+namespace doris {
+
+class MemTracker;
+
+// Helper class to prepare(), open() and close() the ordering expressions used to perform
+// comparisons in a sort. Used by TopNNode, SortNode. When two
+// rows are compared, the ordering expressions are evaluated once for each side.
+// TopN and Sort materialize input rows into a single tuple before sorting.
+// If _materialize_tuple is true, VSortExecExprs also stores the slot expressions used to
+// materialize the sort tuples.
+namespace vectorized {
+
+class VSortExecExprs { // owns the ordering and (optional) materialization expr contexts of a sort
+public:
+    // Initialize the expressions from a TSortInfo using the specified pool.
+    Status init(const TSortInfo &sort_info, ObjectPool *pool);
+
+    // Initialize the ordering and (optionally) materialization expressions from the thrift
+    // TExprs into the specified pool. sort_tuple_slot_exprs is null if the tuple is not
+    // materialized.
+    Status init(const std::vector<TExpr> &ordering_exprs,
+                const std::vector<TExpr> *sort_tuple_slot_exprs, ObjectPool *pool);
+
+    // prepare all expressions used for sorting and tuple materialization.
+    Status prepare(RuntimeState *state, const RowDescriptor &child_row_desc,
+                   const RowDescriptor &output_row_desc,
+                   const std::shared_ptr<MemTracker> &mem_tracker);
+
+    // open all expressions used for sorting and tuple materialization.
+    Status open(RuntimeState *state);
+
+    // close all expressions used for sorting and tuple materialization.
+    void close(RuntimeState *state);
+
+    const std::vector<VExprContext *> &sort_tuple_slot_expr_ctxs() const {
+        return _sort_tuple_slot_expr_ctxs;
+    }
+
+    // Can only be used after calling prepare()
+    const std::vector<VExprContext *> &lhs_ordering_expr_ctxs() const {
+        return _lhs_ordering_expr_ctxs;
+    }
+
+    // Can only be used after calling open()
+    const std::vector<VExprContext *> &rhs_ordering_expr_ctxs() const {
+        return _rhs_ordering_expr_ctxs;
+    }
+
+    bool need_materialize_tuple() const {
+        return _materialize_tuple;
+    }
+
+private:
+    // Two sets of VExprContexts, one for each side of a comparison.
+    std::vector<VExprContext *> _lhs_ordering_expr_ctxs;
+    std::vector<VExprContext *> _rhs_ordering_expr_ctxs;
+
+    // If true, the tuples to be sorted are materialized by _sort_tuple_slot_expr_ctxs before
+    // the sort. Defaults to false so the flag is defined even if no init() overload sets it.
+    bool _materialize_tuple = false;
+
+    // Expressions used to materialize slots in the tuples to be sorted.
+    // One expr per slot in the materialized tuple. Valid only if
+    // _materialize_tuple is true.
+    std::vector<VExprContext *> _sort_tuple_slot_expr_ctxs;
+
+    // Initialize directly from already-created VExprContexts. Callers should manually call
+    // prepare(), open(), and close() on input VExprContexts (instead of calling the
+    // analogous functions in this class). Used for testing.
+    Status init(const std::vector<VExprContext *> &lhs_ordering_expr_ctxs,
+                const std::vector<VExprContext *> &rhs_ordering_expr_ctxs);
+};
+
+} // namespace vectorized
+} // namespace doris
+
diff --git a/be/src/vec/exec/vsort_node.cpp b/be/src/vec/exec/vsort_node.cpp
new file mode 100644
index 0000000000..734af91baa
--- /dev/null
+++ b/be/src/vec/exec/vsort_node.cpp
@@ -0,0 +1,256 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vsort_node.h"
+
+#include "exec/sort_exec_exprs.h"
+#include "runtime/row_batch.h"
+#include "runtime/runtime_state.h"
+#include "util/debug_util.h"
+
+#include "vec/core/sort_block.h"
+
+namespace doris::vectorized {
+
+VSortNode::VSortNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : ExecNode(pool, tnode, descs), _offset(0), _num_rows_skipped(0) {
+    if (tnode.sort_node.__isset.offset) _offset = tnode.sort_node.offset; // optional thrift field
+}
+
+Status VSortNode::init(const TPlanNode& tnode, RuntimeState* state) { // reads the sort info
+    RETURN_IF_ERROR(ExecNode::init(tnode, state));
+    RETURN_IF_ERROR(_vsort_exec_exprs.init(tnode.sort_node.sort_info, _pool)); // sort exprs
+    _is_asc_order = tnode.sort_node.sort_info.is_asc_order; // indexed per sort key
+    _nulls_first = tnode.sort_node.sort_info.nulls_first;   // indexed per sort key
+    return Status::OK();
+}
+
+Status VSortNode::prepare(RuntimeState* state) { // prepares the base node and the sort exprs
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    _runtime_profile->add_info_string("TOP-N", _limit == -1 ? "false" : "true"); // -1: no limit
+    RETURN_IF_ERROR(ExecNode::prepare(state));
+    RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, child(0)->row_desc(), _row_descriptor,
+                                              expr_mem_tracker()));
+    return Status::OK();
+}
+
+Status VSortNode::open(RuntimeState* state) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(ExecNode::open(state));
+    RETURN_IF_ERROR(_vsort_exec_exprs.open(state));
+    RETURN_IF_CANCELLED(state);
+    RETURN_IF_ERROR(state->check_query_state("vsort, while open."));
+    RETURN_IF_ERROR(child(0)->open(state));
+
+    // The child has been opened: pull all of its input and sort every block now.
+    // The final merge is done on-demand as rows are requested in get_next().
+    RETURN_IF_ERROR(sort_input(state));
+
+    // Unless we are inside a subplan expecting to call open()/get_next() on the child
+    // again, the child could be closed at this point. That early close is currently
+    // disabled, so the child is not closed here:
+    //     if (!IsInSubplan()) child(0)->close(state);
+    // NOTE(review): re-enable once subplan handling is settled.
+    return Status::OK();
+}
+
+Status VSortNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
+    *eos = true; // the row-batch interface never yields rows from this node
+    return Status::NotSupported("Not Implemented VSortNode::get_next scalar");
+}
+
+Status VSortNode::get_next(RuntimeState* state, Block* block, bool* eos) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+
+    if (_sorted_blocks.size() > 1) {
+        // Several sorted blocks: merge them batch by batch.
+        RETURN_IF_ERROR(merge_sort_read(state, block, eos));
+    } else {
+        if (!_sorted_blocks.empty()) {
+            // Exactly one sorted block: drop the offset rows and hand it over as-is.
+            Block& only_block = _sorted_blocks.front();
+            if (_offset != 0) only_block.skip_num_rows(_offset);
+            block->swap(only_block);
+        }
+        *eos = true;
+    }
+
+    reached_limit(block, eos);
+    return Status::OK();
+}
+
+Status VSortNode::reset(RuntimeState* state) {
+    _num_rows_skipped = 0; // NOTE(review): sorted blocks, cursors and queues are left untouched
+    return Status::OK();
+}
+
+Status VSortNode::close(RuntimeState* state) {
+    if (is_closed()) { // already closed: nothing left to release
+        return Status::OK();
+    }
+    _mem_tracker->Release(_total_mem_usage); // give back what sort_input() consumed
+    _vsort_exec_exprs.close(state);
+    // Propagate the base-class close status instead of discarding it.
+    return ExecNode::close(state);
+}
+
+void VSortNode::debug_string(int indentation_level, std::stringstream* out) const {
+    // Indent, node tag, then one "<asc|desc> nulls <first|last>" entry per sort key.
+    *out << std::string(indentation_level * 2, ' ') << "VSortNode(";
+    for (size_t key = 0; key < _is_asc_order.size(); ++key) {
+        *out << (key > 0 ? " " : "") << (_is_asc_order[key] ? "asc" : "desc") << " nulls "
+             << (_nulls_first[key] ? "first" : "last");
+    }
+    ExecNode::debug_string(indentation_level, out);
+    *out << ")";
+}
+
+Status VSortNode::sort_input(RuntimeState* state) {
+    bool eos = false;
+    do {
+        Block block;
+        RETURN_IF_ERROR(child(0)->get_next(state, &block, &eos));
+        auto rows = block.rows();
+
+        if (rows != 0) {
+            RETURN_IF_ERROR(pretreat_block(block)); // materialize and sort this block on its own
+            size_t mem_usage = block.allocated_bytes();
+
+            // TOP-N path (a limit is set).
+            if (_limit != -1 ) {
+                // Small optimization to reduce memory usage: every kept block is also
+                // tracked in _block_priority_queue. Once at least _limit rows are buffered,
+                // an incoming block that is totally greater than the queue's top block is
+                // thrown away immediately instead of being kept for the merge.
+                if (_num_rows_in_block < _limit) {
+                    _total_mem_usage += mem_usage;
+                    _sorted_blocks.emplace_back(std::move(block));
+                    _num_rows_in_block += rows;
+                    _block_priority_queue.emplace(
+                            _pool->add(new SortCursorImpl(_sorted_blocks.back(), _sort_description)));
+                } else {
+                    SortBlockCursor block_cursor(
+                            _pool->add(new SortCursorImpl(block, _sort_description)));
+                    if (!block_cursor.totally_greater(_block_priority_queue.top())) {
+                        _sorted_blocks.emplace_back(std::move(block));
+                        _block_priority_queue.push(block_cursor);
+                        _total_mem_usage += mem_usage;
+                    } else {
+                        continue; // block dropped: jumps to the loop condition, skipping the accounting below
+                    }
+                }
+            } else {
+                // Plain sort path: keep every sorted block.
+                _total_mem_usage += mem_usage;
+                _sorted_blocks.emplace_back(std::move(block));
+            }
+
+            _mem_tracker->Consume(mem_usage); // mirrors the _total_mem_usage increment above
+            RETURN_IF_CANCELLED(state);
+            RETURN_IF_ERROR(state->check_query_state("vsort, while sorting input."));
+        }
+    } while (!eos);
+
+    build_merge_tree(); // set up cursors over the sorted blocks for get_next()
+    return Status::OK();
+}
+
+Status VSortNode::pretreat_block(doris::vectorized::Block& block) {
+    if (_vsort_exec_exprs.need_materialize_tuple()) {
+        // Bind by reference: the accessor returns a const reference, so no per-block copy.
+        const auto& output_tuple_expr_ctxs = _vsort_exec_exprs.sort_tuple_slot_expr_ctxs();
+        std::vector<int> valid_column_ids(output_tuple_expr_ctxs.size());
+        for (size_t i = 0; i < output_tuple_expr_ctxs.size(); ++i) {
+            RETURN_IF_ERROR(output_tuple_expr_ctxs[i]->execute(&block, &valid_column_ids[i]));
+        }
+        // Keep only the materialized result columns, in slot order.
+        Block new_block;
+        for (auto column_id : valid_column_ids) new_block.insert(block.get_by_position(column_id));
+        block.swap(new_block);
+    }
+
+    // (Re)build the sort description: each ordering expr reports the column it sorts by.
+    const auto& ordering_expr_ctxs = _vsort_exec_exprs.lhs_ordering_expr_ctxs();
+    _sort_description.resize(ordering_expr_ctxs.size());
+    for (size_t i = 0; i < _sort_description.size(); ++i) {
+        RETURN_IF_ERROR(ordering_expr_ctxs[i]->execute(&block, &_sort_description[i].column_number));
+        _sort_description[i].direction = _is_asc_order[i] ? 1 : -1;
+        _sort_description[i].nulls_direction =
+                _nulls_first[i] ? -_sort_description[i].direction : _sort_description[i].direction;
+    }
+
+    // NOTE(review): with no limit (_limit == -1) this passes _offset - 1; confirm sort_block copes.
+    sort_block(block, _sort_description, _offset + _limit);
+
+    return Status::OK();
+}
+
+void VSortNode::build_merge_tree() {
+    // One cursor per sorted block; reserve up front so the fill does not reallocate.
+    _cursors.reserve(_cursors.size() + _sorted_blocks.size());
+    for (const auto& block : _sorted_blocks) _cursors.emplace_back(block, _sort_description);
+
+    // A merge queue is only needed when there is more than one block to merge.
+    if (_sorted_blocks.size() > 1) {
+        for (auto& cursor : _cursors) _priority_queue.push(SortCursor(&cursor));
+    }
+}
+
+Status VSortNode::merge_sort_read(doris::RuntimeState *state, doris::vectorized::Block *block, bool *eos) {
+    size_t num_columns = _sorted_blocks[0].columns();
+
+    bool mem_reuse = block->mem_reuse(); // true: write into the columns taken from 'block'
+    MutableColumns merged_columns =
+            mem_reuse ? block->mutate_columns() : _sorted_blocks[0].clone_empty_columns();
+
+    /// Take rows from the queue in sorted order and append them to 'merged_columns'.
+    size_t merged_rows = 0;
+    while (!_priority_queue.empty()) {
+        auto current = _priority_queue.top();
+        _priority_queue.pop();
+
+        if (_offset == 0) {
+            for (size_t i = 0; i < num_columns; ++i)
+                merged_columns[i]->insert_from(*current->all_columns[i], current->pos);
+            ++merged_rows;
+        } else {
+            _offset--; // still inside the offset prefix: consume the row without emitting it
+        }
+
+        if (!current->isLast()) {
+            current->next();
+            _priority_queue.push(current); // re-queue the cursor at its next row
+        }
+
+        if (merged_rows == state->batch_size()) // output has reached the batch size
+            break;
+    }
+
+    if (merged_rows == 0) {
+        *eos = true; // nothing left to emit; *eos is not written on any other path here
+        return Status::OK();
+    }
+
+    if (!mem_reuse) {
+        Block merge_block = _sorted_blocks[0].clone_with_columns(std::move(merged_columns));
+        merge_block.swap(*block);
+    }
+
+    return Status::OK();
+}
+
+} // end namespace doris::vectorized
diff --git a/be/src/vec/exec/vsort_node.h b/be/src/vec/exec/vsort_node.h
new file mode 100644
index 0000000000..66876aa149
--- /dev/null
+++ b/be/src/vec/exec/vsort_node.h
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/exec_node.h"
+
+#include <queue>
+
+#include "vec/core/block.h"
+#include "vec/core/sort_cursor.h"
+#include "vec/exec/vsort_exec_exprs.h"
+
+namespace doris::vectorized {
+// Node that implements a full, in-memory sort of its input.
+// In open(), every input Block is first sorted on its own, using the expressions in _vsort_exec_exprs.
+// In get_next(), VSortNode merge-sorts those sorted blocks and gathers the rows into a new block.
+
+// Spilling to disk may be supported in the future.
+class VSortNode : public doris::ExecNode {
+public:
+    VSortNode(ObjectPool *pool, const TPlanNode &tnode, const DescriptorTbl &descs);
+
+    ~VSortNode() override = default;
+
+    virtual Status init(const TPlanNode &tnode, RuntimeState *state = nullptr);
+
+    virtual Status prepare(RuntimeState *state);
+
+    virtual Status open(RuntimeState *state); // also consumes and sorts the whole child input
+
+    virtual Status get_next(RuntimeState *state, RowBatch *row_batch, bool *eos); // not supported
+
+    virtual Status get_next(RuntimeState* state, Block* block, bool* eos);
+
+    virtual Status reset(RuntimeState *state);
+
+    virtual Status close(RuntimeState *state);
+
+protected:
+    virtual void debug_string(int indentation_level, std::stringstream *out) const;
+
+private:
+    // Fetch input blocks from the child, sort each one, and keep them until the input is exhausted.
+    Status sort_input(RuntimeState *state);
+
+    Status pretreat_block(Block& block); // materializes (if needed) and sorts a single block
+
+    void build_merge_tree(); // creates one cursor per sorted block and fills _priority_queue
+
+    Status merge_sort_read(RuntimeState* state, Block* block, bool* eos); // merges into 'block'
+
+    // Number of rows to skip.
+    int64_t _offset;
+
+    // Expressions and parameters used to build _sort_description.
+    VSortExecExprs _vsort_exec_exprs;
+    std::vector<bool> _is_asc_order;
+    std::vector<bool> _nulls_first;
+
+    SortDescription _sort_description;
+    std::vector<SortCursorImpl> _cursors; // one per entry of _sorted_blocks
+    std::vector<Block> _sorted_blocks; // individually sorted input blocks
+    std::priority_queue<SortCursor> _priority_queue; // holds pointers into _cursors
+
+    // TODO: currently only reset in reset(); consider deleting it.
+    // Keeps track of the number of rows skipped for handling _offset.
+    int64_t _num_rows_skipped;
+    uint64_t _total_mem_usage = 0; // bytes consumed on _mem_tracker, released in close()
+
+    // Only valid in a TOP-N node (a limit is set).
+    uint64_t _num_rows_in_block = 0;
+    std::priority_queue<SortBlockCursor> _block_priority_queue;
+};
+
+} // end namespace doris::vectorized
+
+
diff --git a/be/src/vec/exec/vunion_node.cpp b/be/src/vec/exec/vunion_node.cpp
new file mode 100644
index 0000000000..c05b3ef6a8
--- /dev/null
+++ b/be/src/vec/exec/vunion_node.cpp
@@ -0,0 +1,279 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exec/vunion_node.h"
+
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/runtime_state.h"
+#include "util/runtime_profile.h"
+
+#include "vec/core/block.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/utils/util.hpp"
+
+namespace doris {
+
+namespace vectorized {
+
+VUnionNode::VUnionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : ExecNode(pool, tnode, descs),
+          _first_materialized_child_idx(tnode.union_node.first_materialized_child_idx),
+          _const_expr_list_idx(0), // no const expr list consumed yet
+          _child_idx(0),           // start with the first child
+          _child_row_idx(0),
+          _child_eos(false),
+          _to_close_child_idx(-1) {} // -1: no child is waiting to be closed
+
+Status VUnionNode::init(const TPlanNode& tnode, RuntimeState* state) {
+    RETURN_IF_ERROR(ExecNode::init(tnode, state));
+    DCHECK(tnode.__isset.union_node);
+    DCHECK_EQ(_conjunct_ctxs.size(), 0);
+
+    // One context list per entry of the thrift const_expr_lists.
+    for (const auto& const_texprs : tnode.union_node.const_expr_lists) {
+        std::vector<VExprContext*> const_ctxs;
+        RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, const_texprs, &const_ctxs));
+        _const_expr_lists.push_back(const_ctxs);
+    }
+
+    // One context list per entry of the thrift result_expr_lists.
+    for (const auto& result_texprs : tnode.union_node.result_expr_lists) {
+        std::vector<VExprContext*> result_ctxs;
+        RETURN_IF_ERROR(VExpr::create_expr_trees(_pool, result_texprs, &result_ctxs));
+        _child_expr_lists.push_back(result_ctxs);
+    }
+    return Status::OK();
+}
+
+Status VUnionNode::prepare(RuntimeState* state) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(ExecNode::prepare(state));
+    _materialize_exprs_evaluate_timer =
+            ADD_TIMER(_runtime_profile, "MaterializeExprsEvaluateTimer");
+    // Prepare const expr lists against this node's own row descriptor.
+    for (const std::vector<VExprContext*>& exprs : _const_expr_lists) {
+        RETURN_IF_ERROR(VExpr::prepare(exprs, state, row_desc(), expr_mem_tracker()));
+    }
+
+    // Prepare result expr lists: the i-th list is prepared against the i-th child's rows.
+    for (int i = 0; i < _child_expr_lists.size(); ++i) {
+        RETURN_IF_ERROR(VExpr::prepare(_child_expr_lists[i], state, child(i)->row_desc(),
+                                       expr_mem_tracker()));
+    }
+    return Status::OK();
+}
+
+Status VUnionNode::open(RuntimeState* state) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(ExecNode::open(state));
+    // Open const expr lists.
+    for (const std::vector<VExprContext*>& exprs : _const_expr_lists) {
+        RETURN_IF_ERROR(VExpr::open(exprs, state));
+    }
+    // Open result expr lists.
+    for (const std::vector<VExprContext*>& exprs : _child_expr_lists) {
+        RETURN_IF_ERROR(VExpr::open(exprs, state));
+    }
+
+    // Ensures that rows are available for clients to fetch after this open() has
+    // succeeded: only the current (first) child is opened here, the others lazily.
+    if (!_children.empty()) RETURN_IF_ERROR(child(_child_idx)->open(state));
+
+    return Status::OK();
+}
+
+Status VUnionNode::get_next_pass_through(RuntimeState* state, Block* block) {
+    DCHECK(!reached_limit());
+    DCHECK(!is_in_subplan());
+    DCHECK_LT(_child_idx, _children.size());
+    DCHECK(is_child_passthrough(_child_idx));
+    if (_child_eos) { // the previous child finished: open the one we moved on to
+        RETURN_IF_ERROR(child(_child_idx)->open(state));
+        _child_eos = false;
+    }
+    DCHECK_EQ(block->rows(), 0);
+    RETURN_IF_ERROR(child(_child_idx)->get_next(state, block, &_child_eos));
+    if (_child_eos) {
+        // Even though the child is at eos, it is not closed here, because 'block' was
+        // just produced by it and is still handed to the caller. The child index is
+        // remembered instead so that the next get_next() call can close it.
+        // TODO: Remove this as part of IMPALA-4179.
+        _to_close_child_idx = _child_idx;
+        ++_child_idx;
+    }
+    return Status::OK();
+}
+
+Status VUnionNode::get_next_materialized(RuntimeState* state, Block* block) {
+    // Fetch from children, evaluate corresponding exprs and materialize.
+    DCHECK(!reached_limit());
+    DCHECK_LT(_child_idx, _children.size());
+
+    bool mem_reuse = block->mem_reuse();
+    MutableBlock mblock = mem_reuse ? MutableBlock::build_mutable_block(block) :
+        MutableBlock(Block(VectorizedUtils::create_columns_with_type_and_name(row_desc())));
+
+    Block child_block;
+    while (has_more_materialized() && mblock.rows() <= state->batch_size()) {
+        // The loop runs until we are either done iterating over the children that require
+        // materialization, or the output holds more than batch_size rows (note the '<=').
+        DCHECK(!is_child_passthrough(_child_idx));
+        // Child row batch was either never set or we're moving on to a different child.
+        DCHECK_LT(_child_idx, _children.size());
+        // Open the current child unless it's the first child, which was already opened in
+        // VUnionNode::open().
+        if (_child_eos) {
+            RETURN_IF_ERROR(child(_child_idx)->open(state));
+            _child_eos = false;
+            _child_row_idx = 0;
+        }
+        // The child block is materialized through exprs below, so it is not memory-reused.
+        child_block.clear();
+        // The first batch from each child is always fetched here.
+        RETURN_IF_ERROR(child(_child_idx)->get_next(state, &child_block, &_child_eos));
+        SCOPED_TIMER(_materialize_exprs_evaluate_timer);
+        if (child_block.rows() > 0) {
+            mblock.merge(materialize_block(&child_block));
+        }
+        // It shouldn't be the case that we reached the limit because we shouldn't have
+        // incremented '_num_rows_returned' yet.
+        DCHECK(!reached_limit());
+        if (_child_eos) {
+            // Unless we are inside a subplan expecting to call open()/get_next() on the child
+            // again, the child could be closed at this point. That close is currently
+            // disabled, so a finished materialized child is not closed here:
+            //     if (!is_in_subplan()) child(_child_idx)->close(state);
+            // TODO: Recheck whether is_in_subplan() is right.
+            // Move on to the next child; it is opened at the top of the next iteration/call.
+            ++_child_idx;
+        }
+    }
+
+    if (!mem_reuse) {
+        block->swap(mblock.to_block());
+    }
+
+    DCHECK_LE(_child_idx, _children.size());
+    return Status::OK();
+}
+
+Status VUnionNode::get_next_const(RuntimeState* state, Block* block) {
+    DCHECK_EQ(state->per_fragment_instance_idx(), 0);
+    DCHECK_LT(_const_expr_list_idx, _const_expr_lists.size());
+
+    bool mem_reuse = block->mem_reuse();
+    MutableBlock mblock = mem_reuse ? MutableBlock::build_mutable_block(block) :
+        MutableBlock(Block(VectorizedUtils::create_columns_with_type_and_name(row_desc())));
+    for (; _const_expr_list_idx < _const_expr_lists.size(); ++_const_expr_list_idx) {
+        // Scratch block seeded with a single UInt8 column for the const exprs to run against.
+        Block tmp_block;
+        tmp_block.insert({vectorized::ColumnUInt8::create(1),
+                    std::make_shared<vectorized::DataTypeUInt8>(), ""});
+        const auto& const_exprs = _const_expr_lists[_const_expr_list_idx];
+        std::vector<int> result_list(const_exprs.size());
+        for (size_t i = 0; i < const_exprs.size(); ++i) {
+            // Propagate evaluation failures instead of merging an unset result column id.
+            RETURN_IF_ERROR(const_exprs[i]->execute(&tmp_block, &result_list[i]));
+        }
+        tmp_block.erase_not_in(result_list);
+        mblock.merge(tmp_block);
+    }
+
+    if (!mem_reuse) block->swap(mblock.to_block());
+
+    // some insert query like "insert into string_test select 1, repeat('a', 1024 * 1024);"
+    // the const expr will be in output expr cause the union node return a empty block. so here we
+    // need add one row to make sure the union node exec const expr return at least one row
+    if (block->rows() == 0) {
+        block->insert({vectorized::ColumnUInt8::create(1),
+                    std::make_shared<vectorized::DataTypeUInt8>(), ""});
+    }
+
+    return Status::OK();
+}
+
+Status VUnionNode::get_next(RuntimeState* state, Block* block, bool* eos) {
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
+    RETURN_IF_CANCELLED(state);
+    // RETURN_IF_ERROR(QueryMaintenance(state));
+
+    if (_to_close_child_idx != -1) {
+        // A passthrough child reached eos in the previous call and was deliberately left
+        // open (see get_next_pass_through()); close it now that its block has been handed out.
+        DCHECK(is_child_passthrough(_to_close_child_idx));
+        DCHECK(!is_in_subplan());
+        child(_to_close_child_idx)->close(state); // NOTE(review): returned Status is dropped
+        _to_close_child_idx = -1;
+    }
+
+    // Serve exactly one source per call, in this order: passthrough children, then
+    // children that need materialization, then constant expressions.
+    if (has_more_passthrough()) {
+        RETURN_IF_ERROR(get_next_pass_through(state, block));
+    } else if (has_more_materialized()) {
+        RETURN_IF_ERROR(get_next_materialized(state, block));
+    } else if (has_more_const(state)) {
+        RETURN_IF_ERROR(get_next_const(state, block));
+    }
+
+    *eos = (!has_more_passthrough() && !has_more_materialized() && !has_more_const(state));
+    reached_limit(block, eos);
+
+    return Status::OK();
+}
+
+Status VUnionNode::close(RuntimeState* state) {
+    if (is_closed()) { // already closed: nothing to do
+        return Status::OK();
+    }
+    for (auto& exprs : _const_expr_lists) { // close the const expr contexts
+        VExpr::close(exprs, state);
+    }
+    for (auto& exprs : _child_expr_lists) { // close the per-child result expr contexts
+        VExpr::close(exprs, state);
+    }
+    return ExecNode::close(state);
+}
+
+void VUnionNode::debug_string(int indentation_level, std::stringstream* out) const {
+    // Indent, node header, every child expr list, then the base-class description.
+    *out << std::string(indentation_level * 2, ' ') << "_union(_first_materialized_child_idx="
+         << _first_materialized_child_idx << " _child_expr_lists=[";
+    for (const auto& child_exprs : _child_expr_lists) {
+        *out << VExpr::debug_string(child_exprs) << ", ";
+    }
+    *out << "] \n";
+    ExecNode::debug_string(indentation_level, out);
+    *out << ")" << std::endl;
+}
+
+Block VUnionNode::materialize_block(Block* src_block) {
+    // Evaluate the current child's result exprs on src_block and collect the result columns.
+    ColumnsWithTypeAndName columns;
+    for (VExprContext* expr_ctx : _child_expr_lists[_child_idx]) {
+        int result_column_id = -1;
+        expr_ctx->execute(src_block, &result_column_id); // NOTE(review): returned Status is dropped
+        columns.emplace_back(src_block->get_by_position(result_column_id));
+    }
+    _child_row_idx += src_block->rows();
+    return {columns};
+}
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exec/vunion_node.h b/be/src/vec/exec/vunion_node.h
new file mode 100644
index 0000000000..5650d38988
--- /dev/null
+++ b/be/src/vec/exec/vunion_node.h
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/exec/vset_operation_node.h"
+
+namespace doris {
+namespace vectorized {
+
+class VUnionNode : public ExecNode {
+public:
+    VUnionNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    virtual Status init(const TPlanNode& tnode, RuntimeState* state = nullptr);
+    virtual Status prepare(RuntimeState* state);
+    virtual Status open(RuntimeState* state);
+    virtual Status get_next(RuntimeState* state, vectorized::Block* block, bool* eos);
+    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
+        return Status::NotSupported("Not Implemented get RowBatch in vecorized execution.");
+    }
+    virtual Status close(RuntimeState* state);
+
+private:
+    /// Const exprs materialized by this node. These exprs don't refer to any children.
+    /// Only materialized by the first fragment instance to avoid duplication.
+    std::vector<std::vector<VExprContext*>> _const_expr_lists;
+
+    /// Exprs materialized by this node. The i-th result expr list refers to the i-th child.
+    std::vector<std::vector<VExprContext*>> _child_expr_lists;
+    /// Index of the first non-passthrough child; i.e. a child that needs materialization.
+    /// 0 when all children are materialized, '_children.size()' when no children are
+    /// materialized.
+    const int _first_materialized_child_idx;
+    /// Index of current const result expr list.
+    int _const_expr_list_idx;
+
+    /// Index of current child.
+    int _child_idx;
+
+    /// Rows consumed from the current child so far; reset when the next child is opened.
+    int _child_row_idx;
+
+    /// Saved from the last get_next() call on the current child.
+    bool _child_eos;
+
+    /// Index of the child that needs to be closed on the next get_next() call. Should be set
+    /// to -1 if no child needs to be closed.
+    int _to_close_child_idx;
+
+    // Time spent evaluating exprs and materializing the results.
+    RuntimeProfile::Counter* _materialize_exprs_evaluate_timer = nullptr;
+    /// get_next() for the passthrough case. We pass 'block' directly into the get_next()
+    /// call on the child.
+    Status get_next_pass_through(RuntimeState* state, Block* block);
+
+    /// get_next() for the materialized case. Materializes and evaluates rows from each
+    /// non-passthrough child.
+    Status get_next_materialized(RuntimeState* state, Block* block);
+
+    /// get_next() for the constant expression case.
+    Status get_next_const(RuntimeState* state, Block* block);
+
+    /// Evaluates the exprs of the current child on the given child block and returns a new
+    /// Block made of the resulting columns. Adds the block's row count to '_child_row_idx'.
+    /// Despite its name, the 'dst_block' parameter is the input (source) block.
+    Block materialize_block(Block* dst_block);
+
+    Status get_error_msg(const std::vector<VExprContext*>& exprs); // NOTE(review): not defined in vunion_node.cpp
+
+    /// Returns true if the child at 'child_idx' can be passed through.
+    bool is_child_passthrough(int child_idx) const {
+        DCHECK_LT(child_idx, _children.size());
+        return child_idx < _first_materialized_child_idx;
+    }
+
+    /// Returns true if there are still rows to be returned from passthrough children.
+    bool has_more_passthrough() const { return _child_idx < _first_materialized_child_idx; }
+
+    /// Returns true if there are still rows to be returned from children that need
+    /// materialization.
+    bool has_more_materialized() const {
+        return _first_materialized_child_idx != _children.size() && _child_idx < _children.size();
+    }
+
+    /// Returns true if there are still rows to be returned from constant expressions.
+    bool has_more_const(const RuntimeState* state) const {
+        return state->per_fragment_instance_idx() == 0 &&
+               _const_expr_list_idx < _const_expr_lists.size();
+    }
+
+    virtual void debug_string(int indentation_level, std::stringstream* out) const;
+};
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exprs/vcase_expr.cpp b/be/src/vec/exprs/vcase_expr.cpp
new file mode 100644
index 0000000000..7ecf092574
--- /dev/null
+++ b/be/src/vec/exprs/vcase_expr.cpp
@@ -0,0 +1,114 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vcase_expr.h"
+
+#include "vec/columns/column_nullable.h"
+
+namespace doris::vectorized {
+
+VCaseExpr::VCaseExpr(const TExprNode& node)
+        : VExpr(node),
+          _is_prepare(false),
+          _has_case_expr(node.case_expr.has_case_expr),
+          _has_else_expr(node.case_expr.has_else_expr) {
+    if (_has_case_expr) { // the looked-up function name encodes which optional parts exist
+        _function_name += "_has_case";
+    }
+    if (_has_else_expr) {
+        _function_name += "_has_else";
+    }
+}
+
+Status VCaseExpr::prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc,
+                          VExprContext* context) {
+    RETURN_IF_ERROR(VExpr::prepare(state, desc, context));
+
+    // The function lookup below only needs to happen once per expr object.
+    if (_is_prepare) {
+        return Status::OK();
+    }
+    _is_prepare = true;
+
+    // Describe each child as an (empty column, type, name) triple; the factory receives
+    // this template together with the expected return type.
+    ColumnsWithTypeAndName argument_template;
+    argument_template.reserve(_children.size());
+    for (const auto& child : _children) {
+        argument_template.emplace_back(child->data_type()->create_column(), child->data_type(),
+                                       child->expr_name());
+    }
+
+    _function = SimpleFunctionFactory::instance().get_function(_function_name, argument_template,
+                                                               _data_type);
+    if (_function == nullptr) {
+        return Status::NotSupported(
+                fmt::format("vcase_expr Function {} is not implemented", _fn.name.function_name));
+    }
+
+    VExpr::register_function_context(state, context);
+    return Status::OK();
+}
+
+Status VCaseExpr::open(RuntimeState* state, VExprContext* context,
+                       FunctionContext::FunctionStateScope scope) {
+    RETURN_IF_ERROR(VExpr::open(state, context, scope));
+    RETURN_IF_ERROR(VExpr::init_function_context(context, scope, _function));
+    CaseState* case_state = new CaseState {_data_type}; // deleted in VCaseExpr::close()
+    context->fn_context(_fn_context_index) // NOTE(review): stored as FRAGMENT_LOCAL whatever 'scope' is
+            ->set_function_state(FunctionContext::FRAGMENT_LOCAL, case_state);
+    return Status::OK();
+}
+
+void VCaseExpr::close(RuntimeState* state, VExprContext* context,
+                      FunctionContext::FunctionStateScope scope) {
+    CaseState* case_state = reinterpret_cast<CaseState*>( // the state installed by open()
+            context->fn_context(_fn_context_index)
+                    ->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+    delete case_state; // NOTE(review): runs for every 'scope'; confirm close() is not called twice per context
+
+    VExpr::close_function_context(context, scope, _function);
+    VExpr::close(state, context, scope);
+}
+
+Status VCaseExpr::execute(VExprContext* context, Block* block, int* result_column_id) {
+    // Evaluate every child first; arguments[i] is the block position of child i's result.
+    ColumnNumbers arguments(_children.size());
+    for (size_t i = 0; i < _children.size(); ++i) {
+        int column_id = -1;
+        // Propagate child failures: on error column_id may still be -1 and must not be used.
+        RETURN_IF_ERROR(_children[i]->execute(context, block, &column_id));
+        arguments[i] = column_id;
+        // Replace the column at this position if it is a const column (see Block).
+        block->replace_by_position_if_const(column_id);
+    }
+
+    // Append an empty result slot; the case function writes its output into it.
+    size_t num_columns_without_result = block->columns();
+    block->insert({nullptr, _data_type, _expr_name});
+
+    RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, arguments,
+                                       num_columns_without_result, block->rows(), false));
+    *result_column_id = num_columns_without_result;
+    return Status::OK();
+}
+
+const std::string& VCaseExpr::expr_name() const {
+    return _expr_name; // fixed display name of this expr, also used to label its result column
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vcase_expr.h b/be/src/vec/exprs/vcase_expr.h
new file mode 100644
index 0000000000..6291f9e074
--- /dev/null
+++ b/be/src/vec/exprs/vcase_expr.h
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/exprs/vexpr.h"
+#include "vec/functions/function_case.h"
+
+namespace doris::vectorized {
+
+class VCaseExpr final : public VExpr {
+public:
+    VCaseExpr(const TExprNode& node);
+    ~VCaseExpr() = default;
+    // `override` lets the compiler verify that these really match the VExpr virtuals.
+    Status execute(VExprContext* context, vectorized::Block* block,
+                   int* result_column_id) override;
+    Status prepare(RuntimeState* state, const RowDescriptor& desc, VExprContext* context) override;
+    Status open(RuntimeState* state, VExprContext* context,
+                FunctionContext::FunctionStateScope scope) override;
+    void close(RuntimeState* state, VExprContext* context,
+               FunctionContext::FunctionStateScope scope) override;
+    // Copy-constructs a duplicate of this expression and registers it with `pool`.
+    VExpr* clone(ObjectPool* pool) const override { return pool->add(new VCaseExpr(*this)); }
+    const std::string& expr_name() const override;
+
+private:
+    bool _is_prepare = false; // defaults keep the flags determinate if a constructor skips them
+    bool _has_case_expr = false;
+    bool _has_else_expr = false;
+    FunctionBasePtr _function;
+    std::string _function_name = "case";
+    const std::string _expr_name = "vcase expr";
+};
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vcast_expr.cpp b/be/src/vec/exprs/vcast_expr.cpp
new file mode 100644
index 0000000000..a1a441b7b8
--- /dev/null
+++ b/be/src/vec/exprs/vcast_expr.cpp
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vcast_expr.h"
+
+#include <string_view>
+
+#include "vec/core/field.h"
+#include "vec/data_types/data_type_factory.hpp"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+doris::Status VCastExpr::prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc,
+                                 VExprContext* context) {
+    RETURN_IF_ERROR(VExpr::prepare(state, desc, context));
+    // A cast is expected to have exactly one input expression.
+    DCHECK_EQ(_children.size(), 1);
+    auto child = _children[0];
+    const auto& child_name = child->expr_name();
+    auto child_column = child->data_type()->create_column();
+
+    // The target type name is handed to the cast function as a constant string column.
+    _target_data_type = _data_type;
+    _target_data_type_name = DataTypeFactory::instance().get(_target_data_type);
+    _cast_param_data_type = std::make_shared<DataTypeString>();
+    _cast_param = _cast_param_data_type->create_column_const(1, _target_data_type_name);
+    // Argument template for the lookup: the child's column, then the constant type-name column.
+    ColumnsWithTypeAndName argument_template;
+    argument_template.reserve(2);
+    argument_template.emplace_back(std::move(child_column), child->data_type(), child_name);
+    argument_template.emplace_back(_cast_param, _cast_param_data_type, _target_data_type_name);
+
+    _function = SimpleFunctionFactory::instance().get_function(function_name, argument_template, _data_type);
+    // NOTE(review): the message below prints _fn.name.function_name, not the `function_name` looked up.
+    if (_function == nullptr) {
+        return Status::NotSupported(
+                fmt::format("Function {} is not implemented", _fn.name.function_name));
+    }
+    VExpr::register_function_context(state, context);
+    _expr_name = fmt::format("(CAST {}, TO {})", child_name, _target_data_type_name);
+    return Status::OK();
+}
+
+doris::Status VCastExpr::open(doris::RuntimeState* state, VExprContext* context,
+                              FunctionContext::FunctionStateScope scope) {
+    // Base-class open first; afterwards initialize the function context for the cast function.
+    RETURN_IF_ERROR(VExpr::open(state, context, scope));
+    return VExpr::init_function_context(context, scope, _function);
+}
+
+void VCastExpr::close(doris::RuntimeState* state, VExprContext* context,
+                      FunctionContext::FunctionStateScope scope) {
+    VExpr::close_function_context(context, scope, _function); // close the cast function's context first
+    VExpr::close(state, context, scope);                      // then run the base-class close
+}
+
+doris::Status VCastExpr::execute(VExprContext* context, doris::vectorized::Block* block,
+                                 int* result_column_id) {
+    // Evaluate the single child; a failure there must be reported to the caller.
+    doris::vectorized::ColumnNumbers arguments(2);
+    int column_id = -1;
+    RETURN_IF_ERROR(_children[0]->execute(context, block, &column_id));
+    arguments[0] = column_id;
+
+    // Second argument: the constant column carrying the target type name.
+    size_t const_param_id = block->columns();
+    block->insert({_cast_param, _cast_param_data_type, _target_data_type_name});
+    arguments[1] = const_param_id;
+
+    // Append an empty result column; errors raised by the cast function are propagated.
+    size_t num_columns_without_result = block->columns();
+    block->insert({nullptr, _data_type, _expr_name});
+    RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, arguments,
+                                       num_columns_without_result, block->rows(), false));
+    *result_column_id = num_columns_without_result;
+    return Status::OK();
+}
+
+const std::string& VCastExpr::expr_name() const { // returns the name assembled in prepare()
+    return _expr_name;
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/exprs/vcast_expr.h b/be/src/vec/exprs/vcast_expr.h
new file mode 100644
index 0000000000..b33828c60c
--- /dev/null
+++ b/be/src/vec/exprs/vcast_expr.h
@@ -0,0 +1,53 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "vec/exprs/vexpr.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+class VCastExpr final : public VExpr {
+public:
+    VCastExpr(const TExprNode& node) : VExpr(node) {}
+    ~VCastExpr() = default;
+    doris::Status execute(VExprContext* context, doris::vectorized::Block* block,
+                          int* result_column_id) override;
+    doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc,
+                          VExprContext* context) override;
+    doris::Status open(doris::RuntimeState* state, VExprContext* context,
+                       FunctionContext::FunctionStateScope scope) override;
+    void close(doris::RuntimeState* state, VExprContext* context,
+               FunctionContext::FunctionStateScope scope) override;
+    VExpr* clone(doris::ObjectPool* pool) const override {
+        return pool->add(new VCastExpr(*this));
+    }
+    const std::string& expr_name() const override;
+
+private:
+    FunctionBasePtr _function; // cast function resolved in prepare()
+    std::string _expr_name;
+    // Result type of the cast and the type name obtained for it in prepare().
+    DataTypePtr _target_data_type;
+    std::string _target_data_type_name;
+    // Constant string column (and its type) that passes the target type name to the function.
+    DataTypePtr _cast_param_data_type;
+    ColumnPtr _cast_param;
+
+    // Name under which the cast function is looked up in SimpleFunctionFactory.
+    static const constexpr char* function_name = "CAST";
+};
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h
new file mode 100644
index 0000000000..723e6013e0
--- /dev/null
+++ b/be/src/vec/exprs/vcompound_pred.h
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "runtime/runtime_state.h"
+#include "vec/exprs/vectorized_fn_call.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+class VcompoundPred final : public VectorizedFnCall {
+public:
+    // Derives the name of the function to call from the opcode of the thrift node:
+    // COMPOUND_AND selects "and", COMPOUND_OR selects "or", anything else selects "not".
+    VcompoundPred(const TExprNode& node) : VectorizedFnCall(node) {
+        const auto opcode = node.opcode;
+        if (opcode == TExprOpcode::COMPOUND_AND) {
+            _fn.name.function_name = "and";
+        } else if (opcode == TExprOpcode::COMPOUND_OR) {
+            _fn.name.function_name = "or";
+        } else {
+            // Fallback shared by every remaining opcode.
+            _fn.name.function_name = "not";
+        }
+    }
+};
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vectorized_agg_fn.cpp b/be/src/vec/exprs/vectorized_agg_fn.cpp
new file mode 100644
index 0000000000..0bf87254d5
--- /dev/null
+++ b/be/src/vec/exprs/vectorized_agg_fn.cpp
@@ -0,0 +1,175 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vectorized_agg_fn.h"
+
+#include "fmt/format.h"
+#include "fmt/ranges.h"
+#include "runtime/descriptors.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/core/materialize_block.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/exprs/vexpr.h"
+
+namespace doris::vectorized {
+
+AggFnEvaluator::AggFnEvaluator(const TExprNode& desc)
+        : _fn(desc.fn),
+          _is_merge(desc.agg_expr.is_merge_agg),
+          _return_type(TypeDescriptor::from_thrift(desc.fn.ret_type)),
+          _intermediate_type(TypeDescriptor::from_thrift(desc.fn.aggregate_fn.intermediate_type)),
+          _intermediate_slot_desc(nullptr),
+          _output_slot_desc(nullptr),
+          _exec_timer(nullptr),
+          _merge_timer(nullptr),
+          _expr_timer(nullptr) {
+    // Forward the nullability flag only when the thrift node actually carries one;
+    // otherwise call IDataType::from_thrift with the return type alone.
+    _data_type = desc.__isset.is_nullable
+                         ? IDataType::from_thrift(_return_type.type, desc.is_nullable)
+                         : IDataType::from_thrift(_return_type.type);
+}
+
+Status AggFnEvaluator::create(ObjectPool* pool, const TExpr& desc, AggFnEvaluator** result) {
+    const TExprNode& root = desc.nodes[0]; // node 0 is the one the evaluator is built from
+    *result = pool->add(new AggFnEvaluator(root));
+    int node_idx = 0;
+    for (int child = 0; child < root.num_children; ++child) {
+        ++node_idx; // advance to the node handed to create_tree_from_thrift
+        VExpr* input_expr = nullptr;
+        VExprContext* input_ctx = nullptr;
+        RETURN_IF_ERROR(VExpr::create_tree_from_thrift(pool, desc.nodes, NULL, &node_idx,
+                                                       &input_expr, &input_ctx));
+        (*result)->_input_exprs_ctxs.push_back(input_ctx);
+    }
+    return Status::OK();
+}
+
+Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc, MemPool* pool,
+                               const SlotDescriptor* intermediate_slot_desc,
+                               const SlotDescriptor* output_slot_desc,
+                               const std::shared_ptr<MemTracker>& mem_tracker) {
+    DCHECK(pool != NULL);
+    DCHECK(intermediate_slot_desc != NULL);
+    DCHECK(_intermediate_slot_desc == NULL);
+    _output_slot_desc = output_slot_desc;
+    _intermediate_slot_desc = intermediate_slot_desc;
+    // Prepare the input expressions before their result types are queried below.
+    Status status = VExpr::prepare(_input_exprs_ctxs, state, desc, mem_tracker);
+    RETURN_IF_ERROR(status);
+
+    DataTypes argument_types;
+    argument_types.reserve(_input_exprs_ctxs.size());
+
+    std::vector<std::string_view> child_expr_name;
+    // `params` stays empty and is handed to the factory as-is.
+    doris::vectorized::Array params;
+    // Collect the data type and the display name of every input expression.
+    for (int i = 0; i < _input_exprs_ctxs.size(); ++i) {
+        auto data_type = _input_exprs_ctxs[i]->root()->data_type();
+        argument_types.emplace_back(data_type);
+        child_expr_name.emplace_back(_input_exprs_ctxs[i]->root()->expr_name());
+    }
+    // Look the aggregate up by function name, argument types and result nullability.
+    _function = AggregateFunctionSimpleFactory::instance().get(_fn.name.function_name, argument_types,
+                                                               params, _data_type->is_nullable());
+    if (_function == nullptr) {
+        return Status::InternalError(
+                fmt::format("Agg Function {} is not implemented", _fn.name.function_name));
+    }
+
+    _expr_name = fmt::format("{}({})", _fn.name.function_name, child_expr_name);
+    return Status::OK();
+}
+
+Status AggFnEvaluator::open(RuntimeState* state) {
+    return VExpr::open(_input_exprs_ctxs, state); // delegates to VExpr::open for the input contexts
+}
+
+void AggFnEvaluator::close(RuntimeState* state) {
+    VExpr::close(_input_exprs_ctxs, state); // delegates to VExpr::close for the input contexts
+}
+void AggFnEvaluator::create(AggregateDataPtr place) {
+    _function->create(place); // forwards to the aggregate function resolved in prepare()
+}
+void AggFnEvaluator::destroy(AggregateDataPtr place) {
+    _function->destroy(place); // forwards to the aggregate function resolved in prepare()
+}
+
+void AggFnEvaluator::execute_single_add(Block* block, AggregateDataPtr place, Arena* arena) {
+    _calc_argment_columns(block); // fills _agg_columns from the input expressions
+    SCOPED_TIMER(_exec_timer);
+    _function->add_batch_single_place(block->rows(), place, _agg_columns.data(), arena); // arena is forwarded, not dropped
+}
+
+void AggFnEvaluator::execute_batch_add(Block* block, size_t offset, AggregateDataPtr* places,
+                                       Arena* arena) {
+    _calc_argment_columns(block); // fills _agg_columns from the input expressions
+    SCOPED_TIMER(_exec_timer);    // the timer is started after the inputs have been evaluated
+    _function->add_batch(block->rows(), places, offset, _agg_columns.data(), arena);
+}
+
+void AggFnEvaluator::execute_single_merge(AggregateDataPtr place, ConstAggregateDataPtr rhs,
+                                          Arena* arena) {
+    SCOPED_TIMER(_merge_timer);
+    _function->merge(place, rhs, arena); // forwards to the aggregate function's merge()
+}
+
+void AggFnEvaluator::insert_result_info(AggregateDataPtr place, IColumn* column) {
+    _function->insert_result_into(place, *column); // `column` is dereferenced unchecked, so it must not be null
+}
+
+void AggFnEvaluator::reset(AggregateDataPtr place) {
+    _function->reset(place); // forwards to the aggregate function's reset()
+}
+
+std::string AggFnEvaluator::debug_string(const std::vector<AggFnEvaluator*>& exprs) {
+    std::stringstream joined;
+    joined << "[";
+    const char* separator = ""; // nothing before the first entry, one space before the others
+    for (const AggFnEvaluator* evaluator : exprs) {
+        joined << separator << evaluator->debug_string();
+        separator = " ";
+    }
+    joined << "]";
+    return joined.str();
+}
+
+std::string AggFnEvaluator::debug_string() const {
+    // No per-evaluator details are emitted; the text is always the same.
+    std::stringstream text;
+    text << "AggFnEvaluator(" << ")";
+    return text.str();
+}
+
+void AggFnEvaluator::_calc_argment_columns(Block* block) {
+    SCOPED_TIMER(_expr_timer);
+    const size_t num_inputs = _input_exprs_ctxs.size();
+    _agg_columns.resize(num_inputs);
+    // std::vector replaces the former variable-length array, which is not standard C++.
+    std::vector<int> column_ids(num_inputs, -1);
+    for (size_t i = 0; i < num_inputs; ++i) {
+        _input_exprs_ctxs[i]->execute(block, &column_ids[i]); // NOTE(review): Status is discarded
+    }
+    materialize_block_inplace(*block, column_ids.data(), column_ids.data() + num_inputs);
+    for (size_t i = 0; i < num_inputs; ++i) {
+        _agg_columns[i] = block->get_by_position(column_ids[i]).column.get();
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vectorized_agg_fn.h b/be/src/vec/exprs/vectorized_agg_fn.h
new file mode 100644
index 0000000000..f451c298f7
--- /dev/null
+++ b/be/src/vec/exprs/vectorized_agg_fn.h
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "runtime/types.h"
+#include "util/runtime_profile.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/core/block.h"
+#include "vec/data_types/data_type.h"
+#include "vec/exprs/vexpr_context.h"
+
+namespace doris {
+class RuntimeState;
+class SlotDescriptor;
+namespace vectorized {
+class AggFnEvaluator {
+public:
+    static Status create(ObjectPool* pool, const TExpr& desc, AggFnEvaluator** result);
+
+    Status prepare(RuntimeState* state, const RowDescriptor& desc, MemPool* pool,
+                   const SlotDescriptor* intermediate_slot_desc,
+                   const SlotDescriptor* output_slot_desc,
+                   const std::shared_ptr<MemTracker>& mem_tracker);
+    // Installs the profile counters used by the SCOPED_TIMER calls of this evaluator.
+    void set_timer(RuntimeProfile::Counter* exec_timer, RuntimeProfile::Counter* merge_timer,
+                   RuntimeProfile::Counter* expr_timer) {
+        _exec_timer = exec_timer;
+        _merge_timer = merge_timer;
+        _expr_timer = expr_timer;
+    }
+
+    Status open(RuntimeState* state);
+
+    void close(RuntimeState* state);
+
+    // Forward to the aggregate function's create()/destroy() for the data at `place`.
+    void create(AggregateDataPtr place);
+    void destroy(AggregateDataPtr place);
+
+    // Aggregation entry points; each forwards to the aggregate function resolved in prepare().
+    void execute_single_add(Block* block, AggregateDataPtr place, Arena* arena = nullptr);
+
+    void execute_batch_add(Block* block, size_t offset, AggregateDataPtr* places,
+                           Arena* arena = nullptr);
+
+    void execute_single_merge(AggregateDataPtr place, ConstAggregateDataPtr rhs,
+                              Arena* arena = nullptr);
+
+    void insert_result_info(AggregateDataPtr place, IColumn* column);
+
+    void reset(AggregateDataPtr place);
+
+    DataTypePtr& data_type() { return _data_type; }
+
+    const AggregateFunctionPtr& function() { return _function; }
+    static std::string debug_string(const std::vector<AggFnEvaluator*>& exprs);
+    std::string debug_string() const;
+    bool is_merge() const { return _is_merge; }
+
+private:
+    const TFunction _fn;
+
+    const bool _is_merge;
+    // Private: instances are obtained through the static create() factory.
+    AggFnEvaluator(const TExprNode& desc);
+    // Evaluates the input expressions on `block` and stores their columns in _agg_columns.
+    void _calc_argment_columns(Block* block);
+
+    const TypeDescriptor _return_type;
+    const TypeDescriptor _intermediate_type;
+
+    const SlotDescriptor* _intermediate_slot_desc;
+    const SlotDescriptor* _output_slot_desc;
+    // Profile counters; null until set_timer() is called.
+    RuntimeProfile::Counter* _exec_timer;
+    RuntimeProfile::Counter* _merge_timer;
+    RuntimeProfile::Counter* _expr_timer;
+
+    // Contexts of the expressions that produce the aggregate's input columns.
+    std::vector<VExprContext*> _input_exprs_ctxs;
+
+    DataTypePtr _data_type;
+
+    AggregateFunctionPtr _function;
+
+    std::string _expr_name;
+    // Raw column pointers refreshed by _calc_argment_columns() on every execute_*_add call.
+    std::vector<const IColumn*> _agg_columns;
+};
+} // namespace vectorized
+
+} // namespace doris
diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp
new file mode 100644
index 0000000000..deecc16103
--- /dev/null
+++ b/be/src/vec/exprs/vectorized_fn_call.cpp
@@ -0,0 +1,120 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vectorized_fn_call.h"
+
+#include <string_view>
+
+#include "exprs/anyval_util.h"
+#include "fmt/format.h"
+#include "fmt/ranges.h"
+#include "udf/udf_internal.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+VectorizedFnCall::VectorizedFnCall(const doris::TExprNode& node) : VExpr(node) {} // all setup is done by VExpr
+
+doris::Status VectorizedFnCall::prepare(doris::RuntimeState* state,
+                                        const doris::RowDescriptor& desc, VExprContext* context) {
+    RETURN_IF_ERROR(VExpr::prepare(state, desc, context));
+    // Describe each argument by an empty column of the child's type plus the child's name.
+    ColumnsWithTypeAndName argument_template;
+    argument_template.reserve(_children.size());
+    std::vector<std::string_view> arg_names;
+    for (auto arg : _children) {
+        argument_template.emplace_back(arg->data_type()->create_column(), arg->data_type(),
+                                       arg->expr_name());
+        arg_names.emplace_back(arg->expr_name());
+    }
+    _function = SimpleFunctionFactory::instance().get_function(_fn.name.function_name,
+                                                               argument_template, _data_type);
+    if (_function == nullptr) {
+        return Status::InternalError(
+                fmt::format("Function {} is not implemented", _fn.name.function_name));
+    }
+    VExpr::register_function_context(state, context);
+    _expr_name = fmt::format("{}({})", _fn.name.function_name, arg_names);
+    return Status::OK();
+}
+
+doris::Status VectorizedFnCall::open(doris::RuntimeState* state, VExprContext* context,
+                                     FunctionContext::FunctionStateScope scope) {
+    // Base-class open first; afterwards initialize the function context for _function.
+    RETURN_IF_ERROR(VExpr::open(state, context, scope));
+    return VExpr::init_function_context(context, scope, _function);
+}
+
+void VectorizedFnCall::close(doris::RuntimeState* state, VExprContext* context,
+                             FunctionContext::FunctionStateScope scope) {
+    VExpr::close_function_context(context, scope, _function); // close the function's context first
+    VExpr::close(state, context, scope);                      // then run the base-class close
+}
+
+doris::Status VectorizedFnCall::execute(VExprContext* context, doris::vectorized::Block* block,
+                                        int* result_column_id) {
+    // TODO: not execute const expr again, but use the const column in function context
+    doris::vectorized::ColumnNumbers arguments(_children.size());
+    for (size_t i = 0; i < _children.size(); ++i) {
+        int column_id = -1;
+        // Stop at the first failing argument instead of calling the function on a bad column id.
+        RETURN_IF_ERROR(_children[i]->execute(context, block, &column_id));
+        arguments[i] = column_id;
+    }
+    // The result column is appended behind all existing columns and filled by the function.
+    size_t num_columns_without_result = block->columns();
+    block->insert({nullptr, _data_type, _expr_name});
+    RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, arguments,
+                                       num_columns_without_result, block->rows(), false));
+    *result_column_id = num_columns_without_result;
+    return Status::OK();
+}
+
+const std::string& VectorizedFnCall::expr_name() const { // returns the name assembled in prepare()
+    return _expr_name;
+}
+
+std::string VectorizedFnCall::debug_string() const {
+    // Layout: VectorizedFn[<expr name>]{<child>,<child>,...}
+    // Children appear in order, separated by a comma without any space.
+    std::stringstream text;
+    text << "VectorizedFn["
+         << _expr_name
+         << "]{";
+
+    // Empty before the first child, a comma before every later one.
+    const char* delimiter = "";
+    for (VExpr* child : children()) {
+        text << delimiter << child->debug_string();
+        delimiter = ",";
+    }
+    text << "}";
+    return text.str();
+}
+
+std::string VectorizedFnCall::debug_string(const std::vector<VectorizedFnCall*>& agg_fns) {
+    std::stringstream joined;
+    joined << "["; // bracketed list, entries separated by a single space
+    for (size_t idx = 0; idx < agg_fns.size(); ++idx) {
+        joined << (idx != 0 ? " " : "") << agg_fns[idx]->debug_string();
+    }
+    joined << "]";
+    return joined.str();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vectorized_fn_call.h b/be/src/vec/exprs/vectorized_fn_call.h
new file mode 100644
index 0000000000..9776917ba7
--- /dev/null
+++ b/be/src/vec/exprs/vectorized_fn_call.h
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "runtime/runtime_state.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+class VectorizedFnCall : public VExpr {
+public:
+    VectorizedFnCall(const doris::TExprNode& node);
+    doris::Status execute(VExprContext* context, doris::vectorized::Block* block,
+                          int* result_column_id) override; // override: checked against VExpr
+    doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc,
+                          VExprContext* context) override;
+    doris::Status open(doris::RuntimeState* state, VExprContext* context,
+                       FunctionContext::FunctionStateScope scope) override;
+    void close(doris::RuntimeState* state, VExprContext* context,
+               FunctionContext::FunctionStateScope scope) override;
+    VExpr* clone(doris::ObjectPool* pool) const override {
+        return pool->add(new VectorizedFnCall(*this));
+    }
+    const std::string& expr_name() const override;
+    virtual std::string debug_string() const;
+    static std::string debug_string(const std::vector<VectorizedFnCall*>& exprs);
+
+private:
+    FunctionBasePtr _function; // resolved in prepare() through SimpleFunctionFactory
+    std::string _expr_name;    // "<function name>(<child names>)", built in prepare()
+};
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp
new file mode 100644
index 0000000000..ff072d00f0
--- /dev/null
+++ b/be/src/vec/exprs/vexpr.cpp
@@ -0,0 +1,343 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vexpr.h"
+
+#include <fmt/format.h>
+
+#include <memory>
+
+#include "exprs/anyval_util.h"
+#include "gen_cpp/Exprs_types.h"
+#include "vec/exprs/vcase_expr.h"
+#include "vec/exprs/vcast_expr.h"
+#include "vec/exprs/vcompound_pred.h"
+#include "vec/exprs/vectorized_fn_call.h"
+#include "vec/exprs/vin_predicate.h"
+#include "vec/exprs/vliteral.h"
+#include "vec/exprs/vslot_ref.h"
+#include "vec/exprs/vinfo_func.h"
+
+namespace doris::vectorized {
+using doris::Status;
+using doris::RuntimeState;
+using doris::RowDescriptor;
+using doris::TypeDescriptor;
+
+VExpr::VExpr(const doris::TExprNode& node)
+        : _node_type(node.node_type),
+          _type(TypeDescriptor::from_thrift(node.type)),
+          _fn_context_index(-1) {
+    // Copy the TFunction only when the thrift node carries one; otherwise _fn stays default.
+    if (node.__isset.fn) {
+        _fn = node.fn;
+    }
+    // Pass the nullability on when the node sets it, else use from_thrift's one-argument form.
+    _data_type = node.__isset.is_nullable
+                         ? IDataType::from_thrift(_type.type, node.is_nullable)
+                         : IDataType::from_thrift(_type.type);
+}
+
+VExpr::VExpr(const TypeDescriptor& type, bool is_slotref, bool is_nullable)
+        // Give _node_type a defined value for non-slot exprs too (value-initialized enum).
+        : _node_type(is_slotref ? TExprNodeType::SLOT_REF : TExprNodeType::type()),
+          _type(type),
+          _fn_context_index(-1) {
+    _data_type = IDataType::from_thrift(_type.type, is_nullable);
+}
+
+Status VExpr::prepare(RuntimeState* state, const RowDescriptor& row_desc, VExprContext* context) {
+    for (VExpr* child : _children) { // recurse; the first failing child aborts the walk
+        RETURN_IF_ERROR(child->prepare(state, row_desc, context));
+    }
+    return Status::OK();
+}
+
+Status VExpr::open(RuntimeState* state, VExprContext* context,
+                   FunctionContext::FunctionStateScope scope) {
+    for (VExpr* child : _children) { // same `scope` for every child; stops on first error
+        RETURN_IF_ERROR(child->open(state, context, scope));
+    }
+    return Status::OK();
+}
+
+void VExpr::close(doris::RuntimeState* state, VExprContext* context,
+                  FunctionContext::FunctionStateScope scope) {
+    for (VExpr* child : _children) { // close() returns nothing, so every child is visited
+        child->close(state, context, scope);
+    }
+}
+
+Status VExpr::create_expr(doris::ObjectPool* pool, const doris::TExprNode& texpr_node,
+                          VExpr** expr) {
+    switch (texpr_node.node_type) { // every case registers the new node with `pool`
+    case TExprNodeType::BOOL_LITERAL:
+    case TExprNodeType::INT_LITERAL:
+    case TExprNodeType::LARGE_INT_LITERAL:
+    case TExprNodeType::FLOAT_LITERAL:
+    case TExprNodeType::DECIMAL_LITERAL:
+    case TExprNodeType::DATE_LITERAL:
+    case TExprNodeType::STRING_LITERAL:
+    case TExprNodeType::NULL_LITERAL: {
+        *expr = pool->add(new VLiteral(texpr_node));
+        break;
+    }
+    case TExprNodeType::SLOT_REF: {
+        *expr = pool->add(new VSlotRef(texpr_node));
+        break;
+    }
+    case TExprNodeType::COMPOUND_PRED: {
+        *expr = pool->add(new VcompoundPred(texpr_node));
+        break;
+    }
+    case TExprNodeType::ARITHMETIC_EXPR: // these four kinds all map to VectorizedFnCall
+    case TExprNodeType::BINARY_PRED:
+    case TExprNodeType::FUNCTION_CALL:
+    case TExprNodeType::COMPUTE_FUNCTION_CALL: {
+        *expr = pool->add(new VectorizedFnCall(texpr_node));
+        break;
+    }
+    case TExprNodeType::CAST_EXPR: {
+        *expr = pool->add(new VCastExpr(texpr_node));
+        break;
+    }
+    case TExprNodeType::IN_PRED: {
+        *expr = pool->add(new VInPredicate(texpr_node));
+        break;
+    }
+    case TExprNodeType::CASE_EXPR: {
+        if (!texpr_node.__isset.case_expr) { // the only kind validated before construction
+            return Status::InternalError("Case expression not set in thrift node");
+        }
+        *expr = pool->add(new VCaseExpr(texpr_node));
+        break;
+    }
+    case TExprNodeType::INFO_FUNC: {
+        *expr = pool->add(new VInfoFunc(texpr_node));
+        break;
+    }
+    default: // `*expr` is left untouched for node types without a vectorized implementation
+        return Status::InternalError(
+                fmt::format("Unknown expr node type: {}", texpr_node.node_type));
+    }
+    return Status::OK();
+}
+
+Status VExpr::create_tree_from_thrift(doris::ObjectPool* pool,
+                                      const std::vector<doris::TExprNode>& nodes, VExpr* parent,
+                                      int* node_idx, VExpr** root_expr, VExprContext** ctx) {
+    // `nodes` is the tree flattened in pre-order; `*node_idx` is the cursor into it.
+    if (*node_idx >= nodes.size()) {
+        return Status::InternalError("Failed to reconstruct expression tree from thrift.");
+    }
+    const doris::TExprNode& current = nodes[*node_idx];
+    VExpr* expr = nullptr;
+    RETURN_IF_ERROR(create_expr(pool, current, &expr));
+    DCHECK(expr != nullptr);
+    if (parent == nullptr) {
+        DCHECK(root_expr != nullptr);
+        DCHECK(ctx != nullptr);
+        *root_expr = expr;
+        *ctx = pool->add(new VExprContext(expr));
+    } else {
+        parent->add_child(expr);
+    }
+    for (int remaining = current.num_children; remaining > 0; --remaining) {
+        ++(*node_idx);
+        RETURN_IF_ERROR(create_tree_from_thrift(pool, nodes, expr, node_idx, nullptr, nullptr));
+        // A child was expected but the cursor ran past the end of `nodes`:
+        // the serialized tree is malformed, so fail.
+        if (*node_idx >= nodes.size()) {
+            return Status::InternalError("Failed to reconstruct expression tree from thrift.");
+        }
+    }
+    return Status::OK();
+}
+
+Status VExpr::create_expr_tree(doris::ObjectPool* pool, const doris::TExpr& texpr,
+                               VExprContext** ctx) {
+    if (texpr.nodes.empty()) { // an empty TExpr yields a null context, not an error
+        *ctx = nullptr;
+        return Status::OK();
+    }
+    int cursor = 0;
+    VExpr* root = nullptr;
+    Status status = create_tree_from_thrift(pool, texpr.nodes, nullptr, &cursor, &root, ctx);
+    if (status.ok() && cursor + 1 != texpr.nodes.size()) { // cursor must end on the last node
+        status = Status::InternalError(
+                "Expression tree only partially reconstructed. Not all thrift nodes were used.");
+    }
+    if (!status.ok()) {
+        LOG(ERROR) << "Could not construct expr tree.\n"
+                   << status.get_error_msg() << "\n"
+                   << apache::thrift::ThriftDebugString(texpr);
+    }
+    return status;
+}
+
+Status VExpr::create_expr_trees(ObjectPool* pool, const std::vector<doris::TExpr>& texprs,
+                                std::vector<VExprContext*>* ctxs) {
+    ctxs->clear(); // rebuilt from scratch; on error it holds only the trees built so far
+    for (const doris::TExpr& texpr : texprs) {
+        VExprContext* built = nullptr;
+        RETURN_IF_ERROR(create_expr_tree(pool, texpr, &built));
+        ctxs->push_back(built);
+    }
+    return Status::OK();
+}
+
+Status VExpr::prepare(const std::vector<VExprContext*>& ctxs, RuntimeState* state,
+                      const RowDescriptor& row_desc, const std::shared_ptr<MemTracker>& tracker) {
+    for (VExprContext* ctx : ctxs) { // in order; the first failure is returned immediately
+        RETURN_IF_ERROR(ctx->prepare(state, row_desc, tracker));
+    }
+    return Status::OK();
+}
+
+void VExpr::close(const std::vector<VExprContext*>& ctxs, RuntimeState* state) {
+    for (VExprContext* ctx : ctxs) { // every context is closed, in vector order
+        ctx->close(state);
+    }
+}
+
+Status VExpr::open(const std::vector<VExprContext*>& ctxs, RuntimeState* state) {
+    for (VExprContext* ctx : ctxs) { // in order; later contexts stay unopened after an error
+        RETURN_IF_ERROR(ctx->open(state));
+    }
+    return Status::OK();
+}
+
+Status VExpr::clone_if_not_exists(const std::vector<VExprContext*>& ctxs, RuntimeState* state,
+                                  std::vector<VExprContext*>* new_ctxs) {
+    DCHECK(new_ctxs != nullptr);
+    if (new_ctxs->empty()) { // nothing cloned yet: clone each context into its own slot
+        new_ctxs->resize(ctxs.size());
+        for (size_t idx = 0; idx < ctxs.size(); ++idx) {
+            RETURN_IF_ERROR(ctxs[idx]->clone(state, &(*new_ctxs)[idx]));
+        }
+        return Status::OK();
+    }
+    // A non-empty output means 'ctxs' was cloned into it earlier; only sanity-check that.
+    DCHECK_EQ(new_ctxs->size(), ctxs.size());
+    for (size_t idx = 0; idx < new_ctxs->size(); ++idx) {
+        DCHECK((*new_ctxs)[idx]->_is_clone);
+    }
+    return Status::OK();
+}
+std::string VExpr::debug_string() const {
+    // TODO: implement partial debug string for member vars
+    // Layout: " type=<type> codegen=false", plus " children=[...]" when there are children.
+    std::stringstream repr;
+    repr << " type=" << _type.debug_string() << " codegen=" << "false";
+    if (_children.empty()) {
+        return repr.str();
+    }
+    // The children are rendered by the static vector overload.
+    const std::string rendered_children = debug_string(_children);
+    repr << " children=" << rendered_children;
+    return repr.str();
+}
+
+std::string VExpr::debug_string(const std::vector<VExpr*>& exprs) {
+    // Produces "[<expr> <expr> ...]": entries separated by one space, none trailing.
+    std::stringstream joined;
+    joined << "[";
+    const char* separator = "";
+    for (const VExpr* expr : exprs) {
+        joined << separator << expr->debug_string();
+        separator = " ";
+    }
+    joined << "]";
+    return joined.str();
+}
+
+std::string VExpr::debug_string(const std::vector<VExprContext*>& ctxs) {
+    std::vector<VExpr*> roots; // each context is described through its root expression
+    for (VExprContext* ctx : ctxs) {
+        roots.push_back(ctx->root());
+    }
+    return debug_string(roots);
+}
+
+bool VExpr::is_constant() const {
+    // True iff every child is constant (vacuously true for a node without children).
+    for (const VExpr* child : _children) {
+        if (!child->is_constant()) {
+            return false;
+        }
+    }
+    return true;
+}
+
+ColumnPtrWrapper* VExpr::get_const_col(VExprContext* context) {
+    if (!is_constant()) {
+        return nullptr;
+    }
+    if (_constant_col != nullptr) { // already evaluated once: reuse the cached wrapper
+        return _constant_col.get();
+    }
+    int result = -1;
+    Block block;
+    // A failed evaluation leaves `result` at -1, which is not a valid block position, so
+    // report "no constant column" instead of relying on a debug-only DCHECK.
+    if (!execute(context, &block, &result).ok() || result == -1) {
+        return nullptr;
+    }
+    _constant_col = std::make_shared<ColumnPtrWrapper>(block.get_by_position(result).column);
+    return _constant_col.get();
+}
+
+void VExpr::register_function_context(doris::RuntimeState* state, VExprContext* context) {
+    std::vector<FunctionContext::TypeDesc> arg_types;
+    for (VExpr* child : _children) {
+        arg_types.push_back(AnyValUtil::column_type_to_type_desc(child->type()));
+    }
+    const FunctionContext::TypeDesc return_type = AnyValUtil::column_type_to_type_desc(_type);
+    // Keep the returned index so the context can be looked up again; varargs buffer size is 0.
+    _fn_context_index = context->register_func(state, return_type, arg_types, 0);
+}
+
+Status VExpr::init_function_context(VExprContext* context,
+                                    FunctionContext::FunctionStateScope scope,
+                                    const FunctionBasePtr& function) {
+    FunctionContext* fn_ctx = context->fn_context(_fn_context_index);
+    if (scope == FunctionContext::FRAGMENT_LOCAL) {
+        // Hand the constant argument columns to the function context before the
+        // fragment-local prepare(); non-constant children contribute a nullptr entry.
+        std::vector<ColumnPtrWrapper*> const_args;
+        for (VExpr* child : _children) {
+            const_args.push_back(child->get_const_col(context));
+        }
+        fn_ctx->impl()->set_constant_cols(const_args);
+        RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::FRAGMENT_LOCAL));
+    }
+    // Thread-local state is prepared for both scopes; its status is the overall result.
+    RETURN_IF_ERROR(function->prepare(fn_ctx, FunctionContext::THREAD_LOCAL));
+    return Status::OK();
+}
+
+void VExpr::close_function_context(VExprContext* context, FunctionContext::FunctionStateScope scope,
+                                   const FunctionBasePtr& function) {
+    // Nothing to tear down when no FunctionContext was registered for this expr.
+    if (_fn_context_index == -1) return;
+    FunctionContext* fn_ctx = context->fn_context(_fn_context_index);
+    function->close(fn_ctx, FunctionContext::THREAD_LOCAL); // closed for either scope
+    if (scope == FunctionContext::FRAGMENT_LOCAL) {
+        function->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL);
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h
new file mode 100644
index 0000000000..6a9524491a
--- /dev/null
+++ b/be/src/vec/exprs/vexpr.h
@@ -0,0 +1,168 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "common/status.h"
+#include "gen_cpp/Exprs_types.h"
+#include "runtime/types.h"
+#include "udf/udf_internal.h"
+#include "vec/data_types/data_type.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/functions/function.h"
+
+namespace doris {
+namespace vectorized {
+
+class VExpr { // abstract node of a vectorized expression tree; children are held as raw pointers
+public:
+    VExpr(const TExprNode& node);
+    VExpr(const TypeDescriptor& type, bool is_slotref, bool is_nullable);
+    virtual ~VExpr() = default;
+
+    virtual VExpr* clone(ObjectPool* pool) const = 0;
+
+    virtual const std::string& expr_name() const = 0;
+
+    /// Initializes this expr instance for execution. This does not include initializing
+    /// state in the VExprContext; 'context' should only be used to register a
+    /// FunctionContext via register_function_context().
+    ///
+    /// Subclasses overriding this function should call VExpr::prepare() to recursively call
+    /// prepare() on the expr tree
+    virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc,
+                           VExprContext* context);
+
+    /// Initializes 'context' for execution. If scope is FRAGMENT_LOCAL, both fragment- and
+    /// thread-local state should be initialized. Otherwise, if scope is THREAD_LOCAL, only
+    /// thread-local state should be initialized.
+    ///
+    /// Subclasses overriding this function should call VExpr::open() to recursively call
+    /// open() on the expr tree
+    virtual Status open(RuntimeState* state, VExprContext* context,
+                        FunctionContext::FunctionStateScope scope);
+
+    virtual Status execute(VExprContext* context, vectorized::Block* block,
+                           int* result_column_id) = 0;
+
+    /// Subclasses overriding this function should call VExpr::close().
+    ///
+    /// If scope is FRAGMENT_LOCAL, both fragment- and thread-local state should be torn
+    /// down. Otherwise, if scope is THREAD_LOCAL, only thread-local state should be torn
+    /// down.
+    virtual void close(RuntimeState* state, VExprContext* context,
+                       FunctionContext::FunctionStateScope scope);
+
+    DataTypePtr& data_type() { return _data_type; }
+
+    TypeDescriptor type() const { return _type; }
+
+    bool is_slot_ref() const { return _node_type == TExprNodeType::SLOT_REF; }
+
+    TExprNodeType::type node_type() const { return _node_type; }
+
+    void add_child(VExpr* expr) { _children.push_back(expr); }
+
+    static Status create_expr_tree(ObjectPool* pool, const TExpr& texpr, VExprContext** ctx);
+
+    static Status create_expr_trees(ObjectPool* pool, const std::vector<TExpr>& texprs,
+                                    std::vector<VExprContext*>* ctxs);
+
+    static Status prepare(const std::vector<VExprContext*>& ctxs, RuntimeState* state,
+                          const RowDescriptor& row_desc,
+                          const std::shared_ptr<MemTracker>& tracker);
+
+    static Status open(const std::vector<VExprContext*>& ctxs, RuntimeState* state);
+
+    static Status clone_if_not_exists(const std::vector<VExprContext*>& ctxs, RuntimeState* state,
+                                      std::vector<VExprContext*>* new_ctxs);
+
+    static void close(const std::vector<VExprContext*>& ctxs, RuntimeState* state);
+
+    bool is_nullable() const { return _data_type->is_nullable(); }
+
+    PrimitiveType result_type() const { return _type.type; }
+
+    static Status create_expr(ObjectPool* pool, const TExprNode& texpr_node, VExpr** expr);
+
+    static Status create_tree_from_thrift(ObjectPool* pool, const std::vector<TExprNode>& nodes,
+                                          VExpr* parent, int* node_idx, VExpr** root_expr,
+                                          VExprContext** ctx);
+    const std::vector<VExpr*>& children() const { return _children; }
+    void set_children(std::vector<VExpr*> children) { _children = children; }
+    virtual std::string debug_string() const;
+    static std::string debug_string(const std::vector<VExpr*>& exprs);
+    static std::string debug_string(const std::vector<VExprContext*>& ctxs);
+
+    bool is_and_expr() const { return _fn.name.function_name == "and"; }
+
+    /// Returns true if this expr can be evaluated without an input row, i.e. it
+    /// contains no slotrefs. The default implementation returns true if all of
+    /// the children are constant.
+    virtual bool is_constant() const;
+
+    /// If this expr is constant, evaluates the expr with no input row argument and returns
+    /// the output. Returns nullptr if this expr is not constant. The returned wrapper is
+    /// owned by this expr. This should only be called after open() has been called on this
+    /// expr.
+    virtual ColumnPtrWrapper* get_const_col(VExprContext* context);
+
+protected:
+    /// Simple debug string that provides no expr subclass-specific information
+    std::string debug_string(const std::string& expr_name) const {
+        std::stringstream out;
+        out << expr_name << "(" << VExpr::debug_string() << ")";
+        return out.str();
+    }
+
+    /// Helper function that calls context->register_func() with this expr's return and
+    /// argument types and stores the returned index in _fn_context_index.
+    void register_function_context(doris::RuntimeState* state, VExprContext* context);
+
+    /// Helper function to initialize function context, called in `open` phase of VExpr:
+    /// 1. Set constant columns result of function arguments.
+    /// 2. Call function's prepare() to initialize function state, fragment-local or
+    /// thread-local according the input `FunctionStateScope` argument.
+    Status init_function_context(VExprContext* context, FunctionContext::FunctionStateScope scope,
+                                 const FunctionBasePtr& function);
+
+    /// Helper function to close function context, fragment-local or thread-local according
+    /// the input `FunctionStateScope` argument. Called in `close` phase of VExpr.
+    void close_function_context(VExprContext* context, FunctionContext::FunctionStateScope scope,
+                                const FunctionBasePtr& function);
+
+    TExprNodeType::type _node_type;
+    TypeDescriptor _type;
+    DataTypePtr _data_type;
+    std::vector<VExpr*> _children;
+    TFunction _fn;
+
+    /// Index to pass to VExprContext::fn_context() to retrieve this expr's FunctionContext.
+    /// Set in register_function_context(). -1 if this expr does not need a FunctionContext
+    /// and doesn't call register_function_context().
+    int _fn_context_index;
+
+    // If this expr is constant, this will store and cache the value generated by
+    // get_const_col()
+    std::shared_ptr<ColumnPtrWrapper> _constant_col;
+};
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp
new file mode 100644
index 0000000000..a8f1d5291f
--- /dev/null
+++ b/be/src/vec/exprs/vexpr_context.cpp
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vexpr_context.h"
+
+#include "udf/udf_internal.h"
+#include "vec/exprs/vexpr.h"
+
+namespace doris::vectorized {
+VExprContext::VExprContext(VExpr* expr)
+        : _root(expr),
+          _is_clone(false), // clone() switches this on for the copies it creates
+          _prepared(false), // lifecycle flags, flipped by prepare() / open() / close()
+          _opened(false),
+          _closed(false),
+          _last_result_column_id(-1) {} // -1: execute() has not stored a result column yet
+
+doris::Status VExprContext::execute(doris::vectorized::Block* block, int* result_column_id) {
+    Status status = _root->execute(this, block, result_column_id);
+    _last_result_column_id = *result_column_id; // recorded even when `status` is an error
+    return status;
+}
+
+doris::Status VExprContext::prepare(doris::RuntimeState* state,
+                                    const doris::RowDescriptor& row_desc,
+                                    const std::shared_ptr<doris::MemTracker>& tracker) {
+    _prepared = true; // set up front, so it stays true even if the root's prepare() fails
+    _pool.reset(new MemPool(state->instance_mem_tracker().get())); // NOTE: `tracker` is unused
+    return _root->prepare(state, row_desc, this);
+}
+
+doris::Status VExprContext::open(doris::RuntimeState* state) {
+    DCHECK(_prepared);
+    if (_opened) return Status::OK(); // repeated open() calls are no-ops
+    _opened = true;
+    // An original context initializes fragment-local state (and, with it, thread-local
+    // state). A clone inherits the original's fragment state, so it only has to set up
+    // its own thread-local part.
+    if (_is_clone) {
+        return _root->open(state, this, FunctionContext::THREAD_LOCAL);
+    }
+    return _root->open(state, this, FunctionContext::FRAGMENT_LOCAL);
+}
+
+void VExprContext::close(doris::RuntimeState* state) {
+    DCHECK(!_closed);
+    // Mirror open(): a clone closes with THREAD_LOCAL scope, an original with FRAGMENT_LOCAL.
+    const auto scope = _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL;
+    _root->close(state, this, scope);
+    // Close the function contexts before releasing the pool they were created with.
+    for (FunctionContext* fn_ctx : _fn_contexts) {
+        fn_ctx->impl()->close();
+    }
+    // _pool is still null when prepare() was never called on this context.
+    if (_pool) {
+        _pool->free_all();
+    }
+    _closed = true;
+}
+
+doris::Status VExprContext::clone(RuntimeState* state, VExprContext** new_ctx) {
+    DCHECK(_prepared);
+    DCHECK(_opened);
+    DCHECK(*new_ctx == nullptr);
+    // The copy shares this context's root expr and is registered with the state's object pool.
+    VExprContext* copy = state->obj_pool()->add(new VExprContext(_root));
+    *new_ctx = copy;
+    // It gets its own MemPool (same tracker) and function contexts cloned into that pool.
+    copy->_pool.reset(new MemPool(_pool->mem_tracker()));
+    for (FunctionContext* original_fn_ctx : _fn_contexts) {
+        copy->_fn_contexts.push_back(original_fn_ctx->impl()->clone(copy->_pool.get()));
+    }
+    copy->_is_clone = true;
+    copy->_prepared = true;
+    copy->_opened = true; // flagged directly; the root is opened THREAD_LOCAL just below
+    return _root->open(state, copy, FunctionContext::THREAD_LOCAL);
+}
+
+int VExprContext::register_func(RuntimeState* state, const FunctionContext::TypeDesc& return_type,
+                                const std::vector<FunctionContext::TypeDesc>& arg_types,
+                                int varargs_buffer_size) {
+    _fn_contexts.push_back(FunctionContextImpl::create_context( // created against _pool
+            state, _pool.get(), return_type, arg_types, varargs_buffer_size, false));
+    return _fn_contexts.size() - 1; // index of the context that was just appended
+}
+
+Status VExprContext::filter_block(VExprContext* vexpr_ctx, Block* block, int column_to_keep) {
+    if (vexpr_ctx == nullptr || block->rows() == 0) { // no predicate or no rows: nothing to do
+        return Status::OK();
+    }
+    int result_column_id = -1;
+    RETURN_IF_ERROR(vexpr_ctx->execute(block, &result_column_id)); // never filter on a failure
+    return Block::filter_block(block, result_column_id, column_to_keep);
+}
+
+Status VExprContext::filter_block(const std::unique_ptr<VExprContext*>& vexpr_ctx_ptr, Block* block,
+                                  int column_to_keep) {
+    if (vexpr_ctx_ptr == nullptr || block->rows() == 0) { // no predicate or no rows: no-op
+        return Status::OK();
+    }
+    DCHECK((*vexpr_ctx_ptr) != nullptr);
+    int result_column_id = -1;
+    RETURN_IF_ERROR((*vexpr_ctx_ptr)->execute(block, &result_column_id)); // stop on failure
+    return Block::filter_block(block, result_column_id, column_to_keep);
+}
+
+Block VExprContext::get_output_block_after_execute_exprs(
+        const std::vector<vectorized::VExprContext*>& output_vexpr_ctxs, const Block& input_block,
+        Status& status) {
+    // Evaluate against a scratch block built from the input's columns, not the input itself.
+    vectorized::Block scratch(input_block.get_columns_with_type_and_name());
+    vectorized::ColumnsWithTypeAndName outputs;
+    for (vectorized::VExprContext* expr_ctx : output_vexpr_ctxs) {
+        int column_id = -1;
+        status = expr_ctx->execute(&scratch, &column_id);
+        if (UNLIKELY(!status.ok())) return {}; // `status` carries the error to the caller
+        DCHECK(column_id != -1);
+        outputs.emplace_back(scratch.get_by_position(column_id));
+    }
+    return {outputs}; // `status` is left untouched when there were no exprs to run
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h
new file mode 100644
index 0000000000..0021779b35
--- /dev/null
+++ b/be/src/vec/exprs/vexpr_context.h
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/status.h"
+#include "runtime/runtime_state.h"
+#include "vec/core/block.h"
+
+namespace doris::vectorized {
+class VExpr;
+
+class VExprContext { // per-evaluation state (function contexts, mem pool) for one VExpr tree
+public:
+    VExprContext(VExpr* expr);
+    Status prepare(RuntimeState* state, const RowDescriptor& row_desc,
+                   const std::shared_ptr<MemTracker>& tracker);
+    Status open(RuntimeState* state);
+    void close(RuntimeState* state);
+    Status clone(RuntimeState* state, VExprContext** new_ctx);
+    Status execute(Block* block, int* result_column_id);
+
+    VExpr* root() { return _root; }
+    void set_root(VExpr* expr) { _root = expr; }
+
+    /// Creates a FunctionContext, and returns the index that's passed to fn_context() to
+    /// retrieve the created context. Exprs that need a FunctionContext should call this in
+    /// prepare() and save the returned index. 'varargs_buffer_size' is the size of the
+    /// varargs buffer in the created FunctionContext (see udf/udf_internal.h).
+    int register_func(RuntimeState* state, const FunctionContext::TypeDesc& return_type,
+                      const std::vector<FunctionContext::TypeDesc>& arg_types,
+                      int varargs_buffer_size);
+
+    /// Retrieves a registered FunctionContext. 'i' is the index returned by the call to
+    /// register_func(). This should only be called by VExprs.
+    FunctionContext* fn_context(int i) {
+        DCHECK_GE(i, 0);
+        DCHECK_LT(i, _fn_contexts.size());
+        return _fn_contexts[i];
+    }
+
+    static Status filter_block(VExprContext* vexpr_ctx, Block* block, int column_to_keep);
+    static Status filter_block(const std::unique_ptr<VExprContext*>& vexpr_ctx_ptr, Block* block,
+                               int column_to_keep);
+
+    static Block get_output_block_after_execute_exprs(const std::vector<vectorized::VExprContext*>&,
+                                                      const Block&, Status&);
+
+    int get_last_result_column_id() { // column id stored by the most recent execute()
+        DCHECK(_last_result_column_id != -1);
+        return _last_result_column_id;
+    }
+
+private:
+    friend class VExpr;
+
+    /// The expr tree this context is for.
+    VExpr* _root;
+
+    /// True if this context came from a clone() call. Used to manage FunctionStateScope.
+    bool _is_clone;
+
+    /// Variables keeping track of current state.
+    bool _prepared;
+    bool _opened;
+    bool _closed;
+
+    /// FunctionContexts for each registered expression. The FunctionContexts are created
+    /// and owned by this VExprContext.
+    std::vector<FunctionContext*> _fn_contexts;
+
+    /// Pool backing _fn_contexts. prepare() creates it on the state's instance mem tracker.
+    std::unique_ptr<MemPool> _pool;
+
+    int _last_result_column_id;
+};
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vin_predicate.cpp b/be/src/vec/exprs/vin_predicate.cpp
new file mode 100644
index 0000000000..f8c096d8ee
--- /dev/null
+++ b/be/src/vec/exprs/vin_predicate.cpp
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vin_predicate.h"
+
+#include <string_view>
+
+#include "exprs/create_predicate_function.h"
+
+#include "vec/columns/column_set.h"
+#include "vec/core/field.h"
+#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+VInPredicate::VInPredicate(const TExprNode& node)
+        : VExpr(node),
+          _is_not_in(node.in_predicate.is_not_in), // read without checking __isset.in_predicate
+          _is_prepare(false) {} // prepare() has not resolved the function yet
+
+Status VInPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,
+                             VExprContext* context) {
+    RETURN_IF_ERROR(VExpr::prepare(state, desc, context));
+
+    if (_is_prepare) {
+        return Status::OK();
+    }
+    if (_children.size() < 1) {
+        return Status::InternalError("no Function operator in.");
+    }
+
+    _expr_name =
+            fmt::format("({} {} set)", _children[0]->expr_name(), _is_not_in ? "not_in" : "in");
+
+    DCHECK(_children.size() > 1);
+    ColumnsWithTypeAndName argument_template;
+    argument_template.reserve(_children.size());
+    for (auto child : _children) {
+        auto column = child->data_type()->create_column();
+        argument_template.emplace_back(std::move(column), child->data_type(), child->expr_name());
+    }
+
+    // construct the proper function_name
+    std::string head(_is_not_in ? "not_" : "");
+    std::string real_function_name = head + std::string(function_name);
+    _function = SimpleFunctionFactory::instance().get_function(real_function_name,
+                                                               argument_template, _data_type);
+    if (_function == nullptr) {
+        return Status::NotSupported(
+                fmt::format("Function {} is not implemented", real_function_name));
+    }
+    VExpr::register_function_context(state, context);
+    // Mark as prepared only once `_function` is resolved, so a failed lookup is not cached.
+    _is_prepare = true;
+    return Status::OK();
+}
+
+Status VInPredicate::open(RuntimeState* state, VExprContext* context,
+                          FunctionContext::FunctionStateScope scope) {
+    RETURN_IF_ERROR(VExpr::open(state, context, scope)); // the children are opened first
+    // Then the function state for `scope` is set up; its status is the result of open().
+    return VExpr::init_function_context(context, scope, _function);
+}
+
+void VInPredicate::close(RuntimeState* state, VExprContext* context,
+                         FunctionContext::FunctionStateScope scope) {
+    VExpr::close_function_context(context, scope, _function); // function state goes first
+    VExpr::close(state, context, scope); // then the children are closed
+}
+
+Status VInPredicate::execute(VExprContext* context, Block* block, int* result_column_id) {
+    // TODO: not execute const expr again, but use the const column in function context
+    doris::vectorized::ColumnNumbers arguments(_children.size());
+    for (size_t i = 0; i < _children.size(); ++i) {
+        int column_id = -1;
+        // A failing child must abort here; otherwise -1 would be passed on as a column number.
+        RETURN_IF_ERROR(_children[i]->execute(context, block, &column_id));
+        arguments[i] = column_id;
+    }
+    size_t num_columns_without_result = block->columns(); // the result goes at this position
+    // prepare a column to save result
+    block->insert({nullptr, _data_type, _expr_name});
+    RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index), *block, arguments,
+                                       num_columns_without_result, block->rows(), false));
+    *result_column_id = num_columns_without_result;
+    return Status::OK();
+}
+
+const std::string& VInPredicate::expr_name() const {
+    return _expr_name; // assigned in prepare(); default-constructed (empty) before that
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/exprs/vin_predicate.h b/be/src/vec/exprs/vin_predicate.h
new file mode 100644
index 0000000000..96e9a0ae3d
--- /dev/null
+++ b/be/src/vec/exprs/vin_predicate.h
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exprs/hybrid_set.h"
+
+#include "vec/exprs/vexpr.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+class VInPredicate final: public VExpr { // expression node that evaluates an IN predicate
+public:
+    VInPredicate(const TExprNode& node);
+    ~VInPredicate() = default;
+    virtual doris::Status execute(VExprContext* context, doris::vectorized::Block* block,
+                                  int* result_column_id); // appends the result column to `block`
+    virtual doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc,
+                                  VExprContext* context); // fails if no function was resolved
+    virtual doris::Status open(doris::RuntimeState* state, VExprContext* context,
+                               FunctionContext::FunctionStateScope scope);
+    virtual void close(doris::RuntimeState* state, VExprContext* context,
+                       FunctionContext::FunctionStateScope scope);
+    virtual VExpr* clone(doris::ObjectPool* pool) const override {
+        return pool->add(new VInPredicate(*this)); // the copy is handed to `pool`
+    }
+    virtual const std::string& expr_name() const override;
+
+private:
+    FunctionBasePtr _function; // checked against nullptr in prepare()
+    std::string _expr_name; // name given to the result column in execute()
+
+    const bool _is_not_in; // NOTE(review): presumably selects NOT IN; set by the constructor
+    bool _is_prepare; // NOTE(review): usage is not visible from this header
+private:
+    static const constexpr char* function_name = "in"; // base name of the function
+};
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/exprs/vinfo_func.cpp b/be/src/vec/exprs/vinfo_func.cpp
new file mode 100644
index 0000000000..d703c3790f
--- /dev/null
+++ b/be/src/vec/exprs/vinfo_func.cpp
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vinfo_func.h"
+
+#include <fmt/format.h>
+
+#include "util/string_parser.hpp"
+#include "vec/core/field.h"
+#include "vec/data_types/data_type_nullable.h"
+
+namespace doris::vectorized {
+
+// Builds the constant column that this expression emits.
+//
+// The value is read from `node.info_func`:
+//   - `int_value` when the expression type is TYPE_BIGINT,
+//   - `str_value` when it is TYPE_STRING, TYPE_CHAR or TYPE_VARCHAR.
+// Any other type trips the DCHECK below and leaves the field default-constructed.
+VInfoFunc::VInfoFunc(const TExprNode& node) : VExpr(node) {
+    Field value;
+    const auto type = _type.type;
+    if (type == TYPE_BIGINT) {
+        value = Int64(node.info_func.int_value);
+    } else if (type == TYPE_STRING || type == TYPE_CHAR || type == TYPE_VARCHAR) {
+        value = node.info_func.str_value;
+    } else {
+        DCHECK(false) << "Invalid type: " << _type.type;
+    }
+
+    // One row suffices: execute() clones and resizes this column for every block.
+    _column_ptr = _data_type->create_column_const(1, value);
+}
+
+// Appends a resized copy of the constant column to `block` and reports its index.
+Status VInfoFunc::execute(VExprContext* context, vectorized::Block* block, int* result_column_id) {
+    // An empty block still receives a single-row column.
+    const int block_rows = block->rows();
+    const int row_count = block_rows < 1 ? 1 : block_rows;
+    *result_column_id = block->columns();
+    block->insert({_column_ptr->clone_resized(row_count), _data_type, _expr_name});
+    return Status::OK();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vinfo_func.h b/be/src/vec/exprs/vinfo_func.h
new file mode 100644
index 0000000000..89ed0f5cf2
--- /dev/null
+++ b/be/src/vec/exprs/vinfo_func.h
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_const.h"
+#include "vec/exprs/vexpr.h"
+
+#include <iostream>
+#include <string>
+
+namespace doris {
+class TExprNode;
+
+namespace vectorized {
+class VInfoFunc : public VExpr { // expression that yields a constant taken from node.info_func
+public:
+    VInfoFunc(const TExprNode& node); // builds the constant column up front
+    virtual ~VInfoFunc() {}
+    // clone() hands the copy to `pool`; execute() appends the constant column to the block.
+    virtual VExpr* clone(doris::ObjectPool* pool) const override { return pool->add(new VInfoFunc(*this)); }
+    virtual const std::string& expr_name() const override { return _expr_name; }
+    virtual Status execute(VExprContext* context, vectorized::Block* block, int* result_column_id) override;
+
+private:
+    const std::string _expr_name = "vinfofunc expr"; // fixed name used for the result column
+    ColumnPtr _column_ptr; // single-row const column created by the constructor
+};
+} // namespace vectorized
+
+} // namespace doris
+
diff --git a/be/src/vec/exprs/vliteral.cpp b/be/src/vec/exprs/vliteral.cpp
new file mode 100644
index 0000000000..38ec4e1bf6
--- /dev/null
+++ b/be/src/vec/exprs/vliteral.cpp
@@ -0,0 +1,138 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vliteral.h"
+
+#include <fmt/format.h>
+
+#include "util/string_parser.hpp"
+#include "vec/core/field.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/runtime/vdatetime_value.h"
+namespace doris::vectorized {
+VLiteral::VLiteral(const TExprNode& node) : VExpr(node) { // builds the literal's const column
+    Field field; // left default-constructed when the node is a NULL_LITERAL
+    if (node.node_type != TExprNodeType::NULL_LITERAL) {
+        switch (_type.type) { // dispatch on the expression's result type, not on node_type
+            case TYPE_BOOLEAN: {
+                DCHECK_EQ(node.node_type, TExprNodeType::BOOL_LITERAL);
+                DCHECK(node.__isset.bool_literal);
+                field = Int8(node.bool_literal.value); // booleans are stored as Int8
+                break;
+            }
+            case TYPE_TINYINT: {
+                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+                DCHECK(node.__isset.int_literal);
+                field = Int8(node.int_literal.value);
+                break;
+            }
+            case TYPE_SMALLINT: {
+                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+                DCHECK(node.__isset.int_literal);
+                field = Int16(node.int_literal.value);
+                break;
+            }
+            case TYPE_INT: {
+                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+                DCHECK(node.__isset.int_literal);
+                field = Int32(node.int_literal.value);
+                break;
+            }
+            case TYPE_BIGINT: {
+                DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL);
+                DCHECK(node.__isset.int_literal);
+                field = Int64(node.int_literal.value);
+                break;
+            }
+            case TYPE_LARGEINT: { // the 128-bit value arrives as text and is parsed here
+                StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
+                DCHECK_EQ(node.node_type, TExprNodeType::LARGE_INT_LITERAL);
+                __int128_t value = StringParser::string_to_int<__int128>(
+                        node.large_int_literal.value.c_str(), node.large_int_literal.value.size(),
+                        &parse_result);
+                if (parse_result != StringParser::PARSE_SUCCESS) {
+                    value = MAX_INT128; // any parse failure silently falls back to MAX_INT128
+                }
+                field = Int128(value);
+                break;
+            }
+            case TYPE_FLOAT: {
+                DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
+                DCHECK(node.__isset.float_literal);
+                field = Float32(node.float_literal.value);
+                break;
+            }
+            case TYPE_TIME: // TIME shares the Float64 path with DOUBLE
+            case TYPE_DOUBLE: {
+                DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL);
+                DCHECK(node.__isset.float_literal);
+                field = Float64(node.float_literal.value);
+                break;
+            }
+            case TYPE_DATE: { // NOTE(review): the from_date_str result is not checked here
+                VecDateTimeValue value;
+                value.from_date_str(node.date_literal.value.c_str(), node.date_literal.value.size());
+                value.cast_to_date();
+                field = Int64(*reinterpret_cast<__int64_t*>(&value)); // raw bits of the value
+                break;
+            }
+            case TYPE_DATETIME: { // NOTE(review): the from_date_str result is not checked here
+                VecDateTimeValue value;
+                value.from_date_str(node.date_literal.value.c_str(), node.date_literal.value.size());
+                value.to_datetime();
+                field = Int64(*reinterpret_cast<__int64_t*>(&value)); // raw bits of the value
+                break;
+            }
+            case TYPE_STRING:
+            case TYPE_CHAR:
+            case TYPE_VARCHAR: {
+                DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL);
+                DCHECK(node.__isset.string_literal);
+                field = node.string_literal.value;
+                break;
+            }
+            case TYPE_DECIMALV2: {
+                DCHECK_EQ(node.node_type, TExprNodeType::DECIMAL_LITERAL);
+                DCHECK(node.__isset.decimal_literal);
+                DecimalV2Value value(node.decimal_literal.value);
+                field = DecimalField<Decimal128>(value.value(), value.scale());
+                break;
+            }
+            default: { // unsupported type: debug builds abort, otherwise field stays default
+                DCHECK(false) << "Invalid type: " << _type.type;
+                break;
+            }
+        }
+    }
+
+    this->_column_ptr = _data_type->create_column_const(1, field); // one row; execute() resizes
+    _expr_name = _data_type->get_name(); // the literal is named after its data type
+}
+
+VLiteral::~VLiteral() = default; // no cleanup beyond destroying the members
+
+// Appends a copy of the literal's const column, sized to `block`, and reports its index.
+Status VLiteral::execute(VExprContext* context, vectorized::Block* block, int* result_column_id) {
+    // An empty block still receives a single-row column.
+    const int block_rows = block->rows();
+    const int row_count = block_rows < 1 ? 1 : block_rows;
+    const size_t new_column_index = block->columns();
+    block->insert({_column_ptr->clone_resized(row_count), _data_type, _expr_name});
+    *result_column_id = new_column_index;
+    return Status::OK();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vliteral.h b/be/src/vec/exprs/vliteral.h
new file mode 100644
index 0000000000..d1d3dcc003
--- /dev/null
+++ b/be/src/vec/exprs/vliteral.h
@@ -0,0 +1,45 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_const.h"
+#include "vec/exprs/vexpr.h"
+
+namespace doris {
+class TExprNode;
+
+namespace vectorized {
+class VLiteral : public VExpr { // expression node holding one literal value as a const column
+public:
+    virtual ~VLiteral();
+    VLiteral(const TExprNode& node); // converts the thrift literal into _column_ptr
+    virtual Status execute(VExprContext* context, vectorized::Block* block,
+                           int* result_column_id) override; // appends a resized copy to `block`
+    virtual const std::string& expr_name() const override { return _expr_name; }
+    virtual VExpr* clone(doris::ObjectPool* pool) const override {
+        return pool->add(new VLiteral(*this)); // the copy is handed to `pool`
+    }
+
+private:
+    ColumnPtr _column_ptr; // single-row const column built by the constructor
+    std::string _expr_name; // set to the data type's name by the constructor
+};
+} // namespace vectorized
+
+} // namespace doris
diff --git a/be/src/vec/exprs/vslot_ref.cpp b/be/src/vec/exprs/vslot_ref.cpp
new file mode 100644
index 0000000000..57038eb63f
--- /dev/null
+++ b/be/src/vec/exprs/vslot_ref.cpp
@@ -0,0 +1,75 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vslot_ref.h"
+
+#include <fmt/format.h>
+
+#include "runtime/descriptors.h"
+
+namespace doris::vectorized {
+using doris::Status;
+using doris::SlotDescriptor;
+VSlotRef::VSlotRef(const doris::TExprNode& node)
+        : VExpr(node),
+          _slot_id(node.slot_ref.slot_id),
+          _column_id(-1),
+          _column_name(nullptr) {
+            if (node.__isset.is_nullable) {
+              _is_nullable = node.is_nullable;
+            } else {
+              _is_nullable = true;
+            }
+        }
+
+VSlotRef::VSlotRef(const SlotDescriptor* desc) // `desc` must be non-null: it is dereferenced
+        : VExpr(desc->type(), true, desc->is_nullable()), // NOTE(review): `true` is VExpr-defined
+          _slot_id(desc->id()),
+          _column_id(-1), // filled in by prepare()
+          _is_nullable(desc->is_nullable()),
+          _column_name(nullptr) {} // filled in by prepare()
+
+Status VSlotRef::prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc,
+                         VExprContext* context) {
+    DCHECK_EQ(_children.size(), 0);
+    if (_slot_id != -1) { // a slot id of -1 is accepted as-is: nothing to resolve, no error
+        const SlotDescriptor* slot = state->desc_tbl().get_slot_descriptor(_slot_id);
+        if (slot == nullptr) {
+            return Status::InternalError(
+                    fmt::format("couldn't resolve slot descriptor {}", _slot_id));
+        }
+        _column_id = desc.get_column_id(_slot_id);
+        _column_name = &slot->col_name();
+    }
+    return Status::OK();
+}
+
+Status VSlotRef::execute(VExprContext* context, Block* block, int* result_column_id) {
+    DCHECK_GE(_column_id, 0); // debug-only check that the column index is non-negative
+    *result_column_id = _column_id; // nothing is appended; an existing column is reported
+    return Status::OK();
+}
+
+const std::string& VSlotRef::expr_name() const { // name taken from the slot descriptor
+    return *_column_name; // NOTE(review): nullptr until prepare() resolves a slot; confirm callers
+}
+std::string VSlotRef::debug_string() const { // "SlotRef(slot_id=<id><VExpr debug string>)"
+    std::stringstream out;
+    out << "SlotRef(slot_id=" << _slot_id << VExpr::debug_string() << ")"; // no separator added
+    return out.str();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/vslot_ref.h b/be/src/vec/exprs/vslot_ref.h
new file mode 100644
index 0000000000..61d3eb99e3
--- /dev/null
+++ b/be/src/vec/exprs/vslot_ref.h
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "runtime/runtime_state.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/functions/function.h"
+
+namespace doris {
+class SlotDescriptor;
+namespace vectorized {
+class VSlotRef final : public VExpr { // expression that refers to an existing block column
+public:
+    VSlotRef(const doris::TExprNode& node); // slot id comes from node.slot_ref
+    VSlotRef(const SlotDescriptor* desc); // slot id, type and nullability come from `desc`
+    virtual doris::Status execute(VExprContext* context, doris::vectorized::Block* block,
+                                  int* result_column_id); // reports the resolved column index
+    virtual doris::Status prepare(doris::RuntimeState* state, const doris::RowDescriptor& desc,
+                                  VExprContext* context); // resolves _column_id and _column_name
+    virtual VExpr* clone(doris::ObjectPool* pool) const override {
+        return pool->add(new VSlotRef(*this)); // the copy is handed to `pool`
+    }
+
+    virtual const std::string& expr_name() const override;
+    virtual std::string debug_string() const;
+    virtual bool is_constant() const { return false; } // always reports non-constant
+
+private:
+    FunctionPtr _function; // NOTE(review): not used by the code visible in vslot_ref.cpp
+    int _slot_id;
+    int _column_id; // -1 until prepare() resolves it
+    bool _is_nullable;
+    const std::string* _column_name; // points at the slot descriptor's name; nullptr until prepare()
+};
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/functions/cast_type_to_either.h b/be/src/vec/functions/cast_type_to_either.h
new file mode 100644
index 0000000000..3a7b3ec0d6
--- /dev/null
+++ b/be/src/vec/functions/cast_type_to_either.h
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/CastTypeToEither.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/common/typeid_cast.h"
+
+namespace doris::vectorized {
+
+class IDataType;
+
+template <typename... Ts, typename F> // Ts: candidate concrete types, tried left to right
+static bool cast_type_to_either(const IDataType* type, F&& f) { // f gets the matched type
+    /// XXX can't use && here because gcc-7 complains about parentheses around && within ||
+    return ((typeid_cast<const Ts*>(type) ? f(*typeid_cast<const Ts*>(type)) : false) || ...);
+} // yields false when no candidate matched or every call to f produced a false value
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/comparison.cpp b/be/src/vec/functions/comparison.cpp
new file mode 100644
index 0000000000..979e8844e9
--- /dev/null
+++ b/be/src/vec/functions/comparison.cpp
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/registerFunctionsComparison.cpp
+// and modified by Doris
+
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+void register_function_comparison_equals(SimpleFunctionFactory& factory); // equals / not-equals
+void register_function_comparison_greater(SimpleFunctionFactory& factory); // greater / >=
+void register_function_comparison_less(SimpleFunctionFactory& factory); // less / <=
+
+void register_function_comparison(SimpleFunctionFactory& factory) { // registers all three groups
+    register_function_comparison_equals(factory);
+    register_function_comparison_greater(factory);
+    register_function_comparison_less(factory);
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/comparison_equal_for_null.cpp b/be/src/vec/functions/comparison_equal_for_null.cpp
new file mode 100644
index 0000000000..0e3b3315f8
--- /dev/null
+++ b/be/src/vec/functions/comparison_equal_for_null.cpp
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_nullable.h"
+#include "vec/data_types/get_least_supertype.h"
+#include "vec/functions/function_string.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+// Null-safe equality: NULL matches only NULL, and the result column is never NULL itself.
+class FunctionEqForNull : public IFunction {
+public:
+    static constexpr auto name = "eq_for_null";
+
+    static FunctionPtr create() { return std::make_shared<FunctionEqForNull>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeUInt8>();
+    }
+
+    // NULL inputs carry meaning here, so they must reach execute_impl() untouched.
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    // Compares arguments[0] and arguments[1] into column `result`; a failing "eq" call is returned.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        ColumnWithTypeAndName& col_left = block.get_by_position(arguments[0]);
+        ColumnWithTypeAndName& col_right = block.get_by_position(arguments[1]);
+
+        // TODO: opt for the const column in the future
+        col_left.column = col_left.column->convert_to_full_column_if_const();
+        col_right.column = col_right.column->convert_to_full_column_if_const();
+        const auto left_column = check_and_get_column<ColumnNullable>(col_left.column.get());
+        const auto right_column = check_and_get_column<ColumnNullable>(col_right.column.get());
+
+        bool left_nullable = left_column != nullptr;
+        bool right_nullable = right_column != nullptr;
+
+        if (left_nullable == right_nullable) {
+            auto return_type = std::make_shared<DataTypeUInt8>();
+            // Compare the nested (non-nullable) values; the null maps are merged in afterwards.
+            ColumnsWithTypeAndName eq_columns {
+                ColumnWithTypeAndName{left_nullable ? left_column->get_nested_column_ptr() : col_left.column,
+                                      left_nullable ? assert_cast<const DataTypeNullable*>
+                                              (col_left.type.get())->get_nested_type() : col_left.type, ""},
+                ColumnWithTypeAndName{right_nullable ? right_column->get_nested_column_ptr() : col_right.column,
+                                      right_nullable ? assert_cast<const DataTypeNullable*>
+                                              (col_right.type.get())->get_nested_type() : col_right.type, ""}
+            };
+            Block temporary_block(eq_columns);
+
+            auto func_eq = SimpleFunctionFactory::instance().get_function("eq", eq_columns, return_type);
+            DCHECK(func_eq);
+            temporary_block.insert(ColumnWithTypeAndName{nullptr, return_type, ""});
+            RETURN_IF_ERROR(func_eq->execute(context, temporary_block, {0, 1}, 2, input_rows_count));
+
+            if (left_nullable) {
+                auto res_column = std::move(*temporary_block.get_by_position(2).column).mutate();
+                auto& res_map = assert_cast<ColumnVector<UInt8>*>(res_column.get())->get_data();
+                const auto& left_null_map = left_column->get_null_map_data();
+                const auto& right_null_map = right_column->get_null_map_data();
+                auto* __restrict res = res_map.data();
+                auto* __restrict l = left_null_map.data();
+                auto* __restrict r = right_null_map.data();
+                // The nested value under a NULL is a placeholder, so a row with any NULL must
+                // ignore the "eq" result: it is true only when both sides are NULL.
+                for (size_t i = 0; i < input_rows_count; ++i) {
+                    res[i] = (l[i] | r[i]) ? (l[i] & r[i]) : res[i];
+                }
+            }
+
+            block.get_by_position(result).column = temporary_block.get_by_position(2).column;
+        } else {
+            auto return_type = make_nullable(std::make_shared<DataTypeUInt8>());
+            const ColumnsWithTypeAndName eq_columns {
+                ColumnWithTypeAndName{col_left.column, col_left.type, ""},
+                ColumnWithTypeAndName{col_right.column, col_right.type, ""}
+            };
+            auto func_eq = SimpleFunctionFactory::instance().get_function("eq", eq_columns, return_type);
+            DCHECK(func_eq);
+
+            Block temporary_block(eq_columns);
+            temporary_block.insert(ColumnWithTypeAndName{nullptr, return_type, ""});
+            RETURN_IF_ERROR(func_eq->execute(context, temporary_block, {0, 1}, 2, input_rows_count));
+
+            auto res_nullable_column = assert_cast<ColumnNullable*>(
+                    std::move(*temporary_block.get_by_position(2).column).mutate().get());
+            auto& null_map = res_nullable_column->get_null_map_data();
+            auto& res_map = assert_cast<ColumnVector<UInt8>&>(res_nullable_column->get_nested_column()).get_data();
+            auto* __restrict res = res_map.data();
+            auto* __restrict l = null_map.data();
+            // Only one side can be NULL in this branch, and NULL never equals a value.
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                res[i] &= (l[i] != 1);
+            }
+
+            block.get_by_position(result).column = res_nullable_column->get_nested_column_ptr();
+        }
+        return Status::OK();
+    }
+};
+
+void register_function_comparison_eq_for_null(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionEqForNull>(); // adds the "eq_for_null" function class
+}
+}
\ No newline at end of file
diff --git a/be/src/vec/functions/comparison_equals.cpp b/be/src/vec/functions/comparison_equals.cpp
new file mode 100644
index 0000000000..087e5dae89
--- /dev/null
+++ b/be/src/vec/functions/comparison_equals.cpp
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/functions_comparison.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+// Adds the equality group to `factory`.
+// Each entry is FunctionComparison specialized with an operator type and a name type;
+// the pairs used here are EqualsOp/NameEquals and NotEqualsOp/NameNotEquals.
+void register_function_comparison_equals(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionComparison<EqualsOp, NameEquals>>();
+    factory.register_function<FunctionComparison<NotEqualsOp, NameNotEquals>>();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/comparison_greater.cpp b/be/src/vec/functions/comparison_greater.cpp
new file mode 100644
index 0000000000..a2c8e6fe06
--- /dev/null
+++ b/be/src/vec/functions/comparison_greater.cpp
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/functions_comparison.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+// Adds the "greater" group to `factory`.
+// Each entry is FunctionComparison specialized with an operator type and a name type;
+// the pairs used here are GreaterOp/NameGreater and GreaterOrEqualsOp/NameGreaterOrEquals.
+void register_function_comparison_greater(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionComparison<GreaterOp, NameGreater>>();
+    factory.register_function<FunctionComparison<GreaterOrEqualsOp, NameGreaterOrEquals>>();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/comparison_less.cpp b/be/src/vec/functions/comparison_less.cpp
new file mode 100644
index 0000000000..05e4a8c86d
--- /dev/null
+++ b/be/src/vec/functions/comparison_less.cpp
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/functions_comparison.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+using FunctionLess = FunctionComparison<LessOp, NameLess>;
+using FunctionLessOrEquals = FunctionComparison<LessOrEqualsOp, NameLessOrEquals>;
+// Registers the FunctionLess and FunctionLessOrEquals instantiations with `factory`.
+void register_function_comparison_less(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionLess>();
+    factory.register_function<FunctionLessOrEquals>();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/date_time_transforms.h b/be/src/vec/functions/date_time_transforms.h
new file mode 100644
index 0000000000..eaab91806e
--- /dev/null
+++ b/be/src/vec/functions/date_time_transforms.h
@@ -0,0 +1,261 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/DateTimeTransforms.h
+// and modified by Doris
+
+#pragma once
+
+#include "common/status.h"
+#include "runtime/datetime_value.h"
+#include "util/binary_cast.hpp"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/exception.h"
+#include "vec/core/types.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/runtime/vdatetime_value.h"
+namespace doris::vectorized {
+
+#define TIME_FUNCTION_IMPL(CLASS, UNIT, FUNCTION)                                                   \
+    struct CLASS { /* transform returning `FUNCTION` of an Int64-packed VecDateTimeValue */         \
+        static constexpr auto name = #UNIT; /* stringized UNIT */                                   \
+        static inline auto execute(const Int64& t, bool& is_null) { /* t is decoded by bit copy */  \
+            const auto date_time_value = binary_cast<Int64, doris::vectorized::VecDateTimeValue>(t); \
+            is_null = !date_time_value.is_valid_date(); /* invalid date => row flagged NULL */      \
+            return date_time_value.FUNCTION;                                                        \
+        }                                                                                           \
+    }
+// Shorthand for transforms whose accessor is the nullary method named exactly like UNIT.
+#define TO_TIME_FUNCTION(CLASS, UNIT) TIME_FUNCTION_IMPL(CLASS, UNIT, UNIT())
+
+TO_TIME_FUNCTION(ToYearImpl, year);
+TO_TIME_FUNCTION(ToQuarterImpl, quarter);
+TO_TIME_FUNCTION(ToMonthImpl, month);
+TO_TIME_FUNCTION(ToWeekImpl, week);
+TO_TIME_FUNCTION(ToDayImpl, day);
+TO_TIME_FUNCTION(ToHourImpl, hour);
+TO_TIME_FUNCTION(ToMinuteImpl, minute);
+TO_TIME_FUNCTION(ToSecondImpl, second);
+// Transforms whose accessor call is not plain UNIT(): it takes an argument or has another name.
+TIME_FUNCTION_IMPL(WeekOfYearImpl, weekofyear, week(mysql_week_mode(3)));
+TIME_FUNCTION_IMPL(DayOfYearImpl, dayofyear, day_of_year());
+TIME_FUNCTION_IMPL(DayOfMonthImpl, dayofmonth, day());
+TIME_FUNCTION_IMPL(DayOfWeekImpl, dayofweek, day_of_week());
+// TODO: the result of this function should never be nullable
+TIME_FUNCTION_IMPL(ToDaysImpl, to_days, daynr());
+TIME_FUNCTION_IMPL(ToYearWeekImpl, yearweek, year_week(mysql_week_mode(0)));
+struct ToDateImpl {
+    static constexpr auto name = "to_date";
+    // Validity is checked first (sets is_null); cast_to_date() is then applied and re-packed.
+    static inline auto execute(const Int64& t, bool& is_null) {
+        auto unpacked = binary_cast<Int64, VecDateTimeValue>(t);
+        is_null = !unpacked.is_valid_date();
+        unpacked.cast_to_date();
+        return binary_cast<VecDateTimeValue, Int64>(unpacked);
+    }
+};
+struct DateImpl : public ToDateImpl { // inherits ToDateImpl::execute, only the name differs
+    static constexpr auto name = "date";
+};
+
+// TODO: this function does not appear to need a real copy of its input; we should
+// optimize it.
+struct TimeStampImpl {
+    static constexpr auto name = "timestamp";
+    static inline auto execute(const Int64& t, bool& is_null) { return t; } // is_null is left untouched
+};
+
+struct DayNameImpl {
+    static constexpr auto name = "dayname";
+    static constexpr auto max_size = MAX_DAY_NAME_LEN;
+
+    // Writes the day name of dt, zero-terminated, at res_data[offset] and returns the advanced
+    // offset. For an invalid date (is_null set) or a missing name only the terminator is written.
+    static inline auto execute(const VecDateTimeValue& dt, ColumnString::Chars& res_data,
+                               size_t& offset, bool& is_null) {
+        const auto* text = dt.day_name();
+        is_null = !dt.is_valid_date();
+        if (text != nullptr && !is_null) {
+            const auto text_len = strlen(text);
+            memcpy(&res_data[offset], text, text_len);
+            offset += text_len;
+        }
+        // Every row, NULL or not, is closed by exactly one terminator byte.
+        res_data[offset++] = 0;
+        return offset;
+    }
+};
+
+struct MonthNameImpl {
+    static constexpr auto name = "monthname";
+    static constexpr auto max_size = MAX_MONTH_NAME_LEN;
+
+    // Writes the month name of dt, zero-terminated, at res_data[offset] and returns the advanced
+    // offset. For an invalid date (is_null set) or a missing name only the terminator is written.
+    static inline auto execute(const VecDateTimeValue& dt, ColumnString::Chars& res_data,
+                               size_t& offset, bool& is_null) {
+        const auto* text = dt.month_name();
+        is_null = !dt.is_valid_date();
+        const bool has_text = (text != nullptr) && !is_null;
+        if (has_text) {
+            const auto text_len = strlen(text);
+            memcpy(&res_data[offset], text, text_len);
+            offset += text_len;
+        }
+        res_data[offset++] = 0; // one terminator byte per row, NULL or not
+        return offset;
+    }
+};
+
+struct DateFormatImpl {
+    using FromType = Int64;
+
+    static constexpr auto name = "date_format";
+
+    // Formats the Int64-packed date-time `t` with `format` and appends the zero-terminated text
+    // to res_data. Returns {updated offset, is_null}; is_null is true (and only a terminator is
+    // appended) when the format is longer than 128 bytes or to_format_string() fails.
+    static inline auto execute(const Int64& t, StringRef format, ColumnString::Chars& res_data,
+                               size_t& offset) {
+        // Copy the bits out instead of aliasing `t` through a const-dropping reference cast.
+        const auto dt = binary_cast<Int64, VecDateTimeValue>(t);
+        char buf[128];
+        if (format.size > 128 || !dt.to_format_string(format.data, format.size, buf)) {
+            offset += 1;
+            res_data.emplace_back(0);
+            return std::pair{offset, true};
+        }
+
+        // Append the formatted text together with its terminating zero.
+        auto len = strlen(buf) + 1;
+        res_data.insert(buf, buf + len);
+        offset += len;
+        return std::pair{offset, false};
+    }
+};
+
+// TODO: the nullability of the result should depend on the arguments instead of always being nullable
+struct FromUnixTimeImpl {
+    using FromType = Int32;
+
+    static constexpr auto name = "from_unixtime";
+
+    // Converts the unix timestamp `val` to a date-time and appends its `format` rendering,
+    // zero-terminated, to res_data. Returns {updated offset, is_null}; on any failure only a
+    // terminator byte is appended and is_null is true.
+    static inline auto execute(FromType val, StringRef format, ColumnString::Chars& res_data,
+                               size_t& offset) {
+        // TODO: a fixed +8h zone stands in for the default time zone; slow and incorrect, test use only
+        static cctz::time_zone time_zone = cctz::fixed_time_zone(cctz::seconds(8 * 60 * 60));
+
+        VecDateTimeValue dt;
+        char buf[128];
+        const bool bad_input = format.size > 128 || val < 0 || val > INT_MAX;
+        if (bad_input || !dt.from_unixtime(val, time_zone) ||
+            !dt.to_format_string(format.data, format.size, buf)) {
+            offset += 1;
+            res_data.emplace_back(0);
+            return std::pair{offset, true};
+        }
+
+        // Append the formatted text together with its terminating zero.
+        auto len = strlen(buf) + 1;
+        res_data.insert(buf, buf + len);
+        offset += len;
+        return std::pair{offset, false};
+    }
+};
+
+template <typename Transform>
+struct TransformerToStringOneArgument {
+    // Renders each Int64-packed date-time of `ts` into res_data/res_offsets and flags NULL rows.
+    static void vector(const PaddedPODArray<Int64>& ts, ColumnString::Chars& res_data,
+                       ColumnString::Offsets& res_offsets, NullMap& null_map) {
+        const auto len = ts.size();
+        res_data.resize(len * Transform::max_size); // worst case; trimmed after the loop
+        res_offsets.resize(len);
+        null_map.resize_fill(len, false);
+        size_t offset = 0;
+        for (size_t i = 0; i < len; ++i) { // size_t index: no signed/unsigned mix with len
+            const auto& date_time_value = reinterpret_cast<const VecDateTimeValue&>(ts[i]);
+            res_offsets[i] = Transform::execute(date_time_value, res_data, offset,
+                                                reinterpret_cast<bool&>(null_map[i]));
+        }
+        res_data.resize(offset); // keep only the bytes written so chars ends at the last offset
+    }
+};
+
+template <typename Transform>
+struct TransformerToStringTwoArgument {
+    // Applies Transform::execute to every value of `ts` with the same constant `format`,
+    // recording each row's end offset in res_offsets and its NULL flag in null_map.
+    static void vector_constant(const PaddedPODArray<typename Transform::FromType>& ts,
+                                const std::string& format, ColumnString::Chars& res_data,
+                                ColumnString::Offsets& res_offsets,
+                                PaddedPODArray<UInt8>& null_map) {
+        const size_t len = ts.size();
+        res_offsets.resize(len);
+        null_map.resize_fill(len, false);
+        const StringRef format_ref(format.c_str(), format.size()); // loop-invariant view
+        size_t offset = 0;
+        for (size_t i = 0; i < len; ++i) { // size_t index: no signed/unsigned mix with len
+            const auto [new_offset, is_null] =
+                    Transform::execute(ts[i], format_ref, res_data, offset);
+            res_offsets[i] = new_offset;
+            null_map[i] = is_null;
+        }
+    }
+};
+
+template <typename FromType, typename ToType, typename Transform>
+struct Transformer {
+    // Element-wise Transform::execute over vec_from; each call also sets that row's NULL flag.
+    static void vector(const PaddedPODArray<FromType>& vec_from, PaddedPODArray<ToType>& vec_to,
+            NullMap& null_map) {
+        const size_t rows = vec_from.size();
+        vec_to.resize(rows);
+        null_map.resize_fill(rows, false);
+        for (size_t row = 0; row != rows; ++row) {
+            vec_to[row] = Transform::execute(vec_from[row], reinterpret_cast<bool&>(null_map[row]));
+        }
+    }
+};
+
+template <typename FromType, typename ToType, typename Transform>
+struct DateTimeTransformImpl {
+    // Applies Transform to the first argument column; the result is always a nullable column.
+    static Status execute(Block& block, const ColumnNumbers& arguments, size_t result,
+                          size_t /*input_rows_count*/) {
+        using Op = Transformer<FromType, ToType, Transform>;
+        const ColumnPtr source_col = block.get_by_position(arguments[0]).column;
+        const auto* sources = check_and_get_column<ColumnVector<FromType>>(source_col.get());
+        if (sources == nullptr) {
+            return Status::RuntimeError(fmt::format(
+                    "Illegal column {} of first argument of function {}",
+                    block.get_by_position(arguments[0]).column->get_name(), Transform::name));
+        }
+        auto col_to = ColumnVector<ToType>::create();
+        auto null_map = ColumnVector<UInt8>::create();
+        Op::vector(sources->get_data(), col_to->get_data(), null_map->get_data());
+        block.replace_by_position(result,
+                ColumnNullable::create(std::move(col_to), std::move(null_map)));
+        return Status::OK();
+    }
+};
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/divide.cpp b/be/src/vec/functions/divide.cpp
new file mode 100644
index 0000000000..f08f947318
--- /dev/null
+++ b/be/src/vec/functions/divide.cpp
@@ -0,0 +1,48 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/divide.cpp
+// and modified by Doris
+
+
+#include "vec/functions/function_binary_arithmetic_to_null_type.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+template <typename A, typename B>
+struct DivideFloatingImpl {
+    using ResultType = typename NumberTraits::ResultOfFloatingPointDivision<A, B>::Type;
+    static const constexpr bool allow_decimal = true;
+    // Division by zero yields NULL: the row is flagged and the divisor is replaced by b + 1 == 1.
+    template <typename Result = ResultType>
+    static inline Result apply(A a, B b, NullMap& null_map, size_t index) {
+        null_map[index] = b == 0;
+        return static_cast<Result>(a) / (b + (b == 0));
+    }
+};
+
+struct NameDivide {
+    static constexpr auto name = "divide";
+};
+using FunctionDivide = FunctionBinaryArithmeticToNullType<DivideFloatingImpl, NameDivide>;
+// Registers FunctionDivide with `factory`.
+void register_function_divide(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionDivide>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp
new file mode 100644
index 0000000000..9ed3edb6d1
--- /dev/null
+++ b/be/src/vec/functions/function.cpp
@@ -0,0 +1,330 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/IFunction.cpp
+// and modified by Doris
+
+#include "vec/functions/function.h"
+
+#include <memory>
+#include <optional>
+
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/data_types/data_type_nothing.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+
+ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const ColumnNumbers& args,
+                           size_t result, size_t input_rows_count) {
+    ColumnPtr result_null_map_column;
+    // Wraps `src` as nullable, combining its own null map with those of the nullable arguments.
+    /// Data part of the result; replaced below by the nested column when src is already nullable.
+    ColumnPtr src_not_nullable = src;
+
+    if (src->only_null())
+        return src; // a column reporting only_null() is returned as is
+    else if (auto* nullable = check_and_get_column<ColumnNullable>(*src)) {
+        src_not_nullable = nullable->get_nested_column_ptr();
+        result_null_map_column = nullable->get_null_map_column_ptr();
+    }
+
+    for (const auto& arg : args) {
+        const ColumnWithTypeAndName& elem = block.get_by_position(arg);
+        if (!elem.type->is_nullable()) continue;
+
+        /// A constant NULL argument makes the whole result a constant NULL column.
+        if (elem.column->only_null())
+            return block.get_by_position(result).type->create_column_const(input_rows_count,
+                                                                           Null());
+
+        if (is_column_const(*elem.column)) continue; // remaining constant columns are skipped
+
+        if (auto* nullable = check_and_get_column<ColumnNullable>(*elem.column)) {
+            const ColumnPtr& null_map_column = nullable->get_null_map_column_ptr();
+            if (!result_null_map_column) { // first null map found: start from a clone of it
+                result_null_map_column =
+                        null_map_column->clone_resized(null_map_column->size());
+            } else {
+                MutableColumnPtr mutable_result_null_map_column =
+                        (*std::move(result_null_map_column)).assume_mutable();
+
+                NullMap& result_null_map =
+                        assert_cast<ColumnUInt8&>(*mutable_result_null_map_column).get_data();
+                const NullMap& src_null_map =
+                        assert_cast<const ColumnUInt8&>(*null_map_column).get_data();
+
+                // Folds src_null_map into result_null_map (presumably a per-row OR - confirm).
+                VectorizedUtils::update_null_map(result_null_map, src_null_map);
+                result_null_map_column = std::move(mutable_result_null_map_column);
+            }
+        }
+    }
+
+    if (!result_null_map_column) return make_nullable(src); // no null map was collected
+
+    return ColumnNullable::create(src_not_nullable->convert_to_full_column_if_const(),
+                                  result_null_map_column);
+}
+
+namespace {
+
+struct NullPresence {
+    bool has_nullable = false;      // at least one argument type is nullable
+    bool has_null_constant = false; // at least one argument type reports only_null()
+};
+
+NullPresence getNullPresense(const Block& block, const ColumnNumbers& args) {
+    // Collect, over all argument types, whether any is nullable / reports only_null().
+    NullPresence presence;
+
+    for (const auto& position : args) {
+        const auto& type = block.get_by_position(position).type;
+        presence.has_nullable = presence.has_nullable || type->is_nullable();
+        presence.has_null_constant = presence.has_null_constant || type->only_null();
+    }
+
+    return presence;
+}
+
+[[maybe_unused]] NullPresence getNullPresense(const ColumnsWithTypeAndName& args) {
+    NullPresence presence;
+    // Each flag latches to true at the first argument type that reports it.
+    for (const auto& argument : args) {
+        presence.has_nullable = presence.has_nullable || argument.type->is_nullable();
+        presence.has_null_constant = presence.has_null_constant || argument.type->only_null();
+    }
+
+    return presence;
+}
+
+bool allArgumentsAreConstants(const Block& block, const ColumnNumbers& args) {
+    return std::all_of(args.begin(), args.end(), [&block](auto position) { // true when empty
+        return is_column_const(*block.get_by_position(position).column);
+    });
+}
+} // namespace
+
+Status PreparedFunctionImpl::default_implementation_for_constant_arguments(
+        FunctionContext* context, Block& block, const ColumnNumbers& args, size_t result,
+        size_t input_rows_count, bool dry_run, bool* executed) {
+    *executed = false; // becomes true only when this path produced the result column
+    ColumnNumbers arguments_to_remain_constants = get_arguments_that_are_always_constant();
+    // Shortcut for all-constant arguments: evaluate once, then wrap the result in a ColumnConst.
+    /// Check that these arguments are really constant.
+    for (auto arg_num : arguments_to_remain_constants)
+        if (arg_num < args.size() &&
+            !is_column_const(*block.get_by_position(args[arg_num]).column)) {
+            return Status::RuntimeError(fmt::format(
+                    "Argument at index {} for function {}  must be constant", arg_num, get_name()));
+        }
+    // Not applicable unless the default path is enabled and every argument is constant.
+    if (args.empty() || !use_default_implementation_for_constants() ||
+        !allArgumentsAreConstants(block, args))
+        return Status::OK();
+    // Temporary block: always-constant arguments stay constant, the others are unwrapped.
+    Block temporary_block;
+    bool have_converted_columns = false;
+
+    size_t arguments_size = args.size();
+    for (size_t arg_num = 0; arg_num < arguments_size; ++arg_num) {
+        const ColumnWithTypeAndName& column = block.get_by_position(args[arg_num]);
+
+        if (arguments_to_remain_constants.end() != std::find(arguments_to_remain_constants.begin(),
+                                                             arguments_to_remain_constants.end(),
+                                                             arg_num)) {
+            temporary_block.insert({column.column->clone_resized(1), column.type, column.name});
+        } else {
+            have_converted_columns = true;
+            temporary_block.insert(
+                    {assert_cast<const ColumnConst*>(column.column.get())->get_data_column_ptr(),
+                     column.type, column.name});
+        }
+    }
+
+    /** When using default implementation for constants, the function requires at least one argument
+      *  not in "arguments_to_remain_constants" set. Otherwise we get infinite recursion.
+      */
+    if (!have_converted_columns) {
+        return Status::RuntimeError(
+                fmt::format("Number of arguments for function {} doesn't match: the function "
+                            "requires more arguments",
+                            get_name()));
+    }
+    // The result column is appended after the arguments, i.e. at index arguments_size.
+    temporary_block.insert(block.get_by_position(result));
+
+    ColumnNumbers temporary_argument_numbers(arguments_size);
+    for (size_t i = 0; i < arguments_size; ++i) temporary_argument_numbers[i] = i;
+    // Evaluate on the temporary block, where argument i lives at position i.
+    RETURN_IF_ERROR(execute_without_low_cardinality_columns(
+            context, temporary_block, temporary_argument_numbers, arguments_size,
+            temporary_block.rows(), dry_run));
+
+    ColumnPtr result_column;
+    /// Extremely rare case: all arguments are constant, but some of them were produced by a
+    /// non-deterministic function, so the result has more than one row.
+    if (temporary_block.get_by_position(arguments_size).column->size() > 1)
+        result_column = temporary_block.get_by_position(arguments_size).column->clone_resized(1);
+    else
+        result_column = temporary_block.get_by_position(arguments_size).column;
+    // Publish the computed value as a constant column of input_rows_count rows.
+    block.get_by_position(result).column = ColumnConst::create(result_column, input_rows_count);
+    *executed = true;
+    return Status::OK();
+}
+
+Status PreparedFunctionImpl::default_implementation_for_nulls(FunctionContext* context,
+                                                              Block& block,
+                                                              const ColumnNumbers& args,
+                                                              size_t result,
+                                                              size_t input_rows_count, bool dry_run,
+                                                              bool* executed) {
+    // Default NULL handling: a NULL-constant argument makes the whole result a constant NULL;
+    // nullable arguments are unwrapped, the function runs on the nested columns and the result
+    // is passed through wrap_in_nullable(). *executed tells the caller whether this path applied.
+    *executed = false;
+    if (args.empty() || !use_default_implementation_for_nulls()) {
+        return Status::OK();
+    }
+
+    const NullPresence null_presence = getNullPresense(block, args);
+
+    if (null_presence.has_null_constant) {
+        block.get_by_position(result).column =
+                block.get_by_position(result).type->create_column_const(input_rows_count, Null());
+        *executed = true;
+    } else if (null_presence.has_nullable) {
+        Block temporary_block = create_block_with_nested_columns(block, args, result);
+        RETURN_IF_ERROR(execute_without_low_cardinality_columns(
+                context, temporary_block, args, result, temporary_block.rows(), dry_run));
+        block.get_by_position(result).column =
+                wrap_in_nullable(temporary_block.get_by_position(result).column, block, args,
+                                 result, input_rows_count);
+        *executed = true;
+    }
+    return Status::OK();
+}
+
+Status PreparedFunctionImpl::execute_without_low_cardinality_columns(FunctionContext* context,
+                                                                     Block& block,
+                                                                     const ColumnNumbers& args,
+                                                                     size_t result,
+                                                                     size_t input_rows_count,
+                                                                     bool dry_run) {
+    // Tries the generic paths first (all-constant arguments, then default NULL handling); only
+    // if neither produced the result is the function's own (dry-run) implementation invoked.
+    bool handled = false;
+    RETURN_IF_ERROR(default_implementation_for_constant_arguments(
+            context, block, args, result, input_rows_count, dry_run, &handled));
+    if (!handled) {
+        RETURN_IF_ERROR(default_implementation_for_nulls(context, block, args, result,
+                                                         input_rows_count, dry_run, &handled));
+    }
+
+    if (handled) {
+        return Status::OK();
+    }
+
+    return dry_run ? execute_impl_dry_run(context, block, args, result, input_rows_count)
+                   : execute_impl(context, block, args, result, input_rows_count);
+}
+
+Status PreparedFunctionImpl::execute(FunctionContext* context, Block& block,
+                                     const ColumnNumbers& args, size_t result,
+                                     size_t input_rows_count, bool dry_run) {
+//    if (use_default_implementation_for_low_cardinality_columns()) {
+//        auto& res = block.safe_get_by_position(result);
+//        Block block_without_low_cardinality = block.clone_without_columns();
+//
+//        for (auto arg : args)
+//            block_without_low_cardinality.safe_get_by_position(arg).column =
+//                    block.safe_get_by_position(arg).column;
+//
+//        {
+//            RETURN_IF_ERROR(execute_without_low_cardinality_columns(
+//                    context, block_without_low_cardinality, args, result, input_rows_count,
+//                    dry_run));
+//            res.column = block_without_low_cardinality.safe_get_by_position(result).column;
+//        }
+//    } else
+    // Propagate the callee's status so that execution errors reach the caller.
+    return execute_without_low_cardinality_columns(context, block, args, result, input_rows_count,
+                                                   dry_run);
+}
+
+void FunctionBuilderImpl::check_number_of_arguments(size_t number_of_arguments) const {
+    // Variadic functions accept any count; otherwise the count is enforced with CHECK_EQ.
+    if (!is_variadic()) {
+        const size_t expected_number_of_arguments = get_number_of_arguments();
+        CHECK_EQ(number_of_arguments, expected_number_of_arguments) << fmt::format(
+                "Number of arguments for function {} doesn't match: passed {} , should be {}",
+                get_name(), number_of_arguments, expected_number_of_arguments);
+    }
+}
+
+DataTypePtr FunctionBuilderImpl::get_return_type_without_low_cardinality(
+        const ColumnsWithTypeAndName& arguments) const {
+    check_number_of_arguments(arguments.size());
+
+    // Without arguments, or without default NULL handling, the function decides on its own.
+    if (arguments.empty() || !use_default_implementation_for_nulls()) {
+        return get_return_type_impl(arguments);
+    }
+
+    const NullPresence null_presence = getNullPresense(arguments);
+    if (null_presence.has_null_constant) {
+        return make_nullable(std::make_shared<DataTypeNothing>());
+    }
+    if (!null_presence.has_nullable) {
+        return get_return_type_impl(arguments);
+    }
+
+    // Resolve the type on the nested-column block, then wrap the answer as nullable.
+    ColumnNumbers numbers(arguments.size());
+    for (size_t i = 0; i < arguments.size(); ++i) numbers[i] = i;
+    Block nested_block = create_block_with_nested_columns(Block(arguments), numbers);
+    return make_nullable(get_return_type_impl(
+            ColumnsWithTypeAndName(nested_block.begin(), nested_block.end())));
+}
+
+DataTypePtr FunctionBuilderImpl::get_return_type(const ColumnsWithTypeAndName& arguments) const {
+    // Resolves the return type for `arguments`. When the default low-cardinality handling is
+    // enabled, constant argument columns are first replaced by the result of
+    // ColumnConst::remove_low_cardinality(); the type is then resolved on that copy.
+    if (!use_default_implementation_for_low_cardinality_columns()) {
+        return get_return_type_without_low_cardinality(arguments);
+    }
+
+    // Work on a copy so the caller's argument descriptors are left untouched.
+    ColumnsWithTypeAndName args_without_low_cardinality(arguments);
+    for (ColumnWithTypeAndName& arg : args_without_low_cardinality) {
+        // arg.column may be unset, in which case there is nothing to rewrite. Non-constant
+        // columns are passed through unchanged.
+        const bool is_const = arg.column && is_column_const(*arg.column);
+        if (is_const) {
+            arg.column = assert_cast<const ColumnConst&>(*arg.column).remove_low_cardinality();
+        }
+    }
+
+    return get_return_type_without_low_cardinality(args_without_low_cardinality);
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h
new file mode 100644
index 0000000000..cf00c08b5e
--- /dev/null
+++ b/be/src/vec/functions/function.h
@@ -0,0 +1,595 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/IFunction.h
+// and modified by Doris
+
+#pragma once
+
+#include <memory>
+
+#include "common/status.h"
+#include "vec/core/block.h"
+#include "vec/core/column_numbers.h"
+#include "vec/core/names.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+
+class Field;
+
+// decltype-only probe: type of T::get_variadic_argument_types() if it exists, otherwise void.
+template <typename T>
+auto has_variadic_argument_types(T&& arg) -> decltype(T::get_variadic_argument_types());
+void has_variadic_argument_types(...);
+
+/// The simplest executable object.
+/// Motivation:
+///  * Prepare something heavy once before main execution loop instead of doing it for each block.
+///  * Provide const interface for IFunctionBase (later).
+class IPreparedFunction {
+public:
+    virtual ~IPreparedFunction() = default;
+
+    /// Get the main function name.
+    virtual String get_name() const = 0;
+    /// Run on `block`: `arguments` and `result` are column positions inside `block`.
+    virtual Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                           size_t result, size_t input_rows_count, bool dry_run) = 0;
+};
+
+using PreparedFunctionPtr = std::shared_ptr<IPreparedFunction>;
+
+class PreparedFunctionImpl : public IPreparedFunction {
+public:
+    Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                   size_t result, size_t input_rows_count, bool dry_run = false) final;
+
+protected:
+    virtual Status execute_impl_dry_run(FunctionContext* context, Block& block,
+                                        const ColumnNumbers& arguments, size_t result,
+                                        size_t input_rows_count) {
+        return execute_impl(context, block, arguments, result, input_rows_count);
+    }
+    /// The actual computation. execute_impl_dry_run() above forwards here unless overridden.
+    virtual Status execute_impl(FunctionContext* context, Block& block,
+                                const ColumnNumbers& arguments, size_t result,
+                                size_t input_rows_count) = 0;
+
+    /** Default implementation in the presence of Nullable arguments or NULL constants is the following:
+      *  if some of the arguments are NULL constants, then return a NULL constant;
+      *  if some of the arguments are Nullable, then execute the function as usual for the block,
+      *   where Nullable columns are substituted with nested columns (they have arbitrary values in rows corresponding to NULL value)
+      *   and wrap the result in a Nullable column where NULLs are in all rows where any of the arguments is NULL.
+      */
+    virtual bool use_default_implementation_for_nulls() const { return true; }
+
+    /** If the function has a non-zero number of arguments
+      *  and all of them are constant, a default implementation can be provided automatically:
+      *  arguments are converted to ordinary columns with a single value, then the function is executed as usual,
+      *  and then the result is converted to a constant column.
+      */
+    virtual bool use_default_implementation_for_constants() const { return false; }
+
+    /** If the arguments have a single low cardinality column and all other arguments are constants, call the function on the nested column.
+      * Otherwise, convert all low cardinality columns to ordinary columns.
+      * Returns ColumnLowCardinality if at least one argument is ColumnLowCardinality.
+      */
+    virtual bool use_default_implementation_for_low_cardinality_columns() const { return true; }
+
+    /** Indexes of the arguments that must remain constant columns; none by default.
+      */
+    virtual ColumnNumbers get_arguments_that_are_always_constant() const { return {}; }
+
+    /** True if the function can be called on default arguments (including Nullable ones) and won't throw.
+      * Counterexample: modulo(0, 0)
+      */
+    virtual bool can_be_executed_on_default_arguments() const { return true; }
+
+private:
+    Status default_implementation_for_nulls(FunctionContext* context, Block& block,
+                                            const ColumnNumbers& args, size_t result,
+                                            size_t input_rows_count, bool dry_run, bool* executed);
+    Status default_implementation_for_constant_arguments(FunctionContext* context, Block& block,
+                                                         const ColumnNumbers& args, size_t result,
+                                                         size_t input_rows_count, bool dry_run,
+                                                         bool* executed);
+    Status execute_without_low_cardinality_columns(FunctionContext* context, Block& block,
+                                                   const ColumnNumbers& arguments, size_t result,
+                                                   size_t input_rows_count, bool dry_run);
+};
+
+/// Function with known arguments and return type.
+class IFunctionBase {
+public:
+    virtual ~IFunctionBase() = default;
+
+    /// Get the main function name.
+    virtual String get_name() const = 0;
+
+    virtual const DataTypes& get_argument_types() const = 0;
+    virtual const DataTypePtr& get_return_type() const = 0;
+
+    /// Do preparations and return executable.
+    /// sample_block should contain data types of arguments and values of constants, if relevant.
+    virtual PreparedFunctionPtr prepare(FunctionContext* context, const Block& sample_block,
+                                        const ColumnNumbers& arguments, size_t result) const = 0;
+
+    /// Override this when function need to store state in the `FunctionContext`, or do some
+    /// preparation work according to information from `FunctionContext`.
+    virtual Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
+        return Status::OK();
+    }
+
+    /// Convenience: prepare() an executable for `block` and run it at once. TODO: make const
+    virtual Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                           size_t result, size_t input_rows_count, bool dry_run = false) {
+        return prepare(context, block, arguments, result)
+                ->execute(context, block, arguments, result, input_rows_count, dry_run);
+    }
+
+    /// Do cleaning work when function is finished, i.e., release state variables in the
+    /// `FunctionContext` which are registered in `prepare` phase.
+    virtual Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
+        return Status::OK();
+    }
+
+    virtual bool is_stateful() const { return false; }
+
+    /** Should we evaluate this function while constant folding, if arguments are constants?
+      * Usually this is true. Notable counterexample is function 'sleep'.
+      * If we will call it during query analysis, we will sleep extra amount of time.
+      */
+    virtual bool is_suitable_for_constant_folding() const { return true; }
+
+    /** Some functions like ignore(...) or toTypeName(...) always return constant result which doesn't depend on arguments.
+      * In this case we can calculate result and assume that it's constant in stream header.
+      * There is no need to implement function if it has zero arguments.
+      * Must return ColumnConst with single row or nullptr.
+      */
+    virtual ColumnPtr get_result_if_always_returns_constant_and_has_arguments(
+            const Block& /*block*/, const ColumnNumbers& /*arguments*/) const {
+        return nullptr;
+    }
+
+    /** Function is called "injective" if it returns different result for different values of arguments.
+      * Example: hex, negate, tuple...
+      *
+      * Function could be injective with some arguments fixed to some constant values.
+      * Examples:
+      *  plus(const, x);
+      *  multiply(const, x) where x is an integer and constant is not divisible by two;
+      *  concat(x, 'const');
+      *  concat(x, 'const', y) where const contain at least one non-numeric character;
+      *  concat with FixedString
+      *  dictGet... functions takes name of dictionary as its argument,
+      *   and some dictionaries could be explicitly defined as injective.
+      *
+      * It could be used, for example, to remove useless function applications from GROUP BY.
+      *
+      * Sometimes, function is not really injective, but considered as injective, for purpose of query optimization.
+      * For example, to_string function is not injective for Float64 data type,
+      *  as it returns 'nan' for many different representation of NaNs.
+      * But we assume, that it is injective. This could be documented as implementation-specific behaviour.
+      *
+      * sample_block should contain data types of arguments and values of constants, if relevant.
+      */
+    virtual bool get_is_injective(const Block& /*sample_block*/) { return false; }
+
+    /** Function is called "deterministic", if it returns same result for same values of arguments.
+      * Most of functions are deterministic. Notable counterexample is rand().
+      * Sometimes, functions are "deterministic" in scope of single query
+      *  (even for distributed query), but not deterministic in general.
+      * Example: now(). Another example: functions that work with periodically updated dictionaries.
+      * The query-scoped notion is reported by is_deterministic_in_scope_of_query().
+      */
+    virtual bool is_deterministic() const = 0;
+
+    virtual bool is_deterministic_in_scope_of_query() const = 0;
+
+    /** Lets you know if the function is monotonic in a range of values.
+      * This is used to work with the index in a sorted chunk of data.
+      * And allows to use the index not only when it is written, for example `date >= const`, but also, for example, `toMonth(date) >= 11`.
+      * All this is considered only for functions of one argument.
+      */
+    virtual bool has_information_about_monotonicity() const { return false; }
+
+    /// The property of monotonicity for a certain range.
+    struct Monotonicity {
+        bool is_monotonic = false; /// Is the function monotonous (nondecreasing or nonincreasing).
+        bool is_positive =
+                true; /// true if the function is nondecreasing, false if nonincreasing. If is_monotonic = false, then it does not matter.
+        bool is_always_monotonic =
+                false; /// Is true if function is monotonic on the whole input range
+
+        Monotonicity(bool is_monotonic_ = false, bool is_positive_ = true,
+                     bool is_always_monotonic_ = false)
+                : is_monotonic(is_monotonic_),
+                  is_positive(is_positive_),
+                  is_always_monotonic(is_always_monotonic_) {}
+    };
+
+    /** Get information about monotonicity on a range of values. Call only if has_information_about_monotonicity() returns true;
+      * this default implementation reports a fatal error via LOG(FATAL). NULL can be passed as one of the arguments, meaning the range is unlimited on the left or on the right.
+      */
+    virtual Monotonicity get_monotonicity_for_range(const IDataType& /*type*/,
+                                                    const Field& /*left*/,
+                                                    const Field& /*right*/) const {
+        LOG(FATAL) << fmt::format("Function {} has no information about its monotonicity.",
+                                  get_name());
+        return Monotonicity {};
+    }
+};
+
+using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
+
+/// Creates IFunctionBase from argument types list.
+class IFunctionBuilder {
+public:
+    virtual ~IFunctionBuilder() = default;
+
+    /// Get the main function name.
+    virtual String get_name() const = 0;
+
+    /// See the comment for the same method in IFunctionBase
+    virtual bool is_deterministic() const = 0;
+
+    virtual bool is_deterministic_in_scope_of_query() const = 0;
+
+    /// Override and return true if function needs to depend on the state of the data.
+    virtual bool is_stateful() const = 0;
+
+    /// Override and return true if function could take different number of arguments.
+    virtual bool is_variadic() const = 0;
+
+    /// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored).
+    virtual size_t get_number_of_arguments() const = 0;
+
+    /// Validate the number of arguments. Default implementation will check only in non-variadic case.
+    virtual void check_number_of_arguments(size_t number_of_arguments) const = 0;
+
+    /// Check arguments and return IFunctionBase.
+    virtual FunctionBasePtr build(const ColumnsWithTypeAndName& arguments,
+                                  const DataTypePtr& return_type) const = 0;
+
+    /// Returns the argument types the function reports for the variadic case.
+    /// NOTE(review): the comment previously here described lambda (higher-order) argument
+    /// resolution and looked copied from a different method - confirm the intended contract.
+    virtual DataTypes get_variadic_argument_types() const = 0;
+
+    /// Returns indexes of arguments, that must be ColumnConst
+    virtual ColumnNumbers get_arguments_that_are_always_constant() const = 0;
+    /// Returns indexes of arguments, that can be Nullable without making result of function Nullable
+    /// (for functions like is_null(x))
+    virtual ColumnNumbers get_arguments_that_dont_imply_nullable_return_type(
+            size_t number_of_arguments) const = 0;
+};
+
+using FunctionBuilderPtr = std::shared_ptr<IFunctionBuilder>;
+
+class FunctionBuilderImpl : public IFunctionBuilder {
+public:
+    /// Debug-checks `return_type` against the type deduced from `arguments`, then builds.
+    FunctionBasePtr build(const ColumnsWithTypeAndName& arguments,
+                          const DataTypePtr& return_type) const final {
+        const DataTypePtr func_return_type = get_return_type(arguments);
+        // Strips the Nullable wrapper, if there is one.
+        auto nested = [](const DataTypePtr& type) -> DataTypePtr {
+            if (!type->is_nullable()) return type;
+            return static_cast<const DataTypeNullable*>(type.get())->get_nested_type();
+        };
+        DCHECK(return_type->equals(*func_return_type) ||
+               // For null constant argument, `get_return_type` would return
+               // Nullable<DataTypeNothing> when `use_default_implementation_for_nulls` is true.
+               (return_type->is_nullable() && func_return_type->is_nullable() &&
+                is_nothing(nested(func_return_type))) ||
+               // Both sides satisfying is_date_or_datetime() is accepted as well.
+               (is_date_or_datetime(nested(return_type)) &&
+                is_date_or_datetime(nested(func_return_type))))
+                << " with " << return_type->get_name() << " and " << func_return_type->get_name();
+        return build_impl(arguments, return_type);
+    }
+
+    bool is_deterministic() const override { return true; }
+    bool is_deterministic_in_scope_of_query() const override { return true; }
+    bool is_stateful() const override { return false; }
+    bool is_variadic() const override { return false; }
+
+    /// Default implementation. Will check only in non-variadic case.
+    void check_number_of_arguments(size_t number_of_arguments) const override;
+
+    DataTypePtr get_return_type(const ColumnsWithTypeAndName& arguments) const;
+
+    DataTypes get_variadic_argument_types() const override {
+        return get_variadic_argument_types_impl();
+    }
+
+    ColumnNumbers get_arguments_that_are_always_constant() const override { return {}; }
+    ColumnNumbers get_arguments_that_dont_imply_nullable_return_type(
+            size_t /*number_of_arguments*/) const override {
+        return {};
+    }
+
+protected:
+    /// Get the result type by arguments. By default only the types are forwarded to the DataTypes overload.
+    virtual DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const {
+        DataTypes data_types(arguments.size());
+        for (size_t i = 0; i < arguments.size(); ++i) data_types[i] = arguments[i].type;
+
+        return get_return_type_impl(data_types);
+    }
+
+    virtual DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const {
+        LOG(FATAL) << fmt::format("get_return_type is not implemented for {}", get_name());
+        return nullptr;
+    }
+
+    /** If use_default_implementation_for_nulls() is true, then change arguments for get_return_type() and build_impl():
+      *  if some of arguments are Nullable(Nothing) then don't call get_return_type(), call build_impl() with return_type = Nullable(Nothing),
+      *  if some of arguments are Nullable, then:
+      *   - Nullable types are substituted with nested types for get_return_type() function
+      *   - wrap get_return_type() result in Nullable type and pass to build_impl
+      *
+      * Otherwise build returns build_impl(arguments, get_return_type(arguments));
+      */
+    virtual bool use_default_implementation_for_nulls() const { return true; }
+
+    /** If use_default_implementation_for_low_cardinality_columns() is true, then change arguments for get_return_type() and build_impl().
+      * If function arguments has low cardinality types, convert them to ordinary types.
+      * get_return_type returns ColumnLowCardinality if at least one argument type is ColumnLowCardinality.
+      */
+    virtual bool use_default_implementation_for_low_cardinality_columns() const { return true; }
+
+    /// If it isn't, will convert all ColumnLowCardinality arguments to full columns.
+    virtual bool can_be_executed_on_low_cardinality_dictionary() const { return true; }
+
+    virtual FunctionBasePtr build_impl(const ColumnsWithTypeAndName& arguments,
+                                       const DataTypePtr& return_type) const = 0;
+
+    virtual DataTypes get_variadic_argument_types_impl() const { return DataTypes(); }
+
+private:
+    DataTypePtr get_return_type_without_low_cardinality(
+            const ColumnsWithTypeAndName& arguments) const;
+};
+
+/// Previous function interface: one class acting as builder, function base and executable.
+class IFunction : public std::enable_shared_from_this<IFunction>,
+                  public FunctionBuilderImpl,
+                  public IFunctionBase,
+                  public PreparedFunctionImpl {
+public:
+    String get_name() const override = 0;
+
+    bool is_stateful() const override { return false; }
+
+    /// TODO: make const
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override = 0;
+
+    /// Override these functions to change the default behavior. See PreparedFunctionImpl / FunctionBuilderImpl.
+    bool use_default_implementation_for_nulls() const override { return true; }
+    bool use_default_implementation_for_constants() const override { return false; }
+    bool use_default_implementation_for_low_cardinality_columns() const override { return true; }
+    ColumnNumbers get_arguments_that_are_always_constant() const override { return {}; }
+    bool can_be_executed_on_default_arguments() const override { return true; }
+    bool can_be_executed_on_low_cardinality_dictionary() const override {
+        return is_deterministic_in_scope_of_query();
+    }
+    bool is_deterministic() const override { return true; }
+    bool is_deterministic_in_scope_of_query() const override { return true; }
+
+    using PreparedFunctionImpl::execute;
+    using PreparedFunctionImpl::execute_impl_dry_run;
+    using FunctionBuilderImpl::get_return_type_impl;
+    using FunctionBuilderImpl::get_variadic_argument_types_impl;
+    using FunctionBuilderImpl::get_return_type;
+    /// Not supported on IFunction itself: reports a fatal error via LOG(FATAL).
+    [[noreturn]] PreparedFunctionPtr prepare(FunctionContext* context,
+                                             const Block& /*sample_block*/,
+                                             const ColumnNumbers& /*arguments*/,
+                                             size_t /*result*/) const final {
+        LOG(FATAL) << "prepare is not implemented for IFunction";
+        __builtin_unreachable();
+    }
+
+    Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
+        return Status::OK();
+    }
+
+    [[noreturn]] const DataTypes& get_argument_types() const final {
+        LOG(FATAL) << "get_argument_types is not implemented for IFunction";
+        __builtin_unreachable();
+    }
+
+    [[noreturn]] const DataTypePtr& get_return_type() const final {
+        LOG(FATAL) << "get_return_type is not implemented for IFunction";
+        __builtin_unreachable();
+    }
+
+protected:
+    FunctionBasePtr build_impl(const ColumnsWithTypeAndName& /*arguments*/,
+                               const DataTypePtr& /*return_type*/) const final {
+        LOG(FATAL) << "build_impl is not implemented for IFunction";
+        return {};
+    }
+};
+
+/// Wrappers over IFunction.
+
+/// PreparedFunctionImpl adapter: every hook is answered by the wrapped IFunction.
+class DefaultExecutable final : public PreparedFunctionImpl {
+public:
+    explicit DefaultExecutable(std::shared_ptr<IFunction> func) : _function(std::move(func)) {}
+
+    String get_name() const override { return _function->get_name(); }
+
+protected:
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& args,
+                        size_t result, size_t rows) final {
+        return _function->execute_impl(context, block, args, result, rows);
+    }
+    Status execute_impl_dry_run(FunctionContext* context, Block& block,
+                                const ColumnNumbers& args, size_t result,
+                                size_t rows) final {
+        return _function->execute_impl_dry_run(context, block, args, result, rows);
+    }
+    bool use_default_implementation_for_nulls() const final {
+        return _function->use_default_implementation_for_nulls();
+    }
+    bool use_default_implementation_for_constants() const final {
+        return _function->use_default_implementation_for_constants();
+    }
+    bool use_default_implementation_for_low_cardinality_columns() const final {
+        return _function->use_default_implementation_for_low_cardinality_columns();
+    }
+    ColumnNumbers get_arguments_that_are_always_constant() const final {
+        return _function->get_arguments_that_are_always_constant();
+    }
+    bool can_be_executed_on_default_arguments() const override {
+        return _function->can_be_executed_on_default_arguments();
+    }
+
+private:
+    std::shared_ptr<IFunction> _function; // the wrapped function; shared with the builder
+};
+
+/// IFunctionBase adapter: stores the resolved types and forwards the rest to an IFunction.
+class DefaultFunction final : public IFunctionBase {
+public:
+    DefaultFunction(std::shared_ptr<IFunction> func, DataTypes arg_types, DataTypePtr ret_type)
+            : _function(std::move(func)),
+              _argument_types(std::move(arg_types)),
+              _return_type(std::move(ret_type)) {}
+
+    String get_name() const override { return _function->get_name(); }
+
+    const DataTypes& get_argument_types() const override { return _argument_types; }
+    const DataTypePtr& get_return_type() const override { return _return_type; }
+
+    PreparedFunctionPtr prepare(FunctionContext* context, const Block& /*sample_block*/,
+                                const ColumnNumbers& /*arguments*/,
+                                size_t /*result*/) const override {
+        return std::make_shared<DefaultExecutable>(_function);
+    }
+
+    Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
+        return _function->prepare(context, scope);
+    }
+
+    bool is_suitable_for_constant_folding() const override {
+        return _function->is_suitable_for_constant_folding();
+    }
+    ColumnPtr get_result_if_always_returns_constant_and_has_arguments(
+            const Block& block, const ColumnNumbers& args) const override {
+        return _function->get_result_if_always_returns_constant_and_has_arguments(block, args);
+    }
+
+    bool get_is_injective(const Block& sample_block) override {
+        return _function->get_is_injective(sample_block);
+    }
+
+    bool is_deterministic() const override { return _function->is_deterministic(); }
+
+    bool is_deterministic_in_scope_of_query() const override {
+        return _function->is_deterministic_in_scope_of_query();
+    }
+
+    bool has_information_about_monotonicity() const override {
+        return _function->has_information_about_monotonicity();
+    }
+
+    IFunctionBase::Monotonicity get_monotonicity_for_range(const IDataType& type, const Field& left,
+                                                           const Field& right) const override {
+        return _function->get_monotonicity_for_range(type, left, right);
+    }
+
+private:
+    std::shared_ptr<IFunction> _function;
+    DataTypes _argument_types;
+    DataTypePtr _return_type;
+};
+
+/// FunctionBuilderImpl adapter: answers every query through the wrapped IFunction.
+class DefaultFunctionBuilder : public FunctionBuilderImpl {
+public:
+    explicit DefaultFunctionBuilder(std::shared_ptr<IFunction> fn) : _function(std::move(fn)) {}
+
+    void check_number_of_arguments(size_t number_of_arguments) const override {
+        _function->check_number_of_arguments(number_of_arguments);
+    }
+
+    bool is_deterministic() const override { return _function->is_deterministic(); }
+    bool is_deterministic_in_scope_of_query() const override {
+        return _function->is_deterministic_in_scope_of_query();
+    }
+
+    String get_name() const override { return _function->get_name(); }
+    bool is_stateful() const override { return _function->is_stateful(); }
+    bool is_variadic() const override { return _function->is_variadic(); }
+    size_t get_number_of_arguments() const override { return _function->get_number_of_arguments(); }
+
+    ColumnNumbers get_arguments_that_are_always_constant() const override {
+        return _function->get_arguments_that_are_always_constant();
+    }
+    ColumnNumbers get_arguments_that_dont_imply_nullable_return_type(
+            size_t number_of_arguments) const override {
+        return _function->get_arguments_that_dont_imply_nullable_return_type(number_of_arguments);
+    }
+
+protected:
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return _function->get_return_type_impl(arguments);
+    }
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
+        return _function->get_return_type_impl(arguments);
+    }
+
+    bool use_default_implementation_for_nulls() const override {
+        return _function->use_default_implementation_for_nulls();
+    }
+    bool use_default_implementation_for_low_cardinality_columns() const override {
+        return _function->use_default_implementation_for_low_cardinality_columns();
+    }
+    bool can_be_executed_on_low_cardinality_dictionary() const override {
+        return _function->can_be_executed_on_low_cardinality_dictionary();
+    }
+
+    // Wraps the function in a DefaultFunction carrying the argument types and `return_type`.
+    FunctionBasePtr build_impl(const ColumnsWithTypeAndName& arguments,
+                               const DataTypePtr& return_type) const override {
+        DataTypes types(arguments.size());
+        for (size_t pos = 0; pos != arguments.size(); ++pos) types[pos] = arguments[pos].type;
+        return std::make_shared<DefaultFunction>(_function, types, return_type);
+    }
+    DataTypes get_variadic_argument_types_impl() const override {
+        return _function->get_variadic_argument_types_impl();
+    }
+
+private:
+    std::shared_ptr<IFunction> _function;
+};
+
+using FunctionPtr = std::shared_ptr<IFunction>;
+
+/** Return ColumnNullable of src, with null map as OR-ed null maps of args columns in blocks.
+  * Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
+  */
+ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const ColumnNumbers& args,
+                           size_t result, size_t input_rows_count);
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_always_not_nullable.h b/be/src/vec/functions/function_always_not_nullable.h
new file mode 100644
index 0000000000..baafda6700
--- /dev/null
+++ b/be/src/vec/functions/function_always_not_nullable.h
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionStringOrArrayToT.h
+// and modified by Doris
+
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+
+namespace doris::vectorized {
+
+/// Unary function adapter whose declared result type is always Function::ReturnType;
+/// Nullable arguments are handed to Function itself (default NULL handling is disabled).
+template <typename Function>
+class FunctionAlwaysNotNullable : public IFunction {
+public:
+    static constexpr auto name = Function::name;
+
+    static FunctionPtr create() { return std::make_shared<FunctionAlwaysNotNullable>(); }
+
+    String get_name() const override { return Function::name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<typename Function::ReturnType>();
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    /// Fills the `result` column from the single argument column:
+    ///  * Nullable(String): Function::vector_nullable() gets chars, offsets and the null map;
+    ///  * String: Function::vector() gets chars and offsets;
+    ///  * any other non-Nullable column: a RuntimeError is returned.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const auto argument = block.get_by_position(arguments[0]).column;
+
+        MutableColumnPtr res_column = get_return_type_impl({})->create_column();
+        res_column->resize(input_rows_count);
+
+        if (const auto* nullable = check_and_get_column<ColumnNullable>(argument.get())) {
+            const auto* strings =
+                    check_and_get_column<ColumnString>(nullable->get_nested_column_ptr().get());
+            const auto* null_map =
+                    check_and_get_column<ColumnUInt8>(nullable->get_null_map_column_ptr().get());
+            // NOTE(review): when the nested column is not a ColumnString, nothing is computed
+            // and the freshly resized result column is returned as is - confirm this is intended.
+            if (strings != nullptr && null_map != nullptr) {
+                Function::vector_nullable(strings->get_chars(), strings->get_offsets(),
+                                          null_map->get_data(), res_column);
+            }
+        } else if (const auto* strings = check_and_get_column<ColumnString>(argument.get())) {
+            Function::vector(strings->get_chars(), strings->get_offsets(), res_column);
+        } else {
+            return Status::RuntimeError(fmt::format(
+                    "Illegal column {} of argument of function {}",
+                    block.get_by_position(arguments[0]).column->get_name(), get_name()));
+        }
+
+        block.replace_by_position(result, std::move(res_column));
+        return Status::OK();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_binary_arithmetic.h b/be/src/vec/functions/function_binary_arithmetic.h
new file mode 100644
index 0000000000..f987f90eeb
--- /dev/null
+++ b/be/src/vec/functions/function_binary_arithmetic.h
@@ -0,0 +1,719 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionBinaryArithmetic.h
+// and modified by Doris
+
+#pragma once
+
+#include "common/logging.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_decimal.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/number_traits.h"
+#include "vec/functions/cast_type_to_either.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/functions/int_div.h"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+
+/** Arithmetic operations: +, -, *,
+  * Bitwise operations: |, &, ^, ~.
+  * Etc.
+  */
+
+template <typename A, typename B, typename Op, typename ResultType_ = typename Op::ResultType>
+struct BinaryOperationImplBase {
+    using ResultType = ResultType_;
+    /// c[i] = Op(a[i], b[i]) for every i < a.size(); the kernels never resize c themselves.
+    static void NO_INLINE vector_vector(const PaddedPODArray<A>& a, const PaddedPODArray<B>& b,
+                                        PaddedPODArray<ResultType>& c) {
+        for (size_t row = 0, rows = a.size(); row < rows; ++row) {
+            c[row] = Op::template apply<ResultType>(a[row], b[row]);
+        }
+    }
+    /// Same loop, but `null_map` and the row index are handed to Op as well.
+    static void NO_INLINE vector_vector(const PaddedPODArray<A>& a, const PaddedPODArray<B>& b,
+                                        PaddedPODArray<ResultType>& c, NullMap& null_map) {
+        for (size_t row = 0, rows = a.size(); row < rows; ++row) {
+            c[row] = Op::template apply<ResultType>(a[row], b[row], null_map, row);
+        }
+    }
+
+    static void NO_INLINE vector_constant(const PaddedPODArray<A>& a, B b,
+                                          PaddedPODArray<ResultType>& c) {
+        for (size_t row = 0, rows = a.size(); row < rows; ++row) {
+            c[row] = Op::template apply<ResultType>(a[row], b);
+        }
+    }
+
+    static void NO_INLINE constant_vector(A a, const PaddedPODArray<B>& b,
+                                          PaddedPODArray<ResultType>& c) {
+        for (size_t row = 0, rows = b.size(); row < rows; ++row) {
+            c[row] = Op::template apply<ResultType>(a, b[row]);
+        }
+    }
+
+    static ResultType constant_constant(A a, B b) { return Op::template apply<ResultType>(a, b); }
+};
+
+/// Adds nothing of its own: every kernel is inherited from BinaryOperationImplBase.
+template <typename A, typename B, typename Op, typename ResultType = typename Op::ResultType>
+struct BinaryOperationImpl : BinaryOperationImplBase<A, B, Op, ResultType> {};
+
+/// Forward declarations of the operation templates. In this header they are only compared
+/// via std::is_same_v to find out which operation a template has been instantiated for.
+
+/// Plus, minus, multiply, modulo.
+template <typename, typename> struct PlusImpl;
+template <typename, typename> struct MinusImpl;
+template <typename, typename> struct MultiplyImpl;
+template <typename, typename> struct ModuloImpl;
+
+/// Division flavours.
+template <typename, typename> struct DivideFloatingImpl;
+template <typename, typename> struct DivideIntegralImpl;
+template <typename, typename> struct DivideIntegralOrZeroImpl;
+
+/// Least / greatest.
+template <typename, typename> struct LeastBaseImpl;
+template <typename, typename> struct GreatestBaseImpl;
+
+/// Binary operations on Decimals need scale arguments:
+/// +|- scale one of the args (the one whose scale factor is not 1). ScaleR = oneof(Scale1, Scale2);
+/// *   no args are scaled. ScaleR = Scale1 + Scale2;
+/// /   the first arg is scaled. ScaleR = Scale1 (scale_a = DecimalType<B>::get_scale()).
+template <typename A, typename B, template <typename, typename> typename Operation,
+          typename ResultType_, bool _check_overflow = true>
+struct DecimalBinaryOperation {
+    static constexpr bool is_plus_minus =
+            std::is_same_v<Operation<Int32, Int32>, PlusImpl<Int32, Int32>> ||
+            std::is_same_v<Operation<Int32, Int32>, MinusImpl<Int32, Int32>>;
+    static constexpr bool is_multiply =
+            std::is_same_v<Operation<Int32, Int32>, MultiplyImpl<Int32, Int32>>;
+    static constexpr bool is_float_division =
+            std::is_same_v<Operation<Int32, Int32>, DivideFloatingImpl<Int32, Int32>>;
+    static constexpr bool is_int_division =
+            std::is_same_v<Operation<Int32, Int32>, DivideIntegralImpl<Int32, Int32>> ||
+            std::is_same_v<Operation<Int32, Int32>, DivideIntegralOrZeroImpl<Int32, Int32>>;
+    static constexpr bool is_division = is_float_division || is_int_division;
+    static constexpr bool is_compare =
+            std::is_same_v<Operation<Int32, Int32>, LeastBaseImpl<Int32, Int32>> ||
+            std::is_same_v<Operation<Int32, Int32>, GreatestBaseImpl<Int32, Int32>>;
+    static constexpr bool is_plus_minus_compare = is_plus_minus || is_compare;
+    static constexpr bool can_overflow = is_plus_minus || is_multiply;
+    /// The flags above identify Operation by comparing its <Int32, Int32> instantiation.
+    using ResultType = ResultType_;
+    using NativeResultType = typename NativeType<ResultType>::Type;
+    using Op = Operation<NativeResultType, NativeResultType>;
+
+    using ColVecA = std::conditional_t<IsDecimalNumber<A>, ColumnDecimal<A>, ColumnVector<A>>;
+    using ColVecB = std::conditional_t<IsDecimalNumber<B>, ColumnDecimal<B>, ColumnVector<B>>;
+    using ArrayA = typename ColVecA::Container;
+    using ArrayB = typename ColVecB::Container;
+    using ArrayC = typename ColumnDecimal<ResultType>::Container;
+    using SelfNoOverflow = DecimalBinaryOperation<A, B, Operation, ResultType_, false>;
+    /// Runtime dispatch: check_overflow == false forwards to the SelfNoOverflow instantiation.
+    static void vector_vector(const ArrayA& a, const ArrayB& b, ArrayC& c, ResultType scale_a,
+                              ResultType scale_b, bool check_overflow) {
+        if (check_overflow)
+            vector_vector(a, b, c, scale_a, scale_b);
+        else
+            SelfNoOverflow::vector_vector(a, b, c, scale_a, scale_b);
+    }
+
+    /// null_map for divide and mod
+    static void vector_vector(const ArrayA& a, const ArrayB& b, ArrayC& c, ResultType scale_a,
+                              ResultType scale_b, bool check_overflow, NullMap& null_map) {
+        if (check_overflow)
+            vector_vector(a, b, c, scale_a, scale_b, null_map);
+        else
+            SelfNoOverflow::vector_vector(a, b, c, scale_a, scale_b, null_map);
+    }
+
+    static void vector_constant(const ArrayA& a, B b, ArrayC& c, ResultType scale_a,
+                                ResultType scale_b, bool check_overflow) {
+        if (check_overflow)
+            vector_constant(a, b, c, scale_a, scale_b);
+        else
+            SelfNoOverflow::vector_constant(a, b, c, scale_a, scale_b);
+    }
+
+    static void constant_vector(A a, const ArrayB& b, ArrayC& c, ResultType scale_a,
+                                ResultType scale_b, bool check_overflow) {
+        if (check_overflow)
+            constant_vector(a, b, c, scale_a, scale_b);
+        else
+            SelfNoOverflow::constant_vector(a, b, c, scale_a, scale_b);
+    }
+
+    static ResultType constant_constant(A a, B b, ResultType scale_a, ResultType scale_b,
+                                        bool check_overflow) {
+        if (check_overflow)
+            return constant_constant(a, b, scale_a, scale_b);
+        else
+            return SelfNoOverflow::constant_constant(a, b, scale_a, scale_b);
+    }
+    /// Kernels: plus/minus/least/greatest rescale one operand (scale_a is tried before scale_b).
+    static void NO_INLINE vector_vector(const ArrayA& a, const ArrayB& b, ArrayC& c,
+                                        ResultType scale_a [[maybe_unused]],
+                                        ResultType scale_b [[maybe_unused]]) {
+        size_t size = a.size();
+        if constexpr (is_plus_minus_compare) {
+            if (scale_a != 1) {
+                for (size_t i = 0; i < size; ++i) {
+                    c[i] = apply_scaled<true>(a[i], b[i], scale_a);
+                }
+                return;
+            } else if (scale_b != 1) {
+                for (size_t i = 0; i < size; ++i) {
+                    c[i] = apply_scaled<false>(a[i], b[i], scale_b);
+                }
+                return;
+            }
+        }
+
+        /// default: use it if no return before
+        for (size_t i = 0; i < size; ++i) {
+            c[i] = apply(a[i], b[i]);
+        }
+    }
+
+    /// null_map for divide and mod
+    static void NO_INLINE vector_vector(const ArrayA& a, const ArrayB& b, ArrayC& c,
+                                        ResultType scale_a [[maybe_unused]],
+                                        ResultType scale_b [[maybe_unused]], NullMap& null_map) {
+        size_t size = a.size();
+        if constexpr (is_division && IsDecimalNumber<B>) {
+            for (size_t i = 0; i < size; ++i) {
+                c[i] = apply_scaled_div(a[i], b[i], scale_a, null_map, i);
+            }
+            return;
+        }
+
+        /// default: use it if no return before
+        for (size_t i = 0; i < size; ++i) {
+            c[i] = apply(a[i], b[i], null_map, i);
+        }
+    }
+
+    static void NO_INLINE vector_constant(const ArrayA& a, B b, ArrayC& c,
+                                          ResultType scale_a [[maybe_unused]],
+                                          ResultType scale_b [[maybe_unused]]) {
+        size_t size = a.size();
+        if constexpr (is_plus_minus_compare) {
+            if (scale_a != 1) {
+                for (size_t i = 0; i < size; ++i) c[i] = apply_scaled<true>(a[i], b, scale_a);
+                return;
+            } else if (scale_b != 1) {
+                for (size_t i = 0; i < size; ++i) c[i] = apply_scaled<false>(a[i], b, scale_b);
+                return;
+            }
+        } else if constexpr (is_division && IsDecimalNumber<B>) {
+            for (size_t i = 0; i < size; ++i) c[i] = apply_scaled_div(a[i], b, scale_a);
+            return;
+        }
+
+        /// default: use it if no return before
+        for (size_t i = 0; i < size; ++i) c[i] = apply(a[i], b);
+    }
+
+    static void NO_INLINE constant_vector(A a, const ArrayB& b, ArrayC& c,
+                                          ResultType scale_a [[maybe_unused]],
+                                          ResultType scale_b [[maybe_unused]]) {
+        size_t size = b.size();
+        if constexpr (is_plus_minus_compare) {
+            if (scale_a != 1) {
+                for (size_t i = 0; i < size; ++i) c[i] = apply_scaled<true>(a, b[i], scale_a);
+                return;
+            } else if (scale_b != 1) {
+                for (size_t i = 0; i < size; ++i) c[i] = apply_scaled<false>(a, b[i], scale_b);
+                return;
+            }
+        } else if constexpr (is_division && IsDecimalNumber<B>) {
+            for (size_t i = 0; i < size; ++i) c[i] = apply_scaled_div(a, b[i], scale_a);
+            return;
+        }
+
+        /// default: use it if no return before
+        for (size_t i = 0; i < size; ++i) c[i] = apply(a, b[i]);
+    }
+
+    static ResultType constant_constant(A a, B b, ResultType scale_a [[maybe_unused]],
+                                        ResultType scale_b [[maybe_unused]]) {
+        if constexpr (is_plus_minus_compare) {
+            if (scale_a != 1)
+                return apply_scaled<true>(a, b, scale_a);
+            else if (scale_b != 1)
+                return apply_scaled<false>(a, b, scale_b);
+        } else if constexpr (is_division && IsDecimalNumber<B>)
+            return apply_scaled_div(a, b, scale_a);
+        return apply(a, b);
+    }
+
+private:
+    /// Applies Op to the operands wrapped as DecimalV2Value; A/B convert implicitly to the params.
+    static NativeResultType apply(NativeResultType a, NativeResultType b) {
+        // Doris only supports decimal +-*/ decimal for now; overflow is considered in the operator.
+        DecimalV2Value l(a);
+        DecimalV2Value r(b);
+        auto ans = Op::template apply(l, r);
+        // Zero-init and bound the copy: never read past `ans`, never return indeterminate bytes.
+        NativeResultType result{};
+        memcpy(&result, &ans, std::min(sizeof(result), sizeof(ans)));
+        return result;
+    }
+
+    /// null_map for divide and mod
+    static NativeResultType apply(NativeResultType a, NativeResultType b, NullMap& null_map,
+                                  size_t index) {
+        DecimalV2Value l(a);
+        DecimalV2Value r(b);
+        auto ans = Op::template apply(l, r, null_map, index);
+        NativeResultType result{};
+        memcpy(&result, &ans, std::min(sizeof(result), sizeof(ans)));
+        return result;
+    }
+
+    /// Multiplies a (scale_left) or b (otherwise) by `scale` and then applies Op to the native
+    /// values. With _check_overflow, any overflow that is detected ends in LOG(FATAL).
+    template <bool scale_left>
+    static NativeResultType apply_scaled(NativeResultType a, NativeResultType b,
+                                         NativeResultType scale) {
+        if constexpr (is_plus_minus_compare) {
+            NativeResultType res;
+            if constexpr (_check_overflow) {
+                bool overflow = false;
+                if constexpr (scale_left)
+                    overflow |= common::mul_overflow(a, scale, a);
+                else
+                    overflow |= common::mul_overflow(b, scale, b);
+                if constexpr (can_overflow)
+                    overflow |= Op::template apply<NativeResultType>(a, b, res);
+                else
+                    res = Op::template apply<NativeResultType>(a, b);
+                if (overflow) {
+                    LOG(FATAL) << "Decimal math overflow";
+                }
+            } else {
+                if constexpr (scale_left)
+                    a *= scale;
+                else
+                    b *= scale;
+                res = Op::template apply<NativeResultType>(a, b);
+            }
+            return res;
+        }
+    }
+
+    /// Division: multiplies a by `scale` (by scale * scale if A is not a decimal) before Op runs.
+    /// NOTE(review): the kernels without a null_map call this with three arguments; no such
+    /// overload is visible here, so presumably those branches are never instantiated.
+    static NativeResultType apply_scaled_div(NativeResultType a, NativeResultType b,
+                                             NativeResultType scale, NullMap& null_map,
+                                             size_t index) {
+        if constexpr (is_division) {
+            if constexpr (_check_overflow) {
+                bool overflow = false;
+                if constexpr (!IsDecimalNumber<A>)
+                    overflow |= common::mul_overflow(scale, scale, scale);
+                overflow |= common::mul_overflow(a, scale, a);
+                if (overflow) {
+                    LOG(FATAL) << "Decimal math overflow";
+                }
+            } else {
+                if constexpr (!IsDecimalNumber<A>) scale *= scale;
+                a *= scale;
+            }
+            return Op::template apply<NativeResultType>(a, b, null_map, index);
+        }
+    }
+};
+
+/// Tag type (only declared here) that indicates an undefined operation.
+struct InvalidType;
+
+/// One branch of Switch: exposes the condition V as ::value and the selected type T as ::type.
+template <bool V, typename T>
+struct Case : std::integral_constant<bool, V> {
+    using type = T;
+};
+/// Switch<Case<C0, T0>, ...> -- select the first Ti for which Ci is true; InvalidType if none.
+template <typename... Ts>
+using Switch = typename std::disjunction<Ts..., Case<true, InvalidType>>::type;
+
+/// Compile-time predicate: is DataType one of the integer data types?
+/// The primary template answers false; each integer data type opts in through one of the
+/// explicit specializations below. BinaryOperationTraits uses the predicate to decide which
+/// non-decimal operand types may be combined with a decimal operand.
+template <typename DataType>
+constexpr bool IsIntegral = false;
+
+/// Unsigned integer data types.
+template <> inline constexpr bool IsIntegral<DataTypeUInt8> = true;
+template <> inline constexpr bool IsIntegral<DataTypeUInt16> = true;
+template <> inline constexpr bool IsIntegral<DataTypeUInt32> = true;
+template <> inline constexpr bool IsIntegral<DataTypeUInt64> = true;
+
+/// Signed integer data types.
+template <> inline constexpr bool IsIntegral<DataTypeInt8> = true;
+template <> inline constexpr bool IsIntegral<DataTypeInt16> = true;
+template <> inline constexpr bool IsIntegral<DataTypeInt32> = true;
+template <> inline constexpr bool IsIntegral<DataTypeInt64> = true;
+/// The 128-bit integer data type counts as integral as well.
+template <> inline constexpr bool IsIntegral<DataTypeInt128> = true;
+
+/// Compile-time predicate: true for DataTypeFloat32 and DataTypeFloat64 only; every other
+/// type gets the primary template's false.
+template <typename DataType>
+constexpr bool IsFloatingPoint = false;
+template <> inline constexpr bool IsFloatingPoint<DataTypeFloat32> = true;
+template <> inline constexpr bool IsFloatingPoint<DataTypeFloat64> = true;
+
+/// True when the left operand's decimal type is the result type of Decimal <op> Decimal, which
+/// holds for (Decimal128, Decimal32), (Decimal128, Decimal64) and (Decimal64, Decimal32).
+template <typename T0, typename T1>
+constexpr bool UseLeftDecimal =
+        (std::is_same_v<T0, DataTypeDecimal<Decimal128>> &&
+         (std::is_same_v<T1, DataTypeDecimal<Decimal32>> ||
+          std::is_same_v<T1, DataTypeDecimal<Decimal64>>)) ||
+        (std::is_same_v<T0, DataTypeDecimal<Decimal64>> &&
+         std::is_same_v<T1, DataTypeDecimal<Decimal32>>);
+
+/// Maps a field type to DataTypeNumber<T>; NumberTraits::Error maps to InvalidType.
+template <typename T>
+using DataTypeFromFieldType =
+        std::conditional_t<!std::is_same_v<T, NumberTraits::Error>, DataTypeNumber<T>, InvalidType>;
+
+/// Compile-time description of `Operation` applied to LeftDataType and RightDataType: whether
+/// decimal operands are allowed and which data type the result has.
+template <template <typename, typename> class Operation, typename LeftDataType,
+          typename RightDataType>
+struct BinaryOperationTraits {
+    using T0 = typename LeftDataType::FieldType;
+    using T1 = typename RightDataType::FieldType;
+
+private:
+    using Op = Operation<T0, T1>; /// it's not correct for Decimal
+    using SameTypeOp = Operation<T0, T0>;
+    static constexpr bool left_is_decimal = IsDataTypeDecimal<LeftDataType>;
+    static constexpr bool right_is_decimal = IsDataTypeDecimal<RightDataType>;
+
+public:
+    /// True when Operation is one of the operation templates compared against here.
+    static constexpr bool allow_decimal =
+            std::is_same_v<SameTypeOp, PlusImpl<T0, T0>> ||
+            std::is_same_v<SameTypeOp, MinusImpl<T0, T0>> ||
+            std::is_same_v<SameTypeOp, MultiplyImpl<T0, T0>> ||
+            std::is_same_v<SameTypeOp, ModuloImpl<T0, T0>> ||
+            std::is_same_v<SameTypeOp, DivideFloatingImpl<T0, T0>> ||
+            std::is_same_v<SameTypeOp, DivideIntegralImpl<T0, T0>> ||
+            std::is_same_v<SameTypeOp, DivideIntegralOrZeroImpl<T0, T0>> ||
+            std::is_same_v<SameTypeOp, LeastBaseImpl<T0, T0>> ||
+            std::is_same_v<SameTypeOp, GreatestBaseImpl<T0, T0>>;
+
+    /// Result data type, chosen by the first matching rule below; InvalidType marks operand
+    /// combinations that the operation does not support.
+    using ResultDataType = Switch<
+            /// Any decimal operand is rejected when the operation does not allow decimals.
+            Case<!allow_decimal && (left_is_decimal || right_is_decimal), InvalidType>,
+            /// Decimal <op> Decimal: the left type when UseLeftDecimal holds, else the right one.
+            Case<left_is_decimal && right_is_decimal && UseLeftDecimal<LeftDataType, RightDataType>,
+                 LeftDataType>,
+            Case<left_is_decimal && right_is_decimal, RightDataType>,
+            /// Decimal <op> integer (either order) keeps the decimal type.
+            Case<left_is_decimal && !right_is_decimal && IsIntegral<RightDataType>,
+                 LeftDataType>,
+            Case<!left_is_decimal && right_is_decimal && IsIntegral<LeftDataType>,
+                 RightDataType>,
+            /// Decimal <op> Real is not supported (traditional DBs convert Decimal <op> Real to Real)
+            Case<left_is_decimal && !right_is_decimal && !IsIntegral<RightDataType>,
+                 InvalidType>,
+            Case<!left_is_decimal && right_is_decimal && !IsIntegral<LeftDataType>,
+                 InvalidType>,
+            /// number <op> number -> see corresponding impl
+            Case<!left_is_decimal && !right_is_decimal,
+                 DataTypeFromFieldType<typename Op::ResultType>>>;
+};
+
+/// IFunction that applies the binary operation template `Op` to two number or decimal
+/// columns; the function name comes from Name::name.
+template <template <typename, typename> class Op, typename Name,
+          bool CanBeExecutedOnDefaultArguments = true>
+class FunctionBinaryArithmetic : public IFunction {
+    bool check_decimal_overflow = true;
+    /// Hands `type` and `f` to cast_type_to_either together with the supported data types.
+    template <typename F>
+    static bool cast_type(const IDataType* type, F&& f) {
+        return cast_type_to_either<DataTypeUInt8, DataTypeUInt16, DataTypeUInt32, DataTypeUInt64,
+                                   DataTypeInt8, DataTypeInt16, DataTypeInt32, DataTypeInt64,
+                                   DataTypeInt128, DataTypeFloat32, DataTypeFloat64,
+                                   //            DataTypeDate,
+                                   //            DataTypeDateTime,
+                                   DataTypeDecimal<Decimal32>, DataTypeDecimal<Decimal64>,
+                                   DataTypeDecimal<Decimal128>>(type, std::forward<F>(f));
+    }
+    /// Runs cast_type on `left`, then on `right`, and calls f(left_, right_) with both results.
+    template <typename F>
+    static bool cast_both_types(const IDataType* left, const IDataType* right, F&& f) {
+        return cast_type(left, [&](const auto& left_) {
+            return cast_type(right, [&](const auto& right_) { return f(left_, right_); });
+        });
+    }
+    /// Multiply of an aggregate-function type and a native uint type? NOTE(review): unused here.
+    bool is_aggregate_multiply(const DataTypePtr& type0, const DataTypePtr& type1) const {
+        if constexpr (!std::is_same_v<Op<UInt8, UInt8>, MultiplyImpl<UInt8, UInt8>>) return false;
+
+        WhichDataType which0(type0);
+        WhichDataType which1(type1);
+
+        return (which0.is_aggregate_function() && which1.is_native_uint()) ||
+               (which0.is_native_uint() && which1.is_aggregate_function());
+    }
+    /// Plus of two aggregate-function types? NOTE(review): not called inside this class either.
+    bool is_aggregate_addition(const DataTypePtr& type0, const DataTypePtr& type1) const {
+        if constexpr (!std::is_same_v<Op<UInt8, UInt8>, PlusImpl<UInt8, UInt8>>) return false;
+
+        WhichDataType which0(type0);
+        WhichDataType which1(type1);
+
+        return which0.is_aggregate_function() && which1.is_aggregate_function();
+    }
+
+public:
+    static constexpr auto name = Name::name;
+    static FunctionPtr create() { return std::make_shared<FunctionBinaryArithmetic>(); }
+    FunctionBinaryArithmetic() {}
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 2; }
+    /// Derives the result type from both argument types; unsupported pairs end in LOG(FATAL).
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        DataTypePtr type_res;
+        bool valid = cast_both_types(
+                arguments[0].get(), arguments[1].get(), [&](const auto& left, const auto& right) {
+                    using LeftDataType = std::decay_t<decltype(left)>;
+                    using RightDataType = std::decay_t<decltype(right)>;
+                    using ResultDataType =
+                            typename BinaryOperationTraits<Op, LeftDataType,
+                                                           RightDataType>::ResultDataType;
+                    if constexpr (!std::is_same_v<ResultDataType, InvalidType>) {
+                        if constexpr (IsDataTypeDecimal<LeftDataType> &&
+                                      IsDataTypeDecimal<RightDataType>) {
+                            constexpr bool is_multiply =
+                                    std::is_same_v<Op<UInt8, UInt8>, MultiplyImpl<UInt8, UInt8>>;
+                            constexpr bool is_division = false;
+
+                            ResultDataType result_type =
+                                    decimal_result_type(left, right, is_multiply, is_division);
+                            type_res = std::make_shared<ResultDataType>(result_type.get_precision(),
+                                                                        result_type.get_scale());
+                        } else if constexpr (IsDataTypeDecimal<LeftDataType>)
+                            type_res = std::make_shared<LeftDataType>(left.get_precision(),
+                                                                      left.get_scale());
+                        else if constexpr (IsDataTypeDecimal<RightDataType>)
+                            type_res = std::make_shared<RightDataType>(right.get_precision(),
+                                                                       right.get_scale());
+                        else if constexpr (IsDataTypeDecimal<ResultDataType>)
+                            type_res = std::make_shared<ResultDataType>(27, 9);
+                        else
+                            type_res = std::make_shared<ResultDataType>();
+                        return true;
+                    }
+                    return false;
+                });
+        if (!valid) {
+            LOG(FATAL) << fmt::format("Illegal types {} and {} of arguments of function {}",
+                                      arguments[0]->get_name(), arguments[1]->get_name(),
+                                      get_name());
+        }
+
+        return type_res;
+    }
+    /// Computes arguments[0] <Op> arguments[1] into column `result`; RuntimeError on a mismatch.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto* left_generic = block.get_by_position(arguments[0]).type.get();
+        auto* right_generic = block.get_by_position(arguments[1]).type.get();
+        bool valid = cast_both_types(
+                left_generic, right_generic, [&](const auto& left, const auto& right) {
+                    using LeftDataType = std::decay_t<decltype(left)>;
+                    using RightDataType = std::decay_t<decltype(right)>;
+                    using ResultDataType =
+                            typename BinaryOperationTraits<Op, LeftDataType,
+                                                           RightDataType>::ResultDataType;
+                    if constexpr (!std::is_same_v<ResultDataType, InvalidType>) {
+                        constexpr bool result_is_decimal =
+                                IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>;
+                        constexpr bool is_multiply =
+                                std::is_same_v<Op<UInt8, UInt8>, MultiplyImpl<UInt8, UInt8>>;
+                        constexpr bool is_division = false;
+
+                        using T0 = typename LeftDataType::FieldType;
+                        using T1 = typename RightDataType::FieldType;
+                        using ResultType = typename ResultDataType::FieldType;
+                        using ColVecT0 = std::conditional_t<IsDecimalNumber<T0>, ColumnDecimal<T0>,
+                                                            ColumnVector<T0>>;
+                        using ColVecT1 = std::conditional_t<IsDecimalNumber<T1>, ColumnDecimal<T1>,
+                                                            ColumnVector<T1>>;
+                        using ColVecResult = std::conditional_t<IsDecimalNumber<ResultType>,
+                                                                ColumnDecimal<ResultType>,
+                                                                ColumnVector<ResultType>>;
+
+                        /// Decimal operations need scale. Operations are on result type.
+                        using OpImpl = std::conditional_t<
+                                IsDataTypeDecimal<ResultDataType>,
+                                DecimalBinaryOperation<T0, T1, Op, ResultType>,
+                                BinaryOperationImpl<T0, T1, Op<T0, T1>, ResultType>>;
+
+                        auto col_left_raw = block.get_by_position(arguments[0]).column.get();
+                        auto col_right_raw = block.get_by_position(arguments[1]).column.get();
+                        if (auto col_left = check_and_get_column_const<ColVecT0>(col_left_raw)) {
+                            if (auto col_right =
+                                        check_and_get_column_const<ColVecT1>(col_right_raw)) {
+                                /// the only case with a non-vector result
+                                if constexpr (result_is_decimal) {
+                                    ResultDataType type = decimal_result_type(
+                                            left, right, is_multiply, is_division);
+                                    typename ResultDataType::FieldType scale_a =
+                                            type.scale_factor_for(left, is_multiply);
+                                    typename ResultDataType::FieldType scale_b =
+                                            type.scale_factor_for(right,
+                                                                  is_multiply || is_division);
+
+                                    auto res = OpImpl::constant_constant(
+                                            col_left->template get_value<T0>(),
+                                            col_right->template get_value<T1>(), scale_a, scale_b,
+                                            check_decimal_overflow);
+                                    block.get_by_position(result).column =
+                                            ResultDataType(type.get_precision(), type.get_scale())
+                                                    .create_column_const(
+                                                            col_left->size(),
+                                                            to_field(res, type.get_scale()));
+
+                                } else {
+                                    auto res = OpImpl::constant_constant(
+                                            col_left->template get_value<T0>(),
+                                            col_right->template get_value<T1>());
+                                    block.get_by_position(result).column =
+                                            ResultDataType().create_column_const(col_left->size(),
+                                                                                 to_field(res));
+                                }
+                                return true;
+                            }
+                        }
+                        /// Every remaining case materializes a full result column.
+                        typename ColVecResult::MutablePtr col_res = nullptr;
+                        if constexpr (result_is_decimal) {
+                            ResultDataType type =
+                                    decimal_result_type(left, right, is_multiply, is_division);
+                            col_res = ColVecResult::create(0, type.get_scale());
+                        } else
+                            col_res = ColVecResult::create();
+                        /// NOTE(review): sized by block.rows(), not input_rows_count -- confirm.
+                        auto& vec_res = col_res->get_data();
+                        vec_res.resize(block.rows());
+                        /// Case: constant <op> vector.
+                        if (auto col_left_const =
+                                    check_and_get_column_const<ColVecT0>(col_left_raw)) {
+                            if (auto col_right = check_and_get_column<ColVecT1>(col_right_raw)) {
+                                if constexpr (result_is_decimal) {
+                                    ResultDataType type = decimal_result_type(
+                                            left, right, is_multiply, is_division);
+
+                                    typename ResultDataType::FieldType scale_a =
+                                            type.scale_factor_for(left, is_multiply);
+                                    typename ResultDataType::FieldType scale_b =
+                                            type.scale_factor_for(right,
+                                                                  is_multiply || is_division);
+                                    if constexpr (IsDataTypeDecimal<RightDataType> && is_division)
+                                        scale_a = right.get_scale_multiplier();
+
+                                    OpImpl::constant_vector(
+                                            col_left_const->template get_value<T0>(),
+                                            col_right->get_data(), vec_res, scale_a, scale_b,
+                                            check_decimal_overflow);
+                                } else
+                                    OpImpl::constant_vector(
+                                            col_left_const->template get_value<T0>(),
+                                            col_right->get_data(), vec_res);
+                            } else
+                                return false;
+                        } else if (auto col_left = check_and_get_column<ColVecT0>(col_left_raw)) {
+                            if constexpr (result_is_decimal) {
+                                ResultDataType type =
+                                        decimal_result_type(left, right, is_multiply, is_division);
+
+                                typename ResultDataType::FieldType scale_a =
+                                        type.scale_factor_for(left, is_multiply);
+                                typename ResultDataType::FieldType scale_b =
+                                        type.scale_factor_for(right, is_multiply || is_division);
+                                if (auto col_right =
+                                            check_and_get_column<ColVecT1>(col_right_raw)) {
+                                    OpImpl::vector_vector(col_left->get_data(),
+                                                          col_right->get_data(), vec_res, scale_a,
+                                                          scale_b, check_decimal_overflow);
+                                } else if (auto col_right_const =
+                                                   check_and_get_column_const<ColVecT1>(
+                                                           col_right_raw)) {
+                                    OpImpl::vector_constant(
+                                            col_left->get_data(),
+                                            col_right_const->template get_value<T1>(), vec_res,
+                                            scale_a, scale_b, check_decimal_overflow);
+                                } else
+                                    return false;
+                            } else {
+                                if (auto col_right = check_and_get_column<ColVecT1>(col_right_raw))
+                                    OpImpl::vector_vector(col_left->get_data(),
+                                                          col_right->get_data(), vec_res);
+                                else if (auto col_right_const =
+                                                 check_and_get_column_const<ColVecT1>(
+                                                         col_right_raw))
+                                    OpImpl::vector_constant(
+                                            col_left->get_data(),
+                                            col_right_const->template get_value<T1>(), vec_res);
+                                else
+                                    return false;
+                            }
+                        } else
+                            return false;
+
+                        block.replace_by_position(result, std::move(col_res));
+                        return true;
+                    }
+                    return false;
+                });
+        if (!valid) {
+            return Status::RuntimeError(
+                    fmt::format("{}'s arguments do not match the expected data types", get_name()));
+        }
+
+        return Status::OK();
+    }
+
+    bool can_be_executed_on_default_arguments() const override {
+        return CanBeExecutedOnDefaultArguments;
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_binary_arithmetic_to_null_type.h b/be/src/vec/functions/function_binary_arithmetic_to_null_type.h
new file mode 100644
index 0000000000..66316116ae
--- /dev/null
+++ b/be/src/vec/functions/function_binary_arithmetic_to_null_type.h
@@ -0,0 +1,247 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/functions/function_binary_arithmetic.h"
+
+namespace doris::vectorized {
+
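+/**
+ * Binary arithmetic operations whose result column is always Nullable: /, %,
+ * intDiv (integer division). The operation receives a per-row null map to fill in.
+ */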
+
+template <template <typename, typename> class Op, typename Name,
+          bool CanBeExecutedOnDefaultArguments = true>
+class FunctionBinaryArithmeticToNullType : public IFunction {
+    bool check_decimal_overflow = true;
+
+    template <typename Visitor> // Visitor: callable handed the resolved data type (see below)
+    static bool cast_type(const IDataType* type, Visitor&& f) {
+        return cast_type_to_either<
+                DataTypeUInt8, DataTypeUInt16, DataTypeUInt32, DataTypeUInt64, DataTypeInt8,
+                DataTypeInt16, DataTypeInt32, DataTypeInt64, DataTypeInt128, DataTypeFloat32,
+                DataTypeFloat64, DataTypeDecimal<Decimal32>, DataTypeDecimal<Decimal64>,
+                DataTypeDecimal<Decimal128>>(type, std::forward<Visitor>(f));
+    }
+
+    template <typename Callback> // Callback: invoked as f(left_type, right_type), returns bool
+    static bool cast_both_types(const IDataType* left, const IDataType* right, Callback&& f) {
+        return cast_type(left, [&](const auto& lhs) {
+            return cast_type(right, [&](const auto& rhs) { return f(lhs, rhs); });
+        });
+    }
+
+public:
+    static constexpr auto name = Name::name;
+    static FunctionPtr create() { return std::make_shared<FunctionBinaryArithmeticToNullType>(); }
+    // `name` mirrors Name::name; create() hands out a shared, default-constructed instance.
+    FunctionBinaryArithmeticToNullType() {}
+    String get_name() const override { return name; }
+    // A binary function: the argument count is fixed at two.
+    size_t get_number_of_arguments() const override { return 2; }
+
+    // Result type of `arguments[0] Op arguments[1]`, always wrapped in Nullable. Nullable inputs
+    // are unwrapped first; an unsupported pair of argument types ends in LOG(FATAL).
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        const auto strip_nullable = [](const IDataType* type) -> const IDataType* {
+            if (!type->is_nullable()) {
+                return type;
+            }
+            return static_cast<const DataTypeNullable*>(type)->get_nested_type().get();
+        };
+        const IDataType* lhs_type = strip_nullable(arguments[0].get());
+        const IDataType* rhs_type = strip_nullable(arguments[1].get());
+
+        DataTypePtr deduced;
+        const bool matched =
+                cast_both_types(lhs_type, rhs_type, [&](const auto& left, const auto& right) {
+                    using LeftDataType = std::decay_t<decltype(left)>;
+                    using RightDataType = std::decay_t<decltype(right)>;
+                    using ResultDataType =
+                            typename BinaryOperationTraits<Op, LeftDataType,
+                                                           RightDataType>::ResultDataType;
+                    if constexpr (std::is_same_v<ResultDataType, InvalidType>) {
+                        return false;
+                    } else {
+                        if constexpr (IsDataTypeDecimal<LeftDataType> &&
+                                      IsDataTypeDecimal<RightDataType>) {
+                            // Both sides decimal: decimal_result_type picks precision and scale.
+                            using ProbeOp = Op<UInt8, UInt8>;
+                            constexpr bool is_multiply = false;
+                            constexpr bool is_division =
+                                    std::is_same_v<ProbeOp, DivideFloatingImpl<UInt8, UInt8>> ||
+                                    std::is_same_v<ProbeOp, DivideIntegralImpl<UInt8, UInt8>> ||
+                                    std::is_same_v<ProbeOp, DivideIntegralOrZeroImpl<UInt8, UInt8>>;
+                            ResultDataType decimal_type =
+                                    decimal_result_type(left, right, is_multiply, is_division);
+                            deduced = std::make_shared<ResultDataType>(
+                                    decimal_type.get_precision(), decimal_type.get_scale());
+                        } else if constexpr (IsDataTypeDecimal<LeftDataType>) {
+                            // Exactly one decimal side: the result copies that side's type.
+                            deduced = std::make_shared<LeftDataType>(left.get_precision(),
+                                                                     left.get_scale());
+                        } else if constexpr (IsDataTypeDecimal<RightDataType>) {
+                            deduced = std::make_shared<RightDataType>(right.get_precision(),
+                                                                      right.get_scale());
+                        } else if constexpr (IsDataTypeDecimal<ResultDataType>) {
+                            deduced = std::make_shared<ResultDataType>(27, 9);
+                        } else {
+                            deduced = std::make_shared<ResultDataType>();
+                        }
+                        return true;
+                    }
+                });
+        if (!matched) {
+            LOG(FATAL) << fmt::format("Illegal types {} and {} of arguments of function {}",
+                                      arguments[0]->get_name(), arguments[1]->get_name(),
+                                      get_name());
+        }
+        return make_nullable(deduced);
+    }
+
+    bool use_default_implementation_for_nulls() const override { return true; } // always on
+
+    // Applies `Op` to the two argument columns and stores a ColumnNullable (result values plus
+    // the per-row null map handed to the operation) at position `result`. Returns RuntimeError
+    // when the argument types or the materialized columns are not a supported combination.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        // Type dispatch works on the nested types, so look through Nullable wrappers first.
+        auto* left_generic = block.get_by_position(arguments[0]).type.get();
+        auto* right_generic = block.get_by_position(arguments[1]).type.get();
+        if (left_generic->is_nullable()) {
+            left_generic =
+                    static_cast<const DataTypeNullable*>(left_generic)->get_nested_type().get();
+        }
+        if (right_generic->is_nullable()) {
+            right_generic =
+                    static_cast<const DataTypeNullable*>(right_generic)->get_nested_type().get();
+        }
+
+        bool valid = cast_both_types(
+                left_generic, right_generic, [&](const auto& left, const auto& right) {
+                    using LeftDataType = std::decay_t<decltype(left)>;
+                    using RightDataType = std::decay_t<decltype(right)>;
+                    using ResultDataType =
+                            typename BinaryOperationTraits<Op, LeftDataType,
+                                                           RightDataType>::ResultDataType;
+
+                    if constexpr (!std::is_same_v<ResultDataType, InvalidType>) {
+                        constexpr bool result_is_decimal =
+                                IsDataTypeDecimal<LeftDataType> || IsDataTypeDecimal<RightDataType>;
+                        constexpr bool is_multiply = false;
+                        constexpr bool is_division =
+                                std::is_same_v<Op<UInt8, UInt8>,
+                                               DivideFloatingImpl<UInt8, UInt8>> ||
+                                std::is_same_v<Op<UInt8, UInt8>,
+                                               DivideIntegralImpl<UInt8, UInt8>> ||
+                                std::is_same_v<Op<UInt8, UInt8>,
+                                               DivideIntegralOrZeroImpl<UInt8, UInt8>>;
+
+                        using T0 = typename LeftDataType::FieldType;
+                        using T1 = typename RightDataType::FieldType;
+                        using ResultType = typename ResultDataType::FieldType;
+                        using ColVecT0 = std::conditional_t<IsDecimalNumber<T0>, ColumnDecimal<T0>,
+                                                            ColumnVector<T0>>;
+                        using ColVecT1 = std::conditional_t<IsDecimalNumber<T1>, ColumnDecimal<T1>,
+                                                            ColumnVector<T1>>;
+                        using ColVecResult = std::conditional_t<IsDecimalNumber<ResultType>,
+                                                                ColumnDecimal<ResultType>,
+                                                                ColumnVector<ResultType>>;
+
+                        /// Decimal operations need scale. Operations are on result type.
+                        using OpImpl = std::conditional_t<
+                                IsDataTypeDecimal<ResultDataType>,
+                                DecimalBinaryOperation<T0, T1, Op, ResultType>,
+                                BinaryOperationImpl<T0, T1, Op<T0, T1>, ResultType>>;
+
+                        // Materialize constant columns: only the vector/vector kernel is used.
+                        // The function takes exactly two arguments (get_number_of_arguments),
+                        // so two locals replace a runtime-sized array, which is not ISO C++.
+                        const ColumnPtr left_column =
+                                block.get_by_position(arguments[0])
+                                        .column->convert_to_full_column_if_const();
+                        const ColumnPtr right_column =
+                                block.get_by_position(arguments[1])
+                                        .column->convert_to_full_column_if_const();
+
+                        // Reject unexpected column types on either side before computing:
+                        // otherwise an unfilled result column would be reported as success.
+                        auto col_left = check_and_get_column<ColVecT0>(left_column.get());
+                        auto col_right = check_and_get_column<ColVecT1>(right_column.get());
+                        if (!col_left || !col_right) {
+                            return false;
+                        }
+
+                        auto null_map = ColumnUInt8::create(input_rows_count, 0);
+                        auto& null_map_data = null_map->get_data();
+
+                        typename ColVecResult::MutablePtr col_res = nullptr;
+                        if constexpr (result_is_decimal) {
+                            ResultDataType type =
+                                    decimal_result_type(left, right, is_multiply, is_division);
+                            col_res = ColVecResult::create(0, type.get_scale());
+                        } else {
+                            col_res = ColVecResult::create();
+                        }
+
+                        auto& vec_res = col_res->get_data();
+                        vec_res.resize(block.rows());
+
+                        if constexpr (result_is_decimal) {
+                            // Decimal kernels also need the scale factors of both operands.
+                            ResultDataType type =
+                                    decimal_result_type(left, right, is_multiply, is_division);
+
+                            typename ResultDataType::FieldType scale_a =
+                                    type.scale_factor_for(left, is_multiply);
+                            typename ResultDataType::FieldType scale_b =
+                                    type.scale_factor_for(right, is_multiply || is_division);
+                            if constexpr (IsDataTypeDecimal<RightDataType> && is_division)
+                                scale_a = right.get_scale_multiplier();
+                            OpImpl::vector_vector(col_left->get_data(), col_right->get_data(),
+                                                  vec_res, scale_a, scale_b,
+                                                  check_decimal_overflow, null_map_data);
+                        } else {
+                            OpImpl::vector_vector(col_left->get_data(), col_right->get_data(),
+                                                  vec_res, null_map_data);
+                        }
+
+                        block.get_by_position(result).column =
+                                ColumnNullable::create(std::move(col_res), std::move(null_map));
+                        return true;
+                    } else {
+                        return false;
+                    }
+                });
+
+        if (!valid) {
+            return Status::RuntimeError(
+                    fmt::format("{}'s arguments do not match the expected data types", get_name()));
+        }
+
+        return Status::OK();
+    }
+
+    bool can_be_executed_on_default_arguments() const override {
+        return CanBeExecutedOnDefaultArguments; // class template flag, true unless overridden
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_bit.cpp b/be/src/vec/functions/function_bit.cpp
new file mode 100644
index 0000000000..2a8db7b4a4
--- /dev/null
+++ b/be/src/vec/functions/function_bit.cpp
@@ -0,0 +1,92 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/bitAnd.cpp
+// and modified by Doris
+
+#include "vec/data_types/number_traits.h"
+#include "vec/functions/function_binary_arithmetic.h"
+#include "vec/functions/function_unary_arithmetic.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+// Name tag consumed by the FunctionBitAnd alias below.
+struct NameBitAnd { static constexpr auto name = "bitand"; };
+
+// Bitwise AND; both operands are converted to `Result` before being combined.
+template <typename A, typename B>
+struct BitAndImpl {
+    using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
+    template <typename Result = ResultType>
+    static inline Result apply(A a, B b) {
+        const auto lhs = static_cast<Result>(a);
+        return lhs & static_cast<Result>(b);
+    }
+};
+
+struct NameBitNot { static constexpr auto name = "bitnot"; };
+
+// Bitwise complement, taken after converting the operand to ResultType.
+template <typename A>
+struct BitNotImpl {
+    using ResultType = typename NumberTraits::ResultOfBitNot<A>::Type;
+    static inline ResultType apply(A a) {
+        return ~static_cast<ResultType>(a);
+    }
+};
+
+// Name tag consumed by the FunctionBitOr alias below.
+struct NameBitOr { static constexpr auto name = "bitor"; };
+
+// Bitwise OR; both operands are converted to `Result` before being combined.
+template <typename A, typename B>
+struct BitOrImpl {
+    using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
+    template <typename Result = ResultType>
+    static inline Result apply(A a, B b) {
+        const auto lhs = static_cast<Result>(a);
+        return lhs | static_cast<Result>(b);
+    }
+};
+
+// Name tag consumed by the FunctionBitXor alias below.
+struct NameBitXor { static constexpr auto name = "bitxor"; };
+
+// Bitwise XOR; both operands are converted to `Result` before being combined.
+template <typename A, typename B>
+struct BitXorImpl {
+    using ResultType = typename NumberTraits::ResultOfBit<A, B>::Type;
+    template <typename Result = ResultType>
+    static inline Result apply(A a, B b) {
+        const auto lhs = static_cast<Result>(a);
+        return lhs ^ static_cast<Result>(b);
+    }
+};
+
+using FunctionBitAnd = FunctionBinaryArithmetic<BitAndImpl, NameBitAnd>;
+using FunctionBitNot = FunctionUnaryArithmetic<BitNotImpl, NameBitNot, false>; // the only unary one
+using FunctionBitOr = FunctionBinaryArithmetic<BitOrImpl, NameBitOr>;
+using FunctionBitXor = FunctionBinaryArithmetic<BitXorImpl, NameBitXor>;
+
+void register_function_bit(SimpleFunctionFactory& factory) { // registers the four aliases above
+    factory.register_function<FunctionBitAnd>();
+    factory.register_function<FunctionBitNot>();
+    factory.register_function<FunctionBitOr>();
+    factory.register_function<FunctionBitXor>();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp
new file mode 100644
index 0000000000..bde20ad185
--- /dev/null
+++ b/be/src/vec/functions/function_bitmap.cpp
@@ -0,0 +1,471 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionBitmap.h
+// and modified by Doris
+
+#include "util/string_parser.hpp"
+#include "vec/functions/function_totype.h"
+#include "vec/functions/function_const.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/functions/function_string.h"
+#include "gutil/strings/numbers.h"
+#include "gutil/strings/split.h"
+
+namespace doris::vectorized {
+
+// Traits behind `bitmap_empty`, plugged into FunctionConst by the alias further down.
+struct BitmapEmpty {
+    static constexpr auto name = "bitmap_empty";
+    using ReturnColVec = ColumnBitmap;
+
+    // Data type reported for the constant.
+    static DataTypePtr get_return_type() { return std::make_shared<DataTypeBitMap>(); }
+    // Value of the constant: a default-constructed BitmapValue.
+    static auto init_value() { return BitmapValue(); }
+};
+
+struct NameToBitmap { static constexpr auto name = "to_bitmap"; };
+
+// to_bitmap: turns each input string into a bitmap holding the uint64 value it spells.
+struct ToBitmapImpl {
+    using ReturnType = DataTypeBitMap;
+    static constexpr auto TYPE_INDEX = TypeIndex::String;
+    using Type = String;
+    using ReturnColumnType = ColumnBitmap;
+
+    // Appends one bitmap per row of the string column described by `data`/`offsets` to `res`.
+    // Row i covers [offsets[i - 1], offsets[i]) minus its last byte (presumably a terminator).
+    // NOTE(review): row 0 reads offsets[-1]; assumes the container tolerates that -- confirm.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         std::vector<BitmapValue>& res) {
+        auto size = offsets.size();
+        res.reserve(size);
+        for (size_t i = 0; i < size; ++i) {
+            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
+            size_t str_size = offsets[i] - offsets[i - 1] - 1;
+            StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS;
+            uint64_t int_value = StringParser::string_to_unsigned_int<uint64_t>(raw_str, str_size,
+                                                                                &parse_result);
+
+            // A row that fails to parse yields a default-constructed bitmap: the value returned
+            // by the parser is only meaningful, and only inserted, when parsing succeeded.
+            // TODO: returning an error here caused problems with to_bitmap(null); rethink how
+            // NULL and invalid input should be reported.
+            res.emplace_back();
+            if (parse_result == StringParser::PARSE_SUCCESS) {
+                res.back().add(int_value);
+            }
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapFromString { static constexpr auto name = "bitmap_from_string"; };
+
+// bitmap_from_string: builds a bitmap from a comma-separated list of unsigned integers.
+struct BitmapFromString {
+    using ReturnType = DataTypeBitMap;
+    static constexpr auto TYPE_INDEX = TypeIndex::String;
+    using Type = String;
+    using ReturnColumnType = ColumnBitmap;
+    // Appends one bitmap per row to `res`; rows rejected by SplitStringAndParse get a default one.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         std::vector<BitmapValue>& res) {
+        const auto rows = offsets.size();
+        res.reserve(rows);
+        std::vector<uint64_t> parsed; // scratch buffer, emptied after every row
+        for (size_t row = 0; row < rows; ++row) {
+            const char* text = reinterpret_cast<const char*>(&data[offsets[row - 1]]);
+            int length = offsets[row] - offsets[row - 1] - 1;
+            if (!SplitStringAndParse({text, length}, ",", &safe_strtou64, &parsed)) {
+                res.emplace_back();
+            } else {
+                res.emplace_back(parsed);
+            }
+            parsed.clear();
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapHash { static constexpr auto name = "bitmap_hash"; };
+
+// bitmap_hash: wraps the 32-bit murmur3 hash of each input string in a bitmap.
+struct BitmapHash {
+    using ReturnType = DataTypeBitMap;
+    static constexpr auto TYPE_INDEX = TypeIndex::String;
+    using Type = String;
+    using ReturnColumnType = ColumnBitmap;
+    // Appends one bitmap per row to `res`, each holding only that row's hash value.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         std::vector<BitmapValue>& res) {
+        const auto rows = offsets.size();
+        res.reserve(rows);
+        for (size_t row = 0; row < rows; ++row) {
+            const char* text = reinterpret_cast<const char*>(&data[offsets[row - 1]]);
+            const size_t length = offsets[row] - offsets[row - 1] - 1;
+            const uint32_t hash =
+                    HashUtil::murmur_hash3_32(text, length, HashUtil::MURMUR3_32_SEED);
+            res.emplace_back();
+            res.back().add(hash);
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapCount { static constexpr auto name = "bitmap_count"; };
+
+// bitmap_count: cardinality of each bitmap in the input column, returned as Int64.
+struct BitmapCount {
+    using ReturnType = DataTypeInt64;
+    static constexpr auto TYPE_INDEX = TypeIndex::BitMap;
+    using Type = DataTypeBitMap::FieldType;
+    using ReturnColumnType = ColumnVector<Int64>;
+    using ReturnColumnContainer = ColumnVector<Int64>::Container;
+
+    // Appends data[i].cardinality() to `res` for every row, in row order. Existing contents
+    // of `res` are kept, so callers presumably pass an empty container.
+    static Status vector(const std::vector<BitmapValue>& data, ReturnColumnContainer& res) {
+        res.reserve(data.size());
+        for (const BitmapValue& bitmap : data) {
+            res.push_back(bitmap.cardinality());
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapAnd { static constexpr auto name = "bitmap_and"; };
+
+// bitmap_and: combines two bitmap columns row by row with BitmapValue's operator&=.
+template <typename LeftDataType, typename RightDataType>
+struct BitmapAnd {
+    using ResultDataType = DataTypeBitMap;
+    using T0 = typename LeftDataType::FieldType;
+    using T1 = typename RightDataType::FieldType;
+    using TData = std::vector<BitmapValue>;
+
+    // res[i] = lvec[i] & rvec[i]; `res` is indexed directly, so it must be pre-sized.
+    static Status vector_vector(const TData& lvec, const TData& rvec, TData& res) {
+        for (size_t row = 0, rows = lvec.size(); row < rows; ++row) {
+            BitmapValue& out = res[row];
+            out = lvec[row];
+            out &= rvec[row];
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapOr { static constexpr auto name = "bitmap_or"; };
+
+// bitmap_or: combines two bitmap columns row by row with BitmapValue's operator|=.
+template <typename LeftDataType, typename RightDataType>
+struct BitmapOr {
+    using ResultDataType = DataTypeBitMap;
+    using T0 = typename LeftDataType::FieldType;
+    using T1 = typename RightDataType::FieldType;
+    using TData = std::vector<BitmapValue>;
+
+    // res[i] = lvec[i] | rvec[i]; `res` is indexed directly, so it must be pre-sized.
+    static Status vector_vector(const TData& lvec, const TData& rvec, TData& res) {
+        for (size_t row = 0, rows = lvec.size(); row < rows; ++row) {
+            BitmapValue& out = res[row];
+            out = lvec[row];
+            out |= rvec[row];
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapXor { static constexpr auto name = "bitmap_xor"; };
+
+// bitmap_xor: combines two bitmap columns row by row with BitmapValue's operator^=.
+template <typename LeftDataType, typename RightDataType>
+struct BitmapXor {
+    using ResultDataType = DataTypeBitMap;
+    using T0 = typename LeftDataType::FieldType;
+    using T1 = typename RightDataType::FieldType;
+    using TData = std::vector<BitmapValue>;
+
+    // res[i] = lvec[i] ^ rvec[i]; `res` is indexed directly, so it must be pre-sized.
+    static Status vector_vector(const TData& lvec, const TData& rvec, TData& res) {
+        for (size_t row = 0, rows = lvec.size(); row < rows; ++row) {
+            BitmapValue& out = res[row];
+            out = lvec[row];
+            out ^= rvec[row];
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapNot { static constexpr auto name = "bitmap_not"; };
+
+// bitmap_not: applies BitmapValue's operator-= row by row (left bitmap minus right bitmap).
+template <typename LeftDataType, typename RightDataType>
+struct BitmapNot {
+    using ResultDataType = DataTypeBitMap;
+    using T0 = typename LeftDataType::FieldType;
+    using T1 = typename RightDataType::FieldType;
+    using TData = std::vector<BitmapValue>;
+
+    // res[i] = lvec[i] - rvec[i]; `res` is indexed directly, so it must be pre-sized.
+    static Status vector_vector(const TData& lvec, const TData& rvec, TData& res) {
+        for (size_t row = 0, rows = lvec.size(); row < rows; ++row) {
+            BitmapValue& out = res[row];
+            out = lvec[row];
+            out -= rvec[row];
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapContains { static constexpr auto name = "bitmap_contains"; };
+
+// bitmap_contains: tests, row by row, whether the bitmap on the left holds the number on
+// the right.
+template <typename LeftDataType, typename RightDataType>
+struct BitmapContains {
+    using ResultDataType = DataTypeUInt8;
+    using T0 = typename LeftDataType::FieldType;
+    using T1 = typename RightDataType::FieldType;
+    using LTData = std::vector<BitmapValue>;
+    using RTData = typename ColumnVector<T1>::Container;
+    using ResTData = typename ColumnVector<UInt8>::Container;
+
+    // res[i] = lvec[i].contains(rvec[i]); `res` is indexed directly, so it must be pre-sized.
+    static Status vector_vector(const LTData& lvec, const RTData& rvec, ResTData& res) {
+        for (size_t row = 0, rows = lvec.size(); row < rows; ++row) {
+            res[row] = lvec[row].contains(rvec[row]);
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapHasAny { static constexpr auto name = "bitmap_has_any"; };
+
+// bitmap_has_any: non-zero when the two bitmaps of a row have a non-empty intersection.
+template <typename LeftDataType, typename RightDataType>
+struct BitmapHasAny {
+    using ResultDataType = DataTypeUInt8;
+    using T0 = typename LeftDataType::FieldType;
+    using T1 = typename RightDataType::FieldType;
+    using TData = std::vector<BitmapValue>;
+    using ResTData = typename ColumnVector<UInt8>::Container;
+
+    // res[i] = 1 if lvec[i] and rvec[i] share an element, else 0; `res` must be pre-sized.
+    static Status vector_vector(const TData& lvec, const TData& rvec, ResTData& res) {
+        for (size_t row = 0, rows = lvec.size(); row < rows; ++row) {
+            // Intersect into a plain copy; the const inputs are read only, so no cast is needed.
+            BitmapValue common = lvec[row];
+            common &= rvec[row];
+            res[row] = common.cardinality() != 0;
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapMin { static constexpr auto name = "bitmap_min"; };
+
+// bitmap_min: reports minimum() of each bitmap as an Int64.
+struct BitmapMin {
+    using ReturnType = DataTypeInt64;
+    static constexpr auto TYPE_INDEX = TypeIndex::BitMap;
+    using Type = DataTypeBitMap::FieldType;
+    using ReturnColumnType = ColumnVector<Int64>;
+    using ReturnColumnContainer = ColumnVector<Int64>::Container;
+
+    // Appends the `.val` member of bitmap.minimum() for every row. Constness is cast away to
+    // make the call; presumably minimum() is not declared const -- TODO confirm.
+    static Status vector(const std::vector<BitmapValue>& data, ReturnColumnContainer& res) {
+        auto& bitmaps = const_cast<std::vector<BitmapValue>&>(data);
+        res.reserve(bitmaps.size());
+        for (BitmapValue& bitmap : bitmaps) {
+            res.push_back(bitmap.minimum().val);
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapMax { static constexpr auto name = "bitmap_max"; };
+
+// bitmap_max: reports maximum() of each bitmap as an Int64.
+struct BitmapMax {
+    using ReturnType = DataTypeInt64;
+    static constexpr auto TYPE_INDEX = TypeIndex::BitMap;
+    using Type = DataTypeBitMap::FieldType;
+    using ReturnColumnType = ColumnVector<Int64>;
+    using ReturnColumnContainer = ColumnVector<Int64>::Container;
+
+    // Appends the `.val` member of bitmap.maximum() for every row. Constness is cast away to
+    // make the call; presumably maximum() is not declared const -- TODO confirm.
+    static Status vector(const std::vector<BitmapValue>& data, ReturnColumnContainer& res) {
+        auto& bitmaps = const_cast<std::vector<BitmapValue>&>(data);
+        res.reserve(bitmaps.size());
+        for (BitmapValue& bitmap : bitmaps) {
+            res.push_back(bitmap.maximum().val);
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapToString { static constexpr auto name = "bitmap_to_string"; };
+
+// bitmap_to_string: renders each bitmap with BitmapValue::to_string() into a string column.
+struct BitmapToString {
+    using ReturnType = DataTypeString;
+    static constexpr auto TYPE_INDEX = TypeIndex::BitMap;
+    using Type = DataTypeBitMap::FieldType;
+    using ReturnColumnType = ColumnString;
+    using Chars = ColumnString::Chars;
+    using Offsets = ColumnString::Offsets;
+
+    // Sizes `offsets` to the row count, then lets StringOP::push_value_string store each row.
+    static Status vector(const std::vector<BitmapValue>& data, Chars& chars, Offsets& offsets) {
+        const size_t rows = data.size();
+        offsets.resize(rows);
+        chars.reserve(rows);
+        for (size_t row = 0; row != rows; ++row) {
+            StringOP::push_value_string(data[row].to_string(), row, chars, offsets);
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapAndCount { static constexpr auto name = "bitmap_and_count"; };
+
+// bitmap_and_count: cardinality of the row-wise AND of two bitmap columns.
+template <typename LeftDataType, typename RightDataType>
+struct BitmapAndCount {
+    using ResultDataType = DataTypeInt64;
+    using T0 = typename LeftDataType::FieldType;
+    using T1 = typename RightDataType::FieldType;
+    using TData = std::vector<BitmapValue>;
+    using ResTData = typename ColumnVector<Int64>::Container;
+
+    // res[i] = cardinality of (lvec[i] & rvec[i]); `res` is indexed directly, so pre-size it.
+    static Status vector_vector(const TData& lvec, const TData& rvec, ResTData& res) {
+        BitmapValue scratch; // reused for every row and cleared again before the next one
+        for (size_t row = 0, rows = lvec.size(); row < rows; ++row) {
+            scratch |= lvec[row];
+            scratch &= rvec[row];
+            res[row] = scratch.cardinality();
+            scratch.clear();
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapOrCount { static constexpr auto name = "bitmap_or_count"; };
+
+// bitmap_or_count: cardinality of the row-wise OR of two bitmap columns.
+template <typename LeftDataType, typename RightDataType>
+struct BitmapOrCount {
+    using ResultDataType = DataTypeInt64;
+    using T0 = typename LeftDataType::FieldType;
+    using T1 = typename RightDataType::FieldType;
+    using TData = std::vector<BitmapValue>;
+    using ResTData = typename ColumnVector<Int64>::Container;
+
+    // res[i] = cardinality of (lvec[i] | rvec[i]); `res` is indexed directly, so pre-size it.
+    static Status vector_vector(const TData& lvec, const TData& rvec, ResTData& res) {
+        BitmapValue scratch; // reused for every row and cleared again before the next one
+        for (size_t row = 0, rows = lvec.size(); row < rows; ++row) {
+            scratch |= lvec[row];
+            scratch |= rvec[row];
+            res[row] = scratch.cardinality();
+            scratch.clear();
+        }
+        return Status::OK();
+    }
+};
+
+struct NameBitmapXorCount { static constexpr auto name = "bitmap_xor_count"; };
+
+// bitmap_xor_count: cardinality of the row-wise XOR of two bitmap columns.
+template <typename LeftDataType, typename RightDataType>
+struct BitmapXorCount {
+    using ResultDataType = DataTypeInt64;
+    using T0 = typename LeftDataType::FieldType;
+    using T1 = typename RightDataType::FieldType;
+    using TData = std::vector<BitmapValue>;
+    using ResTData = typename ColumnVector<Int64>::Container;
+
+    // res[i] = cardinality of (lvec[i] ^ rvec[i]); `res` is indexed directly, so pre-size it.
+    static Status vector_vector(const TData& lvec, const TData& rvec, ResTData& res) {
+        BitmapValue scratch; // reused for every row and cleared again before the next one
+        for (size_t row = 0, rows = lvec.size(); row < rows; ++row) {
+            scratch |= lvec[row];
+            scratch ^= rvec[row];
+            res[row] = scratch.cardinality();
+            scratch.clear();
+        }
+        return Status::OK();
+    }
+};
+
+using FunctionBitmapEmpty = FunctionConst<BitmapEmpty, false>;
+using FunctionToBitmap = FunctionUnaryToType<ToBitmapImpl, NameToBitmap>;
+using FunctionBitmapFromString = FunctionUnaryToType<BitmapFromString,NameBitmapFromString>;
+using FunctionBitmapHash = FunctionUnaryToType<BitmapHash, NameBitmapHash>;
+// Functions whose result is an Int64 or a string rather than a bitmap.
+using FunctionBitmapCount = FunctionUnaryToType<BitmapCount, NameBitmapCount>;
+using FunctionBitmapAndCount =
+        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapAndCount, NameBitmapAndCount>;
+using FunctionBitmapOrCount =
+        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapOrCount, NameBitmapOrCount>;
+using FunctionBitmapXorCount =
+        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapXorCount, NameBitmapXorCount>;
+using FunctionBitmapMin = FunctionUnaryToType<BitmapMin, NameBitmapMin>;
+using FunctionBitmapMax = FunctionUnaryToType<BitmapMax, NameBitmapMax>;
+using FunctionBitmapToString = FunctionUnaryToType<BitmapToString, NameBitmapToString>;
+// Binary functions that combine two bitmaps into a new bitmap.
+using FunctionBitmapAnd =
+        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapAnd, NameBitmapAnd>;
+using FunctionBitmapOr =
+        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapOr, NameBitmapOr>;
+using FunctionBitmapXor =
+        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapXor, NameBitmapXor>;
+using FunctionBitmapNot =
+        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapNot, NameBitmapNot>;
+// bitmap_contains pairs a bitmap column with an Int64 column.
+using FunctionBitmapContains =
+        FunctionBinaryToType<DataTypeBitMap, DataTypeInt64, BitmapContains, NameBitmapContains>;
+// bitmap_has_any takes two bitmap columns and yields a UInt8 flag.
+using FunctionBitmapHasAny =
+        FunctionBinaryToType<DataTypeBitMap, DataTypeBitMap, BitmapHasAny, NameBitmapHasAny>;
+
+void register_function_bitmap(SimpleFunctionFactory& factory) { // one call per alias above
+    factory.register_function<FunctionBitmapEmpty>();
+    factory.register_function<FunctionToBitmap>();
+    factory.register_function<FunctionBitmapFromString>();
+    factory.register_function<FunctionBitmapHash>();
+    factory.register_function<FunctionBitmapCount>();
+    factory.register_function<FunctionBitmapAndCount>();
+    factory.register_function<FunctionBitmapOrCount>();
+    factory.register_function<FunctionBitmapXorCount>();
+    factory.register_function<FunctionBitmapMin>();
+    factory.register_function<FunctionBitmapMax>();
+    factory.register_function<FunctionBitmapToString>();
+    factory.register_function<FunctionBitmapAnd>();
+    factory.register_function<FunctionBitmapOr>();
+    factory.register_function<FunctionBitmapXor>();
+    factory.register_function<FunctionBitmapNot>();
+    factory.register_function<FunctionBitmapContains>();
+    factory.register_function<FunctionBitmapHasAny>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_case.cpp b/be/src/vec/functions/function_case.cpp
new file mode 100644
index 0000000000..6ce94ab5aa
--- /dev/null
+++ b/be/src/vec/functions/function_case.cpp
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/function_case.h"
+
+namespace doris::vectorized {
+
+// Registers all four FunctionCase<has_case, has_else> instantiations with
+// `factory`, covering every combination of the two template flags.
+void register_function_case(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionCase<false, false>>();
+    factory.register_function<FunctionCase<false, true>>();
+    factory.register_function<FunctionCase<true, false>>();
+    factory.register_function<FunctionCase<true, true>>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_case.h b/be/src/vec/functions/function_case.h
new file mode 100644
index 0000000000..1113b5f407
--- /dev/null
+++ b/be/src/vec/functions/function_case.h
@@ -0,0 +1,371 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <optional>
+#include <vector>
+
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+// State object read by FunctionCase::execute_impl through
+// FunctionContext::get_function_state(FRAGMENT_LOCAL).
+struct CaseState {
+    // Result data type of the CASE expression; stays nullptr until it is assigned.
+    DataTypePtr result_type = nullptr;
+};
+
+// Maps the <has_case, has_else> flag pair to the function name exposed by
+// FunctionCase. Only the four explicit specializations below are defined.
+template <bool has_case, bool has_else>
+struct FunctionCaseName;
+
+// No CASE operand, no ELSE branch.
+template <>
+struct FunctionCaseName<false, false> {
+    static constexpr auto name = "case";
+};
+
+// CASE operand present, no ELSE branch.
+template <>
+struct FunctionCaseName<true, false> {
+    static constexpr auto name = "case_has_case";
+};
+
+// No CASE operand, ELSE branch present.
+template <>
+struct FunctionCaseName<false, true> {
+    static constexpr auto name = "case_has_else";
+};
+
+// CASE operand and ELSE branch both present.
+template <>
+struct FunctionCaseName<true, true> {
+    static constexpr auto name = "case_has_case_has_else";
+};
+
+// Gathers the argument columns of a CASE expression into two parallel lists.
+// Slot 0 of `when_ptrs` holds the CASE operand and slot 0 of `then_ptrs` holds
+// the ELSE value (std::nullopt when the respective part is absent); slots
+// [1, pair_count) hold the WHEN/THEN pairs in argument order.
+struct CaseWhenColumnHolder {
+    using OptionalPtr = std::optional<ColumnPtr>;
+
+    std::vector<OptionalPtr> when_ptrs; // case, when, when...
+    std::vector<OptionalPtr> then_ptrs; // else, then, then...
+    size_t pair_count;
+    size_t rows_count;
+
+    // `when_null` / `then_null` request that every present column of the
+    // respective list is wrapped into a nullable column.
+    CaseWhenColumnHolder(Block& block, const ColumnNumbers& arguments, size_t input_rows_count,
+                         bool has_case, bool has_else, bool when_null, bool then_null)
+            : rows_count(input_rows_count) {
+        // Fetches the column bound to the argument at `arg_idx`.
+        const auto column_of = [&block, &arguments](size_t arg_idx) {
+            return OptionalPtr(block.get_by_position(arguments[arg_idx]).column);
+        };
+
+        // Slot 0: CASE operand and ELSE value, if any.
+        when_ptrs.emplace_back(has_case ? column_of(0) : OptionalPtr(std::nullopt));
+        then_ptrs.emplace_back(has_else ? column_of(arguments.size() - 1)
+                                        : OptionalPtr(std::nullopt));
+
+        const int first_when = 0 + has_case;
+        const int pairs_end = arguments.size() - has_else;
+        pair_count = (pairs_end - first_when) / 2 + 1; // when/then at [1: pair_count)
+
+        int arg_idx = first_when;
+        while (arg_idx < pairs_end) {
+            when_ptrs.emplace_back(column_of(arg_idx));
+            then_ptrs.emplace_back(column_of(arg_idx + 1));
+            arg_idx += 2;
+        }
+
+        // A nullable CASE/WHEN column forces all CASE/WHEN columns to be nullable.
+        if (when_null) {
+            for (OptionalPtr& when_column : when_ptrs) {
+                cast_to_nullable(when_column);
+            }
+        }
+
+        // A nullable ELSE/THEN column forces all ELSE/THEN columns to be nullable.
+        if (then_null) {
+            for (OptionalPtr& then_column : then_ptrs) {
+                cast_to_nullable(then_column);
+            }
+        }
+    }
+
+    // Wraps the held column into a nullable one; empty slots and columns that
+    // are already nullable are left untouched.
+    void cast_to_nullable(OptionalPtr& column_ptr) {
+        if (column_ptr.has_value() && !column_ptr.value()->is_nullable()) {
+            column_ptr = make_nullable(column_ptr.value());
+        }
+    }
+};
+
+template <bool has_case, bool has_else>
+class FunctionCase : public IFunction {
+public:
+    // Function name, taken from the FunctionCaseName specialization that
+    // matches this <has_case, has_else> instantiation.
+    static constexpr auto name = FunctionCaseName<has_case, has_else>::name;
+    // Factory hook: returns a new shared instance of this instantiation.
+    static FunctionPtr create() { return std::make_shared<FunctionCase>(); }
+    String get_name() const override { return name; }
+    // Reports 0 fixed arguments together with is_variadic() == true.
+    size_t get_number_of_arguments() const override { return 0; }
+    bool is_variadic() const override { return true; }
+
+    // Derives the result type from the first THEN argument. The result is made
+    // nullable when there is no ELSE branch, or when the ELSE argument or any
+    // THEN argument is nullable.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        // THEN arguments sit at first_then, first_then + 2, ... below then_end;
+        // with an ELSE branch, then_end is the index of the ELSE argument.
+        const int first_then = has_case ? 2 : 1;
+        const int then_end = has_else ? arguments.size() - 1 : arguments.size();
+
+        bool nullable_result = !has_else || arguments[then_end]->is_nullable();
+        for (int idx = first_then; idx < then_end && !nullable_result; idx += 2) {
+            nullable_result = arguments[idx]->is_nullable();
+        }
+
+        if (!nullable_result) {
+            return arguments[first_then];
+        }
+        return make_nullable(arguments[first_then]);
+    }
+
+    // Returns false. NOTE(review): presumably this opts out of the framework's
+    // generic NULL handling because nullable columns are processed explicitly
+    // by the execute_* methods of this class; confirm against IFunction.
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    /// Decides, for every row, which THEN column (or ELSE / default) provides
+    /// the result and then delegates to execute_update_result to build the
+    /// result column.
+    ///
+    /// The first WHEN that matches a row wins; rows that match nothing keep
+    /// index 0, which stands for the ELSE slot.
+    ///
+    /// @tparam ColumnType  concrete column type of the (non-nullable) result.
+    /// @tparam when_null   true when the CASE/WHEN columns were made nullable.
+    /// @tparam then_null   true when the THEN/ELSE columns were made nullable.
+    /// @return Status::NotSupported when there are more slots than a one-byte
+    ///         index can address, otherwise the status of execute_update_result.
+    template <typename ColumnType, bool when_null, bool then_null>
+    Status execute_impl(const DataTypePtr& data_type, Block& block, size_t result,
+                        CaseWhenColumnHolder column_holder) {
+        // Branch indices are stored in one byte per row, so every slot index in
+        // [0, pair_count) has to be representable as uint8_t. Without this
+        // guard a uint8_t loop counter would wrap around and never terminate.
+        if (column_holder.pair_count > UINT8_MAX) {
+            return Status::NotSupported(
+                    fmt::format("Too many WHEN/THEN pairs ({}) in function {}",
+                                column_holder.pair_count - 1, get_name()));
+        }
+
+        const size_t rows_count = column_holder.rows_count;
+
+        // `then` data index corresponding to each row of results, 0 represents `else`.
+        // Heap-backed and zero-initialized: a variable-length stack array is a
+        // compiler extension and may overflow the stack for large blocks.
+        std::vector<uint8_t> then_idx(rows_count, 0);
+        uint8_t* __restrict then_idx_ptr = then_idx.data();
+
+        auto case_column_ptr = column_holder.when_ptrs[0].value_or(nullptr);
+
+        for (size_t pair_idx = 1; pair_idx < column_holder.pair_count; pair_idx++) {
+            // Safe: pair_idx < pair_count <= UINT8_MAX (checked above).
+            const auto i = static_cast<uint8_t>(pair_idx);
+            auto when_column_ptr = column_holder.when_ptrs[pair_idx].value();
+            if constexpr (has_case) {
+                // CASE <expr> WHEN <value>: a row matches when it is still
+                // unmatched, its CASE value is not NULL and equals the WHEN value.
+                // TODO: need simd
+                for (size_t row_idx = 0; row_idx < rows_count; row_idx++) {
+                    if (!then_idx_ptr[row_idx] && !case_column_ptr->is_null_at(row_idx) &&
+                        case_column_ptr->compare_at(row_idx, row_idx, *when_column_ptr, -1) == 0) {
+                        then_idx_ptr[row_idx] = i;
+                    }
+                }
+            } else {
+                if constexpr (when_null) {
+                    // Nullable boolean WHEN column: read through the generic accessor.
+                    // TODO: need simd
+                    for (size_t row_idx = 0; row_idx < rows_count; row_idx++) {
+                        if (!then_idx_ptr[row_idx] && when_column_ptr->get_bool(row_idx)) {
+                            then_idx_ptr[row_idx] = i;
+                        }
+                    }
+                } else {
+                    // Plain UInt8 WHEN column: branch-free update over the raw data.
+                    auto* __restrict cond_raw_data =
+                            reinterpret_cast<const ColumnUInt8*>(when_column_ptr.get())
+                                    ->get_data()
+                                    .data();
+
+                    // simd automatically
+                    for (size_t row_idx = 0; row_idx < rows_count; row_idx++) {
+                        then_idx_ptr[row_idx] |=
+                                (!then_idx_ptr[row_idx]) * cond_raw_data[row_idx] * i;
+                    }
+                }
+            }
+        }
+
+        return execute_update_result<ColumnType, then_null>(data_type, result, block,
+                                                            then_idx.data(), column_holder);
+    }
+
+    // Creates the result column from `data_type`, fills it according to the
+    // per-row branch indices in `then_idx`, and stores it at position `result`.
+    template <typename ColumnType, bool then_null>
+    Status execute_update_result(const DataTypePtr& data_type, size_t result, Block& block,
+                                 uint8* then_idx, CaseWhenColumnHolder& column_holder) {
+        // Strings cannot use the arithmetic fill, and the nullable path is not
+        // auto-vectorized yet (TODO), so both go through the row-by-row copy.
+        constexpr bool row_by_row = std::is_same_v<ColumnType, ColumnString> || then_null;
+
+        auto output_column = data_type->create_column();
+        if constexpr (row_by_row) {
+            update_result_normal(output_column, then_idx, column_holder);
+        } else {
+            update_result_auto_simd<ColumnType>(output_column, then_idx, column_holder);
+        }
+
+        block.replace_by_position(result, std::move(output_column));
+        return Status::OK();
+    }
+
+    // Row-by-row fill: appends to the result the value of the selected THEN
+    // column for each row. Without an ELSE branch, rows whose index is 0
+    // (no WHEN matched) receive the column's default value instead.
+    void update_result_normal(MutableColumnPtr& result_column_ptr, uint8* then_idx,
+                              CaseWhenColumnHolder& column_holder) {
+        for (int row = 0; row < column_holder.rows_count; ++row) {
+            const uint8 branch = then_idx[row];
+            if (has_else || branch) {
+                result_column_ptr->insert_from(*column_holder.then_ptrs[branch].value(), row);
+            } else {
+                result_column_ptr->insert_default();
+            }
+        }
+    }
+
+    /// Branch-free fill for fixed-width, non-nullable results.
+    ///
+    /// The result is zeroed first; then, for every THEN/ELSE slot, each row
+    /// adds `value * (then_idx[row] == slot)`, so exactly the selected slot
+    /// contributes. Without an ELSE branch slot 0 is skipped and unmatched
+    /// rows stay zero.
+    ///
+    /// @tparam ColumnType concrete column type of both the result column and
+    ///         all THEN/ELSE columns (their raw data is reinterpreted as such).
+    template <typename ColumnType>
+    void update_result_auto_simd(MutableColumnPtr& result_column_ptr, uint8* __restrict then_idx,
+                                 CaseWhenColumnHolder& column_holder) {
+        const size_t rows_count = column_holder.rows_count;
+        result_column_ptr->resize(rows_count);
+        auto* __restrict result_raw_data =
+                reinterpret_cast<ColumnType*>(result_column_ptr.get())->get_data().data();
+
+        // set default value
+        for (size_t row_idx = 0; row_idx < rows_count; row_idx++) {
+            result_raw_data[row_idx] = 0;
+        }
+
+        // some types had simd automatically, but some not.
+        // The slot counter is size_t: pair_count is a size_t, and a uint8_t
+        // counter would wrap around and loop forever once it reached 256.
+        for (size_t slot = (has_else ? 0 : 1); slot < column_holder.pair_count; slot++) {
+            // Source columns are only read, so take a const view of their data.
+            const auto* __restrict column_raw_data =
+                    reinterpret_cast<const ColumnType*>(
+                            column_holder.then_ptrs[slot].value().get())
+                            ->get_data()
+                            .data();
+            // then_idx holds one-byte indices; compare in the same width so the
+            // inner loop keeps its original shape.
+            const auto i = static_cast<uint8_t>(slot);
+
+            for (size_t row_idx = 0; row_idx < rows_count; row_idx++) {
+                result_raw_data[row_idx] += (then_idx[row_idx] == i) * column_raw_data[row_idx];
+            }
+        }
+    }
+
+    // Determines whether the THEN/ELSE side must be treated as nullable, builds
+    // the column holder accordingly, and dispatches to the matching
+    // execute_impl<..., then_null> instantiation.
+    template <typename ColumnType, bool when_null>
+    Status execute_get_then_null(const DataTypePtr& data_type, Block& block,
+                                 const ColumnNumbers& arguments, size_t result,
+                                 size_t input_rows_count) {
+        // A missing ELSE always makes the result side nullable; otherwise the
+        // ELSE column itself decides first.
+        bool then_null = !has_else;
+        if constexpr (has_else) {
+            then_null = block.get_by_position(arguments[arguments.size() - 1])
+                                .column->is_nullable();
+        }
+        // Any nullable THEN column is sufficient as well.
+        for (int i = 1 + has_case; !then_null && i < arguments.size() - has_else; i += 2) {
+            then_null = block.get_by_position(arguments[i]).column->is_nullable();
+        }
+
+        CaseWhenColumnHolder column_holder(block, arguments, input_rows_count, has_case,
+                                           has_else, when_null, then_null);
+
+        if (!then_null) {
+            return execute_impl<ColumnType, when_null, false>(data_type, block, result,
+                                                              column_holder);
+        }
+        return execute_impl<ColumnType, when_null, true>(data_type, block, result,
+                                                         column_holder);
+    }
+
+    // Determines whether the CASE operand or any WHEN column is nullable and
+    // dispatches to the matching execute_get_then_null<..., when_null>
+    // instantiation.
+    template <typename ColumnType>
+    Status execute_get_when_null(const DataTypePtr& data_type, Block& block,
+                                 const ColumnNumbers& arguments, size_t result,
+                                 size_t input_rows_count) {
+        bool when_null = false;
+        if constexpr (has_case) {
+            when_null = block.get_by_position(arguments[0]).column->is_nullable();
+        }
+        // WHEN columns sit at every second argument after the optional CASE operand.
+        for (int i = has_case; !when_null && i < arguments.size() - has_else; i += 2) {
+            when_null = block.get_by_position(arguments[i]).column->is_nullable();
+        }
+
+        if (!when_null) {
+            return execute_get_then_null<ColumnType, false>(data_type, block, arguments, result,
+                                                            input_rows_count);
+        }
+        return execute_get_then_null<ColumnType, true>(data_type, block, arguments, result,
+                                                       input_rows_count);
+    }
+
+    /// Selects the concrete column type that matches `data_type` (looking
+    /// through a Nullable wrapper) and forwards to execute_get_when_null.
+    ///
+    /// @param data_type result type of the CASE expression.
+    /// @return Status::NotSupported for result types without a branch below,
+    ///         otherwise the status of the dispatched implementation.
+    Status execute_get_type(const DataTypePtr& data_type, Block& block,
+                            const ColumnNumbers& arguments, size_t result,
+                            size_t input_rows_count) {
+        WhichDataType which(data_type->is_nullable()
+                                    ? reinterpret_cast<const DataTypeNullable*>(data_type.get())
+                                              ->get_nested_type()
+                                    : data_type);
+
+        // TODO: use template traits here.
+        if (which.is_uint8()) {
+            return execute_get_when_null<ColumnUInt8>(data_type, block, arguments, result,
+                                                      input_rows_count);
+        } else if (which.is_uint16()) {
+            // This branch used to test is_int16(), which is also tested further
+            // down; as a result UInt16 was rejected as unsupported.
+            return execute_get_when_null<ColumnUInt16>(data_type, block, arguments, result,
+                                                       input_rows_count);
+        } else if (which.is_uint32()) {
+            return execute_get_when_null<ColumnUInt32>(data_type, block, arguments, result,
+                                                       input_rows_count);
+        } else if (which.is_uint64()) {
+            return execute_get_when_null<ColumnUInt64>(data_type, block, arguments, result,
+                                                       input_rows_count);
+        } else if (which.is_int8()) {
+            return execute_get_when_null<ColumnInt8>(data_type, block, arguments, result,
+                                                     input_rows_count);
+        } else if (which.is_int16()) {
+            return execute_get_when_null<ColumnInt16>(data_type, block, arguments, result,
+                                                      input_rows_count);
+        } else if (which.is_int32()) {
+            return execute_get_when_null<ColumnInt32>(data_type, block, arguments, result,
+                                                      input_rows_count);
+        } else if (which.is_int64()) {
+            return execute_get_when_null<ColumnInt64>(data_type, block, arguments, result,
+                                                      input_rows_count);
+        } else if (which.is_date_or_datetime()) {
+            return execute_get_when_null<ColumnVector<DateTime>>(data_type, block, arguments,
+                                                                 result, input_rows_count);
+        } else if (which.is_float32()) {
+            return execute_get_when_null<ColumnFloat32>(data_type, block, arguments, result,
+                                                        input_rows_count);
+        } else if (which.is_float64()) {
+            return execute_get_when_null<ColumnFloat64>(data_type, block, arguments, result,
+                                                        input_rows_count);
+        } else if (which.is_decimal()) {
+            return execute_get_when_null<ColumnDecimal<Decimal128>>(data_type, block, arguments,
+                                                                    result, input_rows_count);
+        } else if (which.is_string()) {
+            return execute_get_when_null<ColumnString>(data_type, block, arguments, result,
+                                                       input_rows_count);
+        } else {
+            return Status::NotSupported(fmt::format("Unexpected type {} of argument of function {}",
+                                                    data_type->get_name(), get_name()));
+        }
+    }
+
+    /// IFunction entry point: reads the CaseState stored as FRAGMENT_LOCAL
+    /// function state and evaluates the CASE expression for its result type.
+    ///
+    /// @return Status::RuntimeError when no state or no result type has been
+    ///         set, otherwise the status of execute_get_type.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto* case_state = reinterpret_cast<CaseState*>(
+                context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+
+        // CaseState::result_type defaults to nullptr and execute_get_type
+        // dereferences it, so fail with a status instead of crashing.
+        if (case_state == nullptr || case_state->result_type == nullptr) {
+            return Status::RuntimeError(
+                    fmt::format("Function {} is missing its result type state", get_name()));
+        }
+
+        return execute_get_type(case_state->result_type, block, arguments, result,
+                                input_rows_count);
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_cast.cpp b/be/src/vec/functions/function_cast.cpp
new file mode 100644
index 0000000000..761440c917
--- /dev/null
+++ b/be/src/vec/functions/function_cast.cpp
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsConversion.h
+// and modified by Doris
+#include "vec/functions/function_cast.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+// Registers FunctionBuilderCast with `factory`.
+void register_function_cast(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionBuilderCast>();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
new file mode 100644
index 0000000000..0aa93d0df5
--- /dev/null
+++ b/be/src/vec/functions/function_cast.h
@@ -0,0 +1,1307 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsConversion.h
+// and modified by Doris
+
+#pragma once
+
+#include <fmt/format.h>
+
+#include "common/logging.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/columns_common.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/field_visitors.h"
+#include "vec/common/string_buffer.hpp"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_factory.hpp"
+#include "vec/data_types/data_type_nothing.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/io/io_helper.h"
+#include "vec/io/reader_buffer.h"
+#include "vec/runtime/vdatetime_value.h"
+namespace doris::vectorized {
+/** Type conversion functions.
+  * toType - conversion in "natural way";
+  */
+// Reads the decimal scale from row 0 of `named_column`. The column's data type
+// must be one of UInt8/UInt16/UInt32/UInt64; any other type is logged as FATAL.
+inline UInt32 extract_to_decimal_scale(const ColumnWithTypeAndName& named_column) {
+    const auto* scale_type = named_column.type.get();
+    if (!check_and_get_data_type<DataTypeUInt64>(scale_type) &&
+        !check_and_get_data_type<DataTypeUInt32>(scale_type) &&
+        !check_and_get_data_type<DataTypeUInt16>(scale_type) &&
+        !check_and_get_data_type<DataTypeUInt8>(scale_type)) {
+        LOG(FATAL) << fmt::format("Illegal type of toDecimal() scale {}",
+                                  named_column.type->get_name());
+    }
+
+    Field scale_field;
+    named_column.column->get(0, scale_field);
+    return scale_field.get<UInt32>();
+}
+
+/** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment.
+  *  (Date is represented internally as number of days from some day; DateTime - as unix timestamp)
+  */
+// Generic element-wise conversion between number, decimal and time column types.
+// `Name::name` is only used to build error messages.
+template <typename FromDataType, typename ToDataType, typename Name>
+struct ConvertImpl {
+    using FromFieldType = typename FromDataType::FieldType;
+    using ToFieldType = typename ToDataType::FieldType;
+
+    // Converts the column at arguments[0] and stores the new column at `result`.
+    // `additions` is read as the target scale when ToDataType is a decimal type
+    // and is unused otherwise. Returns RuntimeError when the source column is
+    // not of the column type expected for FromDataType, or when a decimal
+    // conversion is requested between unsupported type categories.
+    template <typename Additions = void*>
+    static Status execute(Block& block, const ColumnNumbers& arguments, size_t result,
+                          size_t /*input_rows_count*/,
+                          Additions additions [[maybe_unused]] = Additions()) {
+        const ColumnWithTypeAndName& named_from = block.get_by_position(arguments[0]);
+
+        // Decimal field types live in ColumnDecimal, everything else in ColumnVector.
+        using ColVecFrom =
+                std::conditional_t<IsDecimalNumber<FromFieldType>, ColumnDecimal<FromFieldType>,
+                                   ColumnVector<FromFieldType>>;
+        using ColVecTo = std::conditional_t<IsDecimalNumber<ToFieldType>,
+                                            ColumnDecimal<ToFieldType>, ColumnVector<ToFieldType>>;
+
+        // With a decimal on either side, the source must be decimal/number/time
+        // and the target decimal/number; reject every other combination.
+        if constexpr (IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>) {
+            if constexpr (!(IsDataTypeDecimalOrNumber<FromDataType> || IsTimeType<FromDataType>) ||
+                          !IsDataTypeDecimalOrNumber<ToDataType>)
+                return Status::RuntimeError(
+                        fmt::format("Illegal column {} of first argument of function {}",
+                                    named_from.column->get_name(), Name::name));
+        }
+
+        if (const ColVecFrom* col_from =
+                    check_and_get_column<ColVecFrom>(named_from.column.get())) {
+            typename ColVecTo::MutablePtr col_to = nullptr;
+            if constexpr (IsDataTypeDecimal<ToDataType>) {
+                // Decimal target columns are created with the requested scale.
+                UInt32 scale = additions;
+                col_to = ColVecTo::create(0, scale);
+            } else
+                col_to = ColVecTo::create();
+
+            const auto& vec_from = col_from->get_data();
+            auto& vec_to = col_to->get_data();
+            size_t size = vec_from.size();
+            vec_to.resize(size);
+
+            for (size_t i = 0; i < size; ++i) {
+                if constexpr (IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>) {
+                    // Decimal involved: pick the helper matching the from/to categories.
+                    if constexpr (IsDataTypeDecimal<FromDataType> && IsDataTypeDecimal<ToDataType>)
+                        vec_to[i] = convert_decimals<FromDataType, ToDataType>(
+                                vec_from[i], vec_from.get_scale(), vec_to.get_scale());
+                    else if constexpr (IsDataTypeDecimal<FromDataType> &&
+                                       IsDataTypeNumber<ToDataType>)
+                        vec_to[i] = convert_from_decimal<FromDataType, ToDataType>(
+                                vec_from[i], vec_from.get_scale());
+                    else if constexpr (IsDataTypeNumber<FromDataType> &&
+                                       IsDataTypeDecimal<ToDataType>)
+                        vec_to[i] = convert_to_decimal<FromDataType, ToDataType>(
+                                vec_from[i], vec_to.get_scale());
+                    else if constexpr (IsTimeType<FromDataType> && IsDataTypeDecimal<ToDataType>) {
+                        // Time -> decimal goes through the value's int64 representation.
+                        vec_to[i] = convert_to_decimal<DataTypeInt64, ToDataType>
+                                (reinterpret_cast<const VecDateTimeValue&>(vec_from[i]).to_int64(), vec_to.get_scale());
+                    }
+                } else if constexpr (IsTimeType<FromDataType>) {
+                    if constexpr (IsTimeType<ToDataType>) {
+                        // Time -> time: copy the value, then fix up its kind
+                        // (Date -> DateTime, every other pairing -> Date).
+                        vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
+                        if constexpr (IsDateType<FromDataType> && IsDateTimeType<ToDataType>) {
+                            DataTypeDateTime::cast_to_date_time(vec_to[i]);
+                        } else {
+                            DataTypeDate::cast_to_date(vec_to[i]);
+                        }
+                    } else {
+                        // Time -> non-time: store the value's int64 representation.
+                        vec_to[i] = reinterpret_cast<const VecDateTimeValue&>(vec_from[i]).to_int64();
+                    }
+                } else
+                    // Plain number -> number conversion.
+                    vec_to[i] = static_cast<ToFieldType>(vec_from[i]);
+            }
+
+            block.replace_by_position(result, std::move(col_to));
+        } else {
+            return Status::RuntimeError(
+                    fmt::format("Illegal column {} of first argument of function {}",
+                                named_from.column->get_name(), Name::name));
+        }
+        return Status::OK();
+    }
+};
+
+/** Identity conversion for non-parametric types: source and target type are
+  * the same, so the result simply shares the source column.
+  */
+template <typename T, typename Name>
+struct ConvertImpl<std::enable_if_t<!T::is_parametric, T>, T, Name> {
+    static Status execute(Block& block, const ColumnNumbers& arguments, size_t result,
+                          size_t /*input_rows_count*/) {
+        const auto& source = block.get_by_position(arguments[0]);
+        auto& target = block.get_by_position(result);
+        target.column = source.column;
+        return Status::OK();
+    }
+};
+
+// Converts a number or decimal column to Date/DateTime (String sources are not
+// handled here). The result is always a nullable column: rows whose value
+// from_date_int64() rejects are flagged in the null map.
+template <typename FromDataType, typename ToDataType, typename Name>
+struct ConvertImplToTimeType {
+    using FromFieldType = typename FromDataType::FieldType;
+    using ToFieldType = typename ToDataType::FieldType;
+
+    static Status execute(Block& block, const ColumnNumbers& arguments, size_t result,
+                          size_t /*input_rows_count*/) {
+        using ColVecFrom =
+                std::conditional_t<IsDecimalNumber<FromFieldType>, ColumnDecimal<FromFieldType>,
+                                   ColumnVector<FromFieldType>>;
+        using ColVecTo = ColumnVector<Int64>;
+
+        const ColumnWithTypeAndName& named_from = block.get_by_position(arguments[0]);
+        const ColVecFrom* col_from = check_and_get_column<ColVecFrom>(named_from.column.get());
+        if (col_from == nullptr) {
+            return Status::RuntimeError(
+                    fmt::format("Illegal column {} of first argument of function {}",
+                                named_from.column->get_name(), Name::name));
+        }
+
+        const auto& source_values = col_from->get_data();
+        const size_t row_count = source_values.size();
+
+        // Nested value column plus its null map, both sized up front.
+        auto nested_column = ColVecTo::create(row_count);
+        auto null_map_column = ColumnUInt8::create(row_count);
+        auto& target_values = nested_column->get_data();
+        auto& null_flags = null_map_column->get_data();
+
+        for (size_t row = 0; row < row_count; ++row) {
+            // The Int64 slot is filled in place through a VecDateTimeValue view.
+            auto& date_value = reinterpret_cast<VecDateTimeValue&>(target_values[row]);
+            bool parsed;
+            if constexpr (IsDecimalNumber<FromFieldType>) {
+                parsed = date_value.from_date_int64(
+                        convert_from_decimal<FromDataType, DataTypeInt64>(
+                                source_values[row], source_values.get_scale()));
+            } else {
+                parsed = date_value.from_date_int64(source_values[row]);
+            }
+            null_flags[row] = !parsed;
+
+            // DateType of VecDateTimeValue should cast to date
+            if constexpr (IsDateType<ToDataType>) {
+                date_value.cast_to_date();
+            }
+        }
+
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(nested_column), std::move(null_map_column));
+        return Status::OK();
+    }
+};
+
+// Type-agnostic conversion to String: every row is rendered through the source
+// data type's to_string() into a freshly created ColumnString.
+struct ConvertImplGenericToString {
+    static Status execute(Block& block, const ColumnNumbers& arguments, size_t result) {
+        const auto& source = block.get_by_position(arguments[0]);
+        const IDataType& source_type = *source.type;
+        const IColumn& source_column = *source.column;
+
+        auto string_column = ColumnString::create();
+        VectorBufferWriter writer(*string_column.get());
+
+        const size_t row_count = source_column.size();
+        for (size_t row = 0; row != row_count; ++row) {
+            source_type.to_string(source_column, row, writer);
+            // One commit per row, right after that row's text has been written.
+            writer.commit();
+        }
+
+        block.replace_by_position(result, std::move(string_column));
+        return Status::OK();
+    }
+};
+
+// Conversions from String are not provided by ConvertImpl: this specialization
+// always fails with a RuntimeError.
+template <typename ToDataType, typename Name>
+struct ConvertImpl<DataTypeString, ToDataType, Name> {
+    template <typename Additions = void*>
+    static Status execute(Block& /*block*/, const ColumnNumbers& /*arguments*/,
+                          size_t /*result*/, size_t /*input_rows_count*/,
+                          Additions /*additions*/ = Additions()) {
+        return Status::RuntimeError("not support convert from string");
+    }
+};
+
+// Name tags for the conversion functions. Each struct only carries a `name`
+// constant; the conversion templates in this file take such a tag as their
+// `Name` parameter and read `Name::name` when building error messages.
+struct NameToString {
+    static constexpr auto name = "to_string";
+};
+struct NameToDecimal32 {
+    static constexpr auto name = "toDecimal32";
+};
+struct NameToDecimal64 {
+    static constexpr auto name = "toDecimal64";
+};
+struct NameToDecimal128 {
+    static constexpr auto name = "toDecimal128";
+};
+struct NameToUInt8 {
+    static constexpr auto name = "toUInt8";
+};
+struct NameToUInt16 {
+    static constexpr auto name = "toUInt16";
+};
+struct NameToUInt32 {
+    static constexpr auto name = "toUInt32";
+};
+struct NameToUInt64 {
+    static constexpr auto name = "toUInt64";
+};
+struct NameToInt8 {
+    static constexpr auto name = "toInt8";
+};
+struct NameToInt16 {
+    static constexpr auto name = "toInt16";
+};
+struct NameToInt32 {
+    static constexpr auto name = "toInt32";
+};
+struct NameToInt64 {
+    static constexpr auto name = "toInt64";
+};
+struct NameToInt128 {
+    static constexpr auto name = "toInt128";
+};
+struct NameToFloat32 {
+    static constexpr auto name = "toFloat32";
+};
+struct NameToFloat64 {
+    static constexpr auto name = "toFloat64";
+};
+struct NameToDate {
+    static constexpr auto name = "toDate";
+};
+struct NameToDateTime {
+    static constexpr auto name = "toDateTime";
+};
+
+/// Tries to parse one value of `DataType::FieldType` from `rb` into `x`.
+///
+/// The reader is selected at compile time from the destination type, in this
+/// order: date-time, date, floating point, integer, decimal. The first
+/// matching reader's result is returned.
+///
+/// @param x   destination value, written by the selected reader.
+/// @param rb  buffer positioned at the text to parse.
+/// @return    the selected reader's result, or false when the destination
+///            type has no text reader.
+///
+/// The third (time zone) parameter is accepted for interface compatibility and
+/// is not used.
+template <typename DataType>
+bool try_parse_impl(typename DataType::FieldType& x, ReadBuffer& rb, const DateLUTImpl*) {
+    using FieldType = typename DataType::FieldType;
+
+    if constexpr (IsDateTimeType<DataType>) {
+        return try_read_datetime_text(x, rb);
+    } else if constexpr (IsDateType<DataType>) {
+        return try_read_date_text(x, rb);
+    } else if constexpr (std::is_floating_point_v<FieldType>) {
+        return try_read_float_text(x, rb);
+    } else if constexpr (std::is_integral_v<FieldType>) {
+        return try_read_int_text(x, rb);
+    } else if constexpr (IsDataTypeDecimal<DataType>) {
+        return try_read_decimal_text(x, rb);
+    } else {
+        // No reader matches this destination type. Report a parse failure
+        // instead of flowing off the end of a non-void function, which is
+        // undefined behavior.
+        return false;
+    }
+}
+
+/// Monotonicity.
+
+/// Monotonicity policy that always reports monotonicity information as
+/// available and answers every range query with `Monotonicity{true}`.
+struct PositiveMonotonicity {
+    static bool has() { return true; }
+
+    static IFunction::Monotonicity get(const IDataType& /*type*/, const Field& /*left*/,
+                                       const Field& /*right*/) {
+        IFunction::Monotonicity answer{true};
+        return answer;
+    }
+};
+
+/// Monotonicity policy that reports no monotonicity information and answers
+/// every range query with `Monotonicity{false}`.
+struct UnknownMonotonicity {
+    static bool has() { return false; }
+
+    static IFunction::Monotonicity get(const IDataType& /*type*/, const Field& /*left*/,
+                                       const Field& /*right*/) {
+        IFunction::Monotonicity answer{false};
+        return answer;
+    }
+};
+
+/// Monotonicity policy for conversions to the numeric type `T`.
+///
+/// `get` decides, for a source type and a [left, right] range of source values,
+/// whether the conversion can be treated as monotonic on that range. A
+/// default-constructed Monotonicity (`return {}`) is returned whenever
+/// monotonicity cannot be established.
+/// NOTE(review): the three-value initializer `{true, true, true}` presumably means
+/// (is_monotonic, is_positive, is_always_monotonic) -- confirm against
+/// IFunction::Monotonicity.
+template <typename T>
+struct ToNumberMonotonicity {
+    /// Monotonicity information is always available for numeric targets.
+    static bool has() { return true; }
+
+    /// Drops the low `sizeof(T) * 8` bits of `x`, leaving only the bits that do not
+    /// fit into `T`. Returns 0 when `T` is at least as wide as UInt64.
+    static UInt64 divide_by_range_of_type(UInt64 x) {
+        if constexpr (sizeof(T) < sizeof(UInt64))
+            return x >> (sizeof(T) * 8);
+        else
+            return 0;
+    }
+
+    /// @param type   source data type of the conversion.
+    /// @param left   lower bound of the source range; a null Field is accepted.
+    /// @param right  upper bound of the source range; a null Field is accepted.
+    /// @return       monotonicity of the conversion `type -> T` on the range.
+    static IFunction::Monotonicity get(const IDataType& type, const Field& left,
+                                       const Field& right) {
+        /// Only sources represented by a number are analysed.
+        if (!type.is_value_represented_by_number()) return {};
+
+        /// If type is same, the conversion is always monotonic.
+        /// (Enum has separate case, because it is different data type)
+        if (check_and_get_data_type<DataTypeNumber<T>>(
+                    &type) /*|| check_and_get_data_type<DataTypeEnum<T>>(&type)*/)
+            return {true, true, true};
+
+        /// Float cases.
+
+        /// When converting to Float, the conversion is always monotonic.
+        if (std::is_floating_point_v<T>) return {true, true, true};
+
+        /// If converting from Float, for monotonicity, arguments must fit in range of result type.
+        if (WhichDataType(type).is_float()) {
+            if (left.is_null() || right.is_null()) return {};
+
+            Float64 left_float = left.get<Float64>();
+            Float64 right_float = right.get<Float64>();
+
+            if (left_float >= std::numeric_limits<T>::min() &&
+                left_float <= std::numeric_limits<T>::max() &&
+                right_float >= std::numeric_limits<T>::min() &&
+                right_float <= std::numeric_limits<T>::max())
+                return {true};
+
+            return {};
+        }
+
+        /// Integer cases.
+
+        const bool from_is_unsigned = type.is_value_represented_by_unsigned_integer();
+        const bool to_is_unsigned = std::is_unsigned_v<T>;
+
+        const size_t size_of_from = type.get_size_of_value_in_memory();
+        const size_t size_of_to = sizeof(T);
+
+        /// "First half" means the value read as Int64 is non-negative.
+        /// A null left bound counts as first half only for an unsigned source,
+        /// a null right bound only for a signed source (presumably modelling an
+        /// open-ended range -- confirm with callers).
+        const bool left_in_first_half =
+                left.is_null() ? from_is_unsigned : (left.get<Int64>() >= 0);
+
+        const bool right_in_first_half =
+                right.is_null() ? !from_is_unsigned : (right.get<Int64>() >= 0);
+
+        /// Size of type is the same.
+        if (size_of_from == size_of_to) {
+            if (from_is_unsigned == to_is_unsigned) return {true, true, true};
+
+            /// Signedness differs: monotonic only if both bounds are in the same half.
+            if (left_in_first_half == right_in_first_half) return {true};
+
+            return {};
+        }
+
+        /// Size of type is expanded.
+        if (size_of_from < size_of_to) {
+            if (from_is_unsigned == to_is_unsigned) return {true, true, true};
+
+            /// unsigned -> wider signed.
+            if (!to_is_unsigned) return {true, true, true};
+
+            /// signed -> unsigned. If arguments from the same half, then function is monotonic.
+            if (left_in_first_half == right_in_first_half) return {true};
+
+            return {};
+        }
+
+        /// Size of type is shrunk.
+        if (size_of_from > size_of_to) {
+            /// Function cannot be monotonic on unbounded ranges.
+            if (left.is_null() || right.is_null()) return {};
+
+            if (from_is_unsigned == to_is_unsigned) {
+                /// all bits other than that fits, must be same.
+                if (divide_by_range_of_type(left.get<UInt64>()) ==
+                    divide_by_range_of_type(right.get<UInt64>()))
+                    return {true};
+
+                return {};
+            } else {
+                /// When signedness is changed, it's also required for arguments to be from the same half.
+                /// And they must be in the same half after converting to the result type.
+                if (left_in_first_half == right_in_first_half &&
+                    (T(left.get<Int64>()) >= 0) == (T(right.get<Int64>()) >= 0) &&
+                    divide_by_range_of_type(left.get<UInt64>()) ==
+                            divide_by_range_of_type(right.get<UInt64>()))
+                    return {true};
+
+                return {};
+            }
+        }
+
+        /// The three size comparisons above are exhaustive, so this point is never reached.
+        __builtin_unreachable();
+    }
+};
+
+/** Monotonicity policy for the `to_string` function, kept mainly for test purposes.
+  * The conversion is reported as monotonic only when both bounds hold the same
+  * integer Field type and are either both zero or have equal `floor(log10(value))`;
+  * signed bounds must additionally both be positive.
+  */
+struct ToStringMonotonicity {
+    static bool has() { return true; }
+
+    static IFunction::Monotonicity get(const IDataType& /*type*/, const Field& left,
+                                       const Field& right) {
+        IFunction::Monotonicity positive(true, true);
+        IFunction::Monotonicity not_monotonic;
+
+        // An unbounded side gives no information.
+        if (left.is_null() || right.is_null()) {
+            return {};
+        }
+
+        const auto left_kind = left.get_type();
+        const auto right_kind = right.get_type();
+
+        if (left_kind == Field::Types::UInt64 && right_kind == Field::Types::UInt64) {
+            if (left.get<Int64>() == 0 && right.get<Int64>() == 0) {
+                return positive;
+            }
+            const bool same_magnitude =
+                    floor(log10(left.get<UInt64>())) == floor(log10(right.get<UInt64>()));
+            return same_magnitude ? positive : not_monotonic;
+        }
+
+        if (left_kind == Field::Types::Int64 && right_kind == Field::Types::Int64) {
+            if (left.get<Int64>() == 0 && right.get<Int64>() == 0) {
+                return positive;
+            }
+            if (left.get<Int64>() > 0 && right.get<Int64>() > 0 &&
+                floor(log10(left.get<Int64>())) == floor(log10(right.get<Int64>()))) {
+                return positive;
+            }
+            return not_monotonic;
+        }
+
+        return not_monotonic;
+    }
+};
+
+/// IFunction converting its first argument to `ToDataType`.
+///
+/// `Name` supplies the function name; `MonotonicityImpl` supplies the
+/// monotonicity answers. Conversion to String goes through
+/// ConvertImplGenericToString; every other target is dispatched on the source
+/// type id to the matching ConvertImpl specialization. Decimal targets take
+/// the scale from a second argument.
+template <typename ToDataType, typename Name, typename MonotonicityImpl>
+class FunctionConvert : public IFunction {
+public:
+    using Monotonic = MonotonicityImpl;
+
+    static constexpr auto name = Name::name;
+    static constexpr bool to_decimal = std::is_same_v<Name, NameToDecimal32> ||
+                                       std::is_same_v<Name, NameToDecimal64> ||
+                                       std::is_same_v<Name, NameToDecimal128>;
+
+    static FunctionPtr create() { return std::make_shared<FunctionConvert>(); }
+
+    String get_name() const override { return name; }
+
+    bool is_variadic() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
+    bool get_is_injective(const Block&) override { return std::is_same_v<Name, NameToString>; }
+
+    // This function should not be called to obtain the result data type;
+    // FunctionCast::get_return_type_impl is used for that.
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& /*arguments*/) const override {
+        return std::make_shared<ToDataType>();
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
+    bool can_be_executed_on_default_arguments() const override { return false; }
+
+    Status execute_impl(FunctionContext* /*context*/, Block& block,
+                        const ColumnNumbers& arguments, size_t result,
+                        size_t input_rows_count) override {
+        return executeInternal(block, arguments, result, input_rows_count);
+    }
+
+    bool has_information_about_monotonicity() const override { return Monotonic::has(); }
+
+    Monotonicity get_monotonicity_for_range(const IDataType& type, const Field& left,
+                                            const Field& right) const override {
+        return Monotonic::get(type, left, right);
+    }
+
+private:
+    /// Runs the conversion and stores the converted column at `result`.
+    /// Returns an error status when no argument is given, when a Decimal
+    /// target does not receive exactly two arguments, or when the source type
+    /// is not handled by the dispatcher.
+    Status executeInternal(Block& block, const ColumnNumbers& arguments, size_t result,
+                           size_t input_rows_count) {
+        if (arguments.empty()) {
+            return Status::RuntimeError(
+                    fmt::format("Function {} expects at least 1 arguments", get_name()));
+        }
+
+        if constexpr (std::is_same_v<ToDataType, DataTypeString>) {
+            /// Generic conversion of any type to String.
+            return ConvertImplGenericToString::execute(block, arguments, result);
+        } else {
+            const IDataType* source_type = block.get_by_position(arguments[0]).type.get();
+            Status status;
+
+            // Invoked by the dispatcher with the (source, target) data type pair.
+            auto convert = [&](const auto& type_pair) -> bool {
+                using TypePair = std::decay_t<decltype(type_pair)>;
+                using SourceDataType = typename TypePair::LeftType;
+                using TargetDataType = typename TypePair::RightType;
+                using Impl = ConvertImpl<SourceDataType, TargetDataType, Name>;
+
+                // now, cast to decimal do not execute the code
+                if constexpr (IsDataTypeDecimal<TargetDataType>) {
+                    if (arguments.size() != 2) {
+                        status = Status::RuntimeError(fmt::format(
+                                "Function {} expects 2 arguments for Decimal.", get_name()));
+                        return true;
+                    }
+
+                    const ColumnWithTypeAndName& scale_column = block.get_by_position(arguments[1]);
+                    UInt32 scale = extract_to_decimal_scale(scale_column);
+
+                    status = Impl::execute(block, arguments, result, input_rows_count, scale);
+                } else {
+                    status = Impl::execute(block, arguments, result, input_rows_count);
+                }
+                return true;
+            };
+
+            const bool dispatched =
+                    call_on_index_and_data_type<ToDataType>(source_type->get_type_id(), convert);
+            if (!dispatched) {
+                status = Status::RuntimeError(fmt::format(
+                        "Illegal type {} of argument of function {}",
+                        block.get_by_position(arguments[0]).type->get_name(), get_name()));
+            }
+            return status;
+        }
+    }
+};
+
+/// Concrete FunctionConvert instantiations, one per supported target type.
+/// Numeric targets use ToNumberMonotonicity, String uses ToStringMonotonicity,
+/// and Decimal / Date / DateTime targets use UnknownMonotonicity.
+using FunctionToUInt8 = FunctionConvert<DataTypeUInt8, NameToUInt8, ToNumberMonotonicity<UInt8>>;
+using FunctionToUInt16 =
+        FunctionConvert<DataTypeUInt16, NameToUInt16, ToNumberMonotonicity<UInt16>>;
+using FunctionToUInt32 =
+        FunctionConvert<DataTypeUInt32, NameToUInt32, ToNumberMonotonicity<UInt32>>;
+using FunctionToUInt64 =
+        FunctionConvert<DataTypeUInt64, NameToUInt64, ToNumberMonotonicity<UInt64>>;
+using FunctionToInt8 = FunctionConvert<DataTypeInt8, NameToInt8, ToNumberMonotonicity<Int8>>;
+using FunctionToInt16 = FunctionConvert<DataTypeInt16, NameToInt16, ToNumberMonotonicity<Int16>>;
+using FunctionToInt32 = FunctionConvert<DataTypeInt32, NameToInt32, ToNumberMonotonicity<Int32>>;
+using FunctionToInt64 = FunctionConvert<DataTypeInt64, NameToInt64, ToNumberMonotonicity<Int64>>;
+using FunctionToInt128 =
+        FunctionConvert<DataTypeInt128, NameToInt128, ToNumberMonotonicity<Int128>>;
+using FunctionToFloat32 =
+        FunctionConvert<DataTypeFloat32, NameToFloat32, ToNumberMonotonicity<Float32>>;
+using FunctionToFloat64 =
+        FunctionConvert<DataTypeFloat64, NameToFloat64, ToNumberMonotonicity<Float64>>;
+using FunctionToString = FunctionConvert<DataTypeString, NameToString, ToStringMonotonicity>;
+using FunctionToDecimal32 =
+        FunctionConvert<DataTypeDecimal<Decimal32>, NameToDecimal32, UnknownMonotonicity>;
+using FunctionToDecimal64 =
+        FunctionConvert<DataTypeDecimal<Decimal64>, NameToDecimal64, UnknownMonotonicity>;
+using FunctionToDecimal128 =
+        FunctionConvert<DataTypeDecimal<Decimal128>, NameToDecimal128, UnknownMonotonicity>;
+using FunctionToDate = FunctionConvert<DataTypeDate, NameToDate, UnknownMonotonicity>;
+using FunctionToDateTime = FunctionConvert<DataTypeDateTime, NameToDateTime, UnknownMonotonicity>;
+
+/// Compile-time map from a target data type to its FunctionConvert alias,
+/// exposed as `FunctionTo<DataType>::Type`. The primary template is only
+/// declared, so using a data type without a specialization below is a compile
+/// error. DataTypeString has no specialization here.
+template <typename DataType>
+struct FunctionTo;
+template <>
+struct FunctionTo<DataTypeUInt8> {
+    using Type = FunctionToUInt8;
+};
+template <>
+struct FunctionTo<DataTypeUInt16> {
+    using Type = FunctionToUInt16;
+};
+template <>
+struct FunctionTo<DataTypeUInt32> {
+    using Type = FunctionToUInt32;
+};
+template <>
+struct FunctionTo<DataTypeUInt64> {
+    using Type = FunctionToUInt64;
+};
+template <>
+struct FunctionTo<DataTypeInt8> {
+    using Type = FunctionToInt8;
+};
+template <>
+struct FunctionTo<DataTypeInt16> {
+    using Type = FunctionToInt16;
+};
+template <>
+struct FunctionTo<DataTypeInt32> {
+    using Type = FunctionToInt32;
+};
+template <>
+struct FunctionTo<DataTypeInt64> {
+    using Type = FunctionToInt64;
+};
+template <>
+struct FunctionTo<DataTypeInt128> {
+    using Type = FunctionToInt128;
+};
+template <>
+struct FunctionTo<DataTypeFloat32> {
+    using Type = FunctionToFloat32;
+};
+template <>
+struct FunctionTo<DataTypeFloat64> {
+    using Type = FunctionToFloat64;
+};
+template <>
+struct FunctionTo<DataTypeDecimal<Decimal32>> {
+    using Type = FunctionToDecimal32;
+};
+template <>
+struct FunctionTo<DataTypeDecimal<Decimal64>> {
+    using Type = FunctionToDecimal64;
+};
+template <>
+struct FunctionTo<DataTypeDecimal<Decimal128>> {
+    using Type = FunctionToDecimal128;
+};
+template <>
+struct FunctionTo<DataTypeDate> {
+    using Type = FunctionToDate;
+};
+template <>
+struct FunctionTo<DataTypeDateTime> {
+    using Type = FunctionToDateTime;
+};
+
+/// Prepared CAST function: delegates execution to a type-erased wrapper after
+/// removing the second argument from the argument list.
+class PreparedFunctionCast : public PreparedFunctionImpl {
+public:
+    using WrapperType = std::function<Status(FunctionContext* context, Block&, const ColumnNumbers&,
+                                             size_t, size_t)>;
+
+    explicit PreparedFunctionCast(WrapperType&& wrapper_function_, const char* name_)
+            : wrapper_function(std::move(wrapper_function_)), name(name_) {}
+
+    String get_name() const override { return name; }
+
+protected:
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        /// Keep argument 0, drop argument 1, forward everything from index 2 on.
+        ColumnNumbers forwarded;
+        forwarded.push_back(arguments.front());
+        for (size_t pos = 2; pos < arguments.size(); ++pos) {
+            forwarded.push_back(arguments[pos]);
+        }
+
+        return wrapper_function(context, block, forwarded, result, input_rows_count);
+    }
+
+    bool use_default_implementation_for_nulls() const override { return false; }
+    bool use_default_implementation_for_constants() const override { return true; }
+    bool use_default_implementation_for_low_cardinality_columns() const override { return false; }
+    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
+
+private:
+    WrapperType wrapper_function;
+    const char* name;
+};
+
+/// Name tag for the CAST function.
+struct NameCast {
+    static constexpr const char* name = "CAST";
+};
+
+/// Converts a string column to `ToDataType` by parsing each row.
+///
+/// The result stored at `result` is a ColumnNullable: a row is non-null only
+/// when `try_parse_impl` succeeds and the whole row text was consumed. Rows
+/// that fail are set to 0 and flagged in the null map.
+template <typename FromDataType, typename ToDataType, typename Name>
+struct ConvertThroughParsing {
+    static_assert(std::is_same_v<FromDataType, DataTypeString>,
+                  "ConvertThroughParsing is only applicable for String or FixedString data types");
+
+    using ToFieldType = typename ToDataType::FieldType;
+
+    /// True when the buffer has no unread input left.
+    static bool is_all_read(ReadBuffer& in) { return in.eof(); }
+
+    template <typename Additions = void*>
+    static Status execute(Block& block, const ColumnNumbers& arguments, size_t result,
+                          size_t input_rows_count,
+                          Additions additions [[maybe_unused]] = Additions()) {
+        using ColVecTo = std::conditional_t<IsDecimalNumber<ToFieldType>,
+                                            ColumnDecimal<ToFieldType>, ColumnVector<ToFieldType>>;
+
+        const IColumn* source_column = block.get_by_position(arguments[0]).column.get();
+        const ColumnString* strings = check_and_get_column<ColumnString>(source_column);
+        if (!strings) {
+            return Status::RuntimeError(
+                    fmt::format("Illegal column {} of first argument of function {}",
+                                source_column->get_name(), Name::name));
+        }
+
+        // Destination values; Decimal columns are created with scale 9.
+        typename ColVecTo::MutablePtr value_column = nullptr;
+        if constexpr (IsDataTypeDecimal<ToDataType>) {
+            value_column = ColVecTo::create(input_rows_count, 9);
+        } else {
+            value_column = ColVecTo::create(input_rows_count);
+        }
+        typename ColVecTo::Container& values = value_column->get_data();
+
+        ColumnUInt8::MutablePtr null_column = ColumnUInt8::create(input_rows_count);
+        ColumnUInt8::Container& null_flags = null_column->get_data();
+
+        const ColumnString::Chars& chars = strings->get_chars();
+        const IColumn::Offsets& offsets = strings->get_offsets();
+
+        size_t row_begin = 0;
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            size_t row_end = offsets[row];
+            // The last byte of every stored row is excluded from parsing
+            // (presumably a terminator byte -- confirm against ColumnString).
+            size_t text_length = row_end - row_begin - 1;
+
+            ReadBuffer reader(&chars[row_begin], text_length);
+
+            // No time zone is supplied to the parser.
+            const bool parsed = try_parse_impl<ToDataType>(values[row], reader, nullptr) &&
+                                is_all_read(reader);
+            if (!parsed) {
+                values[row] = 0;
+            }
+            null_flags[row] = !parsed;
+
+            row_begin = row_end;
+        }
+
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(value_column), std::move(null_column));
+        return Status::OK();
+    }
+};
+
+/// IFunction that converts a String argument to `ToDataType` by parsing it
+/// with ConvertThroughParsing. Any other source type is rejected with a
+/// runtime error.
+template <typename ToDataType, typename Name>
+class FunctionConvertFromString : public IFunction {
+public:
+    static constexpr auto name = Name::name;
+    static FunctionPtr create() { return std::make_shared<FunctionConvertFromString>(); }
+    String get_name() const override { return name; }
+
+    bool is_variadic() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
+
+    // This function should not be called to obtain the result data type;
+    // FunctionCast::get_return_type_impl is used for that.
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& /*arguments*/) const override {
+        if constexpr (IsDataTypeDecimal<ToDataType>) {
+            // Decimal results use precision 27 and scale 9.
+            DataTypePtr decimal_type = create_decimal(27, 9);
+            if (!decimal_type) {
+                LOG(FATAL) << "Someting wrong with toDecimalNNOrZero() or toDecimalNNOrNull()";
+            }
+            return decimal_type;
+        } else {
+            DataTypePtr plain_type = std::make_shared<ToDataType>();
+            return plain_type;
+        }
+    }
+
+    Status execute_impl(FunctionContext* /*context*/, Block& block,
+                        const ColumnNumbers& arguments, size_t result,
+                        size_t input_rows_count) override {
+        const IDataType* source_type = block.get_by_position(arguments[0]).type.get();
+
+        // Only String input can be parsed.
+        if (check_and_get_data_type<DataTypeString>(source_type)) {
+            return ConvertThroughParsing<DataTypeString, ToDataType, Name>::execute(
+                    block, arguments, result, input_rows_count);
+        }
+
+        return Status::RuntimeError(fmt::format(
+                "Illegal type {} of argument of function {} . Only String or FixedString "
+                "argument is accepted for try-conversion function. For other arguments, use "
+                "function without 'orZero' or 'orNull'.",
+                block.get_by_position(arguments[0]).type->get_name(), get_name()));
+    }
+};
+
+/// IFunction that converts a numeric argument to the time type `ToDataType`
+/// by dispatching on the source type id to ConvertImplToTimeType.
+template <typename ToDataType, typename Name>
+class FunctionConvertToTimeType : public IFunction {
+public:
+    static constexpr auto name = Name::name;
+    static FunctionPtr create() { return std::make_shared<FunctionConvertToTimeType>(); }
+
+    String get_name() const override { return name; }
+
+    bool is_variadic() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
+
+    // This function should not be called to obtain the result data type;
+    // FunctionCast::get_return_type_impl is used for that.
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& /*arguments*/) const override {
+        return std::make_shared<ToDataType>();
+    }
+
+    Status execute_impl(FunctionContext* /*context*/, Block& block,
+                        const ColumnNumbers& arguments, size_t result,
+                        size_t input_rows_count) override {
+        Status status = Status::OK();
+        const IDataType* source_type = block.get_by_position(arguments[0]).type.get();
+
+        // Invoked by the dispatcher with the (source, target) data type pair.
+        auto convert = [&](const auto& type_pair) -> bool {
+            using TypePair = std::decay_t<decltype(type_pair)>;
+            using SourceDataType = typename TypePair::LeftType;
+            using TargetDataType = typename TypePair::RightType;
+
+            status = ConvertImplToTimeType<SourceDataType, TargetDataType, Name>::execute(
+                    block, arguments, result, input_rows_count);
+            return true;
+        };
+
+        const bool dispatched = call_on_index_and_number_data_type<ToDataType>(
+                source_type->get_type_id(), convert);
+        if (dispatched) {
+            return status;
+        }
+
+        return Status::RuntimeError(
+                fmt::format("Illegal type {} of argument of function {}",
+                            block.get_by_position(arguments[0]).type->get_name(), get_name()));
+    }
+};
+
+class FunctionCast final : public IFunctionBase {
+public:
+    using WrapperType =
+            std::function<Status(FunctionContext*, Block&, const ColumnNumbers&, size_t, size_t)>;
+    using MonotonicityForRange =
+            std::function<Monotonicity(const IDataType&, const Field&, const Field&)>;
+
+    /// Builds a CAST function base.
+    ///
+    /// @param name_                    function name; the pointer is stored, not copied.
+    /// @param monotonicity_for_range_  monotonicity callback; ownership is taken by
+    ///                                 moving from the rvalue instead of copying it.
+    /// @param argument_types_          types of the arguments (copied).
+    /// @param return_type_             result type of the cast (copied).
+    FunctionCast(const char* name_, MonotonicityForRange&& monotonicity_for_range_,
+                 const DataTypes& argument_types_, const DataTypePtr& return_type_)
+            : name(name_),
+              monotonicity_for_range(std::move(monotonicity_for_range_)),
+              argument_types(argument_types_),
+              return_type(return_type_) {}
+
+    /// Argument types this cast was constructed with.
+    const DataTypes& get_argument_types() const override { return argument_types; }
+    /// Result type this cast was constructed with.
+    const DataTypePtr& get_return_type() const override { return return_type; }
+
+    /// Creates the executable form of this cast: a PreparedFunctionCast around
+    /// the wrapper built for (first argument type -> return type).
+    PreparedFunctionPtr prepare(FunctionContext* /*context*/, const Block& /*sample_block*/,
+                                const ColumnNumbers& /*arguments*/,
+                                size_t /*result*/) const override {
+        WrapperType cast_wrapper =
+                prepare_unpack_dictionaries(get_argument_types()[0], get_return_type());
+        return std::make_shared<PreparedFunctionCast>(std::move(cast_wrapper), name);
+    }
+
+    /// Name this cast was constructed with.
+    String get_name() const override { return name; }
+
+    /// A cast is reported as deterministic, both globally and within one query.
+    bool is_deterministic() const override { return true; }
+    bool is_deterministic_in_scope_of_query() const override { return true; }
+
+    /// True when a non-empty monotonicity callback was supplied at construction.
+    bool has_information_about_monotonicity() const override {
+        return static_cast<bool>(monotonicity_for_range);
+    }
+
+    /// Forwards to the monotonicity callback. The callback is invoked without
+    /// an emptiness check here.
+    Monotonicity get_monotonicity_for_range(const IDataType& type, const Field& left,
+                                            const Field& right) const override {
+        return monotonicity_for_range(type, left, right);
+    }
+
+private:
+    const char* name;
+    MonotonicityForRange monotonicity_for_range;
+
+    DataTypes argument_types;
+    DataTypePtr return_type;
+
+    /// Builds the wrapper that converts `from_type` to `DataType`.
+    ///
+    /// Selection of the underlying function:
+    ///  - nullable result requested and String source: FunctionConvertFromString,
+    ///    which yields NULL for malformed input;
+    ///  - nullable result requested, time-type target, source neither Date nor
+    ///    DateTime: FunctionConvertToTimeType;
+    ///  - otherwise: the plain FunctionTo<DataType> conversion.
+    template <typename DataType>
+    WrapperType create_wrapper(const DataTypePtr& from_type, const DataType* const,
+                               bool requested_result_is_nullable) const {
+        const IDataType* source_type = from_type.get();
+        FunctionPtr function;
+
+        if (requested_result_is_nullable && check_and_get_data_type<DataTypeString>(source_type)) {
+            function = FunctionConvertFromString<DataType, NameCast>::create();
+        } else if (requested_result_is_nullable && IsTimeType<DataType> &&
+                   !check_and_get_data_type<DataTypeDateTime>(source_type) &&
+                   !check_and_get_data_type<DataTypeDate>(source_type)) {
+            function = FunctionConvertToTimeType<DataType, NameCast>::create();
+        } else {
+            function = FunctionTo<DataType>::Type::create();
+        }
+
+        /// Check conversion using underlying function
+        function->get_return_type(ColumnsWithTypeAndName(1, {nullptr, from_type, ""}));
+
+        return [function](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                          const size_t result, size_t input_rows_count) {
+            return function->execute(context, block, arguments, result, input_rows_count);
+        };
+    }
+
+    /// Builds the wrapper that converts `from_type` to String through
+    /// FunctionToString.
+    WrapperType create_string_wrapper(const DataTypePtr& from_type) const {
+        FunctionPtr to_string_function = FunctionToString::create();
+
+        /// Check conversion using underlying function
+        to_string_function->get_return_type(ColumnsWithTypeAndName(1, {nullptr, from_type, ""}));
+
+        return [to_string_function](FunctionContext* context, Block& block,
+                                    const ColumnNumbers& arguments, const size_t result,
+                                    size_t input_rows_count) {
+            return to_string_function->execute(context, block, arguments, result,
+                                               input_rows_count);
+        };
+    }
+
+    /// Builds the wrapper that converts `from_type` to the Decimal type `to_type`.
+    ///
+    /// Accepted sources are integers, native unsigned integers, decimals,
+    /// floats, Date/DateTime and String/FixedString; any other source is a
+    /// fatal error. String sources are parsed through
+    /// FunctionConvertFromString; all other sources are dispatched on the
+    /// source type id to ConvertImpl with the target scale.
+    ///
+    /// The returned wrapper propagates the Status produced by the conversion,
+    /// so a failed conversion is no longer reported as success.
+    template <typename FieldType>
+    WrapperType create_decimal_wrapper(const DataTypePtr& from_type,
+                                       const DataTypeDecimal<FieldType>* to_type) const {
+        using ToDataType = DataTypeDecimal<FieldType>;
+
+        TypeIndex type_index = from_type->get_type_id();
+        UInt32 precision = to_type->get_precision();
+        UInt32 scale = to_type->get_scale();
+
+        WhichDataType which(type_index);
+        bool ok = which.is_int() || which.is_native_uint() || which.is_decimal() ||
+                  which.is_float() || which.is_date_or_datetime() ||
+                  which.is_string_or_fixed_string();
+        if (!ok) {
+            LOG(FATAL) << fmt::format("Conversion from {} to {} is not supported",
+                                      from_type->get_name(), to_type->get_name());
+        }
+
+        if (which.is_string_or_fixed_string()) {
+            auto function =
+                    FunctionConvertFromString<DataTypeDecimal<FieldType>, NameCast>::create();
+
+            /// Check conversion using underlying function
+            { function->get_return_type(ColumnsWithTypeAndName(1, {nullptr, from_type, ""})); }
+
+            return [function](FunctionContext* context, Block& block,
+                              const ColumnNumbers& arguments, const size_t result,
+                              size_t input_rows_count) {
+                return function->execute(context, block, arguments, result, input_rows_count);
+            };
+        }
+
+        return [type_index, precision, scale](FunctionContext* context, Block& block,
+                                              const ColumnNumbers& arguments, const size_t result,
+                                              size_t input_rows_count) {
+            // Status of the conversion selected by the dispatcher below.
+            Status status = Status::OK();
+
+            auto res = call_on_index_and_data_type<ToDataType>(
+                    type_index, [&](const auto& types) -> bool {
+                        using Types = std::decay_t<decltype(types)>;
+                        using LeftDataType = typename Types::LeftType;
+                        using RightDataType = typename Types::RightType;
+
+                        status = ConvertImpl<LeftDataType, RightDataType, NameCast>::execute(
+                                block, arguments, result, input_rows_count, scale);
+                        return true;
+                    });
+
+            /// Additionally check if call_on_index_and_data_type wasn't called at all.
+            if (!res) {
+                auto to = DataTypeDecimal<FieldType>(precision, scale);
+                return Status::RuntimeError(fmt::format("Conversion from {} to {} is not supported",
+                                                        getTypeName(type_index), to.get_name()));
+            }
+            return status;
+        };
+    }
+
+    /// Builds a wrapper for a cast that changes nothing: the result column is
+    /// set to the first argument's column.
+    WrapperType create_identity_wrapper(const DataTypePtr&) const {
+        return [](FunctionContext* /*context*/, Block& block, const ColumnNumbers& arguments,
+                  const size_t result, size_t /*input_rows_count*/) {
+            const auto& source = block.get_by_position(arguments.front());
+            block.get_by_position(result).column = source.column;
+            return Status::OK();
+        };
+    }
+
+    /// Builds a wrapper that ignores its arguments and fills the result with
+    /// `to_type`'s default value, one entry per input row.
+    WrapperType create_nothing_wrapper(const IDataType* to_type) const {
+        ColumnPtr default_column = to_type->create_column_const_with_default_value(1);
+        return [default_column](FunctionContext* /*context*/, Block& block, const ColumnNumbers&,
+                                const size_t result, size_t input_rows_count) {
+            /// Column of Nothing type is trivially convertible to any other column
+            auto resized = default_column->clone_resized(input_rows_count);
+            block.get_by_position(result).column = resized->convert_to_full_column_if_const();
+            return Status::OK();
+        };
+    }
+
+    /// Entry stage of wrapper construction. A source type for which only_null() holds
+    /// is answered with the default constant column of the result type; every other
+    /// source is handed on to prepare_remove_nullable() with the not-null check enabled.
+    WrapperType prepare_unpack_dictionaries(const DataTypePtr& from_type,
+                                            const DataTypePtr& to_type) const {
+        if (!from_type->only_null()) {
+            constexpr bool skip_not_null_check = false;
+            return prepare_remove_nullable(from_type, to_type, skip_not_null_check);
+        }
+
+        // A NULL-only source requires a nullable destination type.
+        if (!to_type->is_nullable()) {
+            LOG(FATAL) << "Cannot convert NULL to a non-nullable type";
+        }
+
+        return [](FunctionContext* context, Block& block, const ColumnNumbers&,
+                  const size_t result, size_t input_rows_count) {
+            auto& target = block.get_by_position(result);
+            auto default_const =
+                    target.type->create_column_const_with_default_value(input_rows_count);
+            target.column = default_const->convert_to_full_column_if_const();
+            return Status::OK();
+        };
+    }
+
+    /// Wraps the conversion produced by prepare_impl() (which is built for the types
+    /// with Nullable removed) with the Nullable handling implied by the type pair:
+    ///  - Nullable result: run the conversion on a temporary block and wrap the
+    ///    converted column through wrap_in_nullable();
+    ///  - Nullable source, non-Nullable result: fail when the source null map contains
+    ///    a non-zero entry (unless `skip_not_null_check` is set), then convert;
+    ///  - otherwise the prepare_impl() wrapper is returned unchanged.
+    /// A failure Status returned by the inner wrapper is propagated to the caller
+    /// instead of being dropped.
+    WrapperType prepare_remove_nullable(const DataTypePtr& from_type, const DataTypePtr& to_type,
+                                        bool skip_not_null_check) const {
+        /// Determine whether pre-processing and/or post-processing must take place during conversion.
+
+        bool source_is_nullable = from_type->is_nullable();
+        bool result_is_nullable = to_type->is_nullable();
+
+        auto wrapper = prepare_impl(remove_nullable(from_type), remove_nullable(to_type),
+                                    result_is_nullable);
+
+        if (result_is_nullable) {
+            return [wrapper, source_is_nullable](FunctionContext* context, Block& block,
+                                                 const ColumnNumbers& arguments,
+                                                 const size_t result, size_t input_rows_count) {
+                /// Create a temporary block on which to perform the operation.
+                auto& res = block.get_by_position(result);
+                const auto& ret_type = res.type;
+                const auto& nullable_type = static_cast<const DataTypeNullable&>(*ret_type);
+                const auto& nested_type = nullable_type.get_nested_type();
+
+                Block tmp_block;
+                if (source_is_nullable)
+                    tmp_block = create_block_with_nested_columns(block, arguments);
+                else
+                    tmp_block = block;
+
+                // The nested-typed result slot is appended after the existing columns.
+                size_t tmp_res_index = block.columns();
+                tmp_block.insert({nullptr, nested_type, ""});
+
+                /// Perform the requested conversion.
+                Status status =
+                        wrapper(context, tmp_block, arguments, tmp_res_index, input_rows_count);
+                if (!status.ok()) {
+                    return status;
+                }
+
+                const auto& tmp_res = tmp_block.get_by_position(tmp_res_index);
+
+                /// May happen in fuzzy tests. For debug purpose.
+                if (!tmp_res.column.get()) {
+                    return Status::RuntimeError(fmt::format(
+                            "Couldn't convert {} to {} in prepare_remove_nullable wrapper.",
+                            block.get_by_position(arguments[0]).type->get_name(),
+                            nested_type->get_name()));
+                }
+
+                res.column = wrap_in_nullable(tmp_res.column,
+                                              Block({block.get_by_position(arguments[0]), tmp_res}),
+                                              {0}, 1, input_rows_count);
+                return Status::OK();
+            };
+        } else if (source_is_nullable) {
+            /// Conversion from Nullable to non-Nullable.
+
+            return [wrapper, skip_not_null_check](FunctionContext* context, Block& block,
+                                                  const ColumnNumbers& arguments,
+                                                  const size_t result, size_t input_rows_count) {
+                Block tmp_block = create_block_with_nested_columns(block, arguments, result);
+
+                /// Check that all values are not-NULL.
+                /// Check can be skipped in case if LowCardinality dictionary is transformed.
+                /// In that case, correctness will be checked beforehand.
+                if (!skip_not_null_check) {
+                    const auto& col = block.get_by_position(arguments[0]).column;
+                    const auto& nullable_col = assert_cast<const ColumnNullable&>(*col);
+                    const auto& null_map = nullable_col.get_null_map_data();
+
+                    if (!memory_is_zero(null_map.data(), null_map.size())) {
+                        return Status::RuntimeError(
+                                fmt::format("Cannot convert NULL value to non-Nullable type"));
+                    }
+                }
+
+                Status status = wrapper(context, tmp_block, arguments, result, input_rows_count);
+                if (!status.ok()) {
+                    return status;
+                }
+                block.get_by_position(result).column = tmp_block.get_by_position(result).column;
+                return Status::OK();
+            };
+        } else
+            return wrapper;
+    }
+
+    /// 'from_type' and 'to_type' are nested types in case of Nullable.
+    /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested.
+    /// Picks the conversion wrapper for the given pair of types. Aborts through
+    /// LOG(FATAL) when no conversion is available for the pair.
+    WrapperType prepare_impl(const DataTypePtr& from_type, const DataTypePtr& to_type,
+                             bool requested_result_is_nullable) const {
+        // Equal types: pass the column through.
+        if (from_type->equals(*to_type)) {
+            return create_identity_wrapper(from_type);
+        }
+        // Nothing source: produce default values of the target type.
+        if (WhichDataType(from_type).is_nothing()) {
+            return create_nothing_wrapper(to_type.get());
+        }
+
+        WrapperType selected;
+
+        // Called with the concrete target data type; stores the wrapper in `selected`
+        // and reports whether the target type was handled here.
+        auto select_by_target = [&](const auto& types) -> bool {
+            using Types = std::decay_t<decltype(types)>;
+            using ToDataType = typename Types::LeftType;
+
+            constexpr bool is_number_or_date_target =
+                    std::is_same_v<ToDataType, DataTypeUInt8> ||
+                    std::is_same_v<ToDataType, DataTypeUInt16> ||
+                    std::is_same_v<ToDataType, DataTypeUInt32> ||
+                    std::is_same_v<ToDataType, DataTypeUInt64> ||
+                    std::is_same_v<ToDataType, DataTypeInt8> ||
+                    std::is_same_v<ToDataType, DataTypeInt16> ||
+                    std::is_same_v<ToDataType, DataTypeInt32> ||
+                    std::is_same_v<ToDataType, DataTypeInt64> ||
+                    std::is_same_v<ToDataType, DataTypeInt128> ||
+                    std::is_same_v<ToDataType, DataTypeFloat32> ||
+                    std::is_same_v<ToDataType, DataTypeFloat64> ||
+                    std::is_same_v<ToDataType, DataTypeDate> ||
+                    std::is_same_v<ToDataType, DataTypeDateTime>;
+            constexpr bool is_decimal_target =
+                    std::is_same_v<ToDataType, DataTypeDecimal<Decimal32>> ||
+                    std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>> ||
+                    std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>;
+
+            if constexpr (is_number_or_date_target) {
+                selected = create_wrapper(from_type,
+                                          check_and_get_data_type<ToDataType>(to_type.get()),
+                                          requested_result_is_nullable);
+                return true;
+            } else if constexpr (is_decimal_target) {
+                selected = create_decimal_wrapper(
+                        from_type, check_and_get_data_type<ToDataType>(to_type.get()));
+                return true;
+            } else {
+                return false;
+            }
+        };
+
+        if (call_on_index_and_data_type<void>(to_type->get_type_id(), select_by_target)) {
+            return selected;
+        }
+
+        // String is the only target handled outside the dispatch above.
+        if (to_type->get_type_id() == TypeIndex::String) {
+            return create_string_wrapper(from_type);
+        }
+
+        LOG(FATAL) << fmt::format("Conversion from {} to {} is not supported",
+                                  from_type->get_name(), to_type->get_name());
+        return WrapperType{};
+    }
+};
+
+/// Builder for the two-argument CAST function. The second argument is a constant
+/// string naming the target type; the builder resolves it to a data type and
+/// produces a FunctionCast bound to the argument types.
+class FunctionBuilderCast : public FunctionBuilderImpl {
+public:
+    using MonotonicityForRange = FunctionCast::MonotonicityForRange;
+
+    static constexpr auto name = "CAST";
+    static FunctionBuilderPtr create() { return std::make_shared<FunctionBuilderCast>(); }
+
+    FunctionBuilderCast() {}
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    // Argument index 1 (the type name) is always constant.
+    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
+
+protected:
+    /// Collects the argument types and creates the FunctionCast for `return_type`.
+    FunctionBasePtr build_impl(const ColumnsWithTypeAndName& arguments,
+                               const DataTypePtr& return_type) const override {
+        DataTypes data_types;
+        data_types.reserve(arguments.size());
+        for (const auto& argument : arguments) {
+            data_types.push_back(argument.type);
+        }
+
+        auto monotonicity = get_monotonicity_information(arguments.front().type, return_type.get());
+        return std::make_shared<FunctionCast>(name, std::move(monotonicity), data_types,
+                                              return_type);
+    }
+
+    /// Resolves the target type from the constant string in the last argument and
+    /// wraps it in Nullable when any of the three conditions below holds.
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
+        const auto type_col =
+                check_and_get_column_const<ColumnString>(arguments.back().column.get());
+        if (!type_col) {
+            LOG(FATAL) << fmt::format(
+                    "Second argument to {} must be a constant string describing type", get_name());
+        }
+
+        auto type = DataTypeFactory::instance().get(type_col->get_value<String>());
+
+        const auto& source_type = arguments[0].type;
+        const auto source_id = source_type->get_type_id();
+        const auto target_id = type->get_type_id();
+
+        // 1. from_type is nullable
+        const bool source_is_nullable = source_type->is_nullable();
+        // 2. from_type is string, to_type is not string
+        const bool string_to_non_string =
+                (source_id == TypeIndex::String) && (target_id != TypeIndex::String);
+        // 3. from_type is not DateTime/Date, to_type is DateTime/Date
+        const bool source_is_date_like =
+                (source_id == TypeIndex::Date) || (source_id == TypeIndex::DateTime);
+        const bool target_is_date_like =
+                (target_id == TypeIndex::Date) || (target_id == TypeIndex::DateTime);
+        const bool non_date_to_date = !source_is_date_like && target_is_date_like;
+
+        if (!(source_is_nullable || string_to_non_string || non_date_to_date)) {
+            return type;
+        }
+        return make_nullable(type);
+    }
+
+    bool use_default_implementation_for_nulls() const override { return false; }
+    bool use_default_implementation_for_low_cardinality_columns() const override { return false; }
+
+private:
+    template <typename DataType>
+    static auto monotonicity_for_type(const DataType* const) {
+        return FunctionTo<DataType>::Type::Monotonic::get;
+    }
+
+    /// Probes `to_type` against the candidate data types in order and returns the
+    /// monotonicity callback of the first one that matches; empty when none does.
+    template <typename Candidate, typename... Rest>
+    static MonotonicityForRange monotonicity_for_first_match(const IDataType* to_type) {
+        if (const auto matched = check_and_get_data_type<Candidate>(to_type)) {
+            return monotonicity_for_type(matched);
+        }
+        if constexpr (sizeof...(Rest) > 0) {
+            return monotonicity_for_first_match<Rest...>(to_type);
+        } else {
+            /// other types like Null, FixedString, Array and Tuple have no monotonicity defined
+            return {};
+        }
+    }
+
+    /// Monotonicity information is only looked up for integer and floating-point
+    /// target types; `from_type` is not consulted.
+    MonotonicityForRange get_monotonicity_information(const DataTypePtr& from_type,
+                                                      const IDataType* to_type) const {
+        return monotonicity_for_first_match<DataTypeUInt8, DataTypeUInt16, DataTypeUInt32,
+                                            DataTypeUInt64, DataTypeInt8, DataTypeInt16,
+                                            DataTypeInt32, DataTypeInt64, DataTypeFloat32,
+                                            DataTypeFloat64>(to_type);
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_coalesce.cpp b/be/src/vec/functions/function_coalesce.cpp
new file mode 100644
index 0000000000..99b6110b95
--- /dev/null
+++ b/be/src/vec/functions/function_coalesce.cpp
@@ -0,0 +1,267 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "udf/udf.h"
+#include "vec/data_types/data_type_nothing.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/get_least_supertype.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+/// coalesce(arg1, arg2, ...): for every row, yields the value of the first considered
+/// argument that is not NULL in that row; a row stays NULL when none of them has a value.
+class FunctionCoalesce : public IFunction {
+public:
+    static constexpr auto name = "coalesce";
+
+    // Bound "is_not_null_pred" function. Assigned in get_return_type_impl() and used by
+    // execute_impl() to test each argument column.
+    mutable FunctionBasePtr func_is_not_null;
+
+    static FunctionPtr create() { return std::make_shared<FunctionCoalesce>(); }
+
+    String get_name() const override { return name; }
+
+    bool use_default_implementation_for_constants() const override { return false; }
+
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    bool is_variadic() const override { return true; }
+
+    size_t get_number_of_arguments() const override { return 0; }
+
+    /// The result type is the first non-nullable argument type, or the type of the first
+    /// argument when every argument is nullable. Also binds `func_is_not_null`.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        DataTypePtr res;
+        for (const auto& arg : arguments) {
+            if (!arg->is_nullable()) {
+                res = arg;
+                break;
+            }
+        }
+
+        res = res ? res : arguments[0];
+
+        const ColumnsWithTypeAndName is_not_null_col{
+                {nullptr, make_nullable(res), ""}
+        };
+        func_is_not_null = SimpleFunctionFactory::instance().
+                get_function("is_not_null_pred", is_not_null_col, std::make_shared<DataTypeUInt8>());
+
+        return res;
+    }
+
+    /// Evaluates coalesce over `arguments` and stores the result column at `result`.
+    /// Returns the failure Status of the is-not-null evaluation or of the result fill
+    /// if either of them fails.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        DCHECK_GE(arguments.size(), 1);
+        DataTypePtr result_type = block.get_by_position(result).type;
+        ColumnNumbers filtered_args;
+        filtered_args.reserve(arguments.size());
+
+        // Arguments after the first non-nullable one are never consulted, so only the
+        // prefix up to and including it is kept.
+        for (size_t i = 0; i < arguments.size(); ++i) {
+            const auto& arg_type = block.get_by_position(arguments[i]).type;
+            filtered_args.push_back(arguments[i]);
+            if (!arg_type->is_nullable()) {
+                if (i == 0) { // the first column is not nullable: return it directly
+                    block.get_by_position(result).column = block.get_by_position(arguments[0]).column;
+                    return Status::OK();
+                } else {
+                    break;
+                }
+            }
+        }
+
+        size_t remaining_rows = input_rows_count;
+        size_t argument_size = filtered_args.size();
+        // record_idx[row]: index (into filtered_args) of the argument that supplies the row.
+        std::vector<uint32_t> record_idx(input_rows_count, 0);
+        // filled_flags[row]: 1 once the row has been written into the result column.
+        std::vector<uint8_t> filled_flags(input_rows_count, 0);
+
+        MutableColumnPtr result_column;
+        if (!result_type->is_nullable()) {
+            result_column = result_type->create_column();
+        } else {
+            result_column = remove_nullable(result_type)->create_column();
+        }
+
+        // because now the string types does not support random position writing,
+        // so insert into result data have two methods, one is for string types, one is for others type remaining
+        bool is_string_result = result_column->is_column_string();
+        if (is_string_result) {
+            result_column->reserve(input_rows_count);
+        } else {
+            result_column->resize(input_rows_count);
+        }
+
+        auto return_type = std::make_shared<DataTypeUInt8>();
+        // null_map_data[row] == 1 means the row has no value yet and would be NULL.
+        auto null_map = ColumnUInt8::create(input_rows_count, 1);
+        auto* __restrict null_map_data = null_map->get_data().data();
+        // Holds the nested column for nullable arguments, the column itself otherwise.
+        // A std::vector is used because a runtime-sized array is not standard C++.
+        std::vector<ColumnPtr> argument_columns(argument_size);
+
+        for (size_t i = 0; i < argument_size; ++i) {
+            block.get_by_position(filtered_args[i]).column =
+                    block.get_by_position(filtered_args[i])
+                            .column->convert_to_full_column_if_const();
+            argument_columns[i] = block.get_by_position(filtered_args[i]).column;
+            if (auto* nullable = check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
+                argument_columns[i] = nullable->get_nested_column_ptr();
+            }
+        }
+
+        // Scratch block: position 0 is the argument under test, position 1 receives the
+        // is-not-null result.
+        Block temporary_block {
+            ColumnsWithTypeAndName {
+                    block.get_by_position(filtered_args[0]),
+                    {nullptr, std::make_shared<DataTypeUInt8>(), ""}
+            }
+        };
+
+        for (size_t i = 0; i < argument_size && remaining_rows; ++i) {
+            temporary_block.get_by_position(0).column = block.get_by_position(filtered_args[i]).column;
+            Status not_null_status =
+                    func_is_not_null->execute(context, temporary_block, {0}, 1, input_rows_count);
+            if (!not_null_status.ok()) {
+                return not_null_status;
+            }
+
+            auto res_column = (*temporary_block.get_by_position(1).column->convert_to_full_column_if_const()).mutate();
+            auto& res_map = assert_cast<ColumnVector<UInt8>*>(res_column.get())->get_data();
+            auto* __restrict res = res_map.data();
+
+            // Branch-free so that the compiler can vectorize the loop.
+            // A row is claimed by argument i when res[j] == 1 && null_map_data[j] == 1:
+            // remaining_rows--, record_idx[j] = i, null_map_data[j] = 0.
+            for (size_t j = 0; j < input_rows_count; ++j) {
+                remaining_rows -= (res[j] & null_map_data[j]);
+                record_idx[j] += (res[j] & null_map_data[j]) * i;
+                null_map_data[j] -= (res[j] & null_map_data[j]);
+            }
+
+            if (remaining_rows == 0) {
+                // check whether all result data comes from the same column
+                size_t is_same_column_count = 0;
+                const auto data = record_idx[0];
+                for (size_t row = 0; row < input_rows_count; ++row) {
+                    is_same_column_count += (record_idx[row] == data);
+                }
+
+                if (is_same_column_count == input_rows_count) {
+                    if (result_type->is_nullable()) {
+                        block.get_by_position(result).column = make_nullable(argument_columns[i], false);
+                    } else {
+                        block.get_by_position(result).column = argument_columns[i];
+                    }
+                    return Status::OK();
+                }
+            }
+
+            if (!is_string_result) {
+                // Non-string results are filled column by column as soon as each
+                // argument has been checked.
+                Status fill_status =
+                        filled_result_column(result_type, result_column, argument_columns[i],
+                                             null_map_data, filled_flags.data(), input_rows_count);
+                if (!fill_status.ok()) {
+                    return fill_status;
+                }
+            }
+        }
+
+        if (is_string_result) {
+            // String results are appended row by row, following the recorded source index.
+            for (size_t row = 0; row < input_rows_count; ++row) {
+                if (null_map_data[row]) { //should be null
+                    result_column->insert_default();
+                } else {
+                    result_column->insert_from(*argument_columns[record_idx[row]].get(), row);
+                }
+            }
+        }
+
+        if (result_type->is_nullable()) {
+            block.replace_by_position(result, ColumnNullable::create(std::move(result_column), std::move(null_map)));
+        } else {
+            block.replace_by_position(result, std::move(result_column));
+        }
+
+        return Status::OK();
+    }
+
+    /// Copies into `result_column` every row of `argument_column` that has a value
+    /// (null_map_data[row] == 0) and has not been written before (filled_flag[row] == 0),
+    /// then marks the row as filled.
+    template <typename ColumnType>
+    Status insert_result_data(MutableColumnPtr& result_column, ColumnPtr& argument_column,
+                              const UInt8* __restrict null_map_data, UInt8* __restrict filled_flag,
+                              const size_t input_rows_count) {
+        auto* __restrict result_raw_data =
+                reinterpret_cast<ColumnType*>(result_column.get())->get_data().data();
+        auto* __restrict column_raw_data =
+                reinterpret_cast<const ColumnType*>(argument_column.get())->get_data().data();
+
+        // The value is assigned rather than accumulated with `+= flag * value`:
+        // for floating-point columns 0 * NaN and 0 * Inf are NaN and would corrupt rows
+        // that were already filled, and the accumulation depends on the initial contents
+        // of the result buffer.
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            const bool take_row = !(null_map_data[row] | filled_flag[row]);
+            if (take_row) {
+                result_raw_data[row] = column_raw_data[row];
+            }
+            filled_flag[row] += take_row;
+        }
+        return Status::OK();
+    }
+
+    //TODO: this function is same as case when, should be replaced by macro
+    /// Dispatches insert_result_data() on the (nested) result data type. Returns
+    /// NotSupported for a data type without a branch below.
+    Status filled_result_column(const DataTypePtr& data_type, MutableColumnPtr& result_column,
+                                ColumnPtr& argument_column, UInt8* __restrict null_map_data,
+                                UInt8* __restrict filled_flag, const size_t input_rows_count) {
+        WhichDataType which(data_type->is_nullable()
+                                    ? reinterpret_cast<const DataTypeNullable*>(data_type.get())
+                                              ->get_nested_type()
+                                    : data_type);
+        if (which.is_uint8()) {
+            return insert_result_data<ColumnUInt8>(result_column, argument_column, null_map_data,
+                                                   filled_flag, input_rows_count);
+        } else if (which.is_uint16()) {
+            return insert_result_data<ColumnUInt16>(result_column, argument_column, null_map_data,
+                                                    filled_flag, input_rows_count);
+        } else if (which.is_uint32()) {
+            return insert_result_data<ColumnUInt32>(result_column, argument_column, null_map_data,
+                                                    filled_flag, input_rows_count);
+        } else if (which.is_uint64()) {
+            return insert_result_data<ColumnUInt64>(result_column, argument_column, null_map_data,
+                                                    filled_flag, input_rows_count);
+        } else if (which.is_int8()) {
+            return insert_result_data<ColumnInt8>(result_column, argument_column, null_map_data,
+                                                  filled_flag, input_rows_count);
+        } else if (which.is_int16()) {
+            return insert_result_data<ColumnInt16>(result_column, argument_column, null_map_data,
+                                                   filled_flag, input_rows_count);
+        } else if (which.is_int32()) {
+            return insert_result_data<ColumnInt32>(result_column, argument_column, null_map_data,
+                                                   filled_flag, input_rows_count);
+        } else if (which.is_int64()) {
+            return insert_result_data<ColumnInt64>(result_column, argument_column, null_map_data,
+                                                   filled_flag, input_rows_count);
+        } else if (which.is_date_or_datetime()) {
+            return insert_result_data<ColumnVector<DateTime>>(
+                    result_column, argument_column, null_map_data, filled_flag, input_rows_count);
+        } else if (which.is_float32()) {
+            return insert_result_data<ColumnFloat32>(result_column, argument_column, null_map_data,
+                                                     filled_flag, input_rows_count);
+        } else if (which.is_float64()) {
+            return insert_result_data<ColumnFloat64>(result_column, argument_column, null_map_data,
+                                                     filled_flag, input_rows_count);
+        } else if (which.is_decimal()) {
+            return insert_result_data<ColumnDecimal<Decimal128>>(
+                    result_column, argument_column, null_map_data, filled_flag, input_rows_count);
+        } else {
+            return Status::NotSupported(fmt::format("Unexpected type {} of argument of function {}",
+                                                    data_type->get_name(), get_name()));
+        }
+    }
+};
+
+/// Registers FunctionCoalesce with the given function factory.
+void register_function_coalesce(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionCoalesce>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_const.h b/be/src/vec/functions/function_const.h
new file mode 100644
index 0000000000..46547b53eb
--- /dev/null
+++ b/be/src/vec/functions/function_const.h
@@ -0,0 +1,101 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionConst.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_const.h"
+#include "vec/columns/columns_number.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+/// Zero-argument function whose result is a constant column of `input_rows_count`
+/// rows holding Impl::init_value(), built through the result data type.
+/// Impl supplies `name`, `get_return_type()` and `init_value()`.
+template <typename Impl, bool use_field = true>
+class FunctionConst : public IFunction {
+public:
+    static constexpr auto name = Impl::name;
+
+    static FunctionPtr create() { return std::make_shared<FunctionConst>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 0; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
+        return Impl::get_return_type();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto& target = block.get_by_position(result);
+        target.column = target.type->create_column_const(input_rows_count, Impl::init_value());
+        return Status::OK();
+    }
+};
+
+/// Variant of FunctionConst that builds the constant directly from a one-element
+/// Impl::ReturnColVec column instead of going through the result data type.
+/// Impl supplies `name`, `get_return_type()`, `ReturnColVec` and `init_value()`.
+template <typename Impl>
+class FunctionConst<Impl, false> : public IFunction {
+public:
+    static constexpr auto name = Impl::name;
+    static FunctionPtr create() { return std::make_shared<FunctionConst>(); }
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 0; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
+        return Impl::get_return_type();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto column = Impl::ReturnColVec::create();
+        column->get_data().emplace_back(Impl::init_value());
+        // The constant column is sized to the number of input rows so that it agrees
+        // with the other columns of the block. An empty input keeps the previous
+        // one-row result, as FunctionMathConstFloat64 does.
+        const size_t result_rows = input_rows_count == 0 ? 1 : input_rows_count;
+        block.replace_by_position(result, ColumnConst::create(std::move(column), result_rows));
+        return Status::OK();
+    }
+};
+
+/// Zero-argument function returning the Float64 constant Impl::value.
+/// Impl supplies `name` and `value`.
+template <typename Impl>
+class FunctionMathConstFloat64 : public IFunction {
+public:
+    static constexpr auto name = Impl::name;
+    static FunctionPtr create() { return std::make_shared<FunctionMathConstFloat64>(); }
+
+private:
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 0; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
+        return std::make_shared<DataTypeFloat64>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        // An empty input still yields a one-row constant column.
+        const size_t result_rows = input_rows_count == 0 ? 1 : input_rows_count;
+        auto& target = block.get_by_position(result);
+        target.column = target.type->create_column_const(result_rows, Impl::value);
+        return Status::OK();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_conv.cpp b/be/src/vec/functions/function_conv.cpp
new file mode 100644
index 0000000000..6f755ff561
--- /dev/null
+++ b/be/src/vec/functions/function_conv.cpp
@@ -0,0 +1,163 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/math_functions.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+
+// Vectorized conv(num, from_base, to_base): NULL on NULL input or an out-of-range base.
+template <typename Impl>
+class FunctionConv : public IFunction {
+public:
+    static constexpr auto name = "conv";
+    String get_name() const override { return name; }
+    static FunctionPtr create() { return std::make_shared<FunctionConv<Impl>>(); }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return make_nullable(std::make_shared<DataTypeString>());
+    }
+    DataTypes get_variadic_argument_types_impl() const override {
+        return {std::make_shared<typename Impl::DataType>(), std::make_shared<DataTypeInt8>(),
+                std::make_shared<DataTypeInt8>()};
+    }
+    size_t get_number_of_arguments() const override { return 3; } // num, from_base, to_base
+
+    // NULL inputs are merged into the result null map by execute_impl itself.
+    bool use_default_implementation_for_nulls() const override { return false; }
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    // Unwraps the three argument columns and converts row by row into a Nullable(String).
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto result_column = ColumnString::create();
+        auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0);
+        ColumnPtr argument_columns[3];
+        for (int i = 0; i < 3; ++i) {
+            // Some arguments may still be ColumnConst (e.g. conv(col, 16, 2)); expand them so
+            // the assert_casts to concrete column types below are valid.
+            argument_columns[i] =
+                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) {
+                // Merge the null map BEFORE rebinding argument_columns[i]: after the expansion
+                // above it may be the only owner of the column `nullable` points to.
+                VectorizedUtils::update_null_map(result_null_map_column->get_data(),
+                                                 nullable->get_null_map_data());
+                argument_columns[i] = nullable->get_nested_column_ptr();
+            }
+        }
+
+        execute_straight(
+                context,
+                assert_cast<const typename Impl::DataType::ColumnType*>(argument_columns[0].get()),
+                assert_cast<const ColumnInt8*>(argument_columns[1].get()),
+                assert_cast<const ColumnInt8*>(argument_columns[2].get()),
+                result_column.get(), result_null_map_column->get_data(), input_rows_count);
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(result_column), std::move(result_null_map_column));
+        return Status::OK();
+    }
+
+private:
+    // Appends one entry per row to result_column; rows already flagged NULL get a default value.
+    void execute_straight(FunctionContext* context,
+                          const typename Impl::DataType::ColumnType* data_column,
+                          const ColumnInt8* src_base_column, const ColumnInt8* dst_base_column,
+                          ColumnString* result_column, NullMap& result_null_map,
+                          size_t input_rows_count) {
+        for (size_t i = 0; i < input_rows_count; i++) {
+            if (result_null_map[i]) {
+                result_column->insert_default();
+                continue;
+            }
+            Int8 src_base = src_base_column->get_element(i);
+            Int8 dst_base = dst_base_column->get_element(i);
+            if (std::abs(src_base) < MathFunctions::MIN_BASE ||
+                std::abs(src_base) > MathFunctions::MAX_BASE ||
+                std::abs(dst_base) < MathFunctions::MIN_BASE ||
+                std::abs(dst_base) > MathFunctions::MAX_BASE) {
+                result_null_map[i] = true;
+                result_column->insert_default();
+                continue;
+            }
+            Impl::calculate_cell(context, data_column, src_base, dst_base, result_column,
+                                 result_null_map, i);
+        }
+    }
+};
+
+// conv() kernel for an Int64 input column.
+struct ConvInt64Impl {
+    using DataType = DataTypeInt64;
+
+    // Appends exactly one entry to result_column for row `index`; NULL rows get a default value.
+    static void calculate_cell(FunctionContext* context, const DataType::ColumnType* data_column,
+                               const Int8 src_base, const Int8 dst_base,
+                               ColumnString* result_column, NullMap& result_null_map,
+                               size_t index) {
+        const Int64 input = data_column->get_element(index);
+        if (input >= 0 && src_base < 0) { // negative base with a non-negative input -> NULL
+            result_null_map[index] = true;
+            result_column->insert_default();
+            return;
+        }
+        int64_t value = input;
+        const bool needs_rebase = (src_base != 10);
+        if (needs_rebase && !MathFunctions::decimal_in_base_to_decimal(input, src_base, &value)) {
+            // Failed conversion is reported as an overflow; handle_parse_result may rewrite value.
+            MathFunctions::handle_parse_result(dst_base, &value, StringParser::PARSE_OVERFLOW);
+        }
+        const StringVal text = MathFunctions::decimal_to_base(context, value, dst_base);
+        result_column->insert_data(reinterpret_cast<const char*>(text.ptr), text.len);
+    }
+};
+
+// conv() kernel for a String input column; each call appends one entry to result_column.
+struct ConvStringImpl {
+    using DataType = DataTypeString;
+
+    static void calculate_cell(FunctionContext* context, const DataType::ColumnType* data_column,
+                               const Int8 src_base, const Int8 dst_base,
+                               ColumnString* result_column, NullMap& result_null_map,
+                               size_t index) {
+        const StringRef input = data_column->get_data_at(index);
+        StringParser::ParseResult parse_status;
+        int64_t value = StringParser::string_to_int<int64_t>(input.data, input.size, src_base,
+                                                             &parse_status);
+        if (value >= 0 && src_base < 0) { // negative base with a non-negative value -> NULL
+            result_null_map[index] = true;
+            result_column->insert_default();
+            return;
+        }
+        if (MathFunctions::handle_parse_result(dst_base, &value, parse_status)) {
+            const StringVal text = MathFunctions::decimal_to_base(context, value, dst_base);
+            result_column->insert_data(reinterpret_cast<const char*>(text.ptr), text.len);
+        } else {
+            result_column->insert_data("0", 1); // rejected parse result: emit the literal "0"
+        }
+    }
+};
+
+void register_function_conv(SimpleFunctionFactory& factory) { // registers both "conv" variants
+    factory.register_function<FunctionConv<ConvInt64Impl>>(); // Int64 input
+    factory.register_function<FunctionConv<ConvStringImpl>>(); // String input
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_date_or_datetime_computation.cpp b/be/src/vec/functions/function_date_or_datetime_computation.cpp
new file mode 100644
index 0000000000..ecab986247
--- /dev/null
+++ b/be/src/vec/functions/function_date_or_datetime_computation.cpp
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/function_date_or_datetime_computation.h"
+
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+using FunctionAddSeconds = FunctionDateOrDateTimeComputation<AddSecondsImpl>; // interval addition
+using FunctionAddMinutes = FunctionDateOrDateTimeComputation<AddMinutesImpl>;
+using FunctionAddHours = FunctionDateOrDateTimeComputation<AddHoursImpl>;
+using FunctionAddDays = FunctionDateOrDateTimeComputation<AddDaysImpl>;
+using FunctionAddWeeks = FunctionDateOrDateTimeComputation<AddWeeksImpl>;
+using FunctionAddMonths = FunctionDateOrDateTimeComputation<AddMonthsImpl>;
+using FunctionAddQuarters = FunctionDateOrDateTimeComputation<AddQuartersImpl>;
+using FunctionAddYears = FunctionDateOrDateTimeComputation<AddYearsImpl>;
+
+using FunctionSubSeconds = FunctionDateOrDateTimeComputation<SubtractSecondsImpl>; // subtraction
+using FunctionSubMinutes = FunctionDateOrDateTimeComputation<SubtractMinutesImpl>;
+using FunctionSubHours = FunctionDateOrDateTimeComputation<SubtractHoursImpl>;
+using FunctionSubDays = FunctionDateOrDateTimeComputation<SubtractDaysImpl>;
+using FunctionSubWeeks = FunctionDateOrDateTimeComputation<SubtractWeeksImpl>;
+using FunctionSubMonths = FunctionDateOrDateTimeComputation<SubtractMonthsImpl>;
+using FunctionSubQuarters = FunctionDateOrDateTimeComputation<SubtractQuartersImpl>;
+using FunctionSubYears = FunctionDateOrDateTimeComputation<SubtractYearsImpl>;
+
+using FunctionDateDiff = FunctionDateOrDateTimeComputation<DateDiffImpl>; // differences
+using FunctionTimeDiff = FunctionDateOrDateTimeComputation<TimeDiffImpl>;
+using FunctionYearsDiff = FunctionDateOrDateTimeComputation<YearsDiffImpl>;
+using FunctionMonthsDiff = FunctionDateOrDateTimeComputation<MonthsDiffImpl>;
+using FunctionDaysDiff = FunctionDateOrDateTimeComputation<DaysDiffImpl>;
+using FunctionWeeksDiff = FunctionDateOrDateTimeComputation<WeeksDiffImpl>;
+using FunctionHoursDiff = FunctionDateOrDateTimeComputation<HoursDiffImpl>;
+using FunctionMinutesDiff = FunctionDateOrDateTimeComputation<MintueSDiffImpl>; // sic: "MintueS"
+using FunctionSecondsDiff = FunctionDateOrDateTimeComputation<SecondsDiffImpl>;
+
+struct NowFunctionName { // name tags: each one only carries the SQL name of an alias below
+    static constexpr auto name = "now";
+};
+
+struct CurrentTimestampFunctionName {
+    static constexpr auto name = "current_timestamp";
+};
+
+struct LocalTimeFunctionName {
+    static constexpr auto name = "localtime";
+};
+
+struct LocalTimestampFunctionName {
+    static constexpr auto name = "localtimestamp";
+};
+// now / current_timestamp / localtime / localtimestamp all instantiate CurrentDateTimeImpl.
+using FunctionNow = FunctionCurrentDateOrDateTime<CurrentDateTimeImpl<NowFunctionName>>;
+using FunctionCurrentTimestamp =
+        FunctionCurrentDateOrDateTime<CurrentDateTimeImpl<CurrentTimestampFunctionName>>;
+using FunctionLocalTime = FunctionCurrentDateOrDateTime<CurrentDateTimeImpl<LocalTimeFunctionName>>;
+using FunctionLocalTimestamp =
+        FunctionCurrentDateOrDateTime<CurrentDateTimeImpl<LocalTimestampFunctionName>>;
+
+struct CurDateFunctionName {
+    static constexpr auto name = "curdate";
+};
+
+struct CurrentDateFunctionName {
+    static constexpr auto name = "current_date";
+};
+// curdate / current_date both instantiate CurrentDateImpl.
+using FunctionCurDate =
+        FunctionCurrentDateOrDateTime<CurrentDateImpl<CurDateFunctionName>>;
+using FunctionCurrentDate =
+        FunctionCurrentDateOrDateTime<CurrentDateImpl<CurrentDateFunctionName>>;
+
+struct CurTimeFunctionName {
+    static constexpr auto name = "curtime";
+};
+struct CurrentTimeFunctionName {
+    static constexpr auto name = "current_time";
+};
+// curtime / current_time both instantiate CurrentTimeImpl; UtcTimestampImpl takes no name tag.
+using FunctionCurTime =
+        FunctionCurrentDateOrDateTime<CurrentTimeImpl<CurTimeFunctionName>>;
+using FunctionCurrentTime =
+        FunctionCurrentDateOrDateTime<CurrentTimeImpl<CurrentTimeFunctionName>>;
+using FunctionUtcTimeStamp = FunctionCurrentDateOrDateTime<UtcTimestampImpl>;
+
+void register_function_date_time_computation(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionAddSeconds>(); // interval addition (*_add)
+    factory.register_function<FunctionAddMinutes>();
+    factory.register_function<FunctionAddHours>();
+    factory.register_function<FunctionAddDays>();
+    factory.register_function<FunctionAddWeeks>();
+    factory.register_function<FunctionAddMonths>();
+    factory.register_function<FunctionAddYears>();
+    factory.register_function<FunctionAddQuarters>();
+
+    factory.register_function<FunctionSubSeconds>(); // interval subtraction (*_sub)
+    factory.register_function<FunctionSubMinutes>();
+    factory.register_function<FunctionSubHours>();
+    factory.register_function<FunctionSubDays>();
+    factory.register_function<FunctionSubMonths>();
+    factory.register_function<FunctionSubYears>();
+    factory.register_function<FunctionSubQuarters>();
+    factory.register_function<FunctionSubWeeks>();
+
+    factory.register_function<FunctionDateDiff>(); // differences between two values
+    factory.register_function<FunctionTimeDiff>();
+    factory.register_function<FunctionYearsDiff>();
+    factory.register_function<FunctionMonthsDiff>();
+    factory.register_function<FunctionWeeksDiff>();
+    factory.register_function<FunctionDaysDiff>();
+    factory.register_function<FunctionHoursDiff>();
+    factory.register_function<FunctionMinutesDiff>();
+    factory.register_function<FunctionSecondsDiff>();
+    factory.register_function<FunctionNow>(); // zero-argument current date/time functions
+    factory.register_function<FunctionCurrentTimestamp>();
+    factory.register_function<FunctionLocalTime>();
+    factory.register_function<FunctionLocalTimestamp>();
+    factory.register_function<FunctionCurDate>();
+    factory.register_function<FunctionCurrentDate>();
+    factory.register_function<FunctionCurTime>();
+    factory.register_function<FunctionCurrentTime>();
+    factory.register_function<FunctionUtcTimeStamp>();
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h b/be/src/vec/functions/function_date_or_datetime_computation.h
new file mode 100644
index 0000000000..607039c6c7
--- /dev/null
+++ b/be/src/vec/functions/function_date_or_datetime_computation.h
@@ -0,0 +1,488 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+#include "common/logging.h"
+#include "fmt/format.h"
+#include "runtime/datetime_value.h"
+#include "runtime/runtime_state.h"
+#include "udf/udf_internal.h"
+#include "util/binary_cast.hpp"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type_date.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/runtime/vdatetime_value.h"
+namespace doris::vectorized {
+
+// Shifts the packed datetime `t` by `delta` units; is_null is set if date_add_interval fails.
+template <TimeUnit unit>
+inline Int64 date_time_add(const Int64& t, Int64 delta, bool& is_null) {
+    VecDateTimeValue shifted = binary_cast<Int64, VecDateTimeValue>(t);
+    TimeInterval step(unit, delta, false);
+    is_null = !shifted.date_add_interval(step, unit);
+    return binary_cast<VecDateTimeValue, Int64>(shifted);
+}
+
+#define ADD_TIME_FUNCTION_IMPL(CLASS, NAME, UNIT)                                   \
+    struct CLASS {                                                                  \
+        using ReturnType = DataTypeDateTime;                                        \
+        static constexpr auto name = #NAME;                                         \
+        static constexpr auto is_nullable = true;                                   \
+        static inline Int64 execute(const Int64& t, Int64 delta, bool& is_null) { \
+            return date_time_add<TimeUnit::UNIT>(t, delta, is_null);                \
+        }                                                                           \
+    }
+// The macro defines struct CLASS, named NAME, whose execute() is date_time_add<TimeUnit::UNIT>.
+ADD_TIME_FUNCTION_IMPL(AddSecondsImpl, seconds_add, SECOND);
+ADD_TIME_FUNCTION_IMPL(AddMinutesImpl, minutes_add, MINUTE);
+ADD_TIME_FUNCTION_IMPL(AddHoursImpl, hours_add, HOUR);
+ADD_TIME_FUNCTION_IMPL(AddDaysImpl, days_add, DAY);
+ADD_TIME_FUNCTION_IMPL(AddWeeksImpl, weeks_add, WEEK);
+ADD_TIME_FUNCTION_IMPL(AddMonthsImpl, months_add, MONTH);
+ADD_TIME_FUNCTION_IMPL(AddYearsImpl, years_add, YEAR);
+
+struct AddQuartersImpl { // quarters_add: each quarter is added as three months
+    static constexpr auto name = "quarters_add";
+    static constexpr auto is_nullable = true;
+    using ReturnType = DataTypeDateTime;
+    static inline Int64 execute(const Int64& t, Int64 delta, bool& is_null) {
+        return date_time_add<TimeUnit::MONTH>(t, 3 * delta, is_null);
+    }
+};
+
+template <typename Transform> // subtracts by running Transform::execute with -delta
+struct SubtractIntervalImpl {
+    static constexpr auto is_nullable = true;
+    using ReturnType = DataTypeDateTime;
+    static inline Int64 execute(const Int64& t, Int64 delta, bool& is_null) {
+        return Transform::execute(t, -delta, is_null);
+    }
+};
+
+struct SubtractSecondsImpl : SubtractIntervalImpl<AddSecondsImpl> { // adds only the SQL name
+    static constexpr auto name = "seconds_sub";
+};
+struct SubtractMinutesImpl : SubtractIntervalImpl<AddMinutesImpl> {
+    static constexpr auto name = "minutes_sub";
+};
+struct SubtractHoursImpl : SubtractIntervalImpl<AddHoursImpl> {
+    static constexpr auto name = "hours_sub";
+};
+struct SubtractDaysImpl : SubtractIntervalImpl<AddDaysImpl> {
+    static constexpr auto name = "days_sub";
+};
+struct SubtractWeeksImpl : SubtractIntervalImpl<AddWeeksImpl> {
+    static constexpr auto name = "weeks_sub";
+};
+struct SubtractMonthsImpl : SubtractIntervalImpl<AddMonthsImpl> {
+    static constexpr auto name = "months_sub";
+};
+struct SubtractQuartersImpl : SubtractIntervalImpl<AddQuartersImpl> { // built on AddQuartersImpl
+    static constexpr auto name = "quarters_sub";
+};
+struct SubtractYearsImpl : SubtractIntervalImpl<AddYearsImpl> {
+    static constexpr auto name = "years_sub";
+};
+
+struct DateDiffImpl { // datediff(t0, t1): daynr(t0) - daynr(t1)
+    static constexpr auto name = "datediff";
+    static constexpr auto is_nullable = false;
+    using ReturnType = DataTypeInt32;
+    static inline Int32 execute(const Int64& t0, const Int64& t1, bool& is_null) {
+        const auto& lhs = reinterpret_cast<const VecDateTimeValue&>(t0);
+        const auto& rhs = reinterpret_cast<const VecDateTimeValue&>(t1);
+        is_null = !(lhs.is_valid_date() && rhs.is_valid_date()); // NULL if either is invalid
+        return lhs.daynr() - rhs.daynr();
+    }
+};
+
+struct TimeDiffImpl { // timediff(t0, t1): t0.second_diff(t1), returned as Float64
+    static constexpr auto name = "timediff";
+    static constexpr auto is_nullable = false;
+    using ReturnType = DataTypeFloat64;
+    static inline double execute(const Int64& t0, const Int64& t1, bool& is_null) {
+        const auto& lhs = reinterpret_cast<const VecDateTimeValue&>(t0);
+        const auto& rhs = reinterpret_cast<const VecDateTimeValue&>(t1);
+        is_null = !(lhs.is_valid_date() && rhs.is_valid_date()); // NULL if either is invalid
+        return lhs.second_diff(rhs);
+    }
+};
+
+#define TIME_DIFF_FUNCTION_IMPL(CLASS, NAME, UNIT)                                         \
+    struct CLASS {                                                                         \
+        using ReturnType = DataTypeInt64;                                                  \
+        static constexpr auto name = #NAME;                                                \
+        static constexpr auto is_nullable = false;                                         \
+        static inline int64_t execute(const Int64& t0, const Int64& t1, bool& is_null) { \
+            const auto& ts0 = reinterpret_cast<const doris::vectorized::VecDateTimeValue&>(t0);           \
+            const auto& ts1 = reinterpret_cast<const doris::vectorized::VecDateTimeValue&>(t1);           \
+            is_null = !ts0.is_valid_date() || !ts1.is_valid_date();                         \
+            return VecDateTimeValue::datetime_diff<TimeUnit::UNIT>(ts1, ts0);                 \
+        }                                                                                  \
+    }
+// The macro defines struct CLASS, named NAME; note execute() calls datetime_diff(ts1, ts0).
+TIME_DIFF_FUNCTION_IMPL(YearsDiffImpl, years_diff, YEAR);
+TIME_DIFF_FUNCTION_IMPL(MonthsDiffImpl, months_diff, MONTH);
+TIME_DIFF_FUNCTION_IMPL(WeeksDiffImpl, weeks_diff, WEEK);
+TIME_DIFF_FUNCTION_IMPL(DaysDiffImpl, days_diff, DAY);
+TIME_DIFF_FUNCTION_IMPL(HoursDiffImpl, hours_diff, HOUR);
+TIME_DIFF_FUNCTION_IMPL(MintueSDiffImpl, minutes_diff, MINUTE); // sic: "MintueS" (Minutes)
+TIME_DIFF_FUNCTION_IMPL(SecondsDiffImpl, seconds_diff, SECOND);
+
+// Row-wise kernels that call Transform::execute(lhs, rhs, is_null) for each combination of
+// vector / constant operands. Every kernel first sizes vec_to and null_map to the row count.
+template <typename FromType, typename ToType, typename Transform>
+struct DateTimeOp {
+    // (DateTime vector, DateTime vector) -> ToType
+    static void vector_vector(const PaddedPODArray<FromType>& vec_from0,
+                              const PaddedPODArray<FromType>& vec_from1,
+                              PaddedPODArray<ToType>& vec_to, NullMap& null_map) {
+        const size_t rows = vec_from0.size();
+        vec_to.resize(rows);
+        null_map.resize_fill(rows, false);
+        // null_map stores uint8 while execute() takes bool&; an implicit conversion would yield
+        // an rvalue that cannot bind, so the entry is reinterpreted in place (in every kernel).
+        for (size_t row = 0; row < rows; ++row) {
+            bool& row_is_null = reinterpret_cast<bool&>(null_map[row]);
+            vec_to[row] = Transform::execute(vec_from0[row], vec_from1[row], row_is_null);
+        }
+    }
+
+    // (DateTime vector, Int32 vector) -> ToType
+    static void vector_vector(const PaddedPODArray<FromType>& vec_from0,
+                              const PaddedPODArray<Int32>& vec_from1,
+                              PaddedPODArray<ToType>& vec_to, NullMap& null_map) {
+        const size_t rows = vec_from0.size();
+        vec_to.resize(rows);
+        null_map.resize_fill(rows, false);
+        for (size_t row = 0; row < rows; ++row) {
+            bool& row_is_null = reinterpret_cast<bool&>(null_map[row]);
+            vec_to[row] = Transform::execute(vec_from0[row], vec_from1[row], row_is_null);
+        }
+    }
+
+    // (DateTime vector, constant passed as Int128) -> ToType
+    static void vector_constant(const PaddedPODArray<FromType>& vec_from,
+                                PaddedPODArray<ToType>& vec_to, NullMap& null_map, Int128& delta) {
+        const size_t rows = vec_from.size();
+        vec_to.resize(rows);
+        null_map.resize_fill(rows, false);
+
+        for (size_t row = 0; row < rows; ++row) {
+            bool& row_is_null = reinterpret_cast<bool&>(null_map[row]);
+            vec_to[row] = Transform::execute(vec_from[row], delta, row_is_null);
+        }
+    }
+
+    // (DateTime vector, constant passed as Int64) -> ToType
+    static void vector_constant(const PaddedPODArray<FromType>& vec_from,
+                                PaddedPODArray<ToType>& vec_to, NullMap& null_map, Int64 delta) {
+        const size_t rows = vec_from.size();
+        vec_to.resize(rows);
+        null_map.resize_fill(rows, false);
+
+        for (size_t row = 0; row < rows; ++row) {
+            bool& row_is_null = reinterpret_cast<bool&>(null_map[row]);
+            vec_to[row] = Transform::execute(vec_from[row], delta, row_is_null);
+        }
+    }
+
+    // (constant DateTime, generic column read through get_int) -> ToType
+    static void constant_vector(const FromType& from, PaddedPODArray<ToType>& vec_to,
+                                NullMap& null_map, const IColumn& delta) {
+        const size_t rows = delta.size();
+        vec_to.resize(rows);
+        null_map.resize_fill(rows, false);
+        for (size_t row = 0; row < rows; ++row) {
+            bool& row_is_null = reinterpret_cast<bool&>(null_map[row]);
+            vec_to[row] = Transform::execute(from, delta.get_int(row), row_is_null);
+        }
+    }
+
+    // (constant DateTime, Int64 vector) -> ToType
+    static void constant_vector(const FromType& from, PaddedPODArray<ToType>& vec_to,
+                                NullMap& null_map, const PaddedPODArray<Int64>& delta) {
+        const size_t rows = delta.size();
+        vec_to.resize(rows);
+        null_map.resize_fill(rows, false);
+        for (size_t row = 0; row < rows; ++row) {
+            bool& row_is_null = reinterpret_cast<bool&>(null_map[row]);
+            vec_to[row] = Transform::execute(from, delta[row], row_is_null);
+        }
+    }
+};
+
+// Runs Transform over (arguments[0], arguments[1]) of `block` and stores a Nullable column at
+// `result`; returns RuntimeError when an argument column has an unsupported layout.
+template <typename FromType, typename Transform>
+struct DateTimeAddIntervalImpl {
+    static Status execute(Block& block, const ColumnNumbers& arguments, size_t result) {
+        using ToType = typename Transform::ReturnType::FieldType;
+        using Op = DateTimeOp<FromType, ToType, Transform>;
+
+        const ColumnPtr source_col = block.get_by_position(arguments[0]).column;
+        if (const auto* sources = check_and_get_column<ColumnVector<FromType>>(source_col.get())) {
+            auto col_to = ColumnVector<ToType>::create();
+            auto null_map = ColumnUInt8::create();
+            const IColumn& delta_column = *block.get_by_position(arguments[1]).column;
+            if (const auto* delta_const_column = typeid_cast<const ColumnConst*>(&delta_column)) {
+                if (delta_const_column->get_field().get_type() == Field::Types::Int128) {
+                    Op::vector_constant(sources->get_data(), col_to->get_data(),
+                                        null_map->get_data(),
+                                        delta_const_column->get_field().get<Int128>());
+                } else {
+                    Op::vector_constant(sources->get_data(), col_to->get_data(),
+                                        null_map->get_data(),
+                                        delta_const_column->get_field().get<Int64>());
+                }
+            } else if (const auto* delta_vec_column0 =
+                               check_and_get_column<ColumnVector<FromType>>(delta_column)) {
+                Op::vector_vector(sources->get_data(), delta_vec_column0->get_data(),
+                                  col_to->get_data(), null_map->get_data());
+            } else if (const auto* delta_vec_column1 =
+                               check_and_get_column<ColumnVector<Int32>>(delta_column)) {
+                Op::vector_vector(sources->get_data(), delta_vec_column1->get_data(),
+                                  col_to->get_data(), null_map->get_data());
+            } else {
+                // Formerly only DCHECKed, so release builds dereferenced a null pointer here.
+                return Status::RuntimeError(fmt::format(
+                        "Illegal column {} of second argument of function {}",
+                        delta_column.get_name(), Transform::name));
+            }
+            block.get_by_position(result).column =
+                        ColumnNullable::create(std::move(col_to), std::move(null_map));
+        } else if (const auto* sources_const =
+                           check_and_get_column_const<ColumnVector<FromType>>(source_col.get())) {
+            auto col_to = ColumnVector<ToType>::create();
+            auto null_map = ColumnUInt8::create();
+            if (const auto* delta_vec_column = check_and_get_column<ColumnVector<FromType>>(
+                        *block.get_by_position(arguments[1]).column)) {
+                Op::constant_vector(sources_const->template get_value<FromType>(),
+                                    col_to->get_data(), null_map->get_data(),
+                                    delta_vec_column->get_data());
+            } else {
+                Op::constant_vector(sources_const->template get_value<FromType>(),
+                                    col_to->get_data(), null_map->get_data(),
+                                    *block.get_by_position(arguments[1]).column);
+            }
+            block.get_by_position(result).column =
+                        ColumnNullable::create(std::move(col_to), std::move(null_map));
+        } else {
+            return Status::RuntimeError(fmt::format(
+                    "Illegal column {} of first argument of function {}",
+                    block.get_by_position(arguments[0]).column->get_name(), Transform::name));
+        }
+        return Status::OK();
+    }
+};
+
+// Date/datetime function (add, subtract, diff) whose per-row logic is supplied by Transform.
+template <typename Transform>
+class FunctionDateOrDateTimeComputation : public IFunction {
+public:
+    static constexpr auto name = Transform::name;
+    static FunctionPtr create() { return std::make_shared<FunctionDateOrDateTimeComputation>(); }
+
+    String get_name() const override { return name; }
+    bool is_variadic() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
+
+    // Validates the argument list (LOG(FATAL) on violation) and returns Nullable(ReturnType).
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
+        const size_t argc = arguments.size();
+        if (!(argc == 2 || argc == 3)) {
+            LOG(FATAL) << fmt::format(
+                    "Number of arguments for function {} doesn't match: passed {} , should be 2 or "
+                    "3",
+                    get_name(), argc);
+        }
+
+        if (argc != 2) {
+            // Not the two-argument form: the first must be DateTime and the third a string.
+            const bool first_is_datetime = WhichDataType(arguments[0].type).is_date_time();
+            if (!first_is_datetime || !WhichDataType(arguments[2].type).is_string()) {
+                LOG(FATAL) << fmt::format(
+                        "Function {} supports 2 or 3 arguments. The 1st argument must be of type "
+                        "Date or DateTime. The 2nd argument must be number. The 3rd argument "
+                        "(optional) must be a constant string with timezone name. The timezone "
+                        "argument is allowed only when the 1st argument has the type DateTime",
+                        get_name());
+            }
+        } else if (!is_date_or_datetime(arguments[0].type)) {
+            LOG(FATAL) << fmt::format(
+                    "Illegal type {} of argument of function {}. Should be a date or a date "
+                    "with time",
+                    arguments[0].type->get_name(), get_name());
+        }
+        return make_nullable(std::make_shared<typename Transform::ReturnType>());
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    // Dispatches on the first argument's type; only Date and DateTime are accepted.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const auto& first_arg = block.get_by_position(arguments[0]);
+        WhichDataType first_type(first_arg.type.get());
+        if (first_type.is_date()) {
+            using Impl = DateTimeAddIntervalImpl<DataTypeDate::FieldType, Transform>;
+            return Impl::execute(block, arguments, result);
+        }
+        if (first_type.is_date_time()) {
+            using Impl = DateTimeAddIntervalImpl<DataTypeDateTime::FieldType, Transform>;
+            return Impl::execute(block, arguments, result);
+        }
+        return Status::RuntimeError(fmt::format("Illegal type {} of argument of function {}",
+                                                first_arg.type->get_name(), get_name()));
+    }
+};
+
+// Zero-argument function (now(), curdate(), ...) whose behaviour lives in FunctionImpl.
+template <typename FunctionImpl>
+class FunctionCurrentDateOrDateTime : public IFunction {
+public:
+    static constexpr auto name = FunctionImpl::name;
+    static FunctionPtr create() { return std::make_shared<FunctionCurrentDateOrDateTime>(); }
+
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 0; }
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
+        using ResultType = typename FunctionImpl::ReturnType;
+        return std::make_shared<ResultType>();
+    }
+    // Forwards to FunctionImpl::execute; no argument columns are expected.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        DCHECK(arguments.empty());
+        return FunctionImpl::execute(context, block, result, input_rows_count);
+    }
+};
+
+/// Implementation for FunctionCurrentDateOrDateTime that yields a DATETIME built from
+/// the runtime state's timestamp_ms() (divided by 1000) and timezone_obj().
+/// FunctionName supplies the function's `name`.
+template<typename FunctionName>
+struct CurrentDateTimeImpl {
+    using ReturnType = DataTypeDateTime;
+    static constexpr auto name = FunctionName::name;
+
+    /// Builds a new Int64 column holding `input_rows_count` copies of the packed value and
+    /// stores it at position `result` of `block`. When from_unixtime() fails every row is 0.
+    /// Always returns Status::OK().
+    static Status execute(FunctionContext* context, Block& block, size_t result,
+                          size_t input_rows_count) {
+        auto col_to = ColumnVector<Int64>::create();
+        // The value handed to insert_data() must have the column's element type: the call
+        // passes only a raw pointer and a length of 0, so a 4-byte `int` fallback would not
+        // provide a full Int64 to read from.
+        Int64 packed_value = 0;
+        VecDateTimeValue dtv;
+        if (dtv.from_unixtime(context->impl()->state()->timestamp_ms() / 1000,
+                              context->impl()->state()->timezone_obj())) {
+            dtv.set_type(TIME_DATETIME);
+            packed_value = binary_cast<doris::vectorized::VecDateTimeValue, int64_t>(dtv);
+        }
+        // Same value for every row, whether the conversion succeeded or not.
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            col_to->insert_data(reinterpret_cast<const char*>(&packed_value), 0);
+        }
+        block.get_by_position(result).column = std::move(col_to);
+        return Status::OK();
+    }
+};
+
+/// Implementation for FunctionCurrentDateOrDateTime that yields a DATE built from the
+/// runtime state's timestamp_ms() (divided by 1000) and timezone_obj().
+/// FunctionName supplies the function's `name`.
+template<typename FunctionName>
+struct CurrentDateImpl {
+    using ReturnType = DataTypeDate;
+    static constexpr auto name = FunctionName::name;
+
+    /// Builds a new Int64 column holding `input_rows_count` copies of the packed value and
+    /// stores it at position `result` of `block`. When from_unixtime() fails every row is 0.
+    /// Always returns Status::OK().
+    static Status execute(FunctionContext* context, Block& block, size_t result,
+                          size_t input_rows_count) {
+        auto col_to = ColumnVector<Int64>::create();
+        // The value handed to insert_data() must have the column's element type: the call
+        // passes only a raw pointer and a length of 0, so a 4-byte `int` fallback would not
+        // provide a full Int64 to read from.
+        Int64 packed_value = 0;
+        VecDateTimeValue dtv;
+        if (dtv.from_unixtime(context->impl()->state()->timestamp_ms() / 1000,
+                              context->impl()->state()->timezone_obj())) {
+            dtv.set_type(TIME_DATE);
+            packed_value = binary_cast<doris::vectorized::VecDateTimeValue, int64_t>(dtv);
+        }
+        // Same value for every row, whether the conversion succeeded or not.
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            col_to->insert_data(reinterpret_cast<const char*>(&packed_value), 0);
+        }
+        block.get_by_position(result).column = std::move(col_to);
+        return Status::OK();
+    }
+};
+
+/// Implementation for FunctionCurrentDateOrDateTime that yields the time of day as a
+/// Float64 computed as hour * 3600 + minute * 60 + second, taken from the runtime state's
+/// timestamp_ms() (divided by 1000) and timezone_obj().
+/// FunctionName supplies the function's `name`.
+template<typename FunctionName>
+struct CurrentTimeImpl {
+    using ReturnType = DataTypeFloat64;
+    static constexpr auto name = FunctionName::name;
+
+    /// Builds a new Float64 column holding `input_rows_count` copies of the value and stores
+    /// it at position `result` of `block`. When from_unixtime() fails every row is 0.
+    /// Always returns Status::OK().
+    static Status execute(FunctionContext* context, Block& block, size_t result,
+                          size_t input_rows_count) {
+        auto col_to = ColumnVector<Float64>::create();
+        // The value handed to insert_data() must have the column's element type: the call
+        // passes only a raw pointer and a length of 0, so an `int` fallback would neither be
+        // wide enough nor carry a floating-point representation.
+        Float64 seconds_of_day = 0;
+        VecDateTimeValue dtv;
+        if (dtv.from_unixtime(context->impl()->state()->timestamp_ms() / 1000,
+                              context->impl()->state()->timezone_obj())) {
+            seconds_of_day = dtv.hour() * 3600 + dtv.minute() * 60 + dtv.second();
+        }
+        // Same value for every row, whether the conversion succeeded or not.
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            col_to->insert_data(reinterpret_cast<const char*>(&seconds_of_day), 0);
+        }
+        block.get_by_position(result).column = std::move(col_to);
+        return Status::OK();
+    }
+};
+
+/// Implementation for FunctionCurrentDateOrDateTime registered as "utc_timestamp": a
+/// DATETIME built from the runtime state's timestamp_ms() (divided by 1000) using the
+/// fixed "+00:00" timezone.
+struct UtcTimestampImpl {
+    using ReturnType = DataTypeDateTime;
+    static constexpr auto name = "utc_timestamp";
+
+    /// Builds a new Int64 column holding `input_rows_count` copies of the packed value and
+    /// stores it at position `result` of `block`. When from_unixtime() fails every row is 0.
+    /// Always returns Status::OK().
+    static Status execute(FunctionContext* context, Block& block, size_t result,
+                          size_t input_rows_count) {
+        auto col_to = ColumnVector<Int64>::create();
+        // The value handed to insert_data() must have the column's element type: the call
+        // passes only a raw pointer and a length of 0, so a 4-byte `int` fallback would not
+        // provide a full Int64 to read from.
+        Int64 packed_value = 0;
+        VecDateTimeValue dtv;
+        if (dtv.from_unixtime(context->impl()->state()->timestamp_ms() / 1000, "+00:00")) {
+            dtv.set_type(TIME_DATETIME);
+            packed_value = binary_cast<doris::vectorized::VecDateTimeValue, int64_t>(dtv);
+        }
+        // Same value for every row, whether the conversion succeeded or not.
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            col_to->insert_data(reinterpret_cast<const char*>(&packed_value), 0);
+        }
+        block.get_by_position(result).column = std::move(col_to);
+        return Status::OK();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_date_or_datetime_to_something.h b/be/src/vec/functions/function_date_or_datetime_to_something.h
new file mode 100644
index 0000000000..31e87b17c0
--- /dev/null
+++ b/be/src/vec/functions/function_date_or_datetime_to_something.h
@@ -0,0 +1,93 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionDateOrDatetimeToSomething.h
+// and modified by Doris
+
+#include "vec/data_types/data_type_date.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/functions/date_time_transforms.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+/// Variadic function (1 or 2 arguments) over a Date/DateTime input that returns a nullable
+/// ToDataType. The per-value work is done by Transform through DateTimeTransformImpl.
+/// See date_time_transforms.h
+template <typename ToDataType, typename Transform>
+class FunctionDateOrDateTimeToSomething : public IFunction {
+public:
+    static constexpr auto name = Transform::name;
+    static FunctionPtr create() { return std::make_shared<FunctionDateOrDateTimeToSomething>(); }
+
+    String get_name() const override { return name; }
+
+    bool is_variadic() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
+
+    /// Validates the argument list and returns Nullable(ToDataType).
+    /// Every validation failure is reported through LOG(FATAL):
+    ///  - the argument count is neither 1 nor 2;
+    ///  - the 1st argument is not a date or datetime;
+    ///  - a 2nd argument is present but is not a string;
+    ///  - a 2nd argument is present while the 1st is a Date and ToDataType is DataTypeDate.
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
+        const size_t arg_count = arguments.size();
+        if (arg_count != 1 && arg_count != 2) {
+            LOG(FATAL) << fmt::format(
+                    "Number of arguments for function {} doesn't match: passed {}, should be 1 or "
+                    "2",
+                    get_name(), arguments.size());
+        }
+
+        // The first-argument check is shared by the 1- and 2-argument forms.
+        if (!is_date_or_datetime(arguments[0].type)) {
+            LOG(FATAL) << fmt::format(
+                    "Illegal type {} of argument of function {}. Should be a date or a date "
+                    "with time",
+                    arguments[0].type->get_name(), get_name());
+        }
+
+        if (arg_count == 2) {
+            if (!is_string(arguments[1].type)) {
+                LOG(FATAL) << fmt::format(
+                        "Function {} supports 1 or 2 arguments. The 1st argument must be of type "
+                        "Date or DateTime. The 2nd argument (optional) must be a constant string "
+                        "with timezone name",
+                        get_name());
+            }
+            if (is_date(arguments[0].type) && std::is_same_v<ToDataType, DataTypeDate>) {
+                LOG(FATAL) << fmt::format(
+                        "The timezone argument of function {} is allowed only when the 1st "
+                        "argument has the type DateTime",
+                        get_name());
+            }
+        }
+
+        return make_nullable(std::make_shared<ToDataType>());
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+    // Argument index 1 (the optional second argument) is declared always-constant.
+    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
+
+    /// Runs Transform over the input through DateTimeTransformImpl, reading the source as
+    /// Int64 and producing ToDataType::FieldType values.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        return DateTimeTransformImpl<Int64, typename ToDataType::FieldType, Transform>::execute(
+                block, arguments, result, input_rows_count);
+    }
+
+    bool has_information_about_monotonicity() const override { return true; }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_date_or_datetime_to_string.cpp b/be/src/vec/functions/function_date_or_datetime_to_string.cpp
new file mode 100644
index 0000000000..7815f41480
--- /dev/null
+++ b/be/src/vec/functions/function_date_or_datetime_to_string.cpp
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionDateOrDatetimeToString.cpp
+// and modified by Doris
+
+#include "vec/functions/function_date_or_datetime_to_string.h"
+
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+// Concrete date/datetime-to-string functions, one per transform implementation.
+using FunctionDayName = FunctionDateOrDateTimeToString<DayNameImpl>;
+using FunctionMonthName = FunctionDateOrDateTimeToString<MonthNameImpl>;
+
+// Registers FunctionDayName and FunctionMonthName with the given factory.
+void register_function_date_time_to_string(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionDayName>();
+    factory.register_function<FunctionMonthName>();
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/function_date_or_datetime_to_string.h b/be/src/vec/functions/function_date_or_datetime_to_string.h
new file mode 100644
index 0000000000..42922c8af4
--- /dev/null
+++ b/be/src/vec/functions/function_date_or_datetime_to_string.h
@@ -0,0 +1,66 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionDateOrDatetimeToString.h
+// and modified by Doris
+
+#include "vec/data_types/data_type_date.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/date_time_transforms.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+/// Single-argument function that converts an Int64-backed column into a nullable string
+/// column. The conversion itself is performed by Transform through
+/// TransformerToStringOneArgument.
+template <typename Transform>
+class FunctionDateOrDateTimeToString : public IFunction {
+public:
+    static constexpr auto name = Transform::name;
+
+    static FunctionPtr create() {
+        return std::make_shared<FunctionDateOrDateTimeToString<Transform>>();
+    }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    // The result is always Nullable(String), regardless of the argument types.
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
+        return make_nullable(std::make_shared<DataTypeString>());
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
+
+    /// Converts the first argument and stores a ColumnNullable(ColumnString, null map) at
+    /// position `result`. Returns InternalError when the first argument is not a
+    /// ColumnVector<Int64>.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const ColumnPtr input_column = block.get_by_position(arguments[0]).column;
+        const auto* int64_input = check_and_get_column<ColumnVector<Int64>>(input_column.get());
+        if (int64_input == nullptr) {
+            return Status::InternalError("Illegal column " + input_column->get_name() +
+                                         " of first argument of function " + name);
+        }
+
+        auto result_strings = ColumnString::create();
+        auto result_nulls = ColumnVector<UInt8>::create();
+        // The transformer receives the string buffers and the null map together with the input.
+        TransformerToStringOneArgument<Transform>::vector(
+                int64_input->get_data(), result_strings->get_chars(),
+                result_strings->get_offsets(), result_nulls->get_data());
+        block.replace_by_position(
+                result, ColumnNullable::create(std::move(result_strings), std::move(result_nulls)));
+        return Status::OK();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_datetime_string_to_string.cpp b/be/src/vec/functions/function_datetime_string_to_string.cpp
new file mode 100644
index 0000000000..a499773669
--- /dev/null
+++ b/be/src/vec/functions/function_datetime_string_to_string.cpp
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/function_datetime_string_to_string.h"
+
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+// Concrete datetime/format-string functions, one per transform implementation.
+using FunctionDateFormat = FunctionDateTimeStringToString<DateFormatImpl>;
+using FunctionFromUnixTime = FunctionDateTimeStringToString<FromUnixTimeImpl>;
+
+// Registers FunctionDateFormat and FunctionFromUnixTime with the given factory.
+void register_function_date_time_string_to_string(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionDateFormat>();
+    factory.register_function<FunctionFromUnixTime>();
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/function_datetime_string_to_string.h b/be/src/vec/functions/function_datetime_string_to_string.h
new file mode 100644
index 0000000000..b63bfcde5d
--- /dev/null
+++ b/be/src/vec/functions/function_datetime_string_to_string.h
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/columns_number.h"
+#include "vec/data_types/data_type_date.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/date_time_transforms.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+/// Variadic function that formats a column of Transform::FromType values into a nullable
+/// string column. With two arguments the second one must be a constant format string;
+/// with a single argument the format "%Y-%m-%d %H:%i:%s" is used.
+template <typename Transform>
+class FunctionDateTimeStringToString : public IFunction {
+public:
+    static constexpr auto name = Transform::name;
+    static FunctionPtr create() { return std::make_shared<FunctionDateTimeStringToString>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 0; }
+    bool is_variadic() const override { return true; }
+
+    // The result is always Nullable(String), regardless of the argument types.
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
+        return make_nullable(std::make_shared<DataTypeString>());
+    }
+
+    // Nulls are handled inside execute_impl (the input null map is merged into the result).
+    bool use_default_implementation_for_nulls() const override { return false; }
+    bool use_default_implementation_for_constants() const override { return true; }
+    ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
+
+    /// Formats the first argument and stores a ColumnNullable(ColumnString, null map) at
+    /// position `result`.
+    /// Returns InternalError when the first argument (after unwrapping a ColumnNullable) is
+    /// not a ColumnVector<Transform::FromType>, or when a second argument is present but is
+    /// not a ColumnConst.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const ColumnPtr source_col = block.get_by_position(arguments[0]).column;
+
+        // Look through a nullable wrapper, if any, to reach the value column.
+        const auto* nullable_column = check_and_get_column<ColumnNullable>(source_col.get());
+        const auto* sources = check_and_get_column<ColumnVector<typename Transform::FromType>>(
+                nullable_column ? nullable_column->get_nested_column_ptr().get()
+                                : source_col.get());
+
+        if (sources) {
+            auto col_res = ColumnString::create();
+            ColumnUInt8::MutablePtr col_null_map_to;
+            col_null_map_to = ColumnUInt8::create();
+            auto& vec_null_map_to = col_null_map_to->get_data();
+
+            if (arguments.size() == 2) {
+                const IColumn& source_col1 = *block.get_by_position(arguments[1]).column;
+                if (const auto* delta_const_column =
+                            typeid_cast<const ColumnConst*>(&source_col1)) {
+                    TransformerToStringTwoArgument<Transform>::vector_constant(
+                            sources->get_data(), delta_const_column->get_field().get<String>(),
+                            col_res->get_chars(), col_res->get_offsets(), vec_null_map_to);
+                } else {
+                    // Keep the function name separated from the rest of the message.
+                    return Status::InternalError(
+                            "Illegal column " +
+                            block.get_by_position(arguments[1]).column->get_name() +
+                            " of second argument of function " + name + ": it is not const");
+                }
+            } else {
+                TransformerToStringTwoArgument<Transform>::vector_constant(
+                        sources->get_data(), "%Y-%m-%d %H:%i:%s", col_res->get_chars(),
+                        col_res->get_offsets(), vec_null_map_to);
+            }
+
+            // A row is null in the result if either the transform or the input marked it null.
+            if (nullable_column) {
+                const auto& origin_null_map = nullable_column->get_null_map_column().get_data();
+                for (size_t i = 0; i < origin_null_map.size(); ++i) {
+                    vec_null_map_to[i] |= origin_null_map[i];
+                }
+            }
+            block.get_by_position(result).column =
+                    ColumnNullable::create(std::move(col_res), std::move(col_null_map_to));
+        } else {
+            return Status::InternalError("Illegal column " +
+                                         block.get_by_position(arguments[0]).column->get_name() +
+                                         " of first argument of function " + name);
+        }
+        return Status::OK();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_grouping.cpp b/be/src/vec/functions/function_grouping.cpp
new file mode 100644
index 0000000000..07872ee5ca
--- /dev/null
+++ b/be/src/vec/functions/function_grouping.cpp
@@ -0,0 +1,25 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/function_grouping.h"
+
+namespace doris::vectorized {
+// Registers FunctionGrouping and FunctionGroupingId with the given factory.
+void register_function_grouping(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionGrouping>();
+    factory.register_function<FunctionGroupingId>();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_grouping.h b/be/src/vec/functions/function_grouping.h
new file mode 100644
index 0000000000..6bdaa7e848
--- /dev/null
+++ b/be/src/vec/functions/function_grouping.h
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef DORIS_FUNCTION_GROUPING_H
+#define DORIS_FUNCTION_GROUPING_H
+
+#include "vec/functions/simple_function_factory.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/utils/util.hpp"
+#include "vec/data_types/get_least_supertype.h"
+
+namespace doris::vectorized {
+
+// Common base for the grouping functions in this header: one argument, an Int64 result,
+// and both the default constant handling and the default null handling switched off.
+class FunctionGroupingBase : public IFunction {
+public:
+    // Exactly one argument.
+    size_t get_number_of_arguments() const override { return 1; }
+
+    // Opt out of the default handling of constant arguments.
+    bool use_default_implementation_for_constants() const override { return false; }
+
+    // Opt out of the default handling of nullable arguments.
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    // The result type is Int64 regardless of the argument types.
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
+        return std::make_shared<DataTypeInt64>();
+    }
+};
+
+/// `grouping` function: copies the nested column of its nullable argument into a newly
+/// created column of the result type.
+class FunctionGrouping : public FunctionGroupingBase {
+public:
+    static constexpr auto name = "grouping";
+
+    static FunctionPtr create() { return std::make_shared<FunctionGrouping>(); }
+
+    String get_name() const override { return name; }
+
+    /// Stores at position `result` a column created from the result type and filled with
+    /// all rows of the argument's nested column.
+    /// Returns InternalError when the argument has no column attached.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const ColumnWithTypeAndName& src_column = block.get_by_position(arguments[0]);
+        const ColumnWithTypeAndName& rel_column = block.get_by_position(result);
+        if (!src_column.column) {
+            // The column pointer is null on this path, so it must not be dereferenced to
+            // build the message; identify the argument by its name in the block instead.
+            return Status::InternalError("Illegal column " + src_column.name +
+                                         " of first argument of function " + name);
+        }
+
+        // The argument is expected to be nullable; only checked in debug builds.
+        DCHECK(src_column.type->is_nullable() == true);
+        MutableColumnPtr res_column = rel_column.type->create_column();
+        // Read-only access is sufficient, so keep the column const while downcasting.
+        const auto* src_nullable_column =
+                static_cast<const ColumnNullable*>(src_column.column.get());
+        res_column->insert_range_from(*src_nullable_column->get_nested_column_ptr().get(), 0,
+                                      src_column.column->size());
+        block.get_by_position(result).column = std::move(res_column);
+        return Status::OK();
+    }
+};
+
+/// `grouping_id` function: copies the nested column of its nullable argument into a newly
+/// created column of the result type.
+class FunctionGroupingId : public FunctionGroupingBase {
+public:
+    static constexpr auto name = "grouping_id";
+
+    static FunctionPtr create() { return std::make_shared<FunctionGroupingId>(); }
+
+    String get_name() const override { return name; }
+
+    /// Stores at position `result` a column created from the result type and filled with
+    /// all rows of the argument's nested column.
+    /// Returns InternalError when the argument has no column attached.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const ColumnWithTypeAndName& src_column = block.get_by_position(arguments[0]);
+        const ColumnWithTypeAndName& rel_column = block.get_by_position(result);
+        if (!src_column.column) {
+            // The column pointer is null on this path, so it must not be dereferenced to
+            // build the message; identify the argument by its name in the block instead.
+            return Status::InternalError("Illegal column " + src_column.name +
+                                         " of first argument of function " + name);
+        }
+
+        // The argument is expected to be nullable; only checked in debug builds.
+        DCHECK(src_column.type->is_nullable() == true);
+        MutableColumnPtr res_column = rel_column.type->create_column();
+        // Read-only access is sufficient, so keep the column const while downcasting.
+        const auto* src_nullable_column =
+                static_cast<const ColumnNullable*>(src_column.column.get());
+        res_column->insert_range_from(*src_nullable_column->get_nested_column_ptr().get(), 0,
+                                      src_column.column->size());
+        block.get_by_position(result).column = std::move(res_column);
+        return Status::OK();
+    }
+};
+}
+#endif //DORIS_FUNCTION_GROUPING_H
diff --git a/be/src/vec/functions/function_hash.cpp b/be/src/vec/functions/function_hash.cpp
new file mode 100644
index 0000000000..18c7bccf81
--- /dev/null
+++ b/be/src/vec/functions/function_hash.cpp
@@ -0,0 +1,254 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.cpp
+// and modified by Doris
+
+#include "vec/functions/function_hash.h"
+
+#include "util/hash_util.hpp"
+#include "vec/functions/function_variadic_arguments.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+// Hash policy plugged into FunctionVariadicArgumentsBase for murmurHash2_64: every argument is
+// hashed with HashUtil::murmur_hash2_64 (seed 0) and folded into one UInt64 value per row.
+struct MurmurHash2Impl64 {
+    static constexpr auto name = "murmurHash2_64";
+    using ReturnType = UInt64;
+    using ResultColumn = ColumnVector<ReturnType>;
+
+    // No arguments at all: every row receives the same fixed value.
+    static Status empty_apply(IColumn& icolumn, size_t input_rows_count) {
+        assert_cast<ResultColumn&>(icolumn).get_data().assign(
+                input_rows_count, static_cast<ReturnType>(0xe28dbde7fe22e41c));
+        return Status::OK();
+    }
+
+    // First argument: appends one hash per row to icolumn. Errors are returned to the caller.
+    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
+                              IColumn& icolumn) {
+        return execute_any<true>(type, column, icolumn, input_rows_count);
+    }
+
+    // Later arguments: mixes the new hash into the value already stored for each row.
+    static Status combine_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
+                                IColumn& icolumn) {
+        return execute_any<false>(type, column, icolumn, input_rows_count);
+    }
+
+    // Stores val for one row. For the first argument the value is appended; otherwise the
+    // stored hash is re-hashed with IntHash64Impl and XOR-ed with val.
+    template <bool first>
+    static void store(IColumn& col_to, size_t row, ReturnType val) {
+        if constexpr (first) {
+            col_to.insert_data(reinterpret_cast<const char*>(&val), 0);
+        } else {
+            auto& slot = assert_cast<ResultColumn&>(col_to).get_data()[row];
+            slot = IntHash64Impl::apply(slot) ^ val;
+        }
+    }
+
+    // Hashes a ColumnVector<FromType> (plain or constant) over the raw bytes of each value.
+    template <typename FromType, bool first>
+    static Status execute_int_type(const IColumn* column, IColumn& col_to,
+                                   size_t input_rows_count) {
+        if (const auto* col_from = check_and_get_column<ColumnVector<FromType>>(column)) {
+            const auto& vec_from = col_from->get_data();
+            for (size_t i = 0; i < vec_from.size(); ++i) {
+                store<first>(col_to, i,
+                             HashUtil::murmur_hash2_64(reinterpret_cast<const char*>(&vec_from[i]),
+                                                       sizeof(vec_from[i]), 0));
+            }
+        } else if (auto col_from_const =
+                           check_and_get_column_const<ColumnVector<FromType>>(column)) {
+            // Hash the constant exactly like a column value, so that a literal and a column
+            // holding the same number produce the same result.
+            const auto value = col_from_const->template get_value<FromType>();
+            const ReturnType val = HashUtil::murmur_hash2_64(
+                    reinterpret_cast<const char*>(&value), sizeof(value), 0);
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                store<first>(col_to, i, val);
+            }
+        } else {
+            DCHECK(false);
+            return Status::NotSupported(fmt::format("Illegal column {} of argument of function {}",
+                                                    column->get_name(), name));
+        }
+        return Status::OK();
+    }
+
+    // Hashes a string column (plain or constant). In a plain column each entry's length is
+    // taken as the distance between consecutive offsets minus one.
+    template <bool first>
+    static Status execute_string(const IColumn* column, IColumn& col_to, size_t input_rows_count) {
+        if (const ColumnString* col_from = check_and_get_column<ColumnString>(column)) {
+            const auto& data = col_from->get_chars();
+            const auto& offsets = col_from->get_offsets();
+
+            ColumnString::Offset current_offset = 0;
+            for (size_t i = 0; i < offsets.size(); ++i) {
+                store<first>(col_to, i,
+                             HashUtil::murmur_hash2_64(
+                                     reinterpret_cast<const char*>(&data[current_offset]),
+                                     offsets[i] - current_offset - 1, 0));
+                current_offset = offsets[i];
+            }
+        } else if (const ColumnConst* col_from_const =
+                           check_and_get_column_const_string_or_fixedstring(column)) {
+            String value = col_from_const->get_value<String>().data();
+            const ReturnType val = HashUtil::murmur_hash2_64(value.data(), value.size(), 0);
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                store<first>(col_to, i, val);
+            }
+        } else {
+            DCHECK(false);
+            return Status::NotSupported(fmt::format("Illegal column {} of argument of function {}",
+                                                    column->get_name(), name));
+        }
+        return Status::OK();
+    }
+
+    // Picks the helper matching the argument's data type and returns its Status, so that a
+    // failure inside a helper reaches the caller.
+    template <bool first>
+    static Status execute_any(const IDataType* from_type, const IColumn* icolumn,
+                              IColumn& col_to, size_t input_rows_count) {
+        WhichDataType which(from_type);
+
+        // Unsigned integers.
+        if (which.is_uint8()) {
+            return execute_int_type<UInt8, first>(icolumn, col_to, input_rows_count);
+        }
+        if (which.is_uint16()) {
+            return execute_int_type<UInt16, first>(icolumn, col_to, input_rows_count);
+        }
+        if (which.is_uint32()) {
+            return execute_int_type<UInt32, first>(icolumn, col_to, input_rows_count);
+        }
+        if (which.is_uint64()) {
+            return execute_int_type<UInt64, first>(icolumn, col_to, input_rows_count);
+        }
+        // Signed integers, floating point values and strings.
+        if (which.is_int8()) {
+            return execute_int_type<Int8, first>(icolumn, col_to, input_rows_count);
+        }
+        if (which.is_int16()) {
+            return execute_int_type<Int16, first>(icolumn, col_to, input_rows_count);
+        }
+        if (which.is_int32()) {
+            return execute_int_type<Int32, first>(icolumn, col_to, input_rows_count);
+        }
+        if (which.is_int64()) {
+            return execute_int_type<Int64, first>(icolumn, col_to, input_rows_count);
+        }
+        if (which.is_float32()) {
+            return execute_int_type<Float32, first>(icolumn, col_to, input_rows_count);
+        }
+        if (which.is_float64()) {
+            return execute_int_type<Float64, first>(icolumn, col_to, input_rows_count);
+        }
+        if (which.is_string()) {
+            return execute_string<first>(icolumn, col_to, input_rows_count);
+        }
+
+        DCHECK(false);
+        return Status::NotSupported(fmt::format("Illegal column {} of argument of function {}",
+                                                icolumn->get_name(), name));
+    }
+};
+using FunctionMurmurHash2_64 = FunctionVariadicArgumentsBase<DataTypeUInt64, MurmurHash2Impl64>;
+
+// Hash policy plugged into FunctionVariadicArgumentsBase for murmur_hash3_32. Only string
+// arguments (plain or constant columns) are handled; the first argument is hashed with
+// HashUtil::MURMUR3_32_SEED and each later one with the row's current hash as the seed.
+struct MurmurHash3Impl32 {
+    static constexpr auto name = "murmur_hash3_32";
+    using ReturnType = Int32;
+
+    // No arguments at all: every row receives the same fixed value, narrowed to ReturnType.
+    static Status empty_apply(IColumn& icolumn,
+                              size_t input_rows_count) {
+        ColumnVector<ReturnType>& vec_to = assert_cast<ColumnVector<ReturnType>&>(icolumn);
+        vec_to.get_data().assign(input_rows_count, static_cast<ReturnType>(0xe28dbde7fe22e41c));
+        return Status::OK();
+    }
+
+    // First argument: appends one hash per row to icolumn.
+    static Status first_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
+                              IColumn& icolumn) {
+        return execute<true>(type, column, input_rows_count, icolumn);
+    }
+
+    // Later arguments: re-hashes each row, seeded with the value already stored for it.
+    static Status combine_apply(const IDataType* type, const IColumn* column, size_t input_rows_count,
+                                IColumn& icolumn) {
+        return execute<false>(type, column, input_rows_count, icolumn);
+    }
+
+    // Hashes len bytes at data into the slot of one row: appended for the first argument,
+    // overwritten in place for later ones.
+    template <bool first>
+    static void hash_into(IColumn& col_to, size_t row, const char* data, size_t len) {
+        if constexpr (first) {
+            UInt32 val = HashUtil::murmur_hash3_32(data, len, HashUtil::MURMUR3_32_SEED);
+            col_to.insert_data(reinterpret_cast<const char*>(&val), 0);
+        } else {
+            // Take the seed from the typed column itself. Going through col_to[row] would build
+            // a Field per row and depend on that object's memory layout and byte order.
+            auto& slot = assert_cast<ColumnVector<ReturnType>&>(col_to).get_data()[row];
+            slot = HashUtil::murmur_hash3_32(data, len, static_cast<UInt32>(slot));
+        }
+    }
+
+    // Hashes a string column (plain or constant) into col_to, one value per row. In a plain
+    // column each entry's length is the distance between consecutive offsets minus one.
+    template <bool first>
+    static Status execute(const IDataType* type, const IColumn* column, size_t input_rows_count,
+                          IColumn& col_to) {
+        if (const ColumnString* col_from = check_and_get_column<ColumnString>(column)) {
+            const auto& data = col_from->get_chars();
+            const auto& offsets = col_from->get_offsets();
+
+            ColumnString::Offset current_offset = 0;
+            for (size_t i = 0; i < offsets.size(); ++i) {
+                hash_into<first>(col_to, i, reinterpret_cast<const char*>(&data[current_offset]),
+                                 offsets[i] - current_offset - 1);
+                current_offset = offsets[i];
+            }
+        } else if (const ColumnConst* col_from_const =
+                           check_and_get_column_const_string_or_fixedstring(column)) {
+            // The constant string is materialised once and hashed for every row.
+            String value = col_from_const->get_value<String>().data();
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                hash_into<first>(col_to, i, value.data(), value.size());
+            }
+        } else {
+            DCHECK(false);
+            return Status::NotSupported(fmt::format("Illegal column {} of argument of function {}",
+                                                    column->get_name(), name));
+        }
+        return Status::OK();
+    }
+};
+using FunctionMurmurHash3_32 = FunctionVariadicArgumentsBase<DataTypeInt32, MurmurHash3Impl32>;
+
+void register_function_function_hash(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionMurmurHash2_64>(); // backed by MurmurHash2Impl64
+    factory.register_function<FunctionMurmurHash3_32>(); // backed by MurmurHash3Impl32
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/function_hash.h b/be/src/vec/functions/function_hash.h
new file mode 100644
index 0000000000..a908c36489
--- /dev/null
+++ b/be/src/vec/functions/function_hash.h
@@ -0,0 +1,40 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHash.h
+// and modified by Doris
+
+#pragma once
+#include <utility>
+
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/bit_cast.h"
+#include "vec/common/hash_table/hash.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+
+namespace doris::vectorized {
+
+struct IntHash64Impl {
+    using ReturnType = UInt64;
+    // Mixes x with a fixed 64-bit constant (XOR) before running int_hash64 on it.
+    static UInt64 apply(UInt64 x) { return int_hash64(0x4CF2D2BAAE6DA887ULL ^ x); }
+};
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_helpers.cpp b/be/src/vec/functions/function_helpers.cpp
new file mode 100644
index 0000000000..0e98d88ee0
--- /dev/null
+++ b/be/src/vec/functions/function_helpers.cpp
@@ -0,0 +1,99 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHelpers.cpp
+// and modified by Doris
+
+#include "vec/functions/function_helpers.h"
+
+#include "vec/columns/column_nullable.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+// Copies block, replacing every nullable column listed in args by its nested column and type.
+static Block create_block_with_nested_columns_impl(const Block& block,
+                                                   const std::unordered_set<size_t>& args) {
+    Block unwrapped;
+    const size_t num_columns = block.columns();
+    for (size_t pos = 0; pos < num_columns; ++pos) {
+        const auto& entry = block.get_by_position(pos);
+        if (!args.count(pos) || !entry.type->is_nullable()) {
+            unwrapped.insert(entry); // not requested or not nullable: copied unchanged
+            continue;
+        }
+
+        const DataTypePtr& nested_type =
+                static_cast<const DataTypeNullable&>(*entry.type).get_nested_type();
+        if (!entry.column) {
+            // No data attached: only the type is unwrapped.
+            unwrapped.insert({nullptr, nested_type, entry.name});
+        } else if (auto* nullable = check_and_get_column<ColumnNullable>(*entry.column)) {
+            unwrapped.insert({nullable->get_nested_column_ptr(), nested_type, entry.name});
+        } else if (auto* const_column = check_and_get_column<ColumnConst>(*entry.column)) {
+            // Constant column: unwrap its data column and re-wrap the nested part as constant.
+            const auto* inner =
+                    check_and_get_column<ColumnNullable>(const_column->get_data_column());
+            unwrapped.insert({ColumnConst::create(inner->get_nested_column_ptr(),
+                                                  entry.column->size()),
+                              nested_type, entry.name});
+        } else {
+            LOG(FATAL) << "Illegal column for DataTypeNullable";
+        }
+    }
+    return unwrapped;
+}
+
+Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args) {
+    const std::unordered_set<size_t> positions(args.begin(), args.end()); // set of positions
+    return create_block_with_nested_columns_impl(block, positions);
+}
+
+Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args,
+                                       size_t result) {
+    std::unordered_set<size_t> positions {result}; // the result position is unwrapped as well
+    positions.insert(args.begin(), args.end());
+    return create_block_with_nested_columns_impl(block, positions);
+}
+
+void validate_argument_type(const IFunction& func, const DataTypes& arguments,
+                            size_t argument_index, bool (*validator_func)(const IDataType&),
+                            const char* expected_type_description) {
+    // Both failures are reported through LOG(FATAL); no status is returned to the caller.
+    if (argument_index >= arguments.size()) {
+        LOG(FATAL) << "Incorrect number of arguments of function " << func.get_name();
+    }
+    const auto& arg_type = arguments[argument_index];
+    if (!validator_func(*arg_type)) {
+        LOG(FATAL) << fmt::format("Illegal type {} of {} argument of function {} expected {}",
+                                  arg_type->get_name(), argument_index, func.get_name(),
+                                  expected_type_description);
+    }
+}
+
+const ColumnConst* check_and_get_column_const_string_or_fixedstring(const IColumn* column) {
+    // Null-tolerant, like check_and_get_column_const<Type>: a missing column is "no match".
+    if (!column || !is_column_const(*column)) return nullptr;
+
+    // Only ColumnString payloads are recognised here (no fixed-string check is performed).
+    const auto* const_column = assert_cast<const ColumnConst*>(column);
+    const bool holds_string = check_column<ColumnString>(&const_column->get_data_column());
+    return holds_string ? const_column : nullptr;
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_helpers.h b/be/src/vec/functions/function_helpers.h
new file mode 100644
index 0000000000..ac6601b06e
--- /dev/null
+++ b/be/src/vec/functions/function_helpers.h
@@ -0,0 +1,104 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionHelpers.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_const.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/core/block.h"
+#include "vec/core/call_on_type_index.h"
+#include "vec/core/column_numbers.h"
+#include "vec/data_types/data_type.h"
+
+namespace doris::vectorized {
+
+class IFunction;
+
+/// Helper methods for dispatching over real column types.
+
+template <typename... Type>
+bool check_data_type(const IDataType* data_type) {
+    return (... || (typeid_cast<const Type*>(data_type) != nullptr)); // any listed Type matches
+}
+
+template <typename Type>
+const Type* check_and_get_data_type(const IDataType* data_type) {
+    return typeid_cast<const Type*>(data_type); // forwards to typeid_cast (typeid_cast.h)
+}
+
+template <typename Type>
+const ColumnConst* check_and_get_column_const(const IColumn* column) {
+    // Yields column as ColumnConst only when it is constant and passes check_column<Type>.
+    if (column == nullptr) return nullptr;
+    if (!is_column_const(*column)) return nullptr;
+
+    const auto* as_const = assert_cast<const ColumnConst*>(column);
+    const bool wraps_type = check_column<Type>(&as_const->get_data_column());
+    return wraps_type ? as_const : nullptr;
+}
+
+template <typename Type>
+const Type* check_and_get_column_constData(const IColumn* column) {
+    // Unwraps a matching constant column to its inner data column; nullptr when no match.
+    if (const ColumnConst* as_const = check_and_get_column_const<Type>(column)) {
+        return static_cast<const Type*>(&as_const->get_data_column());
+    }
+    return nullptr;
+}
+
+template <typename Type>
+bool check_column_const(const IColumn* column) {
+    return check_and_get_column_const<Type>(column) != nullptr; // constant column over Type?
+}
+
+/// Returns non-nullptr if column is ColumnConst with ColumnString inside (FixedString: not checked).
+const ColumnConst* check_and_get_column_const_string_or_fixedstring(const IColumn* column);
+
+/// Transform anything to Field. Non-decimal values are wrapped via NearestFieldType<T>.
+template <typename T>
+inline std::enable_if_t<!IsDecimalNumber<T>, Field> to_field(const T& x) {
+    return Field(NearestFieldType<T>(x));
+}
+/// Decimal overload: the scale is passed along to NearestFieldType<T>.
+template <typename T>
+inline std::enable_if_t<IsDecimalNumber<T>, Field> to_field(const T& x, UInt32 scale) {
+    return Field(NearestFieldType<T>(x, scale));
+}
+
+Columns convert_const_tuple_to_constant_elements(const ColumnConst& column);
+
+/// Returns the copy of a given block in which each column specified in
+/// the "arguments" parameter is replaced with its respective nested
+/// column if it is nullable.
+Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args);
+
+/// Similar function as above. Additionally transform the result type if needed.
+Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args,
+                                       size_t result);
+
+/// Checks argument type at specified index with predicate.
+/// Logs at FATAL severity if there is no such argument or if the predicate returns false.
+void validate_argument_type(const IFunction& func, const DataTypes& arguments,
+                            size_t argument_index, bool (*validator_func)(const IDataType&),
+                            const char* expected_type_description);
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_ifnull.cpp b/be/src/vec/functions/function_ifnull.cpp
new file mode 100644
index 0000000000..f3601cfc16
--- /dev/null
+++ b/be/src/vec/functions/function_ifnull.cpp
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Ifnull.cpp
+// and modified by Doris
+
+#include "function_ifnull.h"
+
+namespace doris::vectorized {
+void register_function_function_ifnull(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionIfNull>(); // adds FunctionIfNull to the given factory
+}
+}
\ No newline at end of file
diff --git a/be/src/vec/functions/function_ifnull.h b/be/src/vec/functions/function_ifnull.h
new file mode 100644
index 0000000000..26fe18cf0e
--- /dev/null
+++ b/be/src/vec/functions/function_ifnull.h
@@ -0,0 +1,101 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Ifnull.h
+// and modified by Doris
+
+#ifndef DORIS_FUNCTION_IFNULL_H
+#define DORIS_FUNCTION_IFNULL_H
+
+#include "vec/functions/simple_function_factory.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/utils/util.hpp"
+#include "vec/functions/function_string.h"
+#include "vec/data_types/get_least_supertype.h"
+
+namespace doris::vectorized {
+// Vectorized ifnull(a, b), evaluated as if(isnull(a), b, a) via the registered "if" function.
+class FunctionIfNull : public IFunction {
+public:
+    static constexpr auto name = "ifnull";
+
+    static FunctionPtr create() { return std::make_shared<FunctionIfNull>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    bool use_default_implementation_for_constants() const override { return false; }
+
+    // The result has the second argument's type; its Nullable wrapper is dropped when the
+    // first argument is not nullable.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        if (!arguments[0]->is_nullable() && arguments[1]->is_nullable()) {
+            return static_cast<const DataTypeNullable*>(arguments[1].get())->get_nested_type();
+        }
+        return arguments[1];
+    }
+
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    // ifnull(col_left, col_right) == if(isnull(col_left), col_right, col_left)
+    // Returns the Status of the delegated "if" call, or an error when "if" cannot be resolved.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const ColumnWithTypeAndName& col_left = block.get_by_position(arguments[0]);
+        const ColumnWithTypeAndName& col_right = block.get_by_position(arguments[1]);
+        if (col_left.column->only_null()) {
+            block.get_by_position(result).column = col_right.column;
+            return Status::OK();
+        }
+
+        // A left column that is not a ColumnNullable is passed through as the result.
+        const auto* nullable = check_and_get_column<ColumnNullable>(*col_left.column);
+        if (nullable == nullptr) {
+            block.get_by_position(result).column = col_left.column;
+            return Status::OK();
+        }
+
+        /// implement isnull(col_left) logic: the null map is the condition of the "if" and the
+        /// nested (non-nullable) data is its else-branch.
+        const ColumnWithTypeAndName null_column_arg0 {nullable->get_null_map_column_ptr(),
+                                                      std::make_shared<DataTypeUInt8>(), ""};
+        const ColumnWithTypeAndName nested_column_arg0 {
+                nullable->get_nested_column_ptr(),
+                static_cast<const DataTypeNullable*>(col_left.type.get())->get_nested_type(), ""};
+        const ColumnsWithTypeAndName if_columns {null_column_arg0, col_right, nested_column_arg0};
+        Block temporary_block(
+                {null_column_arg0, col_right, nested_column_arg0, block.get_by_position(result)});
+
+        auto func_if = SimpleFunctionFactory::instance().get_function(
+                "if", if_columns, block.get_by_position(result).type);
+        if (!func_if) {
+            return Status::InternalError("Function if is required by function ifnull");
+        }
+
+        // Do not publish a result column when the delegated call reports a failure.
+        Status status = func_if->execute(context, temporary_block, {0, 1, 2}, 3, input_rows_count);
+        if (!status.ok()) {
+            return status;
+        }
+        block.get_by_position(result).column = temporary_block.get_by_position(3).column;
+        return Status::OK();
+    }
+};
+}
+#endif //DORIS_FUNCTION_IFNULL_H
diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp
new file mode 100644
index 0000000000..fdfab43f64
--- /dev/null
+++ b/be/src/vec/functions/function_json.cpp
@@ -0,0 +1,369 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <rapidjson/document.h>
+#include <rapidjson/stringbuffer.h>
+#include <rapidjson/writer.h>
+#include <boost/token_functions.hpp>
+#include <boost/tokenizer.hpp>
+
+#include "exprs/json_functions.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function_string.h"
+#include "vec/functions/function_totype.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+// One path piece: a key free of '"', '[' and ']', optionally followed by "[<digits>]" or "[*]".
+static const re2::RE2 JSON_PATTERN("^([^\\\"\\[\\]]*)(?:\\[([0-9]+|\\*)\\])?");
+// Splits `var` on the delimiter `p`, appending one (pointer, length) piece per segment to `res`.
+// Empty segments between delimiters are kept; a trailing delimiter adds no extra piece.
+template <typename T, typename U>
+void char_split(std::vector<T>& res, const U& var, char p) {
+    const int end = var.length();
+    int start = 0;
+    for (int pos = 0; pos < end; start = ++pos) {
+        // Test the bound before reading var[pos] so the scan never indexes past the input.
+        while (pos < end && var[pos] != p) pos++;
+        res.emplace_back(&var[start], pos - start);
+    }
+}
+
+// Turns tokenised path pieces (T = std::vector<std::string>) into JsonPath entries: entry 0 is
+// valid only for the "$" root, later pieces must match JSON_PATTERN.
+// TODO: update RE2 to support std::vector<std::string_view>
+template <typename T>
+void get_parsed_paths(const T& path_exprs, std::vector<JsonPath>* parsed_paths) {
+    if (path_exprs.empty()) {
+        return;
+    }
+
+    if (path_exprs[0] == "$") {
+        parsed_paths->emplace_back("$", -1, true);
+    } else {
+        parsed_paths->emplace_back("", -1, false);
+    }
+
+    for (int pos = 1; pos < path_exprs.size(); ++pos) {
+        std::string key;
+        std::string subscript;
+        if (UNLIKELY(!RE2::FullMatch(path_exprs[pos], JSON_PATTERN, &key, &subscript))) {
+            parsed_paths->emplace_back("", -1, false);
+            continue;
+        }
+        // -1: no subscript, -2: the "[*]" wildcard, otherwise the numeric array position.
+        int array_pos = -1;
+        if (subscript == "*") {
+            array_pos = -2;
+        } else if (!subscript.empty()) {
+            array_pos = atoi(subscript.c_str());
+        }
+        parsed_paths->emplace_back(key, array_pos, true);
+    }
+}
+// Resolves parsed_paths[1..] against `document`; returns the value reached, or nullptr on a miss.
+rapidjson::Value* match_value(const std::vector<JsonPath>& parsed_paths, rapidjson::Value* document,
+                              rapidjson::Document::AllocatorType& mem_allocator,
+                              bool is_insert_null = false) {
+    rapidjson::Value* root = document;
+    rapidjson::Value* array_obj = nullptr;
+    for (int i = 1; i < parsed_paths.size(); i++) {
+        if (root == nullptr || root->IsNull()) {
+            return nullptr;
+        }
+
+        if (UNLIKELY(!parsed_paths[i].is_valid)) {
+            return nullptr;
+        }
+
+        const std::string& col = parsed_paths[i].key;
+        int index = parsed_paths[i].idx;
+        if (LIKELY(!col.empty())) {
+            if (root->IsArray()) {
+                array_obj = static_cast<rapidjson::Value*>(
+                        mem_allocator.Malloc(sizeof(rapidjson::Value)));
+                array_obj->SetArray();
+                bool is_null = true;
+
+                // root is an array: gather member `col` of every object element into array_obj
+                for (int j = 0; j < root->Size(); j++) {
+                    rapidjson::Value* json_elem = &((*root)[j]);
+
+                    if (json_elem->IsArray() || json_elem->IsNull()) {
+                        continue;
+                    } else {
+                        if (!json_elem->IsObject()) {
+                            continue;
+                        }
+                        if (!json_elem->HasMember(col.c_str())) {
+                            if (is_insert_null) { // member missing: keep the slot as a JSON null
+                                is_null = false;
+                                rapidjson::Value nullObject(rapidjson::kNullType);
+                                array_obj->PushBack(nullObject, mem_allocator);
+                            }
+                            continue;
+                        }
+                        rapidjson::Value* obj = &((*json_elem)[col.c_str()]);
+                        if (obj->IsArray()) {
+                            is_null = false;
+                            for (int k = 0; k < obj->Size(); k++) {
+                                array_obj->PushBack((*obj)[k], mem_allocator);
+                            }
+                        } else if (!obj->IsNull()) {
+                            is_null = false;
+                            array_obj->PushBack(*obj, mem_allocator);
+                        }
+                    }
+                }
+
+                root = is_null ? &(array_obj->SetNull()) : array_obj;
+            } else if (root->IsObject()) {
+                if (!root->HasMember(col.c_str())) {
+                    return nullptr;
+                } else {
+                    root = &((*root)[col.c_str()]);
+                }
+            } else {
+                // root is not a nested type, return NULL
+                return nullptr;
+            }
+        }
+
+        if (UNLIKELY(index != -1)) {
+            // A subscript was given: the value reached so far must be an array, else no match.
+            // -2 ("[*]") copies every element into a new array; otherwise pick element `index`.
+            if (root->IsArray()) {
+                if (root->IsNull()) { // NOTE(review): looks unreachable once IsArray() holds
+                    return nullptr;
+                } else if (index == -2) {
+                    // [*]
+                    array_obj = static_cast<rapidjson::Value*>(
+                            mem_allocator.Malloc(sizeof(rapidjson::Value)));
+                    array_obj->SetArray();
+
+                    for (int j = 0; j < root->Size(); j++) {
+                        rapidjson::Value v;
+                        v.CopyFrom((*root)[j], mem_allocator);
+                        array_obj->PushBack(v, mem_allocator);
+                    }
+                    root = array_obj;
+                } else if (index >= root->Size()) {
+                    return nullptr;
+                } else {
+                    root = &((*root)[index]);
+                }
+            } else {
+                return nullptr;
+            }
+        }
+    }
+    return root;
+}
+
+// Tokenises `path_string`, parses `json_string` into `document` and resolves the path in it.
+// Returns `document` unmodified when the path yields no pieces or is not rooted at "$".
+template <JsonFunctionType fntype>
+rapidjson::Value* get_json_object(const std::string_view& json_string,
+                                  const std::string_view& path_string,
+                                  rapidjson::Document* document) {
+    boost::tokenizer<boost::escaped_list_separator<char>> tok(
+            path_string, boost::escaped_list_separator<char>("\\", ".", "\""));
+    std::vector<std::string> paths(tok.begin(), tok.end());
+    std::vector<JsonPath> parsed_paths;
+    get_parsed_paths(paths, &parsed_paths);
+
+    // get_parsed_paths appends nothing for an empty token list; never index [0] in that case.
+    if (parsed_paths.empty() || !parsed_paths[0].is_valid) {
+        return document;
+    }
+
+    if (UNLIKELY(parsed_paths.size() == 1)) {
+        if (fntype == JSON_FUN_STRING) {
+            document->SetString(json_string.data(), document->GetAllocator());
+        } else {
+            return document;
+        }
+    }
+
+    document->Parse(json_string.data());
+    if (UNLIKELY(document->HasParseError())) {
+        // VLOG_CRITICAL << "Error at offset " << document->GetErrorOffset() << ": "
+        //         << GetParseError_En(document->GetParseError());
+        document->SetNull();
+        return document;
+    }
+
+    return match_value(parsed_paths, document, document->GetAllocator());
+}
+
+// Shared kernel for get_json_int / get_json_double: resolves the row's path in the row's JSON
+// text, stores the number in `res` and flags rows without a usable value in `null_map`.
+template <typename NumberType>
+struct GetJsonNumberType {
+    using ReturnType = typename NumberType::ReturnType;
+    using ColumnType = typename NumberType::ColumnType;
+    using Container = typename ColumnType::Container;
+    static void vector_vector(const ColumnString::Chars& ldata,
+                              const ColumnString::Offsets& loffsets,
+                              const ColumnString::Chars& rdata,
+                              const ColumnString::Offsets& roffsets, Container& res,
+                              NullMap& null_map) {
+        size_t size = loffsets.size();
+        res.resize(size);
+        for (size_t i = 0; i < size; ++i) {
+            const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
+            int l_str_size = loffsets[i] - loffsets[i - 1] - 1;
+            const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
+            int r_str_size = roffsets[i] - roffsets[i - 1] - 1;
+
+            if (null_map[i]) {
+                res[i] = 0;
+                continue;
+            }
+
+            std::string_view json_string(l_raw_str, l_str_size);
+            std::string_view path_string(r_raw_str, r_str_size);
+
+            rapidjson::Document document;
+            if constexpr (std::is_same_v<double, typename NumberType::T>) {
+                auto* root = get_json_object<JSON_FUN_DOUBLE>(json_string, path_string, &document);
+                handle_result<double>(root, res[i], null_map[i]);
+            } else if constexpr (std::is_same_v<int32_t, typename NumberType::T>) {
+                auto* root = get_json_object<JSON_FUN_INT>(json_string, path_string, &document);
+                handle_result<int32_t>(root, res[i], null_map[i]);
+            }
+        }
+    }
+
+    // Double flavour: JSON ints and doubles are accepted, anything else marks the row NULL.
+    template <typename T, std::enable_if_t<std::is_same_v<double, T>, T>* = nullptr>
+    static void handle_result(rapidjson::Value* root, T& res, uint8_t& res_null) {
+        res = 0; // the slot gets a defined value even when the row ends up NULL
+        if (root != nullptr && root->IsInt()) {
+            res = root->GetInt();
+        } else if (root != nullptr && root->IsDouble()) {
+            res = root->GetDouble();
+        } else {
+            res_null = 1;
+        }
+    }
+
+    // Int flavour: only JSON ints are accepted, anything else marks the row NULL.
+    template <typename T, std::enable_if_t<std::is_same_v<int32_t, T>, T>* = nullptr>
+    static void handle_result(rapidjson::Value* root, int32_t& res, uint8_t& res_null) {
+        res = 0; // the slot gets a defined value even when the row ends up NULL
+        if (root != nullptr && root->IsInt()) {
+            res = root->GetInt();
+        } else {
+            res_null = 1;
+        }
+    }
+};
+
+// Trait bundles that parameterise GetJsonNumberType; T picks the branch taken in vector_vector.
+struct JsonNumberTypeDouble {
+    using T = Float64;
+    using ReturnType = DataTypeFloat64;
+    using ColumnType = ColumnVector<T>;
+};
+
+struct JsonNumberTypeInt {
+    using T = int32_t;
+    using ReturnType = DataTypeInt32;
+    using ColumnType = ColumnVector<T>;
+};
+
+struct GetJsonDouble : public GetJsonNumberType<JsonNumberTypeDouble> {
+    static constexpr auto name = "get_json_double"; // NOTE(review): presumably the registered name
+    using ReturnType = typename JsonNumberTypeDouble::ReturnType;
+    using ColumnType = typename JsonNumberTypeDouble::ColumnType;
+};
+
+struct GetJsonInt : public GetJsonNumberType<JsonNumberTypeInt> {
+    static constexpr auto name = "get_json_int"; // NOTE(review): presumably the registered name
+    using ReturnType = typename JsonNumberTypeInt::ReturnType;
+    using ColumnType = typename JsonNumberTypeInt::ColumnType;
+};
+
+// get_json_string kernel: string values are copied out, other values are re-serialised as JSON
+// text (both cut at 65535 bytes or the first NUL); rows with no match are flagged NULL.
+struct GetJsonString {
+    static constexpr auto name = "get_json_string";
+    using ReturnType = DataTypeString;
+    using ColumnType = ColumnString;
+    using Chars = ColumnString::Chars;
+    using Offsets = ColumnString::Offsets;
+    static void vector_vector(const Chars& ldata, const Offsets& loffsets, const Chars& rdata,
+                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
+                              NullMap& null_map) {
+        size_t input_rows_count = loffsets.size();
+        res_offsets.resize(input_rows_count);
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            int l_size = loffsets[i] - loffsets[i - 1] - 1;
+            const auto l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
+            // The path bytes live in rdata, so they must be located with roffsets (not loffsets).
+            int r_size = roffsets[i] - roffsets[i - 1] - 1;
+            const auto r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
+
+            if (null_map[i]) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map);
+                continue;
+            }
+
+            std::string_view json_string(l_raw, l_size);
+            std::string_view path_string(r_raw, r_size);
+
+            rapidjson::Document document;
+            auto* root = get_json_object<JSON_FUN_STRING>(json_string, path_string, &document);
+            const int max_string_len = 65535;
+
+            if (root == nullptr || root->IsNull()) {
+                null_map[i] = 1;
+                StringOP::push_null_string(i, res_data, res_offsets, null_map);
+            } else if (root->IsString()) {
+                const auto ptr = root->GetString();
+                size_t len = strnlen(ptr, max_string_len);
+                StringOP::push_value_string(std::string_view(ptr, len), i, res_data, res_offsets);
+            } else {
+                rapidjson::StringBuffer buf;
+                rapidjson::Writer<rapidjson::StringBuffer> writer(buf);
+                root->Accept(writer);
+
+                const auto ptr = buf.GetString();
+                size_t len = strnlen(ptr, max_string_len);
+                StringOP::push_value_string(std::string_view(ptr, len), i, res_data, res_offsets);
+            }
+        }
+    }
+};
+// Concrete function types: each JSON kernel plugged into FunctionBinaryStringOperateToNullType.
+using FunctionGetJsonDouble = FunctionBinaryStringOperateToNullType<GetJsonDouble>;
+using FunctionGetJsonInt = FunctionBinaryStringOperateToNullType<GetJsonInt>;
+using FunctionGetJsonString = FunctionBinaryStringOperateToNullType<GetJsonString>;
+// Registers the three get_json_* function types defined above with `factory`.
+void register_function_json(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionGetJsonInt>();
+    factory.register_function<FunctionGetJsonDouble>();
+    factory.register_function<FunctionGetJsonString>();
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/function_math_binary_float64.h b/be/src/vec/functions/function_math_binary_float64.h
new file mode 100644
index 0000000000..8f41931dc4
--- /dev/null
+++ b/be/src/vec/functions/function_math_binary_float64.h
@@ -0,0 +1,248 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionMathBinaryFloat64.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_decimal.h"
+#include "vec/columns/columns_number.h"
+#include "vec/core/call_on_type_index.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+
+namespace doris::vectorized {
+// Wraps Impl::execute as a two-argument IFunction returning Float64 for native-number arguments.
+template <typename Impl>
+class FunctionMathBinaryFloat64 : public IFunction {
+public:
+    static constexpr auto name = Impl::name;
+    static constexpr bool has_variadic_argument =
+            !std::is_void_v<decltype(has_variadic_argument_types(std::declval<Impl>()))>;
+    static FunctionPtr create() { return std::make_shared<FunctionMathBinaryFloat64>(); }
+    static_assert(Impl::rows_per_iteration > 0, "Impl must process at least one row per iteration");
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+private:
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        const auto check_argument_type = [this](const IDataType* arg) -> bool {
+            if (!is_native_number(arg)) {
+                LOG(ERROR) << "Illegal type " << arg->get_name() << " of argument of function "
+                           << get_name();
+                return false;
+            }
+            return true;
+        };
+
+        if (check_argument_type(arguments.front().get()) &&
+            check_argument_type(arguments.back().get())) {
+            return std::make_shared<DataTypeFloat64>();
+        } else {
+            return nullptr;
+        }
+    }
+
+    DataTypes get_variadic_argument_types_impl() const override {
+        if constexpr (has_variadic_argument) return Impl::get_variadic_argument_types();
+        return {};
+    }
+    // Constant left argument with a full-vector right argument; returns false for other shapes.
+    template <typename LeftType, typename RightType>
+    bool execute_typed(Block& block, const size_t result, const ColumnConst* left_arg,
+                       const IColumn* right_arg) {
+        if (const auto right_arg_typed = check_and_get_column<ColumnVector<RightType>>(right_arg)) {
+            auto dst = ColumnVector<Float64>::create();
+
+            LeftType left_src_data[Impl::rows_per_iteration];
+            std::fill(std::begin(left_src_data), std::end(left_src_data),
+                      left_arg->template get_value<LeftType>());
+            const auto& right_src_data = right_arg_typed->get_data();
+            const auto src_size = right_src_data.size();
+            auto& dst_data = dst->get_data();
+            dst_data.resize(src_size);
+            // Whole batches read the column directly; a short tail is staged in zero-padded buffers.
+            const auto rows_remaining = src_size % Impl::rows_per_iteration;
+            const auto rows_size = src_size - rows_remaining;
+
+            for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
+                Impl::execute(left_src_data, &right_src_data[i], &dst_data[i]);
+
+            if (rows_remaining != 0) {
+                RightType right_src_remaining[Impl::rows_per_iteration];
+                memcpy(right_src_remaining, &right_src_data[rows_size],
+                       rows_remaining * sizeof(RightType));
+                memset(right_src_remaining + rows_remaining, 0,
+                       (Impl::rows_per_iteration - rows_remaining) * sizeof(RightType));
+                Float64 dst_remaining[Impl::rows_per_iteration];
+
+                Impl::execute(left_src_data, right_src_remaining, dst_remaining);
+
+                memcpy(&dst_data[rows_size], dst_remaining, rows_remaining * sizeof(Float64));
+            }
+
+            block.replace_by_position(result, std::move(dst));
+            return true;
+        }
+
+        return false;
+    }
+    // Full-vector left argument; the right argument may be a full vector or a constant column.
+    template <typename LeftType, typename RightType>
+    bool execute_typed(Block& block, const size_t result, const ColumnVector<LeftType>* left_arg,
+                       const IColumn* right_arg) {
+        if (const auto right_arg_typed = check_and_get_column<ColumnVector<RightType>>(right_arg)) {
+            auto dst = ColumnVector<Float64>::create();
+
+            const auto& left_src_data = left_arg->get_data();
+            const auto& right_src_data = right_arg_typed->get_data();
+            const auto src_size = left_src_data.size();
+            auto& dst_data = dst->get_data();
+            dst_data.resize(src_size);
+
+            const auto rows_remaining = src_size % Impl::rows_per_iteration;
+            const auto rows_size = src_size - rows_remaining;
+
+            for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
+                Impl::execute(&left_src_data[i], &right_src_data[i], &dst_data[i]);
+
+            if (rows_remaining != 0) {
+                LeftType left_src_remaining[Impl::rows_per_iteration];
+                memcpy(left_src_remaining, &left_src_data[rows_size],
+                       rows_remaining * sizeof(LeftType));
+                memset(left_src_remaining + rows_remaining, 0,
+                       (Impl::rows_per_iteration - rows_remaining) * sizeof(LeftType));
+                RightType right_src_remaining[Impl::rows_per_iteration];
+                memcpy(right_src_remaining, &right_src_data[rows_size],
+                       rows_remaining * sizeof(RightType));
+                memset(right_src_remaining + rows_remaining, 0,
+                       (Impl::rows_per_iteration - rows_remaining) * sizeof(RightType));
+                Float64 dst_remaining[Impl::rows_per_iteration];
+
+                Impl::execute(left_src_remaining, right_src_remaining, dst_remaining);
+
+                memcpy(&dst_data[rows_size], dst_remaining, rows_remaining * sizeof(Float64));
+            }
+
+            block.replace_by_position(result, std::move(dst));
+            return true;
+        }
+        if (const auto right_arg_typed =
+                    check_and_get_column_const<ColumnVector<RightType>>(right_arg)) {
+            auto dst = ColumnVector<Float64>::create();
+
+            const auto& left_src_data = left_arg->get_data();
+            RightType right_src_data[Impl::rows_per_iteration];
+            std::fill(std::begin(right_src_data), std::end(right_src_data),
+                      right_arg_typed->template get_value<RightType>());
+            const auto src_size = left_src_data.size();
+            auto& dst_data = dst->get_data();
+            dst_data.resize(src_size);
+
+            const auto rows_remaining = src_size % Impl::rows_per_iteration;
+            const auto rows_size = src_size - rows_remaining;
+
+            for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
+                Impl::execute(&left_src_data[i], right_src_data, &dst_data[i]);
+
+            if (rows_remaining != 0) {
+                LeftType left_src_remaining[Impl::rows_per_iteration];
+                memcpy(left_src_remaining, &left_src_data[rows_size],
+                       rows_remaining * sizeof(LeftType));
+                memset(left_src_remaining + rows_remaining, 0,
+                       (Impl::rows_per_iteration - rows_remaining) * sizeof(LeftType));
+                Float64 dst_remaining[Impl::rows_per_iteration];
+
+                Impl::execute(left_src_remaining, right_src_data, dst_remaining);
+
+                memcpy(&dst_data[rows_size], dst_remaining, rows_remaining * sizeof(Float64));
+            }
+
+            block.replace_by_position(result, std::move(dst));
+            return true;
+        }
+
+        return false;
+    }
+    // Picks LeftType/RightType from the argument type ids and forwards to execute_typed.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t /*input_rows_count*/) override {
+        const ColumnWithTypeAndName& col_left = block.get_by_position(arguments[0]);
+        const ColumnWithTypeAndName& col_right = block.get_by_position(arguments[1]);
+
+        auto call = [&](const auto& types) -> bool {
+            using Types = std::decay_t<decltype(types)>;
+            using LeftType = typename Types::LeftType;
+            using RightType = typename Types::RightType;
+            using ColVecLeft = ColumnVector<LeftType>;
+
+            const IColumn* left_arg = col_left.column.get();
+            const IColumn* right_arg = col_right.column.get();
+
+            if (const auto left_arg_typed = check_and_get_column<ColVecLeft>(left_arg)) {
+                if (execute_typed<LeftType, RightType>(block, result, left_arg_typed, right_arg)) {
+                    return true;
+                }
+                DCHECK(false) << "Illegal column " << right_arg->get_name()
+                              << " of second argument of function " << get_name();
+            }
+            if (const auto left_arg_typed = check_and_get_column_const<ColVecLeft>(left_arg)) {
+                if (execute_typed<LeftType, RightType>(block, result, left_arg_typed, right_arg)) {
+                    return true;
+                }
+
+                DCHECK(false) << "Illegal column " << right_arg->get_name()
+                              << " of second argument of function " << get_name();
+            }
+
+            return false;
+        };
+
+        TypeIndex left_index = col_left.type->get_type_id();
+        TypeIndex right_index = col_right.type->get_type_id();
+
+        if (!call_on_basic_types<true, true, false, false>(left_index, right_index, call)) {
+            return Status::InvalidArgument("Illegal column " + col_left.column->get_name() +
+                                           " of argument of function " + get_name());
+        }
+        return Status::OK();
+    }
+};
+
+// Scalar kernel: adapts a plain Float64(Float64, Float64) function to the one-row Impl shape.
+template <typename Name, Float64(Function)(Float64, Float64)>
+struct BinaryFunctionPlain {
+    static constexpr auto name = Name::name;
+    static constexpr auto rows_per_iteration = 1;
+    template <typename T1, typename T2>
+    static void execute(const T1* src_left, const T2* src_right, Float64* dst) {
+        const Float64 lhs = static_cast<Float64>(*src_left);
+        *dst = static_cast<Float64>(Function(lhs, static_cast<Float64>(*src_right)));
+    }
+};
+
+#define BinaryFunctionVectorized BinaryFunctionPlain
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_math_unary.h b/be/src/vec/functions/function_math_unary.h
new file mode 100644
index 0000000000..f12936c395
--- /dev/null
+++ b/be/src/vec/functions/function_math_unary.h
@@ -0,0 +1,167 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionMathUnary.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_decimal.h"
+#include "vec/columns/columns_number.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+
+namespace doris::vectorized {
+// Wraps a one-argument Impl as an IFunction over numeric (including decimal) columns.
+template <typename Impl>
+class FunctionMathUnary : public IFunction {
+public:
+    static constexpr auto name = Impl::name;
+    static constexpr bool has_variadic_argument =
+            !std::is_void_v<decltype(has_variadic_argument_types(std::declval<Impl>()))>;
+    static FunctionPtr create() { return std::make_shared<FunctionMathUnary>(); }
+
+private:
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 1; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        const auto& arg = arguments.front();
+        if (!is_number(arg)) {
+            return nullptr;
+        }
+        return std::make_shared<typename Impl::Type>();
+    }
+    DataTypes get_variadic_argument_types_impl() const override {
+        if constexpr (has_variadic_argument) return Impl::get_variadic_argument_types();
+        return {};
+    }
+    // Runs Impl over `size` values: in one call when rows_per_iteration == 0, else in batches.
+    template <typename T, typename ReturnType>
+    static void execute_in_iterations(const T* src_data, ReturnType* dst_data, size_t size) {
+        if constexpr (Impl::rows_per_iteration == 0) {
+            /// Process all data as a whole and use FastOps implementation
+
+            /// If the argument is integer, convert to Float64 beforehand
+            if constexpr (!std::is_floating_point_v<T>) {
+                PODArray<Float64> tmp_vec(size);
+                for (size_t i = 0; i < size; ++i) tmp_vec[i] = src_data[i];
+
+                Impl::execute(tmp_vec.data(), size, dst_data);
+            } else {
+                Impl::execute(src_data, size, dst_data);
+            }
+        } else {
+            const size_t rows_remaining = size % Impl::rows_per_iteration;
+            const size_t rows_size = size - rows_remaining;
+
+            for (size_t i = 0; i < rows_size; i += Impl::rows_per_iteration)
+                Impl::execute(&src_data[i], &dst_data[i]);
+
+            if (rows_remaining != 0) {
+                T src_remaining[Impl::rows_per_iteration];
+                memcpy(src_remaining, &src_data[rows_size], rows_remaining * sizeof(T));
+                memset(src_remaining + rows_remaining, 0,
+                       (Impl::rows_per_iteration - rows_remaining) * sizeof(T));
+                ReturnType dst_remaining[Impl::rows_per_iteration];
+
+                Impl::execute(src_remaining, dst_remaining);
+
+                memcpy(&dst_data[rows_size], dst_remaining, rows_remaining * sizeof(ReturnType));
+            }
+        }
+    }
+    // Plain numeric column: results go into a new column that replaces position `result`.
+    template <typename T, typename ReturnType>
+    static bool execute(Block& block, const ColumnVector<T>* col, const size_t result) {
+        const auto& src_data = col->get_data();
+        const size_t size = src_data.size();
+
+        auto dst = ColumnVector<ReturnType>::create();
+        auto& dst_data = dst->get_data();
+        dst_data.resize(size);
+
+        execute_in_iterations(src_data.data(), dst_data.data(), size);
+
+        block.replace_by_position(result, std::move(dst));
+        return true;
+    }
+    // Decimal column: each value is converted to ReturnType first, then Impl runs in place.
+    template <typename T, typename ReturnType>
+    static bool execute(Block& block, const ColumnDecimal<T>* col, const size_t result) {
+        const auto& src_data = col->get_data();
+        const size_t size = src_data.size();
+        UInt32 scale = src_data.get_scale();
+
+        auto dst = ColumnVector<ReturnType>::create();
+        auto& dst_data = dst->get_data();
+        dst_data.resize(size);
+
+        for (size_t i = 0; i < size; ++i)
+            dst_data[i] = convert_from_decimal<DataTypeDecimal<T>, DataTypeNumber<ReturnType>>(
+                    src_data[i], scale);
+
+        execute_in_iterations(dst_data.data(), dst_data.data(), size);
+
+        block.replace_by_position(result, std::move(dst));
+        return true;
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+    // Selects the column type from the argument's type id and runs the matching execute overload.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const ColumnWithTypeAndName& col = block.get_by_position(arguments[0]);
+
+        auto call = [&](const auto& types) -> bool {
+            using Types = std::decay_t<decltype(types)>;
+            using Type = typename Types::RightType;
+            using ReturnType = std::conditional_t<
+                    Impl::always_returns_float64, Float64, Int64>;
+            using ColVecType = std::conditional_t<IsDecimalNumber<Type>, ColumnDecimal<Type>,
+                                                  ColumnVector<Type>>;
+
+            const auto col_vec = check_and_get_column<ColVecType>(col.column.get());
+            return execute<Type, ReturnType>(block, col_vec, result);
+        };
+
+        if (!call_on_basic_type<void, true, true, true, false>(col.type->get_type_id(), call)) {
+            return Status::InvalidArgument("Illegal column " + col.column->get_name() +
+                                           " of argument of function " + get_name());
+        }
+        return Status::OK();
+    }
+};
+
+// Scalar kernel: adapts a plain Float64(Float64) function to the one-row Impl shape.
+template <typename Name, Float64(Function)(Float64), typename ReturnType = DataTypeFloat64>
+struct UnaryFunctionPlain {
+    using Type = ReturnType;
+    static constexpr auto name = Name::name;
+    static constexpr auto rows_per_iteration = 1;
+    static constexpr bool always_returns_float64 = std::is_same_v<Type, DataTypeFloat64>;
+    template <typename T, typename U>
+    static void execute(const T* src, U* dst) {
+        *dst = static_cast<Float64>(Function(static_cast<Float64>(*src)));
+    }
+};
+
+#define UnaryFunctionVectorized UnaryFunctionPlain
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp
new file mode 100644
index 0000000000..bdeb7e57b6
--- /dev/null
+++ b/be/src/vec/functions/function_string.cpp
@@ -0,0 +1,809 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/function_string.h"
+
+#include <re2/re2.h>
+
+#include <cstddef>
+#include <cstdlib>
+#include <string>
+#include <string_view>
+
+#include "exprs/v_string_functions.h"
+#include "runtime/string_search.hpp"
+#include "util/encryption_util.h"
+#include "util/url_coding.h"
+#include "vec/common/pod_array_fwd.h"
+#include "vec/functions/function_string_to_string.h"
+#include "vec/functions/function_totype.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+struct NameStringASCII {
+    static constexpr auto name = "ascii";
+};
+
+// ascii(str): numeric value of the first byte of each string, 0 for an empty string.
+struct StringASCII {
+    using ReturnType = DataTypeInt32;
+    static constexpr auto TYPE_INDEX = TypeIndex::String;
+    using Type = String;
+    using ReturnColumnType = ColumnVector<Int32>;
+
+    // data/offsets: character buffer and per-row end offsets of the input column.
+    // res: resized to one entry per row and filled with the first-byte value.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         PaddedPODArray<Int32>& res) {
+        const size_t size = offsets.size();
+        res.resize(size);
+        for (size_t i = 0; i < size; ++i) {
+            // For an empty string the first byte is already its '\0' terminator, so reading
+            // it yields 0 directly; a strlen() scan over the whole value is unnecessary.
+            res[i] = static_cast<uint8_t>(data[offsets[i - 1]]);
+        }
+        return Status::OK();
+    }
+};
+
+struct NameStringLenght {
+    static constexpr const char* name = "length";
+};
+
+// length(str): byte length of every string in the column.
+struct StringLengthImpl {
+    using ReturnType = DataTypeInt32;
+    static constexpr auto TYPE_INDEX = TypeIndex::String;
+    using Type = String;
+    using ReturnColumnType = ColumnVector<Int32>;
+
+    // Writes, for each row, the distance between consecutive offsets minus one
+    // (the extra byte is presumably the per-string '\0' terminator).
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         PaddedPODArray<Int32>& res) {
+        const auto row_count = offsets.size();
+        res.resize(row_count);
+        for (int row = 0; row < row_count; ++row) {
+            const int byte_len = offsets[row] - offsets[row - 1] - 1;
+            res[row] = byte_len;
+        }
+        return Status::OK();
+    }
+};
+
+struct NameStringUtf8Length {
+    static constexpr const char* name = "char_length";
+};
+
+// char_length(str): number of UTF-8 characters in every string of the column.
+struct StringUtf8LengthImpl {
+    using ReturnType = DataTypeInt32;
+    static constexpr auto TYPE_INDEX = TypeIndex::String;
+    using Type = String;
+    using ReturnColumnType = ColumnVector<Int32>;
+
+    // For each row, walks the bytes of the value and advances by the width reported by
+    // get_utf8_byte_length() for the current byte, counting one character per step.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         PaddedPODArray<Int32>& res) {
+        const auto row_count = offsets.size();
+        res.resize(row_count);
+        for (int row = 0; row < row_count; ++row) {
+            const char* bytes = reinterpret_cast<const char*>(&data[offsets[row - 1]]);
+            const int byte_len = offsets[row] - offsets[row - 1] - 1;
+
+            size_t char_count = 0;
+            size_t pos = 0;
+            while (pos < byte_len) {
+                const size_t width = get_utf8_byte_length(static_cast<unsigned>(bytes[pos]));
+                ++char_count;
+                pos += width;
+            }
+
+            res[row] = char_count;
+        }
+        return Status::OK();
+    }
+};
+
+struct NameStartsWith {
+    static constexpr const char* name = "starts_with";
+};
+
+// starts_with(str, prefix): 1 when `strl` begins with `strr`, otherwise 0.
+struct StartsWithOp {
+    using ResultDataType = DataTypeUInt8;
+    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
+
+    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
+        // substr() clamps to the available length, so a `strl` shorter than the prefix
+        // simply compares unequal.
+        const bool has_prefix = strl.substr(0, strr.size()) == strr;
+        res = has_prefix;
+    }
+};
+
+struct NameEndsWith {
+    static constexpr const char* name = "ends_with";
+};
+
+// ends_with(str, suffix): 1 when `strl` finishes with `strr`, otherwise 0.
+struct EndsWithOp {
+    using ResultDataType = DataTypeUInt8;
+    using ResultPaddedPODArray = PaddedPODArray<UInt8>;
+
+    static void execute(const std::string_view& strl, const std::string_view& strr, uint8_t& res) {
+        bool has_suffix = false;
+        if (strl.size() >= strr.size()) {
+            // Compare only the tail of `strl` that is as long as the suffix.
+            has_suffix = strl.substr(strl.size() - strr.size()) == strr;
+        }
+        res = has_suffix;
+    }
+};
+
+struct NameFindInSet {
+    static constexpr auto name = "find_in_set";
+};
+
+// find_in_set(str, strlist): 1-based position of `strl` among the comma-separated tokens of
+// `strr`, or 0 when it is not present.
+struct FindInSetOp {
+    using ResultDataType = DataTypeInt32;
+    using ResultPaddedPODArray = PaddedPODArray<Int32>;
+
+    // strl: value searched for; a value containing ',' never matches and yields 0.
+    // strr: comma-separated list; a token following a trailing ',' is not examined.
+    // res:  receives the 1-based token index, or 0.
+    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
+        if (strl.find(',') != std::string_view::npos) {
+            res = 0;
+            return;
+        }
+
+        int32_t token_index = 1;
+        size_t start = 0;
+
+        do {
+            // Find the end of the current token. The bound is tested before the character
+            // is read so that strr[strr.length()] is never accessed.
+            size_t end = start;
+            while (end < strr.length() && strr[end] != ',') {
+                ++end;
+            }
+
+            if (strl == strr.substr(start, end - start)) {
+                res = token_index;
+                return;
+            }
+
+            // Continue just past the ','.
+            start = end + 1;
+            ++token_index;
+        } while (start < strr.length());
+        res = 0;
+    }
+};
+
+struct NameInstr {
+    static constexpr const char* name = "instr";
+};
+
+// the same impl as instr
+struct NameLocate {
+    static constexpr const char* name = "locate";
+};
+
+// instr(str, substr): 1-based character position of the first occurrence of `strr` inside
+// `strl`; an empty `strr` yields 1.
+struct InStrOP {
+    using ResultDataType = DataTypeInt32;
+    using ResultPaddedPODArray = PaddedPODArray<Int32>;
+    static void execute(const std::string_view& strl, const std::string_view& strr, int32_t& res) {
+        if (strr.empty()) {
+            res = 1;
+            return;
+        }
+
+        StringValue haystack(const_cast<char*>(strl.data()), strl.length());
+        StringValue needle(const_cast<char*>(strr.data()), strr.length());
+        StringSearch searcher(&needle);
+        int position = searcher.search(&haystack);
+
+        // A positive byte position is translated into a character position by stepping
+        // through the UTF-8 sequences that precede the match.
+        if (position > 0) {
+            size_t chars_before = 0;
+            size_t byte_pos = 0;
+            while (byte_pos < position) {
+                const size_t width =
+                        get_utf8_byte_length(static_cast<unsigned>(strl.data()[byte_pos]));
+                ++chars_before;
+                byte_pos += width;
+            }
+            position = chars_before;
+        }
+
+        // Hive returns positions starting from 1.
+        res = position + 1;
+    }
+};
+
+// Row-wise driver for binary string operations: OP::execute is called once per row with
+// views of the left and right values. LeftDataType and RightDataType are DataTypeString.
+template <typename LeftDataType, typename RightDataType, typename OP>
+struct StringFunctionImpl {
+    using ResultDataType = typename OP::ResultDataType;
+    using ResultPaddedPODArray = typename OP::ResultPaddedPODArray;
+
+    static Status vector_vector(const ColumnString::Chars& ldata,
+                                const ColumnString::Offsets& loffsets,
+                                const ColumnString::Chars& rdata,
+                                const ColumnString::Offsets& roffsets, ResultPaddedPODArray& res) {
+        DCHECK_EQ(loffsets.size(), roffsets.size());
+
+        const auto row_count = loffsets.size();
+        res.resize(row_count);
+        for (int row = 0; row < row_count; ++row) {
+            // One byte is subtracted from each span before the views are built.
+            const int lhs_len = loffsets[row] - loffsets[row - 1] - 1;
+            const int rhs_len = roffsets[row] - roffsets[row - 1] - 1;
+
+            const std::string_view lhs(reinterpret_cast<const char*>(&ldata[loffsets[row - 1]]),
+                                       lhs_len);
+            const std::string_view rhs(reinterpret_cast<const char*>(&rdata[roffsets[row - 1]]),
+                                       rhs_len);
+
+            OP::execute(lhs, rhs, res[row]);
+        }
+
+        return Status::OK();
+    }
+};
+
+struct NameReverse {
+    static constexpr auto name = "reverse";
+};
+
+// reverse(str): every value is reversed by VStringFunctions::reverse.
+struct ReverseImpl {
+    // data/offsets: input column; res_data/res_offsets: output column, one value per row.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
+        const size_t rows_count = offsets.size();
+        res_offsets.resize(rows_count);
+        res_data.reserve(data.size());
+
+        // Heap scratch buffer reused across rows. A variable-length stack array is not
+        // standard C++, has zero length for empty values and can exhaust the stack when a
+        // value is large.
+        std::string reversed;
+        for (size_t i = 0; i < rows_count; ++i) {
+            auto src_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
+            const int64_t src_len = offsets[i] - offsets[i - 1] - 1;
+            reversed.resize(src_len);
+            VStringFunctions::reverse(StringVal((uint8_t*)src_str, src_len),
+                                      StringVal((uint8_t*)reversed.data(), src_len));
+            StringOP::push_value_string(std::string_view(reversed.data(), src_len), i, res_data,
+                                        res_offsets);
+        }
+        return Status::OK();
+    }
+};
+
+struct HexStringName {
+    static constexpr auto name = "hex";
+};
+
+// hex(str): every value is replaced by its hexadecimal encoding (two characters per byte).
+struct HexStringImpl {
+    static DataTypes get_variadic_argument_types() {
+        return {std::make_shared<DataTypeString>()};
+    }
+
+    // data/offsets: input column; dst_data/dst_offsets: output column.
+    // Each output value is written as 2 * srclen characters followed by a '\0'.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets) {
+        const size_t rows_count = offsets.size();
+        dst_offsets.resize(rows_count);
+        // Working size: twice the input buffer. This is an upper bound for the output as
+        // long as `data` holds exactly the bytes described by `offsets`
+        // (srclen + 1 input bytes become 2 * srclen + 1 output bytes).
+        dst_data.resize(data.size() * 2);
+
+        size_t offset = 0;
+        auto dst_data_ptr = dst_data.data();
+        for (size_t i = 0; i < rows_count; ++i) {
+            auto source = reinterpret_cast<const unsigned char*>(&data[offsets[i - 1]]);
+            size_t srclen = offsets[i] - offsets[i - 1] - 1;
+
+            if (srclen == 0) {
+                DCHECK(*source == '\0');
+                *dst_data_ptr = '\0';
+                dst_data_ptr++;
+                offset++;
+            } else {
+                VStringFunctions::hex_encode(source, srclen, reinterpret_cast<char*>(dst_data_ptr));
+                dst_data_ptr[srclen * 2] = '\0';
+                dst_data_ptr += (srclen * 2 + 1);
+                offset += (srclen * 2 + 1);
+            }
+            dst_offsets[i] = offset;
+        }
+
+        // Drop the unused tail so the character buffer ends exactly at the last offset
+        // instead of carrying leftover bytes from the over-allocation above.
+        dst_data.resize(offset);
+        return Status::OK();
+    }
+};
+
+struct NameToLower {
+    static constexpr const char* name = "lower";
+};
+
+struct NameToUpper {
+    static constexpr const char* name = "upper";
+};
+
+using char_transter_op = int (*)(int);
+
+// Byte-wise character mapping: `op` is applied to every byte of the character buffer while
+// the offsets are copied over unchanged.
+template <char_transter_op op>
+struct TransferImpl {
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
+        const size_t row_count = offsets.size();
+        res_offsets.resize(row_count);
+        size_t row = 0;
+        while (row < row_count) {
+            res_offsets[row] = offsets[row];
+            ++row;
+        }
+
+        const size_t byte_count = data.size();
+        res_data.resize(byte_count);
+        size_t pos = 0;
+        while (pos < byte_count) {
+            res_data[pos] = op(data[pos]);
+            ++pos;
+        }
+        return Status::OK();
+    }
+};
+
+struct NameTrim {
+    static constexpr auto name = "trim";
+};
+
+struct NameLTrim {
+    static constexpr auto name = "ltrim";
+};
+
+struct NameRTrim {
+    static constexpr auto name = "rtrim";
+};
+
+// trim / ltrim / rtrim: `is_ltrim` enables VStringFunctions::ltrim and `is_rtrim` enables
+// VStringFunctions::rtrim; with both set, ltrim runs first.
+template <bool is_ltrim, bool is_rtrim>
+struct TrimImpl {
+    // data/offsets: input column; res_data/res_offsets: output column, one value per row.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
+        size_t offset_size = offsets.size();
+        res_offsets.resize(offsets.size());
+
+        for (size_t i = 0; i < offset_size; ++i) {
+            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
+            // Take the length from the offsets, as the other functions in this file do.
+            // Building the StringVal from the bare pointer would re-measure it with strlen,
+            // which costs an extra scan and cuts the value at an embedded '\0'.
+            const int str_size = offsets[i] - offsets[i - 1] - 1;
+            StringVal str((uint8_t*)raw_str, str_size);
+            if constexpr (is_ltrim) {
+                str = VStringFunctions::ltrim(str);
+            }
+            if constexpr (is_rtrim) {
+                str = VStringFunctions::rtrim(str);
+            }
+            StringOP::push_value_string(std::string_view((char*)str.ptr, str.len), i, res_data,
+                                        res_offsets);
+        }
+        return Status::OK();
+    }
+};
+
+// unhex(str): decodes pairs of hexadecimal digits into raw bytes. Input of odd length or
+// containing a non-hex character decodes to an empty string.
+struct UnHexImpl {
+    static constexpr auto name = "unhex";
+    using ReturnType = DataTypeString;
+    using ColumnType = ColumnString;
+
+    // Adds the value of the hex digit `src_c` to `c`, multiplied by 16 when `flag` is set
+    // (high nibble). Returns false and leaves `c` untouched when `src_c` is not in
+    // ['0','9'], ['a','f'] or ['A','F'].
+    static bool check_and_decode_one(char& c, const char src_c, bool flag) {
+        int digit = 0;
+        if (src_c >= '0' && src_c <= '9') {
+            digit = src_c - '0';
+        } else if (src_c >= 'A' && src_c <= 'F') {
+            digit = src_c - 'A' + 10;
+        } else if (src_c >= 'a' && src_c <= 'f') {
+            digit = src_c - 'a' + 10;
+        } else {
+            return false;
+        }
+        c += digit * (flag ? 16 : 1);
+        return true;
+    }
+
+    // Decodes `src_len` hex characters from `src_str` into `dst_str`, which must provide
+    // src_len / 2 bytes. Returns the number of bytes written; the result is 0 when src_len
+    // is zero or odd, or as soon as a non-hex character is met.
+    static int hex_decode(const char* src_str, size_t src_len, char* dst_str) {
+        if ((src_len & 1) != 0 || src_len == 0) {
+            return 0;
+        }
+        // Validate and decode in one pass: two source characters form one output byte.
+        for (size_t i = 0, dst_index = 0; i < src_len; i += 2, ++dst_index) {
+            char c = 0;
+            bool left_4bits_flag = check_and_decode_one(c, *(src_str + i), true);
+            bool right_4bits_flag = check_and_decode_one(c, *(src_str + i + 1), false);
+
+            if (!left_4bits_flag || !right_4bits_flag) {
+                return 0;
+            }
+            *(dst_str + dst_index) = c;
+        }
+        return src_len / 2;
+    }
+
+    // data/offsets: input column; dst_data/dst_offsets: output column.
+    // null_map: rows already flagged here are emitted as null strings.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
+                         NullMap& null_map) {
+        const size_t rows_count = offsets.size();
+        dst_offsets.resize(rows_count);
+
+        // Heap scratch buffer reused across rows. A variable-length stack array is not
+        // standard C++, would have zero length for a one-character input and can exhaust
+        // the stack when a value is large.
+        std::string decoded;
+        for (size_t i = 0; i < rows_count; ++i) {
+            if (null_map[i]) {
+                StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
+                continue;
+            }
+
+            auto source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
+            size_t srclen = offsets[i] - offsets[i - 1] - 1;
+
+            if (*source == '\0' && srclen == 0) {
+                StringOP::push_empty_string(i, dst_data, dst_offsets);
+                continue;
+            }
+
+            decoded.resize(srclen / 2);
+            int outlen = hex_decode(source, srclen, decoded.data());
+
+            // hex_decode() never returns a negative value, so the null branch is not
+            // reached today; it is kept so the call site stays valid if that changes.
+            if (outlen < 0) {
+                StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
+            } else {
+                StringOP::push_value_string(std::string_view(decoded.data(), outlen), i, dst_data,
+                                            dst_offsets);
+            }
+        }
+
+        return Status::OK();
+    }
+};
+struct NameStringSpace {
+    static constexpr const char* name = "space";
+};
+
+// space(n): a string of n blanks for every row; n <= 0 produces an empty string.
+struct StringSpace {
+    using ReturnType = DataTypeString;
+    static constexpr auto TYPE_INDEX = TypeIndex::Int32;
+    using Type = Int32;
+    using ReturnColumnType = ColumnString;
+
+    static Status vector(const ColumnInt32::Container& data, ColumnString::Chars& res_data,
+                         ColumnString::Offsets& res_offsets) {
+        const size_t row_count = data.size();
+        res_offsets.resize(row_count);
+        fmt::memory_buffer blanks;
+        for (size_t row = 0; row < row_count; ++row) {
+            blanks.clear();
+            const Int32 wanted = data[row];
+            if (wanted <= 0) {
+                StringOP::push_empty_string(row, res_data, res_offsets);
+                continue;
+            }
+            for (Int32 n = 0; n < wanted; ++n) {
+                blanks.push_back(' ');
+            }
+            StringOP::push_value_string(std::string_view(blanks.data(), blanks.size()), row,
+                                        res_data, res_offsets);
+        }
+        return Status::OK();
+    }
+};
+
+// aes_encrypt(str, key): AES_128_ECB encryption of every value. An empty value or a failed
+// encryption produces a null row.
+struct AesEncryptImpl {
+    static constexpr auto name = "aes_encrypt";
+    using Chars = ColumnString::Chars;
+    using Offsets = ColumnString::Offsets;
+    using ReturnType = DataTypeString;
+    using ColumnType = ColumnString;
+
+    // ldata/loffsets: plaintext column; rdata/roffsets: key column.
+    // res_data/res_offsets/null_map_data: output column and its null flags.
+    static void vector_vector(const Chars& ldata, const Offsets& loffsets, const Chars& rdata,
+                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
+                              NullMap& null_map_data) {
+        DCHECK_EQ(loffsets.size(), roffsets.size());
+        size_t input_rows_count = loffsets.size();
+        res_offsets.resize(input_rows_count);
+
+        // Heap scratch buffer reused across rows instead of a variable-length stack array,
+        // which is not standard C++ and can exhaust the stack for large values.
+        std::string cipher;
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            int l_size = loffsets[i] - loffsets[i - 1] - 1;
+            const auto l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
+
+            int r_size = roffsets[i] - roffsets[i - 1] - 1;
+            const auto r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
+
+            // Only the length decides emptiness: the payload is binary, so a leading zero
+            // byte is ordinary data and must not turn the row into NULL.
+            if (l_size == 0) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
+                continue;
+            }
+
+            // Same capacity as before: input length plus 16 bytes.
+            cipher.resize(l_size + 16);
+
+            int outlen = EncryptionUtil::encrypt(AES_128_ECB, (unsigned char*)l_raw, l_size,
+                                                 (unsigned char*)r_raw, r_size, nullptr, true,
+                                                 (unsigned char*)cipher.data());
+            if (outlen < 0) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
+            } else {
+                StringOP::push_value_string(std::string_view(cipher.data(), outlen), i, res_data,
+                                            res_offsets);
+            }
+        }
+    }
+};
+
+// aes_decrypt(str, key): AES_128_ECB decryption of every value. An empty value or a failed
+// decryption produces a null row.
+struct AesDecryptImpl {
+    static constexpr auto name = "aes_decrypt";
+    using Chars = ColumnString::Chars;
+    using Offsets = ColumnString::Offsets;
+    using ReturnType = DataTypeString;
+    using ColumnType = ColumnString;
+
+    // ldata/loffsets: ciphertext column; rdata/roffsets: key column.
+    // res_data/res_offsets/null_map_data: output column and its null flags.
+    static void vector_vector(const Chars& ldata, const Offsets& loffsets, const Chars& rdata,
+                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
+                              NullMap& null_map_data) {
+        DCHECK_EQ(loffsets.size(), roffsets.size());
+        size_t input_rows_count = loffsets.size();
+        res_offsets.resize(input_rows_count);
+
+        // Heap scratch buffer reused across rows instead of a variable-length stack array,
+        // which is not standard C++ and can exhaust the stack for large values.
+        std::string plain;
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            int l_size = loffsets[i] - loffsets[i - 1] - 1;
+            const auto l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
+
+            int r_size = roffsets[i] - roffsets[i - 1] - 1;
+            const auto r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
+
+            // Only the length decides emptiness: ciphertext is arbitrary binary data and
+            // may legitimately begin with a zero byte.
+            if (l_size == 0) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
+                continue;
+            }
+
+            // Same capacity as before: the ciphertext length.
+            plain.resize(l_size);
+
+            int outlen = EncryptionUtil::decrypt(AES_128_ECB, (unsigned char*)l_raw, l_size,
+                                                 (unsigned char*)r_raw, r_size, nullptr, true,
+                                                 (unsigned char*)plain.data());
+            if (outlen < 0) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
+            } else {
+                StringOP::push_value_string(std::string_view(plain.data(), outlen), i, res_data,
+                                            res_offsets);
+            }
+        }
+    }
+};
+
+// to_base64(str): Base64 encoding of every value. Rows flagged in null_map, empty values
+// and encoder failures produce a null row.
+struct ToBase64Impl {
+    static constexpr auto name = "to_base64";
+    using ReturnType = DataTypeString;
+    using ColumnType = ColumnString;
+
+    // data/offsets: input column; dst_data/dst_offsets: output column.
+    // null_map: rows already flagged here are emitted as null strings.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
+                         NullMap& null_map) {
+        const size_t rows_count = offsets.size();
+        dst_offsets.resize(rows_count);
+
+        // Heap scratch buffer reused across rows instead of a variable-length stack array,
+        // which is not standard C++ and can exhaust the stack for large values.
+        std::string encoded;
+        for (size_t i = 0; i < rows_count; ++i) {
+            if (null_map[i]) {
+                StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
+                continue;
+            }
+
+            auto source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
+            size_t srclen = offsets[i] - offsets[i - 1] - 1;
+
+            // Only the length decides emptiness: the input may be binary, so a leading zero
+            // byte is ordinary data and must not turn the row into NULL.
+            if (srclen == 0) {
+                StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
+                continue;
+            }
+
+            // Four output characters per started group of three input bytes; integer form
+            // of 4 * ceil(srclen / 3).
+            encoded.resize(4 * ((srclen + 2) / 3));
+            int outlen = base64_encode((const unsigned char*)source, srclen,
+                                       (unsigned char*)encoded.data());
+
+            if (outlen < 0) {
+                StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
+            } else {
+                StringOP::push_value_string(std::string_view(encoded.data(), outlen), i, dst_data,
+                                            dst_offsets);
+            }
+        }
+        return Status::OK();
+    }
+};
+
+// from_base64(str): Base64 decoding of every value. Rows flagged in null_map, empty values
+// and decoder failures produce a null row.
+struct FromBase64Impl {
+    static constexpr auto name = "from_base64";
+    using ReturnType = DataTypeString;
+    using ColumnType = ColumnString;
+
+    // data/offsets: input column; dst_data/dst_offsets: output column.
+    // null_map: rows already flagged here are emitted as null strings.
+    static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                         ColumnString::Chars& dst_data, ColumnString::Offsets& dst_offsets,
+                         NullMap& null_map) {
+        const size_t rows_count = offsets.size();
+        dst_offsets.resize(rows_count);
+
+        // Heap scratch buffer reused across rows instead of a variable-length stack array,
+        // which is not standard C++ and can exhaust the stack for large values.
+        std::string decoded;
+        for (size_t i = 0; i < rows_count; ++i) {
+            if (null_map[i]) {
+                StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
+                continue;
+            }
+
+            auto source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
+            size_t srclen = offsets[i] - offsets[i - 1] - 1;
+
+            if (*source == '\0' || srclen == 0) {
+                StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
+                continue;
+            }
+
+            // Same capacity as before: the encoded length.
+            decoded.resize(srclen);
+            int outlen = base64_decode(source, srclen, decoded.data());
+
+            if (outlen < 0) {
+                StringOP::push_null_string(i, dst_data, dst_offsets, null_map);
+            } else {
+                StringOP::push_value_string(std::string_view(decoded.data(), outlen), i, dst_data,
+                                            dst_offsets);
+            }
+        }
+
+        return Status::OK();
+    }
+};
+
+// append_trailing_char_if_absent(str, ch): returns `str` unchanged when it already ends
+// with the single-byte `ch`, otherwise `str` followed by `ch`. A `ch` whose length is not
+// exactly one byte produces a null row.
+struct StringAppendTrailingCharIfAbsent {
+    static constexpr auto name = "append_trailing_char_if_absent";
+    using Chars = ColumnString::Chars;
+    using Offsets = ColumnString::Offsets;
+    using ReturnType = DataTypeString;
+    using ColumnType = ColumnString;
+
+    // ldata/loffsets: string column; rdata/roffsets: trailing-character column.
+    // res_data/res_offsets/null_map_data: output column and its null flags.
+    static void vector_vector(const Chars& ldata, const Offsets& loffsets, const Chars& rdata,
+                              const Offsets& roffsets, Chars& res_data, Offsets& res_offsets,
+                              NullMap& null_map_data) {
+        DCHECK_EQ(loffsets.size(), roffsets.size());
+        size_t input_rows_count = loffsets.size();
+        res_offsets.resize(input_rows_count);
+        fmt::memory_buffer buffer;
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            buffer.clear();
+
+            int l_size = loffsets[i] - loffsets[i - 1] - 1;
+            const auto l_raw = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
+
+            int r_size = roffsets[i] - roffsets[i - 1] - 1;
+            const auto r_raw = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
+
+            if (r_size != 1) {
+                StringOP::push_null_string(i, res_data, res_offsets, null_map_data);
+                continue;
+            }
+            // An empty string has no last character; without the length guard the
+            // comparison would read the byte in front of the value.
+            if (l_size > 0 && l_raw[l_size - 1] == r_raw[0]) {
+                StringOP::push_value_string(std::string_view(l_raw, l_size), i, res_data,
+                                            res_offsets);
+                continue;
+            }
+
+            buffer.append(l_raw, l_raw + l_size);
+            buffer.append(r_raw, r_raw + 1);
+            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
+                                        res_offsets);
+        }
+    }
+};
+
+// Tag for "lpad": carries the function name and selects left padding.
+struct StringLPad {
+    static constexpr const char* name = "lpad";
+    static constexpr bool is_lpad = true;
+};
+
+// Tag for "rpad": carries the function name and selects right padding.
+struct StringRPad {
+    static constexpr const char* name = "rpad";
+    static constexpr bool is_lpad = false;
+};
+
+// Binds each binary string operation to the generic row-wise driver StringFunctionImpl.
+template <typename LeftDataType, typename RightDataType>
+using StringStartsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, StartsWithOp>;
+
+template <typename LeftDataType, typename RightDataType>
+using StringEndsWithImpl = StringFunctionImpl<LeftDataType, RightDataType, EndsWithOp>;
+
+template <typename LeftDataType, typename RightDataType>
+using StringInstrImpl = StringFunctionImpl<LeftDataType, RightDataType, InStrOP>;
+
+template <typename LeftDataType, typename RightDataType>
+using StringFindInSetImpl = StringFunctionImpl<LeftDataType, RightDataType, FindInSetOp>;
+
+// Concrete function types, ready to be registered with the function factory.
+using FunctionStringASCII = FunctionUnaryToType<StringASCII, NameStringASCII>;
+using FunctionStringLength = FunctionUnaryToType<StringLengthImpl, NameStringLenght>;
+using FunctionStringUTF8Length = FunctionUnaryToType<StringUtf8LengthImpl, NameStringUtf8Length>;
+using FunctionStringSpace = FunctionUnaryToType<StringSpace, NameStringSpace>;
+using FunctionStringStartsWith =
+        FunctionBinaryToType<DataTypeString, DataTypeString, StringStartsWithImpl, NameStartsWith>;
+using FunctionStringEndsWith =
+        FunctionBinaryToType<DataTypeString, DataTypeString, StringEndsWithImpl, NameEndsWith>;
+using FunctionStringInstr =
+        FunctionBinaryToType<DataTypeString, DataTypeString, StringInstrImpl, NameInstr>;
+// Same implementation as instr, exposed under the name "locate".
+using FunctionStringLocate =
+        FunctionBinaryToType<DataTypeString, DataTypeString, StringInstrImpl, NameLocate>;
+using FunctionStringFindInSet =
+        FunctionBinaryToType<DataTypeString, DataTypeString, StringFindInSetImpl, NameFindInSet>;
+
+using FunctionReverse = FunctionStringToString<ReverseImpl, NameReverse>;
+
+using FunctionHexString = FunctionStringToString<HexStringImpl, HexStringName>;
+
+using FunctionUnHex = FunctionStringOperateToNullType<UnHexImpl>;
+
+// Case conversion goes through the C library's tolower / toupper.
+using FunctionToLower = FunctionStringToString<TransferImpl<::tolower>, NameToLower>;
+
+using FunctionToUpper = FunctionStringToString<TransferImpl<::toupper>, NameToUpper>;
+
+// TrimImpl<is_ltrim, is_rtrim>: the flags choose which side(s) are trimmed.
+using FunctionLTrim = FunctionStringToString<TrimImpl<true, false>, NameLTrim>;
+
+using FunctionRTrim = FunctionStringToString<TrimImpl<false, true>, NameRTrim>;
+
+using FunctionTrim = FunctionStringToString<TrimImpl<true, true>, NameTrim>;
+
+using FunctionAesEncrypt = FunctionBinaryStringOperateToNullType<AesEncryptImpl>;
+
+using FunctionAesDecrypt = FunctionBinaryStringOperateToNullType<AesDecryptImpl>;
+
+using FunctionToBase64 = FunctionStringOperateToNullType<ToBase64Impl>;
+
+using FunctionFromBase64 = FunctionStringOperateToNullType<FromBase64Impl>;
+
+using FunctionStringAppendTrailingCharIfAbsent =
+        FunctionBinaryStringOperateToNullType<StringAppendTrailingCharIfAbsent>;
+
+using FunctionStringLPad = FunctionStringPad<StringLPad>;
+using FunctionStringRPad = FunctionStringPad<StringRPad>;
+
+// Registers the vectorized string functions and their alternative names with `factory`.
+void register_function_string(SimpleFunctionFactory& factory) {
+    // factory.register_function<>();
+    factory.register_function<FunctionStringASCII>();
+    factory.register_function<FunctionStringLength>();
+    factory.register_function<FunctionStringUTF8Length>();
+    factory.register_function<FunctionStringSpace>();
+    factory.register_function<FunctionStringStartsWith>();
+    factory.register_function<FunctionStringEndsWith>();
+    factory.register_function<FunctionStringInstr>();
+    factory.register_function<FunctionStringFindInSet>();
+    // "locate" is defined above but its registration is currently disabled.
+    //    factory.register_function<FunctionStringLocate>();
+    factory.register_function<FunctionReverse>();
+    factory.register_function<FunctionHexString>();
+    factory.register_function<FunctionUnHex>();
+    factory.register_function<FunctionToLower>();
+    factory.register_function<FunctionToUpper>();
+    factory.register_function<FunctionLTrim>();
+    factory.register_function<FunctionRTrim>();
+    factory.register_function<FunctionTrim>();
+    factory.register_function<FunctionSubstring<Substr3Impl>>();
+    factory.register_function<FunctionSubstring<Substr2Impl>>();
+    factory.register_function<FunctionLeft>();
+    factory.register_function<FunctionRight>();
+    factory.register_function<FunctionNullOrEmpty>();
+    factory.register_function<FunctionStringConcat>();
+    factory.register_function<FunctionStringConcatWs>();
+    factory.register_function<FunctionStringAppendTrailingCharIfAbsent>();
+    factory.register_function<FunctionStringRepeat>();
+    factory.register_function<FunctionStringLPad>();
+    factory.register_function<FunctionStringRPad>();
+    factory.register_function<FunctionAesEncrypt>();
+    factory.register_function<FunctionAesDecrypt>();
+    factory.register_function<FunctionToBase64>();
+    factory.register_function<FunctionFromBase64>();
+    factory.register_function<FunctionSplitPart>();
+    factory.register_function<FunctionStringMd5sum>();
+    factory.register_function<FunctionStringParseUrl>();
+    // money_format is registered once per argument implementation.
+    factory.register_function<FunctionMoneyFormat<MoneyFormatDoubleImpl>>();
+    factory.register_function<FunctionMoneyFormat<MoneyFormatInt64Impl>>();
+    factory.register_function<FunctionMoneyFormat<MoneyFormatInt128Impl>>();
+    factory.register_function<FunctionMoneyFormat<MoneyFormatDecimalImpl>>();
+
+    // Alternative names that resolve to functions registered above.
+    factory.register_alias(FunctionLeft::name, "strleft");
+    factory.register_alias(FunctionRight::name, "strright");
+    factory.register_alias(SubstringUtil::name, "substr");
+    factory.register_alias(FunctionToLower::name, "lcase");
+    factory.register_alias(FunctionToUpper::name, "ucase");
+    factory.register_alias(FunctionStringMd5sum::name, "md5");
+    factory.register_alias(FunctionStringUTF8Length::name, "character_length");
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h
new file mode 100644
index 0000000000..af5806240b
--- /dev/null
+++ b/be/src/vec/functions/function_string.h
@@ -0,0 +1,1145 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <fmt/core.h>
+#include <fmt/format.h>
+#include <fmt/ranges.h>
+
+#include <string_view>
+
+#include "exprs/anyval_util.h"
+#include "exprs/math_functions.h"
+#include "exprs/string_functions.h"
+#include "runtime/string_value.hpp"
+#include "util/md5.h"
+#include "util/url_parser.h"
+#include "vec/columns/column_decimal.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/columns_number.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+
+// Maps the first byte of a UTF-8 sequence to the length, in bytes, of that
+// sequence. Only the value of `byte` is inspected; nothing is validated, and
+// every byte below 0xC0 is reported as a one-byte character.
+inline size_t get_utf8_byte_length(unsigned char byte) {
+    if (byte < 0xC0) {
+        return 1;
+    }
+    if (byte < 0xE0) {
+        return 2;
+    }
+    if (byte < 0xF0) {
+        return 3;
+    }
+    if (byte < 0xF8) {
+        return 4;
+    }
+    return byte < 0xFC ? 5 : 6;
+}
+
+// Walks `str` one UTF-8 character at a time. The starting byte offset of each
+// character is appended to `*str_index` (entries already present are kept),
+// and the number of characters visited is returned.
+inline size_t get_char_len(const std::string_view& str, std::vector<size_t>* str_index) {
+    size_t char_count = 0;
+    size_t pos = 0;
+    while (pos < str.length()) {
+        str_index->push_back(pos);
+        ++char_count;
+        // Jump over the whole sequence introduced by the lead byte at `pos`.
+        pos += get_utf8_byte_length(str[pos]);
+    }
+    return char_count;
+}
+
+// Helpers that append one row to a string column under construction. Every
+// row written here is followed by a '\0' byte in `chars`, and `offsets[index]`
+// is set to the size of `chars` after the append. `offsets` (and `null_map`,
+// where used) must already be large enough to be indexed by `index`.
+struct StringOP {
+    // Appends an empty string as row `index`.
+    static void push_empty_string(int index, ColumnString::Chars& chars,
+                                  ColumnString::Offsets& offsets) {
+        chars.push_back('\0');
+        offsets[index] = chars.size();
+    }
+
+    // Flags row `index` as NULL and stores an empty string as its payload.
+    static void push_null_string(int index, ColumnString::Chars& chars,
+                                 ColumnString::Offsets& offsets, NullMap& null_map) {
+        null_map[index] = 1;
+        chars.push_back('\0');
+        offsets[index] = chars.size();
+    }
+
+    // Appends the bytes of `string_value` as row `index`.
+    static void push_value_string(const std::string_view& string_value, int index,
+                                  ColumnString::Chars& chars, ColumnString::Offsets& offsets) {
+        const char* first = string_value.data();
+        const char* last = first + string_value.size();
+        chars.insert(first, last);
+        chars.push_back('\0');
+        offsets[index] = chars.size();
+    }
+};
+
+// Shared SUBSTRING(str, pos, len) kernel used by substring/left/right.
+// `pos` and `len` are counted in UTF-8 characters, `pos` is 1-based, and a
+// negative `pos` counts backwards from the end of the string.
+struct SubstringUtil {
+    static constexpr auto name = "substring";
+
+    // Evaluates substring over the three columns at `arguments` = {str, pos, len}
+    // and stores a nullable string column at position `result` of `block`.
+    // Const arguments are expanded to full columns, and a NULL in any of the
+    // three arguments makes the corresponding output row NULL.
+    static void substring_execute(Block& block, const ColumnNumbers& arguments, size_t result,
+                                  size_t input_rows_count) {
+        DCHECK_EQ(arguments.size(), 3);
+        auto null_map = ColumnUInt8::create(input_rows_count, 0);
+
+        ColumnPtr argument_columns[3];
+
+        for (int i = 0; i < 3; ++i) {
+            argument_columns[i] =
+                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) {
+                // Merge the null map BEFORE rebinding argument_columns[i]. When the
+                // argument was const, argument_columns[i] may be the only owner of
+                // the expanded nullable column, so `nullable` must not be touched
+                // once that owner has been pointed at the nested column.
+                VectorizedUtils::update_null_map(null_map->get_data(),
+                                                 nullable->get_null_map_data());
+                ColumnPtr nested = nullable->get_nested_column_ptr();
+                argument_columns[i] = nested;
+            }
+        }
+
+        auto res = ColumnString::create();
+
+        auto specific_str_column = assert_cast<const ColumnString*>(argument_columns[0].get());
+        auto specific_start_column =
+                assert_cast<const ColumnVector<Int32>*>(argument_columns[1].get());
+        auto specific_len_column =
+                assert_cast<const ColumnVector<Int32>*>(argument_columns[2].get());
+        vector(specific_str_column->get_chars(), specific_str_column->get_offsets(),
+               specific_start_column->get_data(), specific_len_column->get_data(),
+               null_map->get_data(), res->get_chars(), res->get_offsets());
+
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(res), std::move(null_map));
+    }
+
+private:
+    // Row-wise kernel. For every row of the input string column:
+    //   * NULL if `start` is larger than the byte length of the string, or if
+    //     the resolved position lies outside the string (in characters) on
+    //     either side;
+    //   * ""   if `len` <= 0, the string is empty, or `start` == 0;
+    //   * otherwise up to `len` characters beginning at the resolved position.
+    // `null_map` must already hold one entry per row.
+    static void vector(const ColumnString::Chars& chars, const ColumnString::Offsets& offsets,
+                       const PaddedPODArray<Int32>& start, const PaddedPODArray<Int32>& len,
+                       NullMap& null_map, ColumnString::Chars& res_chars,
+                       ColumnString::Offsets& res_offsets) {
+        int size = offsets.size();
+        res_offsets.resize(size);
+        res_chars.reserve(chars.size());
+        // Byte offset of every character scanned so far in the current row.
+        std::vector<size_t> index;
+
+        for (int i = 0; i < size; ++i) {
+            auto* raw_str = reinterpret_cast<const unsigned char*>(&chars[offsets[i - 1]]);
+            // Stored rows carry a trailing '\0' that is not part of the value.
+            int str_size = offsets[i] - offsets[i - 1] - 1;
+            // return null if start > src.length
+            if (start[i] > str_size) {
+                StringOP::push_null_string(i, res_chars, res_offsets, null_map);
+                continue;
+            }
+            // return "" if len <= 0 or str is empty or start == 0
+            if (len[i] <= 0 || str_size == 0 || start[i] == 0) {
+                StringOP::push_empty_string(i, res_chars, res_offsets);
+                continue;
+            }
+
+            // 64-bit copies: pos + length must not overflow for lengths close to
+            // INT32_MAX.
+            const int64_t pos = start[i];
+            const int64_t length = len[i];
+
+            // reference to string_function.cpp: substring
+            index.clear();
+            for (size_t j = 0, char_size = 0; j < static_cast<size_t>(str_size);
+                 j += char_size) {
+                char_size = get_utf8_byte_length(raw_str[j]);
+                index.push_back(j);
+                // For a positive start there is no need to scan past the end of
+                // the requested window.
+                if (pos > 0 && static_cast<int64_t>(index.size()) > pos + length) {
+                    break;
+                }
+            }
+
+            const int64_t char_count = static_cast<int64_t>(index.size());
+            // Resolve a negative start relative to the end of the string.
+            const int64_t fixed_pos = pos < 0 ? char_count + pos + 1 : pos;
+            // A position before the first or after the last character is NULL.
+            // `fixed_pos <= 0` has to be rejected explicitly: fixed_pos == 0 would
+            // otherwise read index[-1].
+            if (fixed_pos <= 0 || fixed_pos > char_count) {
+                StringOP::push_null_string(i, res_chars, res_offsets, null_map);
+                continue;
+            }
+
+            const size_t byte_pos = index[fixed_pos - 1];
+            // Default: everything up to the end of the string.
+            size_t fixed_len = static_cast<size_t>(str_size) - byte_pos;
+            if (fixed_pos + length <= char_count) {
+                fixed_len = index[fixed_pos + length - 1] - byte_pos;
+            }
+
+            if (fixed_len > 0) {
+                StringOP::push_value_string(
+                        std::string_view {reinterpret_cast<const char*>(raw_str + byte_pos),
+                                          fixed_len},
+                        i, res_chars, res_offsets);
+            } else {
+                StringOP::push_empty_string(i, res_chars, res_offsets);
+            }
+        }
+    }
+};
+
+// IFunction adapter registered under the name "substring". The accepted
+// argument types and the row processing both come from `Impl`; the result is
+// always a nullable string.
+template <typename Impl>
+class FunctionSubstring : public IFunction {
+public:
+    static constexpr auto name = SubstringUtil::name;
+    static FunctionPtr create() { return std::make_shared<FunctionSubstring>(); }
+    String get_name() const override { return name; }
+
+    // The arity equals the number of argument types `Impl` declares.
+    size_t get_number_of_arguments() const override {
+        const DataTypes argument_types = get_variadic_argument_types_impl();
+        return argument_types.size();
+    }
+    DataTypes get_variadic_argument_types_impl() const override {
+        return Impl::get_variadic_argument_types();
+    }
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        auto string_type = std::make_shared<DataTypeString>();
+        return make_nullable(string_type);
+    }
+
+    // NULL handling is done by the implementation itself; constant arguments
+    // use the default handling.
+    bool use_default_implementation_for_constants() const override { return true; }
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
+    }
+};
+
+// substring(str, pos, len): the three arguments are handed to SubstringUtil
+// unchanged.
+struct Substr3Impl {
+    // Signature: (String, Int32, Int32).
+    static DataTypes get_variadic_argument_types() {
+        auto str_type = std::make_shared<DataTypeString>();
+        auto pos_type = std::make_shared<DataTypeInt32>();
+        auto len_type = std::make_shared<DataTypeInt32>();
+        return {str_type, pos_type, len_type};
+    }
+
+    // Always reports OK; `context` is not used.
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        SubstringUtil::substring_execute(block, arguments, result, input_rows_count);
+        return Status::OK();
+    }
+};
+
+// substring(str, pos): the missing length is synthesised per row from the
+// stored size of the string, then the three-argument kernel is reused.
+struct Substr2Impl {
+    // Signature: (String, Int32).
+    static DataTypes get_variadic_argument_types() {
+        auto str_type = std::make_shared<DataTypeString>();
+        auto pos_type = std::make_shared<DataTypeInt32>();
+        return {str_type, pos_type};
+    }
+
+    // Appends a temporary Int32 column named "strlen" to `block` and uses it as
+    // the length argument. Always reports OK; `context` is not used.
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        // Only the offsets of the string column are needed here, so look
+        // through a const and/or nullable wrapper.
+        auto str_col =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        if (auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
+            str_col = nullable->get_nested_column_ptr();
+        }
+        auto& str_offset = assert_cast<const ColumnString*>(str_col.get())->get_offsets();
+
+        auto params = ColumnInt32::create(input_rows_count);
+        auto& strlen_data = params->get_data();
+        const int row_count = static_cast<int>(input_rows_count);
+        for (int row = 0; row < row_count; ++row) {
+            // Distance between consecutive offsets, used as the length argument.
+            strlen_data[row] = str_offset[row] - str_offset[row - 1];
+        }
+
+        // The new column lands at the current end of the block.
+        const size_t len_position = block.columns();
+        block.insert({std::move(params), std::make_shared<DataTypeInt32>(), "strlen"});
+
+        ColumnNumbers temp_arguments = {arguments[0], arguments[1], len_position};
+        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
+        return Status::OK();
+    }
+};
+
+// left(str, len): evaluated as substring(str, 1, len). The result is a
+// nullable string.
+class FunctionLeft : public IFunction {
+public:
+    static constexpr auto name = "left";
+    static FunctionPtr create() { return std::make_shared<FunctionLeft>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 2; }
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        auto string_type = std::make_shared<DataTypeString>();
+        return make_nullable(string_type);
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    // Appends a temporary Int32 column named "const 1" (all ones) to `block`,
+    // uses it as the start position and passes `arguments[1]` as the length.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        // The temporary column lands at the current end of the block.
+        const size_t start_position = block.columns();
+
+        auto int_type = std::make_shared<DataTypeInt32>();
+        auto ones = int_type->create_column_const(input_rows_count, to_field(1))
+                            ->convert_to_full_column_if_const();
+        block.insert({ones, int_type, "const 1"});
+
+        ColumnNumbers temp_arguments = {arguments[0], start_position, arguments[1]};
+        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
+        return Status::OK();
+    }
+};
+
+// right(str, len): the last `len` UTF-8 characters of `str`, evaluated as
+// substring(str, max(-len, -char_count(str)), len). The result is a nullable
+// string; whatever substring yields for the resulting (start, len) pair is
+// returned unchanged.
+class FunctionRight : public IFunction {
+public:
+    static constexpr auto name = "right";
+    static FunctionPtr create() { return std::make_shared<FunctionRight>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 2; }
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return make_nullable(std::make_shared<DataTypeString>());
+    }
+
+    bool use_default_implementation_for_nulls() const override { return false; }
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    // Appends a temporary Int32 column named "index" (the per-row start
+    // position) to `block` and delegates to SubstringUtil with
+    // {str, index, len}.
+    //
+    // `arguments[1]` itself is forwarded as the length, exactly as FunctionLeft
+    // does. This way SubstringUtil merges the null map of the length argument
+    // (a NULL length gives a NULL row) and a length <= 0 goes through
+    // substring's own handling instead of being replaced by the string size.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto int_type = std::make_shared<DataTypeInt32>();
+        auto start_column = ColumnInt32::create(input_rows_count);
+        auto& start_data = start_column->get_data();
+        // The temporary column lands at the current end of the block.
+        const size_t start_position = block.columns();
+
+        // Look through const/nullable wrappers of the string argument; its null
+        // map is merged later by SubstringUtil.
+        auto str_col =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        if (auto* nullable = check_and_get_column<const ColumnNullable>(*str_col)) {
+            str_col = nullable->get_nested_column_ptr();
+        }
+        auto* str_column = assert_cast<const ColumnString*>(str_col.get());
+        auto& str_offset = str_column->get_offsets();
+        auto& str_chars = str_column->get_chars();
+
+        // Same for the length argument (values only; nulls are handled by
+        // SubstringUtil through arguments[1]).
+        auto pos_col =
+                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+        if (auto* nullable = check_and_get_column<const ColumnNullable>(*pos_col)) {
+            pos_col = nullable->get_nested_column_ptr();
+        }
+        auto& pos_data = assert_cast<const ColumnInt32*>(pos_col.get())->get_data();
+
+        const int row_count = static_cast<int>(input_rows_count);
+        for (int i = 0; i < row_count; ++i) {
+            // Stored rows carry a trailing '\0' that is not part of the value.
+            const int byte_size = str_offset[i] - str_offset[i - 1] - 1;
+            const auto* row_data = &str_chars[str_offset[i - 1]];
+
+            // Count characters, not bytes: substring positions are
+            // character-based, so clamping with the byte length would place the
+            // start before the string for multi-byte input.
+            int64_t char_count = 0;
+            for (int j = 0; j < byte_size; j += get_utf8_byte_length(row_data[j])) {
+                ++char_count;
+            }
+
+            // Don't index past the beginning of the string. The arithmetic is
+            // done in 64 bits so that negating INT32_MIN is well defined; the
+            // result is then brought back into Int32 range.
+            const int64_t start =
+                    std::max<int64_t>(-static_cast<int64_t>(pos_data[i]), -char_count);
+            start_data[i] = static_cast<int32_t>(
+                    std::min<int64_t>(start, std::numeric_limits<int32_t>::max()));
+        }
+
+        block.insert({std::move(start_column), int_type, "index"});
+
+        ColumnNumbers temp_arguments(3);
+        temp_arguments[0] = arguments[0];
+        temp_arguments[1] = start_position;
+        temp_arguments[2] = arguments[1];
+        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
+        return Status::OK();
+    }
+};
+
+// null_or_empty(str): UInt8 flag per row that is non-zero when the input row
+// is NULL or holds a zero-length string.
+class FunctionNullOrEmpty : public IFunction {
+public:
+    static constexpr auto name = "null_or_empty";
+    static FunctionPtr create() { return std::make_shared<FunctionNullOrEmpty>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 1; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeUInt8>();
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto res_map = ColumnUInt8::create(input_rows_count, 0);
+        auto& flags = res_map->get_data();
+
+        auto column = block.get_by_position(arguments[0]).column;
+        if (auto* nullable = check_and_get_column<const ColumnNullable>(*column)) {
+            // Start from the input's null flags, then continue with the nested
+            // string column.
+            VectorizedUtils::update_null_map(flags, nullable->get_null_map_data());
+            column = nullable->get_nested_column_ptr();
+        }
+        const auto& offsets = assert_cast<const ColumnString*>(column.get())->get_offsets();
+
+        const int row_count = static_cast<int>(input_rows_count);
+        for (int row = 0; row < row_count; ++row) {
+            // Stored rows carry a trailing '\0' that is not part of the value.
+            const int value_size = offsets[row] - offsets[row - 1] - 1;
+            flags[row] |= (value_size == 0);
+        }
+
+        block.replace_by_position(result, std::move(res_map));
+        return Status::OK();
+    }
+};
+
+// concat(s1, s2, ...): row-wise concatenation of all string arguments.
+// NULL and constant arguments use the default handling, so execute_impl
+// expects plain (optionally const) string columns.
+class FunctionStringConcat : public IFunction {
+public:
+    static constexpr auto name = "concat";
+    static FunctionPtr create() { return std::make_shared<FunctionStringConcat>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 0; }
+    bool is_variadic() const override { return true; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeString>();
+    }
+    bool use_default_implementation_for_nulls() const override { return true; }
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    // Stores the concatenated string column at position `result` of `block`.
+    // With a single argument the input column is reused as the result.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        DCHECK_GE(arguments.size(), 1);
+
+        if (arguments.size() == 1) {
+            block.get_by_position(result).column = block.get_by_position(arguments[0]).column;
+            return Status::OK();
+        }
+
+        const size_t argument_size = arguments.size();
+        // std::vector instead of a runtime-sized array: arrays of run-time
+        // length are a compiler extension, not standard C++.
+        std::vector<ColumnPtr> argument_columns(argument_size);
+
+        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
+        std::vector<const ColumnString::Chars*> chars_list(argument_size);
+
+        for (size_t i = 0; i < argument_size; ++i) {
+            argument_columns[i] =
+                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            auto col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
+            offsets_list[i] = &col_str->get_offsets();
+            chars_list[i] = &col_str->get_chars();
+        }
+
+        auto res = ColumnString::create();
+        auto& res_data = res->get_chars();
+        auto& res_offset = res->get_offsets();
+
+        res_offset.resize(input_rows_count);
+
+        // Total payload size of the result. size_t, so that large inputs cannot
+        // overflow a 32-bit accumulator.
+        size_t res_reserve_size = 0;
+        // we could ignore null string column
+        // but it's not necessary to ignore it
+        for (size_t i = 0; i < offsets_list.size(); ++i) {
+            for (size_t j = 0; j < input_rows_count; ++j) {
+                res_reserve_size += (*offsets_list[i])[j] - (*offsets_list[i])[j - 1] - 1;
+            }
+        }
+        // for each terminal zero
+        res_reserve_size += input_rows_count;
+
+        res_data.resize(res_reserve_size);
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            // Output row i starts where row i - 1 ended.
+            auto* row_begin = &res_data[res_offset[i - 1]];
+            size_t current_length = 0;
+            for (size_t j = 0; j < offsets_list.size(); ++j) {
+                auto& current_offsets = *offsets_list[j];
+                auto& current_chars = *chars_list[j];
+
+                // Stored rows carry a trailing '\0' that is not copied.
+                const size_t size = current_offsets[i] - current_offsets[i - 1] - 1;
+                memcpy(row_begin + current_length, &current_chars[current_offsets[i - 1]], size);
+                current_length += size;
+            }
+            // add terminal zero
+            *(row_begin + current_length) = '\0';
+            current_length++;
+            res_offset[i] = res_offset[i - 1] + current_length;
+        }
+
+        block.get_by_position(result).column = std::move(res);
+        return Status::OK();
+    }
+};
+
+// concat_ws(separator, string, string, ...)
+// Joins the non-NULL string arguments of each row with the row's separator.
+// A NULL separator yields a NULL row (when the separator type is nullable);
+// NULL strings are skipped.
+// TODO: avoid using fmtlib
+class FunctionStringConcatWs : public IFunction {
+public:
+    static constexpr auto name = "concat_ws";
+    static FunctionPtr create() { return std::make_shared<FunctionStringConcatWs>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 0; }
+    bool is_variadic() const override { return true; }
+
+    // The result is nullable exactly when the separator (first argument) is.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        const IDataType* first_type = arguments[0].get();
+        if (first_type->is_nullable())
+            return make_nullable(std::make_shared<DataTypeString>());
+        else
+            return std::make_shared<DataTypeString>();
+    }
+    bool use_default_implementation_for_nulls() const override { return false; }
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        DCHECK_GE(arguments.size(), 2);
+        auto null_map = ColumnUInt8::create(input_rows_count, 0);
+        // All-zero null map shared by every non-nullable argument, so the row
+        // loop can treat all arguments uniformly.
+        auto const_null_map = ColumnUInt8::create(input_rows_count, 0);
+        auto res = ColumnString::create();
+        bool is_null_type = block.get_by_position(arguments[0]).type.get()->is_nullable();
+        size_t argument_size = arguments.size();
+        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
+        std::vector<const ColumnString::Chars*> chars_list(argument_size);
+        std::vector<const ColumnUInt8::Container*> null_list(argument_size);
+
+        // `full_columns` keeps every expanded argument alive for the whole call:
+        // `null_list` points into the null maps of nullable arguments, and for a
+        // const argument the expanded column has no other owner.
+        // std::vector is used because arrays of run-time length are a compiler
+        // extension, not standard C++.
+        std::vector<ColumnPtr> full_columns(argument_size);
+        std::vector<ColumnPtr> argument_columns(argument_size);
+
+        for (size_t i = 0; i < argument_size; ++i) {
+            full_columns[i] =
+                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            argument_columns[i] = full_columns[i];
+            if (auto* nullable = check_and_get_column<const ColumnNullable>(*full_columns[i])) {
+                argument_columns[i] = nullable->get_nested_column_ptr();
+                null_list[i] = &nullable->get_null_map_data();
+            } else {
+                null_list[i] = &const_null_map->get_data();
+            }
+            auto col_str = assert_cast<const ColumnString*>(argument_columns[i].get());
+            offsets_list[i] = &col_str->get_offsets();
+            chars_list[i] = &col_str->get_chars();
+        }
+
+        auto& res_data = res->get_chars();
+        auto& res_offset = res->get_offsets();
+        res_offset.resize(input_rows_count);
+
+        // Only a NULL separator makes the result row NULL.
+        VectorizedUtils::update_null_map(null_map->get_data(), *null_list[0]);
+        fmt::memory_buffer buffer;
+        std::vector<std::string_view> views;
+
+        auto& seq_offsets = *offsets_list[0];
+        auto& seq_chars = *chars_list[0];
+        auto& seq_nullmap = *null_list[0];
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (seq_nullmap[i]) {
+                // Placeholder payload for a NULL row.
+                StringOP::push_empty_string(i, res_data, res_offset);
+                continue;
+            }
+
+            // Stored rows carry a trailing '\0' that is not part of the value.
+            int seq_size = seq_offsets[i] - seq_offsets[i - 1] - 1;
+            const char* seq_data = reinterpret_cast<const char*>(&seq_chars[seq_offsets[i - 1]]);
+
+            std::string_view seq(seq_data, seq_size);
+            buffer.clear();
+            views.clear();
+            for (size_t j = 1; j < argument_size; ++j) {
+                auto& current_nullmap = *null_list[j];
+                if (current_nullmap[i]) {
+                    // NULL strings do not take part in the join.
+                    continue;
+                }
+                auto& current_offsets = *offsets_list[j];
+                auto& current_chars = *chars_list[j];
+                int size = current_offsets[i] - current_offsets[i - 1] - 1;
+                const char* ptr =
+                        reinterpret_cast<const char*>(&current_chars[current_offsets[i - 1]]);
+                views.emplace_back(ptr, size);
+            }
+            fmt::format_to(buffer, "{}", fmt::join(views, seq));
+            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i, res_data,
+                                        res_offset);
+        }
+        if (is_null_type) {
+            block.get_by_position(result).column =
+                    ColumnNullable::create(std::move(res), std::move(null_map));
+        } else {
+            block.get_by_position(result).column = std::move(res);
+        }
+        return Status::OK();
+    }
+};
+
+// repeat(str, n): `str` concatenated with itself `n` times. A repeat count
+// that is zero or negative produces an empty string.
+class FunctionStringRepeat : public IFunction {
+public:
+    static constexpr auto name = "repeat";
+    static FunctionPtr create() { return std::make_shared<FunctionStringRepeat>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 2; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeString>();
+    }
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    // Stores the repeated strings at position `result` of `block`. Reports a
+    // RuntimeError unless the arguments are a string column and an Int32
+    // column (after const expansion).
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        DCHECK_EQ(arguments.size(), 2);
+
+        ColumnPtr argument_ptr[2];
+        for (int k = 0; k < 2; ++k) {
+            argument_ptr[k] =
+                    block.get_by_position(arguments[k]).column->convert_to_full_column_if_const();
+        }
+
+        const auto* str_column = check_and_get_column<ColumnString>(*argument_ptr[0]);
+        const auto* repeat_column =
+                str_column ? check_and_get_column<ColumnInt32>(*argument_ptr[1]) : nullptr;
+        if (repeat_column == nullptr) {
+            return Status::RuntimeError(fmt::format("not support {}", get_name()));
+        }
+
+        auto res = ColumnString::create();
+        vector_vector(str_column->get_chars(), str_column->get_offsets(),
+                      repeat_column->get_data(), res->get_chars(), res->get_offsets());
+        block.replace_by_position(result, std::move(res));
+        return Status::OK();
+    }
+
+    // Row-wise kernel: writes row i of (`data`, `offsets`) `repeats[i]` times
+    // into (`res_data`, `res_offsets`).
+    void vector_vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                       const ColumnInt32::Container& repeats, ColumnString::Chars& res_data,
+                       ColumnString::Offsets& res_offsets) {
+        const size_t input_row_size = offsets.size();
+        // Scratch buffer reused across rows.
+        fmt::memory_buffer buffer;
+        res_offsets.resize(input_row_size);
+        for (size_t row = 0; row < input_row_size; ++row) {
+            buffer.clear();
+            const char* raw_str = reinterpret_cast<const char*>(&data[offsets[row - 1]]);
+            // Stored rows carry a trailing '\0' that is not part of the value.
+            const int size = offsets[row] - offsets[row - 1] - 1;
+            const int repeat = repeats[row];
+            // assert size * repeat won't exceed
+            DCHECK_LE(static_cast<int64_t>(size) * repeat, std::numeric_limits<int32_t>::max());
+            for (int times = 0; times < repeat; ++times) {
+                buffer.append(raw_str, raw_str + size);
+            }
+            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), row,
+                                        res_data, res_offsets);
+        }
+    }
+};
+
+// lpad/rpad(str, len, pad): produces a string of `len` UTF-8 characters.
+//   * `str` longer than `len` is truncated to its first `len` characters;
+//   * otherwise `pad` is repeated (cyclically) on the left (lpad) or on the
+//     right (rpad) until `len` characters are reached;
+//   * the row is NULL when `len` is negative, or when padding is required but
+//     `pad` is empty.
+// `Impl` supplies `name` and the compile-time flag `is_lpad`.
+template <typename Impl>
+class FunctionStringPad : public IFunction {
+public:
+    static constexpr auto name = Impl::name;
+    static FunctionPtr create() { return std::make_shared<FunctionStringPad>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 3; }
+
+    // NOTE(review): the declared type is a non-nullable string while
+    // execute_impl stores a ColumnNullable in the result slot; confirm that the
+    // calling framework reconciles the two.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeString>();
+    }
+    bool use_default_implementation_for_nulls() const override { return true; }
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        DCHECK_GE(arguments.size(), 3);
+        auto null_map = ColumnUInt8::create(input_rows_count, 0);
+        auto res = ColumnString::create();
+
+        const size_t argument_size = arguments.size();
+        // std::vector instead of a runtime-sized array: arrays of run-time
+        // length are a compiler extension, not standard C++.
+        std::vector<ColumnPtr> argument_columns(argument_size);
+        for (size_t i = 0; i < argument_size; ++i) {
+            argument_columns[i] =
+                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            if (auto* nullable = check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
+                // Merge the null map BEFORE rebinding argument_columns[i]. When the
+                // argument was const, argument_columns[i] may be the only owner of
+                // the expanded nullable column, so `nullable` must not be touched
+                // once that owner has been pointed at the nested column.
+                VectorizedUtils::update_null_map(null_map->get_data(),
+                                                 nullable->get_null_map_data());
+                ColumnPtr nested = nullable->get_nested_column_ptr();
+                argument_columns[i] = nested;
+            }
+        }
+
+        auto& null_map_data = null_map->get_data();
+        auto& res_offsets = res->get_offsets();
+        auto& res_chars = res->get_chars();
+        res_offsets.resize(input_rows_count);
+
+        auto strcol = assert_cast<const ColumnString*>(argument_columns[0].get());
+        auto& strcol_offsets = strcol->get_offsets();
+        auto& strcol_chars = strcol->get_chars();
+
+        auto col_len = assert_cast<const ColumnInt32*>(argument_columns[1].get());
+        auto& col_len_data = col_len->get_data();
+
+        auto padcol = assert_cast<const ColumnString*>(argument_columns[2].get());
+        auto& padcol_offsets = padcol->get_offsets();
+        auto& padcol_chars = padcol->get_chars();
+
+        // Byte offsets of every character of the current row's str / pad.
+        std::vector<size_t> str_index;
+        std::vector<size_t> pad_index;
+
+        // Scratch buffer for the padded row, reused across rows.
+        fmt::memory_buffer buffer;
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            str_index.clear();
+            pad_index.clear();
+            buffer.clear();
+            if (null_map_data[i] || col_len_data[i] < 0) {
+                // return NULL when input string is NULL or input length is invalid number
+                null_map_data[i] = true;
+                StringOP::push_empty_string(i, res_chars, res_offsets);
+                continue;
+            }
+
+            // Stored rows carry a trailing '\0' that is not part of the value.
+            int str_len = strcol_offsets[i] - strcol_offsets[i - 1] - 1;
+            const char* str_data =
+                    reinterpret_cast<const char*>(&strcol_chars[strcol_offsets[i - 1]]);
+
+            int pad_len = padcol_offsets[i] - padcol_offsets[i - 1] - 1;
+            const char* pad_data =
+                    reinterpret_cast<const char*>(&padcol_chars[padcol_offsets[i - 1]]);
+
+            size_t str_char_size = get_char_len(std::string_view(str_data, str_len), &str_index);
+            size_t pad_char_size = get_char_len(std::string_view(pad_data, pad_len), &pad_index);
+
+            if (col_len_data[i] <= str_char_size) {
+                // No padding needed: keep the first `len` characters of the input
+                // (the whole input when the lengths are equal).
+                size_t kept_bytes = str_len;
+                if (col_len_data[i] < str_char_size) {
+                    kept_bytes = str_index[col_len_data[i]];
+                }
+                StringOP::push_value_string(std::string_view(str_data, kept_bytes), i, res_chars,
+                                            res_offsets);
+                continue;
+            }
+            if (pad_char_size == 0) {
+                // return NULL when the string to pad with is missing
+                null_map_data[i] = true;
+                StringOP::push_empty_string(i, res_chars, res_offsets);
+                continue;
+            }
+
+            // Number of whole copies of `pad`, plus the leading characters of one
+            // more copy, converted to a byte count.
+            int32_t pad_times = (col_len_data[i] - str_char_size) / pad_char_size;
+            int32_t pad_remainder = (col_len_data[i] - str_char_size) % pad_char_size;
+            int32_t pad_byte_len = pad_times * pad_len + pad_index[pad_remainder];
+
+            if constexpr (Impl::is_lpad) {
+                // Prepend chars of pad, cycling through its bytes.
+                int pad_idx = 0;
+                for (int32_t written = 0; written < pad_byte_len; ++written) {
+                    buffer.push_back(pad_data[pad_idx++]);
+                    pad_idx = pad_idx % pad_len;
+                }
+
+                // Append given string.
+                buffer.append(str_data, str_data + str_len);
+            } else {
+                // is rpad
+                buffer.append(str_data, str_data + str_len);
+
+                // Append chars of pad until desired length
+                int pad_idx = 0;
+                for (int32_t written = 0; written < pad_byte_len; ++written) {
+                    buffer.push_back(pad_data[pad_idx++]);
+                    pad_idx = pad_idx % pad_len;
+                }
+            }
+            StringOP::push_value_string(std::string_view(buffer.data(), buffer.size()), i,
+                                        res_chars, res_offsets);
+        }
+
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(res), std::move(null_map));
+        return Status::OK();
+    }
+};
+
+// split_part(str, delimiter, n): returns the n-th (1-based) field of str split by delimiter.
+// A row is NULL when n <= 0, when any argument is NULL, or when no n-th field is found.
+class FunctionSplitPart : public IFunction {
+public:
+    static constexpr auto name = "split_part";
+    static FunctionPtr create() { return std::make_shared<FunctionSplitPart>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 3; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return make_nullable(std::make_shared<DataTypeString>());
+    }
+
+    bool use_default_implementation_for_nulls() const override { return false; }
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        DCHECK_EQ(arguments.size(), 3);
+
+        auto null_map = ColumnUInt8::create(input_rows_count, 0);
+        auto res = ColumnString::create();
+
+        auto& null_map_data = null_map->get_data();
+        auto& res_offsets = res->get_offsets();
+        auto& res_chars = res->get_chars();
+        res_offsets.resize(input_rows_count);
+
+        constexpr size_t argument_size = 3;
+        ColumnPtr argument_columns[argument_size];
+        for (size_t i = 0; i < argument_size; ++i) {
+            argument_columns[i] =
+                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            if (auto* nullable = check_and_get_column<const ColumnNullable>(*argument_columns[i])) {
+                // Read the null map first: the reassignment below may free `nullable` itself.
+                VectorizedUtils::update_null_map(null_map->get_data(),
+                                                 nullable->get_null_map_data());
+                argument_columns[i] = nullable->get_nested_column_ptr();
+            }
+        }
+
+        auto str_col = assert_cast<const ColumnString*>(argument_columns[0].get());
+
+        auto delimiter_col = assert_cast<const ColumnString*>(argument_columns[1].get());
+
+        auto part_num_col = assert_cast<const ColumnInt32*>(argument_columns[2].get());
+        auto& part_num_col_data = part_num_col->get_data();
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            if (part_num_col_data[i] <= 0) {
+                StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
+                continue;
+            }
+
+            auto delimiter = delimiter_col->get_data_at(i);
+            auto part_number = part_num_col_data[i];
+            auto str = str_col->get_data_at(i);
+            if (delimiter.size == 0) {
+                StringOP::push_empty_string(i, res_chars, res_offsets);
+            } else if (delimiter.size == 1) {
+                // If delimiter is a char, use memchr to split
+                int32_t pre_offset = -1;
+                int32_t offset = -1;
+                int32_t num = 0;
+                while (num < part_number) {
+                    pre_offset = offset;
+                    size_t n = str.size - offset - 1;
+                    const char* pos = reinterpret_cast<const char*>(
+                            memchr(str.data + offset + 1, delimiter.data[0], n));
+                    if (pos != nullptr) {
+                        offset = pos - str.data;
+                        num++;
+                    } else {
+                        offset = str.size;
+                        num = (num == 0) ? 0 : num + 1;
+                        break;
+                    }
+                }
+
+                if (num == part_number) {
+                    StringOP::push_value_string(
+                            std::string_view {
+                                    reinterpret_cast<const char*>(str.data + pre_offset + 1),
+                                    (size_t)offset - pre_offset - 1},
+                            i, res_chars, res_offsets);
+                } else {
+                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
+                }
+            } else {
+                // If delimiter is a string, use memmem to split
+                int32_t pre_offset = -delimiter.size;
+                int32_t offset = -delimiter.size;
+                int32_t num = 0;
+                while (num < part_number) {
+                    pre_offset = offset;
+                    size_t n = str.size - offset - delimiter.size;
+                    char* pos = reinterpret_cast<char*>(memmem(str.data + offset + delimiter.size,
+                                                               n, delimiter.data, delimiter.size));
+                    if (pos != nullptr) {
+                        offset = pos - str.data;
+                        num++;
+                    } else {
+                        offset = str.size;
+                        num = (num == 0) ? 0 : num + 1;
+                        break;
+                    }
+                }
+
+                if (num == part_number) {
+                    StringOP::push_value_string(
+                            std::string_view {reinterpret_cast<const char*>(str.data + pre_offset +
+                                                                            delimiter.size),
+                                              (size_t)offset - pre_offset - delimiter.size},
+                            i, res_chars, res_offsets);
+                } else {
+                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
+                }
+            }
+        }
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(res), std::move(null_map));
+        return Status::OK();
+    }
+};
+
+// md5sum(s1, s2, ...): per row, the hex MD5 digest of all string arguments fed in argument order.
+class FunctionStringMd5sum : public IFunction {
+public:
+    static constexpr auto name = "md5sum";
+    static FunctionPtr create() { return std::make_shared<FunctionStringMd5sum>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 0; }
+    bool is_variadic() const override { return true; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeString>();
+    }
+    bool use_default_implementation_for_nulls() const override { return true; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        DCHECK_GE(arguments.size(), 1);
+
+        int argument_size = arguments.size();
+        std::vector<ColumnPtr> argument_columns(argument_size);
+
+        std::vector<const ColumnString::Offsets*> offsets_list(argument_size);
+        std::vector<const ColumnString::Chars*> chars_list(argument_size);
+
+        for (int i = 0; i < argument_size; ++i) {
+            argument_columns[i] =
+                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            if (auto col_str = check_and_get_column<ColumnString>(argument_columns[i].get())) {
+                offsets_list[i] = &col_str->get_offsets();
+                chars_list[i] = &col_str->get_chars();
+            } else {
+                return Status::RuntimeError(fmt::format(
+                        "Illegal column {} of argument of function {}",
+                        block.get_by_position(arguments[i]).column->get_name(), get_name()));
+            }
+        }
+
+        auto res = ColumnString::create();
+        auto& res_data = res->get_chars();
+        auto& res_offset = res->get_offsets();
+        res_offset.resize(input_rows_count);
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            Md5Digest digest;
+            for (size_t j = 0; j < offsets_list.size(); ++j) {
+                auto& current_offsets = *offsets_list[j];
+                auto& current_chars = *chars_list[j];
+
+                int size = current_offsets[i] - current_offsets[i - 1] - 1;
+                if (size < 1) {
+                    continue;
+                }
+                digest.update(&current_chars[current_offsets[i - 1]], size);
+            }
+            digest.digest();
+
+            StringOP::push_value_string(std::string_view(digest.hex().c_str(), digest.hex().size()),
+                                        i, res_data, res_offset);
+        }
+
+        block.replace_by_position(result, std::move(res));
+        return Status::OK();
+    }
+};
+
+// parse_url(url, part[, key]): returns what UrlParser extracts for `part` (and `key`, if given).
+// A row is NULL when parsing fails; an invalid `part` name fails the whole call with an error.
+class FunctionStringParseUrl : public IFunction {
+public:
+    static constexpr auto name = "parse_url";
+    static FunctionPtr create() { return std::make_shared<FunctionStringParseUrl>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 0; }
+    bool is_variadic() const override { return true; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return make_nullable(std::make_shared<DataTypeString>());
+    }
+    bool use_default_implementation_for_nulls() const override { return true; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto null_map = ColumnUInt8::create(input_rows_count, 0);
+        auto& null_map_data = null_map->get_data();
+
+        auto res = ColumnString::create();
+        auto& res_offsets = res->get_offsets();
+        auto& res_chars = res->get_chars();
+        res_offsets.resize(input_rows_count);
+
+        size_t argument_size = arguments.size();
+        bool has_key = argument_size >= 3;
+        if (argument_size < 2) {
+            return Status::InternalError("parse_url requires at least 2 arguments");
+        }
+
+        std::vector<ColumnPtr> argument_columns(argument_size);
+        for (size_t i = 0; i < argument_size; ++i) {
+            argument_columns[i] =
+                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+        }
+
+        const auto* url_col = check_and_get_column<ColumnString>(argument_columns[0].get());
+        const auto* part_col = check_and_get_column<ColumnString>(argument_columns[1].get());
+        const ColumnString* key_col = nullptr;
+        if (has_key) {
+            key_col = check_and_get_column<ColumnString>(argument_columns[2].get());
+        }
+
+        if (!url_col || !part_col || (has_key && !key_col)) {
+            return Status::InternalError("Not supported input arguments types");
+        }
+
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            auto part = part_col->get_data_at(i);
+            StringValue p(const_cast<char*>(part.data), part.size);
+            UrlParser::UrlPart url_part = UrlParser::get_url_part(p);
+            StringValue url_key;
+            if (has_key) {
+                auto key = key_col->get_data_at(i);
+                url_key = StringValue(const_cast<char*>(key.data), key.size);
+            }
+
+            auto source = url_col->get_data_at(i);
+            StringValue url_val(const_cast<char*>(source.data), source.size);
+
+            StringValue parse_res;
+            bool success = false;
+            if (has_key) {
+                success = UrlParser::parse_url_key(url_val, url_part, url_key, &parse_res);
+            } else {
+                success = UrlParser::parse_url(url_val, url_part, &parse_res);
+            }
+
+            if (!success) {
+                // url is malformed, or url_part is invalid.
+                if (url_part == UrlParser::INVALID) {
+                    return Status::RuntimeError(fmt::format(
+                            "Invalid URL part: {}\n{}", std::string(part.data, part.size),
+                            "(Valid URL parts are 'PROTOCOL', 'HOST', 'PATH', 'REF', 'AUTHORITY', "
+                            "'FILE', 'USERINFO', 'PORT' and 'QUERY')"));
+                } else {
+                    StringOP::push_null_string(i, res_chars, res_offsets, null_map_data);
+                    continue;
+                }
+            }
+
+            StringOP::push_value_string(std::string_view(parse_res.ptr, parse_res.len), i,
+                                        res_chars, res_offsets);
+        }
+        block.get_by_position(result).column =
+                ColumnNullable::create(std::move(res), std::move(null_map));
+        return Status::OK();
+    }
+};
+
+// Vectorized money_format(x); the type-specific formatting is supplied by Impl::execute.
+template <typename Impl>
+class FunctionMoneyFormat : public IFunction {
+public:
+    static constexpr auto name = "money_format";
+    static FunctionPtr create() { return std::make_shared<FunctionMoneyFormat<Impl>>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 1; }
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    DataTypes get_variadic_argument_types_impl() const override {
+        return Impl::get_variadic_argument_types();
+    }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        ColumnPtr input = block.get_by_position(arguments[0]).column;
+        const auto* typed_input = assert_cast<const typename Impl::ColumnType*>(input.get());
+
+        auto output = ColumnString::create();
+        Impl::execute(context, assert_cast<ColumnString*>(output.get()), typed_input,
+                      input_rows_count);
+
+        block.replace_by_position(result, std::move(output));
+        return Status::OK();
+    }
+};
+
+// money_format for Float64 columns; values are rendered with two decimals ("{:.2f}").
+struct MoneyFormatDoubleImpl {
+    using ColumnType = ColumnVector<Float64>;
+    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeFloat64>()}; }
+    static void execute(FunctionContext* context, ColumnString* result_column,
+                        const ColumnType* data_column, size_t input_rows_count) {
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            const double rounded =
+                    MathFunctions::my_double_round(data_column->get_element(row), 2, false, false);
+            StringVal text =
+                    StringFunctions::do_money_format(context, fmt::format("{:.2f}", rounded));
+            result_column->insert_data(reinterpret_cast<const char*>(text.ptr), text.len);
+        }
+    }
+};
+
+// money_format for Int64 columns.
+struct MoneyFormatInt64Impl {
+    using ColumnType = ColumnVector<Int64>;
+    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt64>()}; }
+
+    static void execute(FunctionContext* context, ColumnString* result_column,
+                        const ColumnType* data_column, size_t input_rows_count) {
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            StringVal text = StringFunctions::do_money_format<Int64, 26>(
+                    context, data_column->get_element(row));
+            result_column->insert_data(reinterpret_cast<const char*>(text.ptr), text.len);
+        }
+    }
+};
+
+// money_format for Int128 columns.
+struct MoneyFormatInt128Impl {
+    using ColumnType = ColumnVector<Int128>;
+    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeInt128>()}; }
+
+    static void execute(FunctionContext* context, ColumnString* result_column,
+                        const ColumnType* data_column, size_t input_rows_count) {
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            StringVal text = StringFunctions::do_money_format<Int128, 52>(
+                    context, data_column->get_element(row));
+            result_column->insert_data(reinterpret_cast<const char*>(text.ptr), text.len);
+        }
+    }
+};
+
+// money_format for Decimal128 columns: each value is rounded with round(&out, 2, HALF_UP) first.
+struct MoneyFormatDecimalImpl {
+    using ColumnType = ColumnDecimal<Decimal128>;
+
+    static DataTypes get_variadic_argument_types() {
+        return {std::make_shared<DataTypeDecimal<Decimal128>>(27, 9)};
+    }
+
+    static void execute(FunctionContext* context, ColumnString* result_column,
+                        const ColumnType* data_column, size_t input_rows_count) {
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            DecimalV2Val raw = DecimalV2Val(data_column->get_element(row));
+            DecimalV2Value rounded(0);
+            DecimalV2Value::from_decimal_val(raw).round(&rounded, 2, HALF_UP);
+
+            const auto fraction = abs(rounded.frac_value() / 10000000);
+            StringVal text = StringFunctions::do_money_format<int64_t, 26>(
+                    context, rounded.int_value(), fraction);
+            result_column->insert_data(reinterpret_cast<const char*>(text.ptr), text.len);
+        }
+    }
+};
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/function_string_to_string.h b/be/src/vec/functions/function_string_to_string.h
new file mode 100644
index 0000000000..cd4ae44519
--- /dev/null
+++ b/be/src/vec/functions/function_string_to_string.h
@@ -0,0 +1,78 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionStringToString.h
+// and modified by Doris
+#pragma once
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+
+namespace doris::vectorized {
+
+// Adapter exposing a string -> string kernel (Impl::vector) as a one-argument function.
+template <typename Impl, typename Name, bool is_injective = false>
+class FunctionStringToString : public IFunction {
+public:
+    static constexpr auto name = Name::name;
+    static constexpr bool has_variadic_argument =
+            !std::is_void_v<decltype(has_variadic_argument_types(std::declval<Impl>()))>;
+
+    static FunctionPtr create() { return std::make_shared<FunctionStringToString>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 1; }
+    bool get_is_injective(const Block&) override { return is_injective; }
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        if (is_string_or_fixed_string(arguments[0])) {
+            return arguments[0];
+        }
+        LOG(FATAL) << fmt::format("Illegal type {} of argument of function {}",
+                                  arguments[0]->get_name(), get_name());
+        return arguments[0];
+    }
+
+    DataTypes get_variadic_argument_types_impl() const override {
+        if constexpr (has_variadic_argument) {
+            return Impl::get_variadic_argument_types();
+        } else {
+            return {};
+        }
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const ColumnPtr source = block.get_by_position(arguments[0]).column;
+        const auto* strings = check_and_get_column<ColumnString>(source.get());
+        if (strings == nullptr) {
+            return Status::RuntimeError(fmt::format(
+                    "Illegal column {} of argument of function {}",
+                    block.get_by_position(arguments[0]).column->get_name(), get_name()));
+        }
+        auto converted = ColumnString::create();
+        Impl::vector(strings->get_chars(), strings->get_offsets(), converted->get_chars(),
+                     converted->get_offsets());
+        block.replace_by_position(result, std::move(converted));
+        return Status::OK();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp
new file mode 100644
index 0000000000..f5216e94cc
--- /dev/null
+++ b/be/src/vec/functions/function_timestamp.cpp
@@ -0,0 +1,325 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "runtime/datetime_value.h"
+#include "runtime/runtime_state.h"
+#include "udf/udf_internal.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type_date.h"
+#include "vec/data_types/data_type_date_time.h"
+#include "vec/functions/function_totype.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/runtime/vdatetime_value.h"
+
+namespace doris::vectorized {
+
+// str_to_date kernel: parses each left-hand string using the right-hand string as its format.
+struct StrToDate {
+    static constexpr auto name = "str_to_date";
+    using ReturnType = DataTypeDateTime;
+    using ColumnType = ColumnVector<Int64>;
+
+    static void vector_vector(const ColumnString::Chars& ldata,
+                              const ColumnString::Offsets& loffsets,
+                              const ColumnString::Chars& rdata,
+                              const ColumnString::Offsets& roffsets, ColumnType::Container& res,
+                              NullMap& null_map) {
+        const size_t rows = loffsets.size();
+        res.resize(rows);
+        for (size_t row = 0; row < rows; ++row) {
+            const char* str_ptr = reinterpret_cast<const char*>(&ldata[loffsets[row - 1]]);
+            const char* fmt_ptr = reinterpret_cast<const char*>(&rdata[roffsets[row - 1]]);
+            const int str_len = loffsets[row] - loffsets[row - 1] - 1;
+            const int fmt_len = roffsets[row] - roffsets[row - 1] - 1;
+
+            auto& parsed = *reinterpret_cast<VecDateTimeValue*>(&res[row]);
+            if (!parsed.from_date_format_str(fmt_ptr, fmt_len, str_ptr, str_len)) {
+                null_map[row] = 1;
+            }
+            parsed.to_datetime();
+        }
+    }
+};
+
+struct NameMakeDate { // Name tag handed to FunctionBinaryToNullType by FunctionMakeDate.
+    static constexpr auto name = "makedate";
+};
+
+template <typename LeftDataType, typename RightDataType>
+struct MakeDateImpl { // Row-wise kernel used by FunctionMakeDate ("makedate").
+    using ResultDataType = DataTypeDateTime;
+    using LeftDataColumnType = ColumnVector<typename LeftDataType::FieldType>;
+    using RightDataColumnType = ColumnVector<typename RightDataType::FieldType>;
+    using ColumnType = ColumnVector<Int64>;
+    // Fills res[i] from (ldata[i], rdata[i]); rows that cannot be converted get null_map[i] = 1.
+    static void vector_vector(const typename LeftDataColumnType::Container& ldata,
+                              const typename RightDataColumnType::Container& rdata,
+                              ColumnType::Container& res, NullMap& null_map) {
+        auto len = ldata.size();
+        res.resize(len);
+
+        for (size_t i = 0; i < len; ++i) {
+            const auto& l = ldata[i];
+            const auto& r = rdata[i];
+            if (r <= 0 || l < 0 || l > 9999) { // only r >= 1 and 0 <= l <= 9999 are accepted
+                null_map[i] = 1;
+                continue;
+            }
+            // The Int64 result slot is reinterpreted and filled in place as a VecDateTimeValue.
+            auto& res_val = *reinterpret_cast<VecDateTimeValue*>(&res[i]);
+            // NOTE(review): the literal presumably encodes <l>-01-01 00:00:00 -- confirm.
+            VecDateTimeValue ts_value {l * 10000000000 + 101000000};
+            ts_value.set_type(TIME_DATE);
+            DateTimeVal ts_val;
+            ts_value.to_datetime_val(&ts_val);
+            if (ts_val.is_null) {
+                null_map[i] = 1;
+                continue;
+            }
+            // Shift the starting value by an interval of r - 1 DAY; failure marks the row NULL.
+            TimeInterval interval(DAY, r - 1, false);
+            res_val = VecDateTimeValue::from_datetime_val(ts_val);
+            if (!res_val.date_add_interval(interval, DAY)) {
+                null_map[i] = 1;
+                continue;
+            }
+            res_val.cast_to_date();
+        }
+    }
+};
+
+// from_days(n): converts a day number into a DATE; the row is NULL when from_date_daynr() fails.
+class FromDays : public IFunction {
+public:
+    static constexpr auto name = "from_days";
+    static FunctionPtr create() { return std::make_shared<FromDays>(); }
+
+    String get_name() const override { return name; }
+
+    bool use_default_implementation_for_constants() const override { return false; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    bool use_default_implementation_for_nulls() const override { return true; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return make_nullable(std::make_shared<DataTypeDate>());
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto null_map = ColumnUInt8::create(input_rows_count, 0);
+        auto res_column = ColumnInt64::create(input_rows_count);
+        auto& res_data = assert_cast<ColumnInt64&>(*res_column).get_data();
+        ColumnPtr argument_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+
+        auto data_col = assert_cast<const ColumnVector<Int32>*>(argument_column.get());
+        for (size_t i = 0; i < input_rows_count; i++) {
+            const auto& cur_data = data_col->get_data()[i];
+            auto& ts_value = *reinterpret_cast<VecDateTimeValue*>(&res_data[i]);
+            if (!ts_value.from_date_daynr(cur_data)) {
+                null_map->get_data()[i] = 1;
+                continue;
+            }
+            DateTimeVal ts_val;
+            ts_value.to_datetime_val(&ts_val);
+            ts_value = VecDateTimeValue::from_datetime_val(ts_val);
+        }
+        block.replace_by_position(
+                result, ColumnNullable::create(std::move(res_column), std::move(null_map)));
+        return Status::OK();
+    }
+};
+
+using FunctionStrToDate = FunctionBinaryStringOperateToNullType<StrToDate>; // "str_to_date"
+using FunctionMakeDate = // "makedate" over two Int32 arguments
+        FunctionBinaryToNullType<DataTypeInt32, DataTypeInt32, MakeDateImpl, NameMakeDate>;
+
+// unix_timestamp(): the RuntimeState timestamp in seconds, repeated for every input row.
+struct UnixTimeStampImpl {
+    // Values outside [0, INT_MAX] become 0; everything else is narrowed to Int32.
+    static Int32 trim_timestamp(Int64 timestamp) {
+        return (timestamp < 0 || timestamp > INT_MAX) ? 0 : static_cast<Int32>(timestamp);
+    }
+
+    static DataTypes get_variadic_argument_types() { return {}; }
+    static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) {
+        return std::make_shared<DataTypeInt32>();
+    }
+
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        // One value per input row, but never fewer than one (an empty block still yields a value).
+        const size_t rows = input_rows_count == 0 ? 1 : input_rows_count;
+        const Int32 now = static_cast<Int32>(context->impl()->state()->timestamp_ms() / 1000);
+        auto col_result = ColumnVector<Int32>::create(rows, now);
+        block.replace_by_position(result, std::move(col_result));
+        return Status::OK();
+    }
+};
+
+// unix_timestamp(date): Unix time of each value in the RuntimeState time zone. A row is NULL
+// when the input is NULL or when VecDateTimeValue::unix_timestamp() fails.
+struct UnixTimeStampDateImpl {
+    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeDate>()}; }
+
+    static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) {
+        return make_nullable(std::make_shared<DataTypeInt32>());
+    }
+
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        const ColumnPtr col_source = block.get_by_position(arguments[0]).column;
+
+        // Zero-fill both columns so that NULL rows never expose uninitialized memory.
+        auto col_result = ColumnVector<Int32>::create(input_rows_count, 0);
+        auto null_map = ColumnVector<UInt8>::create(input_rows_count, 0);
+
+        auto& col_result_data = col_result->get_data();
+        auto& null_map_data = null_map->get_data();
+
+        for (size_t i = 0; i < input_rows_count; i++) {
+            if (col_source->is_null_at(i)) {
+                null_map_data[i] = true;
+                continue;
+            }
+
+            StringRef source = col_source->get_data_at(i);
+            const VecDateTimeValue& ts_value =
+                    reinterpret_cast<const VecDateTimeValue&>(*source.data);
+            int64_t timestamp;
+            if (!ts_value.unix_timestamp(&timestamp, context->impl()->state()->timezone_obj())) {
+                null_map_data[i] = true;
+            } else {
+                // trim_timestamp() maps values outside [0, INT_MAX] to 0.
+                col_result_data[i] = UnixTimeStampImpl::trim_timestamp(timestamp);
+            }
+        }
+
+        block.replace_by_position(
+                result, ColumnNullable::create(std::move(col_result), std::move(null_map)));
+
+        return Status::OK();
+    }
+};
+
+struct UnixTimeStampDatetimeImpl : public UnixTimeStampDateImpl { // inherits the date kernel
+    static DataTypes get_variadic_argument_types() { // only the declared argument type differs
+        return {std::make_shared<DataTypeDateTime>()};
+    }
+};
+
+// unix_timestamp(str, format): parses str with format, then converts it to Unix time.
+// A row is NULL when an input is NULL, when parsing fails, or when the conversion fails.
+struct UnixTimeStampStrImpl {
+    static DataTypes get_variadic_argument_types() {
+        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
+    }
+
+    static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) {
+        return make_nullable(std::make_shared<DataTypeInt32>());
+    }
+
+    static Status execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t input_rows_count) {
+        const ColumnPtr col_source = block.get_by_position(arguments[0]).column;
+        const ColumnPtr col_format = block.get_by_position(arguments[1]).column;
+
+        // Zero-fill both columns so that NULL rows never expose uninitialized memory.
+        auto col_result = ColumnVector<Int32>::create(input_rows_count, 0);
+        auto null_map = ColumnVector<UInt8>::create(input_rows_count, 0);
+
+        auto& col_result_data = col_result->get_data();
+        auto& null_map_data = null_map->get_data();
+
+        for (size_t i = 0; i < input_rows_count; i++) {
+            if (col_source->is_null_at(i) || col_format->is_null_at(i)) {
+                null_map_data[i] = true;
+                continue;
+            }
+
+            StringRef source = col_source->get_data_at(i);
+            StringRef fmt = col_format->get_data_at(i);
+
+            VecDateTimeValue ts_value;
+            if (!ts_value.from_date_format_str(fmt.data, fmt.size, source.data, source.size)) {
+                null_map_data[i] = true;
+                continue;
+            }
+
+            int64_t timestamp;
+            if (!ts_value.unix_timestamp(&timestamp, context->impl()->state()->timezone_obj())) {
+                null_map_data[i] = true;
+            } else {
+                // trim_timestamp() maps values outside [0, INT_MAX] to 0.
+                col_result_data[i] = UnixTimeStampImpl::trim_timestamp(timestamp);
+            }
+        }
+
+        block.replace_by_position(
+                result, ColumnNullable::create(std::move(col_result), std::move(null_map)));
+
+        return Status::OK();
+    }
+};
+
+// unix_timestamp front end: every overload forwards to the static members of its Impl.
+template <typename Impl>
+class FunctionUnixTimestamp : public IFunction {
+public:
+    static constexpr auto name = "unix_timestamp";
+    static FunctionPtr create() { return std::make_shared<FunctionUnixTimestamp<Impl>>(); }
+    String get_name() const override { return name; }
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    DataTypes get_variadic_argument_types_impl() const override {
+        return Impl::get_variadic_argument_types();
+    }
+
+    // The arity equals the number of declared variadic argument types.
+    size_t get_number_of_arguments() const override {
+        return get_variadic_argument_types_impl().size();
+    }
+
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
+        return Impl::get_return_type_impl(arguments);
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        return Impl::execute_impl(context, block, arguments, result, input_rows_count);
+    }
+};
+
+void register_function_timestamp(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionStrToDate>(); // "str_to_date"
+    factory.register_function<FunctionMakeDate>();  // "makedate"
+    factory.register_function<FromDays>();          // "from_days"
+    // One registration per unix_timestamp overload; they all share the name "unix_timestamp".
+    factory.register_function<FunctionUnixTimestamp<UnixTimeStampImpl>>();
+    factory.register_function<FunctionUnixTimestamp<UnixTimeStampDateImpl>>();
+    factory.register_function<FunctionUnixTimestamp<UnixTimeStampDatetimeImpl>>();
+    factory.register_function<FunctionUnixTimestamp<UnixTimeStampStrImpl>>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_totype.h b/be/src/vec/functions/function_totype.h
new file mode 100644
index 0000000000..72e74b31b3
--- /dev/null
+++ b/be/src/vec/functions/function_totype.h
@@ -0,0 +1,428 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <fmt/format.h>
+
+#include "vec/columns/column_complex.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_bitmap.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/cast_type_to_either.h"
+#include "vec/functions/function.h"
+#include "vec/utils/util.hpp"
+
+namespace doris::vectorized {
+
+// Adapter for single-argument functions. Supports string -> complex/primitive,
+// primitive/complex -> primitive/complex,
+// and primitive -> string. The per-row work is delegated to Impl::vector.
+template <typename Impl, typename Name>
+class FunctionUnaryToType : public IFunction {
+public:
+    static constexpr auto name = Name::name;
+    static constexpr bool has_variadic_argument = // true unless has_variadic_argument_types(Impl) yields void
+            !std::is_void_v<decltype(has_variadic_argument_types(std::declval<Impl>()))>;
+
+    static FunctionPtr create() { return std::make_shared<FunctionUnaryToType>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 1; }
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<typename Impl::ReturnType>(); // fixed by Impl, independent of `arguments`
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        return execute_impl<typename Impl::ReturnType>(block, arguments, result, input_rows_count); // selects a private overload
+    }
+
+    DataTypes get_variadic_argument_types_impl() const override {
+        if constexpr (has_variadic_argument) return Impl::get_variadic_argument_types();
+        return {}; // Impl declares no variadic signature
+    }
+
+private:
+    // handle result == DataTypeString: Impl::vector fills the result's chars/offsets from the input data
+    template <typename T, std::enable_if_t<std::is_same_v<T, DataTypeString>, T>* = nullptr>
+    Status execute_impl(Block& block, const ColumnNumbers& arguments, size_t result,
+                        size_t input_rows_count) {
+        const ColumnPtr column = block.get_by_position(arguments[0]).column;
+        if constexpr (std::is_integer(Impl::TYPE_INDEX)) { // NOTE(review): std::is_integer is not standard C++; presumably a project helper - confirm
+            if (auto* col = check_and_get_column<ColumnVector<typename Impl::Type>>(column.get())) {
+                auto col_res = Impl::ReturnColumnType::create();
+                RETURN_IF_ERROR(Impl::vector(col->get_data(), col_res->get_chars(),
+                                             col_res->get_offsets()));
+                block.replace_by_position(result, std::move(col_res));
+                return Status::OK();
+            }
+        } else if constexpr (is_complex_v<typename Impl::Type>) { // complex input is stored in ColumnComplexType
+            if (const auto* col = check_and_get_column<ColumnComplexType<typename Impl::Type>>(
+                        column.get())) {
+                auto col_res = Impl::ReturnColumnType::create();
+                RETURN_IF_ERROR(Impl::vector(col->get_data(), col_res->get_chars(),
+                                             col_res->get_offsets()));
+                block.replace_by_position(result, std::move(col_res));
+                return Status::OK();
+            }
+        }
+        // Reached when no branch above matched the runtime column type.
+        return Status::RuntimeError(
+                fmt::format("Illegal column {} of argument of function {}",
+                            block.get_by_position(arguments[0]).column->get_name(), get_name()));
+    }
+    template <typename T, std::enable_if_t<!std::is_same_v<T, DataTypeString>, T>* = nullptr> // handle result != DataTypeString
+    Status execute_impl(Block& block, const ColumnNumbers& arguments, size_t result,
+                        size_t input_rows_count) {
+        const ColumnPtr column = block.get_by_position(arguments[0]).column;
+        if constexpr (Impl::TYPE_INDEX == TypeIndex::String) { // string input: Impl::vector reads chars/offsets
+            if (const ColumnString* col = check_and_get_column<ColumnString>(column.get())) {
+                auto col_res = Impl::ReturnColumnType::create();
+                RETURN_IF_ERROR(
+                        Impl::vector(col->get_chars(), col->get_offsets(), col_res->get_data()));
+                block.replace_by_position(result, std::move(col_res));
+                return Status::OK();
+            }
+        } else if constexpr (std::is_integer(Impl::TYPE_INDEX)) { // numeric input: data -> data
+            if (const auto* col =
+                        check_and_get_column<ColumnVector<typename Impl::Type>>(column.get())) {
+                auto col_res = Impl::ReturnColumnType::create();
+                RETURN_IF_ERROR(Impl::vector(col->get_data(), col_res->get_data()));
+                block.replace_by_position(result, std::move(col_res));
+                return Status::OK();
+            }
+        } else if constexpr (is_complex_v<typename Impl::Type>) { // complex input: data -> data
+            if (const auto* col = check_and_get_column<ColumnComplexType<typename Impl::Type>>(
+                        column.get())) {
+                auto col_res = Impl::ReturnColumnType::create();
+                RETURN_IF_ERROR(Impl::vector(col->get_data(), col_res->get_data()));
+                block.replace_by_position(result, std::move(col_res));
+                return Status::OK();
+            }
+        }
+        return Status::RuntimeError( // no branch matched the runtime column type
+                fmt::format("Illegal column {} of argument of function {}",
+                            block.get_by_position(arguments[0]).column->get_name(), get_name()));
+    }
+};
+
+template <typename LeftDataType, typename RightDataType,
+          template <typename, typename> typename Impl, typename Name>
+class FunctionBinaryToType : public IFunction { // two-argument adapter; row work is done by Impl::vector_vector
+public:
+    static constexpr auto name = Name::name;
+    static FunctionPtr create() { return std::make_shared<FunctionBinaryToType>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 2; }
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        using ResultDataType = typename Impl<LeftDataType, RightDataType>::ResultDataType;
+        return std::make_shared<ResultDataType>(); // fixed by Impl, independent of `arguments`
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t /*input_rows_count*/) override {
+        DCHECK_EQ(arguments.size(), 2);
+        const auto& left = block.get_by_position(arguments[0]);
+        auto lcol = left.column->convert_to_full_column_if_const(); // work on full (non-const) columns only
+        const auto& right = block.get_by_position(arguments[1]);
+        auto rcol = right.column->convert_to_full_column_if_const();
+
+        using ResultDataType = typename Impl<LeftDataType, RightDataType>::ResultDataType;
+
+        using T0 = typename LeftDataType::FieldType;
+        using T1 = typename RightDataType::FieldType;
+        using ResultType = typename ResultDataType::FieldType;
+
+        using ColVecLeft = // complex field types use ColumnComplexType, all others ColumnVector
+                std::conditional_t<is_complex_v<T0>, ColumnComplexType<T0>, ColumnVector<T0>>;
+        using ColVecRight =
+                std::conditional_t<is_complex_v<T1>, ColumnComplexType<T1>, ColumnVector<T1>>;
+
+        using ColVecResult =
+                std::conditional_t<is_complex_v<ResultType>, ColumnComplexType<ResultType>,
+                                   ColumnVector<ResultType>>;
+
+        typename ColVecResult::MutablePtr col_res = nullptr;
+
+        col_res = ColVecResult::create();
+        auto& vec_res = col_res->get_data();
+        vec_res.resize(block.rows()); // result is pre-sized to the block's row count before Impl fills it
+
+        if (auto col_left = check_and_get_column<ColVecLeft>(lcol.get())) {
+            if (auto col_right = check_and_get_column<ColVecRight>(rcol.get())) {
+                Impl<LeftDataType, RightDataType>::vector_vector(col_left->get_data(),
+                                                                 col_right->get_data(), vec_res);
+                block.replace_by_position(result, std::move(col_res));
+                return Status::OK();
+            }
+        }
+        return Status::RuntimeError(fmt::format("unimplements function {}", get_name())); // argument column types did not match
+    }
+};
+
+template <template <typename, typename> typename Impl, typename Name> // specialization for (string, string) arguments
+class FunctionBinaryToType<DataTypeString, DataTypeString, Impl, Name> : public IFunction {
+public:
+    using LeftDataType = DataTypeString;
+    using RightDataType = DataTypeString;
+    using ResultDataType = typename Impl<LeftDataType, RightDataType>::ResultDataType;
+
+    using ColVecLeft = ColumnString;
+    using ColVecRight = ColumnString;
+
+    static constexpr auto name = Name::name;
+    static FunctionPtr create() { return std::make_shared<FunctionBinaryToType>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 2; }
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<ResultDataType>(); // fixed by Impl, independent of `arguments`
+    }
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t /*input_rows_count*/) override {
+        const auto& left = block.get_by_position(arguments[0]);
+        const auto& right = block.get_by_position(arguments[1]);
+        return execute_inner_impl<ResultDataType>(left, right, block, arguments, result); // overload chosen by result type
+    }
+
+private:
+    template <typename ReturnDataType, // overload for a non-string result; the condition tests the class-level ResultDataType
+              std::enable_if_t<!std::is_same_v<ResultDataType, DataTypeString>, ReturnDataType>* =
+                      nullptr>
+    Status execute_inner_impl(const ColumnWithTypeAndName& left, const ColumnWithTypeAndName& right,
+                              Block& block, const ColumnNumbers& arguments, size_t result) {
+        auto lcol = left.column->convert_to_full_column_if_const();
+        auto rcol = right.column->convert_to_full_column_if_const();
+
+        using ResultType = typename ResultDataType::FieldType;
+        using ColVecResult = ColumnVector<ResultType>;
+        typename ColVecResult::MutablePtr col_res = ColVecResult::create();
+
+        auto& vec_res = col_res->get_data();
+        vec_res.resize(block.rows()); // result is pre-sized to the block's row count before Impl fills it
+
+        if (auto col_left = check_and_get_column<ColVecLeft>(lcol.get())) {
+            if (auto col_right = check_and_get_column<ColVecRight>(rcol.get())) {
+                Impl<LeftDataType, RightDataType>::vector_vector(
+                        col_left->get_chars(), col_left->get_offsets(), col_right->get_chars(),
+                        col_right->get_offsets(), vec_res);
+                block.replace_by_position(result, std::move(col_res));
+                return Status::OK();
+            }
+        }
+        return Status::RuntimeError(fmt::format("unimplements function {}", get_name())); // an argument was not a ColumnString
+    }
+
+    template <typename ReturnDataType, // overload for a string result; Impl writes the result's chars/offsets itself
+              std::enable_if_t<std::is_same_v<ResultDataType, DataTypeString>, ReturnDataType>* =
+                      nullptr>
+    Status execute_inner_impl(const ColumnWithTypeAndName& left, const ColumnWithTypeAndName& right,
+                              Block& block, const ColumnNumbers& arguments, size_t result) {
+        auto lcol = left.column->convert_to_full_column_if_const();
+        auto rcol = right.column->convert_to_full_column_if_const();
+
+        using ColVecResult = ColumnString;
+        typename ColVecResult::MutablePtr col_res = ColVecResult::create();
+        if (auto col_left = check_and_get_column<ColVecLeft>(lcol.get())) {
+            if (auto col_right = check_and_get_column<ColVecRight>(rcol.get())) {
+                Impl<LeftDataType, RightDataType>::vector_vector(
+                        col_left->get_chars(), col_left->get_offsets(), col_right->get_chars(),
+                        col_right->get_offsets(), col_res->get_chars(), col_res->get_offsets());
+                block.replace_by_position(result, std::move(col_res));
+                return Status::OK();
+            }
+        }
+        return Status::RuntimeError(fmt::format("unimplements function {}", get_name())); // an argument was not a ColumnString
+    }
+};
+
+// func(type,type) -> nullable(type); Impl::vector_vector also receives the null map it may write to
+template <typename LeftDataType, typename RightDataType,
+          template <typename, typename> typename Impl, typename Name>
+class FunctionBinaryToNullType : public IFunction {
+public:
+    static constexpr auto name = Name::name;
+    static FunctionPtr create() { return std::make_shared<FunctionBinaryToNullType>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 2; }
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        using ResultDataType = typename Impl<LeftDataType, RightDataType>::ResultDataType;
+        return make_nullable(std::make_shared<ResultDataType>()); // always nullable, whatever the argument types
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto null_map = ColumnUInt8::create(input_rows_count, 0); // starts with every row marked non-null
+        DCHECK_EQ(arguments.size(), 2);
+        ColumnPtr argument_columns[2];
+        for (int i = 0; i < 2; ++i) { // materialize const columns, then unwrap nullable arguments
+            argument_columns[i] =
+                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) {
+                argument_columns[i] = nullable->get_nested_column_ptr();
+                VectorizedUtils::update_null_map(null_map->get_data(), // fold the argument's null map into the result's
+                                                 nullable->get_null_map_data());
+            }
+        }
+
+        using ResultDataType = typename Impl<LeftDataType, RightDataType>::ResultDataType;
+
+        using T0 = typename LeftDataType::FieldType;
+        using T1 = typename RightDataType::FieldType;
+        using ResultType = typename ResultDataType::FieldType;
+
+        using ColVecLeft = // complex field types use ColumnComplexType, all others ColumnVector
+                std::conditional_t<is_complex_v<T0>, ColumnComplexType<T0>, ColumnVector<T0>>;
+        using ColVecRight =
+                std::conditional_t<is_complex_v<T1>, ColumnComplexType<T1>, ColumnVector<T1>>;
+
+        using ColVecResult =
+                std::conditional_t<is_complex_v<ResultType>, ColumnComplexType<ResultType>,
+                                   ColumnVector<ResultType>>;
+
+        typename ColVecResult::MutablePtr col_res = nullptr;
+
+        col_res = ColVecResult::create();
+        auto& vec_res = col_res->get_data();
+        vec_res.resize(block.rows()); // result is pre-sized to the block's row count before Impl fills it
+
+        if (auto col_left = check_and_get_column<ColVecLeft>(argument_columns[0].get())) {
+            if (auto col_right = check_and_get_column<ColVecRight>(argument_columns[1].get())) {
+                Impl<LeftDataType, RightDataType>::vector_vector(
+                        col_left->get_data(), col_right->get_data(), vec_res, null_map->get_data());
+                block.get_by_position(result).column = // nested values plus null map form the nullable result
+                        ColumnNullable::create(std::move(col_res), std::move(null_map));
+                return Status::OK();
+            }
+        }
+        return Status::RuntimeError(fmt::format("unimplements function {}", get_name())); // argument column types did not match
+    }
+};
+
+// func(string,string) -> nullable(type); Impl::vector_vector also receives the null map it may write to
+template <typename Impl>
+class FunctionBinaryStringOperateToNullType : public IFunction {
+public:
+    static constexpr auto name = Impl::name;
+    static FunctionPtr create() {
+        return std::make_shared<FunctionBinaryStringOperateToNullType>();
+    }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 2; }
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return make_nullable(std::make_shared<typename Impl::ReturnType>()); // always nullable
+    }
+    bool use_default_implementation_for_constants() const override { return true; }
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto null_map = ColumnUInt8::create(input_rows_count, 0); // starts with every row marked non-null
+        ColumnPtr argument_columns[2];
+
+        // Force const columns into full columns to simplify the execution logic,
+        // then unwrap nullable arguments and fold their null maps into the result null map.
+        for (int i = 0; i < 2; ++i) {
+            argument_columns[i] =
+                    block.get_by_position(arguments[i]).column->convert_to_full_column_if_const();
+            if (auto* nullable = check_and_get_column<ColumnNullable>(*argument_columns[i])) {
+                argument_columns[i] = nullable->get_nested_column_ptr();
+                VectorizedUtils::update_null_map(null_map->get_data(),
+                                                 nullable->get_null_map_data());
+            }
+        }
+
+        auto res = Impl::ColumnType::create();
+        // Both arguments are cast to ColumnString without a recoverable error path.
+        auto specific_str_column = assert_cast<const ColumnString*>(argument_columns[0].get());
+        auto specific_char_column = assert_cast<const ColumnString*>(argument_columns[1].get());
+
+        auto& ldata = specific_str_column->get_chars();
+        auto& loffsets = specific_str_column->get_offsets();
+
+        auto& rdata = specific_char_column->get_chars();
+        auto& roffsets = specific_char_column->get_offsets();
+
+        // execute Impl: a string result is written as chars/offsets, any other result through get_data()
+        if constexpr (std::is_same_v<typename Impl::ReturnType, DataTypeString>) {
+            auto& res_data = res->get_chars();
+            auto& res_offsets = res->get_offsets();
+            Impl::vector_vector(ldata, loffsets, rdata, roffsets, res_data, res_offsets,
+                                null_map->get_data());
+        } else {
+            Impl::vector_vector(ldata, loffsets, rdata, roffsets, res->get_data(),
+                                null_map->get_data());
+        }
+
+        block.get_by_position(result).column = // nested values plus null map form the nullable result
+                ColumnNullable::create(std::move(res), std::move(null_map));
+        return Status::OK();
+    }
+};
+
+// func(string) -> nullable(type); Impl::vector also receives the null map it may write to
+template <typename Impl>
+class FunctionStringOperateToNullType : public IFunction {
+public:
+    static constexpr auto name = Impl::name;
+
+    static FunctionPtr create() { return std::make_shared<FunctionStringOperateToNullType>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return make_nullable(std::make_shared<typename Impl::ReturnType>()); // always nullable
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    bool use_default_implementation_for_nulls() const override { return true; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto null_map = ColumnUInt8::create(input_rows_count, 0); // starts with every row marked non-null
+
+        auto col_ptr =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+
+        // The result column is created only once the argument is known to be a ColumnString.
+        if (const ColumnString* col = check_and_get_column<ColumnString>(col_ptr.get())) {
+            auto col_res = Impl::ColumnType::create();
+            Impl::vector(col->get_chars(), col->get_offsets(), col_res->get_chars(),
+                         col_res->get_offsets(), null_map->get_data());
+            block.replace_by_position( // nested values plus null map form the nullable result
+                    result, ColumnNullable::create(std::move(col_res), std::move(null_map)));
+        } else {
+            return Status::RuntimeError(fmt::format(
+                    "Illegal column {} of argument of function {}",
+                    block.get_by_position(arguments[0]).column->get_name(), get_name()));
+        }
+        return Status::OK();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_unary_arithmetic.h b/be/src/vec/functions/function_unary_arithmetic.h
new file mode 100644
index 0000000000..ec31b20fd0
--- /dev/null
+++ b/be/src/vec/functions/function_unary_arithmetic.h
@@ -0,0 +1,157 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionUnaryArithmetic.h
+// and modified by Doris
+
+#pragma once
+
+#include "vec/columns/column_decimal.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/cast_type_to_either.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+
+namespace doris::vectorized {
+
+template <typename A, typename Op> // A: input element type; Op: provides ResultType and a static apply()
+struct UnaryOperationImpl { // applies Op element-wise to a column container or to a single value
+    using ResultType = typename Op::ResultType;
+    using ColVecA = std::conditional_t<IsDecimalNumber<A>, ColumnDecimal<A>, ColumnVector<A>>;
+    using ColVecC = std::conditional_t<IsDecimalNumber<ResultType>, ColumnDecimal<ResultType>,
+                                       ColumnVector<ResultType>>;
+    using ArrayA = typename ColVecA::Container;
+    using ArrayC = typename ColVecC::Container;
+
+    static void NO_INLINE vector(const ArrayA& a, ArrayC& c) { // c is indexed, not grown: it must already hold a.size() slots
+        size_t size = a.size();
+        for (size_t i = 0; i < size; ++i) c[i] = Op::apply(a[i]);
+    }
+
+    static void constant(A a, ResultType& c) { c = Op::apply(a); } // single-value form of vector()
+};
+
+template <typename FunctionName>
+struct FunctionUnaryArithmeticMonotonicity; // declaration only; this header provides no definition
+
+template <typename>
+struct AbsImpl; // forward declaration; FunctionUnaryArithmetic::allow_decimal compares Op against it
+template <typename>
+struct NegativeImpl; // forward declaration; FunctionUnaryArithmetic::allow_decimal compares Op against it
+
+/// Used to indicate undefined operation
+struct InvalidType; // declared as an incomplete type only
+
+template <template <typename> class Op, typename Name, bool is_injective> // Op<T>: per-type operation; Name::name: function name
+class FunctionUnaryArithmetic : public IFunction { // one-argument arithmetic function over numeric and Decimal128 columns
+    static constexpr bool allow_decimal = // decimals are accepted only when Op is NegativeImpl or AbsImpl
+            std::is_same_v<Op<Int8>, NegativeImpl<Int8>> || std::is_same_v<Op<Int8>, AbsImpl<Int8>>;
+
+    template <typename F>
+    static bool cast_type(const IDataType* type, F&& f) { // forwards `type` and `f` to cast_type_to_either over the types listed below
+        return cast_type_to_either<
+                DataTypeUInt8, DataTypeUInt16, DataTypeUInt32, DataTypeUInt64, DataTypeInt8,
+                DataTypeInt16, DataTypeInt32, DataTypeInt64, DataTypeInt128, DataTypeFloat32, DataTypeFloat64,
+                //                                            DataTypeDecimal<Decimal32>,
+                //                                            DataTypeDecimal<Decimal64>,
+                DataTypeDecimal<Decimal128>>(type, std::forward<F>(f));
+    }
+
+public:
+    static constexpr auto name = Name::name;
+    static FunctionPtr create() { return std::make_shared<FunctionUnaryArithmetic>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+    bool get_is_injective(const Block&) override { return is_injective; } // reports the template parameter unchanged
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        DataTypePtr result;
+        bool valid = cast_type(arguments[0].get(), [&](const auto& type) { // the lambda sets `result` for the matched type
+            using DataType = std::decay_t<decltype(type)>;
+            using T0 = typename DataType::FieldType;
+
+            if constexpr (IsDataTypeDecimal<DataType>) {
+                if constexpr (!allow_decimal) return false;
+                result = std::make_shared<DataType>(type.get_precision(), type.get_scale()); // keeps the argument's precision/scale
+            } else {
+                result = std::make_shared<DataTypeNumber<typename Op<T0>::ResultType>>();
+            }
+            return true;
+        });
+        if (!valid) { // an unsupported argument type is reported through LOG(FATAL)
+            LOG(FATAL) << fmt::format("Illegal type {} of argument of function {}",
+                                      arguments[0]->get_name(), get_name());
+        }
+        return result;
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        bool valid = // true once a branch below has produced and stored the result column
+                cast_type(block.get_by_position(arguments[0]).type.get(), [&](const auto& type) {
+                    using DataType = std::decay_t<decltype(type)>;
+                    using T0 = typename DataType::FieldType;
+
+                    if constexpr (IsDataTypeDecimal<DataType>) {
+                        if constexpr (allow_decimal) {
+                            if (auto col = check_and_get_column<ColumnDecimal<T0>>(
+                                        block.get_by_position(arguments[0]).column.get())) {
+                                auto col_res = ColumnDecimal<typename Op<T0>::ResultType>::create(
+                                        0, type.get_scale());
+                                auto& vec_res = col_res->get_data();
+                                vec_res.resize(col->get_data().size()); // UnaryOperationImpl::vector writes by index
+                                UnaryOperationImpl<T0, Op<T0>>::vector(col->get_data(), vec_res);
+                                block.replace_by_position(result, std::move(col_res));
+                                return true;
+                            }
+                        }
+                    } else {
+                        if (auto col = check_and_get_column<ColumnVector<T0>>(
+                                    block.get_by_position(arguments[0]).column.get())) {
+                            auto col_res = ColumnVector<typename Op<T0>::ResultType>::create();
+                            auto& vec_res = col_res->get_data();
+                            vec_res.resize(col->get_data().size()); // UnaryOperationImpl::vector writes by index
+                            UnaryOperationImpl<T0, Op<T0>>::vector(col->get_data(), vec_res);
+                            block.replace_by_position(result, std::move(col_res));
+                            return true;
+                        }
+                    }
+
+                    return false; // decimal not allowed, or the column did not match the declared type
+                });
+        if (!valid) {
+            return Status::RuntimeError(
+                    fmt::format("{}'s argument does not match the expected data type", get_name()));
+        }
+        return Status::OK();
+    }
+
+    bool has_information_about_monotonicity() const override { return false; }
+};
+
+struct PositiveMonotonicity { // monotonicity descriptor with fixed answers: has() is true, get() ignores both bounds
+    static bool has() { return true; }
+    static IFunction::Monotonicity get(const Field&, const Field&) { return {true}; } // first Monotonicity field set to true
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_utility.cpp b/be/src/vec/functions/function_utility.cpp
new file mode 100644
index 0000000000..6c7da89a3a
--- /dev/null
+++ b/be/src/vec/functions/function_utility.cpp
@@ -0,0 +1,118 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/monotime.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+class FunctionSleep : public IFunction { // sleep(seconds): sleeps once per row; yields 1, or NULL for a NULL input row
+public:
+    static constexpr auto name = "sleep";
+    static FunctionPtr create() { return std::make_shared<FunctionSleep>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        if (arguments[0].get()->is_nullable()) { // the result is nullable exactly when the argument is
+            return make_nullable(std::make_shared<DataTypeUInt8>());
+        }
+        return std::make_shared<DataTypeUInt8>();
+    }
+
+    bool use_default_implementation_for_constants() const override { return true; }
+    bool use_default_implementation_for_nulls() const override { return false; } // nulls are handled row by row below
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        ColumnPtr argument_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+
+        auto res_column = ColumnUInt8::create();
+
+        if (auto* nullable_column = check_and_get_column<ColumnNullable>(*argument_column)) {
+            auto null_map_column = ColumnUInt8::create();
+
+            auto nested_column = nullable_column->get_nested_column_ptr();
+            auto data_column = assert_cast<const ColumnVector<Int32>*>(nested_column.get());
+
+            for (size_t i = 0; i < input_rows_count; i++) { // size_t index matches the type of input_rows_count
+                if (nullable_column->is_null_at(i)) { // NULL row: no sleep, NULL result
+                    res_column->insert(0);
+                    null_map_column->insert(1);
+                } else {
+                    int seconds = data_column->get_data()[i];
+                    SleepFor(MonoDelta::FromSeconds(seconds));
+                    res_column->insert(1);
+                    null_map_column->insert(0);
+                }
+            }
+
+            block.replace_by_position(result, ColumnNullable::create(std::move(res_column),
+                                                                     std::move(null_map_column)));
+        } else {
+            auto data_column = assert_cast<const ColumnVector<Int32>*>(argument_column.get());
+
+            for (size_t i = 0; i < input_rows_count; i++) { // size_t index matches the type of input_rows_count
+                int seconds = data_column->get_element(i);
+                SleepFor(MonoDelta::FromSeconds(seconds));
+                res_column->insert(1);
+            }
+
+            block.replace_by_position(result, std::move(res_column));
+        }
+        return Status::OK();
+    }
+};
+
+class FunctionVersion : public IFunction { // version(): zero-argument function returning the static `version` string
+public:
+    static constexpr auto name = "version";
+    static const std::string version; // defined out of class further down in this file
+    static FunctionPtr create() { return std::make_shared<FunctionVersion>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 0; }
+
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
+        return std::make_shared<DataTypeString>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto res_column = ColumnString::create();
+        for (size_t i = 0; i < input_rows_count; ++i) { // one value per input row, so the result length matches the block
+            res_column->insert_data(version.c_str(), version.length());
+        }
+        block.replace_by_position(result, std::move(res_column));
+        return Status::OK();
+    }
+};
+
+const std::string FunctionVersion::version = "5.1.0"; // out-of-class definition of the static member inserted by execute_impl
+
+void register_function_utility(SimpleFunctionFactory& factory) { // adds this file's two functions to `factory`
+    factory.register_function<FunctionSleep>(); // "sleep"
+    factory.register_function<FunctionVersion>(); // "version"
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/function_variadic_arguments.h b/be/src/vec/functions/function_variadic_arguments.h
new file mode 100644
index 0000000000..c993de7cff
--- /dev/null
+++ b/be/src/vec/functions/function_variadic_arguments.h
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <utility>
+
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/bit_cast.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+
+namespace doris::vectorized {
+
+/// Variadic function skeleton: Impl supplies `name`, empty_apply, first_apply and combine_apply.
+template <typename ToDataType, typename Impl>
+class FunctionVariadicArgumentsBase : public IFunction {
+public:
+    static constexpr auto name = Impl::name;
+    String get_name() const override { return name; }
+    static FunctionPtr create() { return std::make_shared<FunctionVariadicArgumentsBase>(); }
+    bool is_variadic() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    /// Returns ToDataType; a decimal ToDataType is always built as create_decimal(27, 9).
+    DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
+        if constexpr (IsDataTypeDecimal<ToDataType>) {
+            DataTypePtr res = create_decimal(27, 9);
+            if (!res) {
+                LOG(FATAL) << "Something wrong with create_decimal(27, 9) in " << get_name();
+            }
+            return res;
+        } else {
+            return std::make_shared<ToDataType>();
+        }
+    }
+
+    /// Folds the arguments left to right into a fresh ToDataType column and stores it at `result`.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto column = ToDataType().create_column();
+        column->reserve(input_rows_count);
+
+        if (arguments.empty()) {
+            RETURN_IF_ERROR(Impl::empty_apply(column->assume_mutable_ref(), input_rows_count));
+        } else {
+            const ColumnWithTypeAndName& first_col = block.get_by_position(arguments[0]);
+            RETURN_IF_ERROR(Impl::first_apply(first_col.type.get(), first_col.column.get(),
+                                              input_rows_count, column->assume_mutable_ref()));
+            for (size_t i = 1; i < arguments.size(); ++i) {
+                const ColumnWithTypeAndName& col = block.get_by_position(arguments[i]);
+                RETURN_IF_ERROR(
+                        Impl::combine_apply(col.type.get(), col.column.get(),
+                                            input_rows_count, column->assume_mutable_ref()));
+            }
+        }
+        block.get_by_position(result).column = std::move(column);
+        return Status::OK();
+    }
+};
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/functions_comparison.h b/be/src/vec/functions/functions_comparison.h
new file mode 100644
index 0000000000..99070c29ec
--- /dev/null
+++ b/be/src/vec/functions/functions_comparison.h
@@ -0,0 +1,475 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsComparison.h
+// and modified by Doris
+
+#pragma once
+
+#include <limits>
+#include <type_traits>
+
+#include "common/logging.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_decimal.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/memcmp_small.h"
+#include "vec/core/accurate_comparison.h"
+#include "vec/core/decimal_comparison.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/data_types/get_least_supertype.h"
+#include "vec/functions/function.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/functions/functions_logical.h"
+#include "vec/runtime/vdatetime_value.h"
+namespace doris::vectorized {
+
+/** Comparison functions: ==, !=, <, >, <=, >=.
+  * The comparison functions always return 0 or 1 (UInt8).
+  *
+  * You can compare the following types:
+  * - numbers and decimals;
+  * - strings and fixed strings;
+  * - dates;
+  * - datetimes;
+  *   within each group, but not from different groups;
+  * - tuples (lexicographic comparison).
+  *
+  * Exception: You can compare the date and datetime with a constant string. Example: EventDate = '2015-01-01'.
+  */
+/// Comparison kernels for numeric data; the array variants write Op::apply() results via c.data().
+template <typename A, typename B, typename Op>
+struct NumComparisonImpl {
+    /// Element-wise a[i] vs b[i] over a.size() elements. If you don't specify NO_INLINE, the compiler will inline this function, but we don't need this as this function contains a tight loop inside.
+    static void NO_INLINE vector_vector(const PaddedPODArray<A>& a, const PaddedPODArray<B>& b,
+                                        PaddedPODArray<UInt8>& c) {
+        /** GCC 4.8.2 vectorizes a loop only if it is written in this form.
+          * In this case, if you loop through the array index (the code will look simpler),
+          *  the loop will not be vectorized.
+          */
+
+        size_t size = a.size();
+        const A* a_pos = a.data();
+        const B* b_pos = b.data();
+        UInt8* c_pos = c.data();
+        const A* a_end = a_pos + size;
+
+        while (a_pos < a_end) {
+            *c_pos = Op::apply(*a_pos, *b_pos);
+            ++a_pos;
+            ++b_pos;
+            ++c_pos;
+        }
+    }
+    /// Compares each a[i] with the scalar b; `c` is written through c.data() and not resized here.
+    static void NO_INLINE vector_constant(const PaddedPODArray<A>& a, B b,
+                                          PaddedPODArray<UInt8>& c) {
+        size_t size = a.size();
+        const A* a_pos = a.data();
+        UInt8* c_pos = c.data();
+        const A* a_end = a_pos + size;
+
+        while (a_pos < a_end) {
+            *c_pos = Op::apply(*a_pos, b);
+            ++a_pos;
+            ++c_pos;
+        }
+    }
+    /// Scalar-vs-array case: swaps the operands and reuses vector_constant() with Op::SymmetricOp.
+    static void constant_vector(A a, const PaddedPODArray<B>& b, PaddedPODArray<UInt8>& c) {
+        NumComparisonImpl<B, A, typename Op::SymmetricOp>::vector_constant(b, a, c);
+    }
+    /// Scalar-vs-scalar case: stores the single result Op::apply(a, b) in `c`.
+    static void constant_constant(A a, B b, UInt8& c) { c = Op::apply(a, b); }
+};
+
+/// Type-agnostic kernels built on IColumn::compare_at(); implemented for columns of same type.
+template <typename Op>
+struct GenericComparisonImpl {
+    /// Row r of `a` against row r of `b`; Op receives the compare_at() result and 0.
+    static void NO_INLINE vector_vector(const IColumn& a, const IColumn& b,
+                                        PaddedPODArray<UInt8>& c) {
+        for (size_t r = 0, n = a.size(); r < n; ++r) c[r] = Op::apply(a.compare_at(r, r, b, 1), 0);
+    }
+    /// Every row of `a` against row 0 of `b`, after `b` is cut to one row and materialized.
+    static void NO_INLINE vector_constant(const IColumn& a, const IColumn& b,
+                                          PaddedPODArray<UInt8>& c) {
+        auto pivot = b.clone_resized(1)->convert_to_full_column_if_const();
+        for (size_t row = 0, rows = a.size(); row < rows; ++row)
+            c[row] = Op::apply(a.compare_at(row, 0, *pivot, 1), 0);
+    }
+    /// Constant on the left: swaps the operands and applies Op::SymmetricOp instead.
+    static void constant_vector(const IColumn& a, const IColumn& b, PaddedPODArray<UInt8>& c) {
+        GenericComparisonImpl<typename Op::SymmetricOp>::vector_constant(b, a, c);
+    }
+    /// Both constant: compares row 0 of each column and stores the single result in `c`.
+    static void constant_constant(const IColumn& a, const IColumn& b, UInt8& c) {
+        c = Op::apply(a.compare_at(0, 0, b, 1), 0);
+    }
+};
+
+#if USE_EMBEDDED_COMPILER
+/// LLVM IR emitters for the comparison operators; built only when USE_EMBEDDED_COMPILER is set.
+template <template <typename, typename> typename Op>
+struct CompileOp;
+/// Each specialization emits ICmp for integer operands and an ordered FCmp otherwise.
+template <>
+struct CompileOp<EqualsOp> {
+    static llvm::Value* compile(llvm::IRBuilder<>& b, llvm::Value* x, llvm::Value* y,
+                                bool /*is_signed*/) {
+        return x->getType()->isIntegerTy() ? b.CreateICmpEQ(x, y)
+                                           : b.CreateFCmpOEQ(x, y); /// qNaNs always compare false
+    }
+};
+
+template <>
+struct CompileOp<NotEqualsOp> {
+    static llvm::Value* compile(llvm::IRBuilder<>& b, llvm::Value* x, llvm::Value* y,
+                                bool /*is_signed*/) {
+        return x->getType()->isIntegerTy() ? b.CreateICmpNE(x, y) : b.CreateFCmpONE(x, y);
+    }
+};
+
+template <>
+struct CompileOp<LessOp> {
+    static llvm::Value* compile(llvm::IRBuilder<>& b, llvm::Value* x, llvm::Value* y,
+                                bool is_signed) {
+        return x->getType()->isIntegerTy()
+                       ? (is_signed ? b.CreateICmpSLT(x, y) : b.CreateICmpULT(x, y))
+                       : b.CreateFCmpOLT(x, y);
+    }
+};
+
+template <>
+struct CompileOp<GreaterOp> {
+    static llvm::Value* compile(llvm::IRBuilder<>& b, llvm::Value* x, llvm::Value* y,
+                                bool is_signed) {
+        return x->getType()->isIntegerTy()
+                       ? (is_signed ? b.CreateICmpSGT(x, y) : b.CreateICmpUGT(x, y))
+                       : b.CreateFCmpOGT(x, y);
+    }
+};
+
+template <>
+struct CompileOp<LessOrEqualsOp> {
+    static llvm::Value* compile(llvm::IRBuilder<>& b, llvm::Value* x, llvm::Value* y,
+                                bool is_signed) {
+        return x->getType()->isIntegerTy()
+                       ? (is_signed ? b.CreateICmpSLE(x, y) : b.CreateICmpULE(x, y))
+                       : b.CreateFCmpOLE(x, y);
+    }
+};
+
+template <>
+struct CompileOp<GreaterOrEqualsOp> {
+    static llvm::Value* compile(llvm::IRBuilder<>& b, llvm::Value* x, llvm::Value* y,
+                                bool is_signed) {
+        return x->getType()->isIntegerTy()
+                       ? (is_signed ? b.CreateICmpSGE(x, y) : b.CreateICmpUGE(x, y))
+                       : b.CreateFCmpOGE(x, y);
+    }
+};
+
+#endif
+/// Name tags for FunctionComparison: `name` is the identifier the function reports via get_name().
+struct NameEquals {
+    static constexpr auto name = "eq";
+};
+struct NameNotEquals {
+    static constexpr auto name = "ne";
+};
+struct NameLess {
+    static constexpr auto name = "lt";
+};
+struct NameGreater {
+    static constexpr auto name = "gt";
+};
+struct NameLessOrEquals {
+    static constexpr auto name = "le";
+};
+struct NameGreaterOrEquals {
+    static constexpr auto name = "ge";
+};
+/// Two-argument comparison function: Op<A, B> is the operator, Name::name the reported name.
+template <template <typename, typename> class Op, typename Name>
+class FunctionComparison : public IFunction {
+public:
+    static constexpr auto name = Name::name;
+    static FunctionPtr create() { return std::make_shared<FunctionComparison>(); }
+
+    FunctionComparison() = default;
+
+private:
+    template <typename T0, typename T1>
+    bool execute_num_right_type(Block& block, size_t result, const ColumnVector<T0>* col_left,
+                                const IColumn* col_right_untyped) {
+        if (const ColumnVector<T1>* col_right =
+                    check_and_get_column<ColumnVector<T1>>(col_right_untyped)) {
+            auto col_res = ColumnUInt8::create();
+            // Vector vs vector: one result per element of the left column.
+            ColumnUInt8::Container& vec_res = col_res->get_data();
+            vec_res.resize(col_left->get_data().size());
+            NumComparisonImpl<T0, T1, Op<T0, T1>>::vector_vector(col_left->get_data(),
+                                                                 col_right->get_data(), vec_res);
+
+            block.replace_by_position(result, std::move(col_res));
+            return true;
+        } else if (auto col_right_const =
+                           check_and_get_column_const<ColumnVector<T1>>(col_right_untyped)) {
+            auto col_res = ColumnUInt8::create();
+            // Vector vs constant: every left element is compared with the single right value.
+            ColumnUInt8::Container& vec_res = col_res->get_data();
+            vec_res.resize(col_left->size());
+            NumComparisonImpl<T0, T1, Op<T0, T1>>::vector_constant(
+                    col_left->get_data(), col_right_const->template get_value<T1>(), vec_res);
+
+            block.replace_by_position(result, std::move(col_res));
+            return true;
+        }
+
+        return false;
+    }
+    /// Same dispatch with a constant left side; a constant right side yields a constant result.
+    template <typename T0, typename T1>
+    bool execute_num_const_right_type(Block& block, size_t result, const ColumnConst* col_left,
+                                      const IColumn* col_right_untyped) {
+        if (const ColumnVector<T1>* col_right =
+                    check_and_get_column<ColumnVector<T1>>(col_right_untyped)) {
+            auto col_res = ColumnUInt8::create();
+
+            ColumnUInt8::Container& vec_res = col_res->get_data();
+            vec_res.resize(col_left->size());
+            NumComparisonImpl<T0, T1, Op<T0, T1>>::constant_vector(
+                    col_left->template get_value<T0>(), col_right->get_data(), vec_res);
+
+            block.replace_by_position(result, std::move(col_res));
+            return true;
+        } else if (auto col_right_const =
+                           check_and_get_column_const<ColumnVector<T1>>(col_right_untyped)) {
+            UInt8 res = 0;
+            NumComparisonImpl<T0, T1, Op<T0, T1>>::constant_constant(
+                    col_left->template get_value<T0>(), col_right_const->template get_value<T1>(),
+                    res);
+
+            block.replace_by_position(
+                    result, DataTypeUInt8().create_column_const(col_left->size(), to_field(res)));
+            return true;
+        }
+
+        return false;
+    }
+    /// Returns false unless the left column holds T0; an unmatched right column is LOG(FATAL).
+    template <typename T0>
+    bool execute_num_left_type(Block& block, size_t result, const IColumn* col_left_untyped,
+                            const IColumn* col_right_untyped) {
+        if (const ColumnVector<T0>* col_left =
+                    check_and_get_column<ColumnVector<T0>>(col_left_untyped)) {
+            if (execute_num_right_type<T0, UInt8>(block, result, col_left, col_right_untyped) ||
+                execute_num_right_type<T0, UInt16>(block, result, col_left, col_right_untyped) ||
+                execute_num_right_type<T0, UInt32>(block, result, col_left, col_right_untyped) ||
+                execute_num_right_type<T0, UInt64>(block, result, col_left, col_right_untyped) ||
+                execute_num_right_type<T0, Int8>(block, result, col_left, col_right_untyped) ||
+                execute_num_right_type<T0, Int16>(block, result, col_left, col_right_untyped) ||
+                execute_num_right_type<T0, Int32>(block, result, col_left, col_right_untyped) ||
+                execute_num_right_type<T0, Int64>(block, result, col_left, col_right_untyped) ||
+                execute_num_right_type<T0, Int128>(block, result, col_left, col_right_untyped) ||
+                execute_num_right_type<T0, Float32>(block, result, col_left, col_right_untyped) ||
+                execute_num_right_type<T0, Float64>(block, result, col_left, col_right_untyped))
+                return true;
+            else {
+                LOG(FATAL) << "Illegal column " << col_right_untyped->get_name()
+                           << " of second argument of function " << get_name();
+            }
+
+        } else if (auto col_left_const =
+                           check_and_get_column_const<ColumnVector<T0>>(col_left_untyped)) {
+            if (execute_num_const_right_type<T0, UInt8>(block, result, col_left_const,
+                                                        col_right_untyped) ||
+                execute_num_const_right_type<T0, UInt16>(block, result, col_left_const,
+                                                         col_right_untyped) ||
+                execute_num_const_right_type<T0, UInt32>(block, result, col_left_const,
+                                                         col_right_untyped) ||
+                execute_num_const_right_type<T0, UInt64>(block, result, col_left_const,
+                                                         col_right_untyped) ||
+                execute_num_const_right_type<T0, Int8>(block, result, col_left_const,
+                                                       col_right_untyped) ||
+                execute_num_const_right_type<T0, Int16>(block, result, col_left_const,
+                                                        col_right_untyped) ||
+                execute_num_const_right_type<T0, Int32>(block, result, col_left_const,
+                                                        col_right_untyped) ||
+                execute_num_const_right_type<T0, Int64>(block, result, col_left_const,
+                                                        col_right_untyped) ||
+                execute_num_const_right_type<T0, Int128>(block, result, col_left_const,
+                                                         col_right_untyped) ||
+                execute_num_const_right_type<T0, Float32>(block, result, col_left_const,
+                                                          col_right_untyped) ||
+                execute_num_const_right_type<T0, Float64>(block, result, col_left_const,
+                                                          col_right_untyped))
+                return true;
+            else {
+                LOG(FATAL) << "Illegal column " << col_right_untyped->get_name()
+                           << " of second argument of function " << get_name();
+            }
+        }
+
+        return false;
+    }
+    /// Decimal path: picks DecimalComparison from both type ids; unsupported pairs give an error.
+    Status execute_decimal(Block& block, size_t result, const ColumnWithTypeAndName& col_left,
+                           const ColumnWithTypeAndName& col_right) {
+        TypeIndex left_number = col_left.type->get_type_id();
+        TypeIndex right_number = col_right.type->get_type_id();
+
+        auto call = [&](const auto& types) -> bool {
+            using Types = std::decay_t<decltype(types)>;
+            using LeftDataType = typename Types::LeftType;
+            using RightDataType = typename Types::RightType;
+
+            DecimalComparison<LeftDataType, RightDataType, Op, false>(block, result, col_left,
+                                                                      col_right);
+            return true;
+        };
+
+        if (!call_on_basic_types<true, false, true, false>(left_number, right_number, call)) {
+            return Status::RuntimeError(fmt::format("Wrong call for {} with {} and {}", get_name(),
+                                                    col_left.type->get_name(),
+                                                    col_right.type->get_name()));
+        }
+        return Status::OK();
+    }
+    /// Generic path: compares via IColumn::compare_at() for every const/non-const combination.
+    void execute_generic_identical_types(Block& block, size_t result, const IColumn* c0,
+                                         const IColumn* c1) {
+        bool c0_const = is_column_const(*c0);
+        bool c1_const = is_column_const(*c1);
+
+        if (c0_const && c1_const) {
+            UInt8 res = 0;
+            GenericComparisonImpl<Op<int, int>>::constant_constant(*c0, *c1, res);
+            block.replace_by_position(
+                    result, DataTypeUInt8().create_column_const(c0->size(), to_field(res)));
+        } else {
+            auto c_res = ColumnUInt8::create();
+            ColumnUInt8::Container& vec_res = c_res->get_data();
+            vec_res.resize(c0->size());
+
+            if (c0_const)
+                GenericComparisonImpl<Op<int, int>>::constant_vector(*c0, *c1, vec_res);
+            else if (c1_const)
+                GenericComparisonImpl<Op<int, int>>::vector_constant(*c0, *c1, vec_res);
+            else
+                GenericComparisonImpl<Op<int, int>>::vector_vector(*c0, *c1, vec_res);
+
+            block.replace_by_position(result, std::move(c_res));
+        }
+    }
+    /// Thin Status-returning wrapper that passes the raw columns to the generic path.
+    Status execute_generic(Block& block, size_t result, const ColumnWithTypeAndName& c0,
+                           const ColumnWithTypeAndName& c1) {
+        execute_generic_identical_types(block, result, c0.column.get(), c1.column.get());
+        return Status::OK();
+    }
+
+public:
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    /// The result type is always UInt8; the argument types are not inspected here.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeUInt8>();
+    }
+    /// Dispatches to the numeric, decimal or generic path after the same-column shortcut.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const auto& col_with_type_and_name_left = block.get_by_position(arguments[0]);
+        const auto& col_with_type_and_name_right = block.get_by_position(arguments[1]);
+        const IColumn* col_left_untyped = col_with_type_and_name_left.column.get();
+        const IColumn* col_right_untyped = col_with_type_and_name_right.column.get();
+
+        const DataTypePtr& left_type = col_with_type_and_name_left.type;
+        const DataTypePtr& right_type = col_with_type_and_name_right.type;
+
+        /// The case when arguments are the same (tautological comparison). Return constant.
+        /// NOTE: Nullable types are special case. (BTW, this function use default implementation for Nullable, so Nullable types cannot be here. Check just in case.)
+        /// NOTE: We consider NaN comparison to be implementation specific (and in our implementation NaNs are sometimes equal sometimes not).
+        if (left_type->equals(*right_type) && !left_type->is_nullable() &&
+            col_left_untyped == col_right_untyped) {
+            /// Always true: =, <=, >=
+            // TODO: Return const column in the future
+            if constexpr (std::is_same_v<Op<int, int>, EqualsOp<int, int>> ||
+                          std::is_same_v<Op<int, int>, LessOrEqualsOp<int, int>> ||
+                          std::is_same_v<Op<int, int>, GreaterOrEqualsOp<int, int>>) {
+                block.get_by_position(result).column =
+                        DataTypeUInt8().create_column_const(input_rows_count, 1u)->convert_to_full_column_if_const();
+                return Status::OK();
+            } else {
+                block.get_by_position(result).column =
+                        DataTypeUInt8().create_column_const(input_rows_count, 0u)->convert_to_full_column_if_const();
+                return Status::OK();
+            }
+        }
+
+        WhichDataType which_left{left_type};
+        WhichDataType which_right{right_type};
+
+        const bool left_is_num = col_left_untyped->is_numeric();
+        const bool right_is_num = col_right_untyped->is_numeric();
+
+        // Date and datetime are compared directly through the Int64 comparison. The commented-out
+        // code below is kept on purpose: this part may need to be refactored.
+//        bool date_and_datetime = (left_type != right_type) && which_left.is_date_or_datetime() &&
+//                                 which_right.is_date_or_datetime();
+
+        if (left_is_num && right_is_num) {
+            if (!(execute_num_left_type<UInt8>(block, result, col_left_untyped, col_right_untyped) ||
+                  execute_num_left_type<UInt16>(block, result, col_left_untyped, col_right_untyped) ||
+                  execute_num_left_type<UInt32>(block, result, col_left_untyped, col_right_untyped) ||
+                  execute_num_left_type<UInt64>(block, result, col_left_untyped, col_right_untyped) ||
+                  execute_num_left_type<Int8>(block, result, col_left_untyped, col_right_untyped) ||
+                  execute_num_left_type<Int16>(block, result, col_left_untyped, col_right_untyped) ||
+                  execute_num_left_type<Int32>(block, result, col_left_untyped, col_right_untyped) ||
+                  execute_num_left_type<Int64>(block, result, col_left_untyped, col_right_untyped) ||
+                  execute_num_left_type<Int128>(block, result, col_left_untyped, col_right_untyped) ||
+                  execute_num_left_type<Float32>(block, result, col_left_untyped, col_right_untyped) ||
+                  execute_num_left_type<Float64>(block, result, col_left_untyped, col_right_untyped)))
+
+                return Status::RuntimeError(
+                        fmt::format("Illegal column {} of first argument of function {}",
+                                    col_left_untyped->get_name(), get_name()));
+        } else if (is_decimal(left_type) || is_decimal(right_type)) {
+            if (!allow_decimal_comparison(left_type, right_type)) {
+                return Status::RuntimeError(fmt::format("No operation {} between {} and {}",
+                                                        get_name(), left_type->get_name(),
+                                                        right_type->get_name()));
+            }
+            return execute_decimal(block, result, col_with_type_and_name_left,
+                                   col_with_type_and_name_right);
+        } else {
+            // TODO: varchar and string maybe need a quickly way
+            return execute_generic(block, result, col_with_type_and_name_left,
+                                   col_with_type_and_name_right);
+        }
+        return Status::OK();
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/functions_logical.cpp b/be/src/vec/functions/functions_logical.cpp
new file mode 100644
index 0000000000..8ef554fd9b
--- /dev/null
+++ b/be/src/vec/functions/functions_logical.cpp
@@ -0,0 +1,531 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsLogical.cpp
+// and modified by Doris
+
+#include "vec/functions/functions_logical.h"
+
+#include <algorithm>
+
+#include "vec/columns/column.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/field_visitors.h"
+#include "vec/common/typeid_cast.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+namespace {
+using namespace FunctionsLogicalDetail;
+
+using UInt8Container = ColumnUInt8::Container;
+using UInt8ColumnPtrs = std::vector<const ColumnUInt8*>;
+
+/// Materializes a column from the intermediate ternary representation.
+/// The value column holds 1 for every row equal to Ternary::True and 0 otherwise.
+/// When `make_nullable` is set the result is a ColumnNullable whose null map holds 1 for
+/// every row equal to Ternary::Null and 0 otherwise.
+MutableColumnPtr convert_from_ternary_data(const UInt8Container& ternary_data,
+                                           const bool make_nullable) {
+    const size_t rows_count = ternary_data.size();
+
+    auto value_column = ColumnUInt8::create(rows_count);
+    auto& values = value_column->get_data();
+    for (size_t row = 0; row < rows_count; ++row) {
+        values[row] = (ternary_data[row] == Ternary::True);
+    }
+
+    if (!make_nullable) {
+        return value_column;
+    }
+
+    auto null_map_column = ColumnUInt8::create(rows_count);
+    auto& null_map = null_map_column->get_data();
+    for (size_t row = 0; row < rows_count; ++row) {
+        null_map[row] = (ternary_data[row] == Ternary::Null);
+    }
+
+    return ColumnNullable::create(std::move(value_column), std::move(null_map_column));
+}
+
+/// Attempts to view `column` as ColumnVector<T>. On success writes, starting at
+/// `res.begin()`, 1 for every non-zero element and 0 otherwise, and returns true.
+/// Returns false without touching `res` when the column has another type.
+/// `res` is never resized here, so it has to be large enough already.
+template <typename T>
+bool try_convert_column_to_uint8(const IColumn* column, UInt8Container& res) {
+    const auto typed_column = check_and_get_column<ColumnVector<T>>(column);
+    if (typed_column == nullptr) {
+        return false;
+    }
+
+    auto out = res.begin();
+    for (const auto element : typed_column->get_data()) {
+        *out = (element != 0);
+        ++out;
+    }
+    return true;
+}
+
+/// Converts a numeric column into 0/1 values stored in `res` by trying every supported
+/// element type in turn (UInt8 is not part of the list). Aborts through LOG(FATAL) when
+/// none of the types matches.
+void convert_column_to_uint8(const IColumn* column, UInt8Container& res) {
+    const bool converted = try_convert_column_to_uint8<Int8>(column, res) ||
+                           try_convert_column_to_uint8<Int16>(column, res) ||
+                           try_convert_column_to_uint8<Int32>(column, res) ||
+                           try_convert_column_to_uint8<Int64>(column, res) ||
+                           try_convert_column_to_uint8<UInt16>(column, res) ||
+                           try_convert_column_to_uint8<UInt32>(column, res) ||
+                           try_convert_column_to_uint8<UInt64>(column, res) ||
+                           try_convert_column_to_uint8<Float32>(column, res) ||
+                           try_convert_column_to_uint8<Float64>(column, res);
+    if (!converted) {
+        LOG(FATAL) << "Unexpected type of column: " << column->get_name();
+    }
+}
+
+/// Removes every constant column from `in` and folds their values into `res` with Op::apply.
+/// `func` turns the Field stored at row 0 of a constant column into a UInt8 value.
+/// Columns are visited from the back of `in` towards the front.
+/// Returns true when at least one constant column was found; otherwise `res` is untouched.
+template <class Op, typename Func>
+static bool extract_const_columns(ColumnRawPtrs& in, UInt8& res, Func&& func) {
+    bool found_const = false;
+
+    int pos = static_cast<int>(in.size());
+    while (--pos >= 0) {
+        if (is_column_const(*in[pos])) {
+            const UInt8 value = func((*in[pos])[0]);
+            // The first constant seeds the accumulator, later ones are combined into it.
+            res = found_const ? Op::apply(res, value) : value;
+            found_const = true;
+
+            in.erase(in.begin() + pos);
+        }
+    }
+
+    return found_const;
+}
+
+/// Two-valued overload: a constant counts as true only when it is not NULL and converts to
+/// a true boolean; NULL constants are folded in as false.
+template <class Op>
+inline bool extract_const_columns(ColumnRawPtrs& in, UInt8& res) {
+    const auto field_to_bool = [](const Field& value) -> bool {
+        if (value.is_null()) {
+            return false;
+        }
+        return apply_visitor(FieldVisitorConvertToNumber<bool>(), value);
+    };
+    return extract_const_columns<Op>(in, res, field_to_bool);
+}
+
+/// Three-valued overload: NULL constants become Ternary::Null, every other constant becomes
+/// Ternary::True or Ternary::False depending on its boolean conversion.
+template <class Op>
+inline bool extract_const_columns_ternary(ColumnRawPtrs& in, UInt8& res_3v) {
+    const auto field_to_ternary = [](const Field& value) -> Ternary::ResultType {
+        if (value.is_null()) {
+            return Ternary::make_value(false, true);
+        }
+        return Ternary::make_value(apply_visitor(FieldVisitorConvertToNumber<bool>(), value));
+    };
+    return extract_const_columns<Op>(in, res_3v, field_to_ternary);
+}
+
+/// Combines the last N UInt8 columns of a list, row by row, with Op.
+/// Each instantiation keeps a reference to the data of the column located N positions from
+/// the end of the list and delegates the remaining N - 1 columns to `next`.
+template <typename Op, size_t N>
+class AssociativeApplierImpl {
+    using ResultValueType = typename Op::ResultType;
+
+public:
+    /// Remembers the last N columns from `in`.
+    AssociativeApplierImpl(const UInt8ColumnPtrs& in)
+            : vec(in[in.size() - N]->get_data()), next(in) {}
+
+    /// Returns a combination of values in the i-th row of all columns stored in the constructor.
+    inline ResultValueType apply(const size_t i) const {
+        const auto& a = vec[i];
+        // For a saturable Op a saturated value is returned as is, so the remaining
+        // columns are not read for this row.
+        if constexpr (Op::is_saturable())
+            return Op::is_saturated_value(a) ? a : Op::apply(a, next.apply(i));
+        else
+            return Op::apply(a, next.apply(i));
+    }
+
+private:
+    // Reference into the column's own storage: the column must outlive this object.
+    const UInt8Container& vec;
+    // Applier for the N - 1 columns that follow this one in the list.
+    const AssociativeApplierImpl<Op, N - 1> next;
+};
+
+/// End of the recursion: wraps the very last column of the list and returns its values
+/// unchanged.
+template <typename Op>
+class AssociativeApplierImpl<Op, 1> {
+    using ResultValueType = typename Op::ResultType;
+
+public:
+    /// Remembers the last column from `in`.
+    AssociativeApplierImpl(const UInt8ColumnPtrs& in) : vec(in[in.size() - 1]->get_data()) {}
+
+    /// Returns the value stored in the i-th row of the remembered column.
+    inline ResultValueType apply(const size_t i) const { return vec[i]; }
+
+private:
+    // Reference into the column's own storage: the column must outlive this object.
+    const UInt8Container& vec;
+};
+
+/// A helper type used by AssociativeGenericApplierImpl: a type-erased callable that takes a
+/// row index and returns that row's value in the intermediate ternary representation.
+/// Allows for on-the-fly conversion of any data type into intermediate ternary representation
+using ValueGetter = std::function<Ternary::ResultType(size_t)>;
+
+/// Builds a ValueGetter for a column by walking a compile-time list of candidate element
+/// types. The empty list is handled by the `ValueGetterBuilderImpl<>` specialization.
+template <typename... Types>
+struct ValueGetterBuilderImpl;
+
+/// Recursive case: tests the column against `Type` and otherwise retries with `Types...`.
+template <typename Type, typename... Types>
+struct ValueGetterBuilderImpl<Type, Types...> {
+    /// Returns a getter for `x` when it is a ColumnVector<Type>, or a ColumnNullable whose
+    /// nested column is a ColumnVector<Type>; otherwise forwards `x` to the next type.
+    /// The returned lambdas capture the column data by reference, so the getter must not be
+    /// used after the column has been destroyed.
+    static ValueGetter build(const IColumn* x) {
+        if (const auto nullable_column = typeid_cast<const ColumnNullable*>(x)) {
+            if (const auto nested_column = typeid_cast<const ColumnVector<Type>*>(
+                        nullable_column->get_nested_column_ptr().get())) {
+                // Nullable column: the null map entry decides between Null and the value.
+                return [&null_data = nullable_column->get_null_map_data(),
+                        &column_data = nested_column->get_data()](size_t i) {
+                    return Ternary::make_value(column_data[i], null_data[i]);
+                };
+            } else
+                return ValueGetterBuilderImpl<Types...>::build(x);
+        } else if (const auto column = typeid_cast<const ColumnVector<Type>*>(x))
+            // Plain column: every row is either True or False.
+            return [&column_data = column->get_data()](size_t i) {
+                return Ternary::make_value(column_data[i]);
+            };
+        else
+            return ValueGetterBuilderImpl<Types...>::build(x);
+    }
+};
+
+/// Terminal case of the type list: none of the candidate element types matched the column.
+/// Never returns; the process is aborted through LOG(FATAL).
+template <>
+struct ValueGetterBuilderImpl<> {
+    [[noreturn]] static ValueGetter build(const IColumn* x) {
+        // Dereference before applying typeid so the message names the dynamic type of the
+        // column; typeid(x) would always describe the static pointer type `const IColumn*`.
+        LOG(FATAL) << "Unknown numeric column of type: " << demangle(typeid(*x).name());
+    }
+};
+
+/// The builder instantiated with every element type that is tried, in this order, when a
+/// ValueGetter is created for a column.
+using ValueGetterBuilder = ValueGetterBuilderImpl<UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32,
+                                                  Int64, Float32, Float64>;
+
+/// This class together with helper class ValueGetterBuilder can be used with columns of arbitrary data type
+/// Allows for on-the-fly conversion of any type of data into intermediate ternary representation
+/// and eliminates the need to materialize data columns in intermediate representation
+/// Each instantiation holds the getter of the column located N positions from the end of
+/// the list and delegates the remaining N - 1 columns to `next`.
+template <typename Op, size_t N>
+class AssociativeGenericApplierImpl {
+    using ResultValueType = typename Op::ResultType;
+
+public:
+    /// Remembers the last N columns from `in`.
+    AssociativeGenericApplierImpl(const ColumnRawPtrs& in)
+            : val_getter{ValueGetterBuilder::build(in[in.size() - N])}, next{in} {}
+
+    /// Returns a combination of values in the i-th row of all columns stored in the constructor.
+    inline ResultValueType apply(const size_t i) const {
+        const auto a = val_getter(i);
+        // For a saturable Op a saturated value is returned as is, so the remaining
+        // getters are not invoked for this row.
+        if constexpr (Op::is_saturable())
+            return Op::is_saturated_value(a) ? a : Op::apply(a, next.apply(i));
+        else
+            return Op::apply(a, next.apply(i));
+    }
+
+private:
+    // Converts row i of the remembered column into its ternary value.
+    const ValueGetter val_getter;
+    // Applier for the N - 1 columns that follow this one in the list.
+    const AssociativeGenericApplierImpl<Op, N - 1> next;
+};
+
+/// End of the recursion: wraps the very last column of the list and returns its ternary
+/// values unchanged.
+template <typename Op>
+class AssociativeGenericApplierImpl<Op, 1> {
+    using ResultValueType = typename Op::ResultType;
+
+public:
+    /// Remembers the last column from `in`.
+    AssociativeGenericApplierImpl(const ColumnRawPtrs& in)
+            : val_getter{ValueGetterBuilder::build(in[in.size() - 1])} {}
+
+    /// Returns the ternary value of the i-th row of the remembered column.
+    inline ResultValueType apply(const size_t i) const { return val_getter(i); }
+
+private:
+    // Converts row i of the remembered column into its ternary value.
+    const ValueGetter val_getter;
+};
+
+/// Apply target function by feeding it "batches" of N columns
+/// Combining 10 columns per pass is the fastest for large block sizes.
+/// For small block sizes - more columns is faster.
+template <typename Op, template <typename, size_t> typename OperationApplierImpl, size_t N = 10>
+struct OperationApplier {
+    /// Reduces `in` until at most one column is left. Every pass combines a batch of
+    /// trailing columns into `result` and then appends `result` to `in`, so the partial
+    /// result takes part in the next pass. `in` is modified in place.
+    template <typename Columns, typename ResultColumn>
+    static void apply(Columns& in, ResultColumn& result) {
+        while (in.size() > 1) {
+            do_batched_apply(in, result->get_data());
+            in.push_back(result.get());
+        }
+    }
+
+    /// Combines the last N columns of `in` into `result_data` and removes them from `in`.
+    /// When fewer than N columns are available the call is forwarded to the applier for
+    /// N - 1 columns.
+    template <typename Columns, typename ResultData>
+    static void NO_INLINE do_batched_apply(Columns& in, ResultData& result_data) {
+        if (N > in.size()) {
+            OperationApplier<Op, OperationApplierImpl, N - 1>::do_batched_apply(in, result_data);
+            return;
+        }
+
+        const OperationApplierImpl<Op, N> operationApplierImpl(in);
+        // One output value per element of result_data; `i` is the matching row index.
+        size_t i = 0;
+        for (auto& res : result_data) res = operationApplierImpl.apply(i++);
+
+        in.erase(in.end() - N, in.end());
+    }
+};
+
+/// End of the N - 1 fallback chain: a batch of a single column cannot be combined, so
+/// reaching this specialization is reported through LOG(FATAL).
+template <typename Op, template <typename, size_t> typename OperationApplierImpl>
+struct OperationApplier<Op, OperationApplierImpl, 1> {
+    template <typename Columns, typename Result>
+    static void NO_INLINE do_batched_apply(Columns&, Result&) {
+        LOG(FATAL) << "OperationApplier<...>::apply(...): not enough arguments to run this method";
+    }
+};
+
+/// Evaluates Op over `arguments` with three-valued logic and stores the resulting column in
+/// `result_info.column`.
+/// `arguments` is taken by value because the list is modified while it is being reduced.
+/// The result is nullable exactly when `result_info.type` is nullable.
+template <class Op>
+static void execute_for_ternary_logic_impl(ColumnRawPtrs arguments,
+                                           ColumnWithTypeAndName& result_info,
+                                           size_t input_rows_count) {
+    /// Combine all constant columns into a single constant value.
+    UInt8 const_3v_value = 0;
+    const bool has_consts = extract_const_columns_ternary<Op>(arguments, const_3v_value);
+
+    /// If the constant value uniquely determines the result, return it.
+    if (has_consts &&
+        (arguments.empty() || (Op::is_saturable() && Op::is_saturated_value(const_3v_value)))) {
+        result_info.column =
+                ColumnConst::create(convert_from_ternary_data(UInt8Container({const_3v_value}),
+                                                              result_info.type->is_nullable()),
+                                    input_rows_count);
+        return;
+    }
+
+    const auto result_column = ColumnUInt8::create(input_rows_count);
+    // Keeps the materialized constant alive while its raw pointer sits in `arguments`.
+    MutableColumnPtr const_column_holder;
+    if (has_consts) {
+        // Expand the folded constant into a full column; it is made nullable only when the
+        // constant itself is Null.
+        const_column_holder = convert_from_ternary_data(
+                UInt8Container(input_rows_count, const_3v_value), const_3v_value == Ternary::Null);
+        arguments.push_back(const_column_holder.get());
+    }
+
+    OperationApplier<Op, AssociativeGenericApplierImpl>::apply(arguments, result_column);
+
+    // result_column holds ternary values at this point; convert them to the final column.
+    result_info.column =
+            convert_from_ternary_data(result_column->get_data(), result_info.type->is_nullable());
+}
+
+/// Dispatches a binary operation over two columns by trying a compile-time list of element
+/// types for each of them. The empty list is handled by `TypedExecutorInvoker<Op>`.
+template <typename Op, typename... Types>
+struct TypedExecutorInvoker;
+
+/// The invoker instantiated with every element type that is tried, in this order.
+template <typename Op>
+using FastApplierImpl = TypedExecutorInvoker<Op, UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32,
+                                             Int64, Float32, Float64>;
+
+/// Recursive case: tests a column against `Type` and otherwise retries with `Types...`.
+template <typename Op, typename Type, typename... Types>
+struct TypedExecutorInvoker<Op, Type, Types...> {
+    /// Second stage: the type of `x` is already known, resolve the type of `y`.
+    /// On a match both inputs are normalized with `!!` and combined element-wise into
+    /// `result`, which is written starting at `result.begin()`.
+    template <typename T, typename Result>
+    static void apply(const ColumnVector<T>& x, const IColumn& y, Result& result) {
+        if (const auto column = typeid_cast<const ColumnVector<Type>*>(&y))
+            std::transform(x.get_data().cbegin(), x.get_data().cend(), column->get_data().cbegin(),
+                           result.begin(),
+                           [](const auto a, const auto b) { return Op::apply(!!a, !!b); });
+        else
+            TypedExecutorInvoker<Op, Types...>::template apply<T>(x, y, result);
+    }
+
+    /// First stage: resolve the type of `x`, then restart from the full type list
+    /// (FastApplierImpl) to resolve the type of `y`.
+    template <typename Result>
+    static void apply(const IColumn& x, const IColumn& y, Result& result) {
+        if (const auto column = typeid_cast<const ColumnVector<Type>*>(&x))
+            FastApplierImpl<Op>::template apply<Type>(*column, y, result);
+        else
+            TypedExecutorInvoker<Op, Types...>::apply(x, y, result);
+    }
+};
+
+/// Terminal case of the type list: no candidate element type matched one of the columns,
+/// which is reported through LOG(FATAL).
+template <typename Op>
+struct TypedExecutorInvoker<Op> {
+    /// Reached when the type of the second column `y` could not be resolved.
+    template <typename T, typename Result>
+    static void apply(const ColumnVector<T>&, const IColumn& y, Result&) {
+        LOG(FATAL) << "Unknown numeric column y of type: " << demangle(typeid(y).name());
+    }
+
+    /// Reached when the type of the first column `x` could not be resolved.
+    template <typename Result>
+    static void apply(const IColumn& x, const IColumn&, Result&) {
+        LOG(FATAL) << "Unknown numeric column x of type: " << demangle(typeid(x).name());
+    }
+};
+
+/// Evaluates Op over `arguments` with two-valued logic and stores the resulting UInt8
+/// column (or constant column) in `result_info.column`.
+/// `arguments` is taken by value because constant columns are removed from the list.
+template <class Op>
+static void basic_execute_impl(ColumnRawPtrs arguments, ColumnWithTypeAndName& result_info,
+                               size_t input_rows_count) {
+    /// Combine all constant columns into a single constant value.
+    UInt8 const_val = 0;
+    bool has_consts = extract_const_columns<Op>(arguments, const_val);
+
+    /// If the constant value uniquely determines the result, return it.
+    if (has_consts && (arguments.empty() || Op::apply(const_val, 0) == Op::apply(const_val, 1))) {
+        if (!arguments.empty()) const_val = Op::apply(const_val, 0);
+        result_info.column =
+                DataTypeUInt8().create_column_const(input_rows_count, to_field(const_val));
+        return;
+    }
+
+    /// If the constant value is a neutral element, let's forget about it.
+    if (has_consts && Op::apply(const_val, 0) == 0 && Op::apply(const_val, 1) == 1)
+        has_consts = false;
+
+    UInt8ColumnPtrs uint8_args;
+
+    auto col_res = ColumnUInt8::create();
+    UInt8Container& vec_res = col_res->get_data();
+    if (has_consts) {
+        // Seed the result with the constant and let it take part as an ordinary argument.
+        vec_res.assign(input_rows_count, const_val);
+        uint8_args.push_back(col_res.get());
+    } else {
+        vec_res.resize(input_rows_count);
+    }
+
+    /// FastPath detection goes in here
+    // Taken when exactly two operands remain, counting the seeded constant as one of them.
+    if (arguments.size() == (has_consts ? 1 : 2)) {
+        if (has_consts)
+            FastApplierImpl<Op>::apply(*arguments[0], *col_res, col_res->get_data());
+        else
+            FastApplierImpl<Op>::apply(*arguments[0], *arguments[1], col_res->get_data());
+
+        result_info.column = std::move(col_res);
+        return;
+    }
+
+    /// Convert all columns to UInt8
+    // Owns the converted copies so the raw pointers in uint8_args stay valid.
+    Columns converted_columns;
+    for (const IColumn* column : arguments) {
+        if (auto uint8_column = check_and_get_column<ColumnUInt8>(column))
+            uint8_args.push_back(uint8_column);
+        else {
+            auto converted_column = ColumnUInt8::create(input_rows_count);
+            convert_column_to_uint8(column, converted_column->get_data());
+            uint8_args.push_back(converted_column.get());
+            converted_columns.emplace_back(std::move(converted_column));
+        }
+    }
+
+    OperationApplier<Op, AssociativeApplierImpl>::apply(uint8_args, col_res);
+
+    /// This is possible if there is exactly one non-constant among the arguments, and it is of type UInt8.
+    if (uint8_args[0] != col_res.get()) vec_res.assign(uint8_args[0]->get_data());
+
+    result_info.column = std::move(col_res);
+}
+
+} // namespace
+
+/// Validates the argument types and derives the result type: UInt8, wrapped into Nullable
+/// when at least one argument type is nullable.
+/// Fewer than two arguments or an unsupported argument type is reported through LOG(FATAL).
+template <typename Impl, typename Name>
+DataTypePtr FunctionAnyArityLogical<Impl, Name>::get_return_type_impl(
+        const DataTypes& arguments) const {
+    if (arguments.size() < 2) {
+        LOG(FATAL) << fmt::format(
+                "Number of arguments for function \"{}\" should be at least 2: passed {}",
+                get_name(), arguments.size());
+    }
+
+    bool has_nullable_arguments = false;
+    for (size_t i = 0; i < arguments.size(); ++i) {
+        const auto& arg_type = arguments[i];
+
+        // Only the first nullable argument is looked for; the flag is not re-evaluated
+        // once it has been set.
+        if (!has_nullable_arguments) {
+            has_nullable_arguments = arg_type->is_nullable();
+            if (has_nullable_arguments && !Impl::special_implementation_for_nulls()) {
+                LOG(WARNING) << fmt::format(
+                        "Logical error: Unexpected type of argument for function \"{}\" argument "
+                        "{} is of type {}",
+                        get_name(), i + 1, arg_type->get_name());
+            }
+        }
+
+        // Accepted: native numbers, and for implementations with their own NULL handling
+        // also the only-null type and nullable native numbers.
+        if (!(is_native_number(arg_type) ||
+              (Impl::special_implementation_for_nulls() &&
+               (arg_type->only_null() || is_native_number(remove_nullable(arg_type)))))) {
+            LOG(FATAL) << fmt::format("Illegal type ({}) of {} argument of function {}",
+                                      arg_type->get_name(), i + 1, get_name());
+        }
+    }
+
+    auto result_type = std::make_shared<DataTypeUInt8>();
+    return has_nullable_arguments ? make_nullable(result_type) : result_type;
+}
+
+/// Collects the argument columns of `block` and evaluates the operation into the column at
+/// `result_index`: three-valued logic is used when the result type is nullable, plain
+/// two-valued logic otherwise. Always returns Status::OK().
+template <typename Impl, typename Name>
+Status FunctionAnyArityLogical<Impl, Name>::execute_impl(FunctionContext* context, Block& block,
+                                                         const ColumnNumbers& arguments,
+                                                         size_t result_index,
+                                                         size_t input_rows_count) {
+    ColumnRawPtrs raw_arguments;
+    for (const auto position : arguments) {
+        raw_arguments.push_back(block.get_by_position(position).column.get());
+    }
+
+    auto& result_entry = block.get_by_position(result_index);
+    const bool nullable_result = result_entry.type->is_nullable();
+    if (nullable_result) {
+        execute_for_ternary_logic_impl<Impl>(std::move(raw_arguments), result_entry,
+                                             input_rows_count);
+    } else {
+        basic_execute_impl<Impl>(std::move(raw_arguments), result_entry, input_rows_count);
+    }
+    return Status::OK();
+}
+
+/// Applies the unary operation Op to every element of a ColumnVector<A> container.
+template <typename A, typename Op>
+struct UnaryOperationImpl {
+    using ResultType = typename Op::ResultType;
+    using ArrayA = typename ColumnVector<A>::Container;
+    using ArrayC = typename ColumnVector<ResultType>::Container;
+
+    /// Writes Op::apply(a[k]) for every element of `a` into `c`, starting at `c.begin()`.
+    /// `c` is not resized here.
+    static void NO_INLINE vector(const ArrayA& a, ArrayC& c) {
+        auto out = c.begin();
+        for (const auto element : a) {
+            *out = Op::apply(element);
+            ++out;
+        }
+    }
+};
+
+/// Checks that the first argument is a native number (anything else is reported through
+/// LOG(FATAL)) and returns UInt8 as the result type.
+template <template <typename> class Impl, typename Name>
+DataTypePtr FunctionUnaryLogical<Impl, Name>::get_return_type_impl(
+        const DataTypes& arguments) const {
+    const auto& argument_type = arguments[0];
+    const bool supported = is_native_number(argument_type);
+    if (!supported) {
+        LOG(FATAL) << fmt::format("Illegal type ({}) of argument of function {}",
+                                  argument_type->get_name(), get_name());
+    }
+
+    return std::make_shared<DataTypeUInt8>();
+}
+
+/// Runs the unary operation Impl<T> when the first argument column is a ColumnVector<T>.
+/// On a match the freshly built UInt8 column replaces the column at position `result` and
+/// true is returned; otherwise the block is left untouched and false is returned.
+template <template <typename> class Impl, typename T>
+bool functionUnaryExecuteType(Block& block, const ColumnNumbers& arguments, size_t result) {
+    const auto typed_column = check_and_get_column<ColumnVector<T>>(
+            block.get_by_position(arguments[0]).column.get());
+    if (!typed_column) {
+        return false;
+    }
+
+    const auto& input = typed_column->get_data();
+
+    auto result_column = ColumnUInt8::create();
+    typename ColumnUInt8::Container& output = result_column->get_data();
+    output.resize(input.size());
+    UnaryOperationImpl<T, Impl<T>>::vector(input, output);
+
+    block.replace_by_position(result, std::move(result_column));
+    return true;
+}
+
+/// Evaluates the unary operation by trying every supported element type for the argument
+/// column in turn. When none matches, the problem is reported through LOG(FATAL).
+/// Returns Status::OK().
+template <template <typename> class Impl, typename Name>
+Status FunctionUnaryLogical<Impl, Name>::execute_impl(FunctionContext* context, Block& block,
+                                                      const ColumnNumbers& arguments, size_t result,
+                                                      size_t /*input_rows_count*/) {
+    const bool handled = functionUnaryExecuteType<Impl, UInt8>(block, arguments, result) ||
+                         functionUnaryExecuteType<Impl, UInt16>(block, arguments, result) ||
+                         functionUnaryExecuteType<Impl, UInt32>(block, arguments, result) ||
+                         functionUnaryExecuteType<Impl, UInt64>(block, arguments, result) ||
+                         functionUnaryExecuteType<Impl, Int8>(block, arguments, result) ||
+                         functionUnaryExecuteType<Impl, Int16>(block, arguments, result) ||
+                         functionUnaryExecuteType<Impl, Int32>(block, arguments, result) ||
+                         functionUnaryExecuteType<Impl, Int64>(block, arguments, result) ||
+                         functionUnaryExecuteType<Impl, Float32>(block, arguments, result) ||
+                         functionUnaryExecuteType<Impl, Float64>(block, arguments, result);
+    if (!handled) {
+        LOG(FATAL) << fmt::format("Illegal column {} of argument of function {}",
+                                  block.get_by_position(arguments[0]).column->get_name(),
+                                  get_name());
+    }
+
+    return Status::OK();
+}
+
+/// Registers the logical functions FunctionAnd, FunctionOr, FunctionXor and FunctionNot
+/// with the given factory.
+void register_function_logical(SimpleFunctionFactory& instance) {
+    instance.register_function<FunctionAnd>();
+    instance.register_function<FunctionOr>();
+    instance.register_function<FunctionXor>();
+    instance.register_function<FunctionNot>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/functions_logical.h b/be/src/vec/functions/functions_logical.h
new file mode 100644
index 0000000000..20982396dc
--- /dev/null
+++ b/be/src/vec/functions/functions_logical.h
@@ -0,0 +1,167 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsLogical.h
+// and modified by Doris
+
+#pragma once
+
+#include <type_traits>
+
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/functions/function.h"
+
+/** Logical functions AND, OR, XOR and NOT support three-valued (or ternary) logic
+  * https://en.wikibooks.org/wiki/Structured_Query_Language/NULLs_and_the_Three_Valued_Logic
+  *
+  * Functions XOR and NOT rely on "default implementation for NULLs":
+  *   - if any of the arguments is of Nullable type, the return value type is Nullable
+  *   - if any of the arguments is NULL, the return value is NULL
+  *
+  * Functions AND and OR provide their own special implementations for ternary logic
+  */
+
+namespace doris::vectorized {
+namespace FunctionsLogicalDetail {
+/// Intermediate three-valued representation stored in a UInt8:
+/// False is 0, Null is 1 and True is -1 converted to UInt8.
+namespace Ternary {
+using ResultType = UInt8;
+
+static constexpr UInt8 False = 0;
+static constexpr UInt8 True = -1;
+static constexpr UInt8 Null = 1;
+
+/// Maps a value that compares unequal to zero to Ternary::True and zero to Ternary::False.
+template <typename T>
+inline ResultType make_value(T value) {
+    if (value != 0) {
+        return Ternary::True;
+    }
+    return Ternary::False;
+}
+
+/// Returns Ternary::Null when `is_null` is set; otherwise converts `value` like the
+/// single-argument overload.
+template <typename T>
+inline ResultType make_value(T value, bool is_null) {
+    return is_null ? Ternary::Null : make_value<T>(value);
+}
+} // namespace Ternary
+
+/// Logical AND, implemented as a bitwise AND of the UInt8 operands.
+struct AndImpl {
+    using ResultType = UInt8;
+
+    /// AND is saturable: once a value is False the remaining operands cannot change it.
+    static inline constexpr bool is_saturable() { return true; }
+    static inline constexpr bool is_saturated_value(UInt8 a) { return a == Ternary::False; }
+    static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a & b; }
+    /// AND handles NULL arguments itself instead of using the default NULL handling.
+    static inline constexpr bool special_implementation_for_nulls() { return true; }
+};
+
+/// Logical OR, implemented as a bitwise OR of the UInt8 operands.
+struct OrImpl {
+    using ResultType = UInt8;
+
+    /// OR is saturable: once a value is True the remaining operands cannot change it.
+    static inline constexpr bool is_saturable() { return true; }
+    static inline constexpr bool is_saturated_value(UInt8 a) { return a == Ternary::True; }
+    static inline constexpr ResultType apply(UInt8 a, UInt8 b) { return a | b; }
+    /// OR handles NULL arguments itself instead of using the default NULL handling.
+    static inline constexpr bool special_implementation_for_nulls() { return true; }
+};
+
+/// Logical XOR: the result is Ternary::True when the two operands differ and
+/// Ternary::False when they are equal.
+struct XorImpl {
+    using ResultType = UInt8;
+
+    /// XOR has no saturated value: every operand can still flip the result.
+    static inline constexpr bool is_saturable() { return false; }
+    static inline constexpr bool is_saturated_value(bool) { return false; }
+    /** Considering that CH uses UInt8 for representation of boolean values this function
+      * returns 255 as "true" but the current implementation of logical functions suggests that
+      * any nonzero value is "true" as well. Also the current code provides no guarantee
+      * for "true" to be represented with the value of 1.
+      */
+    static inline constexpr ResultType apply(UInt8 a, UInt8 b) {
+        return (a != b) ? Ternary::True : Ternary::False;
+    }
+    /// XOR relies on the default implementation for NULLs.
+    static inline constexpr bool special_implementation_for_nulls() { return false; }
+};
+
+/// Logical NOT for an operand of type A: returns `!a` as a UInt8.
+template <typename A>
+struct NotImpl {
+    using ResultType = UInt8;
+
+    static inline ResultType apply(A a) { return !a; }
+};
+
+/// Variadic logical function (used for AND, OR and XOR). `Impl` supplies the operation and
+/// `Name` supplies the function name.
+template <typename Impl, typename Name>
+class FunctionAnyArityLogical : public IFunction {
+public:
+    static constexpr auto name = Name::name;
+    //    static FunctionPtr create(const Context &) { return std::make_shared<FunctionAnyArityLogical>(); }
+    static FunctionPtr create() { return std::make_shared<FunctionAnyArityLogical>(); }
+
+public:
+    String get_name() const override { return name; }
+
+    /// The function is variadic, hence the reported fixed argument count of 0.
+    bool is_variadic() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
+
+    /// The default NULL handling is used only when Impl has no special one of its own.
+    bool use_default_implementation_for_nulls() const override {
+        return !Impl::special_implementation_for_nulls();
+    }
+
+    /// Get result types by argument types. If the function does not apply to these arguments,
+    /// the implementation in functions_logical.cpp reports it through LOG(FATAL).
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override;
+
+    /// Evaluates the function over the argument columns of `block` and stores the outcome
+    /// in the column at position `result`.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override;
+};
+
+/// Logical function of exactly one argument (used for NOT). `Impl<T>` supplies the
+/// operation for element type T and `Name` supplies the function name.
+template <template <typename> class Impl, typename Name>
+class FunctionUnaryLogical : public IFunction {
+public:
+    static constexpr auto name = Name::name;
+    static FunctionPtr create() { return std::make_shared<FunctionUnaryLogical>(); }
+
+public:
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+
+    /// Get the result type by argument type; defined in functions_logical.cpp.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override;
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    /// Evaluates the function over the argument column of `block` and stores the outcome
+    /// in the column at position `result`.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override;
+};
+
+} // namespace FunctionsLogicalDetail
+
+/// Name tags: each struct supplies the `name` string of one logical function and is passed
+/// as the `Name` template argument of the function classes above.
+struct NameAnd {
+    static constexpr auto name = "and";
+};
+struct NameOr {
+    static constexpr auto name = "or";
+};
+struct NameXor {
+    static constexpr auto name = "xor";
+};
+struct NameNot {
+    static constexpr auto name = "not";
+};
+
+/// Concrete logical functions: each alias binds an operation implementation to its name tag.
+using FunctionAnd =
+        FunctionsLogicalDetail::FunctionAnyArityLogical<FunctionsLogicalDetail::AndImpl, NameAnd>;
+using FunctionOr =
+        FunctionsLogicalDetail::FunctionAnyArityLogical<FunctionsLogicalDetail::OrImpl, NameOr>;
+using FunctionXor =
+        FunctionsLogicalDetail::FunctionAnyArityLogical<FunctionsLogicalDetail::XorImpl, NameXor>;
+using FunctionNot =
+        FunctionsLogicalDetail::FunctionUnaryLogical<FunctionsLogicalDetail::NotImpl, NameNot>;
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/hll_cardinality.cpp b/be/src/vec/functions/hll_cardinality.cpp
new file mode 100644
index 0000000000..c44385d01f
--- /dev/null
+++ b/be/src/vec/functions/hll_cardinality.cpp
@@ -0,0 +1,71 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/hll_function.h"
+#include "udf/udf.h"
+#include "vec/data_types/number_traits.h"
+#include "vec/functions/function_always_not_nullable.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+/// Implementation of the `hll_cardinality` function: for every row of a string column it
+/// stores the value returned by HllFunctions::hll_cardinality in an Int64 result column.
+struct HLLCardinality {
+    static constexpr auto name = "hll_cardinality";
+
+    using ReturnType = DataTypeNumber<Int64>;
+
+    /// Computes the cardinality of every row.
+    /// `data` and `offsets` are the character buffer and the row offsets of the input
+    /// string column. The number of rows processed is the current size of the result
+    /// column behind `col_res`; the result column is not resized here.
+    static void vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                       MutableColumnPtr& col_res) {
+        auto& res = result_data(col_res);
+
+        const size_t size = res.size();
+        for (size_t i = 0; i < size; ++i) {
+            res[i] = cardinality_at(data, offsets, i);
+        }
+    }
+
+    /// Same as `vector`, except that rows flagged in `nullmap` are not evaluated and
+    /// produce 0.
+    static void vector_nullable(const ColumnString::Chars& data,
+                                const ColumnString::Offsets& offsets, const NullMap& nullmap,
+                                MutableColumnPtr& col_res) {
+        auto& res = result_data(col_res);
+
+        const size_t size = res.size();
+        for (size_t i = 0; i < size; ++i) {
+            res[i] = nullmap[i] ? 0 : cardinality_at(data, offsets, i);
+        }
+    }
+
+private:
+    /// Returns the value container of the result column, which is treated as a
+    /// ColumnVector<Int64> (the column type matching ReturnType).
+    static ColumnVector<Int64>::Container& result_data(MutableColumnPtr& col_res) {
+        return static_cast<ColumnVector<Int64>*>(col_res.get())->get_data();
+    }
+
+    /// Evaluates HllFunctions::hll_cardinality on the bytes of row `row`.
+    static Int64 cardinality_at(const ColumnString::Chars& data,
+                                const ColumnString::Offsets& offsets, size_t row) {
+        // The row starts at the previous offset. For row 0 this reads the element in front
+        // of the first offset, exactly like the `offsets[i - 1]` access it replaces.
+        const auto begin = offsets[row - 1];
+        // One byte per row is left out of the length; presumably the terminator stored
+        // after every string (confirm against ColumnString).
+        const auto length = offsets[row] - begin - 1;
+
+        // StringVal takes a mutable pointer, so constness has to be dropped explicitly.
+        auto* bytes = const_cast<uint8_t*>(reinterpret_cast<const uint8_t*>(&data[begin]));
+        return HllFunctions::hll_cardinality(nullptr, StringVal(bytes, length)).val;
+    }
+};
+
+/// The `hll_cardinality` function built from HLLCardinality.
+using FunctionHLLCardinality = FunctionAlwaysNotNullable<HLLCardinality>;
+
+/// Registers FunctionHLLCardinality with the given factory.
+void register_function_hll_cardinality(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionHLLCardinality>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/hll_empty.cpp b/be/src/vec/functions/hll_empty.cpp
new file mode 100644
index 0000000000..cbe38c0db7
--- /dev/null
+++ b/be/src/vec/functions/hll_empty.cpp
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "exprs/hll_function.h"
+#include "olap/hll.h"
+#include "vec/data_types/data_type_string.h"
+#include "vec/functions/function_const.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+struct HLLEmptyImpl { // policy type plugged into FunctionConst below
+    static constexpr auto name = "hll_empty";
+    static auto get_return_type() { return std::make_shared<DataTypeString>(); }
+    static Field init_value() { // the constant: the bytes of HyperLogLog::empty()
+        const auto empty_hll = HyperLogLog::empty();
+        return Field(empty_hll.c_str(), empty_hll.size());
+    }
+};
+
+using FunctionHLLEmpty = FunctionConst<HLLEmptyImpl>;
+// Registers FunctionHLLEmpty with the given function factory.
+void register_function_hll_empty(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionHLLEmpty>();
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/hll_hash.cpp b/be/src/vec/functions/hll_hash.cpp
new file mode 100644
index 0000000000..0d40bef0b9
--- /dev/null
+++ b/be/src/vec/functions/hll_hash.cpp
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstring>
+#include "exprs/hll_function.h"
+#include "olap/hll.h"
+#include "udf/udf.h"
+#include "vec/functions/function_always_not_nullable.h"
+#include "vec/functions/simple_function_factory.h"
+namespace doris::vectorized {
+
+// hll_hash over a string column: each input row is replaced by the value returned from
+// HllFunctions::hll_hash. Plugged into FunctionAlwaysNotNullable below.
+struct HLLHash {
+    static constexpr auto name = "hll_hash";
+    using ReturnType = DataTypeString;
+
+    // Hashes every row of (data, offsets) into `col_res`, which is reinterpreted as a
+    // ColumnString. NOTE(review): `col_res` is assumed to arrive empty - confirm at the caller.
+    static void vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
+                       MutableColumnPtr& col_res) {
+        auto* res_column = reinterpret_cast<ColumnString*>(col_res.get());
+        ColumnString::Chars& res_data = res_column->get_chars();
+        ColumnString::Offsets& res_offsets = res_column->get_offsets();
+
+        const size_t size = offsets.size();
+        res_offsets.resize(size);
+        res_data.reserve(data.size());
+
+        size_t prev_offset = 0;
+        for (size_t i = 0; i < size; ++i) {
+            append_row(res_data, hash_row(data, prev_offset, offsets[i]));
+            res_offsets[i] = res_data.size();
+            prev_offset = offsets[i];
+        }
+    }
+
+    // Like vector(), except rows flagged in `nullmap` produce HyperLogLog::empty() and their
+    // input bytes are never hashed.
+    static void vector_nullable(const ColumnString::Chars& data,
+                                const ColumnString::Offsets& offsets, const NullMap& nullmap,
+                                MutableColumnPtr& col_res) {
+        auto* res_column = reinterpret_cast<ColumnString*>(col_res.get());
+        ColumnString::Chars& res_data = res_column->get_chars();
+        ColumnString::Offsets& res_offsets = res_column->get_offsets();
+
+        const size_t size = offsets.size();
+        res_offsets.resize(size);
+        res_data.reserve(data.size());
+
+        size_t prev_offset = 0;
+        for (size_t i = 0; i < size; ++i) {
+            if (nullmap[i]) {
+                append_row(res_data, HyperLogLog::empty());
+            } else {
+                append_row(res_data, hash_row(data, prev_offset, offsets[i]));
+            }
+            res_offsets[i] = res_data.size();
+            prev_offset = offsets[i];
+        }
+    }
+
+private:
+    // hll_hash of the input row stored at data[begin, end); the row's final byte is excluded.
+    static std::string hash_row(const ColumnString::Chars& data, size_t begin, size_t end) {
+        return HllFunctions::hll_hash(StringVal((uint8_t*)(&data[begin]), end - begin - 1));
+    }
+
+    // Appends `value` followed by a single 0 byte to `chars`. Plain memcpy is used on purpose:
+    // a std::string does not guarantee readable bytes past its end, which a copy helper that
+    // (per its name) allows a 15-byte read overflow would rely on.
+    static void append_row(ColumnString::Chars& chars, const std::string& value) {
+        const size_t old_size = chars.size();
+        chars.resize(old_size + value.length() + 1);
+        memcpy(&chars[old_size], value.data(), value.length());
+        chars[old_size + value.length()] = 0;
+    }
+};
+
+using FunctionHLLHash = FunctionAlwaysNotNullable<HLLHash>;
+// Registers FunctionHLLHash with the given function factory.
+void register_function_hll_hash(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionHLLHash>();
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/if.cpp b/be/src/vec/functions/if.cpp
new file mode 100644
index 0000000000..0496222b2e
--- /dev/null
+++ b/be/src/vec/functions/if.cpp
@@ -0,0 +1,480 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/If.cpp
+// and modified by Doris
+
+#include "vec/columns/column_nullable.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/get_least_supertype.h"
+#include "vec/data_types/number_traits.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+// Numeric kernel of `if`: result[i] = cond[i] ? then : else, both cast to ResultType. The four
+// entry points cover a full column ("vector") or one scalar ("constant") on either side.
+template <typename A, typename B, typename ResultType>
+struct NumIfImpl {
+    using ArrayCond = PaddedPODArray<UInt8>;
+    using ArrayA = PaddedPODArray<A>;
+    using ArrayB = PaddedPODArray<B>;
+    using ColVecResult = ColumnVector<ResultType>;
+
+    // then = column `a`, else = column `b`. The trailing UInt32 is unused.
+    static void vector_vector(const ArrayCond& cond, const ArrayA& a, const ArrayB& b, Block& block,
+                              size_t result, UInt32) {
+        select_rows(cond, block, result, [&a](size_t row) { return a[row]; },
+                    [&b](size_t row) { return b[row]; });
+    }
+
+    // then = column `a`, else = scalar `b`.
+    static void vector_constant(const ArrayCond& cond, const ArrayA& a, B b, Block& block,
+                                size_t result, UInt32) {
+        select_rows(cond, block, result, [&a](size_t row) { return a[row]; },
+                    [b](size_t) { return b; });
+    }
+
+    // then = scalar `a`, else = column `b`.
+    static void constant_vector(const ArrayCond& cond, A a, const ArrayB& b, Block& block,
+                                size_t result, UInt32) {
+        select_rows(cond, block, result, [a](size_t) { return a; },
+                    [&b](size_t row) { return b[row]; });
+    }
+
+    // then = scalar `a`, else = scalar `b`.
+    static void constant_constant(const ArrayCond& cond, A a, B b, Block& block, size_t result,
+                                  UInt32) {
+        select_rows(cond, block, result, [a](size_t) { return a; }, [b](size_t) { return b; });
+    }
+
+private:
+    // Fills a new cond.size()-row ColVecResult from the accessors, stores it at block[result].
+    template <typename ThenAt, typename ElseAt>
+    static void select_rows(const ArrayCond& cond, Block& block, size_t result, ThenAt then_at,
+                            ElseAt else_at) {
+        const size_t rows = cond.size();
+        auto col_res = ColVecResult::create(rows);
+        typename ColVecResult::Container& out = col_res->get_data();
+        for (size_t row = 0; row < rows; ++row) {
+            out[row] = cond[row] ? static_cast<ResultType>(then_at(row))
+                                 : static_cast<ResultType>(else_at(row));
+        }
+        block.replace_by_position(result, std::move(col_res));
+    }
+};
+
+template <typename A, typename B>
+struct NumIfImpl<A, B, NumberTraits::Error> {
+private:
+    [[noreturn]] static void throw_error() {
+        LOG(FATAL) << "Internal logic error: invalid types of arguments 2 and 3 of if";
+    }
+
+public:
+    template <typename... Args>
+    static void vector_vector(Args&&...) {
+        throw_error();
+    }
+    template <typename... Args>
+    static void vector_constant(Args&&...) {
+        throw_error();
+    }
+    template <typename... Args>
+    static void constant_vector(Args&&...) {
+        throw_error();
+    }
+    template <typename... Args>
+    static void constant_constant(Args&&...) {
+        throw_error();
+    }
+};
+
+// todo(wb) support llvm codegen
+class FunctionIf : public IFunction {
+public:
+    static constexpr auto name = "if";
+
+    static FunctionPtr create() { return std::make_shared<FunctionIf>(); }
+    String get_name() const override { return name; }
+    // Three arguments (condition, then, else); the default NULL handling is switched off.
+    size_t get_number_of_arguments() const override { return 3; }
+    bool use_default_implementation_for_nulls() const override { return false; }
+    ColumnNumbers get_arguments_that_dont_imply_nullable_return_type(
+            size_t /*number_of_arguments*/) const override {
+        return {0}; // argument 0, i.e. the condition
+    }
+    // The return type is the least supertype of the then/else types (arguments 1 and 2).
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return get_least_supertype({arguments[1], arguments[2]});
+    }
+
+    // Expands a constant column into a full column (delegates to the column itself).
+    static ColumnPtr materialize_column_if_const(const ColumnPtr& column) {
+        return column->convert_to_full_column_if_const();
+    }
+    // Nullable input is returned as is; otherwise wraps it with an all-zero null map.
+    static ColumnPtr make_nullable_column_if_not(const ColumnPtr& column) {
+        if (!is_column_nullable(*column)) {
+            return ColumnNullable::create(materialize_column_if_const(column),
+                                          ColumnUInt8::create(column->size(), 0));
+        }
+        return column;
+    }
+    // Nested column of a ColumnNullable; any other column is returned unchanged.
+    static ColumnPtr get_nested_column(const ColumnPtr& column) {
+        const auto* nullable_column = check_and_get_column<ColumnNullable>(*column);
+        return nullable_column ? ColumnPtr(nullable_column->get_nested_column_ptr()) : column;
+    }
+
+    // Type-agnostic fallback that assembles the result one row at a time with insert_from().
+    // No type conversion happens here: rows are inserted into a column created from the type
+    // recorded in the result slot.
+    //
+    //   block               block that owns the result slot
+    //   cond_col            condition bytes, one per row; non-zero selects the then side
+    //   then_col_type_name  then argument, a full column or a ColumnConst
+    //   else_col_type_name  else argument, a full column or a ColumnConst
+    //   result              position in `block` that receives the new column
+    //   input_row_count     number of rows to produce
+    //
+    // `cond_col` is dereferenced without a null check. Always returns Status::OK().
+    Status execute_generic(Block& block, const ColumnUInt8* cond_col,
+                           const ColumnWithTypeAndName& then_col_type_name,
+                           const ColumnWithTypeAndName& else_col_type_name, size_t result,
+                           size_t input_row_count) {
+        // Start from an empty column of the result type and reserve room for every row.
+        MutableColumnPtr result_column = block.get_by_position(result).type->create_column();
+        result_column->reserve(input_row_count);
+
+        const IColumn& then_col = *then_col_type_name.column;
+        const IColumn& else_col = *else_col_type_name.column;
+        const bool then_is_const = is_column_const(then_col);
+        const bool else_is_const = is_column_const(else_col);
+
+        // A ColumnConst contributes row 0 of its data column for every row it is selected
+        // for; a full column contributes row i. Resolve the source of each side once.
+        const IColumn* then_source = &then_col;
+        if (then_is_const) {
+            then_source = &assert_cast<const ColumnConst&>(then_col).get_data_column();
+        }
+        const IColumn* else_source = &else_col;
+        if (else_is_const) {
+            else_source = &assert_cast<const ColumnConst&>(else_col).get_data_column();
+        }
+
+        // One pass over the condition: non-zero takes the then side, zero the else side.
+        const auto& cond_array = cond_col->get_data();
+        for (size_t i = 0; i < input_row_count; ++i) {
+            // Row to read from each side for output row i.
+            const size_t then_row = then_is_const ? 0 : i;
+            const size_t else_row = else_is_const ? 0 : i;
+            if (cond_array[i]) {
+                result_column->insert_from(*then_source, then_row);
+            } else {
+                result_column->insert_from(*else_source, else_row);
+            }
+        }
+
+        // Hand the finished column over to the result slot.
+        block.replace_by_position(result, std::move(result_column));
+        return Status::OK();
+    }
+
+    // Fast path for int/float arguments: dispatches on the type ids of `then_col`/`else_col`
+    // and lets NumIfImpl store the result column at position `result` of `block`. `status` is
+    // written only on failure; an unmatched else column or type pair is now an InternalError
+    // too (both used to leave the result unset without touching `status`).
+    void execute_basic_type(Block& block, const ColumnUInt8* cond_col,
+                            const ColumnWithTypeAndName& then_col,
+                            const ColumnWithTypeAndName& else_col, size_t result, Status& status) {
+        auto call = [&](const auto& types) -> bool {
+            // For Doris the argument types and the return type are the same because the type
+            // casts have already been applied, so one type is enough (TODO: better approach).
+            using T0 = typename std::decay_t<decltype(types)>::LeftType;
+            using ColVecT0 = ColumnVector<T0>;
+            using Impl = NumIfImpl<T0, T0, T0>;
+            const auto& cond = cond_col->get_data();
+            const auto* then_raw = then_col.column.get();
+            const auto* else_raw = else_col.column.get();
+            // Each side is either a full ColVecT0, a constant wrapping one, or unsupported.
+            const auto* then_vec = check_and_get_column<ColVecT0>(then_raw);
+            const auto* else_vec = check_and_get_column<ColVecT0>(else_raw);
+            const auto* then_const =
+                    then_vec ? nullptr : check_and_get_column_const<ColVecT0>(then_raw);
+            const auto* else_const =
+                    else_vec ? nullptr : check_and_get_column_const<ColVecT0>(else_raw);
+
+            if (then_vec && else_vec) {
+                Impl::vector_vector(cond, then_vec->get_data(), else_vec->get_data(), block,
+                                    result, 0);
+            } else if (then_vec && else_const) {
+                Impl::vector_constant(cond, then_vec->get_data(),
+                                      else_const->template get_value<T0>(), block, result, 0);
+            } else if (then_const && else_vec) {
+                Impl::constant_vector(cond, then_const->template get_value<T0>(),
+                                      else_vec->get_data(), block, result, 0);
+            } else if (then_const && else_const) {
+                Impl::constant_constant(cond, then_const->template get_value<T0>(),
+                                        else_const->template get_value<T0>(), block, result, 0);
+            } else {
+                status = Status::InternalError("unexpected args column type");
+            }
+            return true;
+        };
+        // todo(wb): a better way to determine type
+        const bool dispatched = call_on_basic_types<true, true, false, false>(
+                then_col.type->get_type_id(), else_col.type->get_type_id(), call);
+        if (!dispatched) status = Status::InternalError("unexpected args type");
+    }
+
+    // Handles a then and/or else argument that contains only NULLs.
+    // Returns false, leaving `block` untouched, when neither side is only-NULL. Otherwise
+    // returns true after storing the result column at position `result`, or after writing an
+    // InternalError to `status` when the condition is neither ColumnUInt8 nor a constant of it.
+    bool execute_for_null_then_else(FunctionContext* context, Block& block,
+                                    const ColumnWithTypeAndName& arg_cond,
+                                    const ColumnWithTypeAndName& arg_then,
+                                    const ColumnWithTypeAndName& arg_else, size_t result,
+                                    size_t input_rows_count, Status& status) {
+        bool then_is_null = arg_then.column->only_null();
+        bool else_is_null = arg_else.column->only_null();
+
+        if (!then_is_null && !else_is_null) return false;
+
+        if (then_is_null && else_is_null) {
+            block.get_by_position(result).column =
+                    block.get_by_position(result).type->create_column_const_with_default_value(
+                            input_rows_count);
+            return true;
+        }
+
+        const ColumnUInt8* cond_col = typeid_cast<const ColumnUInt8*>(arg_cond.column.get());
+        const ColumnConst* cond_const_col =
+                check_and_get_column_const<ColumnVector<UInt8>>(arg_cond.column.get());
+
+        /// If then is NULL, we create Nullable column with null mask OR-ed with condition.
+        if (then_is_null) {
+            if (cond_col) {
+                if (is_column_nullable(*arg_else.column)) {
+                    auto arg_else_column = arg_else.column;
+                    auto result_column = (*std::move(arg_else_column)).mutate();
+                    assert_cast<ColumnNullable&>(*result_column)
+                            .apply_null_map(assert_cast<const ColumnUInt8&>(*arg_cond.column));
+                    block.replace_by_position(result, std::move(result_column));
+                } else {
+                    block.replace_by_position(
+                            result,
+                            ColumnNullable::create(materialize_column_if_const(arg_else.column),
+                                                   arg_cond.column));
+                }
+            } else if (cond_const_col) {
+                if (cond_const_col->get_value<UInt8>()) {
+                    block.get_by_position(result).column =
+                            block.get_by_position(result).type->create_column()->clone_resized(
+                                    input_rows_count);
+                } else {
+                    block.get_by_position(result).column =
+                            make_nullable_column_if_not(arg_else.column);
+                }
+            } else {
+                status = Status::InternalError("Illegal column " + arg_cond.column->get_name() +
+                                               " of first argument of function " + get_name() +
+                                               ". Must be ColumnUInt8 or ColumnConstUInt8.");
+            }
+            return true;
+        }
+
+        /// If else is NULL, we create Nullable column with null mask OR-ed with negated condition.
+        if (else_is_null) {
+            if (cond_col) {
+                if (is_column_nullable(*arg_then.column)) {
+                    auto arg_then_column = arg_then.column;
+                    auto result_column = (*std::move(arg_then_column)).mutate();
+                    assert_cast<ColumnNullable&>(*result_column)
+                            .apply_negated_null_map(
+                                    assert_cast<const ColumnUInt8&>(*arg_cond.column));
+                    block.replace_by_position(result, std::move(result_column));
+                } else {
+                    // Built only here: this is the sole branch that consumes the negated mask.
+                    const auto& cond_data = cond_col->get_data();
+                    auto negated_null_map = ColumnUInt8::create();
+                    auto& negated_null_map_data = negated_null_map->get_data();
+                    negated_null_map_data.resize(input_rows_count);
+                    for (size_t i = 0; i < input_rows_count; ++i) {
+                        negated_null_map_data[i] = !cond_data[i];
+                    }
+                    block.replace_by_position(
+                            result,
+                            ColumnNullable::create(materialize_column_if_const(arg_then.column),
+                                                   std::move(negated_null_map)));
+                }
+            } else if (cond_const_col) {
+                if (cond_const_col->get_value<UInt8>()) {
+                    block.get_by_position(result).column =
+                            make_nullable_column_if_not(arg_then.column);
+                } else {
+                    block.get_by_position(result).column =
+                            block.get_by_position(result).type->create_column()->clone_resized(
+                                    input_rows_count);
+                }
+            } else {
+                status = Status::InternalError("Illegal column " + arg_cond.column->get_name() +
+                                               " of first argument of function " + get_name() +
+                                               ". Must be ColumnUInt8 or ColumnConstUInt8.");
+            }
+            return true;
+        }
+        return false;
+    }
+
+    // Handles a nullable then and/or else column by evaluating `if` twice on temporary
+    // blocks: first over the null maps (a constant default UInt8 column stands in for a side
+    // that is not nullable), then over the nested columns; both outcomes are materialized and
+    // paired in a ColumnNullable stored at `result`. Returns false, leaving `block` untouched,
+    // when neither side is a ColumnNullable. Nested execute_impl() Status is not inspected.
+    bool execute_for_nullable_then_else(FunctionContext* context, Block& block,
+                                        const ColumnWithTypeAndName& arg_cond,
+                                        const ColumnWithTypeAndName& arg_then,
+                                        const ColumnWithTypeAndName& arg_else, size_t result,
+                                        size_t input_rows_count) {
+        const auto* nullable_then = check_and_get_column<ColumnNullable>(*arg_then.column);
+        const auto* nullable_else = check_and_get_column<ColumnNullable>(*arg_else.column);
+        if (nullable_then == nullptr && nullable_else == nullptr) return false;
+
+        // Step 1: null mask of the result.
+        ColumnPtr result_null_mask;
+        {
+            ColumnPtr then_null_map =
+                    nullable_then ? nullable_then->get_null_map_column_ptr()
+                                  : DataTypeUInt8().create_column_const_with_default_value(
+                                            input_rows_count);
+            ColumnPtr else_null_map =
+                    nullable_else ? nullable_else->get_null_map_column_ptr()
+                                  : DataTypeUInt8().create_column_const_with_default_value(
+                                            input_rows_count);
+            Block null_map_block({arg_cond,
+                                  {then_null_map, std::make_shared<DataTypeUInt8>(), ""},
+                                  {else_null_map, std::make_shared<DataTypeUInt8>(), ""},
+                                  {nullptr, std::make_shared<DataTypeUInt8>(), ""}});
+            execute_impl(context, null_map_block, {0, 1, 2}, 3, null_map_block.rows());
+            result_null_mask = null_map_block.get_by_position(3).column;
+        }
+
+        // Step 2: values of the result, computed on the nested columns and nested types.
+        ColumnPtr result_nested_column;
+        {
+            Block nested_block(
+                    {arg_cond,
+                     {get_nested_column(arg_then.column), remove_nullable(arg_then.type), ""},
+                     {get_nested_column(arg_else.column), remove_nullable(arg_else.type), ""},
+                     {nullptr, remove_nullable(block.get_by_position(result).type), ""}});
+            execute_impl(context, nested_block, {0, 1, 2}, 3, nested_block.rows());
+            result_nested_column = nested_block.get_by_position(3).column;
+        }
+
+        block.replace_by_position(
+                result, ColumnNullable::create(materialize_column_if_const(result_nested_column),
+                                               materialize_column_if_const(result_null_mask)));
+        return true;
+    }
+
+    // Handles a condition that is entirely NULL or nullable. Only-NULL: the result is the
+    // else column resized to the condition's row count. Nullable: `if` is evaluated again on a
+    // temporary block whose condition is the nested column, and that result is moved into
+    // `block`. Returns false, leaving `block` untouched, in every other case.
+    bool execute_for_null_condition(FunctionContext* context, Block& block,
+                                    const ColumnWithTypeAndName& arg_cond,
+                                    const ColumnWithTypeAndName& arg_then,
+                                    const ColumnWithTypeAndName& arg_else, size_t result) {
+        if (arg_cond.column->only_null()) {
+            block.replace_by_position(result,
+                                      arg_else.column->clone_resized(arg_cond.column->size()));
+            return true;
+        }
+
+        const auto* nullable_cond = check_and_get_column<ColumnNullable>(*arg_cond.column);
+        if (nullable_cond == nullptr) return false;
+
+        DCHECK(remove_nullable(arg_cond.type)->get_type_id() == TypeIndex::UInt8);
+        Block temporary_block{{nullable_cond->get_nested_column_ptr(),
+                               remove_nullable(arg_cond.type), arg_cond.name},
+                              arg_then,
+                              arg_else,
+                              block.get_by_position(result)};
+        execute_impl(context, temporary_block, {0, 1, 2}, 3, temporary_block.rows());
+        block.get_by_position(result).column =
+                std::move(temporary_block.get_by_position(3).column);
+        return true;
+    }
+
+    // Evaluates if(cond, then, else) into position `result` of `block`. Cases are tried in a
+    // fixed order: identical then/else pointers, NULL or nullable condition, only-NULL then/else,
+    // nullable then/else, constant condition, then the numeric fast path or generic fallback.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const ColumnWithTypeAndName& arg_cond = block.get_by_position(arguments[0]);
+        const ColumnWithTypeAndName& arg_then = block.get_by_position(arguments[1]);
+        const ColumnWithTypeAndName& arg_else = block.get_by_position(arguments[2]);
+
+        /// Identical then and else (same column pointer): just point the result at it.
+        if (arg_then.column.get() == arg_else.column.get()) {
+            block.replace_by_position(result, arg_then.column);
+            return Status::OK();
+        }
+
+        // NULL-related special cases; only execute_for_null_then_else can report an error.
+        Status special_case_status = Status::OK();
+        const bool handled =
+                execute_for_null_condition(context, block, arg_cond, arg_then, arg_else, result) ||
+                execute_for_null_then_else(context, block, arg_cond, arg_then, arg_else, result,
+                                           input_rows_count, special_case_status) ||
+                execute_for_nullable_then_else(context, block, arg_cond, arg_then, arg_else, result,
+                                               input_rows_count);
+        if (handled) return special_case_status;
+
+        if (const ColumnConst* cond_const_col =
+                    check_and_get_column_const<ColumnVector<UInt8>>(arg_cond.column.get())) {
+            block.get_by_position(result).column =
+                    cond_const_col->get_value<UInt8>() ? arg_then.column : arg_else.column;
+            return Status::OK();
+        }
+
+        const ColumnUInt8* cond_col = typeid_cast<const ColumnUInt8*>(arg_cond.column.get());
+        if (cond_col == nullptr) {
+            return Status::InvalidArgument("Illegal column " + arg_cond.column->get_name() +
+                                           " of first argument of function " + get_name() +
+                                           ",Must be ColumnUInt8 or ColumnConstUInt8.");
+        }
+
+        WhichDataType which_type(arg_then.type);
+        if (!which_type.is_int() && !which_type.is_float()) {
+            return execute_generic(block, cond_col, arg_then, arg_else, result, input_rows_count);
+        }
+        Status status;
+        execute_basic_type(block, cond_col, arg_then, arg_else, result, status);
+        return status;
+    }
+};
+
+void register_function_if(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionIf>(); // adds FunctionIf to the given factory
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/in.cpp b/be/src/vec/functions/in.cpp
new file mode 100644
index 0000000000..65232c442e
--- /dev/null
+++ b/be/src/vec/functions/in.cpp
@@ -0,0 +1,182 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+
+#include <fmt/format.h>
+
+#include "exprs/create_predicate_function.h"
+
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_set.h"
+#include "vec/columns/columns_number.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+struct InState { // state built by FunctionIn::prepare() and read by FunctionIn::execute_impl()
+    bool use_set = true; // cleared by prepare() when a list argument has no constant column
+
+    // set by prepare() when a constant list element is NULL (its data pointer is null)
+    bool null_in_set = false;
+    std::unique_ptr<HybridSetBase> hybrid_set; // receives the non-NULL constant list elements
+};
+
+/// Vectorized IN (`negative == false`) and NOT IN (`negative == true`) predicate.
+/// Argument 0 is the probed value; arguments 1..N are the candidate list.
+template <bool negative>
+class FunctionIn : public IFunction {
+public:
+    static constexpr auto name = negative ? "not_in" : "in";
+
+    static FunctionPtr create() { return std::make_shared<FunctionIn>(); }
+
+    String get_name() const override { return name; }
+    bool is_variadic() const override { return true; }
+    size_t get_number_of_arguments() const override { return 0; }
+
+    /// UInt8, wrapped in Nullable as soon as any argument type is nullable.
+    DataTypePtr get_return_type_impl(const DataTypes& args) const override {
+        for (const auto& arg : args) {
+            if (arg->is_nullable()) return make_nullable(std::make_shared<DataTypeUInt8>());
+        }
+        return std::make_shared<DataTypeUInt8>();
+    }
+
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    /// For every scope except THREAD_LOCAL: stores a new InState and fills its set from the list
+    /// arguments that have a constant column; the first one without clears `use_set` and stops.
+    Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
+        if (scope == FunctionContext::THREAD_LOCAL) {
+            return Status::OK();
+        }
+        auto* state = new InState();
+        context->set_function_state(scope, state);
+        state->hybrid_set.reset(create_set(convert_type_to_primitive(
+                context->get_arg_type(0)->type)));
+
+        DCHECK(context->get_num_args() > 1);
+        for (int i = 1; i < context->get_num_args(); ++i) {
+            const auto& const_column_ptr = context->get_constant_col(i);
+            if (const_column_ptr != nullptr) {
+                auto const_data = const_column_ptr->column_ptr->get_data_at(0);
+                if (const_data.data == nullptr) {
+                    state->null_in_set = true;
+                } else {
+                    state->hybrid_set->insert((void *) const_data.data, const_data.size);
+                }
+            } else {
+                state->use_set = false;
+                break;
+            }
+        }
+        return Status::OK();
+    }
+
+    /// Writes one UInt8 per row into `result`. A row is NULL when the probed value is NULL, or
+    /// when the list holds a NULL and no list element matched the probed value.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto in_state = reinterpret_cast<InState*>(
+                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+        if (!in_state) {
+            return Status::RuntimeError(fmt::format("funciton context for function '{}' must have Set;",
+                                get_name()));
+        }
+        auto res = ColumnUInt8::create();
+        ColumnUInt8::Container& vec_res = res->get_data();
+        vec_res.resize(input_rows_count);
+
+        ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(input_rows_count);
+        auto& vec_null_map_to = col_null_map_to->get_data();
+
+        /// First argument may be a single column.
+        const ColumnWithTypeAndName& left_arg = block.get_by_position(arguments[0]);
+        auto materialized_column = left_arg.column->convert_to_full_column_if_const();
+
+        if (in_state->use_set) {
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                const auto& ref_data = materialized_column->get_data_at(i);
+                if (ref_data.data == nullptr) {
+                    // NULL probe: zero the nested value, it is never written otherwise.
+                    vec_res[i] = 0;
+                    vec_null_map_to[i] = true;
+                    continue;
+                }
+                vec_res[i] = negative ^ in_state->hybrid_set->find((void *) ref_data.data, ref_data.size);
+                // With a NULL in the list, a miss is unknown (NULL) instead of a definite answer.
+                vec_null_map_to[i] = in_state->null_in_set && (negative == vec_res[i]);
+            }
+        } else {
+            std::vector<ColumnPtr> set_columns;
+            for (size_t i = 1; i < arguments.size(); ++i) {
+                set_columns.emplace_back(block.get_by_position(arguments[i]).column);
+            }
+            for (size_t i = 0; i < input_rows_count; ++i) {
+                const auto& ref_data = materialized_column->get_data_at(i);
+                if (ref_data.data == nullptr) {
+                    vec_res[i] = 0; // NULL probe: keep the nested value defined
+                    vec_null_map_to[i] = true;
+                    continue;
+                }
+                // The list elements are read at row i, so a fresh set is built for every row.
+                std::unique_ptr<HybridSetBase> hybrid_set(create_set(convert_type_to_primitive(
+                        context->get_arg_type(0)->type)));
+                bool null_in_set = false;
+
+                for (const auto& set_column : set_columns) {
+                    auto set_data = set_column->get_data_at(i);
+                    if (set_data.data == nullptr) {
+                        null_in_set = true;
+                    } else {
+                        hybrid_set->insert((void *)(set_data.data), set_data.size);
+                    }
+                }
+                vec_res[i] = negative ^ hybrid_set->find((void *) ref_data.data, ref_data.size);
+                vec_null_map_to[i] = null_in_set && (negative == vec_res[i]);
+            }
+        }
+
+        if (block.get_by_position(result).type->is_nullable()) {
+            block.replace_by_position(result, ColumnNullable::create(std::move(res), std::move(col_null_map_to)));
+        } else {
+            block.replace_by_position(result, std::move(res));
+        }
+        return Status::OK();
+    }
+
+    /// Frees the InState stored for the FRAGMENT_LOCAL scope; other scopes are left untouched.
+    Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
+        if (scope == FunctionContext::FRAGMENT_LOCAL) {
+            delete reinterpret_cast<InState*>(
+                    context->get_function_state(FunctionContext::FRAGMENT_LOCAL));
+        }
+        return Status::OK();
+    }
+};
+
+/// Registers both instantiations with the factory: FunctionIn<false> and FunctionIn<true>.
+void register_function_in(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionIn<false>>();
+    factory.register_function<FunctionIn<true>>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/int_div.cpp b/be/src/vec/functions/int_div.cpp
new file mode 100644
index 0000000000..bbb4fd460f
--- /dev/null
+++ b/be/src/vec/functions/int_div.cpp
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/IntDiv.cpp
+// and modified by Doris
+
+#ifdef __SSE2__
+#define LIBDIVIDE_SSE2 1
+#endif
+
+#include "vec/functions/int_div.h"
+
+#include <libdivide.h>
+
+#include "vec/functions/function_binary_arithmetic.h"
+#include "vec/functions/function_binary_arithmetic_to_null_type.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+/// Optimizations for integer division by a constant.
+/// vector_constant() divides every element of `a` by the scalar `b` and stores the result in `c`.
+template <typename A, typename B>
+struct DivideIntegralByConstantImpl : BinaryOperationImplBase<A, B, DivideIntegralImpl<A, B>> {
+    using ResultType = typename DivideIntegralImpl<A, B>::ResultType;
+
+    static void vector_constant(const PaddedPODArray<A>& a, B b, PaddedPODArray<ResultType>& c) {
+        // TODO: Support returning null in the future; until then division by zero yields zeros.
+        if (UNLIKELY(b == 0)) {
+//            throw Exception("Division by zero", TStatusCode::VEC_ILLEGAL_DIVISION);
+            memset(c.data(), 0, sizeof(ResultType) * c.size());
+            return;
+        }
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wsign-compare"
+        if (UNLIKELY(std::is_signed_v<B> && b == -1)) {
+            // x / -1 == -x. Negate the input `a` (`c` is only the output buffer), going through
+            // the unsigned type so the minimal signed value wraps instead of overflowing.
+            using SignedA = std::make_signed_t<A>;
+            for (size_t i = 0; i < a.size(); ++i) {
+                c[i] = static_cast<SignedA>(-std::make_unsigned_t<A>(a[i]));
+            }
+            return;
+        }
+
+#pragma GCC diagnostic pop
+        // NOTE(review): `b` is converted to A here; a divisor that does not fit in A looks unhandled.
+        libdivide::divider<A> divider(b);
+        size_t size = a.size();
+        const A* a_pos = a.data();
+        const A* a_end = a_pos + size;
+        ResultType* c_pos = c.data();
+
+#ifdef __SSE2__
+        static constexpr size_t values_per_sse_register = 16 / sizeof(A);
+        const A* a_end_sse = a_pos + size / values_per_sse_register * values_per_sse_register;
+        while (a_pos < a_end_sse) {
+            _mm_storeu_si128(reinterpret_cast<__m128i*>(c_pos),
+                             _mm_loadu_si128(reinterpret_cast<const __m128i*>(a_pos)) / divider);
+            a_pos += values_per_sse_register;
+            c_pos += values_per_sse_register;
+        }
+#endif
+
+        while (a_pos < a_end) {
+            *c_pos = *a_pos / divider;
+            ++a_pos;
+            ++c_pos;
+        }
+    }
+};
+
+/** Specializations route UInt64/UInt32 dividends with unsigned divisors and Int64/Int32 dividends
+  * with signed divisors to DivideIntegralByConstantImpl. More combinations would need more code.
+  */
+
+template <>
+struct BinaryOperationImpl<UInt64, UInt8, DivideIntegralImpl<UInt64, UInt8>>
+        : DivideIntegralByConstantImpl<UInt64, UInt8> {};
+template <>
+struct BinaryOperationImpl<UInt64, UInt16, DivideIntegralImpl<UInt64, UInt16>>
+        : DivideIntegralByConstantImpl<UInt64, UInt16> {};
+template <>
+struct BinaryOperationImpl<UInt64, UInt32, DivideIntegralImpl<UInt64, UInt32>>
+        : DivideIntegralByConstantImpl<UInt64, UInt32> {};
+template <>
+struct BinaryOperationImpl<UInt64, UInt64, DivideIntegralImpl<UInt64, UInt64>>
+        : DivideIntegralByConstantImpl<UInt64, UInt64> {};
+
+template <>
+struct BinaryOperationImpl<UInt32, UInt8, DivideIntegralImpl<UInt32, UInt8>>
+        : DivideIntegralByConstantImpl<UInt32, UInt8> {};
+template <>
+struct BinaryOperationImpl<UInt32, UInt16, DivideIntegralImpl<UInt32, UInt16>>
+        : DivideIntegralByConstantImpl<UInt32, UInt16> {};
+template <>
+struct BinaryOperationImpl<UInt32, UInt32, DivideIntegralImpl<UInt32, UInt32>>
+        : DivideIntegralByConstantImpl<UInt32, UInt32> {};
+template <>
+struct BinaryOperationImpl<UInt32, UInt64, DivideIntegralImpl<UInt32, UInt64>>
+        : DivideIntegralByConstantImpl<UInt32, UInt64> {};
+
+template <>
+struct BinaryOperationImpl<Int64, Int8, DivideIntegralImpl<Int64, Int8>>
+        : DivideIntegralByConstantImpl<Int64, Int8> {};
+template <>
+struct BinaryOperationImpl<Int64, Int16, DivideIntegralImpl<Int64, Int16>>
+        : DivideIntegralByConstantImpl<Int64, Int16> {};
+template <>
+struct BinaryOperationImpl<Int64, Int32, DivideIntegralImpl<Int64, Int32>>
+        : DivideIntegralByConstantImpl<Int64, Int32> {};
+template <>
+struct BinaryOperationImpl<Int64, Int64, DivideIntegralImpl<Int64, Int64>>
+        : DivideIntegralByConstantImpl<Int64, Int64> {};
+
+template <>
+struct BinaryOperationImpl<Int32, Int8, DivideIntegralImpl<Int32, Int8>>
+        : DivideIntegralByConstantImpl<Int32, Int8> {};
+template <>
+struct BinaryOperationImpl<Int32, Int16, DivideIntegralImpl<Int32, Int16>>
+        : DivideIntegralByConstantImpl<Int32, Int16> {};
+template <>
+struct BinaryOperationImpl<Int32, Int32, DivideIntegralImpl<Int32, Int32>>
+        : DivideIntegralByConstantImpl<Int32, Int32> {};
+template <>
+struct BinaryOperationImpl<Int32, Int64, DivideIntegralImpl<Int32, Int64>>
+        : DivideIntegralByConstantImpl<Int32, Int64> {};
+
+struct NameIntDiv {
+    static constexpr auto name = "int_divide"; // name tag handed to FunctionIntDiv below
+};
+using FunctionIntDiv = FunctionBinaryArithmeticToNullType<DivideIntegralImpl, NameIntDiv, false>;
+
+void register_function_int_div(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionIntDiv>(); // adds FunctionIntDiv to the supplied factory
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/int_div.h b/be/src/vec/functions/int_div.h
new file mode 100644
index 0000000000..a74ecab413
--- /dev/null
+++ b/be/src/vec/functions/int_div.h
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/IntDiv.h
+// and modified by Doris
+
+#pragma once
+
+#include "common/compiler_util.h"
+#include "common/logging.h"
+#include "common/status.h"
+#include "runtime/decimalv2_value.h"
+#include "type_traits"
+#include "vec/columns/column_nullable.h"
+#include "vec/common/exception.h"
+#include "vec/data_types/number_traits.h"
+
+namespace doris::vectorized {
+
+/// Integer division: b == 0 flags the null map and divides by 1; min / -1 wraps, never traps.
+template <typename A, typename B>
+struct DivideIntegralImpl {
+    using ResultType = typename NumberTraits::ResultOfIntegerDivision<A, B>::Type;
+    template <typename Result = ResultType>
+    static inline Result apply(A a, B b, NullMap& null_map, size_t index) {
+        null_map[index] = b == 0;
+        /// Otherwise overflow may occur due to integer promotion. Example: int8_t(-1) / uint64_t(2).
+        /// NOTE: overflow is still possible when dividing large signed number to large unsigned number or vice-versa. But it's less harmful.
+        if constexpr (std::is_integral_v<A> && std::is_integral_v<B> &&
+                      (std::is_signed_v<A> || std::is_signed_v<B>)) {
+            const auto lhs = std::make_signed_t<A>(a);
+            const auto rhs = std::make_signed_t<B>(b) + (b == 0);
+            using Common = decltype(lhs / rhs); // type the division is carried out in
+            if (UNLIKELY(rhs == -1)) return Common(0 - std::make_unsigned_t<Common>(lhs)); // wraps
+            return lhs / rhs;
+        } else {
+            return a / (b + (b == 0));
+        }
+    }
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/is_not_null.cpp b/be/src/vec/functions/is_not_null.cpp
new file mode 100644
index 0000000000..346dac1b2c
--- /dev/null
+++ b/be/src/vec/functions/is_not_null.cpp
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/IsNotNull.cpp
+// and modified by Doris
+
+#include "vec/columns/column_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+/// Vectorized `IS NOT NULL` predicate: yields 1 for rows whose value is
+/// present and 0 for rows whose value is NULL.
+class FunctionIsNotNull : public IFunction {
+public:
+    static constexpr auto name = "is_not_null_pred";
+
+    static FunctionPtr create() { return std::make_shared<FunctionIsNotNull>(); }
+
+    std::string get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+    bool use_default_implementation_for_nulls() const override { return false; }
+    bool use_default_implementation_for_constants() const override { return true; }
+    ColumnNumbers get_arguments_that_dont_imply_nullable_return_type(
+            size_t /*number_of_arguments*/) const override {
+        return {0};
+    }
+
+    DataTypePtr get_return_type_impl(const DataTypes&) const override {
+        return std::make_shared<DataTypeUInt8>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const ColumnWithTypeAndName& input = block.get_by_position(arguments[0]);
+        const auto* nullable_input = check_and_get_column<ColumnNullable>(*input.column);
+        if (nullable_input == nullptr) {
+            /// A non-nullable argument has no NULL rows: answer with a constant 1.
+            block.get_by_position(result).column =
+                    DataTypeUInt8().create_column_const(input.column->size(), 1u);
+            return Status::OK();
+        }
+
+        /// Otherwise the answer is the element-wise negation of the null map.
+        auto negated = ColumnUInt8::create(input_rows_count);
+        const auto* __restrict null_flags = nullable_input->get_null_map_data().data();
+        auto* __restrict out = assert_cast<ColumnUInt8&>(*negated).get_data().data();
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            out[row] = !null_flags[row];
+        }
+        block.replace_by_position(result, std::move(negated));
+        return Status::OK();
+    }
+};
+
+void register_function_is_not_null(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionIsNotNull>(); // adds FunctionIsNotNull to the factory
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/is_null.cpp b/be/src/vec/functions/is_null.cpp
new file mode 100644
index 0000000000..2b5c307583
--- /dev/null
+++ b/be/src/vec/functions/is_null.cpp
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/IsNull.cpp
+// and modified by Doris
+
+#include "vec/functions/simple_function_factory.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/columns/column_nullable.h"
+
+namespace doris::vectorized {
+
+/// Vectorized `IS NULL` predicate: yields 1 for rows whose value is NULL
+/// and 0 for every other row. A nullable argument is answered straight
+/// from its null map.
+class FunctionIsNull : public IFunction {
+public:
+    static constexpr auto name = "is_null_pred";
+
+    static FunctionPtr create() { return std::make_shared<FunctionIsNull>(); }
+
+    std::string get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 1; }
+    bool use_default_implementation_for_nulls() const override { return false; }
+    bool use_default_implementation_for_constants() const override { return true; }
+    ColumnNumbers get_arguments_that_dont_imply_nullable_return_type(
+            size_t /*number_of_arguments*/) const override {
+        return {0};
+    }
+
+    /// Always plain (non-nullable) UInt8, whatever the argument type is.
+    DataTypePtr get_return_type_impl(const DataTypes&) const override {
+        return std::make_shared<DataTypeUInt8>();
+    }
+
+    /// Stores the answer column at position `result` of `block`.
+    /// `arguments[0]` selects the inspected column; `context` and
+    /// `input_rows_count` are not consulted.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const ColumnWithTypeAndName& input = block.get_by_position(arguments[0]);
+        const auto* nullable_input = check_and_get_column<ColumnNullable>(*input.column);
+        if (nullable_input != nullptr) {
+            /// The embedded null map already is the answer; hand it out as is.
+            block.get_by_position(result).column = nullable_input->get_null_map_column_ptr();
+            return Status::OK();
+        }
+
+        /// A non-nullable argument has no NULL rows: answer with a constant 0,
+        /// i.e. a zero-filled null map.
+        block.get_by_position(result).column =
+                DataTypeUInt8().create_column_const(input.column->size(), 0u);
+        return Status::OK();
+    }
+};
+
+/// Adds FunctionIsNull to the supplied factory.
+void register_function_is_null(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionIsNull>();
+}
+
+}
\ No newline at end of file
diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
new file mode 100644
index 0000000000..c6bf479582
--- /dev/null
+++ b/be/src/vec/functions/like.cpp
@@ -0,0 +1,321 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/functions/like.h"
+
+#include "runtime/string_value.h"
+#include "runtime/string_value.hpp"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_set.h"
+#include "vec/columns/columns_number.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+// A regex to match any regex pattern which is equivalent to a substring search.
+static const RE2 SUBSTRING_RE(
+        "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
+
+// A regex to match any regex pattern which is equivalent to matching a constant string
+// at the end of the string values.
+static const RE2 ENDS_WITH_RE("(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
+
+// A regex to match any regex pattern which is equivalent to matching a constant string
+// at the start of the string values.
+static const RE2 STARTS_WITH_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
+
+// A regex to match any regex pattern which is equivalent to a constant string match.
+static const RE2 EQUALS_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
+
+// LIKE patterns made of one literal with '%' runs on both sides, before it, after it, or nowhere.
+static const re2::RE2 LIKE_SUBSTRING_RE("(?:%+)(((\\\\%)|(\\\\_)|([^%_]))+)(?:%+)");
+static const re2::RE2 LIKE_ENDS_WITH_RE("(?:%+)(((\\\\%)|(\\\\_)|([^%_]))+)");
+static const re2::RE2 LIKE_STARTS_WITH_RE("(((\\\\%)|(\\\\_)|([^%_]))+)(?:%+)");
+static const re2::RE2 LIKE_EQUALS_RE("(((\\\\%)|(\\\\_)|([^%_]))+)");
+
+Status FunctionLikeBase::constant_starts_with_fn(LikeSearchState* state, const StringValue& val,
+                                          const StringValue& pattern, unsigned char* result) {
+    const auto& prefix = state->search_string_sv; // literal that `val` has to start with
+    *result = val.len >= prefix.len && prefix == val.substring(0, prefix.len);
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_ends_with_fn(LikeSearchState* state, const StringValue& val,
+                                        const StringValue& pattern, unsigned char* result) {
+    const auto& suffix = state->search_string_sv; // literal that `val` has to end with
+    *result = val.len >= suffix.len &&
+              suffix == val.substring(val.len - suffix.len, suffix.len);
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_equals_fn(LikeSearchState* state, const StringValue& val,
+                                     const StringValue& pattern, unsigned char* result) {
+    *result = (val == state->search_string_sv); // `pattern` is unused; the state holds the literal
+    return Status::OK();
+}
+
+Status FunctionLikeBase::constant_substring_fn(LikeSearchState* state, const StringValue& val,
+                                        const StringValue& pattern, unsigned char* result) {
+    if (state->search_string_sv.len == 0) { // the empty string occurs in every value
+        *result = true;
+        return Status::OK();
+    }
+    StringValue haystack(val.ptr, val.len); // bounded by val.len, not by a terminating NUL
+    *result = state->substring_pattern.search(&haystack) != -1;
+    return Status::OK();
+}
+
+// Runs the matcher stored in the THREAD_LOCAL LikeState over every row (argument 0: values,
+// argument 1: patterns) and puts the UInt8 answers at `result`. Errors are returned, not dropped.
+Status FunctionLikeBase::execute_impl(FunctionContext* context, Block& block,
+                               const ColumnNumbers& arguments, size_t result,
+                               size_t /*input_rows_count*/) {
+    // values and patterns
+    const auto values_col =
+            block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+    const auto pattern_col =
+            block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+    const auto* values = check_and_get_column<ColumnString>(values_col.get());
+    const auto* patterns = check_and_get_column<ColumnString>(pattern_col.get());
+    if (!values || !patterns) {
+        return Status::InternalError("Not supported input arguments types");
+    }
+
+    // result column
+    auto res = ColumnUInt8::create();
+    ColumnUInt8::Container& vec_res = res->get_data();
+    vec_res.resize(values->size());
+
+    auto* state = reinterpret_cast<LikeState*>(
+            context->get_function_state(FunctionContext::THREAD_LOCAL));
+    // A failing row (e.g. an invalid pattern) must fail the call rather than be ignored.
+    RETURN_IF_ERROR(vector_vector(values->get_chars(), values->get_offsets(),
+                                  patterns->get_chars(), patterns->get_offsets(), vec_res,
+                                  state->function, &state->search_state));
+    block.replace_by_position(result, std::move(res));
+    return Status::OK();
+}
+
+Status FunctionLikeBase::close(FunctionContext* context,
+                               FunctionContext::FunctionStateScope scope) {
+    // Only the THREAD_LOCAL scope has a LikeState to release;
+    // for every other scope this call does nothing.
+    if (scope != FunctionContext::THREAD_LOCAL) return Status::OK();
+    delete reinterpret_cast<LikeState*>(
+            context->get_function_state(FunctionContext::THREAD_LOCAL));
+    return Status::OK();
+}
+
+Status FunctionLikeBase::vector_vector(const ColumnString::Chars& values,
+                                const ColumnString::Offsets& value_offsets,
+                                const ColumnString::Chars& patterns,
+                                const ColumnString::Offsets& pattern_offsets,
+                                ColumnUInt8::Container& result, const LikeFn& function,
+                                LikeSearchState* search_state) {
+    // Applies `function` to each (value, pattern) pair and stops at the first error.
+    // Row r starts at offsets[r - 1]; its length leaves out the last byte before offsets[r].
+    // NOTE(review): row 0 reads offsets[-1]; presumably the container guarantees 0 there - confirm.
+    const auto row_count = value_offsets.size();
+    for (int row = 0; row < row_count; ++row) {
+        const UInt32 val_len = value_offsets[row] - value_offsets[row - 1] - 1;
+        const UInt32 pattern_len = pattern_offsets[row] - pattern_offsets[row - 1] - 1;
+        const StringValue val((char*)(&values[value_offsets[row - 1]]), val_len);
+        const StringValue pattern((char*)(&patterns[pattern_offsets[row - 1]]), pattern_len);
+        RETURN_IF_ERROR((function)(search_state, val, pattern, &result[row]));
+    }
+    return Status::OK();
+}
+
+Status FunctionLike::like_fn(LikeSearchState* state, const StringValue& val,
+                              const StringValue& pattern, unsigned char* result) {
+    // Generic path: translate the LIKE pattern to a regex and compile it on every call.
+    std::string regex_text;
+    convert_like_pattern(state, std::string(pattern.ptr, pattern.len), &regex_text);
+    RE2::Options options;
+    options.set_never_nl(false);
+    options.set_dot_nl(true);
+    re2::RE2 compiled(regex_text, options);
+    if (!compiled.ok()) {
+        return Status::RuntimeError(fmt::format("Invalid pattern: {}", pattern.debug_string()));
+    }
+    *result = RE2::FullMatch(re2::StringPiece(val.ptr, val.len), compiled);
+    return Status::OK();
+}
+
+Status FunctionLike::constant_regex_full_fn(LikeSearchState* state, const StringValue& val,
+                                             const StringValue& pattern, unsigned char* result) {
+    *result = RE2::FullMatch(re2::StringPiece(val.ptr, val.len), *state->regex.get());
+    return Status::OK(); // `pattern` is unused: the regex held in `state` is matched instead
+}
+
+void FunctionLike::convert_like_pattern(LikeSearchState* state, const std::string& pattern,
+                                         std::string* re_pattern) {
+    // Rewrites a LIKE pattern as a regex: unescaped '%' becomes ".*", unescaped '_' becomes ".",
+    // the escape character is dropped, and every other character is emitted literally.
+    re_pattern->clear();
+    bool escaped = false; // true while the previous character was an unescaped escape character
+    for (const char c : pattern) {
+        if (!escaped) {
+            // LIKE wildcards only count when they are not escaped.
+            if (c == '%' || c == '_') {
+                re_pattern->append(c == '%' ? ".*" : ".");
+                continue;
+            }
+            // check for escape char before checking for regex special chars, they might overlap
+            if (c == state->escape_char) {
+                escaped = true;
+                continue;
+            }
+        }
+        // Regex special characters get a backslash in front so that they match literally.
+        const bool is_regex_special = c == '.' || c == '[' || c == ']' || c == '{' || c == '}' ||
+                                      c == '(' || c == ')' || c == '\\' || c == '*' || c == '+' ||
+                                      c == '?' || c == '|' || c == '^' || c == '$';
+        if (is_regex_special) re_pattern->append("\\");
+        re_pattern->append(1, c);
+        escaped = false;
+    }
+}
+
+/// Rewrites `*search_string` in place, turning each "\%" into "%" and each "\_" into "_".
+/// Every other character, including a backslash that is not followed by '%' or '_',
+/// is kept unchanged.
+void FunctionLike::remove_escape_character(std::string* search_string) {
+    // Move the input aside; the result is rebuilt into the (now empty) caller string.
+    std::string source;
+    source.swap(*search_string);
+    const int length = source.length();
+    int pos = 0;
+    while (pos < length) {
+        const bool escapes_wildcard = source[pos] == '\\' && pos + 1 < length &&
+                                      (source[pos + 1] == '%' || source[pos + 1] == '_');
+        if (escapes_wildcard) {
+            // Skip the backslash; the wildcard character itself is copied just below.
+            ++pos;
+        }
+        search_string->append(1, source[pos]);
+        ++pos;
+    }
+}
+
+/// Creates the LikeState for a LIKE call and picks the per-row evaluator.
+///
+/// Only the THREAD_LOCAL scope is handled; any other scope returns OK without doing anything.
+/// The default evaluator is `like_fn`. When argument 1 (the pattern) is a constant column,
+/// the pattern is inspected once here:
+///   - if it fully matches LIKE_EQUALS_RE / LIKE_STARTS_WITH_RE / LIKE_ENDS_WITH_RE /
+///     LIKE_SUBSTRING_RE, the captured literal has its "\%" / "\_" escapes removed and the
+///     matching constant_*_fn evaluator is installed;
+///   - otherwise the pattern is converted to a regex, compiled once, and
+///     `constant_regex_full_fn` is installed.
+///
+/// @return InternalError if the converted regex does not compile, OK otherwise.
+Status FunctionLike::prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
+    if (scope != FunctionContext::THREAD_LOCAL) {
+        return Status::OK();
+    }
+    // The raw pointer is handed to the context right away.
+    // NOTE(review): presumably released in FunctionLikeBase::close() - confirm.
+    auto* state = new LikeState();
+    context->set_function_state(scope, state);
+    state->function = like_fn;
+    if (!context->is_col_constant(1)) {
+        return Status::OK();
+    }
+
+    const auto pattern_col = context->get_constant_col(1)->column_ptr;
+    const auto& pattern = pattern_col->get_data_at(0);
+    std::string pattern_str = pattern.to_string();
+    std::string search_string;
+
+    // Shared tail of the four literal fast paths: un-escape the captured literal,
+    // store it in the search state and install the given evaluator.
+    auto use_literal_matcher = [&](const LikeFn& matcher) {
+        remove_escape_character(&search_string);
+        state->search_state.set_search_string(search_string);
+        state->function = matcher;
+        return Status::OK();
+    };
+
+    if (RE2::FullMatch(pattern_str, LIKE_EQUALS_RE, &search_string)) {
+        return use_literal_matcher(constant_equals_fn);
+    }
+    if (RE2::FullMatch(pattern_str, LIKE_STARTS_WITH_RE, &search_string)) {
+        return use_literal_matcher(constant_starts_with_fn);
+    }
+    if (RE2::FullMatch(pattern_str, LIKE_ENDS_WITH_RE, &search_string)) {
+        return use_literal_matcher(constant_ends_with_fn);
+    }
+    if (RE2::FullMatch(pattern_str, LIKE_SUBSTRING_RE, &search_string)) {
+        return use_literal_matcher(constant_substring_fn);
+    }
+
+    // General case: compile the translated pattern once and reuse it for every row.
+    std::string re_pattern;
+    convert_like_pattern(&state->search_state, pattern_str, &re_pattern);
+    RE2::Options opts;
+    opts.set_never_nl(false);
+    opts.set_dot_nl(true);
+    state->search_state.regex = std::make_unique<RE2>(re_pattern, opts);
+    if (!state->search_state.regex->ok()) {
+        return Status::InternalError(fmt::format("Invalid regex expression: {}", pattern_str));
+    }
+    state->function = constant_regex_full_fn;
+    return Status::OK();
+}
+
+/// Creates the LikeState for a REGEXP call and picks the per-row evaluator.
+///
+/// Only the THREAD_LOCAL scope is handled; any other scope returns OK without doing anything.
+/// The default evaluator is `regexp_fn`. When argument 1 (the pattern) is a constant column:
+///   - if it fully matches EQUALS_RE / STARTS_WITH_RE / ENDS_WITH_RE / SUBSTRING_RE, the
+///     captured literal is stored and the matching constant_*_fn evaluator is installed;
+///   - otherwise the pattern is compiled once and `constant_regex_partial_fn` is installed.
+///
+/// @return InternalError if the constant pattern does not compile, OK otherwise.
+Status FunctionRegexp::prepare(FunctionContext* context,
+                                FunctionContext::FunctionStateScope scope) {
+    if (scope != FunctionContext::THREAD_LOCAL) {
+        return Status::OK();
+    }
+    // The raw pointer is handed to the context right away.
+    // NOTE(review): presumably released in FunctionLikeBase::close() - confirm.
+    auto* state = new LikeState();
+    context->set_function_state(scope, state);
+    state->function = regexp_fn;
+    if (!context->is_col_constant(1)) {
+        return Status::OK();
+    }
+
+    const auto pattern_col = context->get_constant_col(1)->column_ptr;
+    const auto& pattern = pattern_col->get_data_at(0);
+    std::string pattern_str = pattern.to_string();
+    std::string search_string;
+
+    // Shared tail of the four literal fast paths: store the captured literal and
+    // install the given evaluator.
+    auto use_literal_matcher = [&](const LikeFn& matcher) {
+        state->search_state.set_search_string(search_string);
+        state->function = matcher;
+        return Status::OK();
+    };
+
+    if (RE2::FullMatch(pattern_str, EQUALS_RE, &search_string)) {
+        return use_literal_matcher(constant_equals_fn);
+    }
+    if (RE2::FullMatch(pattern_str, STARTS_WITH_RE, &search_string)) {
+        return use_literal_matcher(constant_starts_with_fn);
+    }
+    if (RE2::FullMatch(pattern_str, ENDS_WITH_RE, &search_string)) {
+        return use_literal_matcher(constant_ends_with_fn);
+    }
+    if (RE2::FullMatch(pattern_str, SUBSTRING_RE, &search_string)) {
+        return use_literal_matcher(constant_substring_fn);
+    }
+
+    // General case: compile the user-supplied regex once and reuse it for every row.
+    RE2::Options opts;
+    opts.set_never_nl(false);
+    opts.set_dot_nl(true);
+    state->search_state.regex = std::make_unique<RE2>(pattern_str, opts);
+    if (!state->search_state.regex->ok()) {
+        return Status::InternalError(fmt::format("Invalid regex expression: {}", pattern_str));
+    }
+    state->function = constant_regex_partial_fn;
+    return Status::OK();
+}
+
+/// REGEXP evaluator for a pattern that was compiled ahead of time.
+/// Sets `*result` to whether the regex held in `state->regex` matches anywhere in `val`.
+/// The `pattern` argument is not read; it exists so the signature matches LikeFn.
+Status FunctionRegexp::constant_regex_partial_fn(LikeSearchState* state, const StringValue& val,
+                                                  const StringValue& pattern,
+                                                  unsigned char* result) {
+    const re2::StringPiece subject(val.ptr, val.len);
+    const bool matched = RE2::PartialMatch(subject, *state->regex);
+    *result = matched;
+    return Status::OK();
+}
+
+/// REGEXP evaluator for a non-constant pattern: compiles `pattern` on every call and sets
+/// `*result` to whether it matches anywhere in `val`. `state` is not read.
+///
+/// @return RuntimeError if `pattern` is not a valid regular expression, OK otherwise.
+Status FunctionRegexp::regexp_fn(LikeSearchState* state, const StringValue& val,
+                                 const StringValue& pattern, unsigned char* result) {
+    RE2::Options opts;
+    opts.set_never_nl(false);
+    opts.set_dot_nl(true);
+    const std::string re_pattern(pattern.ptr, pattern.len);
+    re2::RE2 re(re_pattern, opts);
+    if (!re.ok()) {
+        return Status::RuntimeError(fmt::format("Invalid pattern: {}", pattern.debug_string()));
+    }
+    *result = RE2::PartialMatch(re2::StringPiece(val.ptr, val.len), re);
+    return Status::OK();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/like.h b/be/src/vec/functions/like.h
new file mode 100644
index 0000000000..f8b8f11557
--- /dev/null
+++ b/be/src/vec/functions/like.h
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <functional>
+#include <memory>
+
+#include "runtime/string_search.hpp"
+#include "runtime/string_value.h"
+#include "vec/columns/column_const.h"
+#include "vec/columns/column_set.h"
+#include "vec/columns/columns_number.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/functions/function.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+// TODO: replace with std::string_view when `LikeSearchState.substring_pattern` can
+// construct from std::string_view.
+/// Per-call search state shared by the LIKE / REGEXP evaluators.
+///
+/// NOTE(review): set_search_string() builds `search_string_sv` from `search_string` and
+/// `substring_pattern` from the address of `search_string_sv`, so these members refer to
+/// each other inside the same object. Moving an instance presumably leaves them referring
+/// to the source object - confirm before storing this struct by value in a container.
+struct LikeSearchState {
+    /// Character that escapes '%' and '_' in a LIKE pattern; initialised to '\\'.
+    char escape_char;
+
+    /// Holds the string the StringValue points to and is set any time StringValue is
+    /// used.
+    std::string search_string;
+
+    /// Used for LIKE predicates if the pattern is a constant argument, and is either a
+    /// constant string or has a constant string at the beginning or end of the pattern.
+    /// This will be set in order to check for that pattern in the corresponding part of
+    /// the string.
+    doris::StringValue search_string_sv;
+
+    /// Used for LIKE predicates if the pattern is a constant argument and has a constant
+    /// string in the middle of it. This will be used in order to check for the substring
+    /// in the value.
+    doris::StringSearch substring_pattern;
+
+    /// Used for RLIKE and REGEXP predicates if the pattern is a constant argument.
+    std::unique_ptr<re2::RE2> regex;
+
+    LikeSearchState() : escape_char('\\') {}
+
+    /// Stores a copy of `search_string_arg` and refreshes the StringValue view and the
+    /// substring searcher that are derived from that copy.
+    void set_search_string(const std::string& search_string_arg) {
+        search_string = search_string_arg;
+        search_string_sv = StringValue(search_string);
+        substring_pattern = StringSearch(&search_string_sv);
+    }
+};
+
+/// Signature of a per-row evaluator: (search state, value, pattern, out result) -> Status.
+/// The result is written through the `unsigned char*` argument.
+using LikeFn = std::function<doris::Status(LikeSearchState* state, const StringValue&,
+                                            const StringValue&, unsigned char*)>;
+
+/// Function state created by prepare(): the search state plus the evaluator chosen for it.
+struct LikeState {
+    LikeSearchState search_state;
+    LikeFn function;
+};
+
+/// Common base of FunctionLike and FunctionRegexp: a two-argument function returning UInt8.
+/// Subclasses provide prepare(), which selects the evaluator stored in LikeState.
+class FunctionLikeBase : public IFunction {
+public:
+    /// Always two arguments: the value and the pattern.
+    size_t get_number_of_arguments() const override { return 2; }
+
+    /// The result type is UInt8 regardless of the argument types.
+    DataTypePtr get_return_type_impl(const DataTypes& /*arguments*/) const override {
+        return std::make_shared<DataTypeUInt8>();
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t /*input_rows_count*/) override;
+
+    // NOTE(review): presumably releases the LikeState allocated in prepare() - confirm
+    // against the definition in like.cpp.
+    Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override;
+protected:
+    // Takes the raw chars/offsets of the value and pattern string columns, the result
+    // container, the evaluator and its search state.
+    Status vector_vector(const ColumnString::Chars& values,
+                         const ColumnString::Offsets& value_offsets,
+                         const ColumnString::Chars& patterns,
+                         const ColumnString::Offsets& pattern_offsets,
+                         ColumnUInt8::Container& result, const LikeFn& function,
+                         LikeSearchState* search_state);
+
+    // The four evaluators below are installed by the subclasses' prepare() when a constant
+    // pattern fully matches the corresponding *_STARTS_WITH / *_ENDS_WITH / *_EQUALS /
+    // *_SUBSTRING regular expression; they all share the LikeFn signature.
+    static Status constant_starts_with_fn(LikeSearchState* state, const StringValue& val,
+                                          const StringValue& pattern, unsigned char* result);
+
+    static Status constant_ends_with_fn(LikeSearchState* state, const StringValue& val,
+                                        const StringValue& pattern, unsigned char* result);
+
+    static Status constant_equals_fn(LikeSearchState* state, const StringValue& val,
+                                     const StringValue& pattern, unsigned char* result);
+
+    static Status constant_substring_fn(LikeSearchState* state, const StringValue& val,
+                                        const StringValue& pattern, unsigned char* result);
+};
+
+/// The "like" function: SQL LIKE matching with '%' / '_' wildcards.
+class FunctionLike : public FunctionLikeBase {
+public:
+    static constexpr auto name = "like";
+
+    static FunctionPtr create() { return std::make_shared<FunctionLike>(); }
+
+    String get_name() const override { return name; }
+
+    /// Allocates the LikeState and chooses the evaluator (see like.cpp).
+    Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override;
+private:
+    /// Default evaluator, installed by prepare() before the pattern is inspected.
+    static Status like_fn(LikeSearchState* state, const StringValue& val,
+                          const StringValue& pattern, unsigned char* result);
+
+    /// Full match of `val` against the regex pre-compiled into `state->regex`.
+    static Status constant_regex_full_fn(LikeSearchState* state, const StringValue& val,
+                                         const StringValue& pattern, unsigned char* result);
+
+    /// Converts a LIKE pattern to a regex string, honouring `state->escape_char`.
+    static void convert_like_pattern(LikeSearchState* state, const std::string& pattern,
+                                     std::string* re_pattern);
+
+    /// Replaces "\%" with "%" and "\_" with "_" in `*search_string`, in place.
+    static void remove_escape_character(std::string* search_string);
+};
+
+/// The "regexp" function: regular-expression matching using RE2 partial-match semantics.
+class FunctionRegexp : public FunctionLikeBase {
+public:
+    static constexpr auto name = "regexp";
+
+    static FunctionPtr create() { return std::make_shared<FunctionRegexp>(); }
+
+    String get_name() const override { return name; }
+
+    /// Allocates the LikeState and chooses the evaluator (see like.cpp).
+    Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override;
+private:
+    /// Default evaluator: compiles `pattern` on each call and does a partial match on `val`.
+    static Status regexp_fn(LikeSearchState* state, const StringValue& val,
+                            const StringValue& pattern, unsigned char* result);
+
+    /// Partial match of `val` against the regex pre-compiled into `state->regex`.
+    static Status constant_regex_partial_fn(LikeSearchState* state, const StringValue& val,
+                                            const StringValue& pattern, unsigned char* result);
+};
+
+// NOTE(review): the two functions below are non-inline definitions in a header, so
+// including like.h from more than one translation unit would yield duplicate definitions
+// at link time. Moving them to like.cpp looks like the safe fix; adding `inline` should
+// only be done after checking how simple_function_factory.h declares them.
+
+/// Registers FunctionLike with the factory.
+void register_function_like(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionLike>();
+}
+
+/// Registers FunctionRegexp with the factory.
+void register_function_regexp(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionRegexp>();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/math.cpp b/be/src/vec/functions/math.cpp
new file mode 100644
index 0000000000..4aa156afca
--- /dev/null
+++ b/be/src/vec/functions/math.cpp
@@ -0,0 +1,475 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/common/field_visitors.h"
+#include "vec/data_types/number_traits.h"
+#include "vec/functions/function_const.h"
+#include "vec/functions/function_math_binary_float64.h"
+#include "vec/functions/function_math_unary.h"
+#include "vec/functions/function_string.h"
+#include "vec/functions/function_totype.h"
+#include "vec/functions/function_unary_arithmetic.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+// Powers of ten indexed by exponent: log_10[i] == 1e<i> for i in [0, 308].
+// my_double_round() reads this table for exponents that are in range.
+const double log_10[] = {
+        1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009, 1e010, 1e011, 1e012,
+        1e013, 1e014, 1e015, 1e016, 1e017, 1e018, 1e019, 1e020, 1e021, 1e022, 1e023, 1e024, 1e025,
+        1e026, 1e027, 1e028, 1e029, 1e030, 1e031, 1e032, 1e033, 1e034, 1e035, 1e036, 1e037, 1e038,
+        1e039, 1e040, 1e041, 1e042, 1e043, 1e044, 1e045, 1e046, 1e047, 1e048, 1e049, 1e050, 1e051,
+        1e052, 1e053, 1e054, 1e055, 1e056, 1e057, 1e058, 1e059, 1e060, 1e061, 1e062, 1e063, 1e064,
+        1e065, 1e066, 1e067, 1e068, 1e069, 1e070, 1e071, 1e072, 1e073, 1e074, 1e075, 1e076, 1e077,
+        1e078, 1e079, 1e080, 1e081, 1e082, 1e083, 1e084, 1e085, 1e086, 1e087, 1e088, 1e089, 1e090,
+        1e091, 1e092, 1e093, 1e094, 1e095, 1e096, 1e097, 1e098, 1e099, 1e100, 1e101, 1e102, 1e103,
+        1e104, 1e105, 1e106, 1e107, 1e108, 1e109, 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116,
+        1e117, 1e118, 1e119, 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129,
+        1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, 1e140, 1e141, 1e142,
+        1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, 1e150, 1e151, 1e152, 1e153, 1e154, 1e155,
+        1e156, 1e157, 1e158, 1e159, 1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168,
+        1e169, 1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179, 1e180, 1e181,
+        1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189, 1e190, 1e191, 1e192, 1e193, 1e194,
+        1e195, 1e196, 1e197, 1e198, 1e199, 1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207,
+        1e208, 1e209, 1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219, 1e220,
+        1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229, 1e230, 1e231, 1e232, 1e233,
+        1e234, 1e235, 1e236, 1e237, 1e238, 1e239, 1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246,
+        1e247, 1e248, 1e249, 1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259,
+        1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269, 1e270, 1e271, 1e272,
+        1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279, 1e280, 1e281, 1e282, 1e283, 1e284, 1e285,
+        1e286, 1e287, 1e288, 1e289, 1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298,
+        1e299, 1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308};
+
+// Number of elements in a C array, as uint64_t. Only valid for true arrays, not pointers.
+#define ARRAY_ELEMENTS(A) ((uint64_t)(sizeof(A) / sizeof(A[0])))
+
+/// Rounds or truncates `value` to `dec` decimal places.
+///
+/// @param value        number to round
+/// @param dec          number of decimal places; a negative value (when `dec_unsigned` is
+///                     false) operates on digits to the left of the decimal point
+/// @param dec_unsigned when true, `dec` is treated as non-negative even if its sign bit is set
+/// @param truncate     when true, digits are dropped toward zero (floor for value >= 0,
+///                     ceil otherwise); when false, std::rint is used
+/// @return the rounded value; 0.0 when scaling by a negative `dec` overflows to infinity,
+///         and `value` unchanged when scaling by a non-negative `dec` overflows to infinity
+double my_double_round(double value, int64_t dec, bool dec_unsigned, bool truncate) {
+    bool dec_negative = (dec < 0) && !dec_unsigned;
+    uint64_t abs_dec = dec_negative ? -dec : dec;
+    /*
+       tmp2 is here to avoid return the value with 80 bit precision
+       This will fix that the test round(0.1,1) = round(0.1,1) is true
+       Tagging with volatile is no guarantee, it may still be optimized away...
+       */
+    volatile double tmp2 = 0.0;
+
+    // Scale factor 10^abs_dec: table lookup when in range, std::pow otherwise.
+    double tmp =
+            (abs_dec < ARRAY_ELEMENTS(log_10) ? log_10[abs_dec] : std::pow(10.0, (double)abs_dec));
+
+    // Pre-compute these, to avoid optimizing away e.g. 'floor(v/tmp) * tmp'.
+    volatile double value_div_tmp = value / tmp;
+    volatile double value_mul_tmp = value * tmp;
+
+    if (dec_negative && std::isinf(tmp)) {
+        // The scale factor itself overflowed: every digit is rounded away.
+        tmp2 = 0.0;
+    } else if (!dec_negative && std::isinf(value_mul_tmp)) {
+        // Scaling the value overflowed: return it unchanged.
+        tmp2 = value;
+    } else if (truncate) {
+        // Truncation drops digits toward zero, hence floor for non-negative values and
+        // ceil for negative ones.
+        if (value >= 0.0) {
+            tmp2 = dec < 0 ? std::floor(value_div_tmp) * tmp : std::floor(value_mul_tmp) / tmp;
+        } else {
+            tmp2 = dec < 0 ? std::ceil(value_div_tmp) * tmp : std::ceil(value_mul_tmp) / tmp;
+        }
+    } else {
+        tmp2 = dec < 0 ? std::rint(value_div_tmp) * tmp : std::rint(value_mul_tmp) / tmp;
+    }
+
+    return tmp2;
+}
+// Each *Name struct below carries the name a function is registered under; the alias that
+// follows binds that name to the corresponding <cmath> function or constant.
+
+// acos -> std::acos
+struct AcosName {
+    static constexpr auto name = "acos";
+};
+using FunctionAcos = FunctionMathUnary<UnaryFunctionVectorized<AcosName, std::acos>>;
+
+// asin -> std::asin
+struct AsinName {
+    static constexpr auto name = "asin";
+};
+using FunctionAsin = FunctionMathUnary<UnaryFunctionVectorized<AsinName, std::asin>>;
+
+// atan -> std::atan
+struct AtanName {
+    static constexpr auto name = "atan";
+};
+using FunctionAtan = FunctionMathUnary<UnaryFunctionVectorized<AtanName, std::atan>>;
+
+// cos -> std::cos
+struct CosName {
+    static constexpr auto name = "cos";
+};
+using FunctionCos = FunctionMathUnary<UnaryFunctionVectorized<CosName, std::cos>>;
+
+// e: Euler's number as a constant function
+struct EImpl {
+    static constexpr auto name = "e";
+    static constexpr double value = 2.7182818284590452353602874713526624977572470;
+};
+using FunctionE = FunctionMathConstFloat64<EImpl>;
+
+// pi: the constant pi as a constant function
+struct PiImpl {
+    static constexpr auto name = "pi";
+    static constexpr double value = 3.1415926535897932384626433832795028841971693;
+};
+using FunctionPi = FunctionMathConstFloat64<PiImpl>;
+
+// exp -> std::exp
+struct ExpName {
+    static constexpr auto name = "exp";
+};
+using FunctionExp = FunctionMathUnary<UnaryFunctionVectorized<ExpName, std::exp>>;
+
+// ln -> std::log (natural logarithm)
+struct LnName {
+    static constexpr auto name = "ln";
+};
+using FunctionLn = FunctionMathUnary<UnaryFunctionVectorized<LnName, std::log>>;
+
+// log2 -> std::log2
+struct Log2Name {
+    static constexpr auto name = "log2";
+};
+using FunctionLog2 = FunctionMathUnary<UnaryFunctionVectorized<Log2Name, std::log2>>;
+
+// log10 -> std::log10
+struct Log10Name {
+    static constexpr auto name = "log10";
+};
+using FunctionLog10 = FunctionMathUnary<UnaryFunctionVectorized<Log10Name, std::log10>>;
+
+struct LogName {
+    static constexpr auto name = "log";
+};
+
+/// Two-argument logarithm: the left argument is the base, the right argument is the value.
+/// The function name comes from the `Name` template parameter, which must expose a static
+/// `name` member.
+template <typename Name>
+struct LogImpl {
+    static constexpr auto name = Name::name;
+    static constexpr auto rows_per_iteration = 1;
+
+    /// Writes log_{src_left[0]}(src_right[0]) to dst[0], computed as ln(right) / ln(left)
+    /// in Float64.
+    template <typename T1, typename T2>
+    static void execute(const T1* src_left, const T2* src_right, Float64* dst) {
+        dst[0] = static_cast<Float64>(std::log(static_cast<Float64>(src_right[0])) /
+                                      std::log(static_cast<Float64>(src_left[0])));
+    }
+};
+using FunctionLog = FunctionMathBinaryFloat64<LogImpl<LogName>>;
+
+// ceil -> std::ceil, instantiated with DataTypeInt64 as the third template argument
+struct CeilName {
+    static constexpr auto name = "ceil";
+};
+using FunctionCeil = FunctionMathUnary<UnaryFunctionVectorized<CeilName, std::ceil, DataTypeInt64>>;
+
+struct HexIntName {
+    static constexpr auto name = "hex";
+};
+
+/// hex(Int64) -> string: upper-case hexadecimal of the value reinterpreted as uint64_t,
+/// without leading zeros.
+struct HexIntImpl {
+    using ReturnType = DataTypeString;
+    static constexpr auto TYPE_INDEX = TypeIndex::Int64;
+    using Type = Int64;
+    using ReturnColumnType = ColumnString;
+
+    static DataTypes get_variadic_argument_types() {
+        return {std::make_shared<vectorized::DataTypeInt64>()};
+    }
+
+    /// Formats `num` as upper-case hex into `ans` and returns a view of the digits.
+    /// `ans` must have room for 17 chars (16 digits for the largest uint64_t plus a NUL).
+    /// For zero the view refers to a static "0" and `ans` is left untouched.
+    static std::string_view hex(uint64_t num, char* ans) {
+        static constexpr auto hex_table = "0123456789ABCDEF";
+        if (num == 0) {
+            return {hex_table, 1};
+        }
+
+        // Emit digits least-significant first...
+        size_t len = 0;
+        for (; num != 0; num >>= 4) {
+            ans[len++] = hex_table[num & 15];
+        }
+        ans[len] = '\0';
+
+        // ...then reverse them in place. len >= 1 here, so len - 1 cannot wrap.
+        for (size_t lo = 0, hi = len - 1; lo < hi; ++lo, --hi) {
+            const char saved = ans[lo];
+            ans[lo] = ans[hi];
+            ans[hi] = saved;
+        }
+
+        return {ans, len};
+    }
+
+    /// Converts every element of `data` and appends it to the result string column.
+    static Status vector(const ColumnInt64::Container& data, ColumnString::Chars& res_data,
+                         ColumnString::Offsets& res_offsets) {
+        const size_t rows = data.size();
+        res_offsets.resize(rows);
+        char digits[17];
+        for (size_t row = 0; row < rows; ++row) {
+            StringOP::push_value_string(hex(data[row], digits), row, res_data, res_offsets);
+        }
+        return Status::OK();
+    }
+};
+
+using FunctionHexInt = FunctionUnaryToType<HexIntImpl, HexIntName>;
+
+/// sign(a): -1 for negative values, 0 for zero, 1 otherwise, returned as Int8.
+/// Unsigned inputs can only yield 0 or 1.
+template <typename A>
+struct SignImpl {
+    using ResultType = Int8;
+    static inline ResultType apply(A a) {
+        if constexpr (IsDecimalNumber<A> || std::is_floating_point_v<A>) {
+            // Compare against a zero of the same type as the argument.
+            const int sign = (a < A(0)) ? -1 : ((a == A(0)) ? 0 : 1);
+            return static_cast<ResultType>(sign);
+        } else if constexpr (std::is_signed_v<A>) {
+            const int sign = (a < 0) ? -1 : ((a == 0) ? 0 : 1);
+            return static_cast<ResultType>(sign);
+        } else if constexpr (std::is_unsigned_v<A>) {
+            const int sign = (a == 0) ? 0 : 1;
+            return static_cast<ResultType>(sign);
+        }
+    }
+};
+
+struct NameSign {
+    static constexpr auto name = "sign";
+};
+using FunctionSign = FunctionUnaryArithmetic<SignImpl, NameSign, false>;
+
+/// abs(a). Decimal inputs keep their own type; other inputs use
+/// NumberTraits::ResultOfAbs<A>::Type as the result type.
+template <typename A>
+struct AbsImpl {
+    using ResultType =
+            std::conditional_t<IsDecimalNumber<A>, A, typename NumberTraits::ResultOfAbs<A>::Type>;
+
+    static inline ResultType apply(A a) {
+        if constexpr (IsDecimalNumber<A>)
+            return a < 0 ? A(-a) : a;
+        else if constexpr (std::is_integral_v<A> && std::is_signed_v<A>)
+            // For a < 0, ~a equals -a - 1, so (~a) + 1 is the magnitude; the + 1 is done
+            // after the cast to ResultType rather than by negating `a` directly.
+            // NOTE(review): presumably ResultType is wide enough to hold the magnitude of
+            // the minimum value of A - confirm in NumberTraits::ResultOfAbs.
+            return a < 0 ? static_cast<ResultType>(~a) + 1 : a;
+        else if constexpr (std::is_integral_v<A> && std::is_unsigned_v<A>)
+            return static_cast<ResultType>(a);
+        else if constexpr (std::is_floating_point_v<A>)
+            return static_cast<ResultType>(std::abs(a));
+    }
+};
+
+struct NameAbs {
+    static constexpr auto name = "abs";
+};
+
+using FunctionAbs = FunctionUnaryArithmetic<AbsImpl, NameAbs, false>;
+
+/// negative(a): unary minus; the result has the same type as the argument.
+template <typename A>
+struct NegativeImpl {
+    using ResultType = A;
+
+    static inline ResultType apply(A a) {
+        return -a;
+    }
+};
+
+struct NameNegative {
+    static constexpr auto name = "negative";
+};
+
+using FunctionNegative = FunctionUnaryArithmetic<NegativeImpl, NameNegative, false>;
+
+/// positive(a): returns the argument unchanged, in the same type.
+template <typename A>
+struct PositiveImpl {
+    using ResultType = A;
+
+    static inline ResultType apply(A a) { return static_cast<ResultType>(a); }
+};
+
+struct NamePositive {
+    static constexpr auto name = "positive";
+};
+
+using FunctionPositive = FunctionUnaryArithmetic<PositiveImpl, NamePositive, false>;
+
+// sin -> std::sin
+struct SinName {
+    static constexpr auto name = "sin";
+};
+using FunctionSin = FunctionMathUnary<UnaryFunctionVectorized<SinName, std::sin>>;
+
+// sqrt -> std::sqrt
+struct SqrtName {
+    static constexpr auto name = "sqrt";
+};
+using FunctionSqrt = FunctionMathUnary<UnaryFunctionVectorized<SqrtName, std::sqrt>>;
+
+// tan -> std::tan
+struct TanName {
+    static constexpr auto name = "tan";
+};
+using FunctionTan = FunctionMathUnary<UnaryFunctionVectorized<TanName, std::tan>>;
+
+// floor -> std::floor, instantiated with DataTypeInt64 as the third template argument
+struct FloorName {
+    static constexpr auto name = "floor";
+};
+using FunctionFloor = FunctionMathUnary<UnaryFunctionVectorized<FloorName, std::floor, DataTypeInt64>>;
+
+// pow -> std::pow (binary, Float64 result)
+struct PowName {
+    static constexpr auto name = "pow";
+};
+using FunctionPow = FunctionMathBinaryFloat64<BinaryFunctionVectorized<PowName, std::pow>>;
+
+struct TruncateName {
+    static constexpr auto name = "truncate";
+};
+
+/// truncate(value, places): drops digits beyond `places` decimal places, toward zero.
+/// The function name comes from the `Name` template parameter, which must expose a static
+/// `name` member.
+template <typename Name>
+struct TruncateImpl {
+    static constexpr auto rows_per_iteration = 1;
+    static constexpr auto name = Name::name;
+
+    /// Writes my_double_round(src_left[0], src_right[0], /*dec_unsigned=*/false,
+    /// /*truncate=*/true) to dst[0]. The right argument is narrowed to Int32 first.
+    template <typename T1, typename T2>
+    static void execute(const T1* src_left, const T2* src_right, Float64* dst) {
+        dst[0] = static_cast<Float64>(my_double_round(
+                static_cast<Float64>(src_left[0]), static_cast<Int32>(src_right[0]), false, true));
+    }
+};
+using FunctionTruncate = FunctionMathBinaryFloat64<TruncateImpl<TruncateName>>;
+
+/// radians(a): a * pi / 180, cast back to the argument type.
+// NOTE(review): ResultType is A, so for an integral A the static_cast discards the
+// fractional part of the result - confirm this is only instantiated for floating types.
+template <typename A>
+struct RadiansImpl {
+    using ResultType = A;
+
+    static inline ResultType apply(A a) {
+        return static_cast<ResultType>(a * PiImpl::value / 180.0);
+    }
+};
+
+struct NameRadians {
+    static constexpr auto name = "radians";
+};
+
+using FunctionRadians = FunctionUnaryArithmetic<RadiansImpl, NameRadians, false>;
+
+/// degrees(a): a * 180 / pi, cast back to the argument type.
+// NOTE(review): ResultType is A, so for an integral A the static_cast discards the
+// fractional part of the result - confirm this is only instantiated for floating types.
+template <typename A>
+struct DegreesImpl {
+    using ResultType = A;
+
+    static inline ResultType apply(A a) {
+        return static_cast<ResultType>(a * 180.0 / PiImpl::value);
+    }
+};
+
+struct NameDegrees {
+    static constexpr auto name = "degrees";
+};
+
+using FunctionDegrees = FunctionUnaryArithmetic<DegreesImpl, NameDegrees, false>;
+
+struct NameBin {
+    static constexpr auto name = "bin";
+};
+
+/// bin(Int64) -> string: binary digits of the value reinterpreted as uint64_t, without
+/// leading zeros ("0" for zero).
+struct BinImpl {
+    using ReturnType = DataTypeString;
+    static constexpr auto TYPE_INDEX = TypeIndex::Int64;
+    using Type = Int64;
+    using ReturnColumnType = ColumnString;
+
+    /// Returns the binary representation of `value` as a string of '0' / '1' characters.
+    static std::string bin_impl(Int64 value) {
+        constexpr size_t bit_count = sizeof(uint64_t) * 8;
+        char digits[bit_count];
+        // Fill the buffer from the back so the digits end up most-significant first.
+        size_t first = bit_count;
+        uint64_t remaining = static_cast<uint64_t>(value);
+        for (;;) {
+            digits[--first] = '0' + (remaining & 1);
+            remaining >>= 1;
+            if (remaining == 0) {
+                break;
+            }
+        }
+        return std::string(digits + first, bit_count - first);
+    }
+
+    /// Converts every element of `data` and appends it to the result string column.
+    static Status vector(const ColumnInt64::Container& data, ColumnString::Chars& res_data,
+                         ColumnString::Offsets& res_offsets) {
+        const size_t rows = data.size();
+        res_offsets.resize(rows);
+        for (size_t row = 0; row < rows; ++row) {
+            StringOP::push_value_string(bin_impl(data[row]), row, res_data, res_offsets);
+        }
+        return Status::OK();
+    }
+};
+
+using FunctionBin = FunctionUnaryToType<BinImpl, NameBin>;
+
+struct RoundName {
+    static constexpr auto name = "round";
+};
+
+/// round(double)-->int64
+/// key_str:roundFloat64
+/// The function name comes from the `Name` template parameter, which must expose a static
+/// `name` member.
+template <typename Name>
+struct RoundOneImpl {
+    using Type = DataTypeInt64;
+    static constexpr auto name = Name::name;
+    static constexpr auto rows_per_iteration = 1;
+    static constexpr bool always_returns_float64 = false;
+
+    /// Argument signature of this overload: a single Float64.
+    static DataTypes get_variadic_argument_types() {
+        return {std::make_shared<vectorized::DataTypeFloat64>()};
+    }
+
+    /// Writes std::round(src[0]) (halfway cases rounded away from zero) to dst[0] as Int64.
+    template <typename T, typename U>
+    static void execute(const T* src, U* dst) {
+        dst[0] = static_cast<Int64>(std::round(static_cast<Float64>(src[0])));
+    }
+};
+using FunctionRoundOne = FunctionMathUnary<RoundOneImpl<RoundName>>;
+
+/// round(double,int32)-->double
+/// key_str:roundFloat64Int32
+/// The function name comes from the `Name` template parameter, which must expose a static
+/// `name` member.
+template <typename Name>
+struct RoundTwoImpl {
+    static constexpr auto name = Name::name;
+    static constexpr auto rows_per_iteration = 1;
+
+    /// Argument signature of this overload: (Float64, Int32).
+    static DataTypes get_variadic_argument_types() {
+        return {std::make_shared<vectorized::DataTypeFloat64>(),
+                std::make_shared<vectorized::DataTypeInt32>()};
+    }
+
+    /// Writes my_double_round(src_left[0], src_right[0], /*dec_unsigned=*/false,
+    /// /*truncate=*/false) to dst[0].
+    template <typename T1, typename T2>
+    static void execute(const T1* src_left, const T2* src_right, Float64* dst) {
+        dst[0] = my_double_round(static_cast<Float64>(src_left[0]),
+                                 static_cast<Int32>(src_right[0]), false, false);
+    }
+};
+using FunctionRoundTwo = FunctionMathBinaryFloat64<RoundTwoImpl<RoundName>>;
+
+/// Registers every math function defined in this file with the factory, together with
+/// their alternative names. Each register_alias call appears after the registration of
+/// the function it aliases.
+void register_function_math(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionAcos>();
+    factory.register_function<FunctionAsin>();
+    factory.register_function<FunctionAtan>();
+    factory.register_function<FunctionCos>();
+    factory.register_function<FunctionCeil>();
+    factory.register_alias("ceil", "dceil");
+    factory.register_alias("ceil", "ceiling");
+    factory.register_function<FunctionHexInt>();
+    factory.register_function<FunctionE>();
+    factory.register_function<FunctionLn>();
+    factory.register_alias("ln", "dlog1");
+    factory.register_function<FunctionLog>();
+    factory.register_function<FunctionLog2>();
+    factory.register_function<FunctionLog10>();
+    factory.register_alias("log10", "dlog10");
+    factory.register_function<FunctionPi>();
+    factory.register_function<FunctionSign>();
+    factory.register_function<FunctionAbs>();
+    factory.register_function<FunctionNegative>();
+    factory.register_function<FunctionPositive>();
+    factory.register_function<FunctionSin>();
+    factory.register_function<FunctionSqrt>();
+    factory.register_alias("sqrt", "dsqrt");
+    factory.register_function<FunctionTan>();
+    factory.register_function<FunctionFloor>();
+    factory.register_alias("floor", "dfloor");
+    // "round" is registered twice: the one-argument and the two-argument overload.
+    factory.register_function<FunctionRoundOne>();
+    factory.register_function<FunctionRoundTwo>();
+    factory.register_function<FunctionPow>();
+    factory.register_alias("pow", "power");
+    factory.register_alias("pow", "dpow");
+    factory.register_alias("pow", "fpow");
+    factory.register_function<FunctionExp>();
+    factory.register_alias("exp", "dexp");
+    factory.register_function<FunctionTruncate>();
+    factory.register_function<FunctionRadians>();
+    factory.register_function<FunctionDegrees>();
+    factory.register_function<FunctionBin>();
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/minus.cpp b/be/src/vec/functions/minus.cpp
new file mode 100644
index 0000000000..e215a52f44
--- /dev/null
+++ b/be/src/vec/functions/minus.cpp
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Minus.cpp
+// and modified by Doris
+
+#include "vec/common/arithmetic_overflow.h"
+#include "vec/functions/function_binary_arithmetic.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+/// Subtraction kernel: the binary operation handed to FunctionBinaryArithmetic.
+template <typename A, typename B>
+struct MinusImpl {
+    using ResultType = typename NumberTraits::ResultOfSubtraction<A, B>::Type;
+    static constexpr bool allow_decimal = true;
+
+    /// Overflow-checked form, used for Decimal operations.
+    /// @returns true if overflowed, false otherwise.
+    template <typename Result = ResultType>
+    static bool apply(A a, B b, Result& c) {
+        return common::sub_overflow(static_cast<Result>(a), b, c);
+    }
+    /// DecimalV2Value operands use the type's own operator-.
+    static DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b) { return a - b; }
+    /// Unchecked form: `a` is converted to Result before the subtraction.
+    template <typename Result = ResultType>
+    static Result apply(A a, B b) {
+        return static_cast<Result>(a) - b;
+    }
+};
+
+/// Name tag passed to FunctionBinaryArithmetic.
+struct NameMinus { static constexpr const char* name = "subtract"; };
+using FunctionMinus = FunctionBinaryArithmetic<MinusImpl, NameMinus>;
+
+/// Adds the subtraction function to the given factory.
+void register_function_minus(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionMinus>();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/modulo.cpp b/be/src/vec/functions/modulo.cpp
new file mode 100644
index 0000000000..94a02d01ba
--- /dev/null
+++ b/be/src/vec/functions/modulo.cpp
@@ -0,0 +1,159 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Modulo.cpp
+// and modified by Doris
+
+#ifdef __SSE2__
+#define LIBDIVIDE_SSE2 1
+#endif
+
+#include <libdivide.h>
+
+#include "common/status.h"
+#include "vec/functions/function_binary_arithmetic.h"
+#include "vec/functions/function_binary_arithmetic_to_null_type.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+/// Row-wise modulo kernel; a zero integer/decimal divisor is flagged in null_map.
+template <typename A, typename B>
+struct ModuloImpl {
+    using ResultType = typename NumberTraits::ResultOfModulo<A, B>::Type;
+    template <typename Result = ResultType>
+    static inline Result apply(A a, B b, NullMap& null_map, size_t index) {
+        if constexpr (std::is_floating_point_v<Result>) {
+            null_map[index] = 0;
+            return fmod(a, b);
+        } else {
+            null_map[index] = b == 0;
+            // MIN % -1 overflows (undefined behaviour); a remainder by -1 is always 0.
+            if constexpr (std::is_signed_v<B>) { if (b == -1) return 0; }
+            return typename NumberTraits::ToInteger<A>::Type(a) %
+                   (typename NumberTraits::ToInteger<B>::Type(b) + (b == 0));
+        }
+    }
+    static inline DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b, NullMap& null_map, size_t index) {
+        null_map[index] = b == DecimalV2Value(0);
+        return a % (b + DecimalV2Value(b == DecimalV2Value(0)));
+    }
+#if USE_EMBEDDED_COMPILER
+    static constexpr bool compilable = false; /// don't know how to throw from LLVM IR
+#endif
+};
+
+/// Modulo of a whole column by one constant divisor, using libdivide for the division.
+template <typename A, typename B>
+struct ModuloByConstantImpl : BinaryOperationImplBase<A, B, ModuloImpl<A, B>> {
+    using ResultType = typename ModuloImpl<A, B>::ResultType;
+    /// Stores a[i] % b into c[i] for every row of `a`. `c` is written in place by index, so it
+    /// is expected to be sized by the caller. A zero divisor currently produces all zeros.
+    static void vector_constant(const PaddedPODArray<A>& a, B b, PaddedPODArray<ResultType>& c) {
+        const size_t size = a.size();
+        // TODO: Support return NULL in the future
+        if (UNLIKELY(b == 0)) {
+            memset(c.data(), 0, sizeof(ResultType) * c.size());
+            return;
+        }
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wsign-compare"
+        // Anything modulo 1 or -1 is 0.
+        if (UNLIKELY((std::is_signed_v<B> && b == -1) || b == 1)) {
+            for (size_t i = 0; i < size; ++i) c[i] = 0;
+            return;
+        }
+#pragma GCC diagnostic pop
+        // libdivide::divider<A> takes its divisor as an A. If `b` does not survive the round
+        // trip through A it would be truncated (possibly to 0), so use the plain operator.
+        if (UNLIKELY(static_cast<B>(static_cast<A>(b)) != b)) {
+            for (size_t i = 0; i < size; ++i) c[i] = a[i] % b;
+            return;
+        }
+        libdivide::divider<A> divider(b);
+        /// Here we failed to make the SSE variant from libdivide give an advantage.
+        /// NOTE: perhaps, the division semantics with the remainder of negative numbers is not preserved.
+        for (size_t i = 0; i < size; ++i) c[i] = a[i] - (a[i] / divider) * b;
+    }
+};
+
+/** Specializations are specified for UInt64, UInt32, Int64 and Int32 dividends with a divisor of the same signedness;
+  * each one derives from ModuloByConstantImpl. Can be expanded to all possible combinations, but more code is needed.
+  */
+
+template <>
+struct BinaryOperationImpl<UInt64, UInt8, ModuloImpl<UInt64, UInt8>>
+        : ModuloByConstantImpl<UInt64, UInt8> {};
+template <>
+struct BinaryOperationImpl<UInt64, UInt16, ModuloImpl<UInt64, UInt16>>
+        : ModuloByConstantImpl<UInt64, UInt16> {};
+template <>
+struct BinaryOperationImpl<UInt64, UInt32, ModuloImpl<UInt64, UInt32>>
+        : ModuloByConstantImpl<UInt64, UInt32> {};
+template <>
+struct BinaryOperationImpl<UInt64, UInt64, ModuloImpl<UInt64, UInt64>>
+        : ModuloByConstantImpl<UInt64, UInt64> {};
+
+template <>
+struct BinaryOperationImpl<UInt32, UInt8, ModuloImpl<UInt32, UInt8>>
+        : ModuloByConstantImpl<UInt32, UInt8> {};
+template <>
+struct BinaryOperationImpl<UInt32, UInt16, ModuloImpl<UInt32, UInt16>>
+        : ModuloByConstantImpl<UInt32, UInt16> {};
+template <>
+struct BinaryOperationImpl<UInt32, UInt32, ModuloImpl<UInt32, UInt32>>
+        : ModuloByConstantImpl<UInt32, UInt32> {};
+template <>
+struct BinaryOperationImpl<UInt32, UInt64, ModuloImpl<UInt32, UInt64>>
+        : ModuloByConstantImpl<UInt32, UInt64> {};
+
+template <>
+struct BinaryOperationImpl<Int64, Int8, ModuloImpl<Int64, Int8>>
+        : ModuloByConstantImpl<Int64, Int8> {};
+template <>
+struct BinaryOperationImpl<Int64, Int16, ModuloImpl<Int64, Int16>>
+        : ModuloByConstantImpl<Int64, Int16> {};
+template <>
+struct BinaryOperationImpl<Int64, Int32, ModuloImpl<Int64, Int32>>
+        : ModuloByConstantImpl<Int64, Int32> {};
+template <>
+struct BinaryOperationImpl<Int64, Int64, ModuloImpl<Int64, Int64>>
+        : ModuloByConstantImpl<Int64, Int64> {};
+
+template <>
+struct BinaryOperationImpl<Int32, Int8, ModuloImpl<Int32, Int8>>
+        : ModuloByConstantImpl<Int32, Int8> {};
+template <>
+struct BinaryOperationImpl<Int32, Int16, ModuloImpl<Int32, Int16>>
+        : ModuloByConstantImpl<Int32, Int16> {};
+template <>
+struct BinaryOperationImpl<Int32, Int32, ModuloImpl<Int32, Int32>>
+        : ModuloByConstantImpl<Int32, Int32> {};
+template <>
+struct BinaryOperationImpl<Int32, Int64, ModuloImpl<Int32, Int64>>
+        : ModuloByConstantImpl<Int32, Int64> {};
+
+/// Name tag passed to FunctionBinaryArithmeticToNullType.
+struct NameModulo { static constexpr const char* name = "mod"; };
+using FunctionModulo = FunctionBinaryArithmeticToNullType<ModuloImpl, NameModulo, false>;
+
+/// Adds the modulo function to the given factory.
+void register_function_modulo(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionModulo>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/multiply.cpp b/be/src/vec/functions/multiply.cpp
new file mode 100644
index 0000000000..73cc85443a
--- /dev/null
+++ b/be/src/vec/functions/multiply.cpp
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Multiply.cpp
+// and modified by Doris
+
+#include "vec/common/arithmetic_overflow.h"
+#include "vec/functions/function_binary_arithmetic.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+/// Multiplication kernel: the binary operation handed to FunctionBinaryArithmetic.
+template <typename A, typename B>
+struct MultiplyImpl {
+    using ResultType = typename NumberTraits::ResultOfAdditionMultiplication<A, B>::Type;
+    static constexpr bool allow_decimal = true;
+
+    /// Overflow-checked form for Decimal operations; @returns true if overflowed.
+    template <typename Result = ResultType>
+    static bool apply(A a, B b, Result& c) {
+        return common::mul_overflow(static_cast<Result>(a), b, c);
+    }
+    /// DecimalV2Value operands use the type's own operator*.
+    static DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b) { return a * b; }
+    /// Unchecked form: `a` is converted to Result before the multiplication.
+    template <typename Result = ResultType>
+    static Result apply(A a, B b) {
+        return static_cast<Result>(a) * b;
+    }
+};
+
+/// Name tag passed to FunctionBinaryArithmetic.
+struct NameMultiply { static constexpr const char* name = "multiply"; };
+using FunctionMultiply = FunctionBinaryArithmetic<MultiplyImpl, NameMultiply>;
+
+/// Adds the multiplication function to the given factory.
+void register_function_multiply(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionMultiply>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/nullif.cpp b/be/src/vec/functions/nullif.cpp
new file mode 100644
index 0000000000..30c5e0cf55
--- /dev/null
+++ b/be/src/vec/functions/nullif.cpp
@@ -0,0 +1,155 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Nullif.cpp
+// and modified by Doris
+
+#include "vec/functions/simple_function_factory.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/utils/util.hpp"
+#include "vec/functions/function_string.h"
+#include "vec/data_types/data_type_nothing.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/get_least_supertype.h"
+#include "vec/data_types/number_traits.h"
+
+namespace doris::vectorized {
+/// nullif(a, b), evaluated as if(a = b, NULL, a) by delegating to the registered "eq" and
+/// "if" functions. Constants and NULLs are handled here: both default implementations
+/// are switched off.
+class FunctionNullIf : public IFunction {
+public:
+    /// Nullability summary of a list of arguments.
+    struct NullPresence {
+        bool has_nullable = false;      // some argument type is_nullable()
+        bool has_null_constant = false; // some argument type is only_null()
+    };
+
+    static constexpr auto name = "nullif";
+
+    static FunctionPtr create() { return std::make_shared<FunctionNullIf>(); }
+
+    String get_name() const override { return name; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    bool use_default_implementation_for_constants() const override { return false; }
+
+    bool use_default_implementation_for_nulls() const override { return false; }
+
+    /// The declared result type is the type of the first argument.
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return arguments[0];
+    }
+
+    /// Reports whether any argument type is nullable and whether any is only-null.
+    NullPresence get_null_resense(const ColumnsWithTypeAndName& args) const {
+        NullPresence res;
+        for (const auto& elem : args) {
+            res.has_nullable = res.has_nullable || elem.type->is_nullable();
+            res.has_null_constant = res.has_null_constant || elem.type->only_null();
+        }
+        return res;
+    }
+
+    /// Result type used for the "eq" call: Nullable(Nothing) if any argument is only-null,
+    /// Nullable(UInt8) if any argument is nullable, plain UInt8 otherwise.
+    DataTypePtr get_return_type_for_equal(const ColumnsWithTypeAndName& arguments) const {
+        // For an empty list both flags stay false, which falls through to plain UInt8.
+        const NullPresence null_presence = get_null_resense(arguments);
+        if (null_presence.has_null_constant) {
+            return make_nullable(std::make_shared<DataTypeNothing>());
+        }
+        if (null_presence.has_nullable) {
+            return make_nullable(std::make_shared<doris::vectorized::DataTypeUInt8>());
+        }
+        return std::make_shared<doris::vectorized::DataTypeUInt8>();
+    }
+
+    // nullIf(col1, col2) == if(col1 = col2, NULL, col1)
+    /// `arguments` holds the block positions of col1 and col2; the output goes to `result`.
+    /// Returns a non-OK status if "eq"/"if" cannot be resolved or if either of them fails.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        const ColumnWithTypeAndName first_arg = block.get_by_position(arguments[0]);
+        const ColumnWithTypeAndName second_arg = block.get_by_position(arguments[1]);
+        const ColumnWithTypeAndName new_result_column = block.get_by_position(result);
+
+        // Step 1: evaluate `col1 = col2` in a scratch block; the comparison result lands at
+        // position 2 of that block.
+        const ColumnsWithTypeAndName eq_columns {first_arg, second_arg};
+        auto result_type = get_return_type_for_equal(eq_columns);
+        Block eq_temporary_block({first_arg, second_arg, {nullptr, result_type, ""}});
+
+        auto equals_func =
+                SimpleFunctionFactory::instance().get_function("eq", eq_columns, result_type);
+        // get_function() returns nullptr when no creator matches the requested key.
+        if (equals_func == nullptr) {
+            return Status::RuntimeError("nullif: function 'eq' not found");
+        }
+        Status eq_status =
+                equals_func->execute(context, eq_temporary_block, {0, 1}, 2, input_rows_count);
+        if (!eq_status.ok()) {
+            return eq_status;
+        }
+
+        // Step 2: evaluate if(<eq result>, <constant>, col1) in a second scratch block; the
+        // result lands at position 3 of that block.
+        const ColumnWithTypeAndName if_column = eq_temporary_block.get_by_position(2);
+        // Default-valued constant of the result type; it fills the "then" slot named "NULL".
+        const ColumnWithTypeAndName null_column {
+                new_result_column.type->create_column_const_with_default_value(input_rows_count),
+                new_result_column.type, "NULL"};
+        const ColumnsWithTypeAndName if_columns {if_column, null_column, first_arg};
+        Block temporary_block({if_column, null_column, first_arg, new_result_column});
+
+        auto func_if = SimpleFunctionFactory::instance().get_function("if", if_columns,
+                                                                      new_result_column.type);
+        if (func_if == nullptr) {
+            return Status::RuntimeError("nullif: function 'if' not found");
+        }
+        Status if_status =
+                func_if->execute(context, temporary_block, {0, 1, 2}, 3, input_rows_count);
+        if (!if_status.ok()) {
+            return if_status;
+        }
+
+        /// need to handle nullable type and not nullable type differently,
+        /// because `IF` function always return nullable type, but result type is not always
+        if (block.get_by_position(result).type->is_nullable()) {
+            block.get_by_position(result).column = temporary_block.get_by_position(3).column;
+        } else {
+            auto cols = check_and_get_column<ColumnNullable>(
+                    temporary_block.get_by_position(3).column.get());
+            // check_and_get_column() yields nullptr on a type mismatch; report it instead of
+            // dereferencing.
+            if (cols == nullptr) {
+                return Status::RuntimeError("nullif: 'if' result is not a nullable column");
+            }
+            block.replace_by_position(result, std::move(cols->get_nested_column_ptr()));
+        }
+        return Status::OK();
+    }
+};
+
+/// Adds nullif to the given factory.
+void register_function_nullif(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionNullIf>();
+}
+}
\ No newline at end of file
diff --git a/be/src/vec/functions/plus.cpp b/be/src/vec/functions/plus.cpp
new file mode 100644
index 0000000000..9f64070aa2
--- /dev/null
+++ b/be/src/vec/functions/plus.cpp
@@ -0,0 +1,57 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Plus.cpp
+// and modified by Doris
+
+#include "vec/common/arithmetic_overflow.h"
+#include "vec/functions/function_binary_arithmetic.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+/// Addition kernel: the binary operation handed to FunctionBinaryArithmetic.
+template <typename A, typename B>
+struct PlusImpl {
+    using ResultType = typename NumberTraits::ResultOfAdditionMultiplication<A, B>::Type;
+    static constexpr bool allow_decimal = true;
+
+    /// Overflow-checked form, used for Decimal operations.
+    /// @returns true if overflowed, false otherwise.
+    template <typename Result = ResultType>
+    static bool apply(A a, B b, Result& c) {
+        return common::add_overflow(static_cast<Result>(a), b, c);
+    }
+    /// DecimalV2Value operands use the type's own operator+.
+    static DecimalV2Value apply(DecimalV2Value a, DecimalV2Value b) { return a + b; }
+    /// Unchecked form. Next everywhere, static_cast - so that there is no wrong result in
+    /// expressions of the form Int64 c = UInt32(a) * Int32(-1).
+    template <typename Result = ResultType>
+    static Result apply(A a, B b) {
+        return static_cast<Result>(a) + b;
+    }
+};
+
+/// Name tag passed to FunctionBinaryArithmetic.
+struct NamePlus { static constexpr const char* name = "add"; };
+using FunctionPlus = FunctionBinaryArithmetic<PlusImpl, NamePlus>;
+
+/// Adds the addition function to the given factory.
+void register_function_plus(SimpleFunctionFactory& factory) {
+    factory.register_function<FunctionPlus>();
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/random.cpp b/be/src/vec/functions/random.cpp
new file mode 100644
index 0000000000..366cc48606
--- /dev/null
+++ b/be/src/vec/functions/random.cpp
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <random>
+
+#include "udf/udf.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/function_helpers.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+/// random([seed]) / rand([seed]): fills the result column with doubles drawn from
+/// std::uniform_real_distribution<double>(0.0, 1.0), using a THREAD_LOCAL std::mt19937_64.
+class Random : public IFunction {
+public:
+    static constexpr auto name = "random";
+
+    static FunctionPtr create() { return std::make_shared<Random>(); }
+    String get_name() const override { return name; }
+    bool use_default_implementation_for_constants() const override { return false; }
+    size_t get_number_of_arguments() const override { return 0; }
+    bool is_variadic() const override { return true; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        return std::make_shared<DataTypeFloat64>();
+    }
+
+    /// Creates and seeds the THREAD_LOCAL generator. With one argument the seed must be a
+    /// constant column (a NULL constant seeds with 0); otherwise std::random_device is used.
+    Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
+        // execute_impl() reads and close() frees only the THREAD_LOCAL state, so a generator
+        // allocated for any other scope would never be released.
+        if (scope != FunctionContext::THREAD_LOCAL) {
+            return Status::OK();
+        }
+        void* buffer = context->allocate(sizeof(std::mt19937_64));
+        if (UNLIKELY(buffer == nullptr)) {
+            return Status::MemoryAllocFailed("allocate random seed generator failed.");
+        }
+        auto* generator = new (buffer) std::mt19937_64();
+        context->set_function_state(scope, generator);
+        if (context->get_num_args() == 1) {
+            // This is a call to RandSeed, initialize the seed
+            // TODO: should we support non-constant seed?
+            if (!context->is_col_constant(0)) {
+                return Status::InvalidArgument("Seed argument to rand() must be constant.");
+            }
+            uint32_t seed = 0;
+            if (!context->get_constant_col(0)->column_ptr->is_null_at(0)) {
+                seed = context->get_constant_col(0)->column_ptr->get64(0);
+            }
+            generator->seed(seed);
+        } else {
+            generator->seed(std::random_device()());
+        }
+        return Status::OK();
+    }
+
+    /// Replaces the column at `result` with `input_rows_count` freshly generated values.
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto res_column = ColumnFloat64::create(input_rows_count);
+        auto& res_data = res_column->get_data();
+        auto* generator = reinterpret_cast<std::mt19937_64*>(
+                context->get_function_state(FunctionContext::THREAD_LOCAL));
+        DCHECK(generator != nullptr);
+
+        std::uniform_real_distribution<double> distribution(0.0, 1.0);
+        for (size_t i = 0; i < input_rows_count; ++i) {
+            res_data[i] = distribution(*generator);
+        }
+
+        block.replace_by_position(result, std::move(res_column));
+        return Status::OK();
+    }
+
+    /// Releases the THREAD_LOCAL generator; no state is held for other scopes.
+    Status close(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
+        if (scope == FunctionContext::THREAD_LOCAL) {
+            uint8_t* generator = reinterpret_cast<uint8_t*>(
+                    context->get_function_state(FunctionContext::THREAD_LOCAL));
+            context->free(generator);
+            context->set_function_state(FunctionContext::THREAD_LOCAL, nullptr);
+        }
+        return Status::OK();
+    }
+};
+
+/// Registers the function under Random::name with the alias "rand".
+void register_function_random(SimpleFunctionFactory& factory) {
+    factory.register_function<Random>();
+    factory.register_alias(Random::name, "rand");
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h
new file mode 100644
index 0000000000..5b9f0fd213
--- /dev/null
+++ b/be/src/vec/functions/simple_function_factory.h
@@ -0,0 +1,190 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+// This file is copied from
+// https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/registerFunctions.h
+// and modified by Doris
+
+#pragma once
+#include <mutex>
+#include <string>
+
+#include "vec/functions/function.h"
+
+namespace doris::vectorized {
+
+class SimpleFunctionFactory;
+// Registration hooks, listed in the order SimpleFunctionFactory::instance() invokes them.
+void register_function_bitmap(SimpleFunctionFactory& factory);
+void register_function_hll_cardinality(SimpleFunctionFactory& factory);
+void register_function_hll_empty(SimpleFunctionFactory& factory);
+void register_function_hll_hash(SimpleFunctionFactory& factory);
+void register_function_comparison(SimpleFunctionFactory& factory);
+void register_function_logical(SimpleFunctionFactory& factory);
+void register_function_case(SimpleFunctionFactory& factory);
+void register_function_cast(SimpleFunctionFactory& factory);
+void register_function_conv(SimpleFunctionFactory& factory);
+void register_function_plus(SimpleFunctionFactory& factory);
+void register_function_minus(SimpleFunctionFactory& factory);
+void register_function_math(SimpleFunctionFactory& factory);
+void register_function_multiply(SimpleFunctionFactory& factory);
+void register_function_divide(SimpleFunctionFactory& factory);
+void register_function_int_div(SimpleFunctionFactory& factory);
+void register_function_modulo(SimpleFunctionFactory& factory);
+void register_function_bit(SimpleFunctionFactory& factory);
+void register_function_is_null(SimpleFunctionFactory& factory);
+void register_function_is_not_null(SimpleFunctionFactory& factory);
+void register_function_to_time_fuction(SimpleFunctionFactory& factory);
+void register_function_time_of_fuction(SimpleFunctionFactory& factory);
+void register_function_string(SimpleFunctionFactory& factory);
+void register_function_in(SimpleFunctionFactory& factory);
+void register_function_if(SimpleFunctionFactory& factory);
+void register_function_nullif(SimpleFunctionFactory& factory);
+void register_function_date_time_computation(SimpleFunctionFactory& factory);
+void register_function_timestamp(SimpleFunctionFactory& factory);
+void register_function_utility(SimpleFunctionFactory& factory);
+void register_function_date_time_to_string(SimpleFunctionFactory& factory);
+void register_function_date_time_string_to_string(SimpleFunctionFactory& factory);
+void register_function_json(SimpleFunctionFactory& factory);
+void register_function_function_hash(SimpleFunctionFactory& factory);
+void register_function_function_ifnull(SimpleFunctionFactory& factory);
+void register_function_comparison_eq_for_null(SimpleFunctionFactory& factory);
+void register_function_like(SimpleFunctionFactory& factory);
+void register_function_regexp(SimpleFunctionFactory& factory);
+void register_function_random(SimpleFunctionFactory& factory);
+void register_function_coalesce(SimpleFunctionFactory& factory);
+void register_function_grouping(SimpleFunctionFactory& factory);
+
+/// Registry mapping a function name (plus argument type names when variadic) to its builder.
+class SimpleFunctionFactory {
+    using Creator = std::function<FunctionBuilderPtr()>;
+    using FunctionCreators = phmap::flat_hash_map<std::string, Creator>;
+    using FunctionIsVariadic = phmap::flat_hash_set<std::string>;
+
+public:
+    /// Registers `ptr` under `name`. A non-empty get_variadic_argument_types() marks the
+    /// function as variadic, and those type names are appended to the lookup key.
+    void register_function(const std::string& name, Creator ptr) {
+        const DataTypes types = ptr()->get_variadic_argument_types();
+        std::string key_str = name;
+        if (!types.empty()) {
+            function_variadic_set.insert(name);
+            for (const auto& type : types) {
+                key_str.append(type->get_name());
+            }
+        }
+        function_creators[key_str] = ptr;
+    }
+
+    template <class Function>
+    void register_function() {
+        if constexpr (std::is_base_of<IFunction, Function>::value)
+            register_function(Function::name, &createDefaultFunction<Function>);
+        else
+            register_function(Function::name, &Function::create);
+    }
+
+    /// Makes `alias` resolve to `name` in get_function().
+    void register_alias(const std::string& name, const std::string& alias) {
+        function_alias[alias] = name;
+    }
+
+    /// Builds the function registered under `name` (or under the name it aliases).
+    /// @returns nullptr when no creator matches the resulting key.
+    FunctionBasePtr get_function(const std::string& name, const ColumnsWithTypeAndName& arguments,
+                                 const DataTypePtr& return_type) {
+        std::string key_str = name;
+        if (auto alias_it = function_alias.find(name); alias_it != function_alias.end()) {
+            key_str = alias_it->second;
+        }
+        // if function is variadic, added types_str as key
+        if (function_variadic_set.count(key_str)) {
+            for (const auto& arg : arguments) {
+                // Nullable arguments are keyed by the name of their nested type.
+                key_str.append(arg.type->is_nullable()
+                                       ? static_cast<const DataTypeNullable*>(arg.type.get())
+                                                 ->get_nested_type()
+                                                 ->get_name()
+                                       : arg.type->get_name());
+            }
+        }
+        auto iter = function_creators.find(key_str);
+        if (iter != function_creators.end()) {
+            return iter->second()->build(arguments, return_type);
+        }
+        return nullptr;
+    }
+
+private:
+    FunctionCreators function_creators;
+    FunctionIsVariadic function_variadic_set;
+    std::unordered_map<std::string, std::string> function_alias;
+
+    template <typename Function>
+    static FunctionBuilderPtr createDefaultFunction() {
+        return std::make_shared<DefaultFunctionBuilder>(Function::create());
+    }
+
+public:
+    /// Returns the shared factory; the first call runs every register_function_* hook once.
+    static SimpleFunctionFactory& instance() {
+        static std::once_flag oc;
+        static SimpleFunctionFactory instance;
+        std::call_once(oc, [&]() {
+            register_function_bitmap(instance);
+            register_function_hll_cardinality(instance);
+            register_function_hll_empty(instance);
+            register_function_hll_hash(instance);
+            register_function_comparison(instance);
+            register_function_logical(instance);
+            register_function_case(instance);
+            register_function_cast(instance);
+            register_function_conv(instance);
+            register_function_plus(instance);
+            register_function_minus(instance);
+            register_function_math(instance);
+            register_function_multiply(instance);
+            register_function_divide(instance);
+            register_function_int_div(instance);
+            register_function_modulo(instance);
+            register_function_bit(instance);
+            register_function_is_null(instance);
+            register_function_is_not_null(instance);
+            register_function_to_time_fuction(instance);
+            register_function_time_of_fuction(instance);
+            register_function_string(instance);
+            register_function_in(instance);
+            register_function_if(instance);
+            register_function_nullif(instance);
+            register_function_date_time_computation(instance);
+            register_function_timestamp(instance);
+            register_function_utility(instance);
+            register_function_date_time_to_string(instance);
+            register_function_date_time_string_to_string(instance);
+            register_function_json(instance);
+            register_function_function_hash(instance);
+            register_function_function_ifnull(instance);
+            register_function_comparison_eq_for_null(instance);
+            register_function_like(instance);
+            register_function_regexp(instance);
+            register_function_random(instance);
+            register_function_coalesce(instance);
+            register_function_grouping(instance);
+        });
+        return instance;
+    }
+};
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/time_of_function.cpp b/be/src/vec/functions/time_of_function.cpp
new file mode 100644
index 0000000000..f6d17aefe7
--- /dev/null
+++ b/be/src/vec/functions/time_of_function.cpp
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/date_time_transforms.h"
+#include "vec/functions/function_date_or_datetime_to_something.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+using FunctionWeekOfYear = FunctionDateOrDateTimeToSomething<DataTypeInt32, WeekOfYearImpl>;
+using FunctionDayOfYear = FunctionDateOrDateTimeToSomething<DataTypeInt32, DayOfYearImpl>;
+using FunctionDayOfWeek = FunctionDateOrDateTimeToSomething<DataTypeInt32, DayOfWeekImpl>;
+using FunctionDayOfMonth = FunctionDateOrDateTimeToSomething<DataTypeInt32, DayOfMonthImpl>;
+using FunctionYearWeek = FunctionDateOrDateTimeToSomething<DataTypeInt32, ToYearWeekImpl>; // all five yield Int32
+void register_function_time_of_fuction(SimpleFunctionFactory& factory) { // name (sic) must match its call site
+    factory.register_function<FunctionDayOfWeek>(); // one registration per alias declared above
+    factory.register_function<FunctionDayOfMonth>();
+    factory.register_function<FunctionDayOfYear>();
+    factory.register_function<FunctionWeekOfYear>();
+    factory.register_function<FunctionYearWeek>();
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/functions/to_time_function.cpp b/be/src/vec/functions/to_time_function.cpp
new file mode 100644
index 0000000000..c6940cc1f7
--- /dev/null
+++ b/be/src/vec/functions/to_time_function.cpp
@@ -0,0 +1,54 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/data_types/data_type_number.h"
+#include "vec/functions/date_time_transforms.h"
+#include "vec/functions/function_date_or_datetime_to_something.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+using FunctionYear = FunctionDateOrDateTimeToSomething<DataTypeInt32, ToYearImpl>;
+using FunctionQuarter = FunctionDateOrDateTimeToSomething<DataTypeInt32, ToQuarterImpl>;
+using FunctionMonth = FunctionDateOrDateTimeToSomething<DataTypeInt32, ToMonthImpl>;
+using FunctionDay = FunctionDateOrDateTimeToSomething<DataTypeInt32, ToDayImpl>;
+using FunctionWeek = FunctionDateOrDateTimeToSomething<DataTypeInt32, ToWeekImpl>;
+using FunctionHour = FunctionDateOrDateTimeToSomething<DataTypeInt32, ToHourImpl>;
+using FunctionMinute = FunctionDateOrDateTimeToSomething<DataTypeInt32, ToMinuteImpl>;
+using FunctionSecond = FunctionDateOrDateTimeToSomething<DataTypeInt32, ToSecondImpl>;
+using FunctionToDays = FunctionDateOrDateTimeToSomething<DataTypeInt32, ToDaysImpl>;
+using FunctionToDate = FunctionDateOrDateTimeToSomething<DataTypeDateTime, ToDateImpl>; // these three yield
+using FunctionDate = FunctionDateOrDateTimeToSomething<DataTypeDateTime, DateImpl>; // DataTypeDateTime,
+using FunctionTimeStamp = FunctionDateOrDateTimeToSomething<DataTypeDateTime, TimeStampImpl>; // not Int32
+
+void register_function_to_time_fuction(SimpleFunctionFactory& factory) { // name (sic) must match its call site
+    factory.register_function<FunctionSecond>(); // one registration per alias declared above
+    factory.register_function<FunctionMinute>();
+    factory.register_function<FunctionHour>();
+    factory.register_function<FunctionDay>();
+    factory.register_function<FunctionWeek>();
+    factory.register_function<FunctionMonth>();
+    factory.register_function<FunctionYear>();
+    factory.register_function<FunctionQuarter>();
+    factory.register_function<FunctionToDays>();
+    factory.register_function<FunctionToDate>();
+    factory.register_function<FunctionDate>();
+    factory.register_function<FunctionTimeStamp>();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h
new file mode 100644
index 0000000000..ab3caa784a
--- /dev/null
+++ b/be/src/vec/io/io_helper.h
@@ -0,0 +1,357 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <snappy/snappy.h>
+
+#include <iostream>
+
+#include "gen_cpp/data.pb.h"
+#include "util/binary_cast.hpp"
+#include "util/string_parser.hpp"
+#include "vec/common/arena.h"
+#include "vec/common/exception.h"
+#include "vec/common/string_ref.h"
+#include "vec/common/uint128.h"
+#include "vec/core/types.h"
+#include "vec/io/reader_buffer.h"
+#include "vec/io/var_int.h"
+#include "vec/runtime/vdatetime_value.h"
+
+#define DEFAULT_MAX_STRING_SIZE (1ULL << 30)
+#define WRITE_HELPERS_MAX_INT_WIDTH 40U
+
+namespace doris::vectorized {
+
+template <typename T>
+inline T decimal_scale_multiplier(UInt32 scale); // declared only; usable through the specializations below
+template <>
+inline Int32 decimal_scale_multiplier<Int32>(UInt32 scale) {
+    return common::exp10_i32(scale); // presumably 10^scale as Int32 — confirm against common::exp10_i32
+}
+template <>
+inline Int64 decimal_scale_multiplier<Int64>(UInt32 scale) {
+    return common::exp10_i64(scale); // 64-bit counterpart
+}
+template <>
+inline Int128 decimal_scale_multiplier<Int128>(UInt32 scale) {
+    return common::exp10_i128(scale); // 128-bit counterpart
+}
+
+inline std::string int128_to_string(__int128_t value) { // signed overload: rendered with fmt's "{}"
+    fmt::memory_buffer formatted;
+    fmt::format_to(formatted, "{}", value);
+    return {formatted.data(), formatted.size()};
+}
+
+inline std::string int128_to_string(UInt128 value) { // UInt128 overload: the type's own hex rendering
+    return value.to_hex_string();
+}
+
+template <typename T>
+void write_text(Decimal<T> value, UInt32 scale, std::ostream& ostr) { // writes [-]whole[.fraction] to ostr
+    if (value < Decimal<T>(0)) {
+        value *= Decimal<T>(-1);
+        ostr << '-';
+    }
+    // From here on `value` has been negated if it was below zero; the sign is already written.
+    T whole_part = value;
+    if (scale) {
+        whole_part = value / decimal_scale_multiplier<T>(scale);
+    }
+    if constexpr (std::is_same<T, __int128_t>::value) { // 128-bit values go through int128_to_string
+        ostr << int128_to_string(whole_part);
+    } else {
+        ostr << whole_part;
+    }
+    if (scale) { // the fractional part is written only for a non-zero scale, always `scale` characters wide
+        ostr << '.';
+        String str_fractional(scale, '0');
+        for (Int32 pos = scale - 1; pos >= 0; --pos, value /= Decimal<T>(10)) // fill right to left
+            str_fractional[pos] += value % Decimal<T>(10);
+        ostr.write(str_fractional.data(), scale);
+    }
+}
+/// Methods for output in binary format.
+
+/// Write POD-type in native format. It's recommended to use only with packed (dense) data types.
+template <typename Type>
+inline void write_pod_binary(const Type& x, BufferWritable& buf) {
+    buf.write(reinterpret_cast<const char*>(&x), sizeof(x)); // the object's sizeof(x) bytes, as stored
+}
+
+template <typename Type>
+inline void write_int_binary(const Type& x, BufferWritable& buf) {
+    write_pod_binary(x, buf); // plain forwarder to the POD writer
+}
+
+template <typename Type>
+inline void write_float_binary(const Type& x, BufferWritable& buf) {
+    write_pod_binary(x, buf); // plain forwarder to the POD writer
+}
+
+inline void write_string_binary(const std::string& s, BufferWritable& buf) {
+    write_var_uint(s.size(), buf); // length first, then the bytes
+    buf.write(s.data(), s.size());
+}
+
+inline void write_string_binary(const StringRef& s, BufferWritable& buf) {
+    write_var_uint(s.size, buf); // same layout as the std::string overload
+    buf.write(s.data, s.size);
+}
+
+inline void write_string_binary(const char* s, BufferWritable& buf) {
+    write_string_binary(StringRef {s}, buf); // NOTE(review): presumably StringRef{s} measures a NUL-terminated string — confirm
+}
+
+template <typename Type>
+void write_vector_binary(const std::vector<Type>& v, BufferWritable& buf) {
+    write_var_uint(v.size(), buf); // element count goes first
+
+    for (const auto& element : v)
+        write_binary(element, buf); // then every element, in order
+}
+
+inline void write_binary(const String& x, BufferWritable& buf) {
+    write_string_binary(x, buf); // strings take the length-prefixed path
+}
+
+inline void write_binary(const StringRef& x, BufferWritable& buf) {
+    write_string_binary(x, buf); // same for string views
+}
+
+template <typename Type>
+inline void write_binary(const Type& x, BufferWritable& buf) {
+    write_pod_binary(x, buf); // generic fallback: the object's raw bytes
+}
+
+inline size_t write_binary(const std::ostringstream& buf, PColumn* pcolumn) {
+    // Snapshot the stream contents and run snappy over them.
+    const std::string raw = buf.str();
+    std::string packed;
+    snappy::Compress(raw.data(), raw.size(), &packed);
+
+    // The snappy output is appended only when it is at most 70% of the raw size;
+    // otherwise the raw bytes are appended and the column is flagged as uncompressed.
+    const bool keep_raw = static_cast<double>(packed.size()) / raw.size() > 0.7;
+    pcolumn->set_compressed(!keep_raw);
+    pcolumn->mutable_binary()->append(keep_raw ? raw : packed);
+
+    return raw.size(); // always the uncompressed size
+}
+
+inline size_t compress_binary(PColumn* pcolumn) {
+    auto* payload = pcolumn->mutable_binary(); // compresses the column's binary field in place
+    const auto raw_size = payload->size();
+    std::string packed;
+    snappy::Compress(payload->data(), raw_size, &packed);
+
+    // Swap in the snappy output only when it is at most 70% of the raw size.
+    const bool worthwhile = !(static_cast<double>(packed.size()) / raw_size > 0.7);
+    pcolumn->set_compressed(worthwhile);
+    if (worthwhile) {
+        pcolumn->mutable_binary()->swap(packed);
+    }
+
+    return raw_size; // size before compression, whichever form was kept
+}
+
+/// Read POD-type in native format
+template <typename Type>
+inline void read_pod_binary(Type& x, BufferReadable& buf) {
+    buf.read(reinterpret_cast<char*>(&x), sizeof(x)); // fills x's own storage with sizeof(x) bytes
+}
+
+template <typename Type>
+inline void read_int_binary(Type& x, BufferReadable& buf) {
+    read_pod_binary(x, buf); // plain forwarder to the POD reader
+}
+
+template <typename Type>
+inline void read_float_binary(Type& x, BufferReadable& buf) {
+    read_pod_binary(x, buf); // plain forwarder to the POD reader
+}
+
+inline void read_string_binary(std::string& s, BufferReadable& buf,
+                               size_t MAX_STRING_SIZE = DEFAULT_MAX_STRING_SIZE) {
+    size_t size = 0;
+    read_var_uint(size, buf); // length prefix, as produced by write_string_binary
+
+    if (size > MAX_STRING_SIZE) { // reject oversized lengths before resizing
+        throw Exception("Too large string size.", TStatusCode::VEC_EXCEPTION);
+    }
+
+    s.resize(size);
+    buf.read(s.data(), size); // copies the payload into s
+}
+
+inline void read_string_binary(StringRef& s, BufferReadable& buf,
+                               size_t MAX_STRING_SIZE = DEFAULT_MAX_STRING_SIZE) {
+    size_t size = 0;
+    read_var_uint(size, buf); // same length prefix as the std::string overload
+
+    if (size > MAX_STRING_SIZE) {
+        throw Exception("Too large string size.", TStatusCode::VEC_EXCEPTION);
+    }
+    // NOTE(review): presumably the result views buf's storage rather than a copy — confirm lifetime.
+    s = buf.read(size);
+}
+
+inline StringRef read_string_binary_into(Arena& arena, BufferReadable& buf) {
+    size_t size = 0;
+    read_var_uint(size, buf);
+    // NOTE(review): unlike read_string_binary(), `size` is not checked against a maximum here.
+    char* data = arena.alloc(size);
+    buf.read(data, size);
+    // The returned view points at the memory obtained from `arena` above.
+    return StringRef(data, size);
+}
+
+template <typename Type>
+void read_vector_binary(std::vector<Type>& v, BufferReadable& buf,
+                        size_t MAX_VECTOR_SIZE = DEFAULT_MAX_STRING_SIZE) {
+    size_t count = 0;
+    read_var_uint(count, buf); // the element count precedes the elements
+
+    if (count > MAX_VECTOR_SIZE) {
+        throw Exception("Too large vector size.", TStatusCode::VEC_EXCEPTION);
+    }
+
+    v.resize(count);
+    for (auto& element : v) read_binary(element, buf);
+}
+
+inline void read_binary(String& x, BufferReadable& buf) {
+    read_string_binary(x, buf); // strings take the length-prefixed path (default size limit)
+}
+
+inline void read_binary(StringRef& x, BufferReadable& buf) {
+    read_string_binary(x, buf); // same for string views
+}
+
+template <typename Type>
+inline void read_binary(Type& x, BufferReadable& buf) {
+    read_pod_binary(x, buf); // generic fallback: raw bytes into x
+}
+
+inline void read_binary(const PColumn& pcolumn, std::string* data) { // inverse of the PColumn writers above
+    if (pcolumn.compressed()) {
+        snappy::Uncompress(pcolumn.binary().data(), pcolumn.binary().size(), data); // NOTE(review): result is not checked
+    } else {
+        *data = pcolumn.binary(); // stored uncompressed: plain copy
+    }
+}
+
+template <typename T>
+bool read_float_text_fast_impl(T& x, ReadBuffer& in) {
+    static_assert(std::is_same_v<T, double> || std::is_same_v<T, float>,
+                  "Argument for readFloatTextImpl must be float or double");
+    static_assert('a' > '.' && 'A' > '.' && '\n' < '.' && '\t' < '.' && '\'' < '.' && '"' < '.',
+                  "Layout of char is not like ASCII"); //-V590
+    // Parse everything between the cursor and the end of the buffer in a single call.
+    StringParser::ParseResult result;
+    x = StringParser::string_to_float<T>(in.position(), in.count(), &result); // x is assigned even on failure
+
+    if (UNLIKELY(result != StringParser::PARSE_SUCCESS || std::isnan(x) || std::isinf(x))) { // NaN/inf rejected too
+        return false;
+    }
+
+    // Mark the buffer as fully consumed; this exists only to satisfy the is_all_read() check.
+    in.position() = in.end();
+    return true;
+}
+
+template <typename T>
+bool read_int_text_impl(T& x, ReadBuffer& buf) {
+    StringParser::ParseResult result;
+    x = StringParser::string_to_int<T>(buf.position(), buf.count(), &result); // whole remaining buffer
+
+    if (UNLIKELY(result != StringParser::PARSE_SUCCESS)) { // on failure the cursor is left where it was
+        return false;
+    }
+
+    // Mark the buffer as fully consumed; this exists only to satisfy the is_all_read() check.
+    buf.position() = buf.end();
+    return true;
+}
+
+template <typename T>
+bool read_datetime_text_impl(T& x, ReadBuffer& buf) {
+    static_assert(std::is_same_v<Int64, T>);
+    auto dv = binary_cast<Int64, VecDateTimeValue>(x); // work on x's bits viewed as a VecDateTimeValue
+    auto ans = dv.from_date_str(buf.position(), buf.count());
+
+    // Unlike the int/float readers, the buffer is marked consumed and x rewritten even if parsing failed.
+    buf.position() = buf.end();
+    x = binary_cast<VecDateTimeValue, Int64>(dv);
+    return ans;
+}
+
+template <typename T>
+bool read_date_text_impl(T& x, ReadBuffer& buf) {
+    static_assert(std::is_same_v<Int64, T>);
+    auto dv = binary_cast<Int64, VecDateTimeValue>(x); // work on x's bits viewed as a VecDateTimeValue
+    auto ans = dv.from_date_str(buf.position(), buf.count());
+    dv.cast_to_date(); // the only difference from read_datetime_text_impl
+
+    // The buffer is marked consumed and x rewritten regardless of `ans`.
+    buf.position() = buf.end();
+    x = binary_cast<VecDateTimeValue, Int64>(dv);
+    return ans;
+}
+
+template <typename T>
+bool read_decimal_text_impl(T& x, ReadBuffer& buf) {
+    static_assert(IsDecimalNumber<T>);
+    // TODO: open this static_assert
+    // static_assert(std::is_same_v<Decimal128, T>);
+    auto dv = binary_cast<Int128, DecimalV2Value>(x.value); // x.value is handled as Int128 whatever T is
+    auto ans = dv.parse_from_str((const char*)buf.position(), buf.count()) == 0; // 0 is treated as success
+
+    // Mark the buffer as fully consumed; this exists only to satisfy the is_all_read() check.
+    buf.position() = buf.end();
+
+    x.value = binary_cast<DecimalV2Value, Int128>(dv); // written back even when `ans` is false
+    return ans;
+}
+
+template <typename T>
+bool try_read_int_text(T& x, ReadBuffer& buf) { // the try_read_* family only forwards to the *_impl readers
+    return read_int_text_impl<T>(x, buf);
+}
+
+template <typename T>
+bool try_read_float_text(T& x, ReadBuffer& in) {
+    return read_float_text_fast_impl<T>(x, in); // T must be float or double (static_assert in the impl)
+}
+
+template <typename T>
+bool try_read_decimal_text(T& x, ReadBuffer& in) {
+    return read_decimal_text_impl<T>(x, in); // T must satisfy IsDecimalNumber
+}
+
+template <typename T>
+bool try_read_datetime_text(T& x, ReadBuffer& in) {
+    return read_datetime_text_impl<T>(x, in); // T must be Int64
+}
+
+template <typename T>
+bool try_read_date_text(T& x, ReadBuffer& in) {
+    return read_date_text_impl<T>(x, in); // T must be Int64
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/io/reader_buffer.h b/be/src/vec/io/reader_buffer.h
new file mode 100644
index 0000000000..78bfbbceef
--- /dev/null
+++ b/be/src/vec/io/reader_buffer.h
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "vec/common/string_utils/string_utils.h"
+
+namespace doris::vectorized {
+
+class ReadBuffer { // non-owning cursor over the byte range [position(), end())
+public:
+    ReadBuffer(char* d, size_t n) : _start(d), _end(d + n) {}
+
+    // NOTE: drops const from `d`; position() then exposes a mutable pointer into that memory.
+    ReadBuffer(const unsigned char* d, size_t n)
+            : _start(reinterpret_cast<char*>(const_cast<unsigned char*>(d))), _end(_start + n) {}
+
+    // Read-only queries are const so they also work through a const ReadBuffer&.
+    bool eof() const { return _start == _end; }
+
+    // Mutable reference to the cursor, so parsers can advance it in place.
+    char*& position() { return _start; }
+
+    char* end() const { return _end; }
+
+    size_t count() const { return static_cast<size_t>(_end - _start); } // bytes left to read
+private:
+    char* _start; // declared before _end: the second constructor initializes _end from _start
+    char* _end;
+};
+
+}
diff --git a/be/src/vec/io/var_int.h b/be/src/vec/io/var_int.h
new file mode 100644
index 0000000000..45577a3bf3
--- /dev/null
+++ b/be/src/vec/io/var_int.h
@@ -0,0 +1,176 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <iostream>
+
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+/** Write UInt64 in variable-length format (base 128). NOTE: only values up to 2^63 - 1 are supported. */
+void write_var_uint(UInt64 x, std::ostream& ostr);
+char* write_var_uint(UInt64 x, char* ostr);
+
+/** Read a UInt64 that was written in variable-length format (base 128). */
+void read_var_uint(UInt64& x, std::istream& istr);
+const char* read_var_uint(UInt64& x, const char* istr, size_t size); // NOTE(review): not defined in this header — confirm it exists elsewhere
+
+/** Get the encoded length of a UInt64 in VarUInt format. */
+size_t get_length_of_var_uint(UInt64 x);
+
+/** Get the encoded length of an Int64 in VarInt format. */
+size_t get_length_of_var_int(Int64 x);
+
+/** Write Int64 in variable length format (base128) after zigzag mapping to UInt64 */
+template <typename OUT>
+inline void write_var_int(Int64 x, OUT& ostr) {
+    write_var_uint((static_cast<UInt64>(x) << 1) ^ static_cast<UInt64>(x >> 63), ostr);
+}
+// Both overloads shift in UInt64: `x << 1` on a negative Int64 is undefined before C++20.
+inline char* write_var_int(Int64 x, char* ostr) {
+    return write_var_uint((static_cast<UInt64>(x) << 1) ^ static_cast<UInt64>(x >> 63), ostr);
+}
+
+/** Read Int64, written in variable length format (base128) */
+template <typename IN>
+inline void read_var_int(Int64& x, IN& istr) {
+    read_var_uint(*reinterpret_cast<UInt64*>(&x), istr); // decode the unsigned value into x's own storage
+    x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1); // undo the mapping applied by write_var_int
+}
+
+inline const char* read_var_int(Int64& x, const char* istr, size_t size) {
+    const char* res = read_var_uint(*reinterpret_cast<UInt64*>(&x), istr, size);
+    x = (static_cast<UInt64>(x) >> 1) ^ -(x & 1); // same un-mapping as the stream overload
+    return res; // whatever position read_var_uint reported
+}
+
+inline void write_var_t(UInt64 x, std::ostream& ostr) { // *_var_t: pick the uint or int codec by argument type
+    write_var_uint(x, ostr);
+}
+inline void write_var_t(Int64 x, std::ostream& ostr) {
+    write_var_int(x, ostr);
+}
+
+inline char* write_var_t(UInt64 x, char*& ostr) {
+    return write_var_uint(x, ostr); // NOTE: returns the advanced pointer; `ostr` itself is not updated
+}
+inline char* write_var_t(Int64 x, char*& ostr) {
+    return write_var_int(x, ostr); // same: only the return value carries the new position
+}
+
+inline void read_var_t(UInt64& x, std::istream& istr) {
+    read_var_uint(x, istr);
+}
+inline void read_var_t(Int64& x, std::istream& istr) {
+    read_var_int(x, istr);
+}
+
+inline const char* read_var_t(UInt64& x, const char* istr, size_t size) {
+    return read_var_uint(x, istr, size); // NOTE(review): this read_var_uint overload is only declared in this header
+}
+inline const char* read_var_t(Int64& x, const char* istr, size_t size) {
+    return read_var_int(x, istr, size);
+}
+
+inline void read_var_uint(UInt64& x, std::istream& istr) {
+    x = 0;
+    for (size_t i = 0; i < 9; ++i) { // at most nine groups of seven bits, least-significant first
+        UInt64 byte = istr.get(); // NOTE(review): the result of get() is not checked for EOF
+        x |= (byte & 0x7F) << (7 * i);
+        if (!(byte & 0x80)) return; // a clear high bit marks the last group
+    }
+}
+
+inline void write_var_uint(UInt64 x, std::ostream& ostr) {
+    // Emits at most nine groups of seven bits, least-significant group first.
+    size_t emitted = 0;
+    do {
+        const bool more = x > 0x7F; // high bit of the byte flags that further groups follow
+        const uint8_t group = static_cast<uint8_t>((x & 0x7F) | (more ? 0x80 : 0x00));
+        ostr.put(group);
+        x >>= 7;
+        ++emitted;
+    } while (x != 0 && emitted < 9);
+}
+
+// TODO: replace this length-prefixed framing with a real implementation in the future
+inline void read_var_uint(UInt64& x, BufferReadable& buf) {
+    x = 0;
+    uint8_t len = 0;
+    buf.read((char*)&len, 1); // one-byte group count, as written by write_var_uint(UInt64, BufferWritable&)
+    auto ref = buf.read(len);
+
+    // Never look past the `len` bytes just obtained, even if no byte clears the continuation bit.
+    const char* bytes = ref.data;
+    for (size_t i = 0; i < 9 && i < len; ++i) {
+        UInt64 byte = static_cast<uint8_t>(bytes[i]); // avoid sign extension of plain char
+        x |= (byte & 0x7F) << (7 * i);
+        if (!(byte & 0x80)) return;
+    }
+}
+
+inline void write_var_uint(UInt64 x, BufferWritable& ostr) {
+    // Framing: one count byte, then that many base-128 groups (least-significant group first).
+    char bytes[9]; // nine, not eight: the loop below emits nine groups for x >= 2^56
+    uint8_t i = 0;
+    while (i < 9) {
+        uint8_t byte = x & 0x7F;
+        if (x > 0x7F) byte |= 0x80;
+        bytes[i++] = byte;
+
+        x >>= 7;
+        if (!x) break;
+    }
+    ostr.write((char*)&i, 1);
+    ostr.write(bytes, i);
+}
+
+inline char* write_var_uint(UInt64 x, char* ostr) {
+    // Emits 1..9 bytes starting at `ostr` and returns the position just past the last one.
+    char* cursor = ostr;
+    size_t groups = 0;
+    do {
+        uint8_t group = x & 0x7F;
+        if (x > 0x7F) group |= 0x80; // high bit set: more groups follow
+        *cursor++ = group;
+        x >>= 7;
+        ++groups;
+    } while (x != 0 && groups < 9);
+
+    return cursor;
+}
+
+// clang-format off
+inline size_t get_length_of_var_uint(UInt64 x) { // 1..9: one byte per started group of seven bits
+    return x < (1ULL << 7) ? 1
+        : (x < (1ULL << 14) ? 2
+        : (x < (1ULL << 21) ? 3
+        : (x < (1ULL << 28) ? 4
+        : (x < (1ULL << 35) ? 5
+        : (x < (1ULL << 42) ? 6
+        : (x < (1ULL << 49) ? 7
+        : (x < (1ULL << 56) ? 8
+        : 9))))))); // NOTE: the BufferWritable overload of write_var_uint adds one count byte on top
+}
+// clang-format on
+
+inline size_t get_length_of_var_int(Int64 x) { // maps in UInt64: `x << 1` is undefined for x < 0 before C++20
+    return get_length_of_var_uint((static_cast<UInt64>(x) << 1) ^ static_cast<UInt64>(x >> 63));
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/olap/block_reader.cpp b/be/src/vec/olap/block_reader.cpp
new file mode 100644
index 0000000000..8fda9d684c
--- /dev/null
+++ b/be/src/vec/olap/block_reader.cpp
@@ -0,0 +1,366 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/olap/block_reader.h"
+
+#include "olap/row_block.h"
+#include "olap/rowset/beta_rowset_reader.h"
+#include "olap/schema.h"
+#include "olap/storage_engine.h"
+#include "runtime/mem_pool.h"
+#include "runtime/mem_tracker.h"
+#include "vec/olap/vcollect_iterator.h"
+
+namespace doris::vectorized {
+
+BlockReader::~BlockReader() {
+    // Walk _agg_places (each is pushed after its function): every entry has a created state to free.
+    for (size_t i = 0; i < _agg_places.size(); ++i) {
+        AggregateDataPtr place = _agg_places[i];
+        _agg_functions[i]->destroy(place);
+        delete[] place; // allocated with new char[] in _init_agg_state()
+    }
+}
+
+OLAPStatus BlockReader::_init_collect_iter(const ReaderParams& read_params,
+                                           std::vector<RowsetReaderSharedPtr>* valid_rs_readers) {
+    _vcollect_iter.init(this);
+    std::vector<RowsetReaderSharedPtr> captured_readers;
+    const auto capture_status = _capture_rs_readers(read_params, &captured_readers);
+    if (capture_status != OLAP_SUCCESS) {
+        LOG(WARNING) << "fail to init reader when _capture_rs_readers. res:" << capture_status
+                     << ", tablet_id:" << read_params.tablet->tablet_id()
+                     << ", schema_hash:" << read_params.tablet->schema_hash()
+                     << ", reader_type:" << read_params.reader_type
+                     << ", version:" << read_params.version;
+        return capture_status;
+    }
+
+    // Readers whose add_child() reports EOF are dropped silently; any other failure aborts.
+    for (auto& rs_reader : captured_readers) {
+        RETURN_NOT_OK(rs_reader->init(&_reader_context));
+        const OLAPStatus add_status = _vcollect_iter.add_child(rs_reader);
+        if (add_status == OLAP_SUCCESS) {
+            valid_rs_readers->push_back(rs_reader);
+        } else if (add_status != OLAP_ERR_DATA_EOF) {
+            LOG(WARNING) << "failed to add child to iterator, err=" << add_status;
+            return add_status;
+        }
+    }
+
+    _vcollect_iter.build_heap(*valid_rs_readers);
+    if (_vcollect_iter.is_merge()) {
+        const auto first_row_status = _vcollect_iter.current_row(&_next_row);
+        _eof = (first_row_status == OLAP_ERR_DATA_EOF); // only EOF is inspected here
+    }
+
+    return OLAP_SUCCESS;
+}
+
+void BlockReader::_init_agg_state(const ReaderParams& read_params) {
+    if (_eof) { // nothing to prepare when there is no first row
+        return;
+    }
+    // The staging block mirrors the structure of the block that holds the current row.
+    _stored_data_block = _next_row.block->create_same_struct_block(_batch_size);
+    _stored_data_columns = _stored_data_block->mutate_columns();
+
+    _stored_has_null_tag.resize(_stored_data_columns.size());
+    _stored_has_string_tag.resize(_stored_data_columns.size());
+    // One aggregate function plus one state buffer is created per aggregated column.
+    auto& tablet_schema = tablet()->tablet_schema();
+    for (auto idx : _agg_columns_idx) {
+        FieldAggregationMethod agg_method =
+                tablet_schema
+                        .column(read_params.origin_return_columns->at(_return_columns_loc[idx]))
+                        .aggregation();
+        std::string agg_name =
+                TabletColumn::get_string_by_aggregation_type(agg_method) + agg_reader_suffix;
+        std::transform(agg_name.begin(), agg_name.end(), agg_name.begin(),
+                       [](unsigned char c) { return std::tolower(c); }); // the factory is queried in lower case
+
+        // create aggregate function
+        DataTypes argument_types;
+        argument_types.push_back(_next_row.block->get_data_type(idx));
+        Array params;
+        AggregateFunctionPtr function = AggregateFunctionSimpleFactory::instance().get(
+                agg_name, argument_types, params,
+                _next_row.block->get_data_type(idx)->is_nullable());
+        DCHECK(function != nullptr); // NOTE(review): checked only by DCHECK before being dereferenced below
+        _agg_functions.push_back(function);
+
+        // create aggregate data (released with destroy() + delete[] in ~BlockReader)
+        AggregateDataPtr place = new char[function->size_of_data()];
+        function->create(place);
+        _agg_places.push_back(place);
+
+        // calculate has_string tag: true for a string column or a nullable wrapper around one
+        _stored_has_string_tag[idx] =
+                _stored_data_columns[idx]->is_column_string() ||
+                (_stored_data_columns[idx]->is_nullable() &&
+                 reinterpret_cast<ColumnNullable*>(_stored_data_columns[idx].get())
+                         ->get_nested_column_ptr()
+                         ->is_column_string());
+    }
+}
+
+OLAPStatus BlockReader::init(const ReaderParams& read_params) {
+    RETURN_NOT_OK(TabletReader::init(read_params)); // a failed base init must not be ignored
+    _batch_size = read_params.runtime_state->batch_size();
+    // Classify each returned column (a sequence column, if present, shortens the scan by one entry).
+    auto return_column_size =
+            read_params.origin_return_columns->size() - (_sequence_col_idx != -1 ? 1 : 0);
+    _return_columns_loc.resize(read_params.return_columns.size());
+    for (int i = 0; i < return_column_size; ++i) {
+        auto cid = read_params.origin_return_columns->at(i);
+        for (int j = 0; j < read_params.return_columns.size(); ++j) {
+            if (read_params.return_columns[j] == cid) {
+                if (j < _tablet->num_key_columns() || _tablet->keys_type() != AGG_KEYS) {
+                    _normal_columns_idx.emplace_back(j); // copied as-is
+                } else {
+                    _agg_columns_idx.emplace_back(j); // value column of an AGG_KEYS tablet
+                }
+                _return_columns_loc[j] = i; // position j in return_columns -> slot i in the output
+                break;
+            }
+        }
+    }
+
+    std::vector<RowsetReaderSharedPtr> rs_readers;
+    auto status = _init_collect_iter(read_params, &rs_readers);
+    if (status != OLAP_SUCCESS) {
+        return status;
+    }
+    // Direct mode always uses the pass-through reader, whatever the keys type.
+    if (_direct_mode) {
+        _next_block_func = &BlockReader::_direct_next_block;
+        return OLAP_SUCCESS;
+    }
+    // Otherwise the keys type selects the per-block read routine.
+    switch (tablet()->keys_type()) {
+    case KeysType::DUP_KEYS:
+        _next_block_func = &BlockReader::_direct_next_block;
+        break;
+    case KeysType::UNIQUE_KEYS:
+        _next_block_func = &BlockReader::_unique_key_next_block;
+        break;
+    case KeysType::AGG_KEYS:
+        _next_block_func = &BlockReader::_agg_key_next_block;
+        _init_agg_state(read_params);
+        break;
+    default:
+        DCHECK(false) << "No next row function for type:" << tablet()->keys_type();
+        break;
+    }
+
+    return OLAP_SUCCESS;
+}
+
+OLAPStatus BlockReader::_direct_next_block(Block* block, MemPool* mem_pool, ObjectPool* agg_pool,
+                                           bool* eof) {
+    const auto status = _vcollect_iter.next(block); // pass-through read, no merging
+    const bool reached_end = (status == OLAP_ERR_DATA_EOF);
+    if (UNLIKELY(status != OLAP_SUCCESS && !reached_end)) {
+        return status;
+    }
+    *eof = reached_end; // end of data is reported through *eof, not as an error status
+    return OLAP_SUCCESS;
+}
+
+OLAPStatus BlockReader::_direct_agg_key_next_block(Block* block, MemPool* mem_pool,
+                                                   ObjectPool* agg_pool, bool* eof) {
+    return OLAP_SUCCESS; // stub: touches none of its arguments (not even *eof) and reports success
+}
+
+OLAPStatus BlockReader::_agg_key_next_block(Block* block, MemPool* mem_pool, ObjectPool* agg_pool,
+                                            bool* eof) {
+    if (UNLIKELY(_eof)) { // the source was already empty when the reader was initialized
+        *eof = true;
+        return OLAP_SUCCESS;
+    }
+    // target_block_row counts the rows started in this call (one per distinct key).
+    auto target_block_row = 0;
+    auto target_columns = block->mutate_columns();
+    // _next_row already holds a row on entry; it opens the first output row.
+    _insert_data_normal(target_columns);
+    target_block_row++;
+    _append_agg_data(target_columns);
+
+    while (true) {
+        auto res = _vcollect_iter.next(&_next_row);
+        if (UNLIKELY(res == OLAP_ERR_DATA_EOF)) {
+            *eof = true;
+            break;
+        }
+        if (UNLIKELY(res != OLAP_SUCCESS)) {
+            LOG(WARNING) << "next failed: " << res;
+            return res;
+        }
+        // A row not flagged is_same opens a new output row, unless the batch is already full.
+        if (!_next_row.is_same) {
+            if (target_block_row == _batch_size) {
+                break; // _next_row is kept and becomes the first row of the next call
+            }
+            _agg_data_counters.push_back(_last_agg_data_counter); // close the previous key's counter
+            _last_agg_data_counter = 0;
+
+            _insert_data_normal(target_columns);
+            target_block_row++;
+        }
+
+        _append_agg_data(target_columns);
+    }
+    // Close the counter of the last key and flush whatever is still pending.
+    _agg_data_counters.push_back(_last_agg_data_counter);
+    _last_agg_data_counter = 0;
+    _update_agg_data(target_columns);
+
+    _merged_rows += target_block_row;
+    return OLAP_SUCCESS;
+}
+// Produces the next block for UNIQUE KEY reads.
+//
+// Each iteration copies the row held in `_next_row` into the output columns and
+// then advances `_vcollect_iter`; at most `_batch_size` rows are emitted per call.
+// The row fetched by the final advance stays in `_next_row` for the next call.
+//
+// `mem_pool` and `agg_pool` are not used by this implementation.
+// *eof is set to true once the iterator is exhausted (the block filled by that
+// call can still carry rows). Returns OLAP_SUCCESS, or the iterator's error status.
+OLAPStatus BlockReader::_unique_key_next_block(Block* block, MemPool* mem_pool,
+                                               ObjectPool* agg_pool, bool* eof) {
+    if (UNLIKELY(_eof)) {
+        *eof = true;
+        return OLAP_SUCCESS;
+    }
+
+    auto target_block_row = 0;
+    auto target_columns = block->mutate_columns();
+
+    do {
+        _insert_data_normal(target_columns);
+        target_block_row++;
+
+        // the version is in reverse order, the first row is the highest version,
+        // in UNIQUE_KEY highest version is the final result, there is no need to
+        // merge the lower versions
+        auto res = _vcollect_iter.next(&_next_row);
+        if (UNLIKELY(res == OLAP_ERR_DATA_EOF)) {
+            // Remember exhaustion so that a later call takes the early return above
+            // instead of inserting `_next_row` again; after EOF it may still point
+            // at a row of a child iterator that has already been released.
+            _eof = true;
+            *eof = true;
+            break;
+        }
+
+        if (UNLIKELY(res != OLAP_SUCCESS)) {
+            LOG(WARNING) << "next failed: " << res;
+            return res;
+        }
+    } while (target_block_row < _batch_size);
+
+    _merged_rows += target_block_row;
+    return OLAP_SUCCESS;
+}
+
+// Appends the row referenced by `_next_row` to the output columns, but only for
+// the columns listed in `_normal_columns_idx`. `_return_columns_loc` maps each
+// source column index to its slot in `columns`.
+void BlockReader::_insert_data_normal(MutableColumns& columns) {
+    const auto* src_block = _next_row.block;
+    const auto src_row = _next_row.row_pos;
+    for (auto idx : _normal_columns_idx) {
+        auto& dst_column = columns[_return_columns_loc[idx]];
+        dst_column->insert_from(*src_block->get_by_position(idx).column, src_row);
+    }
+}
+
+// Queues the row referenced by `_next_row` for aggregation and counts it towards
+// the key currently being accumulated.
+//
+// The queue only holds references into source blocks, so it is flushed through
+// _update_agg_data() either when the queued row is the last row of its block
+// (the reference is about to go stale) or when `_batch_size` references are held.
+void BlockReader::_append_agg_data(MutableColumns& columns) {
+    _stored_row_ref.push_back(_next_row);
+    _last_agg_data_counter++;
+
+    const bool block_exhausted = (_next_row.block->rows() == _next_row.row_pos + 1);
+    const bool buffer_full = (_stored_row_ref.size() == _batch_size);
+    if (block_exhausted || buffer_full) {
+        _update_agg_data(columns);
+    }
+}
+
+// Flushes the queued row references into the aggregate states.
+//
+// Steps:
+//   1. copy the referenced values into `_stored_data_columns`;
+//   2. refresh the per-column null tag over the copied rows;
+//   3. for every finished key in `_agg_data_counters`, aggregate its row range
+//      and emit the result into `columns`;
+//   4. for the key that is still open (`_last_agg_data_counter` rows), aggregate
+//      its rows without emitting, because more rows of that key may follow.
+void BlockReader::_update_agg_data(MutableColumns& columns) {
+    const size_t copied_rows = _copy_agg_data();
+
+    for (auto idx : _agg_columns_idx) {
+        _stored_has_null_tag[idx] = _stored_data_columns[idx]->has_null(copied_rows);
+    }
+
+    // Ranges are inclusive: [range_begin, range_begin + rows - 1].
+    int range_begin = 0;
+    for (int group_rows : _agg_data_counters) {
+        const int range_end = range_begin + group_rows - 1;
+        _update_agg_value(columns, range_begin, range_end);
+        range_begin += group_rows;
+    }
+
+    if (_last_agg_data_counter) {
+        const int open_range_end = range_begin + _last_agg_data_counter - 1;
+        _update_agg_value(columns, range_begin, open_range_end, false);
+        _last_agg_data_counter = 0;
+    }
+
+    _agg_data_counters.clear();
+}
+
+// Copies the values referenced by `_stored_row_ref` into `_stored_data_columns`
+// for every aggregate column, then empties the reference queue.
+// The i-th queued reference is written to destination position i.
+// Returns the number of references that were queued on entry.
+size_t BlockReader::_copy_agg_data() {
+    size_t copy_size = _stored_row_ref.size();
+
+    // Group the queued references by source block: block -> (source row, destination index).
+    // NOTE(review): `_temp_ref_map` stores both positions as int16_t, so the size_t
+    // index `i` is narrowed here -- confirm the queue can never exceed INT16_MAX entries.
+    for (size_t i = 0; i < copy_size; i++) {
+        auto& ref = _stored_row_ref[i];
+        _temp_ref_map[ref.block].emplace_back(ref.row_pos, i);
+    }
+
+    for (auto idx : _agg_columns_idx) {
+        auto& dst_column = _stored_data_columns[idx];
+        if (_stored_has_string_tag[idx]) {
+            //string type should replace ordered
+            // i.e. destination positions are written in ascending order 0..copy_size-1.
+            for (size_t i = 0; i < copy_size; i++) {
+                auto& ref = _stored_row_ref[i];
+                dst_column->replace_column_data(*ref.block->get_by_position(idx).column,
+                                                ref.row_pos, i);
+            }
+        } else {
+            // Other columns are copied one source block at a time, so the source
+            // column is looked up once per block rather than once per row.
+            for (auto& it : _temp_ref_map) {
+                auto& src_column = *it.first->get_by_position(idx).column;
+                for (auto& pos : it.second) {
+                    dst_column->replace_column_data(src_column, pos.first, pos.second);
+                }
+            }
+        }
+    }
+
+    // Only the per-block position lists are emptied; the map entries themselves
+    // (keyed by block pointer) are kept.
+    for (auto& it : _temp_ref_map) {
+        it.second.clear();
+    }
+    _stored_row_ref.clear();
+
+    return copy_size;
+}
+
+// Feeds rows [begin, end] (inclusive) of the stored data columns into the
+// aggregate state of every aggregate column.
+//
+// columns:  output columns of the block being built.
+// begin/end: inclusive row range inside `_stored_data_columns`; an empty range
+//            (begin > end) skips the accumulation step.
+// is_close: when true, the aggregated result is appended to the matching output
+//           column and the state is re-created for the next key; when false the
+//           state is kept so that later rows of the same key can be added.
+void BlockReader::_update_agg_value(MutableColumns& columns, int begin, int end, bool is_close) {
+    // size_t index: matches the type of size() and avoids a signed/unsigned comparison.
+    for (size_t i = 0; i < _agg_columns_idx.size(); ++i) {
+        const auto idx = _agg_columns_idx[i];
+
+        // Bind by reference: the function handle does not need to be copied per iteration.
+        const auto& function = _agg_functions[i];
+        AggregateDataPtr place = _agg_places[i];
+        auto column_ptr = _stored_data_columns[idx].get();
+
+        if (begin <= end) {
+            function->add_batch_range(begin, end, place, const_cast<const IColumn**>(&column_ptr),
+                                      nullptr, _stored_has_null_tag[idx]);
+        }
+
+        if (is_close) {
+            function->insert_result_into(place, *columns[_return_columns_loc[idx]]);
+            function->create(place); // reset aggregate data
+        }
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/olap/block_reader.h b/be/src/vec/olap/block_reader.h
new file mode 100644
index 0000000000..b1bc7e8c4c
--- /dev/null
+++ b/be/src/vec/olap/block_reader.h
@@ -0,0 +1,119 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <parallel_hashmap/phmap.h>
+
+#include "olap/collect_iterator.h"
+#include "olap/reader.h"
+#include "olap/rowset/rowset_reader.h"
+#include "olap/tablet.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/aggregate_function_reader.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/olap/vcollect_iterator.h"
+
+namespace doris {
+
+namespace vectorized {
+
+// Tablet reader that returns data as Blocks. Every read is dispatched through
+// `_next_block_func`, a pointer to one of the private *_next_block member
+// functions declared below.
+class BlockReader final : public TabletReader {
+public:
+    ~BlockReader() override;
+
+    // Initialize BlockReader with tablet, data version and fetch range.
+    OLAPStatus init(const ReaderParams& read_params) override;
+
+    // Row-cursor reads are not supported by this reader: always returns
+    // OLAP_ERR_READER_INITIALIZE_ERROR and writes none of its arguments.
+    OLAPStatus next_row_with_aggregation(RowCursor* row_cursor, MemPool* mem_pool,
+                                         ObjectPool* agg_pool, bool* eof) override {
+        return OLAP_ERR_READER_INITIALIZE_ERROR;
+    }
+
+    // Reads the next block through the function selected in `_next_block_func`.
+    // NOTE(review): `_next_block_func` defaults to nullptr; presumably init()
+    // assigns it before the first read -- confirm.
+    OLAPStatus next_block_with_aggregation(Block* block, MemPool* mem_pool, ObjectPool* agg_pool,
+                                           bool* eof) override {
+        return (this->*_next_block_func)(block, mem_pool, agg_pool, eof);
+    }
+
+private:
+    friend class VCollectIterator;
+    friend class DeleteHandler;
+
+    // Directly read rows from the rowset and pass them to the caller. No aggregation is done.
+    // This is usually used for DUPLICATE KEY tables
+    OLAPStatus _direct_next_block(Block* block, MemPool* mem_pool, ObjectPool* agg_pool, bool* eof);
+    // Just same as _direct_next_block, but this is only for AGGREGATE KEY tables.
+    // And this is an optimization for AGGR tables.
+    // When there is only one rowset and is not overlapping, we can read it directly without aggregation.
+    OLAPStatus _direct_agg_key_next_block(Block* block, MemPool* mem_pool, ObjectPool* agg_pool,
+                                          bool* eof);
+    // For normal AGGREGATE KEY tables, read data by a merge heap.
+    OLAPStatus _agg_key_next_block(Block* block, MemPool* mem_pool, ObjectPool* agg_pool,
+                                   bool* eof);
+    // For UNIQUE KEY tables, read data by a merge heap.
+    // The difference from _agg_key_next_block is that it will read the data from high version to low version,
+    // to minimize the comparison time in merge heap.
+    OLAPStatus _unique_key_next_block(Block* block, MemPool* mem_pool, ObjectPool* agg_pool,
+                                      bool* eof);
+
+    OLAPStatus _init_collect_iter(const ReaderParams& read_params,
+                                  std::vector<RowsetReaderSharedPtr>* valid_rs_readers);
+
+    void _init_agg_state(const ReaderParams& read_params);
+
+    // Appends the row in `_next_row` to `columns` for the columns in `_normal_columns_idx`.
+    void _insert_data_normal(MutableColumns& columns);
+
+    // Queues the row in `_next_row` for aggregation; flushes the queue when needed.
+    void _append_agg_data(MutableColumns& columns);
+
+    // Flushes the queued rows into the aggregate states and emits finished keys.
+    void _update_agg_data(MutableColumns& columns);
+
+    // Copies the queued rows into `_stored_data_columns`; returns how many were copied.
+    size_t _copy_agg_data();
+
+    // Aggregates stored rows [begin, end] (inclusive); when `is_close` is true the
+    // result is also written to `columns` and the aggregate state is re-created.
+    void _update_agg_value(MutableColumns& columns, int begin, int end, bool is_close = true);
+
+    VCollectIterator _vcollect_iter;
+    // Row the collect iterator is currently positioned on; row_pos starts at -1.
+    IteratorRowRef _next_row {nullptr, -1, false};
+
+    // Indexed in parallel with `_agg_columns_idx`: one function and one state per aggregate column.
+    std::vector<AggregateFunctionPtr> _agg_functions;
+    std::vector<AggregateDataPtr> _agg_places;
+
+    std::vector<int> _normal_columns_idx; // key column on agg mode, all column on uniq mode
+    std::vector<int> _agg_columns_idx;
+    // Maps a source column index to its position in the output block's columns.
+    std::vector<int> _return_columns_loc;
+
+    int _batch_size = 0;
+
+    // Number of queued rows for each finished key, in queue order.
+    std::vector<int> _agg_data_counters;
+    // Number of queued rows for the key that is still being accumulated.
+    int _last_agg_data_counter = 0;
+
+    std::unique_ptr<Block> _stored_data_block;
+    // Staging columns the queued rows are copied into before aggregation.
+    MutableColumns _stored_data_columns;
+    // Queue of row references waiting to be copied and aggregated.
+    std::vector<IteratorRowRef> _stored_row_ref;
+
+    // Per-column flags, indexed by source column index.
+    std::vector<bool> _stored_has_null_tag;
+    std::vector<bool> _stored_has_string_tag;
+
+    // Scratch map used while copying: source block -> (source row, destination index).
+    // NOTE(review): both positions are int16_t, which caps them at 32767 -- confirm
+    // that neither the block row count nor the queue length can exceed that.
+    phmap::flat_hash_map<const Block*, std::vector<std::pair<int16_t, int16_t>>> _temp_ref_map;
+
+    // When true, the merge-based read functions report end of data immediately.
+    bool _eof = false;
+
+    // Read strategy used by next_block_with_aggregation().
+    OLAPStatus (BlockReader::*_next_block_func)(Block* block, MemPool* mem_pool,
+                                                ObjectPool* agg_pool, bool* eof) = nullptr;
+};
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/olap/vcollect_iterator.cpp b/be/src/vec/olap/vcollect_iterator.cpp
new file mode 100644
index 0000000000..73ef823c9e
--- /dev/null
+++ b/be/src/vec/olap/vcollect_iterator.cpp
@@ -0,0 +1,380 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/olap/vcollect_iterator.h"
+
+#include "olap/rowset/beta_rowset_reader.h"
+
+namespace doris {
+namespace vectorized {
+
+// Frees child iterators that were created by add_child() but never handed to an
+// inner iterator. build_heap() empties `_children` after it has passed the
+// pointers on, so in that case the loop below has nothing to do.
+VCollectIterator::~VCollectIterator() {
+    for (auto* child : _children) {
+        delete child;
+    }
+}
+
+// Stores the owning reader and decides whether rows have to be merge-sorted.
+// Merging is switched off only for query reads that either run in direct mode
+// or target a DUP_KEYS tablet; in every other case `_merge` keeps its value.
+void VCollectIterator::init(TabletReader* reader) {
+    _reader = reader;
+    if (_reader->_reader_type != READER_QUERY) {
+        return;
+    }
+    if (_reader->_direct_mode || _reader->_tablet->keys_type() == KeysType::DUP_KEYS) {
+        _merge = false;
+    }
+}
+
+// Wraps `rs_reader` in a Level0Iterator and appends it to `_children`.
+// Always returns OLAP_SUCCESS.
+OLAPStatus VCollectIterator::add_child(RowsetReaderSharedPtr rs_reader) {
+    std::unique_ptr<LevelIterator> child(new Level0Iterator(rs_reader, _reader));
+    // Give up ownership only after the list has stored the pointer, so the child
+    // is still freed if push_back throws.
+    _children.push_back(child.get());
+    child.release();
+    return OLAP_SUCCESS;
+}
+
+// Builds `_inner_iter` from the children registered through add_child().
+//
+// `rs_readers` must line up one-to-one with `_children`. In merge mode every
+// child is initialised first; a child whose init() does not return OLAP_SUCCESS
+// is dropped together with its entry in `rs_readers`. If more than one child
+// remains, the rowset with the largest row count becomes the "base" child, all
+// other children are merged by an inner Level1Iterator, and the base is then
+// merged with that result. With a single child, or when merging is disabled,
+// one Level1Iterator is built directly over the children.
+//
+// On return `_children` is empty: the pointers are owned by `_inner_iter`.
+void VCollectIterator::build_heap(std::vector<RowsetReaderSharedPtr>& rs_readers) {
+    DCHECK(rs_readers.size() == _children.size());
+    _skip_same = _reader->_tablet->tablet_schema().keys_type() == KeysType::UNIQUE_KEYS;
+    if (_children.empty()) {
+        _inner_iter.reset(nullptr);
+        return;
+    } else if (_merge) {
+        DCHECK(!rs_readers.empty());
+        for (auto [c_iter, r_iter] = std::pair {_children.begin(), rs_readers.begin()};
+             c_iter != _children.end();) {
+            if ((*c_iter)->init() != OLAP_SUCCESS) {
+                // The list only holds a raw pointer; erasing the entry alone would
+                // leak the iterator, so free it first.
+                delete *c_iter;
+                c_iter = _children.erase(c_iter);
+                r_iter = rs_readers.erase(r_iter);
+            } else {
+                ++c_iter;
+                ++r_iter;
+            }
+        }
+
+        // build merge heap with two children, a base rowset as level0iterator and
+        // other cumulative rowsets as a level1iterator
+        if (_children.size() > 1) {
+            // find 'base rowset', 'base rowset' is the rowset which contains the max row number
+            int64_t max_row_num = 0;
+            int base_reader_idx = 0;
+            for (size_t i = 0; i < rs_readers.size(); ++i) {
+                int64_t cur_row_num = rs_readers[i]->rowset()->rowset_meta()->num_rows();
+                if (cur_row_num > max_row_num) {
+                    max_row_num = cur_row_num;
+                    base_reader_idx = i;
+                }
+            }
+            auto base_reader_child = _children.begin();
+            std::advance(base_reader_child, base_reader_idx);
+
+            std::list<LevelIterator*> cumu_children;
+            int i = 0;
+            for (const auto& child : _children) {
+                if (i != base_reader_idx) {
+                    cumu_children.push_back(child);
+                }
+                ++i;
+            }
+            Level1Iterator* cumu_iter = new Level1Iterator(cumu_children, _reader,
+                                                           cumu_children.size() > 1, _skip_same);
+            cumu_iter->init();
+            std::list<LevelIterator*> children;
+            children.push_back(*base_reader_child);
+            children.push_back(cumu_iter);
+            _inner_iter.reset(new Level1Iterator(children, _reader, _merge, _skip_same));
+        } else {
+            // At most one child is left (all of them may have failed init()).
+            _inner_iter.reset(new Level1Iterator(_children, _reader, _merge, _skip_same));
+        }
+    } else {
+        _inner_iter.reset(new Level1Iterator(_children, _reader, _merge, _skip_same));
+    }
+    _inner_iter->init();
+    // Clear _children earlier to release any related references
+    _children.clear();
+}
+
+// Heap ordering predicate over the current rows of two child iterators.
+//
+// 1. The key columns (the first num_key_columns() columns) are compared; if
+//    they differ, the result is `cmp_res > 0`.
+// 2. If the keys are equal and a sequence column is configured
+//    (`_sequence != -1`), the sequence column values are compared.
+// 3. If that comparison is also equal, or no sequence column is configured, the
+//    iterators' versions are compared.
+//
+// Side effect: whenever the keys are equal, the operand that is considered
+// "lower" gets its `is_same` flag set, so calling this comparator mutates the
+// iterators it compares.
+bool VCollectIterator::LevelIteratorComparator::operator()(LevelIterator* lhs, LevelIterator* rhs) {
+    const IteratorRowRef& lhs_ref = *lhs->current_row_ref();
+    const IteratorRowRef& rhs_ref = *rhs->current_row_ref();
+
+    int cmp_res =
+            lhs_ref.block->compare_at(lhs_ref.row_pos, rhs_ref.row_pos,
+                                      lhs->tablet_schema().num_key_columns(), *rhs_ref.block, -1);
+    if (cmp_res != 0) {
+        return cmp_res > 0;
+    }
+
+    // Keys are equal from here on (cmp_res == 0 unless the sequence column differs).
+    if (_sequence != -1) {
+        cmp_res = lhs_ref.block->get_by_position(_sequence).column->compare_at(
+                lhs_ref.row_pos, rhs_ref.row_pos,
+                *(rhs_ref.block->get_by_position(_sequence).column), -1);
+    }
+
+    // if row cursors equal, compare data version.
+    // read data from higher version to lower version.
+    // for UNIQUE_KEYS just read the highest version and no need agg_update.
+    // for AGG_KEYS if a version is deleted, the lower version no need to agg_update
+    bool lower = (cmp_res != 0) ? (cmp_res < 0) : (lhs->version() < rhs->version());
+    lower ? lhs->set_same(true) : rhs->set_same(true);
+    return lower;
+}
+
+// Copies the inner iterator's current row reference into *ref.
+// Returns OLAP_ERR_DATA_ROW_BLOCK_ERROR when no inner iterator has been built,
+// OLAP_ERR_DATA_EOF when the reference carries row_pos == -1, and OLAP_SUCCESS
+// otherwise.
+OLAPStatus VCollectIterator::current_row(IteratorRowRef* ref) const {
+    if (UNLIKELY(!_inner_iter)) {
+        return OLAP_ERR_DATA_ROW_BLOCK_ERROR;
+    }
+    *ref = *_inner_iter->current_row_ref();
+    return ref->row_pos == -1 ? OLAP_ERR_DATA_EOF : OLAP_SUCCESS;
+}
+
+// Advances the inner iterator by one row and reports the new position in *ref.
+// Without an inner iterator there is nothing to read: OLAP_ERR_DATA_EOF.
+OLAPStatus VCollectIterator::next(IteratorRowRef* ref) {
+    if (UNLIKELY(!_inner_iter)) {
+        return OLAP_ERR_DATA_EOF;
+    }
+    return _inner_iter->next(ref);
+}
+
+// Asks the inner iterator for the next whole block.
+// Without an inner iterator there is nothing to read: OLAP_ERR_DATA_EOF.
+OLAPStatus VCollectIterator::next(Block* block) {
+    if (UNLIKELY(!_inner_iter)) {
+        return OLAP_ERR_DATA_EOF;
+    }
+    return _inner_iter->next(block);
+}
+
+// Leaf iterator over a single rowset reader. Creates an empty block shaped after
+// the reader's return columns and points the current-row reference at its row 0;
+// no data is read until init() or next() is called.
+VCollectIterator::Level0Iterator::Level0Iterator(RowsetReaderSharedPtr rs_reader, TabletReader* reader)
+        : LevelIterator(reader), _rs_reader(rs_reader), _reader(reader) {
+    DCHECK_EQ(RowsetReader::BETA, rs_reader->type());
+    _block = _schema.create_block(_reader->_return_columns);
+    _ref = IteratorRowRef {&_block, 0, false};
+}
+
+// Positions the iterator on its first row by delegating to
+// _refresh_current_row(), and returns that function's status unchanged.
+OLAPStatus VCollectIterator::Level0Iterator::init() {
+    return _refresh_current_row();
+}
+
+// Returns the `second` component of the rowset reader's version.
+// NOTE(review): presumably the end of the rowset's version range -- confirm.
+int64_t VCollectIterator::Level0Iterator::version() const {
+    return _rs_reader->version().second;
+}
+
+// Makes sure `_ref` points at a readable row.
+//
+// If the current position is still inside the loaded block, nothing happens.
+// Otherwise the block is emptied, the position is reset to row 0 and the next
+// block is fetched from the rowset reader. A fetch that fails returns the
+// reader's status as-is; a fetch that succeeds but delivers no rows marks the
+// iterator exhausted (row_pos = -1) and returns OLAP_ERR_DATA_EOF.
+OLAPStatus VCollectIterator::Level0Iterator::_refresh_current_row() {
+    for (;;) {
+        const bool row_available = _block.rows() != 0 && _ref.row_pos < _block.rows();
+        if (row_available) {
+            return OLAP_SUCCESS;
+        }
+
+        _ref.is_same = false;
+        _ref.row_pos = 0;
+        _block.clear_column_data();
+        const auto status = _rs_reader->next_block(&_block);
+        if (status != OLAP_SUCCESS) {
+            return status;
+        }
+        if (_block.rows() == 0) {
+            break;
+        }
+    }
+    _ref.row_pos = -1;
+    return OLAP_ERR_DATA_EOF;
+}
+
+// Steps to the following row, letting _refresh_current_row() load a new block
+// when the current one is used up, and copies the new position into *ref.
+// NOTE(review): RETURN_NOT_OK is defined elsewhere; presumably it returns early
+// on any non-success status, in which case *ref is not written -- confirm.
+OLAPStatus VCollectIterator::Level0Iterator::next(IteratorRowRef* ref) {
+    _ref.row_pos++;
+    RETURN_NOT_OK(_refresh_current_row());
+
+    *ref = _ref;
+    return OLAP_SUCCESS;
+}
+
+// Block-at-a-time read: forwards straight to the rowset reader and returns its
+// status. The iterator's own `_block` and `_ref` are not involved.
+OLAPStatus VCollectIterator::Level0Iterator::next(Block* block) {
+    return _rs_reader->next_block(block);
+}
+
+// Iterator over a list of child iterators. `merge` asks for a heap-based merge of
+// the children; `skip_same` makes the merge drop rows flagged as duplicates.
+// The children are only copied here; init() decides how they are traversed.
+VCollectIterator::Level1Iterator::Level1Iterator(
+        const std::list<VCollectIterator::LevelIterator*>& children, TabletReader* reader, bool merge,
+        bool skip_same)
+        : LevelIterator(reader),
+          _children(children),
+          _reader(reader),
+          _merge(merge),
+          _skip_same(skip_same) {
+    // Initialise every field of the reference: init() leaves `_ref` untouched when
+    // there are no children, and current_row_ref() hands out a copy of it.
+    _ref.block = nullptr;
+    _ref.row_pos = -1; // represent eof
+    _ref.is_same = false;
+}
+
+// Frees every child iterator this object still owns.
+// In non-merge mode the children live in `_children`. In merge mode init() moves
+// them into `_heap` and empties `_children`, so the heap has to be drained too;
+// `_cur_child` is the heap's top element there and must not be deleted separately.
+VCollectIterator::Level1Iterator::~Level1Iterator() {
+    for (auto* child : _children) {
+        delete child;
+    }
+    if (_heap != nullptr) {
+        while (!_heap->empty()) {
+            // Pop before deleting so the heap never holds a dangling pointer.
+            LevelIterator* child = _heap->top();
+            _heap->pop();
+            delete child;
+        }
+    }
+}
+
+// Advances to the next row and reports it through *ref.
+// Returns
+//      OLAP_SUCCESS when a row was read.
+//      OLAP_ERR_DATA_EOF when there is no current child; the iterator's own
+//      reference is then marked with row_pos = -1 and *ref is not written.
+//      Any other status reported by the merge / sequential read helpers.
+OLAPStatus VCollectIterator::Level1Iterator::next(IteratorRowRef* ref) {
+    if (UNLIKELY(_cur_child == nullptr)) {
+        _ref.row_pos = -1;
+        return OLAP_ERR_DATA_EOF;
+    }
+    return _merge ? _merge_next(ref) : _normal_next(ref);
+}
+
+// Reads the next whole block from the children, one child after another
+// (block reads never go through the merge heap).
+// Returns
+//      OLAP_SUCCESS when a block was read.
+//      OLAP_ERR_DATA_EOF when there is no current child or all children are done.
+//      Any other status reported by a child.
+OLAPStatus VCollectIterator::Level1Iterator::next(Block* block) {
+    if (LIKELY(_cur_child != nullptr)) {
+        return _normal_next(block);
+    }
+    return OLAP_ERR_DATA_EOF;
+}
+
+// Version of the child that currently supplies the row, or -1 when there is no
+// current child.
+int64_t VCollectIterator::Level1Iterator::version() const {
+    return _cur_child != nullptr ? _cur_child->version() : -1;
+}
+
+// Prepares traversal of the children. Always returns OLAP_SUCCESS.
+//
+// - No children: nothing to do, `_cur_child` stays null.
+// - Merge requested and more than one child: the children are pushed onto a
+//   heap ordered by LevelIteratorComparator (given the position of the sequence
+//   column inside the return columns, or -1 when it is not returned), and
+//   `_children` is emptied.
+// - Otherwise: merging is switched off and the children are read sequentially,
+//   starting with the first one.
+// In the last two cases `_ref` is set to the current row of the selected child.
+OLAPStatus VCollectIterator::Level1Iterator::init() {
+    if (_children.empty()) {
+        return OLAP_SUCCESS;
+    }
+
+    const bool use_heap = _merge && _children.size() > 1;
+    if (!use_heap) {
+        _merge = false;
+        _heap.reset(nullptr);
+        _cur_child = _children.front();
+    } else {
+        // Locate the sequence column among the returned columns, if present.
+        int sequence_loc = -1;
+        for (int loc = 0; loc < _reader->_return_columns.size(); loc++) {
+            if (_reader->_return_columns[loc] == _reader->_sequence_col_idx) {
+                sequence_loc = loc;
+                break;
+            }
+        }
+        _heap.reset(new MergeHeap {LevelIteratorComparator(sequence_loc)});
+        for (auto child : _children) {
+            DCHECK(child != nullptr);
+            //DCHECK(child->current_row() == OLAP_SUCCESS);
+            _heap->push(child);
+        }
+        _cur_child = _heap->top();
+        // The heap now tracks the children; drop the list's copies of the pointers.
+        _children.clear();
+    }
+    _ref = *_cur_child->current_row_ref();
+    return OLAP_SUCCESS;
+}
+
+// Advances the heap-based merge by one output row.
+//
+// The current child (the heap's top) is popped, advanced, and pushed back if it
+// still has a row; a child that reports EOF is deleted. The new top becomes the
+// current child. When `_skip_same` is set, rows flagged as duplicates are counted
+// in the reader's `_merged_rows` and skipped.
+//
+// Returns OLAP_SUCCESS with *ref and `_ref` set to the new current row,
+// OLAP_ERR_DATA_EOF once every child is exhausted, or a child's error status
+// (after which there is no current child any more).
+OLAPStatus VCollectIterator::Level1Iterator::_merge_next(IteratorRowRef* ref) {
+    // Loop instead of recursing: a long run of duplicate rows must not grow the stack.
+    while (true) {
+        _heap->pop();
+        auto res = _cur_child->next(ref);
+        if (LIKELY(res == OLAP_SUCCESS)) {
+            _heap->push(_cur_child);
+            _cur_child = _heap->top();
+        } else if (res == OLAP_ERR_DATA_EOF) {
+            // current child has been read, to read next
+            delete _cur_child;
+            if (!_heap->empty()) {
+                _cur_child = _heap->top();
+            } else {
+                _cur_child = nullptr;
+                _ref.row_pos = -1;
+                return OLAP_ERR_DATA_EOF;
+            }
+        } else {
+            // The child was popped above and is not pushed back, so nothing else
+            // refers to it any more: free it instead of dropping the pointer.
+            delete _cur_child;
+            _cur_child = nullptr;
+            LOG(WARNING) << "failed to get next from child, res=" << res;
+            return res;
+        }
+
+        if (!(_skip_same && _cur_child->is_same())) {
+            break;
+        }
+        _reader->_merged_rows++;
+        _cur_child->set_same(false);
+    }
+
+    // Copy the reference (including its is_same flag) before the flag is cleared.
+    *ref = _ref = *_cur_child->current_row_ref();
+
+    _cur_child->set_same(false);
+
+    return OLAP_SUCCESS;
+}
+
+// Sequential (non-merging) row read.
+//
+// Advances the current child. When that child reports EOF it is deleted and
+// removed from `_children`, and a single read is attempted on the next child;
+// the status of that read is returned as-is and `_ref` is refreshed from *ref.
+// Returns OLAP_ERR_DATA_EOF when no child is left; on any other failure the
+// current child is cleared and the failing status is returned.
+OLAPStatus VCollectIterator::Level1Iterator::_normal_next(IteratorRowRef* ref) {
+    const auto status = _cur_child->next(ref);
+    if (LIKELY(status == OLAP_SUCCESS)) {
+        _ref = *ref;
+        return OLAP_SUCCESS;
+    }
+
+    if (status != OLAP_ERR_DATA_EOF) {
+        _cur_child = nullptr;
+        LOG(WARNING) << "failed to get next from child, res=" << status;
+        return status;
+    }
+
+    // The current child is exhausted: release it and move on to the next one.
+    delete _cur_child;
+    _children.pop_front();
+    if (_children.empty()) {
+        _cur_child = nullptr;
+        return OLAP_ERR_DATA_EOF;
+    }
+
+    _cur_child = _children.front();
+    const auto next_status = _cur_child->next(ref);
+    _ref = *ref;
+    return next_status;
+}
+
+// Sequential (non-merging) block read.
+//
+// Reads a block from the current child. A child that reports EOF is deleted and
+// removed from `_children`, and the read is retried on the next child until one
+// delivers a block. Returns OLAP_ERR_DATA_EOF when no child is left; on any
+// other failure the current child is cleared and the failing status is returned.
+OLAPStatus VCollectIterator::Level1Iterator::_normal_next(Block* block) {
+    for (;;) {
+        const auto status = _cur_child->next(block);
+        if (LIKELY(status == OLAP_SUCCESS)) {
+            return OLAP_SUCCESS;
+        }
+
+        if (status != OLAP_ERR_DATA_EOF) {
+            _cur_child = nullptr;
+            LOG(WARNING) << "failed to get next from child, res=" << status;
+            return status;
+        }
+
+        // The current child is exhausted: release it and try the next one.
+        delete _cur_child;
+        _children.pop_front();
+        if (_children.empty()) {
+            _cur_child = nullptr;
+            return OLAP_ERR_DATA_EOF;
+        }
+        _cur_child = _children.front();
+    }
+}
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/olap/vcollect_iterator.h b/be/src/vec/olap/vcollect_iterator.h
new file mode 100644
index 0000000000..6cf9d625be
--- /dev/null
+++ b/be/src/vec/olap/vcollect_iterator.h
@@ -0,0 +1,193 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <ext/pb_ds/priority_queue.hpp>
+
+#include "olap/olap_define.h"
+#include "olap/reader.h"
+#include "olap/rowset/rowset_reader.h"
+#include "vec/core/block.h"
+
+namespace doris {
+
+class TabletSchema;
+
+namespace vectorized {
+
+// Non-owning reference to one row of a Block.
+struct IteratorRowRef {
+    // Block that holds the row; not owned by the reference.
+    const Block* block;
+    // Row index inside `block`; -1 is used to signal "no row" / end of data.
+    // NOTE(review): int16_t limits addressable rows to 32767 per block -- confirm
+    // that blocks produced by the rowset readers never exceed that.
+    int16_t row_pos;
+    // Set by the merge comparator on the row that loses a tie between equal keys.
+    bool is_same;
+};
+
+// Collects rows or blocks from several rowset readers and exposes them as one
+// stream, either merge-sorted by key or simply concatenated (see `_merge`).
+class VCollectIterator {
+public:
+    ~VCollectIterator();
+
+    // Stores `reader` (used to access read params) and decides whether merging is needed.
+    void init(TabletReader* reader);
+
+    // Registers one rowset reader as a child iterator.
+    OLAPStatus add_child(RowsetReaderSharedPtr rs_reader);
+
+    // Builds the inner iterator from the registered children. `rs_readers` must
+    // correspond one-to-one to the children; entries may be erased from it.
+    void build_heap(std::vector<RowsetReaderSharedPtr>& rs_readers);
+    // Get the current (top) row. Returns OLAP_ERR_DATA_EOF when the end is reached.
+    OLAPStatus current_row(IteratorRowRef* ref) const;
+
+    // Read the next row, in order, as a reference into a Block.
+    // Returns
+    //      OLAP_SUCCESS when read successfully.
+    //      OLAP_ERR_DATA_EOF when EOF is reached.
+    //      Others when error happens
+    OLAPStatus next(IteratorRowRef* ref);
+
+    // Read the next whole block; same return convention as above.
+    OLAPStatus next(Block* block);
+
+    bool is_merge() const { return _merge; }
+
+private:
+    // This interface is the actual implementation of the new version of iterator.
+    // It currently contains two implementations, one is Level0Iterator,
+    // which only reads data from the rowset reader, and the other is Level1Iterator,
+    // which can read merged data from multiple LevelIterators through MergeHeap.
+    // By using Level1Iterator, some rowset readers can be merged in advance and
+    // then merged with other rowset readers.
+    class LevelIterator {
+    public:
+        // Binds `_schema` to the schema of the reader's tablet.
+        LevelIterator(TabletReader* reader) : _schema(reader->tablet()->tablet_schema()) {};
+
+        virtual OLAPStatus init() = 0;
+
+        virtual int64_t version() const = 0;
+
+        // Row this iterator is currently positioned on.
+        const IteratorRowRef* current_row_ref() const { return &_ref; }
+
+        virtual OLAPStatus next(IteratorRowRef* ref) = 0;
+
+        virtual OLAPStatus next(Block* block) = 0;
+
+        void set_same(bool same) { _ref.is_same = same; }
+
+        bool is_same() { return _ref.is_same; }
+
+        virtual ~LevelIterator() = default;
+
+        const TabletSchema& tablet_schema() const { return _schema; };
+
+    protected:
+        const TabletSchema& _schema;
+        // NOTE(review): has no default initialiser; derived constructors are
+        // responsible for setting its fields.
+        IteratorRowRef _ref;
+    };
+
+    // Compare row cursors between multiple merge elements,
+    // if row cursors equal, compare data version.
+    class LevelIteratorComparator {
+    public:
+        // `sequence` is the position of the sequence column in the block, or -1 if none.
+        LevelIteratorComparator(int sequence = -1) : _sequence(sequence) {}
+
+        // Not a pure predicate: it sets `is_same` on one operand when keys are equal.
+        bool operator()(LevelIterator* lhs, LevelIterator* rhs);
+
+    private:
+        int _sequence;
+    };
+
+    using MergeHeap = __gnu_pbds::priority_queue<LevelIterator*, LevelIteratorComparator,
+                                                 __gnu_pbds::pairing_heap_tag>;
+
+    // Iterate from rowset reader. This iterator is usually a leaf node.
+    class Level0Iterator : public LevelIterator {
+    public:
+        Level0Iterator(RowsetReaderSharedPtr rs_reader, TabletReader* reader);
+        ~Level0Iterator() {}
+
+        OLAPStatus init() override;
+
+        int64_t version() const override;
+
+        OLAPStatus next(IteratorRowRef* ref) override;
+
+        OLAPStatus next(Block* block) override;
+
+    private:
+        // Ensures `_ref` points at a valid row, loading the next block if needed.
+        OLAPStatus _refresh_current_row();
+
+        RowsetReaderSharedPtr _rs_reader;
+        TabletReader* _reader = nullptr;
+        // Block the current-row reference points into.
+        Block _block;
+    };
+
+    // Iterate from LevelIterators (maybe Level0Iterators or Level1Iterator or mixed)
+    class Level1Iterator : public LevelIterator {
+    public:
+        Level1Iterator(const std::list<LevelIterator*>& children, TabletReader* reader, bool merge,
+                       bool skip_same);
+
+        OLAPStatus init() override;
+
+        int64_t version() const override;
+
+        OLAPStatus next(IteratorRowRef* ref) override;
+
+        OLAPStatus next(Block* block) override;
+
+        ~Level1Iterator();
+
+    private:
+        // Row read through the merge heap.
+        inline OLAPStatus _merge_next(IteratorRowRef* ref);
+
+        // Row read, child after child, without merging.
+        inline OLAPStatus _normal_next(IteratorRowRef* ref);
+
+        // Block read, child after child, without merging.
+        inline OLAPStatus _normal_next(Block* block);
+
+        // Each LevelIterator corresponds to a rowset reader,
+        // it will be cleared after '_heap' has been initialized when '_merge == true'.
+        std::list<LevelIterator*> _children;
+        // point to the Level0Iterator containing the next output row.
+        // null when VCollectIterator hasn't been initialized or reaches EOF.
+        LevelIterator* _cur_child = nullptr;
+        TabletReader* _reader = nullptr;
+
+        // when `_merge == true`, rowset reader returns ordered rows and VCollectIterator uses a priority queue to merge
+        // sort them. The output of VCollectIterator is also ordered.
+        // When `_merge == false`, rowset reader returns *partial* ordered rows. VCollectIterator simply returns all rows
+        // from the first rowset, the second rowset, .., the last rowset. The output of VCollectIterator is also
+        // *partially* ordered.
+        bool _merge = true;
+
+        // When true, rows flagged `is_same` are dropped while merging.
+        bool _skip_same;
+        // used when `_merge == true`
+        std::unique_ptr<MergeHeap> _heap;
+        // used when `_merge == false`
+        // NOTE(review): not referenced anywhere in vcollect_iterator.cpp -- possibly dead.
+        int _child_idx = 0;
+    };
+
+    std::unique_ptr<LevelIterator> _inner_iter;
+
+    // Each LevelIterator corresponds to a rowset reader,
+    // it will be cleared after '_inner_iter' has been initialized.
+    std::list<LevelIterator*> _children;
+
+    bool _merge = true;
+    // Hold reader point to access read params, such as fetch conditions.
+    TabletReader* _reader = nullptr;
+
+    // Assigned in build_heap(): true when the tablet's keys type is UNIQUE_KEYS.
+    bool _skip_same;
+};
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/olap/vgeneric_iterators.cpp b/be/src/vec/olap/vgeneric_iterators.cpp
new file mode 100644
index 0000000000..938d6d7a3e
--- /dev/null
+++ b/be/src/vec/olap/vgeneric_iterators.cpp
@@ -0,0 +1,443 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <queue>
+#include <utility>
+
+#include "olap/iterators.h"
+#include "olap/row.h"
+#include "olap/row_block2.h"
+#include "olap/row_cursor_cell.h"
+
+namespace doris {
+
+namespace vectorized {
+
+// Test-oriented iterator that synthesizes ordered data. For example, for the
+// schema (int, int) this iterator generates
+// (0, 1), (1, 2), (2, 3), (3, 4)...
+// i.e. column j of row r holds the value r + j.
+//
+// Only SMALLINT, INT, BIGINT, FLOAT and DOUBLE columns receive generated
+// values; a column of any other type is fed a zeroed buffer with length 0.
+//
+// Usage:
+//      Schema schema;
+//      VAutoIncrementIterator iter(schema, 1000);
+//      StorageReadOptions opts;
+//      RETURN_IF_ERROR(iter.init(opts));
+//      vectorized::Block block; // must already hold one column per schema column
+//      do {
+//          st = iter.next_batch(&block);
+//      } while (st.ok());
+class VAutoIncrementIterator : public RowwiseIterator {
+public:
+    // Will generate num_rows rows in total
+    VAutoIncrementIterator(const Schema& schema, size_t num_rows)
+            : _schema(schema), _num_rows(num_rows), _rows_returned(0) {}
+    ~VAutoIncrementIterator() override {}
+
+    // NOTE: Currently, this function will ignore StorageReadOptions
+    Status init(const StorageReadOptions& opts) override;
+
+    // Appends every row that has not been produced yet to `block` in one call.
+    // `block` must expose at least as many columns as the schema.
+    // Returns OK when at least one row was appended, EndOfFile otherwise.
+    Status next_batch(vectorized::Block* block) override {
+        // Zeroed input for column types that have no generated value. It is
+        // over-aligned so a column that reads a wide fixed-size value from the
+        // pointer never performs a misaligned load.
+        alignas(16) const char zero_cell[16] = {};
+        const size_t num_columns = _schema.num_columns();
+        size_t rows_appended = 0;
+
+        for (; _rows_returned < _num_rows; ++_rows_returned, ++rows_appended) {
+            for (size_t j = 0; j < num_columns; ++j) {
+                // Bind by reference: copying ColumnWithTypeAndName per cell would
+                // copy the column name and bump two reference counts.
+                const auto& dst = block->get_by_position(j);
+                auto& column = const_cast<vectorized::IColumn&>(*dst.column);
+                const size_t value = _rows_returned + j;
+
+                switch (_schema.column(j)->type()) {
+                    case OLAP_FIELD_TYPE_SMALLINT:
+                        _append_value<int16_t>(column, value);
+                        break;
+                    case OLAP_FIELD_TYPE_INT:
+                        _append_value<int32_t>(column, value);
+                        break;
+                    case OLAP_FIELD_TYPE_BIGINT:
+                        _append_value<int64_t>(column, value);
+                        break;
+                    case OLAP_FIELD_TYPE_FLOAT:
+                        _append_value<float>(column, value);
+                        break;
+                    case OLAP_FIELD_TYPE_DOUBLE:
+                        _append_value<double>(column, value);
+                        break;
+                    default:
+                        column.insert_data(zero_cell, 0);
+                        break;
+                }
+            }
+        }
+
+        if (rows_appended > 0) {
+            return Status::OK();
+        }
+        return Status::EndOfFile("End of VAutoIncrementIterator");
+    }
+
+    const Schema& schema() const override { return _schema; }
+
+private:
+    // Converts `value` to T and appends it to `column`. The bytes are read from
+    // a properly typed and aligned local object instead of being written
+    // through a casted pointer into a char buffer.
+    template <typename T>
+    static void _append_value(vectorized::IColumn& column, size_t value) {
+        const T cell = static_cast<T>(value);
+        column.insert_data(reinterpret_cast<const char*>(&cell), sizeof(T));
+    }
+
+    Schema _schema;
+    size_t _num_rows;
+    size_t _rows_returned;
+};
+
+// Nothing to prepare: the read options are intentionally ignored.
+Status VAutoIncrementIterator::init(const StorageReadOptions& /*opts*/) {
+    return Status::OK();
+}
+
+// Stores the merge state of one VMergeIterator input: the block most recently
+// read from the wrapped iterator plus the index of the current row inside it.
+// Clients walk through all rows of the wrapped iterator by calling advance().
+// Usage:
+//      VMergeIteratorContext ctx(iter, tracker);
+//      RETURN_IF_ERROR(ctx.init(opts));
+//      while (ctx.valid()) {
+//          ctx.copy_row_to(&output_block);
+//          RETURN_IF_ERROR(ctx.advance());
+//      }
+class VMergeIteratorContext {
+public:
+    // Takes ownership of `iter`, which is deleted by the destructor.
+    // `parent` is accepted but not used by this class.
+    VMergeIteratorContext(RowwiseIterator* iter, std::shared_ptr<MemTracker> parent) : _iter(iter) {}
+    VMergeIteratorContext(const VMergeIteratorContext&) = delete;
+    VMergeIteratorContext(VMergeIteratorContext&&) = delete;
+    VMergeIteratorContext& operator=(const VMergeIteratorContext&) = delete;
+    VMergeIteratorContext& operator=(VMergeIteratorContext&&) = delete;
+
+    ~VMergeIteratorContext() {
+        delete _iter;
+        _iter = nullptr;
+    }
+
+    // Prepares _block for the next read. On first use one empty column is
+    // created per schema column, in schema order; afterwards only the column
+    // data is cleared. Returns RuntimeError when a schema column has no
+    // matching vectorized data type.
+    Status block_reset()
+    {
+        if (!_block) {
+            const Schema& schema = _iter->schema();
+            for (auto &column_desc : schema.columns()) {
+                auto data_type = Schema::get_data_type_ptr(column_desc->type());
+                if (data_type == nullptr) {
+                    return Status::RuntimeError("invalid data type");
+                }
+                _block.insert(ColumnWithTypeAndName(data_type->create_column(), data_type, column_desc->name()));
+            }
+        } else {
+            _block.clear_column_data();
+        }
+        return Status::OK();
+    }
+
+    // Initialize this context and position it on the first row, if any.
+    Status init(const StorageReadOptions& opts);
+
+    // Three-way comparison of the current rows of `*this` and `rhs` over the
+    // key columns. Returns the first non-zero column comparison, or 0 when all
+    // key columns compare equal.
+    int compare_row(const VMergeIteratorContext& rhs) const {
+        const size_t num_key_columns = _iter->schema().num_key_columns();
+        for (uint32_t cid = 0; cid < num_key_columns; ++cid) {
+            // block_reset() inserts the columns in cid order, so a positional
+            // lookup is enough. Bind by reference: this runs for every heap
+            // comparison and a by-value copy would duplicate the column name.
+            const auto& l_col = _block.get_by_position(cid);
+            const auto& r_col = rhs._block.get_by_position(cid);
+
+            const int res = l_col.column->compare_at(_index_in_block, rhs._index_in_block,
+                                                     *r_col.column, -1);
+            if (res != 0) {
+                return res;
+            }
+        }
+
+        return 0;
+    }
+
+    // Ordering predicate used by the merge heap: true when the current row of
+    // `*this` compares greater than that of `rhs`; rows with equal keys are
+    // ordered by data_id().
+    bool compare(const VMergeIteratorContext& rhs) const {
+        int cmp_res = this->compare_row(rhs);
+        if (cmp_res != 0) {
+            return cmp_res > 0;
+        }
+        return this->data_id() < rhs.data_id();
+    }
+
+    // Appends the current row to `block`, column by column. `block` must hold
+    // the same columns, in the same order, as this context's block.
+    void copy_row_to(vectorized::Block* block) {
+        const size_t num_columns = _iter->schema().columns().size();
+
+        for (size_t i = 0; i < num_columns; ++i) {
+            const auto& src_col = _block.get_by_position(i);
+            const auto& dst_col = block->get_by_position(i);
+
+            // The destination column is reached through a shared const handle,
+            // so constness has to be cast away to append to it.
+            const_cast<vectorized::IColumn&>(*dst_col.column)
+                    .insert_range_from(*src_col.column, _index_in_block, 1);
+        }
+    }
+
+    // Advance internal row index to next valid row
+    // Return error if error happens
+    // Don't call this when valid() is false, action is undefined
+    Status advance();
+
+    // Return if has remaining data in this context.
+    // Only when this function returns true does the context point at a row
+    // that may be compared or copied.
+    bool valid() const { return _valid; }
+
+    uint64_t data_id() const { return _iter->data_id(); }
+
+private:
+    // Load next block into _block
+    Status _load_next_block();
+
+private:
+    RowwiseIterator* _iter;
+
+    // used to store data loaded from iterator->next_batch(vectorized::Block*)
+    vectorized::Block _block;
+
+    bool _valid = false;
+    // Deliberately starts one before the first row (wraps to the maximum
+    // size_t); advance() pre-increments it before use.
+    size_t _index_in_block = -1;
+};
+
+// Initializes the wrapped iterator, loads its first non-empty block and steps
+// onto the first row. When the input has no rows the context stays invalid and
+// OK is returned.
+Status VMergeIteratorContext::init(const StorageReadOptions& opts) {
+    RETURN_IF_ERROR(_iter->init(opts));
+    RETURN_IF_ERROR(block_reset());
+    RETURN_IF_ERROR(_load_next_block());
+    if (!valid()) {
+        return Status::OK();
+    }
+    return advance();
+}
+
+// Moves to the next row, loading further blocks as needed. Returns OK both when
+// a row is available and when the input is exhausted (valid() tells which);
+// a failure while loading a block is returned as is.
+Status VMergeIteratorContext::advance() {
+    // NOTE: the index is bumped first and validated afterwards
+    for (;;) {
+        ++_index_in_block;
+        if (_index_in_block < _block.rows()) {
+            break;
+        }
+        // the current block is used up, fetch the next one
+        RETURN_IF_ERROR(_load_next_block());
+        if (!_valid) {
+            break;
+        }
+    }
+    return Status::OK();
+}
+
+// Reads blocks from the wrapped iterator until a non-empty one arrives.
+// On success the row index is rewound to "before the first row" and the context
+// becomes valid. End of file is not an error: the context is marked invalid and
+// OK is returned. Any other failure, including a failure to reset the block,
+// marks the context invalid and is returned to the caller.
+Status VMergeIteratorContext::_load_next_block() {
+    do {
+        // block_reset() can fail (unsupported column type); do not read into a
+        // block that could not be prepared.
+        Status st = block_reset();
+        if (!st.ok()) {
+            _valid = false;
+            return st;
+        }
+        st = _iter->next_batch(&_block);
+        if (!st.ok()) {
+            _valid = false;
+            if (st.is_end_of_file()) {
+                return Status::OK();
+            } else {
+                return st;
+            }
+        }
+    } while (_block.rows() == 0);
+    // advance() pre-increments, so start one before the first row.
+    _index_in_block = -1;
+    _valid = true;
+    return Status::OK();
+}
+
+// Merges several ordered input iterators into one ordered stream of rows by
+// keeping one VMergeIteratorContext per input in a priority queue.
+class VMergeIterator : public RowwiseIterator {
+public:
+    // VMergeIterator takes the ownership of input iterators
+    VMergeIterator(std::vector<RowwiseIterator*>& iters, std::shared_ptr<MemTracker> parent) : _origin_iters(iters) {
+        // tracker used to account for the memory of the Blocks used while merging
+        _mem_tracker = MemTracker::CreateTracker(-1, "VMergeIterator", parent, false);
+    }
+
+    // Frees every context that is still queued in the heap.
+    ~VMergeIterator() override {
+        for (; !_merge_heap.empty();) {
+            VMergeIteratorContext* pending = _merge_heap.top();
+            _merge_heap.pop();
+            delete pending;
+        }
+    }
+
+    Status init(const StorageReadOptions& opts) override;
+
+    Status next_batch(vectorized::Block* block) override;
+
+    const Schema& schema() const override { return *_schema; }
+
+private:
+    // Heap ordering: delegates to VMergeIteratorContext::compare().
+    struct VMergeContextComparator {
+        bool operator()(const VMergeIteratorContext* lhs, const VMergeIteratorContext* rhs) const {
+            return lhs->compare(*rhs);
+        }
+    };
+
+    using VMergeHeap = std::priority_queue<VMergeIteratorContext*,
+                                           std::vector<VMergeIteratorContext*>,
+                                           VMergeContextComparator>;
+
+    // Input iterators as handed to the constructor.
+    // It will be released after '_merge_heap' has been built.
+    std::vector<RowwiseIterator*> _origin_iters;
+
+    // Copy of the first input's schema, created by init().
+    std::unique_ptr<Schema> _schema;
+
+    VMergeHeap _merge_heap;
+
+    // Row count at which next_batch() stops filling the output block.
+    int block_row_max = 0;
+};
+
+// Wraps every input iterator in a VMergeIteratorContext, initializes it and
+// pushes the ones that have at least one row onto the merge heap. Inputs
+// without rows are released immediately.
+//
+// If initializing an input fails, the error is returned. The failing input is
+// freed together with its context, and the inputs that were not reached yet are
+// freed here as well: this iterator owns them and the destructor only releases
+// contexts that made it into the heap, so they would otherwise leak.
+Status VMergeIterator::init(const StorageReadOptions& opts) {
+    if (_origin_iters.empty()) {
+        return Status::OK();
+    }
+    _schema.reset(new Schema(_origin_iters.front()->schema()));
+
+    for (size_t i = 0; i < _origin_iters.size(); ++i) {
+        std::unique_ptr<VMergeIteratorContext> ctx(
+                new VMergeIteratorContext(_origin_iters[i], _mem_tracker));
+        Status st = ctx->init(opts);
+        if (!st.ok()) {
+            for (size_t j = i + 1; j < _origin_iters.size(); ++j) {
+                delete _origin_iters[j];
+            }
+            _origin_iters.clear();
+            return st;
+        }
+        if (ctx->valid()) {
+            _merge_heap.push(ctx.release());
+        }
+    }
+
+    // Ownership of every input has moved into a context (or it was freed).
+    _origin_iters.clear();
+
+    block_row_max = opts.block_row_max;
+
+    return Status::OK();
+}
+
+// Appends merged rows to `block` until it holds `block_row_max` rows or every
+// input is exhausted.
+//
+// Returns OK when at least one row was appended by this call and EndOfFile when
+// none was, which matches VAutoIncrementIterator::next_batch. Previously
+// EndOfFile was returned unconditionally, so a caller that stops on EndOfFile
+// would have dropped everything after the first batch.
+// An error from advancing an input is returned as is; the affected context is
+// released because it is no longer reachable through the heap.
+Status VMergeIterator::next_batch(vectorized::Block* block) {
+    const size_t rows_before = block->rows();
+
+    while (block->rows() < block_row_max && !_merge_heap.empty()) {
+        auto ctx = _merge_heap.top();
+        _merge_heap.pop();
+
+        // copy current row to block
+        ctx->copy_row_to(block);
+
+        Status st = ctx->advance();
+        if (!st.ok()) {
+            delete ctx;
+            return st;
+        }
+        if (ctx->valid()) {
+            _merge_heap.push(ctx);
+        } else {
+            // Release ctx earlier to reduce resource consumed
+            delete ctx;
+        }
+    }
+
+    if (block->rows() > rows_before) {
+        return Status::OK();
+    }
+    return Status::EndOfFile("no more data in segment");
+}
+
+// VUnionIterator reads its input iterators one after another, in the order
+// they were passed in.
+class VUnionIterator : public RowwiseIterator {
+public:
+    // Ownership of the iterators is transferred to this class.
+    // Iterators that have not been consumed yet are deleted on destruction,
+    // so the client must not use them any more.
+    VUnionIterator(std::vector<RowwiseIterator*>& v, std::shared_ptr<MemTracker> parent)
+            : _origin_iters(v.begin(), v.end()) {
+        _mem_tracker = MemTracker::CreateTracker(-1, "VUnionIterator", parent, false);
+    }
+
+    ~VUnionIterator() override {
+        for (RowwiseIterator* remaining : _origin_iters) {
+            delete remaining;
+        }
+    }
+
+    Status init(const StorageReadOptions& opts) override;
+
+    Status next_batch(vectorized::Block* block) override;
+
+    const Schema& schema() const override { return *_schema; }
+
+private:
+    // Copy of the first input's schema, created by init().
+    std::unique_ptr<Schema> _schema;
+    // Input currently being read; nullptr before init() and after the last
+    // input has been consumed.
+    RowwiseIterator* _cur_iter = nullptr;
+    // Inputs not fully consumed yet; the front element is the current one.
+    std::deque<RowwiseIterator*> _origin_iters;
+};
+
+// Initializes every input with the same options, then selects the first input
+// as the current one and copies its schema. Does nothing when there is no input.
+Status VUnionIterator::init(const StorageReadOptions& opts) {
+    if (_origin_iters.empty()) {
+        return Status::OK();
+    }
+
+    for (RowwiseIterator* child : _origin_iters) {
+        RETURN_IF_ERROR(child->init(opts));
+    }
+    RowwiseIterator* first = _origin_iters.front();
+    _schema.reset(new Schema(first->schema()));
+    _cur_iter = first;
+    return Status::OK();
+}
+
+// Forwards the call to the current input. When that input reports end of file
+// it is deleted and the next one takes over; any other status (including OK) is
+// returned unchanged. EndOfFile is returned once no input is left.
+Status VUnionIterator::next_batch(vectorized::Block* block) {
+    while (_cur_iter != nullptr) {
+        auto st = _cur_iter->next_batch(block);
+        if (!st.is_end_of_file()) {
+            return st;
+        }
+        // the current input is drained: release it and move on
+        delete _cur_iter;
+        _origin_iters.pop_front();
+        _cur_iter = _origin_iters.empty() ? nullptr : _origin_iters.front();
+    }
+    return Status::EndOfFile("End of VUnionIterator");
+}
+
+
+// Builds an iterator that merges `inputs`. A single input needs no merging and
+// is handed back as is; otherwise a new VMergeIterator over all inputs is
+// returned.
+RowwiseIterator* new_merge_iterator(std::vector<RowwiseIterator*>& inputs, std::shared_ptr<MemTracker> parent) {
+    const bool single_input = (inputs.size() == 1);
+    if (single_input) {
+        return inputs.front();
+    }
+    return new VMergeIterator(inputs, parent);
+}
+
+// Builds an iterator that reads `inputs` one after another. A single input is
+// handed back as is; otherwise a new VUnionIterator over all inputs is returned.
+RowwiseIterator* new_union_iterator(std::vector<RowwiseIterator*>& inputs, std::shared_ptr<MemTracker> parent) {
+    const bool single_input = (inputs.size() == 1);
+    if (single_input) {
+        return inputs.front();
+    }
+    return new VUnionIterator(inputs, parent);
+}
+
+// Allocates a VAutoIncrementIterator that produces `num_rows` rows for `schema`.
+// The iterator is created with `new` and returned as a raw pointer.
+RowwiseIterator* new_auto_increment_iterator(const Schema& schema, size_t num_rows) {
+    return new VAutoIncrementIterator(schema, num_rows);
+}
+
+}
+
+} // namespace doris
diff --git a/be/src/vec/olap/vgeneric_iterators.h b/be/src/vec/olap/vgeneric_iterators.h
new file mode 100644
index 0000000000..8177a63f8b
--- /dev/null
+++ b/be/src/vec/olap/vgeneric_iterators.h
@@ -0,0 +1,47 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <memory>
+#include <vector>
+
+#include "olap/iterators.h"
+
+namespace doris {
+
+namespace vectorized {
+
+// Create a merge iterator for input iterators. Merge iterator will merge
+// ordered input iterators into one ordered iterator, so the client should
+// ensure that every input iterator is ordered, otherwise the result is
+// undefined.
+//
+// Ownership of the input iterators is taken by the created merge iterator, and
+// the client should delete the returned iterator after usage. When `inputs`
+// holds exactly one iterator, that iterator itself is returned.
+RowwiseIterator* new_merge_iterator(std::vector<RowwiseIterator*>& inputs, std::shared_ptr<MemTracker> parent);
+
+// Create a union iterator for input iterators. Union iterator will read
+// input iterators one by one.
+//
+// Ownership of the input iterators is taken by the created union iterator, and
+// the client should delete the returned iterator after usage. When `inputs`
+// holds exactly one iterator, that iterator itself is returned.
+RowwiseIterator* new_union_iterator(std::vector<RowwiseIterator*>& inputs, std::shared_ptr<MemTracker> parent);
+
+// Create an auto increment iterator which returns num_rows rows in the format
+// of schema. This iterator is meant to be used in unit tests.
+//
+// Client should delete returned iterator.
+RowwiseIterator* new_auto_increment_iterator(const Schema& schema, size_t num_rows);
+
+} // namespace vectorized
+
+} // namespace doris
diff --git a/be/src/vec/runtime/vdata_stream_mgr.cpp b/be/src/vec/runtime/vdata_stream_mgr.cpp
new file mode 100644
index 0000000000..4b0bb5f75c
--- /dev/null
+++ b/be/src/vec/runtime/vdata_stream_mgr.cpp
@@ -0,0 +1,194 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/runtime/vdata_stream_mgr.h"
+
+#include "gen_cpp/internal_service.pb.h"
+#include "runtime/descriptors.h"
+#include "runtime/primitive_type.h"
+#include "runtime/raw_value.h"
+#include "runtime/runtime_state.h"
+#include "util/doris_metrics.h"
+#include "util/runtime_profile.h"
+#include "vec/runtime/vdata_stream_recvr.h"
+
+namespace doris {
+namespace vectorized {
+
+// Constructs an empty manager; no receiver is registered yet.
+VDataStreamMgr::VDataStreamMgr() {
+    // TODO: metric
+}
+
+// Performs no explicit cleanup; members are destroyed implicitly.
+VDataStreamMgr::~VDataStreamMgr() {
+    // TODO: metric
+}
+
+// Combines the low and high halves of the fragment instance id and the node id
+// into one 32-bit hash by chaining RawValue::get_hash_value(), each step being
+// seeded with the previous result.
+inline uint32_t VDataStreamMgr::get_hash_value(const TUniqueId& fragment_instance_id,
+                                               PlanNodeId node_id) {
+    const uint32_t lo_hash = RawValue::get_hash_value(&fragment_instance_id.lo, TYPE_BIGINT, 0);
+    const uint32_t id_hash =
+            RawValue::get_hash_value(&fragment_instance_id.hi, TYPE_BIGINT, lo_hash);
+    return RawValue::get_hash_value(&node_id, TYPE_INT, id_hash);
+}
+
+// Creates a receiver for (fragment_instance_id, dest_node_id) and registers it
+// in both lookup structures. The receiver and its hash are built before _lock
+// is taken; only the two insertions happen under the lock.
+// `profile` must not be null (checked with DCHECK).
+std::shared_ptr<VDataStreamRecvr> VDataStreamMgr::create_recvr(
+        RuntimeState* state, const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id,
+        PlanNodeId dest_node_id, int num_senders, int buffer_size, RuntimeProfile* profile,
+        bool is_merging, std::shared_ptr<QueryStatisticsRecvr> sub_plan_query_statistics_recvr) {
+    DCHECK(profile != nullptr);
+    VLOG_FILE << "creating receiver for fragment=" << fragment_instance_id
+              << ", node=" << dest_node_id;
+
+    std::shared_ptr<VDataStreamRecvr> new_recvr(new VDataStreamRecvr(
+            this, state->instance_mem_tracker(), row_desc, fragment_instance_id, dest_node_id,
+            num_senders, is_merging, buffer_size, profile, sub_plan_query_statistics_recvr));
+    const uint32_t key = get_hash_value(fragment_instance_id, dest_node_id);
+
+    std::lock_guard<std::mutex> guard(_lock);
+    _fragment_stream_set.emplace(fragment_instance_id, dest_node_id);
+    _receiver_map.emplace(key, new_recvr);
+    return new_recvr;
+}
+
+// Looks up the receiver registered for (fragment_instance_id, node_id).
+// Returns an empty pointer when no such receiver is registered.
+//
+// acquire_lock: pass false only when the caller already holds _lock (as
+// cancel() does); otherwise _lock is taken for the duration of the lookup.
+//
+// The lock is managed by a deferred std::unique_lock instead of paired manual
+// lock()/unlock() calls, so it is released on every exit path.
+std::shared_ptr<VDataStreamRecvr> VDataStreamMgr::find_recvr(const TUniqueId& fragment_instance_id,
+                                                             PlanNodeId node_id,
+                                                             bool acquire_lock) {
+    VLOG_ROW << "looking up fragment_instance_id=" << fragment_instance_id << ", node=" << node_id;
+    // Same width as the key type of the receiver map.
+    const uint32_t hash_value = get_hash_value(fragment_instance_id, node_id);
+
+    std::unique_lock<std::mutex> guard(_lock, std::defer_lock);
+    if (acquire_lock) {
+        guard.lock();
+    }
+
+    // Several receivers may share a hash value, so every candidate is verified.
+    auto range = _receiver_map.equal_range(hash_value);
+    for (auto it = range.first; it != range.second; ++it) {
+        const std::shared_ptr<VDataStreamRecvr>& recvr = it->second;
+        if (recvr->fragment_instance_id() == fragment_instance_id &&
+            recvr->dest_node_id() == node_id) {
+            return recvr;
+        }
+    }
+    return std::shared_ptr<VDataStreamRecvr>();
+}
+
+// Routes one transmit request to the receiver registered for the request's
+// fragment instance id and node id:
+//   - query statistics carried by the request are handed to the receiver,
+//   - the block, if present, is added to the receiver,
+//   - when the request is flagged eos the sender is removed from the receiver.
+// Always returns OK, including when no receiver is registered (see below).
+Status VDataStreamMgr::transmit_block(const PTransmitDataParams* request,
+                                      ::google::protobuf::Closure** done) {
+    const PUniqueId& finst_id = request->finst_id();
+    TUniqueId t_finst_id;
+    t_finst_id.hi = finst_id.hi();
+    t_finst_id.lo = finst_id.lo();
+    auto recvr = find_recvr(t_finst_id, request->node_id());
+    if (recvr == nullptr) {
+        // The receiver may remove itself from the receiver map via deregister_recvr()
+        // at any time without considering the remaining number of senders.
+        // As a consequence, find_recvr() may return an innocuous NULL if a thread
+        // calling deregister_recvr() beat the thread calling find_recvr()
+        // in acquiring _lock.
+        // TODO: Rethink the lifecycle of DataStreamRecvr to distinguish
+        // errors from receiver-initiated teardowns.
+        return Status::OK();
+    }
+
+    // request can only be used before calling recvr's add_block or when request
+    // is the last for the sender, because request may be released after its block
+    // is consumed by ExchangeNode.
+    if (request->has_query_statistics()) {
+        recvr->add_sub_plan_statistics(request->query_statistics(), request->sender_id());
+    }
+
+    // Read eos before add_block(): see the note above about the request's lifetime.
+    bool eos = request->eos();
+    if (request->has_block()) {
+        // `done` is withheld (nullptr) for the final request of a sender.
+        // NOTE(review): presumably so the request stays usable for
+        // remove_sender() below - confirm against VDataStreamRecvr::add_block.
+        recvr->add_block(request->block(), request->sender_id(), request->be_number(),
+                         request->packet_seq(), eos ? nullptr : done);
+    }
+
+    if (eos) {
+        recvr->remove_sender(request->sender_id(), request->be_number());
+    }
+    return Status::OK();
+}
+
+// Removes the receiver registered for (fragment_instance_id, node_id) from both
+// lookup structures and cancels its stream. Returns InternalError (and logs an
+// error) when no such receiver is registered.
+Status VDataStreamMgr::deregister_recvr(const TUniqueId& fragment_instance_id, PlanNodeId node_id) {
+    VLOG_QUERY << "deregister_recvr(): fragment_instance_id=" << fragment_instance_id
+               << ", node=" << node_id;
+    std::shared_ptr<VDataStreamRecvr> removed;
+    size_t hash_value = get_hash_value(fragment_instance_id, node_id);
+    {
+        std::lock_guard<std::mutex> l(_lock);
+        auto range = _receiver_map.equal_range(hash_value);
+        for (auto it = range.first; it != range.second; ++it) {
+            const std::shared_ptr<VDataStreamRecvr>& candidate = it->second;
+            const bool matches = candidate->fragment_instance_id() == fragment_instance_id &&
+                                 candidate->dest_node_id() == node_id;
+            if (!matches) {
+                continue;
+            }
+            // Keep the receiver alive beyond its removal from the map.
+            removed = candidate;
+            _fragment_stream_set.erase(
+                    std::make_pair(candidate->fragment_instance_id(), candidate->dest_node_id()));
+            _receiver_map.erase(it);
+            break;
+        }
+    }
+
+    if (!removed) {
+        std::stringstream err;
+        err << "unknown row receiver id: fragment_instance_id=" << fragment_instance_id
+            << " node_id=" << node_id;
+        LOG(ERROR) << err.str();
+        return Status::InternalError(err.str());
+    }
+
+    // Notify concurrent add_data() requests that the stream has been terminated.
+    // cancel_stream may take a long time, so it is called outside of the lock.
+    removed->cancel_stream();
+    return Status::OK();
+}
+
+// Cancels the streams of every receiver registered for `fragment_instance_id`.
+// The receivers are collected while holding _lock and cancelled afterwards.
+void VDataStreamMgr::cancel(const TUniqueId& fragment_instance_id) {
+    VLOG_QUERY << "cancelling all streams for fragment=" << fragment_instance_id;
+    std::vector<std::shared_ptr<VDataStreamRecvr>> to_cancel;
+    {
+        std::lock_guard<std::mutex> l(_lock);
+        // The set is ordered by fragment id first, so all entries of this
+        // fragment are adjacent, starting at the lower bound for node id 0.
+        for (FragmentStreamSet::iterator entry =
+                     _fragment_stream_set.lower_bound(std::make_pair(fragment_instance_id, 0));
+             entry != _fragment_stream_set.end() && entry->first == fragment_instance_id;
+             ++entry) {
+            // _lock is already held, so the lookup must not take it again.
+            std::shared_ptr<VDataStreamRecvr> recvr =
+                    find_recvr(entry->first, entry->second, false);
+            if (recvr) {
+                to_cancel.push_back(recvr);
+                continue;
+            }
+            // keep going but at least log it
+            std::stringstream err;
+            err << "cancel(): missing in stream_map: fragment=" << entry->first
+                << " node=" << entry->second;
+            LOG(ERROR) << err.str();
+        }
+    }
+
+    // cancel_stream may take a long time, so it is called outside of the lock.
+    for (auto& recvr : to_cancel) {
+        recvr->cancel_stream();
+    }
+}
+
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/runtime/vdata_stream_mgr.h b/be/src/vec/runtime/vdata_stream_mgr.h
new file mode 100644
index 0000000000..5cce8e479d
--- /dev/null
+++ b/be/src/vec/runtime/vdata_stream_mgr.h
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <memory>
+#include <mutex>
+#include <set>
+#include <unordered_map>
+
+#include "common/global_types.h"
+#include "common/status.h"
+#include "gen_cpp/Types_types.h"
+
+namespace google {
+namespace protobuf {
+class Closure;
+}
+} // namespace google
+
+namespace doris {
+class RuntimeState;
+class RowDescriptor;
+class TUniqueId;
+class RuntimeProfile;
+class QueryStatisticsRecvr;
+class PTransmitDataParams;
+
+namespace vectorized {
+class VDataStreamRecvr;
+
+// Registry of VDataStreamRecvr instances, keyed by fragment instance id and
+// destination plan node id. Access to the registry is guarded by _lock.
+class VDataStreamMgr {
+public:
+    VDataStreamMgr();
+    ~VDataStreamMgr();
+
+    // Creates a receiver and registers it under
+    // (fragment_instance_id, dest_node_id).
+    std::shared_ptr<VDataStreamRecvr> create_recvr(
+            RuntimeState* state, const RowDescriptor& row_desc,
+            const TUniqueId& fragment_instance_id, PlanNodeId dest_node_id, int num_senders,
+            int buffer_size, RuntimeProfile* profile, bool is_merging,
+            std::shared_ptr<QueryStatisticsRecvr> sub_plan_query_statistics_recvr);
+
+    // Returns the receiver registered under (fragment_instance_id, node_id), or
+    // an empty pointer. Pass acquire_lock = false only while already holding
+    // _lock.
+    std::shared_ptr<VDataStreamRecvr> find_recvr(const TUniqueId& fragment_instance_id,
+                                                 PlanNodeId node_id, bool acquire_lock = true);
+
+    // Unregisters the receiver and cancels its stream.
+    Status deregister_recvr(const TUniqueId& fragment_instance_id, PlanNodeId node_id);
+
+    // Hands the content of a transmit request to the matching receiver.
+    Status transmit_block(const PTransmitDataParams* request, ::google::protobuf::Closure** done);
+
+    // Cancels the streams of all receivers of the given fragment instance.
+    void cancel(const TUniqueId& fragment_instance_id);
+
+private:
+    std::mutex _lock;
+
+    // Receivers indexed by get_hash_value(); entries with the same hash are
+    // told apart by comparing fragment instance id and node id.
+    using StreamMap = std::unordered_multimap<uint32_t, std::shared_ptr<VDataStreamRecvr>>;
+    StreamMap _receiver_map;
+
+    // Orders (fragment instance id, node id) pairs by id.hi, then id.lo, then
+    // node id.
+    struct ComparisonOp {
+        bool operator()(const std::pair<doris::TUniqueId, PlanNodeId>& a,
+                        const std::pair<doris::TUniqueId, PlanNodeId>& b) const {
+            if (a.first.hi != b.first.hi) {
+                return a.first.hi < b.first.hi;
+            }
+            if (a.first.lo != b.first.lo) {
+                return a.first.lo < b.first.lo;
+            }
+            return a.second < b.second;
+        }
+    };
+    // All registered (fragment instance id, node id) pairs, sorted so that the
+    // entries of one fragment instance are adjacent.
+    using FragmentStreamSet = std::set<std::pair<TUniqueId, PlanNodeId>, ComparisonOp>;
+    FragmentStreamSet _fragment_stream_set;
+
+    inline uint32_t get_hash_value(const TUniqueId& fragment_instance_id, PlanNodeId node_id);
+};
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/runtime/vdata_stream_recvr.cpp b/be/src/vec/runtime/vdata_stream_recvr.cpp
new file mode 100644
index 0000000000..59e18d07a1
--- /dev/null
+++ b/be/src/vec/runtime/vdata_stream_recvr.cpp
@@ -0,0 +1,364 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/runtime/vdata_stream_recvr.h"
+
+#include "gen_cpp/data.pb.h"
+#include "runtime/mem_tracker.h"
+#include "util/uid_util.h"
+#include "vec/core/block.h"
+#include "vec/core/sort_cursor.h"
+#include "vec/runtime/vdata_stream_mgr.h"
+#include "vec/runtime/vsorted_run_merger.h"
+
+namespace doris::vectorized {
+
+// Creates a queue for `parent_recvr` that expects `num_senders` senders; `profile` is unused.
+VDataStreamRecvr::SenderQueue::SenderQueue(VDataStreamRecvr* parent_recvr, int num_senders,
+                                           RuntimeProfile* profile)
+        : _recvr(parent_recvr), _is_cancelled(false),
+          _num_remaining_senders(num_senders),
+          _received_first_batch(false) {}
+
+VDataStreamRecvr::SenderQueue::~SenderQueue() = default; // raw Block* entries of _block_queue are freed by close(), not here
+
+// Blocks until a block is queued, the stream is cancelled, or all senders are done, then
+// returns the oldest block via *next_block (nullptr when drained). The block stays owned by
+// this queue (_current_block) until the next call. Returns Cancelled after cancel()/close().
+Status VDataStreamRecvr::SenderQueue::get_batch(Block** next_block) {
+    std::unique_lock<std::mutex> guard(_lock);
+    // Sleep until data arrives, the stream is cancelled, or the last sender finishes.
+    while (!_is_cancelled && _block_queue.empty() && _num_remaining_senders > 0) {
+        VLOG_ROW << "wait arrival fragment_instance_id=" << _recvr->fragment_instance_id()
+                 << " node=" << _recvr->dest_node_id();
+        // Don't count time spent waiting on the sender as active time.
+        CANCEL_SAFE_SCOPED_TIMER(_recvr->_data_arrival_timer, &_is_cancelled);
+        CANCEL_SAFE_SCOPED_TIMER(
+                _received_first_batch ? nullptr : _recvr->_first_batch_wait_total_timer,
+                &_is_cancelled);
+        _data_arrival_cv.wait(guard);
+    }
+    // Release the block handed out by the previous call before deciding what to return.
+    _current_block.reset();
+    *next_block = nullptr;
+    if (_is_cancelled) {
+        return Status::Cancelled("Cancelled");
+    }
+    if (_block_queue.empty()) {
+        // Nothing queued and not cancelled: every sender must have finished.
+        DCHECK_EQ(_num_remaining_senders, 0);
+        return Status::OK();
+    }
+    _received_first_batch = true;
+    DCHECK(!_block_queue.empty());
+    auto& head = _block_queue.front();
+    Block* fetched = head.second;
+    _recvr->_num_buffered_bytes -= head.first;
+    VLOG_ROW << "fetched #rows=" << fetched->rows();
+    _block_queue.pop_front();
+    _current_block.reset(fetched);
+    *next_block = _current_block.get();
+
+    // Room was freed: run the oldest parked closure (if any) to release that sender and
+    // record how long it was held back.
+    if (!_pending_closures.empty()) {
+        auto parked = _pending_closures.front();
+        parked.first->Run();
+        _pending_closures.pop_front();
+        parked.second.stop();
+        _recvr->_buffer_full_total_timer->update(parked.second.elapsed_time());
+    }
+    return Status::OK();
+}
+
+// Deserializes a remote PBlock and appends it to this queue. Packets whose sequence number
+// is not newer than the last one seen from `be_number` are dropped with a warning. If `done`
+// is non-null and the receiver buffer limit is exceeded, *done is parked (and set to nullptr)
+// until get_batch(), cancel() or close() runs it.
+void VDataStreamRecvr::SenderQueue::add_block(const PBlock& pblock, int be_number,
+                                              int64_t packet_seq,
+                                              ::google::protobuf::Closure** done) {
+    std::unique_lock<std::mutex> guard(_lock);
+    if (_is_cancelled) {
+        return;
+    }
+    // Remember the newest packet sequence per backend so repeated packets can be rejected.
+    auto pos = _packet_seq_map.find(be_number);
+    if (pos == _packet_seq_map.end()) {
+        _packet_seq_map.emplace(be_number, packet_seq);
+    } else if (pos->second >= packet_seq) {
+        LOG(WARNING) << fmt::format(
+                "packet already exist [cur_packet_id= {} receive_packet_id={}]", pos->second,
+                packet_seq);
+        return;
+    } else {
+        pos->second = packet_seq;
+    }
+    auto block_byte_size = pblock.ByteSizeLong();
+    COUNTER_UPDATE(_recvr->_bytes_received_counter, block_byte_size);
+
+    if (_num_remaining_senders <= 0) {
+        DCHECK(_sender_eos_set.end() != _sender_eos_set.find(be_number));
+        return;
+    }
+
+    Block* block = nullptr;
+    {
+        SCOPED_TIMER(_recvr->_deserialize_row_batch_timer);
+        block = new Block(pblock);
+    }
+    _recvr->_mem_tracker->Consume(block->bytes());
+
+    VLOG_ROW << "added #rows=" << block->rows() << " batch_size=" << block_byte_size << "\n";
+    _block_queue.emplace_back(block_byte_size, block);
+    // A null `done` means this call is not allowed to delay the response.
+    if (done != nullptr && _recvr->exceeds_limit(block_byte_size)) {
+        MonotonicStopWatch blocked_timer;
+        blocked_timer.start();
+        DCHECK(*done != nullptr);
+        _pending_closures.emplace_back(*done, blocked_timer);
+        *done = nullptr;
+    }
+    _recvr->_num_buffered_bytes += block_byte_size;
+    _data_arrival_cv.notify_one();
+}
+
+// Local-exchange path: queues a new Block built from `block`'s columns. With use_move the
+// source block is cleared; otherwise every column is cloned so the queue holds its own copy.
+// If the buffer limit is exceeded, the caller sleeps until its ThreadClosure is run.
+void VDataStreamRecvr::SenderQueue::add_block(Block* block, bool use_move) {
+    std::unique_lock<std::mutex> l(_lock);
+    if (_is_cancelled) {
+        return;
+    }
+    Block* nblock = new Block(block->get_columns_with_type_and_name());
+    nblock->info = block->info;
+    if (use_move) {
+        block->clear();
+    } else {
+        auto rows = block->rows();
+        for (int i = 0; i < nblock->columns(); ++i) {
+            nblock->get_by_position(i).column =
+                    nblock->get_by_position(i).column->clone_resized(rows);
+        }
+    }
+    size_t block_size = nblock->bytes();
+    const bool over_limit = _recvr->exceeds_limit(block_size);
+    _block_queue.emplace_back(block_size, nblock);
+    _recvr->_mem_tracker->Consume(block_size);
+    // Count the bytes before waiting: get_batch() subtracts them once it pops this block.
+    _recvr->_num_buffered_bytes += block_size;
+    _data_arrival_cv.notify_one();
+
+    if (over_limit) {
+        std::thread::id tid = std::this_thread::get_id();
+        MonotonicStopWatch monotonicStopWatch;
+        monotonicStopWatch.start();
+        auto iter = _local_closure.find(tid);
+        if (iter == _local_closure.end()) {
+            iter = _local_closure.emplace(tid, new ThreadClosure).first;
+        }
+        _pending_closures.emplace_back(iter->second.get(), monotonicStopWatch);
+        iter->second->wait(l);
+    }
+}
+
+// Records end-of-stream for sender `be_number` once and wakes the consumer when none remain.
+void VDataStreamRecvr::SenderQueue::decrement_senders(int be_number) {
+    std::lock_guard<std::mutex> guard(_lock);
+    if (!_sender_eos_set.insert(be_number).second) {
+        return;
+    }
+    DCHECK_GT(_num_remaining_senders, 0);
+    --_num_remaining_senders;
+    VLOG_FILE << "decremented senders: fragment_instance_id=" << _recvr->fragment_instance_id()
+              << " node_id=" << _recvr->dest_node_id() << " #senders=" << _num_remaining_senders;
+    if (_num_remaining_senders == 0) {
+        _data_arrival_cv.notify_one();
+    }
+}
+
+// Cancels the queue (no-op if already cancelled): wakes any consumer blocked in get_batch()
+// and runs every parked closure so delayed senders are released.
+void VDataStreamRecvr::SenderQueue::cancel() {
+    {
+        std::lock_guard<std::mutex> guard(_lock);
+        if (_is_cancelled) {
+            return;
+        }
+        _is_cancelled = true;
+        VLOG_QUERY << "cancelled stream: _fragment_instance_id=" << _recvr->fragment_instance_id()
+                   << " node_id=" << _recvr->dest_node_id();
+    }
+    // Waiters re-check _is_cancelled after waking up and return on their own.
+    _data_arrival_cv.notify_all();
+    // _data_removal_cv.notify_all();
+    // PeriodicCounterUpdater::StopTimeSeriesCounter(
+    //         _recvr->_bytes_received_time_series_counter);
+    {
+        std::lock_guard<std::mutex> guard(_lock);
+        for (auto& parked : _pending_closures) {
+            parked.first->Run();
+        }
+        _pending_closures.clear();
+    }
+}
+
+// Cancels the queue, releases every parked sender and frees all blocks the queue still owns.
+void VDataStreamRecvr::SenderQueue::close() {
+    {
+        // _is_cancelled must be set first: otherwise a concurrent sender could append a
+        // block after the queue has been drained below, and that block would leak.
+        std::lock_guard<std::mutex> l(_lock);
+        _is_cancelled = true;
+        for (auto closure_pair : _pending_closures) {
+            closure_pair.first->Run();
+        }
+        _pending_closures.clear();
+    }
+
+    // Free the queued blocks and drop the now-dangling pointers so that a repeated close()
+    // cannot delete them twice.
+    for (auto& entry : _block_queue) {
+        delete entry.second;
+    }
+    _block_queue.clear();
+    _current_block.reset();
+}
+
+// Creates the receiver: its own MemTracker (created with `parent_tracker`), one SenderQueue
+// per sender when merging (a single shared queue otherwise), and its profile counters.
+VDataStreamRecvr::VDataStreamRecvr(
+        VDataStreamMgr* stream_mgr, const std::shared_ptr<MemTracker>& parent_tracker,
+        const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id,
+        PlanNodeId dest_node_id, int num_senders, bool is_merging, int total_buffer_limit,
+        RuntimeProfile* profile,
+        std::shared_ptr<QueryStatisticsRecvr> sub_plan_query_statistics_recvr)
+        : _mgr(stream_mgr),
+          _fragment_instance_id(fragment_instance_id),
+          _dest_node_id(dest_node_id),
+          _total_buffer_limit(total_buffer_limit),
+          _row_desc(row_desc),
+          _is_merging(is_merging),
+          _is_closed(false),
+          _num_buffered_bytes(0),
+          _profile(profile),
+          _sub_plan_query_statistics_recvr(sub_plan_query_statistics_recvr) {
+    _mem_tracker = MemTracker::CreateTracker(
+            _profile, -1, "VDataStreamRecvr:" + print_id(_fragment_instance_id), parent_tracker);
+
+    // When merging, every sender gets a queue of its own; otherwise all share queue 0.
+    const int queue_count = is_merging ? num_senders : 1;
+    const int senders_per_queue = is_merging ? 1 : num_senders;
+    _sender_queues.reserve(queue_count);
+    for (int idx = 0; idx < queue_count; ++idx) {
+        _sender_queues.push_back(
+                _sender_queue_pool.add(new SenderQueue(this, senders_per_queue, profile)));
+    }
+
+    // Register the profile counters.
+    _bytes_received_counter = ADD_COUNTER(_profile, "BytesReceived", TUnit::BYTES);
+    _deserialize_row_batch_timer = ADD_TIMER(_profile, "DeserializeRowBatchTimer");
+    _data_arrival_timer = ADD_TIMER(_profile, "DataArrivalWaitTime");
+    _buffer_full_total_timer = ADD_TIMER(_profile, "SendersBlockedTotalTimer(*)");
+    _first_batch_wait_total_timer = ADD_TIMER(_profile, "FirstBatchArrivalWaitTime");
+}
+
+VDataStreamRecvr::~VDataStreamRecvr() { // close() is what resets _mgr, so it must run before destruction
+    DCHECK(_mgr == nullptr) << "Must call close()";
+}
+
+// Builds the VSortedRunMerger that turns the per-sender queues into a single stream of sorted
+// rows. Only valid on a merging receiver; each queue's get_batch() becomes one block supplier.
+Status VDataStreamRecvr::create_merger(const std::vector<VExprContext*>& ordering_expr,
+                                       const std::vector<bool>& is_asc_order,
+                                       const std::vector<bool>& nulls_first, size_t batch_size,
+                                       int64_t limit, size_t offset) {
+    DCHECK(_is_merging);
+    _merger.reset(new VSortedRunMerger(ordering_expr, is_asc_order, nulls_first, batch_size, limit,
+                                       offset, _profile));
+
+    std::vector<BlockSupplier> suppliers;
+    for (SenderQueue* queue : _sender_queues) {
+        suppliers.emplace_back([queue](Block** next) { return queue->get_batch(next); });
+    }
+    RETURN_IF_ERROR(_merger->prepare(suppliers));
+    return Status::OK();
+}
+
+// Routes a remote block to sender_id's queue when merging, else to the single shared queue.
+void VDataStreamRecvr::add_block(const PBlock& pblock, int sender_id, int be_number,
+                                 int64_t packet_seq, ::google::protobuf::Closure** done) {
+    _sender_queues[_is_merging ? sender_id : 0]->add_block(pblock, be_number, packet_seq, done);
+}
+
+// Routes a local block to sender_id's queue when merging, else to the single shared queue.
+void VDataStreamRecvr::add_block(Block* block, int sender_id, bool use_move) {
+    _sender_queues[_is_merging ? sender_id : 0]->add_block(block, use_move);
+}
+
+// Fetches the next block from the single queue (setting *eos and returning early when it is
+// drained) or from the merger, then releases the block's bytes from the memory tracker.
+Status VDataStreamRecvr::get_next(Block* block, bool* eos) {
+    if (_is_merging) {
+        RETURN_IF_ERROR(_merger->get_next(block, eos));
+    } else {
+        Block* fetched = nullptr;
+        RETURN_IF_ERROR(_sender_queues[0]->get_batch(&fetched));
+        if (fetched == nullptr) {
+            *eos = true;
+            return Status::OK();
+        }
+        block->swap(*fetched);
+    }
+    if (LIKELY(_mem_tracker->consumption() >= block->bytes())) {
+        _mem_tracker->Release(block->bytes());
+    } else {
+        _mem_tracker->Release(_mem_tracker->consumption());
+    }
+    return Status::OK();
+}
+
+// Reports end-of-stream for be_number to sender_id's queue (queue 0 when not merging).
+void VDataStreamRecvr::remove_sender(int sender_id, int be_number) {
+    _sender_queues[_is_merging ? sender_id : 0]->decrement_senders(be_number);
+}
+
+// Cancels every sender queue, which wakes blocked consumers and runs parked closures
+// (see SenderQueue::cancel()).
+void VDataStreamRecvr::cancel_stream() {
+    for (SenderQueue* queue : _sender_queues) { queue->cancel(); }
+}
+
+// Idempotent shutdown: closes the queues, deregisters from the stream manager, frees the rest.
+void VDataStreamRecvr::close() {
+    if (_is_closed) {
+        return;
+    }
+    _is_closed = true;
+    for (SenderQueue* queue : _sender_queues) {
+        queue->close();
+    }
+    // Deregister from the creating VDataStreamMgr; log a failure instead of silently dropping it.
+    Status st = _mgr->deregister_recvr(fragment_instance_id(), dest_node_id());
+    LOG_IF(WARNING, !st.ok()) << "failed to deregister VDataStreamRecvr: " << st.to_string();
+    _mgr = nullptr;
+    _merger.reset();
+    _mem_tracker->Release(_mem_tracker->consumption());
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h
new file mode 100644
index 0000000000..1292b978b5
--- /dev/null
+++ b/be/src/vec/runtime/vdata_stream_recvr.h
@@ -0,0 +1,188 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <atomic>
+#include <condition_variable>
+#include <deque>
+#include <list>
+#include <thread>
+
+#include "common/global_types.h"
+#include "common/object_pool.h"
+#include "common/status.h"
+#include "gen_cpp/Types_types.h"
+#include "runtime/descriptors.h"
+#include "runtime/query_statistics.h"
+#include "util/runtime_profile.h"
+
+namespace google {
+namespace protobuf {
+class Closure;
+}
+} // namespace google
+
+namespace doris {
+class MemTracker;
+class RuntimeProfile;
+class PBlock;
+
+namespace vectorized {
+class Block;
+class VDataStreamMgr;
+class VSortedRunMerger;
+class VExprContext;
+
+class VDataStreamRecvr {
+public:
+    VDataStreamRecvr(VDataStreamMgr* stream_mgr, const std::shared_ptr<MemTracker>& parent_tracker,
+                     const RowDescriptor& row_desc, const TUniqueId& fragment_instance_id,
+                     PlanNodeId dest_node_id, int num_senders, bool is_merging,
+                     int total_buffer_limit, RuntimeProfile* profile,
+                     std::shared_ptr<QueryStatisticsRecvr> sub_plan_query_statistics_recvr);
+    // close() must be called before destruction (the destructor DCHECKs that _mgr is nullptr).
+    ~VDataStreamRecvr();
+    // Sets up the sorted-run merger over the sender queues; requires a merging receiver.
+    Status create_merger(const std::vector<VExprContext*>& ordering_expr,
+                         const std::vector<bool>& is_asc_order,
+                         const std::vector<bool>& nulls_first, size_t batch_size, int64_t limit,
+                         size_t offset);
+    // Queues a block from a remote sender; *done is taken over (set to nullptr) if deferred.
+    void add_block(const PBlock& pblock, int sender_id, int be_number, int64_t packet_seq,
+                   ::google::protobuf::Closure** done);
+    // Queues a block from a local sender; `block` is cleared when use_move is true.
+    void add_block(Block* block, int sender_id, bool use_move);
+    // Fetches the next block (via the merger when merging); the non-merging path sets *eos at end.
+    Status get_next(Block* block, bool* eos);
+
+    const TUniqueId& fragment_instance_id() const { return _fragment_instance_id; }
+    PlanNodeId dest_node_id() const { return _dest_node_id; }
+    const RowDescriptor& row_desc() const { return _row_desc; }
+    std::shared_ptr<MemTracker> mem_tracker() const { return _mem_tracker; }
+
+    void add_sub_plan_statistics(const PQueryStatistics& statistics, int sender_id) {
+        _sub_plan_query_statistics_recvr->insert(statistics, sender_id);
+    }
+
+    // Indicate that a particular sender is done. Delegated to the appropriate
+    // sender queue. Called from DataStreamMgr.
+    void remove_sender(int sender_id, int be_number);
+    // Cancels every sender queue.
+    void cancel_stream();
+    // Closes all sender queues and deregisters from the stream manager; a second call is a no-op.
+    void close();
+
+private:
+    class SenderQueue;
+    friend class ReceiveQueueSortCursorImpl;
+    // True if buffering batch_size more bytes would exceed _total_buffer_limit.
+    bool exceeds_limit(int batch_size) {
+        return _num_buffered_bytes + batch_size > _total_buffer_limit;
+    }
+
+    // VDataStreamMgr instance used to create this recvr (not owned); set to nullptr by close().
+    VDataStreamMgr* _mgr;
+
+    // Fragment and node id of the destination exchange node this receiver is used by.
+    TUniqueId _fragment_instance_id;
+    PlanNodeId _dest_node_id;
+
+    // soft upper limit on the total amount of buffering allowed for this stream across
+    // all sender queues. we stop acking incoming data once the amount of buffered data
+    // exceeds this value
+    int _total_buffer_limit;
+
+    // Row schema, copied from the caller of CreateRecvr().
+    RowDescriptor _row_desc;
+
+    // True if this receiver merges incoming rows from different senders. Per-sender
+    // row batch queues are maintained in this case.
+    bool _is_merging;
+    bool _is_closed; // set by close(); makes a repeated close() a no-op
+
+    std::atomic<int> _num_buffered_bytes; // running total of block bytes held in the sender queues
+    std::shared_ptr<MemTracker> _mem_tracker;
+    std::vector<SenderQueue*> _sender_queues; // pointers into _sender_queue_pool
+
+    std::unique_ptr<VSortedRunMerger> _merger;
+
+    ObjectPool _sender_queue_pool;
+    RuntimeProfile* _profile;
+
+    RuntimeProfile::Counter* _bytes_received_counter;
+    RuntimeProfile::Counter* _deserialize_row_batch_timer;
+    RuntimeProfile::Counter* _first_batch_wait_total_timer;
+    RuntimeProfile::Counter* _buffer_full_total_timer;
+    RuntimeProfile::Counter* _data_arrival_timer;
+
+    std::shared_ptr<QueryStatisticsRecvr> _sub_plan_query_statistics_recvr;
+};
+
+// Closure that parks a local sender thread: wait() sleeps on the cv, Run() wakes one waiter.
+class ThreadClosure : public google::protobuf::Closure {
+public:
+    void Run() override { _cv.notify_one(); }
+    void wait(std::unique_lock<std::mutex>& lock) { _cv.wait(lock); }
+private:
+    std::condition_variable _cv;
+};
+
+class VDataStreamRecvr::SenderQueue {
+public:
+    SenderQueue(VDataStreamRecvr* parent_recvr, int num_senders, RuntimeProfile* profile);
+
+    ~SenderQueue();
+    // Blocks until a block is available, all senders have finished, or the queue is cancelled.
+    Status get_batch(Block** next_block);
+    // Remote path: deserializes pblock; packets with a stale packet_seq per be_number are dropped.
+    void add_block(const PBlock& pblock, int be_number, int64_t packet_seq,
+                   ::google::protobuf::Closure** done);
+    // Local path: queues a copy of block, or takes its columns and clears it when use_move is true.
+    void add_block(Block* block, bool use_move);
+    // Marks one sender as finished. NOTE: the definition names this parameter be_number.
+    void decrement_senders(int sender_id);
+
+    void cancel();
+
+    void close();
+
+    Block* current_block() const { return _current_block.get(); }
+
+private:
+    VDataStreamRecvr* _recvr;
+    std::mutex _lock;
+    bool _is_cancelled;
+    int _num_remaining_senders;
+    std::condition_variable _data_arrival_cv;
+    std::condition_variable _data_removal_cv; // NOTE(review): only referenced from commented-out code in the .cpp
+    // Queue entries: (byte size counted in _num_buffered_bytes, heap-allocated block).
+    using VecBlockQueue = std::list<std::pair<int, Block*>>;
+    VecBlockQueue _block_queue;
+    // Block most recently returned by get_batch(); kept alive until the next call.
+    std::unique_ptr<Block> _current_block;
+
+    bool _received_first_batch;
+    // be_numbers of the senders that have already reported end-of-stream
+    std::unordered_set<int> _sender_eos_set;
+    // be_number => highest packet_seq accepted so far
+    std::unordered_map<int, int64_t> _packet_seq_map;
+    std::deque<std::pair<google::protobuf::Closure*, MonotonicStopWatch>> _pending_closures;
+    std::unordered_map<std::thread::id, std::unique_ptr<ThreadClosure>> _local_closure;
+};
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp
new file mode 100644
index 0000000000..4c59277764
--- /dev/null
+++ b/be/src/vec/runtime/vdatetime_value.cpp
@@ -0,0 +1,1626 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/runtime/vdatetime_value.h"
+
+#include <ctype.h>
+#include <string.h>
+#include <time.h>
+
+#include <limits>
+#include <sstream>
+
+#include "common/logging.h"
+#include "util/timezone_utils.h"
+#include "runtime/datetime_value.h"
+
+namespace doris::vectorized {
+// Days per month of a non-leap year, indexed by month number 1..12 (slot 0 is unused).
+static int s_days_in_month[13] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+// Abbreviated month names indexed by month number 1..12; slot 0 is empty, NULL-terminated.
+static const char* s_ab_month_name[] = {"",    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+                                        "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", NULL};
+// Abbreviated weekday names starting at Monday, NULL-terminated.
+static const char* s_ab_day_name[] = {"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun", NULL};
+
+// Reduces a week mode to its low three bits; when WEEK_MONDAY_FIRST is not set, the
+// WEEK_FIRST_WEEKDAY bit is toggled.
+uint8_t mysql_week_mode(uint32_t mode) {
+    const uint32_t flags = mode & 7;
+    const bool monday_first = (flags & WEEK_MONDAY_FIRST) != 0;
+    return monday_first ? flags : (flags ^ WEEK_FIRST_WEEKDAY);
+}
+
+// Gregorian leap-year rule, except that year 0 is never reported as a leap year.
+static bool is_leap(uint32_t year) {
+    return year % 4 == 0 && (year % 100 != 0 || (year != 0 && year % 400 == 0));
+}
+
+// Number of days in `year`: 366 for leap years, 365 otherwise.
+static uint32_t calc_days_in_year(uint32_t year) { return is_leap(year) ? 366 : 365; }
+
+RE2 VecDateTimeValue::time_zone_offset_format_reg("^[+-]{1}\\d{2}\\:\\d{2}$"); // a sign, two digits, ':', two digits, e.g. "+08:00"
+
+bool VecDateTimeValue::check_range(uint32_t year, uint32_t month, uint32_t day, uint32_t hour,
+        uint32_t minute, uint32_t second, uint16_t type) {
+    bool time = hour > (type == TIME_TIME ? TIME_MAX_HOUR : 23) || minute > 59 || second > 59;
+    return time || check_date(year, month, day); // NOTE: true means OUT of range (invalid)
+}
+
+// Returns true when (year, month, day) is INVALID; Feb 29 is accepted only in leap years.
+bool VecDateTimeValue::check_date(uint32_t year, uint32_t month, uint32_t day) {
+    const bool known_month = month >= 1 && month <= 12;
+    const bool leap_feb_29 = month == 2 && day == 29 && is_leap(year);
+    if (known_month && day > s_days_in_month[month] && !leap_feb_29) return true;
+    return year > 9999 || month > 12 || day > 31;
+}
+
+// Parses a date or datetime string into this value. The interval format is that with no delimiters
+// YYYY-MM-DD HH-MM-SS.FFFFFF AM in default format
+// 0    1  2  3  4  5  6      7
+bool VecDateTimeValue::from_date_str(const char* date_str, int len) {
+    const char* ptr = date_str;
+    const char* end = date_str + len;
+    // Only fields 2 and 6 may be followed by a space (bit mask over field indexes)
+    const static int allow_space_mask = 4 | 64;
+    const static int MAX_DATE_PARTS = 8;
+    uint32_t date_val[MAX_DATE_PARTS];
+    int32_t date_len[MAX_DATE_PARTS];
+
+    _neg = false;
+    // Skip space character
+    while (ptr < end && isspace(*ptr)) {
+        ptr++;
+    }
+    if (ptr == end || !isdigit(*ptr)) {
+        return false;
+    }
+    // Measure the leading run of digits (and 'T') to decide how long the year field is
+    const char* pos = ptr;
+    while (pos < end && (isdigit(*pos) || *pos == 'T')) {
+        pos++;
+    }
+    int year_len = 4;
+    int digits = pos - ptr;
+    bool is_interval_format = false;
+
+    // MySQL-compatible handling of input without delimiters.
+    // 4, 8 or >= 14 digits mean a 4-digit year (YYYYMMDD/YYYYMMDDHHMMSS); otherwise 2 digits
+    if (pos == end || *pos == '.') {
+        if (digits == 4 || digits == 8 || digits >= 14) {
+            year_len = 4;
+        } else {
+            year_len = 2;
+        }
+        is_interval_format = true;
+    }
+
+    int field_idx = 0;
+    int field_len = year_len;
+    while (ptr < end && isdigit(*ptr) && field_idx < MAX_DATE_PARTS - 1) {
+        const char* start = ptr;
+        int temp_val = 0;
+        bool scan_to_delim = (!is_interval_format) && (field_idx != 6);
+        while (ptr < end && isdigit(*ptr) && (scan_to_delim || field_len--)) {
+            temp_val = temp_val * 10 + (*ptr++ - '0');
+        }
+        // Impossible for a valid field
+        if (temp_val > 999999L) {
+            return false;
+        }
+        date_val[field_idx] = temp_val;
+        date_len[field_idx] = ptr - start;
+        field_len = 2;
+
+        if (ptr == end) {
+            field_idx++;
+            break;
+        }
+        if (field_idx == 2 && *ptr == 'T') {
+            // YYYYMMDDTHHMMSS, skip 'T' and continue
+            ptr++;
+            field_idx++;
+            continue;
+        }
+
+        // After the seconds field: a '.' introduces a fraction of up to 6 digits
+        if (field_idx == 5) {
+            if (*ptr == '.') {
+                ptr++;
+                field_len = 6;
+            } else if (isdigit(*ptr)) {
+                field_idx++;
+                break;
+            }
+            field_idx++;
+            continue;
+        }
+        // Skip separators; whitespace is only accepted after the fields in allow_space_mask
+        while (ptr < end && (ispunct(*ptr) || isspace(*ptr))) {
+            if (isspace(*ptr)) {
+                if (((1 << field_idx) & allow_space_mask) == 0) {
+                    return false;
+                }
+            }
+            ptr++;
+        }
+        field_idx++;
+    }
+    int num_field = field_idx;
+    if (num_field <= 3) {
+        _type = TIME_DATE;
+    } else {
+        _type = TIME_DATETIME;
+    }
+    if (!is_interval_format) {
+        year_len = date_len[0];
+    }
+    for (; field_idx < MAX_DATE_PARTS; ++field_idx) {
+        date_len[field_idx] = 0;
+        date_val[field_idx] = 0;
+    }
+
+    if (year_len == 2) {
+        if (date_val[0] < YY_PART_YEAR) {
+            date_val[0] += 2000;
+        } else {
+            date_val[0] += 1900;
+        }
+    }
+    // Fewer than three fields cannot form a date; fields 6+ (fraction) are parsed but not passed on.
+    if (num_field < 3) return false;
+    return check_range_and_set_time(date_val[0], date_val[1], date_val[2],
+            date_val[3], date_val[4], date_val[5], _type);
+}
+
+// Normalizes an integer date/datetime to YYYYMMDDhhmmss and sets _type; returns 0 when invalid.
+// [0, 101) invalid
+// [101, (YY_PART_YEAR - 1) * 10000 + 1231] for two digits year 2000 ~ 2069
+// ((YY_PART_YEAR - 1) * 10000 + 1231, YY_PART_YEAR * 10000L + 101) invalid
+// [YY_PART_YEAR * 10000L + 101, 991231] for two digits year 1970 ~1999
+// (991231, 10000101) invalid, because dates are only supported from 1000-01-01
+// [10000101, 99991231] for four digits year date value.
+// (99991231, 101000000) invalid, NOTE from here on it is a datetime value, hh:mm:ss must exist.
+// [101000000, (YY_PART_YEAR - 1)##1231235959] two digits year datetime value 2000 ~ 2069, then
+//   invalid up to YY_PART_YEAR##0101000000; [that, 991231235959] is two digits year 1970 ~ 1999
+// (991231235959, 99999999999999] returned unchanged as a datetime; anything larger is invalid
+int64_t VecDateTimeValue::standardize_timevalue(int64_t value) {
+    _type = TIME_DATE; // default; switched to TIME_DATETIME below when hh:mm:ss is present
+    if (value <= 0) {
+        return 0;
+    }
+    if (value >= 10000101000000L) {
+        // 9999-99-99 99:99:99
+        if (value > 99999999999999L) {
+            return 0;
+        }
+
+        // between 1000-01-01 00:00:00L and 9999-99-99 99:99:99
+        // all digits exist.
+        _type = TIME_DATETIME;
+        return value;
+    }
+    // smaller than 000101, i.e. 2000-01-01 written as YYMMDD
+    if (value < 101) {
+        return 0;
+    }
+    // two digits  year. 2000 ~ 2069
+    if (value <= (YY_PART_YEAR - 1) * 10000L + 1231L) {
+        return (value + 20000000L) * 1000000L;
+    }
+    // two digits year, invalid date
+    if (value < YY_PART_YEAR * 10000L + 101) {
+        return 0;
+    }
+    // two digits year. 1970 ~ 1999
+    if (value <= 991231L) {
+        return (value + 19000000L) * 1000000L;
+    }
+    // TODO(zhaochun): Don't allow dates before 1000-01-01
+    if (value < 10000101) {
+        return 0;
+    }
+    // four digits years without hour.
+    if (value <= 99991231L) {
+        return value * 1000000L;
+    }
+    // too small to be a YYMMDDhhmmss datetime (below 000101000000)
+    if (value < 101000000) {
+        return 0;
+    }
+
+    // below is with datetime, must have hh:mm:ss
+    _type = TIME_DATETIME;
+    // 2000 ~ 2069
+    if (value <= (YY_PART_YEAR - 1) * 10000000000L + 1231235959L) {
+        return value + 20000000000000L;
+    }
+    if (value < YY_PART_YEAR * 10000000000L + 101000000L) {
+        return 0;
+    }
+    // 1970 ~ 1999
+    if (value <= 991231235959L) {
+        return value + 19000000000000L;
+    }
+    return value;
+}
+
+// Parses an integer date or datetime into this value. The input is first normalized by
+// standardize_timevalue(); returns false if that rejects it, otherwise the result of
+// check_range_and_set_time().
+bool VecDateTimeValue::from_date_int64(int64_t value) {
+    _neg = false;
+    value = standardize_timevalue(value);
+    if (value <= 0) {
+        return false;
+    }
+    // The normalized layout is YYYYMMDDhhmmss: split it into the date half and the time half.
+    const uint64_t date = value / 1000000;
+    const uint64_t time = value % 1000000;
+
+    const int year = date / 10000;
+    const int month = (date % 10000) / 100;
+    const int day = date % 100;
+    const int hour = time / 10000;
+    const int minute = (time % 10000) / 100;
+    const int second = time % 100;
+    return check_range_and_set_time(year, month, day, hour, minute, second, _type);
+}
+
+void VecDateTimeValue::set_zero(int type) {
+    memset(this, 0, sizeof(*this)); // zero every byte of the object first
+    _type = type; // then tag the requested type
+}
+
+void VecDateTimeValue::set_type(int type) {
+    _type = type; // only the type tag changes; every other field is left as it is
+}
+
+void VecDateTimeValue::set_max_time(bool neg) {
+    set_zero(TIME_TIME); // clear everything, then fill in the largest time with the given sign
+    _hour = static_cast<uint8_t>(TIME_MAX_HOUR); // NOTE(review): narrows to 8 bits; confirm TIME_MAX_HOUR fits
+    _minute = TIME_MAX_MINUTE;
+    _second = TIME_MAX_SECOND;
+    _neg = neg;
+}
+
+// Parses an integer of the form [-]HHMMSS into a TIME_TIME value. Values beyond
+// +/-TIME_MAX_VALUE are clamped via set_max_time() and reported as failure, except that
+// values above 10000000000 are first tried as a datetime through from_date_int64().
+// Returns false if the minute or second part exceeds its maximum.
+bool VecDateTimeValue::from_time_int64(int64_t value) {
+    _type = TIME_TIME;
+    if (value > TIME_MAX_VALUE) {
+        // Too large for a time; a value this big may still be a full datetime.
+        if (value > 10000000000L && from_date_int64(value)) {
+            return true;
+        }
+        set_max_time(false);
+        return false;
+    } else if (value < -1 * TIME_MAX_VALUE) {
+        set_max_time(true);
+        return false;
+    }
+    // Always assign the sign so that a reused object cannot keep a stale negative flag.
+    _neg = value < 0;
+    if (value < 0) {
+        value = -value;
+    }
+    _hour = value / 10000;
+    value %= 10000;
+    _minute = value / 100;
+    if (_minute > TIME_MAX_MINUTE) {
+        return false;
+    }
+    _second = value % 100;
+    return _second <= TIME_MAX_SECOND;
+}
+
+// Writes the date as "YYYY-MM-DD" at `to` (exactly 10 characters, no terminator) and
+// returns the position just past the last character written.
+char* VecDateTimeValue::append_date_buffer(char* to) const {
+    // Emits v / 10 and v % 10 as two characters and advances `to`.
+    auto put2 = [&to](uint32_t v) {
+        *to++ = (char)('0' + (v / 10));
+        *to++ = (char)('0' + (v % 10));
+    };
+    // Year: the hundreds part first, then the two low digits.
+    put2(_year / 100);
+    put2(_year % 100);
+    *to++ = '-';
+    // Month
+    put2(_month);
+    *to++ = '-';
+    // Day
+    put2(_day);
+    return to;
+}
+
+// Writes the time as "[-]HH:MM:SS" at `to` (no terminator), with a third hour digit when the
+// hour is 100 or more, and returns the position just past the last character written.
+char* VecDateTimeValue::append_time_buffer(char* to) const {
+    auto put2 = [&to](uint32_t v) {
+        *to++ = (char)('0' + (v / 10));
+        *to++ = (char)('0' + (v % 10));
+    };
+    if (_neg) {
+        *to++ = '-';
+    }
+    uint32_t hour = _hour;
+    if (hour >= 100) {
+        *to++ = (char)('0' + (hour / 100));
+        hour %= 100;
+    }
+    put2(hour);
+    *to++ = ':';
+    put2(_minute);
+    *to++ = ':';
+    put2(_second);
+    return to;
+}
+
+char* VecDateTimeValue::to_datetime_buffer(char* to) const {
+    to = append_date_buffer(to);
+    *to++ = ' '; // single space between the date part and the time part
+    return append_time_buffer(to); // position just past the last character written
+}
+
+// Writes only the date part at `to`; thin wrapper over append_date_buffer().
+char* VecDateTimeValue::to_date_buffer(char* to) const {
+    char* const finish = append_date_buffer(to);
+    return finish;
+}
+
+// Writes only the time part at `to`; thin wrapper over append_time_buffer().
+char* VecDateTimeValue::to_time_buffer(char* to) const {
+    char* const finish = append_time_buffer(to);
+    return finish;
+}
+
+// Renders the value into `buffer` using the layout that matches _type and
+// returns the number of characters written. Nothing is written and 0 is
+// returned for any other type. No terminator is appended.
+int32_t VecDateTimeValue::to_buffer(char* buffer) const {
+    const char* finish = buffer;
+    if (_type == TIME_TIME) {
+        finish = to_time_buffer(buffer);
+    } else if (_type == TIME_DATE) {
+        finish = to_date_buffer(buffer);
+    } else if (_type == TIME_DATETIME) {
+        finish = to_datetime_buffer(buffer);
+    }
+    return finish - buffer;
+}
+
+// Renders the value at `to` as a NUL-terminated string and returns the
+// position right behind the terminator.
+char* VecDateTimeValue::to_string(char* to) const {
+    char* terminator = to + to_buffer(to);
+    *terminator = '\0';
+    return terminator + 1;
+}
+
+// Packs the value as the decimal number YYYYMMDDHHMMSS.
+int64_t VecDateTimeValue::to_datetime_int64() const {
+    // YYYYMMDD first; it is then shifted left by six decimal digits.
+    const auto packed_date = _year * 10000L + _month * 100 + _day;
+    return packed_date * 1000000L + _hour * 10000 + _minute * 100 + _second;
+}
+
+// Packs the date part as the decimal number YYYYMMDD.
+int64_t VecDateTimeValue::to_date_int64() const {
+    const auto packed_date = _year * 10000 + _month * 100 + _day;
+    return packed_date;
+}
+
+// Packs the time part as the decimal number HHMMSS, negated when _neg is set.
+int64_t VecDateTimeValue::to_time_int64() const {
+    const auto magnitude = _hour * 10000 + _minute * 100 + _second;
+    int sign = 1;
+    if (_neg != 0) {
+        sign = -1;
+    }
+    return sign * magnitude;
+}
+
+// Packs the value as a decimal number using the layout that matches _type
+// (HHMMSS, YYYYMMDD or YYYYMMDDHHMMSS); any other type yields 0.
+int64_t VecDateTimeValue::to_int64() const {
+    if (_type == TIME_TIME) {
+        return to_time_int64();
+    }
+    if (_type == TIME_DATE) {
+        return to_date_int64();
+    }
+    if (_type == TIME_DATETIME) {
+        return to_datetime_int64();
+    }
+    return 0;
+}
+
+// Converts a day number (the counting used by calc_daynr()) back into
+// year/month/day and stores it through set_time(), keeping the current
+// hour/minute/second. Returns false when `daynr` is 0, exceeds DATE_MAX_DAYNR,
+// or when check_range() flags the resulting date.
+bool VecDateTimeValue::get_date_from_daynr(uint64_t daynr) {
+    if (daynr <= 0 || daynr > DATE_MAX_DAYNR) {
+        return false;
+    }
+
+    // Start from an estimate that can only be too late, then walk back until
+    // January 1st of the candidate year is not after `daynr`.
+    int year = daynr / 365;
+    uint32_t first_day_of_year = calc_daynr(year, 1, 1);
+    while (daynr < first_day_of_year) {
+        year--;
+        first_day_of_year = calc_daynr(year, 1, 1);
+    }
+
+    // 1-based position inside the year.
+    uint32_t day_of_year = daynr - first_day_of_year + 1;
+
+    // In a leap year, fold everything after Feb 28th back onto the non-leap
+    // calendar; Feb 29th itself lands on Feb 28th and is restored afterwards.
+    int leap_day = 0;
+    if (is_leap(year) && day_of_year > 31 + 28) {
+        day_of_year--;
+        if (day_of_year == 31 + 28) {
+            leap_day = 1;
+        }
+    }
+
+    // Peel off whole months using the month-length table.
+    int month = 1;
+    for (; day_of_year > s_days_in_month[month]; month++) {
+        day_of_year -= s_days_in_month[month];
+    }
+    int day = day_of_year + leap_day;
+
+    if (check_range(year, month, day, 0, 0, 0, _type)) {
+        return false;
+    }
+    set_time(year, month, day, _hour, _minute, _second);
+    return true;
+}
+
+// Turns this value into the plain date identified by `daynr`: the sign flag is
+// cleared, the time fields are zeroed and the type becomes TIME_DATE.
+// Returns false (with the sign flag already cleared) when the day number
+// cannot be converted.
+bool VecDateTimeValue::from_date_daynr(uint64_t daynr) {
+    _neg = false;
+    const bool converted = get_date_from_daynr(daynr);
+    if (converted) {
+        _hour = 0;
+        _minute = 0;
+        _second = 0;
+        _type = TIME_DATE;
+    }
+    return converted;
+}
+
+// Day-number computation adapted from MySQL.
+// Maps a calendar date to a running day count; year 0 together with month 0
+// maps to 0.
+uint64_t VecDateTimeValue::calc_daynr(uint32_t year, uint32_t month, uint32_t day) {
+    if (year == 0 && month == 0) {
+        return 0;
+    }
+
+    // Year used for counting the leap days that lie before the date.
+    int leap_year_base = year;
+
+    // First approximation: 365 days per year and 31 days per month.
+    uint64_t day_count = 365 * leap_year_base + 31 * (month - 1) + day;
+    if (month > 2) {
+        // Take back the days over-counted by assuming 31-day months:
+        // month:      3, 4, 5, 6, 7, 8, 9, 10, 11, 12
+        // correction: 3, 3, 4, 4, 5, 5, 5,  6,  6,  7
+        day_count -= (month * 4 + 23) / 10;
+    } else {
+        // January/February: this year's leap day is not behind us yet.
+        leap_year_base--;
+    }
+    // 97 leap years per 400 years: every 4th, except centuries not divisible by 400.
+    return day_count + leap_year_base / 4 - leap_year_base / 100 + leap_year_base / 400;
+}
+
+// Writes the decimal representation of `val` at `to` (no terminator) and
+// returns the position right behind the last digit.
+static char* int_to_str(uint64_t val, char* to) {
+    // Digits come out least-significant first, so stage them and copy backwards.
+    char reversed[64];
+    int count = 0;
+    // do/while so that 0 still produces one digit.
+    do {
+        reversed[count++] = '0' + (val % 10);
+        val /= 10;
+    } while (val);
+
+    for (int i = count - 1; i >= 0; --i) {
+        *to++ = reversed[i];
+    }
+    return to;
+}
+
+// Copies the NUL-terminated string `from` to `to` without the terminator and
+// returns the position right behind the last character copied.
+static char* append_string(const char* from, char* to) {
+    for (; *from != '\0'; ++from, ++to) {
+        *to = *from;
+    }
+    return to;
+}
+
+// Copies the first `str_len` characters of `str` to `to`, left-padded with
+// `prefix` up to a width of `full_len`. Longer input is copied unpadded and
+// never truncated. Returns the position right behind the last character written.
+static char* append_with_prefix(const char* str, int str_len, char prefix, int full_len, char* to) {
+    const int target_width = (str_len > full_len) ? str_len : full_len;
+    for (int pad = target_width - str_len; pad > 0; --pad) {
+        *to++ = prefix;
+    }
+    for (int i = 0; i < str_len; ++i) {
+        *to++ = str[i];
+    }
+    return to;
+}
+
+// Returns an upper bound for the number of characters to_format_string()
+// emits for the `len` bytes at `format`. The terminating NUL that
+// to_format_string() appends is not included.
+//
+// Literal characters count as one. A '%' followed by a specifier counts as the
+// widest text that specifier can expand to. A '%' that is the very last
+// character is treated as a literal, matching to_format_string().
+int VecDateTimeValue::compute_format_len(const char* format, int len) {
+    int size = 0;
+    const char* ptr = format;
+    const char* end = format + len;
+
+    while (ptr < end) {
+        // Plain character, or a dangling '%' with nothing after it.
+        if (*ptr != '%' || (ptr + 1) == end) {
+            size++;
+            ptr++;
+            continue;
+        }
+        // Skip '%' and consume the specifier so it is not counted a second
+        // time as a literal (and so "%%" is handled as one unit).
+        ptr++;
+        switch (*ptr++) {
+        case 'M':
+        case 'W':
+            // Full month / weekday name.
+            size += 10;
+            break;
+        case 'D':
+        case 'Y':
+        case 'x':
+        case 'X':
+            // Day with English suffix, or a four-digit year.
+            size += 4;
+            break;
+        case 'a':
+        case 'b':
+            // Abbreviated weekday / month name.
+            size += 10;
+            break;
+        case 'j':
+            // Day of year.
+            size += 3;
+            break;
+        case 'u':
+        case 'U':
+        case 'v':
+        case 'V':
+        case 'y':
+        case 'm':
+        case 'd':
+        case 'h':
+        case 'i':
+        case 'I':
+        case 'l':
+        case 'p':
+        case 'S':
+        case 's':
+        case 'c':
+        case 'e':
+            size += 2;
+            break;
+        case 'k':
+        case 'H':
+            // Hours are printed without an upper cap, so leave extra room.
+            size += 7;
+            break;
+        case 'r':
+            // "hh:mm:ss AM"
+            size += 11;
+            break;
+        case 'T':
+            // "hh:mm:ss"
+            size += 8;
+            break;
+        case 'f':
+            // Microseconds, six digits.
+            size += 6;
+            break;
+        case 'w':
+        case '%':
+        default:
+            // Single digit, or the specifier character copied as-is.
+            size++;
+            break;
+        }
+    }
+    return size;
+}
+
+// Renders this value into `to` according to the `len` bytes at `format`.
+//
+// Characters other than '%' are copied verbatim, and so is a '%' that is the
+// last character of the format. Otherwise the character after '%' selects the
+// field to print (see the per-case comments); an unrecognized character is
+// copied as-is. The output is NUL-terminated.
+//
+// `to` is written without any bounds check, so the caller has to provide a
+// large enough buffer (compute_format_len() exists for sizing it).
+//
+// Returns false as soon as a specifier cannot be rendered for this value,
+// e.g. a week/weekday specifier on a TIME_TIME value or a month name while
+// _month is 0. In that case `to` holds a partial result without a terminator.
+bool VecDateTimeValue::to_format_string(const char* format, int len, char* to) const {
+    // Scratch space for the digits produced by int_to_str().
+    char buf[64];
+    char* pos = NULL;
+    const char* ptr = format;
+    const char* end = format + len;
+    char ch = '\0';
+
+    while (ptr < end) {
+        if (*ptr != '%' || (ptr + 1) == end) {
+            *to++ = *ptr++;
+            continue;
+        }
+        // Skip '%'
+        ptr++;
+        switch (ch = *ptr++) {
+        case 'a':
+            // Abbreviated weekday name
+            if (_type == TIME_TIME || (_year == 0 && _month == 0)) {
+                return false;
+            }
+            to = append_string(s_ab_day_name[weekday()], to);
+            break;
+        case 'b':
+            // Abbreviated month name
+            if (_month == 0) {
+                return false;
+            }
+            to = append_string(s_ab_month_name[_month], to);
+            break;
+        case 'c':
+            // Month, numeric (0...12)
+            pos = int_to_str(_month, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 1, to);
+            break;
+        case 'd':
+            // Day of month (00...31)
+            pos = int_to_str(_day, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 2, to);
+            break;
+        case 'D':
+            // Day of the month with English suffix (0th, 1st, ...)
+            pos = int_to_str(_day, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 1, to);
+            // 10..19 always take "th" (11th, 12th, 13th); otherwise the last
+            // digit decides.
+            if (_day >= 10 && _day <= 19) {
+                to = append_string("th", to);
+            } else {
+                switch (_day % 10) {
+                case 1:
+                    to = append_string("st", to);
+                    break;
+                case 2:
+                    to = append_string("nd", to);
+                    break;
+                case 3:
+                    to = append_string("rd", to);
+                    break;
+                default:
+                    to = append_string("th", to);
+                    break;
+                }
+            }
+            break;
+        case 'e':
+            // Day of the month, numeric (0..31)
+            pos = int_to_str(_day, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 1, to);
+            break;
+        case 'f':
+            // Microseconds (000000..999999)
+            // Always printed as "000000": the constant 0 is formatted here.
+            pos = int_to_str(0, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 6, to);
+            break;
+        case 'h':
+        case 'I':
+            // Hour (01..12)
+            pos = int_to_str((_hour % 24 + 11) % 12 + 1, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 2, to);
+            break;
+        case 'H':
+            // Hour (00..23)
+            pos = int_to_str(_hour, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 2, to);
+            break;
+        case 'i':
+            // Minutes, numeric (00..59)
+            pos = int_to_str(_minute, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 2, to);
+            break;
+        case 'j':
+            // Day of year (001..366)
+            pos = int_to_str(daynr() - calc_daynr(_year, 1, 1) + 1, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 3, to);
+            break;
+        case 'k':
+            // Hour (0..23)
+            pos = int_to_str(_hour, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 1, to);
+            break;
+        case 'l':
+            // Hour (1..12)
+            pos = int_to_str((_hour % 24 + 11) % 12 + 1, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 1, to);
+            break;
+        case 'm':
+            // Month, numeric (00..12)
+            pos = int_to_str(_month, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 2, to);
+            break;
+        case 'M':
+            // Month name (January..December)
+            if (_month == 0) {
+                return false;
+            }
+            to = append_string(s_month_name[_month], to);
+            break;
+        case 'p':
+            // AM or PM
+            if ((_hour % 24) >= 12) {
+                to = append_string("PM", to);
+            } else {
+                to = append_string("AM", to);
+            }
+            break;
+        case 'r':
+            // Time, 12-hour (hh:mm:ss followed by AM or PM)
+            *to++ = (char)('0' + (((_hour + 11) % 12 + 1) / 10));
+            *to++ = (char)('0' + (((_hour + 11) % 12 + 1) % 10));
+            *to++ = ':';
+            // Minute
+            *to++ = (char)('0' + (_minute / 10));
+            *to++ = (char)('0' + (_minute % 10));
+            *to++ = ':';
+            /* Second */
+            *to++ = (char)('0' + (_second / 10));
+            *to++ = (char)('0' + (_second % 10));
+            if ((_hour % 24) >= 12) {
+                to = append_string(" PM", to);
+            } else {
+                to = append_string(" AM", to);
+            }
+            break;
+        case 's':
+        case 'S':
+            // Seconds (00..59)
+            pos = int_to_str(_second, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 2, to);
+            break;
+        case 'T':
+            // Time, 24-hour (hh:mm:ss)
+            *to++ = (char)('0' + ((_hour % 24) / 10));
+            *to++ = (char)('0' + ((_hour % 24) % 10));
+            *to++ = ':';
+            // Minute
+            *to++ = (char)('0' + (_minute / 10));
+            *to++ = (char)('0' + (_minute % 10));
+            *to++ = ':';
+            /* Second */
+            *to++ = (char)('0' + (_second / 10));
+            *to++ = (char)('0' + (_second % 10));
+            break;
+        case 'u':
+            // Week (00..53), where Monday is the first day of the week;
+            // WEEK() mode 1
+            if (_type == TIME_TIME) {
+                return false;
+            }
+            pos = int_to_str(week(mysql_week_mode(1)), buf);
+            to = append_with_prefix(buf, pos - buf, '0', 2, to);
+            break;
+        case 'U':
+            // Week (00..53), where Sunday is the first day of the week;
+            // WEEK() mode 0
+            if (_type == TIME_TIME) {
+                return false;
+            }
+            pos = int_to_str(week(mysql_week_mode(0)), buf);
+            to = append_with_prefix(buf, pos - buf, '0', 2, to);
+            break;
+        case 'v':
+            // Week (01..53), where Monday is the first day of the week;
+            // WEEK() mode 3; used with %x
+            if (_type == TIME_TIME) {
+                return false;
+            }
+            pos = int_to_str(week(mysql_week_mode(3)), buf);
+            to = append_with_prefix(buf, pos - buf, '0', 2, to);
+            break;
+        case 'V':
+            // Week (01..53), where Sunday is the first day of the week;
+            // WEEK() mode 2; used with %X
+            if (_type == TIME_TIME) {
+                return false;
+            }
+            pos = int_to_str(week(mysql_week_mode(2)), buf);
+            to = append_with_prefix(buf, pos - buf, '0', 2, to);
+            break;
+        case 'w':
+            // Day of the week (0=Sunday..6=Saturday)
+            if (_type == TIME_TIME || (_month == 0 && _year == 0)) {
+                return false;
+            }
+            pos = int_to_str(calc_weekday(daynr(), true), buf);
+            to = append_with_prefix(buf, pos - buf, '0', 1, to);
+            break;
+        case 'W':
+            // Weekday name (Sunday..Saturday)
+            // NOTE(review): unlike %a and %w there is no TIME_TIME / zero-date
+            // guard here - confirm whether that is intentional.
+            to = append_string(s_day_name[weekday()], to);
+            break;
+        case 'x': {
+            // Year for the week, where Monday is the first day of the week,
+            // numeric, four digits; used with %v
+            if (_type == TIME_TIME) {
+                return false;
+            }
+            uint32_t year = 0;
+            calc_week(*this, mysql_week_mode(3), &year);
+            pos = int_to_str(year, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 4, to);
+            break;
+        }
+        case 'X': {
+            // Year for the week where Sunday is the first day of the week,
+            // numeric, four digits; used with %V
+            if (_type == TIME_TIME) {
+                return false;
+            }
+            uint32_t year = 0;
+            calc_week(*this, mysql_week_mode(2), &year);
+            pos = int_to_str(year, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 4, to);
+            break;
+        }
+        case 'y':
+            // Year, numeric (two digits)
+            pos = int_to_str(_year % 100, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 2, to);
+            break;
+        case 'Y':
+            // Year, numeric, four digits
+            pos = int_to_str(_year, buf);
+            to = append_with_prefix(buf, pos - buf, '0', 4, to);
+            break;
+        default:
+            // Unknown specifier (this includes "%%"): emit the character itself.
+            *to++ = ch;
+            break;
+        }
+    }
+    *to++ = '\0';
+    return true;
+}
+
+// Computes the week number of `value` and stores the year that week belongs
+// to in `*year` (it may be the previous or the next calendar year).
+//
+// `mode` is a bit set read through three flags:
+//   WEEK_MONDAY_FIRST  - weekdays are counted with Monday as the first day;
+//   WEEK_YEAR          - number weeks 1..53 instead of letting the first days
+//                        of January fall into week 0;
+//   WEEK_FIRST_WEEKDAY - selects which of the two "first week" rules below is
+//                        applied.
+// Returns 0 only when WEEK_YEAR is not set and the date lies before the first
+// week of its year.
+uint8_t VecDateTimeValue::calc_week(const VecDateTimeValue& value, uint8_t mode, uint32_t* year) {
+    bool monday_first = mode & WEEK_MONDAY_FIRST;
+    bool week_year = mode & WEEK_YEAR;
+    bool first_weekday = mode & WEEK_FIRST_WEEKDAY;
+    uint64_t day_nr = value.daynr();
+    uint64_t daynr_first_day = calc_daynr(value._year, 1, 1);
+    // Weekday index of January 1st, counted from the configured first day.
+    uint8_t weekday_first_day = calc_weekday(daynr_first_day, !monday_first);
+
+    int days = 0;
+    *year = value._year;
+
+    // Check whether the first days of this year belong to last year
+    if (value._month == 1 && value._day <= (7 - weekday_first_day)) {
+        if (!week_year && ((first_weekday && weekday_first_day != 0) ||
+                           (!first_weekday && weekday_first_day > 3))) {
+            return 0;
+        }
+        // Continue the computation relative to the previous year.
+        (*year)--;
+        week_year = true;
+        daynr_first_day -= (days = calc_days_in_year(*year));
+        weekday_first_day = (weekday_first_day + 53 * 7 - days) % 7;
+    }
+
+    // How many days since first week
+    if ((first_weekday && weekday_first_day != 0) || (!first_weekday && weekday_first_day > 3)) {
+        // days in new year belongs to last year.
+        days = day_nr - (daynr_first_day + (7 - weekday_first_day));
+    } else {
+        // days in new year belongs to this year.
+        days = day_nr - (daynr_first_day - weekday_first_day);
+    }
+
+    // A date 52 or more weeks in may already be part of week 1 of the
+    // following year; decide by looking at the weekday of the next Jan 1st.
+    if (week_year && days >= 52 * 7) {
+        weekday_first_day = (weekday_first_day + calc_days_in_year(*year)) % 7;
+        if ((first_weekday && weekday_first_day == 0) ||
+            (!first_weekday && weekday_first_day <= 3)) {
+            // Belong to next year.
+            (*year)++;
+            return 1;
+        }
+    }
+
+    return days / 7 + 1;
+}
+
+// Week number of this value under `mode`; the year the week belongs to is
+// computed by calc_week() as well but dropped here.
+uint8_t VecDateTimeValue::week(uint8_t mode) const {
+    uint32_t ignored_year = 0;
+    const uint8_t week_number = calc_week(*this, mode, &ignored_year);
+    return week_number;
+}
+
+// Returns year * 100 + week for this value, where `year` is the year the week
+// belongs to.
+uint32_t VecDateTimeValue::year_week(uint8_t mode) const {
+    uint32_t week_year = 0;
+    // Weeks are reported in the range 1-53 here, so bit 2 (the WEEK_YEAR
+    // behaviour) is always forced on.
+    uint8_t week_number = calc_week(*this, mode | 2, &week_year);
+
+    // When bit 4 (the WEEK_FIRST_WEEKDAY behaviour) is not set, the week that
+    // contains the last three days of the year may belong to the following year.
+    const bool may_belong_to_next_year = week_number == 53 && day() >= 29 && !(mode & 4);
+    if (may_belong_to_next_year) {
+        uint8_t monday_first = mode & WEEK_MONDAY_FIRST;
+        uint64_t last_day_nr = calc_daynr(_year, 12, 31);
+        uint8_t last_day_weekday = calc_weekday(last_day_nr, !monday_first);
+
+        if (last_day_weekday - monday_first < 2) {
+            week_number = 1;
+            ++week_year;
+        }
+    }
+    return week_year * 100 + week_number;
+}
+
+// Maps a day number to a weekday index in 0..6. Passing
+// is_sunday_first_day = true shifts the index by one day, so that it counts
+// from Sunday instead of from Monday (as the callers in this file use it).
+uint8_t VecDateTimeValue::calc_weekday(uint64_t day_nr, bool is_sunday_first_day) {
+    const long shift = is_sunday_first_day ? 6L : 5L;
+    return (day_nr + shift) % 7;
+}
+
+// Parses a decimal integer from the character range [ptr, *endptr).
+//
+// Leading blanks/tabs, one optional '+' or '-' and leading zeros are skipped,
+// then every following digit is consumed.
+//
+// On success returns true, stores the value in *ret and moves *endptr to the
+// first character that was not consumed. A range that holds no digit after
+// the optional sign still succeeds and yields 0 (existing behaviour that the
+// callers in this file rely on).
+//
+// Returns false - leaving *ret and *endptr untouched - when the range is
+// empty, holds nothing but blanks or nothing but a sign, or when the number
+// does not fit into int64_t.
+//
+// endptr is an in/out parameter and must not be NULL: on entry it carries the
+// end of the input.
+static bool str_to_int64(const char* ptr, const char** endptr, int64_t* ret) {
+    const char* end = *endptr;
+
+    // Skip space
+    while (ptr < end && (*ptr == ' ' || *ptr == '\t')) {
+        ptr++;
+    }
+    if (ptr >= end) {
+        return false;
+    }
+
+    // Sign
+    bool neg = false;
+    if (*ptr == '-') {
+        neg = true;
+        ptr++;
+    } else if (*ptr == '+') {
+        ptr++;
+    }
+    if (ptr >= end) {
+        return false;
+    }
+
+    // Skip '0'
+    while (ptr < end && *ptr == '0') {
+        ptr++;
+    }
+
+    // Largest magnitude that is representable with the parsed sign.
+    const uint64_t limit = neg ? 0x8000000000000000ULL : 0x7FFFFFFFFFFFFFFFULL;
+    uint64_t magnitude = 0;
+    // isdigit() requires a value representable as unsigned char.
+    while (ptr < end && isdigit(static_cast<unsigned char>(*ptr))) {
+        const uint64_t digit = *ptr - '0';
+        // Equivalent to "magnitude * 10 + digit > limit" without overflowing.
+        if (magnitude > (limit - digit) / 10) {
+            return false;
+        }
+        magnitude = magnitude * 10 + digit;
+        ptr++;
+    }
+
+    *endptr = ptr;
+    if (neg && magnitude != 0) {
+        // Negate without overflowing when the magnitude is 2^63.
+        *ret = -static_cast<int64_t>(magnitude - 1) - 1;
+    } else {
+        *ret = static_cast<int64_t>(magnitude);
+    }
+    return true;
+}
+
+// Smaller of two ints.
+static int min(int a, int b) {
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+
+// Looks up the text [str, end) in the NULL-terminated word list `lib`,
+// ignoring case. Returns the index of the first entry that equals the text.
+// If none does, returns the index of the entry that starts with the text,
+// provided exactly one such entry exists; otherwise returns -1.
+static int find_in_lib(const char* lib[], const char* str, const char* end) {
+    int prefix_hits = 0;
+    int prefix_hit_index = 0;
+    for (int idx = 0; lib[idx] != NULL; ++idx) {
+        const char* input = str;
+        const char* word = lib[idx];
+        // Advance over the common (case-insensitive) prefix.
+        for (; input < end && *word; ++input, ++word) {
+            if (toupper(*input) != toupper(*word)) {
+                break;
+            }
+        }
+        if (input != end) {
+            // The input was not fully matched by this entry.
+            continue;
+        }
+        if (*word == '\0') {
+            // Input and entry ended together: exact match.
+            return idx;
+        }
+        // The input is a proper prefix of this entry.
+        prefix_hits++;
+        prefix_hit_index = idx;
+    }
+    if (prefix_hits == 1) {
+        return prefix_hit_index;
+    }
+    return -1;
+}
+
+// Takes the run of alphabetic characters at the start of [str, end) and looks
+// it up in `lib` through find_in_lib(). On a hit, *endptr is moved right
+// behind that run; on a miss it is left untouched. Returns the index found,
+// or a negative value when there is no match.
+static int check_word(const char* lib[], const char* str, const char* end, const char** endptr) {
+    const char* word_end = str;
+    for (; word_end < end && isalpha(*word_end); ++word_end) {
+    }
+    const int index = find_in_lib(lib, str, word_end);
+    if (index >= 0) {
+        *endptr = word_end;
+    }
+    return index;
+}
+
+// this method is exactly same as fromDateFormatStr() in DateLiteral.java in FE
+// change this method should also change that.
+bool VecDateTimeValue::from_date_format_str(const char* format, int format_len, const char* value,
+                                         int value_len, const char** sub_val_end) {
+    const char* ptr = format;
+    const char* end = format + format_len;
+    const char* val = value;
+    const char* val_end = value + value_len;
+
+    bool date_part_used = false;
+    bool time_part_used = false;
+    bool frac_part_used = false;
+    bool already_set_time_part = false;
+
+    int day_part = 0;
+    int weekday = -1;
+    int yearday = -1;
+    int week_num = -1;
+
+    bool strict_week_number = false;
+    bool sunday_first = false;
+    bool strict_week_number_year_type = false;
+    int strict_week_number_year = -1;
+    bool usa_time = false;
+
+    auto [year, month, day, hour, minute, second] = std::tuple{0,0,0,0,0,0};
+    while (ptr < end && val < val_end) {
+        // Skip space character
+        while (val < val_end && isspace(*val)) {
+            val++;
+        }
+        if (val >= val_end) {
+            break;
+        }
+        // Check switch
+        if (*ptr == '%' && ptr + 1 < end) {
+            const char* tmp = NULL;
+            int64_t int_value = 0;
+            ptr++;
+            switch (*ptr++) {
+                // Year
+            case 'y':
+                // Year, numeric (two digits)
+                tmp = val + min(2, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                int_value += int_value >= 70 ? 1900 : 2000;
+                year = int_value;
+                val = tmp;
+                date_part_used = true;
+                break;
+            case 'Y':
+                // Year, numeric, four digits
+                tmp = val + min(4, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                if (tmp - val <= 2) {
+                    int_value += int_value >= 70 ? 1900 : 2000;
+                }
+                year = int_value;
+                val = tmp;
+                date_part_used = true;
+                break;
+                // Month
+            case 'm':
+            case 'c':
+                tmp = val + min(2, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                month = int_value;
+                val = tmp;
+                date_part_used = true;
+                break;
+            case 'M':
+                int_value = check_word(const_cast<const char**>(s_month_name), val, val_end, &val);
+                if (int_value < 0) {
+                    return false;
+                }
+                month = int_value;
+                break;
+            case 'b':
+                int_value = check_word(s_ab_month_name, val, val_end, &val);
+                if (int_value < 0) {
+                    return false;
+                }
+                month = int_value;
+                break;
+                // Day
+            case 'd':
+            case 'e':
+                tmp = val + min(2, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                day = int_value;
+                val = tmp;
+                date_part_used = true;
+                break;
+            case 'D':
+                tmp = val + min(2, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                day = int_value;
+                val = tmp + min(2, val_end - tmp);
+                date_part_used = true;
+                break;
+                // Hour
+            case 'h':
+            case 'I':
+            case 'l':
+                usa_time = true;
+                // Fall through
+            case 'k':
+            case 'H':
+                tmp = val + min(2, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                hour = int_value;
+                val = tmp;
+                time_part_used = true;
+                break;
+                // Minute
+            case 'i':
+                tmp = val + min(2, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                minute = int_value;
+                val = tmp;
+                time_part_used = true;
+                break;
+                // Second
+            case 's':
+            case 'S':
+                tmp = val + min(2, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                second = int_value;
+                val = tmp;
+                time_part_used = true;
+                break;
+                // Micro second
+            case 'f':
+                break;
+                // AM/PM
+            case 'p':
+                if ((val_end - val) < 2 || toupper(*(val + 1)) != 'M' || !usa_time) {
+                    return false;
+                }
+                if (toupper(*val) == 'P') {
+                    // PM
+                    day_part = 12;
+                }
+                time_part_used = true;
+                val += 2;
+                break;
+                // Weekday
+            case 'W':
+                int_value = check_word(const_cast<const char**>(s_day_name), val, val_end, &val);
+                if (int_value < 0) {
+                    return false;
+                }
+                int_value++;
+                weekday = int_value;
+                date_part_used = true;
+                break;
+            case 'a':
+                int_value = check_word(s_ab_day_name, val, val_end, &val);
+                if (int_value < 0) {
+                    return false;
+                }
+                int_value++;
+                weekday = int_value;
+                date_part_used = true;
+                break;
+            case 'w':
+                tmp = val + min(1, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                if (int_value >= 7) {
+                    return false;
+                }
+                if (int_value == 0) {
+                    int_value = 7;
+                }
+                weekday = int_value;
+                val = tmp;
+                date_part_used = true;
+                break;
+            case 'j':
+                tmp = val + min(3, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                yearday = int_value;
+                val = tmp;
+                date_part_used = true;
+                break;
+            case 'u':
+            case 'v':
+            case 'U':
+            case 'V':
+                sunday_first = (*(ptr - 1) == 'U' || *(ptr - 1) == 'V');
+                // Used to check if there is %x or %X
+                strict_week_number = (*(ptr - 1) == 'V' || *(ptr - 1) == 'v');
+                tmp = val + min(2, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                week_num = int_value;
+                if (week_num > 53 || (strict_week_number && week_num == 0)) {
+                    return false;
+                }
+                val = tmp;
+                date_part_used = true;
+                break;
+                // strict week number, must be used with %V or %v
+            case 'x':
+            case 'X':
+                strict_week_number_year_type = (*(ptr - 1) == 'X');
+                tmp = val + min(4, val_end - val);
+                if (!str_to_int64(val, &tmp, &int_value)) {
+                    return false;
+                }
+                strict_week_number_year = int_value;
+                val = tmp;
+                date_part_used = true;
+                break;
+            case 'r':
+                if (!from_date_format_str("%I:%i:%S %p", 11, val, val_end - val, &tmp)) {
+                    return false;
+                }
+                val = tmp;
+                time_part_used = true;
+                    already_set_time_part = true;
+                break;
+            case 'T':
+                if (!from_date_format_str("%H:%i:%S", 8, val, val_end - val, &tmp)) {
+                    return false;
+                }
+                time_part_used = true;
+                    already_set_time_part = true;
+                val = tmp;
+                break;
+            case '.':
+                while (val < val_end && ispunct(*val)) {
+                    val++;
+                }
+                break;
+            case '@':
+                while (val < val_end && isalpha(*val)) {
+                    val++;
+                }
+                break;
+            case '#':
+                while (val < val_end && isdigit(*val)) {
+                    val++;
+                }
+                break;
+            case '%': // %%, escape the %
+                if ('%' != *val) {
+                    return false;
+                }
+                val++;
+                break;
+            default:
+                return false;
+            }
+        } else if (!isspace(*ptr)) {
+            if (*ptr != *val) {
+                return false;
+            }
+            ptr++;
+            val++;
+        } else {
+            ptr++;
+        }
+    }
+
+    // continue to iterate pattern if has
+    // to find out if it has time part.
+    while (ptr < end) {
+        if (*ptr == '%' && ptr + 1 < end) {
+            ptr++;
+            switch (*ptr++) {
+            case 'H':
+            case 'h':
+            case 'I':
+            case 'i':
+            case 'k':
+            case 'l':
+            case 'r':
+            case 's':
+            case 'S':
+            case 'p':
+            case 'T':
+                time_part_used = true;
+                break;
+            default:
+                break;
+            }
+        } else {
+            ptr++;
+        }
+    }
+
+    if (usa_time) {
+        if (hour > 12 || hour < 1) {
+            return false;
+        }
+        hour = (hour % 12) + day_part;
+    }
+    if (sub_val_end) {
+        *sub_val_end = val;
+    }
+
+    // Compute timestamp type
+    if (frac_part_used) {
+        if (date_part_used) {
+            _type = TIME_DATETIME;
+        } else {
+            _type = TIME_TIME;
+        }
+    } else {
+        if (date_part_used) {
+            if (time_part_used) {
+                _type = TIME_DATETIME;
+            } else {
+                _type = TIME_DATE;
+            }
+        } else {
+            _type = TIME_TIME;
+        }
+    }
+
+    _neg = false;
+
+    // Year day
+    if (yearday > 0) {
+        uint64_t days = calc_daynr(year, 1, 1) + yearday - 1;
+        if (!get_date_from_daynr(days)) {
+            return false;
+        }
+    }
+    // weekday
+    if (week_num >= 0 && weekday > 0) {
+        // Check
+        if ((strict_week_number &&
+             (strict_week_number_year < 0 || strict_week_number_year_type != sunday_first)) ||
+            (!strict_week_number && strict_week_number_year >= 0)) {
+            return false;
+        }
+        uint64_t days = calc_daynr(strict_week_number ? strict_week_number_year : year, 1, 1);
+
+        uint8_t weekday_b = calc_weekday(days, sunday_first);
+
+        if (sunday_first) {
+            days += ((weekday_b == 0) ? 0 : 7) - weekday_b + (week_num - 1) * 7 + weekday % 7;
+        } else {
+            days += ((weekday_b <= 3) ? 0 : 7) - weekday_b + (week_num - 1) * 7 + weekday - 1;
+        }
+        if (!get_date_from_daynr(days)) {
+            return false;
+        }
+    }
+    // 1. already_set_date_part means _year, _month and _day are already set, so only the time part
+    //    still has to be set
+    // 2. already_set_time_part means _hour, _minute and _second are already set,
+    //    so only the date part still has to be set
+    // 3. if both are true, every part of the datetime is set and check_range_and_set_time is not needed
+    bool already_set_date_part = yearday > 0 || (week_num >= 0 && weekday > 0);
+    if (already_set_date_part && already_set_time_part) return true;
+    if (already_set_date_part) return check_range_and_set_time(_year, _month, _day, hour, minute, second, _type);
+    if (already_set_time_part) return check_range_and_set_time(year, month, day,
+                                                               _hour, _minute, _second, _type);
+
+    return check_range_and_set_time(year, month, day, hour, minute, second, _type);
+}
+
+// Adds `interval` (subtracts when interval.is_neg) to this value in place; `unit` selects which
+// interval fields are applied. Returns false when this value is not a valid date or the result
+// falls outside the supported range; on failure some fields may already have been modified.
+bool VecDateTimeValue::date_add_interval(const TimeInterval& interval, TimeUnit unit) {
+    if (!is_valid_date()) return false;
+
+    int sign = interval.is_neg ? -1 : 1;
+    switch (unit) {
+    case SECOND:
+    case MINUTE:
+    case HOUR:
+    case SECOND_MICROSECOND:
+    case MINUTE_MICROSECOND:
+    case MINUTE_SECOND:
+    case HOUR_MICROSECOND:
+    case HOUR_SECOND:
+    case HOUR_MINUTE:
+    case DAY_MICROSECOND:
+    case DAY_SECOND:
+    case DAY_MINUTE:
+    case DAY_HOUR: {
+        // Seconds elapsed since the 1st of the current month, plus the signed interval. The
+        // quotient by 86400 is the day offset from the 1st; the remainder is the time of day.
+        int64_t seconds = (_day - 1) * 86400L + _hour * 3600L + _minute * 60 + _second +
+                          sign * (interval.day * 86400 + interval.hour * 3600 +
+                                  interval.minute * 60 + interval.second);
+        int64_t days = seconds / 86400;
+        seconds %= 86400L;
+        if (seconds < 0) {
+            // C++ remainder keeps the sign of the dividend; borrow one day to normalize.
+            seconds += 86400L;
+            days--;
+        }
+        _second = seconds % 60;
+        _minute = (seconds / 60) % 60;
+        _hour = seconds / 3600;
+        int64_t day_nr = calc_daynr(_year, _month, 1) + days;
+        if (!get_date_from_daynr(day_nr)) {
+            return false;
+        }
+        _type = TIME_DATETIME;
+        break;
+    }
+    case DAY:
+    case WEEK: {
+        // Only the date part changes; the time of day is left untouched.
+        int64_t day_nr = daynr() + interval.day * sign;
+        if (!get_date_from_daynr(day_nr)) {
+            return false;
+        }
+        break;
+    }
+    case YEAR: {
+        // Range-check in 64 bits before storing: assigning straight into the 16-bit _year
+        // would let an out-of-range result wrap around into a seemingly valid year.
+        int64_t new_year = _year + sign * interval.year;
+        if (new_year < 0 || new_year > 9999) {
+            return false;
+        }
+        _year = new_year;
+        // Feb 29 does not exist in a non-leap target year; clamp to Feb 28.
+        if (_month == 2 && _day == 29 && !is_leap(_year)) {
+            _day = 28;
+        }
+        break;
+    }
+    case MONTH:
+    case QUARTER:
+    case YEAR_MONTH: {
+        // Work on a 0-based month count. A negative count is a date before year 0 and must be
+        // rejected: `months % 12` would be negative and wrap _month into a huge table index.
+        int64_t months = _year * 12 + _month - 1 + sign * (12 * interval.year + interval.month);
+        if (months < 0 || months / 12 > 9999) {
+            return false;
+        }
+        _year = months / 12;
+        _month = (months % 12) + 1;
+        // Clamp the day to the length of the target month (one more for a leap-year February).
+        if (_day > s_days_in_month[_month]) {
+            _day = s_days_in_month[_month];
+            if (_month == 2 && is_leap(_year)) {
+                _day++;
+            }
+        }
+        break;
+    }
+    }
+    return true;
+}
+
+// Resolves `timezone` by name and forwards to the cctz overload; false if the lookup fails.
+bool VecDateTimeValue::unix_timestamp(int64_t* timestamp, const std::string& timezone) const {
+    cctz::time_zone zone;
+    const bool zone_found = TimezoneUtils::find_cctz_time_zone(timezone, zone);
+    // Short-circuit keeps *timestamp untouched when the zone name is unknown.
+    return zone_found && unix_timestamp(timestamp, zone);
+}
+
+// Interprets the stored fields as civil time in `ctz` and writes the resulting
+// time_since_epoch() count to *timestamp. Always returns true.
+bool VecDateTimeValue::unix_timestamp(int64_t* timestamp, const cctz::time_zone& ctz) const {
+    const cctz::civil_second civil(_year, _month, _day, _hour, _minute, _second);
+    *timestamp = cctz::convert(civil, ctz).time_since_epoch().count();
+    return true;
+}
+
+// Resolves `timezone` by name and forwards to the cctz overload; false if the lookup fails.
+bool VecDateTimeValue::from_unixtime(int64_t timestamp, const std::string& timezone) {
+    cctz::time_zone zone;
+    const bool zone_found = TimezoneUtils::find_cctz_time_zone(timezone, zone);
+    // Short-circuit leaves this value unchanged when the zone name is unknown.
+    return zone_found && from_unixtime(timestamp, zone);
+}
+
+// Sets this value to the civil date-time that `timestamp` (seconds relative to the
+// system_clock epoch) has in time zone `ctz`. The type becomes TIME_DATETIME; always true.
+bool VecDateTimeValue::from_unixtime(int64_t timestamp, const cctz::time_zone& ctz) {
+    using SecondsPoint = cctz::time_point<cctz::sys_seconds>;
+    static const SecondsPoint epoch = std::chrono::time_point_cast<cctz::sys_seconds>(
+            std::chrono::system_clock::from_time_t(0));
+
+    const SecondsPoint instant = epoch + cctz::seconds(timestamp);
+    const cctz::civil_second civil = cctz::convert(instant, ctz);
+
+    _year = civil.year();
+    _month = civil.month();
+    _day = civil.day();
+    _hour = civil.hour();
+    _minute = civil.minute();
+    _second = civil.second();
+    _type = TIME_DATETIME;
+    _neg = 0;
+    return true;
+}
+
+// English name of the stored month, or a null pointer when the month is outside 1..12.
+const char* VecDateTimeValue::month_name() const {
+    const bool in_range = _month >= 1 && _month <= 12;
+    // s_month_name is indexed directly by month number.
+    return in_range ? s_month_name[_month] : nullptr;
+}
+
+// English name of the stored date's weekday (weekday() == 0 maps to "Monday"), or a null
+// pointer if weekday() yields an index outside 0..6.
+const char* VecDateTimeValue::day_name() const {
+    const int index = weekday();
+    const bool in_range = index >= 0 && index < 7;
+    return in_range ? s_day_name[index] : nullptr;
+}
+
+// Returns the current time (from time()) converted with TimezoneUtils::default_time_zone.
+VecDateTimeValue VecDateTimeValue::local_time() {
+    const time_t now_seconds = time(nullptr);
+    VecDateTimeValue now;
+    now.from_unixtime(now_seconds, TimezoneUtils::default_time_zone);
+    return now;
+}
+
+// Stores the given components verbatim. No range checking happens here and each value is
+// narrowed to the width of its member; use check_range_and_set_time() for validated input.
+void VecDateTimeValue::set_time(uint32_t year, uint32_t month, uint32_t day, uint32_t hour,
+                                uint32_t minute, uint32_t second) {
+    // Time of day.
+    _second = second;
+    _minute = minute;
+    _hour = hour;
+    // Calendar date.
+    _day = day;
+    _month = month;
+    _year = year;
+}
+
+// Copies this value field by field into the row-based doris::DateTimeValue `dt`.
+// VecDateTimeValue keeps no sub-second part, so dt's microsecond field is set to zero.
+void VecDateTimeValue::convert_vec_dt_to_dt(doris::DateTimeValue* dt) {
+    dt->_year = _year;
+    dt->_month = _month;
+    dt->_day = _day;
+    dt->_hour = _hour;
+    dt->_minute = _minute;
+    dt->_second = _second;
+    dt->_microsecond = 0;
+    dt->_type = _type;
+    dt->_neg = _neg;
+}
+
+// Overwrites this value with the fields of the row-based doris::DateTimeValue `dt`.
+// Only sign, type, date and time-of-day are copied; nothing else of `dt` is read.
+void VecDateTimeValue::convert_dt_to_vec_dt(doris::DateTimeValue* dt) {
+    _year = dt->_year;
+    _month = dt->_month;
+    _day = dt->_day;
+    _hour = dt->_hour;
+    _minute = dt->_minute;
+    _second = dt->_second;
+    _type = dt->_type;
+    _neg = dt->_neg;
+}
+
+// Streams the textual form produced by VecDateTimeValue::to_string().
+std::ostream& operator<<(std::ostream& os, const VecDateTimeValue& value) {
+    char text[64];
+    value.to_string(text);
+    os << text;
+    return os;
+}
+
+// NOTE:
+//  Only DATE - DATE is supported (not DATETIME - DATETIME): the result is the difference of
+//  the two day numbers and the time-of-day fields are ignored. daynr() is unsigned, so the
+//  result wraps around when v1 is earlier than v2.
+std::size_t operator-(const VecDateTimeValue& v1, const VecDateTimeValue& v2) {
+    const auto lhs_days = v1.daynr();
+    const auto rhs_days = v2.daynr();
+    return lhs_days - rhs_days;
+}
+
+// Hashes the raw object bytes of `value` with HashUtil::hash, using seed 0.
+std::size_t hash_value(const VecDateTimeValue& value) {
+    const std::size_t hashed = HashUtil::hash(&value, sizeof(value), 0);
+    return hashed;
+}
+
+} // namespace doris
diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h
new file mode 100644
index 0000000000..4ec8d6eb63
--- /dev/null
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -0,0 +1,664 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef DORIS_BE_RUNTIME_VDATETIME_VALUE_H
+#define DORIS_BE_RUNTIME_VDATETIME_VALUE_H
+
+#include <re2/re2.h>
+#include <stdint.h>
+
+#include <chrono>
+#include <cstddef>
+#include <iostream>
+
+#include "cctz/civil_time.h"
+#include "cctz/time_zone.h"
+#include "udf/udf.h"
+#include "util/hash_util.hpp"
+#include "util/timezone_utils.h"
+
+namespace doris {
+class DateTimeValue;
+
+namespace vectorized {
+
+// Calendar/time units understood by TimeInterval and VecDateTimeValue::date_add_interval().
+enum TimeUnit {
+    SECOND,
+    MINUTE,
+    HOUR,
+    DAY,
+    WEEK,
+    MONTH,
+    QUARTER,
+    YEAR,
+    SECOND_MICROSECOND,
+    MINUTE_MICROSECOND,
+    MINUTE_SECOND,
+    HOUR_MICROSECOND,
+    HOUR_SECOND,
+    HOUR_MINUTE,
+    DAY_MICROSECOND,
+    DAY_SECOND,
+    DAY_MINUTE,
+    DAY_HOUR,
+    YEAR_MONTH
+};
+
+// A date/time offset broken into components. `is_neg` flips the sign of the whole offset
+// when it is applied by VecDateTimeValue::date_add_interval().
+struct TimeInterval {
+    int64_t year;
+    int64_t month;
+    int64_t day;
+    int64_t hour;
+    int64_t minute;
+    int64_t second;
+    bool is_neg;
+
+    // Zero-length, non-negative interval.
+    TimeInterval() : year(0), month(0), day(0), hour(0), minute(0), second(0), is_neg(false) {}
+
+    // Builds an interval of `count` single-component units. WEEK is stored as 7 * count days and
+    // QUARTER as 3 * count months, i.e. in the fields date_add_interval() reads for those units.
+    // Composite units (e.g. DAY_HOUR) cannot be expressed by one count and yield a zero interval.
+    TimeInterval(TimeUnit unit, int64_t count, bool is_neg_param)
+            : year(0), month(0), day(0), hour(0), minute(0), second(0), is_neg(is_neg_param) {
+        switch (unit) {
+        case YEAR:
+            year = count;
+            break;
+        case QUARTER:
+            month = 3 * count;
+            break;
+        case MONTH:
+            month = count;
+            break;
+        case WEEK:
+            day = 7 * count;
+            break;
+        case DAY:
+            day = count;
+            break;
+        case HOUR:
+            hour = count;
+            break;
+        case MINUTE:
+            minute = count;
+            break;
+        case SECOND:
+            second = count;
+            break;
+        default:
+            break;
+        }
+    }
+};
+
+// Kind of value held by a VecDateTimeValue (stored in its `_type` field).
+enum TimeType { TIME_TIME = 1, TIME_DATE = 2, TIME_DATETIME = 3 };
+
+// Used to compute week
+// Bit flags of the week-numbering mode; see the comment above VecDateTimeValue::week().
+const int WEEK_MONDAY_FIRST = 1;
+const int WEEK_YEAR = 2;
+const int WEEK_FIRST_WEEKDAY = 4;
+
+// 9999-99-99 99:99:99; 19 + 1('\0')
+const int MAX_DTVALUE_STR_LEN = 20;
+
+// NOTE(review): presumably the largest day number accepted for a date — confirm in the .cpp.
+const int DATE_MAX_DAYNR = 3652424;
+// two-digit years < this are 20..; >= this are 19..
+const int YY_PART_YEAR = 70;
+
+// Limits of time value
+// NOTE(review): VecDateTimeValue stores the hour in a uint8_t (maximum 255), so an hour equal
+// to TIME_MAX_HOUR cannot be represented — confirm how check_range() uses this bound.
+const int TIME_MAX_HOUR = 256;
+const int TIME_MAX_MINUTE = 59;
+const int TIME_MAX_SECOND = 59;
+// Largest time written as the decimal number HHMMSS, and the same limit in seconds.
+const int TIME_MAX_VALUE = 10000 * TIME_MAX_HOUR + 100 * TIME_MAX_MINUTE + TIME_MAX_SECOND;
+const int TIME_MAX_VALUE_SECONDS = 3600 * TIME_MAX_HOUR + 60 * TIME_MAX_MINUTE + TIME_MAX_SECOND;
+
+// Compile-time length of a NUL-terminated string; a null pointer counts as length 0.
+constexpr size_t const_length(const char* str) {
+    return (str == nullptr || *str == 0) ? 0 : const_length(str + 1) + 1;
+}
+
+// Length of the longest string among the first `end` entries of `name`.
+// Null entries are treated as empty strings; returns 0 when `end` is 0.
+constexpr size_t max_char_length(const char* const* name, size_t end) {
+    size_t res = 0;
+    // Index with size_t so the comparison against `end` is unsigned-to-unsigned.
+    for (size_t i = 0; i < end; ++i) {
+        res = std::max(const_length(name[i]), res);
+    }
+    return res;
+}
+
+// English month names indexed by month number (1..12). Index 0 is an empty placeholder and
+// the array ends with a NULL entry.
+static constexpr const char* s_month_name[] = {
+        "",     "January", "February",  "March",   "April",    "May",      "June",
+        "July", "August",  "September", "October", "November", "December", NULL};
+
+// English day names indexed from 0 ("Monday") to 6 ("Sunday"); the array ends with a NULL entry.
+static constexpr const char* s_day_name[] = {"Monday", "Tuesday",  "Wednesday", "Thursday",
+                                             "Friday", "Saturday", "Sunday",    NULL};
+
+// Longest name in each table, computed at compile time (the NULL entry counts as length 0).
+static constexpr size_t MAX_DAY_NAME_LEN = max_char_length(s_day_name, std::size(s_day_name));
+static constexpr size_t MAX_MONTH_NAME_LEN = max_char_length(s_month_name, std::size(s_month_name));
+
+uint8_t mysql_week_mode(uint32_t mode);
+
+class VecDateTimeValue {  // Now this type is a temp solution with little changes, maybe large refactoring follow-up.
+public:                   
+    // Constructor
+    // Creates an all-zero value of type TIME_DATETIME.
+    // Layout note: unlike the row-batch DateTimeValue there is no _microsecond member and the
+    // fields are reordered, which shrinks the value from 16 bytes (int128) to 8 bytes (int64).
+    VecDateTimeValue()
+            : _neg(0),
+              _type(TIME_DATETIME),
+              _second(0),
+              _minute(0),
+              _hour(0),
+              _day(0),
+              _month(0),
+              _year(0) {}
+
+    // Builds a value from an integer date/datetime via from_date_int64(). Delegating to the
+    // default constructor first guarantees that every field is initialized even if the
+    // conversion bails out early (its boolean result is not reported to the caller).
+    explicit VecDateTimeValue(int64_t t) : VecDateTimeValue() { from_date_int64(t); }
+
+    void set_time(uint32_t year, uint32_t month, uint32_t day, uint32_t hour,
+        uint32_t minute, uint32_t second);
+
+    // Converted from Olap Date or Datetime
+    // Decodes an OLAP datetime, i.e. the decimal number YYYYMMDDHHMMSS, validates it and stores
+    // it as TIME_DATETIME. Returns false when validation fails; in that case the date and time
+    // fields are not modified (but _neg and _type already are).
+    bool from_olap_datetime(uint64_t datetime) {
+        _neg = 0;
+        _type = TIME_DATETIME;
+
+        // The lowest six decimal digits hold the time, everything above them the date.
+        const uint64_t date_part = datetime / 1000000;
+        const uint64_t time_part = datetime % 1000000;
+
+        const int year = date_part / 10000;
+        const int month = (date_part % 10000) / 100;
+        const int day = date_part % 100;
+        const int hour = time_part / 10000;
+        const int minute = (time_part % 10000) / 100;
+        const int second = time_part % 100;
+
+        return check_range_and_set_time(year, month, day, hour, minute, second, _type);
+    }
+
+    // Encodes the value as the decimal number YYYYMMDDHHMMSS (the form from_olap_datetime reads).
+    uint64_t to_olap_datetime() const {
+        const uint64_t hhmmss = _hour * 10000 + _minute * 100 + _second;
+        const uint64_t yyyymmdd = _year * 10000 + _month * 100 + _day;
+        return yyyymmdd * 1000000 + hhmmss;
+    }
+
+    // Decodes an OLAP date packed as (year << 9) | (month << 5) | day, validates it and stores
+    // it as TIME_DATE with a zero time of day. Returns false when validation fails.
+    bool from_olap_date(uint64_t date) {
+        _neg = 0;
+        _type = TIME_DATE;
+
+        const int day = date & 0x1f;           // lowest 5 bits
+        const int month = (date >> 5) & 0x0f;  // next 4 bits
+        const int year = date >> 9;            // everything above
+
+        return check_range_and_set_time(year, month, day, 0, 0, 0, _type);
+    }
+
+    // Packs the date as (year << 9) | (month << 5) | day, the layout from_olap_date() decodes.
+    uint64_t to_olap_date() const {
+        uint64_t packed = _year;
+        packed = (packed << 4) | _month;
+        packed = (packed << 5) | _day;
+        return packed;
+    }
+
+    // Parses `value` according to the format pattern `format`. All fields are zeroed first, so
+    // the outcome does not depend on this object's previous contents.
+    bool from_date_format_str(const char* format, int format_len, const char* value,
+                              int value_len) {
+        memset(this, 0, sizeof(VecDateTimeValue));
+        const char** no_sub_val_end = nullptr;
+        return from_date_format_str(format, format_len, value, value_len, no_sub_val_end);
+    }
+
+    operator int64_t() const { return to_int64(); }
+
+    // Given days since 0000-01-01, construct the datetime value.
+    bool from_date_daynr(uint64_t);
+
+    // Construct Date/Datetime type value from string.
+    // At least the following formats are recognized (based on number of digits)
+    // 'YYMMDD', 'YYYYMMDD', 'YYMMDDHHMMSS', 'YYYYMMDDHHMMSS'
+    // 'YY-MM-DD', 'YYYY-MM-DD', 'YY-MM-DD HH.MM.SS'
+    // 'YYYYMMDDTHHMMSS'
+    bool from_date_str(const char* str, int len);
+
+    // Construct Date/Datetime type value from int64_t value.
+    // Return true if convert success. Otherwise return false.
+    bool from_date_int64(int64_t value);
+
+    // Construct time type value from int64_t value.
+    // Return true if convert success. Otherwise return false.
+    bool from_time_int64(int64_t value);
+
+    // Convert this value to string
+    // this will check type to decide which format to convert
+    // TIME:  format 'hh:mm:ss.xxxxxx'
+    // DATE:  format 'YYYY-MM-DD'
+    // DATETIME:  format 'YYYY-MM-DD hh:mm:ss.xxxxxx'
+    int32_t to_buffer(char* buffer) const;
+
+    char* to_string(char* to) const;
+
+    // Convert this datetime value to string by the format string
+    bool to_format_string(const char* format, int len, char* to) const;
+
+    // compute the length of data format pattern
+    static int compute_format_len(const char* format, int len);
+
+    // Return true if range or date is invalid
+    static bool check_range(uint32_t year, uint32_t month, uint32_t day, uint32_t hour,
+        uint32_t minute, uint32_t second, uint16_t type);
+
+    static bool check_date(uint32_t year, uint32_t month, uint32_t day);
+
+    // Returns ts_value2 - ts_value1 expressed in whole `unit`s; a partially elapsed unit is not
+    // counted, so the result is truncated toward zero. Units without a case below (QUARTER and
+    // the composite units) fall out of the switch and return 0.
+    template <TimeUnit unit>
+    static int64_t datetime_diff(const VecDateTimeValue& ts_value1, const VecDateTimeValue& ts_value2) {
+        switch (unit) {
+        case YEAR: {
+            int year = (ts_value2.year() - ts_value1.year());
+            // Compare what is left of to_int64() below the year digits (presumably MMDDhhmmss;
+            // confirm against to_int64()) and pull the count one step toward zero when the
+            // final year is incomplete.
+            if (year > 0) {
+                year -= (ts_value2.to_int64() % 10000000000 - ts_value1.to_int64() % 10000000000) <
+                        0;
+            } else if (year < 0) {
+                year += (ts_value2.to_int64() % 10000000000 - ts_value1.to_int64() % 10000000000) >
+                        0;
+            }
+            return year;
+        }
+        case MONTH: {
+            int month = (ts_value2.year() - ts_value1.year()) * 12 +
+                        (ts_value2.month() - ts_value1.month());
+            // Same adjustment as for YEAR, using the part below the month digits
+            // (presumably DDhhmmss).
+            if (month > 0) {
+                month -= (ts_value2.to_int64() % 100000000 - ts_value1.to_int64() % 100000000) < 0;
+            } else if (month < 0) {
+                month += (ts_value2.to_int64() % 100000000 - ts_value1.to_int64() % 100000000) > 0;
+            }
+            return month;
+        }
+        case WEEK: {
+            int day = ts_value2.daynr() - ts_value1.daynr();
+            // Drop the last day when its time of day has not been reached yet, then convert
+            // the whole-day count into whole weeks.
+            if (day > 0) {
+                day -= ts_value2.time_part_diff(ts_value1) < 0;
+            } else if (day < 0) {
+                day += ts_value2.time_part_diff(ts_value1) > 0;
+            }
+            return day / 7;
+        }
+        case DAY: {
+            int day = ts_value2.daynr() - ts_value1.daynr();
+            // Drop the last day when its time of day has not been reached yet.
+            if (day > 0) {
+                day -= ts_value2.time_part_diff(ts_value1) < 0;
+            } else if (day < 0) {
+                day += ts_value2.time_part_diff(ts_value1) > 0;
+            }
+            return day;
+        }
+        case HOUR: {
+            // Integer division truncates toward zero, discarding a partial hour.
+            int64_t second = ts_value2.second_diff(ts_value1);
+            int64_t hour = second / 60 / 60;
+            return hour;
+        }
+        case MINUTE: {
+            // Integer division truncates toward zero, discarding a partial minute.
+            int64_t second = ts_value2.second_diff(ts_value1);
+            int64_t minute = second / 60;
+            return minute;
+        }
+        case SECOND: {
+            int64_t second = ts_value2.second_diff(ts_value1);
+            return second;
+        }
+        }
+        // Rethink the default return value
+        return 0;
+    }
+
+    // Convert this value to int64_t.
+    // The value's type is checked to decide which conversion is used.
+    int64_t to_int64() const;
+
+    // Validates the components with check_range() (which returns true when they are INVALID)
+    // and stores them only if they are valid. Returns true on success; false means nothing
+    // was stored.
+    bool check_range_and_set_time(uint32_t year, uint32_t month, uint32_t day, uint32_t hour,
+                                  uint32_t minute, uint32_t second, uint16_t type) {
+        const bool invalid = check_range(year, month, day, hour, minute, second, type);
+        if (!invalid) {
+            set_time(year, month, day, hour, minute, second);
+        }
+        return !invalid;
+    }
+
+    inline uint64_t daynr() const { return calc_daynr(_year, _month, _day); }
+
+    // Calculate how many days since 0000-01-01
+    // 0000-01-01 is 1st B.C.
+    static uint64_t calc_daynr(uint32_t year, uint32_t month, uint32_t day);
+
+    static uint8_t calc_weekday(uint64_t daynr, bool); //W = (D + M*2 + 3*(M+1)/5 + Y + Y/4 -Y/100 + Y/400)%7
+
+    int year() const { return _year; }
+    int month() const { return _month; }
+    int quarter() const { return (_month - 1) / 3 + 1; }
+    int week() const { return week(mysql_week_mode(0)); }//00-53
+    int day() const { return _day; }
+    int hour() const { return _hour; }
+    int minute() const { return _minute; }
+    int second() const { return _second; }
+    int neg() const { return _neg; }
+
+    // Truncates this value to a date and reports whether a non-zero time of day was discarded.
+    bool check_loss_accuracy_cast_to_date() {
+        const bool had_time_part = !(_hour == 0 && _minute == 0 && _second == 0);
+        cast_to_date();
+        return had_time_part;
+    }
+
+    // Turns the value into a TIME_DATE by clearing the time of day; the date is kept.
+    void cast_to_date() {
+        _type = TIME_DATE;
+        _second = 0;
+        _minute = 0;
+        _hour = 0;
+    }
+
+    // Turns the value into a TIME_TIME by clearing the date; the time of day is kept.
+    void cast_to_time() {
+        _type = TIME_TIME;
+        _day = 0;
+        _month = 0;
+        _year = 0;
+    }
+
+    void to_datetime() { _type = TIME_DATETIME; }
+
+    // Weekday, from 0(Mon) to 6(Sun)
+    inline uint8_t weekday() const { return calc_weekday(daynr(), false); }
+    inline auto day_of_week() const { return (weekday() + 1) % 7 + 1; }
+
+    // The bits in week_format has the following meaning:
+    // WEEK_MONDAY_FIRST (0)
+    //  If not set:
+    //      Sunday is first day of week
+    //  If set:
+    //      Monday is first day of week
+    //
+    // WEEK_YEAR (1)
+    //  If not set:
+    //      Week is in range 0-53
+    //      Week 0 is returned for the last week of the previous year (for
+    //      a date at start of january) In this case one can get 53 for the
+    //      first week of next year.  This flag ensures that the week is
+    //      relevant for the given year. Note that this flag is only
+    //      relevant if WEEK_JANUARY is not set.
+    //  If set:
+    //      Week is in range 1-53.
+    //      In this case one may get week 53 for a date in January (when
+    //      the week is that last week of previous year) and week 1 for a
+    //      date in December.
+    //
+    // WEEK_FIRST_WEEKDAY (2)
+    //  If not set
+    //      Weeks are numbered according to ISO 8601:1988
+    //  If set
+    //      The week that contains the first 'first-day-of-week' is week 1.
+    //
+    // ISO 8601:1988 means that
+    //      if the week containing January 1 has
+    //      four or more days in the new year, then it is week 1;
+    //      Otherwise it is the last week of the previous year, and the
+    //      next week is week 1.
+    uint8_t week(uint8_t) const;
+
+    uint32_t year_week(uint8_t mode) const;
+
+    // Add interval
+    bool date_add_interval(const TimeInterval& interval, TimeUnit unit);
+
+    //unix_timestamp is called with a timezone argument,
+    //it returns seconds of the value of date literal since '1970-01-01 00:00:00' UTC
+    bool unix_timestamp(int64_t* timestamp, const std::string& timezone) const;
+    bool unix_timestamp(int64_t* timestamp, const cctz::time_zone& ctz) const;
+
+    //construct datetime_value from timestamp and timezone
+    //timestamp is an internal timestamp value representing seconds since '1970-01-01 00:00:00' UTC
+    bool from_unixtime(int64_t, const std::string& timezone);
+    bool from_unixtime(int64_t, const cctz::time_zone& ctz);
+
+    // Equality compares the packed datetime representation of both values.
+    // NOTE: This is not the same as MySQL, which converts both operands to an integer using
+    // the left operand's type before comparing; here all packed fields must be equal.
+    bool operator==(const VecDateTimeValue& other) const {
+        return to_int64_datetime_packed() == other.to_int64_datetime_packed();
+    }
+
+    bool operator!=(const VecDateTimeValue& other) const { return !(*this == other); }
+
+    // Now, we don't support TIME_TIME type,
+    bool operator<=(const VecDateTimeValue& other) const { return !(*this > other); }
+
+    bool operator>=(const VecDateTimeValue& other) const { return !(*this < other); }
+
+    // Orders values by their packed datetime representation.
+    bool operator<(const VecDateTimeValue& other) const {
+        return to_int64_datetime_packed() < other.to_int64_datetime_packed();
+    }
+
+    // Orders values by their packed datetime representation.
+    bool operator>(const VecDateTimeValue& other) const {
+        return to_int64_datetime_packed() > other.to_int64_datetime_packed();
+    }
+
+    const char* month_name() const;
+
+    const char* day_name() const;
+
+    // Pre-increment: a TIME_DATE advances by one day, TIME_DATETIME and TIME_TIME advance by one
+    // second. Any other type value leaves the object unchanged. The result of
+    // date_add_interval() is ignored.
+    VecDateTimeValue& operator++() {
+        if (_type == TIME_DATE) {
+            date_add_interval(TimeInterval(DAY, 1, false), DAY);
+        } else if (_type == TIME_DATETIME || _type == TIME_TIME) {
+            date_add_interval(TimeInterval(SECOND, 1, false), SECOND);
+        }
+        return *this;
+    }
+
+    // Fills the UDF-facing DateTimeVal `tv` with this value's type and packed datetime.
+    void to_datetime_val(doris_udf::DateTimeVal* tv) const {
+        tv->type = _type;
+        tv->packed_time = to_int64_datetime_packed();
+    }
+
+    // Rebuilds a value from the UDF-facing DateTimeVal `tv`. The packed time is always decoded
+    // as a datetime; when tv.type is TIME_DATE the time of day is cleared afterwards.
+    static VecDateTimeValue from_datetime_val(const doris_udf::DateTimeVal& tv) {
+        VecDateTimeValue result;
+        result.from_packed_time(tv.packed_time);
+        const bool is_date = (tv.type == TIME_DATE);
+        if (is_date) {
+            result.cast_to_date();
+        }
+        return result;
+    }
+
+    inline uint32_t hash(int seed) const { return HashUtil::hash(this, sizeof(*this), seed); }
+
+    int day_of_year() const { return daynr() - calc_daynr(_year, 1, 1) + 1; }
+
+    // TODO(zhaochun): local time ???
+    static VecDateTimeValue local_time();
+
+    // Returns the text written by to_string() as a std::string.
+    std::string debug_string() const {
+        char text[64];
+        const char* text_end = to_string(text);
+        const size_t length = text_end - text;
+        return std::string(text, length);
+    }
+
+    static VecDateTimeValue datetime_min_value() {
+        static VecDateTimeValue _s_min_datetime_value(0, TIME_DATETIME, 0, 0, 0, 0, 1, 1);
+        return _s_min_datetime_value;
+    }
+
+    static VecDateTimeValue datetime_max_value() {
+        static VecDateTimeValue _s_max_datetime_value(0, TIME_DATETIME, 23, 59, 59, 9999, 12, 31);
+        return _s_max_datetime_value;
+    }
+
+    // Signed number of seconds from `rhs` to this value (negative when this value is earlier).
+    // The day difference is widened to 64 bits before scaling: once multiplied by 86400, a
+    // difference of more than about 68 years no longer fits in a 32-bit int.
+    int64_t second_diff(const VecDateTimeValue& rhs) const {
+        const int64_t day_diff =
+                static_cast<int64_t>(daynr()) - static_cast<int64_t>(rhs.daynr());
+        const int64_t time_diff = time_part_diff(rhs);
+        return day_diff * 86400 + time_diff;
+    }
+
+    // Difference of the two times of day in seconds (this minus rhs); the dates are ignored.
+    int64_t time_part_diff(const VecDateTimeValue& rhs) const {
+        const int own_seconds = hour() * 3600 + minute() * 60 + second();
+        const int rhs_seconds = rhs.hour() * 3600 + rhs.minute() * 60 + rhs.second();
+        return own_seconds - rhs_seconds;
+    }
+
+    void set_type(int type);
+
+    int type() const { return _type; }
+
+    // True when the stored fields pass check_range() and both month and day are non-zero.
+    bool is_valid_date() const {
+        if (check_range(_year, _month, _day, _hour, _minute, _second, _type)) {
+            return false;
+        }
+        return _month > 0 && _day > 0;
+    }
+            
+    void convert_vec_dt_to_dt(doris::DateTimeValue* dt);
+    void convert_dt_to_vec_dt(doris::DateTimeValue* dt);
+private:
+    // Used to make sure sizeof VecDateTimeValue
+    friend class UnusedClass;
+
+    // Decodes a packed datetime as produced by to_int64_datetime_packed() for a non-negative
+    // value. The sign is not recovered: _neg is always cleared and the type is set to
+    // TIME_DATETIME.
+    void from_packed_time(int64_t packed_time) {
+        // The low 24 bits are the (unused) second-part slot; above them sit 17 bits of
+        // hour/minute/second and then the year/month/day part.
+        int64_t ymdhms = packed_time >> 24;
+        int64_t ymd = ymdhms >> 17;
+        int64_t hms = ymdhms % (1 << 17);
+
+        // Day occupies the low 5 bits; the rest encodes year * 13 + month.
+        _day = ymd % (1 << 5);
+        int64_t ym = ymd >> 5;
+        _month = ym % 13;
+        // Two steps on purpose: the quotient is first narrowed to the 16-bit _year,
+        // then reduced modulo 10000.
+        _year = ym / 13;
+        _year %= 10000;
+        // Second and minute take 6 bits each; the hour is whatever remains above them.
+        _second = hms % (1 << 6);
+        _minute = (hms >> 6) % (1 << 6);
+        _hour = (hms >> 12);
+        _neg = 0;
+        _type = TIME_DATETIME;
+    }
+
+    int64_t make_packed_time(int64_t time, int64_t second_part) const {
+        return (time << 24) + second_part;
+    }
+
+    // To be compatible with MySQL.
+    // Packs the value into one integer: ((year * 13 + month) << 5 | day) in the upper part,
+    // (hour << 12 | minute << 6 | second) in the 17 bits below it, and 24 zero bits at the
+    // bottom (see make_packed_time). The result is negated when _neg is set.
+    int64_t to_int64_datetime_packed() const {
+        int64_t ymd = ((_year * 13 + _month) << 5) | _day;
+        int64_t hms = (_hour << 12) | (_minute << 6) | _second;
+        int64_t tmp = make_packed_time(((ymd << 17) | hms), 0);
+        return _neg ? -tmp : tmp;
+    }
+
+    int64_t to_int64_date_packed() const {
+        int64_t ymd = ((_year * 13 + _month) << 5) | _day;
+        int64_t tmp = make_packed_time(ymd << 17, 0);
+        return _neg ? -tmp : tmp;
+    }
+
+    // Used to construct from int value
+    int64_t standardize_timevalue(int64_t value);
+
+    // Used to convert to a string.
+    char* append_date_buffer(char* to) const;
+    char* append_time_buffer(char* to) const;
+    char* to_datetime_buffer(char* to) const;
+    char* to_date_buffer(char* to) const;
+    char* to_time_buffer(char* to) const;
+
+    // Used to convert to int64_t
+    int64_t to_datetime_int64() const;
+    int64_t to_date_int64() const;
+    int64_t to_time_int64() const;
+
+    static uint8_t calc_week(const VecDateTimeValue& value, uint8_t mode, uint32_t* year);
+
+    // Private helper that modifies the date fields; NOTE(review): presumably it does not modify `_type` — confirm.
+    bool get_date_from_daynr(uint64_t);
+
+    // Helper to set max, min, zero
+    void set_zero(int type);
+    void set_max_time(bool neg);
+
+    bool from_date_format_str(const char* format, int format_len, const char* value, int value_len,
+                              const char** sub_val_end);
+
+    // Field layout: the first 16-bit word is shared by three bit-fields
+    // (1 bit for neg, 3 bits for type, 12 bits for second), followed by one byte each for
+    // minute, hour, day and month, and a 16-bit year.
+    uint16_t _neg : 1;  // Used for time value.
+    uint16_t _type : 3; // Which type of this value.
+    uint16_t _second : 12;
+    uint8_t _minute;
+    uint8_t _hour;
+    uint8_t _day;
+    uint8_t _month;
+    uint16_t _year;
+
+    VecDateTimeValue(uint8_t neg, uint8_t type, uint8_t hour, uint8_t minute, uint8_t second,
+                  uint16_t year, uint8_t month, uint8_t day)
+            : _neg(neg),
+              _type(type),
+              _second(second),
+              _minute(minute),
+              _hour(hour),
+              _day(day),
+              _month(month),
+              _year(year) {}
+
+    // RE2 obj is thread safe
+    static RE2 time_zone_offset_format_reg;
+};
+
+// only support DATE - DATE (no support DATETIME - DATETIME)
+std::size_t operator-(const VecDateTimeValue& v1, const VecDateTimeValue& v2);
+
+std::ostream& operator<<(std::ostream& os, const VecDateTimeValue& value);
+
+std::size_t hash_value(VecDateTimeValue const& value);
+
+} // namespace vectorized
+} // namespace doris
+
+// std::hash support so VecDateTimeValue can be used as a key in unordered containers;
+// forwards to doris::vectorized::hash_value().
+namespace std {
+template <>
+struct hash<doris::vectorized::VecDateTimeValue> {
+    size_t operator()(const doris::vectorized::VecDateTimeValue& v) const {
+        const size_t hashed = doris::vectorized::hash_value(v);
+        return hashed;
+    }
+};
+} // namespace std
+
+#endif
diff --git a/be/src/vec/runtime/vpartition_info.cpp b/be/src/vec/runtime/vpartition_info.cpp
new file mode 100644
index 0000000000..115227dd69
--- /dev/null
+++ b/be/src/vec/runtime/vpartition_info.cpp
@@ -0,0 +1,56 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/runtime/vpartition_info.h"
+
+namespace doris::vectorized {
+Status VPartitionInfo::from_thrift(ObjectPool* pool, const TRangePartition& t_partition, // Fills *partition from its thrift description.
+                                   VPartitionInfo* partition) {
+    partition->_id = t_partition.partition_id;
+    RETURN_IF_ERROR(PartRange::from_thrift(pool, t_partition.range, &partition->_range));
+    if (t_partition.__isset.distributed_exprs) { // bucket count and exprs are only read when distributed_exprs is set
+        partition->_distributed_bucket = t_partition.distribute_bucket; // NOTE(review): distribute_bucket's own isset flag is not checked -- confirm it is always sent together with the exprs
+        if (partition->_distributed_bucket == 0) { // a bucket count of 0 is rejected before any expr tree is built
+            return Status::InternalError("Distributed bucket is 0.");
+        }
+        RETURN_IF_ERROR(VExpr::create_expr_trees(pool, t_partition.distributed_exprs,
+                                                 &partition->_distributed_expr_ctxs));
+    }
+    return Status::OK();
+}
+
+Status VPartitionInfo::prepare(RuntimeState* state, const RowDescriptor& row_desc,
+                               const std::shared_ptr<MemTracker>& mem_tracker) {
+    // Prepare the distribution exprs against row_desc; nothing to do when there are none.
+    if (_distributed_expr_ctxs.empty()) return Status::OK();
+    RETURN_IF_ERROR(VExpr::prepare(_distributed_expr_ctxs, state, row_desc, mem_tracker));
+    return Status::OK();
+}
+
+Status VPartitionInfo::open(RuntimeState* state) {
+    // Opens the distribution exprs. A partition that has none succeeds
+    // immediately without calling into VExpr.
+    const bool has_exprs = !_distributed_expr_ctxs.empty();
+    return has_exprs ? VExpr::open(_distributed_expr_ctxs, state) : Status::OK();
+}
+
+void VPartitionInfo::close(RuntimeState* state) {
+    // Closes the distribution exprs; the call is skipped entirely
+    // when the partition has none.
+    if (_distributed_expr_ctxs.empty()) return;
+    VExpr::close(_distributed_expr_ctxs, state);
+}
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/runtime/vpartition_info.h b/be/src/vec/runtime/vpartition_info.h
new file mode 100644
index 0000000000..3c59e7450d
--- /dev/null
+++ b/be/src/vec/runtime/vpartition_info.h
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "common/status.h"
+#include "runtime/dpp_sink_internal.h"
+#include "vec/exprs/vexpr.h"
+
+namespace doris {
+namespace vectorized {
+
+class VPartitionInfo { // One range partition: its id, its range, and the exprs / bucket count used to distribute rows.
+public:
+    VPartitionInfo() : _id(-1), _distributed_bucket(0) {} // id -1 and bucket count 0 until from_thrift() fills them in
+    // Fills *partition from t_partition; returns an error status if parsing fails.
+    static Status from_thrift(ObjectPool* pool, const TRangePartition& t_partition,
+                              VPartitionInfo* partition);
+    // prepare / open / close forward to VExpr for the distribution exprs and do nothing when there are none.
+    Status prepare(RuntimeState* state, const RowDescriptor& row_desc,
+                   const std::shared_ptr<MemTracker>& mem_tracker);
+
+    Status open(RuntimeState* state);
+
+    void close(RuntimeState* state);
+
+    int64_t id() const { return _id; }
+
+    const std::vector<VExprContext*>& distributed_expr_ctxs() const {
+        return _distributed_expr_ctxs;
+    }
+
+    int distributed_bucket() const { return _distributed_bucket; }
+
+    const PartRange& range() const { return _range; }
+
+private:
+    int64_t _id; // partition id taken from the thrift description
+    PartRange _range; // range of this partition
+    // Information used to distribute data:
+    // the distribution exprs (raw pointers; this class never deletes them) ...
+    std::vector<VExprContext*> _distributed_expr_ctxs;
+    int32_t _distributed_bucket; // ... and the number of buckets
+};
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/runtime/vsorted_run_merger.cpp b/be/src/vec/runtime/vsorted_run_merger.cpp
new file mode 100644
index 0000000000..333208505b
--- /dev/null
+++ b/be/src/vec/runtime/vsorted_run_merger.cpp
@@ -0,0 +1,156 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/runtime/vsorted_run_merger.h"
+
+#include <vector>
+
+#include "exprs/expr.h"
+#include "runtime/descriptors.h"
+#include "runtime/row_batch.h"
+#include "runtime/sorter.h"
+#include "runtime/tuple_row.h"
+#include "util/debug_util.h"
+#include "util/defer_op.h"
+#include "util/runtime_profile.h"
+
+using std::vector;
+
+namespace doris::vectorized {
+
+VSortedRunMerger::VSortedRunMerger(const std::vector<VExprContext *>& ordering_expr, const std::vector<bool>& is_asc_order,
+            const std::vector<bool>& nulls_first, const size_t batch_size, int64_t limit, size_t offset, RuntimeProfile* profile)
+        :_ordering_expr(ordering_expr), _is_asc_order(is_asc_order), _nulls_first(nulls_first), _batch_size(batch_size), // the first three members are declared as references, so these vectors are not copied
+        _limit(limit), _offset(offset){
+    _get_next_timer = ADD_TIMER(profile, "MergeGetNext"); // timer used by get_next()
+    _get_next_block_timer = ADD_TIMER(profile, "MergeGetNextBlock"); // timer used by has_next_block()
+}
+
+Status VSortedRunMerger::prepare(const vector<BlockSupplier>& input_runs, bool parallel) { // 'parallel' is not used in this body
+    for (const auto &supplier : input_runs) {
+        _cursors.emplace_back(supplier, _ordering_expr, _is_asc_order, _nulls_first);
+    }
+    // Cursors are queued by address, so _cursors is fully built first (emplace_back may reallocate the vector).
+    for (auto& _cursor : _cursors) {
+        if (!_cursor._is_eof) _priority_queue.push(SortCursor(&_cursor));
+    }
+    // _empty_block (used by get_next() to create output columns) comes from the first run that is not at eof.
+    for (const auto& cursor : _cursors) {
+        if (!cursor._is_eof) {
+            _empty_block = cursor.create_empty_blocks();
+            break;
+        }
+    }
+
+    return Status::OK();
+}
+
+Status VSortedRunMerger::get_next(Block* output_block, bool* eos) { // Produces the next block of merged rows; *eos is set to true when the output is finished.
+    ScopedTimer<MonotonicStopWatch> timer(_get_next_timer);
+    // Fast path: exactly one run is left in the queue, so there is nothing to merge.
+    // Its blocks are swapped into output_block instead of being copied row by row.
+    // On this path *eos is always written (true or false).
+    if (_priority_queue.size() == 1) {
+        auto current = _priority_queue.top();
+        while (_offset != 0 && current->block_ptr() != nullptr) { // first consume what is left of the offset
+            if (_offset >= current->rows - current->pos) { // the whole rest of this block is skipped
+                _offset -= (current->rows - current->pos);
+                has_next_block(current);
+            } else { // the offset ends inside this block
+                current->pos += _offset;
+                _offset = 0;
+            }
+        }
+
+        if (current->isFirst()) { // isFirst(): presumably pos == 0 -- the block can be handed over whole
+            if (current->block_ptr() != nullptr) {
+                current->block_ptr()->swap(*output_block);
+                *eos = !has_next_block(current);
+            } else {
+                *eos = true;
+            }
+        } else { // part of the block was consumed already: keep only rows [pos, rows)
+            if (current->block_ptr() != nullptr) {
+                for (size_t i = 0; i < current->all_columns.size(); i++) {
+                    auto& column_with_type = current->block_ptr()->get_by_position(i);
+                    column_with_type.column = column_with_type.column->cut(current->pos,
+                            current->rows - current->pos);
+                }
+                current->block_ptr()->swap(*output_block);
+                *eos = !has_next_block(current);
+            } else {
+                *eos = true;
+            }
+        }
+    } else { // general path: merge row by row through the priority queue; *eos is only ever set to true here
+        size_t num_columns = _empty_block.columns();
+        bool mem_reuse = output_block->mem_reuse();
+        MutableColumns merged_columns = mem_reuse ?
+                output_block->mutate_columns() : _empty_block.clone_empty_columns();
+
+        /// Take rows from queue in right order and push to 'merged'.
+        size_t merged_rows = 0;
+        while (!_priority_queue.empty()) {
+            auto current = _priority_queue.top();
+            _priority_queue.pop();
+
+            if (_offset > 0) { // still inside the offset: drop this row
+                _offset--;
+            } else {
+                for (size_t i = 0; i < num_columns; ++i)
+                    merged_columns[i]->insert_from(*current->all_columns[i], current->pos);
+                ++merged_rows;
+            }
+            next_heap(current); // advance the cursor and re-queue it if its run has more rows
+            if (merged_rows == _batch_size)
+                break;
+        }
+
+        if (merged_rows == 0) { // nothing was produced (queue empty, or every row fell inside the offset)
+            *eos = true;
+            return Status::OK();
+        }
+
+        if (!mem_reuse) { // fresh columns were built: wrap them in a block and hand that over
+            Block merge_block = _empty_block.clone_with_columns(std::move(merged_columns));
+            merge_block.swap(*output_block);
+        }
+    }
+
+    _num_rows_returned += output_block->rows();
+    if (_limit != -1 && _num_rows_returned >= _limit) { // a limit of -1 disables this check
+        output_block->set_num_rows(output_block->rows() - (_num_rows_returned - _limit)); // trim the rows past the limit
+        *eos = true;
+    }
+    return Status::OK();
+}
+
+void VSortedRunMerger::next_heap(SortCursor& current) { // Steps the cursor forward and re-queues it while its run still has rows.
+    if (current->isLast()) {
+        if (!has_next_block(current)) return; // run has no further block: leave it out of the queue
+    } else {
+        current->next();
+    }
+    _priority_queue.push(current);
+}
+
+inline bool VSortedRunMerger::has_next_block(doris::vectorized::SortCursor &current) { // Timed wrapper around the cursor's own has_next_block().
+    ScopedTimer<MonotonicStopWatch> timer(_get_next_block_timer); // charged to the "MergeGetNextBlock" timer
+    return current->has_next_block();
+}
+
+} // namespace doris::vectorized
\ No newline at end of file
diff --git a/be/src/vec/runtime/vsorted_run_merger.h b/be/src/vec/runtime/vsorted_run_merger.h
new file mode 100644
index 0000000000..782a361f20
--- /dev/null
+++ b/be/src/vec/runtime/vsorted_run_merger.h
@@ -0,0 +1,91 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <queue>
+
+#include "common/object_pool.h"
+#include "util/tuple_row_compare.h"
+
+#include "vec/core/sort_cursor.h"
+
+namespace doris {
+
+class RowBatch;
+class RuntimeProfile;
+
+namespace vectorized {
+class Block;
+// VSortedRunMerger is used to merge multiple sorted runs of blocks. A run is a sorted
+// sequence of blocks, which are fetched from a BlockSupplier function object.
+// Merging is implemented using a binary min-heap that maintains the run with the next
+// rows in sorted order at the top of the heap.
+//
+// Merged block of rows are retrieved from VSortedRunMerger via calls to get_next().
+class VSortedRunMerger {
+public:
+    // ordering_expr, _is_asc_order and _nulls_first are kept as references (see the members
+    // below), so the vectors passed in must outlive this merger. limit == -1 means no limit;
+    // offset is the number of leading rows get_next() drops. Two timers are added to profile.
+    VSortedRunMerger(const std::vector<VExprContext *>& ordering_expr, const std::vector<bool>& _is_asc_order,
+            const std::vector<bool>& _nulls_first, const size_t batch_size, int64_t limit, size_t offset, RuntimeProfile* profile);
+
+    virtual ~VSortedRunMerger() = default;
+
+    // Prepare this merger to merge and return rows from the sorted runs in 'input_runs':
+    // one cursor is built per run and every run that is not already at eof is queued.
+    // 'parallel' is not used by the implementation in vsorted_run_merger.cpp.
+    Status prepare(const std::vector<BlockSupplier>& input_runs, bool parallel = false);
+
+    // Return the next block of sorted rows from this merger; *eos is set to true when finished.
+    Status get_next(Block* output_block, bool *eos);
+
+    // Row-batch output is not supported: this always returns an InternalError.
+    virtual Status get_batch(RowBatch **output_batch) {
+        return Status::InternalError("no support method get_batch(RowBatch** output_batch)");
+    }
+
+protected:
+    const std::vector<VExprContext *>& _ordering_expr; // reference, not a copy
+    const std::vector<bool>& _is_asc_order; // reference, not a copy
+    const std::vector<bool>& _nulls_first; // reference, not a copy
+    const size_t _batch_size; // get_next() stops merging once this many rows are in the output
+
+    size_t _num_rows_returned = 0; // rows handed out so far, compared against _limit
+    int64_t _limit = -1; // -1: unlimited
+    size_t _offset = 0; // rows still to be skipped
+
+    std::vector<ReceiveQueueSortCursorImpl> _cursors; // one per input run; _priority_queue refers to these elements
+    std::priority_queue<SortCursor> _priority_queue;
+
+    Block _empty_block; // template used to create the merged output columns
+
+    // Times calls to get_next().
+    RuntimeProfile::Counter *_get_next_timer;
+
+    // Times calls to get the next block of rows from the input run.
+    RuntimeProfile::Counter *_get_next_block_timer;
+
+private:
+    void next_heap(SortCursor& current);
+    inline bool has_next_block(SortCursor& current);
+};
+
+}
+} // namespace doris
+
diff --git a/be/src/vec/sink/mysql_result_writer.cpp b/be/src/vec/sink/mysql_result_writer.cpp
new file mode 100644
index 0000000000..4a8f72d2e1
--- /dev/null
+++ b/be/src/vec/sink/mysql_result_writer.cpp
@@ -0,0 +1,349 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/sink/mysql_result_writer.h"
+
+#include "runtime/buffer_control_block.h"
+#include "runtime/runtime_state.h"
+#include "util/date_func.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/assert_cast.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/runtime/vdatetime_value.h"
+namespace doris {
+namespace vectorized {
+VMysqlResultWriter::VMysqlResultWriter(BufferControlBlock* sinker, // Only stores its arguments; the profile counters are created by init().
+                                       const std::vector<VExprContext*>& output_vexpr_ctxs,
+                                       RuntimeProfile* parent_profile)
+        : VResultWriter(),
+          _sinker(sinker),
+          _output_vexpr_ctxs(output_vexpr_ctxs), // the member is declared as a reference, so the vector is not copied
+          _parent_profile(parent_profile) {}
+
+Status VMysqlResultWriter::init(RuntimeState* state) {
+    // Register the profile counters first; this happens even when the sinker check fails.
+    _init_profile();
+    // Without a sinker there is nowhere to send results, so report an error.
+    const bool has_sinker = (_sinker != nullptr);
+    return has_sinker ? Status::OK()
+                      : Status::InternalError("sinker is NULL pointer.");
+}
+
+void VMysqlResultWriter::_init_profile() { // Registers this writer's timers and its row counter on the parent profile.
+    _append_row_batch_timer = ADD_TIMER(_parent_profile, "AppendBatchTime");
+    _convert_tuple_timer = ADD_CHILD_TIMER(_parent_profile, "TupleConvertTime", "AppendBatchTime"); // child of AppendBatchTime
+    _result_send_timer = ADD_CHILD_TIMER(_parent_profile, "ResultRendTime", "AppendBatchTime"); // NOTE(review): "ResultRendTime" looks like a typo of "ResultSendTime" -- confirm before renaming, the name is visible in profiles
+    _sent_rows_counter = ADD_COUNTER(_parent_profile, "NumSentRows", TUnit::UNIT);
+}
+
+template <PrimitiveType type, bool is_nullable> // type selects the encoding; is_nullable says whether column_ptr is a ColumnNullable
+Status VMysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr,
+                                           std::unique_ptr<TFetchDataResult>& result) {
+    SCOPED_TIMER(_convert_tuple_timer);
+    // Serializes one output column: the encoded cell of row i is appended to result->result_batch.rows[i].
+    const auto column_size = column_ptr->size();
+    // Values are read from the nested column of a nullable column; null checks still go through column_ptr.
+    doris::vectorized::ColumnPtr column;
+    if constexpr (is_nullable) {
+        column = assert_cast<const ColumnNullable&>(*column_ptr).get_nested_column_ptr();
+    } else {
+        column = column_ptr;
+    }
+
+    MysqlRowBuffer _buffer; // scratch buffer, reset for every cell
+    int buf_ret = 0; // result of the latest push_*; non-zero is treated as failure
+
+    if constexpr (type == TYPE_OBJECT || type == TYPE_VARCHAR) {
+        for (size_t i = 0; i < column_size; ++i) {
+            if (0 != buf_ret) { // failure of the previous row's push is detected here
+                return Status::InternalError("pack mysql buffer failed.");
+            }
+            _buffer.reset();
+
+            if constexpr (is_nullable) {
+                if (column_ptr->is_null_at(i)) {
+                    buf_ret = _buffer.push_null();
+                    result->result_batch.rows[i].append(_buffer.buf(), _buffer.length());
+                    continue;
+                }
+            }
+
+            if constexpr (type == TYPE_OBJECT) { // object values are always sent as NULL
+                buf_ret = _buffer.push_null();
+            }
+            if constexpr (type == TYPE_VARCHAR) {
+                const auto string_val = column->get_data_at(i);
+
+                if (string_val.data == nullptr) {
+                    if (string_val.size == 0) {
+                        // 0x01 is a magic number that is never dereferenced (size is 0); it only represents ""
+                        char* tmp_val = reinterpret_cast<char*>(0x01);
+                        buf_ret = _buffer.push_string(tmp_val, string_val.size);
+                    } else {
+                        buf_ret = _buffer.push_null();
+                    }
+                } else {
+                    buf_ret = _buffer.push_string(string_val.data, string_val.size);
+                }
+            }
+
+            result->result_batch.rows[i].append(_buffer.buf(), _buffer.length());
+        }
+    } else {
+        using ColumnType = typename PrimitiveTypeTraits<type>::ColumnType;
+        auto& data = assert_cast<const ColumnType&>(*column).get_data();
+
+        for (size_t i = 0; i < column_size; ++i) {
+            if (0 != buf_ret) { // failure of the previous row's push is detected here
+                return Status::InternalError("pack mysql buffer failed.");
+            }
+            _buffer.reset();
+
+            if constexpr (is_nullable) {
+                if (column_ptr->is_null_at(i)) {
+                    buf_ret = _buffer.push_null();
+                    result->result_batch.rows[i].append(_buffer.buf(), _buffer.length());
+                    continue;
+                }
+            }
+
+            if constexpr (type == TYPE_BOOLEAN) {
+                // TODO: switch to an unsigned push once MysqlRowBuffer supports it
+                buf_ret = _buffer.push_tinyint(data[i]);
+            }
+            if constexpr (type == TYPE_TINYINT) {
+                buf_ret = _buffer.push_tinyint(data[i]);
+            }
+            if constexpr (type == TYPE_SMALLINT) {
+                buf_ret = _buffer.push_smallint(data[i]);
+            }
+            if constexpr (type == TYPE_INT) {
+                buf_ret = _buffer.push_int(data[i]);
+            }
+            if constexpr (type == TYPE_BIGINT) {
+                buf_ret = _buffer.push_bigint(data[i]);
+            }
+            if constexpr (type == TYPE_LARGEINT) { // sent as its decimal string
+                auto v = LargeIntValue::to_string(data[i]);
+                buf_ret = _buffer.push_string(v.c_str(), v.size());
+            }
+            if constexpr (type == TYPE_FLOAT) {
+                buf_ret = _buffer.push_float(data[i]);
+            }
+            if constexpr (type == TYPE_DOUBLE) {
+                buf_ret = _buffer.push_double(data[i]);
+            }
+            if constexpr (type == TYPE_TIME) {
+                buf_ret = _buffer.push_time(data[i]);
+            }
+            if constexpr (type == TYPE_DATETIME) {
+                char buf[64];
+                auto time_num = data[i];
+                VecDateTimeValue time_val;
+                memcpy(static_cast<void*>(&time_val), &time_num, sizeof(Int64)); // the column stores the value's raw 8 bytes
+                // TODO(zhaochun), this function has core risk
+                char* pos = time_val.to_string(buf);
+                buf_ret = _buffer.push_string(buf, pos - buf - 1); // NOTE(review): the -1 presumably drops a trailing NUL written by to_string -- confirm
+            }
+
+            if constexpr (type == TYPE_DECIMALV2) { // sent as its decimal string
+                DecimalV2Value decimal_val(data[i]);
+                auto decimal_str = decimal_val.to_string();
+                buf_ret = _buffer.push_string(decimal_str.c_str(), decimal_str.length());
+            }
+
+            result->result_batch.rows[i].append(_buffer.buf(), _buffer.length());
+        }
+    }
+    if (0 != buf_ret) { // covers a failure on the last row, which no loop iteration re-checks
+        return Status::InternalError("pack mysql buffer failed.");
+    }
+
+    return Status::OK();
+}
+
+Status VMysqlResultWriter::append_row_batch(const RowBatch* batch) { // Row batches are not handled by this writer: always returns a RuntimeError.
+    return Status::RuntimeError("Not Implemented MysqlResultWriter::append_row_batch scalar");
+}
+
+Status VMysqlResultWriter::append_block(Block& input_block) { // Evaluates the output exprs on input_block, encodes every row for MySQL and hands the batch to the sinker.
+    SCOPED_TIMER(_append_row_batch_timer);
+    Status status = Status::OK();
+    if (UNLIKELY(input_block.rows() == 0)) { // nothing to send
+        return status;
+    }
+
+    // Exec vectorized expr here to speed up, block.rows() == 0 means expr exec
+    // failed, just return the error status
+    auto block = VExprContext::get_output_block_after_execute_exprs(_output_vexpr_ctxs, input_block,
+                                                                    status);
+    auto num_rows = block.rows();
+    if (UNLIKELY(num_rows == 0)) {
+        return status;
+    }
+
+    // Convert one batch: one string per row, filled column by column in output order.
+    auto result = std::make_unique<TFetchDataResult>();
+    result->result_batch.rows.resize(num_rows);
+    for (size_t i = 0; status.ok() && i < _output_vexpr_ctxs.size(); ++i) {
+        auto column_ptr = block.get_by_position(i).column->convert_to_full_column_if_const();
+        auto type_ptr = block.get_by_position(i).type;
+        // The expr's result type picks the encoding; the block column's type picks the nullable variant.
+        switch (_output_vexpr_ctxs[i]->root()->result_type()) {
+        case TYPE_BOOLEAN:
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_BOOLEAN, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_BOOLEAN, false>(column_ptr, result);
+            }
+            break;
+        case TYPE_TINYINT: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_TINYINT, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_TINYINT, false>(column_ptr, result);
+            }
+            break;
+        }
+        case TYPE_SMALLINT: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_SMALLINT, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_SMALLINT, false>(column_ptr, result);
+            }
+            break;
+        }
+        case TYPE_INT: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_INT, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_INT, false>(column_ptr, result);
+            }
+            break;
+        }
+        case TYPE_BIGINT: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_BIGINT, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_BIGINT, false>(column_ptr, result);
+            }
+            break;
+        }
+        case TYPE_LARGEINT: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_LARGEINT, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_LARGEINT, false>(column_ptr, result);
+            }
+            break;
+        }
+        case TYPE_FLOAT: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_FLOAT, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_FLOAT, false>(column_ptr, result);
+            }
+            break;
+        }
+        case TYPE_DOUBLE: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_DOUBLE, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_DOUBLE, false>(column_ptr, result);
+            }
+            break;
+        }
+        case TYPE_TIME: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_TIME, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_TIME, false>(column_ptr, result);
+            }
+            break;
+        }
+        case TYPE_STRING: // all string-like types share the VARCHAR encoding
+        case TYPE_CHAR:
+        case TYPE_VARCHAR: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_VARCHAR, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_VARCHAR, false>(column_ptr, result);
+            }
+            break;
+        }
+        case TYPE_DECIMALV2: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_DECIMALV2, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_DECIMALV2, false>(column_ptr, result);
+            }
+            break;
+        }
+        case TYPE_DATE: // DATE goes through the DATETIME encoding
+        case TYPE_DATETIME: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_DATETIME, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_DATETIME, false>(column_ptr, result);
+            }
+            break;
+        }
+        case TYPE_HLL: // HLL goes through the OBJECT encoding, which sends NULL
+        case TYPE_OBJECT: {
+            if (type_ptr->is_nullable()) {
+                status = _add_one_column<PrimitiveType::TYPE_OBJECT, true>(column_ptr, result);
+            } else {
+                status = _add_one_column<PrimitiveType::TYPE_OBJECT, false>(column_ptr, result);
+            }
+            break;
+        }
+        default: { // unsupported type: give up on the whole block immediately
+            LOG(WARNING) << "can't convert this type to mysql type. type = "
+                         << _output_vexpr_ctxs[i]->root()->type();
+            return Status::InternalError("vec block pack mysql buffer failed.");
+        }
+        }
+
+        if (!status.ok()) {
+            LOG(WARNING) << "convert row to mysql result failed.";
+            break;
+        }
+    }
+    if (status.ok()) { // every column was encoded: send the batch
+        SCOPED_TIMER(_result_send_timer);
+        // push this batch to back
+        status = _sinker->add_batch(result);
+
+        if (status.ok()) {
+            _written_rows += num_rows;
+        } else {
+            LOG(WARNING) << "append result batch to sink failed.";
+        }
+    }
+
+    return status;
+}
+
+Status VMysqlResultWriter::close() { // Publishes the number of rows written to the "NumSentRows" counter.
+    COUNTER_SET(_sent_rows_counter, _written_rows);
+    return Status::OK();
+}
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/sink/mysql_result_writer.h b/be/src/vec/sink/mysql_result_writer.h
new file mode 100644
index 0000000000..bbd742543e
--- /dev/null
+++ b/be/src/vec/sink/mysql_result_writer.h
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "runtime/primitive_type.h"
+#include "util/mysql_row_buffer.h"
+#include "util/runtime_profile.h"
+#include "vec/core/block.h"
+#include "vec/sink/result_writer.h"
+
+namespace doris {
+class BufferControlBlock;
+class RowBatch;
+class MysqlRowBuffer;
+class TFetchDataResult;
+
+namespace vectorized {
+class VExprContext;
+
+class VMysqlResultWriter final : public VResultWriter { // Result writer that encodes blocks as MySQL rows and pushes them to a BufferControlBlock.
+public:
+    VMysqlResultWriter(BufferControlBlock* sinker,
+                       const std::vector<vectorized::VExprContext*>& output_vexpr_ctxs,
+                       RuntimeProfile* parent_profile);
+    // Creates the profile counters and fails if the sinker is null.
+    virtual Status init(RuntimeState* state) override;
+    // Not implemented for this writer: always returns an error.
+    virtual Status append_row_batch(const RowBatch* batch) override;
+    // Evaluates the output exprs on block, encodes the rows and sends them to the sinker.
+    virtual Status append_block(Block& block) override;
+    // Records the number of rows written in the profile.
+    virtual Status close() override;
+
+private:
+    void _init_profile();
+    // Appends the encoded cells of one column to the per-row buffers in result.
+    template <PrimitiveType type, bool is_nullable>
+    Status _add_one_column(const ColumnPtr& column_ptr, std::unique_ptr<TFetchDataResult>& result);
+
+private:
+    BufferControlBlock* _sinker; // destination of the encoded batches; init() fails when null
+    // Held by reference: the vector passed to the constructor must outlive this writer.
+    const std::vector<vectorized::VExprContext*>& _output_vexpr_ctxs;
+
+    RuntimeProfile* _parent_profile; // parent profile from result sink. not owned
+    // total time cost on append batch operation
+    RuntimeProfile::Counter* _append_row_batch_timer = nullptr;
+    // tuple convert timer, child timer of _append_row_batch_timer
+    RuntimeProfile::Counter* _convert_tuple_timer = nullptr;
+    // result send timer, child timer of _append_row_batch_timer
+    RuntimeProfile::Counter* _result_send_timer = nullptr;
+    // number of sent rows
+    RuntimeProfile::Counter* _sent_rows_counter = nullptr;
+};
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/sink/result_sink.cpp b/be/src/vec/sink/result_sink.cpp
new file mode 100644
index 0000000000..fda7702ab3
--- /dev/null
+++ b/be/src/vec/sink/result_sink.cpp
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/sink/result_sink.h"
+
+#include "runtime/buffer_control_block.h"
+#include "runtime/exec_env.h"
+#include "runtime/file_result_writer.h"
+#include "runtime/result_buffer_mgr.h"
+#include "runtime/runtime_state.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/sink/mysql_result_writer.h"
+
+namespace doris {
+namespace vectorized {
+
+VResultSink::VResultSink(const RowDescriptor& row_desc, const std::vector<TExpr>& t_output_expr,
+                         const TResultSink& sink, int buffer_size)
+        : _row_desc(row_desc), _t_output_expr(t_output_expr), _buf_size(buffer_size) {
+    // Name the sink up front; nothing below reads _name.
+    _name = "ResultSink";
+
+    // A sink whose type is not set is treated as a MySQL protocol sink.
+    _sink_type = sink.__isset.type ? sink.type : TResultSinkType::MYSQL_PROTOCAL;
+
+    // FILE sinks must carry file options; wrap them in an object owned by this sink.
+    const bool is_file_sink = (_sink_type == TResultSinkType::FILE);
+    if (is_file_sink) {
+        CHECK(sink.__isset.file_options);
+        _file_opts.reset(new ResultFileOptions(sink.file_options));
+    }
+}
+
+VResultSink::~VResultSink() = default; // out of line: the header only forward-declares ResultFileOptions
+
+Status VResultSink::prepare_exprs(RuntimeState* state) {
+    // Step 1: turn the thrift output expressions into vectorized expr contexts.
+    auto* pool = state->obj_pool();
+    RETURN_IF_ERROR(VExpr::create_expr_trees(pool, _t_output_expr, &_output_vexpr_ctxs));
+    // Step 2: prepare those contexts against this sink's row descriptor.
+    RETURN_IF_ERROR(VExpr::prepare(_output_vexpr_ctxs, state, _row_desc, _expr_mem_tracker));
+    return Status::OK();
+}
+// Builds the profile, output exprs, sender and writer; returns the first failure, if any.
+Status VResultSink::prepare(RuntimeState* state) {
+    RETURN_IF_ERROR(DataSink::prepare(state));
+    auto fragment_instance_id = state->fragment_instance_id();
+    auto title = fmt::format("VDataBufferSender (dst_fragment_instance_id={:x}-{:x})",
+                             fragment_instance_id.hi, fragment_instance_id.lo);
+    // create profile (handed to the state's object pool via add())
+    _profile = state->obj_pool()->add(new RuntimeProfile(title));
+    // prepare output_expr
+    RETURN_IF_ERROR(prepare_exprs(state));
+    // create sender
+    RETURN_IF_ERROR(state->exec_env()->result_mgr()->create_sender(state->fragment_instance_id(),
+                                                                   _buf_size, &_sender));
+
+    // create writer based on sink type
+    switch (_sink_type) {
+    case TResultSinkType::MYSQL_PROTOCAL:
+        _writer.reset(new (std::nothrow)
+                              VMysqlResultWriter(_sender.get(), _output_vexpr_ctxs, _profile));
+        break;
+    case TResultSinkType::FILE:
+        CHECK(_file_opts.get() != nullptr);
+        // TODO: create a vectorized file result writer here (from _file_opts, the output
+        // exprs, _profile and _sender) once one exists; until then FILE sinks are rejected.
+        return Status::InternalError("Unsupport vfile result sink type");
+    default:
+        return Status::InternalError("Unknown result sink type");
+    }
+    if (_writer == nullptr) { // new (std::nothrow) returns nullptr when allocation fails
+        return Status::InternalError("Failed to allocate result writer");
+    }
+    RETURN_IF_ERROR(_writer->init(state));
+    return Status::OK();
+}
+
+Status VResultSink::open(RuntimeState* state) {
+    return VExpr::open(_output_vexpr_ctxs, state); // contexts were created in prepare_exprs()
+}
+
+Status VResultSink::send(RuntimeState* state, RowBatch* batch) { // row-batch path: unsupported
+    return Status::NotSupported("Not Implemented Result Sink::send scalar");
+}
+
+Status VResultSink::send(RuntimeState* state, Block* block) { // `state` is not used here
+    return _writer->append_block(*block); // _writer is created in prepare(); no null check here
+}
+
+Status VResultSink::close(RuntimeState* state, Status exec_status) {
+    // Nothing to tear down when already closed, or when the writer/sender were never created.
+    const bool ready = !_closed && _writer != nullptr && _sender != nullptr;
+    if (!ready) {
+        return Status::OK();
+    }
+
+    // Close the writer first; its error is only reported when execution itself succeeded.
+    Status writer_status = _writer->close();
+    const bool report_writer_error = exec_status.ok() && !writer_status.ok();
+    Status final_status = report_writer_error ? writer_status : exec_status;
+
+    // Pass the written-row count and the final status on to the sender. No second null
+    // check is needed: _sender was verified above.
+    _sender->update_num_written_rows(_writer->get_written_rows());
+    _sender->close(final_status);
+
+    // Ask the result manager to cancel this instance at now + the configured interval.
+    const auto cancel_time = time(NULL) + config::result_buffer_cancelled_interval_time;
+    state->exec_env()->result_mgr()->cancel_at_time(cancel_time, state->fragment_instance_id());
+
+    // Close the output expressions and remember that the sink is closed.
+    VExpr::close(_output_vexpr_ctxs, state);
+    _closed = true;
+    return Status::OK();
+}
+
+void VResultSink::set_query_statistics(std::shared_ptr<QueryStatistics> statistics) {
+    _sender->set_query_statistics(statistics); // _sender is set in prepare(); not null-checked
+}
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/sink/result_sink.h b/be/src/vec/sink/result_sink.h
new file mode 100644
index 0000000000..38f52857e2
--- /dev/null
+++ b/be/src/vec/sink/result_sink.h
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "exec/data_sink.h"
+#include "vec/sink/result_writer.h"
+
+namespace doris {
+class ObjectPool;
+class RowBatch;
+class RuntimeState;
+class RuntimeProfile;
+class BufferControlBlock;
+class ExprContext;
+class ResultWriter;
+class MemTracker;
+class ResultFileOptions;
+namespace vectorized {
+class VExprContext;
+
+class VResultSink : public DataSink {
+public:
+    // Builds a result sink. sink.type selects the writer (MySQL protocol when it is unset).
+    // row_desc and select_exprs are kept by reference and must outlive the sink.
+    // buffer_size is passed to the result manager when the sender is created in prepare().
+    VResultSink(const RowDescriptor& row_desc, const std::vector<TExpr>& select_exprs,
+                const TResultSink& sink, int buffer_size);
+
+    virtual ~VResultSink();
+
+    virtual Status prepare(RuntimeState* state) override;
+    virtual Status open(RuntimeState* state) override;
+
+    // Not implemented: the row-batch overload always returns NotSupported.
+    virtual Status send(RuntimeState* state, RowBatch* batch) override;
+    virtual Status send(RuntimeState* state, Block* block) override;
+    // Closes the writer, then the sender, passing on the writer's error when exec_status
+    // is OK. Does nothing once closed or when prepare() created no writer/sender.
+    virtual Status close(RuntimeState* state, Status exec_status) override;
+    virtual RuntimeProfile* profile() override { return _profile; }
+
+    void set_query_statistics(std::shared_ptr<QueryStatistics> statistics) override;
+
+private:
+    Status prepare_exprs(RuntimeState* state);
+    TResultSinkType::type _sink_type;
+    // set file options when sink type is FILE
+    std::unique_ptr<ResultFileOptions> _file_opts;
+
+    ObjectPool* _obj_pool; // NOTE(review): not set or read in result_sink.cpp; looks unused
+    // Owned by the RuntimeState.
+    const RowDescriptor& _row_desc;
+
+    // Owned by the RuntimeState.
+    const std::vector<TExpr>& _t_output_expr;
+    std::vector<vectorized::VExprContext*> _output_vexpr_ctxs;
+
+    std::shared_ptr<BufferControlBlock> _sender;
+    std::shared_ptr<VResultWriter> _writer;
+    RuntimeProfile* _profile; // Created in prepare(); added to the RuntimeState's object pool
+    int _buf_size;            // Buffer size handed to create_sender() in prepare()
+};
+} // namespace vectorized
+
+} // namespace doris
diff --git a/be/src/vec/sink/result_writer.h b/be/src/vec/sink/result_writer.h
new file mode 100644
index 0000000000..ef03980007
--- /dev/null
+++ b/be/src/vec/sink/result_writer.h
@@ -0,0 +1,29 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include "runtime/result_writer.h"
+namespace doris {
+namespace vectorized {
+class VResultWriter : public ResultWriter {
+public:
+    VResultWriter() : ResultWriter() {}
+    // Pure virtual block entry point; concrete vectorized writers must implement it.
+    virtual Status append_block(Block& block) = 0;
+};
+} // namespace vectorized
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/sink/vdata_stream_sender.cpp b/be/src/vec/sink/vdata_stream_sender.cpp
new file mode 100644
index 0000000000..d953c9dda2
--- /dev/null
+++ b/be/src/vec/sink/vdata_stream_sender.cpp
@@ -0,0 +1,546 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/sink/vdata_stream_sender.h"
+
+#include <boost/bind.hpp>
+#include <fmt/format.h>
+#include <fmt/ranges.h>
+
+#include "runtime/client_cache.h"
+#include "runtime/dpp_sink_internal.h"
+#include "runtime/exec_env.h"
+#include "runtime/mem_tracker.h"
+#include "runtime/runtime_state.h"
+#include "vec/common/sip_hash.h"
+#include "vec/runtime/vdata_stream_mgr.h"
+#include "vec/runtime/vdata_stream_recvr.h"
+#include "vec/runtime/vpartition_info.h"
+
+namespace doris::vectorized {
+
+// Initializes this channel's brpc request and stub; fails when no brpc hostname is set.
+Status VDataStreamSender::Channel::init(RuntimeState* state) {
+    _be_number = state->be_number();
+
+    _capacity = std::max(1, _buffer_size / std::max(_row_desc.get_row_size(), 1)); // never 0
+
+    if (_brpc_dest_addr.hostname.empty()) {
+        LOG(WARNING) << "there is no brpc destination address's hostname"
+                        ", maybe version is not compatible.";
+        return Status::InternalError("no brpc destination");
+    }
+
+    // initialize brpc request
+    _finst_id.set_hi(_fragment_instance_id.hi);
+    _finst_id.set_lo(_fragment_instance_id.lo);
+    _brpc_request.set_allocated_finst_id(&_finst_id); // request refers to the member object
+    _brpc_request.set_node_id(_dest_node_id);
+    _brpc_request.set_sender_id(_parent->_sender_id);
+    _brpc_request.set_be_number(_be_number);
+
+    _brpc_timeout_ms = std::min(3600, state->query_options().query_timeout) * 1000; // s -> ms
+    // Resolve the stub once: loopback if the destination is this backend, else its address.
+    if (_brpc_dest_addr.hostname == BackendOptions::get_localhost()) {
+        _brpc_stub =
+                state->exec_env()->brpc_stub_cache()->get_stub("127.0.0.1", _brpc_dest_addr.port);
+    } else {
+        _brpc_stub = state->exec_env()->brpc_stub_cache()->get_stub(_brpc_dest_addr);
+    }
+
+    // In bucket shuffle join will set fragment_instance_id (-1, -1)
+    // to build a camouflaged empty channel. the ip and port is '0.0.0.0:0"
+    // so the empty channel not need call function close_internal()
+    _need_close = (_fragment_instance_id.hi != -1 && _fragment_instance_id.lo != -1);
+    return Status::OK();
+}
+
+Status VDataStreamSender::Channel::send_current_block(bool eos) {
+// TODO: local exchange currently causes a performance problem in multi-threaded scenarios,
+//  so this feature is turned off here. We need to re-examine this logic
+//    if (is_local()) {
+//        return send_local_block(eos);
+//    }
+    {
+        SCOPED_TIMER(_parent->_serialize_batch_timer);
+        _pb_block.Clear();
+        // Order matters below: snapshot, serialize, empty the columns, hand them back.
+        // mem-reuse of the mutable_block which reduces the overhead of memory allocation
+        // and improve cache affinity
+        auto block = _mutable_block->to_block();
+        auto uncompressed_bytes = block.serialize(&_pb_block);
+        block.clear_column_data();
+        _mutable_block->set_muatable_columns(block.mutate_columns());
+        // Record the serialized size and the size reported by serialize().
+        auto bytes = _pb_block.ByteSizeLong();
+        COUNTER_UPDATE(_parent->_bytes_sent_counter, bytes);
+        COUNTER_UPDATE(_parent->_uncompressed_bytes_counter, uncompressed_bytes);
+    }
+    RETURN_IF_ERROR(send_block(&_pb_block, eos)); // the timer scope above has already ended
+    return Status::OK();
+}
+
+Status VDataStreamSender::Channel::send_local_block(bool eos) {
+    std::shared_ptr<VDataStreamRecvr> recvr = // look up the receiver for this destination
+            _parent->state()->exec_env()->vstream_mgr()->find_recvr(_fragment_instance_id,
+                                                                    _dest_node_id);
+    if (recvr != nullptr) {
+        Block block = _mutable_block->to_block();
+        COUNTER_UPDATE(_parent->_local_bytes_send_counter, block.bytes());
+        recvr->add_block(&block, _parent->_sender_id, true); // NOTE(review): meaning of `true`?
+        if (eos) {
+            recvr->remove_sender(_parent->_sender_id, _be_number); // this sender is finished
+        }
+    }
+    _mutable_block->clear(); // the buffer is cleared even when no receiver was found
+    return Status::OK();
+}
+
+Status VDataStreamSender::Channel::send_local_block(Block* block) {
+    std::shared_ptr<VDataStreamRecvr> recvr = // look up the receiver for this destination
+            _parent->state()->exec_env()->vstream_mgr()->find_recvr(_fragment_instance_id,
+                                                                    _dest_node_id);
+    if (recvr != nullptr) { // a missing receiver is not an error: the block is just skipped
+        COUNTER_UPDATE(_parent->_local_bytes_send_counter, block->bytes());
+        recvr->add_block(block, _parent->_sender_id, false); // NOTE(review): meaning of `false`?
+    }
+    return Status::OK();
+}
+
+Status VDataStreamSender::Channel::send_block(PBlock* block, bool eos) {
+    if (_closure == nullptr) { // first send on this channel: create the reusable closure
+        _closure = new RefCountClosure<PTransmitDataResult>();
+        _closure->ref(); // reference held by this channel
+    } else {
+        RETURN_IF_ERROR(_wait_last_brpc()); // previous RPC must finish before the closure is reused
+        _closure->cntl.Reset();
+    }
+    VLOG_ROW << "Channel::send_batch() instance_id=" << _fragment_instance_id
+             << " dest_node=" << _dest_node_id;
+    if (_is_transfer_chain && (_send_query_statistics_with_every_batch || eos)) {
+        auto statistic = _brpc_request.mutable_query_statistics();
+        _parent->_query_statistics->to_pb(statistic);
+    }
+    // block may be nullptr (close_internal() sends an eos-only packet that way).
+    _brpc_request.set_eos(eos);
+    if (block != nullptr) {
+        _brpc_request.set_allocated_block(block); // lend the caller's PBlock to the request
+    }
+    _brpc_request.set_packet_seq(_packet_seq++);
+    // NOTE(review): this extra ref() presumably pairs with an unref when the RPC completes.
+    _closure->ref();
+    _closure->cntl.set_timeout_ms(_brpc_timeout_ms);
+    _brpc_stub->transmit_block(&_closure->cntl, &_brpc_request, &_closure->result, _closure);
+    if (block != nullptr) {
+        _brpc_request.release_block(); // take the PBlock back so the request does not own it
+    }
+    return Status::OK();
+}
+
+Status VDataStreamSender::Channel::add_row(Block* block, int row) {
+    // A channel whose instance id has lo == -1 accepts no rows; report success and return.
+    const bool accepts_rows = (_fragment_instance_id.lo != -1);
+    if (!accepts_rows) {
+        return Status::OK();
+    }
+    // No buffer yet: create one from an empty clone of the source block.
+    if (!_mutable_block) {
+        auto schema_only = block->clone_empty();
+        _mutable_block.reset(
+                new MutableBlock(schema_only.mutate_columns(), schema_only.get_data_types()));
+    }
+    _mutable_block->add_row(block, row);
+    // Send the buffer once it holds exactly batch_size rows; otherwise keep accumulating.
+    const bool batch_full = (_mutable_block->rows() == _parent->state()->batch_size());
+    return batch_full ? send_current_block() : Status::OK();
+}
+
+// Appends the rows of `block` selected by the indices in `rows` to this channel's buffer,
+// sending the buffer each time it reaches the batch size. Returns the first send error,
+// or OK. Channels whose instance id has lo == -1 ignore the call.
+Status VDataStreamSender::Channel::add_rows(Block* block, const std::vector<int>& rows) {
+    if (_fragment_instance_id.lo == -1) {
+        return Status::OK();
+    }
+
+    if (_mutable_block.get() == nullptr) {
+        auto empty_block = block->clone_empty();
+        _mutable_block.reset(
+                new MutableBlock(empty_block.mutate_columns(), empty_block.get_data_types()));
+    }
+
+    int row_wait_add = rows.size();
+    int batch_size = _parent->state()->batch_size();
+    // data() is valid even for an empty vector, whereas &rows[0] would be undefined there.
+    const int* begin = rows.data();
+
+    while (row_wait_add > 0) {
+        // Fill the buffer up to the batch size, but never take more rows than remain.
+        const int max_add = batch_size - _mutable_block->rows();
+        const int row_add = std::min(row_wait_add, max_add);
+
+        _mutable_block->add_rows(block, begin, begin + row_add);
+        row_wait_add -= row_add;
+        begin += row_add;
+
+        // Adding max_add rows brings the buffer to batch_size: send it.
+        if (row_add == max_add) {
+            RETURN_IF_ERROR(send_current_block());
+        }
+    }
+    return Status::OK();
+}
+
+Status VDataStreamSender::Channel::close_wait(RuntimeState* state) {
+    if (_need_close) { // a close is still pending: wait for the last in-flight RPC
+        Status st = _wait_last_brpc();
+        if (!st.ok()) {
+            state->log_error(st.get_error_msg());
+        }
+        _need_close = false; // later calls take the path below
+        return st;
+    }
+    _mutable_block.reset(); // NOTE(review): only reached when _need_close was already false
+    return Status::OK();
+}
+
+Status VDataStreamSender::Channel::close_internal() {
+    if (!_need_close) { // set to false by init() for (-1, -1) channels and by close_wait()
+        return Status::OK();
+    }
+    VLOG_RPC << "Channel::close() instance_id=" << _fragment_instance_id
+             << " dest_node=" << _dest_node_id
+             << " #rows= " << ((_mutable_block == nullptr) ? 0 : _mutable_block->rows());
+    if (_mutable_block != nullptr && _mutable_block->rows() > 0) {
+        RETURN_IF_ERROR(send_current_block(true)); // send the buffered rows together with eos
+    } else {
+        RETURN_IF_ERROR(send_block(nullptr, true)); // nothing buffered: eos-only packet
+    }
+    // Don't wait for the last packet to finish, left it to close_wait.
+    return Status::OK();
+}
+
+Status VDataStreamSender::Channel::close(RuntimeState* state) {
+    Status st = close_internal(); // sends the final (eos) packet without waiting for it
+    if (!st.ok()) {
+        state->log_error(st.get_error_msg()); // record the failure on the runtime state
+    }
+    return st; // the error is also returned to the caller
+}
+
+VDataStreamSender::VDataStreamSender(ObjectPool* pool, int sender_id, const RowDescriptor& row_desc,
+                                     const TDataStreamSink& sink,
+                                     const std::vector<TPlanFragmentDestination>& destinations,
+                                     int per_channel_buffer_size,
+                                     bool send_query_statistics_with_every_batch)
+        : _sender_id(sender_id),
+          _pool(pool),
+          _row_desc(row_desc),
+          _current_channel_idx(0),
+          _part_type(sink.output_partition.type),
+          _ignore_not_found(sink.__isset.ignore_not_found ? sink.ignore_not_found : true),
+          _current_pb_block(&_pb_block1), // send() alternates between _pb_block1 and _pb_block2
+          _profile(nullptr),
+          _serialize_batch_timer(nullptr),
+          _bytes_sent_counter(nullptr),
+          _local_bytes_send_counter(nullptr),
+          _dest_node_id(sink.dest_node_id) {
+    DCHECK_GT(destinations.size(), 0);
+    DCHECK(sink.output_partition.type == TPartitionType::UNPARTITIONED ||
+           sink.output_partition.type == TPartitionType::HASH_PARTITIONED ||
+           sink.output_partition.type == TPartitionType::RANDOM ||
+           sink.output_partition.type == TPartitionType::RANGE_PARTITIONED ||
+           sink.output_partition.type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED);
+    // Maps fragment_instance_id.lo to the index of the channel first created for it.
+    std::map<int64_t, int64_t> fragment_id_to_channel_index;
+    // _channel_shared_ptrs gets one entry per destination; _channels only the distinct ones.
+    for (int i = 0; i < destinations.size(); ++i) {
+        // Select first dest as transfer chain.
+        bool is_transfer_chain = (i == 0);
+        const auto& fragment_instance_id = destinations[i].fragment_instance_id;
+        if (fragment_id_to_channel_index.find(fragment_instance_id.lo) ==
+            fragment_id_to_channel_index.end()) { // NOTE(review): keyed on .lo only, .hi ignored
+            _channel_shared_ptrs.emplace_back(
+                    new Channel(this, row_desc, destinations[i].brpc_server, fragment_instance_id,
+                                sink.dest_node_id, per_channel_buffer_size, is_transfer_chain,
+                                send_query_statistics_with_every_batch));
+            fragment_id_to_channel_index.emplace(fragment_instance_id.lo,
+                                                 _channel_shared_ptrs.size() - 1);
+            _channels.push_back(_channel_shared_ptrs.back().get());
+        } else { // repeated instance: share the existing Channel instead of creating another
+            _channel_shared_ptrs.emplace_back(
+                    _channel_shared_ptrs[fragment_id_to_channel_index[fragment_instance_id.lo]]);
+        }
+    }
+    _name = "VDataStreamSender";
+}
+
+VDataStreamSender::~VDataStreamSender() {
+    _channel_shared_ptrs.clear(); // drops this sender's shared references to its channels
+}
+
+Status VDataStreamSender::init(const TDataSink& tsink) {
+    RETURN_IF_ERROR(DataSink::init(tsink));
+    const TDataStreamSink& t_stream_sink = tsink.stream_sink;
+    if (_part_type == TPartitionType::HASH_PARTITIONED ||
+        _part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) {
+        RETURN_IF_ERROR(VExpr::create_expr_trees( // hash modes only need the partition exprs
+                _pool, t_stream_sink.output_partition.partition_exprs, &_partition_expr_ctxs));
+    } else if (_part_type == TPartitionType::RANGE_PARTITIONED) {
+        // Range partition
+        // Partition Exprs
+        RETURN_IF_ERROR(VExpr::create_expr_trees(
+                _pool, t_stream_sink.output_partition.partition_exprs, &_partition_expr_ctxs));
+        // Partition infos
+        int num_parts = t_stream_sink.output_partition.partition_infos.size();
+        if (num_parts == 0) {
+            return Status::InternalError("Empty partition info.");
+        }
+        for (int i = 0; i < num_parts; ++i) {
+            VPartitionInfo* info = _pool->add(new VPartitionInfo()); // added to _pool
+            RETURN_IF_ERROR(VPartitionInfo::from_thrift(
+                    _pool, t_stream_sink.output_partition.partition_infos[i], info));
+            _partition_infos.push_back(info);
+        }
+        // partitions should be in ascending order
+        std::sort(_partition_infos.begin(), _partition_infos.end(),
+                  [](const VPartitionInfo* v1, const VPartitionInfo* v2) {
+                      return v1->range() < v2->range();
+                  });
+    } else {
+        // UNPARTITIONED or RANDOM: there is nothing to build from the thrift sink.
+    }
+    return Status::OK();
+}
+
+Status VDataStreamSender::prepare(RuntimeState* state) {
+    RETURN_IF_ERROR(DataSink::prepare(state));
+    _state = state;
+    // Build the profile title from the instance ids of the distinct destination channels.
+    std::vector<std::string> instances;
+    for (const auto& channel : _channels) {
+        instances.emplace_back(channel->get_fragment_instance_id_str());
+    }
+    std::string title = fmt::format("VDataStreamSender (dst_id={}, dst_fragments=[{}])",
+                                    _dest_node_id, instances);
+    _profile = _pool->add(new RuntimeProfile(std::move(title)));
+    SCOPED_TIMER(_profile->total_time_counter());
+    _mem_tracker = MemTracker::CreateTracker(
+            _profile, -1, "VDataStreamSender:" + print_id(state->fragment_instance_id()),
+            state->instance_mem_tracker());
+    // Partition-type specific setup.
+    if (_part_type == TPartitionType::UNPARTITIONED || _part_type == TPartitionType::RANDOM) {
+        // Randomize the order we open/transmit to channels to avoid thundering herd problems.
+        srand(reinterpret_cast<uint64_t>(this));
+        random_shuffle(_channels.begin(), _channels.end()); // NOTE(review): removed in C++17
+    } else if (_part_type == TPartitionType::HASH_PARTITIONED ||
+               _part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) {
+        RETURN_IF_ERROR(VExpr::prepare(_partition_expr_ctxs, state, _row_desc, _expr_mem_tracker));
+    } else { // remaining type allowed by the constructor's DCHECK: RANGE_PARTITIONED
+        RETURN_IF_ERROR(VExpr::prepare(_partition_expr_ctxs, state, _row_desc, _expr_mem_tracker));
+        for (auto iter : _partition_infos) {
+            RETURN_IF_ERROR(iter->prepare(state, _row_desc, _expr_mem_tracker));
+        }
+    }
+    // Register the counters and timers used by send() and the channels.
+    _bytes_sent_counter = ADD_COUNTER(profile(), "BytesSent", TUnit::BYTES);
+    _uncompressed_bytes_counter = ADD_COUNTER(profile(), "UncompressedRowBatchSize", TUnit::BYTES);
+    _ignore_rows = ADD_COUNTER(profile(), "IgnoreRows", TUnit::UNIT);
+    _serialize_batch_timer = ADD_TIMER(profile(), "SerializeBatchTime");
+    _overall_throughput = profile()->add_derived_counter( // derived: bytes sent / total time
+            "OverallThroughput", TUnit::BYTES_PER_SECOND,
+            boost::bind<int64_t>(&RuntimeProfile::units_per_second, _bytes_sent_counter,
+                                 profile()->total_time_counter()),
+            "");
+    _local_bytes_send_counter = ADD_COUNTER(profile(), "LocalBytesSent", TUnit::BYTES);
+    for (int i = 0; i < _channels.size(); ++i) {
+        RETURN_IF_ERROR(_channels[i]->init(state)); // stop at the first channel that fails
+    }
+    return Status::OK();
+}
+
+Status VDataStreamSender::open(RuntimeState* state) {
+    DCHECK(state != nullptr);
+    RETURN_IF_ERROR(VExpr::open(_partition_expr_ctxs, state)); // partition exprs first
+    for (auto iter : _partition_infos) { // only filled by init() for range partitioning
+        RETURN_IF_ERROR(iter->open(state));
+    }
+    return Status::OK();
+}
+
+Status VDataStreamSender::send(RuntimeState* state, RowBatch* batch) { // row-batch path: unsupported
+    return Status::NotSupported("Not Implemented VDataStreamSender::send scalar");
+}
+
+// Routes one block to the destination channels according to _part_type. Unpartitioned
+// (or single-channel) and random sends forward the whole block; the two hash modes
+// compute a per-row hash and dispatch rows by it. Range partitioning is rejected.
+Status VDataStreamSender::send(RuntimeState* state, Block* block) {
+    SCOPED_TIMER(_profile->total_time_counter());
+    if (_part_type == TPartitionType::UNPARTITIONED || _channels.size() == 1) {
+        // Local channels receive the block directly; it is serialized only when at least
+        // one channel is not local, and every non-local channel gets the same payload.
+        int local_size = 0;
+        for (auto channel : _channels) {
+            if (channel->is_local()) local_size++;
+        }
+        if (local_size == _channels.size()) {
+            for (auto channel : _channels) {
+                RETURN_IF_ERROR(channel->send_local_block(block));
+            }
+        } else {
+            RETURN_IF_ERROR(serialize_block(block, _current_pb_block, _channels.size()));
+            for (auto channel : _channels) {
+                if (channel->is_local()) {
+                    RETURN_IF_ERROR(channel->send_local_block(block));
+                } else {
+                    RETURN_IF_ERROR(channel->send_block(_current_pb_block));
+                }
+            }
+            // Alternate between the two PBlock buffers for the next call.
+            _current_pb_block = (_current_pb_block == &_pb_block1 ? &_pb_block2 : &_pb_block1);
+        }
+    } else if (_part_type == TPartitionType::RANDOM) {
+        // 1. select channel
+        Channel* current_channel = _channels[_current_channel_idx];
+        // 2. send locally, or serialize into the channel's own PBlock and send that
+        if (current_channel->is_local()) {
+            RETURN_IF_ERROR(current_channel->send_local_block(block));
+        } else {
+            RETURN_IF_ERROR(serialize_block(block, current_channel->pb_block()));
+            RETURN_IF_ERROR(current_channel->send_block(current_channel->pb_block()));
+        }
+        // 3. advance to the next channel, wrapping around at the end
+        _current_channel_idx = (_current_channel_idx + 1) % _channels.size();
+    } else if (_part_type == TPartitionType::HASH_PARTITIONED) {
+        int num_channels = _channels.size();
+        // will only copy schema
+        // we don't want send temp columns
+
+        int result_size = _partition_expr_ctxs.size();
+        std::vector<int> result(result_size); // standard replacement for a variable-length array
+        RETURN_IF_ERROR(get_partition_column_result(block, result.data()));
+
+        // vectorized calculate hash
+        int rows = block->rows();
+        // for each row, we have a siphash val
+        std::vector<SipHash> siphashs(rows);
+        // result[j] means column index, i means rows index
+        for (int j = 0; j < result_size; ++j) {
+            auto column = block->get_by_position(result[j]).column;
+            for (int i = 0; i < rows; ++i) {
+                column->update_hash_with_value(i, siphashs[i]);
+            }
+        }
+
+        // reduce each row's SipHash state to a single 64-bit value
+        std::vector<vectorized::UInt64> hash_vals(rows);
+        for (int i = 0; i < rows; i++) {
+            hash_vals[i] = siphashs[i].get64();
+        }
+
+        RETURN_IF_ERROR(channel_add_rows(_channels, num_channels, hash_vals, rows, block));
+    } else if (_part_type == TPartitionType::BUCKET_SHFFULE_HASH_PARTITIONED) {
+        // 1. calculate hash
+        // 2. dispatch rows to channel
+        int num_channels = _channel_shared_ptrs.size();
+
+        int result_size = _partition_expr_ctxs.size();
+        std::vector<int> result(result_size); // standard replacement for a variable-length array
+        RETURN_IF_ERROR(get_partition_column_result(block, result.data()));
+
+        // vectorized calculate hash val
+        int rows = block->rows();
+        // for each row, we have a hash_val
+        std::vector<size_t> hash_vals(rows);
+
+        // result[j] means column index, i means rows index
+        for (int j = 0; j < result_size; ++j) {
+            auto& column = block->get_by_position(result[j]).column;
+            for (int i = 0; i < rows; ++i) {
+                auto val = column->get_data_at(i);
+                if (val.data == nullptr) {
+                    // nullptr is treat as 0 when hash
+                    static const int INT_VALUE = 0;
+                    static const TypeDescriptor INT_TYPE(TYPE_INT);
+                    hash_vals[i] = RawValue::zlib_crc32(&INT_VALUE, INT_TYPE, hash_vals[i]);
+                } else {
+                    hash_vals[i] = RawValue::zlib_crc32(val.data, val.size,
+                                                _partition_expr_ctxs[j]->root()->type(), hash_vals[i]);
+                }
+            }
+        }
+
+        RETURN_IF_ERROR(channel_add_rows(_channel_shared_ptrs, num_channels, hash_vals, rows, block));
+    } else {
+        // Range partitioning is not implemented for blocks yet; report that instead of
+        // returning OK while silently dropping every row of the block.
+        return Status::NotSupported("Not Implemented VDataStreamSender::send range partition");
+    }
+    return Status::OK();
+}
+
+Status VDataStreamSender::close(RuntimeState* state, Status exec_status) {
+    if (_closed) return Status::OK(); // second and later calls do nothing
+    _closed = true;
+    // exec_status is not consulted here; the result is the first channel error, if any.
+    Status final_st = Status::OK();
+    for (int i = 0; i < _channels.size(); ++i) {
+        Status st = _channels[i]->close(state); // sends the final packet, does not wait
+        if (!st.ok() && final_st.ok()) {
+            final_st = st;
+        }
+    }
+    // wait all channels to finish
+    for (int i = 0; i < _channels.size(); ++i) {
+        Status st = _channels[i]->close_wait(state);
+        if (!st.ok() && final_st.ok()) {
+            final_st = st;
+        }
+    }
+    for (auto iter : _partition_infos) {
+        iter->close(state); // any return value is ignored here
+    }
+    VExpr::close(_partition_expr_ctxs, state);
+    return final_st;
+}
+
+Status VDataStreamSender::handle_unpartitioned(Block* block) {
+    RETURN_IF_ERROR(serialize_block(block, _current_pb_block, _channels.size())); // serialize once
+    for (auto channel : _channels) {
+        RETURN_IF_ERROR(channel->send_block(_current_pb_block)); // same payload to every channel
+    }
+    _current_pb_block = (_current_pb_block == &_pb_block1 ? &_pb_block2 : &_pb_block1); // swap
+    VLOG_ROW << "send rows:" << block->rows();
+    return Status::OK();
+}
+
+Status VDataStreamSender::serialize_block(Block* src, PBlock* dest, int num_receivers) {
+    {
+        // The timer covers only this scope: serialization plus counter updates.
+        SCOPED_TIMER(_serialize_batch_timer);
+        dest->Clear();
+        const auto raw_size = src->serialize(dest);
+        const auto wire_size = dest->ByteSizeLong();
+        // Both counters are scaled by the number of receivers of this payload.
+        COUNTER_UPDATE(_uncompressed_bytes_counter, raw_size * num_receivers);
+        COUNTER_UPDATE(_bytes_sent_counter, wire_size * num_receivers);
+    }
+    return Status::OK();
+}
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/sink/vdata_stream_sender.h b/be/src/vec/sink/vdata_stream_sender.h
new file mode 100644
index 0000000000..9d20987494
--- /dev/null
+++ b/be/src/vec/sink/vdata_stream_sender.h
@@ -0,0 +1,297 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/global_types.h"
+#include "exec/data_sink.h"
+#include "gen_cpp/PaloInternalService_types.h"
+#include "gen_cpp/data.pb.h"
+#include "gen_cpp/internal_service.pb.h"
+#include "runtime/descriptors.h"
+#include "service/backend_options.h"
+#include "service/brpc.h"
+#include "util/brpc_stub_cache.h"
+#include "util/network_util.h"
+#include "util/ref_count_closure.h"
+#include "util/uid_util.h"
+#include "vec/exprs/vexpr.h"
+
+namespace doris {
+class ObjectPool;
+class RowBatch;
+class RuntimeState;
+class RuntimeProfile;
+class BufferControlBlock;
+class ExprContext;
+class MemTracker;
+class PartRangeKey;
+
+namespace vectorized {
+class VExprContext;
+class VPartitionInfo;
+// Data sink that sends vectorized Blocks to its destinations through Channel objects.
+class VDataStreamSender final : public DataSink {
+public:
+    VDataStreamSender(ObjectPool* pool, int sender_id, const RowDescriptor& row_desc,
+                      const TDataStreamSink& sink,
+                      const std::vector<TPlanFragmentDestination>& destinations,
+                      int per_channel_buffer_size, bool send_query_statistics_with_every_batch);
+
+    ~VDataStreamSender();
+
+    virtual Status init(const TDataSink& thrift_sink) override;
+
+    virtual Status prepare(RuntimeState* state) override;
+    virtual Status open(RuntimeState* state) override;
+
+    virtual Status send(RuntimeState* state, RowBatch* batch) override;
+    virtual Status send(RuntimeState* state, Block* block) override;
+    // Closes and then waits for every channel; returns the first channel error. Later calls return OK.
+    virtual Status close(RuntimeState* state, Status exec_status) override;
+    virtual RuntimeProfile* profile() override { return _profile; }
+
+    RuntimeState* state() { return _state; }
+    // Clears dest, serializes src into it and adds both sizes, multiplied by num_receivers, to the counters.
+    Status serialize_block(Block* src, PBlock* dest, int num_receivers = 1);
+
+private:
+    class Channel;
+    // Runs the i-th partition expr ctx on block and lets execute() write into result[i]; stops at the first error.
+    Status get_partition_column_result(Block* block, int* result) const {
+        int counter = 0;
+        for (auto ctx : _partition_expr_ctxs) {
+            RETURN_IF_ERROR(ctx->execute(block, &result[counter++]));
+        }
+        return Status::OK();
+    }
+    // Groups row indices by hash_vals[i] % num_channels and calls add_rows() on every channel that got rows.
+    template <typename Channels, typename HashVals>
+    Status channel_add_rows(Channels& channels, int num_channels, const HashVals& hash_vals, int rows, Block* block);
+
+    struct hash_128 {
+        uint64_t high;
+        uint64_t low;
+    };
+
+    using hash_128_t = hash_128;
+    // Serializes block into the current PBlock, sends it on every channel, then switches to the other PBlock.
+    Status handle_unpartitioned(Block* block);
+
+    // Sender instance id, unique within a fragment.
+    int _sender_id;
+
+    RuntimeState* _state;
+    ObjectPool* _pool;
+    const RowDescriptor& _row_desc;
+
+    int _current_channel_idx; // index of current channel to send to if _random == true
+
+    TPartitionType::type _part_type;
+    bool _ignore_not_found;
+
+    // serialized blocks for broadcasting; we need two so we can write
+    // one while the other one is still being sent
+    PBlock _pb_block1;
+    PBlock _pb_block2;
+    PBlock* _current_pb_block = nullptr;
+
+    // compute per-row partition values
+    std::vector<VExprContext*> _partition_expr_ctxs;
+
+    std::vector<Channel*> _channels;
+    std::vector<std::shared_ptr<Channel>> _channel_shared_ptrs;
+
+    // map from range value to partition_id
+    // sorted in ascending order by range for binary search
+    std::vector<VPartitionInfo*> _partition_infos;
+
+    RuntimeProfile* _profile; // Allocated from _pool
+    RuntimeProfile::Counter* _serialize_batch_timer;
+    RuntimeProfile::Counter* _bytes_sent_counter;
+    RuntimeProfile::Counter* _uncompressed_bytes_counter;
+    RuntimeProfile::Counter* _ignore_rows;
+
+    std::shared_ptr<MemTracker> _mem_tracker;
+
+    // Throughput per total time spent in sender
+    RuntimeProfile::Counter* _overall_throughput;
+    // Counts the bytes sent under local data exchange
+    RuntimeProfile::Counter* _local_bytes_send_counter;
+    // Identifier of the destination plan node.
+    PlanNodeId _dest_node_id;
+};
+
+// TODO: support local exchange
+// Sending state for one destination of a VDataStreamSender: address, brpc request/closure and row buffer.
+class VDataStreamSender::Channel {
+public:
+    // Creates a channel that sends to one ipaddress/port/query/node combination and records in
+    // _is_local whether that address is this backend's own host and brpc port. buffer_size is in
+    // bytes and a soft limit on how much data is accumulated before being sent; it only applies
+    // when rows are buffered via add_row()/add_rows() and not when a block is sent via send_block().
+    Channel(VDataStreamSender* parent, const RowDescriptor& row_desc,
+            const TNetworkAddress& brpc_dest, const TUniqueId& fragment_instance_id,
+            PlanNodeId dest_node_id, int buffer_size, bool is_transfer_chain,
+            bool send_query_statistics_with_every_batch)
+            : _parent(parent),
+              _buffer_size(buffer_size),
+              _row_desc(row_desc),
+              _fragment_instance_id(fragment_instance_id),
+              _dest_node_id(dest_node_id),
+              _num_data_bytes_sent(0),
+              _packet_seq(0),
+              _need_close(false),
+              _brpc_dest_addr(brpc_dest),
+              _is_transfer_chain(is_transfer_chain),
+              _send_query_statistics_with_every_batch(send_query_statistics_with_every_batch) {
+        const std::string localhost = BackendOptions::get_localhost();
+        _is_local = (_brpc_dest_addr.hostname == localhost) && (_brpc_dest_addr.port == config::brpc_port);
+        if (_is_local) {
+            LOG(INFO) << "will use local Exchange, dest_node_id is : " << _dest_node_id;
+        }
+    }
+
+    virtual ~Channel() {
+        if (_closure != nullptr && _closure->unref()) {
+            delete _closure;
+        }
+        // release this before the request is destructed (presumably finst_id is not owned by it - confirm in init())
+        _brpc_request.release_finst_id();
+    }
+
+    // Initialize channel.
+    // Returns OK if successful, error indication otherwise.
+    Status init(RuntimeState* state);
+
+    // Copies a single row into this channel's output buffer and flushes buffer
+    // if it reaches capacity.
+    // Returns error status if any of the preceding rpcs failed, OK otherwise.
+    //Status add_row(TupleRow* row);
+
+    // Asynchronously sends a serialized block.
+    // Returns the status of the most recently finished transmit_data
+    // rpc (or OK if there wasn't one that hasn't been reported yet).
+    // eos marks the end-of-stream packet (the inherited comment also allowed a null payload - confirm).
+    Status send_block(PBlock* block, bool eos = false);
+
+    Status add_row(Block* block, int row);
+    Status add_rows(Block* block, const std::vector<int>& row);
+
+    Status send_current_block(bool eos = false);
+
+    Status send_local_block(bool eos = false);
+
+    Status send_local_block(Block* block);
+    // Flush buffered rows and close channel. This function doesn't wait for the response
+    // of the close operation; the client should call close_wait() to finish the channel's close.
+    // We split one close operation into two phases so that multiple channels
+    // can be closed in parallel.
+    Status close(RuntimeState* state);
+
+    // Get close wait's response, to finish channel close operation.
+    Status close_wait(RuntimeState* state);
+
+    int64_t num_data_bytes_sent() const { return _num_data_bytes_sent; }
+
+    PBlock* pb_block() { return &_pb_block; }
+
+    std::string get_fragment_instance_id_str() {
+        UniqueId uid(_fragment_instance_id);
+        return uid.to_string();
+    }
+
+    TUniqueId get_fragment_instance_id() const { return _fragment_instance_id; }
+
+    bool is_local() const { return _is_local; }
+
+private:
+    inline Status _wait_last_brpc() { // joins the last brpc call, if any; a failed call becomes ThriftRpcError
+        if (_closure == nullptr) return Status::OK();
+        auto cntl = &_closure->cntl;
+        auto call_id = _closure->cntl.call_id();
+        brpc::Join(call_id);
+        if (cntl->Failed()) {
+            std::string err = fmt::format(
+                    "failed to send brpc batch, error={}, error_text={}, client: {}",
+                    berror(cntl->ErrorCode()), cntl->ErrorText(), BackendOptions::get_localhost());
+            LOG(WARNING) << err;
+            return Status::ThriftRpcError(err);
+        }
+        return Status::OK();
+    }
+
+
+private:
+    // NOTE(review): send_current_batch() and its old comment ("serialize _batch into _thrift_batch")
+    // look inherited from the row-batch sender - confirm it is still defined and used.
+    Status send_current_batch(bool eos = false);
+    Status close_internal();
+
+    VDataStreamSender* _parent;
+    int _buffer_size;
+
+    const RowDescriptor& _row_desc;
+    TUniqueId _fragment_instance_id;
+    PlanNodeId _dest_node_id;
+
+    // the number of data bytes sent successfully
+    int64_t _num_data_bytes_sent;
+    int64_t _packet_seq;
+
+    // we're accumulating rows into this block
+    std::unique_ptr<MutableBlock> _mutable_block;
+
+    bool _need_close;
+    int _be_number = 0; // not set by the constructor, so it gets a defined default here
+
+    TNetworkAddress _brpc_dest_addr;
+
+    PUniqueId _finst_id;
+    PBlock _pb_block;
+    PTransmitDataParams _brpc_request;
+    std::shared_ptr<PBackendService_Stub> _brpc_stub = nullptr;
+    RefCountClosure<PTransmitDataResult>* _closure = nullptr;
+    int32_t _brpc_timeout_ms = 500;
+    // whether the dest can be treated as query statistics transfer chain.
+    bool _is_transfer_chain;
+    bool _send_query_statistics_with_every_batch;
+
+    size_t _capacity = 0; // not set by the constructor, so it gets a defined default here
+    bool _is_local = false; // overwritten in the constructor body
+};
+
+// Groups row indices [0, rows) by hash_vals[i] % num_channels and appends each non-empty group to its
+// channel via add_rows(). num_channels must be positive; returns the first add_rows() error.
+template <typename Channels, typename HashVals>
+Status VDataStreamSender::channel_add_rows(Channels& channels, int num_channels, const HashVals& hash_vals, int rows, Block* block) {
+    // std::vector instead of a runtime-sized array: VLAs of class type are a compiler extension, not ISO C++.
+    std::vector<std::vector<int>> channel2rows(num_channels);
+    for (int i = 0; i < rows; i++) {
+        channel2rows[hash_vals[i] % num_channels].emplace_back(i);
+    }
+    for (int i = 0; i < num_channels; ++i) {
+        if (!channel2rows[i].empty()) {
+            RETURN_IF_ERROR(channels[i]->add_rows(block, channel2rows[i]));
+        }
+    }
+
+    return Status::OK();
+}
+
+} // namespace vectorized
+} // namespace doris
diff --git a/be/src/vec/sink/vtabet_sink.cpp b/be/src/vec/sink/vtabet_sink.cpp
new file mode 100644
index 0000000000..3b09cae1d0
--- /dev/null
+++ b/be/src/vec/sink/vtabet_sink.cpp
@@ -0,0 +1,276 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/doris_metrics.h"
+
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vexpr_context.h"
+#include "vec/sink/vtablet_sink.h"
+#include "vec/core/block.h"
+
+namespace doris {
+namespace stream_load {
+
+VOlapTableSink::VOlapTableSink(ObjectPool* pool, const RowDescriptor& row_desc,
+                             const std::vector<TExpr>& texprs, Status* status)
+        : OlapTableSink(pool, row_desc, texprs, status) {
+    // Build the vectorized exprs from thrift; report a failure via `status` unless it already holds an error.
+    Status vexpr_status = vectorized::VExpr::create_expr_trees(pool, texprs, &_output_vexpr_ctxs);
+    if (status != nullptr && status->ok()) { *status = vexpr_status; }
+    _output_expr_ctxs.clear(); // the scalar expr contexts of the base sink are not used here
+    _name = "VOlapTableSink";
+}
+
+Status VOlapTableSink::init(const TDataSink& sink) {
+    if (Status base_st = OlapTableSink::init(sink); !base_st.ok()) { return base_st; } // base sink first
+    _vpartition = _pool->add(new VOlapTablePartitionParam(_schema, sink.olap_table_sink.partition));
+    return _vpartition->init();
+}
+
+Status VOlapTableSink::prepare(RuntimeState* state) {
+    // The vectorized output exprs come first; the base sink is prepared only when that succeeds.
+    Status vexpr_st = vectorized::VExpr::prepare(_output_vexpr_ctxs, state, _input_row_desc, _expr_mem_tracker);
+    return vexpr_st.ok() ? OlapTableSink::prepare(state) : vexpr_st;
+}
+
+Status VOlapTableSink::open(RuntimeState* state) {
+    // The vectorized output exprs are opened first; the base sink is opened only when that succeeds.
+    Status vexpr_st = vectorized::VExpr::open(_output_vexpr_ctxs, state);
+    return vexpr_st.ok() ? OlapTableSink::open(state) : vexpr_st;
+}
+// Runs the output exprs on input_block, validates the result and adds every accepted row to the index channels.
+Status VOlapTableSink::send(RuntimeState* state, vectorized::Block* input_block) {
+    Status status = Status::OK();
+    if (UNLIKELY(input_block->rows() == 0)) { return status; }
+
+    SCOPED_TIMER(_profile->total_time_counter());
+    _number_input_rows += input_block->rows();
+    // update incrementally so that FE can get the progress.
+    // the real 'num_rows_load_total' will be set when the sink is closed.
+    state->update_num_rows_load_total(input_block->rows());
+    state->update_num_bytes_load_total(input_block->bytes());
+    DorisMetrics::instance()->load_rows->increment(input_block->rows());
+    DorisMetrics::instance()->load_bytes->increment(input_block->bytes());
+    // Without output exprs the block below is built directly from the input block's columns.
+    vectorized::Block block(input_block->get_columns_with_type_and_name());
+    if (!_output_vexpr_ctxs.empty()) {
+        // Do vectorized expr here to speed up load. NOTE(review): status is only looked at when the result is empty.
+        block = vectorized::VExprContext::get_output_block_after_execute_exprs(
+            _output_vexpr_ctxs, *input_block, status);
+        if (UNLIKELY(block.rows() == 0)) { return status; }
+    }
+
+    auto num_rows = block.rows();
+    int num_invalid_rows = 0;
+    {
+        SCOPED_RAW_TIMER(&_validate_data_ns);
+        _filter_vec.resize(num_rows);
+        num_invalid_rows = _validate_data(state, &block, reinterpret_cast<bool*>(_filter_vec.data()));
+        _number_filtered_rows += num_invalid_rows;
+    }
+    // From here on the elapsed time is added to _send_data_ns.
+    BlockRow block_row;
+    SCOPED_RAW_TIMER(&_send_data_ns);
+    for (int i = 0; i < num_rows; ++i) {
+        if (num_invalid_rows > 0 && _filter_vec[i] != 0) { // row was rejected by _validate_data
+            continue;
+        }
+        const VOlapTablePartition* partition = nullptr;
+        uint32_t dist_hash = 0;
+        block_row = {&block, i};
+        if (!_vpartition->find_tablet(&block_row, &partition, &dist_hash)) { // no partition: report, count as filtered, skip
+            std::stringstream ss;
+            ss << "no partition for this tuple. tuple="
+               << block.dump_data(i, 1);
+#if BE_TEST
+            LOG(INFO) << ss.str();
+#else
+            state->append_error_msg_to_file("", ss.str());
+#endif
+            _number_filtered_rows++;
+            continue;
+        }
+        _partition_ids.emplace(partition->id);
+        uint32_t tablet_index = dist_hash % partition->num_buckets;
+        for (int j = 0; j < partition->indexes.size(); ++j) { // one add_row per index of the partition
+            int64_t tablet_id = partition->indexes[j].tablets[tablet_index];
+            RETURN_IF_ERROR(_channels[j]->add_row(block_row, tablet_id));
+            _number_output_rows++; // incremented once per index, not once per row
+        }
+    }
+    return Status::OK();
+}
+
+Status VOlapTableSink::close(RuntimeState* state, Status exec_status) {
+    // When already closed, only the stored _close_status is returned and nothing is closed again.
+    if (!_closed) vectorized::VExpr::close(_output_vexpr_ctxs, state);
+    return _closed ? _close_status : OlapTableSink::close(state, exec_status);
+}
+// Checks block against _output_tuple_desc, sets filter_map[row] for every rejected row and returns how many
+// were rejected. Also expands const columns, rounds decimals to the slot scale and aligns column nullability.
+int VOlapTableSink::_validate_data(doris::RuntimeState* state, doris::vectorized::Block* block,
+                                   bool* filter_map) {
+    const auto num_rows = block->rows();
+    // start with every row marked valid
+    memset(filter_map, 0, num_rows * sizeof(bool));
+    for (int i = 0; i < _output_tuple_desc->slots().size(); ++i) {
+        SlotDescriptor* desc = _output_tuple_desc->slots()[i];
+        block->get_by_position(i).column = block->get_by_position(i).column->convert_to_full_column_if_const();
+        const auto& column = block->get_by_position(i).column;
+        if (desc->is_nullable() && desc->type() == TYPE_OBJECT) {
+            // A non-nullable column yields nullptr here; it has no null map, so there is nothing to reject.
+            const auto* nullable_column = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
+            if (nullable_column != nullptr) {
+                const auto& null_map = nullable_column->get_null_map_data();
+                for (int j = 0; j < num_rows; ++j) {
+                    if (!filter_map[j] && null_map[j]) {
+                        state->append_error_msg_to_file("", std::string("null is not allowed for "
+                                                                        "bitmap column, column_name: ") + desc->col_name() + ";");
+                        filter_map[j] = true;
+                    }
+                }
+            }
+        } else {
+            auto column_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>(*column);
+            auto& real_column_ptr = column_ptr == nullptr ? column : (column_ptr->get_nested_column_ptr());
+            // The per-type checks below read the nested column when the column is nullable.
+            switch (desc->type().type) {
+                case TYPE_CHAR:
+                case TYPE_VARCHAR: {
+                    const auto column_string = assert_cast<const vectorized::ColumnString *>(real_column_ptr.get());
+
+                    for (int j = 0; j < num_rows; ++j) {
+                        if (!filter_map[j]) {
+                            auto str_val = column_string->get_data_at(j);
+                            if (str_val.size > desc->type().len) {
+                                state->append_error_msg_to_file("", fmt::format(
+                                        "the length of input is too long than schema. "
+                                        "column_name: {}; input_str: [{}] schema length: {}; actual length: {}; ",
+                                        desc->col_name(), str_val.to_string(),
+                                        desc->type().len, str_val.size));
+                                filter_map[j] = true;
+                            }
+                        }
+                    }
+                    break;
+                }
+                    // TODO: Support TYPE_STRING in the future
+//            case TYPE_STRING: {
+//                StringValue* str_val = (StringValue*)slot;
+//                if (str_val->len > desc->type().MAX_STRING_LENGTH) {
+//                    ss << "the length of input is too long than schema. "
+//                       << "column_name: " << desc->col_name() << "; "
+//                       << "first 128 bytes of input_str: [" << std::string(str_val->ptr, 128)
+//                       << "] "
+//                       << "schema length: " << desc->type().MAX_STRING_LENGTH << "; "
+//                       << "actual length: " << str_val->len << "; ";
+//                    row_valid = false;
+//                    continue;
+//                }
+//                break;
+//            }
+                case TYPE_DECIMALV2: {
+                    auto column_decimal = const_cast<vectorized::ColumnDecimal
+                            <vectorized::Decimal128> *>(assert_cast<const vectorized::ColumnDecimal
+                            <vectorized::Decimal128> *>(real_column_ptr.get()));
+                    // const_cast: over-scaled values are rounded in place inside the block's column.
+                    for (int j = 0; j < num_rows; ++j) {
+                        if (!filter_map[j]) {
+                            auto dec_val = binary_cast<vectorized::Int128, DecimalV2Value>(
+                                    column_decimal->get_data()[j]);
+                            if (dec_val.greater_than_scale(desc->type().scale)) {
+                                auto code = dec_val.round(&dec_val, desc->type().scale, HALF_UP);
+                                column_decimal->get_data()[j] = binary_cast<DecimalV2Value, vectorized::Int128>(
+                                        dec_val);
+
+                                if (code != E_DEC_OK) {
+                                    state->append_error_msg_to_file("", "round one decimal failed.value=" +
+                                                                        dec_val.to_string());
+                                    filter_map[j] = true;
+                                }
+                            }
+
+                            if (dec_val > _max_decimalv2_val[i] || dec_val < _min_decimalv2_val[i]) {
+                                state->append_error_msg_to_file("", fmt::format(
+                                        "decimal value is not valid for definition, column={}, "
+                                        "value={}, precision={}, scale= {};",
+                                        desc->col_name(), dec_val.to_string(), desc->type().precision,
+                                        desc->type().scale));
+                                filter_map[j] = true;
+                            }
+                        }
+                    }
+                    break;
+                }
+                case TYPE_HLL: {
+                    auto column_string = assert_cast<const vectorized::ColumnString *>(real_column_ptr.get());
+
+                    for (int j = 0; j < num_rows; ++j) {
+                        if (!filter_map[j]) {
+                            auto str_val = column_string->get_data_at(j);
+                            if (!HyperLogLog::is_valid(Slice(str_val.data, str_val.size))) {
+                                state->append_error_msg_to_file("", std::string(
+                                        "Content of HLL type column is invalid column_name: " + desc->col_name() +
+                                        ";"));
+                                filter_map[j] = true;
+                            }
+                        }
+                    }
+                    break;
+                }
+                default:
+                    break;
+            }
+
+            // Slot is nullable but the column is not: wrap both the column and its type as nullable.
+            if (desc->is_nullable() && !column_ptr) {
+                block->get_by_position(i).column = vectorized::make_nullable(column);
+                block->get_by_position(i).type = vectorized::make_nullable(block->get_by_position(i).type);
+            }
+
+            // Slot is not nullable but the column is: reject the null rows, then unwrap column and type.
+            if (!desc->is_nullable() && column_ptr) {
+                const auto& null_map = column_ptr->get_null_map_data();
+                for (int j = 0; j < null_map.size(); ++j) {
+                    if (null_map[j] && !filter_map[j]) {
+                        filter_map[j] = true;
+                        std::stringstream ss;
+                        ss << "null value for not null column, column=" << desc->col_name();
+#if BE_TEST
+                        LOG(INFO) << ss.str();
+#else
+                        state->append_error_msg_to_file("", ss.str());
+#endif
+                    }
+                }
+                block->get_by_position(i).column = column_ptr->get_nested_column_ptr();
+                block->get_by_position(i).type = (reinterpret_cast<const vectorized::DataTypeNullable*>(
+                        block->get_by_position(i).type.get()))->get_nested_type();
+            }
+        }
+    }
+    // Each flagged row counts once, however many checks it failed.
+    auto filter_row = 0;
+    for (int i = 0; i < num_rows; ++i) {
+        filter_row += filter_map[i];
+    }
+    return filter_row;
+}
+
+} // namespace stream_load
+} // namespace doris
+
diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h
new file mode 100644
index 0000000000..ec40d02327
--- /dev/null
+++ b/be/src/vec/sink/vtablet_sink.h
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "exec/tablet_sink.h"
+#include "runtime/row_batch.h"
+
+namespace doris {
+
+namespace vectorized {
+class VExprContext;
+}
+
+namespace stream_load {
+
+class OlapTableSink;
+
+// Writes block data to an Olap table.
+// When OlapTableSink::open() is called, there will be a consumer thread running in the background.
+// When you call VOlapTableSink::send(), you are the producer that produces the pending batches.
+// The consumer thread is joined in close().
+class VOlapTableSink : public OlapTableSink {
+public:
+    // Construct from thrift struct which is generated by FE.
+    VOlapTableSink(ObjectPool* pool, const RowDescriptor& row_desc, const std::vector<TExpr>& texprs,
+                  Status* status);
+
+    Status init(const TDataSink& sink) override;
+    // TODO: unify the code of prepare/open/close with result sink
+    Status prepare(RuntimeState* state) override;
+
+    Status open(RuntimeState* state) override;
+
+    Status close(RuntimeState* state, Status close_status) override;
+
+    Status send(RuntimeState* state, vectorized::Block* block) override;
+
+private:
+    // Makes the input data valid for the OLAP table.
+    // Returns the number of invalid/filtered rows;
+    // filter_map[row] is set to true for each of them.
+    int _validate_data(RuntimeState* state, vectorized::Block* block, bool* filter_map);
+
+    VOlapTablePartitionParam* _vpartition = nullptr; // created in init() and handed to _pool
+    std::vector<vectorized::VExprContext*> _output_vexpr_ctxs; // vectorized output exprs, run in send()
+    std::vector<uint8_t> _filter_vec; // per-row reject flags, reused across send() calls
+};
+
+} // namespace stream_load
+} // namespace doris
\ No newline at end of file
diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp
new file mode 100644
index 0000000000..04c5da726d
--- /dev/null
+++ b/be/src/vec/utils/util.hpp
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+#include <thrift/protocol/TJSONProtocol.h>
+
+#include <boost/shared_ptr.hpp>
+
+#include "runtime/descriptors.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/core/block.h"
+
+namespace doris::vectorized {
+class VectorizedUtils {
+public:
+    // Returns a Block made from create_columns_with_type_and_name(row_desc); its columns are null.
+    static Block create_empty_columnswithtypename(const RowDescriptor& row_desc) {
+        return create_columns_with_type_and_name(row_desc);
+    }
+    // One entry per slot of every tuple descriptor, in order: null column, slot data type, slot column name.
+    static ColumnsWithTypeAndName create_columns_with_type_and_name(const RowDescriptor& row_desc) {
+        ColumnsWithTypeAndName described_columns;
+        for (const auto& tuple : row_desc.tuple_descriptors()) {
+            for (const auto& slot : tuple->slots()) {
+                described_columns.emplace_back(nullptr, slot->get_data_type_ptr(), slot->col_name());
+            }
+        }
+        return described_columns;
+    }
+    // ORs src into dst over dst.size() entries; src must be at least that long and must not alias dst.
+    static void update_null_map(NullMap& dst, const NullMap& src) {
+        auto* __restrict dst_flags = dst.data();
+        auto* __restrict src_flags = src.data();
+        const size_t row_count = dst.size();
+        for (size_t row = 0; row != row_count; ++row) {
+            dst_flags[row] |= src_flags[row];
+        }
+    }
+    // Collects the data type of every slot of every tuple descriptor, in descriptor order.
+    static DataTypes get_data_types(const RowDescriptor& row_desc) {
+        DataTypes slot_types;
+        for (const auto& tuple : row_desc.tuple_descriptors()) {
+            for (const auto& slot : tuple->slots()) {
+                slot_types.push_back(slot->get_data_type_ptr());
+            }
+        }
+        return slot_types;
+    }
+};
+} // namespace doris::vectorized
+
+namespace apache::thrift {
+// Reads a ThriftStruct from json_val using TJSONProtocol over an in-memory transport.
+template <typename ThriftStruct>
+ThriftStruct from_json_string(const std::string& json_val) {
+    // TMemoryBuffer is handed a non-const uint8_t*, so the string's constness is cast away here.
+    auto* json_bytes = reinterpret_cast<uint8_t*>(const_cast<char*>(json_val.c_str()));
+    const auto json_len = static_cast<uint32_t>(json_val.size());
+    std::shared_ptr<transport::TTransport> trans(new transport::TMemoryBuffer(json_bytes, json_len));
+    protocol::TJSONProtocol json_protocol(trans);
+    ThriftStruct parsed;
+    parsed.read(&json_protocol);
+    return parsed;
+}
+} // namespace apache::thrift
diff --git a/be/test/exprs/string_functions_test.cpp b/be/test/exprs/string_functions_test.cpp
index 267d3efb2c..035e6a1a34 100644
--- a/be/test/exprs/string_functions_test.cpp
+++ b/be/test/exprs/string_functions_test.cpp
@@ -16,6 +16,7 @@
 // under the License.
 
 #include "exprs/string_functions.h"
+#include "exprs/v_string_functions.h"
 
 #include <gtest/gtest.h>
 #include <iostream>
@@ -124,7 +125,7 @@ TEST_F(StringFunctionsTest, money_format_double) {
     result = StringFunctions::money_format(context, doris_udf::DoubleVal(-36854775807.039));
     expected = AnyValUtil::from_string(ctx, std::string("-36,854,775,807.04"));
     ASSERT_EQ(expected, result);
-    
+
     delete context;
 }
 
@@ -681,73 +682,81 @@ TEST_F(StringFunctionsTest, upper) {
 TEST_F(StringFunctionsTest, ltrim) {
     // no blank
     StringVal src("hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
-    StringVal res = StringFunctions::ltrim(ctx, src);
+    StringVal res = VStringFunctions::ltrim(src);
     ASSERT_EQ(src, res);
     // empty string
     StringVal src1("");
-    res = StringFunctions::ltrim(ctx, src1);
+    res = VStringFunctions::ltrim(src1);
     ASSERT_EQ(src1, res);
     // null string
     StringVal src2(StringVal::null());
-    res = StringFunctions::ltrim(ctx, src2);
+    res = VStringFunctions::ltrim(src2);
     ASSERT_EQ(src2, res);
     // less than 16 blanks
     StringVal src3("       hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
-    res = StringFunctions::ltrim(ctx, src3);
+    res = VStringFunctions::ltrim(src3);
     ASSERT_EQ(src, res);
     // more than 16 blanks
     StringVal src4("                   hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
-    res = StringFunctions::ltrim(ctx, src4);
+    res = VStringFunctions::ltrim(src4);
     ASSERT_EQ(src, res);
     // all are blanks, less than 16 blanks
     StringVal src5("       ");
-    res = StringFunctions::ltrim(ctx, src5);
+    res = VStringFunctions::ltrim(src5);
     ASSERT_EQ(StringVal(""), res);
     // all are blanks, more than 16 blanks
     StringVal src6("                  ");
-    res = StringFunctions::ltrim(ctx, src6);
+    res = VStringFunctions::ltrim(src6);
     ASSERT_EQ(StringVal(""), res);
     // src less than 16 length
     StringVal src7(" 12345678910");
-    res = StringFunctions::ltrim(ctx, src7);
+    res = VStringFunctions::ltrim(src7);
     ASSERT_EQ(StringVal("12345678910"), res);
 }
 
 TEST_F(StringFunctionsTest, rtrim) {
     // no blank
     StringVal src("hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
-    StringVal res = StringFunctions::rtrim(ctx, src);
+    StringVal res = VStringFunctions::rtrim(src);
     ASSERT_EQ(src, res);
     // empty string
     StringVal src1("");
-    res = StringFunctions::rtrim(ctx, src1);
+    res = VStringFunctions::rtrim(src1);
     ASSERT_EQ(src1, res);
     // null string
     StringVal src2(StringVal::null());
-    res = StringFunctions::rtrim(ctx, src2);
+    res = VStringFunctions::rtrim(src2);
     ASSERT_EQ(src2, res);
     // less than 16 blanks
     StringVal src3("hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa       ");
-    res = StringFunctions::rtrim(ctx, src3);
+    res = VStringFunctions::rtrim(src3);
     ASSERT_EQ(src, res);
     // more than 16 blanks
     StringVal src4("hello worldaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa                      ");
-    res = StringFunctions::rtrim(ctx, src4);
+    res = VStringFunctions::rtrim(src4);
     ASSERT_EQ(src, res);
     // all are blanks, less than 16 blanks
     StringVal src5("       ");
-    res = StringFunctions::rtrim(ctx, src5);
+    res = VStringFunctions::rtrim(src5);
     ASSERT_EQ(StringVal(""), res);
     // all are blanks, more than 16 blanks
     StringVal src6("                  ");
-    res = StringFunctions::rtrim(ctx, src6);
+    res = VStringFunctions::rtrim(src6);
     ASSERT_EQ(StringVal(""), res);
     // src less than 16 length
     StringVal src7("12345678910 ");
-    res = StringFunctions::rtrim(ctx, src7);
+    res = VStringFunctions::rtrim(src7);
     ASSERT_EQ(StringVal("12345678910"), res);
 }
 
+TEST_F(StringFunctionsTest, is_ascii) { // covers ASCII, multi-byte, null and empty inputs
+    ASSERT_EQ(true, VStringFunctions::is_ascii(StringVal("hello123")));
+    ASSERT_EQ(true, VStringFunctions::is_ascii(StringVal("hello123fwrewerwerwerwrsfqrwerwefwfwrwfsfwe")));
+    ASSERT_EQ(false, VStringFunctions::is_ascii(StringVal("运维组123")));
+    ASSERT_EQ(false, VStringFunctions::is_ascii(StringVal("hello123运维组fwrewerwerwerwrsfqrwerwefwfwrwfsfwe")));
+    ASSERT_EQ(true, VStringFunctions::is_ascii(StringVal::null())); // a null StringVal is expected to count as ASCII
+    ASSERT_EQ(true, VStringFunctions::is_ascii(StringVal(""))); // so is the empty string
+}
 } // namespace doris
 
 int main(int argc, char** argv) {
diff --git a/be/test/olap/bloom_filter_column_predicate_test.cpp b/be/test/olap/bloom_filter_column_predicate_test.cpp
index 164c51d27c..24abea1215 100644
--- a/be/test/olap/bloom_filter_column_predicate_test.cpp
+++ b/be/test/olap/bloom_filter_column_predicate_test.cpp
@@ -28,6 +28,11 @@
 #include "runtime/string_value.hpp"
 #include "runtime/vectorized_row_batch.h"
 #include "util/logging.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/predicate_column.h"
+#include "vec/core/block.h"
+
+using namespace doris::vectorized;
 
 namespace doris {
 
@@ -172,6 +177,37 @@ TEST_F(TestBloomFilterColumnPredicate, FLOAT_COLUMN) {
     ASSERT_EQ(select_size, 1);
     ASSERT_FLOAT_EQ(*(float*)col_block.cell(_row_block->selection_vector()[0]).cell_ptr(), 5.1);
 
+    // for vectorized::Block no null
+    auto pred_col = PredicateColumnType<vectorized::Float32>::create();
+    pred_col->reserve(size);
+    for (int i = 0; i < size; ++i) {
+        *(col_data + i) = i + 0.1f;
+        pred_col->insert_data(reinterpret_cast<const char*>(col_data + i), 0);
+    }
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    pred->evaluate(*pred_col, _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 3);
+    ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[0]], 4.1);
+    ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[1]], 5.1);
+    ASSERT_FLOAT_EQ((float)pred_col->get_data()[_row_block->selection_vector()[2]], 6.1);
+
+    // for vectorized::Block has nulls
+    auto null_map = ColumnUInt8::create(size, 0);
+    auto& null_map_data = null_map->get_data();
+    for (int i = 0; i < size; ++i) {
+        null_map_data[i] = (i % 2 == 0);
+    }
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    auto nullable_col =
+            vectorized::ColumnNullable::create(std::move(pred_col), std::move(null_map));
+    pred->evaluate(*nullable_col, _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 1);
+    auto nested_col = check_and_get_column<PredicateColumnType<vectorized::Float32>>(
+            nullable_col->get_nested_column());
+    ASSERT_FLOAT_EQ((float)nested_col->get_data()[_row_block->selection_vector()[0]], 5.1);
+
     delete pred;
 }
 
diff --git a/be/test/olap/column_reader_test.cpp b/be/test/olap/column_reader_test.cpp
index bba88ae967..9e48e60e28 100644
--- a/be/test/olap/column_reader_test.cpp
+++ b/be/test/olap/column_reader_test.cpp
@@ -808,8 +808,10 @@ TEST_F(TestColumn, SeekShortColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -892,10 +894,13 @@ TEST_F(TestColumn, VectorizedShortColumnWithPresent) {
 TEST_F(TestColumn, SkipShortColumnWithPresent) {
     // write data
     TabletSchema tablet_schema;
+
     set_tablet_schema_with_one_column("ShortColumn", "SMALLINT", "REPLACE", 2, true, true,
                                       &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -922,8 +927,10 @@ TEST_F(TestColumn, SkipShortColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -938,10 +945,12 @@ TEST_F(TestColumn, SkipShortColumnWithPresent) {
 TEST_F(TestColumn, VectorizedIntColumnWithoutPresent) {
     // write data
     TabletSchema tablet_schema;
+
     set_tablet_schema_with_one_column("IntColumn", "INT", "REPLACE", 4, false, true,
                                       &tablet_schema);
     create_column_writer(tablet_schema);
 
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -965,8 +974,10 @@ TEST_F(TestColumn, VectorizedIntColumnWithoutPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -983,10 +994,12 @@ TEST_F(TestColumn, VectorizedIntColumnWithoutPresent) {
 TEST_F(TestColumn, VectorizedIntColumnMassWithoutPresent) {
     // write data
     TabletSchema tablet_schema;
+
     set_tablet_schema_with_one_column("IntColumn", "INT", "REPLACE", 4, false, true,
                                       &tablet_schema);
     create_column_writer(tablet_schema);
 
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1006,8 +1019,10 @@ TEST_F(TestColumn, VectorizedIntColumnMassWithoutPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1031,9 +1046,11 @@ TEST_F(TestColumn, VectorizedIntColumnMassWithoutPresent) {
 TEST_F(TestColumn, VectorizedIntColumnWithPresent) {
     // write data
     TabletSchema tablet_schema;
+
     set_tablet_schema_with_one_column("IntColumn", "INT", "REPLACE", 4, true, true, &tablet_schema);
     create_column_writer(tablet_schema);
 
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1057,8 +1074,10 @@ TEST_F(TestColumn, VectorizedIntColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1078,10 +1097,13 @@ TEST_F(TestColumn, VectorizedIntColumnWithPresent) {
 TEST_F(TestColumn, VectorizedLongColumnWithoutPresent) {
     // write data
     TabletSchema tablet_schema;
+
     set_tablet_schema_with_one_column("LongColumnWithoutPresent", "BIGINT", "REPLACE", 8, false,
                                       true, &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1106,8 +1128,10 @@ TEST_F(TestColumn, VectorizedLongColumnWithoutPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1124,10 +1148,13 @@ TEST_F(TestColumn, VectorizedLongColumnWithoutPresent) {
 TEST_F(TestColumn, VectorizedLongColumnWithPresent) {
     // write data
     TabletSchema tablet_schema;
+
     set_tablet_schema_with_one_column("LongColumnWithPresent", "BIGINT", "REPLACE", 8, true, true,
                                       &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1152,8 +1179,10 @@ TEST_F(TestColumn, VectorizedLongColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1172,10 +1201,13 @@ TEST_F(TestColumn, VectorizedLongColumnWithPresent) {
 TEST_F(TestColumn, VectorizedFloatColumnWithoutPresent) {
     // write data
     TabletSchema tablet_schema;
+
     set_tablet_schema_with_one_column("FloatColumnWithoutPresent", "FLOAT", "REPLACE", 4, false,
                                       true, &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1200,8 +1232,10 @@ TEST_F(TestColumn, VectorizedFloatColumnWithoutPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1220,10 +1254,13 @@ TEST_F(TestColumn, VectorizedFloatColumnWithPresent) {
     // write data
     TabletSchema tablet_schema;
 
+
     set_tablet_schema_with_one_column("FloatColumnWithPresent", "FLOAT", "REPLACE", 4, true, true,
                                       &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1248,8 +1285,10 @@ TEST_F(TestColumn, VectorizedFloatColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1269,10 +1308,13 @@ TEST_F(TestColumn, SeekFloatColumnWithPresent) {
     // write data
     TabletSchema tablet_schema;
 
+
     set_tablet_schema_with_one_column("FloatColumnWithPresent", "FLOAT", "REPLACE", 4, true, true,
                                       &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1301,8 +1343,10 @@ TEST_F(TestColumn, SeekFloatColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1334,10 +1378,13 @@ TEST_F(TestColumn, SkipFloatColumnWithPresent) {
     // write data
     TabletSchema tablet_schema;
 
+
     set_tablet_schema_with_one_column("FloatColumnWithPresent", "FLOAT", "REPLACE", 4, true, true,
                                       &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1362,8 +1409,10 @@ TEST_F(TestColumn, SkipFloatColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1379,10 +1428,13 @@ TEST_F(TestColumn, VectorizedDoubleColumnWithoutPresent) {
     // write data
     TabletSchema tablet_schema;
 
+
     set_tablet_schema_with_one_column("DoubleColumnWithoutPresent", "DOUBLE", "REPLACE", 8, false,
                                       true, &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1407,8 +1459,10 @@ TEST_F(TestColumn, VectorizedDoubleColumnWithoutPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1427,10 +1481,13 @@ TEST_F(TestColumn, VectorizedDoubleColumnWithPresent) {
     // write data
     TabletSchema tablet_schema;
 
+
     set_tablet_schema_with_one_column("DoubleColumnWithPresent", "DOUBLE", "REPLACE", 8, true, true,
                                       &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1455,8 +1512,10 @@ TEST_F(TestColumn, VectorizedDoubleColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1477,10 +1536,13 @@ TEST_F(TestColumn, VectorizedDatetimeColumnWithoutPresent) {
     // write data
     TabletSchema tablet_schema;
 
+
     set_tablet_schema_with_one_column("DatetimeColumnWithoutPresent", "DATETIME", "REPLACE", 8,
                                       false, true, &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1501,8 +1563,10 @@ TEST_F(TestColumn, VectorizedDatetimeColumnWithoutPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1518,10 +1582,13 @@ TEST_F(TestColumn, VectorizedDatetimeColumnWithPresent) {
     // write data
     TabletSchema tablet_schema;
 
+
     set_tablet_schema_with_one_column("DatetimeColumnWithoutPresent", "DATETIME", "REPLACE", 8,
                                       true, true, &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1548,8 +1615,10 @@ TEST_F(TestColumn, VectorizedDatetimeColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1602,8 +1671,10 @@ TEST_F(TestColumn, VectorizedDatetimeColumnZero) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1628,10 +1699,13 @@ TEST_F(TestColumn, VectorizedDateColumnWithoutPresent) {
     // write data
     TabletSchema tablet_schema;
 
+
     set_tablet_schema_with_one_column("DateColumnWithoutoutPresent", "DATE", "REPLACE", 3, false,
                                       true, &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1652,8 +1726,10 @@ TEST_F(TestColumn, VectorizedDateColumnWithoutPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1668,10 +1744,13 @@ TEST_F(TestColumn, VectorizedDateColumnWithPresent) {
     // write data
     TabletSchema tablet_schema;
 
+
     set_tablet_schema_with_one_column("DateColumnWithoutoutPresent", "DATE", "REPLACE", 3, true,
                                       true, &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1700,8 +1779,10 @@ TEST_F(TestColumn, VectorizedDateColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1724,10 +1805,13 @@ TEST_F(TestColumn, VectorizedDecimalColumnWithoutPresent) {
     // write data
     TabletSchema tablet_schema;
 
+
     set_tablet_schema_with_one_column("DecimalColumnWithoutoutPresent", "DECIMAL", "REPLACE", 12,
                                       false, true, &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1756,8 +1840,10 @@ TEST_F(TestColumn, VectorizedDecimalColumnWithoutPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1807,8 +1893,10 @@ TEST_F(TestColumn, VectorizedDecimalColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1828,10 +1916,13 @@ TEST_F(TestColumn, SkipDecimalColumnWithPresent) {
     // write data
     TabletSchema tablet_schema;
 
+
     set_tablet_schema_with_one_column("DecimalColumnWithPresent", "DECIMAL", "REPLACE", 12, true,
                                       true, &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1858,8 +1949,10 @@ TEST_F(TestColumn, SkipDecimalColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -1876,10 +1969,13 @@ TEST_F(TestColumn, SkipDecimalColumnWithPresent) {
 TEST_F(TestColumn, SeekDecimalColumnWithPresent) {
     // write data
     TabletSchema tablet_schema;
+
     set_tablet_schema_with_one_column("DecimalColumnWithPresent", "DECIMAL", "REPLACE", 12, true,
                                       true, &tablet_schema);
     create_column_writer(tablet_schema);
 
+
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1912,8 +2008,10 @@ TEST_F(TestColumn, SeekDecimalColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
 
+
     RowCursor read_row;
     read_row.init(tablet_schema);
     PositionEntryReader entry1;
@@ -1958,7 +2056,9 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithoutPresent) {
     string value2 = "-170141183460469231731687303715884105728";
 
     // write data
+
     create_column_writer(tablet_schema);
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -1987,7 +2087,9 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithoutPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -2015,7 +2117,9 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithPresent) {
     string value2 = "-170141183460469231731687303715884105728";
 
     // write data
+
     create_column_writer(tablet_schema);
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
@@ -2051,7 +2155,9 @@ TEST_F(TestColumn, VectorizedLargeIntColumnWithPresent) {
     ASSERT_EQ(_column_writer->finalize(&header), OLAP_SUCCESS);
 
     // read data
+
     create_column_reader(tablet_schema);
+
     RowCursor read_row;
     read_row.init(tablet_schema);
 
@@ -2086,7 +2192,9 @@ TEST_F(TestColumn, SkipLargeIntColumnWithPresent) {
     string value2 = "-170141183460469231731687303715884105728";
 
     // write data
+
     create_column_writer(tablet_schema);
+
     RowCursor write_row;
     write_row.init(tablet_schema);
 
diff --git a/be/test/olap/generic_iterators_test.cpp b/be/test/olap/generic_iterators_test.cpp
index 91d4944381..b73ad0271e 100644
--- a/be/test/olap/generic_iterators_test.cpp
+++ b/be/test/olap/generic_iterators_test.cpp
@@ -77,13 +77,13 @@ TEST(GenericIteratorsTest, AutoIncrement) {
 
 TEST(GenericIteratorsTest, Union) {
     auto schema = create_schema();
-    std::list<RowwiseIterator*> inputs;
+    std::vector<RowwiseIterator*> inputs;
 
     inputs.push_back(new_auto_increment_iterator(schema, 100));
     inputs.push_back(new_auto_increment_iterator(schema, 200));
     inputs.push_back(new_auto_increment_iterator(schema, 300));
 
-    auto iter = new_union_iterator(std::move(inputs),
+    auto iter = new_union_iterator(inputs,
                                    MemTracker::CreateTracker(-1, "UnionIterator", nullptr, false));
     StorageReadOptions opts;
     auto st = iter->init(opts);
@@ -117,7 +117,7 @@ TEST(GenericIteratorsTest, Union) {
 
 TEST(GenericIteratorsTest, Merge) {
     auto schema = create_schema();
-    std::list<RowwiseIterator*> inputs;
+    std::vector<RowwiseIterator*> inputs;
 
     inputs.push_back(new_auto_increment_iterator(schema, 100));
     inputs.push_back(new_auto_increment_iterator(schema, 200));
diff --git a/be/test/olap/null_predicate_test.cpp b/be/test/olap/null_predicate_test.cpp
index bf53ec22ed..ec870af4a8 100644
--- a/be/test/olap/null_predicate_test.cpp
+++ b/be/test/olap/null_predicate_test.cpp
@@ -28,6 +28,10 @@
 #include "runtime/string_value.hpp"
 #include "runtime/vectorized_row_batch.h"
 #include "util/logging.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/core/block.h"
+
+using namespace doris::vectorized;
 
 namespace doris {
 
@@ -140,6 +144,16 @@ public:
             *reinterpret_cast<TYPE*>(col_block_view.data()) = i;                                 \
         }                                                                                        \
         pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);                \
+        ASSERT_EQ(select_size, 0);                                                               \
+                                                                                                 \
+        /* for vectorized::Block no null */                                                      \
+        _row_block->clear();                                                                     \
+        select_size = _row_block->selected_size();                                               \
+        vectorized::Block vec_block = tablet_schema.create_block(return_columns);                \
+        _row_block->convert_to_vec_block(&vec_block);                                            \
+        ColumnPtr vec_col = vec_block.get_columns()[0];                                          \
+        pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),                        \
+                       _row_block->selection_vector(), &select_size);                            \
         ASSERT_EQ(select_size, 0);                                                               \
                                                                                                  \
         /* for has nulls */                                                                      \
@@ -172,6 +186,16 @@ public:
         _row_block->clear();                                                                     \
         select_size = _row_block->selected_size();                                               \
         pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);                \
+        ASSERT_EQ(select_size, 5);                                                               \
+                                                                                                 \
+        /* for vectorized::Block has nulls */                                                    \
+        _row_block->clear();                                                                     \
+        select_size = _row_block->selected_size();                                               \
+        vec_block = tablet_schema.create_block(return_columns);                                  \
+        _row_block->convert_to_vec_block(&vec_block);                                            \
+        vec_col = vec_block.get_columns()[0];                                                    \
+        pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),                        \
+                       _row_block->selection_vector(), &select_size);                            \
         ASSERT_EQ(select_size, 5);                                                               \
         pred.reset();                                                                            \
     }
@@ -216,6 +240,16 @@ TEST_F(TestNullPredicate, FLOAT_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -247,6 +281,16 @@ TEST_F(TestNullPredicate, FLOAT_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 5);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 5);
 }
 
 TEST_F(TestNullPredicate, DOUBLE_COLUMN) {
@@ -284,6 +328,16 @@ TEST_F(TestNullPredicate, DOUBLE_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -315,6 +369,16 @@ TEST_F(TestNullPredicate, DOUBLE_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 5);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 5);
 }
 
 TEST_F(TestNullPredicate, DECIMAL_COLUMN) {
@@ -355,6 +419,16 @@ TEST_F(TestNullPredicate, DECIMAL_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -388,6 +462,16 @@ TEST_F(TestNullPredicate, DECIMAL_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 4);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 4);
 }
 
 TEST_F(TestNullPredicate, STRING_COLUMN) {
@@ -440,6 +524,16 @@ TEST_F(TestNullPredicate, STRING_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -483,6 +577,16 @@ TEST_F(TestNullPredicate, STRING_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 4);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 4);
 }
 
 TEST_F(TestNullPredicate, DATE_COLUMN) {
@@ -529,6 +633,16 @@ TEST_F(TestNullPredicate, DATE_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -562,6 +676,16 @@ TEST_F(TestNullPredicate, DATE_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 2);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 2);
 }
 
 TEST_F(TestNullPredicate, DATETIME_COLUMN) {
@@ -608,6 +732,16 @@ TEST_F(TestNullPredicate, DATETIME_COLUMN) {
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 0);
 
+    // for vectorized::Block no null
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vectorized::Block vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    ColumnPtr vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 0);
+
     // for VectorizedBatch has nulls
     col_vector->set_no_nulls(false);
     bool* is_null = reinterpret_cast<bool*>(_mem_pool->allocate(size));
@@ -641,6 +775,16 @@ TEST_F(TestNullPredicate, DATETIME_COLUMN) {
     select_size = _row_block->selected_size();
     pred->evaluate(&col_block, _row_block->selection_vector(), &select_size);
     ASSERT_EQ(select_size, 2);
+
+    // for vectorized::Block has nulls
+    _row_block->clear();
+    select_size = _row_block->selected_size();
+    vec_block = tablet_schema.create_block(return_columns);
+    _row_block->convert_to_vec_block(&vec_block);
+    vec_col = vec_block.get_columns()[0];
+    pred->evaluate(const_cast<doris::vectorized::IColumn&>(*vec_col),
+                   _row_block->selection_vector(), &select_size);
+    ASSERT_EQ(select_size, 2);
 }
 
 } // namespace doris
diff --git a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
index 54614c9d3c..c91f4816f2 100644
--- a/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/binary_dict_page_test.cpp
@@ -76,6 +76,7 @@ public:
         // decode
         PageDecoderOptions decoder_options;
         BinaryDictPageDecoder page_decoder(s.slice(), decoder_options);
+
         page_decoder.set_dict_decoder(dict_page_decoder.get());
 
         status = page_decoder.init();
@@ -171,6 +172,7 @@ public:
             PageDecoderOptions decoder_options;
             BinaryDictPageDecoder page_decoder(results[slice_index].slice(), decoder_options);
             status = page_decoder.init();
+
             page_decoder.set_dict_decoder(dict_page_decoder.get());
             ASSERT_TRUE(status.ok());
 
diff --git a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
index f5978f124b..bbae3fc46e 100644
--- a/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
+++ b/be/test/olap/rowset/segment_v2/column_reader_writer_test.cpp
@@ -33,6 +33,9 @@
 #include "runtime/mem_tracker.h"
 #include "test_util/test_util.h"
 #include "util/file_utils.h"
+#include "vec/data_types/data_type_number.h"
+#include "vec/data_types/data_type_nothing.h"
+#include "vec/core/types.h"
 
 using std::string;
 
@@ -520,6 +523,92 @@ void test_read_default_value(string value, void* result) {
     }
 }
 
+// Test helper: returns an empty vectorized column for the given OLAP field type.
+// Field types without a case below fall back to a DataTypeNothing column.
+static vectorized::MutableColumnPtr create_vectorized_column_ptr(FieldType type) {
+    switch (type) {
+    // Integer types.
+    case OLAP_FIELD_TYPE_SMALLINT: return vectorized::DataTypeInt16().create_column();
+    case OLAP_FIELD_TYPE_INT: return vectorized::DataTypeInt32().create_column();
+    case OLAP_FIELD_TYPE_BIGINT: return vectorized::DataTypeInt64().create_column();
+    case OLAP_FIELD_TYPE_LARGEINT: return vectorized::DataTypeInt128().create_column();
+    // Floating-point types.
+    case OLAP_FIELD_TYPE_FLOAT: return vectorized::DataTypeFloat32().create_column();
+    case OLAP_FIELD_TYPE_DOUBLE: return vectorized::DataTypeFloat64().create_column();
+    // String types (only CHAR is mapped).
+    case OLAP_FIELD_TYPE_CHAR: return vectorized::DataTypeString().create_column();
+    // Date and time types.
+    case OLAP_FIELD_TYPE_DATE: return vectorized::DataTypeDate().create_column();
+    case OLAP_FIELD_TYPE_DATETIME: return vectorized::DataTypeDateTime().create_column();
+    // Decimal, backed by Decimal128.
+    case OLAP_FIELD_TYPE_DECIMAL:
+        return vectorized::DataTypeDecimal<vectorized::Decimal128>(27, 9).create_column();
+    default:
+        return vectorized::DataTypeNothing().create_column();
+    }
+}
+
+template <FieldType type>
+void test_v_read_default_value(string value, void* result) {
+    using Type = typename TypeTraits<type>::CppType;
+    TypeInfo* type_info = get_type_info(type);
+    // Build an iterator over a column of `type` whose default value is `value`.
+    {
+        TabletColumn tablet_column = create_with_default_value<type>(value);
+        DefaultValueColumnIterator iter(tablet_column.has_default_value(),
+                                        tablet_column.default_value(), tablet_column.is_nullable(),
+                                        type_info, tablet_column.length());
+        ColumnIteratorOptions iter_opts;
+        iter_opts.mem_tracker = std::make_shared<MemTracker>();
+        auto st = iter.init(iter_opts);
+        ASSERT_TRUE(st.ok());
+
+        // Sequential read into a vectorized column; every row is compared with `result`.
+        {
+            st = iter.seek_to_first();
+            ASSERT_TRUE(st.ok()) << st.to_string();
+
+            vectorized::MutableColumnPtr mcp = create_vectorized_column_ptr(type);
+
+            size_t rows_read = 16;
+            bool has_null = false; // initialised so the flag never holds an indeterminate value
+            st = iter.next_batch(&rows_read, mcp, &has_null);
+
+            ASSERT_TRUE(st.ok());
+            for (size_t j = 0; j < rows_read; ++j) {
+                if (type == OLAP_FIELD_TYPE_CHAR || type == OLAP_FIELD_TYPE_VARCHAR ||
+                    type == OLAP_FIELD_TYPE_HLL || type == OLAP_FIELD_TYPE_OBJECT) {
+                    // String-like values are not verified by this helper.
+                } else if (type == OLAP_FIELD_TYPE_DATE || type == OLAP_FIELD_TYPE_DATETIME) {
+                    StringRef sr = mcp->get_data_at(j);
+                    ASSERT_EQ(sr.size, sizeof(vectorized::Int64));
+
+                    // `result` points at the expected text; compare it with the formatted value.
+                    auto x = unaligned_load<vectorized::Int64>(sr.data);
+                    auto dt = binary_cast<vectorized::Int64, vectorized::VecDateTimeValue>(x);
+                    char buf[64] = {};
+                    dt.to_string(buf);
+                    ASSERT_EQ(strcmp(buf, static_cast<const char*>(result)), 0);
+                } else if (type == OLAP_FIELD_TYPE_DECIMAL) {
+                    StringRef sr = mcp->get_data_at(j);
+                    ASSERT_EQ(sr.size, sizeof(vectorized::Int128));
+
+                    // `result` points at a decimal12_t holding the expected integer/fraction.
+                    DecimalV2Value v1(unaligned_load<vectorized::Int128>(sr.data));
+                    const decimal12_t* v2 = static_cast<const decimal12_t*>(result);
+                    ASSERT_EQ(v2->integer, v1.int_value());
+                    ASSERT_EQ(v2->fraction, v1.frac_value());
+                } else {
+                    // Fixed-width types: `result` points at a `Type`; compare the raw bytes.
+                    StringRef sr = mcp->get_data_at(j);
+                    ASSERT_EQ(sr.size, sizeof(Type));
+                    ASSERT_EQ(memcmp(sr.data, result, sr.size), 0);
+                }
+            }
+        }
+    }
+}
+
 TEST_F(ColumnReaderWriterTest, test_nullable) {
     size_t num_uint8_rows = LOOP_LESS_OR_MORE(1024, 1024 * 1024);
     uint8_t* is_null = new uint8_t[num_uint8_rows];
@@ -676,6 +765,39 @@ TEST_F(ColumnReaderWriterTest, test_default_value) {
     test_read_default_value<OLAP_FIELD_TYPE_DECIMAL>(v_decimal, &decimal);
 }
 
+TEST_F(ColumnReaderWriterTest, test_v_default_value) {
+    // INT
+    int32_t expected_int = 1;
+    test_v_read_default_value<OLAP_FIELD_TYPE_INT>("1", &expected_int);
+
+    // BIGINT and LARGEINT are both fed the same textual default (INT64 max).
+    const std::string int64_max_text("9223372036854775807");
+    int64_t expected_bigint = std::numeric_limits<int64_t>::max();
+    test_v_read_default_value<OLAP_FIELD_TYPE_BIGINT>(int64_max_text, &expected_bigint);
+    int128_t expected_largeint = std::numeric_limits<int64_t>::max();
+    test_v_read_default_value<OLAP_FIELD_TYPE_LARGEINT>(int64_max_text, &expected_largeint);
+
+    // FLOAT
+    float expected_float = 1.00;
+    test_v_read_default_value<OLAP_FIELD_TYPE_FLOAT>("1.00", &expected_float);
+
+    // DOUBLE
+    double expected_double = 1.99;
+    test_v_read_default_value<OLAP_FIELD_TYPE_DOUBLE>("1.99", &expected_double);
+
+    // DATE: the expected value is the same text as the default.
+    char expected_date[] = "2019-11-12";
+    test_v_read_default_value<OLAP_FIELD_TYPE_DATE>(expected_date, expected_date);
+
+    // DATETIME: the expected value is the same text as the default.
+    char expected_datetime[] = "2019-11-12 12:01:08";
+    test_v_read_default_value<OLAP_FIELD_TYPE_DATETIME>(expected_datetime, expected_datetime);
+
+    // DECIMAL: expected integer part 102418 and fraction part 2.
+    decimal12_t expected_decimal = {102418, 2};
+    test_v_read_default_value<OLAP_FIELD_TYPE_DECIMAL>("102418.000000002", &expected_decimal);
+}
+
 } // namespace segment_v2
 } // namespace doris
 
diff --git a/be/test/tools/benchmark_tool.cpp b/be/test/tools/benchmark_tool.cpp
index fea489aaec..b37dd43954 100644
--- a/be/test/tools/benchmark_tool.cpp
+++ b/be/test/tools/benchmark_tool.cpp
@@ -177,10 +177,10 @@ public:
             PageDecoderOptions decoder_options;
             BinaryDictPageDecoder page_decoder(src.slice(), decoder_options);
             page_decoder.init();
+
             page_decoder.set_dict_decoder(dict_page_decoder.get());
 
             //check values
-
             size_t num = page_start_ids[slice_index + 1] - page_start_ids[slice_index];
 
             auto tracker = std::make_shared<MemTracker>();
diff --git a/be/test/vec/aggregate_functions/CMakeLists.txt b/be/test/vec/aggregate_functions/CMakeLists.txt
new file mode 100644
index 0000000000..e3dc095af0
--- /dev/null
+++ b/be/test/vec/aggregate_functions/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# where to put the test executables built in this directory
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/test/vec/aggregate_functions")
+
+ADD_BE_TEST(agg_test)
+
+
diff --git a/be/test/vec/aggregate_functions/agg_test.cpp b/be/test/vec/aggregate_functions/agg_test.cpp
new file mode 100644
index 0000000000..9bf9f8f42a
--- /dev/null
+++ b/be/test/vec/aggregate_functions/agg_test.cpp
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <string>
+
+#include "gtest/gtest.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
+#include "vec/columns/column_vector.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_number.h"
+
+namespace doris::vectorized {
+// declare function
+void register_aggregate_function_sum(AggregateFunctionSimpleFactory& factory);
+
+TEST(AggTest, basic_test) {
+    auto column_vector_int32 = ColumnVector<Int32>::create();
+    for (int i = 0; i < 4096; i++) {
+        column_vector_int32->insert(cast_to_nearest_field_type(i));
+    }
+    // Look up the "sum" aggregate for a single Int32 argument through the factory.
+    AggregateFunctionSimpleFactory factory;
+    register_aggregate_function_sum(factory);
+    DataTypePtr data_type(std::make_shared<DataTypeInt32>());
+    DataTypes data_types = {data_type};
+    Array array;
+    auto agg_function = factory.get("sum", data_types, array);
+    // Raw buffer for the aggregate state; released with free() at the end of the test.
+    AggregateDataPtr place = (char*)malloc(sizeof(uint64_t) * 4096);
+    agg_function->create(place);
+    const IColumn* column[1] = {column_vector_int32.get()};
+    int ans = 0; // expected sum, accumulated alongside the aggregation
+    for (int i = 0; i < 4096; i++) {
+        agg_function->add(place, column, i, nullptr);
+        ans += i;
+    }
+    ASSERT_EQ(ans, *(int32_t*)place);
+    agg_function->destroy(place);
+    free(place);
+}
+} // namespace doris::vectorized
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv); // hand the command-line arguments to gtest
+    return RUN_ALL_TESTS();                 // the process exit code is the gtest result
+}
diff --git a/be/test/vec/core/CMakeLists.txt b/be/test/vec/core/CMakeLists.txt
new file mode 100644
index 0000000000..1531091106
--- /dev/null
+++ b/be/test/vec/core/CMakeLists.txt
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# where to put the test executables built in this directory
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/test/vec/core")
+
+ADD_BE_TEST(block_test)
+ADD_BE_TEST(column_complex_test)
+
diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp
new file mode 100644
index 0000000000..7264369595
--- /dev/null
+++ b/be/test/vec/core/block_test.cpp
@@ -0,0 +1,309 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/core/block.h"
+
+#include <gtest/gtest.h>
+
+#include <cmath>
+#include <iostream>
+#include <string>
+
+#include "exec/schema_scanner.h"
+#include "gen_cpp/data.pb.h"
+#include "runtime/row_batch.h"
+#include "runtime/tuple_row.h"
+#include "vec/columns/column_decimal.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/column_string.h"
+#include "vec/columns/column_vector.h"
+#include "vec/common/exception.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_nullable.h"
+#include "vec/runtime/vdatetime_value.h"
+namespace doris {
+
+using vectorized::Int32;
+
+TEST(BlockTest, RowBatchCovertToBlock) {
+    SchemaScanner::ColumnDesc column_descs[] = {
+            {"k1", TYPE_SMALLINT, sizeof(int16_t), true},
+            {"k2", TYPE_INT, sizeof(int32_t), false},
+            {"k3", TYPE_DOUBLE, sizeof(double), false},
+            {"k4", TYPE_VARCHAR, sizeof(StringValue), false},
+            {"k5", TYPE_DECIMALV2, sizeof(DecimalV2Value), false},
+            {"k6", TYPE_LARGEINT, sizeof(__int128), false},
+            {"k7", TYPE_DATETIME, sizeof(int64_t), false}};
+
+    SchemaScanner schema_scanner(column_descs,
+                                 sizeof(column_descs) / sizeof(SchemaScanner::ColumnDesc));
+    ObjectPool object_pool;
+    SchemaScannerParam param;
+    schema_scanner.init(&param, &object_pool);
+
+    auto tuple_desc = const_cast<TupleDescriptor*>(schema_scanner.tuple_desc());
+    RowDescriptor row_desc(tuple_desc, false);
+    auto tracker_ptr = MemTracker::CreateTracker(-1, "BlockTest", nullptr, false);
+    RowBatch row_batch(row_desc, 1024, tracker_ptr.get());
+
+    int16_t k1 = -100;
+    int32_t k2 = 100000;
+    double k3 = 7.7;
+    // Fill 1024 rows; slot 0 (k1) is set to NULL on every fifth row.
+    for (int i = 0; i < 1024; ++i, k1++, k2++, k3 += 0.1) {
+        auto idx = row_batch.add_row();
+        TupleRow* tuple_row = row_batch.get_row(idx);
+
+        auto tuple = (Tuple*)(row_batch.tuple_data_pool()->allocate(tuple_desc->byte_size()));
+        auto slot_desc = tuple_desc->slots()[0];
+        if (i % 5 == 0) {
+            tuple->set_null(slot_desc->null_indicator_offset());
+        } else {
+            tuple->set_not_null(slot_desc->null_indicator_offset());
+            memcpy(tuple->get_slot(slot_desc->tuple_offset()), &k1, column_descs[0].size);
+        }
+        slot_desc = tuple_desc->slots()[1];
+        memcpy(tuple->get_slot(slot_desc->tuple_offset()), &k2, column_descs[1].size);
+        slot_desc = tuple_desc->slots()[2];
+        memcpy(tuple->get_slot(slot_desc->tuple_offset()), &k3, column_descs[2].size);
+
+        // string slot
+        slot_desc = tuple_desc->slots()[3];
+        auto num_str = std::to_string(k1);
+        auto string_slot = tuple->get_string_slot(slot_desc->tuple_offset());
+        string_slot->ptr = (char*)row_batch.tuple_data_pool()->allocate(num_str.size());
+        string_slot->len = num_str.size();
+        memcpy(string_slot->ptr, num_str.c_str(), num_str.size());
+
+        slot_desc = tuple_desc->slots()[4];
+        DecimalV2Value decimalv2_num(std::to_string(k3));
+        memcpy(tuple->get_slot(slot_desc->tuple_offset()), &decimalv2_num, column_descs[4].size);
+
+        slot_desc = tuple_desc->slots()[5];
+        int128_t k6 = k1;
+        memcpy(tuple->get_slot(slot_desc->tuple_offset()), &k6, column_descs[5].size);
+
+        slot_desc = tuple_desc->slots()[6];
+        vectorized::VecDateTimeValue k7;
+        std::string now_time("2020-12-02");
+        k7.from_date_str(now_time.c_str(), now_time.size());
+        vectorized::TimeInterval time_interval(vectorized::TimeUnit::DAY, k1, false);
+        k7.date_add_interval(time_interval, vectorized::TimeUnit::DAY);
+        memcpy(tuple->get_slot(slot_desc->tuple_offset()), &k7, column_descs[6].size);
+
+        tuple_row->set_tuple(0, tuple);
+        row_batch.commit_last_row();
+    }
+
+    auto block = row_batch.convert_to_vec_block();
+    // The column handles do not change between rows, so fetch them once up front.
+    vectorized::ColumnPtr column1 = block.get_columns()[0];
+    vectorized::ColumnPtr column2 = block.get_columns()[1];
+    vectorized::ColumnPtr column3 = block.get_columns()[2];
+    vectorized::ColumnPtr column4 = block.get_columns()[3];
+    vectorized::ColumnPtr column5 = block.get_columns()[4];
+    vectorized::ColumnPtr column6 = block.get_columns()[5];
+    vectorized::ColumnPtr column7 = block.get_columns()[6];
+    k1 = -100;
+    k2 = 100000;
+    k3 = 7.7;
+    for (int i = 0; i < 1024; ++i) {
+        if (i % 5 != 0) {
+            ASSERT_EQ((int16_t)column1->get64(i), k1);
+        } else {
+            ASSERT_TRUE(column1->is_null_at(i));
+        }
+        ASSERT_EQ(column2->get_int(i), k2++);
+        ASSERT_EQ(column3->get_float64(i), k3);
+        ASSERT_STREQ(column4->get_data_at(i).data, std::to_string(k1).c_str());
+        auto decimal_field =
+                column5->operator[](i).get<vectorized::DecimalField<vectorized::Decimal128>>();
+        DecimalV2Value decimalv2_num(std::to_string(k3));
+        ASSERT_EQ(DecimalV2Value(decimal_field.get_value()), decimalv2_num);
+
+        // The LARGEINT column was filled from k1, so it must read back equal to k1.
+        ASSERT_EQ(column6->operator[](i).get<vectorized::Int128>(), k1);
+
+        int128_t datetime_bits = column7->operator[](i).get<vectorized::Int128>();
+        vectorized::VecDateTimeValue k7;
+        memcpy(reinterpret_cast<vectorized::Int128*>(&k7), &datetime_bits, column_descs[6].size);
+        vectorized::VecDateTimeValue date_time_value;
+        std::string now_time("2020-12-02");
+        date_time_value.from_date_str(now_time.c_str(), now_time.size());
+        vectorized::TimeInterval time_interval(vectorized::TimeUnit::DAY, k1, false);
+        date_time_value.date_add_interval(time_interval, vectorized::TimeUnit::DAY);
+
+        ASSERT_EQ(k7, date_time_value);
+
+        k1++;
+        k3 += 0.1;
+    }
+}
+
+TEST(BlockTest, SerializeAndDeserializeBlock) {
+    {
+        auto vec = vectorized::ColumnVector<Int32>::create();
+        auto& data = vec->get_data();
+        for (int i = 0; i < 1024; ++i) {
+            data.push_back(i);
+        }
+        vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeInt32>());
+        vectorized::ColumnWithTypeAndName type_and_name(vec->get_ptr(), data_type, "test_int");
+        vectorized::Block block({type_and_name});
+        PBlock pblock;
+        block.serialize(&pblock);
+        std::string s1 = pblock.DebugString();
+        PBlock pblock2;
+        vectorized::Block block2(pblock);
+        block2.serialize(&pblock2);
+        std::string s2 = pblock2.DebugString();
+        EXPECT_EQ(s1, s2);
+    }
+    {
+        auto strcol = vectorized::ColumnString::create();
+        for (int i = 0; i < 1024; ++i) {
+            std::string is = std::to_string(i);
+            strcol->insert_data(is.c_str(), is.size());
+        }
+        vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeString>());
+        vectorized::ColumnWithTypeAndName type_and_name(strcol->get_ptr(), data_type, "test_string");
+        vectorized::Block block({type_and_name});
+        PBlock pblock;
+        block.serialize(&pblock);
+        std::string s1 = pblock.DebugString();
+        PBlock pblock2;
+        vectorized::Block block2(pblock);
+        block2.serialize(&pblock2);
+        std::string s2 = pblock2.DebugString();
+        EXPECT_EQ(s1, s2);
+    }
+    {
+        vectorized::DataTypePtr decimal_data_type(doris::vectorized::create_decimal(27, 9));
+        auto decimal_column = decimal_data_type->create_column();
+        auto& data = ((vectorized::ColumnDecimal<vectorized::Decimal<vectorized::Int128>>*)
+                              decimal_column.get())
+                             ->get_data();
+        for (int i = 0; i < 1024; ++i) {
+            __int128_t value = static_cast<__int128_t>(i) * 1100000000; // i * 10^9 + i * 10^8
+            data.push_back(value);
+        }
+        vectorized::ColumnWithTypeAndName type_and_name(decimal_column->get_ptr(), decimal_data_type,
+                                                        "test_decimal");
+        vectorized::Block block({type_and_name});
+        PBlock pblock;
+        block.serialize(&pblock);
+        std::string s1 = pblock.DebugString();
+        PBlock pblock2;
+        vectorized::Block block2(pblock);
+        block2.serialize(&pblock2);
+        std::string s2 = pblock2.DebugString();
+        EXPECT_EQ(s1, s2);
+    }
+    // Nullable Int32 column
+    {
+        auto column_vector_int32 = vectorized::ColumnVector<Int32>::create();
+        auto column_nullable_vector = vectorized::make_nullable(std::move(column_vector_int32));
+        auto mutable_nullable_vector = std::move(*column_nullable_vector).mutate();
+        for (int i = 0; i < 4096; i++) {
+            mutable_nullable_vector->insert(vectorized::cast_to_nearest_field_type(i));
+        }
+        auto data_type = vectorized::make_nullable(std::make_shared<vectorized::DataTypeInt32>());
+        vectorized::ColumnWithTypeAndName type_and_name(mutable_nullable_vector->get_ptr(),
+                                                        data_type, "test_nullable_int32");
+        vectorized::Block block({type_and_name});
+        PBlock pblock;
+        block.serialize(&pblock);
+        std::string s1 = pblock.DebugString();
+        PBlock pblock2;
+        vectorized::Block block2(pblock);
+        block2.serialize(&pblock2);
+        std::string s2 = pblock2.DebugString();
+        EXPECT_EQ(s1, s2);
+    }
+}
+
+TEST(BlockTest, dump_data) {
+    auto vec = vectorized::ColumnVector<Int32>::create();
+    auto& int32_data = vec->get_data();
+    for (int i = 0; i < 1024; ++i) {
+        int32_data.push_back(i);
+    }
+    vectorized::DataTypePtr int32_type(std::make_shared<vectorized::DataTypeInt32>());
+    vectorized::ColumnWithTypeAndName test_int(vec->get_ptr(), int32_type, "test_int");
+
+    auto strcol = vectorized::ColumnString::create();
+    for (int i = 0; i < 1024; ++i) {
+        std::string is = std::to_string(i);
+        strcol->insert_data(is.c_str(), is.size());
+    }
+    vectorized::DataTypePtr string_type(std::make_shared<vectorized::DataTypeString>());
+    vectorized::ColumnWithTypeAndName test_string(strcol->get_ptr(), string_type, "test_string");
+
+    vectorized::DataTypePtr decimal_data_type(doris::vectorized::create_decimal(27, 9));
+    auto decimal_column = decimal_data_type->create_column();
+    auto& decimal_data = ((vectorized::ColumnDecimal<vectorized::Decimal<vectorized::Int128>>*)
+                                  decimal_column.get())
+                                 ->get_data();
+    for (int i = 0; i < 1024; ++i) {
+        __int128_t value = static_cast<__int128_t>(i) * 1100000000; // i * 10^9 + i * 10^8
+        decimal_data.push_back(value);
+    }
+    vectorized::ColumnWithTypeAndName test_decimal(decimal_column->get_ptr(), decimal_data_type,
+                                                   "test_decimal");
+
+    auto column_vector_int32 = vectorized::ColumnVector<Int32>::create();
+    auto column_nullable_vector = vectorized::make_nullable(std::move(column_vector_int32));
+    auto mutable_nullable_vector = std::move(*column_nullable_vector).mutate();
+    for (int i = 0; i < 4096; i++) {
+        mutable_nullable_vector->insert(vectorized::cast_to_nearest_field_type(i));
+    }
+    auto nint32_type = vectorized::make_nullable(std::make_shared<vectorized::DataTypeInt32>());
+    vectorized::ColumnWithTypeAndName test_nullable_int32(mutable_nullable_vector->get_ptr(),
+                                                          nint32_type, "test_nullable_int32");
+
+    auto column_vector_date = vectorized::ColumnVector<vectorized::Int64>::create();
+    auto& date_data = column_vector_date->get_data();
+    for (int i = 0; i < 1024; ++i) {
+        vectorized::VecDateTimeValue value;
+        value.from_date_int64(20210501);
+        date_data.push_back(*reinterpret_cast<vectorized::Int64*>(&value));
+    }
+    vectorized::DataTypePtr date_type(std::make_shared<vectorized::DataTypeDate>());
+    vectorized::ColumnWithTypeAndName test_date(column_vector_date->get_ptr(), date_type,
+                                                "test_date");
+
+    auto column_vector_datetime = vectorized::ColumnVector<vectorized::Int64>::create();
+    auto& datetime_data = column_vector_datetime->get_data();
+    for (int i = 0; i < 1024; ++i) {
+        vectorized::VecDateTimeValue value;
+        value.from_date_int64(20210501080910);
+        datetime_data.push_back(*reinterpret_cast<vectorized::Int64*>(&value));
+    }
+    vectorized::DataTypePtr datetime_type(std::make_shared<vectorized::DataTypeDateTime>());
+    vectorized::ColumnWithTypeAndName test_datetime(column_vector_datetime->get_ptr(), datetime_type,
+                                                    "test_datetime");
+
+    vectorized::Block block(
+            {test_int, test_string, test_decimal, test_nullable_int32, test_date, test_datetime});
+    EXPECT_GT(block.dump_data().size(), 1u); // unsigned literal avoids a signed/unsigned compare
+}
+} // namespace doris
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv); // hand the command-line arguments to gtest
+    return RUN_ALL_TESTS();                 // the process exit code is the gtest result
+}
diff --git a/be/test/vec/core/column_complex_test.cpp b/be/test/vec/core/column_complex_test.cpp
new file mode 100644
index 0000000000..c8267f7e54
--- /dev/null
+++ b/be/test/vec/core/column_complex_test.cpp
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/columns/column_complex.h"
+
+#include <gtest/gtest.h>
+
+#include <memory>
+#include <string>
+
+#include "vec/data_types/data_type_bitmap.h"
+namespace doris::vectorized {
+TEST(ColumnComplexTest, BasicTest) {
+    // A complex column of std::string starts empty; values are passed in as raw object bytes.
+    using StringColumn = ColumnComplexType<std::string>;
+    auto col = StringColumn::create();
+    ASSERT_EQ(col->size(), 0);
+    std::string empty_str = "";
+    std::string text_str = "str-1";
+    col->insert_data(reinterpret_cast<const char*>(&empty_str), sizeof(empty_str));
+    col->insert_data(reinterpret_cast<const char*>(&text_str), sizeof(text_str));
+    // Each row is read back by viewing the returned data pointer as a std::string.
+    StringRef row = col->get_data_at(0);
+    ASSERT_EQ((*reinterpret_cast<const std::string*>(row.data)), "");
+    row = col->get_data_at(1);
+    ASSERT_EQ((*reinterpret_cast<const std::string*>(row.data)), text_str);
+}
+
+// Compile-only check: passes as long as DataTypeBitMap can be instantiated and constructed.
+TEST(ColumnComplexTest, DataTypeBitmapTest) {
+    std::make_shared<DataTypeBitMap>();
+}
+} // namespace doris::vectorized
+
+int main(int argc, char** argv) { // entry point of this test binary
+    ::testing::InitGoogleTest(&argc, argv); // let gtest process its own command-line flags
+    return RUN_ALL_TESTS(); // gtest's overall result becomes the process exit code
+}
diff --git a/be/test/vec/exec/CMakeLists.txt b/be/test/vec/exec/CMakeLists.txt
new file mode 100644
index 0000000000..07bdcbc767
--- /dev/null
+++ b/be/test/vec/exec/CMakeLists.txt
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Output directory for the test executables declared in this file.
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/test/vec/exec")
+# NOTE(review): be/CMakeLists.txt adds other test/vec subdirectories but not vec/exec - confirm this is built.
+ADD_BE_TEST(vgeneric_iterators_test)
diff --git a/be/test/vec/exec/vgeneric_iterators_test.cpp b/be/test/vec/exec/vgeneric_iterators_test.cpp
new file mode 100644
index 0000000000..9b4e46e324
--- /dev/null
+++ b/be/test/vec/exec/vgeneric_iterators_test.cpp
@@ -0,0 +1,207 @@
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/olap/vgeneric_iterators.h"
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "olap/olap_common.h"
+#include "olap/row_block2.h"
+#include "olap/schema.h"
+#include "util/slice.h"
+
+namespace doris {
+
+namespace vectorized {
+
+class VGenericIteratorsTest : public testing::Test { // empty fixture: no shared state
+public:
+    VGenericIteratorsTest() = default;
+    ~VGenericIteratorsTest() override = default;
+};
+
+Schema create_schema() { // three columns; the trailing 2 is presumably the key-column count
+    std::vector<TabletColumn> columns;
+    // c1: small int
+    columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_SMALLINT, true);
+    // c2: int
+    columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_INT, true);
+    // c3: big int
+    columns.emplace_back(OLAP_FIELD_AGGREGATION_SUM, OLAP_FIELD_TYPE_BIGINT, true);
+
+    return Schema(columns, 2);
+}
+
+// Appends to `block` one freshly created column per column of `schema`, named after it.
+static void create_block(Schema& schema, vectorized::Block& block) {
+    for (auto& desc : schema.columns()) {
+        ASSERT_TRUE(desc);
+        auto type = Schema::get_data_type_ptr(desc->type());
+        ASSERT_NE(type, nullptr);
+        vectorized::ColumnWithTypeAndName entry(type->create_column(), type, desc->name());
+        block.insert(entry);
+    }
+}
+
+TEST(VGenericIteratorsTest, AutoIncrement) {
+    // A 10-row auto-increment iterator is expected to deliver all its rows in one batch.
+    auto schema = create_schema();
+    auto iter = vectorized::new_auto_increment_iterator(schema, 10);
+
+    StorageReadOptions read_opts;
+    auto init_st = iter->init(read_opts);
+    ASSERT_TRUE(init_st.ok());
+
+    vectorized::Block block;
+    create_block(schema, block);
+    auto next_st = iter->next_batch(&block);
+    ASSERT_TRUE(next_st.ok());
+    ASSERT_EQ(block.rows(), 10);
+
+    auto col0 = block.get_by_position(0).column;
+    auto col1 = block.get_by_position(1).column;
+    auto col2 = block.get_by_position(2).column;
+    ASSERT_TRUE(col0->is_numeric());
+    ASSERT_TRUE(col1->is_numeric());
+    ASSERT_TRUE(col2->is_numeric());
+
+    // Row r is expected to hold (r, r + 1, r + 2) across the three columns.
+    int expected = 0;
+    const size_t num_rows = block.rows();
+    for (size_t r = 0; r < num_rows; ++r, ++expected) {
+        ASSERT_EQ(expected, col0->get_int(r));
+        ASSERT_EQ(expected + 1, col1->get_int(r));
+        ASSERT_EQ(expected + 2, col2->get_int(r));
+    }
+
+    // NOTE(review): not reached if an ASSERT_* above fails, since those return from the test.
+    delete iter;
+}
+
+TEST(VGenericIteratorsTest, Union) { // union of three inputs must yield them back to back
+    auto schema = create_schema();
+    std::vector<RowwiseIterator*> inputs;
+    // Three auto-increment inputs of 100, 200 and 300 rows.
+    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 100));
+    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 200));
+    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 300));
+
+    auto iter = vectorized::new_union_iterator(inputs, MemTracker::CreateTracker(-1, "VUnionIterator", nullptr, false));
+    StorageReadOptions opts;
+    auto st = iter->init(opts);
+    ASSERT_TRUE(st.ok());
+
+    vectorized::Block block;
+    create_block(schema, block);
+    // Keep pulling into the same block until next_batch stops returning OK.
+    do {
+        st = iter->next_batch(&block);
+    } while (st.ok());
+    // The loop must have stopped on end-of-file, with 100 + 200 + 300 rows accumulated.
+    ASSERT_TRUE(st.is_end_of_file());
+    ASSERT_EQ(block.rows(), 600);
+
+    auto c0 = block.get_by_position(0).column;
+    auto c1 = block.get_by_position(1).column;
+    auto c2 = block.get_by_position(2).column;
+
+    ASSERT_TRUE(c0->is_numeric());
+    ASSERT_TRUE(c1->is_numeric());
+    ASSERT_TRUE(c2->is_numeric());
+    // Expected layout: rows 0-99 count 0..99, rows 100-299 count 0..199, rows 300-599 count 0..299.
+    size_t row_count = 0;
+    for (size_t i = 0; i < block.rows(); ++i) {
+        size_t base_value = row_count;
+        if (row_count >= 300) {
+            base_value -= 300;
+        } else if (row_count >= 100) {
+            base_value -= 100;
+        }
+
+        ASSERT_EQ(base_value,     c0->get_int(i));
+        ASSERT_EQ(base_value + 1, c1->get_int(i));
+        ASSERT_EQ(base_value + 2, c2->get_int(i));
+        row_count++;
+    }
+    // NOTE(review): the input iterators are never deleted here; presumably iter owns them - confirm.
+    delete iter;
+}
+
+TEST(VGenericIteratorsTest, Merge) { // merge of three inputs must yield rows in sorted order
+    ASSERT_TRUE(1);
+    auto schema = create_schema();
+    std::vector<RowwiseIterator*> inputs;
+    // Three auto-increment inputs of 100, 200 and 300 rows.
+    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 100));
+    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 200));
+    inputs.push_back(vectorized::new_auto_increment_iterator(schema, 300));
+
+    auto iter = vectorized::new_merge_iterator(inputs, MemTracker::CreateTracker(-1, "VMergeIterator", nullptr, false));
+    StorageReadOptions opts;
+    auto st = iter->init(opts);
+    ASSERT_TRUE(st.ok());
+
+    vectorized::Block block;
+    create_block(schema, block);
+    // Keep pulling into the same block until next_batch stops returning OK.
+    do {
+        st = iter->next_batch(&block);
+    } while (st.ok());
+    // The loop must have stopped on end-of-file, with 100 + 200 + 300 rows accumulated.
+    ASSERT_TRUE(st.is_end_of_file());
+    ASSERT_EQ(block.rows(), 600);
+
+    auto c0 = block.get_by_position(0).column;
+    auto c1 = block.get_by_position(1).column;
+    auto c2 = block.get_by_position(2).column;
+
+    ASSERT_TRUE(c0->is_numeric());
+    ASSERT_TRUE(c1->is_numeric());
+    ASSERT_TRUE(c2->is_numeric());
+
+    size_t row_count = 0;
+    for (size_t i = 0; i < block.rows(); ++i) {
+        size_t base_value = row_count;
+        // Expected order: 0-99 three times each (rows 0-299), 100-199 twice (300-499), 200-299 once.
+        if (row_count < 300) {
+            base_value = row_count / 3;
+        } else if (row_count < 500) {
+            base_value = (row_count - 300) / 2 + 100;
+        } else {
+            base_value = row_count - 300;
+        }
+
+        ASSERT_EQ(base_value,     c0->get_int(i));
+        ASSERT_EQ(base_value + 1, c1->get_int(i));
+        ASSERT_EQ(base_value + 2, c2->get_int(i));
+        row_count++;
+    }
+    // NOTE(review): the input iterators are never deleted here; presumably iter owns them - confirm.
+    delete iter;
+}
+
+} // namespace vectorized
+
+} // namespace doris
+
+int main(int argc, char** argv) { // entry point of this test binary
+    testing::InitGoogleTest(&argc, argv); // let gtest process its own command-line flags
+    return RUN_ALL_TESTS(); // gtest's overall result becomes the process exit code
+}
diff --git a/be/test/vec/exprs/CMakeLists.txt b/be/test/vec/exprs/CMakeLists.txt
new file mode 100644
index 0000000000..ffd85600ed
--- /dev/null
+++ b/be/test/vec/exprs/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Output directory for the test executables declared in this file.
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/test/vec/exprs")
+# ADD_BE_TEST is the helper function defined in be/CMakeLists.txt.
+ADD_BE_TEST(vexpr_test)
+
diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp
new file mode 100644
index 0000000000..d9a5e4d01a
--- /dev/null
+++ b/be/test/vec/exprs/vexpr_test.cpp
@@ -0,0 +1,357 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "vec/exprs/vexpr.h"
+
+#include <thrift/protocol/TJSONProtocol.h>
+
+#include <cmath>
+#include <iostream>
+
+#include "exec/schema_scanner.h"
+#include "gen_cpp/Data_types.h"
+#include "gen_cpp/Exprs_types.h"
+#include "gen_cpp/Types_types.h"
+#include "gtest/gtest.h"
+#include "runtime/exec_env.h"
+#include "runtime/mem_tracker.h"
+#include "runtime/memory/chunk_allocator.h"
+#include "runtime/primitive_type.h"
+#include "runtime/row_batch.h"
+#include "runtime/runtime_state.h"
+#include "runtime/tuple.h"
+#include "runtime/tuple_row.h"
+#include "testutil/desc_tbl_builder.h"
+#include "vec/exprs/vliteral.h"
+#include "vec/runtime/vdatetime_value.h"
+#include "vec/utils/util.hpp"
+TEST(TEST_VEXPR, ABSTEST) { // smoke test: runs an "abs(INT)" expression over a 1024-row block
+    doris::ChunkAllocator::init_instance(4096);
+    doris::ObjectPool object_pool;
+    doris::DescriptorTblBuilder builder(&object_pool);
+    builder.declare_tuple() << doris::TYPE_INT << doris::TYPE_DOUBLE;
+    doris::DescriptorTbl* desc_tbl = builder.build();
+
+    auto tuple_desc = const_cast<doris::TupleDescriptor*>(desc_tbl->get_tuple_descriptor(0));
+    doris::RowDescriptor row_desc(tuple_desc, false);
+    auto tracker_ptr = doris::MemTracker::CreateTracker(-1, "BlockTest", nullptr, false);
+    doris::RowBatch row_batch(row_desc, 1024, tracker_ptr.get());
+    std::string expr_json = // thrift-JSON encoding of the TExpr under test
+            R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_6IntValE"}}},"11":{"i64":0}}}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0}}},"20":{"i32":-1},"23":{"i32":-1}}]}})|";
+    doris::TExpr exprx = apache::thrift::from_json_string<doris::TExpr>(expr_json);
+    doris::vectorized::VExprContext* context = nullptr;
+    doris::vectorized::VExpr::create_expr_tree(&object_pool, exprx, &context);
+    // Fill the batch: only slot 0 of each tuple is written, with k1 running upward from -100.
+    int32_t k1 = -100;
+    for (int i = 0; i < 1024; ++i, k1++) {
+        auto idx = row_batch.add_row();
+        doris::TupleRow* tuple_row = row_batch.get_row(idx);
+        auto tuple =
+                (doris::Tuple*)(row_batch.tuple_data_pool()->allocate(tuple_desc->byte_size()));
+        auto slot_desc = tuple_desc->slots()[0];
+        memcpy(tuple->get_slot(slot_desc->tuple_offset()), &k1, slot_desc->slot_size());
+        tuple_row->set_tuple(0, tuple);
+        row_batch.commit_last_row();
+    }
+    // Runtime state carrying the descriptor table, used to prepare and open the context.
+    doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(),
+                                     doris::TQueryGlobals(), nullptr);
+    runtime_stat.init_instance_mem_tracker();
+    runtime_stat.set_desc_tbl(desc_tbl);
+    std::shared_ptr<doris::MemTracker> tracker = doris::MemTracker::CreateTracker();
+    context->prepare(&runtime_stat, row_desc, tracker);
+    context->open(&runtime_stat);
+    // NOTE(review): no return value or output column is asserted; this only checks the calls complete.
+    auto block = row_batch.convert_to_vec_block();
+    int ts = -1;
+    context->execute(&block, &ts);
+}
+
+TEST(TEST_VEXPR, ABSTEST2) { // like ABSTEST, but the tuple layout comes from a SchemaScanner
+    using namespace doris;
+    SchemaScanner::ColumnDesc column_descs[] = {{"k1", TYPE_INT, sizeof(int32_t), false}};
+    SchemaScanner schema_scanner(column_descs, 1);
+    ObjectPool object_pool;
+    SchemaScannerParam param;
+    schema_scanner.init(&param, &object_pool);
+    auto tuple_desc = const_cast<TupleDescriptor*>(schema_scanner.tuple_desc());
+    RowDescriptor row_desc(tuple_desc, false);
+    auto tracker_ptr = MemTracker::CreateTracker(-1, "BlockTest", nullptr, false);
+    RowBatch row_batch(row_desc, 1024, tracker_ptr.get());
+    std::string expr_json = // thrift-JSON encoding of the TExpr under test
+            R"|({"1":{"lst":["rec",2,{"1":{"i32":20},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"4":{"i32":1},"20":{"i32":-1},"26":{"rec":{"1":{"rec":{"2":{"str":"abs"}}},"2":{"i32":0},"3":{"lst":["rec",1,{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}]},"4":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":6}}}}]}}},"5":{"tf":0},"7":{"str":"abs(INT)"},"9":{"rec":{"1":{"str":"_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_6IntValE"}}},"11":{"i64":0}}}},{"1":{"i32":16},"2":{"rec":{"1":{"lst":["rec",1,{"1":{"i32":0},"2":{"rec":{"1":{"i32":5}}}}]}}},"4":{"i32":0},"15":{"rec":{"1":{"i32":0},"2":{"i32":0}}},"20":{"i32":-1},"23":{"i32":-1}}]}})|";
+    TExpr exprx = apache::thrift::from_json_string<TExpr>(expr_json);
+    // Build the vectorized expression tree from the deserialized TExpr.
+    doris::vectorized::VExprContext* context = nullptr;
+    doris::vectorized::VExpr::create_expr_tree(&object_pool, exprx, &context);
+    // Fill the batch: slot 0 of each tuple gets k1, running upward from -100.
+    int32_t k1 = -100;
+    for (int i = 0; i < 1024; ++i, k1++) {
+        auto idx = row_batch.add_row();
+        doris::TupleRow* tuple_row = row_batch.get_row(idx);
+        auto tuple =
+                (doris::Tuple*)(row_batch.tuple_data_pool()->allocate(tuple_desc->byte_size()));
+        auto slot_desc = tuple_desc->slots()[0];
+        memcpy(tuple->get_slot(slot_desc->tuple_offset()), &k1, slot_desc->slot_size());
+        tuple_row->set_tuple(0, tuple);
+        row_batch.commit_last_row();
+    }
+    // The descriptor table is assembled by hand: slot id 0 maps to the scanner's only slot.
+    doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(),
+                                     doris::TQueryGlobals(), nullptr);
+    runtime_stat.init_instance_mem_tracker();
+    DescriptorTbl desc_tbl;
+    desc_tbl._slot_desc_map[0] = tuple_desc->slots()[0];
+    runtime_stat.set_desc_tbl(&desc_tbl);
+    std::shared_ptr<doris::MemTracker> tracker = doris::MemTracker::CreateTracker();
+    context->prepare(&runtime_stat, row_desc, tracker);
+    context->open(&runtime_stat);
+    // NOTE(review): no return value or output column is asserted; this only checks the calls complete.
+    auto block = row_batch.convert_to_vec_block();
+    int ts = -1;
+    context->execute(&block, &ts);
+}
+
+namespace doris {
+template <PrimitiveType T> // primary template is empty; members exist only in specializations
+struct literal_traits {};
+// BOOLEAN: bool value, thrift type BOOLEAN, carried by a BOOL_LITERAL node.
+template <>
+struct literal_traits<TYPE_BOOLEAN> {
+    const static TPrimitiveType::type ttype = TPrimitiveType::BOOLEAN;
+    const static TExprNodeType::type tnode_type = TExprNodeType::BOOL_LITERAL;
+    using CXXType = bool;
+};
+// SMALLINT: int16_t value, carried by an INT_LITERAL node.
+template <>
+struct literal_traits<TYPE_SMALLINT> {
+    const static TPrimitiveType::type ttype = TPrimitiveType::SMALLINT;
+    const static TExprNodeType::type tnode_type = TExprNodeType::INT_LITERAL;
+    using CXXType = int16_t;
+};
+// INT: int32_t value, carried by an INT_LITERAL node.
+template <>
+struct literal_traits<TYPE_INT> {
+    const static TPrimitiveType::type ttype = TPrimitiveType::INT;
+    const static TExprNodeType::type tnode_type = TExprNodeType::INT_LITERAL;
+    using CXXType = int32_t;
+};
+// BIGINT: int64_t value, carried by an INT_LITERAL node.
+template <>
+struct literal_traits<TYPE_BIGINT> {
+    const static TPrimitiveType::type ttype = TPrimitiveType::BIGINT;
+    const static TExprNodeType::type tnode_type = TExprNodeType::INT_LITERAL;
+    using CXXType = int64_t;
+};
+// LARGEINT: __int128_t value, carried by a LARGE_INT_LITERAL node.
+template <>
+struct literal_traits<TYPE_LARGEINT> {
+    const static TPrimitiveType::type ttype = TPrimitiveType::LARGEINT;
+    const static TExprNodeType::type tnode_type = TExprNodeType::LARGE_INT_LITERAL;
+    using CXXType = __int128_t;
+};
+// FLOAT: float value, carried by a FLOAT_LITERAL node.
+template <>
+struct literal_traits<TYPE_FLOAT> {
+    const static TPrimitiveType::type ttype = TPrimitiveType::FLOAT;
+    const static TExprNodeType::type tnode_type = TExprNodeType::FLOAT_LITERAL;
+    using CXXType = float;
+};
+
+template <> // DOUBLE: double value, carried by a FLOAT_LITERAL node
+struct literal_traits<TYPE_DOUBLE> {
+    const static TPrimitiveType::type ttype = TPrimitiveType::DOUBLE; // must match TYPE_DOUBLE
+    const static TExprNodeType::type tnode_type = TExprNodeType::FLOAT_LITERAL;
+    using CXXType = double; // 64-bit default, so a DOUBLE literal is not narrowed to float
+};
+
+template <> // DATETIME: std::string value, carried by a STRING_LITERAL node
+struct literal_traits<TYPE_DATETIME> {
+    const static TPrimitiveType::type ttype = TPrimitiveType::DATE; // NOTE(review): DATE, not DATETIME - confirm
+    const static TExprNodeType::type tnode_type = TExprNodeType::STRING_LITERAL;
+    using CXXType = std::string;
+};
+// DECIMALV2: std::string value, carried by a DECIMAL_LITERAL node.
+template <>
+struct literal_traits<TYPE_DECIMALV2> {
+    const static TPrimitiveType::type ttype = TPrimitiveType::DECIMALV2;
+    const static TExprNodeType::type tnode_type = TExprNodeType::DECIMAL_LITERAL;
+    using CXXType = std::string;
+};
+
+template <PrimitiveType T, class U = typename literal_traits<T>::CXXType,
+          std::enable_if_t<std::is_integral<U>::value, bool> = true>
+void set_literal(TExprNode& node, const U& value) { // integral U: stored as a TIntLiteral
+    TIntLiteral int_literal;
+    int_literal.__set_value(value);
+    node.__set_int_literal(int_literal);
+}
+// BOOLEAN specialization of the integral overload: stored as a TBoolLiteral instead.
+template <>
+void set_literal<TYPE_BOOLEAN, bool>(TExprNode& node, const bool& value) {
+    TBoolLiteral bool_literal;
+    bool_literal.__set_value(value);
+    node.__set_bool_literal(bool_literal);
+}
+// LARGEINT specialization: stored as the string produced by LargeIntValue::to_string.
+template <>
+void set_literal<TYPE_LARGEINT, __int128_t>(TExprNode& node, const __int128_t& value) {
+    TLargeIntLiteral largeIntLiteral;
+    largeIntLiteral.__set_value(LargeIntValue::to_string(value));
+    node.__set_large_int_literal(largeIntLiteral);
+}
+// DATETIME overload: stored as a TDateLiteral (alternative guard: std::is_same<U, std::string>::value).
+template <PrimitiveType T, class U = typename literal_traits<T>::CXXType,
+          std::enable_if_t<T == TYPE_DATETIME, bool> = true>
+void set_literal(TExprNode& node, const U& value) {
+    TDateLiteral date_literal;
+    date_literal.__set_value(value);
+    node.__set_date_literal(date_literal);
+}
+// Floating-point overload (std::numeric_limits<U>::is_iec559): stored as a TFloatLiteral.
+template <PrimitiveType T, class U = typename literal_traits<T>::CXXType,
+          std::enable_if_t<std::numeric_limits<U>::is_iec559, bool> = true>
+void set_literal(TExprNode& node, const U& value) {
+    TFloatLiteral floatLiteral;
+    floatLiteral.__set_value(value);
+    node.__set_float_literal(floatLiteral);
+}
+// DECIMALV2 overload: stored as a TDecimalLiteral.
+template <PrimitiveType T, class U = typename literal_traits<T>::CXXType,
+          std::enable_if_t<T == TYPE_DECIMALV2, bool> = true>
+void set_literal(TExprNode& node, const U& value) {
+    TDecimalLiteral decimal_literal;
+    decimal_literal.__set_value(value);
+    node.__set_decimal_literal(decimal_literal);
+}
+
+// Builds a literal TExprNode for primitive type T holding `value` (one scalar type node).
+template <PrimitiveType T, class U = typename literal_traits<T>::CXXType>
+doris::TExprNode create_literal(const U& value) {
+    // The same precision/scale/len are set for every literal, whatever T is.
+    TScalarType scalar_type;
+    scalar_type.__set_precision(27);
+    scalar_type.__set_scale(9);
+    scalar_type.__set_len(20);
+    scalar_type.__set_type(literal_traits<T>::ttype);
+    std::vector<TTypeNode> type_nodes(1);
+    type_nodes[0].__set_scalar_type(scalar_type);
+    TTypeDesc type_desc;
+    type_desc.__set_types(type_nodes);
+    TExprNode node;
+    node.__set_type(type_desc);
+    node.__set_node_type(literal_traits<T>::tnode_type);
+    set_literal<T, U>(node, value); // picks the overload/specialization matching T and U
+    return node;
+}
+} // namespace doris
+
+TEST(TEST_VEXPR, LITERALTEST) { // each scope: build one literal, execute it, check row 0
+    using namespace doris;
+    using namespace doris::vectorized;
+    { // BOOLEAN
+        VLiteral literal(create_literal<TYPE_BOOLEAN>(true));
+        Block block;
+        int ret = -1;
+        literal.execute(nullptr, &block, &ret);
+        auto ctn = block.safe_get_by_position(ret);
+        bool v = ctn.column->get_bool(0);
+        ASSERT_EQ(v, true);
+    }
+    { // SMALLINT
+        VLiteral literal(create_literal<TYPE_SMALLINT>(1024));
+        Block block;
+        int ret = -1;
+        literal.execute(nullptr, &block, &ret);
+        auto ctn = block.safe_get_by_position(ret);
+        auto v = ctn.column->get64(0);
+        ASSERT_EQ(v, 1024);
+    }
+    { // INT
+        VLiteral literal(create_literal<TYPE_INT>(1024));
+        Block block;
+        int ret = -1;
+        literal.execute(nullptr, &block, &ret);
+        auto ctn = block.safe_get_by_position(ret);
+        auto v = ctn.column->get64(0);
+        ASSERT_EQ(v, 1024);
+    }
+    { // BIGINT
+        VLiteral literal(create_literal<TYPE_BIGINT>(1024));
+        Block block;
+        int ret = -1;
+        literal.execute(nullptr, &block, &ret);
+        auto ctn = block.safe_get_by_position(ret);
+        auto v = ctn.column->get64(0);
+        ASSERT_EQ(v, 1024);
+    }
+    { // LARGEINT
+        VLiteral literal(create_literal<TYPE_LARGEINT, __int128_t>(1024));
+        Block block;
+        int ret = -1;
+        literal.execute(nullptr, &block, &ret);
+        auto ctn = block.safe_get_by_position(ret);
+        auto v = (*ctn.column)[0].get<__int128_t>();
+        ASSERT_EQ(v, 1024);
+    }
+    { // FLOAT: the field is read back as double, compared at float precision
+        VLiteral literal(create_literal<TYPE_FLOAT, float>(1024.0f));
+        Block block;
+        int ret = -1;
+        literal.execute(nullptr, &block, &ret);
+        auto ctn = block.safe_get_by_position(ret);
+        auto v = (*ctn.column)[0].get<double>();
+        ASSERT_FLOAT_EQ(v, 1024.0f);
+    }
+    { // DOUBLE
+        VLiteral literal(create_literal<TYPE_DOUBLE, double>(1024.0));
+        Block block;
+        int ret = -1;
+        literal.execute(nullptr, &block, &ret);
+        auto ctn = block.safe_get_by_position(ret);
+        auto v = (*ctn.column)[0].get<double>();
+        ASSERT_DOUBLE_EQ(v, 1024.0); // both sides are double: compare at double precision
+    }
+    { // DATETIME: expected value is the raw 64-bit image of the parsed VecDateTimeValue
+        vectorized::VecDateTimeValue data_time_value;
+        const char* date = "20210407";
+        data_time_value.from_date_str(date, strlen(date));
+        __int64_t dt;
+        memcpy(&dt, &data_time_value, sizeof(__int64_t));
+        VLiteral literal(create_literal<TYPE_DATETIME, std::string>(std::string(date)));
+        Block block;
+        int ret = -1;
+        literal.execute(nullptr, &block, &ret);
+        auto ctn = block.safe_get_by_position(ret);
+        auto v = (*ctn.column)[0].get<__int64_t>();
+        ASSERT_EQ(v, dt);
+    }
+    { // DECIMALV2: unscaled value divided by 10^scale must come back as 1234.56
+        VLiteral literal(create_literal<TYPE_DECIMALV2, std::string>(std::string("1234.56")));
+        Block block;
+        int ret = -1;
+        literal.execute(nullptr, &block, &ret);
+        auto ctn = block.safe_get_by_position(ret);
+        auto v = (*ctn.column)[0].get<DecimalField<Decimal128>>();
+        ASSERT_FLOAT_EQ(((double)v.get_value()) / (std::pow(10, v.get_scale())), 1234.56);
+    }
+}
+
+int main(int argc, char** argv) { // entry point of this test binary
+    ::testing::InitGoogleTest(&argc, argv); // let gtest process its own command-line flags
+    return RUN_ALL_TESTS(); // gtest's overall result becomes the process exit code
+}
diff --git a/be/test/vec/function/CMakeLists.txt b/be/test/vec/function/CMakeLists.txt
new file mode 100644
index 0000000000..6ae9b05073
--- /dev/null
+++ b/be/test/vec/function/CMakeLists.txt
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Output directory for the test executables declared in this file.
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/test/vec/function")
+# One ADD_BE_TEST call (helper function defined in be/CMakeLists.txt) per test.
+ADD_BE_TEST(function_bitmap_test)
+ADD_BE_TEST(function_comparison_test)
+ADD_BE_TEST(function_hash_test)
+ADD_BE_TEST(function_math_test)
+ADD_BE_TEST(function_string_test)
+ADD_BE_TEST(function_time_test)
+ADD_BE_TEST(function_ifnull_test)
+ADD_BE_TEST(function_nullif_test)
+ADD_BE_TEST(function_like_test)
+ADD_BE_TEST(function_arithmetic_test)
diff --git a/be/test/vec/function/function_arithmetic_test.cpp b/be/test/vec/function/function_arithmetic_test.cpp
new file mode 100644
index 0000000000..8dfe3d74a0
--- /dev/null
+++ b/be/test/vec/function/function_arithmetic_test.cpp
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include <string>
+
+#include "function_test_util.h"
+#include "runtime/tuple_row.h"
+#include "util/url_coding.h"
+#include "vec/core/field.h"
+
+namespace doris::vectorized {
+
+// mod(INT, INT): plain remainders, and a NULL expectation when the divisor is 0.
+TEST(function_arithmetic_test, function_arithmetic_mod_test) {
+    std::string mod_name = "mod";
+
+    InputTypeSet arg_types = {TypeIndex::Int32, TypeIndex::Int32};
+    DataSet cases = {{{10, 1}, 0},
+                     {{10, -2}, 0},
+                     {{1234, 33}, 13},
+                     {{1234, 0}, Null()}};
+    check_function<DataTypeInt32, true>(mod_name, arg_types, cases);
+}
+
+TEST(function_arithmetic_test, function_arithmetic_divide_test) {
+    std::string divide_name = "divide";
+    // Int32 operands: the result is checked as Float64; a 0 divisor expects NULL.
+    {
+        InputTypeSet int_args = {TypeIndex::Int32, TypeIndex::Int32};
+        DataSet int_cases = {{{1234, 34}, 36.294117647058826}, {{1234, 0}, Null()}};
+        check_function<DataTypeFloat64, true>(divide_name, int_args, int_cases);
+    }
+    // Float64 operands: same expectations, including NULL for a 0.0 divisor.
+    {
+        InputTypeSet float_args = {TypeIndex::Float64, TypeIndex::Float64};
+        DataSet float_cases = {{{1234.1, 34.6}, 35.667630057803464}, {{1234.34, 0.0}, Null()}};
+        check_function<DataTypeFloat64, true>(divide_name, float_args, float_cases);
+    }
+}
+
+// bitnot(INT): every expectation is C++'s own ~ applied to an int32_t operand.
+TEST(function_arithmetic_test, bitnot_test) {
+    std::string bitnot_name = "bitnot";
+
+    InputTypeSet arg_types = {TypeIndex::Int32};
+
+    // The last two inputs are floating literals cast to int32_t, i.e. truncated to -10 and -999.
+    DataSet cases = {{{(int32_t)30}, ~(int32_t)30},
+                     {{(int32_t)0}, ~(int32_t)0},
+                     {{(int32_t)-10}, ~(int32_t)-10},
+                     {{(int32_t)-10.44}, ~(int32_t)-10},
+                     {{(int32_t)-999.888}, ~(int32_t)-999}};
+
+    check_function<DataTypeInt32, true>(bitnot_name, arg_types, cases);
+}
+
+// bitand(INT, INT): expectations are computed with C++'s & on the same operands.
+TEST(function_arithmetic_test, bitand_test) {
+    std::string bitand_name = "bitand";
+
+    InputTypeSet arg_types = {TypeIndex::Int32, TypeIndex::Int32};
+
+    // Left operands cover positive, zero and negative values.
+    DataSet cases = {{{(int32_t)30, (int32_t)12}, 30 & 12},
+                     {{(int32_t)0, (int32_t)12}, 0 & 12},
+                     {{(int32_t)-10, (int32_t)111}, -10 & 111},
+                     {{(int32_t)-999, (int32_t)888}, -999 & 888}};
+
+    check_function<DataTypeInt32, true>(bitand_name, arg_types, cases);
+}
+
+// bitor(INT, INT): expectations are computed with C++'s | on the same operands.
+TEST(function_arithmetic_test, bitor_test) {
+    std::string bitor_name = "bitor";
+
+    InputTypeSet arg_types = {TypeIndex::Int32, TypeIndex::Int32};
+
+    // Left operands cover positive, zero and negative values.
+    DataSet cases = {{{(int32_t)30, (int32_t)12}, 30 | 12},
+                     {{(int32_t)0, (int32_t)12}, 0 | 12},
+                     {{(int32_t)-10, (int32_t)111}, -10 | 111},
+                     {{(int32_t)-999, (int32_t)888}, -999 | 888}};
+
+    check_function<DataTypeInt32, true>(bitor_name, arg_types, cases);
+}
+
+// bitxor(INT, INT): expectations are computed with C++'s ^ on the same operands.
+TEST(function_arithmetic_test, bitxor_test) {
+    std::string bitxor_name = "bitxor";
+
+    InputTypeSet arg_types = {TypeIndex::Int32, TypeIndex::Int32};
+
+    // Left operands cover positive, zero and negative values.
+    DataSet cases = {{{(int32_t)30, (int32_t)12}, 30 ^ 12},
+                     {{(int32_t)0, (int32_t)12}, 0 ^ 12},
+                     {{(int32_t)-10, (int32_t)111}, -10 ^ 111},
+                     {{(int32_t)-999, (int32_t)888}, -999 ^ 888}};
+
+    check_function<DataTypeInt32, true>(bitxor_name, arg_types, cases);
+}
+
+} // namespace doris::vectorized
+
+int main(int argc, char** argv) { // entry point of this test binary
+    doris::CpuInfo::init(); // called once, before gtest is initialized and any test runs
+    ::testing::InitGoogleTest(&argc, argv); // let gtest process its own command-line flags
+    return RUN_ALL_TESTS(); // gtest's overall result becomes the process exit code
+}
diff --git a/be/test/vec/function/function_bitmap_test.cpp b/be/test/vec/function/function_bitmap_test.cpp
new file mode 100644
index 0000000000..7a2c3c8e6f
--- /dev/null
+++ b/be/test/vec/function/function_bitmap_test.cpp
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#include <gtest/gtest.h>
+
+#include "function_test_util.h"
+#include "util/bitmap_value.h"
+#include "vec/functions/function_totype.h"
+
+namespace doris::vectorized {
+
+// Checks "bitmap_min" on a single-element bitmap, a two-element bitmap, an empty
+// bitmap (expected 0) and a NULL input (expected NULL).
+TEST(function_bitmap_test, function_bitmap_min_test) {
+    std::string func_name = "bitmap_min";
+    InputTypeSet input_types = {TypeIndex::BitMap};
+
+    // The data set only carries BitmapValue* cells, so the bitmaps can live on the
+    // stack for the duration of the test; the previous raw `new`s were never freed.
+    // NOTE(review): assumes check_function does not take ownership of the pointers.
+    BitmapValue bitmap1(1);
+    BitmapValue bitmap2(std::vector<uint64_t>({1, 9999999}));
+    BitmapValue empty_bitmap;
+    DataSet data_set = {{{&bitmap1}, (int64_t)1},
+                        {{&bitmap2}, (int64_t)1},
+                        {{&empty_bitmap}, (int64_t)0},
+                        {{Null()}, Null()}};
+
+    check_function<DataTypeInt64, true>(func_name, input_types, data_set);
+}
+// Checks "bitmap_max" on a single-element bitmap, a two-element bitmap, an empty
+// bitmap (expected 0) and a NULL input (expected NULL).
+TEST(function_bitmap_test, function_bitmap_max_test) {
+    std::string func_name = "bitmap_max";
+    InputTypeSet input_types = {TypeIndex::BitMap};
+
+    // The data set only carries BitmapValue* cells, so the bitmaps can live on the
+    // stack for the duration of the test; the previous raw `new`s were never freed.
+    // NOTE(review): assumes check_function does not take ownership of the pointers.
+    BitmapValue bitmap1(1);
+    BitmapValue bitmap2(std::vector<uint64_t>({1, 9999999}));
+    BitmapValue empty_bitmap;
+    DataSet data_set = {{{&bitmap1}, (int64_t)1},
+                        {{&bitmap2}, (int64_t)9999999},
+                        {{&empty_bitmap}, (int64_t)0},
+                        {{Null()}, Null()}};
+
+    check_function<DataTypeInt64, true>(func_name, input_types, data_set);
+}
+
+// Checks "bitmap_to_string": elements are expected comma-separated, an empty
+// bitmap yields the empty string, and a NULL input yields NULL.
+TEST(function_bitmap_test, function_bitmap_to_string_test) {
+    std::string func_name = "bitmap_to_string";
+    InputTypeSet input_types = {TypeIndex::BitMap};
+
+    // The data set only carries BitmapValue* cells, so the bitmaps can live on the
+    // stack for the duration of the test; the previous raw `new`s were never freed.
+    // NOTE(review): assumes check_function does not take ownership of the pointers.
+    BitmapValue bitmap1(1);
+    BitmapValue bitmap2(std::vector<uint64_t>({1, 9999999}));
+    BitmapValue empty_bitmap;
+    DataSet data_set = {{{&bitmap1}, std::string("1")},
+                        {{&bitmap2}, std::string("1,9999999")},
+                        {{&empty_bitmap}, std::string("")},
+                        {{Null()}, Null()}};
+
+    check_function<DataTypeString, true>(func_name, input_types, data_set);
+}
+
+// Checks "bitmap_and_count" (size of the intersection) against an empty bitmap,
+// the same bitmap, and a bitmap sharing exactly one element.
+TEST(function_bitmap_test, function_bitmap_and_count) {
+    std::string func_name = "bitmap_and_count";
+    InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap};
+    // The data set only carries BitmapValue* cells, so the bitmaps can live on the
+    // stack for the duration of the test; the previous raw `new`s were never freed.
+    // NOTE(review): assumes check_function does not take ownership of the pointers.
+    BitmapValue bitmap1(std::vector<uint64_t>({1, 2, 3}));
+    BitmapValue bitmap2(std::vector<uint64_t>({3, 4, 5}));
+    BitmapValue empty_bitmap;
+    DataSet data_set = {{{&bitmap1, &empty_bitmap}, (int64_t)0},
+                        {{&bitmap1, &bitmap1}, (int64_t)3},
+                        {{&bitmap1, &bitmap2}, (int64_t)1}};
+
+    check_function<DataTypeInt64, true>(func_name, input_types, data_set);
+}
+
+// Checks "bitmap_or_count" (size of the union) against an empty bitmap and
+// against subsets of the other operand.
+TEST(function_bitmap_test, function_bitmap_or_count) {
+    std::string func_name = "bitmap_or_count";
+    InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap};
+    // The data set only carries BitmapValue* cells, so the bitmaps can live on the
+    // stack for the duration of the test; the previous raw `new`s were never freed.
+    // NOTE(review): assumes check_function does not take ownership of the pointers.
+    BitmapValue bitmap1(std::vector<uint64_t>({1, 2, 3}));
+    BitmapValue bitmap2(std::vector<uint64_t>({1, 2, 3, 4}));
+    BitmapValue bitmap3(std::vector<uint64_t>({2, 3}));
+    BitmapValue empty_bitmap;
+    DataSet data_set = {{{&bitmap1, &empty_bitmap}, (int64_t)3},
+                        {{&bitmap2, &bitmap3}, (int64_t)4},
+                        {{&bitmap1, &bitmap3}, (int64_t)3}};
+
+    check_function<DataTypeInt64, true>(func_name, input_types, data_set);
+}
+
+// Checks "bitmap_xor_count" (size of the symmetric difference) against an empty
+// bitmap, a subset, and a partially overlapping bitmap.
+TEST(function_bitmap_test, function_bitmap_xor_count) {
+    std::string func_name = "bitmap_xor_count";
+    InputTypeSet input_types = {TypeIndex::BitMap, TypeIndex::BitMap};
+    // The data set only carries BitmapValue* cells, so the bitmaps can live on the
+    // stack for the duration of the test; the previous raw `new`s were never freed.
+    // NOTE(review): assumes check_function does not take ownership of the pointers.
+    BitmapValue bitmap1(std::vector<uint64_t>({1, 2, 3}));
+    BitmapValue bitmap2(std::vector<uint64_t>({1, 2, 3, 4}));
+    BitmapValue bitmap3(std::vector<uint64_t>({2, 3}));
+    BitmapValue bitmap4(std::vector<uint64_t>({1, 2, 6}));
+    BitmapValue empty_bitmap;
+    DataSet data_set = {{{&bitmap1, &empty_bitmap}, (int64_t)3},
+                        {{&bitmap2, &bitmap3}, (int64_t)2},
+                        {{&bitmap1, &bitmap4}, (int64_t)2}};
+
+    check_function<DataTypeInt64, true>(func_name, input_types, data_set);
+}
+} // namespace doris::vectorized
+
+// Test-binary entry point: lets gtest parse its flags and run every registered test.
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    const int exit_code = RUN_ALL_TESTS();
+    return exit_code;
+}
diff --git a/be/test/vec/function/function_comparison_test.cpp b/be/test/vec/function/function_comparison_test.cpp
new file mode 100644
index 0000000000..8050f431b4
--- /dev/null
+++ b/be/test/vec/function/function_comparison_test.cpp
@@ -0,0 +1,150 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <iostream>
+#include <string>
+
+#include "exec/schema_scanner.h"
+#include "runtime/row_batch.h"
+#include "runtime/tuple_row.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris {
+
+// End-to-end check of the vectorized "gt", "le", "and" and "or" functions.
+// Builds a 1024-row RowBatch with columns k1 (SMALLINT), k2 (INT) and k3 (DOUBLE),
+// converts it to a Block, executes each function into a newly appended UInt8
+// result column, and compares every row with the same expression evaluated in C++.
+TEST(ComparisonTest, ComparisonFunctionTest) {
+    SchemaScanner::ColumnDesc column_descs[] = {{"k1", TYPE_SMALLINT, sizeof(int16_t), false},
+                                                {"k2", TYPE_INT, sizeof(int32_t), false},
+                                                {"k3", TYPE_DOUBLE, sizeof(double), false}};
+    SchemaScanner schema_scanner(column_descs, 3);
+    ObjectPool object_pool;
+    SchemaScannerParam param;
+    schema_scanner.init(&param, &object_pool);
+
+    auto tuple_desc = const_cast<TupleDescriptor*>(schema_scanner.tuple_desc());
+    RowDescriptor row_desc(tuple_desc, false);
+    auto tracker_ptr = MemTracker::CreateTracker(-1, "BlockTest", nullptr, false);
+    RowBatch row_batch(row_desc, 1024, tracker_ptr.get());
+
+    int16_t k1 = -100;
+    int32_t k2 = 100;
+    double k3 = 7.7;
+
+    // Fill the batch: k1 counts up, k2 counts down, k3 grows by 0.1 per row.
+    for (int i = 0; i < 1024; ++i, k1++, k2--, k3 += 0.1) {
+        auto idx = row_batch.add_row();
+        TupleRow* tuple_row = row_batch.get_row(idx);
+
+        auto tuple = (Tuple*)(row_batch.tuple_data_pool()->allocate(tuple_desc->byte_size()));
+        auto slot_desc = tuple_desc->slots()[0];
+        memcpy(tuple->get_slot(slot_desc->tuple_offset()), &k1, column_descs[0].size);
+        slot_desc = tuple_desc->slots()[1];
+        memcpy(tuple->get_slot(slot_desc->tuple_offset()), &k2, column_descs[1].size);
+        slot_desc = tuple_desc->slots()[2];
+        memcpy(tuple->get_slot(slot_desc->tuple_offset()), &k3, column_descs[2].size);
+
+        tuple_row->set_tuple(0, tuple);
+        row_batch.commit_last_row();
+    }
+
+    vectorized::Block block = row_batch.convert_to_vec_block();
+    // 1. compute the k1 > k2
+    vectorized::ColumnNumbers arguments;
+    arguments.emplace_back(block.get_position_by_name("k1"));
+    arguments.emplace_back(block.get_position_by_name("k2"));
+
+    size_t num_columns_without_result = block.columns();
+    block.insert({nullptr, std::make_shared<vectorized::DataTypeUInt8>(), "k1 > k2"});
+
+    vectorized::ColumnsWithTypeAndName ctn = {block.get_by_position(arguments[0]),
+                                              block.get_by_position(arguments[1])};
+
+    auto greater_function_ptr = vectorized::SimpleFunctionFactory::instance().get_function(
+            "gt", ctn, std::make_shared<vectorized::DataTypeUInt8>());
+    greater_function_ptr->execute(nullptr, block, arguments, num_columns_without_result, 1024,
+                                  false);
+
+    k1 = -100;
+    k2 = 100;
+    {
+        // Fetch the result column once rather than copying the column list per row.
+        vectorized::ColumnPtr column = block.get_columns()[3];
+        for (int i = 0; i < 1024; ++i, k1++, k2--) {
+            ASSERT_EQ(column->get_bool(i), k1 > k2);
+        }
+    }
+
+    // 2. compute the k2 <= k3
+    num_columns_without_result = block.columns();
+    block.insert({nullptr, std::make_shared<vectorized::DataTypeUInt8>(), "k2 <= k3"});
+
+    arguments[0] = 1;
+    arguments[1] = 2;
+
+    // Resolve "le" against the columns it is actually executed on (k2, k3);
+    // reusing `ctn` here would resolve the function for (k1, k2) instead.
+    vectorized::ColumnsWithTypeAndName le_ctn = {block.get_by_position(arguments[0]),
+                                                 block.get_by_position(arguments[1])};
+    auto less_or_equals_function_ptr = vectorized::SimpleFunctionFactory::instance().get_function(
+            "le", le_ctn, std::make_shared<vectorized::DataTypeUInt8>());
+    less_or_equals_function_ptr->execute(nullptr, block, arguments, num_columns_without_result,
+                                         1024, false);
+
+    k2 = 100;
+    k3 = 7.7;
+    {
+        vectorized::ColumnPtr column = block.get_columns()[4];
+        for (int i = 0; i < 1024; ++i, k3 += 0.1, k2--) {
+            ASSERT_EQ(column->get_bool(i), k2 <= k3);
+        }
+    }
+
+    // 3. compute (k1 > k2) and (k2 <= k3) from the two result columns above
+    num_columns_without_result = block.columns();
+    block.insert({nullptr, std::make_shared<vectorized::DataTypeUInt8>(), "k1 > k2 and k2 <= k3"});
+    arguments[0] = 3;
+    arguments[1] = 4;
+
+    vectorized::ColumnsWithTypeAndName ctn2 = {block.get_by_position(arguments[0]),
+                                               block.get_by_position(arguments[1])};
+    auto and_function_ptr = vectorized::SimpleFunctionFactory::instance().get_function(
+            "and", ctn2, std::make_shared<vectorized::DataTypeUInt8>());
+    and_function_ptr->execute(nullptr, block, arguments, num_columns_without_result, 1024, false);
+
+    k1 = -100;
+    k2 = 100;
+    k3 = 7.7;
+    {
+        vectorized::ColumnPtr column = block.get_columns()[5];
+        for (int i = 0; i < 1024; ++i, k1++, k3 += 0.1, k2--) {
+            ASSERT_EQ(column->get_bool(i), k1 > k2 and k2 <= k3);
+        }
+    }
+
+    // 4. compute (k1 > k2) or (k2 <= k3); same argument columns as step 3, so ctn2 is reused
+    num_columns_without_result = block.columns();
+    block.insert({nullptr, std::make_shared<vectorized::DataTypeUInt8>(), "k1 > k2 or k2 <= k3"});
+    arguments[0] = 3;
+    arguments[1] = 4;
+
+    auto or_function_ptr = vectorized::SimpleFunctionFactory::instance().get_function(
+            "or", ctn2, std::make_shared<vectorized::DataTypeUInt8>());
+    or_function_ptr->execute(nullptr, block, arguments, num_columns_without_result, 1024, false);
+
+    k1 = -100;
+    k2 = 100;
+    k3 = 7.7;
+    {
+        vectorized::ColumnPtr column = block.get_columns()[6];
+        for (int i = 0; i < 1024; ++i, k1++, k3 += 0.1, k2--) {
+            ASSERT_EQ(column->get_bool(i), k1 > k2 or k2 <= k3);
+        }
+    }
+}
+
+} // namespace doris
+
+// Test-binary entry point: lets gtest parse its flags and run every registered test.
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    const int exit_code = RUN_ALL_TESTS();
+    return exit_code;
+}
diff --git a/be/test/vec/function/function_hash_test.cpp b/be/test/vec/function/function_hash_test.cpp
new file mode 100644
index 0000000000..41cfc522dc
--- /dev/null
+++ b/be/test/vec/function/function_hash_test.cpp
@@ -0,0 +1,96 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include "function_test_util.h"
+#include "runtime/tuple_row.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+// Checks "murmur_hash3_32" with one, two and three String arguments; any NULL
+// argument is expected to produce a NULL result.
+TEST(HashFunctionTest, murmur_hash_3_test) {
+    std::string fn_name = "murmur_hash3_32";
+
+    // One argument.
+    {
+        InputTypeSet arg_types = {TypeIndex::String};
+
+        DataSet cases = {{{Null()}, Null()},
+                         {{std::string("hello")}, static_cast<int32_t>(1321743225)}};
+
+        check_function<DataTypeInt32, true>(fn_name, arg_types, cases);
+    }
+
+    // Two arguments.
+    {
+        InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+
+        DataSet cases = {
+                {{std::string("hello"), std::string("world")}, static_cast<int32_t>(984713481)},
+                {{std::string("hello"), Null()}, Null()}};
+
+        check_function<DataTypeInt32, true>(fn_name, arg_types, cases);
+    }
+
+    // Three arguments.
+    {
+        InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String};
+
+        DataSet cases = {{{std::string("hello"), std::string("world"), std::string("!")},
+                          static_cast<int32_t>(-666935433)},
+                         {{std::string("hello"), std::string("world"), Null()}, Null()}};
+
+        check_function<DataTypeInt32, true>(fn_name, arg_types, cases);
+    }
+}
+
+// Checks "murmurHash2_64" with one, two and three String arguments; any NULL
+// argument is expected to produce a NULL result.
+TEST(HashFunctionTest, murmur_hash_2_test) {
+    std::string fn_name = "murmurHash2_64";
+
+    // One argument.
+    {
+        InputTypeSet arg_types = {TypeIndex::String};
+
+        DataSet cases = {{{Null()}, Null()},
+                         {{std::string("hello")}, static_cast<uint64_t>(2191231550387646743ull)}};
+
+        check_function<DataTypeUInt64, true>(fn_name, arg_types, cases);
+    }
+
+    // Two arguments.
+    {
+        InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+
+        DataSet cases = {{{std::string("hello"), std::string("world")},
+                          static_cast<uint64_t>(11978658642541747642ull)},
+                         {{std::string("hello"), Null()}, Null()}};
+
+        check_function<DataTypeUInt64, true>(fn_name, arg_types, cases);
+    }
+
+    // Three arguments.
+    {
+        InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String};
+
+        DataSet cases = {{{std::string("hello"), std::string("world"), std::string("!")},
+                          static_cast<uint64_t>(1367324781703025231ull)},
+                         {{std::string("hello"), std::string("world"), Null()}, Null()}};
+
+        check_function<DataTypeUInt64, true>(fn_name, arg_types, cases);
+    }
+}
+
+} // namespace doris::vectorized
+
+// Test-binary entry point: lets gtest parse its flags and run every registered test.
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    const int exit_code = RUN_ALL_TESTS();
+    return exit_code;
+}
diff --git a/be/test/vec/function/function_ifnull_test.cpp b/be/test/vec/function/function_ifnull_test.cpp
new file mode 100644
index 0000000000..9d5df6c2c6
--- /dev/null
+++ b/be/test/vec/function/function_ifnull_test.cpp
@@ -0,0 +1,72 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include <string>
+
+#include "exec/schema_scanner.h"
+#include "function_test_util.h"
+#include "util/url_coding.h"
+#include "vec/core/field.h"
+
+namespace doris::vectorized {
+
+// "ifnull" on Int32: a non-NULL first argument is returned as-is, a NULL first
+// argument falls back to the second.
+TEST(IfNullTest, Int_Test) {
+    std::string fn_name = "ifnull";
+    InputTypeSet arg_types = {TypeIndex::Int32, TypeIndex::Int32};
+    DataSet cases = {{{4, 10}, 4},
+                     {{-4, 10}, -4},
+                     {{Null(), 5}, 5}};
+
+    check_function<DataTypeInt32, true>(fn_name, arg_types, cases);
+}
+
+// "ifnull" on Float64: a non-NULL first argument is returned as-is, a NULL first
+// argument falls back to the second.
+TEST(IfNullTest, Float_Test) {
+    std::string fn_name = "ifnull";
+    InputTypeSet arg_types = {TypeIndex::Float64, TypeIndex::Float64};
+    DataSet cases = {{{4.0, 10.0}, 4.0},
+                     {{-4.0, 10.0}, -4.0},
+                     {{Null(), 5.0}, 5.0}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, cases);
+}
+
+// "ifnull" on String: a non-NULL first argument is returned as-is, a NULL first
+// argument falls back to the second.
+TEST(IfNullTest, String_Test) {
+    std::string fn_name = "ifnull";
+    InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+    DataSet cases = {
+            {{std::string("4.0"), std::string("10.0")}, std::string("4.0")},
+            {{std::string("hello"), std::string("10.0")}, std::string("hello")},
+            {{Null(), std::string("world")}, std::string("world")}};
+
+    check_function<DataTypeString, true>(fn_name, arg_types, cases);
+}
+
+// "ifnull" on DateTime: inputs are given as strings and the expected values are
+// built with str_to_data_time (despite the test name, both arguments are DateTime).
+TEST(IfNullTest, String_Int_Test) {
+    std::string fn_name = "ifnull";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::DateTime};
+    DataSet cases = {
+            {{std::string("2021-10-24 12:32:31"), std::string("2021-10-24 13:00:01")},
+             str_to_data_time("2021-10-24 12:32:31")},
+            {{Null(), std::string("2021-10-24 13:00:01")},
+             str_to_data_time("2021-10-24 13:00:01")}};
+
+    check_function<DataTypeDateTime, true>(fn_name, arg_types, cases);
+}
+
+} // namespace doris::vectorized
+
+// Test-binary entry point: lets gtest parse its flags and run every registered test.
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    const int exit_code = RUN_ALL_TESTS();
+    return exit_code;
+}
diff --git a/be/test/vec/function/function_like_test.cpp b/be/test/vec/function/function_like_test.cpp
new file mode 100644
index 0000000000..e27c479569
--- /dev/null
+++ b/be/test/vec/function/function_like_test.cpp
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <string>
+
+#include "function_test_util.h"
+#include "util/cpu_info.h"
+#include "vec/core/types.h"
+
+namespace doris::vectorized {
+
+// Checks "like" over substring, suffix, prefix, exact and wildcard patterns plus
+// NULL handling. Every row is run twice: once with the pattern as a constant
+// column (one row per call) and once with the pattern as a regular column.
+TEST(FunctionLikeTest, like) {
+    std::string fn_name = "like";
+
+    DataSet cases = {// pattern of the form %x%
+                     {{std::string("abc"), std::string("%b%")}, uint8_t(1)},
+                     {{std::string("abc"), std::string("%ad%")}, uint8_t(0)},
+                     // pattern of the form %x
+                     {{std::string("abc"), std::string("%c")}, uint8_t(1)},
+                     {{std::string("ab"), std::string("%c")}, uint8_t(0)},
+                     // pattern of the form x%
+                     {{std::string("abc"), std::string("a%")}, uint8_t(1)},
+                     {{std::string("bc"), std::string("a%")}, uint8_t(0)},
+                     // pattern without wildcards
+                     {{std::string("abc"), std::string("abc")}, uint8_t(1)},
+                     {{std::string("abc"), std::string("ab")}, uint8_t(0)},
+                     // patterns using the single-character wildcard _
+                     {{std::string("abcd"), std::string("a_c%")}, uint8_t(1)},
+                     {{std::string("abcd"), std::string("a_d%")}, uint8_t(0)},
+                     {{std::string("abc"), std::string("__c")}, uint8_t(1)},
+                     {{std::string("abc"), std::string("_c")}, uint8_t(0)},
+                     {{std::string("abc"), std::string("_b_")}, uint8_t(1)},
+                     {{std::string("abc"), std::string("_a_")}, uint8_t(0)},
+                     {{std::string("abc"), std::string("a__")}, uint8_t(1)},
+                     {{std::string("abc"), std::string("a_")}, uint8_t(0)},
+                     // NULL on either side
+                     {{std::string("abc"), Null()}, Null()},
+                     {{Null(), std::string("_x__ab%")}, Null()}};
+
+    // Constant pattern: each row gets its own single-row data set.
+    InputTypeSet const_pattern_types = {TypeIndex::String, Consted {TypeIndex::String}};
+    for (const auto& row : cases) {
+        DataSet single_row = {row};
+        check_function<DataTypeUInt8, true>(fn_name, const_pattern_types, single_row);
+    }
+
+    // Non-constant pattern: all rows in one call.
+    InputTypeSet column_pattern_types = {TypeIndex::String, TypeIndex::String};
+    check_function<DataTypeUInt8, true>(fn_name, column_pattern_types, cases);
+}
+
+// Checks "regexp" over unanchored, anchored and partial-match patterns plus NULL
+// handling. Every row is run twice: once with the pattern as a constant column
+// (one row per call) and once with the pattern as a regular column.
+TEST(FunctionLikeTest, regexp) {
+    std::string fn_name = "regexp";
+
+    DataSet cases = {// unanchored patterns
+                     {{std::string("abc"), std::string(".*b.*")}, uint8_t(1)},
+                     {{std::string("abc"), std::string(".*ad.*")}, uint8_t(0)},
+                     {{std::string("abc"), std::string(".*c")}, uint8_t(1)},
+                     {{std::string("abc"), std::string("a.*")}, uint8_t(1)},
+                     // anchored at the end
+                     {{std::string("abc"), std::string(".*c$")}, uint8_t(1)},
+                     {{std::string("ab"), std::string(".*c$")}, uint8_t(0)},
+                     // anchored at the start
+                     {{std::string("abc"), std::string("^a.*")}, uint8_t(1)},
+                     {{std::string("bc"), std::string("^a.*")}, uint8_t(0)},
+                     // anchored at both ends
+                     {{std::string("abc"), std::string("^abc$")}, uint8_t(1)},
+                     {{std::string("abc"), std::string("^ab$")}, uint8_t(0)},
+                     // pattern matching only part of the input
+                     {{std::string("abcde"), std::string("a.*d")}, uint8_t(1)},
+                     {{std::string("abcd"), std::string("a.d")}, uint8_t(0)},
+                     {{std::string("abc"), std::string(".c")}, uint8_t(1)},
+                     {{std::string("abc"), std::string(".b.")}, uint8_t(1)},
+                     {{std::string("abc"), std::string(".a.")}, uint8_t(0)},
+                     // NULL on either side
+                     {{std::string("abc"), Null()}, Null()},
+                     {{Null(), std::string("xxx.*")}, Null()}};
+
+    // Constant pattern: each row gets its own single-row data set.
+    InputTypeSet const_pattern_types = {TypeIndex::String, Consted {TypeIndex::String}};
+    for (const auto& row : cases) {
+        DataSet single_row = {row};
+        check_function<DataTypeUInt8, true>(fn_name, const_pattern_types, single_row);
+    }
+
+    // Non-constant pattern: all rows in one call.
+    InputTypeSet column_pattern_types = {TypeIndex::String, TypeIndex::String};
+    check_function<DataTypeUInt8, true>(fn_name, column_pattern_types, cases);
+}
+
+} // namespace doris::vectorized
+
+// Test-binary entry point: lets gtest parse its flags, calls doris::CpuInfo::init(),
+// then runs every registered test.
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    doris::CpuInfo::init();
+    const int exit_code = RUN_ALL_TESTS();
+    return exit_code;
+}
diff --git a/be/test/vec/function/function_math_test.cpp b/be/test/vec/function/function_math_test.cpp
new file mode 100644
index 0000000000..eacfd01e07
--- /dev/null
+++ b/be/test/vec/function/function_math_test.cpp
@@ -0,0 +1,499 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include <any>
+#include <cmath>
+#include <iostream>
+#include <string>
+
+#include "function_test_util.h"
+#include "runtime/tuple_row.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+using namespace ut_type;
+
+// acos maps [-1, 1] onto [0, pi]; an out-of-domain input (e.g. 3.14 -> NaN) is
+// intentionally not part of the data set.
+TEST(MathFunctionTest, acos_test) {
+    std::string fn_name = "acos";
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{-1.0}, 3.1415926535897931},
+            {{0.0}, M_PI / 2},
+            {{0.5}, 1.0471975511965979},
+            {{1.0}, 0.0}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// asin maps [-1, 1] onto [-pi/2, pi/2].
+TEST(MathFunctionTest, asin_test) {
+    std::string fn_name = "asin";
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet expectations = {{{-1.0}, -M_PI / 2},
+                            {{0.0}, 0.0},
+                            {{0.5}, 0.52359877559829893},
+                            {{1.0}, M_PI / 2}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// atan maps the whole real line onto (-pi/2, pi/2).
+TEST(MathFunctionTest, atan_test) {
+    std::string fn_name = "atan";
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{-1.0}, -0.78539816339744828},
+            {{0.0}, 0.0},
+            {{0.5}, 0.46364760900080609},
+            {{1.0}, 0.78539816339744828}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// cos on a handful of inputs, including symmetric +-1.0 and pi.
+TEST(MathFunctionTest, cos_test) {
+    std::string fn_name = "cos";
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{-1.0}, 0.54030230586813977},
+            {{0.0}, 1.0},
+            {{0.5}, 0.87758256189037276},
+            {{M_PI}, -1.0},
+            {{1.0}, 0.54030230586813977}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// sin on a handful of inputs, including symmetric +-1.0 and pi/2.
+TEST(MathFunctionTest, sin_test) {
+    std::string fn_name = "sin";
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{-1.0}, -0.8414709848078965},
+            {{0.0}, 0.0},
+            {{0.5}, 0.479425538604203},
+            {{M_PI / 2}, 1.0},
+            {{1.0}, 0.8414709848078965}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// sqrt(x) for non-negative x only.
+TEST(MathFunctionTest, sqrt_test) {
+    std::string fn_name = "sqrt";
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{0.0}, 0.0},
+            {{2.0}, 1.4142135623730951},
+            {{9.0}, 3.0},
+            {{1000.0}, 31.622776601683793}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// tan(x) on zero, positive, negative and large inputs.
+TEST(MathFunctionTest, tan_test) {
+    std::string fn_name = "tan";
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{0.0}, 0.0},
+            {{2.0}, -2.1850398632615189},
+            {{-1.0}, -1.5574077246549023},
+            {{1000.0}, 1.4703241557027185}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// exp(x), including a very negative input whose expected result is 0.0.
+TEST(MathFunctionTest, exp_test) {
+    std::string fn_name = "exp";
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{-1.0}, 0.36787944117144233},
+            {{0.0}, 1.0},
+            {{0.5}, 1.6487212707001282},
+            {{-800.0}, 0.0},
+            {{1.0}, 2.7182818284590451}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// ln(x) for x > 0.
+TEST(MathFunctionTest, ln_test) {
+    std::string fn_name = "ln";
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{1.0}, 0.0},
+            {{0.5}, -0.69314718055994529},
+            {{100.0}, 4.6051701859880918},
+            {{1000.0}, 6.9077552789821368}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// log2(x) for x > 0.
+TEST(MathFunctionTest, log2_test) {
+    std::string fn_name = "log2";
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{1.0}, 0.0},
+            {{0.5}, -1.0},
+            {{100.0}, 6.6438561897747244},
+            {{1000.0}, 9.965784284662087}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// log10(x) for x > 0.
+TEST(MathFunctionTest, log10_test) {
+    std::string fn_name = "log10";
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet expectations = {{{1.0}, 0.0},
+                            {{0.5}, -0.3010299956639812},
+                            {{100.0}, 2.0},
+                            {{1000.0}, 3.0}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// Two-argument log(x, y) with x > 0 and y > 0; per the rows below the first
+// argument acts as the base (e.g. log(10, 100) == 2).
+TEST(MathFunctionTest, log_test) {
+    std::string fn_name = "log";
+    InputTypeSet arg_types = {TypeIndex::Float64, TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{10.0, 1.0}, 0.0},
+            {{10.0, 100.0}, 2.0},
+            {{0.1, 5.0}, -0.69897000433601886},
+            {{2.0, 0.5}, -1.0}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// pow(x, y) with positive, negative and fractional exponents.
+TEST(MathFunctionTest, pow_test) {
+    std::string fn_name = "pow";
+    InputTypeSet arg_types = {TypeIndex::Float64, TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{10.0, 1.0}, 10.0},
+            {{10.0, 10.0}, 10000000000.0},
+            {{100.0, -2.0}, 0.0001},
+            {{2.0, 0.5}, 1.4142135623730951}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+// truncate(x, y): positive y keeps y decimal digits, zero drops the fraction,
+// negative y zeroes digits left of the decimal point (see the rows below).
+TEST(MathFunctionTest, truncate_test) {
+    std::string fn_name = "truncate";
+    InputTypeSet arg_types = {TypeIndex::Float64, TypeIndex::Float64};
+
+    DataSet expectations = {
+            {{123.4567, 3.0}, 123.456},
+            {{-123.4567, 3.0}, -123.456},
+            {{123.4567, 0.0}, 123.0},
+            {{-123.4567, 0.0}, -123.0},
+            {{123.4567, -2.0}, 100.0},
+            {{-123.4567, -2.0}, -100.0},
+            {{-123.4567, -3.0}, 0.0}};
+
+    check_function<DataTypeFloat64, true>(fn_name, arg_types, expectations);
+}
+
+TEST(MathFunctionTest, ceil_test) {
+    // ceil(x): Float64 input, Int64 result. Rows cover small and large fractions on both the
+    // positive and the negative side (ceil rounds towards +infinity, so -2.8 -> -2).
+    std::string func_name = "ceil";
+    InputTypeSet input_types = {TypeIndex::Float64};
+    DataSet data_set = {
+            {{2.3}, (int64_t)3}, {{2.8}, (int64_t)3}, {{-2.3}, (int64_t)-2}, {{-2.8}, (int64_t)-2}};
+
+    check_function<DataTypeInt64, true>(func_name, input_types, data_set);
+}
+
+TEST(MathFunctionTest, floor_test) {
+    // floor(x): Float64 input, Int64 result, for positive and negative fractions.
+    std::string fn("floor");
+    InputTypeSet arg_types = {TypeIndex::Float64};
+    DataSet cases = {{{2.3}, int64_t(2)},
+                     {{2.8}, int64_t(2)},
+                     {{-2.3}, int64_t(-3)},
+                     {{-2.8}, int64_t(-3)}};
+    check_function<DataTypeInt64, true>(fn, arg_types, cases);
+}
+
+TEST(MathFunctionTest, degrees_test) {
+    // degrees(x): radians in, degrees out (e.g. pi/2 -> 90).
+    std::string fn("degrees");
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet cases = {{{1.0}, 57.295779513082323},
+                     {{M_PI / 2}, 90.0},
+                     {{0.0}, 0.0},
+                     {{-2.0}, -114.59155902616465}};
+
+    check_function<DataTypeFloat64, true>(fn, arg_types, cases);
+}
+
+TEST(MathFunctionTest, radians_test) {
+    // radians(x): degrees in, radians out (e.g. 90 -> pi/2).
+    std::string fn("radians");
+    InputTypeSet arg_types = {TypeIndex::Float64};
+
+    DataSet cases = {{{30.0}, 0.52359877559829882},
+                     {{90.0}, M_PI / 2},
+                     {{0.0}, 0.0},
+                     {{-60.0}, -1.0471975511965976}};
+
+    check_function<DataTypeFloat64, true>(fn, arg_types, cases);
+}
+
+TEST(MathFunctionTest, abs_test) {
+    std::string fn("abs");
+
+    {
+        // Float64 in, Float64 out; a NULL input is expected to stay NULL.
+        InputTypeSet arg_types = {TypeIndex::Float64};
+        DataSet cases = {{{Null()}, Null()},
+                         {{-0.0123}, 0.0123},
+                         {{90.45}, 90.45},
+                         {{0.0}, 0.0},
+                         {{-60.0}, 60.0}};
+
+        check_function<DataTypeFloat64, true>(fn, arg_types, cases);
+    }
+
+    {
+        // Int32 in, Int64 out: the wider result lets abs(INT_MIN) be expressed.
+        InputTypeSet arg_types = {TypeIndex::Int32};
+        DataSet cases = {{{Null()}, Null()},
+                         {{INT(3)}, BIGINT(3)},
+                         {{INT(-3)}, BIGINT(3)},
+                         {{INT(0)}, BIGINT(0)},
+                         {{INT(-60)}, BIGINT(60)},
+                         {{INT(INT_MAX)}, BIGINT(INT_MAX)},
+                         {{INT(INT_MIN)}, BIGINT(-1ll * INT_MIN)}};
+
+        check_function<DataTypeInt64, true>(fn, arg_types, cases);
+    }
+}
+
+TEST(MathFunctionTest, positive_test) {
+    std::string fn("positive");
+
+    {
+        // Float64 values are expected back unchanged.
+        InputTypeSet arg_types = {TypeIndex::Float64};
+        DataSet cases = {{{0.0123}, 0.0123}, {{90.45}, 90.45}, {{0.0}, 0.0}, {{-60.0}, -60.0}};
+
+        check_function<DataTypeFloat64, true>(fn, arg_types, cases);
+    }
+
+    {
+        // Int32 values are expected back unchanged.
+        InputTypeSet arg_types = {TypeIndex::Int32};
+        DataSet cases = {{{int32_t(3)}, int32_t(3)},
+                         {{int32_t(-3)}, int32_t(-3)},
+                         {{int32_t(0)}, int32_t(0)},
+                         {{int32_t(-60)}, int32_t(-60)}};
+
+        check_function<DataTypeInt32, true>(fn, arg_types, cases);
+    }
+}
+
+TEST(MathFunctionTest, negative_test) {
+    std::string fn("negative");
+
+    {
+        // Float64 values are expected back with the sign flipped.
+        InputTypeSet arg_types = {TypeIndex::Float64};
+        DataSet cases = {{{0.0123}, -0.0123}, {{90.45}, -90.45}, {{0.0}, 0.0}, {{-60.0}, 60.0}};
+
+        check_function<DataTypeFloat64, true>(fn, arg_types, cases);
+    }
+
+    {
+        // Int32 values are expected back with the sign flipped.
+        InputTypeSet arg_types = {TypeIndex::Int32};
+        DataSet cases = {{{int32_t(3)}, int32_t(-3)},
+                         {{int32_t(-3)}, int32_t(3)},
+                         {{int32_t(0)}, int32_t(0)},
+                         {{int32_t(-60)}, int32_t(60)}};
+
+        check_function<DataTypeInt32, true>(fn, arg_types, cases);
+    }
+}
+
+TEST(MathFunctionTest, sign_test) {
+    // sign(x) yields 1, 0 or -1 as an Int8 for both Int32 and Float64 inputs.
+    std::string fn("sign");
+    {
+        InputTypeSet arg_types = {TypeIndex::Int32};
+
+        DataSet cases = {{{int32_t(30)}, int8_t(1)},
+                         {{int32_t(0)}, int8_t(0)},
+                         {{int32_t(-10)}, int8_t(-1)}};
+
+        check_function<DataTypeInt8, true>(fn, arg_types, cases);
+    }
+    {
+        InputTypeSet arg_types = {TypeIndex::Float64};
+
+        DataSet cases = {{{30.7}, int8_t(1)}, {{0.0}, int8_t(0)}, {{-10.6}, int8_t(-1)}};
+
+        check_function<DataTypeInt8, true>(fn, arg_types, cases);
+    }
+}
+
+TEST(MathFunctionTest, round_test) {
+    // Covers both overloads: round(double) -> Int64 and round(double, int) -> Float64.
+    std::string fn("round");
+    {
+        InputTypeSet arg_types = {TypeIndex::Float64};
+
+        DataSet cases = {{{30.1}, int64_t(30)}, {{90.6}, int64_t(91)}, {{Null()}, Null()},
+                         {{0.0}, int64_t(0)},   {{-1.1}, int64_t(-1)}, {{-60.7}, int64_t(-61)}};
+
+        check_function<DataTypeInt64, true>(fn, arg_types, cases);
+    }
+    {
+        InputTypeSet arg_types = {TypeIndex::Float64, TypeIndex::Int32};
+
+        DataSet cases = {{{3.1415926, 2}, 3.14}, {{3.1415926, 3}, 3.142}, {{Null(), -2}, Null()},
+                         {{193.0, -2}, 200.0},   {{193.0, -1}, 190.0},    {{193.0, -3}, 0.0}};
+
+        check_function<DataTypeFloat64, true>(fn, arg_types, cases);
+    }
+}
+
+TEST(MathFunctionTest, bin_test) {
+    // bin(x): Int64 input rendered as a string of binary digits; NULL stays NULL.
+    std::string fn("bin");
+    InputTypeSet arg_types = {TypeIndex::Int64};
+
+    DataSet cases = {{{int64_t(10)}, std::string("1010")},
+                     {{int64_t(1)}, std::string("1")},
+                     {{int64_t(0)}, std::string("0")},
+                     {{Null()}, Null()}};
+
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(MathFunctionTest, hex_test) {
+    // hex(int): negative Int64 inputs are expected as 16 upper-case hex digits (-1 -> FF..FF).
+    std::string fn("hex");
+    InputTypeSet arg_types = {TypeIndex::Int64};
+
+    DataSet cases = {{{Null()}, Null()},
+                     {{int64_t(-1)}, std::string("FFFFFFFFFFFFFFFF")},
+                     {{int64_t(-2)}, std::string("FFFFFFFFFFFFFFFE")},
+                     {{int64_t(12)}, std::string("C")},
+                     {{int64_t(144)}, std::string("90")},
+                     {{int64_t(151233)}, std::string("24EC1")},
+                     {{int64_t(0)}, std::string("0")},
+                     {{int64_t(9223372036854775807)}, std::string("7FFFFFFFFFFFFFFF")},
+                     {{int64_t(-7453337203775808)}, std::string("FFE5853AB393E6C0")}};
+
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(MathFunctionTest, random_test) {
+    std::string fn("random"); // random(x), with x supplied through a Consted Int64 column
+    InputTypeSet arg_types = {Consted {TypeIndex::Int64}};
+    DataSet cases = {{{Null()}, Null()},
+                     {{int64_t(0)}, 0.15979336337046085},
+                     {{int64_t(10)}, 0.60128310734097479},
+                     {{int64_t(123)}, 0.31320017867847078},
+                     {{int64_t(std::numeric_limits<int64_t>::max())}, 0.20676730979843233},
+                     {{int64_t(std::numeric_limits<int64_t>::min())}, 0.15979336337046085}};
+    // Each row is checked on its own, wrapped in a single-row DataSet.
+    for (const auto& row : cases) {
+        DataSet single_row = {row};
+        check_function<DataTypeFloat64, true>(fn, arg_types, single_row);
+    }
+}
+
+TEST(MathFunctionTest, conv_test) {
+    // conv(value, from_base, to_base); every row runs through check_function on its own.
+    std::string fn("conv");
+    {
+        InputTypeSet arg_types = {TypeIndex::Int64, TypeIndex::Int8, TypeIndex::Int8};
+        DataSet cases = {{{Null(), Null(), Null()}, Null()},
+                         {{BIGINT(230), TINYINT(10), TINYINT(16)}, VARCHAR("E6")},
+                         {{BIGINT(15), TINYINT(10), TINYINT(2)}, VARCHAR("1111")}};
+
+        for (const auto& row : cases) {
+            DataSet single_row = {row};
+            check_function<DataTypeString, true>(fn, arg_types, single_row);
+        }
+    }
+
+    {
+        InputTypeSet arg_types = {TypeIndex::String, TypeIndex::Int8, TypeIndex::Int8};
+        DataSet cases = {{{Null(), Null(), Null()}, Null()},
+                         {{VARCHAR("ff"), TINYINT(16), TINYINT(10)}, VARCHAR("255")}};
+
+        for (const auto& row : cases) {
+            DataSet single_row = {row};
+            check_function<DataTypeString, true>(fn, arg_types, single_row);
+        }
+    }
+}
+
+TEST(MathFunctionTest, money_format_test) {
+    std::string fn("money_format");
+
+    {
+        // Int64 input: thousands separators plus a ".00" suffix are expected.
+        InputTypeSet arg_types = {TypeIndex::Int64};
+        DataSet cases = {{{Null()}, Null()},
+                         {{BIGINT(17014116)}, VARCHAR("17,014,116.00")},
+                         {{BIGINT(-17014116)}, VARCHAR("-17,014,116.00")}};
+        check_function<DataTypeString, true>(fn, arg_types, cases);
+    }
+
+    {
+        // Int128 input: same expectations as the Int64 case.
+        InputTypeSet arg_types = {TypeIndex::Int128};
+        DataSet cases = {{{Null()}, Null()},
+                         {{LARGEINT(17014116)}, VARCHAR("17,014,116.00")},
+                         {{LARGEINT(-17014116)}, VARCHAR("-17,014,116.00")}};
+        check_function<DataTypeString, true>(fn, arg_types, cases);
+    }
+    {
+        // Float64 input: two fractional digits are kept.
+        InputTypeSet arg_types = {TypeIndex::Float64};
+        DataSet cases = {{{Null()}, Null()},
+                         {{DOUBLE(17014116.67)}, VARCHAR("17,014,116.67")},
+                         {{DOUBLE(-17014116.67)}, VARCHAR("-17,014,116.67")}};
+        check_function<DataTypeString, true>(fn, arg_types, cases);
+    }
+    {
+        // Decimal128 input: same expectations as the Float64 case.
+        InputTypeSet arg_types = {TypeIndex::Decimal128};
+        DataSet cases = {{{Null()}, Null()},
+                         {{DECIMAL(17014116.67)}, VARCHAR("17,014,116.67")},
+                         {{DECIMAL(-17014116.67)}, VARCHAR("-17,014,116.67")}};
+        check_function<DataTypeString, true>(fn, arg_types, cases);
+    }
+}
+
+} // namespace doris::vectorized
+
+int main(int argc, char* argv[]) { // entry point of the math function test binary
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS(); // result of the gtest run becomes the process exit code
+}
diff --git a/be/test/vec/function/function_nullif_test.cpp b/be/test/vec/function/function_nullif_test.cpp
new file mode 100644
index 0000000000..73d0ca93de
--- /dev/null
+++ b/be/test/vec/function/function_nullif_test.cpp
@@ -0,0 +1,64 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include <string>
+
+#include "exec/schema_scanner.h"
+#include "function_test_util.h"
+#include "util/url_coding.h"
+#include "vec/core/field.h"
+
+namespace doris::vectorized {
+
+TEST(NullIfTest, Int_Test) {
+    // Rows: differing args keep the first, equal args give NULL, NULL second arg keeps first.
+    std::string fn("nullif");
+    InputTypeSet arg_types = {TypeIndex::Int32, TypeIndex::Int32};
+    DataSet cases = {{{4, 10}, 4}, {{-4, -4}, Null()}, {{5, Null()}, 5}};
+    check_function<DataTypeInt32, true>(fn, arg_types, cases);
+}
+
+TEST(NullIfTest, Float_Test) {
+    // Same three scenarios as the Int32 case, on Float64 columns.
+    std::string fn("nullif");
+    InputTypeSet arg_types = {TypeIndex::Float64, TypeIndex::Float64};
+    DataSet cases = {{{4.0, 10.0}, 4.0}, {{-4.0, -4.0}, Null()}, {{5.0, Null()}, 5.0}};
+    check_function<DataTypeFloat64, true>(fn, arg_types, cases);
+}
+
+TEST(NullIfTest, String_Int_Test) {
+    // NOTE: despite the name, this drives nullif with DateTime columns written as strings.
+    std::string fn("nullif");
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::DateTime};
+    DataSet cases = {
+            {{std::string("2021-10-24 12:32:31"), std::string("2021-10-24 13:00:01")},
+             str_to_data_time("2021-10-24 12:32:31")},
+            {{std::string("2021-10-24 13:00:01"), std::string("2021-10-24 13:00:01")}, Null()},
+            {{std::string("2021-10-24 13:00:01"), Null()},
+             str_to_data_time("2021-10-24 13:00:01")}};
+    check_function<DataTypeDateTime, true>(fn, arg_types, cases);
+}
+
+} // namespace doris::vectorized
+
+int main(int argc, char* argv[]) { // entry point of the nullif test binary
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS(); // result of the gtest run becomes the process exit code
+}
\ No newline at end of file
diff --git a/be/test/vec/function/function_string_test.cpp b/be/test/vec/function/function_string_test.cpp
new file mode 100644
index 0000000000..5617a36e1d
--- /dev/null
+++ b/be/test/vec/function/function_string_test.cpp
@@ -0,0 +1,721 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include <string>
+
+#include "function_test_util.h"
+#include "runtime/tuple_row.h"
+#include "util/encryption_util.h"
+#include "util/url_coding.h"
+#include "vec/core/field.h"
+
+namespace doris::vectorized {
+
+TEST(function_string_test, function_string_substr_test) {
+    std::string func_name = "substr";
+
+    {
+        // substr(str, pos, len): three-argument form.
+        InputTypeSet input_types = {TypeIndex::String, TypeIndex::Int32, TypeIndex::Int32};
+        DataSet data_set = {
+                {{std::string("asd你好"), 4, 10}, std::string("\xE4\xBD\xA0\xE5\xA5\xBD")}, //你好
+                {{std::string("hello word"), -5, 5}, std::string(" word")},
+                {{std::string("hello word"), 1, 12}, std::string("hello word")},
+                {{std::string("HELLO,!^%"), 4, 2}, std::string("LO")},
+                {{std::string(""), 5, 4}, Null()},
+                {{Null(), 5, 4}, Null()}};
+
+        check_function<DataTypeString, true>(func_name, input_types, data_set);
+    }
+
+    {
+        // substr(str, pos): two-argument form, so every row carries exactly two inputs.
+        InputTypeSet input_types = {TypeIndex::String, TypeIndex::Int32};
+        DataSet data_set = {
+                {{std::string("asd你好"), 4}, std::string("\xE4\xBD\xA0\xE5\xA5\xBD")}, //你好
+                {{std::string("hello word"), -5}, std::string(" word")},
+                {{std::string("hello word"), 1}, std::string("hello word")},
+                {{std::string("HELLO,!^%"), 4}, std::string("LO,!^%")},
+                {{std::string(""), 5}, Null()},
+                {{Null(), 5}, Null()}};
+
+        check_function<DataTypeString, true>(func_name, input_types, data_set);
+    }
+}
+
+TEST(function_string_test, function_string_strright_test) {
+    // strright(str, n): rows include a negative n, an n past the length, empty and NULL input.
+    std::string fn("strright");
+    InputTypeSet arg_types = {TypeIndex::String, TypeIndex::Int32};
+    DataSet cases = {{{std::string("asd"), 1}, std::string("d")},
+                     {{std::string("hello word"), -2}, std::string("ello word")},
+                     {{std::string("hello word"), 20}, std::string("hello word")},
+                     {{std::string("HELLO,!^%"), 2}, std::string("^%")},
+                     {{std::string(""), 3}, std::string("")},
+                     {{Null(), 3}, Null()}};
+
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_string_strleft_test) {
+    // strleft(str, n): note that an empty input string is expected to give NULL here.
+    std::string fn("strleft");
+    InputTypeSet arg_types = {TypeIndex::String, TypeIndex::Int32};
+    DataSet cases = {{{std::string("asd"), 1}, std::string("a")},
+                     {{std::string("hel  lo  "), 5}, std::string("hel  ")},
+                     {{std::string("hello word"), 20}, std::string("hello word")},
+                     {{std::string("HELLO,!^%"), 7}, std::string("HELLO,!")},
+                     {{std::string(""), 2}, Null()},
+                     {{Null(), 3}, Null()}};
+
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_string_lower_test) {
+    // lower(str): letters are lower-cased, digits and punctuation are left alone.
+    std::string fn("lower");
+    InputTypeSet arg_types = {TypeIndex::String};
+    DataSet cases = {{{std::string("ASD")}, std::string("asd")},
+                     {{std::string("HELLO123")}, std::string("hello123")},
+                     {{std::string("MYtestSTR")}, std::string("myteststr")},
+                     {{std::string("HELLO,!^%")}, std::string("hello,!^%")},
+                     {{std::string("")}, std::string("")}};
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_string_upper_test) {
+    // upper(str): letters are upper-cased, digits and punctuation are left alone.
+    std::string fn("upper");
+    InputTypeSet arg_types = {TypeIndex::String};
+    DataSet cases = {{{std::string("asd")}, std::string("ASD")},
+                     {{std::string("hello123")}, std::string("HELLO123")},
+                     {{std::string("HELLO,!^%")}, std::string("HELLO,!^%")},
+                     {{std::string("MYtestStr")}, std::string("MYTESTSTR")},
+                     {{std::string("")}, std::string("")}};
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_string_trim_test) {
+    // trim(str): spaces are removed at both ends, inner spaces are kept; NULL stays NULL.
+    std::string fn("trim");
+    InputTypeSet arg_types = {TypeIndex::String};
+    DataSet cases = {{{std::string("a sd")}, std::string("a sd")},
+                     {{std::string("  hello 123  ")}, std::string("hello 123")},
+                     {{std::string("  HELLO,!^%")}, std::string("HELLO,!^%")},
+                     {{std::string("MY test Str你好  ")}, std::string("MY test Str你好")},
+                     {{Null()}, Null()},
+                     {{std::string("")}, std::string("")}};
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_string_ltrim_test) {
+    std::string fn("ltrim"); // only leading spaces are removed; trailing ones stay
+    InputTypeSet arg_types = {TypeIndex::String};
+    DataSet cases = {
+            {{std::string("a sd")}, std::string("a sd")},
+            {{std::string("  hello 123  ")}, std::string("hello 123  ")},
+            {{std::string("  HELLO,!^%")}, std::string("HELLO,!^%")},
+            {{std::string("  你好MY test Str你好  ")}, std::string("你好MY test Str你好  ")},
+            {{std::string("")}, std::string("")}};
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_string_rtrim_test) {
+    // rtrim(str): only trailing spaces are removed; leading ones stay.
+    std::string fn("rtrim");
+    InputTypeSet arg_types = {TypeIndex::String};
+    DataSet cases = {{{std::string("a sd ")}, std::string("a sd")},
+                     {{std::string("hello 123  ")}, std::string("hello 123")},
+                     {{std::string("  HELLO,!^%")}, std::string("  HELLO,!^%")},
+                     {{std::string("  MY test Str你好  ")}, std::string("  MY test Str你好")},
+                     {{std::string("")}, std::string("")}};
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+TEST(function_string_test, function_string_repeat_test) {
+    // repeat(str, n): a negative count is expected to give an empty string.
+    std::string fn("repeat");
+    InputTypeSet arg_types = {TypeIndex::String, TypeIndex::Int32};
+    DataSet cases = {{{std::string("a"), 3}, std::string("aaa")},
+                     {{std::string("hel lo"), 2}, std::string("hel lohel lo")},
+                     {{std::string("hello word"), -1}, std::string("")},
+                     {{std::string(""), 1}, std::string("")},
+                     {{std::string("HELLO,!^%"), 2}, std::string("HELLO,!^%HELLO,!^%")},
+                     {{std::string("你"), 2}, std::string("你你")}};
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_string_reverse_test) {
+    // reverse(str): multi-byte characters are expected to be reversed as whole characters.
+    std::string fn("reverse");
+    InputTypeSet arg_types = {TypeIndex::String};
+    DataSet cases = {{{std::string("asd ")}, std::string(" dsa")},
+                     {{std::string("  hello 123  ")}, std::string("  321 olleh  ")},
+                     {{std::string("  HELLO,!^%")}, std::string("%^!,OLLEH  ")},
+                     {{std::string("你好啊")}, std::string("啊好你")},
+                     {{std::string("")}, std::string("")}};
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_string_length_test) {
+    // length(str) -> Int32; the three-character string "你好啊" is expected to yield 9.
+    std::string fn("length");
+    InputTypeSet arg_types = {TypeIndex::String};
+    DataSet cases = {{{std::string("asd ")}, int32_t(4)},
+                     {{std::string("  hello 123  ")}, int32_t(13)},
+                     {{std::string("  HELLO,!^%")}, int32_t(11)},
+                     {{std::string("你好啊")}, int32_t(9)},
+                     {{std::string("")}, int32_t(0)}};
+    check_function<DataTypeInt32, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_append_trailing_char_if_absent_test) {
+    // An empty trailing-char argument is expected to give NULL (third row).
+    std::string fn("append_trailing_char_if_absent");
+    InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+
+    DataSet cases = {{{std::string("ASD"), std::string("D")}, std::string("ASD")},
+                     {{std::string("AS"), std::string("D")}, std::string("ASD")},
+                     {{std::string(""), std::string("")}, Null()},
+                     {{std::string(""), std::string("A")}, std::string("A")}};
+
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_starts_with_test) {
+    // starts_with(str, prefix) -> UInt8 flag; a NULL on either side gives NULL.
+    std::string fn("starts_with");
+    InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+
+    DataSet cases = {{{std::string("hello world"), std::string("hello")}, uint8_t(1)},
+                     {{std::string("hello world"), std::string("world")}, uint8_t(0)},
+                     {{std::string("你好"), std::string("你")}, uint8_t(1)},
+                     {{std::string(""), std::string("")}, uint8_t(1)},
+                     {{std::string("你好"), Null()}, Null()},
+                     {{Null(), std::string("")}, Null()}};
+
+    check_function<DataTypeUInt8, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_ends_with_test) {
+    // ends_with(str, suffix) -> UInt8 flag; a NULL on either side gives NULL.
+    std::string fn("ends_with");
+    InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+
+    DataSet cases = {{{std::string("hello world"), std::string("hello")}, uint8_t(0)},
+                     {{std::string("hello world"), std::string("world")}, uint8_t(1)},
+                     {{std::string("你好"), std::string("好")}, uint8_t(1)},
+                     {{std::string(""), std::string("")}, uint8_t(1)},
+                     {{std::string("你好"), Null()}, Null()},
+                     {{Null(), std::string("")}, Null()}};
+
+    check_function<DataTypeUInt8, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_lpad_test) {
+    // lpad(str, len, pad): rows cover truncation, a negative len (NULL) and an empty pad.
+    std::string fn("lpad");
+    InputTypeSet arg_types = {TypeIndex::String, TypeIndex::Int32, TypeIndex::String};
+
+    DataSet cases = {{{std::string("hi"), 5, std::string("?")}, std::string("???hi")},
+                     {{std::string("g8%7IgY%AHx7luNtf8Kh"), 20, std::string("")},
+                      std::string("g8%7IgY%AHx7luNtf8Kh")},
+                     {{std::string("hi"), 1, std::string("?")}, std::string("h")},
+                     {{std::string("你好"), 1, std::string("?")}, std::string("你")},
+                     {{std::string("hi"), 0, std::string("?")}, std::string("")},
+                     {{std::string("hi"), -1, std::string("?")}, Null()},
+                     {{std::string("h"), 1, std::string("")}, std::string("h")},
+                     {{std::string("hi"), 5, std::string("")}, Null()},
+                     {{std::string("hi"), 5, std::string("ab")}, std::string("abahi")},
+                     {{std::string("hi"), 5, std::string("呵呵")}, std::string("呵呵呵hi")},
+                     {{std::string("呵呵"), 5, std::string("hi")}, std::string("hih呵呵")}};
+
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_rpad_test) {
+    // rpad(str, len, pad): rows cover truncation, a negative len (NULL) and an empty pad.
+    std::string fn("rpad");
+    InputTypeSet arg_types = {TypeIndex::String, TypeIndex::Int32, TypeIndex::String};
+
+    DataSet cases = {{{std::string("hi"), 5, std::string("?")}, std::string("hi???")},
+                     {{std::string("g8%7IgY%AHx7luNtf8Kh"), 20, std::string("")},
+                      std::string("g8%7IgY%AHx7luNtf8Kh")},
+                     {{std::string("hi"), 1, std::string("?")}, std::string("h")},
+                     {{std::string("你好"), 1, std::string("?")}, std::string("你")},
+                     {{std::string("hi"), 0, std::string("?")}, std::string("")},
+                     {{std::string("hi"), -1, std::string("?")}, Null()},
+                     {{std::string("h"), 1, std::string("")}, std::string("h")},
+                     {{std::string("hi"), 5, std::string("")}, Null()},
+                     {{std::string("hi"), 5, std::string("ab")}, std::string("hiaba")},
+                     {{std::string("hi"), 5, std::string("呵呵")}, std::string("hi呵呵呵")},
+                     {{std::string("呵呵"), 5, std::string("hi")}, std::string("呵呵hih")}};
+
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_ascii_test) {
+    // ascii(str) -> Int32: 0 for an empty string, 97 for "aa", 230 for "我".
+    std::string fn("ascii");
+    InputTypeSet arg_types = {TypeIndex::String};
+
+    DataSet cases = {{{std::string("")}, 0},
+                     {{std::string("aa")}, 97},
+                     {{std::string("我")}, 230},
+                     {{Null()}, Null()}};
+
+    check_function<DataTypeInt32, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_char_length_test) {
+    // char_length(str) -> Int32: counts characters, so "我" is 1 and "我a" is 2.
+    std::string fn("char_length");
+    InputTypeSet arg_types = {TypeIndex::String};
+
+    DataSet cases = {{{std::string("")}, 0},    {{std::string("aa")}, 2},
+                     {{std::string("我")}, 1},  {{std::string("我a")}, 2},
+                     {{std::string("a我")}, 2}, {{std::string("123")}, 3},
+                     {{Null()}, Null()}};
+
+    check_function<DataTypeInt32, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_concat_test) {
+    std::string fn("concat");
+    {
+        // One argument: the value comes back as is and NULL stays NULL.
+        InputTypeSet arg_types = {TypeIndex::String};
+        DataSet cases = {{{std::string("")}, std::string("")},
+                         {{std::string("123")}, std::string("123")},
+                         {{Null()}, Null()}};
+
+        check_function<DataTypeString, true>(fn, arg_types, cases);
+    }
+
+    {
+        // Two arguments: a NULL argument makes the whole result NULL.
+        InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+        DataSet cases = {{{std::string(""), std::string("")}, std::string("")},
+                         {{std::string("123"), std::string("45")}, std::string("12345")},
+                         {{std::string("123"), Null()}, Null()}};
+
+        check_function<DataTypeString, true>(fn, arg_types, cases);
+    }
+
+    {
+        // Three arguments, including a NULL in the middle position.
+        InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String};
+        DataSet cases = {
+                {{std::string(""), std::string("1"), std::string("")}, std::string("1")},
+                {{std::string("123"), std::string("456"), std::string("789")},
+                 std::string("123456789")},
+                {{std::string("123"), Null(), std::string("789")}, Null()}};
+
+        check_function<DataTypeString, true>(fn, arg_types, cases);
+    }
+}
+
+TEST(function_string_test, function_concat_ws_test) {
+    std::string fn("concat_ws");
+    {
+        // Separator plus one value: a NULL separator gives NULL.
+        InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+        DataSet cases = {{{std::string("-"), std::string("")}, std::string("")},
+                         {{std::string(""), std::string("123")}, std::string("123")},
+                         {{std::string(""), std::string("")}, std::string("")},
+                         {{Null(), std::string("")}, Null()},
+                         {{Null(), Null()}, Null()}};
+
+        check_function<DataTypeString, true>(fn, arg_types, cases);
+    }
+
+    {
+        // Separator plus two values: empty values still get a separator between them.
+        InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String};
+        DataSet cases = {
+                {{std::string("-"), std::string(""), std::string("")}, std::string("-")},
+                {{std::string(""), std::string("123"), std::string("456")}, std::string("123456")},
+                {{std::string(""), std::string(""), std::string("")}, std::string("")},
+                {{Null(), std::string(""), std::string("")}, Null()},
+                {{Null(), std::string(""), Null()}, Null()}};
+
+        check_function<DataTypeString, true>(fn, arg_types, cases);
+    }
+
+    {
+        // Separator plus three values: a NULL value is skipped rather than nulling the result.
+        InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String, TypeIndex::String,
+                                  TypeIndex::String};
+        DataSet cases = {
+                {{std::string("-"), std::string(""), std::string(""), std::string("")},
+                 std::string("--")},
+                {{std::string(""), std::string("123"), std::string("456"), std::string("789")},
+                 std::string("123456789")},
+                {{std::string("-"), std::string(""), std::string("?"), std::string("")},
+                 std::string("-?-")},
+                {{Null(), std::string(""), std::string("?"), std::string("")}, Null()},
+                {{std::string("-"), std::string("123"), Null(), std::string("456")},
+                 std::string("123-456")}};
+
+        check_function<DataTypeString, true>(fn, arg_types, cases);
+    }
+}
+
+TEST(function_string_test, function_null_or_empty_test) {
+    // null_or_empty(str) -> UInt8: both the empty string and NULL are expected to give true.
+    std::string fn("null_or_empty");
+    InputTypeSet arg_types = {TypeIndex::String};
+
+    DataSet cases = {{{std::string("")}, uint8(true)},
+                     {{std::string("aa")}, uint8(false)},
+                     {{std::string("我")}, uint8(false)},
+                     {{Null()}, uint8(true)}};
+
+    check_function<DataTypeUInt8, false>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_to_base64_test) {
+    // to_base64(str): an empty input string is expected to give NULL.
+    std::string fn("to_base64");
+    InputTypeSet arg_types = {TypeIndex::String};
+    DataSet cases = {{{std::string("asd你好")}, {std::string("YXNk5L2g5aW9")}},
+                     {{std::string("hello world")}, {std::string("aGVsbG8gd29ybGQ=")}},
+                     {{std::string("HELLO,!^%")}, {std::string("SEVMTE8sIV4l")}},
+                     {{std::string("")}, {Null()}},
+                     {{std::string("MYtestSTR")}, {std::string("TVl0ZXN0U1RS")}},
+                     {{std::string("ò&ø")}, {std::string("w7Imw7g=")}}};
+
+    check_function<DataTypeString, true>(fn, arg_types, cases);
+}
+
+TEST(function_string_test, function_from_base64_test) {
+    using Str = std::string;
+
+    const std::string fn_name = "from_base64";
+    const InputTypeSet arg_types = {TypeIndex::String};
+
+    // Rows are {encoded input}, {expected decoding}. The empty input and the last two
+    // inputs (not valid base64) are expected to yield NULL.
+    const DataSet cases = {
+            {{Str("YXNk5L2g5aW9")}, {Str("asd你好")}},
+            {{Str("aGVsbG8gd29ybGQ=")}, {Str("hello world")}},
+            {{Str("SEVMTE8sIV4l")}, {Str("HELLO,!^%")}},
+            {{Str("")}, {Null()}},
+            {{Str("TVl0ZXN0U1RS")}, {Str("MYtestSTR")}},
+            {{Str("w7Imw7g=")}, {Str("ò&ø")}},
+            {{Str("ò&ø")}, {Null()}},
+            {{Str("你好哈喽")}, {Null()}},
+    };
+
+    check_function<DataTypeString, true>(fn_name, arg_types, cases);
+}
+
+TEST(function_string_test, function_reverse_test) {
+    using Str = std::string;
+
+    const std::string fn_name = "reverse";
+    const InputTypeSet arg_types = {TypeIndex::String};
+
+    // Rows are {input}, {expected reversal}; the multi-byte rows expect whole
+    // characters (not bytes) to be reversed.
+    const DataSet cases = {
+            {{Str("")}, {Str("")}},
+            {{Str("a")}, {Str("a")}},
+            {{Str("美团和和阿斯顿百度ab")}, {Str("ba度百顿斯阿和和团美")}},
+            {{Str("!^%")}, {Str("%^!")}},
+            {{Str("ò&ø")}, {Str("ø&ò")}},
+            {{Str("A攀c")}, {Str("c攀A")}},
+            {{Str("NULL")}, {Str("LLUN")}},
+    };
+
+    check_function<DataTypeString, true>(fn_name, arg_types, cases);
+}
+
+TEST(function_string_test, function_instr_test) {
+    using Str = std::string;
+
+    const std::string fn_name = "instr";
+    const InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+
+    // Rows are {haystack, needle}, expected 1-based position; 0 means "not found".
+    const DataSet cases = {
+            {{Str("abcdefg"), Str("efg")}, 5},
+            {{Str("aa"), Str("a")}, 1},
+            {{Str("我是"), Str("是")}, 2},
+            {{Str("abcd"), Str("e")}, 0},
+            {{Str("abcdef"), Str("")}, 1},
+            {{Str(""), Str("")}, 1},
+            {{Str("aaaab"), Str("bb")}, 0},
+    };
+
+    check_function<DataTypeInt32, true>(fn_name, arg_types, cases);
+}
+
+TEST(function_string_test, function_find_in_set_test) {
+    using Str = std::string;
+
+    const std::string fn_name = "find_in_set";
+    const InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+
+    // Rows are {needle, comma-separated list}, expected 1-based index (0 = absent);
+    // a NULL in either argument is expected to give NULL.
+    const DataSet cases = {
+            {{Str("abcdefg"), Str("a,b,c")}, 0},
+            {{Str("aa"), Str("a,aa,aaa")}, 2},
+            {{Str("aa"), Str("aa,aa,aa")}, 1},
+            {{Str("a"), Null()}, Null()},
+            {{Null(), Str("aa")}, Null()},
+            {{Str("a"), Str("")}, 0},
+            {{Str(""), Str(",,")}, 1},
+    };
+
+    check_function<DataTypeInt32, true>(fn_name, arg_types, cases);
+}
+
+TEST(function_string_test, function_string_splitpart_test) {
+    using Str = std::string;
+
+    const std::string fn_name = "split_part";
+    const InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String, TypeIndex::Int32};
+
+    // Rows are {text, delimiter, part index}, expected part. NULL is expected when any
+    // argument is NULL, when the delimiter does not occur, or for a negative index.
+    const DataSet cases = {
+            {{Str("prefix_string1"), Str("_"), 2}, Str("string1")},
+            {{Str("prefix__string2"), Str("__"), 2}, Str("string2")},
+            {{Str("prefix__string2"), Str("_"), 2}, Str("")},
+            {{Str("prefix_string2"), Str("__"), 1}, Null()},
+            {{Null(), Str("__"), 1}, Null()},
+            {{Str("prefix_string"), Null(), 1}, Null()},
+            {{Str("prefix_string"), Str("__"), Null()}, Null()},
+            {{Str("prefix_string"), Str("__"), -1}, Null()},
+    };
+
+    check_function<DataTypeString, true>(fn_name, arg_types, cases);
+}
+
+TEST(function_string_test, function_md5sum_test) {
+    using Str = std::string;
+
+    const std::string fn_name = "md5sum";
+
+    // One argument: rows are {input}, {expected lowercase hex digest}.
+    {
+        const InputTypeSet arg_types = {TypeIndex::String};
+        const DataSet cases = {
+                {{Str("asd你好")}, {Str("a38c15675555017e6b8ea042f2eb24f5")}},
+                {{Str("hello world")}, {Str("5eb63bbbe01eeed093cb22bb8f5acdc3")}},
+                {{Str("HELLO,!^%")}, {Str("b8e6e34d1cc3dc76b784ddfdfb7df800")}},
+                {{Str("")}, {Str("d41d8cd98f00b204e9800998ecf8427e")}},
+                {{Str(" ")}, {Str("7215ee9c7d9dc229d2921a40e899ec5f")}},
+                {{Null()}, {Null()}},
+                {{Str("MYtestSTR")}, {Str("cd24c90b3fc1192eb1879093029e87d4")}},
+                {{Str("ò&ø")}, {Str("fd157b4cb921fa91acc667380184d59c")}},
+        };
+
+        check_function<DataTypeString, true>(fn_name, arg_types, cases);
+    }
+
+    // Two arguments: the expected digests equal those of the concatenated inputs above.
+    {
+        const InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+        const DataSet cases = {
+                {{Str("asd"), Str("你好")}, {Str("a38c15675555017e6b8ea042f2eb24f5")}},
+                {{Str("hello "), Str("world")}, {Str("5eb63bbbe01eeed093cb22bb8f5acdc3")}},
+                {{Str("HELLO"), Str(",!^%")}, {Str("b8e6e34d1cc3dc76b784ddfdfb7df800")}},
+                {{Null(), Str("HELLO")}, {Null()}},
+        };
+
+        check_function<DataTypeString, true>(fn_name, arg_types, cases);
+    }
+
+    // Three arguments, same idea.
+    {
+        const InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String,
+                                        TypeIndex::String};
+        const DataSet cases = {
+                {{Str("a"), Str("sd"), Str("你好")},
+                 {Str("a38c15675555017e6b8ea042f2eb24f5")}},
+                {{Str(""), Str(""), Str("")}, {Str("d41d8cd98f00b204e9800998ecf8427e")}},
+                {{Str("HEL"), Str("LO,!"), Str("^%")},
+                 {Str("b8e6e34d1cc3dc76b784ddfdfb7df800")}},
+                {{Null(), Str("HELLO"), Null()}, {Null()}},
+        };
+
+        check_function<DataTypeString, true>(fn_name, arg_types, cases);
+    }
+}
+
+// aes_encrypt(plaintext, key): the expected ciphertexts are produced by calling
+// EncryptionUtil::encrypt directly (AES_128_ECB, padding on, no IV) and compared with
+// what the vectorized function returns for the same inputs.
+TEST(function_string_test, function_aes_encrypt_test) {
+    std::string func_name = "aes_encrypt";
+
+    InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
+
+    const char* key = "doris";
+    // The trailing empty string is deliberately not pre-encrypted: it is only used as an
+    // input row whose expected result is NULL.
+    const char* src[6] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee", ""};
+    std::string r[5];
+
+    // EncryptionUtil works on raw byte pointers; this performs the same conversion the
+    // former C-style casts did, with named casts.
+    auto as_bytes = [](const char* s) {
+        return reinterpret_cast<unsigned char*>(const_cast<char*>(s));
+    };
+
+    for (int i = 0; i < 5; i++) {
+        // Heap-backed buffer instead of a variable-length array (VLAs are a compiler
+        // extension in C++). Same capacity as before: input length plus 16 bytes.
+        std::string cipher(strlen(src[i]) + 16, '\0');
+
+        int outlen = EncryptionUtil::encrypt(AES_128_ECB, as_bytes(src[i]), strlen(src[i]),
+                                             as_bytes(key), strlen(key), nullptr, true,
+                                             as_bytes(&cipher[0]));
+        // Presumably a non-positive value signals an encryption failure; either way it
+        // must never be used as a string length.
+        ASSERT_GT(outlen, 0);
+        r[i].assign(cipher.data(), static_cast<size_t>(outlen));
+    }
+
+    DataSet data_set = {{{std::string(src[0]), std::string(key)}, r[0]},
+                        {{std::string(src[1]), std::string(key)}, r[1]},
+                        {{std::string(src[2]), std::string(key)}, r[2]},
+                        {{std::string(src[3]), std::string(key)}, r[3]},
+                        {{std::string(src[4]), std::string(key)}, r[4]},
+                        {{std::string(src[5]), std::string(key)}, Null()},
+                        {{Null(), std::string(key)}, Null()}};
+
+    check_function<DataTypeString, true>(func_name, input_types, data_set);
+}
+
+// aes_decrypt(ciphertext, key): the ciphertext inputs are produced by calling
+// EncryptionUtil::encrypt directly (AES_128_ECB, padding on, no IV); decrypting them with
+// the vectorized function is expected to give back the original plaintexts.
+TEST(function_string_test, function_aes_decrypt_test) {
+    std::string func_name = "aes_decrypt";
+
+    InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
+
+    const char* key = "doris";
+    const char* src[5] = {"aaaaaa", "bbbbbb", "cccccc", "dddddd", "eeeeee"};
+    std::string r[5];
+
+    // EncryptionUtil works on raw byte pointers; this performs the same conversion the
+    // former C-style casts did, with named casts.
+    auto as_bytes = [](const char* s) {
+        return reinterpret_cast<unsigned char*>(const_cast<char*>(s));
+    };
+
+    for (int i = 0; i < 5; i++) {
+        // Heap-backed buffer instead of a variable-length array (VLAs are a compiler
+        // extension in C++). Same capacity as before: input length plus 16 bytes.
+        std::string cipher(strlen(src[i]) + 16, '\0');
+
+        int outlen = EncryptionUtil::encrypt(AES_128_ECB, as_bytes(src[i]), strlen(src[i]),
+                                             as_bytes(key), strlen(key), nullptr, true,
+                                             as_bytes(&cipher[0]));
+        // Presumably a non-positive value signals an encryption failure; either way it
+        // must never be used as a string length.
+        ASSERT_GT(outlen, 0);
+        r[i].assign(cipher.data(), static_cast<size_t>(outlen));
+    }
+
+    DataSet data_set = {{{r[0], std::string(key)}, std::string(src[0])},
+                        {{r[1], std::string(key)}, std::string(src[1])},
+                        {{r[2], std::string(key)}, std::string(src[2])},
+                        {{r[3], std::string(key)}, std::string(src[3])},
+                        {{r[4], std::string(key)}, std::string(src[4])},
+                        {{Null(), std::string(key)}, Null()}};
+
+    check_function<DataTypeString, true>(func_name, input_types, data_set);
+}
+
+TEST(function_string_test, function_parse_url_test) {
+    using Str = std::string;
+
+    const std::string fn_name = "parse_url";
+
+    // Two arguments: rows are {url, part name}, {expected part}. Part names appear in
+    // both upper and lower case; parts that cannot be extracted expect NULL.
+    {
+        const InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String};
+        const DataSet cases = {
+                {{Str("zhangsan"), Str("HOST")}, {Null()}},
+                {{Str("facebook.com/path/p1"), Str("HOST")}, {Null()}},
+                {{Str("http://fb.com/path/p1.p?q=1#f"), Str("HOST")}, {Str("fb.com")}},
+                {{Str("http://facebook.com/path/p1.php?query=1"), Str("AUTHORITY")},
+                 {Str("facebook.com")}},
+                {{Str("http://facebook.com/path/p1.php?query=1"), Str("authority")},
+                 {Str("facebook.com")}},
+                {{Str("http://www.baidu.com:9090/a/b/c.php"), Str("FILE")},
+                 {Str("/a/b/c.php")}},
+                {{Str("http://www.baidu.com:9090/a/b/c.php"), Str("file")},
+                 {Str("/a/b/c.php")}},
+                {{Str("http://www.baidu.com:9090/a/b/c.php"), Str("PATH")},
+                 {Str("/a/b/c.php")}},
+                {{Str("http://www.baidu.com:9090/a/b/c.php"), Str("path")},
+                 {Str("/a/b/c.php")}},
+                {{Str("http://facebook.com/path/p1.php?query=1"), Str("PROTOCOL")},
+                 {Str("http")}},
+                {{Str("http://facebook.com/path/p1.php?query=1"), Str("protocol")},
+                 {Str("http")}},
+                {{Str("http://www.baidu.com:9090?a=b"), Str("QUERY")}, {Str("a=b")}},
+                {{Str("http://www.baidu.com:9090?a=b"), Str("query")}, {Str("a=b")}},
+                {{Str("http://www.baidu.com:9090?a=b"), Str("REF")}, {Null()}},
+                {{Str("http://www.baidu.com:9090?a=b"), Str("ref")}, {Null()}},
+                {{Str("http://www.baidu.com:9090/a/b/c?a=b"), Str("PORT")}, {Str("9090")}},
+                {{Str("http://www.baidu.com/a/b/c?a=b"), Str("PORT")}, {Null()}},
+                {{Str("http://fb.com/path/p1.p?q=1#f"), Str("QUERY")}, {Str("q=1")}},
+        };
+
+        check_function<DataTypeString, true>(fn_name, arg_types, cases);
+    }
+
+    // Three arguments: rows are {url, part name, key}, {expected value for that key}.
+    {
+        const InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String,
+                                        TypeIndex::String};
+        const DataSet cases = {
+                {{Str("http://fb.com/path/p1.p?q=1#f"), Str("QUERY"), Str("q")}, {Str("1")}},
+                {{Str("fb.com/path/p1.p?q=1#f"), Str("QUERY"), Str("q")}, {Str("1")}},
+                {{Str("http://facebook.com/path/p1"), Str("QUERY"), Str("q")}, {Null()}},
+                {{Str("http://fb.com/path/p1.p?q=1#f"), Str("HOST"), Str("q")}, {Null()}},
+        };
+
+        check_function<DataTypeString, true>(fn_name, arg_types, cases);
+    }
+}
+
+TEST(function_string_test, function_hex_test) {
+    using Str = std::string;
+
+    const std::string fn_name = "hex";
+    const InputTypeSet arg_types = {TypeIndex::String};
+
+    // Rows are {input}, expected upper-case hex of the input bytes; NULL stays NULL.
+    const DataSet cases = {
+            {{Null()}, Null()},
+            {{Str("0")}, Str("30")},
+            {{Str("1")}, Str("31")},
+            {{Str("")}, Str("")},
+            {{Str("123")}, Str("313233")},
+            {{Str("A")}, Str("41")},
+            {{Str("a")}, Str("61")},
+            {{Str("我")}, Str("E68891")},
+            {{Str("?")}, Str("3F")},
+            {{Str("？")}, Str("EFBC9F")},
+    };
+
+    check_function<DataTypeString, true>(fn_name, arg_types, cases);
+}
+
+TEST(function_string_test, function_unhex_test) {
+    using Str = std::string;
+
+    const std::string fn_name = "unhex";
+    const InputTypeSet arg_types = {TypeIndex::String};
+
+    // Rows are {hex input}, expected decoded bytes; inputs that are not valid hex are
+    // expected to decode to the empty string, and NULL stays NULL.
+    const DataSet cases = {
+            {{Null()}, {Null()}},
+            {{Str("@!#")}, Str("")},
+            {{Str("")}, Str("")},
+            {{Str("ò&ø")}, Str("")},
+            {{Str("@@")}, Str("")},
+            {{Str("61")}, Str("a")},
+            {{Str("41")}, Str("A")},
+            {{Str("313233")}, Str("123")},
+            {{Str("EFBC9F")}, Str("？")},
+    };
+
+    check_function<DataTypeString, true>(fn_name, arg_types, cases);
+}
+
+TEST(function_string_test, function_coalesce_test) {
+    using Str = std::string;
+
+    const std::string fn_name = "coalesce";
+
+    // All-integer arguments: the only non-NULL value is the last one.
+    {
+        const InputTypeSet arg_types = {TypeIndex::Int32, TypeIndex::Int32, TypeIndex::Int32};
+        const DataSet cases = {
+                {{Null(), Null(), static_cast<int32_t>(1)}, {static_cast<int32_t>(1)}},
+                {{Null(), Null(), static_cast<int32_t>(2)}, {static_cast<int32_t>(2)}},
+                {{Null(), Null(), static_cast<int32_t>(3)}, {static_cast<int32_t>(3)}},
+                {{Null(), Null(), static_cast<int32_t>(4)}, {static_cast<int32_t>(4)}},
+        };
+        check_function<DataTypeInt32, true>(fn_name, arg_types, cases);
+    }
+
+    // Mixed string/integer arguments: the first argument is non-NULL and is expected back.
+    {
+        const InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String, TypeIndex::Int32};
+        const DataSet cases = {
+                {{Str("qwer"), Null(), static_cast<int32_t>(1)}, {Str("qwer")}},
+                {{Str("asdf"), Null(), static_cast<int32_t>(2)}, {Str("asdf")}},
+                {{Str("zxcv"), Null(), static_cast<int32_t>(3)}, {Str("zxcv")}},
+                {{Str("vbnm"), Null(), static_cast<int32_t>(4)}, {Str("vbnm")}},
+        };
+        check_function<DataTypeString, true>(fn_name, arg_types, cases);
+    }
+
+    // All-string arguments: the first non-NULL value wins; an empty string counts as a value.
+    {
+        const InputTypeSet arg_types = {TypeIndex::String, TypeIndex::String,
+                                        TypeIndex::String};
+        const DataSet cases = {
+                {{Null(), Str("abc"), Str("hij")}, {Str("abc")}},
+                {{Null(), Str("def"), Str("klm")}, {Str("def")}},
+                {{Null(), Str(""), Str("xyz")}, {Str("")}},
+                {{Null(), Null(), Str("uvw")}, {Str("uvw")}},
+        };
+        check_function<DataTypeString, true>(fn_name, arg_types, cases);
+    }
+}
+
+} // namespace doris::vectorized
+
+// Test entry point: initializes Doris' CpuInfo, lets gtest consume its command-line
+// flags, then runs every registered test and returns gtest's result as the exit code.
+int main(int argc, char** argv) {
+    doris::CpuInfo::init();
+    testing::InitGoogleTest(&argc, argv);
+    const int exit_code = RUN_ALL_TESTS();
+    return exit_code;
+}
diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h
new file mode 100644
index 0000000000..d5431789e2
--- /dev/null
+++ b/be/test/vec/function/function_test_util.h
@@ -0,0 +1,374 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include <any>
+#include <iostream>
+#include <list>
+#include <memory>
+#include <string>
+#include <type_traits>
+#include <typeinfo>
+#include <utility>
+#include <vector>
+
+#include "exec/schema_scanner.h"
+#include "runtime/row_batch.h"
+#include "runtime/tuple_row.h"
+#include "testutil/function_utils.h"
+#include "udf/udf.h"
+#include "udf/udf_internal.h"
+#include "util/bitmap_value.h"
+#include "vec/columns/column_complex.h"
+#include "vec/functions/function_string.h"
+#include "vec/functions/function_string_to_string.h"
+#include "vec/functions/simple_function_factory.h"
+#include "vec/runtime/vdatetime_value.h"
+
+namespace doris::vectorized {
+
+// One entry per test row: `first` holds the argument values (one std::any per input
+// column), `second` holds the expected result for that row.
+using DataSet = std::vector<std::pair<std::vector<std::any>, std::any>>;
+// One entry per function argument; check_function accepts either a TypeIndex or a
+// Consted stored in each std::any.
+using InputTypeSet = std::vector<std::any>;
+
+// SQL-flavoured names for the C++ types stored in DataSet cells. check_function uses
+// several of them as the target type of std::any_cast when reading a cell.
+namespace ut_type {
+using TINYINT = int8_t;
+using SMALLINT = int16_t;
+using INT = int32_t;
+using BIGINT = int64_t;
+using LARGEINT = int128_t;
+
+using VARCHAR = std::string;
+using CHAR = std::string;
+using STRING = std::string;
+
+using DOUBLE = double;
+using FLOAT = float;
+// Shorthand for Decimal<Int128>::double_to_decimal (presumably builds a decimal test
+// value from a double literal -- confirm against the Decimal implementation).
+inline auto DECIMAL = Decimal<Int128>::double_to_decimal;
+} // namespace ut_type
+
+// Parses `datetime_str` with VecDateTimeValue::from_date_str and returns the parsed
+// value reinterpreted as an Int64 (via binary_cast).
+//
+// `data_time` selects how the parsed value is cast before conversion: true casts it to
+// a datetime, false casts it to a date. The flag exists only to keep the time-function
+// unit tests short.
+//
+// NOTE(review): the result of from_date_str is not checked here, so what is returned for
+// an unparsable string depends on VecDateTimeValue -- confirm before relying on it.
+//
+// Declared inline because this header defines it; without inline, including the header
+// from two translation units of one binary would violate the one-definition rule.
+inline int64_t str_to_data_time(const std::string& datetime_str, bool data_time = true) {
+    VecDateTimeValue v;
+    v.from_date_str(datetime_str.c_str(), datetime_str.size());
+    if (data_time) {
+        v.to_datetime();
+    } else {
+        v.cast_to_date();
+    }
+    return binary_cast<VecDateTimeValue, Int64>(v);
+}
+
+// Wraps `col` and `null_map` into a ColumnNullable and registers it as argument column
+// number `i`.
+//
+//   ColumnType - data type class of the nested column; the registered type is
+//                make_nullable(ColumnType).
+//   columns    - receives the new ColumnNullable (appended at the back).
+//   ctn        - receives the same column/type/name triple that is inserted into `block`.
+//   block      - the triple is inserted at position `i`.
+//   is_const   - when true the nullable column is wrapped in a ColumnConst of `row_size`
+//                rows before being registered.
+//
+// `col` and `null_map` are forwarding references, so they are passed on with
+// std::forward: rvalue arguments (what every caller in this header passes) are moved
+// exactly as before, while an lvalue argument is no longer moved from silently.
+template <typename ColumnType, typename Column, typename NullColumn>
+void insert_column_to_block(std::list<ColumnPtr>& columns, ColumnsWithTypeAndName& ctn,
+                            Column&& col, NullColumn&& null_map, Block& block,
+                            const std::string& col_name, int i, bool is_const, int row_size) {
+    columns.emplace_back(ColumnNullable::create(std::forward<Column>(col),
+                                                std::forward<NullColumn>(null_map)));
+    ColumnWithTypeAndName type_and_name(
+            is_const ? ColumnConst::create(columns.back()->get_ptr(), row_size)
+                     : columns.back()->get_ptr(),
+            make_nullable(std::make_shared<ColumnType>()), col_name);
+    block.insert(i, type_and_name);
+    ctn.emplace_back(type_and_name);
+}
+
+// Runs the function registered under `func_name` on the rows of `data_set` and checks
+// every produced value against the expected one.
+//
+// Template parameters:
+//   ReturnType - data type class of the result column (for example DataTypeString).
+//   nullable   - when true the result type is wrapped with make_nullable, and an
+//                expected value of Null() means "this result row must be NULL".
+//
+// Arguments:
+//   func_name   - name looked up in SimpleFunctionFactory.
+//   input_types - one entry per argument: a TypeIndex, or, for a constant column, a
+//                 Consted carrying the TypeIndex, e.g. Consted {TypeIndex::String}.
+//   data_set    - one entry per row: {argument values, expected result}.
+//                 Null values are represented by Null().
+//
+// A DataSet with a constant column can only have one row of data.
+//
+// Every argument column is built as a nullable column. Failures are reported through
+// gtest assertions; an unsupported TypeIndex fails the test and returns immediately.
+template <typename ReturnType, bool nullable = false>
+void check_function(const std::string& func_name, const std::vector<std::any>& input_types,
+                    const DataSet& data_set) {
+    const size_t row_size = data_set.size();
+    const size_t column_size = input_types.size();
+
+    std::list<ColumnPtr> columns;
+    Block block;
+    ColumnNumbers arguments;
+    ColumnsWithTypeAndName ctn;
+    // constant_col_ptrs owns the wrappers; constant_cols holds the raw pointers handed to
+    // the function context (nullptr for non-constant arguments).
+    std::vector<std::shared_ptr<ColumnPtrWrapper>> constant_col_ptrs;
+    std::vector<ColumnPtrWrapper*> constant_cols;
+    std::vector<doris_udf::FunctionContext::TypeDesc> arg_types;
+    doris_udf::FunctionContext::TypeDesc arg_type;
+    // 1. build block and column type and names
+    for (size_t i = 0; i < column_size; i++) {
+        const bool is_const = input_types[i].type() == typeid(Consted);
+        const TypeIndex tp = is_const ? std::any_cast<Consted>(input_types[i]).tp
+                                      : std::any_cast<TypeIndex>(input_types[i]);
+
+        std::string col_name = "k" + std::to_string(i);
+
+        auto null_map = ColumnUInt8::create(row_size, false);
+        auto& null_map_data = null_map->get_data();
+
+        // Each branch fills one column: a Null() cell sets the null-map entry and inserts
+        // a default value, any other cell is extracted with std::any_cast and inserted.
+        if (tp == TypeIndex::String) {
+            auto col = ColumnString::create();
+            for (size_t j = 0; j < row_size; j++) {
+                if (data_set[j].first[i].type() == typeid(Null)) {
+                    null_map_data[j] = true;
+                    col->insert_default();
+                    continue;
+                }
+                // Bind by reference: no need to copy the string out of the std::any.
+                const auto& str = std::any_cast<const ut_type::STRING&>(data_set[j].first[i]);
+                col->insert_data(str.c_str(), str.size());
+            }
+            insert_column_to_block<DataTypeString>(columns, ctn, std::move(col),
+                                                   std::move(null_map), block, col_name, i,
+                                                   is_const, row_size);
+            arg_type.type = doris_udf::FunctionContext::TYPE_STRING;
+        } else if (tp == TypeIndex::BitMap) {
+            auto col = ColumnBitmap::create();
+            for (size_t j = 0; j < row_size; j++) {
+                if (data_set[j].first[i].type() == typeid(Null)) {
+                    null_map_data[j] = true;
+                    col->insert_default();
+                    continue;
+                }
+                // Bitmap cells are stored in the DataSet as BitmapValue pointers.
+                BitmapValue* bitmap = std::any_cast<BitmapValue*>(data_set[j].first[i]);
+                col->insert_value(*bitmap);
+            }
+            insert_column_to_block<DataTypeBitMap>(columns, ctn, std::move(col),
+                                                   std::move(null_map), block, col_name, i,
+                                                   is_const, row_size);
+            arg_type.type = doris_udf::FunctionContext::TYPE_OBJECT;
+        } else if (tp == TypeIndex::Int8) {
+            auto col = ColumnInt8::create();
+
+            for (size_t j = 0; j < row_size; j++) {
+                if (data_set[j].first[i].type() == typeid(Null)) {
+                    null_map_data[j] = true;
+                    col->insert_default();
+                    continue;
+                }
+                auto value = std::any_cast<ut_type::TINYINT>(data_set[j].first[i]);
+                col->insert_data(reinterpret_cast<char*>(&value), 0);
+            }
+            insert_column_to_block<DataTypeInt8>(columns, ctn, std::move(col), std::move(null_map),
+                                                 block, col_name, i, is_const, row_size);
+            arg_type.type = doris_udf::FunctionContext::TYPE_TINYINT;
+        } else if (tp == TypeIndex::Int16) {
+            auto col = ColumnInt16::create();
+
+            for (size_t j = 0; j < row_size; j++) {
+                if (data_set[j].first[i].type() == typeid(Null)) {
+                    null_map_data[j] = true;
+                    col->insert_default();
+                    continue;
+                }
+                auto value = std::any_cast<ut_type::SMALLINT>(data_set[j].first[i]);
+                col->insert_data(reinterpret_cast<char*>(&value), 0);
+            }
+            insert_column_to_block<DataTypeInt16>(columns, ctn, std::move(col), std::move(null_map),
+                                                  block, col_name, i, is_const, row_size);
+            arg_type.type = doris_udf::FunctionContext::TYPE_SMALLINT;
+        } else if (tp == TypeIndex::Int32) {
+            auto col = ColumnInt32::create();
+
+            for (size_t j = 0; j < row_size; j++) {
+                if (data_set[j].first[i].type() == typeid(Null)) {
+                    null_map_data[j] = true;
+                    col->insert_default();
+                    continue;
+                }
+                auto value = std::any_cast<ut_type::INT>(data_set[j].first[i]);
+                col->insert_data(reinterpret_cast<char*>(&value), 0);
+            }
+            insert_column_to_block<DataTypeInt32>(columns, ctn, std::move(col), std::move(null_map),
+                                                  block, col_name, i, is_const, row_size);
+            arg_type.type = doris_udf::FunctionContext::TYPE_INT;
+        } else if (tp == TypeIndex::Int64) {
+            auto col = ColumnInt64::create();
+
+            for (size_t j = 0; j < row_size; j++) {
+                if (data_set[j].first[i].type() == typeid(Null)) {
+                    null_map_data[j] = true;
+                    col->insert_default();
+                    continue;
+                }
+                auto value = std::any_cast<ut_type::BIGINT>(data_set[j].first[i]);
+                col->insert_data(reinterpret_cast<char*>(&value), 0);
+            }
+            insert_column_to_block<DataTypeInt64>(columns, ctn, std::move(col), std::move(null_map),
+                                                  block, col_name, i, is_const, row_size);
+            arg_type.type = doris_udf::FunctionContext::TYPE_BIGINT;
+        } else if (tp == TypeIndex::Int128) {
+            auto col = ColumnInt128::create();
+
+            for (size_t j = 0; j < row_size; j++) {
+                if (data_set[j].first[i].type() == typeid(Null)) {
+                    null_map_data[j] = true;
+                    col->insert_default();
+                    continue;
+                }
+                auto value = std::any_cast<ut_type::LARGEINT>(data_set[j].first[i]);
+                col->insert_data(reinterpret_cast<char*>(&value), 0);
+            }
+            insert_column_to_block<DataTypeInt128>(columns, ctn, std::move(col),
+                                                   std::move(null_map), block, col_name, i,
+                                                   is_const, row_size);
+            arg_type.type = doris_udf::FunctionContext::TYPE_LARGEINT;
+        } else if (tp == TypeIndex::Float64) {
+            auto col = ColumnFloat64::create();
+
+            for (size_t j = 0; j < row_size; j++) {
+                if (data_set[j].first[i].type() == typeid(Null)) {
+                    null_map_data[j] = true;
+                    col->insert_default();
+                    continue;
+                }
+                auto value = std::any_cast<ut_type::DOUBLE>(data_set[j].first[i]);
+                col->insert_data(reinterpret_cast<char*>(&value), 0);
+            }
+            insert_column_to_block<DataTypeFloat64>(columns, ctn, std::move(col),
+                                                    std::move(null_map), block, col_name, i,
+                                                    is_const, row_size);
+            arg_type.type = doris_udf::FunctionContext::TYPE_DOUBLE;
+        } else if (tp == TypeIndex::Decimal128) {
+            auto col = ColumnDecimal<Decimal128>::create(0, 9);
+
+            for (size_t j = 0; j < row_size; j++) {
+                if (data_set[j].first[i].type() == typeid(Null)) {
+                    null_map_data[j] = true;
+                    col->insert_default();
+                    continue;
+                }
+                auto value = std::any_cast<Decimal<Int128>>(data_set[j].first[i]);
+                col->insert_data(reinterpret_cast<char*>(&value), 0);
+            }
+            insert_column_to_block<DataTypeDecimal<Decimal128>>(columns, ctn, std::move(col),
+                                                                std::move(null_map), block,
+                                                                col_name, i, is_const, row_size);
+            arg_type.type = doris_udf::FunctionContext::TYPE_DECIMALV2;
+        } else if (tp == TypeIndex::DateTime) {
+            static const std::string date_time_format("%Y-%m-%d %H:%i:%s");
+            // Datetime cells are given as strings and stored in an Int64 column.
+            auto col = ColumnInt64::create();
+
+            for (size_t j = 0; j < row_size; j++) {
+                if (data_set[j].first[i].type() == typeid(Null)) {
+                    null_map_data[j] = true;
+                    col->insert_default();
+                    continue;
+                }
+                const auto& datetime_str =
+                        std::any_cast<const std::string&>(data_set[j].first[i]);
+                VecDateTimeValue v;
+                v.from_date_format_str(date_time_format.c_str(), date_time_format.size(),
+                                       datetime_str.c_str(), datetime_str.size());
+                v.to_datetime();
+                // The bytes of the VecDateTimeValue are inserted as the column value.
+                col->insert_data(reinterpret_cast<char*>(&v), 0);
+            }
+            insert_column_to_block<DataTypeDateTime>(columns, ctn, std::move(col),
+                                                     std::move(null_map), block, col_name, i,
+                                                     is_const, row_size);
+            arg_type.type = doris_udf::FunctionContext::TYPE_DATETIME;
+        } else if (tp == TypeIndex::Date) {
+            static const std::string date_format("%Y-%m-%d");
+            auto col = ColumnInt64::create();
+
+            for (size_t j = 0; j < row_size; j++) {
+                if (data_set[j].first[i].type() == typeid(Null)) {
+                    null_map_data[j] = true;
+                    col->insert_default();
+                    continue;
+                }
+                const auto& date_str = std::any_cast<const std::string&>(data_set[j].first[i]);
+                VecDateTimeValue v;
+                v.from_date_format_str(date_format.c_str(), date_format.size(), date_str.c_str(),
+                                       date_str.size());
+                v.cast_to_date();
+                col->insert_data(reinterpret_cast<char*>(&v), 0);
+            }
+            // NOTE(review): the column is registered as DataTypeDateTime although the
+            // argument type below is TYPE_DATE -- confirm whether a dedicated date data
+            // type should be used here.
+            insert_column_to_block<DataTypeDateTime>(columns, ctn, std::move(col),
+                                                     std::move(null_map), block, col_name, i,
+                                                     is_const, row_size);
+            arg_type.type = doris_udf::FunctionContext::TYPE_DATE;
+        } else {
+            // Fatal gtest failure: returns from check_function right here.
+            FAIL() << "check_function: unsupported input TypeIndex " << static_cast<int>(tp)
+                   << " for argument " << i;
+        }
+        arguments.push_back(i);
+        arg_types.push_back(arg_type);
+        if (is_const) {
+            const auto& column = block.get_by_position(i).column;
+            std::shared_ptr<ColumnPtrWrapper> constant_col =
+                    std::make_shared<ColumnPtrWrapper>(column);
+            constant_col_ptrs.push_back(constant_col);
+            constant_cols.push_back(constant_col.get());
+        } else {
+            constant_cols.push_back(nullptr);
+        }
+    }
+
+    // 2. execute function
+    auto return_type = nullable ? make_nullable(std::make_shared<ReturnType>())
+                                : std::make_shared<ReturnType>();
+    auto func = SimpleFunctionFactory::instance().get_function(func_name, ctn, return_type);
+    ASSERT_TRUE(func != nullptr);
+
+    // Map the result data type to the UDF return type descriptor. ReturnType is a data
+    // type class, so the datetime case has to compare against DataTypeDateTime.
+    doris_udf::FunctionContext::TypeDesc fn_ctx_return;
+    if constexpr (std::is_same_v<ReturnType, DataTypeUInt8>) {
+        fn_ctx_return.type = doris_udf::FunctionContext::TYPE_BOOLEAN;
+    } else if constexpr (std::is_same_v<ReturnType, DataTypeFloat64>) {
+        fn_ctx_return.type = doris_udf::FunctionContext::TYPE_DOUBLE;
+    } else if constexpr (std::is_same_v<ReturnType, DataTypeInt32>) {
+        fn_ctx_return.type = doris_udf::FunctionContext::TYPE_INT;
+    } else if constexpr (std::is_same_v<ReturnType, DataTypeDateTime>) {
+        fn_ctx_return.type = doris_udf::FunctionContext::TYPE_DATETIME;
+    } else {
+        fn_ctx_return.type = doris_udf::FunctionContext::INVALID_TYPE;
+    }
+
+    FunctionUtils fn_utils(fn_ctx_return, arg_types, 0);
+    auto* fn_ctx = fn_utils.get_fn_ctx();
+    fn_ctx->impl()->set_constant_cols(constant_cols);
+    // NOTE(review): whatever prepare/execute/close return is ignored here -- confirm
+    // whether it should be asserted on.
+    func->prepare(fn_ctx, FunctionContext::FRAGMENT_LOCAL);
+    func->prepare(fn_ctx, FunctionContext::THREAD_LOCAL);
+
+    // The result column is appended after the arguments, so it is the last column.
+    block.insert({nullptr, return_type, "result"});
+
+    auto result = block.columns() - 1;
+    func->execute(fn_ctx, block, arguments, result, row_size);
+
+    func->close(fn_ctx, FunctionContext::THREAD_LOCAL);
+    func->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL);
+
+    // 3. check the result of function
+    ColumnPtr column = block.get_columns()[result];
+    ASSERT_TRUE(column != nullptr);
+
+    // Compares the value produced for `row` with the expected value of that row.
+    auto check_column_data = [&](size_t row) {
+        Field field;
+        column->get(row, field);
+
+        const auto& column_data = field.get<typename ReturnType::FieldType>();
+        const auto& expect_data =
+                std::any_cast<typename ReturnType::FieldType>(data_set[row].second);
+
+        ASSERT_EQ(column_data, expect_data);
+    };
+
+    for (size_t i = 0; i < row_size; ++i) {
+        if constexpr (nullable) {
+            // NULL-ness must match first; the value is compared only for non-NULL rows.
+            const bool is_null = data_set[i].second.type() == typeid(Null);
+            ASSERT_EQ(column->is_null_at(i), is_null);
+            if (!is_null) check_column_data(i);
+        } else {
+            check_column_data(i);
+        }
+    }
+}
+
+} // namespace doris::vectorized
diff --git a/be/test/vec/function/function_time_test.cpp b/be/test/vec/function/function_time_test.cpp
new file mode 100644
index 0000000000..df6747aa95
--- /dev/null
+++ b/be/test/vec/function/function_time_test.cpp
@@ -0,0 +1,501 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include <any>
+#include <iostream>
+#include <string>
+
+#include "exec/schema_scanner.h"
+#include "function_test_util.h"
+#include "runtime/row_batch.h"
+#include "runtime/tuple_row.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris::vectorized {
+
+TEST(TimestampFunctionsTest, day_of_week_test) {
+    // dayofweek(): weekday number for a valid datetime; NULL for a zero month or zero day.
+    std::string function = "dayofweek";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2001-02-03 12:34:56")}, 7},
+                     {{std::string("2020-00-01 00:00:00")}, Null()},
+                     {{std::string("2020-01-00 00:00:00")}, Null()}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, day_of_month_test) {
+    // dayofmonth(): day-of-month field, including a leap day; NULL for a zero month.
+    std::string function = "dayofmonth";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2020-00-01 00:00:00")}, Null()},
+                     {{std::string("2020-01-01 00:00:00")}, 1},
+                     {{std::string("2020-02-29 00:00:00")}, 29}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, day_of_year_test) {
+    // dayofyear(): 2020-02-29 is day 60 of its year; NULL for a zero month or zero day.
+    std::string function = "dayofyear";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2020-00-01 00:00:00")}, Null()},
+                     {{std::string("2020-01-00 00:00:00")}, Null()},
+                     {{std::string("2020-02-29 00:00:00")}, 60}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, week_of_year_test) {
+    // weekofyear(): 2020-02-29 falls in week 9; NULL for a zero month or zero day.
+    std::string function = "weekofyear";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2020-00-01 00:00:00")}, Null()},
+                     {{std::string("2020-01-00 00:00:00")}, Null()},
+                     {{std::string("2020-02-29 00:00:00")}, 9}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, year_test) {
+    // year(): year field of a valid datetime; NULL when the day is zero.
+    std::string function = "year";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2021-01-01 00:00:00")}, 2021},
+                     {{std::string("2021-01-00 00:00:00")}, Null()},
+                     {{std::string("2025-05-01 00:00:00")}, 2025}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, quarter_test) {
+    // quarter(): January is quarter 1, October quarter 4; NULL for empty input or day 32.
+    std::string function = "quarter";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2021-01-01 00:00:00")}, 1},
+                     {{std::string("")}, Null()},
+                     {{std::string("2021-01-32 00:00:00")}, Null()},
+                     {{std::string("2025-10-23 00:00:00")}, 4}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, month_test) {
+    // month(): month field of a valid datetime; NULL for empty input or day 32.
+    std::string function = "month";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2021-01-01 00:00:00")}, 1},
+                     {{std::string("")}, Null()},
+                     {{std::string("2021-01-32 00:00:00")}, Null()},
+                     {{std::string("2025-05-23 00:00:00")}, 5}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, day_test) {
+    // day(): day field of a valid datetime; NULL for empty input or day 32.
+    std::string function = "day";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2021-01-01 00:00:00")}, 1},
+                     {{std::string("")}, Null()},
+                     {{std::string("2021-01-32 00:00:00")}, Null()},
+                     {{std::string("2025-05-23 00:00:00")}, 23}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, hour_test) {
+    // hour(): hour field of a valid datetime; NULL for empty input or an hour of 24.
+    std::string function = "hour";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2021-01-01 23:59:59")}, 23},
+                     {{std::string("2021-01-13 16:56:00")}, 16},
+                     {{std::string("")}, Null()},
+                     {{std::string("2025-05-23 24:00:00")}, Null()}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, minute_test) {
+    // minute(): minute field of a valid datetime; NULL for empty input or an hour of 24.
+    std::string function = "minute";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2021-01-01 23:59:50")}, 59},
+                     {{std::string("2021-01-13 16:20:00")}, 20},
+                     {{std::string("")}, Null()},
+                     {{std::string("2025-05-23 24:00:00")}, Null()}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, second_test) {
+    // second(): second field of a valid datetime; NULL for empty input or an hour of 24.
+    std::string function = "second";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2021-01-01 23:50:59")}, 59},
+                     {{std::string("2021-01-13 16:20:00")}, 0},
+                     {{std::string("")}, Null()},
+                     {{std::string("2025-05-23 24:00:00")}, Null()}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, from_unix_test) {
+    // from_unixtime(): formats an epoch-seconds value; a negative timestamp yields NULL.
+    // NOTE(review): 16:38:57 is the UTC+8 wall time of 1565080737 - presumably the default zone.
+    std::string function = "from_unixtime";
+    InputTypeSet arg_types = {TypeIndex::Int32};
+    DataSet cases = {{{1565080737}, std::string("2019-08-06 16:38:57")}, {{-123}, Null()}};
+
+    check_function<DataTypeString, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, timediff_test) {
+    // timediff(): first minus second, in seconds as a double; an invalid operand yields NULL.
+    std::string function = "timediff";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::DateTime};
+
+    DataSet cases = {
+            {{std::string("2019-07-18 12:00:00"), std::string("2019-07-18 12:00:00")}, 0.0},
+            {{std::string("2019-07-18 12:00:00"), std::string("2019-07-18 13:01:02")}, -3662.0},
+            {{std::string("2019-00-18 12:00:00"), std::string("2019-07-18 13:01:02")}, Null()},
+            {{std::string("2019-07-18 12:00:00"), std::string("2019-07-00 13:01:02")}, Null()}};
+
+    check_function<DataTypeFloat64, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, date_format_test) {
+    // date_format(): format specifiers against known datetimes; the format argument is Consted.
+    std::string function = "date_format";
+    InputTypeSet arg_types = {TypeIndex::DateTime, Consted {TypeIndex::String}};
+    {
+        DataSet cases = {{{std::string("2009-10-04 22:23:00"), std::string("%W %M %Y")},
+                          std::string("Sunday October 2009")}};
+
+        check_function<DataTypeString, true>(function, arg_types, cases);
+    }
+    {
+        DataSet cases = {{{std::string("2007-10-04 22:23:00"), std::string("%H:%i:%s")},
+                          std::string("22:23:00")}};
+
+        check_function<DataTypeString, true>(function, arg_types, cases);
+    }
+    {
+        DataSet cases = {
+                {{std::string("1900-10-04 22:23:00"), std::string("%D %y %a %d %m %b %j")},
+                 std::string("4th 00 Thu 04 10 Oct 277")}};
+
+        check_function<DataTypeString, true>(function, arg_types, cases);
+    }
+    {
+        DataSet cases = {
+                {{std::string("1997-10-04 22:23:00"), std::string("%H %k %I %r %T %S %w")},
+                 std::string("22 22 10 10:23:00 PM 22:23:00 00 6")}};
+
+        check_function<DataTypeString, true>(function, arg_types, cases);
+    }
+    {
+        DataSet cases = {{{std::string("1999-01-01 00:00:00"), std::string("%X %V")},
+                          std::string("1998 52")}};
+
+        check_function<DataTypeString, true>(function, arg_types, cases);
+    }
+    {
+        DataSet cases = {
+                {{std::string("2006-06-01 00:00:00"), std::string("%d")}, std::string("01")}};
+
+        check_function<DataTypeString, true>(function, arg_types, cases);
+    }
+    {
+        DataSet cases = {
+                {{std::string("2006-06-01 00:00:00"), std::string("%%%d")}, std::string("%01")}};
+
+        check_function<DataTypeString, true>(function, arg_types, cases);
+    }
+}
+
+TEST(TimestampFunctionsTest, years_add_test) {
+    // years_add(): signed year shift; empty or NULL input, or adding 8000 years, yields NULL.
+    std::string function = "years_add";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+    DataSet cases = {
+            {{std::string("2020-05-23 00:00:00"), 5}, str_to_data_time("2025-05-23 00:00:00")},
+            {{std::string("2020-05-23 00:00:00"), -5}, str_to_data_time("2015-05-23 00:00:00")},
+            {{std::string(""), 5}, Null()},
+            {{std::string("2020-05-23 00:00:00"), 8000}, Null()},
+            {{Null(), 5}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, years_sub_test) {
+    // years_sub(): signed year shift back; empty or NULL input, or minus 3000 years, yields NULL.
+    std::string function = "years_sub";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-05-23 00:00:00"), 5}, str_to_data_time("2015-05-23 00:00:00")},
+            {{std::string("2020-05-23 00:00:00"), -5}, str_to_data_time("2025-05-23 00:00:00")},
+            {{std::string(""), 5}, Null()},
+            {{std::string("2020-05-23 00:00:00"), 3000}, Null()},
+            {{Null(), 5}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, months_add_test) {
+    // months_add(): signed month shift, carrying into the next year; empty/NULL input is NULL.
+    std::string function = "months_add";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-10-23 00:00:00"), -4}, str_to_data_time("2020-06-23 00:00:00")},
+            {{std::string("2020-05-23 00:00:00"), 4}, str_to_data_time("2020-09-23 00:00:00")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 00:00:00"), 10}, str_to_data_time("2021-03-23 00:00:00")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, months_sub_test) {
+    // months_sub(): signed month shift back, borrowing from the previous year; empty/NULL is NULL.
+    std::string function = "months_sub";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-05-23 00:00:00"), 4}, str_to_data_time("2020-01-23 00:00:00")},
+            {{std::string("2020-05-23 00:00:00"), -4}, str_to_data_time("2020-09-23 00:00:00")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 00:00:00"), 10}, str_to_data_time("2019-07-23 00:00:00")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, days_add_test) {
+    // days_add(): signed day shift, rolling over a month end; empty/NULL input yields NULL.
+    std::string function = "days_add";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-10-23 00:00:00"), -4}, str_to_data_time("2020-10-19 00:00:00")},
+            {{std::string("2020-05-23 00:00:00"), 4}, str_to_data_time("2020-05-27 00:00:00")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 00:00:00"), 10}, str_to_data_time("2020-06-02 00:00:00")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, days_sub_test) {
+    // days_sub(): signed day shift back, crossing into the previous month; empty/NULL is NULL.
+    std::string function = "days_sub";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-05-23 00:00:00"), 4}, str_to_data_time("2020-05-19 00:00:00")},
+            {{std::string("2020-05-23 00:00:00"), -4}, str_to_data_time("2020-05-27 00:00:00")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 00:00:00"), 31}, str_to_data_time("2020-04-22 00:00:00")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, hours_add_test) {
+    // hours_add(): signed hour shift; 100 hours carries four days forward; empty/NULL is NULL.
+    std::string function = "hours_add";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-10-23 10:00:00"), -4}, str_to_data_time("2020-10-23 06:00:00")},
+            {{std::string("2020-05-23 10:00:00"), 4}, str_to_data_time("2020-05-23 14:00:00")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 10:00:00"), 100}, str_to_data_time("2020-05-27 14:00:00")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, hours_sub_test) {
+    // hours_sub(): signed hour shift back; 31 hours lands on the previous day; empty/NULL is NULL.
+    std::string function = "hours_sub";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-05-23 10:00:00"), 4}, str_to_data_time("2020-05-23 06:00:00")},
+            {{std::string("2020-05-23 10:00:00"), -4}, str_to_data_time("2020-05-23 14:00:00")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 10:00:00"), 31}, str_to_data_time("2020-05-22 03:00:00")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, minutes_add_test) {
+    // minutes_add(): signed minute shift, carrying into the hour; empty/NULL input yields NULL.
+    std::string function = "minutes_add";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-10-23 10:00:00"), 40}, str_to_data_time("2020-10-23 10:40:00")},
+            {{std::string("2020-05-23 10:00:00"), -40}, str_to_data_time("2020-05-23 09:20:00")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 10:00:00"), 100}, str_to_data_time("2020-05-23 11:40:00")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, minutes_sub_test) {
+    // minutes_sub(): signed minute shift back, borrowing from the hour; empty/NULL yields NULL.
+    std::string function = "minutes_sub";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-05-23 10:00:00"), 40}, str_to_data_time("2020-05-23 09:20:00")},
+            {{std::string("2020-05-23 10:00:00"), -40}, str_to_data_time("2020-05-23 10:40:00")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 10:00:00"), 100}, str_to_data_time("2020-05-23 08:20:00")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, seconds_add_test) {
+    // seconds_add(): signed second shift, carrying into the minute; empty/NULL input yields NULL.
+    std::string function = "seconds_add";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-10-23 10:00:00"), 40}, str_to_data_time("2020-10-23 10:00:40")},
+            {{std::string("2020-05-23 10:00:00"), -40}, str_to_data_time("2020-05-23 09:59:20")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 10:00:00"), 100}, str_to_data_time("2020-05-23 10:01:40")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, seconds_sub_test) {
+    // seconds_sub(): signed second shift back, borrowing from the minute; empty/NULL yields NULL.
+    std::string function = "seconds_sub";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-05-23 10:00:00"), 40}, str_to_data_time("2020-05-23 09:59:20")},
+            {{std::string("2020-05-23 10:00:00"), -40}, str_to_data_time("2020-05-23 10:00:40")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 10:00:00"), 100}, str_to_data_time("2020-05-23 09:58:20")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, weeks_add_test) {
+    // weeks_add(): signed shift in whole weeks, crossing month and year ends; empty/NULL is NULL.
+    std::string function = "weeks_add";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-10-23 10:00:00"), 5}, str_to_data_time("2020-11-27 10:00:00")},
+            {{std::string("2020-05-23 10:00:00"), -5}, str_to_data_time("2020-04-18 10:00:00")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 10:00:00"), 100}, str_to_data_time("2022-04-23 10:00:00")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, weeks_sub_test) {
+    // weeks_sub(): signed shift back in whole weeks, crossing year ends; empty/NULL is NULL.
+    std::string function = "weeks_sub";
+    InputTypeSet arg_types = {TypeIndex::DateTime, TypeIndex::Int32};
+
+    DataSet cases = {
+            {{std::string("2020-05-23 10:00:00"), 5}, str_to_data_time("2020-04-18 10:00:00")},
+            {{std::string("2020-05-23 10:00:00"), -5}, str_to_data_time("2020-06-27 10:00:00")},
+            {{std::string(""), 4}, Null()},
+            {{std::string("2020-05-23 10:00:00"), 100}, str_to_data_time("2018-06-23 10:00:00")},
+            {{Null(), 4}, Null()}};
+
+    check_function<DataTypeDateTime, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, to_days_test) {
+    // to_days(): day number of a date, with 0000-01-01 being day 1; invalid input yields NULL.
+    std::string function = "to_days";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {{{std::string("2021-01-01 00:00:00")}, 738156},
+                     {{std::string("")}, Null()},
+                     {{std::string("2021-01-32 00:00:00")}, Null()},
+                     {{std::string("0000-01-01 00:00:00")}, 1}};
+
+    check_function<DataTypeInt32, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, date_test) {
+    // date(): expects the date-only value of a datetime; empty or NULL input yields NULL.
+    std::string function = "date";
+    InputTypeSet arg_types = {TypeIndex::DateTime};
+
+    DataSet cases = {
+            {{std::string("2021-01-01 06:00:00")}, str_to_data_time("2021-01-01", false)},
+            {{std::string("")}, Null()},
+            {{Null()}, Null()},
+            {{std::string("0000-01-01 00:00:00")}, str_to_data_time("0000-01-01", false)}};
+
+    check_function<DataTypeDate, true>(function, arg_types, cases);
+}
+
+TEST(TimestampFunctionsTest, makedate_test) {
+    // makedate(year, day_of_year): day 400 rolls into the next year; bad year or day <= 0 is NULL.
+    std::string function = "makedate";
+    InputTypeSet arg_types = {TypeIndex::Int32, TypeIndex::Int32};
+
+    DataSet cases = {{{2021, 3}, str_to_data_time("2021-01-03", false)},
+                     {{2021, 95}, str_to_data_time("2021-04-05", false)},
+                     {{2021, 400}, str_to_data_time("2022-02-04", false)},
+                     {{2021, 0}, Null()},
+                     {{2021, -10}, Null()},
+                     {{-1, 3}, Null()},
+                     {{12345, 3}, Null()}};
+
+    check_function<DataTypeDate, true>(function, arg_types, cases);
+}
+} // namespace doris::vectorized
+
+int main(int argc, char** argv) { // entry point of the time-function test binary
+    ::testing::InitGoogleTest(&argc, argv); // let gtest consume its command-line flags
+    return RUN_ALL_TESTS(); // the gtest result becomes the process exit status
+}
diff --git a/be/test/vec/runtime/CMakeLists.txt b/be/test/vec/runtime/CMakeLists.txt
new file mode 100644
index 0000000000..f01816299d
--- /dev/null
+++ b/be/test/vec/runtime/CMakeLists.txt
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# where to put the generated test executables
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/test/vec/runtime")
+
+ADD_BE_TEST(vdata_stream_test)
+
diff --git a/be/test/vec/runtime/vdata_stream_test.cpp b/be/test/vec/runtime/vdata_stream_test.cpp
new file mode 100644
index 0000000000..24bb34c7e4
--- /dev/null
+++ b/be/test/vec/runtime/vdata_stream_test.cpp
@@ -0,0 +1,178 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "bthread/id.h"
+#include "common/object_pool.h"
+#include "gen_cpp/internal_service.pb.h"
+#include "google/protobuf/descriptor.h"
+#include "google/protobuf/service.h"
+#include "gtest/gtest.h"
+#include "runtime/exec_env.h"
+#include "testutil/desc_tbl_builder.h"
+#include "vec/columns/columns_number.h"
+#include "vec/runtime/vdata_stream_mgr.h"
+#include "vec/runtime/vdata_stream_recvr.h"
+#include "vec/sink/vdata_stream_sender.h"
+
+namespace doris::vectorized {
+
+// Backend-service double that hands transmit_block() RPCs directly to a VDataStreamMgr.
+class LocalMockBackendService : public PBackendService {
+public:
+    void transmit_block(::google::protobuf::RpcController* controller,
+                        const ::doris::PTransmitDataParams* request,
+                        ::doris::PTransmitDataResult* response,
+                        ::google::protobuf::Closure* done) override {
+        stream_mgr->transmit_block(request, &done);
+    }
+    VDataStreamMgr* stream_mgr = nullptr; // not owned; set by the test before the first call
+};
+
+// In-process RpcChannel: every call is routed synchronously to the owned PBackendService.
+class MockChannel : public google::protobuf::RpcChannel {
+public:
+    // Takes ownership of `service`.
+    explicit MockChannel(PBackendService* service) : _service(service) {}
+
+    // Dispatches the request as a transmit_block() call, regardless of `method`.
+    void CallMethod(const google::protobuf::MethodDescriptor* method,
+                    google::protobuf::RpcController* controller,
+                    const google::protobuf::Message* request, google::protobuf::Message* response,
+                    google::protobuf::Closure* done) override {
+        auto call_id = static_cast<brpc::Controller*>(controller)->call_id();
+
+        // NOTE(review): range 2 + 3 presumably mirrors what a real brpc channel reserves - confirm.
+        bthread_id_lock_and_reset_range(call_id, nullptr, 2 + 3);
+        _service->transmit_block(controller, static_cast<const PTransmitDataParams*>(request),
+                                 static_cast<PTransmitDataResult*>(response), done);
+        bthread_id_unlock_and_destroy(call_id);
+    }
+
+private:
+    std::unique_ptr<PBackendService> _service;
+};
+
+// Stub cache that owns `channel` and returns one stub bound to it for every address.
+class MockBrpcStubCache : public BrpcStubCache {
+public:
+    explicit MockBrpcStubCache(google::protobuf::RpcChannel* channel)
+            : _channel(channel), _stub(new PBackendService_Stub(channel)) {}
+    virtual ~MockBrpcStubCache() = default;
+    virtual std::shared_ptr<PBackendService_Stub> get_stub(const TNetworkAddress&) { return _stub; }
+
+private:
+    // Declared first so it is destroyed only after this cache has released _stub.
+    std::unique_ptr<google::protobuf::RpcChannel> _channel;
+    std::shared_ptr<PBackendService_Stub> _stub;
+};
+
+// Fixture providing the VDataStreamMgr under test and an ObjectPool for test-created objects.
+class VDataStreamTest : public testing::Test {
+protected:
+    // protected rather than private: each TEST_F body runs in a class derived from this
+    // fixture and reads these members directly.
+    VDataStreamMgr _instance;
+    ObjectPool _object_pool;
+};
+
+// End-to-end check: a 1024-row block sent by VDataStreamSender reaches the receiver with all rows.
+TEST_F(VDataStreamTest, BasicTest) {
+    doris::DescriptorTblBuilder builder(&_object_pool);
+    builder.declare_tuple() << doris::TYPE_INT << doris::TYPE_DOUBLE;
+    doris::DescriptorTbl* desc_tbl = builder.build();
+    auto tuple_desc = const_cast<doris::TupleDescriptor*>(desc_tbl->get_tuple_descriptor(0));
+    doris::RowDescriptor row_desc(tuple_desc, false);
+
+    doris::RuntimeState runtime_stat(doris::TUniqueId(), doris::TQueryOptions(),
+                                     doris::TQueryGlobals(), nullptr);
+    runtime_stat.init_instance_mem_tracker();
+    runtime_stat.set_desc_tbl(desc_tbl);
+    runtime_stat.set_be_number(1);
+    runtime_stat._exec_env = _object_pool.add(new ExecEnv);
+
+    // Route brpc traffic in-process: stub -> MockChannel -> LocalMockBackendService -> _instance.
+    auto* mock_service = new LocalMockBackendService;
+    mock_service->stream_mgr = &_instance;
+    MockChannel* channel = new MockChannel(mock_service);
+
+    runtime_stat._exec_env->_brpc_stub_cache = _object_pool.add(new MockBrpcStubCache(channel));
+
+    TUniqueId uid;
+    PlanNodeId nid = 1;
+    int num_senders = 1;
+    int buffer_size = 1024 * 1024;
+    RuntimeProfile profile("profile");
+    bool is_merge = false;
+    std::shared_ptr<QueryStatisticsRecvr> statistics = std::make_shared<QueryStatisticsRecvr>();
+    auto recv = _instance.create_recvr(&runtime_stat, row_desc, uid, nid, num_senders, buffer_size,
+                                       &profile, is_merge, statistics);
+
+    // Sender side: a single unpartitioned destination addressed to the receiver created above.
+    int sender_id = 1;
+    TDataSink tsink;
+    {
+        tsink.stream_sink.output_partition.type = TPartitionType::UNPARTITIONED;
+        tsink.stream_sink.dest_node_id = 1;
+    }
+    std::vector<TPlanFragmentDestination> dests;
+    {
+        TPlanFragmentDestination dest;
+        TNetworkAddress addr;
+        addr.__set_hostname("127.0.0.1");
+        addr.__set_port(8888);
+
+        dest.__set_brpc_server(addr);
+        dest.__set_fragment_instance_id(uid);
+        dest.__set_server(addr);
+        dests.push_back(dest);
+    }
+    int per_channel_buffer_size = 1024 * 1024;
+    bool send_query_statistics_with_every_batch = false;
+    VDataStreamSender sender(&_object_pool, sender_id, row_desc, tsink.stream_sink, dests,
+                             per_channel_buffer_size, send_query_statistics_with_every_batch);
+    sender.set_query_statistics(std::make_shared<QueryStatistics>());
+    sender.init(tsink);
+    sender.prepare(&runtime_stat);
+    sender.open(&runtime_stat);
+
+    auto vec = vectorized::ColumnVector<Int32>::create();
+    auto& data = vec->get_data();
+    for (int i = 0; i < 1024; ++i) {
+        data.push_back(i);
+    }
+    vectorized::DataTypePtr data_type(std::make_shared<vectorized::DataTypeInt32>());
+    vectorized::ColumnWithTypeAndName type_and_name(vec->get_ptr(), data_type, "test_int");
+    vectorized::Block block({type_and_name});
+    ASSERT_TRUE(sender.send(&runtime_stat, &block).ok());
+
+    Block block_2;
+    bool eos = false; // out-parameter of get_next(); initialised so it is never read indeterminate
+    ASSERT_TRUE(recv->get_next(&block_2, &eos).ok());
+
+    ASSERT_EQ(block_2.rows(), 1024);
+
+    Status exec_status;
+    sender.close(&runtime_stat, exec_status);
+    recv->close();
+}
+} // namespace doris::vectorized
+
+int main(int argc, char** argv) { // entry point of the data-stream test binary
+    doris::CpuInfo::init(); // initialise Doris CPU info before any test runs
+    ::testing::InitGoogleTest(&argc, argv); // let gtest consume its command-line flags
+    return RUN_ALL_TESTS(); // the gtest result becomes the process exit status
+}
diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js
index 4a12a28913..022b26dd5d 100644
--- a/docs/.vuepress/sidebar/en.js
+++ b/docs/.vuepress/sidebar/en.js
@@ -330,6 +330,7 @@ module.exports = [
               "ascii",
               "bit_length",
               "char_length",
+              "coalesce",
               "concat",
               "concat_ws",
               "ends_with",
@@ -450,6 +451,11 @@ module.exports = [
             directoryPath: "hash-functions/",
             children: ["murmur_hash3_32"],
           },
+          {
+            title: "Math Functions",
+            directoryPath: "math-functions/",
+            children: ["conv"],
+          },
           {
             title: "table functions",
             directoryPath: "table-functions/",
diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js
index b47618aef6..7ec172afb0 100644
--- a/docs/.vuepress/sidebar/zh-CN.js
+++ b/docs/.vuepress/sidebar/zh-CN.js
@@ -334,6 +334,7 @@ module.exports = [
               "ascii",
               "bit_length",
               "char_length",
+              "coalesce",
               "concat",
               "concat_ws",
               "ends_with",
@@ -442,6 +443,11 @@ module.exports = [
             directoryPath: "hash-functions/",
             children: ["murmur_hash3_32"],
           },
+          {
+            title: "数学函数",
+            directoryPath: "math-functions/",
+            children: ["conv"],
+          },
           {
             title: "加密和信息摘要函数",
             directoryPath: "encrypt-digest-functions/",
diff --git a/docs/en/sql-reference/sql-functions/math-functions/conv.md b/docs/en/sql-reference/sql-functions/math-functions/conv.md
new file mode 100644
index 0000000000..4ce1e1879c
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/math-functions/conv.md
@@ -0,0 +1,60 @@
+---
+{
+    "title": "conv",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# conv
+
+## description
+### Syntax
+
+`VARCHAR CONV(VARCHAR input, TINYINT from_base, TINYINT to_base)`
+`VARCHAR CONV(BIGINT input, TINYINT from_base, TINYINT to_base)`
+Converts the input number from base `from_base` to base `to_base`. The input base must be within `[2,36]`.
+
+## example
+
+```
+MySQL [test]> SELECT CONV(15,10,2);
++-----------------+
+| conv(15, 10, 2) |
++-----------------+
+| 1111            |
++-----------------+
+
+MySQL [test]> SELECT CONV('ff',16,10);
++--------------------+
+| conv('ff', 16, 10) |
++--------------------+
+| 255                |
++--------------------+
+
+MySQL [test]> SELECT CONV(230,10,16);
++-------------------+
+| conv(230, 10, 16) |
++-------------------+
+| E6                |
++-------------------+
+```
+
+## keyword
+	CONV
diff --git a/docs/en/sql-reference/sql-functions/string-functions/coalesce.md b/docs/en/sql-reference/sql-functions/string-functions/coalesce.md
new file mode 100644
index 0000000000..23fef73f49
--- /dev/null
+++ b/docs/en/sql-reference/sql-functions/string-functions/coalesce.md
@@ -0,0 +1,62 @@
+---
+{
+    "title": "coalesce",
+    "language": "en"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# coalesce
+## description
+### Syntax
+
+`VARCHAR coalesce(VARCHAR, ...)`
+`...`
+`INT coalesce(INT, ...)`
+
+The `coalesce` function returns the first non-NULL argument. If all arguments are NULL, it returns NULL.
+
+## example
+
+```
+MySQL> select coalesce(1,null,2);
++----------------------+
+| coalesce(1, NULL, 2) |
++----------------------+
+|                    1 |
++----------------------+
+
+MySQL> select coalesce(null,"asd",1);
++--------------------------+
+| coalesce(NULL, 'asd', 1) |
++--------------------------+
+| asd                      |
++--------------------------+
+
+MySQL> select coalesce(null,null,null);
++----------------------------+
+| coalesce(NULL, NULL, NULL) |
++----------------------------+
+|                       NULL |
++----------------------------+
+```
+## keyword
+coalesce
diff --git a/docs/zh-CN/sql-reference/sql-functions/math-functions/conv.md b/docs/zh-CN/sql-reference/sql-functions/math-functions/conv.md
new file mode 100644
index 0000000000..280228ec89
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-functions/math-functions/conv.md
@@ -0,0 +1,60 @@
+---
+{
+    "title": "conv",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+  http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# conv
+
+## description
+### Syntax
+
+`VARCHAR CONV(VARCHAR input, TINYINT from_base, TINYINT to_base)`
+`VARCHAR CONV(BIGINT input, TINYINT from_base, TINYINT to_base)`
+对输入的数字进行进制转换，输入的进制范围应该在`[2,36]`以内。
+
+## example
+
+```
+MySQL [test]> SELECT CONV(15,10,2);
++-----------------+
+| conv(15, 10, 2) |
++-----------------+
+| 1111            |
++-----------------+
+
+MySQL [test]> SELECT CONV('ff',16,10);
++--------------------+
+| conv('ff', 16, 10) |
++--------------------+
+| 255                |
++--------------------+
+
+MySQL [test]> SELECT CONV(230,10,16);
++-------------------+
+| conv(230, 10, 16) |
++-------------------+
+| E6                |
++-------------------+
+```
+
+## keyword
+	CONV
diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/coalesce.md b/docs/zh-CN/sql-reference/sql-functions/string-functions/coalesce.md
new file mode 100644
index 0000000000..0ea6c2a22e
--- /dev/null
+++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/coalesce.md
@@ -0,0 +1,63 @@
+---
+{
+    "title": "coalesce",
+    "language": "zh-CN"
+}
+---
+
+<!-- 
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+# coalesce
+## description
+### Syntax
+
+`VARCHAR coalesce(VARCHAR, ...)`
+`...`
+`INT coalesce(INT, ...)`
+
+`coalesce`函数会返回第一个非NULL的值，若全部为NULL，则返回NULL
+
+## example
+
+```
+MySQL> select coalesce(1,null,2);
++----------------------+
+| coalesce(1, NULL, 2) |
++----------------------+
+|                    1 |
++----------------------+
+
+MySQL> select coalesce(null,"asd",1);
++--------------------------+
+| coalesce(NULL, 'asd', 1) |
++--------------------------+
+| asd                      |
++--------------------------+
+
+MySQL> select coalesce(null,null,null);
++----------------------------+
+| coalesce(NULL, NULL, NULL) |
++----------------------------+
+|                       NULL |
++----------------------------+
+
+```
+## keyword
+coalesce
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyticExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyticExpr.java
index e4fd31051e..b7e73864f5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyticExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/AnalyticExpr.java
@@ -31,7 +31,7 @@ import com.google.common.base.Joiner;
 import com.google.common.base.MoreObjects;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
-
+import org.apache.doris.common.util.VectorizedUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -631,6 +631,9 @@ public class AnalyticExpr extends Expr {
                 fnCall = new FunctionCallExpr(new FunctionName(LASTVALUE),
                                               getFnCall().getParams());
             } else {
+                // TODO: for now first_value is not rewritten in vectorized mode;
+                // if the rewrite is needed in the future, this rule can be enabled.
+                if(!VectorizedUtil.isVectorized()) {
                 List<Expr> paramExprs = Expr.cloneList(getFnCall().getParams().exprs());
 
                 if (window.getRightBoundary().getType() == BoundaryType.PRECEDING) {
@@ -650,6 +653,7 @@ public class AnalyticExpr extends Expr {
                 fnCall = new FunctionCallExpr("FIRST_VALUE_REWRITE",
                                               new FunctionParams(paramExprs));
                 //        fnCall_.setIsInternalFnCall(true);
+                }
             }
 
             fnCall.setIsAnalyticFnCall(true);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
index cd346eb7bf..b59c22c9f4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java
@@ -219,6 +219,8 @@ public class Analyzer {
         // to the last Join clause (represented by its rhs table ref) that outer-joined it
         private final Map<TupleId, TableRef> outerJoinedTupleIds = Maps.newHashMap();
 
+        private final Set<TupleId> outerJoinedMaterializedTupleIds = Sets.newHashSet();
+
         // Map of registered conjunct to the last full outer join (represented by its
         // rhs table ref) that outer joined it.
         public final Map<ExprId, TableRef> fullOuterJoinedConjuncts = Maps.newHashMap();
@@ -791,6 +793,7 @@ public class Analyzer {
         // result.setLabel(srcSlotDesc.getLabel());
         result.setStats(srcSlotDesc.getStats());
         result.setType(srcSlotDesc.getType());
+        result.setIsNullable(srcSlotDesc.getIsNullable());
         // result.setItemTupleDesc(srcSlotDesc.getItemTupleDesc());
         return result;
     }
@@ -835,6 +838,7 @@ public class Analyzer {
 
     /**
      * Register tids as being outer-joined by Join clause represented by rhsRef.
+     * The slots of all outer-joined tuples should be nullable in their slot descriptors.
      */
     public void registerOuterJoinedTids(List<TupleId> tids, TableRef rhsRef) {
         for (TupleId tid: tids) {
@@ -846,6 +850,27 @@ public class Analyzer {
         }
     }
 
+    public void registerOuterJoinedMaterilizeTids(List<TupleId> tids) {
+        globalState.outerJoinedMaterializedTupleIds.addAll(tids);
+    }
+
+    /**
+     * Marks every slot of each tuple in outerJoinedTupleIds and outerJoinedMaterializedTupleIds as nullable.
+     */
+    public void changeAllOuterJoinTupleToNull() {
+        for (TupleId tid : globalState.outerJoinedTupleIds.keySet()) {
+            for (SlotDescriptor slotDescriptor : getTupleDesc(tid).getSlots()) {
+                slotDescriptor.setIsNullable(true);
+            }
+        }
+
+        for (TupleId tid : globalState.outerJoinedMaterializedTupleIds) {
+            for (SlotDescriptor slotDescriptor : getTupleDesc(tid).getSlots()) {
+                slotDescriptor.setIsNullable(true);
+            }
+        }
+    }
+
     /**
      * Register the given tuple id as being the invisible side of a semi-join.
      */
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ArithmeticExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArithmeticExpr.java
index c07a39afee..79a1ffa0f2 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ArithmeticExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ArithmeticExpr.java
@@ -128,13 +128,13 @@ public class ArithmeticExpr extends Expr {
 
                 functionSet.addBuiltin(ScalarFunction.createVecBuiltinOperator(
                         Operator.MULTIPLY.getName(), Lists.newArrayList(t1, t2),
-                        Type.getAssignmentCompatibleType(t1, t2, false)));
+                        Type.getNextNumType(Type.getAssignmentCompatibleType(t1, t2, false))));
                 functionSet.addBuiltin(ScalarFunction.createVecBuiltinOperator(
                         Operator.ADD.getName(), Lists.newArrayList(t1, t2),
-                        Type.getAssignmentCompatibleType(t1, t2, false)));
+                        Type.getNextNumType(Type.getAssignmentCompatibleType(t1, t2, false))));
                 functionSet.addBuiltin(ScalarFunction.createVecBuiltinOperator(
                         Operator.SUBTRACT.getName(), Lists.newArrayList(t1, t2),
-                        Type.getAssignmentCompatibleType(t1, t2, false)));
+                        Type.getNextNumType(Type.getAssignmentCompatibleType(t1, t2, false))));
             }
         }
 
@@ -147,6 +147,19 @@ public class ArithmeticExpr extends Expr {
                 Lists.<Type>newArrayList(Type.DECIMALV2, Type.DECIMALV2),
                 Type.DECIMALV2, Function.NullableMode.ALWAYS_NULLABLE));
 
+        functionSet.addBuiltin(ScalarFunction.createVecBuiltinOperator(
+                Operator.MOD.getName(),
+                Lists.<Type>newArrayList(Type.FLOAT, Type.FLOAT),
+                Type.FLOAT, Function.NullableMode.ALWAYS_NULLABLE));
+        functionSet.addBuiltin(ScalarFunction.createVecBuiltinOperator(
+                Operator.MOD.getName(),
+                Lists.<Type>newArrayList(Type.DOUBLE, Type.DOUBLE),
+                Type.DOUBLE, Function.NullableMode.ALWAYS_NULLABLE));
+        functionSet.addBuiltin(ScalarFunction.createVecBuiltinOperator(
+                Operator.MOD.getName(),
+                Lists.<Type>newArrayList(Type.DECIMALV2, Type.DECIMALV2),
+                Type.DECIMALV2, Function.NullableMode.ALWAYS_NULLABLE));
+
         for (int i = 0; i < Type.getIntegerTypes().size(); i++) {
             Type t1 = Type.getIntegerTypes().get(i);
             for (int j = 0; j < Type.getIntegerTypes().size(); j++) {
@@ -156,6 +169,10 @@ public class ArithmeticExpr extends Expr {
                         Operator.INT_DIVIDE.getName(), Lists.newArrayList(t1, t2),
                         Type.getAssignmentCompatibleType(t1, t2, false),
                         Function.NullableMode.ALWAYS_NULLABLE));
+                functionSet.addBuiltin(ScalarFunction.createVecBuiltinOperator(
+                        Operator.MOD.getName(), Lists.newArrayList(t1, t2),
+                        Type.getAssignmentCompatibleType(t1, t2, false),
+                        Function.NullableMode.ALWAYS_NULLABLE));
             }
         }
     }
@@ -250,9 +267,52 @@ public class ArithmeticExpr extends Expr {
         }
     }
 
+    private boolean castIfHaveSameType(Type t1, Type t2, Type target) throws AnalysisException {
+        // Cast both children, but only when at least one operand type already is the target.
+        final boolean matched = (t1 == target || t2 == target);
+        for (int childIdx = 0; matched && childIdx < 2; childIdx++) {
+            castChild(target, childIdx);
+        }
+        return matched;
+    }
+
+    /**
+     * Casts both children to an integer type already used by one of the operands.
+     * Candidates are tried in the order BIGINT, INT, SMALLINT, TINYINT, and the first one
+     * equal to t1 or t2 is applied. Does nothing unless t1 and t2 are both integer types.
+     *
+     * @throws AnalysisException propagated from castIfHaveSameType
+     */
+    private void castUpperInteger(Type t1, Type t2) throws AnalysisException {
+        if (!(t1.isIntegerType() && t2.isIntegerType())) {
+            return;
+        }
+        for (Type candidate : new Type[] {Type.BIGINT, Type.INT, Type.SMALLINT, Type.TINYINT}) {
+            if (castIfHaveSameType(t1, t2, candidate)) {
+                return;
+            }
+        }
+    }
+
     @Override
     public void analyzeImpl(Analyzer analyzer) throws AnalysisException {
         if (VectorizedUtil.isVectorized()) {
+            // bitnot is the only unary op, deal with it here
+            if (op == Operator.BITNOT) {
+                Type t = getChild(0).getType();
+                if (t.getPrimitiveType().ordinal() > PrimitiveType.LARGEINT.ordinal()) {
+                    type = Type.BIGINT;
+                    castChild(type, 0);
+                } else {
+                    type = t;
+                }
+                fn = getBuiltinFunction(
+                        analyzer, op.getName(), collectChildReturnTypes(), Function.CompareMode.IS_SUPERTYPE_OF);
+                if (fn == null) {
+                    Preconditions.checkState(false, String.format("No match for op '%s' with type %s", toSql(), type));
+                }
+                return;
+            }
             analyzeSubqueryInChildren();
             // if children has subquery, it will be rewritten and reanalyzed in the future.
             if (contains(Subquery.class)) {
@@ -261,30 +321,77 @@ public class ArithmeticExpr extends Expr {
 
             Type t1 = getChild(0).getType();
             Type t2 = getChild(1).getType();
+            Type commonType;
+
+            // Support null operation
+            if (t1.isNull() || t2.isNull()) {
+                castBinaryOp(t1.isNull() ? t2 : t1);
+                t1 = getChild(0).getType();
+                t2 = getChild(1).getType();
+            }
+
+            // handle the case where t1 or t2 is not a numeric type
+            if (!t1.isNumericType()) {
+                castChild(t1.getNumResultType(), 0);
+                t1 = t1.getNumResultType();
+            }
+            if (!t2.isNumericType()) {
+                castChild(t2.getNumResultType(), 1);
+                t2 = t2.getNumResultType();
+            }
 
             switch (op) {
                 case MULTIPLY:
                 case ADD:
                 case SUBTRACT:
+                    if (t1.isDecimalV2() || t2.isDecimalV2()) {
+                        castBinaryOp(findCommonType(t1, t2));
+                    }
+                    if (isConstant()) {
+                        castUpperInteger(t1, t2);
+                    }
                 case MOD:
+                    if (t1.isDecimalV2() || t2.isDecimalV2()) {
+                        castBinaryOp(findCommonType(t1, t2));
+                    } else if ((t1.isFloatingPointType() || t2.isFloatingPointType()) && !t1.equals(t2)) {
+                        castBinaryOp(Type.DOUBLE);
+                    }
+                    break;
                 case INT_DIVIDE:
-                    fn = getBuiltinFunction(analyzer, op.name, collectChildReturnTypes(),
-                            Function.CompareMode.IS_IDENTICAL);
+                    if (!t1.isFixedPointType() || !t2.isFloatingPointType()) {
+                        castBinaryOp(Type.BIGINT);
+                    }
                     break;
                 case DIVIDE:
                     t1 = getChild(0).getType().getNumResultType();
                     t2 = getChild(1).getType().getNumResultType();
-                    Type commonType = findCommonType(t1, t2);
+                    commonType = findCommonType(t1, t2);
                     if (commonType.getPrimitiveType() == PrimitiveType.BIGINT
                             || commonType.getPrimitiveType() == PrimitiveType.LARGEINT) {
                         commonType = Type.DOUBLE;
                     }
                     castBinaryOp(commonType);
-                    fn = getBuiltinFunction(analyzer, op.name, collectChildReturnTypes(),
-                            Function.CompareMode.IS_IDENTICAL);
+                    break;
+                case BITAND:
+                case BITOR:
+                case BITXOR:
+                    if (t1 == Type.BOOLEAN && t2 == Type.BOOLEAN) {
+                        t1 = Type.TINYINT;
+                        t2 = Type.TINYINT;
+                    }
+                    commonType = Type.getAssignmentCompatibleType(t1, t2, false);
+                    if (commonType.getPrimitiveType().ordinal() > PrimitiveType.LARGEINT.ordinal()) {
+                        commonType = Type.BIGINT;
+                    }
+                    type = castBinaryOp(commonType);
+                    break;
+                default:
+                    Preconditions.checkState(false,
+                            "Unknown arithmetic operation " + op.toString() + " in: " + this.toSql());
                     break;
             }
-
+            fn = getBuiltinFunction(analyzer, op.name, collectChildReturnTypes(),
+                    Function.CompareMode.IS_IDENTICAL);
             if (fn == null) {
                 Preconditions.checkState(false, String.format(
                         "No match for vec function '%s' with operand types %s and %s", toSql(), t1, t2));
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java
index f1c02e78e1..d1abcad937 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java
@@ -196,6 +196,8 @@ public class BinaryPredicate extends Predicate implements Writable {
         return op;
     }
 
+    public void setOp(Operator op) { this.op = op; }
+
     @Override
     public Expr negate() {
         Operator newOp = null;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
index f071e0fd9e..2a629f9f69 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CastExpr.java
@@ -451,4 +451,11 @@ public class CastExpr extends Expr {
         }
         return -1;
     }
+
+    @Override
+    public boolean isNullable() {
+        return children.get(0).isNullable() ||
+                (children.get(0).getType().isStringType() && !getType().isStringType()) ||
+                (!children.get(0).getType().isDateType() && getType().isDateType());
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
index f6a350adac..6fa7c69f8b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FunctionCallExpr.java
@@ -1053,7 +1053,7 @@ public class FunctionCallExpr extends Expr {
         // TODO: we can't correctly determine const-ness before analyzing 'fn_'. We should
         // rework logic so that we do not call this function on unanalyzed exprs.
         // Aggregate functions are never constant.
-        if (fn instanceof AggregateFunction) return false;
+        if (fn instanceof AggregateFunction || fn == null) return false;
 
         final String fnName = this.fnName.getFunction();
         // Non-deterministic functions are never constant.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/InPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/InPredicate.java
index 0edd1a0834..e8f58f88f8 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/InPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/InPredicate.java
@@ -67,10 +67,10 @@ public class InPredicate extends Predicate {
 
             String typeString = Function.getUdfTypeName(t.getPrimitiveType());
 
-            functionSet.addBuiltin(ScalarFunction.createBuiltin(IN_ITERATE,
+            functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltin(IN_ITERATE,
                     Type.BOOLEAN, Lists.newArrayList(t, t), true,
                     "doris::InPredicate::in_iterate", null, null, false));
-            functionSet.addBuiltin(ScalarFunction.createBuiltin(NOT_IN_ITERATE,
+            functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltin(NOT_IN_ITERATE,
                     Type.BOOLEAN, Lists.newArrayList(t, t), true,
                     "doris::InPredicate::not_in_iterate", null, null, false));
 
@@ -307,4 +307,9 @@ public class InPredicate extends Predicate {
         }
         return false;
     }
+
+    @Override
+    public boolean isNullable() {
+        return hasNullableChild();
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LikePredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LikePredicate.java
index a81e3e8ab5..97d8f2afdd 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LikePredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LikePredicate.java
@@ -51,13 +51,13 @@ public class LikePredicate extends Predicate {
     }
 
     public static void initBuiltins(FunctionSet functionSet) {
-        functionSet.addBuiltin(ScalarFunction.createBuiltin(
+        functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltin(
                 Operator.LIKE.name(), Type.BOOLEAN, Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
                 false,
                 "_ZN5doris13LikePredicate4likeEPN9doris_udf15FunctionContextERKNS1_9StringValES6_",
                 "_ZN5doris13LikePredicate12like_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE",
                 "_ZN5doris13LikePredicate10like_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE", true));
-        functionSet.addBuiltin(ScalarFunction.createBuiltin(
+        functionSet.addBuiltinBothScalaAndVectorized(ScalarFunction.createBuiltin(
                 Operator.REGEXP.name(), Type.BOOLEAN, Lists.<Type>newArrayList(Type.VARCHAR, Type.VARCHAR),
                 false,
                 "_ZN5doris13LikePredicate5regexEPN9doris_udf15FunctionContextERKNS1_9StringValES6_",
@@ -125,6 +125,7 @@ public class LikePredicate extends Predicate {
 
         fn = getBuiltinFunction(analyzer, op.toString(),
                 collectChildReturnTypes(), Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
+
         if (!getChild(1).getType().isNull() && getChild(1).isLiteral() && (op == Operator.REGEXP)) {
             // let's make sure the pattern works
             // TODO: this checks that it's a Java-supported regex, but the syntax supported
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
index 7216608558..22db67bfbc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java
@@ -511,6 +511,11 @@ public class SelectStmt extends QueryStmt {
             analyzer.registerConjuncts(whereClause, false, getTableRefIds());
         }
 
+        // Mark the slots of all outer-joined tuples as nullable here: the where and from clauses have
+        // been analyzed, so every slot desc of the join tuples is ready. This must happen before analyzing
+        // sort info/agg info/analytic info so the nullable marks are correct and BE executes the query right.
+        analyzer.changeAllOuterJoinTupleToNull();
+
         createSortInfo(analyzer);
         if (sortInfo != null && CollectionUtils.isNotEmpty(sortInfo.getOrderingExprs())) {
             if (groupingInfo != null) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SetOperationStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SetOperationStmt.java
index 5e345a144b..669dc42c68 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SetOperationStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SetOperationStmt.java
@@ -460,6 +460,7 @@ public class SetOperationStmt extends QueryStmt {
             SlotDescriptor slotDesc = analyzer.addSlotDescriptor(tupleDesc);
             slotDesc.setLabel(getColLabels().get(i));
             slotDesc.setType(expr.getType());
+            slotDesc.setIsNullable(expr.isNullable());
             // TODO(zc)
             // slotDesc.setStats(columnStats.get(i));
             SlotRef outputSlotRef = new SlotRef(slotDesc);
@@ -484,7 +485,9 @@ public class SetOperationStmt extends QueryStmt {
                 Expr resultExpr = op.getQueryStmt().getResultExprs().get(i);
                 slotDesc.addSourceExpr(resultExpr);
                 SlotRef slotRef = resultExpr.unwrapSlotRef(false);
-                if (slotRef == null || slotRef.getDesc().getIsNullable()
+                if (slotRef == null) {
+                    isNullable |= resultExpr.isNullable();
+                } else if (slotRef.getDesc().getIsNullable()
                         || analyzer.isOuterJoined(slotRef.getDesc().getParent().getId())) isNullable = true;
                 if (op.hasAnalyticExprs()) continue;
                 slotRef = resultExpr.unwrapSlotRef(true);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
index 41bb72d63d..95f90f2518 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/TableRef.java
@@ -477,16 +477,22 @@ public class TableRef implements ParseNode, Writable {
         if (joinOp == JoinOperator.LEFT_OUTER_JOIN
                 || joinOp == JoinOperator.FULL_OUTER_JOIN) {
             analyzer.registerOuterJoinedTids(getId().asList(), this);
+            analyzer.registerOuterJoinedMaterilizeTids(getMaterializedTupleIds());
         }
         if (joinOp == JoinOperator.RIGHT_OUTER_JOIN
                 || joinOp == JoinOperator.FULL_OUTER_JOIN) {
             analyzer.registerOuterJoinedTids(leftTblRef.getAllTableRefIds(), this);
+            analyzer.registerOuterJoinedMaterilizeTids(leftTblRef.getAllMaterializedTupleIds());
         }
         // register the tuple ids of a full outer join
         if (joinOp == JoinOperator.FULL_OUTER_JOIN) {
             analyzer.registerFullOuterJoinedTids(leftTblRef.getAllTableRefIds(), this);
             analyzer.registerFullOuterJoinedTids(getId().asList(), this);
+
+            analyzer.registerOuterJoinedMaterilizeTids(leftTblRef.getAllMaterializedTupleIds());
+            analyzer.registerOuterJoinedMaterilizeTids(getMaterializedTupleIds());
         }
+
         // register the tuple id of the rhs of a left semi join
         TupleId semiJoinedTupleId = null;
         if (joinOp == JoinOperator.LEFT_SEMI_JOIN
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
index 5d86b9cc7a..82e4035fe4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/AggregateFunction.java
@@ -49,7 +49,7 @@ public class AggregateFunction extends Function {
     private static final Logger LOG = LogManager.getLogger(AggregateFunction.class);
 
     public static ImmutableSet<String> NOT_NULLABLE_AGGREGATE_FUNCTION_NAME_SET =
-            ImmutableSet.of(FunctionSet.COUNT, "ndv", FunctionSet.BITMAP_UNION_INT, FunctionSet.BITMAP_UNION_COUNT, "ndv_no_finalize");
+            ImmutableSet.of("row_number", "rank", "dense_rank", FunctionSet.COUNT, "ndv", FunctionSet.BITMAP_UNION_INT, FunctionSet.BITMAP_UNION_COUNT, "ndv_no_finalize");
 
     // Set if different from retType_, null otherwise.
     private Type intermediateType;
@@ -239,9 +239,9 @@ public class AggregateFunction extends Function {
     }
 
     public static AggregateFunction createAnalyticBuiltin(String name,
-            List<Type> argTypes, Type retType, Type intermediateType) {
+            List<Type> argTypes, Type retType, Type intermediateType, boolean vectorized) {
         return createAnalyticBuiltin(name, argTypes, retType, intermediateType, null,
-                null, null, null, null, true);
+                null, null, null, null, true, vectorized);
     }
 
     public static AggregateFunction createAnalyticBuiltin(String name,
@@ -250,16 +250,25 @@ public class AggregateFunction extends Function {
             String getValueFnSymbol, String finalizeFnSymbol) {
         return createAnalyticBuiltin(name, argTypes, retType, intermediateType,
                 initFnSymbol, updateFnSymbol, removeFnSymbol, getValueFnSymbol, finalizeFnSymbol,
-                true);
+                true, false);
     }
 
     public static AggregateFunction createAnalyticBuiltin(String name,
             List<Type> argTypes, Type retType, Type intermediateType,
             String initFnSymbol, String updateFnSymbol, String removeFnSymbol,
-            String getValueFnSymbol, String finalizeFnSymbol, boolean isUserVisible) {
+            String getValueFnSymbol, String finalizeFnSymbol, boolean vectorized) {
+        return createAnalyticBuiltin(name, argTypes, retType, intermediateType,
+                initFnSymbol, updateFnSymbol, removeFnSymbol, getValueFnSymbol, finalizeFnSymbol,
+                true, vectorized);
+    }
+
+    public static AggregateFunction createAnalyticBuiltin(String name,
+            List<Type> argTypes, Type retType, Type intermediateType,
+            String initFnSymbol, String updateFnSymbol, String removeFnSymbol,
+            String getValueFnSymbol, String finalizeFnSymbol, boolean isUserVisible, boolean vectorized) {
         AggregateFunction fn = new AggregateFunction(new FunctionName(name),
                 argTypes, retType, intermediateType, null, updateFnSymbol, initFnSymbol,
-                null, null, getValueFnSymbol, removeFnSymbol, finalizeFnSymbol);
+                null, null, getValueFnSymbol, removeFnSymbol, finalizeFnSymbol, vectorized);
         fn.setBinaryType(TFunctionBinaryType.BUILTIN);
         fn.ignoresDistinct = false;
         fn.isAnalyticFn = true;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
index 4bb7c3af9a..9a7feae8f3 100755
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Catalog.java
@@ -252,15 +252,11 @@ import com.google.common.collect.Maps;
 import com.google.common.collect.Multimap;
 import com.google.common.collect.Queues;
 import com.google.common.collect.Sets;
-import com.sleepycat.je.rep.InsufficientLogException;
-import com.sleepycat.je.rep.NetworkRestore;
-import com.sleepycat.je.rep.NetworkRestoreConfig;
 
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
-import org.codehaus.jackson.map.ObjectMapper;
 
 import java.io.BufferedReader;
 import java.io.DataInputStream;
@@ -290,6 +286,11 @@ import java.util.stream.Collectors;
 
 import javax.annotation.Nullable;
 
+import com.sleepycat.je.rep.InsufficientLogException;
+import com.sleepycat.je.rep.NetworkRestore;
+import com.sleepycat.je.rep.NetworkRestoreConfig;
+import org.codehaus.jackson.map.ObjectMapper;
+
 public class Catalog {
     private static final Logger LOG = LogManager.getLogger(Catalog.class);
     // 0 ~ 9999 used for qe
@@ -5955,14 +5956,14 @@ public class Catalog {
         return functionSet.getFunction(desc, mode, true);
     }
 
-    public boolean isNullResultWithOneNullParamFunction(String funcName) {
-        return functionSet.isNullResultWithOneNullParamFunctions(funcName);
-    }
-
     public boolean isNondeterministicFunction(String funcName) {
         return functionSet.isNondeterministicFunction(funcName);
     }
 
+    public boolean isNullResultWithOneNullParamFunction(String funcName) {
+        return functionSet.isNullResultWithOneNullParamFunctions(funcName);
+    }
+
     /**
      * create cluster
      *
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
index 03327cec58..0c8079cc6e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/FunctionSet.java
@@ -1674,6 +1674,74 @@ public class FunctionSet<min_initIN9doris_udf12DecimalV2ValEEEvPNS2_15FunctionCo
                         null,
                         prefix + STDDEV_POP_FINALIZE_SYMBOL.get(t),
                         false, false, false));
+                //vec stddev stddev_samp stddev_pop
+                addBuiltin(AggregateFunction.createBuiltin("stddev",
+                        Lists.newArrayList(t), STDDEV_RETTYPE_SYMBOL.get(t), t,
+                        prefix + STDDEV_INIT_SYMBOL.get(t),
+                        prefix + STDDEV_UPDATE_SYMBOL.get(t),
+                        prefix + STDDEV_MERGE_SYMBOL.get(t),
+                        null,
+                        prefix + STDDEV_POP_FINALIZE_SYMBOL.get(t),
+                        false, false, false, true));
+                addBuiltin(AggregateFunction.createBuiltin("stddev_samp",
+                        Lists.newArrayList(t), STDDEV_RETTYPE_SYMBOL.get(t), t,
+                        prefix + STDDEV_INIT_SYMBOL.get(t),
+                        prefix + STDDEV_UPDATE_SYMBOL.get(t),
+                        prefix + STDDEV_MERGE_SYMBOL.get(t),
+                        null,
+                        prefix + STDDEV_FINALIZE_SYMBOL.get(t),
+                        false, false, false, true));
+                addBuiltin(AggregateFunction.createBuiltin("stddev_pop",
+                        Lists.newArrayList(t), STDDEV_RETTYPE_SYMBOL.get(t), t,
+                        prefix + STDDEV_INIT_SYMBOL.get(t),
+                        prefix + STDDEV_UPDATE_SYMBOL.get(t),
+                        prefix + STDDEV_MERGE_SYMBOL.get(t),
+                        null,
+                        prefix + STDDEV_POP_FINALIZE_SYMBOL.get(t),
+                        false, false, false, true));
+                
+                //vec: variance variance_samp var_samp variance_pop var_pop
+                addBuiltin(AggregateFunction.createBuiltin("variance",
+                        Lists.newArrayList(t), STDDEV_RETTYPE_SYMBOL.get(t), t,
+                        prefix + STDDEV_INIT_SYMBOL.get(t),
+                        prefix + STDDEV_UPDATE_SYMBOL.get(t),
+                        prefix + STDDEV_MERGE_SYMBOL.get(t),
+                        null,
+                        prefix + VAR_POP_FINALIZE_SYMBOL.get(t),
+                        false, false, false, true));
+                addBuiltin(AggregateFunction.createBuiltin("variance_pop",
+                        Lists.newArrayList(t), STDDEV_RETTYPE_SYMBOL.get(t), t,
+                        prefix + STDDEV_INIT_SYMBOL.get(t),
+                        prefix + STDDEV_UPDATE_SYMBOL.get(t),
+                        prefix + STDDEV_MERGE_SYMBOL.get(t),
+                        null,
+                        prefix + VAR_POP_FINALIZE_SYMBOL.get(t),
+                        false, false, false, true));
+                addBuiltin(AggregateFunction.createBuiltin("var_pop",
+                        Lists.newArrayList(t), STDDEV_RETTYPE_SYMBOL.get(t), t,
+                        prefix + STDDEV_INIT_SYMBOL.get(t),
+                        prefix + STDDEV_UPDATE_SYMBOL.get(t),
+                        prefix + STDDEV_MERGE_SYMBOL.get(t),
+                        null,
+                        prefix + VAR_POP_FINALIZE_SYMBOL.get(t),
+                        false, false, false, true));
+                addBuiltin(AggregateFunction.createBuiltin("variance_samp",
+                        Lists.newArrayList(t), STDDEV_RETTYPE_SYMBOL.get(t), t,
+                        prefix + STDDEV_INIT_SYMBOL.get(t),
+                        prefix + STDDEV_UPDATE_SYMBOL.get(t),
+                        prefix + STDDEV_MERGE_SYMBOL.get(t),
+                        null,
+                        prefix + VAR_FINALIZE_SYMBOL.get(t),
+                        false, false, false, true));
+                addBuiltin(AggregateFunction.createBuiltin("var_samp",
+                        Lists.newArrayList(t), STDDEV_RETTYPE_SYMBOL.get(t), t,
+                        prefix + STDDEV_INIT_SYMBOL.get(t),
+                        prefix + STDDEV_UPDATE_SYMBOL.get(t),
+                        prefix + STDDEV_MERGE_SYMBOL.get(t),
+                        null,
+                        prefix + VAR_FINALIZE_SYMBOL.get(t),
+                        false, false, false, true));                        
+
                 addBuiltin(AggregateFunction.createBuiltin("variance",
                         Lists.newArrayList(t), STDDEV_RETTYPE_SYMBOL.get(t), Type.VARCHAR,
                         prefix + STDDEV_INIT_SYMBOL.get(t),
@@ -2067,13 +2135,37 @@ public class FunctionSet<min_initIN9doris_udf12DecimalV2ValEEEvPNS2_15FunctionCo
                 null,
                 prefix + "20dense_rank_get_valueEPN9doris_udf15FunctionContextERNS1_9StringValE",
                 prefix + "13rank_finalizeEPN9doris_udf15FunctionContextERNS1_9StringValE"));
+        //row_number
         addBuiltin(AggregateFunction.createAnalyticBuiltin( "row_number",
                 new ArrayList<Type>(), Type.BIGINT, Type.BIGINT,
                 prefix + "18init_zero_not_nullIN9doris_udf9BigIntValEEEvPNS2_15FunctionContextEPT_",
                 prefix + "17count_star_updateEPN9doris_udf15FunctionContextEPNS1_9BigIntValE",
                 prefix + "11count_mergeEPN9doris_udf15FunctionContextERKNS1_9BigIntValEPS4_",
                 null, null));
-
+        
+        //vec Rank
+        addBuiltin(AggregateFunction.createAnalyticBuiltin("rank",
+                Lists.<Type>newArrayList(), Type.BIGINT, Type.VARCHAR,
+                prefix + "9rank_initEPN9doris_udf15FunctionContextEPNS1_9StringValE",
+                prefix + "11rank_updateEPN9doris_udf15FunctionContextEPNS1_9StringValE",
+                null,
+                prefix + "14rank_get_valueEPN9doris_udf15FunctionContextERNS1_9StringValE",
+                prefix + "13rank_finalizeEPN9doris_udf15FunctionContextERNS1_9StringValE", true));
+        //vec Dense rank
+        addBuiltin(AggregateFunction.createAnalyticBuiltin("dense_rank",
+                Lists.<Type>newArrayList(), Type.BIGINT, Type.VARCHAR,
+                prefix + "9rank_initEPN9doris_udf15FunctionContextEPNS1_9StringValE",
+                prefix + "17dense_rank_updateEPN9doris_udf15FunctionContextEPNS1_9StringValE",
+                null,
+                prefix + "20dense_rank_get_valueEPN9doris_udf15FunctionContextERNS1_9StringValE",
+                prefix + "13rank_finalizeEPN9doris_udf15FunctionContextERNS1_9StringValE", true));
+        //vec row_number
+        addBuiltin(AggregateFunction.createAnalyticBuiltin( "row_number",
+                new ArrayList<Type>(), Type.BIGINT, Type.BIGINT,
+                prefix + "18init_zero_not_nullIN9doris_udf9BigIntValEEEvPNS2_15FunctionContextEPT_",
+                prefix + "17count_star_updateEPN9doris_udf15FunctionContextEPNS1_9BigIntValE",
+                prefix + "11count_mergeEPN9doris_udf15FunctionContextERKNS1_9BigIntValEPS4_",
+                null, null, true));
 
         for (Type t : Type.getSupportedTypes()) {
             if (t.isNull()) {
@@ -2089,15 +2181,6 @@ public class FunctionSet<min_initIN9doris_udf12DecimalV2ValEEEvPNS2_15FunctionCo
                     null,
                     t.isStringType()  ? stringValGetValue : null,
                     t.isStringType()  ? stringValSerializeOrFinalize : null));
-            // Implements FIRST_VALUE for some windows that require rewrites during planning.
-            addBuiltin(AggregateFunction.createAnalyticBuiltin(
-                    "first_value_rewrite", Lists.newArrayList(t, Type.BIGINT), t, t,
-                    t.isStringType() ? initNullString : initNull,
-                    prefix + FIRST_VALUE_REWRITE_UPDATE_SYMBOL.get(t),
-                    null,
-                    t.isStringType() ? stringValGetValue : null,
-                    t.isStringType() ? stringValSerializeOrFinalize : null,
-                    false));
 
             addBuiltin(AggregateFunction.createAnalyticBuiltin(
                     "last_value", Lists.newArrayList(t), t, t,
@@ -2107,27 +2190,73 @@ public class FunctionSet<min_initIN9doris_udf12DecimalV2ValEEEvPNS2_15FunctionCo
                     t.isStringType() ? stringValGetValue : null,
                     t.isStringType() ? stringValSerializeOrFinalize : null));
 
+            //vec first_value
+            addBuiltin(AggregateFunction.createAnalyticBuiltin(
+                    "first_value", Lists.newArrayList(t), t, t,
+                    t.isStringType() ? initNullString : initNull,
+                    prefix + FIRST_VALUE_UPDATE_SYMBOL.get(t),
+                    null,
+                    t.isStringType()  ? stringValGetValue : null,
+                    t.isStringType()  ? stringValSerializeOrFinalize : null, true));
+            // Implements FIRST_VALUE for some windows that require rewrites during planning.
+            addBuiltin(AggregateFunction.createAnalyticBuiltin(
+                    "first_value_rewrite", Lists.newArrayList(t, Type.BIGINT), t, t,
+                    t.isStringType() ? initNullString : initNull,
+                    prefix + FIRST_VALUE_REWRITE_UPDATE_SYMBOL.get(t),
+                    null,
+                    t.isStringType() ? stringValGetValue : null,
+                    t.isStringType() ? stringValSerializeOrFinalize : null,
+                    false, false));
+            //vec last_value
+            addBuiltin(AggregateFunction.createAnalyticBuiltin(
+                    "last_value", Lists.newArrayList(t), t, t,
+                    t.isStringType() ? initNullString : initNull,
+                    prefix + LAST_VALUE_UPDATE_SYMBOL.get(t),
+                    prefix + LAST_VALUE_REMOVE_SYMBOL.get(t),
+                    t.isStringType() ? stringValGetValue : null,
+                    t.isStringType() ? stringValSerializeOrFinalize : null, true));
+
             addBuiltin(AggregateFunction.createAnalyticBuiltin(
                     "lag", Lists.newArrayList(t, Type.BIGINT, t), t, t,
                     prefix + OFFSET_FN_INIT_SYMBOL.get(t),
                     prefix + OFFSET_FN_UPDATE_SYMBOL.get(t),
                     null, null, null));
+                    
             addBuiltin(AggregateFunction.createAnalyticBuiltin(
                     "lead", Lists.newArrayList(t, Type.BIGINT, t), t, t,
                     prefix + OFFSET_FN_INIT_SYMBOL.get(t),
                     prefix + OFFSET_FN_UPDATE_SYMBOL.get(t),
                     null, null, null));
+            //vec
+            addBuiltin(AggregateFunction.createAnalyticBuiltin(
+                    "lag", Lists.newArrayList(t, Type.BIGINT, t), t, t,
+                    prefix + OFFSET_FN_INIT_SYMBOL.get(t),
+                    prefix + OFFSET_FN_UPDATE_SYMBOL.get(t),
+                    null, null, null, true));
+            addBuiltin(AggregateFunction.createAnalyticBuiltin(
+                    "lead", Lists.newArrayList(t, Type.BIGINT, t), t, t,
+                    prefix + OFFSET_FN_INIT_SYMBOL.get(t),
+                    prefix + OFFSET_FN_UPDATE_SYMBOL.get(t),
+                    null, null, null, true));
 
             // lead() and lag() the default offset and the default value should be
             // rewritten to call the overrides that take all parameters.
             addBuiltin(AggregateFunction.createAnalyticBuiltin(
-                    "lag", Lists.newArrayList(t), t, t));
+                    "lag", Lists.newArrayList(t), t, t, false));
             addBuiltin(AggregateFunction.createAnalyticBuiltin(
-                    "lag", Lists.newArrayList(t, Type.BIGINT), t, t));
+                    "lag", Lists.newArrayList(t, Type.BIGINT), t, t, false));
             addBuiltin(AggregateFunction.createAnalyticBuiltin(
-                    "lead", Lists.newArrayList(t), t, t));
+                    "lead", Lists.newArrayList(t), t, t, false));
             addBuiltin(AggregateFunction.createAnalyticBuiltin(
-                    "lead", Lists.newArrayList(t, Type.BIGINT), t, t));
+                    "lead", Lists.newArrayList(t, Type.BIGINT), t, t, false));
+            addBuiltin(AggregateFunction.createAnalyticBuiltin(
+                        "lag", Lists.newArrayList(t), t, t, true));
+            addBuiltin(AggregateFunction.createAnalyticBuiltin(
+                        "lag", Lists.newArrayList(t, Type.BIGINT), t, t, true));
+            addBuiltin(AggregateFunction.createAnalyticBuiltin(
+                        "lead", Lists.newArrayList(t), t, t, true));
+            addBuiltin(AggregateFunction.createAnalyticBuiltin(
+                        "lead", Lists.newArrayList(t, Type.BIGINT), t, t, true));
         }
 
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
index 7e2d2ab19c..be61b723dc 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Type.java
@@ -400,6 +400,31 @@ public abstract class Type {
         return ScalarType.INVALID;
     }
 
+    public static Type getNextNumType(Type t) { // BOOLEAN->TINYINT->SMALLINT->INT->BIGINT, FLOAT->DOUBLE
+        switch (t.getPrimitiveType()) {
+            case BOOLEAN:
+                return TINYINT;
+            case TINYINT:
+                return SMALLINT;
+            case SMALLINT:
+                return INT;
+            case INT:
+                return BIGINT;
+            case BIGINT:
+                return BIGINT; // returned unchanged (not promoted to LARGEINT)
+            case LARGEINT:
+                return LARGEINT; // returned unchanged
+            case FLOAT:
+                return DOUBLE;
+            case DOUBLE:
+                return DOUBLE; // returned unchanged
+            case DECIMALV2:
+                return DECIMALV2; // returned unchanged
+            default:
+                return INVALID; // every other primitive type has no mapping
+        }
+    }
+
     /**
      * Returns null if this expr is not instance of StringLiteral or StringLiteral
      * inner value could not parse to long. otherwise return parsed Long result.
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/profile/PlanTreeBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/common/profile/PlanTreeBuilder.java
index bb71e7f08b..dab18e1421 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/profile/PlanTreeBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/profile/PlanTreeBuilder.java
@@ -109,6 +109,7 @@ public class PlanTreeBuilder {
         }
 
         if (planNode.getPlanNodeName().equals(ExchangeNode.EXCHANGE_NODE)
+                || planNode.getPlanNodeName().equals(ExchangeNode.VEXCHANGE_NODE)
                 || planNode.getPlanNodeName().equals(ExchangeNode.MERGING_EXCHANGE_NODE)) {
             exchangeNodes.add(node);
         } else {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/profile/ProfileTreeBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/common/profile/ProfileTreeBuilder.java
index 24782fcc45..d810e02fd1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/profile/ProfileTreeBuilder.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/profile/ProfileTreeBuilder.java
@@ -50,7 +50,9 @@ import java.util.regex.Pattern;
 public class ProfileTreeBuilder {
 
     private static final String PROFILE_NAME_DATA_STREAM_SENDER = "DataStreamSender";
+    private static final String PROFILE_NAME_VDATA_STREAM_SENDER = "VDataStreamSender";
     private static final String PROFILE_NAME_DATA_BUFFER_SENDER = "DataBufferSender";
+    private static final String PROFILE_NAME_VDATA_BUFFER_SENDER = "VDataBufferSender";
     private static final String PROFILE_NAME_OLAP_TABLE_SINK = "OlapTableSink";
     private static final String PROFILE_NAME_BLOCK_MGR = "BlockMgr";
     private static final String PROFILE_NAME_BUFFER_POOL = "Buffer pool";
@@ -207,6 +209,8 @@ public class ProfileTreeBuilder {
         for (Pair<RuntimeProfile, Boolean> pair : instanceChildren) {
             RuntimeProfile profile = pair.first;
             if (profile.getName().startsWith(PROFILE_NAME_DATA_STREAM_SENDER)
+                    || profile.getName().startsWith(PROFILE_NAME_VDATA_STREAM_SENDER)
+                    || profile.getName().startsWith(PROFILE_NAME_VDATA_BUFFER_SENDER)
                     || profile.getName().startsWith(PROFILE_NAME_DATA_BUFFER_SENDER)
                     || profile.getName().startsWith(PROFILE_NAME_OLAP_TABLE_SINK)) {
                 senderNode = buildTreeNode(profile, null, fragmentId, instanceId);
@@ -297,6 +301,8 @@ public class ProfileTreeBuilder {
             return PROFILE_NAME_DATA_BUFFER_SENDER;
         } else if (name.startsWith(PROFILE_NAME_OLAP_TABLE_SINK)) {
             return PROFILE_NAME_OLAP_TABLE_SINK;
+        } else if (name.startsWith(PROFILE_NAME_VDATA_BUFFER_SENDER)) {
+            return PROFILE_NAME_VDATA_BUFFER_SENDER;
         } else {
             return null;
         }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/AnalyticPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/AnalyticPlanner.java
index 42420ad705..d26c203ffa 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/AnalyticPlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/AnalyticPlanner.java
@@ -321,6 +321,7 @@ public class AnalyticPlanner {
                 }
                 // all output slots need to be materialized
                 sortSlotDesc.setIsMaterialized(true);
+                sortSlotDesc.setIsNullable(inputSlotDesc.getIsNullable());
                 sortSmap.put(new SlotRef(inputSlotDesc), new SlotRef(sortSlotDesc));
                 sortSlotExprs.add(new SlotRef(inputSlotDesc));
             }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ExchangeNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ExchangeNode.java
index f03527a82a..1c8669dadf 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/ExchangeNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ExchangeNode.java
@@ -51,6 +51,7 @@ public class ExchangeNode extends PlanNode {
     private static final Logger LOG = LogManager.getLogger(ExchangeNode.class);
 
     public static final String EXCHANGE_NODE = "EXCHANGE";
+    public static final String VEXCHANGE_NODE = "VEXCHANGE";
     public static final String MERGING_EXCHANGE_NODE = "MERGING-EXCHANGE";
 
     // The parameters based on which sorted input streams are merged by this
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
index 83d2ff50e3..2ee261874f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HashJoinNode.java
@@ -33,6 +33,7 @@ import org.apache.doris.catalog.Table;
 import org.apache.doris.common.CheckedMath;
 import org.apache.doris.common.Pair;
 import org.apache.doris.common.UserException;
+import org.apache.doris.common.util.VectorizedUtil;
 import org.apache.doris.thrift.TEqJoinCondition;
 import org.apache.doris.thrift.TExplainLevel;
 import org.apache.doris.thrift.THashJoinNode;
@@ -66,6 +67,10 @@ public class HashJoinNode extends PlanNode {
     private List<BinaryPredicate> eqJoinConjuncts = Lists.newArrayList();
     // join conjuncts from the JOIN clause that aren't equi-join predicates
     private List<Expr> otherJoinConjuncts;
+    // Join conjuncts from the JOIN clause that aren't equi-join predicates, held as a single
+    // expression; only used by the vectorized execution engine.
+    private Expr votherJoinConjunct = null;
+
     private DistributionMode distrMode;
     private boolean isColocate = false; //the flag for colocate join
     private String colocateReason = ""; // if can not do colocate join, set reason here
@@ -76,15 +81,37 @@ public class HashJoinNode extends PlanNode {
         super(id, "HASH JOIN");
         Preconditions.checkArgument(eqJoinConjuncts != null && !eqJoinConjuncts.isEmpty());
         Preconditions.checkArgument(otherJoinConjuncts != null);
-        tupleIds.addAll(outer.getTupleIds());
-        tupleIds.addAll(inner.getTupleIds());
         tblRefIds.addAll(outer.getTblRefIds());
         tblRefIds.addAll(inner.getTblRefIds());
         this.innerRef = innerRef;
         this.joinOp = innerRef.getJoinOp();
+
+        // TODO: also drop the unused side's tuple ids for semi/anti joins in the non-vectorized engine
+        if (VectorizedUtil.isVectorized()) {
+            if (joinOp.equals(JoinOperator.LEFT_ANTI_JOIN) || joinOp.equals(JoinOperator.LEFT_SEMI_JOIN)
+                    || joinOp.equals(JoinOperator.NULL_AWARE_LEFT_ANTI_JOIN)) {
+                tupleIds.addAll(outer.getTupleIds());
+            } else if (joinOp.equals(JoinOperator.RIGHT_ANTI_JOIN) || joinOp.equals(JoinOperator.RIGHT_SEMI_JOIN)) {
+                tupleIds.addAll(inner.getTupleIds());
+            } else {
+                tupleIds.addAll(outer.getTupleIds());
+                tupleIds.addAll(inner.getTupleIds());
+            }
+        } else {
+            tupleIds.addAll(outer.getTupleIds());
+            tupleIds.addAll(inner.getTupleIds());
+        }
+
         for (Expr eqJoinPredicate : eqJoinConjuncts) {
             Preconditions.checkArgument(eqJoinPredicate instanceof BinaryPredicate);
-            this.eqJoinConjuncts.add((BinaryPredicate) eqJoinPredicate);
+            BinaryPredicate eqJoin = (BinaryPredicate) eqJoinPredicate;
+            if (eqJoin.getOp().equals(BinaryPredicate.Operator.EQ_FOR_NULL)) {
+                Preconditions.checkArgument(eqJoin.getChildren().size() == 2);
+                if (!eqJoin.getChild(0).isNullable() || !eqJoin.getChild(1).isNullable()) {
+                    eqJoin.setOp(BinaryPredicate.Operator.EQ);
+                }
+            }
+            this.eqJoinConjuncts.add(eqJoin);
         }
         this.distrMode = DistributionMode.NONE;
         this.otherJoinConjuncts = otherJoinConjuncts;
@@ -575,6 +602,11 @@ public class HashJoinNode extends PlanNode {
         for (Expr e : otherJoinConjuncts) {
             msg.hash_join_node.addToOtherJoinConjuncts(e.treeToThrift());
         }
+
+        // Used by the vectorized execution engine in place of otherJoinConjuncts.
+        if (votherJoinConjunct != null) {
+            msg.hash_join_node.setVotherJoinConjunct(votherJoinConjunct.treeToThrift());
+        }
     }
 
     @Override
@@ -635,4 +667,13 @@ public class HashJoinNode extends PlanNode {
             return description;
         }
     }
+
+    @Override
+    void convertToVectoriezd() {
+        if (!otherJoinConjuncts.isEmpty()) { // fold the non-equi join conjuncts into one Expr
+            votherJoinConjunct = convertConjunctsToAndCompoundPredicate(otherJoinConjuncts);
+            initCompoundPredicate(votherJoinConjunct); // sets BOOLEAN type on compound predicates
+        }
+        super.convertToVectoriezd(); // PlanNode builds vconjunct from conjuncts the same way
+    }
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
index a1b1362c59..8a4a3597f5 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java
@@ -767,7 +767,6 @@ public class OlapScanNode extends ScanNode {
             msg.olap_scan_node.setSortColumn(sortColumn);
         }
         msg.olap_scan_node.setKeyType(olapTable.getKeysType().toThrift());
-        msg.olap_scan_node.setTableName(olapTable.getName());
     }
 
     // export some tablets
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
index 30b242c99f..a6003f304f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/PlanNode.java
@@ -301,7 +301,7 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
         return conjuncts;
     }
 
-    private void initCompoundPredicate(Expr expr) {
+    void initCompoundPredicate(Expr expr) {
         if (expr instanceof CompoundPredicate) {
             CompoundPredicate compoundPredicate = (CompoundPredicate) expr;
             compoundPredicate.setType(Type.BOOLEAN);
@@ -318,7 +318,7 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
         }
     }
 
-    private Expr convertConjunctsToAndCompoundPredicate() {
+    Expr convertConjunctsToAndCompoundPredicate(List<Expr> conjuncts) {
         List<Expr> targetConjuncts = Lists.newArrayList(conjuncts);
         while (targetConjuncts.size() > 1) {
             List<Expr> newTargetConjuncts = Lists.newArrayList();
@@ -357,6 +357,9 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
     }
 
     public void transferConjuncts(PlanNode recipient) {
+        recipient.vconjunct = vconjunct;
+        vconjunct = null;
+        
         recipient.conjuncts.addAll(conjuncts);
         conjuncts.clear();
     }
@@ -859,7 +862,7 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
 
     void convertToVectoriezd() {
         if (!conjuncts.isEmpty()) {
-            vconjunct = convertConjunctsToAndCompoundPredicate();
+            vconjunct = convertConjunctsToAndCompoundPredicate(conjuncts);
             initCompoundPredicate(vconjunct);
         }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java
index 00454b0730..f93ebdb53e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/Planner.java
@@ -30,6 +30,7 @@ import org.apache.doris.analysis.StorageBackend;
 import org.apache.doris.analysis.TupleDescriptor;
 import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.catalog.ScalarType;
+import org.apache.doris.common.util.VectorizedUtil;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.profile.PlanTreeBuilder;
 import org.apache.doris.common.profile.PlanTreePrinter;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
index 250e3c3bbf..e76253a1df 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java
@@ -68,6 +68,7 @@ import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
 
+import org.apache.doris.common.util.VectorizedUtil;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
@@ -1069,6 +1070,16 @@ public class SingleNodePlanner {
                 && groupingInfo != null);
         root = new RepeatNode(ctx_.getNextNodeId(), root, groupingInfo, groupByClause);
         root.init(analyzer);
+        // Mark the grouping-expr slots of the aggregate output tuple as nullable.
+        AggregateInfo aggInfo = selectStmt.getAggInfo();
+        TupleId aggOutTupleId = aggInfo.getOutputTupleId();
+        TupleDescriptor aggOutTupleDescriptor = analyzer.getDescTbl().getTupleDesc(aggOutTupleId);
+        int aggregateExprStartIndex = groupByClause.getGroupingExprs().size();
+        for (int i = 0; i < aggregateExprStartIndex; ++i) {
+            SlotDescriptor slot = aggOutTupleDescriptor.getSlots().get(i);
+            if (!slot.getIsNullable())
+                slot.setIsNullable(true);
+        }
         return root;
     }
 
@@ -1366,7 +1377,8 @@ public class SingleNodePlanner {
         // inline view's plan.
         ExprSubstitutionMap outputSmap = ExprSubstitutionMap.compose(
                 inlineViewRef.getSmap(), rootNode.getOutputSmap(), analyzer);
-        if (analyzer.isOuterJoined(inlineViewRef.getId())) {
+        // The vectorized execution engine does not need TupleIsNull, so skip the wrapping for it.
+        if (analyzer.isOuterJoined(inlineViewRef.getId()) && !VectorizedUtil.isVectorized()) {
             rootNode.setWithoutTupleIsNullOutputSmap(outputSmap);
             // Exprs against non-matched rows of an outer join should always return NULL.
             // Make the rhs exprs of the output smap nullable, if necessary. This expr wrapping
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
index 486861434c..c30e021757 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
@@ -472,7 +472,6 @@ public class Coordinator {
         }  else {
             // This is a load process.
             this.queryOptions.setIsReportSuccess(true);
-            this.queryOptions.setEnableVectorizedEngine(false);
             deltaUrls = Lists.newArrayList();
             loadCounters = Maps.newHashMap();
             List<Long> relatedBackendIds = Lists.newArrayList(addressToBackendID.values());
diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java
index 26ca3f793c..0bcbfb6e72 100755
--- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FEFunctions.java
@@ -350,12 +350,30 @@ public class FEFunctions {
      * Arithmetic function
      */
 
-    @FEFunction(name = "add", argTypes = { "BIGINT", "BIGINT" }, returnType = "BIGINT")
+    @FEFunction(name = "add", argTypes = { "TINYINT", "TINYINT" }, returnType = "SMALLINT")
+    public static IntLiteral addTinyint(LiteralExpr first, LiteralExpr second) throws AnalysisException {
+        long result = Math.addExact(first.getLongValue(), second.getLongValue());
+        return new IntLiteral(result, Type.SMALLINT);
+    }
+
+    @FEFunction(name = "add", argTypes = { "SMALLINT", "SMALLINT" }, returnType = "INT")
+    public static IntLiteral addSmallint(LiteralExpr first, LiteralExpr second) throws AnalysisException {
+        long result = Math.addExact(first.getLongValue(), second.getLongValue());
+        return new IntLiteral(result, Type.INT);
+    }
+
+    @FEFunction(name = "add", argTypes = { "INT", "INT" }, returnType = "BIGINT")
     public static IntLiteral addInt(LiteralExpr first, LiteralExpr second) throws AnalysisException {
         long result = Math.addExact(first.getLongValue(), second.getLongValue());
         return new IntLiteral(result, Type.BIGINT);
     }
 
+    @FEFunction(name = "add", argTypes = { "BIGINT", "BIGINT" }, returnType = "BIGINT")
+    public static IntLiteral addBigint(LiteralExpr first, LiteralExpr second) throws AnalysisException {
+        long result = Math.addExact(first.getLongValue(), second.getLongValue());
+        return new IntLiteral(result, Type.BIGINT);
+    }
+
     @FEFunction(name = "add", argTypes = { "DOUBLE", "DOUBLE" }, returnType = "DOUBLE")
     public static FloatLiteral addDouble(LiteralExpr first, LiteralExpr second) throws AnalysisException {
         double result = first.getDoubleValue() + second.getDoubleValue();
@@ -379,12 +397,30 @@ public class FEFunctions {
         return new LargeIntLiteral(result.toString());
     }
 
-    @FEFunction(name = "subtract", argTypes = { "BIGINT", "BIGINT" }, returnType = "BIGINT")
+    @FEFunction(name = "subtract", argTypes = { "TINYINT", "TINYINT" }, returnType = "SMALLINT")
+    public static IntLiteral subtractTinyint(LiteralExpr first, LiteralExpr second) throws AnalysisException {
+        long result = Math.subtractExact(first.getLongValue(), second.getLongValue());
+        return new IntLiteral(result, Type.SMALLINT);
+    }
+
+    @FEFunction(name = "subtract", argTypes = { "SMALLINT", "SMALLINT" }, returnType = "INT")
+    public static IntLiteral subtractSmallint(LiteralExpr first, LiteralExpr second) throws AnalysisException {
+        long result = Math.subtractExact(first.getLongValue(), second.getLongValue());
+        return new IntLiteral(result, Type.INT);
+    }
+
+    @FEFunction(name = "subtract", argTypes = { "INT", "INT" }, returnType = "BIGINT")
     public static IntLiteral subtractInt(LiteralExpr first, LiteralExpr second) throws AnalysisException {
         long result = Math.subtractExact(first.getLongValue(), second.getLongValue());
         return new IntLiteral(result, Type.BIGINT);
     }
 
+    @FEFunction(name = "subtract", argTypes = { "BIGINT", "BIGINT" }, returnType = "BIGINT")
+    public static IntLiteral subtractBigint(LiteralExpr first, LiteralExpr second) throws AnalysisException {
+        long result = Math.subtractExact(first.getLongValue(), second.getLongValue());
+        return new IntLiteral(result, Type.BIGINT);
+    }
+
     @FEFunction(name = "subtract", argTypes = { "DOUBLE", "DOUBLE" }, returnType = "DOUBLE")
     public static FloatLiteral subtractDouble(LiteralExpr first, LiteralExpr second) throws AnalysisException {
         double result = first.getDoubleValue() - second.getDoubleValue();
@@ -408,11 +444,27 @@ public class FEFunctions {
         return new LargeIntLiteral(result.toString());
     }
 
-    @FEFunction(name = "multiply", argTypes = { "BIGINT", "BIGINT" }, returnType = "BIGINT")
+    @FEFunction(name = "multiply", argTypes = { "TINYINT", "TINYINT" }, returnType = "SMALLINT")
+    public static IntLiteral multiplyTinyint(LiteralExpr first, LiteralExpr second) throws AnalysisException {
+        long result = Math.multiplyExact(first.getLongValue(), second.getLongValue());
+        return new IntLiteral(result, Type.SMALLINT);
+    }
+
+    @FEFunction(name = "multiply", argTypes = { "SMALLINT", "SMALLINT" }, returnType = "INT")
+    public static IntLiteral multiplySmallint(LiteralExpr first, LiteralExpr second) throws AnalysisException {
+        long result = Math.multiplyExact(first.getLongValue(), second.getLongValue());
+        return new IntLiteral(result, Type.INT);
+    }
+
+    @FEFunction(name = "multiply", argTypes = { "INT", "INT" }, returnType = "BIGINT")
     public static IntLiteral multiplyInt(LiteralExpr first, LiteralExpr second) throws AnalysisException {
-        long left = first.getLongValue();
-        long right = second.getLongValue();
-        long result = Math.multiplyExact(left, right);
+        long result = Math.multiplyExact(first.getLongValue(), second.getLongValue());
+        return new IntLiteral(result, Type.BIGINT);
+    }
+
+    @FEFunction(name = "multiply", argTypes = { "BIGINT", "BIGINT" }, returnType = "BIGINT")
+    public static IntLiteral multiplyBigint(LiteralExpr first, LiteralExpr second) throws AnalysisException {
+        long result = Math.multiplyExact(first.getLongValue(), second.getLongValue());
         return new IntLiteral(result, Type.BIGINT);
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
index 6569a735f3..8f5137f876 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/rewrite/FoldConstantsRule.java
@@ -32,6 +32,7 @@ import org.apache.doris.catalog.Type;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.LoadException;
 import org.apache.doris.common.util.TimeUtils;
+import org.apache.doris.common.util.VectorizedUtil;
 import org.apache.doris.proto.InternalService;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.VariableMgr;
@@ -215,7 +216,7 @@ public class FoldConstantsRule implements ExprRewriteRule {
                 getInfoFnExpr(expr, infoFnMap);
                 return;
             }
-            constExprMap.put(expr.getId().toString(),expr.treeToThrift());
+            constExprMap.put(expr.getId().toString(), expr.treeToThrift());
             oriConstMap.put(expr.getId().toString(), expr);
         } else {
             recursiveGetChildrenConstExpr(expr, constExprMap, oriConstMap, analyzer, sysVarMap, infoFnMap);
@@ -353,7 +354,8 @@ public class FoldConstantsRule implements ExprRewriteRule {
             }
 
             TFoldConstantParams tParams = new TFoldConstantParams(map, queryGlobals);
-
+            tParams.setVecExec(VectorizedUtil.isVectorized());
+            
             Future<InternalService.PConstantExprResult> future = BackendServiceProxy.getInstance().foldConstantExpr(brpcAddress, tParams);
             InternalService.PConstantExprResult result = future.get(5, TimeUnit.SECONDS);
 
diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/CoordinatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/CoordinatorTest.java
index 3f7e923a88..fb86df6e33 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/qe/CoordinatorTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/qe/CoordinatorTest.java
@@ -19,6 +19,7 @@ package org.apache.doris.qe;
 
 import org.apache.doris.analysis.Analyzer;
 import org.apache.doris.analysis.BinaryPredicate;
+import org.apache.doris.analysis.BoolLiteral;
 import org.apache.doris.analysis.Expr;
 import org.apache.doris.analysis.TableRef;
 import org.apache.doris.analysis.TupleDescriptor;
@@ -162,11 +163,15 @@ public class CoordinatorTest extends Coordinator {
         tupleIdArrayList.add(testTupleId);
 
         ArrayList<Expr> testJoinexprs = new ArrayList<>();
-        BinaryPredicate binaryPredicate = new BinaryPredicate();
+
+        BinaryPredicate binaryPredicate = new BinaryPredicate(BinaryPredicate.Operator.EQ, new BoolLiteral(true),
+                        new BoolLiteral(true));
         testJoinexprs.add(binaryPredicate);
 
         HashJoinNode hashJoinNode = new HashJoinNode(testPlanNodeId, new EmptySetNode(testPlanNodeId, tupleIdArrayList),
-                new EmptySetNode(testPlanNodeId, tupleIdArrayList), new TableRef(), testJoinexprs, new ArrayList<>());
+                        new EmptySetNode(testPlanNodeId, tupleIdArrayList), new TableRef(), testJoinexprs,
+                        new ArrayList<>());
+
         hashJoinNode.setFragment(new PlanFragment(new PlanFragmentId(-1), hashJoinNode,
                 new DataPartition(TPartitionType.BUCKET_SHFFULE_HASH_PARTITIONED, testJoinexprs)));
 
diff --git a/gensrc/proto/palo_internal_service.proto b/gensrc/proto/palo_internal_service.proto
new file mode 100644
index 0000000000..07da2e37ea
--- /dev/null
+++ b/gensrc/proto/palo_internal_service.proto
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// NOTE(XXX): DEPRECATED, kept only for compatibility with old versions,
+// so that the system can be upgraded gradually (grayscale upgrade).
+syntax="proto2";
+
+import "internal_service.proto";
+
+package palo;
+option java_package = "org.apache.doris.proto";
+
+option cc_generic_services = true;
+
+service PInternalService {
+    rpc transmit_data(doris.PTransmitDataParams) returns (doris.PTransmitDataResult);
+    rpc exec_plan_fragment(doris.PExecPlanFragmentRequest) returns (doris.PExecPlanFragmentResult);
+    rpc cancel_plan_fragment(doris.PCancelPlanFragmentRequest) returns (doris.PCancelPlanFragmentResult);
+    rpc fetch_data(doris.PFetchDataRequest) returns (doris.PFetchDataResult);
+    rpc tablet_writer_open(doris.PTabletWriterOpenRequest) returns (doris.PTabletWriterOpenResult);
+    rpc tablet_writer_add_batch(doris.PTabletWriterAddBatchRequest) returns (doris.PTabletWriterAddBatchResult);
+    rpc tablet_writer_cancel(doris.PTabletWriterCancelRequest) returns (doris.PTabletWriterCancelResult);
+    rpc get_info(doris.PProxyRequest) returns (doris.PProxyResult);
+    rpc update_cache(doris.PUpdateCacheRequest) returns (doris.PCacheResponse);
+    rpc fetch_cache(doris.PFetchCacheRequest) returns (doris.PFetchCacheResult);
+    rpc clear_cache(doris.PClearCacheRequest) returns (doris.PCacheResponse);
+
+    rpc merge_filter(doris.PMergeFilterRequest) returns (doris.PMergeFilterResponse);
+    rpc apply_filter(doris.PPublishFilterRequest) returns (doris.PPublishFilterResponse);
+    rpc fold_constant_expr(doris.PConstantExprRequest) returns (doris.PConstantExprResult);
+    rpc transmit_block(doris.PTransmitDataParams) returns (doris.PTransmitDataResult);
+};
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index f9d3fbe828..ae7dc1169c 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -44,67 +44,67 @@ visible_functions = [
     # For functions corresponding to builtin operators, we can reuse the implementations
     [['bitand'], 'TINYINT', ['TINYINT', 'TINYINT'],
         '_ZN5doris9Operators32bitand_tiny_int_val_tiny_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_10TinyIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_10TinyIntValES6_', '', '', 'vec', ''],
     [['bitand'], 'SMALLINT', ['SMALLINT', 'SMALLINT'],
         '_ZN5doris9Operators34bitand_small_int_val_small_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_11SmallIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_11SmallIntValES6_', '', '', 'vec', ''],
     [['bitand'], 'INT', ['INT', 'INT'],
         '_ZN5doris9Operators22bitand_int_val_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_6IntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_6IntValES6_', '', '', 'vec', ''],
     [['bitand'], 'BIGINT', ['BIGINT', 'BIGINT'],
         '_ZN5doris9Operators30bitand_big_int_val_big_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_9BigIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_9BigIntValES6_', '', '', 'vec', ''],
     [['bitand'], 'LARGEINT', ['LARGEINT', 'LARGEINT'],
         '_ZN5doris9Operators34bitand_large_int_val_large_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_11LargeIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_11LargeIntValES6_', '', '', 'vec', ''],
 
     [['bitor'], 'TINYINT', ['TINYINT', 'TINYINT'],
         '_ZN5doris9Operators31bitor_tiny_int_val_tiny_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_10TinyIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_10TinyIntValES6_', '', '', 'vec', ''],
     [['bitor'], 'SMALLINT', ['SMALLINT', 'SMALLINT'],
         '_ZN5doris9Operators33bitor_small_int_val_small_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_11SmallIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_11SmallIntValES6_', '', '', 'vec', ''],
     [['bitor'], 'INT', ['INT', 'INT'],
         '_ZN5doris9Operators21bitor_int_val_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_6IntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_6IntValES6_', '', '', 'vec', ''],
     [['bitor'], 'BIGINT', ['BIGINT', 'BIGINT'],
         '_ZN5doris9Operators29bitor_big_int_val_big_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_9BigIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_9BigIntValES6_', '', '', 'vec', ''],
     [['bitor'], 'LARGEINT', ['LARGEINT', 'LARGEINT'],
         '_ZN5doris9Operators33bitor_large_int_val_large_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_11LargeIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_11LargeIntValES6_', '', '', 'vec', ''],
 
     [['bitxor'], 'TINYINT', ['TINYINT', 'TINYINT'],
         '_ZN5doris9Operators32bitxor_tiny_int_val_tiny_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_10TinyIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_10TinyIntValES6_', '', '', 'vec', ''],
     [['bitxor'], 'SMALLINT', ['SMALLINT', 'SMALLINT'],
         '_ZN5doris9Operators34bitxor_small_int_val_small_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_11SmallIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_11SmallIntValES6_', '', '', 'vec', ''],
     [['bitxor'], 'INT', ['INT', 'INT'],
         '_ZN5doris9Operators22bitxor_int_val_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_6IntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_6IntValES6_', '', '', 'vec', ''],
     [['bitxor'], 'BIGINT', ['BIGINT', 'BIGINT'],
         '_ZN5doris9Operators30bitxor_big_int_val_big_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_9BigIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_9BigIntValES6_', '', '', 'vec', ''],
     [['bitxor'], 'LARGEINT', ['LARGEINT', 'LARGEINT'],
         '_ZN5doris9Operators34bitxor_large_int_val_large_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_11LargeIntValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_11LargeIntValES6_', '', '', 'vec', ''],
 
     [['bitnot'], 'TINYINT', ['TINYINT'],
         '_ZN5doris9Operators19bitnot_tiny_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_10TinyIntValE', '', '', '', ''],
+        '15FunctionContextERKNS1_10TinyIntValE', '', '', 'vec', ''],
     [['bitnot'], 'SMALLINT', ['SMALLINT'],
         '_ZN5doris9Operators20bitnot_small_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_11SmallIntValE', '', '', '', ''],
+        '15FunctionContextERKNS1_11SmallIntValE', '', '', 'vec', ''],
     [['bitnot'], 'INT', ['INT'],
         '_ZN5doris9Operators14bitnot_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_6IntValE', '', '', '', ''],
+        '15FunctionContextERKNS1_6IntValE', '', '', 'vec', ''],
     [['bitnot'], 'BIGINT', ['BIGINT'],
         '_ZN5doris9Operators18bitnot_big_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_9BigIntValE', '', '', '', ''],
+        '15FunctionContextERKNS1_9BigIntValE', '', '', 'vec', ''],
     [['bitnot'], 'LARGEINT', ['LARGEINT'],
         '_ZN5doris9Operators20bitnot_large_int_valEPN9doris_udf'
-        '15FunctionContextERKNS1_11LargeIntValE', '', '', '', ''],
+        '15FunctionContextERKNS1_11LargeIntValE', '', '', 'vec', ''],
 
     # array functions
     [['array'], 'ARRAY', ['INT', '...'],
@@ -124,29 +124,29 @@ visible_functions = [
     # Timestamp functions
     [['unix_timestamp'], 'INT', [],
         '_ZN5doris18TimestampFunctions7to_unixEPN9doris_udf15FunctionContextE',
-        '', '', '', 'ALWAYS_NOT_NULLABLE'],
+        '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['unix_timestamp'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions7to_unixEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['unix_timestamp'], 'INT', ['DATE'],
         '_ZN5doris18TimestampFunctions7to_unixEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['unix_timestamp'], 'INT', ['VARCHAR', 'VARCHAR'],
         '_ZN5doris18TimestampFunctions7to_unixEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
-        '', '', '', 'ALWAYS_NULLABLE'],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['unix_timestamp'], 'INT', ['STRING', 'STRING'],
         '_ZN5doris18TimestampFunctions7to_unixEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
-        '', '', '', 'ALWAYS_NULLABLE'],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['from_unixtime'], 'VARCHAR', ['INT'],
         '_ZN5doris18TimestampFunctions9from_unixEPN9doris_udf15FunctionContextERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['from_unixtime'], 'VARCHAR', ['INT', 'VARCHAR'],
         '_ZN5doris18TimestampFunctions9from_unixEPN9doris_udf'
         '15FunctionContextERKNS1_6IntValERKNS1_9StringValE',
         '_ZN5doris18TimestampFunctions14format_prepareEPN9doris_udf'
         '15FunctionContextENS2_18FunctionStateScopeE',
         '_ZN5doris18TimestampFunctions12format_closeEPN9doris_udf'
-        '15FunctionContextENS2_18FunctionStateScopeE', 'vec', ''],
+        '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'],
     [['from_unixtime'], 'VARCHAR', ['INT', 'STRING'],
         '_ZN5doris18TimestampFunctions9from_unixEPN9doris_udf'
         '15FunctionContextERKNS1_6IntValERKNS1_9StringValE',
@@ -156,132 +156,132 @@ visible_functions = [
         '15FunctionContextENS2_18FunctionStateScopeE', 'vec', ''],
     [['now', 'current_timestamp', 'localtime', 'localtimestamp'], 'DATETIME', [],
         '_ZN5doris18TimestampFunctions3nowEPN9doris_udf15FunctionContextE',
-        '', '', '', 'ALWAYS_NOT_NULLABLE'],
+        '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['curtime', 'current_time'], 'TIME', [],
         '_ZN5doris18TimestampFunctions7curtimeEPN9doris_udf15FunctionContextE',
-        '', '', '', 'ALWAYS_NOT_NULLABLE'],
+        '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['curdate', 'current_date'], 'DATE', [],
         '_ZN5doris18TimestampFunctions7curdateEPN9doris_udf15FunctionContextE',
-        '', '', '', 'ALWAYS_NOT_NULLABLE'],
+        '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['utc_timestamp'], 'DATETIME', [],
         '_ZN5doris18TimestampFunctions13utc_timestampEPN9doris_udf15FunctionContextE',
-        '', '', '', 'ALWAYS_NOT_NULLABLE'],
+        '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['timestamp'], 'DATETIME', ['DATETIME'],
         '_ZN5doris18TimestampFunctions9timestampEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
 
     [['from_days'], 'DATE', ['INT'],
         '_ZN5doris18TimestampFunctions9from_daysEPN9doris_udf15FunctionContextERKNS1_6IntValE',
-        '', '', '', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['to_days'], 'INT', ['DATE'],
         '_ZN5doris18TimestampFunctions7to_daysEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
 
-    [['year'], 'INT', ['DATETIME'],
+     [['year'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions4yearEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['month'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions5monthEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['quarter'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions7quarterEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['dayofweek'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions11day_of_weekEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
         '', '', 'vec', ''],
     [['weekday'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions8week_dayEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['day', 'dayofmonth'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions12day_of_monthEPN9doris_udf'
-        '15FunctionContextERKNS1_11DateTimeValE', '', '', 'vec', ''],
+        '15FunctionContextERKNS1_11DateTimeValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['dayofyear'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions11day_of_yearEPN9doris_udf'
-        '15FunctionContextERKNS1_11DateTimeValE', '', '', 'vec', ''],
+        '15FunctionContextERKNS1_11DateTimeValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['weekofyear'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions12week_of_yearEPN9doris_udf'
-        '15FunctionContextERKNS1_11DateTimeValE', '', '', 'vec', ''],
+        '15FunctionContextERKNS1_11DateTimeValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['yearweek'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions9year_weekEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', '', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['yearweek'], 'INT', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions9year_weekEPN9doris_udf15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
         '', '', '', ''],
     [['week'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions4weekEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', '', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['week'], 'INT', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions4weekEPN9doris_udf15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', '', ''],
+        '', '', '', 'ALWAYS_NULLABLE'],
     [['hour'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions4hourEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['minute'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions6minuteEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['second'], 'INT', ['DATETIME'],
         '_ZN5doris18TimestampFunctions6secondEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
 
     [['makedate'], 'DATETIME', ['INT', 'INT'],
         '_ZN5doris18TimestampFunctions9make_dateEPN9doris_udf15FunctionContextERKNS1_6IntValES6_',
-        '', '', '', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['years_add'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions9years_addEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['years_sub'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions9years_subEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['months_add', 'add_months'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions10months_addEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['months_sub'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions10months_subEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['weeks_add'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions9weeks_addEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['weeks_sub'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions9weeks_subEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['days_add', 'date_add', 'adddate'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions8days_addEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['days_sub', 'date_sub', 'subdate'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions8days_subEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['hours_add'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions9hours_addEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['hours_sub'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions9hours_subEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['minutes_add'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions11minutes_addEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['minutes_sub'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions11minutes_subEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['seconds_add'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions11seconds_addEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['seconds_sub'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions11seconds_subEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['microseconds_add'], 'DATETIME', ['DATETIME', 'INT'],
         '_ZN5doris18TimestampFunctions10micros_addEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_6IntValE',
@@ -293,10 +293,10 @@ visible_functions = [
 
     [['datediff'], 'INT', ['DATETIME', 'DATETIME'],
         '_ZN5doris18TimestampFunctions9date_diffEPN9doris_udf'
-        '15FunctionContextERKNS1_11DateTimeValES6_', '', '', 'vec', ''],
+        '15FunctionContextERKNS1_11DateTimeValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['timediff'], 'TIME', ['DATETIME', 'DATETIME'],
             '_ZN5doris18TimestampFunctions9time_diffEPN9doris_udf'
-            '15FunctionContextERKNS1_11DateTimeValES6_', '', '', 'vec', ''],
+            '15FunctionContextERKNS1_11DateTimeValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'],
 
     [['str_to_date'], 'DATETIME', ['VARCHAR', 'VARCHAR'],
         '_ZN5doris18TimestampFunctions11str_to_dateEPN9doris_udf'
@@ -310,71 +310,52 @@ visible_functions = [
         '_ZN5doris18TimestampFunctions14format_prepareEPN9doris_udf'
         '15FunctionContextENS2_18FunctionStateScopeE',
         '_ZN5doris18TimestampFunctions12format_closeEPN9doris_udf'
-        '15FunctionContextENS2_18FunctionStateScopeE', 'vec', ''],
+        '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'],
     [['date_format'], 'VARCHAR', ['DATE', 'VARCHAR'],
         '_ZN5doris18TimestampFunctions11date_formatEPN9doris_udf'
         '15FunctionContextERKNS1_11DateTimeValERKNS1_9StringValE',
         '_ZN5doris18TimestampFunctions14format_prepareEPN9doris_udf'
         '15FunctionContextENS2_18FunctionStateScopeE',
         '_ZN5doris18TimestampFunctions12format_closeEPN9doris_udf'
-        '15FunctionContextENS2_18FunctionStateScopeE', 'vec', ''],
-    [['date_format'], 'VARCHAR', ['DATETIME', 'STRING'],
-        '_ZN5doris18TimestampFunctions11date_formatEPN9doris_udf'
-        '15FunctionContextERKNS1_11DateTimeValERKNS1_9StringValE',
-        '_ZN5doris18TimestampFunctions14format_prepareEPN9doris_udf'
-        '15FunctionContextENS2_18FunctionStateScopeE',
-        '_ZN5doris18TimestampFunctions12format_closeEPN9doris_udf'
-        '15FunctionContextENS2_18FunctionStateScopeE', 'vec', ''],
-    [['date_format'], 'VARCHAR', ['DATE', 'STRING'],
-        '_ZN5doris18TimestampFunctions11date_formatEPN9doris_udf'
-        '15FunctionContextERKNS1_11DateTimeValERKNS1_9StringValE',
-        '_ZN5doris18TimestampFunctions14format_prepareEPN9doris_udf'
-        '15FunctionContextENS2_18FunctionStateScopeE',
-        '_ZN5doris18TimestampFunctions12format_closeEPN9doris_udf'
-        '15FunctionContextENS2_18FunctionStateScopeE', 'vec', ''],
+        '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'],
     [['date', 'to_date'], 'DATE', ['DATETIME'],
         '_ZN5doris18TimestampFunctions7to_dateEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
-        '', '', 'vec', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
 
     [['dayname'], 'VARCHAR', ['DATETIME'],
         '_ZN5doris18TimestampFunctions8day_nameEPN9doris_udf'
-        '15FunctionContextERKNS1_11DateTimeValE', '', '', 'vec', ''],
+        '15FunctionContextERKNS1_11DateTimeValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['monthname'], 'VARCHAR', ['DATETIME'],
         '_ZN5doris18TimestampFunctions10month_nameEPN9doris_udf'
-        '15FunctionContextERKNS1_11DateTimeValE', '', '', 'vec', ''],
+        '15FunctionContextERKNS1_11DateTimeValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
 
     [['convert_tz'], 'DATETIME', ['DATETIME', 'VARCHAR', 'VARCHAR'],
             '_ZN5doris18TimestampFunctions10convert_tzEPN9doris_udf15FunctionContextERKNS1_11DateTimeValERKNS1_9StringValES9_',
             '_ZN5doris18TimestampFunctions18convert_tz_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
             '_ZN5doris18TimestampFunctions16convert_tz_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
             'vec', 'ALWAYS_NULLABLE'],
-    [['convert_tz'], 'DATETIME', ['DATETIME', 'STRING', 'STRING'],
-            '_ZN5doris18TimestampFunctions10convert_tzEPN9doris_udf15FunctionContextERKNS1_11DateTimeValERKNS1_9StringValES9_',
-            '_ZN5doris18TimestampFunctions18convert_tz_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
-            '_ZN5doris18TimestampFunctions16convert_tz_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
-            'vec', 'ALWAYS_NULLABLE'],
 
     [['years_diff'], 'BIGINT', ['DATETIME', 'DATETIME'],
             '_ZN5doris18TimestampFunctions10years_diffEPN9doris_udf15FunctionContextERKNS1_11DateTimeValES6_',
-            '', '', 'vec', ''],
+            '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['months_diff'], 'BIGINT', ['DATETIME', 'DATETIME'],
             '_ZN5doris18TimestampFunctions11months_diffEPN9doris_udf15FunctionContextERKNS1_11DateTimeValES6_',
-            '', '', 'vec', ''],
+            '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['weeks_diff'], 'BIGINT', ['DATETIME', 'DATETIME'],
             '_ZN5doris18TimestampFunctions10weeks_diffEPN9doris_udf15FunctionContextERKNS1_11DateTimeValES6_',
-            '', '', 'vec', ''],
+            '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['days_diff'], 'BIGINT', ['DATETIME', 'DATETIME'],
             '_ZN5doris18TimestampFunctions9days_diffEPN9doris_udf15FunctionContextERKNS1_11DateTimeValES6_',
-            '', '', 'vec', ''],
+            '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['hours_diff'], 'BIGINT', ['DATETIME', 'DATETIME'],
             '_ZN5doris18TimestampFunctions10hours_diffEPN9doris_udf15FunctionContextERKNS1_11DateTimeValES6_',
-            '', '', 'vec', ''],
+            '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['minutes_diff'], 'BIGINT', ['DATETIME', 'DATETIME'],
             '_ZN5doris18TimestampFunctions12minutes_diffEPN9doris_udf15FunctionContextERKNS1_11DateTimeValES6_',
-            '', '', 'vec', ''],
+            '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['seconds_diff'], 'BIGINT', ['DATETIME', 'DATETIME'],
             '_ZN5doris18TimestampFunctions12seconds_diffEPN9doris_udf15FunctionContextERKNS1_11DateTimeValES6_',
-            '', '', 'vec', ''],
+            '', '', 'vec', 'ALWAYS_NULLABLE'],
 
     [['year_floor'], 'DATETIME', ['DATETIME'],
             '_ZN5doris18TimestampFunctions10year_floorEPN9doris_udf15FunctionContextERKNS1_11DateTimeValE',
@@ -547,9 +528,9 @@ visible_functions = [
 
     # Math builtin functions
     [['pi'], 'DOUBLE', [],
-        '_ZN5doris13MathFunctions2piEPN9doris_udf15FunctionContextE', '', '', 'vec', 'ALWAYS_NULLABLE'],
+        '_ZN5doris13MathFunctions2piEPN9doris_udf15FunctionContextE', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['e'], 'DOUBLE', [],
-        '_ZN5doris13MathFunctions1eEPN9doris_udf15FunctionContextE', '', '', 'vec', 'ALWAYS_NULLABLE'],
+        '_ZN5doris13MathFunctions1eEPN9doris_udf15FunctionContextE', '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
 
     [['abs'], 'DOUBLE', ['DOUBLE'],
         '_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_9DoubleValE', '', '', 'vec', ''],
@@ -568,7 +549,7 @@ visible_functions = [
     [['abs'], 'DECIMALV2', ['DECIMALV2'],
         '_ZN5doris13MathFunctions3absEPN9doris_udf15FunctionContextERKNS1_12DecimalV2ValE', '', '', 'vec', ''],
 
-    [['sign'], 'FLOAT', ['DOUBLE'],
+    [['sign'], 'TINYINT', ['DOUBLE'],
         '_ZN5doris13MathFunctions4signEPN9doris_udf15FunctionContextERKNS1_9DoubleValE', '', '', 'vec', ''],
 
     [['sin'], 'DOUBLE', ['DOUBLE'],
@@ -589,10 +570,10 @@ visible_functions = [
     [['floor', 'dfloor'], 'BIGINT', ['DOUBLE'],
             '_ZN5doris13MathFunctions5floorEPN9doris_udf15FunctionContextERKNS1_9DoubleValE', '', '', 'vec', ''],
     [['round', 'dround'], 'BIGINT', ['DOUBLE'],
-            '_ZN5doris13MathFunctions5roundEPN9doris_udf15FunctionContextERKNS1_9DoubleValE', '', '', '', ''],
+            '_ZN5doris13MathFunctions5roundEPN9doris_udf15FunctionContextERKNS1_9DoubleValE', '', '', 'vec', ''],
     [['round', 'dround'], 'DOUBLE', ['DOUBLE', 'INT'],
             '_ZN5doris13MathFunctions11round_up_toEPN9doris_udf'
-            '15FunctionContextERKNS1_9DoubleValERKNS1_6IntValE', '', '', '', ''],
+            '15FunctionContextERKNS1_9DoubleValERKNS1_6IntValE', '', '', 'vec', ''],
     [['truncate'], 'DOUBLE', ['DOUBLE', 'INT'],
             '_ZN5doris13MathFunctions8truncateEPN9doris_udf'
             '15FunctionContextERKNS1_9DoubleValERKNS1_6IntValE', '', '', 'vec', ''],
@@ -623,36 +604,36 @@ visible_functions = [
             '_ZN5doris13MathFunctions12rand_prepareEPN9doris_udf'
             '15FunctionContextENS2_18FunctionStateScopeE',
             '_ZN5doris13MathFunctions10rand_closeEPN9doris_udf'
-            '15FunctionContextENS2_18FunctionStateScopeE', '', 'ALWAYS_NOT_NULLABLE'],
+            '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['rand', 'random'], 'DOUBLE', ['BIGINT'],
             '_ZN5doris13MathFunctions9rand_seedEPN9doris_udf15FunctionContextERKNS1_9BigIntValE',
             '_ZN5doris13MathFunctions12rand_prepareEPN9doris_udf'
             '15FunctionContextENS2_18FunctionStateScopeE',
             '_ZN5doris13MathFunctions10rand_closeEPN9doris_udf'
-            '15FunctionContextENS2_18FunctionStateScopeE', '', ''],
+            '15FunctionContextENS2_18FunctionStateScopeE', 'vec', ''],
 
     [['bin'], 'VARCHAR', ['BIGINT'],
-            '_ZN5doris13MathFunctions3binEPN9doris_udf15FunctionContextERKNS1_9BigIntValE', '', '', '', ''],
+            '_ZN5doris13MathFunctions3binEPN9doris_udf15FunctionContextERKNS1_9BigIntValE', '', '', 'vec', ''],
     [['hex'], 'VARCHAR', ['BIGINT'],
-            '_ZN5doris13MathFunctions7hex_intEPN9doris_udf15FunctionContextERKNS1_9BigIntValE', '', '', '', ''],
+            '_ZN5doris13MathFunctions7hex_intEPN9doris_udf15FunctionContextERKNS1_9BigIntValE', '', '', 'vec', ''],
     [['hex'], 'VARCHAR', ['VARCHAR'],
-            '_ZN5doris13MathFunctions10hex_stringEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', '', ''],
+            '_ZN5doris13MathFunctions10hex_stringEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''],
     [['hex'], 'STRING', ['STRING'],
-            '_ZN5doris13MathFunctions10hex_stringEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', '', ''],
+            '_ZN5doris13MathFunctions10hex_stringEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''],
     [['unhex'], 'VARCHAR', ['VARCHAR'],
-            '_ZN5doris13MathFunctions5unhexEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', '', ''],
+            '_ZN5doris13MathFunctions5unhexEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['unhex'], 'STRING', ['STRING'],
-            '_ZN5doris13MathFunctions5unhexEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', '', ''],
+            '_ZN5doris13MathFunctions5unhexEPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
 
     [['conv'], 'VARCHAR', ['BIGINT', 'TINYINT', 'TINYINT'],
             '_ZN5doris13MathFunctions8conv_intEPN9doris_udf'
-            '15FunctionContextERKNS1_9BigIntValERKNS1_10TinyIntValES9_', '', '', '', ''],
+            '15FunctionContextERKNS1_9BigIntValERKNS1_10TinyIntValES9_', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['conv'], 'VARCHAR', ['VARCHAR', 'TINYINT', 'TINYINT'],
             '_ZN5doris13MathFunctions11conv_stringEPN9doris_udf'
-            '15FunctionContextERKNS1_9StringValERKNS1_10TinyIntValES9_', '', '', '', ''],
+            '15FunctionContextERKNS1_9StringValERKNS1_10TinyIntValES9_', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['conv'], 'VARCHAR', ['STRING', 'TINYINT', 'TINYINT'],
             '_ZN5doris13MathFunctions11conv_stringEPN9doris_udf'
-            '15FunctionContextERKNS1_9StringValERKNS1_10TinyIntValES9_', '', '', '', ''],
+            '15FunctionContextERKNS1_9StringValERKNS1_10TinyIntValES9_', '', '', 'vec', 'ALWAYS_NULLABLE'],
 
     [['pmod'], 'BIGINT', ['BIGINT', 'BIGINT'],
             '_ZN5doris13MathFunctions11pmod_bigintEPN9doris_udf'
@@ -824,21 +805,21 @@ visible_functions = [
     [['ifnull'], 'VARCHAR', ['VARCHAR', 'VARCHAR'], '', '', '', 'vec', 'CUSTOM'],
     [['ifnull'], 'STRING', ['STRING', 'STRING'], '', '', '', 'vec', 'CUSTOM'],
 
-    [['coalesce'], 'BOOLEAN', ['BOOLEAN', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'TINYINT', ['TINYINT', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'SMALLINT', ['SMALLINT', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'INT', ['INT', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'BIGINT', ['BIGINT', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'LARGEINT', ['LARGEINT', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'FLOAT', ['FLOAT', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'DOUBLE', ['DOUBLE', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'DATETIME', ['DATETIME', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'DATE', ['DATE', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'DECIMALV2', ['DECIMALV2', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'BITMAP', ['BITMAP', '...'], '', '', '', '', 'CUSTOM'],
+    [['coalesce'], 'BOOLEAN', ['BOOLEAN', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'TINYINT', ['TINYINT', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'SMALLINT', ['SMALLINT', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'INT', ['INT', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'BIGINT', ['BIGINT', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'LARGEINT', ['LARGEINT', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'FLOAT', ['FLOAT', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'DOUBLE', ['DOUBLE', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'DATETIME', ['DATETIME', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'DATE', ['DATE', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'DECIMALV2', ['DECIMALV2', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'BITMAP', ['BITMAP', '...'], '', '', '', 'vec', 'CUSTOM'],
     # The priority of varchar should be lower than decimal in IS_SUPERTYPE_OF mode.
-    [['coalesce'], 'VARCHAR', ['VARCHAR', '...'], '', '', '', '', 'CUSTOM'],
-    [['coalesce'], 'STRING', ['STRING', '...'], '', '', '', '', 'CUSTOM'],
+    [['coalesce'], 'VARCHAR', ['VARCHAR', '...'], '', '', '', 'vec', 'CUSTOM'],
+    [['coalesce'], 'STRING', ['STRING', '...'], '', '', '', 'vec', 'CUSTOM'],
 
     [['esquery'], 'BOOLEAN', ['VARCHAR', 'VARCHAR'],
         '_ZN5doris11ESFunctions5matchEPN'
@@ -847,16 +828,16 @@ visible_functions = [
     # String builtin functions
     [['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT'],
         '_ZN5doris15StringFunctions9substringEPN'
-        '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', '', 'ALWAYS_NULLABLE'],
+        '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT', 'INT'],
         '_ZN5doris15StringFunctions9substringEPN'
         '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValES9_', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['strleft', 'left'], 'VARCHAR', ['VARCHAR', 'INT'],
         '_ZN5doris15StringFunctions4leftEPN9doris_udf'
-        '15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', 'vec', ''],
+        '15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['strright', 'right'], 'VARCHAR', ['VARCHAR', 'INT'],
         '_ZN5doris15StringFunctions5rightEPN9doris_udf'
-        '15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', 'vec', ''],
+        '15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['ends_with'], 'BOOLEAN', ['VARCHAR', 'VARCHAR'],
         '_ZN5doris15StringFunctions9ends_withEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
         '', '', 'vec', ''],
@@ -942,34 +923,34 @@ visible_functions = [
             '_ZN5doris15StringFunctions17parse_url_prepareEPN9doris_udf'
             '15FunctionContextENS2_18FunctionStateScopeE',
             '_ZN5doris15StringFunctions15parse_url_closeEPN9doris_udf'
-            '15FunctionContextENS2_18FunctionStateScopeE', '', ''],
+            '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'],
     [['parse_url'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR'],
             '_ZN5doris15StringFunctions13parse_url_keyEPN9doris_udf'
             '15FunctionContextERKNS1_9StringValES6_S6_',
             '_ZN5doris15StringFunctions17parse_url_prepareEPN9doris_udf'
             '15FunctionContextENS2_18FunctionStateScopeE',
             '_ZN5doris15StringFunctions15parse_url_closeEPN9doris_udf'
-            '15FunctionContextENS2_18FunctionStateScopeE', '', ''],
+            '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'],
     [['money_format'], 'VARCHAR', ['BIGINT'],
         '_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_9BigIntValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['money_format'], 'VARCHAR', ['LARGEINT'],
         '_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_11LargeIntValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['money_format'], 'VARCHAR', ['DOUBLE'],
         '_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_9DoubleValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['money_format'], 'VARCHAR', ['DECIMALV2'],
         '_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_12DecimalV2ValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['split_part'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'INT'],
         '_ZN5doris15StringFunctions10split_partEPN9doris_udf15FunctionContextERKNS1_9StringValES6_RKNS1_6IntValE',
-        '', '', '', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
 
     # Longtext function
     [['substr', 'substring'], 'STRING', ['STRING', 'INT'],
         '_ZN5doris15StringFunctions9substringEPN'
-        '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', '', 'ALWAYS_NULLABLE'],
+        '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['substr', 'substring'], 'STRING', ['STRING', 'INT', 'INT'],
         '_ZN5doris15StringFunctions9substringEPN'
         '9doris_udf15FunctionContextERKNS1_9StringValERKNS1_6IntValES9_', '', '', 'vec', 'ALWAYS_NULLABLE'],
@@ -1064,69 +1045,69 @@ visible_functions = [
             '_ZN5doris15StringFunctions17parse_url_prepareEPN9doris_udf'
             '15FunctionContextENS2_18FunctionStateScopeE',
             '_ZN5doris15StringFunctions15parse_url_closeEPN9doris_udf'
-            '15FunctionContextENS2_18FunctionStateScopeE', '', ''],
+            '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'],
     [['parse_url'], 'STRING', ['STRING', 'STRING', 'STRING'],
             '_ZN5doris15StringFunctions13parse_url_keyEPN9doris_udf'
             '15FunctionContextERKNS1_9StringValES6_S6_',
             '_ZN5doris15StringFunctions17parse_url_prepareEPN9doris_udf'
             '15FunctionContextENS2_18FunctionStateScopeE',
             '_ZN5doris15StringFunctions15parse_url_closeEPN9doris_udf'
-            '15FunctionContextENS2_18FunctionStateScopeE', '', ''],
+            '15FunctionContextENS2_18FunctionStateScopeE', 'vec', 'ALWAYS_NULLABLE'],
     [['money_format'], 'STRING', ['BIGINT'],
         '_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_9BigIntValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['money_format'], 'STRING', ['LARGEINT'],
         '_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_11LargeIntValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['money_format'], 'STRING', ['DOUBLE'],
         '_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_9DoubleValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['money_format'], 'STRING', ['DECIMALV2'],
         '_ZN5doris15StringFunctions12money_formatEPN9doris_udf15FunctionContextERKNS1_12DecimalV2ValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['split_part'], 'STRING', ['STRING', 'STRING', 'INT'],
         '_ZN5doris15StringFunctions10split_partEPN9doris_udf15FunctionContextERKNS1_9StringValES6_RKNS1_6IntValE',
-        '', '', '', ''],
+        '', '', 'vec', 'ALWAYS_NULLABLE'],
 
     # Utility functions
     [['sleep'], 'BOOLEAN', ['INT'],
         '_ZN5doris16UtilityFunctions5sleepEPN9doris_udf15FunctionContextERKNS1_6IntValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['version'], 'VARCHAR', [],
         '_ZN5doris16UtilityFunctions7versionEPN9doris_udf15FunctionContextE',
-        '', '', '', 'ALWAYS_NOT_NULLABLE'],
+        '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
 
     # Json functions
     [['get_json_int'], 'INT', ['VARCHAR', 'VARCHAR'],
         '_ZN5doris13JsonFunctions12get_json_intEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
         '_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
         '_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
-        'vec', ''],
+        'vec', 'ALWAYS_NULLABLE'],
     [['get_json_double'], 'DOUBLE', ['VARCHAR', 'VARCHAR'],
         '_ZN5doris13JsonFunctions15get_json_doubleEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
         '_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
         '_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
-        'vec', ''],
+        'vec', 'ALWAYS_NULLABLE'],
     [['get_json_string'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],
         '_ZN5doris13JsonFunctions15get_json_stringEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
         '_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
         '_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
-        'vec', ''],
+        'vec', 'ALWAYS_NULLABLE'],
     [['get_json_int'], 'INT', ['STRING', 'STRING'],
         '_ZN5doris13JsonFunctions12get_json_intEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
         '_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
         '_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
-        'vec', ''],
+        'vec', 'ALWAYS_NULLABLE'],
     [['get_json_double'], 'DOUBLE', ['STRING', 'STRING'],
         '_ZN5doris13JsonFunctions15get_json_doubleEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
         '_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
         '_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
-        'vec', ''],
+        'vec', 'ALWAYS_NULLABLE'],
     [['get_json_string'], 'STRING', ['STRING', 'STRING'],
         '_ZN5doris13JsonFunctions15get_json_stringEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
         '_ZN5doris13JsonFunctions17json_path_prepareEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
         '_ZN5doris13JsonFunctions15json_path_closeEPN9doris_udf15FunctionContextENS2_18FunctionStateScopeE',
-        'vec', ''],
+        'vec', 'ALWAYS_NULLABLE'],
 
     [['json_array'], 'VARCHAR', ['VARCHAR', '...'],
             '_ZN5doris13JsonFunctions10json_arrayEPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
@@ -1195,7 +1176,7 @@ visible_functions = [
         '', '', '', ''],
     [['bitmap_xor_count'], 'BIGINT', ['BITMAP','BITMAP'],
         '_ZN5doris15BitmapFunctions16bitmap_xor_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['bitmap_not'], 'BITMAP', ['BITMAP','BITMAP'],
         '_ZN5doris15BitmapFunctions10bitmap_notEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
         '', '', 'vec', ''],
@@ -1231,7 +1212,7 @@ visible_functions = [
         '', '', 'vec', ''],
     [['bitmap_max'], 'BIGINT', ['BITMAP'],
         '_ZN5doris15BitmapFunctions10bitmap_maxEPN9doris_udf15FunctionContextERKNS1_9StringValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['bitmap_subset_in_range'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'],
         '_ZN5doris15BitmapFunctions22bitmap_subset_in_rangeEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_',
         '', '', 'vec', ''],
@@ -1246,10 +1227,10 @@ visible_functions = [
         '', '', '', ''],
     [['bitmap_or_count'], 'BIGINT', ['BITMAP','BITMAP','...'],
         '_ZN5doris15BitmapFunctions15bitmap_or_countEPN9doris_udf15FunctionContextERKNS1_9StringValEiPS5_',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['bitmap_or_count'], 'BIGINT', ['BITMAP','BITMAP'],
         '_ZN5doris15BitmapFunctions15bitmap_or_countEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['sub_bitmap'], 'BITMAP', ['BITMAP', 'BIGINT', 'BIGINT'],
         '_ZN5doris15BitmapFunctions10sub_bitmapEPN9doris_udf15FunctionContextERKNS1_9StringValERKNS1_9BigIntValES9_',
         '', '', 'vec', ''],
@@ -1257,18 +1238,18 @@ visible_functions = [
     # hash functions
     [['murmur_hash3_32'], 'INT', ['VARCHAR', '...'],
         '_ZN5doris13HashFunctions15murmur_hash3_32EPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
     [['murmur_hash3_32'], 'INT', ['STRING', '...'],
         '_ZN5doris13HashFunctions15murmur_hash3_32EPN9doris_udf15FunctionContextEiPKNS1_9StringValE',
-        '', '', '', ''],
+        '', '', 'vec', ''],
 
     # aes and base64 function
     [['aes_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],
         '_ZN5doris19EncryptionFunctions11aes_encryptEPN9doris_udf'
-        '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['aes_decrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR'],
         '_ZN5doris19EncryptionFunctions11aes_decryptEPN9doris_udf'
-        '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['aes_encrypt'], 'VARCHAR', ['VARCHAR', 'VARCHAR', 'VARCHAR', 'VARCHAR'],
         '_ZN5doris19EncryptionFunctions11aes_encryptEPN9doris_udf'
         '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
@@ -1289,16 +1270,13 @@ visible_functions = [
          '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
     [['from_base64'], 'VARCHAR', ['VARCHAR'],
         '_ZN5doris19EncryptionFunctions11from_base64EPN9doris_udf'
-        '15FunctionContextERKNS1_9StringValE', '', '', '', ''],
-    [['to_base64'], 'VARCHAR', ['VARCHAR'],
-        '_ZN5doris19EncryptionFunctions9to_base64EPN9doris_udf'
-        '15FunctionContextERKNS1_9StringValE', '', '', '', ''],
+        '15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['aes_encrypt'], 'STRING', ['STRING', 'STRING'],
         '_ZN5doris19EncryptionFunctions11aes_encryptEPN9doris_udf'
-        '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['aes_decrypt'], 'STRING', ['STRING', 'STRING'],
         '_ZN5doris19EncryptionFunctions11aes_decryptEPN9doris_udf'
-        '15FunctionContextERKNS1_9StringValES6_', '', '', '', ''],
+        '15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['aes_encrypt'], 'STRING', ['STRING', 'STRING', 'STRING', 'STRING'],
         '_ZN5doris19EncryptionFunctions11aes_encryptEPN9doris_udf'
         '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
@@ -1319,19 +1297,23 @@ visible_functions = [
           '15FunctionContextERKNS1_9StringValES6_S6_S6_', '', '', '', ''],
     [['from_base64'], 'STRING', ['STRING'],
         '_ZN5doris19EncryptionFunctions11from_base64EPN9doris_udf'
-        '15FunctionContextERKNS1_9StringValE', '', '', '', ''],
+        '15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
     [['to_base64'], 'STRING', ['STRING'],
         '_ZN5doris19EncryptionFunctions9to_base64EPN9doris_udf'
-        '15FunctionContextERKNS1_9StringValE', '', '', '', ''],
+        '15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
+    [['to_base64'], 'VARCHAR', ['VARCHAR'],
+        '_ZN5doris19EncryptionFunctions9to_base64EPN9doris_udf'
+        '15FunctionContextERKNS1_9StringValE', '', '', 'vec', 'ALWAYS_NULLABLE'],
+
     # for compatable with MySQL
     [['md5'], 'VARCHAR', ['VARCHAR'],
-        '_ZN5doris19EncryptionFunctions3md5EPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', '', ''],
+        '_ZN5doris19EncryptionFunctions3md5EPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''],
     [['md5sum'], 'VARCHAR', ['VARCHAR', '...'],
-        '_ZN5doris19EncryptionFunctions6md5sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', '', ''],
+        '_ZN5doris19EncryptionFunctions6md5sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', 'vec', ''],
     [['md5'], 'VARCHAR', ['STRING'],
-        '_ZN5doris19EncryptionFunctions3md5EPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', '', ''],
+        '_ZN5doris19EncryptionFunctions3md5EPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', 'vec', ''],
     [['md5sum'], 'VARCHAR', ['STRING', '...'],
-        '_ZN5doris19EncryptionFunctions6md5sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', '', ''],
+        '_ZN5doris19EncryptionFunctions6md5sumEPN9doris_udf15FunctionContextEiPKNS1_9StringValE', '', '', 'vec', ''],
 
     [['sm3'], 'VARCHAR', ['VARCHAR'],
         '_ZN5doris19EncryptionFunctions3sm3EPN9doris_udf15FunctionContextERKNS1_9StringValE', '', '', '', ''],
@@ -1415,10 +1397,10 @@ visible_functions = [
     # grouping sets functions
     [['grouping_id'], 'BIGINT', ['BIGINT'],
         '_ZN5doris21GroupingSetsFunctions11grouping_idEPN9doris_udf15FunctionContextERKNS1_9BigIntValE',
-        '', '', '', 'ALWAYS_NOT_NULLABLE'],
+        '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
     [['grouping'], 'BIGINT', ['BIGINT'],
         '_ZN5doris21GroupingSetsFunctions8groupingEPN9doris_udf15FunctionContextERKNS1_9BigIntValE',
-        '' ,'', '', 'ALWAYS_NOT_NULLABLE'],
+        '' ,'', 'vec', 'ALWAYS_NOT_NULLABLE'],
 ]
 
 # Except the following functions, other function will directly return
@@ -1446,7 +1428,10 @@ nondeterministic_functions = [
     'rand',
     'now',
     'current_timestamp',
+    'localtime',
+    'localtimestamp',
     'curdate',
+    'current_date',
     'curtime',
     'current_time',
     'utc_timestamp'
diff --git a/gensrc/script/gen_build_version.sh b/gensrc/script/gen_build_version.sh
index 2937cdb9aa..b9a3f1aeb4 100755
--- a/gensrc/script/gen_build_version.sh
+++ b/gensrc/script/gen_build_version.sh
@@ -25,7 +25,7 @@
 # contains the build version based on the git hash or svn revision.
 ##############################################################
 
-build_version="trunk"
+build_version="pre-0.15.0"
 
 unset LANG
 unset LC_CTYPE
diff --git a/gensrc/script/gen_builtins_functions.py b/gensrc/script/gen_builtins_functions.py
index 95c12ff334..91a3876950 100755
--- a/gensrc/script/gen_builtins_functions.py
+++ b/gensrc/script/gen_builtins_functions.py
@@ -165,6 +165,11 @@ def generate_fe_registry_init(filename):
         java_registry_file.write("        nondeterministicFuncNames.add(\"%s\");\n" % entry)
     java_registry_file.write("        functionSet.buildNondeterministicFunctions(nondeterministicFuncNames);\n");
 
+    java_registry_file.write("        funcNames = Sets.newHashSet();\n")
+    for entry in doris_builtins_functions.null_result_with_one_null_param_functions:
+        java_registry_file.write("        funcNames.add(\"%s\");\n" % entry)
+    java_registry_file.write("        functionSet.buildNullResultWithOneNullParamFunction(funcNames);\n");
+
     java_registry_file.write(java_registry_epilogue)
     java_registry_file.close()
 
diff --git a/gensrc/thrift/DataSinks.thrift b/gensrc/thrift/DataSinks.thrift
index 3556089e77..d41d77ea5c 100644
--- a/gensrc/thrift/DataSinks.thrift
+++ b/gensrc/thrift/DataSinks.thrift
@@ -34,8 +34,6 @@ enum TDataSinkType {
     MEMORY_SCRATCH_SINK,
     ODBC_TABLE_SINK,
     RESULT_FILE_SINK,
-    VRESULT_SINK,
-    VDATA_STREAM_SINK,
 }
 
 enum TResultSinkType {
diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift
index c745d45262..2f28b327a7 100644
--- a/gensrc/thrift/PlanNodes.thrift
+++ b/gensrc/thrift/PlanNodes.thrift
@@ -390,6 +390,10 @@ struct THashJoinNode {
   // If true, this join node can (but may choose not to) generate slot filters
   // after constructing the build side that can be applied to the probe side.
   4: optional bool add_probe_filters
+
+  // anything from the ON or USING clauses (but *not* the WHERE clause) that's not an
+  // equi-join predicate; only used by the vec exec engine
+  5: optional Exprs.TExpr vother_join_conjunct	
 }
 
 struct TMergeJoinNode {
@@ -780,6 +784,7 @@ struct TPlanNode {
   // Runtime filters assigned to this plan node, exist in HashJoinNode and ScanNode
   36: optional list<TRuntimeFilterDesc> runtime_filters
 
+  // Used in the vec exec engine
   40: optional Exprs.TExpr vconjunct
 
   41: optional TTableFunctionNode table_function_node
diff --git a/run-be-ut.sh b/run-be-ut.sh
index 1fce850cb0..26a23f7100 100755
--- a/run-be-ut.sh
+++ b/run-be-ut.sh
@@ -47,6 +47,7 @@ Usage: $0 <options>
      --clean    clean and build ut
      --run      build and run all ut
      --run xx   build and run specified ut
+     -v         build and run all vectorized ut
      -j         build parallel
 
   Eg.
@@ -65,6 +66,7 @@ OPTS=$(getopt \
   -l 'run' \
   -l 'clean' \
   -o 'j:' \
+  -o 'j:v' \
   -- "$@")
 
 if [ $? != 0 ] ; then
@@ -73,20 +75,17 @@ fi
 
 eval set -- "$OPTS"
 
-PARALLEL=$[$(nproc)/4+1]
-CLEAN=
-RUN=
-if [ $# == 1 ] ; then
-    #default
-    CLEAN=0
-    RUN=0
-else
-    CLEAN=0
-    RUN=0
+PARALLEL=$[$(nproc)/5+1]
+
+CLEAN=0
+RUN=0
+VECTORIZED_ONLY=0
+if [ $# != 1 ] ; then
     while true; do 
         case "$1" in
             --clean) CLEAN=1 ; shift ;;
             --run) RUN=1 ; shift ;;
+            -v) VECTORIZED_ONLY=1 ; shift ;;
             -j) PARALLEL=$2; shift 2 ;;
             --) shift ;  break ;;
             *) echo "Internal error" ; exit 1 ;;
@@ -168,6 +167,12 @@ cp -r ${DORIS_HOME}/be/test/util/test_data ${DORIS_TEST_BINARY_DIR}/util/
 cp -r ${DORIS_HOME}/be/test/plugin/plugin_test ${DORIS_TEST_BINARY_DIR}/plugin/
 
 # find all executable test files
+
+if [ ${VECTORIZED_ONLY} -eq 1 ]; then
+    echo "Run Vectorized ut only"
+    export DORIS_TEST_BINARY_DIR=${DORIS_TEST_BINARY_DIR}/vec
+fi
+
 test_files=`find ${DORIS_TEST_BINARY_DIR} -type f -perm -111 -name "*test"`
 
 for test in ${test_files[@]}
@@ -178,5 +183,4 @@ do
         $test --gtest_output=xml:${GTEST_OUTPUT_DIR}/${file_name}.xml
     fi
 done
-
 echo "=== Finished. Gtest output: ${GTEST_OUTPUT_DIR}"